From e6e2c3c516f3e440ca215a5c69c232573a93a30d Mon Sep 17 00:00:00 2001 From: bacalfa Date: Wed, 14 Jun 2017 13:19:50 -0400 Subject: [PATCH] Added objective function value and MI return values to IT-based --- .../information_theoretical_based/CIFE.py | 10 ++++++--- .../information_theoretical_based/CMIM.py | 18 ++++++++++++--- .../information_theoretical_based/DISR.py | 22 ++++++++++++++----- .../information_theoretical_based/FCBF.py | 7 +++++- .../information_theoretical_based/ICAP.py | 14 +++++++++++- .../information_theoretical_based/JMI.py | 10 ++++++--- .../information_theoretical_based/LCSI.py | 22 ++++++++++++++----- .../information_theoretical_based/MIFS.py | 10 ++++++--- .../information_theoretical_based/MIM.py | 10 ++++++--- .../information_theoretical_based/MRMR.py | 10 ++++++--- 10 files changed, 103 insertions(+), 30 deletions(-) diff --git a/skfeature/function/information_theoretical_based/CIFE.py b/skfeature/function/information_theoretical_based/CIFE.py index e641b00..2a5104c 100644 --- a/skfeature/function/information_theoretical_based/CIFE.py +++ b/skfeature/function/information_theoretical_based/CIFE.py @@ -19,6 +19,10 @@ def cife(X, y, **kwargs): ------ F: {numpy array}, shape (n_features,) index of selected features, F[0] is the most important feature + J_CMI: {numpy array}, shape: (n_features,) + corresponding objective function value of selected features + MIfy: {numpy array}, shape: (n_features,) + corresponding mutual information between selected features and response Reference --------- @@ -27,7 +31,7 @@ def cife(X, y, **kwargs): if 'n_selected_features' in kwargs.keys(): n_selected_features = kwargs['n_selected_features'] - F = LCSI.lcsi(X, y, beta=1, gamma=1, n_selected_features=n_selected_features) + F, J_CMI, MIfy = LCSI.lcsi(X, y, beta=1, gamma=1, n_selected_features=n_selected_features) else: - F = LCSI.lcsi(X, y, beta=1, gamma=1) - return F + F, J_CMI, MIfy = LCSI.lcsi(X, y, beta=1, gamma=1) + return F, J_CMI, MIfy diff --git a/skfeature/function/information_theoretical_based/CMIM.py b/skfeature/function/information_theoretical_based/CMIM.py index 9d16ac1..a56bad2 100644 --- a/skfeature/function/information_theoretical_based/CMIM.py +++ b/skfeature/function/information_theoretical_based/CMIM.py @@ -20,6 +20,10 @@ def cmim(X, y, **kwargs): ------ F: {numpy array}, shape (n_features,) index of selected features, F[0] is the most important feature + J_CMIM: {numpy array}, shape: (n_features,) + corresponding objective function value of selected features + MIfy: {numpy array}, shape: (n_features,) + corresponding mutual information between selected features and response Reference --------- @@ -29,6 +33,10 @@ def cmim(X, y, **kwargs): n_samples, n_features = X.shape # index of selected features, initialized to be empty F = [] + # Objective function value for selected features + J_CMIM = [] + # Mutual information between feature and response + MIfy = [] # indicate whether the user specifies the number of features is_n_selected_features_specified = False @@ -54,12 +62,14 @@ def cmim(X, y, **kwargs): # select the feature whose mutual information is the largest idx = np.argmax(t1) F.append(idx) + J_CMIM.append(t1[idx]) + MIfy.append(t1[idx]) f_select = X[:, idx] - if is_n_selected_features_specified is True: + if is_n_selected_features_specified: if len(F) == n_selected_features: break - if is_n_selected_features_specified is not True: + else: if j_cmim <= 0: break @@ -79,6 +89,8 @@ def cmim(X, y, **kwargs): j_cmim = t idx = i F.append(idx) + J_CMIM.append(j_cmim) + MIfy.append(t1[idx]) f_select = X[:, idx] - return np.array(F) \ No newline at end of file + return np.array(F), np.array(J_CMIM), np.array(MIfy) \ No newline at end of file diff --git a/skfeature/function/information_theoretical_based/DISR.py b/skfeature/function/information_theoretical_based/DISR.py index edcc365..2148e09 100644 --- a/skfeature/function/information_theoretical_based/DISR.py +++ b/skfeature/function/information_theoretical_based/DISR.py @@ -22,6 +22,10 @@ def disr(X, y, **kwargs): ------ F: {numpy array}, shape (n_features, ) index of selected features, F[0] is the most important feature + J_DISR: {numpy array}, shape: (n_features,) + corresponding objective function value of selected features + MIfy: {numpy array}, shape: (n_features,) + corresponding mutual information between selected features and response Reference --------- @@ -31,6 +35,10 @@ def disr(X, y, **kwargs): n_samples, n_features = X.shape # index of selected features, initialized to be empty F = [] + # Objective function value for selected features + J_DISR = [] + # Mutual information between feature and response + MIfy = [] # indicate whether the user specifies the number of features is_n_selected_features_specified = False @@ -54,6 +62,8 @@ def disr(X, y, **kwargs): # select the feature whose mutual information is the largest idx = np.argmax(t1) F.append(idx) + J_DISR.append(t1[idx]) + MIfy.append(t1[idx]) f_select = X[:, idx] if is_n_selected_features_specified is True: @@ -64,19 +74,21 @@ def disr(X, y, **kwargs): break # we assign an extreme small value to j_disr to ensure that it is smaller than all possible value of j_disr - j_disr = -1000000000000 + j_disr = -1E30 for i in range(n_features): if i not in F: f = X[:, i] - t1 = midd(f_select, y) + cmidd(f, y, f_select) - t2 = entropyd(f) + conditional_entropy(f_select, f) + (conditional_entropy(y, f_select) - cmidd(y, f, f_select)) - sum[i] += np.true_divide(t1, t2) + t2 = midd(f_select, y) + cmidd(f, y, f_select) + t3 = entropyd(f) + conditional_entropy(f_select, f) + (conditional_entropy(y, f_select) - cmidd(y, f, f_select)) + sum[i] += np.true_divide(t2, t3) # record the largest j_disr and the corresponding feature index if sum[i] > j_disr: j_disr = sum[i] idx = i F.append(idx) + J_DISR.append(j_disr) + MIfy.append(t1[idx]) f_select = X[:, idx] - return np.array(F) + return np.array(F), np.array(J_DISR), np.array(MIfy) diff --git a/skfeature/function/information_theoretical_based/FCBF.py b/skfeature/function/information_theoretical_based/FCBF.py index e3e21b9..98d0028 100644 --- a/skfeature/function/information_theoretical_based/FCBF.py +++ b/skfeature/function/information_theoretical_based/FCBF.py @@ -20,6 +20,8 @@ def fcbf(X, y, **kwargs): ------ F: {numpy array}, shape (n_features,) index of selected features, F[0] is the most important feature + SU: {numpy array}, shape (n_features,) + symmetrical uncertainty of selected features Reference --------- @@ -42,6 +44,8 @@ def fcbf(X, y, **kwargs): s_list = t1[t1[:, 1] > delta, :] # index of selected features, initialized to be empty F = [] + # Symmetrical uncertainty of selected features + SU = [] while len(s_list) != 0: # select the largest su inside s_list idx = np.argmax(s_list[:, 1]) @@ -49,6 +53,7 @@ def fcbf(X, y, **kwargs): fp = X[:, s_list[idx, 0]] np.delete(s_list, idx, 0) F.append(s_list[idx, 0]) + SU.append(s_list[idx, 1]) for i in s_list[:, 0]: fi = X[:, i] if su_calculation(fp, fi) >= t1[i, 1]: @@ -60,4 +65,4 @@ def fcbf(X, y, **kwargs): s_list = s_list[idx] length = len(s_list)/2 s_list = s_list.reshape((length, 2)) - return np.array(F, dtype=int) \ No newline at end of file + return np.array(F, dtype=int), np.array(SU) \ No newline at end of file diff --git a/skfeature/function/information_theoretical_based/ICAP.py b/skfeature/function/information_theoretical_based/ICAP.py index b6247b7..5437f75 100644 --- a/skfeature/function/information_theoretical_based/ICAP.py +++ b/skfeature/function/information_theoretical_based/ICAP.py @@ -20,10 +20,18 @@ def icap(X, y, **kwargs): ------ F: {numpy array}, shape (n_features,) index of selected features, F[0] is the most important feature + J_ICAP: {numpy array}, shape: (n_features,) + corresponding objective function value of selected features + MIfy: {numpy array}, shape: (n_features,) + corresponding mutual information between selected features and response """ n_samples, n_features = X.shape # index of selected features, initialized to be empty F = [] + # Objective function value for selected features + J_ICAP = [] + # Mutual information between feature and response + MIfy = [] # indicate whether the user specifies the number of features is_n_selected_features_specified = False if 'n_selected_features' in kwargs.keys(): @@ -46,6 +54,8 @@ def icap(X, y, **kwargs): # select the feature whose mutual information is the largest idx = np.argmax(t1) F.append(idx) + J_ICAP.append(t1[idx]) + MIfy.append(t1[idx]) f_select = X[:, idx] if is_n_selected_features_specified is True: @@ -71,6 +81,8 @@ def icap(X, y, **kwargs): j_icap = t idx = i F.append(idx) + J_ICAP.append(j_icap) + MIfy.append(t1[idx]) f_select = X[:, idx] - return np.array(F) + return np.array(F), np.array(J_ICAP), np.array(MIfy) diff --git a/skfeature/function/information_theoretical_based/JMI.py b/skfeature/function/information_theoretical_based/JMI.py index d9c1d97..58fb228 100644 --- a/skfeature/function/information_theoretical_based/JMI.py +++ b/skfeature/function/information_theoretical_based/JMI.py @@ -19,6 +19,10 @@ def jmi(X, y, **kwargs): ------ F: {numpy array}, shape (n_features,) index of selected features, F[0] is the most important feature + J_CMI: {numpy array}, shape: (n_features,) + corresponding objective function value of selected features + MIfy: {numpy array}, shape: (n_features,) + corresponding mutual information between selected features and response Reference --------- @@ -26,7 +30,7 @@ def jmi(X, y, **kwargs): """ if 'n_selected_features' in kwargs.keys(): n_selected_features = kwargs['n_selected_features'] - F = LCSI.lcsi(X, y, function_name='JMI', n_selected_features=n_selected_features) + F, J_CMI, MIfy = LCSI.lcsi(X, y, function_name='JMI', n_selected_features=n_selected_features) else: - F = LCSI.lcsi(X, y, function_name='JMI') - return F \ No newline at end of file + F, J_CMI, MIfy = LCSI.lcsi(X, y, function_name='JMI') + return F, J_CMI, MIfy \ No newline at end of file diff --git a/skfeature/function/information_theoretical_based/LCSI.py b/skfeature/function/information_theoretical_based/LCSI.py index a08f402..a50838e 100644 --- a/skfeature/function/information_theoretical_based/LCSI.py +++ b/skfeature/function/information_theoretical_based/LCSI.py @@ -27,6 +27,10 @@ def lcsi(X, y, **kwargs): ------ F: {numpy array}, shape: (n_features,) index of selected features, F[0] is the most important feature + J_CMI: {numpy array}, shape: (n_features,) + corresponding objective function value of selected features + MIfy: {numpy array}, shape: (n_features,) + corresponding mutual information between selected features and response Reference --------- @@ -36,6 +40,10 @@ def lcsi(X, y, **kwargs): n_samples, n_features = X.shape # index of selected features, initialized to be empty F = [] + # Objective function value for selected features + J_CMI = [] + # Mutual information between feature and response + MIfy = [] # indicate whether the user specifies the number of features is_n_selected_features_specified = False # initialize the parameters @@ -50,7 +58,7 @@ def lcsi(X, y, **kwargs): # select the feature whose j_cmi is the largest # t1 stores I(f;y) for each feature f t1 = np.zeros(n_features) - # t2 sotres sum_j(I(fj;f)) for each feature f + # t2 stores sum_j(I(fj;f)) for each feature f t2 = np.zeros(n_features) # t3 stores sum_j(I(fj;f|y)) for each feature f t3 = np.zeros(n_features) @@ -66,17 +74,19 @@ def lcsi(X, y, **kwargs): # select the feature whose mutual information is the largest idx = np.argmax(t1) F.append(idx) + J_CMI.append(t1[idx]) + MIfy.append(t1[idx]) f_select = X[:, idx] - if is_n_selected_features_specified is True: + if is_n_selected_features_specified: if len(F) == n_selected_features: break - if is_n_selected_features_specified is not True: + else: if j_cmi < 0: break # we assign an extreme small value to j_cmi to ensure it is smaller than all possible values of j_cmi - j_cmi = -1000000000000 + j_cmi = -1E30 if 'function_name' in kwargs.keys(): if kwargs['function_name'] == 'MRMR': beta = 1.0 / len(F) @@ -95,9 +105,11 @@ def lcsi(X, y, **kwargs): j_cmi = t idx = i F.append(idx) + J_CMI.append(j_cmi) + MIfy.append(t1[idx]) f_select = X[:, idx] - return np.array(F) + return np.array(F), np.array(J_CMI), np.array(MIfy) diff --git a/skfeature/function/information_theoretical_based/MIFS.py b/skfeature/function/information_theoretical_based/MIFS.py index 85e4307..9b881d0 100644 --- a/skfeature/function/information_theoretical_based/MIFS.py +++ b/skfeature/function/information_theoretical_based/MIFS.py @@ -19,6 +19,10 @@ def mifs(X, y, **kwargs): ------ F: {numpy array}, shape (n_features,) index of selected features, F[0] is the most important feature + J_CMI: {numpy array}, shape: (n_features,) + corresponding objective function value of selected features + MIfy: {numpy array}, shape: (n_features,) + corresponding mutual information between selected features and response Reference --------- @@ -31,7 +35,7 @@ def mifs(X, y, **kwargs): beta = kwargs['beta'] if 'n_selected_features' in kwargs.keys(): n_selected_features = kwargs['n_selected_features'] - F = LCSI.lcsi(X, y, beta=beta, gamma=0, n_selected_features=n_selected_features) + F, J_CMI, MIfy = LCSI.lcsi(X, y, beta=beta, gamma=0, n_selected_features=n_selected_features) else: - F = LCSI.lcsi(X, y, beta=beta, gamma=0) - return F + F, J_CMI, MIfy = LCSI.lcsi(X, y, beta=beta, gamma=0) + return F, J_CMI, MIfy diff --git a/skfeature/function/information_theoretical_based/MIM.py b/skfeature/function/information_theoretical_based/MIM.py index 0c8a0a9..56285a6 100644 --- a/skfeature/function/information_theoretical_based/MIM.py +++ b/skfeature/function/information_theoretical_based/MIM.py @@ -19,6 +19,10 @@ def mim(X, y, **kwargs): ------ F: {numpy array}, shape (n_features, ) index of selected features, F[0] is the most important feature + J_CMI: {numpy array}, shape: (n_features,) + corresponding objective function value of selected features + MIfy: {numpy array}, shape: (n_features,) + corresponding mutual information between selected features and response Reference --------- @@ -27,7 +31,7 @@ def mim(X, y, **kwargs): if 'n_selected_features' in kwargs.keys(): n_selected_features = kwargs['n_selected_features'] - F = LCSI.lcsi(X, y, beta=0, gamma=0, n_selected_features=n_selected_features) + F, J_CMI, MIfy = LCSI.lcsi(X, y, beta=0, gamma=0, n_selected_features=n_selected_features) else: - F = LCSI.lcsi(X, y, beta=0, gamma=0) - return F + F, J_CMI, MIfy = LCSI.lcsi(X, y, beta=0, gamma=0) + return F, J_CMI, MIfy diff --git a/skfeature/function/information_theoretical_based/MRMR.py b/skfeature/function/information_theoretical_based/MRMR.py index 6d49e64..8f5ba69 100644 --- a/skfeature/function/information_theoretical_based/MRMR.py +++ b/skfeature/function/information_theoretical_based/MRMR.py @@ -19,6 +19,10 @@ def mrmr(X, y, **kwargs): ------ F: {numpy array}, shape (n_features,) index of selected features, F[0] is the most important feature + J_CMI: {numpy array}, shape: (n_features,) + corresponding objective function value of selected features + MIfy: {numpy array}, shape: (n_features,) + corresponding mutual information between selected features and response Reference --------- @@ -26,7 +30,7 @@ def mrmr(X, y, **kwargs): """ if 'n_selected_features' in kwargs.keys(): n_selected_features = kwargs['n_selected_features'] - F = LCSI.lcsi(X, y, gamma=0, function_name='MRMR', n_selected_features=n_selected_features) + F, J_CMI, MIfy = LCSI.lcsi(X, y, gamma=0, function_name='MRMR', n_selected_features=n_selected_features) else: - F = LCSI.lcsi(X, y, gamma=0, function_name='MRMR') - return F \ No newline at end of file + F, J_CMI, MIfy = LCSI.lcsi(X, y, gamma=0, function_name='MRMR') + return F, J_CMI, MIfy \ No newline at end of file