Added objective function value and MI return values to IT-based
bacalfa committed Jun 14, 2017
1 parent f99c6bb commit e6e2c3c
Showing 10 changed files with 103 additions and 30 deletions.
10 changes: 7 additions & 3 deletions skfeature/function/information_theoretical_based/CIFE.py
@@ -19,6 +19,10 @@ def cife(X, y, **kwargs):
     ------
     F: {numpy array}, shape (n_features,)
         index of selected features, F[0] is the most important feature
+    J_CMI: {numpy array}, shape: (n_features,)
+        corresponding objective function value of selected features
+    MIfy: {numpy array}, shape: (n_features,)
+        corresponding mutual information between selected features and response
     Reference
     ---------
@@ -27,7 +31,7 @@ def cife(X, y, **kwargs):

     if 'n_selected_features' in kwargs.keys():
         n_selected_features = kwargs['n_selected_features']
-        F = LCSI.lcsi(X, y, beta=1, gamma=1, n_selected_features=n_selected_features)
+        F, J_CMI, MIfy = LCSI.lcsi(X, y, beta=1, gamma=1, n_selected_features=n_selected_features)
     else:
-        F = LCSI.lcsi(X, y, beta=1, gamma=1)
-    return F
+        F, J_CMI, MIfy = LCSI.lcsi(X, y, beta=1, gamma=1)
+    return F, J_CMI, MIfy
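With this change cife returns a 3-tuple, so existing call sites must unpack three values. A minimal usage sketch under assumed synthetic data (the dataset and the n_selected_features value are illustrative, not part of the commit):

    import numpy as np
    from skfeature.function.information_theoretical_based import CIFE

    # illustrative discrete data: 50 samples, 6 features, binary response
    X = np.random.randint(0, 3, size=(50, 6))
    y = np.random.randint(0, 2, size=50)

    # F[k] is the k-th selected feature; J_CMI[k] and MIfy[k] are the
    # objective value and I(f;y) recorded when that feature was picked
    F, J_CMI, MIfy = CIFE.cife(X, y, n_selected_features=3)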
18 changes: 15 additions & 3 deletions skfeature/function/information_theoretical_based/CMIM.py
@@ -20,6 +20,10 @@ def cmim(X, y, **kwargs):
     ------
     F: {numpy array}, shape (n_features,)
         index of selected features, F[0] is the most important feature
+    J_CMIM: {numpy array}, shape: (n_features,)
+        corresponding objective function value of selected features
+    MIfy: {numpy array}, shape: (n_features,)
+        corresponding mutual information between selected features and response
     Reference
     ---------
@@ -29,6 +33,10 @@ def cmim(X, y, **kwargs):
     n_samples, n_features = X.shape
     # index of selected features, initialized to be empty
     F = []
+    # Objective function value for selected features
+    J_CMIM = []
+    # Mutual information between feature and response
+    MIfy = []
     # indicate whether the user specifies the number of features
     is_n_selected_features_specified = False

@@ -54,12 +62,14 @@ def cmim(X, y, **kwargs):
             # select the feature whose mutual information is the largest
             idx = np.argmax(t1)
             F.append(idx)
+            J_CMIM.append(t1[idx])
+            MIfy.append(t1[idx])
             f_select = X[:, idx]

-        if is_n_selected_features_specified is True:
+        if is_n_selected_features_specified:
             if len(F) == n_selected_features:
                 break
-        if is_n_selected_features_specified is not True:
+        else:
             if j_cmim <= 0:
                 break

@@ -79,6 +89,8 @@ def cmim(X, y, **kwargs):
                     j_cmim = t
                     idx = i
         F.append(idx)
+        J_CMIM.append(j_cmim)
+        MIfy.append(t1[idx])
         f_select = X[:, idx]

-    return np.array(F)
+    return np.array(F), np.array(J_CMIM), np.array(MIfy)
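Because J_CMIM records the objective value at every selection step, callers can inspect where the criterion decays instead of committing to n_selected_features up front. A hedged sketch reusing the X, y from the first example (the positive-value cutoff is an illustrative choice, not part of the commit):

    from skfeature.function.information_theoretical_based import CMIM

    F, J_CMIM, MIfy = CMIM.cmim(X, y, n_selected_features=5)
    # keep only features whose recorded objective value stayed positive
    kept = F[J_CMIM > 0]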
22 changes: 17 additions & 5 deletions skfeature/function/information_theoretical_based/DISR.py
@@ -22,6 +22,10 @@ def disr(X, y, **kwargs):
     ------
     F: {numpy array}, shape (n_features, )
         index of selected features, F[0] is the most important feature
+    J_DISR: {numpy array}, shape: (n_features,)
+        corresponding objective function value of selected features
+    MIfy: {numpy array}, shape: (n_features,)
+        corresponding mutual information between selected features and response
     Reference
     ---------
@@ -31,6 +35,10 @@ def disr(X, y, **kwargs):
     n_samples, n_features = X.shape
     # index of selected features, initialized to be empty
     F = []
+    # Objective function value for selected features
+    J_DISR = []
+    # Mutual information between feature and response
+    MIfy = []
     # indicate whether the user specifies the number of features
     is_n_selected_features_specified = False

@@ -54,6 +62,8 @@ def disr(X, y, **kwargs):
             # select the feature whose mutual information is the largest
             idx = np.argmax(t1)
             F.append(idx)
+            J_DISR.append(t1[idx])
+            MIfy.append(t1[idx])
             f_select = X[:, idx]

         if is_n_selected_features_specified is True:
@@ -64,19 +74,21 @@
                 break

         # we assign an extreme small value to j_disr to ensure that it is smaller than all possible value of j_disr
-        j_disr = -1000000000000
+        j_disr = -1E30
         for i in range(n_features):
             if i not in F:
                 f = X[:, i]
-                t1 = midd(f_select, y) + cmidd(f, y, f_select)
-                t2 = entropyd(f) + conditional_entropy(f_select, f) + (conditional_entropy(y, f_select) - cmidd(y, f, f_select))
-                sum[i] += np.true_divide(t1, t2)
+                t2 = midd(f_select, y) + cmidd(f, y, f_select)
+                t3 = entropyd(f) + conditional_entropy(f_select, f) + (conditional_entropy(y, f_select) - cmidd(y, f, f_select))
+                sum[i] += np.true_divide(t2, t3)
                 # record the largest j_disr and the corresponding feature index
                 if sum[i] > j_disr:
                     j_disr = sum[i]
                     idx = i
         F.append(idx)
+        J_DISR.append(j_disr)
+        MIfy.append(t1[idx])
         f_select = X[:, idx]

-    return np.array(F)
+    return np.array(F), np.array(J_DISR), np.array(MIfy)
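The rename of the loop temporaries from t1/t2 to t2/t3 is what makes MIfy correct here: t1 holds I(f;y) for every feature from the first selection step, and the old loop overwrote it. Usage follows the same pattern as the other methods; a minimal sketch reusing the X, y from the first example:

    from skfeature.function.information_theoretical_based import DISR

    # J_DISR traces the DISR objective; MIfy[k] is I(f;y) for the k-th pick
    F, J_DISR, MIfy = DISR.disr(X, y, n_selected_features=5)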

7 changes: 6 additions & 1 deletion skfeature/function/information_theoretical_based/FCBF.py
@@ -20,6 +20,8 @@ def fcbf(X, y, **kwargs):
     ------
     F: {numpy array}, shape (n_features,)
         index of selected features, F[0] is the most important feature
+    SU: {numpy array}, shape (n_features,)
+        symmetrical uncertainty of selected features
     Reference
     ---------
@@ -42,13 +44,16 @@ def fcbf(X, y, **kwargs):
     s_list = t1[t1[:, 1] > delta, :]
     # index of selected features, initialized to be empty
     F = []
+    # Symmetrical uncertainty of selected features
+    SU = []
     while len(s_list) != 0:
         # select the largest su inside s_list
         idx = np.argmax(s_list[:, 1])
         # record the index of the feature with the largest su
         fp = X[:, s_list[idx, 0]]
         np.delete(s_list, idx, 0)
         F.append(s_list[idx, 0])
+        SU.append(s_list[idx, 1])
         for i in s_list[:, 0]:
             fi = X[:, i]
             if su_calculation(fp, fi) >= t1[i, 1]:
@@ -60,4 +65,4 @@ def fcbf(X, y, **kwargs):
         s_list = s_list[idx]
         length = len(s_list)/2
         s_list = s_list.reshape((length, 2))
-    return np.array(F, dtype=int)
+    return np.array(F, dtype=int), np.array(SU)
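FCBF differs from the LCSI-family wrappers: its second return value is the symmetrical uncertainty of each selected feature rather than an objective trace. A sketch reusing the X, y from the first example (delta is passed explicitly; the diff suggests it thresholds the initial SU ranking, and 0 is assumed here as the permissive setting):

    from skfeature.function.information_theoretical_based import FCBF

    # SU[k] is the symmetrical uncertainty between the k-th selected feature and y
    F, SU = FCBF.fcbf(X, y, delta=0)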
14 changes: 13 additions & 1 deletion skfeature/function/information_theoretical_based/ICAP.py
@@ -20,10 +20,18 @@ def icap(X, y, **kwargs):
     ------
     F: {numpy array}, shape (n_features,)
         index of selected features, F[0] is the most important feature
+    J_ICAP: {numpy array}, shape: (n_features,)
+        corresponding objective function value of selected features
+    MIfy: {numpy array}, shape: (n_features,)
+        corresponding mutual information between selected features and response
     """
     n_samples, n_features = X.shape
     # index of selected features, initialized to be empty
     F = []
+    # Objective function value for selected features
+    J_ICAP = []
+    # Mutual information between feature and response
+    MIfy = []
     # indicate whether the user specifies the number of features
     is_n_selected_features_specified = False
     if 'n_selected_features' in kwargs.keys():
@@ -46,6 +54,8 @@ def icap(X, y, **kwargs):
             # select the feature whose mutual information is the largest
             idx = np.argmax(t1)
             F.append(idx)
+            J_ICAP.append(t1[idx])
+            MIfy.append(t1[idx])
             f_select = X[:, idx]

         if is_n_selected_features_specified is True:
@@ -71,6 +81,8 @@
                     j_icap = t
                     idx = i
         F.append(idx)
+        J_ICAP.append(j_icap)
+        MIfy.append(t1[idx])
         f_select = X[:, idx]

-    return np.array(F)
+    return np.array(F), np.array(J_ICAP), np.array(MIfy)
10 changes: 7 additions & 3 deletions skfeature/function/information_theoretical_based/JMI.py
@@ -19,14 +19,18 @@ def jmi(X, y, **kwargs):
     ------
     F: {numpy array}, shape (n_features,)
         index of selected features, F[0] is the most important feature
+    J_CMI: {numpy array}, shape: (n_features,)
+        corresponding objective function value of selected features
+    MIfy: {numpy array}, shape: (n_features,)
+        corresponding mutual information between selected features and response
     Reference
     ---------
     Brown, Gavin et al. "Conditional Likelihood Maximisation: A Unifying Framework for Information Theoretic Feature Selection." JMLR 2012.
     """
     if 'n_selected_features' in kwargs.keys():
         n_selected_features = kwargs['n_selected_features']
-        F = LCSI.lcsi(X, y, function_name='JMI', n_selected_features=n_selected_features)
+        F, J_CMI, MIfy = LCSI.lcsi(X, y, function_name='JMI', n_selected_features=n_selected_features)
     else:
-        F = LCSI.lcsi(X, y, function_name='JMI')
-    return F
+        F, J_CMI, MIfy = LCSI.lcsi(X, y, function_name='JMI')
+    return F, J_CMI, MIfy
22 changes: 17 additions & 5 deletions skfeature/function/information_theoretical_based/LCSI.py
@@ -27,6 +27,10 @@ def lcsi(X, y, **kwargs):
     ------
     F: {numpy array}, shape: (n_features,)
         index of selected features, F[0] is the most important feature
+    J_CMI: {numpy array}, shape: (n_features,)
+        corresponding objective function value of selected features
+    MIfy: {numpy array}, shape: (n_features,)
+        corresponding mutual information between selected features and response
     Reference
     ---------
@@ -36,6 +40,10 @@ def lcsi(X, y, **kwargs):
     n_samples, n_features = X.shape
     # index of selected features, initialized to be empty
     F = []
+    # Objective function value for selected features
+    J_CMI = []
+    # Mutual information between feature and response
+    MIfy = []
     # indicate whether the user specifies the number of features
     is_n_selected_features_specified = False
     # initialize the parameters
@@ -50,7 +58,7 @@ def lcsi(X, y, **kwargs):
         # select the feature whose j_cmi is the largest
         # t1 stores I(f;y) for each feature f
         t1 = np.zeros(n_features)
-        # t2 sotres sum_j(I(fj;f)) for each feature f
+        # t2 stores sum_j(I(fj;f)) for each feature f
         t2 = np.zeros(n_features)
         # t3 stores sum_j(I(fj;f|y)) for each feature f
         t3 = np.zeros(n_features)
@@ -66,17 +74,19 @@ def lcsi(X, y, **kwargs):
             # select the feature whose mutual information is the largest
             idx = np.argmax(t1)
             F.append(idx)
+            J_CMI.append(t1[idx])
+            MIfy.append(t1[idx])
             f_select = X[:, idx]

-        if is_n_selected_features_specified is True:
+        if is_n_selected_features_specified:
             if len(F) == n_selected_features:
                 break
-        if is_n_selected_features_specified is not True:
+        else:
             if j_cmi < 0:
                 break

         # we assign an extreme small value to j_cmi to ensure it is smaller than all possible values of j_cmi
-        j_cmi = -1000000000000
+        j_cmi = -1E30
         if 'function_name' in kwargs.keys():
             if kwargs['function_name'] == 'MRMR':
                 beta = 1.0 / len(F)
@@ -95,9 +105,11 @@ def lcsi(X, y, **kwargs):
                     j_cmi = t
                     idx = i
         F.append(idx)
+        J_CMI.append(j_cmi)
+        MIfy.append(t1[idx])
         f_select = X[:, idx]

-    return np.array(F)
+    return np.array(F), np.array(J_CMI), np.array(MIfy)
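Since lcsi is the shared engine, the wrappers touched by this commit differ only in the arguments they forward, and all inherit the new 3-tuple return. The mapping below is read directly off the calls in this diff (beta in the MIFS row is the user-supplied keyword):

    from skfeature.function.information_theoretical_based import LCSI

    # wrapper   equivalent lcsi call (per this diff)
    # MIM       LCSI.lcsi(X, y, beta=0, gamma=0)
    # MIFS      LCSI.lcsi(X, y, beta=beta, gamma=0)
    # CIFE      LCSI.lcsi(X, y, beta=1, gamma=1)
    # JMI       LCSI.lcsi(X, y, function_name='JMI')
    # MRMR      LCSI.lcsi(X, y, gamma=0, function_name='MRMR')
    F, J_CMI, MIfy = LCSI.lcsi(X, y, beta=1, gamma=1, n_selected_features=5)  # same as CIFE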



10 changes: 7 additions & 3 deletions skfeature/function/information_theoretical_based/MIFS.py
@@ -19,6 +19,10 @@ def mifs(X, y, **kwargs):
     ------
     F: {numpy array}, shape (n_features,)
         index of selected features, F[0] is the most important feature
+    J_CMI: {numpy array}, shape: (n_features,)
+        corresponding objective function value of selected features
+    MIfy: {numpy array}, shape: (n_features,)
+        corresponding mutual information between selected features and response
     Reference
     ---------
@@ -31,7 +35,7 @@ def mifs(X, y, **kwargs):
         beta = kwargs['beta']
     if 'n_selected_features' in kwargs.keys():
         n_selected_features = kwargs['n_selected_features']
-        F = LCSI.lcsi(X, y, beta=beta, gamma=0, n_selected_features=n_selected_features)
+        F, J_CMI, MIfy = LCSI.lcsi(X, y, beta=beta, gamma=0, n_selected_features=n_selected_features)
     else:
-        F = LCSI.lcsi(X, y, beta=beta, gamma=0)
-    return F
+        F, J_CMI, MIfy = LCSI.lcsi(X, y, beta=beta, gamma=0)
+    return F, J_CMI, MIfy
10 changes: 7 additions & 3 deletions skfeature/function/information_theoretical_based/MIM.py
@@ -19,6 +19,10 @@ def mim(X, y, **kwargs):
     ------
     F: {numpy array}, shape (n_features, )
         index of selected features, F[0] is the most important feature
+    J_CMI: {numpy array}, shape: (n_features,)
+        corresponding objective function value of selected features
+    MIfy: {numpy array}, shape: (n_features,)
+        corresponding mutual information between selected features and response
     Reference
     ---------
@@ -27,7 +31,7 @@ def mim(X, y, **kwargs):

     if 'n_selected_features' in kwargs.keys():
         n_selected_features = kwargs['n_selected_features']
-        F = LCSI.lcsi(X, y, beta=0, gamma=0, n_selected_features=n_selected_features)
+        F, J_CMI, MIfy = LCSI.lcsi(X, y, beta=0, gamma=0, n_selected_features=n_selected_features)
     else:
-        F = LCSI.lcsi(X, y, beta=0, gamma=0)
-    return F
+        F, J_CMI, MIfy = LCSI.lcsi(X, y, beta=0, gamma=0)
+    return F, J_CMI, MIfy
10 changes: 7 additions & 3 deletions skfeature/function/information_theoretical_based/MRMR.py
@@ -19,14 +19,18 @@ def mrmr(X, y, **kwargs):
     ------
     F: {numpy array}, shape (n_features,)
         index of selected features, F[0] is the most important feature
+    J_CMI: {numpy array}, shape: (n_features,)
+        corresponding objective function value of selected features
+    MIfy: {numpy array}, shape: (n_features,)
+        corresponding mutual information between selected features and response
     Reference
     ---------
     Brown, Gavin et al. "Conditional Likelihood Maximisation: A Unifying Framework for Information Theoretic Feature Selection." JMLR 2012.
     """
     if 'n_selected_features' in kwargs.keys():
         n_selected_features = kwargs['n_selected_features']
-        F = LCSI.lcsi(X, y, gamma=0, function_name='MRMR', n_selected_features=n_selected_features)
+        F, J_CMI, MIfy = LCSI.lcsi(X, y, gamma=0, function_name='MRMR', n_selected_features=n_selected_features)
     else:
-        F = LCSI.lcsi(X, y, gamma=0, function_name='MRMR')
-    return F
+        F, J_CMI, MIfy = LCSI.lcsi(X, y, gamma=0, function_name='MRMR')
+    return F, J_CMI, MIfy
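After this commit every LCSI-based wrapper shares the same return contract, so downstream code can treat them uniformly. A hedged sketch reusing the X, y from the first example (MIFS is left out of the loop because it expects a beta keyword):

    from skfeature.function.information_theoretical_based import CIFE, JMI, MIM, MRMR

    # each wrapper now yields (indices, objective trace, per-feature MI with y)
    for name, fn in [('CIFE', CIFE.cife), ('JMI', JMI.jmi),
                     ('MIM', MIM.mim), ('MRMR', MRMR.mrmr)]:
        F, J, MI = fn(X, y, n_selected_features=5)
        print(name, F)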
