forked from jundongl/scikit-feature
Commit: update
Showing 116 changed files with 5,520 additions and 0 deletions.
(1 empty file and 29 binary data files were added; their contents are not shown.)
New file: example script for CFS feature selection on the colon dataset
@@ -0,0 +1,46 @@
import scipy.io
from sklearn import svm
from sklearn import cross_validation
from sklearn.metrics import accuracy_score
from skfeature.function.statistical_based import CFS


def main():
    # load data
    mat = scipy.io.loadmat('../data/colon.mat')
    X = mat['X']    # data
    X = X.astype(float)
    y = mat['Y']    # label
    y = y[:, 0]
    n_samples, n_features = X.shape    # number of samples and number of features

    # split data into 10 folds
    ss = cross_validation.KFold(n_samples, n_folds=10, shuffle=True)

    # perform evaluation on classification task
    num_fea = 100    # number of selected features
    clf = svm.LinearSVC()    # linear SVM

    correct = 0
    for train, test in ss:
        # obtain the index of selected features on training set
        idx = CFS.cfs(X[train], y[train])

        # obtain the dataset on the selected features
        selected_features = X[:, idx[0:num_fea]]

        # train a classification model with the selected features on the training dataset
        clf.fit(selected_features[train], y[train])

        # predict the class labels of test data
        y_predict = clf.predict(selected_features[test])

        # obtain the classification accuracy on the test data
        acc = accuracy_score(y[test], y_predict)
        correct = correct + acc

    # output the average classification accuracy over all 10 folds
    print 'Accuracy:', float(correct)/10


if __name__ == '__main__':
    main()
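Note: these examples were written for Python 2 and for scikit-learn versions that still shipped the sklearn.cross_validation module (deprecated in 0.18 and removed in 0.20 in favor of sklearn.model_selection). The following is a minimal sketch, not part of the commit, of the same CFS pipeline under a current scikit-learn and Python 3; the skfeature call and the colon.mat data path are taken from the example above.

import scipy.io
from sklearn.svm import LinearSVC
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score
from skfeature.function.statistical_based import CFS


def main():
    # load data exactly as in the original example
    mat = scipy.io.loadmat('../data/colon.mat')
    X = mat['X'].astype(float)
    y = mat['Y'][:, 0]

    num_fea = 100        # number of selected features
    clf = LinearSVC()    # linear SVM
    acc_sum = 0.0

    # KFold now takes n_splits and is applied to the data via split()
    for train, test in KFold(n_splits=10, shuffle=True).split(X):
        # select features on the training fold only
        idx = CFS.cfs(X[train], y[train])
        X_sel = X[:, idx[0:num_fea]]

        # fit on the training fold, score on the held-out fold
        clf.fit(X_sel[train], y[train])
        acc_sum += accuracy_score(y[test], clf.predict(X_sel[test]))

    # average accuracy over the 10 folds
    print('Accuracy:', acc_sum / 10)


if __name__ == '__main__':
    main()

The remaining supervised examples below follow the same structure and only swap the feature selection call, so the same adjustments apply to them.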
New file: example script for CIFE feature selection on the colon dataset
@@ -0,0 +1,46 @@
import scipy.io
from sklearn.metrics import accuracy_score
from sklearn import cross_validation
from sklearn import svm
from skfeature.function.information_theoretical_based import CIFE


def main():
    # load data
    mat = scipy.io.loadmat('../data/colon.mat')
    X = mat['X']    # data
    X = X.astype(float)
    y = mat['Y']    # label
    y = y[:, 0]
    n_samples, n_features = X.shape    # number of samples and number of features

    # split data into 10 folds
    ss = cross_validation.KFold(n_samples, n_folds=10, shuffle=True)

    # perform evaluation on classification task
    num_fea = 10    # number of selected features
    clf = svm.LinearSVC()    # linear SVM

    correct = 0
    for train, test in ss:
        # obtain the index of each feature on the training set
        idx = CIFE.cife(X[train], y[train], n_selected_features=num_fea)

        # obtain the dataset on the selected features
        features = X[:, idx[0:num_fea]]

        # train a classification model with the selected features on the training dataset
        clf.fit(features[train], y[train])

        # predict the class labels of test data
        y_predict = clf.predict(features[test])

        # obtain the classification accuracy on the test data
        acc = accuracy_score(y[test], y_predict)
        correct = correct + acc

    # output the average classification accuracy over all 10 folds
    print 'Accuracy:', float(correct)/10


if __name__ == '__main__':
    main()
New file: example script for CMIM feature selection on the colon dataset
@@ -0,0 +1,46 @@
import scipy.io
from sklearn.metrics import accuracy_score
from sklearn import cross_validation
from sklearn import svm
from skfeature.function.information_theoretical_based import CMIM


def main():
    # load data
    mat = scipy.io.loadmat('../data/colon.mat')
    X = mat['X']    # data
    X = X.astype(float)
    y = mat['Y']    # label
    y = y[:, 0]
    n_samples, n_features = X.shape    # number of samples and number of features

    # split data into 10 folds
    ss = cross_validation.KFold(n_samples, n_folds=10, shuffle=True)

    # perform evaluation on classification task
    num_fea = 10    # number of selected features
    clf = svm.LinearSVC()    # linear SVM

    correct = 0
    for train, test in ss:
        # obtain the index of each feature on the training set
        idx = CMIM.cmim(X[train], y[train], n_selected_features=num_fea)

        # obtain the dataset on the selected features
        features = X[:, idx[0:num_fea]]

        # train a classification model with the selected features on the training dataset
        clf.fit(features[train], y[train])

        # predict the class labels of test data
        y_predict = clf.predict(features[test])

        # obtain the classification accuracy on the test data
        acc = accuracy_score(y[test], y_predict)
        correct = correct + acc

    # output the average classification accuracy over all 10 folds
    print 'Accuracy:', float(correct)/10


if __name__ == '__main__':
    main()
New file: example script for DISR feature selection on the colon dataset
@@ -0,0 +1,46 @@
import scipy.io
from sklearn.metrics import accuracy_score
from sklearn import cross_validation
from sklearn import svm
from skfeature.function.information_theoretical_based import DISR


def main():
    # load data
    mat = scipy.io.loadmat('../data/colon.mat')
    X = mat['X']    # data
    X = X.astype(float)
    y = mat['Y']    # label
    y = y[:, 0]
    n_samples, n_features = X.shape    # number of samples and number of features

    # split data into 10 folds
    ss = cross_validation.KFold(n_samples, n_folds=10, shuffle=True)

    # perform evaluation on classification task
    num_fea = 10    # number of selected features
    clf = svm.LinearSVC()    # linear SVM

    correct = 0
    for train, test in ss:
        # obtain the index of each feature on the training set
        idx = DISR.disr(X[train], y[train], n_selected_features=num_fea)

        # obtain the dataset on the selected features
        features = X[:, idx[0:num_fea]]

        # train a classification model with the selected features on the training dataset
        clf.fit(features[train], y[train])

        # predict the class labels of test data
        y_predict = clf.predict(features[test])

        # obtain the classification accuracy on the test data
        acc = accuracy_score(y[test], y_predict)
        correct = correct + acc

    # output the average classification accuracy over all 10 folds
    print 'Accuracy:', float(correct)/10


if __name__ == '__main__':
    main()
New file: example script for FCBF feature selection on the colon dataset
@@ -0,0 +1,46 @@
import scipy.io
from sklearn.metrics import accuracy_score
from sklearn import cross_validation
from sklearn import svm
from skfeature.function.information_theoretical_based import FCBF


def main():
    # load data
    mat = scipy.io.loadmat('../data/colon.mat')
    X = mat['X']    # data
    X = X.astype(float)
    y = mat['Y']    # label
    y = y[:, 0]
    n_samples, n_features = X.shape    # number of samples and number of features

    # split data into 10 folds
    ss = cross_validation.KFold(n_samples, n_folds=10, shuffle=True)

    # perform evaluation on classification task
    num_fea = 10    # number of selected features
    clf = svm.LinearSVC()    # linear SVM

    correct = 0
    for train, test in ss:
        # obtain the index of each feature on the training set
        idx = FCBF.fcbf(X[train], y[train], n_selected_features=num_fea)

        # obtain the dataset on the selected features
        features = X[:, idx[0:num_fea]]

        # train a classification model with the selected features on the training dataset
        clf.fit(features[train], y[train])

        # predict the class labels of test data
        y_predict = clf.predict(features[test])

        # obtain the classification accuracy on the test data
        acc = accuracy_score(y[test], y_predict)
        correct = correct + acc

    # output the average classification accuracy over all 10 folds
    print 'Accuracy:', float(correct)/10


if __name__ == '__main__':
    main()
New file: example script for ICAP feature selection on the colon dataset
@@ -0,0 +1,46 @@
import scipy.io
from sklearn.metrics import accuracy_score
from sklearn import cross_validation
from sklearn import svm
from skfeature.function.information_theoretical_based import ICAP


def main():
    # load data
    mat = scipy.io.loadmat('../data/colon.mat')
    X = mat['X']    # data
    X = X.astype(float)
    y = mat['Y']    # label
    y = y[:, 0]
    n_samples, n_features = X.shape    # number of samples and number of features

    # split data into 10 folds
    ss = cross_validation.KFold(n_samples, n_folds=10, shuffle=True)

    # perform evaluation on classification task
    num_fea = 10    # number of selected features
    clf = svm.LinearSVC()    # linear SVM

    correct = 0
    for train, test in ss:
        # obtain the index of each feature on the training set
        idx = ICAP.icap(X[train], y[train], n_selected_features=num_fea)

        # obtain the dataset on the selected features
        features = X[:, idx[0:num_fea]]

        # train a classification model with the selected features on the training dataset
        clf.fit(features[train], y[train])

        # predict the class labels of test data
        y_predict = clf.predict(features[test])

        # obtain the classification accuracy on the test data
        acc = accuracy_score(y[test], y_predict)
        correct = correct + acc

    # output the average classification accuracy over all 10 folds
    print 'Accuracy:', float(correct)/10


if __name__ == '__main__':
    main()
New file: example script for JMI feature selection on the colon dataset
@@ -0,0 +1,46 @@
import scipy.io
from sklearn.metrics import accuracy_score
from sklearn import cross_validation
from sklearn import svm
from skfeature.function.information_theoretical_based import JMI


def main():
    # load data
    mat = scipy.io.loadmat('../data/colon.mat')
    X = mat['X']    # data
    X = X.astype(float)
    y = mat['Y']    # label
    y = y[:, 0]
    n_samples, n_features = X.shape    # number of samples and number of features

    # split data into 10 folds
    ss = cross_validation.KFold(n_samples, n_folds=10, shuffle=True)

    # perform evaluation on classification task
    num_fea = 10    # number of selected features
    clf = svm.LinearSVC()    # linear SVM

    correct = 0
    for train, test in ss:
        # obtain the index of each feature on the training set
        idx = JMI.jmi(X[train], y[train], n_selected_features=num_fea)

        # obtain the dataset on the selected features
        features = X[:, idx[0:num_fea]]

        # train a classification model with the selected features on the training dataset
        clf.fit(features[train], y[train])

        # predict the class labels of test data
        y_predict = clf.predict(features[test])

        # obtain the classification accuracy on the test data
        acc = accuracy_score(y[test], y_predict)
        correct = correct + acc

    # output the average classification accuracy over all 10 folds
    print 'Accuracy:', float(correct)/10


if __name__ == '__main__':
    main()
New file: example script for unsupervised MCFS feature selection with k-means evaluation on the COIL20 dataset
@@ -0,0 +1,44 @@
import scipy.io
from skfeature.function.sparse_learning_based import MCFS
from skfeature.utility import construct_W
from skfeature.utility import unsupervised_evaluation


def main():
    # load data
    mat = scipy.io.loadmat('../data/COIL20.mat')
    X = mat['X']    # data
    X = X.astype(float)
    y = mat['Y']    # label
    y = y[:, 0]

    # construct affinity matrix
    kwargs = {"metric": "euclidean", "neighborMode": "knn", "weightMode": "heatKernel", "k": 5, 't': 1}
    W = construct_W.construct_W(X, **kwargs)

    num_fea = 100    # specify the number of selected features
    num_cluster = 20    # specify the number of clusters; usually set to the number of classes in the ground truth

    # obtain the feature weight matrix
    Weight = MCFS.mcfs(X, n_selected_features=num_fea, W=W, n_clusters=20)

    # rank features in descending order of their MCFS scores
    idx = MCFS.feature_ranking(Weight)

    # obtain the dataset on the selected features
    selected_features = X[:, idx[0:num_fea]]

    # perform kmeans clustering based on the selected features, repeated 20 times
    nmi_total = 0
    acc_total = 0
    for i in range(0, 20):
        nmi, acc = unsupervised_evaluation.evaluation(X_selected=selected_features, n_clusters=num_cluster, y=y)
        nmi_total += nmi
        acc_total += acc

    # output the average NMI and average ACC
    print 'NMI:', float(nmi_total)/20
    print 'ACC:', float(acc_total)/20


if __name__ == '__main__':
    main()
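Per the comments above, each call to unsupervised_evaluation.evaluation clusters the selected features with k-means and scores the clustering against the ground-truth labels; because k-means depends on its random initialization, the example averages NMI and ACC over 20 runs. Below is a minimal sketch of one such NMI evaluation using plain scikit-learn, assuming this is roughly what the skfeature helper computes internally (its exact internals are not shown in this commit).

from sklearn.cluster import KMeans
from sklearn.metrics import normalized_mutual_info_score


def evaluate_once(X_selected, y, n_clusters):
    # cluster the samples using only the selected features
    labels = KMeans(n_clusters=n_clusters, n_init=10).fit_predict(X_selected)
    # NMI compares the clustering with the ground-truth labels and is
    # invariant to how cluster labels are permuted
    return normalized_mutual_info_score(y, labels)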