forked from jundongl/scikit-feature
Commit: update
Showing 116 changed files with 5,520 additions and 0 deletions.
(1 empty file and 29 binary data files were added; their contents are not shown.)
New file: example script for CFS feature selection on the colon dataset
@@ -0,0 +1,46 @@
import scipy.io
from sklearn import svm
from sklearn import cross_validation
from sklearn.metrics import accuracy_score
from skfeature.function.statistical_based import CFS


def main():
    # load data
    mat = scipy.io.loadmat('../data/colon.mat')
    X = mat['X']    # data
    X = X.astype(float)
    y = mat['Y']    # label
    y = y[:, 0]
    n_samples, n_features = X.shape    # number of samples and number of features

    # split data into 10 folds
    ss = cross_validation.KFold(n_samples, n_folds=10, shuffle=True)

    # perform evaluation on classification task
    num_fea = 100    # number of selected features
    clf = svm.LinearSVC()    # linear SVM

    correct = 0
    for train, test in ss:
        # obtain the index of selected features on training set
        idx = CFS.cfs(X[train], y[train])

        # obtain the dataset on the selected features
        selected_features = X[:, idx[0:num_fea]]

        # train a classification model with the selected features on the training dataset
        clf.fit(selected_features[train], y[train])

        # predict the class labels of test data
        y_predict = clf.predict(selected_features[test])

        # obtain the classification accuracy on the test data
        acc = accuracy_score(y[test], y_predict)
        correct = correct + acc

    # output the average classification accuracy over all 10 folds
    print 'Accuracy:', float(correct)/10


if __name__ == '__main__':
    main()
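Note: these examples were written for Python 2 and for scikit-learn versions that still shipped the sklearn.cross_validation module (deprecated in 0.18 and removed in 0.20 in favor of sklearn.model_selection). The following is a minimal sketch, not part of the commit, of the same CFS pipeline under a current scikit-learn and Python 3; the skfeature call and the colon.mat data path are taken from the example above.

import scipy.io
from sklearn.svm import LinearSVC
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score
from skfeature.function.statistical_based import CFS


def main():
    # load data exactly as in the original example
    mat = scipy.io.loadmat('../data/colon.mat')
    X = mat['X'].astype(float)
    y = mat['Y'][:, 0]

    num_fea = 100        # number of selected features
    clf = LinearSVC()    # linear SVM
    acc_sum = 0.0

    # KFold now takes n_splits and is applied to the data via split()
    for train, test in KFold(n_splits=10, shuffle=True).split(X):
        # select features on the training fold only
        idx = CFS.cfs(X[train], y[train])
        X_sel = X[:, idx[0:num_fea]]

        # fit on the training fold, score on the held-out fold
        clf.fit(X_sel[train], y[train])
        acc_sum += accuracy_score(y[test], clf.predict(X_sel[test]))

    # average accuracy over the 10 folds
    print('Accuracy:', acc_sum / 10)


if __name__ == '__main__':
    main()

The remaining supervised examples below follow the same structure and only swap the feature selection call, so the same adjustments apply to them.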
New file: example script for CIFE feature selection on the colon dataset
@@ -0,0 +1,46 @@
import scipy.io
from sklearn.metrics import accuracy_score
from sklearn import cross_validation
from sklearn import svm
from skfeature.function.information_theoretical_based import CIFE


def main():
    # load data
    mat = scipy.io.loadmat('../data/colon.mat')
    X = mat['X']    # data
    X = X.astype(float)
    y = mat['Y']    # label
    y = y[:, 0]
    n_samples, n_features = X.shape    # number of samples and number of features

    # split data into 10 folds
    ss = cross_validation.KFold(n_samples, n_folds=10, shuffle=True)

    # perform evaluation on classification task
    num_fea = 10    # number of selected features
    clf = svm.LinearSVC()    # linear SVM

    correct = 0
    for train, test in ss:
        # obtain the index of each feature on the training set
        idx = CIFE.cife(X[train], y[train], n_selected_features=num_fea)

        # obtain the dataset on the selected features
        features = X[:, idx[0:num_fea]]

        # train a classification model with the selected features on the training dataset
        clf.fit(features[train], y[train])

        # predict the class labels of test data
        y_predict = clf.predict(features[test])

        # obtain the classification accuracy on the test data
        acc = accuracy_score(y[test], y_predict)
        correct = correct + acc

    # output the average classification accuracy over all 10 folds
    print 'Accuracy:', float(correct)/10


if __name__ == '__main__':
    main()
New file: example script for CMIM feature selection on the colon dataset
@@ -0,0 +1,46 @@
import scipy.io
from sklearn.metrics import accuracy_score
from sklearn import cross_validation
from sklearn import svm
from skfeature.function.information_theoretical_based import CMIM


def main():
    # load data
    mat = scipy.io.loadmat('../data/colon.mat')
    X = mat['X']    # data
    X = X.astype(float)
    y = mat['Y']    # label
    y = y[:, 0]
    n_samples, n_features = X.shape    # number of samples and number of features

    # split data into 10 folds
    ss = cross_validation.KFold(n_samples, n_folds=10, shuffle=True)

    # perform evaluation on classification task
    num_fea = 10    # number of selected features
    clf = svm.LinearSVC()    # linear SVM

    correct = 0
    for train, test in ss:
        # obtain the index of each feature on the training set
        idx = CMIM.cmim(X[train], y[train], n_selected_features=num_fea)

        # obtain the dataset on the selected features
        features = X[:, idx[0:num_fea]]

        # train a classification model with the selected features on the training dataset
        clf.fit(features[train], y[train])

        # predict the class labels of test data
        y_predict = clf.predict(features[test])

        # obtain the classification accuracy on the test data
        acc = accuracy_score(y[test], y_predict)
        correct = correct + acc

    # output the average classification accuracy over all 10 folds
    print 'Accuracy:', float(correct)/10


if __name__ == '__main__':
    main()
New file: example script for DISR feature selection on the colon dataset
@@ -0,0 +1,46 @@
import scipy.io
from sklearn.metrics import accuracy_score
from sklearn import cross_validation
from sklearn import svm
from skfeature.function.information_theoretical_based import DISR


def main():
    # load data
    mat = scipy.io.loadmat('../data/colon.mat')
    X = mat['X']    # data
    X = X.astype(float)
    y = mat['Y']    # label
    y = y[:, 0]
    n_samples, n_features = X.shape    # number of samples and number of features

    # split data into 10 folds
    ss = cross_validation.KFold(n_samples, n_folds=10, shuffle=True)

    # perform evaluation on classification task
    num_fea = 10    # number of selected features
    clf = svm.LinearSVC()    # linear SVM

    correct = 0
    for train, test in ss:
        # obtain the index of each feature on the training set
        idx = DISR.disr(X[train], y[train], n_selected_features=num_fea)

        # obtain the dataset on the selected features
        features = X[:, idx[0:num_fea]]

        # train a classification model with the selected features on the training dataset
        clf.fit(features[train], y[train])

        # predict the class labels of test data
        y_predict = clf.predict(features[test])

        # obtain the classification accuracy on the test data
        acc = accuracy_score(y[test], y_predict)
        correct = correct + acc

    # output the average classification accuracy over all 10 folds
    print 'Accuracy:', float(correct)/10


if __name__ == '__main__':
    main()
New file: example script for FCBF feature selection on the colon dataset
@@ -0,0 +1,46 @@
import scipy.io
from sklearn.metrics import accuracy_score
from sklearn import cross_validation
from sklearn import svm
from skfeature.function.information_theoretical_based import FCBF


def main():
    # load data
    mat = scipy.io.loadmat('../data/colon.mat')
    X = mat['X']    # data
    X = X.astype(float)
    y = mat['Y']    # label
    y = y[:, 0]
    n_samples, n_features = X.shape    # number of samples and number of features

    # split data into 10 folds
    ss = cross_validation.KFold(n_samples, n_folds=10, shuffle=True)

    # perform evaluation on classification task
    num_fea = 10    # number of selected features
    clf = svm.LinearSVC()    # linear SVM

    correct = 0
    for train, test in ss:
        # obtain the index of each feature on the training set
        idx = FCBF.fcbf(X[train], y[train], n_selected_features=num_fea)

        # obtain the dataset on the selected features
        features = X[:, idx[0:num_fea]]

        # train a classification model with the selected features on the training dataset
        clf.fit(features[train], y[train])

        # predict the class labels of test data
        y_predict = clf.predict(features[test])

        # obtain the classification accuracy on the test data
        acc = accuracy_score(y[test], y_predict)
        correct = correct + acc

    # output the average classification accuracy over all 10 folds
    print 'Accuracy:', float(correct)/10


if __name__ == '__main__':
    main()
New file: example script for ICAP feature selection on the colon dataset
@@ -0,0 +1,46 @@
import scipy.io
from sklearn.metrics import accuracy_score
from sklearn import cross_validation
from sklearn import svm
from skfeature.function.information_theoretical_based import ICAP


def main():
    # load data
    mat = scipy.io.loadmat('../data/colon.mat')
    X = mat['X']    # data
    X = X.astype(float)
    y = mat['Y']    # label
    y = y[:, 0]
    n_samples, n_features = X.shape    # number of samples and number of features

    # split data into 10 folds
    ss = cross_validation.KFold(n_samples, n_folds=10, shuffle=True)

    # perform evaluation on classification task
    num_fea = 10    # number of selected features
    clf = svm.LinearSVC()    # linear SVM

    correct = 0
    for train, test in ss:
        # obtain the index of each feature on the training set
        idx = ICAP.icap(X[train], y[train], n_selected_features=num_fea)

        # obtain the dataset on the selected features
        features = X[:, idx[0:num_fea]]

        # train a classification model with the selected features on the training dataset
        clf.fit(features[train], y[train])

        # predict the class labels of test data
        y_predict = clf.predict(features[test])

        # obtain the classification accuracy on the test data
        acc = accuracy_score(y[test], y_predict)
        correct = correct + acc

    # output the average classification accuracy over all 10 folds
    print 'Accuracy:', float(correct)/10


if __name__ == '__main__':
    main()
New file: example script for JMI feature selection on the colon dataset
@@ -0,0 +1,46 @@
import scipy.io
from sklearn.metrics import accuracy_score
from sklearn import cross_validation
from sklearn import svm
from skfeature.function.information_theoretical_based import JMI


def main():
    # load data
    mat = scipy.io.loadmat('../data/colon.mat')
    X = mat['X']    # data
    X = X.astype(float)
    y = mat['Y']    # label
    y = y[:, 0]
    n_samples, n_features = X.shape    # number of samples and number of features

    # split data into 10 folds
    ss = cross_validation.KFold(n_samples, n_folds=10, shuffle=True)

    # perform evaluation on classification task
    num_fea = 10    # number of selected features
    clf = svm.LinearSVC()    # linear SVM

    correct = 0
    for train, test in ss:
        # obtain the index of each feature on the training set
        idx = JMI.jmi(X[train], y[train], n_selected_features=num_fea)

        # obtain the dataset on the selected features
        features = X[:, idx[0:num_fea]]

        # train a classification model with the selected features on the training dataset
        clf.fit(features[train], y[train])

        # predict the class labels of test data
        y_predict = clf.predict(features[test])

        # obtain the classification accuracy on the test data
        acc = accuracy_score(y[test], y_predict)
        correct = correct + acc

    # output the average classification accuracy over all 10 folds
    print 'Accuracy:', float(correct)/10


if __name__ == '__main__':
    main()
New file: example script for unsupervised MCFS feature selection with k-means evaluation on the COIL20 dataset
@@ -0,0 +1,44 @@
import scipy.io
from skfeature.function.sparse_learning_based import MCFS
from skfeature.utility import construct_W
from skfeature.utility import unsupervised_evaluation


def main():
    # load data
    mat = scipy.io.loadmat('../data/COIL20.mat')
    X = mat['X']    # data
    X = X.astype(float)
    y = mat['Y']    # label
    y = y[:, 0]

    # construct affinity matrix
    kwargs = {"metric": "euclidean", "neighborMode": "knn", "weightMode": "heatKernel", "k": 5, 't': 1}
    W = construct_W.construct_W(X, **kwargs)

    num_fea = 100    # specify the number of selected features
    num_cluster = 20    # specify the number of clusters; usually set to the number of classes in the ground truth

    # obtain the feature weight matrix
    Weight = MCFS.mcfs(X, n_selected_features=num_fea, W=W, n_clusters=20)

    # rank features in descending order of their MCFS scores
    idx = MCFS.feature_ranking(Weight)

    # obtain the dataset on the selected features
    selected_features = X[:, idx[0:num_fea]]

    # perform kmeans clustering based on the selected features, repeated 20 times
    nmi_total = 0
    acc_total = 0
    for i in range(0, 20):
        nmi, acc = unsupervised_evaluation.evaluation(X_selected=selected_features, n_clusters=num_cluster, y=y)
        nmi_total += nmi
        acc_total += acc

    # output the average NMI and average ACC
    print 'NMI:', float(nmi_total)/20
    print 'ACC:', float(acc_total)/20


if __name__ == '__main__':
    main()
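Per the comments above, each call to unsupervised_evaluation.evaluation clusters the selected features with k-means and scores the clustering against the ground-truth labels; because k-means depends on its random initialization, the example averages NMI and ACC over 20 runs. Below is a minimal sketch of one such NMI evaluation using plain scikit-learn, assuming this is roughly what the skfeature helper computes internally (its exact internals are not shown in this commit).

from sklearn.cluster import KMeans
from sklearn.metrics import normalized_mutual_info_score


def evaluate_once(X_selected, y, n_clusters):
    # cluster the samples using only the selected features
    labels = KMeans(n_clusters=n_clusters, n_init=10).fit_predict(X_selected)
    # NMI compares the clustering with the ground-truth labels and is
    # invariant to how cluster labels are permuted
    return normalized_mutual_info_score(y, labels)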