Skip to content

Commit

Permalink
update
Browse files Browse the repository at this point in the history
update
  • Loading branch information
jundongl committed Feb 22, 2016
1 parent 454ec77 commit 28e1f6a
Show file tree
Hide file tree
Showing 116 changed files with 5,520 additions and 0 deletions.
Empty file added skfeature/__init__.py
Empty file.
Binary file added skfeature/data/ALLAML.mat
Binary file not shown.
Binary file added skfeature/data/BASEHOCK.mat
Binary file not shown.
Binary file added skfeature/data/CLL-SUB-111.mat
Binary file not shown.
Binary file added skfeature/data/COIL20.mat
Binary file not shown.
Binary file added skfeature/data/Carcinom.mat
Binary file not shown.
Binary file added skfeature/data/GLI-85.mat
Binary file not shown.
Binary file added skfeature/data/GLIOMA.mat
Binary file not shown.
Binary file added skfeature/data/Isolet.mat
Binary file not shown.
Binary file added skfeature/data/ORL.mat
Binary file not shown.
Binary file added skfeature/data/PCMAC.mat
Binary file not shown.
Binary file added skfeature/data/Prostate-GE.mat
Binary file not shown.
Binary file added skfeature/data/RELATHE.mat
Binary file not shown.
Binary file added skfeature/data/SMK-CAN-187.mat
Binary file not shown.
Binary file added skfeature/data/TOX-171.mat
Binary file not shown.
Binary file added skfeature/data/USPS.mat
Binary file not shown.
Binary file added skfeature/data/Yale.mat
Binary file not shown.
Binary file added skfeature/data/arcene.mat
Binary file not shown.
Binary file added skfeature/data/colon.mat
Binary file not shown.
Binary file added skfeature/data/gisette.mat
Binary file not shown.
Binary file added skfeature/data/leukemia.mat
Binary file not shown.
Binary file added skfeature/data/lung.mat
Binary file not shown.
Binary file added skfeature/data/lung_small.mat
Binary file not shown.
Binary file added skfeature/data/lymphoma.mat
Binary file not shown.
Binary file added skfeature/data/madelon.mat
Binary file not shown.
Binary file added skfeature/data/nci9.mat
Binary file not shown.
Binary file added skfeature/data/orlraws10P.mat
Binary file not shown.
Binary file added skfeature/data/pixraw10P.mat
Binary file not shown.
Binary file added skfeature/data/warpAR10P.mat
Binary file not shown.
Binary file added skfeature/data/warpPIE10P.mat
Binary file not shown.
46 changes: 46 additions & 0 deletions skfeature/example/test_CFS.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import scipy.io
from sklearn import svm
from sklearn import cross_validation
from sklearn.metrics import accuracy_score
from skfeature.function.statistical_based import CFS


def main():
    """Evaluate CFS feature selection on the colon data with a linear SVM.

    Loads '../data/colon.mat', runs shuffled k-fold cross-validation, calls
    CFS.cfs on each training fold, trains a LinearSVC on the selected
    columns and prints the accuracy averaged over the folds.
    """
    # load data
    mat = scipy.io.loadmat('../data/colon.mat')
    X = mat['X'].astype(float)    # data
    y = mat['Y'][:, 0]    # label (first column of Y)
    n_samples = X.shape[0]    # number of samples

    # split data into n_folds folds; the same value is reused for the average
    # below so the two can never drift apart
    n_folds = 10
    ss = cross_validation.KFold(n_samples, n_folds=n_folds, shuffle=True)

    # perform evaluation on classification task
    num_fea = 100    # upper bound on the number of selected features kept
    clf = svm.LinearSVC()    # linear SVM

    correct = 0.0
    for train, test in ss:
        # obtain the index of selected features on the training set only,
        # so the test fold never influences the selection
        idx = CFS.cfs(X[train], y[train])

        # obtain the dataset on the selected features
        selected_features = X[:, idx[:num_fea]]

        # train a classification model with the selected features on the training dataset
        clf.fit(selected_features[train], y[train])

        # predict the class labels of test data
        y_predict = clf.predict(selected_features[test])

        # accumulate the classification accuracy on the test data
        correct += accuracy_score(y[test], y_predict)

    # output the average classification accuracy over all folds
    # (single-argument print renders identically on Python 2 and Python 3)
    print('Accuracy: %s' % (float(correct) / n_folds))


if __name__ == '__main__':
    main()
46 changes: 46 additions & 0 deletions skfeature/example/test_CIFE.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import scipy.io
from sklearn.metrics import accuracy_score
from sklearn import cross_validation
from sklearn import svm
from skfeature.function.information_theoretical_based import CIFE


def main():
    """Evaluate CIFE feature selection on the colon data with a linear SVM.

    Loads '../data/colon.mat', runs shuffled k-fold cross-validation, calls
    CIFE.cife on each training fold, trains a LinearSVC on the selected
    columns and prints the accuracy averaged over the folds.
    """
    # load data
    mat = scipy.io.loadmat('../data/colon.mat')
    X = mat['X'].astype(float)    # data
    y = mat['Y'][:, 0]    # label (first column of Y)
    n_samples = X.shape[0]    # number of samples

    # split data into n_folds folds; the same value is reused for the average
    # below so the two can never drift apart
    n_folds = 10
    ss = cross_validation.KFold(n_samples, n_folds=n_folds, shuffle=True)

    # perform evaluation on classification task
    num_fea = 10    # number of selected features
    clf = svm.LinearSVC()    # linear SVM

    correct = 0.0
    for train, test in ss:
        # obtain the feature indices from the training set only,
        # so the test fold never influences the selection
        idx = CIFE.cife(X[train], y[train], n_selected_features=num_fea)

        # obtain the dataset on the selected features
        features = X[:, idx[:num_fea]]

        # train a classification model with the selected features on the training dataset
        clf.fit(features[train], y[train])

        # predict the class labels of test data
        y_predict = clf.predict(features[test])

        # accumulate the classification accuracy on the test data
        correct += accuracy_score(y[test], y_predict)

    # output the average classification accuracy over all folds
    # (single-argument print renders identically on Python 2 and Python 3)
    print('Accuracy: %s' % (float(correct) / n_folds))


if __name__ == '__main__':
    main()
46 changes: 46 additions & 0 deletions skfeature/example/test_CMIM.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import scipy.io
from sklearn.metrics import accuracy_score
from sklearn import cross_validation
from sklearn import svm
from skfeature.function.information_theoretical_based import CMIM


def main():
    """Evaluate CMIM feature selection on the colon data with a linear SVM.

    Loads '../data/colon.mat', runs shuffled k-fold cross-validation, calls
    CMIM.cmim on each training fold, trains a LinearSVC on the selected
    columns and prints the accuracy averaged over the folds.
    """
    # load data
    mat = scipy.io.loadmat('../data/colon.mat')
    X = mat['X'].astype(float)    # data
    y = mat['Y'][:, 0]    # label (first column of Y)
    n_samples = X.shape[0]    # number of samples

    # split data into n_folds folds; the same value is reused for the average
    # below so the two can never drift apart
    n_folds = 10
    ss = cross_validation.KFold(n_samples, n_folds=n_folds, shuffle=True)

    # perform evaluation on classification task
    num_fea = 10    # number of selected features
    clf = svm.LinearSVC()    # linear SVM

    correct = 0.0
    for train, test in ss:
        # obtain the feature indices from the training set only,
        # so the test fold never influences the selection
        idx = CMIM.cmim(X[train], y[train], n_selected_features=num_fea)

        # obtain the dataset on the selected features
        features = X[:, idx[:num_fea]]

        # train a classification model with the selected features on the training dataset
        clf.fit(features[train], y[train])

        # predict the class labels of test data
        y_predict = clf.predict(features[test])

        # accumulate the classification accuracy on the test data
        correct += accuracy_score(y[test], y_predict)

    # output the average classification accuracy over all folds
    # (single-argument print renders identically on Python 2 and Python 3)
    print('Accuracy: %s' % (float(correct) / n_folds))


if __name__ == '__main__':
    main()
46 changes: 46 additions & 0 deletions skfeature/example/test_DISR.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import scipy.io
from sklearn.metrics import accuracy_score
from sklearn import cross_validation
from sklearn import svm
from skfeature.function.information_theoretical_based import DISR


def main():
    """Evaluate DISR feature selection on the colon data with a linear SVM.

    Loads '../data/colon.mat', runs shuffled k-fold cross-validation, calls
    DISR.disr on each training fold, trains a LinearSVC on the selected
    columns and prints the accuracy averaged over the folds.
    """
    # load data
    mat = scipy.io.loadmat('../data/colon.mat')
    X = mat['X'].astype(float)    # data
    y = mat['Y'][:, 0]    # label (first column of Y)
    n_samples = X.shape[0]    # number of samples

    # split data into n_folds folds; the same value is reused for the average
    # below so the two can never drift apart
    n_folds = 10
    ss = cross_validation.KFold(n_samples, n_folds=n_folds, shuffle=True)

    # perform evaluation on classification task
    num_fea = 10    # number of selected features
    clf = svm.LinearSVC()    # linear SVM

    correct = 0.0
    for train, test in ss:
        # obtain the feature indices from the training set only,
        # so the test fold never influences the selection
        idx = DISR.disr(X[train], y[train], n_selected_features=num_fea)

        # obtain the dataset on the selected features
        features = X[:, idx[:num_fea]]

        # train a classification model with the selected features on the training dataset
        clf.fit(features[train], y[train])

        # predict the class labels of test data
        y_predict = clf.predict(features[test])

        # accumulate the classification accuracy on the test data
        correct += accuracy_score(y[test], y_predict)

    # output the average classification accuracy over all folds
    # (single-argument print renders identically on Python 2 and Python 3)
    print('Accuracy: %s' % (float(correct) / n_folds))


if __name__ == '__main__':
    main()
46 changes: 46 additions & 0 deletions skfeature/example/test_FCBF.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import scipy.io
from sklearn.metrics import accuracy_score
from sklearn import cross_validation
from sklearn import svm
from skfeature.function.information_theoretical_based import FCBF


def main():
    """Evaluate FCBF feature selection on the colon data with a linear SVM.

    Loads '../data/colon.mat', runs shuffled k-fold cross-validation, calls
    FCBF.fcbf on each training fold, trains a LinearSVC on the selected
    columns and prints the accuracy averaged over the folds.
    """
    # load data
    mat = scipy.io.loadmat('../data/colon.mat')
    X = mat['X'].astype(float)    # data
    y = mat['Y'][:, 0]    # label (first column of Y)
    n_samples = X.shape[0]    # number of samples

    # split data into n_folds folds; the same value is reused for the average
    # below so the two can never drift apart
    n_folds = 10
    ss = cross_validation.KFold(n_samples, n_folds=n_folds, shuffle=True)

    # perform evaluation on classification task
    num_fea = 10    # number of selected features
    clf = svm.LinearSVC()    # linear SVM

    correct = 0.0
    for train, test in ss:
        # obtain the feature indices from the training set only,
        # so the test fold never influences the selection
        # NOTE(review): confirm that FCBF.fcbf honours n_selected_features
        idx = FCBF.fcbf(X[train], y[train], n_selected_features=num_fea)

        # obtain the dataset on the selected features
        features = X[:, idx[:num_fea]]

        # train a classification model with the selected features on the training dataset
        clf.fit(features[train], y[train])

        # predict the class labels of test data
        y_predict = clf.predict(features[test])

        # accumulate the classification accuracy on the test data
        correct += accuracy_score(y[test], y_predict)

    # output the average classification accuracy over all folds
    # (single-argument print renders identically on Python 2 and Python 3)
    print('Accuracy: %s' % (float(correct) / n_folds))


if __name__ == '__main__':
    main()
46 changes: 46 additions & 0 deletions skfeature/example/test_ICAP.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import scipy.io
from sklearn.metrics import accuracy_score
from sklearn import cross_validation
from sklearn import svm
from skfeature.function.information_theoretical_based import ICAP


def main():
    """Evaluate ICAP feature selection on the colon data with a linear SVM.

    Loads '../data/colon.mat', runs shuffled k-fold cross-validation, calls
    ICAP.icap on each training fold, trains a LinearSVC on the selected
    columns and prints the accuracy averaged over the folds.
    """
    # load data
    mat = scipy.io.loadmat('../data/colon.mat')
    X = mat['X'].astype(float)    # data
    y = mat['Y'][:, 0]    # label (first column of Y)
    n_samples = X.shape[0]    # number of samples

    # split data into n_folds folds; the same value is reused for the average
    # below so the two can never drift apart
    n_folds = 10
    ss = cross_validation.KFold(n_samples, n_folds=n_folds, shuffle=True)

    # perform evaluation on classification task
    num_fea = 10    # number of selected features
    clf = svm.LinearSVC()    # linear SVM

    correct = 0.0
    for train, test in ss:
        # obtain the feature indices from the training set only,
        # so the test fold never influences the selection
        idx = ICAP.icap(X[train], y[train], n_selected_features=num_fea)

        # obtain the dataset on the selected features
        features = X[:, idx[:num_fea]]

        # train a classification model with the selected features on the training dataset
        clf.fit(features[train], y[train])

        # predict the class labels of test data
        y_predict = clf.predict(features[test])

        # accumulate the classification accuracy on the test data
        correct += accuracy_score(y[test], y_predict)

    # output the average classification accuracy over all folds
    # (single-argument print renders identically on Python 2 and Python 3)
    print('Accuracy: %s' % (float(correct) / n_folds))


if __name__ == '__main__':
    main()
46 changes: 46 additions & 0 deletions skfeature/example/test_JMI.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import scipy.io
from sklearn.metrics import accuracy_score
from sklearn import cross_validation
from sklearn import svm
from skfeature.function.information_theoretical_based import JMI


def main():
    """Evaluate JMI feature selection on the colon data with a linear SVM.

    Loads '../data/colon.mat', runs shuffled k-fold cross-validation, calls
    JMI.jmi on each training fold, trains a LinearSVC on the selected
    columns and prints the accuracy averaged over the folds.
    """
    # load data
    mat = scipy.io.loadmat('../data/colon.mat')
    X = mat['X'].astype(float)    # data
    y = mat['Y'][:, 0]    # label (first column of Y)
    n_samples = X.shape[0]    # number of samples

    # split data into n_folds folds; the same value is reused for the average
    # below so the two can never drift apart
    n_folds = 10
    ss = cross_validation.KFold(n_samples, n_folds=n_folds, shuffle=True)

    # perform evaluation on classification task
    num_fea = 10    # number of selected features
    clf = svm.LinearSVC()    # linear SVM

    correct = 0.0
    for train, test in ss:
        # obtain the feature indices from the training set only,
        # so the test fold never influences the selection
        idx = JMI.jmi(X[train], y[train], n_selected_features=num_fea)

        # obtain the dataset on the selected features
        features = X[:, idx[:num_fea]]

        # train a classification model with the selected features on the training dataset
        clf.fit(features[train], y[train])

        # predict the class labels of test data
        y_predict = clf.predict(features[test])

        # accumulate the classification accuracy on the test data
        correct += accuracy_score(y[test], y_predict)

    # output the average classification accuracy over all folds
    # (single-argument print renders identically on Python 2 and Python 3)
    print('Accuracy: %s' % (float(correct) / n_folds))


if __name__ == '__main__':
    main()
44 changes: 44 additions & 0 deletions skfeature/example/test_MCFS.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
import scipy.io
from skfeature.function.sparse_learning_based import MCFS
from skfeature.utility import construct_W
from skfeature.utility import unsupervised_evaluation


def main():
    """Evaluate MCFS unsupervised feature selection on COIL20 via clustering.

    Loads '../data/COIL20.mat', builds an affinity matrix with construct_W,
    obtains feature weights from MCFS.mcfs, keeps the top-ranked columns and
    prints the NMI and ACC reported by unsupervised_evaluation.evaluation,
    averaged over repeated runs.
    """
    # load data
    mat = scipy.io.loadmat('../data/COIL20.mat')
    X = mat['X'].astype(float)    # data
    y = mat['Y'][:, 0]    # label (first column of Y)

    # construct affinity matrix
    kwargs = {"metric": "euclidean", "neighborMode": "knn", "weightMode": "heatKernel", "k": 5, 't': 1}
    W = construct_W.construct_W(X, **kwargs)

    num_fea = 100    # specify the number of selected features
    num_cluster = 20    # specify the number of clusters, it is usually set as the number of classes in the ground truth
    n_repeats = 20    # number of evaluation runs that are averaged

    # obtain the feature weight matrix; use num_cluster here so selection and
    # evaluation always agree on the number of clusters
    Weight = MCFS.mcfs(X, n_selected_features=num_fea, W=W, n_clusters=num_cluster)

    # rank the features according to their weights
    # NOTE(review): ordering direction is defined by MCFS.feature_ranking - confirm
    idx = MCFS.feature_ranking(Weight)

    # obtain the dataset on the selected features
    selected_features = X[:, idx[:num_fea]]

    # perform the clustering evaluation on the selected features n_repeats times
    nmi_total = 0.0
    acc_total = 0.0
    for _ in range(n_repeats):
        nmi, acc = unsupervised_evaluation.evaluation(X_selected=selected_features, n_clusters=num_cluster, y=y)
        nmi_total += nmi
        acc_total += acc

    # output the average NMI and average ACC
    # (single-argument print renders identically on Python 2 and Python 3)
    print('NMI: %s' % (float(nmi_total) / n_repeats))
    print('ACC: %s' % (float(acc_total) / n_repeats))


if __name__ == '__main__':
    main()
Loading

0 comments on commit 28e1f6a

Please sign in to comment.