Skip to content

Commit

Permalink
added multiple traning algorithm for multi-class classification
Browse files Browse the repository at this point in the history
  • Loading branch information
marwankefah committed Jun 12, 2022
1 parent 4f93559 commit 0de2f7c
Showing 1 changed file with 81 additions and 35 deletions.
116 changes: 81 additions & 35 deletions feature/training.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
from sklearn import svm
from sklearn.decomposition._pca import PCA
from sklearn.ensemble._forest import RandomForestClassifier
from sklearn.neighbors._classification import KNeighborsClassifier

import feature.helper as hf
import torch
from pathlib import Path
Expand All @@ -7,24 +12,68 @@
import pickle as pkl
from settings import model_path
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_moons, make_circles, make_classification
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

train_transforms = hf.defineTransforms()
test_transforms = hf.defineTransformstest()
# Path To Dataset
data_path = Path("data/cropped")
test_path = Path("data/cropped_test/3/")
feat_path = Path("gabor_index.csv")
modelpath = Path("model/checkpoint.pth")

test_path = Path("data/cropped_test/1/")
feat_path = Path("gabor_index_1000.csv")
root_output = Path("data/ML_Results")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

batch_size = 256
max_epoch = 100
max_iter = int(30000 / batch_size * max_epoch)
n_iter_no_change = 30

trainloader, testloader = hf.getProcessedData(
data_path, feat_path, 15000, 15000, transform=train_transforms)

gold_standard_test_loader = hf.getProcessedDataTest(
test_path, feat_path, 1000, transform=test_transforms)

names = [
"Nearest Neighbors",
"Linear SVM",
"RBF SVM",
"Random Forest",
"Neural Net",
"AdaBoost",
"Naive Bayes",
"QDA"
]

classifiers = [
KNeighborsClassifier(3),
SVC(kernel="linear", C=0.025),
SVC(),

RandomForestClassifier(max_depth=2, n_estimators=128),

MLPClassifier(solver='adam', alpha=0.0001,
hidden_layer_sizes=(128, 64),
learning_rate_init=0.0001, random_state=1,
batch_size=128, verbose=True, max_iter=max_iter,
early_stopping=True, n_iter_no_change=n_iter_no_change),
AdaBoostClassifier(),
GaussianNB(),
QuadraticDiscriminantAnalysis(),

]


def roc_auc_score_multiclass(actual_class, pred_class, average="macro"):
# creating a set of all the unique classes using the actual class list
Expand All @@ -46,57 +95,54 @@ def roc_auc_score_multiclass(actual_class, pred_class, average="macro"):
return roc_auc_dict


batch_size = 256
max_epoch = 100
max_iter = int(30000 / batch_size * max_epoch)
n_iter_no_change = 50

for train_images, train_labels in trainloader:
train_images = train_images.numpy()
train_labels = train_labels.numpy()
# model = SVC(probability=True)
# pca.fit(train_images)
# train_images = pca.transform(train_images)

for images, labels in testloader:
val_images = images.numpy()
val_labels = labels.numpy()

model = MLPClassifier(solver='adam', alpha=1e-5,
hidden_layer_sizes=(64, 128, 256, 128, 64),
learning_rate_init=0.0001, random_state=1,
batch_size=128, verbose=True, max_iter=max_iter,
early_stopping=True, n_iter_no_change=n_iter_no_change)
for images, labels in gold_standard_test_loader:
test_images = images.numpy()
test_labels = labels.numpy()

model.fit(train_images, train_labels)
pred = model.predict(train_images)
for name, clf in zip(names, classifiers):
print(name)
clf.fit(train_images, train_labels)
pred = clf.predict(train_images)
train_auc = roc_auc_score_multiclass(
train_labels, pred)
train_metrics = pd.DataFrame(classification_report(
train_labels, pred, output_dict=True)).reset_index()
train_metrics.loc[train_metrics.shape[0]] = [
"ROC", train_auc[0], train_auc[1], train_auc[2], "", "", ""]
print(train_metrics)
train_metrics.to_csv("data/output/train_metrics.csv")

for images, labels in testloader:
images = images.numpy()
labels = labels.numpy()
val_pred = model.predict(images)
train_metrics.to_csv("data/ML_Results/" + name + "_train_metrics.csv")

val_pred = clf.predict(val_images)
val_auc = roc_auc_score_multiclass(
labels, val_pred)
val_labels, val_pred)
val_metrics = pd.DataFrame(classification_report(
labels, val_pred, output_dict=True)).reset_index()
val_labels, val_pred, output_dict=True)).reset_index()
val_metrics.loc[val_metrics.shape[0]] = [
"ROC", val_auc[0], val_auc[1], val_auc[2], "", "", ""]
print(val_metrics)
val_metrics.to_csv("data/output/val_metrics.csv")
val_metrics.to_csv("data/ML_Results/" + name + "_val_metrics.csv")

for images, labels in gold_standard_test_loader:
images = images.numpy()
labels = labels.numpy()
test_pred = model.predict(images)
test_auc = roc_auc_score_multiclass(labels, test_pred)
test_pred = clf.predict(test_images)

test_auc = roc_auc_score_multiclass(test_labels, test_pred)
test_metrics = pd.DataFrame(classification_report(
labels, test_pred, output_dict=True)).reset_index()
test_labels, test_pred, output_dict=True)).reset_index()
test_metrics.loc[test_metrics.shape[0]] = [
"ROC", test_auc[0], test_auc[1], test_auc[2], "", "", ""]
print(test_metrics)
test_metrics.to_csv("data/output/test_metrics.csv")
test_metrics.to_csv("data/ML_Results/" + name + "_test_metrics.csv")

modelpath = root_output / Path("model_" + name + "_.pkl")

fileObject = open(model_path, 'wb')
pkl.dump(model, fileObject)
fileObject = open(modelpath, 'wb')
pkl.dump(clf, fileObject)

0 comments on commit 0de2f7c

Please sign in to comment.