Skip to content

Commit 8557fc2

Browse files
author
Rupak Chakraborty
committed Nov 1, 2017
Added Shelter Outcome
1 parent 22a8ea1 commit 8557fc2

File tree

3 files changed

+70
-0
lines changed

3 files changed

+70
-0
lines changed
 
Binary file not shown.
Binary file not shown.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
import pandas as pd
2+
from sklearn.ensemble import RandomForestClassifier
3+
from sklearn.ensemble import GradientBoostingClassifier
4+
from sklearn.ensemble import BaggingClassifier
5+
from sklearn.ensemble import AdaBoostClassifier
6+
from sklearn.ensemble import ExtraTreesClassifier
7+
from sklearn.neural_network import MLPClassifier
8+
from sklearn.naive_bayes import GaussianNB
9+
from sklearn.naive_bayes import BernoulliNB
10+
from sklearn.naive_bayes import MultinomialNB
11+
from sklearn import metrics
12+
from sklearn.preprocessing import LabelEncoder
13+
from sklearn.preprocessing import Imputer
14+
from sklearn.model_selection import train_test_split
15+
16+
17+
def get_naive_bayes_models():
    """Build one unfitted instance of each naive Bayes variant.

    Returns:
        A ``(classifiers, names)`` tuple of two parallel lists: the
        Gaussian, Multinomial and Bernoulli estimators (in that order) and
        their display names.
    """
    # Keep each display name next to its estimator so the two returned
    # lists cannot drift out of step.
    named_models = [
        ('Gaussian NB', GaussianNB()),
        ('Multinomial NB', MultinomialNB()),
        ('Bernoulli NB', BernoulliNB()),
    ]
    classifier_list = [model for _, model in named_models]
    classifier_name_list = [name for name, _ in named_models]
    return classifier_list, classifier_name_list
24+
25+
26+
def get_neural_network(hidden_layer_size=50):
    """Build a single unfitted multi-layer perceptron classifier.

    Args:
        hidden_layer_size: Value forwarded unchanged as the
            ``hidden_layer_sizes`` argument of ``MLPClassifier``.

    Returns:
        A ``(classifiers, names)`` tuple of two one-element lists, shaped
        like the return value of the other model factories in this file.
    """
    perceptron = MLPClassifier(hidden_layer_sizes=hidden_layer_size)
    models = [perceptron]
    names = ['MultiLayer Perceptron']
    return models, names
29+
30+
31+
def get_ensemble_models():
    """Build the unfitted tree-ensemble classifiers compared by this script.

    Every estimator is seeded with ``random_state=42`` so that repeated
    runs on the same data produce the same scores.

    Returns:
        A ``(classifiers, names)`` tuple of two parallel lists, ordered as
        random forest, bagging, extra trees, AdaBoost, gradient boosting.
    """
    # The random forest was previously the only unseeded model; it now
    # shares the seed used by its siblings.
    rf = RandomForestClassifier(
        n_estimators=51,
        min_samples_leaf=5,
        min_samples_split=3,
        random_state=42,
    )
    bagg = BaggingClassifier(n_estimators=71, random_state=42)
    extra = ExtraTreesClassifier(n_estimators=57, random_state=42)
    ada = AdaBoostClassifier(n_estimators=51, random_state=42)
    grad = GradientBoostingClassifier(n_estimators=101, random_state=42)

    classifier_list = [rf, bagg, extra, ada, grad]
    classifier_name_list = [
        'Random Forests',
        'Bagging',
        'Extra Trees',
        'AdaBoost',
        'Gradient Boost',
    ]
    return classifier_list, classifier_name_list
40+
41+
42+
def label_encode_frame(dataframe):
    """Replace every text column of ``dataframe`` with integer labels.

    A column counts as text when its first non-missing value is a ``str``.
    Looking at the first *non-missing* value by position (instead of the
    row labelled ``0``) means the function works on empty frames, on
    frames whose index does not contain the label ``0``, and on text
    columns that happen to start with a missing entry.

    Args:
        dataframe: The pandas DataFrame to encode. It is modified in place.

    Returns:
        The same ``dataframe`` object, with text columns overwritten by
        the output of ``LabelEncoder.fit_transform``.
    """
    for column in dataframe.columns:
        present = dataframe[column].dropna()
        if len(present) == 0 or not isinstance(present.iloc[0], str):
            # Nothing to inspect, or not a text column: leave it untouched.
            continue
        # NOTE(review): missing entries inside a text column are handed to
        # LabelEncoder unchanged; how they are encoded depends on the
        # installed scikit-learn version - confirm before relying on it.
        dataframe[column] = LabelEncoder().fit_transform(dataframe[column].values)
    return dataframe
49+
50+
51+
def print_evaluation_metrics(trained_model,trained_model_name,X_test,y_test):
    """Print a classification report and accuracy for a fitted model.

    Each ``print`` call takes a single pre-formatted string, so the
    function is valid on both Python 2 and Python 3 and writes the same
    text on either.

    Args:
        trained_model: Fitted estimator; only its ``predict`` method is used.
        trained_model_name: Label printed in the header line.
        X_test: Feature rows passed to ``trained_model.predict``.
        y_test: True labels compared against the predictions.

    Returns:
        None. The report is written to standard output.
    """
    predicted_values = trained_model.predict(X_test)
    accuracy = metrics.accuracy_score(y_test, predicted_values)

    # '{} {}' reproduces the single space the old print statement inserted
    # between its comma-separated items.
    print('{} {}'.format('--------- For Model : ', trained_model_name))
    print(metrics.classification_report(y_test, predicted_values))
    print('{} {}'.format("Accuracy Score : ", accuracy))
    print("---------------------------------------\n")
57+
58+
59+
# Load the training data; the path is resolved against the working directory.
filename = 'train.csv'
shelter_frame = pd.read_csv(filename)

# Set the labels aside before the target column is dropped from the frame.
target_variable = 'OutcomeType'
class_labels = list(shelter_frame[target_variable].values)

# Drop the listed columns (the target among them), then turn the remaining
# text columns into integer labels.
columns_to_drop = ['AnimalID','Name','DateTime','OutcomeType','OutcomeSubtype']
shelter_frame.drop(columns_to_drop, axis=1, inplace=True)
encoded_frame = label_encode_frame(shelter_frame)

# Hold out 20% of the rows for evaluation, with a fixed seed for the split.
X_train, X_test, y_train, y_test = train_test_split(
    encoded_frame,
    class_labels,
    test_size=0.2,
    random_state=42
)

# Fit each ensemble model in turn and report its metrics on the held-out rows.
classifier_list, classifier_name_list = get_ensemble_models()
for classifier, classifier_name in zip(classifier_list, classifier_name_list):
    classifier.fit(X_train, y_train)
    print_evaluation_metrics(classifier, classifier_name, X_test, y_test)

0 commit comments

Comments
 (0)