1
+ import pandas as pd
2
+ from sklearn .ensemble import RandomForestClassifier
3
+ from sklearn .ensemble import GradientBoostingClassifier
4
+ from sklearn .ensemble import BaggingClassifier
5
+ from sklearn .ensemble import AdaBoostClassifier
6
+ from sklearn .ensemble import ExtraTreesClassifier
7
+ from sklearn .neural_network import MLPClassifier
8
+ from sklearn .naive_bayes import GaussianNB
9
+ from sklearn .naive_bayes import BernoulliNB
10
+ from sklearn .naive_bayes import MultinomialNB
11
+ from sklearn import metrics
12
+ from sklearn .preprocessing import LabelEncoder
13
+ from sklearn .preprocessing import Imputer
14
+ from sklearn .model_selection import train_test_split
15
+
16
+
17
def get_naive_bayes_models():
    """Create one untrained instance of each naive Bayes classifier.

    Returns:
        A ``(classifier_list, classifier_name_list)`` tuple of parallel
        lists: the Gaussian, Multinomial and Bernoulli estimators (in that
        order) and their display names.
    """
    # Keep each display name next to its estimator so the two returned
    # lists cannot drift out of step.
    named_models = [
        ('Gaussian NB', GaussianNB()),
        ('Multinomial NB', MultinomialNB()),
        ('Bernoulli NB', BernoulliNB()),
    ]
    classifier_list = [model for _, model in named_models]
    classifier_name_list = [name for name, _ in named_models]
    return classifier_list, classifier_name_list
24
+
25
+
26
def get_neural_network(hidden_layer_size=50):
    """Create an untrained multi-layer perceptron classifier.

    Args:
        hidden_layer_size: Value forwarded unchanged as the
            ``hidden_layer_sizes`` argument of ``MLPClassifier``.

    Returns:
        A ``(classifier_list, classifier_name_list)`` tuple of two
        single-element lists, matching the shape returned by the other
        model factories in this file.
    """
    classifier_list = [MLPClassifier(hidden_layer_sizes=hidden_layer_size)]
    classifier_name_list = ['MultiLayer Perceptron']
    return classifier_list, classifier_name_list
29
+
30
+
31
def get_ensemble_models(random_state=42):
    """Create the five untrained ensemble classifiers compared by this script.

    Args:
        random_state: Seed handed to every estimator. The default of 42 is
            the value four of the five estimators were already hard-coded
            with; the random forest previously received no seed at all, so
            its results changed from run to run.

    Returns:
        A ``(classifier_list, classifier_name_list)`` tuple of parallel
        lists: the estimators and their display names, in the order random
        forest, bagging, extra trees, AdaBoost, gradient boosting.
    """
    random_forest = RandomForestClassifier(
        n_estimators=51,
        min_samples_leaf=5,
        min_samples_split=3,
        random_state=random_state,
    )
    bagging = BaggingClassifier(n_estimators=71, random_state=random_state)
    extra_trees = ExtraTreesClassifier(n_estimators=57, random_state=random_state)
    ada_boost = AdaBoostClassifier(n_estimators=51, random_state=random_state)
    gradient_boost = GradientBoostingClassifier(
        n_estimators=101,
        random_state=random_state,
    )

    classifier_list = [random_forest, bagging, extra_trees, ada_boost, gradient_boost]
    classifier_name_list = [
        'Random Forests',
        'Bagging',
        'Extra Trees',
        'AdaBoost',
        'Gradient Boost',
    ]
    return classifier_list, classifier_name_list
40
+
41
+
42
def label_encode_frame(dataframe, encoder=None):
    """Replace every string-valued column of ``dataframe`` with encoded labels.

    A column is encoded when at least one of its values is a ``str``; every
    other column is left untouched. The frame is modified in place and is
    also returned.

    Args:
        dataframe: pandas DataFrame whose string columns should be encoded.
        encoder: Optional object exposing ``fit_transform(values)``. It is
            refit on each encoded column in turn. Defaults to a new
            ``LabelEncoder``.

    Returns:
        The same ``dataframe`` object, with the encoded columns overwritten
        by whatever ``encoder.fit_transform`` returned for them.
    """
    if encoder is None:
        encoder = LabelEncoder()

    for column in dataframe.columns:
        series = dataframe[column]
        # Look at the values themselves instead of ``series[0]``: that
        # lookup can raise KeyError when the index has no label 0 (or the
        # frame is empty), and a leading missing value would make a string
        # column look non-string and be skipped.
        if not any(isinstance(value, str) for value in series):
            continue
        # Non-string entries (missing values included) are converted with
        # str() so the encoder receives a single, sortable value type.
        labels = [
            value if isinstance(value, str) else str(value)
            for value in series
        ]
        dataframe[column] = encoder.fit_transform(labels)
    return dataframe
49
+
50
+
51
def print_evaluation_metrics(trained_model, trained_model_name, X_test, y_test):
    """Print a classification report and accuracy score for a fitted model.

    Args:
        trained_model: Object exposing ``predict(X_test)``.
        trained_model_name: Label printed in the header line.
        X_test: Inputs passed to ``trained_model.predict``.
        y_test: Expected labels the predictions are compared against.

    Returns:
        None. The report is written to standard output.
    """
    # Every print call takes exactly one pre-formatted string, so the code is
    # valid on both Python 2 and Python 3 and emits the same text as the
    # former print statements. The double space after each colon reproduces
    # the separator a print statement inserted between its two operands.
    print('--------- For Model :  %s' % (trained_model_name,))
    predicted_values = trained_model.predict(X_test)
    print(metrics.classification_report(y_test, predicted_values))
    accuracy = metrics.accuracy_score(y_test, predicted_values)
    print('Accuracy Score :  %s' % (accuracy,))
    print("---------------------------------------\n ")
57
+
58
+
59
# Script configuration: input file, the column to predict, and the columns
# removed from the feature set (the target column itself is one of them).
filename = 'train.csv'
target_variable = 'OutcomeType'
columns_to_drop = ['AnimalID', 'Name', 'DateTime', 'OutcomeType', 'OutcomeSubtype']

# Load the data and copy the target values out before that column is dropped.
shelter_frame = pd.read_csv(filename)
class_labels = list(shelter_frame[target_variable].values)
shelter_frame.drop(columns_to_drop, axis=1, inplace=True)

# Encode the remaining string columns, then hold out 20% of the rows for
# evaluation using a fixed seed.
encoded_frame = label_encode_frame(shelter_frame)
X_train, X_test, y_train, y_test = train_test_split(
    encoded_frame,
    class_labels,
    test_size=0.2,
    random_state=42,
)

# Fit every ensemble model on the training split and report on the held-out
# split.
classifier_list, classifier_name_list = get_ensemble_models()
for classifier, classifier_name in zip(classifier_list, classifier_name_list):
    classifier.fit(X_train, y_train)
    print_evaluation_metrics(classifier, classifier_name, X_test, y_test)
0 commit comments