-
Notifications
You must be signed in to change notification settings - Fork 5
/
produceModelsStatsandSubmission.py
65 lines (47 loc) · 2.14 KB
/
produceModelsStatsandSubmission.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import raop.pipeline as pipeline
import raop.model.ml as model
from sklearn.externals import joblib
import numpy as np
import raop.helper as helper
import os
def buildModels(classifier, modelName, directoryName, modelOutpath, description, additionalFeaturesList, trainFile, testFile):
    """Train and evaluate a model, then write a Kaggle submission CSV.

    Steps, in order:
      1. Extract features and requestor results (X's and Y's) from
         ``trainFile`` and standardize the features.
      2. Hand the classifier and scaled data to ``pipeline.modelPipeline``
         to produce the model and its evaluation metrics.
      3. Load the model from
         ``modelOutpath + directoryName + '/' + modelName``, extract and
         scale the features of ``testFile`` with the scaler fitted on the
         training data, and predict.
      4. Write ``<model path>-KaggleSubmission.csv`` with one
         ``request_id,requester_received_pizza`` row per test item.

    Args:
        classifier: Estimator forwarded to ``pipeline.modelPipeline``.
        modelName: File name of the model; last component of the model path.
        directoryName: Directory (under ``modelOutpath``) holding the model.
        modelOutpath: Path prefix; concatenated as-is, so it is presumably
            expected to end with a path separator -- TODO confirm with
            ``pipeline.modelPipeline``.
        description: Free-form description forwarded to
            ``pipeline.modelPipeline``.
        additionalFeaturesList: Additional features to include, forwarded to
            ``pipeline.getFeatures``.
        trainFile: Training data file passed to ``pipeline.getFeatures``
            with mode ``0`` (train).
        testFile: Unseen data file; passed to ``pipeline.getFeatures`` with
            mode ``1`` (test) and also loaded as JSON to read each item's
            ``"request_id"``.

    Returns:
        None. Results are printed progress messages and the files written.
    """
    # Kept as a function-scope import, exactly where the original had it.
    from sklearn import preprocessing

    # Single-argument print(...) emits identical output under Python 2
    # (parenthesized expression) and Python 3 (function call).
    print('Extracting Features....')
    # params = (file, train = 0 test = 1, additional features to include)
    trainFeatures, pizzas = pipeline.getFeatures(trainFile, 0, additionalFeaturesList)

    # Normalize the feature set; the fitted scaler is reused on the test
    # data below so both sets share the same transformation.
    scaler = preprocessing.StandardScaler().fit(trainFeatures)
    trainFeatures = scaler.transform(trainFeatures)

    # PRODUCE MODEL and EVAL METRICS
    print('Building Model and Evaluating Results....')
    pipeline.modelPipeline(classifier, trainFeatures, pizzas, modelOutpath, modelName, directoryName, description)

    # PREPARE SUBMISSION FILE
    print('Loading specificied model and extracting features on unseen data....')
    # Plain concatenation is kept on purpose: the path has to match wherever
    # modelPipeline stored the model (presumably this same composition --
    # verify against raop.pipeline before switching to os.path.join).
    inputModelFileName = modelOutpath + directoryName + '/' + modelName
    # NOTE(review): joblib.load unpickles the file, so the path must point at
    # a trusted model file.
    # Named trainedModel so the module-level alias `model` is not shadowed.
    trainedModel = joblib.load(inputModelFileName)

    # Fetch features (i.e. X's) for the unseen data.
    testFeatures = pipeline.getFeatures(testFile, 1, additionalFeaturesList)

    # Collect request ids in file order; rows are matched to predictions by
    # position below.
    requestIds = [item["request_id"] for item in helper.loadJSONfromFile(testFile)]

    # Scale the features with the scaler fitted on the training data.
    testFeatures = scaler.transform(testFeatures)

    # Get result predictions and convert to a list.
    print('Model is making predictions...')
    predictions = trainedModel.predict(testFeatures).tolist()

    # Write results and user details to the submission file. The context
    # manager guarantees the handle is closed even if a write fails.
    with open(inputModelFileName + '-KaggleSubmission.csv', 'w') as outputFile:
        outputFile.write("request_id,requester_received_pizza\n")
        for index, requestId in enumerate(requestIds):
            # Any truthy prediction is reported as 1, anything else as 0.
            label = '1' if predictions[index] else '0'
            outputFile.write(requestId + "," + label + "\n")

    print('File is ready for submission to Kaggle...')