4
4
sys .path .extend (['.' ,'..' ])
5
5
import h2o_cmd , h2o , h2o_hosts , h2o_browse as h2b , h2o_import as h2i , h2o_rf , h2o_jobs
6
6
7
- csv_header = ('h2o_build' ,'nMachines' ,'nJVMs' ,'Xmx/JVM' ,'dataset' ,'nTrainRows' ,'nTestRows' ,'nCols' ,'trainParseWallTime' ,'classification' ,'gbmBuildTime' ,'Error' )
7
# Column names for the benchmark results. Each name must match, character for
# character, the key under which the value is stored in the per-run `row` dict
# (e.g. 'nTrees', 'minRows', 'maxDepth', 'learnRate', 'classification'), so
# there must be no stray whitespace inside the quotes.
# NOTE(review): presumably consumed as csv.DictWriter fieldnames -- confirm.
csv_header = (
    'h2o_build',
    'nMachines',
    'nJVMs',
    'Xmx/JVM',
    'dataset',
    'nTrainRows',
    'nTestRows',
    'nCols',
    'trainParseWallTime',
    'nTrees',
    'minRows',
    'maxDepth',
    'learnRate',
    'classification',
    'gbmBuildTime',
    'Error',
)
8
8
9
9
files = {'Airlines' : {'train' : ('AirlinesTrain1x' , 'AirlinesTrain10x' , 'AirlinesTrain100x' ), 'test' : 'AirlinesTest' },
10
10
'AllBedrooms' : {'train' : ('AllBedroomsTrain1x' , 'AllBedroomsTrain10x' , 'AllBedroomsTrain100x' ), 'test' : 'AllBedroomsTest' },
13
13
build = ""
14
14
debug = False
15
15
def doGBM (f , folderPath , ignored_cols , classification , testFilehex , ntrees , depth , minrows , nbins , learnRate , response , row ):
16
- # debug = False
16
+ debug = False
17
17
bench = "bench"
18
18
if debug :
19
19
print "Doing GBM DEBUG"
@@ -52,18 +52,18 @@ def doGBM(f, folderPath, ignored_cols, classification, testFilehex, ntrees, dept
52
52
header = 1 ,
53
53
header_from_file = headerKey ,
54
54
separator = 44 ,
55
- timeoutSecs = 7200 ,
55
+ timeoutSecs = 16000 ,
56
56
retryDelaySecs = 5 ,
57
- pollTimeoutSecs = 7200 ,
57
+ pollTimeoutSecs = 16000 ,
58
58
noPoll = True ,
59
59
doSummary = False
60
60
)
61
- h2o_jobs .pollWaitJobs (timeoutSecs = 7200 , pollTimeoutSecs = 7200 , retryDelaySecs = 5 )
61
+ h2o_jobs .pollWaitJobs (timeoutSecs = 16000 , pollTimeoutSecs = 16000 , retryDelaySecs = 5 )
62
62
parseWallTime = time .time () - trainParseWallStart
63
63
print "Parsing training file took " , parseWallTime ," seconds."
64
64
h2o .beta_features = False #make sure false for the inspect as well!
65
- inspect_train = h2o .nodes [0 ].inspect (hex_key , timeoutSecs = 7200 )
66
- inspect_test = h2o .nodes [0 ].inspect (testFilehex , timeoutSecs = 7200 )
65
+ inspect_train = h2o .nodes [0 ].inspect (hex_key , timeoutSecs = 16000 )
66
+ inspect_test = h2o .nodes [0 ].inspect (testFilehex , timeoutSecs = 16000 )
67
67
h2o .beta_features = True #ok, can be true again
68
68
nMachines = 1 if len (h2o_hosts .hosts ) is 0 else len (h2o_hosts .hosts )
69
69
row .update ( {'h2o_build' : build ,
@@ -75,6 +75,10 @@ def doGBM(f, folderPath, ignored_cols, classification, testFilehex, ntrees, dept
75
75
'nTestRows' : inspect_test ['num_rows' ],
76
76
'nCols' : inspect_train ['num_cols' ],
77
77
'trainParseWallTime' : parseWallTime ,
78
+ 'nTrees' : ntrees ,
79
+ 'minRows' : minrows ,
80
+ 'maxDepth' : depth ,
81
+ 'learnRate' : learnRate ,
78
82
'classification' : classification ,
79
83
})
80
84
@@ -95,7 +99,7 @@ def doGBM(f, folderPath, ignored_cols, classification, testFilehex, ntrees, dept
95
99
gbmStart = time .time ()
96
100
#TODO(spencer): Uses jobs to poll for gbm completion
97
101
gbm = h2o_cmd .runGBM (parseResult = parseResult , noPoll = True , timeoutSecs = 4800 , ** kwargs )
98
- h2o_jobs .pollWaitJobs (timeoutSecs = 7200 , pollTimeoutSecs = 120 , retryDelaySecs = 5 )
102
+ h2o_jobs .pollWaitJobs (timeoutSecs = 16000 , pollTimeoutSecs = 120 , retryDelaySecs = 5 )
99
103
gbmTime = time .time () - gbmStart
100
104
row .update ( {'gbmBuildTime' : gbmTime ,
101
105
})
@@ -119,6 +123,7 @@ def doGBM(f, folderPath, ignored_cols, classification, testFilehex, ntrees, dept
119
123
fp = 'Airlines' if 'Air' in dat else 'AllBedrooms'
120
124
bench = "bench"
121
125
h2o .beta_features = True
126
+ debug = False
122
127
if debug :
123
128
bench = "bench/debug"
124
129
@@ -137,7 +142,7 @@ def doGBM(f, folderPath, ignored_cols, classification, testFilehex, ntrees, dept
137
142
h2i .import_only (bucket = 'home-0xdiag-datasets' , path = headerPathname )
138
143
headerKey = h2i .find_key (hK )
139
144
testFile = h2i .import_parse (bucket = 'home-0xdiag-datasets' , path = bench + '/Airlines/AirlinesTest.csv' , schema = 'local' , hex_key = "atest.hex" , header = 1 , header_from_file = headerKey , separator = 44 , noPoll = True ,doSummary = False )
140
- h2o_jobs .pollWaitJobs (timeoutSecs = 7200 , pollTimeoutSecs = 7200 , retryDelaySecs = 5 )
145
+ h2o_jobs .pollWaitJobs (timeoutSecs = 16000 , pollTimeoutSecs = 16000 , retryDelaySecs = 5 )
141
146
elapsedAirlinesTestParse = time .time () - airlinesTestParseStart
142
147
row = {'testParseWallTime' : elapsedAirlinesTestParse }
143
148
response = 'IsDepDelayed'
@@ -163,7 +168,7 @@ def doGBM(f, folderPath, ignored_cols, classification, testFilehex, ntrees, dept
163
168
h2i .import_only (bucket = 'home-0xdiag-datasets' , path = headerPathname )
164
169
headerKey = h2i .find_key (hK )
165
170
testFile = h2i .import_parse (bucket = 'home-0xdiag-datasets' , path = bench + '/AllBedrooms/AllBedroomsTest.csv' , schema = 'local' , hex_key = "allBTest.hex" , header = 1 , header_from_file = headerKey , separator = 44 ,noPoll = True ,doSummary = False )
166
- h2o_jobs .pollWaitJobs (timeoutSecs = 7200 , pollTimeoutSecs = 7200 , retryDelaySecs = 5 )
171
+ h2o_jobs .pollWaitJobs (timeoutSecs = 16000 , pollTimeoutSecs = 16000 , retryDelaySecs = 5 )
167
172
elapsedAllBedroomsTestParse = time .time () - allBedroomsTestParseStart
168
173
row = {'testParseWallTime' : elapsedAllBedroomsTestParse }
169
174
response = 'medrent'
@@ -188,7 +193,7 @@ def doGBM(f, folderPath, ignored_cols, classification, testFilehex, ntrees, dept
188
193
#h2i.import_only(bucket='home-0xdiag-datasets', path=headerPathname)
189
194
#headerKey = h2i.find_key(hK)
190
195
#testFile = h2i.import_parse(bucket='home-0xdiag-datasets', path=bench+'/CovType/CovTypeTest.csv', schema='local', hex_key="covTtest.hex", header=1, header_from_file=headerKey, separator=44, noPoll=True,doSummary=False)
191
- #h2o_jobs.pollWaitJobs(timeoutSecs=7200 , pollTimeoutSecs=7200 , retryDelaySecs=5)
196
+ #h2o_jobs.pollWaitJobs(timeoutSecs=16000 , pollTimeoutSecs=16000 , retryDelaySecs=5)
192
197
#elapsedCovTypeTestParse = time.time() - covTypeTestParseStart
193
198
#row = {'testParseWallTime' : elapsedCovTypeTestParse}
194
199
#response = 'C55'
0 commit comments