Skip to content

Commit

Permalink
did anything change in treeStats? add more debug
Browse files Browse the repository at this point in the history
  • Loading branch information
Kevin Normoyle committed Oct 19, 2014
1 parent c3ba0b2 commit 6517700
Show file tree
Hide file tree
Showing 2 changed files with 64 additions and 62 deletions.
2 changes: 2 additions & 0 deletions py/h2o_rf.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,8 @@ def simpleCheckRFView(node=None, rfv=None, checkScoringOnly=False, noPrint=False

varimp = rf_model['varimp']
treeStats = rf_model['treeStats']
if not treeStats:
    # Fail fast when treeStats is missing or empty, and show the value that
    # was actually received. The name must match the `treeStats` variable
    # bound just above (Python names are case-sensitive).
    raise Exception("treeStats not right?: %s" % h2o.dump_json(treeStats))
# print "json:", h2o.dump_json(rfv)
data_key = rf_model['_dataKey']
model_key = rf_model['_key']
Expand Down
124 changes: 62 additions & 62 deletions py/testdir_single_jvm/test_speedrf_covtype.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,11 @@
'seed': None,
}

DO_OOBE = False
DO_PLOT = False
DO_OOBE = True
DO_PLOT = True
# TRY = 'max_depth'
# TRY = 'ntrees'
TRY = 'nbins'
TRY = 'ntrees'
# TRY = 'nbins'


class Basic(unittest.TestCase):
Expand Down Expand Up @@ -120,71 +120,71 @@ def test_speedrf_covtype_fvec(self):

start = time.time()
rfResult = h2o_cmd.runSpeeDRF(parseResult=parseTrainResult, timeoutSecs=timeoutSecs,
noPoll=True, **kwargs)
noPoll=True, **kwargs)
trainElapsed = time.time() - start
print 'rf train end', i, 'on', csvTrainPathname, 'took', trainElapsed, 'seconds'

# don't cancel the last one
#if not lastOne:
# time.sleep(1)
# h2o_jobs.cancelAllJobs(timeoutSecs=2)
h2o_jobs.cancelAllJobs(timeoutSecs=2)
if not lastOne:
time.sleep(1)
h2o_jobs.cancelAllJobs(timeoutSecs=2)
# h2o_jobs.cancelAllJobs(timeoutSecs=2)



#### print "rfView", h2o.dump_json(rfView)
##print "We have a result from the RF above, completed but didn't do RFView yet"
## could the RF indicate 'done' too soon?
## if rfResult['state']=='RUNNING':
## raise Exception("Why is this RF still in RUNNING state? %s" % h2o.dump_json(rfResult))

## if 'drf_model' not in rfResult:
## raise Exception("How come there's no drf_model in this RF result? %s" % h2o.dump_json(rfResult))
#h2o_jobs.pollWaitJobs(timeoutSecs=300)
#rfView = h2o_cmd.runSpeeDRFView(None, model_key, timeoutSecs=60)
#print "rfView:", h2o.dump_json(rfView)

#rfView["drf_model"] = rfView.pop("speedrf_model")
#rf_model = rfView['drf_model']
#cms = rf_model['cms']
#### print "cm:", h2o.dump_json(cm)
#ntrees = rf_model['N']
#errs = rf_model['errs']
#N = rf_model['N']
#varimp = rf_model['varimp']
#treeStats = rf_model['treeStats']

#print "maxDepth:", treeStats['maxDepth']
#print "maxLeaves:", treeStats['maxLeaves']
#print "minDepth:", treeStats['minDepth']
#print "minLeaves:", treeStats['minLeaves']
#print "meanLeaves:", treeStats['meanLeaves']
#print "meanDepth:", treeStats['meanDepth']
#print "errs[0]:", errs[0]
#print "errs[-1]:", errs[-1]
#print "errs:", errs

#(classification_error, classErrorPctList, totalScores) = h2o_rf.simpleCheckRFView(rfv=rfView)
## we iterate over params, so can't really do this check
## self.assertAlmostEqual(classification_error, 0.03, delta=0.5, msg="Classification error %s differs too much" % classification_error)

#print "classErrorPctList:", classErrorPctList
#self.assertEqual(len(classErrorPctList), 7, "Should be 7 output classes, so should have 7 class error percentages from a reasonable predict")
## FIX! should update this expected classification error
#predict = h2o.nodes[0].generate_predictions(model_key=model_key, data_key=data_key)

#eList.append(classErrorPctList[4])
#fList.append(trainElapsed)
#if DO_PLOT:
# if TRY == 'max_depth':
# xLabel = 'max_depth'
# elif TRY == 'ntrees':
# xLabel = 'ntrees'
# elif TRY == 'nbins':
# xLabel = 'nbins'
# else:
# raise Exception("huh? %s" % TRY)
# xList.append(paramDict[xLabel])
### print "rfView", h2o.dump_json(rfView)
#print "We have a result from the RF above, completed but didn't do RFView yet"
# could the RF indicate 'done' too soon?
# if rfResult['state']=='RUNNING':
# raise Exception("Why is this RF still in RUNNING state? %s" % h2o.dump_json(rfResult))

# if 'drf_model' not in rfResult:
# raise Exception("How come there's no drf_model in this RF result? %s" % h2o.dump_json(rfResult))
h2o_jobs.pollWaitJobs(timeoutSecs=300)
rfView = h2o_cmd.runSpeeDRFView(None, model_key, timeoutSecs=60)
print "rfView:", h2o.dump_json(rfView)

rfView["drf_model"] = rfView.pop("speedrf_model")
rf_model = rfView['drf_model']
cms = rf_model['cms']
### print "cm:", h2o.dump_json(cm)
ntrees = rf_model['N']
errs = rf_model['errs']
N = rf_model['N']
varimp = rf_model['varimp']
treeStats = rf_model['treeStats']

print "maxDepth:", treeStats['maxDepth']
print "maxLeaves:", treeStats['maxLeaves']
print "minDepth:", treeStats['minDepth']
print "minLeaves:", treeStats['minLeaves']
print "meanLeaves:", treeStats['meanLeaves']
print "meanDepth:", treeStats['meanDepth']
print "errs[0]:", errs[0]
print "errs[-1]:", errs[-1]
print "errs:", errs

(classification_error, classErrorPctList, totalScores) = h2o_rf.simpleCheckRFView(rfv=rfView)
# we iterate over params, so can't really do this check
# self.assertAlmostEqual(classification_error, 0.03, delta=0.5, msg="Classification error %s differs too much" % classification_error)

print "classErrorPctList:", classErrorPctList
self.assertEqual(len(classErrorPctList), 7, "Should be 7 output classes, so should have 7 class error percentages from a reasonable predict")
# FIX! should update this expected classification error
predict = h2o.nodes[0].generate_predictions(model_key=model_key, data_key=data_key)

eList.append(classErrorPctList[4])
fList.append(trainElapsed)
if DO_PLOT:
if TRY == 'max_depth':
xLabel = 'max_depth'
elif TRY == 'ntrees':
xLabel = 'ntrees'
elif TRY == 'nbins':
xLabel = 'nbins'
else:
raise Exception("huh? %s" % TRY)
xList.append(paramDict[xLabel])

if DO_PLOT:
eLabel = 'class 4 pctWrong'
Expand Down

0 comments on commit 6517700

Please sign in to comment.