forked from h2oai/h2o-2
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'master' of https://github.com/0xdata/h2o
- Loading branch information
Showing
9 changed files
with
1,598 additions
and
1,432 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,148 @@ | ||
import unittest, random, sys, time | ||
sys.path.extend(['.','..','py']) | ||
import h2o, h2o_cmd, h2o_rf as h2o_rf, h2o_hosts, h2o_import as h2i, h2o_exec, h2o_jobs, h2o_gbm | ||
|
||
# Baseline random-forest parameters handed to h2o_cmd.runRF; the test below
# overrides the one named by TRY on each trial.
paramDict = {
    'response': 'C55',
    'cols': None,
    # 'ignored_cols_by_name': 'C1,C2,C6,C7,C8',
    'ignored_cols_by_name': None,
    'classification': 1,
    'validation': None,
    # fail case
    # 'ntrees': 1,
    # 'max_depth': 30,
    # 'nbins': 100,
    'ntrees': 10,
    'max_depth': 20,

    'min_rows': 1,  # normally 1 for classification, 5 for regression
    'nbins': 200,
    'mtries': None,
    'sample_rate': 0.66,
    'importance': 0,
    'seed': None,
}

# When False, the parsed test set is passed as the 'validation' frame;
# when True, 'validation' is left as None.
DO_OOBE = False
# Name of the paramDict entry that the test sweeps over.
# TRY = 'max_depth'
# TRY = 'ntrees'
TRY = 'nbins'
|
||
class Basic(unittest.TestCase):
    """Train RF models on covtype while sweeping one parameter (see TRY),
    export the parsed frames, and run predictions on the held-out split."""

    def tearDown(self):
        h2o.check_sandbox_for_errors()

    @classmethod
    def setUpClass(cls):
        global localhost
        localhost = h2o.decide_if_localhost()
        if localhost:
            h2o.build_cloud(1, java_heap_GB=4)
        else:
            h2o_hosts.build_cloud_with_hosts()

    @classmethod
    def tearDownClass(cls):
        h2o.tear_down_cloud()

    def test_export_import(self):
        SYNDATASETS_DIR = h2o.make_syn_dir()
        h2o.beta_features = True  # fvec
        importFolderPath = "standard"

        # Parse Train ******************************************************
        csvTrainFilename = 'covtype.shuffled.90pct.data'
        csvTrainPathname = importFolderPath + "/" + csvTrainFilename
        trainKey = csvTrainFilename + ".hex"
        parseTrainResult = h2i.import_parse(
            bucket='home-0xdiag-datasets', path=csvTrainPathname, hex_key=trainKey,
            timeoutSecs=180, doSummary=False)
        h2o_cmd.runInspect(None, trainKey)

        # Parse Test ******************************************************
        csvTestFilename = 'covtype.shuffled.10pct.data'
        csvTestPathname = importFolderPath + "/" + csvTestFilename
        testKey = csvTestFilename + ".hex"
        parseTestResult = h2i.import_parse(
            bucket='home-0xdiag-datasets', path=csvTestPathname, hex_key=testKey,
            timeoutSecs=180)
        h2o_cmd.runInspect(None, testKey)

        # Candidate values for each sweepable parameter, keyed by the
        # paramDict entry they override.
        sweepValues = {
            # NOTE(review): the original referenced an undefined depthList
            # (NameError when TRY == 'max_depth'); these values are a guess
            # bracketing the default of 20 -- confirm the intended range.
            'max_depth': [10, 20, 30],
            'ntrees': [5, 10, 20, 30],
            # 'ntrees': [2],
            'nbins': [10, 100, 1000],
        }
        if TRY not in sweepValues:
            raise Exception("huh? %s" % TRY)

        # do ten starts, to see the bad id problem?
        for trial, d in enumerate(sweepValues[TRY], 1):
            # Work on a per-trial copy so the module-level defaults stay intact.
            kwargs = paramDict.copy()
            kwargs[TRY] = d
            kwargs['validation'] = None if DO_OOBE else parseTestResult['destination_key']

            modelKey = 'RFModel_' + str(trial)
            kwargs['destination_key'] = modelKey

            # adjust timeoutSecs with the number of trees
            # seems ec2 can be really slow
            timeoutSecs = 30 + kwargs['ntrees'] * 200

            start = time.time()
            rfResult = h2o_cmd.runRF(parseResult=parseTrainResult, timeoutSecs=timeoutSecs, **kwargs)
            trainElapsed = time.time() - start
            print("rf train end on %s took %s seconds" % (csvTrainPathname, trainElapsed))

            h2o.nodes[0].export_files(src_key=testKey, path=SYNDATASETS_DIR + "/" + testKey, force=1)
            h2o.nodes[0].export_files(src_key=trainKey, path=SYNDATASETS_DIR + "/" + trainKey, force=1)
            # h2o.nodes[0].export_files(src_key=modelKey, path=SYNDATASETS_DIR + "/" + modelKey, force=1)

            rf_model = rfResult['drf_model']
            # These lookups are kept even where the value is unused: a missing
            # field in the model JSON fails the test with a KeyError.
            rf_model['cms']
            ### print "cm:", h2o.dump_json(cm)
            rf_model['N']
            rf_model['varimp']
            errs = rf_model['errs']
            treeStats = rf_model['treeStats']

            for stat in ('maxDepth', 'maxLeaves', 'minDepth', 'minLeaves', 'meanLeaves', 'meanDepth'):
                print("%s: %s" % (stat, treeStats[stat]))
            print("errs[0]: %s" % (errs[0],))
            print("errs[-1]: %s" % (errs[-1],))
            print("errs: %s" % (errs,))

            (classification_error, classErrorPctList, totalScores) = h2o_rf.simpleCheckRFView(rfv=rfResult)
            print("classErrorPctList: %s" % (classErrorPctList,))
            self.assertEqual(len(classErrorPctList), 7, "Should be 7 output classes, so should have 7 class error percentages from a reasonable predict")
            # FIX! should update this expected classification error
            h2o.nodes[0].generate_predictions(model_key=modelKey, data_key=testKey)
||
# Script entry point: hand control to the h2o test harness's main.
if __name__ == '__main__':
    h2o.unit_main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,10 +1,17 @@ | ||
package water.api; | ||
|
||
import water.AbstractBuildVersion; | ||
import water.H2O; | ||
|
||
/** | ||
* Redirect to online documentation page. | ||
*/ | ||
public class Documentation extends HTMLOnlyRequest { | ||
protected String build(Response response) { | ||
return "<meta http-equiv=\"refresh\" content=\"0; url=http://docs.0xdata.com/\">"; | ||
AbstractBuildVersion abv = H2O.getBuildVersion(); | ||
String branchName = abv.branchName(); | ||
String buildNumber = abv.buildNumber(); | ||
String url = "http://s3.amazonaws.com/h2o-release/h2o/" + branchName + "/" + buildNumber + "/docs-website/index.html"; | ||
return "<meta http-equiv=\"refresh\" content=\"0; url=" + url + "\">"; | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters