Skip to content

Commit

Permalink
conflict resolve merge
Browse files Browse the repository at this point in the history
  • Loading branch information
cliffclick committed Sep 5, 2013
2 parents 628092c + fe2c27d commit 5b828fc
Show file tree
Hide file tree
Showing 12 changed files with 272 additions and 122 deletions.
10 changes: 6 additions & 4 deletions py/h2o.py
Original file line number Diff line number Diff line change
Expand Up @@ -1383,7 +1383,7 @@ def random_forest_treeview(self, tree_number, data_key, model_key,
time.sleep(3) # to be able to see it
return a

def GBM(self, data_key, timeoutSecs=600, **kwargs):
def gbm(self, data_key, timeoutSecs=600, **kwargs):
params_dict = {
'destination_key':None,
'source':data_key,
Expand All @@ -1394,19 +1394,21 @@ def GBM(self, data_key, timeoutSecs=600, **kwargs):
'vresponse':None
}
params_dict.update(kwargs)
a = self.__do_json_request('GBM.json',timeout=timeoutSecs,params=params_dict)
a = self.__do_json_request('GBM.json',timeout=timeoutSecs,params=params_dict)
verboseprint("\nGBM result:", dump_json(a))
return a

def PCA(self, data_key, timeoutSecs=600, **kwargs):
def pca(self, data_key, timeoutSecs=600, **kwargs):
params_dict = {
'destination_key':None,
'key':None,
'key':data_key,
'ignore':None,
'tolerance':None,
'standardize':None
}
params_dict.update(kwargs)
a = self.__do_json_request('PCA.json',timeout=timeoutSecs,params=params_dict)
verboseprint("\npca result:", dump_json(a))
return a

def summary_page(self, key, max_column_display=1000, timeoutSecs=60, noPrint=True, **kwargs):
Expand Down
2 changes: 1 addition & 1 deletion py/testdir_multi_jvm/test_KMeans_sphere100.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
# should do this, but does it make h2o kmeans fail?
SHUFFLE_SPHERES = True
R_NOISE = True
ALLOWED_CENTER_DELTA = 1
ALLOWED_CENTER_DELTA = 3

def get_xyz_sphere(R):
u = random.random() # 0 to 1
Expand Down
2 changes: 1 addition & 1 deletion py/testdir_single_jvm/test_GBM_mnist.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def test_GBM_mnist(self):
timeoutSecs = 1800
start = time.time()
node = h2o.nodes[0]
GBMResult = node.GBM(data_key=trainKey, **kwargs)
GBMResult = node.gbm(data_key=trainKey, **kwargs)
elapsed = time.time() - start
print "GBM completed in", elapsed, "seconds.", \
"%d pct. of timeout" % ((elapsed*100)/timeoutSecs)
Expand Down
50 changes: 50 additions & 0 deletions py/testdir_single_jvm/test_PCA_UCIwine.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import unittest
import random, sys, time, re
sys.path.extend(['.','..','py'])

import h2o, h2o_cmd, h2o_hosts, h2o_browse as h2b, h2o_import2 as h2i, h2o_glm, h2o_util, h2o_rf
class Basic(unittest.TestCase):
    """Parse the UCI wine data set and run PCA on it against a one-node cloud."""

    def tearDown(self):
        # Runs after every test method; delegates the check to the h2o harness.
        h2o.check_sandbox_for_errors()

    @classmethod
    def setUpClass(cls):
        # One node with an 8 GB Java heap, shared by every test in the class.
        h2o.build_cloud(1, java_heap_GB=8)

    @classmethod
    def tearDownClass(cls):
        h2o.tear_down_cloud()

    def test_PCA_UCIwine(self):
        """Import/parse wine.data, then request a PCA model on the parsed key.

        Both steps print their wall-clock time and the share of
        ``timeoutSecs`` that they consumed.
        """
        csvFilename = "wine.data"
        timeoutSecs = 180

        # PARSE ****************************************
        trainKey = csvFilename + "_" + ".hex"
        start = time.time()
        parseResult = h2i.import_parse(
            bucket='smalldata',
            path=csvFilename,
            hex_key=trainKey,
            timeoutSecs=timeoutSecs,
        )
        elapsed = time.time() - start
        # A single pre-formatted string keeps the output identical to the
        # comma-separated Python 2 print statement (including the doubled
        # space after "on") while remaining valid syntax on Python 3.
        print("parse end on  %s took %s seconds %d pct. of timeout" % (
            csvFilename, elapsed, (elapsed * 100) / timeoutSecs))
        print("parse result: %s" % (parseResult['destination_key'],))

        # PCA****************************************
        params = {
            'destination_key': "python_PCA_key",
            'ignore': 0,
            'tolerance': 0.0,
            'standardize': 1,
        }

        start = time.time()
        node = h2o.nodes[0]
        # Forward timeoutSecs explicitly: pca() otherwise falls back to its
        # own default, and the percentage printed below would be measured
        # against a limit the request was never actually subject to.
        node.pca(data_key=trainKey, timeoutSecs=timeoutSecs, **params)
        elapsed = time.time() - start
        print("PCA completed in %s seconds. %d pct. of timeout" % (
            elapsed, (elapsed * 100) / timeoutSecs))

# Script entry point: hand control to the h2o harness's unit-test runner.
if __name__ == '__main__':
    h2o.unit_main()
28 changes: 19 additions & 9 deletions py/testdir_single_jvm/test_fp_many_cols.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
sys.path.extend(['.','..','py'])
import h2o, h2o_cmd, h2o_hosts, h2o_browse as h2b, h2o_import2 as h2i, h2o_exec as h2e

H2O_SUPPORTS_OVER_100K_COLS = False

print "Stress the # of cols with fp reals here."
print "Can pick fp format but will start with just the first (e0)"
def write_syn_dataset(csvPathname, rowCount, colCount, SEEDPERFILE, sel):
Expand Down Expand Up @@ -114,16 +116,24 @@ def test_many_cols_and_values_with_syn(self):
(100, 70000, 'cD', 30, 120),
(100, 90000, 'cE', 30, 120),
(100, 100000, 'cF', 30, 120),
(100, 200000, 'cG', 30, 120),
(100, 300000, 'cH', 30, 120),
(100, 400000, 'cI', 30, 120),
(100, 500000, 'cJ', 30, 120),
(100, 600000, 'cK', 30, 120),
(100, 700000, 'cL', 30, 120),
(100, 800000, 'cM', 30, 120),
(100, 900000, 'cN', 30, 120),
(100, 1000000, 'cO', 30, 120),
]

if not H2O_SUPPORTS_OVER_100K_COLS:
print "Restricting number of columns tested to 100,000"
else:
tryList = tryList + [
(100, 200000, 'cG', 30, 120),
(100, 300000, 'cH', 30, 120),
(100, 400000, 'cI', 30, 120),
(100, 500000, 'cJ', 30, 120),
(100, 600000, 'cK', 30, 120),
(100, 700000, 'cL', 30, 120),
(100, 800000, 'cM', 30, 120),
(100, 900000, 'cN', 30, 120),
(100, 1000000, 'cO', 30, 120),
]



for (rowCount, colCount, hex_key, timeoutSecs, timeoutSecs2) in tryList:
SEEDPERFILE = random.randint(0, sys.maxint)
Expand Down
Loading

0 comments on commit 5b828fc

Please sign in to comment.