Skip to content

Commit 2e63a8e

Browse files
committed
Merge branch 'master' of https://github.com/0xdata/h2o
2 parents 8899b95 + d3f33e0 commit 2e63a8e

File tree

9 files changed

+36
-55
lines changed

9 files changed

+36
-55
lines changed

R/h2oRClient-package/R/Internal.R

+5-1
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,11 @@ h2o.__remoteSend <- function(client, page, ...) {
5252
#TODO (Spencer): Create "commands.log" using: list(...)
5353
# Sends the given arguments as URL arguments to the given page on the specified server
5454
url = paste("http://", ip, ":", port, "/", page, sep="")
55-
temp = postForm(url, style = "POST", ...)
55+
# temp = postForm(url, style = "POST", ...)
56+
if(length(list(...)) == 0)
57+
temp = getURLContent(url)
58+
else
59+
temp = getForm(url, ..., .checkParams = FALSE) # Some H2O params overlap with Curl params
5660
# after = gsub("NaN", "\"NaN\"", temp[1])
5761
after = gsub("\\\\\\\"NaN\\\\\\\"", "NaN", temp[1]) # TODO: Don't escape NaN in the JSON!
5862
after = gsub("NaN", "\"NaN\"", after)

bench/BMscripts/glm2Bench.py

+14-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
#GLM2 bench
2-
import os, sys, time, csv
2+
import os, sys, time, csv, re, requests
33
sys.path.append('../py/')
44
sys.path.extend(['.','..'])
55
import h2o_cmd, h2o, h2o_hosts, h2o_browse as h2b, h2o_import as h2i, h2o_rf, h2o_jobs
@@ -108,15 +108,27 @@ def doGLM2(f, folderPath, family, lambda_, alpha, nfolds, y, x, testFilehex, row
108108
row.update( {'AIC' : glmView['glm_model']['validation']['aic'],
109109
'nIterations' : glmView['glm_model']['iteration'],
110110
'nPredictors' : len(glmView['glm_model']['beta']),
111-
'AverageError' : glmView['glm_model']['validation']['avg_err'],
111+
#'AverageError' : glmView['glm_model']['validation']['avg_err'],
112112
})
113113
if family == "binomial":
114+
#Scrape html of 2/glmmodelview to get best threshold,
115+
#then, multiply by 100 and cast to int...
116+
#then ask for the corresponding CM from _cms inside glmView
117+
url = 'http://%s:%d/2/GLMModelView.html?_modelKey=%s' % (h2o.nodes[0].http_addr, 55555, 'GLM('+f+')')
118+
r = requests.get(url).text
119+
p1 = re.compile('threshold[:<>/a-z]*[0-9]\.[0-9]*')
120+
p2 = re.compile('[0-9]\.[0-9]*')
121+
# Pull the best threshold out of the fetched page text `r` (previously an undefined name `text` was searched, raising NameError), then scale by 100 to get the integer index used to look up _cms.
best = int(float(p2.search(p1.search(r).group()).group()) * 100)
122+
best_cm = glmView['glm_model']['validation']['_cms'][best]['_arr']
123+
# Off-diagonal (misclassified) cells over the sum of all cells; float() keeps the ratio from truncating to 0 under Python 2 integer division if the cells are ints (assumed to be counts -- TODO confirm).
avg_err = (best_cm[0][1] + best_cm[1][0]) / float(sum(i for sublist in best_cm for i in sublist))
114124
row.update( {#'scoreTime' : scoreTime,
115125
'AUC' : glmView['glm_model']['validation']['auc'],
126+
'AverageError' : avg_err,
116127
})
117128
else:
118129
row.update( {#'scoreTime' : scoreTime,
119130
'AUC' : 'NA',
131+
'AverageError' : glmView['glm_model']['validation']['avg_err'],
120132
})
121133
csvWrt.writerow(row)
122134
finally:

py/h2o.py

-30
Original file line numberDiff line numberDiff line change
@@ -1190,36 +1190,6 @@ def kmeans(self, key, key2=None,
11901190
time.sleep(5)
11911191
return a
11921192

1193-
def kmeans_grid(self, key, key2=None,
1194-
timeoutSecs=300, retryDelaySecs=0.2, initialDelaySecs=None, pollTimeoutSecs=180,
1195-
**kwargs):
1196-
# defaults
1197-
params_dict = {
1198-
'initialization': 'Furthest',
1199-
'k': 1,
1200-
'max_iter': 10,
1201-
'source_key': key,
1202-
'destination_key': 'python_KMeans_Grid_destination.hex',
1203-
}
1204-
browseAlso = kwargs.get('browseAlso', False)
1205-
params_dict.update(kwargs)
1206-
print "\nKMeansGrid params list:", params_dict
1207-
a = self.__do_json_request('KMeansGrid.json', timeout=timeoutSecs, params=params_dict)
1208-
1209-
# Check that the response has the right Progress url it's going to steer us to.
1210-
if a['response']['redirect_request']!='Progress':
1211-
print dump_json(a)
1212-
raise Exception('H2O kmeans_grid redirect is not Progress. KMeans json response precedes.')
1213-
a = self.poll_url(a, timeoutSecs=timeoutSecs, retryDelaySecs=retryDelaySecs,
1214-
initialDelaySecs=initialDelaySecs, pollTimeoutSecs=pollTimeoutSecs)
1215-
verboseprint("\nKMeansGrid result:", dump_json(a))
1216-
1217-
if (browseAlso | browse_json):
1218-
print "Redoing the KMeansGrid through the browser, no results saved though"
1219-
h2b.browseJsonHistoryAsUrlLastMatch('KMeansGrid')
1220-
time.sleep(5)
1221-
return a
1222-
12231193
# params:
12241194
# header=1,
12251195
# header_from_file

py/h2o_cmd.py

-7
Original file line numberDiff line numberDiff line change
@@ -59,13 +59,6 @@ def runKMeans(node=None, parseResult=None, timeoutSecs=20, retryDelaySecs=2, noP
5959
if not node: node = h2o.nodes[0]
6060
return node.kmeans(parseResult['destination_key'], None, timeoutSecs, retryDelaySecs, noPoll=noPoll, **kwargs)
6161

62-
def runKMeansGrid(node=None, parseResult=None,
63-
timeoutSecs=60, retryDelaySecs=2, noise=None, **kwargs):
64-
if not parseResult: raise Exception('No parseResult for KMeansGrid')
65-
if not node: node = h2o.nodes[0]
66-
# no such thing as KMeansGridView..don't use retryDelaySecs
67-
return node.kmeans_grid(parseResult['destination_key'], timeoutSecs, **kwargs)
68-
6962
def runGLM(node=None, parseResult=None,
7063
timeoutSecs=20, retryDelaySecs=2, noise=None, **kwargs):
7164
if not parseResult: raise Exception('No parseResult for GLM')

py/testdir_multi_jvm/test_KMeansGrid_basic.py

+5-4
Original file line numberDiff line numberDiff line change
@@ -53,16 +53,17 @@ def test_KMeansGrid_basic(self):
5353
'initialization': None,
5454
'seed': 3923021996079663354,
5555
'normalize': 0,
56-
'max_iter': 10
56+
'max_iter': "c(2,5,10)"
5757
}
58+
5859
for trial in range(3):
5960
kwargs = params.copy()
60-
61+
h2o.beta_features = True
6162
start = time.time()
62-
kmeans = h2o_cmd.runKMeansGrid(parseResult=parseResult, \
63+
kmeans = h2o_cmd.runKMeans(parseResult=parseResult, \
6364
timeoutSecs=timeoutSecs, retryDelaySecs=2, pollTimeoutSecs=60, **kwargs)
6465
elapsed = time.time() - start
65-
print "kmeans grid end on ", csvPathname, 'took', elapsed, 'seconds.', \
66+
print "kmeans (with grid) end on ", csvPathname, 'took', elapsed, 'seconds.', \
6667
"%d pct. of timeout" % ((elapsed/timeoutSecs) * 100)
6768
h2o_kmeans.simpleCheckKMeans(self, kmeans, **kwargs)
6869

py/testdir_multi_jvm/test_KMeansGrid_params_rand2.py

+4-3
Original file line numberDiff line numberDiff line change
@@ -60,16 +60,17 @@ def test_KMeans_params_rand2(self):
6060
" num_cols:", "{:,}".format(inspect['num_cols'])
6161

6262
paramDict = define_params(SEED)
63+
h2o.beta_features = True # no grid for VA
6364
for trial in range(3):
6465
# default
65-
params = {'k': 1 }
66+
params = {'k': 'c(1,2,3)' }
6667
# 'destination_key': csvFilename + "_" + str(trial) + '.hex'}
6768

6869
h2o_kmeans.pickRandKMeansParams(paramDict, params)
6970
kwargs = params.copy()
70-
71+
7172
start = time.time()
72-
kmeans = h2o_cmd.runKMeansGrid(parseResult=parseResult, \
73+
kmeans = h2o_cmd.runKMeans(parseResult=parseResult, \
7374
timeoutSecs=timeoutSecs, retryDelaySecs=2, pollTimeoutSecs=60, **kwargs)
7475
elapsed = time.time() - start
7576
print "kmeans grid end on ", csvPathname, 'took', elapsed, 'seconds.', \

py/testdir_single_jvm/test_GBM_many_cols.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ def setUpClass(cls):
3535
# fails
3636
# h2o.build_cloud(1,java_heap_MB=100, enable_benchmark_log=True)
3737
# 400 fails
38-
h2o.build_cloud(1,java_heap_GB=8, enable_benchmark_log=True)
38+
h2o.build_cloud(1,java_heap_GB=2, enable_benchmark_log=True)
3939
else:
4040
h2o_hosts.build_cloud_with_hosts(enable_benchmark_log=True)
4141

py/testdir_single_jvm/test_enum_multi_permission.py

+6-6
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ def test_cols_enum_multi_import(self):
7676
# DON'T get redirected to S3! (EC2 hack in config, remember!)
7777
# use it at the node level directly (because we gen'ed the files).
7878
# use regex. the only files in the dir will be the ones we just created with *fileN* match
79-
parseResult = h2i.import_parse(path=SYNDATASETS_DIR + '/*'+rowxcol+'*', schema='put',
79+
parseResult = h2i.import_parse(path=SYNDATASETS_DIR + '/*'+rowxcol+'*', schema='local',
8080
exclude=None, header=1, timeoutSecs=timeoutSecs)
8181
print "parseResult['destination_key']: " + parseResult['destination_key']
8282
print 'parse time:', parseResult['response']['time']
@@ -104,23 +104,23 @@ def test_cols_enum_multi_import(self):
104104
print "checking os.chmod and parse"
105105
# os.chmod(badPathname, stat.S_IRWXU | stat.S_IRWXO)
106106
# always have to re-import because source key is deleted by h2o
107-
parseResult = h2i.import_parse(path=SYNDATASETS_DIR + '/*'+rowxcol+'*', schema='put',
107+
parseResult = h2i.import_parse(path=SYNDATASETS_DIR + '/*'+rowxcol+'*', schema='local',
108108
exclude=None, header=1, timeoutSecs=timeoutSecs)
109109
print "parseResult['destination_key']: " + parseResult['destination_key']
110110
inspect = h2o_cmd.runInspect(None, parseResult['destination_key'])
111111
h2o_cmd.infoFromInspect(inspect, csvPathname)
112112

113113
print "write by owner, only, and parse"
114114
os.chmod(badPathname, stat.S_IWRITE)
115-
parseResult = h2i.import_parse(path=SYNDATASETS_DIR + '/*'+rowxcol+'*', schema='put',
115+
parseResult = h2i.import_parse(path=SYNDATASETS_DIR + '/*'+rowxcol+'*', schema='local',
116116
exclude=None, header=1, timeoutSecs=timeoutSecs)
117117
inspect = h2o_cmd.runInspect(None, parseResult['destination_key'])
118118
h2o_cmd.infoFromInspect(inspect, csvPathname)
119119

120120
print "execute by owner, only, and parse"
121121
os.chmod(badPathname, stat.S_IEXEC)
122122
h2o.nodes[0].import_files(SYNDATASETS_DIR)
123-
parseResult = h2i.import_parse(path=SYNDATASETS_DIR + '/*'+rowxcol+'*', schema='put',
123+
parseResult = h2i.import_parse(path=SYNDATASETS_DIR + '/*'+rowxcol+'*', schema='local',
124124
exclude=None, header=1, timeoutSecs=timeoutSecs)
125125
inspect = h2o_cmd.runInspect(None, parseResult['destination_key'])
126126
h2o_cmd.infoFromInspect(inspect, csvPathname)
@@ -136,11 +136,11 @@ def test_cols_enum_multi_import(self):
136136

137137
print "parsing after one bad uid"
138138
os.chown(badPathname, badUid, origGid)
139-
parseResult = h2i.import_parse(path=SYNDATASETS_DIR + '/*'+rowxcol+'*', schema='put',
139+
parseResult = h2i.import_parse(path=SYNDATASETS_DIR + '/*'+rowxcol+'*', schema='local',
140140
exclude=None, header=1, timeoutSecs=timeoutSecs)
141141
print "parsing after one bad gid"
142142
os.chown(badPathname, origUid, badGid)
143-
parseResult = h2i.import_parse(path=SYNDATASETS_DIR + '/*'+rowxcol+'*', schema='put',
143+
parseResult = h2i.import_parse(path=SYNDATASETS_DIR + '/*'+rowxcol+'*', schema='local',
144144
exclude=None, header=1, timeoutSecs=timeoutSecs)
145145

146146
os.chown(badPathname, origUid, origGid)

src/main/java/hex/pca/PCAScore.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ public static String link(Key modelKey, String content) {
9090
}
9191

9292
public static String link(String key_param, Key k, String content) {
93-
RString rs = new RString("<a href='PCAScore.query?%key_param=%$key'>%content</a>");
93+
RString rs = new RString("<a href='2/PCAScore.query?%key_param=%$key'>%content</a>");
9494
rs.replace("key_param", key_param);
9595
rs.replace("key", k.toString());
9696
rs.replace("content", content);

0 commit comments

Comments
 (0)