Skip to content

Commit

Permalink
move shared code to h2o_cmd for info from inspect
Browse files Browse the repository at this point in the history
new test that fails: mixed number + string in col causes NA
  • Loading branch information
Kevin Normoyle committed Mar 15, 2013
1 parent 609c7dd commit 38895c7
Show file tree
Hide file tree
Showing 7 changed files with 418 additions and 95 deletions.
27 changes: 27 additions & 0 deletions py/h2o_cmd.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,33 @@ def parseFile(node=None, csvPathname=None, key=None, key2=None,
timeoutSecs=timeoutSecs, retryDelaySecs=retryDelaySecs, pollTimeoutSecs=pollTimeoutSecs,
noise=noise, noPoll=noPoll)


def info_from_inspect(inspect, csvPathname):
    """Print a debug summary of an Inspect result and total its missing values.

    Args:
        inspect: mapping indexed by string keys. Must provide 'cols' (an
            iterable of per-column mappings, each with 'num_missing_values'),
            'num_cols', 'num_rows', 'row_size', 'type', 'value_size_bytes'
            and 'response' (which in turn provides 'time').
        csvPathname: dataset path; only used to label the per-column lines.

    Returns:
        The sum of 'num_missing_values' over all columns.
    """
    # need more info about this dataset for debug
    cols = inspect['cols']
    # look for nonzero num_missing_values count in each col
    sum_num_missing_values = 0
    for i, colDict in enumerate(cols):
        num_missing_values = colDict['num_missing_values']
        if num_missing_values != 0:
            # A single parenthesized argument prints the same text under the
            # Python 2 print statement and the Python 3 print function.
            print("%s: col: %d, num_missing_values: %d"
                  % (csvPathname, i, num_missing_values))
            sum_num_missing_values += num_missing_values

    num_cols = inspect['num_cols']
    num_rows = inspect['num_rows']
    row_size = inspect['row_size']
    ptype = inspect['type']
    value_size_bytes = inspect['value_size_bytes']
    response = inspect['response']
    ptime = response['time']

    # Adjacent literals instead of a backslash continuation inside the string:
    # a continuation inside the quotes would embed the next line's indentation
    # in the printed message.
    summary_format = (
        "num_cols: %s, num_rows: %s, row_size: %s, ptype: %s, "
        "value_size_bytes: %s, time: %s"
    )
    print(summary_format
          % (num_cols, num_rows, row_size, ptype, value_size_bytes, ptime))
    # sum of num_missing_values from all the columns
    return sum_num_missing_values


def runInspect(node=None, key=None, timeoutSecs=5, **kwargs):
if not key: raise Exception('No key for Inspect specified')
if not node: node = h2o.nodes[0]
Expand Down
9 changes: 7 additions & 2 deletions py/testdir_hosts/pytest_config-kevin.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

"h2o_per_host": 1,
"sigar": true,
"java_heap_GB": 4,
"java_heap_GB": 10,

"hdfs_name_node": "192.168.1.176",
"hdfs_version": "cdh3u5",
Expand All @@ -20,7 +20,12 @@
"192.168.1.172",
"192.168.1.173",
"192.168.1.174",
"192.168.1.175"
"192.168.1.175",
"192.168.1.176",
"192.168.1.177",
"192.168.1.178",
"192.168.1.179",
"192.168.1.180"
]
}

24 changes: 1 addition & 23 deletions py/testdir_single_jvm/test_GLM_binomial_goalies.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,28 +33,6 @@ def define_params():
}
return paramDict

def info_from_inspect(inspect, csvPathname):
    """Dump the headline fields of an Inspect result for debugging.

    Each column's 'num_missing_values' entry is still read, but the
    per-column report is commented out in this copy, so csvPathname is
    not used. One summary line is printed (it includes the raw 'response'
    entry) and nothing is returned.
    """
    for col_info in inspect['cols']:
        if col_info['num_missing_values'] == 0:
            continue
        # per-column report disabled here:
        ### print "%s: col: %d, num_missing_values: %d" % (csvPathname, i, num_missing_values)

    # Fetch the fields in the order they appear in the summary line.
    summary_keys = ('num_cols', 'num_rows', 'row_size', 'type',
                    'value_size_bytes', 'response')
    fields = [inspect[key] for key in summary_keys]
    # 'time' is nested inside the 'response' entry fetched last above.
    fields.append(fields[-1]['time'])

    print(
        "num_cols: %s, num_rows: %s, row_size: %s, ptype: %s, "
        "value_size_bytes: %s, response: %s, time: %s" % tuple(fields)
    )

class Basic(unittest.TestCase):
def tearDown(self):
h2o.check_sandbox_for_errors()
Expand All @@ -74,7 +52,7 @@ def test_loop_random_param_covtype(self):
inspect = h2o_cmd.runInspect(None, parseKey['destination_key'])

# need more info about the dataset for debug
info_from_inspect(inspect, csvPathname)
h2o_cmd.info_from_inspect(inspect, csvPathname)

# for determinism, I guess we should spit out the seed?
# random.seed(SEED)
Expand Down
24 changes: 1 addition & 23 deletions py/testdir_single_jvm/test_GLM_poisson_goalies_admm.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,28 +30,6 @@ def define_params():
}
return paramDict

def info_from_inspect(inspect, csvPathname):
    """Print a one-line debug summary of an Inspect result.

    The columns are walked and 'num_missing_values' is looked up for each,
    but the per-column print is commented out in this copy, which leaves
    csvPathname unused. The summary line also shows the whole 'response'
    entry. Returns None.
    """
    columns = inspect['cols']
    for column in columns:
        missing = column['num_missing_values']
        if missing != 0:
            ### print "%s: col: %d, num_missing_values: %d" % (csvPathname, i, num_missing_values)
            pass

    # Collect the values in the same order the summary line reports them.
    report = []
    for key in ('num_cols', 'num_rows', 'row_size', 'type', 'value_size_bytes'):
        report.append(inspect[key])
    response = inspect['response']
    report.extend([response, response['time']])

    template = (
        "num_cols: %s, num_rows: %s, row_size: %s, ptype: %s, "
        "value_size_bytes: %s, response: %s, time: %s"
    )
    print(template % tuple(report))

class Basic(unittest.TestCase):
def tearDown(self):
h2o.check_sandbox_for_errors()
Expand All @@ -70,7 +48,7 @@ def test_loop_random_param_covtype(self):
inspect = h2o_cmd.runInspect(None, parseKey['destination_key'])

# need more info about the dataset for debug
info_from_inspect(inspect, csvPathname)
h2o_cmd.info_from_inspect(inspect, csvPathname)

# for determinism, I guess we should spit out the seed?
# random.seed(SEED)
Expand Down
24 changes: 1 addition & 23 deletions py/testdir_single_jvm/test_GLM_poisson_goalies_gg.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,28 +30,6 @@ def define_params():
}
return paramDict

def info_from_inspect(inspect, csvPathname):
    """Report the top-level fields of an Inspect result on stdout.

    'num_missing_values' is read for every column, yet the per-column line
    is commented out in this copy, so csvPathname goes unused. The printed
    summary includes the raw 'response' entry. Nothing is returned.
    """
    for _index, col in enumerate(inspect['cols']):
        has_missing = col['num_missing_values'] != 0
        if has_missing:
            ### print "%s: col: %d, num_missing_values: %d" % (csvPathname, i, num_missing_values)
            pass

    # Look the fields up in reporting order; 'time' comes from the response.
    top_level = tuple(
        inspect[name]
        for name in ('num_cols', 'num_rows', 'row_size', 'type',
                     'value_size_bytes', 'response')
    )
    values = top_level + (top_level[-1]['time'],)

    print("num_cols: %s, num_rows: %s, row_size: %s, ptype: %s, "
          "value_size_bytes: %s, response: %s, time: %s" % values)

class Basic(unittest.TestCase):
def tearDown(self):
h2o.check_sandbox_for_errors()
Expand All @@ -70,7 +48,7 @@ def test_loop_random_param_covtype(self):
inspect = h2o_cmd.runInspect(None, parseKey['destination_key'])

# need more info about the dataset for debug
info_from_inspect(inspect, csvPathname)
h2o_cmd.info_from_inspect(inspect, csvPathname)

# for determinism, I guess we should spit out the seed?
# random.seed(SEED)
Expand Down
25 changes: 1 addition & 24 deletions py/testdir_single_jvm/test_rf_covtype_train_oobe.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,29 +30,6 @@
'exclusive_split_limit': 0,
}

def info_from_inspect(inspect, csvPathname):
    """Print per-column missing-value counts and a summary of an Inspect result.

    One line is printed for every column whose 'num_missing_values' is
    nonzero, labelled with csvPathname and the column index. A final line
    reports num_cols, num_rows, row_size, type, value_size_bytes and the
    'time' field of the response. Returns None.
    """
    for position, column in enumerate(inspect['cols']):
        missing = column['num_missing_values']
        if missing == 0:
            continue
        print("%s: col: %d, num_missing_values: %d"
              % (csvPathname, position, missing))

    # Fetch the fields in the order the summary line reports them.
    fields = [
        inspect[key]
        for key in ('num_cols', 'num_rows', 'row_size', 'type',
                    'value_size_bytes')
    ]
    fields.append(inspect['response']['time'])

    print(
        "num_cols: %s, num_rows: %s, row_size: %s, ptype: %s, "
        "value_size_bytes: %s, time: %s" % tuple(fields)
    )


class Basic(unittest.TestCase):
def tearDown(self):
h2o.check_sandbox_for_errors()
Expand Down Expand Up @@ -83,7 +60,7 @@ def test_rf_covtype_train_oobe(self):


inspect = h2o_cmd.runInspect(None, parseKey['destination_key'])
info_from_inspect(inspect, csvPathname)
h2o_cmd.info_from_inspect(inspect, csvPathname)

for trial in range(1):
# params is mutable. This is default.
Expand Down
Loading

0 comments on commit 38895c7

Please sign in to comment.