forked from h2oai/h2o-2
-
Notifications
You must be signed in to change notification settings - Fork 0
/
test_rf_params_rand2_4201285065147091758.py
68 lines (60 loc) · 2.48 KB
/
test_rf_params_rand2_4201285065147091758.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import unittest
import random, sys
sys.path.extend(['.','..','py'])
import h2o, h2o_cmd, h2o_rf, h2o_hosts
# make a dict of lists, with some legal choices for each. None means no value.
# assume poker1000 datset
# we can pass ntree thru kwargs if we don't use the "trees" parameter in runRF
# only classes 1-7 in the 55th col
# don't allow None on ntree..causes 50 tree default!
# FIX! binLimit causes fail if == 1
print "Temporarily not using binLimit=1 to 3"
paramDict = {
'response_variable': [None,54],
'class_weights': [None,'1=2','2=2','3=2','4=2','5=2','6=2','7=2'],
'ntree': [1,3,7,23],
'model_key': ['model_keyA', '012345', '__hello'],
'out_of_bag_error_estimate': [None,0,1],
'stat_type': [None, 'ENTROPY', 'GINI'],
'depth': [None, 1,10,20,100],
'bin_limit': [None,4,5,10,100,1000],
'parallel': [None,0,1],
'ignore': [None,0,1,2,3,4,5,6,7,8,9],
'sample': [None,20,40,60,80,90],
'seed': [None,'0','1','11111','19823134','1231231'],
'features': [1,3,5,7,9,11,13,17,19,23,37,53],
'exclusive_split_limit': [None,0,3,5],
}
class Basic(unittest.TestCase):
def tearDown(self):
h2o.check_sandbox_for_errors()
@classmethod
def setUpClass(cls):
global localhost
localhost = h2o.decide_if_localhost()
if (localhost):
h2o.build_cloud(node_count=1, java_heap_GB=10)
else:
h2o_hosts.build_cloud_with_hosts(node_count=1, java_heap_GB=10)
@classmethod
def tearDownClass(cls):
h2o.tear_down_cloud()
def test_loop_random_param_covtype(self):
csvPathname = h2o.find_dataset('UCI/UCI-large/covtype/covtype.data')
# for determinism, I guess we should spit out the seed?
##### SEED = random.randint(0, sys.maxint)
# if you have to force to redo a test
SEED = 4201285065147091758
random.seed(SEED)
print "\nUsing random seed:", SEED
for trial in range(10):
# params is mutable. This is default.
params = {'ntree': 13, 'parallel': 1, 'features': 7}
colX = h2o_rf.pickRandRfParams(paramDict, params)
kwargs = params.copy()
# adjust timeoutSecs with the number of trees
timeoutSecs = 30 + ((kwargs['ntree']*20) * max(1,kwargs['features']/15) * (kwargs['parallel'] and 1 or 3))
h2o_cmd.runRF(timeoutSecs=timeoutSecs, csvPathname=csvPathname, **kwargs)
print "Trial #", trial, "completed"
if __name__ == '__main__':
h2o.unit_main()