forked from h2oai/h2o-2
-
Notifications
You must be signed in to change notification settings - Fork 0
/
h2o_hosts.py
156 lines (137 loc) · 6.31 KB
/
h2o_hosts.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
import getpass, json, h2o
import random, os
# UPDATE: all multi-machine testing will pass a list of IP and base port addresses to H2O.
# This means we won't rely on h2o self-discovery of the cluster.
def find_config(base):
    """Locate a hosts config file and return the path at which it exists.

    `base` is tried as given first, then under 'testdir_hosts/', then under
    'py/testdir_hosts/'. The first candidate that exists is returned.

    Raises:
        Exception: if none of the candidate paths exists.
    """
    # the name as given wins if it exists
    if os.path.exists(base):
        return base
    # otherwise look in the known config directories, in this order
    for prefix in ('testdir_hosts/', 'py/testdir_hosts/'):
        candidate = prefix + base
        if os.path.exists(candidate):
            return candidate
    raise Exception("unable to find config %s" % base)
# node_count is sometimes used positionally...break that out. all others are keyword args
def build_cloud_with_hosts(node_count=None, **kwargs):
    """Build an h2o cloud from layered parameters: defaults, hosts config json, kwargs.

    Parameter precedence, lowest to highest:
      1. the defaults in `all_params_default` below
      2. the json config file (h2o.config_json if set, otherwise
         h2o.default_hosts_file()), located with find_config(). Keys in the
         file that are not in the defaults are ignored.
      3. the keyword arguments passed by the test (always win; keys that are
         not in the defaults are passed through to h2o.build_cloud as well).

    Args:
        node_count: legacy positional form of 'h2o_per_host'. When not None
            it overwrites any 'h2o_per_host' present in kwargs.
        **kwargs: parameter overrides, see `all_params_default` for the names.

    Side effects:
        Sets the module-level global `hosts`: None when the resulting 'ip'
        equals ["127.0.0.1"], otherwise a list with one h2o.RemoteHost per ip.
        Calls h2o.write_flatfile, h2o.upload_jar_to_remote_hosts (remote hosts
        only) and finally h2o.build_cloud.

    Raises:
        Exception: from find_config() when the config file can't be found.
    """
    # legacy: we allow node_count to be positional.
    # if it's used positionally, stick it in kwargs (overwrite if there too).
    # we use h2o_per_host in the config file; it is translated back to the
    # positional node_count for build_cloud at the end.
    if node_count is not None:
        kwargs['h2o_per_host'] = node_count

    # randomizing default base_port used
    offset = random.randint(0, 31)

    # for new params:
    # Just update this list with the param name and default and you're done
    all_params_default = {
        'use_flatfile': None,
        # default to true, so when we flip import folder to hdfs+s3n import on ec2,
        # the cloud is built correctly
        'use_hdfs': True,
        'hdfs_name_node': None,
        'hdfs_config': None,
        'hdfs_version': None,
        'java_heap_GB': None,
        'java_heap_MB': None,
        'java_extra_args': None,
        'timeoutSecs': 60,
        'retryDelaySecs': 2,
        'cleanup': True,
        'slow_connection': False,
        'h2o_per_host': 2,
        # this is for creating the hosts list. Must be a real list (not the
        # string '["127.0.0.1"]') so the localhost special case below matches.
        'ip': ["127.0.0.1"],
        'base_port': 54300 + offset,
        'username': '0xdiag',
        'password': None,
        'rand_shuffle': True,
        'use_home_for_ice': False,
        'key_filename': None,
        'aws_credentials': None,
        'redirect_import_folder_to_s3_path': None,
        'redirect_import_folder_to_s3n_path': None,
        'disable_h2o_log': False,
        'enable_benchmark_log': False,
        'h2o_remote_buckets_root': None,
        'conservative': False,
        'create_json': False,
        # pass this from cloud building to the common "release" h2o_test.py classes
        # for deciding whether keys should be deleted when a test ends.
        'delete_keys_at_teardown': False,
        'clone_cloud': False,
        'cloud_name': None,
    }

    # initialize with the default values
    paramsToUse = dict(all_params_default)

    # allow user to specify the config json at the command line. config_json is a global.
    if h2o.config_json:
        configFilename = find_config(h2o.config_json)
    else:
        # configs may be in the testdir_hosts
        configFilename = find_config(h2o.default_hosts_file())

    h2o.verboseprint("Loading host config from", configFilename)
    with open(configFilename, 'rb') as fp:
        hostDict = json.load(fp)

    # Don't take in params that we don't have in the list above
    # Because michal has extra params in here for ec2! and comments!
    for k in hostDict:
        if k in paramsToUse:
            paramsToUse[k] = hostDict[k]

    # Now overwrite with anything passed by the test
    # whatever the test passes, always overrules the config json
    paramsToUse.update(kwargs)

    # remember whether anybody asked for a specific timeout; if not, the
    # timeout is scaled with the cloud size further down.
    timeout_given = ('timeoutSecs' in hostDict) or ('timeoutSecs' in kwargs)

    # Let's assume we should set the h2o_remote_buckets_root (only affects
    # schema=local), to the home directory of whatever remote user
    # is being used for the hosts. Better than living with a decision
    # we made from scanning locally (remote might not match local)
    # assume the remote user has a /home/<username> (linux targets?)
    # This only affects import folder path name generation by python tests
    if paramsToUse['username']:
        paramsToUse['h2o_remote_buckets_root'] = "/home/" + paramsToUse['username']

    # NOTE: at this point paramsToUse still holds 'password', so it shows up
    # in the verbose output.
    h2o.verboseprint("All build_cloud_with_hosts params:", paramsToUse)

    #********************
    global hosts
    # Update: special case paramsToUse['ip'] = ["127.0.0.1"] and use the normal build_cloud
    # this allows all the tests in testdir_host to be run with a special config that points to 127.0.0.1
    # hosts should be None for everyone if normal build_cloud is desired
    if paramsToUse['ip'] == ["127.0.0.1"]:
        hosts = None
    else:
        h2o.verboseprint("About to RemoteHost, likely bad ip if hangs")
        # expand any ~ or ~user in the string. don't try to expand if None.
        # (same for every host, so do it once)
        key_filename = paramsToUse['key_filename']
        if key_filename:
            key_filename = os.path.expanduser(key_filename)

        hosts = []
        for h in paramsToUse['ip']:
            h2o.verboseprint("Connecting to:", h)
            hosts.append(h2o.RemoteHost(
                addr=h,
                username=paramsToUse['username'],
                password=paramsToUse['password'],
                key_filename=key_filename))

    # done with these, don't pass to build_cloud
    # 'ip' was the list of ip's from the config file, replaced by 'hosts' to build_cloud
    paramsToUse.pop('ip')
    # we want to save username in the node info. don't pop 'username'
    paramsToUse.pop('password')
    paramsToUse.pop('key_filename')

    # handles hosts=None correctly
    h2o.write_flatfile(
        node_count=paramsToUse['h2o_per_host'],
        base_port=paramsToUse['base_port'],
        hosts=hosts,
        rand_shuffle=paramsToUse['rand_shuffle']
    )

    if hosts is not None:
        # this uploads the flatfile too
        h2o.upload_jar_to_remote_hosts(hosts, slow_connection=paramsToUse['slow_connection'])
        # timeout wants to be larger for large numbers of hosts * h2o_per_host
        # use 60 sec min, 8 sec per node. An explicit timeoutSecs from the
        # config json or the test is left alone.
        if not timeout_given:
            paramsToUse['timeoutSecs'] = max(60, 8 * (len(hosts) * paramsToUse['h2o_per_host']))
    # for the 127.0.0.1 case the default timeoutSecs (60) is used as is

    paramsToUse.pop('slow_connection')

    # sandbox gets cleaned in build_cloud
    # legacy param issue: build_cloud takes the per-host count positionally
    h2o_per_host = paramsToUse.pop('h2o_per_host')

    # single-argument form prints the same text under python 2 and 3
    print("java_heap_GB %s" % (paramsToUse['java_heap_GB'],))
    h2o.build_cloud(h2o_per_host, hosts=hosts, **paramsToUse)