forked from h2oai/h2o-2
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrunBench.sh
240 lines (209 loc) · 7.17 KB
/
runBench.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
#!/bin/bash
#set -x
#sleep 13000

# Name of the h2o build under test. Starts empty and is filled in later
# from the contents of the 'latest' file.
h2oBuild=""

# Root directory under which a per-build results directory is created.
benchmarks='benchmarks'

# Directory that receives the zipped sandboxes and the archived nohup.out.
archive='Archive'

# Branch name handed to createBench.sh at the end of the run.
branchName='hilbert'

# Date stamp (YYYY-MM-DD) embedded in every archive file name.
DATE=$(date +%Y-%m-%d)
# Runs the standard benchmark suite, one algorithm after another.
#
# For each algorithm: run doAlgo, wait for any background children of this
# shell, then kill leftover h2o processes with makeDead (its stderr is
# discarded because it complains when there is nothing to kill).
#
# Arguments: none
# Not part of the suite at the moment: gbmgrid, bigkmeans.
function all {
  local algo
  for algo in summary pca kmeans glm glm2 gbm; do
    doAlgo "${algo}"
    wait
    makeDead 2> /dev/null
  done
  # doAlgo gbmgrid
  #doAlgo bigkmeans; wait; makeDead 2> /dev/null;
}
# Runs one benchmark algorithm over its data sets and archives each sandbox.
#
# Globals read: JSON, LOG, h2oBuild, archive, DATE
# Arguments:    $1 - algorithm name; the driver run is BMscripts/<name>Bench.py
#
# Flow for every algorithm except bigkmeans:
#   for each data set: run the python driver, kill leftover h2o processes,
#   zip sandbox/ into the archive directory, delete sandbox/, then clear the
#   caches and reset the little-logger phase for the next data set.
#   gbm stops after the Air10x data set.
# bigkmeans runs a single BigK pass and always uses the 161 config file.
# Finally the ice logs for this phase are gathered from the machines.
function doAlgo {
  local algo=$1
  local script="BMscripts/${algo}Bench.py"
  local bigJson=161
  local dataset

  echo "Clear caches!"
  bash startloggers.sh "${JSON}" clear_
  echo "Running ${algo} benchmark..."
  if [ "${LOG}" -eq 1 ]; then
    echo "Changing little logger phase..."
    bash startloggers.sh "${JSON}" changePhase "${algo}"
  fi
  wait

  if [ "${algo}" != "bigkmeans" ]; then
    for dataset in Air1x Air10x AllB1x AllB10x AllB100x Air100x; do
      python "${script}" -cj "BMscripts/${JSON}" "${h2oBuild}" False "${dataset}"
      wait
      makeDead 2> /dev/null
      zip -r "${archive}/${h2oBuild}-${DATE}-${algo}-${dataset}" sandbox/
      wait
      rm -rf sandbox/

      # gbm only runs the two Airlines sets above. The original used a bare
      # 'continue' here, which is a no-op outside a loop, so the skip never
      # actually happened.
      if [ "${algo}" = "gbm" ] && [ "${dataset}" = "Air10x" ]; then
        break
      fi

      bash startloggers.sh "${JSON}" clear_
      wait
      bash startloggers.sh "${JSON}" changePhase "${algo}"
    done
  else
    echo "Doing KMeans.. Using ${bigJson} config file..."
    python "${script}" -cj "BMscripts/${bigJson}" "${h2oBuild}" False BigK
    wait
    makeDead 2> /dev/null
    zip -r "${archive}/${h2oBuild}-${DATE}-${algo}-BIGK" sandbox/
    wait
    rm -rf sandbox/
    bash startloggers.sh "${JSON}" clear_
    wait
  fi

  #gather up the ice h2ologs from the machines for this phase
  bash startloggers.sh "${JSON}" ice "${algo}"
}
# Kills leftover h2o jar processes.
#
# Two passes over `ps -efww`: every line that mentions h2o, jar, and the
# pass's tag (first "spencer", then "0xdiag") has its second column (the PID)
# handed to kill via xargs. Callers discard stderr because the pipeline
# complains when nothing matches.
function makeDead {
  local tag
  for tag in spencer 0xdiag; do
    ps -efww \
      | grep h2o \
      | grep "${tag}" \
      | grep jar \
      | awk '{print $2}' \
      | xargs kill
  done
}
# Runs the given benchmarks in debug mode (no archiving, no logger phases).
#
# Globals read: JSON, h2oBuild
# Arguments:    one or more algorithm names; for each, BMscripts/<name>Bench.py
#               is invoked once per data set with True in the position where
#               doAlgo passes False.
#
# "$@" is quoted so that each name reaches the loop exactly as given, without
# word splitting or glob expansion against the current directory.
function debug {
  local algo dataset
  for algo in "$@"; do
    for dataset in Air1x Air10x AllB1x AllB10x AllB100x Air100x; do
      python "BMscripts/${algo}Bench.py" -cj "BMscripts/${JSON}" "${h2oBuild}" True "${dataset}"
      wait
    done
    #python BMscripts/$a"Bench.py" -cj BMscripts/${JSON} ${h2oBuild} ${DEBUG}
  done
}
# Prints the command-line help text to stdout.
#
# The text is kept in step with what the script actually does: the build is
# taken from the local 'latest' file (the S3 download step is commented out),
# -b/-d/-L are accepted by the option parser, and "all" lists the algorithms
# the all function really runs.
usage()
{
cat << EOF
USAGE: $0 [options]
This script runs the Summary, PCA, KMeans, GLM, GLM2, GBM, and BigKMeans benchmarks
against the h2o build named in the 'latest' file (create it by hand or with S3getLatest.sh).
OPTIONS:
-h Show this message
-t Run task (required):
Choices are:
all -- Runs SUMMARY, PCA, KMEANS, GLM, GLM2, and GBM (with -d: PCA, GLM, KMEANS, GLM2, GBM, and BIGKMEANS)
summary -- Runs Summary on Airlines/AllBedrooms/Covtype data
pca -- Runs PCA on Airlines/AllBedrooms/Covtype data
kmeans -- Runs KMeans on Airlines/AllBedrooms/Covtype data
glm -- Runs logistic regression on Airlines/AllBedrooms/Covtype data
glm2 -- Runs logistic regression on Airlines/AllBedrooms/Covtype data
gbm -- Runs GBM on Airlines/AllBedrooms/Covtype data
gbmgrid -- Runs GBM grid search on Airlines/AllBedrooms/Covtype data
bigkmeans -- Runs KMeans on 180 GB & 1TB of synthetic data
-j JSON config (required):
Choices are:
161 -- Runs benchmark(s) on single machine on 161 (100GB)
162 -- Runs benchmark(s) on single machine on 162 (100GB)
163 -- Runs benchmark(s) on single machine on 163 (100GB)
164 -- Runs benchmark(s) on single machine on 164 (100GB)
161_163 -- Runs benchmark(s) on three machines 161-163 (133GB Each)
161_164 -- Runs benchmark(s) on four machines 161-164 (100GB Each)
-b Build number (parsed, but not currently used by this script)
-d Debug mode (benchmark scripts are invoked with True instead of False; no sandbox archiving)
-L Start the big and little loggers and record the start time in BMLogs/starttime
EOF
}
# ---- Command-line state -----------------------------------------------------
# TASK is never read anywhere in this script; it is kept only so the set of
# globals stays unchanged. The selected task actually lives in TEST.
TASK=
# TEST is initialised explicitly so a TEST variable inherited from the
# environment cannot be mistaken for a -t argument.
TEST=
JSON=
BUILDN=
DEBUG=0
LOG=0
DEEP=0

# Option letters: h (help), t <task>, j <json config>, b <build number>,
# d (debug), L (loggers), D (deep logging).
# 'D' must appear in the optstring; without it getopts reports -D as an
# invalid option and the D) arm below can never run.
while getopts "ht:j:b:dLD" OPTION; do
  case "${OPTION}" in
    h)
      usage
      exit 1
      ;;
    t)
      TEST=$OPTARG
      ;;
    j)
      JSON=$OPTARG
      ;;
    b)
      BUILDN=$OPTARG
      ;;
    d)
      # Debug mode also resets logging; a later -L turns it back on.
      DEBUG=1
      LOG=0
      ;;
    L)
      LOG=1
      ;;
    D)
      DEEP=1
      ;;
    \?)
      # getopts sets OPTION to a literal '?' for unknown options.
      usage
      exit 1
      ;;
    *)
      usage
      exit 1
      ;;
  esac
done
# Both a task (-t) and a JSON config (-j) are mandatory. A missing one is a
# usage error, so exit non-zero (a bare 'exit' would report success).
if [ -z "$TEST" ] || [ -z "$JSON" ]; then
  usage
  exit 1
fi

#bash S3getLatest.sh
#wait

# The build under test is named by the 'latest' file in the current directory.
dir=$(pwd)
latest="${dir}/latest"
if [ ! -f "${latest}" ]; then
  echo "No 'latest' file was found..."
  echo "Either create one, or use S3getLatest.sh."
  exit 1
fi

# Read the same path that was just checked.
h2oBuild=$(cat "${latest}")
if [ -z "${h2oBuild}" ]; then
  # An empty build name would produce nameless archives and shift the
  # positional arguments handed to the benchmark scripts.
  echo "The 'latest' file is empty..."
  exit 1
fi

# Per-build results directory; mkdir -p is a no-op when it already exists.
mkdir -p "${benchmarks}/${h2oBuild}"
# Optional deep logging (DEEP=1).
if [ "${DEEP}" -eq 1 ]; then
  bash startloggers.sh "${JSON}" deep
fi

# Optional big/little loggers (LOG=1).
if [ "${LOG}" -eq 1 ]; then
  #global starttime out to all loggers
  starttime=$(date +%s)
  # Make sure the target directory exists so the redirect below cannot fail.
  mkdir -p BMLogs
  echo "${starttime}" > BMLogs/starttime
  #Gentlemen...Start your loggers!
  bash startloggers.sh "${JSON}" big
  bash startloggers.sh "${JSON}" little
fi
# Dispatch the requested task.
#   debug mode : "all" expands to a fixed list of algorithms, otherwise the
#                task(s) named by -t are handed to debug.
#   normal mode: "all" calls the all function, anything else goes to doAlgo.
# TEST is quoted in every comparison so a value containing whitespace cannot
# break the test expression, and 'all' is called by name rather than by
# executing the contents of TEST as a command.
if [ "${DEBUG}" -eq 1 ]; then
  echo "Running in debug mode... "
  if [ "${TEST}" = "all" ]; then
    debug pca glm kmeans glm2 gbm bigkmeans #gbmgrid bigkmeans
  else
    # Split on whitespace (without glob expansion) so that a space-separated
    # task list keeps working in debug mode, as it did before quoting.
    read -r -a debugTasks <<< "${TEST}"
    debug "${debugTasks[@]}"
  fi
else
  if [ "${TEST}" = "all" ]; then
    all
  else
    doAlgo "${TEST}"
  fi
fi
wait
# Stop the loggers (called unconditionally, whether or not -L was given).
bash startloggers.sh "${JSON}" stop_

#remove annoying useless files
# -f keeps rm quiet when the glob matches nothing.
rm -f -- pytest*flatfile*

#archive nohup
if [ -e nohup.out ]; then
  # Make sure the destination exists before moving the file into it.
  mkdir -p "${archive}"
  mv nohup.out "${archive}/${h2oBuild}-${DATE}-nohup.out"
fi
wait

# Hand the branch name and JSON config to createBench.sh.
bash createBench.sh "${branchName}" "${JSON}"