Skip to content

Commit dd06c35

Browse files
committed Oct 29, 2013
loggers grab RSS and CPU counters from perf stat to track cache-misses
1 parent 213b388 commit dd06c35

9 files changed

+65
-34
lines changed
 

‎bench/BMLogs/bigLogger.sh

+17-10
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,10 @@ swapPerfFile=${OUTDIR}/`date +%Y-%m-%d`"-sisoPerf_"$mach".csv"
1818

1919
#headers
2020
cpuheader='time(s)'
21-
head -n 33 /proc/stat | tail -n 32 | awk -F' ' 'OFS="," {print $1}' > tmpfile
21+
head -n 33 /proc/stat | awk -F' ' 'OFS="," {print $1}' > tmpfile
2222
cpuheader=$cpuheader,`./transpose.sh tmpfile`
2323
rm tmpfile
24-
memheader='time(s),MemTotal,MemFree,Cached,Writeback'
24+
memheader='time(s),MemTotal,MemFree,Cached,Writeback,RSS'
2525
topheader='time(s),PID,USER,RES,%CPU,%MEM,COMMAND'
2626
netheader='time(s),dev,bytes,packets,errs,drop'
2727
sisoheader='time(s),si,so'
@@ -48,14 +48,15 @@ function echoLine {
4848
then
4949
line=`cat $1`
5050
echo $(( `date +%s` - $2 )),$6,$line >> $3
51+
else
52+
line=`cat $1`
53+
echo $(( `date +%s` - $2 )),$line >> $3
5154
fi
52-
line=`cat $1`
53-
echo $(( `date +%s` - $2 )),$line >> $3
5455
else
5556
if [ $5 ]
5657
then
5758
line=`./transpose.sh $1 | awk -F, 'OFS="," {print $1,$2,$4,$17}'`
58-
echo $(( `date +%s` - $2 )),$line >> $3
59+
echo $(( `date +%s` - $2 )),$line,$6 >> $3
5960
else
6061
line=`./transpose.sh $1`
6162
echo $(( `date +%s` - $2 )),$line >> $3
@@ -86,6 +87,12 @@ done
8687

8788
start=`cat starttime`
8889
while :; do
90+
h2oPID=`ps -efww | grep h2o | grep 0xdiag| grep jar|awk '{print $2}' | xargs`
91+
if [ -z $h2oPID ]
92+
then
93+
continue
94+
fi
95+
8996
#dump raw logs first
9097
ts=`date +"%Y-%m-%d-%H-%M-%S"`
9198
cat /proc/stat >> ${rawLogs}/procstat/${ts}_procstat_${mach}
@@ -94,13 +101,13 @@ while :; do
94101
vmstat >> ${rawLogs}/vmstat/${ts}_vmstat_${mach}
95102
top -b -n 1 >> ${rawLogs}/top/${ts}_top_${mach}
96103
a=1
97-
for i in {0..34}
104+
for i in {0..35}
98105
do
99106
TOTALS[$i]=0
100107
done
101108
while read -a CPU
102109
do a=$(($a+1));
103-
if [ $a -eq 34 ]
110+
if [ $a -eq 35 ]
104111
then
105112
break
106113
fi
@@ -128,7 +135,7 @@ while :; do
128135
echo $(( `date +%s` - $start )),$linecpu >> $cpuPerfFile
129136
echo $(( `date +%s` - $start )),$lineidle >> $idlePerfFile
130137
echo $(( `date +%s` - $start )),$lineiowait >> $iowaitPerfFile
131-
138+
RSS=`ps v $h2oPID | awk -F' ' 'OFS="," {print $8}' | tail -n 1`
132139
cat /proc/meminfo | awk -F' ' 'OFS="," {gsub(":","", $1); print $2}' > bmemTMP
133140
echo $pwd
134141
devstat=
@@ -140,13 +147,13 @@ while :; do
140147
esac
141148
grep $devstat /proc/net/dev | awk -F' ' 'OFS="," {print $2,$3,$4,$5}' > brecTMP
142149
grep $devstat /proc/net/dev | awk -F' ' 'OFS="," {print $10,$11,$12,$13}' > btraTMP
143-
echoLine bmemTMP $start $memPerfFile 1 1
150+
echoLine bmemTMP $start $memPerfFile 1 1 $RSS
144151
echoLine brecTMP $start $netReceivePerfFile 0 1 $devstat
145152
echoLine btraTMP $start $netTransmitPerfFile 0 1 $devstat
146153
#get top 10 processes from top and then just store them, may/not be interesting...
147154
ti="$(( `date +%s` - ${start} ))"
148155
top -b | head -n 17 | tail -n 10 | awk -v t=$ti -F' ' 'OFS="," {print t,$1,$2,$6,$9,$10,$12}' >> $topPerfFile
149156
vmstat | tail -n 1 | awk -v t=$ti -F' ' 'OFS="," {print t,$7,$8}' >> $swapPerfFile
157+
perf stat -x, -e instructions,cycles,cache-references,cache-misses,faults -a -o bTMP -p $h2oPID sleep 10
150158
rm b*TMP
151-
sleep 10
152159
done

‎bench/BMLogs/littleLogger.sh

+10-9
Original file line numberDiff line numberDiff line change
@@ -28,15 +28,15 @@ cachePerfFile=${OUTDIR}/$1/$1-`date +%Y-%m-%d`"-cachePerf_"$mach".csv"
2828

2929
#headers
3030
cpuheader='time(s)'
31-
head -n 33 /proc/stat | tail -n 32 | awk -F' ' 'OFS="," {print $1}' > tmpfile
31+
head -n 33 /proc/stat | awk -F' ' 'OFS="," {print $1}' > tmpfile
3232
cpuheader=$cpuheader,`./transpose.sh tmpfile`
3333

3434
if [ -a tmpfile ]
3535
then
3636
rm tmpfile
3737
fi
3838

39-
memheader='time(s),MemTotal,MemFree,Cached,Writeback'
39+
memheader='time(s),MemTotal,MemFree,Cached,Writeback,RSS'
4040
topheader='time(s),PID,USER,RES,%CPU,%MEM,COMMAND'
4141
netheader='time(s),dev,bytes,packets,errs,drop'
4242
sisoheader='time(s),si,so'
@@ -57,14 +57,15 @@ function echoLine {
5757
then
5858
line=`cat $1`
5959
echo $(( `date +%s` - $2 )),$6,$line >> $3
60+
else
61+
line=`cat $1`
62+
echo $(( `date +%s` - $2 )),$line >> $3
6063
fi
61-
line=`cat $1`
62-
echo $(( `date +%s` - $2 )),$line >> $3
6364
else
6465
if [ $5 -eq 1 ]
6566
then
6667
line=`./transpose.sh $1 | awk -F, 'OFS="," {print $1,$2,$4,$17}'`
67-
echo $(( `date +%s` - $2 )),$line >> $3
68+
echo $(( `date +%s` - $2 )),$line,$6 >> $3
6869
else
6970
line=`./transpose.sh $1`
7071
echo $(( `date +%s` - $2 )),$line >> $3
@@ -82,7 +83,7 @@ checkExists $netTransmitPerfFile $netheader
8283
checkExists $swapPerfFile $sisoheader
8384
checkExists $cachePerfFile $cacheheader
8485

85-
for i in {0..34}
86+
for i in {0..35}
8687
do
8788
PREVTOTALS[$i]=0
8889
done
@@ -96,13 +97,13 @@ while :; do
9697
continue
9798
fi
9899
a=1
99-
for i in {0..34}
100+
for i in {0..35}
100101
do
101102
TOTALS[$i]=0
102103
done
103104
while read -a CPU
104105
do a=$(($a+1));
105-
if [ $a -eq 34 ]
106+
if [ $a -eq 35 ]
106107
then
107108
break
108109
fi
@@ -143,7 +144,7 @@ while :; do
143144
grep $devstat /proc/net/dev | awk -F' ' 'OFS="," {print $2,$3,$4,$5}' > lrecTMP
144145
grep $devstat /proc/net/dev | awk -F' ' 'OFS="," {print $10,$11,$12,$13}' > ltraTMP
145146

146-
echoLine lmemTMP $start $memPerfFile 1 1
147+
echoLine lmemTMP $start $memPerfFile 1 1 $RSS
147148
echoLine lrecTMP $start $netReceivePerfFile 0 1 $devstat
148149
echoLine ltraTMP $start $netTransmitPerfFile 0 1 $devstat
149150
#get top 10 processes from top and then just store them, may/not be interesting...

‎bench/BMscripts/gbmBench.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
#GBM bench
2-
import os, sys, time, csv
2+
import os, sys, time, csv, string
33
sys.path.append('../py/')
44
sys.path.extend(['.','..'])
55
import h2o_cmd, h2o, h2o_hosts, h2o_browse as h2b, h2o_import as h2i, h2o_rf, h2o_jobs
@@ -12,6 +12,7 @@
1212
}
1313
build = ""
1414
debug = False
15+
json = ""
1516
def doGBM(f, folderPath, ignored_cols, classification, testFilehex, ntrees, depth, minrows, nbins, learnRate, response, row):
1617
debug = False
1718
bench = "bench"
@@ -101,6 +102,8 @@ def doGBM(f, folderPath, ignored_cols, classification, testFilehex, ntrees, dept
101102
gbm = h2o_cmd.runGBM(parseResult = parseResult, noPoll=True, timeoutSecs=4800, **kwargs)
102103
h2o_jobs.pollWaitJobs(timeoutSecs=16000, pollTimeoutSecs=120, retryDelaySecs=5)
103104
gbmTime = time.time() - gbmStart
105+
cmd = 'cd ..; bash startloggers.sh ' + json + ' stop_'
106+
os.system(cmd)
104107
row.update( {'gbmBuildTime' : gbmTime,
105108
})
106109
gbmTrainView = h2o_cmd.runGBMView(model_key='GBM('+f+')')
@@ -118,6 +121,7 @@ def doGBM(f, folderPath, ignored_cols, classification, testFilehex, ntrees, dept
118121
dat = sys.argv.pop(-1)
119122
debug = sys.argv.pop(-1)
120123
build = sys.argv.pop(-1)
124+
json = sys.argv[-1].split('/')[-1]
121125
h2o.parse_our_args()
122126
h2o_hosts.build_cloud_with_hosts(enable_benchmark_log=False)
123127
fp = 'Airlines' if 'Air' in dat else 'AllBedrooms'

‎bench/BMscripts/glm2Bench.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
#GLM2 bench
2-
import os, sys, time, csv, re, requests
2+
import os, sys, time, csv, re, requests, string
33
sys.path.append('../py/')
44
sys.path.extend(['.','..'])
55
import h2o_cmd, h2o, h2o_hosts, h2o_browse as h2b, h2o_import as h2i, h2o_rf, h2o_jobs
@@ -12,6 +12,7 @@
1212
}
1313
build = ""
1414
debug = False
15+
json = ""
1516
def doGLM2(f, folderPath, family, lambda_, alpha, nfolds, y, x, testFilehex, row, case_mode, case_val):
1617
debug = False
1718
bench = "bench"
@@ -96,6 +97,8 @@ def doGLM2(f, folderPath, family, lambda_, alpha, nfolds, y, x, testFilehex, row
9697
glm = h2o_cmd.runGLM(parseResult = parseResult, timeoutSecs=1800, noPoll=True, **kwargs)
9798
h2o_jobs.pollWaitJobs(timeoutSecs=7200, pollTimeoutSecs=7200, retryDelaySecs=5)
9899
glmTime = time.time() - glmStart
100+
cmd = 'cd ..; bash startloggers.sh ' + json + ' stop_'
101+
os.system(cmd)
99102
#glm = h2o.nodes[0].inspect("GLM("+f+")")
100103
row.update( {'glm2BuildTime' : glmTime,
101104
#'AverageErrorOver10Folds' : glm['glm_model']['validations'][0]['err'],
@@ -141,6 +144,7 @@ def doGLM2(f, folderPath, family, lambda_, alpha, nfolds, y, x, testFilehex, row
141144
dat = sys.argv.pop(-1)
142145
debug = sys.argv.pop(-1)
143146
build = sys.argv.pop(-1)
147+
json = sys.argv[-1].split('/')[-1]
144148
h2o.parse_our_args()
145149
h2o_hosts.build_cloud_with_hosts()
146150
fp = 'Airlines' if 'Air' in dat else 'AllBedrooms'

‎bench/BMscripts/glmBench.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
#GLM bench
2-
import os, sys, time, csv, socket
2+
import os, sys, time, csv, socket, string
33
sys.path.append('../py/')
44
sys.path.extend(['.','..'])
55
import h2o_cmd, h2o, h2o_hosts, h2o_browse as h2b, h2o_import as h2i, h2o_rf, h2o_jobs
@@ -11,6 +11,7 @@
1111
}
1212
build = ""
1313
debug = False
14+
json = ""
1415
def doGLM(f, folderPath, family, link, lambda_, alpha, nfolds, y, x, testFilehex, row):
1516
debug = False
1617
bench = "bench"
@@ -101,6 +102,8 @@ def doGLM(f, folderPath, family, link, lambda_, alpha, nfolds, y, x, testFilehex
101102
model_key = params['destination_key'],
102103
timeoutSecs = 1800)
103104
scoreTime = time.time() - glmScoreStart
105+
cmd = 'cd ..; bash startloggers.sh ' + json + ' stop_'
106+
os.system(cmd)
104107
if family == "binomial":
105108
row.update( {'scoreTime' : scoreTime,
106109
'AUC' : glmScore['validation']['auc'],
@@ -121,6 +124,7 @@ def doGLM(f, folderPath, family, link, lambda_, alpha, nfolds, y, x, testFilehex
121124
dat = sys.argv.pop(-1)
122125
debug = sys.argv.pop(-1)
123126
build = sys.argv.pop(-1)
127+
json = sys.argv[-1].split('/')[-1]
124128
h2o.parse_our_args()
125129
h2o_hosts.build_cloud_with_hosts()
126130
fp = 'Airlines' if 'Air' in dat else 'AllBedrooms'

‎bench/BMscripts/kmeansBench.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
#KMeans bench
2-
import os, sys, time, csv
2+
import os, sys, time, csv, string
33
sys.path.append('../py/')
44
sys.path.extend(['.','..'])
55
import h2o_cmd, h2o, h2o_hosts, h2o_browse as h2b, h2o_import as h2i, h2o_rf, h2o_jobs
@@ -11,6 +11,7 @@
1111
}
1212
build = ""
1313
debug = False
14+
json = ""
1415
def doKMeans(f, folderPath):
1516
debug = False
1617
bench = "bench"
@@ -90,6 +91,8 @@ def doKMeans(f, folderPath):
9091
timeoutSecs=7200,
9192
**kwargs)
9293
kmeansTime = time.time() - kmeansStart
94+
cmd = 'cd ..; bash startloggers.sh ' + json + ' stop_'
95+
os.system(cmd)
9396
row.update({'kmeansBuildTime' : kmeansTime})
9497
csvWrt.writerow(row)
9598
finally:
@@ -99,6 +102,7 @@ def doKMeans(f, folderPath):
99102
dat = sys.argv.pop(-1)
100103
debug = sys.argv.pop(-1)
101104
build = sys.argv.pop(-1)
105+
json = sys.argv[-1].split('/')[-1]
102106
h2o.parse_our_args()
103107
h2o_hosts.build_cloud_with_hosts()
104108
fp = 'Airlines' if 'Air' in dat else 'AllBedrooms'

‎bench/BMscripts/pcaBench.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
#PCA bench
2-
import os, sys, time, csv
2+
import os, sys, time, csv, string
33
sys.path.append('../py/')
44
sys.path.extend(['.','..'])
55
import h2o_cmd, h2o, h2o_hosts, h2o_browse as h2b, h2o_import as h2i, h2o_rf, h2o_util
@@ -12,6 +12,7 @@
1212
}
1313
build = ""
1414
debug = False
15+
json = ""
1516
def doPCA(f, folderPath):
1617
debug = False
1718
bench = "bench"
@@ -84,7 +85,9 @@ def doPCA(f, folderPath):
8485
timeoutSecs = 7200,
8586
**kwargs)
8687
pcaTime = time.time() - pcaStart
87-
88+
cmd = 'cd ..; bash startloggers.sh ' + json + ' stop_'
89+
#stop all loggers
90+
os.system(cmd)
8891
row.update({'pcaBuildTime' : pcaTime})
8992
csvWrt.writerow(row)
9093
finally:
@@ -94,6 +97,7 @@ def doPCA(f, folderPath):
9497
dat = sys.argv.pop(-1)
9598
debug = sys.argv.pop(-1)
9699
build = sys.argv.pop(-1)
100+
json = sys.argv[-1].split('/')[-1]
97101
fp = 'Airlines' if 'Air' in dat else 'AllBedrooms'
98102
h2o.parse_our_args()
99103
h2o_hosts.build_cloud_with_hosts()

‎bench/BMscripts/summaryBench.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
#summary bench
2-
import os, sys, time, csv
2+
import os, sys, time, csv, string
33
sys.path.append('../py/')
44
sys.path.extend(['.','..'])
55
import h2o_cmd, h2o, h2o_hosts, h2o_browse as h2b, h2o_import as h2i, h2o_rf, h2o_util
@@ -12,6 +12,7 @@
1212
}
1313
build = ""
1414
debug = False
15+
json = ""
1516
def doSUM(f, folderPath):
1617
debug = False
1718
bench = "bench"
@@ -78,7 +79,8 @@ def doSUM(f, folderPath):
7879
timeoutSecs = 7200)
7980

8081
sumTime = time.time() - sumStart
81-
82+
cmd = 'cd ..; bash startloggers.sh ' + json + ' stop_'
83+
os.system(cmd)
8284
row.update({'summaryBuildTime' : sumTime})
8385
csvWrt.writerow(row)
8486
finally:
@@ -88,6 +90,7 @@ def doSUM(f, folderPath):
8890
dat = sys.argv.pop(-1)
8991
debug = sys.argv.pop(-1)
9092
build = sys.argv.pop(-1)
93+
json = sys.argv[-1].split('/')[-1]
9194
fp = 'Airlines' if 'Air' in dat else 'AllBedrooms'
9295
h2o.parse_our_args()
9396
h2o_hosts.build_cloud_with_hosts()

‎bench/runBench.sh

+7-7
Original file line numberDiff line numberDiff line change
@@ -36,26 +36,26 @@ function doAlgo {
3636
then
3737
python ${pyScript} -cj BMscripts/${JSON} ${h2oBuild} False Air1x; wait; makeDead 2> /dev/null;
3838
zip -r ${archive}/${h2oBuild}-${DATE}-$1-Air1x sandbox/; wait; rm -rf sandbox/;
39-
bash startloggers.sh ${JSON} clear_; wait;
39+
bash startloggers.sh ${JSON} clear_; wait; bash startloggers.sh ${JSON} changePhase $1;
4040
python ${pyScript} -cj BMscripts/${JSON} ${h2oBuild} False Air10x; wait; makeDead 2> /dev/null;
4141
zip -r ${archive}/${h2oBuild}-${DATE}-$1-Air10x sandbox/; wait; rm -rf sandbox/;
42-
if [ $2 = "gbm" ]
42+
if [ $1 = "gbm" ]
4343
then
4444
continue
4545
fi
46-
bash startloggers.sh ${JSON} clear_; wait;
46+
bash startloggers.sh ${JSON} clear_; wait; bash startloggers.sh ${JSON} changePhase $1;
4747
python ${pyScript} -cj BMscripts/${JSON} ${h2oBuild} False AllB1x; wait; makeDead 2> /dev/null;
4848
zip -r ${archive}/${h2oBuild}-${DATE}-$1-AllB1x sandbox/; wait; rm -rf sandbox/;
49-
bash startloggers.sh ${JSON} clear_; wait;
49+
bash startloggers.sh ${JSON} clear_; wait; bash startloggers.sh ${JSON} changePhase $1;
5050
python ${pyScript} -cj BMscripts/${JSON} ${h2oBuild} False AllB10x; wait; makeDead 2> /dev/null;
5151
zip -r ${archive}/${h2oBuild}-${DATE}-$1-AllB10x sandbox/; wait; rm -rf sandbox/;
52-
bash startloggers.sh ${JSON} clear_; wait;
52+
bash startloggers.sh ${JSON} clear_; wait; bash startloggers.sh ${JSON} changePhase $1;
5353
python ${pyScript} -cj BMscripts/${JSON} ${h2oBuild} False AllB100x; wait; makeDead 2> /dev/null;
5454
zip -r ${archive}/${h2oBuild}-${DATE}-$1-AllB100x sandbox/; wait; rm -rf sandbox/;
55-
bash startloggers.sh ${JSON} clear_; wait;
55+
bash startloggers.sh ${JSON} clear_; wait; bash startloggers.sh ${JSON} changePhase $1;
5656
python ${pyScript} -cj BMscripts/${JSON} ${h2oBuild} False Air100x; wait; makeDead 2> /dev/null;
5757
zip -r ${archive}/${h2oBuild}-${DATE}-$1-Air100x sandbox/; wait; rm -rf sandbox/;
58-
bash startloggers.sh ${JSON} clear_; wait;
58+
bash startloggers.sh ${JSON} clear_; wait; bash startloggers.sh ${JSON} changePhase $1;
5959
else
6060
JSON2=161
6161
echo "Doing KMeans.. Using ${JSON2} config file..."

0 commit comments

Comments
 (0)
Please sign in to comment.