2
2
import random , sys , time , os
3
3
sys .path .extend (['.' ,'..' ,'../..' ,'py' ])
4
4
import h2o , h2o_cmd , h2o_browse as h2b , h2o_import as h2i , h2o_kmeans
5
-
6
5
class Basic (unittest .TestCase ):
7
6
def tearDown(self):
    """Run h2o.check_sandbox_for_errors() after every test method."""
    h2o.check_sandbox_for_errors()
@@ -16,10 +15,41 @@ def setUpClass(cls):
16
15
@classmethod
def tearDownClass(cls):
    """Tear down the h2o cloud via h2o.tear_down_cloud()."""
    # To keep the cloud up for manual inspection, sleep here first:
    # h2o.sleep(1500)
    h2o.tear_down_cloud()
21
20
22
21
def test_KMeans_libsvm_fvec (self ):
22
+
23
# Wrapped in a local helper so the same checks can run both before and after KMeans.
# KMeans appears to change the last column to enum (and to alter the data), so compare both states.
25
def do_summary_and_inspect():
    """Run Summary and Inspect on the parsed frame and check its columns.

    Reads ``hex_key``, ``parseResult``, ``csvFilename`` and ``self`` from the
    enclosing test method, so it may only be called once those are bound.
    The per-column checks run only for 'covtype.binary.svm'.

    Returns:
        A ``(numRows, numCols)`` tuple taken from the Inspect result.
        Existing callers that ignore the return value are unaffected.
    """
    # SUMMARY******************************************
    summaryResult = h2o_cmd.runSummary(key=hex_key)
    coltypeList = h2o_cmd.infoFromSummary(summaryResult)

    # INSPECT******************************************
    inspect = h2o_cmd.runInspect(None, parseResult['destination_key'], timeoutSecs=360)
    h2o_cmd.infoFromInspect(inspect, csvFilename)

    numRows = inspect['numRows']
    numCols = inspect['numCols']

    # Now check both inspect and summary
    if csvFilename == 'covtype.binary.svm':
        # NOTE(review): 55 is presumably the column count of this dataset -- confirm.
        for k in range(55):
            column = inspect['cols'][k]
            naCnt = column['naCnt']
            self.assertEqual(0, naCnt, msg='col %s naCnt %d should be %s' % (k, naCnt, 0))
            stype = column['type']
            # Single-argument form prints the same text under Python 2 and 3.
            print("%s %s" % (k, stype))
            self.assertEqual('Int', stype, msg='col %s type %s should be %s' % (k, stype, 'Int'))

        # summary may report type differently than inspect..check it too!
        # we could check na here too
        for i, c in enumerate(coltypeList):
            print("column index: %s column type: %s" % (i, c))
            # Inspect reports these columns as 'Int'; Summary is expected to say 'Numeric'.
            # assertEqual rather than a bare assert: it is not stripped under -O and
            # matches the checks above.
            self.assertEqual(
                'Numeric', c,
                msg="All cols in covtype.binary.svm should be parsed as Numeric! %s %s" % (i, c))

    return numRows, numCols
52
+
23
53
# just do the import folder once
24
54
# make the timeout variable per dataset. it can be 10 secs for covtype 20x (col key creation)
25
55
# so probably 10x that for covtype200
@@ -42,8 +72,12 @@ def test_KMeans_libsvm_fvec(self):
42
72
("syn_0_100_1000.svm" , "cL" , 30 , 1 ),
43
73
]
44
74
75
+ csvFilenameList = [
76
+ ("covtype.binary.svm" , "cC" , 30 , 1 ),
77
+ ]
78
+
45
79
### csvFilenameList = random.sample(csvFilenameAll,1)
46
- # h2b.browseTheCloud()
80
+ h2b .browseTheCloud ()
47
81
lenNodes = len (h2o .nodes )
48
82
49
83
firstDone = False
@@ -55,16 +89,9 @@ def test_KMeans_libsvm_fvec(self):
55
89
# PARSE******************************************
56
90
# creates csvFilename.hex from file in importFolder dir
57
91
parseResult = h2i .import_parse (bucket = 'home-0xdiag-datasets' , path = csvPathname ,
58
- hex_key = hex_key , timeoutSecs = 2000 )
59
- print "Parse result['destination_key']:" , parseResult ['destination_key' ]
92
+ hex_key = hex_key , timeoutSecs = 2000 , doSummary = False )
60
93
61
- # INSPECT******************************************
62
- start = time .time ()
63
- inspect = h2o_cmd .runInspect (None , parseResult ['destination_key' ], timeoutSecs = 360 )
64
- print "Inspect:" , parseResult ['destination_key' ], "took" , time .time () - start , "seconds"
65
- h2o_cmd .infoFromInspect (inspect , csvFilename )
66
- numRows = inspect ['numRows' ]
67
- numCols = inspect ['numCols' ]
94
+ do_summary_and_inspect ()
68
95
69
96
# KMEANS******************************************
70
97
for trial in range (1 ):
@@ -87,12 +114,16 @@ def test_KMeans_libsvm_fvec(self):
87
114
elapsed = time .time () - start
88
115
print "kmeans end on " , csvPathname , 'took' , elapsed , 'seconds.' , \
89
116
"%d pct. of timeout" % ((elapsed / timeoutSecs ) * 100 )
117
+
118
+ do_summary_and_inspect ()
119
+
90
120
# this does an inspect of the model and prints the clusters
91
121
h2o_kmeans .simpleCheckKMeans (self , kmeans , ** kwargs )
92
122
123
+ print "hello"
93
124
(centers , tupleResultList ) = h2o_kmeans .bigCheckResults (self , kmeans , csvPathname , parseResult , 'd' , ** kwargs )
94
125
95
-
126
+ do_summary_and_inspect ()
96
127
97
128
# Script entry point: hand control to the h2o test runner.
if __name__ == "__main__":
    h2o.unit_main()
0 commit comments