Skip to content

Commit

Permalink
Added new tests for KMeans using UCI wines data.
Browse files Browse the repository at this point in the history
  • Loading branch information
spennihana committed Jun 8, 2013
1 parent 3f1c027 commit 13eb262
Show file tree
Hide file tree
Showing 2 changed files with 261 additions and 0 deletions.
83 changes: 83 additions & 0 deletions py/testdir_multi_jvm/test_KMeans_winesPCA.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
import os, json, unittest, time, shutil, sys, random
sys.path.extend(['.','..','py'])

import h2o, h2o_cmd, h2o_glm, h2o_hosts, h2o_kmeans
import h2o_browse as h2b, h2o_import as h2i

# Uses the wines data from http://archive.ics.uci.edu/ml/datasets/Wine
# PCA performed to reduce the data to 2 columns.
# 3 groups, small & easy


class Basic(unittest.TestCase):
def tearDown(self):
h2o.check_sandbox_for_errors()

@classmethod
def setUpClass(cls):
global localhost
localhost = h2o.decide_if_localhost()
if (localhost):
h2o.build_cloud(2,java_heap_GB=5)
else:
h2o_hosts.build_cloud_with_hosts()

@classmethod
def tearDownClass(cls):
h2o.tear_down_cloud()

def test_KMeans_winesPCA(self):
if localhost:
csvFilenameList = [
#with winesPCA2.csv speciy cols = "1,2"
('winesPCA.csv', 480, 'cA'),
]
else:
# None is okay for key2
csvFilenameList = [
('winesPCA.csv', 480,'cA'),
# ('covtype200x.data', 1000,'cE'),
]

importFolderPath = '/home/ec2-user/h2o/smalldata'
h2i.setupImportFolder(None, importFolderPath)
for csvFilename, timeoutSecs, key2 in csvFilenameList:
csvPathname = importFolderPath + "/" + csvFilename
# creates csvFilename.hex from file in importFolder dir
start = time.time()
parseKey = h2i.parseImportFolderFile(None, csvFilename, importFolderPath,
timeoutSecs=2000, key2=key2, noise=('JStack', None))
print "parse end on ", csvPathname, 'took', time.time() - start, 'seconds'
h2o.check_sandbox_for_errors()

inspect = h2o_cmd.runInspect(None, parseKey['destination_key'])
print "\n" + csvPathname, \
" num_rows:", "{:,}".format(inspect['num_rows']), \
" num_cols:", "{:,}".format(inspect['num_cols'])

kwargs = {
#appears not to take 'cols'?
'cols': None,
'epsilon': 1e-6,
'k': 3
}

start = time.time()
kmeans = h2o_cmd.runKMeansOnly(parseKey=parseKey, \
timeoutSecs=timeoutSecs, retryDelaySecs=2, pollTimeoutSecs=60, **kwargs)
elapsed = time.time() - start
print "kmeans end on ", csvPathname, 'took', elapsed, 'seconds.', \
"%d pct. of timeout" % ((elapsed/timeoutSecs) * 100)
h2o_kmeans.simpleCheckKMeans(self, kmeans, **kwargs)
centers = h2o_kmeans.bigCheckResults(self, kmeans, csvPathname, parseKey, 'd', **kwargs)
print "Expected centers: [-2.276318, -0.965151], with 59 rows."
print " [0.0388763, 1.63886039], with 71 rows."
print " [2.740469, -1.237816], with 48 rows."
model_key = kmeans['destination_key']
kmeansScoreResult = h2o.nodes[0].kmeans_score(
key = parseKey['destination_key'], model_key = model_key)
score = kmeansScoreResult['score']


if __name__ == '__main__':
    # When executed as a script, hand control to h2o.unit_main().
    h2o.unit_main()
178 changes: 178 additions & 0 deletions smalldata/winesPCA.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,178 @@
-3.307420974,-1.439402253
-2.203249813,0.332455071
-2.509660695,-1.028250724
-3.74649719,-2.748618391
-1.00607049,-0.867384035
-3.041673726,-2.116430917
-2.442200515,-1.171545343
-2.053643789,-1.604437144
-2.503811346,-0.915488474
-2.745882383,-0.787217029
-3.469948375,-1.298669845
-1.749816878,-0.61025577
-2.107517287,-0.673805614
-3.448429214,-1.127449476
-4.300652282,-2.090079711
-2.298703833,-1.657875063
-2.165845683,-2.320758754
-1.893629475,-1.626779928
-3.532021672,-2.511259707
-2.078658558,-1.058153071
-3.115613762,-0.784683607
-1.083513606,-0.24106354
-2.528092634,0.091582278
-1.640361079,0.514826666
-1.756620656,0.31625681
-0.987294063,-0.938021293
-1.77028387,-0.684244961
-1.231948785,0.089554419
-2.182250468,-0.687629899
-2.249762671,-0.190923365
-2.493187044,-1.237343437
-2.669879637,-1.467733346
-1.623998013,-0.052556196
-1.897338697,-1.628466725
-1.406421181,-0.695971074
-1.898470874,-0.176213873
-1.380966689,-0.656787136
-1.119050703,-0.113788776
-1.49796891,0.767267636
-2.522684901,-1.79793023
-2.580815256,-0.777423286
-0.666601591,-0.16948285
-3.062168979,-1.15266742
-0.460908969,-0.329811773
-2.095440943,0.070809177
-1.132970204,-1.772108487
-2.718931185,-1.187983533
-2.813403002,-0.644440709
-2.004197251,-1.243521635
-2.69987528,-1.747039219
-3.205874088,-0.166522256
-2.85091773,-0.743182376
-3.495743283,-1.608197324
-2.218533165,-1.869893255
-2.140948465,-1.013891472
-2.462383396,-1.325269883
-2.733806175,-1.432507851
-2.167626307,-1.208789989
-3.130549248,-1.726708279
0.925969919,3.064840615
1.538141226,1.377557583
1.831084494,0.827649423
-0.030520739,1.259233996
-2.044494334,1.919617594
0.607965828,1.90269154
-0.897695547,0.761762633
-2.242182264,1.879291229
-0.182868177,2.420318686
0.810518651,0.219893693
-1.970063187,1.399335867
1.567793662,0.882493728
-1.653018841,0.954021019
0.723331955,1.060653425
-2.555019769,-0.259466626
-1.827412658,1.284255467
0.865551287,2.437226063
-0.368973574,2.147848153
1.453277523,1.379460477
-1.259378295,0.768681173
-0.375092282,1.024154387
-0.75992026,3.365559969
-1.031667756,1.446628971
0.493484695,2.374545219
2.531835081,0.087197384
-0.832970436,1.469525197
-0.785688283,2.020925733
0.804562581,2.227546749
0.556472882,2.366310351
1.111974296,1.797177569
0.554159612,2.650064521
1.345489817,2.112043648
1.560081802,1.847004344
1.927119438,1.555108682
-0.744565612,2.306425559
-0.954762094,2.217273771
-2.536709431,-0.168797864
0.54242248,0.367888776
-1.028149464,2.558352543
-2.245574925,1.428711158
-1.406249155,2.160098391
-0.795475851,2.370262577
0.547985924,2.286678198
0.160720367,1.161207694
0.657938975,2.672422602
-0.391250737,2.092828092
1.767513135,1.712457831
0.365237067,2.163251026
1.61611371,1.351770206
-0.082303615,2.299747276
-1.57383547,1.457921668
-1.416573265,1.4142173
0.277918778,1.925137507
1.299479291,0.761025552
0.455786148,2.263031866
0.492795729,1.933590622
-0.480718361,3.860892733
0.252177515,2.813555672
0.106926012,1.923496091
2.426168672,1.253604772
0.549539355,2.215910732
-0.737541413,1.40499335
-1.332562734,-0.252624308
1.173775915,0.662099138
0.461034485,0.616548969
-0.975721685,1.441504188
0.096537406,2.104062685
-0.038378884,1.263198777
1.592665782,1.204745133
0.478215926,1.933386808
1.787790331,1.147052407
1.323368592,-0.169909936
2.377793365,-0.373528925
2.928678651,-0.263119601
2.140772272,-0.367219071
2.363203176,0.458341882
3.055223151,-0.352418705
3.904738983,-0.154147687
3.925390335,-0.657831569
3.085572089,-0.347861484
2.36779237,-0.291159027
2.770996299,-0.285998107
2.280129313,-0.371460001
2.977235064,-0.487841765
2.368513406,-0.480976939
2.203649295,-1.15678934
2.618235277,-0.561576624
4.268597577,-0.647843475
3.572563599,-1.269122707
2.799167604,-1.566115962
2.891502751,-2.035315627
2.314208871,-2.349737747
2.542658413,-2.039529822
1.807442706,-1.523348762
2.752380507,-2.132915648
2.729451047,-0.408733283
3.59472857,-1.79731421
2.881697075,-1.919803079
3.382614128,-1.308186152
1.045233419,-3.505201937
1.605383688,-2.399868419
3.134289512,-0.736084638
2.233855458,-1.172158769
2.839663426,-0.554479845
2.59019044,-0.696002198
2.941003156,-1.550933966
3.520102479,-0.880044297
2.399342284,-2.585064019
2.920845372,-1.270861999
2.175276584,-2.071693314
2.374230368,-2.581385653
3.202583112,0.250542354
3.667572938,-0.845363176
2.458620325,-2.187627269
3.361043046,-2.21005484
2.594636692,-1.752286361
2.670306845,-2.753132874
2.380302543,-2.29088437
3.199732104,-2.761130747

0 comments on commit 13eb262

Please sign in to comment.