1
1
package h2o .samples ;
2
2
3
+ import hex .KMeansModel ;
4
+
3
5
import java .text .DecimalFormat ;
4
6
import java .util .Random ;
5
7
@@ -20,26 +22,28 @@ public static void main(String[] args) throws Exception {
20
22
// Starts a one node cluster
21
23
H2O .main (args );
22
24
23
- // Loads, unzip and parse a file. Data is distributed to other nodes in a round-robin way.
24
- Key key = TestUtil .loadAndParseFile ("test" , "smalldata/gaussian/sdss174052 .csv.gz " );
25
+ // Load and parse a file. Data is distributed to other nodes in a round-robin way
26
+ Key key = TestUtil .loadAndParseFile ("test.hex " , "lib/resources/datasets/gaussian .csv" );
25
27
26
28
// ValueArray is a distributed 2D array
27
29
ValueArray va = DKV .get (key ).get ();
28
30
29
- // Which columns to use, e.g. skip first for this dataset
30
- int [] cols = new int [va ._cols .length - 1 ];
31
+ // Which columns to use, use all in this case
32
+ int [] cols = new int [va ._cols .length ];
31
33
for ( int i = 0 ; i < cols .length ; i ++ )
32
- cols [i ] = i + 1 ;
34
+ cols [i ] = i ;
33
35
34
36
// Create k clusters as arrays of doubles
35
- int k = 3 ;
37
+ int k = 7 ;
36
38
double [][] clusters = new double [k ][va ._cols .length ];
37
39
38
- // Initialize first cluster to random row
40
+ // Initialize clusters to random rows
39
41
Random rand = new Random ();
40
- long row = Math .max (0 , (long ) (rand .nextDouble () * va ._numrows ) - 1 );
41
- for ( int i = 0 ; i < cols .length ; i ++ )
42
- clusters [0 ][i ] = va .datad (row , cols [i ]);
42
+ for ( int cluster = 0 ; cluster < clusters .length ; cluster ++ ) {
43
+ long row = Math .max (0 , (long ) (rand .nextDouble () * va ._numrows ) - 1 );
44
+ for ( int i = 0 ; i < cols .length ; i ++ )
45
+ clusters [cluster ][i ] = va .datad (row , cols [i ]);
46
+ }
43
47
44
48
// Iterate over the dataset and show error for each step
45
49
for ( int i = 0 ; i < 20 ; i ++ ) {
@@ -68,6 +72,13 @@ public static void main(String[] args) throws Exception {
68
72
System .out .print (df .format (clusters [cluster ][column ]) + ", " );
69
73
System .out .println ("" );
70
74
}
75
+
76
+ // Experimental: debug plot
77
+ KMeansModel model = new KMeansModel (Key .make ("test.kmeans" ), cols , key );
78
+ model ._clusters = clusters ;
79
+ UKV .put (model ._selfKey , model );
80
+ System .out .println ("Plot:" );
81
+ System .out .println ("http://127.0.0.1:54321/Plot.png?source_key=test.hex&cols=0%2C1&clusters=test.kmeans" );
71
82
}
72
83
}
73
84
0 commit comments