Skip to content

Commit e3dc4fa

Browse files
author
cypof
committed
Samples: fixed new API, added debug plot to k-means
1 parent 62b18d5 commit e3dc4fa

File tree

11 files changed

+67
-39
lines changed

11 files changed

+67
-39
lines changed

.gitignore

+2-1
Original file line numberDiff line numberDiff line change
@@ -24,4 +24,5 @@ syn_datasets
2424
target/
2525
.settings/*
2626
cypof*
27-
typemap
27+
typemap
28+
/libpeerconnection.log

src/main/java/hex/Plot.java

+9-2
Original file line numberDiff line numberDiff line change
@@ -46,14 +46,21 @@ static class Pixels extends MRTask {
4646
Column cY = va._cols[_cols[1]];
4747
double x = va.datad(bits, row, cX);
4848
double y = va.datad(bits, row, cY);
49-
int iX = scale(x, cX, _width);;
49+
int iX = scale(x, cX, _width);
5050
int iY = scale(y, cY, _height);
5151
int value = _pixels[iY * _width + iX] & 0xff;
5252
value = value == 0xff ? value : value + 1;
53-
_pixels[iY * _width + iX] = (byte) value;
53+
dot(iX, iY, value);
5454
}
5555
}
5656

57+
private void dot(int iX, int iY, int value) {
58+
int r = 2;
59+
for( int y = Math.max(0, iY - r); y < Math.min(_height, iY + r); y++ )
60+
for( int x = Math.max(0, iX - r); x < Math.min(_width, iX + r); x++ )
61+
_pixels[y * _width + x] = (byte) value;
62+
}
63+
5764
@Override public void reduce(DRemoteTask rt) {
5865
Pixels task = (Pixels) rt;
5966

src/main/java/water/api/Plot.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ public NanoHTTPD.Response serve(NanoHTTPD server, Properties args, RequestType t
6666
pixel[0] = 255;
6767
pixel[1] = 0;
6868
pixel[2] = 0;
69-
int r = 2;
69+
int r = 3;
7070
for( int y = Math.max(0, y0 - r); y < Math.min(height, y0 + r); y++ )
7171
for( int x = Math.max(0, x0 - r); x < Math.min(width, x0 + r); x++ )
7272
raster.setPixel(x, y, pixel);

src/main/java/water/fvec/C4Chunk.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ public class C4Chunk extends Chunk {
88
@Override public long get ( int i ) { return UDP.get4(_mem,i<<2); }
99
@Override public double getd( int i ) { return UDP.get4(_mem,i<<2); }
1010
@Override void append2 ( long l, int exp ) { throw H2O.fail(); }
11-
@Override public AutoBuffer write(AutoBuffer bb) { throw H2O.fail(); }
11+
@Override public AutoBuffer write(AutoBuffer bb) { return bb.putA1(_mem,_mem.length); }
1212
@Override public C4Chunk read(AutoBuffer bb) {
1313
_mem = bb.bufClose();
1414
_start = -1;

src/main/java/water/fvec/C4FChunk.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ public class C4FChunk extends Chunk {
88
@Override public long get ( int i ) { return (long)UDP.get4f(_mem,i<<2); }
99
@Override public double getd( int i ) { return UDP.get4f(_mem,i<<2); }
1010
@Override void append2 ( long l, int exp ) { throw H2O.fail(); }
11-
@Override public AutoBuffer write(AutoBuffer bb) { throw H2O.fail(); }
11+
@Override public AutoBuffer write(AutoBuffer bb) { return bb.putA1(_mem,_mem.length); }
1212
@Override public C4FChunk read(AutoBuffer bb) {
1313
_mem = bb.bufClose();
1414
_start = -1;

src/main/java/water/fvec/C8Chunk.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ public class C8Chunk extends Chunk {
88
@Override public long get ( int i ) { return UDP.get8(_mem,i<<3); }
99
@Override public double getd( int i ) { return UDP.get8(_mem,i<<3); }
1010
@Override void append2 ( long l, int exp ) { throw H2O.fail(); }
11-
@Override public AutoBuffer write(AutoBuffer bb) { throw H2O.fail(); }
11+
@Override public AutoBuffer write(AutoBuffer bb) { return bb.putA1(_mem,_mem.length); }
1212
@Override public C8Chunk read(AutoBuffer bb) {
1313
_mem = bb.bufClose();
1414
_start = -1;

src/main/java/water/fvec/C8DChunk.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ public class C8DChunk extends Chunk {
88
@Override public long get ( int i ) { return (long)UDP.get8d(_mem,i<<3); }
99
@Override public double getd( int i ) { return UDP.get8d(_mem,i<<3); }
1010
@Override void append2 ( long l, int exp ) { throw H2O.fail(); }
11-
@Override public AutoBuffer write(AutoBuffer bb) { throw H2O.fail(); }
11+
@Override public AutoBuffer write(AutoBuffer bb) { return bb.putA1(_mem,_mem.length); }
1212
@Override public C8DChunk read(AutoBuffer bb) {
1313
_mem = bb.bufClose();
1414
_start = -1;

src/main/java/water/fvec/Vec.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,7 @@ Chunk nextBV( Chunk bv ) {
106106

107107
// Fetch element the slow way
108108
public long get( long i ) { return elem2BV(elem2ChunkIdx(i)).at(i); }
109-
public double getd( long i ) { throw H2O.unimpl(); }
109+
public double getd( long i ) { return elem2BV(elem2ChunkIdx(i)).atd(i); }
110110

111111
// [#elems, min/mean/max]
112112
@Override public String toString() {

src/samples/h2o/samples/Part05_KMeans.java

+21-10
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
package h2o.samples;
22

3+
import hex.KMeansModel;
4+
35
import java.text.DecimalFormat;
46
import java.util.Random;
57

@@ -20,26 +22,28 @@ public static void main(String[] args) throws Exception {
2022
// Starts a one node cluster
2123
H2O.main(args);
2224

23-
// Loads, unzip and parse a file. Data is distributed to other nodes in a round-robin way.
24-
Key key = TestUtil.loadAndParseFile("test", "smalldata/gaussian/sdss174052.csv.gz");
25+
// Load and parse a file. Data is distributed to other nodes in a round-robin way
26+
Key key = TestUtil.loadAndParseFile("test.hex", "lib/resources/datasets/gaussian.csv");
2527

2628
// ValueArray is a distributed 2D array
2729
ValueArray va = DKV.get(key).get();
2830

29-
// Which columns to use, e.g. skip first for this dataset
30-
int[] cols = new int[va._cols.length - 1];
31+
// Which columns to use, use all in this case
32+
int[] cols = new int[va._cols.length];
3133
for( int i = 0; i < cols.length; i++ )
32-
cols[i] = i + 1;
34+
cols[i] = i;
3335

3436
// Create k clusters as arrays of doubles
35-
int k = 3;
37+
int k = 7;
3638
double[][] clusters = new double[k][va._cols.length];
3739

38-
// Initialize first cluster to random row
40+
// Initialize clusters to random rows
3941
Random rand = new Random();
40-
long row = Math.max(0, (long) (rand.nextDouble() * va._numrows) - 1);
41-
for( int i = 0; i < cols.length; i++ )
42-
clusters[0][i] = va.datad(row, cols[i]);
42+
for( int cluster = 0; cluster < clusters.length; cluster++ ) {
43+
long row = Math.max(0, (long) (rand.nextDouble() * va._numrows) - 1);
44+
for( int i = 0; i < cols.length; i++ )
45+
clusters[cluster][i] = va.datad(row, cols[i]);
46+
}
4347

4448
// Iterate over the dataset and show error for each step
4549
for( int i = 0; i < 20; i++ ) {
@@ -68,6 +72,13 @@ public static void main(String[] args) throws Exception {
6872
System.out.print(df.format(clusters[cluster][column]) + ", ");
6973
System.out.println("");
7074
}
75+
76+
// Experimental: debug plot
77+
KMeansModel model = new KMeansModel(Key.make("test.kmeans"), cols, key);
78+
model._clusters = clusters;
79+
UKV.put(model._selfKey, model);
80+
System.out.println("Plot:");
81+
System.out.println("http://127.0.0.1:54321/Plot.png?source_key=test.hex&cols=0%2C1&clusters=test.kmeans");
7182
}
7283
}
7384

src/samples/h2o/samples/Part05_KMeansNewAPI.java

+10-9
Original file line numberDiff line numberDiff line change
@@ -22,22 +22,24 @@ public static class UserMain {
2222
public static void main(String[] args) throws Exception {
2323
H2O.main(args);
2424

25-
// Parse file
26-
Key file = NFSFileVec.make(new File("smalldata/gaussian/sdss174052.csv.gz"));
25+
// Load and parse a file. Data is distributed to other nodes in a round-robin way
26+
Key file = NFSFileVec.make(new File("lib/resources/datasets/gaussian.csv"));
2727
Frame frame = ParseDataset2.parse(Key.make("test"), new Key[] { file });
2828

29-
// Create a frame with only the columns to use, e.g. skip first for this dataset
29+
// Optionally create a frame with fewer columns, e.g. skip the first
3030
frame = new Frame(null, Utils.remove(frame._names, 0), Utils.remove(frame._vecs, 0));
3131

3232
// Create k clusters as arrays of doubles
33-
int k = 3;
33+
int k = 7;
3434
double[][] clusters = new double[k][frame.vecs().length];
3535

3636
// Initialize clusters to random rows
3737
Random rand = new Random();
38-
long row = Math.max(0, (long) (rand.nextDouble() * frame.vecs().length) - 1);
39-
for( int i = 0; i < frame.vecs().length; i++ )
40-
clusters[0][i] = frame.vecs()[i].getd(row);
38+
for( int cluster = 0; cluster < clusters.length; cluster++ ) {
39+
long row = Math.max(0, (long) (rand.nextDouble() * frame.vecs().length) - 1);
40+
for( int i = 0; i < frame.vecs().length; i++ )
41+
clusters[cluster][i] = frame.vecs()[i].getd(row);
42+
}
4143

4244
// Iterate over the dataset and show error for each step
4345
for( int i = 0; i < 10; i++ ) {
@@ -56,9 +58,8 @@ public static void main(String[] args) throws Exception {
5658
System.out.println("Error is " + task._error);
5759
}
5860

59-
DecimalFormat df = new DecimalFormat("#.00");
60-
6161
System.out.println("Clusters:");
62+
DecimalFormat df = new DecimalFormat("#.00");
6263
for( int c = 0; c < clusters.length; c++ ) {
6364
for( int v = 0; v < frame.vecs().length; v++ )
6465
System.out.print(df.format(clusters[c][v]) + ", ");

src/test/java/water/Sandbox.java

+19-11
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
package water;
22

3+
import hex.KMeans;
4+
import hex.KMeansModel;
5+
36
import java.io.File;
47

58
import org.apache.commons.lang.ArrayUtils;
@@ -14,7 +17,7 @@ public static void main(String[] args) throws Exception {
1417

1518
//File f = new File("py/testdir_multi_jvm/syn_datasets/syn_spheres100.csv");
1619
//File f = new File("smalldata/covtype/covtype.20k.data");
17-
File f = new File("smalldata/allstate/claim_prediction_train_set_10000.csv.gz");
20+
File f = new File("lib/resources/datasets/gaussian.csv");
1821
//File f = new File("smalldata/airlines/allyears2k.zip");
1922
// // File f = new File("../../aaaa/datasets/millionx7_logreg.data.gz");
2023
// // File f = new File("smalldata/test/rmodels/iris_x-iris-1-4_y-species_ntree-500.rdata");
@@ -30,17 +33,22 @@ public static void main(String[] args) throws Exception {
3033
// double[][] array = KMeansTest.gauss(columns, 10000, goals);
3134
// ValueArray va = TestUtil.va_maker(key, (Object[]) array);
3235

33-
// Key km = Key.make("test.kmeans");
34-
// int[] cols = new int[va._cols.length];
35-
// for( int i = 0; i < cols.length; i++ )
36-
// cols[i] = i;
37-
// for( int i = 0; i < 100; i++ ) {
38-
// KMeans job = KMeans.start(km, va, 5, 1e-3, 0, i, false, cols);
39-
// KMeansModel m = job.get();
40-
// System.out.println(m._error);
41-
// }
36+
Key km = Key.make("test.kmeans");
37+
int[] cols = new int[va._cols.length];
38+
for( int i = 0; i < cols.length; i++ )
39+
cols[i] = i;
40+
for( int i = 0; i < 1; i++ ) {
41+
KMeans job = KMeans.start(km, va, 7, 1e-3, 0, i, false, cols);
42+
KMeansModel m = job.get();
43+
System.out.println(m._error);
44+
}
4245

43-
// String u = "/Plot.png?source_key=test.hex&cols=0%2C1&clusters=test.kmeans";
46+
// String s = "";
47+
// for( int i = 0; i < cols.length; i++ ) {
48+
// s += s.length() != 0 ? "%2C" : "";
49+
// s += cols[i];
50+
// }
51+
// String u = "/Plot.png?source_key=test.hex&cols=" + s + "&clusters=test.kmeans";
4452
// Desktop.getDesktop().browse(new URI("http://localhost:54321" + u));
4553
}
4654

0 commit comments

Comments
 (0)