Skip to content

Commit

Permalink
Merge branch 'master' of https://github.com/0xdata/h2o
Browse files Browse the repository at this point in the history
  • Loading branch information
spennihana committed Aug 6, 2014
2 parents cd4c087 + 8b06fbd commit 1770d50
Show file tree
Hide file tree
Showing 7 changed files with 158 additions and 16 deletions.
2 changes: 1 addition & 1 deletion prj.el
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
'(jde-run-option-debug nil)
'(jde-run-option-vm-args (quote ("-XX:+PrintGC")))
'(jde-compile-option-directory "./target/classes")
'(jde-run-option-application-args (quote ("-beta" "-mainClass" "org.junit.runner.JUnitCore" "water.MRThrow" "water.exec.DdplyTest" "water.TestKeySnapshot")))
'(jde-run-option-application-args (quote ("-beta" "-mainClass" "org.junit.runner.JUnitCore" "hex.drf.DRFTest")))
'(jde-debugger (quote ("JDEbug")))
'(jde-compile-option-source (quote ("1.6")))
'(jde-compile-option-classpath (quote ("./target/classes" "./lib/javassist.jar" "./lib/hadoop/cdh4/hadoop-common.jar" "./lib/hadoop/cdh4/hadoop-auth.jar" "./lib/hadoop/cdh4/slf4j-api-1.6.1.jar" "./lib/hadoop/cdh4/slf4j-nop-1.6.1.jar" "./lib/hadoop/cdh4/hadoop-hdfs.jar" "./lib/hadoop/cdh4/protobuf-java-2.4.0a.jar" "./lib/apache/commons-codec-1.4.jar" "./lib/apache/commons-configuration-1.6.jar" "./lib/apache/commons-lang-2.4.jar" "./lib/apache/commons-logging-1.1.1.jar" "./lib/apache/commons-math3-3.2.jar" "./lib/apache/httpclient-4.1.1.jar" "./lib/apache/httpcore-4.1.jar" "./lib/junit/junit-4.11.jar" "./lib/apache/guava-12.0.1.jar" "./lib/gson/gson-2.2.2.jar" "./lib/poi/poi-3.8-20120326.jar" "./lib/poi/poi-ooxml-3.8-20120326.jar" "./lib/poi/poi-ooxml-schemas-3.8-20120326.jar" "./lib/poi/dom4j-1.6.1.jar" "./lib/Jama/Jama.jar" "./lib/s3/aws-java-sdk-1.3.27.jar" "./lib/log4j/log4j-1.2.15.jar" "./lib/joda/joda-time-2.3.jar")))
Expand Down
18 changes: 11 additions & 7 deletions py/testdir_multi_jvm/test_many_fp_formats.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,15 +45,16 @@ def test_many_fp_formats(self):
h2o.beta_features = True
SYNDATASETS_DIR = h2o.make_syn_dir()
tryList = [
# (100, 100, 'cB', 180),
(100000, 10, 'cA', 180),
(100, 1000, 'cB', 180),
# (100, 900, 'cC', 30),
# (100, 500, 'cD', 30),
# (100, 100, 'cE', 30),
]

for (rowCount, colCount, hex_key, timeoutSecs) in tryList:
NUM_CASES = h2o_util.fp_format()
print "Will do %s" % NUM_CASES
for sel in range(NUM_CASES): # len(caseList)
SEEDPERFILE = random.randint(0, sys.maxint)
csvFilename = "syn_%s_%s_%s_%s.csv" % (SEEDPERFILE, sel, rowCount, colCount)
Expand All @@ -62,12 +63,15 @@ def test_many_fp_formats(self):
print "Creating random", csvPathname
write_syn_dataset(csvPathname, rowCount, colCount, SEEDPERFILE, sel)

selKey2 = hex_key + "_" + str(sel)
parseResult = h2i.import_parse(path=csvPathname, schema='put', hex_key=selKey2,
timeoutSecs=timeoutSecs)
print "Parse result['destination_key']:", parseResult['destination_key']
inspect = h2o_cmd.runInspect(None, parseResult['destination_key'])
print "\n" + csvFilename
hex_key = hex_key + "_" + str(sel)
parseResult = h2i.import_parse(path=csvPathname, schema='put', hex_key=hex_key,
timeoutSecs=timeoutSecs, doSummary=False)
h2o_cmd.runSummary(key=parseResult['destination_key'], max_qbins=100)

print "Parse result['destination_key']:", hex_key
inspect = h2o_cmd.runInspect(None, hex_key)
print "Removing", hex_key
h2o.nodes[0].remove_key(hex_key)

# if not h2o.browse_disable:
# h2b.browseJsonHistoryAsUrlLastMatch("Inspect")
Expand Down
35 changes: 35 additions & 0 deletions smalldata/test/drf_infinitys.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
FirstNameMiddleInitialsLastNameDateofBirth
00.510
3NaN41
680
0.60.70.81
+0.6+0.7+0.80
-0.6-0.7-0.81
.6.7.80
+.6+.7+.81
-.6-.7-.80
+0.6e0+0.7e0+0.8e01
-0.6e0-0.7e0-0.8e00
.6e0.7e0.8e01
+.6e0+.7e0+.8e00
-.6e0-.7e0-.8e01
+0.6e00+0.7e00+0.8e000
-0.6e00-0.7e00-0.8e001
.6e00.7e00.8e000
+.6e00+.7e00+.8e001
-.6e00-.7e00-.8e000
+0.6e-01+0.7e-01+0.8e-011
-0.6e-01-0.7e-01-0.8e-010
.6e-01.7e-01.8e-011
+.6e-01+.7e-01+.8e-010
-.6e-01-.7e-01-.8e-011
+0.6e+01+0.7e+01+0.8e+010
-0.6e+01-0.7e+01-0.8e+011
.6e+01.7e+01.8e+010
+.6e+01+.7e+01+.8e+011
-.6e+01-.7e+01-.8e+010
+0.6e102+0.7e102+0.8e1021
-0.6e102-0.7e102-0.8e1020
.6e102.7e102.8e1021
+.6e102+.7e102+.8e1020
-.6e102-.7e102-.8e1021
6 changes: 3 additions & 3 deletions src/main/java/hex/gbm/DHistogram.java
Original file line number Diff line number Diff line change
Expand Up @@ -186,11 +186,11 @@ static public DHistogram[] initialHist(Frame fr, int ncols, int nbins, DHistogra
Vec vecs[] = fr.vecs();
for( int c=0; c<ncols; c++ ) {
Vec v = vecs[c];
final float minIn = (float)v.min(); // inclusive vector min
final float maxIn = (float)v.max(); // inclusive vector max
final float minIn = (float)Math.max(v.min(),-Float.MAX_VALUE); // inclusive vector min
final float maxIn = (float)Math.min(v.max(), Float.MAX_VALUE); // inclusive vector max
final float maxEx = find_maxEx(maxIn,v.isInt()?1:0); // smallest exclusive max
final long vlen = v.length();
hs[c] = v.naCnt()==vlen || v.min()==v.max() || Float.isInfinite(minIn) || Float.isInfinite(maxIn) ? null :
hs[c] = v.naCnt()==vlen || v.min()==v.max() ? null :
make(fr._names[c],nbins,(byte)(v.isEnum() ? 2 : (v.isInt()?1:0)),minIn,maxEx,vlen,doGrpSplit,isBinom);
}
return hs;
Expand Down
94 changes: 94 additions & 0 deletions src/test/java/hex/KMeans2RandomTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
package hex;

import junit.framework.Assert;
import org.junit.BeforeClass;
import org.junit.Ignore;
import org.junit.Test;
import water.*;
import water.fvec.Frame;
import water.fvec.NFSFileVec;
import water.fvec.ParseDataset2;
import water.util.Log;

import java.util.Random;

/**
 * Randomized sweep over KMeans2 parameter combinations (k, max_iter, normalize,
 * drop_na_cols, initialization) on two small datasets, asserting that the fitted
 * model contains no NaN statistics, that every centroid owns at least one point,
 * and that scoring assigns each row a cluster index in [0, k).
 */
public class KMeans2RandomTest extends TestUtil {
// Wait for the multi-node test cloud to form before any test runs.
@BeforeClass public static void stall() {
stall_till_cloudsize(JUnitRunnerDebug.NODES);
}

@Test
@Ignore //currently fails
public void run() {
long seed = 0xDECAF;
// NOTE(review): rng is never used below; the KMeans2 seed is the fixed 0xC0FFEE constant.
Random rng = new Random(seed);
String[] datasets = new String[2];
// Response-column indices per dataset; unused by KMeans2 itself (unsupervised).
int[][] responses = new int[datasets.length][];
datasets[0] = "smalldata/./logreg/prostate.csv";
responses[0] = new int[]{1, 2, 8}; //CAPSULE (binomial), AGE (regression), GLEASON (multi-class)
datasets[1] = "smalldata/iris/iris.csv";
responses[1] = new int[]{4}; //Iris-type (multi-class)


int testcount = 0; // combinations that passed all assertions
int count = 0;     // combinations attempted
for (int i = 0; i < datasets.length; ++i) {
String dataset = datasets[i];
// Parse the dataset twice into independent frames.
// NOTE(review): vframe is never used for validation below — only deleted at the end.
Key file = NFSFileVec.make(find_test_file(dataset));
Frame frame = ParseDataset2.parse(Key.make(), new Key[]{file});
Key vfile = NFSFileVec.make(find_test_file(dataset));
Frame vframe = ParseDataset2.parse(Key.make(), new Key[]{vfile});

// Exhaustive grid over the KMeans2 parameters under test.
for (int clusters : new int[]{1,10}) {
for (int max_iter : new int[]{1,10,100}) {
for (boolean normalize : new boolean[]{false, true}) {
for (boolean drop_na_cols : new boolean[]{false, true}) {
for (KMeans2.Initialization init : new KMeans2.Initialization[]{
KMeans2.Initialization.Furthest,
KMeans2.Initialization.None,
KMeans2.Initialization.PlusPlus}) {
count++;

// Build and run one KMeans2 job with this parameter combination.
KMeans2 k = new KMeans2();
k.k = clusters;
k.initialization = init;
k.destination_key = Key.make();
k.seed = 0xC0FFEE; // fixed seed for reproducibility across runs
k.source = frame;
k.max_iter = max_iter;
k.normalize = normalize;
k.drop_na_cols = drop_na_cols;
k.invoke();

// Model statistics must all be finite (no NaN from degenerate clusters).
KMeans2.KMeans2Model m = UKV.get(k.dest());
for (double d : m.between_cluster_variances) Assert.assertFalse(Double.isNaN(d));
for (double d : m.within_cluster_variances) Assert.assertFalse(Double.isNaN(d));
Assert.assertFalse(Double.isNaN(m.between_cluster_SS));
Assert.assertFalse(Double.isNaN(m.total_SS));
Assert.assertFalse(Double.isNaN(m.total_within_SS));
for (long o : m.size) Assert.assertTrue(o > 0); //have at least one point per centroid
for (double[] dc : m.centers) for (double d : dc) Assert.assertFalse(Double.isNaN(d));

// make prediction (cluster assignment)
Frame score = m.score(frame);
for (long j=0; j<score.numRows(); ++j) org.junit.Assert.assertTrue(score.anyVec().at8(j) >= 0 && score.anyVec().at8(j) < clusters);
score.delete();

Log.info("Parameters combination " + count + ": PASS");
testcount++;

// Free the model's keys before the next combination.
m.delete();
}
}
}
}
}

// Free both parsed frames for this dataset.
frame.delete();
vframe.delete();
}
Log.info("\n\n=============================================");
Log.info("Tested " + testcount + " out of " + count + " parameter combinations.");
Log.info("=============================================");
}
}
13 changes: 11 additions & 2 deletions src/test/java/hex/drf/DRFTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,7 @@ abstract static class PrepData { abstract int prep(Frame fr); }
s("3", "4", "5", "6", "8"));
}

@Test
public void testConstantCols() throws Throwable {
@Test public void testConstantCols() throws Throwable {
try {
basicDRFTestOOBE(
"./smalldata/poker/poker100","poker.hex",
Expand All @@ -97,6 +96,16 @@ public void testConstantCols() throws Throwable {
} catch( IllegalArgumentException iae ) { /*pass*/ }
}

// Regression test for parsing/training on a dataset full of exotic float
// formats (signed, exponent, infinity-like values); the DateofBirth column is
// the response. Expects a 2x2 OOB confusion matrix over classes "0" and "1".
// NOTE(review): expected counts a(6,0)/a(7,0) imply all rows predicted as
// class "0" — confirm this is the intended baseline, not a placeholder.
@Test public void testBadData() throws Throwable {
basicDRFTestOOBE(
"./smalldata/test/drf_infinitys.csv","infinitys.hex",
new PrepData() { @Override int prep(Frame fr) { return fr.find("DateofBirth"); } },
1,
a( a(6, 0),
a(7, 0)),
s("0", "1"));
}

//@Test
public void testCreditSample1() throws Throwable {
basicDRFTestOOBE(
Expand Down
6 changes: 3 additions & 3 deletions src/test/java/water/JUnitRunnerDebug.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
import org.junit.runner.Result;
import org.junit.runner.notification.Failure;
import water.deploy.NodeCL;
import water.fvec.*;
import water.util.Log;
import water.util.Utils;

Expand Down Expand Up @@ -84,8 +83,9 @@ public static void userMain(String[] args) {
// tests.add(MRUtilsTest.class); //test MR sampling/rebalancing
// tests.add(DropoutTest.class); //test NN Dropput

tests.add(ParserTest2.class);
tests.add(ParserTest2.ParseAllSmalldata.class);
// tests.add(ParserTest2.class);
// tests.add(ParserTest2.ParseAllSmalldata.class);
tests.add(KMeans2RandomTest.class);
// tests.add(GLMRandomTest.Short.class);
// tests.add(SpeeDRFTest.class);
// tests.add(SpeeDRFTest2.class);
Expand Down

0 comments on commit 1770d50

Please sign in to comment.