Skip to content

Commit

Permalink
Merge branch 'master' of https://github.com/0xdata/h2o
Browse files Browse the repository at this point in the history
  • Loading branch information
dearirenelang committed May 9, 2014
2 parents 6aa67e0 + 15c93d2 commit d75c437
Show file tree
Hide file tree
Showing 12 changed files with 196 additions and 66 deletions.
79 changes: 79 additions & 0 deletions R/tests/testdir_munging/exec/runit_NOPASS_pub-668.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@

setwd(normalizePath(dirname(R.utils::commandArgs(asValues=TRUE)$"f")))
source('../../findNSourceUtils.R')

# library(h2o)
# library(testthat)
# h2o.setLogPath(getwd(), "Command")
# h2o.setLogPath(getwd(), "Error")
# h2o.startLogging()
# conn = h2o.init()


# Test for pub-668: evaluates ifelse() with a constant-false condition on an
# H2O frame and compares the outcome with the same expression evaluated on the
# equivalent local R data frame.
#
# Args:
#   conn: passed as the first argument to as.h2o(); presumably the H2O
#         connection handle supplied by doTest() -- TODO confirm.
#
# Each of the four sections below follows the same pattern:
#   1. upload a_initial to H2O under the key "r.hex",
#   2. compute d (H2O side) and dd (local R side) with ifelse(<false>, ..., ...),
#   3. pull the H2O frame back with as.matrix() and check it still equals a,
#   4. check that d equals dd.
# The sections differ only in the form of the `no` argument given to ifelse().
test.pub.668 <- function(conn) {

# 10 x 10 data frame of 0/1 values; each c(...) below becomes one column.
a_initial = as.data.frame(cbind(
c(0,0,1,0,0,1,0,0,0,0),
c(1,1,1,0,1,0,1,0,1,0),
c(1,0,1,0,1,0,1,0,0,1),
c(1,1,0,0,0,1,0,0,0,1),
c(1,1,1,0,1,0,0,0,1,1),
c(1,0,1,0,0,0,0,0,1,1),
c(1,1,1,0,0,0,1,1,1,0),
c(0,0,1,1,1,0,0,1,1,0),
c(0,1,1,1,1,0,0,1,1,0),
c(0,0,0,0,0,1,1,0,0,0)
))


# Case 1: the `no` argument on the H2O side is a precomputed value (d0).
a = a_initial
a.h2o <- as.h2o(conn, a_initial, key="r.hex")
d0 = apply(a.h2o, 2, sum)
d = ifelse(F, a.h2o[1,] , d0)
dd = ifelse(F, a[1,] , apply(a, 2, sum))
a.h2o.R = as.matrix(a.h2o)
# Bare expressions: inside a function body R does not auto-print these.
a
a.h2o.R
expect_that(all(a == a.h2o.R), equals(T))
expect_that(all(d == dd), equals(T))


# Case 2: the `no` argument is the apply() call written inline.
# d0 is still computed here but is not used afterwards in this section.
a = a_initial
a.h2o <- as.h2o(conn, a_initial, key="r.hex")
d0 = apply(a.h2o, 2, sum)
d = ifelse(F, a.h2o[1,] , apply(a.h2o, 2, sum))
dd = ifelse(F, a[1,] , apply(a, 2, sum))
a.h2o.R = as.matrix(a.h2o)
a
a.h2o.R
expect_that(all(a == a.h2o.R), equals(T))
expect_that(all(d == dd), equals(T))


# Case 3: the `no` argument is the numeric constant 0.
a = a_initial
a.h2o <- as.h2o(conn, a_initial, key="r.hex")
d = ifelse(F, a.h2o[1,] , 0)
dd = ifelse(F, a[1,] , 0)
a.h2o.R = as.matrix(a.h2o)
a
a.h2o.R
expect_that(all(a == a.h2o.R), equals(T))
expect_that(all(d == dd), equals(T))


# Case 4: condition spelled FALSE instead of F, and a second ifelse() on the
# same line after `;`. g and gg are assigned but never compared below.
a = a_initial
a.h2o <- as.h2o(conn, a_initial, key="r.hex")
d = ifelse(FALSE, a.h2o[1,] , apply(a.h2o,2,sum)); g = ifelse(FALSE, 1.23<2.34 , min(1,2))
dd = ifelse(FALSE, a[1,] , apply(a,2,sum)); gg = ifelse(FALSE, 1.23<2.34, min(1,2))
a.h2o.R = as.matrix(a.h2o)
a
a.h2o.R
expect_that(all(a == a.h2o.R), equals(T))
expect_that(all(d == dd), equals(T))


# testEnd() is not defined in this file; presumably it comes from the sourced
# test utilities -- TODO confirm.
testEnd()
}

doTest("Test for pub-668.", test.pub.668)

81 changes: 44 additions & 37 deletions R/tests/testdir_munging/exec/runit_pub-657.R
Original file line number Diff line number Diff line change
@@ -1,38 +1,45 @@
library(h2o)
library(testthat)
conn = h2o.init()

a_initial = cbind(c(0,0,0,0), c(1,1,1,1))
a = a_initial

a.h2o <- as.h2o(conn, a_initial, key="A.hex")
a.h2o[,1] = c(0)
a[,1] = c(0)
a.h2o.R = as.matrix(a.h2o)
expect_that(all(a == a.h2o.R), equals(T))

a.h2o <- as.h2o(conn, a_initial, key="A.hex")
a.h2o[,1] = c(1)
a[,1] = c(1)
a.h2o.R = as.matrix(a.h2o)
expect_that(all(a == a.h2o.R), equals(T))

a.h2o <- as.h2o(conn, a_initial, key="A.hex")
a.h2o[,1] = 0
a[,1] = 0
a.h2o.R = as.matrix(a.h2o)
expect_that(all(a == a.h2o.R), equals(T))

a.h2o <- as.h2o(conn, a_initial, key="A.hex")
a.h2o[,1] = 1
a[,1] = 1
a.h2o.R = as.matrix(a.h2o)
expect_that(all(a == a.h2o.R), equals(T))

# a.h2o[,1] = c(0,0)
# Error in `[<-`(`*tmp*`, , 1, value = c(0, 0)) :
# value must be either a single number or a vector of length 4



# library(h2o)
# library(testthat)
# conn = h2o.init()

setwd(normalizePath(dirname(R.utils::commandArgs(asValues=TRUE)$"f")))
source('../../findNSourceUtils.R')

# Test for pub-657: assigns a single value to an entire column of an H2O frame
# and checks that the result matches the same assignment on a local R matrix.
#
# Args:
#   conn: passed as the first argument to as.h2o(); presumably the H2O
#         connection handle supplied by doTest() -- TODO confirm.
#
# Each section re-uploads a_initial under the key "A.hex", overwrites column 1
# on both the H2O frame and the local matrix, converts the H2O frame back with
# as.matrix(), and asserts element-wise equality.
test.pub.657 <- function(conn) {

# 4 x 2 matrix: first column all zeros, second column all ones.
a_initial = cbind(c(0,0,0,0), c(1,1,1,1))
# The local copy is not reset between sections; every section overwrites the
# whole of column 1, so the previous section's value does not leak through.
a = a_initial

# Value given as a length-1 vector, c(0).
a.h2o <- as.h2o(conn, a_initial, key="A.hex")
a.h2o[,1] = c(0)
a[,1] = c(0)
a.h2o.R = as.matrix(a.h2o)
expect_that(all(a == a.h2o.R), equals(T))

# Value given as a length-1 vector, c(1).
a.h2o <- as.h2o(conn, a_initial, key="A.hex")
a.h2o[,1] = c(1)
a[,1] = c(1)
a.h2o.R = as.matrix(a.h2o)
expect_that(all(a == a.h2o.R), equals(T))

# Value given as a bare scalar, 0.
a.h2o <- as.h2o(conn, a_initial, key="A.hex")
a.h2o[,1] = 0
a[,1] = 0
a.h2o.R = as.matrix(a.h2o)
expect_that(all(a == a.h2o.R), equals(T))

# Value given as a bare scalar, 1.
a.h2o <- as.h2o(conn, a_initial, key="A.hex")
a.h2o[,1] = 1
a[,1] = 1
a.h2o.R = as.matrix(a.h2o)
expect_that(all(a == a.h2o.R), equals(T))

# Deliberately disabled case: a length-2 value for a 4-row column. The lines
# below record the error message that was observed for it.
# a.h2o[,1] = c(0,0)
# Error in `[<-`(`*tmp*`, , 1, value = c(0, 0)) :
# value must be either a single number or a vector of length 4

# testEnd() is not defined in this file; presumably it comes from the sourced
# test utilities -- TODO confirm.
testEnd()
}

doTest("Test for pub-657.", test.pub.657)

6 changes: 6 additions & 0 deletions h2o-perf/bench/py/h2oPerf/H2O.py
Original file line number Diff line number Diff line change
Expand Up @@ -417,6 +417,12 @@ def stop_remote(self):
pass
except OSError:
pass
try:
requests.get("http://" + self.ip + ":" + self.port + "/Shutdown.html", timeout=5)
except Exception, e:
print "Got Exception trying to shutdown H2O:"
print e
pass
print "Successfully shutdown h2o!"
self.pid = -1

Expand Down
2 changes: 1 addition & 1 deletion py/testdir_multi_jvm/test_GBM_params_rand2.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def tearDownClass(cls):
h2o.tear_down_cloud()

def test_GBM_params_rand2(self):
h2o.beta_features = False
h2o.beta_features = True
bucket = 'home-0xdiag-datasets'
modelKey = 'GBMModelKey'
files = [
Expand Down
47 changes: 35 additions & 12 deletions py/testdir_multi_jvm/test_NN2_mnist_multi.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import unittest, time, sys, random, string
sys.path.extend(['.','..','py'])
import h2o, h2o_nn, h2o_cmd, h2o_hosts, h2o_import as h2i, h2o_jobs, h2o_browse as h2b
import h2o, h2o_nn, h2o_cmd, h2o_hosts, h2o_import as h2i, h2o_jobs, h2o_browse as h2b, h2o_gbm

class Basic(unittest.TestCase):
def tearDown(self):
Expand Down Expand Up @@ -29,7 +29,7 @@ def test_NN2_mnist_multi(self):
csvPathname_test = 'mnist/test.csv.gz'
hex_key = 'mnist_train.hex'
validation_key = 'mnist_test.hex'
timeoutSecs = 60
timeoutSecs = 90
parseResult = h2i.import_parse(bucket='smalldata', path=csvPathname_train, schema='put', hex_key=hex_key, timeoutSecs=timeoutSecs)
parseResultV = h2i.import_parse(bucket='smalldata', path=csvPathname_test, schema='put', hex_key=validation_key, timeoutSecs=timeoutSecs)
inspect = h2o_cmd.runInspect(None, hex_key)
Expand Down Expand Up @@ -74,18 +74,41 @@ def test_NN2_mnist_multi(self):
nn = h2o_cmd.runDeepLearning(parseResult=parseResult, timeoutSecs=timeoutSecs, **kwargs)
print "neural net end on ", csvPathname_train, " and ", csvPathname_test, 'took', time.time() - start, 'seconds'

### Now score using the model, and check the validation error
#### Now score using the model, and check the validation error
expectedErr = 0.04
relTol = 0.01
predict_key = 'Predict.hex'

kwargs = {
'source' : validation_key,
'max_rows': 0,
'response': response,
'ignored_cols': None, # this is not consistent with ignored_cols_by_name
'classification': 1,
'destination_key': 'score_' + identifier + '.hex',
'model': model_key,
'data_key': validation_key,
'destination_key': predict_key,
'model_key': model_key
}
nnScoreResult = h2o_cmd.runDeepLearningScore(key=parseResult['destination_key'], timeoutSecs=timeoutSecs, **kwargs)
h2o_nn.checkScoreResult(self, nnScoreResult, expectedErr, relTol, **kwargs)
predictResult = h2o_cmd.runPredict(timeoutSecs=timeoutSecs, **kwargs)
h2o_cmd.runInspect(key=predict_key, verbose=True)

kwargs = {
}

predictCMResult = h2o.nodes[0].predict_confusion_matrix(
actual=validation_key,
vactual=response,
predict=predict_key,
vpredict='predict',
timeoutSecs=timeoutSecs, **kwargs)

cm = predictCMResult['cm']

print h2o_gbm.pp_cm(cm)
actualErr = h2o_gbm.pp_cm_summary(cm)/100.

print "actual classification error:" + format(actualErr)
print "expected classification error:" + format(expectedErr)
if actualErr != expectedErr and abs((expectedErr - actualErr)/expectedErr) > relTol:
raise Exception("Scored classification error of %s is not within %s %% relative error of %s" %
(actualErr, float(relTol)*100, expectedErr))



if __name__ == '__main__':
h2o.unit_main()
4 changes: 2 additions & 2 deletions py/testdir_single_jvm/test_GLM2_many_cols.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,8 +78,8 @@ def test_GLM2_many_cols(self):
'response': 'C' + str(y),
'max_iter': 10,
'n_folds': 1,
'alpha': 0.2,
'lambda': 1e-5,
'alpha': 0.0,
'lambda': 0.0,
}

start = time.time()
Expand Down
4 changes: 2 additions & 2 deletions py/testdir_single_jvm_fvec/test_exec2_sum.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def setUpClass(cls):
if (localhost):
h2o.build_cloud(1, java_heap_GB=28)
else:
h2o_hosts.build_cloud_with_hosts(1)
h2o_hosts.build_cloud_with_hosts(1, java_heap_GB=40)

@classmethod
def tearDownClass(cls):
Expand All @@ -62,9 +62,9 @@ def test_exec2_sum(self):
if getpass.getuser()=='jenkins':
csvPathname = 'standard/billion_rows.csv.gz'
else:
csvPathname = '1B/reals_1000000x1000_15f.data'
csvPathname = '1B/reals_100000x1000_15f.data'
csvPathname = '1B/reals_1B_15f.data'
csvPathname = '1B/reals_1000000x1000_15f.data'

hex_key = 'r1'
parseResult = h2i.import_parse(bucket=bucket, path=csvPathname, schema='local',
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/water/Value.java
Original file line number Diff line number Diff line change
Expand Up @@ -350,7 +350,7 @@ public Value(Key k, Iced pojo, byte be ) {
_key = k;
_pojo = pojo;
_type = (short)pojo.frozenType();
_mem = pojo.write(new AutoBuffer()).buf();
_mem = (pojo instanceof Chunk)?((Chunk)pojo).getBytes():pojo.write(new AutoBuffer()).buf();
_max = _mem.length;
// For the ICE backend, assume new values are not-yet-written.
// For HDFS & NFS backends, assume we from global data and preserve the
Expand Down
10 changes: 10 additions & 0 deletions src/main/java/water/fvec/NewChunk.java
Original file line number Diff line number Diff line change
Expand Up @@ -366,7 +366,17 @@ protected void cancel_sparse(){
// Study this NewVector and determine an appropriate compression scheme.
// Return the data so compressed.
static final int MAX_FLOAT_MANTISSA = 0x7FFFFF;

/**
 * Compresses this chunk's buffered data and then drops the staging arrays.
 *
 * The compression itself is delegated to {@code compress2()}. Once it has
 * returned, the {@code _id}, {@code _xs}, {@code _ds} and {@code _ls} fields
 * are set to {@code null} so the memory they reference can be freed.
 * Callers must therefore not read those fields after this call.
 *
 * @return the Chunk produced by {@code compress2()}
 */
Chunk compress() {
  final Chunk compressed = compress2();
  // Release every staging buffer now that the compressed form exists.
  _ls = null;
  _ds = null;
  _xs = null;
  _id = null;
  return compressed;
}
private final Chunk compress2() {
// Check for basic mode info: all missing or all strings or mixed stuff
byte mode = type();
if( mode==AppendableVec.NA ) // ALL NAs, nothing to do
Expand Down
20 changes: 12 additions & 8 deletions src/main/java/water/fvec/ParseDataset2.java
Original file line number Diff line number Diff line change
Expand Up @@ -535,20 +535,20 @@ else if(_chunk2Enum != uzpt._chunk2Enum) { // we're sharing global array!
}
}

private Enum [] enums(){
if(!_enums.containsKey(_eKey)){
Enum [] enums = new Enum[_setup._ncols];
private static Enum [] enums(Key eKey, int ncols){
if(!_enums.containsKey(eKey)){
Enum [] enums = new Enum[ncols];
for(int i = 0; i < enums.length; ++i)enums[i] = new Enum();
_enums.putIfAbsent(_eKey, enums);
_enums.putIfAbsent(eKey, enums);
}
return _enums.get(_eKey);
return _enums.get(eKey);
}
// ------------------------------------------------------------------------
// Zipped file; no parallel decompression; decompress into local chunks,
// parse local chunks; distribute chunks later.
private FVecDataOut streamParse( final InputStream is, final CustomParser.ParserSetup localSetup, int vecIdStart, int chunkStartIdx, ParseProgressMonitor pmon) throws IOException {
// All output into a fresh pile of NewChunks, one per column
FVecDataOut dout = new FVecDataOut(_vg, chunkStartIdx, localSetup._ncols, vecIdStart, enums());
FVecDataOut dout = new FVecDataOut(_vg, chunkStartIdx, localSetup._ncols, vecIdStart, enums(_eKey,localSetup._ncols));
CustomParser p = localSetup.parser();
// assume 2x inflation rate
if(localSetup._pType.parallelParseSupported)
Expand All @@ -562,12 +562,14 @@ private FVecDataOut streamParse( final InputStream is, final CustomParser.Parser
return dout;
}

private class DParse extends MRTask2<DParse> {
private static class DParse extends MRTask2<DParse> {
final CustomParser.ParserSetup _setup;
final int _vecIdStart;
final int _startChunkIdx; // for multifile parse, offset of the first chunk in the final dataset
final VectorGroup _vg;
FVecDataOut _dout;
final Key _eKey;
final Key _progress;
transient final MultiFileParseTask _outerMFPT;

DParse(VectorGroup vg, CustomParser.ParserSetup setup, int vecIdstart, int startChunkIdx, MultiFileParseTask mfpt) {
Expand All @@ -576,9 +578,11 @@ private class DParse extends MRTask2<DParse> {
_vecIdStart = vecIdstart;
_startChunkIdx = startChunkIdx;
_outerMFPT = mfpt;
_eKey = mfpt._eKey;
_progress = mfpt._progress;
}
@Override public void map( Chunk in ) {
Enum [] enums = enums();
Enum [] enums = enums(_eKey,_setup._ncols);
// Break out the input & output vectors before the parse loop
// The Parser
FVecDataIn din = new FVecDataIn(in);
Expand Down
4 changes: 2 additions & 2 deletions src/test/java/hex/DeepLearningSpiralsTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -83,8 +83,8 @@ public class DeepLearningSpiralsTest extends TestUtil {
CM.toASCII(sb);
double error = new ConfusionMatrix(CM.cm).err();
Log.info(sb);
if (error >= 0.01) {
Assert.fail("Classification error is not less than 0.01, but " + error + ".");
if (error >= 0.02) {
Assert.fail("Classification error is not less than 0.02, but " + error + ".");
}
pred.delete();
mymodel.delete();
Expand Down
3 changes: 2 additions & 1 deletion src/test/java/water/fvec/NewVectorTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ private void testImpl( long[] ls, int[] xs, int [] id, Class C, boolean hasFloat
nv._ls = new long[]{0,0,0,0}; // A 4-row chunk
nv._xs = new int []{0,0,0,0};
nv._len= nv._len2 = nv._ls.length;
long [] ls = nv._ls;
nv.close(0,null);
Vec vec = av.close(new Futures());
assertEquals( nv._len2, vec.length() );
Expand All @@ -102,7 +103,7 @@ private void testImpl( long[] ls, int[] xs, int [] id, Class C, boolean hasFloat
assertTrue( "Found chunk class "+c0.getClass()+" but expected C0LChunk", c0 instanceof C0LChunk );
assertEquals( false, c0.hasFloat() );
// Also, we can decompress correctly
for( int i=0; i<nv._ls.length; i++ )
for( int i=0; i<ls.length; i++ )
assertEquals(0, c0.at0(i), c0.at0(i)*EPSILON);

// Now write a zero into slot 0
Expand Down

0 comments on commit d75c437

Please sign in to comment.