Skip to content

Commit

Permalink
Merge branch 'master' of github.com:0xdata/h2o
Browse files Browse the repository at this point in the history
  • Loading branch information
cliffclick committed Jun 23, 2013
2 parents 4469e13 + 72cacc4 commit c34f5aa
Show file tree
Hide file tree
Showing 8 changed files with 82 additions and 15 deletions.
Binary file modified docs/h2oOnHadoopWhitepaper.docx
Binary file not shown.
Binary file modified docs/h2oOnHadoopWhitepaper.pdf
Binary file not shown.
Binary file modified docs/pictures/GLMAlgoMem.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified docs/pictures/GLMAlgoMem.vsd
Binary file not shown.
11 changes: 5 additions & 6 deletions py/h2o.py
Original file line number Diff line number Diff line change
Expand Up @@ -649,7 +649,7 @@ def __url(self, loc, port=None):
u = 'http://%s:%d/%s' % (self.http_addr, port, loc)
return u

def __do_json_request(self, jsonRequest=None, fullUrl=None, timeout=10, params={},
def __do_json_request(self, jsonRequest=None, fullUrl=None, timeout=10, params=None,
cmd='get', extraComment=None, ignoreH2oError=False, **kwargs):
# if the fullUrl param is given, use it as the full url; otherwise create the url from the jsonRequest
if fullUrl:
Expand All @@ -659,12 +659,11 @@ def __do_json_request(self, jsonRequest=None, fullUrl=None, timeout=10, params={

# remove any params that are 'None'
# need to copy dictionary, since can't delete while iterating
params2 = params.copy()
for k in params2:
if params2[k] is None:
del params[k]

if params is not None:
params2 = params.copy()
for k in params2:
if params2[k] is None:
del params[k]
paramsStr = '?' + '&'.join(['%s=%s' % (k,v) for (k,v) in params.items()])
else:
paramsStr = ''
Expand Down
10 changes: 5 additions & 5 deletions src/main/java/hex/LR2.java
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,8 @@ public static class CalcSumsTask extends MRTask2<CalcSumsTask> {
double _sumX,_sumY,_sumX2; // Sum of X's, Y's, X^2's
@Override public void map( Chunk xs, Chunk ys ) {
for( int i=0; i<xs._len; i++ ) {
double X = xs.getd(i);
double Y = ys.getd(i);
double X = xs.at0(i);
double Y = ys.at0(i);
if( !Double.isNaN(X) && !Double.isNaN(Y)) {
_sumX += X;
_sumY += Y;
Expand All @@ -77,8 +77,8 @@ public static class CalcSquareErrorsTasks extends MRTask2<CalcSquareErrorsTasks>
CalcSquareErrorsTasks( double meanX, double meanY ) { _meanX = meanX; _meanY = meanY; }
@Override public void map( Chunk xs, Chunk ys ) {
for( int i=0; i<xs._len; i++ ) {
double Xa = xs.getd(i);
double Ya = ys.getd(i);
double Xa = xs.at0(i);
double Ya = ys.at0(i);
if(!Double.isNaN(Xa) && !Double.isNaN(Ya)) {
Xa -= _meanX;
Ya -= _meanY;
Expand All @@ -103,7 +103,7 @@ public static class CalcRegressionTask extends MRTask2<CalcRegressionTask> {
CalcRegressionTask(double beta0, double beta1, double meanY) {_beta0=beta0; _beta1=beta1; _meanY=meanY;}
@Override public void map( Chunk xs, Chunk ys ) {
for( int i=0; i<xs._len; i++ ) {
double X = xs.getd(i); double Y = ys.getd(i);
double X = xs.at0(i); double Y = ys.at0(i);
if( !Double.isNaN(X) && !Double.isNaN(Y) ) {
double fit = _beta1*X + _beta0;
double rs = fit-Y;
Expand Down
66 changes: 66 additions & 0 deletions src/main/java/water/H2O.java
Original file line number Diff line number Diff line change
Expand Up @@ -522,7 +522,64 @@ public static class OptArgs extends Arguments.Opt {
public String random_udp_drop = null; // test only, randomly drop udp incoming
public int pparse_limit = Integer.MAX_VALUE;
public String no_requests_log = null; // disable logging of Web requests
public String h = null;
public String help = null;
}

/**
 * Prints the command-line usage text for starting an H2O node to standard output.
 *
 * <p>The text lists the supported options (-name, -flatfile, -ip, -port,
 * -ice_root, -h/-help), a short note on cloud formation, and a few example
 * invocations. The default port and default ice root shown in the text are
 * taken from {@code DEFAULT_PORT} and {@code DEFAULT_ICE_ROOT}.
 *
 * <p>This method only prints; it does not exit the process.
 */
public static void printHelp() {
  String s =
      "Start an H2O node.\n" +
      "\n" +
      "Usage: java [-Xmx<size>] -jar h2o.jar [options]\n" +
      " (Note that every option has a default and is optional.)\n" +
      "\n" +
      " -name <h2oCloudName>\n" +
      " Cloud name used for Multicast discovery.\n" +
      " Nodes with the same cloud name will form an H2O cloud\n" +
      " (also known as an H2O cluster).\n" +
      " (Not to be used with -flatfile.)\n" +
      "\n" +
      " -flatfile <flatFileName>\n" +
      " Configuration file explicitly listing H2O cloud node members.\n" +
      " (Not to be used with -name.)\n" +
      "\n" +
      " -ip <ipAddressOfNode>\n" +
      " IP address of this node.\n" +
      "\n" +
      " -port <port>\n" +
      " Port number for this node (note: port+1 is also used).\n" +
      " (The default port is " + DEFAULT_PORT + ".)\n" +
      "\n" +
      // Each of the next two lines needs its own line terminator; without
      // them the option, its description and its default run together.
      " -ice_root <fileSystemPath>\n" +
      " The directory where H2O spills temporary data to disk.\n" +
      " (The default is '" + DEFAULT_ICE_ROOT + "'.)\n" +
      "\n" +
      " -h | -help\n" +
      " Print this help.\n" +
      "\n" +
      "Cloud formation behavior:\n" +
      "\n" +
      " New H2O nodes join together to form a cloud at startup time.\n" +
      " Once a cloud is given work to perform, it locks out new members\n" +
      " from joining.\n" +
      "\n" +
      "Examples:\n" +
      "\n" +
      " Start an H2O node with 4GB of memory and a default cloud name:\n" +
      " java -Xmx4g -jar h2o.jar\n" +
      "\n" +
      " Start an H2O node with 6GB of memory and specify the cloud name:\n" +
      " java -Xmx6g -jar h2o.jar -name MyCloud\n" +
      "\n" +
      " Start an H2O cloud with three 2GB nodes and a default cloud name:\n" +
      " java -Xmx2g -jar h2o.jar\n" +
      " java -Xmx2g -jar h2o.jar\n" +
      " java -Xmx2g -jar h2o.jar\n" +
      "\n";

  System.out.print(s);
}

public static boolean IS_SYSTEM_RUNNING = false;

// Start up an H2O Node and join any local Cloud
Expand All @@ -536,6 +593,12 @@ public static void main( String[] args ) {
Arguments arguments = new Arguments(args);
arguments.extract(OPT_ARGS);
ARGS = arguments.toStringArray();

if ((OPT_ARGS.h != null) || (OPT_ARGS.help != null)) {
printHelp();
System.exit (0);
}

ParseDataset.PLIMIT = OPT_ARGS.pparse_limit;

// Get ice path before loading Log or Persist class
Expand All @@ -546,6 +609,7 @@ public static void main( String[] args ) {
} catch(URISyntaxException ex) {
throw new RuntimeException("Invalid ice_root: " + ice + ", " + ex.getMessage());
}
Log.info ("ICE root: '" + ICE_ROOT + "'");

SELF_ADDRESS = findInetAddressForSelf();

Expand Down Expand Up @@ -592,6 +656,8 @@ private static void startLocalNode() {
STATIC_H2OS.add(SELF);
}

Log.info ("H2O cloud name: '" + NAME + "'");

Log.info("(v"+VERSION+") '"+NAME+"' on " + SELF+(OPT_ARGS.flatfile==null
? (", discovery address "+CLOUD_MULTICAST_GROUP+":"+CLOUD_MULTICAST_PORT)
: ", static configuration based on -flatfile "+OPT_ARGS.flatfile));
Expand Down
10 changes: 6 additions & 4 deletions src/samples/water/Part05_KMeansNewAPI.java
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,10 @@ public static void main(String[] args) throws Exception {
Random rand = new Random();
for( int cluster = 0; cluster < clusters.length; cluster++ ) {
long row = Math.max(0, (long) (rand.nextDouble() * frame.vecs().length) - 1);
for( int i = 0; i < frame.vecs().length; i++ )
clusters[cluster][i] = frame.vecs()[i].getd(row);
for( int i = 0; i < frame.vecs().length; i++ ) {
Vec v = frame.vecs()[i];
clusters[cluster][i] = v.at(row);
}
}

// Iterate over the dataset and show error for each step
Expand Down Expand Up @@ -89,7 +91,7 @@ public static class KMeans extends MRTask2<KMeans> {
for( int cluster = 0; cluster < _clusters.length; cluster++ ) {
double sqr = 0; // Sum of dimensional distances
for( int column = 0; column < chunks.length; column++ ) {
double delta = chunks[column].atd(row) - _clusters[cluster][column];
double delta = chunks[column].at0(row) - _clusters[cluster][column];
sqr += delta * delta;
}
if( sqr < minSqr ) {
Expand All @@ -101,7 +103,7 @@ public static class KMeans extends MRTask2<KMeans> {

// Add values and increment counter for chosen cluster
for( int column = 0; column < chunks.length; column++ )
_sums[nearest][column] += chunks[column].atd(row);
_sums[nearest][column] += chunks[column].at0(row);
_counts[nearest]++;
}
_clusters = null;
Expand Down

0 comments on commit c34f5aa

Please sign in to comment.