Skip to content

Commit

Permalink
Merge branch 'master' of https://github.com/0xdata/h2o
Browse files Browse the repository at this point in the history
  • Loading branch information
dearirenelang committed May 7, 2014
2 parents 29328f5 + 27b7194 commit dec284b
Show file tree
Hide file tree
Showing 16 changed files with 185 additions and 38 deletions.
Binary file modified lib/javassist-sources.jar
Binary file not shown.
Binary file modified lib/javassist.jar
Binary file not shown.
12 changes: 12 additions & 0 deletions lib/resources/h2o/js/main.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,15 @@ $(function () {
$('[rel=tooltip]').tooltip();
$('[rel=popover]').popover();
});

/**
 * Collect the values of all UNCHECKED checkboxes sharing a given name.
 *
 * @param {string} name  value of the `name` attribute of the checkbox group
 * @returns {string} the `value` attributes of the unchecked inputs, in
 *                   document order, joined by commas ('' when none match)
 */
function collectValuesCBox(name) {
  var selector = 'input[name="' + name + '"]:not(:checked)';
  var unchecked = [];
  // Walk the matched inputs and gather their values one by one.
  $(selector).each(function () {
    unchecked.push(this.value);
  });
  return unchecked.join(',');
}

/**
 * Rewrite a link so that it carries an `ignored_cols` query parameter built
 * from the unchecked checkboxes of the given group, then let the click proceed.
 *
 * Intended to be used as an inline click handler: redirectWithCols(this, 'group').
 *
 * @param {HTMLAnchorElement} e  the link element whose `href` is rewritten in place
 * @param {string} name          `name` attribute of the checkbox group to read
 * @returns {boolean} always true, so the browser follows the rewritten link
 */
function redirectWithCols(e, name) {
  // Drop only a previously added ignored_cols parameter. Stopping at the next
  // '&' or '#' keeps any parameters that follow it in the URL.
  var base = e.href.replace(/&ignored_cols=[^&#]*/g, '');
  // Percent-encode the list so values containing '&', '#', '+' or spaces
  // cannot corrupt the query string.
  e.href = base + "&ignored_cols=" + encodeURIComponent(collectValuesCBox(name));
  // No console.log here: `console` is undefined in some older browsers unless
  // the developer tools are open, and the resulting exception would cancel
  // the navigation.
  return true;
}
78 changes: 78 additions & 0 deletions py/testdir_single_jvm_fvec/test_exec2_log_like_R.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
import unittest, random, sys, time, getpass
sys.path.extend(['.','..','py'])
import h2o, h2o_browse as h2b, h2o_exec as h2e, h2o_hosts, h2o_import as h2i, h2o_cmd

# new ...ability to reference cols
# src[ src$age<17 && src$zip=95120 && ... , ]
# can specify values for enums ..values are 0 thru n-1 for n enums
# Setup expressions; intentionally empty and not referenced elsewhere in this file.
initList = [
]

# Exec expressions submitted one at a time, in order, by the test.
# The same four-step sequence is repeated three times with consecutive
# Last.value.N result names:
#   1. select r1[,c(1)]
#   2. call any.factor() on that selection
#   3. add 1 to the selection
#   4. take log() of the sum
exprList = [
# round 1: Last.value.0 .. Last.value.3
'Last.value.0 = r1[,c(1)]',
'Last.value.1 = any.factor(Last.value.0)',
'Last.value.2 = Last.value.0 + 1',
'Last.value.3 = log(Last.value.2)',

# round 2: Last.value.4 .. Last.value.7
'Last.value.4 = r1[,c(1)]' ,
'Last.value.5 = any.factor(Last.value.4)',
'Last.value.6 = Last.value.4 + 1',
'Last.value.7 = log(Last.value.6)',

# round 3: Last.value.8 .. Last.value.11
'Last.value.8 = r1[,c(1)]' ,
'Last.value.9 = any.factor(Last.value.8)',
'Last.value.10 = Last.value.8 + 1',
'Last.value.11 = log(Last.value.10)',

]

class Basic(unittest.TestCase):
def tearDown(self):
h2o.check_sandbox_for_errors()

@classmethod
def setUpClass(cls):
global SEED, localhost
SEED = h2o.setup_random_seed()
localhost = h2o.decide_if_localhost()
if (localhost):
h2o.build_cloud(1, java_heap_GB=14)
else:
h2o_hosts.build_cloud_with_hosts(1, java_heap_GB=100)

@classmethod
def tearDownClass(cls):
h2o.tear_down_cloud()

def test_exec2_sum(self):
h2o.beta_features = True
bucket = 'home-0xdiag-datasets'
# csvPathname = 'airlines/year2013.csv'
if localhost:
# csvPathname = 'standard/billion_rows.csv.gz'
csvPathname = '1B/reals_100000x1000_15f.data'
else:
# csvPathname = '1B/reals_1000000x1000_15f.data'
# csvPathname = '1B/reals_100000x1000_15f.data'
csvPathname = '1B/reals_1B_15f.data'

hex_key = 'r1'
parseResult = h2i.import_parse(bucket=bucket, path=csvPathname, schema='local',
hex_key=hex_key, timeoutSecs=3000, retryDelaySecs=2)
inspect = h2o_cmd.runInspect(key=hex_key)
print "numRows:", inspect['numRows']
print "numCols:", inspect['numCols']
inspect = h2o_cmd.runInspect(key=hex_key, offset=-1)
print "inspect offset = -1:", h2o.dump_json(inspect)

for execExpr in exprList:
start = time.time()
execResult, result = h2e.exec_expr(h2o.nodes[0], execExpr, resultKey=None, timeoutSecs=300)
print 'exec took', time.time() - start, 'seconds'
print "result:", result

h2o.check_sandbox_for_errors()


# When executed directly as a script, hand control to the h2o harness's
# unit_main() entry point.
if __name__ == '__main__':
    h2o.unit_main()
38 changes: 29 additions & 9 deletions py/testdir_single_jvm_fvec/test_exec2_sum.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,33 @@
initList = [
]

exprList = [
'a=c(1); a = sum(r1[1,])',
'b=c(1); b = sum(r1[1,])',
'd=c(1); d = sum(r1[1,])',
'e=c(1); e = sum(r1[1,])',
'f=c(1); f = sum(r1[1,])',
'f=c(1); g = sum(r1[1,])',
'h=c(1); h = sum(r1[1,])',
]
DO_SUM = False

if DO_SUM:
exprList = [
'a=c(1); a = sum(r1[1,])',
'b=c(1); b = sum(r1[1,])',
'd=c(1); d = sum(r1[1,])',
'e=c(1); e = sum(r1[1,])',
'f=c(1); f = sum(r1[1,])',
'f=c(1); g = sum(r1[1,])',
'h=c(1); h = sum(r1[1,])',
]
else:
exprList = [
'a=c(1); a = log(r1[1,]+0)',
'b=c(1); b = log(r1[1,]+0)',
'c=c(1); c = log(r1[1,]+0)',
'd=c(1); d = log(r1[1,]+0)',
'e=c(1); e = log(r1[1,]+1)',
'f=c(1); f = log(r1[1,]+1)',
'g=c(1); g = log(r1[1,]+1)',
'h=c(1); h = log(r1[1,]+1)',
'i=c(1); i = log(r1[1,]+2)',
'j=c(1); j = log(r1[1,]+2)',
'k=c(1); k = log(r1[1,]+2)',
'l=c(1); l = log(r1[1,]+2)',
]

class Basic(unittest.TestCase):
def tearDown(self):
Expand All @@ -44,6 +62,8 @@ def test_exec2_sum(self):
if getpass.getuser()=='jenkins':
csvPathname = 'standard/billion_rows.csv.gz'
else:
csvPathname = '1B/reals_1000000x1000_15f.data'
csvPathname = '1B/reals_100000x1000_15f.data'
csvPathname = '1B/reals_1B_15f.data'

hex_key = 'r1'
Expand Down
14 changes: 13 additions & 1 deletion src/main/java/hex/ConfusionMatrix.java
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,12 @@ public void reComputeErrors(){
_classErr[i] = classErr(i);
_predErr = err();
}

/**
 * Number of off-diagonal entries counted in row {@code c} of the matrix,
 * i.e. the row total minus the diagonal cell {@code _arr[c][c]}.
 *
 * @param c row (class) index into {@code _arr}
 * @return the row sum with the diagonal element subtracted
 */
public final long classErrCount(int c) {
  final long[] row = _arr[c];
  long rowTotal = 0;
  for( int j = 0; j < row.length; j++ )
    rowTotal += row[j];
  return rowTotal - row[c];
}
public final double classErr(int c) {
long s = 0;
for( long x : _arr[c] )
Expand Down Expand Up @@ -127,6 +132,13 @@ public double err() {
err -= _arr[d][d];
return (double) err / n;
}
/**
 * Absolute error count: {@link #totalRows()} minus the sum of the diagonal
 * cells of {@code _arr}. This is the un-normalized counterpart of the ratio
 * computed by {@code err()}.
 *
 * @return total rows minus the diagonal sum
 */
public long errCount() {
  final long total = totalRows();
  long diagonal = 0;
  for( int d = 0; d < _arr.length; d++ )
    diagonal += _arr[d][d];
  return total - diagonal;
}
/**
* The percentage of predictions that are correct.
*/
Expand Down
11 changes: 9 additions & 2 deletions src/main/java/hex/VarImp.java
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
package hex;

import water.Iced;
import water.Model;
import water.api.DocGen;
import water.api.Request.API;
import water.util.UIUtils;
import water.util.Utils;

import java.util.Arrays;
Expand Down Expand Up @@ -44,13 +46,17 @@ protected VarImp(float[] varimp, String[] variables, VarImpMethod method) {
public void setVariables(String[] variables) { this.variables = variables; }

/** Generate variable importance HTML code. */
public final StringBuilder toHTML(StringBuilder sb) {
public final <T extends Model> StringBuilder toHTML(T model, StringBuilder sb) {
DocGen.HTML.section(sb,"Variable importance of input variables: " + method);
sb.append("<div class=\"alert\">");
sb.append(UIUtils.builderModelLink(model.getClass(), model._dataKey, model.responseName(), "Build a new model using selected variables", "redirectWithCols(this,'vi_chkb')"));
sb.append("</div>");

DocGen.HTML.arrayHead(sb);
// Create a sort order
Integer[] sortOrder = getSortOrder();
// Generate variable labels and raw scores
if (variables != null) DocGen.HTML.tableLine(sb, "Variable", variables, sortOrder, Math.min(max_var, variables.length));
if (variables != null) DocGen.HTML.tableLine(sb, "Variable", variables, sortOrder, Math.min(max_var, variables.length), true, "vi_chkb");
if (varimp != null) DocGen.HTML.tableLine(sb, method.toString(), varimp, sortOrder, Math.min(max_var, variables.length));
// Print a specific information
toHTMLAppendMoreTableLines(sb, sortOrder);
Expand All @@ -76,6 +82,7 @@ static final StringBuilder toHTMLGraph(StringBuilder sb, String[] names, float[]
DocGen.HTML.toJSArray(new StringBuilder(), names, so, Math.min(max, vals.length)),
DocGen.HTML.toJSArray(new StringBuilder(), vals , so, Math.min(max, vals.length))
);
sb.append("<button id=\"sortBars\" class=\"btn btn-primary\">Sort</button>\n");
return sb;
}
/** By default provides a sort order according to raw scores stored in <code>varimp</code>. */
Expand Down
3 changes: 1 addition & 2 deletions src/main/java/hex/deeplearning/DeepLearningModel.java
Original file line number Diff line number Diff line change
Expand Up @@ -1098,8 +1098,7 @@ else if (error.trainAUC != null) {

// Variable importance
if (error.variable_importances != null) {
error.variable_importances.toHTML(sb);
sb.append("<button id=\"sortBars\" class=\"btn btn-primary\">Sort</button>\n");
error.variable_importances.toHTML(this, sb);
}

DocGen.HTML.title(sb, "Scoring history");
Expand Down
5 changes: 2 additions & 3 deletions src/main/java/hex/gbm/DTree.java
Original file line number Diff line number Diff line change
Expand Up @@ -695,7 +695,7 @@ public void generateHTML(String title, StringBuilder sb) {
sb.append(Inspect2.link("Inspect training data ("+_dataKey.toString()+")", _dataKey)).append(", ");
sb.append(Predict.link(_key,"Score on dataset")).append(", ");
if (_dataKey != null)
sb.append(UIUtils.builderLink(this.getClass(), _dataKey, responseName(), "Compute new model")).append(", ");
sb.append(UIUtils.builderModelLink(this.getClass(), _dataKey, responseName(), "Compute new model")).append(", ");
sb.append("<i class=\"icon-play\"></i>&nbsp;").append("Continue training this model");
sb.append("</div>");
DocGen.HTML.paragraph(sb,"Model Key: "+_key);
Expand Down Expand Up @@ -751,7 +751,6 @@ public void generateHTML(String title, StringBuilder sb) {
// Show variable importance
if (varimp != null) {
generateHTMLVarImp(sb);
sb.append("<button id=\"sortBars\" class=\"btn btn-primary\">Sort</button>\n");
}
}

Expand All @@ -777,7 +776,7 @@ protected void generateHTMLVarImp(StringBuilder sb) {
if (varimp!=null) {
// Set up variable names for importance
varimp.setVariables(Arrays.copyOf(_names, _names.length-1));
varimp.toHTML(sb);
varimp.toHTML(this, sb);
}
}

Expand Down
3 changes: 1 addition & 2 deletions src/main/java/hex/singlenoderf/SpeeDRFModel.java
Original file line number Diff line number Diff line change
Expand Up @@ -391,7 +391,6 @@ public void generateHTML(String title, StringBuilder sb) {
}
if (varimp != null) {
generateHTMLVarImp(sb);
sb.append("<button id=\"sortBars\" class=\"btn btn-primary\">Sort</button>\n");
}
}

Expand Down Expand Up @@ -560,7 +559,7 @@ protected void generateHTMLVarImp(StringBuilder sb) {
if (varimp!=null) {
// Set up variable names for importance
varimp.setVariables(Arrays.copyOf(_names, _names.length-1));
varimp.toHTML(sb);
varimp.toHTML(this, sb);
}
}

Expand Down
20 changes: 14 additions & 6 deletions src/main/java/water/NanoHTTPD.java
Original file line number Diff line number Diff line change
@@ -1,13 +1,17 @@
package water;
import water.fvec.UploadFileVec;
import water.util.Log;
import water.util.Log.Tag.Sys;
import water.util.Utils;

import java.io.*;
import java.net.*;
import java.net.ServerSocket;
import java.net.Socket;
import java.net.SocketException;
import java.net.URLEncoder;
import java.util.*;
import java.util.regex.Pattern;

import water.fvec.UploadFileVec;
import water.util.*;
import water.util.Log.Tag.Sys;

/**
* A simple, tiny, nicely embeddable HTTP 1.0 (partially 1.1) server in Java
*
Expand Down Expand Up @@ -697,7 +701,11 @@ private void sendResponse( String status, String mime, Properties header, InputS
{
int read = data.read( buff, 0, ( (pending>theBufferSize) ? theBufferSize : pending ));
if (read <= 0) break;
out.write( buff, 0, read );
try {
out.write(buff, 0, read);
} catch (SocketException ex) {
// don't print exceptions from NanoHTTPD
}
//pending -= read;
pending = data.available();
}
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/water/api/AUC.java
Original file line number Diff line number Diff line change
Expand Up @@ -657,7 +657,7 @@ void plotROC(StringBuilder sb) {
" }\n" +
"})\n" +
".on(\"mouseover\", function(d,i){\n" +
" if(i <= " + _fprs.length + ") {" +
" if(i < " + _fprs.length + ") {" +
" document.getElementById(\"select\").selectedIndex = i\n" +
" show_cm(i)\n" +
" }\n" +
Expand Down
13 changes: 11 additions & 2 deletions src/main/java/water/api/DocGen.java
Original file line number Diff line number Diff line change
Expand Up @@ -362,10 +362,19 @@ public <T> StringBuilder tableLine(StringBuilder sb, String title, T[] values, I
return tableLine(sb, title, values, sortOrder, values.length);
}
public <T> StringBuilder tableLine(StringBuilder sb, String title, T[] values, Integer[] sortOrder, int maxValues) {
return tableLine(sb, title, values, sortOrder, maxValues, false, null);

}
public <T> StringBuilder tableLine(StringBuilder sb, String title, T[] values, Integer[] sortOrder, int maxValues, boolean checkBoxes, String idName) {
assert sortOrder == null || values.length == sortOrder.length;
sb.append("<tr><th>").append(title).append("</th>");
for( int i=0; i<maxValues; i++ )
sb.append("<td>").append(values[sortOrder!=null ? sortOrder[i] : i]).append("</td>");
for( int i=0; i<maxValues; i++ ) {
sb.append("<td>");
T val = values[sortOrder!=null ? sortOrder[i] : i];
if (checkBoxes) sb.append("<input type=\"checkbox\" name=\"").append(idName).append("\" value=\"").append(val).append("\" checked />&nbsp;");
sb.append(val);
sb.append("</td>");
}
sb.append("</tr>");
return sb;
}
Expand Down
10 changes: 5 additions & 5 deletions src/main/java/water/api/Tutorials.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,31 +21,31 @@ public class Tutorials extends HTMLOnlyRequest {

+ "<div class='span2 col'>"
+ " <h2>Random Forest</h2>"
+ "<p>Random Forest is a classical machine learning method for classification and regression. Learn how to use it with H<sub>2</sub>O.</it></p>"
+ "<p>Random Forest is a classical machine learning method for classification and regression. Learn how to use it with H<sub>2</sub>O for better predictions.</it></p>"
+ "<a href='/TutorialRFIris.html' class='btn btn-primary'>Try it!</a>"
+ "</div>"

+ "<div class='span2 col'>"
+ " <h2>GBM</h2>"
+ "<p>GBM uses gradient boosted regression trees for highly predictive regression and classification.</p>"
+ "<p>GBM uses gradient boosted trees for regression and classification, and is one of the most powerful machine learning methods.</p>"
+ "<a href='/TutorialGBM.html' class='btn btn-primary'>Try it!</a>"
+ "</div>"

+ "<div class='span2 col'>"
+ "<h2>GLM</h2>"
+ "<p>Generalized linear model is a generalization of linear regression. Experience its unique power on top of H<sub>2</sub>O.</p>"
+ "<p>Generalized linear model is a generalization of linear regression. Experience its unique power and blazing speed on top of H<sub>2</sub>O.</p>"
+ "<a href='/TutorialGLMProstate.html' class='btn btn-primary'>Try it!</a>"
+ "</div>"

+ "<div class='span2 col'>"
+ "<h2>K-Means</h2>"
+ "<p>Perform cluster analysis with H<sub>2</sub>O. It employs K-means, a highly scalable clustering algorithm.</p>"
+ "<p>Perform cluster analysis with H<sub>2</sub>O. It employs K-means, a highly scalable clustering algorithm for unsupervised learning on big data.</p>"
+ "<a href='/TutorialKMeans.html' class='btn btn-primary'>Try it!</a>"
+ "</div>"

+ "<div class='span2 col'>"
+ "<h2>Deep Learning</h2>"
+ "<p>H<sub>2</sub>O's distributed Deep Learning models high-level abstractions in data with deep artificial neural networks.</p>"
+ "<p>H<sub>2</sub>O's distributed Deep Learning gives you the power of deep neural networks for highest predictive accuracy in classification and regression.</p>"
+ "<a href='/TutorialDeepLearning.html' class='btn btn-primary'>Try it!</a>"
+ "</div>"

Expand Down
Loading

0 comments on commit dec284b

Please sign in to comment.