Skip to content

Commit

Permalink
Merge branch 'master' of https://github.com/0xdata/h2o
Browse files Browse the repository at this point in the history
  • Loading branch information
Kevin Normoyle committed Feb 1, 2014
2 parents 780b2d7 + 0503a45 commit 3a77173
Show file tree
Hide file tree
Showing 5 changed files with 102 additions and 33 deletions.
4 changes: 3 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,9 @@ build_package:
echo $(PROJECT_VERSION) > target/project_version
rm -fr target/h2o-$(PROJECT_VERSION)
mkdir target/h2o-$(PROJECT_VERSION)
cp -rp target/R target/h2o-$(PROJECT_VERSION)
mkdir target/h2o-$(PROJECT_VERSION)/R
cp -p target/R/src/contrib/h2o_$(PROJECT_VERSION).tar.gz target/h2o-$(PROJECT_VERSION)/R
cp -p R/README.txt target/h2o-$(PROJECT_VERSION)/R
cp -rp target/hadoop target/h2o-$(PROJECT_VERSION)
cp -p target/h2o.jar target/h2o-$(PROJECT_VERSION)
cp -p target/h2o-sources.jar target/h2o-$(PROJECT_VERSION)
Expand Down
4 changes: 2 additions & 2 deletions packaging/README.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ This package contains:
R/ Directory containing H2O package for R.
R/README.txt Read this if you are interested in writing R
programs that interact with H2O.
R/h2oWrapper*.tar.gz The R package for you to install.
R/h2o*.tar.gz The R package for you to install.

hadoop/ Directory containing hadoop integration for H2O.
hadoop/README.txt Read this if you want to run H2O on a hadoop
Expand All @@ -32,7 +32,7 @@ and then go to this URL to visit the H2O Web UI:
http://localhost:54321


For further documentation, please visit:
For further documentation, including how to use H2O from R, please visit:
http://docs.0xdata.com


Expand Down
51 changes: 40 additions & 11 deletions packaging/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -104,22 +104,51 @@ <h2 align="center">
<td align="center"><a href="h2o-SUBST_PROJECT_VERSION.zip" id="dzip">h2o-SUBST_PROJECT_VERSION.zip</a></td>
</tr>

<!--
</table>

<br>
<br>

<h2 align="center">
Installation of the H2O package for R from a package repository
</h2>

<table border="5" cellpadding="5" align="center">
<tr>
<td align="center">Windows (64-bit only)</td>
<td align="center">Installer package</td>
<td>For the most user-friendly experience on Windows.</td>
<td align="center"><a href="h2o-SUBST_PROJECT_VERSION-windows-installer.exe">h2o-SUBST_PROJECT_VERSION-windows-installer.exe</a></td>
<th>Platform</th>
<th>Instructions</th>
</tr>
<tr>
<td align="center">MacOSX</td>
<td align="center">Installer package</td>
<td>For the most user-friendly experience on a Mac.</td>
<td align="center"><a href="h2o-SUBST_PROJECT_VERSION-osx-installer.dmg">h2o-SUBST_PROJECT_VERSION-osx-installer.dmg</a></td>
</tr>
<td align="center">Any</td>
<td align="left">

-->
# Detach and remove any previously installed H2O packages. These might give error output if
<br>
# H2O is not loaded or was not previously installed. You can safely ignore these errors.
<br>
detach("package:h2o", unload=TRUE)
<br>
detach("package:h2oRClient", unload=TRUE)
<br>
remove.packages("h2o")
<br>
remove.packages("h2oRClient")
<br>
<br>
# Download, install and initialize the H2O package directly from inside R.
<br>
install.packages("h2o", repos=(c("<script type="text/javascript">var url = location.href; var url2 = url.substring(0, url.lastIndexOf("/")); document.write(url2);</script>/R", getOption("repos"))))
<br>
library(h2o)
<br>
localH2O = h2o.init()
<br>
<br>
# Run a demo.
<br>
demo(h2o.glm)
</td>
</tr>

</table>

Expand Down
20 changes: 9 additions & 11 deletions src/main/java/hex/gbm/SharedTreeModelBuilder.java
Original file line number Diff line number Diff line change
Expand Up @@ -594,8 +594,9 @@ public Score doIt(Model model, Frame fr, Frame validation, boolean oob, boolean
Frame adapValidation = frs[0]; // adapted validation dataset
// All columns including response of validation frame are already adapted to model
if (_nclass>1) { // Classification
for( int i=0; i<_nclass; i++ )
for( int i=0; i<_nclass; i++ ) // Distribution of response classes
adapValidation.add("ClassDist"+i,res.vecs()[i+1]);
adapValidation.add("Prediction",res.vecs()[0]); // Predicted values
} else { // Regression
adapValidation.add("Prediction",res.vecs()[0]);
}
Expand All @@ -619,9 +620,9 @@ public Score doIt(Model model, Frame fr, Frame validation, boolean oob, boolean
double sum;
if( _validation ) { // Passed in a class distribution from scoring
for( int i=0; i<_nclass; i++ )
ds[i] = chks[i+_ncols+1].at0(row); // Get the class distros
if (_nclass > 1 ) sum = 1.0; // Sum of a distribution is 1.0 for classification
else sum = ds[0]; // Sum is the same as prediction for regression.
ds[i] = chks[i+_ncols+1].at0(row); // Get the class distros
if (_nclass > 1 ) sum = 1.0; // Sum of a distribution is 1.0 for classification
else sum = ds[0]; // Sum is the same as prediction for regression.
} else { // Passed in the model-specific columns
sum = score0(chks,ds,row);
}
Expand All @@ -645,13 +646,10 @@ public Score doIt(Model model, Frame fr, Frame validation, boolean oob, boolean
_sum += err*err; // Squared error
assert !Double.isNaN(_sum);
// Pick highest prob for our prediction. Count all ties for best.
int best=0, tie_cnt=0; ties[tie_cnt] = 0;
for( int c=1; c<_nclass; c++ )
if( ds[best] < ds[c] ) { best=c; ties[ tie_cnt=0]=c; }
else if( ds[best] == ds[c] ) { ties[++tie_cnt ]=c; }
// Break ties psuedo-randomly: (row# mod #ties).
if( tie_cnt >= 1 ) { best = ties[row%(tie_cnt+1)]; }
_cm[ycls][best]++; // Bump Confusion Matrix also
if (_nclass > 1) { // fill CM only for classification
int best = _validation ? (int) chks[_ncols+1+_nclass].at80(row) : Model.getPrediction(ds, ties, row);
_cm[ycls][best]++; // Bump Confusion Matrix also
}
_snrows++;
}
}
Expand Down
56 changes: 48 additions & 8 deletions src/main/java/water/Model.java
Original file line number Diff line number Diff line change
Expand Up @@ -114,18 +114,19 @@ public Frame score( Frame fr) {
}
new MRTask2() {
@Override public void map( Chunk chks[] ) {
double tmp[] = new double[_names.length];
float preds[] = new float[nclasses()];
double tmp [] = new double[_names.length];
float preds[] = new float [nclasses()];
int ties [] = new int [nclasses()];
Chunk p = chks[_names.length-1];
for( int i=0; i<p._len; i++ ) {
float[] out = score0(chks,i,tmp,preds);
for( int row=0; row<p._len; row++ ) {
float[] out = score0(chks,row,tmp,preds);
if( nclasses() > 1 ) {
if( Float.isNaN(out[0]) ) p.setNA0(i);
else p.set0(i, Utils.maxIndex(out));
if( Float.isNaN(out[0]) ) p.setNA0(row);
else p.set0(row, Model.getPrediction(out, ties, row));
for( int c=0; c<nclasses(); c++ )
chks[_names.length+c].set0(i,out[c]);
chks[_names.length+c].set0(row,out[c]);
} else {
p.set0(i,out[0]);
p.set0(row,out[0]);
}
}
}
Expand Down Expand Up @@ -286,6 +287,45 @@ protected float[] score0( Chunk chks[], int row_in_chunk, double[] tmp, float[]
// Data must be in proper order. Handy for JUnit tests.
public double score(double[] data) {
  // Scratch buffer sized to the number of classes; handed to score0 as its output array.
  final float[] scratch = new float[nclasses()];
  final float[] scores = score0(data, scratch);
  // The result is whatever index Utils.maxIndex selects from the scores, widened to double.
  return Utils.maxIndex(scores);
}

/**
 * Picks the predicted class from a per-class prediction distribution.
 *
 * <p>When a single class holds the strictly largest value, its index is returned.
 * When several classes share the largest value, the tie is broken pseudo-randomly
 * but deterministically: the winner is the tied class at position
 * {@code rowInChunk % numberOfTiedClasses}, in ascending class order.
 *
 * @param preds      prediction distribution, one entry per class
 * @param ties       pre-allocated scratch array of the same length as {@code preds};
 *                   it is overwritten with the indices of the classes taking part in the tie
 * @param rowInChunk row number used to select among tied classes; expected to be non-negative
 * @return index of the chosen class
 */
public static final int getPrediction(float[] preds, int[] ties, int rowInChunk) {
  assert preds.length == ties.length;
  int winner = 0;
  int numTied = 1;          // ties[0..numTied-1] holds the classes sharing the current maximum
  ties[0] = 0;
  for (int cls = 1; cls < preds.length; cls++) {
    final float leader = preds[winner];
    final float candidate = preds[cls];
    if (candidate > leader) {
      // Strictly better class found: it becomes the only entry in the tie list.
      winner = cls;
      numTied = 1;
      ties[0] = cls;
    } else if (candidate == leader) {
      // Same value as the current maximum: record it as a tie participant.
      ties[numTied++] = cls;
    }
  }
  // With more than one class at the maximum, let the row number pick the winner.
  return numTied > 1 ? ties[rowInChunk % numTied] : winner;
}
// Java has no generics over primitive types, so the double[] variant is spelled out separately.
/**
 * Picks the predicted class from a per-class prediction distribution given as doubles.
 *
 * <p>Returns the index of the strictly largest value. If several classes share the
 * largest value, the tied class at position {@code rowInChunk % numberOfTiedClasses}
 * (in ascending class order) is returned instead.
 *
 * @param preds      prediction distribution, one entry per class
 * @param ties       pre-allocated scratch array of the same length as {@code preds};
 *                   it is overwritten with the indices of the classes taking part in the tie
 * @param rowInChunk row number used to select among tied classes; expected to be non-negative
 * @return index of the chosen class
 */
public static final int getPrediction(double[] preds, int[] ties, int rowInChunk) {
  assert preds.length == ties.length;
  int winner = 0;
  int numTied = 1;          // ties[0..numTied-1] holds the classes sharing the current maximum
  ties[0] = 0;
  for (int cls = 1; cls < preds.length; cls++) {
    final double leader = preds[winner];
    final double candidate = preds[cls];
    if (candidate > leader) {
      // Strictly better class found: it becomes the only entry in the tie list.
      winner = cls;
      numTied = 1;
      ties[0] = cls;
    } else if (candidate == leader) {
      // Same value as the current maximum: record it as a tie participant.
      ties[numTied++] = cls;
    }
  }
  // With more than one class at the maximum, let the row number pick the winner.
  return numTied > 1 ? ties[rowInChunk % numTied] : winner;
}


/** Return a String which is a valid Java program representing a class that
* implements the Model. The Java is of the form:
Expand Down

0 comments on commit 3a77173

Please sign in to comment.