Skip to content

Commit

Permalink
Merge branch 'master' of github.com:0xdata/h2o
Browse files Browse the repository at this point in the history
  • Loading branch information
Kevin Normoyle committed Feb 25, 2014
2 parents 89ed18e + 54109d3 commit 2dd08ae
Show file tree
Hide file tree
Showing 19 changed files with 414 additions and 251 deletions.
2 changes: 1 addition & 1 deletion .classpath
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,6 @@
<classpathentry kind="lib" path="lib/jogamp/gluegen-rt-natives-linux-amd64.jar"/>
<classpathentry kind="lib" path="lib/jogamp/gluegen-rt.jar" sourcepath="lib/jogamp/gluegen-rt-sources.jar"/>
<classpathentry kind="con" path="org.eclipse.jdt.USER_LIBRARY/python) interpreter library"/>
<classpathentry kind="lib" path="lib/joda/joda-time-2.3.jar"/>
<classpathentry kind="lib" path="lib/joda/joda-time-2.3.jar"/>
<classpathentry kind="output" path="target/classes"/>
</classpath>
2 changes: 1 addition & 1 deletion R/h2o-DESCRIPTION.template
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,5 @@ License: Apache License (== 2.0)
Depends: R (>= 2.13.0), RCurl, rjson, statmod, tools, methods, utils
Collate: Wrapper.R Internal.R Classes.R ParseImport.R Algorithms.R
NeedsCompilation: no
SystemRequirements: java
SystemRequirements: java 1.6 or higher
URL: http://www.0xdata.com
21 changes: 11 additions & 10 deletions R/h2o-package/R/Algorithms.R
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ h2o.gbm <- function(x, y, distribution='multinomial', data, n.trees=10, interact
if(params$distribution == "multinomial") {
# temp = matrix(unlist(res$cm), nrow = length(res$cm))
# mySum$prediction_error = 1-sum(diag(temp))/sum(temp)
mySum$prediction_error = tail(res$cm, 1)[[1]]$'_predErr'
mySum$prediction_error = tail(res$'cms', 1)[[1]]$'_predErr'
}
return(mySum)
}
Expand All @@ -68,8 +68,8 @@ h2o.gbm <- function(x, y, distribution='multinomial', data, n.trees=10, interact
result$params = params

if(result$params$distribution == "multinomial") {
class_names = tail(res$'_domains', 1)[[1]]
result$confusion = .build_cm(tail(res$cm, 1)[[1]]$'_arr', class_names) # res$'_domains'[[length(res$'_domains')]])
class_names = res$'cmDomain' #tail(res$'_domains', 1)[[1]]
result$confusion = .build_cm(tail(res$'cms', 1)[[1]]$'_arr', class_names) # res$'_domains'[[length(res$'_domains')]])
result$classification <- T
} else
result$classification <- F
Expand Down Expand Up @@ -375,7 +375,7 @@ h2o.glm.FV <- function(x, y, data, family, nfolds = 10, alpha = 0.5, lambda = 1e
result$auc = as.numeric(valid$auc)

# Construct confusion matrix
cm_ind = trunc(100*result$best_threshold) + 2
cm_ind = trunc(100*result$best_threshold) + 1
temp = data.frame(t(sapply(valid$'_cms'[[cm_ind]]$'_arr', c)))
temp[,3] = c(temp[1,2], temp[2,1])/apply(temp, 1, sum)
temp[3,] = c(temp[2,1], temp[1,2], 0)/apply(temp, 2, sum)
Expand Down Expand Up @@ -781,7 +781,7 @@ h2o.randomForest.VA <- function(x, y, data, ntree=50, depth=50, sample.rate=2/3,
}

# -------------------------- FluidVecs -------------------------- #
h2o.randomForest.FV <- function(x, y, data, ntree=50, depth=50, sample.rate=2/3, nbins=100, seed=-1, validation, nodesize=1) {
h2o.randomForest.FV <- function(x, y, data, ntree=50, depth=50, nodesize=1, sample.rate=2/3, nbins=100, seed=-1, importance = FALSE, validation) {
args <- .verify_dataxy(data, x, y)
if(!is.numeric(ntree)) stop('ntree must be a number')
if( any(ntree < 1) ) stop('ntree must be >= 1')
Expand All @@ -792,7 +792,8 @@ h2o.randomForest.FV <- function(x, y, data, ntree=50, depth=50, sample.rate=2/3,
if(!is.numeric(nbins)) stop('nbins must be a number')
if( any(nbins < 1)) stop('nbins must be an integer >= 1')
if(!is.numeric(seed)) stop("seed must be an integer >= 0")

if(!is.logical(importance)) stop("importance be logical (TRUE or FALSE)')")

if(missing(validation)) validation = data
# else if(class(validation) != "H2OParsedData") stop("validation must be an H2O dataset")
else if(!class(validation) %in% c("H2OParsedData", "H2OParsedDataVA")) stop("validation must be an H2O parsed dataset")
Expand All @@ -801,7 +802,7 @@ h2o.randomForest.FV <- function(x, y, data, ntree=50, depth=50, sample.rate=2/3,

# NB: externally, 1 based indexing; internally, 0 based
cols <- paste(args$x_i - 1, collapse=',')
res = .h2o.__remoteSend(data@h2o, .h2o.__PAGE_DRF, source=data@key, response=args$y, cols=cols, ntrees=ntree, max_depth=depth, min_rows=nodesize, sample_rate=sample.rate, nbins=nbins, seed=seed)
res = .h2o.__remoteSend(data@h2o, .h2o.__PAGE_DRF, source=data@key, response=args$y, cols=cols, ntrees=ntree, max_depth=depth, min_rows=nodesize, sample_rate=sample.rate, nbins=nbins, seed=seed, importance=as.numeric(importance))
params = list(x=args$x, y=args$y, ntree=ntree, depth=depth, sample.rate=sample.rate, nbins=nbins)

if(length(ntree) == 1 && length(depth) == 1 && length(nodesize) == 1 && length(sample.rate) == 1 && length(nbins) == 1) {
Expand All @@ -827,7 +828,7 @@ h2o.randomForest.FV <- function(x, y, data, ntree=50, depth=50, sample.rate=2/3,

# temp = matrix(unlist(res$cm), nrow = length(res$cm))
# mySum$prediction_error = 1-sum(diag(temp))/sum(temp)
mySum$prediction_error = tail(res$cm, 1)[[1]]$'_predErr'
mySum$prediction_error = tail(res$'cms', 1)[[1]]$'_predErr'
return(mySum)
}

Expand All @@ -845,8 +846,8 @@ h2o.randomForest.FV <- function(x, y, data, ntree=50, depth=50, sample.rate=2/3,
rownames(rf_matrix) = c("Depth", "Leaves")
result$forest = rf_matrix

class_names = tail(res$'_domains', 1)[[1]]
result$confusion = .build_cm(tail(res$cm, 1)[[1]]$'_arr', class_names) # res$'_domains'[[length(res$'_domains')]])
class_names = res$'cmDomain' # tail(res$'_domains', 1)[[1]]
result$confusion = .build_cm(tail(res$'cms', 1)[[1]]$'_arr', class_names) #res$'_domains'[[length(res$'_domains')]])
result$mse = as.numeric(res$errs)
# result$ntree = res$N
return(result)
Expand Down
16 changes: 11 additions & 5 deletions R/h2o-package/R/Classes.R
Original file line number Diff line number Diff line change
Expand Up @@ -975,12 +975,15 @@ head.H2OParsedDataVA <- function(x, n = 6L, ...) {
if(n > .MAX_INSPECT_VIEW) stop(paste("Cannot view more than", .MAX_INSPECT_VIEW, "rows"))

res = .h2o.__remoteSend(x@h2o, .h2o.__PAGE_INSPECT, key=x@key, offset=0, view=n)
res2 = .h2o.__remoteSend(x@h2o, .h2o.__HACK_LEVELS, key=x@key)
blanks = sapply(res$cols, function(y) { nchar(y$name) == 0 }) # Must stop R from auto-renaming cols with no name
temp = lapply(res$rows, function(y) { y$row = NULL; tmp = as.data.frame(y); names(tmp)[blanks] = ""; return(tmp) })
nums = sapply(res2$levels, is.null) # Must stop R from coercing all columns with "NA" to factors, confusing rbind if it is actually numeric

temp = lapply(res$rows, function(y) { y$row = NULL; na_num = (y[nums] == "NA"); y[nums][na_num] = as.numeric(NA);
tmp = as.data.frame(y); names(tmp)[blanks] = ""; return(tmp) })
if(is.null(temp)) return(temp)
x.slice = do.call(rbind, temp)

res2 = .h2o.__remoteSend(x@h2o, .h2o.__HACK_LEVELS, key = x@key)

for(i in 1:ncol(x)) {
if(!is.null(res2$levels[[i]]))
x.slice[,i] <- factor(x.slice[,i], levels = res2$levels[[i]])
Expand All @@ -997,13 +1000,16 @@ tail.H2OParsedDataVA <- function(x, n = 6L, ...) {

idx = seq.int(to = nrx, length.out = n)
res = .h2o.__remoteSend(x@h2o, .h2o.__PAGE_INSPECT, key=x@key, offset=idx[1], view=length(idx))
res2 = .h2o.__remoteSend(x@h2o, .h2o.__HACK_LEVELS, key=x@key)
blanks = sapply(res$cols, function(y) { nchar(y$name) == 0 }) # Must stop R from auto-renaming cols with no name
temp = lapply(res$rows, function(y) { y$row = NULL; tmp = as.data.frame(y); names(tmp)[blanks] = ""; return(tmp) })
nums = sapply(res2$levels, is.null) # Must stop R from coercing all columns with "NA" to factors, confusing rbind if it is actually numeric

temp = lapply(res$rows, function(y) { y$row = NULL; na_num = (y[nums] == "NA"); y[nums][na_num] = as.numeric(NA);
tmp = as.data.frame(y); names(tmp)[blanks] = ""; return(tmp) })
if(is.null(temp)) return(temp)
x.slice = do.call(rbind, temp)
rownames(x.slice) = idx

res2 = .h2o.__remoteSend(x@h2o, .h2o.__HACK_LEVELS, key = x@key)
for(i in 1:ncol(x)) {
if(!is.null(res2$levels[[i]]))
x.slice[,i] <- factor(x.slice[,i], levels = res2$levels[[i]])
Expand Down
7 changes: 7 additions & 0 deletions R/h2o-package/R/Wrapper.R
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,13 @@ h2o.shutdown <- function(client, prompt = TRUE) {
}

# ----------------------- Diagnostics ----------------------- #


# **** TODO: This isn't really a cluster status... it's a node status check for the node we're connected to.
# This is possibly confusing because this can come back without warning,
# but if a user tries to do any remoteSend, they will get a "cloud sick warning"
# Suggest cribbing the code from Internal.R that checks cloud status (or just call it here?)

h2o.clusterStatus <- function(client) {
if(missing(client) || class(client) != "H2OClient") stop("client must be a H2OClient object")
myURL = paste("http://", client@ip, ":", client@port, "/", .h2o.__PAGE_CLOUD, sep = "")
Expand Down
112 changes: 64 additions & 48 deletions h2o-docs/source/Ruser/R_studio.rst
Original file line number Diff line number Diff line change
@@ -1,34 +1,38 @@


H\ :sub:`2`\ O in R Studio
---------------------------
H\ :sub:`2`\ O installation in R Studio
------------------------------------------


These instructions assume you are using R Studio 2.14.0 or later.
These instructions assume you are using R 2.14.0 or later.

**STEP 1**

To use H\ :sub:`2`\ O in R, users need a copy of H\ :sub:`2`\ O.
The download package can be obtained by clicking on the button Download H\ :sub:`2`\ O at `http://0xdata.com/downloadtable <http://0xdata.com/downloadtable/>`_.
The download package containing the H\ :sub:`2`\ O jar file can be
obtained by visiting H\ :sub:`2`\ O available downloads at
`http://0xdata.com/downloadtable <http://0xdata.com/downloadtable/>`_.

Unzip the downloaded H\ :sub:`2`\ O zip file.
Choose the version of H\ :sub:`2`\ O best for you, and unzip the
downloaded H\ :sub:`2`\ O zip file. The most recent promoted build is
recommended.

**STEP 2**

Start an instance of H\ :sub:`2`\ O. For help with this see :ref:`GettingStartedFromaZipFile`
Start an instance of H\ :sub:`2`\ O. For help with this see
:ref:`GettingStartedFromaZipFile`

If users do not start an instance of H\ :sub:`2`\ O, one will be
started automatically for them at localhost:54321 (see **STEP 4** for
more detail).

Users should be aware that in order for H\ :sub:`2`\ O to successfully run through R, an instance of H\ :sub:`2`\ O must also simultaneously be running. If the instance of H\ :sub:`2`\ O is stopped, the R program will no longer run, and work done will be lost.
If the instance of H\ :sub:`2`\ O is stopped, the R
program will no longer run, and work done will be lost.

**STEP 3:**

**STEP 3**

For users who may have already installed a prior version of the H2O
package. New users may skip this step.

For packages to be successfully removed and updated in R Studio, they
must first be detached from the R environment and then uninstalled.
Simply enter the following:
New users may skip this step, while users who have previously
installed the H\ :sub:`2`\ O R packages should uninstall them by entering the
following commands to the R console:

::

Expand All @@ -38,58 +42,70 @@ Simply enter the following:

Note: users may get warnings of the type "Error in
detach("package:h2o", unload = TRUE): invalid 'name' argument".
This tells users that there is no h2o package to uninstall. These
This tells users that there is no H\ :sub:`2`\ O package to uninstall. These
warnings can safely be ignored.

.. image:: Rstudioinstall1.jpg
:width: 90%


**STEP 4**

Install the H\ :sub:`2`\ O package from the H2ORepo, the H2O cran that
functions exactly like the usual R cran, but is managed and maintained
by H2O.
Simply enter the call:
**STEP 4:**

::
Install the H\ :sub:`2`\ O package via the H\ :sub:`2`\ O
repository. This repository functions exactly like the R repository,
but is maintained by H\ :sub:`2`\ O.

install.packages("h2o", repos=(c("http://h2o-release.s3.amazonaws.com/h2o/rel-jacobi/2/R", getOption("repos"))))
as shown here:
**DO NOT CUT AND PASTE THIS CALL INTO R**
The call shown below is specifically for the jacobi/2 build, which may
be older than the build you would like to use. Your call should look
similar to this, and you can find an exact command to copy and paste
by going to H\ :sub:`2`\ O available downloads at
`http://0xdata.com/downloadtable
<http://0xdata.com/downloadtable/>`_ and selecting the correct version
there.

.. image:: Rstudioinstall2.jpg
:width: 90%

`install.packages("h2o", repos=(c("http://h2o-release.s3.amazonaws.com/h2o/rel-jacobi/2/R", getOption("repos"))))`


**STEP 4:**

Once the H\ :sub:`2`\ O R package has been installed, call the
package, and establish a connection to a running instance of H\
:sub:`2`\ O.

**STEP 4**
If there is no running instance of H\ :sub:`2`\ O prior to using
the command "h2o.init()", H\ :sub:`2`\ O in R will start an instance
automatically for the user at localhost:54321, and the user will be
notified. If you would like to connect to an instance at an IP and
port other than localhost:54321, these details must be specified as
arguments in the R call.

If you have not started an instance of H2O from your command line
terminal, R will start an instance for you automatically. If you have
already started an instance, H2O R will connect to this instance, and
no other instance will be started.

Get R Studio talking to your instance of H\ :sub:`2`\ O by typing in the call:

::

>localH2O = h2o.init(ip = "localhost", port = 54321, startH2O = TRUE)
library(h2o)
localH2O <- h2o.init()

Your IP and port may be different, depending on whether you are running H\ :sub:`2`\ O from your computer or a server. If you are running on a server, where it says IP enter the IP address of the server, and the appropriate port number. In the picture below the IP number is everything before the colon, and the port number is the 5 digit string after the colon.

Users who wish to specify a connection
with a server (other than localhost at port 54321) must explicitly
state the IP address and port number in the h2o.init call.
An example is given below, but **do not cut and paste**; users should
specify the IP and port number appropriate to their specific
environment.

::

Upgrading the H\ :sub:`2`\ O R Packages
"""""""""""""""""""""""""""""""""""""""
library(h2o)
localH2O = h2o.init(ip = "192.555.1.123", port = 12345, startH2O = FALSE)


Users may wish to manually upgrade their R packages. They can do this
by returning to STEP 3, and following the instructions through
STEP 4.
**STEP 5: Upgrading Packages**

Users may wish to manually upgrade their R packages. For instance, if
you are running the bleeding edge developer build, it’s possible that
the code has changed, but that the revision number has not, in which
case manually upgrading ensures the most current version of not only
the H\ :sub:`2`\ O code, but the corresponding R code as well.

This can be done by returning to STEP 3, and following the commands
through STEP 4.



Expand Down
37 changes: 22 additions & 15 deletions h2o-docs/source/Ruser/Rinstall.rst
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,11 @@ reccomended.
Start an instance of H\ :sub:`2`\ O. For help with this see
:ref:`GettingStartedFromaZipFile`

Users should be aware that in order for H\ :sub:`2`\ O to successfully
run through R, an instance of H\ :sub:`2`\ O must also simultaneously
be running. If the instance of H\ :sub:`2`\ O is stopped, the R
If users do not start an instance of H\ :sub:`2`\ O, one will be
started automatically for them at localhost:54321 (see **STEP 4** for
more detail).

If the instance of H\ :sub:`2`\ O is stopped, the R
program will no longer run, and work done will be lost.

**STEP 3:**
Expand All @@ -45,14 +47,20 @@ warnings can safely be ignored.

**STEP 4:**

Install the H\ :sub:`2`\ O package, and the H\ :sub:`2`\ O client
package via the H\ :sub:`2`\ O cran. This repository functions
exactly like the R repository, but is maintained by H\ :sub:`2`\ O.
Install the H\ :sub:`2`\ O package via the H\ :sub:`2`\ O
repository. This repository functions exactly like the R repository,
but is maintained by H\ :sub:`2`\ O.

::
**DO NOT CUT AND PASTE THIS CALL INTO R**
The call shown below is specifically for the jacobi/2 build, which may
be older than the build you would like to use. Your call should look
similar to this, and you can find an exact command to copy and paste
by going to H\ :sub:`2`\ O available downloads at
`http://0xdata.com/downloadtable
<http://0xdata.com/downloadtable/>`_ and selecting the correct version
there.

install.packages("h2o", repos=(c("http://h2o-release.s3.amazonaws.com/h2o/rel-jacobi/2/R", getOption("repos"))))
`install.packages("h2o", repos=(c("http://h2o-release.s3.amazonaws.com/h2o/rel-jacobi/2/R", getOption("repos"))))`


**STEP 4:**
Expand All @@ -63,12 +71,11 @@ package, and establish a connection to a running instance of H\

If there is no running instance of H\ :sub:`2`\ O prior to using
the command "h2o.init()", H\ :sub:`2`\ O in R will start an instance
automatically for the user.
automatically for the user at localhost:54321, and the user will be
notified. If you would like to connect to an instance at an IP and
port other than localhost:54321, these details must be specified as
arguments in the R call.

Note that in the call "localH2O <- h2o.init()" the h2o.init object is
being named localH2O in the R environment for use later in model
specification. Entering the call exactly as it is written below assumes the
user wishes to connect to IP localhost and port: 54321.

::

Expand All @@ -77,7 +84,7 @@ user wishes to connect to IP localhost and port: 54321.


Users who wish to specify a connection
with a server (rather than localhost at port 54321) must explicitly
with a server (other than localhost at port 54321) must explicitly
state the IP address and port number in the h2o.init call.
An example is given below, but **do not cut and paste**; users should
specify the IP and port number appropriate to their specific
Expand Down
Binary file removed h2o-docs/source/Ruser/Rstudioinstall1.jpg
Binary file not shown.
Binary file removed h2o-docs/source/Ruser/Rstudioinstall2.jpg
Binary file not shown.
6 changes: 3 additions & 3 deletions h2o-docs/source/userguide/general.rst
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,6 @@ Step by step instructions on how to use each of the algorithms and
tools can be found in tutorials. Users have a variety of options for
accessing and running H\ :sub:`2`\ O. For instructions on how to get
started using H\ :sub:`2`\ O (for example through R, using Java, or
via git-hub), please see the Quick Start Guides. New users may also
find the :ref:`glossary` useful for familiarizing themselves with H\
:sub:`2`\ O's computing and statistics terms.
via GitHub), please see the Quick Start Guides and Walk Through
Tutorials. New users may also find the :ref:`glossary` useful for
familiarizing themselves with H\ :sub:`2`\ O's computing and statistics terms.
Loading

0 comments on commit 2dd08ae

Please sign in to comment.