Skip to content

Commit

Permalink
Merge branch 'master' into summary2
Browse files Browse the repository at this point in the history
  • Loading branch information
tongxin committed Oct 8, 2013
2 parents 4daa621 + 2b42a5d commit 76ed2b1
Show file tree
Hide file tree
Showing 35 changed files with 215 additions and 112 deletions.
6 changes: 3 additions & 3 deletions R/h2o-package/demo/h2o.gbm.R
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
# This is a demo of H2O's GBM function
# It imports a data set, parses it, and prints a summary
# Then, it runs GBM on a subset of the dataset
library(h2o)
localH2O = new("H2OClient", ip="localhost", port=54321)
h2o.checkClient(localH2O)
library(h2oWrapper)
h2oWrapper.installDepPkgs()
localH2O = h2oWrapper.init(ip = "localhost", port = 54321, startH2O = TRUE, silentUpgrade = TRUE, promptUpgrade = FALSE)

prostate.hex = h2o.importFile(localH2O, system.file("extdata", "prostate.csv", package="h2o"), "prostate.hex")
summary(prostate.hex)
Expand Down
8 changes: 4 additions & 4 deletions R/h2o-package/demo/h2o.glm.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
# It imports a data set, parses it, and prints a summary
# Then, it runs GLM with a binomial link function using 10-fold cross-validation
# Note: This demo runs H2O on localhost:54321
library(h2o)
localH2O = new("H2OClient", ip = "localhost", port = 54321)
h2o.checkClient(localH2O)
library(h2oWrapper)
h2oWrapper.installDepPkgs()
localH2O = h2oWrapper.init(ip = "localhost", port = 54321, startH2O = TRUE, silentUpgrade = TRUE, promptUpgrade = FALSE)

prostate.hex = h2o.importFile(localH2O, path = system.file("extdata", "prostate.csv", package="h2o"), key = "prostate.hex")
summary(prostate.hex)
Expand All @@ -17,4 +17,4 @@ axis(1, at = 1:length(myLabels), labels = myLabels)
abline(h = 0, col = 2, lty = 2)
title("Coefficients from Logistic Regression\n of Prostate Cancer Data")

barplot(prostate.glm@model$coefficients, main = "Coefficients from Logistic Regression\n of Prostate Cancer Data")
barplot(prostate.glm@model$coefficients, main = "Coefficients from Logistic Regression\n of Prostate Cancer Data")
8 changes: 4 additions & 4 deletions R/h2o-package/demo/h2o.kmeans.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
# It imports a data set, parses it, and prints a summary
# Then, it runs K-Means with k = 5 centers on a subset of characteristics
# Note: This demo runs H2O on localhost:54321
library(h2o)
localH2O = new("H2OClient", ip="localhost", port=54321)
h2o.checkClient(localH2O)
library(h2oWrapper)
h2oWrapper.installDepPkgs()
localH2O = h2oWrapper.init(ip = "localhost", port = 54321, startH2O = TRUE, silentUpgrade = TRUE, promptUpgrade = FALSE)

prostate.hex = h2o.importFile(localH2O, system.file("extdata", "prostate.csv", package="h2o"), "prostate.hex")
summary(prostate.hex)
Expand Down Expand Up @@ -34,4 +34,4 @@ par(mfrow = c(1,2))
prostate.ctrs = as.data.frame(prostate.km@model$centers)
plot(prostate.ctrs[,1:2])
plot(prostate.ctrs[,3:4])
title("K-Means Centers for k = 10", outer = TRUE, line = -2.0)
title("K-Means Centers for k = 10", outer = TRUE, line = -2.0)
8 changes: 4 additions & 4 deletions R/h2o-package/demo/h2o.prcomp.R
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
# This is a demo of H2O's PCA function
# It imports a data set, parses it, and prints a summary
# Then, it runs PCA on a subset of the features
library(h2o)
localH2O = new("H2OClient", ip="localhost", port=54321)
h2o.checkClient(localH2O)
library(h2oWrapper)
h2oWrapper.installDepPkgs()
localH2O = h2oWrapper.init(ip = "localhost", port = 54321, startH2O = TRUE, silentUpgrade = TRUE, promptUpgrade = FALSE)

australia.hex = h2o.importFile(localH2O, system.file("extdata", "australia.csv", package="h2o"), "australia.hex")
summary(australia.hex)
Expand All @@ -13,4 +13,4 @@ print(australia.pca)
plot(australia.pca)

australia.pca2 = h2o.prcomp(australia.hex, tol = 0.5, standardize = FALSE)
print(australia.pca2)
print(australia.pca2)
8 changes: 4 additions & 4 deletions R/h2o-package/demo/h2o.randomForest.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
# It imports a data set, parses it, and prints a summary
# Then, it runs RF with 50 trees, maximum depth of 100, using the iris class as the response
# Note: This demo runs H2O on localhost:54321
library(h2o)
localH2O = new("H2OClient", ip="localhost", port=54321)
h2o.checkClient(localH2O)
library(h2oWrapper)
h2oWrapper.installDepPkgs()
localH2O = h2oWrapper.init(ip = "localhost", port = 54321, startH2O = TRUE, silentUpgrade = TRUE, promptUpgrade = FALSE)

iris.hex = h2o.importFile(localH2O, path = system.file("extdata", "iris.csv", package="h2o"), key = "iris.hex")
summary(iris.hex)
Expand All @@ -16,4 +16,4 @@ invisible(readline("Hit <Return> to continue: "))
covtype.hex = h2o.importFile(localH2O, path = system.file("extdata", "covtype.csv", package="h2o"), key = "covtype.hex")
summary(covtype.hex)
covtype.rf = h2o.randomForest(y = "Cover_Type", x = setdiff(colnames(covtype.hex), c("Cover_Type", "Aspect", "Hillshade_9am")), data = covtype.hex, ntree = 50, depth = 150)
print(covtype.rf)
print(covtype.rf)
1 change: 0 additions & 1 deletion R/h2o-package/man/H2OClient-class.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,6 @@ Objects can be created by calls of the form \code{new("H2OClient", ...)}
library(h2oWrapper)
h2oWrapper.installDepPkgs()
localH2O = h2oWrapper.init(ip = "localhost", port = 54321, startH2O = TRUE, silentUpgrade = TRUE, promptUpgrade = FALSE)
library(h2o)
showClass("H2OClient")
}
\keyword{classes}
1 change: 0 additions & 1 deletion R/h2o-package/man/h2o.checkClient.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@ h2o.checkClient(object)
library(h2oWrapper)
h2oWrapper.installDepPkgs()
localH2O = h2oWrapper.init(ip = "localhost", port = 54321, startH2O = TRUE, silentUpgrade = TRUE, promptUpgrade = FALSE)
library(h2o)
h2o.checkClient(localH2O)
}
% Add one or more standard keywords, see file 'KEYWORDS' in the
Expand Down
77 changes: 77 additions & 0 deletions R/h2o-package/man/h2o.gbm.Rd
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
\name{h2o.gbm}
\alias{h2o.gbm}
%- Also NEED an '\alias' for EACH other topic documented here.
\title{
H2O: GBM
}
%% ~~function to do ... ~~

\description{Builds gradient boosted classification trees on a parsed data set.
}
\usage{
h2o.gbm(y, x, distribution, data, n.trees, interaction.depth, n.minobsinnode, shrinkage)
}
\arguments{
\item{y}{
The name or index of the response variable. If the data does not contain a header, this is the column index number starting at 0, and increasing from left to right. (The response must be either an integer or a categorical variable).
}
\item{x}{
A vector containing the names or indices of the predictor variables to use in building the GBM model.
}
\item{distribution}{
The type of GBM model to be produced: "multinomial" (default) for classification, or "gaussian" for regression.
}
\item{data}{
An \code{\linkS4class{H2OParsedData}} object containing the variables in the model.
}
\item{n.trees}{
Number of trees to grow. Must be a nonnegative integer.
}
\item{interaction.depth}{
Maximum depth to grow the tree.
}
\item{n.minobsinnode}{
Minimum number of rows to assign to terminal nodes.
}
\item{shrinkage}{
A learning-rate parameter defining step size reduction.
}
}
\value{
An object of class \code{\linkS4class{H2OGBM}} with slots key, data, and model, where the last is a list of the following components:
\item{type }{The type of the tree, which currently must be classification.}
\item{n.trees }{Number of trees grown.}
\item{oob_err }{Out of bag error rate.}
\item{forest }{A matrix giving the minimum, mean, and maximum of the tree depth and number of leaves.}
\item{confusion }{Confusion matrix of the prediction.}
}
\references{

1. Elith, Jane, John R Leathwick, and Trevor Hastie. "A Working Guide to
Boosted Regression Trees." Journal of Animal Ecology 77.4 (2008): 802-813

2. Friedman, Jerome, Trevor Hastie, Saharon Rosset, Robert Tibshirani,
and Ji Zhu. "Discussion of Boosting Papers." Ann. Statist 32 (2004):
102-107

3. Hastie, Trevor, Robert Tibshirani, and Jerome H. Friedman. The
Elements of Statistical Learning.
Vol.1. N.p.: Springer New York, 2001.
http://www.stanford.edu/~hastie/local.ftp/Springer/OLD//ESLII_print4.pdf
}


\seealso{
For more information see: http://docs.0xdata.com
}
\examples{
library(h2oWrapper)
h2oWrapper.installDepPkgs()
localH2O = h2oWrapper.init(ip = "localhost", port = 54321, startH2O = TRUE, silentUpgrade = TRUE, promptUpgrade = FALSE)
# Run classification GBM on CAPSULE ~ AGE + RACE + PSA + DCAPS
prostate.hex = h2o.importURL(localH2O, path = "https://raw.github.com/0xdata/h2o/master/smalldata/logreg/prostate.csv", key = "prostate.hex")
h2o.gbm(y = "CAPSULE", x = c("AGE","RACE","PSA","DCAPS"), data = prostate.hex, n.trees = 100, interaction.depth = 8, n.minobsinnode = 10, shrinkage = 0.2)
# Run regression GBM on VOL using all columns except ID, DPROS, DCAPS, and VOL as predictors
myX = setdiff(colnames(prostate.hex), c("ID", "DPROS", "DCAPS", "VOL"))
h2o.gbm(y = "VOL", x = myX, data = prostate.hex, n.trees = 10, interaction.depth = 5, shrinkage = 0.1)
}
1 change: 0 additions & 1 deletion R/h2o-package/man/h2o.getTree.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,6 @@ The particular tree to retrieve. (Must be an integer between 1 and \code{ntree})
library(h2oWrapper)
h2oWrapper.installDepPkgs()
localH2O = h2oWrapper.init(ip = "localhost", port = 54321, startH2O = TRUE, silentUpgrade = TRUE, promptUpgrade = FALSE)
library(h2o)
irisPath = system.file("extdata", "iris.csv", package="h2o")
iris.hex = h2o.importFile(localH2O, path = irisPath, key = "iris.hex")
iris.rf = h2o.randomForest(y = 4, data = iris.hex, ntree = 50, depth = 100)
Expand Down
1 change: 0 additions & 1 deletion R/h2o-package/man/h2o.glm.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,6 @@ The slot xval is a list of \code{\linkS4class{H2OGLMModel}} objects representing
library(h2oWrapper)
h2oWrapper.installDepPkgs()
localH2O = h2oWrapper.init(ip = "localhost", port = 54321, startH2O = TRUE, silentUpgrade = TRUE, promptUpgrade = FALSE)
library(h2o)
# Run GLM of CAPSULE ~ AGE + RACE + PSA + DCAPS
prostate.hex = h2o.importURL(localH2O, path = "https://raw.github.com/0xdata/h2o/master/smalldata/logreg/prostate.csv", key = "prostate.hex")
h2o.glm(y = "CAPSULE", x = c("AGE","RACE","PSA","DCAPS"), data = prostate.hex, family =
Expand Down
1 change: 0 additions & 1 deletion R/h2o-package/man/h2o.importFile.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,6 @@ If \code{parse = TRUE}, the function returns an object of class \code{\linkS4cla
library(h2oWrapper)
h2oWrapper.installDepPkgs()
localH2O = h2oWrapper.init(ip = "localhost", port = 54321, startH2O = TRUE, silentUpgrade = TRUE, promptUpgrade = FALSE)
library(h2o)
irisPath = system.file("extdata", "iris.csv", package="h2o")
iris.hex = h2o.importFile(localH2O, path = irisPath, key = "iris.hex")
summary(iris.hex)
Expand Down
1 change: 0 additions & 1 deletion R/h2o-package/man/h2o.importFolder.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,6 @@ WARNING: In H2O, import is lazy! Do not modify the data files on hard disk until
library(h2oWrapper)
h2oWrapper.installDepPkgs()
localH2O = h2oWrapper.init(ip = "localhost", port = 54321, startH2O = TRUE, silentUpgrade = TRUE, promptUpgrade = FALSE)
library(h2o)
myPath = paste(path.package("h2o"), "extdata", sep="/")
all_files.hex = h2o.importFolder(localH2O, path = myPath)
for(i in 1:length(all_files.hex))
Expand Down
1 change: 0 additions & 1 deletion R/h2o-package/man/h2o.importHDFS.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,6 @@ When the path is a directory, if \code{parse = TRUE}, the function returns a lis
library(h2oWrapper)
h2oWrapper.installDepPkgs()
localH2O = h2oWrapper.init(ip = "localhost", port = 54321, startH2O = TRUE, silentUpgrade = TRUE, promptUpgrade = FALSE)
library(h2o)
covtype.hex = importHDFS(localH2O, path = "hdfs://192.168.1.173:54321/0xdiag/datasets/standard/covtype.data", parse = TRUE)
summary(covtype.hex)

Expand Down
1 change: 0 additions & 1 deletion R/h2o-package/man/h2o.importURL.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,6 @@ If \code{parse = TRUE}, the function returns an object of class \code{\linkS4cla
library(h2oWrapper)
h2oWrapper.installDepPkgs()
localH2O = h2oWrapper.init(ip = "localhost", port = 54321, startH2O = TRUE, silentUpgrade = TRUE, promptUpgrade = FALSE)
library(h2o)
prostate.hex = h2o.importURL(localH2O, path = "https://raw.github.com/0xdata/h2o/master/smalldata/logreg/prostate.csv", key = "prostate.hex")
summary(prostate.hex)
}
Expand Down
1 change: 0 additions & 1 deletion R/h2o-package/man/h2o.kmeans.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,6 @@ An object of class \code{\linkS4class{H2OKMeansModel}} with slots key, data, and
library(h2oWrapper)
h2oWrapper.installDepPkgs()
localH2O = h2oWrapper.init(ip = "localhost", port = 54321, startH2O = TRUE, silentUpgrade = TRUE, promptUpgrade = FALSE)
library(h2o)
prosPath = system.file("extdata", "prostate.csv", package="h2o")
prostate.hex = h2o.importFile(localH2O, path = prosPath)
h2o.kmeans(data = prostate.hex, centers = 10, cols = c("AGE", "RACE", "VOL", "GLEASON"))
Expand Down
1 change: 0 additions & 1 deletion R/h2o-package/man/h2o.parseRaw.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,6 @@ After the raw data file is parsed, it will be automatically deleted from the H2O
library(h2oWrapper)
h2oWrapper.installDepPkgs()
localH2O = h2oWrapper.init(ip = "localhost", port = 54321, startH2O = TRUE, silentUpgrade = TRUE, promptUpgrade = FALSE)
library(h2o)
prosPath = system.file("extdata", "prostate.csv", package="h2o")
prostate.raw = h2o.importFile(localH2O, path = prosPath, parse = FALSE)
# Do not modify prostate.csv on disk at this point!
Expand Down
1 change: 0 additions & 1 deletion R/h2o-package/man/h2o.prcomp.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,6 @@ The signs of the columns of the rotation matrix are arbitrary, and so may differ
library(h2oWrapper)
h2oWrapper.installDepPkgs()
localH2O = h2oWrapper.init(ip = "localhost", port = 54321, startH2O = TRUE, silentUpgrade = TRUE, promptUpgrade = FALSE)
library(h2o)
prosPath = system.file("extdata", "prostate.csv", package="h2o")
prostate.hex = h2o.importFile(localH2O, path = prosPath)
prostate.pca = h2o.prcomp(data = prostate.hex, standardize = TRUE)
Expand Down
1 change: 0 additions & 1 deletion R/h2o-package/man/h2o.predict.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@ A \code{\linkS4class{H2OParsedData}} object containing the predictions.
library(h2oWrapper)
h2oWrapper.installDepPkgs()
localH2O = h2oWrapper.init(ip = "localhost", port = 54321, startH2O = TRUE, silentUpgrade = TRUE, promptUpgrade = FALSE)
library(h2o)
# Run GLM of CAPSULE ~ AGE + RACE + PSA + DCAPS
prostate.hex = h2o.importURL(localH2O, path = "https://raw.github.com/0xdata/h2o/master/smalldata/logreg/prostate.csv", key = "prostate.hex")
prostate.glm = h2o.glm(y = "CAPSULE", x = c("AGE","RACE","PSA","DCAPS"), data = prostate.hex, family =
Expand Down
5 changes: 2 additions & 3 deletions R/h2o-package/man/h2o.randomForest.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ h2o.randomForest(y, x, data, ntree = 50, depth = 2147483647, classwt = as.numeri
%- maybe also 'usage' for other objects documented here.
\arguments{
\item{y}{
The name or index of the response variable. If the data does not contain a header, this is the column index. (The response must be either an integer or a categorical variable).
The name or index of the response variable. If the data does not contain a header, this is the column index, designated by increasing numbers from left to right. (The response must be either an integer or a categorical variable).
}
\item{x}{
A vector containing the names or indices of the predictor variables to use in building the random forest model.
Expand All @@ -31,7 +31,7 @@ Number of trees to grow. (Must be a nonnegative integer).
(Optional) Priors of the classes. Need not add up to one. If missing, defaults to all weights set at 1.0.}
}
\details{
Currently, only classification regression trees are supported. Note that indexing begins at zero, so for example, to specify the first column as the response variable, set \code{y = 0}.
Currently, only classification trees are supported. Note that indexing begins at zero, so for example, to specify the first column as the response variable, set \code{y = 0}.
}
\value{
An object of class \code{\linkS4class{H2ORForestModel}} with slots key, data, and model, where the last is a list of the following components:
Expand Down Expand Up @@ -65,7 +65,6 @@ An object of class \code{\linkS4class{H2ORForestModel}} with slots key, data, an
library(h2oWrapper)
h2oWrapper.installDepPkgs()
localH2O = h2oWrapper.init(ip = "localhost", port = 54321, startH2O = TRUE, silentUpgrade = TRUE, promptUpgrade = FALSE)
library(h2o)
irisPath = system.file("extdata", "iris.csv", package="h2o")
iris.hex = h2o.importFile(localH2O, path = irisPath, key = "iris.hex")
h2o.randomForest(y = 5, x = c(2,3,4), data = iris.hex, ntree = 50, depth = 100,
Expand Down
1 change: 0 additions & 1 deletion R/h2o-package/man/h2o.setColNames.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@ This function modifies the \code{data} file directly. It does not create a new c
library(h2oWrapper)
h2oWrapper.installDepPkgs()
localH2O = h2oWrapper.init(ip = "localhost", port = 54321, startH2O = TRUE, silentUpgrade = TRUE, promptUpgrade = FALSE)
library(h2o)
irisPath = system.file("extdata", "iris.csv", package="h2o")
iris.hex = h2o.importFile(localH2O, path = irisPath, key = "iris.hex")
summary(iris.hex)
Expand Down
5 changes: 3 additions & 2 deletions R/h2oWrapper-package/R/h2oWrapper.R
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,8 @@ h2oWrapper.checkPackage <- function(myURL, silentUpgrade, promptUpgrade) {
warning("Mismatched MD5 hash! Check you have downloaded complete R package.")
install.packages(paste(getwd(), myFile, sep="/"), repos = NULL, type = "source")
file.remove(paste(getwd(), myFile, sep="/"))
cat("\nSuccess\nYou may now type 'library(h2o)' to load the R package\n\n")
#cat("\nSuccess\nYou may now type 'library(h2o)' to load the R package\n\n")
library(h2o)
}
}

Expand All @@ -153,4 +154,4 @@ h2oWrapper.__formatError <- function(error, prefix=" ") {
for (i in 1:length(items))
result = paste(result, prefix, items[i], "\n", sep="")
result
}
}
2 changes: 1 addition & 1 deletion prj.el
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,6 @@
'(jde-sourcepath (quote ("./src")))
'(jde-run-option-hotspot-type (quote server))
'(jde-compile-option-target (quote ("1.6")))
'(jde-run-option-heap-size (quote ((1100 . "megabytes") (1100 . "megabytes"))))
'(jde-run-option-heap-size (quote ((4000 . "megabytes") (4000 . "megabytes"))))
'(jde-run-application-class "water.Boot")
'(jde-compile-option-debug (quote ("all" (t t t)))))
Loading

0 comments on commit 76ed2b1

Please sign in to comment.