Skip to content

Commit

Permalink
Merge branch 'master' of github.com:0xdata/h2o
Browse files Browse the repository at this point in the history
  • Loading branch information
tomasnykodym committed Oct 8, 2013
2 parents d6c93c6 + 73c72c0 commit 28f47f1
Show file tree
Hide file tree
Showing 35 changed files with 400 additions and 564 deletions.
41 changes: 25 additions & 16 deletions R/h2o-package/R/Algorithms.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,22 +7,28 @@ setGeneric("h2o.prcomp", function(data, tol = 0, standardize = TRUE, retx = FALS
setGeneric("h2o.pcr", function(x, y, data, ncomp, family, nfolds = 10, alpha = 0.5, lambda = 1.0e-5, tweedie.p = ifelse(family=="tweedie", 0, NA)) { standardGeneric("h2o.pcr") })
setGeneric("h2o.randomForest", function(x, y, data, ntree = 50, depth = 2147483647, classwt = as.numeric(NA)) { standardGeneric("h2o.randomForest") })
setGeneric("h2o.getTree", function(forest, k, plot = FALSE) { standardGeneric("h2o.getTree") })
setGeneric("h2o.gbm", function(x, y, data, n.trees = 100, interaction.depth = 5, n.minobsinnode = 10, shrinkage = 0.1) { standardGeneric("h2o.gbm") })
setGeneric("h2o.gbm", function(x, y, distribution='multinomial', data, n.trees = 10, interaction.depth = 8, n.minobsinnode = 10, shrinkage = 0.2) { standardGeneric("h2o.gbm") })
# setGeneric("h2o.gbmgrid", function(x, y, data, n.trees = c(10,100), interaction.depth = c(1,5,10), n.minobsinnode = 10, shrinkage = c(0.01,0.1,0.2)) { standardGeneric("h2o.gbmgrid") })
setGeneric("h2o.predict", function(object, newdata) { standardGeneric("h2o.predict") })

#----------------------- Generalized Boosting Machines (GBM) -----------------------#
setMethod("h2o.gbm", signature(x="numeric", y="numeric", data="H2OParsedData", n.trees="numeric", interaction.depth="numeric", n.minobsinnode="numeric", shrinkage="numeric"),
function(x, y, data, n.trees, interaction.depth, n.minobsinnode, shrinkage) {
setMethod("h2o.gbm", signature(x="numeric", y="numeric", distribution='character', data="H2OParsedData", n.trees="numeric", interaction.depth="numeric", n.minobsinnode="numeric", shrinkage="numeric"),
function(x, y, distribution, data, n.trees, interaction.depth, n.minobsinnode, shrinkage) {
if (length(x) < 1) stop("GBM requires at least one explanatory variable")
if(any( x < 1 | x > ncol(data))) stop(paste('Out of range explanatory variable', paste(x[which(x < 1 || x > ncol(data))], collapse=',')))
if( y < 1 || y > ncol(data) ) stop(paste('Response variable index', y, 'is out of range'))
if( y %in% x ) stop(paste(colnames(data)[y], 'is both an explanatory and dependent variable'))
x <- x - 1
cols=paste(x,collapse=',')

if( missing(distribution) )
distribution <- 'multinomial'
if( !(distribution %in% c('multinomial', 'gaussian')) )
stop(paste(distribution, "is not a valid distribution; only [multinomial, guassian] are supported"))
classification <- ifelse(distribution == 'multinomial', 1, ifelse(distribution=='gaussian', 0, -1))

destKey = paste("__GBMModel_", UUIDgenerate(), sep="")
res = h2o.__remoteSend(data@h2o, h2o.__PAGE_GBM, destination_key=destKey, source=data@key, response=colnames(data)[y], cols=cols, ntrees=n.trees, max_depth=interaction.depth, learn_rate=shrinkage, min_rows=n.minobsinnode)
res = h2o.__remoteSend(data@h2o, h2o.__PAGE_GBM, destination_key=destKey, source=data@key, response=colnames(data)[y], cols=cols, ntrees=n.trees, max_depth=interaction.depth, learn_rate=shrinkage, min_rows=n.minobsinnode, classification=classification)
while(h2o.__poll(data@h2o, res$job_key) != -1) { Sys.sleep(1) }
res2 = h2o.__remoteSend(data@h2o, h2o.__PAGE_GBMModelView, '_modelKey'=destKey)

Expand All @@ -45,25 +51,28 @@ setMethod("h2o.gbm", signature(x="numeric", y="numeric", data="H2OParsedData", n
new("H2OGBMModel", key=destKey, data=data, model=result)
})

setMethod("h2o.gbm", signature(x="numeric", y="character", data="H2OParsedData", n.trees="numeric", interaction.depth="numeric", n.minobsinnode="numeric", shrinkage="numeric"),
function(x, y, data, n.trees, interaction.depth, n.minobsinnode, shrinkage) {
setMethod("h2o.gbm", signature(x="numeric", y="character", distribution='ANY', data="H2OParsedData", n.trees="numeric", interaction.depth="numeric", n.minobsinnode="numeric", shrinkage="numeric"),
function(x, y, distribution, data, n.trees, interaction.depth, n.minobsinnode, shrinkage) {
cc <- colnames( data )
if( !(y %in% cc) ) stop(paste(y, 'is not a valid column name'))
y_i <- which(y==cc)
h2o.gbm(x, y_i, data, n.trees, interaction.depth, n.minobsinnode, shrinkage)
h2o.gbm(x, y_i, distribution, data, n.trees, interaction.depth, n.minobsinnode, shrinkage)
})

setMethod("h2o.gbm", signature(x="character", y="character", data="H2OParsedData", n.trees="numeric", interaction.depth="numeric", n.minobsinnode="numeric", shrinkage="numeric"),
function(x, y, data, n.trees, interaction.depth, n.minobsinnode, shrinkage) {
setMethod("h2o.gbm", signature(x="character", y="character", distribution='ANY', data="H2OParsedData", n.trees="numeric", interaction.depth="numeric", n.minobsinnode="numeric", shrinkage="numeric"),
function(x, y, distribution, data, n.trees, interaction.depth, n.minobsinnode, shrinkage) {
cc <- colnames( data )
if( y %in% x ) stop(paste(y, 'is both an explanatory and dependent variable'))
if(any(!(x %in% cc))) stop(paste(paste(x[which(!(x %in% cc))], collapse=','), 'is not a valid column name'))
x_i = match(x, cc)
h2o.gbm(x_i, y, data, n.trees, interaction.depth, n.minobsinnode, shrinkage)
h2o.gbm(x_i, y, distribution, data, n.trees, interaction.depth, n.minobsinnode, shrinkage)
})

setMethod("h2o.gbm", signature(x="ANY", y="character", data="H2OParsedData", n.trees="ANY", interaction.depth="ANY", n.minobsinnode="ANY", shrinkage="ANY"),
function(x, y, data, n.trees, interaction.depth, n.minobsinnode, shrinkage) {
setMethod("h2o.gbm", signature(x="ANY", y="character", distribution='ANY', data="H2OParsedData", n.trees="ANY", interaction.depth="ANY", n.minobsinnode="ANY", shrinkage="ANY"),
function(x, y, distribution, data, n.trees, interaction.depth, n.minobsinnode, shrinkage) {
if( missing(distribution) ) distribution='multinomial'
if( !(distribution %in% c('multinomial', 'gaussian')) )
stop(paste(distribution, "is not a valid distribution; only [multinomial, guassian] are supported"))
if(!(missing(x) || class(x) == "numeric" || class(x) == "character"))
stop(paste("x cannot be of class", class(x)))
else if(!(missing(n.trees) || class(n.trees) == "numeric"))
Expand All @@ -75,13 +84,13 @@ setMethod("h2o.gbm", signature(x="ANY", y="character", data="H2OParsedData", n.t
else if(!(missing(n.minobsinnode) || class(n.minobsinnode) == "numeric"))
stop(paste("n.minobsinnode cannot be of class", class(n.minobsinnode)))
if(missing(x)) x = setdiff(colnames(data), y)
h2o.gbm(x, y, data, n.trees, interaction.depth, n.minobsinnode, shrinkage)
h2o.gbm(x, y, distribution, data, n.trees, interaction.depth, n.minobsinnode, shrinkage)
})

setMethod("h2o.gbm", signature(x="ANY", y="numeric", data="H2OParsedData", n.trees="ANY", interaction.depth="ANY", n.minobsinnode="ANY", shrinkage="ANY"),
function(x, y, data, n.trees, interaction.depth, n.minobsinnode, shrinkage) {
setMethod("h2o.gbm", signature(x="ANY", y="numeric", distribution='ANY', data="H2OParsedData", n.trees="ANY", interaction.depth="ANY", n.minobsinnode="ANY", shrinkage="ANY"),
function(x, y, distribution, data, n.trees, interaction.depth, n.minobsinnode, shrinkage) {
if( y < 1 || y > ncol( data ) ) stop(paste(y, 'is not a valid column index'))
h2o.gbm(x, colnames(data)[y], data, n.trees, interaction.depth, n.minobsinnode, shrinkage)
h2o.gbm(x, colnames(data)[y], data, distribution, n.trees, interaction.depth, n.minobsinnode, shrinkage)
})

#----------------------------- Generalized Linear Models (GLM) ---------------------------#
Expand Down
6 changes: 3 additions & 3 deletions R/h2o-package/demo/h2o.gbm.R
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
# This is a demo of H2O's GBM function
# It imports a data set, parses it, and prints a summary
# Then, it runs GBM on a subset of the dataset
library(h2o)
localH2O = new("H2OClient", ip="localhost", port=54321)
h2o.checkClient(localH2O)
library(h2oWrapper)
h2oWrapper.installDepPkgs()
localH2O = h2oWrapper.init(ip = "localhost", port = 54321, startH2O = TRUE, silentUpgrade = TRUE, promptUpgrade = FALSE)

prostate.hex = h2o.importFile(localH2O, system.file("extdata", "prostate.csv", package="h2o"), "prostate.hex")
summary(prostate.hex)
Expand Down
8 changes: 4 additions & 4 deletions R/h2o-package/demo/h2o.glm.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
# It imports a data set, parses it, and prints a summary
# Then, it runs GLM with a binomial link function using 10-fold cross-validation
# Note: This demo runs H2O on localhost:54321
library(h2o)
localH2O = new("H2OClient", ip = "localhost", port = 54321)
h2o.checkClient(localH2O)
library(h2oWrapper)
h2oWrapper.installDepPkgs()
localH2O = h2oWrapper.init(ip = "localhost", port = 54321, startH2O = TRUE, silentUpgrade = TRUE, promptUpgrade = FALSE)

prostate.hex = h2o.importFile(localH2O, path = system.file("extdata", "prostate.csv", package="h2o"), key = "prostate.hex")
summary(prostate.hex)
Expand All @@ -17,4 +17,4 @@ axis(1, at = 1:length(myLabels), labels = myLabels)
abline(h = 0, col = 2, lty = 2)
title("Coefficients from Logistic Regression\n of Prostate Cancer Data")

barplot(prostate.glm@model$coefficients, main = "Coefficients from Logistic Regression\n of Prostate Cancer Data")
barplot(prostate.glm@model$coefficients, main = "Coefficients from Logistic Regression\n of Prostate Cancer Data")
8 changes: 4 additions & 4 deletions R/h2o-package/demo/h2o.kmeans.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
# It imports a data set, parses it, and prints a summary
# Then, it runs K-Means with k = 5 centers on a subset of characteristics
# Note: This demo runs H2O on localhost:54321
library(h2o)
localH2O = new("H2OClient", ip="localhost", port=54321)
h2o.checkClient(localH2O)
library(h2oWrapper)
h2oWrapper.installDepPkgs()
localH2O = h2oWrapper.init(ip = "localhost", port = 54321, startH2O = TRUE, silentUpgrade = TRUE, promptUpgrade = FALSE)

prostate.hex = h2o.importFile(localH2O, system.file("extdata", "prostate.csv", package="h2o"), "prostate.hex")
summary(prostate.hex)
Expand Down Expand Up @@ -34,4 +34,4 @@ par(mfrow = c(1,2))
prostate.ctrs = as.data.frame(prostate.km@model$centers)
plot(prostate.ctrs[,1:2])
plot(prostate.ctrs[,3:4])
title("K-Means Centers for k = 10", outer = TRUE, line = -2.0)
title("K-Means Centers for k = 10", outer = TRUE, line = -2.0)
8 changes: 4 additions & 4 deletions R/h2o-package/demo/h2o.prcomp.R
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
# This is a demo of H2O's PCA function
# It imports a data set, parses it, and prints a summary
# Then, it runs PCA on a subset of the features
library(h2o)
localH2O = new("H2OClient", ip="localhost", port=54321)
h2o.checkClient(localH2O)
library(h2oWrapper)
h2oWrapper.installDepPkgs()
localH2O = h2oWrapper.init(ip = "localhost", port = 54321, startH2O = TRUE, silentUpgrade = TRUE, promptUpgrade = FALSE)

australia.hex = h2o.importFile(localH2O, system.file("extdata", "australia.csv", package="h2o"), "australia.hex")
summary(australia.hex)
Expand All @@ -13,4 +13,4 @@ print(australia.pca)
plot(australia.pca)

australia.pca2 = h2o.prcomp(australia.hex, tol = 0.5, standardize = FALSE)
print(australia.pca2)
print(australia.pca2)
8 changes: 4 additions & 4 deletions R/h2o-package/demo/h2o.randomForest.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
# It imports a data set, parses it, and prints a summary
# Then, it runs RF with 50 trees, maximum depth of 100, using the iris class as the response
# Note: This demo runs H2O on localhost:54321
library(h2o)
localH2O = new("H2OClient", ip="localhost", port=54321)
h2o.checkClient(localH2O)
library(h2oWrapper)
h2oWrapper.installDepPkgs()
localH2O = h2oWrapper.init(ip = "localhost", port = 54321, startH2O = TRUE, silentUpgrade = TRUE, promptUpgrade = FALSE)

iris.hex = h2o.importFile(localH2O, path = system.file("extdata", "iris.csv", package="h2o"), key = "iris.hex")
summary(iris.hex)
Expand All @@ -16,4 +16,4 @@ invisible(readline("Hit <Return> to continue: "))
covtype.hex = h2o.importFile(localH2O, path = system.file("extdata", "covtype.csv", package="h2o"), key = "covtype.hex")
summary(covtype.hex)
covtype.rf = h2o.randomForest(y = "Cover_Type", x = setdiff(colnames(covtype.hex), c("Cover_Type", "Aspect", "Hillshade_9am")), data = covtype.hex, ntree = 50, depth = 150)
print(covtype.rf)
print(covtype.rf)
1 change: 0 additions & 1 deletion R/h2o-package/man/H2OClient-class.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,6 @@ Objects can be created by calls of the form \code{new("H2OClient", ...)}
library(h2oWrapper)
h2oWrapper.installDepPkgs()
localH2O = h2oWrapper.init(ip = "localhost", port = 54321, startH2O = TRUE, silentUpgrade = TRUE, promptUpgrade = FALSE)
library(h2o)
showClass("H2OClient")
}
\keyword{classes}
1 change: 0 additions & 1 deletion R/h2o-package/man/h2o.checkClient.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@ h2o.checkClient(object)
library(h2oWrapper)
h2oWrapper.installDepPkgs()
localH2O = h2oWrapper.init(ip = "localhost", port = 54321, startH2O = TRUE, silentUpgrade = TRUE, promptUpgrade = FALSE)
library(h2o)
h2o.checkClient(localH2O)
}
% Add one or more standard keywords, see file 'KEYWORDS' in the
Expand Down
77 changes: 77 additions & 0 deletions R/h2o-package/man/h2o.gbm.Rd
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
\name{h2o.gbm}
\alias{h2o.gbm}
%- Also NEED an '\alias' for EACH other topic documented here.
\title{
H2O: GBM
}
%% ~~function to do ... ~~

\description{Builds gradient boosted classification trees on a parsed data set.
}
\usage{
h2o.gbm(x, y, distribution = "multinomial", data, n.trees = 10, interaction.depth = 8, n.minobsinnode = 10, shrinkage = 0.2)
}
\arguments{
\item{y}{
The name or index of the response variable. When given as an index, this is the column number, starting at 1 and increasing from left to right. (The response must be either an integer or a categorical variable).
}
\item{x}{
A vector containing the names or indices of the predictor variables to use in building the GBM model.
}
\item{distribution}{
The type of GBM model to produce: "multinomial" (the default) for classification, or "gaussian" for regression.
}
\item{data}{
An \code{\linkS4class{H2OParsedData}} object containing the variables in the model.
}
\item{n.trees}{
Number of trees to grow. Must be a nonnegative integer.
}
\item{interaction.depth}{
Maximum depth to grow the tree.
}
\item{n.minobsinnode}{
Minimum number of rows to assign to terminal nodes.
}
\item{shrinkage}{
A learning-rate parameter defining step size reduction.
}
}
\value{
An object of class \code{\linkS4class{H2OGBMModel}} with slots key, data, and model, where the last is a list of the following components:
\item{type }{The type of the tree, which currently must be classification.}
\item{n.trees }{Number of trees grown.}
\item{oob_err }{Out of bag error rate.}
\item{forest }{A matrix giving the minimum, mean, and maximum of the tree depth and number of leaves.}
\item{confusion }{Confusion matrix of the prediction.}
}
\references{

1. Elith, Jane, John R Leathwick, and Trevor Hastie. "A Working Guide to
Boosted Regression Trees." Journal of Animal Ecology 77.4 (2008): 802-813

2. Friedman, Jerome, Trevor Hastie, Saharon Rosset, Robert Tibshirani,
and Ji Zhu. "Discussion of Boosting Papers." Ann. Statist 32 (2004):
102-107

3. Hastie, Trevor, Robert Tibshirani, and J Jerome H Friedman. The
Elements of Statistical Learning.
Vol.1. N.p.: Springer New York, 2001.
http://www.stanford.edu/~hastie/local.ftp/Springer/OLD//ESLII_print4.pdf
}


\seealso{
For more information see: http://docs.0xdata.com
}
\examples{
library(h2oWrapper)
h2oWrapper.installDepPkgs()
localH2O = h2oWrapper.init(ip = "localhost", port = 54321, startH2O = TRUE, silentUpgrade = TRUE, promptUpgrade = FALSE)
# Run classification GBM on CAPSULE ~ AGE + RACE + PSA + DCAPS
prostate.hex = h2o.importURL(localH2O, path = "https://raw.github.com/0xdata/h2o/master/smalldata/logreg/prostate.csv", key = "prostate.hex")
h2o.gbm(y = "CAPSULE", x = c("AGE","RACE","PSA","DCAPS"), data = prostate.hex, n.trees = 100, interaction.depth = 8, n.minobsinnode = 10, shrinkage = 0.2)
# Run GBM with VOL as the response and myX as the predictors (pass distribution = "gaussian" for regression; the default is "multinomial")
myX = setdiff(colnames(prostate.hex), c("ID", "DPROS", "DCAPS", "VOL"))
h2o.gbm(y = "VOL", x = myX, data = prostate.hex, n.trees = 10, interaction.depth = 5, shrinkage = 0.1)
}
1 change: 0 additions & 1 deletion R/h2o-package/man/h2o.getTree.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,6 @@ The particular tree to retrieve. (Must be an integer between 1 and \code{ntree})
library(h2oWrapper)
h2oWrapper.installDepPkgs()
localH2O = h2oWrapper.init(ip = "localhost", port = 54321, startH2O = TRUE, silentUpgrade = TRUE, promptUpgrade = FALSE)
library(h2o)
irisPath = system.file("extdata", "iris.csv", package="h2o")
iris.hex = h2o.importFile(localH2O, path = irisPath, key = "iris.hex")
iris.rf = h2o.randomForest(y = 4, data = iris.hex, ntree = 50, depth = 100)
Expand Down
1 change: 0 additions & 1 deletion R/h2o-package/man/h2o.glm.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,6 @@ The slot xval is a list of \code{\linkS4class{H2OGLMModel}} objects representing
library(h2oWrapper)
h2oWrapper.installDepPkgs()
localH2O = h2oWrapper.init(ip = "localhost", port = 54321, startH2O = TRUE, silentUpgrade = TRUE, promptUpgrade = FALSE)
library(h2o)
# Run GLM of CAPSULE ~ AGE + RACE + PSA + DCAPS
prostate.hex = h2o.importURL(localH2O, path = "https://raw.github.com/0xdata/h2o/master/smalldata/logreg/prostate.csv", key = "prostate.hex")
h2o.glm(y = "CAPSULE", x = c("AGE","RACE","PSA","DCAPS"), data = prostate.hex, family =
Expand Down
1 change: 0 additions & 1 deletion R/h2o-package/man/h2o.importFile.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,6 @@ If \code{parse = TRUE}, the function returns an object of class \code{\linkS4cla
library(h2oWrapper)
h2oWrapper.installDepPkgs()
localH2O = h2oWrapper.init(ip = "localhost", port = 54321, startH2O = TRUE, silentUpgrade = TRUE, promptUpgrade = FALSE)
library(h2o)
irisPath = system.file("extdata", "iris.csv", package="h2o")
iris.hex = h2o.importFile(localH2O, path = irisPath, key = "iris.hex")
summary(iris.hex)
Expand Down
1 change: 0 additions & 1 deletion R/h2o-package/man/h2o.importFolder.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,6 @@ WARNING: In H2O, import is lazy! Do not modify the data files on hard disk until
library(h2oWrapper)
h2oWrapper.installDepPkgs()
localH2O = h2oWrapper.init(ip = "localhost", port = 54321, startH2O = TRUE, silentUpgrade = TRUE, promptUpgrade = FALSE)
library(h2o)
myPath = paste(path.package("h2o"), "extdata", sep="/")
all_files.hex = h2o.importFolder(localH2O, path = myPath)
for(i in 1:length(all_files.hex))
Expand Down
1 change: 0 additions & 1 deletion R/h2o-package/man/h2o.importHDFS.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,6 @@ When the path is a directory, if \code{parse = TRUE}, the function returns a lis
library(h2oWrapper)
h2oWrapper.installDepPkgs()
localH2O = h2oWrapper.init(ip = "localhost", port = 54321, startH2O = TRUE, silentUpgrade = TRUE, promptUpgrade = FALSE)
library(h2o)
covtype.hex = importHDFS(localH2O, path = "hdfs://192.168.1.173:54321/0xdiag/datasets/standard/covtype.data", parse = TRUE)
summary(covtype.hex)

Expand Down
1 change: 0 additions & 1 deletion R/h2o-package/man/h2o.importURL.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,6 @@ If \code{parse = TRUE}, the function returns an object of class \code{\linkS4cla
library(h2oWrapper)
h2oWrapper.installDepPkgs()
localH2O = h2oWrapper.init(ip = "localhost", port = 54321, startH2O = TRUE, silentUpgrade = TRUE, promptUpgrade = FALSE)
library(h2o)
prostate.hex = h2o.importURL(localH2O, path = "https://raw.github.com/0xdata/h2o/master/smalldata/logreg/prostate.csv", key = "prostate.hex")
summary(prostate.hex)
}
Expand Down
1 change: 0 additions & 1 deletion R/h2o-package/man/h2o.kmeans.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,6 @@ An object of class \code{\linkS4class{H2OKMeansModel}} with slots key, data, and
library(h2oWrapper)
h2oWrapper.installDepPkgs()
localH2O = h2oWrapper.init(ip = "localhost", port = 54321, startH2O = TRUE, silentUpgrade = TRUE, promptUpgrade = FALSE)
library(h2o)
prosPath = system.file("extdata", "prostate.csv", package="h2o")
prostate.hex = h2o.importFile(localH2O, path = prosPath)
h2o.kmeans(data = prostate.hex, centers = 10, cols = c("AGE", "RACE", "VOL", "GLEASON"))
Expand Down
1 change: 0 additions & 1 deletion R/h2o-package/man/h2o.parseRaw.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,6 @@ After the raw data file is parsed, it will be automatically deleted from the H2O
library(h2oWrapper)
h2oWrapper.installDepPkgs()
localH2O = h2oWrapper.init(ip = "localhost", port = 54321, startH2O = TRUE, silentUpgrade = TRUE, promptUpgrade = FALSE)
library(h2o)
prosPath = system.file("extdata", "prostate.csv", package="h2o")
prostate.raw = h2o.importFile(localH2O, path = prosPath, parse = FALSE)
# Do not modify prostate.csv on disk at this point!
Expand Down
1 change: 0 additions & 1 deletion R/h2o-package/man/h2o.prcomp.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,6 @@ The signs of the columns of the rotation matrix are arbitrary, and so may differ
library(h2oWrapper)
h2oWrapper.installDepPkgs()
localH2O = h2oWrapper.init(ip = "localhost", port = 54321, startH2O = TRUE, silentUpgrade = TRUE, promptUpgrade = FALSE)
library(h2o)
prosPath = system.file("extdata", "prostate.csv", package="h2o")
prostate.hex = h2o.importFile(localH2O, path = prosPath)
prostate.pca = h2o.prcomp(data = prostate.hex, standardize = TRUE)
Expand Down
Loading

0 comments on commit 28f47f1

Please sign in to comment.