Skip to content

Commit

Permalink
Merge branch 'master' of github.com:0xdata/h2o
Browse files Browse the repository at this point in the history
  • Loading branch information
cliffclick committed Jul 16, 2014
2 parents a4c443d + 1f40457 commit 50bc74b
Show file tree
Hide file tree
Showing 10 changed files with 172 additions and 111 deletions.
98 changes: 66 additions & 32 deletions R/h2o-package/R/Algorithms.R

Large diffs are not rendered by default.

11 changes: 7 additions & 4 deletions R/h2o-package/man/h2o.SpeeDRF.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,10 @@ H2O: Single-Node Random Forest
Performs single-node random forest classification on a data set.
}
\usage{
h2o.SpeeDRF(x, y, data, classification = TRUE, nfolds = 0, validation, mtry = -1,
ntree = 50, depth = 50, sample.rate = 2/3, oobee = TRUE, importance = FALSE,
nbins = 1024, seed = -1, stat.type = "ENTROPY", balance.classes = FALSE,
verbose = FALSE)
h2o.SpeeDRF(x, y, data, key = "", classification = TRUE, nfolds = 0, validation,
mtry = -1, ntree = 50, depth = 50, sample.rate = 2/3, oobee = TRUE,
importance = FALSE, nbins = 1024, seed = -1, stat.type = "ENTROPY",
balance.classes = FALSE, verbose = FALSE)
}
%- maybe also 'usage' for other objects documented here.
\arguments{
Expand All @@ -22,6 +22,9 @@ The name or index of the response variable. If the data does not contain a heade
}
\item{data}{
An \code{\linkS4class{H2OParsedData}} object containing the variables in the model.
}
\item{key}{
(Optional) The unique hex key assigned to the resulting model. If none is given, a key will automatically be generated.
}
\item{classification}{
(Optional) A logical value indicating whether a classification model should be built (as opposed to regression).
Expand Down
5 changes: 3 additions & 2 deletions R/h2o-package/man/h2o.deeplearning.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ H2O: Deep Learning Neural Networks
Performs Deep Learning neural networks on an \code{\linkS4class{H2OParsedData}} object.
}
\usage{
h2o.deeplearning(x, y, data, classification = TRUE, nfolds = 0, validation,
autoencoder, use_all_factor_levels,
h2o.deeplearning(x, y, data, key = "", classification = TRUE, nfolds = 0,
validation, autoencoder, use_all_factor_levels,
activation, hidden, epochs, train_samples_per_iteration, seed, adaptive_rate,
rho, epsilon, rate, rate_annealing, rate_decay, momentum_start,
momentum_ramp, momentum_stable, nesterov_accelerated_gradient,
Expand All @@ -28,6 +28,7 @@ h2o.deeplearning(x, y, data, classification = TRUE, nfolds = 0, validation,
\item{x}{ A vector containing the names of the predictors in the model. }
\item{y}{ The name of the response variable in the model. }
\item{data}{ An \code{\linkS4class{H2OParsedData}} object containing the variables in the model. }
\item{key}{ (Optional) The unique hex key assigned to the resulting model. If none is given, a key will automatically be generated.}
\item{classification}{ (Optional) A logical value indicating whether the algorithm should conduct classification. }
\item{nfolds}{(Optional) Number of folds for cross-validation. If \code{nfolds >= 2}, then \code{validation} must remain empty.}
\item{validation}{(Optional) An \code{\linkS4class{H2OParsedData}} object indicating the validation dataset used to construct confusion matrix. If left blank, this defaults to the training data when \code{nfolds = 0}.}
Expand Down
10 changes: 7 additions & 3 deletions R/h2o-package/man/h2o.gbm.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,10 @@ H2O: Gradient Boosted Machines
on a parsed data set.
}
\usage{
h2o.gbm(x, y, distribution = "multinomial", data, n.trees = 10, interaction.depth = 5,
n.minobsinnode = 10, shrinkage = 0.1, n.bins = 100, importance = FALSE, nfolds = 0,
validation, balance.classes = FALSE, max.after.balance.size = 5)
h2o.gbm(x, y, distribution = "multinomial", data, key = "", n.trees = 10,
interaction.depth = 5, n.minobsinnode = 10, shrinkage = 0.1, n.bins = 100,
importance = FALSE, nfolds = 0, validation, balance.classes = FALSE,
max.after.balance.size = 5)
}
\arguments{
\item{x}{
Expand All @@ -26,6 +27,9 @@ The type of GBM model to be produced: classification is "multinomial" (default),
}
\item{data}{
An \code{\linkS4class{H2OParsedData}} object containing the variables in the model.
}
\item{key}{
(Optional) The unique hex key assigned to the resulting model. If none is given, a key will automatically be generated.
}
\item{n.trees}{
(Optional) Number of trees to grow. Must be a nonnegative integer.
Expand Down
14 changes: 9 additions & 5 deletions R/h2o-package/man/h2o.glm.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,12 @@ H2O: Generalized Linear Models
Fit a generalized linear model, specified by a response variable, a set of predictors, and a description of the error distribution.
}
\usage{
h2o.glm(x, y, data, family, nfolds = 0, alpha = 0.5, nlambda = -1, lambda.min.ratio = -1,
lambda = 1e-5, epsilon = 1e-4, standardize = TRUE, prior, variable_importances = 1,
use_all_factor_levels = 0, tweedie.p = ifelse(family == 'tweedie', 1.5,
as.numeric(NA)), iter.max = 100, higher_accuracy = FALSE, lambda_search = FALSE,
return_all_lambda = FALSE, max_predictors = -1)
h2o.glm(x, y, data, key = "", family, nfolds = 0, alpha = 0.5, nlambda = -1,
lambda.min.ratio = -1, lambda = 1e-5, epsilon = 1e-4, standardize = TRUE,
prior, variable_importances = 1, use_all_factor_levels = 0, tweedie.p =
ifelse(family == 'tweedie', 1.5, as.numeric(NA)), iter.max = 100,
higher_accuracy = FALSE, lambda_search = FALSE, return_all_lambda = FALSE,
max_predictors = -1)
}
\arguments{
\item{x}{
Expand All @@ -22,6 +23,9 @@ The name of the response variable in the model.
}
\item{data}{
An \code{\linkS4class{H2OParsedData}} object containing the variables in the model.
}
\item{key}{
(Optional) The unique hex key assigned to the resulting model. If none is given, a key will automatically be generated.
}
\item{family}{
A description of the error distribution and corresponding link function to be used in the model. Currently, Gaussian, binomial, Poisson, gamma, and Tweedie are supported. When a model is specified as Tweedie, users must also specify the appropriate Tweedie power.
Expand Down
119 changes: 61 additions & 58 deletions R/h2o-package/man/h2o.kmeans.Rd
Original file line number Diff line number Diff line change
@@ -1,58 +1,61 @@
\name{h2o.kmeans}
\alias{h2o.kmeans}
\title{
H2O: K-Means Clustering
}
\description{Performs k-means clustering on a data set.}
\usage{
h2o.kmeans(data, centers, cols = "", iter.max = 10, normalize = FALSE,
init = "none", seed = 0, dropNACols = FALSE)
}
%- maybe also 'usage' for other objects documented here.
\arguments{
\item{data}{
An \code{\linkS4class{H2OParsedData}} object containing the variables in the model.
}
\item{centers}{
The number of clusters k.
}
\item{cols}{
(Optional) A vector containing the names of the data columns on which k-means runs. If blank, k-means clustering will be run on the entire data set.
}
\item{iter.max}{
(Optional) The maximum number of iterations allowed.
}
\item{normalize}{
(Optional) A logical value indicating whether the data should be normalized before running k-means.
}
\item{init}{
(Optional) Method by which to select the k initial cluster centroids. Possible values are \code{"none"} for random initialization, \code{"plusplus"} for k-means++ initialization, and \code{"furthest"} for initialization at the furthest point from each successive centroid. See the \href{http://docs.0xdata.com/datascience/kmeans.html}{H2O K-means documentation} for more details.
}
\item{seed}{
(Optional) Random seed used to initialize the cluster centroids.
}
\item{dropNACols}{
(Optional) A logical value indicating whether to drop columns with more than 10\% entries that are NA.
}
}
\value{
An object of class \code{\linkS4class{H2OKMeansModel}} with slots key, data, and model, where the last is a list of the following components:
\item{centers }{A matrix of cluster centers.}
\item{cluster }{A \code{\linkS4class{H2OParsedData}} object containing the vector of integers (from 1 to k), which indicate the cluster to which each point is allocated.}
\item{size }{The number of points in each cluster.}
\item{withinss }{Vector of within-cluster sum of squares, with one component per cluster.}
\item{tot.withinss }{Total within-cluster sum of squares, i.e., sum(withinss).}
}

\seealso{
%% ~~objects to See Also as \code{\link{help}}, ~~~
\code{\link{h2o.importFile}, \link{h2o.importFolder}, \link{h2o.importHDFS}, \link{h2o.importURL}, \link{h2o.uploadFile}}
}

\examples{
library(h2o)
localH2O = h2o.init(ip = "localhost", port = 54321, startH2O = TRUE)
prosPath = system.file("extdata", "prostate.csv", package = "h2o")
prostate.hex = h2o.importFile(localH2O, path = prosPath)
h2o.kmeans(data = prostate.hex, centers = 10, cols = c("AGE", "RACE", "VOL", "GLEASON"))
}
\name{h2o.kmeans}
\alias{h2o.kmeans}
\title{
H2O: K-Means Clustering
}
\description{Performs k-means clustering on a data set.}
\usage{
h2o.kmeans(data, centers, cols = "", key = "", iter.max = 10,
normalize = FALSE, init = "none", seed = 0, dropNACols = FALSE)
}
%- maybe also 'usage' for other objects documented here.
\arguments{
\item{data}{
An \code{\linkS4class{H2OParsedData}} object containing the variables in the model.
}
\item{centers}{
The number of clusters k.
}
\item{cols}{
(Optional) A vector containing the names of the data columns on which k-means runs. If blank, k-means clustering will be run on the entire data set.
}
\item{key}{
(Optional) The unique hex key assigned to the resulting model. If none is given, a key will automatically be generated.
}
\item{iter.max}{
(Optional) The maximum number of iterations allowed.
}
\item{normalize}{
(Optional) A logical value indicating whether the data should be normalized before running k-means.
}
\item{init}{
(Optional) Method by which to select the k initial cluster centroids. Possible values are \code{"none"} for random initialization, \code{"plusplus"} for k-means++ initialization, and \code{"furthest"} for initialization at the furthest point from each successive centroid. See the \href{http://docs.0xdata.com/datascience/kmeans.html}{H2O K-means documentation} for more details.
}
\item{seed}{
(Optional) Random seed used to initialize the cluster centroids.
}
\item{dropNACols}{
(Optional) A logical value indicating whether to drop columns in which more than 10\% of the entries are NA.
}
}
\value{
An object of class \code{\linkS4class{H2OKMeansModel}} with slots key, data, and model, where the last is a list of the following components:
\item{centers }{A matrix of cluster centers.}
\item{cluster }{A \code{\linkS4class{H2OParsedData}} object containing a vector of integers (from 1 to k) indicating the cluster to which each point is allocated.}
\item{size }{The number of points in each cluster.}
\item{withinss }{Vector of within-cluster sum of squares, with one component per cluster.}
\item{tot.withinss }{Total within-cluster sum of squares, i.e., sum(withinss).}
}

\seealso{
%% ~~objects to See Also as \code{\link{help}}, ~~~
\code{\link{h2o.importFile}, \link{h2o.importFolder}, \link{h2o.importHDFS}, \link{h2o.importURL}, \link{h2o.uploadFile}}
}

\examples{
library(h2o)
localH2O = h2o.init(ip = "localhost", port = 54321, startH2O = TRUE)
prosPath = system.file("extdata", "prostate.csv", package = "h2o")
prostate.hex = h2o.importFile(localH2O, path = prosPath)
h2o.kmeans(data = prostate.hex, centers = 10, cols = c("AGE", "RACE", "VOL", "GLEASON"))
}
5 changes: 4 additions & 1 deletion R/h2o-package/man/h2o.naiveBayes.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ H2O: Naive Bayes Classifier
\description{Builds a naive Bayes classifier on a parsed data set.
}
\usage{
h2o.naiveBayes(x, y, data, laplace = 0, dropNACols = FALSE)
h2o.naiveBayes(x, y, data, key = "", laplace = 0, dropNACols = FALSE)
}
\arguments{
\item{x}{
Expand All @@ -20,6 +20,9 @@ The name of the response variable in the model.
}
\item{data}{
An \code{\linkS4class{H2OParsedData}} (\code{version = 2}) object containing the variables in the model.
}
\item{key}{
(Optional) The unique hex key assigned to the resulting model. If none is given, a key will automatically be generated.
}
\item{laplace}{
(Optional) A non-negative number controlling Laplace smoothing. The default (0) disables Laplace smoothing.
Expand Down
5 changes: 4 additions & 1 deletion R/h2o-package/man/h2o.pcr.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
Runs GLM regression on PCA results, and allows for transformation of test data to match PCA transformations of training data.
}
\usage{
h2o.pcr(x, y, data, ncomp, family, nfolds = 10, alpha = 0.5, lambda = 1e-05,
h2o.pcr(x, y, data, key = "", ncomp, family, nfolds = 10, alpha = 0.5, lambda = 1e-05,
epsilon = 1e-05, tweedie.p)
}

Expand All @@ -21,6 +21,9 @@ h2o.pcr(x, y, data, ncomp, family, nfolds = 10, alpha = 0.5, lambda = 1e-05,
\item{data}{
An \code{\linkS4class{H2OParsedData}} object containing the variables in the model.
}
\item{key}{
(Optional) The unique hex key assigned to the resulting model. If none is given, a key will automatically be generated.
}
\item{ncomp}{
A number indicating the number of principal components to use in the regression model.
}
Expand Down
5 changes: 4 additions & 1 deletion R/h2o-package/man/h2o.prcomp.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ Principal Components Analysis
Performs principal components analysis on the given data set.
}
\usage{
h2o.prcomp(data, tol = 0, cols = "", standardize = TRUE, retx = FALSE)
h2o.prcomp(data, tol = 0, cols = "", key = "", standardize = TRUE, retx = FALSE)
}
%- maybe also 'usage' for other objects documented here.
\arguments{
Expand All @@ -21,6 +21,9 @@ h2o.prcomp(data, tol = 0, cols = "", standardize = TRUE, retx = FALSE)
\item{cols}{
(Optional) A vector of column names or indices indicating the features to perform PCA on. By default, all columns in the dataset are analyzed.
}
\item{key}{
(Optional) The unique hex key assigned to the resulting model. If none is given, a key will automatically be generated.
}
\item{standardize}{
(Optional) A logical value indicating whether the variables should be shifted to be zero centered and scaled to have unit variance before the analysis takes place.
}
Expand Down
11 changes: 7 additions & 4 deletions R/h2o-package/man/h2o.randomForest.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,10 @@ H2O: Random Forest
Performs random forest classification on a data set.
}
\usage{
h2o.randomForest(x, y, data, classification = TRUE, ntree = 50, depth = 20,
mtries = -1, sample.rate = 2/3, nbins = 100, seed = -1, importance = FALSE,
nfolds = 0, validation, nodesize = 1, balance.classes = FALSE,
max.after.balance.size = 5)
h2o.randomForest(x, y, data, key = "", classification = TRUE, ntree = 50,
depth = 20, mtries = -1, sample.rate = 2/3, nbins = 100, seed = -1,
importance = FALSE, nfolds = 0, validation, nodesize = 1,
balance.classes = FALSE, max.after.balance.size = 5)
}
%- maybe also 'usage' for other objects documented here.
\arguments{
Expand All @@ -22,6 +22,9 @@ The name or index of the response variable. If the data does not contain a heade
}
\item{data}{
An \code{\linkS4class{H2OParsedData}} object containing the variables in the model.
}
\item{key}{
(Optional) The unique hex key assigned to the resulting model. If none is given, a key will automatically be generated.
}
\item{classification}{
(Optional) A logical value indicating whether a classification model should be built (as opposed to regression).
Expand Down

0 comments on commit 50bc74b

Please sign in to comment.