Skip to content

Commit

Permalink
Merge branch 'master' into summary2
Browse files Browse the repository at this point in the history
  • Loading branch information
tongxin committed Nov 12, 2013
2 parents 271175e + cdff5e6 commit 28224ff
Show file tree
Hide file tree
Showing 33 changed files with 654 additions and 515 deletions.
98 changes: 48 additions & 50 deletions R/examples/H2OExec2Demo.R
Original file line number Diff line number Diff line change
@@ -1,51 +1,49 @@
library(h2o)
h2o.installDepPkgs()
myIP = "127.0.0.1"; myPort = 54321
localH2O = h2o.init(ip = myIP, port = myPort, startH2O = TRUE, silentUpgrade = FALSE, promptUpgrade = TRUE)

# Import iris file to H2O
prosPath = system.file("extdata", "prostate.csv", package="h2oRClient")
prostate.hex = h2o.importFile.FV(localH2O, path = prosPath, key = "prostate.hex")

# Print out basic summary
summary(prostate.hex)
head(prostate.hex)
tail(prostate.hex)

# Get quantiles and examine outliers
prostate.qs = quantile(prostate.hex$PSA)
prostate.qs

# Note: Right now, assignment must be done manually with h2o.assign!
outliers.low = prostate.hex[prostate.hex$PSA <= prostate.qs[2],]
outliers.low = h2o.assign(outliers.low, "PSA.low")
outliers.high = prostate.hex[prostate.hex$PSA >= prostate.qs[10],]
outliers.high = h2o.assign(outliers.high, "PSA.high")

nrow(outliers.low) + nrow(outliers.high)
head(outliers.low); tail(outliers.low)
head(outliers.high); tail(outliers.high)

# Drop outliers from data
prostate.trim = prostate.hex[prostate.hex$PSA > prostate.qs[2],]
prostate.trim = h2o.assign(prostate.trim, "prostate.trim")
prostate.trim = prostate.trim[prostate.trim$PSA < prostate.qs[10],]
prostate.trim = h2o.assign(prostate.trim, "prostate.trim")
nrow(prostate.trim)

# Construct test and training sets
s = runif(nrow(prostate.hex))
prostate.train = prostate.hex[s <= 0.8,]
prostate.train = h2o.assign(prostate.train, "prostate.train")
prostate.test = prostate.hex[s > 0.8,]
prostate.test = h2o.assign(prostate.test, "prostate.test")
nrow(prostate.train) + nrow(prostate.test)

# Run GBM on training set and predict on test set
myY = "CAPSULE"; myX = setdiff(colnames(prostate.train), c(myY, "ID"))
prostate.gbm = h2o.gbm(x = myX, y = myY, distribution = "multinomial", data = prostate.train)
prostate.gbm
prostate.pred = h2o.predict(prostate.gbm, prostate.test)
summary(prostate.pred)
head(prostate.pred)
# Demo of H2O Exec2 expressions from R: import a data set, slice it with
# logical row masks, split it into train/test frames and fit a GBM.
library(h2o)
h2o.installDepPkgs()

# Address of the H2O instance used by this demo
myIP <- "127.0.0.1"
myPort <- 54321
localH2O <- h2o.init(
  ip = myIP, port = myPort, startH2O = TRUE,
  silentUpgrade = FALSE, promptUpgrade = TRUE
)

# Import the prostate data set shipped with h2oRClient into H2O
prosPath <- system.file("extdata", "prostate.csv", package = "h2oRClient")
prostate.hex <- h2o.importFile.FV(localH2O, path = prosPath, key = "prostate.hex")

# Print out basic summaries of the imported frame
summary(prostate.hex)
head(prostate.hex)
tail(prostate.hex)
# Note: table() currently only works on a single integer/factor column
table(prostate.hex$RACE)

# Get quantiles of PSA; they are the cut-offs for the outlier mask below
prostate.qs <- quantile(prostate.hex$PSA)
print(prostate.qs)

# Note: Right now, assignment must be done manually with h2o.assign!
# The row mask is built once so it can be reused; the one-step form would be
#   prostate.hex[prostate.hex$PSA <= prostate.qs[2] | prostate.hex$PSA >= prostate.qs[10], ]
PSA.outliers.ind <- prostate.hex$PSA <= prostate.qs[2] | prostate.hex$PSA >= prostate.qs[10]
PSA.outliers <- prostate.hex[PSA.outliers.ind, ]
PSA.outliers <- h2o.assign(PSA.outliers, "PSA.outliers")
nrow(PSA.outliers)
head(PSA.outliers)
tail(PSA.outliers)

# Drop outliers from data by negating the mask; the one-step form would be
#   prostate.hex[prostate.hex$PSA > prostate.qs[2] & prostate.hex$PSA < prostate.qs[10], ]
# (element-wise `&`, not the scalar `&&`)
prostate.trim <- prostate.hex[!PSA.outliers.ind, ]
prostate.trim <- h2o.assign(prostate.trim, "prostate.trim")
nrow(prostate.trim)

# Construct test and training sets from one uniform draw per row
s <- runif(nrow(prostate.hex))
prostate.train <- prostate.hex[s <= 0.8, ]
prostate.train <- h2o.assign(prostate.train, "prostate.train")
prostate.test <- prostate.hex[s > 0.8, ]
prostate.test <- h2o.assign(prostate.test, "prostate.test")
# Sanity check: the two parts should add up to nrow(prostate.hex)
nrow(prostate.train) + nrow(prostate.test)

# Run GBM on training set and predict on test set.
# Every column except the response and "ID" is used as a predictor.
myY <- "CAPSULE"
myX <- setdiff(colnames(prostate.train), c(myY, "ID"))
prostate.gbm <- h2o.gbm(x = myX, y = myY, distribution = "multinomial", data = prostate.train)
print(prostate.gbm)
prostate.pred <- h2o.predict(prostate.gbm, prostate.test)
summary(prostate.pred)
head(prostate.pred)
tail(prostate.pred)
117 changes: 57 additions & 60 deletions R/h2oRClient-package/R/Classes.R
Original file line number Diff line number Diff line change
Expand Up @@ -406,7 +406,7 @@ setMethod("h2o.factor", signature(data="H2OParsedData", col="character"),
h2o.factor(data, ind-1)
})

#--------------------------------- FluidVecs --------------------------------------#
#------------------------------------ FluidVecs ----------------------------------------#
setMethod("show", "H2ORawData2", function(object) {
print(object@h2o)
cat("Raw Data Key:", object@key, "\n")
Expand All @@ -415,6 +415,7 @@ setMethod("show", "H2ORawData2", function(object) {
setMethod("show", "H2OParsedData2", function(object) {
print(object@h2o)
cat("Parsed Data Key:", object@key, "\n")
if(ncol(object) <= 1000) print(head(object))
})

setMethod("[", "H2OParsedData2", function(x, i, j, ..., drop = TRUE) {
Expand Down Expand Up @@ -490,41 +491,61 @@ setMethod("$", "H2OParsedData2", function(x, name) {
new("H2OParsedData2", h2o=x@h2o, key=res$dest_key)
})

setMethod("+", c("H2OParsedData2", "H2OParsedData2"), function(e1, e2) { h2o.__operator2("+", e1, e2) })
setMethod("-", c("H2OParsedData2", "H2OParsedData2"), function(e1, e2) { h2o.__operator2("-", e1, e2) })
setMethod("*", c("H2OParsedData2", "H2OParsedData2"), function(e1, e2) { h2o.__operator2("*", e1, e2) })
setMethod("/", c("H2OParsedData2", "H2OParsedData2"), function(e1, e2) { h2o.__operator2("/", e1, e2) })
# setMethod("%%", c("H2OParsedData2", "H2OParsedData2"), function(e1, e2) { h2o.__operator2("%", e1, e2) })
setMethod("==", c("H2OParsedData2", "H2OParsedData2"), function(e1, e2) { h2o.__operator2("==", e1, e2) })
setMethod(">", c("H2OParsedData2", "H2OParsedData2"), function(e1, e2) { h2o.__operator2(">", e1, e2) })
setMethod("<", c("H2OParsedData2", "H2OParsedData2"), function(e1, e2) { h2o.__operator2("<", e1, e2) })
setMethod("!=", c("H2OParsedData2", "H2OParsedData2"), function(e1, e2) { h2o.__operator2("!=", e1, e2) })
setMethod(">=", c("H2OParsedData2", "H2OParsedData2"), function(e1, e2) { h2o.__operator2(">=", e1, e2) })
setMethod("<=", c("H2OParsedData2", "H2OParsedData2"), function(e1, e2) { h2o.__operator2("<=", e1, e2) })

setMethod("+", c("numeric", "H2OParsedData2"), function(e1, e2) { h2o.__operator2("+", e1, e2) })
setMethod("-", c("numeric", "H2OParsedData2"), function(e1, e2) { h2o.__operator2("-", e1, e2) })
setMethod("*", c("numeric", "H2OParsedData2"), function(e1, e2) { h2o.__operator2("*", e1, e2) })
setMethod("/", c("numeric", "H2OParsedData2"), function(e1, e2) { h2o.__operator2("/", e1, e2) })
# setMethod("%%", c("numeric", "H2OParsedData2"), function(e1, e2) { h2o.__operator2("%", e1, e2) })
setMethod("==", c("numeric", "H2OParsedData2"), function(e1, e2) { h2o.__operator2("==", e1, e2) })
setMethod(">", c("numeric", "H2OParsedData2"), function(e1, e2) { h2o.__operator2(">", e1, e2) })
setMethod("<", c("numeric", "H2OParsedData2"), function(e1, e2) { h2o.__operator2("<", e1, e2) })
setMethod("!=", c("numeric", "H2OParsedData2"), function(e1, e2) { h2o.__operator2("!=", e1, e2) })
setMethod(">=", c("numeric", "H2OParsedData2"), function(e1, e2) { h2o.__operator2(">=", e1, e2) })
setMethod("<=", c("numeric", "H2OParsedData2"), function(e1, e2) { h2o.__operator2("<=", e1, e2) })

setMethod("+", c("H2OParsedData2", "numeric"), function(e1, e2) { h2o.__operator2("+", e1, e2) })
setMethod("-", c("H2OParsedData2", "numeric"), function(e1, e2) { h2o.__operator2("-", e1, e2) })
setMethod("*", c("H2OParsedData2", "numeric"), function(e1, e2) { h2o.__operator2("*", e1, e2) })
setMethod("/", c("H2OParsedData2", "numeric"), function(e1, e2) { h2o.__operator2("/", e1, e2) })
# setMethod("%%", c("H2OParsedData2", "numeric"), function(e1, e2) { h2o.__operator2("%", e1, e2) })
setMethod("==", c("H2OParsedData2", "numeric"), function(e1, e2) { h2o.__operator2("==", e1, e2) })
setMethod(">", c("H2OParsedData2", "numeric"), function(e1, e2) { h2o.__operator2(">", e1, e2) })
setMethod("<", c("H2OParsedData2", "numeric"), function(e1, e2) { h2o.__operator2("<", e1, e2) })
setMethod("!=", c("H2OParsedData2", "numeric"), function(e1, e2) { h2o.__operator2("!=", e1, e2) })
setMethod(">=", c("H2OParsedData2", "numeric"), function(e1, e2) { h2o.__operator2(">=", e1, e2) })
setMethod("<=", c("H2OParsedData2", "numeric"), function(e1, e2) { h2o.__operator2("<=", e1, e2) })
# Binary operators on FluidVec frames.
# Every operator is registered for three operand pairings (frame/frame,
# numeric/frame, frame/numeric) and simply forwards to h2o.__binop2 with the
# operator token used in the Exec2 expression. The token differs from the R
# operator for "%%" ("%"), "&" ("&&") and "|" ("||").
local({
  # R operator -> token handed to h2o.__binop2
  exec_ops <- c(
    "+" = "+", "-" = "-", "*" = "*", "/" = "/", "%%" = "%",
    "==" = "==", ">" = ">", "<" = "<", "!=" = "!=", ">=" = ">=", "<=" = "<=",
    "&" = "&&", "|" = "||"
  )

  operand_sigs <- list(
    c("H2OParsedData2", "H2OParsedData2"),
    c("numeric", "H2OParsedData2"),
    c("H2OParsedData2", "numeric")
  )

  # Factory so that each method closes over its own token rather than the
  # loop variable.
  make_method <- function(exec_op) {
    force(exec_op)
    function(e1, e2) { h2o.__binop2(exec_op, e1, e2) }
  }

  for (sig in operand_sigs) {
    for (r_op in names(exec_ops)) {
      setMethod(r_op, sig, make_method(exec_ops[[r_op]]))
    }
  }
  invisible(NULL)
})

# Unary operators and functions on FluidVec frames.
# Each R function forwards to h2o.__unop2 with the function name used in the
# Exec2 expression; the name differs for sign ("sgn") and ceiling ("ceil").
local({
  # R function -> name handed to h2o.__unop2
  exec_funs <- c(
    "!" = "!", "abs" = "abs", "sign" = "sgn", "sqrt" = "sqrt",
    "ceiling" = "ceil", "floor" = "floor", "log" = "log", "exp" = "exp",
    "sum" = "sum", "is.na" = "is.na"
  )

  for (r_fun in names(exec_funs)) {
    # A fresh frame per iteration keeps each method bound to its own name.
    local({
      exec_fun <- exec_funs[[r_fun]]
      setMethod(r_fun, "H2OParsedData2", function(x) { h2o.__unop2(exec_fun, x) })
    })
  }
  invisible(NULL)
})

# `table` for H2O frames.
#
# A file-level `table` is needed so that setMethod() can build an S4 generic
# that dispatches on `object`. This definition masks base::table, so its
# default must still work on ordinary R objects: it forwards everything to
# base::table instead of dispatching to S3 methods that do not exist (which
# made table() fail with "no applicable method" on plain vectors and factors).
#
# object: an H2OParsedData2 frame, or anything base::table accepts.
# ...:    further arguments for base::table; ignored for H2O frames.
# Returns the result of h2o.__unop2("table", object) for H2O frames and a
# regular contingency table otherwise.
# Note: when falling back to base::table the dimension name is deparsed from
# this wrapper's argument; pass `dnn` to control it.
table <- function(object, ...) { base::table(object, ...) }
setMethod("table", "H2OParsedData2", function(object, ...) { h2o.__unop2("table", object) })

setMethod("colnames", "H2OParsedData2", function(x) {
res = h2o.__remoteSend(x@h2o, h2o.__PAGE_INSPECT2, src_key=x@key)
Expand All @@ -539,15 +560,6 @@ setMethod("nrow", "H2OParsedData2", function(x) {
setMethod("ncol", "H2OParsedData2", function(x) {
res = h2o.__remoteSend(x@h2o, h2o.__PAGE_INSPECT2, src_key=x@key); as.numeric(res$numCols) })

setMethod("sign", "H2OParsedData2", function(x) {
expr = paste("sgn(", x@key, ")")
res = h2o.__exec2(x@h2o, expr)
if(res$num_rows == 0 && res$num_cols == 0)
res$scalar
else
new("H2OParsedData2", h2o=x@h2o, key=res$dest_key)
})

setMethod("min", "H2OParsedData2", function(x) {
res = h2o.__remoteSend(x@h2o, h2o.__PAGE_INSPECT2, src_key=x@key)
min(sapply(res$cols, function(x) { x$min }))
Expand All @@ -564,12 +576,6 @@ setMethod("range", "H2OParsedData2", function(x) {
c(min(temp[1,]), max(temp[2,]))
})

setMethod("sum", "H2OParsedData2", function(x) {
expr = paste("sum(", x@key, ")", sep="")
res = h2o.__exec2(x@h2o, expr)
res$scalar
})

setMethod("colMeans", "H2OParsedData2", function(x) {
res = h2o.__remoteSend(x@h2o, h2o.__PAGE_INSPECT2, src_key=x@key)
temp = sapply(res$cols, function(x) { x$mean })
Expand All @@ -594,15 +600,6 @@ setMethod("tail", "H2OParsedData2", function(x, n = 6L, ...) {
tail(new("H2OParsedData", h2o=x@h2o, key=x@key), n, ...)
})

setMethod("is.na", "H2OParsedData2", function(x) {
expr = paste("is.na(", x@key, ")")
res = h2o.__exec2(x@h2o, expr)
if(res$num_rows == 0 && res$num_cols == 0)
res$scalar
else
new("H2OLogicalData2", h2o=x@h2o, key=res$dest_key)
})

setMethod("is.factor", "H2OParsedData2", function(x) {
res = h2o.__remoteSend(x@h2o, h2o.__PAGE_SUMMARY2, source=x@key)
temp = sapply(res$summaries, function(x) { is.null(x$domains) })
Expand Down
27 changes: 20 additions & 7 deletions R/h2oRClient-package/R/Internal.R
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
# Hack to get around Exec.json always dumping to same Result.hex key
# TODO: Need better way to manage temporary/intermediate values in calculations! Right now, overwriting may occur silently
pkg.env = new.env()
pkg.env$result_count = 0
pkg.env$temp_count = 0
pkg.env$IS_LOGGING = FALSE
TEMP_KEY = "Last.value"
RESULT_MAX = 100
LOGICAL_OPERATORS = c("==", ">", "<", "!=", ">=", "<=")
RESULT_MAX = 200
# Exec2 operators whose result is a logical frame. h2o.__unop2 checks
# membership in this set to decide whether a result is wrapped as
# H2OLogicalData2 or H2OParsedData2.
# "is.na" is listed because is.na() is dispatched through h2o.__unop2 and must
# keep returning a logical frame, as the dedicated method it replaced did.
LOGICAL_OPERATORS = c("==", ">", "<", "!=", ">=", "<=", "&&", "||", "!", "is.na")

# Initialize functions for R logging
myPath = paste(Sys.getenv("HOME"), "Library/Application Support/h2o", sep="/")
Expand Down Expand Up @@ -290,13 +292,24 @@ h2o.__exec2_dest_key <- function(client, expr, destKey) {
return(res)
}

h2o.__operator2 <- function(op, x, y) {
# Apply a unary Exec2 operator or function to an H2O frame.
#
# op: operator/function name as written in the Exec2 expression (e.g. "abs").
# x:  object with @h2o (client) and @key (frame key) slots.
#
# Builds the expression "op ( key )", runs it through h2o.__exec2 and returns
#   - res$scalar when the result has zero rows and zero columns,
#   - an H2OLogicalData2 handle when op is listed in LOGICAL_OPERATORS,
#   - an H2OParsedData2 handle otherwise,
# where the handle points at res$dest_key on the same client.
h2o.__unop2 <- function(op, x) {
  expr <- paste(op, "(", x@key, ")", sep = " ")
  res <- h2o.__exec2(x@h2o, expr)

  # TODO: If logical operator, need to indicate
  is_scalar <- res$num_rows == 0 && res$num_cols == 0
  if (is_scalar) {
    return(res$scalar)
  }

  result_class <- if (op %in% LOGICAL_OPERATORS) "H2OLogicalData2" else "H2OParsedData2"
  new(result_class, h2o = x@h2o, key = res$dest_key)
}

h2o.__binop2 <- function(op, x, y) {
# if(!((ncol(x) == 1 || class(x) == "numeric") && (ncol(y) == 1 || class(y) == "numeric")))
# stop("Can only operate on single column vectors")
LHS = ifelse(class(x) == "H2OParsedData2", x@key, x)
RHS = ifelse(class(y) == "H2OParsedData2", y@key, y)
LHS = ifelse(class(x) == "H2OParsedData2" || class(x) == "H2OLogicalData2", x@key, x)
RHS = ifelse(class(y) == "H2OParsedData2" || class(y) == "H2OLogicalData2", y@key, y)
expr = paste(LHS, op, RHS)
if(class(x) == "H2OParsedData2") myClient = x@h2o
if(class(x) == "H2OParsedData2" || class(x) == "H2OLogicalData2") myClient = x@h2o
else myClient = y@h2o
res = h2o.__exec2(myClient, expr)

Expand All @@ -306,4 +319,4 @@ h2o.__operator2 <- function(op, x, y) {
new("H2OLogicalData2", h2o=myClient, key=res$dest_key)
else
new("H2OParsedData2", h2o=myClient, key=res$dest_key)
}
}
30 changes: 30 additions & 0 deletions lib/resources/h2o/css/graphs.css
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@
/* Bars carrying the "negative" class are filled brown. */
.bar.negative { fill: brown; }

/* Any bar under the pointer is filled orange. */
.bar:hover { fill: orange; }

.axis text {
font: 10px sans-serif;
Expand All @@ -16,3 +19,30 @@
stroke: #000;
shape-rendering: crispEdges;
}

/* Box with id "d3tip" (presumably the chart tooltip; confirm against the
   page script). Absolutely positioned and transparent to pointer events. */
#d3tip {
  /* placement and size */
  position: absolute;
  width: 120px;
  height: auto;
  padding: 2px;
  pointer-events: none;

  /* appearance */
  background: lightsteelblue;
  border: 0;

  /* rounded corners; prefixed forms precede the standard property */
  -webkit-border-radius: 10px;
  -moz-border-radius: 10px;
  border-radius: 10px;

  /* drop shadow; prefixed forms precede the standard property */
  -webkit-box-shadow: 4px 4px 10px rgba(0, 0, 0, 0.4);
  -moz-box-shadow: 4px 4px 10px rgba(0, 0, 0, 0.4);
  box-shadow: 4px 4px 10px rgba(0, 0, 0, 0.4);
}

/* Adding the "hidden" class removes the box from the layout. */
#d3tip.hidden {
  display: none;
}

/* Paragraphs inside the box: centred small sans-serif text without margins. */
#d3tip p {
  margin: 0;
  text-align: center;
  font-family: sans-serif;
  font-size: 12px;
}
Loading

0 comments on commit 28224ff

Please sign in to comment.