Skip to content

Commit

Permalink
Merge branch 'master' of https://github.com/h2oai/h2o
Browse files Browse the repository at this point in the history
  • Loading branch information
arnocandel committed May 15, 2015
2 parents f2926d8 + c53ced7 commit ed59791
Show file tree
Hide file tree
Showing 17 changed files with 210 additions and 95 deletions.
1 change: 1 addition & 0 deletions R/h2o-package/NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ export('h2o.shutdown', 'h2o.init', 'h2o.coxph', 'h2o.coxph.control', 'h2o.glm',
'h2o.importFolder', 'h2o.uploadFile', 'h2o.parseRaw', 'h2o.ls',
'h2o.rm', 'h2o.cut', '.', 'h2o..', 'h2o.ddply', 'cbind.H2OParsedData')
S3method("as.Date", "H2OParsedData")
S3method("as.numeric", "H2OParsedData")
S3method("as.data.frame", "H2OParsedData")
S3method("as.matrix", "H2OParsedData")
S3method("as.table", "H2OParsedData")
Expand Down
78 changes: 45 additions & 33 deletions R/h2o-package/R/Classes.R
Original file line number Diff line number Diff line change
Expand Up @@ -1815,41 +1815,53 @@ function(test, yes, no, type) {
}
}

ifelse<-
function (test, yes, no)
{
if (.check.ifelse.conditions(test, yes, no, "test")) {
if (is.logical(yes)) yes <- as.numeric(yes)
if (is.logical(no)) no <- as.numeric(no)
return(.h2o.__multop2("ifelse", test, yes, no))

} else if ( class(yes) == "H2OParsedData" && class(test) == "logical") {
if (is.logical(yes)) yes <- as.numeric(yes)
if (is.logical(no)) no <- as.numeric(no)
return(.h2o.__multop2("ifelse", as.numeric(test), yes, no))

} else if (class(no) == "H2OParsedData" && class(test) == "logical") {
if (is.logical(yes)) yes <- as.numeric(yes)
if (is.logical(no)) no <- as.numeric(no)
return(.h2o.__multop2("ifelse", as.numeric(test), yes, no))
}
if (is.atomic(test))
storage.mode(test) <- "logical"
else test <- if (isS4(test))
as(test, "logical")
else as.logical(test)
ans <- test
ok <- !(nas <- is.na(test))
if (any(test[ok]))
ans[test & ok] <- rep(yes, length.out = length(ans))[test &
ok]
if (any(!test[ok]))
ans[!test & ok] <- rep(no, length.out = length(ans))[!test &
ok]
ans[nas] <- NA
ans
setMethod("ifelse", signature(test="H2OParsedData", yes="ANY", no="ANY"), function(test, yes, no) {
.h2o.ifelse(test,yes,no)
})

setMethod("ifelse", signature(test="ANY",yes="H2OParsedData", no="H2OParsedData"), function(test,yes,no) {
.h2o.ifelse(test,yes,no)
})

.h2o.ifelse <- function(test,yes,no) {
if (.check.ifelse.conditions(test, yes, no, "test")) {
if (is.logical(yes)) yes <- as.numeric(yes)
if (is.logical(no)) no <- as.numeric(no)
return(.h2o.__multop2("ifelse", test, yes, no))

} else if ( class(yes) == "H2OParsedData" && class(test) == "logical") {
if (is.logical(yes)) yes <- as.numeric(yes)
if (is.logical(no)) no <- as.numeric(no)
return(.h2o.__multop2("ifelse", as.numeric(test), yes, no))

} else if (class(no) == "H2OParsedData" && class(test) == "logical") {
if (is.logical(yes)) yes <- as.numeric(yes)
if (is.logical(no)) no <- as.numeric(no)
return(.h2o.__multop2("ifelse", as.numeric(test), yes, no))
}

if( is(test, "H2OParsedData") ) {
if( is.character(yes) ) yes <- deparse(yes)
if( is.character(no) ) no <- deparse(no)
return(.h2o.__multop2("ifelse",test,yes,no))
}

if (is.atomic(test)) storage.mode(test) <- "logical"
else if( isS4(test) ) test <- as(test, "logical")
else test <- as.logical("test")
ans <- test
ok <- !(nas <- is.na(test))
if (any(test[ok]))
ans[test & ok] <- rep(yes, length.out = length(ans))[test &
ok]
if (any(!test[ok]))
ans[!test & ok] <- rep(no, length.out = length(ans))[!test &
ok]
ans[nas] <- NA
ans
}


#.getDomainMapping2 <- function(l, s = "") {
# if (is.list(l)) {
# return( .getDomainMapping2( l[[length(l)]], s))
Expand Down
54 changes: 27 additions & 27 deletions R/h2o-package/R/Internal.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ h2o.setMaxLastValue <- function(val = 1000000000) {
.myPath = paste(Sys.getenv("HOME"), "Library", "Application Support", "h2o", sep=.Platform$file.sep)
if(.Platform$OS.type == "windows")
.myPath = paste(Sys.getenv("APPDATA"), "h2o", sep=.Platform$file.sep)

.pkg.env$h2o.__LOG_COMMAND = paste(.myPath, "commands.log", sep=.Platform$file.sep)
.pkg.env$h2o.__LOG_ERROR = paste(.myPath, "errors.log", sep=.Platform$file.sep)

Expand All @@ -39,7 +39,7 @@ h2o.startLogging <- function() {
warning(errDir, " directory does not exist. Creating it now...")
dir.create(errDir, recursive = TRUE)
}

cat("Appending to log file", .pkg.env$h2o.__LOG_COMMAND, "\n")
cat("Appending to log file", .pkg.env$h2o.__LOG_ERROR, "\n")
assign("IS_LOGGING", TRUE, envir = .pkg.env)
Expand All @@ -58,9 +58,9 @@ h2o.openLog <- function(type) {
stop("type must be either 'Command' or 'Error'")
myFile = switch(type, Command = .pkg.env$h2o.__LOG_COMMAND, Error = .pkg.env$h2o.__LOG_ERROR)
if(!file.exists(myFile)) stop(myFile, " does not exist")

myOS = Sys.info()["sysname"]
if(myOS == "Windows") shell.exec(paste("open '", myFile, "'", sep=""))
if(myOS == "Windows") shell.exec(paste("open '", myFile, "'", sep=""))
else system(paste("open '", myFile, "'", sep=""))
}

Expand All @@ -69,7 +69,7 @@ h2o.setLogPath <- function(path, type) {
if(!file.exists(path)) stop(path, " directory does not exist")
if(missing(type) || !type %in% c("Command", "Error"))
stop("type must be either 'Command' or 'Error'")

myVar = switch(type, Command = "h2o.__LOG_COMMAND", Error = "h2o.__LOG_ERROR")
myFile = switch(type, Command = "commands.log", Error = "errors.log")
cmd <- paste(path, myFile, sep = .Platform$file.sep)
Expand All @@ -95,7 +95,7 @@ h2o.setLogPath <- function(path, type) {
if(commandOrErr == "Command")
h <- paste(h, ifelse(isPost, "POST", "GET"), sep = "\n")
s <- paste(h, "\n", s)

myFile <- ifelse(commandOrErr == "Command", .pkg.env$h2o.__LOG_COMMAND, .pkg.env$h2o.__LOG_ERROR)
myDir <- normalizePath(dirname(myFile))
if(!file.exists(myDir)) stop(myDir, " directory does not exist")
Expand Down Expand Up @@ -200,7 +200,7 @@ h2o.setLogPath <- function(path, type) {

cmd = sprintf("%s)", cmd)
#cat(sprintf("TOM: cmd is %s\n", cmd))

rv = eval(parse(text=cmd))
return(rv)
}
Expand All @@ -219,31 +219,31 @@ h2o.setLogPath <- function(path, type) {
if(.pkg.env$IS_LOGGING) {
# Log list of parameters sent to H2O
.h2o.__logIt(myURL, list(...), "Command")

hg = basicHeaderGatherer()
tg = basicTextGatherer()
postForm(myURL, style = "POST", .opts = curlOptions(headerfunction = hg$update, writefunc = tg[[1]], useragent=R.version.string), ...)
postForm(myURL, style = "POST", .opts = curlOptions(httpheader = c('Expect' = ''), headerfunction = hg$update, writefunc = tg[[1]], useragent=R.version.string), ...)
temp = tg$value()

# Log HTTP response from H2O
hh <- hg$value()
s <- paste(hh["Date"], "\nHTTP status code: ", hh["status"], "\n ", temp, sep = "")
s <- paste(s, "\n\n------------------------------------------------------------------\n")

cmdDir <- normalizePath(dirname(.pkg.env$h2o.__LOG_COMMAND))
if(!file.exists(cmdDir)) stop(cmdDir, " directory does not exist")
write(s, file = .pkg.env$h2o.__LOG_COMMAND, append = TRUE)
} else
temp = postForm(myURL, style = "POST", .opts = curlOptions(useragent=R.version.string), ...)
temp = postForm(myURL, style = "POST", .opts = curlOptions(httpheader = c('Expect' = ''), useragent=R.version.string), ...)

# The GET code that we used temporarily while NanoHTTPD POST was known to be busted.
#
#if(length(list(...)) == 0)
# temp = getURLContent(myURL)
#else
# temp = getForm(myURL, ..., .checkParams = FALSE) # Some H2O params overlap with Curl params
# after = gsub("\\\\\\\"NaN\\\\\\\"", "NaN", temp[1])

# after = gsub("\\\\\\\"NaN\\\\\\\"", "NaN", temp[1])
# after = gsub("NaN", '"NaN"', after)
after = gsub('"Infinity"', '"Inf"', temp[1])
after = gsub('"-Infinity"', '"-Inf"', after)
Expand Down Expand Up @@ -303,7 +303,7 @@ h2o.setLogPath <- function(path, type) {
if(class(client) != "H2OClient") stop("client must be a H2OClient object")
if(missing(keyName)) stop("keyName is missing!")
if(!is.character(keyName) || nchar(keyName) == 0) stop("keyName must be a non-empty string")

res = .h2o.__remoteSend(client, .h2o.__PAGE_JOBS)
res = res$jobs
if(length(res) == 0) stop("No jobs found in queue")
Expand Down Expand Up @@ -344,18 +344,18 @@ h2o.setLogPath <- function(path, type) {
finally = .h2o.__cancelJob(client, job_key))
setTxtProgressBar(pb, 1.0); close(pb)
} else
tryCatch(while(.h2o.__poll(client, job_key) != -1) { Sys.sleep(pollInterval) },
tryCatch(while(.h2o.__poll(client, job_key) != -1) { Sys.sleep(pollInterval) },
finally = .h2o.__cancelJob(client, job_key))
}

# For checking progress from each algorithm's progress page (no longer used)
# .h2o.__isDone <- function(client, algo, resH) {
# if(!algo %in% c("GBM", "KM", "RF1", "RF2", "DeepLearning", "GLM1", "GLM2", "GLM1Grid", "PCA")) stop(algo, " is not a supported algorithm")
# version = ifelse(algo %in% c("RF1", "GLM1", "GLM1Grid"), 1, 2)
# page = switch(algo, GBM = .h2o.__PAGE_GBMProgress, KM = .h2o.__PAGE_KM2Progress, RF1 = .h2o.__PAGE_RFVIEW,
# RF2 = .h2o.__PAGE_DRFProgress, DeepLearning = .h2o.__PAGE_DeepLearningProgress, GLM1 = .h2o.__PAGE_GLMProgress,
# page = switch(algo, GBM = .h2o.__PAGE_GBMProgress, KM = .h2o.__PAGE_KM2Progress, RF1 = .h2o.__PAGE_RFVIEW,
# RF2 = .h2o.__PAGE_DRFProgress, DeepLearning = .h2o.__PAGE_DeepLearningProgress, GLM1 = .h2o.__PAGE_GLMProgress,
# GLM1Grid = .h2o.__PAGE_GLMGridProgress, GLM2 = .h2o.__PAGE_GLM2Progress, PCA = .h2o.__PAGE_PCAProgress)
#
#
# if(version == 1) {
# job_key = resH$response$redirect_request_args$job
# dest_key = resH$destination_key
Expand All @@ -369,7 +369,7 @@ h2o.setLogPath <- function(path, type) {
# job_key = resH$job_key; dest_key = resH$destination_key
# res = .h2o.__remoteSend(client, page, job_key = job_key, destination_key = dest_key)
# if(res$response_info$status == "error") stop(res$error)
#
#
# if(!is.null(res$response_info$redirect_url)) {
# ind = regexpr("\\?", res$response_info$redirect_url)[1]
# url = ifelse(ind > 1, substr(res$response_info$redirect_url, 1, ind-1), res$response_info$redirect_url)
Expand Down Expand Up @@ -791,20 +791,20 @@ function(expr, envir = globalenv(), expr_only = FALSE) {
# if(!((ncol(x) == 1 || class(x) == "numeric") && (ncol(y) == 1 || class(y) == "numeric")))
# stop("Can only operate on single column vectors")
if(class(x) == "H2OParsedData") LHS <- x@key else LHS <- x

if((class(x) == "H2OParsedData" || class(y) == "H2OParsedData") && !( op %in% c('==', '!='))) {
anyFactorsX <- .h2o.__checkForFactors(x)
anyFactorsY <- .h2o.__checkForFactors(y)
anyFactors <- any(c(anyFactorsX, anyFactorsY))
if(anyFactors) warning("Operation not meaningful for factors.")
}

if(class(y) == "H2OParsedData") RHS <- y@key else RHS <- y
expr <- paste(LHS, op, RHS)
if(class(x) == "H2OParsedData") myClient = x@h2o
else myClient <- y@h2o
res <- .h2o.__exec2(myClient, expr)

if(res$num_rows == 0 && res$num_cols == 0) {
if(op %in% .LOGICAL_OPERATORS) res$scalar <- as.logical(res$scalar)
return(res$scalar)
Expand All @@ -821,7 +821,7 @@ function(expr, envir = globalenv(), expr_only = FALSE) {
idx = which(sapply(myInput, function(x) { class(x) == "H2OParsedData" }))[1]
if(is.na(idx)) stop("H2OClient not specified in any input parameter!")
myClient = myInput[[idx]]@h2o

myArgs = lapply(myInput, function(x) { if(class(x) == "H2OParsedData") x@key else x })
expr = paste(op, "(", paste(myArgs, collapse = ","), ")", sep="")
res = .h2o.__exec2(myClient, expr)
Expand All @@ -843,7 +843,7 @@ function(expr, envir = globalenv(), expr_only = FALSE) {
result[i] = paste(nams[i], ": ", vec[i], sep="")
paste(result, collapse="\n")
}

cat("Writing JSON response to", fileName, "\n")
temp = strsplit(as.character(Sys.time()), " ")[[1]]
# myDate = gsub("-", "", temp[1]); myTime = gsub(":", "", temp[2])
Expand Down Expand Up @@ -1039,7 +1039,7 @@ h2o.getFrame <- function(h2o, key) {
.h2o.__getFamily <- function(family, link, tweedie.var.p = 0, tweedie.link.p = 1-tweedie.var.p) {
if(family == "tweedie")
return(tweedie(var.power = tweedie.var.p, link.power = tweedie.link.p))

if(missing(link)) {
switch(family,
"gaussian" = gaussian(),
Expand Down
6 changes: 3 additions & 3 deletions R/h2o-package/R/ParseImport.R
Original file line number Diff line number Diff line change
Expand Up @@ -286,9 +286,9 @@ h2o.uploadFile <- function(object, path, key = "", parse = TRUE, header, header_
url = paste(url, "?key=", URLencode(path), sep="")
if(file.exists(h2o.getLogPath("Command"))) .h2o.__logIt(url, NULL, "Command")
if(silent)
temp = postForm(url, .params = list(fileData = fileUpload(normalizePath(path))), .opts = curlOptions(useragent=R.version.string))
temp = postForm(url, .params = list(fileData = fileUpload(normalizePath(path))), .opts = curlOptions(httpheader = c('Expect' = ''), useragent=R.version.string))
else
temp = postForm(url, .params = list(fileData = fileUpload(normalizePath(path))), .opts = curlOptions(verbose = TRUE, useragent=R.version.string))
temp = postForm(url, .params = list(fileData = fileUpload(normalizePath(path))), .opts = curlOptions(httpheader = c('Expect' = ''), verbose = TRUE, useragent=R.version.string))
rawData = new("H2ORawData", h2o=object, key=path)
if(parse) parsedData = h2o.parseRaw(data=rawData, key=key, header=header, header_with_hash=header_with_hash, sep=sep, col.names=col.names, parser_type = parser_type) else rawData
}
Expand Down Expand Up @@ -549,4 +549,4 @@ h2o.order <- function(data, cols, n = 5, decreasing = T) {
inds <- verified_cols$cols_ind - 1
res <- .h2o.__remoteSend(data@h2o,.h2o.__PAGE_Order, source=data@key, cols=inds, n = n, rev = rev)
h2o.getFrame(data@h2o, res$destination_key)
}
}
8 changes: 5 additions & 3 deletions R/h2o-package/man/ifelse.Rd
Original file line number Diff line number Diff line change
@@ -1,14 +1,16 @@
\name{ifelse}
\alias{ifelse}
%- Also NEED an '\alias' for EACH other topic documented here.
\alias{ifelse,ANY,H2OParsedData,H2OParsedData-method}
\alias{ifelse,H2OParsedData,ANY,ANY-method}
\title{
Applies conditional statements to an \code{\linkS4class{H2OParsedData} object.
}}
\description{
Applies conditional statements to numeric vectors in H2O parsed data objects when the data are numeric.
}
\usage{
ifelse(test, yes, no)
\S4method{ifelse}{H2OParsedData,ANY,ANY}(test, yes, no)

\S4method{ifelse}{ANY,H2OParsedData,H2OParsedData}(test, yes, no)
}

\arguments{
Expand Down
15 changes: 12 additions & 3 deletions R/tests/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -724,7 +724,7 @@ def read_test_list_file(self, test_list_file):
print("")
sys.exit(1)

def build_test_list(self, test_group, run_small, run_medium, run_large):
def build_test_list(self, test_group, run_small, run_medium, run_large, run_xlarge):
"""
Recursively find the list of tests to run and store them in the object.
Fills in self.tests and self.tests_not_started.
Expand All @@ -746,11 +746,14 @@ def build_test_list(self, test_group, run_small, run_medium, run_large):
is_small = False
is_medium = False
is_large = False
is_xlarge= False

if (re.match(".*large.*", f)):
is_large = True
elif (re.match(".*medium.*", f)):
is_large = True
elif "xlarge" in f:
is_xlarge = True
else:
is_small = True

Expand All @@ -760,6 +763,8 @@ def build_test_list(self, test_group, run_small, run_medium, run_large):
continue
if (is_large and not run_large):
continue
if is_xlarge and not run_xlarge:
continue

if (test_group is not None):
test_short_dir = self._calc_test_short_dir(os.path.join(root, f))
Expand Down Expand Up @@ -838,7 +843,7 @@ def run_tests(self):
cloud = self.clouds[0]
port = cloud.get_port()
ip = "127.0.0.1:"
if (g_use_cloud2):
if g_use_cloud or g_use_cloud2:
ip = cloud.get_ip()+":"
cmd = ["R",
"--quiet",
Expand Down Expand Up @@ -1139,6 +1144,7 @@ def __str__(self):
g_run_small = True
g_run_medium = True
g_run_large = True
g_run_xlarge = True
g_use_cloud = False
g_use_cloud2 = False
g_config = None
Expand Down Expand Up @@ -1301,6 +1307,7 @@ def parse_args(argv):
global g_run_small
global g_run_medium
global g_run_large
global g_run_xlarge
global g_use_cloud
global g_use_cloud2
global g_config
Expand Down Expand Up @@ -1360,6 +1367,8 @@ def parse_args(argv):
g_run_medium = False
if (not 'l' in v):
g_run_large = False
if not "xl" in v:
g_run_xlarge = False
else:
bad_arg(s)
elif (s == "--usecloud"):
Expand Down Expand Up @@ -1496,7 +1505,7 @@ def main(argv):
g_runner.read_test_list_file(g_test_list_file)
else:
# Test group can be None or not.
g_runner.build_test_list(g_test_group, g_run_small, g_run_medium, g_run_large)
g_runner.build_test_list(g_test_group, g_run_small, g_run_medium, g_run_large, g_run_xlarge)

# If no run is specified, then do an early exit here.
if (g_no_run):
Expand Down
Loading

0 comments on commit ed59791

Please sign in to comment.