Skip to content

Commit

Permalink
Merge branch 'master' of github.com:h2oai/h2o
Browse files Browse the repository at this point in the history
  • Loading branch information
tomasnykodym committed May 13, 2015
2 parents 132962f + 7e92d10 commit f608b75
Show file tree
Hide file tree
Showing 15 changed files with 376 additions and 81 deletions.
78 changes: 45 additions & 33 deletions R/h2o-package/R/Classes.R
Original file line number Diff line number Diff line change
Expand Up @@ -1815,41 +1815,53 @@ function(test, yes, no, type) {
}
}

ifelse<-
function (test, yes, no)
{
if (.check.ifelse.conditions(test, yes, no, "test")) {
if (is.logical(yes)) yes <- as.numeric(yes)
if (is.logical(no)) no <- as.numeric(no)
return(.h2o.__multop2("ifelse", test, yes, no))

} else if ( class(yes) == "H2OParsedData" && class(test) == "logical") {
if (is.logical(yes)) yes <- as.numeric(yes)
if (is.logical(no)) no <- as.numeric(no)
return(.h2o.__multop2("ifelse", as.numeric(test), yes, no))

} else if (class(no) == "H2OParsedData" && class(test) == "logical") {
if (is.logical(yes)) yes <- as.numeric(yes)
if (is.logical(no)) no <- as.numeric(no)
return(.h2o.__multop2("ifelse", as.numeric(test), yes, no))
}
if (is.atomic(test))
storage.mode(test) <- "logical"
else test <- if (isS4(test))
as(test, "logical")
else as.logical(test)
ans <- test
ok <- !(nas <- is.na(test))
if (any(test[ok]))
ans[test & ok] <- rep(yes, length.out = length(ans))[test &
ok]
if (any(!test[ok]))
ans[!test & ok] <- rep(no, length.out = length(ans))[!test &
ok]
ans[nas] <- NA
ans
setMethod("ifelse", signature(test="H2OParsedData", yes="ANY", no="ANY"), function(test, yes, no) {
.h2o.ifelse(test,yes,no)
})

setMethod("ifelse", signature(test="ANY",yes="H2OParsedData", no="H2OParsedData"), function(test,yes,no) {
.h2o.ifelse(test,yes,no)
})

.h2o.ifelse <- function(test,yes,no) {
if (.check.ifelse.conditions(test, yes, no, "test")) {
if (is.logical(yes)) yes <- as.numeric(yes)
if (is.logical(no)) no <- as.numeric(no)
return(.h2o.__multop2("ifelse", test, yes, no))

} else if ( class(yes) == "H2OParsedData" && class(test) == "logical") {
if (is.logical(yes)) yes <- as.numeric(yes)
if (is.logical(no)) no <- as.numeric(no)
return(.h2o.__multop2("ifelse", as.numeric(test), yes, no))

} else if (class(no) == "H2OParsedData" && class(test) == "logical") {
if (is.logical(yes)) yes <- as.numeric(yes)
if (is.logical(no)) no <- as.numeric(no)
return(.h2o.__multop2("ifelse", as.numeric(test), yes, no))
}

if( is(test, "H2OParsedData") ) {
if( is.character(yes) ) yes <- deparse(yes)
if( is.character(no) ) no <- deparse(no)
return(.h2o.__multop2("ifelse",test,yes,no))
}

if (is.atomic(test)) storage.mode(test) <- "logical"
else if( isS4(test) ) test <- as(test, "logical")
else test <- as.logical("test")
ans <- test
ok <- !(nas <- is.na(test))
if (any(test[ok]))
ans[test & ok] <- rep(yes, length.out = length(ans))[test &
ok]
if (any(!test[ok]))
ans[!test & ok] <- rep(no, length.out = length(ans))[!test &
ok]
ans[nas] <- NA
ans
}


#.getDomainMapping2 <- function(l, s = "") {
# if (is.list(l)) {
# return( .getDomainMapping2( l[[length(l)]], s))
Expand Down
54 changes: 27 additions & 27 deletions R/h2o-package/R/Internal.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ h2o.setMaxLastValue <- function(val = 1000000000) {
.myPath = paste(Sys.getenv("HOME"), "Library", "Application Support", "h2o", sep=.Platform$file.sep)
if(.Platform$OS.type == "windows")
.myPath = paste(Sys.getenv("APPDATA"), "h2o", sep=.Platform$file.sep)

.pkg.env$h2o.__LOG_COMMAND = paste(.myPath, "commands.log", sep=.Platform$file.sep)
.pkg.env$h2o.__LOG_ERROR = paste(.myPath, "errors.log", sep=.Platform$file.sep)

Expand All @@ -39,7 +39,7 @@ h2o.startLogging <- function() {
warning(errDir, " directory does not exist. Creating it now...")
dir.create(errDir, recursive = TRUE)
}

cat("Appending to log file", .pkg.env$h2o.__LOG_COMMAND, "\n")
cat("Appending to log file", .pkg.env$h2o.__LOG_ERROR, "\n")
assign("IS_LOGGING", TRUE, envir = .pkg.env)
Expand All @@ -58,9 +58,9 @@ h2o.openLog <- function(type) {
stop("type must be either 'Command' or 'Error'")
myFile = switch(type, Command = .pkg.env$h2o.__LOG_COMMAND, Error = .pkg.env$h2o.__LOG_ERROR)
if(!file.exists(myFile)) stop(myFile, " does not exist")

myOS = Sys.info()["sysname"]
if(myOS == "Windows") shell.exec(paste("open '", myFile, "'", sep=""))
if(myOS == "Windows") shell.exec(paste("open '", myFile, "'", sep=""))
else system(paste("open '", myFile, "'", sep=""))
}

Expand All @@ -69,7 +69,7 @@ h2o.setLogPath <- function(path, type) {
if(!file.exists(path)) stop(path, " directory does not exist")
if(missing(type) || !type %in% c("Command", "Error"))
stop("type must be either 'Command' or 'Error'")

myVar = switch(type, Command = "h2o.__LOG_COMMAND", Error = "h2o.__LOG_ERROR")
myFile = switch(type, Command = "commands.log", Error = "errors.log")
cmd <- paste(path, myFile, sep = .Platform$file.sep)
Expand All @@ -95,7 +95,7 @@ h2o.setLogPath <- function(path, type) {
if(commandOrErr == "Command")
h <- paste(h, ifelse(isPost, "POST", "GET"), sep = "\n")
s <- paste(h, "\n", s)

myFile <- ifelse(commandOrErr == "Command", .pkg.env$h2o.__LOG_COMMAND, .pkg.env$h2o.__LOG_ERROR)
myDir <- normalizePath(dirname(myFile))
if(!file.exists(myDir)) stop(myDir, " directory does not exist")
Expand Down Expand Up @@ -200,7 +200,7 @@ h2o.setLogPath <- function(path, type) {

cmd = sprintf("%s)", cmd)
#cat(sprintf("TOM: cmd is %s\n", cmd))

rv = eval(parse(text=cmd))
return(rv)
}
Expand All @@ -219,31 +219,31 @@ h2o.setLogPath <- function(path, type) {
if(.pkg.env$IS_LOGGING) {
# Log list of parameters sent to H2O
.h2o.__logIt(myURL, list(...), "Command")

hg = basicHeaderGatherer()
tg = basicTextGatherer()
postForm(myURL, style = "POST", .opts = curlOptions(headerfunction = hg$update, writefunc = tg[[1]], useragent=R.version.string), ...)
postForm(myURL, style = "POST", .opts = curlOptions(httpheader = c('Expect' = ''), headerfunction = hg$update, writefunc = tg[[1]], useragent=R.version.string), ...)
temp = tg$value()

# Log HTTP response from H2O
hh <- hg$value()
s <- paste(hh["Date"], "\nHTTP status code: ", hh["status"], "\n ", temp, sep = "")
s <- paste(s, "\n\n------------------------------------------------------------------\n")

cmdDir <- normalizePath(dirname(.pkg.env$h2o.__LOG_COMMAND))
if(!file.exists(cmdDir)) stop(cmdDir, " directory does not exist")
write(s, file = .pkg.env$h2o.__LOG_COMMAND, append = TRUE)
} else
temp = postForm(myURL, style = "POST", .opts = curlOptions(useragent=R.version.string), ...)
temp = postForm(myURL, style = "POST", .opts = curlOptions(httpheader = c('Expect' = ''), useragent=R.version.string), ...)

# The GET code that we used temporarily while NanoHTTPD POST was known to be busted.
#
#if(length(list(...)) == 0)
# temp = getURLContent(myURL)
#else
# temp = getForm(myURL, ..., .checkParams = FALSE) # Some H2O params overlap with Curl params
# after = gsub("\\\\\\\"NaN\\\\\\\"", "NaN", temp[1])

# after = gsub("\\\\\\\"NaN\\\\\\\"", "NaN", temp[1])
# after = gsub("NaN", '"NaN"', after)
after = gsub('"Infinity"', '"Inf"', temp[1])
after = gsub('"-Infinity"', '"-Inf"', after)
Expand Down Expand Up @@ -303,7 +303,7 @@ h2o.setLogPath <- function(path, type) {
if(class(client) != "H2OClient") stop("client must be a H2OClient object")
if(missing(keyName)) stop("keyName is missing!")
if(!is.character(keyName) || nchar(keyName) == 0) stop("keyName must be a non-empty string")

res = .h2o.__remoteSend(client, .h2o.__PAGE_JOBS)
res = res$jobs
if(length(res) == 0) stop("No jobs found in queue")
Expand Down Expand Up @@ -344,18 +344,18 @@ h2o.setLogPath <- function(path, type) {
finally = .h2o.__cancelJob(client, job_key))
setTxtProgressBar(pb, 1.0); close(pb)
} else
tryCatch(while(.h2o.__poll(client, job_key) != -1) { Sys.sleep(pollInterval) },
tryCatch(while(.h2o.__poll(client, job_key) != -1) { Sys.sleep(pollInterval) },
finally = .h2o.__cancelJob(client, job_key))
}

# For checking progress from each algorithm's progress page (no longer used)
# .h2o.__isDone <- function(client, algo, resH) {
# if(!algo %in% c("GBM", "KM", "RF1", "RF2", "DeepLearning", "GLM1", "GLM2", "GLM1Grid", "PCA")) stop(algo, " is not a supported algorithm")
# version = ifelse(algo %in% c("RF1", "GLM1", "GLM1Grid"), 1, 2)
# page = switch(algo, GBM = .h2o.__PAGE_GBMProgress, KM = .h2o.__PAGE_KM2Progress, RF1 = .h2o.__PAGE_RFVIEW,
# RF2 = .h2o.__PAGE_DRFProgress, DeepLearning = .h2o.__PAGE_DeepLearningProgress, GLM1 = .h2o.__PAGE_GLMProgress,
# page = switch(algo, GBM = .h2o.__PAGE_GBMProgress, KM = .h2o.__PAGE_KM2Progress, RF1 = .h2o.__PAGE_RFVIEW,
# RF2 = .h2o.__PAGE_DRFProgress, DeepLearning = .h2o.__PAGE_DeepLearningProgress, GLM1 = .h2o.__PAGE_GLMProgress,
# GLM1Grid = .h2o.__PAGE_GLMGridProgress, GLM2 = .h2o.__PAGE_GLM2Progress, PCA = .h2o.__PAGE_PCAProgress)
#
#
# if(version == 1) {
# job_key = resH$response$redirect_request_args$job
# dest_key = resH$destination_key
Expand All @@ -369,7 +369,7 @@ h2o.setLogPath <- function(path, type) {
# job_key = resH$job_key; dest_key = resH$destination_key
# res = .h2o.__remoteSend(client, page, job_key = job_key, destination_key = dest_key)
# if(res$response_info$status == "error") stop(res$error)
#
#
# if(!is.null(res$response_info$redirect_url)) {
# ind = regexpr("\\?", res$response_info$redirect_url)[1]
# url = ifelse(ind > 1, substr(res$response_info$redirect_url, 1, ind-1), res$response_info$redirect_url)
Expand Down Expand Up @@ -791,20 +791,20 @@ function(expr, envir = globalenv(), expr_only = FALSE) {
# if(!((ncol(x) == 1 || class(x) == "numeric") && (ncol(y) == 1 || class(y) == "numeric")))
# stop("Can only operate on single column vectors")
if(class(x) == "H2OParsedData") LHS <- x@key else LHS <- x

if((class(x) == "H2OParsedData" || class(y) == "H2OParsedData") && !( op %in% c('==', '!='))) {
anyFactorsX <- .h2o.__checkForFactors(x)
anyFactorsY <- .h2o.__checkForFactors(y)
anyFactors <- any(c(anyFactorsX, anyFactorsY))
if(anyFactors) warning("Operation not meaningful for factors.")
}

if(class(y) == "H2OParsedData") RHS <- y@key else RHS <- y
expr <- paste(LHS, op, RHS)
if(class(x) == "H2OParsedData") myClient = x@h2o
else myClient <- y@h2o
res <- .h2o.__exec2(myClient, expr)

if(res$num_rows == 0 && res$num_cols == 0) {
if(op %in% .LOGICAL_OPERATORS) res$scalar <- as.logical(res$scalar)
return(res$scalar)
Expand All @@ -821,7 +821,7 @@ function(expr, envir = globalenv(), expr_only = FALSE) {
idx = which(sapply(myInput, function(x) { class(x) == "H2OParsedData" }))[1]
if(is.na(idx)) stop("H2OClient not specified in any input parameter!")
myClient = myInput[[idx]]@h2o

myArgs = lapply(myInput, function(x) { if(class(x) == "H2OParsedData") x@key else x })
expr = paste(op, "(", paste(myArgs, collapse = ","), ")", sep="")
res = .h2o.__exec2(myClient, expr)
Expand All @@ -843,7 +843,7 @@ function(expr, envir = globalenv(), expr_only = FALSE) {
result[i] = paste(nams[i], ": ", vec[i], sep="")
paste(result, collapse="\n")
}

cat("Writing JSON response to", fileName, "\n")
temp = strsplit(as.character(Sys.time()), " ")[[1]]
# myDate = gsub("-", "", temp[1]); myTime = gsub(":", "", temp[2])
Expand Down Expand Up @@ -1039,7 +1039,7 @@ h2o.getFrame <- function(h2o, key) {
.h2o.__getFamily <- function(family, link, tweedie.var.p = 0, tweedie.link.p = 1-tweedie.var.p) {
if(family == "tweedie")
return(tweedie(var.power = tweedie.var.p, link.power = tweedie.link.p))

if(missing(link)) {
switch(family,
"gaussian" = gaussian(),
Expand Down
6 changes: 3 additions & 3 deletions R/h2o-package/R/ParseImport.R
Original file line number Diff line number Diff line change
Expand Up @@ -286,9 +286,9 @@ h2o.uploadFile <- function(object, path, key = "", parse = TRUE, header, header_
url = paste(url, "?key=", URLencode(path), sep="")
if(file.exists(h2o.getLogPath("Command"))) .h2o.__logIt(url, NULL, "Command")
if(silent)
temp = postForm(url, .params = list(fileData = fileUpload(normalizePath(path))), .opts = curlOptions(useragent=R.version.string))
temp = postForm(url, .params = list(fileData = fileUpload(normalizePath(path))), .opts = curlOptions(httpheader = c('Expect' = ''), useragent=R.version.string))
else
temp = postForm(url, .params = list(fileData = fileUpload(normalizePath(path))), .opts = curlOptions(verbose = TRUE, useragent=R.version.string))
temp = postForm(url, .params = list(fileData = fileUpload(normalizePath(path))), .opts = curlOptions(httpheader = c('Expect' = ''), verbose = TRUE, useragent=R.version.string))
rawData = new("H2ORawData", h2o=object, key=path)
if(parse) parsedData = h2o.parseRaw(data=rawData, key=key, header=header, header_with_hash=header_with_hash, sep=sep, col.names=col.names, parser_type = parser_type) else rawData
}
Expand Down Expand Up @@ -549,4 +549,4 @@ h2o.order <- function(data, cols, n = 5, decreasing = T) {
inds <- verified_cols$cols_ind - 1
res <- .h2o.__remoteSend(data@h2o,.h2o.__PAGE_Order, source=data@key, cols=inds, n = n, rev = rev)
h2o.getFrame(data@h2o, res$destination_key)
}
}
6 changes: 3 additions & 3 deletions R/tests/Utils/glmR.R
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ gaussian_obj <- function(deviance, nobs, lambda, alpha, beta) {
}

binomial_obj <- function(deviance, nobs, lambda, alpha, beta) {
deviance/2 + lambda * penalty(alpha, beta)
deviance/2 * (1/nobs) + lambda * penalty(alpha, beta)
}

###
Expand All @@ -43,9 +43,9 @@ checkGLMModel2 <- function(myGLM.h2o,myGLM.r){
numfeat = length(myGLM.h2o@model$coefficients)
beta = myGLM.h2o@model$coefficients[-numfeat]

r_dev = myGLM.r$nulldev*(1-myGLM.r$dev.ratio)
r_nobs = myGLM.r$nobs
r_lambda = myGLM.r$lambda
r_dev = myGLM.r$nulldev*(1-myGLM.r$dev.ratio[length(r_lambda)])
r_nobs = myGLM.r$nobs
r_beta = myGLM.r$beta[-numfeat,length(r_lambda)]
}

Expand Down
2 changes: 1 addition & 1 deletion R/tests/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -838,7 +838,7 @@ def run_tests(self):
cloud = self.clouds[0]
port = cloud.get_port()
ip = "127.0.0.1:"
if (g_use_cloud2):
if g_use_cloud or g_use_cloud2:
ip = cloud.get_ip()+":"
cmd = ["R",
"--quiet",
Expand Down
22 changes: 22 additions & 0 deletions R/tests/testdir_algos/coxph/runit_NOPASS_CoxPH_predict.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
setwd(normalizePath(dirname(R.utils::commandArgs(asValues=TRUE)$"f")))
source('../../findNSourceUtils.R')

test.CoxPH.predict <- function(conn) {
Log.info("Import PBC from surivival package into H2O...")
pbc.hex <- as.h2o(conn, pbc, key = "pbc.hex")
Log.info("Create response and predictor features...")
pbc.hex$statusOf2 <- pbc.hex$status == 2
pbc.hex$logBili <- log(pbc.hex$bili)
pbc.hex$logProtime <- log(pbc.hex$protime)
pbc.hex$logAlbumin <- log(pbc.hex$albumin)
Log.info("Build Cox PH model with features including both numeric and factor types...")
pbcmodel <- h2o.coxph(x = c("age", "edema", "logBili", "logProtime", "logAlbumin", "sex"),
y = c("time", "statusOf2"), data = pbc.hex)

Log.info("Predict on the Cox PH model...")
pred <- h2o.predict(object = pbcmodel, newdata = pbc.hex)
#### Missing comparison against cox model in R. Input after AIOO error fixed for h2o.predict on Cox model.
testEnd()
}

doTest("Cox PH Model Test: Categorical Column", test.CoxPH.predict)
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,9 @@ test.bc.illegal <- function(conn) {
family_type = "binomial"

## Function to run GLM with specific beta_constraints
run_glm <- function(bc) {
run_glm <- function(bc, data = modelStack) {
h2o.glm(x = indVars, y = depVars, data = modelStack, family = family_type, use_all_factor_levels = T, key = "a",
lambda = 0, higher_accuracy = T,
alpha = alpha, beta_constraints = bc)
lambda = 0, higher_accuracy = T, alpha = alpha, beta_constraints = bc)
}

Log.info("Use beta constraints with same feature listed twice: ")
Expand All @@ -43,15 +42,32 @@ test.bc.illegal <- function(conn) {
# Illegal Argument Exception: uknown predictor name 'fakeFeature'
checkException(run_glm(b), "Did not catch fake feature in file.", silent = T)

# Log.info("Used empty frame for beta constraints: ")
# empty <- betaConstraints.hex[betaConstraints.hex$names == "fake"]
# checkException(run_glm(empty), "Did not reject empty frame.", silent = T)
Log.info("Used empty frame for beta constraints: ")
empty <- betaConstraints.hex[betaConstraints.hex$names == "fake"]
m1 <- run_glm(empty)
m2 <- run_glm(NULL)
checkEqualsNumeric(m1@model$deviance, m2@model$deviance)

# Log.info("Typo in one of column names.")
# c <- bc
# names(c) <- gsub("lower_bounds", replacement = "lowerbounds", x = names(bc))
# checkException(run_glm(c), "Did not detect one of the columns had a typo, GLM ran without lower bounds.", silent = T)

Log.info("Change column to enum and try to input first level as beta constraints with use_all_factors = F.")
# Choose column to use as categorical and convert column to enum column.
cat_col = "C217"
a = modelStack[,c(indVars,depVars)]
a[,cat_col] = as.factor(a[,cat_col])

bc_cat <- data.frame( names = c( "C217.0","C217.1", "C217.2", "C217.3", "C217.6"),
lower_bounds = rep(-10000,5), upper_bounds = rep(10000,5),
beta_given = c(0.1, -1, .5, 2.4, 1.5),
rho = rep( 1, 5))
bc_cat <- rbind(bc_cat, bc[!(bc$names == cat_col),])
checkException(run_glm(bc_cat, data = a), "Did not block user from using first factor level in beta constraints.", silent = T)
# Log.info("Bound was not expanded out for enum column, should reject.")
# checkException(run_glm(bc, data = a), "Fail to reject name C217 even though column is enum.", silent = T)

testEnd()
}

Expand Down
Loading

0 comments on commit f608b75

Please sign in to comment.