Skip to content

Commit

Permalink
Small test cases for GLM, K-means, and RF
Browse files Browse the repository at this point in the history
  • Loading branch information
anqif committed Jun 26, 2013
1 parent 55dc9cc commit f93bdcb
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 58 deletions.
78 changes: 41 additions & 37 deletions R/H2O.R
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,19 @@ h2o.poll <- function(keyName) {
prog$end_time
}

# Looks up a job on the H2O jobs page by its destination key and returns that job's end_time field.
# Key can be either a string or a literal which will be translated to a string.
# If several jobs share the destination key, the last one listed is used.
# Raises an error when no listed job has the given destination key (including when the job list is empty).
# NOTE(review): callers in this file loop while the result equals "" - presumably an empty end_time means
# the job is still running; confirm against the server's jobs page format.
h2o.poll_rf <- function(keyName) {
  type = tryCatch({ typeof(keyName) }, error = function(e) { "expr" })
  if (type != "character")
    keyName = deparse(substitute(keyName))
  jobs = h2o.__remoteSend(h2o.__PAGE_JOBS)$jobs
  prog = NULL
  # seq_along() gives an empty sequence for an empty job list, where 1:length() would iterate over c(1, 0)
  for (i in seq_along(jobs)) {
    # isTRUE() skips entries that carry no destination_key instead of failing on a zero-length condition
    if (isTRUE(jobs[[i]]$destination_key == keyName))
      prog = jobs[[i]]
  }
  if (is.null(prog))
    stop("no job with destination key '", keyName, "' found on the jobs page", call. = FALSE)
  prog$end_time
}

# Inspects the given key on H2O cloud. Key can be either a string or a literal which will be translated to a string.
# Returns a list with key name (key), value type (type), number of rows in the value (num_rows), number of columns (num_cols),
# size of a single row in bytes (rowSize) and total size in bytes of the value (size). Also list of all columns
Expand Down Expand Up @@ -153,8 +166,8 @@ h2o.importUrl <- function(keyName, url, parse = TRUE) {
h2o.__printIfVerbose(" parsing key ",uploadKey," to key ",keyName)
res = h2o.__remoteSend(h2o.__PAGE_PARSE, source_key = uploadKey, destination_key = paste(keyName,".hex",sep=""))
}
#res$destination_key
res
# res
while(h2o.poll(res$response$redirect_request_args$job) == "") { Sys.sleep(1) }
}

# Imports a file local to the server the interop is connecting to. Other arguments are the same as for the importUrl
Expand All @@ -164,7 +177,6 @@ h2o.importFile <- function(keyName, fileName, parse = TRUE) {
if (type != "character")
keyName = deparse(substitute(keyName))
h2o.importUrl(keyName,paste("file://",fileName,sep=""),parse = parse)

}

# shorthands ----------------------------------------------------------------------------------------------------------
Expand Down Expand Up @@ -221,28 +233,8 @@ h2o.glm = function(keyName, y, case="1.0", x = "", negX = "", family = "gaussian
negX = paste(negX,sep="",collapse=",")
h2o.__printIfVerbose(" running GLM on vector ",keyName," response column ",y)
res = h2o.__remoteSend(h2o.__PAGE_GLM, key = keyName, y = y, case=case, x = x, "-x" = negX, family = family, xval = xval, threshold = threshold, norm = norm, lambda = lambda, rho = rho, alpha = alpha)
res
}

h2o.inspect_glm = function(keyName) {
type = tryCatch({ typeof(keyName) }, error = function(e) { "expr" })
if (type != "character")
keyName = deparse(substitute(keyName))
h2o.__printIfVerbose(" Inspecting key ",keyName)
res = h2o.__remoteSend(h2o.__PAGE_INSPECT, key = keyName)
res = res$GLMModel
result = list()
result$key = res$model_key
# result$col_names = res$column_names
result$dof = res$dof
# result$coef = res$coefficients
result$coef = data.frame(res$coefficients)
colnames(result$coef) = c(res$column_names, "Intercept")
# result$norm_coef = res$normalized_coefficients
result$norm_coef = data.frame(res$normalized_coefficients)
colnames(result$norm_coef) = c(res$column_names, "Intercept")
result$params = res$GLMParams
result
while(h2o.poll(res$response$redirect_request_args$job) == "") { Sys.sleep(1) }
h2o.__printModel(res$destination_key, type = "GLM")
}

# K-means function.
Expand All @@ -255,16 +247,8 @@ h2o.kmeans = function(keyName, k = 5, epsilon = 1.0E-6, normalize = 0) {

h2o.__printIfVerbose(" running ", k, "-means on vector ",keyName)
res = h2o.__remoteSend(h2o.__PAGE_KMEANS, source_key = keyName, k = k, epsilon = epsilon, normalize = normalize)
res
}

h2o.inspect_kmeans = function(keyName) {
type = tryCatch({ typeof(keyName) }, error = function(e) { "expr" })
if (type != "character")
keyName = deparse(substitute(keyName))
h2o.__printIfVerbose(" Inspecting key ",keyName)
res = h2o.__remoteSend(h2o.__PAGE_INSPECT, key = keyName)
res$KMeansModel$clusters
while(h2o.poll(res$response$redirect_request_args$job) == "") { Sys.sleep(1) }
h2o.__printModel(res$destination_key, type = "KM")
}

# RF function.
Expand All @@ -279,7 +263,9 @@ h2o.rf = function(keyName, ntree="", class = "", negX = "", family = "gaussian",

h2o.__printIfVerbose(" running RF on vector ",keyName," class column ",class, " number of trees ", ntree)
res = h2o.__remoteSend(h2o.__PAGE_RF, data_key = keyName, ntree = ntree, class = class)
res
# res
while(h2o.poll_rf(res$response$redirect_request_args$model_key) == "") { Sys.sleep(1) }
h2o.inspect_rf(res$model_key, res$data_key)
}

h2o.inspect_rf = function(modelkey, datakey, oob_err = 1) {
Expand Down Expand Up @@ -325,7 +311,7 @@ h2o.__printIfVerbose <- function(...) {

h2o.__remoteSend <- function(page,...) {
# Sends the given arguments as URL arguments to the given page on the specified server
#h2o.__printIfVerbose(page)
# h2o.__printIfVerbose(page)
url = paste(h2o.SERVER,page,sep="/")
# res = fromJSON(postForm(url, style = "POST", ...))
temp = postForm(url, style = "POST", ...)
Expand Down Expand Up @@ -366,6 +352,24 @@ h2o.__convertToRData <- function(res,forceDataFrame=FALSE) {
}
}

# Sends an inspect request for the given key and returns the part of the reply that matches type.
# type "GLM": a list holding the model key (key), degrees of freedom (dof), the coefficients and the
#             normalized coefficients as data frames (coef, norm_coef) whose columns are named after the
#             model's column names followed by "Intercept", and the GLM parameters (params).
# type "KM":  the clusters entry of the K-means model.
# Any other type produces no value.
h2o.__printModel <- function(keyName, type) {
  h2o.__printIfVerbose(" Inspecting key ",keyName)
  page = h2o.__remoteSend(h2o.__PAGE_INSPECT, key = keyName)
  if(type == "GLM") {
    model = page$GLMModel
    out = list()
    out[["key"]] = model$model_key
    out[["dof"]] = model$dof
    # Coefficients first, then their normalized counterparts; both get the same column labels
    out[["coef"]] = data.frame(model$coefficients)
    colnames(out[["coef"]]) = c(model$column_names, "Intercept")
    out[["norm_coef"]] = data.frame(model$normalized_coefficients)
    colnames(out[["norm_coef"]]) = c(model$column_names, "Intercept")
    out[["params"]] = model$GLMParams
    out
  } else if(type == "KM") {
    page$KMeansModel$clusters
  }
}

# h2o.rf <- function(key,ntree, depth=30,model=FALSE,gini=1,seed=42,wait=TRUE) {
# if (model==FALSE)
# model = paste(key,"_model",sep="")
Expand Down
28 changes: 7 additions & 21 deletions R/H2OTestDemo.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,51 +6,37 @@ h2o.SERVER="localhost:54321"

# Run expressions on covtype
# h2o.importFile("covtype", paste(getwd(), "../smalldata/covtype/covtype.20k.data", sep="/"))
temp = h2o.importUrl("covtype", "http://www.stanford.edu/~anqif/covtype.20k.data")
while(h2o.poll(temp$response$redirect_request_args$job) == "") { Sys.sleep(1) }
h2o.importUrl("covtype", "http://www.stanford.edu/~anqif/covtype.20k.data")
cov.view <- h2o.inspect(covtype.hex)
print(cov.view$cols)
#h2o(slice(covtype.hex,100,100))
#h2o(sum(covtype.hex[12]))

# h2o.glm(covtype.hex, y = 12, case="1",family=binomial)
# h2o.glm(covtype.hex, y = 12, x = "1,2,3,4,5,6,7,8",case=1, family=binomial)
temp <- h2o.glm(covtype.hex, y = 12, case = "1", family = binomial)
while(h2o.poll(temp$response$redirect_request_args$job) == "") { Sys.sleep(1) }
cov.glm1 <- h2o.inspect_glm(temp$destination_key)
cov.glm1 <- h2o.glm(covtype.hex, y = 12, case = "1", family = binomial)
print(cov.glm1$coef)
print(cov.glm1$dof)
temp <- h2o.glm(covtype.hex, y = 12, x = "1,2,3,4,5,6,7,8", case = 1, family = binomial)
while(h2o.poll(temp$response$redirect_request_args$job) == "") { Sys.sleep(1) }
cov.glm2 <- h2o.inspect_glm(temp$destination_key)
cov.glm2 <- h2o.glm(covtype.hex, y = 12, x = "1,2,3,4,5,6,7,8", case = 1, family = binomial)
print(cov.glm2$coef)
print(cov.glm2$dof)

#h2o.filter(covtype.hex, covtype.hex[6] < mean(covtype.hex[6]))
#h2o(covtype[1] + covtype[2] * 4 + max(covtype[6]) * covtype[1] + 7 - covtype[3])
#h2o(log(covtype[1]))
# add randomforest test
# h2o.rf(covtype.hex, class = "54", ntree = "10")
temp <- h2o.rf(covtype.hex, class = "54", ntree = "10")
while(h2o.poll(temp$response$redirect_request_args$model_key) == "") { Sys.sleep(1) }
cov.rf <- h2o.inspect_rf(temp$model_key, temp$data_key)
print(cov.rf)
h2o.rf(covtype.hex, class = "54", ntree = "10")
# while(h2o.poll(temp$response$redirect_request_args$key) == "") { Sys.sleep(1) }

# Run GLM
# h2o.importFile("prostate", paste(getwd(),"../smalldata/logreg/prostate.csv",sep="/"))
h2o.importUrl("prostate", "http://www.stanford.edu/~anqif/prostate.csv")
while(h2o.poll(temp$response$redirect_request_args$job) == "") { Sys.sleep(1) }
prostate.view <- h2o.inspect(prostate.hex)
print(prostate.view$cols)
# h2o.glm(prostate.hex, y = CAPSULE, x = "ID,AGE,RACE,PSA,DCAPS", family=binomial)
temp <- h2o.glm(prostate.hex, y = CAPSULE, x = "ID,AGE,RACE,PSA,DCAPS", family = binomial)
while(h2o.poll(temp$response$redirect_request_args$job) == "") { Sys.sleep(1) }
prostate.glm <- h2o.inspect_glm(temp$destination_key)
prostate.glm <- h2o.glm(prostate.hex, y = CAPSULE, x = "ID,AGE,RACE,PSA,DCAPS", family = binomial)
print(prostate.glm$coef)
print(prostate.glm$dof)

# Run K-Means
temp <- h2o.kmeans(covtype.hex, k = 10)
while(h2o.poll(temp$response$redirect_request_args$job) == "") { Sys.sleep(1) }
cov.km <- h2o.inspect_kmeans(temp$destination_key)
print(cov.km)
h2o.kmeans(covtype.hex, k = 10)

0 comments on commit f93bdcb

Please sign in to comment.