Skip to content

Commit

Permalink
Added jblas to BLM
Browse files Browse the repository at this point in the history
  • Loading branch information
anqif committed Jul 4, 2013
1 parent b90252b commit b7d88ad
Show file tree
Hide file tree
Showing 7 changed files with 176 additions and 79 deletions.
67 changes: 34 additions & 33 deletions .classpath
Original file line number Diff line number Diff line change
@@ -1,33 +1,34 @@
<?xml version="1.0" encoding="UTF-8"?>
<classpath>
<classpathentry kind="src" path="src/main/java"/>
<classpathentry kind="src" path="src/test/java"/>
<classpathentry kind="src" path="src/main/resources"/>
<classpathentry kind="src" path="src/samples/java"/>
<classpathentry kind="lib" path="lib/apache/commons-configuration-1.6.jar"/>
<classpathentry kind="lib" path="lib/apache/commons-lang-2.4.jar" sourcepath="lib/apache/commons-lang-2.4-sources.jar"/>
<classpathentry kind="lib" path="lib/apache/commons-logging-1.1.1.jar" sourcepath="lib/apache/commons-logging-1.1.1-src.zip"/>
<classpathentry kind="lib" path="lib/apache/httpclient-4.1.1.jar"/>
<classpathentry kind="lib" path="lib/apache/httpcore-4.1.jar" sourcepath="lib/apache/httpcore-4.1-sources.jar"/>
<classpathentry kind="lib" path="lib/junit/junit-4.11.jar" sourcepath="lib/junit/junit-4.11-sources.jar"/>
<classpathentry kind="lib" path="lib/apache/guava-12.0.1.jar" sourcepath="lib/apache/guava-12.0.1-sources.jar"/>
<classpathentry kind="lib" path="lib/gson/gson-2.2.2.jar" sourcepath="lib/gson/gson-2.2.2-sources.jar"/>
<classpathentry kind="lib" path="lib/poi/poi-3.8-20120326.jar" sourcepath="lib/poi/poi-3.8-sources.jar"/>
<classpathentry kind="lib" path="lib/poi/poi-ooxml-3.8-20120326.jar"/>
<classpathentry kind="lib" path="lib/poi/poi-ooxml-schemas-3.8-20120326.jar"/>
<classpathentry kind="lib" path="lib/s3/aws-java-sdk-1.3.27.jar" sourcepath="lib/s3/aws-java-sdk-1.3.27-sources.jar"/>
<classpathentry kind="lib" path="lib/jama/Jama.jar"/>
<classpathentry kind="lib" path="lib/javassist.jar" sourcepath="lib/javassist-sources.jar"/>
<classpathentry kind="lib" path="lib/apache/commons-codec-1.4.jar" sourcepath="lib/apache/commons-codec-1.4-sources.zip"/>
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/>
<classpathentry kind="lib" path="lib/jets3t/commons-httpclient-3.1.jar"/>
<classpathentry kind="lib" path="lib/jets3t/jets3t-0.6.1.jar"/>
<classpathentry kind="lib" path="lib/hadoop/cdh3/commons-cli-1.2.jar"/>
<classpathentry kind="lib" path="lib/hadoop/cdh3/guava-r09-jarjar.jar"/>
<classpathentry kind="lib" path="lib/hadoop/cdh3/jackson-core-asl-1.5.2.jar"/>
<classpathentry kind="lib" path="lib/hadoop/cdh3/jackson-mapper-asl-1.5.2.jar"/>
<classpathentry kind="lib" path="lib/hadoop/cdh3/log4j-1.2.15.jar" sourcepath="/home/cypof/.m2/repository/log4j/log4j/1.2.15/log4j-1.2.15-sources.jar"/>
<classpathentry kind="lib" path="lib/hadoop/cdh3/hadoop-core-0.20.2-cdh3u6.jar" sourcepath="lib/hadoop/cdh3/hadoop-core-0.20.2-cdh3u6-sources.jar"/>
<classpathentry kind="lib" path="lib/log4j/log4j-1.2.15.jar"/>
<classpathentry kind="output" path="target/classes"/>
</classpath>
<?xml version="1.0" encoding="UTF-8"?>
<classpath>
<classpathentry kind="src" path="src/main/java"/>
<classpathentry kind="src" path="src/test/java"/>
<classpathentry kind="src" path="src/main/resources"/>
<classpathentry kind="src" path="src/samples/java"/>
<classpathentry kind="lib" path="lib/apache/commons-configuration-1.6.jar"/>
<classpathentry kind="lib" path="lib/apache/commons-lang-2.4.jar" sourcepath="lib/apache/commons-lang-2.4-sources.jar"/>
<classpathentry kind="lib" path="lib/apache/commons-logging-1.1.1.jar" sourcepath="lib/apache/commons-logging-1.1.1-src.zip"/>
<classpathentry kind="lib" path="lib/apache/httpclient-4.1.1.jar"/>
<classpathentry kind="lib" path="lib/apache/httpcore-4.1.jar" sourcepath="lib/apache/httpcore-4.1-sources.jar"/>
<classpathentry kind="lib" path="lib/junit/junit-4.11.jar" sourcepath="lib/junit/junit-4.11-sources.jar"/>
<classpathentry kind="lib" path="lib/apache/guava-12.0.1.jar" sourcepath="lib/apache/guava-12.0.1-sources.jar"/>
<classpathentry kind="lib" path="lib/gson/gson-2.2.2.jar" sourcepath="lib/gson/gson-2.2.2-sources.jar"/>
<classpathentry kind="lib" path="lib/poi/poi-3.8-20120326.jar" sourcepath="lib/poi/poi-3.8-sources.jar"/>
<classpathentry kind="lib" path="lib/poi/poi-ooxml-3.8-20120326.jar"/>
<classpathentry kind="lib" path="lib/poi/poi-ooxml-schemas-3.8-20120326.jar"/>
<classpathentry kind="lib" path="lib/s3/aws-java-sdk-1.3.27.jar" sourcepath="lib/s3/aws-java-sdk-1.3.27-sources.jar"/>
<classpathentry kind="lib" path="lib/jama/Jama.jar"/>
<classpathentry kind="lib" path="lib/javassist.jar" sourcepath="lib/javassist-sources.jar"/>
<classpathentry kind="lib" path="lib/apache/commons-codec-1.4.jar" sourcepath="lib/apache/commons-codec-1.4-sources.zip"/>
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/>
<classpathentry kind="lib" path="lib/jets3t/commons-httpclient-3.1.jar"/>
<classpathentry kind="lib" path="lib/jets3t/jets3t-0.6.1.jar"/>
<classpathentry kind="lib" path="lib/hadoop/cdh3/commons-cli-1.2.jar"/>
<classpathentry kind="lib" path="lib/hadoop/cdh3/guava-r09-jarjar.jar"/>
<classpathentry kind="lib" path="lib/hadoop/cdh3/jackson-core-asl-1.5.2.jar"/>
<classpathentry kind="lib" path="lib/hadoop/cdh3/jackson-mapper-asl-1.5.2.jar"/>
<!-- No source attachment: the previous sourcepath was an absolute path inside one developer's home directory. -->
<classpathentry kind="lib" path="lib/hadoop/cdh3/log4j-1.2.15.jar"/>
<classpathentry kind="lib" path="lib/hadoop/cdh3/hadoop-core-0.20.2-cdh3u6.jar" sourcepath="lib/hadoop/cdh3/hadoop-core-0.20.2-cdh3u6-sources.jar"/>
<classpathentry kind="lib" path="lib/log4j/log4j-1.2.15.jar"/>
<classpathentry kind="lib" path="lib/jblas/jblas-1.2.3.jar"/>
<classpathentry kind="output" path="target/classes"/>
</classpath>
124 changes: 99 additions & 25 deletions R/H2O.R
Original file line number Diff line number Diff line change
Expand Up @@ -85,20 +85,8 @@ h2o.poll <- function(keyName) {
if(res[[i]]$key == keyName)
prog = res[[i]]
}
prog$end_time
}

h2o.poll_rf <- function(keyName) {
type = tryCatch({ typeof(keyName) }, error = function(e) { "expr" })
if (type != "character")
keyName = deparse(substitute(keyName))
res = h2o.__remoteSend(h2o.__PAGE_JOBS)
res = res$jobs
for(i in 1:length(res)) {
if(res[[i]]$destination_key == keyName)
prog = res[[i]]
}
prog$end_time
# prog$end_time
prog$progress
}

# Inspects the given key on H2O cloud. Key can be either a string or a literal which will be translated to a string.
Expand Down Expand Up @@ -126,6 +114,7 @@ h2o.inspect <- function(keyName) {
result = c(result,from[[i]][what]);
result;
}
# result$rows = do.call(rbind, lapply(res$rows, data.frame))
res = res$cols;
result$cols = data.frame(name = as.character(extract(res,"name")),
offset = as.numeric(extract(res,"offset")),
Expand Down Expand Up @@ -165,9 +154,10 @@ h2o.importUrl <- function(keyName, url, parse = TRUE) {
if (parse) {
h2o.__printIfVerbose(" parsing key ",uploadKey," to key ",keyName)
res = h2o.__remoteSend(h2o.__PAGE_PARSE, source_key = uploadKey, destination_key = paste(keyName,".hex",sep=""))
}
# res
while(h2o.poll(res$response$redirect_request_args$job) == "") { Sys.sleep(1) }
# while(h2o.poll(res$response$redirect_request_args$job) == "") { Sys.sleep(1) }
while(h2o.poll(res$response$redirect_request_args$job) != -1) { Sys.sleep(1) }
}
res$response$redirect_request_args$job
}

# Imports a file local to the server the interop is connecting to. Other arguments are the same as for the importUrl
Expand Down Expand Up @@ -216,7 +206,8 @@ h2o.filter <- function(keyName, expr, maxRows = h2o.MAX_GET_ROWS, forceDataFrame

# GLM function. This should be reviewed by someone who actually understands the GLM:-D
# Please note that the x and negX arguments cannot be specified without quotes as lists are expected.
h2o.glm = function(keyName, y, case="1.0", x = "", negX = "", family = "gaussian", xval = 0, threshold = 0.5, norm = "NONE", lambda = 0.1, rho = 1.0, alpha = 1.0) {
# h2o.glm = function(keyName, y, case="1.0", x = "", negX = "", family = "gaussian", xval = 0, threshold = 0.5, norm = "NONE", lambda = 0.1, rho = 1.0, alpha = 1.0) {
h2o.glm = function(keyName, y, case="1.0", x = "", negX = "", family = "gaussian", xval = 0, threshold = 0.5, lambda = 1.0e-5, alpha = 0.5, weight = 1.0, rho = 1.0) {
type = tryCatch({ typeof(keyName) }, error = function(e) { "expr" })
if (type != "character")
keyName = deparse(substitute(keyName))
Expand All @@ -233,10 +224,34 @@ h2o.glm = function(keyName, y, case="1.0", x = "", negX = "", family = "gaussian
negX = paste(negX,sep="",collapse=",")
h2o.__printIfVerbose(" running GLM on vector ",keyName," response column ",y)
res = h2o.__remoteSend(h2o.__PAGE_GLM, key = keyName, y = y, case=case, x = x, "-x" = negX, family = family, xval = xval, threshold = threshold, norm = norm, lambda = lambda, rho = rho, alpha = alpha)
while(h2o.poll(res$response$redirect_request_args$job) == "") { Sys.sleep(1) }
# while(h2o.poll(res$response$redirect_request_args$job) == "") { Sys.sleep(1) }
while(h2o.poll(res$response$redirect_request_args$job) != -1) { Sys.sleep(1) }
h2o.__printModel(res$destination_key, type = "GLM")
}

# Scores a data key against an existing model on the H2O cloud.
#   modelKey - key of the model; either a string or a literal which will be translated to a string
#   scoreKey - key of the data to score; either a string or a literal, translated the same way
#   type     - "GLM" (default) or "KM"; selects which scoring page is requested
# Returns a list: resDev, nullDev, aic and training_error for "GLM"; sq_error for "KM";
# an empty list for any other type.
h2o.score = function(modelKey, scoreKey, type = "GLM") {
  # The typeof() probes get their own variables so they cannot overwrite the `type` argument,
  # which is what selects the scoring branch below.
  modelKeyType = tryCatch({ typeof(modelKey) }, error = function(e) { "expr" })
  if (modelKeyType != "character")
    modelKey = deparse(substitute(modelKey))
  scoreKeyType = tryCatch({ typeof(scoreKey) }, error = function(e) { "expr" })
  # Only deparse non-string arguments; deparsing a string would embed literal quotes in the key.
  if (scoreKeyType != "character")
    scoreKey = deparse(substitute(scoreKey))

  result = list()
  if (type == "GLM") {
    # NOTE(review): the keys are sent as unnamed arguments; confirm h2o.__remoteSend and
    # GLMScore.json accept them this way (other calls in this file name every argument).
    res = h2o.__remoteSend(h2o.__PAGE_GLMSCORE, modelKey, scoreKey)
    res = res$validation
    result$resDev = res$resDev
    result$nullDev = res$nullDev
    result$aic = res$aic
    result$training_error = res$err
  }
  else if (type == "KM") {
    # NOTE(review): same unnamed-argument caveat as the GLM branch.
    res = h2o.__remoteSend(h2o.__PAGE_KMSCORE, modelKey, scoreKey)
    res = res$score
    result$sq_error = res$sqr_error_per_cluster
  }
  result
}

# K-means function.
h2o.kmeans = function(keyName, k = 5, epsilon = 1.0E-6, normalize = 0) {
type = tryCatch({ typeof(keyName) }, error = function(e) { "expr" })
Expand All @@ -247,7 +262,8 @@ h2o.kmeans = function(keyName, k = 5, epsilon = 1.0E-6, normalize = 0) {

h2o.__printIfVerbose(" running ", k, "-means on vector ",keyName)
res = h2o.__remoteSend(h2o.__PAGE_KMEANS, source_key = keyName, k = k, epsilon = epsilon, normalize = normalize)
while(h2o.poll(res$response$redirect_request_args$job) == "") { Sys.sleep(1) }
# while(h2o.poll(res$response$redirect_request_args$job) == "") { Sys.sleep(1) }
while(h2o.poll(res$response$redirect_request_args$job) != -1) { Sys.sleep(1) }
h2o.__printModel(res$destination_key, type = "KM")
}

Expand All @@ -263,11 +279,27 @@ h2o.rf = function(keyName, ntree="", class = "", negX = "", family = "gaussian",

h2o.__printIfVerbose(" running RF on vector ",keyName," class column ",class, " number of trees ", ntree)
res = h2o.__remoteSend(h2o.__PAGE_RF, data_key = keyName, ntree = ntree, class = class)
# res
while(h2o.poll_rf(res$response$redirect_request_args$model_key) == "") { Sys.sleep(1) }
# while(h2o.poll(res$response$redirect_request_args$job) == "") { Sys.sleep(1) }
while(h2o.poll(res$response$redirect_request_args$job) != -1) { Sys.sleep(1) }
h2o.inspect_rf(res$model_key, res$data_key)
}

# h2o.poll_rf <- function(keyName) {
# type = tryCatch({ typeof(keyName) }, error = function(e) { "expr" })
# if (type != "character")
# keyName = deparse(substitute(keyName))
# res = h2o.__remoteSend(h2o.__PAGE_JOBS)
# res = res$jobs
# for(i in 1:length(res)) {
# if(res[[i]]$key == keyName)
# prog = res[[i]]
# else if(grepl(keyName, res[[i]]$destination_key) &&
# regexpr("ConfusionMatrix of ",res[[i]]$destination_key)[1] == 1)
# conf = res[[i]]
# }
# prog$end_time
# }

h2o.inspect_rf = function(modelkey, datakey, oob_err = 1) {
type = tryCatch({ typeof(modelkey) }, error = function(e) { "expr" })
if (type != "character")
Expand All @@ -279,15 +311,42 @@ h2o.inspect_rf = function(modelkey, datakey, oob_err = 1) {
h2o.__printIfVerbose(" Inspecting model key ",modelkey, " for data key ", datakey)
res = h2o.__remoteSend(h2o.__PAGE_RFVIEW, model_key = modelkey, data_key = datakey, out_of_bag_error_estimate = oob_err)
result = list()
result$data_key = res$data_key
result$model_key = res$model_key
result$data_key = res$data_key
result$num_trees = res$ntree
result$confusion_matrix = res$confusion_matrix
# result$confusion_matrix = res$confusion_matrix
result$scores = res$scores
result$classes_errors = res$classes_errors
result$trees = res$trees
result
}

# h2o.score_rf = function(modelKey, dataKey, ntree = 50, no_confusion_matrix = 0, clear_confusion_matrix = 0) {
# type = tryCatch({ typeof(modelKey) }, error = function(e) { "expr" })
# if (type != "character")
# modelKey = deparse(substitute(modelKey))
# type = tryCatch({ typeof(dataKey) }, error = function(e) { "expr" })
# if (type != "character")
# dataKey = deparse(substitute(datakey))
# res = h2o.__remoteSend(h2o.__PAGE_RFSCORE, dataKey, modelKey, ntree, no_confusion_matrix, clear_confusion_matrix)
# result = list()
# result$classes_errors = res$classes_errors
# result$scores = res$scores
# result$trees = res$trees
# }

# Applies a K-means model to a data key and stores the result under destKey.
#   modelKey - key of the K-means model; either a string or a literal which will be translated to a string
#   dataKey  - key of the data to apply the model to; either a string or a literal, translated the same way
#   destKey  - destination key, passed to the server as given
# Blocks until the job finishes, then returns h2o.__printModel(..., type = "KMApply") for the destination key.
h2o.km_apply = function(modelKey, dataKey, destKey) {
  modelKeyType = tryCatch({ typeof(modelKey) }, error = function(e) { "expr" })
  if (modelKeyType != "character")
    modelKey = deparse(substitute(modelKey))
  dataKeyType = tryCatch({ typeof(dataKey) }, error = function(e) { "expr" })
  # substitute() must be given the actual parameter name (dataKey); a misspelled name
  # would deparse to its own spelling instead of the caller's expression.
  if (dataKeyType != "character")
    dataKey = deparse(substitute(dataKey))

  res = h2o.__remoteSend(h2o.__PAGE_KMAPPLY, model_key = modelKey, data_key = dataKey, destination_key = destKey)
  # Poll once per second; the loop exits when h2o.poll returns -1 for the job.
  while(h2o.poll(res$response$redirect_request_args$job) != -1) { Sys.sleep(1) }
  h2o.__printModel(res$destination_key, type = "KMApply")
}

# Internal functions & declarations -----------------------------------------------------------------------------------

Expand All @@ -299,9 +358,13 @@ h2o.__PAGE_REMOVE = "Remove.json"
h2o.__PAGE_IMPORT = "ImportUrl.json"
h2o.__PAGE_PARSE = "Parse.json"
h2o.__PAGE_GLM = "GLM.json"
h2o.__PAGE_GLMSCORE = "GLMScore.json"
h2o.__PAGE_KMEANS = "KMeans.json"
h2o.__PAGE_KMSCORE = "KMeansScore.json"
h2o.__PAGE_KMAPPLY = "KMeansApply.json"
h2o.__PAGE_RF = "RF.json"
h2o.__PAGE_RFVIEW = "RFView.json"
h2o.__PAGE_RFSCORE = "RFScore.json"
h2o.__PAGE_JOBS = "Jobs.json"

h2o.__printIfVerbose <- function(...) {
Expand Down Expand Up @@ -367,7 +430,18 @@ h2o.__printModel <- function(keyName, type) {
result$params = res$GLMParams
result
}
else if(type == "KM") { res$KMeansModel$clusters }
else if(type == "KM") {
result = list()
result$key = keyName
result$clusters = res$KMeansModel$clusters
result
}
else if(type == "KMApply") {
# res = h2o.__remoteSend(h2o.__PAGE_INSPECT, key = keyName, view = 200)
result = res$rows
result = do.call(rbind, lapply(result, data.frame))[,2]
result
}
}

# h2o.rf <- function(key,ntree, depth=30,model=FALSE,gini=1,seed=42,wait=TRUE) {
Expand Down
15 changes: 10 additions & 5 deletions R/H2OTestDemo.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,13 @@ h2o.SERVER="localhost:54321"
h2o.importUrl("covtype", "http://www.stanford.edu/~anqif/covtype.20k.data")
cov.view <- h2o.inspect(covtype.hex)
print(cov.view$cols)
#h2o(slice(covtype.hex,100,100))
#h2o(sum(covtype.hex[12]))

# h2o.glm(covtype.hex, y = 12, case="1",family=binomial)
# h2o.glm(covtype.hex, y = 12, x = "1,2,3,4,5,6,7,8",case=1, family=binomial)
cov.glm1 <- h2o.glm(covtype.hex, y = 12, case = "1", family = binomial)
print(cov.glm1$coef)
print(cov.glm1$dof)
cov.glm2 <- h2o.glm(covtype.hex, y = 12, x = "1,2,3,4,5,6,7,8", case = 1, family = binomial)
cov.glm2 <- h2o.glm(covtype.hex, y = 12, x = "1,2,3,4,5,6,7,8", family = binomial)
print(cov.glm2$coef)
print(cov.glm2$dof)

Expand All @@ -26,7 +24,12 @@ print(cov.glm2$dof)
#h2o(log(covtype[1]))
# add randomforest test
h2o.rf(covtype.hex, class = "54", ntree = "10")
# while(h2o.poll(temp$response$redirect_request_args$key) == "") { Sys.sleep(1) }

h2o.importUrl("iris", "http://www.stanford.edu/~anqif/iris.csv")
iris.view <- h2o.inspect(iris.hex)
print(iris.view$cols)
iris.rf <- h2o.rf(iris.hex, class = "4", ntree = "50")
# h2o.score_rf(iris.rf$model_key, iris.hex)

# Run GLM
# h2o.importFile("prostate", paste(getwd(),"../smalldata/logreg/prostate.csv",sep="/"))
Expand All @@ -39,4 +42,6 @@ print(prostate.glm$coef)
print(prostate.glm$dof)

# Run K-Means
h2o.kmeans(covtype.hex, k = 10)
covtype.km.model <- h2o.kmeans(covtype.hex, k = 10)
covtype.km.cat <- h2o.km_apply(covtype.km.model$key, "covtype.hex", "covtype.km_apply")
print(covtype.km.cat)
3 changes: 2 additions & 1 deletion build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,8 @@ JAR_ROOT=lib

# additional dependencies, relative to this file, but all dependencies should be
# inside the JAR_ROOT tree so that they are packed to the jar file properly
DEPENDENCIES="${JAR_ROOT}/jama/*${SEP}${JAR_ROOT}/apache/*${SEP}${JAR_ROOT}/junit/*${SEP}${JAR_ROOT}/gson/*${SEP}${JAR_ROOT}/javassist.jar${SEP}${JAR_ROOT}/poi/*${SEP}${JAR_ROOT}/s3/*${SEP}${JAR_ROOT}/jets3t/*${SEP}${JAR_ROOT}/log4j/*"
# DEPENDENCIES="${JAR_ROOT}/jama/*${SEP}${JAR_ROOT}/apache/*${SEP}${JAR_ROOT}/junit/*${SEP}${JAR_ROOT}/gson/*${SEP}${JAR_ROOT}/javassist.jar${SEP}${JAR_ROOT}/poi/*${SEP}${JAR_ROOT}/s3/*${SEP}${JAR_ROOT}/jets3t/*${SEP}${JAR_ROOT}/log4j/*"
# jblas is added next to jama rather than replacing it: lib/jama/Jama.jar is still listed in .classpath.
# NOTE(review): drop the jama entry only once nothing in the build needs Jama any more.
DEPENDENCIES="${JAR_ROOT}/jama/*${SEP}${JAR_ROOT}/jblas/*${SEP}${JAR_ROOT}/apache/*${SEP}${JAR_ROOT}/junit/*${SEP}${JAR_ROOT}/gson/*${SEP}${JAR_ROOT}/javassist.jar${SEP}${JAR_ROOT}/poi/*${SEP}${JAR_ROOT}/s3/*${SEP}${JAR_ROOT}/jets3t/*${SEP}${JAR_ROOT}/log4j/*"

DEFAULT_HADOOP_VERSION="cdh3"
OUTDIR="target"
Expand Down
Binary file added lib/jblas/jblas-1.2.3.jar
Binary file not shown.
Loading

0 comments on commit b7d88ad

Please sign in to comment.