Skip to content

Commit

Permalink
Merge branch 'master' of https://github.com/h2oai/h2o
Browse files Browse the repository at this point in the history
  • Loading branch information
arnocandel committed May 19, 2015
2 parents 7848dd1 + fd9a67f commit 2e506ce
Show file tree
Hide file tree
Showing 4 changed files with 105 additions and 0 deletions.
11 changes: 11 additions & 0 deletions R/h2o-package/R/Internal.R
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ h2o.setLogPath <- function(path, type) {
# JSON page names for the H2O server; for example,
# .h2o.__PAGE_GARBAGECOLLECT is what .h2o.garbageCollect() hands to
# .h2o.__remoteSend().
.h2o.__SET_DOMAIN <- "2/SetDomains.json"
.h2o.__PAGE_ALLMODELS <- "2/Models.json"
.h2o.__GAINS <- "2/GainsLiftTable.json"
# Note: unlike its neighbours this page name carries no "2/" prefix.
.h2o.__PAGE_GARBAGECOLLECT <- "GarbageCollect.json"

.h2o.__PAGE_IMPUTE <- "2/Impute.json"
.h2o.__PAGE_EXEC2 <- "2/Exec2.json"
Expand Down Expand Up @@ -1054,3 +1055,13 @@ h2o.getFrame <- function(h2o, key) {
"gamma" = Gamma(link))
}
}

#
# Send the GarbageCollect page request to the H2O instance behind `client`.
#
# This function is internal intentionally (not exported). Call it as:
#   h2o:::.h2o.garbageCollect(localH2O)
#
# Whatever .h2o.__remoteSend() returns is passed back invisibly, so nothing
# is printed when the function is called at the prompt.
#
.h2o.garbageCollect <- function(client) {
  invisible(.h2o.__remoteSend(client, .h2o.__PAGE_GARBAGECOLLECT))
}
58 changes: 58 additions & 0 deletions R/tests/testdir_hdfs_xlarge/runit_GLM_airlines_billion_xlarge.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
#----------------------------------------------------------------------
# Purpose: This test exercises HDFS operations from R.
#
# Setup section: moves into the script's own directory, loads the shared
# test utilities, reads the H2O ip/port from the command line and the HDFS
# name node from the NAME_NODE environment variable, then connects to an
# already-running H2O instance.
#----------------------------------------------------------------------

# Change into the directory of this script so the relative source() below
# resolves.
# NOTE(review): `$"f"` presumably relies on `$` partial matching to pick up
# either an "f" or a "file" command-line entry -- confirm before replacing
# it with `[["f"]]`.
setwd(normalizePath(dirname(R.utils::commandArgs(asValues=TRUE)$"f")))
# Presumably defines get_args(), heading() and PASS_BANNER() used by this
# script -- verify in findNSourceUtils.R.
source('../findNSourceUtils.R')

# First element is used as the H2O ip, second as the port.
ipPort <- get_args(commandArgs(trailingOnly = TRUE))
myIP <- ipPort[[1]]
myPort <- ipPort[[2]]
# HDFS name node comes from the environment; printed so it shows in the log.
hdfs_name_node <- Sys.getenv(c("NAME_NODE"))
print(hdfs_name_node)

library(RCurl)
library(testthat)
library(h2o)

heading("BEGIN TEST")
# startH2O = FALSE: connect to an existing instance rather than launching one.
conn <- h2o.init(ip=myIP, port=myPort, startH2O = FALSE)

# Path of the dataset on HDFS; appended to the name node to form the URL.
hdfs_data_file <- "/datasets/airlinesbillion.csv"

#----------------------------------------------------------------------
# Single file cases.
#----------------------------------------------------------------------

heading("Testing single file importHDFS")
url <- sprintf("hdfs://%s%s", hdfs_name_node, hdfs_data_file)
parse_time <- system.time(data.hex <- h2o.importFile(conn, url))
# Report wall-clock time: element 1 of system.time() is the user CPU time of
# this R process only, which says nothing about how long the import took.
# print() makes the line show up even when the script is source()d.
print(paste("Time it took to parse", parse_time[["elapsed"]]))

# Second handle on the imported frame (not used further in this script).
data1.hex <- data.hex

# Sanity check: the import must yield exactly the expected number of rows.
n <- nrow(data.hex)
print(n)
if (n != 1166952590) {
  stop("nrows is wrong")
}

# Construct validation and train sets by sampling (20/80):
# create a column as tall as the imported frame and split on its values.
s <- h2o.runif(data.hex) # Useful when number of rows too large for R to handle
data.train <- data.hex[s <= 0.8, ]
data.valid <- data.hex[s > 0.8, ]

## Response = Distance

myY <- "C19"
#myX = setdiff(names(data.hex), c(myY, ""))
myX <- c("C20", "C21", "C22", "C23", "C24", "C25", "C26", "C27", "C28", "C29")

## Build a gaussian GLM on the training split and report the build time.
## (data.valid is only needed by the commented-out IRLSM variant below.)

#glm_irlsm_time <- system.time(data_irlsm.glm <- h2o.glm(x = myX, y = myY, data = data.train, validation=data.valid, family = "gaussian", solver = "IRLSM"))
glm_time <- system.time(
  data.glm <- h2o.glm(x = myX, y = myY, data = data.train, family = "gaussian")
)
print(data.glm)
# Report wall-clock time: element 1 of system.time() is the user CPU time of
# this R process only, which says nothing about how long the model build took.
print(paste("Time it took to build GLM ", glm_time[["elapsed"]]))

PASS_BANNER()
34 changes: 34 additions & 0 deletions src/main/java/water/api/GarbageCollect.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
package water.api;

import water.DTask;
import water.H2O;
import water.H2ONode;
import water.RPC;
import water.util.Log;

/**
 * Request that asks each node listed in {@code H2O.CLOUD._memary} to run
 * {@code System.gc()}, one node after another.
 */
public class GarbageCollect extends Request {

  /**
   * Task sent to a node: logs, calls {@code System.gc()}, logs again and then
   * signals completion via {@code tryComplete()}.
   */
  private static class GCTask extends DTask<GCTask> {
    // NOTE(review): explicit public no-arg constructor kept as in the original;
    // presumably needed when the task is instantiated on the receiving node -- confirm.
    public GCTask() {
    }

    /** Runs at {@code H2O.MIN_HI_PRIORITY}. */
    @Override public byte priority() {
      return H2O.MIN_HI_PRIORITY;
    }

    @Override public void compute2() {
      Log.info("Calling System.gc() now...");
      System.gc();
      Log.info("System.gc() finished");
      tryComplete();
    }
  }

  /**
   * Sends a fresh {@link GCTask} to every cloud member in turn; the
   * {@code get()} on each RPC is issued before the next node is contacted.
   *
   * @return an empty "done" response
   */
  @Override public RequestBuilders.Response serve() {
    for (H2ONode member : H2O.CLOUD._memary) {
      RPC<GCTask> rpc = new RPC<GCTask>(member, new GCTask());
      rpc.call().get();
    }
    return RequestBuilders.Response.doneEmpty();
  }
}
2 changes: 2 additions & 0 deletions src/main/java/water/api/RequestServer.java
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,7 @@ public enum API_VERSION {
registerRequest(new UnlockKeys());
registerRequest(new Order());
registerRequest(new RemoveVec());
registerRequest(new GarbageCollect());
} else {
Request.addToNavbar(registerRequest(new MatrixMultiply()), "Matrix Multiply", "Beta");
Request.addToNavbar(registerRequest(new hex.LR2()), "Linear Regression2", "Beta");
Expand All @@ -189,6 +190,7 @@ public enum API_VERSION {
Request.addToNavbar(registerRequest(new UnlockKeys()), "Unlock Keys (use with caution)","Beta");
Request.addToNavbar(registerRequest(new Order()), "Order", "Beta");
Request.addToNavbar(registerRequest(new RemoveVec()), "RemoveVec", "Beta");
Request.addToNavbar(registerRequest(new GarbageCollect()), "GarbageCollect", "Beta");
}

registerRequest(new Up());
Expand Down

0 comments on commit 2e506ce

Please sign in to comment.