Skip to content

Commit

Permalink
Merge branch 'master' into tomas-parse
Browse files Browse the repository at this point in the history
Conflicts:
	src/main/java/hex/glm/GLM2.java
	src/main/java/water/DTask.java
	src/main/java/water/H2O.java
	src/main/java/water/Job.java
	src/main/java/water/RPC.java
	src/main/java/water/api/GLMPredict.java
	src/main/java/water/fvec/ParseDataset2.java
	src/main/java/water/parser/ParseDataset.java
	src/test/java/water/fvec/DatasetCompare.java
  • Loading branch information
tomasnykodym committed Aug 6, 2014
2 parents 3fa93c4 + c389939 commit a9a8b48
Show file tree
Hide file tree
Showing 438 changed files with 7,033 additions and 24,143 deletions.
8 changes: 7 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ build:
$(MAKE) build_h2o PROJECT_VERSION=$(PROJECT_VERSION)

@echo
@echo "PHASE: Building hadoop driver..."
@echo "PHASE: Building Hadoop driver..."
@echo
$(MAKE) -C hadoop build PROJECT_VERSION=$(PROJECT_VERSION) 1> target/logs/hadoop_build.log

Expand All @@ -119,6 +119,11 @@ build:
@echo
$(MAKE) build_package PROJECT_VERSION=$(PROJECT_VERSION) 1> target/logs/package_build.log

@echo
@echo "PHASE: Building ZooKeeper jar..."
@echo
$(MAKE) -C h2o-zookeeper PROJECT_VERSION=$(PROJECT_VERSION) 1> target/logs/zookeeper_build.log

BUILD_BRANCH=$(shell git branch | grep '*' | sed 's/* //')
BUILD_HASH=$(shell git log -1 --format="%H")
BUILD_DESCRIBE=$(shell git describe --always --dirty)
Expand Down Expand Up @@ -304,6 +309,7 @@ clean:
$(MAKE) -C client clean
$(MAKE) -C h2o-scala clean
$(MAKE) -C hadoop clean
$(MAKE) -C h2o-zookeeper clean
$(MAKE) -C R clean
$(MAKE) -C launcher clean
$(MAKE) -C installer clean
Expand Down
4 changes: 2 additions & 2 deletions R/H2O_Load.R
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
# Change this global variable to match your own system's path
ROOT.PATH <- "/Users/spencer/master/h2o/R/h2o-package/R/"
ROOT.PATH <- "/Users/anqi_fu/Documents/workspace/h2o/R/h2o-package/R/"
src <-
function() {
warning("MAY NOT WORK ON YOUR SYSTEM -- **TRY TO CHANGE `ROOT.PATH`!**")
to_src <- c("Wrapper.R", "Internal.R", "Classes.R", "ParseImport.R", "Algorithms.R")
to_src <- c("Wrapper.R", "Internal.R", "Classes.R", "ParseImport.R", "models.R", "Algorithms.R")
require(rjson); require(RCurl)
invisible(lapply(to_src,function(x){source(paste(ROOT.PATH, x, sep = ""))}))
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
#' What this demo covers:
#' Algorithms:
#' h2o.randomForest --The distributed random forest algorithm
#' h2o.SpeeDRF --A faster random forest, but not as accurate
#' h2o.randomForest (type = "fast") --A faster random forest, but not as accurate
#' h2o.deeplearning --Distributed, parallel deep learning
#' h2o.gbm --Gradient Boosted Machines (a tree-based algo)
#' h2o.glm --GLM again
Expand Down Expand Up @@ -39,7 +39,7 @@ library(plyr)

# Read in the data
# The path is relative to the directory from which H2O was started (i.e. the directory where `java -jar` was run)
flights <- h2o.importFile(h, "../../../smalldata/airlines/allyears2k_headers.zip", "flights.hex")
flights <- h2o.importFile(h, normalizePath("../../../smalldata/airlines/allyears2k_headers.zip"), "flights.hex")

#################################################################################
#
Expand Down Expand Up @@ -95,7 +95,7 @@ function(origin, dataset) {
dataset <- dataset[dataset$Origin == origin,]
print("Beginning Random Forest with 50 trees, 20 depth, and 10-fold Cross Validation\n")
t0 <- Sys.time()
model <- h2o.randomForest(x = c(FlightDate, ScheduledTimes, FlightInfo), y = Delayed, data = dataset, ntree = 50, depth = 20, nfolds = 10)
model <- h2o.randomForest(x = c(FlightDate, ScheduledTimes, FlightInfo), y = Delayed, data = dataset, ntree = 50, depth = 20, nfolds = 10, type = "BigData")
elapsed_seconds <- as.numeric(Sys.time() - t0)
modelkey <- model@key
result <- list(list(model, origin, elapsed_seconds))
Expand All @@ -108,7 +108,7 @@ function(origin, dataset) {
dataset <- dataset[dataset$Origin == origin,]
print("Beginning Speedy Random Forest with 50 trees, 20 depth, and 10-fold Cross Validation\n")
t0 <- Sys.time()
model <- h2o.SpeeDRF(x = c(FlightDate, ScheduledTimes, FlightInfo), y = Delayed, data = dataset, ntree = 50, depth = 20, nfolds = 10)
model <- h2o.randomForest(x = c(FlightDate, ScheduledTimes, FlightInfo), y = Delayed, data = dataset, ntree = 50, depth = 20, nfolds = 10, type = "fast")
elapsed_seconds <- as.numeric(Sys.time() - t0)
modelkey <- model@key
result <- list(list(model, origin, elapsed_seconds))
Expand Down Expand Up @@ -148,7 +148,7 @@ function(fitMethod, origins, dataset) {
}

# Iterate over the fit functions as well
model.fit.fcns <- c(lr.fit, rf.fit, srf.fit, gbm.fit, dl.fit)
model.fit.fcns <- c(lr.fit, srf.fit) #rf.fit, srf.fit, gbm.fit, dl.fit)
# See the Notes section below to get insight into the following one-liner
models.by.airport.origin <- unlist(recursive = F, lapply(model.fit.fcns, all.fit, frequent.origin.codes, flights))

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ library(plyr)


# Read in the data
flights <- h2o.importFile(h, "../../../smalldata/airlines/allyears2k_headers.zip", "flights.hex")
flights <- h2o.importFile(h, normalizePath("../../../smalldata/airlines/allyears2k_headers.zip"), "flights.hex")

#################################################################################
#
Expand Down Expand Up @@ -168,7 +168,7 @@ rf.fit<-
function(response, dataset, testdata) {
print("Beginning Random Forest with 10 trees, 20 depth, and 2-fold Cross Validation\n")
t0 <- Sys.time()
model <- h2o.randomForest(x = c(FlightDate, ScheduledTimes, FlightInfo), y = response, data = dataset, ntree = 10, depth = 20, nfolds = 2, balance.classes = T)
model <- h2o.randomForest(x = c(FlightDate, ScheduledTimes, FlightInfo), y = response, data = dataset, ntree = 10, depth = 20, nfolds = 2, balance.classes = T, type = "BigData")
elapsed_seconds <- as.numeric(Sys.time() - t0)
modelkey <- model@key

Expand All @@ -185,7 +185,7 @@ srf.fit<-
function(response, dataset, testdata) {
print("Beginning Speedy Random Forest with 10 trees, 20 depth, and 2-fold Cross Validation\n")
t0 <- Sys.time()
model <- h2o.SpeeDRF(x = c(FlightDate, ScheduledTimes, FlightInfo), y = response, data = dataset, ntree = 10, depth = 20, nfolds = 2, balance.classes = T)
model <- h2o.randomForest(x = c(FlightDate, ScheduledTimes, FlightInfo), y = response, data = dataset, ntree = 10, depth = 20, nfolds = 2, balance.classes = T, type = "fast")
elapsed_seconds <- as.numeric(Sys.time() - t0)
modelkey <- model@key

Expand Down Expand Up @@ -238,7 +238,7 @@ function(fitMethod, responses, dataset, testdata) {
}

# Iterate over the fit functions as well as the targets in `tgts`
model.fit.fcns <- c(lr.fit, rf.fit, srf.fit, gbm.fit, dl.fit)
model.fit.fcns <- c(lr.fit, rf.fit, srf.fit, gbm.fit)#, dl.fit)

# This will loop over all of the models and score for each of the responses in tgts
models.by.tgt <- unlist(recursive = F, lapply(model.fit.fcns, all.fit, tgts, train, test))
Expand Down
4 changes: 2 additions & 2 deletions R/h2o-DESCRIPTION.template
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@ Title: H2O R Interface
Version: SUBST_PROJECT_VERSION
Date: 2014-05-15
Author: Anqi Fu, Tom Kraljevic and Petr Maj, with contributions from the 0xdata team
Maintainer: Anqi Fu <anqi@0xdata.com>
Maintainer: Ariel Rao <ariel@0xdata.com>
Description: This is a package for running H2O via its REST API from within R.
License: Apache License (== 2.0)
Depends: R (>= 2.13.0), RCurl, rjson, statmod, tools, methods, utils
Collate: Wrapper.R Internal.R Classes.R ParseImport.R Algorithms.R
Collate: Wrapper.R Internal.R Classes.R ParseImport.R models.R Algorithms.R
NeedsCompilation: no
SystemRequirements: Java (>= 1.6)
URL: http://www.0xdata.com
2 changes: 1 addition & 1 deletion R/h2o-package.template
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ Note that no actual data is stored in the R workspace; and no actual work is car
\author{
Anqi Fu, Tom Kraljevic and Petr Maj, with contributions from the 0xdata team

Maintainer: Anqi Fu <anqi@0xdata.com>
Maintainer: Ariel Rao <ariel@0xdata.com>
}
\references{
\itemize{
Expand Down
Loading

0 comments on commit a9a8b48

Please sign in to comment.