Skip to content

Commit

Permalink
Merge branch 'master' of https://github.com/h2oai/h2o
Browse files Browse the repository at this point in the history
  • Loading branch information
arnocandel committed Mar 17, 2015
2 parents c3ff09b + 7ca3a0c commit 44b07e7
Show file tree
Hide file tree
Showing 27 changed files with 839 additions and 34 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,7 @@ dw_3:
mkdir $(BUILD_WEBSITE_DIR)
cp -r h2o-docs/build/html/* $(BUILD_WEBSITE_DIR)
mkdir -p $(BUILD_WEBSITE_DIR)/bits
cp -p docs/0xdata_H2O_Algorithms.pdf $(BUILD_WEBSITE_DIR)/bits
cp -p docs/H2O-Algorithms-Road-Map.pdf $(BUILD_WEBSITE_DIR)/bits
cp -rp target/javadoc $(BUILD_WEBSITE_DIR)/bits
mkdir -p $(BUILD_WEBSITE_DIR)/bits/hadoop
cp -p hadoop/README.txt $(BUILD_WEBSITE_DIR)/bits/hadoop
Expand Down
8 changes: 8 additions & 0 deletions R/h2o-package/R/Classes.R
Original file line number Diff line number Diff line change
Expand Up @@ -1648,6 +1648,14 @@ h2o.sub <- function(pattern, replacement, x, ignore.case = FALSE) {
return(res)
}

# Set every entry of a factor column to a single level.
#
# Builds the expression `setLevel(<key>,<level>)`, evaluates it on the H2O
# instance that `x` belongs to, and returns a handle to the result.
#
# Args:
#   x:     An H2OParsedData object (expected to hold a single factor column).
#   level: A single character string naming the level to assign.
#
# Returns:
#   The object built by .h2o.exec2 for the result key, with its `logic`
#   slot set to FALSE.
#
# Raises:
#   An error if `x` is not an H2OParsedData object or if `level` is not a
#   single, non-missing character string.
h2o.setLevel <- function(x, level) {
  if (!inherits(x, "H2OParsedData")) stop("x must be an H2OParsedData object")
  # deparse() of a longer vector would yield `c(...)` (or several strings),
  # producing a malformed expression, so reject anything but one string.
  if (!is.character(level) || length(level) != 1L || is.na(level)) {
    stop("level must be a single character string")
  }

  # deparse() quotes and escapes the level so it is embedded as a string literal.
  expr <- paste0("setLevel(", x@key, ",", deparse(level), ")")
  res <- .h2o.__exec2(x@h2o, expr)
  res <- .h2o.exec2(res$dest_key, h2o = x@h2o, res$dest_key)
  res@logic <- FALSE
  res
}

trim <- function(x) {
if (!inherits(x, "H2OParsedData")) stop("x must be an H2OParsedData object")
.h2o.__unop2("trim", x)
Expand Down
1 change: 0 additions & 1 deletion R/h2o-package/man/h2o.kmeans.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@ The number of clusters k.
\value{
An object of class \code{\linkS4class{H2OKMeansModel}} with slots key, data, and model, where the last is a list of the following components:
\item{centers }{A matrix of cluster centers.}
\item{cluster }{A \code{\linkS4class{H2OParsedData}} object containing the vector of integers (from 1 to k), which indicate the cluster to which each point is allocated.}
\item{size }{The number of points in each cluster.}
\item{withinss }{Vector of within-cluster sum of squares, with one component per cluster.}
\item{tot.withinss }{Total within-cluster sum of squares, i.e., sum(withinss).}
Expand Down
3 changes: 1 addition & 2 deletions R/h2o-package/man/h2o.performance.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,7 @@ h2o.performance(data, reference, measure = "accuracy", thresholds, gains = TRUE,
}
}
\item{thresholds}{
(Optional) A numeric vector from 0 to 1 indicating the threshold values at which to compute the performance measure. If missing, the range will be automatically generated.
TODO: Still not sure I understand what exactly these thresholds are, is it the FPR or something else?
(Optional) A numeric vector from 0 to 1 indicating the threshold values at which to compute the performance measure. If missing, the range will be automatically generated. Changing the thresholds changes the number of plot points used to calculate the AUC value, so use with caution!
}
\item{gains}{If TRUE, then `h2o.performance` will additionally compute the gains and lift charts. These can be accessed via @gains}
\item{\dots}{Additional arguments to pass to the `h2o.gains` method. Accepts "percents" and "groups".}
Expand Down
22 changes: 22 additions & 0 deletions R/h2o-package/man/h2o.setLevel.Rd
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
\name{h2o.setLevel}
\alias{h2o.setLevel}
\title{Set a Factor Column to a Single Level}
\description{\code{h2o.setLevel} sets every value of a factor column to one of its levels.}
\usage{h2o.setLevel(x, level)}
\arguments{
\item{x}{An \code{\linkS4class{H2OParsedData}} object with a single factor column.}
\item{level}{The level at which the column will be set.}
}
\details{
Every entry of the factor column that holds a different level is replaced with \code{level}.
}
\value{
An object of class "H2OParsedData".
}

\examples{
library(h2o)
localH2O <- h2o.init(ip = "localhost", port = 54321, startH2O = TRUE)
hex <- as.h2o(localH2O , iris)
hex$Species <- h2o.setLevel(hex$Species, "versicolor")
}
2 changes: 2 additions & 0 deletions R/tests/Utils/setupR.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,11 @@ if(!"ROCR" %in% rownames(installed.packages())) install.packages("ROCR")
if (!"plyr" %in% rownames(installed.packages())) install.packages("plyr")
#if (!"rgl" %in% rownames(installed.packages())) install.packages("rgl")
if (!"randomForest" %in% rownames(installed.packages())) install.packages("randomForest")
if(!"AUC" %in% rownames(installed.packages())) install.packages("AUC")
require(glmnet)
require(gbm)
require(ROCR)
require(AUC)

#Global Variables
myIP <- ipPort[[1]]
Expand Down
26 changes: 19 additions & 7 deletions R/tests/testdir_jira/runit_hex_1897_glm_offset.R
Original file line number Diff line number Diff line change
Expand Up @@ -31,26 +31,38 @@ test.GLM.offset <- function(conn) {
prostate.hex = h2o.importFile(object = conn, system.file("extdata", "prostate.csv", package = "h2o"))
prostate.csv = as.data.frame(prostate.hex)

# family_type = c("binomial", "poisson", "gaussian")
family_type = c("binomial", "poisson")

check_models <- function (family_type) {
Log.info (paste ("Checking", family_type, "models without offset..."))
prostate.glm.r = glm(formula = CAPSULE ~ . - ID - AGE, family = family_type, data = prostate.csv)
prostate.glm.h2o = h2o.glm(x = c("RACE", "DPROS", "DCAPS", "PSA", "VOL", "GLEASON"), y = "CAPSULE", data = prostate.hex, family = family_type, standardize = F, higher_accuracy = T)
check_models <- function (family_type, intercept = T) {
Log.info (paste ("Checking", family_type, "models without offset... ", ifelse (intercept, "with intercept", "without intercept") ))
if(intercept) {
prostate.glm.r = glm(formula = CAPSULE ~ . - ID - AGE, family = family_type, data = prostate.csv)
} else {
prostate.glm.r = glm(formula = CAPSULE ~ . + 0 - ID - AGE, family = family_type, data = prostate.csv)
}
prostate.glm.h2o = h2o.glm(x = c("RACE", "DPROS", "DCAPS", "PSA", "VOL", "GLEASON"), y = "CAPSULE", data = prostate.hex, family = family_type, standardize = F, intercept = intercept)

compare_res_deviance(prostate.glm.h2o, prostate.glm.r)
compare_scores(prostate.glm.h2o, prostate.glm.r)

Log.info (paste ("Checking", family_type, "models with offset..."))
options(warn=-1)
prostate.glm.r = glm(formula = CAPSULE ~ . - ID - AGE, family = family_type, data = prostate.csv, offset = prostate.csv$AGE)
prostate.glm.h2o = h2o.glm(x = c("RACE", "DPROS", "DCAPS", "PSA", "VOL", "GLEASON"), y = "CAPSULE", data = prostate.hex, family = family_type, offset = "AGE", standardize = F, higher_accuracy = T)
if(intercept) {
prostate.glm.r = glm(formula = CAPSULE ~ . - ID - AGE, family = family_type, data = prostate.csv, offset = prostate.csv$AGE)
} else {
prostate.glm.r = glm(formula = CAPSULE ~ . + 0 - ID - AGE, family = family_type, data = prostate.csv, offset = prostate.csv$AGE)
}
prostate.glm.h2o = h2o.glm(x = c("RACE", "DPROS", "DCAPS", "PSA", "VOL", "GLEASON"), y = "CAPSULE", data = prostate.hex, family = family_type, offset = "AGE", standardize = F, intercept = intercept)
compare_res_deviance(prostate.glm.h2o, prostate.glm.r)
compare_scores(prostate.glm.h2o, prostate.glm.r)
print("PASSED")
}

run_models = sapply(family_type, check_models)
run_models = sapply(family_type, function(family) check_models(family, intercept = T))
# run_models_wo_intercept = sapply(family_type, function(family) check_models(family, intercept = F))
print(run_models)
# print(run_models_wo_intercept)
testEnd()
}

Expand Down
3 changes: 1 addition & 2 deletions R/tests/testdir_jira/runit_hex_2020_LR_beta_constraints.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,7 @@ test.LR.betaConstraints <- function(conn) {
myX = c("AGE","RACE", "DPROS", "DCAPS", "PSA", "VOL", "GLEASON")
lowerbound = rep(-1, times = length(myX))
upperbound = rep(1, times = length(myX))
starting = rep(-0.008235181, times = length(myX))
betaConstraints = data.frame(names = myX, lower_bounds = lowerbound, upper_bounds = upperbound, beta_given= starting)
betaConstraints = data.frame(names = myX, lower_bounds = lowerbound, upper_bounds = upperbound)
betaConstraints.hex = as.h2o(conn, betaConstraints)
prostate.csv = as.data.frame(prostate.hex)

Expand Down
2 changes: 1 addition & 1 deletion R/tests/testdir_jira/runit_hex_2022_prior_constraints.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ source('../findNSourceUtils.R')
test.Priors.BetaConstraints <- function(conn) {
Log.info("Import modelStack data into H2O...")
## Import data
pathToFile = "/mnt/0xcustomer-datasets/c27/modelStack.csv"
pathToFile = "/mnt/0xcustomer-datasets/c27/data.csv"
pathToConstraints <- "/mnt/0xcustomer-datasets/c27/constraints_indices.csv"
modelStack = h2o.importFile(conn, pathToFile)
betaConstraints.hex = h2o.importFile(conn, pathToConstraints)
Expand Down
17 changes: 9 additions & 8 deletions R/tests/testdir_misc/runit_nfold.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ setwd(normalizePath(dirname(R.utils::commandArgs(asValues=TRUE)$"f")))
source('../findNSourceUtils.R')

test.h2o.nfold <- function(conn) {
tolerance <- 1e-4
tolerance <- 1e-2

hex <- h2o.importFile(conn, normalizePath(locate("smalldata/logreg/prostate.csv")))
predictors = c(3:9)
Expand All @@ -29,22 +29,23 @@ test.h2o.nfold <- function(conn) {
}

# compare metrics
perf <- h2o.performance(as.h2o(conn,predictions), hex[,response])
perf_auc <- h2o.performance(as.h2o(conn,predictions), hex[,response], measure = "F1")
perf_cm <- h2o.performance(as.h2o(conn,predictions), hex[,response], thresholds = m@model$best_cutoff)
auc <- m@model$auc
accuracy <- m@model$accuracy
cm <- m@model$confusion

auc
perf@model$auc
if (abs(auc - perf@model$auc) > tolerance) stop("AUC is wrong")
perf_auc@model$auc
if (abs(auc - perf_auc@model$auc) > tolerance) stop("AUC is wrong")

accuracy
perf@model$accuracy
if (abs(accuracy - perf@model$accuracy) > tolerance) stop("accuracy is wrong")
perf_cm@model$accuracy
if (abs(accuracy - perf_cm@model$accuracy) > tolerance) stop("accuracy is wrong")

cm
perf@model$confusion
if (max(abs(cm[1:9] - perf@model$confusion[1:9])) != 0) stop("cm is wrong")
perf_cm@model$confusion
if (max(abs(cm[1:9] - perf_cm@model$confusion[1:9])) > 2) stop("cm is wrong")

testEnd()
}
Expand Down
Binary file added docs/H2O-Algorithms-Road-Map.docx
Binary file not shown.
Binary file added docs/H2O-Algorithms-Road-Map.pdf
Binary file not shown.
4 changes: 2 additions & 2 deletions h2o-docs/source/resources/algoroadmap.rst
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
.. _Roadmap:

H\ :sub:`2`\ O Algorithms Roadmap
==================================
=================================

.. raw:: html

<div style="margin-top:10px;">
<iframe width=900 height=900 src="../bits/0xdata_H2O_Algorithms.pdf" frameborder="0" allowfullscreen></iframe>
<iframe width=900 height=900 src="../bits/H2O-Algorithms-Road-Map.pdf" frameborder="0" allowfullscreen></iframe>
</div>

1 change: 1 addition & 0 deletions h2o-samples/devops-automation/application.properties
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
name: Phil
48 changes: 48 additions & 0 deletions h2o-samples/devops-automation/build.gradle
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
// Build script for the Spring Boot sample application.
//
// The buildscript block resolves the Spring Boot Gradle plugin itself; the
// top-level repositories/dependencies blocks resolve the application's own
// dependencies.
buildscript {
    ext {
        springBootVersion = '1.3.0.BUILD-SNAPSHOT'
    }
    repositories {
        // NOTE: You should declare only repositories that you need here
        mavenLocal()
        mavenCentral()
        // Use HTTPS so build-time artifacts are not fetched over an
        // unauthenticated, tamperable connection.
        maven { url "https://repo.spring.io/release" }
        maven { url "https://repo.spring.io/milestone" }
        maven { url "https://repo.spring.io/snapshot" }
    }
    dependencies {
        classpath("org.springframework.boot:spring-boot-gradle-plugin:${springBootVersion}")
    }
}

apply plugin: 'java'
apply plugin: 'eclipse'
apply plugin: 'idea'
apply plugin: 'spring-boot'

// Name and version of the produced jar.
jar {
    baseName = 'spring-boot-sample-simple'
    version = '0.0.0'
}

// Hand the system properties of the Gradle JVM to the `run` task.
run {
    systemProperties = System.properties
}

repositories {
    // NOTE: You should declare only repositories that you need here
    mavenLocal()
    mavenCentral()
    // HTTPS for the same reason as in the buildscript block.
    maven { url "https://repo.spring.io/release" }
    maven { url "https://repo.spring.io/milestone" }
    maven { url "https://repo.spring.io/snapshot" }
}

dependencies {
    compile("org.springframework.boot:spring-boot-starter")
    testCompile("org.springframework.boot:spring-boot-starter-test")
}

// Generates the Gradle wrapper scripts pinned to the given Gradle version.
task wrapper(type: Wrapper) {
    gradleVersion = '1.6'
}
63 changes: 63 additions & 0 deletions h2o-samples/devops-automation/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">

<modelVersion>4.0.0</modelVersion>

<parent>
<artifactId>spring-boot-starter-parent</artifactId>
<groupId>org.springframework.boot</groupId>
<version>1.2.0.RELEASE</version>
<relativePath/>
</parent>

<prerequisites>
<maven>3.0.0</maven>
</prerequisites>


<artifactId>spring-boot-sample-simple</artifactId>
<name>Spring Boot Simple Sample</name>
<description>Spring Boot Simple Sample</description>
<url>http://projects.spring.io/spring-boot/</url>
<organization>
<name>Pivotal Software, Inc.</name>
<url>http://www.spring.io</url>
</organization>
<properties>
<main.basedir>${basedir}/../..</main.basedir>
</properties>
<dependencies>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter</artifactId>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-test</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.json</groupId>
<artifactId>json</artifactId>
<version>20141113</version>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-maven-plugin</artifactId>
<configuration>
<jvmArguments>-Djava.rmi.server.hostname=localhost -Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=n,address=5005</jvmArguments>
<arguments>
<argument>--spring.profiles.active=dev</argument>
</arguments>
</configuration>
</plugin>
</plugins>
</build>
</project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
/*
* Copyright 2012-2013 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package sample.simple;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.CommandLineRunner;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;

import sample.simple.service.HelloWorldService;
import sample.simple.service.h2oService;

/**
 * Command-line Spring Boot sample that drives a sequence of calls on
 * {@link h2oService}: {@code ImportCSVFile}, {@code ParseCSVFile},
 * {@code BuildGBMModel}, {@code PredictGBM}, {@code CalculateAUC} and
 * {@code DownloadPOJO}. The sequence stops at the first step that reports
 * failure.
 */
@SpringBootApplication
public class SampleSimpleApplication implements CommandLineRunner {

    // NOTE(review): developer-specific absolute path; it must exist on the
    // machine running the sample.
    static final String IMPORT_FILE = "/Users/paragsanghavi/Documents/h2o/smalldata/prostate/prostate.csv.zip";

    // Key passed both when building the GBM model and when downloading its POJO.
    private static final String GBM_MODEL_KEY = "gbmmodelDestinationKey";

    // Simple example shows how a command line spring application can execute an
    // injected bean service. Also demonstrates how you can use @Value to inject
    // command line args ('--name=whatever') or application properties

    @Autowired
    private HelloWorldService helloWorldService;

    @Autowired
    private h2oService h2oservice;

    private final Logger log = LoggerFactory.getLogger(SampleSimpleApplication.class);

    /**
     * Prints the hello message, then runs the import / parse / build /
     * predict / AUC / POJO-download sequence against {@link #IMPORT_FILE}.
     *
     * <p>Failure signals handled here: the import step returning
     * {@code "error"} (case-insensitive) or {@code null}, and any of the
     * parse, build or predict steps returning {@code null}.
     *
     * @param args command-line arguments (not used by this method)
     */
    @Override
    public void run(String... args) {
        System.out.println(this.helloWorldService.getHelloMessage());

        String importedKey = h2oservice.ImportCSVFile(IMPORT_FILE);
        // Null-safe: a null key is treated as a failed import instead of
        // throwing a NullPointerException.
        if (importedKey == null || importedKey.equalsIgnoreCase("error")) {
            // Logged at error level so the failure is not hidden when debug
            // logging is disabled.
            log.error("Error occurred in Importing File, {}", IMPORT_FILE);
            return;
        }

        String frameKey = h2oservice.ParseCSVFile(importedKey, "prostate_csv.hex");
        System.out.println(frameKey);
        if (frameKey == null) {
            System.out.println("Error in parsing data set");
            return;
        }

        String modelKey = h2oservice.BuildGBMModel(GBM_MODEL_KEY, frameKey);
        if (modelKey == null) {
            System.out.println("Error in building GBM model");
            return;
        }

        String prediction = h2oservice.PredictGBM(modelKey, frameKey);
        if (prediction == null) {
            System.out.println("Error in prediction");
            return;
        }

        // The AUC was previously computed and discarded; report it.
        Double auc = h2oservice.CalculateAUC(frameKey, "CAPSULE", "1");
        System.out.println("AUC: " + auc);

        String pojoLocation = h2oservice.DownloadPOJO(GBM_MODEL_KEY);
        System.out.println(pojoLocation);
    }

    /**
     * Starts the Spring application (which invokes {@link #run(String...)})
     * and then terminates the JVM with exit status 0.
     */
    public static void main(String[] args) throws Exception {
        SpringApplication.run(SampleSimpleApplication.class, args);
        System.exit(0);
    }
}
Loading

0 comments on commit 44b07e7

Please sign in to comment.