Skip to content

Commit

Permalink
PUB-1153: add runit_setLevel.R
Browse files Browse the repository at this point in the history
  • Loading branch information
spennihana committed Mar 18, 2015
1 parent 69515eb commit 26bb553
Showing 1 changed file with 100 additions and 0 deletions.
100 changes: 100 additions & 0 deletions R/tests/testdir_misc/runit_setLevel.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
# Switch the working directory to the folder containing this script, so the
# relative path to the shared test utilities below resolves.
# NOTE(review): `$f` partial-matches list names, so it may also pick up an
# entry such as `file` -- confirm before replacing it with `[["f"]]`.
setwd(
  normalizePath(
    dirname(R.utils::commandArgs(asValues = TRUE)$f)
  )
)

# Load the shared test helpers (presumably where doTest/testEnd come from --
# verify against findNSourceUtils.R).
source("../findNSourceUtils.R")

# Exploratory test for h2o.setLevel (PUB-1153).
#
# Builds a synthetic binary-response data set, uploads it with as.h2o, fits a
# GBM with h2o.gbm, and then repeatedly overwrites the factor column x5 with a
# single level ("a", "b", "c") via h2o.setLevel, re-scoring the model each
# time. The function makes no assertions: it prints the heads of the frames and
# predictions so they can be compared by eye against the reference output kept
# in the comments below.
#
# Args:
#   conn: passed as the first argument to as.h2o (presumably the H2O
#         connection object supplied by doTest -- confirm against the harness).
#
# Returns:
#   Whatever testEnd() returns.
test.setLevel <- function(conn) {
  library(MASS)

  # ---------------------------------------------------------------------------
  # 1. generate data

  set.seed(1)
  N <- 5000
  x1 <- runif(N)
  x2 <- runif(N)
  sigma <- matrix(c(1, 0.95, 0.95, 1), 2, 2)
  x34 <- mvrnorm(N, c(0, 0), sigma)
  x3 <- x34[, 1]
  x4 <- x34[, 2]
  x5 <- factor(sample(letters[1:3], N, replace = TRUE))
  x6 <- factor(sample(letters[1:3], N, replace = TRUE))
  x7 <- factor(sample(letters[1:10], N, replace = TRUE))
  expit <- function(x) 1 / (1 + exp(-x))
  # The response depends on x1, x2, x3 and the level of x5 only; x4, x6 and x7
  # do not enter the formula.
  p <- expit(-1 + 2 * x1 ^ 0.5 + 0.5 * log(1 + x2) - x3 ^ 2 +
               c(-1, 0.5, 0)[as.numeric(x5)])
  y <- rbinom(N, 1, p)
  data <- data.frame(y = y, x1 = x1, x2 = x2, x3 = x3, x4 = x4, x5 = x5,
                     x6 = x6, x7 = x7)
  data.hex <- as.h2o(conn, data)
  # Values are not auto-printed inside a function body, so every inspection
  # below is wrapped in print() to actually reach the test log.
  print(head(data.hex))
  str(data.hex)

  # ---------------------------------------------------------------------------
  # 2. fit a gbm model
  fit.gbm <- h2o.gbm(y = 1, x = 2:8, distribution = "bernoulli", n.trees = 100,
                     data = data.hex, interaction.depth = 4, shrinkage = 0.03,
                     importance = TRUE)

  p1 <- h2o.predict(fit.gbm, data.hex)
  print(head(p1))
  # predict X0 X1
  # 1 0 0.5448750 0.4551250
  # 2 0 0.7948446 0.2051553
  # 3 0 0.6927410 0.3072590
  # 4 0 0.5632815 0.4367184
  # 5 0 0.8724055 0.1275945
  # 6 0 0.5093251 0.4906749
  # ---------------------------------------------------------------------------


  # ---------------------------------------------------------------------------
  # 3. fix x5 column at "a"
  # Copy of the original column; only referenced by the commented-out restore
  # in step 4.
  x5_original <- data.hex$x5
  data.hex$x5 <- h2o.setLevel(data.hex$x5, "a")
  print(head(data.hex))
  p2 <- h2o.predict(fit.gbm, data.hex)
  print(head(p2))
  # predict X0 X1
  # 1 0 0.8122386 0.1877614
  # 2 0 0.7948446 0.2051553
  # 3 0 0.9031955 0.0968045
  # 4 0 0.6892335 0.3107665
  # 5 0 0.8724055 0.1275945
  # 6 0 0.6813020 0.3186980

  # 4. fix x5 at "b"
  # data.hex$x5 <- x5_original
  data.hex$x5 <- h2o.setLevel(data.hex$x5, "b")
  print(head(data.hex))
  p2 <- h2o.predict(fit.gbm, data.hex)
  print(head(p2))
  # predict X0 X1
  # 1 0 0.5662285 0.4337715
  # 2 1 0.4209059 0.5790941
  # 3 0 0.6822745 0.3177256
  # 4 1 0.4773917 0.5226083
  # 5 0 0.6659734 0.3340266
  # 6 1 0.4713104 0.5286896

  # 5. fix x5 at "c"
  data.hex$x5 <- h2o.setLevel(data.hex$x5, "c")
  print(head(data.hex))
  p2 <- h2o.predict(fit.gbm, data.hex)
  print(head(p2))
  # predict X0 X1
  # 1 0 0.6266587 0.3733413
  # 2 0 0.5623179 0.4376821
  # 3 0 0.8490875 0.1509124
  # 4 0 0.5839345 0.4160654
  # 5 0 0.8178309 0.1821690
  # 6 0 0.5456495 0.4543505

  # ---------------------------------------------------------------------------
  # Conclusion: the prediction from step 4 is not right!
  # another question: how is the best_cutoff chosen?

  print(fit.gbm@model$best_cutoff) # 0.36
  # Uses the predictions made before x5 was overwritten (p1); the third column
  # is the one labelled X1 in the reference output above.
  accu <- h2o.performance(p1[, 3], data.hex$y, measure = "accuracy")
  print(accu@cutoffs[accu@measure == max(accu@measure)]) # 0.46

  testEnd()
}

# Register the test with the harness; the first argument is the test
# description and should describe what test.setLevel actually exercises.
doTest("Fix a factor column at a single level with h2o.setLevel and re-predict", test.setLevel)

0 comments on commit 26bb553

Please sign in to comment.