Skip to content

Commit

Permalink
standardize dataset for glm separation test
Browse files (browse the repository at this point in the history)
  • Loading branch information
raoariel committed Aug 6, 2014
1 parent 9145f2e commit 86bc80b
Show file tree
Hide file tree
Showing 4 changed files with 1,158 additions and 23 deletions.
17 changes: 3 additions & 14 deletions R/tests/testdir_algos/glm/runit_GLM_perfectSeparation_balanced.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,20 +12,9 @@ source('../../findNSourceUtils.R')

test <- function(conn) {

print("Generate balanced dataset by column in R")
y.a = sample(0:0, 200, replace=T)
y.b = sample(1:1, 200, replace=T)
x1.a = sample(-1203:-1, 200, replace=T)
x1.b = sample(1:1, 200, replace=T)
x2.a = sample(0:0, 200, replace=T)
x2.b = sample(1:1203, 200, replace=T)
data.a = cbind(y.a, x1.a, x2.a)
data.b = cbind(y.b, x1.b, x2.b)
data.balanced = rbind(data.a, data.b)
colnames(data.balanced) <- c("y", "x1", "x2")

print("Read data into H20.")
data.b.hex <- as.h2o(conn, as.data.frame(data.balanced), "data.b.hex")
print("Read in synthetic balanced dataset")
data.b.hex <- h2o.uploadFile(conn, locate("smalldata/synthetic_perfect_separation/balanced.csv"), key="data.b.hex")

print("Fit model on dataset.")
model.balanced <- h2o.glm(x=c("x1", "x2"), y="y", data.b.hex, family="binomial", lambda_search=TRUE, use_all_factor_levels=1, alpha=0.5, nfolds=0, higher_accuracy=TRUE, lambda=0)
print("Check line search invoked even with higher_accuracy off")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,9 @@ source('../../findNSourceUtils.R')

test <- function(conn) {

print("Generate unbalanced dataset by column in R")
y = sample(0:0, 10000, replace=T)
x1 = sample(-1:-10, 10000, replace=T)
x2 = sample(6:10, 10000, replace=T)
data = cbind(y, x1, x2)
data.unbalanced = rbind(data, c(1, 30, 7))

print("Read data into H20.")
data.u.hex <- as.h2o(conn, as.data.frame(data.unbalanced), "data.u.hex")
print("Read in synthetic balanced dataset")
data.u.hex <- h2o.uploadFile(conn, locate("smalldata/synthetic_perfect_separation/unbalanced.csv"), key="data.u.hex")

print("Fit model on dataset.")
model.unbalanced <- h2o.glm(x=c("x1", "x2"), y="y", data.u.hex, family="binomial", lambda_search=TRUE, use_all_factor_levels=1, alpha=0.5, nfolds=0, higher_accuracy=TRUE, lambda=0)
print("Check line search invoked even with higher_accuracy off")
Expand Down
Loading

0 comments on commit 86bc80b

Please sign in to comment.