forked from h2oai/h2o-2
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add unit tests for glm link functions; compare performance to R
- Loading branch information
Showing
6 changed files
with
315 additions
and
0 deletions.
There are no files selected for viewing
63 changes: 63 additions & 0 deletions
63
R/tests/testdir_algos/glm/runit_GLM_link_functions_binomial.R
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
## | ||
# Comparison of H2O to R with varying link functions for the BINOMIAL family on prostate dataset | ||
# Link functions: logit (canonical link) | ||
# log | ||
## | ||
|
||
setwd(normalizePath(dirname(R.utils::commandArgs(asValues=TRUE)$"f"))) | ||
source('../../findNSourceUtils.R') | ||
|
||
|
||
test.linkFunctions <- function(conn) { | ||
|
||
print("Read in prostate data.") | ||
h2o.data = h2o.uploadFile(conn, locate("smalldata/prostate/prostate_complete.csv.zip"), key="h2o.data") | ||
R.data = as.data.frame(as.matrix(h2o.data)) | ||
|
||
print("Testing for family: BINOMIAL") | ||
print("Set variables for h2o.") | ||
myY = "CAPSULE" | ||
myX = c("AGE","RACE","DCAPS","PSA","VOL","DPROS","GLEASON") | ||
print("Define formula for R") | ||
R.formula = (R.data[,"CAPSULE"]~.) | ||
|
||
print("Create models with canonical link: LOGIT") | ||
model.h2o.binomial.logit <- h2o.glm(x=myX, y=myY, data=h2o.data, family="binomial", link="logit",alpha=0.5, lambda=0, nfolds=0) | ||
model.R.binomial.logit <- glm(formula=R.formula, data=R.data[,3:9], family=binomial(link=logit), na.action=na.omit) | ||
|
||
print("Compare model deviances for link function logit") | ||
deviance.h2o.logit = model.h2o.binomial.logit@model$deviance / model.h2o.binomial.logit@model$null | ||
deviance.R.logit = deviance(model.R.binomial.logit) / model.h2o.binomial.logit@model$null | ||
difference = deviance.R.logit - deviance.h2o.logit | ||
if (difference > 0.01) { | ||
print(cat("Deviance in H2O: ", deviance.h2o.logit)) | ||
print(cat("Deviance in R: ", deviance.R.logit)) | ||
checkTrue(difference <= 0.01, "h2o's model's residualDeviance/nullDeviance is more than 0.01 lower than R's model's") | ||
} | ||
|
||
print("|=======================================================|") | ||
print("WARNING: Additonal log functions in tests skipped over") | ||
print("|=======================================================|") | ||
|
||
#Issue with unspecified start values: | ||
|
||
# print("Create models with link: LOG") | ||
# model.h2o.binomial.log <- h2o.glm(x=myX, y=myY, data=h2o.data, family="binomial", link="log",alpha=0.5, lambda=0, nfolds=0) | ||
# model.R.binomial.log <- glm(formula=R.formula, data=R.data[,3:9], family=binomial(link=log), na.action=na.omit) | ||
|
||
# print("Compare model deviances for link function log") | ||
# deviance.h2o.log = model.h2o.binomial.log@model$deviance / model.h2o.binomial.log@model$null | ||
# deviance.R.log = deviance(model.R.binomial.log) / model.h2o.binomial.log@model$null | ||
# difference = deviance.R.log - deviance.h2o.log | ||
# if (difference > 0.01) { | ||
# print(cat("Deviance in H2O: ", deviance.h2o.log)) | ||
# print(cat("Deviance in R: ", deviance.R.log)) | ||
# checkTrue(difference <= 0.01, "h2o's model's residualDeviance/nullDeviance is more than 0.01 lower than R's model's") | ||
# } | ||
|
||
testEnd() | ||
} | ||
|
||
doTest("Comparison of H2O to R with varying link functions for the BINOMIAL family", test.linkFunctions) | ||
|
||
|
72 changes: 72 additions & 0 deletions
72
R/tests/testdir_algos/glm/runit_GLM_link_functions_gamma.R
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
## | ||
# Comparison of H2O to R with varying link functions for the GAMMA family on prostate dataset | ||
# Link functions: inverse (canonical link) | ||
# log | ||
# identity | ||
## | ||
|
||
setwd(normalizePath(dirname(R.utils::commandArgs(asValues=TRUE)$"f"))) | ||
source('../../findNSourceUtils.R') | ||
|
||
|
||
test.linkFunctions <- function(conn) { | ||
|
||
print("Read in prostate data.") | ||
h2o.data = h2o.uploadFile(conn, locate("smalldata/prostate/prostate_complete.csv.zip"), key="h2o.data") | ||
R.data = as.data.frame(as.matrix(h2o.data)) | ||
|
||
print("Testing for family: GAMMA") | ||
print("Set variables for h2o.") | ||
myY = "DPROS" | ||
myX = c("ID","AGE","RACE","CAPSULE","DCAPS","PSA","VOL","GLEASON") | ||
print("Define formula for R") | ||
R.formula = (R.data[,"DPROS"]~.) | ||
|
||
print("Create models with canonical link: INVERSE") | ||
model.h2o.gamma.inverse <- h2o.glm(x=myX, y=myY, data=h2o.data, family="gamma", link="inverse",alpha=0.5, lambda=0, nfolds=0) | ||
model.R.gamma.inverse <- glm(formula=R.formula, data=R.data[,c(1:4,6:9)], family=Gamma(link=inverse), na.action=na.omit) | ||
|
||
print("Compare model deviances for link function inverse") | ||
deviance.h2o.inverse = model.h2o.gamma.inverse@model$deviance / model.h2o.gamma.inverse@model$null | ||
deviance.R.inverse = deviance(model.R.gamma.inverse) / model.h2o.gamma.inverse@model$null | ||
difference = deviance.R.inverse - deviance.h2o.inverse | ||
if (difference > 0.01) { | ||
print(cat("Deviance in H2O: ", deviance.h2o.inverse)) | ||
print(cat("Deviance in R: ", deviance.R.inverse)) | ||
checkTrue(difference <= 0.01, "h2o's model's residualDeviance/nullDeviance is more than 0.01 lower than R's model's") | ||
} | ||
|
||
print("Create models with link function: LOG") | ||
model.h2o.gamma.log <- h2o.glm(x=myX, y=myY, data=h2o.data, family="gamma", link="log",alpha=0.5, lambda=0, nfolds=0) | ||
model.R.gamma.log <- glm(formula=R.formula, data=R.data[,c(1:4,6:9)], family=Gamma(link=log), na.action=na.omit) | ||
|
||
print("Compare model deviances for link function log") | ||
deviance.h2o.log = model.h2o.gamma.log@model$deviance / model.h2o.gamma.log@model$null | ||
deviance.R.log = deviance(model.R.gamma.log) / model.h2o.gamma.log@model$null | ||
difference = deviance.R.log - deviance.h2o.log | ||
if (difference > 0.01) { | ||
print(cat("Deviance in H2O: ", deviance.h2o.log)) | ||
print(cat("Deviance in R: ", deviance.R.log)) | ||
checkTrue(difference <= 0.01, "h2o's model's residualDeviance/nullDeviance is more than 0.01 lower than R's model's") | ||
} | ||
|
||
print("Create models with link: IDENTITY") | ||
model.h2o.gamma.identity <- h2o.glm(x=myX, y=myY, data=h2o.data, family="gamma", link="identity",alpha=0.5, lambda=0, nfolds=0) | ||
model.R.gamma.identity <- glm(formula=R.formula, data=R.data[,c(1:4,6:9)], family=Gamma(link=identity), na.action=na.omit) | ||
|
||
print("Compare model deviances for link function identity") | ||
deviance.h2o.identity = model.h2o.gamma.identity@model$deviance / model.h2o.gamma.identity@model$null | ||
deviance.R.identity = deviance(model.R.gamma.identity) / model.h2o.gamma.identity@model$null | ||
difference = deviance.R.identity - deviance.h2o.identity | ||
if (difference > 0.01) { | ||
print(cat("Deviance in H2O: ", deviance.h2o.identity)) | ||
print(cat("Deviance in R: ", deviance.R.identity)) | ||
checkTrue(difference <= 0.01, "h2o's model's residualDeviance/nullDeviance is more than 0.01 lower than R's model's") | ||
} | ||
|
||
testEnd() | ||
} | ||
|
||
doTest("Comparison of H2O to R with varying link functions for the GAMMA family", test.linkFunctions) | ||
|
||
|
81 changes: 81 additions & 0 deletions
81
R/tests/testdir_algos/glm/runit_GLM_link_functions_gaussian.R
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
## | ||
# Comparison of H2O to R with varying link functions for the GAUSSIAN family on prostate dataset | ||
# Link functions: identity (canonical link) | ||
# log | ||
# inverse | ||
## | ||
|
||
setwd(normalizePath(dirname(R.utils::commandArgs(asValues=TRUE)$"f"))) | ||
source('../../findNSourceUtils.R') | ||
|
||
|
||
test.linkFunctions <- function(conn) { | ||
|
||
print("Read in prostate data.") | ||
h2o.data = h2o.uploadFile(conn, locate("smalldata/prostate/prostate_complete.csv.zip"), key="h2o.data") | ||
R.data = as.data.frame(as.matrix(h2o.data)) | ||
|
||
print("Testing for family: GAUSSIAN") | ||
print("Set variables for h2o.") | ||
myY = "GLEASON" | ||
myX = c("ID","AGE","RACE","CAPSULE","DCAPS","PSA","VOL","DPROS") | ||
print("Define formula for R") | ||
R.formula = (R.data[,"GLEASON"]~.) | ||
|
||
print("Create models with canonical link: IDENTITY") | ||
model.h2o.gaussian.identity <- h2o.glm(x=myX, y=myY, data=h2o.data, family="gaussian", link="identity",alpha=0.5, lambda=0, nfolds=0) | ||
model.R.gaussian.identity <- glm(formula=R.formula, data=R.data[,1:8], family=gaussian(link=identity), na.action=na.omit) | ||
|
||
print("Compare model deviances for link function identity") | ||
deviance.h2o.identity = model.h2o.gaussian.identity@model$deviance / model.h2o.gaussian.identity@model$null | ||
deviance.R.identity = deviance(model.R.gaussian.identity) / model.h2o.gaussian.identity@model$null | ||
difference = deviance.R.identity - deviance.h2o.identity | ||
if (difference > 0.01) { | ||
print(cat("Deviance in H2O: ", deviance.h2o.identity)) | ||
print(cat("Deviance in R: ", deviance.R.identity)) | ||
checkTrue(difference <= 0.01, "h2o's model's residualDeviance/nullDeviance is more than 0.01 lower than R's model's") | ||
} | ||
|
||
print("|=======================================================|") | ||
print("WARNING: Additonal log functions in tests skipped over") | ||
print("|=======================================================|") | ||
|
||
#Issue with unspecified start values: | ||
|
||
# print("Create models with link: LOG") | ||
# model.h2o.gaussian.log <- h2o.glm(x=myX, y=myY, data=h2o.data, family="gaussian", link="log",alpha=0.5, lambda=0, nfolds=0) | ||
# model.R.gaussian.log <- glm(formula=R.formula, data=R.data[,1:8], family=gaussian(link=log), na.action=na.omit) | ||
|
||
# print("Compare model deviances for link function log") | ||
# deviance.h2o.log = model.h2o.gaussian.log@model$deviance / model.h2o.gaussian.log@model$null | ||
# deviance.R.log = deviance(model.R.gaussian.log) / model.h2o.gaussian.log@model$null | ||
# difference = deviance.R.log - deviance.h2o.log | ||
# if (difference > 0.01) { | ||
# print(cat("Deviance in H2O: ", deviance.h2o.log)) | ||
# print(cat("Deviance in R: ", deviance.R.log)) | ||
# checkTrue(difference <= 0.01, "h2o's model's residualDeviance/nullDeviance is more than 0.01 lower than R's model's") | ||
# } | ||
|
||
|
||
#Issue with non positive values: | ||
|
||
# print("Create models with link: INVERSE") | ||
# model.h2o.gaussian.inverse <- h2o.glm(x=myX, y=myY, data=h2o.data, family="gaussian", link="inverse",alpha=0.5, lambda=0, nfolds=0) | ||
# model.R.gaussian.inverse <- glm(formula=R.formula, data=R.data[,1:8], family=gaussian(link=inverse), na.action=na.omit) | ||
|
||
# print("Compare model deviances for link function inverse") | ||
# deviance.h2o.inverse = model.h2o.gaussian.inverse@model$deviance / model.h2o.gaussian.inverse@model$null | ||
# deviance.R.inverse = deviance(model.R.gaussian.inverse) / model.h2o.gaussian.inverse@model$null | ||
# difference = deviance.R.inverse - deviance.h2o.inverse | ||
# if (difference > 0.01) { | ||
# print(cat("Deviance in H2O: ", deviance.h2o.inverse)) | ||
# print(cat("Deviance in R: ", deviance.R.inverse)) | ||
# checkTrue(difference <= 0.01, "h2o's model's residualDeviance/nullDeviance is more than 0.01 lower than R's model's") | ||
# } | ||
|
||
testEnd() | ||
} | ||
|
||
doTest("Comparison of H2O to R with varying link functions for the GAUSSIAN family", test.linkFunctions) | ||
|
||
|
57 changes: 57 additions & 0 deletions
57
R/tests/testdir_algos/glm/runit_GLM_link_functions_poisson.R
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
## | ||
# Comparison of H2O to R with varying link functions for the POISSON family on prostate dataset | ||
# Link functions: log (canonical link) | ||
# identity | ||
## | ||
|
||
setwd(normalizePath(dirname(R.utils::commandArgs(asValues=TRUE)$"f"))) | ||
source('../../findNSourceUtils.R') | ||
|
||
|
||
test.linkFunctions <- function(conn) { | ||
|
||
print("Read in prostate data.") | ||
h2o.data = h2o.uploadFile(conn, locate("smalldata/prostate/prostate_complete.csv.zip"), key="h2o.data") | ||
R.data = as.data.frame(as.matrix(h2o.data)) | ||
|
||
print("Testing for family: POISSON") | ||
print("Set variables for h2o.") | ||
myY = "GLEASON" | ||
myX = c("ID","AGE","RACE","CAPSULE","DCAPS","PSA","VOL","DPROS") | ||
print("Define formula for R") | ||
R.formula = (R.data[,"GLEASON"]~.) | ||
|
||
print("Create models with canonical link: LOG") | ||
model.h2o.poisson.log <- h2o.glm(x=myX, y=myY, data=h2o.data, family="poisson", link="log",alpha=0.5, lambda=0, nfolds=0) | ||
model.R.poisson.log <- glm(formula=R.formula, data=R.data[,1:8], family=poisson(link=log), na.action=na.omit) | ||
|
||
print("Compare model deviances for link function log") | ||
deviance.h2o.log = model.h2o.poisson.log@model$deviance / model.h2o.poisson.log@model$null | ||
deviance.R.log = deviance(model.R.poisson.log) / model.h2o.poisson.log@model$null | ||
difference = deviance.R.log - deviance.h2o.log | ||
if (difference > 0.01) { | ||
print(cat("Deviance in H2O: ", deviance.h2o.log)) | ||
print(cat("Deviance in R: ", deviance.R.log)) | ||
checkTrue(difference <= 0.01, "h2o's model's residualDeviance/nullDeviance is more than 0.01 lower than R's model's") | ||
} | ||
|
||
print("Create models with link: IDENTITY") | ||
model.h2o.poisson.identity <- h2o.glm(x=myX, y=myY, data=h2o.data, family="poisson", link="identity",alpha=0.5, lambda=0, nfolds=0) | ||
model.R.poisson.identity <- glm(formula=R.formula, data=R.data[,1:8], family=poisson(link=identity), na.action=na.omit) | ||
|
||
print("Compare model deviances for link function identity") | ||
deviance.h2o.identity = model.h2o.poisson.identity@model$deviance / model.h2o.poisson.identity@model$null | ||
deviance.R.identity = deviance(model.R.poisson.identity) / model.h2o.poisson.identity@model$null | ||
difference = deviance.R.identity - deviance.h2o.identity | ||
if (difference > 0.01) { | ||
print(cat("Deviance in H2O: ", deviance.h2o.identity)) | ||
print(cat("Deviance in R: ", deviance.R.identity)) | ||
checkTrue(difference <= 0.01, "h2o's model's residualDeviance/nullDeviance is more than 0.01 lower than R's model's") | ||
} | ||
|
||
testEnd() | ||
} | ||
|
||
doTest("Comparison of H2O to R with varying link functions for the POISSON family", test.linkFunctions) | ||
|
||
|
42 changes: 42 additions & 0 deletions
42
R/tests/testdir_algos/glm/runit_GLM_link_functions_tweedie.R
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
## | ||
# Comparison of H2O to R with varying link functions for the TWEEDIE family on prostate dataset | ||
# Link functions: tweedie (canonical link) | ||
## | ||
|
||
setwd(normalizePath(dirname(R.utils::commandArgs(asValues=TRUE)$"f"))) | ||
source('../../findNSourceUtils.R') | ||
|
||
|
||
test.linkFunctions <- function(conn) { | ||
|
||
print("Read in prostate data.") | ||
h2o.data = h2o.uploadFile(conn, locate("smalldata/prostate/prostate_complete.csv.zip"), key="h2o.data") | ||
R.data = as.data.frame(as.matrix(h2o.data)) | ||
|
||
print("Testing for family: TWEEDIE") | ||
print("Set variables for h2o.") | ||
myY = "CAPSULE" | ||
myX = c("AGE","RACE","DCAPS","PSA","VOL","DPROS","GLEASON") | ||
print("Define formula for R") | ||
R.formula = (R.data[,"CAPSULE"]~.) | ||
|
||
print("Create models with canonical link: TWEEDIE") | ||
model.h2o.tweedie.tweedie <- h2o.glm(x=myX, y=myY, data=h2o.data, family="tweedie", link="tweedie",alpha=0.5, lambda=0, nfolds=0) | ||
model.R.tweedie.tweedie <- glm(formula=R.formula, data=R.data[,3:9], family=tweedie, na.action=na.omit) | ||
|
||
print("Compare model deviances for link function tweedie") | ||
deviance.h2o.tweedie = model.h2o.tweedie.tweedie@model$deviance / model.h2o.tweedie.tweedie@model$null | ||
deviance.R.tweedie = deviance(model.R.tweedie.tweedie) / model.h2o.tweedie.tweedie@model$null | ||
difference = deviance.R.tweedie - deviance.h2o.tweedie | ||
if (difference > 0.01) { | ||
print(cat("Deviance in H2O: ", deviance.h2o.tweedie)) | ||
print(cat("Deviance in R: ", deviance.R.tweedie)) | ||
checkTrue(difference <= 0.01, "h2o's model's residualDeviance/nullDeviance is more than 0.01 lower than R's model's") | ||
} | ||
|
||
testEnd() | ||
} | ||
|
||
doTest("Comparison of H2O to R with varying link functions for the TWEEDIE family", test.linkFunctions) | ||
|
||
|
Binary file not shown.