forked from h2oai/h2o-2
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
bead377
commit 87127a8
Showing
14 changed files
with
1,658 additions
and
550 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
setwd(normalizePath(dirname(R.utils::commandArgs(asValues=TRUE)$"f"))) | ||
source('../findNSourceUtils.R') | ||
|
||
# Make a quick copy of a data frame in H2O by selecting every row and column.
#
# `this` is any object that supports nrow(), ncol() and two-dimensional `[`
# indexing. seq_len() is used instead of 1:n so that an input with zero rows
# or zero columns yields an empty index rather than c(1, 0).
cp <- function(this) {
  this[seq_len(nrow(this)), seq_len(ncol(this))]
}
|
||
# Count how many entries of a column are NA.
#
# `col` is anything is.na() accepts; the result is the sum of the NA mask.
numNAs <- function(col) {
  missing_mask <- is.na(col)
  sum(missing_mask)
}
|
||
# Demo script: upload prostate_missing.csv and run h2o.impute with the mean
# and the median, using each accepted way of naming the target column.
conn <- h2o.init()

prostate.hex <- h2o.uploadFile(conn, "../../../smalldata/logreg/prostate_missing.csv", "prostate.hex")
dim(prostate.hex)

# The message names the file that is actually uploaded above.
print("Summary of the data in prostate_missing.csv")
# NOTE(review): the "50 missing observations" figure is not checked here;
# confirm it holds for prostate_missing.csv.
print("Each column has 50 missing observations (at random)")
summary(prostate.hex)

print("Make a copy of the original dataset to play with.")
hex <- cp(prostate.hex)
print(hex@key)
print(prostate.hex@key)
print(prostate.hex)
print(hex)

print("Impute a numeric column with the mean")
nas <- numNAs(hex[,"DPROS"])
print(paste("NAs before imputation:", nas))
h2o.impute(hex, .(DPROS), method = "mean")

# Re-count on the same frame: the result of h2o.impute is not assigned, so
# this reports whatever the call did to `hex` itself.
nas <- numNAs(hex[,"DPROS"])
print(paste("NAs after imputation: ", nas))

# OTHER POSSIBLE SYNTAXES ALLOWED:
# Each variant starts from a fresh copy of the uploaded frame.
hex <- cp(prostate.hex)
h2o.impute(hex, 8, method = "mean")

hex <- cp(prostate.hex)
h2o.impute(hex, c("VOL"), method = "mean")

hex <- cp(prostate.hex)
h2o.impute(hex, "VOL", method = "mean")

# USING MEDIAN
print("Impute a numeric column with the median")

hex <- cp(prostate.hex)
h2o.impute(hex, .(VOL), method = "median")

hex <- cp(prostate.hex)
h2o.impute(hex, 8, method = "median")

hex <- cp(prostate.hex)
h2o.impute(hex, c("VOL"), method = "median")

hex <- cp(prostate.hex)
h2o.impute(hex, "VOL", method = "median")

testEnd()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
setwd(normalizePath(dirname(R.utils::commandArgs(asValues=TRUE)$"f"))) | ||
source('../../findNSourceUtils.R') | ||
|
||
# Copy a frame by selecting every row and column of `this`.
# seq_len() replaces 1:n so an input with zero rows or zero columns produces
# an empty index instead of c(1, 0).
cp <- function(this) {
  this[seq_len(nrow(this)), seq_len(ncol(this))]
}
|
||
# Runs h2o.impute against iris_missing.csv with the "mean", "median" and
# "mode" methods, naming the target column in each accepted form, and then
# with several groupBy specifications.
#
# conn: the H2O connection handed to h2o.uploadFile.
test.eq2.h2o.assign <- function(conn) {
  iris.hex <- h2o.uploadFile(
    conn,
    locate("smalldata/iris/iris_missing.csv"),
    "iris.hex"
  )
  dim(iris.hex)

  # Every scenario below starts from an untouched copy of the uploaded frame.
  fresh_copy <- function() cp(iris.hex)

  Log.info("Summary of the data in iris_missing.csv")
  Log.info("Each column has 50 missing observations (at random)")
  summary(iris.hex)

  Log.info("Make a copy of the original dataset to play with.")
  hex <- fresh_copy()
  print(hex@key)
  print(iris.hex@key)
  print(iris.hex)
  print(hex)

  # ---- mean -------------------------------------------------------------
  Log.info("Impute a numeric column with the mean")
  h2o.impute(hex, .(Sepal.Length), method = "mean")
  expect_that(sum(is.na(hex[,"Sepal.Length"])), equals(0))

  hex <- fresh_copy()
  h2o.impute(hex, 1, method = "mean")
  expect_that(sum(is.na(hex[,"Sepal.Length"])), equals(0))

  hex <- fresh_copy()
  h2o.impute(hex, c("Sepal.Length"), method = "mean")
  expect_that(sum(is.na(hex[,"Sepal.Length"])), equals(0))

  hex <- fresh_copy()
  h2o.impute(hex, "Sepal.Length", method = "mean")
  expect_that(sum(is.na(hex[,"Sepal.Length"])), equals(0))

  # ---- median -----------------------------------------------------------
  hex <- fresh_copy()
  Log.info("Impute a numeric column with the median")
  h2o.impute(hex, .(Sepal.Length), method = "median")
  expect_that(sum(is.na(hex[,"Sepal.Length"])), equals(0))

  hex <- fresh_copy()
  h2o.impute(hex, 1, method = "median")
  expect_that(sum(is.na(hex[,"Sepal.Length"])), equals(0))

  hex <- fresh_copy()
  h2o.impute(hex, c("Sepal.Length"), method = "median")
  expect_that(sum(is.na(hex[,"Sepal.Length"])), equals(0))

  hex <- fresh_copy()
  h2o.impute(hex, "Sepal.Length", method = "median")
  expect_that(sum(is.na(hex[,"Sepal.Length"])), equals(0))

  # ---- mode (factor column) ---------------------------------------------
  hex <- fresh_copy()
  Log.info("Impute a factor column (uses the mode)")
  h2o.impute(hex, .(Species), method = "mode")
  expect_that(sum(is.na(hex[,"Species"])), equals(0))

  hex <- fresh_copy()
  h2o.impute(hex, 5, method = "mode")
  expect_that(sum(is.na(hex[,"Species"])), equals(0))

  hex <- fresh_copy()
  h2o.impute(hex, c("Species"), method = "mode")
  expect_that(sum(is.na(hex[,"Species"])), equals(0))

  hex <- fresh_copy()
  h2o.impute(hex, "Species", method = "mode")
  expect_that(sum(is.na(hex[,"Species"])), equals(0))

  # ---- grouped imputation -----------------------------------------------
  # No NA-count assertions here: NAs in the groupBy columns may leave some
  # NAs behind in the imputed column, so the frames are only printed.
  hex <- fresh_copy()
  Log.info("Now check that imputing with column groupings works...")
  h2o.impute(
    hex, .(Sepal.Length),
    method = "mean",
    groupBy = c("Sepal.Width", "Petal.Width")
  )
  print(hex)

  hex <- fresh_copy()
  h2o.impute(
    hex, 1,
    method = "median",
    groupBy = c("Species", "Petal.Width", "Petal.Length")
  )
  print(hex)

  hex <- fresh_copy()
  h2o.impute(hex, "Petal.Width", method = "mean", groupBy = c(1,2,5))
  print(hex)

  hex <- fresh_copy()
  h2o.impute(hex, "Species", method = "mode", groupBy = c(1,3,4))
  print(hex)

  testEnd()
}
|
||
# Register the test with the harness. The description names h2o.impute, which
# is what the function exercises; the function keeps its historical name.
doTest("Test h2o.impute(data, column, method, groupBy)", test.eq2.h2o.assign)
|
Oops, something went wrong.