Skip to content

Commit

Permalink
updating the way xl tests print time
Browse files Browse the repository at this point in the history
  • Loading branch information
vitreuz committed Jun 10, 2015
1 parent 7a8f465 commit af5aa40
Show file tree
Hide file tree
Showing 18 changed files with 74 additions and 37 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ conn <- h2o.init(ip=myIP, port=myPort, startH2O = FALSE)
# Parameters for the test.
#----------------------------------------------------------------------
# Time the import of the customer dataset; `header = TRUE` is spelled out
# because `T` is an ordinary variable that can be reassigned.
parse_time <- system.time(
  data.hex <- h2o.importFile(conn, "/mnt/0xcustomer-datasets/c25/df_h2o.csv", header = TRUE)
)
paste("Time it took to parse", parse_time[[1]])
print("Time it took to parse")
print(parse_time)

colNames = {}
for(col in names(data.hex)) {
Expand All @@ -42,7 +43,8 @@ myX = setdiff(names(data.hex), myY)
#Deep Learning
dl_time <- system.time(data1.dl <- h2o.deeplearning(x=myX, y=myY, data=data.hex,
epochs=.1, hidden=c(5,5)))
paste("Time it took to build DL ", dl_time[[1]])
print("Time it took to build DL ")
print(dl_time)
data1.dl

PASS_BANNER()
6 changes: 4 additions & 2 deletions R/tests/testdir_hdfs_xlarge/runit_DL_1MRows_2.2KCols_xlarge.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@ hdfs_data_file = "/datasets/1Mx2.2k.csv"

url <- sprintf("hdfs://%s%s", hdfs_name_node, hdfs_data_file)
parse_time <- system.time(data.hex <- h2o.importFile(conn, url))
paste("Time it took to parse", parse_time[[1]])
print("Time it took to parse")
print(parse_time)

response=1 #1:1000 imbalance
predictors=c(3:ncol(data.hex))
Expand All @@ -36,7 +37,8 @@ predictors=c(3:ncol(data.hex))
dl_time <- system.time(mdl.dl <- h2o.deeplearning(x=predictors, y=response,
data=data.hex, replicate_training_data=FALSE, epochs=.1, hidden=c(5,5)))
mdl.dl
paste("Time it took to build DL ", dl_time[[1]])
print("Time it took to build DL ")
print(dl_time)

PASS_BANNER()

Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ hdfs_data_file = "/datasets/airlinesbillion.csv"
heading("Testing single file importHDFS")
url <- sprintf("hdfs://%s%s", hdfs_name_node, hdfs_data_file)
parse_time <- system.time(data.hex <- h2o.importFile(conn, url))
paste("Time it took to parse", parse_time[[1]])
print("Time it took to parse")
print(parse_time)

data1.hex <- data.hex

Expand All @@ -52,6 +53,7 @@ dl_time <- system.time(data1.dl <- h2o.deeplearning(x=myX, y=myY,
data=data.train, validation=data.valid, replicate_training_data=FALSE,
epochs=.1, hidden=c(5,5)))
data1.dl
paste("Time it took to build DL ", dl_time[[1]])
print("Time it took to build DL ")
print(dl_time)

PASS_BANNER()
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@ hdfs_data_file = "/datasets/15Mx2.2k.csv"

url <- sprintf("hdfs://%s%s", hdfs_name_node, hdfs_data_file)
parse_time <- system.time(data.hex <- h2o.importFile(conn, url))
paste("Time it took to parse", parse_time[[1]])
print("Time it took to parse")
print(parse_time)

response=1 #1:1000 imbalance
predictors=c(3:ncol(data.hex))
Expand All @@ -35,7 +36,8 @@ predictors=c(3:ncol(data.hex))
# Gradient Boosted Trees
gbm_time <- system.time(mdl.gbm <- h2o.gbm(x=predictors, y=response, data=data.hex, distribution = "bernoulli"))
mdl.gbm
paste("Time it took to build GBM ", gbm_time[[1]])
# Report the timing captured by system.time() for the GBM build.
print("Time it took to build GBM ")
print(gbm_time)

PASS_BANNER()

Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ conn <- h2o.init(ip=myIP, port=myPort, startH2O = FALSE)
# Parameters for the test.
#----------------------------------------------------------------------
# Time the import of the customer dataset; `header = TRUE` is spelled out
# because `T` is an ordinary variable that can be reassigned.
parse_time <- system.time(
  data.hex <- h2o.importFile(conn, "/mnt/0xcustomer-datasets/c25/df_h2o.csv", header = TRUE)
)
paste("Time it took to parse", parse_time[[1]])
print("Time it took to parse")
print(parse_time)

colNames = {}
for(col in names(data.hex)) {
Expand All @@ -42,7 +43,8 @@ myX = setdiff(names(data.hex), myY)
#GBM on original dataset
gbm_time <- system.time(data1.gbm <- h2o.gbm(x = myX, y = myY, data = data.hex,
n.trees = 10, interaction.depth = 5, distribution = "multinomial"))
paste("Time it took to build GBM ", gbm_time[[1]])
# Report the timing captured by system.time() for the GBM build.
print("Time it took to build GBM ")
print(gbm_time)
data1.gbm

PASS_BANNER()
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@ hdfs_data_file = "/datasets/1Mx2.2k.csv"

url <- sprintf("hdfs://%s%s", hdfs_name_node, hdfs_data_file)
parse_time <- system.time(data.hex <- h2o.importFile(conn, url))
paste("Time it took to parse", parse_time[[1]])
print("Time it took to parse")
print(parse_time)

response=1 #1:1000 imbalance
predictors=c(3:ncol(data.hex))
Expand All @@ -36,7 +37,8 @@ predictors=c(3:ncol(data.hex))
gbm_time <- system.time(mdl.gbm <- h2o.gbm(x=predictors, y=response,
data=data.hex, distribution = "bernoulli"))
mdl.gbm
paste("Time it took to build GBM ", gbm_time[[1]])
# Report the timing captured by system.time() for the GBM build.
print("Time it took to build GBM ")
print(gbm_time)

PASS_BANNER()

Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@ h2o.ls(conn)
# Parameters for the test.
#----------------------------------------------------------------------
parse_time <- system.time(data.hex <- h2o.importFile(conn, "/mnt/0xcustomer-datasets/c28/mr_output.tsv.sorted.gz"))
paste("Time it took to parse", parse_time[[1]])
print("Time it took to parse")
print(parse_time)

dim(data.hex)

Expand All @@ -36,7 +37,8 @@ valid = data.hex[s > 0.8,]
#GBM model
gbm_time <- system.time(model.gbm <- h2o.gbm(x = 3:(ncol(train)), y = 2,
data = train, validation=valid, n.trees=10, interaction.depth=5))
paste("Time it took to build GBM ", gbm_time[[1]])
# Report the timing captured by system.time() for the GBM build.
print("Time it took to build GBM ")
print(gbm_time)
model.gbm

pred = h2o.predict(model.gbm, valid)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ hdfs_data_file = "/datasets/airlinesbillion.csv"
heading("Testing single file importHDFS")
url <- sprintf("hdfs://%s%s", hdfs_name_node, hdfs_data_file)
parse_time <- system.time(data.hex <- h2o.importFile(conn, url))
paste("Time it took to parse", parse_time[[1]])
print("Time it took to parse")
print(parse_time)

data1.hex <- data.hex

Expand All @@ -51,12 +52,14 @@ gbm_10tree_time <- system.time(data1.gbm <- h2o.gbm(x = myX, y = myY,
data = data.train, validation=data.valid, n.trees = 10, interaction.depth = 5,
distribution = "AUTO"))
data1.gbm
paste("Time it took to build GBM ", gbm_10tree_time[[1]])
# Report the timing captured by system.time() for the 10-tree GBM build.
print("Time it took to build GBM ")
print(gbm_10tree_time)

gbm_50tree_time <- system.time(data2.gbm <- h2o.gbm(x = myX, y = myY,
data = data.train, validation=data.valid, n.trees = 50, interaction.depth = 5,
distribution = "AUTO"))
data2.gbm
paste("Time it took to build GBM ", gbm_50tree_time[[1]])
# Report the timing captured by system.time() for the 50-tree GBM build.
print("Time it took to build GBM ")
print(gbm_50tree_time)

PASS_BANNER()
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ hdfs_data_file = "/datasets/airlinesbillion.csv"
heading("Testing single file importHDFS")
url <- sprintf("hdfs://%s%s", hdfs_name_node, hdfs_data_file)
parse_time <- system.time(data.hex <- h2o.importFile(conn, url))
paste("Time it took to parse", parse_time[[1]])
print("Time it took to parse")
print(parse_time)

data1.hex <- data.hex

Expand All @@ -51,6 +52,7 @@ gbm_10tree_time <- system.time(data1.gbm <- h2o.gbm(x = myX, y = myY,
data = data.train, validation=data.valid, n.trees = 10, interaction.depth = 5,
distribution = "bernoulli"))
data1.gbm
paste("Time it took to build GBM ", gbm_10tree_time[[1]])
# Report the timing captured by system.time() for the GBM build.
print("Time it took to build GBM ")
print(gbm_10tree_time)

PASS_BANNER()
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ hdfs_data_file = "/datasets/airlinesbillion.csv"
heading("Testing single file importHDFS")
url <- sprintf("hdfs://%s%s", hdfs_name_node, hdfs_data_file)
parse_time <- system.time(data.hex <- h2o.importFile(conn, url))
paste("Time it took to parse", parse_time[[1]])
print("Time it took to parse")
print(parse_time)

data1.hex <- data.hex

Expand All @@ -51,6 +52,7 @@ gbm_10tree_time <- system.time(data1.gbm <- h2o.gbm(x = myX, y = myY,
data = data.train, validation=data.valid, n.trees = 10, interaction.depth = 5,
distribution = "multinomial"))
data1.gbm
paste("Time it took to build GBM ", gbm_10tree_time[[1]])
# Report the timing captured by system.time() for the GBM build.
print("Time it took to build GBM ")
print(gbm_10tree_time)

PASS_BANNER()
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@ hdfs_data_file = "/datasets/15Mx2.2k.csv"

url <- sprintf("hdfs://%s%s", hdfs_name_node, hdfs_data_file)
parse_time <- system.time(data.hex <- h2o.importFile(conn, url))
paste("Time it took to parse", parse_time[[1]])
print("Time it took to parse")
print(parse_time)

response=1 #1:1000 imbalance
predictors=c(3:ncol(data.hex))
Expand All @@ -36,7 +37,8 @@ predictors=c(3:ncol(data.hex))
glm_time <- system.time(mdl.glm <- h2o.glm(x=predictors, y=response,
data=data.hex, family = "binomial"))
mdl.glm
paste("Time it took to build GLM ", glm_time[[1]])
# Report the timing captured by system.time() for the GLM build.
print("Time it took to build GLM ")
print(glm_time)

PASS_BANNER()

Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ conn <- h2o.init(ip=myIP, port=myPort, startH2O = FALSE)
# Parameters for the test.
#----------------------------------------------------------------------
# Time the import of the customer dataset; `header = TRUE` is spelled out
# because `T` is an ordinary variable that can be reassigned.
parse_time <- system.time(
  data.hex <- h2o.importFile(conn, "/mnt/0xcustomer-datasets/c25/df_h2o.csv", header = TRUE)
)
paste("Time it took to parse", parse_time[[1]])
print("Time it took to parse")
print(parse_time)

colNames = {}
for(col in names(data.hex)) {
Expand All @@ -43,6 +44,7 @@ myX = setdiff(names(data.hex), myY)
glm_time <- system.time(data1.glm <- h2o.glm(x=myX, y=myY, data = data.hex,
family="gaussian"))
data1.glm
paste("Time it took to build GLM ", glm_time[[1]])
# Report the timing captured by system.time() for the GLM build.
print("Time it took to build GLM ")
print(glm_time)

PASS_BANNER()
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@ hdfs_data_file = "/datasets/1Mx2.2k.csv"

url <- sprintf("hdfs://%s%s", hdfs_name_node, hdfs_data_file)
parse_time <- system.time(data.hex <- h2o.importFile(conn, url))
paste("Time it took to parse", parse_time[[1]])
print("Time it took to parse")
print(parse_time)

response=1 #1:1000 imbalance
predictors=c(3:ncol(data.hex))
Expand All @@ -36,7 +37,8 @@ predictors=c(3:ncol(data.hex))
glm_time <- system.time(mdl.glm <- h2o.glm(x=predictors, y=response,
data=data.hex, family = "binomial"))
mdl.glm
paste("Time it took to build GLM ", glm_time[[1]])
# Report the timing captured by system.time() for the GLM build.
print("Time it took to build GLM ")
print(glm_time)

PASS_BANNER()

Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@ h2o.ls(conn)
# Parameters for the test.
#----------------------------------------------------------------------
parse_time <- system.time(data.hex <- h2o.importFile(conn, "/mnt/0xcustomer-datasets/c28/mr_output.tsv.sorted.gz"))
paste("Time it took to parse", parse_time[[1]])
print("Time it took to parse")
print(parse_time)

dim(data.hex)

Expand All @@ -36,7 +37,8 @@ valid = data.hex[s > 0.8,]
#GLM Model
glm_time <- system.time(model.glm <- h2o.glm(x = 3:(ncol(train)), y = 6,
data = train, validation=valid, family = "binomial"))
paste("Time it took to build GLM ", glm_time[[1]])
# Report the timing captured by system.time() for the GLM build.
print("Time it took to build GLM ")
print(glm_time)
model.glm

pred = h2o.predict(model.glm, valid)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ hdfs_data_file = "/datasets/airlinesbillion.csv"
heading("Testing single file importHDFS")
url <- sprintf("hdfs://%s%s", hdfs_name_node, hdfs_data_file)
parse_time <- system.time(data.hex <- h2o.importFile(conn, url))
paste("Time it took to parse", parse_time[[1]])
print("Time it took to parse")
print(parse_time)

data1.hex <- data.hex

Expand All @@ -53,6 +54,7 @@ myX = c("C20", "C21", "C22", "C23", "C24", "C25", "C26", "C27", "C28", "C29")
#glm_irlsm_time <- system.time(data_irlsm.glm <- h2o.glm(x = myX, y = myY, data = data.train, validation=data.valid, family = "gaussian", solver = "IRLSM"))
glm_time <- system.time(data.glm <- h2o.glm(x = myX, y = myY, data = data.train, family = "gaussian"))
data.glm
paste("Time it took to build GLM ", glm_time[[1]])
# Report the timing captured by system.time() for the GLM build.
print("Time it took to build GLM ")
print(glm_time)

PASS_BANNER()
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@ hdfs_data_file = "/datasets/15Mx2.2k.csv"

url <- sprintf("hdfs://%s%s", hdfs_name_node, hdfs_data_file)
parse_time <- system.time(data.hex <- h2o.importFile(conn, url))
paste("Time it took to parse", parse_time[[1]])
print("Time it took to parse")
print(parse_time)

response=1 #1:1000 imbalance
predictors=c(3:ncol(data.hex))
Expand All @@ -36,7 +37,8 @@ predictors=c(3:ncol(data.hex))
rf_time <- system.time(mdl.rf <- h2o.randomForest(x=predictors, y=response,
data=data.hex, ntree=10, depth=5))
mdl.rf
paste("Time it took to build RF ", rf_time[[1]])
# Report the timing captured by system.time() for the random forest build.
print("Time it took to build RF ")
print(rf_time)

PASS_BANNER()

6 changes: 4 additions & 2 deletions R/tests/testdir_hdfs_xlarge/runit_RF_1MRows_2.2KCols_xlarge.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@ hdfs_data_file = "/datasets/1Mx2.2k.csv"

url <- sprintf("hdfs://%s%s", hdfs_name_node, hdfs_data_file)
parse_time <- system.time(data.hex <- h2o.importFile(conn, url))
paste("Time it took to parse", parse_time[[1]])
print("Time it took to parse")
print(parse_time)

response=1 #1:1000 imbalance
predictors=c(3:ncol(data.hex))
Expand All @@ -36,7 +37,8 @@ predictors=c(3:ncol(data.hex))
rf_time <- system.time(mdl.rf <- h2o.randomForest(x=predictors, y=response,
data=data.hex, n.tree=10, interaction.depth=5))
mdl.rf
paste("Time it took to build RF ", rf_time[[1]])
# Report the timing captured by system.time() for the random forest build.
print("Time it took to build RF ")
print(rf_time)

PASS_BANNER()

Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ hdfs_data_file = "/datasets/airlinesbillion.csv"
heading("Testing single file importHDFS")
url <- sprintf("hdfs://%s%s", hdfs_name_node, hdfs_data_file)
parse_time <- system.time(data.hex <- h2o.importFile(conn, url))
paste("Time it took to parse", parse_time[[1]])
print("Time it took to parse")
print(parse_time)

data1.hex <- data.hex

Expand All @@ -53,6 +54,7 @@ rf_time <- system.time(data1.rf <- h2o.randomForest(x = myX, y = myY,
data = data.train, validation=data.valid, ntree = 10, depth = 5,
type = "BigData"))
data1.rf
paste("Time it took to build RF ", rf_time[[1]])
# Report the timing captured by system.time() for the random forest build.
print("Time it took to build RF ")
print(rf_time)

PASS_BANNER()

0 comments on commit af5aa40

Please sign in to comment.