Skip to content

Commit

Permalink
Code for demos from H2O in R meetup. Must have latest R package (git …
Browse files Browse the repository at this point in the history
…pull, make) to run the data munging part!
  • Loading branch information
anqif committed Aug 21, 2013
1 parent 36b7671 commit 2c4a8a5
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 0 deletions.
29 changes: 29 additions & 0 deletions R/examples/H2OMeetupDemo.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# H2O-in-R meetup demo, part 1: prostate cancer data set.
# The same import / summary / GLM / k-means steps are run twice, first through
# the h2o package and then with base R, so the outputs can be compared.
library(h2o)

# Create a client object with the class defaults and run the client check.
localH2O <- new("H2OClient")
h2o.checkClient(localH2O)

# ---- Hands-on audience participation: H2O ----
# Import, summary, GLM and k-means on the prostate cancer data set.
# Local-file alternative to the URL import below (left disabled):
# prostate.hex = h2o.importFile(localH2O, path = "../../smalldata/logreg/prostate.hex", key = "prostate.hex")
prostate.hex <- h2o.importURL(
  localH2O,
  path = "https://raw.github.com/0xdata/h2o/master/smalldata/logreg/prostate.csv",
  key = "prostate.hex"
)
summary(prostate.hex)

# Binomial GLM of CAPSULE on four predictors, with nfolds = 10.
prostate.glm <- h2o.glm(
  y = "CAPSULE",
  x = c("AGE", "RACE", "PSA", "GLEASON"),
  data = prostate.hex,
  family = "binomial",
  nfolds = 10,
  alpha = 0.5
)
print(prostate.glm)

# K-means with five centers over the listed columns.
prostate.km <- h2o.kmeans(
  data = prostate.hex,
  centers = 5,
  cols = c("AGE", "RACE", "GLEASON", "CAPSULE", "PSA")
)
print(prostate.km)

# ---- Still in Beta! H2O data munging ----
# Inspect and slice the H2O-side data set.
head(prostate.hex, n = 10)
tail(prostate.hex)
summary(prostate.hex$AGE)
summary(prostate.hex[prostate.hex$AGE > 67, ])

# Convert the first 200 rows to an R data frame and fit a base-R binomial GLM
# on that subset.
prostate.small <- as.data.frame(prostate.hex[1:200, ])
glm(
  CAPSULE ~ AGE + RACE + DPROS + DCAPS,
  family = binomial,
  data = prostate.small
)

# ---- Plain R: import, summary, GLM and k-means ----
# Same CSV as above, read directly into an R data frame.
prostate.data <- read.csv(
  url("https://raw.github.com/0xdata/h2o/master/smalldata/logreg/prostate.csv"),
  header = TRUE
)
summary(prostate.data)

prostate.glm2 <- glm(
  CAPSULE ~ AGE + RACE + PSA + GLEASON,
  family = binomial,
  data = prostate.data
)
print(prostate.glm2)

prostate.km2 <- kmeans(
  prostate.data[c("AGE", "RACE", "GLEASON", "CAPSULE", "PSA")],
  centers = 5
)
print(prostate.km2)
29 changes: 29 additions & 0 deletions R/examples/H2OMeetupDemo2.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# H2O-in-R meetup demo, part 2: airlines data set.
# Fits a binomial GLM on a small airlines file through one H2O client, then
# repeats the import / summary / GLM on a large file through a second client
# addressed by explicit ip and port.
library(h2o)

# Create a client object with the class defaults and run the client check.
localH2O <- new("H2OClient")
h2o.checkClient(localH2O)

# ---- Hands-off demo of H2O vs. R ----
# Import, summary and GLM of the small airlines data set on the local machine.
airlines.hex <- h2o.importURL(
  localH2O,
  path = "https://raw.github.com/0xdata/h2o/master/smalldata/airlines/allyears2k_headers.zip",
  key = "airlines.hex"
)
summary(airlines.hex)

# Predictors are every column except the response (IsArrDelayed) and the other
# columns named in x_ignore.
x_ignore <- c(
  "IsArrDelayed", "ActualElapsedTime", "ArrDelay", "DepDelay",
  "Canceled", "Diverted", "IsDepDelayed"
)
myX <- setdiff(colnames(airlines.hex), x_ignore)

airlines.glm <- h2o.glm(
  y = "IsArrDelayed",
  x = myX,
  data = airlines.hex,
  family = "binomial",
  nfolds = 10,
  alpha = 0.5
)
print(airlines.glm)

# ---- Hands-on demo of running H2O remotely ----
# Import, summary and GLM of the large airlines data set on a remote machine.
remoteH2O <- new("H2OClient", ip = "192.168.1.161", port = 54329)
h2o.checkClient(remoteH2O)

# NOTE(review): the path is presumably resolved on the remote H2O machine
# rather than on this R client -- confirm.
airlines_big.hex <- h2o.importFile(
  remoteH2O,
  path = "/home/earl/./oldairlines/airlines.orig.all.withheader.25.csv",
  key = "airlines_big.hex"
)
summary(airlines_big.hex)

# Reuses myX computed from the small data set; assumes the large file has the
# same column names -- TODO confirm.
airlines_big.glm <- h2o.glm(
  y = "IsArrDelayed",
  x = myX,
  data = airlines_big.hex,
  family = "binomial",
  nfolds = 10,
  alpha = 0.5
)
print(airlines_big.glm)

# ---- Still in Beta! H2O data munging on the large airlines data set ----
head(airlines_big.hex, n = 10)
tail(airlines_big.hex)
summary(airlines_big.hex$ArrDelay)

# Convert the first 10000 rows to an R data frame and fit a base-R binomial
# GLM on that subset.
airlines_small.data <- as.data.frame(airlines_big.hex[1:10000, ])
glm(
  IsArrDelayed ~ Origin + Dest,
  family = binomial,
  data = airlines_small.data
)

0 comments on commit 2c4a8a5

Please sign in to comment.