Skip to content

Commit

Permalink
Merge pull request h2oai#17 from earlh/elh
Browse files Browse the repository at this point in the history
R: pub-215 passes
  • Loading branch information
anqif committed Mar 14, 2014
2 parents b3548a4 + d59ba91 commit 4989205
Show file tree
Hide file tree
Showing 6 changed files with 170 additions and 60 deletions.
59 changes: 0 additions & 59 deletions R/tests/testdir_jira/runit_NOPASS_v_11_datemanipulation.R

This file was deleted.

23 changes: 23 additions & 0 deletions R/tests/testdir_jira/runit_pub_180_ddply.R
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,20 @@ ddplytest <- function(conn){
h2o.addFunction(conn, fn4)
df.h.4 <- ddply(df.h, .(colgroup, colgroup2), fn4)

Log.info('testing all column address modes')
df.h.4b <- ddply(df.h, c('colgroup', 'colgroup2'), fn4)
df.h.4c <- ddply(df.h, 1:2, fn4)
df.h.4d <- ddply(df.h, h2o..('colgroup', 'colgroup2'), fn4)


Log.info('pulling data locally')
df.1 <- as.data.frame( df.h.1 )
df.2 <- as.data.frame( df.h.2 )
df.3 <- as.data.frame( df.h.3 )
df.4 <- as.data.frame( df.h.4 )
df.4b <- as.data.frame( df.h.4b )
df.4c <- as.data.frame( df.h.4c )
df.4d <- as.data.frame( df.h.4d )

Log.info('avoid factor issues by making grouping columns into character')
df.1$colgroup <- as.character(df.1$colgroup)
Expand All @@ -49,13 +58,22 @@ ddplytest <- function(conn){
df.2$colgroup2 <- as.character(df.2$colgroup2)
df.4$colgroup2 <- as.character(df.4$colgroup2)

df.4b$colgroup <- as.character(df.4b$colgroup)
df.4b$colgroup2 <- as.character(df.4b$colgroup2)
df.4c$colgroup <- as.character(df.4c$colgroup)
df.4c$colgroup2 <- as.character(df.4c$colgroup2)
df.4d$colgroup <- as.character(df.4d$colgroup)
df.4d$colgroup2 <- as.character(df.4d$colgroup2)


# h2o doesn't sort the ddply output, so order the rows by the grouping columns locally
df.1 <- df.1[order(df.1$colgroup), ]
df.2 <- df.2[order(df.2$colgroup, df.2$colgroup2), ]
df.3 <- df.3[order(df.3$colgroup), ]
df.4 <- df.4[order(df.4$colgroup, df.4$colgroup2), ]
df.4b <- df.4b[order(df.4b$colgroup, df.4b$colgroup2), ]
df.4c <- df.4c[order(df.4c$colgroup, df.4c$colgroup2), ]
df.4d <- df.4d[order(df.4d$colgroup, df.4d$colgroup2), ]

Log.info('testing')
expect_that( dim(df.1), equals( c(3,2) ) )
Expand All @@ -79,6 +97,11 @@ ddplytest <- function(conn){
expect_that(df.4[,2], equals(paste('group', c(1,1,3,1,2), sep='')) )
expect_that(df.4[,3], equals(c(3,7,18,11,11)) )

# column addressing options
expect_that( all(df.4b == df.4), equals(T))
expect_that( all(df.4c == df.4), equals(T))
expect_that( all(df.4d == df.4), equals(T))


testEnd()
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ dash_filename_test <- function(conn){
Log.info('printing from h2o')
Log.info( head(df.h) )

res <- as.data.frame(table(df.h$l>0))
res <- as.data.frame(h2o.table(df.h$l>0))

testEnd()
}
Expand Down
76 changes: 76 additions & 0 deletions R/tests/testdir_jira/runit_v_11_datemanipulation.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
#
# date parsing and field extraction tests
#


# Switch to the directory that holds this script (path read from the `f`
# command-line argument) so the relative source() path below resolves.
setwd(normalizePath(dirname(R.utils::commandArgs(asValues=TRUE)$"f")))
# Log.info, locate, testEnd and doTest are not defined in this file;
# presumably they are provided by this sourced helper script (confirm).
source('../findNSourceUtils.R')



datetest <- function(conn){
  # Checks that year()/month() applied to date columns of an h2o frame agree
  # with the same fields computed locally through as.Date()/as.POSIXlt().
  #
  # conn: connection handle passed to h2o.importFile()

  Log.info('uploading date testing dataset')
  # the csv is expected to hold five date columns, ds1 through ds5
  hdf <- h2o.importFile(conn, locate('smalldata/jira/v-11.csv'))

  Log.info('data as loaded into h2o:')
  Log.info(head(hdf))

  # NB: ds1 and ds5 are not usable as dates on the h2o side for now, since
  # h2o cannot interpret
  #   1 integer day counts since the epoch (or since any other date);
  #   2 dates written as %d/%m/%y (strptime notation)
  # which is why only ds2..ds4 are exercised below.

  Log.info('adding date columns')
  # NB: h2o detects date columns on its own; when detection fails there is
  # nothing this test can do about it

  Log.info('extracting year and month from posix date objects')
  hdf$year2 <- year(hdf$ds2)
  hdf$year3 <- year(hdf$ds3)
  hdf$year4 <- year(hdf$ds4)
  hdf$mon2 <- month(hdf$ds2)
  hdf$mon3 <- month(hdf$ds3)
  hdf$mon4 <- month(hdf$ds4)
  hdf$idx2 <- year(hdf$ds2) * 12 + month(hdf$ds2)
  hdf$idx3 <- year(hdf$ds3) * 12 + month(hdf$ds3)
  hdf$idx4 <- year(hdf$ds4) * 12 + month(hdf$ds4)

  # rename the nine trailing (just appended) columns in place; this relies on
  # the derived columns sitting at the end of the frame in creation order
  derived_names <- paste0(rep(c('year', 'month', 'idx'), each = 3), 2:4)
  frame_names <- colnames(hdf)
  n_names <- length(frame_names)
  frame_names[seq(n_names - length(derived_names) + 1, n_names)] <- derived_names
  colnames(hdf) <- frame_names

  Log.info('pulling year/month indices local')
  ldf <- as.data.frame( hdf )

  # reference values, computed with R's own date functions;
  # days1..days5 are appended as columns 6 through 10
  truth_src <- read.csv(locate('smalldata/jira/v-11.csv'))
  truth_src$days1 <- as.Date(truth_src$ds1, origin='1970-01-01')
  truth_src$days2 <- as.Date(truth_src$ds2, format='%Y-%m-%d')
  truth_src$days3 <- as.Date(truth_src$ds3, format='%d-%b-%y')
  truth_src$days4 <- as.Date(truth_src$ds4, format='%d-%B-%Y')
  truth_src$days5 <- as.Date(truth_src$ds5, format='%d/%m/%y')

  day_cols <- truth_src[, 6:10]
  truth_month <- data.frame(lapply(day_cols, function(d) as.POSIXlt(d)$mon))
  truth_year <- data.frame(lapply(day_cols, function(d) as.POSIXlt(d)$year))
  truth_idx <- 12 * truth_year + truth_month

  Log.info('testing correctness')
  # compare year, then month, then idx, each for ds2..ds4
  truth <- list(year = truth_year, month = truth_month, idx = truth_idx)
  for (field in names(truth)) {
    for (k in 2:4) {
      expect_equal(ldf[[paste0(field, k)]], truth[[field]][, k])
    }
  }

  testEnd()
}


# Hand datetest to doTest under the label 'date testing'. doTest is not
# defined in this file; presumably it comes from the sourced utilities.
doTest('date testing', datetest)
35 changes: 35 additions & 0 deletions R/tests/testdir_munging/slice/runit_[[.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
##
##

# Switch to the directory that holds this script (path read from the `f`
# command-line argument) so the relative source() path below resolves.
setwd(normalizePath(dirname(R.utils::commandArgs(asValues=TRUE)$"f")))
# Log.info, locate, testEnd and doTest are not defined in this file;
# presumably they are provided by this sourced helper script (confirm).
source('../../findNSourceUtils.R')

test.columndereference <- function(conn) {
  # Exercises `[[` and `[[<-` on an h2o frame: single-column extraction by
  # literal name and through a variable, lookup of a missing name, overwriting
  # a column, and rejection of replacement values of the wrong shape.
  #
  # conn: connection handle passed to h2o.uploadFile.FV()
  Log.info('test column dereference')

  hdf <- h2o.uploadFile.FV(conn, locate('smalldata/jira/pub-180.csv'))
  otherhdf <- h2o.uploadFile.FV(conn, locate('smalldata/jira/v-11.csv'))

  target_col <- 'colgroup2'
  single_col_dim <- c(12, 1)

  # one column comes back, whether named literally or via a variable
  expect_equal(dim(hdf[['colgroup2']]), single_col_dim)
  expect_equal(dim(hdf[[target_col]]), single_col_dim)

  # an unknown column name yields NULL
  expect_equal(hdf[['col2group2']], NULL)

  # an existing column can be replaced by another column of the same frame
  hdf[['colgroup2']] <- hdf[['col2']]
  pulled <- as.data.frame(hdf[[target_col]])[, 1]
  expect_equal(pulled, c(2,4,6,11,3,4,6,11,2,4,6,11))

  # replacement must be refused when the value does not fit: a column from
  # another frame (presumably a different row count) or a two-column slice
  expect_error(hdf[[target_col]] <- otherhdf[['ds1']])
  expect_error(hdf[[target_col]] <- hdf[, 2:3])

  testEnd()
}

# Hand test.columndereference to doTest under the given label. doTest is not
# defined in this file; presumably it comes from the sourced utilities.
doTest("test column dereference and assignment", test.columndereference)

35 changes: 35 additions & 0 deletions R/tests/testdir_munging/slice/runit_cbind.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
##
##

# Switch to the directory that holds this script (path read from the `f`
# command-line argument) so the relative source() path below resolves.
setwd(normalizePath(dirname(R.utils::commandArgs(asValues=TRUE)$"f")))
# Log.info, locate, testEnd and doTest are not defined in this file;
# presumably they are provided by this sourced helper script (confirm).
source('../../findNSourceUtils.R')

test.cbind <- function(conn) {
  # Exercises cbind() on h2o frames: a frame bound to itself, two sliced
  # columns bound together, and the error raised for mismatched row counts.
  #
  # conn: connection handle passed to h2o.uploadFile()
  Log.info('test cbind')

  hdf <- h2o.uploadFile(conn, locate('../../../smalldata/jira/pub-180.csv'))
  otherhdf <- h2o.uploadFile(conn, locate('../../../smalldata/jira/v-11.csv'))

  ##### WORKS #####
  # a frame bound to itself: still 12 rows, 8 columns in total
  doubled <- cbind(hdf, hdf)
  expect_equal(dim(doubled), c(12, 8))

  # two single-column slices, each stored in a variable first, bound together
  xx <- hdf[,1]
  yy <- hdf[,2]
  expect_equal(dim(cbind(xx, yy)), c(12, 2))

  # frames with differing row counts must not bind
  expect_error(cbind(hdf, otherhdf))


  ##### FAILS #####
  # binding a whole frame to an inline single-column slice
  # expect_equal(dim(cbind(hdf, hdf[, 1])), c(12, 5))

  testEnd()
}

# Hand test.cbind to doTest under the label "test cbind". doTest is not
# defined in this file; presumably it comes from the sourced utilities.
doTest("test cbind", test.cbind)

0 comments on commit 4989205

Please sign in to comment.