Skip to content

Commit

Permalink
Issue openml#140: add general tests
Browse files (browse the repository at this point in the history)
  • Loading branch information
giuseppec committed Jan 17, 2016
1 parent d03ae95 commit 828e243
Show file tree
Hide file tree
Showing 5 changed files with 44 additions and 9 deletions.
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ Suggests:
Imports:
BBmisc (>= 1.9),
checkmate (>= 1.6.3),
ParamHelpers (>= 1.6),
data.table,
digest,
httr,
Expand Down
4 changes: 2 additions & 2 deletions R/listOMLTasks.R
Original file line number Diff line number Diff line change
Expand Up @@ -41,13 +41,13 @@ listOMLTasks = function(verbosity = NULL, status = "active") {
# get the tag indices and paste them together as single column
tag.ind = names(strings) == "tag"
strings = c(strings[!tag.ind], "tags" = collapse(strings[tag.ind], sep = ", "))
out.vars = c("task_id", "task_type", "did", "status", "name", "tags",
out.vars = c("task_id", "task_type", "did", "status", "name", "target_feature", "tags",
"estimation_procedure", "evaluation_measures", names[names(names)%in%"quality"])
return(as.list(strings[out.vars]))
})
li = as.data.frame(rbindlist(info, fill = TRUE))
li = li[, !is.na(colnames(li))]
int.vars = setdiff(colnames(li), c("task_type", "status", "name", "tags", "evaluation_measures"))
int.vars = setdiff(colnames(li), c("task_type", "status", "name", "target_feature", "tags", "evaluation_measures"))
li[, int.vars] = lapply(int.vars, function(x) as.integer(li[, x]))

estproc = listOMLEstimationProcedures(verbosity = FALSE)
Expand Down
1 change: 1 addition & 0 deletions R/zzz.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#' @import BBmisc
#' @import checkmate
#' @import ParamHelpers
#' @import digest
#' @import httr
#' @import RCurl
Expand Down
8 changes: 5 additions & 3 deletions tests/testthat/test_base_listOMLDataSets.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,11 @@ context("listOMLDataSets")
test_that("listOMLDataSets", {
dsl = listOMLDataSets()
expect_is(dsl, "data.frame")
expect_true(nrow(dsl) > 100L && ncol(dsl) == 10L)
expect_true(setequal(names(dsl), c("did", "status", "name", "NumberOfClasses", "NumberOfFeatures",
"NumberOfInstances", "NumberOfInstancesWithMissingValues", "NumberOfMissingValues",
expect_true(nrow(dsl) > 100L && ncol(dsl) == 14L)
expect_true(setequal(names(dsl), c("did", "status", "name", "MajorityClassSize",
"MaxNominalAttDistinctValues", "MinorityClassSize", "NumBinaryAtts",
"NumberOfClasses", "NumberOfFeatures", "NumberOfInstances",
"NumberOfInstancesWithMissingValues", "NumberOfMissingValues",
"NumberOfNumericFeatures", "NumberOfSymbolicFeatures")))
inds = which(names(dsl) %in% c("status", "name"))
expect_true(all(apply(dsl[, inds], 2, is.character)))
Expand Down
39 changes: 35 additions & 4 deletions tests/testthat/test_base_listOMLTasks.R
Original file line number Diff line number Diff line change
@@ -1,14 +1,45 @@
context("listOMLTasks")

test_that("listOMLTasks", {
exp.names = c("task_id", "task_type", "did", "status", "name", "tags",
"estimation_procedure", "evaluation_measures", "NumberOfClasses",
"NumberOfFeatures", "NumberOfInstances", "NumberOfInstancesWithMissingValues",
"NumberOfMissingValues", "NumberOfNumericFeatures", "NumberOfSymbolicFeatures"
exp.names = c("task_id", "task_type", "did", "status", "name", "target_feature", "tags",
"estimation_procedure", "evaluation_measures", "MajorityClassSize",
"MaxNominalAttDistinctValues", "MinorityClassSize", "NumBinaryAtts",
"NumberOfClasses", "NumberOfFeatures", "NumberOfInstances",
"NumberOfInstancesWithMissingValues", "NumberOfMissingValues",
"NumberOfNumericFeatures", "NumberOfSymbolicFeatures"
)

tasks = listOMLTasks()
expect_is(tasks, "data.frame")
expect_true(nrow(tasks) > 5L)
expect_true(isSuperset(colnames(tasks), exp.names))

# check if qualities are meaningful
# FIXME: code below must also work with na.rm = FALSE
na.rm = TRUE

# check number of classes
tasks2 = subset(tasks, NumberOfClasses == 2)
expect_true(all(rowSums(tasks2[, c("MinorityClassSize", "MajorityClassSize")]) == tasks2$NumberOfInstances))

tasksClass = subset(tasks, task_type == "Supervised Classification")
sumMinMajClass = rowSums(tasksClass[, c("MinorityClassSize", "MajorityClassSize")])
expect_true(all(sumMinMajClass <= tasksClass$NumberOfInstances, na.rm = na.rm))

# check features
expect_true(all(tasks$NumBinaryAtts <= tasks$NumberOfSymbolicFeatures, na.rm = na.rm))

sumNumSymFeat = rowSums(tasks[, c("NumberOfNumericFeatures", "NumberOfSymbolicFeatures")])
expect_true(all(tasks$NumberOfFeatures >= sumNumSymFeat))

# FIXME: sometimes NumFeat + SymFeat = AllFeat and sometimes NumFeat + SymFeat + 1 = AllFeat
# sumNumSymFeat = rowSums(tasks[, c("NumberOfNumericFeatures", "NumberOfSymbolicFeatures")]) + 1
# expect_true(all(tasks$NumberOfFeatures == sumNumSymFeat))
# cols = c("task_id", "NumberOfNumericFeatures", "NumberOfSymbolicFeatures", "NumberOfFeatures", "task_type")
# summary(tasks[(tasks$NumberOfFeatures != sumNumSymFeat), cols])
# summary(tasks[(tasks$NumberOfFeatures == sumNumSymFeat), cols])

# check missings
expect_true(all(tasks$NumberOfInstancesWithMissingValues <= tasks$NumberOfInstances))
expect_true(all(tasks$NumberOfMissingValues <= tasks$NumberOfInstances*tasks$NumberOfFeatures))
})

0 comments on commit 828e243

Please sign in to comment.