[SPARK-20543][SPARKR] skip tests when running on CRAN
## What changes were proposed in this pull request?

General rule on whether to skip a test on CRAN (the guard itself is sketched after this list): skip it if
- it is an RDD test
- it could run long or is complicated (streaming, HiveContext)
- it exercises error conditions
- it is unlikely to change or break
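
The guard used throughout the patch is `testthat::skip_on_cran()`, which skips the enclosing test unless the `NOT_CRAN` environment variable is set to `"true"` (devtools and typical CI setups set it; CRAN does not). A minimal sketch of the pattern as applied to each affected test:

```r
library(testthat)

test_that("SerDe of primitive types", {
  skip_on_cran()  # no-op when NOT_CRAN = "true"; skips the test on CRAN

  # ... the existing assertions run unchanged off-CRAN ...
  expect_equal(1L, 1L)
})
```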

## How was this patch tested?

unit tests, `R CMD check --as-cran`, `R CMD check`
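
Both behaviors can also be exercised from an R session by toggling `NOT_CRAN` around `testthat::test_file()`; a hedged sketch of one possible local workflow (the file path and an already-initialized SparkR session are assumptions, not part of the patch):

```r
library(testthat)

# Simulate a CRAN run: the skip_on_cran() guards fire and the tests are skipped.
Sys.setenv(NOT_CRAN = "false")
test_file("R/pkg/inst/tests/testthat/test_Serde.R")

# Normal dev/CI run: the guards are no-ops and the tests execute.
Sys.setenv(NOT_CRAN = "true")
test_file("R/pkg/inst/tests/testthat/test_Serde.R")
```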

Author: Felix Cheung <[email protected]>

Closes apache#17817 from felixcheung/rskiptest.
felixcheung authored and Felix Cheung committed May 4, 2017
1 parent 02bbe73 commit fc472bd
Showing 20 changed files with 307 additions and 4 deletions.
6 changes: 6 additions & 0 deletions R/pkg/inst/tests/testthat/test_Serde.R
@@ -20,6 +20,8 @@ context("SerDe functionality")
 sparkSession <- sparkR.session(enableHiveSupport = FALSE)

 test_that("SerDe of primitive types", {
+  skip_on_cran()
+
   x <- callJStatic("SparkRHandler", "echo", 1L)
   expect_equal(x, 1L)
   expect_equal(class(x), "integer")
@@ -38,6 +40,8 @@ test_that("SerDe of primitive types", {
 })

 test_that("SerDe of list of primitive types", {
+  skip_on_cran()
+
   x <- list(1L, 2L, 3L)
   y <- callJStatic("SparkRHandler", "echo", x)
   expect_equal(x, y)
@@ -65,6 +69,8 @@ test_that("SerDe of list of primitive types", {
 })

 test_that("SerDe of list of lists", {
+  skip_on_cran()
+
   x <- list(list(1L, 2L, 3L), list(1, 2, 3),
             list(TRUE, FALSE), list("a", "b", "c"))
   y <- callJStatic("SparkRHandler", "echo", x)
2 changes: 2 additions & 0 deletions R/pkg/inst/tests/testthat/test_Windows.R
@@ -17,6 +17,8 @@
 context("Windows-specific tests")

 test_that("sparkJars tag in SparkContext", {
+  skip_on_cran()
+
   if (.Platform$OS.type != "windows") {
     skip("This test is only for Windows, skipped")
   }
8 changes: 8 additions & 0 deletions R/pkg/inst/tests/testthat/test_binaryFile.R
@@ -24,6 +24,8 @@ sc <- callJStatic("org.apache.spark.sql.api.r.SQLUtils", "getJavaSparkContext",
 mockFile <- c("Spark is pretty.", "Spark is awesome.")

 test_that("saveAsObjectFile()/objectFile() following textFile() works", {
+  skip_on_cran()
+
   fileName1 <- tempfile(pattern = "spark-test", fileext = ".tmp")
   fileName2 <- tempfile(pattern = "spark-test", fileext = ".tmp")
   writeLines(mockFile, fileName1)
@@ -38,6 +40,8 @@ test_that("saveAsObjectFile()/objectFile() following textFile() works", {
 })

 test_that("saveAsObjectFile()/objectFile() works on a parallelized list", {
+  skip_on_cran()
+
   fileName <- tempfile(pattern = "spark-test", fileext = ".tmp")

   l <- list(1, 2, 3)
@@ -50,6 +54,8 @@ test_that("saveAsObjectFile()/objectFile() works on a parallelized list", {
 })

 test_that("saveAsObjectFile()/objectFile() following RDD transformations works", {
+  skip_on_cran()
+
   fileName1 <- tempfile(pattern = "spark-test", fileext = ".tmp")
   fileName2 <- tempfile(pattern = "spark-test", fileext = ".tmp")
   writeLines(mockFile, fileName1)
@@ -74,6 +80,8 @@ test_that("saveAsObjectFile()/objectFile() following RDD transformations works",
 })

 test_that("saveAsObjectFile()/objectFile() works with multiple paths", {
+  skip_on_cran()
+
   fileName1 <- tempfile(pattern = "spark-test", fileext = ".tmp")
   fileName2 <- tempfile(pattern = "spark-test", fileext = ".tmp")

6 changes: 6 additions & 0 deletions R/pkg/inst/tests/testthat/test_binary_function.R
@@ -29,6 +29,8 @@ rdd <- parallelize(sc, nums, 2L)
 mockFile <- c("Spark is pretty.", "Spark is awesome.")

 test_that("union on two RDDs", {
+  skip_on_cran()
+
   actual <- collectRDD(unionRDD(rdd, rdd))
   expect_equal(actual, as.list(rep(nums, 2)))

@@ -51,6 +53,8 @@ test_that("union on two RDDs", {
 })

 test_that("cogroup on two RDDs", {
+  skip_on_cran()
+
   rdd1 <- parallelize(sc, list(list(1, 1), list(2, 4)))
   rdd2 <- parallelize(sc, list(list(1, 2), list(1, 3)))
   cogroup.rdd <- cogroup(rdd1, rdd2, numPartitions = 2L)
@@ -69,6 +73,8 @@ test_that("cogroup on two RDDs", {
 })

 test_that("zipPartitions() on RDDs", {
+  skip_on_cran()
+
   rdd1 <- parallelize(sc, 1:2, 2L) # 1, 2
   rdd2 <- parallelize(sc, 1:4, 2L) # 1:2, 3:4
   rdd3 <- parallelize(sc, 1:6, 2L) # 1:3, 4:6
4 changes: 4 additions & 0 deletions R/pkg/inst/tests/testthat/test_broadcast.R
@@ -26,6 +26,8 @@ nums <- 1:2
 rrdd <- parallelize(sc, nums, 2L)

 test_that("using broadcast variable", {
+  skip_on_cran()
+
   randomMat <- matrix(nrow = 10, ncol = 10, data = rnorm(100))
   randomMatBr <- broadcast(sc, randomMat)

@@ -38,6 +40,8 @@ test_that("using broadcast variable", {
 })

 test_that("without using broadcast variable", {
+  skip_on_cran()
+
   randomMat <- matrix(nrow = 10, ncol = 10, data = rnorm(100))

   useBroadcast <- function(x) {
8 changes: 8 additions & 0 deletions R/pkg/inst/tests/testthat/test_client.R
@@ -18,6 +18,8 @@
 context("functions in client.R")

 test_that("adding spark-testing-base as a package works", {
+  skip_on_cran()
+
   args <- generateSparkSubmitArgs("", "", "", "",
                                   "holdenk:spark-testing-base:1.3.0_0.0.5")
   expect_equal(gsub("[[:space:]]", "", args),
@@ -26,16 +28,22 @@ test_that("adding spark-testing-base as a package works", {
 })

 test_that("no package specified doesn't add packages flag", {
+  skip_on_cran()
+
   args <- generateSparkSubmitArgs("", "", "", "", "")
   expect_equal(gsub("[[:space:]]", "", args),
                "")
 })

 test_that("multiple packages don't produce a warning", {
+  skip_on_cran()
+
   expect_warning(generateSparkSubmitArgs("", "", "", "", c("A", "B")), NA)
 })

 test_that("sparkJars sparkPackages as character vectors", {
+  skip_on_cran()
+
   args <- generateSparkSubmitArgs("", "", c("one.jar", "two.jar", "three.jar"), "",
                                   c("com.databricks:spark-avro_2.10:2.0.1"))
   expect_match(args, "--jars one.jar,two.jar,three.jar")
16 changes: 16 additions & 0 deletions R/pkg/inst/tests/testthat/test_context.R
@@ -18,6 +18,8 @@
 context("test functions in sparkR.R")

 test_that("Check masked functions", {
+  skip_on_cran()
+
   # Check that we are not masking any new function from base, stats, testthat unexpectedly
   # NOTE: We should avoid adding entries to *namesOfMaskedCompletely* as masked functions make it
   # hard for users to use base R functions. Please check when in doubt.
@@ -55,6 +57,8 @@ test_that("Check masked functions", {
 })

 test_that("repeatedly starting and stopping SparkR", {
+  skip_on_cran()
+
   for (i in 1:4) {
     sc <- suppressWarnings(sparkR.init())
     rdd <- parallelize(sc, 1:20, 2L)
@@ -73,6 +77,8 @@ test_that("repeatedly starting and stopping SparkSession", {
 })

 test_that("rdd GC across sparkR.stop", {
+  skip_on_cran()
+
   sc <- sparkR.sparkContext() # sc should get id 0
   rdd1 <- parallelize(sc, 1:20, 2L) # rdd1 should get id 1
   rdd2 <- parallelize(sc, 1:10, 2L) # rdd2 should get id 2
@@ -96,6 +102,8 @@ test_that("rdd GC across sparkR.stop", {
 })

 test_that("job group functions can be called", {
+  skip_on_cran()
+
   sc <- sparkR.sparkContext()
   setJobGroup("groupId", "job description", TRUE)
   cancelJobGroup("groupId")
@@ -108,12 +116,16 @@
 })

 test_that("utility function can be called", {
+  skip_on_cran()
+
   sparkR.sparkContext()
   setLogLevel("ERROR")
   sparkR.session.stop()
 })

 test_that("getClientModeSparkSubmitOpts() returns spark-submit args from whitelist", {
+  skip_on_cran()
+
   e <- new.env()
   e[["spark.driver.memory"]] <- "512m"
   ops <- getClientModeSparkSubmitOpts("sparkrmain", e)
@@ -141,6 +153,8 @@ test_that("getClientModeSparkSubmitOpts() returns spark-submit args from whiteli
 })

 test_that("sparkJars sparkPackages as comma-separated strings", {
+  skip_on_cran()
+
   expect_warning(processSparkJars(" a, b "))
   jars <- suppressWarnings(processSparkJars(" a, b "))
   expect_equal(lapply(jars, basename), list("a", "b"))
@@ -168,6 +182,8 @@ test_that("spark.lapply should perform simple transforms", {
 })

 test_that("add and get file to be downloaded with Spark job on every node", {
+  skip_on_cran()
+
   sparkR.sparkContext()
   # Test add file.
   path <- tempfile(pattern = "hello", fileext = ".txt")
4 changes: 4 additions & 0 deletions R/pkg/inst/tests/testthat/test_includePackage.R
@@ -26,6 +26,8 @@ nums <- 1:2
 rdd <- parallelize(sc, nums, 2L)

 test_that("include inside function", {
+  skip_on_cran()
+
   # Only run the test if plyr is installed.
   if ("plyr" %in% rownames(installed.packages())) {
     suppressPackageStartupMessages(library(plyr))
@@ -42,6 +44,8 @@ test_that("include inside function", {
 })

 test_that("use include package", {
+  skip_on_cran()
+
   # Only run the test if plyr is installed.
   if ("plyr" %in% rownames(installed.packages())) {
     suppressPackageStartupMessages(library(plyr))
4 changes: 4 additions & 0 deletions R/pkg/inst/tests/testthat/test_mllib_clustering.R
@@ -255,6 +255,8 @@ test_that("spark.lda with libsvm", {
 })

 test_that("spark.lda with text input", {
+  skip_on_cran()
+
   text <- read.text(absoluteSparkPath("data/mllib/sample_lda_data.txt"))
   model <- spark.lda(text, optimizer = "online", features = "value")

@@ -297,6 +299,8 @@ test_that("spark.lda with text input", {
 })

 test_that("spark.posterior and spark.perplexity", {
+  skip_on_cran()
+
   text <- read.text(absoluteSparkPath("data/mllib/sample_lda_data.txt"))
   model <- spark.lda(text, features = "value", k = 3)

12 changes: 12 additions & 0 deletions R/pkg/inst/tests/testthat/test_mllib_regression.R
@@ -23,6 +23,8 @@ context("MLlib regression algorithms, except for tree-based algorithms")
 sparkSession <- sparkR.session(enableHiveSupport = FALSE)

 test_that("formula of spark.glm", {
+  skip_on_cran()
+
   training <- suppressWarnings(createDataFrame(iris))
   # directly calling the spark API
   # dot minus and intercept vs native glm
@@ -195,6 +197,8 @@ test_that("spark.glm summary", {
 })

 test_that("spark.glm save/load", {
+  skip_on_cran()
+
   training <- suppressWarnings(createDataFrame(iris))
   m <- spark.glm(training, Sepal_Width ~ Sepal_Length + Species)
   s <- summary(m)
@@ -222,6 +226,8 @@ test_that("spark.glm save/load", {
 })

 test_that("formula of glm", {
+  skip_on_cran()
+
   training <- suppressWarnings(createDataFrame(iris))
   # dot minus and intercept vs native glm
   model <- glm(Sepal_Width ~ . - Species + 0, data = training)
@@ -248,6 +254,8 @@ test_that("formula of glm", {
 })

 test_that("glm and predict", {
+  skip_on_cran()
+
   training <- suppressWarnings(createDataFrame(iris))
   # gaussian family
   model <- glm(Sepal_Width ~ Sepal_Length + Species, data = training)
@@ -292,6 +300,8 @@ test_that("glm and predict", {
 })

 test_that("glm summary", {
+  skip_on_cran()
+
   # gaussian family
   training <- suppressWarnings(createDataFrame(iris))
   stats <- summary(glm(Sepal_Width ~ Sepal_Length + Species, data = training))
@@ -341,6 +351,8 @@ test_that("glm summary", {
 })

 test_that("glm save/load", {
+  skip_on_cran()
+
   training <- suppressWarnings(createDataFrame(iris))
   m <- glm(Sepal_Width ~ Sepal_Length + Species, data = training)
   s <- summary(m)
8 changes: 8 additions & 0 deletions R/pkg/inst/tests/testthat/test_parallelize_collect.R
@@ -39,6 +39,8 @@ jsc <- callJStatic("org.apache.spark.sql.api.r.SQLUtils", "getJavaSparkContext",
 # Tests

 test_that("parallelize() on simple vectors and lists returns an RDD", {
+  skip_on_cran()
+
   numVectorRDD <- parallelize(jsc, numVector, 1)
   numVectorRDD2 <- parallelize(jsc, numVector, 10)
   numListRDD <- parallelize(jsc, numList, 1)
@@ -66,6 +68,8 @@ test_that("parallelize() on simple vectors and lists returns an RDD", {
 })

 test_that("collect(), following a parallelize(), gives back the original collections", {
+  skip_on_cran()
+
   numVectorRDD <- parallelize(jsc, numVector, 10)
   expect_equal(collectRDD(numVectorRDD), as.list(numVector))

@@ -86,6 +90,8 @@ test_that("collect(), following a parallelize(), gives back the original collect
 })

 test_that("regression: collect() following a parallelize() does not drop elements", {
+  skip_on_cran()
+
   # 10 %/% 6 = 1, ceiling(10 / 6) = 2
   collLen <- 10
   numPart <- 6
@@ -95,6 +101,8 @@ test_that("regression: collect() following a parallelize() does not drop element
 })

 test_that("parallelize() and collect() work for lists of pairs (pairwise data)", {
+  skip_on_cran()
+
   # use the pairwise logical to indicate pairwise data
   numPairsRDDD1 <- parallelize(jsc, numPairs, 1)
   numPairsRDDD2 <- parallelize(jsc, numPairs, 2)
(Diffs for the remaining 9 changed files are not shown.)
