Skip to content

Commit

Permalink
[SPARK-24186][R][SQL] change reverse and concat to collection functions in R
Browse files Browse the repository at this point in the history

## What changes were proposed in this pull request?

`reverse` and `concat` are already defined in functions.R, where they are documented as column string functions. Since these two functions are now categorized as collection functions in Scala and Python, this change does the same in R.

## How was this patch tested?

Added tests in test_sparkSQL.R covering `reverse` on array and string columns and `concat` on array columns.

Author: Huaxin Gao <[email protected]>

Closes apache#21307 from huaxingao/spark_24186.
  • Loading branch information
huaxingao authored and HyukjinKwon committed May 14, 2018
1 parent 2fa3364 commit 3f0e801
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 21 deletions.
35 changes: 18 additions & 17 deletions R/pkg/R/functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,7 @@ NULL
#' head(select(tmp, array_contains(tmp$v1, 21), size(tmp$v1)))
#' head(select(tmp, array_max(tmp$v1), array_min(tmp$v1)))
#' head(select(tmp, array_position(tmp$v1, 21), array_sort(tmp$v1)))
#' head(select(tmp, flatten(tmp$v1)))
#' head(select(tmp, flatten(tmp$v1), reverse(tmp$v1)))
#' tmp2 <- mutate(tmp, v2 = explode(tmp$v1))
#' head(tmp2)
#' head(select(tmp, posexplode(tmp$v1)))
Expand All @@ -218,7 +218,10 @@ NULL
#' tmp3 <- mutate(df, v3 = create_map(df$model, df$cyl))
#' head(select(tmp3, map_keys(tmp3$v3)))
#' head(select(tmp3, map_values(tmp3$v3)))
#' head(select(tmp3, element_at(tmp3$v3, "Valiant")))}
#' head(select(tmp3, element_at(tmp3$v3, "Valiant")))
#' tmp4 <- mutate(df, v4 = create_array(df$mpg, df$cyl), v5 = create_array(df$hp))
#' head(select(tmp4, concat(tmp4$v4, tmp4$v5)))
#' head(select(df, concat(df$mpg, df$cyl, df$hp)))}
NULL

#' Window functions for Column operations
Expand Down Expand Up @@ -1260,9 +1263,9 @@ setMethod("quarter",
})

#' @details
#' \code{reverse}: Reverses the string column and returns it as a new string column.
#' \code{reverse}: Returns a reversed string or an array with the elements in reverse order.
#'
#' @rdname column_string_functions
#' @rdname column_collection_functions
#' @aliases reverse reverse,Column-method
#' @note reverse since 1.5.0
setMethod("reverse",
Expand Down Expand Up @@ -2055,20 +2058,10 @@ setMethod("countDistinct",

#' @details
#' \code{concat}: Concatenates multiple input columns together into a single column.
#' If all inputs are binary, concat returns an output as binary. Otherwise, it returns as string.
#' The function works with string, binary, and compatible array columns.
#'
#' @rdname column_string_functions
#' @rdname column_collection_functions
#' @aliases concat concat,Column-method
#' @examples
#'
#' \dontrun{
#' # concatenate strings
#' tmp <- mutate(df, s1 = concat(df$Class, df$Sex),
#' s2 = concat(df$Class, df$Sex, df$Age),
#' s3 = concat(df$Class, df$Sex, df$Age, df$Class),
#' s4 = concat_ws("_", df$Class, df$Sex),
#' s5 = concat_ws("+", df$Class, df$Sex, df$Age, df$Survived))
#' head(tmp)}
#' @note concat since 1.5.0
setMethod("concat",
signature(x = "Column"),
Expand Down Expand Up @@ -2409,6 +2402,13 @@ setMethod("shiftRightUnsigned", signature(y = "Column", x = "numeric"),
#' @param sep separator to use.
#' @rdname column_string_functions
#' @aliases concat_ws concat_ws,character,Column-method
#' @examples
#'
#' \dontrun{
#' # concatenate strings
#' tmp <- mutate(df, s1 = concat_ws("_", df$Class, df$Sex),
#' s2 = concat_ws("+", df$Class, df$Sex, df$Age, df$Survived))
#' head(tmp)}
#' @note concat_ws since 1.5.0
setMethod("concat_ws", signature(sep = "character", x = "Column"),
function(sep, x, ...) {
Expand Down Expand Up @@ -3063,7 +3063,8 @@ setMethod("array_sort",
})

#' @details
#' \code{flatten}: Transforms an array of arrays into a single array.
#' \code{flatten}: Creates a single array from an array of arrays.
#' If a structure of nested arrays is deeper than two levels, only one level of nesting is removed.
#'
#' @rdname column_collection_functions
#' @aliases flatten flatten,Column-method
Expand Down
4 changes: 2 additions & 2 deletions R/pkg/R/generics.R
Original file line number Diff line number Diff line change
Expand Up @@ -817,7 +817,7 @@ setGeneric("collect_set", function(x) { standardGeneric("collect_set") })
#' @rdname column
setGeneric("column", function(x) { standardGeneric("column") })

#' @rdname column_string_functions
#' @rdname column_collection_functions
#' @name NULL
setGeneric("concat", function(x, ...) { standardGeneric("concat") })

Expand Down Expand Up @@ -1134,7 +1134,7 @@ setGeneric("regexp_replace",
#' @name NULL
setGeneric("repeat_string", function(x, n) { standardGeneric("repeat_string") })

#' @rdname column_string_functions
#' @rdname column_collection_functions
#' @name NULL
setGeneric("reverse", function(x) { standardGeneric("reverse") })

Expand Down
17 changes: 15 additions & 2 deletions R/pkg/tests/fulltests/test_sparkSQL.R
Original file line number Diff line number Diff line change
Expand Up @@ -1479,7 +1479,7 @@ test_that("column functions", {
df5 <- createDataFrame(list(list(a = "010101")))
expect_equal(collect(select(df5, conv(df5$a, 2, 16)))[1, 1], "15")

# Test array_contains(), array_max(), array_min(), array_position() and element_at()
# Test array_contains(), array_max(), array_min(), array_position(), element_at() and reverse()
df <- createDataFrame(list(list(list(1L, 2L, 3L)), list(list(6L, 5L, 4L))))
result <- collect(select(df, array_contains(df[[1]], 1L)))[[1]]
expect_equal(result, c(TRUE, FALSE))
Expand All @@ -1496,6 +1496,13 @@ test_that("column functions", {
result <- collect(select(df, element_at(df[[1]], 1L)))[[1]]
expect_equal(result, c(1, 6))

result <- collect(select(df, reverse(df[[1]])))[[1]]
expect_equal(result, list(list(3L, 2L, 1L), list(4L, 5L, 6L)))

df2 <- createDataFrame(list(list("abc")))
result <- collect(select(df2, reverse(df2[[1]])))[[1]]
expect_equal(result, "cba")

# Test array_sort() and sort_array()
df <- createDataFrame(list(list(list(2L, 1L, 3L, NA)), list(list(NA, 6L, 5L, NA, 4L))))

Expand All @@ -1512,7 +1519,13 @@ test_that("column functions", {
result <- collect(select(df, slice(df[[1]], 2L, 2L)))[[1]]
expect_equal(result, list(list(2L, 3L), list(5L)))

# Test flattern
# Test concat()
df <- createDataFrame(list(list(list(1L, 2L, 3L), list(4L, 5L, 6L)),
list(list(7L, 8L, 9L), list(10L, 11L, 12L))))
result <- collect(select(df, concat(df[[1]], df[[2]])))[[1]]
expect_equal(result, list(list(1L, 2L, 3L, 4L, 5L, 6L), list(7L, 8L, 9L, 10L, 11L, 12L)))

# Test flatten()
df <- createDataFrame(list(list(list(list(1L, 2L), list(3L, 4L))),
list(list(list(5L, 6L), list(7L, 8L)))))
result <- collect(select(df, flatten(df[[1]])))[[1]]
Expand Down

0 comments on commit 3f0e801

Please sign in to comment.