Skip to content

Commit

Permalink
ARROW-12869: [R] Bindings for utf8_reverse and ascii_reverse
Browse files Browse the repository at this point in the history
This adds tests for the ascii_reverse kernel, plus a binding and tests for the stri_reverse function, which calls the utf8_reverse kernel.

Closes apache#10589 from thisisnic/ARROW-12869_str_reverse

Lead-authored-by: Nic Crane <[email protected]>
Co-authored-by: Ian Cook <[email protected]>
Signed-off-by: Ian Cook <[email protected]>
  • Loading branch information
thisisnic and ianmcook committed Jun 24, 2021
1 parent 5275e72 commit c4a20e9
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 3 deletions.
1 change: 1 addition & 0 deletions r/DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ Suggests:
pkgload,
reticulate,
rmarkdown,
stringi,
stringr,
testthat,
tibble,
Expand Down
1 change: 1 addition & 0 deletions r/R/expression.R
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
"str_length" = "utf8_length",
"str_to_lower" = "utf8_lower",
"str_to_upper" = "utf8_upper",
# stri_reverse comes from stringi; it is the name the tests in
# test-dplyr-string-functions.R call, so the key must match it exactly.
"stri_reverse" = "utf8_reverse",
# str_trim is defined in dplyr-functions.R
"year" = "year",
"isoyear" = "iso_year",
Expand Down
42 changes: 39 additions & 3 deletions r/tests/testthat/test-dplyr-string-functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ skip_if_not_available("utf8proc")

library(dplyr)
library(stringr)
library(stringi)

test_that("paste, paste0, and str_c", {
df <- tibble(
Expand Down Expand Up @@ -712,7 +713,6 @@ test_that("strptime", {
tstamp,
check.tzone = FALSE
)

})

test_that("errors in strptime", {
Expand All @@ -725,6 +725,43 @@ test_that("errors in strptime", {
)
})

test_that("stri_reverse and arrow_ascii_reverse functions", {
  # ASCII-only fixture; the separators include a newline and a tab.
  ascii_tbl <- tibble(
    x = c(
      "Foo\nand bar",
      "baz\tand qux and quux"
    )
  )

  # Fixture containing non-ASCII code points (U+00A0 and U+3000).
  utf8_tbl <- tibble(
    x = c(
      "Foo\u00A0\u0061nd\u00A0bar",
      "\u0062az\u00A0and\u00A0qux\u3000and\u00A0quux"
    )
  )

  # The strings of `ascii_tbl`, reversed character by character.
  ascii_reversed <- tibble(
    x = c(
      "rab dna\nooF",
      "xuuq dna xuq dna\tzab"
    )
  )

  # stri_reverse() is checked on both fixtures through the project helper
  # expect_dplyr_equal() (defined outside this file section).
  expect_dplyr_equal(
    input %>% mutate(x = stri_reverse(x)) %>% collect(),
    utf8_tbl
  )
  expect_dplyr_equal(
    input %>% mutate(x = stri_reverse(x)) %>% collect(),
    ascii_tbl
  )

  # arrow_ascii_reverse() on an Arrow Table built from the ASCII fixture.
  expect_equivalent(
    ascii_tbl %>%
      Table$create() %>%
      mutate(x = arrow_ascii_reverse(x)) %>%
      collect(),
    ascii_reversed
  )

  # The same call on the non-ASCII fixture is expected to raise this error.
  expect_error(
    utf8_tbl %>%
      Table$create() %>%
      mutate(x = arrow_ascii_reverse(x)) %>%
      collect(),
    "Invalid: Non-ASCII sequence in input"
  )
})

test_that("str_like", {

df <- tibble(x = c("Foo and bar", "baz and qux and quux"))
Expand Down Expand Up @@ -783,7 +820,6 @@ test_that("str_like", {
input %>%
mutate(x = str_like(x, "%baz%")) %>%
collect(),
df
)

})

0 comments on commit c4a20e9

Please sign in to comment.