Skip to content

Commit

Permalink
Implement recode(), a vectorised switch().
Browse files Browse the repository at this point in the history
Includes extraction of replace_with() function that is also used in if_else() and coalesce().

Fixes tidyverse#1710.
  • Loading branch information
hadley committed Mar 14, 2016
1 parent b50e3ae commit ee097ad
Show file tree
Hide file tree
Showing 13 changed files with 287 additions and 59 deletions.
2 changes: 2 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ Collate:
'progress.R'
'query.r'
'rank.R'
'recode.R'
'rowwise.r'
'sample.R'
'select-utils.R'
Expand All @@ -112,6 +113,7 @@ Collate:
'translate-sql.r'
'type-sum.r'
'utils-format.r'
'utils-replace-with.R'
'utils.r'
'view.r'
'zzz.r'
Expand Down
4 changes: 4 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,9 @@ S3method(print,tbl_sql)
S3method(print,trunc_mat)
S3method(query,DBIConnection)
S3method(rbind,grouped_df)
S3method(recode,character)
S3method(recode,factor)
S3method(recode,numeric)
S3method(rename_,data.frame)
S3method(rename_,grouped_df)
S3method(rename_,tbl_cube)
Expand Down Expand Up @@ -384,6 +387,7 @@ export(progress_estimated)
export(query)
export(rbind_all)
export(rbind_list)
export(recode)
export(regroup)
export(rename)
export(rename_)
Expand Down
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# dplyr 0.4.3.9000

* `recode()` provides a vectorised equivalent to `switch()` (#1710).

* `if_else()` is a vectorisd if statement: it's a stricter (type-safe), faster,
and more predictable version of `ifelse()`. In SQL it is translated to a
`CASE` statement.
Expand Down
15 changes: 2 additions & 13 deletions R/coalesce.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,27 +13,16 @@
#' @examples
#' # Use a single value to replace all missing values
#' x <- sample(c(1:5, NA, NA, NA))
#' coalesce(x, 0)
#' coalesce(x, 0L)
#'
#' # Or match together a complete vector from missing pieces
#' y <- c(1, 2, NA, NA, 5)
#' z <- c(NA, NA, 3, 4, 5)
#' coalesce(y, z)
coalesce <- function(x, ...) {
n <- length(x)

values <- list(...)
for (i in seq_along(values)) {
val <- values[[i]]
val_n <- length(val)

if (val_n == 1L) {
x[is.na(x)] <- val
} else if (val_n == n) {
x[is.na(x)] <- val[is.na(x)]
} else {
stop("Vector at position ", i, " is not length 1 or ", n, call. = FALSE)
}
x <- replace_with(x, is.na(x), values[[i]], paste0("Vector ", i))
}
x
}
46 changes: 9 additions & 37 deletions R/if_else.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,59 +11,31 @@
#' or length 1. They must also be the same type: \code{if_else} checks that
#' they have the same type and same class. All other attributes are
#' taken from \code{true}.
#' @param missing If not \code{NULL}, will be used to replace missing
#' values.
#' @return Where \code{condition} is \code{TRUE}, the matching value from
#' \code{true}, where it's \code{FALSE}, the matching value from \code{false},
#' otherwise \code{NA}.
#' @export
#' @examples
#' x <- -10:10
#' ifelse(x < 0, NA, x)
#' x <- c(-5:5, NA)
#' if_else(x < 0, NA_integer_, x)
#' if_else(x < 0, "negative", "positive", "missing")
#'
#' # Unlike ifelse, if_else preserves types
#' x <- factor(sample(letters[1:5], 10, replace = TRUE))
#' ifelse(x %in% c("a", "b", "c"), x, factor(NA))
#' if_else(x %in% c("a", "b", "c"), x, factor(NA))
#' # Attributes are taken from the `true` vector,
if_else <- function(condition, true, false) {
if_else <- function(condition, true, false, missing = NULL) {
if (!is.logical(condition)) {
stop("`condition` must be logical", call. = FALSE)
}
if (length(true) != length(condition) && length(true) != 1L) {
stop("`true` must be length one or the same length as `condition`", call. = FALSE)
}
if (length(false) != length(condition) && length(false) != 1L) {
stop("`false` must be length one or the same length as `condition`", call. = FALSE)
}
if (typeof(false) != typeof(true)) {
stop(
"`true` and `false` must be the same type ",
"(", typeof(false), " vs ", typeof(true), ")",
call. = FALSE
)
}
if (!identical(class(false), class(true))) {
stop(
"`true` and `false` must have same class ",
"(", paste(class(false), collapse = "/"), " vs ", paste(class(true), collapse = "/"), ")",
call. = FALSE
)
}

out <- true[rep(NA_integer_, length(condition))]

is_true <- condition & !is.na(condition)
if (length(true) == 1L) {
out[is_true] <- true
} else {
out[is_true] <- true[is_true]
}

is_false <- !condition & !is.na(condition)
if (length(false) == 1L) {
out[is_false] <- false
} else {
out[is_false] <- false[is_false]
}
out <- replace_with(out, condition & !is.na(condition), true, "`true`")
out <- replace_with(out, !condition & !is.na(condition), false, "`false`")
out <- replace_with(out, is.na(condition), missing, "`missing`")

out
}
115 changes: 115 additions & 0 deletions R/recode.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
#' Recode values
#'
#' This is a vectorised version of \code{\link{switch}()}: you can replace
#' numeric values based on their position, and character values by their
#' name. This is an S3 generic: dplyr provides methods for numeric, character,
#' and factors. For logical vectors, use \code{\link{if_else}}
#'
#' @param x A vector to modify
#' @param ... Replacments. These should be named for character and factor
#' \code{x}. All replacements must be the same type, and must have either
#' length one or the same length as x.
#' @param default If supplied, all values not otherwise matched will be
#' given this value instead of \code{NA}. Must be either length 1 or the same
#' length as \code{x}.
#' @param missing If supplied, any missing values in \code{x} will be
#' replaced by this value. Must be either length 1 or the same length as
#' \code{x}.
#' @return A vector the same length as \code{x}, and the same type as the
#' first of \code{...}, \code{default}, or \code{missing}.
#' @export
#' @examples
#' x <- c(1:5, NA)
#' recode(x, "a", "b", "c")
#' recode(x, "a", "b", "c", default = "other")
#' recode(x, "a", "b", "c", default = "other", missing = "missing")
#'
#' # Use named arguments with a character vector
#' x <- sample(c("a", "b", "c"), 10, replace = TRUE)
#' recode(x, a = "Apple")
#' recode(x, a = "Apple", default = x)
recode <- function(x, ..., default = NULL, missing = NULL) {
UseMethod("recode")
}

#' @export
recode.numeric <- function(x, ..., default = NULL, missing = NULL) {
values <- list(...)
if (any(has_names(values))) {
warning("Names are ignored", call. = FALSE)
}

n <- length(x)
template <- find_template(..., default, missing)
out <- template[rep(NA_integer_, n)]
replaced <- rep(FALSE, n)

for (i in seq_along(values)) {
out <- replace_with(out, x == i, values[[i]], paste0("Vector ", i))
replaced[x == i] <- TRUE
}

out <- replace_with(out, !replaced & !is.na(x), default, "`default`")
out <- replace_with(out, is.na(x), missing, "`missing`")
out
}

#' @export
recode.character <- function(x, ..., default = NULL, missing = NULL) {
values <- list(...)
if (!all(has_names(values))) {
stop("All replacements must be named", call. = FALSE)
}

n <- length(x)
template <- find_template(..., default, missing)
out <- template[rep(NA_integer_, n)]
replaced <- rep(FALSE, n)

for (nm in names(values)) {
out <- replace_with(out, x == nm, values[[nm]], paste0("`", nm, "`"))
replaced[x == nm] <- TRUE
}

out <- replace_with(out, !replaced & !is.na(x), default, "`default`")
out <- replace_with(out, is.na(x), missing, "`missing`")
out
}

#' @export
recode.factor <- function(x, ..., default = NULL, missing = NULL) {
values <- list(...)
if (length(values) == 0) {
stop("No replacements provided", call. = FALSE)
}

if (!all(has_names(values))) {
stop("All replacements must be named", call. = FALSE)
}
if (!is.null(missing)) {
stop("`missing` is not supported for factors", call. = FALSE)
}

out <- rep(NA_character_, length(levels(x)))
replaced <- rep(FALSE, length(levels(x)))

for (nm in names(values)) {
out <- replace_with(out, levels(x) == nm, values[[nm]], paste0("`", nm, "`"))
replaced[levels(x) == nm] <- TRUE
}

out <- replace_with(out, !replaced, default, "`default`")
levels(x) <- out

x
}

find_template <- function(...) {
x <- compact(list(...))

if (length(x) == 0) {
stop("No replacements provided", call. = FALSE)
}

x[[1]]
}
55 changes: 55 additions & 0 deletions R/utils-replace-with.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@

replace_with <- function(x, i, val, name) {
if (is.null(val)) {
return(x)
}

check_length(val, x, name)
check_type(val, x, name)
check_class(val, x, name)

if (length(val) == 1L) {
x[i] <- val
} else {
x[i] <- val[i]
}

x
}

check_length <- function(x, template, name = deparse(substitute(x))) {
n <- length(template)
if (length(x) == n) {
return()
}

if (length(x) == 1L) {
return()
}

stop(name, " is length ", length(x), " not 1 or ", n, ".", call. = FALSE)
}

check_type <- function(x, template, name = deparse(substitute(x))) {
if (identical(typeof(x), typeof(template))) {
return()
}

stop(
name, " has type '", typeof(x), "' not '", typeof(template), "'",
call. = FALSE
)
}

check_class <- function(x, template, name = deparse(substitute(x))) {
if (!is.object(x)) {
return()
}

if (identical(class(x), class(template))) {
return()
}

stop(name, " has class ", paste(class(x), collapse = "/"), " not ",
paste(class(template), collapse = "/"), call. = FALSE)
}
2 changes: 1 addition & 1 deletion man/coalesce.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 7 additions & 3 deletions man/if_else.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

45 changes: 45 additions & 0 deletions man/recode.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit ee097ad

Please sign in to comment.