Skip to content

Commit

Permalink
Export and document select helpers.
Browse files Browse the repository at this point in the history
Also includes new strategy for passing current variable information along, in a way that's more extensible. Fixes tidyverse#1410.
  • Loading branch information
hadley committed Mar 7, 2016
1 parent 1e4a97d commit 2151d8b
Show file tree
Hide file tree
Showing 9 changed files with 177 additions and 64 deletions.
8 changes: 8 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -323,6 +323,7 @@ export(collect)
export(combine)
export(compare_tbls)
export(compute)
export(contains)
export(copy_lahman)
export(copy_nycflights13)
export(copy_to)
Expand All @@ -332,6 +333,7 @@ export(cumall)
export(cumany)
export(cume_dist)
export(cummean)
export(current_vars)
export(data_frame)
export(data_frame_)
export(db_analyze)
Expand All @@ -356,8 +358,10 @@ export(distinct)
export(distinct_)
export(do)
export(do_)
export(ends_with)
export(escape)
export(eval_tbls)
export(everything)
export(explain)
export(failwith)
export(filter)
Expand Down Expand Up @@ -402,6 +406,7 @@ export(location)
export(lst)
export(lst_)
export(make_tbl)
export(matches)
export(min_rank)
export(mutate)
export(mutate_)
Expand All @@ -414,8 +419,10 @@ export(n_groups)
export(near)
export(nth)
export(ntile)
export(num_range)
export(nycflights13_postgres)
export(nycflights13_sqlite)
export(one_of)
export(order_by)
export(partial_eval)
export(percent_rank)
Expand Down Expand Up @@ -469,6 +476,7 @@ export(src_sql)
export(src_sqlite)
export(src_tbls)
export(src_translate_env)
export(starts_with)
export(summarise)
export(summarise_)
export(summarise_each)
Expand Down
3 changes: 3 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
# dplyr 0.4.3.9000

* The `select()` helpers are now exported and have their own documentation
(#1410).

* `one_of()` gives a useful error message if variables names are not found
in data frame (#1407).

Expand Down
17 changes: 1 addition & 16 deletions R/manip.r
Original file line number Diff line number Diff line change
Expand Up @@ -214,22 +214,7 @@ arrange_ <- function(.data, ..., .dots) {
#' As well as using existing functions like \code{:} and \code{c}, there are
#' a number of special functions that only work inside \code{select}
#'
#' \itemize{
#' \item \code{starts_with(x, ignore.case = TRUE)}:
#' names starts with \code{x}
#' \item \code{ends_with(x, ignore.case = TRUE)}:
#' names ends in \code{x}
#' \item \code{contains(x, ignore.case = TRUE)}:
#' selects all variables whose name contains \code{x}
#' \item \code{matches(x, ignore.case = TRUE)}:
#' selects all variables whose name matches the regular expression \code{x}
#' \item \code{num_range("x", 1:5, width = 2)}:
#' selects all variables (numerically) from x01 to x05.
#' \item \code{one_of("x", "y", "z")}:
#' selects variables provided in a character vector.
#' \item \code{everything()}:
#' selects all variables.
#' }

#'
#' To drop variables, use \code{-}. You can rename variables with
#' named arguments.
Expand Down
80 changes: 73 additions & 7 deletions R/select-utils.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,53 @@
starts_with <- function(vars, match, ignore.case = TRUE) {
#' Select helpers
#'
#' These functions allow you to select variables based on their names.
#' \itemize{
#' \item \code{starts_with()}: starts with a prefix
#' \item \code{ends_with()}: ends with a prefix
#' \item \code{contains()}: contains a literal string
#' \item \code{matches()}: matches a regular expression
#' \item \code{num_range()}: a numerical range like x01, x02, x03.
#' \item \code{one_of()}: varables in character vector.
#' \item \code{everything()}: all variables.
#' }
#'
#' @param match A string.
#' @param ignore.case If \code{TRUE}, the default, ignores case when matching
#' names.
#' @param vars A character vector of variable names. When called from inside
#' \code{\link{select}()} these are automatically set to the names of the
#' table.
#' @name select_helpers
#' @return An integer vector given the position of the matched variables.
#' @examples
#' iris <- tbl_df(iris) # so it prints a little nicer
#' select(iris, starts_with("Petal"))
#' select(iris, ends_with("Width"))
#' select(iris, contains("etal"))
#' select(iris, matches(".t."))
#' select(iris, Petal.Length, Petal.Width)
#' select(iris, everything())
#' vars <- c("Petal.Length", "Petal.Width")
#' select(iris, one_of(vars))
NULL

cur_vars_env <- new.env()

set_current_vars <- function(x) {
stopifnot(is.character(x))
cur_vars_env$selected <- x
}
reset_current_vars <- function() {
set_current_vars(character())
}

#' @export
#' @rdname select_helpers
current_vars <- function() cur_vars_env$selected

#' @export
#' @rdname select_helpers
starts_with <- function(match, ignore.case = TRUE, vars = current_vars()) {
stopifnot(is.string(match), !is.na(match), nchar(match) > 0)

if (ignore.case) match <- tolower(match)
Expand All @@ -8,7 +57,9 @@ starts_with <- function(vars, match, ignore.case = TRUE) {
which(substr(vars, 1, n) == match)
}

ends_with <- function(vars, match, ignore.case = TRUE) {
#' @export
#' @rdname select_helpers
ends_with <- function(match, ignore.case = TRUE, vars = current_vars()) {
stopifnot(is.string(match), !is.na(match), nchar(match) > 0)

if (ignore.case) match <- tolower(match)
Expand All @@ -20,7 +71,9 @@ ends_with <- function(vars, match, ignore.case = TRUE) {
which(substr(vars, pmax(1, length - n + 1), length) == match)
}

contains <- function(vars, match, ignore.case = TRUE) {
#' @export
#' @rdname select_helpers
contains <- function(match, ignore.case = TRUE, vars = current_vars()) {
stopifnot(is.string(match), nchar(match) > 0)

if (ignore.case) {
Expand All @@ -30,20 +83,31 @@ contains <- function(vars, match, ignore.case = TRUE) {
grep(match, vars, fixed = TRUE)
}

matches <- function(vars, match, ignore.case = TRUE) {
#' @export
#' @rdname select_helpers
matches <- function(match, ignore.case = TRUE, vars = current_vars()) {
stopifnot(is.string(match), nchar(match) > 0)

grep(match, vars, ignore.case = ignore.case)
}

num_range <- function(vars, prefix, range, width = NULL) {
#' @export
#' @rdname select_helpers
#' @param prefix A prefix that starts the numeric range.
#' @param range A sequence of integers, like \code{1:5}
#' @param width Optionally, the "width" of the numeric range. For example,
#' a range of 2 gives "01", a range of three "001", etc.
num_range <- function(prefix, range, width = NULL, vars = current_vars()) {
if (!is.null(width)) {
range <- sprintf(paste0("%0", width, "d"), range)
}
match(paste0(prefix, range), vars)
}

one_of <- function(vars, ...) {
#' @export
#' @rdname select_helpers
#' @param ... One or more character vectors.
one_of <- function(..., vars = current_vars()) {
keep <- c(...)

if (!is.character(keep)) {
Expand All @@ -58,6 +122,8 @@ one_of <- function(vars, ...) {
match(keep, vars)
}

everything <- function(vars) {
#' @export
#' @rdname select_helpers
everything <- function(vars = current_vars()) {
seq_along(vars)
}
21 changes: 6 additions & 15 deletions R/select-vars.R
Original file line number Diff line number Diff line change
Expand Up @@ -58,23 +58,15 @@ select_vars_ <- function(vars, args, include = character(), exclude = character(
return(setNames(vars, vars))
}

args <- lazyeval::as.lazy_dots(args)
# Set current_vars so avaialble to select_helpers
set_current_vars(vars)
on.exit(reset_current_vars(), add = TRUE)

# No non-standard evaluation - but all names mapped to their position.
# Keep integer semantics: include = +, exclude = -
# Map variable names to their positions: this keeps integer semantics
args <- lazyeval::as.lazy_dots(args)
names_list <- setNames(as.list(seq_along(vars)), vars)

select_funs <- list(
starts_with = function(...) starts_with(vars, ...),
ends_with = function(...) ends_with(vars, ...),
contains = function(...) contains(vars, ...),
matches = function(...) matches(vars, ...),
num_range = function(...) num_range(vars, ...),
one_of = function(...) one_of(vars, ...),
everything = function(...) everything(vars, ...)
)

ind_list <- lazyeval::lazy_eval(args, c(names_list, select_funs))
ind_list <- lazyeval::lazy_eval(args, names_list)
names(ind_list) <- names2(args)

is_numeric <- vapply(ind_list, is.numeric, logical(1))
Expand All @@ -94,7 +86,6 @@ select_vars_ <- function(vars, args, include = character(), exclude = character(
sel <- c(setdiff2(include, sel), sel)
sel <- setdiff2(sel, exclude)


# Ensure all output vars named
if (length(sel) == 0) {
names(sel) <- sel
Expand Down
16 changes: 0 additions & 16 deletions man/select.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

76 changes: 76 additions & 0 deletions man/select_helpers.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 10 additions & 0 deletions tests/testthat/test-select-helpers.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
context("select-helpers")
# one_of ------------------------------------------------------------------

test_that("one_of gives useful error", {
vars <- c("x", "y")

expect_error(select(df, one_of("z", vars = vars)), "Unknown variables: `z`")
expect_error(select(df, one_of("x", vars = vars)), "Unknown variables: `z`")
})

10 changes: 0 additions & 10 deletions tests/testthat/test-select.r
Original file line number Diff line number Diff line change
Expand Up @@ -186,13 +186,3 @@ test_that("invalid inputs raise error", {
expect_error(combine_vars(names(mtcars), list(c(-1, 1))), "positive or negative")
expect_error(combine_vars(names(mtcars), list(12)), "must be between")
})

# one_of ------------------------------------------------------------------

test_that("one_of gives useful error", {
df <- data_frame(x = 1, y = 1)

expect_error(select(df, one_of("z")), "Unknown variables: `z`")
expect_error(select(df, one_of("x", "z")), "Unknown variables: `z`")
})

0 comments on commit 2151d8b

Please sign in to comment.