Skip to content

Commit

Permalink
Add select_if() to select based on predicate (tidyverse#1880)
Browse files Browse the repository at this point in the history
* Add select_if() to select based on predicate

This function is analogous to summarise_if() and mutate_if()

Closes tidyverse#497, closes tidyverse#1569

* Silently add grouping variables in select_if()
  • Loading branch information
lionel- authored and hadley committed Jun 2, 2016
1 parent 265db7b commit 6d70cc7
Show file tree
Hide file tree
Showing 7 changed files with 89 additions and 8 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -446,6 +446,7 @@ export(sample_frac)
export(sample_n)
export(select)
export(select_)
export(select_if)
export(select_query)
export(select_vars)
export(select_vars_)
Expand Down
3 changes: 3 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,9 @@
functions instead of a list of functions generated by `funs()`
(though this is only useful for local sources). (#1845, @lionel-)

* `select_if()` lets you select columns with a predicate function.
Only compatible with local sources. (#497, #1569, @lionel-)

## Local backends

### dtplyr
Expand Down
4 changes: 2 additions & 2 deletions R/colwise.R
Original file line number Diff line number Diff line change
Expand Up @@ -110,12 +110,12 @@ mutate_if <- function(.tbl, .predicate, .funs, ...) {
mutate_(.tbl, .dots = vars)
}

probe_colwise_names <- function(tbl, p) {
probe_colwise_names <- function(tbl, p, ...) {
if (is.logical(p)) {
stopifnot(length(p) == length(tbl))
selected <- p
} else {
selected <- vapply(tbl, p, logical(1))
selected <- vapply(tbl, p, logical(1), ...)
}

vars <- tbl_vars(tbl)
Expand Down
17 changes: 11 additions & 6 deletions R/grouped-df.r
Original file line number Diff line number Diff line change
Expand Up @@ -93,19 +93,24 @@ cbind.grouped_df <- function(...) {
select_.grouped_df <- function(.data, ..., .dots) {
dots <- lazyeval::all_dots(.dots, ...)
vars <- select_vars_(names(.data), dots)
vars <- ensure_grouped_vars(vars, .data)

# Ensure all grouping variables are present, notifying user with a message
group_names <- vapply(groups(.data), as.character, character(1))
select_impl(.data, vars)
}

ensure_grouped_vars <- function(vars, data, notify = TRUE) {
group_names <- vapply(groups(data), as.character, character(1))
missing <- setdiff(group_names, vars)

if (length(missing) > 0) {
message("Adding missing grouping variables: ",
paste0("`", missing, "`", collapse = ", "))

if (notify) {
message("Adding missing grouping variables: ",
paste0("`", missing, "`", collapse = ", "))
}
vars <- c(stats::setNames(missing, missing), vars)
}

select_impl(.data, vars)
vars
}

#' @export
Expand Down
27 changes: 27 additions & 0 deletions R/manip.r
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,33 @@ select_ <- function(.data, ..., .dots) {
UseMethod("select_")
}

#' Select columns using a predicate
#'
#' This verb is analogous to \code{\link{summarise_if}()} and
#' \code{\link{mutate_if}()} in that it lets you use a predicate on
#' the columns of a data frame. Only those columns for which the
#' predicate returns \code{TRUE} will be selected.
#'
#' Predicates can only be used with local sources like a data frame.
#'
#' @inheritParams summarise_all
#' @param .data A local tbl source.
#' @param ... Additional arguments passed to \code{.predicate}.
#' @export
#' @examples
#' iris %>% select_if(is.factor)
#' iris %>% select_if(is.numeric)
#' iris %>% select_if(function(col) is.numeric(col) && mean(col) > 3.5)
select_if <- function(.data, .predicate, ...) {
if (inherits(.data, "tbl_lazy")) {
stop("Selection with predicate currently require local sources",
call. = FALSE)
}
vars <- probe_colwise_names(.data, .predicate, ...)
vars <- ensure_grouped_vars(vars, .data, notify = FALSE)
select_(.data, .dots = vars)
}

#' @rdname select
#' @export
rename <- function(.data, ...) {
Expand Down
32 changes: 32 additions & 0 deletions man/select_if.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

13 changes: 13 additions & 0 deletions tests/testthat/test-select.r
Original file line number Diff line number Diff line change
Expand Up @@ -165,3 +165,16 @@ test_that("select succeeds in presence of raw columns (#1803)", {
expect_identical(select(df, b), df["b"])
expect_identical(select(df, -b), df["a"])
})

test_that("select_if can use predicate", {
expect_identical(iris %>% select_if(is.factor), iris["Species"])
})

test_that("select_if fails with databases", {
expect_error(memdb_frame(x = 1) %>% select_if(is.numeric) %>% collect())
})

test_that("select_if keeps grouping cols", {
expect_silent(df <- iris %>% group_by(Species) %>% select_if(is.numeric))
expect_equal(df, tbl_df(iris[c(5, 1:4)]))
})

0 comments on commit 6d70cc7

Please sign in to comment.