Skip to content

Commit

Permalink
Merge branch 'master' of github.com:hadley/dplyr
Browse files Browse the repository at this point in the history
  • Loading branch information
hadley committed Jun 7, 2016
2 parents a1453c4 + 6d70cc7 commit ac5bfd2
Show file tree
Hide file tree
Showing 16 changed files with 184 additions and 32 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -446,6 +446,7 @@ export(sample_frac)
export(sample_n)
export(select)
export(select_)
export(select_if)
export(select_query)
export(select_vars)
export(select_vars_)
Expand Down
11 changes: 10 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
@@ -1,13 +1,19 @@
# dplyr 0.4.3.9000

* `as_data_frame()` on SQL sources now returns all rows (#1752, #1821,
@krlmlr).

* Avoiding segfaults in presence of `raw` columns (#1803, #1817, @krlmlr).

* `all_equal()` shows better error message when comparing raw values
or when types are incompatible and `convert = TRUE` (#1820, @krlmlr).

* Fixed bug about joins when factor levels not equal (#1712, #1559).

* anti and semi joins give correct result when by variable is a factor
and don't warn (#1571).

* setdiff handles factors with NA (#1526).
* `setdiff()` handles factors with `NA` (#1526).

* enabling joining of data frames that don't have the same encoding of
column names (#1513).
Expand Down Expand Up @@ -82,6 +88,9 @@
functions instead of a list of functions generated by `funs()`
(though this is only useful for local sources). (#1845, @lionel-)

* `select_if()` lets you select columns with a predicate function.
Only compatible with local sources. (#497, #1569, @lionel-)

## Local backends

### dtplyr
Expand Down
4 changes: 2 additions & 2 deletions R/colwise.R
Original file line number Diff line number Diff line change
Expand Up @@ -110,12 +110,12 @@ mutate_if <- function(.tbl, .predicate, .funs, ...) {
mutate_(.tbl, .dots = vars)
}

probe_colwise_names <- function(tbl, p) {
probe_colwise_names <- function(tbl, p, ...) {
if (is.logical(p)) {
stopifnot(length(p) == length(tbl))
selected <- p
} else {
selected <- vapply(tbl, p, logical(1))
selected <- vapply(tbl, p, logical(1), ...)
}

vars <- tbl_vars(tbl)
Expand Down
17 changes: 11 additions & 6 deletions R/grouped-df.r
Original file line number Diff line number Diff line change
Expand Up @@ -93,19 +93,24 @@ cbind.grouped_df <- function(...) {
select_.grouped_df <- function(.data, ..., .dots) {
dots <- lazyeval::all_dots(.dots, ...)
vars <- select_vars_(names(.data), dots)
vars <- ensure_grouped_vars(vars, .data)

# Ensure all grouping variables are present, notifying user with a message
group_names <- vapply(groups(.data), as.character, character(1))
select_impl(.data, vars)
}

ensure_grouped_vars <- function(vars, data, notify = TRUE) {
group_names <- vapply(groups(data), as.character, character(1))
missing <- setdiff(group_names, vars)

if (length(missing) > 0) {
message("Adding missing grouping variables: ",
paste0("`", missing, "`", collapse = ", "))

if (notify) {
message("Adding missing grouping variables: ",
paste0("`", missing, "`", collapse = ", "))
}
vars <- c(stats::setNames(missing, missing), vars)
}

select_impl(.data, vars)
vars
}

#' @export
Expand Down
2 changes: 1 addition & 1 deletion R/join.r
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ common_by <- function(by = NULL, x, y) {
if (length(by) == 0) {
stop("No common variables. Please specify `by` param.", call. = FALSE)
}
message("Joining by: ", utils::capture.output(dput(by)))
message("Joining, by = ", utils::capture.output(dput(by)))

list(
x = by,
Expand Down
27 changes: 27 additions & 0 deletions R/manip.r
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,33 @@ select_ <- function(.data, ..., .dots) {
UseMethod("select_")
}

#' Select columns using a predicate
#'
#' This verb is analogous to \code{\link{summarise_if}()} and
#' \code{\link{mutate_if}()} in that it lets you use a predicate on
#' the columns of a data frame. Only those columns for which the
#' predicate returns \code{TRUE} will be selected.
#'
#' Predicates can only be used with local sources like a data frame.
#'
#' @inheritParams summarise_all
#' @param .data A local tbl source.
#' @param ... Additional arguments passed to \code{.predicate}.
#' @export
#' @examples
#' iris %>% select_if(is.factor)
#' iris %>% select_if(is.numeric)
#' iris %>% select_if(function(col) is.numeric(col) && mean(col) > 3.5)
select_if <- function(.data, .predicate, ...) {
if (inherits(.data, "tbl_lazy")) {
stop("Selection with predicate currently require local sources",
call. = FALSE)
}
vars <- probe_colwise_names(.data, .predicate, ...)
vars <- ensure_grouped_vars(vars, .data, notify = FALSE)
select_(.data, .dots = vars)
}

#' @rdname select
#' @export
rename <- function(.data, ...) {
Expand Down
2 changes: 1 addition & 1 deletion R/tbl-sql.r
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ n_groups.tbl_sql <- function(x) {

#' @export
as.data.frame.tbl_sql <- function(x, row.names = NULL, optional = NULL,
..., n = 1e5L) {
..., n = Inf) {
as.data.frame(collect(x, n = n))
}

Expand Down
17 changes: 5 additions & 12 deletions inst/include/dplyr/Collecter.h
Original file line number Diff line number Diff line change
Expand Up @@ -237,17 +237,6 @@ namespace dplyr {

inline bool compatible(SEXP x) {
return Rf_inherits(x, "POSIXct") ;
if( !Rf_inherits(x, "POSIXct" ) ) return false ;
SEXP xtz = Rf_getAttrib(x, Rf_install("tzone") ) ;

if( Rf_isNull(tz) ) {
tz = xtz ;
return true ;
}

if( Rf_isNull( xtz ) ) return false ;

return STRING_ELT(tz, 0) == STRING_ELT(xtz, 0 ) ;
}

inline bool can_promote(SEXP x) const {
Expand Down Expand Up @@ -379,7 +368,11 @@ namespace dplyr {
return new Collecter_Impl<CPLXSXP>(n) ;
case LGLSXP: return new Collecter_Impl<LGLSXP>(n) ;
case STRSXP: return new Collecter_Impl<STRSXP>(n) ;
case VECSXP: return new Collecter_Impl<VECSXP>(n) ;
case VECSXP:
if( Rf_inherits( model, "POSIXlt" )) {
stop( "POSIXlt not supported" ) ;
}
return new Collecter_Impl<VECSXP>(n) ;
default: stop("Unsupported vector type %s", Rf_type2char(TYPEOF(model))) ;
}
return 0 ;
Expand Down
6 changes: 5 additions & 1 deletion inst/include/dplyr/Result/GroupedSubset.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,11 @@ namespace dplyr {
case LGLSXP: return new GroupedSubsetTemplate<LGLSXP>(x, max_size) ;
case STRSXP: return new GroupedSubsetTemplate<STRSXP>(x, max_size) ;
case VECSXP:
if( Rf_inherits(x, "data.frame") ) return new DataFrameGroupedSubset(x) ;
if( Rf_inherits( x, "data.frame" ) )
return new DataFrameGroupedSubset(x) ;
if( Rf_inherits( x, "POSIXlt" ) ) {
stop( "POSIXlt not supported" ) ;
}
return new GroupedSubsetTemplate<VECSXP>(x, max_size) ;
case CPLXSXP: return new GroupedSubsetTemplate<CPLXSXP>(x, max_size) ;
default: stop("Unsupported vector type %s", Rf_type2char(TYPEOF(x)));
Expand Down
2 changes: 1 addition & 1 deletion inst/include/tools/match.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,6 @@ inline IntegerVector r_match( SEXP x, SEXP y ) {
return m(x, y);
}

};
}

#endif
32 changes: 32 additions & 0 deletions man/select_if.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 6 additions & 6 deletions src/dplyr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -970,18 +970,18 @@ dplyr::BoolResult compatible_data_frame( DataFrame x, DataFrame y, bool ignore_c
SubsetVectorVisitor* py = vy.get() ;

if( typeid(*px) != typeid(*py) ) {
if( !convert ) {
ss << "Incompatible type for column "
<< name.get_cstring()
<< ": x " << vx->get_r_type()
<< ", y " << vy->get_r_type() ;
ss << "Incompatible type for column "
<< name.get_cstring()
<< ": x " << vx->get_r_type()
<< ", y " << vy->get_r_type() ;

if( !convert ) {
ok = false ;
continue ;
}
}

if( ! vx->is_compatible( vy.get(), ss, name ) ) {
if( ! vx->is_compatible( py, ss, name ) ) {
ok = false ;
}
}
Expand Down
38 changes: 38 additions & 0 deletions tests/testthat/test-as-data-frame.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
context("as-data-frame")


# as.data.frame and as_data_frame -----------------------------------------

test_that("as.data.frame works for SQL sources", {
lf1 <- memdb_frame(x = letters)
out <- lf1 %>%
as.data.frame()

expect_equal(out, data.frame(x = letters, stringsAsFactors = FALSE))
})

test_that("as_data_frame works for SQL sources", {
lf1 <- memdb_frame(x = letters)
out <- lf1 %>%
as_data_frame()

expect_equal(out, data_frame(x = letters))
})

test_that("as.data.frame is unlimited", {
x <- rep(1:2, formals(collect.tbl_sql)$n)
lf1 <- memdb_frame(x = x)
out <- lf1 %>%
as.data.frame()

expect_equal(out, data.frame(x = x))
})

test_that("as_data_frame is unlimited", {
x <- rep(1:2, formals(collect.tbl_sql)$n)
lf1 <- memdb_frame(x = x)
out <- lf1 %>%
as_data_frame()

expect_equal(out, data_frame(x = x))
})
6 changes: 6 additions & 0 deletions tests/testthat/test-binds.R
Original file line number Diff line number Diff line change
Expand Up @@ -386,3 +386,9 @@ test_that("bind_cols infers classes from first result (#1692)", {
expect_equal( class(bind_rows(d5,d1)), c("tbl_df", "tbl", "data.frame") )

})

test_that("bind_rows rejects POSIXlt columns (#1789)", {
df <- data_frame(x = Sys.time() + 1:12)
df$y <- as.POSIXlt(df$x)
expect_error(bind_rows(df, df), "not supported")
})
26 changes: 25 additions & 1 deletion tests/testthat/test-equality.r
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,31 @@ test_that("equality test fails when convert is FALSE and types don't match (#148
expect_warning( all_equal(df1, df2, convert = TRUE) )
})

test_that("equality handles data frames with 0 columns (#1506)", {
test_that("equality handles data frames with 0 rows (#1506)", {
df0 <- data_frame(x = numeric(0), y = character(0) )
expect_equal(df0, df0)
})

test_that("equality handles data frames with 0 columns (#1506)", {
df0 <- data_frame(a = 1:10)[-1]
expect_equal(df0, df0)
})

test_that("equality cannot be checked in presence of raw columns", {
df <- data_frame(a = 1:3, b = as.raw(1:3))
expect_error(all.equal(df, df), "Unsupported vector type raw")
})

test_that("equality returns a message for convert = TRUE", {
df1 <- data_frame(x = 1:3)
df2 <- data_frame(x = as.character(1:3))
expect_match(all.equal(df1, df2), "Incompatible")
expect_match(all.equal(df1, df2, convert = TRUE), "Incompatible")
})

test_that("numeric and integer can be compared if convert = TRUE", {
df1 <- data_frame(x = 1:3)
df2 <- data_frame(x = as.numeric(1:3))
expect_match(all.equal(df1, df2), "Incompatible")
expect_true(all.equal(df1, df2, convert = TRUE))
})
13 changes: 13 additions & 0 deletions tests/testthat/test-select.r
Original file line number Diff line number Diff line change
Expand Up @@ -165,3 +165,16 @@ test_that("select succeeds in presence of raw columns (#1803)", {
expect_identical(select(df, b), df["b"])
expect_identical(select(df, -b), df["a"])
})

test_that("select_if can use predicate", {
expect_identical(iris %>% select_if(is.factor), iris["Species"])
})

test_that("select_if fails with databases", {
expect_error(memdb_frame(x = 1) %>% select_if(is.numeric) %>% collect())
})

test_that("select_if keeps grouping cols", {
expect_silent(df <- iris %>% group_by(Species) %>% select_if(is.numeric))
expect_equal(df, tbl_df(iris[c(5, 1:4)]))
})

0 comments on commit ac5bfd2

Please sign in to comment.