Merge branch 'master' of github.com:hadley/dplyr

kvpradap · Jun 7, 2016 · ac5bfd2 · ac5bfd2
2 parents a1453c4 + 6d70cc7
commit ac5bfd2
Show file tree

Hide file tree

Showing 16 changed files with 184 additions and 32 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -446,6 +446,7 @@ export(sample_frac)
 export(sample_n)
 export(select)
 export(select_)
+export(select_if)
 export(select_query)
 export(select_vars)
 export(select_vars_)

diff --git a/NEWS.md b/NEWS.md
@@ -1,13 +1,19 @@
 # dplyr 0.4.3.9000
 
+* `as_data_frame()` on SQL sources now returns all rows (#1752, #1821,
+  @krlmlr).
+
 * Avoiding segfaults in presence of `raw` columns (#1803, #1817, @krlmlr).
 
+* `all_equal()` shows better error message when comparing raw values
+  or when types are incompatible and `convert = TRUE` (#1820, @krlmlr).
+
 * Fixed bug about joins when factor levels not equal (#1712, #1559).
 
 * anti and semi joins give correct result when by variable is a factor
   and don't warn (#1571).
 
-* setdiff handles factors with NA (#1526).
+* `setdiff()` handles factors with `NA` (#1526).
 
 * enabling joining of data frames that don't have the same encoding of
   column names (#1513).
@@ -82,6 +88,9 @@
   functions instead of a list of functions generated by `funs()`
   (though this is only useful for local sources). (#1845, @lionel-)
 
+* `select_if()` lets you select columns with a predicate function.
+  Only compatible with local sources. (#497, #1569, @lionel-)
+
 ## Local backends
 
 ### dtplyr

diff --git a/R/colwise.R b/R/colwise.R
@@ -110,12 +110,12 @@ mutate_if <- function(.tbl, .predicate, .funs, ...) {
   mutate_(.tbl, .dots = vars)
 }
 
-probe_colwise_names <- function(tbl, p) {
+probe_colwise_names <- function(tbl, p, ...) {
   if (is.logical(p)) {
     stopifnot(length(p) == length(tbl))
     selected <- p
   } else {
-    selected <- vapply(tbl, p, logical(1))
+    selected <- vapply(tbl, p, logical(1), ...)
   }
 
   vars <- tbl_vars(tbl)

diff --git a/R/grouped-df.r b/R/grouped-df.r
@@ -93,19 +93,24 @@ cbind.grouped_df <- function(...) {
 select_.grouped_df <- function(.data, ..., .dots) {
   dots <- lazyeval::all_dots(.dots, ...)
   vars <- select_vars_(names(.data), dots)
+  vars <- ensure_grouped_vars(vars, .data)
 
-  # Ensure all grouping variables are present, notifying user with a message
-  group_names <- vapply(groups(.data), as.character, character(1))
+  select_impl(.data, vars)
+}
+
+ensure_grouped_vars <- function(vars, data, notify = TRUE) {
+  group_names <- vapply(groups(data), as.character, character(1))
   missing <- setdiff(group_names, vars)
 
   if (length(missing) > 0) {
-    message("Adding missing grouping variables: ",
-      paste0("`", missing, "`", collapse = ", "))
-
+    if (notify) {
+      message("Adding missing grouping variables: ",
+        paste0("`", missing, "`", collapse = ", "))
+    }
     vars <- c(stats::setNames(missing, missing), vars)
   }
 
-  select_impl(.data, vars)
+  vars
 }
 
 #' @export

diff --git a/R/join.r b/R/join.r
@@ -122,7 +122,7 @@ common_by <- function(by = NULL, x, y) {
   if (length(by) == 0) {
     stop("No common variables. Please specify `by` param.", call. = FALSE)
   }
-  message("Joining by: ", utils::capture.output(dput(by)))
+  message("Joining, by = ", utils::capture.output(dput(by)))
 
   list(
     x = by,

diff --git a/R/manip.r b/R/manip.r
@@ -281,6 +281,33 @@ select_ <- function(.data, ..., .dots) {
   UseMethod("select_")
 }
 
+#' Select columns using a predicate
+#'
+#' This verb is analogous to \code{\link{summarise_if}()} and
+#' \code{\link{mutate_if}()} in that it lets you use a predicate on
+#' the columns of a data frame. Only those columns for which the
+#' predicate returns \code{TRUE} will be selected.
+#'
+#' Predicates can only be used with local sources like a data frame.
+#'
+#' @inheritParams summarise_all
+#' @param .data A local tbl source.
+#' @param ... Additional arguments passed to \code{.predicate}.
+#' @export
+#' @examples
+#' iris %>% select_if(is.factor)
+#' iris %>% select_if(is.numeric)
+#' iris %>% select_if(function(col) is.numeric(col) && mean(col) > 3.5)
+select_if <- function(.data, .predicate, ...) {
+  if (inherits(.data, "tbl_lazy")) {
+    stop("Selection with predicate currently require local sources",
+      call. = FALSE)
+  }
+  vars <- probe_colwise_names(.data, .predicate, ...)
+  vars <- ensure_grouped_vars(vars, .data, notify = FALSE)
+  select_(.data, .dots = vars)
+}
+
 #' @rdname select
 #' @export
 rename <- function(.data, ...) {

diff --git a/R/tbl-sql.r b/R/tbl-sql.r
@@ -48,7 +48,7 @@ n_groups.tbl_sql <- function(x) {
 
 #' @export
 as.data.frame.tbl_sql <- function(x, row.names = NULL, optional = NULL,
-                                  ..., n = 1e5L) {
+                                  ..., n = Inf) {
   as.data.frame(collect(x, n = n))
 }
 

diff --git a/inst/include/dplyr/Collecter.h b/inst/include/dplyr/Collecter.h
@@ -237,17 +237,6 @@ namespace dplyr {
 
         inline bool compatible(SEXP x) {
             return Rf_inherits(x, "POSIXct") ;
-            if( !Rf_inherits(x, "POSIXct" ) ) return false ;
-            SEXP xtz = Rf_getAttrib(x, Rf_install("tzone") ) ;
-
-            if( Rf_isNull(tz) ) {
-                tz = xtz ;
-                return true ;
-            }
-
-            if( Rf_isNull( xtz ) ) return false ;
-
-            return STRING_ELT(tz, 0) == STRING_ELT(xtz, 0 ) ;
         }
 
         inline bool can_promote(SEXP x) const {
@@ -379,7 +368,11 @@ namespace dplyr {
             return new Collecter_Impl<CPLXSXP>(n) ;
         case LGLSXP: return new Collecter_Impl<LGLSXP>(n) ;
         case STRSXP: return new Collecter_Impl<STRSXP>(n) ;
-        case VECSXP: return new Collecter_Impl<VECSXP>(n) ;
+        case VECSXP:
+            if( Rf_inherits( model, "POSIXlt" )) {
+                stop( "POSIXlt not supported" ) ;
+            }
+            return new Collecter_Impl<VECSXP>(n) ;
         default: stop("Unsupported vector type %s", Rf_type2char(TYPEOF(model))) ;
         }
         return 0 ;

diff --git a/inst/include/dplyr/Result/GroupedSubset.h b/inst/include/dplyr/Result/GroupedSubset.h
@@ -64,7 +64,11 @@ namespace dplyr {
             case LGLSXP: return new GroupedSubsetTemplate<LGLSXP>(x, max_size) ;
             case STRSXP: return new GroupedSubsetTemplate<STRSXP>(x, max_size) ;
             case VECSXP:
-                if( Rf_inherits(x, "data.frame") ) return new DataFrameGroupedSubset(x) ;
+                if( Rf_inherits( x, "data.frame" ) )
+                    return new DataFrameGroupedSubset(x) ;
+                if( Rf_inherits( x, "POSIXlt" ) ) {
+                    stop( "POSIXlt not supported" ) ;
+                }
                 return new GroupedSubsetTemplate<VECSXP>(x, max_size) ;
             case CPLXSXP: return new GroupedSubsetTemplate<CPLXSXP>(x, max_size) ;
             default: stop("Unsupported vector type %s", Rf_type2char(TYPEOF(x)));

diff --git a/inst/include/tools/match.h b/inst/include/tools/match.h
@@ -20,6 +20,6 @@ inline IntegerVector r_match( SEXP x, SEXP y ) {
   return m(x, y);
 }
 
-};
+}
 
 #endif
diff --git a/man/select_if.Rd b/man/select_if.Rd
diff --git a/src/dplyr.cpp b/src/dplyr.cpp
@@ -970,18 +970,18 @@ dplyr::BoolResult compatible_data_frame( DataFrame x, DataFrame y, bool ignore_c
       SubsetVectorVisitor* py = vy.get() ;
 
       if( typeid(*px) != typeid(*py) ) {
-        if( !convert ) {
-          ss << "Incompatible type for column "
-             << name.get_cstring()
-             << ": x " << vx->get_r_type()
-             << ", y " << vy->get_r_type() ;
+        ss << "Incompatible type for column "
+           << name.get_cstring()
+           << ": x " << vx->get_r_type()
+           << ", y " << vy->get_r_type() ;
 
+        if( !convert ) {
           ok = false ;
           continue ;
         }
       }
 
-      if( ! vx->is_compatible( vy.get(), ss, name ) ) {
+      if( ! vx->is_compatible( py, ss, name ) ) {
         ok = false ;
       }
     }

diff --git a/tests/testthat/test-as-data-frame.R b/tests/testthat/test-as-data-frame.R
@@ -0,0 +1,38 @@
+context("as-data-frame")
+
+
+# as.data.frame and as_data_frame -----------------------------------------
+
+test_that("as.data.frame works for SQL sources", {
+  lf1 <- memdb_frame(x = letters)
+  out <- lf1 %>%
+    as.data.frame()
+
+  expect_equal(out, data.frame(x = letters, stringsAsFactors = FALSE))
+})
+
+test_that("as_data_frame works for SQL sources", {
+  lf1 <- memdb_frame(x = letters)
+  out <- lf1 %>%
+    as_data_frame()
+
+  expect_equal(out, data_frame(x = letters))
+})
+
+test_that("as.data.frame is unlimited", {
+  x <- rep(1:2, formals(collect.tbl_sql)$n)
+  lf1 <- memdb_frame(x = x)
+  out <- lf1 %>%
+    as.data.frame()
+
+  expect_equal(out, data.frame(x = x))
+})
+
+test_that("as_data_frame is unlimited", {
+  x <- rep(1:2, formals(collect.tbl_sql)$n)
+  lf1 <- memdb_frame(x = x)
+  out <- lf1 %>%
+    as_data_frame()
+
+  expect_equal(out, data_frame(x = x))
+})
diff --git a/tests/testthat/test-binds.R b/tests/testthat/test-binds.R
@@ -386,3 +386,9 @@ test_that("bind_cols infers classes from first result (#1692)", {
   expect_equal( class(bind_rows(d5,d1)), c("tbl_df", "tbl", "data.frame") )
 
 })
+
+test_that("bind_rows rejects POSIXlt columns (#1789)", {
+  df <- data_frame(x = Sys.time() + 1:12)
+  df$y <- as.POSIXlt(df$x)
+  expect_error(bind_rows(df, df), "not supported")
+})
diff --git a/tests/testthat/test-equality.r b/tests/testthat/test-equality.r
@@ -90,7 +90,31 @@ test_that("equality test fails when convert is FALSE and types don't match (#148
   expect_warning( all_equal(df1, df2, convert = TRUE) )
 })
 
-test_that("equality handles data frames with 0 columns (#1506)", {
+test_that("equality handles data frames with 0 rows (#1506)", {
   df0 <- data_frame(x = numeric(0), y = character(0) )
   expect_equal(df0, df0)
 })
+
+test_that("equality handles data frames with 0 columns (#1506)", {
+  df0 <- data_frame(a = 1:10)[-1]
+  expect_equal(df0, df0)
+})
+
+test_that("equality cannot be checked in presence of raw columns", {
+  df <- data_frame(a = 1:3, b = as.raw(1:3))
+  expect_error(all.equal(df, df), "Unsupported vector type raw")
+})
+
+test_that("equality returns a message for convert = TRUE", {
+  df1 <- data_frame(x = 1:3)
+  df2 <- data_frame(x = as.character(1:3))
+  expect_match(all.equal(df1, df2), "Incompatible")
+  expect_match(all.equal(df1, df2, convert = TRUE), "Incompatible")
+})
+
+test_that("numeric and integer can be compared if convert = TRUE", {
+  df1 <- data_frame(x = 1:3)
+  df2 <- data_frame(x = as.numeric(1:3))
+  expect_match(all.equal(df1, df2), "Incompatible")
+  expect_true(all.equal(df1, df2, convert = TRUE))
+})
diff --git a/tests/testthat/test-select.r b/tests/testthat/test-select.r
@@ -165,3 +165,16 @@ test_that("select succeeds in presence of raw columns (#1803)", {
   expect_identical(select(df, b), df["b"])
   expect_identical(select(df, -b), df["a"])
 })
+
+test_that("select_if can use predicate", {
+  expect_identical(iris %>% select_if(is.factor), iris["Species"])
+})
+
+test_that("select_if fails with databases", {
+  expect_error(memdb_frame(x = 1) %>% select_if(is.numeric) %>% collect())
+})
+
+test_that("select_if keeps grouping cols", {
+  expect_silent(df <- iris %>% group_by(Species) %>% select_if(is.numeric))
+  expect_equal(df, tbl_df(iris[c(5, 1:4)]))
+})
-Original file line number
+Diff line change
@@ Expand Up / @@ -20,6 +20,6 @@ inline IntegerVector r_match( SEXP x, SEXP y ) { @@
       return m(x, y);
     }
-    };
+    }
     #endif