forked from tidyverse/dplyr
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- New
add_count()
and add_tally()
for adding an n
column within…
… groups (tidyverse#2078, @dgrtwo). * Added draft of add_tally and add_count functions, which add an "n" column based on counting within groups. * Added simple test cases for add_count and add_tally
- Loading branch information
Showing
6 changed files
with
233 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,102 @@ | ||
#' Add a column counting or tallying observations within groups | ||
#' | ||
#' \code{add_tally} adds a column "n" to a table based on the number | ||
#' of items within each existing group, while \code{add_count} is a shortcut that | ||
#' does the grouping as well. These functions are to \code{\link{tally}} | ||
#' and \code{\link{count}} as \code{\link{mutate}} is to \code{\link{summarise}}: | ||
#' they add an additional column rather than collapsing each group. | ||
#' | ||
#' @param x a \code{tbl}. | ||
#' @param wt (Optional) If omitted, will count the number of rows. Otherwise, | ||
#' performs a weighted tally by summing the (non-missing) values of variable wt | ||
#' @param sort Whether to sort the result in descending order of n | ||
#' @param ...,vars Variables to group by. | ||
#' | ||
#' @details \code{add_count} counts within the current groups of the data when present, | ||
#' and preserves those groups (it does not add the new ones). | ||
#' | ||
#' @examples | ||
#' | ||
#' add_tally(mtcars) | ||
#' add_tally(group_by(mtcars, cyl)) | ||
#' add_tally(group_by(mtcars, cyl), sort = TRUE) | ||
#' | ||
#' add_count(mtcars, cyl) | ||
#' add_count(mtcars, cyl, am) | ||
#' | ||
#' if (require("Lahman")) { | ||
#' batting_tbl <- tbl_df(Batting) | ||
#' | ||
#' # get records of players who played in multiple stints in the same year | ||
#' batting_tbl %>% | ||
#' add_count(playerID, yearID) %>% | ||
#' filter(n > 1) | ||
#' | ||
#' # get only players who played in more than three stints total | ||
#' batting_tbl %>% | ||
#' add_count(playerID) %>% | ||
#' filter(n > 3) | ||
#' | ||
#' # get only players with at least 1000 ABs | ||
#' batting_tbl %>% | ||
#' add_count(playerID, wt = AB) %>% | ||
#' filter(n >= 1000) | ||
#' } | ||
#' | ||
#' @export | ||
add_tally <- function(x, wt, sort = FALSE) { | ||
if (missing(wt)) { | ||
if ("n" %in% names(x)) { | ||
message("Using n as weighting variable") | ||
wt <- quote(n) | ||
} | ||
else { | ||
wt <- NULL | ||
} | ||
} | ||
else { | ||
wt <- substitute(wt) | ||
} | ||
add_tally_(x, wt, sort = sort) | ||
} | ||
|
||
|
||
#' @rdname add_tally | ||
#' @export | ||
add_tally_ <- function(x, wt = NULL, sort = FALSE) { | ||
g <- groups(x) | ||
if (is.null(wt)) { | ||
n <- quote(n()) | ||
} | ||
else { | ||
n <- lazyeval::interp(quote(sum(wt, na.rm = TRUE)), wt = wt) | ||
} | ||
n_name <- n_name(tbl_vars(x)) | ||
out <- mutate_(x, .dots = setNames(list(n), n_name)) | ||
|
||
if (sort) { | ||
desc_n <- lazyeval::interp(quote(desc(n)), n = as.name(n_name)) | ||
out <- arrange_(out, desc_n) | ||
} | ||
group_by_(out, .dots = g) | ||
} | ||
|
||
|
||
#' @rdname add_tally | ||
#' @export | ||
add_count <- function(x, ..., wt = NULL, sort = FALSE) { | ||
vars <- lazyeval::lazy_dots(...) | ||
wt <- substitute(wt) | ||
add_count_(x, vars, wt, sort = sort) | ||
} | ||
|
||
|
||
#' @rdname add_tally | ||
#' @export | ||
add_count_ <- function(x, vars, wt = NULL, sort = FALSE) { | ||
g <- groups(x) | ||
grouped <- group_by_(x, .dots = vars, add = TRUE) | ||
|
||
ret <- add_tally_(grouped, wt = wt, sort = sort) | ||
group_by_(ret, .dots = g) | ||
} |
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
context("add_count") | ||
|
||
test_that("can add counts of a variable called n", { | ||
df <- data.frame(n = c(1, 1, 2, 2, 2)) | ||
|
||
out <- df %>% add_count(n) | ||
expect_equal(names(out), c("n", "nn")) | ||
expect_equal(out$n, df$n) | ||
expect_equal(out$nn, c(2, 2, 3, 3, 3)) | ||
|
||
out <- df %>% add_count(n, sort = TRUE) | ||
expect_equal(out$nn, c(3, 3, 3, 2, 2)) | ||
}) | ||
|
||
test_that("add_count respects and preserves existing groups", { | ||
df <- data.frame(g = c(1, 2, 2, 2), val = c("b", "b", "b", "c")) | ||
res <- df %>% add_count(val) | ||
expect_equal(res$n, c(3, 3, 3, 1)) | ||
expect_null(groups(res)) | ||
|
||
res <- df %>% group_by(g) %>% add_count(val) | ||
expect_equal(res$n, c(1, 2, 2, 1)) | ||
expect_equal(as.character(groups(res)), "g") | ||
}) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
context("add_tally") | ||
|
||
test_that("can add tallies of a variable", { | ||
df <- data.frame(a = c(1, 1, 2, 2, 2)) | ||
|
||
out <- df %>% group_by(a) %>% add_tally() | ||
expect_equal(names(out), c("a", "n")) | ||
expect_equal(out$a, df$a) | ||
expect_equal(out$n, c(2, 2, 3, 3, 3)) | ||
|
||
out <- df %>% group_by(a) %>% add_tally(sort = TRUE) | ||
expect_equal(out$n, c(3, 3, 3, 2, 2)) | ||
}) | ||
|
||
test_that("add_tally respects and preserves existing groups", { | ||
df <- data.frame(g = c(1, 2, 2, 2), val = c("b", "b", "b", "c")) | ||
res <- df %>% group_by(val) %>% add_tally() | ||
expect_equal(res$n, c(3, 3, 3, 1)) | ||
expect_equal(as.character(groups(res)), "val") | ||
|
||
res <- df %>% group_by(g, val) %>% add_tally() | ||
expect_equal(res$n, c(1, 2, 2, 1)) | ||
expect_equal(as.character(groups(res)), c("g", "val")) | ||
}) | ||
|
||
test_that("add_tally can be given a weighting variable", { | ||
df <- data.frame(a = c(1, 1, 2, 2, 2), w = c(1, 1, 2, 3, 4)) | ||
|
||
out <- df %>% group_by(a) %>% add_tally(wt = w) | ||
expect_equal(out$n, c(2, 2, 9, 9, 9)) | ||
|
||
out <- df %>% group_by(a) %>% add_tally(wt = w + 1) | ||
expect_equal(out$n, c(4, 4, 12, 12, 12)) | ||
}) |