forked from tidyverse/dplyr
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtally.R
91 lines (81 loc) · 2.46 KB
/
tally.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
#' Counts/tally observations by group.
#'
#' \code{tally} is a convenient wrapper for summarise that will either call
#' \code{\link{n}} or \code{\link{sum}(n)} depending on whether you're tallying
#' for the first time, or re-tallying. \code{count()} is similar, but also
#' does the \code{\link{group_by}} for you.
#'
#' @param x a \code{\link{tbl}} to tally/count.
#' @param ...,vars Variables to group by.
#' @param wt (Optional) If omitted, will count the number of rows. If specified,
#' will perform a "weighted" tally by summing the (non-missing) values of
#' variable \code{wt}.
#' @param sort if \code{TRUE} will sort output in descending order of \code{n}
#' @export
#' @examples
#' if (require("Lahman")) {
#' batting_tbl <- tbl_df(Batting)
#' tally(group_by(batting_tbl, yearID))
#' tally(group_by(batting_tbl, yearID), sort = TRUE)
#'
#' # Multiple tallys progressively roll up the groups
#' plays_by_year <- tally(group_by(batting_tbl, playerID, stint), sort = TRUE)
#' tally(plays_by_year, sort = TRUE)
#' tally(tally(plays_by_year))
#'
#' # This looks a little nicer if you use the infix %>% operator
#' batting_tbl %>% group_by(playerID) %>% tally(sort = TRUE)
#'
#' # count is even more succinct - it also does the grouping for you
#' batting_tbl %>% count(playerID)
#' batting_tbl %>% count(playerID, wt = G)
#' batting_tbl %>% count(playerID, wt = G, sort = TRUE)
#' }
tally <- function(x, wt, sort = FALSE) {
if (missing(wt)) {
if ("n" %in% names(x)) {
message("Using n as weighting variable")
wt <- quote(n)
} else {
wt <- NULL
}
} else {
wt <- substitute(wt)
}
tally_(x, wt, sort = sort)
}
tally_ <- function(x, wt, sort = FALSE) {
if (is.null(wt)) {
n <- quote(n())
} else {
n <- lazyeval::interp(quote(sum(wt, na.rm = TRUE)), wt = wt)
}
n_name <- n_name(tbl_vars(x))
out <- summarise_(x, .dots = setNames(list(n), n_name))
if (!sort) {
out
} else {
desc_n <- lazyeval::interp(quote(desc(n)), n = as.name(n_name))
arrange_(out, desc_n)
}
}
n_name <- function(x) {
name <- "n"
while (name %in% x) {
name <- paste0(name, "n")
}
name
}
#' @export
#' @rdname tally
count <- function(x, ..., wt = NULL, sort = FALSE) {
vars <- lazyeval::lazy_dots(...)
wt <- substitute(wt)
count_(x, vars, wt, sort = sort)
}
#' @export
#' @rdname tally
count_ <- function(x, vars, wt = NULL, sort = FALSE) {
grouped <- group_by_(x, .dots = vars, add = TRUE)
tally_(grouped, wt = wt, sort = sort)
}