forked from tidyverse/dplyr
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgroup-by.r
167 lines (154 loc) · 4.42 KB
/
group-by.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
#' Group by one or more variables
#'
#' @description
#' Most data operations are done on groups defined by variables.
#' `group_by()` takes an existing tbl and converts it into a grouped tbl
#' where operations are performed "by group". `ungroup()` removes grouping.
#'
#' @section Tbl types:
#'
#' `group_by()` is an S3 generic with methods for the three built-in
#' tbls. See the help for the corresponding classes and their manip
#' methods for more details:
#'
#' \itemize{
#' \item data.frame: [grouped_df]
#' \item data.table: [dtplyr::grouped_dt]
#' \item SQLite: [src_sqlite()]
#' \item PostgreSQL: [src_postgres()]
#' \item MySQL: [src_mysql()]
#' }
#'
#' @section Scoped grouping:
#'
#' The three [scoped] variants ([group_by_all()], [group_by_if()] and
#' [group_by_at()]) make it easy to group a dataset by a selection of
#' variables.
#'
#' @param .data a tbl
#' @param ... Variables to group by. All tbls accept variable names.
#' Some tbls will accept functions of variables. Duplicated groups
#' will be silently dropped.
#' @param add When `add = FALSE`, the default, `group_by()` will
#' override existing groups. To add to the existing groups, use
#' `add = TRUE`.
#' @inheritParams filter
#' @export
#' @examples
#' by_cyl <- mtcars %>% group_by(cyl)
#'
#' # grouping doesn't change how the data looks (apart from listing
#' # how it's grouped):
#' by_cyl
#'
#' # It changes how it acts with the other dplyr verbs:
#' by_cyl %>% summarise(
#' disp = mean(disp),
#' hp = mean(hp)
#' )
#' by_cyl %>% filter(disp == max(disp))
#'
#' # Each call to summarise() removes a layer of grouping
#' by_vs_am <- mtcars %>% group_by(vs, am)
#' by_vs <- by_vs_am %>% summarise(n = n())
#' by_vs
#' by_vs %>% summarise(n = sum(n))
#'
#' # To removing grouping, use ungroup
#' by_vs %>%
#' ungroup() %>%
#' summarise(n = sum(n))
#'
#' # You can group by expressions: this is just short-hand for
#' # a mutate/rename followed by a simple group_by
#' mtcars %>% group_by(vsam = vs + am)
#'
#' # By default, group_by overrides existing grouping
#' by_cyl %>%
#' group_by(vs, am) %>%
#' group_vars()
#'
#' # Use add = TRUE to instead append
#' by_cyl %>%
#' group_by(vs, am, add = TRUE) %>%
#' group_vars()
group_by <- function(.data, ..., add = FALSE) {
UseMethod("group_by")
}
#' @export
group_by.default <- function(.data, ..., add = FALSE) {
group_by_(.data, .dots = compat_as_lazy_dots(...))
}
#' @export
#' @rdname se-deprecated
#' @inheritParams group_by
group_by_ <- function(.data, ..., .dots = list(), add = FALSE) {
UseMethod("group_by_")
}
#' @rdname group_by
#' @export
#' @param x A [tbl()]
ungroup <- function(x, ...) {
UseMethod("ungroup")
}
#' Prepare for grouping.
#'
#' Performs standard operations that should happen before individual methods
#' process the data. This includes mutating the tbl to add new grouping columns
#' and updating the groups (based on add)
#'
#' @return A list
#' \item{data}{Modified tbl}
#' \item{groups}{Modified groups}
#' @export
#' @keywords internal
group_by_prepare <- function(.data, ..., .dots = list(), add = FALSE) {
new_groups <- c(quos(...), compat_lazy_dots(.dots, caller_env()))
# If any calls, use mutate to add new columns, then group by those
.data <- add_computed_columns(.data, new_groups)
# Once we've done the mutate, we need to name all objects
new_groups <- exprs_auto_name(new_groups, printer = tidy_text)
group_names <- names(new_groups)
if (add) {
group_names <- c(group_vars(.data), group_names)
}
group_names <- unique(group_names)
list(
data = .data,
groups = syms(group_names),
group_names = group_names
)
}
add_computed_columns <- function(.data, vars) {
is_symbol <- map_lgl(vars, quo_is_variable_reference)
named <- have_name(vars)
needs_mutate <- named | !is_symbol
# Shortcut necessary, otherwise all columns are analyzed in mutate(),
# this can change behavior
mutate_vars <- vars[needs_mutate]
if (length(mutate_vars) == 0L) return(.data)
mutate(.data, !!!mutate_vars)
}
#' Return grouping variables
#'
#' `group_vars()` returns a character vector; `groups()` returns a list of
#' symbols.
#'
#' @param x A [tbl()]
#' @export
#' @examples
#' df <- tibble(x = 1, y = 2) %>% group_by(x, y)
#' group_vars(df)
#' groups(df)
groups <- function(x) {
UseMethod("groups")
}
#' @rdname groups
#' @export
group_vars <- function(x) {
UseMethod("group_vars")
}
#' @export
group_vars.default <- function(x) {
deparse_names(groups(x))
}