forked from tidyverse/dplyr
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcolwise.R
192 lines (180 loc) · 6.67 KB
/
colwise.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
#' Operate on a selection of variables
#'
#' The variants suffixed with `_if`, `_at` or `_all` apply an
#' expression (sometimes several) to all variables within a specified
#' subset. This subset can contain all variables (`_all` variants), a
#' [vars()] selection (`_at` variants), or variables selected with a
#' predicate (`_if` variants).
#'
#' The verbs with scoped variants are:
#'
#' * [mutate()], [transmute()] and [summarise()]. See [summarise_all()].
#' * [filter()]. See [filter_all()].
#' * [group_by()]. See [group_by_all()].
#' * [rename()] and [select()]. See [select_all()].
#' * [arrange()]. See [arrange_all()]
#'
#' There are three kinds of scoped variants. They differ in the scope
#' of the variable selection on which operations are applied:
#'
#' * Verbs suffixed with `_all()` apply an operation on all variables.
#'
#' * Verbs suffixed with `_at()` apply an operation on a subset of
#' variables specified with the quoting function [vars()]. This
#' quoting function accepts [tidyselect::vars_select()] helpers like
#' [starts_with()]. Instead of a [vars()] selection, you can also
#' supply an [integerish][rlang::is_integerish] vector of column
#' positions or a character vector of column names.
#'
#' * Verbs suffixed with `_if()` apply an operation on the subset of
#' variables for which a predicate function returns `TRUE`. Instead
#' of a predicate function, you can also supply a logical vector.
#'
#' @param .tbl A `tbl` object.
#' @param .funs List of function calls generated by [funs()], or a
#' character vector of function names, or simply a function.
#'
#' Bare formulas are passed to [rlang::as_function()] to create
#' purrr-style lambda functions. Note that these lambda prevent
#' hybrid evaluation from happening and it is thus more efficient to
#' supply functions like `mean()` directly rather than in a
#' lambda-formula.
#' @param .vars A list of columns generated by [vars()],
#' a character vector of column names, a numeric vector of column
#' positions, or `NULL`.
#' @param .predicate A predicate function to be applied to the columns
#' or a logical vector. The variables for which `.predicate` is or
#' returns `TRUE` are selected. This argument is passed to
#' [rlang::as_function()] and thus supports quosure-style lambda
#' functions and strings representing function names.
#' @param ... Additional arguments for the function calls in
#' `.funs`. These are evaluated only once, with [tidy
#' dots][rlang::tidy-dots] support.
#' @name scoped
NULL
#' Select variables
#'
#' This helper is intended to provide equivalent semantics to
#' [select()]. It is used for instance in scoped summarising and
#' mutating verbs ([mutate_at()] and [summarise_at()]).
#'
#' Note that verbs accepting a `vars()` specification also accept an
#' [integerish][rlang::is_integerish] vector of positions or a
#' character vector of column names.
#'
#' @param ... Variables to include/exclude in mutate/summarise. You
#' can use same specifications as in [select()]. If missing,
#' defaults to all non-grouping variables.
#'
#' These arguments are automatically [quoted][rlang::quo] and later
#' [evaluated][rlang::eval_tidy] in the context of the data
#' frame. They support [unquoting][rlang::quasiquotation]. See
#' `vignette("programming")` for an introduction to these concepts.
#' @seealso [funs()], [all_vars()] and [any_vars()] for other quoting
#' functions that you can use with scoped verbs.
#' @export
vars <- function(...) {
quos(...)
}
#' Apply predicate to all variables
#'
#' These quoting functions signal to scoped filtering verbs
#' (e.g. [filter_if()] or [filter_all()]) that a predicate expression
#' should be applied to all relevant variables. The `all_vars()`
#' variant takes the intersection of the predicate expressions with
#' `&` while the `any_vars()` variant takes the union with `|`.
#'
#' @param expr A predicate expression. This variable supports
#' [unquoting][rlang::quasiquotation] and will be evaluated in the
#' context of the data frame. It should return a logical vector.
#'
#' This argument is automatically [quoted][rlang::quo] and later
#' [evaluated][rlang::eval_tidy] in the context of the data
#' frame. It supports [unquoting][rlang::quasiquotation]. See
#' `vignette("programming")` for an introduction to these concepts.
#' @seealso [funs()] and [vars()] for other quoting functions that you
#' can use with scoped verbs.
#' @export
all_vars <- function(expr) {
set_attrs(enquo(expr), class = c("all_vars", "quosure", "formula"))
}
#' @rdname all_vars
#' @export
any_vars <- function(expr) {
set_attrs(enquo(expr), class = c("any_vars", "quosure", "formula"))
}
#' @export
print.all_vars <- function(x, ...) {
cat("<predicate intersection>\n")
NextMethod()
}
#' @export
print.any_vars <- function(x, ...) {
cat("<predicate union>\n")
NextMethod()
}
# Requires tbl_vars() method
tbl_at_vars <- function(tbl, vars, .include_group_vars = FALSE) {
if (.include_group_vars) {
tibble_vars <- tbl_vars(tbl)
} else {
tibble_vars <- tbl_nongroup_vars(tbl)
}
if (is_null(vars)) {
character()
} else if (is_character(vars)) {
vars
} else if (is_integerish(vars)) {
tibble_vars[vars]
} else if (is_quosures(vars)) {
out <- tidyselect::vars_select(tibble_vars, !!!vars)
if (!any(have_name(vars))) {
names(out) <- NULL
}
out
} else {
bad_args(".vars", "must be a character/numeric vector or a `vars()` object, ",
"not {type_of(vars)}"
)
}
}
tbl_at_syms <- function(tbl, vars, .include_group_vars = FALSE) {
vars <- tbl_at_vars(tbl, vars, .include_group_vars = .include_group_vars)
set_names(syms(vars), names(vars))
}
# Requires tbl_vars(), `[[`() and length() methods
tbl_if_vars <- function(.tbl, .p, .env, ..., .include_group_vars = FALSE) {
if (.include_group_vars) {
tibble_vars <- tbl_vars(.tbl)
} else {
tibble_vars <- tbl_nongroup_vars(.tbl)
}
if (is_logical(.p)) {
stopifnot(length(.p) == length(tibble_vars))
return(syms(tibble_vars[.p]))
}
if (inherits(.tbl, "tbl_lazy")) {
inform("Applying predicate on the first 100 rows")
.tbl <- collect(.tbl, n = 100)
}
if (is_fun_list(.p)) {
if (length(.p) != 1) {
bad_args(".predicate", "must have length 1, not {length(.p)}")
}
.p <- .p[[1]]
}
if (is_quosure(.p)) {
.p <- quo_as_function(.p)
} else {
.p <- as_function(.p, .env)
}
n <- length(tibble_vars)
selected <- lgl_len(n)
for (i in seq_len(n)) {
selected[[i]] <- .p(.tbl[[tibble_vars[[i]]]], ...)
}
tibble_vars[selected]
}
tbl_if_syms <- function(.tbl, .p, .env, ..., .include_group_vars = FALSE) {
syms(tbl_if_vars(.tbl, .p, .env, ..., .include_group_vars = .include_group_vars))
}