forked from tidyverse/dplyr
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtop-n.R
78 lines (75 loc) · 1.96 KB
/
top-n.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
#' Select top (or bottom) n rows (by value)
#'
#' This is a convenient wrapper that uses [filter()] and
#' [min_rank()] to select the top or bottom entries in each group,
#' ordered by `wt`.
#'
#' @param x a [tbl()] to filter
#' @param n number of rows to return for `top_n()`, fraction of rows to
#' return for `top_frac()`.
#'
#' If `x` is grouped, this is the
#' number (or fraction) of rows per group. Will include more rows if
#' there are ties.
#'
#' If `n` is positive, selects the top rows. If negative,
#' selects the bottom rows.
#'
#' @param wt (Optional). The variable to use for ordering. If not
#' specified, defaults to the last variable in the tbl.
#'
#' @details
#' Both `n` and `wt` are automatically [quoted][rlang::enquo] and later
#' [evaluated][rlang::eval_tidy] in the context of the data
#' frame. It supports [unquoting][rlang::quasiquotation].
#'
#' @export
#' @examples
#' df <- data.frame(x = c(10, 4, 1, 6, 3, 1, 1))
#' df %>% top_n(2)
#'
#' # half the rows
#' df %>% top_n(n() * .5)
#' df %>% top_frac(.5)
#'
#' # Negative values select bottom from group. Note that we get more
#' # than 2 values here because there's a tie: top_n() either takes
#' # all rows with a value, or none.
#' df %>% top_n(-2)
#'
#' if (require("Lahman")) {
#' # Find 10 players with most games
#' tbl_df(Batting) %>%
#' group_by(playerID) %>%
#' tally(G) %>%
#' top_n(10)
#'
#' # Find year with most games for each player
#' \dontrun{
#' tbl_df(Batting) %>%
#' group_by(playerID) %>%
#' top_n(1, G)
#' }
#' }
top_n <- function(x, n, wt) {
wt <- enquo(wt)
if (quo_is_missing(wt)) {
vars <- tbl_vars(x)
wt_name <- vars[length(vars)]
inform(glue("Selecting by ", wt_name))
wt <- sym(wt_name)
}
filter(x, top_n_rank({{ n }}, !!wt))
}
top_n_rank <- function(n, wt) {
if (n > 0) {
min_rank(desc(wt)) <= n
} else {
min_rank(wt) <= abs(n)
}
}
#' @export
#' @rdname top_n
top_frac <- function(x, n, wt) {
top_n(x, {{ n }} * n(), {{ wt }})
}