forked from tidyverse/dplyr
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbench-compare.r
119 lines (108 loc) · 3.61 KB
/
bench-compare.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
#' Evaluate, compare, benchmark operations of a set of srcs.
#'
#' These functions support the comparison of results and timings across
#' multiple sources.
#'
#' @param tbls A list of \code{\link{tbl}}s.
#' @param op A function with a single argument, called often with each
#' element of \code{tbls}.
#' @param ref For checking, an data frame to test results against. If not
#' supplied, defaults to the results from the first \code{src}.
#' @param compare A function used to compare the results. Defaults to
#' \code{equal_data_frame} which ignores the order of rows and columns.
#' @param times For benchmarking, the number of times each operation is
#' repeated.
#' @param \dots
#' For \code{compare_tbls}: additional parameters passed on the
#' \code{compare} function
#'
#' For \code{bench_tbls}: additional benchmarks to run.
#' @return
#' \code{eval_tbls}: a list of data frames.
#'
#' \code{compare_tbls}: an invisible \code{TRUE} on success, otherwise
#' an error is thrown.
#'
#' \code{bench_tbls}: an object of class
#' \code{\link[microbenchmark]{microbenchmark}}
#' @seealso \code{\link{src_local}} for working with local data
#' @examples
#' \dontrun{
#' if (require("microbenchmark") && has_lahman()) {
#' lahman_local <- lahman_srcs("df", "sqlite")
#' teams <- lapply(lahman_local, function(x) x %>% tbl("Teams"))
#'
#' compare_tbls(teams, function(x) x %>% filter(yearID == 2010))
#' bench_tbls(teams, function(x) x %>% filter(yearID == 2010))
#'
#' # You can also supply arbitrary additional arguments to bench_tbls
#' # if there are other operations you'd like to compare.
#' bench_tbls(teams, function(x) x %>% filter(yearID == 2010),
#' base = subset(Lahman::Teams, yearID == 2010))
#'
#' # A more complicated example using multiple tables
#' setup <- function(src) {
#' list(
#' src %>% tbl("Batting") %>% filter(stint == 1) %>% select(playerID:H),
#' src %>% tbl("Master") %>% select(playerID, birthYear)
#' )
#' }
#' two_tables <- lapply(lahman_local, setup)
#'
#' op <- function(tbls) {
#' semi_join(tbls[[1]], tbls[[2]], by = "playerID")
#' }
#' # compare_tbls(two_tables, op)
#' bench_tbls(two_tables, op, times = 2)
#'
#' }
#' }
#' @name bench_compare
NULL
#' @export
#' @rdname bench_compare
bench_tbls <- function(tbls, op, ..., times = 10) {
if (!requireNamespace("microbenchmark")) {
stop("Please install the microbenchmark package", call. = FALSE)
}
# Generate call to microbenchmark function that evaluates op for each tbl
calls <- lapply(seq_along(tbls), function(i) {
substitute(op(tbls[[i]]), list(i = i))
})
names(calls) <- names(tbls)
mb <- as.call(c(quote(microbenchmark::microbenchmark), calls, dots(...),
list(times = times)))
eval(mb)
}
#' @export
#' @rdname bench_compare
compare_tbls <- function(tbls, op, ref = NULL, compare = equal_data_frame, ...) {
if (length(tbls) < 2 && is.null(ref)) {
testthat::skip("Need at least two srcs to compare")
}
if (!requireNamespace("testthat", quietly = TRUE)) {
stop("Please install the testthat package", call. = FALSE)
}
results <- eval_tbls(tbls, op)
if (is.null(ref)) {
ref <- results[[1]]
ref_name <- names(results)[1]
rest <- results[-1]
} else {
rest <- results
ref_name <- "supplied comparison"
}
for(i in seq_along(rest)) {
ok <- compare(ref, rest[[i]], ...)
# if (!ok) browser()
msg <- paste0(names(rest)[[i]], " not equal to ", ref_name, "\n",
attr(ok, "comment"))
testthat::expect_true(ok, info = msg)
}
invisible(TRUE)
}
#' @export
#' @rdname bench_compare
eval_tbls <- function(tbls, op) {
lapply(tbls, function(x) as.data.frame(op(x)))
}