Skip to content

Commit

Permalink
Fix whitespace and re-document
Browse files Browse the repository at this point in the history
  • Loading branch information
hadley committed Jan 28, 2014
1 parent caf6b8f commit e224b3a
Show file tree
Hide file tree
Showing 44 changed files with 561 additions and 558 deletions.
12 changes: 6 additions & 6 deletions R/all-equal.r
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#' Provide a useful implementation of all.equal for data.frames.
#'
#'
#' @param target,current two data frames to compare
#' @param ignore_col_order should order of columns be ignored?
#' @param ignore_row_order should order of rows be ignored?
Expand All @@ -13,19 +13,19 @@
#' @export
#' @examples
#' scramble <- function(x) x[sample(nrow(x)), sample(ncol(x))]
#'
#'
#' # By default, ordering of rows and columns ignored
#' all.equal(mtcars, scramble(mtcars))
#'
#'
#' # But those can be overriden if desired
#' all.equal(mtcars, scramble(mtcars), ignore_col_order = FALSE)
#' all.equal(mtcars, scramble(mtcars), ignore_row_order = FALSE)
all.equal.data.frame <- function(target, current, ignore_col_order = TRUE,
all.equal.data.frame <- function(target, current, ignore_col_order = TRUE,
ignore_row_order = TRUE, convert = FALSE, ...) {

res <- equal_data_frame(target, current, ignore_col_order = ignore_col_order,
ignore_row_order = ignore_row_order, convert = convert)

if (res) {
TRUE
} else {
Expand Down
48 changes: 24 additions & 24 deletions R/bench-compare.r
Original file line number Diff line number Diff line change
@@ -1,44 +1,44 @@
#' Evaluate, compare, benchmark operations of a set of srcs.
#'
#'
#' These functions support the comparison of results and timings across
#' multiple sources.
#'
#'
#' @param tbls A list of \code{\link{tbl}}s.
#' @param op A function with a single argument, called often with each
#' element of \code{tbls}.
#' @param ref For checking, an data frame to test results against. If not
#' @param ref For checking, an data frame to test results against. If not
#' supplied, defaults to the results from the first \code{src}.
#' @param compare A function used to compare the results. Defaults to
#' @param compare A function used to compare the results. Defaults to
#' \code{equal_data_frame} which ignores the order of rows and columns.
#' @param times For benchmarking, the number of times each operation is
#' @param times For benchmarking, the number of times each operation is
#' repeated.
#' @param \dots
#' For \code{compare_tbls}: additional parameters passed on the
#' @param \dots
#' For \code{compare_tbls}: additional parameters passed on the
#' \code{compare} function
#'
#'
#' For \code{bench_tbls}: additional benchmarks to run.
#' @return
#' @return
#' \code{eval_tbls}: a list of data frames.
#'
#'
#' \code{compare_tbls}: an invisible \code{TRUE} on success, otherwise
#' an error is thrown.
#'
#' \code{bench_tbls}: an object of class
#'
#' \code{bench_tbls}: an object of class
#' \code{\link[microbenchmark]{microbenchmark}}
#' @seealso \code{\link{src_local}} for working with local data
#' @examples
#' if (require("Lahman") && require("microbenchmark")) {
#' lahman_local <- lahman_srcs("df", "dt", "cpp")
#' teams <- lapply(lahman_local, function(x) x %.% tbl("Teams"))
#'
#'
#' compare_tbls(teams, function(x) x %.% filter(yearID == 2010))
#' bench_tbls(teams, function(x) x %.% filter(yearID == 2010))
#'
#'
#' # You can also supply arbitrary additional arguments to bench_tbls
#' # if there are other operations you'd like to compare.
#' bench_tbls(teams, function(x) x %.% filter(yearID == 2010),
#' base = subset(Teams, yearID == 2010))
#'
#'
#' # A more complicated example using multiple tables
#' setup <- function(src) {
#' list(
Expand All @@ -47,13 +47,13 @@
#' )
#' }
#' two_tables <- lapply(lahman_local, setup)
#'
#'
#' op <- function(tbls) {
#' semi_join(tbls[[1]], tbls[[2]], by = "playerID")
#' }
#' # compare_tbls(two_tables, op)
#' bench_tbls(two_tables, op, times = 2)
#'
#'
#' }
#' @name bench_compare
NULL
Expand All @@ -70,8 +70,8 @@ bench_tbls <- function(tbls, op, ..., times = 10) {
substitute(op(tbls[[i]]), list(i = i))
})
names(calls) <- names(tbls)
mb <- as.call(c(quote(microbenchmark), calls, dots(...),

mb <- as.call(c(quote(microbenchmark), calls, dots(...),
list(times = times)))
eval(mb)
}
Expand All @@ -85,9 +85,9 @@ compare_tbls <- function(tbls, op, ref = NULL, compare = equal_data_frame, ...)
if (!require("testthat")) {
stop("Please install the testthat package", call. = FALSE)
}

results <- eval_tbls(tbls, op)

if (is.null(ref)) {
ref <- results[[1]]
ref_name <- names(results)[1]
Expand All @@ -96,15 +96,15 @@ compare_tbls <- function(tbls, op, ref = NULL, compare = equal_data_frame, ...)
rest <- results
ref_name <- "supplied comparison"
}

for(i in seq_along(rest)) {
ok <- compare(ref, rest[[i]], ...)
# if (!ok) browser()
msg <- paste0(names(rest)[[i]], " not equal to ", ref_name, "\n",
attr(ok, "comment"))
expect_true(ok, info = msg)
expect_true(ok, info = msg)
}

invisible(TRUE)
}

Expand Down
44 changes: 22 additions & 22 deletions R/chain.r
Original file line number Diff line number Diff line change
@@ -1,18 +1,18 @@
#' Chain together multiple operations.
#'
#'
#' The downside of the functional nature of dplyr is that when you combine
#' multiple data manipulation operations, you have to read from the inside
#' out and the arguments may be very distant to the function call. These
#' out and the arguments may be very distant to the function call. These
#' functions providing an alternative way of calling dplyr (and other data
#' manipulation) functions that you read can from left to right.
#'
#'
#' The functions work via simple substitution so that \code{chain(x, f(y))} or
#' \code{x \%.\% f(y)} is translated into \code{f(x, y)}.
#'
#'
#' @param x,y A dataset and function to apply to it
#' @param ...,calls A sequence of data transformations, starting with a dataset.
#' The first argument of each call should be omitted - the value of the
#' previous step will be substituted in automatically. Use \code{chain} and
#' previous step will be substituted in automatically. Use \code{chain} and
#' \code{...} when working interactive; use \code{chain_q} and \code{calls}
#' when calling from another function.
#' @param env Environment in which to evaluation expressions. In ordinary
Expand All @@ -23,35 +23,35 @@
#' # If you're performing many operations you can either do step by step
#' a1 <- group_by(hflights, Year, Month, DayofMonth)
#' a2 <- select(a1, Year:DayofMonth, ArrDelay, DepDelay)
#' a3 <- summarise(a2,
#' arr = mean(ArrDelay, na.rm = TRUE),
#' a3 <- summarise(a2,
#' arr = mean(ArrDelay, na.rm = TRUE),
#' dep = mean(DepDelay, na.rm = TRUE))
#' a4 <- filter(a3, arr > 30 | dep > 30)
#'
#' # If you don't want to save the intermediate results, you need to
#'
#' # If you don't want to save the intermediate results, you need to
#' # wrap the functions:
#' filter(
#' summarise(
#' select(
#' group_by(hflights, Year, Month, DayofMonth),
#' group_by(hflights, Year, Month, DayofMonth),
#' Year:DayofMonth, ArrDelay, DepDelay
#' ),
#' arr = mean(ArrDelay, na.rm = TRUE),
#' ),
#' arr = mean(ArrDelay, na.rm = TRUE),
#' dep = mean(DepDelay, na.rm = TRUE)
#' ),
#' ),
#' arr > 30 | dep > 30
#' )
#'
#'
#' # This is difficult to read because the order of the operations is from
#' # inside to out, and the arguments are a long way away from the function.
#' # Alternatively you can use chain or %.% to sequence the operations
#' # linearly:
#'
#'
#' hflights %.%
#' group_by(Year, Month, DayofMonth) %.%
#' select(Year:DayofMonth, ArrDelay, DepDelay) %.%
#' summarise(
#' arr = mean(ArrDelay, na.rm = TRUE),
#' arr = mean(ArrDelay, na.rm = TRUE),
#' dep = mean(DepDelay, na.rm = TRUE)
#' ) %.%
#' filter(arr > 30 | dep > 30)
Expand All @@ -61,7 +61,7 @@
#' group_by(Year, Month, DayofMonth),
#' select(Year:DayofMonth, ArrDelay, DepDelay),
#' summarise(
#' arr = mean(ArrDelay, na.rm = TRUE),
#' arr = mean(ArrDelay, na.rm = TRUE),
#' dep = mean(DepDelay, na.rm = TRUE)
#' ),
#' filter(arr > 30 | dep > 30)
Expand All @@ -76,19 +76,19 @@ chain <- function(..., env = parent.frame()) {
chain_q <- function(calls, env = parent.frame()) {
if (length(calls) == 0) return()
if (length(calls) == 1) return(eval(calls[[1]], env))
# New environemnt for evalution - inherits from parent frame, and

# New environemnt for evalution - inherits from parent frame, and
# contains unusually named (to avoid conflicts) variable to represent
# result of previous computation
e <- new.env(parent = env)
e$`__prev` <- eval(calls[[1]], env)

for(call in calls[-1]) {
new_call <- as.call(c(call[[1]], quote(`__prev`), as.list(call[-1])))
e$`__prev` <- eval(new_call, e)
}
e$`__prev`

e$`__prev`
}

#' @export
Expand Down
2 changes: 1 addition & 1 deletion R/compute-collect.r
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ collapse.tbl_sql <- function(x, vars = NULL, ...) {
nms <- auto_names(x$select)
vars <- lapply(nms, as.name)
}

update(tbl(x$src, x$query$sql, vars = vars, ...), group_by = groups(x))
}

Expand Down
38 changes: 19 additions & 19 deletions R/copy-to.r
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
#' Copy a local data frame to a remote src.
#'
#'
#' This uploads a local data frame into a remote data source, creating the
#' table definition as needed. Wherever possible, the new object will be
#' temporary, limited to the current connection to the source.
#'
#'
#' @param dest remote data source
#' @param df local data frame
#' @param name name for new remote table.
Expand All @@ -15,37 +15,37 @@ copy_to <- function(dest, df, name = deparse(substitute(df)), ...) {
}

#' Copy a local data fram to a sqlite src.
#'
#'
#' This standard method works for all sql sources.
#'
#'
#' @export
#' @param types a character vector giving variable types to use for the columns.
#' See \url{http://www.sqlite.org/datatype3.html} for available types.
#' @param temporary if \code{TRUE}, will create a temporary table that is
#' local to this connection and will be automatically deleted when the
#' connection expires
#' @param indexes a list of character vectors. Each element of the list
#' @param indexes a list of character vectors. Each element of the list
#' will create a new index.
#' @param analyze if \code{TRUE} (the default), will automatically ANALYZE the
#' new table so that the query optimiser has useful information.
#' @inheritParams copy_to
#' @return a sqlite \code{\link{tbl}} object
#' @examples
#' db <- src_sqlite(tempfile(), create = TRUE)
#' db <- src_sqlite(tempfile(), create = TRUE)
#'
#' iris2 <- copy_to(db, iris)
#' mtcars$model <- rownames(mtcars)
#' mtcars2 <- copy_to(db, mtcars, indexes = list("model"))
#'
#'
#' explain(filter(mtcars2, model == "Hornet 4 Drive"))
#'
#' # Note that tables are temporary by default, so they're not
#' # Note that tables are temporary by default, so they're not
#' # visible from other connections to the same database.
#' src_tbls(db)
#' db2 <- src_sqlite(db$path)
#' src_tbls(db2)
copy_to.src_sql <- function(dest, df, name = deparse(substitute(df)),
types = NULL, temporary = TRUE, indexes = NULL,
copy_to.src_sql <- function(dest, df, name = deparse(substitute(df)),
types = NULL, temporary = TRUE, indexes = NULL,
analyze = TRUE, ...) {
assert_that(is.data.frame(df), is.string(name), is.flag(temporary))
if (isTRUE(db_has_table(dest$con, name))) {
Expand All @@ -54,38 +54,38 @@ copy_to.src_sql <- function(dest, df, name = deparse(substitute(df)),

types <- types %||% db_data_type(dest$con, df)
names(types) <- names(df)

con <- dest$con

sql_begin_trans(con)
sql_create_table(con, name, types, temporary = temporary)
sql_insert_into(con, name, df)
sql_create_indexes(con, name, indexes)
if (analyze) sql_analyze(con, name)
sql_commit(con)

tbl(dest, name)
}

#' @export
copy_to.src_bigquery <- function(dest, df, name = deparse(substitute(df)), ...) {
job <- insert_upload_job(dest$con$project, dest$con$dataset, name, df,
job <- insert_upload_job(dest$con$project, dest$con$dataset, name, df,
billing = dest$con$billing)
wait_for(job)

tbl(dest, name)
}

auto_copy <- function(x, y, copy = FALSE, ...) {
if (same_src(x, y)) return(y)

if (!copy) {
stop("x and y don't share the same src. Set copy = TRUE to copy y into ",
stop("x and y don't share the same src. Set copy = TRUE to copy y into ",
"x's source (this may be time consuming).", call. = FALSE)
}

UseMethod("auto_copy")
}
}

#' @export
auto_copy.tbl_sql <- function(x, y, copy = FALSE, ...) {
Expand Down
6 changes: 3 additions & 3 deletions R/data-hflights.r
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
#' Database versions of the hflights data
#'
#' These functions cache the data from the \code{hflights} database in
#'
#' These functions cache the data from the \code{hflights} database in
#' a local database, for use in examples and vignettes.
#'
#'
#' @keywords internal
#' @name hflights_df
NULL
Expand Down
Loading

0 comments on commit e224b3a

Please sign in to comment.