forked from tidyverse/dplyr
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
10 changed files
with
227 additions
and
12 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -36,3 +36,5 @@ Collate: | |
'manip-source.r' | ||
'manip.r' | ||
'manip-sqlite.r' | ||
'desc.r' | ||
'manip-data-frame.r' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
#' Descending order. | ||
#' | ||
#' Transform a vector into a format that will be sorted in descending order. | ||
#' | ||
#' @param x vector to transform | ||
#' @export | ||
#' @examples | ||
#' desc(1:10) | ||
#' desc(factor(letters)) | ||
#' first_day <- seq(as.Date("1910/1/1"), as.Date("1920/1/1"), "years") | ||
#' desc(first_day) | ||
desc <- function(x) -xtfrm(x) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
#' Data manipulation for data frames. | ||
#' | ||
#' @examples | ||
#' data("baseball", package = "plyr") | ||
# | ||
#' filter(baseball, year > 2005, g > 130) | ||
#' head(select(baseball, id:team)) | ||
#' summarise(baseball, g = mean(g), n = count()) | ||
#' head(mutate(baseball, rbi = 1.0 * r / ab)) | ||
#' head(arrange(baseball, id, desc(year))) | ||
#' | ||
#' @name manip_data_frame | ||
NULL | ||
|
||
#' @rdname manip_data_frame | ||
#' @export | ||
#' @method filter data.frame | ||
filter.data.frame <- function(.data, ...) { | ||
conds <- dots(...) | ||
|
||
r <- vapply(conds, eval, env = .data, enclos = parent.frame(), | ||
FUN.VALUE = logical(nrow(.data))) | ||
|
||
all <- rowSums(r, na.rm = TRUE) == ncol(r) | ||
.data[all, , drop = FALSE] | ||
} | ||
|
||
#' @rdname manip_data_frame | ||
#' @export | ||
#' @method summarise data.frame | ||
summarise.data.frame <- function(.data, ...) { | ||
cols <- named_dots(...) | ||
data_env <- list2env(.data, parent = parent.frame()) | ||
data_env$count <- function() nrow(.data) | ||
|
||
for (col in names(cols)) { | ||
data_env[[col]] <- eval(cols[[col]], data_env) | ||
} | ||
|
||
as_df(mget(names(cols), data_env)) | ||
} | ||
|
||
#' @rdname manip_data_frame | ||
#' @export | ||
#' @method mutate data.frame | ||
mutate.data.frame <- function(.data, ...) { | ||
cols <- named_dots(...) | ||
data_env <- list2env(.data, parent = parent.frame()) | ||
|
||
for(col in names(cols)) { | ||
data_env[[col]] <- eval(cols[[col]], data_env) | ||
} | ||
|
||
out_cols <- union(names(.data), names(cols)) | ||
as_df(mget(out_cols, data_env)) | ||
} | ||
|
||
#' @rdname manip_data_frame | ||
#' @export | ||
#' @method arrange data.frame | ||
arrange.data.frame <- function(.data, ...) { | ||
r <- eval(substitute(order(...)), .data, parent.frame()) | ||
if(length(r) != nrow(.data)) { | ||
stop("Ordering vectors not the same length as data", call. = FALSE) | ||
} | ||
.data[r, , drop = FALSE] | ||
} | ||
|
||
#' @rdname manip_data_frame | ||
#' @export | ||
#' @method select data.frame | ||
select.data.frame <- function(.data, ...) { | ||
nm <- names(.data) | ||
nm_env <- as.list(setNames(seq_along(nm), nm)) | ||
|
||
idx <- unlist(lapply(dots(...), eval, nm_env, parent.frame())) | ||
select <- nm[idx] | ||
|
||
.data[, nm[idx], drop = FALSE] | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
# What's the fastest way to get a subset of the colums in | ||
# a data frame into an environment? | ||
|
||
# list2env is extremely fast so - 20 µs for 200 cols | ||
# and doesn't seem to grow with underlying data | ||
# so don't worry about it - just use list2env | ||
|
||
# Just copy them all | ||
l1 <- function(df, cols) { | ||
list2env(df, parent = emptyenv()) | ||
} | ||
|
||
# Subset, then copy | ||
l2 <- function(df, cols) { | ||
list2env(df[cols], parent = emptyenv()) | ||
} | ||
|
||
# Copy by hand | ||
l3 <- function(df, cols) { | ||
env <- new.env(parent = emptyenv()) | ||
for (col in cols) { | ||
env[[col]] <- .subset2(df, col) | ||
} | ||
env | ||
} | ||
|
||
|
||
library(microbenchmark) | ||
options(digits = 3) | ||
data(baseball, package = "plyr") | ||
|
||
all <- names(baseball) | ||
print(microbenchmark( | ||
l1(baseball, all), | ||
l2(baseball, all), | ||
l3(baseball, all) | ||
)) | ||
|
||
long <- do.call("rbind", replicate(10, baseball, simplify = FALSE)) | ||
print(microbenchmark( | ||
l1(long, all), | ||
l3(long, all), | ||
l1(long, c("id", "sf")), | ||
l3(long, c("id", "sf")) | ||
)) | ||
|
||
wide <- do.call("cbind", replicate(10, baseball, simplify = FALSE)) | ||
print(microbenchmark( | ||
l1(wide, all), | ||
l3(wide, all), | ||
l1(wide, c("id", "sf")), | ||
l3(wide, c("id", "sf")) | ||
)) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
\name{desc} | ||
\alias{desc} | ||
\title{Descending order.} | ||
\usage{ | ||
desc(x) | ||
} | ||
\arguments{ | ||
\item{x}{vector to transform} | ||
} | ||
\description{ | ||
Transform a vector into a format that will be sorted in | ||
descending order. | ||
} | ||
\examples{ | ||
desc(1:10) | ||
desc(factor(letters)) | ||
first_day <- seq(as.Date("1910/1/1"), as.Date("1920/1/1"), "years") | ||
desc(first_day) | ||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
\name{manip_data_frame} | ||
\alias{arrange.data.frame} | ||
\alias{filter.data.frame} | ||
\alias{manip_data_frame} | ||
\alias{mutate.data.frame} | ||
\alias{select.data.frame} | ||
\alias{summarise.data.frame} | ||
\title{Data manipulation for data frames.} | ||
\usage{ | ||
\method{filter}{data.frame} (.data, ...) | ||
|
||
\method{summarise}{data.frame} (.data, ...) | ||
|
||
\method{mutate}{data.frame} (.data, ...) | ||
|
||
\method{arrange}{data.frame} (.data, ...) | ||
|
||
\method{select}{data.frame} (.data, ...) | ||
} | ||
\description{ | ||
Data manipulation for data frames. | ||
} | ||
\examples{ | ||
data("baseball", package = "plyr") | ||
filter(baseball, year > 2005, g > 130) | ||
head(select(baseball, id:team)) | ||
summarise(baseball, g = mean(g), n = count()) | ||
head(mutate(baseball, rbi = 1.0 * r / ab)) | ||
head(arrange(baseball, id, desc(year))) | ||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters