forked from tidyverse/dplyr
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbind.r
160 lines (154 loc) · 4.43 KB
/
bind.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
#' Efficiently bind multiple data frames by row and column
#'
#' This is an efficient implementation of the common pattern of
#' `do.call(rbind, dfs)` or `do.call(cbind, dfs)` for binding many
#' data frames into one.
#'
#' The output of `bind_rows()` will contain a column if that column
#' appears in any of the inputs.
#'
#' @section Deprecated functions:
#' `rbind_list()` and `rbind_all()` have been deprecated. Instead use
#' `bind_rows()`.
#'
#' @param ... Data frames to combine.
#'
#' Each argument can either be a data frame, a list that could be a data
#' frame, or a list of data frames.
#'
#' When row-binding, columns are matched by name, and any missing
#' columns will be filled with NA.
#'
#' When column-binding, rows are matched by position, so all data
#' frames must have the same number of rows. To match by value, not
#' position, see [join].
#' @param .id Data frame identifier.
#'
#' When `.id` is supplied, a new column of identifiers is
#' created to link each row to its original data frame. The labels
#' are taken from the named arguments to `bind_rows()`. When a
#' list of data frames is supplied, the labels are taken from the
#' names of the list. If no names are found a numeric sequence is
#' used instead.
#' @return `bind_rows()` and `bind_cols()` return the same type as
#' the first input, either a data frame, `tbl_df`, or `grouped_df`.
#' @aliases rbind_all rbind_list
#' @examples
#' one <- mtcars[1:4, ]
#' two <- mtcars[11:14, ]
#'
#' # You can supply data frames as arguments:
#' bind_rows(one, two)
#'
#' # The contents of lists are automatically spliced:
#' bind_rows(list(one, two))
#' bind_rows(split(mtcars, mtcars$cyl))
#' bind_rows(list(one, two), list(two, one))
#'
#'
#' # In addition to data frames, you can supply vectors. In the rows
#' # direction, the vectors represent rows and should have inner
#' # names:
#' bind_rows(
#' c(a = 1, b = 2),
#' c(a = 3, b = 4)
#' )
#'
#' # You can mix vectors and data frames:
#' bind_rows(
#' c(a = 1, b = 2),
#' data_frame(a = 3:4, b = 5:6),
#' c(a = 7, b = 8)
#' )
#'
#'
#' # Note that for historical reasons, lists containing vectors are
#' # always treated as data frames. Thus their vectors are treated as
#' # columns rather than rows, and their inner names are ignored:
#' ll <- list(
#' a = c(A = 1, B = 2),
#' b = c(A = 3, B = 4)
#' )
#' bind_rows(ll)
#'
#' # You can circumvent that behaviour with explicit splicing:
#' bind_rows(!!!ll)
#'
#'
#' # When you supply a column name with the `.id` argument, a new
#' # column is created to link each row to its original data frame
#' bind_rows(list(one, two), .id = "id")
#' bind_rows(list(a = one, b = two), .id = "id")
#' bind_rows("group 1" = one, "group 2" = two, .id = "groups")
#'
#' # Columns don't need to match when row-binding
#' bind_rows(data.frame(x = 1:3), data.frame(y = 1:4))
#' \dontrun{
#' # Rows do need to match when column-binding
#' bind_cols(data.frame(x = 1), data.frame(y = 1:2))
#' }
#'
#' bind_cols(one, two)
#' bind_cols(list(one, two))
#'
#' # combine applies the same coercion rules
#' f1 <- factor("a")
#' f2 <- factor("b")
#' c(f1, f2)
#' unlist(list(f1, f2))
#'
#' combine(f1, f2)
#' combine(list(f1, f2))
#' @name bind
NULL
#' @export
#' @rdname bind
bind_rows <- function(..., .id = NULL) {
  # Collect everything passed through `...` and flatten it into a single
  # list of bindable pieces.
  pieces <- flatten_bindable(dots_values(...))

  # Nothing to bind: hand the flattened input back as-is when it already
  # inherits from data.frame, otherwise fall back to an empty tibble.
  if (length(pieces) == 0) {
    if (inherits(pieces, "data.frame")) {
      return(pieces)
    }
    return(tibble())
  }

  # No identifier column requested, so no validation or relabelling needed.
  if (is_null(.id)) {
    return(bind_rows_(pieces, .id))
  }

  # `.id` is the name of the identifier column; it must be a single string.
  if (!is_string(.id)) {
    bad_args(".id", "must be a scalar string, ",
      "not {type_of(.id)} of length {length(.id)}"
    )
  }

  # Every piece must either carry a name or be empty. If that does not
  # hold, run the pieces through compact() (presumably dropping the empty
  # ones -- confirm against its definition) and label what remains with a
  # numeric sequence.
  named_or_empty <- have_name(pieces) | map_lgl(pieces, is_empty)
  if (!all(named_or_empty)) {
    pieces <- compact(pieces)
    names(pieces) <- seq_along(pieces)
  }

  bind_rows_(pieces, .id)
}
#' @export
#' @rdname bind
bind_cols <- function(...) {
  # Flatten the inputs supplied through `...` into one list of bindable
  # pieces, bind them column-wise, then pass the result through
  # tibble::repair_names() before returning it.
  pieces <- flatten_bindable(dots_values(...))
  tibble::repair_names(cbind_all(pieces))
}
#' @description
#' `combine()` acts like [c()] or
#' [unlist()] but uses consistent dplyr coercion rules.
#'
#' @details
#' If `combine()` is called with exactly one list argument, the list is
#' simplified (similarly to `unlist(recursive = FALSE)`). `NULL` arguments are
#' ignored. If the result is empty, `logical()` is returned.
#'
#' @export
#' @rdname bind
combine <- function(...) {
  dots <- list(...)

  # A lone list argument is unwrapped and its elements are combined;
  # in every other case the arguments themselves are combined.
  lone_list <- length(dots) == 1 && is.list(dots[[1]])
  to_combine <- if (lone_list) dots[[1]] else dots

  combine_all(to_combine)
}