forked from tidyverse/dplyr
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathjoin-vars.R
65 lines (54 loc) · 1.72 KB
/
join-vars.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
join_vars <- function(x_names, y_names, by, suffix = list(x = ".x", y = ".y")) {
# Record position of join keys
idx <- get_join_var_idx(x_names, y_names, by)
x_names_by <- x_names[idx$x$by]
x_names_aux <- x_names[idx$x$aux]
y_names_aux <- y_names[idx$y$aux]
# Add suffix where needed
x_new <- x_names
x_new[idx$x$aux] <- add_suffixes(x_names_aux, c(x_names_by, y_names_aux), suffix$x)
y_new <- add_suffixes(y_names_aux, x_names, suffix$y)
x_x <- seq_along(x_names)
x_y <- idx$y$by[match(x_names, by$x)]
y_x <- rep_along(idx$y$aux, NA)
y_y <- seq_along(idx$y$aux)
# Return a list with 3 parallel vectors
# At each position, values in the 3 vectors represent
# alias - name of column in join result
# x - position of column from left table or NA if only from right table
# y - position of column from right table or NA if only from left table
ret <- list(alias = c(x_new, y_new), x = c(x_x, y_x), y = c(x_y, y_y))
# In addition, the idx component contains indices of "by" and "aux" variables
# for x and y, respectively (see get_join_var_idx())
ret$idx <- idx
ret
}
get_join_var_idx <- function(x_names, y_names, by) {
x_idx <- get_by_aux(x_names, by$x)
y_idx <- get_by_aux(y_names, by$y)
list(x = x_idx, y = y_idx)
}
get_by_aux <- function(names, by) {
if (length(by) > 0) {
by <- match(by, names)
aux <- seq_along(names)[-by]
} else {
by <- integer()
aux <- seq_along(names)
}
list(by = by, aux = aux)
}
add_suffixes <- function(x, y, suffix) {
if (identical(suffix, "")) {
return(x)
}
out <- chr_along(x)
for (i in seq_along(x)) {
nm <- x[[i]]
while (nm %in% y || nm %in% out) {
nm <- paste0(nm, suffix)
}
out[[i]] <- nm
}
out
}