-
Notifications
You must be signed in to change notification settings - Fork 61
/
Copy pathdifference_join.R
76 lines (63 loc) · 2.32 KB
/
difference_join.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
#' Join two tables based on absolute difference between their columns
#'
#' Two values are treated as a match when the absolute difference between
#' them is less than or equal to \code{max_dist}.
#'
#' @param x A tbl
#' @param y A tbl
#' @param by Columns by which to join the two tables
#' @param max_dist Maximum distance to use for joining
#' @param distance_col If given, will add a column with this
#' name containing the absolute difference between the two compared values.
#' Must be \code{NULL} or a single string.
#' @param mode One of "inner", "left", "right", "full", "semi", or "anti"
#'
#' @return The table returned by \code{fuzzy_join()} for the requested
#' \code{mode}, after \code{ensure_distance_col()} has been applied to it.
#'
#' @examples
#'
#' library(dplyr)
#'
#' head(iris)
#' sepal_lengths <- tibble(Sepal.Length = c(5, 6, 7), Type = 1:3)
#'
#' iris %>%
#'   difference_inner_join(sepal_lengths, max_dist = .5)
#'
#' @export
difference_join <- function(x, y, by = NULL, max_dist = 1, mode = "inner",
                            distance_col = NULL) {
  # Fail fast on a malformed column name: a numeric or multi-element value
  # would otherwise be used as an index into the match data frame below.
  if (!is.null(distance_col) &&
      !(is.character(distance_col) && length(distance_col) == 1L)) {
    stop("`distance_col` must be NULL or a single string", call. = FALSE)
  }

  # Matching function handed to fuzzy_join(). It returns a data frame whose
  # first column, `include`, flags the pairs within `max_dist`; when a
  # distance column was requested, the second column holds the distances.
  match_fun <- function(v1, v2) {
    dist <- abs(v1 - v2)
    ret <- data.frame(include = (dist <= max_dist))
    if (!is.null(distance_col)) {
      # Append by position and name afterwards, so that a `distance_col`
      # equal to "include" cannot overwrite the logical match indicator.
      ret[[2L]] <- dist
      names(ret)[2L] <- distance_col
    }
    ret
  }

  joined <- fuzzy_join(x, y, by = by, match_fun = match_fun, mode = mode)
  ensure_distance_col(joined, distance_col, mode)
}
#' @rdname difference_join
#' @export
difference_inner_join <- function(x, y, by = NULL, max_dist = 1,
                                  distance_col = NULL) {
  # Delegate to difference_join() with the join mode fixed to "inner".
  difference_join(
    x = x,
    y = y,
    by = by,
    mode = "inner",
    max_dist = max_dist,
    distance_col = distance_col
  )
}
#' @rdname difference_join
#' @export
difference_left_join <- function(x, y, by = NULL, max_dist = 1,
                                 distance_col = NULL) {
  # Delegate to difference_join() with the join mode fixed to "left".
  difference_join(
    x = x,
    y = y,
    by = by,
    mode = "left",
    max_dist = max_dist,
    distance_col = distance_col
  )
}
#' @rdname difference_join
#' @export
difference_right_join <- function(x, y, by = NULL, max_dist = 1,
                                  distance_col = NULL) {
  # Delegate to difference_join() with the join mode fixed to "right".
  difference_join(
    x = x,
    y = y,
    by = by,
    mode = "right",
    max_dist = max_dist,
    distance_col = distance_col
  )
}
#' @rdname difference_join
#' @export
difference_full_join <- function(x, y, by = NULL, max_dist = 1,
                                 distance_col = NULL) {
  # Delegate to difference_join() with the join mode fixed to "full".
  difference_join(
    x = x,
    y = y,
    by = by,
    mode = "full",
    max_dist = max_dist,
    distance_col = distance_col
  )
}
#' @rdname difference_join
#' @export
difference_semi_join <- function(x, y, by = NULL, max_dist = 1,
                                 distance_col = NULL) {
  # Delegate to difference_join() with the join mode fixed to "semi".
  difference_join(
    x = x,
    y = y,
    by = by,
    mode = "semi",
    max_dist = max_dist,
    distance_col = distance_col
  )
}
#' @rdname difference_join
#' @export
difference_anti_join <- function(x, y, by = NULL, max_dist = 1,
                                 distance_col = NULL) {
  # Delegate to difference_join() with the join mode fixed to "anti".
  difference_join(
    x = x,
    y = y,
    by = by,
    mode = "anti",
    max_dist = max_dist,
    distance_col = distance_col
  )
}