forked from tidyverse/dplyr
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathid.r
75 lines (64 loc) · 1.99 KB
/
id.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
#' Compute a unique numeric id for each unique row in a data frame.
#'
#' Properties:
#' \itemize{
#' \item \code{order(id)} is equivalent to \code{do.call(order, df)}
#' \item rows containing the same data have the same value
#' \item if \code{drop = FALSE} then room for all possibilites
#' }
#'
#' @param .variables list of variables
#' @param drop drop unusued factor levels?
#' @return a numeric vector with attribute n, giving total number of
#' possibilities
#' @keywords internal
#' @export
id <- function(.variables, drop = FALSE) {
warning("id() is deprecated", call. = FALSE)
# Drop all zero length inputs
lengths <- vapply(.variables, length, integer(1))
.variables <- .variables[lengths != 0]
if (length(.variables) == 0) {
n <- nrow(.variables) %||% 0L
return(structure(seq_len(n), n = n))
}
# Special case for single variable
if (length(.variables) == 1) {
return(id_var(.variables[[1]], drop = drop))
}
# Calculate individual ids
ids <- rev(lapply(.variables, id_var, drop = drop))
p <- length(ids)
# Calculate dimensions
ndistinct <- vapply(ids, attr, "n", FUN.VALUE = numeric(1),
USE.NAMES = FALSE)
n <- prod(ndistinct)
if (n > 2 ^ 31) {
# Too big for integers, have to use strings, which will be much slower :(
char_id <- do.call("paste", c(ids, sep = "\r"))
res <- match(char_id, unique(char_id))
} else {
combs <- c(1, cumprod(ndistinct[-p]))
mat <- do.call("cbind", ids)
res <- c((mat - 1L) %*% combs + 1L)
}
attr(res, "n") <- n
if (drop) {
id_var(res, drop = TRUE)
} else {
structure(as.integer(res), n = attr(res, "n"))
}
}
id_var <- function(x, drop = FALSE) {
if (length(x) == 0) return(structure(integer(), n = 0L))
if (!is.null(attr(x, "n")) && !drop) return(x)
if (is.factor(x) && !drop) {
id <- as.integer(addNA(x, ifany = TRUE))
n <- length(levels(x))
} else {
levels <- sort(unique(x), na.last = TRUE)
id <- match(x, levels)
n <- max(id)
}
structure(id, n = n)
}