-
Notifications
You must be signed in to change notification settings - Fork 56
/
Copy pathhelper-margins.r
83 lines (71 loc) · 2.57 KB
/
helper-margins.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
#' Figure out margining variables.
#'
#' Given the variables that form the rows and columns, and a set of desired
#' margins, works out which ones are possible. Variables that can't be
#' margined over are dropped silently.
#'
#' @param vars a list of character vectors giving the variables in each
#' dimension
#' @param margins a character vector of variable names to compute margins for.
#' \code{TRUE} will compute all possible margins.
#' @keywords manip internal
#' @return list of margining combinations, or \code{NULL} if none. These are
#' the combinations of variables that should have their values set to
#' \code{(all)}
margins <- function(vars, margins = NULL) {
if (is.null(margins) || identical(margins, FALSE)) return(NULL)
all_vars <- unlist(vars)
if (isTRUE(margins)) {
margins <- all_vars
}
# Start by grouping margins by dimension
dims <- lapply(vars, intersect, margins)
# Next, ensure high-level margins include lower-levels
dims <- mapply(function(vars, margin) {
lapply(margin, downto, vars)
}, vars, dims, SIMPLIFY = FALSE, USE.NAMES = FALSE)
# Finally, find intersections across all dimensions
seq_0 <- function(x) c(0, seq_along(x))
indices <- expand.grid(lapply(dims, seq_0), KEEP.OUT.ATTRS = FALSE)
# indices <- indices[rowSums(indices) > 0, ]
lapply(seq_len(nrow(indices)), function(i){
unlist(mapply("[", dims, indices[i, ], SIMPLIFY = FALSE))
})
}
upto <- function(a, b) {
b[seq_len(match(a, b, nomatch = 0))]
}
downto <- function(a, b) {
rev(upto(a, rev(b)))
}
#' Add margins to a data frame.
#'
#' Rownames are silently stripped. All margining variables will be converted
#' to factors.
#'
#' @param df input data frame
#' @param vars a list of character vectors giving the variables in each
#' dimension
#' @param margins a character vector of variable names to compute margins for.
#' \code{TRUE} will compute all possible margins.
#' @export
add_margins <- function(df, vars, margins = TRUE) {
margin_vars <- margins(vars, margins)
# Return data frame if no margining necessary
if (length(margin_vars) == 0) return(df)
# Prepare data frame for addition of margins
addAll <- function(x) {
x <- addNA(x, TRUE)
factor(x, levels = c(levels(x), "(all)"), exclude = NULL)
}
vars <- unique(unlist(margin_vars))
df[vars] <- lapply(df[vars], addAll)
rownames(df) <- NULL
# Loop through all combinations of margin variables, setting
# those variables to (all)
margin_dfs <- llply(margin_vars, function(vars) {
df[vars] <- rep(list(factor("(all)")), length(vars))
df
})
rbind.fill(margin_dfs)
}