forked from microbiome/mia
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathestimateDominance.R
98 lines (88 loc) · 2.85 KB
/
estimateDominance.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
.gini_dominance <- function(x, w=rep(1, length(x))) {
# See also reldist::gini for an independent implementation
x <- as.vector(x)
o <- order(x)
x <- x[o]
w <- w[o]/sum(w)
p <- cumsum(w)
nu <- cumsum(w * x)
n <- length(nu)
nu <- nu/nu[[n]]
sum(nu[-1] * p[-n]) - sum(nu[-n] * p[-1])
}
.calc_gini_dominance <- function(mat, ...){
apply(mat, 2L, .gini_dominance)
}
.calc_core_dominance <- function(mat, ...){
getPrevalentAbundance(mat, detection = 0, as.relative = TRUE)
}
.calc_dominance <- function(mat, index, ntaxa = 1L, aggregate = TRUE, ...){
# Check ntaxa
if(!(.is_an_integer(ntaxa) && ntaxa>0 && ntaxa<3)){
stop("'ntaxa' must be a numerical value 1 or 2.", call. = FALSE)
}
# Check aggregate
if(!.is_a_bool(aggregate)){
stop("'aggregate' must be TRUE or FALSE.", call. = FALSE)
}
#
if (index == "absolute") {
# ntaxa=1 by default but can be tuned
as_relative <- FALSE
} else if (index == "relative") {
# ntaxa=1 by default but can be tuned
as_relative <- TRUE
} else if (index == "dbp") {
# Berger-Parker: if selected fix the following values
ntaxa <- 1
as_relative <- TRUE
} else if (index == "dmn") {
# McNaughton's dominance: if selected fix the following values
ntaxa <- 2
aggregate <- TRUE
as_relative <- TRUE
}
if (as_relative) {
# Calculates the relative abundance per sample
mat <- .calc_rel_abund(mat)
}
# Aggregate or not
if (!aggregate) {
idx <- apply(mat, 2L,
function(mc) {
order(as.vector(mc), decreasing = TRUE)[[ntaxa]]
})
} else {
idx <- apply(mat, 2L,
function(mc) {
order(as.vector(mc), decreasing = TRUE)[seq_len(ntaxa)]
})
idx <- split(as.vector(idx),
unlist(lapply(seq_len(length(idx) / ntaxa),rep.int,ntaxa)))
}
ans <- lapply(mapply(function(i,j,x){x[i,j]},
i = idx,
j = seq_len(ncol(mat)),
MoreArgs = list(x = mat),
SIMPLIFY = FALSE),
sum)
ans <- unlist(ans)
# Adds sample names to the table
names(ans) <- colnames(mat)
ans
}
.get_dominance_values <- function(
index, mat, ntaxa = 1, aggregate = TRUE, ...) {
FUN <- switch(index,
simpson_lambda = .simpson_lambda,
core_abundance = .calc_core_dominance,
gini = .calc_gini_dominance,
absolute = .calc_dominance,
relative = .calc_dominance,
dbp = .calc_dominance,
dmn = .calc_dominance
)
res <- FUN(index, mat = mat, ntaxa = ntaxa, aggregate = aggregate, ...)
res <- unname(res)
return(res)
}