forked from microbiome/mia
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgetCrossAssociation.Rd
323 lines (272 loc) · 12.3 KB
/
getCrossAssociation.Rd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/AllGenerics.R, R/getCrossAssociation.R
\name{getCrossAssociation}
\alias{getCrossAssociation}
\alias{getCrossAssociation,MultiAssayExperiment-method}
\alias{getCrossAssociation,SummarizedExperiment-method}
\title{Calculate correlations between features of two experiments.}
\usage{
getCrossAssociation(x, ...)
\S4method{getCrossAssociation}{MultiAssayExperiment}(
x,
experiment1 = 1,
experiment2 = 2,
assay.type1 = assay_name1,
assay_name1 = NULL,
assay.type2 = assay_name2,
assay_name2 = NULL,
altexp1 = NULL,
altexp2 = NULL,
col.var1 = colData_variable1,
colData_variable1 = NULL,
col.var2 = colData_variable2,
colData_variable2 = NULL,
by = MARGIN,
MARGIN = 1,
method = "kendall",
mode = "table",
p.adj.method = p_adj_method,
p_adj_method = c("fdr", "BH", "bonferroni", "BY", "hochberg", "holm", "hommel", "none"),
p.adj.threshold = p_adj_threshold,
p_adj_threshold = NULL,
cor.threshold = cor_threshold,
cor_threshold = NULL,
sort = FALSE,
filter.self.cor = filter_self_correlations,
filter_self_correlations = FALSE,
verbose = TRUE,
test.signif = test_significance,
test_significance = FALSE,
show.warnings = show_warnings,
show_warnings = TRUE,
paired = FALSE,
...
)
\S4method{getCrossAssociation}{SummarizedExperiment}(x, experiment2 = x, ...)
}
\arguments{
\item{x}{A
\code{\link[MultiAssayExperiment:MultiAssayExperiment-class]{MultiAssayExperiment}}
or
\code{\link[SummarizedExperiment:SummarizedExperiment-class]{SummarizedExperiment}}
object.}
\item{...}{Additional arguments:
\itemize{
\item \code{symmetric}: \code{Logical scalar}. Specifies if
measure is symmetric or not. When \code{symmetric = TRUE}, associations
are calculated only for unique variable-pairs, and they are assigned to
corresponding variable-pair. This halves the number of calculations,
resulting in faster execution. (By default: \code{FALSE})
\item \code{association.fun}: A function that is used to calculate
(dis-)similarity between features. Function must take matrix as an input
and give numeric values as an output. Adjust \code{method} and other
parameters correspondingly. Supported functions are, for example,
\code{stats::dist} and \code{vegan::vegdist}.
\item \code{dimred1}: \code{Character scalar} or \code{numeric scalar}.
Specifies reduced dimensionality from the \code{reducedDim} of
experiment 1. (Default: \code{NULL})
\item \code{dimred2}: \code{Character scalar} or \code{numeric scalar}.
Specifies reduced dimensionality from the \code{reducedDim} of experiment
2. (Default: \code{NULL})
}}
\item{experiment1}{\code{Character scalar} or \code{numeric scalar}.
Selects the experiment 1 from \code{experiments(x)} of
\code{MultiAssayExperiment} object. (Default: \code{1})}
\item{experiment2}{\code{Character scalar} or \code{numeric scalar}.
Selects the experiment 2 from \code{experiments(x)} of
\code{MultiAssayExperiment} object or
\code{altExp(x)} of \code{TreeSummarizedExperiment} object. Alternatively,
\code{experiment2} can also be \code{TreeSE} object when \code{x} is
\code{TreeSE} object. (Default: \code{2} when \code{x} is \code{MAE} and
\code{x} when \code{x} is \code{TreeSE})}
\item{assay.type1}{\code{Character scalar}. Specifies the name of the assay
in experiment 1 to be transformed. (Default: \code{NULL})}
\item{assay_name1}{Deprecated. Use \code{assay.type1} instead.}
\item{assay.type2}{\code{Character scalar}. Specifies the name of the
assay in experiment 2 to be transformed. (Default: \code{NULL})}
\item{assay_name2}{Deprecated. Use \code{assay.type2} instead.}
\item{altexp1}{\code{Character scalar} or \code{numeric scalar}. Specifies
alternative experiment from the \code{altExp} of experiment 1. If
\code{NULL}, the experiment itself is used and the altExp option is disabled.
(Default: \code{NULL})}
\item{altexp2}{\code{Character scalar} or \code{numeric scalar}. Specifies
alternative experiment from the \code{altExp} of experiment 2. If
\code{NULL}, the experiment itself is used and the altExp option is disabled.
(Default: \code{NULL})}
\item{col.var1}{\code{Character scalar}. Specifies column(s) from
\code{colData} of experiment 1. If col.var1 is used, assay.type1 is disabled.
(Default: \code{NULL})}
\item{colData_variable1}{Deprecated. Use \code{col.var1} instead.}
\item{col.var2}{\code{Character scalar}. Specifies column(s) from colData
of experiment 2. If col.var2 is used, assay.type2 is disabled.
(Default: \code{NULL})}
\item{colData_variable2}{Deprecated. Use \code{col.var2} instead.}
\item{by}{\code{Character scalar}. Determines if associations are calculated
row-wise / for features ('rows') or column-wise / for samples ('cols').
Must be \code{'rows'} or \code{'cols'}.}
\item{MARGIN}{Deprecated. Use \code{by} instead.}
\item{method}{\code{Character scalar}. Defines the association method
('kendall', 'pearson', or 'spearman' for continuous/numeric; 'categorical'
for discrete) (Default: \code{"kendall"})}
\item{mode}{\code{Character scalar}. Specifies the output format.
Available formats are 'table' and 'matrix'. (Default: \code{"table"})}
\item{p.adj.method}{\code{Character scalar}. Specifies adjustment method of
p-values. Passed to \code{p.adjust} function.
(Default: \code{"fdr"})}
\item{p_adj_method}{Deprecated. Use \code{p.adj.method} instead.}
\item{p.adj.threshold}{\code{Numeric scalar}. From \code{0 to 1}, specifies
adjusted p-value threshold for filtering.
(Default: \code{NULL})}
\item{p_adj_threshold}{Deprecated. Use \code{p.adj.threshold} instead.}
\item{cor.threshold}{\code{Numeric scalar}. From \code{0 to 1}, specifies
correlation threshold for filtering.
(Default: \code{NULL})}
\item{cor_threshold}{Deprecated. Use \code{cor.threshold} instead.}
\item{sort}{\code{Logical scalar}. Specifies whether to sort features or not
in result matrices. Used method is hierarchical clustering.
(Default: \code{FALSE})}
\item{filter.self.cor}{\code{Logical scalar}. Specifies whether to
filter out correlations between identical items. Applies only when
correlation between experiment itself is tested, i.e., when assays are
identical. (Default: \code{FALSE})}
\item{filter_self_correlations}{Deprecated. Use \code{filter.self.cor}
instead.}
\item{verbose}{\code{Logical scalar}. Specifies whether to get messages
about progress of calculation. (Default: \code{TRUE})}
\item{test.signif}{\code{Logical scalar}. Specifies whether to test
statistical significance of associations.
(Default: \code{FALSE})}
\item{test_significance}{Deprecated. Use \code{test.signif} instead.}
\item{show.warnings}{\code{Logical scalar}. Specifies whether to show
warnings that might occur when correlations and p-values are calculated.
(Default: \code{TRUE})}
\item{show_warnings}{Deprecated. Use \code{show.warnings} instead.}
\item{paired}{\code{Logical scalar}. Specifies if samples are paired or not.
\code{colnames} must match between two experiments. \code{paired} is disabled
when \code{by = 1}. (Default: \code{FALSE})}
}
\value{
This function returns associations in table or matrix format. In table
format, returned value is a data frame that includes features and
associations (and p-values) in columns. In matrix format, returned value
is a single matrix when only associations are calculated. If also significances
are tested, then returned value is a list of matrices.
}
\description{
Calculate correlations between features of two experiments.
}
\details{
The function \code{getCrossAssociation} calculates associations between
features of two experiments. By default, it only computes associations;
setting \code{test.signif} to \code{TRUE} additionally tests their
significance.
We recommend the non-parametric Kendall's tau as the default method for
association analysis. Kendall's tau has desirable statistical properties and
robustness at lower sample sizes. Spearman rank correlation can provide
faster solutions when running times are critical.
}
\examples{
data(HintikkaXOData)
mae <- HintikkaXOData
# Subset so that there are fewer observations / quicker to run, just for example
mae[[1]] <- mae[[1]][1:20, 1:10]
mae[[2]] <- mae[[2]][1:20, 1:10]
# Several rows in the counts assay have a standard deviation of zero
# Remove them, since they do not add useful information about
# cross-association
mae[[1]] <- mae[[1]][rowSds(assay(mae[[1]])) > 0, ]
# Transform data
mae[[1]] <- transformAssay(mae[[1]], method = "rclr")
# Calculate cross-correlations
result <- getCrossAssociation(
mae, method = "pearson", assay.type1 = "counts", assay.type2 = "nmr",
show.warnings = FALSE, verbose = FALSE)
# Show first 5 entries
head(result, 5)
# Use altExp option to specify alternative experiment from the experiment
altExp(mae[[1]], "Phylum") <- agglomerateByRank(mae[[1]], rank = "Phylum")
# Transform data
altExp(mae[[1]], "Phylum") <- transformAssay(
altExp(mae[[1]], "Phylum"), method = "relabundance")
# When mode = "matrix", the return value is a matrix
result <- getCrossAssociation(
mae, experiment2 = 2, assay.type1 = "relabundance", assay.type2 = "nmr",
altexp1 = "Phylum", method = "pearson", mode = "matrix",
show.warnings = FALSE, verbose = FALSE)
# Show first 5 entries
head(result, 5)
# If test.signif = TRUE, then getCrossAssociation additionally returns
# significances
# filter.self.cor = TRUE filters self correlations
# p.adj.threshold can be used to filter those features that do not
# have any correlations whose p-value is lower than the threshold
result <- getCrossAssociation(
mae[[1]], experiment2 = mae[[1]], method = "pearson",
assay.type1 = "counts", assay.type2 = "counts",
filter.self.cor = TRUE, p.adj.threshold = 0.05, test.signif = TRUE,
show.warnings = FALSE, verbose = FALSE)
# Show first 5 entries
head(result, 5)
# Returned value is a list of matrices
names(result)
# Calculate Bray-Curtis dissimilarity between samples. If dataset includes
# paired samples, you can use paired = TRUE.
result <- getCrossAssociation(
mae[[1]], mae[[1]], by = 2, paired = FALSE,
assay.type1 = "counts", assay.type2 = "counts",
association.fun = getDissimilarity, method = "bray",
show.warnings = FALSE, verbose = FALSE)
# If experiments are equal and measure is symmetric
# (e.g., taxa1 vs taxa2 == taxa2 vs taxa1),
# it is possible to speed-up calculations by calculating association only
# for unique variable-pairs. Use "symmetric" to choose whether to measure
# association for only other half of variable-pairs.
result <- getCrossAssociation(
mae, experiment1 = "microbiota", experiment2 = "microbiota",
assay.type1 = "counts", assay.type2 = "counts", symmetric = TRUE,
show.warnings = FALSE, verbose = FALSE)
# For big data sets, the calculations might take a long time.
# To speed them up, you can take a random sample from the data.
# When dealing with complex biological problems, random samples can be
# enough to describe the data. Here, our random sample is 30 \% of whole data.
sample_size <- 0.3
tse <- mae[[1]]
tse_sub <- tse[ sample( seq_len( nrow(tse) ), sample_size * nrow(tse) ), ]
result <- getCrossAssociation(
tse_sub, assay.type1 = "counts", assay.type2 = "counts",
show.warnings = FALSE, verbose = FALSE)
# It is also possible to choose variables from colData and calculate
# association between assay and sample metadata or between variables of
# sample metadata
mae[[1]] <- addAlpha(mae[[1]])
# col.var works similarly to assay.type. Instead of fetching
# an assay named assay.type from assay slot, it fetches a column named
# col.var from colData.
result <- getCrossAssociation(
mae[[1]], assay.type1 = "counts",
col.var2 = c("shannon_diversity", "coverage_diversity"),
test.signif = TRUE, show.warnings = FALSE, verbose = FALSE)
# If your data contains TreeSE with alternative experiment in altExp,
# correlations can be calculated as follows.
# Create TreeSE with altExp
tse <- mae[[1]]
altExp(tse, "metabolites") <- mae[[2]]
# Calculate
res <- getCrossAssociation(
tse,
altexp2 = "metabolites",
assay.type1 = "rclr",
assay.type2 = "nmr",
show.warnings = FALSE, verbose = FALSE
)
# To calculate correlation of features to principal coordinates, you have to
# first calculate PCoA...
library(scater)
tse <- addMDS(tse, assay.type = "rclr", method = "euclidean")
# ...then calculate the correlation.
res <- getCrossAssociation(tse, assay.type1 = "rclr", dimred2 = "MDS",
show.warnings = FALSE, verbose = FALSE)
head(res)
}