forked from microbiome/mia
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathaddNMF.Rd
103 lines (87 loc) · 3.47 KB
/
addNMF.Rd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/AllGenerics.R, R/addNMF.R
\name{getNMF}
\alias{getNMF}
\alias{addNMF}
\alias{getNMF,SummarizedExperiment-method}
\alias{addNMF,SummarizedExperiment-method}
\title{Non-negative Matrix Factorization}
\usage{
getNMF(x, ...)
addNMF(x, ...)
\S4method{getNMF}{SummarizedExperiment}(x, k = 2, assay.type = "counts", eval.metric = "evar", ...)
\S4method{addNMF}{SummarizedExperiment}(
x,
k = 2,
assay.type = "counts",
eval.metric = "evar",
name = "NMF",
...
)
}
\arguments{
\item{x}{a
\code{\link[TreeSummarizedExperiment:TreeSummarizedExperiment-class]{TreeSummarizedExperiment}}
object.}
\item{...}{optional arguments passed to \code{NMF::nmf}.}
\item{k}{\code{Numeric vector}. The number of latent vectors/topics. If
several values are given, a model is fitted for each of them.
(Default: \code{2})}
\item{assay.type}{\code{Character scalar}. Specifies which assay to use for
NMF ordination. (Default: \code{"counts"})}
\item{eval.metric}{\code{Character scalar}. Specifies the evaluation metric
that will be used to select the model with the best fit. Must be one of the
following options: \code{"evar"} (explained variance; maximized),
\code{"sparseness.basis"} (degree of sparsity in the basis matrix;
maximized), \code{"sparseness.coef"} (degree of sparsity in the coefficient
matrix; maximized), \code{"rss"} (residual sum of squares; minimized),
\code{"silhouette.coef"} (quality of clustering based on the coefficient
matrix; maximized), \code{"silhouette.basis"} (quality of clustering based
on the basis matrix; maximized), \code{"cophenetic"} (correlation between
cophenetic distances and original distances; maximized), \code{"dispersion"}
(spread of data points within clusters; minimized). (Default: \code{"evar"})}
\item{name}{\code{Character scalar}. The name to be used to store the result
in the reducedDims of the output. (Default: \code{"NMF"})}
}
\value{
For \code{getNMF}, the ordination matrix, with the feature loadings matrix
stored as the attribute \code{"loadings"}.
For \code{addNMF}, a
\code{\link[TreeSummarizedExperiment:TreeSummarizedExperiment-class]{TreeSummarizedExperiment}}
object is returned containing the ordination matrix in
\code{reducedDims(x, name)} with the following attributes:
\itemize{
\item "loadings" which is a matrix containing the feature loadings
\item "NMF_output" which is the output of function \code{NMF::nmf}
\item "best_fit" which is the result of the best fit if k is a vector of
integers
}
}
\description{
These functions perform Non-negative Matrix Factorization on data stored in a
\code{\link[TreeSummarizedExperiment:TreeSummarizedExperiment-class]{TreeSummarizedExperiment}}
object.
}
\details{
The functions \code{getNMF} and \code{addNMF} internally use \code{NMF::nmf}
to compute the ordination matrix and
feature loadings.
If \code{k} is a vector of integers, the NMF output is calculated for each of
the rank values contained in \code{k}, and the best fit is selected based on
the value of \code{eval.metric}.
}
\examples{
data(GlobalPatterns)
tse <- GlobalPatterns
# Reduce the number of features
tse <- agglomerateByPrevalence(tse, rank = "Phylum")
# Run NMF and add the result to reducedDim(tse, "NMF").
tse <- addNMF(tse, k = 2, name = "NMF")
# Extract feature loadings
loadings_NMF <- attr(reducedDim(tse, "NMF"), "loadings")
head(loadings_NMF)
# Estimate models with number of topics from 2 to 4. Perform 2 runs.
tse <- addNMF(tse, k = c(2, 3, 4), name = "NMF_4", nrun = 2)
# Extract feature loadings
loadings_NMF_4 <- attr(reducedDim(tse, "NMF_4"), "loadings")
head(loadings_NMF_4)
}