forked from xlucpu/MOVICS
-
Notifications
You must be signed in to change notification settings - Fork 0
/
getMOIC.R
138 lines (129 loc) · 6.98 KB
/
getMOIC.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
#' @name getMOIC
#' @title Get subtypes from multi-omics integrative clustering
#' @description Using `getMOIC()`, users can run one or more of the ten algorithms embedded in `MOVICS`. Multi-omics clustering can be performed in the simplest way: the only requirement is to specify a list of matrices (argument `data`), a number of clusters (argument `N.clust`), and the clustering method (argument `methodslist`) in `getMOIC()`. It is possible to pass various arguments that are specific to each method. Users can also call the individual algorithms directly through the functions whose names start with `get` and end with the name of the algorithm (e.g., `getSNF`; please refer to `?get%algorithm_name%` for more details about the editable arguments).
#' @param data List of matrices (Maximum number of matrices is 6).
#' @param methodslist A string list specifying one or multiple methods to run (See Details).
#' @param N.clust Number of clusters.
#' @param type Data type corresponding to the list of matrices, which can be gaussian, binomial or poisson.
#' @param ... Additional parameters for each method (only works when only one method is chosen)
#' @examples # There is no example and please refer to vignette.
#' @export
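#' @return A list of results returned by each specified algorithm, named by algorithm; when only one algorithm is specified, the result of that algorithm is returned directly.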
#' @import SNFtool
#' @import IntNMF
#' @import mogsa
#' @import coca
#' @import iClusterPlus
#' @import CIMLR
#' @import PINSPlus
#' @import ConsensusClusterPlus
#' @details
#' Method for integrative clustering will be chosen according to the value of argument 'methodslist':
#'
#' If \code{methodslist == "IntNMF"}, Integrative clustering methods using Non-Negative Matrix Factorization
#'
#' If \code{methodslist == "SNF"}, Similarity network fusion.
#'
#' If \code{methodslist == "LRAcluster"}, Integrated cancer omics data analysis by low rank approximation.
#'
#' If \code{methodslist == "PINSPlus"}, Perturbation Clustering for data integration and disease subtyping
#'
#' If \code{methodslist == "ConsensusClustering"}, Consensus clustering
#'
#' If \code{methodslist == "NEMO"}, Neighborhood based multi-omics clustering
#'
#' If \code{methodslist == "COCA"}, Cluster Of Clusters Analysis
#'
#' If \code{methodslist == "CIMLR"}, Cancer Integration via Multikernel Learning (Support Feature Selection)
#'
#' If \code{methodslist == "MoCluster"}, Identifying joint patterns across multiple omics data sets (Support Feature Selection)
#'
#' If \code{methodslist == "iClusterBayes"}, Integrative clustering of multiple genomic data by fitting a Bayesian latent variable model (Support Feature Selection)
#'
#' @references
#' Pierre-Jean M, Deleuze J F, Le Floch E, et al. Clustering and variable selection evaluation of 13 unsupervised methods for multi-omics data integration[J]. Briefings in Bioinformatics, 2019.
#'
#' intNMF:
#' Chalise P, Fridley BL. Integrative clustering of multi-level omic data based on non-negative matrix factorization algorithm. PLoS One. 2017;12(5):e0176278.
#'
#' iClusterBayes:
#' Mo Q, Shen R, Guo C, Vannucci M, Chan KS, Hilsenbeck SG. A fully Bayesian latent variable model for integrative clustering analysis of multi-type omics data. Biostatistics. 2018;19(1):71-86.
#'
#' SNF:
#' Wang B, Mezlini AM, Demir F, et al. Similarity network fusion for aggregating data types on a genomic scale. Nat Methods. 2014;11(3):333-337.
#'
#' Mocluster:
#' Meng C, Helm D, Frejno M, Kuster B. moCluster: Identifying Joint Patterns Across Multiple Omics Data Sets. J Proteome Res. 2016;15(3):755-765.
#'
#' LRAcluster:
#' Wu D, Wang D, Zhang MQ, Gu J. Fast dimension reduction and integrative clustering of multi-omics data using low-rank approximation: application to cancer molecular classification. BMC Genomics. 2015;16:1022.
#'
#' CIMLR:
#' Ramazzotti D, Lal A, Wang B, Batzoglou S, Sidow A. Multi-omic tumor data reveal diversity of molecular mechanisms that correlate with survival. Nat Commun. 2018;9(1):4453.
#'
#' PINSPlus:
#' Nguyen H, Shrestha S, Draghici S, Nguyen T. PINSPlus: a tool for tumor subtype discovery in integrated genomic data. Bioinformatics. 2019;35(16):2843-2846.
#'
#' ConsensusClustering:
#' Monti S, Tamayo P, Mesirov J, et al. Consensus Clustering: A Resampling-Based Method for Class Discovery and Visualization of Gene Expression Microarray Data. Machine Learning. 2003;52:91-118.
#'
#' NEMO:
#' Rappoport N, Shamir R. NEMO: cancer subtyping by integration of partial multi-omic data. Bioinformatics. 2019;35(18):3348-3356.
#'
#' COCA:
#' Hoadley KA, Yau C, Wolf DM, et al. Multiplatform analysis of 12 cancer types reveals molecular classification within and across tissues of origin. Cell. 2014;158(4):929-944.
getMOIC <- function(data        = NULL,
                    methodslist = list("SNF", "CIMLR", "PINSPlus", "NEMO", "COCA", "MoCluster", "LRAcluster", "ConsensusClustering", "IntNMF", "iClusterBayes"),
                    N.clust     = NULL,
                    type        = rep("gaussian", length(data)),
                    ...){
  # Validate the inputs, then run every requested algorithm by dispatching to
  # the matching get<Algorithm>() function with (data, N.clust, type).
  #
  # Arguments:
  #   data        list of 2 to 6 two-dimensional objects with equal column counts.
  #   methodslist character vector or list naming the algorithm(s) to run.
  #   N.clust     passed through unchanged to each algorithm.
  #   type        passed through unchanged to each algorithm.
  #   ...         forwarded to the algorithm only when exactly one is chosen;
  #               ignored (with a warning) when several are chosen.
  #
  # Returns the single algorithm result when one method is requested,
  # otherwise a list of results named by method.

  # single source of truth for the algorithm names accepted in 'methodslist'
  supported.methods <- c("SNF", "CIMLR", "PINSPlus", "NEMO", "COCA", "MoCluster", "LRAcluster", "ConsensusClustering", "IntNMF", "iClusterBayes")

  # check argument
  if (!is.list(data)) {
    stop("data is not a list!")
  }

  n_dat <- length(data)
  if (n_dat > 6) {
    stop('current verision of MOVICS can support up to 6 omics data.')
  }
  if (n_dat < 2) {
    stop('current verision of MOVICS needs at least 2 omics data.')
  }

  if (is.null(names(data))) {
    names(data) <- sprintf("dat%s", seq_along(data))
  }

  # flatten so that a character vector and a list of strings behave alike
  methods <- unlist(methodslist)
  num.methods <- length(methods)

  # all(logical(0)) is TRUE, so an empty selection would otherwise slip past
  # the whitelist check below and silently return an empty list
  if (num.methods == 0) {
    stop("methodslist must specify at least one algorithm.")
  }
  if (!all(is.element(methods, supported.methods))) {
    stop("current version of MOVICS supports 10 algorithms. Allowed values contain c('SNF', 'CIMLR', 'PINSPlus', 'NEMO', 'COCA', 'MoCluster', 'LRAcluster', 'ConsensusClustering', 'IntNMF', 'iClusterBayes').")
  }

  # Method-specific arguments are only meaningful for a single algorithm:
  # forwarding them to several would hand each algorithm arguments meant for
  # another one, so with several algorithms they are dropped and defaults used.
  forward.dots <- TRUE
  if (num.methods > 1) {
    message("--you choose more than 1 algorithm and all of them shall be run with parameters by default.")
    if (...length() > 0) {
      warning("additional arguments in '...' are ignored when more than 1 algorithm is chosen.",
              call. = FALSE)
      forward.dots <- FALSE
    }
  }

  # Check dimension
  # every element needs rows and columns before column counts can be compared
  is.2d <- vapply(data, function(x) length(dim(x)) == 2L, logical(1))
  if (!all(is.2d)) {
    stop("every element of data must be a matrix or data frame with two dimensions.")
  }
  n_samples <- vapply(data, function(x) as.integer(ncol(x)), integer(1))
  if (max(n_samples) != min(n_samples)) {
    message(sprintf("number of samples in dat %s is %s\n", seq_along(data), n_samples))
    stop("data do not contain the same number of samples!")
  }

  reslist <- list()
  for (method in methods) {
    # switch() only evaluates the selected arm, so only the requested
    # algorithm function is looked up
    doMOIC <- switch(method,
                     "IntNMF"              = getIntNMF,
                     "iClusterBayes"       = getiClusterBayes,
                     "SNF"                 = getSNF,
                     "MoCluster"           = getMoCluster,
                     "LRAcluster"          = getLRAcluster,
                     "CIMLR"               = getCIMLR,
                     "PINSPlus"            = getPINSPlus,
                     "ConsensusClustering" = getConsensusClustering,
                     "NEMO"                = getNEMO,
                     "COCA"                = getCOCA
    )
    reslist[[method]] <- if (forward.dots) {
      doMOIC(data, N.clust, type, ...)
    } else {
      doMOIC(data, N.clust, type)
    }
    message(paste0(method, " done..."))
  }

  # a single algorithm returns its result directly instead of a one-element list
  if (num.methods == 1) {
    return(reslist[[1]])
  }
  reslist
}