latest files

infWang · Nov 2, 2020 · a8637ba · a8637ba
1 parent b8b2257
commit a8637ba
Show file tree

Hide file tree

Showing 35 changed files with 699 additions and 168 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,14 +1,14 @@
 Package: MOVICS
 Title: Multi-Omics integration and VIsualization in Cancer Subtyping
-Version: 0.99.3
+Version: 0.99.4
 Authors@R: 
     person(given = "Xiaofan",
            family = "Lu",
            role = c("aut", "cre"),
            email = "[email protected]",
            comment = c(ORCID = "0000-0003-2417-6548"))
 Description: This package provides a unified interface for 10 state-of-the-art multi-omics clustering algorithms, and forms a pipeline for most commonly used downstream analyses in cancer subtyping researches and to create feature rich customizable visualzations with minimal effort.
-Depends: R (>= 4.0.2)
+Depends: R (>= 4.0.1)
 License: GPL-3 + file LICENSE
 Encoding: UTF-8
 LazyData: true
@@ -81,6 +81,7 @@ Collate:
     'getClustNum.R'
     'getConsensusClustering.R'
     'getConsensusMOIC.R'
+    'getSilhouette.R'
     'getElites.R'
     'getIntNMF.R'
     'getLRAcluster.R'
@@ -94,10 +95,11 @@ Collate:
     'getiClusterBayes.R'
     'runDEA.R'
     'runGSEA.R'
+    'runGSVA.R'
     'runMarker.R'
     'runNTP.R'
-	'runPAM.R'
-	'runKappa.R'
+   	'runPAM.R'
+	  'runKappa.R'
     'pRRophetic.R'
     'LRAcluster.R'
     'quiet.R'

diff --git a/MOVICS.pdf b/MOVICS.pdf
diff --git a/NAMESPACE b/NAMESPACE
@@ -21,10 +21,12 @@ export(getMoHeatmap)
 export(getNEMO)
 export(getPINSPlus)
 export(getSNF)
+export(getSilhouette)
 export(getStdiz)
 export(getiClusterBayes)
 export(runDEA)
 export(runGSEA)
+export(runGSVA)
 export(runKappa)
 export(runMarker)
 export(runNTP)
@@ -37,6 +39,7 @@ import(ConsensusClusterPlus)
 import(IntNMF)
 import(PINSPlus)
 import(SNFtool)
+import(cluster)
 import(coca)
 import(ggalluvial)
 import(ggplot2)

diff --git a/R/compAgree.R b/R/compAgree.R
@@ -3,13 +3,13 @@
 #' @description Compute the Rand Index, Jaccard Index, Fowlkes-Mallows, and Normalized Mutual Information for agreement of two partitions, and generate alluvial diagrams for visualization.
 #' @param moic.res An object returned by `getMOIC()` with one specified algorithm or `get\%algorithm_name\%` or `getConsensusMOIC()` with a list of multiple algorithms.
 #' @param subt2comp A data.frame of subtypes that need to compare with current subtype with rownames for samples and columns for other subtypes.
-#' @param doPlot A logic value to indicate if generating alluvial diagram to show the agreement of different subtypes.
+#' @param doPlot A logic value to indicate if generating alluvial diagram to show the agreement of different subtypes; TRUE by default.
 #' @param box.width A numeric valur to indicate the width for box in alluvial diagram.
 #' @param clust.col A string vector storing colors for each cluster.
 #' @param width A numeric value to indicate the width of alluvial diagram.
 #' @param height A numeric value to indicate the height of alluvial diagram.
-#' @param fig.path A string value to indicate the output path for storing the figure
-#' @param fig.name A string value to indicate the name of the figure
+#' @param fig.path A string value to indicate the output path for storing the figure.
+#' @param fig.name A string value to indicate the name of the figure.
 #' @return A figure of agreement (.pdf) if \code{doPlot = TRUE} and a data.frame storing four agreement measurements, including Rand Index (RI), Adjusted Mutual Information (AMI), Jaccard Index (JI), and Fowlkes-Mallows (FM).
 #' @export
 #' @import ggplot2
@@ -24,7 +24,7 @@
 compAgree <- function(moic.res  = NULL,
                       subt2comp = NULL,
                       doPlot    = TRUE,
-                      clust.col = c("#2EC4B6","#E71D36","#FF9F1C","#BDD5EA","#FFA5AB","#011627","#023E8A","9D4EDD"),
+                      clust.col = c("#2EC4B6","#E71D36","#FF9F1C","#BDD5EA","#FFA5AB","#011627","#023E8A","#9D4EDD"),
                       box.width = 0.1,
                       fig.name  = NULL,
                       fig.path  = getwd(),

diff --git a/R/compDrugsen.R b/R/compDrugsen.R
@@ -11,7 +11,7 @@
 #' @param seed A integer value to indicate the seed for reproducing ridge regression.
 #' @param width A numeric value to indicate the width of boxviolin plot.
 #' @param height A numeric value to indicate the height of boxviolin plot.
-#' @param test.method A string value to indicate the method for statistical testing. Allowed values contain c('nonparametric', 'parametric'); nonparametric means two-sample wilcoxon rank sum test for two subtypes and Kruskal-Wallis rank sum test for multiple subtypes; parametric means two-sample t-test when only two subtypes are identified, and anova for multiple subtypes comparison.
+#' @param test.method A string value to indicate the method for statistical testing. Allowed values contain c('nonparametric', 'parametric'); nonparametric means two-sample wilcoxon rank sum test for two subtypes and Kruskal-Wallis rank sum test for multiple subtypes; parametric means two-sample t-test when only two subtypes are identified, and anova for multiple subtypes comparison; "nonparametric" by default.
 #' @return Data.frame(s) storing the estimated IC50 of specified drugs per sample within each Subtype.
 #' @export
 #' @import ggplot2
@@ -25,7 +25,7 @@ compDrugsen <- function(moic.res    = NULL,
                         drugs       = c("Cisplatin", "Paclitaxel"),
                         tissueType  = "all",
                         test.method = "nonparametric",
-                        clust.col   = c("#2EC4B6","#E71D36","#FF9F1C","#BDD5EA","#FFA5AB","#011627","#023E8A","9D4EDD"),
+                        clust.col   = c("#2EC4B6","#E71D36","#FF9F1C","#BDD5EA","#FFA5AB","#011627","#023E8A","#9D4EDD"),
                         prefix      = NULL,
                         seed        = 123456,
                         fig.path    = getwd(),
@@ -90,15 +90,27 @@ compDrugsen <- function(moic.res    = NULL,
     # generate boxviolin plot with statistical testing
     if(n.moic == 2 & test.method == "nonparametric") {
       statistic = "wilcox.test"
+      ic50.test  <- wilcox.test(predictedBoxdat[[drug]]$Est.IC50 ~ predictedBoxdat[[drug]]$Subtype)$p.value
+      cat(paste0("Wilcoxon rank sum test p value = ", formatC(ic50.test, format = "e", digits = 2), " for ", drug))
     }
     if(n.moic == 2 & test.method == "parametric") {
       statistic = "t.test"
+      ic50.test  <- t.test(predictedBoxdat[[drug]]$Est.IC50 ~ predictedBoxdat[[drug]]$Subtype)$p.value
+      cat(paste0("Student's t test p value = ", formatC(ic50.test, format = "e", digits = 2), " for ", drug))
     }
     if(n.moic > 2 & test.method == "nonparametric") {
       statistic = "kruskal.test"
+      ic50.test  <- kruskal.test(predictedBoxdat[[drug]]$Est.IC50 ~ predictedBoxdat[[drug]]$Subtype)$p.value
+      pairwise.ic50.test <- pairwise.wilcox.test(predictedBoxdat[[drug]]$Est.IC50,predictedBoxdat[[drug]]$Subtype,p.adjust.method = "BH")
+      cat(paste0(drug,": Kruskal-Wallis rank sum test p value = ", formatC(TMB.test, format = "e", digits = 2),"\npost-hoc pairwise wilcoxon rank sum test with Benjamini-Hochberg adjustment presents below:"))
+      print(formatC(pairwise.ic50.test$p.value, format = "e", digits = 2))
     }
     if(n.moic > 2 & test.method == "parametric") {
       statistic = "anova"
+      ic50.test  <- summary(aov(predictedBoxdat[[drug]]$Est.IC50 ~ predictedBoxdat[[drug]]$Subtype))[[1]][["Pr(>F)"]][1]
+      pairwise.ic50.test <- pairwise.t.test(predictedBoxdat[[drug]]$Est.IC50,predictedBoxdat[[drug]]$Subtype,p.adjust.method = "BH")
+      cat(paste0(drug,": One-way anova test p value = ", formatC(ic50.test, format = "e", digits = 2),"\npost-hoc pairwise Student's t test with Benjamini-Hochberg adjustment presents below:"))
+      print(formatC(pairwise.ic50.test$p.value, format = "e", digits = 2))
     }
 
     p <- ggplot(data = predictedBoxdat[[drug]],

diff --git a/R/compFGA.R b/R/compFGA.R
@@ -5,7 +5,7 @@
 #' @param segment A data frame containing segmented copy number and columns must exactly include the following elements: c('sample','chrom','start','end','value'). Column of `value` should be segments value when \code{iscopynumber = FALSE} but copy-number value when \code{iscopynumber = TRUE}. Copy-number will be converted to segments by log2(copy-number/2).
 #' @param iscopynumber A logical value to indicate if the fifth column of segment input is copy-number. If segment file derived from CNV calling provides copy number instead of segment_mean value, this argument must be switched to TRUE. FALSE by default.
 #' @param cnathreshold A numeric value to indicate the cutoff for identifying copy-number gain or loss. 0.2 by default.
-#' @param test.method A string value to indicate the method for statistical testing. Allowed values contain c('nonparametric', 'parametric'); nonparametric means two-sample wilcoxon rank sum test for two subtypes and Kruskal-Wallis rank sum test for multiple subtypes; parametric means two-sample t-test when only two subtypes are identified, and anova for multiple subtypes comparison.
+#' @param test.method A string value to indicate the method for statistical testing. Allowed values contain c('nonparametric', 'parametric'); nonparametric means two-sample wilcoxon rank sum test for two subtypes and Kruskal-Wallis rank sum test for multiple subtypes; parametric means two-sample t-test when only two subtypes are identified, and anova for multiple subtypes comparison; "nonparametric" by default.
 #' @param barcolor A string vector to indicate the mapping color for bars of FGA, FGG and FGL.
 #' @param clust.col A string vector storing colors for each subtype.
 #' @param fig.path A string value to indicate the output path for storing the barplot.
@@ -14,15 +14,21 @@
 #' @param height A numeric value to indicate the height of barplot.
 #' @return A list contains the following components:
 #'
-#'         \code{summary}     a table summarizing the measurements of FGA, FGG, and FGL per sample
+#'         \code{summary}           a table summarizing the measurements of FGA, FGG, and FGL per sample
 #'
-#'         \code{FGA.p.value} a nominal p value quantifying the difference of FGA among current subtypes
+#'         \code{FGA.p.value}       a nominal p value quantifying the difference of FGA among current subtypes
 #'
-#'         \code{FGG.p.value} a nominal p value quantifying the difference of FGG among current subtypes
+#'         \code{pairwise.FGA.test} a pairwise BH adjusted p value matrix for multiple comparisons of FGA
 #'
-#'         \code{FGL.p.value} a nominal p value quantifying the difference of FGL among current subtypes
+#'         \code{FGG.p.value}       a nominal p value quantifying the difference of FGG among current subtypes
 #'
-#'         \code{test.method} a string value indicating the statistical testing method to calculate p values
+#'         \code{pairwise.FGG.test} a pairwise BH adjusted p value matrix for multiple comparisons of FGG
+#'
+#'         \code{FGL.p.value}       a nominal p value quantifying the difference of FGL among current subtypes
+#'
+#'         \code{pairwise.FGL.test} a pairwise BH adjusted p value matrix for multiple comparisons of FGL
+#'
+#'         \code{test.method}       a string value indicating the statistical testing method to calculate p values
 #'
 #' @export
 #' @import ggplot2
@@ -39,7 +45,7 @@ compFGA <- function(moic.res     = NULL,
                     cnathreshold = 0.2,
                     test.method  = "nonparametric",
                     barcolor     = c("#008B8A", "#F2042C", "#21498D"),
-                    clust.col    = c("#2EC4B6","#E71D36","#FF9F1C","#BDD5EA","#FFA5AB","#011627","#023E8A","9D4EDD"),
+                    clust.col    = c("#2EC4B6","#E71D36","#FF9F1C","#BDD5EA","#FFA5AB","#011627","#023E8A","#9D4EDD"),
                     fig.path     = getwd(),
                     fig.name     = NULL,
                     width        = 8,
@@ -146,13 +152,19 @@ compFGA <- function(moic.res     = NULL,
     FGA.test  <- kruskal.test(outTab$FGA ~ outTab$Subtype)$p.value
     FGG.test  <- kruskal.test(outTab$FGG ~ outTab$Subtype)$p.value
     FGL.test  <- kruskal.test(outTab$FGL ~ outTab$Subtype)$p.value
+    pairwise.FGA.test <- pairwise.wilcox.test(outTab$FGA,outTab$Subtype,p.adjust.method = "BH")
+    pairwise.FGG.test <- pairwise.wilcox.test(outTab$FGG,outTab$Subtype,p.adjust.method = "BH")
+    pairwise.FGL.test <- pairwise.wilcox.test(outTab$FGL,outTab$Subtype,p.adjust.method = "BH")
   }
 
   if(n.moic > 2 & test.method == "parametric") {
     statistic <- "anova"
     FGA.test  <- summary(aov(outTab$FGA ~ outTab$Subtype))[[1]][["Pr(>F)"]][1]
     FGG.test  <- summary(aov(outTab$FGG ~ outTab$Subtype))[[1]][["Pr(>F)"]][1]
     FGL.test  <- summary(aov(outTab$FGL ~ outTab$Subtype))[[1]][["Pr(>F)"]][1]
+    pairwise.FGA.test <- pairwise.t.test(outTab$FGA,outTab$Subtype,p.adjust.method = "BH")
+    pairwise.FGG.test <- pairwise.t.test(outTab$FGG,outTab$Subtype,p.adjust.method = "BH")
+    pairwise.FGL.test <- pairwise.t.test(outTab$FGL,outTab$Subtype,p.adjust.method = "BH")
   }
 
   # generate barplot
@@ -253,5 +265,12 @@ compFGA <- function(moic.res     = NULL,
   # print to screen
   print(pal)
 
-  return(list(summary = outTab, FGA.p.value = FGA.test, FGG.p.value = FGG.test, FGL.p.value = FGL.test, test.method = statistic))
+  return(list(summary = outTab,
+              FGA.p.value = FGA.test,
+              pairwise.FGA.test = pairwise.FGA.test,
+              FGG.p.value = FGG.test,
+              pairwise.FGG.test = pairwise.FGG.test,
+              FGL.p.value = FGL.test,
+              pairwise.FGL.test = pairwise.FGL.test,
+              test.method = statistic))
 }
diff --git a/R/compMut.R b/R/compMut.R
@@ -8,8 +8,8 @@
 #' @param p.adj.method A string value to indicate the correction method for multiple comparision. Allowed values contain c('holm', 'hochberg', 'hommel', 'bonferroni', 'BH', 'BY', 'fdr'); BH by default.
 #' @param tab.name A string value to indicate the name of the output table.
 #' @param res.path A string value to indicate the path for saving the table.
-#' @param doWord A logic value to indicate if transformating the .txt outfile to a .docx WORD file (.txt file will be also kept).
-#' @param doPlot A logic value to indicate if generating oncoprint.
+#' @param doWord A logic value to indicate if transformating the .txt outfile to a .docx WORD file (.txt file will be also kept); TRUE by default.
+#' @param doPlot A logic value to indicate if generating oncoprint; TRUE by default.
 #' @param innerclust A logic value to indicate if perform clustering within each subtype; TRUE by default.
 #' @param fig.path A string value to indicate the output path for storing the oncoprint.
 #' @param fig.name A string value to indicate the name of the oncoprint.
@@ -48,7 +48,7 @@ compMut <- function(moic.res     = NULL,
                     bg.col       = "#dcddde",
                     p.cutoff     = 0.05,
                     p.adj.cutoff = 0.05,
-                    clust.col    = c("#2EC4B6","#E71D36","#FF9F1C","#BDD5EA","#FFA5AB","#011627","#023E8A","9D4EDD"),
+                    clust.col    = c("#2EC4B6","#E71D36","#FF9F1C","#BDD5EA","#FFA5AB","#011627","#023E8A","#9D4EDD"),
                     width        = 8,
                     height       = 4) {
 

diff --git a/R/compSurv.R b/R/compSurv.R
@@ -4,18 +4,21 @@
 #' @param moic.res An object returned by `getMOIC()` with one specified algorithm or `get\%algorithm_name\%` or `getConsensusMOIC()` with a list of multiple algorithms.
 #' @param surv.info A data.frame with rownames of observations and with at least two columns of `futime` for survival time and `fustat` for survival status (0: censoring; 1: event)
 #' @param clust.col A string vector storing colors for each subtype.
-#' @param p.adjust.method A string value for indicating method for adjusting p values (see \link[stats]{p.adjust}). Allowed values include one of c(`holm`, `hochberg`, `hommel`, `bonferroni`, `BH`, `BY`, `fdr`, `none`).
+#' @param p.adjust.method A string value for indicating method for adjusting p values (see \link[stats]{p.adjust}). Allowed values include one of c(`holm`, `hochberg`, `hommel`, `bonferroni`, `BH`, `BY`, `fdr`, `none`); "BH" by default.
 #' @param fig.name A string value to indicate the output path for storing the kaplan-meier curve.
 #' @param fig.path A string value to indicate the name of the kaplan-meier curve.
-#' @param convt.time A string value to indicate how to convert the survival time; value of `d` for days, `m` for months and `y` for years.
-#' @param surv.median.line A string value for drawing a horizontal/vertical line at median survival. Allowed values include one of c(`none`, `hv`, `h`, `v`). v: vertical, h:horizontal.
+#' @param convt.time A string value to indicate how to convert the survival time; value of `d` for days, `m` for months and `y` for years; "d" by default.
+#' @param surv.median.line A string value for drawing a horizontal/vertical line at median survival. Allowed values include one of c(`none`, `hv`, `h`, `v`). v: vertical, h:horizontal; "none" by default.
+#' @param xyrs.est An integer vector to estimate probability of surviving beyond a certain number (x) of years (Estimating x-year survival); NULL by default.
 #' @param surv.cut A numeric value to indicate the x-axis cutoff for showing the maximal survival time.
 #' @return A figure of multi-omics Kaplan-Meier curve (.pdf) and a list with the following components:
 #'
 #'         \code{fitd}       an object returned by \link[survival]{survdiff}.
 #'
 #'         \code{fid}        an object returned by \link[survival]{survfit}.
 #'
+#'         \code{xyrs.est}   x-year probability of survival and the associated lower and upper bounds of the 95% confidence interval are also displayed if argument of `xyrs.est` was set by users.
+#'
 #'         \code{overall.p}  a nominal p.value calculated by Kaplain-Meier estimator with log-rank test.
 #'
 #'         \code{pairwise.p} an object of class "pairwise.htest" which is a list containing the p values (see \link[survminer]{pairwise_survdiff}); (\emph{only returned when more than 2 subtypes are identified}).
@@ -31,7 +34,8 @@ compSurv <- function(moic.res         = NULL,
                      surv.info        = NULL,
                      convt.time       = "d",
                      surv.cut         = NULL,
-                     clust.col        = c("#2EC4B6","#E71D36","#FF9F1C","#BDD5EA","#FFA5AB","#011627","#023E8A","9D4EDD"),
+                     xyrs.est         = NULL,
+                     clust.col        = c("#2EC4B6","#E71D36","#FF9F1C","#BDD5EA","#FFA5AB","#011627","#023E8A","#9D4EDD"),
                      p.adjust.method  = "BH",
                      surv.median.line = "none",
                      fig.name         = NULL,
@@ -67,6 +71,15 @@ compSurv <- function(moic.res         = NULL,
   mosurv.res$Subtype <- paste0("CS", mosurv.res$clust)
   mosurv.res <- mosurv.res[order(mosurv.res$Subtype),]
 
+  # estimate x-year survival probability
+  if(!is.null(xyrs.est)) {
+    if(max(xyrs.est) * 365 < max(mosurv.res$futime)) {
+      xyrs <- summary(survfit(Surv(futime, fustat) ~ Subtype, data = mosurv.res), times = xyrs.est * 365)
+    } else {
+      stop("the maximal survival time is less than the time point you want to estimate!")
+    }
+  }
+
   # convert survival time
   mosurv.res$futime <- switch(convt.time,
                               "d" = mosurv.res$futime,
@@ -202,8 +215,8 @@ compSurv <- function(moic.res         = NULL,
   print(p)
 
   if(n.moic > 2) {
-    return(list(fitd = fitd, fit = fit, overall.p = p.val, pairwise.p = ps))
+    return(list(fitd = fitd, fit = fit, xyrs.est = xyrs, overall.p = p.val, pairwise.p = ps))
   } else {
-    return(list(fitd = fitd, fit = fit, overall.p = p.val))
+    return(list(fitd = fitd, fit = fit, xyrs.est = xyrs, overall.p = p.val))
   }
 }