Skip to content

Commit

Permalink
latest files
Browse files Browse the repository at this point in the history
  • Loading branch information
xlucpu committed Nov 2, 2020
1 parent b8b2257 commit a8637ba
Show file tree
Hide file tree
Showing 35 changed files with 699 additions and 168 deletions.
10 changes: 6 additions & 4 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
Package: MOVICS
Title: Multi-Omics integration and VIsualization in Cancer Subtyping
Version: 0.99.3
Version: 0.99.4
Authors@R:
person(given = "Xiaofan",
family = "Lu",
role = c("aut", "cre"),
email = "[email protected]",
comment = c(ORCID = "0000-0003-2417-6548"))
Description: This package provides a unified interface for 10 state-of-the-art multi-omics clustering algorithms, and forms a pipeline for most commonly used downstream analyses in cancer subtyping researches and to create feature rich customizable visualzations with minimal effort.
Depends: R (>= 4.0.2)
Depends: R (>= 4.0.1)
License: GPL-3 + file LICENSE
Encoding: UTF-8
LazyData: true
Expand Down Expand Up @@ -81,6 +81,7 @@ Collate:
'getClustNum.R'
'getConsensusClustering.R'
'getConsensusMOIC.R'
'getSilhouette.R'
'getElites.R'
'getIntNMF.R'
'getLRAcluster.R'
Expand All @@ -94,10 +95,11 @@ Collate:
'getiClusterBayes.R'
'runDEA.R'
'runGSEA.R'
'runGSVA.R'
'runMarker.R'
'runNTP.R'
'runPAM.R'
'runKappa.R'
'runPAM.R'
'runKappa.R'
'pRRophetic.R'
'LRAcluster.R'
'quiet.R'
Expand Down
Binary file modified MOVICS.pdf
Binary file not shown.
3 changes: 3 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,12 @@ export(getMoHeatmap)
export(getNEMO)
export(getPINSPlus)
export(getSNF)
export(getSilhouette)
export(getStdiz)
export(getiClusterBayes)
export(runDEA)
export(runGSEA)
export(runGSVA)
export(runKappa)
export(runMarker)
export(runNTP)
Expand All @@ -37,6 +39,7 @@ import(ConsensusClusterPlus)
import(IntNMF)
import(PINSPlus)
import(SNFtool)
import(cluster)
import(coca)
import(ggalluvial)
import(ggplot2)
Expand Down
8 changes: 4 additions & 4 deletions R/compAgree.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,13 @@
#' @description Compute the Rand Index, Jaccard Index, Fowlkes-Mallows, and Normalized Mutual Information for agreement of two partitions, and generate alluvial diagrams for visualization.
#' @param moic.res An object returned by `getMOIC()` with one specified algorithm or `get\%algorithm_name\%` or `getConsensusMOIC()` with a list of multiple algorithms.
#' @param subt2comp A data.frame of subtypes that need to compare with current subtype with rownames for samples and columns for other subtypes.
#' @param doPlot A logic value to indicate if generating alluvial diagram to show the agreement of different subtypes.
#' @param doPlot A logic value to indicate if generating alluvial diagram to show the agreement of different subtypes; TRUE by default.
#' @param box.width A numeric valur to indicate the width for box in alluvial diagram.
#' @param clust.col A string vector storing colors for each cluster.
#' @param width A numeric value to indicate the width of alluvial diagram.
#' @param height A numeric value to indicate the height of alluvial diagram.
#' @param fig.path A string value to indicate the output path for storing the figure
#' @param fig.name A string value to indicate the name of the figure
#' @param fig.path A string value to indicate the output path for storing the figure.
#' @param fig.name A string value to indicate the name of the figure.
#' @return A figure of agreement (.pdf) if \code{doPlot = TRUE} and a data.frame storing four agreement measurements, including Rand Index (RI), Adjusted Mutual Information (AMI), Jaccard Index (JI), and Fowlkes-Mallows (FM).
#' @export
#' @import ggplot2
Expand All @@ -24,7 +24,7 @@
compAgree <- function(moic.res = NULL,
subt2comp = NULL,
doPlot = TRUE,
clust.col = c("#2EC4B6","#E71D36","#FF9F1C","#BDD5EA","#FFA5AB","#011627","#023E8A","9D4EDD"),
clust.col = c("#2EC4B6","#E71D36","#FF9F1C","#BDD5EA","#FFA5AB","#011627","#023E8A","#9D4EDD"),
box.width = 0.1,
fig.name = NULL,
fig.path = getwd(),
Expand Down
16 changes: 14 additions & 2 deletions R/compDrugsen.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
#' @param seed A integer value to indicate the seed for reproducing ridge regression.
#' @param width A numeric value to indicate the width of boxviolin plot.
#' @param height A numeric value to indicate the height of boxviolin plot.
#' @param test.method A string value to indicate the method for statistical testing. Allowed values contain c('nonparametric', 'parametric'); nonparametric means two-sample wilcoxon rank sum test for two subtypes and Kruskal-Wallis rank sum test for multiple subtypes; parametric means two-sample t-test when only two subtypes are identified, and anova for multiple subtypes comparison.
#' @param test.method A string value to indicate the method for statistical testing. Allowed values contain c('nonparametric', 'parametric'); nonparametric means two-sample wilcoxon rank sum test for two subtypes and Kruskal-Wallis rank sum test for multiple subtypes; parametric means two-sample t-test when only two subtypes are identified, and anova for multiple subtypes comparison; "nonparametric" by default.
#' @return Data.frame(s) storing the estimated IC50 of specified drugs per sample within each Subtype.
#' @export
#' @import ggplot2
Expand All @@ -25,7 +25,7 @@ compDrugsen <- function(moic.res = NULL,
drugs = c("Cisplatin", "Paclitaxel"),
tissueType = "all",
test.method = "nonparametric",
clust.col = c("#2EC4B6","#E71D36","#FF9F1C","#BDD5EA","#FFA5AB","#011627","#023E8A","9D4EDD"),
clust.col = c("#2EC4B6","#E71D36","#FF9F1C","#BDD5EA","#FFA5AB","#011627","#023E8A","#9D4EDD"),
prefix = NULL,
seed = 123456,
fig.path = getwd(),
Expand Down Expand Up @@ -90,15 +90,27 @@ compDrugsen <- function(moic.res = NULL,
# generate boxviolin plot with statistical testing
if(n.moic == 2 & test.method == "nonparametric") {
statistic = "wilcox.test"
ic50.test <- wilcox.test(predictedBoxdat[[drug]]$Est.IC50 ~ predictedBoxdat[[drug]]$Subtype)$p.value
cat(paste0("Wilcoxon rank sum test p value = ", formatC(ic50.test, format = "e", digits = 2), " for ", drug))
}
if(n.moic == 2 & test.method == "parametric") {
statistic = "t.test"
ic50.test <- t.test(predictedBoxdat[[drug]]$Est.IC50 ~ predictedBoxdat[[drug]]$Subtype)$p.value
cat(paste0("Student's t test p value = ", formatC(ic50.test, format = "e", digits = 2), " for ", drug))
}
if(n.moic > 2 & test.method == "nonparametric") {
statistic = "kruskal.test"
ic50.test <- kruskal.test(predictedBoxdat[[drug]]$Est.IC50 ~ predictedBoxdat[[drug]]$Subtype)$p.value
pairwise.ic50.test <- pairwise.wilcox.test(predictedBoxdat[[drug]]$Est.IC50,predictedBoxdat[[drug]]$Subtype,p.adjust.method = "BH")
cat(paste0(drug,": Kruskal-Wallis rank sum test p value = ", formatC(TMB.test, format = "e", digits = 2),"\npost-hoc pairwise wilcoxon rank sum test with Benjamini-Hochberg adjustment presents below:"))
print(formatC(pairwise.ic50.test$p.value, format = "e", digits = 2))
}
if(n.moic > 2 & test.method == "parametric") {
statistic = "anova"
ic50.test <- summary(aov(predictedBoxdat[[drug]]$Est.IC50 ~ predictedBoxdat[[drug]]$Subtype))[[1]][["Pr(>F)"]][1]
pairwise.ic50.test <- pairwise.t.test(predictedBoxdat[[drug]]$Est.IC50,predictedBoxdat[[drug]]$Subtype,p.adjust.method = "BH")
cat(paste0(drug,": One-way anova test p value = ", formatC(ic50.test, format = "e", digits = 2),"\npost-hoc pairwise Student's t test with Benjamini-Hochberg adjustment presents below:"))
print(formatC(pairwise.ic50.test$p.value, format = "e", digits = 2))
}

p <- ggplot(data = predictedBoxdat[[drug]],
Expand Down
35 changes: 27 additions & 8 deletions R/compFGA.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
#' @param segment A data frame containing segmented copy number and columns must exactly include the following elements: c('sample','chrom','start','end','value'). Column of `value` should be segments value when \code{iscopynumber = FALSE} but copy-number value when \code{iscopynumber = TRUE}. Copy-number will be converted to segments by log2(copy-number/2).
#' @param iscopynumber A logical value to indicate if the fifth column of segment input is copy-number. If segment file derived from CNV calling provides copy number instead of segment_mean value, this argument must be switched to TRUE. FALSE by default.
#' @param cnathreshold A numeric value to indicate the cutoff for identifying copy-number gain or loss. 0.2 by default.
#' @param test.method A string value to indicate the method for statistical testing. Allowed values contain c('nonparametric', 'parametric'); nonparametric means two-sample wilcoxon rank sum test for two subtypes and Kruskal-Wallis rank sum test for multiple subtypes; parametric means two-sample t-test when only two subtypes are identified, and anova for multiple subtypes comparison.
#' @param test.method A string value to indicate the method for statistical testing. Allowed values contain c('nonparametric', 'parametric'); nonparametric means two-sample wilcoxon rank sum test for two subtypes and Kruskal-Wallis rank sum test for multiple subtypes; parametric means two-sample t-test when only two subtypes are identified, and anova for multiple subtypes comparison; "nonparametric" by default.
#' @param barcolor A string vector to indicate the mapping color for bars of FGA, FGG and FGL.
#' @param clust.col A string vector storing colors for each subtype.
#' @param fig.path A string value to indicate the output path for storing the barplot.
Expand All @@ -14,15 +14,21 @@
#' @param height A numeric value to indicate the height of barplot.
#' @return A list contains the following components:
#'
#' \code{summary} a table summarizing the measurements of FGA, FGG, and FGL per sample
#' \code{summary} a table summarizing the measurements of FGA, FGG, and FGL per sample
#'
#' \code{FGA.p.value} a nominal p value quantifying the difference of FGA among current subtypes
#' \code{FGA.p.value} a nominal p value quantifying the difference of FGA among current subtypes
#'
#' \code{FGG.p.value} a nominal p value quantifying the difference of FGG among current subtypes
#' \code{pairwise.FGA.test} a pairwise BH adjusted p value matrix for multiple comparisons of FGA
#'
#' \code{FGL.p.value} a nominal p value quantifying the difference of FGL among current subtypes
#' \code{FGG.p.value} a nominal p value quantifying the difference of FGG among current subtypes
#'
#' \code{test.method} a string value indicating the statistical testing method to calculate p values
#' \code{pairwise.FGG.test} a pairwise BH adjusted p value matrix for multiple comparisons of FGG
#'
#' \code{FGL.p.value} a nominal p value quantifying the difference of FGL among current subtypes
#'
#' \code{pairwise.FGL.test} a pairwise BH adjusted p value matrix for multiple comparisons of FGL
#'
#' \code{test.method} a string value indicating the statistical testing method to calculate p values
#'
#' @export
#' @import ggplot2
Expand All @@ -39,7 +45,7 @@ compFGA <- function(moic.res = NULL,
cnathreshold = 0.2,
test.method = "nonparametric",
barcolor = c("#008B8A", "#F2042C", "#21498D"),
clust.col = c("#2EC4B6","#E71D36","#FF9F1C","#BDD5EA","#FFA5AB","#011627","#023E8A","9D4EDD"),
clust.col = c("#2EC4B6","#E71D36","#FF9F1C","#BDD5EA","#FFA5AB","#011627","#023E8A","#9D4EDD"),
fig.path = getwd(),
fig.name = NULL,
width = 8,
Expand Down Expand Up @@ -146,13 +152,19 @@ compFGA <- function(moic.res = NULL,
FGA.test <- kruskal.test(outTab$FGA ~ outTab$Subtype)$p.value
FGG.test <- kruskal.test(outTab$FGG ~ outTab$Subtype)$p.value
FGL.test <- kruskal.test(outTab$FGL ~ outTab$Subtype)$p.value
pairwise.FGA.test <- pairwise.wilcox.test(outTab$FGA,outTab$Subtype,p.adjust.method = "BH")
pairwise.FGG.test <- pairwise.wilcox.test(outTab$FGG,outTab$Subtype,p.adjust.method = "BH")
pairwise.FGL.test <- pairwise.wilcox.test(outTab$FGL,outTab$Subtype,p.adjust.method = "BH")
}

if(n.moic > 2 & test.method == "parametric") {
statistic <- "anova"
FGA.test <- summary(aov(outTab$FGA ~ outTab$Subtype))[[1]][["Pr(>F)"]][1]
FGG.test <- summary(aov(outTab$FGG ~ outTab$Subtype))[[1]][["Pr(>F)"]][1]
FGL.test <- summary(aov(outTab$FGL ~ outTab$Subtype))[[1]][["Pr(>F)"]][1]
pairwise.FGA.test <- pairwise.t.test(outTab$FGA,outTab$Subtype,p.adjust.method = "BH")
pairwise.FGG.test <- pairwise.t.test(outTab$FGG,outTab$Subtype,p.adjust.method = "BH")
pairwise.FGL.test <- pairwise.t.test(outTab$FGL,outTab$Subtype,p.adjust.method = "BH")
}

# generate barplot
Expand Down Expand Up @@ -253,5 +265,12 @@ compFGA <- function(moic.res = NULL,
# print to screen
print(pal)

return(list(summary = outTab, FGA.p.value = FGA.test, FGG.p.value = FGG.test, FGL.p.value = FGL.test, test.method = statistic))
return(list(summary = outTab,
FGA.p.value = FGA.test,
pairwise.FGA.test = pairwise.FGA.test,
FGG.p.value = FGG.test,
pairwise.FGG.test = pairwise.FGG.test,
FGL.p.value = FGL.test,
pairwise.FGL.test = pairwise.FGL.test,
test.method = statistic))
}
6 changes: 3 additions & 3 deletions R/compMut.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@
#' @param p.adj.method A string value to indicate the correction method for multiple comparision. Allowed values contain c('holm', 'hochberg', 'hommel', 'bonferroni', 'BH', 'BY', 'fdr'); BH by default.
#' @param tab.name A string value to indicate the name of the output table.
#' @param res.path A string value to indicate the path for saving the table.
#' @param doWord A logic value to indicate if transformating the .txt outfile to a .docx WORD file (.txt file will be also kept).
#' @param doPlot A logic value to indicate if generating oncoprint.
#' @param doWord A logic value to indicate if transformating the .txt outfile to a .docx WORD file (.txt file will be also kept); TRUE by default.
#' @param doPlot A logic value to indicate if generating oncoprint; TRUE by default.
#' @param innerclust A logic value to indicate if perform clustering within each subtype; TRUE by default.
#' @param fig.path A string value to indicate the output path for storing the oncoprint.
#' @param fig.name A string value to indicate the name of the oncoprint.
Expand Down Expand Up @@ -48,7 +48,7 @@ compMut <- function(moic.res = NULL,
bg.col = "#dcddde",
p.cutoff = 0.05,
p.adj.cutoff = 0.05,
clust.col = c("#2EC4B6","#E71D36","#FF9F1C","#BDD5EA","#FFA5AB","#011627","#023E8A","9D4EDD"),
clust.col = c("#2EC4B6","#E71D36","#FF9F1C","#BDD5EA","#FFA5AB","#011627","#023E8A","#9D4EDD"),
width = 8,
height = 4) {

Expand Down
25 changes: 19 additions & 6 deletions R/compSurv.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,21 @@
#' @param moic.res An object returned by `getMOIC()` with one specified algorithm or `get\%algorithm_name\%` or `getConsensusMOIC()` with a list of multiple algorithms.
#' @param surv.info A data.frame with rownames of observations and with at least two columns of `futime` for survival time and `fustat` for survival status (0: censoring; 1: event)
#' @param clust.col A string vector storing colors for each subtype.
#' @param p.adjust.method A string value for indicating method for adjusting p values (see \link[stats]{p.adjust}). Allowed values include one of c(`holm`, `hochberg`, `hommel`, `bonferroni`, `BH`, `BY`, `fdr`, `none`).
#' @param p.adjust.method A string value for indicating method for adjusting p values (see \link[stats]{p.adjust}). Allowed values include one of c(`holm`, `hochberg`, `hommel`, `bonferroni`, `BH`, `BY`, `fdr`, `none`); "BH" by default.
#' @param fig.name A string value to indicate the output path for storing the kaplan-meier curve.
#' @param fig.path A string value to indicate the name of the kaplan-meier curve.
#' @param convt.time A string value to indicate how to convert the survival time; value of `d` for days, `m` for months and `y` for years.
#' @param surv.median.line A string value for drawing a horizontal/vertical line at median survival. Allowed values include one of c(`none`, `hv`, `h`, `v`). v: vertical, h:horizontal.
#' @param convt.time A string value to indicate how to convert the survival time; value of `d` for days, `m` for months and `y` for years; "d" by default.
#' @param surv.median.line A string value for drawing a horizontal/vertical line at median survival. Allowed values include one of c(`none`, `hv`, `h`, `v`). v: vertical, h:horizontal; "none" by default.
#' @param xyrs.est An integer vector to estimate probability of surviving beyond a certain number (x) of years (Estimating x-year survival); NULL by default.
#' @param surv.cut A numeric value to indicate the x-axis cutoff for showing the maximal survival time.
#' @return A figure of multi-omics Kaplan-Meier curve (.pdf) and a list with the following components:
#'
#' \code{fitd} an object returned by \link[survival]{survdiff}.
#'
#' \code{fid} an object returned by \link[survival]{survfit}.
#'
#' \code{xyrs.est} x-year probability of survival and the associated lower and upper bounds of the 95% confidence interval are also displayed if argument of `xyrs.est` was set by users.
#'
#' \code{overall.p} a nominal p.value calculated by Kaplain-Meier estimator with log-rank test.
#'
#' \code{pairwise.p} an object of class "pairwise.htest" which is a list containing the p values (see \link[survminer]{pairwise_survdiff}); (\emph{only returned when more than 2 subtypes are identified}).
Expand All @@ -31,7 +34,8 @@ compSurv <- function(moic.res = NULL,
surv.info = NULL,
convt.time = "d",
surv.cut = NULL,
clust.col = c("#2EC4B6","#E71D36","#FF9F1C","#BDD5EA","#FFA5AB","#011627","#023E8A","9D4EDD"),
xyrs.est = NULL,
clust.col = c("#2EC4B6","#E71D36","#FF9F1C","#BDD5EA","#FFA5AB","#011627","#023E8A","#9D4EDD"),
p.adjust.method = "BH",
surv.median.line = "none",
fig.name = NULL,
Expand Down Expand Up @@ -67,6 +71,15 @@ compSurv <- function(moic.res = NULL,
mosurv.res$Subtype <- paste0("CS", mosurv.res$clust)
mosurv.res <- mosurv.res[order(mosurv.res$Subtype),]

# estimate x-year survival probability
if(!is.null(xyrs.est)) {
if(max(xyrs.est) * 365 < max(mosurv.res$futime)) {
xyrs <- summary(survfit(Surv(futime, fustat) ~ Subtype, data = mosurv.res), times = xyrs.est * 365)
} else {
stop("the maximal survival time is less than the time point you want to estimate!")
}
}

# convert survival time
mosurv.res$futime <- switch(convt.time,
"d" = mosurv.res$futime,
Expand Down Expand Up @@ -202,8 +215,8 @@ compSurv <- function(moic.res = NULL,
print(p)

if(n.moic > 2) {
return(list(fitd = fitd, fit = fit, overall.p = p.val, pairwise.p = ps))
return(list(fitd = fitd, fit = fit, xyrs.est = xyrs, overall.p = p.val, pairwise.p = ps))
} else {
return(list(fitd = fitd, fit = fit, overall.p = p.val))
return(list(fitd = fitd, fit = fit, xyrs.est = xyrs, overall.p = p.val))
}
}
Loading

0 comments on commit a8637ba

Please sign in to comment.