Added plotting functionality and testing for permutations

mapleknight · Feb 4, 2022 · 4016e27 · 4016e27
1 parent ee47662
commit 4016e27
Show file tree

Hide file tree

Showing 8 changed files with 319 additions and 5 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -19,6 +19,7 @@ Depends: R (>= 3.2.0)
 Imports:
 	ComplexHeatmap,
 	DT,
+	ggplot2,
 	gplots,
 	graphics,
 	grDevices,

diff --git a/NAMESPACE b/NAMESPACE
@@ -8,6 +8,7 @@ export(InteractionCoefficientGraph)
 export(OutputData)
 export(OutputResults)
 export(PValueBoxPlots)
+export(PermutationSummary)
 export(PermuteIntLIM)
 export(PlotDistributions)
 export(PlotPCA)

diff --git a/R/crossvalfunctions.R b/R/crossvalfunctions.R
@@ -59,7 +59,7 @@ RunCrossValidation <- function(inputData,
                                         folds=folds)
 
   # Filter the folds.
-  inputDataFilt <- FilterDataFolds(inputData=inputDataFolds,
+  inputDataFilt <- FilterDataFolds(inputDataFolds=inputDataFolds,
                                    analyteType1perc=analyteType1perc,
                                    analyteType2perc=analyteType2perc, 
                                    analyteMiss=analyteMiss,

diff --git a/R/plottingfunctions.R b/R/plottingfunctions.R
@@ -823,4 +823,89 @@ HistogramPairs <- function(inputResults, type = 'outcome', breaks = 50){
   }else{
       stop("Only two valid types:  outcome or independent.  Invalid type entered")
   }
+}
+
+#' Return the number of significant analytes / pairs per permutation and the number 
+#' of permutations in which each analyte is significant.
+#' If plot = TRUE, show a box plot of number of significant analytes over permutations, 
+#' overlaid with the number of significant analytes in the original data.
+#'
+#' @param inputResults Data frame with model results (output of ProcessResults())
+#' @param permResults An object of type PermutationResults (output of PermuteIntLIM())
+#' @param plot Whether or not to show the boxplot. Default is TRUE.
+#' @export
+PermutationSummary <- function(inputResults, permResults, plot){
+
+  # Prevent "visible binding for global variable" notes.
+  Count <- Type <- NULL
+
+  # Compute pair significance counts.
+  allSignificantPairs <- do.call(c, permResults[[2]])
+  pairSignificanceCounts <- table(allSignificantPairs)
+  pairSignificanceCounts <- as.data.frame(pairSignificanceCounts[order(-pairSignificanceCounts)])
+
+  # Compute summary.
+  pairCountDistrib <- permResults[[1]]$Num_Significant_Pairs
+  analyte1 <- lapply(1:length(permResults[[2]]), function(i){
+    return(unlist(lapply(permResults[[2]][[i]], function(string){
+      return(strsplit(string, split="__V__")[[1]][1])
+    })))
+  })
+  analyte2 <- lapply(1:length(permResults[[2]]), function(i){
+    return(unlist(lapply(permResults[[2]][[i]], function(string){
+      return(strsplit(string, split="__V__")[[1]][2])
+    })))
+  })
+  analyte1CountDistrib <- unlist(lapply(1:length(analyte1), function(i){
+    return(length(unique(analyte1[[i]])))
+  }))
+  analyte2CountDistrib <- unlist(lapply(1:length(analyte2), function(i){
+    return(length(unique(analyte2[[i]])))
+  }))
+  countDistribs <- data.frame(Pairs = pairCountDistrib, 
+                              Independent = analyte1CountDistrib,
+                              Outcome = analyte2CountDistrib)
+
+  # Set up data for input.
+  significantCounts <- data.frame(Count=c(countDistribs$Pairs, 
+                                          countDistribs$Independent,
+                                          countDistribs$Outcome),
+                                  Type=c(rep("Pair", nrow(permResults[[1]])),
+                                         rep("Independent.Variable", nrow(permResults[[1]])),
+                                         rep("Outcome", nrow(permResults[[1]]))))
+
+  # Compute the same for the original data.
+  PairCount <- nrow(inputResults)
+  inputIndependentCount <- length(unique(inputResults$Analyte1))
+  inputOutcomeCount <- length(unique(inputResults$Analyte2))
+
+  # Make plot.
+  cols <- c("Original.Data"="red")
+  fills <- c("Permuted.Data"="black")
+  if(plot == TRUE){
+    plt <- ggplot2::ggplot(significantCounts, ggplot2::aes(x=Type, y=Count)) + 
+      ggplot2::geom_violin(ggplot2::aes(fill = "Permuted.Data")) + 
+      ggplot2::theme_bw() +
+      ggplot2::theme(axis.title.x=ggplot2::element_blank(), 
+                     panel.border = ggplot2::element_blank(),
+                     panel.grid.major = ggplot2::element_blank(),
+                     panel.grid.minor = ggplot2::element_blank(),
+                     axis.line = ggplot2::element_line(colour = "black")) + 
+      ggplot2::annotation_logticks(sides = "l", scaled = TRUE) + 
+      ggplot2::scale_y_log10() + 
+      ggplot2::geom_boxplot(width=0.05, fill = "white") + 
+      ggplot2::geom_segment(ggplot2::aes(x = 0.6, xend = 1.4, y = inputIndependentCount, 
+                                         yend = inputIndependentCount, color = "Original.Data")) + 
+      ggplot2::geom_segment(ggplot2::aes(x = 1.6, xend = 2.4, y = inputOutcomeCount, 
+                                         yend = inputOutcomeCount, color = "Original.Data")) +
+      ggplot2::geom_segment(ggplot2::aes(x = 2.6, xend = 3.4, y = PairCount, 
+                                         yend = PairCount, color = "Original.Data")) +
+      ggplot2::scale_colour_manual(name = "", values=cols) +
+      ggplot2::scale_fill_manual(name = "", values=fills)
+    print(plt)
+  }
+
+  # Return summary.
+  return(list(SignificanceCountTable = pairSignificanceCounts,
+              CountDistributions = countDistribs))
 }
diff --git a/man/PermutationSummary.Rd b/man/PermutationSummary.Rd
diff --git a/tests/testthat/test-perm.R b/tests/testthat/test-perm.R
@@ -0,0 +1,171 @@
+# When input data is not valid, should terminate early.
+test_that("Errors on wrong input type",{
+  expect_error(PermuteIntLIM(data = "?!%", stype="something", outcome = 1,
+                             independent.var.type = 2, num.permutations = 4), 
+               "The data must be an IntLimData object", ignore.case = TRUE)
+})
+
+# When the number of permutations is not sufficient, should terminate early.
+test_that("Insufficient fold count causes an error",{
+  # Create toy data.
+  pData <- data.frame("Feat1"=c(47.1,26.2,84.3,98.4,43.5,82.6,13.7,87.8), 
+                      "Feat2"=c(37.1,40.2,80.3,83.4,6.5,12.6,43.7,75.8),
+                      "Feat3"=c(14.1,74.2,11.3,19.4,73.5,55.6,18.7,91.8),
+                      "Level"=c("Low", "Medium", "Low", "Medium", "Medium", "Low",
+                                "Low", "Medium"))
+  rownames(pData) <- c("Fred", "Wilma", "Pebbles", "Bambam", "Betty", "Barney",
+                       "Dino", "Hoppy")
+  geneData <- data.frame("Fred"=c(46.1,20.2,59.3), "Wilma"=c(11.1,34.2,19.3), 
+                         "Pebbles"=c(28.1,71.2,94.3), "Bambam"=c(51.1,91.2,32.3),
+                         "Betty"=c(73.1,26.2,40.3), "Barney"=c(91.1,99.2,12.3),
+                         "Dino"=c(38.1,44.2,60.3), "Hoppy"=c(91.1,93.2,63.3))
+  rownames(geneData) <- c("Gene1", "Gene2", "Gene3")
+  metabData <- data.frame("Fred"=c(60.1,32.2,81.3), "Wilma"=c(68.1,58.2,45.3), 
+                          "Pebbles"=c(30.1,61.2,67.3), "Bambam"=c(36.1,7.2,79.3),
+                          "Betty"=c(5.1,87.2,91.3), "Barney"=c(5.1,87.2,91.3),
+                          "Dino"=c(99.1,10.2,85.3), "Hoppy"=c(51.1,14.2,76.3))
+  rownames(metabData) <- c("Metab1", "Metab2", "Metab3")
+  metabMetaData <- data.frame("id"=c("Metab1", "Metab2", "Metab3"), "metabname"=
+                                c("Metab1", "Metab2", "Metab3"))
+  geneMetaData <- data.frame("id"=c("Gene1", "Gene2", "Gene3"), "genename"=
+                               c("Gene1", "Gene2", "Gene3"))
+  dat <- methods::new("IntLimData", analyteType1=as.matrix(metabData),
+                      analyteType2=as.matrix(geneData),
+                      analyteType1MetaData = metabMetaData,
+                      analyteType2MetaData = geneMetaData,
+                      sampleMetaData = pData)
+  expect_error(PermuteIntLIM(data = dat, stype="Level", outcome = 1,
+                             independent.var.type = 2, num.permutations = 0), 
+               "The number of permutations must be greater than or equal to 1", 
+               ignore.case = TRUE)
+})
+
+# Check that the multi-omic function works with meta-data.
+test_that("Function works correctly in multi-omic case.", {
+  # Create toy data.
+  pData <- data.frame("Feat1"=c(47.1,26.2,84.3,98.4,43.5,82.6,13.7,87.8), 
+                      "Feat2"=c(37.1,40.2,80.3,83.4,6.5,12.6,43.7,75.8),
+                      "Feat3"=c(14.1,74.2,11.3,19.4,73.5,55.6,18.7,91.8),
+                      "Level"=c("Low", "Medium", "Low", "Medium", "Medium", "Low",
+                                "Low", "Medium"))
+  rownames(pData) <- c("Fred", "Wilma", "Pebbles", "Bambam", "Betty", "Barney",
+                       "Dino", "Hoppy")
+  geneData <- data.frame("Fred"=c(46.1,20.2,59.3), "Wilma"=c(11.1,34.2,19.3), 
+                         "Pebbles"=c(28.1,71.2,94.3), "Bambam"=c(51.1,91.2,32.3),
+                         "Betty"=c(73.1,26.2,40.3), "Barney"=c(91.1,99.2,12.3),
+                         "Dino"=c(38.1,44.2,60.3), "Hoppy"=c(91.1,93.2,63.3))
+  rownames(geneData) <- c("Gene1", "Gene2", "Gene3")
+  metabData <- data.frame("Fred"=c(60.1,32.2,81.3), "Wilma"=c(68.1,58.2,45.3), 
+                          "Pebbles"=c(30.1,61.2,67.3), "Bambam"=c(36.1,7.2,79.3),
+                          "Betty"=c(5.1,87.2,91.3), "Barney"=c(5.1,87.2,91.3),
+                          "Dino"=c(99.1,10.2,85.3), "Hoppy"=c(51.1,14.2,76.3))
+  rownames(metabData) <- c("Metab1", "Metab2", "Metab3")
+  metabMetaData <- data.frame("id"=c("Metab1", "Metab2", "Metab3"), "metabname"=
+                                c("Metab1", "Metab2", "Metab3"))
+  geneMetaData <- data.frame("id"=c("Gene1", "Gene2", "Gene3"), "genename"=
+                               c("Gene1", "Gene2", "Gene3"))
+  dat <- methods::new("IntLimData", analyteType1=as.matrix(metabData),
+                      analyteType2=as.matrix(geneData),
+                      analyteType1MetaData = metabMetaData,
+                      analyteType2MetaData = geneMetaData,
+                      sampleMetaData = pData)
+
+  # Discrete 
+  res <- PermuteIntLIM(data = dat, stype="Level", outcome = 1,
+                       independent.var.type = 2, num.permutations = 4)
+  expect_equal(nrow(res[[1]]), 4)
+  expect_equal(ncol(res[[1]]), 2)
+  expect_equal(length(res[[2]]), 4)
+
+  # Continuous
+  res <- PermuteIntLIM(data = dat, stype="Feat1", outcome = 1,
+                       independent.var.type = 2, num.permutations = 4,
+                       continuous = TRUE)
+  expect_equal(nrow(res[[1]]), 4)
+  expect_equal(ncol(res[[1]]), 2)
+  expect_equal(length(res[[2]]), 4)
+})
+
+# Check that we are still able to run without the metadata.
+test_that("Function still works when metadata is missing.", {
+
+  # Create toy data.
+  pData <- data.frame("Feat1"=c(47.1,26.2,84.3,98.4,43.5,82.6,13.7,87.8), 
+                      "Feat2"=c(37.1,40.2,80.3,83.4,6.5,12.6,43.7,75.8),
+                      "Feat3"=c(14.1,74.2,11.3,19.4,73.5,55.6,18.7,91.8),
+                      "Level"=c("Low", "Medium", "Low", "Medium", "Medium", "Low",
+                                "Low", "Medium"))
+  rownames(pData) <- c("Fred", "Wilma", "Pebbles", "Bambam", "Betty", "Barney",
+                       "Dino", "Hoppy")
+  geneData <- data.frame("Fred"=c(46.1,20.2,59.3), "Wilma"=c(11.1,34.2,19.3), 
+                         "Pebbles"=c(28.1,71.2,94.3), "Bambam"=c(51.1,91.2,32.3),
+                         "Betty"=c(73.1,26.2,40.3), "Barney"=c(91.1,99.2,12.3),
+                         "Dino"=c(38.1,44.2,60.3), "Hoppy"=c(91.1,93.2,63.3))
+  rownames(geneData) <- c("Gene1", "Gene2", "Gene3")
+  metabData <- data.frame("Fred"=c(60.1,32.2,81.3), "Wilma"=c(68.1,58.2,45.3), 
+                          "Pebbles"=c(30.1,61.2,67.3), "Bambam"=c(36.1,7.2,79.3),
+                          "Betty"=c(5.1,87.2,91.3), "Barney"=c(5.1,87.2,91.3),
+                          "Dino"=c(99.1,10.2,85.3), "Hoppy"=c(51.1,14.2,76.3))
+  rownames(metabData) <- c("Metab1", "Metab2", "Metab3")
+  dat <- methods::new("IntLimData", analyteType1=as.matrix(metabData),
+                      analyteType2=as.matrix(geneData),
+                      analyteType1MetaData = as.data.frame(matrix(, nrow = 0, ncol = 0)),
+                      analyteType2MetaData = as.data.frame(matrix(, nrow = 0, ncol = 0)),
+                      sampleMetaData = pData)
+
+  # Discrete
+  res <- PermuteIntLIM(data = dat, stype="Level", outcome = 1,
+                       independent.var.type = 2, num.permutations = 4)
+  expect_equal(nrow(res[[1]]), 4)
+  expect_equal(ncol(res[[1]]), 2)
+  expect_equal(length(res[[2]]), 4)
+
+  # Continuous
+  res <- PermuteIntLIM(data = dat, stype="Feat1", outcome = 1,
+                       independent.var.type = 2, num.permutations = 4,
+                       continuous = TRUE)
+  expect_equal(nrow(res[[1]]), 4)
+  expect_equal(ncol(res[[1]]), 2)
+  expect_equal(length(res[[2]]), 4)
+})
+
+# Check that we are able to run with both single-omic and multi-omic data.
+test_that("Single-omic data gives expected results.", {
+  # Create toy data.
+  pData <- data.frame("Feat1"=c(47.1,26.2,84.3,98.4,43.5,82.6,13.7,87.8), 
+                      "Feat2"=c(37.1,40.2,80.3,83.4,6.5,12.6,43.7,75.8),
+                      "Feat3"=c(14.1,74.2,11.3,19.4,73.5,55.6,18.7,91.8),
+                      "Level"=c("Low", "Medium", "Low", "Medium", "Medium", "Low",
+                                "Low", "Medium"))
+  rownames(pData) <- c("Fred", "Wilma", "Pebbles", "Bambam", "Betty", "Barney",
+                       "Dino", "Hoppy")
+  metabData <- data.frame("Fred"=c(60.1,32.2,81.3), "Wilma"=c(68.1,58.2,45.3), 
+                          "Pebbles"=c(30.1,61.2,67.3), "Bambam"=c(36.1,7.2,79.3),
+                          "Betty"=c(5.1,87.2,91.3), "Barney"=c(5.1,87.2,91.3),
+                          "Dino"=c(99.1,10.2,85.3), "Hoppy"=c(51.1,14.2,76.3))
+  rownames(metabData) <- c("Metab1", "Metab2", "Metab3")
+  metabMetaData <- data.frame("id"=c("Metab1", "Metab2", "Metab3"), "metabname"=
+                                c("Metab1", "Metab2", "Metab3"))
+  geneMetaData <- data.frame("id"=c("Gene1", "Gene2", "Gene3"), "genename"=
+                               c("Gene1", "Gene2", "Gene3"))
+  dat <- methods::new("IntLimData", analyteType1=as.matrix(metabData),
+                      analyteType2=matrix(, nrow = 0, ncol = 0),
+                      analyteType1MetaData = metabMetaData,
+                      analyteType2MetaData = geneMetaData,
+                      sampleMetaData = pData)
+
+  # Discrete
+  res <- PermuteIntLIM(data = dat, stype="Level", outcome = 1,
+                       independent.var.type = 1, num.permutations = 4)
+  expect_equal(nrow(res[[1]]), 4)
+  expect_equal(ncol(res[[1]]), 2)
+  expect_equal(length(res[[2]]), 4)
+
+  # Continuous
+  res <- PermuteIntLIM(data = dat, stype="Feat1", outcome = 1,
+                       independent.var.type = 1, num.permutations = 4,
+                       continuous = TRUE)
+  expect_equal(nrow(res[[1]]), 4)
+  expect_equal(ncol(res[[1]]), 2)
+  expect_equal(length(res[[2]]), 4)
+})
diff --git a/vignettes/IntLimVignette.Rmd b/vignettes/IntLimVignette.Rmd
@@ -277,7 +277,8 @@ is a list of significant analyte pairs.
 ```{r}
 myres.gm.sig <- IntLIM::ProcessResults(inputResults = myres.gm, 
                                        inputData = inputDatafilt, 
-                                       pvalcutoff = 0.10, rsquaredCutoff = 0.2)
+                                       pvalcutoff = 0.10, rsquaredCutoff = 0.2,
+                                       interactionCoeffPercentile = 0.5)
 ```
 
 The __InteractionCoefficientGraph()__ function allows the user to observe the 
@@ -322,10 +323,25 @@ crossValResults <- RunCrossValidation(inputData = inputData, analyteType1perc =
                    analyteType2perc = 0.10, analyteMiss = 0.80,
                    stype="PBO_vs_Leukemia", covar = c("cellnbr"), outcome = c(1), 
                    independent.var.type = c(2), save.covar.pvals = TRUE,
-                   pvalcutoff = 0.10, rsquaredCutoff = 0.2,
+                   pvalcutoff = 0.10, rsquaredCutoff = 0.2, interactionCoeffPercentile = 0.5
                    folds = 4)
 IntLIM::PlotFoldOverlapUpSet(crossValResults$processed)
 ```
+
+##Run Permutation
+You can also choose to run end-to-end permutation. This combines the steps of
+permuting, running IntLIM, and processing results for multiple permutations.
+__PermutationSummary__ returns a summary of the permutation results and
+(optionally) a violin plot of the number of significant pairs and analytes.
+```{r eval = FALSE}
+perm.res <- IntLIM::PermuteIntLIM(data = inputDatafilt, stype = "PBO_vs_Leukemia", outcome = 1,
+                      independent.var.type = 2, pvalcutoff = 0.10, interactionCoeffPercentile = 0.5, 
+                   rsquaredCutoff = 0.2, covar = c("cellnbr"),
+                   num.permutations = 5, continuous = TRUE)
+summary <- PermutationSummary(permResults = perm.res, inputResults = myres.sig,
+                              plot = TRUE)
+```
+
 ##References
 
 Siddiqui JK, Baskin E, Liu M, Cantemir-Stone CZ, Zhang B, Bonneville R, McElroy JP, 

diff --git a/vignettes/IntLimVignetteContinuous.Rmd b/vignettes/IntLimVignetteContinuous.Rmd
@@ -275,7 +275,8 @@ ___NA___ for continuous phenotypes.
 myres.gm.sig <- IntLIM::ProcessResults(inputResults = myres.gm, 
                                        inputData = inputDatafilt, 
                                        pvalcutoff = 0.10, 
-                                       rsquaredCutoff = 0.2)
+                                       rsquaredCutoff = 0.2,
+                                       interactionCoeffPercentile = 0.5)
 ```
 The __HistogramPairs()__ function plots the count of analyte pairs by either
 outcome or independent variable analyte.
@@ -315,9 +316,24 @@ crossValResults <- RunCrossValidation(inputData = inputData, analyteType1perc =
                    stype="drugscore", covar = c("cellnbr"), outcome = c(1), 
                    independent.var.type = c(2), save.covar.pvals = TRUE,
                    pvalcutoff = 0.10, diffcorr = NA, rsquaredCutoff = 0.2,
-                   folds = 4, corrtype = NA, continuous = TRUE)
+                   folds = 4, corrtype = NA, continuous = TRUE,
+                   interactionCoeffPercentile = 0.5)
 IntLIM::PlotFoldOverlapUpSet(crossValResults$processed)
 ```
+
+##Run Permutation
+You can also choose to run end-to-end permutation. This combines the steps of
+permuting, running IntLIM, and processing results for multiple permutations.
+__PermutationSummary__ returns a summary of the permutation results and
+(optionally) a violin plot of the number of significant pairs and analytes.
+```{r eval = FALSE}
+perm.res <- IntLIM::PermuteIntLIM(data = inputDatafilt, stype = "drugscore", outcome = 1,
+                      independent.var.type = 2, pvalcutoff = 0.10, interactionCoeffPercentile = 0.5, 
+                   rsquaredCutoff = 0.2, covar = c("cellnbr"),
+                   num.permutations = 5, continuous = TRUE)
+summary <- PermutationSummary(permResults = perm.res, inputResults = myres.sig,
+                              plot = TRUE)
+```
 ##References
 
 Siddiqui JK, Baskin E, Liu M, Cantemir-Stone CZ, Zhang B, Bonneville R, McElroy JP,