Skip to content

Commit

Permalink
Added plotting functionality and testing for permutations
Browse files Browse the repository at this point in the history
  • Loading branch information
taraeicher committed Feb 4, 2022
1 parent ee47662 commit 4016e27
Show file tree
Hide file tree
Showing 8 changed files with 319 additions and 5 deletions.
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ Depends: R (>= 3.2.0)
Imports:
ComplexHeatmap,
DT,
ggplot2,
gplots,
graphics,
grDevices,
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ export(InteractionCoefficientGraph)
export(OutputData)
export(OutputResults)
export(PValueBoxPlots)
export(PermutationSummary)
export(PermuteIntLIM)
export(PlotDistributions)
export(PlotPCA)
Expand Down
2 changes: 1 addition & 1 deletion R/crossvalfunctions.R
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ RunCrossValidation <- function(inputData,
folds=folds)

# Filter the folds.
inputDataFilt <- FilterDataFolds(inputData=inputDataFolds,
inputDataFilt <- FilterDataFolds(inputDataFolds=inputDataFolds,
analyteType1perc=analyteType1perc,
analyteType2perc=analyteType2perc,
analyteMiss=analyteMiss,
Expand Down
85 changes: 85 additions & 0 deletions R/plottingfunctions.R
Original file line number Diff line number Diff line change
Expand Up @@ -823,4 +823,89 @@ HistogramPairs <- function(inputResults, type = 'outcome', breaks = 50){
}else{
stop("Only two valid types: outcome or independent. Invalid type entered")
}
}

#' Return the number of significant analytes / pairs per permutation and the number
#' of permutations in which each analyte is significant.
#' If plot = TRUE, show a box plot of number of significant analytes over permutations,
#' overlaid with the number of significant analytes in the original data.
#'
#' @param inputResults Data frame with model results (output of ProcessResults())
#' @param permResults An object of type PermutationResults (output of PermuteIntLIM())
#' @param plot Whether or not to show the boxplot. Default is TRUE.
#' @export
PermutationSummary <- function(inputResults, permResults, plot){

# Prevent "visible binding for global variable" notes.
Count <- Type <- NULL

# Compute pair significance counts.
allSignificantPairs <- do.call(c, permResults[[2]])
pairSignificanceCounts <- table(allSignificantPairs)
pairSignificanceCounts <- as.data.frame(pairSignificanceCounts[order(-pairSignificanceCounts)])

# Compute summary.
pairCountDistrib <- permResults[[1]]$Num_Significant_Pairs
analyte1 <- lapply(1:length(permResults[[2]]), function(i){
return(unlist(lapply(permResults[[2]][[i]], function(string){
return(strsplit(string, split="__V__")[[1]][1])
})))
})
analyte2 <- lapply(1:length(permResults[[2]]), function(i){
return(unlist(lapply(permResults[[2]][[i]], function(string){
return(strsplit(string, split="__V__")[[1]][2])
})))
})
analyte1CountDistrib <- unlist(lapply(1:length(analyte1), function(i){
return(length(unique(analyte1[[i]])))
}))
analyte2CountDistrib <- unlist(lapply(1:length(analyte2), function(i){
return(length(unique(analyte2[[i]])))
}))
countDistribs <- data.frame(Pairs = pairCountDistrib,
Independent = analyte1CountDistrib,
Outcome = analyte2CountDistrib)

# Set up data for input.
significantCounts <- data.frame(Count=c(countDistribs$Pairs,
countDistribs$Independent,
countDistribs$Outcome),
Type=c(rep("Pair", nrow(permResults[[1]])),
rep("Independent.Variable", nrow(permResults[[1]])),
rep("Outcome", nrow(permResults[[1]]))))

# Compute the same for the original data.
PairCount <- nrow(inputResults)
inputIndependentCount <- length(unique(inputResults$Analyte1))
inputOutcomeCount <- length(unique(inputResults$Analyte2))

# Make plot.
cols <- c("Original.Data"="red")
fills <- c("Permuted.Data"="black")
if(plot == TRUE){
plt <- ggplot2::ggplot(significantCounts, ggplot2::aes(x=Type, y=Count)) +
ggplot2::geom_violin(ggplot2::aes(fill = "Permuted.Data")) +
ggplot2::theme_bw() +
ggplot2::theme(axis.title.x=ggplot2::element_blank(),
panel.border = ggplot2::element_blank(),
panel.grid.major = ggplot2::element_blank(),
panel.grid.minor = ggplot2::element_blank(),
axis.line = ggplot2::element_line(colour = "black")) +
ggplot2::annotation_logticks(sides = "l", scaled = TRUE) +
ggplot2::scale_y_log10() +
ggplot2::geom_boxplot(width=0.05, fill = "white") +
ggplot2::geom_segment(ggplot2::aes(x = 0.6, xend = 1.4, y = inputIndependentCount,
yend = inputIndependentCount, color = "Original.Data")) +
ggplot2::geom_segment(ggplot2::aes(x = 1.6, xend = 2.4, y = inputOutcomeCount,
yend = inputOutcomeCount, color = "Original.Data")) +
ggplot2::geom_segment(ggplot2::aes(x = 2.6, xend = 3.4, y = PairCount,
yend = PairCount, color = "Original.Data")) +
ggplot2::scale_colour_manual(name = "", values=cols) +
ggplot2::scale_fill_manual(name = "", values=fills)
print(plt)
}

# Return summary.
return(list(SignificanceCountTable = pairSignificanceCounts,
CountDistributions = countDistribs))
}
24 changes: 24 additions & 0 deletions man/PermutationSummary.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

171 changes: 171 additions & 0 deletions tests/testthat/test-perm.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
# When input data is not valid, should terminate early.
test_that("Errors on wrong input type",{
expect_error(PermuteIntLIM(data = "?!%", stype="something", outcome = 1,
independent.var.type = 2, num.permutations = 4),
"The data must be an IntLimData object", ignore.case = TRUE)
})

# When the number of permutations is not sufficient, should terminate early.
test_that("Insufficient fold count causes an error",{
# Create toy data.
pData <- data.frame("Feat1"=c(47.1,26.2,84.3,98.4,43.5,82.6,13.7,87.8),
"Feat2"=c(37.1,40.2,80.3,83.4,6.5,12.6,43.7,75.8),
"Feat3"=c(14.1,74.2,11.3,19.4,73.5,55.6,18.7,91.8),
"Level"=c("Low", "Medium", "Low", "Medium", "Medium", "Low",
"Low", "Medium"))
rownames(pData) <- c("Fred", "Wilma", "Pebbles", "Bambam", "Betty", "Barney",
"Dino", "Hoppy")
geneData <- data.frame("Fred"=c(46.1,20.2,59.3), "Wilma"=c(11.1,34.2,19.3),
"Pebbles"=c(28.1,71.2,94.3), "Bambam"=c(51.1,91.2,32.3),
"Betty"=c(73.1,26.2,40.3), "Barney"=c(91.1,99.2,12.3),
"Dino"=c(38.1,44.2,60.3), "Hoppy"=c(91.1,93.2,63.3))
rownames(geneData) <- c("Gene1", "Gene2", "Gene3")
metabData <- data.frame("Fred"=c(60.1,32.2,81.3), "Wilma"=c(68.1,58.2,45.3),
"Pebbles"=c(30.1,61.2,67.3), "Bambam"=c(36.1,7.2,79.3),
"Betty"=c(5.1,87.2,91.3), "Barney"=c(5.1,87.2,91.3),
"Dino"=c(99.1,10.2,85.3), "Hoppy"=c(51.1,14.2,76.3))
rownames(metabData) <- c("Metab1", "Metab2", "Metab3")
metabMetaData <- data.frame("id"=c("Metab1", "Metab2", "Metab3"), "metabname"=
c("Metab1", "Metab2", "Metab3"))
geneMetaData <- data.frame("id"=c("Gene1", "Gene2", "Gene3"), "genename"=
c("Gene1", "Gene2", "Gene3"))
dat <- methods::new("IntLimData", analyteType1=as.matrix(metabData),
analyteType2=as.matrix(geneData),
analyteType1MetaData = metabMetaData,
analyteType2MetaData = geneMetaData,
sampleMetaData = pData)
expect_error(PermuteIntLIM(data = dat, stype="Level", outcome = 1,
independent.var.type = 2, num.permutations = 0),
"The number of permutations must be greater than or equal to 1",
ignore.case = TRUE)
})

# Check that the multi-omic function works with meta-data.
test_that("Function works correctly in multi-omic case.", {
# Create toy data.
pData <- data.frame("Feat1"=c(47.1,26.2,84.3,98.4,43.5,82.6,13.7,87.8),
"Feat2"=c(37.1,40.2,80.3,83.4,6.5,12.6,43.7,75.8),
"Feat3"=c(14.1,74.2,11.3,19.4,73.5,55.6,18.7,91.8),
"Level"=c("Low", "Medium", "Low", "Medium", "Medium", "Low",
"Low", "Medium"))
rownames(pData) <- c("Fred", "Wilma", "Pebbles", "Bambam", "Betty", "Barney",
"Dino", "Hoppy")
geneData <- data.frame("Fred"=c(46.1,20.2,59.3), "Wilma"=c(11.1,34.2,19.3),
"Pebbles"=c(28.1,71.2,94.3), "Bambam"=c(51.1,91.2,32.3),
"Betty"=c(73.1,26.2,40.3), "Barney"=c(91.1,99.2,12.3),
"Dino"=c(38.1,44.2,60.3), "Hoppy"=c(91.1,93.2,63.3))
rownames(geneData) <- c("Gene1", "Gene2", "Gene3")
metabData <- data.frame("Fred"=c(60.1,32.2,81.3), "Wilma"=c(68.1,58.2,45.3),
"Pebbles"=c(30.1,61.2,67.3), "Bambam"=c(36.1,7.2,79.3),
"Betty"=c(5.1,87.2,91.3), "Barney"=c(5.1,87.2,91.3),
"Dino"=c(99.1,10.2,85.3), "Hoppy"=c(51.1,14.2,76.3))
rownames(metabData) <- c("Metab1", "Metab2", "Metab3")
metabMetaData <- data.frame("id"=c("Metab1", "Metab2", "Metab3"), "metabname"=
c("Metab1", "Metab2", "Metab3"))
geneMetaData <- data.frame("id"=c("Gene1", "Gene2", "Gene3"), "genename"=
c("Gene1", "Gene2", "Gene3"))
dat <- methods::new("IntLimData", analyteType1=as.matrix(metabData),
analyteType2=as.matrix(geneData),
analyteType1MetaData = metabMetaData,
analyteType2MetaData = geneMetaData,
sampleMetaData = pData)

# Discrete
res <- PermuteIntLIM(data = dat, stype="Level", outcome = 1,
independent.var.type = 2, num.permutations = 4)
expect_equal(nrow(res[[1]]), 4)
expect_equal(ncol(res[[1]]), 2)
expect_equal(length(res[[2]]), 4)

# Continuous
res <- PermuteIntLIM(data = dat, stype="Feat1", outcome = 1,
independent.var.type = 2, num.permutations = 4,
continuous = TRUE)
expect_equal(nrow(res[[1]]), 4)
expect_equal(ncol(res[[1]]), 2)
expect_equal(length(res[[2]]), 4)
})

# Check that we are still able to run without the metadata.
test_that("Function still works when metadata is missing.", {

# Create toy data.
pData <- data.frame("Feat1"=c(47.1,26.2,84.3,98.4,43.5,82.6,13.7,87.8),
"Feat2"=c(37.1,40.2,80.3,83.4,6.5,12.6,43.7,75.8),
"Feat3"=c(14.1,74.2,11.3,19.4,73.5,55.6,18.7,91.8),
"Level"=c("Low", "Medium", "Low", "Medium", "Medium", "Low",
"Low", "Medium"))
rownames(pData) <- c("Fred", "Wilma", "Pebbles", "Bambam", "Betty", "Barney",
"Dino", "Hoppy")
geneData <- data.frame("Fred"=c(46.1,20.2,59.3), "Wilma"=c(11.1,34.2,19.3),
"Pebbles"=c(28.1,71.2,94.3), "Bambam"=c(51.1,91.2,32.3),
"Betty"=c(73.1,26.2,40.3), "Barney"=c(91.1,99.2,12.3),
"Dino"=c(38.1,44.2,60.3), "Hoppy"=c(91.1,93.2,63.3))
rownames(geneData) <- c("Gene1", "Gene2", "Gene3")
metabData <- data.frame("Fred"=c(60.1,32.2,81.3), "Wilma"=c(68.1,58.2,45.3),
"Pebbles"=c(30.1,61.2,67.3), "Bambam"=c(36.1,7.2,79.3),
"Betty"=c(5.1,87.2,91.3), "Barney"=c(5.1,87.2,91.3),
"Dino"=c(99.1,10.2,85.3), "Hoppy"=c(51.1,14.2,76.3))
rownames(metabData) <- c("Metab1", "Metab2", "Metab3")
dat <- methods::new("IntLimData", analyteType1=as.matrix(metabData),
analyteType2=as.matrix(geneData),
analyteType1MetaData = as.data.frame(matrix(, nrow = 0, ncol = 0)),
analyteType2MetaData = as.data.frame(matrix(, nrow = 0, ncol = 0)),
sampleMetaData = pData)

# Discrete
res <- PermuteIntLIM(data = dat, stype="Level", outcome = 1,
independent.var.type = 2, num.permutations = 4)
expect_equal(nrow(res[[1]]), 4)
expect_equal(ncol(res[[1]]), 2)
expect_equal(length(res[[2]]), 4)

# Continuous
res <- PermuteIntLIM(data = dat, stype="Feat1", outcome = 1,
independent.var.type = 2, num.permutations = 4,
continuous = TRUE)
expect_equal(nrow(res[[1]]), 4)
expect_equal(ncol(res[[1]]), 2)
expect_equal(length(res[[2]]), 4)
})

# Check that we are able to run with both single-omic and multi-omic data.
test_that("Single-omic data gives expected results.", {
# Create toy data.
pData <- data.frame("Feat1"=c(47.1,26.2,84.3,98.4,43.5,82.6,13.7,87.8),
"Feat2"=c(37.1,40.2,80.3,83.4,6.5,12.6,43.7,75.8),
"Feat3"=c(14.1,74.2,11.3,19.4,73.5,55.6,18.7,91.8),
"Level"=c("Low", "Medium", "Low", "Medium", "Medium", "Low",
"Low", "Medium"))
rownames(pData) <- c("Fred", "Wilma", "Pebbles", "Bambam", "Betty", "Barney",
"Dino", "Hoppy")
metabData <- data.frame("Fred"=c(60.1,32.2,81.3), "Wilma"=c(68.1,58.2,45.3),
"Pebbles"=c(30.1,61.2,67.3), "Bambam"=c(36.1,7.2,79.3),
"Betty"=c(5.1,87.2,91.3), "Barney"=c(5.1,87.2,91.3),
"Dino"=c(99.1,10.2,85.3), "Hoppy"=c(51.1,14.2,76.3))
rownames(metabData) <- c("Metab1", "Metab2", "Metab3")
metabMetaData <- data.frame("id"=c("Metab1", "Metab2", "Metab3"), "metabname"=
c("Metab1", "Metab2", "Metab3"))
geneMetaData <- data.frame("id"=c("Gene1", "Gene2", "Gene3"), "genename"=
c("Gene1", "Gene2", "Gene3"))
dat <- methods::new("IntLimData", analyteType1=as.matrix(metabData),
analyteType2=matrix(, nrow = 0, ncol = 0),
analyteType1MetaData = metabMetaData,
analyteType2MetaData = geneMetaData,
sampleMetaData = pData)

# Discrete
res <- PermuteIntLIM(data = dat, stype="Level", outcome = 1,
independent.var.type = 1, num.permutations = 4)
expect_equal(nrow(res[[1]]), 4)
expect_equal(ncol(res[[1]]), 2)
expect_equal(length(res[[2]]), 4)

# Continuous
res <- PermuteIntLIM(data = dat, stype="Feat1", outcome = 1,
independent.var.type = 1, num.permutations = 4,
continuous = TRUE)
expect_equal(nrow(res[[1]]), 4)
expect_equal(ncol(res[[1]]), 2)
expect_equal(length(res[[2]]), 4)
})
20 changes: 18 additions & 2 deletions vignettes/IntLimVignette.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -277,7 +277,8 @@ is a list of significant analyte pairs.
```{r}
myres.gm.sig <- IntLIM::ProcessResults(inputResults = myres.gm,
inputData = inputDatafilt,
pvalcutoff = 0.10, rsquaredCutoff = 0.2)
pvalcutoff = 0.10, rsquaredCutoff = 0.2,
interactionCoeffPercentile = 0.5)
```

The __InteractionCoefficientGraph()__ function allows the user to observe the
Expand Down Expand Up @@ -322,10 +323,25 @@ crossValResults <- RunCrossValidation(inputData = inputData, analyteType1perc =
analyteType2perc = 0.10, analyteMiss = 0.80,
stype="PBO_vs_Leukemia", covar = c("cellnbr"), outcome = c(1),
independent.var.type = c(2), save.covar.pvals = TRUE,
pvalcutoff = 0.10, rsquaredCutoff = 0.2,
pvalcutoff = 0.10, rsquaredCutoff = 0.2, interactionCoeffPercentile = 0.5
folds = 4)
IntLIM::PlotFoldOverlapUpSet(crossValResults$processed)
```

##Run Permutation
You can also choose to run end-to-end permutation. This combines the steps of
permuting, running IntLIM, and processing results for multiple permutations.
__PermutationSummary__ returns a summary of the permutation results and
(optionally) a violin plot of the number of significant pairs and analytes.
```{r eval = FALSE}
perm.res <- IntLIM::PermuteIntLIM(data = inputDatafilt, stype = "PBO_vs_Leukemia", outcome = 1,
independent.var.type = 2, pvalcutoff = 0.10, interactionCoeffPercentile = 0.5,
rsquaredCutoff = 0.2, covar = c("cellnbr"),
num.permutations = 5, continuous = TRUE)
summary <- PermutationSummary(permResults = perm.res, inputResults = myres.sig,
plot = TRUE)
```

##References

Siddiqui JK, Baskin E, Liu M, Cantemir-Stone CZ, Zhang B, Bonneville R, McElroy JP,
Expand Down
20 changes: 18 additions & 2 deletions vignettes/IntLimVignetteContinuous.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -275,7 +275,8 @@ ___NA___ for continuous phenotypes.
myres.gm.sig <- IntLIM::ProcessResults(inputResults = myres.gm,
inputData = inputDatafilt,
pvalcutoff = 0.10,
rsquaredCutoff = 0.2)
rsquaredCutoff = 0.2,
interactionCoeffPercentile = 0.5)
```
The __HistogramPairs()__ function plots the count of analyte pairs by either
outcome or independent variable analyte.
Expand Down Expand Up @@ -315,9 +316,24 @@ crossValResults <- RunCrossValidation(inputData = inputData, analyteType1perc =
stype="drugscore", covar = c("cellnbr"), outcome = c(1),
independent.var.type = c(2), save.covar.pvals = TRUE,
pvalcutoff = 0.10, diffcorr = NA, rsquaredCutoff = 0.2,
folds = 4, corrtype = NA, continuous = TRUE)
folds = 4, corrtype = NA, continuous = TRUE,
interactionCoeffPercentile = 0.5)
IntLIM::PlotFoldOverlapUpSet(crossValResults$processed)
```

##Run Permutation
You can also choose to run end-to-end permutation. This combines the steps of
permuting, running IntLIM, and processing results for multiple permutations.
__PermutationSummary__ returns a summary of the permutation results and
(optionally) a violin plot of the number of significant pairs and analytes.
```{r eval = FALSE}
perm.res <- IntLIM::PermuteIntLIM(data = inputDatafilt, stype = "drugscore", outcome = 1,
independent.var.type = 2, pvalcutoff = 0.10, interactionCoeffPercentile = 0.5,
rsquaredCutoff = 0.2, covar = c("cellnbr"),
num.permutations = 5, continuous = TRUE)
summary <- PermutationSummary(permResults = perm.res, inputResults = myres.sig,
plot = TRUE)
```
##References

Siddiqui JK, Baskin E, Liu M, Cantemir-Stone CZ, Zhang B, Bonneville R, McElroy JP,
Expand Down

0 comments on commit 4016e27

Please sign in to comment.