Skip to content

ichcha-m/cophescan

Repository files navigation

CoPheScan

CRAN_Status_Badge

The cophescan package implements Coloc adapted Phenome-wide Scan (CoPheScan), a Bayesian method to perform Phenome-wide association studies (PheWAS) that identifies causal associations between genetic variants and phenotypes while simultaneously accounting for confounding due to linkage disequilibrium.

See the description vignette for background and references: Introduction to CoPheScan


Installation

Install cophescan from CRAN

# Install the released version of cophescan from CRAN
install.packages("cophescan")

Install the development version from GitHub

# Install the 'remotes' helper only if it is not already available.
# requireNamespace() checks availability without attaching the package,
# which is all that is needed because install_github() is called via `::`.
if (!requireNamespace("remotes", quietly = TRUE)) {
  install.packages("remotes")
}
remotes::install_github("ichcha-m/cophescan")

Vignettes

For a detailed walkthrough of cophescan, browse the vignettes at https://ichcha-m.github.io/cophescan/.

Vignette articles:

1. Introduction to CoPheScan

2. Input data format

3. An example for running CoPheScan with fixed priors

4. An example for running CoPheScan with hierarchical priors


Quick start

Usage

# Attach the cophescan package
library(cophescan)
## Load the simulated summary stats data of 24 traits
data("cophe_multi_trait_data")
# List the components of the example data set
names(cophe_multi_trait_data)
Single trait
# Pull the summary statistics of the first trait and the id of the query
# variant out of the example data set.
query_trait_1 <- cophe_multi_trait_data$summ_stat[["Trait_1"]]
querysnpid <- cophe_multi_trait_data$querysnpid
print(querysnpid)

# Derive each SNP's position from its id: the second "-"-separated field of
# the id is converted to a number. vapply() guarantees a numeric vector even
# for empty input, where sapply() would return a list.
query_trait_1$position <- vapply(
  query_trait_1$snp,
  function(x) as.numeric(unlist(strsplit(x, "-"))[2]),
  numeric(1)
)

# Manhattan plot of the trait with the query variant passed alongside.
plot_trait_manhat(query_trait_1, querysnpid)

# Run cophescan under a single causal variant assumption by providing the
# snpid of the query variant (querysnpid) for the query trait, then print a
# summary of the result.
res.single <- cophe.single(query_trait_1, querysnpid = querysnpid, querytrait='Trait_1')
summary(res.single)
# Run cophescan with susie (multiple variants) by providing the snpid of the
# query variant (querysnpid) for the query trait. The LD component of the
# example data set is attached to the trait data before the call.
query_trait_1$LD <- cophe_multi_trait_data$LD
res.susie <- cophe.susie(query_trait_1, querysnpid = querysnpid, querytrait='Trait_1')
summary(res.susie)
Run multi-trait analysis
# Scan every trait of the example data set against the query variant, using
# the trait names of the summary statistics list and the "single" method.
res.multi <- cophe.multitrait(
  cophe_multi_trait_data$summ_stat,
  querysnpid = querysnpid,
  querytrait.names = names(cophe_multi_trait_data$summ_stat),
  method = "single"
)
Plot cophescan results
# Build the plots for the multi-trait results.
cophe.plots.res <- cophe_plot(
  res.multi,
  traits.dat = cophe_multi_trait_data$summ_stat,
  querysnpid = querysnpid
)

# Arrange the pval, ppHa and ppHc plots side by side in a single row.
ggpubr::ggarrange(
  cophe.plots.res$pval,
  cophe.plots.res$ppHa,
  cophe.plots.res$ppHc,
  nrow = 1
)

# Optional heatmap of the results (left disabled):
# cophe.plots.hmp <- cophe_heatmap(res.multi, traits.dat = cophe_multi_trait_data$summ_stat, querysnpid = querysnpid, color=colorRampPalette(rev(RColorBrewer::brewer.pal(n = 9, name ="Greens")))(100))
                                    
Run hierarchical model for priors
# Run the hierarchical model for the priors on the multi-trait results and
# keep the log-likelihood trace and the parameter matrix for diagnostics.
cophe.hier.res <- run_metrop_priors(
  res.multi,
  posterior = TRUE,
  avg_posterior = TRUE,
  pik = TRUE
)
ll <- cophe.hier.res$ll
params <- cophe.hier.res$parameters

### store user parameters
old_par <- par(no.readonly = TRUE)

## Plot mcmc diagnostics
# Trace plots on a 2 x 2 grid: the log-likelihood, then rows 1 and 2 of the
# parameter matrix (titled "alpha" and "beta") against the column index.
# seq_along()/seq_len() keep the x axis empty rather than c(1, 0) when there
# is nothing to plot.
par(mfrow = c(2, 2))
plot(seq_along(ll), ll, main = "loglik", type = "l", col = "orange")
plot(seq_len(ncol(params)), params[1, ], main = "alpha", type = "l", col = "orange")
plot(seq_len(ncol(params)), params[2, ], main = "beta", type = "l", col = "orange")

### reset user parameters
par(old_par)
Predict
# Combine the averaged posterior with the input data of the hierarchical
# model, predict the hypothesis for each row, and show the last rows.
# FALSE is spelled out because the shorthand F can be reassigned.
res.post.prob <- cbind(cophe.hier.res$avg.posterior, cophe.hier.res$data)
res.hier.predict <- cophe.hyp.predict(as.data.frame(res.post.prob))
tail(res.hier.predict, row.names = FALSE)