Providing code for the paper figures

PCAWG-11 · Jan 4, 2021 · d976b47 · d976b47
1 parent b66c8ac
commit d976b47
Show file tree

Hide file tree

Showing 102 changed files with 7,399 additions and 0 deletions.
diff --git a/code_figures_paper/README.md b/code_figures_paper/README.md
@@ -0,0 +1 @@
+Code and data to produce the manuscript figures
diff --git a/code_figures_paper/figure_1b/README.txt b/code_figures_paper/figure_1b/README.txt
@@ -0,0 +1 @@
+Before running: gunzip wgd.status.txt.gz
diff --git a/code_figures_paper/figure_1b/figure_1b.R b/code_figures_paper/figure_1b/figure_1b.R
@@ -0,0 +1,21 @@
+library(ggplot2)
+library(RColorBrewer)
+
+wgdStatusSeparation<-function(wgd_anno){
+
+  p1<-ggplot(data=wgd_anno, aes(y=ploidy, x=hom, colour=wgd_status))+
+    geom_point()+
+    scale_colour_manual(values=c("#404040","#E69F00"),labels=c("No WGD","WGD"),name="")+
+    scale_y_continuous(name="Ploidy")+
+    scale_x_continuous(name="Fraction of genome with LOH")+
+    geom_abline(intercept = 2.9, slope = -2, lty=2, colour="#009E73")+
+    theme_bw()+
+    theme(axis.line = element_line(colour = "black"), panel.border = element_blank(), panel.background = element_blank(),legend.position="bottom",axis.title=element_text(size=22),axis.text=element_text(size=20),panel.grid.major.y = element_line(size=0.5, colour="grey95"),
+          panel.grid.major.x = element_line(size=0.5, colour="grey95"), strip.text.x=element_text(size=18),strip.text.y=element_text(size=22),legend.text = element_text(size = 20),legend.title = element_text(size = 20))
+  ggsave(plot=p1,height=10,width=12, filename=paste("1c.wgdSeparation.pdf", sep=""), useDingbats=FALSE)
+
+}
+
+input<-read.table("wgd.status.txt", stringsAsFactors = F, sep = "\t", header=T)
+wgdStatusSeparation(input)
+
diff --git a/code_figures_paper/figure_1b/wgd.status.txt.gz b/code_figures_paper/figure_1b/wgd.status.txt.gz
diff --git a/code_figures_paper/figure_1c/README.txt b/code_figures_paper/figure_1c/README.txt
@@ -0,0 +1 @@
+Before running:  gunzip summary_stats_agreement.txt.gz
diff --git a/code_figures_paper/figure_1c/figure_1c.R b/code_figures_paper/figure_1c/figure_1c.R
@@ -0,0 +1,61 @@
+#
+# Creates figure showing agreement obtained by the consensus copynumber pipeline at different levels of confidence
+#
+
+library(ggplot2)
+library(readr)
+library(reshape2)
+
+px2cm = function(px) { return(px*2.54/96) }
+
+dat = readr::read_tsv("summary_stats_agreement.txt")
+dat$star_3 = dat$agreement_level_a+dat$agreement_level_b+dat$agreement_level_c
+dat$star_2 = dat$agreement_level_d+dat$agreement_level_e+dat$agreement_level_f
+dat$star_1 = dat$agreement_level_g+dat$agreement_level_h+dat$agreement_level_i
+
+colrs = c("#009E73", "#56B4E9", "#E69F00", "#F0E442", "#0072B2", "#D55E00", "#CC79A7")
+
+
+plot_data = data.frame(samplename=1:nrow(dat))
+for (i in 11:13) {
+  dat_s = data.frame(samplename=dat[[1]], agreement=rowSums(dat[, 11:i, drop=F]))
+  dat_s = dat_s[with(dat_s, order(agreement, decreasing=T)),]
+  dat_s$samplename = 1:nrow(dat)
+  plot_data = cbind(plot_data, dat_s[,2])
+}
+
+colnames(plot_data) = c("tumour", "(Near) complete agreement, clonal", "Strict majority agreement, clonal", "Complete or strict majority vote,\n rounded subclonal")
+plot_data_m = reshape2::melt(plot_data, id.vars=c("tumour"))
+plot_data_m$variable = factor(plot_data_m$variable, levels=sort(unique(plot_data_m$variable)))
+
+colnames(plot_data_m)[colnames(plot_data_m)=="variable"] = "Consensus"
+p = ggplot(plot_data_m) + geom_point(mapping=aes(x=tumour, y=value, colour=Consensus))
+p = p + xlab("Tumour") + ylab("Frac. genome agree") + scale_colour_manual(values=colrs) +
+  scale_x_continuous(breaks=seq(0, nrow(dat), 500), limits=c(0, nrow(dat))) +
+  theme_bw() + theme(axis.title.x=element_text(colour="black",size=22,face="plain"),
+                     axis.text.x=element_text(colour="black",size=18,face="plain"),
+                     axis.text.y = element_text(colour="black",size=18,face="plain"),  
+                     axis.title.y = element_text(colour="black",size=22,face="plain"),
+                     strip.text.x = element_text(colour="black",size=24,face="plain"),
+                     plot.title = element_text(colour="black",size=24,face="plain"),
+                     legend.text=element_text(colour="black",size=18,face="plain"),
+                     legend.title=element_text(colour="black",size=20,face="plain"),
+                     panel.border = element_blank(),
+                     axis.line = element_line(colour = "black")) +
+  guides(colour = guide_legend(override.aes = list(size=4)))
+
+png("copynumber_agreement.png", width=900, height=370)
+print(p)
+dev.off()
+
+ggsave(filename="copynumber_agreement.pdf", plot=p, width=px2cm(1200), height=px2cm(370), units="cm", useDingbats=FALSE)
+
+# numbers for the paper:
+# mean(rowSums(dat[,11:12]))
+# median(rowSums(dat[,11:12]))
+# sd(rowSums(dat[,11:12]*100))
+
+
+
+
+
diff --git a/code_figures_paper/figure_1c/summary_stats_agreement.txt.gz b/code_figures_paper/figure_1c/summary_stats_agreement.txt.gz
diff --git a/code_figures_paper/figure_1d/README.txt b/code_figures_paper/figure_1d/README.txt
@@ -0,0 +1,3 @@
+Before running:
+gunzip pcawg_similarities_before_MTimer_no_wcc.Rdata.gz
+gunzip simclone1000_similarities_with_truth_before_MTimer_noWCC.Rdata.gz
diff --git a/code_figures_paper/figure_1d/figure1d.R b/code_figures_paper/figure_1d/figure1d.R
@@ -0,0 +1,296 @@
+###############################################################
+## Load colour library
+library(RColorBrewer)
+###############################################################
+## Load precomputed pairwise scores/similarities
+load("pcawg_similarities_before_MTimer_no_wcc.Rdata")
+load("simclone1000_similarities_with_truth_before_MTimer_noWCC.Rdata")
+###############################################################
+## assign truth and simulation scores/similarities
+tru <- pcawg.overall.table[-c(18),-c(18)]
+simT <- sim <- simclone1000.overall.table
+###############################################################
+## Plotting parameters
+lwdSEG <- 1.5
+COL1 <- rgb(.7,.2,.3)
+COL2 <- rgb(.2,.2,.6)
+###############################################################
+
+
+
+###############################################################
+## -------------- functions     ---------------------------- ##
+###############################################################
+getColnames <- function(methods)
+{
+    cols <- col2rgb(RColorBrewer:::brewer.pal(12,"Paired"))/255
+    N1 <- 4## C
+    N2 <- 10 ## I
+    N3 <- 6## R
+    sapply(methods,function(x) if(x%in%c("CICC","CSR","WM"))
+                                   return(rgb(cols[1,N1],cols[2,N1],cols[3,N1]))
+                               else if(grepl("rd_",x))
+                                   return(rgb(cols[1,N3],cols[2,N3],cols[3,N3]))
+                               else
+                                   return(rgb(cols[1,N2],cols[2,N2],cols[3,N2])))
+}
+
+getColnames <- function(methods)
+{
+    cols <- cbind(c(124,190,243),c(214,173,254),c(255,168,113))/255
+    cols <- (cbind(c(200,200,200),c(150,150,150),c(55,55,55))/255)[,3:1][,c(2,1,3)]
+    N1 <- 1
+    N2 <- 2
+    N3 <- 3
+    sapply(methods,function(x) if(x%in%c("CICC","CSR","WM"))
+                                   return(rgb(cols[1,N1],cols[2,N1],cols[3,N1]))
+                               else if(grepl("rd_",x))
+                                   return(rgb(cols[1,N3],cols[2,N3],cols[3,N3]))
+                               else if(x=="Truth")
+                                   return(rgb(0,0,0))
+                               else
+                                   return(rgb(cols[1,N2],cols[2,N2],cols[3,N2])))
+}
+
+getCol1 <- function(x)
+{
+    vec <- c(sapply(seq(0,1,length.out=100),function(x) rgb(.7,.1,.3,(1-x)*.9+.1)),
+             sapply(seq(0,1,length.out=100),function(x) rgb(.3,.1,.7,.1+x*.9)))
+    vec[round(x*199)+1]
+}
+
+getCol3 <- function(x)
+{
+    vec <- c(sapply(seq(0,1,length.out=100),function(x) rgb(.7,.4,.3,(1-x)*.9+.1)),
+             sapply(seq(0,1,length.out=100),function(x) rgb(.3,.4,.7,.1+x*.9)))
+    vec[round(x*199)+1]
+}
+
+###############################################################
+## this is what a pre-cancerous code looks like:
+getCol2 <- getCol1
+getCol1 <- getCol3
+getCol3 <- getCol2
+getCol2 <- getCol1
+###############################################################
+
+getTEXT <- function(x)
+{
+    if(x%in%c("CICC","Weme","CSR")) return("C")
+    if(grepl("rd",x)) return("R")
+    else return("I")
+}
+
+getCOLTEXT <- function(x)
+{
+    cols <- RColorBrewer:::brewer.pal(12,"Paired")
+    cols <- paste0(cols,"BB")
+    N1 <- 4## C
+    N2 <- 10 ## I
+    N3 <- 6## R
+    if(x%in%c("CICC","Weme","CSR")) return(cols[N1])
+    if(grepl("rd",x)) return(cols[N3])
+    else return(cols[N2])
+}
+
+getCOLTEXT <- function(x)
+{
+    cols <- cbind(c(124,190,243),c(214,173,254),c(255,168,113))/255
+    cols <- (cbind(c(200,200,200),c(150,150,150),c(55,55,55))/255)[,3:1][,c(2,1,3)]
+    N1 <- 1
+    N2 <- 2
+    N3 <- 3
+    paste0(sapply(x,function(x) if(x%in%c("CICC","CSR","Weme"))
+                                   return(rgb(cols[1,N1],cols[2,N1],cols[3,N1]))
+                               else if(grepl("rd_",x))
+                                   return(rgb(cols[1,N3],cols[2,N3],cols[3,N3]))
+                               else if(grepl("Truth",x))
+                                   return(rgb(0,0,0))
+                               else
+                                   return(rgb(cols[1,N2],cols[2,N2],cols[3,N2]))),"BB")
+}
+
+addBorder <- function(i,j,xpas,ypas,N=18,
+                      col1=COL1,
+                      col2=COL2,
+                      lwds=lwdSEG)
+{
+    if(i==1 & j==1)
+    {
+        segments((i-1)*xpas,j*ypas,i*xpas,j*ypas,col=col1,lwd=lwds)
+        segments((i)*xpas,j*ypas,i*xpas,(j-1)*ypas,col=col2,lwd=lwds)
+    }
+    else if(i==j)
+    {
+        segments((i-1)*xpas,j*ypas,i*xpas,j*ypas,col=col1,lwd=lwds)
+        segments((i-1)*xpas,(j-1)*ypas,(i-1)*xpas,(j)*ypas,col=col1,lwd=lwds)
+        segments((i)*xpas,j*ypas,i*xpas,(j-1)*ypas,col=col2,lwd=lwds)
+        segments((i-1)*xpas,(j-1)*ypas,i*xpas,(j-1)*ypas,col=col2,lwd=lwds)
+    }
+    else
+    {
+        if(i==1)
+        {
+            segments((i-1)*xpas,(j-1)*ypas,(i-1)*xpas,(j)*ypas,col=col1,lwd=lwds)
+        }
+        if(i==N & j!=N)
+        {
+            segments((i)*xpas,j*ypas,i*xpas,(j-1)*ypas,col=col2,lwd=lwds)
+        }
+        if(j==2)
+        {
+            segments((i-1)*xpas,(j-1)*ypas,i*xpas,(j-1)*ypas,col=col2,lwd=lwds)
+        }
+        if(j==N & i!=N)
+        {
+            segments((i-1)*xpas,j*ypas,i*xpas,j*ypas,col=col1,lwd=lwds)
+        }
+    }
+}
+
+myMap <- function(x,y,cexTEXT=.9)
+{
+    plot(0,0,xlim=c(0,1),ylim=c(0,1),
+         xlab="",ylab="",
+         xaxt="n",yaxt="n",
+         col=rgb(0,0,0,0),frame=F)
+    xpas <- 1/ncol(x)
+    ypas <- 1/nrow(x)
+    if(F){
+        x <- x-min(x,na.rm=T)
+        x <- x/max(x,na.rm=T)}
+    ord <- 1:nrow(x)
+    x <- x[ord,ord]
+    bord <- 0
+    for(i in 1:nrow(x))
+    {
+        for(j in 1:ncol(x))
+        {
+            if(i==1) polygon(c((i-1)*xpas,(i-1)*xpas,(i)*xpas-bord,(i)*xpas-bord),
+                            c((j-1)*ypas,(j)*ypas-bord,(j)*ypas-bord,(j-1)*ypas),
+                            col=getCol2(x[i,j]),border=NA)
+            else if(j>i)
+                polygon(c((i-1)*xpas,(i-1)*xpas,(i)*xpas-bord,(i)*xpas-bord),
+                        c((j-1)*ypas,(j)*ypas-bord,(j)*ypas-bord,(j-1)*ypas),
+                        col=getCol1(x[i,j]),border=NA)
+            else polygon(c((i-1)*xpas,(i-1)*xpas,(i)*xpas-bord,(i)*xpas-bord),
+                         c((j-1)*ypas,(j)*ypas-bord,(j)*ypas-bord,(j-1)*ypas),
+                         col=getCol3(x[i,j]),border=NA)
+            if(i==j)
+            {
+                bb <- xpas/20
+                polygon(c((i-1)*xpas+bb,(i-1)*xpas+bb,(i)*xpas-bb,(i)*xpas-bb),
+                        c((j-1)*ypas+bb,(j)*ypas-bb,(j)*ypas-bb,(j-1)*ypas+bb),
+                        col=getCOLTEXT(colnames(x)[i]),border=NA)
+                if(F) text((i-1)*xpas+xpas/2,
+                (j-1)*ypas+ypas/2,
+                getTEXT(colnames(x)[i]),
+                cex=cexTEXT,
+                col=getCOLTEXT(colnames(x)[i]))
+            }
+        }
+    }
+}
+
+plotHeatmap <- function(tru,sim)
+{
+    all <- rbind(rep(NA,18),cbind(rep(NA,17),tru))
+    rownames(all) <- colnames(all) <- colnames(sim)
+    for(i in 1:nrow(sim))
+        for(j in i:ncol(sim))
+        {
+            all[i,j] <- sim[i,j]
+        }
+    layout(mat=cbind(c(1,1,1,1,1,1,1),c(5,2,2,6,3,3,4)),
+           widths=c(4,1.3),
+           heights=c(1,1,1,1,1,1,1))
+    par(mar=c(1,0,1.3,0))
+    myMap(all,NA)
+    NN <- 18
+    par(mar=c(0,0,0,0))
+    plot(0,0,
+         col=rgb(0,0,0,0),
+         xaxt="n",
+         yaxt="n",
+         xlab="",
+         ylab="",
+         frame=F,
+         xlim=c(0,1),
+         ylim=c(0,1))
+    legend("left",
+           legend=c("Truth", "Consensus","Individual","Random"),
+           col=c(getCOLTEXT("Truth"),
+                 getCOLTEXT("CICC"),
+                 getCOLTEXT("Phylogic"),
+                 getCOLTEXT("rd_")),
+           pch=15,##c("C","I","R"),
+           cex=.78,box.col=rgb(0,0,0,0))
+    par(mar=c(0,0,0,0))
+    plot(0,0,
+         col=rgb(0,0,0,0),
+         xaxt="n",
+         yaxt="n",
+         xlab="",
+         ylab="",
+         frame=F,
+         xlim=c(0,1),
+         ylim=c(0,1))
+    xx1 <- 0
+    xx2 <- .2
+    ss <- seq(0,1,length.out=50)
+    scale <- .6
+    for(i in 2:length(ss))
+    {
+        polygon(c(xx1,xx1,xx2,xx2),
+                c(ss[i-1],ss[i],ss[i],ss[i-1])*scale,
+                col=getCol1(ss[i-1]),border=NA)
+    }
+    for(i in 2:length(ss))
+    {
+        polygon(c(xx1,xx1,xx2,xx2)+.3,
+                c(ss[i-1],ss[i],ss[i],ss[i-1])*scale,
+                col=getCol3(ss[i-1]),border=NA)
+    }
+    text(.15,0,"no similarity",cex=.68,pos=4)
+    text(.15,1*scale,"high similarity",cex=.68,pos=4)
+}
+
+minmaxNorm <- function(tru)
+{
+    tru <- tru-min(tru,na.rm=T)
+    tru <- tru/max(tru,na.rm=T)
+    tru
+}
+minmaxNorm.truth <- function(simT,N=18)
+{
+    simT[N,] <- minmaxNorm(simT[N,])
+    simT[,N] <- minmaxNorm(simT[,N])
+    simT
+}
+###############################################################
+
+
+
+
+###############################################################
+set.seed(1234) ##not used
+###############################################################
+## order methods
+ORD <- c(18,15,16,17,
+         11:1,
+         14:12)
+###############################################################
+## normalise scores separately
+tru <- minmaxNorm(tru)
+simT <- minmaxNorm(simT)
+simT <- minmaxNorm.truth(simT)
+###############################################################
+## plot heatmap and colour key
+pdf("heatmap.similarities.pdf",
+    width=3.12,
+    height=2.6)
+plotHeatmap(tru[ORD[-c(1)],ORD[-c(1)]],simT[ORD,ORD])
+dev.off()
+###############################################################
+## The rest is just having fun in inkscape...
+###############################################################
diff --git a/code_figures_paper/figure_1d/pcawg_similarities_before_MTimer_no_wcc.Rdata.gz b/code_figures_paper/figure_1d/pcawg_similarities_before_MTimer_no_wcc.Rdata.gz
diff --git a/...figures_paper/figure_1d/simclone1000_similarities_with_truth_before_MTimer_noWCC.Rdata.gz b/...figures_paper/figure_1d/simclone1000_similarities_with_truth_before_MTimer_noWCC.Rdata.gz
diff --git a/code_figures_paper/figure_2bc/README.txt b/code_figures_paper/figure_2bc/README.txt
@@ -0,0 +1,3 @@
+Before running:
+gunzip pcawg_weme2018_final.txt.gz
+gunzip pcawg_weme2018_wcc_final.txt.gz
diff --git a/code_figures_paper/figure_2bc/consensus.20170217.purity.ploidy.txt.gz b/code_figures_paper/figure_2bc/consensus.20170217.purity.ploidy.txt.gz
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		Code and data to produce the manuscript figures
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		Before running: gunzip summary_stats_agreement.txt.gz