-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcombined_DE_SC.R
121 lines (92 loc) · 5.73 KB
/
combined_DE_SC.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
# alternate surgical control (SC) DE list
# at least 1.5x (not log) difference in every pair of samples between two timepoints
# for comparison
# DE using DESeq
{
source('setup.R')
require(DESeq2)
pheno$timepoint <- factor(pheno$timepoint)
pheno$group <- factor(pheno$group, levels = c('SC','MVS','SVS','EC','SS'))
ind <- which(pheno$group=='SC')
ds <- DESeqDataSetFromMatrix(countData = counts[,ind],
colData = pheno[ind,],
design= ~timepoint)
res <- DESeq(ds)
res1 <- results(res, alpha = .05, contrast = c('timepoint', 'B', 'A'))
res2 <- results(res, alpha = .05, contrast = c('timepoint', 'C', 'A'))
res3 <- results(res, alpha = .05, contrast = c('timepoint', 'C', 'B'))
names(res1) <- paste0('AB_', names(res1))
names(res2) <- paste0('AC_', names(res2))
names(res3) <- paste0('BC_', names(res3))
deseq <- cbind(res1,res2,res3)
rm(res,res1,res2,res3,ind,ds)
} # hits
source('setup.R')
counts <- counts[, pheno$group == 'SC']
cpm <- t(t(1e6*counts)/colSums(counts))
pheno <- pheno[pheno$group == 'SC', ]
# check order
all(pheno$sample == c("SC1A","SC1B","SC1C","SC2A","SC2B","SC2C","SC3A","SC3B","SC3C","SC4A","SC4B","SC4C","SC5A","SC5B","SC5C","SC6A","SC6B","SC6C","SC7A","SC7B","SC7C","SC8A","SC8B","SC8C"))
all(colnames(counts) == c("SC1A","SC1B","SC1C","SC2A","SC2B","SC2C","SC3A","SC3B","SC3C","SC4A","SC4B","SC4C","SC5A","SC5B","SC5C","SC6A","SC6B","SC6C","SC7A","SC7B","SC7C","SC8A","SC8B","SC8C"))
A <- cpm[,grep('A$', pheno$sample)]
B <- cpm[,grep('B$', pheno$sample)]
C <- cpm[,grep('C$', pheno$sample)]
AB <- B/A; AB[is.nan(AB)] <- 1 # 1/0 = Inf, 0/0 = NaN
AC <- C/A; AC[is.nan(AC)] <- 1
BC <- C/B; BC[is.nan(BC)] <- 1
require(matrixStats)
cfc <- data.frame(
ABup = rowMins(as.matrix(AB)) > 1.5,
ABdn = rowMaxs(as.matrix(AB)) < 2/3,
ACup = rowMins(as.matrix(AC)) > 1.5,
ACdn = rowMaxs(as.matrix(AC)) < 2/3,
BCup = rowMins(as.matrix(BC)) > 1.5,
BCdn = rowMaxs(as.matrix(BC)) < 2/3
)
rownames(cfc) <- rownames(cpm)
rm(A,B,C,AB,BC,AC)
table(deseq$AB_padj < .05 & abs(deseq$AB_log2FoldChange) > 1.5, cfc$ABup | cfc$ABdn, useNA = 'ifany')
table(deseq$AC_padj < .05 & abs(deseq$AC_log2FoldChange) > 1.5, cfc$ACup | cfc$ACdn, useNA = 'ifany')
table(deseq$BC_padj < .05 & abs(deseq$BC_log2FoldChange) > 1.5, cfc$BCup | cfc$BCdn, useNA = 'ifany')
plot(deseq$AB_log2FoldChange, -log10(deseq$AB_padj), col='grey', main = 'Timepoint A vs. B', xlab='Avg Log2 FC', ylab='-log10 Padj', pch=16)
ind <- which(deseq$AB_padj < .05 & abs(deseq$AB_log2FoldChange) > 1.5)
points(deseq$AB_log2FoldChange[ind], -log10(deseq$AB_padj)[ind], col=4, pch=16)
ind <- which(cfc$ABup)
points(deseq$AB_log2FoldChange[ind], -log10(deseq$AB_padj)[ind], col=3, cex = .75, pch=16)
ind <- which(cfc$ABdn)
points(deseq$AB_log2FoldChange[ind], -log10(deseq$AB_padj)[ind], col=2, cex = .75, pch=16)
abline(v = c(-1.5,1.5), col=4); abline(h = -log10(.05), col=4)
legend('topleft',bty='n',cex=.75,pch=16, col = c(4,3,2), legend = c('DESeq2 deseq', 'all FC >1.5', 'all FC <-1.5'))
plot(deseq$AC_log2FoldChange, -log10(deseq$AC_padj), col='grey', main = 'Timepoint A vs. C', xlab='Avg Log2 FC', ylab='-log10 Padj', pch=16)
ind <- which(deseq$AC_padj < .05 & abs(deseq$AC_log2FoldChange) > 1.5)
points(deseq$AC_log2FoldChange[ind], -log10(deseq$AC_padj)[ind], col=4, pch=16)
ind <- which(cfc$ACup)
points(deseq$AC_log2FoldChange[ind], -log10(deseq$AC_padj)[ind], col=3, cex = .75, pch=16)
ind <- which(cfc$ACdn)
points(deseq$AC_log2FoldChange[ind], -log10(deseq$AC_padj)[ind], col=2, cex = .75, pch=16)
abline(v = c(-1.5,1.5), col=4); abline(h = -log10(.05), col=4)
legend('topleft',bty='n',cex=.75,pch=16, col = c(4,3,2), legend = c('DESeq2 deseq', 'all FC >1.5', 'all FC <-1.5'))
plot(deseq$BC_log2FoldChange, -log10(deseq$BC_padj), col='grey', main = 'Timepoint B vs. C', xlab='Avg Log2 FC', ylab='-log10 Padj', pch=16)
ind <- which(deseq$BC_padj < .05 & abs(deseq$BC_log2FoldChange) > 1.5)
points(deseq$BC_log2FoldChange[ind], -log10(deseq$BC_padj)[ind], col=4, pch=16)
ind <- which(cfc$BCup)
points(deseq$BC_log2FoldChange[ind], -log10(deseq$BC_padj)[ind], col=3, cex = .75, pch=16)
ind <- which(cfc$BCdn)
points(deseq$BC_log2FoldChange[ind], -log10(deseq$BC_padj)[ind], col=2, cex = .75, pch=16)
abline(v = c(-1.5,1.5), col=4); abline(h = -log10(.05), col=4)
legend('topleft',bty='n',cex=.75,pch=16, col = c(4,3,2), legend = c('DESeq2 deseq', 'all FC >1.5', 'all FC <-1.5'))
# A vs B
up <- rownames(deseq)[which(deseq$AB_log2FoldChange > 1.5 & deseq$AB_padj < .05 & cfc$ABup)]
write.table(up, file='~/Desktop/DEresults/SC_timepoint/A_vs_B/up.csv', row.names = FALSE, quote = FALSE, col.names = FALSE)
dn <- rownames(deseq)[which(deseq$AB_log2FoldChange < 1.5 & deseq$AB_padj < .05 & cfc$ABdn)]
write.table(dn, file='~/Desktop/DEresults/SC_timepoint/A_vs_B/down.csv', row.names = FALSE, quote = FALSE, col.names = FALSE)
# A vs C
up <- rownames(deseq)[which(deseq$AC_log2FoldChange > 1.5 & deseq$AC_padj < .05 & cfc$ACup)]
write.table(up, file='~/Desktop/DEresults/SC_timepoint/A_vs_C/up.csv', row.names = FALSE, quote = FALSE, col.names = FALSE)
dn <- rownames(deseq)[which(deseq$AC_log2FoldChange < 1.5 & deseq$AC_padj < .05 & cfc$ACdn)]
write.table(dn, file='~/Desktop/DEresults/SC_timepoint/A_vs_C/down.csv', row.names = FALSE, quote = FALSE, col.names = FALSE)
# B vs C
up <- rownames(deseq)[which(deseq$BC_log2FoldChange > 1.5 & deseq$BC_padj < .05 & cfc$BCup)]
write.table(up, file='~/Desktop/DEresults/SC_timepoint/B_vs_C/up.csv', row.names = FALSE, quote = FALSE, col.names = FALSE)
dn <- rownames(deseq)[which(deseq$BC_log2FoldChange < 1.5 & deseq$BC_padj < .05 & cfc$BCdn)]
write.table(dn, file='~/Desktop/DEresults/SC_timepoint/B_vs_C/down.csv', row.names = FALSE, quote = FALSE, col.names = FALSE)