-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathFun_ASD.R
149 lines (128 loc) · 5.14 KB
/
Fun_ASD.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
##################################################################################################################################
## Libraries
# Analysis
require(limma)
require(nlme)
require(DESeq2)
require(goseq)
require(WGCNA)
require(multtest)
require(doParallel)
require(variancePartition)
# Plotting
require(gridExtra)
require(data.table)
require(cowplot)
require(reshape2)
require(ggplot2)
require(gplots)
require(VennDiagram)
require(viridis)
require(pals)
require(stringr)
##################################################################################################################################
## Analysis
## Algorithm/signature combination to HIGHLIGHT in all analyses; results from other algorithms will also be shown, albeit less
## prominently, and seen through a lens of replication
main.alg <- "CIB.IP"
## Function for full report on list overlap
overEnrich <- function(list1, list2, backgroundList) {
# setup dataframes
overlap <- length(which(list1 %in% list2))
x <- length(list2) - overlap
y <- length(list1) - overlap
z <- length(backgroundList) - x - y + overlap
contingencyTable <- rbind(c(overlap, x), c(y, z))
# run test
test <- fisher.test(contingencyTable, alternative = "greater")
# output
results <- c(paste0(overlap, " / ", length(list1), "&", length(list2)),
round(test$estimate, 3),
round(test$conf.int[1], 3),
round(test$conf.int[2], 3),
test$p.value)
names(results) <- c("Overlap", "OddsRatio", "conf95_lower", "conf95_upper", "p.value")
return(results)
}
## Plotting theme
theme_gjs <- theme_bw() + theme(panel.border = element_blank(), axis.line = element_line())
## Plotting colours
asd.colours <- c("darkorange", "dodgerblue")
names(asd.colours) <- c("ASD", "CTL")
comp.colours <- alphabet()[c(13, 14)]
names(comp.colours) <- c("CI", "CD")
## Function for fraction of overlaps between two lists
pctOverlap <- function(interestingList, inList) {
round(length(which(interestingList %in% inList)) / length(interestingList), digits = 4)
}
## Function for jaccard similarity
jaccard <- function(list1, list2) {
x <- length(intersect(list1, list2))
y <- length(union(list1, list2))
z <- x/y
round(z, digits = 2)
}
# ## Function to apply sLED to genes in a module
# run.sLED <- function(condition, module) {
# data <- residuals[[condition]]
# genes <- im[[condition]][[module]]
#
# data <- data[which(rownames(data) %in% genes),]
# data <- t(data)
#
# res <- sLED(X = data[which(meta$ASD.CTL == "ASD"),], Y = data[which(meta$ASD.CTL == "CTL"),],
# adj.beta = softPowers[[condition]], rho = 1000, sumabs.seq = 0.1, npermute = 1000,
# useMC = FALSE, mc.cores = 1, seeds = NULL, verbose = TRUE, niter = 20, trace = FALSE)
#
# leverage <- as.numeric(res$leverage)
# names(leverage) <- colnames(data)
# leverage <- leverage[order(leverage, decreasing = TRUE)]
# leverage <- leverage[which(leverage >= 0.01)] # collect genes accounting for >= 1% of leverage
# leverage <- round(leverage, 3)
#
# output <- list()
# output$leverage <- leverage
# output$pValue <- round(res$pVal, 3)
#
# return(output)
# }
## Function to plot GOSeq output
plot.GOseq <- function(x, terms = FALSE, ontology = "BP", topN = 5, legend.position = "none", text.size, y.wrap = FALSE) {
# x should be a NAMED list, where each element is a different run of GOseq
stopifnot(class(x) == "list")
# convert x to a filtered dataframe
for (j in names(x)) x[[j]]$Run <- j # annotate each level
x <- lapply(x, function(y) {
y <- y[which(y$ontology %in% ontology),] # filter to the desired ontologies (any of BP, CC, MF)
y <- y[1:topN,] # filter to the desired number of top hits
y <- y[,c("over_represented_pvalue", "term", "ontology", "Run")] # filter to relevant columns
colnames(y) <- c("FDR", "Term", "Ontology", "Run")
y$FDR <- -log10(y$FDR)
return(y)
})
# merge
x <- do.call("rbind", x) # merge into a single dataframe
# reorder
z <- c()
for(j in names(table(x$Run))) {
k <- x[which(x$Run == j),]
l <- k[order(k$FDR),]
z <- rbind(z, l)
}
x <- z
if (y.wrap != FALSE) x$Term <- str_wrap(x$Term, width = y.wrap)
label <- x$Term
x$Term <- as.factor(paste0(x$Term, x$Run))
x$Term <- factor(x$Term, levels = x$Term[1:nrow(x)])
# plot
ggplot(x, aes(x = Term, y = FDR, fill = Run)) +
geom_col(col = "black", position = position_dodge()) +
coord_flip() +
theme_bw() +
theme(axis.title.y = element_blank(), panel.border = element_blank(),
axis.line.x = element_line(), panel.grid.major.y = element_blank(),
axis.text.y = element_text(size = text.size), legend.title = element_blank(), legend.position = legend.position) +
geom_hline(yintercept = c(0, -log10(0.05)), col = c("black", "black"), linetype = c(1,2)) +
labs(y = "-log10(FDR)") +
scale_x_discrete(labels = label)
}