-
Notifications
You must be signed in to change notification settings - Fork 0
/
hist_plot.R
131 lines (106 loc) · 5.06 KB
/
hist_plot.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
library(tidyverse)
library(data.table)
library(cowplot)
## Fix the RNG seed so the random subsampling done with sample_n() below
## yields the same rows on every run.
set.seed(20180329)
## Build the plotmath expression used as a panel title.
##
## Arguments:
##   s - the title text.
##
## Returns the unevaluated call bold(<s> * phantom("tp")): the title in bold
## followed by an invisible "tp" (phantom() reserves the space without
## drawing anything).
.st <- function(s) {
  substitute(bold(label * phantom("tp")), list(label = s))
}
## Build the average-similarity histogram panel for one database.
##
## Arguments:
##   db_dir - directory name under ../db/ whose data/data_table.csv is read.
##   title  - panel title text, rendered through .st().
##
## Returns the ggplot object for the panel. As a side effect, prints the
## Kruskal-Wallis test of row_mean across the three groups.
##
## Three groups are compared: every row with class == 1 ("Positive"), an
## equally sized sample of rows with class == 0 ("Negative"), and an equally
## sized sample drawn from all rows ("Random"). The negative sample is always
## drawn before the random one, so with a fixed seed the RNG stream is
## consumed in the same order on every run.
.hist_panel <- function(db_dir, title) {
  csv_path <- file.path("..", "db", db_dir, "data", "data_table.csv")

  ## Drop u and v, pass every remaining column except class through scale(),
  ## then collapse those columns to their per-row mean.
  scores <- fread(csv_path) %>%
    as_tibble() %>%
    select(-c(u, v)) %>%
    mutate_at(.funs = scale, .vars = vars(-c(class))) %>%
    transmute(row_mean = rowMeans(select(., -class)), class)

  positive <- scores %>%
    filter(class == 1) %>%
    transmute(row_mean, class = "Positive")
  n_positive <- nrow(positive)

  negative <- scores %>%
    filter(class == 0) %>%
    sample_n(size = n_positive) %>%
    transmute(row_mean, class = "Negative")

  random_draw <- scores %>%
    sample_n(size = n_positive) %>%
    transmute(row_mean, class = "Random")

  groups <- bind_rows(list(positive, negative, random_draw))

  ## Explicit print(): a value computed inside a function is not auto-printed
  ## the way a top-level expression is.
  print(kruskal.test(row_mean ~ as.factor(class), data = groups))

  ## Overlaid, semi-transparent histograms; ncount rescales each group's
  ## counts so its tallest bin is 1. The legend is hidden on every panel.
  ggplot(groups, aes(x = row_mean, fill = class)) +
    geom_histogram(
      aes(y = ..ncount..),
      binwidth = 0.5,
      alpha = 0.5,
      position = "identity"
    ) +
    theme_bw() +
    theme(
      legend.title = element_blank(),
      legend.position = "none",
      plot.title = element_text(face = "bold"),
      aspect.ratio = 1
    ) +
    xlab("Average Similarity") +
    ylab("Relative frequency") +
    ggtitle(.st(title))
}

## DrugBank
p1 <- .hist_panel("drugbank", "DrugBank")
## KEGG
p2 <- .hist_panel("kegg", "KEGG")
## NDF-RT
p3 <- .hist_panel("ndfrt", "NDF-RT")
## SemMedDB
p4 <- .hist_panel("semmeddb", "SemMedDB")
## Twosides
p5 <- .hist_panel("twosides", "Twosides")
## Lay the five panels out on a three-column grid.
panel_grid <- plot_grid(p1, p2, p3, p4, p5, ncol = 3, align = "v")

## The panels are drawn without legends, so extract a single legend from the
## first panel with its legend switched back on at the bottom.
shared_legend <- get_legend(p1 + theme(legend.position = "bottom"))

## Stack the grid over the legend strip and write the figure to disk.
figure <- plot_grid(
  panel_grid,
  shared_legend,
  ncol = 1,
  rel_heights = c(1, 0.05)
)
save_plot("hist.pdf", figure, base_height = 8, base_width = 12)