-
Notifications
You must be signed in to change notification settings - Fork 19
/
Copy pathmakeFull_ZMatrix.R
115 lines (81 loc) · 3.6 KB
/
makeFull_ZMatrix.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
###### Function to create full Z-matrix to compute prior ######
# #' Create full Z-matrix for prior computation
# #'
# #' From a list of significant studies, create the full Z-matrix that can be used to compute
# #' the prior.
# #'
# #' @inheritParams bGWAS
# NOT EXPORTED
makeFull_ZMatrix <- function(studies=NULL, GWASData, GName, Z_matrices="~/Z_matrices",
prior_shrinkage, save_files=F, verbose=F) {
Log = c()
tmp = paste0("# Loading the ZMatrix... \n")
Log = update_log(Log, tmp, verbose)
tmp = paste0("Selecting studies :\n")
Log = update_log(Log, tmp, verbose)
ZMatrix = as_tibble(data.table::fread(file.path(Z_matrices, "ZMatrix_Full.csv.gz"), select=c(1:5, studies+5 ), showProgress = FALSE, data.table = F))
tmp = paste0(ncol(ZMatrix)-5, " studies \n")
Log = update_log(Log, tmp, verbose)
tmp = paste0(format(nrow(ZMatrix), big.mark = ",", scientific = F), " SNPs \n")
Log = update_log(Log, tmp, verbose)
# Add conventional GWAS column, at the end (make sure alleles are aligned)
tmp = paste0("# Adding data from the conventional GWAS : \n \"", GName,
"\" \n")
Log = update_log(Log, tmp, verbose)
# keep the SNPs in our Z matrix and order them correctly + check allele alignement
ZMatrix %>%
filter(.data$rs %in% GWASData$rsid) -> ZMatrix
GWASData %>%
slice(match(ZMatrix$rs, .data$rsid)) %>%
mutate(ref = toupper(ref),
alt = toupper(alt),
ZMat_alt = toupper(ZMatrix$alt),
Zmat_ref = toupper(ZMatrix$ref),
aligned_Z = case_when(
(.data$alt == .data$ZMat_alt &
.data$ref == .data$Zmat_ref) ~ .data$z_obs,
(.data$ref == .data$ZMat_alt &
.data$alt == .data$Zmat_ref) ~ -.data$z_obs,
TRUE ~ NA_real_))-> GWASData
ZMatrix %>%
mutate({{GName}} := GWASData$aligned_Z) -> ZMatrix
tmp = "Done! \n"
Log = update_log(Log, tmp, verbose)
# set NA to zero for prediction
ZMatrix %>%
mutate_all(~replace(., is.na(.), 0)) -> ZMatrix
tmp = paste0(format(nrow(ZMatrix), big.mark = ",", scientific=F), " SNPs in common between prior studies and the conventional GWAS \n")
Log = update_log(Log, tmp, verbose)
push_extreme_zs_back_a_little_towards_zero <- function(d) { # Some z-scores are just too far from zero
maxAllowed_z = abs(stats::qnorm(1e-300 / 2)) # p=1e-300 is the max allowed now, truncate z-scores accordingly
names(d)[!names(d) %in% c("rs","chrm","pos","alt","ref")] -> studies_here
for(n in studies_here) {
d %>%
mutate(!!n := case_when(
abs(eval(parse(text=n))) > maxAllowed_z ~ maxAllowed_z,
abs(eval(parse(text=n))) < -maxAllowed_z ~ -maxAllowed_z,
TRUE ~ eval(parse(text=n)))) -> d
}
return(d)
}
# Truncate Z-scores
ZMatrix %>%
push_extreme_zs_back_a_little_towards_zero() -> ZMatrix
# Set the z-scores to 0 for the regression if shrinkage
if(prior_shrinkage < 1.0) {
names(ZMatrix)[!names(ZMatrix) %in% c('rs','chrm','pos','alt','ref', GName)] -> Prior_study_names
threshold = abs(stats::qnorm(prior_shrinkage/2))
for(column_of_zs in Prior_study_names) {
ZMatrix %>%
mutate(!!column_of_zs := case_when(
abs(eval(parse(text=column_of_zs))) < threshold ~ 0,
TRUE ~ eval(parse(text=column_of_zs)))) -> ZMatrix
}
tmp = paste0("Applying shrinkage (threshold = ", prior_shrinkage, ") before calculating the prior. \n")
Log = update_log(Log, tmp, verbose)
}
res=list(
log_info = Log,
mat = ZMatrix)
return(res)
}