forked from Al-Murphy/MungeSumstats
-
Notifications
You must be signed in to change notification settings - Fork 0
/
index_vcf.R
78 lines (77 loc) · 3.02 KB
/
index_vcf.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
#' Tabix-index file: VCF
#'
#' Convert summary stats file to tabix format
#'
#' @source Borrowed function from
#' \href{https://github.com/RajLabMSSM/echotabix/blob/main/R/convert.R}{
#' echotabix}.
#'
#' @param path Path to VCF.
#' @param verbose Print messages.
#'
#' @return Path to tabix-indexed tabular file
#'
#' @family tabix
#' @keywords internal
#' @importFrom methods is
#' @importFrom R.utils gunzip
#' @importFrom VariantAnnotation indexVcf
#' @examples
#' eduAttainOkbayPth <- system.file("extdata", "eduAttainOkbay.txt",
#' package = "MungeSumstats")
#' sumstats_dt <- data.table::fread(eduAttainOkbayPth, nThread = 1)
#' sumstats_dt <-
#' MungeSumstats:::standardise_sumstats_column_headers_crossplatform(
#' sumstats_dt = sumstats_dt)$sumstats_dt
#' sumstats_dt <- MungeSumstats:::sort_coords(sumstats_dt = sumstats_dt)
#' path <- tempfile(fileext = ".tsv")
#' MungeSumstats::write_sumstats(sumstats_dt = sumstats_dt, save_path = path)
#'
#' indexed_file <- MungeSumstats::index_tabular(path = path)
index_vcf <- function(path,
verbose=TRUE){
#### When remote ####
if(!file.exists(path) &&
any(startsWith(tolower(path),c("http","ftp")))){
messager("File inferred to be remote.",v=verbose)
return(path)
#### When already tabix ####
} else if(is_tabix(path = path)){
messager("File already tabix-indexed.",v=verbose)
return(path)
#### When local and non-tabix ####
} else {
requireNamespace("Rsamtools")
#### Round 1: assume .gz is bgz-compressed ####
sfx <- supported_suffixes(tabular = FALSE,
tabular_compressed = FALSE,
vcf = FALSE)
if(!any(endsWith(path,sfx))){
messager("bgzip-compressing VCF file.",v=verbose)
path <- Rsamtools::bgzip(file = path,
dest = sprintf("%s.bgz",
sub("\\.gz$|\\.bgz$", "",
path)),
overwrite = TRUE)
}
out <- tryCatch({
VariantAnnotation::indexVcf(x = path)$path
}, error = function(e){e})
#### Round 2: assume .gz is NOT bgz-compressed ####
if(methods::is(out,"error") &&
grepl("file does not appear to be bgzip",out$message,
ignore.case = TRUE)
){
if(endsWith(tolower(path),".gz")){
path <- R.utils::gunzip(path, overwrite=TRUE)
}
path <- Rsamtools::bgzip(file = path,
dest = sprintf("%s.bgz",
sub("\\.gz$|\\.bgz$", "",
path)),
overwrite = TRUE)
path <- VariantAnnotation::indexVcf(x = path)$path
}
return(path)
}
}