forked from Al-Murphy/MungeSumstats
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Enable tabix-indexing for tabular output
- Loading branch information
Showing
13 changed files
with
280 additions
and
14 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
#' Map column names to positions.
#'
#' Useful in situations where you need to specify columns by
#' index instead of name (e.g. awk queries).
#'
#' @source Borrowed function from
#' \href{https://github.com/RajLabMSSM/echotabix/blob/main/R/convert.R}{
#' echotabix}.
#'
#' @source
#' \code{
#' eduAttainOkbayPth <- system.file("extdata", "eduAttainOkbay.txt",
#'     package = "MungeSumstats"
#' )
#' tmp <- tempfile(fileext = ".tsv")
#' file.copy(eduAttainOkbayPth, tmp)
#' cdict <- MungeSumstats:::column_dictionary(file_path = tmp)
#' }
#'
#' @param file_path Path to full summary stats file
#' (or really any file you want to make a column dictionary for).
#'
#' @return Named integer vector of column positions:
#' names are the column names, values are their 1-based indices.
#'
#' @keywords internal
#' @importFrom stats setNames
column_dictionary <- function(file_path) {
    # Read just the first two lines of the file to recover the column names.
    header <- read_header(
        path = file_path,
        # n must be 2 or else
        # fread won't be able to parse text
        n = 2,
        skip_vcf_metadata = TRUE
    )
    col_names <- colnames(data.table::fread(text = header))
    # seq_along() gives an empty vector when no columns were parsed,
    # whereas seq(1, 0) would count down and return c(1, 0).
    col_dict <- stats::setNames(seq_along(col_names), col_names)
    return(col_dict)
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
#' Convert summary stats file to tabix format
#'
#' Compresses a tabular summary statistics file with bgzip and then builds
#' a tabix index on its chromosome and position columns.
#'
#' @source Borrowed function from
#' \href{https://github.com/RajLabMSSM/echotabix/blob/main/R/convert.R}{
#' echotabix}.
#'
#' @param path Path to GWAS summary statistics file.
#' @param verbose Print messages.
#' @inheritParams dt_to_granges
#'
#' @return Path of the bgzip-compressed file, as returned by
#' \code{Rsamtools::bgzip}.
#'
#' @family tabix
#' @examples
#' eduAttainOkbayPth <- system.file("extdata", "eduAttainOkbay.txt",
#'                                  package = "MungeSumstats")
#' sumstats_dt <- data.table::fread(eduAttainOkbayPth, nThread = 1)
#' sumstats_dt <-
#' MungeSumstats:::standardise_sumstats_column_headers_crossplatform(
#'     sumstats_dt = sumstats_dt)$sumstats_dt
#' sumstats_dt <- MungeSumstats:::sort_coords(sumstats_dt = sumstats_dt)
#' path <- tempfile(fileext = ".tsv")
#' MungeSumstats::write_sumstats(sumstats_dt = sumstats_dt, save_path = path)
#'
#' indexed_file <- MungeSumstats::index_tabular(path = path)
#' @export
#' @importFrom Rsamtools bgzip
#' @importFrom seqminer tabix.createIndex
index_tabular <- function(path,
                          chrom_col = "CHR",
                          start_col = "BP",
                          end_col = start_col,
                          verbose = TRUE) {
    # Status messages are only emitted when the caller asked for them.
    say <- function(...) {
        if (isTRUE(verbose)) message(...)
    }
    #### Make sure input file exists and isn't empty ####
    # Checked before anything else: the header cannot be parsed from a
    # missing or empty file. The input is left in place so the caller can
    # inspect it.
    if (!file.exists(path)) {
        stop("File not found: ", path)
    }
    if (isTRUE(file.size(path) == 0)) {
        stop("Cannot tabix-index an empty file: ", path)
    }
    say(paste("Converting full summary stats file to",
              "tabix format for fast querying..."))
    #### Read header and make dictionary ####
    cdict <- column_dictionary(file_path = path)
    #### Check column exist ####
    if (!chrom_col %in% names(cdict)) stop("chrom_col not found in file.")
    if (!start_col %in% names(cdict)) stop("start_col not found in file.")
    if (!end_col %in% names(cdict)) stop("end_col not found in file.")
    ### File MUST be bgzipped first
    say("Ensuring file is bgzipped.")
    # Strip an existing .gz/.bgz suffix so the output is always "<stem>.bgz".
    # NOTE(review): when `path` already ends in ".bgz" the destination equals
    # the input; confirm Rsamtools::bgzip copes with overwriting its own input.
    bgz_dest <- sprintf("%s.bgz", sub("\\.gz$|\\.bgz$", "", path))
    bgz_file <- Rsamtools::bgzip(file = path,
                                 dest = bgz_dest,
                                 overwrite = TRUE)
    ### Tabix-index file
    say("Tabix-indexing file.")
    seqminer::tabix.createIndex(
        bgzipFile = bgz_file,
        sequenceColumn = cdict[[chrom_col]],
        startColumn = cdict[[start_col]],
        endColumn = cdict[[end_col]],
        ## Just use the first columns name
        ## NOTE(review): presumably this makes tabix treat the header row
        ## as a meta line -- confirm against seqminer's metaChar semantics.
        metaChar = names(cdict)[1]
    )
    return(bgz_file)
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
Oops, something went wrong.