diff --git a/DESCRIPTION b/DESCRIPTION index f17a0d6..f0d9acc 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -4,11 +4,10 @@ Title: Big Data Management of Whole-genome Sequence Variant Calls Version: 1.17.1 Date: 2017-05-07 Depends: R (>= 3.3.0), gdsfmt (>= 1.10.1) -Imports: methods, parallel, IRanges, GenomicRanges, GenomeInfoDb +Imports: methods, parallel, IRanges, GenomicRanges, GenomeInfoDb, Biostrings, S4Vectors LinkingTo: gdsfmt Suggests: digest, crayon, RUnit, knitr, Rcpp, SNPRelate, Biobase, BiocParallel, - BiocGenerics, Biostrings, S4Vectors, VariantAnnotation, - SummarizedExperiment + BiocGenerics, VariantAnnotation Authors@R: c(person("Xiuwen", "Zheng", role=c("aut", "cre"), email="zhengx@u.washington.edu"), person("Stephanie", "Gogarten", role="aut", diff --git a/NAMESPACE b/NAMESPACE index d386d6e..f155d79 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -28,12 +28,15 @@ importFrom(parallel, clusterApply, clusterApplyLB, clusterCall, detectCores, makeCluster, makeForkCluster, mclapply, stopCluster) importFrom(utils, read.table, flush.console) +importFrom(S4Vectors, DataFrame, SimpleList) importClassesFrom(IRanges, IRanges) -importFrom(IRanges, IRanges, IntegerList, NumericList, CharacterList) +importFrom(IRanges, IRanges, IntegerList, NumericList, CharacterList, DataFrameList) importClassesFrom(GenomicRanges, GRanges, GRangesList) importFrom(GenomicRanges, GRanges) importMethodsFrom(GenomicRanges, granges) -importMethodsFrom(GenomeInfoDb, seqnames) +importMethodsFrom(GenomeInfoDb, seqnames, seqlevels) +importFrom(GenomeInfoDb, renameSeqlevels) +importFrom(Biostrings, DNAStringSet, DNAStringSetList) # Export all names @@ -41,5 +44,6 @@ exportPattern("^seq*") export(.onAttach, .Last.lib) exportClasses(SeqVarGDSClass) -exportMethods(granges) -exportMethods(seqClose, seqSetFilter) +exportMethods(granges, ref, alt, qual, filt, header, fixed, + info, geno, colData, rowRanges) +exportMethods(seqClose, seqSetFilter, seqAsVCF) diff --git a/R/AllGenerics.R b/R/AllGenerics.R index d9a24f1..a1fa2d5 100755 --- a/R/AllGenerics.R +++ b/R/AllGenerics.R @@ -6,3 +6,17 @@ setGeneric("seqSetFilter", function(object, variant.sel, ...) standardGeneric("seqSetFilter")) # setGeneric("seqAppend", function(object, ...) standardGeneric("seqAppend")) + +setGeneric("seqAsVCF", function(x, ...) standardGeneric("seqAsVCF")) + +# redefine generics from VariantAnnotation and SummarizedExperiment to reduce package load time +setGeneric("ref", function(x, ...) standardGeneric("ref")) +setGeneric("alt", function(x, ...) standardGeneric("alt")) +setGeneric("qual", function(x, ...) standardGeneric("qual")) +setGeneric("filt", function(x, ...) standardGeneric("filt")) +setGeneric("fixed", function(x, ...) standardGeneric("fixed")) +setGeneric("header", function(x, ...) standardGeneric("header")) +setGeneric("info", function(x, ...) standardGeneric("info")) +setGeneric("geno", function(x, ...) standardGeneric("geno")) +setGeneric("rowRanges", function(x, ...) standardGeneric("rowRanges")) +setGeneric("colData", function(x, ...) standardGeneric("colData")) diff --git a/R/Internal.R b/R/Internal.R index c56bec2..0ca2fb3 100644 --- a/R/Internal.R +++ b/R/Internal.R @@ -6,27 +6,6 @@ # -####################################################################### -# Internal variable(s) -# - -# package-wide variable -.packageEnv <- new.env() - -# asVCF exported functions -.asVCF_Export <- function() -{ - isTRUE(.packageEnv$vcf_export) -} - -# set asVCF export flag -.asVCF_SetTRUE <- function() -{ - .packageEnv$vcf_export <- TRUE -} - - - ####################################################################### # Get the numbers of selected samples and variants # @@ -727,3 +706,49 @@ invisible() } + +####################################################################### +# Convert to a VariantAnnotation object +# + +.variableLengthToList <- function(x) +{ + xl <- list() + j <- 1L + for (i in 1:length(x$length)) + { + len <- x$length[i] + if (len > 0L) + { + xl[[i]] <- x$data[j:(j+len-1)] + j <- j+len + } else { + xl[[i]] <- NA + } + } + xl +} + +.toAtomicList <- function(x, type) +{ + switch(type, + Character=CharacterList(x), + String=CharacterList(x), + Integer=IntegerList(x), + Float=NumericList(x)) +} + +.variableLengthToMatrix <- function(x) +{ + xl <- list() + i <- 1L + for (j in seq_along(x)) + { + for (k in seq_len(NROW(x[[j]]))) + { + xl[[i]] <- x[[j]][k,] + i <- i + 1L + } + } + matrix(xl, nrow=NROW(x[[1L]]), ncol=length(x)) +} diff --git a/R/Methods-SeqVarGDSClass.R b/R/Methods-SeqVarGDSClass.R index 296faf3..a899a4c 100644 --- a/R/Methods-SeqVarGDSClass.R +++ b/R/Methods-SeqVarGDSClass.R @@ -22,3 +22,307 @@ setMethod("granges", "SeqVarGDSClass", }) + +#### from VariantAnnotation #### + +setMethod("ref", "SeqVarGDSClass", function(x) +{ + s <- seqGetData(x, "$ref") + # remove invalid characters + s <- gsub("[^ACGTMRWSYKVHDBNacgtmrwsykvhdbn\\-\\+\\.]", ".", s) + DNAStringSet(s) +}) + + +setMethod("alt", "SeqVarGDSClass", function(x) +{ + alt <- seqGetData(x, "$alt") + s <- strsplit(alt, ",", fixed=TRUE) + s[alt == ""] <- "" + # remove invalid characters + s <- sapply(s, function(x) + gsub("[^ACGTMRWSYKVHDBNacgtmrwsykvhdbn\\-\\+\\.]", ".", x), + simplify=FALSE) + do.call(DNAStringSetList, s) +}) + + +setMethod("qual", "SeqVarGDSClass", function(x) +{ + qual <- seqGetData(x, "annotation/qual") + qual[is.na(qual)] <- NA # change NaN to NA + qual +}) + + +setMethod("filt", "SeqVarGDSClass", function(x) +{ + as.character(seqGetData(x, "annotation/filter")) +}) + + +setMethod("fixed", "SeqVarGDSClass", function(x) +{ + DataFrame(REF=SeqArray::ref(x), + ALT=SeqArray::alt(x), + QUAL=SeqArray::qual(x), + FILTER=SeqArray::filt(x)) +}) + + +setMethod("header", "SeqVarGDSClass", function(x) +{ + requireNamespace("VariantAnnotation", quietly=TRUE, verbose=FALSE) + + sample.id <- seqGetData(x, "sample.id") + + ## info + seqsum <- seqSummary(x, check="none", verbose=FALSE) + infoHd <- seqsum$info + # names(infoHd)[2:4] <- c("Number", "Type", "Description") + infoHd <- DataFrame(infoHd[,2:4], row.names=infoHd[,1L]) + + ## geno + genoHd <- seqsum$format + # names(genoHd)[2:4] <- c("Number", "Type", "Description") + genoHd <- DataFrame(genoHd[,2:4], row.names=genoHd[,1L]) + + ## meta + des <- get.attr.gdsn(index.gdsn(x, "description")) + ff <- des$vcf.fileformat + meta <- DataFrame(Value=ff, row.names="fileformat") + ref <- seqsum$reference + if (length(ref) > 0L) + { + meta <- rbind(meta, DataFrame(Value=ref, row.names="reference")) + } + n <- index.gdsn(x, "description/vcf.header", silent=TRUE) + if (!is.null(n)) + { + des <- read.gdsn(n) + ## ID=value header fields not parsed in GDS + des <- des[!(des[,1L] %in% c("contig", "SAMPLE", "PEDIGREE")),] + ## deal with duplicate row names + if (any(duplicated(des[,1L]))) + { + des[,1] <- make.unique(des[,1L]) + } + meta <- rbind(meta, DataFrame(Value=des[,2L], row.names=des[,1L])) + } + + hdr <- DataFrameList(META=meta, INFO=infoHd, FORMAT=genoHd) + + ## fixed + des <- seqsum$allele + if (nrow(des) > 0L) + { + hdr[["ALT"]] <- DataFrame(Description=des[,2L], row.names=des[,1L]) + } + des <- seqsum$filter + des <- des[des$Description != "" & !is.na(des$Description),,drop=FALSE] + if (nrow(des) > 0L) + { + hdr[["FILTER"]] <- DataFrame(Description=des[,2L], row.names=des[,1L]) + } + + VariantAnnotation::VCFHeader(samples=sample.id, header=hdr) +}) + + +setMethod("info", "SeqVarGDSClass", function(x, infovar=NULL) +{ + des <- seqSummary(x, "annotation/info", check="none", verbose=FALSE) + if (!is.null(infovar)) + des <- des[des$ID %in% infovar, ] + infoDf <- DataFrame(row.names=seqGetData(x, "variant.id")) + if (nrow(des) > 0L) + { + for (i in seq_len(nrow(des))) + { + v <- seqGetData(x, paste0("annotation/info/", des$ID[i])) + ## deal with variable length fields + if (!is.null(names(v))) + { + vl <- .variableLengthToList(v) + ## each element should have length number of alt alleles, even for NAs + if (des$Number[i] == "A") + { + nAlt <- lengths(SeqArray::alt(x)) + addNA <- which(nAlt > 1L & is.na(vl)) + for (ind in addNA) + { + vl[[ind]] <- rep(NA, nAlt[ind]) + } + } + v <- .toAtomicList(vl, des$Type[i]) + } else if (!is.null(dim(v))) + { + ## v is a matrix with nrow="Number" + vl <- list() + for (j in 1:ncol(v)) + { + vl[[j]] <- v[,j] + } + v <- .toAtomicList(vl, des$Type[i]) + } + if (is.atomic(v)) + { + v[is.na(v)] <- NA # change NaN to NA + v[v %in% ""] <- NA + } + infoDf[[des$ID[i]]] <- v + } + } + infoDf +}) + + +setMethod("geno", "SeqVarGDSClass", function(x, genovar=NULL) +{ + ## genotype + sample.id <- seqGetData(x, "sample.id") + variant.id <- seqGetData(x, "variant.id") + + if (is.null(genovar) || "GT" %in% genovar) + { + gt <- seqApply(x, c(genovar="genotype", phase="phase"), + function(x) { + sep <- ifelse(x$phase, "|", "/") + paste(x$genovar[1L,], sep, x$genovar[2L,], sep="") + }, + as.is="list", margin="by.variant") + gt <- matrix(unlist(gt), ncol=length(gt), + dimnames=list(sample.id, variant.id)) + gt[gt == "NA/NA"] <- "." + gt <- t(gt) + + genoSl <- SimpleList(GT=gt) + } else { + genoSl <- SimpleList() + } + + ## all other fields + des <- seqSummary(x, "annotation/format", check="none", verbose=FALSE) + if (!is.null(genovar)) + { + des <- des[des$ID %in% genovar,] + } + if (nrow(des) > 0L) + { + for (i in 1:nrow(des)) + { + var.name <- paste("annotation/format/", des$ID[i], sep="") + number <- suppressWarnings(as.integer(des$Number[i])) + if (!is.na(number) && number > 1L) + { + v <- seqApply(x, var.name, function(v) v, + as.is="list", margin="by.variant") + vm <- array( + dim=c(length(variant.id), length(sample.id), number), + dimnames=list(variant.id, sample.id, NULL)) + for (j in 1:length(v)) + { + if (is.null(v[[j]])) + { + vm[j,,] <- NA + } else { + vm[j,,] <- v[[j]] + } + } + v <- vm + } else { + v <- seqGetData(x, var.name) + if (!is.null(names(v))) + { + if (all(v$length == 1L) && !is.na(number) && number == 1L) + { + v <- v$data + } else { + v <- seqApply(x, var.name, function(v) v, + as.is="list", margin="by.variant") + v <- .variableLengthToMatrix(v) + } + } + dimnames(v) <- list(sample.id, variant.id) + v <- t(v) + } + genoSl[[des$ID[i]]] <- v + } + } + if (is.null(names(genoSl))) names(genoSl) <- character() + genoSl +}) + + +setMethod("seqAsVCF", "SeqVarGDSClass", function(x, chr.prefix="", info=NULL, geno=NULL) +{ + requireNamespace("VariantAnnotation", quietly=TRUE, verbose=FALSE) + + stopifnot(is.character(chr.prefix), length(chr.prefix)==1L) + + seqsum <- seqSummary(x, check="none", verbose=FALSE) + if (!is.null(info)) + { + validInfo <- seqsum$info$ID + infoDiff <- setdiff(info, c(validInfo, NA)) + if (length(infoDiff) > 0) + { + warning(paste("info fields not present:", infoDiff)) + info <- intersect(info, validInfo) + } + } + if (!is.null(geno)) + { + validGeno <- seqsum$format$ID + genoDiff <- setdiff(geno, c(validGeno, NA)) + if (length(genoDiff) > 0) + { + warning(paste("geno fields not present:", genoDiff)) + geno <- intersect(geno, validGeno) + } + } + vcf <- VariantAnnotation::VCF(rowRanges=SeqArray::rowRanges(x), + colData=SeqArray::colData(x), + exptData=SimpleList(header=header(x)), + fixed=SeqArray::fixed(x), + info=SeqArray::info(x, info=info), + geno=SeqArray::geno(x, geno=geno)) + if (chr.prefix != "") + vcf <- renameSeqlevels(vcf, paste0(chr.prefix, seqlevels(vcf))) + vcf +}) + + + +#### from SummarizedExperiment #### + +setMethod("rowRanges", "SeqVarGDSClass", function(x) +{ + granges(x, + ID=seqGetData(x, "annotation/id"), + REF=SeqArray::ref(x), + ALT=SeqArray::alt(x), + QUAL=SeqArray::qual(x), + FILTER=SeqArray::filt(x)) +}) + + +setMethod("colData", "SeqVarGDSClass", function(x) +{ + sample.id <- seqGetData(x, "sample.id") + node <- index.gdsn(x, "sample.annotation", silent=TRUE) + if (!is.null(node)) + vars <- ls.gdsn(node) + else + vars <- character() + if (length(vars) > 0) + { + annot <- lapply(vars, function(v) { + seqGetData(x, paste0("sample.annotation/", v)) + }) + names(annot) <- vars + DataFrame(Samples=seq_along(sample.id), annot, row.names=sample.id) + } else { + DataFrame(Samples=seq_along(sample.id), row.names=sample.id) + } +}) diff --git a/R/asVCF.R b/R/asVCF.R deleted file mode 100644 index 46ba4ca..0000000 --- a/R/asVCF.R +++ /dev/null @@ -1,372 +0,0 @@ - -####################################################################### -# Convert to a VariantAnnotation object -# - -.variableLengthToList <- function(x) -{ - xl <- list() - j <- 1L - for (i in 1:length(x$length)) - { - len <- x$length[i] - if (len > 0L) - { - xl[[i]] <- x$data[j:(j+len-1)] - j <- j+len - } else { - xl[[i]] <- NA - } - } - xl -} - -.toAtomicList <- function(x, type) -{ - switch(type, - Character=CharacterList(x), - String=CharacterList(x), - Integer=IntegerList(x), - Float=NumericList(x)) -} - -.variableLengthToMatrix <- function(x) -{ - xl <- list() - i <- 1L - for (j in seq_along(x)) - { - for (k in seq_len(NROW(x[[j]]))) - { - xl[[i]] <- x[[j]][k,] - i <- i + 1L - } - } - matrix(xl, nrow=NROW(x[[1L]]), ncol=length(x)) -} - - -seqAsVCFInit <- function() -{ - # check flag - if (.asVCF_Export()) return(invisible()) - - # load packages to get generic methods - requireNamespace("SummarizedExperiment", quietly=TRUE, verbose=FALSE) - requireNamespace("VariantAnnotation", quietly=TRUE, verbose=FALSE) - - - #### from VariantAnnotation #### - - setMethod("ref", "SeqVarGDSClass", function(x) - { - s <- seqGetData(x, "$ref") - # remove invalid characters - s <- gsub("[^ACGTMRWSYKVHDBNacgtmrwsykvhdbn\\-\\+\\.]", ".", s) - Biostrings::DNAStringSet(s) - }, where=globalenv()) - - - setMethod("alt", "SeqVarGDSClass", function(x) - { - alt <- seqGetData(x, "$alt") - s <- strsplit(alt, ",", fixed=TRUE) - s[alt == ""] <- "" - # remove invalid characters - s <- sapply(s, function(x) - gsub("[^ACGTMRWSYKVHDBNacgtmrwsykvhdbn\\-\\+\\.]", ".", x), - simplify=FALSE) - do.call(Biostrings::DNAStringSetList, s) - }, where=globalenv()) - - - setMethod("qual", "SeqVarGDSClass", function(x) - { - qual <- seqGetData(x, "annotation/qual") - qual[is.na(qual)] <- NA # change NaN to NA - qual - }, where=globalenv()) - - - setMethod("filt", "SeqVarGDSClass", function(x) - { - as.character(seqGetData(x, "annotation/filter")) - }, where=globalenv()) - - - setMethod("fixed", "SeqVarGDSClass", function(x) - { - S4Vectors::DataFrame(REF=ref(x), - ALT=alt(x), - QUAL=qual(x), - FILTER=filt(x)) - }, where=globalenv()) - - - setMethod("header", "SeqVarGDSClass", function(x) - { - sample.id <- seqGetData(x, "sample.id") - - ## info - seqsum <- seqSummary(x, check="none", verbose=FALSE) - infoHd <- seqsum$info - # names(infoHd)[2:4] <- c("Number", "Type", "Description") - infoHd <- S4Vectors::DataFrame(infoHd[,2:4], row.names=infoHd[,1L]) - - ## geno - genoHd <- seqsum$format - # names(genoHd)[2:4] <- c("Number", "Type", "Description") - genoHd <- S4Vectors::DataFrame(genoHd[,2:4], row.names=genoHd[,1L]) - - ## meta - des <- get.attr.gdsn(index.gdsn(x, "description")) - ff <- des$vcf.fileformat - meta <- S4Vectors::DataFrame(Value=ff, row.names="fileformat") - ref <- seqsum$reference - if (length(ref) > 0L) - { - meta <- rbind(meta, S4Vectors::DataFrame(Value=ref, row.names="reference")) - } - n <- index.gdsn(x, "description/vcf.header", silent=TRUE) - if (!is.null(n)) - { - des <- read.gdsn(n) - ## ID=value header fields not parsed in GDS - des <- des[!(des[,1L] %in% c("contig", "SAMPLE", "PEDIGREE")),] - ## deal with duplicate row names - if (any(duplicated(des[,1L]))) - { - des[,1] <- make.unique(des[,1L]) - } - meta <- rbind(meta, S4Vectors::DataFrame(Value=des[,2L], row.names=des[,1L])) - } - - hdr <- IRanges::DataFrameList(META=meta, INFO=infoHd, FORMAT=genoHd) - - ## fixed - des <- seqsum$allele - if (nrow(des) > 0L) - { - hdr[["ALT"]] <- S4Vectors::DataFrame(Description=des[,2L], row.names=des[,1L]) - } - des <- seqsum$filter - des <- des[des$Description != "" & !is.na(des$Description),,drop=FALSE] - if (nrow(des) > 0L) - { - hdr[["FILTER"]] <- S4Vectors::DataFrame(Description=des[,2L], row.names=des[,1L]) - } - - VariantAnnotation::VCFHeader(samples=sample.id, header=hdr) - }, where=globalenv()) - - - setMethod("info", "SeqVarGDSClass", function(x, infovar=NULL) - { - des <- seqSummary(x, "annotation/info", check="none", verbose=FALSE) - if (!is.null(infovar)) - des <- des[des$ID %in% infovar, ] - infoDf <- S4Vectors::DataFrame(row.names=seqGetData(x, "variant.id")) - if (nrow(des) > 0L) - { - for (i in seq_len(nrow(des))) - { - v <- seqGetData(x, paste0("annotation/info/", des$ID[i])) - ## deal with variable length fields - if (!is.null(names(v))) - { - vl <- .variableLengthToList(v) - ## each element should have length number of alt alleles, even for NAs - if (des$Number[i] == "A") - { - nAlt <- lengths(alt(x)) - addNA <- which(nAlt > 1L & is.na(vl)) - for (ind in addNA) - { - vl[[ind]] <- rep(NA, nAlt[ind]) - } - } - v <- .toAtomicList(vl, des$Type[i]) - } else if (!is.null(dim(v))) - { - ## v is a matrix with nrow="Number" - vl <- list() - for (j in 1:ncol(v)) - { - vl[[j]] <- v[,j] - } - v <- .toAtomicList(vl, des$Type[i]) - } - if (is.atomic(v)) - { - v[is.na(v)] <- NA # change NaN to NA - v[v %in% ""] <- NA - } - infoDf[[des$ID[i]]] <- v - } - } - infoDf - }, where=globalenv()) - - - setMethod("geno", "SeqVarGDSClass", function(x, genovar=NULL) - { - ## genotype - sample.id <- seqGetData(x, "sample.id") - variant.id <- seqGetData(x, "variant.id") - - if (is.null(genovar) || "GT" %in% genovar) - { - gt <- seqApply(x, c(genovar="genotype", phase="phase"), - function(x) { - sep <- ifelse(x$phase, "|", "/") - paste(x$genovar[1L,], sep, x$genovar[2L,], sep="") - }, - as.is="list", margin="by.variant") - gt <- matrix(unlist(gt), ncol=length(gt), - dimnames=list(sample.id, variant.id)) - gt[gt == "NA/NA"] <- "." - gt <- t(gt) - - genoSl <- S4Vectors::SimpleList(GT=gt) - } else { - genoSl <- S4Vectors::SimpleList() - } - - ## all other fields - des <- seqSummary(x, "annotation/format", check="none", verbose=FALSE) - if (!is.null(genovar)) - { - des <- des[des$ID %in% genovar,] - } - if (nrow(des) > 0L) - { - for (i in 1:nrow(des)) - { - var.name <- paste("annotation/format/", des$ID[i], sep="") - number <- suppressWarnings(as.integer(des$Number[i])) - if (!is.na(number) && number > 1L) - { - v <- seqApply(x, var.name, function(v) v, - as.is="list", margin="by.variant") - vm <- array( - dim=c(length(variant.id), length(sample.id), number), - dimnames=list(variant.id, sample.id, NULL)) - for (j in 1:length(v)) - { - if (is.null(v[[j]])) - { - vm[j,,] <- NA - } else { - vm[j,,] <- v[[j]] - } - } - v <- vm - } else { - v <- seqGetData(x, var.name) - if (!is.null(names(v))) - { - if (all(v$length == 1L) && !is.na(number) && number == 1L) - { - v <- v$data - } else { - v <- seqApply(x, var.name, function(v) v, - as.is="list", margin="by.variant") - v <- .variableLengthToMatrix(v) - } - } - dimnames(v) <- list(sample.id, variant.id) - v <- t(v) - } - genoSl[[des$ID[i]]] <- v - } - } - if (is.null(names(genoSl))) names(genoSl) <- character() - genoSl - }, where=globalenv()) - - - setMethod("asVCF", "SeqVarGDSClass", function(x, chr.prefix="", info=NULL, geno=NULL) - { - stopifnot(is.character(chr.prefix), length(chr.prefix)==1L) - - seqsum <- seqSummary(x, check="none", verbose=FALSE) - if (!is.null(info)) - { - validInfo <- seqsum$info$ID - infoDiff <- setdiff(info, c(validInfo, NA)) - if (length(infoDiff) > 0) - { - warning(paste("info fields not present:", infoDiff)) - info <- intersect(info, validInfo) - } - } - if (!is.null(geno)) - { - validGeno <- seqsum$format$ID - genoDiff <- setdiff(geno, c(validGeno, NA)) - if (length(genoDiff) > 0) - { - warning(paste("geno fields not present:", genoDiff)) - geno <- intersect(geno, validGeno) - } - } - vcf <- VariantAnnotation::VCF(rowRanges=rowRanges(x), - colData=colData(x), - exptData=S4Vectors::SimpleList(header=header(x)), - fixed=fixed(x), - info=info(x, info=info), - geno=geno(x, geno=geno)) - if (chr.prefix != "") - vcf <- GenomeInfoDb::renameSeqlevels(vcf, paste0(chr.prefix, GenomeInfoDb::seqlevels(vcf))) - vcf - }, where=globalenv()) - - - - #### from SummarizedExperiment #### - - setMethod("rowRanges", "SeqVarGDSClass", function(x) - { - granges(x, - ID=seqGetData(x, "annotation/id"), - REF=ref(x), - ALT=alt(x), - QUAL=qual(x), - FILTER=filt(x)) - }, where=globalenv()) - - - setMethod("colData", "SeqVarGDSClass", function(x) - { - sample.id <- seqGetData(x, "sample.id") - node <- index.gdsn(x, "sample.annotation", silent=TRUE) - if (!is.null(node)) - vars <- ls.gdsn(node) - else - vars <- character() - if (length(vars) > 0) - { - annot <- lapply(vars, function(v) { - seqGetData(x, paste0("sample.annotation/", v)) - }) - names(annot) <- vars - S4Vectors::DataFrame(Samples=seq_along(sample.id), annot, row.names=sample.id) - } else { - S4Vectors::DataFrame(Samples=seq_along(sample.id), row.names=sample.id) - } - }, where=globalenv()) - - - - # set TRUE internally - .asVCF_SetTRUE() - invisible() -} - - -seqAsVCF <- function(gdsfile, ...) -{ - stopifnot(inherits(gdsfile, "SeqVarGDSClass")) - seqAsVCFInit() - eval(parse(text="asVCF(gdsfile, ...)")) -} diff --git a/inst/unitTests/test_utils.R b/inst/unitTests/test_utils.R index aeea205..66bc316 100755 --- a/inst/unitTests/test_utils.R +++ b/inst/unitTests/test_utils.R @@ -17,14 +17,12 @@ test_colData <- function() { - ## load VariantAnnotation and export functions - seqAsVCFInit() ## test with no sample annotation vcffile <- system.file("extdata", "ex2.vcf", package="VariantAnnotation") gdsfile <- tempfile() seqVCF2GDS(vcffile, gdsfile, storage.option="ZIP_RA", verbose=FALSE) gds <- seqOpen(gdsfile) - annot <- colData(gds) + annot <- SeqArray::colData(gds) checkTrue(ncol(annot) == 1) seqClose(gds) unlink(gdsfile) @@ -32,7 +30,7 @@ test_colData <- function() { ## test with annotation gdsfile <- seqExampleFileName("gds") gds <- seqOpen(gdsfile) - annot <- colData(gds) + annot <- SeqArray::colData(gds) checkTrue(ncol(annot) > 1) seqClose(gds) } diff --git a/man/SeqVarGDSClass-class.Rd b/man/SeqVarGDSClass-class.Rd index a143336..005a80c 100644 --- a/man/SeqVarGDSClass-class.Rd +++ b/man/SeqVarGDSClass-class.Rd @@ -15,7 +15,18 @@ \alias{geno,SeqVarGDSClass,ANY-method} \alias{rowRanges,SeqVarGDSClass-method} \alias{colData,SeqVarGDSClass-method} -\alias{asVCF,SeqVarGDSClass-method} +\alias{seqAsVCF,SeqVarGDSClass-method} +\alias{ref} +\alias{alt} +\alias{filt} +\alias{qual} +\alias{fixed} +\alias{header} +\alias{info} +\alias{geno} +\alias{rowRanges} +\alias{colData} +\alias{seqAsVCF} \title{SeqVarGDSClass} @@ -31,9 +42,7 @@ to create a \code{SeqVarGDSClass} object. } \section{Accessors}{ - In the following code snippets \code{x} is a SeqVarGDSClass object. Users -must call \code{seqAsVCFInit()} and \code{seqAsVCF(x)} to enable the generic -methods. + In the following code snippets \code{x} is a SeqVarGDSClass object. \describe{ \item{}{ \code{granges(x)}: @@ -62,7 +71,7 @@ methods. } \item{}{ \code{header(x)}: - Returns the header. + Returns the header as a \code{\link[VariantAnnotation]{VCFHeader}}. The \pkg{VariantAnnotation} package should be loaded to explore this object. } \item{}{ \code{rowRanges(x)}: @@ -86,12 +95,10 @@ methods. } \section{Coercion methods}{ - In the following code snippets \code{x} is a SeqVarGDSClass object. Users -must call \code{seqAsVCFInit()} and \code{seqAsVCF(x)} to enable the generic -methods. + In the following code snippets \code{x} is a SeqVarGDSClass object. \describe{ \item{}{ - \code{asVCF(x, chr.prefix="", info=NULL, geno=NULL)}: + \code{seqAsVCF(x, chr.prefix="", info=NULL, geno=NULL)}: Coerces a SeqVarGDSClass object to a \link[VariantAnnotation]{VCF-class} object. Row names correspond to the variant.id. \code{info} and @@ -100,6 +107,7 @@ methods. if 'NA' no fields are returned. Use \code{\link{seqSetFilter}} prior to calling \code{asVCF} to specify samples and variants to return. + The \pkg{VariantAnnotation} package should be loaded to explore this object. } } } @@ -108,8 +116,7 @@ methods. \author{Stephanie Gogarten, Xiuwen Zheng} \seealso{ - \code{\link[gdsfmt]{gds.class}}, \code{\link{seqOpen}}, - \code{\link{seqAsVCFInit}}, \code{\link{seqAsVCF}} + \code{\link[gdsfmt]{gds.class}}, \code{\link{seqOpen}} } \examples{ @@ -123,8 +130,6 @@ head(seqGetData(gds, "sample.id")) granges(gds) \dontrun{ -seqAsVCFInit() ## enable exporting the generic methods - ## alleles as comma-separated character strings head(seqGetData(gds, "allele")) diff --git a/man/seqAsVCF.Rd b/man/seqAsVCF.Rd deleted file mode 100644 index 533873f..0000000 --- a/man/seqAsVCF.Rd +++ /dev/null @@ -1,46 +0,0 @@ -\name{seqAsVCF} -\alias{seqAsVCF} -\alias{seqAsVCFInit} -\title{VariantAnnotation objects} -\description{ - Imports the VariantAnnotation package and export the generic methods -associated with SeqVarGDSClass class. -} -\usage{ -seqAsVCFInit() -seqAsVCF(gdsfile, ...) -} -\arguments{ - \item{gdsfile}{a \code{\link{SeqVarGDSClass}} object} - \item{...}{optional arguments to \code{asVCF}} -} -\details{ - The generic methods \code{ref}, \code{alt}, \code{qual}, \code{filt}, -\code{fixed}, \code{header}, \code{info}, \code{geno}, \code{asVCF}, -\code{rowRanges} and \code{colData} are exported and associated with a -\code{\link{SeqVarGDSClass}} object after calling \code{seqAsVCFInit()} -or \code{seqAsVCF()}. -} -\value{ - None or a \code{\link[VariantAnnotation]{CollapsedVCF}} object. -} - -\author{Xiuwen Zheng} -\seealso{ - \code{\link{asVCF,SeqVarGDSClass-method}} -} - -\examples{ -gds <- seqOpen(seqExampleFileName("gds")) - -\dontrun{ -seqAsVCFInit() ## enable exporting the generic methods -seqAsVCF(gds) -} - -seqClose(gds) -} - -\keyword{gds} -\keyword{sequencing} -\keyword{genetics}