Skip to content

Commit

Permalink
added GFF3, GVF, refactored VCF piz, iname ++
Browse files Browse the repository at this point in the history
  • Loading branch information
divonlan committed May 8, 2020
1 parent f05a002 commit 3fd3b22
Show file tree
Hide file tree
Showing 37 changed files with 1,203 additions and 813 deletions.
8 changes: 4 additions & 4 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,9 @@ else
endif

MY_SRCS = genozip.c base250.c move_to_front.c header.c strings.c stats.c arch.c license.c \
zip.c zip_vcf.c zip_sam.c zip_fast.c zip_me23.c \
piz.c piz_vcf.c piz_sam.c piz_fast.c piz_me23.c \
seg.c seg_vcf.c seg_sam.c seg_fast.c seg_me23.c \
zip.c zip_vcf.c zip_sam.c zip_fast.c zip_gff3.c zip_me23.c \
piz.c piz_vcf.c piz_sam.c piz_fast.c piz_gff3.c piz_me23.c \
seg.c seg_vcf.c seg_sam.c seg_fast.c seg_gff3.c seg_me23.c \
gloptimize_vcf.c buffer.c random_access.c sections.c compressor.c \
txtfile.c squeeze_vcf.c zfile.c profiler.c file.c dispatcher.c crypt.c aes.c md5.c \
vblock.c regions.c samples.c optimize.c dict_id.c hash.c gtshark_vcf.c stream.c url.c
Expand All @@ -53,7 +53,7 @@ CONDA_DEVS = Makefile .gitignore test-file.vcf

CONDA_DOCS = LICENSE.non-commercial.txt LICENSE.commercial.txt AUTHORS README.md

CONDA_INCS = aes.h dispatcher.h gloptimize_vcf.h optimize.h profiler.h dict_id.h txtfile.h zip.h v1_vcf.c v2v3_vcf.c \
CONDA_INCS = aes.h dispatcher.h gloptimize_vcf.h optimize.h profiler.h dict_id.h txtfile.h zip.h v1_vcf.c \
base250.h endianness.h md5.h sections.h text_help.h header.h strings.h hash.h stream.h url.h \
buffer.h file.h move_to_front.h seg.h text_license.h version.h gtshark_vcf.h compressor.h stats.h \
crypt.h genozip.h piz.h squeeze_vcf.h vblock.h zfile.h random_access.h regions.h samples.h \
Expand Down
30 changes: 27 additions & 3 deletions dict_id.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ uint64_t dict_id_fields[MAX_NUM_FIELDS_PER_DATA_TYPE];
// VCF stuff
uint64_t dict_id_FORMAT_PL=0, dict_id_FORMAT_GL=0, dict_id_FORMAT_GP=0, dict_id_FORMAT_DP=0, dict_id_FORMAT_MIN_DP=0,
dict_id_INFO_AC=0, dict_id_INFO_AF=0, dict_id_INFO_AN=0, dict_id_INFO_DP=0, dict_id_INFO_VQSLOD=0,
dict_id_INFO_END=0, dict_id_INFO_13=0;
dict_id_INFO_END=0;

// SAM stuff
uint64_t dict_id_OPTION_AM=0, dict_id_OPTION_AS=0, dict_id_OPTION_CM=0, dict_id_OPTION_LB=0, dict_id_OPTION_FI=0, dict_id_OPTION_H0=0,
Expand All @@ -27,7 +27,16 @@ uint64_t dict_id_OPTION_AM=0, dict_id_OPTION_AS=0, dict_id_OPTION_CM=0, dict_id_
dict_id_OPTION_XG=0, dict_id_OPTION_XS=0, dict_id_OPTION_XE=0,
dict_id_OPTION_mc=0, dict_id_OPTION_ms=0,
dict_id_OPTION_BD=0, dict_id_OPTION_BI=0,
dict_id_OPTION_STRAND=0;
dict_id_OPTION_STRAND=0; // private genozip dict

// GVF stuff
uint64_t dict_id_ATTR_ID=0, dict_id_ATTR_Variant_seq=0, dict_id_ATTR_Reference_seq=0,
dict_id_ATTR_Dbxref=0, // from GRCh37/38 - example: "dbSNP_151:rs1282280967"
dict_id_ATTR_ancestral_allele=0, // from GRCh37/38 - example: ancestral_allele=GTTA
dict_id_ATTR_SEQ=0; // private genozip dict

// our stuff used in multiple data types
uint64_t dict_id_WindowsEOL=0;

DictIdType DICT_ID_NONE = {0};

Expand All @@ -40,6 +49,8 @@ void dict_id_initialize (void)
dict_id_fields[f] = dict_id_field (dict_id_make (field_name, strlen (field_name))).num;
}

dict_id_WindowsEOL = dict_id_type_1 (dict_id_make ("#", 1)).num;

switch (z_file->data_type) {
case DT_VCF:
dict_id_FORMAT_PL = dict_id_vcf_format_sf (dict_id_make ("PL", 2)).num;
Expand All @@ -58,7 +69,6 @@ void dict_id_initialize (void)
dict_id_FORMAT_MIN_DP = dict_id_vcf_format_sf (dict_id_make ("MIN_DP", 6)).num;

// This appears if the VCF line has a Windows-style \r\n line ending
dict_id_INFO_13 = dict_id_vcf_info_sf (dict_id_make ("#", 1)).num;
break;

case DT_SAM:
Expand Down Expand Up @@ -113,6 +123,20 @@ void dict_id_initialize (void)

break;

case DT_GFF3:
// standard GVF fields (ID is also a standard GFF3 field)
dict_id_ATTR_ID = dict_id_gff3_attr_sf (dict_id_make ("ID", 2)).num;
dict_id_ATTR_Variant_seq = dict_id_gff3_attr_sf (dict_id_make ("Variant_", 8)).num;
dict_id_ATTR_Reference_seq = dict_id_gff3_attr_sf (dict_id_make ("Referenc", 8)).num;

// fields added in the GVFs of GRCh37/38
dict_id_ATTR_Dbxref = dict_id_gff3_attr_sf (dict_id_make ("Dbxref", 6)).num;
dict_id_ATTR_ancestral_allele = dict_id_gff3_attr_sf (dict_id_make ("ancestra", 8)).num;

// our own dictionary where we store Variant_seq, Reference_seq and ancestral_allele together
dict_id_ATTR_SEQ = dict_id_gff3_attr_sf (dict_id_make ("SEQ", 3)).num;
break;

default:
break; // no special fields for the other data types
}
Expand Down
21 changes: 18 additions & 3 deletions dict_id.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,19 +54,23 @@ static inline DictIdType dict_id_type_2(DictIdType dict_id) { return dict_id; }

// FASTQ/FASTA field types
#define dict_id_is_fast_desc_sf dict_id_is_type_2

#define dict_id_fast_desc_sf dict_id_type_2

// GFF3 field types
#define dict_id_is_gff3_attr_sf dict_id_is_type_1
#define dict_id_gff3_attr_sf dict_id_type_1

static inline DictIdType dict_id_printable(DictIdType dict_id) { dict_id.id[0] = (dict_id.id[0] & 0x7f) | 0x40; return dict_id; } // set 2 Msb to 01

extern DictIdType DICT_ID_NONE;
extern DictIdType dict_id_show_one_b250, dict_id_show_one_dict; // arguments of --show-b250-one and --show-dict-one (defined in genozip.c)
extern DictIdType dict_id_dump_one_b250; // arguments of --dump-b250-one (defined in genozip.c)

extern uint64_t dict_id_fields[MAX_NUM_FIELDS_PER_DATA_TYPE],

dict_id_FORMAT_PL, dict_id_FORMAT_GL, dict_id_FORMAT_GP, dict_id_FORMAT_DP, dict_id_FORMAT_MIN_DP, // some VCF FORMAT subfields
dict_id_INFO_AC, dict_id_INFO_AF, dict_id_INFO_AN, dict_id_INFO_DP, dict_id_INFO_VQSLOD, // some VCF INFO subfields
dict_id_INFO_END, dict_id_INFO_13,
dict_id_INFO_END, dict_id_WindowsEOL,

// standard tags, see here: https://samtools.github.io/hts-specs/SAMtags.pdf
dict_id_OPTION_AM, dict_id_OPTION_AS, dict_id_OPTION_CM, dict_id_OPTION_E2, dict_id_OPTION_LB, dict_id_OPTION_FI,
Expand All @@ -85,7 +89,18 @@ extern uint64_t dict_id_fields[MAX_NUM_FIELDS_PER_DATA_TYPE],
// GATK tags
dict_id_OPTION_BD, dict_id_OPTION_BI,

dict_id_OPTION_STRAND;
// our own
dict_id_OPTION_STRAND,

// GVF attributes - standard
dict_id_ATTR_ID, dict_id_ATTR_Variant_seq, dict_id_ATTR_Reference_seq,

// GVF attributes - from GRCh37/38 etc
dict_id_ATTR_Dbxref, // example: "dbSNP_151:rs1282280967"
dict_id_ATTR_ancestral_allele,

dict_id_ATTR_SEQ; // private genozip dict


extern void dict_id_initialize (void);

Expand Down
19 changes: 16 additions & 3 deletions file.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,12 +81,19 @@
#define FNA_XZ_ ".fna.xz"
#define FNA_GENOZIP_ ".fna" GENOZIP_EXT

// GFF3 file variations (currently only GVF subtype, but maybe others in the future)
#define GVF_ ".gvf"
#define GVF_GZ_ ".gvf.gz"
#define GVF_BZ2_ ".gvf.bz2"
#define GVF_XZ_ ".gvf.xz"
#define GVF_GENOZIP_ ".gvf" GENOZIP_EXT

// 23andMe file variations
// note: 23andMe files come as a .txt, and therefore the user must specify --input to compress them. we have this
// made-up file extension here to avoid needing special cases throughout the code
#define ME23_ ".txt" // our made up extension - natively, 23andMe files come as a zip container containing a txt file
#define ME23_ZIP_ ".zip"
#define ME23_GENOZIP_ ".txt" GENOZIP_EXT
#define ME23_ ".txt" // our made up extension - natively, 23andMe files come as a zip container containing a txt file
#define ME23_ZIP_ ".zip"
#define ME23_GENOZIP_ ".txt" GENOZIP_EXT

typedef enum {TXT_FILE, Z_FILE} FileSupertype;

Expand All @@ -101,6 +108,7 @@ typedef enum { UNKNOWN_FILE_TYPE,
FFN, FFN_GZ, FFN_BZ2, FFN_XZ, FFN_GENOZIP,
FNN, FNN_GZ, FNN_BZ2, FNN_XZ, FNN_GENOZIP,
FNA, FNA_GZ, FNA_BZ2, FNA_XZ, FNA_GENOZIP,
GVF, GVF_GZ, GVF_BZ2, GVF_XZ, GVF_GENOZIP,
ME23, ME23_ZIP, ME23_GENOZIP,
AFTER_LAST_FILE_TYPE } FileType;

Expand All @@ -115,6 +123,7 @@ typedef enum { UNKNOWN_FILE_TYPE,
FFN_, FFN_GZ_, FFN_BZ2_, FFN_XZ_, FFN_GENOZIP_, \
FNN_, FNN_GZ_, FNN_BZ2_, FNN_XZ_, FNN_GENOZIP_, \
FNA_, FNA_GZ_, FNA_BZ2_, FNA_XZ_, FNA_GENOZIP_, \
GVF_, GVF_GZ_, GVF_BZ2_, GVF_XZ_, GVF_GENOZIP_, \
ME23_, ME23_ZIP_, ME23_GENOZIP_,\
"stdin", "stdout" }
extern const char *file_exts[];
Expand Down Expand Up @@ -149,6 +158,8 @@ typedef enum { COMP_UNKNOWN=-1, COMP_PLN=0 /* plain - no compression */,
{ FNA_BZ2, COMP_BZ2, FNA_GENOZIP }, { FNA_XZ, COMP_XZ, FNA_GENOZIP },\
{ FA, COMP_PLN, FA_GENOZIP }, { FA_GZ, COMP_GZ, FA_GENOZIP },\
{ FA_BZ2, COMP_BZ2, FA_GENOZIP }, { FA_XZ, COMP_XZ, FA_GENOZIP }, { 0, 0, 0 } },\
{ { GVF, COMP_PLN, GVF_GENOZIP }, { GVF_GZ, COMP_GZ, GVF_GENOZIP },\
{ GVF_BZ2, COMP_BZ2, GVF_GENOZIP }, { GVF_XZ, COMP_XZ, GVF_GENOZIP }, { 0, 0, 0 } },\
{ { ME23, COMP_PLN, ME23_GENOZIP }, { ME23_ZIP, COMP_ZIP, ME23_GENOZIP }, { 0, 0, 0 } } }

// plain file MUST appear first on the list - this will be the default output when redirecting
Expand All @@ -157,6 +168,7 @@ typedef enum { COMP_UNKNOWN=-1, COMP_PLN=0 /* plain - no compression */,
{ SAM, BAM, 0 }, \
{ FASTQ, FASTQ_GZ, FQ, FQ_GZ, 0 },\
{ FASTA, FASTA_GZ, FA, FA_GZ, FAA, FAA_GZ, FFN, FFN_GZ, FNN, FNN_GZ, FNA, FNA_GZ, 0 },\
{ GVF, GVF_GZ, 0 },\
{ ME23, ME23_ZIP, 0 } }

// txt file types and their corresponding genozip file types for each data type
Expand All @@ -165,6 +177,7 @@ typedef enum { COMP_UNKNOWN=-1, COMP_PLN=0 /* plain - no compression */,
{ SAM_GENOZIP, 0 }, \
{ FASTQ_GENOZIP, FQ_GENOZIP, 0 }, \
{ FASTA_GENOZIP, FA_GENOZIP, FAA_GENOZIP, FFN_GENOZIP, FNN_GENOZIP, FNA_GENOZIP, 0 }, \
{ GVF_GENOZIP, 0 }, \
{ ME23_GENOZIP, 0 } }

typedef const char *FileMode;
Expand Down
2 changes: 2 additions & 0 deletions genozip.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,8 @@ typedef struct VBlockSAM *VBlockSAMP;
typedef const struct VBlockSAM *ConstVBlockSAMP;
typedef struct VBlockFAST *VBlockFASTP;
typedef const struct VBlockFAST *ConstVBlockFASTP;
typedef struct VBlockGFF3 *VBlockGFF3P;
typedef const struct VBlockGFF3 *ConstVBlockGFF3P;
typedef struct VBlockME23 *VBlockME23P;
typedef const struct VBlockME23 *ConstVBlockME23P;

Expand Down
13 changes: 13 additions & 0 deletions hash.c
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,19 @@ void hash_alloc_local (VBlock *segging_vb, MtfContext *vb_ctx)
vb_ctx->local_hash_prime = hash_next_size_up(500);
break;

case DT_GFF3:
if (vb_ctx->dict_id.num == dict_id_fields[GFF3_SEQID] ||
vb_ctx->dict_id.num == dict_id_fields[GFF3_SOURCE] ||
vb_ctx->dict_id.num == dict_id_fields[GFF3_TYPE] ||
vb_ctx->dict_id.num == dict_id_fields[GFF3_END] ||
vb_ctx->dict_id.num == dict_id_fields[GFF3_SCORE] ||
vb_ctx->dict_id.num == dict_id_fields[GFF3_STRAND] ||
vb_ctx->dict_id.num == dict_id_fields[GFF3_PHASE] ||
vb_ctx->dict_id.num == dict_id_fields[GFF3_ATTRS])

vb_ctx->local_hash_prime = hash_next_size_up(500);
break;

case DT_ME23:
if (vb_ctx->dict_id.num == dict_id_fields[ME23_CHROM])

Expand Down
4 changes: 2 additions & 2 deletions header.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@ static bool is_first_txt = true;
// (names must be no longer than 8 (=DICT_ID_LEN) characters, as the code assumes this)
const char *field_names[NUM_DATATYPES][MAX_NUM_FIELDS_PER_DATA_TYPE] = FIELD_NAMES;

const unsigned datatype_last_field[NUM_DATATYPES] = DATATYPE_LAST_FIELD;
const unsigned chrom_did_i_by_dt[NUM_DATATYPES] = CHROM_DID_I_BY_DT;
const unsigned datatype_last_field[NUM_DATATYPES] = DATATYPE_LAST_FIELD;
const uint8_t chrom_did_i_by_dt[NUM_DATATYPES] = CHROM_DID_I_BY_DT;

// -----------
// VCF stuff
Expand Down
65 changes: 39 additions & 26 deletions header.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,58 +10,51 @@
#include "md5.h"

// IMPORTANT: DATATYPES GO INTO THE FILE FORMAT - THEY CANNOT BE CHANGED
#define NUM_DATATYPES 5
#define NUM_DATATYPES 6
typedef enum { DT_VCF_V1=-2, DT_NONE=-1, // these values are used in the code logic, they are never written to the file
DT_VCF=0, DT_SAM=1,
DT_FASTQ=2, DT_FASTA=3,
DT_ME23=4 } DataType; // these values go into SectionHeaderGenozipHeader.data_type
#define DATATYPE_NAMES { "VCF", "SAM", "FASTQ", "FASTA", "23ANDME" } // index in array matches values in DataType
DT_FASTQ=2, DT_FASTA=3, DT_GFF3=4,
DT_ME23=5 } DataType; // these values go into SectionHeaderGenozipHeader.data_type
#define DATATYPE_NAMES { "VCF", "SAM", "FASTQ", "FASTA", "GVF" /* actually GFF3, but for now we can call it GVF */, \
"23ANDME" } // index in array matches values in DataType

#define DATATYPE_LAST_FIELD { VCF_FORMAT, SAM_OPTIONAL, FAST_LINEMETA, FAST_LINEMETA, ME23_ID }
extern const unsigned datatype_last_field[NUM_DATATYPES];

#define CHROM_DID_I_BY_DT { VCF_CHROM, SAM_RNAME, -1, -1, ME23_CHROM } // -1 if DATATYPE_HAS_RANDOM_ACCESS is false
extern const unsigned chrom_did_i_by_dt[NUM_DATATYPES]; // used for random access data

#define DATATYPE_HAS_RANDOM_ACCESS { true, true, false, false, true }
#define DATATYPE_HAS_RANDOM_ACCESS { true, true, false, false, true, true }

typedef void (*ComputeFunc)(VBlockP);
#define COMPRESS_FUNC_BY_DT { zip_vcf_compress_one_vb, zip_sam_compress_one_vb, \
zip_fast_compress_one_vb, zip_fast_compress_one_vb, zip_me23_compress_one_vb }
zip_fast_compress_one_vb, zip_fast_compress_one_vb, zip_gff3_compress_one_vb, zip_me23_compress_one_vb }

#define UNCOMPRESS_FUNC_BY_DT { piz_vcf_uncompress_one_vb, piz_sam_uncompress_one_vb, \
piz_fast_uncompress_one_vb, piz_fast_uncompress_one_vb, \
piz_me23_uncompress_one_vb }
piz_gff3_uncompress_one_vb, piz_me23_uncompress_one_vb }

typedef void (*UpdateHeaderFunc) (VBlockP vb, uint32_t vcf_first_line_i);
#define UPDATE_HEADER_FUNC_BY_DT { zfile_vcf_update_compressed_vb_header, \
zfile_update_compressed_vb_header, \
zfile_update_compressed_vb_header, \
zfile_update_compressed_vb_header, \
zfile_update_compressed_vb_header, \
zfile_update_compressed_vb_header }

typedef void (*IOFunc) (VBlockP vb);
#define READ_ONE_VB_FUNC_BY_DT { zfile_vcf_read_one_vb, zfile_sam_read_one_vb, \
zfile_fast_read_one_vb, zfile_fast_read_one_vb, \
zfile_me23_read_one_vb }
zfile_gff3_read_one_vb, zfile_me23_read_one_vb }

#define FIRST_FIELD_DICT_SECTION { SEC_CHROM_DICT, SEC_SAM_QNAME_DICT, \
SEC_FAST_DESC_DICT, SEC_FAST_DESC_DICT, SEC_CHROM_DICT }

// the chrom fields used by --regions for subsetting
#define CHROM_FIELD_DICT_SECTION { SEC_CHROM_DICT, SEC_SAM_RNAME_DICT, -1, -1, SEC_CHROM_DICT }
// by data type - related to the header of the txt file of each data type
#define TXT_HEADER_IS_ALLOWED { true, true, false, false, true, true } // is it possible to have a header in this data_type
#define TXT_HEADER_IS_REQUIRED { true, false, false, false, false, false } // should we error if the header is missing
#define TXT_HEADER_LINE_FIRST_CHAR { '#', '@', -1, -1, '#', '#' }; // first character in each line in the text file header (-1 if TXT_HEADER_IS_ALLOWED is false)

// related to the header of the txt file of each data type
#define TXT_HEADER_IS_ALLOWED { true, true, false, false, true } // is it possible to have a header in this data_type
#define TXT_HEADER_IS_REQUIRED { true, false , false , false , false } // should we error if the header is missing
#define TXT_HEADER_LINE_FIRST_CHAR { '#', '@', -1, -1, '#' }; // first character in each line in the text file header (-1 if TXT_HEADER_IS_ALLOWED is false)

#define STAT_SHOW_SECTIONS_LINE_NAME { "Variants", "Alignment lines", "Entries", "Lines", "SNPs" }
// by data type - the header displayed in --show-sections
#define STAT_SHOW_SECTIONS_LINE_NAME { "Variants", "Alignment lines", "Entries", "Lines", "Sequences", "SNPs" }

// by data type - the dictionary type displayed in --show-sections
#define STAT_DICT_TYPES { { "FIELD", "INFO", "FORMAT" }, \
{ "FIELD", "QNAME", "OPTION" }, \
{ "FIELD", "ERROR!", "DESC" }, \
{ "FIELD", "ERROR!", "DESC" }, \
{ "FIELD", "ATTRS", "ERROR!" }, \
{ "FIELD", "ERROR!", "ERROR!" } };

// VCF related global parameters - set before any thread is created, and never change
Expand All @@ -85,17 +78,37 @@ typedef enum { SAM_QNAME, SAM_FLAG, SAM_RNAME, SAM_POS, SAM_MAPQ, SAM_CIGAR, SAM
#define NUM_FAST_FIELDS 2
typedef enum { FAST_DESC, FAST_LINEMETA } FastqFields;

#define NUM_GFF3_FIELDS 10 // https://m.ensembl.org/info/website/upload/gff3.html
typedef enum { GFF3_SEQID, GFF3_SOURCE, GFF3_TYPE, GFF3_START, GFF3_END, GFF3_SCORE, GFF3_STRAND, GFF3_PHASE, GVF_SEQ, GFF3_ATTRS } Gff3Fields;

// 23ANDME fields
#define NUM_ME23_FIELDS 3
typedef enum { ME23_CHROM, ME23_POS, ME23_ID } Me23Fields; // same order as VCF

#define MAX_NUM_FIELDS_PER_DATA_TYPE 9 // maximum between NUM_*_FIELDS
#define MAX_NUM_FIELDS_PER_DATA_TYPE 10 // the largest of all NUM_*_FIELDS

#define DATATYPE_LAST_FIELD { NUM_VCF_FIELDS-1, NUM_SAM_FIELDS-1, NUM_FAST_FIELDS-1, NUM_FAST_FIELDS-1, NUM_GFF3_FIELDS-1, NUM_ME23_FIELDS-1 }
extern const unsigned datatype_last_field[NUM_DATATYPES];

#define CHROM_DID_I_BY_DT { VCF_CHROM, SAM_RNAME, -1, -1, GFF3_SEQID, ME23_CHROM } // -1 if DATATYPE_HAS_RANDOM_ACCESS is false
extern const uint8_t chrom_did_i_by_dt[NUM_DATATYPES]; // used for random access data

#define INFO_DID_I_BY_DT { VCF_INFO, -1, -1, -1, GFF3_ATTRS, -1 }

#define FIRST_FIELD_DICT_SECTION { SEC_CHROM_DICT, SEC_SAM_QNAME_DICT, \
SEC_FAST_DESC_DICT, SEC_FAST_DESC_DICT, SEC_GFF3_SEQID_DICT, SEC_CHROM_DICT }

// by data type - the chrom fields used by --regions for subsetting
#define CHROM_FIELD_DICT_SECTION { SEC_CHROM_DICT, SEC_SAM_RNAME_DICT, -1, -1, SEC_GFF3_SEQID_DICT, SEC_CHROM_DICT }
#define INFO_FIELD_DICT_SECTION { SEC_VCF_INFO_DICT, -1, -1, -1, SEC_GFF3_ATTRS_DICT, -1 }
#define INFO_SF_DICT_SECTION { SEC_VCF_INFO_SF_DICT, -1, -1, -1, SEC_GFF3_ATTRS_SF_DICT, -1 }

#define FIELD_NAMES /* max 8 chars per name */ \
{ { "CHROM", "POS", "ID", "REF+ALT", "QUAL", "FILTER", "INFO", "FORMAT" },\
{ "QNAME", "FLAG", "RNAME", "POS", "MAPQ", "CIGAR", "PNEXT", "TLEN", "OPTIONAL" },\
{ "DESC", "LINEMETA" },\
{ "DESC", "LINEMETA" },\
{ "SEQID", "SOURCE", "TYPE", "START", "END", "SCORE", "STRAND", "PHASE", "SEQ", "ATTRS" },\
{ "CHROM", "POS", "ID" }\
};
extern const char *field_names[NUM_DATATYPES][MAX_NUM_FIELDS_PER_DATA_TYPE];
Expand Down
Loading

0 comments on commit 3fd3b22

Please sign in to comment.