Skip to content

Commit

Permalink
better support for sample-less VCFs
Browse files Browse the repository at this point in the history
  • Loading branch information
divonlan committed May 7, 2020
1 parent a2f820a commit f05a002
Show file tree
Hide file tree
Showing 5 changed files with 12 additions and 13 deletions.
7 changes: 4 additions & 3 deletions header.c
Original file line number Diff line number Diff line change
Expand Up @@ -73,11 +73,12 @@ static bool header_vcf_set_globals(const char *filename, Buffer *vcf_header)
}

//count samples
global_vcf_num_samples = (tab_count >= 9) ? tab_count-8 : 0; // note: a VCF file without samples would have tab_count==7 (8 fields) and is perfectly legal
global_vcf_num_samples = (tab_count >= 9) ? tab_count-8 : 0;
// Note: a VCF file without samples may or may not have a "FORMAT" field in its field header line, i.e. tab_count==7 or 8 (8 or 9 fields).
// However, even if the field header line contains FORMAT, the data lines of such a file will not have a FORMAT column.

global_vcf_num_displayed_samples = global_vcf_num_samples;

ASSERT0 (tab_count != 8, "Error: invalid VCF file - field header line contains a FORMAT field but no samples");

ASSERT (tab_count >= 7, "Error: invalid VCF file - field header line contains only %d fields, expecting at least 8", tab_count+1);

// if --samples is used, update vcf_header and global_vcf_num_displayed_samples
Expand Down
12 changes: 5 additions & 7 deletions move_to_front.c
Original file line number Diff line number Diff line change
Expand Up @@ -535,13 +535,11 @@ void mtf_merge_in_vb_ctx (VBlock *merging_vb)

mtf_verify_field_ctxs (merging_vb); // this was useful in the past to catch nasty thread issues

// first, all field dictionaries
// first, all field dictionaries (note: even if the dictionary is not allocated - eg FORMAT in a FORMATless VCF)
for (unsigned did_i=0; did_i < merging_vb->num_dict_ids; did_i++) {

MtfContext *ctx = &merging_vb->mtf_ctx[did_i];

if (!buf_is_allocated (&ctx->dict)) continue;

SectionType dict_sec_type = ctx->dict_section_type;

ASSERT (section_type_is_dictionary(dict_sec_type), "Error: dict_sec_type=%s is not a dictionary section", st_name(dict_sec_type));
Expand Down Expand Up @@ -636,9 +634,9 @@ MtfContext *mtf_get_ctx_by_dict_id (MtfContext *mtf_ctx /* an array */,
// Called from seg_all_data_lines (ZIP) and zfile_read_all_dictionaries (PIZ) to initialize the ctxs of all
// primary fields. Some of these ctxs may end up unused (e.g. when they are not read from disk due to --strip),
// but we keep each one at its fixed position anyway, because the code relies on that.
void mtf_initialize_primary_field_ctxs (MtfContext *mtf_ctx /* an array */,
void mtf_initialize_primary_field_ctxs (VBlock *vb, // NULL if called by zfile_read_all_dictionaries
MtfContext *mtf_ctx /* an array */,
DataType dt,
uint32_t vblock_i,
uint8_t *dict_id_to_did_i_map,
unsigned *num_dict_ids)
{
Expand All @@ -651,8 +649,8 @@ void mtf_initialize_primary_field_ctxs (MtfContext *mtf_ctx /* an array */,
MtfContext *ctx = mtf_get_ctx_by_dict_id (mtf_ctx, dict_id_to_did_i_map, num_dict_ids, NULL, dict_id, dict_sec);

// verify that the ctx is at its correct place
ASSERT (ctx - mtf_ctx == f, "Error in mtf_initialize_primary_field_ctxs: f=%u (%s) but ctx is at mtf_ctx[%u]. vb_i=%u",
f, fname, (unsigned)(ctx - mtf_ctx), vblock_i);
ASSERT (ctx - mtf_ctx == f, "Error in mtf_initialize_primary_field_ctxs: f=%u (%s) but ctx is at mtf_ctx[%u]. vb_i=%u vb.first_line=%u",
f, fname, (unsigned)(ctx - mtf_ctx), vb ? vb->vblock_i : 0, vb ? vb->first_line : 0);
}
}

Expand Down
2 changes: 1 addition & 1 deletion move_to_front.h
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,6 @@ extern void mtf_destroy_context (MtfContext *ctx);

extern void mtf_vb_1_lock (VBlockP vb);
extern MtfNode *mtf_get_node_by_word_index (MtfContext *ctx, uint32_t word_index);
extern void mtf_initialize_primary_field_ctxs (MtfContext *mtf_ctx /* an array */, DataType dt, uint32_t vblock_i, uint8_t *dict_id_to_did_i_map, unsigned *num_dict_ids);
extern void mtf_initialize_primary_field_ctxs (VBlockP vb, MtfContext *mtf_ctx /* an array */, DataType dt, uint8_t *dict_id_to_did_i_map, unsigned *num_dict_ids);

#endif
2 changes: 1 addition & 1 deletion seg.c
Original file line number Diff line number Diff line change
Expand Up @@ -475,7 +475,7 @@ void seg_all_data_lines (VBlock *vb,
{
START_TIMER;

mtf_initialize_primary_field_ctxs (vb->mtf_ctx, vb->data_type, vb->vblock_i, vb->dict_id_to_did_i_map, &vb->num_dict_ids); // Create ctx for the fields in the correct order
mtf_initialize_primary_field_ctxs (vb, vb->mtf_ctx, vb->data_type, vb->dict_id_to_did_i_map, &vb->num_dict_ids); // Create ctx for the fields in the correct order

mtf_verify_field_ctxs (vb);

Expand Down
2 changes: 1 addition & 1 deletion zfile.c
Original file line number Diff line number Diff line change
Expand Up @@ -430,7 +430,7 @@ void zfile_read_all_dictionaries (uint32_t last_vb_i /* 0 means all VBs */, Read
{
SectionListEntry *sl_ent = NULL; // NULL -> first call to this sections_get_next_dictionary() will reset cursor

mtf_initialize_primary_field_ctxs (z_file->mtf_ctx, z_file->data_type, 0, z_file->dict_id_to_did_i_map, &z_file->num_dict_ids);
mtf_initialize_primary_field_ctxs (NULL, z_file->mtf_ctx, z_file->data_type, z_file->dict_id_to_did_i_map, &z_file->num_dict_ids);

while (sections_get_next_dictionary (&sl_ent)) {

Expand Down

0 comments on commit f05a002

Please sign in to comment.