// ------------------------------------------------------------------
// move_to_front.c
// Copyright (C) 2019-2020 Divon Lan <[email protected]>
// Please see terms and conditions in the files LICENSE.non-commercial.txt and LICENSE.commercial.txt
/*
zip:
1) during segregate - build mtf_context + dictionary for each dict_id
2) during generate - convert snips in vcf to indexes into ctx->mtf
3) merge back into the main (z_file) dictionaries - we use thread synchronization to make
sure this happens in the sequential order of variant blocks. this merging also
updates the word and char indices in ctx->mtf
4) compress the incremental part of the dictionaries added by this VB
unzip:
1) Dispatcher thread integrates the dictionary fragments added by this VB
2) Create MTF array mapping word indices to char indices (one array in z-file)
3) Re-create genotype data by looking up words in the dictionaries
*/
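/* Added illustrative sketch (not from the original source) of the in-memory layout used throughout this file:
each context keeps its snips in one flat dict buffer, with a '\t' terminating every snip, and each MtfNode /
MtfWord records where its snip starts (char_index) and how long it is (snip_len). A hypothetical context
holding the snips "0/0", "0/1" and "1/1" would look roughly like:
dict.data   = "0/0\t0/1\t1/1\t"
nodes/words = { {char_index=0, snip_len=3}, {char_index=4, snip_len=3}, {char_index=8, snip_len=3} }
b250        = base250-encoded word indices (roughly one per line/sample) referring to the entries above */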
#include "genozip.h"
#include "profiler.h"
#include "sections.h"
#include "gloptimize_vcf.h"
#include "base250.h"
#include "vblock.h"
#include "move_to_front.h"
#include "zfile.h"
#include "endianness.h"
#include "file.h"
#include "hash.h"
#define INITIAL_NUM_NODES 10000
static pthread_mutex_t wait_for_vb_1_mutex;
static pthread_mutex_t compress_dictionary_data_mutex;
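// mtf_lock_do/mtf_unlock_do wrap pthread_mutex_lock/unlock; the mtf_lock/mtf_unlock macros below pass the
// caller's function name and line number, so the (commented-out) printf tracing can report which thread
// took which mutex and from where - useful when debugging the thread interactions described in this file.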
static inline void mtf_lock_do (VBlock *vb, pthread_mutex_t *mutex, const char *func, uint32_t code_line, const char *name, uint32_t param)
{
//printf ("thread %u vb_i=%u LOCKING %s:%u from %s:%u\n", (unsigned)pthread_self(), vb->vblock_i, name, param, func, code_line);
pthread_mutex_lock (mutex);
//printf ("thread %u vb_i=%u LOCKED %s:%u from %s:%u\n", (unsigned)pthread_self(), vb->vblock_i, name, param, func, code_line);
}
#define mtf_lock(vb, mutex, name, param) mtf_lock_do (vb, mutex, __FUNCTION__, __LINE__, name, param)
static inline void mtf_unlock_do (VBlock *vb, pthread_mutex_t *mutex, const char *func, uint32_t code_line, const char *name, uint32_t param)
{
pthread_mutex_unlock (mutex);
//printf ("thread %u vb_i=%u UNLOCKED %s:%u from %s:%u\n", (unsigned)pthread_self(), vb->vblock_i, name, param, func, code_line);
}
#define mtf_unlock(vb, mutex, name, param) mtf_unlock_do (vb, mutex, __FUNCTION__, __LINE__, name, param)
void mtf_vb_1_lock (VBlockP vb)
{
ASSERT0 (vb->vblock_i == 1, "Error: Only vb_i=1 can call mtf_vb_1_lock");
mtf_lock (vb, &wait_for_vb_1_mutex, "wait_for_vb_1_mutex", 1);
}
// ZIP: add a snip to the dictionary the first time it is encountered in the VCF file.
// the dictionary will be written to GENOZIP and used to reconstruct the MTF during decompression
static inline uint32_t mtf_insert_to_dict (VBlock *vb_of_dict, MtfContext *ctx, bool is_zf_ctx, const char *snip, uint32_t snip_len)
{
buf_alloc (vb_of_dict, &ctx->dict, MAX ((ctx->dict.len + snip_len + 1), INITIAL_NUM_NODES * MIN (10, snip_len)),
CTX_GROWTH, is_zf_ctx ? "z_file->mtf_ctx->dict" : "mtf_ctx->dict", ctx->did_i);
if (is_zf_ctx) buf_set_overlayable (&ctx->dict); // during merge
unsigned char_index = ctx->dict.len;
char *dict_p = &ctx->dict.data[char_index];
memcpy (dict_p, snip, snip_len);
dict_p[snip_len] = '\t'; // dictionary snips have a \t separator within dictionary string
ctx->dict.len += snip_len + 1;
return char_index;
}
// ZIP only (PIZ doesn't have mtf): translate an mtf index to its node - which lives either in ol_mtf or in mtf
MtfNode *mtf_node_do (const MtfContext *ctx, uint32_t mtf_i,
const char **snip_in_dict, uint32_t *snip_len, // optional outs
const char *func, uint32_t code_line)
{
ASSERT (ctx->dict_id.num, "Error in mtf_node_do: this ctx is not initialized (dict_id.num=0) - called from %s:%u", func, code_line);
ASSERT (mtf_i < ctx->mtf.len + ctx->ol_mtf.len, "Error in mtf_node_do: out of range: dict=%s %s mtf_i=%d mtf.len=%u ol_mtf.len=%u. Caller: %s:%u",
err_dict_id (ctx->dict_id), st_name (ctx->dict_section_type),
mtf_i, (uint32_t)ctx->mtf.len, (uint32_t)ctx->ol_mtf.len, func, code_line);
bool is_ol = mtf_i < ctx->ol_mtf.len; // is this entry from a previous vb (overlay buffer)
MtfNode *node = is_ol ? ENT (MtfNode, ctx->ol_mtf, mtf_i)
: ENT (MtfNode, ctx->mtf, mtf_i - ctx->ol_mtf.len);
if (snip_in_dict) {
const Buffer *dict = is_ol ? &ctx->ol_dict : &ctx->dict;
ASSERT0 (buf_is_allocated (dict), "Error in mtf_node_do: dict not allocated");
*snip_in_dict = &dict->data[node->char_index];
}
if (snip_len) *snip_len = node->snip_len;
return node;
}
// PIZ: search for a node matching this snip in a dictionary and return the node index. note that we do a linear
// search as PIZ doesn't have hash tables.
int32_t mtf_search_for_word_index (MtfContext *ctx, const char *snip, unsigned snip_len)
{
MtfWord *words = (MtfWord *)ctx->word_list.data;
for (unsigned i=0; i < ctx->word_list.len; i++)
if (words[i].snip_len == snip_len && !memcmp (&ctx->dict.data[words[i].char_index], snip, snip_len))
return i;
return NIL;
}
// PIZ only (uses word_list): returns word index, and advances the iterator
uint32_t mtf_get_next_snip (VBlock *vb, MtfContext *ctx,
SnipIterator *override_iterator, // if NULL, taken from ctx
const char **snip, uint32_t *snip_len, // optional out
uint32_t txt_line)
{
SnipIterator *iterator = override_iterator ? override_iterator : &ctx->iterator;
if (!override_iterator && !iterator->next_b250) // INFO and Field1-9 data (GT data uses override_next_b250)
iterator->next_b250 = FIRSTENT (uint8_t, ctx->b250); // initialize (GT data initializes to the beginning of each sample rather than the beginning of the data)
// an imperfect test for overflow, but this should never happen anyway
ASSERT (override_iterator || iterator->next_b250 <= LASTENT (uint8_t, ctx->b250), "Error while reconstructing line %u vb_i=%u: iterator for %s reached end of data",
txt_line, vb->vblock_i, err_dict_id (ctx->dict_id));
uint32_t word_index = is_v2_or_above ? base250_decode (&iterator->next_b250) // if this line has no non-GT subfields, it will not have a ctx
: v1_base250_decode (&iterator->next_b250);
// case: a subfield snip is missing - either the genotype data has fewer subfields than declared in FORMAT, or it is not provided at all for some (or all) samples.
if (word_index == WORD_INDEX_MISSING_SF) {
ASSERT (!ctx || ctx->b250_section_type == SEC_VCF_GT_DATA, "Error while reconstructing line %u vb_i=%u: BASE250_MISSING_SF unexpectedly found in b250 data of %s (%s)",
txt_line, vb->vblock_i, err_dict_id (ctx->dict_id), st_name(ctx->b250_section_type)); // there will be no context if this GT subfield was always missing - never appeared on any sample
if (snip) {
*snip = NULL; // ignore this dict_id - don't even output a separator
*snip_len = 0;
}
}
// case: a subfield snip is empty, eg "AB::CD" (VCF GT Data) or "OA:Z:chr13,52863337,-,56S25M70S,0,;" (SAM OA optional field)
else if (word_index == WORD_INDEX_EMPTY_SF) {
if (snip) {
*snip = ""; // pointer to static empty string
*snip_len = 0;
}
}
else {
if (word_index == WORD_INDEX_ONE_UP)
word_index = ctx->iterator.prev_word_index + 1;
ASSERT (word_index < ctx->word_list.len, "Error while parsing line %u: word_index=%u is out of bounds - %s %s dictionary has only %u entries",
txt_line, word_index, st_name (ctx->dict_section_type),
err_dict_id (ctx->dict_id), (uint32_t)ctx->word_list.len);
//MtfWord *dict_word = &((MtfWord*)ctx->word_list.data)[word_index];
MtfWord *dict_word = ENT (MtfWord, ctx->word_list, word_index);
if (snip) {
*snip = &ctx->dict.data[dict_word->char_index];
*snip_len = dict_word->snip_len;
}
}
iterator->prev_word_index = word_index;
return word_index;
}
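// Illustrative usage sketch (added; real PIZ callers differ in detail): a reconstruction loop over a non-GT
// context might look roughly like this, assuming ctx->b250 was already uncompressed for this VB:
//
//     const char *snip; uint32_t snip_len;
//     for (uint32_t line_i=0; line_i < num_lines; line_i++) {          // num_lines is hypothetical
//         mtf_get_next_snip (vb, ctx, NULL, &snip, &snip_len, line_i); // NULL: use ctx->iterator
//         if (snip_len) { /* append snip to the reconstructed line */ }
//     }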
// Process a snip - return its node index, and enter it into the dictionary if it's not already there. Called
// 1. During segregate - as snips are encountered in the data. No base250 encoding yet
// 2. During mtf_merge_in_vb_ctx_one_dict_id() - to enter snips into z_file->mtf_ctx - also encoding in base250
static uint32_t mtf_evaluate_snip_merge (VBlock *merging_vb, MtfContext *zf_ctx, MtfContext *vb_ctx,
const char *snip, uint32_t snip_len,
MtfNode **node, bool *is_new) // out
{
// attempt to get the node from the hash table
uint32_t new_mtf_i_if_no_old_one = zf_ctx->mtf.len;
int32_t existing_mtf_i = hash_get_entry_for_merge (zf_ctx, snip, snip_len, new_mtf_i_if_no_old_one, node);
if (existing_mtf_i != NIL) {
*is_new = false;
return existing_mtf_i; // snip was found in hash table - we're done
}
// this snip was just added to the hash table - it's a new snip
zf_ctx->mtf.len++; // we have a new dictionary item - this snip
ASSERT (zf_ctx->mtf.len < MAX_WORDS_IN_CTX,
"Error: too many words in directory %s, max allowed number of words is is %u",
err_dict_id (zf_ctx->dict_id), MAX_WORDS_IN_CTX);
buf_alloc (evb, &zf_ctx->mtf, sizeof (MtfNode) * MAX(INITIAL_NUM_NODES, zf_ctx->mtf.len), CTX_GROWTH,
"z_file->mtf_ctx->mtf", zf_ctx->did_i);
buf_set_overlayable (&zf_ctx->mtf);
*node = mtf_node (zf_ctx, new_mtf_i_if_no_old_one, NULL, NULL);
memset (*node, 0, sizeof(MtfNode)); // safety
(*node)->snip_len = snip_len;
(*node)->char_index = mtf_insert_to_dict (evb, zf_ctx, true, snip, snip_len);
(*node)->word_index.n = new_mtf_i_if_no_old_one;
*is_new = true;
return new_mtf_i_if_no_old_one;
}
uint32_t mtf_evaluate_snip_seg (VBlock *segging_vb, MtfContext *vb_ctx,
const char *snip, uint32_t snip_len,
MtfNode **node /* out */, bool *is_new /* out */)
{
ASSERT0 (vb_ctx, "Error in mtf_evaluate_snip_seg: vb_ctx is NULL");
segging_vb->z_section_entries[vb_ctx->b250_section_type]++;
if (!snip_len)
return (!snip || (segging_vb->data_type == DT_VCF && *snip != ':')) ? WORD_INDEX_MISSING_SF : WORD_INDEX_EMPTY_SF;
uint32_t new_mtf_i_if_no_old_one = vb_ctx->ol_mtf.len + vb_ctx->mtf.len;
ASSERT (new_mtf_i_if_no_old_one <= MAX_WORDS_IN_CTX,
"Error: ctx of %s is full (max allowed words=%u): ol_mtf.len=%u mtf.len=%u",
err_dict_id (vb_ctx->dict_id), MAX_WORDS_IN_CTX, (uint32_t)vb_ctx->ol_mtf.len, (uint32_t)vb_ctx->mtf.len)
// get the node from the hash table if it already exists, or add this snip to the hash table if not
int32_t existing_mtf_i = hash_get_entry_for_seg (segging_vb, vb_ctx, snip, snip_len, new_mtf_i_if_no_old_one, node);
if (existing_mtf_i != NIL) {
if (segging_vb->vblock_i == 1)
ENT (SorterEnt, vb_ctx->sorter, existing_mtf_i)->count++;
if (is_new) *is_new = false;
return existing_mtf_i; // snip found - we're done
}
// this snip isn't in the hash table - it's a new snip
ASSERT (vb_ctx->mtf.len < 0x7fffffff, "Error: too many words in dictionary %s", err_dict_id (vb_ctx->dict_id));
segging_vb->z_section_entries[vb_ctx->dict_section_type]++;
buf_alloc (segging_vb, &vb_ctx->mtf, sizeof (MtfNode) * MAX(INITIAL_NUM_NODES, 1+vb_ctx->mtf.len), CTX_GROWTH,
"mtf_ctx->mtf", vb_ctx->did_i);
vb_ctx->mtf.len++; // new hash entry or extend linked list
*node = mtf_node (vb_ctx, new_mtf_i_if_no_old_one, NULL, NULL);
memset (*node, 0, sizeof(MtfNode)); // safety
(*node)->snip_len = snip_len;
(*node)->char_index = mtf_insert_to_dict (segging_vb, vb_ctx, false, snip, snip_len);
(*node)->word_index.n = new_mtf_i_if_no_old_one;
// if this is the first variant block - allocate/grow sorter to contain exactly the same number of entries as mtf
if (segging_vb->vblock_i == 1) {
unsigned prev_size = vb_ctx->sorter.size;
buf_alloc (segging_vb, &vb_ctx->sorter, sizeof (SorterEnt) * (vb_ctx->mtf.size / sizeof(MtfNode)), 1, "mtf_ctx->sorter", 0);
if (vb_ctx->sorter.size > prev_size) memset (&vb_ctx->sorter.data[prev_size], 0, vb_ctx->sorter.size - prev_size);
SorterEnt *sorter_ent = ENT (SorterEnt, vb_ctx->sorter, new_mtf_i_if_no_old_one);
sorter_ent->node_index = new_mtf_i_if_no_old_one;
sorter_ent->count = 1;
}
if (is_new) *is_new = true;
return new_mtf_i_if_no_old_one;
}
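// Illustrative usage sketch (added; real Seg callers differ in detail): during segregation a caller typically
// evaluates each snip and keeps the returned node index for later b250 generation, e.g.:
//
//     MtfNode *node; bool is_new;
//     uint32_t node_index = mtf_evaluate_snip_seg (vb, ctx, snip, snip_len, &node, &is_new);
//     // ... record node_index (e.g. in ctx->mtf_i) for zip_generate_b250_section()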
// ZIP only: overlay and/or copy the current state of the global context to the vb, ahead of compressing this vb.
void mtf_clone_ctx (VBlock *vb)
{
unsigned z_num_dict_ids = __atomic_load_n (&z_file->num_dict_ids, __ATOMIC_RELAXED);
START_TIMER; // including mutex wait time
// note: because each dictionary has its own mutex, it is possible that we will see only a partial set
// of dictionaries (eg some but not all of the fields) when we arrive here while another thread is mid-way
// through merging and adding a bunch of dictionaries.
// however z_num_dict_ids will always correctly state the number of dictionaries that are available.
for (unsigned did_i=0; did_i < z_num_dict_ids; did_i++) {
MtfContext *vb_ctx = &vb->mtf_ctx[did_i];
MtfContext *zf_ctx = &z_file->mtf_ctx[did_i];
ASSERT (zf_ctx->mutex_initialized, "Error: expected zf_ctx->mutex_initialized for did_i=%u", did_i);
mtf_lock (vb, &zf_ctx->mutex, "zf_ctx", did_i);
if (buf_is_allocated (&zf_ctx->dict)) { // something already for this dict_id
// overlay the global dict and mtf - these will not change by this (or any other) VB
//fprintf (stderr, ("mtf_clone_ctx: overlaying old dict %.8s, to vb_i=%u vb_did_i=z_did_i=%u\n", dict_id_printable (zf_ctx->dict_id).id, vb->vblock_i, did_i);
buf_overlay (vb, &vb_ctx->ol_dict, &zf_ctx->dict, "ctx->ol_dict", did_i);
buf_overlay (vb, &vb_ctx->ol_mtf, &zf_ctx->mtf, "ctx->ol_mtf", did_i);
// overlay the hash table, that may still change by future vb's merging... this vb will only use
// entries that are up to this merge_num
buf_overlay (vb, &vb_ctx->global_hash, &zf_ctx->global_hash, "mtf_ctx->global_hash", did_i);
vb_ctx->merge_num = zf_ctx->merge_num;
vb_ctx->global_hash_prime = zf_ctx->global_hash_prime; // can never change
vb_ctx->num_new_entries_prev_merged_vb = zf_ctx->num_new_entries_prev_merged_vb;
}
vb_ctx->did_i = did_i;
vb_ctx->dict_id = zf_ctx->dict_id;
vb_ctx->dict_section_type = zf_ctx->dict_section_type;
vb_ctx->b250_section_type = zf_ctx->b250_section_type;
vb->dict_id_to_did_i_map[vb_ctx->dict_id.map_key] = did_i;
mtf_init_iterator (vb_ctx);
mtf_unlock (vb, &zf_ctx->mutex, "zf_ctx", did_i);
}
vb->num_dict_ids = z_num_dict_ids;
COPY_TIMER (vb->profile.mtf_clone_ctx)
}
void mtf_initialize_for_zip (void)
{
if (z_file->dicts_mutex_initialized) return;
unsigned ret = pthread_mutex_init (&z_file->dicts_mutex, NULL);
ASSERT0 (!ret, "pthread_mutex_init failed for z_file->dicts_mutex");
ret = pthread_mutex_init (&wait_for_vb_1_mutex, NULL);
ASSERT0 (!ret, "pthread_mutex_init failed for wait_for_vb_1_mutex");
ret = pthread_mutex_init (&compress_dictionary_data_mutex, NULL);
ASSERT0 (!ret, "pthread_mutex_init failed for compress_dictionary_data_mutex");
}
// find the z_file context that corresponds to dict_id. It could be possibly a different did_i
// than in the vb - in case this dict_id is new to this vb, but another vb already inserted
// it to z_file
static MtfContext *mtf_get_zf_ctx (DictIdType dict_id)
{
unsigned z_num_dict_ids = __atomic_load_n (&z_file->num_dict_ids, __ATOMIC_RELAXED);
for (unsigned did_i=0; did_i < z_num_dict_ids; did_i++)
if (dict_id.num == z_file->mtf_ctx[did_i].dict_id.num)
return &z_file->mtf_ctx[did_i];
return NULL;
}
// ZIP only: called by merging VBs to add a new dict to z_file - copying some stuff from vb_ctx
static MtfContext *mtf_add_new_zf_ctx (VBlock *merging_vb, const MtfContext *vb_ctx)
{
// adding a new dictionary is protected by a mutex. note that z_file->num_dict_ids is accessed by other threads
// without mutex protection when searching for a dictionary - that's why we update it at the end, after the new
// zf_ctx is set up with the new dict_id (ready for another thread to search it)
mtf_lock (merging_vb, &z_file->dicts_mutex, "dicts_mutex", 0);
// check if another thread raced and created this dict before us
MtfContext *zf_ctx = mtf_get_zf_ctx (vb_ctx->dict_id);
if (zf_ctx) goto finish;
ASSERT (z_file->num_dict_ids+1 < MAX_DICTS, // load num_dict_ids - this time with mutex protection - it could have changed
"Error: z_file has more dict_id types than MAX_DICTS=%u", MAX_DICTS);
zf_ctx = &z_file->mtf_ctx[z_file->num_dict_ids];
ASSERT (!pthread_mutex_init (&zf_ctx->mutex, NULL),
"pthread_mutex_init failed for zf_ctx->mutex did_i=%u", zf_ctx->did_i);
zf_ctx->mutex_initialized = true;
zf_ctx->did_i = z_file->num_dict_ids;
zf_ctx->dict_id = vb_ctx->dict_id;
zf_ctx->b250_section_type = vb_ctx->b250_section_type;
zf_ctx->dict_section_type = vb_ctx->dict_section_type;
// only when the new entry is finalized do we increment num_dict_ids, atomically. This is because
// other threads might access it without a mutex when searching for a dict_id
__atomic_store_n (&z_file->num_dict_ids, z_file->num_dict_ids+1, __ATOMIC_RELAXED); // stamp our merge_num as the ones that set the mtf_i
finish:
mtf_unlock (merging_vb, &z_file->dicts_mutex, "dicts_mutex", 0);
return zf_ctx;
}
// ZIP only: this is called towards the end of compressing one vb - merging its dictionaries into the z_file
// each dictionary is protected by its own mutex, and there is one z_file mutex protecting num_dict_ids.
// we are careful never to hold two mutexes at the same time to avoid deadlocks
static void mtf_merge_in_vb_ctx_one_dict_id (VBlock *merging_vb, unsigned did_i)
{
MtfContext *vb_ctx = &merging_vb->mtf_ctx[did_i];
// get the ctx or create a new one. note: mtf_add_new_zf_ctx() must be called before mtf_lock() because it locks the z_file mutex (avoid a deadlock)
MtfContext *zf_ctx = mtf_get_zf_ctx (vb_ctx->dict_id);
if (!zf_ctx) zf_ctx = mtf_add_new_zf_ctx (merging_vb, vb_ctx);
{ START_TIMER;
mtf_lock (merging_vb, &zf_ctx->mutex, "zf_ctx", zf_ctx->did_i);
COPY_TIMER (merging_vb->profile.lock_mutex_zf_ctx);
}
START_TIMER; // note: careful not to count time spent waiting for the mutex
//fprintf (stderr, ("Merging dict_id=%.8s into z_file vb_i=%u vb_did_i=%u z_did_i=%u\n", dict_id_printable (vb_ctx->dict_id).id, merging_vb->vblock_i, did_i, z_did_i);
zf_ctx->merge_num++; // first merge is #1 (first clone which happens before the first merge, will get vb-)
if (!buf_is_allocated (&vb_ctx->dict)) goto finish; // nothing yet for this dict_id
uint32_t start_dict_len = zf_ctx->dict.len;
uint32_t start_mtf_len = zf_ctx->mtf.len;
if (!buf_is_allocated (&zf_ctx->dict)) {
// first data for this dict (usually, but not always, vb_i=1) - move to zf_ctx and leave overlay
zf_ctx->num_new_entries_prev_merged_vb = vb_ctx->mtf.len; // number of new words in this dict from this VB
// thread safety note: zf_ctx buffers are already added to evb's buffer list by file_initialize_z_file_data
// so these buf_move calls don't touch buf_lists and hence there is no possibility of conflict with the I/O thread
// that might be concurrently writing to its buffer list
buf_move (evb, &zf_ctx->dict, merging_vb, &vb_ctx->dict);
buf_set_overlayable (&zf_ctx->dict);
buf_overlay (merging_vb, &vb_ctx->ol_dict, &zf_ctx->dict, "ctx->ol_dict", did_i);
buf_move (evb, &zf_ctx->mtf, merging_vb, &vb_ctx->mtf);
buf_set_overlayable (&zf_ctx->mtf);
buf_overlay (merging_vb, &vb_ctx->ol_mtf, &zf_ctx->mtf, "ctx->ol_mtf", did_i);
// allocate hash table, based on the statistics gathered by this first vb that is merging this dict, and
// populate the hash table without needing to re-evaluate the snips (we know none are in the hash table, but all are in mtf and dict)
if (zf_ctx->global_hash.size <= 1) // only initial allocation in zip_dict_data_initialize
hash_alloc_global (merging_vb, zf_ctx, vb_ctx);
// encode in base250 - to be used by zip_vcf_generate_genotype_one_section() and zip_generate_b250_section()
for (unsigned i=0; i < zf_ctx->mtf.len; i++) {
MtfNode *zf_node = &((MtfNode *)zf_ctx->mtf.data)[i];
zf_node->word_index = base250_encode (zf_node->word_index.n); // note that vb overlays this. also, vb_1 has been sorted so word_index != node_index
ASSERT (zf_node->word_index.n < zf_ctx->mtf.len, // sanity check
"Error: word_index=%u out of bound - mtf.len=%u, in dictionary %s",
(uint32_t)zf_node->word_index.n, (uint32_t)zf_ctx->mtf.len, err_dict_id (zf_ctx->dict_id));
}
}
else {
// merge in words that are potentially new (but may have been already added by other VBs since we cloned for this VB)
// (vb_ctx->mtf contains only new words, old words from previous vbs are in vb_ctx->ol_mtf)
for (unsigned i=0; i < vb_ctx->mtf.len; i++) {
MtfNode *vb_node = &((MtfNode *)vb_ctx->mtf.data)[i];
MtfNode *zf_node;
bool is_new;
// use evb and not vb because zf_context is z_file (which belongs to evb)
int32_t zf_node_index = mtf_evaluate_snip_merge (merging_vb, zf_ctx, vb_ctx,
&vb_ctx->dict.data[vb_node->char_index], vb_node->snip_len,
&zf_node, &is_new);
ASSERT (zf_node_index >= 0 && zf_node_index < zf_ctx->mtf.len, "Error: zf_node_index=%d out of range - zf_ctx->mtf.len=%u", zf_node_index, (uint32_t)zf_ctx->mtf.len);
// set word_index to be indexing the global dict - to be used by zip_vcf_generate_genotype_one_section() and zip_generate_b250_section()
if (is_new)
vb_node->word_index = zf_node->word_index = base250_encode (zf_node_index);
else
// a previous VB already calculated the word index for this node. if it was done by vb_i=1,
// then it is also re-sorted and the word_index is no longer the same as the node_index
vb_node->word_index = zf_node->word_index;
}
}
// we now compress the dictionaries directly from z_file. note: we must continue to hold
// the mutex during compression, lest another thread re-alloc the dictionary.
const char *start_dict = &zf_ctx->dict.data[start_dict_len]; // we take the pointer AFTER the evaluate, since dict can be reallocated
unsigned added_chars = zf_ctx->dict.len - start_dict_len;
unsigned added_words = zf_ctx->mtf.len - start_mtf_len;
// compress incremental part of dictionary added by this vb. note: dispatcher calls this function in the correct order of VBs.
if (added_chars) {
// special optimization for the GL dictionary (it is ineffective with --optimize that already optimizes GL)
if (zf_ctx->dict_id.num == dict_id_FORMAT_GL && !flag_optimize)
start_dict = gl_optimize_dictionary ((VBlockVCFP)merging_vb, &zf_ctx->dict, &((MtfNode *)zf_ctx->mtf.data)[start_mtf_len], start_dict_len, added_words);
// we need to protect z_file->dict_data while we're writing to it. this ensures a single writer
// to this data. we also need this mutex embedded in the zf_ctx->mutex, so that fragments of
// a dictionary are written in the order they are created.
{ START_TIMER;
mtf_lock (merging_vb, &compress_dictionary_data_mutex, "compress_dictionary_data_mutex", merging_vb->vblock_i);
COPY_TIMER (merging_vb->profile.lock_mutex_compress_dict);
}
zfile_compress_dictionary_data (merging_vb, zf_ctx, added_words, start_dict, added_chars);
mtf_unlock (merging_vb, &compress_dictionary_data_mutex, "compress_dictionary_data_mutex", merging_vb->vblock_i);
}
finish:
COPY_TIMER (merging_vb->profile.mtf_merge_in_vb_ctx_one_dict_id)
mtf_unlock (merging_vb, &zf_ctx->mutex, "zf_ctx->mutex", zf_ctx->did_i);
}
// ZIP only: merge new words added in this vb into the z_file.mtf_ctx, and compresses dictionaries.
void mtf_merge_in_vb_ctx (VBlock *merging_vb)
{
// vb_i=1 goes first, as it has the sorted dictionaries; other vbs can go in
// arbitrary order. at the end of this function, vb_i=1 releases the mutex it locked a long time ago,
// while the other vbs wait for vb_1 by attempting to lock the mutex
if (merging_vb->vblock_i != 1) {
mtf_lock (merging_vb, &wait_for_vb_1_mutex, "wait_for_vb_1_mutex", merging_vb->vblock_i);
mtf_unlock (merging_vb, &wait_for_vb_1_mutex, "wait_for_vb_1_mutex", merging_vb->vblock_i);
}
mtf_verify_field_ctxs (merging_vb); // this was useful in the past to catch nasty thread issues
// first, all field dictionaries (note: even if the dictionary is not allocated - eg FORMAT in a FORMATless VCF)
for (unsigned did_i=0; did_i < merging_vb->num_dict_ids; did_i++) {
MtfContext *ctx = &merging_vb->mtf_ctx[did_i];
SectionType dict_sec_type = ctx->dict_section_type;
ASSERT (section_type_is_dictionary(dict_sec_type), "Error: dict_sec_type=%s is not a dictionary section", st_name(dict_sec_type));
if (dict_sec_type != SEC_VCF_INFO_SF_DICT && dict_sec_type != SEC_VCF_FRMT_SF_DICT)
mtf_merge_in_vb_ctx_one_dict_id (merging_vb, did_i);
}
// second, all the info subfield dictionaries
for (unsigned did_i=0; did_i < merging_vb->num_dict_ids; did_i++)
if (buf_is_allocated (&merging_vb->mtf_ctx[did_i].dict) &&
merging_vb->mtf_ctx[did_i].dict_section_type == SEC_VCF_INFO_SF_DICT)
mtf_merge_in_vb_ctx_one_dict_id (merging_vb, did_i);
// third, all the genotype subfield dictionaries
for (unsigned did_i=0; did_i < merging_vb->num_dict_ids; did_i++)
if (buf_is_allocated (&merging_vb->mtf_ctx[did_i].dict) &&
merging_vb->mtf_ctx[did_i].dict_section_type == SEC_VCF_FRMT_SF_DICT)
mtf_merge_in_vb_ctx_one_dict_id (merging_vb, did_i);
// note: z_file->num_dict_ids might be larger than merging_vb->num_dict_ids at this point, for example:
// vb_i=1 started, z_file is empty, created 20 contexts
// vb_i=2 started, z_file is empty, created 10 contexts
// vb_i=1 completes, merges 20 contexts to z_file, which has 20 contexts after
// vb_i=2 completes, merges 10 contexts, of which 5 (for example) are shared with vb_i=1. Now z_file has 25 contexts after.
if (merging_vb->vblock_i == 1)
mtf_unlock (merging_vb, &wait_for_vb_1_mutex, "wait_for_vb_1_mutex", 1);
}
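// Added summary sketch (hedged - the dispatcher / compute-thread wiring lives outside this file): per VB,
// the ZIP flow through this file is roughly:
//
//     mtf_clone_ctx (vb);                 // overlay the current z_file dict/mtf/hash into the VB
//     // ... Seg: mtf_evaluate_snip_seg() once per snip ...
//     mtf_sort_dictionaries_vb_1 (vb);    // vb_i=1 only: sort dictionaries by frequency before merging
//     mtf_merge_in_vb_ctx (vb);           // merge new words into z_file and compress dictionary fragments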
// PIZ only (no thread issues - dictionaries are immutable) - gets did_i if the dictionary exists,
// or returns DID_I_NONE if not
uint8_t mtf_get_existing_did_i_by_dict_id (DictIdType dict_id)
{
for (uint8_t did_i=0; did_i < z_file->num_dict_ids; did_i++)
if (dict_id.num == z_file->mtf_ctx[did_i].dict_id.num) return did_i;
return DID_I_NONE; // not found
}
// gets did_i if the dictionary exists, and creates a new dictionary if it's the first time dict_id is encountered
// threads: no issues - called by PIZ for vb and zf (but dictionaries are immutable) and by Segregate (ZIP) on vb_ctx only
MtfContext *mtf_get_ctx_by_dict_id (MtfContext *mtf_ctx /* an array */,
uint8_t *dict_id_to_did_i_map,
unsigned *num_dict_ids,
uint8_t *num_subfields, // variable to increment if a new context is added
DictIdType dict_id,
SectionType dict_section_type)
{
// attempt to get did_i from dict_id mapper
uint8_t did_i = dict_id_to_did_i_map[dict_id.map_key];
if (did_i != DID_I_NONE && mtf_ctx[did_i].dict_id.num == dict_id.num) goto done;
// case: it's not in the mapper - the mapper slot is occupied by another dict_id - perhaps our dict_id exists
// but missed the opportunity to enter the mapper - search for it
if (did_i != DID_I_NONE)
for (did_i=0; did_i < *num_dict_ids; did_i++)
if (dict_id.num == mtf_ctx[did_i].dict_id.num) goto done;
did_i = *num_dict_ids; // note: *num_dict_ids cannot be updated until ctx is initialized, see comment below
MtfContext *ctx = &mtf_ctx[did_i];
//fprintf (stderr, "New context: dict_id=%.8s in did_i=%u \n", dict_id_printable (dict_id).id, did_i);
ASSERT (*num_dict_ids+1 < MAX_DICTS,
"Error: number of dictionary types is greater than MAX_DICTS=%u", MAX_DICTS);
ctx->did_i = did_i;
ctx->dict_id = dict_id;
ctx->dict_section_type = dict_section_type;
ctx->b250_section_type = dict_section_type + 1; // the b250 is 1 after the dictionary for all dictionary sections
mtf_init_iterator (ctx);
if (dict_id_to_did_i_map[dict_id.map_key] == DID_I_NONE)
dict_id_to_did_i_map[dict_id.map_key] = did_i;
// thread safety: the increment below MUST be AFTER the initialization of ctx, because piz_get_line_subfields
// might be reading this data at the same time as the piz dispatcher thread adding more dictionaries
(*num_dict_ids) = did_i + 1;
if (num_subfields) {
(*num_subfields)++;
ASSERT (*num_subfields+1 <= MAX_SUBFIELDS,
"Error: number of %s dictionaries is greater than MAX_SUBFIELDS=%u", st_name (dict_section_type), MAX_SUBFIELDS);
}
done:
ctx = &mtf_ctx[did_i];
ASSERT (ctx->dict_section_type == dict_section_type, "Error: mismatch in dict_id=%s dict_section_type: requested %s but in the ctx says: %s",
err_dict_id (dict_id), st_name(dict_section_type), st_name(ctx->dict_section_type));
return ctx;
}
// called from seg_all_data_lines (ZIP) and zfile_read_all_dictionaries (PIZ) to initialize all
// primary field ctx's. these are not always used (e.g. when some are not read from disk due to --strip)
// but we maintain their fixed positions anyway as the code relies on it
void mtf_initialize_primary_field_ctxs (VBlock *vb, // NULL if called by zfile_read_all_dictionaries
MtfContext *mtf_ctx /* an array */,
DataType dt,
uint8_t *dict_id_to_did_i_map,
unsigned *num_dict_ids)
{
for (int f=0; f < dt_fields[dt].num_fields; f++) {
const char *fname = dt_fields[dt].names[f];
DictIdType dict_id = dict_id_field (dict_id_make (fname, strlen(fname)));
SectionType dict_sec = FIELD_TO_DICT_SECTION(dt, f);
MtfContext *ctx = mtf_get_ctx_by_dict_id (mtf_ctx, dict_id_to_did_i_map, num_dict_ids, NULL, dict_id, dict_sec);
// verify that the ctx is at its correct place
ASSERT (ctx - mtf_ctx == f, "Error in mtf_initialize_primary_field_ctxs: f=%u (%s) but ctx is at mtf_ctx[%u]. vb_i=%u vb.first_line=%u",
f, fname, (unsigned)(ctx - mtf_ctx), vb ? vb->vblock_i : 0, vb ? vb->first_line : 0);
}
}
// PIZ only: this is called by the I/O thread after reading a dictionary section
void mtf_integrate_dictionary_fragment (VBlock *vb, char *section_data)
{
START_TIMER;
// thread safety note: this function is called only from the piz dispatcher thread,
// so no thread safety issues with this static buffer.
static Buffer fragment = EMPTY_BUFFER;
// thread-safety note: while the dispatcher thread is integrating new dictionary fragments,
// compute threads might be using these dictionaries. This is ok, because the dispatcher thread makes
// sure we integrate dictionaries from vbs by order - so that running compute threads never
// need to access the new parts of dictionaries. We also pre-allocate the dictionaries in
// header_genozip_to_txt() so that they don't need to be realloced. dict.len may be accessed
// by compute threads, but its change is assumed to be atomic, so that no weird things will happen
SectionHeaderDictionary *header = (SectionHeaderDictionary *)section_data;
ASSERT (section_type_is_dictionary(header->h.section_type),
"Error: header->h.section_type=%s is not a dictionary section", st_name(header->h.section_type));
uint32_t num_snips = BGEN32 (header->num_snips);
zfile_uncompress_section (vb, section_data, &fragment, "fragment", header->h.section_type);
// special treatment if this is GL - de-optimize
if (header->dict_id.num == dict_id_FORMAT_GL)
gl_deoptimize_dictionary (fragment.data, fragment.len);
// in piz, the same did_i is used for z_file and vb contexts, meaning that in vbs there could be
// a non-contiguous array of contexts (some are missing if not used by this vb)
MtfContext *zf_ctx = mtf_get_ctx_by_dict_id (z_file->mtf_ctx, z_file->dict_id_to_did_i_map, &z_file->num_dict_ids, NULL, header->dict_id, header->h.section_type);
// append fragment to dict. If there is no room - old memory is abandoned (so that VBs that are overlaying
// it continue to work uninterrupted) and a new memory is allocated, where the old dict is joined by the new fragment
unsigned dict_old_len = zf_ctx->dict.len;
buf_alloc (evb, &zf_ctx->dict, zf_ctx->dict.len + fragment.len, CTX_GROWTH, "z_file->mtf_ctx->dict", header->h.section_type);
buf_set_overlayable (&zf_ctx->dict);
memcpy (AFTERENT (char, zf_ctx->dict), fragment.data, fragment.len);
zf_ctx->dict.len += fragment.len;
// extend word list memory - and calculate the new words. If there is no room - old memory is abandoned
// (so that VBs that are overlaying it continue to work uninterrupted) and a new memory is allocated
buf_alloc (evb, &zf_ctx->word_list, (zf_ctx->word_list.len + num_snips) * sizeof (MtfWord), CTX_GROWTH,
"z_file->mtf_ctx->word_list", zf_ctx->did_i);
buf_set_overlayable (&zf_ctx->word_list);
bool is_ref_alt = (z_file->data_type == DT_VCF && header->dict_id.num == dict_id_fields[VCF_REFALT]);
char *start = fragment.data;
for (unsigned snip_i=0; snip_i < num_snips; snip_i++) {
MtfWord *word = &NEXTENT (MtfWord, zf_ctx->word_list);
char *c=start; while (*c != '\t') c++;
// special case of REFALT - there is always one \t in the middle of the snip, eg "A\tC"
if (is_ref_alt) {
c++;
while (*c != '\t') c++;
}
word->snip_len = c - start;
word->char_index = dict_old_len + (start - fragment.data);
start = c+1; // skip over the \t
}
buf_free (&fragment);
COPY_TIMER(vb->profile.mtf_integrate_dictionary_fragment);
}
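// Added note on the fragment format (derived from the loop above): a dictionary fragment holds num_snips
// snips, each terminated by '\t', e.g. "PASS\tq10\t" (a hypothetical FILTER fragment); only VCF REFALT snips
// contain one additional internal '\t' separating REF from ALT, e.g. "A\tC\t".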
// PIZ only: this is called by the I/O thread after it integrated all the dictionary fragment read from disk for one VB.
// Here we hand over the integrated dictionaries to the VB - in preparation for the Compute Thread to use them.
// We overlay the z_file's dictionaries and word lists to the vb. these data remain unchanged - neither
// the vb nor the dispatcher thread will ever change snips placed in these. the dispatcher thread may append
// the dictionary and word list as new fragments become available from subsequent VBs. If the memory is not
// sufficient, the dispatcher thread will "abandon" this memory, leaving it to the VB to continue to use it
// while starting a larger dict/word_list on a fresh memory allocation.
void mtf_overlay_dictionaries_to_vb (VBlock *vb)
{
for (unsigned did_i=0; did_i < MAX_DICTS; did_i++) {
MtfContext *zf_ctx = &z_file->mtf_ctx[did_i];
MtfContext *vb_ctx = &vb->mtf_ctx[did_i];
if (!zf_ctx->dict_id.num) continue;
if (buf_is_allocated (&zf_ctx->dict) && buf_is_allocated (&zf_ctx->word_list)) {
vb_ctx->did_i = did_i;
vb_ctx->dict_id = zf_ctx->dict_id;
vb_ctx->b250_section_type = zf_ctx->b250_section_type;
vb_ctx->dict_section_type = zf_ctx->dict_section_type;
if (vb->dict_id_to_did_i_map[vb_ctx->dict_id.map_key] == DID_I_NONE)
vb->dict_id_to_did_i_map[vb_ctx->dict_id.map_key] = did_i;
mtf_init_iterator (vb_ctx);
buf_overlay (vb, &vb_ctx->dict, &zf_ctx->dict, "ctx->dict", did_i);
buf_overlay (vb, &vb_ctx->word_list, &zf_ctx->word_list, "ctx->word_list", did_i);
// count dictionaries of genotype data subfields
// note: this is needed only for V1 files...
if (vb->data_type == DT_VCF && dict_id_is_vcf_format_sf (vb_ctx->dict_id)) {
ASSERT (++((VBlockVCFP)vb)->num_format_subfields <= MAX_SUBFIELDS,
"Error: number of subfields in %s exceeds MAX_SUBFIELDS=%u, while reading vb_i=%u",
z_name, MAX_SUBFIELDS, vb->vblock_i);
}
}
}
vb->num_dict_ids = z_file->num_dict_ids;
}
// used by random_access_show_index
MtfNode *mtf_get_node_by_word_index (MtfContext *ctx, uint32_t word_index)
{
ARRAY (MtfNode, mtf, ctx->mtf);
for (uint32_t i=0; i < ctx->mtf.len; i++)
if (mtf[i].word_index.n == word_index) return &mtf[i];
ABORT ("mtf_search_for_char_index_by_word_index failed to find word_index=%u in did_i=%u", word_index, ctx->did_i);
return NULL; // never reaches here
}
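// qsort comparator: orders sorter entries by descending count, so that after mtf_sort_dictionaries_vb_1()
// the most frequent snips of vb_i=1 get the smallest word indices (and thus the shortest base250 codes).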
static int sorter_cmp(const void *a_, const void *b_)
{
SorterEnt *a = (SorterEnt *)a_;
SorterEnt *b = (SorterEnt *)b_;
return (int)b->count - (int)a->count;
}
void mtf_sort_dictionaries_vb_1(VBlock *vb)
{
// thread safety note: no issues here, as this is run only by the compute thread of vblock_i=1
for (unsigned did_i=0; did_i < vb->num_dict_ids; did_i++) {
MtfContext *ctx = &vb->mtf_ctx[did_i];
// sort in descending order of count (most frequent snips first)
qsort (ctx->sorter.data, ctx->mtf.len, sizeof(SorterEnt), sorter_cmp);
// rebuild the dictionary in the sorted order, and update the char and word indices in mtf
static Buffer old_dict = EMPTY_BUFFER;
buf_move (vb, &old_dict, vb, &ctx->dict);
buf_alloc (vb, &ctx->dict, old_dict.size, 1, "mtf_ctx->dict", did_i);
ctx->dict.len = old_dict.len;
char *next = ctx->dict.data;
for (unsigned i=0; i < ctx->mtf.len; i++) {
int32_t mtf_i = ((SorterEnt *)ctx->sorter.data)[i].node_index;
MtfNode *node = &((MtfNode *)ctx->mtf.data)[mtf_i];
memcpy (next, &old_dict.data[node->char_index], node->snip_len + 1 /* +1 for \t */);
node->char_index = next - ctx->dict.data;
node->word_index.n = i;
next += node->snip_len + 1;
}
buf_destroy (&old_dict);
}
}
// zero all sorters - this is called in case of a re-do of the first VB due to ploidy overflow
void mtf_zero_all_sorters (VBlock *vb)
{
ASSERT (vb->vblock_i == 1, "Error in mtf_zero_all_sorters: expected vb_i==1, but vb_i==%u", vb->vblock_i);
for (unsigned did_i=0; did_i < vb->num_dict_ids; did_i++) {
MtfContext *ctx = &vb->mtf_ctx[did_i];
buf_zero (&ctx->sorter);
}
}
// for safety, verify that field ctxs are what they say they are. we had bugs in the past where they got mixed up due to
// delicate thread logic.
void mtf_verify_field_ctxs_do (VBlock *vb, const char *func, uint32_t code_line)
{
for (int f=0; f < DTF(num_fields); f++) {
MtfContext *ctx = &vb->mtf_ctx[f];
ASSERT (FIELD_TO_DICT_SECTION(vb->data_type, f) == ctx->dict_section_type &&
FIELD_TO_B250_SECTION(vb->data_type, f) == ctx->b250_section_type,
"mtf_verify_field_ctxs called from %s:%u: field mismatch with section type: f=%s ctx->dict_section_type=%s ctx->b250_section_type=%s vb_i=%u",
func, code_line,
(char*)DTF(names)[f], st_name (ctx->dict_section_type), st_name (ctx->b250_section_type), vb->vblock_i);
}
}
// ZIP only: run by I/O thread during zip_output_processed_vb()
void mtf_update_stats (VBlock *vb)
{
// zf_ctx doesn't store mtf_i, but we just use mtf_i.len as a counter for displaying in genozip_show_sections
for (unsigned did_i=0; did_i < vb->num_dict_ids; did_i++) {
MtfContext *vb_ctx = &vb->mtf_ctx[did_i];
MtfContext *zf_ctx = mtf_get_zf_ctx (vb_ctx->dict_id);
if (!zf_ctx) continue; // this can happen if FORMAT subfield appears, but no line has data for it
zf_ctx->mtf_i.len += vb_ctx->mtf_i.len; // thread safety: no issues, this is updated only by the I/O thread
}
}
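// Free all buffers of this context and reset its fields, so the MtfContext slot can be reused
// (in contrast to mtf_destroy_context() below, which file_close calls just before freeing the File memory
// containing mtf_ctx).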
void mtf_free_context (MtfContext *ctx)
{
buf_free (&ctx->ol_dict);
buf_free (&ctx->ol_mtf);
buf_free (&ctx->dict);
buf_free (&ctx->mtf);
buf_free (&ctx->word_list);
buf_free (&ctx->local_hash);
buf_free (&ctx->global_hash);
buf_free (&ctx->sorter);
buf_free (&ctx->mtf_i);
buf_free (&ctx->b250);
ctx->dict_id.num = 0;
ctx->dict_section_type = ctx->b250_section_type = 0;
ctx->iterator.next_b250 = NULL;
ctx->iterator.prev_word_index = 0;
ctx->local_hash_prime = 0;
ctx->global_hash_prime = 0;
ctx->merge_num = 0;
ctx->mtf_len_at_1_3 = ctx->mtf_len_at_2_3 = 0;
if (ctx->mutex_initialized) {
pthread_mutex_destroy (&ctx->mutex);
ctx->mutex_initialized = false;
}
}
// Called by file_close ahead of freeing File memory containing mtf_ctx
void mtf_destroy_context (MtfContext *ctx)
{
buf_destroy (&ctx->ol_dict);
buf_destroy (&ctx->ol_mtf);
buf_destroy (&ctx->dict);
buf_destroy (&ctx->mtf);
buf_destroy (&ctx->word_list);
buf_destroy (&ctx->local_hash);
buf_destroy (&ctx->global_hash);
buf_destroy (&ctx->sorter);
buf_destroy (&ctx->mtf_i);
buf_destroy (&ctx->b250);
if (ctx->mutex_initialized) {
pthread_mutex_destroy (&ctx->mutex);
ctx->mutex_initialized = false;
}
}