19
19
20
20
def main ():
21
21
parser = build_construct_args ()
22
- parser .add_argument ('--build-tagset' , '-t ' , default = True ,
23
- action = 'store_false ' ,
24
- help = 'Construct tagset while loading sequences' )
22
+ parser .add_argument ('--no-build-tagset' , '-n' , default = False ,
23
+ action = 'store_true' , dest = 'no_build_tagset' ,
24
+ help = 'Do NOT construct tagset while loading sequences' )
25
25
parser .add_argument ('output_filename' )
26
26
parser .add_argument ('input_filenames' , nargs = '+' )
27
27
@@ -48,6 +48,10 @@ def main():
48
48
49
49
print 'Saving hashtable to %s' % base
50
50
print 'Loading kmers from sequences in %s' % repr (filenames )
51
+ if args .no_build_tagset :
52
+ print 'We WILL NOT build the tagset.'
53
+ else :
54
+ print 'We WILL build the tagset (for partitioning/traversal).'
51
55
52
56
###
53
57
@@ -56,12 +60,20 @@ def main():
56
60
57
61
for n , filename in enumerate (filenames ):
58
62
print 'consuming input' , filename
59
- ht .consume_fasta_and_tag (filename )
63
+ if args .no_build_tagset :
64
+ ht .consume_fasta (filename )
65
+ else :
66
+ ht .consume_fasta_and_tag (filename )
60
67
61
68
print 'saving hashtable in' , base + '.ht'
62
69
ht .save (base + '.ht' )
63
- print 'saving tagset in' , base + '.tagset'
64
- ht .save_tagset (base + '.tagset' )
70
+
71
+ if not args .no_build_tagset :
72
+ print 'saving tagset in' , base + '.tagset'
73
+ ht .save_tagset (base + '.tagset' )
74
+
75
+ info_fp = open (base + '.info' , 'w' )
76
+ info_fp .write ('%d unique k-mers' % ht .n_unique_kmers ())
65
77
66
78
fp_rate = khmer .calc_expected_collisions (ht )
67
79
print 'fp rate estimated to be %1.3f' % fp_rate
0 commit comments