Skip to content

Commit

Permalink
Customizable dot freq, more suitable for tokens
Browse files (browse the repository at this point in the history)
  • Loading branch information
Waino committed Apr 8, 2015
1 parent ba93fbe commit 0e38e28
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 4 deletions.
8 changes: 5 additions & 3 deletions flatcat/utils.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -195,20 +195,22 @@ def weighted_sample(data, num_samples):
return data_indices


def _generator_progress(generator):
def _generator_progress(generator, freq=None):
"""Prints a progress bar for visualizing flow through a generator.
The length of a generator is not known in advance, so the bar has
no fixed length. GENERATOR_DOT_FREQ controls the frequency of dots.
This function wraps the argument generator, returning a new generator.
"""

if GENERATOR_DOT_FREQ <= 0:
if freq is None:
freq = GENERATOR_DOT_FREQ
if freq <= 0:
return generator

def _progress_wrapper(generator):
for (i, x) in enumerate(generator):
if i % GENERATOR_DOT_FREQ == 0:
if i % freq == 0:
sys.stderr.write('.')
sys.stderr.flush()
yield x
Expand Down
2 changes: 1 addition & 1 deletion scripts/flatcat-advanced-segment.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -277,7 +277,7 @@ def main(args):

with io._open_text_file_write(args.outfile) as fobj:
pipe = corpus_reader(io, args.infile)
pipe = utils._generator_progress(pipe)
pipe = utils._generator_progress(pipe, 10000)
pipe = cache.segment_from(pipe)
# FIXME: transformations (joining/filtering) here
pipe = (postprocess(args.output_format, morphs)
Expand Down

0 comments on commit 0e38e28

Please sign in to comment.