Skip to content

Commit

Permalink
get_vocabulary: don't crash on double whitespace or empty line
Browse files Browse the repository at this point in the history
  • Loading branch information
rsennrich committed Mar 26, 2018
1 parent 7bb1c3d commit aa1bd9f
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 2 deletions.
3 changes: 2 additions & 1 deletion get_vocab.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@

 for line in sys.stdin:
     for word in line.strip().split(' '):
-        c[word] += 1
+        if word:
+            c[word] += 1

 for key,f in sorted(c.items(), key=lambda x: x[1], reverse=True):
     print(key+" "+ str(f))
3 changes: 2 additions & 1 deletion learn_bpe.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,8 @@ def get_vocabulary(fobj, is_dict=False):
             vocab[word] += int(count)
         else:
             for word in line.strip().split(' '):
-                vocab[word] += 1
+                if word:
+                    vocab[word] += 1
     return vocab

def update_pair_statistics(pair, changed, stats, indices):
Expand Down

0 comments on commit aa1bd9f

Please sign in to comment.