Skip to content

Commit

Permalink
Handle wrong lines in the dictionary (empty line or unknown symbols (f…
Browse files Browse the repository at this point in the history
  • Loading branch information
mimipaskova authored and Guillaume Lample committed Apr 23, 2019
1 parent 3dbfedd commit 3159355
Showing 1 changed file with 6 additions and 2 deletions.
8 changes: 6 additions & 2 deletions src/evaluation/word_translation.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,13 @@ def load_dictionary(path, word2id1, word2id2):
     not_found2 = 0

     with io.open(path, 'r', encoding='utf-8') as f:
-        for _, line in enumerate(f):
+        for index, line in enumerate(f):
             assert line == line.lower()
-            word1, word2 = line.rstrip().split()
+            parts = line.rstrip().split()
+            if len(parts) < 2:
+                logger.warning("Could not parse line %s (%i)", line, index)
+                continue
+            word1, word2 = parts
             if word1 in word2id1 and word2 in word2id2:
                 pairs.append((word1, word2))
             else:
Expand Down

0 comments on commit 3159355

Please sign in to comment.