Skip to content

Commit

Permalink
more tests
Browse files Browse the repository at this point in the history
  • Loading branch information
rsennrich committed Apr 26, 2018
1 parent 9bf7efb commit c00684c
Showing 1 changed file with 19 additions and 0 deletions.
19 changes: 19 additions & 0 deletions test/test_bpe.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

from __future__ import unicode_literals
import unittest
import codecs

Expand Down Expand Up @@ -52,5 +53,23 @@ def test_apply_bpe(self):
out = self.bpe.process_line(line)
self.assertEqual(out, ref)

def test_trailing_whitespace(self):
    """BPE.process_line() keeps the leading and trailing whitespace of its input."""
    # The surrounding whitespace must come back unchanged; only the words
    # in between are split into subword units.
    source_line = ' iron cement \n'
    expected = ' ir@@ on c@@ ement \n'

    self.assertEqual(self.bpe.process_line(source_line), expected)

def test_utf8_whitespace(self):
    """A no-break space (U+00A0) is segmented like a normal character, not a word boundary."""
    # '\xa0' sits between the two words; the expected output keeps it inside
    # a single token sequence joined by '@@' continuation markers.
    source_line = 'iron\xa0cement\n'
    expected = 'ir@@ on@@ \xa0@@ c@@ ement\n'

    segmented = self.bpe.process_line(source_line)
    self.assertEqual(segmented, expected)

# Allow running this test module directly as a script.
if __name__ == '__main__':
    unittest.main()

0 comments on commit c00684c

Please sign in to comment.