Commit 3560b60

can assert new special tokens
Signed-off-by: ftgreat <[email protected]>
ftgreat committed Mar 15, 2023
1 parent ade0895 commit 3560b60
Showing 1 changed file with 0 additions and 1 deletion: tests/test_tokenizer.py
@@ -67,7 +67,6 @@ def test_tokenizer_t5(self):
 
     def test_tokenizer_roberta(self):
        tokenizer = Tokenizer.from_pretrained('RoBERTa-base-ch')
-        # print(tokenizer.DecodeIds([791, 1921, 1391, 7649, 1391, 749, 5507, 2548, 1825]))
        self.assertEqual(tokenizer.TokenToId("人"), 782, '')
        self.assertEqual(tokenizer.EncodeAsIds("今天吃饭吃了肯德基"),
                         [791, 1921, 1391, 7649, 1391, 749, 5507, 2548, 1825], '')
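
For context, the following is a minimal standalone sketch of the tokenizer calls that the remaining test lines exercise, plus the DecodeIds call from the deleted debug print. The import path and the use of plain asserts outside a unittest class are assumptions made for illustration; the diff itself only shows the method calls.

# Minimal sketch (not part of the commit): assumes the Tokenizer class used by
# tests/test_tokenizer.py is importable as below.
from flagai.data.tokenizer import Tokenizer

tokenizer = Tokenizer.from_pretrained('RoBERTa-base-ch')

# Single-token lookup asserted by the test: "人" is expected to map to id 782.
assert tokenizer.TokenToId("人") == 782

# Sentence encoding asserted by the test.
assert tokenizer.EncodeAsIds("今天吃饭吃了肯德基") == \
    [791, 1921, 1391, 7649, 1391, 749, 5507, 2548, 1825]

# DecodeIds, used in the debug print this commit removes, roughly inverts EncodeAsIds.
print(tokenizer.DecodeIds([791, 1921, 1391, 7649, 1391, 749, 5507, 2548, 1825]))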
