From a3b21a76b8e372b3ea880a3950269525405b086b Mon Sep 17 00:00:00 2001
From: alvations
Date: Fri, 5 May 2017 12:55:13 +0800
Subject: [PATCH] the correct offset (nosetest docstr is different from Python interpreter)

---
 nltk/tokenize/treebank.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/nltk/tokenize/treebank.py b/nltk/tokenize/treebank.py
index 687d248d65..212464e395 100644
--- a/nltk/tokenize/treebank.py
+++ b/nltk/tokenize/treebank.py
@@ -150,10 +150,9 @@ def span_tokenize(self, text):
 
         >>> from nltk.tokenize import TreebankWordTokenizer
         >>> s = '''Good muffins cost $3.88\\nin New (York). Please (buy) me\\ntwo of them.\\n(Thanks).'''
-        >>> expected = [(0, 4), (5, 12), (13, 17), (18, 19), (19, 27),
-        ... (28, 31), (32, 33), (33, 37), (37, 38), (38, 39), (41, 47),
-        ... (48, 49), (49, 52), (52, 53), (54, 61), (62, 64), (65, 72),
-        ... (72, 73), (73, 79), (79, 80), (80, 81)]
+        >>> expected = [(0, 4), (5, 12), (13, 17), (18, 19), (19, 23),
+        ... (24, 26), (27, 30), (31, 36), (38, 44), (45, 48), (49, 51),
+        ... (52, 55), (56, 58), (59, 64), (65, 71), (71, 72)]
         >>> TreebankWordTokenizer().span_tokenize(s) == expected
         True
         """
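
Note (not part of the patch): the point of the subject line is that the doctest's `expected` offsets should match what a plain interpreter session produces. A minimal sketch of how to re-derive them, assuming NLTK with this change applied; the sample string here is written with single '\n' escapes, as you would type them interactively, rather than the doubled '\\n' the docstring source requires:

    # Sketch only: re-derive the spans that the doctest's `expected` list encodes.
    # Assumes NLTK with this patch applied; span_tokenize is the method patched above.
    from nltk.tokenize import TreebankWordTokenizer

    s = 'Good muffins cost $3.88\nin New (York). Please (buy) me\ntwo of them.\n(Thanks).'
    spans = list(TreebankWordTokenizer().span_tokenize(s))
    print(spans)                                   # should equal `expected` above
    print([s[start:end] for start, end in spans])  # the substrings those spans cover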