Skip to content

Commit

Permalink
LazyIteratorList in nltk.collections throws StopIteration exception w…
Browse files Browse the repository at this point in the history
…hen getting list length (nltk#2617)

* Remove unnecessary increment

* Prevent looping until StopIteration

When StopIteration is encountered, simply return. This prevents a call to `len()` on a LazyIteratorList from raising a StopIteration exception.

* Replace unused for loop variables with _

* Fixed typo in assertion

* Added doctest for changes

* Removed unnecessary exception variable
  • Loading branch information
tomaarsen authored Nov 23, 2020
1 parent 2c63668 commit 5b07fc4
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 9 deletions.
18 changes: 10 additions & 8 deletions nltk/collections.py
Original file line number Diff line number Diff line change
Expand Up @@ -305,7 +305,7 @@ def __init__(self, list_of_lists):

def __len__(self):
if len(self._offsets) <= len(self._list):
for tok in self.iterate_from(self._offsets[-1]):
for _ in self.iterate_from(self._offsets[-1]):
pass
return self._offsets[-1]

Expand All @@ -327,7 +327,7 @@ def iterate_from(self, start_index):
if sublist_index == (len(self._offsets) - 1):
assert (
index + len(sublist) >= self._offsets[-1]
), "offests not monotonic increasing!"
), "offsets not monotonic increasing!"
self._offsets.append(index + len(sublist))
else:
assert self._offsets[sublist_index + 1] == index + len(
Expand Down Expand Up @@ -580,7 +580,7 @@ def __init__(self, it, known_len=None):
def __len__(self):
if self._len:
return self._len
for x in self.iterate_from(len(self._cache)):
for _ in self.iterate_from(len(self._cache)):
pass
self._len = len(self._cache)
return self._len
Expand All @@ -594,11 +594,13 @@ def iterate_from(self, start):
while i < len(self._cache):
yield self._cache[i]
i += 1
while True:
v = next(self._it)
self._cache.append(v)
yield v
i += 1
try:
while True:
v = next(self._it)
self._cache.append(v)
yield v
except StopIteration:
pass

def __add__(self, other):
"""Return a list concatenating self with other."""
Expand Down
13 changes: 12 additions & 1 deletion nltk/test/collections.doctest
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,15 @@ Trie can be pickled:
>>> trie = nltk.collections.Trie(['a'])
>>> s = pickle.dumps(trie)
>>> pickle.loads(s)
{'a': {True: None}}
{'a': {True: None}}

LazyIteratorList
----------------

Fetching the length of a LazyIteratorList object does not throw a StopIteration exception:

>>> lil = LazyIteratorList(i for i in range(1, 11))
>>> lil[-1]
10
>>> len(lil)
10

0 comments on commit 5b07fc4

Please sign in to comment.