Skip to content

Commit

Permalink
Merge pull request nltk#2555 from palasso/optimize-lm-speed
Browse files: browse the repository at this point in the history
Reduce inference time for some ngram language models
  • Loading branch information
stevenbird authored Jun 15, 2020
2 parents cec5f41 + 4f97129 commit 88c437b
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 7 deletions.
1 change: 1 addition & 0 deletions AUTHORS.md
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,7 @@
- Alexandre H. T. Dias <https://github.com/alexandredias3d>
- Jacob Weightman <https://github.com/jacobdweightman>
- Bonifacio de Oliveira <https://github.com/Bonifacio2>
- Vassilis Palassopoulos <https://github.com/palasso>

## Others whose work we've taken and included in NLTK, but who didn't directly contribute it:
### Contributors to the Porter Stemmer
Expand Down
12 changes: 5 additions & 7 deletions nltk/lm/vocabulary.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,19 +136,16 @@ def __init__(self, counts=None, unk_cutoff=1, unk_label="<UNK>"):
:param unk_label: Label for marking words not part of vocabulary.
"""
if isinstance(counts, Counter):
self.counts = counts
else:
self.counts = Counter()
if isinstance(counts, Iterable):
self.counts.update(counts)
self.unk_label = unk_label
if unk_cutoff < 1:
raise ValueError(
"Cutoff value cannot be less than 1. Got: {0}".format(unk_cutoff)
)
self._cutoff = unk_cutoff

self.counts = Counter()
self.update(counts if counts is not None else "")

@property
def cutoff(self):
"""Cutoff value.
Expand All @@ -165,6 +162,7 @@ def update(self, *counter_args, **counter_kwargs):
"""
self.counts.update(*counter_args, **counter_kwargs)
self._len = sum(1 for _ in self)

def lookup(self, words):
"""Look up one or more words in the vocabulary.
Expand Down Expand Up @@ -208,7 +206,7 @@ def __iter__(self):

def __len__(self):
"""Computing size of vocabulary reflects the cutoff."""
return sum(1 for _ in self)
return self._len

def __eq__(self, other):
return (
Expand Down

0 comments on commit 88c437b

Please sign in to comment.