Skip to content

Commit

Permalink
fix for piskvorky#520: raise KeyError when no matching doctag
Browse files Browse the repository at this point in the history
  • Loading branch information
gojomo committed Jan 12, 2016
1 parent 9ac47e2 commit 2e53063
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 2 deletions.
4 changes: 2 additions & 2 deletions gensim/models/doc2vec.py
Original file line number Diff line number Diff line change
Expand Up @@ -305,12 +305,12 @@ def trained_item(self, indexed_tuple):
returned by indexed_doctags()); a no-op for this implementation"""
pass

def _int_index(self, index, missing=None):
def _int_index(self, index):
"""Return int index for either string or int index"""
if isinstance(index, int):
return index
else:
return self.max_rawint + 1 + self.doctags[index].offset if index in self.doctags else missing
return self.max_rawint + 1 + self.doctags[index].offset

def _key_index(self, i_index, missing=None):
"""Return string index for given int index, if available"""
Expand Down
10 changes: 10 additions & 0 deletions gensim/test/test_doc2vec.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,16 @@ def test_int_doctags(self):
self.assertEqual(model.docvecs[0].shape, (300,))
self.assertRaises(KeyError, model.__getitem__, '_*0')

def test_missing_string_doctag(self):
"""Test that looking up an unknown string doctag raises KeyError"""
corpus = list(DocsLeeCorpus(True))
# prepend a copy of the first 10 documents to force duplicated tags
corpus = corpus[0:10] + corpus

model = doc2vec.Doc2Vec(min_count=1)
model.build_vocab(corpus)
# 'not_a_tag' is presumably absent from the corpus tags, so the lookup must raise KeyError
self.assertRaises(KeyError, model.docvecs.__getitem__, 'not_a_tag')

def test_string_doctags(self):
"""Test doc2vec doctag alternatives"""
corpus = list(DocsLeeCorpus(True))
Expand Down

0 comments on commit 2e53063

Please sign in to comment.