From 860c494a97c73ee3881a6c57e6468ad888778ad5 Mon Sep 17 00:00:00 2001
From: Eric Kafe
Date: Thu, 12 Nov 2020 08:30:02 +0000
Subject: [PATCH] Fixes Issue #2420 (WordNet sense keys mismatch) (#2621)

---
 nltk/corpus/reader/wordnet.py | 27 +++++++++------------------
 1 file changed, 9 insertions(+), 18 deletions(-)

diff --git a/nltk/corpus/reader/wordnet.py b/nltk/corpus/reader/wordnet.py
index 6b9d4a904e..cbb24dd252 100644
--- a/nltk/corpus/reader/wordnet.py
+++ b/nltk/corpus/reader/wordnet.py
@@ -1532,26 +1532,17 @@ def synset_from_sense_key(self, sense_key):
                      Only used if sense is in an adjective satellite synset
         head_id:     uniquely identifies sense in a lexicographer file when paired with head_word
                      Only used if head_word is present (2 digit int)
+
+        >>> import nltk
+        >>> from nltk.corpus import wordnet as wn
+        >>> print(wn.synset_from_sense_key("drive%1:04:03::"))
+        Synset('drive.n.06')
+
+        >>> print(wn.synset_from_sense_key("driving%1:04:03::"))
+        Synset('drive.n.06')
         """
-        sense_key_regex = re.compile(r"(.*)\%(.*):(.*):(.*):(.*):(.*)")
-        synset_types = {1: NOUN, 2: VERB, 3: ADJ, 4: ADV, 5: ADJ_SAT}
-        lemma, ss_type, _, lex_id, _, _ = sense_key_regex.match(sense_key).groups()
-
-        # check that information extracted from sense_key is valid
-        error = None
-        if not lemma:
-            error = "lemma"
-        elif int(ss_type) not in synset_types:
-            error = "ss_type"
-        elif int(lex_id) < 0 or int(lex_id) > 99:
-            error = "lex_id"
-        if error:
-            raise WordNetError(
-                "valid {} could not be extracted from the sense key".format(error)
-            )
+        return self.lemma_from_key(sense_key).synset()
 
-        synset_id = ".".join([lemma, synset_types[int(ss_type)], lex_id])
-        return self.synset(synset_id)
 
     #############################################################
     # Retrieve synsets and lemmas.