Skip to content

Commit

Permalink
Merge pull request snipsco#473 from snipsco/hotfix/german-improvments
Browse files Browse the repository at this point in the history
Hotfix/german improvements
  • Loading branch information
Adrien Ball authored Jan 26, 2018
2 parents 35da7f9 + f6967af commit 893c54f
Show file tree
Hide file tree
Showing 5 changed files with 49 additions and 4 deletions.
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
"sklearn-crfsuite==0.3.5",
"builtin_entities_ontology==0.5.1",
"semantic_version==2.6.0",
"rustling==8.1",
"rustling==8.4",
"nlu_utils==0.5.0",
"num2words==0.5.5"
]
Expand Down
2 changes: 1 addition & 1 deletion snips_nlu/__version__
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.10.0
0.10.1
24 changes: 24 additions & 0 deletions snips_nlu/slot_filler/de/specific_features_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,28 @@ def language_specific_features(dataset, intent, config):
use_stemming=True)

features += default_shape_ngram_features(language)

features += [
{
"factory_name": "get_prefix_fn",
"args": {"prefix_size": 2},
"offsets": [0]
},
{
"factory_name": "get_prefix_fn",
"args": {"prefix_size": 5},
"offsets": [0]
},
{
"factory_name": "get_suffix_fn",
"args": {"suffix_size": 2},
"offsets": [0]
},
{
"factory_name": "get_suffix_fn",
"args": {"suffix_size": 3},
"offsets": [0]
}
]

return features
9 changes: 7 additions & 2 deletions snips_nlu/string_variations.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,11 @@ def get_string_variations(string, language):
flatten(punctuation_variations(v, language) for v in variations))
variations.update(
flatten(numbers_variations(v, language) for v in variations))
variations = set(language.default_sep.join(tokenize_light(v, language))
for v in variations)
# Filter double spaces
variations = set(" ".join(v.split()) for v in variations)
# Add tokenized variations
tokenized_variations = set(
language.default_sep.join(tokenize_light(v, language)) for v in
variations)
variations.update(tokenized_variations)
return variations
16 changes: 16 additions & 0 deletions snips_nlu/tests/test_string_variations.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,22 @@ def test_get_string_variations(self):
}
self.assertItemsEqual(variations, expected_variations)

def test_get_france_24(self):
    """Check the variations produced for the French string "france 24".

    The expected set is exactly the original digit form plus the
    spelled-out number with and without a hyphen.
    """
    # Given
    text = "france 24"
    lang = Language.FR

    # When
    actual = get_string_variations(text, lang)

    # Then
    expected = {
        "france 24",
        "france vingt quatre",
        "france vingt-quatre",
    }
    self.assertItemsEqual(actual, expected)

def test_numbers_variations_should_handle_floats(self):
# Given
language = Language.EN
Expand Down

0 comments on commit 893c54f

Please sign in to comment.