Skip to content

Commit

Permalink
Added "confo" parameter to recognize_sphinx to enable customization o…
Browse files Browse the repository at this point in the history
…f the location of the various language files used by pocketsphinx
  • Loading branch information
frawau committed Oct 29, 2017
1 parent be9974d commit 1f5bee2
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 16 deletions.
5 changes: 4 additions & 1 deletion reference/library-reference.rst
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ Phrase recognition uses the exact same mechanism as ``recognizer_instance.listen

The ``callback`` parameter is a function that should accept two parameters - the ``recognizer_instance``, and an ``AudioData`` instance representing the captured audio. Note that ``callback`` function will be called from a non-main thread.

``recognizer_instance.recognize_sphinx(audio_data, language = "en-US", keyword_entries = None, grammar= None, show_all = False)``
``recognizer_instance.recognize_sphinx(audio_data, language = "en-US", keyword_entries = None, grammar = None, show_all = False, config = {})``
-----------------------------------------------------------------------------------------------------------------------------------------------

Performs speech recognition on ``audio_data`` (an ``AudioData`` instance), using CMU Sphinx.
Expand All @@ -185,6 +185,9 @@ The recognition language is determined by ``language``, an IETF language tag lik

If specified, the keywords to search for are determined by ``keyword_entries``, an iterable of tuples of the form ``(keyword, sensitivity)``, where ``keyword`` is a phrase, and ``sensitivity`` is how sensitive to this phrase the recognizer should be, on a scale of 0 (very insensitive, more false negatives) to 1 (very sensitive, more false positives) inclusive. If not specified or ``None``, no keywords are used and Sphinx will simply transcribe whatever words it recognizes. Specifying ``keyword_entries`` is more accurate than just looking for those same keywords in non-keyword-based transcriptions, because Sphinx knows specifically what sounds to look for.

If specified, ``config`` is a dictionary that may contain any of the following keys: ``language_directory``, ``acoustic_parameters_directory``, ``language_model_file`` and ``phoneme_dictionary_file``.
When one of these keys is present, its value is used instead of the corresponding preset path. Any other key is ignored.

Sphinx can also handle FSG or JSGF grammars. The parameter ``grammar`` expects a path to the grammar file. Note that if a JSGF grammar is passed, an FSG grammar will be created at the same location to speed up execution in the next run. If ``keyword_entries`` are passed, content of ``grammar`` will be ignored.

Returns the most likely transcription if ``show_all`` is false (the default). Otherwise, returns the Sphinx ``pocketsphinx.pocketsphinx.Hypothesis`` object generated by Sphinx.
Expand Down
36 changes: 21 additions & 15 deletions speech_recognition/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -737,7 +737,7 @@ def stopper():
listener_thread.start()
return stopper

def recognize_sphinx(self, audio_data, language="en-US", keyword_entries=None, grammar=None, show_all=False):
def recognize_sphinx(self, audio_data, language="en-US", keyword_entries=None, grammar=None, show_all=False, config={}):
"""
Performs speech recognition on ``audio_data`` (an ``AudioData`` instance), using CMU Sphinx.
Expand All @@ -747,13 +747,17 @@ def recognize_sphinx(self, audio_data, language="en-US", keyword_entries=None, g
Sphinx can also handle FSG or JSGF grammars. The parameter ``grammar`` expects a path to the grammar file. Note that if a JSGF grammar is passed, an FSG grammar will be created at the same location to speed up execution in the next run. If ``keyword_entries`` are passed, content of ``grammar`` will be ignored.
If specified, ``config`` is a dictionary that may contain any of the following keys: ``language_directory``, ``acoustic_parameters_directory``, ``language_model_file`` and ``phoneme_dictionary_file``.
When one of these keys is present, its value is used instead of the corresponding preset path. Any other key is ignored.
Returns the most likely transcription if ``show_all`` is false (the default). Otherwise, returns the Sphinx ``pocketsphinx.pocketsphinx.Decoder`` object resulting from the recognition.
Raises a ``speech_recognition.UnknownValueError`` exception if the speech is unintelligible. Raises a ``speech_recognition.RequestError`` exception if there are any issues with the Sphinx installation.
"""
assert isinstance(audio_data, AudioData), "``audio_data`` must be audio data"
assert isinstance(language, str), "``language`` must be a string"
assert keyword_entries is None or all(isinstance(keyword, (type(""), type(u""))) and 0 <= sensitivity <= 1 for keyword, sensitivity in keyword_entries), "``keyword_entries`` must be ``None`` or a list of pairs of strings and numbers between 0 and 1"
assert isinstance(config, dict), "``config` must be a dictionary"

# import the PocketSphinx speech recognition module
try:
Expand All @@ -765,28 +769,30 @@ def recognize_sphinx(self, audio_data, language="en-US", keyword_entries=None, g
raise RequestError("bad PocketSphinx installation; try reinstalling PocketSphinx version 0.0.9 or better.")
if not hasattr(pocketsphinx, "Decoder") or not hasattr(pocketsphinx.Decoder, "default_config"):
raise RequestError("outdated PocketSphinx installation; ensure you have PocketSphinx version 0.0.9 or better.")

language_directory = os.path.join(os.path.dirname(os.path.realpath(__file__)), "pocketsphinx-data", language)
if "language_directory" in config:
language_directory = config["language_directory"]
else:
language_directory = os.path.join(os.path.dirname(os.path.realpath(__file__)), "pocketsphinx-data", language)
if not os.path.isdir(language_directory):
raise RequestError("missing PocketSphinx language data directory: \"{}\"".format(language_directory))
if default_acoustic_parameters_directory:
acoustic_parameters_directory = default_acoustic_parameters_directory
if "acoustic_parameters_directory" in config:
acoustic_parameters_directory = config["acoustic_parameters_directory"]
else:
acoustic_parameters_directory = os.path.join(language_directory, "acoustic-model")
if not os.path.isdir(acoustic_parameters_directory):
raise RequestError("missing PocketSphinx language model parameters directory: \"{}\"".format(acoustic_parameters_directory))
if default_language_model_file:
language_model_file = default_language_model_file
if not os.path.isdir(acoustic_parameters_directory):
raise RequestError("missing PocketSphinx language model parameters directory: \"{}\"".format(acoustic_parameters_directory))
if "language_model_file" in config:
language_model_file = config["language_model_file"]
else:
language_model_file = os.path.join(language_directory, "language-model.lm.bin")
if not os.path.isfile(language_model_file):
raise RequestError("missing PocketSphinx language model file: \"{}\"".format(language_model_file))
if default_phoneme_dictionary_file:
phoneme_dictionary_file = default_phoneme_dictionary_file
if not os.path.isfile(language_model_file):
raise RequestError("missing PocketSphinx language model file: \"{}\"".format(language_model_file))
if "phoneme_dictionary_file" in config:
phoneme_dictionary_file = config["phoneme_dictionary_file"]
else:
phoneme_dictionary_file = os.path.join(language_directory, "pronounciation-dictionary.dict")
if not os.path.isfile(phoneme_dictionary_file):
raise RequestError("missing PocketSphinx phoneme dictionary file: \"{}\"".format(phoneme_dictionary_file))
if not os.path.isfile(phoneme_dictionary_file):
raise RequestError("missing PocketSphinx phoneme dictionary file: \"{}\"".format(phoneme_dictionary_file))

# create decoder object
config = pocketsphinx.Decoder.default_config()
Expand Down

0 comments on commit 1f5bee2

Please sign in to comment.