Skip to content

Commit ea94f8c

Browse files
committed
Update everything
1 parent 5c82511 commit ea94f8c

10 files changed

+165
-29
lines changed

LICENSE.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
Copyright (c) 2014-2015, Anthony Zhang <[email protected]>
1+
Copyright (c) 2014-2016, Anthony Zhang <[email protected]>
22
All rights reserved.
33

44
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:

README.rst

+106-8
Large diffs are not rendered by default.

examples/chinese.wav

167 KB
Binary file not shown.
File renamed without changes.

examples/extended_results.py

+11-3
Original file line numberDiff line numberDiff line change
@@ -2,15 +2,23 @@
22

33
import speech_recognition as sr
44

5-
# obtain path to "test.wav" in the same folder as this script
5+
# obtain path to "english.wav" in the same folder as this script
66
from os import path
7-
WAV_FILE = path.join(path.dirname(path.realpath(__file__)), "test.wav")
7+
WAV_FILE = path.join(path.dirname(path.realpath(__file__)), "english.wav")
88

9-
# use "test.wav" as the audio source
9+
# use "english.wav" as the audio source
1010
r = sr.Recognizer()
1111
with sr.WavFile(WAV_FILE) as source:
1212
audio = r.record(source) # read the entire WAV file
1313

14+
# recognize speech using Sphinx
15+
try:
16+
print("Sphinx thinks you said " + r.recognize_sphinx(audio))
17+
except sr.UnknownValueError:
18+
print("Sphinx could not understand audio")
19+
except sr.RequestError as e:
20+
print("Sphinx error; {0}".format(e))
21+
1422
# recognize speech using Google Speech Recognition
1523
try:
1624
# for testing purposes, we're just using the default API key

examples/french.wav

406 KB
Binary file not shown.

examples/microphone_recognition.py

+8
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,14 @@
1010
print("Say something!")
1111
audio = r.listen(source)
1212

13+
# recognize speech using Sphinx
14+
try:
15+
print("Sphinx thinks you said " + r.recognize_sphinx(audio))
16+
except sr.UnknownValueError:
17+
print("Sphinx could not understand audio")
18+
except sr.RequestError as e:
19+
print("Sphinx error; {0}".format(e))
20+
1321
# recognize speech using Google Speech Recognition
1422
try:
1523
# for testing purposes, we're just using the default API key

examples/wav_transcribe.py

+11-3
Original file line numberDiff line numberDiff line change
@@ -2,15 +2,23 @@
22

33
import speech_recognition as sr
44

5-
# obtain path to "test.wav" in the same folder as this script
5+
# obtain path to "english.wav" in the same folder as this script
66
from os import path
7-
WAV_FILE = path.join(path.dirname(path.realpath(__file__)), "test.wav")
7+
WAV_FILE = path.join(path.dirname(path.realpath(__file__)), "english.wav")
88

9-
# use "test.wav" as the audio source
9+
# use "english.wav" as the audio source
1010
r = sr.Recognizer()
1111
with sr.WavFile(WAV_FILE) as source:
1212
audio = r.record(source) # read the entire WAV file
1313

14+
# recognize speech using Sphinx
15+
try:
16+
print("Sphinx thinks you said " + r.recognize_sphinx(audio))
17+
except sr.UnknownValueError:
18+
print("Sphinx could not understand audio")
19+
except sr.RequestError as e:
20+
print("Sphinx error; {0}".format(e))
21+
1422
# recognize speech using Google Speech Recognition
1523
try:
1624
# for testing purposes, we're just using the default API key

setup.py

+18-2
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,35 @@
11
#!/usr/bin/env python3
22

3+
import sys, os, stat
4+
35
from setuptools import setup
6+
from setuptools.command.install import install
7+
from distutils import log
8+
9+
import speech_recognition
410

5-
import sys
611
if sys.version_info < (2, 6):
712
print("THIS MODULE REQUIRES PYTHON 2.6, 2.7, OR 3.3+. YOU ARE CURRENTLY USING PYTHON {0}".format(sys.version))
813
sys.exit(1)
914

10-
import speech_recognition
15+
FILES_TO_MARK_EXECUTABLE = ["flac-linux-i386", "flac-mac", "flac-win32.exe"]
16+
class InstallWithExtraSteps(install):
    """``install`` command that additionally fixes permissions on the bundled FLAC executables."""

    def run(self):
        """Run the standard install steps, then mark every installed FLAC binary as executable by all users."""
        install.run(self) # do the original install steps

        # execute permission for user, group, and others; ``stat.S_IEXEC`` alone would only cover the owner
        executable_bits = stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH

        # mark the FLAC executables as executable by all users (this fixes occasional issues when file permissions get messed up)
        for output_path in self.get_outputs():
            if os.path.basename(output_path) in FILES_TO_MARK_EXECUTABLE:
                # use an explicit field index: auto-numbered "{}" fields are not supported on Python 2.6
                log.info("setting executable permissions on {0}".format(output_path))
                stat_info = os.stat(output_path)
                os.chmod(output_path, stat_info.st_mode | executable_bits)
1126

1227
setup(
1328
name = "SpeechRecognition",
1429
version = speech_recognition.__version__,
1530
packages = ["speech_recognition"],
1631
include_package_data = True,
32+
cmdclass = {"install": InstallWithExtraSteps},
1733

1834
# PyPI metadata
1935
author = speech_recognition.__author__,

speech_recognition/__init__.py

+10-12
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,8 @@
22

33
"""Library for performing speech recognition with support for CMU Sphinx, Google Speech Recognition, Wit.ai, IBM Speech to Text, and AT&T Speech to Text."""
44

5-
#wip: provide binaries for PocketSphinx on Windows, or see if the 0.0.5 binaries will work
6-
75
__author__ = "Anthony Zhang (Uberi)"
8-
__version__ = "3.1.3"
6+
__version__ = "4.0.0"
97
__license__ = "BSD"
108

119
import io, os, subprocess, wave, base64
@@ -401,7 +399,7 @@ def threaded_listen():
401399
if running[0]: callback(self, audio)
402400
def stopper():
403401
running[0] = False
404-
listener_thread.join() # block until the background thread is done
402+
listener_thread.join() # block until the background thread is done, which can take up to 1 second
405403
listener_thread = threading.Thread(target=threaded_listen)
406404
listener_thread.daemon = True
407405
listener_thread.start()
@@ -411,14 +409,14 @@ def recognize_sphinx(self, audio_data, language = "en-US", show_all = False):
411409
"""
412410
Performs speech recognition on ``audio_data`` (an ``AudioData`` instance), using CMU Sphinx.
413411
414-
The recognition language is determined by ``language``, an IETF language tag like ``"en-US"`` or ``"en-GB"``, defaulting to US English. A list of supported language codes can be found `here <http://stackoverflow.com/questions/14257598/>`__. Basically, language codes can be just the language (``en``), or a language with a dialect (``en-US``). ;wip
412+
The recognition language is determined by ``language``, an IETF language tag like ``"en-US"`` or ``"en-GB"``, defaulting to US English. By default, only ``en-US`` is supported. Additional languages can be installed by adding their language data under the ``pocketsphinx-data`` directory inside this module's directory.
415413
416414
Returns the most likely transcription if ``show_all`` is false (the default). Otherwise, returns the Sphinx ``pocketsphinx.pocketsphinx.Hypothesis`` object generated by Sphinx.
417415
418416
Raises a ``speech_recognition.UnknownValueError`` exception if the speech is unintelligible. Raises a ``speech_recognition.RequestError`` exception if there are any issues with the Sphinx installation.
419417
"""
420418
assert isinstance(audio_data, AudioData), "`audio_data` must be audio data"
421-
assert isinstance(language, str), "`language` must be a string" #wip: do this properly
419+
assert isinstance(language, str), "`language` must be a string"
422420

423421
# import the PocketSphinx speech recognition module
424422
try:
@@ -429,27 +427,27 @@ def recognize_sphinx(self, audio_data, language = "en-US", show_all = False):
429427

430428
language_directory = os.path.join(os.path.dirname(os.path.realpath(__file__)), "pocketsphinx-data", language)
431429
if not os.path.isdir(language_directory):
432-
raise RequestError("missing PocketSphinx model directory: \"{}\"".format(language_directory))
430+
raise RequestError("missing PocketSphinx language data directory: \"{0}\"".format(language_directory))
433431
acoustic_parameters_directory = os.path.join(language_directory, "acoustic-model")
434432
if not os.path.isdir(acoustic_parameters_directory):
435-
raise RequestError("missing PocketSphinx model parameters directory: \"{}\"".format(acoustic_parameters_directory))
433+
raise RequestError("missing PocketSphinx language model parameters directory: \"{0}\"".format(acoustic_parameters_directory))
436434
language_model_file = os.path.join(language_directory, "language-model.lm.bin")
437435
if not os.path.isfile(language_model_file):
438-
raise RequestError("missing PocketSphinx language model file: \"{}\"".format(language_model_file))
436+
raise RequestError("missing PocketSphinx language model file: \"{0}\"".format(language_model_file))
439437
phoneme_dictionary_file = os.path.join(language_directory, "pronounciation-dictionary.dict")
440438
if not os.path.isfile(phoneme_dictionary_file):
441-
raise RequestError("missing PocketSphinx phoneme dictionary file: \"{}\"".format(phoneme_dictionary_file))
439+
raise RequestError("missing PocketSphinx phoneme dictionary file: \"{0}\"".format(phoneme_dictionary_file))
442440

443441
# create decoder object
444442
config = pocketsphinx.Decoder.default_config()
445-
config.set_string("-hmm", acoustic_parameters_directory)
443+
config.set_string("-hmm", acoustic_parameters_directory) # set the path of the hidden Markov model (HMM) parameter files
446444
config.set_string("-lm", language_model_file)
447445
config.set_string("-dict", phoneme_dictionary_file)
448446
config.set_string("-logfn", os.devnull) # disable logging (logging causes unwanted output in terminal)
449447
decoder = pocketsphinx.Decoder(config)
450448

451449
# obtain audio data
452-
raw_data = audio_data.get_raw_data(convert_rate = 16000, convert_width = 2) # Sphinx requires audio to be 16-bit mono 16 kHz in little-endian format
450+
raw_data = audio_data.get_raw_data(convert_rate = 16000, convert_width = 2) # the included language models require audio to be 16-bit mono 16 kHz in little-endian format
453451

454452
# obtain recognition results
455453
decoder.start_utt() # begin utterance processing

0 commit comments

Comments
 (0)