
Commit 03ec480

Code cleanup
1 parent 45d1937 commit 03ec480

4 files changed (+86 -37 lines)


.travis.yml

+21 lines

@@ -0,0 +1,21 @@
+language: python
+python:
+- "2.7"
+- "3.3"
+- "3.4"
+- "3.5"
+install:
+- pip install pocketsphinx monotonic
+- pip install flake8 rstcheck
+- pip install -e .
+script:
+- python -m unittest discover # run unit tests
+- flake8 --ignore=E501,E701 speech_recognition # ignore errors for long lines and multi-statement lines
+- rstcheck README.rst reference/*.rst # ensure RST is well-formed
+sudo: false # this allows TravisCI to use the fast Docker build environment rather than the slower VMs
+env:
+  global:
+  - secure: "jFHi/NK+hkf8Jw/bA06utypMRAzOcpeKPEZz/P2U79c70aIcmeAOGNUG6t5x2hmaeNpaP1STDtOLVdDawLY904rv/2sAhdMExlLUYubVQrJumvfgwyHRep0NLxrWV/Sf7y6FBPsvS0We29sn5HeEUlSzFwLrANyagpZYGeeWI3SGfdseDK/n4SlD436i7n5jM0Vlbmo07JDtdTN5Ov17APtuqy0ZViNhhTG+wvU8RCd/0/1IvstaaOhSa/82jABXNzH12hY4ynSuK75EVdVLj/WstSmH90r+8TS+YHH1D68yFeoub8kjTzZirqDuwb1s0nGOzx3VAC03+Fb48jHNfz2X0LJEj6gOpaaxgXOr4qkb1+Bx4L1bUkMk3ywjKoXFF0BU/haZfPbzG0fFUDubEXYjhC88gM1CR0LrFf4qtIqFcdM4sjasfv7TD2peiuWqVRZeHzjcvQVC8aDxVFFbTF+Cx1xZ1qLxAY5iJ/dUPWpOVcSs0GIJaJw7LQJU5uQbiU0vg17k9QcVYbASJu0cFAt/OsWGDZp/uArSWrMcSoexe8wI8/k5u9XFnOmlEu5kUJXOrZANjniUk5ilFUe+lag2Zl/ZasNtW16qke+vaWfBnpKl7NOoQemWNdYOxgyc/4x9B3x8gryf5XAmfBeqneh7k10O18u6GYpt33r0zuQ=" # encrypted version of "WIT_AI_KEY=(my key)"
+  - secure: "NUTqadJCac2g6n44Phw6qsKSB+cGGPYDQI0nB+jJ1p+R4K7SYo46ECU0Xs+UexNwcZbmUxxAwekXyVS8Rd7GGadsM4rw0wPmZ//ul24dg+ek8/tDZ96U85yGvcSWoPTYsBbwqvKqxFfWZNLLwkdX38zIeBchDwfseVrPgFMjNuh2gOikszNbJcom23FzlOkR1kcfW2nepsLTK+u1AUB+S8FmvUXsE3oiLEA605FQu+hblXaltu1CeCGmci8cMIP7XX0VdpPlrO4kGodRvZmE/KmNt1fTjESYN1td9PZ3ZhO6ZBJf6TP0VfB6qXz/efVTdeVenxu5J9BpXvcDkEgEa/7PwQc0FeK0eaOmAUSKgjRQTjf1A3vTfvSBseDpGZasCQ3YCS2wF5qBtTBiKlzKyC6xio7fW+DEt9L3IC6gW/hzA5fiXe4ZZygofvNY4u2kVRT7C9GM7UgGwT8nSapqWdr898NfM9goPfNAV5PFsNgNGx8n7659Q4lCmymVPhxD76449kOCNlWozzdCmjnsGrK4JzleTTFXCWpsQFsahJSqQ/W6CNm667zvUnlFKs1/edPOwsnYLSnU1jtJyxVlAX4wUFUKYzYehyv49+DtX0CAMeV149hhINJPzBDc4YAsNxrLnuqjTYm555s0jDz+itmoYv3BEA4wEQp1j0EMJMM=" # encrypted version of "BING_KEY=(my key)"
+  - secure: "JEtMaAhDglqRrHdKZapxIaY0zlCohsepgxfRckhuCB3RZljeIKjt15Q/8LzFcx0ZdQV2thOQ/2oA0WpnfTckEnh42X+Ki0AUlezjXXYII2DenCs9q7jXxuOYK5AjxcNzyfeh7NnI2R3jdAyf49FdnoOa/OdEZq7aYRouP0yZtVKK/eMueURfr7JMsTrmuYoy1LXkF/yEyxns9HiaSebn7YqeQ7cb9Q5LcSigM6kCXZrtG1K4MqWGrvnqGeabE6xoZVxkf+az6fMv91oZ4spZRfjjlFpGx050gP4SCpk8XQqVS2HAtzVSFBdnLld4ydRoGVHVMAOmvQY5xbk5y9REVj4EVdfeErOhaEz6CfFqZi9UpAS0Zza/7khGDDWkHmfg4O4CzrVLkfdcPIgIKcz9TT9zP+wPVCYmfN2Qq0XB+PJkewjmgPuWZnUyBb402iPs1hWEze8oK6Yk5K3OnBuSqeE4EtvpT/SUrLtroSNcWJJ7i585cqgNB5KwzDDKNnyn0zteQQTj+fUzrumQ+/FTYjaafOVZ6ZAiZ+xvgge0+foB94GCoV/8LUm5rVTtk8vV3c3oJu9jdzsyiOSargYPSYg7iy1kzkC/eQ12rX89EWLGjoP+mveLGBpUebQNbB8vxaVRd8uaozW/G3Vwgelqg7gzrvmwkaYK3g6a1TAVpcs=" # encrypted version of "HOUNDIFY_CLIENT_ID=(my client ID) HOUNDIFY_CLIENT_KEY=(my client key)"
+  - secure: "uj5LUKDtf214EZPqsjpy6tk8iXEfydC3z/px98xbXa/H6PVN6wMPTHsF1DuuTWCbLrqNyi9/rMbpJFiNuqMm+q0LarrvvuTKHA9JFe/ZA11R1w3WI2ZMTvub6vzCbmcznIkjq981BjFWz5aCazPXhLt18e0iMit2FR+D6jwZ4al8TIo9i6RjkJ3MimH2/Sgm2BnXZ7qHsmDlG+4VsABiPiH0SPzrxqJJ4WSOb8EnNkNcOujiHuYvDNR+6R566bXjV1x+z2ewKb2nae5LOEl8L+6B/CsNT2cyeds2imYWAw9vTZoTajXf2u21M3pqRINQ67CuWhGFOdUXiEd6E/jTQFcsE4GuB7eMIYcHCmPzhhHn1b6XzNJtf923+YlSnayf63Y5jHjeSWSWs6pjJOUjJquuXS8vQYuJYX4n8sXDeEsZg0yD2jdxFMqMmjZoKKJzWPTPUkDTLawZdZs2q6bOF+xBQysUPozgSnxe3koCMFLeA1cU6fUkXWWIFDuAehR0JqYQHaUovoO0ZYx8Env0Ojhl6IZclONxaLVA41CbzkSUC1pg0k/VeMiv6YB2SQsFxV1riKM/OPDxq7AAuUuNVDCj/SGya4BJEYrxtagtmq0em8Q8SJzLq7IFNBNq5pO8IaqA0JO/tieSIsutrhdRzVMI35apuwbE+5jxoDmiGW0=" # encrypted version of "IBM_USERNAME=(my username) IBM_PASSWORD=(my password)"

speech_recognition/__init__.py

+14 -37 lines

@@ -665,7 +665,7 @@ def recognize_sphinx(self, audio_data, language="en-US", keyword_entries=None, s
             # generate a keywords file - Sphinx documentation recommends sensitivities between 1e-50 and 1e-5
             keywords_path = os.path.join(temp_directory, "keyphrases.txt")
             with open(keywords_path, "w") as f:
-                f.writelines("{} /1e{}/\n".format(keyword, 45 * sensitivity - 50) for keyword, sensitivity in keyword_entries)
+                f.writelines("{} /1e{}/\n".format(keyword, 100 * sensitivity - 110) for keyword, sensitivity in keyword_entries)

             # perform the speech recognition with the keywords file (this is inside the context manager so the file isn't deleted until we're done)
             decoder.set_kws("keywords", keywords_path)
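
The one-line change above alters how a keyword entry's sensitivity (a value between 0 and 1) maps onto the Pocketsphinx keyword-threshold exponent written into keyphrases.txt: the old formula 45 * sensitivity - 50 covers 1e-50 through 1e-5 (the range the comment recommends), while the new formula 100 * sensitivity - 110 covers 1e-110 through 1e-10. A quick standalone comparison, not part of the commit:

# compare the old and new sensitivity-to-threshold mappings for a few sample values
for sensitivity in (0.0, 0.5, 1.0):
    old_exponent = 45 * sensitivity - 50    # removed mapping: 1e-50 .. 1e-5
    new_exponent = 100 * sensitivity - 110  # added mapping: 1e-110 .. 1e-10
    print("sensitivity={}: old threshold 1e{}, new threshold 1e{}".format(sensitivity, old_exponent, new_exponent))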
@@ -741,55 +741,36 @@ def recognize_google(self, audio_data, key=None, language="en-US", show_all=Fals
                 return entry["transcript"]
         raise UnknownValueError() # no transcriptions available

-    def recognize_google_cloud(self, audio_data, language="en-US", filter_profanity=False, speech_context=None, show_all=False):
+    def recognize_google_cloud(self, audio_data, language="en-US", preferred_phrases=None, show_all=False):
         """
         Performs speech recognition on ``audio_data`` (an ``AudioData`` instance), using the Google Cloud Speech API.

         This requires a Google Cloud Platform account; see the `Google Cloud Speech API Quickstart <https://cloud.google.com/speech/docs/getting-started>`__ for details and instructions.

         The recognition language is determined by ``language``, which is a BCP-47 language tag like ``"en-US"`` (US English). For more information see the `RecognitionConfig documentation <https://cloud.google.com/speech/reference/rest/v1beta1/RecognitionConfig>`__.

-        By default profanity will not be filtered. To filter it set ``filter_profanity`` to True.
-
-        To provide words and phrases likely to be used in the context specify a list of those words and phrases as ``speech_context``. See `Usage Limits <https://cloud.google.com/speech/limits#content>`__ for limitations.
+        If ``preferred_phrases`` is a list of phrase strings, those given phrases will be more likely to be recognized over similar-sounding alternatives. This is useful for things like keyword/command recognition or adding new phrases that aren't in Google's vocabulary. Note that the API imposes certain `restrictions on the list of phrase strings <https://cloud.google.com/speech/limits#content>`__.

         Returns the most likely transcription if ``show_all`` is False (the default). Otherwise, returns the raw API response as a JSON dictionary.

         Raises a ``speech_recognition.UnknownValueError`` exception if the speech is unintelligible. Raises a ``speech_recognition.RequestError`` exception if the speech recognition operation failed, if the credentials aren't valid, or if there is no Internet connection.
         """
-        if speech_context is None:
-            speech_context = []
-
         assert isinstance(audio_data, AudioData), "`audio_data` must be audio data"
         assert isinstance(language, str), "`language` must be a string"
-        assert isinstance(filter_profanity, bool), "`filter_profanity` must be a bool"
-        assert isinstance(speech_context, list), "`speech_context` must be a list"
+        assert preferred_phrases is None or all(isinstance(preferred_phrases, str) for preferred_phrases in preferred_phrases), "`preferred_phrases` must be a list of strings"

         # See https://cloud.google.com/speech/reference/rest/v1beta1/RecognitionConfig
         flac_data = audio_data.get_flac_data(
-            # Audio samples must be at least 8 kHz and at most 48 kHz. Do not
-            # convert if in the range; if outside of it convert to clamped to
-            # that range.
-            convert_rate=None if 8000 <= audio_data.sample_rate <= 48000
-            else max(8000, min(audio_data.sample_rate, 48000)),
+            convert_rate=None if 8000 <= audio_data.sample_rate <= 48000 else max(8000, min(audio_data.sample_rate, 48000)), # audio sample rate must be between 8 kHz and 48 kHz inclusive - clamp sample rate into this range
             convert_width=2 # audio samples must be 16-bit
         )

         speech_service = self.get_speech_service()
-        request = speech_service.speech().syncrecognize(body={
-            "audio": {
-                "content": base64.b64encode(flac_data).decode("utf8"),
-            },
-            "config": {
-                "encoding": "FLAC",
-                "sampleRate": audio_data.sample_rate,
-                "languageCode": language,
-                "profanityFilter": filter_profanity,
-                "speechContext": {
-                    "phrases": speech_context,
-                },
-            },
-        })
+        if preferred_phrases is None:
+            speech_config = {"encoding": "FLAC", "sampleRate": audio_data.sample_rate, "languageCode": language}
+        else:
+            speech_config = {"encoding": "FLAC", "sampleRate": audio_data.sample_rate, "languageCode": language, "speechContext": {"phrases": preferred_phrases}}
+        request = speech_service.speech().syncrecognize(body={"audio": {"content": base64.b64encode(flac_data).decode("utf8")}, "config": speech_config})

         import googleapiclient.errors
         try:
@@ -799,15 +780,11 @@ def recognize_google_cloud(self, audio_data, language="en-US", filter_profanity=
         except URLError as e:
             raise RequestError("recognition connection failed: {0}".format(e.reason))

-        if show_all:
-            return response
-
-        if "results" not in response or len(response["results"]) == 0:
-            raise UnknownValueError()
-
+        if show_all: return response
+        if "results" not in response or len(response["results"]) == 0: raise UnknownValueError()
         transcript = ""
         for result in response["results"]:
-            transcript += result['alternatives'][0]["transcript"].strip() + " "
+            transcript += result["alternatives"][0]["transcript"].strip() + " "

         return transcript
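
Together with the hunk above, this replaces the filter_profanity and speech_context parameters with a single preferred_phrases list. A minimal usage sketch of the new signature, assuming Google Cloud credentials are already configured for get_speech_service(); the file path and phrases are illustrative only:

import speech_recognition as sr

r = sr.Recognizer()
with sr.AudioFile("examples/english.wav") as source:  # any supported audio file
    audio = r.record(source)

# bias recognition towards domain-specific phrases (hypothetical examples)
print(r.recognize_google_cloud(audio, language="en-US", preferred_phrases=["one two three", "speech recognition"]))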

@@ -1059,7 +1036,7 @@ def get_flac_converter():
     """Returns the absolute path of a FLAC converter executable, or raises an OSError if none can be found."""
     flac_converter = shutil_which("flac") # check for installed version first
     if flac_converter is None: # flac utility is not installed
-        compatible_machine_types = ["i686", "i786", "x86", "x86_64", "AMD64"] # whitelist of machine types our bundled binaries are compatible with
+        compatible_machine_types = {"i686", "i786", "x86", "x86_64", "AMD64"} # whitelist of machine types our bundled binaries are compatible with
         flac_converters = {"Windows": "flac-win32.exe", "Linux": "flac-linux-x86", "Darwin": "flac-mac"}
         flac_converter = flac_converters.get(platform.system(), None)
         if flac_converter is not None and platform.machine() in compatible_machine_types:

tests/__init__.py

+1 line

@@ -0,0 +1 @@
+# placeholder file to make this folder a module - this allows tests in this folder to be discovered by `python -m unittest discover`
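
As the comment notes, python -m unittest discover (the first command in the Travis script section) only picks up test modules inside importable packages, which is why this placeholder file exists. The same discovery can be driven programmatically; a small sketch assuming the tests live in a "tests" directory under the current working directory:

import unittest

# discover modules matching test*.py inside the "tests" package and run them
suite = unittest.defaultTestLoader.discover("tests")
unittest.TextTestRunner(verbosity=2).run(suite)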

tests/test_recognition.py

+50 lines

@@ -0,0 +1,50 @@
+#!/usr/bin/env python3
+
+import os
+import unittest
+
+import speech_recognition as sr
+
+class TestRecognition(unittest.TestCase):
+    def setUp(self):
+        self.WIT_AI_KEY = os.environ["WIT_AI_KEY"]
+        self.BING_KEY = os.environ["BING_KEY"]
+        self.HOUNDIFY_CLIENT_ID = os.environ["HOUNDIFY_CLIENT_ID"]
+        self.HOUNDIFY_CLIENT_KEY = os.environ["HOUNDIFY_CLIENT_KEY"]
+        self.IBM_USERNAME = os.environ["IBM_USERNAME"]
+        self.IBM_PASSWORD = os.environ["IBM_PASSWORD"]
+
+        self.AUDIO_FILE_EN = os.path.join(os.path.dirname(os.path.realpath(__file__)), "..", "examples", "english.wav")
+
+    def test_sphinx(self):
+        r = sr.Recognizer()
+        with sr.AudioFile(self.AUDIO_FILE_EN) as source: audio = r.record(source)
+        self.assertEqual(r.recognize_sphinx(audio), "wanted to three")
+
+    def test_google(self):
+        r = sr.Recognizer()
+        with sr.AudioFile(self.AUDIO_FILE_EN) as source: audio = r.record(source)
+        self.assertEqual(r.recognize_google(audio), "one two three")
+
+    def test_wit(self):
+        r = sr.Recognizer()
+        with sr.AudioFile(self.AUDIO_FILE_EN) as source: audio = r.record(source)
+        self.assertEqual(r.recognize_wit(audio, key=self.WIT_AI_KEY), "one two three")
+
+    def test_bing(self):
+        r = sr.Recognizer()
+        with sr.AudioFile(self.AUDIO_FILE_EN) as source: audio = r.record(source)
+        self.assertEqual(r.recognize_bing(audio, key=self.BING_KEY), "one two three")
+
+    def test_houndify(self):
+        r = sr.Recognizer()
+        with sr.AudioFile(self.AUDIO_FILE_EN) as source: audio = r.record(source)
+        self.assertEqual(r.recognize_houndify(audio, client_id=self.HOUNDIFY_CLIENT_ID, client_key=self.HOUNDIFY_CLIENT_KEY), "one two three")
+
+    def test_ibm(self):
+        r = sr.Recognizer()
+        with sr.AudioFile(self.AUDIO_FILE_EN) as source: audio = r.record(source)
+        self.assertEqual(r.recognize_ibm(audio, username=self.IBM_USERNAME, password=self.IBM_PASSWORD), "one two three ")
+
+if __name__ == "__main__":
+    unittest.main()
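
Because setUp reads every key with os.environ[...], running this file without the Travis credentials fails with a KeyError before any test body executes. A hedged alternative sketch, not part of this commit, that instead skips API-dependent tests when a key is missing, using the standard unittest.skipUnless decorator:

import os
import unittest

import speech_recognition as sr

AUDIO_FILE_EN = os.path.join(os.path.dirname(os.path.realpath(__file__)), "..", "examples", "english.wav")

class TestRecognitionOptionalKeys(unittest.TestCase):
    @unittest.skipUnless("WIT_AI_KEY" in os.environ, "requires WIT_AI_KEY to be set")
    def test_wit(self):
        r = sr.Recognizer()
        with sr.AudioFile(AUDIO_FILE_EN) as source:
            audio = r.record(source)
        self.assertEqual(r.recognize_wit(audio, key=os.environ["WIT_AI_KEY"]), "one two three")

if __name__ == "__main__":
    unittest.main()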
