Skip to content

Commit 19dc36e

Browse files
authored
Merge pull request Uberi#296 from chriamue/tensorflow
Tensorflow
2 parents c932096 + 0a7bf7c commit 19dc36e

File tree

2 files changed

+73
-0
lines changed

2 files changed

+73
-0
lines changed

examples/tensorflow_commands.py

+26
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
#!/usr/bin/env python3
2+
import time
3+
import speech_recognition as sr
4+
from tensorflow.contrib.framework.python.ops import audio_ops as contrib_audio # noqa
5+
6+
# obtain audio from the microphone
7+
r = sr.Recognizer()
m = sr.Microphone()


def callback(recognizer, audio):
    """Classify one captured phrase with the TensorFlow model and print the outcome."""
    # You can download the data here: http://download.tensorflow.org/models/speech_commands_v0.01.zip
    graph_path = 'speech_recognition/tensorflow-data/conv_actions_frozen.pb'
    labels_path = 'speech_recognition/tensorflow-data/conv_actions_labels.txt'
    try:
        spoken = recognizer.recognize_tensorflow(audio, tensor_graph=graph_path, tensor_label=labels_path)
    except sr.UnknownValueError:
        print("Tensorflow could not understand audio")
    except sr.RequestError as e:
        print("Could not request results from Tensorflow service; {0}".format(e))
    else:
        print(spoken)


# calibrate the recognizer against the ambient noise picked up by the microphone
with m as source:
    r.adjust_for_ambient_noise(source)

# listen in the background, handing each phrase (at most 0.6 seconds long) to ``callback``
stop_listening = r.listen_in_background(m, callback, phrase_time_limit=0.6)

# keep the main thread alive while the background listener works
time.sleep(100)

speech_recognition/__init__.py

+47
Original file line numberDiff line numberDiff line change
@@ -1214,6 +1214,53 @@ def recognize_ibm(self, audio_data, username, password, language="en-US", show_a
12141214
transcription.append(hypothesis["transcript"])
12151215
return "\n".join(transcription)
12161216

1217+
lasttfgraph = ''  # path of the frozen graph currently cached in ``tfgraph``
lasttflabel = ''  # path of the labels file currently cached in ``tflabels``
tfgraph = None  # ``tf.Graph`` holding the imported model, ``None`` until the first successful load
tflabels = None  # list of label strings read from the labels file, ``None`` until the first successful load

def recognize_tensorflow(self, audio_data, tensor_graph='tensorflow-data/conv_actions_frozen.pb', tensor_label='tensorflow-data/conv_actions_labels.txt'):
    """
    Performs speech recognition on ``audio_data`` (an ``AudioData`` instance) using a frozen TensorFlow graph.

    ``tensor_graph`` is the path to the frozen graph file. You can download a model here: http://download.tensorflow.org/models/speech_commands_v0.01.zip

    ``tensor_label`` is the path to the labels file, read one label per line.

    The graph and labels are cached on the instance and only reloaded when either path changes.

    Returns the label with the highest score in the graph's ``labels_softmax:0`` output.

    Raises a ``speech_recognition.RequestError`` exception if the ``tensorflow`` module cannot be imported.
    """
    assert isinstance(audio_data, AudioData), "Data must be audio data"
    assert isinstance(tensor_graph, str), "``tensor_graph`` must be a string"
    assert isinstance(tensor_label, str), "``tensor_label`` must be a string"

    try:
        import tensorflow as tf
    except ImportError:
        raise RequestError("missing tensorflow module: ensure that tensorflow is set up correctly.")

    if self.tfgraph is None or tensor_graph != self.lasttfgraph or tensor_label != self.lasttflabel:
        # load graph
        with tf.gfile.FastGFile(tensor_graph, 'rb') as f:
            graph_def = tf.GraphDef()
            graph_def.ParseFromString(f.read())
        # import into a dedicated graph: importing several models into the shared default graph
        # would leave the tensor names below pointing at whichever model was imported first
        graph = tf.Graph()
        with graph.as_default():
            tf.import_graph_def(graph_def, name='')

        # load labels
        with tf.gfile.GFile(tensor_label) as f:
            labels = [line.rstrip() for line in f]

        # update the cache only after everything loaded, so a failed load is retried on the next call
        self.tfgraph, self.tflabels = graph, labels
        self.lasttfgraph, self.lasttflabel = tensor_graph, tensor_label

    # the audio is passed to the graph as WAV data at 16000 Hz with a sample width of 2 bytes
    wav_data = audio_data.get_wav_data(
        convert_rate=16000, convert_width=2
    )

    with tf.Session(graph=self.tfgraph) as sess:
        input_layer_name = 'wav_data:0'
        output_layer_name = 'labels_softmax:0'
        softmax_tensor = sess.graph.get_tensor_by_name(output_layer_name)
        predictions, = sess.run(softmax_tensor, {input_layer_name: wav_data})

    # the last index of the ascending sort is the label with the highest confidence
    best_node_id = predictions.argsort()[-1]
    return self.tflabels[best_node_id]
1263+
12171264

12181265
def get_flac_converter():
12191266
"""Returns the absolute path of a FLAC converter executable, or raises an OSError if none can be found."""

0 commit comments

Comments
 (0)