@@ -1214,6 +1214,53 @@ def recognize_ibm(self, audio_data, username, password, language="en-US", show_a
1214
1214
transcription .append (hypothesis ["transcript" ])
1215
1215
return "\n " .join (transcription )
1216
1216
1217
+ lasttfgraph = ''
1218
+ tflabels = None
1219
+
1220
def recognize_tensorflow(self, audio_data, tensor_graph='tensorflow-data/conv_actions_frozen.pb', tensor_label='tensorflow-data/conv_actions_labels.txt'):
    """
    Performs speech recognition on ``audio_data`` (an ``AudioData`` instance) using a TensorFlow model.

    ``tensor_graph`` is the path to a serialized ``GraphDef`` file. The graph must expose an input tensor named ``wav_data:0`` and an output tensor named ``labels_softmax:0``. You can download a model here: http://download.tensorflow.org/models/speech_commands_v0.01.zip

    ``tensor_label`` is the path to the labels file, one label per line; the label on line *i* corresponds to index *i* of the graph's output.

    The loaded graph and labels are cached on this instance and are only reloaded when ``tensor_graph`` or ``tensor_label`` differs from the previous successful call.

    Returns the label with the highest score, or ``None`` if the graph produced no scores. Raises a ``RequestError`` exception if the ``tensorflow`` module cannot be imported.
    """
    assert isinstance(audio_data, AudioData), "Data must be audio data"
    assert isinstance(tensor_graph, str), "``tensor_graph`` must be a string"
    assert isinstance(tensor_label, str), "``tensor_label`` must be a string"

    try:
        import tensorflow as tf
    except ImportError:
        raise RequestError("missing tensorflow module: ensure that tensorflow is set up correctly.")

    # ``_tfgraph`` / ``_lasttflabel`` are created lazily by this method, hence the ``getattr`` defaults
    graph = getattr(self, "_tfgraph", None)
    labels = self.tflabels
    reload_graph = graph is None or tensor_graph != self.lasttfgraph
    reload_labels = reload_graph or labels is None or tensor_label != getattr(self, "_lasttflabel", None)

    if reload_graph:
        graph_def = tf.GraphDef()
        with tf.gfile.FastGFile(tensor_graph, 'rb') as f:
            graph_def.ParseFromString(f.read())
        # import into a private graph: importing a second model into the shared default graph
        # would leave ``labels_softmax:0`` bound to the first model's tensor
        graph = tf.Graph()
        with graph.as_default():
            tf.import_graph_def(graph_def, name='')

    if reload_labels:
        with tf.gfile.GFile(tensor_label) as f:
            labels = [line.rstrip() for line in f]

    # update the cache only after everything loaded, so a failed load is retried on the next call
    self._tfgraph = graph
    self.lasttfgraph = tensor_graph
    self.tflabels = labels
    self._lasttflabel = tensor_label

    wav_data = audio_data.get_wav_data(convert_rate=16000, convert_width=2)

    with tf.Session(graph=graph) as sess:
        softmax_tensor = sess.graph.get_tensor_by_name('labels_softmax:0')
        # the trailing comma unpacks the single row returned for the single input clip
        predictions, = sess.run(softmax_tensor, {'wav_data:0': wav_data})

    # indices ordered by ascending score; the last one is the most confident label
    ranked = predictions.argsort()
    if len(ranked) == 0:
        return None
    return labels[ranked[-1]]
1263
+
1217
1264
1218
1265
def get_flac_converter ():
1219
1266
"""Returns the absolute path of a FLAC converter executable, or raises an OSError if none can be found."""
0 commit comments