Compute correct features length in preprocess.py

icaas · Nov 3, 2018 · 1d55c94
1 parent f5e7dfc
commit 1d55c94
Show file tree

Hide file tree

Showing 2 changed files with 4 additions and 6 deletions.
diff --git a/util/feeding.py b/util/feeding.py
@@ -134,10 +134,7 @@ def _populate_batch_queue(self, session, coord):
         index = -1
         while not coord.should_stop():
             index = self._data_set.next_index(index) % file_count
-            features, _, transcript, transcript_len = self._data_set.data.iloc[index]
-
-            # One stride per time step in the input
-            num_strides = len(features) - (self._model_feeder.numcontext * 2)
+            features, num_strides, transcript, transcript_len = self._data_set.data.iloc[index]
 
             # Create a view into the array with overlapping strides of size
             # numcontext (past) + 1 (present) + numcontext (future)

diff --git a/util/preprocess.py b/util/preprocess.py
@@ -19,12 +19,13 @@ def process_single_file(row, numcep, numcontext, alphabet):
     # row = index, Series
     _, file = row
     features = audiofile_to_input_vector(file.wav_filename, numcep, numcontext)
+    features_len = len(features) - 2*numcontext
     transcript = text_to_char_array(file.transcript, alphabet)
 
-    if (2*numcontext + len(features)) < len(transcript):
+    if features_len < len(transcript):
         raise ValueError('Error: Audio file {} is too short for transcription.'.format(file.wav_filename))
 
-    return features, len(features), transcript, len(transcript)
+    return features, features_len, transcript, len(transcript)
 
 
 # load samples from CSV, compute features, optionally cache results on disk