Skip to content

Commit

Permalink
Compute correct features length in preprocess.py
Browse files (browse the repository at this point in the history)
  • Loading branch information
reuben committed Nov 3, 2018
1 parent f5e7dfc commit 1d55c94
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 6 deletions.
5 changes: 1 addition & 4 deletions util/feeding.py
Original file line number Diff line number Diff line change
@@ -134,10 +134,7 @@ def _populate_batch_queue(self, session, coord):
index = -1
while not coord.should_stop():
index = self._data_set.next_index(index) % file_count
- features, _, transcript, transcript_len = self._data_set.data.iloc[index]
-
- # One stride per time step in the input
- num_strides = len(features) - (self._model_feeder.numcontext * 2)
+ features, num_strides, transcript, transcript_len = self._data_set.data.iloc[index]

# Create a view into the array with overlapping strides of size
# numcontext (past) + 1 (present) + numcontext (future)
Expand Down
5 changes: 3 additions & 2 deletions util/preprocess.py
Original file line number Diff line number Diff line change
@@ -19,12 +19,13 @@ def process_single_file(row, numcep, numcontext, alphabet):
# row = index, Series
_, file = row
features = audiofile_to_input_vector(file.wav_filename, numcep, numcontext)
+ features_len = len(features) - 2*numcontext
transcript = text_to_char_array(file.transcript, alphabet)

- if (2*numcontext + len(features)) < len(transcript):
+ if features_len < len(transcript):
raise ValueError('Error: Audio file {} is too short for transcription.'.format(file.wav_filename))

- return features, len(features), transcript, len(transcript)
+ return features, features_len, transcript, len(transcript)


# load samples from CSV, compute features, optionally cache results on disk
Expand Down

0 comments on commit 1d55c94

Please sign in to comment.