Skip to content

Commit

Permalink
Fix mozilla#693; corrections for several Fisher test samples
Browse files Browse the repository at this point in the history
  • Loading branch information
tilmankamp committed Jul 5, 2017
1 parent 449adf3 commit 0260b2f
Showing 1 changed file with 6 additions and 0 deletions.
6 changes: 6 additions & 0 deletions bin/import_fisher.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,14 @@ def _download_and_preprocess_data(data_dir):
# their audio source. The result is that we end up with more labels than time
# slices, which breaks CTC.
all_2004.loc[all_2004["wav_filename"].str.endswith("fe_03_00265-33.53-33.81.wav"), "transcript"] = "correct"
all_2004.loc[all_2004["wav_filename"].str.endswith("fe_03_00991-527.39-528.3.wav"), "transcript"] = "that's one of those"
all_2005.loc[all_2005["wav_filename"].str.endswith("fe_03_10282-344.42-344.84.wav"), "transcript"] = "they don't want"
all_2005.loc[all_2005["wav_filename"].str.endswith("fe_03_10677-101.04-106.41.wav"), "transcript"] = "uh my mine yeah the german shepherd pitbull mix he snores almost as loud as i do"

# The following file contains only a short sound that does not match the provided transcript at all.
# So we just exclude it.
all_2004 = all_2004[~all_2004["wav_filename"].str.endswith("fe_03_00027-393.8-394.05.wav")]

# The following file is far too long and would ruin our training batch size.
# So we just exclude it.
all_2005 = all_2005[~all_2005["wav_filename"].str.endswith("fe_03_11487-31.09-234.06.wav")]
Expand Down

0 comments on commit 0260b2f

Please sign in to comment.