DS_input.py
import os.path
import glob
import tensorflow as tf
# Global constants describing the dataset
# Note this definition must match the ALPHABET chosen in
# preprocess_Librispeech.py
ALPHABET = "ABCDEFGHIJKLMNOPQRSTUVWXYZ' " # for LibriSpeech
NUM_CLASSES = len(ALPHABET) + 1 # Additional class for blank character
NUM_PER_EPOCH_FOR_TRAIN = 28535
NUM_PER_EPOCH_FOR_EVAL = 2703
NUM_PER_EPOCH_FOR_TEST = 2620
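# Example: with the 28-character ALPHABET above, transcript labels use the
# indices 0..27 and tf.nn.ctc_loss reserves index NUM_CLASSES - 1 (here 28)
# as the CTC blank, which is why one extra class is added.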


def _generate_feats_and_label_batch(filename_queue, batch_size):
    """Construct a queued batch of spectral features and transcriptions.

    Args:
      filename_queue: queue of filenames to read data from.
      batch_size: Number of utterances per batch.

    Returns:
      feats: MFCCs. 3D tensor of [batch_size, max_time, 13] size,
        zero-padded to the longest utterance in the batch.
      labels: transcripts. SparseTensor of [batch_size, max_label_len] size.
      seq_lens: sequence lengths. 1D int32 tensor of [batch_size] size.
    """
    # Define how to parse the example
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
    context_features = {
        "seq_len": tf.FixedLenFeature([], dtype=tf.int64),
        "labels": tf.VarLenFeature(dtype=tf.int64)
    }
    sequence_features = {
        # mfcc features are 13 dimensional
        "feats": tf.FixedLenSequenceFeature([13], dtype=tf.float32)
    }

    # Parse the example (returns a dictionary of tensors)
    context_parsed, sequence_parsed = tf.parse_single_sequence_example(
        serialized=serialized_example,
        context_features=context_features,
        sequence_features=sequence_features)
    # Generate a batch worth of examples after bucketing utterances of
    # similar length together, so dynamic padding wastes little compute.
    seq_len, (feats, labels) = tf.contrib.training.bucket_by_sequence_length(
        input_length=tf.cast(context_parsed['seq_len'], tf.int32),
        tensors=[sequence_parsed['feats'], context_parsed['labels']],
        batch_size=batch_size,
        bucket_boundaries=list(range(100, 1900, 100)),
        allow_smaller_final_batch=True,
        num_threads=16,
        dynamic_pad=True)
    return feats, tf.cast(labels, tf.int32), seq_len
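
# A minimal downstream sketch (hypothetical, not code from this repo): the
# batched outputs above plug directly into CTC training, e.g.
#
#   logits = acoustic_model(feats)   # hypothetical model function
#   loss = tf.nn.ctc_loss(labels, logits, seq_len, time_major=False)
#
# where tf.nn.ctc_loss consumes the sparse integer labels, per-frame logits
# over NUM_CLASSES, and the true (unpadded) sequence lengths.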


def inputs(eval_data, data_dir, batch_size, shuffle=False):
    """Construct input for LibriSpeech evaluation using the Reader ops.

    Args:
      eval_data: string, one of 'train', 'val' or 'test', selecting the
        split of the data set to read.
      data_dir: Path to the LibriSpeech data directory.
      batch_size: Number of utterances per batch.
      shuffle: bool, whether to shuffle the order of the input files.

    Returns:
      feats: MFCCs. 3D tensor of [batch_size, max_time, 13] size.
      labels: transcripts. SparseTensor of [batch_size, max_label_len] size.
      seq_lens: sequence lengths. 1D int32 tensor of [batch_size] size.
    """
    if eval_data == 'train':
        num_files = len(glob.glob(os.path.join(data_dir,
                                               'train*/*.tfrecords')))
        filenames = [os.path.join(data_dir, 'train-clean-100/train_' +
                                  str(i) + '.tfrecords')
                     for i in range(1, num_files + 1)]
    elif eval_data == 'val':
        filenames = glob.glob(os.path.join(data_dir, 'dev*/*.tfrecords'))
    elif eval_data == 'test':
        filenames = glob.glob(os.path.join(data_dir, 'test*/*.tfrecords'))
    else:
        raise ValueError("eval_data must be 'train', 'val' or 'test', "
                         "got: " + str(eval_data))

    for filename in filenames:
        if not tf.gfile.Exists(filename):
            raise ValueError('Failed to find file: ' + filename)

    # Create a queue that produces the filenames to read.
    filename_queue = tf.train.string_input_producer(filenames, shuffle=shuffle)

    # Generate a batch of features and labels by building up a queue of examples.
    return _generate_feats_and_label_batch(filename_queue, batch_size)
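

# Usage sketch (an illustration, not part of the original pipeline): TF 1.x
# queue-based inputs only define graph ops, so the queue runners must be
# started in a session before the batch tensors can be evaluated. The
# data_dir path below is a hypothetical placeholder.
if __name__ == '__main__':
    feats, labels, seq_lens = inputs('train', '/path/to/librispeech',
                                     batch_size=32)
    with tf.Session() as sess:
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        feats_val, seq_lens_val = sess.run([feats, seq_lens])
        print('feats shape: %s' % (feats_val.shape,))   # (batch, padded_time, 13)
        print('seq_lens: %s' % (seq_lens_val[:5],))     # true lengths before padding
        coord.request_stop()
        coord.join(threads)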