forked from baidu-research/ba-dls-deepspeech
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcreate_desc_json.py
55 lines (48 loc) · 2.04 KB
/
create_desc_json.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
"""
Use this script to create JSON-Lines description files that can be used to
train deep-speech models through this library.
This works with data directories that are organized like LibriSpeech:
data_directory/group/speaker/[file_id1.wav, file_id2.wav, ...,
group-speaker.trans.txt]
where each line of group-speaker.trans.txt has the form: file_id transcription
"""
from __future__ import absolute_import, division, print_function
import argparse
import json
import os
import wave
def _wav_duration(audio_file):
    """Return the length in seconds of the WAV file at `audio_file`."""
    audio = wave.open(audio_file)
    # try/finally rather than `with`: the file targets Python 2 as well
    # (see the __future__ import), where wave objects are not context managers.
    try:
        return float(audio.getnframes()) / audio.getframerate()
    finally:
        audio.close()


def main(data_directory, output_file):
    """Write a JSON-Lines description of a LibriSpeech-style data directory.

    Walks `data_directory/<group>/<speaker>/`, reads the transcript file
    `<group>-<speaker>.trans.txt` found in each speaker directory, and for
    every transcript line (`file_id transcription`) records the path of
    `<file_id>.wav`, its duration in seconds and the lower-cased
    transcription.

    Params:
        data_directory (str): Root of the data directory to scan.
        output_file (str): Path of the file to write; one JSON object per
            line with the keys 'key', 'duration' and 'text'.

    Raises:
        IOError/OSError: If a transcript or audio file cannot be opened.
        wave.Error: If an audio file is not a readable WAV file.
    """
    records = []
    # os.listdir returns entries in arbitrary order; sorting makes the
    # generated file reproducible across runs and machines.
    for group in sorted(os.listdir(data_directory)):
        group_path = os.path.join(data_directory, group)
        if not os.path.isdir(group_path):
            # Stray files (READMEs, .DS_Store, ...) are not groups.
            continue
        for speaker in sorted(os.listdir(group_path)):
            speaker_dir = os.path.join(group_path, speaker)
            if not os.path.isdir(speaker_dir):
                continue
            labels_file = os.path.join(
                speaker_dir, '{}-{}.trans.txt'.format(group, speaker))
            with open(labels_file) as transcripts:
                for line in transcripts:
                    split = line.strip().split()
                    if not split:
                        # Blank line (e.g. trailing newline): nothing to index.
                        continue
                    file_id = split[0]
                    label = ' '.join(split[1:]).lower()
                    audio_file = os.path.join(speaker_dir, file_id) + '.wav'
                    records.append({'key': audio_file,
                                    'duration': _wav_duration(audio_file),
                                    'text': label})
    # The output is only opened once every input has been read successfully,
    # so a failure while scanning never leaves a partial file behind.
    with open(output_file, 'w') as out_file:
        for record in records:
            out_file.write(json.dumps(record) + '\n')
if __name__ == '__main__':
    # Command-line entry point: two positional paths, passed straight to main().
    cli = argparse.ArgumentParser()
    for arg_name, arg_help in (('data_directory', 'Path to data directory'),
                               ('output_file', 'Path to output file')):
        cli.add_argument(arg_name, type=str, help=arg_help)
    parsed = cli.parse_args()
    main(parsed.data_directory, parsed.output_file)