-
Notifications
You must be signed in to change notification settings - Fork 56
/
download.py
77 lines (55 loc) · 2.28 KB
/
download.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import os
import random
from urllib.request import urlretrieve
import zipfile
import argparse
import shlex, subprocess
import zipfile
def unzip(zip_filepath, dest_dir='./data'):
    """Extract every member of a zip archive into a directory.

    Args:
        zip_filepath: Path of the zip archive to read.
        dest_dir: Directory the archive members are extracted into.
    """
    archive = zipfile.ZipFile(zip_filepath)
    # The context manager closes the archive handle even if extraction fails.
    with archive:
        archive.extractall(path=dest_dir)
    print("Extraction complete!")
def download_vcc2016():
    """Download and extract the VCC 2016 training and evaluation archives.

    Each archive that is not already present in the current working
    directory is fetched with ``wget`` and then extracted via ``unzip``
    (into that function's default destination). Archives that are already
    present are neither re-downloaded nor re-extracted.

    Raises:
        FileNotFoundError: If the ``wget`` executable is not available.
        subprocess.CalledProcessError: If ``wget`` exits with a non-zero
            status, so a failed download is reported at the point of
            failure instead of as a later zip-reading error.
    """
    # No trailing slash: the separator is added when the URL is built, which
    # avoids a double slash in the request path.
    datalink = "https://datashare.is.ed.ac.uk/bitstream/handle/10283/2211"
    data_files = ['vcc2016_training.zip', 'evaluation_all.zip']

    # Decide per archive, so one archive being present does not prevent the
    # other one from being downloaded.
    missing = [name for name in data_files if not os.path.exists(name)]
    if not missing:
        print("File already exists!")
        return

    print('Start download dataset...')
    for name in missing:
        # An argv list needs no shell-style splitting of a command string.
        subprocess.run(['wget', f'{datalink}/{name}'], check=True)

    for name in missing:
        unzip(name)
    print('Finish download dataset...')
# Removed "four" from the "fourspeakers" directory names, as it wasn't used later.
def create_dirs(trainset: str='./data/speakers', testset: str='./data/speakers_test'):
    """Create the train and test directories if they do not exist yet.

    Args:
        trainset: Path of the training-set directory.
        testset: Path of the test-set directory.
    """
    targets = (
        (trainset, f'create train set dir {trainset}'),
        (testset, f'create test set dir {testset}'),
    )
    for path, message in targets:
        if os.path.exists(path):
            # Already present: nothing to announce or create.
            continue
        print(message)
        os.makedirs(path, exist_ok=True)
if __name__ == '__main__':
    # Script entry point: parse options, create the train/test directories,
    # then download the requested dataset.
    parser = argparse.ArgumentParser(description='Download voice conversion datasets.')

    datasets_default = 'vcc2016'
    # Directory names were changed from "fourspeakers" to "speakers".
    train_dir = './data/speakers'
    test_dir = './data/speakers_test'

    parser.add_argument('--datasets', type=str, help='Datasets available: vcc2016', default=datasets_default)
    parser.add_argument('--train_dir', type=str, help='trainset directory', default=train_dir)
    parser.add_argument('--test_dir', type=str, help='testset directory', default=test_dir)

    # parse_known_args() honours the flags given on the command line while
    # still ignoring unrelated argv entries, so running with no recognised
    # flags keeps falling back to the defaults above instead of aborting.
    argv, _ignored = parser.parse_known_args()

    datasets = argv.datasets
    # Use the parsed values so --train_dir / --test_dir actually take effect.
    create_dirs(argv.train_dir, argv.test_dir)

    if datasets in ('vcc2016', 'VCC2016'):
        download_vcc2016()
    else:
        print('Dataset not available.')