-
Notifications
You must be signed in to change notification settings - Fork 47
/
Copy pathdownload_dataset.py
36 lines (30 loc) · 1.1 KB
/
download_dataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import os
import requests
from zipfile import ZipFile
# Download the geological-similarity dataset archive and unpack it into the
# local data repository directory. Everything below runs at top level, so the
# download starts as soon as this file is executed.
# NOTE(review): the archive is fetched over plain HTTP and is not checked
# against a checksum - consider HTTPS and an integrity check.
dataset_url = "http://aws-proserve-data-science.s3.amazonaws.com/geological_similarity.zip"
filePath = './data_repository/geological_similarity.zip'
data_directory = './data_repository'

# Make sure the target directory exists before anything is downloaded.
if not os.path.exists(data_directory):
    try:
        os.makedirs(data_directory)
    except OSError:
        print("Unable to create directory at ",data_directory," Please create ",data_directory," manually. Then run this file again.")
        # Nothing below can succeed without the directory, so stop here
        # instead of failing later with a less obvious open() error.
        raise SystemExit(1)
    else:
        print(data_directory," created successfully.")

# An archive left over from an earlier run is discarded; the dataset is
# always fetched again.
if os.path.exists(filePath):
    os.remove(filePath)
else:
    print("Have to download dataset.")

# Stream the archive to disk. The context manager releases the connection even
# if writing fails part-way; the timeout keeps a stalled server from hanging
# the script indefinitely.
with requests.get(dataset_url, stream=True, timeout=60) as r:
    # Fail fast on HTTP errors rather than saving an error page as a ".zip".
    r.raise_for_status()
    print('Started downloading dataset...')
    with open(filePath, "wb") as data:
        for chunk in r.iter_content(chunk_size=1024):
            # writing one chunk at a time to data file
            if chunk:
                print('...',end = '')
                data.write(chunk)
print('Download finished.')

# Unpack next to the archive; the with-block closes the zip handle even if
# extraction raises.
print('Unzipping File...')
with ZipFile(filePath, 'r') as zf:
    zf.extractall(data_directory)
print('Successfully unzipped file. Ready to run model...')