forked from yzhao062/anomaly-detection-resources
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdownload.py
32 lines (25 loc) · 880 Bytes
/
download.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
#!/usr/bin/env python3
"""
Download all papers/books named in the URL list and save each one under a
proper file name, if there is no copyright issue.

Each line of ``resource_urls/papers.txt`` is expected to have the form
``<title> | <url>``. The title (with characters that are illegal in file
names removed) becomes the name of the saved ``.pdf`` file in ``resources/``.

Lines that cannot be parsed and downloads that fail are reported and
skipped, so one bad entry does not abort the whole run.

TODO: download resources by item number
"""
import re
import pathlib
import urllib.request

# Paths are assembled with pathlib so the separators are correct on every
# operating system, not only on Windows.
URL_LIST = pathlib.Path('resource_urls') / 'papers.txt'
OUTPUT_DIR = pathlib.Path('resources')

FIELD_SEPARATOR = ' | '
EXTENSION = '.pdf'
# Upper bound for the complete file name, extension included.
MAX_NAME_LENGTH = 255
# Characters stripped from titles because they are not allowed in file names.
_FORBIDDEN_CHARS = re.compile(r'[\\/*?:"<>|]')


def _parse_line(line):
    """Split one line of the URL list into ``(title, url)``.

    Returns None when the line has no separator or no URL (e.g. a blank
    line). The URL is stripped so the trailing newline never reaches the
    downloader.
    """
    fields = line.split(FIELD_SEPARATOR)
    if len(fields) < 2:
        return None
    url = fields[1].strip()
    if not url:
        return None
    return fields[0], url


def _safe_file_name(title):
    """Return ``title`` without forbidden characters, short enough that the
    name plus the extension stays within ``MAX_NAME_LENGTH``."""
    cleaned = _FORBIDDEN_CHARS.sub('', title)
    return cleaned[:MAX_NAME_LENGTH - len(EXTENSION)]


def main():
    """Download every entry of ``URL_LIST`` into ``OUTPUT_DIR``.

    Creates the output directory if needed, then processes the list line by
    line. Malformed lines and failed downloads are printed and skipped.

    Raises:
        OSError: if the URL list itself cannot be opened or the output
            directory cannot be created.
    """
    # create the download directory if it does not exist
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

    # the context manager closes the list even if a download raises
    with open(URL_LIST, 'r') as url_list:
        for line in url_list:
            entry = _parse_line(line)
            if entry is None:
                # stay silent on blank lines, report anything else
                if line.strip():
                    print('Skipping malformed line:', line.strip())
                continue

            title, url = entry
            file_name = _safe_file_name(title)
            target = OUTPUT_DIR / (file_name + EXTENSION)

            print('Downloading', file_name, 'from', url)
            try:
                urllib.request.urlretrieve(url, str(target))
            except (OSError, ValueError) as err:
                # URLError/HTTPError derive from OSError; ValueError is
                # raised for URLs with an unknown or missing scheme.
                print('Failed to download', url, '-', err)


if __name__ == '__main__':
    main()