-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathCh1_download.py
50 lines (41 loc) · 1.26 KB
/
Ch1_download.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
# author = [email protected]
# -*- coding: cp936 -*-
# coding: cp936
import builtwith
import whois
import urllib2
import re
def download_0(url):
    """Fetch ``url`` and return the raw body of the response.

    This is the bare-bones variant: nothing is caught here, so whatever
    ``urllib2.urlopen`` or ``read`` raises propagates to the caller.
    """
    response = urllib2.urlopen(url)
    return response.read()
def download_1(url):
print 'Downloading: ', url
try:
html = urllib2.urlopen(url).read()
except urllib2.URLError as e:
print 'Download error: ', e.reason
html = None
return html
def download_2(url, num_retries = 2):
print 'Downloading: ', url
try:
html = urllib2.urlopen(url).read()
except urllib2.URLError as e:
print 'Download error: ', e.reason
html = None
if num_retries > 0:
if hasattr(e, 'code') and 500 <= e.code < 600:
return download_1(url, num_retries - 1)
return html
def download_3(url, user_agent = 'wswp', num_retries = 2):
print 'Downloading: ', url
headers = {'User_agent': user_agent}
request = urllib2.Request(url, headers=headers)
try:
html = urllib2.urlopen(request).read()
except urllib2.URLError as e:
print 'Download error: ', e.reason
html = None
if num_retries > 0:
if hasattr(e, 'code') and 500 <= e.code < 600:
return download_1(url, user_agent, num_retries - 1)
return html