-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathremote.py
76 lines (58 loc) · 2.15 KB
/
remote.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
'tldextract helpers for testing and fetching remote resources.'
import logging
import re
import socket
import sys
import requests
from requests_file import FileAdapter
# pylint: disable=import-error,invalid-name,no-name-in-module,redefined-builtin
if sys.version_info < (3,): # pragma: no cover
from urlparse import scheme_chars
else: # pragma: no cover
from urllib.parse import scheme_chars
unicode = str
# pylint: enable=import-error,invalid-name,no-name-in-module,redefined-builtin
# Dotted-quad IPv4 matcher: exactly four decimal octets separated by dots,
# each octet in 0-255 with no leading zeros, anchored with ^ and $.
IP_RE = re.compile(r'^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])$') # pylint: disable=line-too-long
# Matches, at the start of a string, an optional "<scheme>:" prefix (built from
# the characters in urlparse / urllib.parse `scheme_chars`) followed by "//".
# NOTE(review): not referenced in the visible part of this module; presumably
# imported by other tldextract modules -- confirm before changing.
SCHEME_RE = re.compile(r'^([' + scheme_chars + ']+:)?//')
# Logger shared by the helpers below, registered under the name 'tldextract'.
LOG = logging.getLogger('tldextract')
def find_first_response(urls, cache_fetch_timeout=None):
    """ Return the text of the first URL in `urls` that can be fetched.

    The URLs are tried in order through a single `requests` session that
    also has a `file://` adapter mounted. A URL whose request raises a
    `requests` exception (including an error HTTP status reported by
    `raise_for_status`) is logged with its traceback and skipped.

    :param urls: iterable of candidate Public Suffix List URLs.
    :param cache_fetch_timeout: forwarded to `session.get` as `timeout`.
    :returns: the response text as a Python unicode string, or an empty
        unicode string (after logging an error) when no URL succeeded.
    """
    with requests.Session() as session:
        session.mount('file://', FileAdapter())

        for url in urls:
            try:
                response = session.get(url, timeout=cache_fetch_timeout)
                response.raise_for_status()
            except requests.exceptions.RequestException:
                LOG.exception(
                    'Exception reading Public Suffix List url %s',
                    url
                )
                continue

            # First success wins; later URLs are never requested.
            return _decode_utf8(response.text)

    LOG.error(
        'No Public Suffix List found. Consider using a mirror or constructing '
        'your TLDExtract with `suffix_list_urls=None`.'
    )
    return unicode('')
def _decode_utf8(text):
    """ Return `text` as a Python unicode string.

    A value that is already unicode is handed back unchanged; anything else
    is decoded as UTF-8, the encoding the suffix list is expected to use
    wherever it came from.
    """
    if isinstance(text, unicode):
        return text
    return unicode(text, 'utf-8')
def looks_like_ip(maybe_ip):
    """Does the given str look like an IP address?

    :param maybe_ip: candidate string; an empty string or None is accepted
        and reported as "not an IP" instead of raising.
    :returns: always a bool. True when `socket.inet_aton` accepts the
        string, or, when `inet_aton` cannot process the argument
        (AttributeError / UnicodeError), when `IP_RE` matches it.
    """
    # Cheap rejection first: empty input, or anything not starting with a
    # digit, cannot be a dotted-quad address.
    if not maybe_ip or not maybe_ip[0].isdigit():
        return False

    try:
        socket.inet_aton(maybe_ip)
    except (AttributeError, UnicodeError):
        # inet_aton could not be used on this value (presumably missing on
        # the platform or unable to encode the text); fall back to the
        # regex and normalise its result to a real bool.
        return IP_RE.match(maybe_ip) is not None
    except socket.error:
        # inet_aton parsed the argument and rejected it as an address.
        return False
    return True