Respect --insecure (-k) flag when fetching list_url. (spack#5178)
* Respect --insecure when fetching list_url.
* Ensure support for Python 2.6, and that urlopen works for Python versions prior to 2.7.9 and between 3.0 and 3.4.3 (see the sketch below).
skosukhin authored and tgamblin committed Aug 25, 2017
1 parent c0d3d33 commit fda3249
Showing 1 changed file with 33 additions and 3 deletions.
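
Before the diff, a minimal standalone sketch of the compatibility pattern this commit applies. The insecure flag and the _build_context/_fetch helpers below are illustrative only (insecure stands in for spack.insecure, which the --insecure/-k flag enables); the point is that urlopen gained its context keyword only in Python 2.7.9 and 3.4.3, so a None context must not be forwarded on older interpreters:

import ssl
import sys

try:
    from urllib.request import Request, urlopen   # Python 3
except ImportError:
    from urllib2 import Request, urlopen          # Python 2

# Illustrative stand-in for spack.insecure (normally enabled by 'spack -k').
insecure = False


def _build_context():
    # urlopen() accepts a 'context' keyword only from 2.7.9 / 3.4.3 on,
    # so older interpreters get no context (and no certificate checking).
    if sys.version_info < (2, 7, 9) or (3,) < sys.version_info < (3, 4, 3):
        return None
    return ssl._create_unverified_context() if insecure \
        else ssl.create_default_context()


def _fetch(url, timeout=10):
    # Only forward 'context' where urlopen() actually supports it.
    req = Request(url)
    kwargs = {'timeout': timeout}
    context = _build_context()
    if context is not None:
        kwargs['context'] = context
    return urlopen(req, **kwargs)

The actual change folds the same logic into _spider and a small _urlopen wrapper, and adds a warning that points users at spack -k when certificate verification fails.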
lib/spack/spack/util/web.py (33 additions, 3 deletions)
@@ -24,6 +24,7 @@
 ##############################################################################
 import re
 import os
+import ssl
 import sys
 import traceback

@@ -105,14 +106,28 @@ def _spider(url, visited, root, depth, max_depth, raise_on_error):
     root = re.sub('/index.html$', '', root)

     try:
+        context = None
+        if sys.version_info < (2, 7, 9) or \
+                ((3,) < sys.version_info < (3, 4, 3)):
+            if not spack.insecure:
+                tty.warn("Spack will not check SSL certificates. You need to "
+                         "update your Python to enable certificate "
+                         "verification.")
+        else:
+            # We explicitly create default context to avoid error described in
+            # https://blog.sucuri.net/2016/03/beware-unverified-tls-certificates-php-python.html
+            context = ssl._create_unverified_context() \
+                if spack.insecure \
+                else ssl.create_default_context()
+
         # Make a HEAD request first to check the content type. This lets
         # us ignore tarballs and gigantic files.
         # It would be nice to do this with the HTTP Accept header to avoid
         # one round-trip. However, most servers seem to ignore the header
         # if you ask for a tarball with Accept: text/html.
         req = Request(url)
         req.get_method = lambda: "HEAD"
-        resp = urlopen(req, timeout=TIMEOUT)
+        resp = _urlopen(req, timeout=TIMEOUT, context=context)

         if "Content-type" not in resp.headers:
             tty.debug("ignoring page " + url)
@@ -125,7 +140,7 @@ def _spider(url, visited, root, depth, max_depth, raise_on_error):

         # Do the real GET request when we know it's just HTML.
         req.get_method = lambda: "GET"
-        response = urlopen(req, timeout=TIMEOUT)
+        response = _urlopen(req, timeout=TIMEOUT, context=context)
         response_url = response.geturl()

         # Read the page and stick it in the map we'll return
@@ -176,6 +191,13 @@ def _spider(url, visited, root, depth, max_depth, raise_on_error):

     except URLError as e:
         tty.debug(e)
+
+        if isinstance(e.reason, ssl.SSLError):
+            tty.warn("Spack was unable to fetch url list due to a certificate "
+                     "verification problem. You can try running spack -k, "
+                     "which will not check SSL certificates. Use this at your "
+                     "own risk.")
+
         if raise_on_error:
             raise spack.error.NoNetworkConnectionError(str(e), url)

@@ -202,8 +224,16 @@ def _spider_wrapper(args):
     return _spider(*args)


-def spider(root_url, depth=0):
+def _urlopen(*args, **kwargs):
+    """Wrapper for compatibility with old versions of Python."""
+    # We don't pass the 'context' parameter on to urlopen because it
+    # was introduced only in versions 2.7.9 and 3.4.3 of Python.
+    if 'context' in kwargs and kwargs['context'] is None:
+        del kwargs['context']
+    return urlopen(*args, **kwargs)
+
+
+def spider(root_url, depth=0):
     """Gets web pages from a root URL.
        If depth is specified (e.g., depth=2), then this will also follow
