Respect --insecure (-k) flag when fetching list_url. (spack#5178)
* Respect --insecure when fetching list_url.
* Ensure support for Python 2.6, and that urlopen works for Python versions prior to 2.7.9 and between 3.0 and 3.4.3 (see the sketch below).
skosukhin authored and tgamblin committed Aug 25, 2017
1 parent c0d3d33 commit fda3249
Showing 1 changed file with 33 additions and 3 deletions.
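
Before the diff, a minimal standalone sketch of the compatibility pattern this commit applies. The insecure flag and the _build_context/_fetch helpers below are illustrative only (insecure stands in for spack.insecure, which the --insecure/-k flag enables); the point is that urlopen gained its context keyword only in Python 2.7.9 and 3.4.3, so a None context must not be forwarded on older interpreters:

import ssl
import sys

try:
    from urllib.request import Request, urlopen   # Python 3
except ImportError:
    from urllib2 import Request, urlopen          # Python 2

# Illustrative stand-in for spack.insecure (normally enabled by 'spack -k').
insecure = False


def _build_context():
    # urlopen() accepts a 'context' keyword only from 2.7.9 / 3.4.3 on,
    # so older interpreters get no context (and no certificate checking).
    if sys.version_info < (2, 7, 9) or (3,) < sys.version_info < (3, 4, 3):
        return None
    return ssl._create_unverified_context() if insecure \
        else ssl.create_default_context()


def _fetch(url, timeout=10):
    # Only forward 'context' where urlopen() actually supports it.
    req = Request(url)
    kwargs = {'timeout': timeout}
    context = _build_context()
    if context is not None:
        kwargs['context'] = context
    return urlopen(req, **kwargs)

The actual change folds the same logic into _spider and a small _urlopen wrapper, and adds a warning that points users at spack -k when certificate verification fails.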
lib/spack/spack/util/web.py (33 additions, 3 deletions)
@@ -24,6 +24,7 @@
 ##############################################################################
 import re
 import os
+import ssl
 import sys
 import traceback

@@ -105,14 +106,28 @@ def _spider(url, visited, root, depth, max_depth, raise_on_error):
     root = re.sub('/index.html$', '', root)

     try:
+        context = None
+        if sys.version_info < (2, 7, 9) or \
+                ((3,) < sys.version_info < (3, 4, 3)):
+            if not spack.insecure:
+                tty.warn("Spack will not check SSL certificates. You need to "
+                         "update your Python to enable certificate "
+                         "verification.")
+        else:
+            # We explicitly create default context to avoid error described in
+            # https://blog.sucuri.net/2016/03/beware-unverified-tls-certificates-php-python.html
+            context = ssl._create_unverified_context() \
+                if spack.insecure \
+                else ssl.create_default_context()
+
         # Make a HEAD request first to check the content type. This lets
         # us ignore tarballs and gigantic files.
         # It would be nice to do this with the HTTP Accept header to avoid
         # one round-trip. However, most servers seem to ignore the header
         # if you ask for a tarball with Accept: text/html.
         req = Request(url)
         req.get_method = lambda: "HEAD"
-        resp = urlopen(req, timeout=TIMEOUT)
+        resp = _urlopen(req, timeout=TIMEOUT, context=context)

         if "Content-type" not in resp.headers:
             tty.debug("ignoring page " + url)
@@ -125,7 +140,7 @@ def _spider(url, visited, root, depth, max_depth, raise_on_error):

         # Do the real GET request when we know it's just HTML.
         req.get_method = lambda: "GET"
-        response = urlopen(req, timeout=TIMEOUT)
+        response = _urlopen(req, timeout=TIMEOUT, context=context)
         response_url = response.geturl()

         # Read the page and stick it in the map we'll return
@@ -176,6 +191,13 @@ def _spider(url, visited, root, depth, max_depth, raise_on_error):

     except URLError as e:
         tty.debug(e)
+
+        if isinstance(e.reason, ssl.SSLError):
+            tty.warn("Spack was unable to fetch url list due to a certificate "
+                     "verification problem. You can try running spack -k, "
+                     "which will not check SSL certificates. Use this at your "
+                     "own risk.")
+
         if raise_on_error:
             raise spack.error.NoNetworkConnectionError(str(e), url)

@@ -202,8 +224,16 @@ def _spider_wrapper(args):
     return _spider(*args)


-def spider(root_url, depth=0):
+def _urlopen(*args, **kwargs):
+    """Wrapper for compatibility with old versions of Python."""
+    # We don't pass the 'context' parameter on to urlopen because it
+    # was introduced only in versions 2.7.9 and 3.4.3 of Python.
+    if 'context' in kwargs and kwargs['context'] is None:
+        del kwargs['context']
+    return urlopen(*args, **kwargs)
+
+
+def spider(root_url, depth=0):
     """Gets web pages from a root URL.
        If depth is specified (e.g., depth=2), then this will also follow
