1 files changed, 116 insertions, 109 deletions
diff --git a/setuptools/package_index.py b/setuptools/package_index.py
index b6407be..14881d2 100755..100644
--- a/setuptools/package_index.py
+++ b/setuptools/package_index.py
@@ -2,36 +2,39 @@
 import sys
 import os
 import re
+import io
 import shutil
 import socket
 import base64
 import hashlib
 import itertools
+import warnings
+import configparser
+import html
+import http.client
+import urllib.parse
+import urllib.request
+import urllib.error
 from functools import wraps
 
-from setuptools.extern import six
-from setuptools.extern.six.moves import urllib, http_client, configparser, map
-
 import setuptools
 from pkg_resources import (
     CHECKOUT_DIST, Distribution, BINARY_DIST, normalize_path, SOURCE_DIST,
     Environment, find_distributions, safe_name, safe_version,
-    to_filename, Requirement, DEVELOP_DIST, EGG_DIST,
+    to_filename, Requirement, DEVELOP_DIST, EGG_DIST, parse_version,
 )
-from setuptools import ssl_support
 from distutils import log
 from distutils.errors import DistutilsError
 from fnmatch import translate
-from setuptools.py27compat import get_all_headers
-from setuptools.py33compat import unescape
 from setuptools.wheel import Wheel
+from setuptools.extern.more_itertools import unique_everseen
+
 
 EGG_FRAGMENT = re.compile(r'^egg=([-A-Za-z0-9_.+!]+)$')
-HREF = re.compile("""href\\s*=\\s*['"]?([^'"> ]+)""", re.I)
-# this is here to fix emacs' cruddy broken syntax highlighting
+HREF = re.compile(r"""href\s*=\s*['"]?([^'"> ]+)""", re.I)
 PYPI_MD5 = re.compile(
-    '<a href="([^"#]+)">([^<]+)</a>\n\\s+\\(<a (?:title="MD5 hash"\n\\s+)'
-    'href="[^?]+\\?:action=show_md5&amp;digest=([0-9a-f]{32})">md5</a>\\)'
+    r'<a href="([^"#]+)">([^<]+)</a>\n\s+\(<a (?:title="MD5 hash"\n\s+)'
+    r'href="[^?]+\?:action=show_md5&amp;digest=([0-9a-f]{32})">md5</a>\)'
 )
 URL_SCHEME = re.compile('([-+.a-z0-9]{2,}):', re.I).match
 EXTENSIONS = ".tar.gz .tar.bz2 .tar .zip .tgz".split()
@@ -44,16 +47,17 @@ __all__ = [
 _SOCKET_TIMEOUT = 15
 
 _tmpl = "setuptools/{setuptools.__version__} Python-urllib/{py_major}"
-user_agent = _tmpl.format(py_major=sys.version[:3], setuptools=setuptools)
+user_agent = _tmpl.format(
+    py_major='{}.{}'.format(*sys.version_info), setuptools=setuptools)
 
 
 def parse_requirement_arg(spec):
     try:
         return Requirement.parse(spec)
-    except ValueError:
+    except ValueError as e:
         raise DistutilsError(
             "Not a URL, existing file, or requirement spec: %r" % (spec,)
-        )
+        ) from e
 
 
 def parse_bdist_wininst(name):
@@ -158,7 +162,7 @@ def interpret_distro_name(
     # Generate alternative interpretations of a source distro name
     # Because some packages are ambiguous as to name/versions split
     # e.g. "adns-python-1.1.0", "egenix-mx-commercial", etc.
-    # So, we generate each possible interepretation (e.g. "adns, python-1.1.0"
+    # So, we generate each possible interpretation (e.g. "adns, python-1.1.0"
     # "adns-python, 1.1.0", and "adns-python-1.1.0, no version").  In practice,
     # the spurious interpretations should be ignored, because in the event
     # there's also an "adns" package, the spurious "python-1.1.0" version will
@@ -180,25 +184,6 @@ def interpret_distro_name(
         )
 
 
-# From Python 2.7 docs
-def unique_everseen(iterable, key=None):
-    "List unique elements, preserving order. Remember all elements ever seen."
-    # unique_everseen('AAAABBBCCDAABBB') --> A B C D
-    # unique_everseen('ABBCcAD', str.lower) --> A B C D
-    seen = set()
-    seen_add = seen.add
-    if key is None:
-        for element in six.moves.filterfalse(seen.__contains__, iterable):
-            seen_add(element)
-            yield element
-    else:
-        for element in iterable:
-            k = key(element)
-            if k not in seen:
-                seen_add(k)
-                yield element
-
-
 def unique_values(func):
     """
     Wrap a function returning an iterable such that the resulting iterable
@@ -235,7 +220,7 @@ def find_external_links(url, page):
                 yield urllib.parse.urljoin(url, htmldecode(match.group(1)))
 
 
-class ContentChecker(object):
+class ContentChecker:
     """
     A null content checker that defines the interface for checking content
     """
@@ -300,24 +285,25 @@ class PackageIndex(Environment):
             self, index_url="https://pypi.org/simple/", hosts=('*',),
             ca_bundle=None, verify_ssl=True, *args, **kw
     ):
-        Environment.__init__(self, *args, **kw)
+        super().__init__(*args, **kw)
         self.index_url = index_url + "/" [:not index_url.endswith('/')]
         self.scanned_urls = {}
         self.fetched_urls = {}
         self.package_pages = {}
         self.allows = re.compile('|'.join(map(translate, hosts))).match
         self.to_scan = []
-        use_ssl = (
-            verify_ssl
-            and ssl_support.is_available
-            and (ca_bundle or ssl_support.find_ca_bundle())
-        )
-        if use_ssl:
-            self.opener = ssl_support.opener_for(ca_bundle)
-        else:
-            self.opener = urllib.request.urlopen
+        self.opener = urllib.request.urlopen
+
+    def add(self, dist):
+        # ignore invalid versions
+        try:
+            parse_version(dist.version)
+        except Exception:
+            return
+        return super().add(dist)
 
-    def process_url(self, url, retrieve=False):
+    # FIXME: 'PackageIndex.process_url' is too complex (14)
+    def process_url(self, url, retrieve=False):  # noqa: C901
         """Evaluate a URL as a possible download, and maybe retrieve it"""
         if url in self.scanned_urls and not retrieve:
             return
@@ -346,6 +332,8 @@ class PackageIndex(Environment):
         f = self.open_url(url, tmpl % url)
         if f is None:
             return
+        if isinstance(f, urllib.error.HTTPError) and f.code == 401:
+            self.info("Authentication error: %s" % f.msg)
         self.fetched_urls[f.url] = True
         if 'html' not in f.headers.get('content-type', '').lower():
             f.close()  # not html, we can't process it
@@ -423,49 +411,53 @@ class PackageIndex(Environment):
             dist.precedence = SOURCE_DIST
             self.add(dist)
 
+    def _scan(self, link):
+        # Process a URL to see if it's for a package page
+        NO_MATCH_SENTINEL = None, None
+        if not link.startswith(self.index_url):
+            return NO_MATCH_SENTINEL
+
+        parts = list(map(
+            urllib.parse.unquote, link[len(self.index_url):].split('/')
+        ))
+        if len(parts) != 2 or '#' in parts[1]:
+            return NO_MATCH_SENTINEL
+
+        # it's a package page, sanitize and index it
+        pkg = safe_name(parts[0])
+        ver = safe_version(parts[1])
+        self.package_pages.setdefault(pkg.lower(), {})[link] = True
+        return to_filename(pkg), to_filename(ver)
+
     def process_index(self, url, page):
         """Process the contents of a PyPI page"""
 
-        def scan(link):
-            # Process a URL to see if it's for a package page
-            if link.startswith(self.index_url):
-                parts = list(map(
-                    urllib.parse.unquote, link[len(self.index_url):].split('/')
-                ))
-                if len(parts) == 2 and '#' not in parts[1]:
-                    # it's a package page, sanitize and index it
-                    pkg = safe_name(parts[0])
-                    ver = safe_version(parts[1])
-                    self.package_pages.setdefault(pkg.lower(), {})[link] = True
-                    return to_filename(pkg), to_filename(ver)
-            return None, None
-
         # process an index page into the package-page index
         for match in HREF.finditer(page):
             try:
-                scan(urllib.parse.urljoin(url, htmldecode(match.group(1))))
+                self._scan(urllib.parse.urljoin(url, htmldecode(match.group(1))))
             except ValueError:
                 pass
 
-        pkg, ver = scan(url)  # ensure this page is in the page index
-        if pkg:
-            # process individual package page
-            for new_url in find_external_links(url, page):
-                # Process the found URL
-                base, frag = egg_info_for_url(new_url)
-                if base.endswith('.py') and not frag:
-                    if ver:
-                        new_url += '#egg=%s-%s' % (pkg, ver)
-                    else:
-                        self.need_version_info(url)
-                self.scan_url(new_url)
-
-            return PYPI_MD5.sub(
-                lambda m: '<a href="%s#md5=%s">%s</a>' % m.group(1, 3, 2), page
-            )
-        else:
+        pkg, ver = self._scan(url)  # ensure this page is in the page index
+        if not pkg:
             return ""  # no sense double-scanning non-package pages
 
+        # process individual package page
+        for new_url in find_external_links(url, page):
+            # Process the found URL
+            base, frag = egg_info_for_url(new_url)
+            if base.endswith('.py') and not frag:
+                if ver:
+                    new_url += '#egg=%s-%s' % (pkg, ver)
+                else:
+                    self.need_version_info(url)
+            self.scan_url(new_url)
+
+        return PYPI_MD5.sub(
+            lambda m: '<a href="%s#md5=%s">%s</a>' % m.group(1, 3, 2), page
+        )
+
     def need_version_info(self, url):
         self.scan_all(
             "Page at %s links to .py file(s) without version info; an index "
@@ -586,7 +578,7 @@ class PackageIndex(Environment):
                 spec = parse_requirement_arg(spec)
         return getattr(self.fetch_distribution(spec, tmpdir), 'location', None)
 
-    def fetch_distribution(
+    def fetch_distribution(  # noqa: C901  # is too complex (14)  # FIXME
             self, requirement, tmpdir, force_scan=False, source=False,
             develop_ok=False, local_index=None):
         """Obtain a distribution suitable for fulfilling `requirement`
@@ -688,8 +680,7 @@ class PackageIndex(Environment):
             # Make sure the file has been downloaded to the temp dir.
             if os.path.dirname(filename) != tmpdir:
                 dst = os.path.join(tmpdir, basename)
-                from setuptools.command.easy_install import samefile
-                if not samefile(filename, dst):
+                if not (os.path.exists(dst) and os.path.samefile(filename, dst)):
                     shutil.copy2(filename, dst)
                     filename = dst
 
@@ -735,7 +726,7 @@ class PackageIndex(Environment):
             size = -1
             if "content-length" in headers:
                 # Some servers return multiple Content-Length headers :(
-                sizes = get_all_headers(headers, 'Content-Length')
+                sizes = headers.get_all('Content-Length')
                 size = max(map(int, sizes))
                 self.reporthook(url, filename, blocknum, bs, size)
             with open(filename, 'wb') as tfp:
@@ -757,17 +748,18 @@ class PackageIndex(Environment):
     def reporthook(self, url, filename, blocknum, blksize, size):
         pass  # no-op
 
-    def open_url(self, url, warning=None):
+    # FIXME:
+    def open_url(self, url, warning=None):  # noqa: C901  # is too complex (12)
         if url.startswith('file:'):
             return local_open(url)
         try:
             return open_with_auth(url, self.opener)
-        except (ValueError, http_client.InvalidURL) as v:
+        except (ValueError, http.client.InvalidURL) as v:
             msg = ' '.join([str(arg) for arg in v.args])
             if warning:
                 self.warn(warning, msg)
             else:
-                raise DistutilsError('%s %s' % (url, msg))
+                raise DistutilsError('%s %s' % (url, msg)) from v
         except urllib.error.HTTPError as v:
             return v
         except urllib.error.URLError as v:
@@ -775,8 +767,8 @@ class PackageIndex(Environment):
                 self.warn(warning, v.reason)
             else:
                 raise DistutilsError("Download error for %s: %s"
-                                     % (url, v.reason))
-        except http_client.BadStatusLine as v:
+                                     % (url, v.reason)) from v
+        except http.client.BadStatusLine as v:
             if warning:
                 self.warn(warning, v.line)
             else:
@@ -784,13 +776,13 @@ class PackageIndex(Environment):
                     '%s returned a bad status line. The server might be '
                     'down, %s' %
                     (url, v.line)
-                )
-        except (http_client.HTTPException, socket.error) as v:
+                ) from v
+        except (http.client.HTTPException, socket.error) as v:
             if warning:
                 self.warn(warning, v)
             else:
                 raise DistutilsError("Download error for %s: %s"
-                                     % (url, v))
+                                     % (url, v)) from v
 
     def _download_url(self, scheme, url, tmpdir):
         # Determine download filename
@@ -847,13 +839,14 @@ class PackageIndex(Environment):
         raise DistutilsError("Unexpected HTML page found at " + url)
 
     def _download_svn(self, url, filename):
+        warnings.warn("SVN download support is deprecated", UserWarning)
         url = url.split('#', 1)[0]  # remove any fragment for svn's sake
         creds = ''
         if url.lower().startswith('svn:') and '@' in url:
             scheme, netloc, path, p, q, f = urllib.parse.urlparse(url)
             if not netloc and path.startswith('//') and '/' in path[2:]:
                 netloc, path = path[2:].split('/', 1)
-                auth, host = urllib.parse.splituser(netloc)
+                auth, host = _splituser(netloc)
                 if auth:
                     if ':' in auth:
                         user, pw = auth.split(':', 1)
@@ -894,7 +887,7 @@ class PackageIndex(Environment):
 
         if rev is not None:
             self.info("Checking out %s", rev)
-            os.system("(cd %s && git checkout --quiet %s)" % (
+            os.system("git -C %s checkout --quiet %s" % (
                 filename,
                 rev,
             ))
@@ -910,7 +903,7 @@ class PackageIndex(Environment):
 
         if rev is not None:
             self.info("Updating to %s", rev)
-            os.system("(cd %s && hg up -C -r %s -q)" % (
+            os.system("hg --cwd %s up -C -r %s -q" % (
                 filename,
                 rev,
             ))
@@ -933,12 +926,19 @@ entity_sub = re.compile(r'&(#(\d+|x[\da-fA-F]+)|[\w.:-]+);?').sub
 
 
 def decode_entity(match):
-    what = match.group(1)
-    return unescape(what)
+    what = match.group(0)
+    return html.unescape(what)
 
 
 def htmldecode(text):
-    """Decode HTML entities in the given text."""
+    """
+    Decode HTML entities in the given text.
+
+    >>> htmldecode(
+    ...     'https://../package_name-0.1.2.tar.gz'
+    ...     '?tokena=A&amp;tokenb=B">package_name-0.1.2.tar.gz')
+    'https://../package_name-0.1.2.tar.gz?tokena=A&tokenb=B">package_name-0.1.2.tar.gz'
+    """
     return entity_sub(decode_entity, text)
 
 
@@ -959,8 +959,7 @@ def socket_timeout(timeout=15):
 
 def _encode_auth(auth):
     """
-    A function compatible with Python 2.3-3.3 that will encode
-    auth from a URL suitable for an HTTP header.
+    Encode auth from a URL suitable for an HTTP header.
     >>> str(_encode_auth('username%3Apassword'))
     'dXNlcm5hbWU6cGFzc3dvcmQ='
 
@@ -972,15 +971,14 @@ def _encode_auth(auth):
     auth_s = urllib.parse.unquote(auth)
     # convert to bytes
     auth_bytes = auth_s.encode()
-    # use the legacy interface for Python 2.3 support
-    encoded_bytes = base64.encodestring(auth_bytes)
+    encoded_bytes = base64.b64encode(auth_bytes)
     # convert back to a string
     encoded = encoded_bytes.decode()
     # strip the trailing carriage return
     return encoded.replace('\n', '')
 
 
-class Credential(object):
+class Credential:
     """
     A username/password pair. Use like a namedtuple.
     """
@@ -1003,7 +1001,7 @@ class PyPIConfig(configparser.RawConfigParser):
         Load from ~/.pypirc
         """
         defaults = dict.fromkeys(['username', 'password', 'repository'], '')
-        configparser.RawConfigParser.__init__(self, defaults)
+        super().__init__(defaults)
 
         rc = os.path.join(os.path.expanduser('~'), '.pypirc')
         if os.path.exists(rc):
@@ -1038,15 +1036,16 @@ class PyPIConfig(configparser.RawConfigParser):
 def open_with_auth(url, opener=urllib.request.urlopen):
     """Open a urllib2 request, handling HTTP authentication"""
 
-    scheme, netloc, path, params, query, frag = urllib.parse.urlparse(url)
+    parsed = urllib.parse.urlparse(url)
+    scheme, netloc, path, params, query, frag = parsed
 
-    # Double scheme does not raise on Mac OS X as revealed by a
+    # Double scheme does not raise on macOS as revealed by a
     # failing test. We would expect "nonnumeric port". Refs #20.
     if netloc.endswith(':'):
-        raise http_client.InvalidURL("nonnumeric port: ''")
+        raise http.client.InvalidURL("nonnumeric port: ''")
 
     if scheme in ('http', 'https'):
-        auth, host = urllib.parse.splituser(netloc)
+        auth, address = _splituser(netloc)
     else:
         auth = None
 
@@ -1059,7 +1058,7 @@ def open_with_auth(url, opener=urllib.request.urlopen):
 
     if auth:
         auth = "Basic " + _encode_auth(auth)
-        parts = scheme, host, path, params, query, frag
+        parts = scheme, address, path, params, query, frag
         new_url = urllib.parse.urlunparse(parts)
         request = urllib.request.Request(new_url)
         request.add_header("Authorization", auth)
@@ -1073,13 +1072,21 @@ def open_with_auth(url, opener=urllib.request.urlopen):
         # Put authentication info back into request URL if same host,
         # so that links found on the page will work
         s2, h2, path2, param2, query2, frag2 = urllib.parse.urlparse(fp.url)
-        if s2 == scheme and h2 == host:
+        if s2 == scheme and h2 == address:
             parts = s2, netloc, path2, param2, query2, frag2
             fp.url = urllib.parse.urlunparse(parts)
 
     return fp
 
 
+# copy of urllib.parse._splituser from Python 3.8
+def _splituser(host):
+    """splituser('user[:passwd]@host[:port]')
+    --> 'user[:passwd]', 'host[:port]'."""
+    user, delim, host = host.rpartition('@')
+    return (user if delim else None), host
+
+
 # adding a timeout to avoid freezing package_index
 open_with_auth = socket_timeout(_SOCKET_TIMEOUT)(open_with_auth)
 
@@ -1115,5 +1122,5 @@ def local_open(url):
         status, message, body = 404, "Path not found", "Not found"
 
     headers = {'content-type': 'text/html'}
-    body_stream = six.StringIO(body)
+    body_stream = io.StringIO(body)
     return urllib.error.HTTPError(url, status, message, headers, body_stream)