diff options
Diffstat (limited to 'setuptools/package_index.py')
-rw-r--r--[-rwxr-xr-x] | setuptools/package_index.py | 225 |
1 files changed, 116 insertions, 109 deletions
diff --git a/setuptools/package_index.py b/setuptools/package_index.py index b6407be..14881d2 100755..100644 --- a/setuptools/package_index.py +++ b/setuptools/package_index.py @@ -2,36 +2,39 @@ import sys import os import re +import io import shutil import socket import base64 import hashlib import itertools +import warnings +import configparser +import html +import http.client +import urllib.parse +import urllib.request +import urllib.error from functools import wraps -from setuptools.extern import six -from setuptools.extern.six.moves import urllib, http_client, configparser, map - import setuptools from pkg_resources import ( CHECKOUT_DIST, Distribution, BINARY_DIST, normalize_path, SOURCE_DIST, Environment, find_distributions, safe_name, safe_version, - to_filename, Requirement, DEVELOP_DIST, EGG_DIST, + to_filename, Requirement, DEVELOP_DIST, EGG_DIST, parse_version, ) -from setuptools import ssl_support from distutils import log from distutils.errors import DistutilsError from fnmatch import translate -from setuptools.py27compat import get_all_headers -from setuptools.py33compat import unescape from setuptools.wheel import Wheel +from setuptools.extern.more_itertools import unique_everseen + EGG_FRAGMENT = re.compile(r'^egg=([-A-Za-z0-9_.+!]+)$') -HREF = re.compile("""href\\s*=\\s*['"]?([^'"> ]+)""", re.I) -# this is here to fix emacs' cruddy broken syntax highlighting +HREF = re.compile(r"""href\s*=\s*['"]?([^'"> ]+)""", re.I) PYPI_MD5 = re.compile( - '<a href="([^"#]+)">([^<]+)</a>\n\\s+\\(<a (?:title="MD5 hash"\n\\s+)' - 'href="[^?]+\\?:action=show_md5&digest=([0-9a-f]{32})">md5</a>\\)' + r'<a href="([^"#]+)">([^<]+)</a>\n\s+\(<a (?:title="MD5 hash"\n\s+)' + r'href="[^?]+\?:action=show_md5&digest=([0-9a-f]{32})">md5</a>\)' ) URL_SCHEME = re.compile('([-+.a-z0-9]{2,}):', re.I).match EXTENSIONS = ".tar.gz .tar.bz2 .tar .zip .tgz".split() @@ -44,16 +47,17 @@ __all__ = [ _SOCKET_TIMEOUT = 15 _tmpl = "setuptools/{setuptools.__version__} Python-urllib/{py_major}" -user_agent = _tmpl.format(py_major=sys.version[:3], setuptools=setuptools) +user_agent = _tmpl.format( + py_major='{}.{}'.format(*sys.version_info), setuptools=setuptools) def parse_requirement_arg(spec): try: return Requirement.parse(spec) - except ValueError: + except ValueError as e: raise DistutilsError( "Not a URL, existing file, or requirement spec: %r" % (spec,) - ) + ) from e def parse_bdist_wininst(name): @@ -158,7 +162,7 @@ def interpret_distro_name( # Generate alternative interpretations of a source distro name # Because some packages are ambiguous as to name/versions split # e.g. "adns-python-1.1.0", "egenix-mx-commercial", etc. - # So, we generate each possible interepretation (e.g. "adns, python-1.1.0" + # So, we generate each possible interpretation (e.g. "adns, python-1.1.0" # "adns-python, 1.1.0", and "adns-python-1.1.0, no version"). In practice, # the spurious interpretations should be ignored, because in the event # there's also an "adns" package, the spurious "python-1.1.0" version will @@ -180,25 +184,6 @@ def interpret_distro_name( ) -# From Python 2.7 docs -def unique_everseen(iterable, key=None): - "List unique elements, preserving order. Remember all elements ever seen." - # unique_everseen('AAAABBBCCDAABBB') --> A B C D - # unique_everseen('ABBCcAD', str.lower) --> A B C D - seen = set() - seen_add = seen.add - if key is None: - for element in six.moves.filterfalse(seen.__contains__, iterable): - seen_add(element) - yield element - else: - for element in iterable: - k = key(element) - if k not in seen: - seen_add(k) - yield element - - def unique_values(func): """ Wrap a function returning an iterable such that the resulting iterable @@ -235,7 +220,7 @@ def find_external_links(url, page): yield urllib.parse.urljoin(url, htmldecode(match.group(1))) -class ContentChecker(object): +class ContentChecker: """ A null content checker that defines the interface for checking content """ @@ -300,24 +285,25 @@ class PackageIndex(Environment): self, index_url="https://pypi.org/simple/", hosts=('*',), ca_bundle=None, verify_ssl=True, *args, **kw ): - Environment.__init__(self, *args, **kw) + super().__init__(*args, **kw) self.index_url = index_url + "/" [:not index_url.endswith('/')] self.scanned_urls = {} self.fetched_urls = {} self.package_pages = {} self.allows = re.compile('|'.join(map(translate, hosts))).match self.to_scan = [] - use_ssl = ( - verify_ssl - and ssl_support.is_available - and (ca_bundle or ssl_support.find_ca_bundle()) - ) - if use_ssl: - self.opener = ssl_support.opener_for(ca_bundle) - else: - self.opener = urllib.request.urlopen + self.opener = urllib.request.urlopen + + def add(self, dist): + # ignore invalid versions + try: + parse_version(dist.version) + except Exception: + return + return super().add(dist) - def process_url(self, url, retrieve=False): + # FIXME: 'PackageIndex.process_url' is too complex (14) + def process_url(self, url, retrieve=False): # noqa: C901 """Evaluate a URL as a possible download, and maybe retrieve it""" if url in self.scanned_urls and not retrieve: return @@ -346,6 +332,8 @@ class PackageIndex(Environment): f = self.open_url(url, tmpl % url) if f is None: return + if isinstance(f, urllib.error.HTTPError) and f.code == 401: + self.info("Authentication error: %s" % f.msg) self.fetched_urls[f.url] = True if 'html' not in f.headers.get('content-type', '').lower(): f.close() # not html, we can't process it @@ -423,49 +411,53 @@ class PackageIndex(Environment): dist.precedence = SOURCE_DIST self.add(dist) + def _scan(self, link): + # Process a URL to see if it's for a package page + NO_MATCH_SENTINEL = None, None + if not link.startswith(self.index_url): + return NO_MATCH_SENTINEL + + parts = list(map( + urllib.parse.unquote, link[len(self.index_url):].split('/') + )) + if len(parts) != 2 or '#' in parts[1]: + return NO_MATCH_SENTINEL + + # it's a package page, sanitize and index it + pkg = safe_name(parts[0]) + ver = safe_version(parts[1]) + self.package_pages.setdefault(pkg.lower(), {})[link] = True + return to_filename(pkg), to_filename(ver) + def process_index(self, url, page): """Process the contents of a PyPI page""" - def scan(link): - # Process a URL to see if it's for a package page - if link.startswith(self.index_url): - parts = list(map( - urllib.parse.unquote, link[len(self.index_url):].split('/') - )) - if len(parts) == 2 and '#' not in parts[1]: - # it's a package page, sanitize and index it - pkg = safe_name(parts[0]) - ver = safe_version(parts[1]) - self.package_pages.setdefault(pkg.lower(), {})[link] = True - return to_filename(pkg), to_filename(ver) - return None, None - # process an index page into the package-page index for match in HREF.finditer(page): try: - scan(urllib.parse.urljoin(url, htmldecode(match.group(1)))) + self._scan(urllib.parse.urljoin(url, htmldecode(match.group(1)))) except ValueError: pass - pkg, ver = scan(url) # ensure this page is in the page index - if pkg: - # process individual package page - for new_url in find_external_links(url, page): - # Process the found URL - base, frag = egg_info_for_url(new_url) - if base.endswith('.py') and not frag: - if ver: - new_url += '#egg=%s-%s' % (pkg, ver) - else: - self.need_version_info(url) - self.scan_url(new_url) - - return PYPI_MD5.sub( - lambda m: '<a href="%s#md5=%s">%s</a>' % m.group(1, 3, 2), page - ) - else: + pkg, ver = self._scan(url) # ensure this page is in the page index + if not pkg: return "" # no sense double-scanning non-package pages + # process individual package page + for new_url in find_external_links(url, page): + # Process the found URL + base, frag = egg_info_for_url(new_url) + if base.endswith('.py') and not frag: + if ver: + new_url += '#egg=%s-%s' % (pkg, ver) + else: + self.need_version_info(url) + self.scan_url(new_url) + + return PYPI_MD5.sub( + lambda m: '<a href="%s#md5=%s">%s</a>' % m.group(1, 3, 2), page + ) + def need_version_info(self, url): self.scan_all( "Page at %s links to .py file(s) without version info; an index " @@ -586,7 +578,7 @@ class PackageIndex(Environment): spec = parse_requirement_arg(spec) return getattr(self.fetch_distribution(spec, tmpdir), 'location', None) - def fetch_distribution( + def fetch_distribution( # noqa: C901 # is too complex (14) # FIXME self, requirement, tmpdir, force_scan=False, source=False, develop_ok=False, local_index=None): """Obtain a distribution suitable for fulfilling `requirement` @@ -688,8 +680,7 @@ class PackageIndex(Environment): # Make sure the file has been downloaded to the temp dir. if os.path.dirname(filename) != tmpdir: dst = os.path.join(tmpdir, basename) - from setuptools.command.easy_install import samefile - if not samefile(filename, dst): + if not (os.path.exists(dst) and os.path.samefile(filename, dst)): shutil.copy2(filename, dst) filename = dst @@ -735,7 +726,7 @@ class PackageIndex(Environment): size = -1 if "content-length" in headers: # Some servers return multiple Content-Length headers :( - sizes = get_all_headers(headers, 'Content-Length') + sizes = headers.get_all('Content-Length') size = max(map(int, sizes)) self.reporthook(url, filename, blocknum, bs, size) with open(filename, 'wb') as tfp: @@ -757,17 +748,18 @@ class PackageIndex(Environment): def reporthook(self, url, filename, blocknum, blksize, size): pass # no-op - def open_url(self, url, warning=None): + # FIXME: + def open_url(self, url, warning=None): # noqa: C901 # is too complex (12) if url.startswith('file:'): return local_open(url) try: return open_with_auth(url, self.opener) - except (ValueError, http_client.InvalidURL) as v: + except (ValueError, http.client.InvalidURL) as v: msg = ' '.join([str(arg) for arg in v.args]) if warning: self.warn(warning, msg) else: - raise DistutilsError('%s %s' % (url, msg)) + raise DistutilsError('%s %s' % (url, msg)) from v except urllib.error.HTTPError as v: return v except urllib.error.URLError as v: @@ -775,8 +767,8 @@ class PackageIndex(Environment): self.warn(warning, v.reason) else: raise DistutilsError("Download error for %s: %s" - % (url, v.reason)) - except http_client.BadStatusLine as v: + % (url, v.reason)) from v + except http.client.BadStatusLine as v: if warning: self.warn(warning, v.line) else: @@ -784,13 +776,13 @@ class PackageIndex(Environment): '%s returned a bad status line. The server might be ' 'down, %s' % (url, v.line) - ) - except (http_client.HTTPException, socket.error) as v: + ) from v + except (http.client.HTTPException, socket.error) as v: if warning: self.warn(warning, v) else: raise DistutilsError("Download error for %s: %s" - % (url, v)) + % (url, v)) from v def _download_url(self, scheme, url, tmpdir): # Determine download filename @@ -847,13 +839,14 @@ class PackageIndex(Environment): raise DistutilsError("Unexpected HTML page found at " + url) def _download_svn(self, url, filename): + warnings.warn("SVN download support is deprecated", UserWarning) url = url.split('#', 1)[0] # remove any fragment for svn's sake creds = '' if url.lower().startswith('svn:') and '@' in url: scheme, netloc, path, p, q, f = urllib.parse.urlparse(url) if not netloc and path.startswith('//') and '/' in path[2:]: netloc, path = path[2:].split('/', 1) - auth, host = urllib.parse.splituser(netloc) + auth, host = _splituser(netloc) if auth: if ':' in auth: user, pw = auth.split(':', 1) @@ -894,7 +887,7 @@ class PackageIndex(Environment): if rev is not None: self.info("Checking out %s", rev) - os.system("(cd %s && git checkout --quiet %s)" % ( + os.system("git -C %s checkout --quiet %s" % ( filename, rev, )) @@ -910,7 +903,7 @@ class PackageIndex(Environment): if rev is not None: self.info("Updating to %s", rev) - os.system("(cd %s && hg up -C -r %s -q)" % ( + os.system("hg --cwd %s up -C -r %s -q" % ( filename, rev, )) @@ -933,12 +926,19 @@ entity_sub = re.compile(r'&(#(\d+|x[\da-fA-F]+)|[\w.:-]+);?').sub def decode_entity(match): - what = match.group(1) - return unescape(what) + what = match.group(0) + return html.unescape(what) def htmldecode(text): - """Decode HTML entities in the given text.""" + """ + Decode HTML entities in the given text. + + >>> htmldecode( + ... 'https://../package_name-0.1.2.tar.gz' + ... '?tokena=A&tokenb=B">package_name-0.1.2.tar.gz') + 'https://../package_name-0.1.2.tar.gz?tokena=A&tokenb=B">package_name-0.1.2.tar.gz' + """ return entity_sub(decode_entity, text) @@ -959,8 +959,7 @@ def socket_timeout(timeout=15): def _encode_auth(auth): """ - A function compatible with Python 2.3-3.3 that will encode - auth from a URL suitable for an HTTP header. + Encode auth from a URL suitable for an HTTP header. >>> str(_encode_auth('username%3Apassword')) 'dXNlcm5hbWU6cGFzc3dvcmQ=' @@ -972,15 +971,14 @@ def _encode_auth(auth): auth_s = urllib.parse.unquote(auth) # convert to bytes auth_bytes = auth_s.encode() - # use the legacy interface for Python 2.3 support - encoded_bytes = base64.encodestring(auth_bytes) + encoded_bytes = base64.b64encode(auth_bytes) # convert back to a string encoded = encoded_bytes.decode() # strip the trailing carriage return return encoded.replace('\n', '') -class Credential(object): +class Credential: """ A username/password pair. Use like a namedtuple. """ @@ -1003,7 +1001,7 @@ class PyPIConfig(configparser.RawConfigParser): Load from ~/.pypirc """ defaults = dict.fromkeys(['username', 'password', 'repository'], '') - configparser.RawConfigParser.__init__(self, defaults) + super().__init__(defaults) rc = os.path.join(os.path.expanduser('~'), '.pypirc') if os.path.exists(rc): @@ -1038,15 +1036,16 @@ class PyPIConfig(configparser.RawConfigParser): def open_with_auth(url, opener=urllib.request.urlopen): """Open a urllib2 request, handling HTTP authentication""" - scheme, netloc, path, params, query, frag = urllib.parse.urlparse(url) + parsed = urllib.parse.urlparse(url) + scheme, netloc, path, params, query, frag = parsed - # Double scheme does not raise on Mac OS X as revealed by a + # Double scheme does not raise on macOS as revealed by a # failing test. We would expect "nonnumeric port". Refs #20. if netloc.endswith(':'): - raise http_client.InvalidURL("nonnumeric port: ''") + raise http.client.InvalidURL("nonnumeric port: ''") if scheme in ('http', 'https'): - auth, host = urllib.parse.splituser(netloc) + auth, address = _splituser(netloc) else: auth = None @@ -1059,7 +1058,7 @@ def open_with_auth(url, opener=urllib.request.urlopen): if auth: auth = "Basic " + _encode_auth(auth) - parts = scheme, host, path, params, query, frag + parts = scheme, address, path, params, query, frag new_url = urllib.parse.urlunparse(parts) request = urllib.request.Request(new_url) request.add_header("Authorization", auth) @@ -1073,13 +1072,21 @@ def open_with_auth(url, opener=urllib.request.urlopen): # Put authentication info back into request URL if same host, # so that links found on the page will work s2, h2, path2, param2, query2, frag2 = urllib.parse.urlparse(fp.url) - if s2 == scheme and h2 == host: + if s2 == scheme and h2 == address: parts = s2, netloc, path2, param2, query2, frag2 fp.url = urllib.parse.urlunparse(parts) return fp +# copy of urllib.parse._splituser from Python 3.8 +def _splituser(host): + """splituser('user[:passwd]@host[:port]') + --> 'user[:passwd]', 'host[:port]'.""" + user, delim, host = host.rpartition('@') + return (user if delim else None), host + + # adding a timeout to avoid freezing package_index open_with_auth = socket_timeout(_SOCKET_TIMEOUT)(open_with_auth) @@ -1115,5 +1122,5 @@ def local_open(url): status, message, body = 404, "Path not found", "Not found" headers = {'content-type': 'text/html'} - body_stream = six.StringIO(body) + body_stream = io.StringIO(body) return urllib.error.HTTPError(url, status, message, headers, body_stream) |