about | summary | refs | log | tree | commit | diff
path: root/setuptools/package_index.py
diff options
context:
space:
mode:
Diffstat (limited to 'setuptools/package_index.py')
-rw-r--r--[-rwxr-xr-x] setuptools/package_index.py | 225
1 file changed, 116 insertions, 109 deletions
diff --git a/setuptools/package_index.py b/setuptools/package_index.py
index b6407be..14881d2 100755..100644
--- a/setuptools/package_index.py
+++ b/setuptools/package_index.py
@@ -2,36 +2,39 @@
import sys
import os
import re
+import io
import shutil
import socket
import base64
import hashlib
import itertools
+import warnings
+import configparser
+import html
+import http.client
+import urllib.parse
+import urllib.request
+import urllib.error
from functools import wraps
-from setuptools.extern import six
-from setuptools.extern.six.moves import urllib, http_client, configparser, map
-
import setuptools
from pkg_resources import (
CHECKOUT_DIST, Distribution, BINARY_DIST, normalize_path, SOURCE_DIST,
Environment, find_distributions, safe_name, safe_version,
- to_filename, Requirement, DEVELOP_DIST, EGG_DIST,
+ to_filename, Requirement, DEVELOP_DIST, EGG_DIST, parse_version,
)
-from setuptools import ssl_support
from distutils import log
from distutils.errors import DistutilsError
from fnmatch import translate
-from setuptools.py27compat import get_all_headers
-from setuptools.py33compat import unescape
from setuptools.wheel import Wheel
+from setuptools.extern.more_itertools import unique_everseen
+
EGG_FRAGMENT = re.compile(r'^egg=([-A-Za-z0-9_.+!]+)$')
-HREF = re.compile("""href\\s*=\\s*['"]?([^'"> ]+)""", re.I)
-# this is here to fix emacs' cruddy broken syntax highlighting
+HREF = re.compile(r"""href\s*=\s*['"]?([^'"> ]+)""", re.I)
PYPI_MD5 = re.compile(
- '<a href="([^"#]+)">([^<]+)</a>\n\\s+\\(<a (?:title="MD5 hash"\n\\s+)'
- 'href="[^?]+\\?:action=show_md5&amp;digest=([0-9a-f]{32})">md5</a>\\)'
+ r'<a href="([^"#]+)">([^<]+)</a>\n\s+\(<a (?:title="MD5 hash"\n\s+)'
+ r'href="[^?]+\?:action=show_md5&amp;digest=([0-9a-f]{32})">md5</a>\)'
)
URL_SCHEME = re.compile('([-+.a-z0-9]{2,}):', re.I).match
EXTENSIONS = ".tar.gz .tar.bz2 .tar .zip .tgz".split()
@@ -44,16 +47,17 @@ __all__ = [
_SOCKET_TIMEOUT = 15
_tmpl = "setuptools/{setuptools.__version__} Python-urllib/{py_major}"
-user_agent = _tmpl.format(py_major=sys.version[:3], setuptools=setuptools)
+user_agent = _tmpl.format(
+ py_major='{}.{}'.format(*sys.version_info), setuptools=setuptools)
def parse_requirement_arg(spec):
try:
return Requirement.parse(spec)
- except ValueError:
+ except ValueError as e:
raise DistutilsError(
"Not a URL, existing file, or requirement spec: %r" % (spec,)
- )
+ ) from e
def parse_bdist_wininst(name):
@@ -158,7 +162,7 @@ def interpret_distro_name(
# Generate alternative interpretations of a source distro name
# Because some packages are ambiguous as to name/versions split
# e.g. "adns-python-1.1.0", "egenix-mx-commercial", etc.
- # So, we generate each possible interepretation (e.g. "adns, python-1.1.0"
+ # So, we generate each possible interpretation (e.g. "adns, python-1.1.0"
# "adns-python, 1.1.0", and "adns-python-1.1.0, no version"). In practice,
# the spurious interpretations should be ignored, because in the event
# there's also an "adns" package, the spurious "python-1.1.0" version will
@@ -180,25 +184,6 @@ def interpret_distro_name(
)
-# From Python 2.7 docs
-def unique_everseen(iterable, key=None):
- "List unique elements, preserving order. Remember all elements ever seen."
- # unique_everseen('AAAABBBCCDAABBB') --> A B C D
- # unique_everseen('ABBCcAD', str.lower) --> A B C D
- seen = set()
- seen_add = seen.add
- if key is None:
- for element in six.moves.filterfalse(seen.__contains__, iterable):
- seen_add(element)
- yield element
- else:
- for element in iterable:
- k = key(element)
- if k not in seen:
- seen_add(k)
- yield element
-
-
def unique_values(func):
"""
Wrap a function returning an iterable such that the resulting iterable
@@ -235,7 +220,7 @@ def find_external_links(url, page):
yield urllib.parse.urljoin(url, htmldecode(match.group(1)))
-class ContentChecker(object):
+class ContentChecker:
"""
A null content checker that defines the interface for checking content
"""
@@ -300,24 +285,25 @@ class PackageIndex(Environment):
self, index_url="https://pypi.org/simple/", hosts=('*',),
ca_bundle=None, verify_ssl=True, *args, **kw
):
- Environment.__init__(self, *args, **kw)
+ super().__init__(*args, **kw)
self.index_url = index_url + "/" [:not index_url.endswith('/')]
self.scanned_urls = {}
self.fetched_urls = {}
self.package_pages = {}
self.allows = re.compile('|'.join(map(translate, hosts))).match
self.to_scan = []
- use_ssl = (
- verify_ssl
- and ssl_support.is_available
- and (ca_bundle or ssl_support.find_ca_bundle())
- )
- if use_ssl:
- self.opener = ssl_support.opener_for(ca_bundle)
- else:
- self.opener = urllib.request.urlopen
+ self.opener = urllib.request.urlopen
+
+ def add(self, dist):
+ # ignore invalid versions
+ try:
+ parse_version(dist.version)
+ except Exception:
+ return
+ return super().add(dist)
- def process_url(self, url, retrieve=False):
+ # FIXME: 'PackageIndex.process_url' is too complex (14)
+ def process_url(self, url, retrieve=False): # noqa: C901
"""Evaluate a URL as a possible download, and maybe retrieve it"""
if url in self.scanned_urls and not retrieve:
return
@@ -346,6 +332,8 @@ class PackageIndex(Environment):
f = self.open_url(url, tmpl % url)
if f is None:
return
+ if isinstance(f, urllib.error.HTTPError) and f.code == 401:
+ self.info("Authentication error: %s" % f.msg)
self.fetched_urls[f.url] = True
if 'html' not in f.headers.get('content-type', '').lower():
f.close() # not html, we can't process it
@@ -423,49 +411,53 @@ class PackageIndex(Environment):
dist.precedence = SOURCE_DIST
self.add(dist)
+ def _scan(self, link):
+ # Process a URL to see if it's for a package page
+ NO_MATCH_SENTINEL = None, None
+ if not link.startswith(self.index_url):
+ return NO_MATCH_SENTINEL
+
+ parts = list(map(
+ urllib.parse.unquote, link[len(self.index_url):].split('/')
+ ))
+ if len(parts) != 2 or '#' in parts[1]:
+ return NO_MATCH_SENTINEL
+
+ # it's a package page, sanitize and index it
+ pkg = safe_name(parts[0])
+ ver = safe_version(parts[1])
+ self.package_pages.setdefault(pkg.lower(), {})[link] = True
+ return to_filename(pkg), to_filename(ver)
+
def process_index(self, url, page):
"""Process the contents of a PyPI page"""
- def scan(link):
- # Process a URL to see if it's for a package page
- if link.startswith(self.index_url):
- parts = list(map(
- urllib.parse.unquote, link[len(self.index_url):].split('/')
- ))
- if len(parts) == 2 and '#' not in parts[1]:
- # it's a package page, sanitize and index it
- pkg = safe_name(parts[0])
- ver = safe_version(parts[1])
- self.package_pages.setdefault(pkg.lower(), {})[link] = True
- return to_filename(pkg), to_filename(ver)
- return None, None
-
# process an index page into the package-page index
for match in HREF.finditer(page):
try:
- scan(urllib.parse.urljoin(url, htmldecode(match.group(1))))
+ self._scan(urllib.parse.urljoin(url, htmldecode(match.group(1))))
except ValueError:
pass
- pkg, ver = scan(url) # ensure this page is in the page index
- if pkg:
- # process individual package page
- for new_url in find_external_links(url, page):
- # Process the found URL
- base, frag = egg_info_for_url(new_url)
- if base.endswith('.py') and not frag:
- if ver:
- new_url += '#egg=%s-%s' % (pkg, ver)
- else:
- self.need_version_info(url)
- self.scan_url(new_url)
-
- return PYPI_MD5.sub(
- lambda m: '<a href="%s#md5=%s">%s</a>' % m.group(1, 3, 2), page
- )
- else:
+ pkg, ver = self._scan(url) # ensure this page is in the page index
+ if not pkg:
return "" # no sense double-scanning non-package pages
+ # process individual package page
+ for new_url in find_external_links(url, page):
+ # Process the found URL
+ base, frag = egg_info_for_url(new_url)
+ if base.endswith('.py') and not frag:
+ if ver:
+ new_url += '#egg=%s-%s' % (pkg, ver)
+ else:
+ self.need_version_info(url)
+ self.scan_url(new_url)
+
+ return PYPI_MD5.sub(
+ lambda m: '<a href="%s#md5=%s">%s</a>' % m.group(1, 3, 2), page
+ )
+
def need_version_info(self, url):
self.scan_all(
"Page at %s links to .py file(s) without version info; an index "
@@ -586,7 +578,7 @@ class PackageIndex(Environment):
spec = parse_requirement_arg(spec)
return getattr(self.fetch_distribution(spec, tmpdir), 'location', None)
- def fetch_distribution(
+ def fetch_distribution( # noqa: C901 # is too complex (14) # FIXME
self, requirement, tmpdir, force_scan=False, source=False,
develop_ok=False, local_index=None):
"""Obtain a distribution suitable for fulfilling `requirement`
@@ -688,8 +680,7 @@ class PackageIndex(Environment):
# Make sure the file has been downloaded to the temp dir.
if os.path.dirname(filename) != tmpdir:
dst = os.path.join(tmpdir, basename)
- from setuptools.command.easy_install import samefile
- if not samefile(filename, dst):
+ if not (os.path.exists(dst) and os.path.samefile(filename, dst)):
shutil.copy2(filename, dst)
filename = dst
@@ -735,7 +726,7 @@ class PackageIndex(Environment):
size = -1
if "content-length" in headers:
# Some servers return multiple Content-Length headers :(
- sizes = get_all_headers(headers, 'Content-Length')
+ sizes = headers.get_all('Content-Length')
size = max(map(int, sizes))
self.reporthook(url, filename, blocknum, bs, size)
with open(filename, 'wb') as tfp:
@@ -757,17 +748,18 @@ class PackageIndex(Environment):
def reporthook(self, url, filename, blocknum, blksize, size):
pass # no-op
- def open_url(self, url, warning=None):
+ # FIXME:
+ def open_url(self, url, warning=None): # noqa: C901 # is too complex (12)
if url.startswith('file:'):
return local_open(url)
try:
return open_with_auth(url, self.opener)
- except (ValueError, http_client.InvalidURL) as v:
+ except (ValueError, http.client.InvalidURL) as v:
msg = ' '.join([str(arg) for arg in v.args])
if warning:
self.warn(warning, msg)
else:
- raise DistutilsError('%s %s' % (url, msg))
+ raise DistutilsError('%s %s' % (url, msg)) from v
except urllib.error.HTTPError as v:
return v
except urllib.error.URLError as v:
@@ -775,8 +767,8 @@ class PackageIndex(Environment):
self.warn(warning, v.reason)
else:
raise DistutilsError("Download error for %s: %s"
- % (url, v.reason))
- except http_client.BadStatusLine as v:
+ % (url, v.reason)) from v
+ except http.client.BadStatusLine as v:
if warning:
self.warn(warning, v.line)
else:
@@ -784,13 +776,13 @@ class PackageIndex(Environment):
'%s returned a bad status line. The server might be '
'down, %s' %
(url, v.line)
- )
- except (http_client.HTTPException, socket.error) as v:
+ ) from v
+ except (http.client.HTTPException, socket.error) as v:
if warning:
self.warn(warning, v)
else:
raise DistutilsError("Download error for %s: %s"
- % (url, v))
+ % (url, v)) from v
def _download_url(self, scheme, url, tmpdir):
# Determine download filename
@@ -847,13 +839,14 @@ class PackageIndex(Environment):
raise DistutilsError("Unexpected HTML page found at " + url)
def _download_svn(self, url, filename):
+ warnings.warn("SVN download support is deprecated", UserWarning)
url = url.split('#', 1)[0] # remove any fragment for svn's sake
creds = ''
if url.lower().startswith('svn:') and '@' in url:
scheme, netloc, path, p, q, f = urllib.parse.urlparse(url)
if not netloc and path.startswith('//') and '/' in path[2:]:
netloc, path = path[2:].split('/', 1)
- auth, host = urllib.parse.splituser(netloc)
+ auth, host = _splituser(netloc)
if auth:
if ':' in auth:
user, pw = auth.split(':', 1)
@@ -894,7 +887,7 @@ class PackageIndex(Environment):
if rev is not None:
self.info("Checking out %s", rev)
- os.system("(cd %s && git checkout --quiet %s)" % (
+ os.system("git -C %s checkout --quiet %s" % (
filename,
rev,
))
@@ -910,7 +903,7 @@ class PackageIndex(Environment):
if rev is not None:
self.info("Updating to %s", rev)
- os.system("(cd %s && hg up -C -r %s -q)" % (
+ os.system("hg --cwd %s up -C -r %s -q" % (
filename,
rev,
))
@@ -933,12 +926,19 @@ entity_sub = re.compile(r'&(#(\d+|x[\da-fA-F]+)|[\w.:-]+);?').sub
def decode_entity(match):
- what = match.group(1)
- return unescape(what)
+ what = match.group(0)
+ return html.unescape(what)
def htmldecode(text):
- """Decode HTML entities in the given text."""
+ """
+ Decode HTML entities in the given text.
+
+ >>> htmldecode(
+ ... 'https://../package_name-0.1.2.tar.gz'
+ ... '?tokena=A&amp;tokenb=B">package_name-0.1.2.tar.gz')
+ 'https://../package_name-0.1.2.tar.gz?tokena=A&tokenb=B">package_name-0.1.2.tar.gz'
+ """
return entity_sub(decode_entity, text)
@@ -959,8 +959,7 @@ def socket_timeout(timeout=15):
def _encode_auth(auth):
"""
- A function compatible with Python 2.3-3.3 that will encode
- auth from a URL suitable for an HTTP header.
+ Encode auth from a URL suitable for an HTTP header.
>>> str(_encode_auth('username%3Apassword'))
'dXNlcm5hbWU6cGFzc3dvcmQ='
@@ -972,15 +971,14 @@ def _encode_auth(auth):
auth_s = urllib.parse.unquote(auth)
# convert to bytes
auth_bytes = auth_s.encode()
- # use the legacy interface for Python 2.3 support
- encoded_bytes = base64.encodestring(auth_bytes)
+ encoded_bytes = base64.b64encode(auth_bytes)
# convert back to a string
encoded = encoded_bytes.decode()
# strip the trailing carriage return
return encoded.replace('\n', '')
-class Credential(object):
+class Credential:
"""
A username/password pair. Use like a namedtuple.
"""
@@ -1003,7 +1001,7 @@ class PyPIConfig(configparser.RawConfigParser):
Load from ~/.pypirc
"""
defaults = dict.fromkeys(['username', 'password', 'repository'], '')
- configparser.RawConfigParser.__init__(self, defaults)
+ super().__init__(defaults)
rc = os.path.join(os.path.expanduser('~'), '.pypirc')
if os.path.exists(rc):
@@ -1038,15 +1036,16 @@ class PyPIConfig(configparser.RawConfigParser):
def open_with_auth(url, opener=urllib.request.urlopen):
"""Open a urllib2 request, handling HTTP authentication"""
- scheme, netloc, path, params, query, frag = urllib.parse.urlparse(url)
+ parsed = urllib.parse.urlparse(url)
+ scheme, netloc, path, params, query, frag = parsed
- # Double scheme does not raise on Mac OS X as revealed by a
+ # Double scheme does not raise on macOS as revealed by a
# failing test. We would expect "nonnumeric port". Refs #20.
if netloc.endswith(':'):
- raise http_client.InvalidURL("nonnumeric port: ''")
+ raise http.client.InvalidURL("nonnumeric port: ''")
if scheme in ('http', 'https'):
- auth, host = urllib.parse.splituser(netloc)
+ auth, address = _splituser(netloc)
else:
auth = None
@@ -1059,7 +1058,7 @@ def open_with_auth(url, opener=urllib.request.urlopen):
if auth:
auth = "Basic " + _encode_auth(auth)
- parts = scheme, host, path, params, query, frag
+ parts = scheme, address, path, params, query, frag
new_url = urllib.parse.urlunparse(parts)
request = urllib.request.Request(new_url)
request.add_header("Authorization", auth)
@@ -1073,13 +1072,21 @@ def open_with_auth(url, opener=urllib.request.urlopen):
# Put authentication info back into request URL if same host,
# so that links found on the page will work
s2, h2, path2, param2, query2, frag2 = urllib.parse.urlparse(fp.url)
- if s2 == scheme and h2 == host:
+ if s2 == scheme and h2 == address:
parts = s2, netloc, path2, param2, query2, frag2
fp.url = urllib.parse.urlunparse(parts)
return fp
+# copy of urllib.parse._splituser from Python 3.8
+def _splituser(host):
+ """splituser('user[:passwd]@host[:port]')
+ --> 'user[:passwd]', 'host[:port]'."""
+ user, delim, host = host.rpartition('@')
+ return (user if delim else None), host
+
+
# adding a timeout to avoid freezing package_index
open_with_auth = socket_timeout(_SOCKET_TIMEOUT)(open_with_auth)
@@ -1115,5 +1122,5 @@ def local_open(url):
status, message, body = 404, "Path not found", "Not found"
headers = {'content-type': 'text/html'}
- body_stream = six.StringIO(body)
+ body_stream = io.StringIO(body)
return urllib.error.HTTPError(url, status, message, headers, body_stream)