diff options
Diffstat (limited to 'python/pip_install/tools/wheel_installer/wheel_installer.py')
-rw-r--r-- | python/pip_install/tools/wheel_installer/wheel_installer.py | 452 |
1 files changed, 452 insertions, 0 deletions
# Copyright 2023 The Bazel Authors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Build and/or fetch a single wheel, unpack it and generate its BUILD.bazel file."""

import argparse
import errno
import glob
import json
import os
import re
import shutil
import subprocess
import sys
import textwrap
from pathlib import Path
from tempfile import NamedTemporaryFile
from typing import Dict, Iterable, List, Optional, Set, Tuple

from pip._vendor.packaging.utils import canonicalize_name

from python.pip_install.tools.lib import annotation, arguments, bazel
from python.pip_install.tools.wheel_installer import namespace_pkgs, wheel

# Matches `name[extra1, extra2, ...]` at the start of a requirement string.
# https://www.python.org/dev/peps/pep-0508/#grammar
_EXTRAS_PATTERN = re.compile(
    r"^\s*([0-9A-Za-z][0-9A-Za-z_.\-]*)\s*\[\s*([0-9A-Za-z][0-9A-Za-z_.\-]*(?:\s*,\s*[0-9A-Za-z][0-9A-Za-z_.\-]*)*)\s*\]"
)


def _configure_reproducible_wheels() -> None:
    """Modifies the environment to make wheel building reproducible.

    Wheels created from sdists are not reproducible by default. We can however
    workaround this by patching in some configuration with environment variables.
    Values already present in the environment for SOURCE_DATE_EPOCH and
    PYTHONHASHSEED are left untouched; CFLAGS is always extended with `-g0`.
    """

    # wheel, by default, enables debug symbols in GCC. This incidentally captures
    # the build path in the .so file. We can override this behavior by disabling
    # debug symbols entirely.
    # https://github.com/pypa/pip/issues/6505
    if "CFLAGS" in os.environ:
        os.environ["CFLAGS"] += " -g0"
    else:
        os.environ["CFLAGS"] = "-g0"

    # set SOURCE_DATE_EPOCH to 1980 so that we can use python wheels
    # https://github.com/NixOS/nixpkgs/blob/master/doc/languages-frameworks/python.section.md#python-setuppy-bdist_wheel-cannot-create-whl
    os.environ.setdefault("SOURCE_DATE_EPOCH", "315532800")

    # Python wheel metadata files can be unstable.
    # See https://bitbucket.org/pypa/wheel/pull-requests/74/make-the-output-of-metadata-files/diff
    os.environ.setdefault("PYTHONHASHSEED", "0")


def _parse_requirement_for_extra(
    requirement: str,
) -> Tuple[Optional[str], Optional[Set[str]]]:
    """Extracts the canonical name and the extras from a requirement string.

    Args:
        requirement: a requirement specifier string, e.g. `name[extra1,extra2]==1.0`.

    Returns:
        A `(name, extras)` tuple when the requirement specifies extras, where
        `name` has been passed through `canonicalize_name` and `extras` is the
        set of extra names. `(None, None)` when no extras are specified.
    """
    matches = _EXTRAS_PATTERN.match(requirement)
    if not matches:
        return None, None

    return (
        canonicalize_name(matches.group(1)),
        {extra.strip() for extra in matches.group(2).split(",")},
    )


def _setup_namespace_pkg_compatibility(wheel_dir: str) -> None:
    """Converts native namespace packages to pkgutil-style packages

    Namespace packages can be created in one of three ways. They are detailed here:
    https://packaging.python.org/guides/packaging-namespace-packages/#creating-a-namespace-package

    'pkgutil-style namespace packages' (2) and 'pkg_resources-style namespace packages' (3) works in Bazel, but
    'native namespace packages' (1) do not.

    We ensure compatibility with Bazel of method 1 by converting them into method 2.

    Args:
        wheel_dir: the directory of the wheel to convert
    """

    namespace_pkg_dirs = namespace_pkgs.implicit_namespace_packages(
        wheel_dir,
        # The `bin` directory directly under the wheel directory is skipped.
        ignored_dirnames=[f"{wheel_dir}/bin"],
    )

    for ns_pkg_dir in namespace_pkg_dirs:
        namespace_pkgs.add_pkgutil_style_namespace_pkg_init(ns_pkg_dir)


def _generate_entry_point_contents(
    module: str, attribute: str, shebang: str = "#!/usr/bin/env python3"
) -> str:
    """Generate the contents of an entry point script.

    Args:
        module (str): The name of the module to use.
        attribute (str): The name of the attribute to call.
        shebang (str, optional): The shebang to use for the entry point python
            file.

    Returns:
        str: A string of python code.
    """
    return textwrap.dedent(
        """\
        {shebang}
        import sys
        from {module} import {attribute}
        if __name__ == "__main__":
            sys.exit({attribute}())
        """.format(
            shebang=shebang, module=module, attribute=attribute
        )
    )


def _generate_entry_point_rule(name: str, script: str, pkg: str) -> str:
    """Generate a Bazel `py_binary` rule for an entry point script.

    Note that the script is used to determine the name of the target. The name of
    entry point targets should be unique to avoid conflicts with existing sources or
    directories within a wheel.

    Args:
        name (str): The name of the generated py_binary.
        script (str): The path to the entry point's python file.
        pkg (str): The package owning the entry point. This is expected to
            match up with the `py_library` defined for each repository.

    Returns:
        str: A `py_binary` instantiation.
    """
    return textwrap.dedent(
        """\
        py_binary(
            name = "{name}",
            srcs = ["{src}"],
            # This makes this directory a top-level in the python import
            # search path for anything that depends on this.
            imports = ["."],
            deps = ["{pkg}"],
        )
        """.format(
            # Backslashes are replaced so the rendered path uses forward slashes.
            name=name,
            src=str(script).replace("\\", "/"),
            pkg=pkg,
        )
    )


def _generate_copy_commands(src: str, dest: str, is_executable: bool = False) -> str:
    """Generate a [@bazel_skylib//rules:copy_file.bzl%copy_file][cf] target

    [cf]: https://github.com/bazelbuild/bazel-skylib/blob/1.1.1/docs/copy_file_doc.md

    Args:
        src (str): The label for the `src` attribute of [copy_file][cf]
        dest (str): The label for the `out` attribute of [copy_file][cf]
        is_executable (bool, optional): Whether or not the file being copied is executable.
            sets `is_executable` for [copy_file][cf]

    Returns:
        str: A `copy_file` instantiation.
    """
    return textwrap.dedent(
        """\
        copy_file(
            name = "{dest}.copy",
            src = "{src}",
            out = "{dest}",
            is_executable = {is_executable},
        )
        """.format(
            src=src,
            dest=dest,
            # Rendered via str(bool), i.e. the text `True` or `False`.
            is_executable=is_executable,
        )
    )


def _generate_build_file_contents(
    name: str,
    dependencies: List[str],
    whl_file_deps: List[str],
    data_exclude: List[str],
    tags: List[str],
    srcs_exclude: Optional[List[str]] = None,
    data: Optional[List[str]] = None,
    additional_content: Optional[List[str]] = None,
) -> str:
    """Generate a BUILD file for an unzipped Wheel

    Args:
        name: the target name of the py_library
        dependencies: a list of Bazel labels pointing to dependencies of the library
        whl_file_deps: a list of Bazel labels pointing to wheel file dependencies of this wheel.
        data_exclude: more patterns to exclude from the data attribute of generated py_library rules.
        tags: list of tags to apply to generated py_library rules.
        srcs_exclude: patterns to exclude from the srcs glob of the generated py_library.
            Defaults to no exclusions.
        data: extra entries prepended to the data attribute of the generated py_library.
            Defaults to no extra entries.
        additional_content: A list of additional content to append to the BUILD file.
            Defaults to no additional content.

    Returns:
        A complete BUILD file as a string

    We allow for empty Python sources as for Wheels containing only compiled C code
    there may be no Python sources whatsoever (e.g. packages written in Cython: like `pymssql`).
    """
    # None defaults avoid sharing one mutable list object between calls.
    srcs_exclude = [] if srcs_exclude is None else srcs_exclude
    data = [] if data is None else data
    additional_content = [] if additional_content is None else additional_content

    # De-duplicate the built-in exclusions together with the caller's patterns;
    # sorting keeps the rendered output stable.
    data_exclude_patterns = sorted(
        {
            "**/* *",
            "**/*.py",
            "**/*.pyc",
            "**/*.pyc.*",  # During pyc creation, temp files named *.pyc.NNNN are created
            # RECORD is known to contain sha256 checksums of files which might include the checksums
            # of generated files produced when wheels are installed. The file is ignored to avoid
            # Bazel caching issues.
            "**/*.dist-info/RECORD",
            *data_exclude,
        }
    )

    build_content = textwrap.dedent(
        """\
        load("@rules_python//python:defs.bzl", "py_library", "py_binary")
        load("@bazel_skylib//rules:copy_file.bzl", "copy_file")

        package(default_visibility = ["//visibility:public"])

        filegroup(
            name = "{dist_info_label}",
            srcs = glob(["site-packages/*.dist-info/**"], allow_empty = True),
        )

        filegroup(
            name = "{data_label}",
            srcs = glob(["data/**"], allow_empty = True),
        )

        filegroup(
            name = "{whl_file_label}",
            srcs = glob(["*.whl"], allow_empty = True),
            data = [{whl_file_deps}],
        )

        py_library(
            name = "{name}",
            srcs = glob(["site-packages/**/*.py"], exclude={srcs_exclude}, allow_empty = True),
            data = {data} + glob(["site-packages/**/*"], exclude={data_exclude}),
            # This makes this directory a top-level in the python import
            # search path for anything that depends on this.
            imports = ["site-packages"],
            deps = [{dependencies}],
            tags = [{tags}],
        )
        """.format(
            name=name,
            dependencies=",".join(sorted(dependencies)),
            data_exclude=json.dumps(data_exclude_patterns),
            whl_file_label=bazel.WHEEL_FILE_LABEL,
            whl_file_deps=",".join(sorted(whl_file_deps)),
            tags=",".join(sorted(f'"{t}"' for t in tags)),
            data_label=bazel.DATA_LABEL,
            dist_info_label=bazel.DIST_INFO_LABEL,
            srcs_exclude=json.dumps(sorted(srcs_exclude)),
            data=json.dumps(sorted(data)),
        )
    )

    return "\n".join([build_content] + additional_content)


def _extract_wheel(
    wheel_file: str,
    extras: Dict[str, Set[str]],
    pip_data_exclude: List[str],
    enable_implicit_namespace_pkgs: bool,
    repo_prefix: str,
    installation_dir: Path = Path("."),
    annotation: Optional[annotation.Annotation] = None,
) -> None:
    """Extracts wheel into given directory and creates py_library and filegroup targets.

    Args:
        wheel_file: the filepath of the .whl
        extras: a mapping from distribution name to the set of extras requested
            for it; the entry for this wheel's name (if any) selects the extras
            whose dependencies are added to the installed wheel
        pip_data_exclude: list of file patterns to exclude from the generated data section of the py_library.
            This list is not modified.
        enable_implicit_namespace_pkgs: if true, disables conversion of implicit namespace packages and will unzip as-is
        repo_prefix: forwarded to the `bazel.sanitised_repo_*_label` helpers when
            rendering the labels of this wheel's dependencies
        installation_dir: the destination directory for installation of the wheel.
        annotation: An optional set of annotations to apply to the BUILD contents of the wheel.
    """

    whl = wheel.Wheel(wheel_file)
    whl.unzip(installation_dir)

    if not enable_implicit_namespace_pkgs:
        _setup_namespace_pkg_compatibility(installation_dir)

    extras_requested = extras.get(whl.name, set())
    # Packages may create dependency cycles when specifying optional-dependencies / 'extras'.
    # Example: github.com/google/etils/blob/a0b71032095db14acf6b33516bca6d885fe09e35/pyproject.toml#L32.
    self_edge_dep = {whl.name}
    whl_deps = sorted(whl.dependencies(extras_requested) - self_edge_dep)

    sanitised_dependencies = [
        bazel.sanitised_repo_library_label(d, repo_prefix=repo_prefix) for d in whl_deps
    ]
    sanitised_wheel_file_dependencies = [
        bazel.sanitised_repo_file_label(d, repo_prefix=repo_prefix) for d in whl_deps
    ]

    # Entry point rules come first; annotation content is appended after them.
    additional_content = []
    for name, (module, attribute) in sorted(whl.entry_points().items()):
        # There is an extreme edge-case with entry_points that end with `.py`
        # See: https://github.com/bazelbuild/bazel/blob/09c621e4cf5b968f4c6cdf905ab142d5961f9ddc/src/test/java/com/google/devtools/build/lib/rules/python/PyBinaryConfiguredTargetTest.java#L174
        entry_point_without_py = f"{name[:-3]}_py" if name.endswith(".py") else name
        entry_point_target_name = (
            f"{bazel.WHEEL_ENTRY_POINT_PREFIX}_{entry_point_without_py}"
        )
        entry_point_script_name = f"{entry_point_target_name}.py"
        (installation_dir / entry_point_script_name).write_text(
            _generate_entry_point_contents(module, attribute)
        )
        additional_content.append(
            _generate_entry_point_rule(
                entry_point_target_name,
                entry_point_script_name,
                bazel.PY_LIBRARY_LABEL,
            )
        )

    data = []
    # Copy so that extending with annotation globs does not mutate the caller's list.
    data_exclude = list(pip_data_exclude)
    srcs_exclude = []
    if annotation:
        for src, dest in annotation.copy_files.items():
            data.append(dest)
            additional_content.append(_generate_copy_commands(src, dest))
        for src, dest in annotation.copy_executables.items():
            data.append(dest)
            additional_content.append(
                _generate_copy_commands(src, dest, is_executable=True)
            )
        data.extend(annotation.data)
        data_exclude.extend(annotation.data_exclude_glob)
        srcs_exclude.extend(annotation.srcs_exclude_glob)
        if annotation.additive_build_content:
            additional_content.append(annotation.additive_build_content)

    contents = _generate_build_file_contents(
        name=bazel.PY_LIBRARY_LABEL,
        dependencies=sanitised_dependencies,
        whl_file_deps=sanitised_wheel_file_dependencies,
        data_exclude=data_exclude,
        data=data,
        srcs_exclude=srcs_exclude,
        tags=["pypi_name=" + whl.name, "pypi_version=" + whl.version],
        additional_content=additional_content,
    )

    # The file is only opened once the contents are fully rendered, so a failure
    # above does not leave an empty BUILD.bazel behind.
    with open(os.path.join(installation_dir, "BUILD.bazel"), "w") as build_file:
        build_file.write(contents)


def main() -> None:
    """Runs pip for a single requirement and extracts the resulting wheel.

    Parses the command line, invokes `pip wheel` (or `pip download` when
    `--download_only`-style arguments request it) in the current directory and
    then unpacks the first `*.whl` found there via `_extract_wheel`.

    Raises:
        subprocess.CalledProcessError: if the pip invocation fails.
        FileNotFoundError: if no wheel is present in the current directory
            after pip has run.
    """
    parser = argparse.ArgumentParser(
        description="Build and/or fetch a single wheel based on the requirement passed in"
    )
    parser.add_argument(
        "--requirement",
        action="store",
        required=True,
        help="A single PEP508 requirement specifier string.",
    )
    parser.add_argument(
        "--annotation",
        type=annotation.annotation_from_str_path,
        help="A json encoded file containing annotations for rendered packages.",
    )
    arguments.parse_common_args(parser)
    args = parser.parse_args()
    # NOTE(review): presumably deserialize_structured_args decodes the structured
    # values (e.g. "extra_pip_args", "environment", "pip_data_exclude") in place
    # -- confirm against python.pip_install.tools.lib.arguments.
    deserialized_args = dict(vars(args))
    arguments.deserialize_structured_args(deserialized_args)

    _configure_reproducible_wheels()

    pip_args = (
        [sys.executable, "-m", "pip"]
        + (["--isolated"] if args.isolated else [])
        + (["download", "--only-binary=:all:"] if args.download_only else ["wheel"])
        + ["--no-deps"]
        + deserialized_args["extra_pip_args"]
    )

    requirement_file = NamedTemporaryFile(mode="wb", delete=False)
    try:
        requirement_file.write(args.requirement.encode("utf-8"))
        requirement_file.flush()
        # Close the file so pip is allowed to read it when running on Windows.
        # For more information, see: https://bugs.python.org/issue14243
        requirement_file.close()
        # Requirement specific args like --hash can only be passed in a requirements file,
        # so write our single requirement into a temp file in case it has any of those flags.
        pip_args.extend(["-r", requirement_file.name])

        env = os.environ.copy()
        env.update(deserialized_args["environment"])
        # Assumes any errors are logged by pip so do nothing. This command will fail if pip fails
        subprocess.run(pip_args, check=True, env=env)
    finally:
        # delete=False above means the temp file must be removed by hand; a file
        # that is already gone is not an error.
        try:
            os.unlink(requirement_file.name)
        except OSError as e:
            if e.errno != errno.ENOENT:
                raise

    name, extras_for_pkg = _parse_requirement_for_extra(args.requirement)
    extras = {name: extras_for_pkg} if extras_for_pkg and name else dict()

    wheel_files = glob.glob("*.whl")
    if not wheel_files:
        # Fail with an explicit message instead of a bare StopIteration.
        raise FileNotFoundError(
            "No wheel (*.whl) found in the current directory after running pip "
            f"for requirement: {args.requirement}"
        )

    _extract_wheel(
        wheel_file=wheel_files[0],
        extras=extras,
        pip_data_exclude=deserialized_args["pip_data_exclude"],
        enable_implicit_namespace_pkgs=args.enable_implicit_namespace_pkgs,
        repo_prefix=args.repo_prefix,
        annotation=args.annotation,
    )


if __name__ == "__main__":
    main()