diff options
Diffstat (limited to 'python/private/python_bootstrap_template.txt')
-rw-r--r-- | python/private/python_bootstrap_template.txt | 559 |
1 files changed, 559 insertions, 0 deletions
diff --git a/python/private/python_bootstrap_template.txt b/python/private/python_bootstrap_template.txt new file mode 100644 index 0000000..92dd6b8 --- /dev/null +++ b/python/private/python_bootstrap_template.txt @@ -0,0 +1,559 @@ +%shebang% + +# This script must retain compatibility with a wide variety of Python versions +# since it is run for every py_binary target. Currently we guarantee support +# going back to Python 2.7, and try to support even Python 2.6 on a best-effort +# basis. We might abandon 2.6 support once users have the ability to control the +# above shebang string via the Python toolchain (#8685). + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import sys + +# The Python interpreter unconditionally prepends the directory containing this +# script (following symlinks) to the import path. This is the cause of #9239, +# and is a special case of #7091. We therefore explicitly delete that entry. +# TODO(#7091): Remove this hack when no longer necessary. +del sys.path[0] + +import os +import subprocess + +def IsRunningFromZip(): + return %is_zipfile% + +if IsRunningFromZip(): + import shutil + import tempfile + import zipfile +else: + import re + +# Return True if running on Windows +def IsWindows(): + return os.name == 'nt' + +def GetWindowsPathWithUNCPrefix(path): + """Adds UNC prefix after getting a normalized absolute Windows path. + + No-op for non-Windows platforms or if running under python2. + """ + path = path.strip() + + # No need to add prefix for non-Windows platforms. + # And \\?\ doesn't work in python 2 or on mingw + if not IsWindows() or sys.version_info[0] < 3: + return path + + # Starting in Windows 10, version 1607(OS build 14393), MAX_PATH limitations have been + # removed from common Win32 file and directory functions. + # Related doc: https://docs.microsoft.com/en-us/windows/win32/fileio/maximum-file-path-limitation?tabs=cmd#enable-long-paths-in-windows-10-version-1607-and-later + import platform + if platform.win32_ver()[1] >= '10.0.14393': + return path + + # import sysconfig only now to maintain python 2.6 compatibility + import sysconfig + if sysconfig.get_platform() == 'mingw': + return path + + # Lets start the unicode fun + unicode_prefix = '\\\\?\\' + if path.startswith(unicode_prefix): + return path + + # os.path.abspath returns a normalized absolute path + return unicode_prefix + os.path.abspath(path) + +def HasWindowsExecutableExtension(path): + return path.endswith('.exe') or path.endswith('.com') or path.endswith('.bat') + +PYTHON_BINARY = '%python_binary%' +if IsWindows() and not HasWindowsExecutableExtension(PYTHON_BINARY): + PYTHON_BINARY = PYTHON_BINARY + '.exe' + +def SearchPath(name): + """Finds a file in a given search path.""" + search_path = os.getenv('PATH', os.defpath).split(os.pathsep) + for directory in search_path: + if directory: + path = os.path.join(directory, name) + if os.path.isfile(path) and os.access(path, os.X_OK): + return path + return None + +def FindPythonBinary(module_space): + """Finds the real Python binary if it's not a normal absolute path.""" + return FindBinary(module_space, PYTHON_BINARY) + +def PrintVerboseCoverage(*args): + """Print output if VERBOSE_COVERAGE is non-empty in the environment.""" + if os.environ.get("VERBOSE_COVERAGE"): + print(*args, file=sys.stderr) + +def FindCoverageEntryPoint(module_space): + cov_tool = '%coverage_tool%' + if cov_tool: + PrintVerboseCoverage('Using toolchain coverage_tool %r' % cov_tool) + else: + cov_tool = os.environ.get('PYTHON_COVERAGE') + if cov_tool: + PrintVerboseCoverage('PYTHON_COVERAGE: %r' % cov_tool) + if cov_tool: + return FindBinary(module_space, cov_tool) + return None + +def FindBinary(module_space, bin_name): + """Finds the real binary if it's not a normal absolute path.""" + if not bin_name: + return None + if bin_name.startswith("//"): + # Case 1: Path is a label. Not supported yet. + raise AssertionError( + "Bazel does not support execution of Python interpreters via labels yet" + ) + elif os.path.isabs(bin_name): + # Case 2: Absolute path. + return bin_name + # Use normpath() to convert slashes to os.sep on Windows. + elif os.sep in os.path.normpath(bin_name): + # Case 3: Path is relative to the repo root. + return os.path.join(module_space, bin_name) + else: + # Case 4: Path has to be looked up in the search path. + return SearchPath(bin_name) + +def CreatePythonPathEntries(python_imports, module_space): + parts = python_imports.split(':') + return [module_space] + ['%s/%s' % (module_space, path) for path in parts] + +def FindModuleSpace(main_rel_path): + """Finds the runfiles tree.""" + # When the calling process used the runfiles manifest to resolve the + # location of this stub script, the path may be expanded. This means + # argv[0] may no longer point to a location inside the runfiles + # directory. We should therefore respect RUNFILES_DIR and + # RUNFILES_MANIFEST_FILE set by the caller. + runfiles_dir = os.environ.get('RUNFILES_DIR', None) + if not runfiles_dir: + runfiles_manifest_file = os.environ.get('RUNFILES_MANIFEST_FILE', '') + if (runfiles_manifest_file.endswith('.runfiles_manifest') or + runfiles_manifest_file.endswith('.runfiles/MANIFEST')): + runfiles_dir = runfiles_manifest_file[:-9] + # Be defensive: the runfiles dir should contain our main entry point. If + # it doesn't, then it must not be our runfiles directory. + if runfiles_dir and os.path.exists(os.path.join(runfiles_dir, main_rel_path)): + return runfiles_dir + + stub_filename = sys.argv[0] + if not os.path.isabs(stub_filename): + stub_filename = os.path.join(os.getcwd(), stub_filename) + + while True: + module_space = stub_filename + ('.exe' if IsWindows() else '') + '.runfiles' + if os.path.isdir(module_space): + return module_space + + runfiles_pattern = r'(.*\.runfiles)' + (r'\\' if IsWindows() else '/') + '.*' + matchobj = re.match(runfiles_pattern, stub_filename) + if matchobj: + return matchobj.group(1) + + if not os.path.islink(stub_filename): + break + target = os.readlink(stub_filename) + if os.path.isabs(target): + stub_filename = target + else: + stub_filename = os.path.join(os.path.dirname(stub_filename), target) + + raise AssertionError('Cannot find .runfiles directory for %s' % sys.argv[0]) + +def ExtractZip(zip_path, dest_dir): + """Extracts the contents of a zip file, preserving the unix file mode bits. + + These include the permission bits, and in particular, the executable bit. + + Ideally the zipfile module should set these bits, but it doesn't. See: + https://bugs.python.org/issue15795. + + Args: + zip_path: The path to the zip file to extract + dest_dir: The path to the destination directory + """ + zip_path = GetWindowsPathWithUNCPrefix(zip_path) + dest_dir = GetWindowsPathWithUNCPrefix(dest_dir) + with zipfile.ZipFile(zip_path) as zf: + for info in zf.infolist(): + zf.extract(info, dest_dir) + # UNC-prefixed paths must be absolute/normalized. See + # https://docs.microsoft.com/en-us/windows/desktop/fileio/naming-a-file#maximum-path-length-limitation + file_path = os.path.abspath(os.path.join(dest_dir, info.filename)) + # The Unix st_mode bits (see "man 7 inode") are stored in the upper 16 + # bits of external_attr. Of those, we set the lower 12 bits, which are the + # file mode bits (since the file type bits can't be set by chmod anyway). + attrs = info.external_attr >> 16 + if attrs != 0: # Rumor has it these can be 0 for zips created on Windows. + os.chmod(file_path, attrs & 0o7777) + +# Create the runfiles tree by extracting the zip file +def CreateModuleSpace(): + temp_dir = tempfile.mkdtemp('', 'Bazel.runfiles_') + ExtractZip(os.path.dirname(__file__), temp_dir) + # IMPORTANT: Later code does `rm -fr` on dirname(module_space) -- it's + # important that deletion code be in sync with this directory structure + return os.path.join(temp_dir, 'runfiles') + +# Returns repository roots to add to the import path. +def GetRepositoriesImports(module_space, import_all): + if import_all: + repo_dirs = [os.path.join(module_space, d) for d in os.listdir(module_space)] + repo_dirs.sort() + return [d for d in repo_dirs if os.path.isdir(d)] + return [os.path.join(module_space, '%workspace_name%')] + +def RunfilesEnvvar(module_space): + """Finds the runfiles manifest or the runfiles directory. + + Returns: + A tuple of (var_name, var_value) where var_name is either 'RUNFILES_DIR' or + 'RUNFILES_MANIFEST_FILE' and var_value is the path to that directory or + file, or (None, None) if runfiles couldn't be found. + """ + # If this binary is the data-dependency of another one, the other sets + # RUNFILES_MANIFEST_FILE or RUNFILES_DIR for our sake. + runfiles = os.environ.get('RUNFILES_MANIFEST_FILE', None) + if runfiles: + return ('RUNFILES_MANIFEST_FILE', runfiles) + + runfiles = os.environ.get('RUNFILES_DIR', None) + if runfiles: + return ('RUNFILES_DIR', runfiles) + + # If running from a zip, there's no manifest file. + if IsRunningFromZip(): + return ('RUNFILES_DIR', module_space) + + # Look for the runfiles "output" manifest, argv[0] + ".runfiles_manifest" + runfiles = module_space + '_manifest' + if os.path.exists(runfiles): + return ('RUNFILES_MANIFEST_FILE', runfiles) + + # Look for the runfiles "input" manifest, argv[0] + ".runfiles/MANIFEST" + # Normally .runfiles_manifest and MANIFEST are both present, but the + # former will be missing for zip-based builds or if someone copies the + # runfiles tree elsewhere. + runfiles = os.path.join(module_space, 'MANIFEST') + if os.path.exists(runfiles): + return ('RUNFILES_MANIFEST_FILE', runfiles) + + # If running in a sandbox and no environment variables are set, then + # Look for the runfiles next to the binary. + if module_space.endswith('.runfiles') and os.path.isdir(module_space): + return ('RUNFILES_DIR', module_space) + + return (None, None) + +def Deduplicate(items): + """Efficiently filter out duplicates, keeping the first element only.""" + seen = set() + for it in items: + if it not in seen: + seen.add(it) + yield it + +def InstrumentedFilePaths(): + """Yields tuples of realpath of each instrumented file with the relative path.""" + manifest_filename = os.environ.get('COVERAGE_MANIFEST') + if not manifest_filename: + return + with open(manifest_filename, "r") as manifest: + for line in manifest: + filename = line.strip() + if not filename: + continue + try: + realpath = os.path.realpath(filename) + except OSError: + print( + "Could not find instrumented file {}".format(filename), + file=sys.stderr) + continue + if realpath != filename: + PrintVerboseCoverage("Fixing up {} -> {}".format(realpath, filename)) + yield (realpath, filename) + +def UnresolveSymlinks(output_filename): + # type: (str) -> None + """Replace realpath of instrumented files with the relative path in the lcov output. + + Though we are asking coveragepy to use relative file names, currently + ignore that for purposes of generating the lcov report (and other reports + which are not the XML report), so we need to go and fix up the report. + + This function is a workaround for that issue. Once that issue is fixed + upstream and the updated version is widely in use, this should be removed. + + See https://github.com/nedbat/coveragepy/issues/963. + """ + substitutions = list(InstrumentedFilePaths()) + if substitutions: + unfixed_file = output_filename + '.tmp' + os.rename(output_filename, unfixed_file) + with open(unfixed_file, "r") as unfixed: + with open(output_filename, "w") as output_file: + for line in unfixed: + if line.startswith('SF:'): + for (realpath, filename) in substitutions: + line = line.replace(realpath, filename) + output_file.write(line) + os.unlink(unfixed_file) + +def ExecuteFile(python_program, main_filename, args, env, module_space, + coverage_entrypoint, workspace, delete_module_space): + # type: (str, str, list[str], dict[str, str], str, str|None, str|None) -> ... + """Executes the given Python file using the various environment settings. + + This will not return, and acts much like os.execv, except is much + more restricted, and handles Bazel-related edge cases. + + Args: + python_program: (str) Path to the Python binary to use for execution + main_filename: (str) The Python file to execute + args: (list[str]) Additional args to pass to the Python file + env: (dict[str, str]) A dict of environment variables to set for the execution + module_space: (str) Path to the module space/runfiles tree directory + coverage_entrypoint: (str|None) Path to the coverage tool entry point file. + workspace: (str|None) Name of the workspace to execute in. This is expected to be a + directory under the runfiles tree. + delete_module_space: (bool), True if the module space should be deleted + after a successful (exit code zero) program run, False if not. + """ + # We want to use os.execv instead of subprocess.call, which causes + # problems with signal passing (making it difficult to kill + # Bazel). However, these conditions force us to run via + # subprocess.call instead: + # + # - On Windows, os.execv doesn't handle arguments with spaces + # correctly, and it actually starts a subprocess just like + # subprocess.call. + # - When running in a workspace or zip file, we need to clean up the + # workspace after the process finishes so control must return here. + # - If we may need to emit a host config warning after execution, we + # can't execv because we need control to return here. This only + # happens for targets built in the host config. + # - For coverage targets, at least coveragepy requires running in + # two invocations, which also requires control to return here. + # + if not (IsWindows() or workspace or coverage_entrypoint or delete_module_space): + _RunExecv(python_program, main_filename, args, env) + + if coverage_entrypoint is not None: + ret_code = _RunForCoverage(python_program, main_filename, args, env, + coverage_entrypoint, workspace) + else: + ret_code = subprocess.call( + [python_program, main_filename] + args, + env=env, + cwd=workspace + ) + + if delete_module_space: + # NOTE: dirname() is called because CreateModuleSpace() creates a + # sub-directory within a temporary directory, and we want to remove the + # whole temporary directory. + shutil.rmtree(os.path.dirname(module_space), True) + sys.exit(ret_code) + +def _RunExecv(python_program, main_filename, args, env): + # type: (str, str, list[str], dict[str, str]) -> ... + """Executes the given Python file using the various environment settings.""" + os.environ.update(env) + os.execv(python_program, [python_program, main_filename] + args) + +def _RunForCoverage(python_program, main_filename, args, env, + coverage_entrypoint, workspace): + # type: (str, str, list[str], dict[str, str], str, str|None) -> int + """Collects coverage infomration for the given Python file. + + Args: + python_program: (str) Path to the Python binary to use for execution + main_filename: (str) The Python file to execute + args: (list[str]) Additional args to pass to the Python file + env: (dict[str, str]) A dict of environment variables to set for the execution + coverage_entrypoint: (str|None) Path to the coverage entry point to execute with. + workspace: (str|None) Name of the workspace to execute in. This is expected to be a + directory under the runfiles tree, and will recursively delete the + runfiles directory if set. + """ + # We need for coveragepy to use relative paths. This can only be configured + # via an rc file, so we need to make one. + rcfile_name = os.path.join(os.environ['COVERAGE_DIR'], '.coveragerc') + with open(rcfile_name, "w") as rcfile: + rcfile.write('''[run] +relative_files = True +''') + PrintVerboseCoverage('Coverage entrypoint:', coverage_entrypoint) + # First run the target Python file via coveragepy to create a .coverage + # database file, from which we can later export lcov. + ret_code = subprocess.call( + [ + python_program, + coverage_entrypoint, + "run", + "--rcfile=" + rcfile_name, + "--append", + "--branch", + main_filename + ] + args, + env=env, + cwd=workspace + ) + output_filename = os.path.join(os.environ['COVERAGE_DIR'], 'pylcov.dat') + + PrintVerboseCoverage('Converting coveragepy database to lcov:', output_filename) + # Run coveragepy again to convert its .coverage database file into lcov. + ret_code = subprocess.call( + [ + python_program, + coverage_entrypoint, + "lcov", + "--rcfile=" + rcfile_name, + "-o", + output_filename + ], + env=env, + cwd=workspace + ) or ret_code + try: + os.unlink(rcfile_name) + except OSError as err: + # It's possible that the profiled program might execute another Python + # binary through a wrapper that would then delete the rcfile. Not much + # we can do about that, besides ignore the failure here. + PrintVerboseCoverage('Error removing temporary coverage rc file:', err) + if os.path.isfile(output_filename): + UnresolveSymlinks(output_filename) + return ret_code + +def Main(): + args = sys.argv[1:] + + new_env = {} + + # The main Python source file. + # The magic string percent-main-percent is replaced with the runfiles-relative + # filename of the main file of the Python binary in BazelPythonSemantics.java. + main_rel_path = '%main%' + if IsWindows(): + main_rel_path = main_rel_path.replace('/', os.sep) + + if IsRunningFromZip(): + module_space = CreateModuleSpace() + delete_module_space = True + else: + module_space = FindModuleSpace(main_rel_path) + delete_module_space = False + + python_imports = '%imports%' + python_path_entries = CreatePythonPathEntries(python_imports, module_space) + python_path_entries += GetRepositoriesImports(module_space, %import_all%) + # Remove duplicates to avoid overly long PYTHONPATH (#10977). Preserve order, + # keep first occurrence only. + python_path_entries = [ + GetWindowsPathWithUNCPrefix(d) + for d in python_path_entries + ] + + old_python_path = os.environ.get('PYTHONPATH') + if old_python_path: + python_path_entries += old_python_path.split(os.pathsep) + + python_path = os.pathsep.join(Deduplicate(python_path_entries)) + + if IsWindows(): + python_path = python_path.replace('/', os.sep) + + new_env['PYTHONPATH'] = python_path + runfiles_envkey, runfiles_envvalue = RunfilesEnvvar(module_space) + if runfiles_envkey: + new_env[runfiles_envkey] = runfiles_envvalue + + # Don't prepend a potentially unsafe path to sys.path + # See: https://docs.python.org/3.11/using/cmdline.html#envvar-PYTHONSAFEPATH + new_env['PYTHONSAFEPATH'] = '1' + + main_filename = os.path.join(module_space, main_rel_path) + main_filename = GetWindowsPathWithUNCPrefix(main_filename) + assert os.path.exists(main_filename), \ + 'Cannot exec() %r: file not found.' % main_filename + assert os.access(main_filename, os.R_OK), \ + 'Cannot exec() %r: file not readable.' % main_filename + + program = python_program = FindPythonBinary(module_space) + if python_program is None: + raise AssertionError('Could not find python binary: ' + PYTHON_BINARY) + + # COVERAGE_DIR is set if coverage is enabled and instrumentation is configured + # for something, though it could be another program executing this one or + # one executed by this one (e.g. an extension module). + if os.environ.get('COVERAGE_DIR'): + cov_tool = FindCoverageEntryPoint(module_space) + if cov_tool is None: + PrintVerboseCoverage('Coverage was enabled, but python coverage tool was not configured.') + else: + # Inhibit infinite recursion: + if 'PYTHON_COVERAGE' in os.environ: + del os.environ['PYTHON_COVERAGE'] + + if not os.path.exists(cov_tool): + raise EnvironmentError( + 'Python coverage tool %r not found. ' + 'Try running with VERBOSE_COVERAGE=1 to collect more information.' + % cov_tool + ) + + # coverage library expects sys.path[0] to contain the library, and replaces + # it with the directory of the program it starts. Our actual sys.path[0] is + # the runfiles directory, which must not be replaced. + # CoverageScript.do_execute() undoes this sys.path[0] setting. + # + # Update sys.path such that python finds the coverage package. The coverage + # entry point is coverage.coverage_main, so we need to do twice the dirname. + python_path_entries = new_env['PYTHONPATH'].split(os.pathsep) + python_path_entries.append(os.path.dirname(os.path.dirname(cov_tool))) + new_env['PYTHONPATH'] = os.pathsep.join(Deduplicate(python_path_entries)) + else: + cov_tool = None + + new_env.update((key, val) for key, val in os.environ.items() if key not in new_env) + + workspace = None + if IsRunningFromZip(): + # If RUN_UNDER_RUNFILES equals 1, it means we need to + # change directory to the right runfiles directory. + # (So that the data files are accessible) + if os.environ.get('RUN_UNDER_RUNFILES') == '1': + workspace = os.path.join(module_space, '%workspace_name%') + + try: + sys.stdout.flush() + # NOTE: ExecuteFile may call execve() and lines after this will never run. + ExecuteFile( + python_program, main_filename, args, new_env, module_space, + cov_tool, workspace, + delete_module_space = delete_module_space, + ) + + except EnvironmentError: + # This works from Python 2.4 all the way to 3.x. + e = sys.exc_info()[1] + # This exception occurs when os.execv() fails for some reason. + if not getattr(e, 'filename', None): + e.filename = program # Add info to error message + raise + +if __name__ == '__main__': + Main() |