diff options
Diffstat (limited to 'llvm_tools/fetch_cq_size_diff.py')
-rwxr-xr-x | llvm_tools/fetch_cq_size_diff.py | 366 |
1 files changed, 366 insertions, 0 deletions
#!/usr/bin/env python3
# Copyright 2024 The ChromiumOS Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Fetches the size diff between two images on gs://.

If given a CL, this will autodetect a passing CQ builder on that CL and find
a corresponding release build for said CQ builder. The sizes of these images
will be compared.

**Please note** that there's often version skew between release builds and CQ
builds. While this skew shouldn't result in _huge_ binary size differences,
it can still account for a few MB of diff in an average case.
"""

import abc
import argparse
import dataclasses
import json
import logging
import os
from pathlib import Path
import subprocess
import sys
import tempfile
from typing import Dict, List, Optional, Tuple

import cros_cls


@dataclasses.dataclass(frozen=True)
class SizeDiffInfo:
    """Holds information about a size difference."""

    # Size, in bytes, measured for the baseline artifact.
    baseline_size_bytes: int
    # Size, in bytes, measured for the new artifact.
    new_size_bytes: int


class ComparableArtifact(abc.ABC):
    """Artifacts from CQ runs that can be compared."""

    @property
    @abc.abstractmethod
    def artifact_name(self) -> str:
        """Returns the name of the artifact in gs:// e.g., "image.zip"."""

    @abc.abstractmethod
    def _measure_artifact_size(self, file: Path) -> int:
        """Given a path to the artifact, extract the relevant size info.

        The directory that `file` is in may be mutated by this function. No
        guarantees are made about the state of said directory after execution
        finishes, except that `file` should remain unmodified.
        """

    def _download_and_measure_size(self, gs_url: str) -> int:
        """Downloads `gs_url` into a temporary directory and measures it.

        Args:
            gs_url: URL of the artifact, passed verbatim to `gsutil cp`.

        Returns:
            Whatever `_measure_artifact_size` reports for the downloaded file.

        Raises:
            subprocess.CalledProcessError: if `gsutil cp` fails.
        """
        # The temporary directory (and the download in it) is removed as soon
        # as the measurement is done.
        with tempfile.TemporaryDirectory(
            prefix="fetch_size_diff_"
        ) as tempdir_str:
            into = Path(tempdir_str)
            local_file = into / os.path.basename(gs_url)
            subprocess.run(
                ["gsutil", "cp", gs_url, local_file],
                check=True,
                stdin=subprocess.DEVNULL,
            )
            return self._measure_artifact_size(local_file)

    def compare_size_from_gs(self, baseline: str, new: str) -> SizeDiffInfo:
        """Downloads and measures both artifacts.

        Args:
            baseline: gs:// URL of the baseline artifact.
            new: gs:// URL of the new artifact.

        Returns:
            A SizeDiffInfo holding both measured sizes.
        """
        return SizeDiffInfo(
            baseline_size_bytes=self._download_and_measure_size(baseline),
            new_size_bytes=self._download_and_measure_size(new),
        )


class DebugInfoArtifact(ComparableArtifact):
    """ComparableArtifact instance for debuginfo."""

    @property
    def artifact_name(self) -> str:
        return "debug.tgz"

    def _measure_artifact_size(self, file: Path) -> int:
        """Returns the size in bytes of chrome.debug inside the tarball."""
        chrome_debug = "./opt/google/chrome/chrome.debug"
        logging.info("Unpacking debuginfo...")
        # Only the one member of interest is extracted, next to the tarball.
        subprocess.run(
            ["tar", "xaf", file, chrome_debug],
            check=True,
            cwd=file.parent,
            stdin=subprocess.DEVNULL,
        )
        return os.path.getsize(file.parent / chrome_debug)


class ImageSizeArtifact(ComparableArtifact):
    """ComparableArtifact instance for image files."""

    @property
    def artifact_name(self) -> str:
        return "image.zip"

    def _measure_artifact_size(self, file: Path) -> int:
        """Returns `total_size` from the package-sizes JSON inside the zip.

        Raises:
            subprocess.CalledProcessError: if `unzip` fails.
            ValueError: if `total_size` is missing or is not an int.
        """
        binpkg_sizes_name = "chromiumos_base_image.bin-package-sizes.json"
        # Only the JSON member is extracted; it lands next to the zip.
        subprocess.run(
            [
                "unzip",
                file.name,
                binpkg_sizes_name,
            ],
            check=True,
            cwd=file.parent,
            stdin=subprocess.DEVNULL,
        )
        with (file.parent / binpkg_sizes_name).open(encoding="utf-8") as f:
            loaded = json.load(f)
        try:
            size = loaded["total_size"]
        except KeyError:
            # The message already lists the available keys, so the KeyError
            # context adds nothing.
            raise ValueError(
                f"Missing total_size in {loaded.keys()}"
            ) from None

        if not isinstance(size, int):
            raise ValueError(
                f"total_size was unexpectedly {type(size)}: {size}"
            )
        return size


def is_probably_non_production_builder(builder_name: str) -> bool:
    """Quickly determine if a builder doesn't represent a board in production.

    Note that this is a heuristic; results should be taken as mostly accurate.
    """
    return any(
        x in builder_name
        for x in (
            "-asan-",
            "-buildtest-",
            "-fuzzer-",
            "-kernelnext-",
            "-ubsan-",
            "-vmtest-",
        )
    )


def guess_release_artifact_path(artifact_link: str) -> Optional[str]:
    """Guesses a close-enough release path for a CQ artifact.

    Returns:
        A path to the release artifact. Returns None if the given artifact
        wasn't generated by a CQ builder.

    >>> guess_release_artifact_path(
    ...     "gs://chromeos-image-archive/brya-cq/"
    ...     "R121-15677.0.0-90523-8764532770258575633/image.zip"
    ... )
    'gs://chromeos-image-archive/brya-release/R121-15677.0.0/image.zip'
    """
    artifacts_link = os.path.dirname(artifact_link)
    # Scrape the builder name from a level above the artifacts directory.
    builder = os.path.basename(os.path.dirname(artifacts_link))
    # Check the builder before parsing the release, so links from non-CQ
    # builders yield the documented None without touching the parser.
    if not builder.endswith("-cq"):
        return None
    board = builder[:-3]
    release_version = cros_cls.parse_release_from_builder_artifacts_link(
        artifacts_link
    )
    return (
        f"gs://chromeos-image-archive/{board}-release/{release_version}/"
        f"{os.path.basename(artifact_link)}"
    )


def try_gsutil_ls(paths: List[str]) -> List[str]:
    """Returns all of the paths `gsutil` matches from `paths`.

    Ignores errors from gsutil about paths not existing.

    Raises:
        subprocess.CalledProcessError: if gsutil fails for any other reason.
    """
    if not paths:
        # A bare `gsutil ls` lists buckets rather than nothing; asking about
        # no paths trivially matches no paths.
        return []

    result = subprocess.run(
        ["gsutil", "-m", "ls"] + paths,
        # If any URI doesn't exist, gsutil will fail. Ignore the failure.
        check=False,
        encoding="utf-8",
        stdin=subprocess.DEVNULL,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    if result.returncode:
        # Ensure the error message is what's expected, rather than e.g.,
        # invalid credentials.
        err_msg = "CommandException: One or more URLs matched no objects"
        if err_msg not in result.stderr:
            logging.error(
                "gsutil had unexpected output; stderr: %r", result.stderr
            )
            result.check_returncode()
    return [x.strip() for x in result.stdout.splitlines()]


def find_size_diffable_cq_artifacts(
    cq_build_ids: List[cros_cls.BuildID],
    artifact_name: str,
) -> Optional[Tuple[str, str]]:
    """Searches the cq-orchestrator builds for candidates for size comparison.

    Args:
        cq_build_ids: cq-orchestrator build IDs, tried in the given order.
        artifact_name: File name of the artifact to look for, e.g.
            "image.zip".

    Returns:
        None if no candidates are found. Otherwise, returns a two-tuple: index
        0 is the baseline (release) artifact, index 1 is the corresponding
        artifact generated by the CQ.
    """
    for cq_build_id in cq_build_ids:
        logging.info("Inspecting CQ build %d...", cq_build_id)
        orch_output = cros_cls.CQOrchestratorOutput.fetch(cq_build_id)
        child_builder_values = cros_cls.CQBoardBuilderOutput.fetch_many(
            [
                val
                for name, val in orch_output.child_builders.items()
                if not is_probably_non_production_builder(name)
            ]
        )
        artifacts_links = [
            x.artifacts_link
            for x in child_builder_values
            if x.artifacts_link is not None
        ]
        if not artifacts_links:
            logging.info("No children of CQ run %d had artifacts", cq_build_id)
            continue

        potential_artifacts = try_gsutil_ls(
            [os.path.join(x, artifact_name) for x in artifacts_links]
        )
        if not potential_artifacts:
            logging.info(
                "No children of CQ run %d produced a(n) %s",
                cq_build_id,
                artifact_name,
            )
            continue

        logging.debug(
            "Found candidate %s files: %s", artifact_name, potential_artifacts
        )
        guessed_paths = [
            (x, guess_release_artifact_path(x)) for x in potential_artifacts
        ]
        logging.debug("Guessed corresponding artifact files: %s", guessed_paths)

        # Map each guessed release path back to the first CQ artifact that
        # produced it, so the selection below can always be resolved.
        release_to_cq: Dict[str, str] = {}
        for cq_path, guessed_path in guessed_paths:
            if guessed_path:
                release_to_cq.setdefault(guessed_path, cq_path)

        # Only keep listed paths that were actually guessed; anything else
        # has no CQ counterpart to compare against.
        release_artifacts = [
            x for x in try_gsutil_ls(list(release_to_cq)) if x in release_to_cq
        ]
        if not release_artifacts:
            logging.info(
                "No release %s artifacts could be found for CQ builder %d.",
                artifact_name,
                cq_build_id,
            )
            continue

        # `try_gsutil_ls` makes no ordering guarantees; always pick the min()
        # artifact here for consistency across reruns.
        selected_release_artifact = min(release_artifacts)
        logging.info("Selected release artifact: %s", selected_release_artifact)
        return selected_release_artifact, release_to_cq[selected_release_artifact]
    return None


def inspect_gs_impl(
    baseline_gs_url: str, new_gs_url: str, artifact: ComparableArtifact
) -> None:
    """Compares the artifacts at the given URLs, logging the results.

    Args:
        baseline_gs_url: gs:// URL of the baseline artifact.
        new_gs_url: gs:// URL of the new artifact.
        artifact: Knows how to measure the kind of artifact being compared.
    """
    size_diff = artifact.compare_size_from_gs(baseline_gs_url, new_gs_url)
    # `%d` doesn't support `,` as a modifier, and commas make these numbers
    # much easier to read. Prefer to keep strings interpreted as format strings
    # constant.
    logging.info("Baseline size: %s", f"{size_diff.baseline_size_bytes:,}")
    logging.info("New size: %s", f"{size_diff.new_size_bytes:,}")

    if not size_diff.baseline_size_bytes:
        # A relative diff against nothing is undefined; don't divide by zero.
        logging.warning("Baseline size is 0; percentage diff is undefined.")
        return

    # Signed on purpose: negative means the new artifact is smaller.
    diff_pct = size_diff.new_size_bytes / size_diff.baseline_size_bytes - 1
    logging.info("Diff: %.2f%%", diff_pct * 100)


def inspect_cl(opts: argparse.Namespace, artifact: ComparableArtifact) -> None:
    """Implements the `cl` subcommand of this script."""
    cq_build_ids = cros_cls.fetch_cq_orchestrator_ids(opts.cl)
    if not cq_build_ids:
        sys.exit(f"No completed cq-orchestrators found for {opts.cl}")

    # NOTE(review): builds are tried in the order `cros_cls` returns them. A
    # previous comment here claimed the list is reversed to try the newest
    # first, but no reversal happens -- confirm the ordering guarantee of
    # `fetch_cq_orchestrator_ids`.
    diffable_artifacts = find_size_diffable_cq_artifacts(
        cq_build_ids, artifact.artifact_name
    )
    if not diffable_artifacts:
        sys.exit("No diffable artifacts were found")

    baseline, new = diffable_artifacts
    logging.info("Comparing %s (baseline) to %s (new)", baseline, new)
    inspect_gs_impl(baseline, new, artifact)
    logging.warning(
        "Friendly reminder: CL inspection diffs between your CL and a "
        "corresponding release build. Size differences up to a few megabytes "
        "are expected and do not necessarily indicate a size difference "
        "attributable to your CL."
    )


def inspect_gs(opts: argparse.Namespace, artifact: ComparableArtifact) -> None:
    """Implements the `gs` subcommand of this script."""
    inspect_gs_impl(opts.baseline, opts.new, artifact)


def main(argv: List[str]) -> None:
    """Parses `argv` and dispatches to the selected subcommand."""
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    what_to_compare = parser.add_mutually_exclusive_group(required=True)
    what_to_compare.add_argument(
        "--image", action="store_true", help="Compare image.zip sizes."
    )
    what_to_compare.add_argument(
        "--debuginfo", action="store_true", help="Compare debuginfo sizes."
    )

    parser.add_argument(
        "--debug", action="store_true", help="Enable debug logging"
    )
    subparsers = parser.add_subparsers(required=True)

    cl_parser = subparsers.add_parser(
        "cl", help="Inspect a CL's CQ runs to find artifacts to compare."
    )
    cl_parser.set_defaults(func=inspect_cl)
    cl_parser.add_argument(
        "cl",
        type=cros_cls.ChangeListURL.parse_with_patch_set,
        help="CL to inspect CQ runs of. This must contain a patchset number.",
    )

    gs_parser = subparsers.add_parser(
        "gs", help="Directly compare two zip files from gs://."
    )
    gs_parser.add_argument("baseline", help="Baseline file to compare.")
    gs_parser.add_argument("new", help="New file to compare.")
    gs_parser.set_defaults(func=inspect_gs)
    opts = parser.parse_args(argv)

    logging.basicConfig(
        format=">> %(asctime)s: %(levelname)s: %(filename)s:%(lineno)d: "
        "%(message)s",
        level=logging.DEBUG if opts.debug else logging.INFO,
    )

    # Both subparsers set `func`, and a subcommand is required, so this is an
    # internal sanity check rather than input validation.
    assert getattr(opts, "func", None), "Unknown subcommand?"
    if opts.image:
        artifact: ComparableArtifact = ImageSizeArtifact()
    else:
        # The mutually exclusive group is required, so it must be this one.
        assert opts.debuginfo
        artifact = DebugInfoArtifact()

    opts.func(opts, artifact)


if __name__ == "__main__":
    main(sys.argv[1:])