aboutsummaryrefslogtreecommitdiff
path: root/llvm_tools/fetch_cq_size_diff.py
diff options
context:
space:
mode:
Diffstat (limited to 'llvm_tools/fetch_cq_size_diff.py')
-rwxr-xr-x llvm_tools/fetch_cq_size_diff.py 366
1 files changed, 366 insertions, 0 deletions
diff --git a/llvm_tools/fetch_cq_size_diff.py b/llvm_tools/fetch_cq_size_diff.py
new file mode 100755
index 00000000..a20f7396
--- /dev/null
+++ b/llvm_tools/fetch_cq_size_diff.py
@@ -0,0 +1,366 @@
+#!/usr/bin/env python3
+# Copyright 2024 The ChromiumOS Authors
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Fetches the size diff between two images on gs://.
+
+If given a CL, this will autodetect a passing CQ builder on that CL and find
+a corresponding release build for said CQ builder. The sizes of these images
+will be compared.
+
+**Please note** that there's often version skew between release builds and CQ
+builds. While this skew shouldn't result in _huge_ binary size differences,
+it can still account for a few MB of diff in an average case.
+"""
+
+import abc
+import argparse
+import dataclasses
+import json
+import logging
+import os
+from pathlib import Path
+import subprocess
+import sys
+import tempfile
+from typing import List, Optional, Tuple
+
+import cros_cls
+
+
@dataclasses.dataclass(frozen=True)
class SizeDiffInfo:
    """Immutable pair of measured sizes for one baseline-vs-new comparison."""

    # Size, in bytes, measured for the baseline artifact.
    baseline_size_bytes: int
    # Size, in bytes, measured for the artifact compared against the baseline.
    new_size_bytes: int
+
+
class ComparableArtifact(abc.ABC):
    """Base class for CQ-produced artifacts whose sizes can be compared."""

    @property
    @abc.abstractmethod
    def artifact_name(self) -> str:
        """The artifact's file name on gs://, for example "image.zip"."""

    @abc.abstractmethod
    def _measure_artifact_size(self, file: Path) -> int:
        """Extracts the size of interest from a locally available artifact.

        Implementations are free to modify the directory that contains
        `file`, so callers must not rely on that directory's contents
        afterwards. The one exception is `file` itself, which is to be left
        unmodified.
        """

    def _download_and_measure_size(self, gs_url: str) -> int:
        """Copies `gs_url` into a scratch directory and measures it there.

        The scratch directory only lives for the duration of this call.

        Raises:
            subprocess.CalledProcessError: if `gsutil cp` exits non-zero.
        """
        with tempfile.TemporaryDirectory(prefix="fetch_size_diff_") as scratch:
            destination = Path(scratch) / os.path.basename(gs_url)
            download_cmd = ["gsutil", "cp", gs_url, destination]
            subprocess.run(
                download_cmd,
                check=True,
                stdin=subprocess.DEVNULL,
            )
            return self._measure_artifact_size(destination)

    def compare_size_from_gs(self, baseline: str, new: str) -> SizeDiffInfo:
        """Downloads and measures both gs:// artifacts, baseline first."""
        baseline_size = self._download_and_measure_size(baseline)
        new_size = self._download_and_measure_size(new)
        return SizeDiffInfo(
            baseline_size_bytes=baseline_size,
            new_size_bytes=new_size,
        )
+
+
class DebugInfoArtifact(ComparableArtifact):
    """ComparableArtifact instance for debuginfo."""

    @property
    def artifact_name(self) -> str:
        """Returns the gs:// file name of the debuginfo archive."""
        return "debug.tgz"

    def _measure_artifact_size(self, file: Path) -> int:
        """Returns the on-disk size, in bytes, of Chrome's debug file.

        Only `chrome.debug` is extracted from the archive; it is unpacked
        next to `file`.

        Raises:
            subprocess.CalledProcessError: if `tar` exits non-zero.
        """
        chrome_debug = "./opt/google/chrome/chrome.debug"
        logging.info("Unpacking debuginfo...")
        subprocess.run(
            # `tar` runs with cwd=file.parent, so refer to the archive by its
            # bare name. Passing `file` itself would resolve a relative path
            # against the wrong directory.
            ["tar", "xaf", file.name, chrome_debug],
            check=True,
            cwd=file.parent,
            stdin=subprocess.DEVNULL,
        )
        return os.path.getsize(file.parent / chrome_debug)
+
+
class ImageSizeArtifact(ComparableArtifact):
    """ComparableArtifact instance for image files."""

    @property
    def artifact_name(self) -> str:
        """Returns the gs:// file name of the image archive."""
        return "image.zip"

    def _measure_artifact_size(self, file: Path) -> int:
        """Returns `total_size` from the package-sizes JSON inside `file`.

        Only the package-sizes JSON is extracted from the zip; it is unpacked
        next to `file`.

        Raises:
            subprocess.CalledProcessError: if `unzip` exits non-zero.
            ValueError: if the JSON has no `total_size` key, or if its value
                is not an integer.
        """
        binpkg_sizes_name = "chromiumos_base_image.bin-package-sizes.json"
        subprocess.run(
            [
                "unzip",
                file.name,
                binpkg_sizes_name,
            ],
            check=True,
            cwd=file.parent,
            stdin=subprocess.DEVNULL,
        )
        with (file.parent / binpkg_sizes_name).open(encoding="utf-8") as f:
            loaded = json.load(f)

        try:
            size = loaded["total_size"]
        except KeyError as e:
            # Chain explicitly so the traceback shows the lookup failure as
            # the cause rather than as an error raised while handling it.
            raise ValueError(f"Missing total_size in {loaded.keys()}") from e

        # `bool` is a subclass of `int`; a JSON `true`/`false` is not a size.
        if isinstance(size, bool) or not isinstance(size, int):
            raise ValueError(
                f"total_size was unexpectedly {type(size)}: {size}"
            )
        return size
+
+
def is_probably_non_production_builder(builder_name: str) -> bool:
    """Heuristically reports whether a builder is for a non-production board.

    The check is a plain substring match against a fixed list of markers, so
    treat the answer as mostly, not perfectly, accurate.
    """
    non_production_markers = (
        "-asan-",
        "-buildtest-",
        "-fuzzer-",
        "-kernelnext-",
        "-ubsan-",
        "-vmtest-",
    )
    for marker in non_production_markers:
        if marker in builder_name:
            return True
    return False
+
+
def guess_release_artifact_path(artifact_link: str) -> Optional[str]:
    """Guesses a close-enough release path for a CQ artifact.

    Args:
        artifact_link: gs:// path of an artifact file. Its parent directory
            is the builder's artifacts directory, and the directory above
            that is named after the builder (e.g. "brya-cq").

    Returns:
        A path to the release artifact. Returns None if `artifact_link`
        wasn't generated by a CQ builder, i.e. the builder directory name
        does not end in "-cq".

    >>> guess_release_artifact_path(
    ...     "gs://chromeos-image-archive/brya-cq/"
    ...     "R121-15677.0.0-90523-8764532770258575633/image.zip"
    ... )
    'gs://chromeos-image-archive/brya-release/R121-15677.0.0/image.zip'
    """
    cq_suffix = "-cq"
    artifacts_link = os.path.dirname(artifact_link)
    # Scrape the board name from a level above the artifacts directory.
    builder = os.path.basename(os.path.dirname(artifacts_link))
    # Bail out before parsing the release version: links from non-CQ builders
    # are documented to yield None, so there is nothing to parse for them.
    if not builder.endswith(cq_suffix):
        return None
    board = builder[: -len(cq_suffix)]
    release_version = cros_cls.parse_release_from_builder_artifacts_link(
        artifacts_link
    )
    return (
        f"gs://chromeos-image-archive/{board}-release/{release_version}/"
        f"{os.path.basename(artifact_link)}"
    )
+
+
def try_gsutil_ls(paths: List[str]) -> List[str]:
    """Returns all of the paths `gsutil` matches from `paths`.

    Ignores errors from gsutil about paths not existing.

    Args:
        paths: gs:// URLs (or patterns) to hand to `gsutil ls`.

    Returns:
        The non-blank lines `gsutil ls` printed, stripped of surrounding
        whitespace. No ordering is guaranteed. An empty `paths` yields an
        empty list without invoking gsutil.

    Raises:
        subprocess.CalledProcessError: if gsutil fails for any reason other
            than URLs matching no objects.
    """
    if not paths:
        # A bare `gsutil ls` lists the project's buckets rather than nothing,
        # which callers would mistake for matched artifacts.
        return []

    result = subprocess.run(
        ["gsutil", "-m", "ls"] + paths,
        # If any URI doesn't exist, gsutil will fail. Ignore the failure.
        check=False,
        encoding="utf-8",
        stdin=subprocess.DEVNULL,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    if result.returncode:
        # Ensure the error message is what's expected, rather than e.g.,
        # invalid credentials.
        err_msg = "CommandException: One or more URLs matched no objects"
        if err_msg not in result.stderr:
            logging.error(
                "gsutil had unexpected output; stderr: %r", result.stderr
            )
            result.check_returncode()

    stripped_lines = (line.strip() for line in result.stdout.splitlines())
    # Drop blank lines so callers never see "" as a matched path.
    return [line for line in stripped_lines if line]
+
+
def find_size_diffable_cq_artifacts(
    cq_build_ids: List[cros_cls.BuildID],
    artifact_name: str,
) -> Optional[Tuple[str, str]]:
    """Searches the cq-orchestrator builds for candidates for size comparison.

    Builds are inspected in the order given. The first build with a CQ
    artifact whose guessed release counterpart exists on gs:// wins.

    Args:
        cq_build_ids: cq-orchestrator build IDs to inspect, in priority order.
        artifact_name: file name of the artifact to look for in each child
            builder's artifacts directory, e.g. "image.zip".

    Returns:
        None if no candidates are found. Otherwise, returns a two-tuple: index
        0 is the baseline (release) artifact, index 1 is the corresponding
        artifact generated by the CQ.
    """
    for cq_build_id in cq_build_ids:
        logging.info("Inspecting CQ build %d...", cq_build_id)
        orch_output = cros_cls.CQOrchestratorOutput.fetch(cq_build_id)
        child_builder_values = cros_cls.CQBoardBuilderOutput.fetch_many(
            [
                val
                for name, val in orch_output.child_builders.items()
                if not is_probably_non_production_builder(name)
            ]
        )
        artifacts_links = [
            x.artifacts_link
            for x in child_builder_values
            if x.artifacts_link is not None
        ]
        if not artifacts_links:
            logging.info("No children of CQ run %d had artifacts", cq_build_id)
            continue

        potential_artifacts = try_gsutil_ls(
            [os.path.join(x, artifact_name) for x in artifacts_links]
        )
        if not potential_artifacts:
            logging.info(
                "No children of CQ run %d produced a(n) %s",
                cq_build_id,
                artifact_name,
            )
            continue

        logging.debug(
            "Found candidate %s files: %s", artifact_name, potential_artifacts
        )
        guessed_paths = [
            (x, guess_release_artifact_path(x)) for x in potential_artifacts
        ]
        logging.debug("Guessed corresponding artifact files: %s", guessed_paths)

        release_candidates = [x for _, x in guessed_paths if x]
        if release_candidates:
            # Keep only paths we actually asked about, so the lookup of the
            # matching CQ artifact below is guaranteed to succeed.
            release_artifacts = [
                x
                for x in try_gsutil_ls(release_candidates)
                if x in release_candidates
            ]
        else:
            # Never run `gsutil ls` with no URLs: it would list buckets
            # instead of reporting that nothing matched.
            release_artifacts = []

        if not release_artifacts:
            logging.info(
                "No release %s artifacts could be found for CQ builder %d.",
                artifact_name,
                cq_build_id,
            )
            continue

        # `try_gsutil_ls` makes no ordering guarantees; always pick the min()
        # artifact here for consistency across reruns.
        selected_release_artifact = min(release_artifacts)
        logging.info("Selected release artifact: %s", selected_release_artifact)
        cq_artifact = next(
            cq_path
            for cq_path, guessed_path in guessed_paths
            if guessed_path == selected_release_artifact
        )
        return selected_release_artifact, cq_artifact
    return None
+
+
def inspect_gs_impl(
    baseline_gs_url: str, new_gs_url: str, artifact: ComparableArtifact
) -> None:
    """Compares the artifacts at the given URLs, logging the results.

    Args:
        baseline_gs_url: gs:// URL of the baseline artifact.
        new_gs_url: gs:// URL of the artifact to compare to the baseline.
        artifact: determines how each downloaded file is measured.
    """
    size_diff = artifact.compare_size_from_gs(baseline_gs_url, new_gs_url)
    # `%d` doesn't support `,` as a modifier, and commas make these numbers
    # much easier to read. Prefer to keep strings interpreted as format strings
    # constant.
    logging.info("Baseline size: %s", f"{size_diff.baseline_size_bytes:,}")
    logging.info("New size: %s", f"{size_diff.new_size_bytes:,}")

    if not size_diff.baseline_size_bytes:
        # A relative diff against zero is undefined; report that instead of
        # crashing with ZeroDivisionError after the sizes were logged.
        logging.warning(
            "Baseline size is zero; cannot compute a percentage diff"
        )
        return

    # Signed on purpose: negative means the new artifact is smaller. The
    # original wrapped only the ratio in abs(), which is a no-op for the
    # non-negative sizes measured here.
    diff_pct = size_diff.new_size_bytes / size_diff.baseline_size_bytes - 1
    logging.info("Diff: %.2f%%", diff_pct * 100)
+
+
def inspect_cl(opts: argparse.Namespace, artifact: ComparableArtifact) -> None:
    """Implements the `cl` subcommand of this script.

    Finds a CQ run of `opts.cl` with an artifact that has a release
    counterpart, then logs the size difference between the two. Exits the
    process with an error message if no CQ run or no diffable artifact is
    found.
    """
    cq_build_ids = cros_cls.fetch_cq_orchestrator_ids(opts.cl)
    if not cq_build_ids:
        sys.exit(f"No completed cq-orchestrators found for {opts.cl}")

    # Reverse cq_build_ids so we try the newest first.
    # NOTE(review): the original carried this comment but never reversed the
    # list. This assumes `fetch_cq_orchestrator_ids` returns oldest-first;
    # confirm against cros_cls.
    newest_first = list(reversed(cq_build_ids))
    diffable_artifacts = find_size_diffable_cq_artifacts(
        newest_first, artifact.artifact_name
    )
    if not diffable_artifacts:
        sys.exit("No diffable artifacts were found")

    baseline, new = diffable_artifacts
    logging.info("Comparing %s (baseline) to %s (new)", baseline, new)
    inspect_gs_impl(baseline, new, artifact)
    logging.warning(
        "Friendly reminder: CL inspection diffs between your CL and a "
        "corresponding release build. Size differences up to a few megabytes "
        "are expected and do not necessarily indicate a size difference "
        "attributable to your CL."
    )
+
+
def inspect_gs(opts: argparse.Namespace, artifact: ComparableArtifact) -> None:
    """Runs the `gs` subcommand: compares the two URLs given on the CLI."""
    baseline_url, new_url = opts.baseline, opts.new
    inspect_gs_impl(baseline_url, new_url, artifact)
+
+
def main(argv: List[str]) -> None:
    """Parses `argv`, configures logging, and runs the chosen subcommand.

    Args:
        argv: command-line arguments, excluding the program name.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    what_to_compare = parser.add_mutually_exclusive_group(required=True)
    what_to_compare.add_argument(
        "--image", action="store_true", help="Compare image.zip sizes."
    )
    what_to_compare.add_argument(
        "--debuginfo", action="store_true", help="Compare debuginfo sizes."
    )

    parser.add_argument(
        "--debug", action="store_true", help="Enable debug logging"
    )
    # Give the subcommand a `dest`: on some Python 3 versions, a required
    # subparser group without one raises TypeError instead of printing a
    # usage error when the subcommand is omitted.
    subparsers = parser.add_subparsers(dest="subcommand", required=True)

    cl_parser = subparsers.add_parser(
        "cl", help="Inspect a CL's CQ runs to find artifacts to compare."
    )
    cl_parser.set_defaults(func=inspect_cl)
    cl_parser.add_argument(
        "cl",
        type=cros_cls.ChangeListURL.parse_with_patch_set,
        help="CL to inspect CQ runs of. This must contain a patchset number.",
    )

    gs_parser = subparsers.add_parser(
        "gs", help="Directly compare two zip files from gs://."
    )
    gs_parser.add_argument("baseline", help="Baseline file to compare.")
    gs_parser.add_argument("new", help="New file to compare.")
    gs_parser.set_defaults(func=inspect_gs)
    opts = parser.parse_args(argv)

    logging.basicConfig(
        format=">> %(asctime)s: %(levelname)s: %(filename)s:%(lineno)d: "
        "%(message)s",
        level=logging.DEBUG if opts.debug else logging.INFO,
    )

    # Both asserts guard invariants argparse already enforces: a subcommand
    # is required, and exactly one of --image/--debuginfo must be given.
    assert getattr(opts, "func", None), "Unknown subcommand?"
    if opts.image:
        artifact: ComparableArtifact = ImageSizeArtifact()
    else:
        assert opts.debuginfo
        artifact = DebugInfoArtifact()

    opts.func(opts, artifact)
+
+
# Run the CLI only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    cli_args = sys.argv[1:]
    main(cli_args)