diff options
author | George Burgess IV <gbiv@google.com> | 2024-01-09 11:21:09 -0700 |
---|---|---|
committer | Chromeos LUCI <chromeos-scoped@luci-project-accounts.iam.gserviceaccount.com> | 2024-01-10 05:03:09 +0000 |
commit | d6925f77858ce5d1c4eead39c8d083c24e10d676 (patch) | |
tree | a593ff1562a322220cd0a837e0137b6cbe9c77a4 | |
parent | 567a3a8d6d8030cd7967d846b4aa492f5aa65daf (diff) | |
download | toolchain-utils-d6925f77858ce5d1c4eead39c8d083c24e10d676.tar.gz |
llvm_tools: add a -Werror log parsing tool
This is the first "half" of points 3 and 4 in the description of
b/316172255. This is useful on its own, since if a Mage runs a local
`FORCE_DISABLE_WERROR` build, they can point this tool at their build
tree and get a full report of all of the suppressed warnings.
BUG=b:316172255
TEST=Unittests; ran on a few `FORCE_DISABLE_WERROR` trees
Change-Id: I95bfc5de70f8614bf0ae7570fd5ab31766ce405d
Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/third_party/toolchain-utils/+/5180185
Commit-Queue: George Burgess <gbiv@chromium.org>
Tested-by: George Burgess <gbiv@chromium.org>
Reviewed-by: Ryan Beltran <ryanbeltran@chromium.org>
-rw-r--r-- | llvm_tools/README.md | 13 | ||||
-rwxr-xr-x | llvm_tools/werror_logs.py | 289 | ||||
-rwxr-xr-x | llvm_tools/werror_logs_test.py | 272 |
3 files changed, 574 insertions, 0 deletions
diff --git a/llvm_tools/README.md b/llvm_tools/README.md index e2ef34f1..8bd05bd9 100644 --- a/llvm_tools/README.md +++ b/llvm_tools/README.md @@ -552,3 +552,16 @@ Note that it's recommended to 'seed' the state file with a most recent upload date. This can be done by running this tool *once* with a `--last_date` flag. This flag has the script override whatever's in the state file (if anything) and start submitting all crashes uploaded starting at the given day. + +### `werror_logs.py` + +This tool exists to help devs reason about `-Werror` instances that _would_ +break builds, were the `FORCE_DISABLE_WERROR` support in the compiler wrapper +not enabled. + +Usage example: + +``` +$ ./werror_logs.py aggregate \ + --directory=${repo}/out/sdk/tmp/portage/dev-cpp/gtest-1.13.0-r12/cros-artifacts +``` diff --git a/llvm_tools/werror_logs.py b/llvm_tools/werror_logs.py new file mode 100755 index 00000000..6509be95 --- /dev/null +++ b/llvm_tools/werror_logs.py @@ -0,0 +1,289 @@ +#!/usr/bin/env python3 +# Copyright 2024 The ChromiumOS Authors +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +"""Helps reason about -Werror logs emitted by the compiler wrapper. + +Specifically, this works with the -Werror reports produced by the compiler +wrapper in FORCE_DISABLE_WERROR mode. It's intended to be run on trees of these +reports, so devs can run roughly the following commands: + +$ apply_force_disable_werror # (There's no actual script to do this today.) +$ build_packages --board=foo --nousepkg +$ ./werror_logs.py aggregate --directory=/build/foo/var/lib/chromeos + +And see a full aggregation of all warnings that were suppressed in that +`build_packages` invocation. +""" + +import argparse +import collections +import dataclasses +import json +import logging +from pathlib import Path +import re +import sys +from typing import Any, Counter, DefaultDict, Dict, IO, Iterable, List, Optional + + +@dataclasses.dataclass(frozen=True, eq=True, order=True) +class ClangWarningLocation: + """Represents a location at which a Clang warning was emitted.""" + + file: str + line: int + column: int + + @classmethod + def parse(cls, location: str) -> "ClangWarningLocation": + split = location.rsplit(":", 2) + if len(split) == 3: + return cls(file=split[0], line=int(split[1]), column=int(split[2])) + raise ValueError(f"Invalid location: {location!r}") + + +@dataclasses.dataclass(frozen=True, eq=True) +class ClangWarning: + """Represents a Clang warning at a specific location (if applicable).""" + + # The name of the warning, e.g., -Wunused-variable + name: str + # The message of the warning, e.g., "'allocate' is deprecated." + message: str + # The location of this warning. Not present for frontend diagnostics. + location: Optional[ClangWarningLocation] + + # This parses two kinds of errors: + # 1. `clang-17: error: foo [-Werror,...]` + # 2. `/file/path:123:45: error: foo [-Werror,...]" + _WARNING_RE = re.compile( + # Capture the location on its own, since `clang-\d+` is unused below. + r"^(?:([^:]*:\d+:\d+)|clang-\d+)" + r": error: " + # Capture the message + r"(.*?)\s+" + r"\[(-W[^\][]+)]\s*$" + ) + + @classmethod + def try_parse_line(cls, line: str) -> Optional["ClangWarning"]: + # Fast path: we can expect "error: " in interesting lines. Break early + # if that's not present. + if "error: " not in line: + return None + + m = cls._WARNING_RE.fullmatch(line) + if not m: + return None + + location, message, warning_flags = m.groups() + individual_warning_flags = warning_flags.split(",") + try: + werror_index = individual_warning_flags.index("-Werror") + except ValueError: + # Somehow this warning is fatal, but not related to -Werror. Since + # we're only interested in -Werror warnings, drop it. + logging.warning( + "Fatal warning that has nothing to do with -Werror? %r", line + ) + return None + + del individual_warning_flags[werror_index] + + # This isn't impossible to handle, just unexpected. Complain about it. + if len(individual_warning_flags) != 1: + raise ValueError( + f"Weird: parsed warnings {individual_warning_flags} out " + f"of {line}" + ) + + if location is None: + parsed_location = None + else: + parsed_location = ClangWarningLocation.parse(location) + return cls( + name=individual_warning_flags[0], + message=message, + location=parsed_location, + ) + + +@dataclasses.dataclass(frozen=True, eq=True) +class WarningInfo: + """Carries information about a ClangWarning.""" + + packages: DefaultDict[str, int] = dataclasses.field( + default_factory=lambda: collections.defaultdict(int) + ) + + +class UnknownPackageNameError(ValueError): + """Raised when a package name can't be determined from a warning report.""" + + +@dataclasses.dataclass +class AggregatedWarnings: + """Aggregates warning reports incrementally.""" + + num_reports: int = 0 + # Mapping of warning -> list of packages that emitted it. Warnings in + # headers may be referred to by multiple packages. + warnings: DefaultDict[ClangWarning, WarningInfo] = dataclasses.field( + default_factory=lambda: collections.defaultdict(WarningInfo) + ) + + _CWD_PACKAGE_RE = re.compile( + r"^(?:/build/[^/]+)?/var/(?:cache|tmp)/portage/([^/]+/[^/]+)/" + ) + + @classmethod + def _guess_package_name(cls, report: Dict[str, Any]) -> str: + """Tries to guess what package `report` is from. + + Raises: + UnknownPackageNameError if the package's name couldn't be + determined. + """ + m = cls._CWD_PACKAGE_RE.match(report.get("cwd", "")) + if not m: + raise UnknownPackageNameError() + return m.group(1) + + def add_report_json(self, report_json: Dict[str, Any]) -> int: + """Adds the given report, returning the number of warnings parsed. + + Raises: + UnknownPackageNameError if the package's name couldn't be + determined. + """ + self.num_reports += 1 + package_name = self._guess_package_name(report_json) + + num_warnings = 0 + for line in report_json.get("stdout", "").splitlines(): + if parsed := ClangWarning.try_parse_line(line): + self.warnings[parsed].packages[package_name] += 1 + num_warnings += 1 + + return num_warnings + + def add_report(self, report_file: Path) -> None: + with report_file.open(encoding="utf-8") as f: + report = json.load(f) + + try: + n = self.add_report_json(report) + except UnknownPackageNameError: + logging.warning( + "Failed guessing package name for report at %r; ignoring file", + report_file, + ) + return + + if not n: + logging.warning( + "Report at %r had no parseable warnings", report_file + ) + + +def print_aligned_counts( + name_count_map: Dict[str, int], file: Optional[IO[str]] = None +) -> None: + assert name_count_map + # Sort on value, highest first. Name breaks ties. + summary = sorted(name_count_map.items(), key=lambda x: (-x[1], x[0])) + num_col_width = len(f"{summary[0][1]:,}") + name_col_width = max(len(x) for x in name_count_map) + for name, count in summary: + fmt_name = name.rjust(name_col_width) + fmt_count = f"{count:,}".rjust(num_col_width) + print(f"\t{fmt_name}: {fmt_count}", file=file) + + +def summarize_per_package_warnings( + warning_infos: Iterable[WarningInfo], + file: Optional[IO[str]] = None, +) -> None: + warnings_per_package: DefaultDict[str, int] = collections.defaultdict(int) + for info in warning_infos: + for package_name, warning_count in info.packages.items(): + warnings_per_package[package_name] += warning_count + + if not warnings_per_package: + return + + print("## Per-package warning counts:", file=file) + print_aligned_counts(warnings_per_package, file=file) + + +def summarize_warnings_by_flag( + warnings: Dict[ClangWarning, WarningInfo], + file: Optional[IO[str]] = None, +) -> None: + if not warnings: + return + + warnings_per_flag: Counter[str] = collections.Counter() + for warning, info in warnings.items(): + warnings_per_flag[warning.name] += sum(info.packages.values()) + + print("## Instances of each fatal warning:", file=file) + print_aligned_counts(warnings_per_flag, file=file) + + +def aggregate_reports(opts: argparse.Namespace) -> None: + directory = opts.directory + aggregated = AggregatedWarnings() + for report in directory.glob("**/warnings_report*.json"): + logging.debug("Discovered report %s", report) + aggregated.add_report(report) + + if not aggregated.num_reports: + raise ValueError(f"Found no warnings report under {directory}") + + logging.info("Discovered %d report files in total", aggregated.num_reports) + summarize_per_package_warnings(aggregated.warnings.values()) + summarize_warnings_by_flag(aggregated.warnings) + + +def main(argv: List[str]) -> None: + parser = argparse.ArgumentParser( + description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + parser.add_argument( + "--debug", action="store_true", help="Enable debug logging" + ) + subparsers = parser.add_subparsers(required=True) + # b/318833638: While there's only one subparser here for the moment, more + # are expected to come (specifically, one to download logs from a CQ run). + aggregate = subparsers.add_parser( + "aggregate", + help=""" + Aggregate all -Werror reports beneath a directory. Note that this will + traverse all children of the directory, so can be used either on + unpacked -Werror reports from CQ builders, or can be used on e.g., + /build/cherry/var/lib/chromeos. + """, + ) + aggregate.set_defaults(func=aggregate_reports) + aggregate.add_argument( + "--directory", type=Path, required=True, help="Directory to inspect." + ) + + opts = parser.parse_args(argv) + + logging.basicConfig( + format=">> %(asctime)s: %(levelname)s: %(filename)s:%(lineno)d: " + "%(message)s", + level=logging.DEBUG if opts.debug else logging.INFO, + ) + + assert getattr(opts, "func", None), "Unknown subcommand?" + opts.func(opts) + + +if __name__ == "__main__": + main(sys.argv[1:]) diff --git a/llvm_tools/werror_logs_test.py b/llvm_tools/werror_logs_test.py new file mode 100755 index 00000000..acd296e6 --- /dev/null +++ b/llvm_tools/werror_logs_test.py @@ -0,0 +1,272 @@ +#!/usr/bin/env python3 +# Copyright 2024 The ChromiumOS Authors +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +"""Tests for werror_logs.py.""" + +import io +import logging +import textwrap +from typing import Dict +import unittest + +import werror_logs + + +class SilenceLogs: + """Used by Test.silence_logs to ignore all logging output.""" + + def filter(self, _record): + return False + + +def create_warning_info(packages: Dict[str, int]) -> werror_logs.WarningInfo: + """Constructs a WarningInfo conveniently in one line. + + Mostly useful because `WarningInfo` has a defaultdict field, and those + don't `assertEqual` to regular dict fields. + """ + x = werror_logs.WarningInfo() + x.packages.update(packages) + return x + + +class Test(unittest.TestCase): + """Tests for werror_logs.""" + + def silence_logs(self): + f = SilenceLogs() + log = logging.getLogger() + log.addFilter(f) + self.addCleanup(log.removeFilter, f) + + def test_clang_warning_parsing_parses_flag_errors(self): + self.assertEqual( + werror_logs.ClangWarning.try_parse_line( + "clang-17: error: optimization flag -foo is not supported " + "[-Werror,-Wfoo]" + ), + werror_logs.ClangWarning( + name="-Wfoo", + message="optimization flag -foo is not supported", + location=None, + ), + ) + + def test_clang_warning_parsing_doesnt_care_about_werror_order(self): + self.assertEqual( + werror_logs.ClangWarning.try_parse_line( + "clang-17: error: optimization flag -foo is not supported " + "[-Wfoo,-Werror]" + ), + werror_logs.ClangWarning( + name="-Wfoo", + message="optimization flag -foo is not supported", + location=None, + ), + ) + + def test_clang_warning_parsing_parses_code_errors(self): + self.assertEqual( + werror_logs.ClangWarning.try_parse_line( + "/path/to/foo/bar/baz.cc:12:34: error: don't do this " + "[-Werror,-Wbar]" + ), + werror_logs.ClangWarning( + name="-Wbar", + message="don't do this", + location=werror_logs.ClangWarningLocation( + file="/path/to/foo/bar/baz.cc", + line=12, + column=34, + ), + ), + ) + + def test_clang_warning_parsing_skips_uninteresting_lines(self): + self.silence_logs() + + pointless = ( + "", + "foo", + "error: something's wrong", + "clang-14: warning: something's wrong [-Wsomething]", + "clang-14: error: something's wrong [-Wsomething,-Wnot-werror]", + ) + for line in pointless: + self.assertIsNone( + werror_logs.ClangWarning.try_parse_line(line), line + ) + + def test_aggregation_correctly_scrapes_warnings(self): + aggregated = werror_logs.AggregatedWarnings() + aggregated.add_report_json( + { + "cwd": "/var/tmp/portage/sys-devel/llvm/foo/bar", + "stdout": textwrap.dedent( + """\ + Foo + clang-17: error: failed to blah [-Werror,-Wblah] + /path/to/file.cc:1:2: error: other error [-Werror,-Wother] + """ + ), + } + ) + aggregated.add_report_json( + { + "cwd": "/var/tmp/portage/sys-devel/llvm/foo/bar", + "stdout": textwrap.dedent( + """\ + Foo + clang-17: error: failed to blah [-Werror,-Wblah] + /path/to/file.cc:1:3: error: other error [-Werror,-Wother] + Bar + """ + ), + } + ) + + self.assertEqual(aggregated.num_reports, 2) + self.assertEqual( + dict(aggregated.warnings), + { + werror_logs.ClangWarning( + name="-Wblah", + message="failed to blah", + location=None, + ): create_warning_info( + packages={"sys-devel/llvm": 2}, + ), + werror_logs.ClangWarning( + name="-Wother", + message="other error", + location=werror_logs.ClangWarningLocation( + file="/path/to/file.cc", + line=1, + column=2, + ), + ): create_warning_info( + packages={"sys-devel/llvm": 1}, + ), + werror_logs.ClangWarning( + name="-Wother", + message="other error", + location=werror_logs.ClangWarningLocation( + file="/path/to/file.cc", + line=1, + column=3, + ), + ): create_warning_info( + packages={"sys-devel/llvm": 1}, + ), + }, + ) + + def test_aggregation_guesses_packages_correctly(self): + aggregated = werror_logs.AggregatedWarnings() + cwds = ( + "/var/tmp/portage/sys-devel/llvm/foo/bar", + "/var/cache/portage/sys-devel/llvm/foo/bar", + "/build/amd64-host/var/tmp/portage/sys-devel/llvm/foo/bar", + "/build/amd64-host/var/cache/portage/sys-devel/llvm/foo/bar", + ) + for d in cwds: + # If the directory isn't recognized, this will raise. + aggregated.add_report_json( + { + "cwd": d, + "stdout": "clang-17: error: foo [-Werror,-Wfoo]", + } + ) + + self.assertEqual(len(aggregated.warnings), 1) + warning, warning_info = next(iter(aggregated.warnings.items())) + self.assertEqual(warning.name, "-Wfoo") + self.assertEqual( + warning_info, create_warning_info({"sys-devel/llvm": len(cwds)}) + ) + + def test_aggregation_raises_if_package_name_cant_be_guessed(self): + aggregated = werror_logs.AggregatedWarnings() + with self.assertRaises(werror_logs.UnknownPackageNameError): + aggregated.add_report_json({}) + + def test_warning_by_flag_summarization_works_in_simple_case(self): + string_io = io.StringIO() + werror_logs.summarize_warnings_by_flag( + { + werror_logs.ClangWarning( + name="-Wother", + message="other error", + location=werror_logs.ClangWarningLocation( + file="/path/to/some/file.cc", + line=1, + column=2, + ), + ): create_warning_info( + { + "sys-devel/llvm": 3000, + "sys-devel/gcc": 1, + } + ), + werror_logs.ClangWarning( + name="-Wother", + message="other error", + location=werror_logs.ClangWarningLocation( + file="/path/to/some/file.cc", + line=1, + column=3, + ), + ): create_warning_info( + { + "sys-devel/llvm": 1, + } + ), + }, + file=string_io, + ) + result = string_io.getvalue() + self.assertEqual( + result, + textwrap.dedent( + """\ + ## Instances of each fatal warning: + \t-Wother: 3,002 + """ + ), + ) + + def test_warning_by_package_summarization_works_in_simple_case(self): + string_io = io.StringIO() + werror_logs.summarize_per_package_warnings( + ( + create_warning_info( + { + "sys-devel/llvm": 3000, + "sys-devel/gcc": 1, + } + ), + create_warning_info( + { + "sys-devel/llvm": 1, + } + ), + ), + file=string_io, + ) + result = string_io.getvalue() + self.assertEqual( + result, + textwrap.dedent( + """\ + ## Per-package warning counts: + \tsys-devel/llvm: 3,001 + \t sys-devel/gcc: 1 + """ + ), + ) + + +if __name__ == "__main__": + unittest.main() |