81 files changed, 2604 insertions, 1627 deletions
diff --git a/.clang-tidy b/.clang-tidy index 56938a5..1e229e5 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -2,6 +2,5 @@ Checks: 'clang-analyzer-*,readability-redundant-*,performance-*' WarningsAsErrors: 'clang-analyzer-*,readability-redundant-*,performance-*' HeaderFilterRegex: '.*' -AnalyzeTemporaryDtors: false FormatStyle: none User: user diff --git a/.github/install_bazel.sh b/.github/install_bazel.sh index 2b1f4e7..1b0d63c 100644 --- a/.github/install_bazel.sh +++ b/.github/install_bazel.sh @@ -3,11 +3,10 @@ if ! bazel version; then if [ "$arch" == "aarch64" ]; then arch="arm64" fi - echo "Installing wget and downloading $arch Bazel binary from GitHub releases." - yum install -y wget - wget "https://github.com/bazelbuild/bazel/releases/download/6.3.0/bazel-6.3.0-linux-$arch" -O /usr/local/bin/bazel - chmod +x /usr/local/bin/bazel + echo "Downloading $arch Bazel binary from GitHub releases." + curl -L -o $HOME/bin/bazel --create-dirs "https://github.com/bazelbuild/bazel/releases/download/7.1.1/bazel-7.1.1-linux-$arch" + chmod +x $HOME/bin/bazel else - # bazel is installed for the correct architecture + # Bazel is installed for the correct architecture exit 0 fi diff --git a/.github/libcxx-setup.sh b/.github/libcxx-setup.sh index 8773b9c..9aaf96a 100755 --- a/.github/libcxx-setup.sh +++ b/.github/libcxx-setup.sh @@ -3,7 +3,7 @@ set -e # Checkout LLVM sources -git clone --depth=1 https://github.com/llvm/llvm-project.git llvm-project +git clone --depth=1 --branch llvmorg-16.0.6 https://github.com/llvm/llvm-project.git llvm-project ## Setup libc++ options if [ -z "$BUILD_32_BITS" ]; then diff --git a/.github/workflows/bazel.yml b/.github/workflows/bazel.yml index 1cdc38c..a669cda 100644 --- a/.github/workflows/bazel.yml +++ b/.github/workflows/bazel.yml @@ -14,7 +14,7 @@ jobs: os: [ubuntu-latest, macos-latest, windows-latest] bzlmod: [false, true] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: mount bazel cache uses: actions/cache@v3 diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index b35200a..95e0482 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -102,13 +102,60 @@ jobs: - name: build run: cmake --build _build/ --config ${{ matrix.build_type }} - - name: setup test environment - # Make sure gmock and benchmark DLLs can be found - run: > - echo "$((Get-Item .).FullName)/_build/bin/${{ matrix.build_type }}" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append; - echo "$((Get-Item .).FullName)/_build/src/${{ matrix.build_type }}" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append; - - name: test run: ctest --test-dir _build/ -C ${{ matrix.build_type }} -VV + msys2: + name: ${{ matrix.os }}.${{ matrix.build_type }}.${{ matrix.lib }}.${{ matrix.msys2.msystem }} + runs-on: ${{ matrix.os }} + defaults: + run: + shell: msys2 {0} + strategy: + fail-fast: false + matrix: + os: [ windows-latest ] + msys2: + - { msystem: MINGW64, arch: x86_64, family: GNU, compiler: g++ } + - { msystem: MINGW32, arch: i686, family: GNU, compiler: g++ } + - { msystem: CLANG64, arch: x86_64, family: LLVM, compiler: clang++ } + - { msystem: CLANG32, arch: i686, family: LLVM, compiler: clang++ } + - { msystem: UCRT64, arch: x86_64, family: GNU, compiler: g++ } + build_type: + - Debug + - Release + lib: + - shared + - static + + steps: + - uses: actions/checkout@v2 + - name: Install Base Dependencies + uses: msys2/setup-msys2@v2 + with: + cache: false + msystem: ${{ matrix.msys2.msystem 
}} + update: true + install: >- + git + base-devel + pacboy: >- + cc:p + cmake:p + ninja:p + + - name: configure cmake + env: + CXX: ${{ matrix.msys2.compiler }} + run: > + cmake -S . -B _build/ + -GNinja + -DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON + -DBUILD_SHARED_LIBS=${{ matrix.lib == 'shared' }} + + - name: build + run: cmake --build _build/ --config ${{ matrix.build_type }} + + - name: test + run: ctest --test-dir _build/ -C ${{ matrix.build_type }} -VV diff --git a/.github/workflows/clang-format-lint.yml b/.github/workflows/clang-format-lint.yml index 77ce1f8..328fe36 100644 --- a/.github/workflows/clang-format-lint.yml +++ b/.github/workflows/clang-format-lint.yml @@ -4,7 +4,8 @@ on: pull_request: {} jobs: - build: + job: + name: check-clang-format runs-on: ubuntu-latest steps: diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml new file mode 100644 index 0000000..5d65b99 --- /dev/null +++ b/.github/workflows/pre-commit.yml @@ -0,0 +1,38 @@ +name: python + Bazel pre-commit checks + +on: + push: + branches: [ main ] + pull_request: + branches: [ main ] + +jobs: + pre-commit: + runs-on: ubuntu-latest + env: + MYPY_CACHE_DIR: "${{ github.workspace }}/.cache/mypy" + RUFF_CACHE_DIR: "${{ github.workspace }}/.cache/ruff" + PRE_COMMIT_HOME: "${{ github.workspace }}/.cache/pre-commit" + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: 3.11 + cache: pip + cache-dependency-path: pyproject.toml + - name: Install dependencies + run: python -m pip install ".[dev]" + - name: Cache pre-commit tools + uses: actions/cache@v3 + with: + path: | + ${{ env.MYPY_CACHE_DIR }} + ${{ env.RUFF_CACHE_DIR }} + ${{ env.PRE_COMMIT_HOME }} + key: ${{ runner.os }}-${{ hashFiles('.pre-commit-config.yaml') }}-linter-cache + - name: Run pre-commit checks + run: pre-commit run --all-files --verbose --show-diff-on-failure diff --git a/.github/workflows/pylint.yml b/.github/workflows/pylint.yml deleted file mode 100644 index c6939b5..0000000 --- a/.github/workflows/pylint.yml +++ /dev/null @@ -1,28 +0,0 @@ -name: pylint - -on: - push: - branches: [ main ] - pull_request: - branches: [ main ] - -jobs: - pylint: - - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v3 - - name: Set up Python 3.8 - uses: actions/setup-python@v1 - with: - python-version: 3.8 - - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install pylint pylint-exit conan - - - name: Run pylint - run: | - pylint `find . -name '*.py'|xargs` || pylint-exit $? diff --git a/.github/workflows/test_bindings.yml b/.github/workflows/test_bindings.yml index e01bb7b..436a8f9 100644 --- a/.github/workflows/test_bindings.yml +++ b/.github/workflows/test_bindings.yml @@ -13,17 +13,18 @@ jobs: strategy: fail-fast: false matrix: - os: [ ubuntu-latest, macos-latest, windows-2019 ] + os: [ ubuntu-latest, macos-latest, windows-latest ] steps: - - uses: actions/checkout@v3 - - name: Set up Python - uses: actions/setup-python@v4 + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + - name: Set up Python 3.11 + uses: actions/setup-python@v5 with: python-version: 3.11 - - name: Install GBM Python bindings on ${{ matrix.os}} - run: - python -m pip install wheel . + - name: Install GBM Python bindings on ${{ matrix.os }} + run: python -m pip install . 
- name: Run bindings example on ${{ matrix.os }} run: python bindings/python/google_benchmark/example.py diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 1f73bff..8b772cd 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -12,20 +12,19 @@ jobs: runs-on: ubuntu-latest steps: - name: Check out repo - uses: actions/checkout@v3 - - - name: Install Python 3.11 - uses: actions/setup-python@v4 + uses: actions/checkout@v4 with: - python-version: 3.11 - - - name: Build and check sdist - run: | - python setup.py sdist - - name: Upload sdist - uses: actions/upload-artifact@v3 + fetch-depth: 0 + - name: Install Python 3.12 + uses: actions/setup-python@v5 with: - name: dist + python-version: 3.12 + - run: python -m pip install build + - name: Build sdist + run: python -m build --sdist + - uses: actions/upload-artifact@v4 + with: + name: dist-sdist path: dist/*.tar.gz build_wheels: @@ -33,47 +32,59 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - os: [ubuntu-latest, macos-latest, windows-2019] + os: [ubuntu-latest, macos-13, macos-14, windows-latest] steps: - name: Check out Google Benchmark - uses: actions/checkout@v3 + uses: actions/checkout@v4 + with: + fetch-depth: 0 - name: Set up QEMU if: runner.os == 'Linux' - uses: docker/setup-qemu-action@v2 + uses: docker/setup-qemu-action@v3 with: platforms: all - name: Build wheels on ${{ matrix.os }} using cibuildwheel - uses: pypa/cibuildwheel@v2.14.1 + uses: pypa/cibuildwheel@v2.17 env: - CIBW_BUILD: 'cp38-* cp39-* cp310-* cp311-*' + CIBW_BUILD: "cp38-* cp39-* cp310-* cp311-* cp312-*" CIBW_SKIP: "*-musllinux_*" - CIBW_TEST_SKIP: "*-macosx_arm64" - CIBW_ARCHS_LINUX: x86_64 aarch64 - CIBW_ARCHS_MACOS: x86_64 arm64 - CIBW_ARCHS_WINDOWS: AMD64 + CIBW_TEST_SKIP: "cp38-macosx_*:arm64" + CIBW_ARCHS_LINUX: auto64 aarch64 + CIBW_ARCHS_WINDOWS: auto64 CIBW_BEFORE_ALL_LINUX: bash .github/install_bazel.sh + # Grab the rootless Bazel installation inside the container. + CIBW_ENVIRONMENT_LINUX: PATH=$PATH:$HOME/bin CIBW_TEST_COMMAND: python {project}/bindings/python/google_benchmark/example.py - name: Upload Google Benchmark ${{ matrix.os }} wheels - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 + with: + name: dist-${{ matrix.os }} + path: wheelhouse/*.whl + + merge_wheels: + name: Merge all built wheels into one artifact + runs-on: ubuntu-latest + needs: build_wheels + steps: + - name: Merge wheels + uses: actions/upload-artifact/merge@v4 with: name: dist - path: ./wheelhouse/*.whl + pattern: dist-* + delete-merged: true pypi_upload: name: Publish google-benchmark wheels to PyPI - needs: [build_sdist, build_wheels] + needs: [merge_wheels] runs-on: ubuntu-latest + permissions: + id-token: write steps: - - uses: actions/download-artifact@v3 - with: - name: dist - path: dist - - - uses: pypa/gh-action-pypi-publish@v1.6.4 - with: - user: __token__ - password: ${{ secrets.PYPI_PASSWORD }} + - uses: actions/download-artifact@v4 + with: + path: dist + - uses: pypa/gh-action-pypi-publish@v1 @@ -46,6 +46,7 @@ rules.ninja # bazel output symlinks. bazel-* +MODULE.bazel.lock # out-of-source build top-level folders. 
build/ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..93455ab --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,18 @@ +repos: + - repo: https://github.com/keith/pre-commit-buildifier + rev: 6.4.0 + hooks: + - id: buildifier + - id: buildifier-lint + - repo: https://github.com/pre-commit/mirrors-mypy + rev: v1.8.0 + hooks: + - id: mypy + types_or: [ python, pyi ] + args: [ "--ignore-missing-imports", "--scripts-are-modules" ] + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.3.1 + hooks: + - id: ruff + args: [ --fix, --exit-non-zero-on-fix ] + - id: ruff-format
\ No newline at end of file diff --git a/.ycm_extra_conf.py b/.ycm_extra_conf.py index 5649ddc..caf257f 100644 --- a/.ycm_extra_conf.py +++ b/.ycm_extra_conf.py @@ -1,25 +1,30 @@ import os + import ycm_core # These are the compilation flags that will be used in case there's no # compilation database set (by default, one is not set). # CHANGE THIS LIST OF FLAGS. YES, THIS IS THE DROID YOU HAVE BEEN LOOKING FOR. flags = [ -'-Wall', -'-Werror', -'-pedantic-errors', -'-std=c++0x', -'-fno-strict-aliasing', -'-O3', -'-DNDEBUG', -# ...and the same thing goes for the magic -x option which specifies the -# language that the files to be compiled are written in. This is mostly -# relevant for c++ headers. -# For a C project, you would set this to 'c' instead of 'c++'. -'-x', 'c++', -'-I', 'include', -'-isystem', '/usr/include', -'-isystem', '/usr/local/include', + "-Wall", + "-Werror", + "-pedantic-errors", + "-std=c++0x", + "-fno-strict-aliasing", + "-O3", + "-DNDEBUG", + # ...and the same thing goes for the magic -x option which specifies the + # language that the files to be compiled are written in. This is mostly + # relevant for c++ headers. + # For a C project, you would set this to 'c' instead of 'c++'. + "-x", + "c++", + "-I", + "include", + "-isystem", + "/usr/include", + "-isystem", + "/usr/local/include", ] @@ -29,87 +34,87 @@ flags = [ # # Most projects will NOT need to set this to anything; you can just change the # 'flags' list of compilation flags. Notice that YCM itself uses that approach. -compilation_database_folder = '' +compilation_database_folder = "" -if os.path.exists( compilation_database_folder ): - database = ycm_core.CompilationDatabase( compilation_database_folder ) +if os.path.exists(compilation_database_folder): + database = ycm_core.CompilationDatabase(compilation_database_folder) else: - database = None + database = None + +SOURCE_EXTENSIONS = [".cc"] -SOURCE_EXTENSIONS = [ '.cc' ] def DirectoryOfThisScript(): - return os.path.dirname( os.path.abspath( __file__ ) ) - - -def MakeRelativePathsInFlagsAbsolute( flags, working_directory ): - if not working_directory: - return list( flags ) - new_flags = [] - make_next_absolute = False - path_flags = [ '-isystem', '-I', '-iquote', '--sysroot=' ] - for flag in flags: - new_flag = flag - - if make_next_absolute: - make_next_absolute = False - if not flag.startswith( '/' ): - new_flag = os.path.join( working_directory, flag ) - - for path_flag in path_flags: - if flag == path_flag: - make_next_absolute = True - break - - if flag.startswith( path_flag ): - path = flag[ len( path_flag ): ] - new_flag = path_flag + os.path.join( working_directory, path ) - break - - if new_flag: - new_flags.append( new_flag ) - return new_flags - - -def IsHeaderFile( filename ): - extension = os.path.splitext( filename )[ 1 ] - return extension in [ '.h', '.hxx', '.hpp', '.hh' ] - - -def GetCompilationInfoForFile( filename ): - # The compilation_commands.json file generated by CMake does not have entries - # for header files. So we do our best by asking the db for flags for a - # corresponding source file, if any. If one exists, the flags for that file - # should be good enough. 
- if IsHeaderFile( filename ): - basename = os.path.splitext( filename )[ 0 ] - for extension in SOURCE_EXTENSIONS: - replacement_file = basename + extension - if os.path.exists( replacement_file ): - compilation_info = database.GetCompilationInfoForFile( - replacement_file ) - if compilation_info.compiler_flags_: - return compilation_info - return None - return database.GetCompilationInfoForFile( filename ) - - -def FlagsForFile( filename, **kwargs ): - if database: - # Bear in mind that compilation_info.compiler_flags_ does NOT return a - # python list, but a "list-like" StringVec object - compilation_info = GetCompilationInfoForFile( filename ) - if not compilation_info: - return None - - final_flags = MakeRelativePathsInFlagsAbsolute( - compilation_info.compiler_flags_, - compilation_info.compiler_working_dir_ ) - else: - relative_to = DirectoryOfThisScript() - final_flags = MakeRelativePathsInFlagsAbsolute( flags, relative_to ) - - return { - 'flags': final_flags, - 'do_cache': True - } + return os.path.dirname(os.path.abspath(__file__)) + + +def MakeRelativePathsInFlagsAbsolute(flags, working_directory): + if not working_directory: + return list(flags) + new_flags = [] + make_next_absolute = False + path_flags = ["-isystem", "-I", "-iquote", "--sysroot="] + for flag in flags: + new_flag = flag + + if make_next_absolute: + make_next_absolute = False + if not flag.startswith("/"): + new_flag = os.path.join(working_directory, flag) + + for path_flag in path_flags: + if flag == path_flag: + make_next_absolute = True + break + + if flag.startswith(path_flag): + path = flag[len(path_flag) :] + new_flag = path_flag + os.path.join(working_directory, path) + break + + if new_flag: + new_flags.append(new_flag) + return new_flags + + +def IsHeaderFile(filename): + extension = os.path.splitext(filename)[1] + return extension in [".h", ".hxx", ".hpp", ".hh"] + + +def GetCompilationInfoForFile(filename): + # The compilation_commands.json file generated by CMake does not have entries + # for header files. So we do our best by asking the db for flags for a + # corresponding source file, if any. If one exists, the flags for that file + # should be good enough. + if IsHeaderFile(filename): + basename = os.path.splitext(filename)[0] + for extension in SOURCE_EXTENSIONS: + replacement_file = basename + extension + if os.path.exists(replacement_file): + compilation_info = database.GetCompilationInfoForFile( + replacement_file + ) + if compilation_info.compiler_flags_: + return compilation_info + return None + return database.GetCompilationInfoForFile(filename) + + +def FlagsForFile(filename, **kwargs): + if database: + # Bear in mind that compilation_info.compiler_flags_ does NOT return a + # python list, but a "list-like" StringVec object + compilation_info = GetCompilationInfoForFile(filename) + if not compilation_info: + return None + + final_flags = MakeRelativePathsInFlagsAbsolute( + compilation_info.compiler_flags_, + compilation_info.compiler_working_dir_, + ) + else: + relative_to = DirectoryOfThisScript() + final_flags = MakeRelativePathsInFlagsAbsolute(flags, relative_to) + + return {"flags": final_flags, "do_cache": True} @@ -31,6 +31,7 @@ Evgeny Safronov <division494@gmail.com> Fabien Pichot <pichot.fabien@gmail.com> Federico Ficarelli <federico.ficarelli@gmail.com> Felix Homann <linuxaudio@showlabor.de> +Gergely Meszaros <maetveis@gmail.com> Gergő Szitár <szitar.gergo@gmail.com> Google Inc. 
Henrique Bucher <hbucher@gmail.com> diff --git a/BUILD.bazel b/BUILD.bazel index 60d31d2..15d8369 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -1,5 +1,22 @@ licenses(["notice"]) +COPTS = [ + "-pedantic", + "-pedantic-errors", + "-std=c++11", + "-Wall", + "-Wconversion", + "-Wextra", + "-Wshadow", + # "-Wshorten-64-to-32", + "-Wfloat-equal", + "-fstrict-aliasing", + ## assert() are used a lot in tests upstream, which may be optimised out leading to + ## unused-variable warning. + "-Wno-unused-variable", + "-Werror=old-style-cast", +] + config_setting( name = "qnx", constraint_values = ["@platforms//os:qnx"], @@ -45,28 +62,35 @@ cc_library( "include/benchmark/benchmark.h", "include/benchmark/export.h", ], - linkopts = select({ - ":windows": ["-DEFAULTLIB:shlwapi.lib"], - "//conditions:default": ["-pthread"], - }), copts = select({ ":windows": [], - "//conditions:default": ["-Werror=old-style-cast"], + "//conditions:default": COPTS, }), - strip_include_prefix = "include", - visibility = ["//visibility:public"], - # Only static linking is allowed; no .so will be produced. - # Using `defines` (i.e. not `local_defines`) means that no - # dependent rules need to bother about defining the macro. - linkstatic = True, defines = [ "BENCHMARK_STATIC_DEFINE", + "BENCHMARK_VERSION=\\\"" + (module_version() if module_version() != None else "") + "\\\"", ] + select({ ":perfcounters": ["HAVE_LIBPFM"], "//conditions:default": [], }), + linkopts = select({ + ":windows": ["-DEFAULTLIB:shlwapi.lib"], + "//conditions:default": ["-pthread"], + }), + # Only static linking is allowed; no .so will be produced. + # Using `defines` (i.e. not `local_defines`) means that no + # dependent rules need to bother about defining the macro. + linkstatic = True, + local_defines = [ + # Turn on Large-file Support + "_FILE_OFFSET_BITS=64", + "_LARGEFILE64_SOURCE", + "_LARGEFILE_SOURCE", + ], + strip_include_prefix = "include", + visibility = ["//visibility:public"], deps = select({ - ":perfcounters": ["@libpfm//:libpfm"], + ":perfcounters": ["@libpfm"], "//conditions:default": [], }), ) @@ -74,7 +98,10 @@ cc_library( cc_library( name = "benchmark_main", srcs = ["src/benchmark_main.cc"], - hdrs = ["include/benchmark/benchmark.h", "include/benchmark/export.h"], + hdrs = [ + "include/benchmark/benchmark.h", + "include/benchmark/export.h", + ], strip_include_prefix = "include", visibility = ["//visibility:public"], deps = [":benchmark"], diff --git a/CMakeLists.txt b/CMakeLists.txt index ffd7dee..23b519c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -21,7 +21,7 @@ if(BENCHMARK_FORCE_WERROR) set(BENCHMARK_ENABLE_WERROR ON) endif(BENCHMARK_FORCE_WERROR) -if(NOT MSVC) +if(NOT (MSVC OR CMAKE_CXX_SIMULATE_ID STREQUAL "MSVC")) option(BENCHMARK_BUILD_32_BITS "Build a 32 bit version of the library." OFF) else() set(BENCHMARK_BUILD_32_BITS OFF CACHE BOOL "Build a 32 bit version of the library - unsupported when using MSVC)" FORCE) @@ -45,7 +45,7 @@ option(BENCHMARK_ENABLE_LIBPFM "Enable performance counters provided by libpfm" set(CMAKE_CXX_VISIBILITY_PRESET hidden) set(CMAKE_VISIBILITY_INLINES_HIDDEN ON) -if(MSVC) +if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") # As of CMake 3.18, CMAKE_SYSTEM_PROCESSOR is not set properly for MSVC and # cross-compilation (e.g. Host=x86_64, target=aarch64) requires using the # undocumented, but working variable. 
@@ -66,7 +66,7 @@ function(should_enable_assembly_tests) return() endif() endif() - if (MSVC) + if (MSVC OR CMAKE_CXX_SIMULATE_ID STREQUAL "MSVC") return() elseif(NOT CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64") return() @@ -105,16 +105,26 @@ get_git_version(GIT_VERSION) # If no git version can be determined, use the version # from the project() command if ("${GIT_VERSION}" STREQUAL "0.0.0") - set(VERSION "${benchmark_VERSION}") + set(VERSION "v${benchmark_VERSION}") else() set(VERSION "${GIT_VERSION}") endif() + +# Normalize version: drop "v" prefix, replace first "-" with ".", +# drop everything after second "-" (including said "-"). +string(STRIP ${VERSION} VERSION) +if(VERSION MATCHES v[^-]*-) + string(REGEX REPLACE "v([^-]*)-([0-9]+)-.*" "\\1.\\2" NORMALIZED_VERSION ${VERSION}) +else() + string(REGEX REPLACE "v(.*)" "\\1" NORMALIZED_VERSION ${VERSION}) +endif() + # Tell the user what versions we are using -message(STATUS "Google Benchmark version: ${VERSION}") +message(STATUS "Google Benchmark version: ${VERSION}, normalized to ${NORMALIZED_VERSION}") # The version of the libraries -set(GENERIC_LIB_VERSION ${VERSION}) -string(SUBSTRING ${VERSION} 0 1 GENERIC_LIB_SOVERSION) +set(GENERIC_LIB_VERSION ${NORMALIZED_VERSION}) +string(SUBSTRING ${NORMALIZED_VERSION} 0 1 GENERIC_LIB_SOVERSION) # Import our CMake modules include(AddCXXCompilerFlag) @@ -128,7 +138,7 @@ if (BENCHMARK_BUILD_32_BITS) add_required_cxx_compiler_flag(-m32) endif() -if (MSVC) +if (MSVC OR CMAKE_CXX_SIMULATE_ID STREQUAL "MSVC") set(BENCHMARK_CXX_STANDARD 14) else() set(BENCHMARK_CXX_STANDARD 11) @@ -170,12 +180,17 @@ if (MSVC) set(CMAKE_EXE_LINKER_FLAGS_MINSIZEREL "${CMAKE_EXE_LINKER_FLAGS_MINSIZEREL} /LTCG") endif() else() + # Turn on Large-file Support + add_definitions(-D_FILE_OFFSET_BITS=64) + add_definitions(-D_LARGEFILE64_SOURCE) + add_definitions(-D_LARGEFILE_SOURCE) # Turn compiler warnings up to 11 add_cxx_compiler_flag(-Wall) add_cxx_compiler_flag(-Wextra) add_cxx_compiler_flag(-Wshadow) add_cxx_compiler_flag(-Wfloat-equal) add_cxx_compiler_flag(-Wold-style-cast) + add_cxx_compiler_flag(-Wconversion) if(BENCHMARK_ENABLE_WERROR) add_cxx_compiler_flag(-Werror) endif() @@ -312,7 +327,7 @@ find_package(Threads REQUIRED) cxx_feature_check(PTHREAD_AFFINITY) if (BENCHMARK_ENABLE_LIBPFM) - find_package(PFM) + find_package(PFM REQUIRED) endif() # Set up directories diff --git a/CONTRIBUTORS b/CONTRIBUTORS index 95bcad0..9ca2caa 100644 --- a/CONTRIBUTORS +++ b/CONTRIBUTORS @@ -51,10 +51,12 @@ Fanbo Meng <fanbo.meng@ibm.com> Federico Ficarelli <federico.ficarelli@gmail.com> Felix Homann <linuxaudio@showlabor.de> Geoffrey Martin-Noble <gcmn@google.com> <gmngeoffrey@gmail.com> +Gergely Meszaros <maetveis@gmail.com> Gergő Szitár <szitar.gergo@gmail.com> Hannes Hauswedell <h2@fsfe.org> Henrique Bucher <hbucher@gmail.com> Ismael Jimenez Martinez <ismael.jimenez.martinez@gmail.com> +Iakov Sergeev <yahontu@gmail.com> Jern-Kuan Leong <jernkuan@gmail.com> JianXiong Zhou <zhoujianxiong2@gmail.com> Joao Paulo Magalhaes <joaoppmagalhaes@gmail.com> @@ -1,23 +1,20 @@ # This project was upgraded with external_updater. -# Usage: tools/external_updater/updater.sh update google-benchmark +# Usage: tools/external_updater/updater.sh update external/google-benchmark # For more info, check https://cs.android.com/android/platform/superproject/+/main:tools/external_updater/README.md name: "google-benchmark" description: "A library to support the benchmarking of functions, similar to unit-tests." 
third_party { - url { - type: HOMEPAGE - value: "https://github.com/google/benchmark" - } - url { - type: GIT - value: "https://github.com/google/benchmark.git" - } - version: "v1.8.3" license_type: NOTICE last_upgrade_date { - year: 2023 - month: 9 - day: 22 + year: 2024 + month: 5 + day: 1 + } + homepage: "https://github.com/google/benchmark" + identifier { + type: "Git" + value: "https://github.com/google/benchmark.git" + version: "bc946b919cac6f25a199a526da571638cfde109f" } } diff --git a/MODULE.bazel b/MODULE.bazel index 37a5f5d..95db0b1 100644 --- a/MODULE.bazel +++ b/MODULE.bazel @@ -1,11 +1,16 @@ -module(name = "google_benchmark", version="1.8.3") - -bazel_dep(name = "bazel_skylib", version = "1.4.1") -bazel_dep(name = "platforms", version = "0.0.6") -bazel_dep(name = "rules_foreign_cc", version = "0.9.0") -bazel_dep(name = "rules_cc", version = "0.0.6") -bazel_dep(name = "rules_python", version = "0.24.0", dev_dependency = True) -bazel_dep(name = "googletest", version = "1.12.1", repo_name = "com_google_googletest", dev_dependency = True) +module( + name = "google_benchmark", + version = "1.8.3", +) + +bazel_dep(name = "bazel_skylib", version = "1.5.0") +bazel_dep(name = "platforms", version = "0.0.8") +bazel_dep(name = "rules_foreign_cc", version = "0.10.1") +bazel_dep(name = "rules_cc", version = "0.0.9") + +bazel_dep(name = "rules_python", version = "0.31.0", dev_dependency = True) +bazel_dep(name = "googletest", version = "1.12.1", dev_dependency = True, repo_name = "com_google_googletest") + bazel_dep(name = "libpfm", version = "4.11.0") # Register a toolchain for Python 3.9 to be able to build numpy. Python @@ -14,11 +19,23 @@ bazel_dep(name = "libpfm", version = "4.11.0") # of relying on the changing default version from rules_python. python = use_extension("@rules_python//python/extensions:python.bzl", "python", dev_dependency = True) +python.toolchain(python_version = "3.8") python.toolchain(python_version = "3.9") +python.toolchain(python_version = "3.10") +python.toolchain(python_version = "3.11") +python.toolchain( + is_default = True, + python_version = "3.12", +) pip = use_extension("@rules_python//python/extensions:pip.bzl", "pip", dev_dependency = True) pip.parse( - hub_name="tools_pip_deps", + hub_name = "tools_pip_deps", python_version = "3.9", - requirements_lock="//tools:requirements.txt") + requirements_lock = "//tools:requirements.txt", +) use_repo(pip, "tools_pip_deps") + +# -- bazel_dep definitions -- # + +bazel_dep(name = "nanobind_bazel", version = "1.0.0", dev_dependency = True) @@ -8,15 +8,17 @@ load("@rules_foreign_cc//foreign_cc:repositories.bzl", "rules_foreign_cc_depende rules_foreign_cc_dependencies() -load("@rules_python//python:pip.bzl", pip3_install="pip_install") +load("@rules_python//python:repositories.bzl", "py_repositories") -pip3_install( - name = "tools_pip_deps", - requirements = "//tools:requirements.txt", -) +py_repositories() + +load("@rules_python//python:pip.bzl", "pip_parse") -new_local_repository( - name = "python_headers", - build_file = "@//bindings/python:python_headers.BUILD", - path = "<PYTHON_INCLUDE_PATH>", # May be overwritten by setup.py. 
+pip_parse( + name = "tools_pip_deps", + requirements_lock = "//tools:requirements.txt", ) + +load("@tools_pip_deps//:requirements.bzl", "install_deps") + +install_deps() diff --git a/bazel/benchmark_deps.bzl b/bazel/benchmark_deps.bzl index 667065f..4fb45a5 100644 --- a/bazel/benchmark_deps.bzl +++ b/bazel/benchmark_deps.bzl @@ -1,5 +1,9 @@ -load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") +""" +This file contains the Bazel build dependencies for Google Benchmark (both C++ source and Python bindings). +""" + load("@bazel_tools//tools/build_defs/repo:git.bzl", "new_git_repository") +load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") def benchmark_deps(): """Loads dependencies required to build Google Benchmark.""" @@ -7,48 +11,41 @@ def benchmark_deps(): if "bazel_skylib" not in native.existing_rules(): http_archive( name = "bazel_skylib", - sha256 = "f7be3474d42aae265405a592bb7da8e171919d74c16f082a5457840f06054728", + sha256 = "cd55a062e763b9349921f0f5db8c3933288dc8ba4f76dd9416aac68acee3cb94", urls = [ - "https://mirror.bazel.build/github.com/bazelbuild/bazel-skylib/releases/download/1.2.1/bazel-skylib-1.2.1.tar.gz", - "https://github.com/bazelbuild/bazel-skylib/releases/download/1.2.1/bazel-skylib-1.2.1.tar.gz", + "https://mirror.bazel.build/github.com/bazelbuild/bazel-skylib/releases/download/1.5.0/bazel-skylib-1.5.0.tar.gz", + "https://github.com/bazelbuild/bazel-skylib/releases/download/1.5.0/bazel-skylib-1.5.0.tar.gz", ], ) if "rules_foreign_cc" not in native.existing_rules(): http_archive( name = "rules_foreign_cc", - sha256 = "bcd0c5f46a49b85b384906daae41d277b3dc0ff27c7c752cc51e43048a58ec83", - strip_prefix = "rules_foreign_cc-0.7.1", - url = "https://github.com/bazelbuild/rules_foreign_cc/archive/0.7.1.tar.gz", + sha256 = "476303bd0f1b04cc311fc258f1708a5f6ef82d3091e53fd1977fa20383425a6a", + strip_prefix = "rules_foreign_cc-0.10.1", + url = "https://github.com/bazelbuild/rules_foreign_cc/releases/download/0.10.1/rules_foreign_cc-0.10.1.tar.gz", ) if "rules_python" not in native.existing_rules(): http_archive( name = "rules_python", - url = "https://github.com/bazelbuild/rules_python/releases/download/0.1.0/rules_python-0.1.0.tar.gz", - sha256 = "b6d46438523a3ec0f3cead544190ee13223a52f6a6765a29eae7b7cc24cc83a0", - ) - - if "com_google_absl" not in native.existing_rules(): - http_archive( - name = "com_google_absl", - sha256 = "f41868f7a938605c92936230081175d1eae87f6ea2c248f41077c8f88316f111", - strip_prefix = "abseil-cpp-20200225.2", - urls = ["https://github.com/abseil/abseil-cpp/archive/20200225.2.tar.gz"], + sha256 = "e85ae30de33625a63eca7fc40a94fea845e641888e52f32b6beea91e8b1b2793", + strip_prefix = "rules_python-0.27.1", + url = "https://github.com/bazelbuild/rules_python/releases/download/0.27.1/rules_python-0.27.1.tar.gz", ) if "com_google_googletest" not in native.existing_rules(): new_git_repository( name = "com_google_googletest", remote = "https://github.com/google/googletest.git", - tag = "release-1.11.0", + tag = "release-1.12.1", ) if "nanobind" not in native.existing_rules(): new_git_repository( name = "nanobind", remote = "https://github.com/wjakob/nanobind.git", - tag = "v1.4.0", + tag = "v1.8.0", build_file = "@//bindings/python:nanobind.BUILD", recursive_init_submodules = True, ) diff --git a/bindings/python/BUILD b/bindings/python/BUILD deleted file mode 100644 index 9559a76..0000000 --- a/bindings/python/BUILD +++ /dev/null @@ -1,3 +0,0 @@ -exports_files(glob(["*.BUILD"])) -exports_files(["build_defs.bzl"]) - diff --git 
a/bindings/python/build_defs.bzl b/bindings/python/build_defs.bzl deleted file mode 100644 index 009820a..0000000 --- a/bindings/python/build_defs.bzl +++ /dev/null @@ -1,25 +0,0 @@ -_SHARED_LIB_SUFFIX = { - "//conditions:default": ".so", - "//:windows": ".dll", -} - -def py_extension(name, srcs, hdrs = [], copts = [], features = [], deps = []): - for shared_lib_suffix in _SHARED_LIB_SUFFIX.values(): - shared_lib_name = name + shared_lib_suffix - native.cc_binary( - name = shared_lib_name, - linkshared = True, - linkstatic = True, - srcs = srcs + hdrs, - copts = copts, - features = features, - deps = deps, - ) - - return native.py_library( - name = name, - data = select({ - platform: [name + shared_lib_suffix] - for platform, shared_lib_suffix in _SHARED_LIB_SUFFIX.items() - }), - ) diff --git a/bindings/python/google_benchmark/BUILD b/bindings/python/google_benchmark/BUILD index 89ec76e..0c8e3c1 100644 --- a/bindings/python/google_benchmark/BUILD +++ b/bindings/python/google_benchmark/BUILD @@ -1,4 +1,4 @@ -load("//bindings/python:build_defs.bzl", "py_extension") +load("@nanobind_bazel//:build_defs.bzl", "nanobind_extension") py_library( name = "google_benchmark", @@ -9,22 +9,10 @@ py_library( ], ) -py_extension( +nanobind_extension( name = "_benchmark", srcs = ["benchmark.cc"], - copts = [ - "-fexceptions", - "-fno-strict-aliasing", - ], - features = [ - "-use_header_modules", - "-parse_headers", - ], - deps = [ - "//:benchmark", - "@nanobind", - "@python_headers", - ], + deps = ["//:benchmark"], ) py_test( @@ -37,4 +25,3 @@ py_test( ":google_benchmark", ], ) - diff --git a/bindings/python/google_benchmark/__init__.py b/bindings/python/google_benchmark/__init__.py index 642d78a..c1393b4 100644 --- a/bindings/python/google_benchmark/__init__.py +++ b/bindings/python/google_benchmark/__init__.py @@ -26,50 +26,30 @@ Example usage: if __name__ == '__main__': benchmark.main() """ + import atexit from absl import app + from google_benchmark import _benchmark from google_benchmark._benchmark import ( - Counter, - kNanosecond, - kMicrosecond, - kMillisecond, - kSecond, - oNone, - o1, - oN, - oNSquared, - oNCubed, - oLogN, - oNLogN, - oAuto, - oLambda, - State, + Counter as Counter, + State as State, + kMicrosecond as kMicrosecond, + kMillisecond as kMillisecond, + kNanosecond as kNanosecond, + kSecond as kSecond, + o1 as o1, + oAuto as oAuto, + oLambda as oLambda, + oLogN as oLogN, + oN as oN, + oNCubed as oNCubed, + oNLogN as oNLogN, + oNone as oNone, + oNSquared as oNSquared, ) - - -__all__ = [ - "register", - "main", - "Counter", - "kNanosecond", - "kMicrosecond", - "kMillisecond", - "kSecond", - "oNone", - "o1", - "oN", - "oNSquared", - "oNCubed", - "oLogN", - "oNLogN", - "oAuto", - "oLambda", - "State", -] - -__version__ = "1.8.3" +from google_benchmark.version import __version__ as __version__ class __OptionMaker: @@ -97,7 +77,6 @@ class __OptionMaker: # The function that get returned on @option.range(start=0, limit=1<<5). 
def __builder_method(*args, **kwargs): - # The decorator that get called, either with the benchmared function # or the previous Options def __decorator(func_or_options): diff --git a/bindings/python/google_benchmark/example.py b/bindings/python/google_benchmark/example.py index d95a043..b5b2f88 100644 --- a/bindings/python/google_benchmark/example.py +++ b/bindings/python/google_benchmark/example.py @@ -38,6 +38,7 @@ def sum_million(state): while state: sum(range(1_000_000)) + @benchmark.register def pause_timing(state): """Pause timing every iteration.""" @@ -85,7 +86,9 @@ def custom_counters(state): # Set a counter as a rate. state.counters["foo_rate"] = Counter(num_foo, Counter.kIsRate) # Set a counter as an inverse of rate. - state.counters["foo_inv_rate"] = Counter(num_foo, Counter.kIsRate | Counter.kInvert) + state.counters["foo_inv_rate"] = Counter( + num_foo, Counter.kIsRate | Counter.kInvert + ) # Set a counter as a thread-average quantity. state.counters["foo_avg"] = Counter(num_foo, Counter.kAvgThreads) # There's also a combined flag: diff --git a/bindings/python/google_benchmark/version.py b/bindings/python/google_benchmark/version.py new file mode 100644 index 0000000..a324693 --- /dev/null +++ b/bindings/python/google_benchmark/version.py @@ -0,0 +1,7 @@ +from importlib.metadata import PackageNotFoundError, version + +try: + __version__ = version("google-benchmark") +except PackageNotFoundError: + # package is not installed + pass diff --git a/bindings/python/nanobind.BUILD b/bindings/python/nanobind.BUILD deleted file mode 100644 index cd9faf9..0000000 --- a/bindings/python/nanobind.BUILD +++ /dev/null @@ -1,17 +0,0 @@ -cc_library( - name = "nanobind", - srcs = glob([ - "src/*.cpp" - ]), - copts = ["-fexceptions"], - includes = ["include", "ext/robin_map/include"], - textual_hdrs = glob( - [ - "include/**/*.h", - "src/*.h", - "ext/robin_map/include/tsl/*.h", - ], - ), - deps = ["@python_headers"], - visibility = ["//visibility:public"], -) diff --git a/bindings/python/python_headers.BUILD b/bindings/python/python_headers.BUILD deleted file mode 100644 index 9c34cf6..0000000 --- a/bindings/python/python_headers.BUILD +++ /dev/null @@ -1,6 +0,0 @@ -cc_library( - name = "python_headers", - hdrs = glob(["**/*.h"]), - includes = ["."], - visibility = ["//visibility:public"], -) diff --git a/cmake/GetGitVersion.cmake b/cmake/GetGitVersion.cmake index 04a1f9b..b021010 100644 --- a/cmake/GetGitVersion.cmake +++ b/cmake/GetGitVersion.cmake @@ -20,38 +20,16 @@ set(__get_git_version INCLUDED) function(get_git_version var) if(GIT_EXECUTABLE) - execute_process(COMMAND ${GIT_EXECUTABLE} describe --tags --match "v[0-9]*.[0-9]*.[0-9]*" --abbrev=8 + execute_process(COMMAND ${GIT_EXECUTABLE} describe --tags --match "v[0-9]*.[0-9]*.[0-9]*" --abbrev=8 --dirty WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} RESULT_VARIABLE status - OUTPUT_VARIABLE GIT_DESCRIBE_VERSION + OUTPUT_VARIABLE GIT_VERSION ERROR_QUIET) if(status) - set(GIT_DESCRIBE_VERSION "v0.0.0") + set(GIT_VERSION "v0.0.0") endif() - - string(STRIP ${GIT_DESCRIBE_VERSION} GIT_DESCRIBE_VERSION) - if(GIT_DESCRIBE_VERSION MATCHES v[^-]*-) - string(REGEX REPLACE "v([^-]*)-([0-9]+)-.*" "\\1.\\2" GIT_VERSION ${GIT_DESCRIBE_VERSION}) - else() - string(REGEX REPLACE "v(.*)" "\\1" GIT_VERSION ${GIT_DESCRIBE_VERSION}) - endif() - - # Work out if the repository is dirty - execute_process(COMMAND ${GIT_EXECUTABLE} update-index -q --refresh - WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} - OUTPUT_QUIET - ERROR_QUIET) - execute_process(COMMAND ${GIT_EXECUTABLE} 
diff-index --name-only HEAD -- - WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} - OUTPUT_VARIABLE GIT_DIFF_INDEX - ERROR_QUIET) - string(COMPARE NOTEQUAL "${GIT_DIFF_INDEX}" "" GIT_DIRTY) - if (${GIT_DIRTY}) - set(GIT_DESCRIBE_VERSION "${GIT_DESCRIBE_VERSION}-dirty") - endif() - message(STATUS "git version: ${GIT_DESCRIBE_VERSION} normalized to ${GIT_VERSION}") else() - set(GIT_VERSION "0.0.0") + set(GIT_VERSION "v0.0.0") endif() set(${var} ${GIT_VERSION} PARENT_SCOPE) diff --git a/cmake/benchmark_main.pc.in b/cmake/benchmark_main.pc.in new file mode 100644 index 0000000..a90f3cd --- /dev/null +++ b/cmake/benchmark_main.pc.in @@ -0,0 +1,7 @@ +libdir=@CMAKE_INSTALL_FULL_LIBDIR@ + +Name: @PROJECT_NAME@ +Description: Google microbenchmark framework (with main() function) +Version: @VERSION@ +Requires: benchmark +Libs: -L${libdir} -lbenchmark_main diff --git a/docs/python_bindings.md b/docs/python_bindings.md index 6a7aab0..d9c5d2d 100644 --- a/docs/python_bindings.md +++ b/docs/python_bindings.md @@ -3,7 +3,7 @@ Python bindings are available as wheels on [PyPI](https://pypi.org/project/google-benchmark/) for importing and using Google Benchmark directly in Python. Currently, pre-built wheels exist for macOS (both ARM64 and Intel x86), Linux x86-64 and 64-bit Windows. -Supported Python versions are Python 3.7 - 3.10. +Supported Python versions are Python 3.8 - 3.12. To install Google Benchmark's Python bindings, run: @@ -25,9 +25,9 @@ python3 -m venv venv --system-site-packages source venv/bin/activate # .\venv\Scripts\Activate.ps1 on Windows # upgrade Python's system-wide packages -python -m pip install --upgrade pip setuptools wheel -# builds the wheel and stores it in the directory "wheelhouse". -python -m pip wheel . -w wheelhouse +python -m pip install --upgrade pip build +# builds the wheel and stores it in the directory "dist". +python -m build ``` NB: Building wheels from source requires Bazel. For platform-specific instructions on how to install Bazel, diff --git a/docs/reducing_variance.md b/docs/reducing_variance.md index e566ab9..105f96e 100644 --- a/docs/reducing_variance.md +++ b/docs/reducing_variance.md @@ -14,8 +14,6 @@ you might want to disable the CPU frequency scaling while running the benchmark, as well as consider other ways to stabilize the performance of your system while benchmarking. -See [Reducing Variance](reducing_variance.md) for more information. - Exactly how to do this depends on the Linux distribution, desktop environment, and installed programs. Specific details are a moving target, so we will not attempt to exhaustively document them here. @@ -67,7 +65,7 @@ program. Reducing sources of variance is OS and architecture dependent, which is one reason some companies maintain machines dedicated to performance testing. -Some of the easier and and effective ways of reducing variance on a typical +Some of the easier and effective ways of reducing variance on a typical Linux workstation are: 1. Use the performance governor as [discussed @@ -89,7 +87,7 @@ above](user_guide#disabling-cpu-frequency-scaling). 4. Close other programs that do non-trivial things based on timers, such as your web browser, desktop environment, etc. 5. Reduce the working set of your benchmark to fit within the L1 cache, but - do be aware that this may lead you to optimize for an unrelistic + do be aware that this may lead you to optimize for an unrealistic situation. 
Further resources on this topic: diff --git a/docs/releasing.md b/docs/releasing.md index cdf4159..09bf937 100644 --- a/docs/releasing.md +++ b/docs/releasing.md @@ -8,9 +8,8 @@ * `git log $(git describe --abbrev=0 --tags)..HEAD` gives you the list of commits between the last annotated tag and HEAD * Pick the most interesting. -* Create one last commit that updates the version saved in `CMakeLists.txt`, `MODULE.bazel` - and the `__version__` variable in `bindings/python/google_benchmark/__init__.py`to the - release version you're creating. (This version will be used if benchmark is installed +* Create one last commit that updates the version saved in `CMakeLists.txt` and `MODULE.bazel` + to the release version you're creating. (This version will be used if benchmark is installed from the archive you'll be creating in the next step.) ``` @@ -21,16 +20,6 @@ project (benchmark VERSION 1.8.0 LANGUAGES CXX) module(name = "com_github_google_benchmark", version="1.8.0") ``` -```python -# bindings/python/google_benchmark/__init__.py - -# ... - -__version__ = "1.8.0" # <-- change this to the release version you are creating - -# ... -``` - * Create a release through github's interface * Note this will create a lightweight tag. * Update this to an annotated tag: @@ -38,4 +27,5 @@ __version__ = "1.8.0" # <-- change this to the release version you are creating * `git tag -a -f <tag> <tag>` * `git push --force --tags origin` * Confirm that the "Build and upload Python wheels" action runs to completion - * run it manually if it hasn't run + * Run it manually if it hasn't run. + * IMPORTANT: When re-running manually, make sure to select the newly created `<tag>` as the workflow version in the "Run workflow" tab on the GitHub Actions page. diff --git a/docs/user_guide.md b/docs/user_guide.md index 2ceb13e..d22a906 100644 --- a/docs/user_guide.md +++ b/docs/user_guide.md @@ -28,6 +28,8 @@ [Templated Benchmarks](#templated-benchmarks) +[Templated Benchmarks that take arguments](#templated-benchmarks-with-arguments) + [Fixtures](#fixtures) [Custom Counters](#custom-counters) @@ -574,6 +576,30 @@ Three macros are provided for adding benchmark templates. #define BENCHMARK_TEMPLATE2(func, arg1, arg2) ``` +<a name="templated-benchmarks-with-arguments" /> + +## Templated Benchmarks that take arguments + +Sometimes there is a need to template benchmarks, and provide arguments to them. + +```c++ +template <class Q> void BM_Sequential_With_Step(benchmark::State& state, int step) { + Q q; + typename Q::value_type v; + for (auto _ : state) { + for (int i = state.range(0); i-=step; ) + q.push(v); + for (int e = state.range(0); e-=step; ) + q.Wait(&v); + } + // actually messages, not bytes: + state.SetBytesProcessed( + static_cast<int64_t>(state.iterations())*state.range(0)); +} + +BENCHMARK_TEMPLATE1_CAPTURE(BM_Sequential, WaitQueue<int>, Step1, 1)->Range(1<<0, 1<<10); +``` + <a name="fixtures" /> ## Fixtures @@ -591,10 +617,10 @@ For Example: ```c++ class MyFixture : public benchmark::Fixture { public: - void SetUp(const ::benchmark::State& state) { + void SetUp(::benchmark::State& state) { } - void TearDown(const ::benchmark::State& state) { + void TearDown(::benchmark::State& state) { } }; diff --git a/include/benchmark/benchmark.h b/include/benchmark/benchmark.h index e3857e7..08cfe29 100644 --- a/include/benchmark/benchmark.h +++ b/include/benchmark/benchmark.h @@ -302,6 +302,9 @@ class BenchmarkReporter; // Default number of minimum benchmark running time in seconds. 
const char kDefaultMinTimeStr[] = "0.5s"; +// Returns the version of the library. +BENCHMARK_EXPORT std::string GetBenchmarkVersion(); + BENCHMARK_EXPORT void PrintDefaultHelp(); BENCHMARK_EXPORT void Initialize(int* argc, char** argv, @@ -341,7 +344,7 @@ BENCHMARK_EXPORT BenchmarkReporter* CreateDefaultDisplayReporter(); // The second and third overload use the specified 'display_reporter' and // 'file_reporter' respectively. 'file_reporter' will write to the file // specified -// by '--benchmark_output'. If '--benchmark_output' is not given the +// by '--benchmark_out'. If '--benchmark_out' is not given the // 'file_reporter' is ignored. // // RETURNS: The number of matching benchmarks. @@ -584,6 +587,12 @@ inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) { inline BENCHMARK_ALWAYS_INLINE void ClobberMemory() { _ReadWriteBarrier(); } #endif #else +#ifdef BENCHMARK_HAS_CXX11 +template <class Tp> +inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp&& value) { + internal::UseCharPointer(&reinterpret_cast<char const volatile&>(value)); +} +#else template <class Tp> BENCHMARK_DEPRECATED_MSG( "The const-ref version of this method can permit " @@ -591,6 +600,12 @@ BENCHMARK_DEPRECATED_MSG( inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) { internal::UseCharPointer(&reinterpret_cast<char const volatile&>(value)); } + +template <class Tp> +inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp& value) { + internal::UseCharPointer(&reinterpret_cast<char const volatile&>(value)); +} +#endif // FIXME Add ClobberMemory() for non-gnu and non-msvc compilers, before C++11. #endif @@ -660,13 +675,15 @@ typedef std::map<std::string, Counter> UserCounters; // calculated automatically to the best fit. enum BigO { oNone, o1, oN, oNSquared, oNCubed, oLogN, oNLogN, oAuto, oLambda }; +typedef int64_t ComplexityN; + typedef int64_t IterationCount; enum StatisticUnit { kTime, kPercentage }; // BigOFunc is passed to a benchmark in order to specify the asymptotic // computational complexity for the benchmark. -typedef double(BigOFunc)(IterationCount); +typedef double(BigOFunc)(ComplexityN); // StatisticsFunc is passed to a benchmark in order to compute some descriptive // statistics over all the measurements of some type @@ -734,13 +751,13 @@ class BENCHMARK_EXPORT State { // have been called previously. // // NOTE: KeepRunning may not be used after calling either of these functions. - BENCHMARK_ALWAYS_INLINE StateIterator begin(); - BENCHMARK_ALWAYS_INLINE StateIterator end(); + inline BENCHMARK_ALWAYS_INLINE StateIterator begin(); + inline BENCHMARK_ALWAYS_INLINE StateIterator end(); // Returns true if the benchmark should continue through another iteration. // NOTE: A benchmark may not return from the test until KeepRunning() has // returned false. - bool KeepRunning(); + inline bool KeepRunning(); // Returns true iff the benchmark should run n more iterations. // REQUIRES: 'n' > 0. @@ -752,7 +769,7 @@ class BENCHMARK_EXPORT State { // while (state.KeepRunningBatch(1000)) { // // process 1000 elements // } - bool KeepRunningBatch(IterationCount n); + inline bool KeepRunningBatch(IterationCount n); // REQUIRES: timer is running and 'SkipWithMessage(...)' or // 'SkipWithError(...)' has not been called by the current thread. @@ -863,10 +880,12 @@ class BENCHMARK_EXPORT State { // and complexity_n will // represent the length of N. 
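To make two of the additions in the header hunks above concrete — the C++11 rvalue-reference overload of DoNotOptimize and the new BENCHMARK_TEMPLATE1_CAPTURE macro — here is a minimal usage sketch. BM_PushPop, the step_8 case name, and the container choice are illustrative assumptions, not part of this change:

#include <benchmark/benchmark.h>
#include <vector>

template <class Q>
void BM_PushPop(benchmark::State& state, int step) {
  for (auto _ : state) {
    Q q;
    for (int i = 0; i < step; ++i) q.push_back(i);
    // q.size() is a temporary; under C++11 this resolves to the new
    // DoNotOptimize(Tp&&) overload added above.
    benchmark::DoNotOptimize(q.size());
  }
}
// Expands to BENCHMARK_CAPTURE(BM_PushPop<std::vector<int>>, step_8, 8).
BENCHMARK_TEMPLATE1_CAPTURE(BM_PushPop, std::vector<int>, step_8, 8);

BENCHMARK_MAIN();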
BENCHMARK_ALWAYS_INLINE - void SetComplexityN(int64_t complexity_n) { complexity_n_ = complexity_n; } + void SetComplexityN(ComplexityN complexity_n) { + complexity_n_ = complexity_n; + } BENCHMARK_ALWAYS_INLINE - int64_t complexity_length_n() const { return complexity_n_; } + ComplexityN complexity_length_n() const { return complexity_n_; } // If this routine is called with items > 0, then an items/s // label is printed on the benchmark report line for the currently @@ -955,7 +974,7 @@ class BENCHMARK_EXPORT State { // items we don't need on the first cache line std::vector<int64_t> range_; - int64_t complexity_n_; + ComplexityN complexity_n_; public: // Container for user-defined counters. @@ -970,7 +989,7 @@ class BENCHMARK_EXPORT State { void StartKeepRunning(); // Implementation of KeepRunning() and KeepRunningBatch(). // is_batch must be true unless n is 1. - bool KeepRunningInternal(IterationCount n, bool is_batch); + inline bool KeepRunningInternal(IterationCount n, bool is_batch); void FinishKeepRunning(); const std::string name_; @@ -1504,7 +1523,7 @@ class Fixture : public internal::Benchmark { // /* Registers a benchmark named "BM_takes_args/int_string_test` */ // BENCHMARK_CAPTURE(BM_takes_args, int_string_test, 42, std::string("abc")); #define BENCHMARK_CAPTURE(func, test_case_name, ...) \ - BENCHMARK_PRIVATE_DECLARE(func) = \ + BENCHMARK_PRIVATE_DECLARE(_benchmark_) = \ (::benchmark::internal::RegisterBenchmarkInternal( \ new ::benchmark::internal::FunctionBenchmark( \ #func "/" #test_case_name, \ @@ -1541,6 +1560,31 @@ class Fixture : public internal::Benchmark { #define BENCHMARK_TEMPLATE(n, a) BENCHMARK_TEMPLATE1(n, a) #endif +#ifdef BENCHMARK_HAS_CXX11 +// This will register a benchmark for a templatized function, +// with the additional arguments specified by `...`. +// +// For example: +// +// template <typename T, class ...ExtraArgs>` +// void BM_takes_args(benchmark::State& state, ExtraArgs&&... extra_args) { +// [...] +//} +// /* Registers a benchmark named "BM_takes_args<void>/int_string_test` */ +// BENCHMARK_TEMPLATE1_CAPTURE(BM_takes_args, void, int_string_test, 42, +// std::string("abc")); +#define BENCHMARK_TEMPLATE1_CAPTURE(func, a, test_case_name, ...) \ + BENCHMARK_CAPTURE(func<a>, test_case_name, __VA_ARGS__) + +#define BENCHMARK_TEMPLATE2_CAPTURE(func, a, b, test_case_name, ...) \ + BENCHMARK_PRIVATE_DECLARE(func) = \ + (::benchmark::internal::RegisterBenchmarkInternal( \ + new ::benchmark::internal::FunctionBenchmark( \ + #func "<" #a "," #b ">" \ + "/" #test_case_name, \ + [](::benchmark::State& st) { func<a, b>(st, __VA_ARGS__); }))) +#endif // BENCHMARK_HAS_CXX11 + #define BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method) \ class BaseClass##_##Method##_Benchmark : public BaseClass { \ public: \ @@ -1748,6 +1792,7 @@ class BENCHMARK_EXPORT BenchmarkReporter { real_accumulated_time(0), cpu_accumulated_time(0), max_heapbytes_used(0), + use_real_time_for_initial_big_o(false), complexity(oNone), complexity_lambda(), complexity_n(0), @@ -1790,10 +1835,14 @@ class BENCHMARK_EXPORT BenchmarkReporter { // This is set to 0.0 if memory tracing is not enabled. double max_heapbytes_used; + // By default Big-O is computed for CPU time, but that is not what you want + // to happen when manual time was requested, which is stored as real time. 
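The hunks above retype the asymptotic-complexity plumbing from a bare int64_t to the new benchmark::ComplexityN alias, so BigOFunc is now double(ComplexityN). A short sketch of a user-supplied complexity lambda written against that signature; BM_Search and its ranges are hypothetical:

#include <benchmark/benchmark.h>

#include <algorithm>
#include <vector>

static void BM_Search(benchmark::State& state) {
  std::vector<int> haystack(static_cast<size_t>(state.range(0)), 1);
  for (auto _ : state) {
    benchmark::DoNotOptimize(std::find(haystack.begin(), haystack.end(), 2));
  }
  // Feeds the (now ComplexityN-typed) complexity_n_ member shown above.
  state.SetComplexityN(state.range(0));
}
// A capture-less lambda converts to BigOFunc*, i.e. double (*)(benchmark::ComplexityN).
BENCHMARK(BM_Search)
    ->Range(1 << 10, 1 << 18)
    ->Complexity([](benchmark::ComplexityN n) -> double {
      return static_cast<double>(n);
    });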
+ bool use_real_time_for_initial_big_o; + // Keep track of arguments to compute asymptotic complexity BigO complexity; BigOFunc* complexity_lambda; - int64_t complexity_n; + ComplexityN complexity_n; // what statistics to compute from the measurements const std::vector<internal::Statistics>* statistics; diff --git a/pyproject.toml b/pyproject.toml index fe8770b..62507a8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,5 +1,5 @@ [build-system] -requires = ["setuptools", "wheel"] +requires = ["setuptools", "setuptools-scm[toml]", "wheel"] build-backend = "setuptools.build_meta" [project] @@ -22,6 +22,7 @@ classifiers = [ "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", "Topic :: Software Development :: Testing", "Topic :: System :: Benchmark", ] @@ -32,6 +33,11 @@ dependencies = [ "absl-py>=0.7.1", ] +[project.optional-dependencies] +dev = [ + "pre-commit>=3.3.3", +] + [project.urls] Homepage = "https://github.com/google/benchmark" Documentation = "https://github.com/google/benchmark/tree/main/docs" @@ -46,5 +52,35 @@ zip-safe = false where = ["bindings/python"] [tool.setuptools.dynamic] -version = { attr = "google_benchmark.__version__" } readme = { file = "README.md", content-type = "text/markdown" } + +[tool.setuptools_scm] + +[tool.mypy] +check_untyped_defs = true +disallow_incomplete_defs = true +pretty = true +python_version = "3.11" +strict_optional = false +warn_unreachable = true + +[[tool.mypy.overrides]] +module = ["yaml"] +ignore_missing_imports = true + +[tool.ruff] +# explicitly tell ruff the source directory to correctly identify first-party package. +src = ["bindings/python"] + +line-length = 80 +target-version = "py311" + +[tool.ruff.lint] +# Enable pycodestyle (`E`, `W`), Pyflakes (`F`), and isort (`I`) codes by default. +select = ["E", "F", "I", "W"] +ignore = [ + "E501", # line too long +] + +[tool.ruff.lint.isort] +combine-as-imports = true @@ -1,46 +1,70 @@ import contextlib import os import platform +import re import shutil -import sysconfig from pathlib import Path +from typing import Any, Generator import setuptools from setuptools.command import build_ext - -PYTHON_INCLUDE_PATH_PLACEHOLDER = "<PYTHON_INCLUDE_PATH>" - IS_WINDOWS = platform.system() == "Windows" IS_MAC = platform.system() == "Darwin" +IS_LINUX = platform.system() == "Linux" + +# hardcoded SABI-related options. Requires that each Python interpreter +# (hermetic or not) participating is of the same major-minor version. +version_tuple = tuple(int(i) for i in platform.python_version_tuple()) +py_limited_api = version_tuple >= (3, 12) +options = {"bdist_wheel": {"py_limited_api": "cp312"}} if py_limited_api else {} + + +def is_cibuildwheel() -> bool: + return os.getenv("CIBUILDWHEEL") is not None @contextlib.contextmanager -def temp_fill_include_path(fp: str): - """Temporarily set the Python include path in a file.""" - with open(fp, "r+") as f: - try: - content = f.read() - replaced = content.replace( - PYTHON_INCLUDE_PATH_PLACEHOLDER, - Path(sysconfig.get_paths()['include']).as_posix(), +def _maybe_patch_toolchains() -> Generator[None, None, None]: + """ + Patch rules_python toolchains to ignore root user error + when run in a Docker container on Linux in cibuildwheel. 
+ """ + + def fmt_toolchain_args(matchobj): + suffix = "ignore_root_user_error = True" + callargs = matchobj.group(1) + # toolchain def is broken over multiple lines + if callargs.endswith("\n"): + callargs = callargs + " " + suffix + ",\n" + # toolchain def is on one line. + else: + callargs = callargs + ", " + suffix + return "python.toolchain(" + callargs + ")" + + CIBW_LINUX = is_cibuildwheel() and IS_LINUX + try: + if CIBW_LINUX: + module_bazel = Path("MODULE.bazel") + content: str = module_bazel.read_text() + module_bazel.write_text( + re.sub( + r"python.toolchain\(([\w\"\s,.=]*)\)", + fmt_toolchain_args, + content, + ) ) - f.seek(0) - f.write(replaced) - f.truncate() - yield - finally: - # revert to the original content after exit - f.seek(0) - f.write(content) - f.truncate() + yield + finally: + if CIBW_LINUX: + module_bazel.write_text(content) class BazelExtension(setuptools.Extension): """A C/C++ extension that is defined as a Bazel BUILD target.""" - def __init__(self, name: str, bazel_target: str): - super().__init__(name=name, sources=[]) + def __init__(self, name: str, bazel_target: str, **kwargs: Any): + super().__init__(name=name, sources=[], **kwargs) self.bazel_target = bazel_target stripped_target = bazel_target.split("//")[-1] @@ -53,53 +77,62 @@ class BuildBazelExtension(build_ext.build_ext): def run(self): for ext in self.extensions: self.bazel_build(ext) - build_ext.build_ext.run(self) - - def bazel_build(self, ext: BazelExtension): + super().run() + # explicitly call `bazel shutdown` for graceful exit + self.spawn(["bazel", "shutdown"]) + + def copy_extensions_to_source(self): + """ + Copy generated extensions into the source tree. + This is done in the ``bazel_build`` method, so it's not necessary to + do again in the `build_ext` base class. + """ + pass + + def bazel_build(self, ext: BazelExtension) -> None: """Runs the bazel build to create the package.""" - with temp_fill_include_path("WORKSPACE"): - temp_path = Path(self.build_temp) - - bazel_argv = [ - "bazel", - "build", - ext.bazel_target, - f"--symlink_prefix={temp_path / 'bazel-'}", - f"--compilation_mode={'dbg' if self.debug else 'opt'}", - # C++17 is required by nanobind - f"--cxxopt={'/std:c++17' if IS_WINDOWS else '-std=c++17'}", - ] - - if IS_WINDOWS: - # Link with python*.lib. - for library_dir in self.library_dirs: - bazel_argv.append("--linkopt=/LIBPATH:" + library_dir) - elif IS_MAC: - if platform.machine() == "x86_64": - # C++17 needs macOS 10.14 at minimum - bazel_argv.append("--macos_minimum_os=10.14") - - # cross-compilation for Mac ARM64 on GitHub Mac x86 runners. - # ARCHFLAGS is set by cibuildwheel before macOS wheel builds. - archflags = os.getenv("ARCHFLAGS", "") - if "arm64" in archflags: - bazel_argv.append("--cpu=darwin_arm64") - bazel_argv.append("--macos_cpus=arm64") - - elif platform.machine() == "arm64": - bazel_argv.append("--macos_minimum_os=11.0") - + temp_path = Path(self.build_temp) + # omit the patch version to avoid build errors if the toolchain is not + # yet registered in the current @rules_python version. + # patch version differences should be fine. 
+ python_version = ".".join(platform.python_version_tuple()[:2]) + + bazel_argv = [ + "bazel", + "build", + ext.bazel_target, + f"--symlink_prefix={temp_path / 'bazel-'}", + f"--compilation_mode={'dbg' if self.debug else 'opt'}", + # C++17 is required by nanobind + f"--cxxopt={'/std:c++17' if IS_WINDOWS else '-std=c++17'}", + f"--@rules_python//python/config_settings:python_version={python_version}", + ] + + if ext.py_limited_api: + bazel_argv += ["--@nanobind_bazel//:py-limited-api=cp312"] + + if IS_WINDOWS: + # Link with python*.lib. + for library_dir in self.library_dirs: + bazel_argv.append("--linkopt=/LIBPATH:" + library_dir) + elif IS_MAC: + # C++17 needs macOS 10.14 at minimum + bazel_argv.append("--macos_minimum_os=10.14") + + with _maybe_patch_toolchains(): self.spawn(bazel_argv) - shared_lib_suffix = '.dll' if IS_WINDOWS else '.so' - ext_name = ext.target_name + shared_lib_suffix - ext_bazel_bin_path = temp_path / 'bazel-bin' / ext.relpath / ext_name + if IS_WINDOWS: + suffix = ".pyd" + else: + suffix = ".abi3.so" if ext.py_limited_api else ".so" - ext_dest_path = Path(self.get_ext_fullpath(ext.name)) - shutil.copyfile(ext_bazel_bin_path, ext_dest_path) - - # explicitly call `bazel shutdown` for graceful exit - self.spawn(["bazel", "shutdown"]) + ext_name = ext.target_name + suffix + ext_bazel_bin_path = temp_path / "bazel-bin" / ext.relpath / ext_name + ext_dest_path = Path(self.get_ext_fullpath(ext.name)).with_name( + ext_name + ) + shutil.copyfile(ext_bazel_bin_path, ext_dest_path) setuptools.setup( @@ -108,6 +141,8 @@ setuptools.setup( BazelExtension( name="google_benchmark._benchmark", bazel_target="//bindings/python/google_benchmark:_benchmark", + py_limited_api=py_limited_api, ) ], + options=options, ) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index daf82fb..5551099 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -28,6 +28,13 @@ target_include_directories(benchmark PUBLIC $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/include> ) +set_property( + SOURCE benchmark.cc + APPEND + PROPERTY COMPILE_DEFINITIONS + BENCHMARK_VERSION="${VERSION}" +) + # libpfm, if available if (PFM_FOUND) target_link_libraries(benchmark PRIVATE PFM::libpfm) @@ -79,6 +86,7 @@ set(generated_dir "${PROJECT_BINARY_DIR}") set(version_config "${generated_dir}/${PROJECT_NAME}ConfigVersion.cmake") set(project_config "${generated_dir}/${PROJECT_NAME}Config.cmake") set(pkg_config "${generated_dir}/${PROJECT_NAME}.pc") +set(pkg_config_main "${generated_dir}/${PROJECT_NAME}_main.pc") set(targets_to_export benchmark benchmark_main) set(targets_export_name "${PROJECT_NAME}Targets") @@ -98,6 +106,7 @@ write_basic_package_version_file( ) configure_file("${PROJECT_SOURCE_DIR}/cmake/benchmark.pc.in" "${pkg_config}" @ONLY) +configure_file("${PROJECT_SOURCE_DIR}/cmake/benchmark_main.pc.in" "${pkg_config_main}" @ONLY) export ( TARGETS ${targets_to_export} @@ -126,7 +135,7 @@ if (BENCHMARK_ENABLE_INSTALL) DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}") install( - FILES "${pkg_config}" + FILES "${pkg_config}" "${pkg_config_main}" DESTINATION "${CMAKE_INSTALL_LIBDIR}/pkgconfig") install( diff --git a/src/benchmark.cc b/src/benchmark.cc index 6139e59..337bb3f 100644 --- a/src/benchmark.cc +++ b/src/benchmark.cc @@ -152,8 +152,16 @@ BENCHMARK_EXPORT std::map<std::string, std::string>*& GetGlobalContext() { return global_context; } -// FIXME: wouldn't LTO mess this up? 
-void UseCharPointer(char const volatile*) {} +static void const volatile* volatile global_force_escape_pointer; + +// FIXME: Verify if LTO still messes this up? +void UseCharPointer(char const volatile* const v) { + // We want to escape the pointer `v` so that the compiler can not eliminate + // computations that produced it. To do that, we escape the pointer by storing + // it into a volatile variable, since generally, volatile store, is not + // something the compiler is allowed to elide. + global_force_escape_pointer = reinterpret_cast<void const volatile*>(v); +} } // namespace internal @@ -399,7 +407,8 @@ void RunBenchmarks(const std::vector<BenchmarkInstance>& benchmarks, benchmarks_with_threads += (benchmark.threads() > 1); runners.emplace_back(benchmark, &perfcounters, reports_for_family); int num_repeats_of_this_instance = runners.back().GetNumRepeats(); - num_repetitions_total += num_repeats_of_this_instance; + num_repetitions_total += + static_cast<size_t>(num_repeats_of_this_instance); if (reports_for_family) reports_for_family->num_runs_total += num_repeats_of_this_instance; } @@ -577,12 +586,16 @@ size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter, Err << "A custom file reporter was provided but " "--benchmark_out=<file> was not specified." << std::endl; + Out.flush(); + Err.flush(); std::exit(1); } if (!fname.empty()) { output_file.open(fname); if (!output_file.is_open()) { Err << "invalid file name: '" << fname << "'" << std::endl; + Out.flush(); + Err.flush(); std::exit(1); } if (!file_reporter) { @@ -597,10 +610,16 @@ size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter, } std::vector<internal::BenchmarkInstance> benchmarks; - if (!FindBenchmarksInternal(spec, &benchmarks, &Err)) return 0; + if (!FindBenchmarksInternal(spec, &benchmarks, &Err)) { + Out.flush(); + Err.flush(); + return 0; + } if (benchmarks.empty()) { Err << "Failed to match any benchmarks against regex: " << spec << "\n"; + Out.flush(); + Err.flush(); return 0; } @@ -611,6 +630,8 @@ size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter, internal::RunBenchmarks(benchmarks, display_reporter, file_reporter); } + Out.flush(); + Err.flush(); return benchmarks.size(); } @@ -736,6 +757,14 @@ int InitializeStreams() { } // end namespace internal +std::string GetBenchmarkVersion() { +#ifdef BENCHMARK_VERSION + return {BENCHMARK_VERSION}; +#else + return {""}; +#endif +} + void PrintDefaultHelp() { fprintf(stdout, "benchmark" diff --git a/src/benchmark_register.cc b/src/benchmark_register.cc index e447c9a..8ade048 100644 --- a/src/benchmark_register.cc +++ b/src/benchmark_register.cc @@ -482,8 +482,9 @@ int Benchmark::ArgsCnt() const { const char* Benchmark::GetArgName(int arg) const { BM_CHECK_GE(arg, 0); - BM_CHECK_LT(arg, static_cast<int>(arg_names_.size())); - return arg_names_[arg].c_str(); + size_t uarg = static_cast<size_t>(arg); + BM_CHECK_LT(uarg, arg_names_.size()); + return arg_names_[uarg].c_str(); } TimeUnit Benchmark::GetTimeUnit() const { diff --git a/src/benchmark_register.h b/src/benchmark_register.h index 53367c7..be50265 100644 --- a/src/benchmark_register.h +++ b/src/benchmark_register.h @@ -24,7 +24,7 @@ typename std::vector<T>::iterator AddPowers(std::vector<T>* dst, T lo, T hi, static const T kmax = std::numeric_limits<T>::max(); // Space out the values in multiples of "mult" - for (T i = static_cast<T>(1); i <= hi; i *= static_cast<T>(mult)) { + for (T i = static_cast<T>(1); i <= hi; i = static_cast<T>(i * mult)) { if (i >= lo) { dst->push_back(i); } 
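// [Editor's note] The UseCharPointer() change in src/benchmark.cc above relies on
// the "escape through a volatile store" idiom. The following standalone C++ sketch
// illustrates that idiom; the names EscapePointer, g_escape_sink and ComputeAndKeep
// are invented for this example and are not part of the library.
static void const volatile* volatile g_escape_sink;

inline void EscapePointer(void const volatile* p) {
  // A volatile store generally cannot be elided, so the compiler has to keep
  // alive whatever computation produced `p`.
  g_escape_sink = p;
}

void ComputeAndKeep() {
  int x = 40 + 2;     // otherwise dead code the optimizer could drop entirely
  EscapePointer(&x);  // the escape forces `x` and its computation to be materialized
}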
@@ -52,7 +52,7 @@ void AddNegatedPowers(std::vector<T>* dst, T lo, T hi, int mult) { const auto it = AddPowers(dst, hi_complement, lo_complement, mult); - std::for_each(it, dst->end(), [](T& t) { t *= -1; }); + std::for_each(it, dst->end(), [](T& t) { t = static_cast<T>(t * -1); }); std::reverse(it, dst->end()); } diff --git a/src/benchmark_runner.cc b/src/benchmark_runner.cc index f7ae424..a74bdad 100644 --- a/src/benchmark_runner.cc +++ b/src/benchmark_runner.cc @@ -64,7 +64,7 @@ MemoryManager* memory_manager = nullptr; namespace { -static constexpr IterationCount kMaxIterations = 1000000000; +static constexpr IterationCount kMaxIterations = 1000000000000; const double kDefaultMinTime = std::strtod(::benchmark::kDefaultMinTimeStr, /*p_end*/ nullptr); @@ -96,6 +96,7 @@ BenchmarkReporter::Run CreateRunReport( } else { report.real_accumulated_time = results.real_time_used; } + report.use_real_time_for_initial_big_o = b.use_manual_time(); report.cpu_accumulated_time = results.cpu_time_used; report.complexity_n = results.complexity_n; report.complexity = b.complexity(); @@ -108,7 +109,7 @@ BenchmarkReporter::Run CreateRunReport( report.memory_result = memory_result; report.allocs_per_iter = memory_iterations ? static_cast<double>(memory_result->num_allocs) / - memory_iterations + static_cast<double>(memory_iterations) : 0; } @@ -234,7 +235,7 @@ BenchmarkRunner::BenchmarkRunner( has_explicit_iteration_count(b.iterations() != 0 || parsed_benchtime_flag.tag == BenchTimeType::ITERS), - pool(b.threads() - 1), + pool(static_cast<size_t>(b.threads() - 1)), iters(has_explicit_iteration_count ? ComputeIters(b_, parsed_benchtime_flag) : 1), @@ -325,8 +326,8 @@ IterationCount BenchmarkRunner::PredictNumItersNeeded( // So what seems to be the sufficiently-large iteration count? Round up. const IterationCount max_next_iters = static_cast<IterationCount>( - std::lround(std::max(multiplier * static_cast<double>(i.iters), - static_cast<double>(i.iters) + 1.0))); + std::llround(std::max(multiplier * static_cast<double>(i.iters), + static_cast<double>(i.iters) + 1.0))); // But we do have *some* limits though.. const IterationCount next_iters = std::min(max_next_iters, kMaxIterations); diff --git a/src/colorprint.cc b/src/colorprint.cc index 0bfd670..abc7149 100644 --- a/src/colorprint.cc +++ b/src/colorprint.cc @@ -140,12 +140,12 @@ void ColorPrintf(std::ostream& out, LogColor color, const char* fmt, // We need to flush the stream buffers into the console before each // SetConsoleTextAttribute call lest it affect the text that is already // printed but has not yet reached the console. - fflush(stdout); + out.flush(); SetConsoleTextAttribute(stdout_handle, GetPlatformColorCode(color) | FOREGROUND_INTENSITY); - vprintf(fmt, args); + out << FormatString(fmt, args); - fflush(stdout); + out.flush(); // Restores the text color. 
SetConsoleTextAttribute(stdout_handle, old_color_attrs); #else diff --git a/src/complexity.cc b/src/complexity.cc index 825c573..eee3122 100644 --- a/src/complexity.cc +++ b/src/complexity.cc @@ -37,12 +37,14 @@ BigOFunc* FittingCurve(BigO complexity) { return [](IterationCount n) -> double { return std::pow(n, 3); }; case oLogN: /* Note: can't use log2 because Android's GNU STL lacks it */ - return - [](IterationCount n) { return kLog2E * log(static_cast<double>(n)); }; + return [](IterationCount n) { + return kLog2E * std::log(static_cast<double>(n)); + }; case oNLogN: /* Note: can't use log2 because Android's GNU STL lacks it */ return [](IterationCount n) { - return kLog2E * n * log(static_cast<double>(n)); + return kLog2E * static_cast<double>(n) * + std::log(static_cast<double>(n)); }; case o1: default: @@ -75,12 +77,12 @@ std::string GetBigOString(BigO complexity) { // given by the lambda expression. // - n : Vector containing the size of the benchmark tests. // - time : Vector containing the times for the benchmark tests. -// - fitting_curve : lambda expression (e.g. [](int64_t n) {return n; };). +// - fitting_curve : lambda expression (e.g. [](ComplexityN n) {return n; };). // For a deeper explanation on the algorithm logic, please refer to // https://en.wikipedia.org/wiki/Least_squares#Least_squares,_regression_analysis_and_statistics -LeastSq MinimalLeastSq(const std::vector<int64_t>& n, +LeastSq MinimalLeastSq(const std::vector<ComplexityN>& n, const std::vector<double>& time, BigOFunc* fitting_curve) { double sigma_gn_squared = 0.0; @@ -105,12 +107,12 @@ LeastSq MinimalLeastSq(const std::vector<int64_t>& n, double rms = 0.0; for (size_t i = 0; i < n.size(); ++i) { double fit = result.coef * fitting_curve(n[i]); - rms += pow((time[i] - fit), 2); + rms += std::pow((time[i] - fit), 2); } // Normalized RMS by the mean of the observed values - double mean = sigma_time / n.size(); - result.rms = sqrt(rms / n.size()) / mean; + double mean = sigma_time / static_cast<double>(n.size()); + result.rms = std::sqrt(rms / static_cast<double>(n.size())) / mean; return result; } @@ -122,7 +124,7 @@ LeastSq MinimalLeastSq(const std::vector<int64_t>& n, // - complexity : If different than oAuto, the fitting curve will stick to // this one. If it is oAuto, it will be calculated the best // fitting curve. -LeastSq MinimalLeastSq(const std::vector<int64_t>& n, +LeastSq MinimalLeastSq(const std::vector<ComplexityN>& n, const std::vector<double>& time, const BigO complexity) { BM_CHECK_EQ(n.size(), time.size()); BM_CHECK_GE(n.size(), 2); // Do not compute fitting curve is less than two @@ -162,7 +164,7 @@ std::vector<BenchmarkReporter::Run> ComputeBigO( if (reports.size() < 2) return results; // Accumulators. 
- std::vector<int64_t> n; + std::vector<ComplexityN> n; std::vector<double> real_time; std::vector<double> cpu_time; @@ -171,8 +173,10 @@ std::vector<BenchmarkReporter::Run> ComputeBigO( BM_CHECK_GT(run.complexity_n, 0) << "Did you forget to call SetComplexityN?"; n.push_back(run.complexity_n); - real_time.push_back(run.real_accumulated_time / run.iterations); - cpu_time.push_back(run.cpu_accumulated_time / run.iterations); + real_time.push_back(run.real_accumulated_time / + static_cast<double>(run.iterations)); + cpu_time.push_back(run.cpu_accumulated_time / + static_cast<double>(run.iterations)); } LeastSq result_cpu; @@ -182,8 +186,19 @@ std::vector<BenchmarkReporter::Run> ComputeBigO( result_cpu = MinimalLeastSq(n, cpu_time, reports[0].complexity_lambda); result_real = MinimalLeastSq(n, real_time, reports[0].complexity_lambda); } else { - result_cpu = MinimalLeastSq(n, cpu_time, reports[0].complexity); - result_real = MinimalLeastSq(n, real_time, result_cpu.complexity); + const BigO* InitialBigO = &reports[0].complexity; + const bool use_real_time_for_initial_big_o = + reports[0].use_real_time_for_initial_big_o; + if (use_real_time_for_initial_big_o) { + result_real = MinimalLeastSq(n, real_time, *InitialBigO); + InitialBigO = &result_real.complexity; + // The Big-O complexity for CPU time must have the same Big-O function! + } + result_cpu = MinimalLeastSq(n, cpu_time, *InitialBigO); + InitialBigO = &result_cpu.complexity; + if (!use_real_time_for_initial_big_o) { + result_real = MinimalLeastSq(n, real_time, *InitialBigO); + } } // Drop the 'args' when reporting complexity. diff --git a/src/console_reporter.cc b/src/console_reporter.cc index 10e05e1..35c3de2 100644 --- a/src/console_reporter.cc +++ b/src/console_reporter.cc @@ -42,11 +42,15 @@ bool ConsoleReporter::ReportContext(const Context& context) { PrintBasicContext(&GetErrorStream(), context); #ifdef BENCHMARK_OS_WINDOWS - if ((output_options_ & OO_Color) && &std::cout != &GetOutputStream()) { - GetErrorStream() - << "Color printing is only supported for stdout on windows." - " Disabling color printing\n"; - output_options_ = static_cast<OutputOptions>(output_options_ & ~OO_Color); + if ((output_options_ & OO_Color)) { + auto stdOutBuf = std::cout.rdbuf(); + auto outStreamBuf = GetOutputStream().rdbuf(); + if (stdOutBuf != outStreamBuf) { + GetErrorStream() + << "Color printing is only supported for stdout on windows." + " Disabling color printing\n"; + output_options_ = static_cast<OutputOptions>(output_options_ & ~OO_Color); + } } #endif diff --git a/src/counter.cc b/src/counter.cc index cf5b78e..aa14cd8 100644 --- a/src/counter.cc +++ b/src/counter.cc @@ -27,10 +27,10 @@ double Finish(Counter const& c, IterationCount iterations, double cpu_time, v /= num_threads; } if (c.flags & Counter::kIsIterationInvariant) { - v *= iterations; + v *= static_cast<double>(iterations); } if (c.flags & Counter::kAvgIterations) { - v /= iterations; + v /= static_cast<double>(iterations); } if (c.flags & Counter::kInvert) { // Invert is *always* last. 
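[Editor's note] The MinimalLeastSq() changes above only swap the container element type to ComplexityN; the fitting math itself is unchanged. As a reference, here is a minimal, self-contained C++ sketch of a single-coefficient least-squares fit through the origin plus the mean-normalized RMS shown in the hunk. The names FitResult and FitCurve are illustrative, and the coefficient formula coef = sum(t_i * f(n_i)) / sum(f(n_i)^2) is the standard closed form under these assumptions, not a verbatim copy of the library code.

#include <cmath>
#include <cstddef>
#include <cstdint>
#include <vector>

struct FitResult {
  double coef;  // scaling constant of the fitted curve
  double rms;   // root-mean-square error, normalized by the mean observed time
};

// n: problem sizes, time: per-iteration times, f: candidate complexity curve.
FitResult FitCurve(const std::vector<int64_t>& n,
                   const std::vector<double>& time, double (*f)(int64_t)) {
  double sigma_gn_squared = 0.0, sigma_time = 0.0, sigma_time_gn = 0.0;
  for (std::size_t i = 0; i < n.size(); ++i) {
    const double gn_i = f(n[i]);
    sigma_gn_squared += gn_i * gn_i;
    sigma_time += time[i];
    sigma_time_gn += time[i] * gn_i;
  }
  FitResult result;
  result.coef = sigma_time_gn / sigma_gn_squared;  // least-squares through origin
  double rms = 0.0;
  for (std::size_t i = 0; i < n.size(); ++i) {
    const double fit = result.coef * f(n[i]);
    rms += std::pow(time[i] - fit, 2);
  }
  const double mean = sigma_time / static_cast<double>(n.size());
  result.rms = std::sqrt(rms / static_cast<double>(n.size())) / mean;
  return result;
}

For example, passing f(n) = n fits the oN curve, and the returned rms corresponds to the "_RMS" rows the reporters print.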
diff --git a/src/csv_reporter.cc b/src/csv_reporter.cc index 7b56da1..4b39e2c 100644 --- a/src/csv_reporter.cc +++ b/src/csv_reporter.cc @@ -122,13 +122,21 @@ void CSVReporter::PrintRunData(const Run& run) { } Out << ","; - Out << run.GetAdjustedRealTime() << ","; - Out << run.GetAdjustedCPUTime() << ","; + if (run.run_type != Run::RT_Aggregate || + run.aggregate_unit == StatisticUnit::kTime) { + Out << run.GetAdjustedRealTime() << ","; + Out << run.GetAdjustedCPUTime() << ","; + } else { + assert(run.aggregate_unit == StatisticUnit::kPercentage); + Out << run.real_accumulated_time << ","; + Out << run.cpu_accumulated_time << ","; + } // Do not print timeLabel on bigO and RMS report if (run.report_big_o) { Out << GetBigOString(run.complexity); - } else if (!run.report_rms) { + } else if (!run.report_rms && + run.aggregate_unit != StatisticUnit::kPercentage) { Out << GetTimeUnitString(run.time_unit); } Out << ","; diff --git a/src/cycleclock.h b/src/cycleclock.h index ae1ef2d..a258437 100644 --- a/src/cycleclock.h +++ b/src/cycleclock.h @@ -70,7 +70,7 @@ inline BENCHMARK_ALWAYS_INLINE int64_t Now() { // frequency scaling). Also note that when the Mac sleeps, this // counter pauses; it does not continue counting, nor does it // reset to zero. - return mach_absolute_time(); + return static_cast<int64_t>(mach_absolute_time()); #elif defined(BENCHMARK_OS_EMSCRIPTEN) // this goes above x86-specific code because old versions of Emscripten // define __x86_64__, although they have nothing to do with it. @@ -82,7 +82,7 @@ inline BENCHMARK_ALWAYS_INLINE int64_t Now() { #elif defined(__x86_64__) || defined(__amd64__) uint64_t low, high; __asm__ volatile("rdtsc" : "=a"(low), "=d"(high)); - return (high << 32) | low; + return static_cast<int64_t>((high << 32) | low); #elif defined(__powerpc__) || defined(__ppc__) // This returns a time-base, which is not always precisely a cycle-count. #if defined(__powerpc64__) || defined(__ppc64__) @@ -181,23 +181,25 @@ inline BENCHMARK_ALWAYS_INLINE int64_t Now() { #elif defined(__s390__) // Covers both s390 and s390x. // Return the CPU clock. uint64_t tsc; -#if defined(BENCHMARK_OS_ZOS) && defined(COMPILER_IBMXL) - // z/OS XL compiler HLASM syntax. +#if defined(BENCHMARK_OS_ZOS) + // z/OS HLASM syntax. asm(" stck %0" : "=m"(tsc) : : "cc"); #else + // Linux on Z syntax. asm("stck %0" : "=Q"(tsc) : : "cc"); #endif return tsc; #elif defined(__riscv) // RISC-V - // Use RDCYCLE (and RDCYCLEH on riscv32) + // Use RDTIME (and RDTIMEH on riscv32). + // RDCYCLE is a privileged instruction since Linux 6.6. #if __riscv_xlen == 32 uint32_t cycles_lo, cycles_hi0, cycles_hi1; // This asm also includes the PowerPC overflow handling strategy, as above. // Implemented in assembly because Clang insisted on branching. 
asm volatile(
-      "rdcycleh %0\n"
-      "rdcycle %1\n"
-      "rdcycleh %2\n"
+      "rdtimeh %0\n"
+      "rdtime %1\n"
+      "rdtimeh %2\n"
       "sub %0, %0, %2\n"
       "seqz %0, %0\n"
       "sub %0, zero, %0\n"
@@ -206,7 +208,7 @@ inline BENCHMARK_ALWAYS_INLINE int64_t Now() {
   return (static_cast<uint64_t>(cycles_hi1) << 32) | cycles_lo;
 #else
   uint64_t cycles;
-  asm volatile("rdcycle %0" : "=r"(cycles));
+  asm volatile("rdtime %0" : "=r"(cycles));
   return cycles;
 #endif
 #elif defined(__e2k__) || defined(__elbrus__)
@@ -217,10 +219,20 @@ inline BENCHMARK_ALWAYS_INLINE int64_t Now() {
   uint64_t pcycle;
   asm volatile("%0 = C15:14" : "=r"(pcycle));
   return static_cast<double>(pcycle);
+#elif defined(__alpha__)
+  // Alpha has a cycle counter, the PCC register, but it is an unsigned 32-bit
+  // integer and thus wraps every ~4s, making using it for tick counts
+  // unreliable beyond this time range. The real-time clock is low-precision,
+  // roughly ~1ms, but it is the only option that can reasonably count
+  // indefinitely.
+  struct timeval tv;
+  gettimeofday(&tv, nullptr);
+  return static_cast<int64_t>(tv.tv_sec) * 1000000 + tv.tv_usec;
 #else
-// The soft failover to a generic implementation is automatic only for ARM.
-// For other platforms the developer is expected to make an attempt to create
-// a fast implementation and use generic version if nothing better is available.
+  // The soft failover to a generic implementation is automatic only for ARM.
+  // For other platforms the developer is expected to make an attempt to create
+  // a fast implementation and use generic version if nothing better is
+  // available.
 #error You need to define CycleTimer for your OS and CPU
 #endif
 }
diff --git a/src/internal_macros.h b/src/internal_macros.h
index 8dd7d0c..f4894ba 100644
--- a/src/internal_macros.h
+++ b/src/internal_macros.h
@@ -11,11 +11,7 @@
 #endif
 
 #if defined(__clang__)
-  #if defined(__ibmxl__)
-    #if !defined(COMPILER_IBMXL)
-      #define COMPILER_IBMXL
-    #endif
-  #elif !defined(COMPILER_CLANG)
+  #if !defined(COMPILER_CLANG)
     #define COMPILER_CLANG
   #endif
 #elif defined(_MSC_VER)
diff --git a/src/json_reporter.cc b/src/json_reporter.cc
index 6559dfd..b8c8c94 100644
--- a/src/json_reporter.cc
+++ b/src/json_reporter.cc
@@ -167,12 +167,19 @@ bool JSONReporter::ReportContext(const Context& context) {
   }
   out << "],\n";
 
+  out << indent << FormatKV("library_version", GetBenchmarkVersion());
+  out << ",\n";
+
 #if defined(NDEBUG)
   const char build_type[] = "release";
 #else
   const char build_type[] = "debug";
 #endif
   out << indent << FormatKV("library_build_type", build_type);
+  out << ",\n";
+
+  // NOTE: our json schema is not strictly tied to the library version!
+ out << indent << FormatKV("json_schema_version", int64_t(1)); std::map<std::string, std::string>* global_context = internal::GetGlobalContext(); diff --git a/src/perf_counters.cc b/src/perf_counters.cc index 417acdb..2eb97eb 100644 --- a/src/perf_counters.cc +++ b/src/perf_counters.cc @@ -39,7 +39,8 @@ size_t PerfCounterValues::Read(const std::vector<int>& leaders) { auto read_bytes = ::read(lead, ptr, size); if (read_bytes >= ssize_t(sizeof(uint64_t))) { // Actual data bytes are all bytes minus initial padding - std::size_t data_bytes = read_bytes - sizeof(uint64_t); + std::size_t data_bytes = + static_cast<std::size_t>(read_bytes) - sizeof(uint64_t); // This should be very cheap since it's in hot cache std::memmove(ptr, ptr + sizeof(uint64_t), data_bytes); // Increment our counters @@ -254,7 +255,7 @@ bool PerfCounters::IsCounterSupported(const std::string&) { return false; } PerfCounters PerfCounters::Create( const std::vector<std::string>& counter_names) { if (!counter_names.empty()) { - GetErrorLogInstance() << "Performance counters not supported."; + GetErrorLogInstance() << "Performance counters not supported.\n"; } return NoCounters(); } diff --git a/src/statistics.cc b/src/statistics.cc index 844e926..16b6026 100644 --- a/src/statistics.cc +++ b/src/statistics.cc @@ -32,7 +32,7 @@ auto StatisticsSum = [](const std::vector<double>& v) { double StatisticsMean(const std::vector<double>& v) { if (v.empty()) return 0.0; - return StatisticsSum(v) * (1.0 / v.size()); + return StatisticsSum(v) * (1.0 / static_cast<double>(v.size())); } double StatisticsMedian(const std::vector<double>& v) { @@ -71,8 +71,11 @@ double StatisticsStdDev(const std::vector<double>& v) { // Sample standard deviation is undefined for n = 1 if (v.size() == 1) return 0.0; - const double avg_squares = SumSquares(v) * (1.0 / v.size()); - return Sqrt(v.size() / (v.size() - 1.0) * (avg_squares - Sqr(mean))); + const double avg_squares = + SumSquares(v) * (1.0 / static_cast<double>(v.size())); + return Sqrt(static_cast<double>(v.size()) / + (static_cast<double>(v.size()) - 1.0) * + (avg_squares - Sqr(mean))); } double StatisticsCV(const std::vector<double>& v) { @@ -81,6 +84,8 @@ double StatisticsCV(const std::vector<double>& v) { const auto stddev = StatisticsStdDev(v); const auto mean = StatisticsMean(v); + if (std::fpclassify(mean) == FP_ZERO) return 0.0; + return stddev / mean; } @@ -92,7 +97,7 @@ std::vector<BenchmarkReporter::Run> ComputeStats( auto error_count = std::count_if(reports.begin(), reports.end(), [](Run const& run) { return run.skipped; }); - if (reports.size() - error_count < 2) { + if (reports.size() - static_cast<size_t>(error_count) < 2) { // We don't report aggregated data if there was a single run. return results; } @@ -174,7 +179,7 @@ std::vector<BenchmarkReporter::Run> ComputeStats( // Similarly, if there are N repetitions with 1 iterations each, // an aggregate will be computed over N measurements, not 1. // Thus it is best to simply use the count of separate reports. 
- data.iterations = reports.size(); + data.iterations = static_cast<IterationCount>(reports.size()); data.real_accumulated_time = Stat.compute_(real_accumulated_time_stat); data.cpu_accumulated_time = Stat.compute_(cpu_accumulated_time_stat); diff --git a/src/string_util.cc b/src/string_util.cc index c69e40a..9ba63a7 100644 --- a/src/string_util.cc +++ b/src/string_util.cc @@ -56,7 +56,7 @@ void ToExponentAndMantissa(double val, int precision, double one_k, scaled /= one_k; if (scaled <= big_threshold) { mantissa_stream << scaled; - *exponent = i + 1; + *exponent = static_cast<int64_t>(i + 1); *mantissa = mantissa_stream.str(); return; } diff --git a/src/sysinfo.cc b/src/sysinfo.cc index 922e83a..7261e2a 100644 --- a/src/sysinfo.cc +++ b/src/sysinfo.cc @@ -15,6 +15,10 @@ #include "internal_macros.h" #ifdef BENCHMARK_OS_WINDOWS +#if !defined(WINVER) || WINVER < 0x0600 +#undef WINVER +#define WINVER 0x0600 +#endif // WINVER handling #include <shlwapi.h> #undef StrCat // Don't let StrCat in string_util.h be renamed to lstrcatA #include <versionhelpers.h> @@ -158,7 +162,7 @@ ValueUnion GetSysctlImp(std::string const& name) { mib[1] = HW_CPUSPEED; } - if (sysctl(mib, 2, buff.data(), &buff.Size, nullptr, 0) == -1) { + if (sysctl(mib, 2, buff.data(), &buff.size, nullptr, 0) == -1) { return ValueUnion(); } return buff; @@ -346,7 +350,7 @@ std::vector<CPUInfo::CacheInfo> GetCacheSizesWindows() { CPUInfo::CacheInfo C; C.num_sharing = static_cast<int>(b.count()); C.level = cache.Level; - C.size = cache.Size; + C.size = static_cast<int>(cache.Size); C.type = "Unknown"; switch (cache.Type) { case CacheUnified: @@ -456,6 +460,8 @@ std::string GetSystemName() { #define HOST_NAME_MAX 256 #elif defined(BENCHMARK_OS_SOLARIS) #define HOST_NAME_MAX MAXHOSTNAMELEN +#elif defined(BENCHMARK_OS_ZOS) +#define HOST_NAME_MAX _POSIX_HOST_NAME_MAX #else #pragma message("HOST_NAME_MAX not defined. using 64") #define HOST_NAME_MAX 64 @@ -468,27 +474,25 @@ std::string GetSystemName() { #endif // Catch-all POSIX block. } -int GetNumCPUs() { +int GetNumCPUsImpl() { #ifdef BENCHMARK_HAS_SYSCTL int num_cpu = -1; if (GetSysctl("hw.ncpu", &num_cpu)) return num_cpu; - fprintf(stderr, "Err: %s\n", strerror(errno)); - std::exit(EXIT_FAILURE); + PrintErrorAndDie("Err: ", strerror(errno)); #elif defined(BENCHMARK_OS_WINDOWS) SYSTEM_INFO sysinfo; // Use memset as opposed to = {} to avoid GCC missing initializer false // positives. std::memset(&sysinfo, 0, sizeof(SYSTEM_INFO)); GetSystemInfo(&sysinfo); - return sysinfo.dwNumberOfProcessors; // number of logical - // processors in the current - // group + // number of logical processors in the current group + return static_cast<int>(sysinfo.dwNumberOfProcessors); #elif defined(BENCHMARK_OS_SOLARIS) // Returns -1 in case of a failure. 
long num_cpu = sysconf(_SC_NPROCESSORS_ONLN); if (num_cpu < 0) { - fprintf(stderr, "sysconf(_SC_NPROCESSORS_ONLN) failed with error: %s\n", - strerror(errno)); + PrintErrorAndDie("sysconf(_SC_NPROCESSORS_ONLN) failed with error: ", + strerror(errno)); } return (int)num_cpu; #elif defined(BENCHMARK_OS_QNX) @@ -504,10 +508,13 @@ int GetNumCPUs() { int max_id = -1; std::ifstream f("/proc/cpuinfo"); if (!f.is_open()) { - std::cerr << "failed to open /proc/cpuinfo\n"; - return -1; + PrintErrorAndDie("Failed to open /proc/cpuinfo"); } +#if defined(__alpha__) + const std::string Key = "cpus detected"; +#else const std::string Key = "processor"; +#endif std::string ln; while (std::getline(f, ln)) { if (ln.empty()) continue; @@ -530,12 +537,10 @@ int GetNumCPUs() { } } if (f.bad()) { - std::cerr << "Failure reading /proc/cpuinfo\n"; - return -1; + PrintErrorAndDie("Failure reading /proc/cpuinfo"); } if (!f.eof()) { - std::cerr << "Failed to read to end of /proc/cpuinfo\n"; - return -1; + PrintErrorAndDie("Failed to read to end of /proc/cpuinfo"); } f.close(); @@ -549,6 +554,16 @@ int GetNumCPUs() { BENCHMARK_UNREACHABLE(); } +int GetNumCPUs() { + const int num_cpus = GetNumCPUsImpl(); + if (num_cpus < 1) { + PrintErrorAndDie( + "Unable to extract number of CPUs. If your platform uses " + "/proc/cpuinfo, custom support may need to be added."); + } + return num_cpus; +} + class ThreadAffinityGuard final { public: ThreadAffinityGuard() : reset_affinity(SetAffinity()) { @@ -651,7 +666,7 @@ double GetCPUCyclesPerSecond(CPUInfo::Scaling scaling) { &freq)) { // The value is in kHz (as the file name suggests). For example, on a // 2GHz warpstation, the file contains the value "2000000". - return freq * 1000.0; + return static_cast<double>(freq) * 1000.0; } const double error_value = -1; @@ -719,9 +734,9 @@ double GetCPUCyclesPerSecond(CPUInfo::Scaling scaling) { #endif unsigned long long hz = 0; #if defined BENCHMARK_OS_OPENBSD - if (GetSysctl(freqStr, &hz)) return hz * 1000000; + if (GetSysctl(freqStr, &hz)) return static_cast<double>(hz * 1000000); #else - if (GetSysctl(freqStr, &hz)) return hz; + if (GetSysctl(freqStr, &hz)) return static_cast<double>(hz); #endif fprintf(stderr, "Unable to determine clock rate from sysctl: %s: %s\n", freqStr, strerror(errno)); @@ -771,8 +786,9 @@ double GetCPUCyclesPerSecond(CPUInfo::Scaling scaling) { kstat_close(kc); return clock_hz; #elif defined(BENCHMARK_OS_QNX) - return static_cast<double>((int64_t)(SYSPAGE_ENTRY(cpuinfo)->speed) * - (int64_t)(1000 * 1000)); + return static_cast<double>( + static_cast<int64_t>(SYSPAGE_ENTRY(cpuinfo)->speed) * + static_cast<int64_t>(1000 * 1000)); #elif defined(BENCHMARK_OS_QURT) // QuRT doesn't provide any API to query Hexagon frequency. 
return 1000000000; @@ -820,7 +836,7 @@ std::vector<double> GetLoadAvg() { !(defined(__ANDROID__) && __ANDROID_API__ < 29) static constexpr int kMaxSamples = 3; std::vector<double> res(kMaxSamples, 0.0); - const int nelem = getloadavg(res.data(), kMaxSamples); + const size_t nelem = static_cast<size_t>(getloadavg(res.data(), kMaxSamples)); if (nelem < 1) { res.clear(); } else { diff --git a/src/timers.cc b/src/timers.cc index b23feea..d0821f3 100644 --- a/src/timers.cc +++ b/src/timers.cc @@ -102,7 +102,8 @@ double MakeTime(thread_basic_info_data_t const& info) { #endif #if defined(CLOCK_PROCESS_CPUTIME_ID) || defined(CLOCK_THREAD_CPUTIME_ID) double MakeTime(struct timespec const& ts) { - return ts.tv_sec + (static_cast<double>(ts.tv_nsec) * 1e-9); + return static_cast<double>(ts.tv_sec) + + (static_cast<double>(ts.tv_nsec) * 1e-9); } #endif @@ -181,6 +182,9 @@ double ThreadCPUUsage() { // RTEMS doesn't support CLOCK_THREAD_CPUTIME_ID. See // https://github.com/RTEMS/rtems/blob/master/cpukit/posix/src/clockgettime.c return ProcessCPUUsage(); +#elif defined(BENCHMARK_OS_ZOS) + // z/OS doesn't support CLOCK_THREAD_CPUTIME_ID. + return ProcessCPUUsage(); #elif defined(BENCHMARK_OS_SOLARIS) struct rusage ru; if (getrusage(RUSAGE_LWP, &ru) == 0) return MakeTime(ru); @@ -241,9 +245,9 @@ std::string LocalDateTimeString() { tz_offset_sign = '-'; } - tz_len = + tz_len = static_cast<size_t>( ::snprintf(tz_offset, sizeof(tz_offset), "%c%02li:%02li", - tz_offset_sign, offset_minutes / 100, offset_minutes % 100); + tz_offset_sign, offset_minutes / 100, offset_minutes % 100)); BM_CHECK(tz_len == kTzOffsetLen); ((void)tz_len); // Prevent unused variable warning in optimized build. } else { @@ -18,6 +18,10 @@ TEST_COPTS = [ # "-Wshorten-64-to-32", "-Wfloat-equal", "-fstrict-aliasing", + ## assert() are used a lot in tests upstream, which may be optimised out leading to + ## unused-variable warning. + "-Wno-unused-variable", + "-Werror=old-style-cast", ] # Some of the issues with DoNotOptimize only occur when optimization is enabled @@ -32,6 +36,7 @@ PER_SRC_TEST_ARGS = { "repetitions_test.cc": [" --benchmark_repetitions=3"], "spec_arg_test.cc": ["--benchmark_filter=BM_NotChosen"], "spec_arg_verbosity_test.cc": ["--v=42"], + "complexity_test.cc": ["--benchmark_min_time=1000000x"], } cc_library( diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index fd88131..1de175f 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -5,6 +5,8 @@ set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) include(CheckCXXCompilerFlag) +add_cxx_compiler_flag(-Wno-unused-variable) + # NOTE: Some tests use `<cassert>` to perform the test. Therefore we must # strip -DNDEBUG from the default CMake flags in DEBUG mode. 
string(TOUPPER "${CMAKE_BUILD_TYPE}" uppercase_CMAKE_BUILD_TYPE) @@ -62,30 +64,38 @@ macro(compile_output_test name) ${BENCHMARK_CXX_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT}) endmacro(compile_output_test) +macro(benchmark_add_test) + add_test(${ARGV}) + if(WIN32 AND BUILD_SHARED_LIBS) + cmake_parse_arguments(TEST "" "NAME" "" ${ARGN}) + set_tests_properties(${TEST_NAME} PROPERTIES ENVIRONMENT_MODIFICATION "PATH=path_list_prepend:$<TARGET_FILE_DIR:benchmark::benchmark>") + endif() +endmacro(benchmark_add_test) + # Demonstration executable compile_benchmark_test(benchmark_test) -add_test(NAME benchmark COMMAND benchmark_test --benchmark_min_time=0.01s) +benchmark_add_test(NAME benchmark COMMAND benchmark_test --benchmark_min_time=0.01s) compile_benchmark_test(spec_arg_test) -add_test(NAME spec_arg COMMAND spec_arg_test --benchmark_filter=BM_NotChosen) +benchmark_add_test(NAME spec_arg COMMAND spec_arg_test --benchmark_filter=BM_NotChosen) compile_benchmark_test(spec_arg_verbosity_test) -add_test(NAME spec_arg_verbosity COMMAND spec_arg_verbosity_test --v=42) +benchmark_add_test(NAME spec_arg_verbosity COMMAND spec_arg_verbosity_test --v=42) compile_benchmark_test(benchmark_setup_teardown_test) -add_test(NAME benchmark_setup_teardown COMMAND benchmark_setup_teardown_test) +benchmark_add_test(NAME benchmark_setup_teardown COMMAND benchmark_setup_teardown_test) compile_benchmark_test(filter_test) macro(add_filter_test name filter expect) - add_test(NAME ${name} COMMAND filter_test --benchmark_min_time=0.01s --benchmark_filter=${filter} ${expect}) - add_test(NAME ${name}_list_only COMMAND filter_test --benchmark_list_tests --benchmark_filter=${filter} ${expect}) + benchmark_add_test(NAME ${name} COMMAND filter_test --benchmark_min_time=0.01s --benchmark_filter=${filter} ${expect}) + benchmark_add_test(NAME ${name}_list_only COMMAND filter_test --benchmark_list_tests --benchmark_filter=${filter} ${expect}) endmacro(add_filter_test) compile_benchmark_test(benchmark_min_time_flag_time_test) -add_test(NAME min_time_flag_time COMMAND benchmark_min_time_flag_time_test) +benchmark_add_test(NAME min_time_flag_time COMMAND benchmark_min_time_flag_time_test) compile_benchmark_test(benchmark_min_time_flag_iters_test) -add_test(NAME min_time_flag_iters COMMAND benchmark_min_time_flag_iters_test) +benchmark_add_test(NAME min_time_flag_iters COMMAND benchmark_min_time_flag_iters_test) add_filter_test(filter_simple "Foo" 3) add_filter_test(filter_simple_negative "-Foo" 2) @@ -107,19 +117,19 @@ add_filter_test(filter_regex_end ".*Ba$" 1) add_filter_test(filter_regex_end_negative "-.*Ba$" 4) compile_benchmark_test(options_test) -add_test(NAME options_benchmarks COMMAND options_test --benchmark_min_time=0.01s) +benchmark_add_test(NAME options_benchmarks COMMAND options_test --benchmark_min_time=0.01s) compile_benchmark_test(basic_test) -add_test(NAME basic_benchmark COMMAND basic_test --benchmark_min_time=0.01s) +benchmark_add_test(NAME basic_benchmark COMMAND basic_test --benchmark_min_time=0.01s) compile_output_test(repetitions_test) -add_test(NAME repetitions_benchmark COMMAND repetitions_test --benchmark_min_time=0.01s --benchmark_repetitions=3) +benchmark_add_test(NAME repetitions_benchmark COMMAND repetitions_test --benchmark_min_time=0.01s --benchmark_repetitions=3) compile_benchmark_test(diagnostics_test) -add_test(NAME diagnostics_test COMMAND diagnostics_test --benchmark_min_time=0.01s) +benchmark_add_test(NAME diagnostics_test COMMAND diagnostics_test --benchmark_min_time=0.01s) 
compile_benchmark_test(skip_with_error_test) -add_test(NAME skip_with_error_test COMMAND skip_with_error_test --benchmark_min_time=0.01s) +benchmark_add_test(NAME skip_with_error_test COMMAND skip_with_error_test --benchmark_min_time=0.01s) compile_benchmark_test(donotoptimize_test) # Enable errors for deprecated deprecations (DoNotOptimize(Tp const& value)). @@ -132,58 +142,58 @@ check_cxx_compiler_flag(-O3 BENCHMARK_HAS_O3_FLAG) if (BENCHMARK_HAS_O3_FLAG) set_target_properties(donotoptimize_test PROPERTIES COMPILE_FLAGS "-O3") endif() -add_test(NAME donotoptimize_test COMMAND donotoptimize_test --benchmark_min_time=0.01s) +benchmark_add_test(NAME donotoptimize_test COMMAND donotoptimize_test --benchmark_min_time=0.01s) compile_benchmark_test(fixture_test) -add_test(NAME fixture_test COMMAND fixture_test --benchmark_min_time=0.01s) +benchmark_add_test(NAME fixture_test COMMAND fixture_test --benchmark_min_time=0.01s) compile_benchmark_test(register_benchmark_test) -add_test(NAME register_benchmark_test COMMAND register_benchmark_test --benchmark_min_time=0.01s) +benchmark_add_test(NAME register_benchmark_test COMMAND register_benchmark_test --benchmark_min_time=0.01s) compile_benchmark_test(map_test) -add_test(NAME map_test COMMAND map_test --benchmark_min_time=0.01s) +benchmark_add_test(NAME map_test COMMAND map_test --benchmark_min_time=0.01s) compile_benchmark_test(multiple_ranges_test) -add_test(NAME multiple_ranges_test COMMAND multiple_ranges_test --benchmark_min_time=0.01s) +benchmark_add_test(NAME multiple_ranges_test COMMAND multiple_ranges_test --benchmark_min_time=0.01s) compile_benchmark_test(args_product_test) -add_test(NAME args_product_test COMMAND args_product_test --benchmark_min_time=0.01s) +benchmark_add_test(NAME args_product_test COMMAND args_product_test --benchmark_min_time=0.01s) compile_benchmark_test_with_main(link_main_test) -add_test(NAME link_main_test COMMAND link_main_test --benchmark_min_time=0.01s) +benchmark_add_test(NAME link_main_test COMMAND link_main_test --benchmark_min_time=0.01s) compile_output_test(reporter_output_test) -add_test(NAME reporter_output_test COMMAND reporter_output_test --benchmark_min_time=0.01s) +benchmark_add_test(NAME reporter_output_test COMMAND reporter_output_test --benchmark_min_time=0.01s) compile_output_test(templated_fixture_test) -add_test(NAME templated_fixture_test COMMAND templated_fixture_test --benchmark_min_time=0.01s) +benchmark_add_test(NAME templated_fixture_test COMMAND templated_fixture_test --benchmark_min_time=0.01s) compile_output_test(user_counters_test) -add_test(NAME user_counters_test COMMAND user_counters_test --benchmark_min_time=0.01s) +benchmark_add_test(NAME user_counters_test COMMAND user_counters_test --benchmark_min_time=0.01s) compile_output_test(perf_counters_test) -add_test(NAME perf_counters_test COMMAND perf_counters_test --benchmark_min_time=0.01s --benchmark_perf_counters=CYCLES,BRANCHES) +benchmark_add_test(NAME perf_counters_test COMMAND perf_counters_test --benchmark_min_time=0.01s --benchmark_perf_counters=CYCLES,INSTRUCTIONS) compile_output_test(internal_threading_test) -add_test(NAME internal_threading_test COMMAND internal_threading_test --benchmark_min_time=0.01s) +benchmark_add_test(NAME internal_threading_test COMMAND internal_threading_test --benchmark_min_time=0.01s) compile_output_test(report_aggregates_only_test) -add_test(NAME report_aggregates_only_test COMMAND report_aggregates_only_test --benchmark_min_time=0.01s) +benchmark_add_test(NAME report_aggregates_only_test 
COMMAND report_aggregates_only_test --benchmark_min_time=0.01s) compile_output_test(display_aggregates_only_test) -add_test(NAME display_aggregates_only_test COMMAND display_aggregates_only_test --benchmark_min_time=0.01s) +benchmark_add_test(NAME display_aggregates_only_test COMMAND display_aggregates_only_test --benchmark_min_time=0.01s) compile_output_test(user_counters_tabular_test) -add_test(NAME user_counters_tabular_test COMMAND user_counters_tabular_test --benchmark_counters_tabular=true --benchmark_min_time=0.01s) +benchmark_add_test(NAME user_counters_tabular_test COMMAND user_counters_tabular_test --benchmark_counters_tabular=true --benchmark_min_time=0.01s) compile_output_test(user_counters_thousands_test) -add_test(NAME user_counters_thousands_test COMMAND user_counters_thousands_test --benchmark_min_time=0.01s) +benchmark_add_test(NAME user_counters_thousands_test COMMAND user_counters_thousands_test --benchmark_min_time=0.01s) compile_output_test(memory_manager_test) -add_test(NAME memory_manager_test COMMAND memory_manager_test --benchmark_min_time=0.01s) +benchmark_add_test(NAME memory_manager_test COMMAND memory_manager_test --benchmark_min_time=0.01s) # MSVC does not allow to set the language standard to C++98/03. -if(NOT CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") +if(NOT (MSVC OR CMAKE_CXX_SIMULATE_ID STREQUAL "MSVC")) compile_benchmark_test(cxx03_test) set_target_properties(cxx03_test PROPERTIES @@ -205,17 +215,11 @@ if(NOT CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") set(DISABLE_LTO_WARNINGS "${DISABLE_LTO_WARNINGS} -Wno-lto-type-mismatch") endif() set_target_properties(cxx03_test PROPERTIES LINK_FLAGS "${DISABLE_LTO_WARNINGS}") - add_test(NAME cxx03 COMMAND cxx03_test --benchmark_min_time=0.01s) + benchmark_add_test(NAME cxx03 COMMAND cxx03_test --benchmark_min_time=0.01s) endif() -# Attempt to work around flaky test failures when running on Appveyor servers. 
-if (DEFINED ENV{APPVEYOR}) - set(COMPLEXITY_MIN_TIME "0.5s") -else() - set(COMPLEXITY_MIN_TIME "0.01s") -endif() compile_output_test(complexity_test) -add_test(NAME complexity_benchmark COMMAND complexity_test --benchmark_min_time=${COMPLEXITY_MIN_TIME}) +benchmark_add_test(NAME complexity_benchmark COMMAND complexity_test --benchmark_min_time=1000000x) ############################################################################### # GoogleTest Unit Tests @@ -230,7 +234,12 @@ if (BENCHMARK_ENABLE_GTEST_TESTS) macro(add_gtest name) compile_gtest(${name}) - add_test(NAME ${name} COMMAND ${name}) + benchmark_add_test(NAME ${name} COMMAND ${name}) + if(WIN32 AND BUILD_SHARED_LIBS) + set_tests_properties(${name} PROPERTIES + ENVIRONMENT_MODIFICATION "PATH=path_list_prepend:$<TARGET_FILE_DIR:benchmark::benchmark>;PATH=path_list_prepend:$<TARGET_FILE_DIR:gmock_main>" + ) + endif() endmacro() add_gtest(benchmark_gtest) diff --git a/test/basic_test.cc b/test/basic_test.cc index cba1b0f..c25bec7 100644 --- a/test/basic_test.cc +++ b/test/basic_test.cc @@ -5,7 +5,7 @@ void BM_empty(benchmark::State& state) { for (auto _ : state) { - auto iterations = state.iterations(); + auto iterations = double(state.iterations()) * double(state.iterations()); benchmark::DoNotOptimize(iterations); } } diff --git a/test/benchmark_gtest.cc b/test/benchmark_gtest.cc index 2c9e555..0aa2552 100644 --- a/test/benchmark_gtest.cc +++ b/test/benchmark_gtest.cc @@ -38,7 +38,7 @@ TEST(AddRangeTest, Advanced64) { TEST(AddRangeTest, FullRange8) { std::vector<int8_t> dst; - AddRange(&dst, int8_t{1}, std::numeric_limits<int8_t>::max(), int8_t{8}); + AddRange(&dst, int8_t{1}, std::numeric_limits<int8_t>::max(), 8); EXPECT_THAT( dst, testing::ElementsAre(int8_t{1}, int8_t{8}, int8_t{64}, int8_t{127})); } diff --git a/test/benchmark_test.cc b/test/benchmark_test.cc index 94590d5..8b14017 100644 --- a/test/benchmark_test.cc +++ b/test/benchmark_test.cc @@ -16,6 +16,7 @@ #include <sstream> #include <string> #include <thread> +#include <type_traits> #include <utility> #include <vector> @@ -226,6 +227,31 @@ void BM_non_template_args(benchmark::State& state, int, double) { } BENCHMARK_CAPTURE(BM_non_template_args, basic_test, 0, 0); +template <class T, class U, class... ExtraArgs> +void BM_template2_capture(benchmark::State& state, ExtraArgs&&... extra_args) { + static_assert(std::is_same<T, void>::value, ""); + static_assert(std::is_same<U, char*>::value, ""); + static_assert(std::is_same<ExtraArgs..., unsigned int>::value, ""); + unsigned int dummy[sizeof...(ExtraArgs)] = {extra_args...}; + assert(dummy[0] == 42); + for (auto _ : state) { + } +} +BENCHMARK_TEMPLATE2_CAPTURE(BM_template2_capture, void, char*, foo, 42U); +BENCHMARK_CAPTURE((BM_template2_capture<void, char*>), foo, 42U); + +template <class T, class... ExtraArgs> +void BM_template1_capture(benchmark::State& state, ExtraArgs&&... 
extra_args) { + static_assert(std::is_same<T, void>::value, ""); + static_assert(std::is_same<ExtraArgs..., unsigned long>::value, ""); + unsigned long dummy[sizeof...(ExtraArgs)] = {extra_args...}; + assert(dummy[0] == 24); + for (auto _ : state) { + } +} +BENCHMARK_TEMPLATE1_CAPTURE(BM_template1_capture, void, foo, 24UL); +BENCHMARK_CAPTURE(BM_template1_capture<void>, foo, 24UL); + #endif // BENCHMARK_HAS_CXX11 static void BM_DenseThreadRanges(benchmark::State& st) { diff --git a/test/complexity_test.cc b/test/complexity_test.cc index 76891e0..0729d15 100644 --- a/test/complexity_test.cc +++ b/test/complexity_test.cc @@ -69,35 +69,44 @@ int AddComplexityTest(const std::string &test_name, void BM_Complexity_O1(benchmark::State &state) { for (auto _ : state) { - for (int i = 0; i < 1024; ++i) { - benchmark::DoNotOptimize(i); + // This test requires a non-zero CPU time to avoid divide-by-zero + benchmark::DoNotOptimize(state.iterations()); + double tmp = static_cast<double>(state.iterations()); + benchmark::DoNotOptimize(tmp); + for (benchmark::IterationCount i = 0; i < state.iterations(); ++i) { + benchmark::DoNotOptimize(state.iterations()); + tmp *= static_cast<double>(state.iterations()); + benchmark::DoNotOptimize(tmp); } + + // always 1ns per iteration + state.SetIterationTime(42 * 1e-9); } state.SetComplexityN(state.range(0)); } -BENCHMARK(BM_Complexity_O1)->Range(1, 1 << 18)->Complexity(benchmark::o1); -BENCHMARK(BM_Complexity_O1)->Range(1, 1 << 18)->Complexity(); BENCHMARK(BM_Complexity_O1) ->Range(1, 1 << 18) + ->UseManualTime() + ->Complexity(benchmark::o1); +BENCHMARK(BM_Complexity_O1)->Range(1, 1 << 18)->UseManualTime()->Complexity(); +BENCHMARK(BM_Complexity_O1) + ->Range(1, 1 << 18) + ->UseManualTime() ->Complexity([](benchmark::IterationCount) { return 1.0; }); -const char *one_test_name = "BM_Complexity_O1"; -const char *big_o_1_test_name = "BM_Complexity_O1_BigO"; -const char *rms_o_1_test_name = "BM_Complexity_O1_RMS"; -const char *enum_big_o_1 = "\\([0-9]+\\)"; -// FIXME: Tolerate both '(1)' and 'lgN' as output when the complexity is auto -// deduced. 
-// See https://github.com/google/benchmark/issues/272 -const char *auto_big_o_1 = "(\\([0-9]+\\))|(lgN)"; +const char *one_test_name = "BM_Complexity_O1/manual_time"; +const char *big_o_1_test_name = "BM_Complexity_O1/manual_time_BigO"; +const char *rms_o_1_test_name = "BM_Complexity_O1/manual_time_RMS"; +const char *enum_auto_big_o_1 = "\\([0-9]+\\)"; const char *lambda_big_o_1 = "f\\(N\\)"; // Add enum tests ADD_COMPLEXITY_CASES(one_test_name, big_o_1_test_name, rms_o_1_test_name, - enum_big_o_1, /*family_index=*/0); + enum_auto_big_o_1, /*family_index=*/0); -// Add auto enum tests +// Add auto tests ADD_COMPLEXITY_CASES(one_test_name, big_o_1_test_name, rms_o_1_test_name, - auto_big_o_1, /*family_index=*/1); + enum_auto_big_o_1, /*family_index=*/1); // Add lambda tests ADD_COMPLEXITY_CASES(one_test_name, big_o_1_test_name, rms_o_1_test_name, @@ -107,43 +116,44 @@ ADD_COMPLEXITY_CASES(one_test_name, big_o_1_test_name, rms_o_1_test_name, // --------------------------- Testing BigO O(N) --------------------------- // // ========================================================================= // -std::vector<int> ConstructRandomVector(int64_t size) { - std::vector<int> v; - v.reserve(static_cast<size_t>(size)); - for (int i = 0; i < size; ++i) { - v.push_back(static_cast<int>(std::rand() % size)); - } - return v; -} - void BM_Complexity_O_N(benchmark::State &state) { - auto v = ConstructRandomVector(state.range(0)); - // Test worst case scenario (item not in vector) - const int64_t item_not_in_vector = state.range(0) * 2; for (auto _ : state) { - auto it = std::find(v.begin(), v.end(), item_not_in_vector); - benchmark::DoNotOptimize(it); + // This test requires a non-zero CPU time to avoid divide-by-zero + benchmark::DoNotOptimize(state.iterations()); + double tmp = static_cast<double>(state.iterations()); + benchmark::DoNotOptimize(tmp); + for (benchmark::IterationCount i = 0; i < state.iterations(); ++i) { + benchmark::DoNotOptimize(state.iterations()); + tmp *= static_cast<double>(state.iterations()); + benchmark::DoNotOptimize(tmp); + } + + // 1ns per iteration per entry + state.SetIterationTime(static_cast<double>(state.range(0)) * 42 * 1e-9); } state.SetComplexityN(state.range(0)); } BENCHMARK(BM_Complexity_O_N) ->RangeMultiplier(2) - ->Range(1 << 10, 1 << 16) + ->Range(1 << 10, 1 << 20) + ->UseManualTime() ->Complexity(benchmark::oN); BENCHMARK(BM_Complexity_O_N) ->RangeMultiplier(2) - ->Range(1 << 10, 1 << 16) + ->Range(1 << 10, 1 << 20) + ->UseManualTime() + ->Complexity(); +BENCHMARK(BM_Complexity_O_N) + ->RangeMultiplier(2) + ->Range(1 << 10, 1 << 20) + ->UseManualTime() ->Complexity([](benchmark::IterationCount n) -> double { return static_cast<double>(n); }); -BENCHMARK(BM_Complexity_O_N) - ->RangeMultiplier(2) - ->Range(1 << 10, 1 << 16) - ->Complexity(); -const char *n_test_name = "BM_Complexity_O_N"; -const char *big_o_n_test_name = "BM_Complexity_O_N_BigO"; -const char *rms_o_n_test_name = "BM_Complexity_O_N_RMS"; +const char *n_test_name = "BM_Complexity_O_N/manual_time"; +const char *big_o_n_test_name = "BM_Complexity_O_N/manual_time_BigO"; +const char *rms_o_n_test_name = "BM_Complexity_O_N/manual_time_RMS"; const char *enum_auto_big_o_n = "N"; const char *lambda_big_o_n = "f\\(N\\)"; @@ -151,40 +161,57 @@ const char *lambda_big_o_n = "f\\(N\\)"; ADD_COMPLEXITY_CASES(n_test_name, big_o_n_test_name, rms_o_n_test_name, enum_auto_big_o_n, /*family_index=*/3); +// Add auto tests +ADD_COMPLEXITY_CASES(n_test_name, big_o_n_test_name, rms_o_n_test_name, + enum_auto_big_o_n, 
/*family_index=*/4);
+
 // Add lambda tests
 ADD_COMPLEXITY_CASES(n_test_name, big_o_n_test_name, rms_o_n_test_name,
-                     lambda_big_o_n, /*family_index=*/4);
+                     lambda_big_o_n, /*family_index=*/5);
 
 // ========================================================================= //
-// ------------------------- Testing BigO O(N*lgN) ------------------------- //
+// ------------------------- Testing BigO O(NlgN) ------------------------- //
 // ========================================================================= //
 
+static const double kLog2E = 1.44269504088896340736;
 static void BM_Complexity_O_N_log_N(benchmark::State &state) {
-  auto v = ConstructRandomVector(state.range(0));
   for (auto _ : state) {
-    std::sort(v.begin(), v.end());
+    // This test requires a non-zero CPU time to avoid divide-by-zero
+    benchmark::DoNotOptimize(state.iterations());
+    double tmp = static_cast<double>(state.iterations());
+    benchmark::DoNotOptimize(tmp);
+    for (benchmark::IterationCount i = 0; i < state.iterations(); ++i) {
+      benchmark::DoNotOptimize(state.iterations());
+      tmp *= static_cast<double>(state.iterations());
+      benchmark::DoNotOptimize(tmp);
+    }
+
+    state.SetIterationTime(static_cast<double>(state.range(0)) * kLog2E *
+                           std::log(state.range(0)) * 42 * 1e-9);
   }
   state.SetComplexityN(state.range(0));
 }
-static const double kLog2E = 1.44269504088896340736;
 BENCHMARK(BM_Complexity_O_N_log_N)
     ->RangeMultiplier(2)
-    ->Range(1 << 10, 1 << 16)
+    ->Range(1 << 10, 1U << 24)
+    ->UseManualTime()
     ->Complexity(benchmark::oNLogN);
 BENCHMARK(BM_Complexity_O_N_log_N)
     ->RangeMultiplier(2)
-    ->Range(1 << 10, 1 << 16)
-    ->Complexity([](benchmark::IterationCount n) {
-      return kLog2E * static_cast<double>(n) * log(static_cast<double>(n));
-    });
+    ->Range(1 << 10, 1U << 24)
+    ->UseManualTime()
+    ->Complexity();
 BENCHMARK(BM_Complexity_O_N_log_N)
     ->RangeMultiplier(2)
-    ->Range(1 << 10, 1 << 16)
-    ->Complexity();
+    ->Range(1 << 10, 1U << 24)
+    ->UseManualTime()
+    ->Complexity([](benchmark::IterationCount n) {
+      return kLog2E * static_cast<double>(n) * std::log(static_cast<double>(n));
+    });
 
-const char *n_lg_n_test_name = "BM_Complexity_O_N_log_N";
-const char *big_o_n_lg_n_test_name = "BM_Complexity_O_N_log_N_BigO";
-const char *rms_o_n_lg_n_test_name = "BM_Complexity_O_N_log_N_RMS";
+const char *n_lg_n_test_name = "BM_Complexity_O_N_log_N/manual_time";
+const char *big_o_n_lg_n_test_name = "BM_Complexity_O_N_log_N/manual_time_BigO";
+const char *rms_o_n_lg_n_test_name = "BM_Complexity_O_N_log_N/manual_time_RMS";
 const char *enum_auto_big_o_n_lg_n = "NlgN";
 const char *lambda_big_o_n_lg_n = "f\\(N\\)";
 
@@ -193,11 +220,16 @@ ADD_COMPLEXITY_CASES(n_lg_n_test_name, big_o_n_lg_n_test_name,
                      rms_o_n_lg_n_test_name, enum_auto_big_o_n_lg_n,
                      /*family_index=*/6);
 
-// Add lambda tests
+// NOTE: auto big-o is wrong.
 ADD_COMPLEXITY_CASES(n_lg_n_test_name, big_o_n_lg_n_test_name,
-                     rms_o_n_lg_n_test_name, lambda_big_o_n_lg_n,
+                     rms_o_n_lg_n_test_name, enum_auto_big_o_n_lg_n,
                      /*family_index=*/7);
 
+// Add lambda tests
+ADD_COMPLEXITY_CASES(n_lg_n_test_name, big_o_n_lg_n_test_name,
+                     rms_o_n_lg_n_test_name, lambda_big_o_n_lg_n,
+                     /*family_index=*/8);
+
 // ========================================================================= //
 // -------- Testing formatting of Complexity with captured args ------------ //
 // ========================================================================= //
 
 void BM_ComplexityCaptureArgs(benchmark::State &state, int n) {
   for (auto
_ : state) { // This test requires a non-zero CPU time to avoid divide-by-zero - auto iterations = state.iterations(); - benchmark::DoNotOptimize(iterations); + benchmark::DoNotOptimize(state.iterations()); + double tmp = static_cast<double>(state.iterations()); + benchmark::DoNotOptimize(tmp); + for (benchmark::IterationCount i = 0; i < state.iterations(); ++i) { + benchmark::DoNotOptimize(state.iterations()); + tmp *= static_cast<double>(state.iterations()); + benchmark::DoNotOptimize(tmp); + } + + state.SetIterationTime(static_cast<double>(state.range(0)) * 42 * 1e-9); } state.SetComplexityN(n); } BENCHMARK_CAPTURE(BM_ComplexityCaptureArgs, capture_test, 100) + ->UseManualTime() ->Complexity(benchmark::oN) ->Ranges({{1, 2}, {3, 4}}); const std::string complexity_capture_name = - "BM_ComplexityCaptureArgs/capture_test"; + "BM_ComplexityCaptureArgs/capture_test/manual_time"; ADD_COMPLEXITY_CASES(complexity_capture_name, complexity_capture_name + "_BigO", - complexity_capture_name + "_RMS", "N", /*family_index=*/9); + complexity_capture_name + "_RMS", "N", + /*family_index=*/9); // ========================================================================= // // --------------------------- TEST CASES END ------------------------------ // diff --git a/test/diagnostics_test.cc b/test/diagnostics_test.cc index 0cd3edb..7c68a98 100644 --- a/test/diagnostics_test.cc +++ b/test/diagnostics_test.cc @@ -49,7 +49,7 @@ void BM_diagnostic_test(benchmark::State& state) { if (called_once == false) try_invalid_pause_resume(state); for (auto _ : state) { - auto iterations = state.iterations(); + auto iterations = double(state.iterations()) * double(state.iterations()); benchmark::DoNotOptimize(iterations); } @@ -65,7 +65,7 @@ void BM_diagnostic_test_keep_running(benchmark::State& state) { if (called_once == false) try_invalid_pause_resume(state); while (state.KeepRunning()) { - auto iterations = state.iterations(); + auto iterations = double(state.iterations()) * double(state.iterations()); benchmark::DoNotOptimize(iterations); } diff --git a/test/link_main_test.cc b/test/link_main_test.cc index e806500..131937e 100644 --- a/test/link_main_test.cc +++ b/test/link_main_test.cc @@ -2,7 +2,7 @@ void BM_empty(benchmark::State& state) { for (auto _ : state) { - auto iterations = state.iterations(); + auto iterations = double(state.iterations()) * double(state.iterations()); benchmark::DoNotOptimize(iterations); } } diff --git a/test/memory_manager_test.cc b/test/memory_manager_test.cc index d94bd51..4df674d 100644 --- a/test/memory_manager_test.cc +++ b/test/memory_manager_test.cc @@ -14,7 +14,7 @@ class TestMemoryManager : public benchmark::MemoryManager { void BM_empty(benchmark::State& state) { for (auto _ : state) { - auto iterations = state.iterations(); + auto iterations = double(state.iterations()) * double(state.iterations()); benchmark::DoNotOptimize(iterations); } } diff --git a/test/output_test_helper.cc b/test/output_test_helper.cc index 2567370..265f28a 100644 --- a/test/output_test_helper.cc +++ b/test/output_test_helper.cc @@ -65,6 +65,7 @@ SubMap& GetSubstitutions() { {"%csv_us_report", "[0-9]+," + safe_dec_re + "," + safe_dec_re + ",us,,,,,"}, {"%csv_ms_report", "[0-9]+," + safe_dec_re + "," + safe_dec_re + ",ms,,,,,"}, {"%csv_s_report", "[0-9]+," + safe_dec_re + "," + safe_dec_re + ",s,,,,,"}, + {"%csv_cv_report", "[0-9]+," + safe_dec_re + "," + safe_dec_re + ",,,,,,"}, {"%csv_bytes_report", "[0-9]+," + safe_dec_re + "," + safe_dec_re + ",ns," + safe_dec_re + ",,,,"}, {"%csv_items_report", 
diff --git a/test/perf_counters_gtest.cc b/test/perf_counters_gtest.cc index 54c7863..2e63049 100644 --- a/test/perf_counters_gtest.cc +++ b/test/perf_counters_gtest.cc @@ -41,7 +41,7 @@ TEST(PerfCountersTest, NegativeTest) { return; } EXPECT_TRUE(PerfCounters::Initialize()); - // Sanity checks + // Safety checks // Create() will always create a valid object, even if passed no or // wrong arguments as the new behavior is to warn and drop unsupported // counters diff --git a/test/perf_counters_test.cc b/test/perf_counters_test.cc index b0a3ab0..3cc593e 100644 --- a/test/perf_counters_test.cc +++ b/test/perf_counters_test.cc @@ -14,7 +14,7 @@ BM_DECLARE_string(benchmark_perf_counters); static void BM_Simple(benchmark::State& state) { for (auto _ : state) { - auto iterations = state.iterations(); + auto iterations = double(state.iterations()) * double(state.iterations()); benchmark::DoNotOptimize(iterations); } } diff --git a/test/reporter_output_test.cc b/test/reporter_output_test.cc index 2eb545a..7867165 100644 --- a/test/reporter_output_test.cc +++ b/test/reporter_output_test.cc @@ -55,6 +55,9 @@ static int AddContextCases() { {{"Load Average: (%float, ){0,2}%float$", MR_Next}}); } AddCases(TC_JSONOut, {{"\"load_avg\": \\[(%float,?){0,3}],$", MR_Next}}); + AddCases(TC_JSONOut, {{"\"library_version\": \".*\",$", MR_Next}}); + AddCases(TC_JSONOut, {{"\"library_build_type\": \".*\",$", MR_Next}}); + AddCases(TC_JSONOut, {{"\"json_schema_version\": 1$", MR_Next}}); return 0; } int dummy_register = AddContextCases(); @@ -93,7 +96,7 @@ ADD_CASES(TC_CSVOut, {{"^\"BM_basic\",%csv_report$"}}); void BM_bytes_per_second(benchmark::State& state) { for (auto _ : state) { // This test requires a non-zero CPU time to avoid divide-by-zero - auto iterations = state.iterations(); + auto iterations = double(state.iterations()) * double(state.iterations()); benchmark::DoNotOptimize(iterations); } state.SetBytesProcessed(1); @@ -125,7 +128,7 @@ ADD_CASES(TC_CSVOut, {{"^\"BM_bytes_per_second\",%csv_bytes_report$"}}); void BM_items_per_second(benchmark::State& state) { for (auto _ : state) { // This test requires a non-zero CPU time to avoid divide-by-zero - auto iterations = state.iterations(); + auto iterations = double(state.iterations()) * double(state.iterations()); benchmark::DoNotOptimize(iterations); } state.SetItemsProcessed(1); @@ -406,7 +409,7 @@ ADD_CASES(TC_ConsoleOut, {{"^BM_BigArgs/1073741824 %console_report$"}, void BM_Complexity_O1(benchmark::State& state) { for (auto _ : state) { // This test requires a non-zero CPU time to avoid divide-by-zero - auto iterations = state.iterations(); + auto iterations = double(state.iterations()) * double(state.iterations()); benchmark::DoNotOptimize(iterations); } state.SetComplexityN(state.range(0)); @@ -1088,7 +1091,7 @@ ADD_CASES(TC_CSVOut, {{"^\"BM_UserPercentStats/iterations:5/repeats:3/" {"^\"BM_UserPercentStats/iterations:5/repeats:3/" "manual_time_stddev\",%csv_report$"}, {"^\"BM_UserPercentStats/iterations:5/repeats:3/" - "manual_time_\",%csv_report$"}}); + "manual_time_\",%csv_cv_report$"}}); // ========================================================================= // // ------------------------- Testing StrEscape JSON ------------------------ // diff --git a/test/skip_with_error_test.cc b/test/skip_with_error_test.cc index b4c5e15..2139a19 100644 --- a/test/skip_with_error_test.cc +++ b/test/skip_with_error_test.cc @@ -143,7 +143,7 @@ ADD_CASES("BM_error_during_running_ranged_for", void BM_error_after_running(benchmark::State& state) { for (auto 
_ : state) { - auto iterations = state.iterations(); + auto iterations = double(state.iterations()) * double(state.iterations()); benchmark::DoNotOptimize(iterations); } if (state.thread_index() <= (state.threads() / 2)) diff --git a/test/statistics_gtest.cc b/test/statistics_gtest.cc index 1de2d87..48c7726 100644 --- a/test/statistics_gtest.cc +++ b/test/statistics_gtest.cc @@ -28,8 +28,8 @@ TEST(StatisticsTest, StdDev) { TEST(StatisticsTest, CV) { EXPECT_DOUBLE_EQ(benchmark::StatisticsCV({101, 101, 101, 101}), 0.0); EXPECT_DOUBLE_EQ(benchmark::StatisticsCV({1, 2, 3}), 1. / 2.); - EXPECT_DOUBLE_EQ(benchmark::StatisticsCV({2.5, 2.4, 3.3, 4.2, 5.1}), - 0.32888184094918121); + ASSERT_NEAR(benchmark::StatisticsCV({2.5, 2.4, 3.3, 4.2, 5.1}), + 0.32888184094918121, 1e-15); } } // end namespace diff --git a/test/user_counters_tabular_test.cc b/test/user_counters_tabular_test.cc index c98b769..cfc1ab0 100644 --- a/test/user_counters_tabular_test.cc +++ b/test/user_counters_tabular_test.cc @@ -63,6 +63,9 @@ ADD_CASES(TC_CSVOut, {{"%csv_header," void BM_Counters_Tabular(benchmark::State& state) { for (auto _ : state) { + // This test requires a non-zero CPU time to avoid divide-by-zero + auto iterations = double(state.iterations()) * double(state.iterations()); + benchmark::DoNotOptimize(iterations); } namespace bm = benchmark; state.counters.insert({ @@ -330,7 +333,7 @@ ADD_CASES(TC_CSVOut, {{"^\"BM_Counters_Tabular/repeats:2/threads:1_stddev\",%csv_report," "%float,%float,%float,%float,%float,%float$"}}); ADD_CASES(TC_CSVOut, - {{"^\"BM_Counters_Tabular/repeats:2/threads:1_cv\",%csv_report," + {{"^\"BM_Counters_Tabular/repeats:2/threads:1_cv\",%csv_cv_report," "%float,%float,%float,%float,%float,%float$"}}); ADD_CASES(TC_CSVOut, {{"^\"BM_Counters_Tabular/repeats:2/threads:2\",%csv_report," @@ -348,7 +351,7 @@ ADD_CASES(TC_CSVOut, {{"^\"BM_Counters_Tabular/repeats:2/threads:2_stddev\",%csv_report," "%float,%float,%float,%float,%float,%float$"}}); ADD_CASES(TC_CSVOut, - {{"^\"BM_Counters_Tabular/repeats:2/threads:2_cv\",%csv_report," + {{"^\"BM_Counters_Tabular/repeats:2/threads:2_cv\",%csv_cv_report," "%float,%float,%float,%float,%float,%float$"}}); // VS2013 does not allow this function to be passed as a lambda argument // to CHECK_BENCHMARK_RESULTS() @@ -372,7 +375,7 @@ CHECK_BENCHMARK_RESULTS("BM_Counters_Tabular/repeats:2/threads:2$", void BM_CounterRates_Tabular(benchmark::State& state) { for (auto _ : state) { // This test requires a non-zero CPU time to avoid divide-by-zero - auto iterations = state.iterations(); + auto iterations = double(state.iterations()) * double(state.iterations()); benchmark::DoNotOptimize(iterations); } namespace bm = benchmark; diff --git a/test/user_counters_test.cc b/test/user_counters_test.cc index 4cd8ee3..22252ac 100644 --- a/test/user_counters_test.cc +++ b/test/user_counters_test.cc @@ -67,7 +67,7 @@ int num_calls1 = 0; void BM_Counters_WithBytesAndItemsPSec(benchmark::State& state) { for (auto _ : state) { // This test requires a non-zero CPU time to avoid divide-by-zero - auto iterations = state.iterations(); + auto iterations = double(state.iterations()) * double(state.iterations()); benchmark::DoNotOptimize(iterations); } state.counters["foo"] = 1; @@ -119,7 +119,7 @@ CHECK_BENCHMARK_RESULTS("BM_Counters_WithBytesAndItemsPSec", void BM_Counters_Rate(benchmark::State& state) { for (auto _ : state) { // This test requires a non-zero CPU time to avoid divide-by-zero - auto iterations = state.iterations(); + auto iterations = double(state.iterations()) * 
double(state.iterations()); benchmark::DoNotOptimize(iterations); } namespace bm = benchmark; @@ -163,7 +163,7 @@ CHECK_BENCHMARK_RESULTS("BM_Counters_Rate", &CheckRate); void BM_Invert(benchmark::State& state) { for (auto _ : state) { // This test requires a non-zero CPU time to avoid divide-by-zero - auto iterations = state.iterations(); + auto iterations = double(state.iterations()) * double(state.iterations()); benchmark::DoNotOptimize(iterations); } namespace bm = benchmark; @@ -204,7 +204,7 @@ CHECK_BENCHMARK_RESULTS("BM_Invert", &CheckInvert); void BM_Counters_InvertedRate(benchmark::State& state) { for (auto _ : state) { // This test requires a non-zero CPU time to avoid divide-by-zero - auto iterations = state.iterations(); + auto iterations = double(state.iterations()) * double(state.iterations()); benchmark::DoNotOptimize(iterations); } namespace bm = benchmark; @@ -333,7 +333,7 @@ CHECK_BENCHMARK_RESULTS("BM_Counters_AvgThreads/threads:%int", void BM_Counters_AvgThreadsRate(benchmark::State& state) { for (auto _ : state) { // This test requires a non-zero CPU time to avoid divide-by-zero - auto iterations = state.iterations(); + auto iterations = double(state.iterations()) * double(state.iterations()); benchmark::DoNotOptimize(iterations); } namespace bm = benchmark; @@ -421,7 +421,7 @@ CHECK_BENCHMARK_RESULTS("BM_Counters_IterationInvariant", void BM_Counters_kIsIterationInvariantRate(benchmark::State& state) { for (auto _ : state) { // This test requires a non-zero CPU time to avoid divide-by-zero - auto iterations = state.iterations(); + auto iterations = double(state.iterations()) * double(state.iterations()); benchmark::DoNotOptimize(iterations); } namespace bm = benchmark; @@ -513,7 +513,7 @@ CHECK_BENCHMARK_RESULTS("BM_Counters_AvgIterations", &CheckAvgIterations); void BM_Counters_kAvgIterationsRate(benchmark::State& state) { for (auto _ : state) { // This test requires a non-zero CPU time to avoid divide-by-zero - auto iterations = state.iterations(); + auto iterations = double(state.iterations()) * double(state.iterations()); benchmark::DoNotOptimize(iterations); } namespace bm = benchmark; diff --git a/tools/BUILD.bazel b/tools/BUILD.bazel index d25caa7..8ef6a86 100644 --- a/tools/BUILD.bazel +++ b/tools/BUILD.bazel @@ -4,14 +4,15 @@ py_library( name = "gbench", srcs = glob(["gbench/*.py"]), deps = [ - requirement("numpy"), - requirement("scipy"), + requirement("numpy"), + requirement("scipy"), ], ) py_binary( name = "compare", srcs = ["compare.py"], + imports = ["."], python_version = "PY3", deps = [ ":gbench", diff --git a/tools/compare.py b/tools/compare.py index e5eeb24..7572520 100755 --- a/tools/compare.py +++ b/tools/compare.py @@ -1,17 +1,20 @@ #!/usr/bin/env python3 -import unittest +# type: ignore + """ compare.py - versatile benchmark output compare tool """ import argparse -from argparse import ArgumentParser import json -import sys import os +import sys +import unittest +from argparse import ArgumentParser + import gbench -from gbench import util, report +from gbench import report, util def check_inputs(in1, in2, flags): @@ -20,163 +23,203 @@ def check_inputs(in1, in2, flags): """ in1_kind, in1_err = util.classify_input_file(in1) in2_kind, in2_err = util.classify_input_file(in2) - output_file = util.find_benchmark_flag('--benchmark_out=', flags) - output_type = util.find_benchmark_flag('--benchmark_out_format=', flags) - if in1_kind == util.IT_Executable and in2_kind == util.IT_Executable and output_file: - print(("WARNING: '--benchmark_out=%s' will be 
passed to both " - "benchmarks causing it to be overwritten") % output_file) + output_file = util.find_benchmark_flag("--benchmark_out=", flags) + output_type = util.find_benchmark_flag("--benchmark_out_format=", flags) + if ( + in1_kind == util.IT_Executable + and in2_kind == util.IT_Executable + and output_file + ): + print( + ( + "WARNING: '--benchmark_out=%s' will be passed to both " + "benchmarks causing it to be overwritten" + ) + % output_file + ) if in1_kind == util.IT_JSON and in2_kind == util.IT_JSON: # When both sides are JSON the only supported flag is # --benchmark_filter= - for flag in util.remove_benchmark_flags('--benchmark_filter=', flags): - print("WARNING: passing %s has no effect since both " - "inputs are JSON" % flag) - if output_type is not None and output_type != 'json': - print(("ERROR: passing '--benchmark_out_format=%s' to 'compare.py`" - " is not supported.") % output_type) + for flag in util.remove_benchmark_flags("--benchmark_filter=", flags): + print( + "WARNING: passing %s has no effect since both " + "inputs are JSON" % flag + ) + if output_type is not None and output_type != "json": + print( + ( + "ERROR: passing '--benchmark_out_format=%s' to 'compare.py`" + " is not supported." + ) + % output_type + ) sys.exit(1) def create_parser(): parser = ArgumentParser( - description='versatile benchmark output compare tool') + description="versatile benchmark output compare tool" + ) parser.add_argument( - '-a', - '--display_aggregates_only', - dest='display_aggregates_only', + "-a", + "--display_aggregates_only", + dest="display_aggregates_only", action="store_true", help="If there are repetitions, by default, we display everything - the" - " actual runs, and the aggregates computed. Sometimes, it is " - "desirable to only view the aggregates. E.g. when there are a lot " - "of repetitions. Do note that only the display is affected. " - "Internally, all the actual runs are still used, e.g. for U test.") + " actual runs, and the aggregates computed. Sometimes, it is " + "desirable to only view the aggregates. E.g. when there are a lot " + "of repetitions. Do note that only the display is affected. " + "Internally, all the actual runs are still used, e.g. 
for U test.", + ) parser.add_argument( - '--no-color', - dest='color', + "--no-color", + dest="color", default=True, action="store_false", - help="Do not use colors in the terminal output" + help="Do not use colors in the terminal output", ) parser.add_argument( - '-d', - '--dump_to_json', - dest='dump_to_json', - help="Additionally, dump benchmark comparison output to this file in JSON format.") + "-d", + "--dump_to_json", + dest="dump_to_json", + help="Additionally, dump benchmark comparison output to this file in JSON format.", + ) utest = parser.add_argument_group() utest.add_argument( - '--no-utest', - dest='utest', + "--no-utest", + dest="utest", default=True, action="store_false", - help="The tool can do a two-tailed Mann-Whitney U test with the null hypothesis that it is equally likely that a randomly selected value from one sample will be less than or greater than a randomly selected value from a second sample.\nWARNING: requires **LARGE** (no less than {}) number of repetitions to be meaningful!\nThe test is being done by default, if at least {} repetitions were done.\nThis option can disable the U Test.".format(report.UTEST_OPTIMAL_REPETITIONS, report.UTEST_MIN_REPETITIONS)) + help="The tool can do a two-tailed Mann-Whitney U test with the null hypothesis that it is equally likely that a randomly selected value from one sample will be less than or greater than a randomly selected value from a second sample.\nWARNING: requires **LARGE** (no less than {}) number of repetitions to be meaningful!\nThe test is being done by default, if at least {} repetitions were done.\nThis option can disable the U Test.".format( + report.UTEST_OPTIMAL_REPETITIONS, report.UTEST_MIN_REPETITIONS + ), + ) alpha_default = 0.05 utest.add_argument( "--alpha", - dest='utest_alpha', + dest="utest_alpha", default=alpha_default, type=float, - help=("significance level alpha. if the calculated p-value is below this value, then the result is said to be statistically significant and the null hypothesis is rejected.\n(default: %0.4f)") % - alpha_default) + help=( + "significance level alpha. 
if the calculated p-value is below this value, then the result is said to be statistically significant and the null hypothesis is rejected.\n(default: %0.4f)" + ) + % alpha_default, + ) subparsers = parser.add_subparsers( - help='This tool has multiple modes of operation:', - dest='mode') + help="This tool has multiple modes of operation:", dest="mode" + ) parser_a = subparsers.add_parser( - 'benchmarks', - help='The most simple use-case, compare all the output of these two benchmarks') - baseline = parser_a.add_argument_group( - 'baseline', 'The benchmark baseline') + "benchmarks", + help="The most simple use-case, compare all the output of these two benchmarks", + ) + baseline = parser_a.add_argument_group("baseline", "The benchmark baseline") baseline.add_argument( - 'test_baseline', - metavar='test_baseline', - type=argparse.FileType('r'), + "test_baseline", + metavar="test_baseline", + type=argparse.FileType("r"), nargs=1, - help='A benchmark executable or JSON output file') + help="A benchmark executable or JSON output file", + ) contender = parser_a.add_argument_group( - 'contender', 'The benchmark that will be compared against the baseline') + "contender", "The benchmark that will be compared against the baseline" + ) contender.add_argument( - 'test_contender', - metavar='test_contender', - type=argparse.FileType('r'), + "test_contender", + metavar="test_contender", + type=argparse.FileType("r"), nargs=1, - help='A benchmark executable or JSON output file') + help="A benchmark executable or JSON output file", + ) parser_a.add_argument( - 'benchmark_options', - metavar='benchmark_options', + "benchmark_options", + metavar="benchmark_options", nargs=argparse.REMAINDER, - help='Arguments to pass when running benchmark executables') + help="Arguments to pass when running benchmark executables", + ) parser_b = subparsers.add_parser( - 'filters', help='Compare filter one with the filter two of benchmark') - baseline = parser_b.add_argument_group( - 'baseline', 'The benchmark baseline') + "filters", help="Compare filter one with the filter two of benchmark" + ) + baseline = parser_b.add_argument_group("baseline", "The benchmark baseline") baseline.add_argument( - 'test', - metavar='test', - type=argparse.FileType('r'), + "test", + metavar="test", + type=argparse.FileType("r"), nargs=1, - help='A benchmark executable or JSON output file') + help="A benchmark executable or JSON output file", + ) baseline.add_argument( - 'filter_baseline', - metavar='filter_baseline', + "filter_baseline", + metavar="filter_baseline", type=str, nargs=1, - help='The first filter, that will be used as baseline') + help="The first filter, that will be used as baseline", + ) contender = parser_b.add_argument_group( - 'contender', 'The benchmark that will be compared against the baseline') + "contender", "The benchmark that will be compared against the baseline" + ) contender.add_argument( - 'filter_contender', - metavar='filter_contender', + "filter_contender", + metavar="filter_contender", type=str, nargs=1, - help='The second filter, that will be compared against the baseline') + help="The second filter, that will be compared against the baseline", + ) parser_b.add_argument( - 'benchmark_options', - metavar='benchmark_options', + "benchmark_options", + metavar="benchmark_options", nargs=argparse.REMAINDER, - help='Arguments to pass when running benchmark executables') + help="Arguments to pass when running benchmark executables", + ) parser_c = subparsers.add_parser( - 'benchmarksfiltered', - help='Compare 
filter one of first benchmark with filter two of the second benchmark') - baseline = parser_c.add_argument_group( - 'baseline', 'The benchmark baseline') + "benchmarksfiltered", + help="Compare filter one of first benchmark with filter two of the second benchmark", + ) + baseline = parser_c.add_argument_group("baseline", "The benchmark baseline") baseline.add_argument( - 'test_baseline', - metavar='test_baseline', - type=argparse.FileType('r'), + "test_baseline", + metavar="test_baseline", + type=argparse.FileType("r"), nargs=1, - help='A benchmark executable or JSON output file') + help="A benchmark executable or JSON output file", + ) baseline.add_argument( - 'filter_baseline', - metavar='filter_baseline', + "filter_baseline", + metavar="filter_baseline", type=str, nargs=1, - help='The first filter, that will be used as baseline') + help="The first filter, that will be used as baseline", + ) contender = parser_c.add_argument_group( - 'contender', 'The benchmark that will be compared against the baseline') + "contender", "The benchmark that will be compared against the baseline" + ) contender.add_argument( - 'test_contender', - metavar='test_contender', - type=argparse.FileType('r'), + "test_contender", + metavar="test_contender", + type=argparse.FileType("r"), nargs=1, - help='The second benchmark executable or JSON output file, that will be compared against the baseline') + help="The second benchmark executable or JSON output file, that will be compared against the baseline", + ) contender.add_argument( - 'filter_contender', - metavar='filter_contender', + "filter_contender", + metavar="filter_contender", type=str, nargs=1, - help='The second filter, that will be compared against the baseline') + help="The second filter, that will be compared against the baseline", + ) parser_c.add_argument( - 'benchmark_options', - metavar='benchmark_options', + "benchmark_options", + metavar="benchmark_options", nargs=argparse.REMAINDER, - help='Arguments to pass when running benchmark executables') + help="Arguments to pass when running benchmark executables", + ) return parser @@ -191,16 +234,16 @@ def main(): assert not unknown_args benchmark_options = args.benchmark_options - if args.mode == 'benchmarks': + if args.mode == "benchmarks": test_baseline = args.test_baseline[0].name test_contender = args.test_contender[0].name - filter_baseline = '' - filter_contender = '' + filter_baseline = "" + filter_contender = "" # NOTE: if test_baseline == test_contender, you are analyzing the stdev - description = 'Comparing %s to %s' % (test_baseline, test_contender) - elif args.mode == 'filters': + description = "Comparing %s to %s" % (test_baseline, test_contender) + elif args.mode == "filters": test_baseline = args.test[0].name test_contender = args.test[0].name filter_baseline = args.filter_baseline[0] @@ -209,9 +252,12 @@ def main(): # NOTE: if filter_baseline == filter_contender, you are analyzing the # stdev - description = 'Comparing %s to %s (from %s)' % ( - filter_baseline, filter_contender, args.test[0].name) - elif args.mode == 'benchmarksfiltered': + description = "Comparing %s to %s (from %s)" % ( + filter_baseline, + filter_contender, + args.test[0].name, + ) + elif args.mode == "benchmarksfiltered": test_baseline = args.test_baseline[0].name test_contender = args.test_contender[0].name filter_baseline = args.filter_baseline[0] @@ -220,8 +266,12 @@ def main(): # NOTE: if test_baseline == test_contender and # filter_baseline == filter_contender, you are analyzing the stdev - description = 
'Comparing %s (from %s) to %s (from %s)' % ( - filter_baseline, test_baseline, filter_contender, test_contender) + description = "Comparing %s (from %s) to %s (from %s)" % ( + filter_baseline, + test_baseline, + filter_contender, + test_contender, + ) else: # should never happen print("Unrecognized mode of operation: '%s'" % args.mode) @@ -231,199 +281,240 @@ def main(): check_inputs(test_baseline, test_contender, benchmark_options) if args.display_aggregates_only: - benchmark_options += ['--benchmark_display_aggregates_only=true'] + benchmark_options += ["--benchmark_display_aggregates_only=true"] options_baseline = [] options_contender = [] if filter_baseline and filter_contender: - options_baseline = ['--benchmark_filter=%s' % filter_baseline] - options_contender = ['--benchmark_filter=%s' % filter_contender] + options_baseline = ["--benchmark_filter=%s" % filter_baseline] + options_contender = ["--benchmark_filter=%s" % filter_contender] # Run the benchmarks and report the results - json1 = json1_orig = gbench.util.sort_benchmark_results(gbench.util.run_or_load_benchmark( - test_baseline, benchmark_options + options_baseline)) - json2 = json2_orig = gbench.util.sort_benchmark_results(gbench.util.run_or_load_benchmark( - test_contender, benchmark_options + options_contender)) + json1 = json1_orig = gbench.util.sort_benchmark_results( + gbench.util.run_or_load_benchmark( + test_baseline, benchmark_options + options_baseline + ) + ) + json2 = json2_orig = gbench.util.sort_benchmark_results( + gbench.util.run_or_load_benchmark( + test_contender, benchmark_options + options_contender + ) + ) # Now, filter the benchmarks so that the difference report can work if filter_baseline and filter_contender: - replacement = '[%s vs. %s]' % (filter_baseline, filter_contender) + replacement = "[%s vs. 
%s]" % (filter_baseline, filter_contender) json1 = gbench.report.filter_benchmark( - json1_orig, filter_baseline, replacement) + json1_orig, filter_baseline, replacement + ) json2 = gbench.report.filter_benchmark( - json2_orig, filter_contender, replacement) + json2_orig, filter_contender, replacement + ) - diff_report = gbench.report.get_difference_report( - json1, json2, args.utest) + diff_report = gbench.report.get_difference_report(json1, json2, args.utest) output_lines = gbench.report.print_difference_report( diff_report, args.display_aggregates_only, - args.utest, args.utest_alpha, args.color) + args.utest, + args.utest_alpha, + args.color, + ) print(description) for ln in output_lines: print(ln) # Optionally, diff and output to JSON if args.dump_to_json is not None: - with open(args.dump_to_json, 'w') as f_json: - json.dump(diff_report, f_json) + with open(args.dump_to_json, "w") as f_json: + json.dump(diff_report, f_json, indent=1) + class TestParser(unittest.TestCase): def setUp(self): self.parser = create_parser() testInputs = os.path.join( - os.path.dirname( - os.path.realpath(__file__)), - 'gbench', - 'Inputs') - self.testInput0 = os.path.join(testInputs, 'test1_run1.json') - self.testInput1 = os.path.join(testInputs, 'test1_run2.json') + os.path.dirname(os.path.realpath(__file__)), "gbench", "Inputs" + ) + self.testInput0 = os.path.join(testInputs, "test1_run1.json") + self.testInput1 = os.path.join(testInputs, "test1_run2.json") def test_benchmarks_basic(self): parsed = self.parser.parse_args( - ['benchmarks', self.testInput0, self.testInput1]) + ["benchmarks", self.testInput0, self.testInput1] + ) self.assertFalse(parsed.display_aggregates_only) self.assertTrue(parsed.utest) - self.assertEqual(parsed.mode, 'benchmarks') + self.assertEqual(parsed.mode, "benchmarks") self.assertEqual(parsed.test_baseline[0].name, self.testInput0) self.assertEqual(parsed.test_contender[0].name, self.testInput1) self.assertFalse(parsed.benchmark_options) def test_benchmarks_basic_without_utest(self): parsed = self.parser.parse_args( - ['--no-utest', 'benchmarks', self.testInput0, self.testInput1]) + ["--no-utest", "benchmarks", self.testInput0, self.testInput1] + ) self.assertFalse(parsed.display_aggregates_only) self.assertFalse(parsed.utest) self.assertEqual(parsed.utest_alpha, 0.05) - self.assertEqual(parsed.mode, 'benchmarks') + self.assertEqual(parsed.mode, "benchmarks") self.assertEqual(parsed.test_baseline[0].name, self.testInput0) self.assertEqual(parsed.test_contender[0].name, self.testInput1) self.assertFalse(parsed.benchmark_options) def test_benchmarks_basic_display_aggregates_only(self): parsed = self.parser.parse_args( - ['-a', 'benchmarks', self.testInput0, self.testInput1]) + ["-a", "benchmarks", self.testInput0, self.testInput1] + ) self.assertTrue(parsed.display_aggregates_only) self.assertTrue(parsed.utest) - self.assertEqual(parsed.mode, 'benchmarks') + self.assertEqual(parsed.mode, "benchmarks") self.assertEqual(parsed.test_baseline[0].name, self.testInput0) self.assertEqual(parsed.test_contender[0].name, self.testInput1) self.assertFalse(parsed.benchmark_options) def test_benchmarks_basic_with_utest_alpha(self): parsed = self.parser.parse_args( - ['--alpha=0.314', 'benchmarks', self.testInput0, self.testInput1]) + ["--alpha=0.314", "benchmarks", self.testInput0, self.testInput1] + ) self.assertFalse(parsed.display_aggregates_only) self.assertTrue(parsed.utest) self.assertEqual(parsed.utest_alpha, 0.314) - self.assertEqual(parsed.mode, 'benchmarks') + 
self.assertEqual(parsed.mode, "benchmarks") self.assertEqual(parsed.test_baseline[0].name, self.testInput0) self.assertEqual(parsed.test_contender[0].name, self.testInput1) self.assertFalse(parsed.benchmark_options) def test_benchmarks_basic_without_utest_with_utest_alpha(self): parsed = self.parser.parse_args( - ['--no-utest', '--alpha=0.314', 'benchmarks', self.testInput0, self.testInput1]) + [ + "--no-utest", + "--alpha=0.314", + "benchmarks", + self.testInput0, + self.testInput1, + ] + ) self.assertFalse(parsed.display_aggregates_only) self.assertFalse(parsed.utest) self.assertEqual(parsed.utest_alpha, 0.314) - self.assertEqual(parsed.mode, 'benchmarks') + self.assertEqual(parsed.mode, "benchmarks") self.assertEqual(parsed.test_baseline[0].name, self.testInput0) self.assertEqual(parsed.test_contender[0].name, self.testInput1) self.assertFalse(parsed.benchmark_options) def test_benchmarks_with_remainder(self): parsed = self.parser.parse_args( - ['benchmarks', self.testInput0, self.testInput1, 'd']) + ["benchmarks", self.testInput0, self.testInput1, "d"] + ) self.assertFalse(parsed.display_aggregates_only) self.assertTrue(parsed.utest) - self.assertEqual(parsed.mode, 'benchmarks') + self.assertEqual(parsed.mode, "benchmarks") self.assertEqual(parsed.test_baseline[0].name, self.testInput0) self.assertEqual(parsed.test_contender[0].name, self.testInput1) - self.assertEqual(parsed.benchmark_options, ['d']) + self.assertEqual(parsed.benchmark_options, ["d"]) def test_benchmarks_with_remainder_after_doubleminus(self): parsed = self.parser.parse_args( - ['benchmarks', self.testInput0, self.testInput1, '--', 'e']) + ["benchmarks", self.testInput0, self.testInput1, "--", "e"] + ) self.assertFalse(parsed.display_aggregates_only) self.assertTrue(parsed.utest) - self.assertEqual(parsed.mode, 'benchmarks') + self.assertEqual(parsed.mode, "benchmarks") self.assertEqual(parsed.test_baseline[0].name, self.testInput0) self.assertEqual(parsed.test_contender[0].name, self.testInput1) - self.assertEqual(parsed.benchmark_options, ['e']) + self.assertEqual(parsed.benchmark_options, ["e"]) def test_filters_basic(self): - parsed = self.parser.parse_args( - ['filters', self.testInput0, 'c', 'd']) + parsed = self.parser.parse_args(["filters", self.testInput0, "c", "d"]) self.assertFalse(parsed.display_aggregates_only) self.assertTrue(parsed.utest) - self.assertEqual(parsed.mode, 'filters') + self.assertEqual(parsed.mode, "filters") self.assertEqual(parsed.test[0].name, self.testInput0) - self.assertEqual(parsed.filter_baseline[0], 'c') - self.assertEqual(parsed.filter_contender[0], 'd') + self.assertEqual(parsed.filter_baseline[0], "c") + self.assertEqual(parsed.filter_contender[0], "d") self.assertFalse(parsed.benchmark_options) def test_filters_with_remainder(self): parsed = self.parser.parse_args( - ['filters', self.testInput0, 'c', 'd', 'e']) + ["filters", self.testInput0, "c", "d", "e"] + ) self.assertFalse(parsed.display_aggregates_only) self.assertTrue(parsed.utest) - self.assertEqual(parsed.mode, 'filters') + self.assertEqual(parsed.mode, "filters") self.assertEqual(parsed.test[0].name, self.testInput0) - self.assertEqual(parsed.filter_baseline[0], 'c') - self.assertEqual(parsed.filter_contender[0], 'd') - self.assertEqual(parsed.benchmark_options, ['e']) + self.assertEqual(parsed.filter_baseline[0], "c") + self.assertEqual(parsed.filter_contender[0], "d") + self.assertEqual(parsed.benchmark_options, ["e"]) def test_filters_with_remainder_after_doubleminus(self): parsed = self.parser.parse_args( - 
['filters', self.testInput0, 'c', 'd', '--', 'f']) + ["filters", self.testInput0, "c", "d", "--", "f"] + ) self.assertFalse(parsed.display_aggregates_only) self.assertTrue(parsed.utest) - self.assertEqual(parsed.mode, 'filters') + self.assertEqual(parsed.mode, "filters") self.assertEqual(parsed.test[0].name, self.testInput0) - self.assertEqual(parsed.filter_baseline[0], 'c') - self.assertEqual(parsed.filter_contender[0], 'd') - self.assertEqual(parsed.benchmark_options, ['f']) + self.assertEqual(parsed.filter_baseline[0], "c") + self.assertEqual(parsed.filter_contender[0], "d") + self.assertEqual(parsed.benchmark_options, ["f"]) def test_benchmarksfiltered_basic(self): parsed = self.parser.parse_args( - ['benchmarksfiltered', self.testInput0, 'c', self.testInput1, 'e']) + ["benchmarksfiltered", self.testInput0, "c", self.testInput1, "e"] + ) self.assertFalse(parsed.display_aggregates_only) self.assertTrue(parsed.utest) - self.assertEqual(parsed.mode, 'benchmarksfiltered') + self.assertEqual(parsed.mode, "benchmarksfiltered") self.assertEqual(parsed.test_baseline[0].name, self.testInput0) - self.assertEqual(parsed.filter_baseline[0], 'c') + self.assertEqual(parsed.filter_baseline[0], "c") self.assertEqual(parsed.test_contender[0].name, self.testInput1) - self.assertEqual(parsed.filter_contender[0], 'e') + self.assertEqual(parsed.filter_contender[0], "e") self.assertFalse(parsed.benchmark_options) def test_benchmarksfiltered_with_remainder(self): parsed = self.parser.parse_args( - ['benchmarksfiltered', self.testInput0, 'c', self.testInput1, 'e', 'f']) + [ + "benchmarksfiltered", + self.testInput0, + "c", + self.testInput1, + "e", + "f", + ] + ) self.assertFalse(parsed.display_aggregates_only) self.assertTrue(parsed.utest) - self.assertEqual(parsed.mode, 'benchmarksfiltered') + self.assertEqual(parsed.mode, "benchmarksfiltered") self.assertEqual(parsed.test_baseline[0].name, self.testInput0) - self.assertEqual(parsed.filter_baseline[0], 'c') + self.assertEqual(parsed.filter_baseline[0], "c") self.assertEqual(parsed.test_contender[0].name, self.testInput1) - self.assertEqual(parsed.filter_contender[0], 'e') - self.assertEqual(parsed.benchmark_options[0], 'f') + self.assertEqual(parsed.filter_contender[0], "e") + self.assertEqual(parsed.benchmark_options[0], "f") def test_benchmarksfiltered_with_remainder_after_doubleminus(self): parsed = self.parser.parse_args( - ['benchmarksfiltered', self.testInput0, 'c', self.testInput1, 'e', '--', 'g']) + [ + "benchmarksfiltered", + self.testInput0, + "c", + self.testInput1, + "e", + "--", + "g", + ] + ) self.assertFalse(parsed.display_aggregates_only) self.assertTrue(parsed.utest) - self.assertEqual(parsed.mode, 'benchmarksfiltered') + self.assertEqual(parsed.mode, "benchmarksfiltered") self.assertEqual(parsed.test_baseline[0].name, self.testInput0) - self.assertEqual(parsed.filter_baseline[0], 'c') + self.assertEqual(parsed.filter_baseline[0], "c") self.assertEqual(parsed.test_contender[0].name, self.testInput1) - self.assertEqual(parsed.filter_contender[0], 'e') - self.assertEqual(parsed.benchmark_options[0], 'g') + self.assertEqual(parsed.filter_contender[0], "e") + self.assertEqual(parsed.benchmark_options[0], "g") -if __name__ == '__main__': +if __name__ == "__main__": # unittest.main() main() diff --git a/tools/gbench/Inputs/test5_run0.json b/tools/gbench/Inputs/test5_run0.json new file mode 100644 index 0000000..074103b --- /dev/null +++ b/tools/gbench/Inputs/test5_run0.json @@ -0,0 +1,18 @@ +{ + "context": { + "date": "2016-08-02 17:44:46", + 
"num_cpus": 4, + "mhz_per_cpu": 4228, + "cpu_scaling_enabled": false, + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "BM_ManyRepetitions", + "iterations": 1000, + "real_time": 1, + "cpu_time": 1000, + "time_unit": "s" + } + ] +} diff --git a/tools/gbench/Inputs/test5_run1.json b/tools/gbench/Inputs/test5_run1.json new file mode 100644 index 0000000..430df9f --- /dev/null +++ b/tools/gbench/Inputs/test5_run1.json @@ -0,0 +1,18 @@ +{ + "context": { + "date": "2016-08-02 17:44:46", + "num_cpus": 4, + "mhz_per_cpu": 4228, + "cpu_scaling_enabled": false, + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "BM_ManyRepetitions", + "iterations": 1000, + "real_time": 1000, + "cpu_time": 1, + "time_unit": "s" + } + ] +} diff --git a/tools/gbench/__init__.py b/tools/gbench/__init__.py index fce1a1a..9212568 100644 --- a/tools/gbench/__init__.py +++ b/tools/gbench/__init__.py @@ -1,8 +1,8 @@ """Google Benchmark tooling""" -__author__ = 'Eric Fiselier' -__email__ = 'eric@efcs.ca' +__author__ = "Eric Fiselier" +__email__ = "eric@efcs.ca" __versioninfo__ = (0, 5, 0) -__version__ = '.'.join(str(v) for v in __versioninfo__) + 'dev' +__version__ = ".".join(str(v) for v in __versioninfo__) + "dev" -__all__ = [] +__all__ = [] # type: ignore diff --git a/tools/gbench/report.py b/tools/gbench/report.py index b2bbfb9..7158fd1 100644 --- a/tools/gbench/report.py +++ b/tools/gbench/report.py @@ -1,14 +1,17 @@ -"""report.py - Utilities for reporting statistics about benchmark results +# type: ignore + +""" +report.py - Utilities for reporting statistics about benchmark results """ -import unittest -import os -import re import copy +import os import random +import re +import unittest -from scipy.stats import mannwhitneyu, gmean from numpy import array +from scipy.stats import gmean, mannwhitneyu class BenchmarkColor(object): @@ -17,26 +20,25 @@ class BenchmarkColor(object): self.code = code def __repr__(self): - return '%s%r' % (self.__class__.__name__, - (self.name, self.code)) + return "%s%r" % (self.__class__.__name__, (self.name, self.code)) def __format__(self, format): return self.code # Benchmark Colors Enumeration -BC_NONE = BenchmarkColor('NONE', '') -BC_MAGENTA = BenchmarkColor('MAGENTA', '\033[95m') -BC_CYAN = BenchmarkColor('CYAN', '\033[96m') -BC_OKBLUE = BenchmarkColor('OKBLUE', '\033[94m') -BC_OKGREEN = BenchmarkColor('OKGREEN', '\033[32m') -BC_HEADER = BenchmarkColor('HEADER', '\033[92m') -BC_WARNING = BenchmarkColor('WARNING', '\033[93m') -BC_WHITE = BenchmarkColor('WHITE', '\033[97m') -BC_FAIL = BenchmarkColor('FAIL', '\033[91m') -BC_ENDC = BenchmarkColor('ENDC', '\033[0m') -BC_BOLD = BenchmarkColor('BOLD', '\033[1m') -BC_UNDERLINE = BenchmarkColor('UNDERLINE', '\033[4m') +BC_NONE = BenchmarkColor("NONE", "") +BC_MAGENTA = BenchmarkColor("MAGENTA", "\033[95m") +BC_CYAN = BenchmarkColor("CYAN", "\033[96m") +BC_OKBLUE = BenchmarkColor("OKBLUE", "\033[94m") +BC_OKGREEN = BenchmarkColor("OKGREEN", "\033[32m") +BC_HEADER = BenchmarkColor("HEADER", "\033[92m") +BC_WARNING = BenchmarkColor("WARNING", "\033[93m") +BC_WHITE = BenchmarkColor("WHITE", "\033[97m") +BC_FAIL = BenchmarkColor("FAIL", "\033[91m") +BC_ENDC = BenchmarkColor("ENDC", "\033[0m") +BC_BOLD = BenchmarkColor("BOLD", "\033[1m") +BC_UNDERLINE = BenchmarkColor("UNDERLINE", "\033[4m") UTEST_MIN_REPETITIONS = 2 UTEST_OPTIMAL_REPETITIONS = 9 # Lowest reasonable number, More is better. 
@@ -59,10 +61,14 @@ def color_format(use_color, fmt_str, *args, **kwargs): """ assert use_color is True or use_color is False if not use_color: - args = [arg if not isinstance(arg, BenchmarkColor) else BC_NONE - for arg in args] - kwargs = {key: arg if not isinstance(arg, BenchmarkColor) else BC_NONE - for key, arg in kwargs.items()} + args = [ + arg if not isinstance(arg, BenchmarkColor) else BC_NONE + for arg in args + ] + kwargs = { + key: arg if not isinstance(arg, BenchmarkColor) else BC_NONE + for key, arg in kwargs.items() + } return fmt_str.format(*args, **kwargs) @@ -73,8 +79,8 @@ def find_longest_name(benchmark_list): """ longest_name = 1 for bc in benchmark_list: - if len(bc['name']) > longest_name: - longest_name = len(bc['name']) + if len(bc["name"]) > longest_name: + longest_name = len(bc["name"]) return longest_name @@ -95,13 +101,13 @@ def filter_benchmark(json_orig, family, replacement=""): """ regex = re.compile(family) filtered = {} - filtered['benchmarks'] = [] - for be in json_orig['benchmarks']: - if not regex.search(be['name']): + filtered["benchmarks"] = [] + for be in json_orig["benchmarks"]: + if not regex.search(be["name"]): continue filteredbench = copy.deepcopy(be) # Do NOT modify the old name! - filteredbench['name'] = regex.sub(replacement, filteredbench['name']) - filtered['benchmarks'].append(filteredbench) + filteredbench["name"] = regex.sub(replacement, filteredbench["name"]) + filtered["benchmarks"].append(filteredbench) return filtered @@ -110,9 +116,11 @@ def get_unique_benchmark_names(json): While *keeping* the order, give all the unique 'names' used for benchmarks. """ seen = set() - uniqued = [x['name'] for x in json['benchmarks'] - if x['name'] not in seen and - (seen.add(x['name']) or True)] + uniqued = [ + x["name"] + for x in json["benchmarks"] + if x["name"] not in seen and (seen.add(x["name"]) or True) + ] return uniqued @@ -125,7 +133,7 @@ def intersect(list1, list2): def is_potentially_comparable_benchmark(x): - return ('time_unit' in x and 'real_time' in x and 'cpu_time' in x) + return "time_unit" in x and "real_time" in x and "cpu_time" in x def partition_benchmarks(json1, json2): @@ -142,18 +150,24 @@ def partition_benchmarks(json1, json2): time_unit = None # Pick the time unit from the first entry of the lhs benchmark. # We should be careful not to crash with unexpected input. - for x in json1['benchmarks']: - if (x['name'] == name and is_potentially_comparable_benchmark(x)): - time_unit = x['time_unit'] + for x in json1["benchmarks"]: + if x["name"] == name and is_potentially_comparable_benchmark(x): + time_unit = x["time_unit"] break if time_unit is None: continue # Filter by name and time unit. # All the repetitions are assumed to be comparable. - lhs = [x for x in json1['benchmarks'] if x['name'] == name and - x['time_unit'] == time_unit] - rhs = [x for x in json2['benchmarks'] if x['name'] == name and - x['time_unit'] == time_unit] + lhs = [ + x + for x in json1["benchmarks"] + if x["name"] == name and x["time_unit"] == time_unit + ] + rhs = [ + x + for x in json2["benchmarks"] + if x["name"] == name and x["time_unit"] == time_unit + ] partitions.append([lhs, rhs]) return partitions @@ -164,7 +178,7 @@ def get_timedelta_field_as_seconds(benchmark, field_name): time_unit, as time in seconds. 
""" timedelta = benchmark[field_name] - time_unit = benchmark.get('time_unit', 's') + time_unit = benchmark.get("time_unit", "s") return timedelta * _TIME_UNIT_TO_SECONDS_MULTIPLIER.get(time_unit) @@ -174,11 +188,15 @@ def calculate_geomean(json): and calculate their geomean. """ times = [] - for benchmark in json['benchmarks']: - if 'run_type' in benchmark and benchmark['run_type'] == 'aggregate': + for benchmark in json["benchmarks"]: + if "run_type" in benchmark and benchmark["run_type"] == "aggregate": continue - times.append([get_timedelta_field_as_seconds(benchmark, 'real_time'), - get_timedelta_field_as_seconds(benchmark, 'cpu_time')]) + times.append( + [ + get_timedelta_field_as_seconds(benchmark, "real_time"), + get_timedelta_field_as_seconds(benchmark, "cpu_time"), + ] + ) return gmean(times) if times else array([]) @@ -190,19 +208,23 @@ def extract_field(partition, field_name): def calc_utest(timings_cpu, timings_time): - min_rep_cnt = min(len(timings_time[0]), - len(timings_time[1]), - len(timings_cpu[0]), - len(timings_cpu[1])) + min_rep_cnt = min( + len(timings_time[0]), + len(timings_time[1]), + len(timings_cpu[0]), + len(timings_cpu[1]), + ) # Does *everything* has at least UTEST_MIN_REPETITIONS repetitions? if min_rep_cnt < UTEST_MIN_REPETITIONS: return False, None, None time_pvalue = mannwhitneyu( - timings_time[0], timings_time[1], alternative='two-sided').pvalue + timings_time[0], timings_time[1], alternative="two-sided" + ).pvalue cpu_pvalue = mannwhitneyu( - timings_cpu[0], timings_cpu[1], alternative='two-sided').pvalue + timings_cpu[0], timings_cpu[1], alternative="two-sided" + ).pvalue return (min_rep_cnt >= UTEST_OPTIMAL_REPETITIONS), cpu_pvalue, time_pvalue @@ -212,38 +234,46 @@ def print_utest(bc_name, utest, utest_alpha, first_col_width, use_color=True): return BC_FAIL if pval >= utest_alpha else BC_OKGREEN # Check if we failed miserably with minimum required repetitions for utest - if not utest['have_optimal_repetitions'] and utest['cpu_pvalue'] is None and utest['time_pvalue'] is None: + if ( + not utest["have_optimal_repetitions"] + and utest["cpu_pvalue"] is None + and utest["time_pvalue"] is None + ): return [] dsc = "U Test, Repetitions: {} vs {}".format( - utest['nr_of_repetitions'], utest['nr_of_repetitions_other']) + utest["nr_of_repetitions"], utest["nr_of_repetitions_other"] + ) dsc_color = BC_OKGREEN # We still got some results to show but issue a warning about it. - if not utest['have_optimal_repetitions']: + if not utest["have_optimal_repetitions"]: dsc_color = BC_WARNING dsc += ". WARNING: Results unreliable! 
{}+ repetitions recommended.".format( - UTEST_OPTIMAL_REPETITIONS) + UTEST_OPTIMAL_REPETITIONS + ) special_str = "{}{:<{}s}{endc}{}{:16.4f}{endc}{}{:16.4f}{endc}{} {}" - return [color_format(use_color, - special_str, - BC_HEADER, - "{}{}".format(bc_name, UTEST_COL_NAME), - first_col_width, - get_utest_color( - utest['time_pvalue']), utest['time_pvalue'], - get_utest_color( - utest['cpu_pvalue']), utest['cpu_pvalue'], - dsc_color, dsc, - endc=BC_ENDC)] - - -def get_difference_report( - json1, - json2, - utest=False): + return [ + color_format( + use_color, + special_str, + BC_HEADER, + "{}{}".format(bc_name, UTEST_COL_NAME), + first_col_width, + get_utest_color(utest["time_pvalue"]), + utest["time_pvalue"], + get_utest_color(utest["cpu_pvalue"]), + utest["cpu_pvalue"], + dsc_color, + dsc, + endc=BC_ENDC, + ) + ] + + +def get_difference_report(json1, json2, utest=False): """ Calculate and report the difference between each test of two benchmarks runs specified as 'json1' and 'json2'. Output is another json containing @@ -254,37 +284,44 @@ def get_difference_report( diff_report = [] partitions = partition_benchmarks(json1, json2) for partition in partitions: - benchmark_name = partition[0][0]['name'] - label = partition[0][0]['label'] if 'label' in partition[0][0] else '' - time_unit = partition[0][0]['time_unit'] + benchmark_name = partition[0][0]["name"] + label = partition[0][0]["label"] if "label" in partition[0][0] else "" + time_unit = partition[0][0]["time_unit"] measurements = [] utest_results = {} # Careful, we may have different repetition count. for i in range(min(len(partition[0]), len(partition[1]))): bn = partition[0][i] other_bench = partition[1][i] - measurements.append({ - 'real_time': bn['real_time'], - 'cpu_time': bn['cpu_time'], - 'real_time_other': other_bench['real_time'], - 'cpu_time_other': other_bench['cpu_time'], - 'time': calculate_change(bn['real_time'], other_bench['real_time']), - 'cpu': calculate_change(bn['cpu_time'], other_bench['cpu_time']) - }) + measurements.append( + { + "real_time": bn["real_time"], + "cpu_time": bn["cpu_time"], + "real_time_other": other_bench["real_time"], + "cpu_time_other": other_bench["cpu_time"], + "time": calculate_change( + bn["real_time"], other_bench["real_time"] + ), + "cpu": calculate_change( + bn["cpu_time"], other_bench["cpu_time"] + ), + } + ) # After processing the whole partition, if requested, do the U test. if utest: - timings_cpu = extract_field(partition, 'cpu_time') - timings_time = extract_field(partition, 'real_time') + timings_cpu = extract_field(partition, "cpu_time") + timings_time = extract_field(partition, "real_time") have_optimal_repetitions, cpu_pvalue, time_pvalue = calc_utest( - timings_cpu, timings_time) - if cpu_pvalue and time_pvalue: + timings_cpu, timings_time + ) + if cpu_pvalue is not None and time_pvalue is not None: utest_results = { - 'have_optimal_repetitions': have_optimal_repetitions, - 'cpu_pvalue': cpu_pvalue, - 'time_pvalue': time_pvalue, - 'nr_of_repetitions': len(timings_cpu[0]), - 'nr_of_repetitions_other': len(timings_cpu[1]) + "have_optimal_repetitions": have_optimal_repetitions, + "cpu_pvalue": cpu_pvalue, + "time_pvalue": time_pvalue, + "nr_of_repetitions": len(timings_cpu[0]), + "nr_of_repetitions_other": len(timings_cpu[1]), } # Store only if we had any measurements for given benchmark. @@ -292,47 +329,63 @@ def get_difference_report( # time units which are not compatible with other time units in the # benchmark suite. 
if measurements: - run_type = partition[0][0]['run_type'] if 'run_type' in partition[0][0] else '' - aggregate_name = partition[0][0]['aggregate_name'] if run_type == 'aggregate' and 'aggregate_name' in partition[0][0] else '' - diff_report.append({ - 'name': benchmark_name, - 'label': label, - 'measurements': measurements, - 'time_unit': time_unit, - 'run_type': run_type, - 'aggregate_name': aggregate_name, - 'utest': utest_results - }) + run_type = ( + partition[0][0]["run_type"] + if "run_type" in partition[0][0] + else "" + ) + aggregate_name = ( + partition[0][0]["aggregate_name"] + if run_type == "aggregate" + and "aggregate_name" in partition[0][0] + else "" + ) + diff_report.append( + { + "name": benchmark_name, + "label": label, + "measurements": measurements, + "time_unit": time_unit, + "run_type": run_type, + "aggregate_name": aggregate_name, + "utest": utest_results, + } + ) lhs_gmean = calculate_geomean(json1) rhs_gmean = calculate_geomean(json2) if lhs_gmean.any() and rhs_gmean.any(): - diff_report.append({ - 'name': 'OVERALL_GEOMEAN', - 'label': '', - 'measurements': [{ - 'real_time': lhs_gmean[0], - 'cpu_time': lhs_gmean[1], - 'real_time_other': rhs_gmean[0], - 'cpu_time_other': rhs_gmean[1], - 'time': calculate_change(lhs_gmean[0], rhs_gmean[0]), - 'cpu': calculate_change(lhs_gmean[1], rhs_gmean[1]) - }], - 'time_unit': 's', - 'run_type': 'aggregate', - 'aggregate_name': 'geomean', - 'utest': {} - }) + diff_report.append( + { + "name": "OVERALL_GEOMEAN", + "label": "", + "measurements": [ + { + "real_time": lhs_gmean[0], + "cpu_time": lhs_gmean[1], + "real_time_other": rhs_gmean[0], + "cpu_time_other": rhs_gmean[1], + "time": calculate_change(lhs_gmean[0], rhs_gmean[0]), + "cpu": calculate_change(lhs_gmean[1], rhs_gmean[1]), + } + ], + "time_unit": "s", + "run_type": "aggregate", + "aggregate_name": "geomean", + "utest": {}, + } + ) return diff_report def print_difference_report( - json_diff_report, - include_aggregates_only=False, - utest=False, - utest_alpha=0.05, - use_color=True): + json_diff_report, + include_aggregates_only=False, + utest=False, + utest_alpha=0.05, + use_color=True, +): """ Calculate and report the difference between each test of two benchmarks runs specified as 'json1' and 'json2'. @@ -348,44 +401,53 @@ def print_difference_report( return BC_CYAN first_col_width = find_longest_name(json_diff_report) - first_col_width = max( - first_col_width, - len('Benchmark')) + first_col_width = max(first_col_width, len("Benchmark")) first_col_width += len(UTEST_COL_NAME) first_line = "{:<{}s}Time CPU Time Old Time New CPU Old CPU New".format( - 'Benchmark', 12 + first_col_width) - output_strs = [first_line, '-' * len(first_line)] + "Benchmark", 12 + first_col_width + ) + output_strs = [first_line, "-" * len(first_line)] fmt_str = "{}{:<{}s}{endc}{}{:+16.4f}{endc}{}{:+16.4f}{endc}{:14.0f}{:14.0f}{endc}{:14.0f}{:14.0f}" for benchmark in json_diff_report: # *If* we were asked to only include aggregates, # and if it is non-aggregate, then don't print it. 
- if not include_aggregates_only or not 'run_type' in benchmark or benchmark['run_type'] == 'aggregate': - for measurement in benchmark['measurements']: - output_strs += [color_format(use_color, - fmt_str, - BC_HEADER, - benchmark['name'], - first_col_width, - get_color(measurement['time']), - measurement['time'], - get_color(measurement['cpu']), - measurement['cpu'], - measurement['real_time'], - measurement['real_time_other'], - measurement['cpu_time'], - measurement['cpu_time_other'], - endc=BC_ENDC)] + if ( + not include_aggregates_only + or "run_type" not in benchmark + or benchmark["run_type"] == "aggregate" + ): + for measurement in benchmark["measurements"]: + output_strs += [ + color_format( + use_color, + fmt_str, + BC_HEADER, + benchmark["name"], + first_col_width, + get_color(measurement["time"]), + measurement["time"], + get_color(measurement["cpu"]), + measurement["cpu"], + measurement["real_time"], + measurement["real_time_other"], + measurement["cpu_time"], + measurement["cpu_time_other"], + endc=BC_ENDC, + ) + ] # After processing the measurements, if requested and # if applicable (e.g. u-test exists for given benchmark), # print the U test. - if utest and benchmark['utest']: - output_strs += print_utest(benchmark['name'], - benchmark['utest'], - utest_alpha=utest_alpha, - first_col_width=first_col_width, - use_color=use_color) + if utest and benchmark["utest"]: + output_strs += print_utest( + benchmark["name"], + benchmark["utest"], + utest_alpha=utest_alpha, + first_col_width=first_col_width, + use_color=use_color, + ) return output_strs @@ -397,21 +459,21 @@ def print_difference_report( class TestGetUniqueBenchmarkNames(unittest.TestCase): def load_results(self): import json + testInputs = os.path.join( - os.path.dirname( - os.path.realpath(__file__)), - 'Inputs') - testOutput = os.path.join(testInputs, 'test3_run0.json') - with open(testOutput, 'r') as f: + os.path.dirname(os.path.realpath(__file__)), "Inputs" + ) + testOutput = os.path.join(testInputs, "test3_run0.json") + with open(testOutput, "r") as f: json = json.load(f) return json def test_basic(self): expect_lines = [ - 'BM_One', - 'BM_Two', - 'short', # These two are not sorted - 'medium', # These two are not sorted + "BM_One", + "BM_Two", + "short", # These two are not sorted + "medium", # These two are not sorted ] json = self.load_results() output_lines = get_unique_benchmark_names(json) @@ -427,15 +489,15 @@ class TestReportDifference(unittest.TestCase): def setUpClass(cls): def load_results(): import json + testInputs = os.path.join( - os.path.dirname( - os.path.realpath(__file__)), - 'Inputs') - testOutput1 = os.path.join(testInputs, 'test1_run1.json') - testOutput2 = os.path.join(testInputs, 'test1_run2.json') - with open(testOutput1, 'r') as f: + os.path.dirname(os.path.realpath(__file__)), "Inputs" + ) + testOutput1 = os.path.join(testInputs, "test1_run1.json") + testOutput2 = os.path.join(testInputs, "test1_run2.json") + with open(testOutput1, "r") as f: json1 = json.load(f) - with open(testOutput2, 'r') as f: + with open(testOutput2, "r") as f: json2 = json.load(f) return json1, json2 @@ -444,171 +506,323 @@ class TestReportDifference(unittest.TestCase): def test_json_diff_report_pretty_printing(self): expect_lines = [ - ['BM_SameTimes', '+0.0000', '+0.0000', '10', '10', '10', '10'], - ['BM_2xFaster', '-0.5000', '-0.5000', '50', '25', '50', '25'], - ['BM_2xSlower', '+1.0000', '+1.0000', '50', '100', '50', '100'], - ['BM_1PercentFaster', '-0.0100', '-0.0100', '100', '99', '100', '99'], - 
['BM_1PercentSlower', '+0.0100', '+0.0100', '100', '101', '100', '101'], - ['BM_10PercentFaster', '-0.1000', '-0.1000', '100', '90', '100', '90'], - ['BM_10PercentSlower', '+0.1000', '+0.1000', '100', '110', '100', '110'], - ['BM_100xSlower', '+99.0000', '+99.0000', - '100', '10000', '100', '10000'], - ['BM_100xFaster', '-0.9900', '-0.9900', - '10000', '100', '10000', '100'], - ['BM_10PercentCPUToTime', '+0.1000', - '-0.1000', '100', '110', '100', '90'], - ['BM_ThirdFaster', '-0.3333', '-0.3334', '100', '67', '100', '67'], - ['BM_NotBadTimeUnit', '-0.9000', '+0.2000', '0', '0', '0', '1'], - ['BM_hasLabel', '+0.0000', '+0.0000', '1', '1', '1', '1'], - ['OVERALL_GEOMEAN', '-0.8113', '-0.7779', '0', '0', '0', '0'] + ["BM_SameTimes", "+0.0000", "+0.0000", "10", "10", "10", "10"], + ["BM_2xFaster", "-0.5000", "-0.5000", "50", "25", "50", "25"], + ["BM_2xSlower", "+1.0000", "+1.0000", "50", "100", "50", "100"], + [ + "BM_1PercentFaster", + "-0.0100", + "-0.0100", + "100", + "99", + "100", + "99", + ], + [ + "BM_1PercentSlower", + "+0.0100", + "+0.0100", + "100", + "101", + "100", + "101", + ], + [ + "BM_10PercentFaster", + "-0.1000", + "-0.1000", + "100", + "90", + "100", + "90", + ], + [ + "BM_10PercentSlower", + "+0.1000", + "+0.1000", + "100", + "110", + "100", + "110", + ], + [ + "BM_100xSlower", + "+99.0000", + "+99.0000", + "100", + "10000", + "100", + "10000", + ], + [ + "BM_100xFaster", + "-0.9900", + "-0.9900", + "10000", + "100", + "10000", + "100", + ], + [ + "BM_10PercentCPUToTime", + "+0.1000", + "-0.1000", + "100", + "110", + "100", + "90", + ], + ["BM_ThirdFaster", "-0.3333", "-0.3334", "100", "67", "100", "67"], + ["BM_NotBadTimeUnit", "-0.9000", "+0.2000", "0", "0", "0", "1"], + ["BM_hasLabel", "+0.0000", "+0.0000", "1", "1", "1", "1"], + ["OVERALL_GEOMEAN", "-0.8113", "-0.7779", "0", "0", "0", "0"], ] output_lines_with_header = print_difference_report( - self.json_diff_report, use_color=False) + self.json_diff_report, use_color=False + ) output_lines = output_lines_with_header[2:] print("\n") print("\n".join(output_lines_with_header)) self.assertEqual(len(output_lines), len(expect_lines)) for i in range(0, len(output_lines)): - parts = [x for x in output_lines[i].split(' ') if x] + parts = [x for x in output_lines[i].split(" ") if x] self.assertEqual(len(parts), 7) self.assertEqual(expect_lines[i], parts) def test_json_diff_report_output(self): expected_output = [ { - 'name': 'BM_SameTimes', - 'label': '', - 'measurements': [{'time': 0.0000, 'cpu': 0.0000, - 'real_time': 10, 'real_time_other': 10, - 'cpu_time': 10, 'cpu_time_other': 10}], - 'time_unit': 'ns', - 'utest': {} + "name": "BM_SameTimes", + "label": "", + "measurements": [ + { + "time": 0.0000, + "cpu": 0.0000, + "real_time": 10, + "real_time_other": 10, + "cpu_time": 10, + "cpu_time_other": 10, + } + ], + "time_unit": "ns", + "utest": {}, }, { - 'name': 'BM_2xFaster', - 'label': '', - 'measurements': [{'time': -0.5000, 'cpu': -0.5000, - 'real_time': 50, 'real_time_other': 25, - 'cpu_time': 50, 'cpu_time_other': 25}], - 'time_unit': 'ns', - 'utest': {} + "name": "BM_2xFaster", + "label": "", + "measurements": [ + { + "time": -0.5000, + "cpu": -0.5000, + "real_time": 50, + "real_time_other": 25, + "cpu_time": 50, + "cpu_time_other": 25, + } + ], + "time_unit": "ns", + "utest": {}, }, { - 'name': 'BM_2xSlower', - 'label': '', - 'measurements': [{'time': 1.0000, 'cpu': 1.0000, - 'real_time': 50, 'real_time_other': 100, - 'cpu_time': 50, 'cpu_time_other': 100}], - 'time_unit': 'ns', - 'utest': {} + "name": "BM_2xSlower", 
+ "label": "", + "measurements": [ + { + "time": 1.0000, + "cpu": 1.0000, + "real_time": 50, + "real_time_other": 100, + "cpu_time": 50, + "cpu_time_other": 100, + } + ], + "time_unit": "ns", + "utest": {}, }, { - 'name': 'BM_1PercentFaster', - 'label': '', - 'measurements': [{'time': -0.0100, 'cpu': -0.0100, - 'real_time': 100, 'real_time_other': 98.9999999, - 'cpu_time': 100, 'cpu_time_other': 98.9999999}], - 'time_unit': 'ns', - 'utest': {} + "name": "BM_1PercentFaster", + "label": "", + "measurements": [ + { + "time": -0.0100, + "cpu": -0.0100, + "real_time": 100, + "real_time_other": 98.9999999, + "cpu_time": 100, + "cpu_time_other": 98.9999999, + } + ], + "time_unit": "ns", + "utest": {}, }, { - 'name': 'BM_1PercentSlower', - 'label': '', - 'measurements': [{'time': 0.0100, 'cpu': 0.0100, - 'real_time': 100, 'real_time_other': 101, - 'cpu_time': 100, 'cpu_time_other': 101}], - 'time_unit': 'ns', - 'utest': {} + "name": "BM_1PercentSlower", + "label": "", + "measurements": [ + { + "time": 0.0100, + "cpu": 0.0100, + "real_time": 100, + "real_time_other": 101, + "cpu_time": 100, + "cpu_time_other": 101, + } + ], + "time_unit": "ns", + "utest": {}, }, { - 'name': 'BM_10PercentFaster', - 'label': '', - 'measurements': [{'time': -0.1000, 'cpu': -0.1000, - 'real_time': 100, 'real_time_other': 90, - 'cpu_time': 100, 'cpu_time_other': 90}], - 'time_unit': 'ns', - 'utest': {} + "name": "BM_10PercentFaster", + "label": "", + "measurements": [ + { + "time": -0.1000, + "cpu": -0.1000, + "real_time": 100, + "real_time_other": 90, + "cpu_time": 100, + "cpu_time_other": 90, + } + ], + "time_unit": "ns", + "utest": {}, }, { - 'name': 'BM_10PercentSlower', - 'label': '', - 'measurements': [{'time': 0.1000, 'cpu': 0.1000, - 'real_time': 100, 'real_time_other': 110, - 'cpu_time': 100, 'cpu_time_other': 110}], - 'time_unit': 'ns', - 'utest': {} + "name": "BM_10PercentSlower", + "label": "", + "measurements": [ + { + "time": 0.1000, + "cpu": 0.1000, + "real_time": 100, + "real_time_other": 110, + "cpu_time": 100, + "cpu_time_other": 110, + } + ], + "time_unit": "ns", + "utest": {}, }, { - 'name': 'BM_100xSlower', - 'label': '', - 'measurements': [{'time': 99.0000, 'cpu': 99.0000, - 'real_time': 100, 'real_time_other': 10000, - 'cpu_time': 100, 'cpu_time_other': 10000}], - 'time_unit': 'ns', - 'utest': {} + "name": "BM_100xSlower", + "label": "", + "measurements": [ + { + "time": 99.0000, + "cpu": 99.0000, + "real_time": 100, + "real_time_other": 10000, + "cpu_time": 100, + "cpu_time_other": 10000, + } + ], + "time_unit": "ns", + "utest": {}, }, { - 'name': 'BM_100xFaster', - 'label': '', - 'measurements': [{'time': -0.9900, 'cpu': -0.9900, - 'real_time': 10000, 'real_time_other': 100, - 'cpu_time': 10000, 'cpu_time_other': 100}], - 'time_unit': 'ns', - 'utest': {} + "name": "BM_100xFaster", + "label": "", + "measurements": [ + { + "time": -0.9900, + "cpu": -0.9900, + "real_time": 10000, + "real_time_other": 100, + "cpu_time": 10000, + "cpu_time_other": 100, + } + ], + "time_unit": "ns", + "utest": {}, }, { - 'name': 'BM_10PercentCPUToTime', - 'label': '', - 'measurements': [{'time': 0.1000, 'cpu': -0.1000, - 'real_time': 100, 'real_time_other': 110, - 'cpu_time': 100, 'cpu_time_other': 90}], - 'time_unit': 'ns', - 'utest': {} + "name": "BM_10PercentCPUToTime", + "label": "", + "measurements": [ + { + "time": 0.1000, + "cpu": -0.1000, + "real_time": 100, + "real_time_other": 110, + "cpu_time": 100, + "cpu_time_other": 90, + } + ], + "time_unit": "ns", + "utest": {}, }, { - 'name': 'BM_ThirdFaster', - 
'label': '', - 'measurements': [{'time': -0.3333, 'cpu': -0.3334, - 'real_time': 100, 'real_time_other': 67, - 'cpu_time': 100, 'cpu_time_other': 67}], - 'time_unit': 'ns', - 'utest': {} + "name": "BM_ThirdFaster", + "label": "", + "measurements": [ + { + "time": -0.3333, + "cpu": -0.3334, + "real_time": 100, + "real_time_other": 67, + "cpu_time": 100, + "cpu_time_other": 67, + } + ], + "time_unit": "ns", + "utest": {}, }, { - 'name': 'BM_NotBadTimeUnit', - 'label': '', - 'measurements': [{'time': -0.9000, 'cpu': 0.2000, - 'real_time': 0.4, 'real_time_other': 0.04, - 'cpu_time': 0.5, 'cpu_time_other': 0.6}], - 'time_unit': 's', - 'utest': {} + "name": "BM_NotBadTimeUnit", + "label": "", + "measurements": [ + { + "time": -0.9000, + "cpu": 0.2000, + "real_time": 0.4, + "real_time_other": 0.04, + "cpu_time": 0.5, + "cpu_time_other": 0.6, + } + ], + "time_unit": "s", + "utest": {}, }, { - 'name': 'BM_hasLabel', - 'label': 'a label', - 'measurements': [{'time': 0.0000, 'cpu': 0.0000, - 'real_time': 1, 'real_time_other': 1, - 'cpu_time': 1, 'cpu_time_other': 1}], - 'time_unit': 's', - 'utest': {} + "name": "BM_hasLabel", + "label": "a label", + "measurements": [ + { + "time": 0.0000, + "cpu": 0.0000, + "real_time": 1, + "real_time_other": 1, + "cpu_time": 1, + "cpu_time_other": 1, + } + ], + "time_unit": "s", + "utest": {}, }, { - 'name': 'OVERALL_GEOMEAN', - 'label': '', - 'measurements': [{'real_time': 3.1622776601683826e-06, 'cpu_time': 3.2130844755623912e-06, - 'real_time_other': 1.9768988699420897e-07, 'cpu_time_other': 2.397447755209533e-07, - 'time': -0.8112976497120911, 'cpu': -0.7778551721181174}], - 'time_unit': 's', - 'run_type': 'aggregate', - 'aggregate_name': 'geomean', 'utest': {} + "name": "OVERALL_GEOMEAN", + "label": "", + "measurements": [ + { + "real_time": 3.1622776601683826e-06, + "cpu_time": 3.2130844755623912e-06, + "real_time_other": 1.9768988699420897e-07, + "cpu_time_other": 2.397447755209533e-07, + "time": -0.8112976497120911, + "cpu": -0.7778551721181174, + } + ], + "time_unit": "s", + "run_type": "aggregate", + "aggregate_name": "geomean", + "utest": {}, }, ] self.assertEqual(len(self.json_diff_report), len(expected_output)) - for out, expected in zip( - self.json_diff_report, expected_output): - self.assertEqual(out['name'], expected['name']) - self.assertEqual(out['label'], expected['label']) - self.assertEqual(out['time_unit'], expected['time_unit']) + for out, expected in zip(self.json_diff_report, expected_output): + self.assertEqual(out["name"], expected["name"]) + self.assertEqual(out["label"], expected["label"]) + self.assertEqual(out["time_unit"], expected["time_unit"]) assert_utest(self, out, expected) assert_measurements(self, out, expected) @@ -618,12 +832,12 @@ class TestReportDifferenceBetweenFamilies(unittest.TestCase): def setUpClass(cls): def load_result(): import json + testInputs = os.path.join( - os.path.dirname( - os.path.realpath(__file__)), - 'Inputs') - testOutput = os.path.join(testInputs, 'test2_run.json') - with open(testOutput, 'r') as f: + os.path.dirname(os.path.realpath(__file__)), "Inputs" + ) + testOutput = os.path.join(testInputs, "test2_run.json") + with open(testOutput, "r") as f: json = json.load(f) return json @@ -634,65 +848,108 @@ class TestReportDifferenceBetweenFamilies(unittest.TestCase): def test_json_diff_report_pretty_printing(self): expect_lines = [ - ['.', '-0.5000', '-0.5000', '10', '5', '10', '5'], - ['./4', '-0.5000', '-0.5000', '40', '20', '40', '20'], - ['Prefix/.', '-0.5000', '-0.5000', '20', '10', '20', '10'], - 
['Prefix/./3', '-0.5000', '-0.5000', '30', '15', '30', '15'], - ['OVERALL_GEOMEAN', '-0.5000', '-0.5000', '0', '0', '0', '0'] + [".", "-0.5000", "-0.5000", "10", "5", "10", "5"], + ["./4", "-0.5000", "-0.5000", "40", "20", "40", "20"], + ["Prefix/.", "-0.5000", "-0.5000", "20", "10", "20", "10"], + ["Prefix/./3", "-0.5000", "-0.5000", "30", "15", "30", "15"], + ["OVERALL_GEOMEAN", "-0.5000", "-0.5000", "0", "0", "0", "0"], ] output_lines_with_header = print_difference_report( - self.json_diff_report, use_color=False) + self.json_diff_report, use_color=False + ) output_lines = output_lines_with_header[2:] print("\n") print("\n".join(output_lines_with_header)) self.assertEqual(len(output_lines), len(expect_lines)) for i in range(0, len(output_lines)): - parts = [x for x in output_lines[i].split(' ') if x] + parts = [x for x in output_lines[i].split(" ") if x] self.assertEqual(len(parts), 7) self.assertEqual(expect_lines[i], parts) def test_json_diff_report(self): expected_output = [ { - 'name': u'.', - 'measurements': [{'time': -0.5, 'cpu': -0.5, 'real_time': 10, 'real_time_other': 5, 'cpu_time': 10, 'cpu_time_other': 5}], - 'time_unit': 'ns', - 'utest': {} + "name": ".", + "measurements": [ + { + "time": -0.5, + "cpu": -0.5, + "real_time": 10, + "real_time_other": 5, + "cpu_time": 10, + "cpu_time_other": 5, + } + ], + "time_unit": "ns", + "utest": {}, }, { - 'name': u'./4', - 'measurements': [{'time': -0.5, 'cpu': -0.5, 'real_time': 40, 'real_time_other': 20, 'cpu_time': 40, 'cpu_time_other': 20}], - 'time_unit': 'ns', - 'utest': {}, + "name": "./4", + "measurements": [ + { + "time": -0.5, + "cpu": -0.5, + "real_time": 40, + "real_time_other": 20, + "cpu_time": 40, + "cpu_time_other": 20, + } + ], + "time_unit": "ns", + "utest": {}, }, { - 'name': u'Prefix/.', - 'measurements': [{'time': -0.5, 'cpu': -0.5, 'real_time': 20, 'real_time_other': 10, 'cpu_time': 20, 'cpu_time_other': 10}], - 'time_unit': 'ns', - 'utest': {} + "name": "Prefix/.", + "measurements": [ + { + "time": -0.5, + "cpu": -0.5, + "real_time": 20, + "real_time_other": 10, + "cpu_time": 20, + "cpu_time_other": 10, + } + ], + "time_unit": "ns", + "utest": {}, }, { - 'name': u'Prefix/./3', - 'measurements': [{'time': -0.5, 'cpu': -0.5, 'real_time': 30, 'real_time_other': 15, 'cpu_time': 30, 'cpu_time_other': 15}], - 'time_unit': 'ns', - 'utest': {} + "name": "Prefix/./3", + "measurements": [ + { + "time": -0.5, + "cpu": -0.5, + "real_time": 30, + "real_time_other": 15, + "cpu_time": 30, + "cpu_time_other": 15, + } + ], + "time_unit": "ns", + "utest": {}, }, { - 'name': 'OVERALL_GEOMEAN', - 'measurements': [{'real_time': 2.213363839400641e-08, 'cpu_time': 2.213363839400641e-08, - 'real_time_other': 1.1066819197003185e-08, 'cpu_time_other': 1.1066819197003185e-08, - 'time': -0.5000000000000009, 'cpu': -0.5000000000000009}], - 'time_unit': 's', - 'run_type': 'aggregate', - 'aggregate_name': 'geomean', - 'utest': {} - } + "name": "OVERALL_GEOMEAN", + "measurements": [ + { + "real_time": 2.213363839400641e-08, + "cpu_time": 2.213363839400641e-08, + "real_time_other": 1.1066819197003185e-08, + "cpu_time_other": 1.1066819197003185e-08, + "time": -0.5000000000000009, + "cpu": -0.5000000000000009, + } + ], + "time_unit": "s", + "run_type": "aggregate", + "aggregate_name": "geomean", + "utest": {}, + }, ] self.assertEqual(len(self.json_diff_report), len(expected_output)) - for out, expected in zip( - self.json_diff_report, expected_output): - self.assertEqual(out['name'], expected['name']) - self.assertEqual(out['time_unit'], 
expected['time_unit']) + for out, expected in zip(self.json_diff_report, expected_output): + self.assertEqual(out["name"], expected["name"]) + self.assertEqual(out["time_unit"], expected["time_unit"]) assert_utest(self, out, expected) assert_measurements(self, out, expected) @@ -702,424 +959,489 @@ class TestReportDifferenceWithUTest(unittest.TestCase): def setUpClass(cls): def load_results(): import json + testInputs = os.path.join( - os.path.dirname( - os.path.realpath(__file__)), - 'Inputs') - testOutput1 = os.path.join(testInputs, 'test3_run0.json') - testOutput2 = os.path.join(testInputs, 'test3_run1.json') - with open(testOutput1, 'r') as f: + os.path.dirname(os.path.realpath(__file__)), "Inputs" + ) + testOutput1 = os.path.join(testInputs, "test3_run0.json") + testOutput2 = os.path.join(testInputs, "test3_run1.json") + with open(testOutput1, "r") as f: json1 = json.load(f) - with open(testOutput2, 'r') as f: + with open(testOutput2, "r") as f: json2 = json.load(f) return json1, json2 json1, json2 = load_results() - cls.json_diff_report = get_difference_report( - json1, json2, utest=True) + cls.json_diff_report = get_difference_report(json1, json2, utest=True) def test_json_diff_report_pretty_printing(self): expect_lines = [ - ['BM_One', '-0.1000', '+0.1000', '10', '9', '100', '110'], - ['BM_Two', '+0.1111', '-0.0111', '9', '10', '90', '89'], - ['BM_Two', '-0.1250', '-0.1628', '8', '7', '86', '72'], - ['BM_Two_pvalue', - '1.0000', - '0.6667', - 'U', - 'Test,', - 'Repetitions:', - '2', - 'vs', - '2.', - 'WARNING:', - 'Results', - 'unreliable!', - '9+', - 'repetitions', - 'recommended.'], - ['short', '-0.1250', '-0.0625', '8', '7', '80', '75'], - ['short', '-0.4325', '-0.1351', '8', '5', '77', '67'], - ['short_pvalue', - '0.7671', - '0.2000', - 'U', - 'Test,', - 'Repetitions:', - '2', - 'vs', - '3.', - 'WARNING:', - 'Results', - 'unreliable!', - '9+', - 'repetitions', - 'recommended.'], - ['medium', '-0.3750', '-0.3375', '8', '5', '80', '53'], - ['OVERALL_GEOMEAN', '+1.6405', '-0.6985', '0', '0', '0', '0'] + ["BM_One", "-0.1000", "+0.1000", "10", "9", "100", "110"], + ["BM_Two", "+0.1111", "-0.0111", "9", "10", "90", "89"], + ["BM_Two", "-0.1250", "-0.1628", "8", "7", "86", "72"], + [ + "BM_Two_pvalue", + "1.0000", + "0.6667", + "U", + "Test,", + "Repetitions:", + "2", + "vs", + "2.", + "WARNING:", + "Results", + "unreliable!", + "9+", + "repetitions", + "recommended.", + ], + ["short", "-0.1250", "-0.0625", "8", "7", "80", "75"], + ["short", "-0.4325", "-0.1351", "8", "5", "77", "67"], + [ + "short_pvalue", + "0.7671", + "0.2000", + "U", + "Test,", + "Repetitions:", + "2", + "vs", + "3.", + "WARNING:", + "Results", + "unreliable!", + "9+", + "repetitions", + "recommended.", + ], + ["medium", "-0.3750", "-0.3375", "8", "5", "80", "53"], + ["OVERALL_GEOMEAN", "+1.6405", "-0.6985", "0", "0", "0", "0"], ] output_lines_with_header = print_difference_report( - self.json_diff_report, utest=True, utest_alpha=0.05, use_color=False) + self.json_diff_report, utest=True, utest_alpha=0.05, use_color=False + ) output_lines = output_lines_with_header[2:] print("\n") print("\n".join(output_lines_with_header)) self.assertEqual(len(output_lines), len(expect_lines)) for i in range(0, len(output_lines)): - parts = [x for x in output_lines[i].split(' ') if x] + parts = [x for x in output_lines[i].split(" ") if x] self.assertEqual(expect_lines[i], parts) def test_json_diff_report_pretty_printing_aggregates_only(self): expect_lines = [ - ['BM_One', '-0.1000', '+0.1000', '10', '9', '100', '110'], - 
['BM_Two_pvalue', - '1.0000', - '0.6667', - 'U', - 'Test,', - 'Repetitions:', - '2', - 'vs', - '2.', - 'WARNING:', - 'Results', - 'unreliable!', - '9+', - 'repetitions', - 'recommended.'], - ['short', '-0.1250', '-0.0625', '8', '7', '80', '75'], - ['short', '-0.4325', '-0.1351', '8', '5', '77', '67'], - ['short_pvalue', - '0.7671', - '0.2000', - 'U', - 'Test,', - 'Repetitions:', - '2', - 'vs', - '3.', - 'WARNING:', - 'Results', - 'unreliable!', - '9+', - 'repetitions', - 'recommended.'], - ['OVERALL_GEOMEAN', '+1.6405', '-0.6985', '0', '0', '0', '0'] + ["BM_One", "-0.1000", "+0.1000", "10", "9", "100", "110"], + [ + "BM_Two_pvalue", + "1.0000", + "0.6667", + "U", + "Test,", + "Repetitions:", + "2", + "vs", + "2.", + "WARNING:", + "Results", + "unreliable!", + "9+", + "repetitions", + "recommended.", + ], + ["short", "-0.1250", "-0.0625", "8", "7", "80", "75"], + ["short", "-0.4325", "-0.1351", "8", "5", "77", "67"], + [ + "short_pvalue", + "0.7671", + "0.2000", + "U", + "Test,", + "Repetitions:", + "2", + "vs", + "3.", + "WARNING:", + "Results", + "unreliable!", + "9+", + "repetitions", + "recommended.", + ], + ["OVERALL_GEOMEAN", "+1.6405", "-0.6985", "0", "0", "0", "0"], ] output_lines_with_header = print_difference_report( - self.json_diff_report, include_aggregates_only=True, utest=True, utest_alpha=0.05, use_color=False) + self.json_diff_report, + include_aggregates_only=True, + utest=True, + utest_alpha=0.05, + use_color=False, + ) output_lines = output_lines_with_header[2:] print("\n") print("\n".join(output_lines_with_header)) self.assertEqual(len(output_lines), len(expect_lines)) for i in range(0, len(output_lines)): - parts = [x for x in output_lines[i].split(' ') if x] + parts = [x for x in output_lines[i].split(" ") if x] self.assertEqual(expect_lines[i], parts) def test_json_diff_report(self): expected_output = [ { - 'name': u'BM_One', - 'measurements': [ - {'time': -0.1, - 'cpu': 0.1, - 'real_time': 10, - 'real_time_other': 9, - 'cpu_time': 100, - 'cpu_time_other': 110} + "name": "BM_One", + "measurements": [ + { + "time": -0.1, + "cpu": 0.1, + "real_time": 10, + "real_time_other": 9, + "cpu_time": 100, + "cpu_time_other": 110, + } ], - 'time_unit': 'ns', - 'utest': {} + "time_unit": "ns", + "utest": {}, }, { - 'name': u'BM_Two', - 'measurements': [ - {'time': 0.1111111111111111, - 'cpu': -0.011111111111111112, - 'real_time': 9, - 'real_time_other': 10, - 'cpu_time': 90, - 'cpu_time_other': 89}, - {'time': -0.125, 'cpu': -0.16279069767441862, 'real_time': 8, - 'real_time_other': 7, 'cpu_time': 86, 'cpu_time_other': 72} + "name": "BM_Two", + "measurements": [ + { + "time": 0.1111111111111111, + "cpu": -0.011111111111111112, + "real_time": 9, + "real_time_other": 10, + "cpu_time": 90, + "cpu_time_other": 89, + }, + { + "time": -0.125, + "cpu": -0.16279069767441862, + "real_time": 8, + "real_time_other": 7, + "cpu_time": 86, + "cpu_time_other": 72, + }, ], - 'time_unit': 'ns', - 'utest': { - 'have_optimal_repetitions': False, 'cpu_pvalue': 0.6666666666666666, 'time_pvalue': 1.0 - } + "time_unit": "ns", + "utest": { + "have_optimal_repetitions": False, + "cpu_pvalue": 0.6666666666666666, + "time_pvalue": 1.0, + }, }, { - 'name': u'short', - 'measurements': [ - {'time': -0.125, - 'cpu': -0.0625, - 'real_time': 8, - 'real_time_other': 7, - 'cpu_time': 80, - 'cpu_time_other': 75}, - {'time': -0.4325, - 'cpu': -0.13506493506493514, - 'real_time': 8, - 'real_time_other': 4.54, - 'cpu_time': 77, - 'cpu_time_other': 66.6} + "name": "short", + "measurements": [ + { + "time": -0.125, + 
"cpu": -0.0625, + "real_time": 8, + "real_time_other": 7, + "cpu_time": 80, + "cpu_time_other": 75, + }, + { + "time": -0.4325, + "cpu": -0.13506493506493514, + "real_time": 8, + "real_time_other": 4.54, + "cpu_time": 77, + "cpu_time_other": 66.6, + }, ], - 'time_unit': 'ns', - 'utest': { - 'have_optimal_repetitions': False, 'cpu_pvalue': 0.2, 'time_pvalue': 0.7670968684102772 - } + "time_unit": "ns", + "utest": { + "have_optimal_repetitions": False, + "cpu_pvalue": 0.2, + "time_pvalue": 0.7670968684102772, + }, }, { - 'name': u'medium', - 'measurements': [ - {'time': -0.375, - 'cpu': -0.3375, - 'real_time': 8, - 'real_time_other': 5, - 'cpu_time': 80, - 'cpu_time_other': 53} + "name": "medium", + "measurements": [ + { + "time": -0.375, + "cpu": -0.3375, + "real_time": 8, + "real_time_other": 5, + "cpu_time": 80, + "cpu_time_other": 53, + } ], - 'time_unit': 'ns', - 'utest': {} + "time_unit": "ns", + "utest": {}, }, { - 'name': 'OVERALL_GEOMEAN', - 'measurements': [{'real_time': 8.48528137423858e-09, 'cpu_time': 8.441336246629233e-08, - 'real_time_other': 2.2405267593145244e-08, 'cpu_time_other': 2.5453661413660466e-08, - 'time': 1.6404861082353634, 'cpu': -0.6984640740519662}], - 'time_unit': 's', - 'run_type': 'aggregate', - 'aggregate_name': 'geomean', - 'utest': {} - } + "name": "OVERALL_GEOMEAN", + "measurements": [ + { + "real_time": 8.48528137423858e-09, + "cpu_time": 8.441336246629233e-08, + "real_time_other": 2.2405267593145244e-08, + "cpu_time_other": 2.5453661413660466e-08, + "time": 1.6404861082353634, + "cpu": -0.6984640740519662, + } + ], + "time_unit": "s", + "run_type": "aggregate", + "aggregate_name": "geomean", + "utest": {}, + }, ] self.assertEqual(len(self.json_diff_report), len(expected_output)) - for out, expected in zip( - self.json_diff_report, expected_output): - self.assertEqual(out['name'], expected['name']) - self.assertEqual(out['time_unit'], expected['time_unit']) + for out, expected in zip(self.json_diff_report, expected_output): + self.assertEqual(out["name"], expected["name"]) + self.assertEqual(out["time_unit"], expected["time_unit"]) assert_utest(self, out, expected) assert_measurements(self, out, expected) class TestReportDifferenceWithUTestWhileDisplayingAggregatesOnly( - unittest.TestCase): + unittest.TestCase +): @classmethod def setUpClass(cls): def load_results(): import json + testInputs = os.path.join( - os.path.dirname( - os.path.realpath(__file__)), - 'Inputs') - testOutput1 = os.path.join(testInputs, 'test3_run0.json') - testOutput2 = os.path.join(testInputs, 'test3_run1.json') - with open(testOutput1, 'r') as f: + os.path.dirname(os.path.realpath(__file__)), "Inputs" + ) + testOutput1 = os.path.join(testInputs, "test3_run0.json") + testOutput2 = os.path.join(testInputs, "test3_run1.json") + with open(testOutput1, "r") as f: json1 = json.load(f) - with open(testOutput2, 'r') as f: + with open(testOutput2, "r") as f: json2 = json.load(f) return json1, json2 json1, json2 = load_results() - cls.json_diff_report = get_difference_report( - json1, json2, utest=True) + cls.json_diff_report = get_difference_report(json1, json2, utest=True) def test_json_diff_report_pretty_printing(self): expect_lines = [ - ['BM_One', '-0.1000', '+0.1000', '10', '9', '100', '110'], - ['BM_Two', '+0.1111', '-0.0111', '9', '10', '90', '89'], - ['BM_Two', '-0.1250', '-0.1628', '8', '7', '86', '72'], - ['BM_Two_pvalue', - '1.0000', - '0.6667', - 'U', - 'Test,', - 'Repetitions:', - '2', - 'vs', - '2.', - 'WARNING:', - 'Results', - 'unreliable!', - '9+', - 'repetitions', - 
'recommended.'], - ['short', '-0.1250', '-0.0625', '8', '7', '80', '75'], - ['short', '-0.4325', '-0.1351', '8', '5', '77', '67'], - ['short_pvalue', - '0.7671', - '0.2000', - 'U', - 'Test,', - 'Repetitions:', - '2', - 'vs', - '3.', - 'WARNING:', - 'Results', - 'unreliable!', - '9+', - 'repetitions', - 'recommended.'], - ['medium', '-0.3750', '-0.3375', '8', '5', '80', '53'], - ['OVERALL_GEOMEAN', '+1.6405', '-0.6985', '0', '0', '0', '0'] + ["BM_One", "-0.1000", "+0.1000", "10", "9", "100", "110"], + ["BM_Two", "+0.1111", "-0.0111", "9", "10", "90", "89"], + ["BM_Two", "-0.1250", "-0.1628", "8", "7", "86", "72"], + [ + "BM_Two_pvalue", + "1.0000", + "0.6667", + "U", + "Test,", + "Repetitions:", + "2", + "vs", + "2.", + "WARNING:", + "Results", + "unreliable!", + "9+", + "repetitions", + "recommended.", + ], + ["short", "-0.1250", "-0.0625", "8", "7", "80", "75"], + ["short", "-0.4325", "-0.1351", "8", "5", "77", "67"], + [ + "short_pvalue", + "0.7671", + "0.2000", + "U", + "Test,", + "Repetitions:", + "2", + "vs", + "3.", + "WARNING:", + "Results", + "unreliable!", + "9+", + "repetitions", + "recommended.", + ], + ["medium", "-0.3750", "-0.3375", "8", "5", "80", "53"], + ["OVERALL_GEOMEAN", "+1.6405", "-0.6985", "0", "0", "0", "0"], ] output_lines_with_header = print_difference_report( - self.json_diff_report, - utest=True, utest_alpha=0.05, use_color=False) + self.json_diff_report, utest=True, utest_alpha=0.05, use_color=False + ) output_lines = output_lines_with_header[2:] print("\n") print("\n".join(output_lines_with_header)) self.assertEqual(len(output_lines), len(expect_lines)) for i in range(0, len(output_lines)): - parts = [x for x in output_lines[i].split(' ') if x] + parts = [x for x in output_lines[i].split(" ") if x] self.assertEqual(expect_lines[i], parts) def test_json_diff_report(self): expected_output = [ { - 'name': u'BM_One', - 'measurements': [ - {'time': -0.1, - 'cpu': 0.1, - 'real_time': 10, - 'real_time_other': 9, - 'cpu_time': 100, - 'cpu_time_other': 110} + "name": "BM_One", + "measurements": [ + { + "time": -0.1, + "cpu": 0.1, + "real_time": 10, + "real_time_other": 9, + "cpu_time": 100, + "cpu_time_other": 110, + } ], - 'time_unit': 'ns', - 'utest': {} + "time_unit": "ns", + "utest": {}, }, { - 'name': u'BM_Two', - 'measurements': [ - {'time': 0.1111111111111111, - 'cpu': -0.011111111111111112, - 'real_time': 9, - 'real_time_other': 10, - 'cpu_time': 90, - 'cpu_time_other': 89}, - {'time': -0.125, 'cpu': -0.16279069767441862, 'real_time': 8, - 'real_time_other': 7, 'cpu_time': 86, 'cpu_time_other': 72} + "name": "BM_Two", + "measurements": [ + { + "time": 0.1111111111111111, + "cpu": -0.011111111111111112, + "real_time": 9, + "real_time_other": 10, + "cpu_time": 90, + "cpu_time_other": 89, + }, + { + "time": -0.125, + "cpu": -0.16279069767441862, + "real_time": 8, + "real_time_other": 7, + "cpu_time": 86, + "cpu_time_other": 72, + }, ], - 'time_unit': 'ns', - 'utest': { - 'have_optimal_repetitions': False, 'cpu_pvalue': 0.6666666666666666, 'time_pvalue': 1.0 - } + "time_unit": "ns", + "utest": { + "have_optimal_repetitions": False, + "cpu_pvalue": 0.6666666666666666, + "time_pvalue": 1.0, + }, }, { - 'name': u'short', - 'measurements': [ - {'time': -0.125, - 'cpu': -0.0625, - 'real_time': 8, - 'real_time_other': 7, - 'cpu_time': 80, - 'cpu_time_other': 75}, - {'time': -0.4325, - 'cpu': -0.13506493506493514, - 'real_time': 8, - 'real_time_other': 4.54, - 'cpu_time': 77, - 'cpu_time_other': 66.6} + "name": "short", + "measurements": [ + { + "time": -0.125, + "cpu": 
-0.0625, + "real_time": 8, + "real_time_other": 7, + "cpu_time": 80, + "cpu_time_other": 75, + }, + { + "time": -0.4325, + "cpu": -0.13506493506493514, + "real_time": 8, + "real_time_other": 4.54, + "cpu_time": 77, + "cpu_time_other": 66.6, + }, ], - 'time_unit': 'ns', - 'utest': { - 'have_optimal_repetitions': False, 'cpu_pvalue': 0.2, 'time_pvalue': 0.7670968684102772 - } + "time_unit": "ns", + "utest": { + "have_optimal_repetitions": False, + "cpu_pvalue": 0.2, + "time_pvalue": 0.7670968684102772, + }, }, { - 'name': u'medium', - 'measurements': [ - {'real_time_other': 5, - 'cpu_time': 80, - 'time': -0.375, - 'real_time': 8, - 'cpu_time_other': 53, - 'cpu': -0.3375 - } + "name": "medium", + "measurements": [ + { + "real_time_other": 5, + "cpu_time": 80, + "time": -0.375, + "real_time": 8, + "cpu_time_other": 53, + "cpu": -0.3375, + } ], - 'utest': {}, - 'time_unit': u'ns', - 'aggregate_name': '' + "utest": {}, + "time_unit": "ns", + "aggregate_name": "", }, { - 'name': 'OVERALL_GEOMEAN', - 'measurements': [{'real_time': 8.48528137423858e-09, 'cpu_time': 8.441336246629233e-08, - 'real_time_other': 2.2405267593145244e-08, 'cpu_time_other': 2.5453661413660466e-08, - 'time': 1.6404861082353634, 'cpu': -0.6984640740519662}], - 'time_unit': 's', - 'run_type': 'aggregate', - 'aggregate_name': 'geomean', - 'utest': {} - } + "name": "OVERALL_GEOMEAN", + "measurements": [ + { + "real_time": 8.48528137423858e-09, + "cpu_time": 8.441336246629233e-08, + "real_time_other": 2.2405267593145244e-08, + "cpu_time_other": 2.5453661413660466e-08, + "time": 1.6404861082353634, + "cpu": -0.6984640740519662, + } + ], + "time_unit": "s", + "run_type": "aggregate", + "aggregate_name": "geomean", + "utest": {}, + }, ] self.assertEqual(len(self.json_diff_report), len(expected_output)) - for out, expected in zip( - self.json_diff_report, expected_output): - self.assertEqual(out['name'], expected['name']) - self.assertEqual(out['time_unit'], expected['time_unit']) + for out, expected in zip(self.json_diff_report, expected_output): + self.assertEqual(out["name"], expected["name"]) + self.assertEqual(out["time_unit"], expected["time_unit"]) assert_utest(self, out, expected) assert_measurements(self, out, expected) -class TestReportDifferenceForPercentageAggregates( - unittest.TestCase): +class TestReportDifferenceForPercentageAggregates(unittest.TestCase): @classmethod def setUpClass(cls): def load_results(): import json + testInputs = os.path.join( - os.path.dirname( - os.path.realpath(__file__)), - 'Inputs') - testOutput1 = os.path.join(testInputs, 'test4_run0.json') - testOutput2 = os.path.join(testInputs, 'test4_run1.json') - with open(testOutput1, 'r') as f: + os.path.dirname(os.path.realpath(__file__)), "Inputs" + ) + testOutput1 = os.path.join(testInputs, "test4_run0.json") + testOutput2 = os.path.join(testInputs, "test4_run1.json") + with open(testOutput1, "r") as f: json1 = json.load(f) - with open(testOutput2, 'r') as f: + with open(testOutput2, "r") as f: json2 = json.load(f) return json1, json2 json1, json2 = load_results() - cls.json_diff_report = get_difference_report( - json1, json2, utest=True) + cls.json_diff_report = get_difference_report(json1, json2, utest=True) def test_json_diff_report_pretty_printing(self): - expect_lines = [ - ['whocares', '-0.5000', '+0.5000', '0', '0', '0', '0'] - ] + expect_lines = [["whocares", "-0.5000", "+0.5000", "0", "0", "0", "0"]] output_lines_with_header = print_difference_report( - self.json_diff_report, - utest=True, utest_alpha=0.05, use_color=False) + 
self.json_diff_report, utest=True, utest_alpha=0.05, use_color=False + ) output_lines = output_lines_with_header[2:] print("\n") print("\n".join(output_lines_with_header)) self.assertEqual(len(output_lines), len(expect_lines)) for i in range(0, len(output_lines)): - parts = [x for x in output_lines[i].split(' ') if x] + parts = [x for x in output_lines[i].split(" ") if x] self.assertEqual(expect_lines[i], parts) def test_json_diff_report(self): expected_output = [ { - 'name': u'whocares', - 'measurements': [ - {'time': -0.5, - 'cpu': 0.5, - 'real_time': 0.01, - 'real_time_other': 0.005, - 'cpu_time': 0.10, - 'cpu_time_other': 0.15} + "name": "whocares", + "measurements": [ + { + "time": -0.5, + "cpu": 0.5, + "real_time": 0.01, + "real_time_other": 0.005, + "cpu_time": 0.10, + "cpu_time_other": 0.15, + } ], - 'time_unit': 'ns', - 'utest': {} + "time_unit": "ns", + "utest": {}, } ] self.assertEqual(len(self.json_diff_report), len(expected_output)) - for out, expected in zip( - self.json_diff_report, expected_output): - self.assertEqual(out['name'], expected['name']) - self.assertEqual(out['time_unit'], expected['time_unit']) + for out, expected in zip(self.json_diff_report, expected_output): + self.assertEqual(out["name"], expected["name"]) + self.assertEqual(out["time_unit"], expected["time_unit"]) assert_utest(self, out, expected) assert_measurements(self, out, expected) @@ -1129,12 +1451,12 @@ class TestReportSorting(unittest.TestCase): def setUpClass(cls): def load_result(): import json + testInputs = os.path.join( - os.path.dirname( - os.path.realpath(__file__)), - 'Inputs') - testOutput = os.path.join(testInputs, 'test4_run.json') - with open(testOutput, 'r') as f: + os.path.dirname(os.path.realpath(__file__)), "Inputs" + ) + testOutput = os.path.join(testInputs, "test4_run.json") + with open(testOutput, "r") as f: json = json.load(f) return json @@ -1155,45 +1477,141 @@ class TestReportSorting(unittest.TestCase): "91 family 1 instance 0 aggregate", "90 family 1 instance 1 repetition 0", "89 family 1 instance 1 repetition 1", - "88 family 1 instance 1 aggregate" + "88 family 1 instance 1 aggregate", ] - for n in range(len(self.json['benchmarks']) ** 2): - random.shuffle(self.json['benchmarks']) + for n in range(len(self.json["benchmarks"]) ** 2): + random.shuffle(self.json["benchmarks"]) sorted_benchmarks = util.sort_benchmark_results(self.json)[ - 'benchmarks'] + "benchmarks" + ] self.assertEqual(len(expected_names), len(sorted_benchmarks)) for out, expected in zip(sorted_benchmarks, expected_names): - self.assertEqual(out['name'], expected) + self.assertEqual(out["name"], expected) + + +class TestReportDifferenceWithUTestWhileDisplayingAggregatesOnly2( + unittest.TestCase +): + @classmethod + def setUpClass(cls): + def load_results(): + import json + + testInputs = os.path.join( + os.path.dirname(os.path.realpath(__file__)), "Inputs" + ) + testOutput1 = os.path.join(testInputs, "test5_run0.json") + testOutput2 = os.path.join(testInputs, "test5_run1.json") + with open(testOutput1, "r") as f: + json1 = json.load(f) + json1["benchmarks"] = [ + json1["benchmarks"][0] for i in range(1000) + ] + with open(testOutput2, "r") as f: + json2 = json.load(f) + json2["benchmarks"] = [ + json2["benchmarks"][0] for i in range(1000) + ] + return json1, json2 + + json1, json2 = load_results() + cls.json_diff_report = get_difference_report(json1, json2, utest=True) + + def test_json_diff_report_pretty_printing(self): + expect_line = [ + "BM_ManyRepetitions_pvalue", + "0.0000", + "0.0000", + "U", + 
"Test,", + "Repetitions:", + "1000", + "vs", + "1000", + ] + output_lines_with_header = print_difference_report( + self.json_diff_report, utest=True, utest_alpha=0.05, use_color=False + ) + output_lines = output_lines_with_header[2:] + found = False + for i in range(0, len(output_lines)): + parts = [x for x in output_lines[i].split(" ") if x] + found = expect_line == parts + if found: + break + self.assertTrue(found) + + def test_json_diff_report(self): + expected_output = [ + { + "name": "BM_ManyRepetitions", + "label": "", + "time_unit": "s", + "run_type": "", + "aggregate_name": "", + "utest": { + "have_optimal_repetitions": True, + "cpu_pvalue": 0.0, + "time_pvalue": 0.0, + "nr_of_repetitions": 1000, + "nr_of_repetitions_other": 1000, + }, + }, + { + "name": "OVERALL_GEOMEAN", + "label": "", + "measurements": [ + { + "real_time": 1.0, + "cpu_time": 1000.000000000069, + "real_time_other": 1000.000000000069, + "cpu_time_other": 1.0, + "time": 999.000000000069, + "cpu": -0.9990000000000001, + } + ], + "time_unit": "s", + "run_type": "aggregate", + "aggregate_name": "geomean", + "utest": {}, + }, + ] + self.assertEqual(len(self.json_diff_report), len(expected_output)) + for out, expected in zip(self.json_diff_report, expected_output): + self.assertEqual(out["name"], expected["name"]) + self.assertEqual(out["time_unit"], expected["time_unit"]) + assert_utest(self, out, expected) def assert_utest(unittest_instance, lhs, rhs): - if lhs['utest']: + if lhs["utest"]: unittest_instance.assertAlmostEqual( - lhs['utest']['cpu_pvalue'], - rhs['utest']['cpu_pvalue']) + lhs["utest"]["cpu_pvalue"], rhs["utest"]["cpu_pvalue"] + ) unittest_instance.assertAlmostEqual( - lhs['utest']['time_pvalue'], - rhs['utest']['time_pvalue']) + lhs["utest"]["time_pvalue"], rhs["utest"]["time_pvalue"] + ) unittest_instance.assertEqual( - lhs['utest']['have_optimal_repetitions'], - rhs['utest']['have_optimal_repetitions']) + lhs["utest"]["have_optimal_repetitions"], + rhs["utest"]["have_optimal_repetitions"], + ) else: # lhs is empty. assert if rhs is not. - unittest_instance.assertEqual(lhs['utest'], rhs['utest']) + unittest_instance.assertEqual(lhs["utest"], rhs["utest"]) def assert_measurements(unittest_instance, lhs, rhs): - for m1, m2 in zip(lhs['measurements'], rhs['measurements']): - unittest_instance.assertEqual(m1['real_time'], m2['real_time']) - unittest_instance.assertEqual(m1['cpu_time'], m2['cpu_time']) + for m1, m2 in zip(lhs["measurements"], rhs["measurements"]): + unittest_instance.assertEqual(m1["real_time"], m2["real_time"]) + unittest_instance.assertEqual(m1["cpu_time"], m2["cpu_time"]) # m1['time'] and m1['cpu'] hold values which are being calculated, # and therefore we must use almost-equal pattern. 
- unittest_instance.assertAlmostEqual(m1['time'], m2['time'], places=4) - unittest_instance.assertAlmostEqual(m1['cpu'], m2['cpu'], places=4) + unittest_instance.assertAlmostEqual(m1["time"], m2["time"], places=4) + unittest_instance.assertAlmostEqual(m1["cpu"], m2["cpu"], places=4) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() # vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 diff --git a/tools/gbench/util.py b/tools/gbench/util.py index 5e79da8..1119a1a 100644 --- a/tools/gbench/util.py +++ b/tools/gbench/util.py @@ -1,5 +1,5 @@ -"""util.py - General utilities for running, loading, and processing benchmarks -""" +"""util.py - General utilities for running, loading, and processing benchmarks""" + import json import os import re @@ -7,13 +7,12 @@ import subprocess import sys import tempfile - # Input file type enumeration IT_Invalid = 0 IT_JSON = 1 IT_Executable = 2 -_num_magic_bytes = 2 if sys.platform.startswith('win') else 4 +_num_magic_bytes = 2 if sys.platform.startswith("win") else 4 def is_executable_file(filename): @@ -24,21 +23,21 @@ def is_executable_file(filename): """ if not os.path.isfile(filename): return False - with open(filename, mode='rb') as f: + with open(filename, mode="rb") as f: magic_bytes = f.read(_num_magic_bytes) - if sys.platform == 'darwin': + if sys.platform == "darwin": return magic_bytes in [ - b'\xfe\xed\xfa\xce', # MH_MAGIC - b'\xce\xfa\xed\xfe', # MH_CIGAM - b'\xfe\xed\xfa\xcf', # MH_MAGIC_64 - b'\xcf\xfa\xed\xfe', # MH_CIGAM_64 - b'\xca\xfe\xba\xbe', # FAT_MAGIC - b'\xbe\xba\xfe\xca' # FAT_CIGAM + b"\xfe\xed\xfa\xce", # MH_MAGIC + b"\xce\xfa\xed\xfe", # MH_CIGAM + b"\xfe\xed\xfa\xcf", # MH_MAGIC_64 + b"\xcf\xfa\xed\xfe", # MH_CIGAM_64 + b"\xca\xfe\xba\xbe", # FAT_MAGIC + b"\xbe\xba\xfe\xca", # FAT_CIGAM ] - elif sys.platform.startswith('win'): - return magic_bytes == b'MZ' + elif sys.platform.startswith("win"): + return magic_bytes == b"MZ" else: - return magic_bytes == b'\x7FELF' + return magic_bytes == b"\x7fELF" def is_json_file(filename): @@ -47,7 +46,7 @@ def is_json_file(filename): 'False' otherwise. """ try: - with open(filename, 'r') as f: + with open(filename, "r") as f: json.load(f) return True except BaseException: @@ -72,7 +71,10 @@ def classify_input_file(filename): elif is_json_file(filename): ftype = IT_JSON else: - err_msg = "'%s' does not name a valid benchmark executable or JSON file" % filename + err_msg = ( + "'%s' does not name a valid benchmark executable or JSON file" + % filename + ) return ftype, err_msg @@ -95,11 +97,11 @@ def find_benchmark_flag(prefix, benchmark_flags): if it is found return the arg it specifies. If specified more than once the last value is returned. If the flag is not found None is returned. """ - assert prefix.startswith('--') and prefix.endswith('=') + assert prefix.startswith("--") and prefix.endswith("=") result = None for f in benchmark_flags: if f.startswith(prefix): - result = f[len(prefix):] + result = f[len(prefix) :] return result @@ -108,7 +110,7 @@ def remove_benchmark_flags(prefix, benchmark_flags): Return a new list containing the specified benchmark_flags except those with the specified prefix. """ - assert prefix.startswith('--') and prefix.endswith('=') + assert prefix.startswith("--") and prefix.endswith("=") return [f for f in benchmark_flags if not f.startswith(prefix)] @@ -124,36 +126,61 @@ def load_benchmark_results(fname, benchmark_filter): REQUIRES: 'fname' names a file containing JSON benchmark output. 
""" + def benchmark_wanted(benchmark): if benchmark_filter is None: return True - name = benchmark.get('run_name', None) or benchmark['name'] - if re.search(benchmark_filter, name): - return True - return False + name = benchmark.get("run_name", None) or benchmark["name"] + return re.search(benchmark_filter, name) is not None - with open(fname, 'r') as f: + with open(fname, "r") as f: results = json.load(f) - if 'benchmarks' in results: - results['benchmarks'] = list(filter(benchmark_wanted, - results['benchmarks'])) + if "context" in results: + if "json_schema_version" in results["context"]: + json_schema_version = results["context"]["json_schema_version"] + if json_schema_version != 1: + print( + "In %s, got unnsupported JSON schema version: %i, expected 1" + % (fname, json_schema_version) + ) + sys.exit(1) + if "benchmarks" in results: + results["benchmarks"] = list( + filter(benchmark_wanted, results["benchmarks"]) + ) return results def sort_benchmark_results(result): - benchmarks = result['benchmarks'] + benchmarks = result["benchmarks"] # From inner key to the outer key! benchmarks = sorted( - benchmarks, key=lambda benchmark: benchmark['repetition_index'] if 'repetition_index' in benchmark else -1) + benchmarks, + key=lambda benchmark: benchmark["repetition_index"] + if "repetition_index" in benchmark + else -1, + ) benchmarks = sorted( - benchmarks, key=lambda benchmark: 1 if 'run_type' in benchmark and benchmark['run_type'] == "aggregate" else 0) + benchmarks, + key=lambda benchmark: 1 + if "run_type" in benchmark and benchmark["run_type"] == "aggregate" + else 0, + ) benchmarks = sorted( - benchmarks, key=lambda benchmark: benchmark['per_family_instance_index'] if 'per_family_instance_index' in benchmark else -1) + benchmarks, + key=lambda benchmark: benchmark["per_family_instance_index"] + if "per_family_instance_index" in benchmark + else -1, + ) benchmarks = sorted( - benchmarks, key=lambda benchmark: benchmark['family_index'] if 'family_index' in benchmark else -1) + benchmarks, + key=lambda benchmark: benchmark["family_index"] + if "family_index" in benchmark + else -1, + ) - result['benchmarks'] = benchmarks + result["benchmarks"] = benchmarks return result @@ -164,21 +191,21 @@ def run_benchmark(exe_name, benchmark_flags): real time console output. 
RETURNS: A JSON object representing the benchmark output """ - output_name = find_benchmark_flag('--benchmark_out=', - benchmark_flags) + output_name = find_benchmark_flag("--benchmark_out=", benchmark_flags) is_temp_output = False if output_name is None: is_temp_output = True thandle, output_name = tempfile.mkstemp() os.close(thandle) - benchmark_flags = list(benchmark_flags) + \ - ['--benchmark_out=%s' % output_name] + benchmark_flags = list(benchmark_flags) + [ + "--benchmark_out=%s" % output_name + ] cmd = [exe_name] + benchmark_flags - print("RUNNING: %s" % ' '.join(cmd)) + print("RUNNING: %s" % " ".join(cmd)) exitCode = subprocess.call(cmd) if exitCode != 0: - print('TEST FAILED...') + print("TEST FAILED...") sys.exit(exitCode) json_res = load_benchmark_results(output_name, None) if is_temp_output: @@ -195,9 +222,10 @@ def run_or_load_benchmark(filename, benchmark_flags): """ ftype = check_input_file(filename) if ftype == IT_JSON: - benchmark_filter = find_benchmark_flag('--benchmark_filter=', - benchmark_flags) + benchmark_filter = find_benchmark_flag( + "--benchmark_filter=", benchmark_flags + ) return load_benchmark_results(filename, benchmark_filter) if ftype == IT_Executable: return run_benchmark(filename, benchmark_flags) - raise ValueError('Unknown file type %s' % ftype) + raise ValueError("Unknown file type %s" % ftype) diff --git a/tools/strip_asm.py b/tools/strip_asm.py index d131dc7..bc3a774 100755 --- a/tools/strip_asm.py +++ b/tools/strip_asm.py @@ -4,48 +4,49 @@ strip_asm.py - Cleanup ASM output for the specified file """ -from argparse import ArgumentParser -import sys import os import re +import sys +from argparse import ArgumentParser + def find_used_labels(asm): found = set() - label_re = re.compile("\s*j[a-z]+\s+\.L([a-zA-Z0-9][a-zA-Z0-9_]*)") - for l in asm.splitlines(): - m = label_re.match(l) + label_re = re.compile(r"\s*j[a-z]+\s+\.L([a-zA-Z0-9][a-zA-Z0-9_]*)") + for line in asm.splitlines(): + m = label_re.match(line) if m: - found.add('.L%s' % m.group(1)) + found.add(".L%s" % m.group(1)) return found def normalize_labels(asm): decls = set() label_decl = re.compile("^[.]{0,1}L([a-zA-Z0-9][a-zA-Z0-9_]*)(?=:)") - for l in asm.splitlines(): - m = label_decl.match(l) + for line in asm.splitlines(): + m = label_decl.match(line) if m: decls.add(m.group(0)) if len(decls) == 0: return asm - needs_dot = next(iter(decls))[0] != '.' + needs_dot = next(iter(decls))[0] != "." if not needs_dot: return asm for ld in decls: - asm = re.sub("(^|\s+)" + ld + "(?=:|\s)", '\\1.' + ld, asm) + asm = re.sub(r"(^|\s+)" + ld + r"(?=:|\s)", "\\1." 
+ ld, asm) return asm def transform_labels(asm): asm = normalize_labels(asm) used_decls = find_used_labels(asm) - new_asm = '' - label_decl = re.compile("^\.L([a-zA-Z0-9][a-zA-Z0-9_]*)(?=:)") - for l in asm.splitlines(): - m = label_decl.match(l) + new_asm = "" + label_decl = re.compile(r"^\.L([a-zA-Z0-9][a-zA-Z0-9_]*)(?=:)") + for line in asm.splitlines(): + m = label_decl.match(line) if not m or m.group(0) in used_decls: - new_asm += l - new_asm += '\n' + new_asm += line + new_asm += "\n" return new_asm @@ -53,29 +54,34 @@ def is_identifier(tk): if len(tk) == 0: return False first = tk[0] - if not first.isalpha() and first != '_': + if not first.isalpha() and first != "_": return False for i in range(1, len(tk)): c = tk[i] - if not c.isalnum() and c != '_': + if not c.isalnum() and c != "_": return False return True -def process_identifiers(l): + +def process_identifiers(line): """ process_identifiers - process all identifiers and modify them to have consistent names across all platforms; specifically across ELF and MachO. For example, MachO inserts an additional understore at the beginning of names. This function removes that. """ - parts = re.split(r'([a-zA-Z0-9_]+)', l) - new_line = '' + parts = re.split(r"([a-zA-Z0-9_]+)", line) + new_line = "" for tk in parts: if is_identifier(tk): - if tk.startswith('__Z'): + if tk.startswith("__Z"): tk = tk[1:] - elif tk.startswith('_') and len(tk) > 1 and \ - tk[1].isalpha() and tk[1] != 'Z': + elif ( + tk.startswith("_") + and len(tk) > 1 + and tk[1].isalpha() + and tk[1] != "Z" + ): tk = tk[1:] new_line += tk return new_line @@ -85,65 +91,71 @@ def process_asm(asm): """ Strip the ASM of unwanted directives and lines """ - new_contents = '' + new_contents = "" asm = transform_labels(asm) # TODO: Add more things we want to remove discard_regexes = [ - re.compile("\s+\..*$"), # directive - re.compile("\s*#(NO_APP|APP)$"), #inline ASM - re.compile("\s*#.*$"), # comment line - re.compile("\s*\.globa?l\s*([.a-zA-Z_][a-zA-Z0-9$_.]*)"), #global directive - re.compile("\s*\.(string|asciz|ascii|[1248]?byte|short|word|long|quad|value|zero)"), - ] - keep_regexes = [ - + re.compile(r"\s+\..*$"), # directive + re.compile(r"\s*#(NO_APP|APP)$"), # inline ASM + re.compile(r"\s*#.*$"), # comment line + re.compile( + r"\s*\.globa?l\s*([.a-zA-Z_][a-zA-Z0-9$_.]*)" + ), # global directive + re.compile( + r"\s*\.(string|asciz|ascii|[1248]?byte|short|word|long|quad|value|zero)" + ), ] + keep_regexes: list[re.Pattern] = [] fn_label_def = re.compile("^[a-zA-Z_][a-zA-Z0-9_.]*:") - for l in asm.splitlines(): + for line in asm.splitlines(): # Remove Mach-O attribute - l = l.replace('@GOTPCREL', '') + line = line.replace("@GOTPCREL", "") add_line = True for reg in discard_regexes: - if reg.match(l) is not None: + if reg.match(line) is not None: add_line = False break for reg in keep_regexes: - if reg.match(l) is not None: + if reg.match(line) is not None: add_line = True break if add_line: - if fn_label_def.match(l) and len(new_contents) != 0: - new_contents += '\n' - l = process_identifiers(l) - new_contents += l - new_contents += '\n' + if fn_label_def.match(line) and len(new_contents) != 0: + new_contents += "\n" + line = process_identifiers(line) + new_contents += line + new_contents += "\n" return new_contents + def main(): - parser = ArgumentParser( - description='generate a stripped assembly file') + parser = ArgumentParser(description="generate a stripped assembly file") parser.add_argument( - 'input', metavar='input', type=str, nargs=1, - help='An input assembly 
file') + "input", + metavar="input", + type=str, + nargs=1, + help="An input assembly file", + ) parser.add_argument( - 'out', metavar='output', type=str, nargs=1, - help='The output file') + "out", metavar="output", type=str, nargs=1, help="The output file" + ) args, unknown_args = parser.parse_known_args() input = args.input[0] output = args.out[0] if not os.path.isfile(input): - print(("ERROR: input file '%s' does not exist") % input) + print("ERROR: input file '%s' does not exist" % input) sys.exit(1) - contents = None - with open(input, 'r') as f: + + with open(input, "r") as f: contents = f.read() new_contents = process_asm(contents) - with open(output, 'w') as f: + with open(output, "w") as f: f.write(new_contents) -if __name__ == '__main__': +if __name__ == "__main__": main() # vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
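For readers skimming the strip_asm.py hunk above: main() just reads the input listing, pushes it through process_asm, and writes the result to the output path. A minimal sketch of driving the same helpers directly from Python follows; the module import and the file names are assumptions made only for illustration, not part of the patch.

    # Hypothetical driver for the helpers shown above (tools/strip_asm.py).
    # Assumes the script's directory is on sys.path so it imports as a module.
    from strip_asm import process_asm

    with open("bm_example.s", "r") as f:        # assumed input listing
        raw_listing = f.read()

    # process_asm() normalizes label names, drops .L labels that no jump
    # instruction references, filters out assembler directives and comment
    # lines, and rewrites identifiers so ELF and Mach-O output compare cleanly.
    cleaned = process_asm(raw_listing)

    with open("bm_example.stripped.s", "w") as f:  # assumed output path
        f.write(cleaned)

The command-line entry point does the same thing with positional input and output arguments, so the sketch is equivalent to running the script on those two files.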