author    Sadaf Ebrahimi <sadafebrahimi@google.com>  2023-09-27 17:54:23 +0000
committer Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com>  2023-09-27 17:54:23 +0000
commit    63fc46edd10580e6c4b1de9055729b9902011879 (patch)
tree      cbcc19fcf98479dce5ad3e7954427bd82961a348
parent    db4553b1a39ef8ef84a097dfa2e795c0a4df60d8 (diff)
parent    83836ea18fd8db8d60e4d1a1dcf96109e1447611 (diff)
download  google-benchmark-63fc46edd10580e6c4b1de9055729b9902011879.tar.gz
Upgrade google-benchmark to v1.8.3 am: f45c56f9b5 am: 04b856c2d7 am: 83836ea18f
Original change: https://android-review.googlesource.com/c/platform/external/google-benchmark/+/2760806

Change-Id: I85fb057a1f043df81a954b68081b59de03e43412
Signed-off-by: Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com>
-rw-r--r--  .clang-tidy  7
-rw-r--r--  .github/install_bazel.sh  13
-rwxr-xr-x  .github/libcxx-setup.sh  26
-rw-r--r--  .github/workflows/bazel.yml  36
-rw-r--r--  .github/workflows/build-and-test-min-cmake.yml  46
-rw-r--r--  .github/workflows/build-and-test-perfcounters.yml  51
-rw-r--r--  .github/workflows/build-and-test.yml  116
-rw-r--r--  .github/workflows/clang-format-lint.yml  17
-rw-r--r--  .github/workflows/clang-tidy.yml  38
-rw-r--r--  .github/workflows/doxygen.yml  28
-rw-r--r--  .github/workflows/pylint.yml  8
-rw-r--r--  .github/workflows/sanitizer.yml  96
-rw-r--r--  .github/workflows/test_bindings.yml  23
-rw-r--r--  .github/workflows/wheels.yml  79
-rw-r--r--  .gitignore  1
-rw-r--r--  .travis-libcxx-setup.sh  28
-rw-r--r--  .travis.yml  25
-rw-r--r--  AUTHORS  15
-rw-r--r--  BUILD.bazel  51
-rw-r--r--  CMakeLists.txt  135
-rw-r--r--  CONTRIBUTORS  16
-rw-r--r--  METADATA  12
-rw-r--r--  MODULE.bazel  24
-rw-r--r--  README.md  1150
-rw-r--r--  WORKSPACE  49
-rw-r--r--  WORKSPACE.bzlmod  2
-rw-r--r--  _config.yml  3
-rw-r--r--  bazel/benchmark_deps.bzl  65
-rw-r--r--  bindings/python/build_defs.bzl  4
-rw-r--r--  bindings/python/google_benchmark/BUILD  8
-rw-r--r--  bindings/python/google_benchmark/__init__.py  10
-rw-r--r--  bindings/python/google_benchmark/benchmark.cc  149
-rw-r--r--  bindings/python/google_benchmark/example.py  4
-rw-r--r--  bindings/python/nanobind.BUILD  17
-rw-r--r--  bindings/python/pybind11.BUILD  20
-rw-r--r--  bindings/python/requirements.txt  2
-rw-r--r--  cmake/AddCXXCompilerFlag.cmake  12
-rw-r--r--  cmake/CXXFeatureCheck.cmake  29
-rw-r--r--  cmake/Config.cmake.in  6
-rw-r--r--  cmake/GetGitVersion.cmake  22
-rw-r--r--  cmake/GoogleTest.cmake  25
-rw-r--r--  cmake/GoogleTest.cmake.in  5
-rw-r--r--  cmake/Modules/FindPFM.cmake  28
-rw-r--r--  cmake/benchmark.pc.in  4
-rw-r--r--  cmake/pthread_affinity.cpp  16
-rw-r--r--  conan/CMakeLists.txt  7
-rw-r--r--  conan/test_package/CMakeLists.txt  10
-rw-r--r--  conan/test_package/conanfile.py  19
-rw-r--r--  conan/test_package/test_package.cpp  18
-rw-r--r--  conanfile.py  79
-rw-r--r--  dependencies.md  18
-rw-r--r--  docs/AssemblyTests.md  2
-rw-r--r--  docs/_config.yml  4
-rw-r--r--  docs/assets/images/icon.png  bin 0 -> 11106 bytes
-rw-r--r--  docs/assets/images/icon.xcf  bin 0 -> 25934 bytes
-rw-r--r--  docs/assets/images/icon_black.png  bin 0 -> 11559 bytes
-rw-r--r--  docs/assets/images/icon_black.xcf  bin 0 -> 36322 bytes
-rw-r--r--  docs/dependencies.md  13
-rw-r--r--  docs/index.md  12
-rw-r--r--  docs/perf_counters.md  35
-rw-r--r--  docs/platform_specific_build_instructions.md  48
-rw-r--r--  docs/python_bindings.md  34
-rw-r--r--  docs/random_interleaving.md  13
-rw-r--r--  docs/reducing_variance.md  100
-rw-r--r--  docs/releasing.md  31
-rw-r--r--  docs/tools.md  140
-rw-r--r--  docs/user_guide.md  1266
-rw-r--r--  include/benchmark/benchmark.h  731
-rw-r--r--  include/benchmark/export.h  47
-rw-r--r--  pyproject.toml  50
-rw-r--r--  requirements.txt  2
-rw-r--r--  setup.py  177
-rw-r--r--  src/CMakeLists.txt  102
-rw-r--r--  src/benchmark.cc  528
-rw-r--r--  src/benchmark_api_internal.cc  117
-rw-r--r--  src/benchmark_api_internal.h  80
-rw-r--r--  src/benchmark_main.cc  1
-rw-r--r--  src/benchmark_name.cc  5
-rw-r--r--  src/benchmark_register.cc  235
-rw-r--r--  src/benchmark_register.h  27
-rw-r--r--  src/benchmark_runner.cc  559
-rw-r--r--  src/benchmark_runner.h  102
-rw-r--r--  src/check.cc  11
-rw-r--r--  src/check.h  70
-rw-r--r--  src/colorprint.cc  44
-rw-r--r--  src/commandlineflags.cc  76
-rw-r--r--  src/commandlineflags.h  90
-rw-r--r--  src/complexity.cc  24
-rw-r--r--  src/complexity.h  2
-rw-r--r--  src/console_reporter.cc  73
-rw-r--r--  src/csv_reporter.cc  27
-rw-r--r--  src/cycleclock.h  23
-rw-r--r--  src/internal_macros.h  17
-rw-r--r--  src/json_reporter.cc  131
-rw-r--r--  src/log.h  28
-rw-r--r--  src/mutex.h  44
-rw-r--r--  src/perf_counters.cc  282
-rw-r--r--  src/perf_counters.h  200
-rw-r--r--  src/re.h  4
-rw-r--r--  src/reporter.cc  23
-rw-r--r--  src/sleep.cc  67
-rw-r--r--  src/sleep.h  15
-rw-r--r--  src/statistics.cc  66
-rw-r--r--  src/statistics.h  13
-rw-r--r--  src/string_util.cc  101
-rw-r--r--  src/string_util.h  25
-rw-r--r--  src/sysinfo.cc  477
-rw-r--r--  src/thread_manager.h  5
-rw-r--r--  src/thread_timer.h  8
-rw-r--r--  src/timers.cc  69
-rw-r--r--  test/AssemblyTests.cmake  21
-rw-r--r--  test/BUILD  88
-rw-r--r--  test/CMakeLists.txt  110
-rw-r--r--  test/args_product_test.cc  12
-rw-r--r--  test/basic_test.cc  80
-rw-r--r--  test/benchmark_gtest.cc  43
-rw-r--r--  test/benchmark_min_time_flag_iters_test.cc  66
-rw-r--r--  test/benchmark_min_time_flag_time_test.cc  90
-rw-r--r--  test/benchmark_name_gtest.cc  8
-rw-r--r--  test/benchmark_random_interleaving_gtest.cc  126
-rw-r--r--  test/benchmark_setup_teardown_test.cc  157
-rw-r--r--  test/benchmark_test.cc  67
-rw-r--r--  test/clobber_memory_assembly_test.cc  2
-rw-r--r--  test/commandlineflags_gtest.cc  33
-rw-r--r--  test/complexity_test.cc  91
-rw-r--r--  test/cxx03_test.cc  7
-rw-r--r--  test/diagnostics_test.cc  21
-rw-r--r--  test/display_aggregates_only_test.cc  10
-rw-r--r--  test/donotoptimize_assembly_test.cc  46
-rw-r--r--  test/donotoptimize_test.cc  41
-rw-r--r--  test/filter_test.cc  43
-rw-r--r--  test/fixture_test.cc  18
-rw-r--r--  test/internal_threading_test.cc  1
-rw-r--r--  test/link_main_test.cc  3
-rw-r--r--  test/map_test.cc  14
-rw-r--r--  test/memory_manager_test.cc  15
-rw-r--r--  test/min_time_parse_gtest.cc  30
-rw-r--r--  test/multiple_ranges_test.cc  12
-rw-r--r--  test/options_test.cc  11
-rw-r--r--  test/output_test.h  34
-rw-r--r--  test/output_test_helper.cc  123
-rw-r--r--  test/perf_counters_gtest.cc  307
-rw-r--r--  test/perf_counters_test.cc  92
-rw-r--r--  test/register_benchmark_test.cc  34
-rw-r--r--  test/repetitions_test.cc  214
-rw-r--r--  test/report_aggregates_only_test.cc  10
-rw-r--r--  test/reporter_output_test.cc  332
-rw-r--r--  test/skip_with_error_test.cc  32
-rw-r--r--  test/spec_arg_test.cc  105
-rw-r--r--  test/spec_arg_verbosity_test.cc  43
-rw-r--r--  test/statistics_gtest.cc  7
-rw-r--r--  test/string_util_gtest.cc  202
-rw-r--r--  test/templated_fixture_test.cc  4
-rw-r--r--  test/time_unit_gtest.cc  37
-rw-r--r--  test/user_counters_tabular_test.cc  321
-rw-r--r--  test/user_counters_test.cc  78
-rw-r--r--  test/user_counters_thousands_test.cc  45
-rw-r--r--  tools/BUILD.bazel  4
-rwxr-xr-x  tools/compare.py  31
-rw-r--r--  tools/gbench/Inputs/test1_run1.json  8
-rw-r--r--  tools/gbench/Inputs/test1_run2.json  8
-rw-r--r--  tools/gbench/Inputs/test4_run.json  96
-rw-r--r--  tools/gbench/Inputs/test4_run0.json  21
-rw-r--r--  tools/gbench/Inputs/test4_run1.json  21
-rw-r--r--  tools/gbench/report.py  392
-rw-r--r--  tools/gbench/util.py  52
-rw-r--r--  tools/libpfm.BUILD.bazel  22
-rw-r--r--  tools/requirements.txt  3
-rwxr-xr-x  tools/strip_asm.py  2
169 files changed, 9567 insertions, 3527 deletions
diff --git a/.clang-tidy b/.clang-tidy
new file mode 100644
index 0000000..56938a5
--- /dev/null
+++ b/.clang-tidy
@@ -0,0 +1,7 @@
+---
+Checks: 'clang-analyzer-*,readability-redundant-*,performance-*'
+WarningsAsErrors: 'clang-analyzer-*,readability-redundant-*,performance-*'
+HeaderFilterRegex: '.*'
+AnalyzeTemporaryDtors: false
+FormatStyle: none
+User: user
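
Note (not part of the patch): these checks are the ones driven by the clang-tidy workflow added later in this change. A minimal local run might look like the sketch below; the `build` directory name is illustrative and assumes a CMake-generated compile_commands.json.

    # Generate a compilation database, then run clang-tidy against it.
    cmake -S . -B build -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON
    run-clang-tidy -p build
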
diff --git a/.github/install_bazel.sh b/.github/install_bazel.sh
new file mode 100644
index 0000000..2b1f4e7
--- /dev/null
+++ b/.github/install_bazel.sh
@@ -0,0 +1,13 @@
+if ! bazel version; then
+ arch=$(uname -m)
+ if [ "$arch" == "aarch64" ]; then
+ arch="arm64"
+ fi
+ echo "Installing wget and downloading $arch Bazel binary from GitHub releases."
+ yum install -y wget
+ wget "https://github.com/bazelbuild/bazel/releases/download/6.3.0/bazel-6.3.0-linux-$arch" -O /usr/local/bin/bazel
+ chmod +x /usr/local/bin/bazel
+else
+ # bazel is installed for the correct architecture
+ exit 0
+fi
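
Note (not part of the patch): this script is invoked by the wheels workflow further down via CIBW_BEFORE_ALL_LINUX, so Bazel becomes available inside the manylinux build containers. Run by hand it is simply:

    bash .github/install_bazel.sh
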
diff --git a/.github/libcxx-setup.sh b/.github/libcxx-setup.sh
new file mode 100755
index 0000000..8773b9c
--- /dev/null
+++ b/.github/libcxx-setup.sh
@@ -0,0 +1,26 @@
+#!/usr/bin/env bash
+
+set -e
+
+# Checkout LLVM sources
+git clone --depth=1 https://github.com/llvm/llvm-project.git llvm-project
+
+## Setup libc++ options
+if [ -z "$BUILD_32_BITS" ]; then
+ export BUILD_32_BITS=OFF && echo disabling 32 bit build
+fi
+
+## Build and install libc++ (Use unstable ABI for better sanitizer coverage)
+mkdir llvm-build && cd llvm-build
+cmake -DCMAKE_C_COMPILER=${CC} \
+ -DCMAKE_CXX_COMPILER=${CXX} \
+ -DCMAKE_BUILD_TYPE=RelWithDebInfo \
+ -DCMAKE_INSTALL_PREFIX=/usr \
+ -DLIBCXX_ABI_UNSTABLE=OFF \
+ -DLLVM_USE_SANITIZER=${LIBCXX_SANITIZER} \
+ -DLLVM_BUILD_32_BITS=${BUILD_32_BITS} \
+ -DLLVM_ENABLE_RUNTIMES='libcxx;libcxxabi;libunwind' \
+ -G "Unix Makefiles" \
+ ../llvm-project/runtimes/
+make -j cxx cxxabi unwind
+cd ..
diff --git a/.github/workflows/bazel.yml b/.github/workflows/bazel.yml
index d6bbe62..1cdc38c 100644
--- a/.github/workflows/bazel.yml
+++ b/.github/workflows/bazel.yml
@@ -5,29 +5,31 @@ on:
pull_request: {}
jobs:
- build-and-test:
- runs-on: ubuntu-latest
-
+ build_and_test_default:
+ name: bazel.${{ matrix.os }}.${{ matrix.bzlmod && 'bzlmod' || 'no_bzlmod' }}
+ runs-on: ${{ matrix.os }}
+ strategy:
+ fail-fast: false
+ matrix:
+ os: [ubuntu-latest, macos-latest, windows-latest]
+ bzlmod: [false, true]
steps:
- - uses: actions/checkout@v1
+ - uses: actions/checkout@v3
- name: mount bazel cache
- uses: actions/cache@v1
+ uses: actions/cache@v3
+ env:
+ cache-name: bazel-cache
with:
- path: "/home/runner/.cache/bazel"
- key: bazel
-
- - name: install bazelisk
- run: |
- curl -LO "https://github.com/bazelbuild/bazelisk/releases/download/v1.1.0/bazelisk-linux-amd64"
- mkdir -p "${GITHUB_WORKSPACE}/bin/"
- mv bazelisk-linux-amd64 "${GITHUB_WORKSPACE}/bin/bazel"
- chmod +x "${GITHUB_WORKSPACE}/bin/bazel"
+ path: "~/.cache/bazel"
+ key: ${{ env.cache-name }}-${{ matrix.os }}-${{ github.ref }}
+ restore-keys: |
+ ${{ env.cache-name }}-${{ matrix.os }}-main
- name: build
run: |
- "${GITHUB_WORKSPACE}/bin/bazel" build //...
-
+ bazel build ${{ matrix.bzlmod && '--enable_bzlmod' || '--noenable_bzlmod' }} //:benchmark //:benchmark_main //test/...
+
- name: test
run: |
- "${GITHUB_WORKSPACE}/bin/bazel" test //test/...
+ bazel test ${{ matrix.bzlmod && '--enable_bzlmod' || '--noenable_bzlmod' }} --test_output=all //test/...
diff --git a/.github/workflows/build-and-test-min-cmake.yml b/.github/workflows/build-and-test-min-cmake.yml
new file mode 100644
index 0000000..e3e3217
--- /dev/null
+++ b/.github/workflows/build-and-test-min-cmake.yml
@@ -0,0 +1,46 @@
+name: build-and-test-min-cmake
+
+on:
+ push:
+ branches: [ main ]
+ pull_request:
+ branches: [ main ]
+
+jobs:
+ job:
+ name: ${{ matrix.os }}.min-cmake
+ runs-on: ${{ matrix.os }}
+ strategy:
+ fail-fast: false
+ matrix:
+ os: [ubuntu-latest, macos-latest]
+
+ steps:
+ - uses: actions/checkout@v3
+
+ - uses: lukka/get-cmake@latest
+ with:
+ cmakeVersion: 3.10.0
+
+ - name: create build environment
+ run: cmake -E make_directory ${{ runner.workspace }}/_build
+
+ - name: setup cmake initial cache
+ run: touch compiler-cache.cmake
+
+ - name: configure cmake
+ env:
+ CXX: ${{ matrix.compiler }}
+ shell: bash
+ working-directory: ${{ runner.workspace }}/_build
+ run: >
+ cmake -C ${{ github.workspace }}/compiler-cache.cmake
+ $GITHUB_WORKSPACE
+ -DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON
+ -DCMAKE_CXX_VISIBILITY_PRESET=hidden
+ -DCMAKE_VISIBILITY_INLINES_HIDDEN=ON
+
+ - name: build
+ shell: bash
+ working-directory: ${{ runner.workspace }}/_build
+ run: cmake --build .
diff --git a/.github/workflows/build-and-test-perfcounters.yml b/.github/workflows/build-and-test-perfcounters.yml
new file mode 100644
index 0000000..97e4d8e
--- /dev/null
+++ b/.github/workflows/build-and-test-perfcounters.yml
@@ -0,0 +1,51 @@
+name: build-and-test-perfcounters
+
+on:
+ push:
+ branches: [ main ]
+ pull_request:
+ branches: [ main ]
+
+jobs:
+ job:
+ # TODO(dominic): Extend this to include compiler and set through env: CC/CXX.
+ name: ${{ matrix.os }}.${{ matrix.build_type }}
+ runs-on: ${{ matrix.os }}
+ strategy:
+ fail-fast: false
+ matrix:
+ os: [ubuntu-22.04, ubuntu-20.04]
+ build_type: ['Release', 'Debug']
+ steps:
+ - uses: actions/checkout@v3
+
+ - name: install libpfm
+ run: |
+ sudo apt update
+ sudo apt -y install libpfm4-dev
+
+ - name: create build environment
+ run: cmake -E make_directory ${{ runner.workspace }}/_build
+
+ - name: configure cmake
+ shell: bash
+ working-directory: ${{ runner.workspace }}/_build
+ run: >
+ cmake $GITHUB_WORKSPACE
+ -DBENCHMARK_ENABLE_LIBPFM=1
+ -DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON
+ -DCMAKE_BUILD_TYPE=${{ matrix.build_type }}
+
+ - name: build
+ shell: bash
+ working-directory: ${{ runner.workspace }}/_build
+ run: cmake --build . --config ${{ matrix.build_type }}
+
+ # Skip testing, for now. It seems perf_event_open does not succeed on the
+ # hosting machine, very likely a permissions issue.
+ # TODO(mtrofin): Enable test.
+ # - name: test
+ # shell: bash
+ # working-directory: ${{ runner.workspace }}/_build
+ # run: ctest -C ${{ matrix.build_type }} --rerun-failed --output-on-failure
+
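
Note (not part of the patch): configuring with -DBENCHMARK_ENABLE_LIBPFM=1 turns on the libpfm-backed perf-counter support described in docs/perf_counters.md, which this change also updates. A benchmark binary built this way can collect counters at runtime roughly as below; the binary name is illustrative and the host must permit perf_event_open.

    # Illustrative: request per-iteration hardware counters from a libpfm-enabled build.
    ./my_benchmark --benchmark_perf_counters=CYCLES,INSTRUCTIONS
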
diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml
index f0f0626..b35200a 100644
--- a/.github/workflows/build-and-test.yml
+++ b/.github/workflows/build-and-test.yml
@@ -2,37 +2,113 @@ name: build-and-test
on:
push:
- branches: [ master ]
+ branches: [ main ]
pull_request:
- branches: [ master ]
+ branches: [ main ]
jobs:
+ # TODO: add 32-bit builds (g++ and clang++) for ubuntu
+ # (requires g++-multilib and libc6:i386)
+ # TODO: add coverage build (requires lcov)
+ # TODO: add clang + libc++ builds for ubuntu
job:
- # TODO(dominic): Extend this to include compiler and set through env: CC/CXX.
- name: ${{ matrix.os }}.${{ matrix.build_type }}
+ name: ${{ matrix.os }}.${{ matrix.build_type }}.${{ matrix.lib }}.${{ matrix.compiler }}
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
- os: [ubuntu-latest, ubuntu-16.04, ubuntu-20.04, macos-latest, windows-latest]
+ os: [ubuntu-22.04, ubuntu-20.04, macos-latest]
build_type: ['Release', 'Debug']
+ compiler: ['g++', 'clang++']
+ lib: ['shared', 'static']
+
steps:
- - uses: actions/checkout@v2
+ - uses: actions/checkout@v3
+
+ - uses: lukka/get-cmake@latest
+
+ - name: create build environment
+ run: cmake -E make_directory ${{ runner.workspace }}/_build
+
+ - name: setup cmake initial cache
+ run: touch compiler-cache.cmake
+
+ - name: configure cmake
+ env:
+ CXX: ${{ matrix.compiler }}
+ shell: bash
+ working-directory: ${{ runner.workspace }}/_build
+ run: >
+ cmake -C ${{ github.workspace }}/compiler-cache.cmake
+ $GITHUB_WORKSPACE
+ -DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON
+ -DBUILD_SHARED_LIBS=${{ matrix.lib == 'shared' }}
+ -DCMAKE_BUILD_TYPE=${{ matrix.build_type }}
+ -DCMAKE_CXX_COMPILER=${{ env.CXX }}
+ -DCMAKE_CXX_VISIBILITY_PRESET=hidden
+ -DCMAKE_VISIBILITY_INLINES_HIDDEN=ON
+
+ - name: build
+ shell: bash
+ working-directory: ${{ runner.workspace }}/_build
+ run: cmake --build . --config ${{ matrix.build_type }}
+
+ - name: test
+ shell: bash
+ working-directory: ${{ runner.workspace }}/_build
+ run: ctest -C ${{ matrix.build_type }} -VV
+
+ msvc:
+ name: ${{ matrix.os }}.${{ matrix.build_type }}.${{ matrix.lib }}.${{ matrix.msvc }}
+ runs-on: ${{ matrix.os }}
+ defaults:
+ run:
+ shell: powershell
+ strategy:
+ fail-fast: false
+ matrix:
+ msvc:
+ - VS-16-2019
+ - VS-17-2022
+ arch:
+ - x64
+ build_type:
+ - Debug
+ - Release
+ lib:
+ - shared
+ - static
+ include:
+ - msvc: VS-16-2019
+ os: windows-2019
+ generator: 'Visual Studio 16 2019'
+ - msvc: VS-17-2022
+ os: windows-2022
+ generator: 'Visual Studio 17 2022'
+
+ steps:
+ - uses: actions/checkout@v2
+
+ - uses: lukka/get-cmake@latest
+
+ - name: configure cmake
+ run: >
+ cmake -S . -B _build/
+ -A ${{ matrix.arch }}
+ -G "${{ matrix.generator }}"
+ -DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON
+ -DBUILD_SHARED_LIBS=${{ matrix.lib == 'shared' }}
+
+ - name: build
+ run: cmake --build _build/ --config ${{ matrix.build_type }}
- - name: create build environment
- run: cmake -E make_directory ${{ runner.workspace }}/_build
+ - name: setup test environment
+ # Make sure gmock and benchmark DLLs can be found
+ run: >
+ echo "$((Get-Item .).FullName)/_build/bin/${{ matrix.build_type }}" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append;
+ echo "$((Get-Item .).FullName)/_build/src/${{ matrix.build_type }}" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append;
- - name: configure cmake
- shell: bash
- working-directory: ${{ runner.workspace }}/_build
- run: cmake -DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON $GITHUB_WORKSPACE -DCMAKE_BUILD_TYPE=${{ matrix.build_type }}
+ - name: test
+ run: ctest --test-dir _build/ -C ${{ matrix.build_type }} -VV
- - name: build
- shell: bash
- working-directory: ${{ runner.workspace }}/_build
- run: cmake --build . --config ${{ matrix.build_type }}
- - name: test
- shell: bash
- working-directory: ${{ runner.workspace }}/_build
- run: ctest -C ${{ matrix.build_type }}
diff --git a/.github/workflows/clang-format-lint.yml b/.github/workflows/clang-format-lint.yml
new file mode 100644
index 0000000..77ce1f8
--- /dev/null
+++ b/.github/workflows/clang-format-lint.yml
@@ -0,0 +1,17 @@
+name: clang-format-lint
+on:
+ push: {}
+ pull_request: {}
+
+jobs:
+ build:
+ runs-on: ubuntu-latest
+
+ steps:
+ - uses: actions/checkout@v3
+ - uses: DoozyX/clang-format-lint-action@v0.13
+ with:
+ source: './include/benchmark ./src ./test'
+ extensions: 'h,cc'
+ clangFormatVersion: 12
+ style: Google
diff --git a/.github/workflows/clang-tidy.yml b/.github/workflows/clang-tidy.yml
new file mode 100644
index 0000000..2eaab9c
--- /dev/null
+++ b/.github/workflows/clang-tidy.yml
@@ -0,0 +1,38 @@
+name: clang-tidy
+
+on:
+ push: {}
+ pull_request: {}
+
+jobs:
+ job:
+ name: run-clang-tidy
+ runs-on: ubuntu-latest
+ strategy:
+ fail-fast: false
+ steps:
+ - uses: actions/checkout@v3
+
+ - name: install clang-tidy
+ run: sudo apt update && sudo apt -y install clang-tidy
+
+ - name: create build environment
+ run: cmake -E make_directory ${{ runner.workspace }}/_build
+
+ - name: configure cmake
+ shell: bash
+ working-directory: ${{ runner.workspace }}/_build
+ run: >
+ cmake $GITHUB_WORKSPACE
+ -DBENCHMARK_ENABLE_ASSEMBLY_TESTS=OFF
+ -DBENCHMARK_ENABLE_LIBPFM=OFF
+ -DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON
+ -DCMAKE_C_COMPILER=clang
+ -DCMAKE_CXX_COMPILER=clang++
+ -DCMAKE_EXPORT_COMPILE_COMMANDS=ON
+ -DGTEST_COMPILE_COMMANDS=OFF
+
+ - name: run
+ shell: bash
+ working-directory: ${{ runner.workspace }}/_build
+ run: run-clang-tidy
diff --git a/.github/workflows/doxygen.yml b/.github/workflows/doxygen.yml
new file mode 100644
index 0000000..da92c46
--- /dev/null
+++ b/.github/workflows/doxygen.yml
@@ -0,0 +1,28 @@
+name: doxygen
+
+on:
+ push:
+ branches: [main]
+ pull_request:
+ branches: [main]
+
+jobs:
+ build-and-deploy:
+ name: Build HTML documentation
+ runs-on: ubuntu-latest
+ steps:
+ - name: Fetching sources
+ uses: actions/checkout@v3
+
+ - name: Installing build dependencies
+ run: |
+ sudo apt update
+ sudo apt install doxygen gcc git
+
+ - name: Creating build directory
+ run: mkdir build
+
+ - name: Building HTML documentation with Doxygen
+ run: |
+ cmake -S . -B build -DBENCHMARK_ENABLE_TESTING:BOOL=OFF -DBENCHMARK_ENABLE_DOXYGEN:BOOL=ON -DBENCHMARK_INSTALL_DOCS:BOOL=ON
+ cmake --build build --target benchmark_doxygen
diff --git a/.github/workflows/pylint.yml b/.github/workflows/pylint.yml
index c869674..c6939b5 100644
--- a/.github/workflows/pylint.yml
+++ b/.github/workflows/pylint.yml
@@ -2,9 +2,9 @@ name: pylint
on:
push:
- branches: [ master ]
+ branches: [ main ]
pull_request:
- branches: [ master ]
+ branches: [ main ]
jobs:
pylint:
@@ -12,15 +12,17 @@ jobs:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v2
+ - uses: actions/checkout@v3
- name: Set up Python 3.8
uses: actions/setup-python@v1
with:
python-version: 3.8
+
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install pylint pylint-exit conan
+
- name: Run pylint
run: |
pylint `find . -name '*.py'|xargs` || pylint-exit $?
diff --git a/.github/workflows/sanitizer.yml b/.github/workflows/sanitizer.yml
new file mode 100644
index 0000000..86cccf4
--- /dev/null
+++ b/.github/workflows/sanitizer.yml
@@ -0,0 +1,96 @@
+name: sanitizer
+
+on:
+ push: {}
+ pull_request: {}
+
+env:
+ UBSAN_OPTIONS: "print_stacktrace=1"
+
+jobs:
+ job:
+ name: ${{ matrix.sanitizer }}.${{ matrix.build_type }}
+ runs-on: ubuntu-latest
+ strategy:
+ fail-fast: false
+ matrix:
+ build_type: ['Debug', 'RelWithDebInfo']
+ sanitizer: ['asan', 'ubsan', 'tsan', 'msan']
+
+ steps:
+ - uses: actions/checkout@v3
+
+ - name: configure msan env
+ if: matrix.sanitizer == 'msan'
+ run: |
+ echo "EXTRA_FLAGS=-g -O2 -fno-omit-frame-pointer -fsanitize=memory -fsanitize-memory-track-origins" >> $GITHUB_ENV
+ echo "LIBCXX_SANITIZER=MemoryWithOrigins" >> $GITHUB_ENV
+
+ - name: configure ubsan env
+ if: matrix.sanitizer == 'ubsan'
+ run: |
+ echo "EXTRA_FLAGS=-g -O2 -fno-omit-frame-pointer -fsanitize=undefined -fno-sanitize-recover=all" >> $GITHUB_ENV
+ echo "LIBCXX_SANITIZER=Undefined" >> $GITHUB_ENV
+
+ - name: configure asan env
+ if: matrix.sanitizer == 'asan'
+ run: |
+ echo "EXTRA_FLAGS=-g -O2 -fno-omit-frame-pointer -fsanitize=address -fno-sanitize-recover=all" >> $GITHUB_ENV
+ echo "LIBCXX_SANITIZER=Address" >> $GITHUB_ENV
+
+ - name: configure tsan env
+ if: matrix.sanitizer == 'tsan'
+ run: |
+ echo "EXTRA_FLAGS=-g -O2 -fno-omit-frame-pointer -fsanitize=thread -fno-sanitize-recover=all" >> $GITHUB_ENV
+ echo "LIBCXX_SANITIZER=Thread" >> $GITHUB_ENV
+
+ - name: fine-tune asan options
+ # in asan we get an error from std::regex. ignore it.
+ if: matrix.sanitizer == 'asan'
+ run: |
+ echo "ASAN_OPTIONS=alloc_dealloc_mismatch=0" >> $GITHUB_ENV
+
+ - name: setup clang
+ uses: egor-tensin/setup-clang@v1
+ with:
+ version: latest
+ platform: x64
+
+ - name: configure clang
+ run: |
+ echo "CC=cc" >> $GITHUB_ENV
+ echo "CXX=c++" >> $GITHUB_ENV
+
+ - name: build libc++ (non-asan)
+ if: matrix.sanitizer != 'asan'
+ run: |
+ "${GITHUB_WORKSPACE}/.github/libcxx-setup.sh"
+ echo "EXTRA_CXX_FLAGS=-stdlib=libc++ -L ${GITHUB_WORKSPACE}/llvm-build/lib -lc++abi -Isystem${GITHUB_WORKSPACE}/llvm-build/include -Isystem${GITHUB_WORKSPACE}/llvm-build/include/c++/v1 -Wl,-rpath,${GITHUB_WORKSPACE}/llvm-build/lib" >> $GITHUB_ENV
+
+ - name: create build environment
+ run: cmake -E make_directory ${{ runner.workspace }}/_build
+
+ - name: configure cmake
+ shell: bash
+ working-directory: ${{ runner.workspace }}/_build
+ run: >
+ VERBOSE=1
+ cmake $GITHUB_WORKSPACE
+ -DBENCHMARK_ENABLE_ASSEMBLY_TESTS=OFF
+ -DBENCHMARK_ENABLE_LIBPFM=OFF
+ -DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON
+ -DCMAKE_C_COMPILER=${{ env.CC }}
+ -DCMAKE_CXX_COMPILER=${{ env.CXX }}
+ -DCMAKE_C_FLAGS="${{ env.EXTRA_FLAGS }}"
+ -DCMAKE_CXX_FLAGS="${{ env.EXTRA_FLAGS }} ${{ env.EXTRA_CXX_FLAGS }}"
+ -DCMAKE_BUILD_TYPE=${{ matrix.build_type }}
+
+ - name: build
+ shell: bash
+ working-directory: ${{ runner.workspace }}/_build
+ run: cmake --build . --config ${{ matrix.build_type }}
+
+ - name: test
+ shell: bash
+ working-directory: ${{ runner.workspace }}/_build
+ run: ctest -C ${{ matrix.build_type }} -VV
diff --git a/.github/workflows/test_bindings.yml b/.github/workflows/test_bindings.yml
index 273d7f9..e01bb7b 100644
--- a/.github/workflows/test_bindings.yml
+++ b/.github/workflows/test_bindings.yml
@@ -2,23 +2,28 @@ name: test-bindings
on:
push:
- branches: [master]
+ branches: [main]
pull_request:
- branches: [master]
+ branches: [main]
jobs:
python_bindings:
- runs-on: ubuntu-latest
+ name: Test GBM Python bindings on ${{ matrix.os }}
+ runs-on: ${{ matrix.os }}
+ strategy:
+ fail-fast: false
+ matrix:
+ os: [ ubuntu-latest, macos-latest, windows-2019 ]
steps:
- - uses: actions/checkout@v2
+ - uses: actions/checkout@v3
- name: Set up Python
- uses: actions/setup-python@v1
+ uses: actions/setup-python@v4
with:
- python-version: 3.8
- - name: Install benchmark
+ python-version: 3.11
+ - name: Install GBM Python bindings on ${{ matrix.os}}
run:
- python setup.py install
- - name: Run example bindings
+ python -m pip install wheel .
+ - name: Run bindings example on ${{ matrix.os }}
run:
python bindings/python/google_benchmark/example.py
diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml
new file mode 100644
index 0000000..1f73bff
--- /dev/null
+++ b/.github/workflows/wheels.yml
@@ -0,0 +1,79 @@
+name: Build and upload Python wheels
+
+on:
+ workflow_dispatch:
+ release:
+ types:
+ - published
+
+jobs:
+ build_sdist:
+ name: Build source distribution
+ runs-on: ubuntu-latest
+ steps:
+ - name: Check out repo
+ uses: actions/checkout@v3
+
+ - name: Install Python 3.11
+ uses: actions/setup-python@v4
+ with:
+ python-version: 3.11
+
+ - name: Build and check sdist
+ run: |
+ python setup.py sdist
+ - name: Upload sdist
+ uses: actions/upload-artifact@v3
+ with:
+ name: dist
+ path: dist/*.tar.gz
+
+ build_wheels:
+ name: Build Google Benchmark wheels on ${{ matrix.os }}
+ runs-on: ${{ matrix.os }}
+ strategy:
+ matrix:
+ os: [ubuntu-latest, macos-latest, windows-2019]
+
+ steps:
+ - name: Check out Google Benchmark
+ uses: actions/checkout@v3
+
+ - name: Set up QEMU
+ if: runner.os == 'Linux'
+ uses: docker/setup-qemu-action@v2
+ with:
+ platforms: all
+
+ - name: Build wheels on ${{ matrix.os }} using cibuildwheel
+ uses: pypa/cibuildwheel@v2.14.1
+ env:
+ CIBW_BUILD: 'cp38-* cp39-* cp310-* cp311-*'
+ CIBW_SKIP: "*-musllinux_*"
+ CIBW_TEST_SKIP: "*-macosx_arm64"
+ CIBW_ARCHS_LINUX: x86_64 aarch64
+ CIBW_ARCHS_MACOS: x86_64 arm64
+ CIBW_ARCHS_WINDOWS: AMD64
+ CIBW_BEFORE_ALL_LINUX: bash .github/install_bazel.sh
+ CIBW_TEST_COMMAND: python {project}/bindings/python/google_benchmark/example.py
+
+ - name: Upload Google Benchmark ${{ matrix.os }} wheels
+ uses: actions/upload-artifact@v3
+ with:
+ name: dist
+ path: ./wheelhouse/*.whl
+
+ pypi_upload:
+ name: Publish google-benchmark wheels to PyPI
+ needs: [build_sdist, build_wheels]
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/download-artifact@v3
+ with:
+ name: dist
+ path: dist
+
+ - uses: pypa/gh-action-pypi-publish@v1.6.4
+ with:
+ user: __token__
+ password: ${{ secrets.PYPI_PASSWORD }}
diff --git a/.gitignore b/.gitignore
index be55d77..704f56c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -11,6 +11,7 @@
*.swp
*.pyc
__pycache__
+.DS_Store
# lcov
*.lcov
diff --git a/.travis-libcxx-setup.sh b/.travis-libcxx-setup.sh
deleted file mode 100644
index a591743..0000000
--- a/.travis-libcxx-setup.sh
+++ /dev/null
@@ -1,28 +0,0 @@
-#!/usr/bin/env bash
-
-# Install a newer CMake version
-curl -sSL https://cmake.org/files/v3.6/cmake-3.6.1-Linux-x86_64.sh -o install-cmake.sh
-chmod +x install-cmake.sh
-sudo ./install-cmake.sh --prefix=/usr/local --skip-license
-
-# Checkout LLVM sources
-git clone --depth=1 https://github.com/llvm-mirror/llvm.git llvm-source
-git clone --depth=1 https://github.com/llvm-mirror/libcxx.git llvm-source/projects/libcxx
-git clone --depth=1 https://github.com/llvm-mirror/libcxxabi.git llvm-source/projects/libcxxabi
-
-# Setup libc++ options
-if [ -z "$BUILD_32_BITS" ]; then
- export BUILD_32_BITS=OFF && echo disabling 32 bit build
-fi
-
-# Build and install libc++ (Use unstable ABI for better sanitizer coverage)
-mkdir llvm-build && cd llvm-build
-cmake -DCMAKE_C_COMPILER=${C_COMPILER} -DCMAKE_CXX_COMPILER=${COMPILER} \
- -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_INSTALL_PREFIX=/usr \
- -DLIBCXX_ABI_UNSTABLE=ON \
- -DLLVM_USE_SANITIZER=${LIBCXX_SANITIZER} \
- -DLLVM_BUILD_32_BITS=${BUILD_32_BITS} \
- ../llvm-source
-make cxx -j2
-sudo make install-cxxabi install-cxx
-cd ../
diff --git a/.travis.yml b/.travis.yml
index 36e343d..8cfed3d 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -11,10 +11,6 @@ matrix:
- lcov
env: COMPILER=g++ C_COMPILER=gcc BUILD_TYPE=Coverage
- compiler: gcc
- env: COMPILER=g++ C_COMPILER=gcc BUILD_TYPE=Debug
- - compiler: gcc
- env: COMPILER=g++ C_COMPILER=gcc BUILD_TYPE=Release
- - compiler: gcc
addons:
apt:
packages:
@@ -44,10 +40,6 @@ matrix:
- COMPILER=g++-6 C_COMPILER=gcc-6 BUILD_TYPE=Debug
- ENABLE_SANITIZER=1
- EXTRA_FLAGS="-fno-omit-frame-pointer -g -O2 -fsanitize=undefined,address -fuse-ld=gold"
- - compiler: clang
- env: COMPILER=clang++ C_COMPILER=clang BUILD_TYPE=Debug
- - compiler: clang
- env: COMPILER=clang++ C_COMPILER=clang BUILD_TYPE=Release
# Clang w/ libc++
- compiler: clang
dist: xenial
@@ -150,29 +142,14 @@ matrix:
osx_image: xcode8.3
compiler: clang
env:
- - COMPILER=clang++ BUILD_TYPE=Debug
- - os: osx
- osx_image: xcode8.3
- compiler: clang
- env:
- - COMPILER=clang++ BUILD_TYPE=Release
- - os: osx
- osx_image: xcode8.3
- compiler: clang
- env:
- COMPILER=clang++
- BUILD_TYPE=Release
- BUILD_32_BITS=ON
- EXTRA_FLAGS="-m32"
- - os: osx
- osx_image: xcode9.4
- compiler: gcc
- env:
- - COMPILER=g++-7 C_COMPILER=gcc-7 BUILD_TYPE=Debug
before_script:
- if [ -n "${LIBCXX_BUILD}" ]; then
- source .travis-libcxx-setup.sh;
+ source .libcxx-setup.sh;
fi
- if [ -n "${ENABLE_SANITIZER}" ]; then
export EXTRA_OPTIONS="-DBENCHMARK_ENABLE_ASSEMBLY_TESTS=OFF";
diff --git a/AUTHORS b/AUTHORS
index 3068b2e..d08c1fd 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -13,6 +13,7 @@ Alex Steele <steeleal123@gmail.com>
Andriy Berestovskyy <berestovskyy@gmail.com>
Arne Beer <arne@twobeer.de>
Carto
+Cezary Skrzyński <czars1988@gmail.com>
Christian Wassermann <christian_wassermann@web.de>
Christopher Seymour <chris.j.seymour@hotmail.com>
Colin Braley <braley.colin@gmail.com>
@@ -21,14 +22,18 @@ David Coeurjolly <david.coeurjolly@liris.cnrs.fr>
Deniz Evrenci <denizevrenci@gmail.com>
Dirac Research
Dominik Czarnota <dominik.b.czarnota@gmail.com>
+Dominik Korman <kormandominik@gmail.com>
+Donald Aingworth <donalds_junk_mail@yahoo.com>
Eric Backus <eric_backus@alum.mit.edu>
Eric Fiselier <eric@efcs.ca>
Eugene Zhuk <eugene.zhuk@gmail.com>
Evgeny Safronov <division494@gmail.com>
+Fabien Pichot <pichot.fabien@gmail.com>
Federico Ficarelli <federico.ficarelli@gmail.com>
Felix Homann <linuxaudio@showlabor.de>
Gergő Szitár <szitar.gergo@gmail.com>
Google Inc.
+Henrique Bucher <hbucher@gmail.com>
International Business Machines Corporation
Ismael Jimenez Martinez <ismael.jimenez.martinez@gmail.com>
Jern-Kuan Leong <jernkuan@gmail.com>
@@ -39,20 +44,28 @@ Jussi Knuuttila <jussi.knuuttila@gmail.com>
Kaito Udagawa <umireon@gmail.com>
Kishan Kumar <kumar.kishan@outlook.com>
Lei Xu <eddyxu@gmail.com>
+Marcel Jacobse <mjacobse@uni-bremen.de>
Matt Clarkson <mattyclarkson@gmail.com>
Maxim Vafin <maxvafin@gmail.com>
+Mike Apodaca <gatorfax@gmail.com>
+Min-Yih Hsu <yihshyng223@gmail.com>
MongoDB Inc.
Nick Hutchinson <nshutchinson@gmail.com>
+Norman Heino <norman.heino@gmail.com>
Oleksandr Sochka <sasha.sochka@gmail.com>
Ori Livneh <ori.livneh@gmail.com>
Paul Redmond <paul.redmond@gmail.com>
Radoslav Yovchev <radoslav.tm@gmail.com>
+Raghu Raja <raghu@enfabrica.net>
+Rainer Orth <ro@cebitec.uni-bielefeld.de>
Roman Lebedev <lebedev.ri@gmail.com>
Sayan Bhattacharjee <aero.sayan@gmail.com>
+Shapr3D <google-contributors@shapr3d.com>
Shuo Chen <chenshuo@chenshuo.com>
+Staffan Tjernstrom <staffantj@gmail.com>
Steinar H. Gunderson <sgunderson@bigfoot.com>
Stripe, Inc.
+Tobias Schmidt <tobias.schmidt@in.tum.de>
Yixuan Qiu <yixuanq@gmail.com>
Yusuke Suzuki <utatane.tea@gmail.com>
Zbigniew Skowron <zbychs@gmail.com>
-Min-Yih Hsu <yihshyng223@gmail.com>
diff --git a/BUILD.bazel b/BUILD.bazel
index eb35b62..60d31d2 100644
--- a/BUILD.bazel
+++ b/BUILD.bazel
@@ -1,15 +1,37 @@
-load("@rules_cc//cc:defs.bzl", "cc_library")
-
licenses(["notice"])
config_setting(
+ name = "qnx",
+ constraint_values = ["@platforms//os:qnx"],
+ values = {
+ "cpu": "x64_qnx",
+ },
+ visibility = [":__subpackages__"],
+)
+
+config_setting(
name = "windows",
+ constraint_values = ["@platforms//os:windows"],
values = {
"cpu": "x64_windows",
},
visibility = [":__subpackages__"],
)
+config_setting(
+ name = "macos",
+ constraint_values = ["@platforms//os:macos"],
+ visibility = ["//visibility:public"],
+)
+
+config_setting(
+ name = "perfcounters",
+ define_values = {
+ "pfm": "1",
+ },
+ visibility = [":__subpackages__"],
+)
+
cc_library(
name = "benchmark",
srcs = glob(
@@ -19,19 +41,40 @@ cc_library(
],
exclude = ["src/benchmark_main.cc"],
),
- hdrs = ["include/benchmark/benchmark.h"],
+ hdrs = [
+ "include/benchmark/benchmark.h",
+ "include/benchmark/export.h",
+ ],
linkopts = select({
":windows": ["-DEFAULTLIB:shlwapi.lib"],
"//conditions:default": ["-pthread"],
}),
+ copts = select({
+ ":windows": [],
+ "//conditions:default": ["-Werror=old-style-cast"],
+ }),
strip_include_prefix = "include",
visibility = ["//visibility:public"],
+ # Only static linking is allowed; no .so will be produced.
+ # Using `defines` (i.e. not `local_defines`) means that no
+ # dependent rules need to bother about defining the macro.
+ linkstatic = True,
+ defines = [
+ "BENCHMARK_STATIC_DEFINE",
+ ] + select({
+ ":perfcounters": ["HAVE_LIBPFM"],
+ "//conditions:default": [],
+ }),
+ deps = select({
+ ":perfcounters": ["@libpfm//:libpfm"],
+ "//conditions:default": [],
+ }),
)
cc_library(
name = "benchmark_main",
srcs = ["src/benchmark_main.cc"],
- hdrs = ["include/benchmark/benchmark.h"],
+ hdrs = ["include/benchmark/benchmark.h", "include/benchmark/export.h"],
strip_include_prefix = "include",
visibility = ["//visibility:public"],
deps = [":benchmark"],
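
Note (not part of the patch): the new :perfcounters config_setting keys off a --define value, so the libpfm code path is opted into from the Bazel command line, roughly:

    # Illustrative: build the library with the HAVE_LIBPFM define and the @libpfm dependency.
    bazel build --define pfm=1 //:benchmark
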
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 1007254..ffd7dee 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,30 +1,34 @@
-cmake_minimum_required (VERSION 3.5.1)
-
-foreach(p
- CMP0048 # OK to clear PROJECT_VERSION on project()
- CMP0054 # CMake 3.1
- CMP0056 # export EXE_LINKER_FLAGS to try_run
- CMP0057 # Support no if() IN_LIST operator
- CMP0063 # Honor visibility properties for all targets
- CMP0077 # Allow option() overrides in importing projects
- )
- if(POLICY ${p})
- cmake_policy(SET ${p} NEW)
- endif()
-endforeach()
+# Require CMake 3.10. If available, use the policies up to CMake 3.22.
+cmake_minimum_required (VERSION 3.10...3.22)
-project (benchmark CXX)
+project (benchmark VERSION 1.8.3 LANGUAGES CXX)
option(BENCHMARK_ENABLE_TESTING "Enable testing of the benchmark library." ON)
option(BENCHMARK_ENABLE_EXCEPTIONS "Enable the use of exceptions in the benchmark library." ON)
option(BENCHMARK_ENABLE_LTO "Enable link time optimisation of the benchmark library." OFF)
option(BENCHMARK_USE_LIBCXX "Build and test using libc++ as the standard library." OFF)
+option(BENCHMARK_ENABLE_WERROR "Build Release candidates with -Werror." ON)
+option(BENCHMARK_FORCE_WERROR "Build Release candidates with -Werror regardless of compiler issues." OFF)
+
+if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "PGI")
+ # PGC++ maybe reporting false positives.
+ set(BENCHMARK_ENABLE_WERROR OFF)
+endif()
+if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "NVHPC")
+ set(BENCHMARK_ENABLE_WERROR OFF)
+endif()
+if(BENCHMARK_FORCE_WERROR)
+ set(BENCHMARK_ENABLE_WERROR ON)
+endif(BENCHMARK_FORCE_WERROR)
+
if(NOT MSVC)
option(BENCHMARK_BUILD_32_BITS "Build a 32 bit version of the library." OFF)
else()
set(BENCHMARK_BUILD_32_BITS OFF CACHE BOOL "Build a 32 bit version of the library - unsupported when using MSVC)" FORCE)
endif()
option(BENCHMARK_ENABLE_INSTALL "Enable installation of benchmark. (Projects embedding benchmark may want to turn this OFF.)" ON)
+option(BENCHMARK_ENABLE_DOXYGEN "Build documentation with Doxygen." OFF)
+option(BENCHMARK_INSTALL_DOCS "Enable installation of documentation." ON)
# Allow unmet dependencies to be met using CMake's ExternalProject mechanics, which
# may require downloading the source code.
@@ -33,8 +37,25 @@ option(BENCHMARK_DOWNLOAD_DEPENDENCIES "Allow the downloading and in-tree buildi
# This option can be used to disable building and running unit tests which depend on gtest
# in cases where it is not possible to build or find a valid version of gtest.
option(BENCHMARK_ENABLE_GTEST_TESTS "Enable building the unit tests which depend on gtest" ON)
+option(BENCHMARK_USE_BUNDLED_GTEST "Use bundled GoogleTest. If disabled, the find_package(GTest) will be used." ON)
+
+option(BENCHMARK_ENABLE_LIBPFM "Enable performance counters provided by libpfm" OFF)
+
+# Export only public symbols
+set(CMAKE_CXX_VISIBILITY_PRESET hidden)
+set(CMAKE_VISIBILITY_INLINES_HIDDEN ON)
+
+if(MSVC)
+ # As of CMake 3.18, CMAKE_SYSTEM_PROCESSOR is not set properly for MSVC and
+ # cross-compilation (e.g. Host=x86_64, target=aarch64) requires using the
+ # undocumented, but working variable.
+ # See https://gitlab.kitware.com/cmake/cmake/-/issues/15170
+ set(CMAKE_SYSTEM_PROCESSOR ${MSVC_CXX_ARCHITECTURE_ID})
+ if(${CMAKE_SYSTEM_PROCESSOR} MATCHES "ARM")
+ set(CMAKE_CROSSCOMPILING TRUE)
+ endif()
+endif()
-set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
set(ENABLE_ASSEMBLY_TESTS_DEFAULT OFF)
function(should_enable_assembly_tests)
if(CMAKE_BUILD_TYPE)
@@ -81,24 +102,43 @@ list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
include(GetGitVersion)
get_git_version(GIT_VERSION)
+# If no git version can be determined, use the version
+# from the project() command
+if ("${GIT_VERSION}" STREQUAL "0.0.0")
+ set(VERSION "${benchmark_VERSION}")
+else()
+ set(VERSION "${GIT_VERSION}")
+endif()
# Tell the user what versions we are using
-string(REGEX MATCH "[0-9]+\\.[0-9]+\\.[0-9]+" VERSION ${GIT_VERSION})
-message(STATUS "Version: ${VERSION}")
+message(STATUS "Google Benchmark version: ${VERSION}")
# The version of the libraries
set(GENERIC_LIB_VERSION ${VERSION})
string(SUBSTRING ${VERSION} 0 1 GENERIC_LIB_SOVERSION)
# Import our CMake modules
-include(CheckCXXCompilerFlag)
include(AddCXXCompilerFlag)
+include(CheckCXXCompilerFlag)
+include(CheckLibraryExists)
include(CXXFeatureCheck)
+check_library_exists(rt shm_open "" HAVE_LIB_RT)
+
if (BENCHMARK_BUILD_32_BITS)
add_required_cxx_compiler_flag(-m32)
endif()
if (MSVC)
+ set(BENCHMARK_CXX_STANDARD 14)
+else()
+ set(BENCHMARK_CXX_STANDARD 11)
+endif()
+
+set(CMAKE_CXX_STANDARD ${BENCHMARK_CXX_STANDARD})
+set(CMAKE_CXX_STANDARD_REQUIRED YES)
+set(CMAKE_CXX_EXTENSIONS OFF)
+
+if (MSVC)
# Turn compiler warnings up to 11
string(REGEX REPLACE "[-/]W[1-4]" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /W4")
@@ -130,44 +170,43 @@ if (MSVC)
set(CMAKE_EXE_LINKER_FLAGS_MINSIZEREL "${CMAKE_EXE_LINKER_FLAGS_MINSIZEREL} /LTCG")
endif()
else()
- # Try and enable C++11. Don't use C++14 because it doesn't work in some
- # configurations.
- add_cxx_compiler_flag(-std=c++11)
- if (NOT HAVE_CXX_FLAG_STD_CXX11)
- add_cxx_compiler_flag(-std=c++0x)
- endif()
-
# Turn compiler warnings up to 11
add_cxx_compiler_flag(-Wall)
add_cxx_compiler_flag(-Wextra)
add_cxx_compiler_flag(-Wshadow)
- add_cxx_compiler_flag(-Werror RELEASE)
- add_cxx_compiler_flag(-Werror RELWITHDEBINFO)
- add_cxx_compiler_flag(-Werror MINSIZEREL)
- # Disabled until googletest (gmock) stops emitting variadic macro warnings
- #add_cxx_compiler_flag(-pedantic)
- #add_cxx_compiler_flag(-pedantic-errors)
+ add_cxx_compiler_flag(-Wfloat-equal)
+ add_cxx_compiler_flag(-Wold-style-cast)
+ if(BENCHMARK_ENABLE_WERROR)
+ add_cxx_compiler_flag(-Werror)
+ endif()
+ if (NOT BENCHMARK_ENABLE_TESTING)
+ # Disable warning when compiling tests as gtest does not use 'override'.
+ add_cxx_compiler_flag(-Wsuggest-override)
+ endif()
+ add_cxx_compiler_flag(-pedantic)
+ add_cxx_compiler_flag(-pedantic-errors)
add_cxx_compiler_flag(-Wshorten-64-to-32)
add_cxx_compiler_flag(-fstrict-aliasing)
# Disable warnings regarding deprecated parts of the library while building
# and testing those parts of the library.
add_cxx_compiler_flag(-Wno-deprecated-declarations)
- if (CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
+ if (CMAKE_CXX_COMPILER_ID STREQUAL "Intel" OR CMAKE_CXX_COMPILER_ID STREQUAL "IntelLLVM")
# Intel silently ignores '-Wno-deprecated-declarations',
# warning no. 1786 must be explicitly disabled.
# See #631 for rationale.
add_cxx_compiler_flag(-wd1786)
+ add_cxx_compiler_flag(-fno-finite-math-only)
endif()
# Disable deprecation warnings for release builds (when -Werror is enabled).
- add_cxx_compiler_flag(-Wno-deprecated RELEASE)
- add_cxx_compiler_flag(-Wno-deprecated RELWITHDEBINFO)
- add_cxx_compiler_flag(-Wno-deprecated MINSIZEREL)
+ if(BENCHMARK_ENABLE_WERROR)
+ add_cxx_compiler_flag(-Wno-deprecated)
+ endif()
if (NOT BENCHMARK_ENABLE_EXCEPTIONS)
add_cxx_compiler_flag(-fno-exceptions)
endif()
if (HAVE_CXX_FLAG_FSTRICT_ALIASING)
- if (NOT CMAKE_CXX_COMPILER_ID STREQUAL "Intel") #ICC17u2: Many false positives for Wstrict-aliasing
+ if (NOT CMAKE_CXX_COMPILER_ID STREQUAL "Intel" AND NOT CMAKE_CXX_COMPILER_ID STREQUAL "IntelLLVM") #ICC17u2: Many false positives for Wstrict-aliasing
add_cxx_compiler_flag(-Wstrict-aliasing)
endif()
endif()
@@ -176,12 +215,12 @@ else()
add_cxx_compiler_flag(-wd654)
add_cxx_compiler_flag(-Wthread-safety)
if (HAVE_CXX_FLAG_WTHREAD_SAFETY)
- cxx_feature_check(THREAD_SAFETY_ATTRIBUTES)
+ cxx_feature_check(THREAD_SAFETY_ATTRIBUTES "-DINCLUDE_DIRECTORIES=${PROJECT_SOURCE_DIR}/include")
endif()
# On most UNIX like platforms g++ and clang++ define _GNU_SOURCE as a
# predefined macro, which turns on all of the wonderful libc extensions.
- # However g++ doesn't do this in Cygwin so we have to define it ourselfs
+ # However g++ doesn't do this in Cygwin so we have to define it ourselves
# since we depend on GNU/POSIX/BSD extensions.
if (CYGWIN)
add_definitions(-D_GNU_SOURCE=1)
@@ -232,7 +271,8 @@ if (BENCHMARK_USE_LIBCXX)
if ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang")
add_cxx_compiler_flag(-stdlib=libc++)
elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU" OR
- "${CMAKE_CXX_COMPILER_ID}" STREQUAL "Intel")
+ "${CMAKE_CXX_COMPILER_ID}" STREQUAL "Intel" OR
+ "${CMAKE_CXX_COMPILER_ID}" STREQUAL "IntelLLVM")
add_cxx_compiler_flag(-nostdinc++)
message(WARNING "libc++ header path must be manually specified using CMAKE_CXX_FLAGS")
# Adding -nodefaultlibs directly to CMAKE_<TYPE>_LINKER_FLAGS will break
@@ -269,6 +309,11 @@ cxx_feature_check(STEADY_CLOCK)
# Ensure we have pthreads
set(THREADS_PREFER_PTHREAD_FLAG ON)
find_package(Threads REQUIRED)
+cxx_feature_check(PTHREAD_AFFINITY)
+
+if (BENCHMARK_ENABLE_LIBPFM)
+ find_package(PFM)
+endif()
# Set up directories
include_directories(${PROJECT_SOURCE_DIR}/include)
@@ -281,7 +326,15 @@ if (BENCHMARK_ENABLE_TESTING)
if (BENCHMARK_ENABLE_GTEST_TESTS AND
NOT (TARGET gtest AND TARGET gtest_main AND
TARGET gmock AND TARGET gmock_main))
- include(GoogleTest)
+ if (BENCHMARK_USE_BUNDLED_GTEST)
+ include(GoogleTest)
+ else()
+ find_package(GTest CONFIG REQUIRED)
+ add_library(gtest ALIAS GTest::gtest)
+ add_library(gtest_main ALIAS GTest::gtest_main)
+ add_library(gmock ALIAS GTest::gmock)
+ add_library(gmock_main ALIAS GTest::gmock_main)
+ endif()
endif()
add_subdirectory(test)
endif()
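
Note (not part of the patch): the new BENCHMARK_USE_BUNDLED_GTEST option lets the unit tests build against an externally installed GoogleTest instead of the bundled/downloaded copy. The configure line below is a sketch and assumes a GTest package discoverable by find_package(GTest CONFIG).

    # Illustrative: use a system-provided GoogleTest rather than the bundled one.
    cmake -S . -B build -DBENCHMARK_ENABLE_GTEST_TESTS=ON -DBENCHMARK_USE_BUNDLED_GTEST=OFF
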
diff --git a/CONTRIBUTORS b/CONTRIBUTORS
index b5e1aa4..95bcad0 100644
--- a/CONTRIBUTORS
+++ b/CONTRIBUTORS
@@ -27,7 +27,9 @@ Albert Pretorius <pretoalb@gmail.com>
Alex Steele <steelal123@gmail.com>
Andriy Berestovskyy <berestovskyy@gmail.com>
Arne Beer <arne@twobeer.de>
+Bátor Tallér <bator.taller@shapr3d.com>
Billy Robert O'Neal III <billy.oneal@gmail.com> <bion@microsoft.com>
+Cezary Skrzyński <czars1988@gmail.com>
Chris Kennelly <ckennelly@google.com> <ckennelly@ckennelly.com>
Christian Wassermann <christian_wassermann@web.de>
Christopher Seymour <chris.j.seymour@hotmail.com>
@@ -38,16 +40,20 @@ David Coeurjolly <david.coeurjolly@liris.cnrs.fr>
Deniz Evrenci <denizevrenci@gmail.com>
Dominic Hamon <dma@stripysock.com> <dominic@google.com>
Dominik Czarnota <dominik.b.czarnota@gmail.com>
+Dominik Korman <kormandominik@gmail.com>
+Donald Aingworth <donalds_junk_mail@yahoo.com>
Eric Backus <eric_backus@alum.mit.edu>
Eric Fiselier <eric@efcs.ca>
Eugene Zhuk <eugene.zhuk@gmail.com>
Evgeny Safronov <division494@gmail.com>
+Fabien Pichot <pichot.fabien@gmail.com>
Fanbo Meng <fanbo.meng@ibm.com>
Federico Ficarelli <federico.ficarelli@gmail.com>
Felix Homann <linuxaudio@showlabor.de>
Geoffrey Martin-Noble <gcmn@google.com> <gmngeoffrey@gmail.com>
Gergő Szitár <szitar.gergo@gmail.com>
Hannes Hauswedell <h2@fsfe.org>
+Henrique Bucher <hbucher@gmail.com>
Ismael Jimenez Martinez <ismael.jimenez.martinez@gmail.com>
Jern-Kuan Leong <jernkuan@gmail.com>
JianXiong Zhou <zhoujianxiong2@gmail.com>
@@ -55,19 +61,25 @@ Joao Paulo Magalhaes <joaoppmagalhaes@gmail.com>
John Millikin <jmillikin@stripe.com>
Jordan Williams <jwillikers@protonmail.com>
Jussi Knuuttila <jussi.knuuttila@gmail.com>
-Kai Wolf <kai.wolf@gmail.com>
Kaito Udagawa <umireon@gmail.com>
+Kai Wolf <kai.wolf@gmail.com>
Kishan Kumar <kumar.kishan@outlook.com>
Lei Xu <eddyxu@gmail.com>
+Marcel Jacobse <mjacobse@uni-bremen.de>
Matt Clarkson <mattyclarkson@gmail.com>
Maxim Vafin <maxvafin@gmail.com>
+Mike Apodaca <gatorfax@gmail.com>
+Min-Yih Hsu <yihshyng223@gmail.com>
Nick Hutchinson <nshutchinson@gmail.com>
+Norman Heino <norman.heino@gmail.com>
Oleksandr Sochka <sasha.sochka@gmail.com>
Ori Livneh <ori.livneh@gmail.com>
Pascal Leroy <phl@google.com>
Paul Redmond <paul.redmond@gmail.com>
Pierre Phaneuf <pphaneuf@google.com>
Radoslav Yovchev <radoslav.tm@gmail.com>
+Raghu Raja <raghu@enfabrica.net>
+Rainer Orth <ro@cebitec.uni-bielefeld.de>
Raul Marin <rmrodriguez@cartodb.com>
Ray Glover <ray.glover@uk.ibm.com>
Robert Guo <robert.guo@mongodb.com>
@@ -75,9 +87,9 @@ Roman Lebedev <lebedev.ri@gmail.com>
Sayan Bhattacharjee <aero.sayan@gmail.com>
Shuo Chen <chenshuo@chenshuo.com>
Steven Wan <wan.yu@ibm.com>
+Tobias Schmidt <tobias.schmidt@in.tum.de>
Tobias Ulvgård <tobias.ulvgard@dirac.se>
Tom Madams <tom.ej.madams@gmail.com> <tmadams@google.com>
Yixuan Qiu <yixuanq@gmail.com>
Yusuke Suzuki <utatane.tea@gmail.com>
Zbigniew Skowron <zbychs@gmail.com>
-Min-Yih Hsu <yihshyng223@gmail.com>
diff --git a/METADATA b/METADATA
index 0584c04..5433077 100644
--- a/METADATA
+++ b/METADATA
@@ -1,3 +1,7 @@
+# This project was upgraded with external_updater.
+# Usage: tools/external_updater/updater.sh update google-benchmark
+# For more info, check https://cs.android.com/android/platform/superproject/+/main:tools/external_updater/README.md
+
name: "google-benchmark"
description: "A library to support the benchmarking of functions, similar to unit-tests."
third_party {
@@ -9,11 +13,11 @@ third_party {
type: GIT
value: "https://github.com/google/benchmark.git"
}
- version: "ea5a5bbff491fd625c6e3458f6edd680b8bd5452"
+ version: "v1.8.3"
license_type: NOTICE
last_upgrade_date {
- year: 2021
- month: 2
- day: 12
+ year: 2023
+ month: 9
+ day: 22
}
}
diff --git a/MODULE.bazel b/MODULE.bazel
new file mode 100644
index 0000000..37a5f5d
--- /dev/null
+++ b/MODULE.bazel
@@ -0,0 +1,24 @@
+module(name = "google_benchmark", version="1.8.3")
+
+bazel_dep(name = "bazel_skylib", version = "1.4.1")
+bazel_dep(name = "platforms", version = "0.0.6")
+bazel_dep(name = "rules_foreign_cc", version = "0.9.0")
+bazel_dep(name = "rules_cc", version = "0.0.6")
+bazel_dep(name = "rules_python", version = "0.24.0", dev_dependency = True)
+bazel_dep(name = "googletest", version = "1.12.1", repo_name = "com_google_googletest", dev_dependency = True)
+bazel_dep(name = "libpfm", version = "4.11.0")
+
+# Register a toolchain for Python 3.9 to be able to build numpy. Python
+# versions >=3.10 are problematic.
+# A second reason for this is to be able to build Python hermetically instead
+# of relying on the changing default version from rules_python.
+
+python = use_extension("@rules_python//python/extensions:python.bzl", "python", dev_dependency = True)
+python.toolchain(python_version = "3.9")
+
+pip = use_extension("@rules_python//python/extensions:pip.bzl", "pip", dev_dependency = True)
+pip.parse(
+ hub_name="tools_pip_deps",
+ python_version = "3.9",
+ requirements_lock="//tools:requirements.txt")
+use_repo(pip, "tools_pip_deps")
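
Note (not part of the patch): with this module file in place, the bzlmod leg of the bazel workflow above can be reproduced locally, for example:

    # Illustrative: mirrors the bazel.yml job with bzlmod enabled.
    bazel test --enable_bzlmod --test_output=all //test/...
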
diff --git a/README.md b/README.md
index 6c09b9d..a5e5d39 100644
--- a/README.md
+++ b/README.md
@@ -1,13 +1,12 @@
# Benchmark
[![build-and-test](https://github.com/google/benchmark/workflows/build-and-test/badge.svg)](https://github.com/google/benchmark/actions?query=workflow%3Abuild-and-test)
+[![bazel](https://github.com/google/benchmark/actions/workflows/bazel.yml/badge.svg)](https://github.com/google/benchmark/actions/workflows/bazel.yml)
[![pylint](https://github.com/google/benchmark/workflows/pylint/badge.svg)](https://github.com/google/benchmark/actions?query=workflow%3Apylint)
[![test-bindings](https://github.com/google/benchmark/workflows/test-bindings/badge.svg)](https://github.com/google/benchmark/actions?query=workflow%3Atest-bindings)
-
-[![Build Status](https://travis-ci.org/google/benchmark.svg?branch=master)](https://travis-ci.org/google/benchmark)
-[![Build status](https://ci.appveyor.com/api/projects/status/u0qsyp7t1tk7cpxs/branch/master?svg=true)](https://ci.appveyor.com/project/google/benchmark/branch/master)
[![Coverage Status](https://coveralls.io/repos/google/benchmark/badge.svg)](https://coveralls.io/r/google/benchmark)
+[![Discord](https://discordapp.com/api/guilds/1125694995928719494/widget.png?style=shield)](https://discord.gg/cz7UX7wKC2)
A library to benchmark code snippets, similar to unit tests. Example:
@@ -27,23 +26,28 @@ BENCHMARK(BM_SomeFunction);
BENCHMARK_MAIN();
```
+## Getting Started
+
To get started, see [Requirements](#requirements) and
[Installation](#installation). See [Usage](#usage) for a full example and the
-[User Guide](#user-guide) for a more comprehensive feature overview.
+[User Guide](docs/user_guide.md) for a more comprehensive feature overview.
-It may also help to read the [Google Test documentation](https://github.com/google/googletest/blob/master/googletest/docs/primer.md)
+It may also help to read the [Google Test documentation](https://github.com/google/googletest/blob/main/docs/primer.md)
as some of the structural aspects of the APIs are similar.
-### Resources
+## Resources
[Discussion group](https://groups.google.com/d/forum/benchmark-discuss)
-IRC channel: [freenode](https://freenode.net) #googlebenchmark
+IRC channels:
+* [libera](https://libera.chat) #benchmark
[Additional Tooling Documentation](docs/tools.md)
[Assembly Testing Documentation](docs/AssemblyTests.md)
+[Building and installing Python bindings](docs/python_bindings.md)
+
## Requirements
The library can be used with C++03. However, it requires C++11 to build,
@@ -56,27 +60,25 @@ The following minimum versions are required to build the library:
* Visual Studio 14 2015
* Intel 2015 Update 1
-See [Platform-Specific Build Instructions](#platform-specific-build-instructions).
+See [Platform-Specific Build Instructions](docs/platform_specific_build_instructions.md).
## Installation
This describes the installation process using cmake. As pre-requisites, you'll
need git and cmake installed.
-_See [dependencies.md](dependencies.md) for more details regarding supported
+_See [dependencies.md](docs/dependencies.md) for more details regarding supported
versions of build tools._
```bash
# Check out the library.
$ git clone https://github.com/google/benchmark.git
-# Benchmark requires Google Test as a dependency. Add the source tree as a subdirectory.
-$ git clone https://github.com/google/googletest.git benchmark/googletest
# Go to the library root directory
$ cd benchmark
# Make a build directory to place the build output.
$ cmake -E make_directory "build"
-# Generate build system files with cmake.
-$ cmake -E chdir "build" cmake -DCMAKE_BUILD_TYPE=Release ../
+# Generate build system files with cmake, and download any dependencies.
+$ cmake -E chdir "build" cmake -DBENCHMARK_DOWNLOAD_DEPENDENCIES=on -DCMAKE_BUILD_TYPE=Release ../
# or, starting with CMake 3.13, use a simpler form:
# cmake -DCMAKE_BUILD_TYPE=Release -S . -B "build"
# Build the library.
@@ -110,10 +112,10 @@ sudo cmake --build "build" --config Release --target install
Note that Google Benchmark requires Google Test to build and run the tests. This
dependency can be provided two ways:
-* Checkout the Google Test sources into `benchmark/googletest` as above.
+* Checkout the Google Test sources into `benchmark/googletest`.
* Otherwise, if `-DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON` is specified during
- configuration, the library will automatically download and build any required
- dependencies.
+ configuration as above, the library will automatically download and build
+ any required dependencies.
If you do not wish to build and run the tests, add `-DBENCHMARK_ENABLE_GTEST_TESTS=OFF`
to `CMAKE_ARGS`.
@@ -136,6 +138,12 @@ cache variables, if autodetection fails.
If you are using clang, you may need to set `LLVMAR_EXECUTABLE`,
`LLVMNM_EXECUTABLE` and `LLVMRANLIB_EXECUTABLE` cmake cache variables.
+To enable sanitizer checks (eg., `asan` and `tsan`), add:
+```
+ -DCMAKE_C_FLAGS="-g -O2 -fno-omit-frame-pointer -fsanitize=address -fsanitize=thread -fno-sanitize-recover=all"
+ -DCMAKE_CXX_FLAGS="-g -O2 -fno-omit-frame-pointer -fsanitize=address -fsanitize=thread -fno-sanitize-recover=all "
+```
+
### Stable and Experimental Library Versions
The main branch contains the latest stable version of the benchmarking library;
@@ -192,7 +200,7 @@ Alternatively, link against the `benchmark_main` library and remove
`BENCHMARK_MAIN();` above to get the same behavior.
The compiled executable will run all benchmarks by default. Pass the `--help`
-flag for option information or see the guide below.
+flag for option information or see the [User Guide](docs/user_guide.md).
### Usage with CMake
@@ -213,1111 +221,3 @@ Either way, link to the library as follows.
```cmake
target_link_libraries(MyTarget benchmark::benchmark)
```
-
-## Platform Specific Build Instructions
-
-### Building with GCC
-
-When the library is built using GCC it is necessary to link with the pthread
-library due to how GCC implements `std::thread`. Failing to link to pthread will
-lead to runtime exceptions (unless you're using libc++), not linker errors. See
-[issue #67](https://github.com/google/benchmark/issues/67) for more details. You
-can link to pthread by adding `-pthread` to your linker command. Note, you can
-also use `-lpthread`, but there are potential issues with ordering of command
-line parameters if you use that.
-
-### Building with Visual Studio 2015 or 2017
-
-The `shlwapi` library (`-lshlwapi`) is required to support a call to `CPUInfo` which reads the registry. Either add `shlwapi.lib` under `[ Configuration Properties > Linker > Input ]`, or use the following:
-
-```
-// Alternatively, can add libraries using linker options.
-#ifdef _WIN32
-#pragma comment ( lib, "Shlwapi.lib" )
-#ifdef _DEBUG
-#pragma comment ( lib, "benchmarkd.lib" )
-#else
-#pragma comment ( lib, "benchmark.lib" )
-#endif
-#endif
-```
-
-Can also use the graphical version of CMake:
-* Open `CMake GUI`.
-* Under `Where to build the binaries`, same path as source plus `build`.
-* Under `CMAKE_INSTALL_PREFIX`, same path as source plus `install`.
-* Click `Configure`, `Generate`, `Open Project`.
-* If build fails, try deleting entire directory and starting again, or unticking options to build less.
-
-### Building with Intel 2015 Update 1 or Intel System Studio Update 4
-
-See instructions for building with Visual Studio. Once built, right click on the solution and change the build to Intel.
-
-### Building on Solaris
-
-If you're running benchmarks on solaris, you'll want the kstat library linked in
-too (`-lkstat`).
-
-## User Guide
-
-### Command Line
-
-[Output Formats](#output-formats)
-
-[Output Files](#output-files)
-
-[Running Benchmarks](#running-benchmarks)
-
-[Running a Subset of Benchmarks](#running-a-subset-of-benchmarks)
-
-[Result Comparison](#result-comparison)
-
-### Library
-
-[Runtime and Reporting Considerations](#runtime-and-reporting-considerations)
-
-[Passing Arguments](#passing-arguments)
-
-[Calculating Asymptotic Complexity](#asymptotic-complexity)
-
-[Templated Benchmarks](#templated-benchmarks)
-
-[Fixtures](#fixtures)
-
-[Custom Counters](#custom-counters)
-
-[Multithreaded Benchmarks](#multithreaded-benchmarks)
-
-[CPU Timers](#cpu-timers)
-
-[Manual Timing](#manual-timing)
-
-[Setting the Time Unit](#setting-the-time-unit)
-
-[Preventing Optimization](#preventing-optimization)
-
-[Reporting Statistics](#reporting-statistics)
-
-[Custom Statistics](#custom-statistics)
-
-[Using RegisterBenchmark](#using-register-benchmark)
-
-[Exiting with an Error](#exiting-with-an-error)
-
-[A Faster KeepRunning Loop](#a-faster-keep-running-loop)
-
-[Disabling CPU Frequency Scaling](#disabling-cpu-frequency-scaling)
-
-
-<a name="output-formats" />
-
-### Output Formats
-
-The library supports multiple output formats. Use the
-`--benchmark_format=<console|json|csv>` flag (or set the
-`BENCHMARK_FORMAT=<console|json|csv>` environment variable) to set
-the format type. `console` is the default format.
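-
-For example, to select JSON output (using `mybench` as a placeholder binary name):
-
-```bash
-./mybench --benchmark_format=json
-```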
-
-The Console format is intended to be a human-readable format. By default
-it generates color output. Context is output on stderr and the
-tabular data on stdout. Example tabular output looks like:
-
-```
-Benchmark Time(ns) CPU(ns) Iterations
-----------------------------------------------------------------------
-BM_SetInsert/1024/1 28928 29349 23853 133.097kB/s 33.2742k items/s
-BM_SetInsert/1024/8 32065 32913 21375 949.487kB/s 237.372k items/s
-BM_SetInsert/1024/10 33157 33648 21431 1.13369MB/s 290.225k items/s
-```
-
-The JSON format outputs human-readable JSON split into two top-level attributes.
-The `context` attribute contains information about the run in general, including
-information about the CPU and the date.
-The `benchmarks` attribute contains a list of every benchmark run. Example json
-output looks like:
-
-```json
-{
- "context": {
- "date": "2015/03/17-18:40:25",
- "num_cpus": 40,
- "mhz_per_cpu": 2801,
- "cpu_scaling_enabled": false,
- "build_type": "debug"
- },
- "benchmarks": [
- {
- "name": "BM_SetInsert/1024/1",
- "iterations": 94877,
- "real_time": 29275,
- "cpu_time": 29836,
- "bytes_per_second": 134066,
- "items_per_second": 33516
- },
- {
- "name": "BM_SetInsert/1024/8",
- "iterations": 21609,
- "real_time": 32317,
- "cpu_time": 32429,
- "bytes_per_second": 986770,
- "items_per_second": 246693
- },
- {
- "name": "BM_SetInsert/1024/10",
- "iterations": 21393,
- "real_time": 32724,
- "cpu_time": 33355,
- "bytes_per_second": 1199226,
- "items_per_second": 299807
- }
- ]
-}
-```
-
-The CSV format outputs comma-separated values. The `context` is output on stderr
-and the CSV itself on stdout. Example CSV output looks like:
-
-```
-name,iterations,real_time,cpu_time,bytes_per_second,items_per_second,label
-"BM_SetInsert/1024/1",65465,17890.7,8407.45,475768,118942,
-"BM_SetInsert/1024/8",116606,18810.1,9766.64,3.27646e+06,819115,
-"BM_SetInsert/1024/10",106365,17238.4,8421.53,4.74973e+06,1.18743e+06,
-```
-
-<a name="output-files" />
-
-### Output Files
-
-Write benchmark results to a file with the `--benchmark_out=<filename>` option
-(or set `BENCHMARK_OUT`). Specify the output format with
-`--benchmark_out_format={json|console|csv}` (or set
-`BENCHMARK_OUT_FORMAT={json|console|csv}`). Note that specifying
-`--benchmark_out` does not suppress the console output.
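-
-For example (using `mybench` as a placeholder binary name):
-
-```bash
-# Write JSON results to results.json while still printing the console output.
-./mybench --benchmark_out=results.json --benchmark_out_format=json
-```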
-
-<a name="running-benchmarks" />
-
-### Running Benchmarks
-
-Benchmarks are executed by running the produced binaries. Benchmark binaries,
-by default, accept options that may be specified either through their command
-line interface or by setting environment variables before execution. For every
-`--option_flag=<value>` CLI switch, a corresponding environment variable
-`OPTION_FLAG=<value>` exists and is used as the default if set (CLI switches
-always prevail). A complete list of CLI options is available by running a
-benchmark with the `--help` switch.
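-
-For example, the following two invocations are equivalent (again using
-`mybench` as a placeholder binary name):
-
-```bash
-./mybench --benchmark_format=csv
-BENCHMARK_FORMAT=csv ./mybench
-```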
-
-<a name="running-a-subset-of-benchmarks" />
-
-### Running a Subset of Benchmarks
-
-The `--benchmark_filter=<regex>` option (or `BENCHMARK_FILTER=<regex>`
-environment variable) can be used to only run the benchmarks that match
-the specified `<regex>`. For example:
-
-```bash
-$ ./run_benchmarks.x --benchmark_filter=BM_memcpy/32
-Run on (1 X 2300 MHz CPU )
-2016-06-25 19:34:24
-Benchmark Time CPU Iterations
-----------------------------------------------------
-BM_memcpy/32 11 ns 11 ns 79545455
-BM_memcpy/32k 2181 ns 2185 ns 324074
-BM_memcpy/32 12 ns 12 ns 54687500
-BM_memcpy/32k 1834 ns 1837 ns 357143
-```
-
-<a name="result-comparison" />
-
-### Result Comparison
-
-It is possible to compare benchmarking results.
-See the [Additional Tooling Documentation](docs/tools.md) for details.
-
-<a name="runtime-and-reporting-considerations" />
-
-### Runtime and Reporting Considerations
-
-When the benchmark binary is executed, each benchmark function is run serially.
-The number of iterations to run is determined dynamically by running the
-benchmark a few times, measuring the time taken, and ensuring that the
-ultimate result will be statistically stable. As such, faster benchmark
-functions will be run for more iterations than slower benchmark functions, and
-the number of iterations is thus reported.
-
-In all cases, the number of iterations for which the benchmark is run is
-governed by the amount of time the benchmark takes. Concretely, the number of
-iterations is at least one and not more than 1e9; iterations are added until the
-CPU time exceeds the minimum time, or the wallclock time reaches 5x the minimum
-time. The minimum time is set per benchmark by calling `MinTime` on the
-registered benchmark object.
-
-Average timings are then reported over the iterations run. If multiple
-repetitions are requested using the `--benchmark_repetitions` command-line
-option, or at registration time, the benchmark function will be run several
-times and statistical results across these repetitions will also be reported.
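-
-For example, the following run repeats every benchmark ten times and reports
-the mean, median and standard deviation alongside the individual runs
-(`mybench` is a placeholder binary name):
-
-```bash
-./mybench --benchmark_repetitions=10
-```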
-
-As well as the per-benchmark entries, a preamble in the report will include
-information about the machine on which the benchmarks are run.
-
-<a name="passing-arguments" />
-
-### Passing Arguments
-
-Sometimes a family of benchmarks can be implemented with just one routine that
-takes an extra argument to specify which one of the family of benchmarks to
-run. For example, the following code defines a family of benchmarks for
-measuring the speed of `memcpy()` calls of different lengths:
-
-```c++
-static void BM_memcpy(benchmark::State& state) {
- char* src = new char[state.range(0)];
- char* dst = new char[state.range(0)];
- memset(src, 'x', state.range(0));
- for (auto _ : state)
- memcpy(dst, src, state.range(0));
- state.SetBytesProcessed(int64_t(state.iterations()) *
- int64_t(state.range(0)));
- delete[] src;
- delete[] dst;
-}
-BENCHMARK(BM_memcpy)->Arg(8)->Arg(64)->Arg(512)->Arg(1<<10)->Arg(8<<10);
-```
-
-The preceding code is quite repetitive, and can be replaced with the following
-short-hand. The following invocation will pick a few appropriate arguments in
-the specified range and will generate a benchmark for each such argument.
-
-```c++
-BENCHMARK(BM_memcpy)->Range(8, 8<<10);
-```
-
-By default the arguments in the range are generated in multiples of eight and
-the command above selects [ 8, 64, 512, 4k, 8k ]. In the following code the
-range multiplier is changed to multiples of two.
-
-```c++
-BENCHMARK(BM_memcpy)->RangeMultiplier(2)->Range(8, 8<<10);
-```
-
-Now arguments generated are [ 8, 16, 32, 64, 128, 256, 512, 1024, 2k, 4k, 8k ].
-
-The preceding code shows a method of defining a sparse range. The following
-example shows a method of defining a dense range. It is then used to benchmark
-the performance of `std::vector` initialization for uniformly increasing sizes.
-
-```c++
-static void BM_DenseRange(benchmark::State& state) {
- for(auto _ : state) {
- std::vector<int> v(state.range(0), state.range(0));
- benchmark::DoNotOptimize(v.data());
- benchmark::ClobberMemory();
- }
-}
-BENCHMARK(BM_DenseRange)->DenseRange(0, 1024, 128);
-```
-
-Now arguments generated are [ 0, 128, 256, 384, 512, 640, 768, 896, 1024 ].
-
-You might have a benchmark that depends on two or more inputs. For example, the
-following code defines a family of benchmarks for measuring the speed of set
-insertion.
-
-```c++
-static void BM_SetInsert(benchmark::State& state) {
- std::set<int> data;
- for (auto _ : state) {
- state.PauseTiming();
- data = ConstructRandomSet(state.range(0));
- state.ResumeTiming();
- for (int j = 0; j < state.range(1); ++j)
- data.insert(RandomNumber());
- }
-}
-BENCHMARK(BM_SetInsert)
- ->Args({1<<10, 128})
- ->Args({2<<10, 128})
- ->Args({4<<10, 128})
- ->Args({8<<10, 128})
- ->Args({1<<10, 512})
- ->Args({2<<10, 512})
- ->Args({4<<10, 512})
- ->Args({8<<10, 512});
-```
-
-The preceding code is quite repetitive, and can be replaced with the following
-short-hand. The following macro will pick a few appropriate arguments in the
-product of the two specified ranges and will generate a benchmark for each such
-pair.
-
-```c++
-BENCHMARK(BM_SetInsert)->Ranges({{1<<10, 8<<10}, {128, 512}});
-```
-
-Some benchmarks may require specific argument values that cannot be expressed
-with `Ranges`. In this case, `ArgsProduct` offers the ability to generate a
-benchmark input for each combination in the product of the supplied vectors.
-
-```c++
-BENCHMARK(BM_SetInsert)
- ->ArgsProduct({{1<<10, 3<<10, 8<<10}, {20, 40, 60, 80}})
-// would generate the same benchmark arguments as
-BENCHMARK(BM_SetInsert)
- ->Args({1<<10, 20})
- ->Args({3<<10, 20})
- ->Args({8<<10, 20})
- ->Args({3<<10, 40})
- ->Args({8<<10, 40})
- ->Args({1<<10, 40})
- ->Args({1<<10, 60})
- ->Args({3<<10, 60})
- ->Args({8<<10, 60})
- ->Args({1<<10, 80})
- ->Args({3<<10, 80})
- ->Args({8<<10, 80});
-```
-
-For more complex patterns of inputs, passing a custom function to `Apply` allows
-programmatic specification of an arbitrary set of arguments on which to run the
-benchmark. The following example enumerates a dense range on one parameter,
-and a sparse range on the second.
-
-```c++
-static void CustomArguments(benchmark::internal::Benchmark* b) {
- for (int i = 0; i <= 10; ++i)
- for (int j = 32; j <= 1024*1024; j *= 8)
- b->Args({i, j});
-}
-BENCHMARK(BM_SetInsert)->Apply(CustomArguments);
-```
-
-#### Passing Arbitrary Arguments to a Benchmark
-
-In C++11 it is possible to define a benchmark that takes an arbitrary number
-of extra arguments. The `BENCHMARK_CAPTURE(func, test_case_name, ...args)`
-macro creates a benchmark that invokes `func` with the `benchmark::State` as
-the first argument followed by the specified `args...`.
-The `test_case_name` is appended to the name of the benchmark and
-should describe the values passed.
-
-```c++
-template <class ...ExtraArgs>
-void BM_takes_args(benchmark::State& state, ExtraArgs&&... extra_args) {
- [...]
-}
-// Registers a benchmark named "BM_takes_args/int_string_test" that passes
-// the specified values to `extra_args`.
-BENCHMARK_CAPTURE(BM_takes_args, int_string_test, 42, std::string("abc"));
-```
-
-Note that elements of `...args` may refer to global variables. Users should
-avoid modifying global state inside of a benchmark.
-
-<a name="asymptotic-complexity" />
-
-### Calculating Asymptotic Complexity (Big O)
-
-Asymptotic complexity might be calculated for a family of benchmarks. The
-following code will calculate the coefficient for the high-order term in the
-running time and the normalized root-mean square error of string comparison.
-
-```c++
-static void BM_StringCompare(benchmark::State& state) {
- std::string s1(state.range(0), '-');
- std::string s2(state.range(0), '-');
- for (auto _ : state) {
- benchmark::DoNotOptimize(s1.compare(s2));
- }
- state.SetComplexityN(state.range(0));
-}
-BENCHMARK(BM_StringCompare)
- ->RangeMultiplier(2)->Range(1<<10, 1<<18)->Complexity(benchmark::oN);
-```
-
-As shown in the following invocation, asymptotic complexity might also be
-calculated automatically.
-
-```c++
-BENCHMARK(BM_StringCompare)
- ->RangeMultiplier(2)->Range(1<<10, 1<<18)->Complexity();
-```
-
-The following code specifies the asymptotic complexity with a lambda function
-that can be used to customize the high-order term calculation.
-
-```c++
-BENCHMARK(BM_StringCompare)->RangeMultiplier(2)
- ->Range(1<<10, 1<<18)->Complexity([](benchmark::IterationCount n)->double{return n; });
-```
-
-<a name="templated-benchmarks" />
-
-### Templated Benchmarks
-
-This example produces and consumes messages of size `sizeof(v)` `state.range(0)`
-times. It also outputs throughput in the absence of multiprogramming.
-
-```c++
-template <class Q> void BM_Sequential(benchmark::State& state) {
- Q q;
- typename Q::value_type v;
- for (auto _ : state) {
- for (int i = state.range(0); i--; )
- q.push(v);
- for (int e = state.range(0); e--; )
- q.Wait(&v);
- }
- // actually messages, not bytes:
- state.SetBytesProcessed(
- static_cast<int64_t>(state.iterations())*state.range(0));
-}
-BENCHMARK_TEMPLATE(BM_Sequential, WaitQueue<int>)->Range(1<<0, 1<<10);
-```
-
-Three macros are provided for adding benchmark templates.
-
-```c++
-#ifdef BENCHMARK_HAS_CXX11
-#define BENCHMARK_TEMPLATE(func, ...) // Takes any number of parameters.
-#else // C++ < C++11
-#define BENCHMARK_TEMPLATE(func, arg1)
-#endif
-#define BENCHMARK_TEMPLATE1(func, arg1)
-#define BENCHMARK_TEMPLATE2(func, arg1, arg2)
-```
-
-<a name="fixtures" />
-
-### Fixtures
-
-Fixture tests are created by first defining a type that derives from
-`::benchmark::Fixture` and then creating/registering the tests using the
-following macros:
-
-* `BENCHMARK_F(ClassName, Method)`
-* `BENCHMARK_DEFINE_F(ClassName, Method)`
-* `BENCHMARK_REGISTER_F(ClassName, Method)`
-
-For example:
-
-```c++
-class MyFixture : public benchmark::Fixture {
-public:
- void SetUp(const ::benchmark::State& state) {
- }
-
- void TearDown(const ::benchmark::State& state) {
- }
-};
-
-BENCHMARK_F(MyFixture, FooTest)(benchmark::State& st) {
- for (auto _ : st) {
- ...
- }
-}
-
-BENCHMARK_DEFINE_F(MyFixture, BarTest)(benchmark::State& st) {
- for (auto _ : st) {
- ...
- }
-}
-/* BarTest is NOT registered */
-BENCHMARK_REGISTER_F(MyFixture, BarTest)->Threads(2);
-/* BarTest is now registered */
-```
-
-#### Templated Fixtures
-
-You can also create templated fixtures by using the following macros:
-
-* `BENCHMARK_TEMPLATE_F(ClassName, Method, ...)`
-* `BENCHMARK_TEMPLATE_DEFINE_F(ClassName, Method, ...)`
-
-For example:
-
-```c++
-template<typename T>
-class MyFixture : public benchmark::Fixture {};
-
-BENCHMARK_TEMPLATE_F(MyFixture, IntTest, int)(benchmark::State& st) {
- for (auto _ : st) {
- ...
- }
-}
-
-BENCHMARK_TEMPLATE_DEFINE_F(MyFixture, DoubleTest, double)(benchmark::State& st) {
- for (auto _ : st) {
- ...
- }
-}
-
-BENCHMARK_REGISTER_F(MyFixture, DoubleTest)->Threads(2);
-```
-
-<a name="custom-counters" />
-
-### Custom Counters
-
-You can add your own counters with user-defined names. The example below
-will add columns "Foo", "Bar" and "Baz" in its output:
-
-```c++
-static void UserCountersExample1(benchmark::State& state) {
- double numFoos = 0, numBars = 0, numBazs = 0;
- for (auto _ : state) {
- // ... count Foo,Bar,Baz events
- }
- state.counters["Foo"] = numFoos;
- state.counters["Bar"] = numBars;
- state.counters["Baz"] = numBazs;
-}
-```
-
-The `state.counters` object is a `std::map` with `std::string` keys
-and `Counter` values. The latter is a `double`-like class, via an implicit
-conversion to `double&`. Thus you can use all of the standard arithmetic
-assignment operators (`=,+=,-=,*=,/=`) to change the value of each counter.
-
-In multithreaded benchmarks, each counter is set on the calling thread only.
-When the benchmark finishes, the counters from each thread will be summed;
-the resulting sum is the value which will be shown for the benchmark.
-
-The `Counter` constructor accepts three parameters: the value as a `double`;
-a bit flag which allows you to show counters as rates, and/or as per-thread
-iteration, and/or as per-thread averages, and/or iteration invariants,
-and/or finally inverting the result; and a flag specifying the 'unit' - i.e.
-is 1k a 1000 (default, `benchmark::Counter::OneK::kIs1000`), or 1024
-(`benchmark::Counter::OneK::kIs1024`)?
-
-```c++
- // sets a simple counter
- state.counters["Foo"] = numFoos;
-
- // Set the counter as a rate. It will be presented divided
- // by the duration of the benchmark.
- // Meaning: per one second, how many 'foo's are processed?
- state.counters["FooRate"] = Counter(numFoos, benchmark::Counter::kIsRate);
-
- // Set the counter as a rate. It will be presented divided
- // by the duration of the benchmark, and the result inverted.
- // Meaning: how many seconds it takes to process one 'foo'?
- state.counters["FooInvRate"] = Counter(numFoos, benchmark::Counter::kIsRate | benchmark::Counter::kInvert);
-
- // Set the counter as a thread-average quantity. It will
- // be presented divided by the number of threads.
- state.counters["FooAvg"] = Counter(numFoos, benchmark::Counter::kAvgThreads);
-
- // There's also a combined flag:
- state.counters["FooAvgRate"] = Counter(numFoos,benchmark::Counter::kAvgThreadsRate);
-
- // This says that we process with the rate of state.range(0) bytes every iteration:
- state.counters["BytesProcessed"] = Counter(state.range(0), benchmark::Counter::kIsIterationInvariantRate, benchmark::Counter::OneK::kIs1024);
-```
-
-When you're compiling in C++11 mode or later, you can use `insert()` with
-`std::initializer_list`:
-
-```c++
- // With C++11, this can be done:
- state.counters.insert({{"Foo", numFoos}, {"Bar", numBars}, {"Baz", numBazs}});
- // ... instead of:
- state.counters["Foo"] = numFoos;
- state.counters["Bar"] = numBars;
- state.counters["Baz"] = numBazs;
-```
-
-#### Counter Reporting
-
-When using the console reporter, by default, user counters are printed at
-the end after the table, the same way as ``bytes_processed`` and
-``items_processed``. This is best for cases in which there are few counters,
-or where there are only a couple of lines per benchmark. Here's an example of
-the default output:
-
-```
-------------------------------------------------------------------------------
-Benchmark Time CPU Iterations UserCounters...
-------------------------------------------------------------------------------
-BM_UserCounter/threads:8 2248 ns 10277 ns 68808 Bar=16 Bat=40 Baz=24 Foo=8
-BM_UserCounter/threads:1 9797 ns 9788 ns 71523 Bar=2 Bat=5 Baz=3 Foo=1024m
-BM_UserCounter/threads:2 4924 ns 9842 ns 71036 Bar=4 Bat=10 Baz=6 Foo=2
-BM_UserCounter/threads:4 2589 ns 10284 ns 68012 Bar=8 Bat=20 Baz=12 Foo=4
-BM_UserCounter/threads:8 2212 ns 10287 ns 68040 Bar=16 Bat=40 Baz=24 Foo=8
-BM_UserCounter/threads:16 1782 ns 10278 ns 68144 Bar=32 Bat=80 Baz=48 Foo=16
-BM_UserCounter/threads:32 1291 ns 10296 ns 68256 Bar=64 Bat=160 Baz=96 Foo=32
-BM_UserCounter/threads:4 2615 ns 10307 ns 68040 Bar=8 Bat=20 Baz=12 Foo=4
-BM_Factorial 26 ns 26 ns 26608979 40320
-BM_Factorial/real_time 26 ns 26 ns 26587936 40320
-BM_CalculatePiRange/1 16 ns 16 ns 45704255 0
-BM_CalculatePiRange/8 73 ns 73 ns 9520927 3.28374
-BM_CalculatePiRange/64 609 ns 609 ns 1140647 3.15746
-BM_CalculatePiRange/512 4900 ns 4901 ns 142696 3.14355
-```
-
-If this doesn't suit you, you can print each counter as a table column by
-passing the flag `--benchmark_counters_tabular=true` to the benchmark
-application. This is best for cases in which there are a lot of counters, or
-a lot of lines per individual benchmark. Note that this will trigger a
-reprinting of the table header any time the counter set changes between
-individual benchmarks. Here's an example of corresponding output when
-`--benchmark_counters_tabular=true` is passed:
-
-```
----------------------------------------------------------------------------------------
-Benchmark Time CPU Iterations Bar Bat Baz Foo
----------------------------------------------------------------------------------------
-BM_UserCounter/threads:8 2198 ns 9953 ns 70688 16 40 24 8
-BM_UserCounter/threads:1 9504 ns 9504 ns 73787 2 5 3 1
-BM_UserCounter/threads:2 4775 ns 9550 ns 72606 4 10 6 2
-BM_UserCounter/threads:4 2508 ns 9951 ns 70332 8 20 12 4
-BM_UserCounter/threads:8 2055 ns 9933 ns 70344 16 40 24 8
-BM_UserCounter/threads:16 1610 ns 9946 ns 70720 32 80 48 16
-BM_UserCounter/threads:32 1192 ns 9948 ns 70496 64 160 96 32
-BM_UserCounter/threads:4 2506 ns 9949 ns 70332 8 20 12 4
---------------------------------------------------------------
-Benchmark Time CPU Iterations
---------------------------------------------------------------
-BM_Factorial 26 ns 26 ns 26392245 40320
-BM_Factorial/real_time 26 ns 26 ns 26494107 40320
-BM_CalculatePiRange/1 15 ns 15 ns 45571597 0
-BM_CalculatePiRange/8 74 ns 74 ns 9450212 3.28374
-BM_CalculatePiRange/64 595 ns 595 ns 1173901 3.15746
-BM_CalculatePiRange/512 4752 ns 4752 ns 147380 3.14355
-BM_CalculatePiRange/4k 37970 ns 37972 ns 18453 3.14184
-BM_CalculatePiRange/32k 303733 ns 303744 ns 2305 3.14162
-BM_CalculatePiRange/256k 2434095 ns 2434186 ns 288 3.1416
-BM_CalculatePiRange/1024k 9721140 ns 9721413 ns 71 3.14159
-BM_CalculatePi/threads:8 2255 ns 9943 ns 70936
-```
-
-Note above the additional header printed when the benchmark changes from
-``BM_UserCounter`` to ``BM_Factorial``. This is because ``BM_Factorial`` does
-not have the same counter set as ``BM_UserCounter``.
-
-<a name="multithreaded-benchmarks"/>
-
-### Multithreaded Benchmarks
-
-In a multithreaded test (benchmark invoked by multiple threads simultaneously),
-it is guaranteed that none of the threads will start until all have reached
-the start of the benchmark loop, and all will have finished before any thread
-exits the benchmark loop. (This behavior is also provided by the `KeepRunning()`
-API.) As such, any global setup or teardown can be wrapped in a check against
-the thread index:
-
-```c++
-static void BM_MultiThreaded(benchmark::State& state) {
- if (state.thread_index == 0) {
- // Setup code here.
- }
- for (auto _ : state) {
- // Run the test as normal.
- }
- if (state.thread_index == 0) {
- // Teardown code here.
- }
-}
-BENCHMARK(BM_MultiThreaded)->Threads(2);
-```
-
-If the benchmarked code itself uses threads and you want to compare it to
-single-threaded code, you may want to use real-time ("wallclock") measurements
-for latency comparisons:
-
-```c++
-BENCHMARK(BM_test)->Range(8, 8<<10)->UseRealTime();
-```
-
-Without `UseRealTime`, CPU time is used by default.
-
-<a name="cpu-timers" />
-
-### CPU Timers
-
-By default, the CPU timer only measures the time spent by the main thread.
-If the benchmark itself uses threads internally, this measurement may not
-be what you are looking for. Instead, there is a way to measure the total
-CPU usage of the process, by all the threads.
-
-```c++
-void callee(int i);
-
-static void MyMain(int size) {
-#pragma omp parallel for
- for(int i = 0; i < size; i++)
- callee(i);
-}
-
-static void BM_OpenMP(benchmark::State& state) {
- for (auto _ : state)
- MyMain(state.range(0));
-}
-
-// Measure the time spent by the main thread, use it to decide for how long to
-// run the benchmark loop. Depending on the internal implementation details, this
-// may measure anywhere from near-zero (the overhead spent before/after work
-// handoff to worker thread[s]) to the whole single-thread time.
-BENCHMARK(BM_OpenMP)->Range(8, 8<<10);
-
-// Measure the user-visible time, the wall clock (literally, the time that
-// has passed on the clock on the wall), use it to decide for how long to
-// run the benchmark loop. This will always be meaningful, and will match the
-// time spent by the main thread in the single-threaded case, in general decreasing
-// with the number of internal threads doing the work.
-BENCHMARK(BM_OpenMP)->Range(8, 8<<10)->UseRealTime();
-
-// Measure the total CPU consumption, use it to decide for how long to
-// run the benchmark loop. This will always measure to no less than the
-// time spent by the main thread in the single-threaded case.
-BENCHMARK(BM_OpenMP)->Range(8, 8<<10)->MeasureProcessCPUTime();
-
-// A mixture of the last two. Measure the total CPU consumption, but use the
-// wall clock to decide for how long to run the benchmark loop.
-BENCHMARK(BM_OpenMP)->Range(8, 8<<10)->MeasureProcessCPUTime()->UseRealTime();
-```
-
-#### Controlling Timers
-
-Normally, the entire duration of the work loop (`for (auto _ : state) {}`)
-is measured. But sometimes it is necessary to do some work inside that loop,
-every iteration, without counting that time toward the benchmark time.
-That is possible, although it is not recommended, since it has high overhead.
-
-```c++
-static void BM_SetInsert_With_Timer_Control(benchmark::State& state) {
- std::set<int> data;
- for (auto _ : state) {
- state.PauseTiming(); // Stop timers. They will not count until they are resumed.
- data = ConstructRandomSet(state.range(0)); // Do something that should not be measured
- state.ResumeTiming(); // And resume timers. They are now counting again.
- // The rest will be measured.
- for (int j = 0; j < state.range(1); ++j)
- data.insert(RandomNumber());
- }
-}
-BENCHMARK(BM_SetInsert_With_Timer_Control)->Ranges({{1<<10, 8<<10}, {128, 512}});
-```
-
-<a name="manual-timing" />
-
-### Manual Timing
-
-For benchmarking something for which neither CPU time nor real-time are
-correct or accurate enough, completely manual timing is supported using
-the `UseManualTime` function.
-
-When `UseManualTime` is used, the benchmarked code must call
-`SetIterationTime` once per iteration of the benchmark loop to
-report the manually measured time.
-
-An example use case for this is benchmarking GPU execution (e.g. OpenCL
-or CUDA kernels, OpenGL or Vulkan or Direct3D draw calls), which cannot
-be accurately measured using CPU time or real-time. Instead, they can be
-measured accurately using a dedicated API, and these measurement results
-can be reported back with `SetIterationTime`.
-
-```c++
-static void BM_ManualTiming(benchmark::State& state) {
- int microseconds = state.range(0);
- std::chrono::duration<double, std::micro> sleep_duration {
- static_cast<double>(microseconds)
- };
-
- for (auto _ : state) {
- auto start = std::chrono::high_resolution_clock::now();
- // Simulate some useful workload with a sleep
- std::this_thread::sleep_for(sleep_duration);
- auto end = std::chrono::high_resolution_clock::now();
-
- auto elapsed_seconds =
- std::chrono::duration_cast<std::chrono::duration<double>>(
- end - start);
-
- state.SetIterationTime(elapsed_seconds.count());
- }
-}
-BENCHMARK(BM_ManualTiming)->Range(1, 1<<17)->UseManualTime();
-```
-
-<a name="setting-the-time-unit" />
-
-### Setting the Time Unit
-
-If a benchmark runs for a few milliseconds, it may be hard to visually compare
-the measured times, since the output data is given in nanoseconds by default.
-To set the time unit manually, specify it on the benchmark:
-
-```c++
-BENCHMARK(BM_test)->Unit(benchmark::kMillisecond);
-```
-
-<a name="preventing-optimization" />
-
-### Preventing Optimization
-
-To prevent a value or expression from being optimized away by the compiler,
-the `benchmark::DoNotOptimize(...)` and `benchmark::ClobberMemory()`
-functions can be used.
-
-```c++
-static void BM_test(benchmark::State& state) {
- for (auto _ : state) {
- int x = 0;
- for (int i=0; i < 64; ++i) {
- benchmark::DoNotOptimize(x += i);
- }
- }
-}
-```
-
-`DoNotOptimize(<expr>)` forces the *result* of `<expr>` to be stored in either
-memory or a register. For GNU based compilers it acts as a read/write barrier
-for global memory. More specifically, it forces the compiler to flush pending
-writes to memory and reload any other values as necessary.
-
-Note that `DoNotOptimize(<expr>)` does not prevent optimizations on `<expr>`
-in any way. `<expr>` may even be removed entirely when the result is already
-known. For example:
-
-```c++
- /* Example 1: `<expr>` is removed entirely. */
- int foo(int x) { return x + 42; }
- while (...) DoNotOptimize(foo(0)); // Optimized to DoNotOptimize(42);
-
- /* Example 2: Result of '<expr>' is only reused */
- int bar(int) __attribute__((const));
- while (...) DoNotOptimize(bar(0)); // Optimized to:
- // int __result__ = bar(0);
- // while (...) DoNotOptimize(__result__);
-```
-
-The second tool for preventing optimizations is `ClobberMemory()`. In essence
-`ClobberMemory()` forces the compiler to perform all pending writes to global
-memory. Memory managed by block scope objects must be "escaped" using
-`DoNotOptimize(...)` before it can be clobbered. In the below example
-`ClobberMemory()` prevents the call to `v.push_back(42)` from being optimized
-away.
-
-```c++
-static void BM_vector_push_back(benchmark::State& state) {
- for (auto _ : state) {
- std::vector<int> v;
- v.reserve(1);
- benchmark::DoNotOptimize(v.data()); // Allow v.data() to be clobbered.
- v.push_back(42);
- benchmark::ClobberMemory(); // Force 42 to be written to memory.
- }
-}
-```
-
-Note that `ClobberMemory()` is only available for GNU or MSVC based compilers.
-
-<a name="reporting-statistics" />
-
-### Statistics: Reporting the Mean, Median and Standard Deviation of Repeated Benchmarks
-
-By default each benchmark is run once and that single result is reported.
-However, benchmarks are often noisy and a single result may not be representative
-of the overall behavior. For this reason it's possible to repeatedly rerun the
-benchmark.
-
-The number of runs of each benchmark is specified globally by the
-`--benchmark_repetitions` flag or on a per-benchmark basis by calling
-`Repetitions` on the registered benchmark object. When a benchmark is run more
-than once the mean, median and standard deviation of the runs will be reported.
-
-Additionally the `--benchmark_report_aggregates_only={true|false}`,
-`--benchmark_display_aggregates_only={true|false}` flags or
-`ReportAggregatesOnly(bool)`, `DisplayAggregatesOnly(bool)` functions can be
-used to change how repeated tests are reported. By default the result of each
-repeated run is reported. When the `report aggregates only` option is `true`,
-only the aggregates (i.e. mean, median and standard deviation, plus complexity
-measurements if they were requested) of the runs are reported, to both
-reporters - standard output (console) and the file.
-However, when only the `display aggregates only` option is `true`,
-only the aggregates are displayed in the standard output, while the file
-output still contains everything.
-Calling `ReportAggregatesOnly(bool)` / `DisplayAggregatesOnly(bool)` on a
-registered benchmark object overrides the value of the appropriate flag for that
-benchmark.
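-
-For example, the following invocation repeats each benchmark ten times and
-reports only the aggregates, both on the console and in any requested output
-file (`mybench` is a placeholder binary name):
-
-```bash
-./mybench --benchmark_repetitions=10 --benchmark_report_aggregates_only=true
-```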
-
-<a name="custom-statistics" />
-
-### Custom Statistics
-
-While having mean, median and standard deviation is nice, this may not be
-enough for everyone. For example, you may want to know what the largest
-observation is, e.g. because you have some real-time constraints. This is easy.
-The following code will specify a custom statistic to be calculated, defined
-by a lambda function.
-
-```c++
-void BM_spin_empty(benchmark::State& state) {
- for (auto _ : state) {
- for (int x = 0; x < state.range(0); ++x) {
- benchmark::DoNotOptimize(x);
- }
- }
-}
-
-BENCHMARK(BM_spin_empty)
- ->ComputeStatistics("max", [](const std::vector<double>& v) -> double {
- return *(std::max_element(std::begin(v), std::end(v)));
- })
- ->Arg(512);
-```
-
-<a name="using-register-benchmark" />
-
-### Using RegisterBenchmark(name, fn, args...)
-
-The `RegisterBenchmark(name, func, args...)` function provides an alternative
-way to create and register benchmarks.
-`RegisterBenchmark(name, func, args...)` creates, registers, and returns a
-pointer to a new benchmark with the specified `name` that invokes
-`func(st, args...)` where `st` is a `benchmark::State` object.
-
-Unlike the `BENCHMARK` registration macros, which can only be used at global
-scope, `RegisterBenchmark` can be called anywhere. This allows
-benchmark tests to be registered programmatically.
-
-Additionally, `RegisterBenchmark` allows any callable object to be registered
-as a benchmark, including capturing lambdas and function objects.
-
-For example:
-```c++
-auto BM_test = [](benchmark::State& st, auto Inputs) { /* ... */ };
-
-int main(int argc, char** argv) {
- for (auto& test_input : { /* ... */ })
- benchmark::RegisterBenchmark(test_input.name(), BM_test, test_input);
- benchmark::Initialize(&argc, argv);
- benchmark::RunSpecifiedBenchmarks();
-}
-```
-
-<a name="exiting-with-an-error" />
-
-### Exiting with an Error
-
-When errors caused by external influences, such as file I/O and network
-communication, occur within a benchmark, the
-`State::SkipWithError(const char* msg)` function can be used to skip that run
-of the benchmark and report the error. Note that only future iterations of the
-`KeepRunning()` loop are skipped. For the ranged-for version of the benchmark
-loop, users must explicitly exit the loop, otherwise all iterations will be
-performed. Users may explicitly return to exit the benchmark immediately.
-
-The `SkipWithError(...)` function may be used at any point within the benchmark,
-including before and after the benchmark loop. Moreover, if `SkipWithError(...)`
-has been used, it is not required to reach the benchmark loop and one may return
-from the benchmark function early.
-
-For example:
-
-```c++
-static void BM_test(benchmark::State& state) {
- auto resource = GetResource();
- if (!resource.good()) {
- state.SkipWithError("Resource is not good!");
- // KeepRunning() loop will not be entered.
- }
- while (state.KeepRunning()) {
- auto data = resource.read_data();
- if (!resource.good()) {
- state.SkipWithError("Failed to read data!");
- break; // Needed to skip the rest of the iteration.
- }
- do_stuff(data);
- }
-}
-
-static void BM_test_ranged_fo(benchmark::State & state) {
- auto resource = GetResource();
- if (!resource.good()) {
- state.SkipWithError("Resource is not good!");
- return; // Early return is allowed when SkipWithError() has been used.
- }
- for (auto _ : state) {
- auto data = resource.read_data();
- if (!resource.good()) {
- state.SkipWithError("Failed to read data!");
- break; // REQUIRED to prevent all further iterations.
- }
- do_stuff(data);
- }
-}
-```
-<a name="a-faster-keep-running-loop" />
-
-### A Faster KeepRunning Loop
-
-In C++11 mode, a range-based for loop should be used in preference to
-the `KeepRunning` loop for running the benchmarks. For example:
-
-```c++
-static void BM_Fast(benchmark::State &state) {
- for (auto _ : state) {
- FastOperation();
- }
-}
-BENCHMARK(BM_Fast);
-```
-
-The reason the ranged-for loop is faster than using `KeepRunning` is
-that `KeepRunning` requires a memory load and store of the iteration count
-every iteration, whereas the ranged-for variant is able to keep the iteration
-count in a register.
-
-For example, an empty inner loop using the range-based for method looks like:
-
-```asm
-# Loop Init
- mov rbx, qword ptr [r14 + 104]
- call benchmark::State::StartKeepRunning()
- test rbx, rbx
- je .LoopEnd
-.LoopHeader: # =>This Inner Loop Header: Depth=1
- add rbx, -1
- jne .LoopHeader
-.LoopEnd:
-```
-
-Compared to an empty `KeepRunning` loop, which looks like:
-
-```asm
-.LoopHeader: # in Loop: Header=BB0_3 Depth=1
- cmp byte ptr [rbx], 1
- jne .LoopInit
-.LoopBody: # =>This Inner Loop Header: Depth=1
- mov rax, qword ptr [rbx + 8]
- lea rcx, [rax + 1]
- mov qword ptr [rbx + 8], rcx
- cmp rax, qword ptr [rbx + 104]
- jb .LoopHeader
- jmp .LoopEnd
-.LoopInit:
- mov rdi, rbx
- call benchmark::State::StartKeepRunning()
- jmp .LoopBody
-.LoopEnd:
-```
-
-Unless C++03 compatibility is required, the ranged-for variant of writing
-the benchmark loop should be preferred.
-
-<a name="disabling-cpu-frequency-scaling" />
-
-### Disabling CPU Frequency Scaling
-
-If you see this error:
-
-```
-***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
-```
-
-you might want to disable the CPU frequency scaling while running the benchmark:
-
-```bash
-sudo cpupower frequency-set --governor performance
-./mybench
-sudo cpupower frequency-set --governor powersave
-```
diff --git a/WORKSPACE b/WORKSPACE
index 631f3ba..833590f 100644
--- a/WORKSPACE
+++ b/WORKSPACE
@@ -1,51 +1,22 @@
workspace(name = "com_github_google_benchmark")
-load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
+load("//:bazel/benchmark_deps.bzl", "benchmark_deps")
-http_archive(
- name = "rules_cc",
- strip_prefix = "rules_cc-a508235df92e71d537fcbae0c7c952ea6957a912",
- urls = ["https://github.com/bazelbuild/rules_cc/archive/a508235df92e71d537fcbae0c7c952ea6957a912.zip"],
- sha256 = "d7dc12c1d5bc1a87474de8e3d17b7731a4dcebcfb8aa3990fe8ac7734ef12f2f",
-)
+benchmark_deps()
-http_archive(
- name = "com_google_absl",
- sha256 = "f41868f7a938605c92936230081175d1eae87f6ea2c248f41077c8f88316f111",
- strip_prefix = "abseil-cpp-20200225.2",
- urls = ["https://github.com/abseil/abseil-cpp/archive/20200225.2.tar.gz"],
-)
+load("@rules_foreign_cc//foreign_cc:repositories.bzl", "rules_foreign_cc_dependencies")
-http_archive(
- name = "com_google_googletest",
- strip_prefix = "googletest-3f0cf6b62ad1eb50d8736538363d3580dd640c3e",
- urls = ["https://github.com/google/googletest/archive/3f0cf6b62ad1eb50d8736538363d3580dd640c3e.zip"],
- sha256 = "8f827dd550db8b4fdf73904690df0be9fccc161017c9038a724bc9a0617a1bc8",
-)
+rules_foreign_cc_dependencies()
-http_archive(
- name = "pybind11",
- build_file = "@//bindings/python:pybind11.BUILD",
- sha256 = "1eed57bc6863190e35637290f97a20c81cfe4d9090ac0a24f3bbf08f265eb71d",
- strip_prefix = "pybind11-2.4.3",
- urls = ["https://github.com/pybind/pybind11/archive/v2.4.3.tar.gz"],
+load("@rules_python//python:pip.bzl", pip3_install="pip_install")
+
+pip3_install(
+ name = "tools_pip_deps",
+ requirements = "//tools:requirements.txt",
)
new_local_repository(
name = "python_headers",
build_file = "@//bindings/python:python_headers.BUILD",
- path = "/usr/include/python3.6", # May be overwritten by setup.py.
-)
-
-http_archive(
- name = "rules_python",
- url = "https://github.com/bazelbuild/rules_python/releases/download/0.1.0/rules_python-0.1.0.tar.gz",
- sha256 = "b6d46438523a3ec0f3cead544190ee13223a52f6a6765a29eae7b7cc24cc83a0",
-)
-
-load("@rules_python//python:pip.bzl", pip3_install="pip_install")
-
-pip3_install(
- name = "py_deps",
- requirements = "//:requirements.txt",
+ path = "<PYTHON_INCLUDE_PATH>", # May be overwritten by setup.py.
)
diff --git a/WORKSPACE.bzlmod b/WORKSPACE.bzlmod
new file mode 100644
index 0000000..9526376
--- /dev/null
+++ b/WORKSPACE.bzlmod
@@ -0,0 +1,2 @@
+# This file marks the root of the Bazel workspace.
+# See MODULE.bazel for dependencies and setup.
diff --git a/_config.yml b/_config.yml
index 1885487..1fa5ff8 100644
--- a/_config.yml
+++ b/_config.yml
@@ -1 +1,2 @@
-theme: jekyll-theme-midnight
\ No newline at end of file
+theme: jekyll-theme-midnight
+markdown: GFM
diff --git a/bazel/benchmark_deps.bzl b/bazel/benchmark_deps.bzl
new file mode 100644
index 0000000..667065f
--- /dev/null
+++ b/bazel/benchmark_deps.bzl
@@ -0,0 +1,65 @@
+load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
+load("@bazel_tools//tools/build_defs/repo:git.bzl", "new_git_repository")
+
+def benchmark_deps():
+ """Loads dependencies required to build Google Benchmark."""
+
+ if "bazel_skylib" not in native.existing_rules():
+ http_archive(
+ name = "bazel_skylib",
+ sha256 = "f7be3474d42aae265405a592bb7da8e171919d74c16f082a5457840f06054728",
+ urls = [
+ "https://mirror.bazel.build/github.com/bazelbuild/bazel-skylib/releases/download/1.2.1/bazel-skylib-1.2.1.tar.gz",
+ "https://github.com/bazelbuild/bazel-skylib/releases/download/1.2.1/bazel-skylib-1.2.1.tar.gz",
+ ],
+ )
+
+ if "rules_foreign_cc" not in native.existing_rules():
+ http_archive(
+ name = "rules_foreign_cc",
+ sha256 = "bcd0c5f46a49b85b384906daae41d277b3dc0ff27c7c752cc51e43048a58ec83",
+ strip_prefix = "rules_foreign_cc-0.7.1",
+ url = "https://github.com/bazelbuild/rules_foreign_cc/archive/0.7.1.tar.gz",
+ )
+
+ if "rules_python" not in native.existing_rules():
+ http_archive(
+ name = "rules_python",
+ url = "https://github.com/bazelbuild/rules_python/releases/download/0.1.0/rules_python-0.1.0.tar.gz",
+ sha256 = "b6d46438523a3ec0f3cead544190ee13223a52f6a6765a29eae7b7cc24cc83a0",
+ )
+
+ if "com_google_absl" not in native.existing_rules():
+ http_archive(
+ name = "com_google_absl",
+ sha256 = "f41868f7a938605c92936230081175d1eae87f6ea2c248f41077c8f88316f111",
+ strip_prefix = "abseil-cpp-20200225.2",
+ urls = ["https://github.com/abseil/abseil-cpp/archive/20200225.2.tar.gz"],
+ )
+
+ if "com_google_googletest" not in native.existing_rules():
+ new_git_repository(
+ name = "com_google_googletest",
+ remote = "https://github.com/google/googletest.git",
+ tag = "release-1.11.0",
+ )
+
+ if "nanobind" not in native.existing_rules():
+ new_git_repository(
+ name = "nanobind",
+ remote = "https://github.com/wjakob/nanobind.git",
+ tag = "v1.4.0",
+ build_file = "@//bindings/python:nanobind.BUILD",
+ recursive_init_submodules = True,
+ )
+
+ if "libpfm" not in native.existing_rules():
+ # Downloaded from v4.9.0 tag at https://sourceforge.net/p/perfmon2/libpfm4/ref/master/tags/
+ http_archive(
+ name = "libpfm",
+ build_file = str(Label("//tools:libpfm.BUILD.bazel")),
+ sha256 = "5da5f8872bde14b3634c9688d980f68bda28b510268723cc12973eedbab9fecc",
+ type = "tar.gz",
+ strip_prefix = "libpfm-4.11.0",
+ urls = ["https://sourceforge.net/projects/perfmon2/files/libpfm4/libpfm-4.11.0.tar.gz/download"],
+ )
diff --git a/bindings/python/build_defs.bzl b/bindings/python/build_defs.bzl
index 45907aa..009820a 100644
--- a/bindings/python/build_defs.bzl
+++ b/bindings/python/build_defs.bzl
@@ -8,8 +8,8 @@ def py_extension(name, srcs, hdrs = [], copts = [], features = [], deps = []):
shared_lib_name = name + shared_lib_suffix
native.cc_binary(
name = shared_lib_name,
- linkshared = 1,
- linkstatic = 1,
+ linkshared = True,
+ linkstatic = True,
srcs = srcs + hdrs,
copts = copts,
features = features,
diff --git a/bindings/python/google_benchmark/BUILD b/bindings/python/google_benchmark/BUILD
index 3c1561f..89ec76e 100644
--- a/bindings/python/google_benchmark/BUILD
+++ b/bindings/python/google_benchmark/BUILD
@@ -6,7 +6,6 @@ py_library(
visibility = ["//visibility:public"],
deps = [
":_benchmark",
- # pip; absl:app
],
)
@@ -17,10 +16,13 @@ py_extension(
"-fexceptions",
"-fno-strict-aliasing",
],
- features = ["-use_header_modules"],
+ features = [
+ "-use_header_modules",
+ "-parse_headers",
+ ],
deps = [
"//:benchmark",
- "@pybind11",
+ "@nanobind",
"@python_headers",
],
)
diff --git a/bindings/python/google_benchmark/__init__.py b/bindings/python/google_benchmark/__init__.py
index f31285e..642d78a 100644
--- a/bindings/python/google_benchmark/__init__.py
+++ b/bindings/python/google_benchmark/__init__.py
@@ -26,6 +26,7 @@ Example usage:
if __name__ == '__main__':
benchmark.main()
"""
+import atexit
from absl import app
from google_benchmark import _benchmark
@@ -44,6 +45,7 @@ from google_benchmark._benchmark import (
oNLogN,
oAuto,
oLambda,
+ State,
)
@@ -64,9 +66,10 @@ __all__ = [
"oNLogN",
"oAuto",
"oLambda",
+ "State",
]
-__version__ = "0.2.0"
+__version__ = "1.8.3"
class __OptionMaker:
@@ -101,7 +104,7 @@ class __OptionMaker:
options = self.make(func_or_options)
options.builder_calls.append((builder_name, args, kwargs))
# The decorator returns Options so it is not technically a decorator
- # and needs a final call to @regiser
+ # and needs a final call to @register
return options
return __decorator
@@ -110,7 +113,7 @@ class __OptionMaker:
# Alias for nicer API.
-# We have to instanciate an object, even if stateless, to be able to use __getattr__
+# We have to instantiate an object, even if stateless, to be able to use __getattr__
# on option.range
option = __OptionMaker()
@@ -156,3 +159,4 @@ def main(argv=None):
# Methods for use with custom main function.
initialize = _benchmark.Initialize
run_benchmarks = _benchmark.RunSpecifiedBenchmarks
+atexit.register(_benchmark.ClearRegisteredBenchmarks)
diff --git a/bindings/python/google_benchmark/benchmark.cc b/bindings/python/google_benchmark/benchmark.cc
index d80816e..f444769 100644
--- a/bindings/python/google_benchmark/benchmark.cc
+++ b/bindings/python/google_benchmark/benchmark.cc
@@ -1,20 +1,17 @@
// Benchmark for Python.
-#include <map>
-#include <string>
-#include <vector>
-
-#include "pybind11/operators.h"
-#include "pybind11/pybind11.h"
-#include "pybind11/stl.h"
-#include "pybind11/stl_bind.h"
-
#include "benchmark/benchmark.h"
-PYBIND11_MAKE_OPAQUE(benchmark::UserCounters);
+#include "nanobind/nanobind.h"
+#include "nanobind/operators.h"
+#include "nanobind/stl/bind_map.h"
+#include "nanobind/stl/string.h"
+#include "nanobind/stl/vector.h"
+
+NB_MAKE_OPAQUE(benchmark::UserCounters);
namespace {
-namespace py = ::pybind11;
+namespace nb = nanobind;
std::vector<std::string> Initialize(const std::vector<std::string>& argv) {
// The `argv` pointers here become invalid when this function returns, but
@@ -37,15 +34,16 @@ std::vector<std::string> Initialize(const std::vector<std::string>& argv) {
return remaining_argv;
}
-benchmark::internal::Benchmark* RegisterBenchmark(const char* name,
- py::function f) {
+benchmark::internal::Benchmark* RegisterBenchmark(const std::string& name,
+ nb::callable f) {
return benchmark::RegisterBenchmark(
name, [f](benchmark::State& state) { f(&state); });
}
-PYBIND11_MODULE(_benchmark, m) {
+NB_MODULE(_benchmark, m) {
+
using benchmark::TimeUnit;
- py::enum_<TimeUnit>(m, "TimeUnit")
+ nb::enum_<TimeUnit>(m, "TimeUnit")
.value("kNanosecond", TimeUnit::kNanosecond)
.value("kMicrosecond", TimeUnit::kMicrosecond)
.value("kMillisecond", TimeUnit::kMillisecond)
@@ -53,72 +51,74 @@ PYBIND11_MODULE(_benchmark, m) {
.export_values();
using benchmark::BigO;
- py::enum_<BigO>(m, "BigO")
+ nb::enum_<BigO>(m, "BigO")
.value("oNone", BigO::oNone)
.value("o1", BigO::o1)
.value("oN", BigO::oN)
.value("oNSquared", BigO::oNSquared)
.value("oNCubed", BigO::oNCubed)
.value("oLogN", BigO::oLogN)
- .value("oNLogN", BigO::oLogN)
+ .value("oNLogN", BigO::oNLogN)
.value("oAuto", BigO::oAuto)
.value("oLambda", BigO::oLambda)
.export_values();
using benchmark::internal::Benchmark;
- py::class_<Benchmark>(m, "Benchmark")
- // For methods returning a pointer tor the current object, reference
- // return policy is used to ask pybind not to take ownership oof the
+ nb::class_<Benchmark>(m, "Benchmark")
+ // For methods returning a pointer to the current object, reference
+ // return policy is used to ask nanobind not to take ownership of the
// returned object and avoid calling delete on it.
// https://pybind11.readthedocs.io/en/stable/advanced/functions.html#return-value-policies
//
// For methods taking a const std::vector<...>&, a copy is created
// because a it is bound to a Python list.
// https://pybind11.readthedocs.io/en/stable/advanced/cast/stl.html
- .def("unit", &Benchmark::Unit, py::return_value_policy::reference)
- .def("arg", &Benchmark::Arg, py::return_value_policy::reference)
- .def("args", &Benchmark::Args, py::return_value_policy::reference)
- .def("range", &Benchmark::Range, py::return_value_policy::reference,
- py::arg("start"), py::arg("limit"))
+ .def("unit", &Benchmark::Unit, nb::rv_policy::reference)
+ .def("arg", &Benchmark::Arg, nb::rv_policy::reference)
+ .def("args", &Benchmark::Args, nb::rv_policy::reference)
+ .def("range", &Benchmark::Range, nb::rv_policy::reference,
+ nb::arg("start"), nb::arg("limit"))
.def("dense_range", &Benchmark::DenseRange,
- py::return_value_policy::reference, py::arg("start"),
- py::arg("limit"), py::arg("step") = 1)
- .def("ranges", &Benchmark::Ranges, py::return_value_policy::reference)
+ nb::rv_policy::reference, nb::arg("start"),
+ nb::arg("limit"), nb::arg("step") = 1)
+ .def("ranges", &Benchmark::Ranges, nb::rv_policy::reference)
.def("args_product", &Benchmark::ArgsProduct,
- py::return_value_policy::reference)
- .def("arg_name", &Benchmark::ArgName, py::return_value_policy::reference)
+ nb::rv_policy::reference)
+ .def("arg_name", &Benchmark::ArgName, nb::rv_policy::reference)
.def("arg_names", &Benchmark::ArgNames,
- py::return_value_policy::reference)
+ nb::rv_policy::reference)
.def("range_pair", &Benchmark::RangePair,
- py::return_value_policy::reference, py::arg("lo1"), py::arg("hi1"),
- py::arg("lo2"), py::arg("hi2"))
+ nb::rv_policy::reference, nb::arg("lo1"), nb::arg("hi1"),
+ nb::arg("lo2"), nb::arg("hi2"))
.def("range_multiplier", &Benchmark::RangeMultiplier,
- py::return_value_policy::reference)
- .def("min_time", &Benchmark::MinTime, py::return_value_policy::reference)
+ nb::rv_policy::reference)
+ .def("min_time", &Benchmark::MinTime, nb::rv_policy::reference)
+ .def("min_warmup_time", &Benchmark::MinWarmUpTime,
+ nb::rv_policy::reference)
.def("iterations", &Benchmark::Iterations,
- py::return_value_policy::reference)
+ nb::rv_policy::reference)
.def("repetitions", &Benchmark::Repetitions,
- py::return_value_policy::reference)
+ nb::rv_policy::reference)
.def("report_aggregates_only", &Benchmark::ReportAggregatesOnly,
- py::return_value_policy::reference, py::arg("value") = true)
+ nb::rv_policy::reference, nb::arg("value") = true)
.def("display_aggregates_only", &Benchmark::DisplayAggregatesOnly,
- py::return_value_policy::reference, py::arg("value") = true)
+ nb::rv_policy::reference, nb::arg("value") = true)
.def("measure_process_cpu_time", &Benchmark::MeasureProcessCPUTime,
- py::return_value_policy::reference)
+ nb::rv_policy::reference)
.def("use_real_time", &Benchmark::UseRealTime,
- py::return_value_policy::reference)
+ nb::rv_policy::reference)
.def("use_manual_time", &Benchmark::UseManualTime,
- py::return_value_policy::reference)
+ nb::rv_policy::reference)
.def(
"complexity",
(Benchmark * (Benchmark::*)(benchmark::BigO)) & Benchmark::Complexity,
- py::return_value_policy::reference,
- py::arg("complexity") = benchmark::oAuto);
+ nb::rv_policy::reference,
+ nb::arg("complexity") = benchmark::oAuto);
using benchmark::Counter;
- py::class_<Counter> py_counter(m, "Counter");
+ nb::class_<Counter> py_counter(m, "Counter");
- py::enum_<Counter::Flags>(py_counter, "Flags")
+ nb::enum_<Counter::Flags>(py_counter, "Flags")
.value("kDefaults", Counter::Flags::kDefaults)
.value("kIsRate", Counter::Flags::kIsRate)
.value("kAvgThreads", Counter::Flags::kAvgThreads)
@@ -130,52 +130,55 @@ PYBIND11_MODULE(_benchmark, m) {
.value("kAvgIterationsRate", Counter::Flags::kAvgIterationsRate)
.value("kInvert", Counter::Flags::kInvert)
.export_values()
- .def(py::self | py::self);
+ .def(nb::self | nb::self);
- py::enum_<Counter::OneK>(py_counter, "OneK")
+ nb::enum_<Counter::OneK>(py_counter, "OneK")
.value("kIs1000", Counter::OneK::kIs1000)
.value("kIs1024", Counter::OneK::kIs1024)
.export_values();
py_counter
- .def(py::init<double, Counter::Flags, Counter::OneK>(),
- py::arg("value") = 0., py::arg("flags") = Counter::kDefaults,
- py::arg("k") = Counter::kIs1000)
- .def(py::init([](double value) { return Counter(value); }))
- .def_readwrite("value", &Counter::value)
- .def_readwrite("flags", &Counter::flags)
- .def_readwrite("oneK", &Counter::oneK);
- py::implicitly_convertible<py::float_, Counter>();
- py::implicitly_convertible<py::int_, Counter>();
-
- py::bind_map<benchmark::UserCounters>(m, "UserCounters");
+ .def(nb::init<double, Counter::Flags, Counter::OneK>(),
+ nb::arg("value") = 0., nb::arg("flags") = Counter::kDefaults,
+ nb::arg("k") = Counter::kIs1000)
+ .def("__init__", ([](Counter *c, double value) { new (c) Counter(value); }))
+ .def_rw("value", &Counter::value)
+ .def_rw("flags", &Counter::flags)
+ .def_rw("oneK", &Counter::oneK)
+ .def(nb::init_implicit<double>());
+
+ nb::implicitly_convertible<nb::int_, Counter>();
+
+ nb::bind_map<benchmark::UserCounters>(m, "UserCounters");
using benchmark::State;
- py::class_<State>(m, "State")
+ nb::class_<State>(m, "State")
.def("__bool__", &State::KeepRunning)
- .def_property_readonly("keep_running", &State::KeepRunning)
+ .def_prop_ro("keep_running", &State::KeepRunning)
.def("pause_timing", &State::PauseTiming)
.def("resume_timing", &State::ResumeTiming)
.def("skip_with_error", &State::SkipWithError)
- .def_property_readonly("error_occured", &State::error_occurred)
+ .def_prop_ro("error_occurred", &State::error_occurred)
.def("set_iteration_time", &State::SetIterationTime)
- .def_property("bytes_processed", &State::bytes_processed,
+ .def_prop_rw("bytes_processed", &State::bytes_processed,
&State::SetBytesProcessed)
- .def_property("complexity_n", &State::complexity_length_n,
+ .def_prop_rw("complexity_n", &State::complexity_length_n,
&State::SetComplexityN)
- .def_property("items_processed", &State::items_processed,
- &State::SetItemsProcessed)
- .def("set_label", (void (State::*)(const char*)) & State::SetLabel)
- .def("range", &State::range, py::arg("pos") = 0)
- .def_property_readonly("iterations", &State::iterations)
- .def_readwrite("counters", &State::counters)
- .def_readonly("thread_index", &State::thread_index)
- .def_readonly("threads", &State::threads);
+ .def_prop_rw("items_processed", &State::items_processed,
+ &State::SetItemsProcessed)
+ .def("set_label", &State::SetLabel)
+ .def("range", &State::range, nb::arg("pos") = 0)
+ .def_prop_ro("iterations", &State::iterations)
+ .def_prop_ro("name", &State::name)
+ .def_rw("counters", &State::counters)
+ .def_prop_ro("thread_index", &State::thread_index)
+ .def_prop_ro("threads", &State::threads);
m.def("Initialize", Initialize);
m.def("RegisterBenchmark", RegisterBenchmark,
- py::return_value_policy::reference);
+ nb::rv_policy::reference);
m.def("RunSpecifiedBenchmarks",
[]() { benchmark::RunSpecifiedBenchmarks(); });
+ m.def("ClearRegisteredBenchmarks", benchmark::ClearRegisteredBenchmarks);
};
} // namespace
diff --git a/bindings/python/google_benchmark/example.py b/bindings/python/google_benchmark/example.py
index 9134e8c..d95a043 100644
--- a/bindings/python/google_benchmark/example.py
+++ b/bindings/python/google_benchmark/example.py
@@ -72,7 +72,7 @@ def manual_timing(state):
@benchmark.register
def custom_counters(state):
- """Collect cutom metric using benchmark.Counter."""
+ """Collect custom metric using benchmark.Counter."""
num_foo = 0.0
while state:
# Benchmark some code here
@@ -102,7 +102,7 @@ def with_options(state):
@benchmark.register(name="sum_million_microseconds")
@benchmark.option.unit(benchmark.kMicrosecond)
-def with_options(state):
+def with_options2(state):
while state:
sum(range(1_000_000))
diff --git a/bindings/python/nanobind.BUILD b/bindings/python/nanobind.BUILD
new file mode 100644
index 0000000..cd9faf9
--- /dev/null
+++ b/bindings/python/nanobind.BUILD
@@ -0,0 +1,17 @@
+cc_library(
+ name = "nanobind",
+ srcs = glob([
+ "src/*.cpp"
+ ]),
+ copts = ["-fexceptions"],
+ includes = ["include", "ext/robin_map/include"],
+ textual_hdrs = glob(
+ [
+ "include/**/*.h",
+ "src/*.h",
+ "ext/robin_map/include/tsl/*.h",
+ ],
+ ),
+ deps = ["@python_headers"],
+ visibility = ["//visibility:public"],
+)
diff --git a/bindings/python/pybind11.BUILD b/bindings/python/pybind11.BUILD
deleted file mode 100644
index bc83350..0000000
--- a/bindings/python/pybind11.BUILD
+++ /dev/null
@@ -1,20 +0,0 @@
-cc_library(
- name = "pybind11",
- hdrs = glob(
- include = [
- "include/pybind11/*.h",
- "include/pybind11/detail/*.h",
- ],
- exclude = [
- "include/pybind11/common.h",
- "include/pybind11/eigen.h",
- ],
- ),
- copts = [
- "-fexceptions",
- "-Wno-undefined-inline",
- "-Wno-pragma-once-outside-header",
- ],
- includes = ["include"],
- visibility = ["//visibility:public"],
-)
diff --git a/bindings/python/requirements.txt b/bindings/python/requirements.txt
deleted file mode 100644
index f5bbe7e..0000000
--- a/bindings/python/requirements.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-absl-py>=0.7.1
-
diff --git a/cmake/AddCXXCompilerFlag.cmake b/cmake/AddCXXCompilerFlag.cmake
index d0d2099..858589e 100644
--- a/cmake/AddCXXCompilerFlag.cmake
+++ b/cmake/AddCXXCompilerFlag.cmake
@@ -34,9 +34,11 @@ function(add_cxx_compiler_flag FLAG)
check_cxx_compiler_flag("${FLAG}" ${MANGLED_FLAG})
set(CMAKE_REQUIRED_FLAGS "${OLD_CMAKE_REQUIRED_FLAGS}")
if(${MANGLED_FLAG})
- set(VARIANT ${ARGV1})
- if(ARGV1)
+ if(ARGC GREATER 1)
+ set(VARIANT ${ARGV1})
string(TOUPPER "_${VARIANT}" VARIANT)
+ else()
+ set(VARIANT "")
endif()
set(CMAKE_CXX_FLAGS${VARIANT} "${CMAKE_CXX_FLAGS${VARIANT}} ${BENCHMARK_CXX_FLAGS${VARIANT}} ${FLAG}" PARENT_SCOPE)
endif()
@@ -49,9 +51,11 @@ function(add_required_cxx_compiler_flag FLAG)
check_cxx_compiler_flag("${FLAG}" ${MANGLED_FLAG})
set(CMAKE_REQUIRED_FLAGS "${OLD_CMAKE_REQUIRED_FLAGS}")
if(${MANGLED_FLAG})
- set(VARIANT ${ARGV1})
- if(ARGV1)
+ if(ARGC GREATER 1)
+ set(VARIANT ${ARGV1})
string(TOUPPER "_${VARIANT}" VARIANT)
+ else()
+ set(VARIANT "")
endif()
set(CMAKE_CXX_FLAGS${VARIANT} "${CMAKE_CXX_FLAGS${VARIANT}} ${FLAG}" PARENT_SCOPE)
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${FLAG}" PARENT_SCOPE)
diff --git a/cmake/CXXFeatureCheck.cmake b/cmake/CXXFeatureCheck.cmake
index 62e6741..e514826 100644
--- a/cmake/CXXFeatureCheck.cmake
+++ b/cmake/CXXFeatureCheck.cmake
@@ -17,6 +17,8 @@ if(__cxx_feature_check)
endif()
set(__cxx_feature_check INCLUDED)
+option(CXXFEATURECHECK_DEBUG OFF)
+
function(cxx_feature_check FILE)
string(TOLOWER ${FILE} FILE)
string(TOUPPER ${FILE} VAR)
@@ -27,18 +29,22 @@ function(cxx_feature_check FILE)
return()
endif()
+ set(FEATURE_CHECK_CMAKE_FLAGS ${BENCHMARK_CXX_LINKER_FLAGS})
if (ARGC GREATER 1)
message(STATUS "Enabling additional flags: ${ARGV1}")
- list(APPEND BENCHMARK_CXX_LINKER_FLAGS ${ARGV1})
+ list(APPEND FEATURE_CHECK_CMAKE_FLAGS ${ARGV1})
endif()
if (NOT DEFINED COMPILE_${FEATURE})
- message(STATUS "Performing Test ${FEATURE}")
if(CMAKE_CROSSCOMPILING)
+ message(STATUS "Cross-compiling to test ${FEATURE}")
try_compile(COMPILE_${FEATURE}
${CMAKE_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/cmake/${FILE}.cpp
- CMAKE_FLAGS ${BENCHMARK_CXX_LINKER_FLAGS}
- LINK_LIBRARIES ${BENCHMARK_CXX_LIBRARIES})
+ CXX_STANDARD 11
+ CXX_STANDARD_REQUIRED ON
+ CMAKE_FLAGS ${FEATURE_CHECK_CMAKE_FLAGS}
+ LINK_LIBRARIES ${BENCHMARK_CXX_LIBRARIES}
+ OUTPUT_VARIABLE COMPILE_OUTPUT_VAR)
if(COMPILE_${FEATURE})
message(WARNING
"If you see build failures due to cross compilation, try setting HAVE_${VAR} to 0")
@@ -47,11 +53,14 @@ function(cxx_feature_check FILE)
set(RUN_${FEATURE} 1 CACHE INTERNAL "")
endif()
else()
- message(STATUS "Performing Test ${FEATURE}")
+ message(STATUS "Compiling and running to test ${FEATURE}")
try_run(RUN_${FEATURE} COMPILE_${FEATURE}
${CMAKE_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/cmake/${FILE}.cpp
- CMAKE_FLAGS ${BENCHMARK_CXX_LINKER_FLAGS}
- LINK_LIBRARIES ${BENCHMARK_CXX_LIBRARIES})
+ CXX_STANDARD 11
+ CXX_STANDARD_REQUIRED ON
+ CMAKE_FLAGS ${FEATURE_CHECK_CMAKE_FLAGS}
+ LINK_LIBRARIES ${BENCHMARK_CXX_LIBRARIES}
+ COMPILE_OUTPUT_VARIABLE COMPILE_OUTPUT_VAR)
endif()
endif()
@@ -61,7 +70,11 @@ function(cxx_feature_check FILE)
add_definitions(-DHAVE_${VAR})
else()
if(NOT COMPILE_${FEATURE})
- message(STATUS "Performing Test ${FEATURE} -- failed to compile")
+ if(CXXFEATURECHECK_DEBUG)
+ message(STATUS "Performing Test ${FEATURE} -- failed to compile: ${COMPILE_OUTPUT_VAR}")
+ else()
+ message(STATUS "Performing Test ${FEATURE} -- failed to compile")
+ endif()
else()
message(STATUS "Performing Test ${FEATURE} -- compiled but failed to run")
endif()
diff --git a/cmake/Config.cmake.in b/cmake/Config.cmake.in
index 6e9256e..2e15f0c 100644
--- a/cmake/Config.cmake.in
+++ b/cmake/Config.cmake.in
@@ -1 +1,7 @@
+@PACKAGE_INIT@
+
+include (CMakeFindDependencyMacro)
+
+find_dependency (Threads)
+
include("${CMAKE_CURRENT_LIST_DIR}/@targets_export_name@.cmake")
diff --git a/cmake/GetGitVersion.cmake b/cmake/GetGitVersion.cmake
index 4f10f22..04a1f9b 100644
--- a/cmake/GetGitVersion.cmake
+++ b/cmake/GetGitVersion.cmake
@@ -20,16 +20,20 @@ set(__get_git_version INCLUDED)
function(get_git_version var)
if(GIT_EXECUTABLE)
- execute_process(COMMAND ${GIT_EXECUTABLE} describe --match "v[0-9]*.[0-9]*.[0-9]*" --abbrev=8
+ execute_process(COMMAND ${GIT_EXECUTABLE} describe --tags --match "v[0-9]*.[0-9]*.[0-9]*" --abbrev=8
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
RESULT_VARIABLE status
- OUTPUT_VARIABLE GIT_VERSION
+ OUTPUT_VARIABLE GIT_DESCRIBE_VERSION
ERROR_QUIET)
- if(${status})
- set(GIT_VERSION "v0.0.0")
+ if(status)
+ set(GIT_DESCRIBE_VERSION "v0.0.0")
+ endif()
+
+ string(STRIP ${GIT_DESCRIBE_VERSION} GIT_DESCRIBE_VERSION)
+ if(GIT_DESCRIBE_VERSION MATCHES v[^-]*-)
+ string(REGEX REPLACE "v([^-]*)-([0-9]+)-.*" "\\1.\\2" GIT_VERSION ${GIT_DESCRIBE_VERSION})
else()
- string(STRIP ${GIT_VERSION} GIT_VERSION)
- string(REGEX REPLACE "-[0-9]+-g" "-" GIT_VERSION ${GIT_VERSION})
+ string(REGEX REPLACE "v(.*)" "\\1" GIT_VERSION ${GIT_DESCRIBE_VERSION})
endif()
# Work out if the repository is dirty
@@ -43,12 +47,12 @@ function(get_git_version var)
ERROR_QUIET)
string(COMPARE NOTEQUAL "${GIT_DIFF_INDEX}" "" GIT_DIRTY)
if (${GIT_DIRTY})
- set(GIT_VERSION "${GIT_VERSION}-dirty")
+ set(GIT_DESCRIBE_VERSION "${GIT_DESCRIBE_VERSION}-dirty")
endif()
+ message(STATUS "git version: ${GIT_DESCRIBE_VERSION} normalized to ${GIT_VERSION}")
else()
- set(GIT_VERSION "v0.0.0")
+ set(GIT_VERSION "0.0.0")
endif()
- message(STATUS "git Version: ${GIT_VERSION}")
set(${var} ${GIT_VERSION} PARENT_SCOPE)
endfunction()
diff --git a/cmake/GoogleTest.cmake b/cmake/GoogleTest.cmake
index dd611fc..e66e9d1 100644
--- a/cmake/GoogleTest.cmake
+++ b/cmake/GoogleTest.cmake
@@ -35,7 +35,24 @@ add_subdirectory(${GOOGLETEST_SOURCE_DIR}
${GOOGLETEST_BINARY_DIR}
EXCLUDE_FROM_ALL)
-set_target_properties(gtest PROPERTIES INTERFACE_SYSTEM_INCLUDE_DIRECTORIES $<TARGET_PROPERTY:gtest,INTERFACE_INCLUDE_DIRECTORIES>)
-set_target_properties(gtest_main PROPERTIES INTERFACE_SYSTEM_INCLUDE_DIRECTORIES $<TARGET_PROPERTY:gtest_main,INTERFACE_INCLUDE_DIRECTORIES>)
-set_target_properties(gmock PROPERTIES INTERFACE_SYSTEM_INCLUDE_DIRECTORIES $<TARGET_PROPERTY:gmock,INTERFACE_INCLUDE_DIRECTORIES>)
-set_target_properties(gmock_main PROPERTIES INTERFACE_SYSTEM_INCLUDE_DIRECTORIES $<TARGET_PROPERTY:gmock_main,INTERFACE_INCLUDE_DIRECTORIES>)
+# googletest doesn't seem to want to stay build warning clean so let's not hurt ourselves.
+if (MSVC)
+ target_compile_options(gtest PRIVATE "/wd4244" "/wd4722")
+ target_compile_options(gtest_main PRIVATE "/wd4244" "/wd4722")
+ target_compile_options(gmock PRIVATE "/wd4244" "/wd4722")
+ target_compile_options(gmock_main PRIVATE "/wd4244" "/wd4722")
+else()
+ target_compile_options(gtest PRIVATE "-w")
+ target_compile_options(gtest_main PRIVATE "-w")
+ target_compile_options(gmock PRIVATE "-w")
+ target_compile_options(gmock_main PRIVATE "-w")
+endif()
+
+if(NOT DEFINED GTEST_COMPILE_COMMANDS)
+ set(GTEST_COMPILE_COMMANDS ON)
+endif()
+
+set_target_properties(gtest PROPERTIES INTERFACE_SYSTEM_INCLUDE_DIRECTORIES $<TARGET_PROPERTY:gtest,INTERFACE_INCLUDE_DIRECTORIES> EXPORT_COMPILE_COMMANDS ${GTEST_COMPILE_COMMANDS})
+set_target_properties(gtest_main PROPERTIES INTERFACE_SYSTEM_INCLUDE_DIRECTORIES $<TARGET_PROPERTY:gtest_main,INTERFACE_INCLUDE_DIRECTORIES> EXPORT_COMPILE_COMMANDS ${GTEST_COMPILE_COMMANDS})
+set_target_properties(gmock PROPERTIES INTERFACE_SYSTEM_INCLUDE_DIRECTORIES $<TARGET_PROPERTY:gmock,INTERFACE_INCLUDE_DIRECTORIES> EXPORT_COMPILE_COMMANDS ${GTEST_COMPILE_COMMANDS})
+set_target_properties(gmock_main PROPERTIES INTERFACE_SYSTEM_INCLUDE_DIRECTORIES $<TARGET_PROPERTY:gmock_main,INTERFACE_INCLUDE_DIRECTORIES> EXPORT_COMPILE_COMMANDS ${GTEST_COMPILE_COMMANDS})
diff --git a/cmake/GoogleTest.cmake.in b/cmake/GoogleTest.cmake.in
index fd957ff..ce653ac 100644
--- a/cmake/GoogleTest.cmake.in
+++ b/cmake/GoogleTest.cmake.in
@@ -31,13 +31,14 @@ if(EXISTS "${GOOGLETEST_PATH}" AND IS_DIRECTORY "${GOOGLETEST_PATH}"
)
else()
if(NOT ALLOW_DOWNLOADING_GOOGLETEST)
- message(SEND_ERROR "Did not find Google Test sources! Either pass correct path in GOOGLETEST_PATH, or enable BENCHMARK_DOWNLOAD_DEPENDENCIES, or disable BENCHMARK_ENABLE_GTEST_TESTS / BENCHMARK_ENABLE_TESTING.")
+ message(SEND_ERROR "Did not find Google Test sources! Either pass correct path in GOOGLETEST_PATH, or enable BENCHMARK_DOWNLOAD_DEPENDENCIES, or disable BENCHMARK_USE_BUNDLED_GTEST, or disable BENCHMARK_ENABLE_GTEST_TESTS / BENCHMARK_ENABLE_TESTING.")
+ return()
else()
message(WARNING "Did not find Google Test sources! Fetching from web...")
ExternalProject_Add(
googletest
GIT_REPOSITORY https://github.com/google/googletest.git
- GIT_TAG master
+ GIT_TAG "release-1.11.0"
PREFIX "${CMAKE_BINARY_DIR}"
STAMP_DIR "${CMAKE_BINARY_DIR}/stamp"
DOWNLOAD_DIR "${CMAKE_BINARY_DIR}/download"
diff --git a/cmake/Modules/FindPFM.cmake b/cmake/Modules/FindPFM.cmake
new file mode 100644
index 0000000..4c1ce93
--- /dev/null
+++ b/cmake/Modules/FindPFM.cmake
@@ -0,0 +1,28 @@
+# If successful, the following variables will be defined:
+# PFM_FOUND.
+# PFM_LIBRARIES
+# PFM_INCLUDE_DIRS
+# the following target will be defined:
+# PFM::libpfm
+
+include(FeatureSummary)
+include(FindPackageHandleStandardArgs)
+
+set_package_properties(PFM PROPERTIES
+ URL http://perfmon2.sourceforge.net/
+ DESCRIPTION "A helper library to develop monitoring tools"
+ PURPOSE "Used to program specific performance monitoring events")
+
+find_library(PFM_LIBRARY NAMES pfm)
+find_path(PFM_INCLUDE_DIR NAMES perfmon/pfmlib.h)
+
+find_package_handle_standard_args(PFM REQUIRED_VARS PFM_LIBRARY PFM_INCLUDE_DIR)
+
+if (PFM_FOUND AND NOT TARGET PFM::libpfm)
+ add_library(PFM::libpfm UNKNOWN IMPORTED)
+ set_target_properties(PFM::libpfm PROPERTIES
+ IMPORTED_LOCATION "${PFM_LIBRARY}"
+ INTERFACE_INCLUDE_DIRECTORIES "${PFM_INCLUDE_DIR}")
+endif()
+
+mark_as_advanced(PFM_LIBRARY PFM_INCLUDE_DIR)
diff --git a/cmake/benchmark.pc.in b/cmake/benchmark.pc.in
index 34beb01..9dae881 100644
--- a/cmake/benchmark.pc.in
+++ b/cmake/benchmark.pc.in
@@ -1,7 +1,7 @@
prefix=@CMAKE_INSTALL_PREFIX@
exec_prefix=${prefix}
-libdir=${prefix}/@CMAKE_INSTALL_LIBDIR@
-includedir=${prefix}/@CMAKE_INSTALL_INCLUDEDIR@
+libdir=@CMAKE_INSTALL_FULL_LIBDIR@
+includedir=@CMAKE_INSTALL_FULL_INCLUDEDIR@
Name: @PROJECT_NAME@
Description: Google microbenchmark framework
diff --git a/cmake/pthread_affinity.cpp b/cmake/pthread_affinity.cpp
new file mode 100644
index 0000000..7b143bc
--- /dev/null
+++ b/cmake/pthread_affinity.cpp
@@ -0,0 +1,16 @@
+#include <pthread.h>
+int main() {
+ cpu_set_t set;
+ CPU_ZERO(&set);
+ for (int i = 0; i < CPU_SETSIZE; ++i) {
+ CPU_SET(i, &set);
+ CPU_CLR(i, &set);
+ }
+ pthread_t self = pthread_self();
+ int ret;
+ ret = pthread_getaffinity_np(self, sizeof(set), &set);
+ if (ret != 0) return ret;
+ ret = pthread_setaffinity_np(self, sizeof(set), &set);
+ if (ret != 0) return ret;
+ return 0;
+}
diff --git a/conan/CMakeLists.txt b/conan/CMakeLists.txt
deleted file mode 100644
index 15b92ca..0000000
--- a/conan/CMakeLists.txt
+++ /dev/null
@@ -1,7 +0,0 @@
-cmake_minimum_required(VERSION 2.8.11)
-project(cmake_wrapper)
-
-include(conanbuildinfo.cmake)
-conan_basic_setup()
-
-include(${CMAKE_SOURCE_DIR}/CMakeListsOriginal.txt)
diff --git a/conan/test_package/CMakeLists.txt b/conan/test_package/CMakeLists.txt
deleted file mode 100644
index 089a6c7..0000000
--- a/conan/test_package/CMakeLists.txt
+++ /dev/null
@@ -1,10 +0,0 @@
-cmake_minimum_required(VERSION 2.8.11)
-project(test_package)
-
-set(CMAKE_VERBOSE_MAKEFILE TRUE)
-
-include(${CMAKE_BINARY_DIR}/conanbuildinfo.cmake)
-conan_basic_setup()
-
-add_executable(${PROJECT_NAME} test_package.cpp)
-target_link_libraries(${PROJECT_NAME} ${CONAN_LIBS})
diff --git a/conan/test_package/conanfile.py b/conan/test_package/conanfile.py
deleted file mode 100644
index d63f408..0000000
--- a/conan/test_package/conanfile.py
+++ /dev/null
@@ -1,19 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
-from conans import ConanFile, CMake
-import os
-
-
-class TestPackageConan(ConanFile):
- settings = "os", "compiler", "build_type", "arch"
- generators = "cmake"
-
- def build(self):
- cmake = CMake(self)
- cmake.configure()
- cmake.build()
-
- def test(self):
- bin_path = os.path.join("bin", "test_package")
- self.run(bin_path, run_environment=True)
diff --git a/conan/test_package/test_package.cpp b/conan/test_package/test_package.cpp
deleted file mode 100644
index 4fa7ec0..0000000
--- a/conan/test_package/test_package.cpp
+++ /dev/null
@@ -1,18 +0,0 @@
-#include "benchmark/benchmark.h"
-
-void BM_StringCreation(benchmark::State& state) {
- while (state.KeepRunning())
- std::string empty_string;
-}
-
-BENCHMARK(BM_StringCreation);
-
-void BM_StringCopy(benchmark::State& state) {
- std::string x = "hello";
- while (state.KeepRunning())
- std::string copy(x);
-}
-
-BENCHMARK(BM_StringCopy);
-
-BENCHMARK_MAIN();
diff --git a/conanfile.py b/conanfile.py
deleted file mode 100644
index e31fc52..0000000
--- a/conanfile.py
+++ /dev/null
@@ -1,79 +0,0 @@
-from conans import ConanFile, CMake, tools
-from conans.errors import ConanInvalidConfiguration
-import shutil
-import os
-
-
-class GoogleBenchmarkConan(ConanFile):
- name = "benchmark"
- description = "A microbenchmark support library."
- topics = ("conan", "benchmark", "google", "microbenchmark")
- url = "https://github.com/google/benchmark"
- homepage = "https://github.com/google/benchmark"
- author = "Google Inc."
- license = "Apache-2.0"
- exports_sources = ["*"]
- generators = "cmake"
-
- settings = "arch", "build_type", "compiler", "os"
- options = {
- "shared": [True, False],
- "fPIC": [True, False],
- "enable_lto": [True, False],
- "enable_exceptions": [True, False]
- }
- default_options = {"shared": False, "fPIC": True, "enable_lto": False, "enable_exceptions": True}
-
- _build_subfolder = "."
-
- def source(self):
- # Wrap the original CMake file to call conan_basic_setup
- shutil.move("CMakeLists.txt", "CMakeListsOriginal.txt")
- shutil.move(os.path.join("conan", "CMakeLists.txt"), "CMakeLists.txt")
-
- def config_options(self):
- if self.settings.os == "Windows":
- if self.settings.compiler == "Visual Studio" and float(self.settings.compiler.version.value) <= 12:
- raise ConanInvalidConfiguration("{} {} does not support Visual Studio <= 12".format(self.name, self.version))
- del self.options.fPIC
-
- def configure(self):
- if self.settings.os == "Windows" and self.options.shared:
- raise ConanInvalidConfiguration("Windows shared builds are not supported right now, see issue #639")
-
- def _configure_cmake(self):
- cmake = CMake(self)
-
- cmake.definitions["BENCHMARK_ENABLE_TESTING"] = "OFF"
- cmake.definitions["BENCHMARK_ENABLE_GTEST_TESTS"] = "OFF"
- cmake.definitions["BENCHMARK_ENABLE_LTO"] = "ON" if self.options.enable_lto else "OFF"
- cmake.definitions["BENCHMARK_ENABLE_EXCEPTIONS"] = "ON" if self.options.enable_exceptions else "OFF"
-
- # See https://github.com/google/benchmark/pull/638 for Windows 32 build explanation
- if self.settings.os != "Windows":
- cmake.definitions["BENCHMARK_BUILD_32_BITS"] = "ON" if "64" not in str(self.settings.arch) else "OFF"
- cmake.definitions["BENCHMARK_USE_LIBCXX"] = "ON" if (str(self.settings.compiler.libcxx) == "libc++") else "OFF"
- else:
- cmake.definitions["BENCHMARK_USE_LIBCXX"] = "OFF"
-
- cmake.configure(build_folder=self._build_subfolder)
- return cmake
-
- def build(self):
- cmake = self._configure_cmake()
- cmake.build()
-
- def package(self):
- cmake = self._configure_cmake()
- cmake.install()
-
- self.copy(pattern="LICENSE", dst="licenses")
-
- def package_info(self):
- self.cpp_info.libs = tools.collect_libs(self)
- if self.settings.os == "Linux":
- self.cpp_info.libs.extend(["pthread", "rt"])
- elif self.settings.os == "Windows":
- self.cpp_info.libs.append("shlwapi")
- elif self.settings.os == "SunOS":
- self.cpp_info.libs.append("kstat")
diff --git a/dependencies.md b/dependencies.md
deleted file mode 100644
index 6289b4e..0000000
--- a/dependencies.md
+++ /dev/null
@@ -1,18 +0,0 @@
-# Build tool dependency policy
-
-To ensure the broadest compatibility when building the benchmark library, but
-still allow forward progress, we require any build tooling to be available for:
-
-* Debian stable AND
-* The last two Ubuntu LTS releases AND
-
-Currently, this means using build tool versions that are available for Ubuntu
-16.04 (Xenial), Ubuntu 18.04 (Bionic), and Debian stretch.
-
-_Note, [travis](.travis.yml) runs under Ubuntu 14.04 (Trusty) for linux builds._
-
-## cmake
-The current supported version is cmake 3.5.1 as of 2018-06-06.
-
-_Note, this version is also available for Ubuntu 14.04, the previous Ubuntu LTS
-release, as `cmake3`._
diff --git a/docs/AssemblyTests.md b/docs/AssemblyTests.md
index 1fbdc26..89df7ca 100644
--- a/docs/AssemblyTests.md
+++ b/docs/AssemblyTests.md
@@ -111,6 +111,7 @@ between compilers or compiler versions. A common example of this
is matching stack frame addresses. In this case regular expressions
can be used to match the differing bits of output. For example:
+<!-- {% raw %} -->
```c++
int ExternInt;
struct Point { int x, y, z; };
@@ -127,6 +128,7 @@ extern "C" void test_store_point() {
// CHECK: ret
}
```
+<!-- {% endraw %} -->
## Current Requirements and Limitations
diff --git a/docs/_config.yml b/docs/_config.yml
index 1885487..32f9f2e 100644
--- a/docs/_config.yml
+++ b/docs/_config.yml
@@ -1 +1,3 @@
-theme: jekyll-theme-midnight \ No newline at end of file
+theme: jekyll-theme-minimal
+logo: /assets/images/icon_black.png
+show_downloads: true
diff --git a/docs/assets/images/icon.png b/docs/assets/images/icon.png
new file mode 100644
index 0000000..b982604
--- /dev/null
+++ b/docs/assets/images/icon.png
Binary files differ
diff --git a/docs/assets/images/icon.xcf b/docs/assets/images/icon.xcf
new file mode 100644
index 0000000..f2f0be4
--- /dev/null
+++ b/docs/assets/images/icon.xcf
Binary files differ
diff --git a/docs/assets/images/icon_black.png b/docs/assets/images/icon_black.png
new file mode 100644
index 0000000..656ae79
--- /dev/null
+++ b/docs/assets/images/icon_black.png
Binary files differ
diff --git a/docs/assets/images/icon_black.xcf b/docs/assets/images/icon_black.xcf
new file mode 100644
index 0000000..430e7ba
--- /dev/null
+++ b/docs/assets/images/icon_black.xcf
Binary files differ
diff --git a/docs/dependencies.md b/docs/dependencies.md
new file mode 100644
index 0000000..07760e1
--- /dev/null
+++ b/docs/dependencies.md
@@ -0,0 +1,13 @@
+# Build tool dependency policy
+
+We follow the [Foundational C++ support policy](https://opensource.google/documentation/policies/cplusplus-support) for our build tools. In
+particular, see the ["Build Systems" section](https://opensource.google/documentation/policies/cplusplus-support#build-systems).
+
+## CMake
+
+The current supported version is CMake 3.10 as of 2023-08-10. Most modern
+distributions include newer versions, for example:
+
+* Ubuntu 20.04 provides CMake 3.16.3
+* Debian 11.4 provides CMake 3.18.4
+* Ubuntu 22.04 provides CMake 3.22.1
diff --git a/docs/index.md b/docs/index.md
new file mode 100644
index 0000000..9cada96
--- /dev/null
+++ b/docs/index.md
@@ -0,0 +1,12 @@
+# Benchmark
+
+* [Assembly Tests](AssemblyTests.md)
+* [Dependencies](dependencies.md)
+* [Perf Counters](perf_counters.md)
+* [Platform Specific Build Instructions](platform_specific_build_instructions.md)
+* [Python Bindings](python_bindings.md)
+* [Random Interleaving](random_interleaving.md)
+* [Reducing Variance](reducing_variance.md)
+* [Releasing](releasing.md)
+* [Tools](tools.md)
+* [User Guide](user_guide.md)
diff --git a/docs/perf_counters.md b/docs/perf_counters.md
new file mode 100644
index 0000000..f342092
--- /dev/null
+++ b/docs/perf_counters.md
@@ -0,0 +1,35 @@
+<a name="perf-counters" />
+
+# User-Requested Performance Counters
+
+When running benchmarks, the user may choose to request collection of
+performance counters. This may be useful in investigation scenarios, such as
+narrowing down the cause of a regression or verifying that the underlying
+cause of a performance improvement matches expectations.
+
+This feature is available if:
+
+* The benchmark is run on an architecture featuring a Performance Monitoring
+ Unit (PMU),
+* The benchmark is compiled with support for collecting counters. Currently,
+ this requires [libpfm](http://perfmon2.sourceforge.net/), which is built as a
+ dependency via Bazel.
+
+The feature does not require modifying benchmark code. Counter collection is
+handled at the boundaries where timer collection is also handled.
+
+To opt-in:
+* If using a Bazel build, add `--define pfm=1` to your build flags
+* If using CMake:
+ * Install `libpfm4-dev`, e.g. `apt-get install libpfm4-dev`.
+ * Enable the CMake flag `BENCHMARK_ENABLE_LIBPFM` in `CMakeLists.txt`.
+
+To use, pass a comma-separated list of counter names through the
+`--benchmark_perf_counters` flag. The names are decoded through libpfm, so
+they are platform specific, but some generic names (e.g. `CYCLES` or
+`INSTRUCTIONS`) are mapped by libpfm to their platform-specific counterparts;
+see the libpfm [documentation](http://perfmon2.sourceforge.net/docs.html) for
+more details.
+
+The counter values are reported back through the [User Counters](../README.md#custom-counters)
+mechanism, meaning they are available in all the output formats (e.g. JSON)
+supported by User Counters.
diff --git a/docs/platform_specific_build_instructions.md b/docs/platform_specific_build_instructions.md
new file mode 100644
index 0000000..2d5d6c4
--- /dev/null
+++ b/docs/platform_specific_build_instructions.md
@@ -0,0 +1,48 @@
+# Platform Specific Build Instructions
+
+## Building with GCC
+
+When the library is built using GCC it is necessary to link with the pthread
+library due to how GCC implements `std::thread`. Failing to link to pthread will
+lead to runtime exceptions (unless you're using libc++), not linker errors. See
+[issue #67](https://github.com/google/benchmark/issues/67) for more details. You
+can link to pthread by adding `-pthread` to your linker command. Note, you can
+also use `-lpthread`, but there are potential issues with ordering of command
+line parameters if you use that.
+
+On QNX, the pthread library is part of libc and usually included automatically
+(see
+[`pthread_create()`](https://www.qnx.com/developers/docs/7.1/index.html#com.qnx.doc.neutrino.lib_ref/topic/p/pthread_create.html)).
+There's no separate pthread library to link.
+
+## Building with Visual Studio 2015 or 2017
+
+The `shlwapi` library (`-lshlwapi`) is required to support a call to `CPUInfo` which reads the registry. Either add `shlwapi.lib` under `[ Configuration Properties > Linker > Input ]`, or use the following:
+
+```
+// Alternatively, can add libraries using linker options.
+#ifdef _WIN32
+#pragma comment ( lib, "Shlwapi.lib" )
+#ifdef _DEBUG
+#pragma comment ( lib, "benchmarkd.lib" )
+#else
+#pragma comment ( lib, "benchmark.lib" )
+#endif
+#endif
+```
+
+You can also use the graphical version of CMake:
+* Open `CMake GUI`.
+* Under `Where to build the binaries`, use the source path plus `build`.
+* Under `CMAKE_INSTALL_PREFIX`, use the source path plus `install`.
+* Click `Configure`, `Generate`, `Open Project`.
+* If the build fails, try deleting the entire build directory and starting again, or untick options to build fewer targets.
+
+## Building with Intel 2015 Update 1 or Intel System Studio Update 4
+
+See instructions for building with Visual Studio. Once built, right click on the solution and change the build to Intel.
+
+## Building on Solaris
+
+If you're running benchmarks on Solaris, you'll want the kstat library linked in
+too (`-lkstat`). \ No newline at end of file
diff --git a/docs/python_bindings.md b/docs/python_bindings.md
new file mode 100644
index 0000000..6a7aab0
--- /dev/null
+++ b/docs/python_bindings.md
@@ -0,0 +1,34 @@
+# Building and installing Python bindings
+
+Python bindings are available as wheels on [PyPI](https://pypi.org/project/google-benchmark/) for importing and
+using Google Benchmark directly in Python.
+Currently, pre-built wheels exist for macOS (both ARM64 and Intel x86), Linux x86-64 and 64-bit Windows.
+Supported Python versions are Python 3.7 - 3.10.
+
+To install Google Benchmark's Python bindings, run:
+
+```bash
+python -m pip install --upgrade pip # for manylinux2014 support
+python -m pip install google-benchmark
+```
+
+In order to keep your system Python interpreter clean, it is advisable to run these commands in a virtual
+environment. See the [official Python documentation](https://docs.python.org/3/library/venv.html)
+on how to create virtual environments.
+
+To build a wheel directly from source, you can follow these steps:
+```bash
+git clone https://github.com/google/benchmark.git
+cd benchmark
+# create a virtual environment and activate it
+python3 -m venv venv --system-site-packages
+source venv/bin/activate # .\venv\Scripts\Activate.ps1 on Windows
+
+# upgrade Python's system-wide packages
+python -m pip install --upgrade pip setuptools wheel
+# builds the wheel and stores it in the directory "wheelhouse".
+python -m pip wheel . -w wheelhouse
+```
+
+NB: Building wheels from source requires Bazel. For platform-specific instructions on how to install Bazel,
+refer to the [Bazel installation docs](https://bazel.build/install).
diff --git a/docs/random_interleaving.md b/docs/random_interleaving.md
new file mode 100644
index 0000000..c083036
--- /dev/null
+++ b/docs/random_interleaving.md
@@ -0,0 +1,13 @@
+<a name="interleaving" />
+
+# Random Interleaving
+
+[Random Interleaving](https://github.com/google/benchmark/issues/1051) is a
+technique to lower run-to-run variance. It randomly interleaves repetitions of a
+microbenchmark with repetitions from other microbenchmarks in the same benchmark
+test. Data shows it is able to lower run-to-run variance by
+[40%](https://github.com/google/benchmark/issues/1051) on average.
+
+To use it, set `--benchmark_enable_random_interleaving=true`. Optionally, you
+can also specify a non-zero repetition count (`--benchmark_repetitions=9`) and
+decrease the per-repetition time (`--benchmark_min_time=0.1`).
diff --git a/docs/reducing_variance.md b/docs/reducing_variance.md
new file mode 100644
index 0000000..e566ab9
--- /dev/null
+++ b/docs/reducing_variance.md
@@ -0,0 +1,100 @@
+# Reducing Variance
+
+<a name="disabling-cpu-frequency-scaling" />
+
+## Disabling CPU Frequency Scaling
+
+If you see this error:
+
+```
+***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
+```
+
+you might want to disable CPU frequency scaling while running the
+benchmark, as well as consider other ways to stabilize the performance of
+your system while benchmarking, as described below.
+
+Exactly how to do this depends on the Linux distribution,
+desktop environment, and installed programs. Specific details are a moving
+target, so we will not attempt to exhaustively document them here.
+
+One simple option is to use the `cpupower` program to change the
+performance governor to "performance". This tool is maintained along with
+the Linux kernel and provided by your distribution.
+
+It must be run as root, like this:
+
+```bash
+sudo cpupower frequency-set --governor performance
+```
+
+After this you can verify that all CPUs are using the performance governor
+by running this command:
+
+```bash
+cpupower frequency-info -o proc
+```
+
+The benchmarks you subsequently run will have less variance.
+
+<a name="reducing-variance" />
+
+## Reducing Variance in Benchmarks
+
+The Linux CPU frequency governor [discussed
+above](#disabling-cpu-frequency-scaling) is not the only source
+of noise in benchmarks. Some, but not all, of the sources of variance
+include:
+
+1. On multi-core machines not all CPUs/CPU cores/CPU threads run the same
+ speed, so running a benchmark one time and then again may give a
+ different result depending on which CPU it ran on.
+2. CPU scaling features that run on the CPU, like Intel's Turbo Boost and
+ AMD Turbo Core and Precision Boost, can temporarily change the CPU
+ frequency even when using the "performance" governor on Linux.
+3. Context switching between CPUs, or scheduling competition on the CPU the
+ benchmark is running on.
+4. Intel Hyperthreading or AMD SMT causing the same issue as above.
+5. Cache effects caused by code running on other CPUs.
+6. Non-uniform memory architectures (NUMA).
+
+These can cause variance in benchmarks results within a single run
+(`--benchmark_repetitions=N`) or across multiple runs of the benchmark
+program.
+
+Reducing sources of variance is OS and architecture dependent, which is one
+reason some companies maintain machines dedicated to performance testing.
+
+Some of the easier and more effective ways of reducing variance on a typical
+Linux workstation are:
+
+1. Use the performance governor as [discussed
+above](#disabling-cpu-frequency-scaling).
+1. Disable processor boosting by:
+ ```sh
+ echo 0 | sudo tee /sys/devices/system/cpu/cpufreq/boost
+ ```
+ See the Linux kernel's
+ [boost.txt](https://www.kernel.org/doc/Documentation/cpu-freq/boost.txt)
+ for more information.
+2. Set the benchmark program's task affinity to a fixed cpu. For example:
+ ```sh
+ taskset -c 0 ./mybenchmark
+ ```
+3. Disable Hyperthreading/SMT. This can be done in the BIOS or using the
+ `/sys` file system (see the LLVM project's [Benchmarking
+ tips](https://llvm.org/docs/Benchmarking.html)).
+4. Close other programs that do non-trivial things based on timers, such as
+ your web browser, desktop environment, etc.
+5. Reduce the working set of your benchmark to fit within the L1 cache, but
+ do be aware that this may lead you to optimize for an unrealistic
+ situation.
+
+Further resources on this topic:
+
+1. The LLVM project's [Benchmarking
+ tips](https://llvm.org/docs/Benchmarking.html).
+1. The Arch Wiki [CPU frequency
+ scaling](https://wiki.archlinux.org/title/CPU_frequency_scaling) page.
diff --git a/docs/releasing.md b/docs/releasing.md
index f0cd701..cdf4159 100644
--- a/docs/releasing.md
+++ b/docs/releasing.md
@@ -1,16 +1,41 @@
# How to release
-* Make sure you're on master and synced to HEAD
-* Ensure the project builds and tests run (sanity check only, obviously)
+* Make sure you're on main and synced to HEAD
+* Ensure the project builds and tests run
* `parallel -j0 exec ::: test/*_test` can help ensure everything at least
passes
* Prepare release notes
* `git log $(git describe --abbrev=0 --tags)..HEAD` gives you the list of
commits between the last annotated tag and HEAD
* Pick the most interesting.
+* Create one last commit that updates the version saved in `CMakeLists.txt`, `MODULE.bazel`
+ and the `__version__` variable in `bindings/python/google_benchmark/__init__.py` to the
+ release version you're creating. (This version will be used if benchmark is installed
+ from the archive you'll be creating in the next step.)
+
+```
+project (benchmark VERSION 1.8.0 LANGUAGES CXX)
+```
+
+```
+module(name = "com_github_google_benchmark", version="1.8.0")
+```
+
+```python
+# bindings/python/google_benchmark/__init__.py
+
+# ...
+
+__version__ = "1.8.0" # <-- change this to the release version you are creating
+
+# ...
+```
+
+* Create a release through GitHub's interface
* Note this will create a lightweight tag.
* Update this to an annotated tag:
* `git pull --tags`
* `git tag -a -f <tag> <tag>`
- * `git push --force origin`
+ * `git push --force --tags origin`
+* Confirm that the "Build and upload Python wheels" action runs to completion
+ * run it manually if it hasn't run
diff --git a/docs/tools.md b/docs/tools.md
index f2d0c49..411f41d 100644
--- a/docs/tools.md
+++ b/docs/tools.md
@@ -186,6 +186,146 @@ Benchmark Time CPU Time Old
This is a mix of the previous two modes, two (potentially different) benchmark binaries are run, and a different filter is applied to each one.
As you can note, the values in `Time` and `CPU` columns are calculated as `(new - old) / |old|`.
+### Note: Interpreting the output
+
+Performance measurements are an art, and performance comparisons are doubly so.
+Results are often noisy, and the differences are not necessarily large in
+absolute terms, so by visual inspection alone it is not at all apparent whether
+two measurements actually show a performance change. It is even more
+confusing with multiple benchmark repetitions.
+
+Thankfully, we can use statistical tests on the results to determine whether
+the performance has changed in a statistically significant way. `compare.py`
+uses the [Mann–Whitney U
+test](https://en.wikipedia.org/wiki/Mann%E2%80%93Whitney_U_test), with the null
+hypothesis being that there is no difference in performance.
+
+**The below output is a summary of a benchmark comparison with statistics
+provided for a multi-threaded process.**
+```
+Benchmark Time CPU Time Old Time New CPU Old CPU New
+-----------------------------------------------------------------------------------------------------------------------------
+benchmark/threads:1/process_time/real_time_pvalue 0.0000 0.0000 U Test, Repetitions: 27 vs 27
+benchmark/threads:1/process_time/real_time_mean -0.1442 -0.1442 90 77 90 77
+benchmark/threads:1/process_time/real_time_median -0.1444 -0.1444 90 77 90 77
+benchmark/threads:1/process_time/real_time_stddev +0.3974 +0.3933 0 0 0 0
+benchmark/threads:1/process_time/real_time_cv +0.6329 +0.6280 0 0 0 0
+OVERALL_GEOMEAN -0.1442 -0.1442 0 0 0 0
+```
+--------------------------------------------
+Here's a breakdown of each row:
+
+**benchmark/threads:1/process_time/real_time_pvalue**: This shows the _p-value_ for
+the statistical test comparing the performance of the process running with one
+thread. A value of 0.0000 suggests a statistically significant difference in
+performance. The comparison was conducted using the U Test (Mann-Whitney
+U Test) with 27 repetitions for each case.
+
+**benchmark/threads:1/process_time/real_time_mean**: This shows the relative
+difference in mean execution time between two different cases. The negative
+value (-0.1442) implies that the new process is faster by about 14.42%. The old
+time was 90 units, while the new time is 77 units.
+
+**benchmark/threads:1/process_time/real_time_median**: Similarly, this shows the
+relative difference in the median execution time. Again, the new process is
+faster by 14.44%.
+
+**benchmark/threads:1/process_time/real_time_stddev**: This is the relative
+difference in the standard deviation of the execution time, which is a measure
+of how much variation or dispersion there is from the mean. A positive value
+(+0.3974) implies there is more variance in the execution time in the new
+process.
+
+**benchmark/threads:1/process_time/real_time_cv**: CV stands for Coefficient of
+Variation. It is the ratio of the standard deviation to the mean. It provides a
+standardized measure of dispersion. An increase (+0.6329) indicates more
+relative variability in the new process.
+
+**OVERALL_GEOMEAN**: Geomean stands for geometric mean, a type of average that is
+less influenced by outliers. The negative value indicates a general improvement
+in the new process. However, given the values are all zero for the old and new
+times, this seems to be a mistake or placeholder in the output.
+
+-----------------------------------------
+
+
+
+Let's first try to see what the different columns represent in the above
+`compare.py` benchmarking output:
+
+ 1. **Benchmark:** The name of the function being benchmarked, along with the
+ size of the input (after the slash).
+
+ 2. **Time:** The average time per operation, across all iterations.
+
+ 3. **CPU:** The average CPU time per operation, across all iterations.
+
+ 4. **Iterations:** The number of iterations the benchmark was run to get a
+ stable estimate.
+
+ 5. **Time Old and Time New:** These represent the average time it takes for a
+ function to run in two different scenarios or versions. For example, you
+ might be comparing how fast a function runs before and after you make some
+ changes to it.
+
+ 6. **CPU Old and CPU New:** These show the average amount of CPU time that the
+ function uses in two different scenarios or versions. This is similar to
+ Time Old and Time New, but focuses on CPU usage instead of overall time.
+
+In the comparison section, the relative differences in both time and CPU time
+are displayed for each input size.
+
+
+A statistically-significant difference is determined by a **p-value**, which is
+a measure of the probability that the observed difference could have occurred
+just by random chance. A smaller p-value indicates stronger evidence against the
+null hypothesis.
+
+**Therefore:**
+ 1. If the p-value is less than the chosen significance level (alpha), we
+ reject the null hypothesis and conclude the benchmarks are significantly
+ different.
+ 2. If the p-value is greater than or equal to alpha, we fail to reject the
+ null hypothesis and treat the two benchmarks as similar.
+
+
+
+The result of the statistical test is additionally communicated through color coding:
+```diff
++ Green:
+```
+ The benchmarks are _**statistically different**_. This could mean the
+ performance has either **significantly improved** or **significantly
+ deteriorated**. You should look at the actual performance numbers to see which
+ is the case.
+```diff
+- Red:
+```
+ The benchmarks are _**statistically similar**_. This means the performance
+ **hasn't significantly changed**.
+
+In statistical terms, **'green'** means we reject the null hypothesis that
+there's no difference in performance, and **'red'** means we fail to reject the
+null hypothesis. This might seem counter-intuitive if you're expecting 'green'
+to mean 'improved performance' and 'red' to mean 'worsened performance'.
+But remember, in this context:
+
+* 'Success' means 'successfully finding a difference'.
+* 'Failure' means 'failing to find a difference'.
+
+
+Also, please note that **even if** we determine that there **is** a
+statistically-significant difference between the two measurements, it does not
+_necessarily_ mean that the actual benchmarks that were measured **are**
+different. Conversely, even if we determine that there is **no**
+statistically-significant difference between the two measurements, it does not
+necessarily mean that the actual benchmarks that were measured **are not**
+different.
+
+
+
### U test
If there is a sufficient repetition count of the benchmarks, the tool can do
diff --git a/docs/user_guide.md b/docs/user_guide.md
new file mode 100644
index 0000000..2ceb13e
--- /dev/null
+++ b/docs/user_guide.md
@@ -0,0 +1,1266 @@
+# User Guide
+
+## Command Line
+
+[Output Formats](#output-formats)
+
+[Output Files](#output-files)
+
+[Running Benchmarks](#running-benchmarks)
+
+[Running a Subset of Benchmarks](#running-a-subset-of-benchmarks)
+
+[Result Comparison](#result-comparison)
+
+[Extra Context](#extra-context)
+
+## Library
+
+[Runtime and Reporting Considerations](#runtime-and-reporting-considerations)
+
+[Setup/Teardown](#setupteardown)
+
+[Passing Arguments](#passing-arguments)
+
+[Custom Benchmark Name](#custom-benchmark-name)
+
+[Calculating Asymptotic Complexity](#asymptotic-complexity)
+
+[Templated Benchmarks](#templated-benchmarks)
+
+[Fixtures](#fixtures)
+
+[Custom Counters](#custom-counters)
+
+[Multithreaded Benchmarks](#multithreaded-benchmarks)
+
+[CPU Timers](#cpu-timers)
+
+[Manual Timing](#manual-timing)
+
+[Setting the Time Unit](#setting-the-time-unit)
+
+[Random Interleaving](random_interleaving.md)
+
+[User-Requested Performance Counters](perf_counters.md)
+
+[Preventing Optimization](#preventing-optimization)
+
+[Reporting Statistics](#reporting-statistics)
+
+[Custom Statistics](#custom-statistics)
+
+[Memory Usage](#memory-usage)
+
+[Using RegisterBenchmark](#using-register-benchmark)
+
+[Exiting with an Error](#exiting-with-an-error)
+
+[A Faster `KeepRunning` Loop](#a-faster-keep-running-loop)
+
+## Benchmarking Tips
+
+[Disabling CPU Frequency Scaling](#disabling-cpu-frequency-scaling)
+
+[Reducing Variance in Benchmarks](reducing_variance.md)
+
+<a name="output-formats" />
+
+## Output Formats
+
+The library supports multiple output formats. Use the
+`--benchmark_format=<console|json|csv>` flag (or set the
+`BENCHMARK_FORMAT=<console|json|csv>` environment variable) to set
+the format type. `console` is the default format.
+
+The Console format is intended to be a human readable format. By default
+the format generates color output. Context is output on stderr and the
+tabular data on stdout. Example tabular output looks like:
+
+```
+Benchmark Time(ns) CPU(ns) Iterations
+----------------------------------------------------------------------
+BM_SetInsert/1024/1 28928 29349 23853 133.097kB/s 33.2742k items/s
+BM_SetInsert/1024/8 32065 32913 21375 949.487kB/s 237.372k items/s
+BM_SetInsert/1024/10 33157 33648 21431 1.13369MB/s 290.225k items/s
+```
+
+The JSON format outputs human readable json split into two top level attributes.
+The `context` attribute contains information about the run in general, including
+information about the CPU and the date.
+The `benchmarks` attribute contains a list of every benchmark run. Example json
+output looks like:
+
+```json
+{
+ "context": {
+ "date": "2015/03/17-18:40:25",
+ "num_cpus": 40,
+ "mhz_per_cpu": 2801,
+ "cpu_scaling_enabled": false,
+ "build_type": "debug"
+ },
+ "benchmarks": [
+ {
+ "name": "BM_SetInsert/1024/1",
+ "iterations": 94877,
+ "real_time": 29275,
+ "cpu_time": 29836,
+ "bytes_per_second": 134066,
+ "items_per_second": 33516
+ },
+ {
+ "name": "BM_SetInsert/1024/8",
+ "iterations": 21609,
+ "real_time": 32317,
+ "cpu_time": 32429,
+ "bytes_per_second": 986770,
+ "items_per_second": 246693
+ },
+ {
+ "name": "BM_SetInsert/1024/10",
+ "iterations": 21393,
+ "real_time": 32724,
+ "cpu_time": 33355,
+ "bytes_per_second": 1199226,
+ "items_per_second": 299807
+ }
+ ]
+}
+```
+
+The CSV format outputs comma-separated values. The `context` is output on stderr
+and the CSV itself on stdout. Example CSV output looks like:
+
+```
+name,iterations,real_time,cpu_time,bytes_per_second,items_per_second,label
+"BM_SetInsert/1024/1",65465,17890.7,8407.45,475768,118942,
+"BM_SetInsert/1024/8",116606,18810.1,9766.64,3.27646e+06,819115,
+"BM_SetInsert/1024/10",106365,17238.4,8421.53,4.74973e+06,1.18743e+06,
+```
+
+<a name="output-files" />
+
+## Output Files
+
+Write benchmark results to a file with the `--benchmark_out=<filename>` option
+(or set `BENCHMARK_OUT`). Specify the output format with
+`--benchmark_out_format={json|console|csv}` (or set
+`BENCHMARK_OUT_FORMAT={json|console|csv}`). Note that the 'csv' reporter is
+deprecated and the saved `.csv` file
+[is not parsable](https://github.com/google/benchmark/issues/794) by csv
+parsers.
+
+Specifying `--benchmark_out` does not suppress the console output.
+
+<a name="running-benchmarks" />
+
+## Running Benchmarks
+
+Benchmarks are executed by running the produced binaries. Benchmark binaries,
+by default, accept options that may be specified either through their command
+line interface or by setting environment variables before execution. For every
+`--option_flag=<value>` CLI switch, a corresponding environment variable
+`OPTION_FLAG=<value>` exists and is used as the default if set (CLI switches
+always prevail). A complete list of CLI options is available by running the
+benchmark binary with the `--help` switch.
+
+<a name="running-a-subset-of-benchmarks" />
+
+## Running a Subset of Benchmarks
+
+The `--benchmark_filter=<regex>` option (or `BENCHMARK_FILTER=<regex>`
+environment variable) can be used to only run the benchmarks that match
+the specified `<regex>`. For example:
+
+```bash
+$ ./run_benchmarks.x --benchmark_filter=BM_memcpy/32
+Run on (1 X 2300 MHz CPU )
+2016-06-25 19:34:24
+Benchmark Time CPU Iterations
+----------------------------------------------------
+BM_memcpy/32 11 ns 11 ns 79545455
+BM_memcpy/32k 2181 ns 2185 ns 324074
+BM_memcpy/32 12 ns 12 ns 54687500
+BM_memcpy/32k 1834 ns 1837 ns 357143
+```
+
+## Disabling Benchmarks
+
+It is possible to temporarily disable benchmarks by renaming the benchmark
+function to have the prefix "DISABLED_". This will cause the benchmark to
+be skipped at runtime.
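+
+For instance, a minimal sketch (the benchmark body here is hypothetical; only
+the `DISABLED_` prefix matters):
+
+```c++
+// Skipped at runtime because the registered name starts with "DISABLED_".
+static void DISABLED_BM_SlowExperiment(benchmark::State& state) {
+  for (auto _ : state) {
+    // ... code that should temporarily not be measured ...
+  }
+}
+BENCHMARK(DISABLED_BM_SlowExperiment);
+```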
+
+<a name="result-comparison" />
+
+## Result comparison
+
+It is possible to compare the benchmarking results.
+See [Additional Tooling Documentation](tools.md)
+
+<a name="extra-context" />
+
+## Extra Context
+
+Sometimes it's useful to add extra context to the content printed before the
+results. By default this section includes information about the CPU on which
+the benchmarks are running. If you do want to add more context, you can use
+the `--benchmark_context` command line flag:
+
+```bash
+$ ./run_benchmarks --benchmark_context=pwd=`pwd`
+Run on (1 x 2300 MHz CPU)
+pwd: /home/user/benchmark/
+Benchmark Time CPU Iterations
+----------------------------------------------------
+BM_memcpy/32 11 ns 11 ns 79545455
+BM_memcpy/32k 2181 ns 2185 ns 324074
+```
+
+You can get the same effect with the API:
+
+```c++
+ benchmark::AddCustomContext("foo", "bar");
+```
+
+Note that attempts to add a second value with the same key will fail with an
+error message.
+
+<a name="runtime-and-reporting-considerations" />
+
+## Runtime and Reporting Considerations
+
+When the benchmark binary is executed, each benchmark function is run serially.
+The number of iterations to run is determined dynamically by running the
+benchmark a few times and measuring the time taken and ensuring that the
+ultimate result will be statistically stable. As such, faster benchmark
+functions will be run for more iterations than slower benchmark functions, and
+the number of iterations is thus reported.
+
+In all cases, the number of iterations for which the benchmark is run is
+governed by the amount of time the benchmark takes. Concretely, the number of
+iterations is at least one, not more than 1e9, until CPU time is greater than
+the minimum time, or the wallclock time is 5x minimum time. The minimum time is
+set per benchmark by calling `MinTime` on the registered benchmark object.
+
+Furthermore, warming up a benchmark might be necessary in order to get
+stable results, e.g. because of caching effects in the code under benchmark.
+Warming up means running the benchmark for a given amount of time before
+results are actually taken into account. The amount of time for which
+the warmup should be run can be set per benchmark by calling
+`MinWarmUpTime` on the registered benchmark object, or for all benchmarks
+using the `--benchmark_min_warmup_time` command-line option. Note that
+`MinWarmUpTime` overrides the value of `--benchmark_min_warmup_time`
+for that single benchmark. How many iterations the warmup run of each
+benchmark takes is determined the same way as described in the paragraph
+above. By default the warmup phase is set to 0 seconds and is therefore
+disabled.
+
+Average timings are then reported over the iterations run. If multiple
+repetitions are requested using the `--benchmark_repetitions` command-line
+option, or at registration time, the benchmark function will be run several
+times and statistical results across these repetitions will also be reported.
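+
+For example, a minimal sketch (assuming a hypothetical benchmark `BM_Foo`) of
+setting these values at registration time instead of via command-line flags:
+
+```c++
+// At least 2 seconds of benchmarking per repetition, preceded by a 1 second
+// warmup, with the whole measurement repeated 10 times.
+BENCHMARK(BM_Foo)->MinTime(2.0)->MinWarmUpTime(1.0)->Repetitions(10);
+```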
+
+As well as the per-benchmark entries, a preamble in the report will include
+information about the machine on which the benchmarks are run.
+
+<a name="setup-teardown" />
+
+## Setup/Teardown
+
+Global setup/teardown specific to each benchmark can be done by
+passing a callback to Setup/Teardown:
+
+The setup/teardown callbacks will be invoked once for each benchmark. If the
+benchmark is multi-threaded (will run in k threads), they will be invoked
+exactly once before each run with k threads.
+
+If the benchmark uses different size groups of threads, the above will be true
+for each size group.
+
+For example:
+
+```c++
+static void DoSetup(const benchmark::State& state) {
+}
+
+static void DoTeardown(const benchmark::State& state) {
+}
+
+static void BM_func(benchmark::State& state) {...}
+
+BENCHMARK(BM_func)->Arg(1)->Arg(3)->Threads(16)->Threads(32)->Setup(DoSetup)->Teardown(DoTeardown);
+
+```
+
+In this example, `DoSetup` and `DoTeardown` will be invoked 4 times each,
+specifically, once for each member of this family:
+ - BM_func_Arg_1_Threads_16, BM_func_Arg_1_Threads_32
+ - BM_func_Arg_3_Threads_16, BM_func_Arg_3_Threads_32
+
+<a name="passing-arguments" />
+
+## Passing Arguments
+
+Sometimes a family of benchmarks can be implemented with just one routine that
+takes an extra argument to specify which one of the family of benchmarks to
+run. For example, the following code defines a family of benchmarks for
+measuring the speed of `memcpy()` calls of different lengths:
+
+```c++
+static void BM_memcpy(benchmark::State& state) {
+ char* src = new char[state.range(0)];
+ char* dst = new char[state.range(0)];
+ memset(src, 'x', state.range(0));
+ for (auto _ : state)
+ memcpy(dst, src, state.range(0));
+ state.SetBytesProcessed(int64_t(state.iterations()) *
+ int64_t(state.range(0)));
+ delete[] src;
+ delete[] dst;
+}
+BENCHMARK(BM_memcpy)->Arg(8)->Arg(64)->Arg(512)->Arg(4<<10)->Arg(8<<10);
+```
+
+The preceding code is quite repetitive, and can be replaced with the following
+short-hand. The following invocation will pick a few appropriate arguments in
+the specified range and will generate a benchmark for each such argument.
+
+```c++
+BENCHMARK(BM_memcpy)->Range(8, 8<<10);
+```
+
+By default the arguments in the range are generated in multiples of eight and
+the command above selects [ 8, 64, 512, 4k, 8k ]. In the following code the
+range multiplier is changed to multiples of two.
+
+```c++
+BENCHMARK(BM_memcpy)->RangeMultiplier(2)->Range(8, 8<<10);
+```
+
+Now arguments generated are [ 8, 16, 32, 64, 128, 256, 512, 1024, 2k, 4k, 8k ].
+
+The preceding code shows a method of defining a sparse range. The following
+example shows a method of defining a dense range. It is then used to benchmark
+the performance of `std::vector` initialization for uniformly increasing sizes.
+
+```c++
+static void BM_DenseRange(benchmark::State& state) {
+ for(auto _ : state) {
+ std::vector<int> v(state.range(0), state.range(0));
+ auto data = v.data();
+ benchmark::DoNotOptimize(data);
+ benchmark::ClobberMemory();
+ }
+}
+BENCHMARK(BM_DenseRange)->DenseRange(0, 1024, 128);
+```
+
+Now arguments generated are [ 0, 128, 256, 384, 512, 640, 768, 896, 1024 ].
+
+You might have a benchmark that depends on two or more inputs. For example, the
+following code defines a family of benchmarks for measuring the speed of set
+insertion.
+
+```c++
+static void BM_SetInsert(benchmark::State& state) {
+ std::set<int> data;
+ for (auto _ : state) {
+ state.PauseTiming();
+ data = ConstructRandomSet(state.range(0));
+ state.ResumeTiming();
+ for (int j = 0; j < state.range(1); ++j)
+ data.insert(RandomNumber());
+ }
+}
+BENCHMARK(BM_SetInsert)
+ ->Args({1<<10, 128})
+ ->Args({2<<10, 128})
+ ->Args({4<<10, 128})
+ ->Args({8<<10, 128})
+ ->Args({1<<10, 512})
+ ->Args({2<<10, 512})
+ ->Args({4<<10, 512})
+ ->Args({8<<10, 512});
+```
+
+The preceding code is quite repetitive, and can be replaced with the following
+short-hand. The following macro will pick a few appropriate arguments in the
+product of the two specified ranges and will generate a benchmark for each such
+pair.
+
+<!-- {% raw %} -->
+```c++
+BENCHMARK(BM_SetInsert)->Ranges({{1<<10, 8<<10}, {128, 512}});
+```
+<!-- {% endraw %} -->
+
+Some benchmarks may require specific argument values that cannot be expressed
+with `Ranges`. In this case, `ArgsProduct` offers the ability to generate a
+benchmark input for each combination in the product of the supplied vectors.
+
+<!-- {% raw %} -->
+```c++
+BENCHMARK(BM_SetInsert)
+ ->ArgsProduct({{1<<10, 3<<10, 8<<10}, {20, 40, 60, 80}})
+// would generate the same benchmark arguments as
+BENCHMARK(BM_SetInsert)
+ ->Args({1<<10, 20})
+ ->Args({3<<10, 20})
+ ->Args({8<<10, 20})
+ ->Args({3<<10, 40})
+ ->Args({8<<10, 40})
+ ->Args({1<<10, 40})
+ ->Args({1<<10, 60})
+ ->Args({3<<10, 60})
+ ->Args({8<<10, 60})
+ ->Args({1<<10, 80})
+ ->Args({3<<10, 80})
+ ->Args({8<<10, 80});
+```
+<!-- {% endraw %} -->
+
+For the most common scenarios, helper methods for creating a list of
+integers for a given sparse or dense range are provided.
+
+```c++
+BENCHMARK(BM_SetInsert)
+ ->ArgsProduct({
+ benchmark::CreateRange(8, 128, /*multi=*/2),
+ benchmark::CreateDenseRange(1, 4, /*step=*/1)
+ })
+// would generate the same benchmark arguments as
+BENCHMARK(BM_SetInsert)
+ ->ArgsProduct({
+ {8, 16, 32, 64, 128},
+ {1, 2, 3, 4}
+ });
+```
+
+For more complex patterns of inputs, passing a custom function to `Apply` allows
+programmatic specification of an arbitrary set of arguments on which to run the
+benchmark. The following example enumerates a dense range on one parameter,
+and a sparse range on the second.
+
+```c++
+static void CustomArguments(benchmark::internal::Benchmark* b) {
+ for (int i = 0; i <= 10; ++i)
+ for (int j = 32; j <= 1024*1024; j *= 8)
+ b->Args({i, j});
+}
+BENCHMARK(BM_SetInsert)->Apply(CustomArguments);
+```
+
+### Passing Arbitrary Arguments to a Benchmark
+
+In C++11 it is possible to define a benchmark that takes an arbitrary number
+of extra arguments. The `BENCHMARK_CAPTURE(func, test_case_name, ...args)`
+macro creates a benchmark that invokes `func` with the `benchmark::State` as
+the first argument followed by the specified `args...`.
+The `test_case_name` is appended to the name of the benchmark and
+should describe the values passed.
+
+```c++
+template <class ...Args>
+void BM_takes_args(benchmark::State& state, Args&&... args) {
+ auto args_tuple = std::make_tuple(std::move(args)...);
+ for (auto _ : state) {
+ std::cout << std::get<0>(args_tuple) << ": " << std::get<1>(args_tuple)
+ << '\n';
+ [...]
+ }
+}
+// Registers a benchmark named "BM_takes_args/int_string_test" that passes
+// the specified values to `args`.
+BENCHMARK_CAPTURE(BM_takes_args, int_string_test, 42, std::string("abc"));
+
+// Registers the same benchmark "BM_takes_args/int_test" that passes
+// the specified values to `args`.
+BENCHMARK_CAPTURE(BM_takes_args, int_test, 42, 43);
+```
+
+Note that elements of `...args` may refer to global variables. Users should
+avoid modifying global state inside of a benchmark.
+
+<a name="asymptotic-complexity" />
+
+## Calculating Asymptotic Complexity (Big O)
+
+Asymptotic complexity might be calculated for a family of benchmarks. The
+following code will calculate the coefficient for the high-order term in the
+running time and the normalized root-mean square error of string comparison.
+
+```c++
+static void BM_StringCompare(benchmark::State& state) {
+ std::string s1(state.range(0), '-');
+ std::string s2(state.range(0), '-');
+ for (auto _ : state) {
+ auto comparison_result = s1.compare(s2);
+ benchmark::DoNotOptimize(comparison_result);
+ }
+ state.SetComplexityN(state.range(0));
+}
+BENCHMARK(BM_StringCompare)
+ ->RangeMultiplier(2)->Range(1<<10, 1<<18)->Complexity(benchmark::oN);
+```
+
+As shown in the following invocation, asymptotic complexity might also be
+calculated automatically.
+
+```c++
+BENCHMARK(BM_StringCompare)
+ ->RangeMultiplier(2)->Range(1<<10, 1<<18)->Complexity();
+```
+
+The following code will specify asymptotic complexity with a lambda function,
+that might be used to customize high-order term calculation.
+
+```c++
+BENCHMARK(BM_StringCompare)->RangeMultiplier(2)
+ ->Range(1<<10, 1<<18)->Complexity([](benchmark::IterationCount n)->double{return n; });
+```
+
+<a name="custom-benchmark-name" />
+
+## Custom Benchmark Name
+
+You can change the benchmark's name as follows:
+
+```c++
+BENCHMARK(BM_memcpy)->Name("memcpy")->RangeMultiplier(2)->Range(8, 8<<10);
+```
+
+The invocation will execute the benchmark as before using `BM_memcpy` but changes
+the prefix in the report to `memcpy`.
+
+<a name="templated-benchmarks" />
+
+## Templated Benchmarks
+
+This example produces and consumes messages of size `sizeof(v)`, `state.range(0)`
+times per iteration. It also outputs throughput in the absence of multiprogramming.
+
+```c++
+template <class Q> void BM_Sequential(benchmark::State& state) {
+ Q q;
+ typename Q::value_type v;
+ for (auto _ : state) {
+ for (int i = state.range(0); i--; )
+ q.push(v);
+ for (int e = state.range(0); e--; )
+ q.Wait(&v);
+ }
+ // actually messages, not bytes:
+ state.SetBytesProcessed(
+ static_cast<int64_t>(state.iterations())*state.range(0));
+}
+// C++03
+BENCHMARK_TEMPLATE(BM_Sequential, WaitQueue<int>)->Range(1<<0, 1<<10);
+
+// C++11 or newer, you can use the BENCHMARK macro with template parameters:
+BENCHMARK(BM_Sequential<WaitQueue<int>>)->Range(1<<0, 1<<10);
+
+```
+
+Three macros are provided for adding benchmark templates.
+
+```c++
+#ifdef BENCHMARK_HAS_CXX11
+#define BENCHMARK(func<...>) // Takes any number of parameters.
+#else // C++ < C++11
+#define BENCHMARK_TEMPLATE(func, arg1)
+#endif
+#define BENCHMARK_TEMPLATE1(func, arg1)
+#define BENCHMARK_TEMPLATE2(func, arg1, arg2)
+```
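+
+For instance, a sketch of how the two-argument form might be used (the
+`BM_Sequential2` benchmark here is hypothetical):
+
+```c++
+template <class Q, class V>
+void BM_Sequential2(benchmark::State& state) { /* ... */ }
+
+// Instantiates and registers BM_Sequential2<WaitQueue<int>, int>.
+BENCHMARK_TEMPLATE2(BM_Sequential2, WaitQueue<int>, int);
+```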
+
+<a name="fixtures" />
+
+## Fixtures
+
+Fixture tests are created by first defining a type that derives from
+`::benchmark::Fixture` and then creating/registering the tests using the
+following macros:
+
+* `BENCHMARK_F(ClassName, Method)`
+* `BENCHMARK_DEFINE_F(ClassName, Method)`
+* `BENCHMARK_REGISTER_F(ClassName, Method)`
+
+For Example:
+
+```c++
+class MyFixture : public benchmark::Fixture {
+public:
+ void SetUp(const ::benchmark::State& state) {
+ }
+
+ void TearDown(const ::benchmark::State& state) {
+ }
+};
+
+BENCHMARK_F(MyFixture, FooTest)(benchmark::State& st) {
+ for (auto _ : st) {
+ ...
+ }
+}
+
+BENCHMARK_DEFINE_F(MyFixture, BarTest)(benchmark::State& st) {
+ for (auto _ : st) {
+ ...
+ }
+}
+/* BarTest is NOT registered */
+BENCHMARK_REGISTER_F(MyFixture, BarTest)->Threads(2);
+/* BarTest is now registered */
+```
+
+### Templated Fixtures
+
+You can also create templated fixtures by using the following macros:
+
+* `BENCHMARK_TEMPLATE_F(ClassName, Method, ...)`
+* `BENCHMARK_TEMPLATE_DEFINE_F(ClassName, Method, ...)`
+
+For example:
+
+```c++
+template<typename T>
+class MyFixture : public benchmark::Fixture {};
+
+BENCHMARK_TEMPLATE_F(MyFixture, IntTest, int)(benchmark::State& st) {
+ for (auto _ : st) {
+ ...
+ }
+}
+
+BENCHMARK_TEMPLATE_DEFINE_F(MyFixture, DoubleTest, double)(benchmark::State& st) {
+ for (auto _ : st) {
+ ...
+ }
+}
+
+BENCHMARK_REGISTER_F(MyFixture, DoubleTest)->Threads(2);
+```
+
+<a name="custom-counters" />
+
+## Custom Counters
+
+You can add your own counters with user-defined names. The example below
+will add columns "Foo", "Bar" and "Baz" to its output:
+
+```c++
+static void UserCountersExample1(benchmark::State& state) {
+ double numFoos = 0, numBars = 0, numBazs = 0;
+ for (auto _ : state) {
+ // ... count Foo,Bar,Baz events
+ }
+ state.counters["Foo"] = numFoos;
+ state.counters["Bar"] = numBars;
+ state.counters["Baz"] = numBazs;
+}
+```
+
+The `state.counters` object is a `std::map` with `std::string` keys
+and `Counter` values. The latter is a `double`-like class, thanks to an
+implicit conversion to `double&`, so you can use all of the standard arithmetic
+assignment operators (`=`, `+=`, `-=`, `*=`, `/=`) to change the value of each
+counter.
+
+In multithreaded benchmarks, each counter is set on the calling thread only.
+When the benchmark finishes, the counters from each thread will be summed;
+the resulting sum is the value which will be shown for the benchmark.
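+
+As a small illustrative sketch (the loop body below is just a placeholder for
+real per-thread work), each thread records its own count and the reported
+`Items` value is the sum across all threads:
+
+```c++
+static void BM_CountItems(benchmark::State& state) {
+  double items = 0;
+  for (auto _ : state) {
+    // Placeholder for real work; each iteration "processes" one item.
+    benchmark::DoNotOptimize(items += 1);
+  }
+  // Set on the calling thread only; the report shows the per-thread sum.
+  state.counters["Items"] = items;
+}
+BENCHMARK(BM_CountItems)->Threads(4);
+```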
+
+The `Counter` constructor accepts three parameters: the value as a `double`;
+a set of bit flags which allow you to show counters as rates, as per-thread
+averages, as per-iteration averages, as iteration invariants, and/or to invert
+the final result; and a flag specifying the 'unit', i.e. whether 1k means 1000
+(the default, `benchmark::Counter::OneK::kIs1000`) or 1024
+(`benchmark::Counter::OneK::kIs1024`).
+
+```c++
+ // sets a simple counter
+ state.counters["Foo"] = numFoos;
+
+ // Set the counter as a rate. It will be presented divided
+ // by the duration of the benchmark.
+ // Meaning: per one second, how many 'foo's are processed?
+ state.counters["FooRate"] = Counter(numFoos, benchmark::Counter::kIsRate);
+
+ // Set the counter as a rate. It will be presented divided
+ // by the duration of the benchmark, and the result inverted.
+  // Meaning: how many seconds does it take to process one 'foo'?
+ state.counters["FooInvRate"] = Counter(numFoos, benchmark::Counter::kIsRate | benchmark::Counter::kInvert);
+
+ // Set the counter as a thread-average quantity. It will
+ // be presented divided by the number of threads.
+ state.counters["FooAvg"] = Counter(numFoos, benchmark::Counter::kAvgThreads);
+
+ // There's also a combined flag:
+  state.counters["FooAvgRate"] = Counter(numFoos, benchmark::Counter::kAvgThreadsRate);
+
+ // This says that we process with the rate of state.range(0) bytes every iteration:
+ state.counters["BytesProcessed"] = Counter(state.range(0), benchmark::Counter::kIsIterationInvariantRate, benchmark::Counter::OneK::kIs1024);
+```
+
+When you're compiling in C++11 mode or later you can use `insert()` with
+`std::initializer_list`:
+
+<!-- {% raw %} -->
+```c++
+ // With C++11, this can be done:
+ state.counters.insert({{"Foo", numFoos}, {"Bar", numBars}, {"Baz", numBazs}});
+ // ... instead of:
+ state.counters["Foo"] = numFoos;
+ state.counters["Bar"] = numBars;
+ state.counters["Baz"] = numBazs;
+```
+<!-- {% endraw %} -->
+
+### Counter Reporting
+
+When using the console reporter, by default, user counters are printed at
+the end after the table, the same way as ``bytes_processed`` and
+``items_processed``. This is best for cases in which there are few counters,
+or where there are only a couple of lines per benchmark. Here's an example of
+the default output:
+
+```
+------------------------------------------------------------------------------
+Benchmark Time CPU Iterations UserCounters...
+------------------------------------------------------------------------------
+BM_UserCounter/threads:8 2248 ns 10277 ns 68808 Bar=16 Bat=40 Baz=24 Foo=8
+BM_UserCounter/threads:1 9797 ns 9788 ns 71523 Bar=2 Bat=5 Baz=3 Foo=1024m
+BM_UserCounter/threads:2 4924 ns 9842 ns 71036 Bar=4 Bat=10 Baz=6 Foo=2
+BM_UserCounter/threads:4 2589 ns 10284 ns 68012 Bar=8 Bat=20 Baz=12 Foo=4
+BM_UserCounter/threads:8 2212 ns 10287 ns 68040 Bar=16 Bat=40 Baz=24 Foo=8
+BM_UserCounter/threads:16 1782 ns 10278 ns 68144 Bar=32 Bat=80 Baz=48 Foo=16
+BM_UserCounter/threads:32 1291 ns 10296 ns 68256 Bar=64 Bat=160 Baz=96 Foo=32
+BM_UserCounter/threads:4 2615 ns 10307 ns 68040 Bar=8 Bat=20 Baz=12 Foo=4
+BM_Factorial 26 ns 26 ns 26608979 40320
+BM_Factorial/real_time 26 ns 26 ns 26587936 40320
+BM_CalculatePiRange/1 16 ns 16 ns 45704255 0
+BM_CalculatePiRange/8 73 ns 73 ns 9520927 3.28374
+BM_CalculatePiRange/64 609 ns 609 ns 1140647 3.15746
+BM_CalculatePiRange/512 4900 ns 4901 ns 142696 3.14355
+```
+
+If this doesn't suit you, you can print each counter as a table column by
+passing the flag `--benchmark_counters_tabular=true` to the benchmark
+application. This is best for cases in which there are a lot of counters, or
+a lot of lines per individual benchmark. Note that this will trigger a
+reprinting of the table header any time the counter set changes between
+individual benchmarks. Here's an example of corresponding output when
+`--benchmark_counters_tabular=true` is passed:
+
+```
+---------------------------------------------------------------------------------------
+Benchmark Time CPU Iterations Bar Bat Baz Foo
+---------------------------------------------------------------------------------------
+BM_UserCounter/threads:8 2198 ns 9953 ns 70688 16 40 24 8
+BM_UserCounter/threads:1 9504 ns 9504 ns 73787 2 5 3 1
+BM_UserCounter/threads:2 4775 ns 9550 ns 72606 4 10 6 2
+BM_UserCounter/threads:4 2508 ns 9951 ns 70332 8 20 12 4
+BM_UserCounter/threads:8 2055 ns 9933 ns 70344 16 40 24 8
+BM_UserCounter/threads:16 1610 ns 9946 ns 70720 32 80 48 16
+BM_UserCounter/threads:32 1192 ns 9948 ns 70496 64 160 96 32
+BM_UserCounter/threads:4 2506 ns 9949 ns 70332 8 20 12 4
+--------------------------------------------------------------
+Benchmark Time CPU Iterations
+--------------------------------------------------------------
+BM_Factorial 26 ns 26 ns 26392245 40320
+BM_Factorial/real_time 26 ns 26 ns 26494107 40320
+BM_CalculatePiRange/1 15 ns 15 ns 45571597 0
+BM_CalculatePiRange/8 74 ns 74 ns 9450212 3.28374
+BM_CalculatePiRange/64 595 ns 595 ns 1173901 3.15746
+BM_CalculatePiRange/512 4752 ns 4752 ns 147380 3.14355
+BM_CalculatePiRange/4k 37970 ns 37972 ns 18453 3.14184
+BM_CalculatePiRange/32k 303733 ns 303744 ns 2305 3.14162
+BM_CalculatePiRange/256k 2434095 ns 2434186 ns 288 3.1416
+BM_CalculatePiRange/1024k 9721140 ns 9721413 ns 71 3.14159
+BM_CalculatePi/threads:8 2255 ns 9943 ns 70936
+```
+
+Note above the additional header printed when the benchmark changes from
+``BM_UserCounter`` to ``BM_Factorial``. This is because ``BM_Factorial`` does
+not have the same counter set as ``BM_UserCounter``.
+
+<a name="multithreaded-benchmarks"/>
+
+## Multithreaded Benchmarks
+
+In a multithreaded test (benchmark invoked by multiple threads simultaneously),
+it is guaranteed that none of the threads will start until all have reached
+the start of the benchmark loop, and all will have finished before any thread
+exits the benchmark loop. (This behavior is also provided by the `KeepRunning()`
+API.) As such, any global setup or teardown can be wrapped in a check against
+the thread index:
+
+```c++
+static void BM_MultiThreaded(benchmark::State& state) {
+ if (state.thread_index() == 0) {
+ // Setup code here.
+ }
+ for (auto _ : state) {
+ // Run the test as normal.
+ }
+ if (state.thread_index() == 0) {
+ // Teardown code here.
+ }
+}
+BENCHMARK(BM_MultiThreaded)->Threads(2);
+```
+
+To run the benchmark across a range of thread counts, instead of `Threads`, use
+`ThreadRange`. This takes two parameters (`min_threads` and `max_threads`) and
+runs the benchmark once for each power of two in the inclusive range. For example:
+
+```c++
+BENCHMARK(BM_MultiThreaded)->ThreadRange(1, 8);
+```
+
+will run `BM_MultiThreaded` with thread counts 1, 2, 4, and 8.
+
+If the benchmarked code itself uses threads and you want to compare it to
+single-threaded code, you may want to use real-time ("wallclock") measurements
+for latency comparisons:
+
+```c++
+BENCHMARK(BM_test)->Range(8, 8<<10)->UseRealTime();
+```
+
+Without `UseRealTime`, CPU time is used by default.
+
+<a name="cpu-timers" />
+
+## CPU Timers
+
+By default, the CPU timer only measures the time spent by the main thread.
+If the benchmark itself uses threads internally, this measurement may not
+be what you are looking for. Instead, there is a way to measure the total
+CPU usage of the process, by all the threads.
+
+```c++
+void callee(int i);
+
+static void MyMain(int size) {
+#pragma omp parallel for
+ for(int i = 0; i < size; i++)
+ callee(i);
+}
+
+static void BM_OpenMP(benchmark::State& state) {
+ for (auto _ : state)
+ MyMain(state.range(0));
+}
+
+// Measure the time spent by the main thread, and use it to decide for how long
+// to run the benchmark loop. Depending on internal implementation details, this
+// may measure anywhere from near-zero (just the overhead spent before/after the
+// work is handed off to the worker thread[s]) to the whole single-thread time.
+BENCHMARK(BM_OpenMP)->Range(8, 8<<10);
+
+// Measure the user-visible time, the wall clock (literally, the time that
+// has passed on the clock on the wall), and use it to decide for how long to
+// run the benchmark loop. This will always be meaningful, and will match the
+// time spent by the main thread in the single-threaded case, in general
+// decreasing with the number of internal threads doing the work.
+BENCHMARK(BM_OpenMP)->Range(8, 8<<10)->UseRealTime();
+
+// Measure the total CPU consumption, and use it to decide for how long to
+// run the benchmark loop. This will always measure at least as much as the
+// time spent by the main thread in the single-threaded case.
+BENCHMARK(BM_OpenMP)->Range(8, 8<<10)->MeasureProcessCPUTime();
+
+// A mixture of the last two. Measure the total CPU consumption, but use the
+// wall clock to decide for how long to run the benchmark loop.
+BENCHMARK(BM_OpenMP)->Range(8, 8<<10)->MeasureProcessCPUTime()->UseRealTime();
+```
+
+### Controlling Timers
+
+Normally, the entire duration of the work loop (`for (auto _ : state) {}`)
+is measured. But sometimes it is necessary to do some work inside that loop,
+every iteration, without counting that time toward the benchmark time.
+That is possible, although it is not recommended, since it has high overhead.
+
+<!-- {% raw %} -->
+```c++
+static void BM_SetInsert_With_Timer_Control(benchmark::State& state) {
+ std::set<int> data;
+ for (auto _ : state) {
+ state.PauseTiming(); // Stop timers. They will not count until they are resumed.
+ data = ConstructRandomSet(state.range(0)); // Do something that should not be measured
+ state.ResumeTiming(); // And resume timers. They are now counting again.
+ // The rest will be measured.
+ for (int j = 0; j < state.range(1); ++j)
+ data.insert(RandomNumber());
+ }
+}
+BENCHMARK(BM_SetInsert_With_Timer_Control)->Ranges({{1<<10, 8<<10}, {128, 512}});
+```
+<!-- {% endraw %} -->
+
+<a name="manual-timing" />
+
+## Manual Timing
+
+For benchmarking something for which neither CPU time nor real time is
+correct or accurate enough, completely manual timing is supported using
+the `UseManualTime` function.
+
+When `UseManualTime` is used, the benchmarked code must call
+`SetIterationTime` once per iteration of the benchmark loop to
+report the manually measured time.
+
+An example use case for this is benchmarking GPU execution (e.g. OpenCL
+or CUDA kernels, OpenGL or Vulkan or Direct3D draw calls), which cannot
+be accurately measured using CPU time or real-time. Instead, they can be
+measured accurately using a dedicated API, and these measurement results
+can be reported back with `SetIterationTime`.
+
+```c++
+static void BM_ManualTiming(benchmark::State& state) {
+ int microseconds = state.range(0);
+ std::chrono::duration<double, std::micro> sleep_duration {
+ static_cast<double>(microseconds)
+ };
+
+ for (auto _ : state) {
+ auto start = std::chrono::high_resolution_clock::now();
+ // Simulate some useful workload with a sleep
+ std::this_thread::sleep_for(sleep_duration);
+ auto end = std::chrono::high_resolution_clock::now();
+
+ auto elapsed_seconds =
+ std::chrono::duration_cast<std::chrono::duration<double>>(
+ end - start);
+
+ state.SetIterationTime(elapsed_seconds.count());
+ }
+}
+BENCHMARK(BM_ManualTiming)->Range(1, 1<<17)->UseManualTime();
+```
+
+<a name="setting-the-time-unit" />
+
+## Setting the Time Unit
+
+If a benchmark runs for a few milliseconds, it may be hard to visually compare
+the measured times, since the output data is given in nanoseconds by default.
+To set the time unit for a benchmark, specify it explicitly:
+
+```c++
+BENCHMARK(BM_test)->Unit(benchmark::kMillisecond);
+```
+
+Additionally the default time unit can be set globally with the
+`--benchmark_time_unit={ns|us|ms|s}` command line argument. The argument only
+affects benchmarks where the time unit is not set explicitly.
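+
+The default can also be changed programmatically; a minimal sketch, assuming the
+`benchmark::SetDefaultTimeUnit()` entry point declared in `benchmark.h` (it has
+to be called before the benchmarks run to take effect):
+
+```c++
+int main(int argc, char** argv) {
+  benchmark::Initialize(&argc, argv);
+  // Applies to every benchmark that does not set its own Unit().
+  benchmark::SetDefaultTimeUnit(benchmark::kMillisecond);
+  benchmark::RunSpecifiedBenchmarks();
+  benchmark::Shutdown();
+  return 0;
+}
+```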
+
+<a name="preventing-optimization" />
+
+## Preventing Optimization
+
+To prevent a value or expression from being optimized away by the compiler
+the `benchmark::DoNotOptimize(...)` and `benchmark::ClobberMemory()`
+functions can be used.
+
+```c++
+static void BM_test(benchmark::State& state) {
+ for (auto _ : state) {
+ int x = 0;
+ for (int i=0; i < 64; ++i) {
+ benchmark::DoNotOptimize(x += i);
+ }
+ }
+}
+```
+
+`DoNotOptimize(<expr>)` forces the *result* of `<expr>` to be stored in either
+memory or a register. For GNU-based compilers it acts as a read/write barrier
+for global memory. More specifically, it forces the compiler to flush pending
+writes to memory and reload any other values as necessary.
+
+Note that `DoNotOptimize(<expr>)` does not prevent optimizations on `<expr>`
+in any way. `<expr>` may even be removed entirely when the result is already
+known. For example:
+
+```c++
+ /* Example 1: `<expr>` is removed entirely. */
+ int foo(int x) { return x + 42; }
+ while (...) DoNotOptimize(foo(0)); // Optimized to DoNotOptimize(42);
+
+ /* Example 2: Result of '<expr>' is only reused */
+ int bar(int) __attribute__((const));
+ while (...) DoNotOptimize(bar(0)); // Optimized to:
+ // int __result__ = bar(0);
+ // while (...) DoNotOptimize(__result__);
+```
+
+The second tool for preventing optimizations is `ClobberMemory()`. In essence
+`ClobberMemory()` forces the compiler to perform all pending writes to global
+memory. Memory managed by block scope objects must be "escaped" using
+`DoNotOptimize(...)` before it can be clobbered. In the below example
+`ClobberMemory()` prevents the call to `v.push_back(42)` from being optimized
+away.
+
+```c++
+static void BM_vector_push_back(benchmark::State& state) {
+ for (auto _ : state) {
+ std::vector<int> v;
+ v.reserve(1);
+ auto data = v.data(); // Allow v.data() to be clobbered. Pass as non-const
+ benchmark::DoNotOptimize(data); // lvalue to avoid undesired compiler optimizations
+ v.push_back(42);
+ benchmark::ClobberMemory(); // Force 42 to be written to memory.
+ }
+}
+```
+
+Note that `ClobberMemory()` is only available for GNU- or MSVC-based compilers.
+
+<a name="reporting-statistics" />
+
+## Statistics: Reporting the Mean, Median and Standard Deviation / Coefficient of Variation of Repeated Benchmarks
+
+By default each benchmark is run once and that single result is reported.
+However, benchmarks are often noisy and a single result may not be
+representative of the overall behavior. For this reason it's possible to
+repeatedly rerun the benchmark.
+
+The number of runs of each benchmark is specified globally by the
+`--benchmark_repetitions` flag or on a per benchmark basis by calling
+`Repetitions` on the registered benchmark object. When a benchmark is run more
+than once the mean, median, standard deviation and coefficient of variation
+of the runs will be reported.
+
+Additionally the `--benchmark_report_aggregates_only={true|false}` and
+`--benchmark_display_aggregates_only={true|false}` flags, or the
+`ReportAggregatesOnly(bool)` and `DisplayAggregatesOnly(bool)` functions, can be
+used to change how repeated tests are reported. By default the result of each
+repeated run is reported. When the `report aggregates only` option is `true`,
+only the aggregates (i.e. mean, median, standard deviation and coefficient
+of variation, plus complexity measurements if they were requested) of the runs
+are reported, to both reporters: standard output (console) and the file.
+However, when only the `display aggregates only` option is `true`,
+only the aggregates are displayed on standard output, while the file
+output still contains everything.
+Calling `ReportAggregatesOnly(bool)` / `DisplayAggregatesOnly(bool)` on a
+registered benchmark object overrides the value of the appropriate flag for that
+benchmark.
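+
+For instance, a short sketch that repeats a benchmark ten times and reports only
+the aggregates (reusing the `BM_StringCompare` benchmark from the complexity
+example above) might look like:
+
+```c++
+BENCHMARK(BM_StringCompare)
+    ->RangeMultiplier(2)->Range(1<<10, 1<<18)
+    ->Repetitions(10)
+    ->ReportAggregatesOnly(true);
+```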
+
+<a name="custom-statistics" />
+
+## Custom Statistics
+
+While having these aggregates is nice, they may not be enough for everyone.
+For example, you may want to know what the largest observation is, e.g. because
+you have some real-time constraints. This is easy. The following code
+specifies a custom statistic to be calculated, defined by a lambda function.
+
+```c++
+void BM_spin_empty(benchmark::State& state) {
+ for (auto _ : state) {
+ for (int x = 0; x < state.range(0); ++x) {
+ benchmark::DoNotOptimize(x);
+ }
+ }
+}
+
+BENCHMARK(BM_spin_empty)
+ ->ComputeStatistics("max", [](const std::vector<double>& v) -> double {
+ return *(std::max_element(std::begin(v), std::end(v)));
+ })
+ ->Arg(512);
+```
+
+While usually the statistics produce values in time units,
+you can also produce percentages:
+
+```c++
+void BM_spin_empty(benchmark::State& state) {
+ for (auto _ : state) {
+ for (int x = 0; x < state.range(0); ++x) {
+ benchmark::DoNotOptimize(x);
+ }
+ }
+}
+
+BENCHMARK(BM_spin_empty)
+ ->ComputeStatistics("ratio", [](const std::vector<double>& v) -> double {
+    // e.g. report the ratio of the fastest to the slowest repetition
+    return *std::min_element(std::begin(v), std::end(v)) /
+           *std::max_element(std::begin(v), std::end(v));
+ }, benchmark::StatisticUnit::kPercentage)
+ ->Arg(512);
+```
+
+<a name="memory-usage" />
+
+## Memory Usage
+
+It's often useful to also track memory usage for benchmarks, alongside CPU
+performance. For this reason, the library offers the `RegisterMemoryManager`
+function, which allows a custom `MemoryManager` to be injected.
+
+If set, the `MemoryManager::Start` and `MemoryManager::Stop` methods will be
+called at the start and end of benchmark runs to allow user code to fill out
+a report on the number of allocations, bytes used, etc.
+
+This data will then be reported alongside other performance data, currently
+only when using JSON output.
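+
+A minimal sketch of such a hook, assuming the `MemoryManager` interface declared
+in `benchmark.h` (the metrics reported below are placeholders; a real
+implementation would collect them from your allocator):
+
+```c++
+class SimpleMemoryManager : public benchmark::MemoryManager {
+ public:
+  void Start() override {
+    // Begin recording allocation information here, e.g. reset allocator counters.
+  }
+  void Stop(Result& result) override {
+    // Fill out whichever metrics are available; the rest keep their defaults.
+    result.num_allocs = 0;
+    result.max_bytes_used = 0;
+  }
+};
+
+int main(int argc, char** argv) {
+  SimpleMemoryManager mm;
+  benchmark::RegisterMemoryManager(&mm);
+  benchmark::Initialize(&argc, argv);
+  benchmark::RunSpecifiedBenchmarks();
+  benchmark::Shutdown();
+  return 0;
+}
+```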
+
+<a name="using-register-benchmark" />
+
+## Using RegisterBenchmark(name, fn, args...)
+
+The `RegisterBenchmark(name, func, args...)` function provides an alternative
+way to create and register benchmarks.
+`RegisterBenchmark(name, func, args...)` creates, registers, and returns a
+pointer to a new benchmark with the specified `name` that invokes
+`func(st, args...)` where `st` is a `benchmark::State` object.
+
+Unlike the `BENCHMARK` registration macros, which can only be used at global
+scope, `RegisterBenchmark` can be called anywhere. This allows benchmarks
+to be registered programmatically.
+
+Additionally, `RegisterBenchmark` allows any callable object to be registered
+as a benchmark, including capturing lambdas and function objects.
+
+For Example:
+```c++
+auto BM_test = [](benchmark::State& st, auto Inputs) { /* ... */ };
+
+int main(int argc, char** argv) {
+ for (auto& test_input : { /* ... */ })
+ benchmark::RegisterBenchmark(test_input.name(), BM_test, test_input);
+ benchmark::Initialize(&argc, argv);
+ benchmark::RunSpecifiedBenchmarks();
+ benchmark::Shutdown();
+}
+```
+
+<a name="exiting-with-an-error" />
+
+## Exiting with an Error
+
+When errors caused by external influences, such as file I/O and network
+communication, occur within a benchmark, the
+`State::SkipWithError(const std::string& msg)` function can be used to skip that
+run of the benchmark and report the error. Note that only future iterations of
+the `KeepRunning()` loop are skipped. For the ranged-for version of the benchmark
+loop, users must explicitly exit the loop, otherwise all iterations will be
+performed. Users may explicitly `return` to exit the benchmark immediately.
+
+The `SkipWithError(...)` function may be used at any point within the benchmark,
+including before and after the benchmark loop. Moreover, if `SkipWithError(...)`
+has been used, it is not required to reach the benchmark loop and one may return
+from the benchmark function early.
+
+For example:
+
+```c++
+static void BM_test(benchmark::State& state) {
+ auto resource = GetResource();
+ if (!resource.good()) {
+ state.SkipWithError("Resource is not good!");
+ // KeepRunning() loop will not be entered.
+ }
+ while (state.KeepRunning()) {
+ auto data = resource.read_data();
+ if (!resource.good()) {
+ state.SkipWithError("Failed to read data!");
+ break; // Needed to skip the rest of the iteration.
+ }
+ do_stuff(data);
+ }
+}
+
+static void BM_test_ranged_for(benchmark::State& state) {
+ auto resource = GetResource();
+ if (!resource.good()) {
+ state.SkipWithError("Resource is not good!");
+ return; // Early return is allowed when SkipWithError() has been used.
+ }
+ for (auto _ : state) {
+ auto data = resource.read_data();
+ if (!resource.good()) {
+ state.SkipWithError("Failed to read data!");
+ break; // REQUIRED to prevent all further iterations.
+ }
+ do_stuff(data);
+ }
+}
+```
+<a name="a-faster-keep-running-loop" />
+
+## A Faster KeepRunning Loop
+
+In C++11 mode, a range-based for loop should be used in preference to
+the `KeepRunning` loop for running the benchmarks. For example:
+
+```c++
+static void BM_Fast(benchmark::State &state) {
+ for (auto _ : state) {
+ FastOperation();
+ }
+}
+BENCHMARK(BM_Fast);
+```
+
+The reason the range-based for loop is faster than using `KeepRunning` is
+that `KeepRunning` requires a memory load and store of the iteration count
+every iteration, whereas the range-based variant is able to keep the iteration
+count in a register.
+
+For example, an empty inner loop using the range-based for method looks like:
+
+```asm
+# Loop Init
+ mov rbx, qword ptr [r14 + 104]
+ call benchmark::State::StartKeepRunning()
+ test rbx, rbx
+ je .LoopEnd
+.LoopHeader: # =>This Inner Loop Header: Depth=1
+ add rbx, -1
+ jne .LoopHeader
+.LoopEnd:
+```
+
+Compared to an empty `KeepRunning` loop, which looks like:
+
+```asm
+.LoopHeader: # in Loop: Header=BB0_3 Depth=1
+ cmp byte ptr [rbx], 1
+ jne .LoopInit
+.LoopBody: # =>This Inner Loop Header: Depth=1
+ mov rax, qword ptr [rbx + 8]
+ lea rcx, [rax + 1]
+ mov qword ptr [rbx + 8], rcx
+ cmp rax, qword ptr [rbx + 104]
+ jb .LoopHeader
+ jmp .LoopEnd
+.LoopInit:
+ mov rdi, rbx
+ call benchmark::State::StartKeepRunning()
+ jmp .LoopBody
+.LoopEnd:
+```
+
+Unless C++03 compatibility is required, the range-based variant of writing
+the benchmark loop should be preferred.
+
+<a name="disabling-cpu-frequency-scaling" />
+
+## Disabling CPU Frequency Scaling
+
+If you see this error:
+
+```
+***WARNING*** CPU scaling is enabled, the benchmark real time measurements may
+be noisy and will incur extra overhead.
+```
+
+you might want to disable CPU frequency scaling while running the
+benchmark, as well as consider other ways to stabilize the performance of
+your system while benchmarking.
+
+See [Reducing Variance](reducing_variance.md) for more information.
diff --git a/include/benchmark/benchmark.h b/include/benchmark/benchmark.h
index f57e3e7..e3857e7 100644
--- a/include/benchmark/benchmark.h
+++ b/include/benchmark/benchmark.h
@@ -34,7 +34,7 @@ static void BM_StringCopy(benchmark::State& state) {
BENCHMARK(BM_StringCopy);
// Augment the main() program to invoke benchmarks if specified
-// via the --benchmarks command line flag. E.g.,
+// via the --benchmark_filter command line flag. E.g.,
// my_unittest --benchmark_filter=all
// my_unittest --benchmark_filter=BM_StringCreation
// my_unittest --benchmark_filter=String
@@ -42,6 +42,7 @@ BENCHMARK(BM_StringCopy);
int main(int argc, char** argv) {
benchmark::Initialize(&argc, argv);
benchmark::RunSpecifiedBenchmarks();
+ benchmark::Shutdown();
return 0;
}
@@ -139,13 +140,13 @@ thread exits the loop body. As such, any global setup or teardown you want to
do can be wrapped in a check against the thread index:
static void BM_MultiThreaded(benchmark::State& state) {
- if (state.thread_index == 0) {
+ if (state.thread_index() == 0) {
// Setup code here.
}
for (auto _ : state) {
// Run the test as normal.
}
- if (state.thread_index == 0) {
+ if (state.thread_index() == 0) {
// Teardown code here.
}
}
@@ -167,19 +168,29 @@ BENCHMARK(BM_test)->Unit(benchmark::kMillisecond);
#define BENCHMARK_HAS_CXX11
#endif
+// This _MSC_VER check should detect VS 2017 v15.3 and newer.
+#if __cplusplus >= 201703L || \
+ (defined(_MSC_VER) && _MSC_VER >= 1911 && _MSVC_LANG >= 201703L)
+#define BENCHMARK_HAS_CXX17
+#endif
+
#include <stdint.h>
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <iosfwd>
+#include <limits>
#include <map>
#include <set>
#include <string>
#include <utility>
#include <vector>
+#include "benchmark/export.h"
+
#if defined(BENCHMARK_HAS_CXX11)
+#include <atomic>
#include <initializer_list>
#include <type_traits>
#include <utility>
@@ -199,42 +210,63 @@ BENCHMARK(BM_test)->Unit(benchmark::kMillisecond);
TypeName& operator=(const TypeName&) = delete
#endif
-#if defined(__GNUC__)
+#ifdef BENCHMARK_HAS_CXX17
+#define BENCHMARK_UNUSED [[maybe_unused]]
+#elif defined(__GNUC__) || defined(__clang__)
#define BENCHMARK_UNUSED __attribute__((unused))
-#define BENCHMARK_ALWAYS_INLINE __attribute__((always_inline))
-#define BENCHMARK_NOEXCEPT noexcept
-#define BENCHMARK_NOEXCEPT_OP(x) noexcept(x)
-#elif defined(_MSC_VER) && !defined(__clang__)
+#else
#define BENCHMARK_UNUSED
-#define BENCHMARK_ALWAYS_INLINE __forceinline
-#if _MSC_VER >= 1900
-#define BENCHMARK_NOEXCEPT noexcept
-#define BENCHMARK_NOEXCEPT_OP(x) noexcept(x)
+#endif
+
+// Used to annotate functions, methods and classes so they
+// are not optimized by the compiler. Useful for tests
+// where you expect loops to stay in place churning cycles
+#if defined(__clang__)
+#define BENCHMARK_DONT_OPTIMIZE __attribute__((optnone))
+#elif defined(__GNUC__) || defined(__GNUG__)
+#define BENCHMARK_DONT_OPTIMIZE __attribute__((optimize(0)))
#else
-#define BENCHMARK_NOEXCEPT
-#define BENCHMARK_NOEXCEPT_OP(x)
+// MSVC & Intel do not have a no-optimize attribute, only line pragmas
+#define BENCHMARK_DONT_OPTIMIZE
#endif
+
+#if defined(__GNUC__) || defined(__clang__)
+#define BENCHMARK_ALWAYS_INLINE __attribute__((always_inline))
+#elif defined(_MSC_VER) && !defined(__clang__)
+#define BENCHMARK_ALWAYS_INLINE __forceinline
#define __func__ __FUNCTION__
#else
-#define BENCHMARK_UNUSED
#define BENCHMARK_ALWAYS_INLINE
-#define BENCHMARK_NOEXCEPT
-#define BENCHMARK_NOEXCEPT_OP(x)
#endif
#define BENCHMARK_INTERNAL_TOSTRING2(x) #x
#define BENCHMARK_INTERNAL_TOSTRING(x) BENCHMARK_INTERNAL_TOSTRING2(x)
-#if defined(__GNUC__) || defined(__clang__)
+// clang-format off
+#if (defined(__GNUC__) && !defined(__NVCC__) && !defined(__NVCOMPILER)) || defined(__clang__)
#define BENCHMARK_BUILTIN_EXPECT(x, y) __builtin_expect(x, y)
#define BENCHMARK_DEPRECATED_MSG(msg) __attribute__((deprecated(msg)))
+#define BENCHMARK_DISABLE_DEPRECATED_WARNING \
+ _Pragma("GCC diagnostic push") \
+ _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"")
+#define BENCHMARK_RESTORE_DEPRECATED_WARNING _Pragma("GCC diagnostic pop")
+#elif defined(__NVCOMPILER)
+#define BENCHMARK_BUILTIN_EXPECT(x, y) __builtin_expect(x, y)
+#define BENCHMARK_DEPRECATED_MSG(msg) __attribute__((deprecated(msg)))
+#define BENCHMARK_DISABLE_DEPRECATED_WARNING \
+ _Pragma("diagnostic push") \
+ _Pragma("diag_suppress deprecated_entity_with_custom_message")
+#define BENCHMARK_RESTORE_DEPRECATED_WARNING _Pragma("diagnostic pop")
#else
#define BENCHMARK_BUILTIN_EXPECT(x, y) x
#define BENCHMARK_DEPRECATED_MSG(msg)
#define BENCHMARK_WARNING_MSG(msg) \
__pragma(message(__FILE__ "(" BENCHMARK_INTERNAL_TOSTRING( \
__LINE__) ") : warning note: " msg))
+#define BENCHMARK_DISABLE_DEPRECATED_WARNING
+#define BENCHMARK_RESTORE_DEPRECATED_WARNING
#endif
+// clang-format on
#if defined(__GNUC__) && !defined(__clang__)
#define BENCHMARK_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
@@ -252,21 +284,60 @@ BENCHMARK(BM_test)->Unit(benchmark::kMillisecond);
#define BENCHMARK_UNREACHABLE() ((void)0)
#endif
+#ifdef BENCHMARK_HAS_CXX11
+#define BENCHMARK_OVERRIDE override
+#else
+#define BENCHMARK_OVERRIDE
+#endif
+
+#if defined(_MSC_VER)
+#pragma warning(push)
+// C4251: <symbol> needs to have dll-interface to be used by clients of class
+#pragma warning(disable : 4251)
+#endif
+
namespace benchmark {
class BenchmarkReporter;
-class MemoryManager;
-void Initialize(int* argc, char** argv);
+// Default number of minimum benchmark running time in seconds.
+const char kDefaultMinTimeStr[] = "0.5s";
+
+BENCHMARK_EXPORT void PrintDefaultHelp();
+
+BENCHMARK_EXPORT void Initialize(int* argc, char** argv,
+ void (*HelperPrinterf)() = PrintDefaultHelp);
+BENCHMARK_EXPORT void Shutdown();
// Report to stdout all arguments in 'argv' as unrecognized except the first.
// Returns true there is at least on unrecognized argument (i.e. 'argc' > 1).
-bool ReportUnrecognizedArguments(int argc, char** argv);
+BENCHMARK_EXPORT bool ReportUnrecognizedArguments(int argc, char** argv);
+
+// Returns the current value of --benchmark_filter.
+BENCHMARK_EXPORT std::string GetBenchmarkFilter();
+
+// Sets a new value to --benchmark_filter. (This will override this flag's
+// current value).
+// Should be called after `benchmark::Initialize()`, as
+// `benchmark::Initialize()` will override the flag's value.
+BENCHMARK_EXPORT void SetBenchmarkFilter(std::string value);
+
+// Returns the current value of --v (command line value for verbosity).
+BENCHMARK_EXPORT int32_t GetBenchmarkVerbosity();
+
+// Creates a default display reporter. Used by the library when no display
+// reporter is provided, but also made available for external use in case a
+// custom reporter should respect the `--benchmark_format` flag as a fallback
+BENCHMARK_EXPORT BenchmarkReporter* CreateDefaultDisplayReporter();
// Generate a list of benchmarks matching the specified --benchmark_filter flag
// and if --benchmark_list_tests is specified return after printing the name
// of each matching benchmark. Otherwise run each matching benchmark and
// report the results.
//
+// spec : Specify the benchmarks to run. If users do not specify this arg,
+// then the value of FLAGS_benchmark_filter
+// will be used.
+//
// The second and third overload use the specified 'display_reporter' and
// 'file_reporter' respectively. 'file_reporter' will write to the file
// specified
@@ -274,28 +345,94 @@ bool ReportUnrecognizedArguments(int argc, char** argv);
// 'file_reporter' is ignored.
//
// RETURNS: The number of matching benchmarks.
-size_t RunSpecifiedBenchmarks();
-size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter);
-size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter,
- BenchmarkReporter* file_reporter);
+BENCHMARK_EXPORT size_t RunSpecifiedBenchmarks();
+BENCHMARK_EXPORT size_t RunSpecifiedBenchmarks(std::string spec);
+
+BENCHMARK_EXPORT size_t
+RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter);
+BENCHMARK_EXPORT size_t
+RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter, std::string spec);
+
+BENCHMARK_EXPORT size_t RunSpecifiedBenchmarks(
+ BenchmarkReporter* display_reporter, BenchmarkReporter* file_reporter);
+BENCHMARK_EXPORT size_t
+RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter,
+ BenchmarkReporter* file_reporter, std::string spec);
+
+// TimeUnit is passed to a benchmark in order to specify the order of magnitude
+// for the measured time.
+enum TimeUnit { kNanosecond, kMicrosecond, kMillisecond, kSecond };
+
+BENCHMARK_EXPORT TimeUnit GetDefaultTimeUnit();
+
+// Sets the default time unit the benchmarks use
+// Has to be called before the benchmark loop to take effect
+BENCHMARK_EXPORT void SetDefaultTimeUnit(TimeUnit unit);
+
+// If a MemoryManager is registered (via RegisterMemoryManager()),
+// it can be used to collect and report allocation metrics for a run of the
+// benchmark.
+class MemoryManager {
+ public:
+ static const int64_t TombstoneValue;
+
+ struct Result {
+ Result()
+ : num_allocs(0),
+ max_bytes_used(0),
+ total_allocated_bytes(TombstoneValue),
+ net_heap_growth(TombstoneValue) {}
+
+ // The number of allocations made in total between Start and Stop.
+ int64_t num_allocs;
+
+ // The peak memory use between Start and Stop.
+ int64_t max_bytes_used;
+
+ // The total memory allocated, in bytes, between Start and Stop.
+ // Init'ed to TombstoneValue if metric not available.
+ int64_t total_allocated_bytes;
+
+ // The net changes in memory, in bytes, between Start and Stop.
+ // ie., total_allocated_bytes - total_deallocated_bytes.
+ // Init'ed to TombstoneValue if metric not available.
+ int64_t net_heap_growth;
+ };
+
+ virtual ~MemoryManager() {}
+
+ // Implement this to start recording allocation information.
+ virtual void Start() = 0;
+
+ // Implement this to stop recording and fill out the given Result structure.
+ virtual void Stop(Result& result) = 0;
+};
// Register a MemoryManager instance that will be used to collect and report
// allocation measurements for benchmark runs.
+BENCHMARK_EXPORT
void RegisterMemoryManager(MemoryManager* memory_manager);
+// Add a key-value pair to output as part of the context stanza in the report.
+BENCHMARK_EXPORT
+void AddCustomContext(const std::string& key, const std::string& value);
+
namespace internal {
class Benchmark;
class BenchmarkImp;
class BenchmarkFamilies;
+BENCHMARK_EXPORT std::map<std::string, std::string>*& GetGlobalContext();
+
+BENCHMARK_EXPORT
void UseCharPointer(char const volatile*);
// Take ownership of the pointer and register the benchmark. Return the
// registered benchmark.
-Benchmark* RegisterBenchmarkInternal(Benchmark*);
+BENCHMARK_EXPORT Benchmark* RegisterBenchmarkInternal(Benchmark*);
// Ensure that the standard streams are properly initialized in every TU.
-int InitializeStreams();
+BENCHMARK_EXPORT int InitializeStreams();
BENCHMARK_UNUSED static int stream_init_anchor = InitializeStreams();
} // namespace internal
@@ -305,12 +442,24 @@ BENCHMARK_UNUSED static int stream_init_anchor = InitializeStreams();
#define BENCHMARK_HAS_NO_INLINE_ASSEMBLY
#endif
+// Force the compiler to flush pending writes to global memory. Acts as an
+// effective read/write barrier
+#ifdef BENCHMARK_HAS_CXX11
+inline BENCHMARK_ALWAYS_INLINE void ClobberMemory() {
+ std::atomic_signal_fence(std::memory_order_acq_rel);
+}
+#endif
+
// The DoNotOptimize(...) function can be used to prevent a value or
// expression from being optimized away by the compiler. This function is
// intended to add little to no overhead.
// See: https://youtu.be/nXaxk27zwlk?t=2441
#ifndef BENCHMARK_HAS_NO_INLINE_ASSEMBLY
+#if !defined(__GNUC__) || defined(__llvm__) || defined(__INTEL_COMPILER)
template <class Tp>
+BENCHMARK_DEPRECATED_MSG(
+ "The const-ref version of this method can permit "
+ "undesired compiler optimizations in benchmarks")
inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) {
asm volatile("" : : "r,m"(value) : "memory");
}
@@ -324,25 +473,125 @@ inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp& value) {
#endif
}
-// Force the compiler to flush pending writes to global memory. Acts as an
-// effective read/write barrier
+#ifdef BENCHMARK_HAS_CXX11
+template <class Tp>
+inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp&& value) {
+#if defined(__clang__)
+ asm volatile("" : "+r,m"(value) : : "memory");
+#else
+ asm volatile("" : "+m,r"(value) : : "memory");
+#endif
+}
+#endif
+#elif defined(BENCHMARK_HAS_CXX11) && (__GNUC__ >= 5)
+// Workaround for a bug with full argument copy overhead with GCC.
+// See: #1340 and https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105519
+template <class Tp>
+BENCHMARK_DEPRECATED_MSG(
+ "The const-ref version of this method can permit "
+ "undesired compiler optimizations in benchmarks")
+inline BENCHMARK_ALWAYS_INLINE
+ typename std::enable_if<std::is_trivially_copyable<Tp>::value &&
+ (sizeof(Tp) <= sizeof(Tp*))>::type
+ DoNotOptimize(Tp const& value) {
+ asm volatile("" : : "r,m"(value) : "memory");
+}
+
+template <class Tp>
+BENCHMARK_DEPRECATED_MSG(
+ "The const-ref version of this method can permit "
+ "undesired compiler optimizations in benchmarks")
+inline BENCHMARK_ALWAYS_INLINE
+ typename std::enable_if<!std::is_trivially_copyable<Tp>::value ||
+ (sizeof(Tp) > sizeof(Tp*))>::type
+ DoNotOptimize(Tp const& value) {
+ asm volatile("" : : "m"(value) : "memory");
+}
+
+template <class Tp>
+inline BENCHMARK_ALWAYS_INLINE
+ typename std::enable_if<std::is_trivially_copyable<Tp>::value &&
+ (sizeof(Tp) <= sizeof(Tp*))>::type
+ DoNotOptimize(Tp& value) {
+ asm volatile("" : "+m,r"(value) : : "memory");
+}
+
+template <class Tp>
+inline BENCHMARK_ALWAYS_INLINE
+ typename std::enable_if<!std::is_trivially_copyable<Tp>::value ||
+ (sizeof(Tp) > sizeof(Tp*))>::type
+ DoNotOptimize(Tp& value) {
+ asm volatile("" : "+m"(value) : : "memory");
+}
+
+template <class Tp>
+inline BENCHMARK_ALWAYS_INLINE
+ typename std::enable_if<std::is_trivially_copyable<Tp>::value &&
+ (sizeof(Tp) <= sizeof(Tp*))>::type
+ DoNotOptimize(Tp&& value) {
+ asm volatile("" : "+m,r"(value) : : "memory");
+}
+
+template <class Tp>
+inline BENCHMARK_ALWAYS_INLINE
+ typename std::enable_if<!std::is_trivially_copyable<Tp>::value ||
+ (sizeof(Tp) > sizeof(Tp*))>::type
+ DoNotOptimize(Tp&& value) {
+ asm volatile("" : "+m"(value) : : "memory");
+}
+
+#else
+// Fallback for GCC < 5. Can add some overhead because the compiler is forced
+// to use memory operations instead of operations with registers.
+// TODO: Remove if GCC < 5 will be unsupported.
+template <class Tp>
+BENCHMARK_DEPRECATED_MSG(
+ "The const-ref version of this method can permit "
+ "undesired compiler optimizations in benchmarks")
+inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) {
+ asm volatile("" : : "m"(value) : "memory");
+}
+
+template <class Tp>
+inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp& value) {
+ asm volatile("" : "+m"(value) : : "memory");
+}
+
+#ifdef BENCHMARK_HAS_CXX11
+template <class Tp>
+inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp&& value) {
+ asm volatile("" : "+m"(value) : : "memory");
+}
+#endif
+#endif
+
+#ifndef BENCHMARK_HAS_CXX11
inline BENCHMARK_ALWAYS_INLINE void ClobberMemory() {
asm volatile("" : : : "memory");
}
+#endif
#elif defined(_MSC_VER)
template <class Tp>
+BENCHMARK_DEPRECATED_MSG(
+ "The const-ref version of this method can permit "
+ "undesired compiler optimizations in benchmarks")
inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) {
internal::UseCharPointer(&reinterpret_cast<char const volatile&>(value));
_ReadWriteBarrier();
}
+#ifndef BENCHMARK_HAS_CXX11
inline BENCHMARK_ALWAYS_INLINE void ClobberMemory() { _ReadWriteBarrier(); }
+#endif
#else
template <class Tp>
+BENCHMARK_DEPRECATED_MSG(
+ "The const-ref version of this method can permit "
+ "undesired compiler optimizations in benchmarks")
inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) {
internal::UseCharPointer(&reinterpret_cast<char const volatile&>(value));
}
-// FIXME Add ClobberMemory() for non-gnu and non-msvc compilers
+// FIXME Add ClobberMemory() for non-gnu and non-msvc compilers, before C++11.
#endif
// This class is used for user-defined counters.
@@ -352,27 +601,27 @@ class Counter {
kDefaults = 0,
// Mark the counter as a rate. It will be presented divided
// by the duration of the benchmark.
- kIsRate = 1U << 0U,
+ kIsRate = 1 << 0,
// Mark the counter as a thread-average quantity. It will be
// presented divided by the number of threads.
- kAvgThreads = 1U << 1U,
+ kAvgThreads = 1 << 1,
// Mark the counter as a thread-average rate. See above.
kAvgThreadsRate = kIsRate | kAvgThreads,
// Mark the counter as a constant value, valid/same for *every* iteration.
// When reporting, it will be *multiplied* by the iteration count.
- kIsIterationInvariant = 1U << 2U,
+ kIsIterationInvariant = 1 << 2,
// Mark the counter as a constant rate.
// When reporting, it will be *multiplied* by the iteration count
// and then divided by the duration of the benchmark.
kIsIterationInvariantRate = kIsRate | kIsIterationInvariant,
// Mark the counter as a iteration-average quantity.
// It will be presented divided by the number of iterations.
- kAvgIterations = 1U << 3U,
+ kAvgIterations = 1 << 3,
// Mark the counter as a iteration-average rate. See above.
kAvgIterationsRate = kIsRate | kAvgIterations,
// In the end, invert the result. This is always done last!
- kInvert = 1U << 31U
+ kInvert = 1 << 31
};
enum OneK {
@@ -390,7 +639,7 @@ class Counter {
Counter(double v = 0., Flags f = kDefaults, OneK k = kIs1000)
: value(v), flags(f), oneK(k) {}
- BENCHMARK_ALWAYS_INLINE operator double const&() const { return value; }
+ BENCHMARK_ALWAYS_INLINE operator double const &() const { return value; }
BENCHMARK_ALWAYS_INLINE operator double&() { return value; }
};
@@ -405,17 +654,15 @@ Counter::Flags inline operator|(const Counter::Flags& LHS,
// This is the container for the user-defined counters.
typedef std::map<std::string, Counter> UserCounters;
-// TimeUnit is passed to a benchmark in order to specify the order of magnitude
-// for the measured time.
-enum TimeUnit { kNanosecond, kMicrosecond, kMillisecond, kSecond };
-
// BigO is passed to a benchmark in order to specify the asymptotic
// computational
// complexity for the benchmark. In case oAuto is selected, complexity will be
// calculated automatically to the best fit.
enum BigO { oNone, o1, oN, oNSquared, oNCubed, oLogN, oNLogN, oAuto, oLambda };
-typedef uint64_t IterationCount;
+typedef int64_t IterationCount;
+
+enum StatisticUnit { kTime, kPercentage };
// BigOFunc is passed to a benchmark in order to specify the asymptotic
// computational complexity for the benchmark.
@@ -429,14 +676,17 @@ namespace internal {
struct Statistics {
std::string name_;
StatisticsFunc* compute_;
+ StatisticUnit unit_;
- Statistics(const std::string& name, StatisticsFunc* compute)
- : name_(name), compute_(compute) {}
+ Statistics(const std::string& name, StatisticsFunc* compute,
+ StatisticUnit unit = kTime)
+ : name_(name), compute_(compute), unit_(unit) {}
};
-struct BenchmarkInstance;
+class BenchmarkInstance;
class ThreadTimer;
class ThreadManager;
+class PerfCountersMeasurement;
enum AggregationReportMode
#if defined(BENCHMARK_HAS_CXX11)
@@ -458,11 +708,21 @@ enum AggregationReportMode
ARM_FileReportAggregatesOnly | ARM_DisplayReportAggregatesOnly
};
+enum Skipped
+#if defined(BENCHMARK_HAS_CXX11)
+ : unsigned
+#endif
+{
+ NotSkipped = 0,
+ SkippedWithMessage,
+ SkippedWithError
+};
+
} // namespace internal
// State is passed to a running Benchmark and contains state for the
// benchmark to use.
-class State {
+class BENCHMARK_EXPORT State {
public:
struct StateIterator;
friend struct StateIterator;
@@ -494,8 +754,8 @@ class State {
// }
bool KeepRunningBatch(IterationCount n);
- // REQUIRES: timer is running and 'SkipWithError(...)' has not been called
- // by the current thread.
+ // REQUIRES: timer is running and 'SkipWithMessage(...)' or
+ // 'SkipWithError(...)' has not been called by the current thread.
// Stop the benchmark timer. If not called, the timer will be
// automatically stopped after the last iteration of the benchmark loop.
//
@@ -510,8 +770,8 @@ class State {
// within each benchmark iteration, if possible.
void PauseTiming();
- // REQUIRES: timer is not running and 'SkipWithError(...)' has not been called
- // by the current thread.
+ // REQUIRES: timer is not running and 'SkipWithMessage(...)' or
+ // 'SkipWithError(...)' has not been called by the current thread.
// Start the benchmark timer. The timer is NOT running on entrance to the
// benchmark function. It begins running after control flow enters the
// benchmark loop.
@@ -521,8 +781,30 @@ class State {
// within each benchmark iteration, if possible.
void ResumeTiming();
- // REQUIRES: 'SkipWithError(...)' has not been called previously by the
- // current thread.
+ // REQUIRES: 'SkipWithMessage(...)' or 'SkipWithError(...)' has not been
+ // called previously by the current thread.
+ // Report the benchmark as resulting in being skipped with the specified
+ // 'msg'.
+ // After this call the user may explicitly 'return' from the benchmark.
+ //
+ // If the ranged-for style of benchmark loop is used, the user must explicitly
+ // break from the loop, otherwise all future iterations will be run.
+ // If the 'KeepRunning()' loop is used the current thread will automatically
+ // exit the loop at the end of the current iteration.
+ //
+ // For threaded benchmarks only the current thread stops executing and future
+ // calls to `KeepRunning()` will block until all threads have completed
+ // the `KeepRunning()` loop. If multiple threads report being skipped only the
+ // first skip message is used.
+ //
+ // NOTE: Calling 'SkipWithMessage(...)' does not cause the benchmark to exit
+ // the current scope immediately. If the function is called from within
+ // the 'KeepRunning()' loop the current iteration will finish. It is the users
+ // responsibility to exit the scope as needed.
+ void SkipWithMessage(const std::string& msg);
+
+ // REQUIRES: 'SkipWithMessage(...)' or 'SkipWithError(...)' has not been
+ // called previously by the current thread.
// Report the benchmark as resulting in an error with the specified 'msg'.
// After this call the user may explicitly 'return' from the benchmark.
//
@@ -540,10 +822,13 @@ class State {
// the current scope immediately. If the function is called from within
// the 'KeepRunning()' loop the current iteration will finish. It is the users
// responsibility to exit the scope as needed.
- void SkipWithError(const char* msg);
+ void SkipWithError(const std::string& msg);
+
+ // Returns true if 'SkipWithMessage(...)' or 'SkipWithError(...)' was called.
+ bool skipped() const { return internal::NotSkipped != skipped_; }
// Returns true if an error has been reported with 'SkipWithError(...)'.
- bool error_occurred() const { return error_occurred_; }
+ bool error_occurred() const { return internal::SkippedWithError == skipped_; }
// REQUIRES: called exactly once per iteration of the benchmarking loop.
// Set the manually measured time for this benchmark iteration, which
@@ -614,11 +899,7 @@ class State {
// BM_Compress 50 50 14115038 compress:27.3%
//
// REQUIRES: a benchmark has exited its benchmarking loop.
- void SetLabel(const char* label);
-
- void BENCHMARK_ALWAYS_INLINE SetLabel(const std::string& str) {
- this->SetLabel(str.c_str());
- }
+ void SetLabel(const std::string& label);
// Range arguments for this run. CHECKs if the argument has been set.
BENCHMARK_ALWAYS_INLINE
@@ -633,6 +914,14 @@ class State {
BENCHMARK_DEPRECATED_MSG("use 'range(1)' instead")
int64_t range_y() const { return range(1); }
+ // Number of threads concurrently executing the benchmark.
+ BENCHMARK_ALWAYS_INLINE
+ int threads() const { return threads_; }
+
+ // Index of the executing thread. Values from [0, threads).
+ BENCHMARK_ALWAYS_INLINE
+ int thread_index() const { return thread_index_; }
+
BENCHMARK_ALWAYS_INLINE
IterationCount iterations() const {
if (BENCHMARK_BUILTIN_EXPECT(!started_, false)) {
@@ -641,8 +930,11 @@ class State {
return max_iterations - total_iterations_ + batch_leftover_;
}
- private
- : // items we expect on the first cache line (ie 64 bytes of the struct)
+ BENCHMARK_ALWAYS_INLINE
+ std::string name() const { return name_; }
+
+ private:
+ // items we expect on the first cache line (ie 64 bytes of the struct)
// When total_iterations_ is 0, KeepRunning() and friends will return false.
// May be larger than max_iterations.
IterationCount total_iterations_;
@@ -658,9 +950,9 @@ class State {
private:
bool started_;
bool finished_;
- bool error_occurred_;
+ internal::Skipped skipped_;
- private: // items we don't need on the first cache line
+ // items we don't need on the first cache line
std::vector<int64_t> range_;
int64_t complexity_n_;
@@ -668,25 +960,28 @@ class State {
public:
// Container for user-defined counters.
UserCounters counters;
- // Index of the executing thread. Values from [0, threads).
- const int thread_index;
- // Number of threads concurrently executing the benchmark.
- const int threads;
private:
- State(IterationCount max_iters, const std::vector<int64_t>& ranges,
- int thread_i, int n_threads, internal::ThreadTimer* timer,
- internal::ThreadManager* manager);
+ State(std::string name, IterationCount max_iters,
+ const std::vector<int64_t>& ranges, int thread_i, int n_threads,
+ internal::ThreadTimer* timer, internal::ThreadManager* manager,
+ internal::PerfCountersMeasurement* perf_counters_measurement);
void StartKeepRunning();
// Implementation of KeepRunning() and KeepRunningBatch().
// is_batch must be true unless n is 1.
bool KeepRunningInternal(IterationCount n, bool is_batch);
void FinishKeepRunning();
- internal::ThreadTimer* timer_;
- internal::ThreadManager* manager_;
- friend struct internal::BenchmarkInstance;
+ const std::string name_;
+ const int thread_index_;
+ const int threads_;
+
+ internal::ThreadTimer* const timer_;
+ internal::ThreadManager* const manager_;
+ internal::PerfCountersMeasurement* const perf_counters_measurement_;
+
+ friend class internal::BenchmarkInstance;
};
inline BENCHMARK_ALWAYS_INLINE bool State::KeepRunning() {
@@ -710,7 +1005,7 @@ inline BENCHMARK_ALWAYS_INLINE bool State::KeepRunningInternal(IterationCount n,
}
if (!started_) {
StartKeepRunning();
- if (!error_occurred_ && total_iterations_ >= n) {
+ if (!skipped() && total_iterations_ >= n) {
total_iterations_ -= n;
return true;
}
@@ -740,7 +1035,7 @@ struct State::StateIterator {
BENCHMARK_ALWAYS_INLINE
explicit StateIterator(State* st)
- : cached_(st->error_occurred_ ? 0 : st->max_iterations), parent_(st) {}
+ : cached_(st->skipped() ? 0 : st->max_iterations), parent_(st) {}
public:
BENCHMARK_ALWAYS_INLINE
@@ -783,13 +1078,16 @@ typedef void(Function)(State&);
// be called on this object to change the properties of the benchmark.
// Each method returns "this" so that multiple method calls can
// chained into one expression.
-class Benchmark {
+class BENCHMARK_EXPORT Benchmark {
public:
virtual ~Benchmark();
// Note: the following methods all return "this" so that multiple
// method calls can be chained together in one expression.
+ // Specify the name of the benchmark
+ Benchmark* Name(const std::string& name);
+
// Run this benchmark once with "x" as the extra argument passed
// to the function.
// REQUIRES: The function passed to the constructor must accept an arg1.
@@ -850,6 +1148,23 @@ class Benchmark {
return Ranges(ranges);
}
+ // Have "setup" and/or "teardown" invoked once for every benchmark run.
+ // If the benchmark is multi-threaded (will run in k threads concurrently),
+ // the setup callback will be be invoked exactly once (not k times) before
+ // each run with k threads. Time allowing (e.g. for a short benchmark), there
+ // may be multiple such runs per benchmark, each run with its own
+ // "setup"/"teardown".
+ //
+ // If the benchmark uses different size groups of threads (e.g. via
+ // ThreadRange), the above will be true for each size group.
+ //
+ // The callback will be passed a State object, which includes the number
+ // of threads, thread-index, benchmark arguments, etc.
+ //
+ // The callback must not be NULL or self-deleting.
+ Benchmark* Setup(void (*setup)(const benchmark::State&));
+ Benchmark* Teardown(void (*teardown)(const benchmark::State&));
+
// Pass this benchmark object to *func, which can customize
// the benchmark by calling various methods like Arg, Args,
// Threads, etc.
@@ -864,12 +1179,19 @@ class Benchmark {
// REQUIRES: `t > 0` and `Iterations` has not been called on this benchmark.
Benchmark* MinTime(double t);
+ // Set the minimum amount of time to run the benchmark before taking runtimes
+ // of this benchmark into account. This
+ // option overrides the `benchmark_min_warmup_time` flag.
+ // REQUIRES: `t >= 0` and `Iterations` has not been called on this benchmark.
+ Benchmark* MinWarmUpTime(double t);
+
// Specify the amount of iterations that should be run by this benchmark.
+ // This option overrides the `benchmark_min_time` flag.
// REQUIRES: 'n > 0' and `MinTime` has not been called on this benchmark.
//
// NOTE: This function should only be used when *exact* iteration control is
// needed and never to control or limit how long a benchmark runs, where
- // `--benchmark_min_time=N` or `MinTime(...)` should be used instead.
+ // `--benchmark_min_time=<N>s` or `MinTime(...)` should be used instead.
Benchmark* Iterations(IterationCount n);
// Specify the amount of times to repeat this benchmark. This option overrides
@@ -889,7 +1211,7 @@ class Benchmark {
// By default, the CPU time is measured only for the main thread, which may
// be unrepresentative if the benchmark uses threads internally. If called,
// the total CPU time spent by all the threads will be measured instead.
- // By default, the only the main thread CPU time will be measured.
+ // By default, only the main thread CPU time will be measured.
Benchmark* MeasureProcessCPUTime();
// If a particular benchmark should use the Wall clock instead of the CPU time
@@ -918,7 +1240,9 @@ class Benchmark {
Benchmark* Complexity(BigOFunc* complexity);
// Add this statistics to be computed over all the values of benchmark run
- Benchmark* ComputeStatistics(std::string name, StatisticsFunc* statistics);
+ Benchmark* ComputeStatistics(const std::string& name,
+ StatisticsFunc* statistics,
+ StatisticUnit unit = kTime);
// Support for running multiple copies of the same benchmark concurrently
// in multiple threads. This may be useful when measuring the scaling
@@ -952,23 +1276,32 @@ class Benchmark {
virtual void Run(State& state) = 0;
+ TimeUnit GetTimeUnit() const;
+
protected:
- explicit Benchmark(const char* name);
- Benchmark(Benchmark const&);
- void SetName(const char* name);
+ explicit Benchmark(const std::string& name);
+ void SetName(const std::string& name);
+ public:
+ const char* GetName() const;
int ArgsCnt() const;
+ const char* GetArgName(int arg) const;
private:
friend class BenchmarkFamilies;
+ friend class BenchmarkInstance;
std::string name_;
AggregationReportMode aggregation_report_mode_;
std::vector<std::string> arg_names_; // Args for all benchmark runs
std::vector<std::vector<int64_t> > args_; // Args for all benchmark runs
+
TimeUnit time_unit_;
+ bool use_default_time_unit_;
+
int range_multiplier_;
double min_time_;
+ double min_warmup_time_;
IterationCount iterations_;
int repetitions_;
bool measure_process_cpu_time_;
@@ -979,7 +1312,21 @@ class Benchmark {
std::vector<Statistics> statistics_;
std::vector<int> thread_counts_;
- Benchmark& operator=(Benchmark const&);
+ typedef void (*callback_function)(const benchmark::State&);
+ callback_function setup_;
+ callback_function teardown_;
+
+ Benchmark(Benchmark const&)
+#if defined(BENCHMARK_HAS_CXX11)
+ = delete
+#endif
+ ;
+
+ Benchmark& operator=(Benchmark const&)
+#if defined(BENCHMARK_HAS_CXX11)
+ = delete
+#endif
+ ;
};
} // namespace internal
@@ -988,27 +1335,27 @@ class Benchmark {
// the specified functor 'fn'.
//
// RETURNS: A pointer to the registered benchmark.
-internal::Benchmark* RegisterBenchmark(const char* name,
+internal::Benchmark* RegisterBenchmark(const std::string& name,
internal::Function* fn);
#if defined(BENCHMARK_HAS_CXX11)
template <class Lambda>
-internal::Benchmark* RegisterBenchmark(const char* name, Lambda&& fn);
+internal::Benchmark* RegisterBenchmark(const std::string& name, Lambda&& fn);
#endif
// Remove all registered benchmarks. All pointers to previously registered
// benchmarks are invalidated.
-void ClearRegisteredBenchmarks();
+BENCHMARK_EXPORT void ClearRegisteredBenchmarks();
namespace internal {
// The class used to hold all Benchmarks created from static function.
// (ie those created using the BENCHMARK(...) macros.
-class FunctionBenchmark : public Benchmark {
+class BENCHMARK_EXPORT FunctionBenchmark : public Benchmark {
public:
- FunctionBenchmark(const char* name, Function* func)
+ FunctionBenchmark(const std::string& name, Function* func)
: Benchmark(name), func_(func) {}
- virtual void Run(State& st);
+ void Run(State& st) BENCHMARK_OVERRIDE;
private:
Function* func_;
@@ -1018,36 +1365,38 @@ class FunctionBenchmark : public Benchmark {
template <class Lambda>
class LambdaBenchmark : public Benchmark {
public:
- virtual void Run(State& st) { lambda_(st); }
+ void Run(State& st) BENCHMARK_OVERRIDE { lambda_(st); }
private:
template <class OLambda>
- LambdaBenchmark(const char* name, OLambda&& lam)
+ LambdaBenchmark(const std::string& name, OLambda&& lam)
: Benchmark(name), lambda_(std::forward<OLambda>(lam)) {}
LambdaBenchmark(LambdaBenchmark const&) = delete;
- private:
- template <class Lam>
- friend Benchmark* ::benchmark::RegisterBenchmark(const char*, Lam&&);
+ template <class Lam> // NOLINTNEXTLINE(readability-redundant-declaration)
+ friend Benchmark* ::benchmark::RegisterBenchmark(const std::string&, Lam&&);
Lambda lambda_;
};
#endif
-
} // namespace internal
-inline internal::Benchmark* RegisterBenchmark(const char* name,
+inline internal::Benchmark* RegisterBenchmark(const std::string& name,
internal::Function* fn) {
+ // FIXME: this should be a `std::make_unique<>()` but we don't have C++14.
+ // codechecker_intentional [cplusplus.NewDeleteLeaks]
return internal::RegisterBenchmarkInternal(
::new internal::FunctionBenchmark(name, fn));
}
#ifdef BENCHMARK_HAS_CXX11
template <class Lambda>
-internal::Benchmark* RegisterBenchmark(const char* name, Lambda&& fn) {
+internal::Benchmark* RegisterBenchmark(const std::string& name, Lambda&& fn) {
using BenchType =
internal::LambdaBenchmark<typename std::decay<Lambda>::type>;
+ // FIXME: this should be a `std::make_unique<>()` but we don't have C++14.
+ // codechecker_intentional [cplusplus.NewDeleteLeaks]
return internal::RegisterBenchmarkInternal(
::new BenchType(name, std::forward<Lambda>(fn)));
}
@@ -1056,7 +1405,7 @@ internal::Benchmark* RegisterBenchmark(const char* name, Lambda&& fn) {
#if defined(BENCHMARK_HAS_CXX11) && \
(!defined(BENCHMARK_GCC_VERSION) || BENCHMARK_GCC_VERSION >= 409)
template <class Lambda, class... Args>
-internal::Benchmark* RegisterBenchmark(const char* name, Lambda&& fn,
+internal::Benchmark* RegisterBenchmark(const std::string& name, Lambda&& fn,
Args&&... args) {
return benchmark::RegisterBenchmark(
name, [=](benchmark::State& st) { fn(st, args...); });
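Because the registration functions now take std::string, benchmark names can be composed at runtime. A sketch of programmatic registration (sizes and the lambda body are illustrative):

    #include <string>
    #include <vector>
    #include <benchmark/benchmark.h>

    int main(int argc, char** argv) {
      for (int len : {8, 64, 512}) {
        benchmark::RegisterBenchmark(
            "BM_Fill/" + std::to_string(len), [len](benchmark::State& st) {
              std::vector<int> v(len);
              for (auto _ : st) benchmark::DoNotOptimize(v.data());
            });
      }
      benchmark::Initialize(&argc, argv);
      benchmark::RunSpecifiedBenchmarks();
      benchmark::Shutdown();
      return 0;
    }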
@@ -1070,7 +1419,7 @@ class Fixture : public internal::Benchmark {
public:
Fixture() : internal::Benchmark("") {}
- virtual void Run(State& st) {
+ void Run(State& st) BENCHMARK_OVERRIDE {
this->SetUp(st);
this->BenchmarkCase(st);
this->TearDown(st);
@@ -1086,7 +1435,6 @@ class Fixture : public internal::Benchmark {
protected:
virtual void BenchmarkCase(State&) = 0;
};
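A minimal fixture-based benchmark for reference, assuming the usual BENCHMARK_F registration macro defined elsewhere in this header (names are placeholders):

    class MyFixture : public benchmark::Fixture {};

    BENCHMARK_F(MyFixture, FooTest)(benchmark::State& st) {
      for (auto _ : st) {
        // code under test
      }
    }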
-
} // namespace benchmark
// ------------------------------------------------------
@@ -1102,22 +1450,37 @@ class Fixture : public internal::Benchmark {
#endif
// Helpers for generating unique variable names
+#ifdef BENCHMARK_HAS_CXX11
+#define BENCHMARK_PRIVATE_NAME(...) \
+ BENCHMARK_PRIVATE_CONCAT(benchmark_uniq_, BENCHMARK_PRIVATE_UNIQUE_ID, \
+ __VA_ARGS__)
+#else
#define BENCHMARK_PRIVATE_NAME(n) \
- BENCHMARK_PRIVATE_CONCAT(_benchmark_, BENCHMARK_PRIVATE_UNIQUE_ID, n)
+ BENCHMARK_PRIVATE_CONCAT(benchmark_uniq_, BENCHMARK_PRIVATE_UNIQUE_ID, n)
+#endif // BENCHMARK_HAS_CXX11
+
#define BENCHMARK_PRIVATE_CONCAT(a, b, c) BENCHMARK_PRIVATE_CONCAT2(a, b, c)
#define BENCHMARK_PRIVATE_CONCAT2(a, b, c) a##b##c
// Helper for concatenation with macro name expansion
#define BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method) \
- BaseClass##_##Method##_Benchmark
+ BaseClass##_##Method##_Benchmark
#define BENCHMARK_PRIVATE_DECLARE(n) \
static ::benchmark::internal::Benchmark* BENCHMARK_PRIVATE_NAME(n) \
BENCHMARK_UNUSED
+#ifdef BENCHMARK_HAS_CXX11
+#define BENCHMARK(...) \
+ BENCHMARK_PRIVATE_DECLARE(_benchmark_) = \
+ (::benchmark::internal::RegisterBenchmarkInternal( \
+ new ::benchmark::internal::FunctionBenchmark(#__VA_ARGS__, \
+ __VA_ARGS__)))
+#else
#define BENCHMARK(n) \
BENCHMARK_PRIVATE_DECLARE(n) = \
(::benchmark::internal::RegisterBenchmarkInternal( \
new ::benchmark::internal::FunctionBenchmark(#n, n)))
+#endif // BENCHMARK_HAS_CXX11
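The variadic C++11 form exists mainly so that names containing commas, such as multi-parameter template instantiations, pass through the macro unharmed. A sketch:

    #include <array>
    #include <benchmark/benchmark.h>

    template <class T, int N>
    static void BM_FillArray(benchmark::State& state) {
      std::array<T, N> a{};
      for (auto _ : state) benchmark::DoNotOptimize(a.data());
    }
    BENCHMARK(BM_FillArray<int, 16>);  // the comma is absorbed by __VA_ARGS__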
// Old-style macros
#define BENCHMARK_WITH_ARG(n, a) BENCHMARK(n)->Arg((a))
@@ -1178,49 +1541,49 @@ class Fixture : public internal::Benchmark {
#define BENCHMARK_TEMPLATE(n, a) BENCHMARK_TEMPLATE1(n, a)
#endif
-#define BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method) \
- class BaseClass##_##Method##_Benchmark : public BaseClass { \
- public: \
- BaseClass##_##Method##_Benchmark() : BaseClass() { \
- this->SetName(#BaseClass "/" #Method); \
- } \
- \
- protected: \
- virtual void BenchmarkCase(::benchmark::State&); \
+#define BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method) \
+ class BaseClass##_##Method##_Benchmark : public BaseClass { \
+ public: \
+ BaseClass##_##Method##_Benchmark() { \
+ this->SetName(#BaseClass "/" #Method); \
+ } \
+ \
+ protected: \
+ void BenchmarkCase(::benchmark::State&) BENCHMARK_OVERRIDE; \
};
#define BENCHMARK_TEMPLATE1_PRIVATE_DECLARE_F(BaseClass, Method, a) \
class BaseClass##_##Method##_Benchmark : public BaseClass<a> { \
public: \
- BaseClass##_##Method##_Benchmark() : BaseClass<a>() { \
+ BaseClass##_##Method##_Benchmark() { \
this->SetName(#BaseClass "<" #a ">/" #Method); \
} \
\
protected: \
- virtual void BenchmarkCase(::benchmark::State&); \
+ void BenchmarkCase(::benchmark::State&) BENCHMARK_OVERRIDE; \
};
#define BENCHMARK_TEMPLATE2_PRIVATE_DECLARE_F(BaseClass, Method, a, b) \
class BaseClass##_##Method##_Benchmark : public BaseClass<a, b> { \
public: \
- BaseClass##_##Method##_Benchmark() : BaseClass<a, b>() { \
+ BaseClass##_##Method##_Benchmark() { \
this->SetName(#BaseClass "<" #a "," #b ">/" #Method); \
} \
\
protected: \
- virtual void BenchmarkCase(::benchmark::State&); \
+ void BenchmarkCase(::benchmark::State&) BENCHMARK_OVERRIDE; \
};
#ifdef BENCHMARK_HAS_CXX11
#define BENCHMARK_TEMPLATE_PRIVATE_DECLARE_F(BaseClass, Method, ...) \
class BaseClass##_##Method##_Benchmark : public BaseClass<__VA_ARGS__> { \
public: \
- BaseClass##_##Method##_Benchmark() : BaseClass<__VA_ARGS__>() { \
+ BaseClass##_##Method##_Benchmark() { \
this->SetName(#BaseClass "<" #__VA_ARGS__ ">/" #Method); \
} \
\
protected: \
- virtual void BenchmarkCase(::benchmark::State&); \
+ void BenchmarkCase(::benchmark::State&) BENCHMARK_OVERRIDE; \
};
#else
#define BENCHMARK_TEMPLATE_PRIVATE_DECLARE_F(n, a) \
@@ -1282,11 +1645,20 @@ class Fixture : public internal::Benchmark {
#endif
// Helper macro to create a main routine in a test that runs the benchmarks
+// Note the workaround for Hexagon simulator passing argc != 0, argv = NULL.
#define BENCHMARK_MAIN() \
int main(int argc, char** argv) { \
+ char arg0_default[] = "benchmark"; \
+ char* args_default = arg0_default; \
+ if (!argv) { \
+ argc = 1; \
+ argv = &args_default; \
+ } \
::benchmark::Initialize(&argc, argv); \
if (::benchmark::ReportUnrecognizedArguments(argc, argv)) return 1; \
::benchmark::RunSpecifiedBenchmarks(); \
+ ::benchmark::Shutdown(); \
+ return 0; \
} \
int main(int, char**)
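Projects that need their own main() can mirror the macro body by hand; a sketch that omits the Hexagon argv workaround:

    #include <benchmark/benchmark.h>

    int main(int argc, char** argv) {
      benchmark::Initialize(&argc, argv);
      if (benchmark::ReportUnrecognizedArguments(argc, argv)) return 1;
      benchmark::RunSpecifiedBenchmarks();
      benchmark::Shutdown();  // releases the global context allocated by the library
      return 0;
    }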
@@ -1295,7 +1667,7 @@ class Fixture : public internal::Benchmark {
namespace benchmark {
-struct CPUInfo {
+struct BENCHMARK_EXPORT CPUInfo {
struct CacheInfo {
std::string type;
int level;
@@ -1303,16 +1675,12 @@ struct CPUInfo {
int num_sharing;
};
- enum Scaling {
- UNKNOWN,
- ENABLED,
- DISABLED
- };
+ enum Scaling { UNKNOWN, ENABLED, DISABLED };
int num_cpus;
+ Scaling scaling;
double cycles_per_second;
std::vector<CacheInfo> caches;
- Scaling scaling;
std::vector<double> load_avg;
static const CPUInfo& Get();
@@ -1323,7 +1691,7 @@ struct CPUInfo {
};
// Adding Struct for System Information
-struct SystemInfo {
+struct BENCHMARK_EXPORT SystemInfo {
std::string name;
static const SystemInfo& Get();
@@ -1335,10 +1703,11 @@ struct SystemInfo {
// BenchmarkName contains the components of the Benchmark's name
// which allows individual fields to be modified or cleared before
// building the final name using 'str()'.
-struct BenchmarkName {
+struct BENCHMARK_EXPORT BenchmarkName {
std::string function_name;
std::string args;
std::string min_time;
+ std::string min_warmup_time;
std::string iterations;
std::string repetitions;
std::string time_type;
@@ -1354,7 +1723,7 @@ struct BenchmarkName {
// can control the destination of the reports by calling
// RunSpecifiedBenchmarks and passing it a custom reporter object.
// The reporter object must implement the following interface.
-class BenchmarkReporter {
+class BENCHMARK_EXPORT BenchmarkReporter {
public:
struct Context {
CPUInfo const& cpu_info;
@@ -1365,16 +1734,17 @@ class BenchmarkReporter {
Context();
};
- struct Run {
+ struct BENCHMARK_EXPORT Run {
static const int64_t no_repetition_index = -1;
enum RunType { RT_Iteration, RT_Aggregate };
Run()
: run_type(RT_Iteration),
- error_occurred(false),
+ aggregate_unit(kTime),
+ skipped(internal::NotSkipped),
iterations(1),
threads(1),
- time_unit(kNanosecond),
+ time_unit(GetDefaultTimeUnit()),
real_accumulated_time(0),
cpu_accumulated_time(0),
max_heapbytes_used(0),
@@ -1383,18 +1753,19 @@ class BenchmarkReporter {
complexity_n(0),
report_big_o(false),
report_rms(false),
- counters(),
- has_memory_result(false),
- allocs_per_iter(0.0),
- max_bytes_used(0) {}
+ memory_result(NULL),
+ allocs_per_iter(0.0) {}
std::string benchmark_name() const;
BenchmarkName run_name;
+ int64_t family_index;
+ int64_t per_family_instance_index;
RunType run_type;
std::string aggregate_name;
+ StatisticUnit aggregate_unit;
std::string report_label; // Empty if not set by benchmark.
- bool error_occurred;
- std::string error_message;
+ internal::Skipped skipped;
+ std::string skip_message;
IterationCount iterations;
int64_t threads;
@@ -1434,9 +1805,21 @@ class BenchmarkReporter {
UserCounters counters;
// Memory metrics.
- bool has_memory_result;
+ const MemoryManager::Result* memory_result;
double allocs_per_iter;
- int64_t max_bytes_used;
+ };
+
+ struct PerFamilyRunReports {
+ PerFamilyRunReports() : num_runs_total(0), num_runs_done(0) {}
+
+ // How many runs will all instances of this benchmark perform?
+ int num_runs_total;
+
+ // How many runs have happened already?
+ int num_runs_done;
+
+  // The reports about (non-erroneous!) runs of this family.
+ std::vector<BenchmarkReporter::Run> Runs;
};
// Construct a BenchmarkReporter with the output stream set to 'std::cout'
@@ -1452,6 +1835,12 @@ class BenchmarkReporter {
virtual bool ReportContext(const Context& context) = 0;
// Called once for each group of benchmark runs, gives information about
+ // the configurations of the runs.
+ virtual void ReportRunsConfig(double /*min_time*/,
+ bool /*has_explicit_iters*/,
+ IterationCount /*iters*/) {}
+
+ // Called once for each group of benchmark runs, gives information about
// cpu-time and heap memory usage during the benchmark run. If the group
// of runs contained more than two entries then 'report' contains additional
// elements representing the mean and standard deviation of those runs.
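Only ReportContext() and ReportRuns() are pure virtual; ReportRunsConfig() and Finalize() have empty defaults. A minimal custom reporter sketch (the class name is a placeholder):

    class QuietReporter : public benchmark::BenchmarkReporter {
     public:
      bool ReportContext(const Context&) override { return true; }
      void ReportRuns(const std::vector<Run>&) override {}
    };
    // Passed as RunSpecifiedBenchmarks(&reporter) to suppress console output.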
@@ -1496,7 +1885,7 @@ class BenchmarkReporter {
// Simple reporter that outputs benchmark data to the console. This is the
// default reporter used by RunSpecifiedBenchmarks().
-class ConsoleReporter : public BenchmarkReporter {
+class BENCHMARK_EXPORT ConsoleReporter : public BenchmarkReporter {
public:
enum OutputOptions {
OO_None = 0,
@@ -1506,13 +1895,10 @@ class ConsoleReporter : public BenchmarkReporter {
OO_Defaults = OO_ColorTabular
};
explicit ConsoleReporter(OutputOptions opts_ = OO_Defaults)
- : output_options_(opts_),
- name_field_width_(0),
- prev_counters_(),
- printed_header_(false) {}
+ : output_options_(opts_), name_field_width_(0), printed_header_(false) {}
- virtual bool ReportContext(const Context& context);
- virtual void ReportRuns(const std::vector<Run>& reports);
+ bool ReportContext(const Context& context) BENCHMARK_OVERRIDE;
+ void ReportRuns(const std::vector<Run>& reports) BENCHMARK_OVERRIDE;
protected:
virtual void PrintRunData(const Run& report);
@@ -1524,12 +1910,12 @@ class ConsoleReporter : public BenchmarkReporter {
bool printed_header_;
};
-class JSONReporter : public BenchmarkReporter {
+class BENCHMARK_EXPORT JSONReporter : public BenchmarkReporter {
public:
JSONReporter() : first_report_(true) {}
- virtual bool ReportContext(const Context& context);
- virtual void ReportRuns(const std::vector<Run>& reports);
- virtual void Finalize();
+ bool ReportContext(const Context& context) BENCHMARK_OVERRIDE;
+ void ReportRuns(const std::vector<Run>& reports) BENCHMARK_OVERRIDE;
+ void Finalize() BENCHMARK_OVERRIDE;
private:
void PrintRunData(const Run& report);
@@ -1537,13 +1923,13 @@ class JSONReporter : public BenchmarkReporter {
bool first_report_;
};
-class BENCHMARK_DEPRECATED_MSG(
+class BENCHMARK_EXPORT BENCHMARK_DEPRECATED_MSG(
"The CSV Reporter will be removed in a future release") CSVReporter
: public BenchmarkReporter {
public:
CSVReporter() : printed_header_(false) {}
- virtual bool ReportContext(const Context& context);
- virtual void ReportRuns(const std::vector<Run>& reports);
+ bool ReportContext(const Context& context) BENCHMARK_OVERRIDE;
+ void ReportRuns(const std::vector<Run>& reports) BENCHMARK_OVERRIDE;
private:
void PrintRunData(const Run& report);
@@ -1552,29 +1938,6 @@ class BENCHMARK_DEPRECATED_MSG(
std::set<std::string> user_counter_names_;
};
-// If a MemoryManager is registered, it can be used to collect and report
-// allocation metrics for a run of the benchmark.
-class MemoryManager {
- public:
- struct Result {
- Result() : num_allocs(0), max_bytes_used(0) {}
-
- // The number of allocations made in total between Start and Stop.
- int64_t num_allocs;
-
- // The peak memory use between Start and Stop.
- int64_t max_bytes_used;
- };
-
- virtual ~MemoryManager() {}
-
- // Implement this to start recording allocation information.
- virtual void Start() = 0;
-
- // Implement this to stop recording and fill out the given Result structure.
- virtual void Stop(Result* result) = 0;
-};
-
inline const char* GetTimeUnitString(TimeUnit unit) {
switch (unit) {
case kSecond:
@@ -1603,6 +1966,26 @@ inline double GetTimeUnitMultiplier(TimeUnit unit) {
BENCHMARK_UNREACHABLE();
}
+// Creates a list of integer values for the given range and multiplier.
+// This can be used together with ArgsProduct() to allow multiple ranges
+// with different multipliers.
+// Example:
+// ArgsProduct({
+// CreateRange(0, 1024, /*multi=*/32),
+// CreateRange(0, 100, /*multi=*/4),
+// CreateDenseRange(0, 4, /*step=*/1),
+// });
+BENCHMARK_EXPORT
+std::vector<int64_t> CreateRange(int64_t lo, int64_t hi, int multi);
+
+// Creates a list of integer values for the given range and step.
+BENCHMARK_EXPORT
+std::vector<int64_t> CreateDenseRange(int64_t start, int64_t limit, int step);
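A hedged usage sketch combining these helpers with ArgsProduct(); BM_SetInsert is a placeholder:

    static void BM_SetInsert(benchmark::State& state) {  // placeholder benchmark
      for (auto _ : state)
        benchmark::DoNotOptimize(state.range(0) + state.range(1));
    }
    BENCHMARK(BM_SetInsert)
        ->ArgsProduct({benchmark::CreateRange(8, 1024, /*multi=*/2),
                       benchmark::CreateDenseRange(1, 4, /*step=*/1)});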
+
} // namespace benchmark
+#if defined(_MSC_VER)
+#pragma warning(pop)
+#endif
+
#endif // BENCHMARK_BENCHMARK_H_
diff --git a/include/benchmark/export.h b/include/benchmark/export.h
new file mode 100644
index 0000000..f96f859
--- /dev/null
+++ b/include/benchmark/export.h
@@ -0,0 +1,47 @@
+#ifndef BENCHMARK_EXPORT_H
+#define BENCHMARK_EXPORT_H
+
+#if defined(_WIN32)
+#define EXPORT_ATTR __declspec(dllexport)
+#define IMPORT_ATTR __declspec(dllimport)
+#define NO_EXPORT_ATTR
+#define DEPRECATE_ATTR __declspec(deprecated)
+#else // _WIN32
+#define EXPORT_ATTR __attribute__((visibility("default")))
+#define IMPORT_ATTR __attribute__((visibility("default")))
+#define NO_EXPORT_ATTR __attribute__((visibility("hidden")))
+#define DEPRECATE_ATTR __attribute__((__deprecated__))
+#endif // _WIN32
+
+#ifdef BENCHMARK_STATIC_DEFINE
+#define BENCHMARK_EXPORT
+#define BENCHMARK_NO_EXPORT
+#else // BENCHMARK_STATIC_DEFINE
+#ifndef BENCHMARK_EXPORT
+#ifdef benchmark_EXPORTS
+/* We are building this library */
+#define BENCHMARK_EXPORT EXPORT_ATTR
+#else // benchmark_EXPORTS
+/* We are using this library */
+#define BENCHMARK_EXPORT IMPORT_ATTR
+#endif // benchmark_EXPORTS
+#endif // !BENCHMARK_EXPORT
+
+#ifndef BENCHMARK_NO_EXPORT
+#define BENCHMARK_NO_EXPORT NO_EXPORT_ATTR
+#endif // !BENCHMARK_NO_EXPORT
+#endif // BENCHMARK_STATIC_DEFINE
+
+#ifndef BENCHMARK_DEPRECATED
+#define BENCHMARK_DEPRECATED DEPRECATE_ATTR
+#endif // BENCHMARK_DEPRECATED
+
+#ifndef BENCHMARK_DEPRECATED_EXPORT
+#define BENCHMARK_DEPRECATED_EXPORT BENCHMARK_EXPORT BENCHMARK_DEPRECATED
+#endif // BENCHMARK_DEPRECATED_EXPORT
+
+#ifndef BENCHMARK_DEPRECATED_NO_EXPORT
+#define BENCHMARK_DEPRECATED_NO_EXPORT BENCHMARK_NO_EXPORT BENCHMARK_DEPRECATED
+#endif // BENCHMARK_DEPRECATED_NO_EXPORT
+
+#endif /* BENCHMARK_EXPORT_H */
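Consumers that link the static library without going through the exported CMake target (which injects the definition) are expected to define BENCHMARK_STATIC_DEFINE themselves, otherwise BENCHMARK_EXPORT expands to __declspec(dllimport) on Windows. A sketch:

    // Only needed when the build system does not already define it.
    #define BENCHMARK_STATIC_DEFINE
    #include <benchmark/benchmark.h>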
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..fe8770b
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,50 @@
+[build-system]
+requires = ["setuptools", "wheel"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "google_benchmark"
+description = "A library to benchmark code snippets."
+requires-python = ">=3.8"
+license = {file = "LICENSE"}
+keywords = ["benchmark"]
+
+authors = [
+ {name = "Google", email = "benchmark-discuss@googlegroups.com"},
+]
+
+classifiers = [
+ "Development Status :: 4 - Beta",
+ "Intended Audience :: Developers",
+ "Intended Audience :: Science/Research",
+ "License :: OSI Approved :: Apache Software License",
+ "Programming Language :: Python :: 3.8",
+ "Programming Language :: Python :: 3.9",
+ "Programming Language :: Python :: 3.10",
+ "Programming Language :: Python :: 3.11",
+ "Topic :: Software Development :: Testing",
+ "Topic :: System :: Benchmark",
+]
+
+dynamic = ["readme", "version"]
+
+dependencies = [
+ "absl-py>=0.7.1",
+]
+
+[project.urls]
+Homepage = "https://github.com/google/benchmark"
+Documentation = "https://github.com/google/benchmark/tree/main/docs"
+Repository = "https://github.com/google/benchmark.git"
+Discord = "https://discord.gg/cz7UX7wKC2"
+
+[tool.setuptools]
+package-dir = {"" = "bindings/python"}
+zip-safe = false
+
+[tool.setuptools.packages.find]
+where = ["bindings/python"]
+
+[tool.setuptools.dynamic]
+version = { attr = "google_benchmark.__version__" }
+readme = { file = "README.md", content-type = "text/markdown" }
diff --git a/requirements.txt b/requirements.txt
deleted file mode 100644
index 85e8986..0000000
--- a/requirements.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-numpy == 1.19.4
-scipy == 1.5.4
diff --git a/setup.py b/setup.py
index 5cdab10..b02a6a7 100644
--- a/setup.py
+++ b/setup.py
@@ -1,55 +1,50 @@
+import contextlib
import os
-import posixpath
-import re
+import platform
import shutil
-import sys
+import sysconfig
+from pathlib import Path
-from distutils import sysconfig
import setuptools
from setuptools.command import build_ext
-HERE = os.path.dirname(os.path.abspath(__file__))
+PYTHON_INCLUDE_PATH_PLACEHOLDER = "<PYTHON_INCLUDE_PATH>"
+IS_WINDOWS = platform.system() == "Windows"
+IS_MAC = platform.system() == "Darwin"
-IS_WINDOWS = sys.platform.startswith("win")
-
-def _get_version():
- """Parse the version string from __init__.py."""
- with open(
- os.path.join(HERE, "bindings", "python", "google_benchmark", "__init__.py")
- ) as init_file:
+@contextlib.contextmanager
+def temp_fill_include_path(fp: str):
+ """Temporarily set the Python include path in a file."""
+ with open(fp, "r+") as f:
try:
- version_line = next(
- line for line in init_file if line.startswith("__version__")
+ content = f.read()
+ replaced = content.replace(
+ PYTHON_INCLUDE_PATH_PLACEHOLDER,
+ Path(sysconfig.get_paths()['include']).as_posix(),
)
- except StopIteration:
- raise ValueError("__version__ not defined in __init__.py")
- else:
- namespace = {}
- exec(version_line, namespace) # pylint: disable=exec-used
- return namespace["__version__"]
-
-
-def _parse_requirements(path):
- with open(os.path.join(HERE, path)) as requirements:
- return [
- line.rstrip()
- for line in requirements
- if not (line.isspace() or line.startswith("#"))
- ]
+ f.seek(0)
+ f.write(replaced)
+ f.truncate()
+ yield
+ finally:
+ # revert to the original content after exit
+ f.seek(0)
+ f.write(content)
+ f.truncate()
class BazelExtension(setuptools.Extension):
"""A C/C++ extension that is defined as a Bazel BUILD target."""
- def __init__(self, name, bazel_target):
+ def __init__(self, name: str, bazel_target: str):
+ super().__init__(name=name, sources=[])
+
self.bazel_target = bazel_target
- self.relpath, self.target_name = posixpath.relpath(bazel_target, "//").split(
- ":"
- )
- setuptools.Extension.__init__(self, name, sources=[])
+ stripped_target = bazel_target.split("//")[-1]
+ self.relpath, self.target_name = stripped_target.split(":")
class BuildBazelExtension(build_ext.build_ext):
@@ -60,81 +55,59 @@ class BuildBazelExtension(build_ext.build_ext):
self.bazel_build(ext)
build_ext.build_ext.run(self)
- def bazel_build(self, ext):
+ def bazel_build(self, ext: BazelExtension):
"""Runs the bazel build to create the package."""
- with open("WORKSPACE", "r") as workspace:
- workspace_contents = workspace.read()
-
- with open("WORKSPACE", "w") as workspace:
- workspace.write(
- re.sub(
- r'(?<=path = ").*(?=", # May be overwritten by setup\.py\.)',
- sysconfig.get_python_inc().replace(os.path.sep, posixpath.sep),
- workspace_contents,
- )
- )
-
- if not os.path.exists(self.build_temp):
- os.makedirs(self.build_temp)
-
- bazel_argv = [
- "bazel",
- "build",
- ext.bazel_target,
- "--symlink_prefix=" + os.path.join(self.build_temp, "bazel-"),
- "--compilation_mode=" + ("dbg" if self.debug else "opt"),
- ]
-
- if IS_WINDOWS:
- # Link with python*.lib.
- for library_dir in self.library_dirs:
- bazel_argv.append("--linkopt=/LIBPATH:" + library_dir)
-
- self.spawn(bazel_argv)
-
- shared_lib_suffix = '.dll' if IS_WINDOWS else '.so'
- ext_bazel_bin_path = os.path.join(
- self.build_temp, 'bazel-bin',
- ext.relpath, ext.target_name + shared_lib_suffix)
-
- ext_dest_path = self.get_ext_fullpath(ext.name)
- ext_dest_dir = os.path.dirname(ext_dest_path)
- if not os.path.exists(ext_dest_dir):
- os.makedirs(ext_dest_dir)
- shutil.copyfile(ext_bazel_bin_path, ext_dest_path)
+ with temp_fill_include_path("WORKSPACE"):
+ temp_path = Path(self.build_temp)
+
+ bazel_argv = [
+ "bazel",
+ "build",
+ ext.bazel_target,
+ f"--symlink_prefix={temp_path / 'bazel-'}",
+ f"--compilation_mode={'dbg' if self.debug else 'opt'}",
+ # C++17 is required by nanobind
+ f"--cxxopt={'/std:c++17' if IS_WINDOWS else '-std=c++17'}",
+ ]
+
+ if IS_WINDOWS:
+ # Link with python*.lib.
+ for library_dir in self.library_dirs:
+ bazel_argv.append("--linkopt=/LIBPATH:" + library_dir)
+ elif IS_MAC:
+ if platform.machine() == "x86_64":
+ # C++17 needs macOS 10.14 at minimum
+ bazel_argv.append("--macos_minimum_os=10.14")
+
+ # cross-compilation for Mac ARM64 on GitHub Mac x86 runners.
+ # ARCHFLAGS is set by cibuildwheel before macOS wheel builds.
+ archflags = os.getenv("ARCHFLAGS", "")
+ if "arm64" in archflags:
+ bazel_argv.append("--cpu=darwin_arm64")
+ bazel_argv.append("--macos_cpus=arm64")
+
+ elif platform.machine() == "arm64":
+ bazel_argv.append("--macos_minimum_os=11.0")
+
+ self.spawn(bazel_argv)
+
+ shared_lib_suffix = '.dll' if IS_WINDOWS else '.so'
+ ext_name = ext.target_name + shared_lib_suffix
+ ext_bazel_bin_path = temp_path / 'bazel-bin' / ext.relpath / ext_name
+
+ ext_dest_path = Path(self.get_ext_fullpath(ext.name))
+ shutil.copyfile(ext_bazel_bin_path, ext_dest_path)
+
+ # explicitly call `bazel shutdown` for graceful exit
+ self.spawn(["bazel", "shutdown"])
setuptools.setup(
- name="google_benchmark",
- version=_get_version(),
- url="https://github.com/google/benchmark",
- description="A library to benchmark code snippets.",
- author="Google",
- author_email="benchmark-py@google.com",
- # Contained modules and scripts.
- package_dir={"": "bindings/python"},
- packages=setuptools.find_packages("bindings/python"),
- install_requires=_parse_requirements("bindings/python/requirements.txt"),
cmdclass=dict(build_ext=BuildBazelExtension),
ext_modules=[
BazelExtension(
- "google_benchmark._benchmark",
- "//bindings/python/google_benchmark:_benchmark",
+ name="google_benchmark._benchmark",
+ bazel_target="//bindings/python/google_benchmark:_benchmark",
)
],
- zip_safe=False,
- # PyPI package information.
- classifiers=[
- "Development Status :: 4 - Beta",
- "Intended Audience :: Developers",
- "Intended Audience :: Science/Research",
- "License :: OSI Approved :: Apache Software License",
- "Programming Language :: Python :: 3.6",
- "Programming Language :: Python :: 3.7",
- "Programming Language :: Python :: 3.8",
- "Topic :: Software Development :: Testing",
- "Topic :: System :: Benchmark",
- ],
- license="Apache 2.0",
- keywords="benchmark",
)
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 35d559e..daf82fb 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -25,32 +25,42 @@ set_target_properties(benchmark PROPERTIES
SOVERSION ${GENERIC_LIB_SOVERSION}
)
target_include_directories(benchmark PUBLIC
- $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../include>
- )
+ $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/include>
+)
-# Link threads.
-target_link_libraries(benchmark ${BENCHMARK_CXX_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT})
-find_library(LIBRT rt)
-if(LIBRT)
- target_link_libraries(benchmark ${LIBRT})
+# libpfm, if available
+if (PFM_FOUND)
+ target_link_libraries(benchmark PRIVATE PFM::libpfm)
+ target_compile_definitions(benchmark PRIVATE -DHAVE_LIBPFM)
endif()
-if(CMAKE_BUILD_TYPE)
- string(TOUPPER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE_UPPER)
-endif()
-if(NOT CMAKE_THREAD_LIBS_INIT AND "${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}}" MATCHES ".*-fsanitize=[^ ]*address.*")
- message(WARNING "CMake's FindThreads.cmake did not fail, but CMAKE_THREAD_LIBS_INIT ended up being empty. This was fixed in https://github.com/Kitware/CMake/commit/d53317130e84898c5328c237186dbd995aaf1c12 Let's guess that -pthread is sufficient.")
- target_link_libraries(benchmark -pthread)
+# pthread affinity, if available
+if(HAVE_PTHREAD_AFFINITY)
+ target_compile_definitions(benchmark PRIVATE -DBENCHMARK_HAS_PTHREAD_AFFINITY)
endif()
+# Link threads.
+target_link_libraries(benchmark PRIVATE Threads::Threads)
+
+target_link_libraries(benchmark PRIVATE ${BENCHMARK_CXX_LIBRARIES})
+
+if(HAVE_LIB_RT)
+ target_link_libraries(benchmark PRIVATE rt)
+endif(HAVE_LIB_RT)
+
+
# We need extra libraries on Windows
if(${CMAKE_SYSTEM_NAME} MATCHES "Windows")
- target_link_libraries(benchmark shlwapi)
+ target_link_libraries(benchmark PRIVATE shlwapi)
endif()
# We need extra libraries on Solaris
if(${CMAKE_SYSTEM_NAME} MATCHES "SunOS")
- target_link_libraries(benchmark kstat)
+ target_link_libraries(benchmark PRIVATE kstat)
+endif()
+
+if (NOT BUILD_SHARED_LIBS)
+ target_compile_definitions(benchmark PUBLIC -DBENCHMARK_STATIC_DEFINE)
endif()
# Benchmark main library
@@ -60,34 +70,45 @@ set_target_properties(benchmark_main PROPERTIES
OUTPUT_NAME "benchmark_main"
VERSION ${GENERIC_LIB_VERSION}
SOVERSION ${GENERIC_LIB_SOVERSION}
+ DEFINE_SYMBOL benchmark_EXPORTS
)
-target_include_directories(benchmark PUBLIC
- $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../include>
- )
-target_link_libraries(benchmark_main benchmark::benchmark)
-
+target_link_libraries(benchmark_main PUBLIC benchmark::benchmark)
-set(generated_dir "${CMAKE_CURRENT_BINARY_DIR}/generated")
+set(generated_dir "${PROJECT_BINARY_DIR}")
set(version_config "${generated_dir}/${PROJECT_NAME}ConfigVersion.cmake")
set(project_config "${generated_dir}/${PROJECT_NAME}Config.cmake")
set(pkg_config "${generated_dir}/${PROJECT_NAME}.pc")
+set(targets_to_export benchmark benchmark_main)
set(targets_export_name "${PROJECT_NAME}Targets")
set(namespace "${PROJECT_NAME}::")
include(CMakePackageConfigHelpers)
+
+configure_package_config_file (
+ ${PROJECT_SOURCE_DIR}/cmake/Config.cmake.in
+ ${project_config}
+ INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}
+ NO_SET_AND_CHECK_MACRO
+ NO_CHECK_REQUIRED_COMPONENTS_MACRO
+)
write_basic_package_version_file(
"${version_config}" VERSION ${GENERIC_LIB_VERSION} COMPATIBILITY SameMajorVersion
)
-configure_file("${PROJECT_SOURCE_DIR}/cmake/Config.cmake.in" "${project_config}" @ONLY)
configure_file("${PROJECT_SOURCE_DIR}/cmake/benchmark.pc.in" "${pkg_config}" @ONLY)
+export (
+ TARGETS ${targets_to_export}
+ NAMESPACE "${namespace}"
+ FILE ${generated_dir}/${targets_export_name}.cmake
+)
+
if (BENCHMARK_ENABLE_INSTALL)
# Install target (will install the library to specified CMAKE_INSTALL_PREFIX variable)
install(
- TARGETS benchmark benchmark_main
+ TARGETS ${targets_to_export}
EXPORT ${targets_export_name}
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
@@ -96,6 +117,7 @@ if (BENCHMARK_ENABLE_INSTALL)
install(
DIRECTORY "${PROJECT_SOURCE_DIR}/include/benchmark"
+ "${PROJECT_BINARY_DIR}/include/benchmark"
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
FILES_MATCHING PATTERN "*.*h")
@@ -112,3 +134,37 @@ if (BENCHMARK_ENABLE_INSTALL)
NAMESPACE "${namespace}"
DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}")
endif()
+
+if (BENCHMARK_ENABLE_DOXYGEN)
+ find_package(Doxygen REQUIRED)
+ set(DOXYGEN_QUIET YES)
+ set(DOXYGEN_RECURSIVE YES)
+ set(DOXYGEN_GENERATE_HTML YES)
+ set(DOXYGEN_GENERATE_MAN NO)
+ set(DOXYGEN_MARKDOWN_SUPPORT YES)
+ set(DOXYGEN_BUILTIN_STL_SUPPORT YES)
+ set(DOXYGEN_EXTRACT_PACKAGE YES)
+ set(DOXYGEN_EXTRACT_STATIC YES)
+ set(DOXYGEN_SHOW_INCLUDE_FILES YES)
+ set(DOXYGEN_BINARY_TOC YES)
+ set(DOXYGEN_TOC_EXPAND YES)
+ set(DOXYGEN_USE_MDFILE_AS_MAINPAGE "index.md")
+ doxygen_add_docs(benchmark_doxygen
+ docs
+ include
+ src
+ ALL
+ WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
+ COMMENT "Building documentation with Doxygen.")
+ if (BENCHMARK_ENABLE_INSTALL AND BENCHMARK_INSTALL_DOCS)
+ install(
+ DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/html/"
+ DESTINATION ${CMAKE_INSTALL_DOCDIR})
+ endif()
+else()
+ if (BENCHMARK_ENABLE_INSTALL AND BENCHMARK_INSTALL_DOCS)
+ install(
+ DIRECTORY "${PROJECT_SOURCE_DIR}/docs/"
+ DESTINATION ${CMAKE_INSTALL_DOCDIR})
+ endif()
+endif()
diff --git a/src/benchmark.cc b/src/benchmark.cc
index 1c049f2..6139e59 100644
--- a/src/benchmark.cc
+++ b/src/benchmark.cc
@@ -13,12 +13,13 @@
// limitations under the License.
#include "benchmark/benchmark.h"
+
#include "benchmark_api_internal.h"
#include "benchmark_runner.h"
#include "internal_macros.h"
#ifndef BENCHMARK_OS_WINDOWS
-#ifndef BENCHMARK_OS_FUCHSIA
+#if !defined(BENCHMARK_OS_FUCHSIA) && !defined(BENCHMARK_OS_QURT)
#include <sys/resource.h>
#endif
#include <sys/time.h>
@@ -32,7 +33,10 @@
#include <cstdlib>
#include <fstream>
#include <iostream>
+#include <limits>
+#include <map>
#include <memory>
+#include <random>
#include <string>
#include <thread>
#include <utility>
@@ -45,94 +49,146 @@
#include "internal_macros.h"
#include "log.h"
#include "mutex.h"
+#include "perf_counters.h"
#include "re.h"
#include "statistics.h"
#include "string_util.h"
#include "thread_manager.h"
#include "thread_timer.h"
+namespace benchmark {
// Print a list of benchmarks. This option overrides all other options.
-DEFINE_bool(benchmark_list_tests, false);
+BM_DEFINE_bool(benchmark_list_tests, false);
// A regular expression that specifies the set of benchmarks to execute. If
// this flag is empty, or if this flag is the string \"all\", all benchmarks
// linked into the binary are run.
-DEFINE_string(benchmark_filter, ".");
+BM_DEFINE_string(benchmark_filter, "");
-// Minimum number of seconds we should run benchmark before results are
-// considered significant. For cpu-time based tests, this is the lower bound
+// Specification of how long to run the benchmark.
+//
+// It can be either an exact number of iterations (specified as `<integer>x`),
+// or a minimum number of seconds (specified as `<float>s`). If the latter
+// format (i.e., min seconds) is used, the system may run the benchmark longer
+// until the results are considered significant.
+//
+// For backward compatibility, the `s` suffix may be omitted, in which case,
+// the specified number is interpreted as the number of seconds.
+//
+// For cpu-time based tests, this is the lower bound
// on the total cpu time used by all threads that make up the test. For
// real-time based tests, this is the lower bound on the elapsed time of the
// benchmark execution, regardless of number of threads.
-DEFINE_double(benchmark_min_time, 0.5);
+BM_DEFINE_string(benchmark_min_time, kDefaultMinTimeStr);
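For reference, the accepted forms look like this (values are illustrative):

    --benchmark_min_time=2.5s    # run until at least 2.5 seconds have been measured
    --benchmark_min_time=1000x   # run exactly 1000 iterations
    --benchmark_min_time=0.5     # legacy form, interpreted as 0.5 seconds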
+
+// Minimum number of seconds a benchmark should be run before results should be
+// taken into account. This can be necessary, e.g., for benchmarks of code
+// which needs to fill some form of cache before performance is of interest.
+// Note: results gathered within this period are discarded and not used for
+// the reported result.
+BM_DEFINE_double(benchmark_min_warmup_time, 0.0);
// The number of runs of each benchmark. If greater than 1, the mean and
// standard deviation of the runs will be reported.
-DEFINE_int32(benchmark_repetitions, 1);
+BM_DEFINE_int32(benchmark_repetitions, 1);
+
+// If set, enable random interleaving of repetitions of all benchmarks.
+// See http://github.com/google/benchmark/issues/1051 for details.
+BM_DEFINE_bool(benchmark_enable_random_interleaving, false);
// Report the result of each benchmark repetition. When 'true' is specified
// only the mean, standard deviation, and other statistics are reported for
// repeated benchmarks. Affects all reporters.
-DEFINE_bool(benchmark_report_aggregates_only, false);
+BM_DEFINE_bool(benchmark_report_aggregates_only, false);
// Display the result of each benchmark repetition. When 'true' is specified
// only the mean, standard deviation, and other statistics are displayed for
// repeated benchmarks. Unlike benchmark_report_aggregates_only, only affects
// the display reporter, but *NOT* file reporter, which will still contain
// all the output.
-DEFINE_bool(benchmark_display_aggregates_only, false);
+BM_DEFINE_bool(benchmark_display_aggregates_only, false);
// The format to use for console output.
// Valid values are 'console', 'json', or 'csv'.
-DEFINE_string(benchmark_format, "console");
+BM_DEFINE_string(benchmark_format, "console");
// The format to use for file output.
// Valid values are 'console', 'json', or 'csv'.
-DEFINE_string(benchmark_out_format, "json");
+BM_DEFINE_string(benchmark_out_format, "json");
// The file to write additional output to.
-DEFINE_string(benchmark_out, "");
+BM_DEFINE_string(benchmark_out, "");
// Whether to use colors in the output. Valid values:
// 'true'/'yes'/1, 'false'/'no'/0, and 'auto'. 'auto' means to use colors if
// the output is being sent to a terminal and the TERM environment variable is
// set to a terminal type that supports colors.
-DEFINE_string(benchmark_color, "auto");
+BM_DEFINE_string(benchmark_color, "auto");
// Whether to use tabular format when printing user counters to the console.
// Valid values: 'true'/'yes'/1, 'false'/'no'/0. Defaults to false.
-DEFINE_bool(benchmark_counters_tabular, false);
+BM_DEFINE_bool(benchmark_counters_tabular, false);
-// The level of verbose logging to output
-DEFINE_int32(v, 0);
+// List of additional perf counters to collect, in libpfm format. For more
+// information about libpfm: https://man7.org/linux/man-pages/man3/libpfm.3.html
+BM_DEFINE_string(benchmark_perf_counters, "");
-namespace benchmark {
+// Extra context to include in the output formatted as comma-separated key-value
+// pairs. Kept internal as it's only used for parsing from env/command line.
+BM_DEFINE_kvpairs(benchmark_context, {});
+
+// Set the default time unit to use for reports
+// Valid values are 'ns', 'us', 'ms' or 's'
+BM_DEFINE_string(benchmark_time_unit, "");
+
+// The level of verbose logging to output
+BM_DEFINE_int32(v, 0);
namespace internal {
+std::map<std::string, std::string>* global_context = nullptr;
+
+BENCHMARK_EXPORT std::map<std::string, std::string>*& GetGlobalContext() {
+ return global_context;
+}
+
// FIXME: wouldn't LTO mess this up?
void UseCharPointer(char const volatile*) {}
} // namespace internal
-State::State(IterationCount max_iters, const std::vector<int64_t>& ranges,
- int thread_i, int n_threads, internal::ThreadTimer* timer,
- internal::ThreadManager* manager)
+State::State(std::string name, IterationCount max_iters,
+ const std::vector<int64_t>& ranges, int thread_i, int n_threads,
+ internal::ThreadTimer* timer, internal::ThreadManager* manager,
+ internal::PerfCountersMeasurement* perf_counters_measurement)
: total_iterations_(0),
batch_leftover_(0),
max_iterations(max_iters),
started_(false),
finished_(false),
- error_occurred_(false),
+ skipped_(internal::NotSkipped),
range_(ranges),
complexity_n_(0),
- counters(),
- thread_index(thread_i),
- threads(n_threads),
+ name_(std::move(name)),
+ thread_index_(thread_i),
+ threads_(n_threads),
timer_(timer),
- manager_(manager) {
- CHECK(max_iterations != 0) << "At least one iteration must be run";
- CHECK_LT(thread_index, threads) << "thread_index must be less than threads";
+ manager_(manager),
+ perf_counters_measurement_(perf_counters_measurement) {
+ BM_CHECK(max_iterations != 0) << "At least one iteration must be run";
+ BM_CHECK_LT(thread_index_, threads_)
+ << "thread_index must be less than threads";
+
+ // Add counters with correct flag now. If added with `counters[name]` in
+ // `PauseTiming`, a new `Counter` will be inserted the first time, which
+ // won't have the flag. Inserting them now also reduces the allocations
+ // during the benchmark.
+ if (perf_counters_measurement_) {
+ for (const std::string& counter_name :
+ perf_counters_measurement_->names()) {
+ counters[counter_name] = Counter(0.0, Counter::kAvgIterations);
+ }
+ }
// Note: The use of offsetof below is technically undefined until C++17
// because State is not a standard layout type. However, all compilers
@@ -147,37 +203,78 @@ State::State(IterationCount max_iters, const std::vector<int64_t>& ranges,
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Winvalid-offsetof"
#endif
+#if defined(__NVCC__)
+#pragma nv_diagnostic push
+#pragma nv_diag_suppress 1427
+#endif
+#if defined(__NVCOMPILER)
+#pragma diagnostic push
+#pragma diag_suppress offset_in_non_POD_nonstandard
+#endif
// Offset tests to ensure commonly accessed data is on the first cache line.
const int cache_line_size = 64;
- static_assert(offsetof(State, error_occurred_) <=
- (cache_line_size - sizeof(error_occurred_)),
- "");
+ static_assert(
+ offsetof(State, skipped_) <= (cache_line_size - sizeof(skipped_)), "");
#if defined(__INTEL_COMPILER)
#pragma warning pop
#elif defined(__GNUC__)
#pragma GCC diagnostic pop
#endif
+#if defined(__NVCC__)
+#pragma nv_diagnostic pop
+#endif
+#if defined(__NVCOMPILER)
+#pragma diagnostic pop
+#endif
}
void State::PauseTiming() {
// Add in time accumulated so far
- CHECK(started_ && !finished_ && !error_occurred_);
+ BM_CHECK(started_ && !finished_ && !skipped());
timer_->StopTimer();
+ if (perf_counters_measurement_) {
+ std::vector<std::pair<std::string, double>> measurements;
+ if (!perf_counters_measurement_->Stop(measurements)) {
+ BM_CHECK(false) << "Failed to read the perf counter values.";
+ }
+ for (const auto& name_and_measurement : measurements) {
+ const std::string& name = name_and_measurement.first;
+ const double measurement = name_and_measurement.second;
+ // Counter was inserted with `kAvgIterations` flag by the constructor.
+ assert(counters.find(name) != counters.end());
+ counters[name].value += measurement;
+ }
+ }
}
void State::ResumeTiming() {
- CHECK(started_ && !finished_ && !error_occurred_);
+ BM_CHECK(started_ && !finished_ && !skipped());
timer_->StartTimer();
+ if (perf_counters_measurement_) {
+ perf_counters_measurement_->Start();
+ }
}
-void State::SkipWithError(const char* msg) {
- CHECK(msg);
- error_occurred_ = true;
+void State::SkipWithMessage(const std::string& msg) {
+ skipped_ = internal::SkippedWithMessage;
{
MutexLock l(manager_->GetBenchmarkMutex());
- if (manager_->results.has_error_ == false) {
- manager_->results.error_message_ = msg;
- manager_->results.has_error_ = true;
+ if (internal::NotSkipped == manager_->results.skipped_) {
+ manager_->results.skip_message_ = msg;
+ manager_->results.skipped_ = skipped_;
+ }
+ }
+ total_iterations_ = 0;
+ if (timer_->running()) timer_->StopTimer();
+}
+
+void State::SkipWithError(const std::string& msg) {
+ skipped_ = internal::SkippedWithError;
+ {
+ MutexLock l(manager_->GetBenchmarkMutex());
+ if (internal::NotSkipped == manager_->results.skipped_) {
+ manager_->results.skip_message_ = msg;
+ manager_->results.skipped_ = skipped_;
}
}
total_iterations_ = 0;
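Typical use from a benchmark body; ResourceAvailable() is a hypothetical predicate, and SkipWithMessage() works the same way but marks the run as skipped rather than errored:

    bool ResourceAvailable();  // hypothetical check, not part of the library

    static void BM_NeedsResource(benchmark::State& state) {
      if (!ResourceAvailable()) {
        state.SkipWithError("resource unavailable");
        return;  // early return; the measurement loop would not run anyway
      }
      for (auto _ : state) {
        // measured work
      }
    }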
@@ -188,22 +285,22 @@ void State::SetIterationTime(double seconds) {
timer_->SetIterationTime(seconds);
}
-void State::SetLabel(const char* label) {
+void State::SetLabel(const std::string& label) {
MutexLock l(manager_->GetBenchmarkMutex());
manager_->results.report_label_ = label;
}
void State::StartKeepRunning() {
- CHECK(!started_ && !finished_);
+ BM_CHECK(!started_ && !finished_);
started_ = true;
- total_iterations_ = error_occurred_ ? 0 : max_iterations;
+ total_iterations_ = skipped() ? 0 : max_iterations;
manager_->StartStopBarrier();
- if (!error_occurred_) ResumeTiming();
+ if (!skipped()) ResumeTiming();
}
void State::FinishKeepRunning() {
- CHECK(started_ && (!finished_ || error_occurred_));
- if (!error_occurred_) {
+ BM_CHECK(started_ && (!finished_ || skipped()));
+ if (!skipped()) {
PauseTiming();
}
// Total iterations has now wrapped around past 0. Fix this.
@@ -215,11 +312,42 @@ void State::FinishKeepRunning() {
namespace internal {
namespace {
+// Flushes streams after invoking reporter methods that write to them. This
+// ensures users get timely updates even when streams are not line-buffered.
+void FlushStreams(BenchmarkReporter* reporter) {
+ if (!reporter) return;
+ std::flush(reporter->GetOutputStream());
+ std::flush(reporter->GetErrorStream());
+}
+
+// Reports in both display and file reporters.
+void Report(BenchmarkReporter* display_reporter,
+ BenchmarkReporter* file_reporter, const RunResults& run_results) {
+ auto report_one = [](BenchmarkReporter* reporter, bool aggregates_only,
+ const RunResults& results) {
+ assert(reporter);
+ // If there are no aggregates, do output non-aggregates.
+ aggregates_only &= !results.aggregates_only.empty();
+ if (!aggregates_only) reporter->ReportRuns(results.non_aggregates);
+ if (!results.aggregates_only.empty())
+ reporter->ReportRuns(results.aggregates_only);
+ };
+
+ report_one(display_reporter, run_results.display_report_aggregates_only,
+ run_results);
+ if (file_reporter)
+ report_one(file_reporter, run_results.file_report_aggregates_only,
+ run_results);
+
+ FlushStreams(display_reporter);
+ FlushStreams(file_reporter);
+}
+
void RunBenchmarks(const std::vector<BenchmarkInstance>& benchmarks,
BenchmarkReporter* display_reporter,
BenchmarkReporter* file_reporter) {
// Note the file_reporter can be null.
- CHECK(display_reporter != nullptr);
+ BM_CHECK(display_reporter != nullptr);
// Determine the width of the name field using a minimum width of 10.
bool might_have_aggregates = FLAGS_benchmark_repetitions > 1;
@@ -227,10 +355,10 @@ void RunBenchmarks(const std::vector<BenchmarkInstance>& benchmarks,
size_t stat_field_width = 0;
for (const BenchmarkInstance& benchmark : benchmarks) {
name_field_width =
- std::max<size_t>(name_field_width, benchmark.name.str().size());
- might_have_aggregates |= benchmark.repetitions > 1;
+ std::max<size_t>(name_field_width, benchmark.name().str().size());
+ might_have_aggregates |= benchmark.repetitions() > 1;
- for (const auto& Stat : *benchmark.statistics)
+ for (const auto& Stat : benchmark.statistics())
stat_field_width = std::max<size_t>(stat_field_width, Stat.name_.size());
}
if (might_have_aggregates) name_field_width += 1 + stat_field_width;
@@ -239,75 +367,129 @@ void RunBenchmarks(const std::vector<BenchmarkInstance>& benchmarks,
BenchmarkReporter::Context context;
context.name_field_width = name_field_width;
- // Keep track of running times of all instances of current benchmark
- std::vector<BenchmarkReporter::Run> complexity_reports;
-
- // We flush streams after invoking reporter methods that write to them. This
- // ensures users get timely updates even when streams are not line-buffered.
- auto flushStreams = [](BenchmarkReporter* reporter) {
- if (!reporter) return;
- std::flush(reporter->GetOutputStream());
- std::flush(reporter->GetErrorStream());
- };
+ // Keep track of running times of all instances of each benchmark family.
+ std::map<int /*family_index*/, BenchmarkReporter::PerFamilyRunReports>
+ per_family_reports;
if (display_reporter->ReportContext(context) &&
(!file_reporter || file_reporter->ReportContext(context))) {
- flushStreams(display_reporter);
- flushStreams(file_reporter);
-
- for (const auto& benchmark : benchmarks) {
- RunResults run_results = RunBenchmark(benchmark, &complexity_reports);
-
- auto report = [&run_results](BenchmarkReporter* reporter,
- bool report_aggregates_only) {
- assert(reporter);
- // If there are no aggregates, do output non-aggregates.
- report_aggregates_only &= !run_results.aggregates_only.empty();
- if (!report_aggregates_only)
- reporter->ReportRuns(run_results.non_aggregates);
- if (!run_results.aggregates_only.empty())
- reporter->ReportRuns(run_results.aggregates_only);
- };
-
- report(display_reporter, run_results.display_report_aggregates_only);
- if (file_reporter)
- report(file_reporter, run_results.file_report_aggregates_only);
+ FlushStreams(display_reporter);
+ FlushStreams(file_reporter);
+
+ size_t num_repetitions_total = 0;
+
+ // This perfcounters object needs to be created before the runners vector
+ // below so it outlasts their lifetime.
+ PerfCountersMeasurement perfcounters(
+ StrSplit(FLAGS_benchmark_perf_counters, ','));
+
+ // Vector of benchmarks to run
+ std::vector<internal::BenchmarkRunner> runners;
+ runners.reserve(benchmarks.size());
+
+ // Count the number of benchmarks with threads to warn the user in case
+ // performance counters are used.
+ int benchmarks_with_threads = 0;
+
+ // Loop through all benchmarks
+ for (const BenchmarkInstance& benchmark : benchmarks) {
+ BenchmarkReporter::PerFamilyRunReports* reports_for_family = nullptr;
+ if (benchmark.complexity() != oNone)
+ reports_for_family = &per_family_reports[benchmark.family_index()];
+ benchmarks_with_threads += (benchmark.threads() > 1);
+ runners.emplace_back(benchmark, &perfcounters, reports_for_family);
+ int num_repeats_of_this_instance = runners.back().GetNumRepeats();
+ num_repetitions_total += num_repeats_of_this_instance;
+ if (reports_for_family)
+ reports_for_family->num_runs_total += num_repeats_of_this_instance;
+ }
+ assert(runners.size() == benchmarks.size() && "Unexpected runner count.");
+
+ // The use of performance counters with threads would be unintuitive for
+ // the average user so we need to warn them about this case
+ if ((benchmarks_with_threads > 0) && (perfcounters.num_counters() > 0)) {
+ GetErrorLogInstance()
+ << "***WARNING*** There are " << benchmarks_with_threads
+ << " benchmarks with threads and " << perfcounters.num_counters()
+ << " performance counters were requested. Beware counters will "
+ "reflect the combined usage across all "
+ "threads.\n";
+ }
+
+ std::vector<size_t> repetition_indices;
+ repetition_indices.reserve(num_repetitions_total);
+ for (size_t runner_index = 0, num_runners = runners.size();
+ runner_index != num_runners; ++runner_index) {
+ const internal::BenchmarkRunner& runner = runners[runner_index];
+ std::fill_n(std::back_inserter(repetition_indices),
+ runner.GetNumRepeats(), runner_index);
+ }
+ assert(repetition_indices.size() == num_repetitions_total &&
+ "Unexpected number of repetition indexes.");
+
+ if (FLAGS_benchmark_enable_random_interleaving) {
+ std::random_device rd;
+ std::mt19937 g(rd());
+ std::shuffle(repetition_indices.begin(), repetition_indices.end(), g);
+ }
+
+ for (size_t repetition_index : repetition_indices) {
+ internal::BenchmarkRunner& runner = runners[repetition_index];
+ runner.DoOneRepetition();
+ if (runner.HasRepeatsRemaining()) continue;
+ // FIXME: report each repetition separately, not all of them in bulk.
- flushStreams(display_reporter);
- flushStreams(file_reporter);
+ display_reporter->ReportRunsConfig(
+ runner.GetMinTime(), runner.HasExplicitIters(), runner.GetIters());
+ if (file_reporter)
+ file_reporter->ReportRunsConfig(
+ runner.GetMinTime(), runner.HasExplicitIters(), runner.GetIters());
+
+ RunResults run_results = runner.GetResults();
+
+ // Maybe calculate complexity report
+ if (const auto* reports_for_family = runner.GetReportsForFamily()) {
+ if (reports_for_family->num_runs_done ==
+ reports_for_family->num_runs_total) {
+ auto additional_run_stats = ComputeBigO(reports_for_family->Runs);
+ run_results.aggregates_only.insert(run_results.aggregates_only.end(),
+ additional_run_stats.begin(),
+ additional_run_stats.end());
+ per_family_reports.erase(
+ static_cast<int>(reports_for_family->Runs.front().family_index));
+ }
+ }
+
+ Report(display_reporter, file_reporter, run_results);
}
}
display_reporter->Finalize();
if (file_reporter) file_reporter->Finalize();
- flushStreams(display_reporter);
- flushStreams(file_reporter);
+ FlushStreams(display_reporter);
+ FlushStreams(file_reporter);
}
// Disable deprecated warnings temporarily because we need to reference
// CSVReporter but don't want to trigger -Werror=-Wdeprecated-declarations
-#ifdef __GNUC__
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
-#endif
+BENCHMARK_DISABLE_DEPRECATED_WARNING
std::unique_ptr<BenchmarkReporter> CreateReporter(
std::string const& name, ConsoleReporter::OutputOptions output_opts) {
typedef std::unique_ptr<BenchmarkReporter> PtrType;
if (name == "console") {
return PtrType(new ConsoleReporter(output_opts));
- } else if (name == "json") {
- return PtrType(new JSONReporter);
- } else if (name == "csv") {
- return PtrType(new CSVReporter);
- } else {
- std::cerr << "Unexpected format: '" << name << "'\n";
- std::exit(1);
}
+ if (name == "json") {
+ return PtrType(new JSONReporter());
+ }
+ if (name == "csv") {
+ return PtrType(new CSVReporter());
+ }
+ std::cerr << "Unexpected format: '" << name << "'\n";
+ std::exit(1);
}
-#ifdef __GNUC__
-#pragma GCC diagnostic pop
-#endif
+BENCHMARK_RESTORE_DEPRECATED_WARNING
} // end namespace
@@ -341,17 +523,41 @@ ConsoleReporter::OutputOptions GetOutputOptions(bool force_no_color) {
} // end namespace internal
+BenchmarkReporter* CreateDefaultDisplayReporter() {
+ static auto default_display_reporter =
+ internal::CreateReporter(FLAGS_benchmark_format,
+ internal::GetOutputOptions())
+ .release();
+ return default_display_reporter;
+}
+
size_t RunSpecifiedBenchmarks() {
- return RunSpecifiedBenchmarks(nullptr, nullptr);
+ return RunSpecifiedBenchmarks(nullptr, nullptr, FLAGS_benchmark_filter);
+}
+
+size_t RunSpecifiedBenchmarks(std::string spec) {
+ return RunSpecifiedBenchmarks(nullptr, nullptr, std::move(spec));
}
size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter) {
- return RunSpecifiedBenchmarks(display_reporter, nullptr);
+ return RunSpecifiedBenchmarks(display_reporter, nullptr,
+ FLAGS_benchmark_filter);
+}
+
+size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter,
+ std::string spec) {
+ return RunSpecifiedBenchmarks(display_reporter, nullptr, std::move(spec));
}
size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter,
BenchmarkReporter* file_reporter) {
- std::string spec = FLAGS_benchmark_filter;
+ return RunSpecifiedBenchmarks(display_reporter, file_reporter,
+ FLAGS_benchmark_filter);
+}
+
+size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter,
+ BenchmarkReporter* file_reporter,
+ std::string spec) {
if (spec.empty() || spec == "all")
spec = "."; // Regexp that matches all benchmarks
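With the new overloads the filter can be supplied programmatically instead of via --benchmark_filter; the regex below is just an example:

    benchmark::Initialize(&argc, argv);
    benchmark::RunSpecifiedBenchmarks("BM_Memcpy.*");  // default reporters, custom spec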
@@ -360,8 +566,7 @@ size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter,
std::unique_ptr<BenchmarkReporter> default_display_reporter;
std::unique_ptr<BenchmarkReporter> default_file_reporter;
if (!display_reporter) {
- default_display_reporter = internal::CreateReporter(
- FLAGS_benchmark_format, internal::GetOutputOptions());
+ default_display_reporter.reset(CreateDefaultDisplayReporter());
display_reporter = default_display_reporter.get();
}
auto& Out = display_reporter->GetOutputStream();
@@ -377,12 +582,14 @@ size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter,
if (!fname.empty()) {
output_file.open(fname);
if (!output_file.is_open()) {
- Err << "invalid file name: '" << fname << std::endl;
+ Err << "invalid file name: '" << fname << "'" << std::endl;
std::exit(1);
}
if (!file_reporter) {
default_file_reporter = internal::CreateReporter(
- FLAGS_benchmark_out_format, ConsoleReporter::OO_None);
+ FLAGS_benchmark_out_format, FLAGS_benchmark_counters_tabular
+ ? ConsoleReporter::OO_Tabular
+ : ConsoleReporter::OO_None);
file_reporter = default_file_reporter.get();
}
file_reporter->SetOutputStream(&output_file);
@@ -399,7 +606,7 @@ size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter,
if (FLAGS_benchmark_list_tests) {
for (auto const& benchmark : benchmarks)
- Out << benchmark.name.str() << "\n";
+ Out << benchmark.name().str() << "\n";
} else {
internal::RunBenchmarks(benchmarks, display_reporter, file_reporter);
}
@@ -407,30 +614,64 @@ size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter,
return benchmarks.size();
}
+namespace {
+// stores the time unit benchmarks use by default
+TimeUnit default_time_unit = kNanosecond;
+} // namespace
+
+TimeUnit GetDefaultTimeUnit() { return default_time_unit; }
+
+void SetDefaultTimeUnit(TimeUnit unit) { default_time_unit = unit; }
+
+std::string GetBenchmarkFilter() { return FLAGS_benchmark_filter; }
+
+void SetBenchmarkFilter(std::string value) {
+ FLAGS_benchmark_filter = std::move(value);
+}
+
+int32_t GetBenchmarkVerbosity() { return FLAGS_v; }
+
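These accessors are programmatic counterparts of the corresponding flags, e.g.:

    benchmark::SetDefaultTimeUnit(benchmark::kMillisecond);  // like --benchmark_time_unit=ms
    benchmark::SetBenchmarkFilter("BM_Sort.*");              // like --benchmark_filter=BM_Sort.*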
void RegisterMemoryManager(MemoryManager* manager) {
internal::memory_manager = manager;
}
+void AddCustomContext(const std::string& key, const std::string& value) {
+ if (internal::global_context == nullptr) {
+ internal::global_context = new std::map<std::string, std::string>();
+ }
+ if (!internal::global_context->emplace(key, value).second) {
+ std::cerr << "Failed to add custom context \"" << key << "\" as it already "
+ << "exists with value \"" << value << "\"\n";
+ }
+}
+
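Custom context entries surface in the reporters' context output (for example the JSON "context" block); the keys and values below are placeholders:

    benchmark::AddCustomContext("machine", "lab-node-17");
    benchmark::AddCustomContext("compiler", "clang-15");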
namespace internal {
+void (*HelperPrintf)();
+
void PrintUsageAndExit() {
- fprintf(stdout,
- "benchmark"
- " [--benchmark_list_tests={true|false}]\n"
- " [--benchmark_filter=<regex>]\n"
- " [--benchmark_min_time=<min_time>]\n"
- " [--benchmark_repetitions=<num_repetitions>]\n"
- " [--benchmark_report_aggregates_only={true|false}]\n"
- " [--benchmark_display_aggregates_only={true|false}]\n"
- " [--benchmark_format=<console|json|csv>]\n"
- " [--benchmark_out=<filename>]\n"
- " [--benchmark_out_format=<json|console|csv>]\n"
- " [--benchmark_color={auto|true|false}]\n"
- " [--benchmark_counters_tabular={true|false}]\n"
- " [--v=<verbosity>]\n");
+ HelperPrintf();
exit(0);
}
+void SetDefaultTimeUnitFromFlag(const std::string& time_unit_flag) {
+ if (time_unit_flag == "s") {
+ return SetDefaultTimeUnit(kSecond);
+ }
+ if (time_unit_flag == "ms") {
+ return SetDefaultTimeUnit(kMillisecond);
+ }
+ if (time_unit_flag == "us") {
+ return SetDefaultTimeUnit(kMicrosecond);
+ }
+ if (time_unit_flag == "ns") {
+ return SetDefaultTimeUnit(kNanosecond);
+ }
+ if (!time_unit_flag.empty()) {
+ PrintUsageAndExit();
+ }
+}
+
void ParseCommandLineFlags(int* argc, char** argv) {
using namespace benchmark;
BenchmarkReporter::Context::executable_name =
@@ -439,10 +680,14 @@ void ParseCommandLineFlags(int* argc, char** argv) {
if (ParseBoolFlag(argv[i], "benchmark_list_tests",
&FLAGS_benchmark_list_tests) ||
ParseStringFlag(argv[i], "benchmark_filter", &FLAGS_benchmark_filter) ||
- ParseDoubleFlag(argv[i], "benchmark_min_time",
+ ParseStringFlag(argv[i], "benchmark_min_time",
&FLAGS_benchmark_min_time) ||
+ ParseDoubleFlag(argv[i], "benchmark_min_warmup_time",
+ &FLAGS_benchmark_min_warmup_time) ||
ParseInt32Flag(argv[i], "benchmark_repetitions",
&FLAGS_benchmark_repetitions) ||
+ ParseBoolFlag(argv[i], "benchmark_enable_random_interleaving",
+ &FLAGS_benchmark_enable_random_interleaving) ||
ParseBoolFlag(argv[i], "benchmark_report_aggregates_only",
&FLAGS_benchmark_report_aggregates_only) ||
ParseBoolFlag(argv[i], "benchmark_display_aggregates_only",
@@ -452,11 +697,14 @@ void ParseCommandLineFlags(int* argc, char** argv) {
ParseStringFlag(argv[i], "benchmark_out_format",
&FLAGS_benchmark_out_format) ||
ParseStringFlag(argv[i], "benchmark_color", &FLAGS_benchmark_color) ||
- // "color_print" is the deprecated name for "benchmark_color".
- // TODO: Remove this.
- ParseStringFlag(argv[i], "color_print", &FLAGS_benchmark_color) ||
ParseBoolFlag(argv[i], "benchmark_counters_tabular",
&FLAGS_benchmark_counters_tabular) ||
+ ParseStringFlag(argv[i], "benchmark_perf_counters",
+ &FLAGS_benchmark_perf_counters) ||
+ ParseKeyValueFlag(argv[i], "benchmark_context",
+ &FLAGS_benchmark_context) ||
+ ParseStringFlag(argv[i], "benchmark_time_unit",
+ &FLAGS_benchmark_time_unit) ||
ParseInt32Flag(argv[i], "v", &FLAGS_v)) {
for (int j = i; j != *argc - 1; ++j) argv[j] = argv[j + 1];
@@ -467,13 +715,18 @@ void ParseCommandLineFlags(int* argc, char** argv) {
}
}
for (auto const* flag :
- {&FLAGS_benchmark_format, &FLAGS_benchmark_out_format})
+ {&FLAGS_benchmark_format, &FLAGS_benchmark_out_format}) {
if (*flag != "console" && *flag != "json" && *flag != "csv") {
PrintUsageAndExit();
}
+ }
+ SetDefaultTimeUnitFromFlag(FLAGS_benchmark_time_unit);
if (FLAGS_benchmark_color.empty()) {
PrintUsageAndExit();
}
+ for (const auto& kv : FLAGS_benchmark_context) {
+ AddCustomContext(kv.first, kv.second);
+ }
}
int InitializeStreams() {
@@ -483,11 +736,38 @@ int InitializeStreams() {
} // end namespace internal
-void Initialize(int* argc, char** argv) {
+void PrintDefaultHelp() {
+ fprintf(stdout,
+ "benchmark"
+ " [--benchmark_list_tests={true|false}]\n"
+ " [--benchmark_filter=<regex>]\n"
+ " [--benchmark_min_time=`<integer>x` OR `<float>s` ]\n"
+ " [--benchmark_min_warmup_time=<min_warmup_time>]\n"
+ " [--benchmark_repetitions=<num_repetitions>]\n"
+ " [--benchmark_enable_random_interleaving={true|false}]\n"
+ " [--benchmark_report_aggregates_only={true|false}]\n"
+ " [--benchmark_display_aggregates_only={true|false}]\n"
+ " [--benchmark_format=<console|json|csv>]\n"
+ " [--benchmark_out=<filename>]\n"
+ " [--benchmark_out_format=<json|console|csv>]\n"
+ " [--benchmark_color={auto|true|false}]\n"
+ " [--benchmark_counters_tabular={true|false}]\n"
+#if defined HAVE_LIBPFM
+ " [--benchmark_perf_counters=<counter>,...]\n"
+#endif
+ " [--benchmark_context=<key>=<value>,...]\n"
+ " [--benchmark_time_unit={ns|us|ms|s}]\n"
+ " [--v=<verbosity>]\n");
+}
+
+void Initialize(int* argc, char** argv, void (*HelperPrintf)()) {
+ internal::HelperPrintf = HelperPrintf;
internal::ParseCommandLineFlags(argc, argv);
internal::LogLevel() = FLAGS_v;
}
+void Shutdown() { delete internal::global_context; }
+
bool ReportUnrecognizedArguments(int argc, char** argv) {
for (int i = 1; i < argc; ++i) {
fprintf(stderr, "%s: error: unrecognized command-line flag: %s\n", argv[0],
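
The hunk above replaces the hard-coded usage text with a pluggable help printer, adds a global custom-context map, and introduces an explicit Shutdown(). Below is a minimal sketch of how a custom main() might exercise these entry points; the helper name, the context key/value, and the assumption that the public header defaults the third Initialize() argument are illustrative, not taken from this diff.

#include <benchmark/benchmark.h>

#include <cstdio>

// Hypothetical replacement for the default help printer; any void() function works.
static void MyHelp() {
  std::printf(
      "mybench [--benchmark_filter=<regex>] "
      "[--benchmark_min_time=<float>s|<integer>x]\n");
}

static void BM_Noop(benchmark::State& state) {
  for (auto _ : state) {
    benchmark::DoNotOptimize(state.iterations());
  }
}
BENCHMARK(BM_Noop);

int main(int argc, char** argv) {
  // Key/value pairs land in the reporter context, next to --benchmark_context=... flags.
  benchmark::AddCustomContext("build", "release");
  benchmark::Initialize(&argc, argv, &MyHelp);
  if (benchmark::ReportUnrecognizedArguments(argc, argv)) return 1;
  benchmark::RunSpecifiedBenchmarks();
  benchmark::Shutdown();  // frees the global custom-context map created above
  return 0;
}
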
diff --git a/src/benchmark_api_internal.cc b/src/benchmark_api_internal.cc
index d468a25..286f986 100644
--- a/src/benchmark_api_internal.cc
+++ b/src/benchmark_api_internal.cc
@@ -1,15 +1,118 @@
#include "benchmark_api_internal.h"
+#include <cinttypes>
+
+#include "string_util.h"
+
namespace benchmark {
namespace internal {
-State BenchmarkInstance::Run(IterationCount iters, int thread_id,
- internal::ThreadTimer* timer,
- internal::ThreadManager* manager) const {
- State st(iters, arg, thread_id, threads, timer, manager);
- benchmark->Run(st);
+BenchmarkInstance::BenchmarkInstance(Benchmark* benchmark, int family_idx,
+ int per_family_instance_idx,
+ const std::vector<int64_t>& args,
+ int thread_count)
+ : benchmark_(*benchmark),
+ family_index_(family_idx),
+ per_family_instance_index_(per_family_instance_idx),
+ aggregation_report_mode_(benchmark_.aggregation_report_mode_),
+ args_(args),
+ time_unit_(benchmark_.GetTimeUnit()),
+ measure_process_cpu_time_(benchmark_.measure_process_cpu_time_),
+ use_real_time_(benchmark_.use_real_time_),
+ use_manual_time_(benchmark_.use_manual_time_),
+ complexity_(benchmark_.complexity_),
+ complexity_lambda_(benchmark_.complexity_lambda_),
+ statistics_(benchmark_.statistics_),
+ repetitions_(benchmark_.repetitions_),
+ min_time_(benchmark_.min_time_),
+ min_warmup_time_(benchmark_.min_warmup_time_),
+ iterations_(benchmark_.iterations_),
+ threads_(thread_count) {
+ name_.function_name = benchmark_.name_;
+
+ size_t arg_i = 0;
+ for (const auto& arg : args) {
+ if (!name_.args.empty()) {
+ name_.args += '/';
+ }
+
+ if (arg_i < benchmark->arg_names_.size()) {
+ const auto& arg_name = benchmark_.arg_names_[arg_i];
+ if (!arg_name.empty()) {
+ name_.args += StrFormat("%s:", arg_name.c_str());
+ }
+ }
+
+ name_.args += StrFormat("%" PRId64, arg);
+ ++arg_i;
+ }
+
+ if (!IsZero(benchmark->min_time_)) {
+ name_.min_time = StrFormat("min_time:%0.3f", benchmark_.min_time_);
+ }
+
+ if (!IsZero(benchmark->min_warmup_time_)) {
+ name_.min_warmup_time =
+ StrFormat("min_warmup_time:%0.3f", benchmark_.min_warmup_time_);
+ }
+
+ if (benchmark_.iterations_ != 0) {
+ name_.iterations = StrFormat(
+ "iterations:%lu", static_cast<unsigned long>(benchmark_.iterations_));
+ }
+
+ if (benchmark_.repetitions_ != 0) {
+ name_.repetitions = StrFormat("repeats:%d", benchmark_.repetitions_);
+ }
+
+ if (benchmark_.measure_process_cpu_time_) {
+ name_.time_type = "process_time";
+ }
+
+ if (benchmark_.use_manual_time_) {
+ if (!name_.time_type.empty()) {
+ name_.time_type += '/';
+ }
+ name_.time_type += "manual_time";
+ } else if (benchmark_.use_real_time_) {
+ if (!name_.time_type.empty()) {
+ name_.time_type += '/';
+ }
+ name_.time_type += "real_time";
+ }
+
+ if (!benchmark_.thread_counts_.empty()) {
+ name_.threads = StrFormat("threads:%d", threads_);
+ }
+
+ setup_ = benchmark_.setup_;
+ teardown_ = benchmark_.teardown_;
+}
+
+State BenchmarkInstance::Run(
+ IterationCount iters, int thread_id, internal::ThreadTimer* timer,
+ internal::ThreadManager* manager,
+ internal::PerfCountersMeasurement* perf_counters_measurement) const {
+ State st(name_.function_name, iters, args_, thread_id, threads_, timer,
+ manager, perf_counters_measurement);
+ benchmark_.Run(st);
return st;
}
-} // internal
-} // benchmark
+void BenchmarkInstance::Setup() const {
+ if (setup_) {
+ State st(name_.function_name, /*iters*/ 1, args_, /*thread_id*/ 0, threads_,
+ nullptr, nullptr, nullptr);
+ setup_(st);
+ }
+}
+
+void BenchmarkInstance::Teardown() const {
+ if (teardown_) {
+ State st(name_.function_name, /*iters*/ 1, args_, /*thread_id*/ 0, threads_,
+ nullptr, nullptr, nullptr);
+ teardown_(st);
+ }
+}
+} // namespace internal
+} // namespace benchmark
diff --git a/src/benchmark_api_internal.h b/src/benchmark_api_internal.h
index 264eff9..94f5165 100644
--- a/src/benchmark_api_internal.h
+++ b/src/benchmark_api_internal.h
@@ -1,9 +1,6 @@
#ifndef BENCHMARK_API_INTERNAL_H
#define BENCHMARK_API_INTERNAL_H
-#include "benchmark/benchmark.h"
-#include "commandlineflags.h"
-
#include <cmath>
#include <iosfwd>
#include <limits>
@@ -11,32 +8,68 @@
#include <string>
#include <vector>
+#include "benchmark/benchmark.h"
+#include "commandlineflags.h"
+
namespace benchmark {
namespace internal {
// Information kept per benchmark we may want to run
-struct BenchmarkInstance {
- BenchmarkName name;
- Benchmark* benchmark;
- AggregationReportMode aggregation_report_mode;
- std::vector<int64_t> arg;
- TimeUnit time_unit;
- int range_multiplier;
- bool measure_process_cpu_time;
- bool use_real_time;
- bool use_manual_time;
- BigO complexity;
- BigOFunc* complexity_lambda;
- UserCounters counters;
- const std::vector<Statistics>* statistics;
- bool last_benchmark_instance;
- int repetitions;
- double min_time;
- IterationCount iterations;
- int threads; // Number of concurrent threads to us
+class BenchmarkInstance {
+ public:
+ BenchmarkInstance(Benchmark* benchmark, int family_index,
+ int per_family_instance_index,
+ const std::vector<int64_t>& args, int threads);
+
+ const BenchmarkName& name() const { return name_; }
+ int family_index() const { return family_index_; }
+ int per_family_instance_index() const { return per_family_instance_index_; }
+ AggregationReportMode aggregation_report_mode() const {
+ return aggregation_report_mode_;
+ }
+ TimeUnit time_unit() const { return time_unit_; }
+ bool measure_process_cpu_time() const { return measure_process_cpu_time_; }
+ bool use_real_time() const { return use_real_time_; }
+ bool use_manual_time() const { return use_manual_time_; }
+ BigO complexity() const { return complexity_; }
+ BigOFunc* complexity_lambda() const { return complexity_lambda_; }
+ const std::vector<Statistics>& statistics() const { return statistics_; }
+ int repetitions() const { return repetitions_; }
+ double min_time() const { return min_time_; }
+ double min_warmup_time() const { return min_warmup_time_; }
+ IterationCount iterations() const { return iterations_; }
+ int threads() const { return threads_; }
+ void Setup() const;
+ void Teardown() const;
State Run(IterationCount iters, int thread_id, internal::ThreadTimer* timer,
- internal::ThreadManager* manager) const;
+ internal::ThreadManager* manager,
+ internal::PerfCountersMeasurement* perf_counters_measurement) const;
+
+ private:
+ BenchmarkName name_;
+ Benchmark& benchmark_;
+ const int family_index_;
+ const int per_family_instance_index_;
+ AggregationReportMode aggregation_report_mode_;
+ const std::vector<int64_t>& args_;
+ TimeUnit time_unit_;
+ bool measure_process_cpu_time_;
+ bool use_real_time_;
+ bool use_manual_time_;
+ BigO complexity_;
+ BigOFunc* complexity_lambda_;
+ UserCounters counters_;
+ const std::vector<Statistics>& statistics_;
+ int repetitions_;
+ double min_time_;
+ double min_warmup_time_;
+ IterationCount iterations_;
+ int threads_; // Number of concurrent threads to use
+
+ typedef void (*callback_function)(const benchmark::State&);
+ callback_function setup_ = nullptr;
+ callback_function teardown_ = nullptr;
};
bool FindBenchmarksInternal(const std::string& re,
@@ -45,6 +78,7 @@ bool FindBenchmarksInternal(const std::string& re,
bool IsZero(double n);
+BENCHMARK_EXPORT
ConsoleReporter::OutputOptions GetOutputOptions(bool force_no_color = false);
} // end namespace internal
diff --git a/src/benchmark_main.cc b/src/benchmark_main.cc
index b3b2478..cd61cd2 100644
--- a/src/benchmark_main.cc
+++ b/src/benchmark_main.cc
@@ -14,4 +14,5 @@
#include "benchmark/benchmark.h"
+BENCHMARK_EXPORT int main(int, char**);
BENCHMARK_MAIN();
diff --git a/src/benchmark_name.cc b/src/benchmark_name.cc
index 2a17ebc..01676bb 100644
--- a/src/benchmark_name.cc
+++ b/src/benchmark_name.cc
@@ -51,8 +51,9 @@ std::string join(char delimiter, const Ts&... ts) {
}
} // namespace
+BENCHMARK_EXPORT
std::string BenchmarkName::str() const {
- return join('/', function_name, args, min_time, iterations, repetitions,
- time_type, threads);
+ return join('/', function_name, args, min_time, min_warmup_time, iterations,
+ repetitions, time_type, threads);
}
} // namespace benchmark
diff --git a/src/benchmark_register.cc b/src/benchmark_register.cc
index 65d9944..e447c9a 100644
--- a/src/benchmark_register.cc
+++ b/src/benchmark_register.cc
@@ -15,7 +15,7 @@
#include "benchmark_register.h"
#ifndef BENCHMARK_OS_WINDOWS
-#ifndef BENCHMARK_OS_FUCHSIA
+#if !defined(BENCHMARK_OS_FUCHSIA) && !defined(BENCHMARK_OS_QURT)
#include <sys/resource.h>
#endif
#include <sys/time.h>
@@ -24,6 +24,7 @@
#include <algorithm>
#include <atomic>
+#include <cinttypes>
#include <condition_variable>
#include <cstdio>
#include <cstdlib>
@@ -35,11 +36,6 @@
#include <sstream>
#include <thread>
-#ifndef __STDC_FORMAT_MACROS
-#define __STDC_FORMAT_MACROS
-#endif
-#include <inttypes.h>
-
#include "benchmark/benchmark.h"
#include "benchmark_api_internal.h"
#include "check.h"
@@ -57,10 +53,13 @@ namespace benchmark {
namespace {
// For non-dense Range, intermediate values are powers of kRangeMultiplier.
-static const int kRangeMultiplier = 8;
+static constexpr int kRangeMultiplier = 8;
+
// The size of a benchmark family determines the number of inputs to repeat
// the benchmark on. If this is "large" then warn the user during configuration.
-static const size_t kMaxFamilySize = 100;
+static constexpr size_t kMaxFamilySize = 100;
+
+static constexpr char kDisabledPrefix[] = "DISABLED_";
} // end namespace
namespace internal {
@@ -115,15 +114,15 @@ void BenchmarkFamilies::ClearBenchmarks() {
bool BenchmarkFamilies::FindBenchmarks(
std::string spec, std::vector<BenchmarkInstance>* benchmarks,
std::ostream* ErrStream) {
- CHECK(ErrStream);
+ BM_CHECK(ErrStream);
auto& Err = *ErrStream;
// Make regular expression out of command-line flag
std::string error_msg;
Regex re;
- bool isNegativeFilter = false;
+ bool is_negative_filter = false;
if (spec[0] == '-') {
spec.replace(0, 1, "");
- isNegativeFilter = true;
+ is_negative_filter = true;
}
if (!re.Init(spec, &error_msg)) {
Err << "Could not compile benchmark re: " << error_msg << std::endl;
@@ -133,8 +132,13 @@ bool BenchmarkFamilies::FindBenchmarks(
// Special list of thread counts to use when none are specified
const std::vector<int> one_thread = {1};
+ int next_family_index = 0;
+
MutexLock l(mutex_);
for (std::unique_ptr<Benchmark>& family : families_) {
+ int family_index = next_family_index;
+ int per_family_instance_index = 0;
+
// Family was deleted or benchmark doesn't match
if (!family) continue;
@@ -153,85 +157,27 @@ bool BenchmarkFamilies::FindBenchmarks(
<< " will be repeated at least " << family_size << " times.\n";
}
// reserve in the special case the regex ".", since we know the final
- // family size.
- if (spec == ".") benchmarks->reserve(family_size);
+ // family size. This doesn't take into account any disabled benchmarks,
+ // so worst case we reserve more than we need.
+ if (spec == ".") benchmarks->reserve(benchmarks->size() + family_size);
for (auto const& args : family->args_) {
for (int num_threads : *thread_counts) {
- BenchmarkInstance instance;
- instance.name.function_name = family->name_;
- instance.benchmark = family.get();
- instance.aggregation_report_mode = family->aggregation_report_mode_;
- instance.arg = args;
- instance.time_unit = family->time_unit_;
- instance.range_multiplier = family->range_multiplier_;
- instance.min_time = family->min_time_;
- instance.iterations = family->iterations_;
- instance.repetitions = family->repetitions_;
- instance.measure_process_cpu_time = family->measure_process_cpu_time_;
- instance.use_real_time = family->use_real_time_;
- instance.use_manual_time = family->use_manual_time_;
- instance.complexity = family->complexity_;
- instance.complexity_lambda = family->complexity_lambda_;
- instance.statistics = &family->statistics_;
- instance.threads = num_threads;
-
- // Add arguments to instance name
- size_t arg_i = 0;
- for (auto const& arg : args) {
- if (!instance.name.args.empty()) {
- instance.name.args += '/';
- }
-
- if (arg_i < family->arg_names_.size()) {
- const auto& arg_name = family->arg_names_[arg_i];
- if (!arg_name.empty()) {
- instance.name.args += StrFormat("%s:", arg_name.c_str());
- }
- }
-
- instance.name.args += StrFormat("%" PRId64, arg);
- ++arg_i;
- }
-
- if (!IsZero(family->min_time_))
- instance.name.min_time =
- StrFormat("min_time:%0.3f", family->min_time_);
- if (family->iterations_ != 0) {
- instance.name.iterations =
- StrFormat("iterations:%lu",
- static_cast<unsigned long>(family->iterations_));
- }
- if (family->repetitions_ != 0)
- instance.name.repetitions =
- StrFormat("repeats:%d", family->repetitions_);
-
- if (family->measure_process_cpu_time_) {
- instance.name.time_type = "process_time";
- }
+ BenchmarkInstance instance(family.get(), family_index,
+ per_family_instance_index, args,
+ num_threads);
+
+ const auto full_name = instance.name().str();
+ if (full_name.rfind(kDisabledPrefix, 0) != 0 &&
+ ((re.Match(full_name) && !is_negative_filter) ||
+ (!re.Match(full_name) && is_negative_filter))) {
+ benchmarks->push_back(std::move(instance));
- if (family->use_manual_time_) {
- if (!instance.name.time_type.empty()) {
- instance.name.time_type += '/';
- }
- instance.name.time_type += "manual_time";
- } else if (family->use_real_time_) {
- if (!instance.name.time_type.empty()) {
- instance.name.time_type += '/';
- }
- instance.name.time_type += "real_time";
- }
+ ++per_family_instance_index;
- // Add the number of threads used to the name
- if (!family->thread_counts_.empty()) {
- instance.name.threads = StrFormat("threads:%d", instance.threads);
- }
-
- const auto full_name = instance.name.str();
- if ((re.Match(full_name) && !isNegativeFilter) ||
- (!re.Match(full_name) && isNegativeFilter)) {
- instance.last_benchmark_instance = (&args == &family->args_.back());
- benchmarks->push_back(std::move(instance));
+ // Only bump the next family index once we've established that
+ // at least one instance of this family will be run.
+ if (next_family_index == family_index) ++next_family_index;
}
}
}
@@ -258,39 +204,50 @@ bool FindBenchmarksInternal(const std::string& re,
// Benchmark
//=============================================================================//
-Benchmark::Benchmark(const char* name)
+Benchmark::Benchmark(const std::string& name)
: name_(name),
aggregation_report_mode_(ARM_Unspecified),
- time_unit_(kNanosecond),
+ time_unit_(GetDefaultTimeUnit()),
+ use_default_time_unit_(true),
range_multiplier_(kRangeMultiplier),
min_time_(0),
+ min_warmup_time_(0),
iterations_(0),
repetitions_(0),
measure_process_cpu_time_(false),
use_real_time_(false),
use_manual_time_(false),
complexity_(oNone),
- complexity_lambda_(nullptr) {
+ complexity_lambda_(nullptr),
+ setup_(nullptr),
+ teardown_(nullptr) {
ComputeStatistics("mean", StatisticsMean);
ComputeStatistics("median", StatisticsMedian);
ComputeStatistics("stddev", StatisticsStdDev);
+ ComputeStatistics("cv", StatisticsCV, kPercentage);
}
Benchmark::~Benchmark() {}
+Benchmark* Benchmark::Name(const std::string& name) {
+ SetName(name);
+ return this;
+}
+
Benchmark* Benchmark::Arg(int64_t x) {
- CHECK(ArgsCnt() == -1 || ArgsCnt() == 1);
+ BM_CHECK(ArgsCnt() == -1 || ArgsCnt() == 1);
args_.push_back({x});
return this;
}
Benchmark* Benchmark::Unit(TimeUnit unit) {
time_unit_ = unit;
+ use_default_time_unit_ = false;
return this;
}
Benchmark* Benchmark::Range(int64_t start, int64_t limit) {
- CHECK(ArgsCnt() == -1 || ArgsCnt() == 1);
+ BM_CHECK(ArgsCnt() == -1 || ArgsCnt() == 1);
std::vector<int64_t> arglist;
AddRange(&arglist, start, limit, range_multiplier_);
@@ -302,7 +259,7 @@ Benchmark* Benchmark::Range(int64_t start, int64_t limit) {
Benchmark* Benchmark::Ranges(
const std::vector<std::pair<int64_t, int64_t>>& ranges) {
- CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast<int>(ranges.size()));
+ BM_CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast<int>(ranges.size()));
std::vector<std::vector<int64_t>> arglists(ranges.size());
for (std::size_t i = 0; i < ranges.size(); i++) {
AddRange(&arglists[i], ranges[i].first, ranges[i].second,
@@ -316,7 +273,7 @@ Benchmark* Benchmark::Ranges(
Benchmark* Benchmark::ArgsProduct(
const std::vector<std::vector<int64_t>>& arglists) {
- CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast<int>(arglists.size()));
+ BM_CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast<int>(arglists.size()));
std::vector<std::size_t> indices(arglists.size());
const std::size_t total = std::accumulate(
@@ -343,20 +300,20 @@ Benchmark* Benchmark::ArgsProduct(
}
Benchmark* Benchmark::ArgName(const std::string& name) {
- CHECK(ArgsCnt() == -1 || ArgsCnt() == 1);
+ BM_CHECK(ArgsCnt() == -1 || ArgsCnt() == 1);
arg_names_ = {name};
return this;
}
Benchmark* Benchmark::ArgNames(const std::vector<std::string>& names) {
- CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast<int>(names.size()));
+ BM_CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast<int>(names.size()));
arg_names_ = names;
return this;
}
Benchmark* Benchmark::DenseRange(int64_t start, int64_t limit, int step) {
- CHECK(ArgsCnt() == -1 || ArgsCnt() == 1);
- CHECK_LE(start, limit);
+ BM_CHECK(ArgsCnt() == -1 || ArgsCnt() == 1);
+ BM_CHECK_LE(start, limit);
for (int64_t arg = start; arg <= limit; arg += step) {
args_.push_back({arg});
}
@@ -364,7 +321,7 @@ Benchmark* Benchmark::DenseRange(int64_t start, int64_t limit, int step) {
}
Benchmark* Benchmark::Args(const std::vector<int64_t>& args) {
- CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast<int>(args.size()));
+ BM_CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast<int>(args.size()));
args_.push_back(args);
return this;
}
@@ -374,28 +331,48 @@ Benchmark* Benchmark::Apply(void (*custom_arguments)(Benchmark* benchmark)) {
return this;
}
+Benchmark* Benchmark::Setup(void (*setup)(const benchmark::State&)) {
+ BM_CHECK(setup != nullptr);
+ setup_ = setup;
+ return this;
+}
+
+Benchmark* Benchmark::Teardown(void (*teardown)(const benchmark::State&)) {
+ BM_CHECK(teardown != nullptr);
+ teardown_ = teardown;
+ return this;
+}
+
Benchmark* Benchmark::RangeMultiplier(int multiplier) {
- CHECK(multiplier > 1);
+ BM_CHECK(multiplier > 1);
range_multiplier_ = multiplier;
return this;
}
Benchmark* Benchmark::MinTime(double t) {
- CHECK(t > 0.0);
- CHECK(iterations_ == 0);
+ BM_CHECK(t > 0.0);
+ BM_CHECK(iterations_ == 0);
min_time_ = t;
return this;
}
+Benchmark* Benchmark::MinWarmUpTime(double t) {
+ BM_CHECK(t >= 0.0);
+ BM_CHECK(iterations_ == 0);
+ min_warmup_time_ = t;
+ return this;
+}
+
Benchmark* Benchmark::Iterations(IterationCount n) {
- CHECK(n > 0);
- CHECK(IsZero(min_time_));
+ BM_CHECK(n > 0);
+ BM_CHECK(IsZero(min_time_));
+ BM_CHECK(IsZero(min_warmup_time_));
iterations_ = n;
return this;
}
Benchmark* Benchmark::Repetitions(int n) {
- CHECK(n > 0);
+ BM_CHECK(n > 0);
repetitions_ = n;
return this;
}
@@ -428,14 +405,14 @@ Benchmark* Benchmark::MeasureProcessCPUTime() {
}
Benchmark* Benchmark::UseRealTime() {
- CHECK(!use_manual_time_)
+ BM_CHECK(!use_manual_time_)
<< "Cannot set UseRealTime and UseManualTime simultaneously.";
use_real_time_ = true;
return this;
}
Benchmark* Benchmark::UseManualTime() {
- CHECK(!use_real_time_)
+ BM_CHECK(!use_real_time_)
<< "Cannot set UseRealTime and UseManualTime simultaneously.";
use_manual_time_ = true;
return this;
@@ -452,21 +429,22 @@ Benchmark* Benchmark::Complexity(BigOFunc* complexity) {
return this;
}
-Benchmark* Benchmark::ComputeStatistics(std::string name,
- StatisticsFunc* statistics) {
- statistics_.emplace_back(name, statistics);
+Benchmark* Benchmark::ComputeStatistics(const std::string& name,
+ StatisticsFunc* statistics,
+ StatisticUnit unit) {
+ statistics_.emplace_back(name, statistics, unit);
return this;
}
Benchmark* Benchmark::Threads(int t) {
- CHECK_GT(t, 0);
+ BM_CHECK_GT(t, 0);
thread_counts_.push_back(t);
return this;
}
Benchmark* Benchmark::ThreadRange(int min_threads, int max_threads) {
- CHECK_GT(min_threads, 0);
- CHECK_GE(max_threads, min_threads);
+ BM_CHECK_GT(min_threads, 0);
+ BM_CHECK_GE(max_threads, min_threads);
AddRange(&thread_counts_, min_threads, max_threads, 2);
return this;
@@ -474,9 +452,9 @@ Benchmark* Benchmark::ThreadRange(int min_threads, int max_threads) {
Benchmark* Benchmark::DenseThreadRange(int min_threads, int max_threads,
int stride) {
- CHECK_GT(min_threads, 0);
- CHECK_GE(max_threads, min_threads);
- CHECK_GE(stride, 1);
+ BM_CHECK_GT(min_threads, 0);
+ BM_CHECK_GE(max_threads, min_threads);
+ BM_CHECK_GE(stride, 1);
for (auto i = min_threads; i < max_threads; i += stride) {
thread_counts_.push_back(i);
@@ -490,7 +468,9 @@ Benchmark* Benchmark::ThreadPerCpu() {
return this;
}
-void Benchmark::SetName(const char* name) { name_ = name; }
+void Benchmark::SetName(const std::string& name) { name_ = name; }
+
+const char* Benchmark::GetName() const { return name_.c_str(); }
int Benchmark::ArgsCnt() const {
if (args_.empty()) {
@@ -500,6 +480,16 @@ int Benchmark::ArgsCnt() const {
return static_cast<int>(args_.front().size());
}
+const char* Benchmark::GetArgName(int arg) const {
+ BM_CHECK_GE(arg, 0);
+ BM_CHECK_LT(arg, static_cast<int>(arg_names_.size()));
+ return arg_names_[arg].c_str();
+}
+
+TimeUnit Benchmark::GetTimeUnit() const {
+ return use_default_time_unit_ ? GetDefaultTimeUnit() : time_unit_;
+}
+
//=============================================================================//
// FunctionBenchmark
//=============================================================================//
@@ -512,4 +502,19 @@ void ClearRegisteredBenchmarks() {
internal::BenchmarkFamilies::GetInstance()->ClearBenchmarks();
}
+std::vector<int64_t> CreateRange(int64_t lo, int64_t hi, int multi) {
+ std::vector<int64_t> args;
+ internal::AddRange(&args, lo, hi, multi);
+ return args;
+}
+
+std::vector<int64_t> CreateDenseRange(int64_t start, int64_t limit, int step) {
+ BM_CHECK_LE(start, limit);
+ std::vector<int64_t> args;
+ for (int64_t arg = start; arg <= limit; arg += step) {
+ args.push_back(arg);
+ }
+ return args;
+}
+
} // end namespace benchmark
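
benchmark_register.cc above wires several new registration features through to BenchmarkInstance: per-benchmark Setup/Teardown callbacks, MinWarmUpTime, a settable default time unit, the DISABLED_ name prefix, and the CreateRange/CreateDenseRange helpers. A rough sketch of how they combine at registration time follows; the benchmark names, sizes, and values are made up for illustration.

#include <benchmark/benchmark.h>

#include <vector>

static std::vector<int> g_data;

static void DoSetup(const benchmark::State& state) {
  // Invoked before the benchmark runs; per the runner changes, it may be called
  // again while searching for an iteration count and before the warmup phase.
  g_data.assign(static_cast<size_t>(state.range(0)), 42);
}

static void DoTeardown(const benchmark::State&) { g_data.clear(); }

static void BM_Sum(benchmark::State& state) {
  for (auto _ : state) {
    long sum = 0;
    for (int v : g_data) sum += v;
    benchmark::DoNotOptimize(sum);
  }
}

BENCHMARK(BM_Sum)
    ->Name("BM_SumOfVector")  // overrides the registered name
    ->Setup(DoSetup)
    ->Teardown(DoTeardown)
    ->MinWarmUpTime(0.5)      // seconds of warmup before measuring
    ->ArgsProduct({benchmark::CreateRange(8, 1024, /*multi=*/8),
                   benchmark::CreateDenseRange(1, 4, /*step=*/1)});

// Benchmarks whose generated name starts with "DISABLED_" are skipped entirely.
static void DISABLED_BM_Experimental(benchmark::State& state) {
  for (auto _ : state) {
  }
}
BENCHMARK(DISABLED_BM_Experimental);

BENCHMARK_MAIN();
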
diff --git a/src/benchmark_register.h b/src/benchmark_register.h
index c774e6f..53367c7 100644
--- a/src/benchmark_register.h
+++ b/src/benchmark_register.h
@@ -1,6 +1,7 @@
#ifndef BENCHMARK_REGISTER_H
#define BENCHMARK_REGISTER_H
+#include <algorithm>
#include <limits>
#include <vector>
@@ -12,18 +13,18 @@ namespace internal {
// Append the powers of 'mult' in the closed interval [lo, hi].
// Returns iterator to the start of the inserted range.
template <typename T>
-typename std::vector<T>::iterator
-AddPowers(std::vector<T>* dst, T lo, T hi, int mult) {
- CHECK_GE(lo, 0);
- CHECK_GE(hi, lo);
- CHECK_GE(mult, 2);
+typename std::vector<T>::iterator AddPowers(std::vector<T>* dst, T lo, T hi,
+ int mult) {
+ BM_CHECK_GE(lo, 0);
+ BM_CHECK_GE(hi, lo);
+ BM_CHECK_GE(mult, 2);
const size_t start_offset = dst->size();
static const T kmax = std::numeric_limits<T>::max();
// Space out the values in multiples of "mult"
- for (T i = 1; i <= hi; i *= mult) {
+ for (T i = static_cast<T>(1); i <= hi; i *= static_cast<T>(mult)) {
if (i >= lo) {
dst->push_back(i);
}
@@ -32,16 +33,16 @@ AddPowers(std::vector<T>* dst, T lo, T hi, int mult) {
if (i > kmax / mult) break;
}
- return dst->begin() + start_offset;
+ return dst->begin() + static_cast<int>(start_offset);
}
template <typename T>
void AddNegatedPowers(std::vector<T>* dst, T lo, T hi, int mult) {
// We negate lo and hi so we require that they cannot be equal to 'min'.
- CHECK_GT(lo, std::numeric_limits<T>::min());
- CHECK_GT(hi, std::numeric_limits<T>::min());
- CHECK_GE(hi, lo);
- CHECK_LE(hi, 0);
+ BM_CHECK_GT(lo, std::numeric_limits<T>::min());
+ BM_CHECK_GT(hi, std::numeric_limits<T>::min());
+ BM_CHECK_GE(hi, lo);
+ BM_CHECK_LE(hi, 0);
// Add positive powers, then negate and reverse.
// Casts necessary since small integers get promoted
@@ -60,8 +61,8 @@ void AddRange(std::vector<T>* dst, T lo, T hi, int mult) {
static_assert(std::is_integral<T>::value && std::is_signed<T>::value,
"Args type must be a signed integer");
- CHECK_GE(hi, lo);
- CHECK_GE(mult, 2);
+ BM_CHECK_GE(hi, lo);
+ BM_CHECK_GE(mult, 2);
// Add "lo"
dst->push_back(lo);
diff --git a/src/benchmark_runner.cc b/src/benchmark_runner.cc
index 7bc6b63..f7ae424 100644
--- a/src/benchmark_runner.cc
+++ b/src/benchmark_runner.cc
@@ -13,12 +13,13 @@
// limitations under the License.
#include "benchmark_runner.h"
+
#include "benchmark/benchmark.h"
#include "benchmark_api_internal.h"
#include "internal_macros.h"
#ifndef BENCHMARK_OS_WINDOWS
-#ifndef BENCHMARK_OS_FUCHSIA
+#if !defined(BENCHMARK_OS_FUCHSIA) && !defined(BENCHMARK_OS_QURT)
#include <sys/resource.h>
#endif
#include <sys/time.h>
@@ -27,11 +28,14 @@
#include <algorithm>
#include <atomic>
+#include <climits>
+#include <cmath>
#include <condition_variable>
#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <iostream>
+#include <limits>
#include <memory>
#include <string>
#include <thread>
@@ -45,6 +49,7 @@
#include "internal_macros.h"
#include "log.h"
#include "mutex.h"
+#include "perf_counters.h"
#include "re.h"
#include "statistics.h"
#include "string_util.h"
@@ -60,64 +65,72 @@ MemoryManager* memory_manager = nullptr;
namespace {
static constexpr IterationCount kMaxIterations = 1000000000;
+const double kDefaultMinTime =
+ std::strtod(::benchmark::kDefaultMinTimeStr, /*p_end*/ nullptr);
BenchmarkReporter::Run CreateRunReport(
const benchmark::internal::BenchmarkInstance& b,
const internal::ThreadManager::Result& results,
IterationCount memory_iterations,
- const MemoryManager::Result& memory_result, double seconds,
- int64_t repetition_index) {
+ const MemoryManager::Result* memory_result, double seconds,
+ int64_t repetition_index, int64_t repeats) {
// Create report about this benchmark run.
BenchmarkReporter::Run report;
- report.run_name = b.name;
- report.error_occurred = results.has_error_;
- report.error_message = results.error_message_;
+ report.run_name = b.name();
+ report.family_index = b.family_index();
+ report.per_family_instance_index = b.per_family_instance_index();
+ report.skipped = results.skipped_;
+ report.skip_message = results.skip_message_;
report.report_label = results.report_label_;
// This is the total iterations across all threads.
report.iterations = results.iterations;
- report.time_unit = b.time_unit;
- report.threads = b.threads;
+ report.time_unit = b.time_unit();
+ report.threads = b.threads();
report.repetition_index = repetition_index;
- report.repetitions = b.repetitions;
+ report.repetitions = repeats;
- if (!report.error_occurred) {
- if (b.use_manual_time) {
+ if (!report.skipped) {
+ if (b.use_manual_time()) {
report.real_accumulated_time = results.manual_time_used;
} else {
report.real_accumulated_time = results.real_time_used;
}
report.cpu_accumulated_time = results.cpu_time_used;
report.complexity_n = results.complexity_n;
- report.complexity = b.complexity;
- report.complexity_lambda = b.complexity_lambda;
- report.statistics = b.statistics;
+ report.complexity = b.complexity();
+ report.complexity_lambda = b.complexity_lambda();
+ report.statistics = &b.statistics();
report.counters = results.counters;
if (memory_iterations > 0) {
- report.has_memory_result = true;
+ assert(memory_result != nullptr);
+ report.memory_result = memory_result;
report.allocs_per_iter =
- memory_iterations ? static_cast<double>(memory_result.num_allocs) /
+ memory_iterations ? static_cast<double>(memory_result->num_allocs) /
memory_iterations
: 0;
- report.max_bytes_used = memory_result.max_bytes_used;
}
- internal::Finish(&report.counters, results.iterations, seconds, b.threads);
+ internal::Finish(&report.counters, results.iterations, seconds,
+ b.threads());
}
return report;
}
// Execute one thread of benchmark b for the specified number of iterations.
-// Adds the stats collected for the thread into *total.
+// Adds the stats collected for the thread into manager->results.
void RunInThread(const BenchmarkInstance* b, IterationCount iters,
- int thread_id, ThreadManager* manager) {
+ int thread_id, ThreadManager* manager,
+ PerfCountersMeasurement* perf_counters_measurement) {
internal::ThreadTimer timer(
- b->measure_process_cpu_time
+ b->measure_process_cpu_time()
? internal::ThreadTimer::CreateProcessCpuTime()
: internal::ThreadTimer::Create());
- State st = b->Run(iters, thread_id, &timer, manager);
- CHECK(st.error_occurred() || st.iterations() >= st.max_iterations)
+
+ State st =
+ b->Run(iters, thread_id, &timer, manager, perf_counters_measurement);
+ BM_CHECK(st.skipped() || st.iterations() >= st.max_iterations)
<< "Benchmark returned before State::KeepRunning() returned false!";
{
MutexLock l(manager->GetBenchmarkMutex());
@@ -132,229 +145,351 @@ void RunInThread(const BenchmarkInstance* b, IterationCount iters,
manager->NotifyThreadComplete();
}
-class BenchmarkRunner {
- public:
- BenchmarkRunner(const benchmark::internal::BenchmarkInstance& b_,
- std::vector<BenchmarkReporter::Run>* complexity_reports_)
- : b(b_),
- complexity_reports(*complexity_reports_),
- min_time(!IsZero(b.min_time) ? b.min_time : FLAGS_benchmark_min_time),
- repeats(b.repetitions != 0 ? b.repetitions
+double ComputeMinTime(const benchmark::internal::BenchmarkInstance& b,
+ const BenchTimeType& iters_or_time) {
+ if (!IsZero(b.min_time())) return b.min_time();
+ // If the flag was used to specify number of iters, then return the default
+ // min_time.
+ if (iters_or_time.tag == BenchTimeType::ITERS) return kDefaultMinTime;
+
+ return iters_or_time.time;
+}
+
+IterationCount ComputeIters(const benchmark::internal::BenchmarkInstance& b,
+ const BenchTimeType& iters_or_time) {
+ if (b.iterations() != 0) return b.iterations();
+
+ // We've already concluded that this flag is currently used to pass
+ // iters but do a check here again anyway.
+ BM_CHECK(iters_or_time.tag == BenchTimeType::ITERS);
+ return iters_or_time.iters;
+}
+
+} // end namespace
+
+BenchTimeType ParseBenchMinTime(const std::string& value) {
+ BenchTimeType ret;
+
+ if (value.empty()) {
+ ret.tag = BenchTimeType::TIME;
+ ret.time = 0.0;
+ return ret;
+ }
+
+ if (value.back() == 'x') {
+ char* p_end;
+ // Reset errno before it's changed by strtol.
+ errno = 0;
+ IterationCount num_iters = std::strtol(value.c_str(), &p_end, 10);
+
+ // After a valid parse, p_end should have been set to
+ // point to the 'x' suffix.
+ BM_CHECK(errno == 0 && p_end != nullptr && *p_end == 'x')
+ << "Malformed iters value passed to --benchmark_min_time: `" << value
+ << "`. Expected --benchmark_min_time=<integer>x.";
+
+ ret.tag = BenchTimeType::ITERS;
+ ret.iters = num_iters;
+ return ret;
+ }
+
+ bool has_suffix = value.back() == 's';
+ if (!has_suffix) {
+ BM_VLOG(0) << "Value passed to --benchmark_min_time should have a suffix. "
+ "Eg., `30s` for 30-seconds.";
+ }
+
+ char* p_end;
+ // Reset errno before it's changed by strtod.
+ errno = 0;
+ double min_time = std::strtod(value.c_str(), &p_end);
+
+ // After a successful parse, p_end should point to the suffix 's',
+ // or the end of the string if the suffix was omitted.
+ BM_CHECK(errno == 0 && p_end != nullptr &&
+ ((has_suffix && *p_end == 's') || *p_end == '\0'))
+ << "Malformed seconds value passed to --benchmark_min_time: `" << value
+ << "`. Expected --benchmark_min_time=<float>x.";
+
+ ret.tag = BenchTimeType::TIME;
+ ret.time = min_time;
+
+ return ret;
+}
+
+BenchmarkRunner::BenchmarkRunner(
+ const benchmark::internal::BenchmarkInstance& b_,
+ PerfCountersMeasurement* pcm_,
+ BenchmarkReporter::PerFamilyRunReports* reports_for_family_)
+ : b(b_),
+ reports_for_family(reports_for_family_),
+ parsed_benchtime_flag(ParseBenchMinTime(FLAGS_benchmark_min_time)),
+ min_time(ComputeMinTime(b_, parsed_benchtime_flag)),
+ min_warmup_time((!IsZero(b.min_time()) && b.min_warmup_time() > 0.0)
+ ? b.min_warmup_time()
+ : FLAGS_benchmark_min_warmup_time),
+ warmup_done(!(min_warmup_time > 0.0)),
+ repeats(b.repetitions() != 0 ? b.repetitions()
: FLAGS_benchmark_repetitions),
- has_explicit_iteration_count(b.iterations != 0),
- pool(b.threads - 1),
- iters(has_explicit_iteration_count ? b.iterations : 1) {
+ has_explicit_iteration_count(b.iterations() != 0 ||
+ parsed_benchtime_flag.tag ==
+ BenchTimeType::ITERS),
+ pool(b.threads() - 1),
+ iters(has_explicit_iteration_count
+ ? ComputeIters(b_, parsed_benchtime_flag)
+ : 1),
+ perf_counters_measurement_ptr(pcm_) {
+ run_results.display_report_aggregates_only =
+ (FLAGS_benchmark_report_aggregates_only ||
+ FLAGS_benchmark_display_aggregates_only);
+ run_results.file_report_aggregates_only =
+ FLAGS_benchmark_report_aggregates_only;
+ if (b.aggregation_report_mode() != internal::ARM_Unspecified) {
run_results.display_report_aggregates_only =
- (FLAGS_benchmark_report_aggregates_only ||
- FLAGS_benchmark_display_aggregates_only);
+ (b.aggregation_report_mode() &
+ internal::ARM_DisplayReportAggregatesOnly);
run_results.file_report_aggregates_only =
- FLAGS_benchmark_report_aggregates_only;
- if (b.aggregation_report_mode != internal::ARM_Unspecified) {
- run_results.display_report_aggregates_only =
- (b.aggregation_report_mode &
- internal::ARM_DisplayReportAggregatesOnly);
- run_results.file_report_aggregates_only =
- (b.aggregation_report_mode & internal::ARM_FileReportAggregatesOnly);
- }
+ (b.aggregation_report_mode() & internal::ARM_FileReportAggregatesOnly);
+ BM_CHECK(FLAGS_benchmark_perf_counters.empty() ||
+ (perf_counters_measurement_ptr->num_counters() == 0))
+ << "Perf counters were requested but could not be set up.";
+ }
+}
- for (int repetition_num = 0; repetition_num < repeats; repetition_num++) {
- DoOneRepetition(repetition_num);
- }
+BenchmarkRunner::IterationResults BenchmarkRunner::DoNIterations() {
+ BM_VLOG(2) << "Running " << b.name().str() << " for " << iters << "\n";
- // Calculate additional statistics
- run_results.aggregates_only = ComputeStats(run_results.non_aggregates);
+ std::unique_ptr<internal::ThreadManager> manager;
+ manager.reset(new internal::ThreadManager(b.threads()));
- // Maybe calculate complexity report
- if ((b.complexity != oNone) && b.last_benchmark_instance) {
- auto additional_run_stats = ComputeBigO(complexity_reports);
- run_results.aggregates_only.insert(run_results.aggregates_only.end(),
- additional_run_stats.begin(),
- additional_run_stats.end());
- complexity_reports.clear();
- }
+ // Run all but one thread in separate threads
+ for (std::size_t ti = 0; ti < pool.size(); ++ti) {
+ pool[ti] = std::thread(&RunInThread, &b, iters, static_cast<int>(ti + 1),
+ manager.get(), perf_counters_measurement_ptr);
}
+ // And run one thread here directly.
+ // (If we were asked to run just one thread, we don't create new threads.)
+ // Yes, we need to do this here *after* we start the separate threads.
+ RunInThread(&b, iters, 0, manager.get(), perf_counters_measurement_ptr);
- RunResults&& get_results() { return std::move(run_results); }
+ // The main thread has finished. Now let's wait for the other threads.
+ manager->WaitForAllThreads();
+ for (std::thread& thread : pool) thread.join();
- private:
- RunResults run_results;
+ IterationResults i;
+ // Acquire the measurements/counters from the manager, UNDER THE LOCK!
+ {
+ MutexLock l(manager->GetBenchmarkMutex());
+ i.results = manager->results;
+ }
- const benchmark::internal::BenchmarkInstance& b;
- std::vector<BenchmarkReporter::Run>& complexity_reports;
+ // And get rid of the manager.
+ manager.reset();
- const double min_time;
- const int repeats;
- const bool has_explicit_iteration_count;
+ // Adjust real/manual time stats since they were reported per thread.
+ i.results.real_time_used /= b.threads();
+ i.results.manual_time_used /= b.threads();
+ // If we were measuring whole-process CPU usage, adjust the CPU time too.
+ if (b.measure_process_cpu_time()) i.results.cpu_time_used /= b.threads();
- std::vector<std::thread> pool;
+ BM_VLOG(2) << "Ran in " << i.results.cpu_time_used << "/"
+ << i.results.real_time_used << "\n";
- IterationCount iters; // preserved between repetitions!
- // So only the first repetition has to find/calculate it,
- // the other repetitions will just use that precomputed iteration count.
+ // By using KeepRunningBatch a benchmark can iterate more times than
+ // requested, so take the iteration count from i.results.
+ i.iters = i.results.iterations / b.threads();
- struct IterationResults {
- internal::ThreadManager::Result results;
- IterationCount iters;
- double seconds;
- };
- IterationResults DoNIterations() {
- VLOG(2) << "Running " << b.name.str() << " for " << iters << "\n";
+ // Base decisions off of real time if requested by this benchmark.
+ i.seconds = i.results.cpu_time_used;
+ if (b.use_manual_time()) {
+ i.seconds = i.results.manual_time_used;
+ } else if (b.use_real_time()) {
+ i.seconds = i.results.real_time_used;
+ }
- std::unique_ptr<internal::ThreadManager> manager;
- manager.reset(new internal::ThreadManager(b.threads));
+ return i;
+}
- // Run all but one thread in separate threads
- for (std::size_t ti = 0; ti < pool.size(); ++ti) {
- pool[ti] = std::thread(&RunInThread, &b, iters, static_cast<int>(ti + 1),
- manager.get());
- }
- // And run one thread here directly.
- // (If we were asked to run just one thread, we don't create new threads.)
- // Yes, we need to do this here *after* we start the separate threads.
- RunInThread(&b, iters, 0, manager.get());
+IterationCount BenchmarkRunner::PredictNumItersNeeded(
+ const IterationResults& i) const {
+ // See by how much the iteration count should be increased.
+ // Note: Avoid division by zero with max(seconds, 1ns).
+ double multiplier = GetMinTimeToApply() * 1.4 / std::max(i.seconds, 1e-9);
+ // If our last run was at least 10% of the applicable minimum time then we
+ // use the multiplier directly.
+ // Otherwise we use at most 10 times expansion.
+ // NOTE: When the last run was at least 10% of the min time the max
+ // expansion should be 14x.
+ const bool is_significant = (i.seconds / GetMinTimeToApply()) > 0.1;
+ multiplier = is_significant ? multiplier : 10.0;
+
+ // So what seems to be the sufficiently-large iteration count? Round up.
+ const IterationCount max_next_iters = static_cast<IterationCount>(
+ std::lround(std::max(multiplier * static_cast<double>(i.iters),
+ static_cast<double>(i.iters) + 1.0)));
+ // But we do have *some* limits though..
+ const IterationCount next_iters = std::min(max_next_iters, kMaxIterations);
+
+ BM_VLOG(3) << "Next iters: " << next_iters << ", " << multiplier << "\n";
+ return next_iters; // round up before conversion to integer.
+}
- // The main thread has finished. Now let's wait for the other threads.
- manager->WaitForAllThreads();
- for (std::thread& thread : pool) thread.join();
+bool BenchmarkRunner::ShouldReportIterationResults(
+ const IterationResults& i) const {
+ // Determine if this run should be reported:
+ // either it has run for a sufficient amount of time,
+ // or the run was skipped (e.g. because an error was reported).
+ return i.results.skipped_ ||
+ i.iters >= kMaxIterations || // Too many iterations already.
+ i.seconds >=
+ GetMinTimeToApply() || // The elapsed time is large enough.
+ // CPU time is specified but the elapsed real time greatly exceeds
+ // the minimum time.
+ // Note that user-provided timers are exempt from this test.
+ ((i.results.real_time_used >= 5 * GetMinTimeToApply()) &&
+ !b.use_manual_time());
+}
- IterationResults i;
- // Acquire the measurements/counters from the manager, UNDER THE LOCK!
- {
- MutexLock l(manager->GetBenchmarkMutex());
- i.results = manager->results;
- }
+double BenchmarkRunner::GetMinTimeToApply() const {
+ // In order to reuse the functionality that runs and measures benchmarks for the
+ // warmup phase as well, we need a way of telling whether to apply min_time or
+ // min_warmup_time. This function figures out whether we are still in the warmup
+ // phase (and therefore min_warmup_time applies) or already in the benchmarking
+ // phase (and min_time applies).
+ return warmup_done ? min_time : min_warmup_time;
+}
- // And get rid of the manager.
- manager.reset();
+void BenchmarkRunner::FinishWarmUp(const IterationCount& i) {
+ warmup_done = true;
+ iters = i;
+}
- // Adjust real/manual time stats since they were reported per thread.
- i.results.real_time_used /= b.threads;
- i.results.manual_time_used /= b.threads;
- // If we were measuring whole-process CPU usage, adjust the CPU time too.
- if (b.measure_process_cpu_time) i.results.cpu_time_used /= b.threads;
-
- VLOG(2) << "Ran in " << i.results.cpu_time_used << "/"
- << i.results.real_time_used << "\n";
-
- // So for how long were we running?
- i.iters = iters;
- // Base decisions off of real time if requested by this benchmark.
- i.seconds = i.results.cpu_time_used;
- if (b.use_manual_time) {
- i.seconds = i.results.manual_time_used;
- } else if (b.use_real_time) {
- i.seconds = i.results.real_time_used;
+void BenchmarkRunner::RunWarmUp() {
+ // Use the same mechanisms for warming up the benchmark as used for actually
+ // running and measuring the benchmark.
+ IterationResults i_warmup;
+ // Don't use the iteration count determined in the warmup phase for the actual
+ // measured benchmark phase. While it may be a good starting point for the
+ // benchmark, and would avoid having to figure out how many iterations are
+ // needed again when min_time is set, it may also be a completely wrong guess,
+ // since the warmup loops might be considerably slower (e.g. because of caching
+ // effects).
+ const IterationCount i_backup = iters;
+
+ for (;;) {
+ b.Setup();
+ i_warmup = DoNIterations();
+ b.Teardown();
+
+ const bool finish = ShouldReportIterationResults(i_warmup);
+
+ if (finish) {
+ FinishWarmUp(i_backup);
+ break;
}
- return i;
+ // Although we are running "only" a warmup phase, where running enough
+ // iterations at once without measuring time isn't as important as it is for
+ // the benchmarking phase, we still do it the same way; otherwise it would be
+ // very confusing for the user to choose a proper value for min_warmup_time
+ // if a different approach to running it were used.
+ iters = PredictNumItersNeeded(i_warmup);
+ assert(iters > i_warmup.iters &&
+ "if we did more iterations than we want to do the next time, "
+ "then we should have accepted the current iteration run.");
}
+}
- IterationCount PredictNumItersNeeded(const IterationResults& i) const {
- // See how much iterations should be increased by.
- // Note: Avoid division by zero with max(seconds, 1ns).
- double multiplier = min_time * 1.4 / std::max(i.seconds, 1e-9);
- // If our last run was at least 10% of FLAGS_benchmark_min_time then we
- // use the multiplier directly.
- // Otherwise we use at most 10 times expansion.
- // NOTE: When the last run was at least 10% of the min time the max
- // expansion should be 14x.
- bool is_significant = (i.seconds / min_time) > 0.1;
- multiplier = is_significant ? multiplier : std::min(10.0, multiplier);
- if (multiplier <= 1.0) multiplier = 2.0;
-
- // So what seems to be the sufficiently-large iteration count? Round up.
- const IterationCount max_next_iters = static_cast<IterationCount>(
- std::lround(std::max(multiplier * static_cast<double>(i.iters),
- static_cast<double>(i.iters) + 1.0)));
- // But we do have *some* sanity limits though..
- const IterationCount next_iters = std::min(max_next_iters, kMaxIterations);
-
- VLOG(3) << "Next iters: " << next_iters << ", " << multiplier << "\n";
- return next_iters; // round up before conversion to integer.
+void BenchmarkRunner::DoOneRepetition() {
+ assert(HasRepeatsRemaining() && "Already done all repetitions?");
+
+ const bool is_the_first_repetition = num_repetitions_done == 0;
+
+ // In case a warmup phase is requested by the benchmark, run it now.
+ // After running the warmup phase the BenchmarkRunner should be in a state as
+ // if this warmup had never happened, except that warmup_done is set. Every
+ // other manipulation of the BenchmarkRunner instance would be a bug! Please
+ // fix it.
+ if (!warmup_done) RunWarmUp();
+
+ IterationResults i;
+ // We *may* be gradually increasing the length (iteration count)
+ // of the benchmark until we decide the results are significant.
+ // And once we do, we report those last results and exit.
+ // Please do note that if there are repetitions, the iteration count
+ // is *only* calculated for the *first* repetition, and other repetitions
+ // simply use that precomputed iteration count.
+ for (;;) {
+ b.Setup();
+ i = DoNIterations();
+ b.Teardown();
+
+ // Do we consider the results to be significant?
+ // If we are doing repetitions, and the first repetition was already done,
+ // it has already calculated the correct iteration count, so we have run that
+ // very iteration count just now. No need to calculate anything. Just report.
+ // Else, the normal rules apply.
+ const bool results_are_significant = !is_the_first_repetition ||
+ has_explicit_iteration_count ||
+ ShouldReportIterationResults(i);
+
+ if (results_are_significant) break; // Good, let's report them!
+
+ // Nope, bad iteration. Let's re-estimate the hopefully-sufficient
+ // iteration count, and run the benchmark again...
+
+ iters = PredictNumItersNeeded(i);
+ assert(iters > i.iters &&
+ "if we did more iterations than we want to do the next time, "
+ "then we should have accepted the current iteration run.");
}
- bool ShouldReportIterationResults(const IterationResults& i) const {
- // Determine if this run should be reported;
- // Either it has run for a sufficient amount of time
- // or because an error was reported.
- return i.results.has_error_ ||
- i.iters >= kMaxIterations || // Too many iterations already.
- i.seconds >= min_time || // The elapsed time is large enough.
- // CPU time is specified but the elapsed real time greatly exceeds
- // the minimum time.
- // Note that user provided timers are except from this sanity check.
- ((i.results.real_time_used >= 5 * min_time) && !b.use_manual_time);
+ // Oh, one last thing, we need to also produce the 'memory measurements'..
+ MemoryManager::Result* memory_result = nullptr;
+ IterationCount memory_iterations = 0;
+ if (memory_manager != nullptr) {
+ // TODO(vyng): Consider making BenchmarkReporter::Run::memory_result an
+ // optional so we don't have to own the Result here.
+ // Can't do it now due to cxx03.
+ memory_results.push_back(MemoryManager::Result());
+ memory_result = &memory_results.back();
+ // Only run a few iterations to reduce the impact of one-time
+ // allocations in benchmarks that are not properly managed.
+ memory_iterations = std::min<IterationCount>(16, iters);
+ memory_manager->Start();
+ std::unique_ptr<internal::ThreadManager> manager;
+ manager.reset(new internal::ThreadManager(1));
+ b.Setup();
+ RunInThread(&b, memory_iterations, 0, manager.get(),
+ perf_counters_measurement_ptr);
+ manager->WaitForAllThreads();
+ manager.reset();
+ b.Teardown();
+ memory_manager->Stop(*memory_result);
}
- void DoOneRepetition(int64_t repetition_index) {
- const bool is_the_first_repetition = repetition_index == 0;
- IterationResults i;
-
- // We *may* be gradually increasing the length (iteration count)
- // of the benchmark until we decide the results are significant.
- // And once we do, we report those last results and exit.
- // Please do note that the if there are repetitions, the iteration count
- // is *only* calculated for the *first* repetition, and other repetitions
- // simply use that precomputed iteration count.
- for (;;) {
- i = DoNIterations();
-
- // Do we consider the results to be significant?
- // If we are doing repetitions, and the first repetition was already done,
- // it has calculated the correct iteration time, so we have run that very
- // iteration count just now. No need to calculate anything. Just report.
- // Else, the normal rules apply.
- const bool results_are_significant = !is_the_first_repetition ||
- has_explicit_iteration_count ||
- ShouldReportIterationResults(i);
-
- if (results_are_significant) break; // Good, let's report them!
-
- // Nope, bad iteration. Let's re-estimate the hopefully-sufficient
- // iteration count, and run the benchmark again...
-
- iters = PredictNumItersNeeded(i);
- assert(iters > i.iters &&
- "if we did more iterations than we want to do the next time, "
- "then we should have accepted the current iteration run.");
- }
+ // Ok, now actually report.
+ BenchmarkReporter::Run report =
+ CreateRunReport(b, i.results, memory_iterations, memory_result, i.seconds,
+ num_repetitions_done, repeats);
- // Oh, one last thing, we need to also produce the 'memory measurements'..
- MemoryManager::Result memory_result;
- IterationCount memory_iterations = 0;
- if (memory_manager != nullptr) {
- // Only run a few iterations to reduce the impact of one-time
- // allocations in benchmarks that are not properly managed.
- memory_iterations = std::min<IterationCount>(16, iters);
- memory_manager->Start();
- std::unique_ptr<internal::ThreadManager> manager;
- manager.reset(new internal::ThreadManager(1));
- RunInThread(&b, memory_iterations, 0, manager.get());
- manager->WaitForAllThreads();
- manager.reset();
-
- memory_manager->Stop(&memory_result);
- }
+ if (reports_for_family) {
+ ++reports_for_family->num_runs_done;
+ if (!report.skipped) reports_for_family->Runs.push_back(report);
+ }
- // Ok, now actualy report.
- BenchmarkReporter::Run report =
- CreateRunReport(b, i.results, memory_iterations, memory_result,
- i.seconds, repetition_index);
+ run_results.non_aggregates.push_back(report);
- if (!report.error_occurred && b.complexity != oNone)
- complexity_reports.push_back(report);
+ ++num_repetitions_done;
+}
- run_results.non_aggregates.push_back(report);
- }
-};
+RunResults&& BenchmarkRunner::GetResults() {
+ assert(!HasRepeatsRemaining() && "Did not run all repetitions yet?");
-} // end namespace
+ // Calculate additional statistics over the repetitions of this instance.
+ run_results.aggregates_only = ComputeStats(run_results.non_aggregates);
-RunResults RunBenchmark(
- const benchmark::internal::BenchmarkInstance& b,
- std::vector<BenchmarkReporter::Run>* complexity_reports) {
- internal::BenchmarkRunner r(b, complexity_reports);
- return r.get_results();
+ return std::move(run_results);
}
} // end namespace internal
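
The runner changes above make --benchmark_min_time accept either a duration or an explicit per-repetition iteration count, and add an optional warmup phase driven by --benchmark_min_warmup_time / MinWarmUpTime. The following is a hedged sketch of what the flag forms mean to ParseBenchMinTime; the command lines in the comments are examples, not output of this build.

#include <benchmark/benchmark.h>

static void BM_Spin(benchmark::State& state) {
  for (auto _ : state) {
    benchmark::DoNotOptimize(state.iterations());
  }
}
BENCHMARK(BM_Spin);

int main(int argc, char** argv) {
  // Illustrative invocations:
  //   ./bench --benchmark_min_time=2.5s     -> TIME: run until ~2.5s of benchmark time
  //   ./bench --benchmark_min_time=100x     -> ITERS: run a fixed 100 iterations per repetition
  //   ./bench --benchmark_min_time=2.5      -> accepted, but a BM_VLOG warning asks for a suffix
  //   ./bench --benchmark_min_warmup_time=1 -> spend about 1 second warming up before measuring
  benchmark::Initialize(&argc, argv);
  benchmark::RunSpecifiedBenchmarks();
  benchmark::Shutdown();
  return 0;
}
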
diff --git a/src/benchmark_runner.h b/src/benchmark_runner.h
index 96e8282..db2fa04 100644
--- a/src/benchmark_runner.h
+++ b/src/benchmark_runner.h
@@ -15,19 +15,23 @@
#ifndef BENCHMARK_RUNNER_H_
#define BENCHMARK_RUNNER_H_
+#include <thread>
+#include <vector>
+
#include "benchmark_api_internal.h"
#include "internal_macros.h"
-
-DECLARE_double(benchmark_min_time);
-
-DECLARE_int32(benchmark_repetitions);
-
-DECLARE_bool(benchmark_report_aggregates_only);
-
-DECLARE_bool(benchmark_display_aggregates_only);
+#include "perf_counters.h"
+#include "thread_manager.h"
namespace benchmark {
+BM_DECLARE_string(benchmark_min_time);
+BM_DECLARE_double(benchmark_min_warmup_time);
+BM_DECLARE_int32(benchmark_repetitions);
+BM_DECLARE_bool(benchmark_report_aggregates_only);
+BM_DECLARE_bool(benchmark_display_aggregates_only);
+BM_DECLARE_string(benchmark_perf_counters);
+
namespace internal {
extern MemoryManager* memory_manager;
@@ -40,9 +44,85 @@ struct RunResults {
bool file_report_aggregates_only = false;
};
-RunResults RunBenchmark(
- const benchmark::internal::BenchmarkInstance& b,
- std::vector<BenchmarkReporter::Run>* complexity_reports);
+struct BENCHMARK_EXPORT BenchTimeType {
+ enum { ITERS, TIME } tag;
+ union {
+ IterationCount iters;
+ double time;
+ };
+};
+
+BENCHMARK_EXPORT
+BenchTimeType ParseBenchMinTime(const std::string& value);
+
+class BenchmarkRunner {
+ public:
+ BenchmarkRunner(const benchmark::internal::BenchmarkInstance& b_,
+ benchmark::internal::PerfCountersMeasurement* pmc_,
+ BenchmarkReporter::PerFamilyRunReports* reports_for_family);
+
+ int GetNumRepeats() const { return repeats; }
+
+ bool HasRepeatsRemaining() const {
+ return GetNumRepeats() != num_repetitions_done;
+ }
+
+ void DoOneRepetition();
+
+ RunResults&& GetResults();
+
+ BenchmarkReporter::PerFamilyRunReports* GetReportsForFamily() const {
+ return reports_for_family;
+ }
+
+ double GetMinTime() const { return min_time; }
+
+ bool HasExplicitIters() const { return has_explicit_iteration_count; }
+
+ IterationCount GetIters() const { return iters; }
+
+ private:
+ RunResults run_results;
+
+ const benchmark::internal::BenchmarkInstance& b;
+ BenchmarkReporter::PerFamilyRunReports* reports_for_family;
+
+ BenchTimeType parsed_benchtime_flag;
+ const double min_time;
+ const double min_warmup_time;
+ bool warmup_done;
+ const int repeats;
+ const bool has_explicit_iteration_count;
+
+ int num_repetitions_done = 0;
+
+ std::vector<std::thread> pool;
+
+ std::vector<MemoryManager::Result> memory_results;
+
+ IterationCount iters; // preserved between repetitions!
+ // So only the first repetition has to find/calculate it,
+ // the other repetitions will just use that precomputed iteration count.
+
+ PerfCountersMeasurement* const perf_counters_measurement_ptr = nullptr;
+
+ struct IterationResults {
+ internal::ThreadManager::Result results;
+ IterationCount iters;
+ double seconds;
+ };
+ IterationResults DoNIterations();
+
+ IterationCount PredictNumItersNeeded(const IterationResults& i) const;
+
+ bool ShouldReportIterationResults(const IterationResults& i) const;
+
+ double GetMinTimeToApply() const;
+
+ void FinishWarmUp(const IterationCount& i);
+
+ void RunWarmUp();
+};
} // namespace internal
diff --git a/src/check.cc b/src/check.cc
new file mode 100644
index 0000000..5f7526e
--- /dev/null
+++ b/src/check.cc
@@ -0,0 +1,11 @@
+#include "check.h"
+
+namespace benchmark {
+namespace internal {
+
+static AbortHandlerT* handler = &std::abort;
+
+BENCHMARK_EXPORT AbortHandlerT*& GetAbortHandler() { return handler; }
+
+} // namespace internal
+} // namespace benchmark
diff --git a/src/check.h b/src/check.h
index f5f8253..c1cd5e8 100644
--- a/src/check.h
+++ b/src/check.h
@@ -5,26 +5,43 @@
#include <cstdlib>
#include <ostream>
+#include "benchmark/export.h"
#include "internal_macros.h"
#include "log.h"
+#if defined(__GNUC__) || defined(__clang__)
+#define BENCHMARK_NOEXCEPT noexcept
+#define BENCHMARK_NOEXCEPT_OP(x) noexcept(x)
+#elif defined(_MSC_VER) && !defined(__clang__)
+#if _MSC_VER >= 1900
+#define BENCHMARK_NOEXCEPT noexcept
+#define BENCHMARK_NOEXCEPT_OP(x) noexcept(x)
+#else
+#define BENCHMARK_NOEXCEPT
+#define BENCHMARK_NOEXCEPT_OP(x)
+#endif
+#define __func__ __FUNCTION__
+#else
+#define BENCHMARK_NOEXCEPT
+#define BENCHMARK_NOEXCEPT_OP(x)
+#endif
+
namespace benchmark {
namespace internal {
typedef void(AbortHandlerT)();
-inline AbortHandlerT*& GetAbortHandler() {
- static AbortHandlerT* handler = &std::abort;
- return handler;
-}
+BENCHMARK_EXPORT
+AbortHandlerT*& GetAbortHandler();
BENCHMARK_NORETURN inline void CallAbortHandler() {
GetAbortHandler()();
std::abort(); // fallback to enforce noreturn
}
-// CheckHandler is the class constructed by failing CHECK macros. CheckHandler
-// will log information about the failures and abort when it is destructed.
+// CheckHandler is the class constructed by failing BM_CHECK macros.
+// CheckHandler will log information about the failures and abort when it is
+// destructed.
class CheckHandler {
public:
CheckHandler(const char* check, const char* file, const char* func, int line)
@@ -35,10 +52,17 @@ class CheckHandler {
LogType& GetLog() { return log_; }
+#if defined(COMPILER_MSVC)
+#pragma warning(push)
+#pragma warning(disable : 4722)
+#endif
BENCHMARK_NORETURN ~CheckHandler() BENCHMARK_NOEXCEPT_OP(false) {
log_ << std::endl;
CallAbortHandler();
}
+#if defined(COMPILER_MSVC)
+#pragma warning(pop)
+#endif
CheckHandler& operator=(const CheckHandler&) = delete;
CheckHandler(const CheckHandler&) = delete;
@@ -51,32 +75,32 @@ class CheckHandler {
} // end namespace internal
} // end namespace benchmark
-// The CHECK macro returns a std::ostream object that can have extra information
-// written to it.
+// The BM_CHECK macro returns a std::ostream object that can have extra
+// information written to it.
#ifndef NDEBUG
-#define CHECK(b) \
+#define BM_CHECK(b) \
(b ? ::benchmark::internal::GetNullLogInstance() \
: ::benchmark::internal::CheckHandler(#b, __FILE__, __func__, __LINE__) \
.GetLog())
#else
-#define CHECK(b) ::benchmark::internal::GetNullLogInstance()
+#define BM_CHECK(b) ::benchmark::internal::GetNullLogInstance()
#endif
// clang-format off
// preserve whitespacing between operators for alignment
-#define CHECK_EQ(a, b) CHECK((a) == (b))
-#define CHECK_NE(a, b) CHECK((a) != (b))
-#define CHECK_GE(a, b) CHECK((a) >= (b))
-#define CHECK_LE(a, b) CHECK((a) <= (b))
-#define CHECK_GT(a, b) CHECK((a) > (b))
-#define CHECK_LT(a, b) CHECK((a) < (b))
-
-#define CHECK_FLOAT_EQ(a, b, eps) CHECK(std::fabs((a) - (b)) < (eps))
-#define CHECK_FLOAT_NE(a, b, eps) CHECK(std::fabs((a) - (b)) >= (eps))
-#define CHECK_FLOAT_GE(a, b, eps) CHECK((a) - (b) > -(eps))
-#define CHECK_FLOAT_LE(a, b, eps) CHECK((b) - (a) > -(eps))
-#define CHECK_FLOAT_GT(a, b, eps) CHECK((a) - (b) > (eps))
-#define CHECK_FLOAT_LT(a, b, eps) CHECK((b) - (a) > (eps))
+#define BM_CHECK_EQ(a, b) BM_CHECK((a) == (b))
+#define BM_CHECK_NE(a, b) BM_CHECK((a) != (b))
+#define BM_CHECK_GE(a, b) BM_CHECK((a) >= (b))
+#define BM_CHECK_LE(a, b) BM_CHECK((a) <= (b))
+#define BM_CHECK_GT(a, b) BM_CHECK((a) > (b))
+#define BM_CHECK_LT(a, b) BM_CHECK((a) < (b))
+
+#define BM_CHECK_FLOAT_EQ(a, b, eps) BM_CHECK(std::fabs((a) - (b)) < (eps))
+#define BM_CHECK_FLOAT_NE(a, b, eps) BM_CHECK(std::fabs((a) - (b)) >= (eps))
+#define BM_CHECK_FLOAT_GE(a, b, eps) BM_CHECK((a) - (b) > -(eps))
+#define BM_CHECK_FLOAT_LE(a, b, eps) BM_CHECK((b) - (a) > -(eps))
+#define BM_CHECK_FLOAT_GT(a, b, eps) BM_CHECK((a) - (b) > (eps))
+#define BM_CHECK_FLOAT_LT(a, b, eps) BM_CHECK((b) - (a) > (eps))
//clang-format on
#endif // CHECK_H_
diff --git a/src/colorprint.cc b/src/colorprint.cc
index fff6a98..0bfd670 100644
--- a/src/colorprint.cc
+++ b/src/colorprint.cc
@@ -25,8 +25,8 @@
#include "internal_macros.h"
#ifdef BENCHMARK_OS_WINDOWS
-#include <windows.h>
#include <io.h>
+#include <windows.h>
#else
#include <unistd.h>
#endif // BENCHMARK_OS_WINDOWS
@@ -94,20 +94,20 @@ std::string FormatString(const char* msg, va_list args) {
va_end(args_cp);
// currently there is no error handling for failure, so this is a hack.
- CHECK(ret >= 0);
+ BM_CHECK(ret >= 0);
- if (ret == 0) // handle empty expansion
+ if (ret == 0) { // handle empty expansion
return {};
- else if (static_cast<size_t>(ret) < size)
+ }
+ if (static_cast<size_t>(ret) < size) {
return local_buff;
- else {
- // we did not provide a long enough buffer on our first attempt.
- size = (size_t)ret + 1; // + 1 for the null byte
- std::unique_ptr<char[]> buff(new char[size]);
- ret = vsnprintf(buff.get(), size, msg, args);
- CHECK(ret > 0 && ((size_t)ret) < size);
- return buff.get();
}
+ // we did not provide a long enough buffer on our first attempt.
+ size = static_cast<size_t>(ret) + 1; // + 1 for the null byte
+ std::unique_ptr<char[]> buff(new char[size]);
+ ret = vsnprintf(buff.get(), size, msg, args);
+ BM_CHECK(ret > 0 && (static_cast<size_t>(ret)) < size);
+ return buff.get();
}
std::string FormatString(const char* msg, ...) {
@@ -163,12 +163,24 @@ bool IsColorTerminal() {
#else
// On non-Windows platforms, we rely on the TERM variable. This list of
// supported TERM values is copied from Google Test:
- // <https://github.com/google/googletest/blob/master/googletest/src/gtest.cc#L2925>.
+ // <https://github.com/google/googletest/blob/v1.13.0/googletest/src/gtest.cc#L3225-L3259>.
const char* const SUPPORTED_TERM_VALUES[] = {
- "xterm", "xterm-color", "xterm-256color",
- "screen", "screen-256color", "tmux",
- "tmux-256color", "rxvt-unicode", "rxvt-unicode-256color",
- "linux", "cygwin",
+ "xterm",
+ "xterm-color",
+ "xterm-256color",
+ "screen",
+ "screen-256color",
+ "tmux",
+ "tmux-256color",
+ "rxvt-unicode",
+ "rxvt-unicode-256color",
+ "linux",
+ "cygwin",
+ "xterm-kitty",
+ "alacritty",
+ "foot",
+ "foot-extra",
+ "wezterm",
};
const char* const term = getenv("TERM");
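
FormatString above follows the common two-pass vsnprintf pattern: format into a stack buffer first, and only allocate when the result does not fit. For reference, a self-contained version of that pattern, independent of the library's helpers:

    #include <cstdarg>
    #include <cstddef>
    #include <cstdio>
    #include <memory>
    #include <string>

    std::string FormatSketch(const char* fmt, ...) {
      char local[256];
      va_list args;
      va_start(args, fmt);
      va_list args_copy;
      va_copy(args_copy, args);  // vsnprintf consumes a va_list, so keep a copy
      // First pass: try the stack buffer and learn the required length.
      const int needed = std::vsnprintf(local, sizeof(local), fmt, args);
      va_end(args);
      std::string out;
      if (needed >= 0 && static_cast<std::size_t>(needed) < sizeof(local)) {
        out = local;
      } else if (needed >= 0) {
        // Second pass: allocate exactly needed + 1 bytes and format again.
        const std::size_t size = static_cast<std::size_t>(needed) + 1;
        std::unique_ptr<char[]> heap(new char[size]);
        std::vsnprintf(heap.get(), size, fmt, args_copy);
        out = heap.get();
      }  // a negative return is a formatting error; real code should report it
      va_end(args_copy);
      return out;
    }
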
diff --git a/src/commandlineflags.cc b/src/commandlineflags.cc
index 0648fe3..dcb4149 100644
--- a/src/commandlineflags.cc
+++ b/src/commandlineflags.cc
@@ -20,6 +20,10 @@
#include <cstring>
#include <iostream>
#include <limits>
+#include <map>
+#include <utility>
+
+#include "../src/string_util.h"
namespace benchmark {
namespace {
@@ -78,6 +82,30 @@ bool ParseDouble(const std::string& src_text, const char* str, double* value) {
return true;
}
+// Parses 'str' into KV pairs. If successful, writes the result to *value and
+// returns true; otherwise leaves *value unchanged and returns false.
+bool ParseKvPairs(const std::string& src_text, const char* str,
+ std::map<std::string, std::string>* value) {
+ std::map<std::string, std::string> kvs;
+ for (const auto& kvpair : StrSplit(str, ',')) {
+ const auto kv = StrSplit(kvpair, '=');
+ if (kv.size() != 2) {
+ std::cerr << src_text << " is expected to be a comma-separated list of "
+ << "<key>=<value> strings, but actually has value \"" << str
+ << "\".\n";
+ return false;
+ }
+ if (!kvs.emplace(kv[0], kv[1]).second) {
+ std::cerr << src_text << " is expected to contain unique keys but key \""
+ << kv[0] << "\" was repeated.\n";
+ return false;
+ }
+ }
+
+ *value = kvs;
+ return true;
+}
+
// Returns the name of the environment variable corresponding to the
// given flag. For example, FlagToEnvVar("foo") will return
// "BENCHMARK_FOO" in the open-source version.
@@ -93,12 +121,14 @@ static std::string FlagToEnvVar(const char* flag) {
} // namespace
+BENCHMARK_EXPORT
bool BoolFromEnv(const char* flag, bool default_val) {
const std::string env_var = FlagToEnvVar(flag);
const char* const value_str = getenv(env_var.c_str());
return value_str == nullptr ? default_val : IsTruthyFlagValue(value_str);
}
+BENCHMARK_EXPORT
int32_t Int32FromEnv(const char* flag, int32_t default_val) {
const std::string env_var = FlagToEnvVar(flag);
const char* const value_str = getenv(env_var.c_str());
@@ -111,6 +141,7 @@ int32_t Int32FromEnv(const char* flag, int32_t default_val) {
return value;
}
+BENCHMARK_EXPORT
double DoubleFromEnv(const char* flag, double default_val) {
const std::string env_var = FlagToEnvVar(flag);
const char* const value_str = getenv(env_var.c_str());
@@ -123,12 +154,28 @@ double DoubleFromEnv(const char* flag, double default_val) {
return value;
}
+BENCHMARK_EXPORT
const char* StringFromEnv(const char* flag, const char* default_val) {
const std::string env_var = FlagToEnvVar(flag);
const char* const value = getenv(env_var.c_str());
return value == nullptr ? default_val : value;
}
+BENCHMARK_EXPORT
+std::map<std::string, std::string> KvPairsFromEnv(
+ const char* flag, std::map<std::string, std::string> default_val) {
+ const std::string env_var = FlagToEnvVar(flag);
+ const char* const value_str = getenv(env_var.c_str());
+
+ if (value_str == nullptr) return default_val;
+
+ std::map<std::string, std::string> value;
+ if (!ParseKvPairs("Environment variable " + env_var, value_str, &value)) {
+ return default_val;
+ }
+ return value;
+}
+
// Parses a string as a command line flag. The string should have
// the format "--flag=value". When def_optional is true, the "=value"
// part can be omitted.
@@ -159,6 +206,7 @@ const char* ParseFlagValue(const char* str, const char* flag,
return flag_end + 1;
}
+BENCHMARK_EXPORT
bool ParseBoolFlag(const char* str, const char* flag, bool* value) {
// Gets the value of the flag as a string.
const char* const value_str = ParseFlagValue(str, flag, true);
@@ -171,6 +219,7 @@ bool ParseBoolFlag(const char* str, const char* flag, bool* value) {
return true;
}
+BENCHMARK_EXPORT
bool ParseInt32Flag(const char* str, const char* flag, int32_t* value) {
// Gets the value of the flag as a string.
const char* const value_str = ParseFlagValue(str, flag, false);
@@ -183,6 +232,7 @@ bool ParseInt32Flag(const char* str, const char* flag, int32_t* value) {
value);
}
+BENCHMARK_EXPORT
bool ParseDoubleFlag(const char* str, const char* flag, double* value) {
// Gets the value of the flag as a string.
const char* const value_str = ParseFlagValue(str, flag, false);
@@ -195,6 +245,7 @@ bool ParseDoubleFlag(const char* str, const char* flag, double* value) {
value);
}
+BENCHMARK_EXPORT
bool ParseStringFlag(const char* str, const char* flag, std::string* value) {
// Gets the value of the flag as a string.
const char* const value_str = ParseFlagValue(str, flag, false);
@@ -206,23 +257,42 @@ bool ParseStringFlag(const char* str, const char* flag, std::string* value) {
return true;
}
+BENCHMARK_EXPORT
+bool ParseKeyValueFlag(const char* str, const char* flag,
+ std::map<std::string, std::string>* value) {
+ const char* const value_str = ParseFlagValue(str, flag, false);
+
+ if (value_str == nullptr) return false;
+
+ for (const auto& kvpair : StrSplit(value_str, ',')) {
+ const auto kv = StrSplit(kvpair, '=');
+ if (kv.size() != 2) return false;
+ value->emplace(kv[0], kv[1]);
+ }
+
+ return true;
+}
+
+BENCHMARK_EXPORT
bool IsFlag(const char* str, const char* flag) {
return (ParseFlagValue(str, flag, true) != nullptr);
}
+BENCHMARK_EXPORT
bool IsTruthyFlagValue(const std::string& value) {
if (value.size() == 1) {
char v = value[0];
return isalnum(v) &&
!(v == '0' || v == 'f' || v == 'F' || v == 'n' || v == 'N');
- } else if (!value.empty()) {
+ }
+ if (!value.empty()) {
std::string value_lower(value);
std::transform(value_lower.begin(), value_lower.end(), value_lower.begin(),
[](char c) { return static_cast<char>(::tolower(c)); });
return !(value_lower == "false" || value_lower == "no" ||
value_lower == "off");
- } else
- return true;
+ }
+ return true;
}
} // end namespace benchmark
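
ParseKvPairs and ParseKeyValueFlag both split the raw flag value on ',' and then on '=', using StrSplit from src/string_util.h. The same splitting can be sketched with only the standard library (edge cases such as an empty input are handled slightly differently here than in the library):

    #include <map>
    #include <sstream>
    #include <string>

    // Split "key1=val1,key2=val2" into a map. Returns false on an entry that
    // does not contain exactly one '=' or that repeats a key, mirroring the
    // checks performed above; *out is left untouched on failure.
    bool ParseKvPairsSketch(const std::string& input,
                            std::map<std::string, std::string>* out) {
      std::map<std::string, std::string> kvs;
      std::stringstream pairs(input);
      std::string pair;
      while (std::getline(pairs, pair, ',')) {
        const std::string::size_type eq = pair.find('=');
        if (eq == std::string::npos ||
            pair.find('=', eq + 1) != std::string::npos) {
          return false;  // not exactly one '=' in this entry
        }
        if (!kvs.emplace(pair.substr(0, eq), pair.substr(eq + 1)).second) {
          return false;  // duplicate key
        }
      }
      *out = kvs;
      return true;
    }
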
diff --git a/src/commandlineflags.h b/src/commandlineflags.h
index 3a1f6a8..7882628 100644
--- a/src/commandlineflags.h
+++ b/src/commandlineflags.h
@@ -2,61 +2,80 @@
#define BENCHMARK_COMMANDLINEFLAGS_H_
#include <cstdint>
+#include <map>
#include <string>
+#include "benchmark/export.h"
+
// Macro for referencing flags.
#define FLAG(name) FLAGS_##name
// Macros for declaring flags.
-#define DECLARE_bool(name) extern bool FLAG(name)
-#define DECLARE_int32(name) extern int32_t FLAG(name)
-#define DECLARE_double(name) extern double FLAG(name)
-#define DECLARE_string(name) extern std::string FLAG(name)
+#define BM_DECLARE_bool(name) BENCHMARK_EXPORT extern bool FLAG(name)
+#define BM_DECLARE_int32(name) BENCHMARK_EXPORT extern int32_t FLAG(name)
+#define BM_DECLARE_double(name) BENCHMARK_EXPORT extern double FLAG(name)
+#define BM_DECLARE_string(name) BENCHMARK_EXPORT extern std::string FLAG(name)
+#define BM_DECLARE_kvpairs(name) \
+ BENCHMARK_EXPORT extern std::map<std::string, std::string> FLAG(name)
// Macros for defining flags.
-#define DEFINE_bool(name, default_val) \
- bool FLAG(name) = \
- benchmark::BoolFromEnv(#name, default_val)
-#define DEFINE_int32(name, default_val) \
- int32_t FLAG(name) = \
- benchmark::Int32FromEnv(#name, default_val)
-#define DEFINE_double(name, default_val) \
- double FLAG(name) = \
- benchmark::DoubleFromEnv(#name, default_val)
-#define DEFINE_string(name, default_val) \
- std::string FLAG(name) = \
- benchmark::StringFromEnv(#name, default_val)
+#define BM_DEFINE_bool(name, default_val) \
+ BENCHMARK_EXPORT bool FLAG(name) = benchmark::BoolFromEnv(#name, default_val)
+#define BM_DEFINE_int32(name, default_val) \
+ BENCHMARK_EXPORT int32_t FLAG(name) = \
+ benchmark::Int32FromEnv(#name, default_val)
+#define BM_DEFINE_double(name, default_val) \
+ BENCHMARK_EXPORT double FLAG(name) = \
+ benchmark::DoubleFromEnv(#name, default_val)
+#define BM_DEFINE_string(name, default_val) \
+ BENCHMARK_EXPORT std::string FLAG(name) = \
+ benchmark::StringFromEnv(#name, default_val)
+#define BM_DEFINE_kvpairs(name, default_val) \
+ BENCHMARK_EXPORT std::map<std::string, std::string> FLAG(name) = \
+ benchmark::KvPairsFromEnv(#name, default_val)
namespace benchmark {
-// Parses a bool from the environment variable
-// corresponding to the given flag.
+// Parses a bool from the environment variable corresponding to the given flag.
//
// If the variable exists, returns IsTruthyFlagValue() value; if not,
// returns the given default value.
+BENCHMARK_EXPORT
bool BoolFromEnv(const char* flag, bool default_val);
-// Parses an Int32 from the environment variable
-// corresponding to the given flag.
+// Parses an Int32 from the environment variable corresponding to the given
+// flag.
//
// If the variable exists, returns ParseInt32() value; if not, returns
// the given default value.
+BENCHMARK_EXPORT
int32_t Int32FromEnv(const char* flag, int32_t default_val);
-// Parses an Double from the environment variable
-// corresponding to the given flag.
+// Parses a Double from the environment variable corresponding to the given
+// flag.
//
// If the variable exists, returns ParseDouble(); if not, returns
// the given default value.
+BENCHMARK_EXPORT
double DoubleFromEnv(const char* flag, double default_val);
-// Parses a string from the environment variable
-// corresponding to the given flag.
+// Parses a string from the environment variable corresponding to the given
+// flag.
//
// If variable exists, returns its value; if not, returns
// the given default value.
+BENCHMARK_EXPORT
const char* StringFromEnv(const char* flag, const char* default_val);
+// Parses a set of kvpairs from the environment variable corresponding to the
+// given flag.
+//
+// If variable exists, returns its value; if not, returns
+// the given default value.
+BENCHMARK_EXPORT
+std::map<std::string, std::string> KvPairsFromEnv(
+ const char* flag, std::map<std::string, std::string> default_val);
+
// Parses a string for a bool flag, in the form of either
// "--flag=value" or "--flag".
//
@@ -66,36 +85,47 @@ const char* StringFromEnv(const char* flag, const char* default_val);
//
// On success, stores the value of the flag in *value, and returns
// true. On failure, returns false without changing *value.
+BENCHMARK_EXPORT
bool ParseBoolFlag(const char* str, const char* flag, bool* value);
-// Parses a string for an Int32 flag, in the form of
-// "--flag=value".
+// Parses a string for an Int32 flag, in the form of "--flag=value".
//
// On success, stores the value of the flag in *value, and returns
// true. On failure, returns false without changing *value.
+BENCHMARK_EXPORT
bool ParseInt32Flag(const char* str, const char* flag, int32_t* value);
-// Parses a string for a Double flag, in the form of
-// "--flag=value".
+// Parses a string for a Double flag, in the form of "--flag=value".
//
// On success, stores the value of the flag in *value, and returns
// true. On failure, returns false without changing *value.
+BENCHMARK_EXPORT
bool ParseDoubleFlag(const char* str, const char* flag, double* value);
-// Parses a string for a string flag, in the form of
-// "--flag=value".
+// Parses a string for a string flag, in the form of "--flag=value".
//
// On success, stores the value of the flag in *value, and returns
// true. On failure, returns false without changing *value.
+BENCHMARK_EXPORT
bool ParseStringFlag(const char* str, const char* flag, std::string* value);
+// Parses a string for a kvpairs flag in the form "--flag=key=value,key=value"
+//
+// On success, stores the value of the flag in *value and returns true. On
+// failure returns false, though *value may have been mutated.
+BENCHMARK_EXPORT
+bool ParseKeyValueFlag(const char* str, const char* flag,
+ std::map<std::string, std::string>* value);
+
// Returns true if the string matches the flag.
+BENCHMARK_EXPORT
bool IsFlag(const char* str, const char* flag);
// Returns true unless value starts with one of: '0', 'f', 'F', 'n' or 'N', or
// some non-alphanumeric character. Also returns false if the value matches
// one of 'no', 'false', 'off' (case-insensitive). As a special case, also
// returns true if value is the empty string.
+BENCHMARK_EXPORT
bool IsTruthyFlagValue(const std::string& value);
} // end namespace benchmark
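
Each BM_DEFINE_* macro both defines the flag variable and seeds it from the matching environment variable (per FlagToEnvVar, a flag named "foo" reads BENCHMARK_FOO). As a rough sketch of what a single expansion amounts to, with BENCHMARK_EXPORT elided and "foo" used purely as a stand-in flag name:

    #include <map>
    #include <string>

    namespace benchmark {
    // Declaration as in commandlineflags.h; the definition lives in the library.
    std::map<std::string, std::string> KvPairsFromEnv(
        const char* flag, std::map<std::string, std::string> default_val);
    }  // namespace benchmark

    // BM_DEFINE_kvpairs(foo, {}) expands to approximately:
    std::map<std::string, std::string> FLAGS_foo =
        benchmark::KvPairsFromEnv("foo", {});
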
diff --git a/src/complexity.cc b/src/complexity.cc
index aeed67f..825c573 100644
--- a/src/complexity.cc
+++ b/src/complexity.cc
@@ -15,12 +15,13 @@
// Source project : https://github.com/ismaelJimenez/cpp.leastsq
// Adapted to be used with google benchmark
-#include "benchmark/benchmark.h"
+#include "complexity.h"
#include <algorithm>
#include <cmath>
+
+#include "benchmark/benchmark.h"
#include "check.h"
-#include "complexity.h"
namespace benchmark {
@@ -82,7 +83,6 @@ std::string GetBigOString(BigO complexity) {
LeastSq MinimalLeastSq(const std::vector<int64_t>& n,
const std::vector<double>& time,
BigOFunc* fitting_curve) {
- double sigma_gn = 0.0;
double sigma_gn_squared = 0.0;
double sigma_time = 0.0;
double sigma_time_gn = 0.0;
@@ -90,7 +90,6 @@ LeastSq MinimalLeastSq(const std::vector<int64_t>& n,
// Calculate least square fitting parameter
for (size_t i = 0; i < n.size(); ++i) {
double gn_i = fitting_curve(n[i]);
- sigma_gn += gn_i;
sigma_gn_squared += gn_i * gn_i;
sigma_time += time[i];
sigma_time_gn += time[i] * gn_i;
@@ -125,10 +124,10 @@ LeastSq MinimalLeastSq(const std::vector<int64_t>& n,
// fitting curve.
LeastSq MinimalLeastSq(const std::vector<int64_t>& n,
const std::vector<double>& time, const BigO complexity) {
- CHECK_EQ(n.size(), time.size());
- CHECK_GE(n.size(), 2); // Do not compute fitting curve is less than two
- // benchmark runs are given
- CHECK_NE(complexity, oNone);
+ BM_CHECK_EQ(n.size(), time.size());
+  BM_CHECK_GE(n.size(), 2);  // Do not compute fitting curve if fewer than two
+ // benchmark runs are given
+ BM_CHECK_NE(complexity, oNone);
LeastSq best_fit;
@@ -169,7 +168,8 @@ std::vector<BenchmarkReporter::Run> ComputeBigO(
// Populate the accumulators.
for (const Run& run : reports) {
- CHECK_GT(run.complexity_n, 0) << "Did you forget to call SetComplexityN?";
+ BM_CHECK_GT(run.complexity_n, 0)
+ << "Did you forget to call SetComplexityN?";
n.push_back(run.complexity_n);
real_time.push_back(run.real_accumulated_time / run.iterations);
cpu_time.push_back(run.cpu_accumulated_time / run.iterations);
@@ -193,11 +193,14 @@ std::vector<BenchmarkReporter::Run> ComputeBigO(
// Get the data from the accumulator to BenchmarkReporter::Run's.
Run big_o;
big_o.run_name = run_name;
+ big_o.family_index = reports[0].family_index;
+ big_o.per_family_instance_index = reports[0].per_family_instance_index;
big_o.run_type = BenchmarkReporter::Run::RT_Aggregate;
big_o.repetitions = reports[0].repetitions;
big_o.repetition_index = Run::no_repetition_index;
big_o.threads = reports[0].threads;
big_o.aggregate_name = "BigO";
+ big_o.aggregate_unit = StatisticUnit::kTime;
big_o.report_label = reports[0].report_label;
big_o.iterations = 0;
big_o.real_accumulated_time = result_real.coef;
@@ -215,8 +218,11 @@ std::vector<BenchmarkReporter::Run> ComputeBigO(
// Only add label to mean/stddev if it is same for all runs
Run rms;
rms.run_name = run_name;
+ rms.family_index = reports[0].family_index;
+ rms.per_family_instance_index = reports[0].per_family_instance_index;
rms.run_type = BenchmarkReporter::Run::RT_Aggregate;
rms.aggregate_name = "RMS";
+ rms.aggregate_unit = StatisticUnit::kPercentage;
rms.report_label = big_o.report_label;
rms.iterations = 0;
rms.repetition_index = Run::no_repetition_index;
diff --git a/src/complexity.h b/src/complexity.h
index df29b48..0a0679b 100644
--- a/src/complexity.h
+++ b/src/complexity.h
@@ -31,7 +31,7 @@ std::vector<BenchmarkReporter::Run> ComputeBigO(
const std::vector<BenchmarkReporter::Run>& reports);
// This data structure will contain the result returned by MinimalLeastSq
-// - coef : Estimated coeficient for the high-order term as
+// - coef : Estimated coefficient for the high-order term as
// interpolated from data.
// - rms : Normalized Root Mean Squared Error.
// - complexity : Scalability form (e.g. oN, oNLogN). In case a scalability
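
The fit performed by MinimalLeastSq is ordinary least squares with a single scale factor: coef = sum(time_i * g(n_i)) / sum(g(n_i)^2) for the chosen fitting curve g, which is why the unused sigma_gn accumulator could be dropped above. A standalone sketch of that computation (the library additionally normalizes the RMS by the mean time, omitted here):

    #include <cmath>
    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // Least-squares scale factor for time ~ coef * g(n), plus the plain root
    // mean squared residual of the fit.
    double FitCoefSketch(const std::vector<int64_t>& n,
                         const std::vector<double>& time,
                         double (*g)(int64_t), double* rms_out) {
      double sigma_gn_squared = 0.0;
      double sigma_time_gn = 0.0;
      for (std::size_t i = 0; i < n.size(); ++i) {
        const double gn = g(n[i]);
        sigma_gn_squared += gn * gn;
        sigma_time_gn += time[i] * gn;
      }
      const double coef = sigma_time_gn / sigma_gn_squared;
      double ss_res = 0.0;
      for (std::size_t i = 0; i < n.size(); ++i) {
        const double residual = time[i] - coef * g(n[i]);
        ss_res += residual * residual;
      }
      *rms_out = std::sqrt(ss_res / static_cast<double>(n.size()));
      return coef;
    }
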
diff --git a/src/console_reporter.cc b/src/console_reporter.cc
index 6fd7645..10e05e1 100644
--- a/src/console_reporter.cc
+++ b/src/console_reporter.cc
@@ -33,6 +33,7 @@
namespace benchmark {
+BENCHMARK_EXPORT
bool ConsoleReporter::ReportContext(const Context& context) {
name_field_width_ = context.name_field_width;
printed_header_ = false;
@@ -45,19 +46,21 @@ bool ConsoleReporter::ReportContext(const Context& context) {
GetErrorStream()
<< "Color printing is only supported for stdout on windows."
" Disabling color printing\n";
- output_options_ = static_cast< OutputOptions >(output_options_ & ~OO_Color);
+ output_options_ = static_cast<OutputOptions>(output_options_ & ~OO_Color);
}
#endif
return true;
}
+BENCHMARK_EXPORT
void ConsoleReporter::PrintHeader(const Run& run) {
- std::string str = FormatString("%-*s %13s %15s %12s", static_cast<int>(name_field_width_),
- "Benchmark", "Time", "CPU", "Iterations");
- if(!run.counters.empty()) {
- if(output_options_ & OO_Tabular) {
- for(auto const& c : run.counters) {
+ std::string str =
+ FormatString("%-*s %13s %15s %12s", static_cast<int>(name_field_width_),
+ "Benchmark", "Time", "CPU", "Iterations");
+ if (!run.counters.empty()) {
+ if (output_options_ & OO_Tabular) {
+ for (auto const& c : run.counters) {
str += FormatString(" %10s", c.first.c_str());
}
} else {
@@ -68,6 +71,7 @@ void ConsoleReporter::PrintHeader(const Run& run) {
GetOutputStream() << line << "\n" << str << "\n" << line << "\n";
}
+BENCHMARK_EXPORT
void ConsoleReporter::ReportRuns(const std::vector<Run>& reports) {
for (const auto& run : reports) {
// print the header:
@@ -97,8 +101,10 @@ static void IgnoreColorPrint(std::ostream& out, LogColor, const char* fmt,
va_end(args);
}
-
static std::string FormatTime(double time) {
+ // For the time columns of the console printer 13 digits are reserved. One of
+  // them is a space and at most two of them are the time unit (e.g. ns). That puts
+ // us at 10 digits usable for the number.
// Align decimal places...
if (time < 1.0) {
return FormatString("%10.3f", time);
@@ -109,22 +115,33 @@ static std::string FormatTime(double time) {
if (time < 100.0) {
return FormatString("%10.1f", time);
}
+ // Assuming the time is at max 9.9999e+99 and we have 10 digits for the
+ // number, we get 10-1(.)-1(e)-1(sign)-2(exponent) = 5 digits to print.
+ if (time > 9999999999 /*max 10 digit number*/) {
+ return FormatString("%1.4e", time);
+ }
return FormatString("%10.0f", time);
}
+BENCHMARK_EXPORT
void ConsoleReporter::PrintRunData(const Run& result) {
typedef void(PrinterFn)(std::ostream&, LogColor, const char*, ...);
auto& Out = GetOutputStream();
- PrinterFn* printer = (output_options_ & OO_Color) ?
- (PrinterFn*)ColorPrintf : IgnoreColorPrint;
+ PrinterFn* printer = (output_options_ & OO_Color)
+ ? static_cast<PrinterFn*>(ColorPrintf)
+ : IgnoreColorPrint;
auto name_color =
(result.report_big_o || result.report_rms) ? COLOR_BLUE : COLOR_GREEN;
printer(Out, name_color, "%-*s ", name_field_width_,
result.benchmark_name().c_str());
- if (result.error_occurred) {
+ if (internal::SkippedWithError == result.skipped) {
printer(Out, COLOR_RED, "ERROR OCCURRED: \'%s\'",
- result.error_message.c_str());
+ result.skip_message.c_str());
+ printer(Out, COLOR_DEFAULT, "\n");
+ return;
+ } else if (internal::SkippedWithMessage == result.skipped) {
+ printer(Out, COLOR_WHITE, "SKIPPED: \'%s\'", result.skip_message.c_str());
printer(Out, COLOR_DEFAULT, "\n");
return;
}
@@ -134,18 +151,23 @@ void ConsoleReporter::PrintRunData(const Run& result) {
const std::string real_time_str = FormatTime(real_time);
const std::string cpu_time_str = FormatTime(cpu_time);
-
if (result.report_big_o) {
std::string big_o = GetBigOString(result.complexity);
- printer(Out, COLOR_YELLOW, "%10.2f %-4s %10.2f %-4s ", real_time, big_o.c_str(),
- cpu_time, big_o.c_str());
+ printer(Out, COLOR_YELLOW, "%10.2f %-4s %10.2f %-4s ", real_time,
+ big_o.c_str(), cpu_time, big_o.c_str());
} else if (result.report_rms) {
printer(Out, COLOR_YELLOW, "%10.0f %-4s %10.0f %-4s ", real_time * 100, "%",
cpu_time * 100, "%");
- } else {
+ } else if (result.run_type != Run::RT_Aggregate ||
+ result.aggregate_unit == StatisticUnit::kTime) {
const char* timeLabel = GetTimeUnitString(result.time_unit);
- printer(Out, COLOR_YELLOW, "%s %-4s %s %-4s ", real_time_str.c_str(), timeLabel,
- cpu_time_str.c_str(), timeLabel);
+ printer(Out, COLOR_YELLOW, "%s %-4s %s %-4s ", real_time_str.c_str(),
+ timeLabel, cpu_time_str.c_str(), timeLabel);
+ } else {
+ assert(result.aggregate_unit == StatisticUnit::kPercentage);
+ printer(Out, COLOR_YELLOW, "%10.2f %-4s %10.2f %-4s ",
+ (100. * result.real_accumulated_time), "%",
+ (100. * result.cpu_accumulated_time), "%");
}
if (!result.report_big_o && !result.report_rms) {
@@ -153,12 +175,19 @@ void ConsoleReporter::PrintRunData(const Run& result) {
}
for (auto& c : result.counters) {
- const std::size_t cNameLen = std::max(std::string::size_type(10),
- c.first.length());
- auto const& s = HumanReadableNumber(c.second.value, c.second.oneK);
+ const std::size_t cNameLen =
+ std::max(std::string::size_type(10), c.first.length());
+ std::string s;
const char* unit = "";
- if (c.second.flags & Counter::kIsRate)
- unit = (c.second.flags & Counter::kInvert) ? "s" : "/s";
+ if (result.run_type == Run::RT_Aggregate &&
+ result.aggregate_unit == StatisticUnit::kPercentage) {
+ s = StrFormat("%.2f", 100. * c.second.value);
+ unit = "%";
+ } else {
+ s = HumanReadableNumber(c.second.value, c.second.oneK);
+ if (c.second.flags & Counter::kIsRate)
+ unit = (c.second.flags & Counter::kInvert) ? "s" : "/s";
+ }
if (output_options_ & OO_Tabular) {
printer(Out, COLOR_DEFAULT, " %*s%s", cNameLen - strlen(unit), s.c_str(),
unit);
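
The new FormatTime branch keeps the time columns at a fixed width: values that would need more than 10 integer digits switch to scientific notation instead of widening the column. A compact sketch of that rule (the library also uses finer-grained decimal buckets for values below 100, elided here):

    #include <cstdio>
    #include <string>

    std::string FormatTimeSketch(double t) {
      char buf[32];
      if (t < 100.0) {
        std::snprintf(buf, sizeof(buf), "%10.1f", t);  // keep some decimals
      } else if (t > 9999999999.0) {  // would not fit in 10 integer digits
        std::snprintf(buf, sizeof(buf), "%1.4e", t);
      } else {
        std::snprintf(buf, sizeof(buf), "%10.0f", t);
      }
      return buf;
    }
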
diff --git a/src/csv_reporter.cc b/src/csv_reporter.cc
index af2c18f..7b56da1 100644
--- a/src/csv_reporter.cc
+++ b/src/csv_reporter.cc
@@ -12,9 +12,6 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#include "benchmark/benchmark.h"
-#include "complexity.h"
-
#include <algorithm>
#include <cstdint>
#include <iostream>
@@ -22,7 +19,9 @@
#include <tuple>
#include <vector>
+#include "benchmark/benchmark.h"
#include "check.h"
+#include "complexity.h"
#include "string_util.h"
#include "timers.h"
@@ -37,23 +36,29 @@ std::vector<std::string> elements = {
"error_occurred", "error_message"};
} // namespace
-std::string CsvEscape(const std::string & s) {
+std::string CsvEscape(const std::string& s) {
std::string tmp;
tmp.reserve(s.size() + 2);
for (char c : s) {
switch (c) {
- case '"' : tmp += "\"\""; break;
- default : tmp += c; break;
+ case '"':
+ tmp += "\"\"";
+ break;
+ default:
+ tmp += c;
+ break;
}
}
return '"' + tmp + '"';
}
+BENCHMARK_EXPORT
bool CSVReporter::ReportContext(const Context& context) {
PrintBasicContext(&GetErrorStream(), context);
return true;
}
+BENCHMARK_EXPORT
void CSVReporter::ReportRuns(const std::vector<Run>& reports) {
std::ostream& Out = GetOutputStream();
@@ -85,7 +90,8 @@ void CSVReporter::ReportRuns(const std::vector<Run>& reports) {
for (const auto& cnt : run.counters) {
if (cnt.first == "bytes_per_second" || cnt.first == "items_per_second")
continue;
- CHECK(user_counter_names_.find(cnt.first) != user_counter_names_.end())
+ BM_CHECK(user_counter_names_.find(cnt.first) !=
+ user_counter_names_.end())
<< "All counters must be present in each run. "
<< "Counter named \"" << cnt.first
<< "\" was not in a run after being added to the header";
@@ -99,13 +105,14 @@ void CSVReporter::ReportRuns(const std::vector<Run>& reports) {
}
}
+BENCHMARK_EXPORT
void CSVReporter::PrintRunData(const Run& run) {
std::ostream& Out = GetOutputStream();
Out << CsvEscape(run.benchmark_name()) << ",";
- if (run.error_occurred) {
+ if (run.skipped) {
Out << std::string(elements.size() - 3, ',');
- Out << "true,";
- Out << CsvEscape(run.error_message) << "\n";
+ Out << std::boolalpha << (internal::SkippedWithError == run.skipped) << ",";
+ Out << CsvEscape(run.skip_message) << "\n";
return;
}
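
CsvEscape applies the usual CSV quoting rule (RFC 4180): double any embedded quote and wrap the whole field in quotes. A standalone copy of that rule with a usage example:

    #include <iostream>
    #include <string>

    std::string CsvEscapeSketch(const std::string& s) {
      std::string tmp;
      tmp.reserve(s.size() + 2);
      for (char c : s) {
        if (c == '"') {
          tmp += "\"\"";  // double embedded quotes
        } else {
          tmp += c;
        }
      }
      return '"' + tmp + '"';
    }

    int main() {
      // Prints: "say ""hi"", world"
      std::cout << CsvEscapeSketch("say \"hi\", world") << "\n";
    }
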
diff --git a/src/cycleclock.h b/src/cycleclock.h
index 6843b69..ae1ef2d 100644
--- a/src/cycleclock.h
+++ b/src/cycleclock.h
@@ -36,7 +36,8 @@
// declarations of some other intrinsics, breaking compilation.
// Therefore, we simply declare __rdtsc ourselves. See also
// http://connect.microsoft.com/VisualStudio/feedback/details/262047
-#if defined(COMPILER_MSVC) && !defined(_M_IX86) && !defined(_M_ARM64)
+#if defined(COMPILER_MSVC) && !defined(_M_IX86) && !defined(_M_ARM64) && \
+ !defined(_M_ARM64EC)
extern "C" uint64_t __rdtsc();
#pragma intrinsic(__rdtsc)
#endif
@@ -114,8 +115,8 @@ inline BENCHMARK_ALWAYS_INLINE int64_t Now() {
// when I know it will work. Otherwise, I'll use __rdtsc and hope
// the code is being compiled with a non-ancient compiler.
_asm rdtsc
-#elif defined(COMPILER_MSVC) && defined(_M_ARM64)
- // See https://docs.microsoft.com/en-us/cpp/intrinsics/arm64-intrinsics?view=vs-2019
+#elif defined(COMPILER_MSVC) && (defined(_M_ARM64) || defined(_M_ARM64EC))
+  // See https://docs.microsoft.com/en-us/cpp/intrinsics/arm64-intrinsics
// and https://reviews.llvm.org/D53115
int64_t virtual_timer_value;
virtual_timer_value = _ReadStatusReg(ARM64_CNTVCT);
@@ -132,7 +133,7 @@ inline BENCHMARK_ALWAYS_INLINE int64_t Now() {
// Native Client does not provide any API to access cycle counter.
// Use clock_gettime(CLOCK_MONOTONIC, ...) instead of gettimeofday
- // because is provides nanosecond resolution (which is noticable at
+  // because it provides nanosecond resolution (which is noticeable at
// least for PNaCl modules running on x86 Mac & Linux).
// Initialize to always return 0 if clock_gettime fails.
struct timespec ts = {0, 0};
@@ -173,6 +174,10 @@ inline BENCHMARK_ALWAYS_INLINE int64_t Now() {
struct timeval tv;
gettimeofday(&tv, nullptr);
return static_cast<int64_t>(tv.tv_sec) * 1000000 + tv.tv_usec;
+#elif defined(__loongarch__) || defined(__csky__)
+ struct timeval tv;
+ gettimeofday(&tv, nullptr);
+ return static_cast<int64_t>(tv.tv_sec) * 1000000 + tv.tv_usec;
#elif defined(__s390__) // Covers both s390 and s390x.
// Return the CPU clock.
uint64_t tsc;
@@ -183,7 +188,7 @@ inline BENCHMARK_ALWAYS_INLINE int64_t Now() {
asm("stck %0" : "=Q"(tsc) : : "cc");
#endif
return tsc;
-#elif defined(__riscv) // RISC-V
+#elif defined(__riscv) // RISC-V
// Use RDCYCLE (and RDCYCLEH on riscv32)
#if __riscv_xlen == 32
uint32_t cycles_lo, cycles_hi0, cycles_hi1;
@@ -204,6 +209,14 @@ inline BENCHMARK_ALWAYS_INLINE int64_t Now() {
asm volatile("rdcycle %0" : "=r"(cycles));
return cycles;
#endif
+#elif defined(__e2k__) || defined(__elbrus__)
+ struct timeval tv;
+ gettimeofday(&tv, nullptr);
+ return static_cast<int64_t>(tv.tv_sec) * 1000000 + tv.tv_usec;
+#elif defined(__hexagon__)
+ uint64_t pcycle;
+ asm volatile("%0 = C15:14" : "=r"(pcycle));
+  return static_cast<int64_t>(pcycle);
#else
// The soft failover to a generic implementation is automatic only for ARM.
// For other platforms the developer is expected to make an attempt to create
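
The newly covered targets (LoongArch, C-SKY, Elbrus) get the same fallback as several existing ones: no cycle counter is read, just wall time in microseconds from gettimeofday. An equivalent portable stand-in written with std::chrono (not the code the library uses, which calls gettimeofday directly on those platforms):

    #include <chrono>
    #include <cstdint>

    // Microsecond timestamps as a cycle-counter substitute; coarse, but good
    // enough for the "no better source available" case handled above.
    int64_t FallbackNowMicrosSketch() {
      const auto since_epoch =
          std::chrono::steady_clock::now().time_since_epoch();
      return std::chrono::duration_cast<std::chrono::microseconds>(since_epoch)
          .count();
    }
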
diff --git a/src/internal_macros.h b/src/internal_macros.h
index 91f367b..8dd7d0c 100644
--- a/src/internal_macros.h
+++ b/src/internal_macros.h
@@ -1,8 +1,6 @@
#ifndef BENCHMARK_INTERNAL_MACROS_H_
#define BENCHMARK_INTERNAL_MACROS_H_
-#include "benchmark/benchmark.h"
-
/* Needed to detect STL */
#include <cstdlib>
@@ -44,6 +42,19 @@
#define BENCHMARK_OS_CYGWIN 1
#elif defined(_WIN32)
#define BENCHMARK_OS_WINDOWS 1
+ // WINAPI_FAMILY_PARTITION is defined in winapifamily.h.
+ // We include windows.h which implicitly includes winapifamily.h for compatibility.
+ #ifndef NOMINMAX
+ #define NOMINMAX
+ #endif
+ #include <windows.h>
+ #if defined(WINAPI_FAMILY_PARTITION)
+ #if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)
+ #define BENCHMARK_OS_WINDOWS_WIN32 1
+ #elif WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP)
+ #define BENCHMARK_OS_WINDOWS_RT 1
+ #endif
+ #endif
#if defined(__MINGW32__)
#define BENCHMARK_OS_MINGW 1
#endif
@@ -80,6 +91,8 @@
#define BENCHMARK_OS_QNX 1
#elif defined(__MVS__)
#define BENCHMARK_OS_ZOS 1
+#elif defined(__hexagon__)
+#define BENCHMARK_OS_QURT 1
#endif
#if defined(__ANDROID__) && defined(__GLIBCXX__)
diff --git a/src/json_reporter.cc b/src/json_reporter.cc
index 959d245..6559dfd 100644
--- a/src/json_reporter.cc
+++ b/src/json_reporter.cc
@@ -12,9 +12,6 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#include "benchmark/benchmark.h"
-#include "complexity.h"
-
#include <algorithm>
#include <cmath>
#include <cstdint>
@@ -25,41 +22,61 @@
#include <tuple>
#include <vector>
+#include "benchmark/benchmark.h"
+#include "complexity.h"
#include "string_util.h"
#include "timers.h"
namespace benchmark {
-
namespace {
-std::string StrEscape(const std::string & s) {
+std::string StrEscape(const std::string& s) {
std::string tmp;
tmp.reserve(s.size());
for (char c : s) {
switch (c) {
- case '\b': tmp += "\\b"; break;
- case '\f': tmp += "\\f"; break;
- case '\n': tmp += "\\n"; break;
- case '\r': tmp += "\\r"; break;
- case '\t': tmp += "\\t"; break;
- case '\\': tmp += "\\\\"; break;
- case '"' : tmp += "\\\""; break;
- default : tmp += c; break;
+ case '\b':
+ tmp += "\\b";
+ break;
+ case '\f':
+ tmp += "\\f";
+ break;
+ case '\n':
+ tmp += "\\n";
+ break;
+ case '\r':
+ tmp += "\\r";
+ break;
+ case '\t':
+ tmp += "\\t";
+ break;
+ case '\\':
+ tmp += "\\\\";
+ break;
+ case '"':
+ tmp += "\\\"";
+ break;
+ default:
+ tmp += c;
+ break;
}
}
return tmp;
}
std::string FormatKV(std::string const& key, std::string const& value) {
- return StrFormat("\"%s\": \"%s\"", StrEscape(key).c_str(), StrEscape(value).c_str());
+ return StrFormat("\"%s\": \"%s\"", StrEscape(key).c_str(),
+ StrEscape(value).c_str());
}
std::string FormatKV(std::string const& key, const char* value) {
- return StrFormat("\"%s\": \"%s\"", StrEscape(key).c_str(), StrEscape(value).c_str());
+ return StrFormat("\"%s\": \"%s\"", StrEscape(key).c_str(),
+ StrEscape(value).c_str());
}
std::string FormatKV(std::string const& key, bool value) {
- return StrFormat("\"%s\": %s", StrEscape(key).c_str(), value ? "true" : "false");
+ return StrFormat("\"%s\": %s", StrEscape(key).c_str(),
+ value ? "true" : "false");
}
std::string FormatKV(std::string const& key, int64_t value) {
@@ -68,12 +85,6 @@ std::string FormatKV(std::string const& key, int64_t value) {
return ss.str();
}
-std::string FormatKV(std::string const& key, IterationCount value) {
- std::stringstream ss;
- ss << '"' << StrEscape(key) << "\": " << value;
- return ss.str();
-}
-
std::string FormatKV(std::string const& key, double value) {
std::stringstream ss;
ss << '"' << StrEscape(key) << "\": ";
@@ -123,7 +134,9 @@ bool JSONReporter::ReportContext(const Context& context) {
RoundDouble(info.cycles_per_second / 1000000.0))
<< ",\n";
if (CPUInfo::Scaling::UNKNOWN != info.scaling) {
- out << indent << FormatKV("cpu_scaling_enabled", info.scaling == CPUInfo::Scaling::ENABLED ? true : false)
+ out << indent
+ << FormatKV("cpu_scaling_enabled",
+ info.scaling == CPUInfo::Scaling::ENABLED ? true : false)
<< ",\n";
}
@@ -136,8 +149,8 @@ bool JSONReporter::ReportContext(const Context& context) {
out << cache_indent << FormatKV("type", CI.type) << ",\n";
out << cache_indent << FormatKV("level", static_cast<int64_t>(CI.level))
<< ",\n";
- out << cache_indent
- << FormatKV("size", static_cast<int64_t>(CI.size)) << ",\n";
+ out << cache_indent << FormatKV("size", static_cast<int64_t>(CI.size))
+ << ",\n";
out << cache_indent
<< FormatKV("num_sharing", static_cast<int64_t>(CI.num_sharing))
<< "\n";
@@ -159,7 +172,19 @@ bool JSONReporter::ReportContext(const Context& context) {
#else
const char build_type[] = "debug";
#endif
- out << indent << FormatKV("library_build_type", build_type) << "\n";
+ out << indent << FormatKV("library_build_type", build_type);
+
+ std::map<std::string, std::string>* global_context =
+ internal::GetGlobalContext();
+
+ if (global_context != nullptr) {
+ for (const auto& kv : *global_context) {
+ out << ",\n";
+ out << indent << FormatKV(kv.first, kv.second);
+ }
+ }
+ out << "\n";
+
// Close context block and open the list of benchmarks.
out << inner_indent << "},\n";
out << inner_indent << "\"benchmarks\": [\n";
@@ -197,6 +222,10 @@ void JSONReporter::PrintRunData(Run const& run) {
std::string indent(6, ' ');
std::ostream& out = GetOutputStream();
out << indent << FormatKV("name", run.benchmark_name()) << ",\n";
+ out << indent << FormatKV("family_index", run.family_index) << ",\n";
+ out << indent
+ << FormatKV("per_family_instance_index", run.per_family_instance_index)
+ << ",\n";
out << indent << FormatKV("run_name", run.run_name.str()) << ",\n";
out << indent << FormatKV("run_type", [&run]() -> const char* {
switch (run.run_type) {
@@ -215,15 +244,36 @@ void JSONReporter::PrintRunData(Run const& run) {
out << indent << FormatKV("threads", run.threads) << ",\n";
if (run.run_type == BenchmarkReporter::Run::RT_Aggregate) {
out << indent << FormatKV("aggregate_name", run.aggregate_name) << ",\n";
+ out << indent << FormatKV("aggregate_unit", [&run]() -> const char* {
+ switch (run.aggregate_unit) {
+ case StatisticUnit::kTime:
+ return "time";
+ case StatisticUnit::kPercentage:
+ return "percentage";
+ }
+ BENCHMARK_UNREACHABLE();
+ }()) << ",\n";
}
- if (run.error_occurred) {
- out << indent << FormatKV("error_occurred", run.error_occurred) << ",\n";
- out << indent << FormatKV("error_message", run.error_message) << ",\n";
+ if (internal::SkippedWithError == run.skipped) {
+ out << indent << FormatKV("error_occurred", true) << ",\n";
+ out << indent << FormatKV("error_message", run.skip_message) << ",\n";
+ } else if (internal::SkippedWithMessage == run.skipped) {
+ out << indent << FormatKV("skipped", true) << ",\n";
+ out << indent << FormatKV("skip_message", run.skip_message) << ",\n";
}
if (!run.report_big_o && !run.report_rms) {
out << indent << FormatKV("iterations", run.iterations) << ",\n";
- out << indent << FormatKV("real_time", run.GetAdjustedRealTime()) << ",\n";
- out << indent << FormatKV("cpu_time", run.GetAdjustedCPUTime());
+ if (run.run_type != Run::RT_Aggregate ||
+ run.aggregate_unit == StatisticUnit::kTime) {
+ out << indent << FormatKV("real_time", run.GetAdjustedRealTime())
+ << ",\n";
+ out << indent << FormatKV("cpu_time", run.GetAdjustedCPUTime());
+ } else {
+ assert(run.aggregate_unit == StatisticUnit::kPercentage);
+ out << indent << FormatKV("real_time", run.real_accumulated_time)
+ << ",\n";
+ out << indent << FormatKV("cpu_time", run.cpu_accumulated_time);
+ }
out << ",\n"
<< indent << FormatKV("time_unit", GetTimeUnitString(run.time_unit));
} else if (run.report_big_o) {
@@ -241,9 +291,21 @@ void JSONReporter::PrintRunData(Run const& run) {
out << ",\n" << indent << FormatKV(c.first, c.second);
}
- if (run.has_memory_result) {
+ if (run.memory_result) {
+ const MemoryManager::Result memory_result = *run.memory_result;
out << ",\n" << indent << FormatKV("allocs_per_iter", run.allocs_per_iter);
- out << ",\n" << indent << FormatKV("max_bytes_used", run.max_bytes_used);
+ out << ",\n"
+ << indent << FormatKV("max_bytes_used", memory_result.max_bytes_used);
+
+ auto report_if_present = [&out, &indent](const std::string& label,
+ int64_t val) {
+ if (val != MemoryManager::TombstoneValue)
+ out << ",\n" << indent << FormatKV(label, val);
+ };
+
+ report_if_present("total_allocated_bytes",
+ memory_result.total_allocated_bytes);
+ report_if_present("net_heap_growth", memory_result.net_heap_growth);
}
if (!run.report_label.empty()) {
@@ -252,4 +314,7 @@ void JSONReporter::PrintRunData(Run const& run) {
out << '\n';
}
+const int64_t MemoryManager::TombstoneValue =
+ std::numeric_limits<int64_t>::max();
+
} // end namespace benchmark
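
MemoryManager results now use a tombstone value (INT64_MAX) to mean "this metric was not collected", and the reporter only emits a key when the value is real. The same sentinel pattern in miniature:

    #include <cstdint>
    #include <iostream>
    #include <limits>

    // Sentinel meaning "not collected"; the library uses
    // std::numeric_limits<int64_t>::max() in the same role.
    constexpr int64_t kNotCollected = std::numeric_limits<int64_t>::max();

    void ReportIfPresentSketch(const char* label, int64_t value) {
      if (value != kNotCollected) {
        std::cout << "  \"" << label << "\": " << value << ",\n";
      }
    }

    int main() {
      ReportIfPresentSketch("total_allocated_bytes", 4096);
      ReportIfPresentSketch("net_heap_growth", kNotCollected);  // emits nothing
    }
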
diff --git a/src/log.h b/src/log.h
index 47d0c35..9a21400 100644
--- a/src/log.h
+++ b/src/log.h
@@ -4,7 +4,12 @@
#include <iostream>
#include <ostream>
-#include "benchmark/benchmark.h"
+// NOTE: this is also defined in benchmark.h but we're trying to avoid a
+// dependency.
+// The _MSVC_LANG check should detect Visual Studio 2015 Update 3 and newer.
+#if __cplusplus >= 201103L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201103L)
+#define BENCHMARK_HAS_CXX11
+#endif
namespace benchmark {
namespace internal {
@@ -23,7 +28,16 @@ class LogType {
private:
LogType(std::ostream* out) : out_(out) {}
std::ostream* out_;
- BENCHMARK_DISALLOW_COPY_AND_ASSIGN(LogType);
+
+ // NOTE: we could use BENCHMARK_DISALLOW_COPY_AND_ASSIGN but we shouldn't have
+ // a dependency on benchmark.h from here.
+#ifndef BENCHMARK_HAS_CXX11
+ LogType(const LogType&);
+ LogType& operator=(const LogType&);
+#else
+ LogType(const LogType&) = delete;
+ LogType& operator=(const LogType&) = delete;
+#endif
};
template <class Tp>
@@ -47,13 +61,13 @@ inline int& LogLevel() {
}
inline LogType& GetNullLogInstance() {
- static LogType log(nullptr);
- return log;
+ static LogType null_log(static_cast<std::ostream*>(nullptr));
+ return null_log;
}
inline LogType& GetErrorLogInstance() {
- static LogType log(&std::clog);
- return log;
+ static LogType error_log(&std::clog);
+ return error_log;
}
inline LogType& GetLogInstanceForLevel(int level) {
@@ -67,7 +81,7 @@ inline LogType& GetLogInstanceForLevel(int level) {
} // end namespace benchmark
// clang-format off
-#define VLOG(x) \
+#define BM_VLOG(x) \
(::benchmark::internal::GetLogInstanceForLevel(x) << "-- LOG(" << x << "):" \
" ")
// clang-format on
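
BM_VLOG(x) resolves to a real stream only when x is at or below the configured verbosity; otherwise it gets the null LogType, whose null stream pointer makes operator<< a no-op. The same dispatch can be sketched with plain std::ostream (a stream constructed with a null buffer simply discards writes):

    #include <iostream>
    #include <ostream>

    // Return std::clog for enabled levels, otherwise a buffer-less stream that
    // drops everything written to it.
    std::ostream& LogForLevelSketch(int level, int configured_verbosity) {
      static std::ostream null_stream(nullptr);
      return level <= configured_verbosity ? std::clog : null_stream;
    }

    int main() {
      LogForLevelSketch(1, 2) << "printed\n";
      LogForLevelSketch(3, 2) << "silently dropped\n";
    }
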
diff --git a/src/mutex.h b/src/mutex.h
index 3fac79a..bec78d9 100644
--- a/src/mutex.h
+++ b/src/mutex.h
@@ -9,60 +9,60 @@
// Enable thread safety attributes only with clang.
// The attributes can be safely erased when compiling with other compilers.
#if defined(HAVE_THREAD_SAFETY_ATTRIBUTES)
-#define THREAD_ANNOTATION_ATTRIBUTE__(x) __attribute__((x))
+#define THREAD_ANNOTATION_ATTRIBUTE_(x) __attribute__((x))
#else
-#define THREAD_ANNOTATION_ATTRIBUTE__(x) // no-op
+#define THREAD_ANNOTATION_ATTRIBUTE_(x) // no-op
#endif
-#define CAPABILITY(x) THREAD_ANNOTATION_ATTRIBUTE__(capability(x))
+#define CAPABILITY(x) THREAD_ANNOTATION_ATTRIBUTE_(capability(x))
-#define SCOPED_CAPABILITY THREAD_ANNOTATION_ATTRIBUTE__(scoped_lockable)
+#define SCOPED_CAPABILITY THREAD_ANNOTATION_ATTRIBUTE_(scoped_lockable)
-#define GUARDED_BY(x) THREAD_ANNOTATION_ATTRIBUTE__(guarded_by(x))
+#define GUARDED_BY(x) THREAD_ANNOTATION_ATTRIBUTE_(guarded_by(x))
-#define PT_GUARDED_BY(x) THREAD_ANNOTATION_ATTRIBUTE__(pt_guarded_by(x))
+#define PT_GUARDED_BY(x) THREAD_ANNOTATION_ATTRIBUTE_(pt_guarded_by(x))
#define ACQUIRED_BEFORE(...) \
- THREAD_ANNOTATION_ATTRIBUTE__(acquired_before(__VA_ARGS__))
+ THREAD_ANNOTATION_ATTRIBUTE_(acquired_before(__VA_ARGS__))
#define ACQUIRED_AFTER(...) \
- THREAD_ANNOTATION_ATTRIBUTE__(acquired_after(__VA_ARGS__))
+ THREAD_ANNOTATION_ATTRIBUTE_(acquired_after(__VA_ARGS__))
#define REQUIRES(...) \
- THREAD_ANNOTATION_ATTRIBUTE__(requires_capability(__VA_ARGS__))
+ THREAD_ANNOTATION_ATTRIBUTE_(requires_capability(__VA_ARGS__))
#define REQUIRES_SHARED(...) \
- THREAD_ANNOTATION_ATTRIBUTE__(requires_shared_capability(__VA_ARGS__))
+ THREAD_ANNOTATION_ATTRIBUTE_(requires_shared_capability(__VA_ARGS__))
#define ACQUIRE(...) \
- THREAD_ANNOTATION_ATTRIBUTE__(acquire_capability(__VA_ARGS__))
+ THREAD_ANNOTATION_ATTRIBUTE_(acquire_capability(__VA_ARGS__))
#define ACQUIRE_SHARED(...) \
- THREAD_ANNOTATION_ATTRIBUTE__(acquire_shared_capability(__VA_ARGS__))
+ THREAD_ANNOTATION_ATTRIBUTE_(acquire_shared_capability(__VA_ARGS__))
#define RELEASE(...) \
- THREAD_ANNOTATION_ATTRIBUTE__(release_capability(__VA_ARGS__))
+ THREAD_ANNOTATION_ATTRIBUTE_(release_capability(__VA_ARGS__))
#define RELEASE_SHARED(...) \
- THREAD_ANNOTATION_ATTRIBUTE__(release_shared_capability(__VA_ARGS__))
+ THREAD_ANNOTATION_ATTRIBUTE_(release_shared_capability(__VA_ARGS__))
#define TRY_ACQUIRE(...) \
- THREAD_ANNOTATION_ATTRIBUTE__(try_acquire_capability(__VA_ARGS__))
+ THREAD_ANNOTATION_ATTRIBUTE_(try_acquire_capability(__VA_ARGS__))
#define TRY_ACQUIRE_SHARED(...) \
- THREAD_ANNOTATION_ATTRIBUTE__(try_acquire_shared_capability(__VA_ARGS__))
+ THREAD_ANNOTATION_ATTRIBUTE_(try_acquire_shared_capability(__VA_ARGS__))
-#define EXCLUDES(...) THREAD_ANNOTATION_ATTRIBUTE__(locks_excluded(__VA_ARGS__))
+#define EXCLUDES(...) THREAD_ANNOTATION_ATTRIBUTE_(locks_excluded(__VA_ARGS__))
-#define ASSERT_CAPABILITY(x) THREAD_ANNOTATION_ATTRIBUTE__(assert_capability(x))
+#define ASSERT_CAPABILITY(x) THREAD_ANNOTATION_ATTRIBUTE_(assert_capability(x))
#define ASSERT_SHARED_CAPABILITY(x) \
- THREAD_ANNOTATION_ATTRIBUTE__(assert_shared_capability(x))
+ THREAD_ANNOTATION_ATTRIBUTE_(assert_shared_capability(x))
-#define RETURN_CAPABILITY(x) THREAD_ANNOTATION_ATTRIBUTE__(lock_returned(x))
+#define RETURN_CAPABILITY(x) THREAD_ANNOTATION_ATTRIBUTE_(lock_returned(x))
#define NO_THREAD_SAFETY_ANALYSIS \
- THREAD_ANNOTATION_ATTRIBUTE__(no_thread_safety_analysis)
+ THREAD_ANNOTATION_ATTRIBUTE_(no_thread_safety_analysis)
namespace benchmark {
@@ -130,7 +130,7 @@ class Barrier {
// entered the barrier. Returns iff this is the last thread to
// enter the barrier.
bool createBarrier(MutexLock& ml) REQUIRES(lock_) {
- CHECK_LT(entered_, running_threads_);
+ BM_CHECK_LT(entered_, running_threads_);
entered_++;
if (entered_ < running_threads_) {
// Wait for all threads to enter
diff --git a/src/perf_counters.cc b/src/perf_counters.cc
new file mode 100644
index 0000000..417acdb
--- /dev/null
+++ b/src/perf_counters.cc
@@ -0,0 +1,282 @@
+// Copyright 2021 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "perf_counters.h"
+
+#include <cstring>
+#include <memory>
+#include <vector>
+
+#if defined HAVE_LIBPFM
+#include "perfmon/pfmlib.h"
+#include "perfmon/pfmlib_perf_event.h"
+#endif
+
+namespace benchmark {
+namespace internal {
+
+constexpr size_t PerfCounterValues::kMaxCounters;
+
+#if defined HAVE_LIBPFM
+
+size_t PerfCounterValues::Read(const std::vector<int>& leaders) {
+ // Create a pointer for multiple reads
+ const size_t bufsize = values_.size() * sizeof(values_[0]);
+ char* ptr = reinterpret_cast<char*>(values_.data());
+ size_t size = bufsize;
+ for (int lead : leaders) {
+ auto read_bytes = ::read(lead, ptr, size);
+ if (read_bytes >= ssize_t(sizeof(uint64_t))) {
+ // Actual data bytes are all bytes minus initial padding
+ std::size_t data_bytes = read_bytes - sizeof(uint64_t);
+ // This should be very cheap since it's in hot cache
+ std::memmove(ptr, ptr + sizeof(uint64_t), data_bytes);
+ // Increment our counters
+ ptr += data_bytes;
+ size -= data_bytes;
+ } else {
+ int err = errno;
+ GetErrorLogInstance() << "Error reading lead " << lead << " errno:" << err
+ << " " << ::strerror(err) << "\n";
+ return 0;
+ }
+ }
+ return (bufsize - size) / sizeof(uint64_t);
+}
+
+const bool PerfCounters::kSupported = true;
+
+// Initializes libpfm only on the first call. Returns whether that single
+// initialization was successful.
+bool PerfCounters::Initialize() {
+ // Function-scope static gets initialized only once on first call.
+ static const bool success = []() {
+ return pfm_initialize() == PFM_SUCCESS;
+ }();
+ return success;
+}
+
+bool PerfCounters::IsCounterSupported(const std::string& name) {
+ Initialize();
+ perf_event_attr_t attr;
+ std::memset(&attr, 0, sizeof(attr));
+ pfm_perf_encode_arg_t arg;
+ std::memset(&arg, 0, sizeof(arg));
+ arg.attr = &attr;
+ const int mode = PFM_PLM3; // user mode only
+ int ret = pfm_get_os_event_encoding(name.c_str(), mode, PFM_OS_PERF_EVENT_EXT,
+ &arg);
+ return (ret == PFM_SUCCESS);
+}
+
+PerfCounters PerfCounters::Create(
+ const std::vector<std::string>& counter_names) {
+ if (!counter_names.empty()) {
+ Initialize();
+ }
+
+ // Valid counters will populate these arrays but we start empty
+ std::vector<std::string> valid_names;
+ std::vector<int> counter_ids;
+ std::vector<int> leader_ids;
+
+ // Resize to the maximum possible
+ valid_names.reserve(counter_names.size());
+ counter_ids.reserve(counter_names.size());
+
+ const int kCounterMode = PFM_PLM3; // user mode only
+
+ // Group leads will be assigned on demand. The idea is that once we cannot
+ // create a counter descriptor, the reason is that this group has maxed out
+ // so we set the group_id again to -1 and retry - giving the algorithm a
+ // chance to create a new group leader to hold the next set of counters.
+ int group_id = -1;
+
+ // Loop through all performance counters
+ for (size_t i = 0; i < counter_names.size(); ++i) {
+ // we are about to push into the valid names vector
+ // check if we did not reach the maximum
+ if (valid_names.size() == PerfCounterValues::kMaxCounters) {
+ // Log a message if we maxed out and stop adding
+ GetErrorLogInstance()
+ << counter_names.size() << " counters were requested. The maximum is "
+ << PerfCounterValues::kMaxCounters << " and " << valid_names.size()
+ << " were already added. All remaining counters will be ignored\n";
+ // stop the loop and return what we have already
+ break;
+ }
+
+ // Check if this name is empty
+ const auto& name = counter_names[i];
+ if (name.empty()) {
+ GetErrorLogInstance()
+ << "A performance counter name was the empty string\n";
+ continue;
+ }
+
+ // Here first means first in group, ie the group leader
+ const bool is_first = (group_id < 0);
+
+ // This struct will be populated by libpfm from the counter string
+ // and then fed into the syscall perf_event_open
+ struct perf_event_attr attr {};
+ attr.size = sizeof(attr);
+
+ // This is the input struct to libpfm.
+ pfm_perf_encode_arg_t arg{};
+ arg.attr = &attr;
+ const int pfm_get = pfm_get_os_event_encoding(name.c_str(), kCounterMode,
+ PFM_OS_PERF_EVENT, &arg);
+ if (pfm_get != PFM_SUCCESS) {
+ GetErrorLogInstance()
+ << "Unknown performance counter name: " << name << "\n";
+ continue;
+ }
+
+ // We then proceed to populate the remaining fields in our attribute struct
+ // Note: the man page for perf_event_create suggests inherit = true and
+ // read_format = PERF_FORMAT_GROUP don't work together, but that's not the
+ // case.
+ attr.disabled = is_first;
+ attr.inherit = true;
+ attr.pinned = is_first;
+ attr.exclude_kernel = true;
+ attr.exclude_user = false;
+ attr.exclude_hv = true;
+
+ // Read all counters in a group in one read.
+ attr.read_format = PERF_FORMAT_GROUP;
+
+ int id = -1;
+ while (id < 0) {
+ static constexpr size_t kNrOfSyscallRetries = 5;
+ // Retry syscall as it was interrupted often (b/64774091).
+ for (size_t num_retries = 0; num_retries < kNrOfSyscallRetries;
+ ++num_retries) {
+ id = perf_event_open(&attr, 0, -1, group_id, 0);
+ if (id >= 0 || errno != EINTR) {
+ break;
+ }
+ }
+ if (id < 0) {
+ // If the file descriptor is negative we might have reached a limit
+ // in the current group. Set the group_id to -1 and retry
+ if (group_id >= 0) {
+ // Create a new group
+ group_id = -1;
+ } else {
+ // At this point we have already retried to set a new group id and
+ // failed. We then give up.
+ break;
+ }
+ }
+ }
+
+ // We failed to get a new file descriptor. We might have reached a hard
+ // hardware limit that cannot be resolved even with group multiplexing
+ if (id < 0) {
+      GetErrorLogInstance() << "***WARNING*** Failed to get a file descriptor "
+ "for performance counter "
+ << name << ". Ignoring\n";
+
+ // We give up on this counter but try to keep going
+ // as the others would be fine
+ continue;
+ }
+ if (group_id < 0) {
+ // This is a leader, store and assign it to the current file descriptor
+ leader_ids.push_back(id);
+ group_id = id;
+ }
+ // This is a valid counter, add it to our descriptor's list
+ counter_ids.push_back(id);
+ valid_names.push_back(name);
+ }
+
+ // Loop through all group leaders activating them
+ // There is another option of starting ALL counters in a process but
+  // that would be too far-reaching an intrusion. If the user is using PMCs
+ // by themselves then this would have a side effect on them. It is
+ // friendlier to loop through all groups individually.
+ for (int lead : leader_ids) {
+ if (ioctl(lead, PERF_EVENT_IOC_ENABLE) != 0) {
+ // This should never happen but if it does, we give up on the
+ // entire batch as recovery would be a mess.
+ GetErrorLogInstance() << "***WARNING*** Failed to start counters. "
+                               "Clearing out all counters.\n";
+
+      // Close all performance counters
+ for (int id : counter_ids) {
+ ::close(id);
+ }
+
+ // Return an empty object so our internal state is still good and
+ // the process can continue normally without impact
+ return NoCounters();
+ }
+ }
+
+ return PerfCounters(std::move(valid_names), std::move(counter_ids),
+ std::move(leader_ids));
+}
+
+void PerfCounters::CloseCounters() const {
+ if (counter_ids_.empty()) {
+ return;
+ }
+ for (int lead : leader_ids_) {
+ ioctl(lead, PERF_EVENT_IOC_DISABLE);
+ }
+ for (int fd : counter_ids_) {
+ close(fd);
+ }
+}
+#else // defined HAVE_LIBPFM
+size_t PerfCounterValues::Read(const std::vector<int>&) { return 0; }
+
+const bool PerfCounters::kSupported = false;
+
+bool PerfCounters::Initialize() { return false; }
+
+bool PerfCounters::IsCounterSupported(const std::string&) { return false; }
+
+PerfCounters PerfCounters::Create(
+ const std::vector<std::string>& counter_names) {
+ if (!counter_names.empty()) {
+ GetErrorLogInstance() << "Performance counters not supported.";
+ }
+ return NoCounters();
+}
+
+void PerfCounters::CloseCounters() const {}
+#endif // defined HAVE_LIBPFM
+
+PerfCountersMeasurement::PerfCountersMeasurement(
+ const std::vector<std::string>& counter_names)
+ : start_values_(counter_names.size()), end_values_(counter_names.size()) {
+ counters_ = PerfCounters::Create(counter_names);
+}
+
+PerfCounters& PerfCounters::operator=(PerfCounters&& other) noexcept {
+ if (this != &other) {
+ CloseCounters();
+
+ counter_ids_ = std::move(other.counter_ids_);
+ leader_ids_ = std::move(other.leader_ids_);
+ counter_names_ = std::move(other.counter_names_);
+ }
+ return *this;
+}
+} // namespace internal
+} // namespace benchmark
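
PerfCounterValues::Read relies on the PERF_FORMAT_GROUP layout: one read() on a group leader returns a u64 event count followed by one u64 value per counter, and the memmove strips that leading word so user indexing stays 0-based. A hedged standalone sketch of the same layout handling, assuming a Linux perf_event_open group leader opened with read_format = PERF_FORMAT_GROUP and no other read_format flags:

    #include <unistd.h>

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // Read every counter of one group with a single syscall and drop the
    // leading "number of events" word.
    bool ReadGroupSketch(int group_fd, std::vector<uint64_t>* out_values) {
      std::vector<uint64_t> buf(1 + out_values->size());  // [nr, v0, v1, ...]
      const ssize_t want = static_cast<ssize_t>(buf.size() * sizeof(uint64_t));
      const ssize_t got = ::read(group_fd, buf.data(), want);
      if (got < static_cast<ssize_t>(sizeof(uint64_t))) return false;
      const std::size_t nr = static_cast<std::size_t>(buf[0]);
      for (std::size_t i = 0; i < nr && i < out_values->size(); ++i) {
        (*out_values)[i] = buf[1 + i];
      }
      return true;
    }
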
diff --git a/src/perf_counters.h b/src/perf_counters.h
new file mode 100644
index 0000000..bf5eb6b
--- /dev/null
+++ b/src/perf_counters.h
@@ -0,0 +1,200 @@
+// Copyright 2021 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef BENCHMARK_PERF_COUNTERS_H
+#define BENCHMARK_PERF_COUNTERS_H
+
+#include <array>
+#include <cstdint>
+#include <cstring>
+#include <memory>
+#include <vector>
+
+#include "benchmark/benchmark.h"
+#include "check.h"
+#include "log.h"
+#include "mutex.h"
+
+#ifndef BENCHMARK_OS_WINDOWS
+#include <unistd.h>
+#endif
+
+#if defined(_MSC_VER)
+#pragma warning(push)
+// C4251: <symbol> needs to have dll-interface to be used by clients of class
+#pragma warning(disable : 4251)
+#endif
+
+namespace benchmark {
+namespace internal {
+
+// Typically, we can only read a small number of counters. There is also a
+// padding preceding counter values, when reading multiple counters with one
+// syscall (which is desirable). PerfCounterValues abstracts these details.
+// The implementation ensures the storage is inlined, and allows 0-based
+// indexing into the counter values.
+// The object is used in conjunction with a PerfCounters object, by passing it
+// to Snapshot(). The Read() method relocates individual reads, discarding
+// the initial padding from each group leader in the values buffer such that
+// all user accesses through the [] operator are correct.
+class BENCHMARK_EXPORT PerfCounterValues {
+ public:
+ explicit PerfCounterValues(size_t nr_counters) : nr_counters_(nr_counters) {
+ BM_CHECK_LE(nr_counters_, kMaxCounters);
+ }
+
+ // We are reading correctly now so the values don't need to skip padding
+ uint64_t operator[](size_t pos) const { return values_[pos]; }
+
+  // The maximum was increased to 32 only because the buffer
+  // is std::array<> backed.
+ static constexpr size_t kMaxCounters = 32;
+
+ private:
+ friend class PerfCounters;
+ // Get the byte buffer in which perf counters can be captured.
+ // This is used by PerfCounters::Read
+ std::pair<char*, size_t> get_data_buffer() {
+ return {reinterpret_cast<char*>(values_.data()),
+ sizeof(uint64_t) * (kPadding + nr_counters_)};
+ }
+
+  // This reading is complex, and since the goal of this class is to
+  // abstract away the intricacies of the reading process, this is
+  // a better place for it.
+ size_t Read(const std::vector<int>& leaders);
+
+  // The padding is 2 due to the reading algorithm (the initial padding plus a
+  // per-read padding).
+ static constexpr size_t kPadding = 2;
+ std::array<uint64_t, kPadding + kMaxCounters> values_;
+ const size_t nr_counters_;
+};
+
+// Collect PMU counters. The object, once constructed, is ready to be used by
+// calling read(). PMU counter collection is enabled from the time create() is
+// called, to obtain the object, until the object's destructor is called.
+class BENCHMARK_EXPORT PerfCounters final {
+ public:
+ // True iff this platform supports performance counters.
+ static const bool kSupported;
+
+ // Returns an empty object
+ static PerfCounters NoCounters() { return PerfCounters(); }
+
+ ~PerfCounters() { CloseCounters(); }
+ PerfCounters() = default;
+ PerfCounters(PerfCounters&&) = default;
+ PerfCounters(const PerfCounters&) = delete;
+ PerfCounters& operator=(PerfCounters&&) noexcept;
+ PerfCounters& operator=(const PerfCounters&) = delete;
+
+ // Platform-specific implementations may choose to do some library
+ // initialization here.
+ static bool Initialize();
+
+  // Check if the given counter is supported, in case the app wants to
+  // check before passing it to Create().
+ static bool IsCounterSupported(const std::string& name);
+
+ // Return a PerfCounters object ready to read the counters with the names
+ // specified. The values are user-mode only. The counter name format is
+ // implementation and OS specific.
+ // In case of failure, this method will in the worst case return an
+ // empty object whose state will still be valid.
+ static PerfCounters Create(const std::vector<std::string>& counter_names);
+
+ // Take a snapshot of the current value of the counters into the provided
+ // valid PerfCounterValues storage. The values are populated such that:
+ // names()[i]'s value is (*values)[i]
+ BENCHMARK_ALWAYS_INLINE bool Snapshot(PerfCounterValues* values) const {
+#ifndef BENCHMARK_OS_WINDOWS
+ assert(values != nullptr);
+ return values->Read(leader_ids_) == counter_ids_.size();
+#else
+ (void)values;
+ return false;
+#endif
+ }
+
+ const std::vector<std::string>& names() const { return counter_names_; }
+ size_t num_counters() const { return counter_names_.size(); }
+
+ private:
+ PerfCounters(const std::vector<std::string>& counter_names,
+ std::vector<int>&& counter_ids, std::vector<int>&& leader_ids)
+ : counter_ids_(std::move(counter_ids)),
+ leader_ids_(std::move(leader_ids)),
+ counter_names_(counter_names) {}
+
+ void CloseCounters() const;
+
+ std::vector<int> counter_ids_;
+ std::vector<int> leader_ids_;
+ std::vector<std::string> counter_names_;
+};
+
+// Typical usage of the above primitives.
+class BENCHMARK_EXPORT PerfCountersMeasurement final {
+ public:
+ PerfCountersMeasurement(const std::vector<std::string>& counter_names);
+
+ size_t num_counters() const { return counters_.num_counters(); }
+
+ std::vector<std::string> names() const { return counters_.names(); }
+
+ BENCHMARK_ALWAYS_INLINE bool Start() {
+ if (num_counters() == 0) return true;
+ // Tell the compiler to not move instructions above/below where we take
+ // the snapshot.
+ ClobberMemory();
+ valid_read_ &= counters_.Snapshot(&start_values_);
+ ClobberMemory();
+
+ return valid_read_;
+ }
+
+ BENCHMARK_ALWAYS_INLINE bool Stop(
+ std::vector<std::pair<std::string, double>>& measurements) {
+ if (num_counters() == 0) return true;
+ // Tell the compiler to not move instructions above/below where we take
+ // the snapshot.
+ ClobberMemory();
+ valid_read_ &= counters_.Snapshot(&end_values_);
+ ClobberMemory();
+
+ for (size_t i = 0; i < counters_.names().size(); ++i) {
+ double measurement = static_cast<double>(end_values_[i]) -
+ static_cast<double>(start_values_[i]);
+ measurements.push_back({counters_.names()[i], measurement});
+ }
+
+ return valid_read_;
+ }
+
+ private:
+ PerfCounters counters_;
+ bool valid_read_ = true;
+ PerfCounterValues start_values_;
+ PerfCounterValues end_values_;
+};
+
+} // namespace internal
+} // namespace benchmark
+
+#if defined(_MSC_VER)
+#pragma warning(pop)
+#endif
+
+#endif // BENCHMARK_PERF_COUNTERS_H
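
To make the intended call sequence of the classes declared above concrete, here is a small editorial usage sketch; it is not part of the patch. The include path, the libpfm-style counter names "CYCLES" and "INSTRUCTIONS", and linking against the library's internal target are assumptions, and a real run also needs the library built with HAVE_LIBPFM on a host whose PMU exposes these events.

// Editorial sketch: driving PerfCountersMeasurement around a measured region.
#include <iostream>
#include <string>
#include <utility>
#include <vector>

#include "src/perf_counters.h"  // path is an assumption; adjust to your build

int main() {
  using benchmark::internal::PerfCounters;
  using benchmark::internal::PerfCountersMeasurement;

  PerfCounters::Initialize();
  PerfCountersMeasurement pcm({"CYCLES", "INSTRUCTIONS"});

  std::vector<std::pair<std::string, double>> deltas;
  pcm.Start();
  volatile long sink = 0;
  for (long i = 0; i < 1000000; ++i) sink += i;  // the measured region
  pcm.Stop(deltas);  // appends one (name, end - start) pair per counter

  for (const auto& kv : deltas)
    std::cout << kv.first << ": " << kv.second << "\n";
  return 0;
}

If the platform or build lacks counter support, Create() returns the empty NoCounters() object, num_counters() is zero, and Start()/Stop() are no-ops that still return true.
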
diff --git a/src/re.h b/src/re.h
index fbe2503..9afb869 100644
--- a/src/re.h
+++ b/src/re.h
@@ -33,7 +33,7 @@
// Prefer C regex libraries when compiling w/o exceptions so that we can
// correctly report errors.
#if defined(BENCHMARK_HAS_NO_EXCEPTIONS) && \
- defined(BENCHMARK_HAVE_STD_REGEX) && \
+ defined(HAVE_STD_REGEX) && \
(defined(HAVE_GNU_POSIX_REGEX) || defined(HAVE_POSIX_REGEX))
#undef HAVE_STD_REGEX
#endif
@@ -126,7 +126,7 @@ inline bool Regex::Init(const std::string& spec, std::string* error) {
// regerror returns the number of bytes necessary to null terminate
  // the string, so we drop that byte when assigning to error.
- CHECK_NE(needed, 0);
+ BM_CHECK_NE(needed, 0);
error->assign(errbuf, needed - 1);
delete[] errbuf;
diff --git a/src/reporter.cc b/src/reporter.cc
index 337575a..076bc31 100644
--- a/src/reporter.cc
+++ b/src/reporter.cc
@@ -12,17 +12,17 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#include "benchmark/benchmark.h"
-#include "timers.h"
-
#include <cstdlib>
-
#include <iostream>
+#include <map>
+#include <string>
#include <tuple>
#include <vector>
+#include "benchmark/benchmark.h"
#include "check.h"
#include "string_util.h"
+#include "timers.h"
namespace benchmark {
@@ -33,10 +33,14 @@ BenchmarkReporter::~BenchmarkReporter() {}
void BenchmarkReporter::PrintBasicContext(std::ostream *out,
Context const &context) {
- CHECK(out) << "cannot be null";
+ BM_CHECK(out) << "cannot be null";
auto &Out = *out;
+#ifndef BENCHMARK_OS_QURT
+ // Date/time information is not available on QuRT.
+  // Attempting to get it via this call causes the binary to crash.
Out << LocalDateTimeString() << "\n";
+#endif
if (context.executable_name)
Out << "Running " << context.executable_name << "\n";
@@ -64,6 +68,15 @@ void BenchmarkReporter::PrintBasicContext(std::ostream *out,
Out << "\n";
}
+ std::map<std::string, std::string> *global_context =
+ internal::GetGlobalContext();
+
+ if (global_context != nullptr) {
+ for (const auto &kv : *global_context) {
+ Out << kv.first << ": " << kv.second << "\n";
+ }
+ }
+
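
The loop just added prints every key/value pair registered in the library's global context. In recent releases that map is populated with benchmark::AddCustomContext or the --benchmark_context=key=value command-line flag; the editorial sketch below shows that usage and is not part of the patch. It assumes the v1.8.x public API, so verify AddCustomContext against the headers you build with.

// Editorial sketch: populating the global context that PrintBasicContext()
// now reports. AddCustomContext is assumed to be the public entry point.
#include <benchmark/benchmark.h>

static void BM_Noop(benchmark::State& state) {
  for (auto _ : state) {
    benchmark::DoNotOptimize(state.iterations());
  }
}
BENCHMARK(BM_Noop);

int main(int argc, char** argv) {
  benchmark::AddCustomContext("build_type", "release");  // printed as "build_type: release"
  benchmark::Initialize(&argc, argv);
  benchmark::RunSpecifiedBenchmarks();
  benchmark::Shutdown();
  return 0;
}
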
if (CPUInfo::Scaling::ENABLED == info.scaling) {
Out << "***WARNING*** CPU scaling is enabled, the benchmark "
"real time measurements may be noisy and will incur extra "
diff --git a/src/sleep.cc b/src/sleep.cc
deleted file mode 100644
index 4609d54..0000000
--- a/src/sleep.cc
+++ /dev/null
@@ -1,67 +0,0 @@
-// Copyright 2015 Google Inc. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "sleep.h"
-
-#include <cerrno>
-#include <cstdlib>
-#include <ctime>
-
-#include "internal_macros.h"
-
-#ifdef BENCHMARK_OS_WINDOWS
-#include <windows.h>
-#endif
-
-#ifdef BENCHMARK_OS_ZOS
-#include <unistd.h>
-#endif
-
-namespace benchmark {
-#ifdef BENCHMARK_OS_WINDOWS
-// Window's Sleep takes milliseconds argument.
-void SleepForMilliseconds(int milliseconds) { Sleep(milliseconds); }
-void SleepForSeconds(double seconds) {
- SleepForMilliseconds(static_cast<int>(kNumMillisPerSecond * seconds));
-}
-#else // BENCHMARK_OS_WINDOWS
-void SleepForMicroseconds(int microseconds) {
-#ifdef BENCHMARK_OS_ZOS
- // z/OS does not support nanosleep. Instead call sleep() and then usleep() to
- // sleep for the remaining microseconds because usleep() will fail if its
- // argument is greater than 1000000.
- div_t sleepTime = div(microseconds, kNumMicrosPerSecond);
- int seconds = sleepTime.quot;
- while (seconds != 0)
- seconds = sleep(seconds);
- while (usleep(sleepTime.rem) == -1 && errno == EINTR)
- ;
-#else
- struct timespec sleep_time;
- sleep_time.tv_sec = microseconds / kNumMicrosPerSecond;
- sleep_time.tv_nsec = (microseconds % kNumMicrosPerSecond) * kNumNanosPerMicro;
- while (nanosleep(&sleep_time, &sleep_time) != 0 && errno == EINTR)
- ; // Ignore signals and wait for the full interval to elapse.
-#endif
-}
-
-void SleepForMilliseconds(int milliseconds) {
- SleepForMicroseconds(milliseconds * kNumMicrosPerMilli);
-}
-
-void SleepForSeconds(double seconds) {
- SleepForMicroseconds(static_cast<int>(seconds * kNumMicrosPerSecond));
-}
-#endif // BENCHMARK_OS_WINDOWS
-} // end namespace benchmark
diff --git a/src/sleep.h b/src/sleep.h
deleted file mode 100644
index f98551a..0000000
--- a/src/sleep.h
+++ /dev/null
@@ -1,15 +0,0 @@
-#ifndef BENCHMARK_SLEEP_H_
-#define BENCHMARK_SLEEP_H_
-
-namespace benchmark {
-const int kNumMillisPerSecond = 1000;
-const int kNumMicrosPerMilli = 1000;
-const int kNumMicrosPerSecond = kNumMillisPerSecond * 1000;
-const int kNumNanosPerMicro = 1000;
-const int kNumNanosPerSecond = kNumNanosPerMicro * kNumMicrosPerSecond;
-
-void SleepForMilliseconds(int milliseconds);
-void SleepForSeconds(double seconds);
-} // end namespace benchmark
-
-#endif // BENCHMARK_SLEEP_H_
diff --git a/src/statistics.cc b/src/statistics.cc
index bd5a3d6..844e926 100644
--- a/src/statistics.cc
+++ b/src/statistics.cc
@@ -13,15 +13,16 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#include "benchmark/benchmark.h"
+#include "statistics.h"
#include <algorithm>
#include <cmath>
#include <numeric>
#include <string>
#include <vector>
+
+#include "benchmark/benchmark.h"
#include "check.h"
-#include "statistics.h"
namespace benchmark {
@@ -41,13 +42,13 @@ double StatisticsMedian(const std::vector<double>& v) {
auto center = copy.begin() + v.size() / 2;
std::nth_element(copy.begin(), center, copy.end());
- // did we have an odd number of samples?
- // if yes, then center is the median
- // it no, then we are looking for the average between center and the value
- // before
+ // Did we have an odd number of samples? If yes, then center is the median.
+ // If not, then we are looking for the average between center and the value
+  // before. Instead of re-sorting, we just look for the max value before it,
+  // which is not necessarily the element immediately preceding `center`, since
+  // `copy` is only partially sorted by `nth_element`.
if (v.size() % 2 == 1) return *center;
- auto center2 = copy.begin() + v.size() / 2 - 1;
- std::nth_element(copy.begin(), center2, copy.end());
+ auto center2 = std::max_element(copy.begin(), center);
return (*center + *center2) / 2.0;
}
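
The rewrite above drops the second nth_element call: after the first partial sort, everything to the left of `center` is less than or equal to *center, so for an even-sized sample the lower middle value is simply the maximum of that left half. A self-contained editorial illustration of the same trick, not taken from the patch:

// Editorial sketch: median via one nth_element plus max_element.
#include <algorithm>
#include <cassert>
#include <vector>

double Median(std::vector<double> v) {
  assert(!v.empty());
  auto center = v.begin() + v.size() / 2;
  std::nth_element(v.begin(), center, v.end());
  if (v.size() % 2 == 1) return *center;
  // nth_element guarantees every element before `center` is <= *center, so
  // the other middle value is the max of that prefix; no second sort needed.
  auto center2 = std::max_element(v.begin(), center);
  return (*center + *center2) / 2.0;
}

// For example, Median({1.0, 4.0, 2.0, 3.0}) returns 2.5.
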
@@ -74,14 +75,22 @@ double StatisticsStdDev(const std::vector<double>& v) {
return Sqrt(v.size() / (v.size() - 1.0) * (avg_squares - Sqr(mean)));
}
+double StatisticsCV(const std::vector<double>& v) {
+ if (v.size() < 2) return 0.0;
+
+ const auto stddev = StatisticsStdDev(v);
+ const auto mean = StatisticsMean(v);
+
+ return stddev / mean;
+}
+
std::vector<BenchmarkReporter::Run> ComputeStats(
const std::vector<BenchmarkReporter::Run>& reports) {
typedef BenchmarkReporter::Run Run;
std::vector<Run> results;
- auto error_count =
- std::count_if(reports.begin(), reports.end(),
- [](Run const& run) { return run.error_occurred; });
+ auto error_count = std::count_if(reports.begin(), reports.end(),
+ [](Run const& run) { return run.skipped; });
if (reports.size() - error_count < 2) {
// We don't report aggregated data if there was a single run.
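
StatisticsCV added above is the coefficient of variation: the sample standard deviation expressed as a fraction of the mean (surfaced as the cv aggregate in recent releases; that reporting detail is background, not shown in this hunk). A small editorial check of the definition, using the same (n - 1) sample form as StatisticsStdDev:

// Editorial sketch: coefficient of variation = stddev / mean.
#include <cmath>
#include <vector>

double Mean(const std::vector<double>& v) {
  double sum = 0.0;
  for (double x : v) sum += x;
  return sum / static_cast<double>(v.size());
}

double StdDev(const std::vector<double>& v) {
  const double m = Mean(v);
  double sq = 0.0;
  for (double x : v) sq += (x - m) * (x - m);
  return std::sqrt(sq / (static_cast<double>(v.size()) - 1.0));
}

double CV(const std::vector<double>& v) {
  return v.size() < 2 ? 0.0 : StdDev(v) / Mean(v);
}

// For example, CV({9.0, 10.0, 11.0}) is 1.0 / 10.0 = 0.1.
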
@@ -108,26 +117,28 @@ std::vector<BenchmarkReporter::Run> ComputeStats(
for (auto const& cnt : r.counters) {
auto it = counter_stats.find(cnt.first);
if (it == counter_stats.end()) {
- counter_stats.insert({cnt.first, {cnt.second, std::vector<double>{}}});
- it = counter_stats.find(cnt.first);
+ it = counter_stats
+ .emplace(cnt.first,
+ CounterStat{cnt.second, std::vector<double>{}})
+ .first;
it->second.s.reserve(reports.size());
} else {
- CHECK_EQ(counter_stats[cnt.first].c.flags, cnt.second.flags);
+ BM_CHECK_EQ(it->second.c.flags, cnt.second.flags);
}
}
}
// Populate the accumulators.
for (Run const& run : reports) {
- CHECK_EQ(reports[0].benchmark_name(), run.benchmark_name());
- CHECK_EQ(run_iterations, run.iterations);
- if (run.error_occurred) continue;
+ BM_CHECK_EQ(reports[0].benchmark_name(), run.benchmark_name());
+ BM_CHECK_EQ(run_iterations, run.iterations);
+ if (run.skipped) continue;
real_accumulated_time_stat.emplace_back(run.real_accumulated_time);
cpu_accumulated_time_stat.emplace_back(run.cpu_accumulated_time);
// user counters
for (auto const& cnt : run.counters) {
auto it = counter_stats.find(cnt.first);
- CHECK_NE(it, counter_stats.end());
+ BM_CHECK_NE(it, counter_stats.end());
it->second.s.emplace_back(cnt.second);
}
}
@@ -148,11 +159,14 @@ std::vector<BenchmarkReporter::Run> ComputeStats(
// Get the data from the accumulator to BenchmarkReporter::Run's.
Run data;
data.run_name = reports[0].run_name;
+ data.family_index = reports[0].family_index;
+ data.per_family_instance_index = reports[0].per_family_instance_index;
data.run_type = BenchmarkReporter::Run::RT_Aggregate;
data.threads = reports[0].threads;
data.repetitions = reports[0].repetitions;
data.repetition_index = Run::no_repetition_index;
data.aggregate_name = Stat.name_;
+ data.aggregate_unit = Stat.unit_;
data.report_label = report_label;
// It is incorrect to say that an aggregate is computed over
@@ -165,13 +179,15 @@ std::vector<BenchmarkReporter::Run> ComputeStats(
data.real_accumulated_time = Stat.compute_(real_accumulated_time_stat);
data.cpu_accumulated_time = Stat.compute_(cpu_accumulated_time_stat);
- // We will divide these times by data.iterations when reporting, but the
- // data.iterations is not nessesairly the scale of these measurements,
- // because in each repetition, these timers are sum over all the iterations.
- // And if we want to say that the stats are over N repetitions and not
- // M iterations, we need to multiply these by (N/M).
- data.real_accumulated_time *= iteration_rescale_factor;
- data.cpu_accumulated_time *= iteration_rescale_factor;
+ if (data.aggregate_unit == StatisticUnit::kTime) {
+ // We will divide these times by data.iterations when reporting, but the
+ // data.iterations is not necessarily the scale of these measurements,
+    // because in each repetition, these timers are summed over all the iters.
+ // And if we want to say that the stats are over N repetitions and not
+ // M iterations, we need to multiply these by (N/M).
+ data.real_accumulated_time *= iteration_rescale_factor;
+ data.cpu_accumulated_time *= iteration_rescale_factor;
+ }
data.time_unit = reports[0].time_unit;
diff --git a/src/statistics.h b/src/statistics.h
index 7eccc85..6e5560e 100644
--- a/src/statistics.h
+++ b/src/statistics.h
@@ -22,15 +22,22 @@
namespace benchmark {
-// Return a vector containing the mean, median and standard devation information
-// (and any user-specified info) for the specified list of reports. If 'reports'
-// contains less than two non-errored runs an empty vector is returned
+// Return a vector containing the mean, median and standard deviation
+// information (and any user-specified info) for the specified list of reports.
+// If 'reports' contains less than two non-errored runs an empty vector is
+// returned
+BENCHMARK_EXPORT
std::vector<BenchmarkReporter::Run> ComputeStats(
const std::vector<BenchmarkReporter::Run>& reports);
+BENCHMARK_EXPORT
double StatisticsMean(const std::vector<double>& v);
+BENCHMARK_EXPORT
double StatisticsMedian(const std::vector<double>& v);
+BENCHMARK_EXPORT
double StatisticsStdDev(const std::vector<double>& v);
+BENCHMARK_EXPORT
+double StatisticsCV(const std::vector<double>& v);
} // end namespace benchmark
diff --git a/src/string_util.cc b/src/string_util.cc
index ac60b55..c69e40a 100644
--- a/src/string_util.cc
+++ b/src/string_util.cc
@@ -11,16 +11,17 @@
#include <sstream>
#include "arraysize.h"
+#include "benchmark/benchmark.h"
namespace benchmark {
namespace {
-
// kilo, Mega, Giga, Tera, Peta, Exa, Zetta, Yotta.
-const char kBigSIUnits[] = "kMGTPEZY";
+const char* const kBigSIUnits[] = {"k", "M", "G", "T", "P", "E", "Z", "Y"};
// Kibi, Mebi, Gibi, Tebi, Pebi, Exbi, Zebi, Yobi.
-const char kBigIECUnits[] = "KMGTPEZY";
+const char* const kBigIECUnits[] = {"Ki", "Mi", "Gi", "Ti",
+ "Pi", "Ei", "Zi", "Yi"};
// milli, micro, nano, pico, femto, atto, zepto, yocto.
-const char kSmallSIUnits[] = "munpfazy";
+const char* const kSmallSIUnits[] = {"m", "u", "n", "p", "f", "a", "z", "y"};
// We require that all three arrays have the same size.
static_assert(arraysize(kBigSIUnits) == arraysize(kBigIECUnits),
@@ -30,9 +31,8 @@ static_assert(arraysize(kSmallSIUnits) == arraysize(kBigSIUnits),
static const int64_t kUnitsSize = arraysize(kBigSIUnits);
-void ToExponentAndMantissa(double val, double thresh, int precision,
- double one_k, std::string* mantissa,
- int64_t* exponent) {
+void ToExponentAndMantissa(double val, int precision, double one_k,
+ std::string* mantissa, int64_t* exponent) {
std::stringstream mantissa_stream;
if (val < 0) {
@@ -43,8 +43,8 @@ void ToExponentAndMantissa(double val, double thresh, int precision,
// Adjust threshold so that it never excludes things which can't be rendered
// in 'precision' digits.
const double adjusted_threshold =
- std::max(thresh, 1.0 / std::pow(10.0, precision));
- const double big_threshold = adjusted_threshold * one_k;
+ std::max(1.0, 1.0 / std::pow(10.0, precision));
+ const double big_threshold = (adjusted_threshold * one_k) - 1;
const double small_threshold = adjusted_threshold;
// Values in ]simple_threshold,small_threshold[ will be printed as-is
const double simple_threshold = 0.01;
@@ -92,37 +92,20 @@ std::string ExponentToPrefix(int64_t exponent, bool iec) {
const int64_t index = (exponent > 0 ? exponent - 1 : -exponent - 1);
if (index >= kUnitsSize) return "";
- const char* array =
+ const char* const* array =
(exponent > 0 ? (iec ? kBigIECUnits : kBigSIUnits) : kSmallSIUnits);
- if (iec)
- return array[index] + std::string("i");
- else
- return std::string(1, array[index]);
+
+ return std::string(array[index]);
}
-std::string ToBinaryStringFullySpecified(double value, double threshold,
- int precision, double one_k = 1024.0) {
+std::string ToBinaryStringFullySpecified(double value, int precision,
+ Counter::OneK one_k) {
std::string mantissa;
int64_t exponent;
- ToExponentAndMantissa(value, threshold, precision, one_k, &mantissa,
+ ToExponentAndMantissa(value, precision,
+ one_k == Counter::kIs1024 ? 1024.0 : 1000.0, &mantissa,
&exponent);
- return mantissa + ExponentToPrefix(exponent, false);
-}
-
-} // end namespace
-
-void AppendHumanReadable(int n, std::string* str) {
- std::stringstream ss;
- // Round down to the nearest SI prefix.
- ss << ToBinaryStringFullySpecified(n, 1.0, 0);
- *str += ss.str();
-}
-
-std::string HumanReadableNumber(double n, double one_k) {
- // 1.1 means that figures up to 1.1k should be shown with the next unit down;
- // this softens edge effects.
- // 1 means that we should show one decimal place of precision.
- return ToBinaryStringFullySpecified(n, 1.1, 1, one_k);
+ return mantissa + ExponentToPrefix(exponent, one_k == Counter::kIs1024);
}
std::string StrFormatImp(const char* msg, va_list args) {
@@ -133,28 +116,34 @@ std::string StrFormatImp(const char* msg, va_list args) {
// TODO(ericwf): use std::array for first attempt to avoid one memory
// allocation guess what the size might be
std::array<char, 256> local_buff;
- std::size_t size = local_buff.size();
+
  // 2015-10-08: vsnprintf is used instead of std::vsnprintf due to a limitation
// in the android-ndk
- auto ret = vsnprintf(local_buff.data(), size, msg, args_cp);
+ auto ret = vsnprintf(local_buff.data(), local_buff.size(), msg, args_cp);
va_end(args_cp);
// handle empty expansion
if (ret == 0) return std::string{};
- if (static_cast<std::size_t>(ret) < size)
+ if (static_cast<std::size_t>(ret) < local_buff.size())
return std::string(local_buff.data());
// we did not provide a long enough buffer on our first attempt.
// add 1 to size to account for null-byte in size cast to prevent overflow
- size = static_cast<std::size_t>(ret) + 1;
+ std::size_t size = static_cast<std::size_t>(ret) + 1;
auto buff_ptr = std::unique_ptr<char[]>(new char[size]);
  // 2015-10-08: vsnprintf is used instead of std::vsnprintf due to a limitation
// in the android-ndk
- ret = vsnprintf(buff_ptr.get(), size, msg, args);
+ vsnprintf(buff_ptr.get(), size, msg, args);
return std::string(buff_ptr.get());
}
+} // end namespace
+
+std::string HumanReadableNumber(double n, Counter::OneK one_k) {
+ return ToBinaryStringFullySpecified(n, 1, one_k);
+}
+
std::string StrFormat(const char* format, ...) {
va_list args;
va_start(args, format);
@@ -163,6 +152,19 @@ std::string StrFormat(const char* format, ...) {
return tmp;
}
+std::vector<std::string> StrSplit(const std::string& str, char delim) {
+ if (str.empty()) return {};
+ std::vector<std::string> ret;
+ size_t first = 0;
+ size_t next = str.find(delim);
+ for (; next != std::string::npos;
+ first = next + 1, next = str.find(delim, first)) {
+ ret.push_back(str.substr(first, next - first));
+ }
+ ret.push_back(str.substr(first));
+ return ret;
+}
+
#ifdef BENCHMARK_STL_ANDROID_GNUSTL
/*
* GNU STL in Android NDK lacks support for some C++11 functions, including
@@ -185,11 +187,10 @@ unsigned long stoul(const std::string& str, size_t* pos, int base) {
/* Check for errors and return */
if (strtoulErrno == ERANGE) {
- throw std::out_of_range(
- "stoul failed: " + str + " is outside of range of unsigned long");
+ throw std::out_of_range("stoul failed: " + str +
+ " is outside of range of unsigned long");
} else if (strEnd == strStart || strtoulErrno != 0) {
- throw std::invalid_argument(
- "stoul failed: " + str + " is not an integer");
+ throw std::invalid_argument("stoul failed: " + str + " is not an integer");
}
if (pos != nullptr) {
*pos = static_cast<size_t>(strEnd - strStart);
@@ -212,11 +213,10 @@ int stoi(const std::string& str, size_t* pos, int base) {
/* Check for errors and return */
if (strtolErrno == ERANGE || long(int(result)) != result) {
- throw std::out_of_range(
- "stoul failed: " + str + " is outside of range of int");
+ throw std::out_of_range("stoul failed: " + str +
+ " is outside of range of int");
} else if (strEnd == strStart || strtolErrno != 0) {
- throw std::invalid_argument(
- "stoul failed: " + str + " is not an integer");
+ throw std::invalid_argument("stoul failed: " + str + " is not an integer");
}
if (pos != nullptr) {
*pos = static_cast<size_t>(strEnd - strStart);
@@ -239,11 +239,10 @@ double stod(const std::string& str, size_t* pos) {
/* Check for errors and return */
if (strtodErrno == ERANGE) {
- throw std::out_of_range(
- "stoul failed: " + str + " is outside of range of int");
+ throw std::out_of_range("stoul failed: " + str +
+ " is outside of range of int");
} else if (strEnd == strStart || strtodErrno != 0) {
- throw std::invalid_argument(
- "stoul failed: " + str + " is not an integer");
+ throw std::invalid_argument("stoul failed: " + str + " is not an integer");
}
if (pos != nullptr) {
*pos = static_cast<size_t>(strEnd - strStart);
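
StrSplit introduced above is a plain delimiter split: it preserves empty fields between consecutive delimiters, always emits a final (possibly empty) field, and returns an empty vector only for an empty input. An editorial, self-contained copy with usage checks (the function body mirrors the patch; the asserts are illustrative):

// Editorial sketch: behaviour of the StrSplit helper added above.
#include <cassert>
#include <cstddef>
#include <string>
#include <vector>

std::vector<std::string> StrSplit(const std::string& str, char delim) {
  if (str.empty()) return {};
  std::vector<std::string> ret;
  std::size_t first = 0;
  std::size_t next = str.find(delim);
  for (; next != std::string::npos;
       first = next + 1, next = str.find(delim, first)) {
    ret.push_back(str.substr(first, next - first));
  }
  ret.push_back(str.substr(first));
  return ret;
}

int main() {
  assert(StrSplit("L1,L2,L3", ',') ==
         (std::vector<std::string>{"L1", "L2", "L3"}));
  assert(StrSplit("a,,b", ',') == (std::vector<std::string>{"a", "", "b"}));
  assert(StrSplit("", ',').empty());
  return 0;
}
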
diff --git a/src/string_util.h b/src/string_util.h
index 09d7b4b..731aa2c 100644
--- a/src/string_util.h
+++ b/src/string_util.h
@@ -4,14 +4,19 @@
#include <sstream>
#include <string>
#include <utility>
+#include <vector>
+
+#include "benchmark/benchmark.h"
+#include "benchmark/export.h"
+#include "check.h"
#include "internal_macros.h"
namespace benchmark {
-void AppendHumanReadable(int n, std::string* str);
-
-std::string HumanReadableNumber(double n, double one_k = 1024.0);
+BENCHMARK_EXPORT
+std::string HumanReadableNumber(double n, Counter::OneK one_k);
+BENCHMARK_EXPORT
#if defined(__MINGW32__)
__attribute__((format(__MINGW_PRINTF_FORMAT, 1, 2)))
#elif defined(__GNUC__)
@@ -37,6 +42,11 @@ inline std::string StrCat(Args&&... args) {
return ss.str();
}
+BENCHMARK_EXPORT
+std::vector<std::string> StrSplit(const std::string& str, char delim);
+
+// Disable lint checking for this block since it re-implements C functions.
+// NOLINTBEGIN
#ifdef BENCHMARK_STL_ANDROID_GNUSTL
/*
* GNU STL in Android NDK lacks support for some C++11 functions, including
@@ -45,14 +55,15 @@ inline std::string StrCat(Args&&... args) {
* namespace, not std:: namespace.
*/
unsigned long stoul(const std::string& str, size_t* pos = nullptr,
- int base = 10);
+ int base = 10);
int stoi(const std::string& str, size_t* pos = nullptr, int base = 10);
double stod(const std::string& str, size_t* pos = nullptr);
#else
-using std::stoul;
-using std::stoi;
-using std::stod;
+using std::stod; // NOLINT(misc-unused-using-decls)
+using std::stoi; // NOLINT(misc-unused-using-decls)
+using std::stoul; // NOLINT(misc-unused-using-decls)
#endif
+// NOLINTEND
} // end namespace benchmark
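
With the change above, HumanReadableNumber takes a Counter::OneK selector instead of a raw divisor, so the same value renders with SI prefixes for kIs1000 and IEC prefixes (Ki, Mi, ...) for kIs1024. The editorial sketch below shows the prefix-selection idea only; it is not the library's implementation, which also handles small prefixes, negative values, and precision.

// Editorial sketch: SI vs IEC prefix selection, analogous to Counter::OneK.
#include <cstddef>
#include <string>

enum class OneK { kIs1000, kIs1024 };  // stand-in for Counter::OneK

std::string Humanize(double value, OneK one_k) {
  const char* const si[] = {"", "k", "M", "G", "T"};
  const char* const iec[] = {"", "Ki", "Mi", "Gi", "Ti"};
  const double base = one_k == OneK::kIs1024 ? 1024.0 : 1000.0;
  const char* const* prefixes = one_k == OneK::kIs1024 ? iec : si;
  std::size_t idx = 0;
  while (value >= base && idx + 1 < 5) {
    value /= base;
    ++idx;
  }
  return std::to_string(value) + prefixes[idx];
}

// For example, Humanize(2048.0, OneK::kIs1024) yields "2.000000Ki".
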
diff --git a/src/sysinfo.cc b/src/sysinfo.cc
index b30b4f8..922e83a 100644
--- a/src/sysinfo.cc
+++ b/src/sysinfo.cc
@@ -19,10 +19,11 @@
#undef StrCat // Don't let StrCat in string_util.h be renamed to lstrcatA
#include <versionhelpers.h>
#include <windows.h>
+
#include <codecvt>
#else
#include <fcntl.h>
-#ifndef BENCHMARK_OS_FUCHSIA
+#if !defined(BENCHMARK_OS_FUCHSIA) && !defined(BENCHMARK_OS_QURT)
#include <sys/resource.h>
#endif
#include <sys/time.h>
@@ -37,10 +38,17 @@
#endif
#if defined(BENCHMARK_OS_SOLARIS)
#include <kstat.h>
+#include <netdb.h>
#endif
#if defined(BENCHMARK_OS_QNX)
#include <sys/syspage.h>
#endif
+#if defined(BENCHMARK_OS_QURT)
+#include <qurt.h>
+#endif
+#if defined(BENCHMARK_HAS_PTHREAD_AFFINITY)
+#include <pthread.h>
+#endif
#include <algorithm>
#include <array>
@@ -55,17 +63,19 @@
#include <iostream>
#include <iterator>
#include <limits>
+#include <locale>
#include <memory>
+#include <random>
#include <sstream>
-#include <locale>
#include <utility>
+#include "benchmark/benchmark.h"
#include "check.h"
#include "cycleclock.h"
#include "internal_macros.h"
#include "log.h"
-#include "sleep.h"
#include "string_util.h"
+#include "timers.h"
namespace benchmark {
namespace {
@@ -90,67 +100,59 @@ BENCHMARK_NORETURN void PrintErrorAndDie(Args&&... args) {
/// `sysctl` with the result type it's to be interpreted as.
struct ValueUnion {
union DataT {
- uint32_t uint32_value;
- uint64_t uint64_value;
+ int32_t int32_value;
+ int64_t int64_value;
// For correct aliasing of union members from bytes.
char bytes[8];
};
using DataPtr = std::unique_ptr<DataT, decltype(&std::free)>;
// The size of the data union member + its trailing array size.
- size_t Size;
- DataPtr Buff;
+ std::size_t size;
+ DataPtr buff;
public:
- ValueUnion() : Size(0), Buff(nullptr, &std::free) {}
+ ValueUnion() : size(0), buff(nullptr, &std::free) {}
- explicit ValueUnion(size_t BuffSize)
- : Size(sizeof(DataT) + BuffSize),
- Buff(::new (std::malloc(Size)) DataT(), &std::free) {}
+ explicit ValueUnion(std::size_t buff_size)
+ : size(sizeof(DataT) + buff_size),
+ buff(::new (std::malloc(size)) DataT(), &std::free) {}
ValueUnion(ValueUnion&& other) = default;
- explicit operator bool() const { return bool(Buff); }
+ explicit operator bool() const { return bool(buff); }
- char* data() const { return Buff->bytes; }
+ char* data() const { return buff->bytes; }
std::string GetAsString() const { return std::string(data()); }
int64_t GetAsInteger() const {
- if (Size == sizeof(Buff->uint32_value))
- return static_cast<int32_t>(Buff->uint32_value);
- else if (Size == sizeof(Buff->uint64_value))
- return static_cast<int64_t>(Buff->uint64_value);
- BENCHMARK_UNREACHABLE();
- }
-
- uint64_t GetAsUnsigned() const {
- if (Size == sizeof(Buff->uint32_value))
- return Buff->uint32_value;
- else if (Size == sizeof(Buff->uint64_value))
- return Buff->uint64_value;
+ if (size == sizeof(buff->int32_value))
+ return buff->int32_value;
+ else if (size == sizeof(buff->int64_value))
+ return buff->int64_value;
BENCHMARK_UNREACHABLE();
}
template <class T, int N>
std::array<T, N> GetAsArray() {
- const int ArrSize = sizeof(T) * N;
- CHECK_LE(ArrSize, Size);
- std::array<T, N> Arr;
- std::memcpy(Arr.data(), data(), ArrSize);
- return Arr;
+ const int arr_size = sizeof(T) * N;
+ BM_CHECK_LE(arr_size, size);
+ std::array<T, N> arr;
+ std::memcpy(arr.data(), data(), arr_size);
+ return arr;
}
};
-ValueUnion GetSysctlImp(std::string const& Name) {
+ValueUnion GetSysctlImp(std::string const& name) {
#if defined BENCHMARK_OS_OPENBSD
int mib[2];
mib[0] = CTL_HW;
- if ((Name == "hw.ncpu") || (Name == "hw.cpuspeed")){
+ if ((name == "hw.ncpu") || (name == "hw.cpuspeed")) {
ValueUnion buff(sizeof(int));
- if (Name == "hw.ncpu") {
+ if (name == "hw.ncpu") {
mib[1] = HW_NCPU;
} else {
mib[1] = HW_CPUSPEED;
@@ -163,41 +165,41 @@ ValueUnion GetSysctlImp(std::string const& Name) {
}
return ValueUnion();
#else
- size_t CurBuffSize = 0;
- if (sysctlbyname(Name.c_str(), nullptr, &CurBuffSize, nullptr, 0) == -1)
+ std::size_t cur_buff_size = 0;
+ if (sysctlbyname(name.c_str(), nullptr, &cur_buff_size, nullptr, 0) == -1)
return ValueUnion();
- ValueUnion buff(CurBuffSize);
- if (sysctlbyname(Name.c_str(), buff.data(), &buff.Size, nullptr, 0) == 0)
+ ValueUnion buff(cur_buff_size);
+ if (sysctlbyname(name.c_str(), buff.data(), &buff.size, nullptr, 0) == 0)
return buff;
return ValueUnion();
#endif
}
BENCHMARK_MAYBE_UNUSED
-bool GetSysctl(std::string const& Name, std::string* Out) {
- Out->clear();
- auto Buff = GetSysctlImp(Name);
- if (!Buff) return false;
- Out->assign(Buff.data());
+bool GetSysctl(std::string const& name, std::string* out) {
+ out->clear();
+ auto buff = GetSysctlImp(name);
+ if (!buff) return false;
+ out->assign(buff.data());
return true;
}
template <class Tp,
class = typename std::enable_if<std::is_integral<Tp>::value>::type>
-bool GetSysctl(std::string const& Name, Tp* Out) {
- *Out = 0;
- auto Buff = GetSysctlImp(Name);
- if (!Buff) return false;
- *Out = static_cast<Tp>(Buff.GetAsUnsigned());
+bool GetSysctl(std::string const& name, Tp* out) {
+ *out = 0;
+ auto buff = GetSysctlImp(name);
+ if (!buff) return false;
+ *out = static_cast<Tp>(buff.GetAsInteger());
return true;
}
template <class Tp, size_t N>
-bool GetSysctl(std::string const& Name, std::array<Tp, N>* Out) {
- auto Buff = GetSysctlImp(Name);
- if (!Buff) return false;
- *Out = Buff.GetAsArray<Tp, N>();
+bool GetSysctl(std::string const& name, std::array<Tp, N>* out) {
+ auto buff = GetSysctlImp(name);
+ if (!buff) return false;
+ *out = buff.GetAsArray<Tp, N>();
return true;
}
#endif
@@ -214,10 +216,9 @@ bool ReadFromFile(std::string const& fname, ArgT* arg) {
CPUInfo::Scaling CpuScaling(int num_cpus) {
// We don't have a valid CPU count, so don't even bother.
if (num_cpus <= 0) return CPUInfo::Scaling::UNKNOWN;
-#ifdef BENCHMARK_OS_QNX
+#if defined(BENCHMARK_OS_QNX)
return CPUInfo::Scaling::UNKNOWN;
-#endif
-#ifndef BENCHMARK_OS_WINDOWS
+#elif !defined(BENCHMARK_OS_WINDOWS)
// On Linux, the CPUfreq subsystem exposes CPU information as files on the
// local file system. If reading the exported files fails, then we may not be
// running on Linux, so we silently ignore all the read errors.
@@ -225,28 +226,30 @@ CPUInfo::Scaling CpuScaling(int num_cpus) {
for (int cpu = 0; cpu < num_cpus; ++cpu) {
std::string governor_file =
StrCat("/sys/devices/system/cpu/cpu", cpu, "/cpufreq/scaling_governor");
- if (ReadFromFile(governor_file, &res) && res != "performance") return CPUInfo::Scaling::ENABLED;
+ if (ReadFromFile(governor_file, &res) && res != "performance")
+ return CPUInfo::Scaling::ENABLED;
}
return CPUInfo::Scaling::DISABLED;
-#endif
+#else
return CPUInfo::Scaling::UNKNOWN;
+#endif
}
-int CountSetBitsInCPUMap(std::string Val) {
- auto CountBits = [](std::string Part) {
+int CountSetBitsInCPUMap(std::string val) {
+ auto CountBits = [](std::string part) {
using CPUMask = std::bitset<sizeof(std::uintptr_t) * CHAR_BIT>;
- Part = "0x" + Part;
- CPUMask Mask(benchmark::stoul(Part, nullptr, 16));
- return static_cast<int>(Mask.count());
+ part = "0x" + part;
+ CPUMask mask(benchmark::stoul(part, nullptr, 16));
+ return static_cast<int>(mask.count());
};
- size_t Pos;
+ std::size_t pos;
int total = 0;
- while ((Pos = Val.find(',')) != std::string::npos) {
- total += CountBits(Val.substr(0, Pos));
- Val = Val.substr(Pos + 1);
+ while ((pos = val.find(',')) != std::string::npos) {
+ total += CountBits(val.substr(0, pos));
+ val = val.substr(pos + 1);
}
- if (!Val.empty()) {
- total += CountBits(Val);
+ if (!val.empty()) {
+ total += CountBits(val);
}
return total;
}
@@ -255,16 +258,16 @@ BENCHMARK_MAYBE_UNUSED
std::vector<CPUInfo::CacheInfo> GetCacheSizesFromKVFS() {
std::vector<CPUInfo::CacheInfo> res;
std::string dir = "/sys/devices/system/cpu/cpu0/cache/";
- int Idx = 0;
+ int idx = 0;
while (true) {
CPUInfo::CacheInfo info;
- std::string FPath = StrCat(dir, "index", Idx++, "/");
- std::ifstream f(StrCat(FPath, "size").c_str());
+ std::string fpath = StrCat(dir, "index", idx++, "/");
+ std::ifstream f(StrCat(fpath, "size").c_str());
if (!f.is_open()) break;
std::string suffix;
f >> info.size;
if (f.fail())
- PrintErrorAndDie("Failed while reading file '", FPath, "size'");
+ PrintErrorAndDie("Failed while reading file '", fpath, "size'");
if (f.good()) {
f >> suffix;
if (f.bad())
@@ -275,13 +278,13 @@ std::vector<CPUInfo::CacheInfo> GetCacheSizesFromKVFS() {
else if (suffix == "K")
info.size *= 1024;
}
- if (!ReadFromFile(StrCat(FPath, "type"), &info.type))
- PrintErrorAndDie("Failed to read from file ", FPath, "type");
- if (!ReadFromFile(StrCat(FPath, "level"), &info.level))
- PrintErrorAndDie("Failed to read from file ", FPath, "level");
+ if (!ReadFromFile(StrCat(fpath, "type"), &info.type))
+ PrintErrorAndDie("Failed to read from file ", fpath, "type");
+ if (!ReadFromFile(StrCat(fpath, "level"), &info.level))
+ PrintErrorAndDie("Failed to read from file ", fpath, "level");
std::string map_str;
- if (!ReadFromFile(StrCat(FPath, "shared_cpu_map"), &map_str))
- PrintErrorAndDie("Failed to read from file ", FPath, "shared_cpu_map");
+ if (!ReadFromFile(StrCat(fpath, "shared_cpu_map"), &map_str))
+ PrintErrorAndDie("Failed to read from file ", fpath, "shared_cpu_map");
info.num_sharing = CountSetBitsInCPUMap(map_str);
res.push_back(info);
}
@@ -292,26 +295,26 @@ std::vector<CPUInfo::CacheInfo> GetCacheSizesFromKVFS() {
#ifdef BENCHMARK_OS_MACOSX
std::vector<CPUInfo::CacheInfo> GetCacheSizesMacOSX() {
std::vector<CPUInfo::CacheInfo> res;
- std::array<uint64_t, 4> CacheCounts{{0, 0, 0, 0}};
- GetSysctl("hw.cacheconfig", &CacheCounts);
+ std::array<int, 4> cache_counts{{0, 0, 0, 0}};
+ GetSysctl("hw.cacheconfig", &cache_counts);
struct {
std::string name;
std::string type;
int level;
- uint64_t num_sharing;
- } Cases[] = {{"hw.l1dcachesize", "Data", 1, CacheCounts[1]},
- {"hw.l1icachesize", "Instruction", 1, CacheCounts[1]},
- {"hw.l2cachesize", "Unified", 2, CacheCounts[2]},
- {"hw.l3cachesize", "Unified", 3, CacheCounts[3]}};
- for (auto& C : Cases) {
+ int num_sharing;
+ } cases[] = {{"hw.l1dcachesize", "Data", 1, cache_counts[1]},
+ {"hw.l1icachesize", "Instruction", 1, cache_counts[1]},
+ {"hw.l2cachesize", "Unified", 2, cache_counts[2]},
+ {"hw.l3cachesize", "Unified", 3, cache_counts[3]}};
+ for (auto& c : cases) {
int val;
- if (!GetSysctl(C.name, &val)) continue;
+ if (!GetSysctl(c.name, &val)) continue;
CPUInfo::CacheInfo info;
- info.type = C.type;
- info.level = C.level;
+ info.type = c.type;
+ info.level = c.level;
info.size = val;
- info.num_sharing = static_cast<int>(C.num_sharing);
+ info.num_sharing = c.num_sharing;
res.push_back(std::move(info));
}
return res;
@@ -325,7 +328,7 @@ std::vector<CPUInfo::CacheInfo> GetCacheSizesWindows() {
using UPtr = std::unique_ptr<PInfo, decltype(&std::free)>;
GetLogicalProcessorInformation(nullptr, &buffer_size);
- UPtr buff((PInfo*)malloc(buffer_size), &std::free);
+ UPtr buff(static_cast<PInfo*>(std::malloc(buffer_size)), &std::free);
if (!GetLogicalProcessorInformation(buff.get(), &buffer_size))
PrintErrorAndDie("Failed during call to GetLogicalProcessorInformation: ",
GetLastError());
@@ -336,15 +339,16 @@ std::vector<CPUInfo::CacheInfo> GetCacheSizesWindows() {
for (; it != end; ++it) {
if (it->Relationship != RelationCache) continue;
using BitSet = std::bitset<sizeof(ULONG_PTR) * CHAR_BIT>;
- BitSet B(it->ProcessorMask);
+ BitSet b(it->ProcessorMask);
// To prevent duplicates, only consider caches where CPU 0 is specified
- if (!B.test(0)) continue;
- CInfo* Cache = &it->Cache;
+ if (!b.test(0)) continue;
+ const CInfo& cache = it->Cache;
CPUInfo::CacheInfo C;
- C.num_sharing = static_cast<int>(B.count());
- C.level = Cache->Level;
- C.size = Cache->Size;
- switch (Cache->Type) {
+ C.num_sharing = static_cast<int>(b.count());
+ C.level = cache.Level;
+ C.size = cache.Size;
+ C.type = "Unknown";
+ switch (cache.Type) {
case CacheUnified:
C.type = "Unified";
break;
@@ -357,9 +361,6 @@ std::vector<CPUInfo::CacheInfo> GetCacheSizesWindows() {
case CacheTrace:
C.type = "Trace";
break;
- default:
- C.type = "Unknown";
- break;
}
res.push_back(C);
}
@@ -368,29 +369,29 @@ std::vector<CPUInfo::CacheInfo> GetCacheSizesWindows() {
#elif BENCHMARK_OS_QNX
std::vector<CPUInfo::CacheInfo> GetCacheSizesQNX() {
std::vector<CPUInfo::CacheInfo> res;
- struct cacheattr_entry *cache = SYSPAGE_ENTRY(cacheattr);
+ struct cacheattr_entry* cache = SYSPAGE_ENTRY(cacheattr);
uint32_t const elsize = SYSPAGE_ELEMENT_SIZE(cacheattr);
- int num = SYSPAGE_ENTRY_SIZE(cacheattr) / elsize ;
- for(int i = 0; i < num; ++i ) {
+ int num = SYSPAGE_ENTRY_SIZE(cacheattr) / elsize;
+ for (int i = 0; i < num; ++i) {
CPUInfo::CacheInfo info;
- switch (cache->flags){
- case CACHE_FLAG_INSTR :
+ switch (cache->flags) {
+ case CACHE_FLAG_INSTR:
info.type = "Instruction";
info.level = 1;
break;
- case CACHE_FLAG_DATA :
+ case CACHE_FLAG_DATA:
info.type = "Data";
info.level = 1;
break;
- case CACHE_FLAG_UNIFIED :
+ case CACHE_FLAG_UNIFIED:
info.type = "Unified";
info.level = 2;
break;
- case CACHE_FLAG_SHARED :
+ case CACHE_FLAG_SHARED:
info.type = "Shared";
info.level = 3;
break;
- default :
+ default:
continue;
break;
}
@@ -410,6 +411,8 @@ std::vector<CPUInfo::CacheInfo> GetCacheSizes() {
return GetCacheSizesWindows();
#elif defined(BENCHMARK_OS_QNX)
return GetCacheSizesQNX();
+#elif defined(BENCHMARK_OS_QURT)
+ return std::vector<CPUInfo::CacheInfo>();
#else
return GetCacheSizesFromKVFS();
#endif
@@ -418,24 +421,32 @@ std::vector<CPUInfo::CacheInfo> GetCacheSizes() {
std::string GetSystemName() {
#if defined(BENCHMARK_OS_WINDOWS)
std::string str;
- const unsigned COUNT = MAX_COMPUTERNAME_LENGTH+1;
- TCHAR hostname[COUNT] = {'\0'};
+ static constexpr int COUNT = MAX_COMPUTERNAME_LENGTH + 1;
+ TCHAR hostname[COUNT] = {'\0'};
DWORD DWCOUNT = COUNT;
- if (!GetComputerName(hostname, &DWCOUNT))
- return std::string("");
+ if (!GetComputerName(hostname, &DWCOUNT)) return std::string("");
#ifndef UNICODE
str = std::string(hostname, DWCOUNT);
#else
- //Using wstring_convert, Is deprecated in C++17
- using convert_type = std::codecvt_utf8<wchar_t>;
- std::wstring_convert<convert_type, wchar_t> converter;
- std::wstring wStr(hostname, DWCOUNT);
- str = converter.to_bytes(wStr);
+ // `WideCharToMultiByte` returns `0` when conversion fails.
+ int len = WideCharToMultiByte(CP_UTF8, WC_ERR_INVALID_CHARS, hostname,
+ DWCOUNT, NULL, 0, NULL, NULL);
+ str.resize(len);
+ WideCharToMultiByte(CP_UTF8, WC_ERR_INVALID_CHARS, hostname, DWCOUNT, &str[0],
+ str.size(), NULL, NULL);
#endif
return str;
-#else // defined(BENCHMARK_OS_WINDOWS)
+#elif defined(BENCHMARK_OS_QURT)
+ std::string str = "Hexagon DSP";
+ qurt_arch_version_t arch_version_struct;
+ if (qurt_sysenv_get_arch_version(&arch_version_struct) == QURT_EOK) {
+ str += " v";
+ str += std::to_string(arch_version_struct.arch_version);
+ }
+ return str;
+#else
#ifndef HOST_NAME_MAX
-#ifdef BENCHMARK_HAS_SYSCTL // BSD/Mac Doesnt have HOST_NAME_MAX defined
+#ifdef BENCHMARK_HAS_SYSCTL // BSD/Mac doesn't have HOST_NAME_MAX defined
#define HOST_NAME_MAX 64
#elif defined(BENCHMARK_OS_NACL)
#define HOST_NAME_MAX 64
@@ -443,22 +454,24 @@ std::string GetSystemName() {
#define HOST_NAME_MAX 154
#elif defined(BENCHMARK_OS_RTEMS)
#define HOST_NAME_MAX 256
+#elif defined(BENCHMARK_OS_SOLARIS)
+#define HOST_NAME_MAX MAXHOSTNAMELEN
#else
-#warning "HOST_NAME_MAX not defined. using 64"
+#pragma message("HOST_NAME_MAX not defined. using 64")
#define HOST_NAME_MAX 64
#endif
-#endif // def HOST_NAME_MAX
+#endif // def HOST_NAME_MAX
char hostname[HOST_NAME_MAX];
int retVal = gethostname(hostname, HOST_NAME_MAX);
if (retVal != 0) return std::string("");
return std::string(hostname);
-#endif // Catch-all POSIX block.
+#endif // Catch-all POSIX block.
}
int GetNumCPUs() {
#ifdef BENCHMARK_HAS_SYSCTL
- int NumCPU = -1;
- if (GetSysctl("hw.ncpu", &NumCPU)) return NumCPU;
+ int num_cpu = -1;
+ if (GetSysctl("hw.ncpu", &num_cpu)) return num_cpu;
fprintf(stderr, "Err: %s\n", strerror(errno));
std::exit(EXIT_FAILURE);
#elif defined(BENCHMARK_OS_WINDOWS)
@@ -472,18 +485,23 @@ int GetNumCPUs() {
// group
#elif defined(BENCHMARK_OS_SOLARIS)
// Returns -1 in case of a failure.
- int NumCPU = sysconf(_SC_NPROCESSORS_ONLN);
- if (NumCPU < 0) {
- fprintf(stderr,
- "sysconf(_SC_NPROCESSORS_ONLN) failed with error: %s\n",
+ long num_cpu = sysconf(_SC_NPROCESSORS_ONLN);
+ if (num_cpu < 0) {
+ fprintf(stderr, "sysconf(_SC_NPROCESSORS_ONLN) failed with error: %s\n",
strerror(errno));
}
- return NumCPU;
+ return (int)num_cpu;
#elif defined(BENCHMARK_OS_QNX)
return static_cast<int>(_syspage_ptr->num_cpu);
+#elif defined(BENCHMARK_OS_QURT)
+ qurt_sysenv_max_hthreads_t hardware_threads;
+ if (qurt_sysenv_get_max_hw_threads(&hardware_threads) != QURT_EOK) {
+ hardware_threads.max_hthreads = 1;
+ }
+ return hardware_threads.max_hthreads;
#else
- int NumCPUs = 0;
- int MaxID = -1;
+ int num_cpus = 0;
+ int max_id = -1;
std::ifstream f("/proc/cpuinfo");
if (!f.is_open()) {
std::cerr << "failed to open /proc/cpuinfo\n";
@@ -493,20 +511,21 @@ int GetNumCPUs() {
std::string ln;
while (std::getline(f, ln)) {
if (ln.empty()) continue;
- size_t SplitIdx = ln.find(':');
+ std::size_t split_idx = ln.find(':');
std::string value;
#if defined(__s390__)
// s390 has another format in /proc/cpuinfo
// it needs to be parsed differently
- if (SplitIdx != std::string::npos) value = ln.substr(Key.size()+1,SplitIdx-Key.size()-1);
+ if (split_idx != std::string::npos)
+ value = ln.substr(Key.size() + 1, split_idx - Key.size() - 1);
#else
- if (SplitIdx != std::string::npos) value = ln.substr(SplitIdx + 1);
+ if (split_idx != std::string::npos) value = ln.substr(split_idx + 1);
#endif
if (ln.size() >= Key.size() && ln.compare(0, Key.size(), Key) == 0) {
- NumCPUs++;
+ num_cpus++;
if (!value.empty()) {
- int CurID = benchmark::stoi(value);
- MaxID = std::max(CurID, MaxID);
+ const int cur_id = benchmark::stoi(value);
+ max_id = std::max(cur_id, max_id);
}
}
}
@@ -520,17 +539,95 @@ int GetNumCPUs() {
}
f.close();
- if ((MaxID + 1) != NumCPUs) {
+ if ((max_id + 1) != num_cpus) {
fprintf(stderr,
"CPU ID assignments in /proc/cpuinfo seem messed up."
" This is usually caused by a bad BIOS.\n");
}
- return NumCPUs;
+ return num_cpus;
#endif
BENCHMARK_UNREACHABLE();
}
-double GetCPUCyclesPerSecond() {
+class ThreadAffinityGuard final {
+ public:
+ ThreadAffinityGuard() : reset_affinity(SetAffinity()) {
+ if (!reset_affinity)
+ std::cerr << "***WARNING*** Failed to set thread affinity. Estimated CPU "
+ "frequency may be incorrect."
+ << std::endl;
+ }
+
+ ~ThreadAffinityGuard() {
+ if (!reset_affinity) return;
+
+#if defined(BENCHMARK_HAS_PTHREAD_AFFINITY)
+ int ret = pthread_setaffinity_np(self, sizeof(previous_affinity),
+ &previous_affinity);
+ if (ret == 0) return;
+#elif defined(BENCHMARK_OS_WINDOWS_WIN32)
+ DWORD_PTR ret = SetThreadAffinityMask(self, previous_affinity);
+ if (ret != 0) return;
+#endif // def BENCHMARK_HAS_PTHREAD_AFFINITY
+ PrintErrorAndDie("Failed to reset thread affinity");
+ }
+
+ ThreadAffinityGuard(ThreadAffinityGuard&&) = delete;
+ ThreadAffinityGuard(const ThreadAffinityGuard&) = delete;
+ ThreadAffinityGuard& operator=(ThreadAffinityGuard&&) = delete;
+ ThreadAffinityGuard& operator=(const ThreadAffinityGuard&) = delete;
+
+ private:
+ bool SetAffinity() {
+#if defined(BENCHMARK_HAS_PTHREAD_AFFINITY)
+ int ret;
+ self = pthread_self();
+ ret = pthread_getaffinity_np(self, sizeof(previous_affinity),
+ &previous_affinity);
+ if (ret != 0) return false;
+
+ cpu_set_t affinity;
+ memcpy(&affinity, &previous_affinity, sizeof(affinity));
+
+ bool is_first_cpu = true;
+
+ for (int i = 0; i < CPU_SETSIZE; ++i)
+ if (CPU_ISSET(i, &affinity)) {
+ if (is_first_cpu)
+ is_first_cpu = false;
+ else
+ CPU_CLR(i, &affinity);
+ }
+
+ if (is_first_cpu) return false;
+
+ ret = pthread_setaffinity_np(self, sizeof(affinity), &affinity);
+ return ret == 0;
+#elif defined(BENCHMARK_OS_WINDOWS_WIN32)
+ self = GetCurrentThread();
+ DWORD_PTR mask = static_cast<DWORD_PTR>(1) << GetCurrentProcessorNumber();
+ previous_affinity = SetThreadAffinityMask(self, mask);
+ return previous_affinity != 0;
+#else
+ return false;
+#endif // def BENCHMARK_HAS_PTHREAD_AFFINITY
+ }
+
+#if defined(BENCHMARK_HAS_PTHREAD_AFFINITY)
+ pthread_t self;
+ cpu_set_t previous_affinity;
+#elif defined(BENCHMARK_OS_WINDOWS_WIN32)
+ HANDLE self;
+ DWORD_PTR previous_affinity;
+#endif // def BENCHMARK_HAS_PTHREAD_AFFINITY
+ bool reset_affinity;
+};
+
+double GetCPUCyclesPerSecond(CPUInfo::Scaling scaling) {
+  // Currently, scaling is only used on the Linux path here;
+  // suppress diagnostics about it being unused on other paths.
+ (void)scaling;
+
#if defined BENCHMARK_OS_LINUX || defined BENCHMARK_OS_CYGWIN
long freq;
@@ -541,8 +638,15 @@ double GetCPUCyclesPerSecond() {
// cannot always be relied upon. The same reasons apply to /proc/cpuinfo as
// well.
if (ReadFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz", &freq)
- // If CPU scaling is in effect, we want to use the *maximum* frequency,
- // not whatever CPU speed some random processor happens to be using now.
+ // If CPU scaling is disabled, use the *current* frequency.
+ // Note that we specifically don't want to read cpuinfo_cur_freq,
+ // because it is only readable by root.
+ || (scaling == CPUInfo::Scaling::DISABLED &&
+ ReadFromFile("/sys/devices/system/cpu/cpu0/cpufreq/scaling_cur_freq",
+ &freq))
+ // Otherwise, if CPU scaling may be in effect, we want to use
+ // the *maximum* frequency, not whatever CPU speed some random processor
+ // happens to be using now.
|| ReadFromFile("/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq",
&freq)) {
// The value is in kHz (as the file name suggests). For example, on a
@@ -559,7 +663,7 @@ double GetCPUCyclesPerSecond() {
return error_value;
}
- auto startsWithKey = [](std::string const& Value, std::string const& Key) {
+ auto StartsWithKey = [](std::string const& Value, std::string const& Key) {
if (Key.size() > Value.size()) return false;
auto Cmp = [&](char X, char Y) {
return std::tolower(X) == std::tolower(Y);
@@ -570,18 +674,18 @@ double GetCPUCyclesPerSecond() {
std::string ln;
while (std::getline(f, ln)) {
if (ln.empty()) continue;
- size_t SplitIdx = ln.find(':');
+ std::size_t split_idx = ln.find(':');
std::string value;
- if (SplitIdx != std::string::npos) value = ln.substr(SplitIdx + 1);
+ if (split_idx != std::string::npos) value = ln.substr(split_idx + 1);
// When parsing the "cpu MHz" and "bogomips" (fallback) entries, we only
// accept positive values. Some environments (virtual machines) report zero,
// which would cause infinite looping in WallTime_Init.
- if (startsWithKey(ln, "cpu MHz")) {
+ if (StartsWithKey(ln, "cpu MHz")) {
if (!value.empty()) {
double cycles_per_second = benchmark::stod(value) * 1000000.0;
if (cycles_per_second > 0) return cycles_per_second;
}
- } else if (startsWithKey(ln, "bogomips")) {
+ } else if (StartsWithKey(ln, "bogomips")) {
if (!value.empty()) {
bogo_clock = benchmark::stod(value) * 1000000.0;
if (bogo_clock < 0.0) bogo_clock = error_value;
@@ -603,7 +707,7 @@ double GetCPUCyclesPerSecond() {
if (bogo_clock >= 0.0) return bogo_clock;
#elif defined BENCHMARK_HAS_SYSCTL
- constexpr auto* FreqStr =
+ constexpr auto* freqStr =
#if defined(BENCHMARK_OS_FREEBSD) || defined(BENCHMARK_OS_NETBSD)
"machdep.tsc_freq";
#elif defined BENCHMARK_OS_OPENBSD
@@ -615,14 +719,17 @@ double GetCPUCyclesPerSecond() {
#endif
unsigned long long hz = 0;
#if defined BENCHMARK_OS_OPENBSD
- if (GetSysctl(FreqStr, &hz)) return hz * 1000000;
+ if (GetSysctl(freqStr, &hz)) return hz * 1000000;
#else
- if (GetSysctl(FreqStr, &hz)) return hz;
+ if (GetSysctl(freqStr, &hz)) return hz;
#endif
fprintf(stderr, "Unable to determine clock rate from sysctl: %s: %s\n",
- FreqStr, strerror(errno));
+ freqStr, strerror(errno));
+ fprintf(stderr,
+ "This does not affect benchmark measurements, only the "
+ "metadata output.\n");
-#elif defined BENCHMARK_OS_WINDOWS
+#elif defined BENCHMARK_OS_WINDOWS_WIN32
// In NT, read MHz from the registry. If we fail to do so or we're in win9x
// then make a crude estimate.
DWORD data, data_size = sizeof(data);
@@ -631,15 +738,16 @@ double GetCPUCyclesPerSecond() {
SHGetValueA(HKEY_LOCAL_MACHINE,
"HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0",
"~MHz", nullptr, &data, &data_size)))
- return static_cast<double>((int64_t)data *
- (int64_t)(1000 * 1000)); // was mhz
-#elif defined (BENCHMARK_OS_SOLARIS)
- kstat_ctl_t *kc = kstat_open();
+ return static_cast<double>(static_cast<int64_t>(data) *
+ static_cast<int64_t>(1000 * 1000)); // was mhz
+#elif defined(BENCHMARK_OS_SOLARIS)
+ kstat_ctl_t* kc = kstat_open();
if (!kc) {
std::cerr << "failed to open /dev/kstat\n";
return -1;
}
- kstat_t *ksp = kstat_lookup(kc, (char*)"cpu_info", -1, (char*)"cpu_info0");
+ kstat_t* ksp = kstat_lookup(kc, const_cast<char*>("cpu_info"), -1,
+ const_cast<char*>("cpu_info0"));
if (!ksp) {
std::cerr << "failed to lookup in /dev/kstat\n";
return -1;
@@ -648,8 +756,8 @@ double GetCPUCyclesPerSecond() {
std::cerr << "failed to read from /dev/kstat\n";
return -1;
}
- kstat_named_t *knp =
- (kstat_named_t*)kstat_data_lookup(ksp, (char*)"current_clock_Hz");
+ kstat_named_t* knp = (kstat_named_t*)kstat_data_lookup(
+ ksp, const_cast<char*>("current_clock_Hz"));
if (!knp) {
std::cerr << "failed to lookup data in /dev/kstat\n";
return -1;
@@ -662,23 +770,55 @@ double GetCPUCyclesPerSecond() {
double clock_hz = knp->value.ui64;
kstat_close(kc);
return clock_hz;
-#elif defined (BENCHMARK_OS_QNX)
+#elif defined(BENCHMARK_OS_QNX)
return static_cast<double>((int64_t)(SYSPAGE_ENTRY(cpuinfo)->speed) *
(int64_t)(1000 * 1000));
+#elif defined(BENCHMARK_OS_QURT)
+ // QuRT doesn't provide any API to query Hexagon frequency.
+ return 1000000000;
#endif
// If we've fallen through, attempt to roughly estimate the CPU clock rate.
- const int estimate_time_ms = 1000;
+
+ // Make sure to use the same cycle counter when starting and stopping the
+ // cycle timer. We just pin the current thread to a cpu in the previous
+ // affinity set.
+ ThreadAffinityGuard affinity_guard;
+
+ static constexpr double estimate_time_s = 1.0;
+ const double start_time = ChronoClockNow();
const auto start_ticks = cycleclock::Now();
- SleepForMilliseconds(estimate_time_ms);
- return static_cast<double>(cycleclock::Now() - start_ticks);
+
+ // Impose load instead of calling sleep() to make sure the cycle counter
+ // works.
+ using PRNG = std::minstd_rand;
+ using Result = PRNG::result_type;
+ PRNG rng(static_cast<Result>(start_ticks));
+
+ Result state = 0;
+
+ do {
+ static constexpr size_t batch_size = 10000;
+ rng.discard(batch_size);
+ state += rng();
+
+ } while (ChronoClockNow() - start_time < estimate_time_s);
+
+ DoNotOptimize(state);
+
+ const auto end_ticks = cycleclock::Now();
+ const double end_time = ChronoClockNow();
+
+ return static_cast<double>(end_ticks - start_ticks) / (end_time - start_time);
+  // Reset the affinity of the current thread when the lifetime of
+  // affinity_guard ends.
}
std::vector<double> GetLoadAvg() {
#if (defined BENCHMARK_OS_FREEBSD || defined(BENCHMARK_OS_LINUX) || \
defined BENCHMARK_OS_MACOSX || defined BENCHMARK_OS_NETBSD || \
defined BENCHMARK_OS_OPENBSD || defined BENCHMARK_OS_DRAGONFLY) && \
- !defined(__ANDROID__)
- constexpr int kMaxSamples = 3;
+ !(defined(__ANDROID__) && __ANDROID_API__ < 29)
+ static constexpr int kMaxSamples = 3;
std::vector<double> res(kMaxSamples, 0.0);
const int nelem = getloadavg(res.data(), kMaxSamples);
if (nelem < 1) {
@@ -701,12 +841,11 @@ const CPUInfo& CPUInfo::Get() {
CPUInfo::CPUInfo()
: num_cpus(GetNumCPUs()),
- cycles_per_second(GetCPUCyclesPerSecond()),
- caches(GetCacheSizes()),
scaling(CpuScaling(num_cpus)),
+ cycles_per_second(GetCPUCyclesPerSecond(scaling)),
+ caches(GetCacheSizes()),
load_avg(GetLoadAvg()) {}
-
const SystemInfo& SystemInfo::Get() {
static const SystemInfo* info = new SystemInfo();
return *info;
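
The new fallback above estimates the clock rate by pinning the thread (ThreadAffinityGuard), spinning a PRNG for about one second of wall time, and dividing elapsed cycle-counter ticks by elapsed seconds. The editorial sketch below shows the same idea with standard facilities only; it is not the library's implementation. __rdtsc() is an x86 GCC/Clang-specific stand-in for cycleclock::Now(), std::chrono::steady_clock stands in for ChronoClockNow(), and thread pinning is omitted.

// Editorial sketch: estimate cycles/second by timing a busy loop (x86 only).
#include <chrono>
#include <cstdint>
#include <random>
#include <x86intrin.h>

double EstimateCyclesPerSecond() {
  const auto start_time = std::chrono::steady_clock::now();
  const std::uint64_t start_ticks = __rdtsc();

  // Impose load instead of sleeping, mirroring the code above, so the cycle
  // counter is exercised for the whole interval.
  std::minstd_rand rng(
      static_cast<std::minstd_rand::result_type>(start_ticks));
  std::minstd_rand::result_type state = 0;
  double elapsed_s = 0.0;
  do {
    rng.discard(10000);
    state += rng();
    elapsed_s = std::chrono::duration<double>(std::chrono::steady_clock::now() -
                                              start_time)
                    .count();
  } while (elapsed_s < 1.0);

  const std::uint64_t end_ticks = __rdtsc();
  volatile std::minstd_rand::result_type sink = state;  // keep the loop alive
  (void)sink;
  return static_cast<double>(end_ticks - start_ticks) / elapsed_s;
}
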
diff --git a/src/thread_manager.h b/src/thread_manager.h
index 28e2dd5..819b3c4 100644
--- a/src/thread_manager.h
+++ b/src/thread_manager.h
@@ -36,7 +36,6 @@ class ThreadManager {
[this]() { return alive_threads_ == 0; });
}
- public:
struct Result {
IterationCount iterations = 0;
double real_time_used = 0;
@@ -44,8 +43,8 @@ class ThreadManager {
double manual_time_used = 0;
int64_t complexity_n = 0;
std::string report_label_;
- std::string error_message_;
- bool has_error_ = false;
+ std::string skip_message_;
+ internal::Skipped skipped_ = internal::NotSkipped;
UserCounters counters;
};
GUARDED_BY(GetBenchmarkMutex()) Result results;
diff --git a/src/thread_timer.h b/src/thread_timer.h
index 1703ca0..eb23f59 100644
--- a/src/thread_timer.h
+++ b/src/thread_timer.h
@@ -28,7 +28,7 @@ class ThreadTimer {
// Called by each thread
void StopTimer() {
- CHECK(running_);
+ BM_CHECK(running_);
running_ = false;
real_time_used_ += ChronoClockNow() - start_real_time_;
// Floating point error can result in the subtraction producing a negative
@@ -44,19 +44,19 @@ class ThreadTimer {
// REQUIRES: timer is not running
double real_time_used() const {
- CHECK(!running_);
+ BM_CHECK(!running_);
return real_time_used_;
}
// REQUIRES: timer is not running
double cpu_time_used() const {
- CHECK(!running_);
+ BM_CHECK(!running_);
return cpu_time_used_;
}
// REQUIRES: timer is not running
double manual_time_used() const {
- CHECK(!running_);
+ BM_CHECK(!running_);
return manual_time_used_;
}
diff --git a/src/timers.cc b/src/timers.cc
index 1d3ab9a..b23feea 100644
--- a/src/timers.cc
+++ b/src/timers.cc
@@ -13,6 +13,7 @@
// limitations under the License.
#include "timers.h"
+
#include "internal_macros.h"
#ifdef BENCHMARK_OS_WINDOWS
@@ -22,7 +23,7 @@
#include <windows.h>
#else
#include <fcntl.h>
-#ifndef BENCHMARK_OS_FUCHSIA
+#if !defined(BENCHMARK_OS_FUCHSIA) && !defined(BENCHMARK_OS_QURT)
#include <sys/resource.h>
#endif
#include <sys/time.h>
@@ -37,6 +38,9 @@
#include <mach/mach_port.h>
#include <mach/thread_act.h>
#endif
+#if defined(BENCHMARK_OS_QURT)
+#include <qurt.h>
+#endif
#endif
#ifdef BENCHMARK_OS_EMSCRIPTEN
@@ -55,7 +59,6 @@
#include "check.h"
#include "log.h"
-#include "sleep.h"
#include "string_util.h"
namespace benchmark {
@@ -64,6 +67,9 @@ namespace benchmark {
#if defined(__GNUC__)
#pragma GCC diagnostic ignored "-Wunused-function"
#endif
+#if defined(__NVCOMPILER)
+#pragma diag_suppress declared_but_not_referenced
+#endif
namespace {
#if defined(BENCHMARK_OS_WINDOWS)
@@ -78,7 +84,7 @@ double MakeTime(FILETIME const& kernel_time, FILETIME const& user_time) {
static_cast<double>(user.QuadPart)) *
1e-7;
}
-#elif !defined(BENCHMARK_OS_FUCHSIA)
+#elif !defined(BENCHMARK_OS_FUCHSIA) && !defined(BENCHMARK_OS_QURT)
double MakeTime(struct rusage const& ru) {
return (static_cast<double>(ru.ru_utime.tv_sec) +
static_cast<double>(ru.ru_utime.tv_usec) * 1e-6 +
@@ -118,15 +124,19 @@ double ProcessCPUUsage() {
&user_time))
return MakeTime(kernel_time, user_time);
DiagnoseAndExit("GetProccessTimes() failed");
+#elif defined(BENCHMARK_OS_QURT)
+ return static_cast<double>(
+ qurt_timer_timetick_to_us(qurt_timer_get_ticks())) *
+ 1.0e-6;
#elif defined(BENCHMARK_OS_EMSCRIPTEN)
// clock_gettime(CLOCK_PROCESS_CPUTIME_ID, ...) returns 0 on Emscripten.
// Use Emscripten-specific API. Reported CPU time would be exactly the
// same as total time, but this is ok because there aren't long-latency
- // syncronous system calls in Emscripten.
+ // synchronous system calls in Emscripten.
return emscripten_get_now() * 1e-3;
#elif defined(CLOCK_PROCESS_CPUTIME_ID) && !defined(BENCHMARK_OS_MACOSX)
- // FIXME We want to use clock_gettime, but its not available in MacOS 10.11. See
- // https://github.com/google/benchmark/pull/292
+ // FIXME We want to use clock_gettime, but its not available in MacOS 10.11.
+ // See https://github.com/google/benchmark/pull/292
struct timespec spec;
if (clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &spec) == 0)
return MakeTime(spec);
@@ -148,14 +158,19 @@ double ThreadCPUUsage() {
GetThreadTimes(this_thread, &creation_time, &exit_time, &kernel_time,
&user_time);
return MakeTime(kernel_time, user_time);
+#elif defined(BENCHMARK_OS_QURT)
+ return static_cast<double>(
+ qurt_timer_timetick_to_us(qurt_timer_get_ticks())) *
+ 1.0e-6;
#elif defined(BENCHMARK_OS_MACOSX)
- // FIXME We want to use clock_gettime, but its not available in MacOS 10.11. See
- // https://github.com/google/benchmark/pull/292
+ // FIXME We want to use clock_gettime, but its not available in MacOS 10.11.
+ // See https://github.com/google/benchmark/pull/292
mach_msg_type_number_t count = THREAD_BASIC_INFO_COUNT;
thread_basic_info_data_t info;
mach_port_t thread = pthread_mach_thread_np(pthread_self());
- if (thread_info(thread, THREAD_BASIC_INFO, (thread_info_t)&info, &count) ==
- KERN_SUCCESS) {
+ if (thread_info(thread, THREAD_BASIC_INFO,
+ reinterpret_cast<thread_info_t>(&info),
+ &count) == KERN_SUCCESS) {
return MakeTime(info);
}
DiagnoseAndExit("ThreadCPUUsage() failed when evaluating thread_info");
@@ -190,15 +205,26 @@ std::string LocalDateTimeString() {
std::size_t timestamp_len;
long int offset_minutes;
char tz_offset_sign = '+';
- // Long enough buffers to avoid format-overflow warnings
- char tz_offset[128];
+ // tz_offset is set in one of three ways:
+ // * strftime with %z - This either returns empty or the ISO 8601 time. The
+ // maximum length an
+ // ISO 8601 string can be is 7 (e.g. -03:30, plus trailing zero).
+ // * snprintf with %c%02li:%02li - The maximum length is 41 (one for %c, up to
+ // 19 for %02li,
+ // one for :, up to 19 %02li, plus trailing zero).
+ // * A fixed string of "-00:00". The maximum length is 7 (-00:00, plus
+ // trailing zero).
+ //
+ // Thus, the maximum size this needs to be is 41.
+ char tz_offset[41];
+ // Long enough buffer to avoid format-overflow warnings
char storage[128];
#if defined(BENCHMARK_OS_WINDOWS)
- std::tm *timeinfo_p = ::localtime(&now);
+ std::tm* timeinfo_p = ::localtime(&now);
#else
std::tm timeinfo;
- std::tm *timeinfo_p = &timeinfo;
+ std::tm* timeinfo_p = &timeinfo;
::localtime_r(&now, &timeinfo);
#endif
@@ -215,10 +241,11 @@ std::string LocalDateTimeString() {
tz_offset_sign = '-';
}
- tz_len = ::snprintf(tz_offset, sizeof(tz_offset), "%c%02li:%02li",
- tz_offset_sign, offset_minutes / 100, offset_minutes % 100);
- CHECK(tz_len == kTzOffsetLen);
- ((void)tz_len); // Prevent unused variable warning in optimized build.
+ tz_len =
+ ::snprintf(tz_offset, sizeof(tz_offset), "%c%02li:%02li",
+ tz_offset_sign, offset_minutes / 100, offset_minutes % 100);
+ BM_CHECK(tz_len == kTzOffsetLen);
+ ((void)tz_len); // Prevent unused variable warning in optimized build.
} else {
// Unknown offset. RFC3339 specifies that unknown local offsets should be
// written as UTC time with -00:00 timezone.
@@ -232,9 +259,9 @@ std::string LocalDateTimeString() {
strncpy(tz_offset, "-00:00", kTzOffsetLen + 1);
}
- timestamp_len = std::strftime(storage, sizeof(storage), "%Y-%m-%dT%H:%M:%S",
- timeinfo_p);
- CHECK(timestamp_len == kTimestampLen);
+ timestamp_len =
+ std::strftime(storage, sizeof(storage), "%Y-%m-%dT%H:%M:%S", timeinfo_p);
+ BM_CHECK(timestamp_len == kTimestampLen);
// Prevent unused variable warning in optimized build.
((void)kTimestampLen);
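
A standalone sketch of the timestamp strategy above, not the library's exact code: format the local time, then re-emit strftime's "+hhmm" %z offset as "+hh:mm", falling back to "-00:00" when the offset is unknown, as RFC 3339 prescribes.

#include <cstdio>
#include <ctime>
#include <string>

std::string LocalRfc3339Now() {
  std::time_t now = std::time(nullptr);
  std::tm tm_buf{};
#if defined(_WIN32)
  localtime_s(&tm_buf, &now);
#else
  localtime_r(&now, &tm_buf);
#endif
  char stamp[32];
  std::strftime(stamp, sizeof(stamp), "%Y-%m-%dT%H:%M:%S", &tm_buf);
  char raw[8];                // strftime %z yields "+hhmm" or an empty string
  char offset[8] = "-00:00";  // RFC 3339 spelling for an unknown local offset
  if (std::strftime(raw, sizeof(raw), "%z", &tm_buf) == 5) {
    std::snprintf(offset, sizeof(offset), "%.3s:%.2s", raw, raw + 3);
  }
  return std::string(stamp) + offset;
}
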
diff --git a/test/AssemblyTests.cmake b/test/AssemblyTests.cmake
index 3d07858..c43c711 100644
--- a/test/AssemblyTests.cmake
+++ b/test/AssemblyTests.cmake
@@ -1,3 +1,23 @@
+set(CLANG_SUPPORTED_VERSION "5.0.0")
+set(GCC_SUPPORTED_VERSION "5.5.0")
+
+if (CMAKE_CXX_COMPILER_ID MATCHES "Clang")
+ if (NOT CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL ${CLANG_SUPPORTED_VERSION})
+ message (WARNING
+ "Unsupported Clang version " ${CMAKE_CXX_COMPILER_VERSION}
+ ". Expected is " ${CLANG_SUPPORTED_VERSION}
+ ". Assembly tests may be broken.")
+ endif()
+elseif(CMAKE_CXX_COMPILER_ID MATCHES "GNU")
+ if (NOT CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL ${GCC_SUPPORTED_VERSION})
+ message (WARNING
+ "Unsupported GCC version " ${CMAKE_CXX_COMPILER_VERSION}
+ ". Expected is " ${GCC_SUPPORTED_VERSION}
+ ". Assembly tests may be broken.")
+ endif()
+else()
+ message (WARNING "Unsupported compiler. Assembly tests may be broken.")
+endif()
include(split_list)
@@ -23,6 +43,7 @@ string(TOUPPER "${CMAKE_CXX_COMPILER_ID}" ASM_TEST_COMPILER)
macro(add_filecheck_test name)
cmake_parse_arguments(ARG "" "" "CHECK_PREFIXES" ${ARGV})
add_library(${name} OBJECT ${name}.cc)
+ target_link_libraries(${name} PRIVATE benchmark::benchmark)
set_target_properties(${name} PROPERTIES COMPILE_FLAGS "-S ${ASM_TEST_FLAGS}")
set(ASM_OUTPUT_FILE "${CMAKE_CURRENT_BINARY_DIR}/${name}.s")
add_custom_target(copy_${name} ALL
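
The compiler-version gate above exists because the CHECK patterns are written against the exact assembly one compiler emits. A hypothetical flavour of the source add_filecheck_test() consumes (test_add is illustrative, not one of the repository's assembly tests): the file is compiled with -S and the resulting .s is matched by FileCheck against the embedded CHECK lines.

extern "C" {
extern int ExternInt;
}

// CHECK-LABEL: test_add:
extern "C" int test_add() {
  // CHECK: ret
  return ExternInt + 1;
}
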
diff --git a/test/BUILD b/test/BUILD
index 9bb8cb0..ea34fd4 100644
--- a/test/BUILD
+++ b/test/BUILD
@@ -1,8 +1,18 @@
+load("@rules_cc//cc:defs.bzl", "cc_library", "cc_test")
+
+platform(
+ name = "windows",
+ constraint_values = [
+ "@platforms//os:windows",
+ ],
+)
+
TEST_COPTS = [
"-pedantic",
"-pedantic-errors",
"-std=c++11",
"-Wall",
+ "-Wconversion",
"-Wextra",
"-Wshadow",
# "-Wshorten-64-to-32",
@@ -10,64 +20,108 @@ TEST_COPTS = [
"-fstrict-aliasing",
]
-PER_SRC_COPTS = ({
- "cxx03_test.cc": ["-std=c++03"],
- # Some of the issues with DoNotOptimize only occur when optimization is enabled
+# Some of the issues with DoNotOptimize only occur when optimization is enabled
+PER_SRC_COPTS = {
"donotoptimize_test.cc": ["-O3"],
-})
+}
-TEST_ARGS = ["--benchmark_min_time=0.01"]
+TEST_ARGS = ["--benchmark_min_time=0.01s"]
-PER_SRC_TEST_ARGS = ({
+PER_SRC_TEST_ARGS = {
"user_counters_tabular_test.cc": ["--benchmark_counters_tabular=true"],
-})
-
-load("@rules_cc//cc:defs.bzl", "cc_library", "cc_test")
+ "repetitions_test.cc": [" --benchmark_repetitions=3"],
+ "spec_arg_test.cc": ["--benchmark_filter=BM_NotChosen"],
+ "spec_arg_verbosity_test.cc": ["--v=42"],
+}
cc_library(
name = "output_test_helper",
testonly = 1,
srcs = ["output_test_helper.cc"],
hdrs = ["output_test.h"],
- copts = TEST_COPTS,
+ copts = select({
+ "//:windows": [],
+ "//conditions:default": TEST_COPTS,
+ }),
deps = [
"//:benchmark",
"//:benchmark_internal_headers",
],
)
+# Tests that use gtest. These rely on `gtest_main`.
+[
+ cc_test(
+ name = test_src[:-len(".cc")],
+ size = "small",
+ srcs = [test_src],
+ copts = select({
+ "//:windows": [],
+ "//conditions:default": TEST_COPTS,
+ }) + PER_SRC_COPTS.get(test_src, []),
+ deps = [
+ "//:benchmark",
+ "//:benchmark_internal_headers",
+ "@com_google_googletest//:gtest",
+ "@com_google_googletest//:gtest_main",
+ ],
+ )
+ for test_src in glob(["*_gtest.cc"])
+]
+
+# Tests that do not use gtest. These have their own `main` defined.
[
cc_test(
name = test_src[:-len(".cc")],
size = "small",
srcs = [test_src],
args = TEST_ARGS + PER_SRC_TEST_ARGS.get(test_src, []),
- copts = TEST_COPTS + PER_SRC_COPTS.get(test_src, []),
+ copts = select({
+ "//:windows": [],
+ "//conditions:default": TEST_COPTS,
+ }) + PER_SRC_COPTS.get(test_src, []),
deps = [
":output_test_helper",
"//:benchmark",
"//:benchmark_internal_headers",
- "@com_google_googletest//:gtest",
- ] + (
- ["@com_google_googletest//:gtest_main"] if (test_src[-len("gtest.cc"):] == "gtest.cc") else []
- ),
+ ],
# FIXME: Add support for assembly tests to bazel.
# See Issue #556
# https://github.com/google/benchmark/issues/556
)
for test_src in glob(
- ["*test.cc"],
+ ["*_test.cc"],
exclude = [
"*_assembly_test.cc",
+ "cxx03_test.cc",
"link_main_test.cc",
],
)
]
cc_test(
+ name = "cxx03_test",
+ size = "small",
+ srcs = ["cxx03_test.cc"],
+ copts = TEST_COPTS + ["-std=c++03"],
+ target_compatible_with = select({
+ "//:windows": ["@platforms//:incompatible"],
+ "//conditions:default": [],
+ }),
+ deps = [
+ ":output_test_helper",
+ "//:benchmark",
+ "//:benchmark_internal_headers",
+ ],
+)
+
+cc_test(
name = "link_main_test",
size = "small",
srcs = ["link_main_test.cc"],
- copts = TEST_COPTS,
+ copts = select({
+ "//:windows": [],
+ "//conditions:default": TEST_COPTS,
+ }),
deps = ["//:benchmark_main"],
)
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index c1a3a3f..fd88131 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -1,5 +1,7 @@
# Enable the tests
+set(THREADS_PREFER_PTHREAD_FLAG ON)
+
find_package(Threads REQUIRED)
include(CheckCXXCompilerFlag)
@@ -22,6 +24,10 @@ if( NOT uppercase_CMAKE_BUILD_TYPE STREQUAL "DEBUG" )
endforeach()
endif()
+if (NOT BUILD_SHARED_LIBS)
+ add_definitions(-DBENCHMARK_STATIC_DEFINE)
+endif()
+
check_cxx_compiler_flag(-O3 BENCHMARK_HAS_O3_FLAG)
set(BENCHMARK_O3_FLAG "")
if (BENCHMARK_HAS_O3_FLAG)
@@ -35,10 +41,14 @@ if (DEFINED BENCHMARK_CXX_LINKER_FLAGS)
endif()
add_library(output_test_helper STATIC output_test_helper.cc output_test.h)
+target_link_libraries(output_test_helper PRIVATE benchmark::benchmark)
macro(compile_benchmark_test name)
add_executable(${name} "${name}.cc")
target_link_libraries(${name} benchmark::benchmark ${CMAKE_THREAD_LIBS_INIT})
+ if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "NVHPC")
+ target_compile_options( ${name} PRIVATE --diag_suppress partial_override )
+ endif()
endmacro(compile_benchmark_test)
macro(compile_benchmark_test_with_main name)
@@ -48,20 +58,35 @@ endmacro(compile_benchmark_test_with_main)
macro(compile_output_test name)
add_executable(${name} "${name}.cc" output_test.h)
- target_link_libraries(${name} output_test_helper benchmark::benchmark
+ target_link_libraries(${name} output_test_helper benchmark::benchmark_main
${BENCHMARK_CXX_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT})
endmacro(compile_output_test)
# Demonstration executable
compile_benchmark_test(benchmark_test)
-add_test(NAME benchmark COMMAND benchmark_test --benchmark_min_time=0.01)
+add_test(NAME benchmark COMMAND benchmark_test --benchmark_min_time=0.01s)
+
+compile_benchmark_test(spec_arg_test)
+add_test(NAME spec_arg COMMAND spec_arg_test --benchmark_filter=BM_NotChosen)
+
+compile_benchmark_test(spec_arg_verbosity_test)
+add_test(NAME spec_arg_verbosity COMMAND spec_arg_verbosity_test --v=42)
+
+compile_benchmark_test(benchmark_setup_teardown_test)
+add_test(NAME benchmark_setup_teardown COMMAND benchmark_setup_teardown_test)
compile_benchmark_test(filter_test)
macro(add_filter_test name filter expect)
- add_test(NAME ${name} COMMAND filter_test --benchmark_min_time=0.01 --benchmark_filter=${filter} ${expect})
+ add_test(NAME ${name} COMMAND filter_test --benchmark_min_time=0.01s --benchmark_filter=${filter} ${expect})
add_test(NAME ${name}_list_only COMMAND filter_test --benchmark_list_tests --benchmark_filter=${filter} ${expect})
endmacro(add_filter_test)
+compile_benchmark_test(benchmark_min_time_flag_time_test)
+add_test(NAME min_time_flag_time COMMAND benchmark_min_time_flag_time_test)
+
+compile_benchmark_test(benchmark_min_time_flag_iters_test)
+add_test(NAME min_time_flag_iters COMMAND benchmark_min_time_flag_iters_test)
+
add_filter_test(filter_simple "Foo" 3)
add_filter_test(filter_simple_negative "-Foo" 2)
add_filter_test(filter_suffix "BM_.*" 4)
@@ -82,72 +107,83 @@ add_filter_test(filter_regex_end ".*Ba$" 1)
add_filter_test(filter_regex_end_negative "-.*Ba$" 4)
compile_benchmark_test(options_test)
-add_test(NAME options_benchmarks COMMAND options_test --benchmark_min_time=0.01)
+add_test(NAME options_benchmarks COMMAND options_test --benchmark_min_time=0.01s)
compile_benchmark_test(basic_test)
-add_test(NAME basic_benchmark COMMAND basic_test --benchmark_min_time=0.01)
+add_test(NAME basic_benchmark COMMAND basic_test --benchmark_min_time=0.01s)
+
+compile_output_test(repetitions_test)
+add_test(NAME repetitions_benchmark COMMAND repetitions_test --benchmark_min_time=0.01s --benchmark_repetitions=3)
compile_benchmark_test(diagnostics_test)
-add_test(NAME diagnostics_test COMMAND diagnostics_test --benchmark_min_time=0.01)
+add_test(NAME diagnostics_test COMMAND diagnostics_test --benchmark_min_time=0.01s)
compile_benchmark_test(skip_with_error_test)
-add_test(NAME skip_with_error_test COMMAND skip_with_error_test --benchmark_min_time=0.01)
+add_test(NAME skip_with_error_test COMMAND skip_with_error_test --benchmark_min_time=0.01s)
compile_benchmark_test(donotoptimize_test)
+# Enable errors for deprecated declarations (DoNotOptimize(Tp const& value)).
+check_cxx_compiler_flag(-Werror=deprecated-declarations BENCHMARK_HAS_DEPRECATED_DECLARATIONS_FLAG)
+if (BENCHMARK_HAS_DEPRECATED_DECLARATIONS_FLAG)
+ target_compile_options (donotoptimize_test PRIVATE "-Werror=deprecated-declarations")
+endif()
# Some of the issues with DoNotOptimize only occur when optimization is enabled
check_cxx_compiler_flag(-O3 BENCHMARK_HAS_O3_FLAG)
if (BENCHMARK_HAS_O3_FLAG)
set_target_properties(donotoptimize_test PROPERTIES COMPILE_FLAGS "-O3")
endif()
-add_test(NAME donotoptimize_test COMMAND donotoptimize_test --benchmark_min_time=0.01)
+add_test(NAME donotoptimize_test COMMAND donotoptimize_test --benchmark_min_time=0.01s)
compile_benchmark_test(fixture_test)
-add_test(NAME fixture_test COMMAND fixture_test --benchmark_min_time=0.01)
+add_test(NAME fixture_test COMMAND fixture_test --benchmark_min_time=0.01s)
compile_benchmark_test(register_benchmark_test)
-add_test(NAME register_benchmark_test COMMAND register_benchmark_test --benchmark_min_time=0.01)
+add_test(NAME register_benchmark_test COMMAND register_benchmark_test --benchmark_min_time=0.01s)
compile_benchmark_test(map_test)
-add_test(NAME map_test COMMAND map_test --benchmark_min_time=0.01)
+add_test(NAME map_test COMMAND map_test --benchmark_min_time=0.01s)
compile_benchmark_test(multiple_ranges_test)
-add_test(NAME multiple_ranges_test COMMAND multiple_ranges_test --benchmark_min_time=0.01)
+add_test(NAME multiple_ranges_test COMMAND multiple_ranges_test --benchmark_min_time=0.01s)
compile_benchmark_test(args_product_test)
-add_test(NAME args_product_test COMMAND args_product_test --benchmark_min_time=0.01)
+add_test(NAME args_product_test COMMAND args_product_test --benchmark_min_time=0.01s)
compile_benchmark_test_with_main(link_main_test)
-add_test(NAME link_main_test COMMAND link_main_test --benchmark_min_time=0.01)
+add_test(NAME link_main_test COMMAND link_main_test --benchmark_min_time=0.01s)
compile_output_test(reporter_output_test)
-add_test(NAME reporter_output_test COMMAND reporter_output_test --benchmark_min_time=0.01)
+add_test(NAME reporter_output_test COMMAND reporter_output_test --benchmark_min_time=0.01s)
compile_output_test(templated_fixture_test)
-add_test(NAME templated_fixture_test COMMAND templated_fixture_test --benchmark_min_time=0.01)
+add_test(NAME templated_fixture_test COMMAND templated_fixture_test --benchmark_min_time=0.01s)
compile_output_test(user_counters_test)
-add_test(NAME user_counters_test COMMAND user_counters_test --benchmark_min_time=0.01)
+add_test(NAME user_counters_test COMMAND user_counters_test --benchmark_min_time=0.01s)
+
+compile_output_test(perf_counters_test)
+add_test(NAME perf_counters_test COMMAND perf_counters_test --benchmark_min_time=0.01s --benchmark_perf_counters=CYCLES,BRANCHES)
compile_output_test(internal_threading_test)
-add_test(NAME internal_threading_test COMMAND internal_threading_test --benchmark_min_time=0.01)
+add_test(NAME internal_threading_test COMMAND internal_threading_test --benchmark_min_time=0.01s)
compile_output_test(report_aggregates_only_test)
-add_test(NAME report_aggregates_only_test COMMAND report_aggregates_only_test --benchmark_min_time=0.01)
+add_test(NAME report_aggregates_only_test COMMAND report_aggregates_only_test --benchmark_min_time=0.01s)
compile_output_test(display_aggregates_only_test)
-add_test(NAME display_aggregates_only_test COMMAND display_aggregates_only_test --benchmark_min_time=0.01)
+add_test(NAME display_aggregates_only_test COMMAND display_aggregates_only_test --benchmark_min_time=0.01s)
compile_output_test(user_counters_tabular_test)
-add_test(NAME user_counters_tabular_test COMMAND user_counters_tabular_test --benchmark_counters_tabular=true --benchmark_min_time=0.01)
+add_test(NAME user_counters_tabular_test COMMAND user_counters_tabular_test --benchmark_counters_tabular=true --benchmark_min_time=0.01s)
compile_output_test(user_counters_thousands_test)
-add_test(NAME user_counters_thousands_test COMMAND user_counters_thousands_test --benchmark_min_time=0.01)
+add_test(NAME user_counters_thousands_test COMMAND user_counters_thousands_test --benchmark_min_time=0.01s)
compile_output_test(memory_manager_test)
-add_test(NAME memory_manager_test COMMAND memory_manager_test --benchmark_min_time=0.01)
+add_test(NAME memory_manager_test COMMAND memory_manager_test --benchmark_min_time=0.01s)
-check_cxx_compiler_flag(-std=c++03 BENCHMARK_HAS_CXX03_FLAG)
-if (BENCHMARK_HAS_CXX03_FLAG)
+# MSVC does not allow setting the language standard to C++98/03.
+if(NOT CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
compile_benchmark_test(cxx03_test)
set_target_properties(cxx03_test
PROPERTIES
@@ -158,19 +194,25 @@ if (BENCHMARK_HAS_CXX03_FLAG)
# causing the test to fail to compile. To prevent this we explicitly disable
# the warning.
check_cxx_compiler_flag(-Wno-odr BENCHMARK_HAS_WNO_ODR)
- if (BENCHMARK_ENABLE_LTO AND BENCHMARK_HAS_WNO_ODR)
- set_target_properties(cxx03_test
- PROPERTIES
- LINK_FLAGS "-Wno-odr")
+ check_cxx_compiler_flag(-Wno-lto-type-mismatch BENCHMARK_HAS_WNO_LTO_TYPE_MISMATCH)
+ # Cannot set_target_properties multiple times here because the warnings will
+ # be overwritten on each call
+ set (DISABLE_LTO_WARNINGS "")
+ if (BENCHMARK_HAS_WNO_ODR)
+ set(DISABLE_LTO_WARNINGS "${DISABLE_LTO_WARNINGS} -Wno-odr")
+ endif()
+ if (BENCHMARK_HAS_WNO_LTO_TYPE_MISMATCH)
+ set(DISABLE_LTO_WARNINGS "${DISABLE_LTO_WARNINGS} -Wno-lto-type-mismatch")
endif()
- add_test(NAME cxx03 COMMAND cxx03_test --benchmark_min_time=0.01)
+ set_target_properties(cxx03_test PROPERTIES LINK_FLAGS "${DISABLE_LTO_WARNINGS}")
+ add_test(NAME cxx03 COMMAND cxx03_test --benchmark_min_time=0.01s)
endif()
# Attempt to work around flaky test failures when running on Appveyor servers.
if (DEFINED ENV{APPVEYOR})
- set(COMPLEXITY_MIN_TIME "0.5")
+ set(COMPLEXITY_MIN_TIME "0.5s")
else()
- set(COMPLEXITY_MIN_TIME "0.01")
+ set(COMPLEXITY_MIN_TIME "0.01s")
endif()
compile_output_test(complexity_test)
add_test(NAME complexity_benchmark COMMAND complexity_test --benchmark_min_time=${COMPLEXITY_MIN_TIME})
@@ -193,9 +235,13 @@ if (BENCHMARK_ENABLE_GTEST_TESTS)
add_gtest(benchmark_gtest)
add_gtest(benchmark_name_gtest)
+ add_gtest(benchmark_random_interleaving_gtest)
add_gtest(commandlineflags_gtest)
add_gtest(statistics_gtest)
add_gtest(string_util_gtest)
+ add_gtest(perf_counters_gtest)
+ add_gtest(time_unit_gtest)
+ add_gtest(min_time_parse_gtest)
endif(BENCHMARK_ENABLE_GTEST_TESTS)
###############################################################################
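
compile_output_test() and compile_benchmark_test_with_main() above now link benchmark::benchmark_main, whose job is to supply a main() that initializes and runs the registered benchmarks. A minimal sketch of a translation unit that leans on it (BM_Noop is illustrative):

#include <benchmark/benchmark.h>

static void BM_Noop(benchmark::State& state) {
  for (auto _ : state) {
  }
}
BENCHMARK(BM_Noop);
// No main() here: the one from the linked benchmark_main library is used.
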
diff --git a/test/args_product_test.cc b/test/args_product_test.cc
index 8a859f8..63b8b71 100644
--- a/test/args_product_test.cc
+++ b/test/args_product_test.cc
@@ -1,10 +1,10 @@
-#include "benchmark/benchmark.h"
-
#include <cassert>
#include <iostream>
#include <set>
#include <vector>
+#include "benchmark/benchmark.h"
+
class ArgsProductFixture : public ::benchmark::Fixture {
public:
ArgsProductFixture()
@@ -23,7 +23,7 @@ class ArgsProductFixture : public ::benchmark::Fixture {
{2, 15, 10, 9},
{4, 5, 6, 11}}) {}
- void SetUp(const ::benchmark::State& state) {
+ void SetUp(const ::benchmark::State& state) override {
std::vector<int64_t> ranges = {state.range(0), state.range(1),
state.range(2), state.range(3)};
@@ -34,10 +34,10 @@ class ArgsProductFixture : public ::benchmark::Fixture {
// NOTE: This is not TearDown as we want to check after _all_ runs are
// complete.
- virtual ~ArgsProductFixture() {
+ ~ArgsProductFixture() override {
if (actualValues != expectedValues) {
std::cout << "EXPECTED\n";
- for (auto v : expectedValues) {
+ for (const auto& v : expectedValues) {
std::cout << "{";
for (int64_t iv : v) {
std::cout << iv << ", ";
@@ -45,7 +45,7 @@ class ArgsProductFixture : public ::benchmark::Fixture {
std::cout << "}\n";
}
std::cout << "ACTUAL\n";
- for (auto v : actualValues) {
+ for (const auto& v : actualValues) {
std::cout << "{";
for (int64_t iv : v) {
std::cout << iv << ", ";
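
The fixture above verifies the argument tuples produced by ArgsProduct, which expands to the cross product of the per-argument value lists. A sketch of the registration side (BM_WithArgsProduct is illustrative):

#include <benchmark/benchmark.h>

static void BM_WithArgsProduct(benchmark::State& state) {
  for (auto _ : state) {
    auto sum = state.range(0) + state.range(1);
    benchmark::DoNotOptimize(sum);
  }
}
// Expands to the six (first, second) combinations of {0, 1, 2} x {10, 20}.
BENCHMARK(BM_WithArgsProduct)->ArgsProduct({{0, 1, 2}, {10, 20}});
BENCHMARK_MAIN();
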
diff --git a/test/basic_test.cc b/test/basic_test.cc
index 5f3dd1a..cba1b0f 100644
--- a/test/basic_test.cc
+++ b/test/basic_test.cc
@@ -5,7 +5,8 @@
void BM_empty(benchmark::State& state) {
for (auto _ : state) {
- benchmark::DoNotOptimize(state.iterations());
+ auto iterations = state.iterations();
+ benchmark::DoNotOptimize(iterations);
}
}
BENCHMARK(BM_empty);
@@ -13,7 +14,7 @@ BENCHMARK(BM_empty)->ThreadPerCpu();
void BM_spin_empty(benchmark::State& state) {
for (auto _ : state) {
- for (int x = 0; x < state.range(0); ++x) {
+ for (auto x = 0; x < state.range(0); ++x) {
benchmark::DoNotOptimize(x);
}
}
@@ -22,11 +23,11 @@ BASIC_BENCHMARK_TEST(BM_spin_empty);
BASIC_BENCHMARK_TEST(BM_spin_empty)->ThreadPerCpu();
void BM_spin_pause_before(benchmark::State& state) {
- for (int i = 0; i < state.range(0); ++i) {
+ for (auto i = 0; i < state.range(0); ++i) {
benchmark::DoNotOptimize(i);
}
for (auto _ : state) {
- for (int i = 0; i < state.range(0); ++i) {
+ for (auto i = 0; i < state.range(0); ++i) {
benchmark::DoNotOptimize(i);
}
}
@@ -37,11 +38,11 @@ BASIC_BENCHMARK_TEST(BM_spin_pause_before)->ThreadPerCpu();
void BM_spin_pause_during(benchmark::State& state) {
for (auto _ : state) {
state.PauseTiming();
- for (int i = 0; i < state.range(0); ++i) {
+ for (auto i = 0; i < state.range(0); ++i) {
benchmark::DoNotOptimize(i);
}
state.ResumeTiming();
- for (int i = 0; i < state.range(0); ++i) {
+ for (auto i = 0; i < state.range(0); ++i) {
benchmark::DoNotOptimize(i);
}
}
@@ -62,11 +63,11 @@ BENCHMARK(BM_pause_during)->UseRealTime()->ThreadPerCpu();
void BM_spin_pause_after(benchmark::State& state) {
for (auto _ : state) {
- for (int i = 0; i < state.range(0); ++i) {
+ for (auto i = 0; i < state.range(0); ++i) {
benchmark::DoNotOptimize(i);
}
}
- for (int i = 0; i < state.range(0); ++i) {
+ for (auto i = 0; i < state.range(0); ++i) {
benchmark::DoNotOptimize(i);
}
}
@@ -74,15 +75,15 @@ BASIC_BENCHMARK_TEST(BM_spin_pause_after);
BASIC_BENCHMARK_TEST(BM_spin_pause_after)->ThreadPerCpu();
void BM_spin_pause_before_and_after(benchmark::State& state) {
- for (int i = 0; i < state.range(0); ++i) {
+ for (auto i = 0; i < state.range(0); ++i) {
benchmark::DoNotOptimize(i);
}
for (auto _ : state) {
- for (int i = 0; i < state.range(0); ++i) {
+ for (auto i = 0; i < state.range(0); ++i) {
benchmark::DoNotOptimize(i);
}
}
- for (int i = 0; i < state.range(0); ++i) {
+ for (auto i = 0; i < state.range(0); ++i) {
benchmark::DoNotOptimize(i);
}
}
@@ -96,7 +97,6 @@ void BM_empty_stop_start(benchmark::State& state) {
BENCHMARK(BM_empty_stop_start);
BENCHMARK(BM_empty_stop_start)->ThreadPerCpu();
-
void BM_KeepRunning(benchmark::State& state) {
benchmark::IterationCount iter_count = 0;
assert(iter_count == state.iterations());
@@ -108,15 +108,30 @@ void BM_KeepRunning(benchmark::State& state) {
BENCHMARK(BM_KeepRunning);
void BM_KeepRunningBatch(benchmark::State& state) {
- // Choose a prime batch size to avoid evenly dividing max_iterations.
- const benchmark::IterationCount batch_size = 101;
+ // Choose a batch size >1000 to skip the typical runs with iteration
+ // targets of 10, 100 and 1000. If these are not actually skipped the
+ // bug would be detectable as consecutive runs with the same iteration
+ // count. Below we assert that this does not happen.
+ const benchmark::IterationCount batch_size = 1009;
+
+ static benchmark::IterationCount prior_iter_count = 0;
benchmark::IterationCount iter_count = 0;
while (state.KeepRunningBatch(batch_size)) {
iter_count += batch_size;
}
assert(state.iterations() == iter_count);
+
+ // Verify that the iteration count always increases across runs (see
+ // comment above).
+ assert(iter_count == batch_size // max_iterations == 1
+ || iter_count > prior_iter_count); // max_iterations > batch_size
+ prior_iter_count = iter_count;
}
-BENCHMARK(BM_KeepRunningBatch);
+// Register with a fixed repetition count to establish the invariant that
+// the iteration count should always change across runs. This overrides
+// the --benchmark_repetitions command line flag, which would otherwise
+// cause this test to fail if set > 1.
+BENCHMARK(BM_KeepRunningBatch)->Repetitions(1);
void BM_RangedFor(benchmark::State& state) {
benchmark::IterationCount iter_count = 0;
@@ -127,10 +142,39 @@ void BM_RangedFor(benchmark::State& state) {
}
BENCHMARK(BM_RangedFor);
+#ifdef BENCHMARK_HAS_CXX11
+template <typename T>
+void BM_OneTemplateFunc(benchmark::State& state) {
+ auto arg = state.range(0);
+ T sum = 0;
+ for (auto _ : state) {
+ sum += static_cast<T>(arg);
+ }
+}
+BENCHMARK(BM_OneTemplateFunc<int>)->Arg(1);
+BENCHMARK(BM_OneTemplateFunc<double>)->Arg(1);
+
+template <typename A, typename B>
+void BM_TwoTemplateFunc(benchmark::State& state) {
+ auto arg = state.range(0);
+ A sum = 0;
+ B prod = 1;
+ for (auto _ : state) {
+ sum += static_cast<A>(arg);
+ prod *= static_cast<B>(arg);
+ }
+}
+BENCHMARK(BM_TwoTemplateFunc<int, double>)->Arg(1);
+BENCHMARK(BM_TwoTemplateFunc<double, int>)->Arg(1);
+
+#endif // BENCHMARK_HAS_CXX11
+
// Ensure that StateIterator provides all the necessary typedefs required to
// instantiate std::iterator_traits.
-static_assert(std::is_same<
- typename std::iterator_traits<benchmark::State::StateIterator>::value_type,
- typename benchmark::State::StateIterator::value_type>::value, "");
+static_assert(
+ std::is_same<typename std::iterator_traits<
+ benchmark::State::StateIterator>::value_type,
+ typename benchmark::State::StateIterator::value_type>::value,
+ "");
BENCHMARK_MAIN();
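
BM_KeepRunningBatch above asserts on the framework's iteration accounting; from the user's side the point of KeepRunningBatch is to amortize the per-iteration check over a chunk of work. A sketch (BM_BatchedWork is illustrative):

#include <benchmark/benchmark.h>

#include <cstdint>

static void BM_BatchedWork(benchmark::State& state) {
  constexpr benchmark::IterationCount batch = 64;
  std::int64_t processed = 0;
  while (state.KeepRunningBatch(batch)) {
    // Account for `batch` items of work per call instead of one
    // KeepRunning() check per item.
    processed += batch;
  }
  benchmark::DoNotOptimize(processed);
}
BENCHMARK(BM_BatchedWork);
BENCHMARK_MAIN();
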
diff --git a/test/benchmark_gtest.cc b/test/benchmark_gtest.cc
index 6dbf7a5..2c9e555 100644
--- a/test/benchmark_gtest.cc
+++ b/test/benchmark_gtest.cc
@@ -1,11 +1,15 @@
+#include <map>
+#include <string>
#include <vector>
#include "../src/benchmark_register.h"
+#include "benchmark/benchmark.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
namespace benchmark {
namespace internal {
+
namespace {
TEST(AddRangeTest, Simple) {
@@ -34,8 +38,9 @@ TEST(AddRangeTest, Advanced64) {
TEST(AddRangeTest, FullRange8) {
std::vector<int8_t> dst;
- AddRange(&dst, int8_t{1}, std::numeric_limits<int8_t>::max(), 8);
- EXPECT_THAT(dst, testing::ElementsAre(1, 8, 64, 127));
+ AddRange(&dst, int8_t{1}, std::numeric_limits<int8_t>::max(), int8_t{8});
+ EXPECT_THAT(
+ dst, testing::ElementsAre(int8_t{1}, int8_t{8}, int8_t{64}, int8_t{127}));
}
TEST(AddRangeTest, FullRange64) {
@@ -125,8 +130,38 @@ TEST(AddRangeTest, FullNegativeRange64) {
TEST(AddRangeTest, Simple8) {
std::vector<int8_t> dst;
- AddRange<int8_t>(&dst, 1, 8, 2);
- EXPECT_THAT(dst, testing::ElementsAre(1, 2, 4, 8));
+ AddRange<int8_t>(&dst, int8_t{1}, int8_t{8}, int8_t{2});
+ EXPECT_THAT(dst,
+ testing::ElementsAre(int8_t{1}, int8_t{2}, int8_t{4}, int8_t{8}));
+}
+
+TEST(AddCustomContext, Simple) {
+ std::map<std::string, std::string> *&global_context = GetGlobalContext();
+ EXPECT_THAT(global_context, nullptr);
+
+ AddCustomContext("foo", "bar");
+ AddCustomContext("baz", "qux");
+
+ EXPECT_THAT(*global_context,
+ testing::UnorderedElementsAre(testing::Pair("foo", "bar"),
+ testing::Pair("baz", "qux")));
+
+ delete global_context;
+ global_context = nullptr;
+}
+
+TEST(AddCustomContext, DuplicateKey) {
+ std::map<std::string, std::string> *&global_context = GetGlobalContext();
+ EXPECT_THAT(global_context, nullptr);
+
+ AddCustomContext("foo", "bar");
+ AddCustomContext("foo", "qux");
+
+ EXPECT_THAT(*global_context,
+ testing::UnorderedElementsAre(testing::Pair("foo", "bar")));
+
+ delete global_context;
+ global_context = nullptr;
}
} // namespace
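
The new AddCustomContext tests above exercise the map behind GetGlobalContext(); the public entry point is benchmark::AddCustomContext, and the pairs end up in the "context" block of the console/JSON output. A sketch with placeholder key/value strings:

#include <benchmark/benchmark.h>

static void BM_Trivial(benchmark::State& state) {
  for (auto _ : state) {
  }
}
BENCHMARK(BM_Trivial);

int main(int argc, char** argv) {
  // The values here are placeholders, not anything the library computes.
  benchmark::AddCustomContext("build_type", "release");
  benchmark::AddCustomContext("revision", "local");
  benchmark::Initialize(&argc, argv);
  benchmark::RunSpecifiedBenchmarks();
  benchmark::Shutdown();
  return 0;
}
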
diff --git a/test/benchmark_min_time_flag_iters_test.cc b/test/benchmark_min_time_flag_iters_test.cc
new file mode 100644
index 0000000..3de93a7
--- /dev/null
+++ b/test/benchmark_min_time_flag_iters_test.cc
@@ -0,0 +1,66 @@
+#include <cassert>
+#include <cstdlib>
+#include <cstring>
+#include <iostream>
+#include <string>
+#include <vector>
+
+#include "benchmark/benchmark.h"
+
+// Tests that we can specify the number of iterations with
+// --benchmark_min_time=<NUM>x.
+namespace {
+
+class TestReporter : public benchmark::ConsoleReporter {
+ public:
+ virtual bool ReportContext(const Context& context) BENCHMARK_OVERRIDE {
+ return ConsoleReporter::ReportContext(context);
+ };
+
+ virtual void ReportRuns(const std::vector<Run>& report) BENCHMARK_OVERRIDE {
+ assert(report.size() == 1);
+ iter_nums_.push_back(report[0].iterations);
+ ConsoleReporter::ReportRuns(report);
+ };
+
+ TestReporter() {}
+
+ virtual ~TestReporter() {}
+
+ const std::vector<benchmark::IterationCount>& GetIters() const {
+ return iter_nums_;
+ }
+
+ private:
+ std::vector<benchmark::IterationCount> iter_nums_;
+};
+
+} // end namespace
+
+static void BM_MyBench(benchmark::State& state) {
+ for (auto s : state) {
+ }
+}
+BENCHMARK(BM_MyBench);
+
+int main(int argc, char** argv) {
+ // Make a fake argv and append the new --benchmark_min_time=<foo> to it.
+ int fake_argc = argc + 1;
+ const char** fake_argv = new const char*[static_cast<size_t>(fake_argc)];
+ for (int i = 0; i < argc; ++i) fake_argv[i] = argv[i];
+ fake_argv[argc] = "--benchmark_min_time=4x";
+
+ benchmark::Initialize(&fake_argc, const_cast<char**>(fake_argv));
+
+ TestReporter test_reporter;
+ const size_t returned_count =
+ benchmark::RunSpecifiedBenchmarks(&test_reporter, "BM_MyBench");
+ assert(returned_count == 1);
+
+ // Check the executed iters.
+ const std::vector<benchmark::IterationCount> iters = test_reporter.GetIters();
+ assert(!iters.empty() && iters[0] == 4);
+
+ delete[] fake_argv;
+ return 0;
+}
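
The fake --benchmark_min_time=4x argument above pins the iteration count from the command line; the in-code equivalent is the Iterations() builder. A sketch (BM_FixedIterations is illustrative):

#include <benchmark/benchmark.h>

static void BM_FixedIterations(benchmark::State& state) {
  for (auto _ : state) {
  }
}
// Runs exactly four iterations instead of letting the min-time logic decide.
BENCHMARK(BM_FixedIterations)->Iterations(4);
BENCHMARK_MAIN();
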
diff --git a/test/benchmark_min_time_flag_time_test.cc b/test/benchmark_min_time_flag_time_test.cc
new file mode 100644
index 0000000..04a82eb
--- /dev/null
+++ b/test/benchmark_min_time_flag_time_test.cc
@@ -0,0 +1,90 @@
+#include <cassert>
+#include <climits>
+#include <cmath>
+#include <cstdlib>
+#include <cstring>
+#include <iostream>
+#include <string>
+#include <vector>
+
+#include "benchmark/benchmark.h"
+
+// Tests that we can specify the min time with
+// --benchmark_min_time=<NUM> (no suffix needed) OR
+// --benchmark_min_time=<NUM>s
+namespace {
+
+// This is from benchmark.h
+typedef int64_t IterationCount;
+
+class TestReporter : public benchmark::ConsoleReporter {
+ public:
+ virtual bool ReportContext(const Context& context) BENCHMARK_OVERRIDE {
+ return ConsoleReporter::ReportContext(context);
+ };
+
+ virtual void ReportRuns(const std::vector<Run>& report) BENCHMARK_OVERRIDE {
+ assert(report.size() == 1);
+ ConsoleReporter::ReportRuns(report);
+ };
+
+ virtual void ReportRunsConfig(double min_time, bool /* has_explicit_iters */,
+ IterationCount /* iters */) BENCHMARK_OVERRIDE {
+ min_times_.push_back(min_time);
+ }
+
+ TestReporter() {}
+
+ virtual ~TestReporter() {}
+
+ const std::vector<double>& GetMinTimes() const { return min_times_; }
+
+ private:
+ std::vector<double> min_times_;
+};
+
+bool AlmostEqual(double a, double b) {
+ return std::fabs(a - b) < std::numeric_limits<double>::epsilon();
+}
+
+void DoTestHelper(int* argc, const char** argv, double expected) {
+ benchmark::Initialize(argc, const_cast<char**>(argv));
+
+ TestReporter test_reporter;
+ const size_t returned_count =
+ benchmark::RunSpecifiedBenchmarks(&test_reporter, "BM_MyBench");
+ assert(returned_count == 1);
+
+ // Check the min_time
+ const std::vector<double>& min_times = test_reporter.GetMinTimes();
+ assert(!min_times.empty() && AlmostEqual(min_times[0], expected));
+}
+
+} // end namespace
+
+static void BM_MyBench(benchmark::State& state) {
+ for (auto s : state) {
+ }
+}
+BENCHMARK(BM_MyBench);
+
+int main(int argc, char** argv) {
+ // Make a fake argv and append the new --benchmark_min_time=<foo> to it.
+ int fake_argc = argc + 1;
+ const char** fake_argv = new const char*[static_cast<size_t>(fake_argc)];
+
+ for (int i = 0; i < argc; ++i) fake_argv[i] = argv[i];
+
+ const char* no_suffix = "--benchmark_min_time=4";
+ const char* with_suffix = "--benchmark_min_time=4.0s";
+ double expected = 4.0;
+
+ fake_argv[argc] = no_suffix;
+ DoTestHelper(&fake_argc, fake_argv, expected);
+
+ fake_argv[argc] = with_suffix;
+ DoTestHelper(&fake_argc, fake_argv, expected);
+
+ delete[] fake_argv;
+ return 0;
+}
diff --git a/test/benchmark_name_gtest.cc b/test/benchmark_name_gtest.cc
index afb401c..0a6746d 100644
--- a/test/benchmark_name_gtest.cc
+++ b/test/benchmark_name_gtest.cc
@@ -32,6 +32,14 @@ TEST(BenchmarkNameTest, MinTime) {
EXPECT_EQ(name.str(), "function_name/some_args:3/4/min_time:3.4s");
}
+TEST(BenchmarkNameTest, MinWarmUpTime) {
+ auto name = BenchmarkName();
+ name.function_name = "function_name";
+ name.args = "some_args:3/4";
+ name.min_warmup_time = "min_warmup_time:3.5s";
+ EXPECT_EQ(name.str(), "function_name/some_args:3/4/min_warmup_time:3.5s");
+}
+
TEST(BenchmarkNameTest, Iterations) {
auto name = BenchmarkName();
name.function_name = "function_name";
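
The MinWarmUpTime name test above checks the "min_warmup_time:<N>s" segment; on the registration side that segment comes from the MinWarmUpTime() builder, which runs an untimed-in-the-report warm-up phase before the measured run. A sketch, assuming that builder behaves as described (BM_WithWarmup is illustrative):

#include <benchmark/benchmark.h>

static void BM_WithWarmup(benchmark::State& state) {
  for (auto _ : state) {
  }
}
// Warm up for 0.5s, then measure for at least 1s.
BENCHMARK(BM_WithWarmup)->MinWarmUpTime(0.5)->MinTime(1.0);
BENCHMARK_MAIN();
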
diff --git a/test/benchmark_random_interleaving_gtest.cc b/test/benchmark_random_interleaving_gtest.cc
new file mode 100644
index 0000000..7f20867
--- /dev/null
+++ b/test/benchmark_random_interleaving_gtest.cc
@@ -0,0 +1,126 @@
+#include <queue>
+#include <string>
+#include <vector>
+
+#include "../src/commandlineflags.h"
+#include "../src/string_util.h"
+#include "benchmark/benchmark.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+namespace benchmark {
+
+BM_DECLARE_bool(benchmark_enable_random_interleaving);
+BM_DECLARE_string(benchmark_filter);
+BM_DECLARE_int32(benchmark_repetitions);
+
+namespace internal {
+namespace {
+
+class EventQueue : public std::queue<std::string> {
+ public:
+ void Put(const std::string& event) { push(event); }
+
+ void Clear() {
+ while (!empty()) {
+ pop();
+ }
+ }
+
+ std::string Get() {
+ std::string event = front();
+ pop();
+ return event;
+ }
+};
+
+EventQueue* queue = new EventQueue();
+
+class NullReporter : public BenchmarkReporter {
+ public:
+ bool ReportContext(const Context& /*context*/) override { return true; }
+ void ReportRuns(const std::vector<Run>& /* report */) override {}
+};
+
+class BenchmarkTest : public testing::Test {
+ public:
+ static void SetupHook(int /* num_threads */) { queue->push("Setup"); }
+
+ static void TeardownHook(int /* num_threads */) { queue->push("Teardown"); }
+
+ void Execute(const std::string& pattern) {
+ queue->Clear();
+
+ std::unique_ptr<BenchmarkReporter> reporter(new NullReporter());
+ FLAGS_benchmark_filter = pattern;
+ RunSpecifiedBenchmarks(reporter.get());
+
+ queue->Put("DONE"); // End marker
+ }
+};
+
+void BM_Match1(benchmark::State& state) {
+ const int64_t arg = state.range(0);
+
+ for (auto _ : state) {
+ }
+ queue->Put(StrFormat("BM_Match1/%d", static_cast<int>(arg)));
+}
+BENCHMARK(BM_Match1)
+ ->Iterations(100)
+ ->Arg(1)
+ ->Arg(2)
+ ->Arg(3)
+ ->Range(10, 80)
+ ->Args({90})
+ ->Args({100});
+
+TEST_F(BenchmarkTest, Match1) {
+ Execute("BM_Match1");
+ ASSERT_EQ("BM_Match1/1", queue->Get());
+ ASSERT_EQ("BM_Match1/2", queue->Get());
+ ASSERT_EQ("BM_Match1/3", queue->Get());
+ ASSERT_EQ("BM_Match1/10", queue->Get());
+ ASSERT_EQ("BM_Match1/64", queue->Get());
+ ASSERT_EQ("BM_Match1/80", queue->Get());
+ ASSERT_EQ("BM_Match1/90", queue->Get());
+ ASSERT_EQ("BM_Match1/100", queue->Get());
+ ASSERT_EQ("DONE", queue->Get());
+}
+
+TEST_F(BenchmarkTest, Match1WithRepetition) {
+ FLAGS_benchmark_repetitions = 2;
+
+ Execute("BM_Match1/(64|80)");
+ ASSERT_EQ("BM_Match1/64", queue->Get());
+ ASSERT_EQ("BM_Match1/64", queue->Get());
+ ASSERT_EQ("BM_Match1/80", queue->Get());
+ ASSERT_EQ("BM_Match1/80", queue->Get());
+ ASSERT_EQ("DONE", queue->Get());
+}
+
+TEST_F(BenchmarkTest, Match1WithRandomInterleaving) {
+ FLAGS_benchmark_enable_random_interleaving = true;
+ FLAGS_benchmark_repetitions = 100;
+
+ std::map<std::string, int> element_count;
+ std::map<std::string, int> interleaving_count;
+ Execute("BM_Match1/(64|80)");
+ for (int i = 0; i < 100; ++i) {
+ std::vector<std::string> interleaving;
+ interleaving.push_back(queue->Get());
+ interleaving.push_back(queue->Get());
+ element_count[interleaving[0]]++;
+ element_count[interleaving[1]]++;
+ interleaving_count[StrFormat("%s,%s", interleaving[0].c_str(),
+ interleaving[1].c_str())]++;
+ }
+ EXPECT_EQ(element_count["BM_Match1/64"], 100) << "Unexpected repetitions.";
+ EXPECT_EQ(element_count["BM_Match1/80"], 100) << "Unexpected repetitions.";
+ EXPECT_GE(interleaving_count.size(), 2) << "Interleaving was not randomized.";
+ ASSERT_EQ("DONE", queue->Get());
+}
+
+} // namespace
+} // namespace internal
+} // namespace benchmark
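
From the outside, the interleaving behaviour tested above is driven purely by flags: with random interleaving enabled, repetitions of the selected benchmarks are shuffled rather than run back to back. A sketch of a binary it would apply to (names and the invocation line are illustrative):

#include <benchmark/benchmark.h>

static void BM_First(benchmark::State& state) {
  for (auto _ : state) {
  }
}
static void BM_Second(benchmark::State& state) {
  for (auto _ : state) {
  }
}
BENCHMARK(BM_First);
BENCHMARK(BM_Second);
BENCHMARK_MAIN();

// Invoked e.g. as:
//   ./mybench --benchmark_enable_random_interleaving=true --benchmark_repetitions=8
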
diff --git a/test/benchmark_setup_teardown_test.cc b/test/benchmark_setup_teardown_test.cc
new file mode 100644
index 0000000..6c3cc2e
--- /dev/null
+++ b/test/benchmark_setup_teardown_test.cc
@@ -0,0 +1,157 @@
+#include <atomic>
+#include <cassert>
+#include <cstdlib>
+#include <cstring>
+#include <iostream>
+#include <limits>
+#include <string>
+
+#include "benchmark/benchmark.h"
+
+// Test that Setup() and Teardown() are called exactly once
+// for each benchmark run (single-threaded).
+namespace singlethreaded {
+static int setup_call = 0;
+static int teardown_call = 0;
+} // namespace singlethreaded
+static void DoSetup1(const benchmark::State& state) {
+ ++singlethreaded::setup_call;
+
+ // Setup/Teardown should never be called with any thread_idx != 0.
+ assert(state.thread_index() == 0);
+}
+
+static void DoTeardown1(const benchmark::State& state) {
+ ++singlethreaded::teardown_call;
+ assert(state.thread_index() == 0);
+}
+
+static void BM_with_setup(benchmark::State& state) {
+ for (auto s : state) {
+ }
+}
+BENCHMARK(BM_with_setup)
+ ->Arg(1)
+ ->Arg(3)
+ ->Arg(5)
+ ->Arg(7)
+ ->Iterations(100)
+ ->Setup(DoSetup1)
+ ->Teardown(DoTeardown1);
+
+// Test that Setup() and Teardown() are called once for each group of threads.
+namespace concurrent {
+static std::atomic<int> setup_call(0);
+static std::atomic<int> teardown_call(0);
+static std::atomic<int> func_call(0);
+} // namespace concurrent
+
+static void DoSetup2(const benchmark::State& state) {
+ concurrent::setup_call.fetch_add(1, std::memory_order_acquire);
+ assert(state.thread_index() == 0);
+}
+
+static void DoTeardown2(const benchmark::State& state) {
+ concurrent::teardown_call.fetch_add(1, std::memory_order_acquire);
+ assert(state.thread_index() == 0);
+}
+
+static void BM_concurrent(benchmark::State& state) {
+ for (auto s : state) {
+ }
+ concurrent::func_call.fetch_add(1, std::memory_order_acquire);
+}
+
+BENCHMARK(BM_concurrent)
+ ->Setup(DoSetup2)
+ ->Teardown(DoTeardown2)
+ ->Iterations(100)
+ ->Threads(5)
+ ->Threads(10)
+ ->Threads(15);
+
+// Testing interaction with Fixture::Setup/Teardown
+namespace fixture_interaction {
+int setup = 0;
+int fixture_setup = 0;
+} // namespace fixture_interaction
+
+#define FIXTURE_BECHMARK_NAME MyFixture
+
+class FIXTURE_BECHMARK_NAME : public ::benchmark::Fixture {
+ public:
+ void SetUp(const ::benchmark::State&) override {
+ fixture_interaction::fixture_setup++;
+ }
+
+ ~FIXTURE_BECHMARK_NAME() override {}
+};
+
+BENCHMARK_F(FIXTURE_BECHMARK_NAME, BM_WithFixture)(benchmark::State& st) {
+ for (auto _ : st) {
+ }
+}
+
+static void DoSetupWithFixture(const benchmark::State&) {
+ fixture_interaction::setup++;
+}
+
+BENCHMARK_REGISTER_F(FIXTURE_BECHMARK_NAME, BM_WithFixture)
+ ->Arg(1)
+ ->Arg(3)
+ ->Arg(5)
+ ->Arg(7)
+ ->Setup(DoSetupWithFixture)
+ ->Repetitions(1)
+ ->Iterations(100);
+
+// Testing repetitions.
+namespace repetitions {
+int setup = 0;
+}
+
+static void DoSetupWithRepetitions(const benchmark::State&) {
+ repetitions::setup++;
+}
+static void BM_WithRep(benchmark::State& state) {
+ for (auto _ : state) {
+ }
+}
+
+BENCHMARK(BM_WithRep)
+ ->Arg(1)
+ ->Arg(3)
+ ->Arg(5)
+ ->Arg(7)
+ ->Setup(DoSetupWithRepetitions)
+ ->Iterations(100)
+ ->Repetitions(4);
+
+int main(int argc, char** argv) {
+ benchmark::Initialize(&argc, argv);
+
+ size_t ret = benchmark::RunSpecifiedBenchmarks(".");
+ assert(ret > 0);
+
+ // Setup/Teardown is called once for each arg group (1,3,5,7).
+ assert(singlethreaded::setup_call == 4);
+ assert(singlethreaded::teardown_call == 4);
+
+ // 3 groups of threads (5, 10, 15) call this function.
+ assert(concurrent::setup_call.load(std::memory_order_relaxed) == 3);
+ assert(concurrent::teardown_call.load(std::memory_order_relaxed) == 3);
+ assert((5 + 10 + 15) ==
+ concurrent::func_call.load(std::memory_order_relaxed));
+
+ // Setup is called 4 times, once for each arg group (1,3,5,7)
+ assert(fixture_interaction::setup == 4);
+ // Fixture::Setup is called every time the bm routine is run.
+ // The exact number is nondeterministic, so we just assert that
+ // it's more than setup.
+ assert(fixture_interaction::fixture_setup > fixture_interaction::setup);
+
+ // Setup is called once for each repetition * num_arg = 4 * 4 = 16.
+ assert(repetitions::setup == 16);
+
+ return 0;
+}
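
The asserts above pin down the contract of the new hooks: Setup()/Teardown() run once per benchmark run (per argument and per thread group), always on thread 0, whereas Fixture::SetUp runs more often. A sketch of typical use (GlobalSetup/GlobalTeardown and the data they manage are illustrative):

#include <benchmark/benchmark.h>

#include <vector>

static std::vector<int>* shared_data = nullptr;

static void GlobalSetup(const benchmark::State&) {
  shared_data = new std::vector<int>(1024, 1);
}
static void GlobalTeardown(const benchmark::State&) {
  delete shared_data;
  shared_data = nullptr;
}

static void BM_UsesSharedData(benchmark::State& state) {
  for (auto _ : state) {
    benchmark::DoNotOptimize(shared_data->front());
  }
}
BENCHMARK(BM_UsesSharedData)
    ->Setup(GlobalSetup)
    ->Teardown(GlobalTeardown)
    ->Arg(1)
    ->Arg(2);
BENCHMARK_MAIN();
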
diff --git a/test/benchmark_test.cc b/test/benchmark_test.cc
index 3cd4f55..94590d5 100644
--- a/test/benchmark_test.cc
+++ b/test/benchmark_test.cc
@@ -5,6 +5,7 @@
#include <stdint.h>
#include <chrono>
+#include <complex>
#include <cstdlib>
#include <iostream>
#include <limits>
@@ -26,7 +27,7 @@
namespace {
-int BENCHMARK_NOINLINE Factorial(uint32_t n) {
+int BENCHMARK_NOINLINE Factorial(int n) {
return (n == 1) ? 1 : n * Factorial(n - 1);
}
@@ -74,7 +75,8 @@ BENCHMARK_RANGE(BM_CalculatePiRange, 1, 1024 * 1024);
static void BM_CalculatePi(benchmark::State& state) {
static const int depth = 1024;
for (auto _ : state) {
- benchmark::DoNotOptimize(CalculatePi(static_cast<int>(depth)));
+ double pi = CalculatePi(static_cast<int>(depth));
+ benchmark::DoNotOptimize(pi);
}
}
BENCHMARK(BM_CalculatePi)->Threads(8);
@@ -90,11 +92,13 @@ static void BM_SetInsert(benchmark::State& state) {
for (int j = 0; j < state.range(1); ++j) data.insert(rand());
}
state.SetItemsProcessed(state.iterations() * state.range(1));
- state.SetBytesProcessed(state.iterations() * state.range(1) * sizeof(int));
+ state.SetBytesProcessed(state.iterations() * state.range(1) *
+ static_cast<int64_t>(sizeof(int)));
}
-// Test many inserts at once to reduce the total iterations needed. Otherwise, the slower,
-// non-timed part of each iteration will make the benchmark take forever.
+// Test many inserts at once to reduce the total iterations needed. Otherwise,
+// the slower, non-timed part of each iteration will make the benchmark take
+// forever.
BENCHMARK(BM_SetInsert)->Ranges({{1 << 10, 8 << 10}, {128, 512}});
template <typename Container,
@@ -107,7 +111,7 @@ static void BM_Sequential(benchmark::State& state) {
}
const int64_t items_processed = state.iterations() * state.range(0);
state.SetItemsProcessed(items_processed);
- state.SetBytesProcessed(items_processed * sizeof(v));
+ state.SetBytesProcessed(items_processed * static_cast<int64_t>(sizeof(v)));
}
BENCHMARK_TEMPLATE2(BM_Sequential, std::vector<int>, int)
->Range(1 << 0, 1 << 10);
@@ -121,12 +125,15 @@ static void BM_StringCompare(benchmark::State& state) {
size_t len = static_cast<size_t>(state.range(0));
std::string s1(len, '-');
std::string s2(len, '-');
- for (auto _ : state) benchmark::DoNotOptimize(s1.compare(s2));
+ for (auto _ : state) {
+ auto comp = s1.compare(s2);
+ benchmark::DoNotOptimize(comp);
+ }
}
BENCHMARK(BM_StringCompare)->Range(1, 1 << 20);
static void BM_SetupTeardown(benchmark::State& state) {
- if (state.thread_index == 0) {
+ if (state.thread_index() == 0) {
// No need to lock test_vector_mu here as this is running single-threaded.
test_vector = new std::vector<int>();
}
@@ -139,7 +146,7 @@ static void BM_SetupTeardown(benchmark::State& state) {
test_vector->pop_back();
++i;
}
- if (state.thread_index == 0) {
+ if (state.thread_index() == 0) {
delete test_vector;
}
}
@@ -156,11 +163,11 @@ BENCHMARK(BM_LongTest)->Range(1 << 16, 1 << 28);
static void BM_ParallelMemset(benchmark::State& state) {
int64_t size = state.range(0) / static_cast<int64_t>(sizeof(int));
- int thread_size = static_cast<int>(size) / state.threads;
- int from = thread_size * state.thread_index;
+ int thread_size = static_cast<int>(size) / state.threads();
+ int from = thread_size * state.thread_index();
int to = from + thread_size;
- if (state.thread_index == 0) {
+ if (state.thread_index() == 0) {
test_vector = new std::vector<int>(static_cast<size_t>(size));
}
@@ -168,11 +175,11 @@ static void BM_ParallelMemset(benchmark::State& state) {
for (int i = from; i < to; i++) {
// No need to lock test_vector_mu as ranges
// do not overlap between threads.
- benchmark::DoNotOptimize(test_vector->at(i) = 1);
+ benchmark::DoNotOptimize(test_vector->at(static_cast<size_t>(i)) = 1);
}
}
- if (state.thread_index == 0) {
+ if (state.thread_index() == 0) {
delete test_vector;
}
}
@@ -214,7 +221,8 @@ BENCHMARK_CAPTURE(BM_with_args, string_and_pair_test, std::string("abc"),
std::pair<int, double>(42, 3.8));
void BM_non_template_args(benchmark::State& state, int, double) {
- while(state.KeepRunning()) {}
+ while (state.KeepRunning()) {
+ }
}
BENCHMARK_CAPTURE(BM_non_template_args, basic_test, 0, 0);
@@ -223,14 +231,14 @@ BENCHMARK_CAPTURE(BM_non_template_args, basic_test, 0, 0);
static void BM_DenseThreadRanges(benchmark::State& st) {
switch (st.range(0)) {
case 1:
- assert(st.threads == 1 || st.threads == 2 || st.threads == 3);
+ assert(st.threads() == 1 || st.threads() == 2 || st.threads() == 3);
break;
case 2:
- assert(st.threads == 1 || st.threads == 3 || st.threads == 4);
+ assert(st.threads() == 1 || st.threads() == 3 || st.threads() == 4);
break;
case 3:
- assert(st.threads == 5 || st.threads == 8 || st.threads == 11 ||
- st.threads == 14);
+ assert(st.threads() == 5 || st.threads() == 8 || st.threads() == 11 ||
+ st.threads() == 14);
break;
default:
assert(false && "Invalid test case number");
@@ -242,4 +250,25 @@ BENCHMARK(BM_DenseThreadRanges)->Arg(1)->DenseThreadRange(1, 3);
BENCHMARK(BM_DenseThreadRanges)->Arg(2)->DenseThreadRange(1, 4, 2);
BENCHMARK(BM_DenseThreadRanges)->Arg(3)->DenseThreadRange(5, 14, 3);
+static void BM_BenchmarkName(benchmark::State& state) {
+ for (auto _ : state) {
+ }
+
+ // Check that the benchmark name is passed correctly to `state`.
+ assert("BM_BenchmarkName" == state.name());
+}
+BENCHMARK(BM_BenchmarkName);
+
+// regression test for #1446
+template <typename type>
+static void BM_templated_test(benchmark::State& state) {
+ for (auto _ : state) {
+ type created_string;
+ benchmark::DoNotOptimize(created_string);
+ }
+}
+
+static auto BM_templated_test_double = BM_templated_test<std::complex<double>>;
+BENCHMARK(BM_templated_test_double);
+
BENCHMARK_MAIN();
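
Many edits in this file track the API change from the thread_index/threads data members to the thread_index()/threads() accessors. A sketch of the per-thread setup pattern mirroring BM_SetupTeardown and BM_ParallelMemset above (BM_PerThread and the counter are illustrative):

#include <benchmark/benchmark.h>

#include <atomic>

static std::atomic<int> hits{0};

static void BM_PerThread(benchmark::State& state) {
  if (state.thread_index() == 0) {
    hits = 0;  // thread 0 prepares the shared state, as in the tests above
  }
  for (auto _ : state) {
    hits.fetch_add(1, std::memory_order_relaxed);
  }
  if (state.thread_index() == 0) {
    // thread 0 would tear the shared state down again here
  }
}
BENCHMARK(BM_PerThread)->Threads(4);
BENCHMARK_MAIN();
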
diff --git a/test/clobber_memory_assembly_test.cc b/test/clobber_memory_assembly_test.cc
index f41911a..54e26cc 100644
--- a/test/clobber_memory_assembly_test.cc
+++ b/test/clobber_memory_assembly_test.cc
@@ -3,13 +3,13 @@
#ifdef __clang__
#pragma clang diagnostic ignored "-Wreturn-type"
#endif
+BENCHMARK_DISABLE_DEPRECATED_WARNING
extern "C" {
extern int ExternInt;
extern int ExternInt2;
extern int ExternInt3;
-
}
// CHECK-LABEL: test_basic:
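
The BENCHMARK_DISABLE_DEPRECATED_WARNING above, and the many rewrites from DoNotOptimize(state.iterations()) to a named local throughout this patch, follow from the deprecation of the DoNotOptimize(Tp const&) overload. A sketch of the preferred lvalue form (BM_UseLvalue is illustrative):

#include <benchmark/benchmark.h>

static void BM_UseLvalue(benchmark::State& state) {
  for (auto _ : state) {
    // Store the result first, then pass the lvalue; passing a temporary would
    // hit the deprecated const-reference overload.
    auto iterations = state.iterations();
    benchmark::DoNotOptimize(iterations);
  }
}
BENCHMARK(BM_UseLvalue);
BENCHMARK_MAIN();
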
diff --git a/test/commandlineflags_gtest.cc b/test/commandlineflags_gtest.cc
index 656020f..8412008 100644
--- a/test/commandlineflags_gtest.cc
+++ b/test/commandlineflags_gtest.cc
@@ -2,6 +2,7 @@
#include "../src/commandlineflags.h"
#include "../src/internal_macros.h"
+#include "gmock/gmock.h"
#include "gtest/gtest.h"
namespace benchmark {
@@ -19,9 +20,7 @@ int setenv(const char* name, const char* value, int overwrite) {
return _putenv_s(name, value);
}
-int unsetenv(const char* name) {
- return _putenv_s(name, "");
-}
+int unsetenv(const char* name) { return _putenv_s(name, ""); }
#endif // BENCHMARK_OS_WINDOWS
@@ -197,5 +196,33 @@ TEST(StringFromEnv, Valid) {
unsetenv("IN_ENV");
}
+TEST(KvPairsFromEnv, Default) {
+ ASSERT_EQ(unsetenv("NOT_IN_ENV"), 0);
+ EXPECT_THAT(KvPairsFromEnv("not_in_env", {{"foo", "bar"}}),
+ testing::ElementsAre(testing::Pair("foo", "bar")));
+}
+
+TEST(KvPairsFromEnv, MalformedReturnsDefault) {
+ ASSERT_EQ(setenv("IN_ENV", "foo", 1), 0);
+ EXPECT_THAT(KvPairsFromEnv("in_env", {{"foo", "bar"}}),
+ testing::ElementsAre(testing::Pair("foo", "bar")));
+ unsetenv("IN_ENV");
+}
+
+TEST(KvPairsFromEnv, Single) {
+ ASSERT_EQ(setenv("IN_ENV", "foo=bar", 1), 0);
+ EXPECT_THAT(KvPairsFromEnv("in_env", {}),
+ testing::ElementsAre(testing::Pair("foo", "bar")));
+ unsetenv("IN_ENV");
+}
+
+TEST(KvPairsFromEnv, Multiple) {
+ ASSERT_EQ(setenv("IN_ENV", "foo=bar,baz=qux", 1), 0);
+ EXPECT_THAT(KvPairsFromEnv("in_env", {}),
+ testing::UnorderedElementsAre(testing::Pair("foo", "bar"),
+ testing::Pair("baz", "qux")));
+ unsetenv("IN_ENV");
+}
+
} // namespace
} // namespace benchmark
diff --git a/test/complexity_test.cc b/test/complexity_test.cc
index 5681fdc..76891e0 100644
--- a/test/complexity_test.cc
+++ b/test/complexity_test.cc
@@ -4,6 +4,7 @@
#include <cmath>
#include <cstdlib>
#include <vector>
+
#include "benchmark/benchmark.h"
#include "output_test.h"
@@ -12,8 +13,10 @@ namespace {
#define ADD_COMPLEXITY_CASES(...) \
int CONCAT(dummy, __LINE__) = AddComplexityTest(__VA_ARGS__)
-int AddComplexityTest(std::string test_name, std::string big_o_test_name,
- std::string rms_test_name, std::string big_o) {
+int AddComplexityTest(const std::string &test_name,
+ const std::string &big_o_test_name,
+ const std::string &rms_test_name,
+ const std::string &big_o, int family_index) {
SetSubstitutions({{"%name", test_name},
{"%bigo_name", big_o_test_name},
{"%rms_name", rms_test_name},
@@ -25,25 +28,33 @@ int AddComplexityTest(std::string test_name, std::string big_o_test_name,
{{"^%bigo_name %bigo_str %bigo_str[ ]*$"},
{"^%bigo_name", MR_Not}, // Assert we we didn't only matched a name.
{"^%rms_name %rms %rms[ ]*$", MR_Next}});
- AddCases(TC_JSONOut, {{"\"name\": \"%bigo_name\",$"},
- {"\"run_name\": \"%name\",$", MR_Next},
- {"\"run_type\": \"aggregate\",$", MR_Next},
- {"\"repetitions\": %int,$", MR_Next},
- {"\"threads\": 1,$", MR_Next},
- {"\"aggregate_name\": \"BigO\",$", MR_Next},
- {"\"cpu_coefficient\": %float,$", MR_Next},
- {"\"real_coefficient\": %float,$", MR_Next},
- {"\"big_o\": \"%bigo\",$", MR_Next},
- {"\"time_unit\": \"ns\"$", MR_Next},
- {"}", MR_Next},
- {"\"name\": \"%rms_name\",$"},
- {"\"run_name\": \"%name\",$", MR_Next},
- {"\"run_type\": \"aggregate\",$", MR_Next},
- {"\"repetitions\": %int,$", MR_Next},
- {"\"threads\": 1,$", MR_Next},
- {"\"aggregate_name\": \"RMS\",$", MR_Next},
- {"\"rms\": %float$", MR_Next},
- {"}", MR_Next}});
+ AddCases(
+ TC_JSONOut,
+ {{"\"name\": \"%bigo_name\",$"},
+ {"\"family_index\": " + std::to_string(family_index) + ",$", MR_Next},
+ {"\"per_family_instance_index\": 0,$", MR_Next},
+ {"\"run_name\": \"%name\",$", MR_Next},
+ {"\"run_type\": \"aggregate\",$", MR_Next},
+ {"\"repetitions\": %int,$", MR_Next},
+ {"\"threads\": 1,$", MR_Next},
+ {"\"aggregate_name\": \"BigO\",$", MR_Next},
+ {"\"aggregate_unit\": \"time\",$", MR_Next},
+ {"\"cpu_coefficient\": %float,$", MR_Next},
+ {"\"real_coefficient\": %float,$", MR_Next},
+ {"\"big_o\": \"%bigo\",$", MR_Next},
+ {"\"time_unit\": \"ns\"$", MR_Next},
+ {"}", MR_Next},
+ {"\"name\": \"%rms_name\",$"},
+ {"\"family_index\": " + std::to_string(family_index) + ",$", MR_Next},
+ {"\"per_family_instance_index\": 0,$", MR_Next},
+ {"\"run_name\": \"%name\",$", MR_Next},
+ {"\"run_type\": \"aggregate\",$", MR_Next},
+ {"\"repetitions\": %int,$", MR_Next},
+ {"\"threads\": 1,$", MR_Next},
+ {"\"aggregate_name\": \"RMS\",$", MR_Next},
+ {"\"aggregate_unit\": \"percentage\",$", MR_Next},
+ {"\"rms\": %float$", MR_Next},
+ {"}", MR_Next}});
AddCases(TC_CSVOut, {{"^\"%bigo_name\",,%float,%float,%bigo,,,,,$"},
{"^\"%bigo_name\"", MR_Not},
{"^\"%rms_name\",,%float,%float,,,,,,$", MR_Next}});
@@ -56,10 +67,10 @@ int AddComplexityTest(std::string test_name, std::string big_o_test_name,
// --------------------------- Testing BigO O(1) --------------------------- //
// ========================================================================= //
-void BM_Complexity_O1(benchmark::State& state) {
+void BM_Complexity_O1(benchmark::State &state) {
for (auto _ : state) {
for (int i = 0; i < 1024; ++i) {
- benchmark::DoNotOptimize(&i);
+ benchmark::DoNotOptimize(i);
}
}
state.SetComplexityN(state.range(0));
@@ -82,15 +93,15 @@ const char *lambda_big_o_1 = "f\\(N\\)";
// Add enum tests
ADD_COMPLEXITY_CASES(one_test_name, big_o_1_test_name, rms_o_1_test_name,
- enum_big_o_1);
+ enum_big_o_1, /*family_index=*/0);
// Add auto enum tests
ADD_COMPLEXITY_CASES(one_test_name, big_o_1_test_name, rms_o_1_test_name,
- auto_big_o_1);
+ auto_big_o_1, /*family_index=*/1);
// Add lambda tests
ADD_COMPLEXITY_CASES(one_test_name, big_o_1_test_name, rms_o_1_test_name,
- lambda_big_o_1);
+ lambda_big_o_1, /*family_index=*/2);
// ========================================================================= //
// --------------------------- Testing BigO O(N) --------------------------- //
@@ -98,19 +109,20 @@ ADD_COMPLEXITY_CASES(one_test_name, big_o_1_test_name, rms_o_1_test_name,
std::vector<int> ConstructRandomVector(int64_t size) {
std::vector<int> v;
- v.reserve(static_cast<int>(size));
+ v.reserve(static_cast<size_t>(size));
for (int i = 0; i < size; ++i) {
v.push_back(static_cast<int>(std::rand() % size));
}
return v;
}
-void BM_Complexity_O_N(benchmark::State& state) {
+void BM_Complexity_O_N(benchmark::State &state) {
auto v = ConstructRandomVector(state.range(0));
// Test worst case scenario (item not in vector)
const int64_t item_not_in_vector = state.range(0) * 2;
for (auto _ : state) {
- benchmark::DoNotOptimize(std::find(v.begin(), v.end(), item_not_in_vector));
+ auto it = std::find(v.begin(), v.end(), item_not_in_vector);
+ benchmark::DoNotOptimize(it);
}
state.SetComplexityN(state.range(0));
}
@@ -137,17 +149,17 @@ const char *lambda_big_o_n = "f\\(N\\)";
// Add enum tests
ADD_COMPLEXITY_CASES(n_test_name, big_o_n_test_name, rms_o_n_test_name,
- enum_auto_big_o_n);
+ enum_auto_big_o_n, /*family_index=*/3);
// Add lambda tests
ADD_COMPLEXITY_CASES(n_test_name, big_o_n_test_name, rms_o_n_test_name,
- lambda_big_o_n);
+ lambda_big_o_n, /*family_index=*/4);
// ========================================================================= //
// ------------------------- Testing BigO O(N*lgN) ------------------------- //
// ========================================================================= //
-static void BM_Complexity_O_N_log_N(benchmark::State& state) {
+static void BM_Complexity_O_N_log_N(benchmark::State &state) {
auto v = ConstructRandomVector(state.range(0));
for (auto _ : state) {
std::sort(v.begin(), v.end());
@@ -163,7 +175,7 @@ BENCHMARK(BM_Complexity_O_N_log_N)
->RangeMultiplier(2)
->Range(1 << 10, 1 << 16)
->Complexity([](benchmark::IterationCount n) {
- return kLog2E * n * log(static_cast<double>(n));
+ return kLog2E * static_cast<double>(n) * log(static_cast<double>(n));
});
BENCHMARK(BM_Complexity_O_N_log_N)
->RangeMultiplier(2)
@@ -178,20 +190,23 @@ const char *lambda_big_o_n_lg_n = "f\\(N\\)";
// Add enum tests
ADD_COMPLEXITY_CASES(n_lg_n_test_name, big_o_n_lg_n_test_name,
- rms_o_n_lg_n_test_name, enum_auto_big_o_n_lg_n);
+ rms_o_n_lg_n_test_name, enum_auto_big_o_n_lg_n,
+ /*family_index=*/6);
// Add lambda tests
ADD_COMPLEXITY_CASES(n_lg_n_test_name, big_o_n_lg_n_test_name,
- rms_o_n_lg_n_test_name, lambda_big_o_n_lg_n);
+ rms_o_n_lg_n_test_name, lambda_big_o_n_lg_n,
+ /*family_index=*/7);
// ========================================================================= //
// -------- Testing formatting of Complexity with captured args ------------ //
// ========================================================================= //
-void BM_ComplexityCaptureArgs(benchmark::State& state, int n) {
+void BM_ComplexityCaptureArgs(benchmark::State &state, int n) {
for (auto _ : state) {
// This test requires a non-zero CPU time to avoid divide-by-zero
- benchmark::DoNotOptimize(state.iterations());
+ auto iterations = state.iterations();
+ benchmark::DoNotOptimize(iterations);
}
state.SetComplexityN(n);
}
@@ -204,7 +219,7 @@ const std::string complexity_capture_name =
"BM_ComplexityCaptureArgs/capture_test";
ADD_COMPLEXITY_CASES(complexity_capture_name, complexity_capture_name + "_BigO",
- complexity_capture_name + "_RMS", "N");
+ complexity_capture_name + "_RMS", "N", /*family_index=*/9);
// ========================================================================= //
// --------------------------- TEST CASES END ------------------------------ //
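Across this file the patch consistently binds the interesting expression to a named local and passes that lvalue to benchmark::DoNotOptimize (instead of a temporary or an address), and it threads an explicit family_index argument through ADD_COMPLEXITY_CASES. A minimal, self-contained sketch of the lvalue pattern; the benchmark name and ranges here are illustrative, not part of the patch:

#include <algorithm>
#include <vector>

#include "benchmark/benchmark.h"

// Hypothetical benchmark showing the pattern used throughout the tests above:
// store the result in a local first, then hand that lvalue to DoNotOptimize.
static void BM_FindWorstCase(benchmark::State& state) {
  std::vector<int> v(static_cast<size_t>(state.range(0)), 1);
  const int needle = -1;  // never present, so std::find scans the whole vector
  for (auto _ : state) {
    auto it = std::find(v.begin(), v.end(), needle);
    benchmark::DoNotOptimize(it);  // lvalue, not a temporary expression
  }
  state.SetComplexityN(state.range(0));
}
BENCHMARK(BM_FindWorstCase)->Range(1 << 10, 1 << 16)->Complexity(benchmark::oN);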
diff --git a/test/cxx03_test.cc b/test/cxx03_test.cc
index c4c9a52..9711c1b 100644
--- a/test/cxx03_test.cc
+++ b/test/cxx03_test.cc
@@ -44,8 +44,7 @@ BENCHMARK_TEMPLATE(BM_template1, long);
BENCHMARK_TEMPLATE1(BM_template1, int);
template <class T>
-struct BM_Fixture : public ::benchmark::Fixture {
-};
+struct BM_Fixture : public ::benchmark::Fixture {};
BENCHMARK_TEMPLATE_F(BM_Fixture, BM_template1, long)(benchmark::State& state) {
BM_empty(state);
@@ -55,8 +54,8 @@ BENCHMARK_TEMPLATE1_F(BM_Fixture, BM_template2, int)(benchmark::State& state) {
}
void BM_counters(benchmark::State& state) {
- BM_empty(state);
- state.counters["Foo"] = 2;
+ BM_empty(state);
+ state.counters["Foo"] = 2;
}
BENCHMARK(BM_counters);
diff --git a/test/diagnostics_test.cc b/test/diagnostics_test.cc
index dd64a33..0cd3edb 100644
--- a/test/diagnostics_test.cc
+++ b/test/diagnostics_test.cc
@@ -26,7 +26,8 @@ void TestHandler() {
}
void try_invalid_pause_resume(benchmark::State& state) {
-#if !defined(TEST_BENCHMARK_LIBRARY_HAS_NO_ASSERTIONS) && !defined(TEST_HAS_NO_EXCEPTIONS)
+#if !defined(TEST_BENCHMARK_LIBRARY_HAS_NO_ASSERTIONS) && \
+ !defined(TEST_HAS_NO_EXCEPTIONS)
try {
state.PauseTiming();
std::abort();
@@ -48,7 +49,8 @@ void BM_diagnostic_test(benchmark::State& state) {
if (called_once == false) try_invalid_pause_resume(state);
for (auto _ : state) {
- benchmark::DoNotOptimize(state.iterations());
+ auto iterations = state.iterations();
+ benchmark::DoNotOptimize(iterations);
}
if (called_once == false) try_invalid_pause_resume(state);
@@ -57,14 +59,14 @@ void BM_diagnostic_test(benchmark::State& state) {
}
BENCHMARK(BM_diagnostic_test);
-
void BM_diagnostic_test_keep_running(benchmark::State& state) {
static bool called_once = false;
if (called_once == false) try_invalid_pause_resume(state);
- while(state.KeepRunning()) {
- benchmark::DoNotOptimize(state.iterations());
+ while (state.KeepRunning()) {
+ auto iterations = state.iterations();
+ benchmark::DoNotOptimize(iterations);
}
if (called_once == false) try_invalid_pause_resume(state);
@@ -74,7 +76,16 @@ void BM_diagnostic_test_keep_running(benchmark::State& state) {
BENCHMARK(BM_diagnostic_test_keep_running);
int main(int argc, char* argv[]) {
+#ifdef NDEBUG
+ // This test is exercising functionality for debug builds, which are not
+ // available in release builds. Skip the test if we are in that environment
+ // to avoid a test failure.
+ std::cout << "Diagnostic test disabled in release build" << std::endl;
+ (void)argc;
+ (void)argv;
+#else
benchmark::internal::GetAbortHandler() = &TestHandler;
benchmark::Initialize(&argc, argv);
benchmark::RunSpecifiedBenchmarks();
+#endif
}
diff --git a/test/display_aggregates_only_test.cc b/test/display_aggregates_only_test.cc
index 3c36d3f..6ad65e7 100644
--- a/test/display_aggregates_only_test.cc
+++ b/test/display_aggregates_only_test.cc
@@ -19,21 +19,23 @@ BENCHMARK(BM_SummaryRepeat)->Repetitions(3)->DisplayAggregatesOnly();
int main(int argc, char* argv[]) {
const std::string output = GetFileReporterOutput(argc, argv);
- if (SubstrCnt(output, "\"name\": \"BM_SummaryRepeat/repeats:3") != 6 ||
+ if (SubstrCnt(output, "\"name\": \"BM_SummaryRepeat/repeats:3") != 7 ||
SubstrCnt(output, "\"name\": \"BM_SummaryRepeat/repeats:3\"") != 3 ||
SubstrCnt(output, "\"name\": \"BM_SummaryRepeat/repeats:3_mean\"") != 1 ||
SubstrCnt(output, "\"name\": \"BM_SummaryRepeat/repeats:3_median\"") !=
1 ||
SubstrCnt(output, "\"name\": \"BM_SummaryRepeat/repeats:3_stddev\"") !=
- 1) {
- std::cout << "Precondition mismatch. Expected to only find 6 "
+ 1 ||
+ SubstrCnt(output, "\"name\": \"BM_SummaryRepeat/repeats:3_cv\"") != 1) {
+ std::cout << "Precondition mismatch. Expected to only find 8 "
"occurrences of \"BM_SummaryRepeat/repeats:3\" substring:\n"
"\"name\": \"BM_SummaryRepeat/repeats:3\", "
"\"name\": \"BM_SummaryRepeat/repeats:3\", "
"\"name\": \"BM_SummaryRepeat/repeats:3\", "
"\"name\": \"BM_SummaryRepeat/repeats:3_mean\", "
"\"name\": \"BM_SummaryRepeat/repeats:3_median\", "
- "\"name\": \"BM_SummaryRepeat/repeats:3_stddev\"\nThe entire "
+ "\"name\": \"BM_SummaryRepeat/repeats:3_stddev\", "
+ "\"name\": \"BM_SummaryRepeat/repeats:3_cv\"\nThe entire "
"output:\n";
std::cout << output;
return 1;
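The check above now also counts the new "_cv" (coefficient of variation) aggregate that repeated runs emit next to _mean/_median/_stddev. Illustrative registration with a hypothetical benchmark name; DisplayAggregatesOnly() trims the console to the aggregates while the file reporter still receives every repetition plus all four aggregates, which is what the counts above verify:

#include "benchmark/benchmark.h"

static void BM_Repeated(benchmark::State& state) {  // name is illustrative
  for (auto _ : state) {
  }
}
// File/JSON output: three repetition entries plus _mean, _median, _stddev, _cv.
BENCHMARK(BM_Repeated)->Repetitions(3)->DisplayAggregatesOnly();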
diff --git a/test/donotoptimize_assembly_test.cc b/test/donotoptimize_assembly_test.cc
index d4b0bab..dc286f5 100644
--- a/test/donotoptimize_assembly_test.cc
+++ b/test/donotoptimize_assembly_test.cc
@@ -3,19 +3,23 @@
#ifdef __clang__
#pragma clang diagnostic ignored "-Wreturn-type"
#endif
+BENCHMARK_DISABLE_DEPRECATED_WARNING
extern "C" {
extern int ExternInt;
extern int ExternInt2;
extern int ExternInt3;
+extern int BigArray[2049];
+
+const int ConstBigArray[2049]{};
inline int Add42(int x) { return x + 42; }
struct NotTriviallyCopyable {
NotTriviallyCopyable();
explicit NotTriviallyCopyable(int x) : value(x) {}
- NotTriviallyCopyable(NotTriviallyCopyable const&);
+ NotTriviallyCopyable(NotTriviallyCopyable const &);
int value;
};
@@ -24,7 +28,14 @@ struct Large {
int data[2];
};
+struct ExtraLarge {
+ int arr[2049];
+};
}
+
+extern ExtraLarge ExtraLargeObj;
+const ExtraLarge ConstExtraLargeObj{};
+
// CHECK-LABEL: test_with_rvalue:
extern "C" void test_with_rvalue() {
benchmark::DoNotOptimize(Add42(0));
@@ -69,6 +80,22 @@ extern "C" void test_with_large_lvalue() {
// CHECK: ret
}
+// CHECK-LABEL: test_with_extra_large_lvalue_with_op:
+extern "C" void test_with_extra_large_lvalue_with_op() {
+ ExtraLargeObj.arr[16] = 42;
+ benchmark::DoNotOptimize(ExtraLargeObj);
+ // CHECK: movl $42, ExtraLargeObj+64(%rip)
+ // CHECK: ret
+}
+
+// CHECK-LABEL: test_with_big_array_with_op
+extern "C" void test_with_big_array_with_op() {
+ BigArray[16] = 42;
+ benchmark::DoNotOptimize(BigArray);
+ // CHECK: movl $42, BigArray+64(%rip)
+ // CHECK: ret
+}
+
// CHECK-LABEL: test_with_non_trivial_lvalue:
extern "C" void test_with_non_trivial_lvalue() {
NotTriviallyCopyable NTC(ExternInt);
@@ -97,6 +124,18 @@ extern "C" void test_with_large_const_lvalue() {
// CHECK: ret
}
+// CHECK-LABEL: test_with_const_extra_large_obj:
+extern "C" void test_with_const_extra_large_obj() {
+ benchmark::DoNotOptimize(ConstExtraLargeObj);
+ // CHECK: ret
+}
+
+// CHECK-LABEL: test_with_const_big_array
+extern "C" void test_with_const_big_array() {
+ benchmark::DoNotOptimize(ConstBigArray);
+ // CHECK: ret
+}
+
// CHECK-LABEL: test_with_non_trivial_const_lvalue:
extern "C" void test_with_non_trivial_const_lvalue() {
const NotTriviallyCopyable Obj(ExternInt);
@@ -118,8 +157,7 @@ extern "C" int test_div_by_two(int input) {
// CHECK-LABEL: test_inc_integer:
extern "C" int test_inc_integer() {
int x = 0;
- for (int i=0; i < 5; ++i)
- benchmark::DoNotOptimize(++x);
+ for (int i = 0; i < 5; ++i) benchmark::DoNotOptimize(++x);
// CHECK: movl $1, [[DEST:.*]]
// CHECK: {{(addl \$1,|incl)}} [[DEST]]
// CHECK: {{(addl \$1,|incl)}} [[DEST]]
@@ -147,7 +185,7 @@ extern "C" void test_pointer_const_lvalue() {
// CHECK-CLANG: movq %rax, -{{[0-9]+}}(%[[REG:[a-z]+]])
// CHECK: ret
int x = 42;
- int * const xp = &x;
+ int *const xp = &x;
benchmark::DoNotOptimize(xp);
}
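The new assembly checks above cover objects and arrays too large to travel through registers (2049 ints). A hedged sketch of the same idea at the benchmark level, with an illustrative type name; the CHECK lines in the test are what actually pin down the generated code, this only shows the shape of the call:

#include "benchmark/benchmark.h"

struct Huge {  // illustrative stand-in for ExtraLarge / BigArray
  int arr[2049];
};

static void BM_TouchHuge(benchmark::State& state) {
  Huge h{};
  for (auto _ : state) {
    h.arr[16] = 42;               // the store the assembly test looks for
    benchmark::DoNotOptimize(h);  // keeps the store from being optimized away
  }
}
BENCHMARK(BM_TouchHuge);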
diff --git a/test/donotoptimize_test.cc b/test/donotoptimize_test.cc
index 2ce92d1..04ec938 100644
--- a/test/donotoptimize_test.cc
+++ b/test/donotoptimize_test.cc
@@ -1,33 +1,43 @@
-#include "benchmark/benchmark.h"
-
#include <cstdint>
+#include "benchmark/benchmark.h"
+
namespace {
#if defined(__GNUC__)
-std::uint64_t double_up(const std::uint64_t x) __attribute__((const));
+std::int64_t double_up(const std::int64_t x) __attribute__((const));
#endif
-std::uint64_t double_up(const std::uint64_t x) { return x * 2; }
-}
+std::int64_t double_up(const std::int64_t x) { return x * 2; }
+} // namespace
// Using DoNotOptimize on types like BitRef seem to cause a lot of problems
// with the inline assembly on both GCC and Clang.
struct BitRef {
int index;
- unsigned char &byte;
+ unsigned char& byte;
-public:
+ public:
static BitRef Make() {
static unsigned char arr[2] = {};
BitRef b(1, arr[0]);
return b;
}
-private:
+
+ private:
BitRef(int i, unsigned char& b) : index(i), byte(b) {}
};
int main(int, char*[]) {
// this test verifies compilation of DoNotOptimize() for some types
+ char buffer1[1] = "";
+ benchmark::DoNotOptimize(buffer1);
+
+ char buffer2[2] = "";
+ benchmark::DoNotOptimize(buffer2);
+
+ char buffer3[3] = "";
+ benchmark::DoNotOptimize(buffer3);
+
char buffer8[8] = "";
benchmark::DoNotOptimize(buffer8);
@@ -36,17 +46,24 @@ int main(int, char*[]) {
char buffer1024[1024] = "";
benchmark::DoNotOptimize(buffer1024);
- benchmark::DoNotOptimize(&buffer1024[0]);
+ char* bptr = &buffer1024[0];
+ benchmark::DoNotOptimize(bptr);
int x = 123;
benchmark::DoNotOptimize(x);
- benchmark::DoNotOptimize(&x);
+ int* xp = &x;
+ benchmark::DoNotOptimize(xp);
benchmark::DoNotOptimize(x += 42);
- benchmark::DoNotOptimize(double_up(x));
+ std::int64_t y = double_up(x);
+ benchmark::DoNotOptimize(y);
// These tests are to e
- benchmark::DoNotOptimize(BitRef::Make());
BitRef lval = BitRef::Make();
benchmark::DoNotOptimize(lval);
+
+#ifdef BENCHMARK_HAS_CXX11
+ // Check that accept rvalue.
+ benchmark::DoNotOptimize(BitRef::Make());
+#endif
}
diff --git a/test/filter_test.cc b/test/filter_test.cc
index 0e27065..4c8b8ea 100644
--- a/test/filter_test.cc
+++ b/test/filter_test.cc
@@ -1,36 +1,40 @@
-#include "benchmark/benchmark.h"
-
+#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstdlib>
-
#include <iostream>
#include <limits>
#include <sstream>
#include <string>
+#include "benchmark/benchmark.h"
+
namespace {
class TestReporter : public benchmark::ConsoleReporter {
public:
- virtual bool ReportContext(const Context& context) {
+ bool ReportContext(const Context& context) override {
return ConsoleReporter::ReportContext(context);
};
- virtual void ReportRuns(const std::vector<Run>& report) {
+ void ReportRuns(const std::vector<Run>& report) override {
++count_;
+ max_family_index_ = std::max(max_family_index_, report[0].family_index);
ConsoleReporter::ReportRuns(report);
};
- TestReporter() : count_(0) {}
+ TestReporter() : count_(0), max_family_index_(0) {}
- virtual ~TestReporter() {}
+ ~TestReporter() override {}
- size_t GetCount() const { return count_; }
+ int GetCount() const { return count_; }
+
+ int64_t GetMaxFamilyIndex() const { return max_family_index_; }
private:
- mutable size_t count_;
+ mutable int count_;
+ mutable int64_t max_family_index_;
};
} // end namespace
@@ -65,7 +69,7 @@ static void BM_FooBa(benchmark::State& state) {
}
BENCHMARK(BM_FooBa);
-int main(int argc, char **argv) {
+int main(int argc, char** argv) {
bool list_only = false;
for (int i = 0; i < argc; ++i)
list_only |= std::string(argv[i]).find("--benchmark_list_tests") !=
@@ -74,13 +78,13 @@ int main(int argc, char **argv) {
benchmark::Initialize(&argc, argv);
TestReporter test_reporter;
- const size_t returned_count =
- benchmark::RunSpecifiedBenchmarks(&test_reporter);
+ const int64_t returned_count =
+ static_cast<int64_t>(benchmark::RunSpecifiedBenchmarks(&test_reporter));
if (argc == 2) {
// Make sure we ran all of the tests
std::stringstream ss(argv[1]);
- size_t expected_return;
+ int64_t expected_return;
ss >> expected_return;
if (returned_count != expected_return) {
@@ -90,14 +94,23 @@ int main(int argc, char **argv) {
return -1;
}
- const size_t expected_reports = list_only ? 0 : expected_return;
- const size_t reports_count = test_reporter.GetCount();
+ const int64_t expected_reports = list_only ? 0 : expected_return;
+ const int64_t reports_count = test_reporter.GetCount();
if (reports_count != expected_reports) {
std::cerr << "ERROR: Expected " << expected_reports
<< " tests to be run but reported_count = " << reports_count
<< std::endl;
return -1;
}
+
+ const int64_t max_family_index = test_reporter.GetMaxFamilyIndex();
+ const int64_t num_families = reports_count == 0 ? 0 : 1 + max_family_index;
+ if (num_families != expected_reports) {
+ std::cerr << "ERROR: Expected " << expected_reports
+ << " test families to be run but num_families = "
+ << num_families << std::endl;
+ return -1;
+ }
}
return 0;
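The reporter in this test now also tracks the largest family_index it sees, so the test can cross-check the number of benchmark families against the number of reports. A minimal sketch of that reporter shape, with an illustrative class name:

#include <algorithm>
#include <cstdint>
#include <vector>

#include "benchmark/benchmark.h"

class FamilyIndexReporter : public benchmark::ConsoleReporter {
 public:
  void ReportRuns(const std::vector<Run>& report) override {
    max_family_index_ = std::max(max_family_index_, report[0].family_index);
    ConsoleReporter::ReportRuns(report);
  }
  int64_t max_family_index() const { return max_family_index_; }

 private:
  int64_t max_family_index_ = 0;
};
// Driven the same way as in the test: benchmark::RunSpecifiedBenchmarks(&rep).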
diff --git a/test/fixture_test.cc b/test/fixture_test.cc
index a331c7d..d1093eb 100644
--- a/test/fixture_test.cc
+++ b/test/fixture_test.cc
@@ -1,33 +1,33 @@
-#include "benchmark/benchmark.h"
-
#include <cassert>
#include <memory>
+#include "benchmark/benchmark.h"
+
#define FIXTURE_BECHMARK_NAME MyFixture
class FIXTURE_BECHMARK_NAME : public ::benchmark::Fixture {
public:
- void SetUp(const ::benchmark::State& state) {
- if (state.thread_index == 0) {
+ void SetUp(const ::benchmark::State& state) override {
+ if (state.thread_index() == 0) {
assert(data.get() == nullptr);
data.reset(new int(42));
}
}
- void TearDown(const ::benchmark::State& state) {
- if (state.thread_index == 0) {
+ void TearDown(const ::benchmark::State& state) override {
+ if (state.thread_index() == 0) {
assert(data.get() != nullptr);
data.reset();
}
}
- ~FIXTURE_BECHMARK_NAME() { assert(data == nullptr); }
+ ~FIXTURE_BECHMARK_NAME() override { assert(data == nullptr); }
std::unique_ptr<int> data;
};
-BENCHMARK_F(FIXTURE_BECHMARK_NAME, Foo)(benchmark::State &st) {
+BENCHMARK_F(FIXTURE_BECHMARK_NAME, Foo)(benchmark::State& st) {
assert(data.get() != nullptr);
assert(*data == 42);
for (auto _ : st) {
@@ -35,7 +35,7 @@ BENCHMARK_F(FIXTURE_BECHMARK_NAME, Foo)(benchmark::State &st) {
}
BENCHMARK_DEFINE_F(FIXTURE_BECHMARK_NAME, Bar)(benchmark::State& st) {
- if (st.thread_index == 0) {
+ if (st.thread_index() == 0) {
assert(data.get() != nullptr);
assert(*data == 42);
}
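state.thread_index is now an accessor, so the fixture calls state.thread_index() rather than reading a public data member. Minimal sketch of the updated idiom, with a hypothetical fixture name:

#include <memory>

#include "benchmark/benchmark.h"

class SharedDataFixture : public ::benchmark::Fixture {
 public:
  void SetUp(const ::benchmark::State& state) override {
    if (state.thread_index() == 0) {  // only the first thread allocates
      data.reset(new int(42));
    }
  }
  void TearDown(const ::benchmark::State& state) override {
    if (state.thread_index() == 0) data.reset();
  }
  std::unique_ptr<int> data;
};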
diff --git a/test/internal_threading_test.cc b/test/internal_threading_test.cc
index 039d7c1..62b5b95 100644
--- a/test/internal_threading_test.cc
+++ b/test/internal_threading_test.cc
@@ -3,6 +3,7 @@
#include <chrono>
#include <thread>
+
#include "../src/timers.h"
#include "benchmark/benchmark.h"
#include "output_test.h"
diff --git a/test/link_main_test.cc b/test/link_main_test.cc
index 241ad5c..e806500 100644
--- a/test/link_main_test.cc
+++ b/test/link_main_test.cc
@@ -2,7 +2,8 @@
void BM_empty(benchmark::State& state) {
for (auto _ : state) {
- benchmark::DoNotOptimize(state.iterations());
+ auto iterations = state.iterations();
+ benchmark::DoNotOptimize(iterations);
}
}
BENCHMARK(BM_empty);
diff --git a/test/map_test.cc b/test/map_test.cc
index dbf7982..0fdba7c 100644
--- a/test/map_test.cc
+++ b/test/map_test.cc
@@ -1,8 +1,8 @@
-#include "benchmark/benchmark.h"
-
#include <cstdlib>
#include <map>
+#include "benchmark/benchmark.h"
+
namespace {
std::map<int, int> ConstructRandomMap(int size) {
@@ -24,7 +24,8 @@ static void BM_MapLookup(benchmark::State& state) {
m = ConstructRandomMap(size);
state.ResumeTiming();
for (int i = 0; i < size; ++i) {
- benchmark::DoNotOptimize(m.find(std::rand() % size));
+ auto it = m.find(std::rand() % size);
+ benchmark::DoNotOptimize(it);
}
}
state.SetItemsProcessed(state.iterations() * size);
@@ -34,11 +35,11 @@ BENCHMARK(BM_MapLookup)->Range(1 << 3, 1 << 12);
// Using fixtures.
class MapFixture : public ::benchmark::Fixture {
public:
- void SetUp(const ::benchmark::State& st) {
+ void SetUp(const ::benchmark::State& st) override {
m = ConstructRandomMap(static_cast<int>(st.range(0)));
}
- void TearDown(const ::benchmark::State&) { m.clear(); }
+ void TearDown(const ::benchmark::State&) override { m.clear(); }
std::map<int, int> m;
};
@@ -47,7 +48,8 @@ BENCHMARK_DEFINE_F(MapFixture, Lookup)(benchmark::State& state) {
const int size = static_cast<int>(state.range(0));
for (auto _ : state) {
for (int i = 0; i < size; ++i) {
- benchmark::DoNotOptimize(m.find(std::rand() % size));
+ auto it = m.find(std::rand() % size);
+ benchmark::DoNotOptimize(it);
}
}
state.SetItemsProcessed(state.iterations() * size);
diff --git a/test/memory_manager_test.cc b/test/memory_manager_test.cc
index 90bed16..d94bd51 100644
--- a/test/memory_manager_test.cc
+++ b/test/memory_manager_test.cc
@@ -5,25 +5,28 @@
#include "output_test.h"
class TestMemoryManager : public benchmark::MemoryManager {
- void Start() {}
- void Stop(Result* result) {
- result->num_allocs = 42;
- result->max_bytes_used = 42000;
+ void Start() override {}
+ void Stop(Result& result) override {
+ result.num_allocs = 42;
+ result.max_bytes_used = 42000;
}
};
void BM_empty(benchmark::State& state) {
for (auto _ : state) {
- benchmark::DoNotOptimize(state.iterations());
+ auto iterations = state.iterations();
+ benchmark::DoNotOptimize(iterations);
}
}
BENCHMARK(BM_empty);
ADD_CASES(TC_ConsoleOut, {{"^BM_empty %console_report$"}});
ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_empty\",$"},
+ {"\"family_index\": 0,$", MR_Next},
+ {"\"per_family_instance_index\": 0,$", MR_Next},
{"\"run_name\": \"BM_empty\",$", MR_Next},
{"\"run_type\": \"iteration\",$", MR_Next},
- {"\"repetitions\": 0,$", MR_Next},
+ {"\"repetitions\": 1,$", MR_Next},
{"\"repetition_index\": 0,$", MR_Next},
{"\"threads\": 1,$", MR_Next},
{"\"iterations\": %int,$", MR_Next},
diff --git a/test/min_time_parse_gtest.cc b/test/min_time_parse_gtest.cc
new file mode 100644
index 0000000..e2bdf67
--- /dev/null
+++ b/test/min_time_parse_gtest.cc
@@ -0,0 +1,30 @@
+#include "../src/benchmark_runner.h"
+#include "gtest/gtest.h"
+
+namespace {
+
+TEST(ParseMinTimeTest, InvalidInput) {
+#if GTEST_HAS_DEATH_TEST
+ // Tests only runnable in debug mode (when BM_CHECK is enabled).
+#ifndef NDEBUG
+#ifndef TEST_BENCHMARK_LIBRARY_HAS_NO_ASSERTIONS
+ ASSERT_DEATH_IF_SUPPORTED(
+ { benchmark::internal::ParseBenchMinTime("abc"); },
+ "Malformed seconds value passed to --benchmark_min_time: `abc`");
+
+ ASSERT_DEATH_IF_SUPPORTED(
+ { benchmark::internal::ParseBenchMinTime("123ms"); },
+ "Malformed seconds value passed to --benchmark_min_time: `123ms`");
+
+ ASSERT_DEATH_IF_SUPPORTED(
+ { benchmark::internal::ParseBenchMinTime("1z"); },
+ "Malformed seconds value passed to --benchmark_min_time: `1z`");
+
+ ASSERT_DEATH_IF_SUPPORTED(
+ { benchmark::internal::ParseBenchMinTime("1hs"); },
+ "Malformed seconds value passed to --benchmark_min_time: `1hs`");
+#endif
+#endif
+#endif
+}
+} // namespace
diff --git a/test/multiple_ranges_test.cc b/test/multiple_ranges_test.cc
index b25f40e..5300a96 100644
--- a/test/multiple_ranges_test.cc
+++ b/test/multiple_ranges_test.cc
@@ -1,10 +1,10 @@
-#include "benchmark/benchmark.h"
-
#include <cassert>
#include <iostream>
#include <set>
#include <vector>
+#include "benchmark/benchmark.h"
+
class MultipleRangesFixture : public ::benchmark::Fixture {
public:
MultipleRangesFixture()
@@ -28,7 +28,7 @@ class MultipleRangesFixture : public ::benchmark::Fixture {
{2, 7, 15},
{7, 6, 3}}) {}
- void SetUp(const ::benchmark::State& state) {
+ void SetUp(const ::benchmark::State& state) override {
std::vector<int64_t> ranges = {state.range(0), state.range(1),
state.range(2)};
@@ -39,10 +39,10 @@ class MultipleRangesFixture : public ::benchmark::Fixture {
// NOTE: This is not TearDown as we want to check after _all_ runs are
// complete.
- virtual ~MultipleRangesFixture() {
+ ~MultipleRangesFixture() override {
if (actualValues != expectedValues) {
std::cout << "EXPECTED\n";
- for (auto v : expectedValues) {
+ for (const auto& v : expectedValues) {
std::cout << "{";
for (int64_t iv : v) {
std::cout << iv << ", ";
@@ -50,7 +50,7 @@ class MultipleRangesFixture : public ::benchmark::Fixture {
std::cout << "}\n";
}
std::cout << "ACTUAL\n";
- for (auto v : actualValues) {
+ for (const auto& v : actualValues) {
std::cout << "{";
for (int64_t iv : v) {
std::cout << iv << ", ";
diff --git a/test/options_test.cc b/test/options_test.cc
index 9f9a786..a1b209f 100644
--- a/test/options_test.cc
+++ b/test/options_test.cc
@@ -1,7 +1,8 @@
-#include "benchmark/benchmark.h"
#include <chrono>
#include <thread>
+#include "benchmark/benchmark.h"
+
#if defined(NDEBUG)
#undef NDEBUG
#endif
@@ -32,6 +33,8 @@ BENCHMARK(BM_basic)->DenseRange(10, 15);
BENCHMARK(BM_basic)->Args({42, 42});
BENCHMARK(BM_basic)->Ranges({{64, 512}, {64, 512}});
BENCHMARK(BM_basic)->MinTime(0.7);
+BENCHMARK(BM_basic)->MinWarmUpTime(0.8);
+BENCHMARK(BM_basic)->MinTime(0.1)->MinWarmUpTime(0.2);
BENCHMARK(BM_basic)->UseRealTime();
BENCHMARK(BM_basic)->ThreadRange(2, 4);
BENCHMARK(BM_basic)->ThreadPerCpu();
@@ -64,12 +67,10 @@ void BM_explicit_iteration_count(benchmark::State& state) {
// Test that the requested iteration count is respected.
assert(state.max_iterations == 42);
- size_t actual_iterations = 0;
- for (auto _ : state)
- ++actual_iterations;
+ for (auto _ : state) {
+ }
assert(state.iterations() == state.max_iterations);
assert(state.iterations() == 42);
-
}
BENCHMARK(BM_explicit_iteration_count)->Iterations(42);
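The options test now also exercises the new MinWarmUpTime() setting, which runs un-measured warm-up iterations for at least the given number of seconds before timing starts and can be combined with MinTime(). Illustrative registration; the benchmark name and the 0.2/1.0 durations are arbitrary:

#include "benchmark/benchmark.h"

static void BM_WarmedUp(benchmark::State& state) {
  for (auto _ : state) {
    benchmark::ClobberMemory();  // any cheap body will do here
  }
}
BENCHMARK(BM_WarmedUp)->MinWarmUpTime(0.2)->MinTime(1.0);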
diff --git a/test/output_test.h b/test/output_test.h
index 9385761..c08fe1d 100644
--- a/test/output_test.h
+++ b/test/output_test.h
@@ -85,7 +85,7 @@ std::string GetFileReporterOutput(int argc, char* argv[]);
struct Results;
typedef std::function<void(Results const&)> ResultsCheckFn;
-size_t AddChecker(const char* bm_name_pattern, ResultsCheckFn fn);
+size_t AddChecker(const std::string& bm_name_pattern, const ResultsCheckFn& fn);
// Class holding the results of a benchmark.
// It is passed in calls to checker functions.
@@ -113,13 +113,11 @@ struct Results {
return NumIterations() * GetTime(kRealTime);
}
// get the cpu_time duration of the benchmark in seconds
- double DurationCPUTime() const {
- return NumIterations() * GetTime(kCpuTime);
- }
+ double DurationCPUTime() const { return NumIterations() * GetTime(kCpuTime); }
// get the string for a result by name, or nullptr if the name
// is not found
- const std::string* Get(const char* entry_name) const {
+ const std::string* Get(const std::string& entry_name) const {
auto it = values.find(entry_name);
if (it == values.end()) return nullptr;
return &it->second;
@@ -128,12 +126,12 @@ struct Results {
// get a result by name, parsed as a specific type.
// NOTE: for counters, use GetCounterAs instead.
template <class T>
- T GetAs(const char* entry_name) const;
+ T GetAs(const std::string& entry_name) const;
// counters are written as doubles, so they have to be read first
// as a double, and only then converted to the asked type.
template <class T>
- T GetCounterAs(const char* entry_name) const {
+ T GetCounterAs(const std::string& entry_name) const {
double dval = GetAs<double>(entry_name);
T tval = static_cast<T>(dval);
return tval;
@@ -141,14 +139,14 @@ struct Results {
};
template <class T>
-T Results::GetAs(const char* entry_name) const {
+T Results::GetAs(const std::string& entry_name) const {
auto* sv = Get(entry_name);
- CHECK(sv != nullptr && !sv->empty());
+ BM_CHECK(sv != nullptr && !sv->empty());
std::stringstream ss;
ss << *sv;
T out;
ss >> out;
- CHECK(!ss.fail());
+ BM_CHECK(!ss.fail());
return out;
}
@@ -158,8 +156,8 @@ T Results::GetAs(const char* entry_name) const {
// clang-format off
-#define _CHECK_RESULT_VALUE(entry, getfn, var_type, var_name, relationship, value) \
- CONCAT(CHECK_, relationship) \
+#define CHECK_RESULT_VALUE_IMPL(entry, getfn, var_type, var_name, relationship, value) \
+ CONCAT(BM_CHECK_, relationship) \
(entry.getfn< var_type >(var_name), (value)) << "\n" \
<< __FILE__ << ":" << __LINE__ << ": " << (entry).name << ":\n" \
<< __FILE__ << ":" << __LINE__ << ": " \
@@ -169,8 +167,8 @@ T Results::GetAs(const char* entry_name) const {
// check with tolerance. eps_factor is the tolerance window, which is
// interpreted relative to value (eg, 0.1 means 10% of value).
-#define _CHECK_FLOAT_RESULT_VALUE(entry, getfn, var_type, var_name, relationship, value, eps_factor) \
- CONCAT(CHECK_FLOAT_, relationship) \
+#define CHECK_FLOAT_RESULT_VALUE_IMPL(entry, getfn, var_type, var_name, relationship, value, eps_factor) \
+ CONCAT(BM_CHECK_FLOAT_, relationship) \
(entry.getfn< var_type >(var_name), (value), (eps_factor) * (value)) << "\n" \
<< __FILE__ << ":" << __LINE__ << ": " << (entry).name << ":\n" \
<< __FILE__ << ":" << __LINE__ << ": " \
@@ -187,16 +185,16 @@ T Results::GetAs(const char* entry_name) const {
<< "%)"
#define CHECK_RESULT_VALUE(entry, var_type, var_name, relationship, value) \
- _CHECK_RESULT_VALUE(entry, GetAs, var_type, var_name, relationship, value)
+ CHECK_RESULT_VALUE_IMPL(entry, GetAs, var_type, var_name, relationship, value)
#define CHECK_COUNTER_VALUE(entry, var_type, var_name, relationship, value) \
- _CHECK_RESULT_VALUE(entry, GetCounterAs, var_type, var_name, relationship, value)
+ CHECK_RESULT_VALUE_IMPL(entry, GetCounterAs, var_type, var_name, relationship, value)
#define CHECK_FLOAT_RESULT_VALUE(entry, var_name, relationship, value, eps_factor) \
- _CHECK_FLOAT_RESULT_VALUE(entry, GetAs, double, var_name, relationship, value, eps_factor)
+ CHECK_FLOAT_RESULT_VALUE_IMPL(entry, GetAs, double, var_name, relationship, value, eps_factor)
#define CHECK_FLOAT_COUNTER_VALUE(entry, var_name, relationship, value, eps_factor) \
- _CHECK_FLOAT_RESULT_VALUE(entry, GetCounterAs, double, var_name, relationship, value, eps_factor)
+ CHECK_FLOAT_RESULT_VALUE_IMPL(entry, GetCounterAs, double, var_name, relationship, value, eps_factor)
// clang-format on
diff --git a/test/output_test_helper.cc b/test/output_test_helper.cc
index 1aebc55..2567370 100644
--- a/test/output_test_helper.cc
+++ b/test/output_test_helper.cc
@@ -10,6 +10,7 @@
#include "../src/benchmark_api_internal.h"
#include "../src/check.h" // NOTE: check.h is for internal use only!
+#include "../src/log.h" // NOTE: log.h is for internal use only
#include "../src/re.h" // NOTE: re.h is for internal use only
#include "output_test.h"
@@ -40,14 +41,17 @@ SubMap& GetSubstitutions() {
// clang-format off
static std::string safe_dec_re = "[0-9]*[.]?[0-9]+([eE][-+][0-9]+)?";
static std::string time_re = "([0-9]+[.])?[0-9]+";
+ static std::string percentage_re = "[0-9]+[.][0-9]{2}";
static SubMap map = {
{"%float", "[0-9]*[.]?[0-9]+([eE][-+][0-9]+)?"},
// human-readable float
- {"%hrfloat", "[0-9]*[.]?[0-9]+([eE][-+][0-9]+)?[kMGTPEZYmunpfazy]?"},
+ {"%hrfloat", "[0-9]*[.]?[0-9]+([eE][-+][0-9]+)?[kKMGTPEZYmunpfazy]?i?"},
+ {"%percentage", percentage_re},
{"%int", "[ ]*[0-9]+"},
{" %s ", "[ ]+"},
{"%time", "[ ]*" + time_re + "[ ]+ns"},
{"%console_report", "[ ]*" + time_re + "[ ]+ns [ ]*" + time_re + "[ ]+ns [ ]*[0-9]+"},
+ {"%console_percentage_report", "[ ]*" + percentage_re + "[ ]+% [ ]*" + percentage_re + "[ ]+% [ ]*[0-9]+"},
{"%console_us_report", "[ ]*" + time_re + "[ ]+us [ ]*" + time_re + "[ ]+us [ ]*[0-9]+"},
{"%console_ms_report", "[ ]*" + time_re + "[ ]+ms [ ]*" + time_re + "[ ]+ms [ ]*[0-9]+"},
{"%console_s_report", "[ ]*" + time_re + "[ ]+s [ ]*" + time_re + "[ ]+s [ ]*[0-9]+"},
@@ -94,27 +98,27 @@ void CheckCase(std::stringstream& remaining_output, TestCase const& TC,
bool on_first = true;
std::string line;
while (remaining_output.eof() == false) {
- CHECK(remaining_output.good());
+ BM_CHECK(remaining_output.good());
std::getline(remaining_output, line);
if (on_first) {
first_line = line;
on_first = false;
}
for (const auto& NC : not_checks) {
- CHECK(!NC.regex->Match(line))
+ BM_CHECK(!NC.regex->Match(line))
<< "Unexpected match for line \"" << line << "\" for MR_Not regex \""
<< NC.regex_str << "\""
<< "\n actual regex string \"" << TC.substituted_regex << "\""
<< "\n started matching near: " << first_line;
}
if (TC.regex->Match(line)) return;
- CHECK(TC.match_rule != MR_Next)
+ BM_CHECK(TC.match_rule != MR_Next)
<< "Expected line \"" << line << "\" to match regex \"" << TC.regex_str
<< "\""
<< "\n actual regex string \"" << TC.substituted_regex << "\""
<< "\n started matching near: " << first_line;
}
- CHECK(remaining_output.eof() == false)
+ BM_CHECK(remaining_output.eof() == false)
<< "End of output reached before match for regex \"" << TC.regex_str
<< "\" was found"
<< "\n actual regex string \"" << TC.substituted_regex << "\""
@@ -137,14 +141,14 @@ void CheckCases(TestCaseList const& checks, std::stringstream& output) {
class TestReporter : public benchmark::BenchmarkReporter {
public:
TestReporter(std::vector<benchmark::BenchmarkReporter*> reps)
- : reporters_(reps) {}
+ : reporters_(std::move(reps)) {}
- virtual bool ReportContext(const Context& context) {
+ bool ReportContext(const Context& context) override {
bool last_ret = false;
bool first = true;
for (auto rep : reporters_) {
bool new_ret = rep->ReportContext(context);
- CHECK(first || new_ret == last_ret)
+ BM_CHECK(first || new_ret == last_ret)
<< "Reports return different values for ReportContext";
first = false;
last_ret = new_ret;
@@ -153,10 +157,10 @@ class TestReporter : public benchmark::BenchmarkReporter {
return last_ret;
}
- void ReportRuns(const std::vector<Run>& report) {
+ void ReportRuns(const std::vector<Run>& report) override {
for (auto rep : reporters_) rep->ReportRuns(report);
}
- void Finalize() {
+ void Finalize() override {
for (auto rep : reporters_) rep->Finalize();
}
@@ -179,7 +183,7 @@ class ResultsChecker {
public:
struct PatternAndFn : public TestCase { // reusing TestCase for its regexes
PatternAndFn(const std::string& rx, ResultsCheckFn fn_)
- : TestCase(rx), fn(fn_) {}
+ : TestCase(rx), fn(std::move(fn_)) {}
ResultsCheckFn fn;
};
@@ -187,7 +191,7 @@ class ResultsChecker {
std::vector<Results> results;
std::vector<std::string> field_names;
- void Add(const std::string& entry_pattern, ResultsCheckFn fn);
+ void Add(const std::string& entry_pattern, const ResultsCheckFn& fn);
void CheckResults(std::stringstream& output);
@@ -206,7 +210,8 @@ ResultsChecker& GetResultsChecker() {
}
// add a results checker for a benchmark
-void ResultsChecker::Add(const std::string& entry_pattern, ResultsCheckFn fn) {
+void ResultsChecker::Add(const std::string& entry_pattern,
+ const ResultsCheckFn& fn) {
check_patterns.emplace_back(entry_pattern, fn);
}
@@ -226,7 +231,7 @@ void ResultsChecker::CheckResults(std::stringstream& output) {
std::string line;
bool on_first = true;
while (output.eof() == false) {
- CHECK(output.good());
+ BM_CHECK(output.good());
std::getline(output, line);
if (on_first) {
SetHeader_(line); // this is important
@@ -237,18 +242,17 @@ void ResultsChecker::CheckResults(std::stringstream& output) {
}
// finally we can call the subscribed check functions
for (const auto& p : check_patterns) {
- VLOG(2) << "--------------------------------\n";
- VLOG(2) << "checking for benchmarks matching " << p.regex_str << "...\n";
+ BM_VLOG(2) << "--------------------------------\n";
+ BM_VLOG(2) << "checking for benchmarks matching " << p.regex_str << "...\n";
for (const auto& r : results) {
if (!p.regex->Match(r.name)) {
- VLOG(2) << p.regex_str << " is not matched by " << r.name << "\n";
+ BM_VLOG(2) << p.regex_str << " is not matched by " << r.name << "\n";
continue;
- } else {
- VLOG(2) << p.regex_str << " is matched by " << r.name << "\n";
}
- VLOG(1) << "Checking results of " << r.name << ": ... \n";
+ BM_VLOG(2) << p.regex_str << " is matched by " << r.name << "\n";
+ BM_VLOG(1) << "Checking results of " << r.name << ": ... \n";
p.fn(r);
- VLOG(1) << "Checking results of " << r.name << ": OK.\n";
+ BM_VLOG(1) << "Checking results of " << r.name << ": OK.\n";
}
}
}
@@ -261,9 +265,9 @@ void ResultsChecker::SetHeader_(const std::string& csv_header) {
// set the values for a benchmark
void ResultsChecker::SetValues_(const std::string& entry_csv_line) {
if (entry_csv_line.empty()) return; // some lines are empty
- CHECK(!field_names.empty());
+ BM_CHECK(!field_names.empty());
auto vals = SplitCsv_(entry_csv_line);
- CHECK_EQ(vals.size(), field_names.size());
+ BM_CHECK_EQ(vals.size(), field_names.size());
results.emplace_back(vals[0]); // vals[0] is the benchmark name
auto& entry = results.back();
for (size_t i = 1, e = vals.size(); i < e; ++i) {
@@ -278,7 +282,7 @@ std::vector<std::string> ResultsChecker::SplitCsv_(const std::string& line) {
if (!field_names.empty()) out.reserve(field_names.size());
size_t prev = 0, pos = line.find_first_of(','), curr = pos;
while (pos != line.npos) {
- CHECK(curr > 0);
+ BM_CHECK(curr > 0);
if (line[prev] == '"') ++prev;
if (line[curr - 1] == '"') --curr;
out.push_back(line.substr(prev, curr - prev));
@@ -295,7 +299,7 @@ std::vector<std::string> ResultsChecker::SplitCsv_(const std::string& line) {
} // end namespace internal
-size_t AddChecker(const char* bm_name, ResultsCheckFn fn) {
+size_t AddChecker(const std::string& bm_name, const ResultsCheckFn& fn) {
auto& rc = internal::GetResultsChecker();
rc.Add(bm_name, fn);
return rc.results.size();
@@ -309,32 +313,32 @@ int Results::NumThreads() const {
ss << name.substr(pos + 9, end);
int num = 1;
ss >> num;
- CHECK(!ss.fail());
+ BM_CHECK(!ss.fail());
return num;
}
-double Results::NumIterations() const {
- return GetAs<double>("iterations");
-}
+double Results::NumIterations() const { return GetAs<double>("iterations"); }
double Results::GetTime(BenchmarkTime which) const {
- CHECK(which == kCpuTime || which == kRealTime);
+ BM_CHECK(which == kCpuTime || which == kRealTime);
const char* which_str = which == kCpuTime ? "cpu_time" : "real_time";
double val = GetAs<double>(which_str);
auto unit = Get("time_unit");
- CHECK(unit);
+ BM_CHECK(unit);
if (*unit == "ns") {
return val * 1.e-9;
- } else if (*unit == "us") {
+ }
+ if (*unit == "us") {
return val * 1.e-6;
- } else if (*unit == "ms") {
+ }
+ if (*unit == "ms") {
return val * 1.e-3;
- } else if (*unit == "s") {
+ }
+ if (*unit == "s") {
return val;
- } else {
- CHECK(1 == 0) << "unknown time unit: " << *unit;
- return 0;
}
+ BM_CHECK(1 == 0) << "unknown time unit: " << *unit;
+ return 0;
}
// ========================================================================= //
@@ -348,10 +352,10 @@ TestCase::TestCase(std::string re, int rule)
regex(std::make_shared<benchmark::Regex>()) {
std::string err_str;
regex->Init(substituted_regex, &err_str);
- CHECK(err_str.empty()) << "Could not construct regex \"" << substituted_regex
- << "\""
- << "\n originally \"" << regex_str << "\""
- << "\n got error: " << err_str;
+ BM_CHECK(err_str.empty())
+ << "Could not construct regex \"" << substituted_regex << "\""
+ << "\n originally \"" << regex_str << "\""
+ << "\n got error: " << err_str;
}
int AddCases(TestCaseID ID, std::initializer_list<TestCase> il) {
@@ -380,10 +384,8 @@ int SetSubstitutions(
// Disable deprecated warnings temporarily because we need to reference
// CSVReporter but don't want to trigger -Werror=-Wdeprecated-declarations
-#ifdef __GNUC__
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
-#endif
+BENCHMARK_DISABLE_DEPRECATED_WARNING
+
void RunOutputTests(int argc, char* argv[]) {
using internal::GetTestCaseList;
benchmark::Initialize(&argc, argv);
@@ -392,14 +394,14 @@ void RunOutputTests(int argc, char* argv[]) {
benchmark::JSONReporter JR;
benchmark::CSVReporter CSVR;
struct ReporterTest {
- const char* name;
+ std::string name;
std::vector<TestCase>& output_cases;
std::vector<TestCase>& error_cases;
benchmark::BenchmarkReporter& reporter;
std::stringstream out_stream;
std::stringstream err_stream;
- ReporterTest(const char* n, std::vector<TestCase>& out_tc,
+ ReporterTest(const std::string& n, std::vector<TestCase>& out_tc,
std::vector<TestCase>& err_tc,
benchmark::BenchmarkReporter& br)
: name(n), output_cases(out_tc), error_cases(err_tc), reporter(br) {
@@ -407,12 +409,12 @@ void RunOutputTests(int argc, char* argv[]) {
reporter.SetErrorStream(&err_stream);
}
} TestCases[] = {
- {"ConsoleReporter", GetTestCaseList(TC_ConsoleOut),
+ {std::string("ConsoleReporter"), GetTestCaseList(TC_ConsoleOut),
GetTestCaseList(TC_ConsoleErr), CR},
- {"JSONReporter", GetTestCaseList(TC_JSONOut), GetTestCaseList(TC_JSONErr),
- JR},
- {"CSVReporter", GetTestCaseList(TC_CSVOut), GetTestCaseList(TC_CSVErr),
- CSVR},
+ {std::string("JSONReporter"), GetTestCaseList(TC_JSONOut),
+ GetTestCaseList(TC_JSONErr), JR},
+ {std::string("CSVReporter"), GetTestCaseList(TC_CSVOut),
+ GetTestCaseList(TC_CSVErr), CSVR},
};
// Create the test reporter and run the benchmarks.
@@ -421,7 +423,8 @@ void RunOutputTests(int argc, char* argv[]) {
benchmark::RunSpecifiedBenchmarks(&test_rep);
for (auto& rep_test : TestCases) {
- std::string msg = std::string("\nTesting ") + rep_test.name + " Output\n";
+ std::string msg =
+ std::string("\nTesting ") + rep_test.name + std::string(" Output\n");
std::string banner(msg.size() - 1, '-');
std::cout << banner << msg << banner << "\n";
@@ -438,13 +441,11 @@ void RunOutputTests(int argc, char* argv[]) {
// the checks to subscribees.
auto& csv = TestCases[2];
// would use == but gcc spits a warning
- CHECK(std::strcmp(csv.name, "CSVReporter") == 0);
+ BM_CHECK(csv.name == std::string("CSVReporter"));
internal::GetResultsChecker().CheckResults(csv.out_stream);
}
-#ifdef __GNUC__
-#pragma GCC diagnostic pop
-#endif
+BENCHMARK_RESTORE_DEPRECATED_WARNING
int SubstrCnt(const std::string& haystack, const std::string& pat) {
if (pat.length() == 0) return 0;
@@ -468,9 +469,8 @@ static char RandomHexChar() {
static std::string GetRandomFileName() {
std::string model = "test.%%%%%%";
- for (auto & ch : model) {
- if (ch == '%')
- ch = RandomHexChar();
+ for (auto& ch : model) {
+ if (ch == '%') ch = RandomHexChar();
}
return model;
}
@@ -487,8 +487,7 @@ static std::string GetTempFileName() {
int retries = 3;
while (--retries) {
std::string name = GetRandomFileName();
- if (!FileExists(name))
- return name;
+ if (!FileExists(name)) return name;
}
std::cerr << "Failed to create unique temporary file name" << std::endl;
std::abort();
diff --git a/test/perf_counters_gtest.cc b/test/perf_counters_gtest.cc
new file mode 100644
index 0000000..54c7863
--- /dev/null
+++ b/test/perf_counters_gtest.cc
@@ -0,0 +1,307 @@
+#include <random>
+#include <thread>
+
+#include "../src/perf_counters.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+#ifndef GTEST_SKIP
+struct MsgHandler {
+ void operator=(std::ostream&) {}
+};
+#define GTEST_SKIP() return MsgHandler() = std::cout
+#endif
+
+using benchmark::internal::PerfCounters;
+using benchmark::internal::PerfCountersMeasurement;
+using benchmark::internal::PerfCounterValues;
+using ::testing::AllOf;
+using ::testing::Gt;
+using ::testing::Lt;
+
+namespace {
+const char kGenericPerfEvent1[] = "CYCLES";
+const char kGenericPerfEvent2[] = "INSTRUCTIONS";
+
+TEST(PerfCountersTest, Init) {
+ EXPECT_EQ(PerfCounters::Initialize(), PerfCounters::kSupported);
+}
+
+TEST(PerfCountersTest, OneCounter) {
+ if (!PerfCounters::kSupported) {
+ GTEST_SKIP() << "Performance counters not supported.\n";
+ }
+ EXPECT_TRUE(PerfCounters::Initialize());
+ EXPECT_EQ(PerfCounters::Create({kGenericPerfEvent1}).num_counters(), 1);
+}
+
+TEST(PerfCountersTest, NegativeTest) {
+ if (!PerfCounters::kSupported) {
+ EXPECT_FALSE(PerfCounters::Initialize());
+ return;
+ }
+ EXPECT_TRUE(PerfCounters::Initialize());
+ // Sanity checks
+ // Create() will always create a valid object, even if passed no or
+ // wrong arguments as the new behavior is to warn and drop unsupported
+ // counters
+ EXPECT_EQ(PerfCounters::Create({}).num_counters(), 0);
+ EXPECT_EQ(PerfCounters::Create({""}).num_counters(), 0);
+ EXPECT_EQ(PerfCounters::Create({"not a counter name"}).num_counters(), 0);
+ {
+ // Try sneaking in a bad egg to see if it is filtered out. The
+ // number of counters has to be two, not zero
+ auto counter =
+ PerfCounters::Create({kGenericPerfEvent2, "", kGenericPerfEvent1});
+ EXPECT_EQ(counter.num_counters(), 2);
+ EXPECT_EQ(counter.names(), std::vector<std::string>(
+ {kGenericPerfEvent2, kGenericPerfEvent1}));
+ }
+ {
+ // Try sneaking in an outrageous counter, like a fat finger mistake
+ auto counter = PerfCounters::Create(
+ {kGenericPerfEvent2, "not a counter name", kGenericPerfEvent1});
+ EXPECT_EQ(counter.num_counters(), 2);
+ EXPECT_EQ(counter.names(), std::vector<std::string>(
+ {kGenericPerfEvent2, kGenericPerfEvent1}));
+ }
+ {
+ // Finally try a golden input - it should like both of them
+ EXPECT_EQ(PerfCounters::Create({kGenericPerfEvent1, kGenericPerfEvent2})
+ .num_counters(),
+ 2);
+ }
+ {
+ // Add a bad apple in the end of the chain to check the edges
+ auto counter = PerfCounters::Create(
+ {kGenericPerfEvent1, kGenericPerfEvent2, "bad event name"});
+ EXPECT_EQ(counter.num_counters(), 2);
+ EXPECT_EQ(counter.names(), std::vector<std::string>(
+ {kGenericPerfEvent1, kGenericPerfEvent2}));
+ }
+}
+
+TEST(PerfCountersTest, Read1Counter) {
+ if (!PerfCounters::kSupported) {
+ GTEST_SKIP() << "Test skipped because libpfm is not supported.\n";
+ }
+ EXPECT_TRUE(PerfCounters::Initialize());
+ auto counters = PerfCounters::Create({kGenericPerfEvent1});
+ EXPECT_EQ(counters.num_counters(), 1);
+ PerfCounterValues values1(1);
+ EXPECT_TRUE(counters.Snapshot(&values1));
+ EXPECT_GT(values1[0], 0);
+ PerfCounterValues values2(1);
+ EXPECT_TRUE(counters.Snapshot(&values2));
+ EXPECT_GT(values2[0], 0);
+ EXPECT_GT(values2[0], values1[0]);
+}
+
+TEST(PerfCountersTest, Read2Counters) {
+ if (!PerfCounters::kSupported) {
+ GTEST_SKIP() << "Test skipped because libpfm is not supported.\n";
+ }
+ EXPECT_TRUE(PerfCounters::Initialize());
+ auto counters =
+ PerfCounters::Create({kGenericPerfEvent1, kGenericPerfEvent2});
+ EXPECT_EQ(counters.num_counters(), 2);
+ PerfCounterValues values1(2);
+ EXPECT_TRUE(counters.Snapshot(&values1));
+ EXPECT_GT(values1[0], 0);
+ EXPECT_GT(values1[1], 0);
+ PerfCounterValues values2(2);
+ EXPECT_TRUE(counters.Snapshot(&values2));
+ EXPECT_GT(values2[0], 0);
+ EXPECT_GT(values2[1], 0);
+}
+
+TEST(PerfCountersTest, ReopenExistingCounters) {
+ // This test works in recent and old Intel hardware, Pixel 3, and Pixel 6.
+ // However we cannot make assumptions beyond 2 HW counters due to Pixel 6.
+ if (!PerfCounters::kSupported) {
+ GTEST_SKIP() << "Test skipped because libpfm is not supported.\n";
+ }
+ EXPECT_TRUE(PerfCounters::Initialize());
+ std::vector<std::string> kMetrics({kGenericPerfEvent1});
+ std::vector<PerfCounters> counters(2);
+ for (auto& counter : counters) {
+ counter = PerfCounters::Create(kMetrics);
+ }
+ PerfCounterValues values(1);
+ EXPECT_TRUE(counters[0].Snapshot(&values));
+ EXPECT_TRUE(counters[1].Snapshot(&values));
+}
+
+TEST(PerfCountersTest, CreateExistingMeasurements) {
+ // The test works (i.e. causes read to fail) for the assumptions
+ // about hardware capabilities (i.e. small number (2) hardware
+ // counters) at this date,
+ // the same as previous test ReopenExistingCounters.
+ if (!PerfCounters::kSupported) {
+ GTEST_SKIP() << "Test skipped because libpfm is not supported.\n";
+ }
+ EXPECT_TRUE(PerfCounters::Initialize());
+
+ // This means we will try 10 counters but we can only guarantee
+ // for sure at this time that only 3 will work. Perhaps in the future
+ // we could use libpfm to query for the hardware limits on this
+ // particular platform.
+ const int kMaxCounters = 10;
+ const int kMinValidCounters = 2;
+
+ // Let's use a ubiquitous counter that is guaranteed to work
+ // on all platforms
+ const std::vector<std::string> kMetrics{"cycles"};
+
+ // Cannot create a vector of actual objects because the
+ // copy constructor of PerfCounters is deleted - and so is
+ // implicitly deleted on PerfCountersMeasurement too
+ std::vector<std::unique_ptr<PerfCountersMeasurement>>
+ perf_counter_measurements;
+
+ perf_counter_measurements.reserve(kMaxCounters);
+ for (int j = 0; j < kMaxCounters; ++j) {
+ perf_counter_measurements.emplace_back(
+ new PerfCountersMeasurement(kMetrics));
+ }
+
+ std::vector<std::pair<std::string, double>> measurements;
+
+ // Start all counters together to see if they hold
+ size_t max_counters = kMaxCounters;
+ for (size_t i = 0; i < kMaxCounters; ++i) {
+ auto& counter(*perf_counter_measurements[i]);
+ EXPECT_EQ(counter.num_counters(), 1);
+ if (!counter.Start()) {
+ max_counters = i;
+ break;
+ };
+ }
+
+ ASSERT_GE(max_counters, kMinValidCounters);
+
+ // Start all together
+ for (size_t i = 0; i < max_counters; ++i) {
+ auto& counter(*perf_counter_measurements[i]);
+ EXPECT_TRUE(counter.Stop(measurements) || (i >= kMinValidCounters));
+ }
+
+ // Start/stop individually
+ for (size_t i = 0; i < max_counters; ++i) {
+ auto& counter(*perf_counter_measurements[i]);
+ measurements.clear();
+ counter.Start();
+ EXPECT_TRUE(counter.Stop(measurements) || (i >= kMinValidCounters));
+ }
+}
+
+// We try to do some meaningful work here but the compiler
+// insists in optimizing away our loop so we had to add a
+// no-optimize macro. In case it fails, we added some entropy
+// to this pool as well.
+
+BENCHMARK_DONT_OPTIMIZE size_t do_work() {
+ static std::mt19937 rd{std::random_device{}()};
+ static std::uniform_int_distribution<size_t> mrand(0, 10);
+ const size_t kNumLoops = 1000000;
+ size_t sum = 0;
+ for (size_t j = 0; j < kNumLoops; ++j) {
+ sum += mrand(rd);
+ }
+ benchmark::DoNotOptimize(sum);
+ return sum;
+}
+
+void measure(size_t threadcount, PerfCounterValues* before,
+ PerfCounterValues* after) {
+ BM_CHECK_NE(before, nullptr);
+ BM_CHECK_NE(after, nullptr);
+ std::vector<std::thread> threads(threadcount);
+ auto work = [&]() { BM_CHECK(do_work() > 1000); };
+
+ // We need to first set up the counters, then start the threads, so the
+ // threads would inherit the counters. But later, we need to first destroy
+ // the thread pool (so all the work finishes), then measure the counters. So
+ // the scopes overlap, and we need to explicitly control the scope of the
+ // threadpool.
+ auto counters =
+ PerfCounters::Create({kGenericPerfEvent1, kGenericPerfEvent2});
+ for (auto& t : threads) t = std::thread(work);
+ counters.Snapshot(before);
+ for (auto& t : threads) t.join();
+ counters.Snapshot(after);
+}
+
+TEST(PerfCountersTest, MultiThreaded) {
+ if (!PerfCounters::kSupported) {
+ GTEST_SKIP() << "Test skipped because libpfm is not supported.";
+ }
+ EXPECT_TRUE(PerfCounters::Initialize());
+ PerfCounterValues before(2);
+ PerfCounterValues after(2);
+
+ // Notice that this test will work even if we taskset it to a single CPU
+ // In this case the threads will run sequentially
+ // Start two threads and measure the number of combined cycles and
+ // instructions
+ measure(2, &before, &after);
+ std::vector<double> Elapsed2Threads{
+ static_cast<double>(after[0] - before[0]),
+ static_cast<double>(after[1] - before[1])};
+
+ // Start four threads and measure the number of combined cycles and
+ // instructions
+ measure(4, &before, &after);
+ std::vector<double> Elapsed4Threads{
+ static_cast<double>(after[0] - before[0]),
+ static_cast<double>(after[1] - before[1])};
+
+ // The following expectations fail (at least on a beefy workstation with lots
+ // of cpus) - it seems that in some circumstances the runtime of 4 threads
+ // can even be better than with 2.
+ // So instead of expecting 4 threads to be slower, let's just make sure they
+ // do not differ too much in general (one is not more than 10x than the
+ // other).
+ EXPECT_THAT(Elapsed4Threads[0] / Elapsed2Threads[0], AllOf(Gt(0.1), Lt(10)));
+ EXPECT_THAT(Elapsed4Threads[1] / Elapsed2Threads[1], AllOf(Gt(0.1), Lt(10)));
+}
+
+TEST(PerfCountersTest, HardwareLimits) {
+ // The test works (i.e. causes read to fail) for the assumptions
+ // about hardware capabilities (i.e. small number (3-4) hardware
+ // counters) at this date,
+ // the same as previous test ReopenExistingCounters.
+ if (!PerfCounters::kSupported) {
+ GTEST_SKIP() << "Test skipped because libpfm is not supported.\n";
+ }
+ EXPECT_TRUE(PerfCounters::Initialize());
+
+ // Taken from `perf list`, but focusses only on those HW events that actually
+ // were reported when running `sudo perf stat -a sleep 10`, intersected over
+ // several platforms. All HW events listed in the first command not reported
+ // in the second seem to not work. This is sad as we don't really get to test
+ // the grouping here (groups can contain up to 6 members)...
+ std::vector<std::string> counter_names{
+ "cycles", // leader
+ "instructions", //
+ "branch-misses", //
+ };
+
+ // In the off-chance that some of these values are not supported,
+ // we filter them out so the test will complete without failure
+ // albeit it might not actually test the grouping on that platform
+ std::vector<std::string> valid_names;
+ for (const std::string& name : counter_names) {
+ if (PerfCounters::IsCounterSupported(name)) {
+ valid_names.push_back(name);
+ }
+ }
+ PerfCountersMeasurement counter(valid_names);
+
+ std::vector<std::pair<std::string, double>> measurements;
+
+ counter.Start();
+ EXPECT_TRUE(counter.Stop(measurements));
+}
+
+} // namespace
diff --git a/test/perf_counters_test.cc b/test/perf_counters_test.cc
new file mode 100644
index 0000000..b0a3ab0
--- /dev/null
+++ b/test/perf_counters_test.cc
@@ -0,0 +1,92 @@
+#include <cstdarg>
+#undef NDEBUG
+
+#include "../src/commandlineflags.h"
+#include "../src/perf_counters.h"
+#include "benchmark/benchmark.h"
+#include "output_test.h"
+
+namespace benchmark {
+
+BM_DECLARE_string(benchmark_perf_counters);
+
+} // namespace benchmark
+
+static void BM_Simple(benchmark::State& state) {
+ for (auto _ : state) {
+ auto iterations = state.iterations();
+ benchmark::DoNotOptimize(iterations);
+ }
+}
+BENCHMARK(BM_Simple);
+ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Simple\",$"}});
+
+const int kIters = 1000000;
+
+void BM_WithoutPauseResume(benchmark::State& state) {
+ int n = 0;
+
+ for (auto _ : state) {
+ for (auto i = 0; i < kIters; ++i) {
+ n = 1 - n;
+ benchmark::DoNotOptimize(n);
+ }
+ }
+}
+
+BENCHMARK(BM_WithoutPauseResume);
+ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_WithoutPauseResume\",$"}});
+
+void BM_WithPauseResume(benchmark::State& state) {
+ int m = 0, n = 0;
+
+ for (auto _ : state) {
+ for (auto i = 0; i < kIters; ++i) {
+ n = 1 - n;
+ benchmark::DoNotOptimize(n);
+ }
+
+ state.PauseTiming();
+ for (auto j = 0; j < kIters; ++j) {
+ m = 1 - m;
+ benchmark::DoNotOptimize(m);
+ }
+ state.ResumeTiming();
+ }
+}
+
+BENCHMARK(BM_WithPauseResume);
+
+ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_WithPauseResume\",$"}});
+
+static void CheckSimple(Results const& e) {
+ CHECK_COUNTER_VALUE(e, double, "CYCLES", GT, 0);
+}
+
+double withoutPauseResumeInstrCount = 0.0;
+double withPauseResumeInstrCount = 0.0;
+
+static void SaveInstrCountWithoutResume(Results const& e) {
+ withoutPauseResumeInstrCount = e.GetAs<double>("INSTRUCTIONS");
+}
+
+static void SaveInstrCountWithResume(Results const& e) {
+ withPauseResumeInstrCount = e.GetAs<double>("INSTRUCTIONS");
+}
+
+CHECK_BENCHMARK_RESULTS("BM_Simple", &CheckSimple);
+CHECK_BENCHMARK_RESULTS("BM_WithoutPauseResume", &SaveInstrCountWithoutResume);
+CHECK_BENCHMARK_RESULTS("BM_WithPauseResume", &SaveInstrCountWithResume);
+
+int main(int argc, char* argv[]) {
+ if (!benchmark::internal::PerfCounters::kSupported) {
+ return 0;
+ }
+ benchmark::FLAGS_benchmark_perf_counters = "CYCLES,INSTRUCTIONS";
+ benchmark::internal::PerfCounters::Initialize();
+ RunOutputTests(argc, argv);
+
+ BM_CHECK_GT(withPauseResumeInstrCount, kIters);
+ BM_CHECK_GT(withoutPauseResumeInstrCount, kIters);
+ BM_CHECK_LT(withPauseResumeInstrCount, 1.5 * withoutPauseResumeInstrCount);
+}
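This test selects the counters programmatically through the internal flag object, as shown above; in an ordinary benchmark binary the same selection would normally come from the command line (my reading of the flag name is --benchmark_perf_counters=CYCLES,INSTRUCTIONS, matching the FLAGS_ variable the test sets), and it requires a libpfm-enabled build. A trivial, hypothetical binary that such a flag could be pointed at:

#include "benchmark/benchmark.h"

// Run as, e.g.:  ./mybench --benchmark_perf_counters=CYCLES,INSTRUCTIONS
// (assumed command-line spelling; the test above sets the flag in code).
static void BM_Spin(benchmark::State& state) {
  int n = 0;
  for (auto _ : state) {
    n = 1 - n;
    benchmark::DoNotOptimize(n);
  }
}
BENCHMARK(BM_Spin);
BENCHMARK_MAIN();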
diff --git a/test/register_benchmark_test.cc b/test/register_benchmark_test.cc
index 3ac5b21..d69d144 100644
--- a/test/register_benchmark_test.cc
+++ b/test/register_benchmark_test.cc
@@ -10,7 +10,7 @@ namespace {
class TestReporter : public benchmark::ConsoleReporter {
public:
- virtual void ReportRuns(const std::vector<Run>& report) {
+ void ReportRuns(const std::vector<Run>& report) override {
all_runs_.insert(all_runs_.end(), begin(report), end(report));
ConsoleReporter::ReportRuns(report);
}
@@ -19,24 +19,24 @@ class TestReporter : public benchmark::ConsoleReporter {
};
struct TestCase {
- std::string name;
- const char* label;
+ const std::string name;
+ const std::string label;
// Note: not explicit as we rely on it being converted through ADD_CASES.
- TestCase(const char* xname) : TestCase(xname, nullptr) {}
- TestCase(const char* xname, const char* xlabel)
+ TestCase(const std::string& xname) : TestCase(xname, "") {}
+ TestCase(const std::string& xname, const std::string& xlabel)
: name(xname), label(xlabel) {}
typedef benchmark::BenchmarkReporter::Run Run;
void CheckRun(Run const& run) const {
// clang-format off
- CHECK(name == run.benchmark_name()) << "expected " << name << " got "
+ BM_CHECK(name == run.benchmark_name()) << "expected " << name << " got "
<< run.benchmark_name();
- if (label) {
- CHECK(run.report_label == label) << "expected " << label << " got "
+ if (!label.empty()) {
+ BM_CHECK(run.report_label == label) << "expected " << label << " got "
<< run.report_label;
} else {
- CHECK(run.report_label == "");
+ BM_CHECK(run.report_label.empty());
}
// clang-format on
}
@@ -45,7 +45,7 @@ struct TestCase {
std::vector<TestCase> ExpectedResults;
int AddCases(std::initializer_list<TestCase> const& v) {
- for (auto N : v) {
+ for (const auto& N : v) {
ExpectedResults.push_back(N);
}
return 0;
@@ -96,6 +96,18 @@ ADD_CASES({"test1", "One"}, {"test2", "Two"}, {"test3", "Three"});
#endif // BENCHMARK_HAS_NO_VARIADIC_REGISTER_BENCHMARK
//----------------------------------------------------------------------------//
+// Test RegisterBenchmark with DISABLED_ benchmark
+//----------------------------------------------------------------------------//
+void DISABLED_BM_function(benchmark::State& state) {
+ for (auto _ : state) {
+ }
+}
+BENCHMARK(DISABLED_BM_function);
+ReturnVal dummy3 = benchmark::RegisterBenchmark("DISABLED_BM_function_manual",
+ DISABLED_BM_function);
+// No need to add cases because we don't expect them to run.
+
+//----------------------------------------------------------------------------//
// Test RegisterBenchmark with different callable types
//----------------------------------------------------------------------------//
@@ -111,7 +123,7 @@ void TestRegistrationAtRuntime() {
{
CustomFixture fx;
benchmark::RegisterBenchmark("custom_fixture", fx);
- AddCases({"custom_fixture"});
+ AddCases({std::string("custom_fixture")});
}
#endif
#ifndef BENCHMARK_HAS_NO_VARIADIC_REGISTER_BENCHMARK
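A benchmark whose name starts with DISABLED_ is registered but never run, whether it comes from BENCHMARK() or RegisterBenchmark(), which is why the test adds no expected cases for it. Minimal sketch with an illustrative name:

#include "benchmark/benchmark.h"

static void DISABLED_BM_FlakyOnCI(benchmark::State& state) {
  for (auto _ : state) {
  }
}
BENCHMARK(DISABLED_BM_FlakyOnCI);  // compiled and registered, never executed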
diff --git a/test/repetitions_test.cc b/test/repetitions_test.cc
new file mode 100644
index 0000000..569777d
--- /dev/null
+++ b/test/repetitions_test.cc
@@ -0,0 +1,214 @@
+
+#include "benchmark/benchmark.h"
+#include "output_test.h"
+
+// ========================================================================= //
+// ------------------------ Testing Basic Output --------------------------- //
+// ========================================================================= //
+
+static void BM_ExplicitRepetitions(benchmark::State& state) {
+ for (auto _ : state) {
+ }
+}
+BENCHMARK(BM_ExplicitRepetitions)->Repetitions(2);
+
+ADD_CASES(TC_ConsoleOut,
+ {{"^BM_ExplicitRepetitions/repeats:2 %console_report$"}});
+ADD_CASES(TC_ConsoleOut,
+ {{"^BM_ExplicitRepetitions/repeats:2 %console_report$"}});
+ADD_CASES(TC_ConsoleOut,
+ {{"^BM_ExplicitRepetitions/repeats:2_mean %console_report$"}});
+ADD_CASES(TC_ConsoleOut,
+ {{"^BM_ExplicitRepetitions/repeats:2_median %console_report$"}});
+ADD_CASES(TC_ConsoleOut,
+ {{"^BM_ExplicitRepetitions/repeats:2_stddev %console_report$"}});
+ADD_CASES(TC_JSONOut,
+ {{"\"name\": \"BM_ExplicitRepetitions/repeats:2\",$"},
+ {"\"family_index\": 0,$", MR_Next},
+ {"\"per_family_instance_index\": 0,$", MR_Next},
+ {"\"run_name\": \"BM_ExplicitRepetitions/repeats:2\",$", MR_Next},
+ {"\"run_type\": \"iteration\",$", MR_Next},
+ {"\"repetitions\": 2,$", MR_Next},
+ {"\"repetition_index\": 0,$", MR_Next},
+ {"\"threads\": 1,$", MR_Next},
+ {"\"iterations\": %int,$", MR_Next},
+ {"\"real_time\": %float,$", MR_Next},
+ {"\"cpu_time\": %float,$", MR_Next},
+ {"\"time_unit\": \"ns\"$", MR_Next},
+ {"}", MR_Next}});
+ADD_CASES(TC_JSONOut,
+ {{"\"name\": \"BM_ExplicitRepetitions/repeats:2\",$"},
+ {"\"family_index\": 0,$", MR_Next},
+ {"\"per_family_instance_index\": 0,$", MR_Next},
+ {"\"run_name\": \"BM_ExplicitRepetitions/repeats:2\",$", MR_Next},
+ {"\"run_type\": \"iteration\",$", MR_Next},
+ {"\"repetitions\": 2,$", MR_Next},
+ {"\"repetition_index\": 1,$", MR_Next},
+ {"\"threads\": 1,$", MR_Next},
+ {"\"iterations\": %int,$", MR_Next},
+ {"\"real_time\": %float,$", MR_Next},
+ {"\"cpu_time\": %float,$", MR_Next},
+ {"\"time_unit\": \"ns\"$", MR_Next},
+ {"}", MR_Next}});
+ADD_CASES(TC_JSONOut,
+ {{"\"name\": \"BM_ExplicitRepetitions/repeats:2_mean\",$"},
+ {"\"family_index\": 0,$", MR_Next},
+ {"\"per_family_instance_index\": 0,$", MR_Next},
+ {"\"run_name\": \"BM_ExplicitRepetitions/repeats:2\",$", MR_Next},
+ {"\"run_type\": \"aggregate\",$", MR_Next},
+ {"\"repetitions\": 2,$", MR_Next},
+ {"\"threads\": 1,$", MR_Next},
+ {"\"aggregate_name\": \"mean\",$", MR_Next},
+ {"\"aggregate_unit\": \"time\",$", MR_Next},
+ {"\"iterations\": %int,$", MR_Next},
+ {"\"real_time\": %float,$", MR_Next},
+ {"\"cpu_time\": %float,$", MR_Next},
+ {"\"time_unit\": \"ns\"$", MR_Next},
+ {"}", MR_Next}});
+ADD_CASES(TC_JSONOut,
+ {{"\"name\": \"BM_ExplicitRepetitions/repeats:2_median\",$"},
+ {"\"family_index\": 0,$", MR_Next},
+ {"\"per_family_instance_index\": 0,$", MR_Next},
+ {"\"run_name\": \"BM_ExplicitRepetitions/repeats:2\",$", MR_Next},
+ {"\"run_type\": \"aggregate\",$", MR_Next},
+ {"\"repetitions\": 2,$", MR_Next},
+ {"\"threads\": 1,$", MR_Next},
+ {"\"aggregate_name\": \"median\",$", MR_Next},
+ {"\"aggregate_unit\": \"time\",$", MR_Next},
+ {"\"iterations\": %int,$", MR_Next},
+ {"\"real_time\": %float,$", MR_Next},
+ {"\"cpu_time\": %float,$", MR_Next},
+ {"\"time_unit\": \"ns\"$", MR_Next},
+ {"}", MR_Next}});
+ADD_CASES(TC_JSONOut,
+ {{"\"name\": \"BM_ExplicitRepetitions/repeats:2_stddev\",$"},
+ {"\"family_index\": 0,$", MR_Next},
+ {"\"per_family_instance_index\": 0,$", MR_Next},
+ {"\"run_name\": \"BM_ExplicitRepetitions/repeats:2\",$", MR_Next},
+ {"\"run_type\": \"aggregate\",$", MR_Next},
+ {"\"repetitions\": 2,$", MR_Next},
+ {"\"threads\": 1,$", MR_Next},
+ {"\"aggregate_name\": \"stddev\",$", MR_Next},
+ {"\"aggregate_unit\": \"time\",$", MR_Next},
+ {"\"iterations\": %int,$", MR_Next},
+ {"\"real_time\": %float,$", MR_Next},
+ {"\"cpu_time\": %float,$", MR_Next},
+ {"\"time_unit\": \"ns\"$", MR_Next},
+ {"}", MR_Next}});
+ADD_CASES(TC_CSVOut, {{"^\"BM_ExplicitRepetitions/repeats:2\",%csv_report$"}});
+ADD_CASES(TC_CSVOut, {{"^\"BM_ExplicitRepetitions/repeats:2\",%csv_report$"}});
+ADD_CASES(TC_CSVOut,
+ {{"^\"BM_ExplicitRepetitions/repeats:2_mean\",%csv_report$"}});
+ADD_CASES(TC_CSVOut,
+ {{"^\"BM_ExplicitRepetitions/repeats:2_median\",%csv_report$"}});
+ADD_CASES(TC_CSVOut,
+ {{"^\"BM_ExplicitRepetitions/repeats:2_stddev\",%csv_report$"}});
+
+// ========================================================================= //
+// --------------------- Testing Implicit Repetitions ---------------------- //
+// ========================================================================= //
+
+static void BM_ImplicitRepetitions(benchmark::State& state) {
+ for (auto _ : state) {
+ }
+}
+BENCHMARK(BM_ImplicitRepetitions);
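+// No Repetitions() call here: the repetition count comes from outside the
+// benchmark definition (presumably a --benchmark_repetitions=3 flag passed by
+// the test harness), which is why three per-repetition runs are expected below.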
+
+ADD_CASES(TC_ConsoleOut, {{"^BM_ImplicitRepetitions %console_report$"}});
+ADD_CASES(TC_ConsoleOut, {{"^BM_ImplicitRepetitions %console_report$"}});
+ADD_CASES(TC_ConsoleOut, {{"^BM_ImplicitRepetitions %console_report$"}});
+ADD_CASES(TC_ConsoleOut, {{"^BM_ImplicitRepetitions_mean %console_report$"}});
+ADD_CASES(TC_ConsoleOut, {{"^BM_ImplicitRepetitions_median %console_report$"}});
+ADD_CASES(TC_ConsoleOut, {{"^BM_ImplicitRepetitions_stddev %console_report$"}});
+ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_ImplicitRepetitions\",$"},
+ {"\"family_index\": 1,$", MR_Next},
+ {"\"per_family_instance_index\": 0,$", MR_Next},
+ {"\"run_name\": \"BM_ImplicitRepetitions\",$", MR_Next},
+ {"\"run_type\": \"iteration\",$", MR_Next},
+ {"\"repetitions\": 3,$", MR_Next},
+ {"\"repetition_index\": 0,$", MR_Next},
+ {"\"threads\": 1,$", MR_Next},
+ {"\"iterations\": %int,$", MR_Next},
+ {"\"real_time\": %float,$", MR_Next},
+ {"\"cpu_time\": %float,$", MR_Next},
+ {"\"time_unit\": \"ns\"$", MR_Next},
+ {"}", MR_Next}});
+ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_ImplicitRepetitions\",$"},
+ {"\"family_index\": 1,$", MR_Next},
+ {"\"per_family_instance_index\": 0,$", MR_Next},
+ {"\"run_name\": \"BM_ImplicitRepetitions\",$", MR_Next},
+ {"\"run_type\": \"iteration\",$", MR_Next},
+ {"\"repetitions\": 3,$", MR_Next},
+ {"\"repetition_index\": 1,$", MR_Next},
+ {"\"threads\": 1,$", MR_Next},
+ {"\"iterations\": %int,$", MR_Next},
+ {"\"real_time\": %float,$", MR_Next},
+ {"\"cpu_time\": %float,$", MR_Next},
+ {"\"time_unit\": \"ns\"$", MR_Next},
+ {"}", MR_Next}});
+ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_ImplicitRepetitions\",$"},
+ {"\"family_index\": 1,$", MR_Next},
+ {"\"per_family_instance_index\": 0,$", MR_Next},
+ {"\"run_name\": \"BM_ImplicitRepetitions\",$", MR_Next},
+ {"\"run_type\": \"iteration\",$", MR_Next},
+ {"\"repetitions\": 3,$", MR_Next},
+ {"\"repetition_index\": 2,$", MR_Next},
+ {"\"threads\": 1,$", MR_Next},
+ {"\"iterations\": %int,$", MR_Next},
+ {"\"real_time\": %float,$", MR_Next},
+ {"\"cpu_time\": %float,$", MR_Next},
+ {"\"time_unit\": \"ns\"$", MR_Next},
+ {"}", MR_Next}});
+ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_ImplicitRepetitions_mean\",$"},
+ {"\"family_index\": 1,$", MR_Next},
+ {"\"per_family_instance_index\": 0,$", MR_Next},
+ {"\"run_name\": \"BM_ImplicitRepetitions\",$", MR_Next},
+ {"\"run_type\": \"aggregate\",$", MR_Next},
+ {"\"repetitions\": 3,$", MR_Next},
+ {"\"threads\": 1,$", MR_Next},
+ {"\"aggregate_name\": \"mean\",$", MR_Next},
+ {"\"aggregate_unit\": \"time\",$", MR_Next},
+ {"\"iterations\": %int,$", MR_Next},
+ {"\"real_time\": %float,$", MR_Next},
+ {"\"cpu_time\": %float,$", MR_Next},
+ {"\"time_unit\": \"ns\"$", MR_Next},
+ {"}", MR_Next}});
+ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_ImplicitRepetitions_median\",$"},
+ {"\"family_index\": 1,$", MR_Next},
+ {"\"per_family_instance_index\": 0,$", MR_Next},
+ {"\"run_name\": \"BM_ImplicitRepetitions\",$", MR_Next},
+ {"\"run_type\": \"aggregate\",$", MR_Next},
+ {"\"repetitions\": 3,$", MR_Next},
+ {"\"threads\": 1,$", MR_Next},
+ {"\"aggregate_name\": \"median\",$", MR_Next},
+ {"\"aggregate_unit\": \"time\",$", MR_Next},
+ {"\"iterations\": %int,$", MR_Next},
+ {"\"real_time\": %float,$", MR_Next},
+ {"\"cpu_time\": %float,$", MR_Next},
+ {"\"time_unit\": \"ns\"$", MR_Next},
+ {"}", MR_Next}});
+ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_ImplicitRepetitions_stddev\",$"},
+ {"\"family_index\": 1,$", MR_Next},
+ {"\"per_family_instance_index\": 0,$", MR_Next},
+ {"\"run_name\": \"BM_ImplicitRepetitions\",$", MR_Next},
+ {"\"run_type\": \"aggregate\",$", MR_Next},
+ {"\"repetitions\": 3,$", MR_Next},
+ {"\"threads\": 1,$", MR_Next},
+ {"\"aggregate_name\": \"stddev\",$", MR_Next},
+ {"\"aggregate_unit\": \"time\",$", MR_Next},
+ {"\"iterations\": %int,$", MR_Next},
+ {"\"real_time\": %float,$", MR_Next},
+ {"\"cpu_time\": %float,$", MR_Next},
+ {"\"time_unit\": \"ns\"$", MR_Next},
+ {"}", MR_Next}});
+ADD_CASES(TC_CSVOut, {{"^\"BM_ImplicitRepetitions\",%csv_report$"}});
+ADD_CASES(TC_CSVOut, {{"^\"BM_ImplicitRepetitions\",%csv_report$"}});
+ADD_CASES(TC_CSVOut, {{"^\"BM_ImplicitRepetitions_mean\",%csv_report$"}});
+ADD_CASES(TC_CSVOut, {{"^\"BM_ImplicitRepetitions_median\",%csv_report$"}});
+ADD_CASES(TC_CSVOut, {{"^\"BM_ImplicitRepetitions_stddev\",%csv_report$"}});
+
+// ========================================================================= //
+// --------------------------- TEST CASES END ------------------------------ //
+// ========================================================================= //
+
+int main(int argc, char* argv[]) { RunOutputTests(argc, argv); }
diff --git a/test/report_aggregates_only_test.cc b/test/report_aggregates_only_test.cc
index 9646b9b..47da503 100644
--- a/test/report_aggregates_only_test.cc
+++ b/test/report_aggregates_only_test.cc
@@ -19,17 +19,19 @@ BENCHMARK(BM_SummaryRepeat)->Repetitions(3)->ReportAggregatesOnly();
int main(int argc, char* argv[]) {
const std::string output = GetFileReporterOutput(argc, argv);
- if (SubstrCnt(output, "\"name\": \"BM_SummaryRepeat/repeats:3") != 3 ||
+ if (SubstrCnt(output, "\"name\": \"BM_SummaryRepeat/repeats:3") != 4 ||
SubstrCnt(output, "\"name\": \"BM_SummaryRepeat/repeats:3_mean\"") != 1 ||
SubstrCnt(output, "\"name\": \"BM_SummaryRepeat/repeats:3_median\"") !=
1 ||
SubstrCnt(output, "\"name\": \"BM_SummaryRepeat/repeats:3_stddev\"") !=
- 1) {
- std::cout << "Precondition mismatch. Expected to only find three "
+ 1 ||
+ SubstrCnt(output, "\"name\": \"BM_SummaryRepeat/repeats:3_cv\"") != 1) {
+ std::cout << "Precondition mismatch. Expected to only find four "
"occurrences of \"BM_SummaryRepeat/repeats:3\" substring:\n"
"\"name\": \"BM_SummaryRepeat/repeats:3_mean\", "
"\"name\": \"BM_SummaryRepeat/repeats:3_median\", "
- "\"name\": \"BM_SummaryRepeat/repeats:3_stddev\"\nThe entire "
+ "\"name\": \"BM_SummaryRepeat/repeats:3_stddev\", "
+ "\"name\": \"BM_SummaryRepeat/repeats:3_cv\"\nThe entire "
"output:\n";
std::cout << output;
return 1;
diff --git a/test/reporter_output_test.cc b/test/reporter_output_test.cc
index d24a57d..2eb545a 100644
--- a/test/reporter_output_test.cc
+++ b/test/reporter_output_test.cc
@@ -1,5 +1,6 @@
#undef NDEBUG
+#include <numeric>
#include <utility>
#include "benchmark/benchmark.h"
@@ -16,7 +17,7 @@ static int AddContextCases() {
AddCases(TC_ConsoleErr,
{
{"^%int-%int-%intT%int:%int:%int[-+]%int:%int$", MR_Default},
- {"Running .*/reporter_output_test(\\.exe)?$", MR_Next},
+ {"Running .*(/|\\\\)reporter_output_test(\\.exe)?$", MR_Next},
{"Run on \\(%int X %float MHz CPU s?\\)", MR_Next},
});
AddCases(TC_JSONOut,
@@ -71,9 +72,11 @@ BENCHMARK(BM_basic);
ADD_CASES(TC_ConsoleOut, {{"^BM_basic %console_report$"}});
ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_basic\",$"},
+ {"\"family_index\": 0,$", MR_Next},
+ {"\"per_family_instance_index\": 0,$", MR_Next},
{"\"run_name\": \"BM_basic\",$", MR_Next},
{"\"run_type\": \"iteration\",$", MR_Next},
- {"\"repetitions\": 0,$", MR_Next},
+ {"\"repetitions\": 1,$", MR_Next},
{"\"repetition_index\": 0,$", MR_Next},
{"\"threads\": 1,$", MR_Next},
{"\"iterations\": %int,$", MR_Next},
@@ -90,7 +93,8 @@ ADD_CASES(TC_CSVOut, {{"^\"BM_basic\",%csv_report$"}});
void BM_bytes_per_second(benchmark::State& state) {
for (auto _ : state) {
// This test requires a non-zero CPU time to avoid divide-by-zero
- benchmark::DoNotOptimize(state.iterations());
+ auto iterations = state.iterations();
+ benchmark::DoNotOptimize(iterations);
}
state.SetBytesProcessed(1);
}
@@ -99,9 +103,11 @@ BENCHMARK(BM_bytes_per_second);
ADD_CASES(TC_ConsoleOut, {{"^BM_bytes_per_second %console_report "
"bytes_per_second=%float[kM]{0,1}/s$"}});
ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_bytes_per_second\",$"},
+ {"\"family_index\": 1,$", MR_Next},
+ {"\"per_family_instance_index\": 0,$", MR_Next},
{"\"run_name\": \"BM_bytes_per_second\",$", MR_Next},
{"\"run_type\": \"iteration\",$", MR_Next},
- {"\"repetitions\": 0,$", MR_Next},
+ {"\"repetitions\": 1,$", MR_Next},
{"\"repetition_index\": 0,$", MR_Next},
{"\"threads\": 1,$", MR_Next},
{"\"iterations\": %int,$", MR_Next},
@@ -119,7 +125,8 @@ ADD_CASES(TC_CSVOut, {{"^\"BM_bytes_per_second\",%csv_bytes_report$"}});
void BM_items_per_second(benchmark::State& state) {
for (auto _ : state) {
// This test requires a non-zero CPU time to avoid divide-by-zero
- benchmark::DoNotOptimize(state.iterations());
+ auto iterations = state.iterations();
+ benchmark::DoNotOptimize(iterations);
}
state.SetItemsProcessed(1);
}
@@ -128,9 +135,11 @@ BENCHMARK(BM_items_per_second);
ADD_CASES(TC_ConsoleOut, {{"^BM_items_per_second %console_report "
"items_per_second=%float[kM]{0,1}/s$"}});
ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_items_per_second\",$"},
+ {"\"family_index\": 2,$", MR_Next},
+ {"\"per_family_instance_index\": 0,$", MR_Next},
{"\"run_name\": \"BM_items_per_second\",$", MR_Next},
{"\"run_type\": \"iteration\",$", MR_Next},
- {"\"repetitions\": 0,$", MR_Next},
+ {"\"repetitions\": 1,$", MR_Next},
{"\"repetition_index\": 0,$", MR_Next},
{"\"threads\": 1,$", MR_Next},
{"\"iterations\": %int,$", MR_Next},
@@ -154,9 +163,11 @@ BENCHMARK(BM_label);
ADD_CASES(TC_ConsoleOut, {{"^BM_label %console_report some label$"}});
ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_label\",$"},
+ {"\"family_index\": 3,$", MR_Next},
+ {"\"per_family_instance_index\": 0,$", MR_Next},
{"\"run_name\": \"BM_label\",$", MR_Next},
{"\"run_type\": \"iteration\",$", MR_Next},
- {"\"repetitions\": 0,$", MR_Next},
+ {"\"repetitions\": 1,$", MR_Next},
{"\"repetition_index\": 0,$", MR_Next},
{"\"threads\": 1,$", MR_Next},
{"\"iterations\": %int,$", MR_Next},
@@ -181,9 +192,11 @@ BENCHMARK(BM_time_label_nanosecond)->Unit(benchmark::kNanosecond);
ADD_CASES(TC_ConsoleOut, {{"^BM_time_label_nanosecond %console_report$"}});
ADD_CASES(TC_JSONOut,
{{"\"name\": \"BM_time_label_nanosecond\",$"},
+ {"\"family_index\": 4,$", MR_Next},
+ {"\"per_family_instance_index\": 0,$", MR_Next},
{"\"run_name\": \"BM_time_label_nanosecond\",$", MR_Next},
{"\"run_type\": \"iteration\",$", MR_Next},
- {"\"repetitions\": 0,$", MR_Next},
+ {"\"repetitions\": 1,$", MR_Next},
{"\"repetition_index\": 0,$", MR_Next},
{"\"threads\": 1,$", MR_Next},
{"\"iterations\": %int,$", MR_Next},
@@ -202,9 +215,11 @@ BENCHMARK(BM_time_label_microsecond)->Unit(benchmark::kMicrosecond);
ADD_CASES(TC_ConsoleOut, {{"^BM_time_label_microsecond %console_us_report$"}});
ADD_CASES(TC_JSONOut,
{{"\"name\": \"BM_time_label_microsecond\",$"},
+ {"\"family_index\": 5,$", MR_Next},
+ {"\"per_family_instance_index\": 0,$", MR_Next},
{"\"run_name\": \"BM_time_label_microsecond\",$", MR_Next},
{"\"run_type\": \"iteration\",$", MR_Next},
- {"\"repetitions\": 0,$", MR_Next},
+ {"\"repetitions\": 1,$", MR_Next},
{"\"repetition_index\": 0,$", MR_Next},
{"\"threads\": 1,$", MR_Next},
{"\"iterations\": %int,$", MR_Next},
@@ -223,9 +238,11 @@ BENCHMARK(BM_time_label_millisecond)->Unit(benchmark::kMillisecond);
ADD_CASES(TC_ConsoleOut, {{"^BM_time_label_millisecond %console_ms_report$"}});
ADD_CASES(TC_JSONOut,
{{"\"name\": \"BM_time_label_millisecond\",$"},
+ {"\"family_index\": 6,$", MR_Next},
+ {"\"per_family_instance_index\": 0,$", MR_Next},
{"\"run_name\": \"BM_time_label_millisecond\",$", MR_Next},
{"\"run_type\": \"iteration\",$", MR_Next},
- {"\"repetitions\": 0,$", MR_Next},
+ {"\"repetitions\": 1,$", MR_Next},
{"\"repetition_index\": 0,$", MR_Next},
{"\"threads\": 1,$", MR_Next},
{"\"iterations\": %int,$", MR_Next},
@@ -243,9 +260,11 @@ BENCHMARK(BM_time_label_second)->Unit(benchmark::kSecond);
ADD_CASES(TC_ConsoleOut, {{"^BM_time_label_second %console_s_report$"}});
ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_time_label_second\",$"},
+ {"\"family_index\": 7,$", MR_Next},
+ {"\"per_family_instance_index\": 0,$", MR_Next},
{"\"run_name\": \"BM_time_label_second\",$", MR_Next},
{"\"run_type\": \"iteration\",$", MR_Next},
- {"\"repetitions\": 0,$", MR_Next},
+ {"\"repetitions\": 1,$", MR_Next},
{"\"repetition_index\": 0,$", MR_Next},
{"\"threads\": 1,$", MR_Next},
{"\"iterations\": %int,$", MR_Next},
@@ -267,9 +286,11 @@ void BM_error(benchmark::State& state) {
BENCHMARK(BM_error);
ADD_CASES(TC_ConsoleOut, {{"^BM_error[ ]+ERROR OCCURRED: 'message'$"}});
ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_error\",$"},
+ {"\"family_index\": 8,$", MR_Next},
+ {"\"per_family_instance_index\": 0,$", MR_Next},
{"\"run_name\": \"BM_error\",$", MR_Next},
{"\"run_type\": \"iteration\",$", MR_Next},
- {"\"repetitions\": 0,$", MR_Next},
+ {"\"repetitions\": 1,$", MR_Next},
{"\"repetition_index\": 0,$", MR_Next},
{"\"threads\": 1,$", MR_Next},
{"\"error_occurred\": true,$", MR_Next},
@@ -289,15 +310,17 @@ void BM_no_arg_name(benchmark::State& state) {
BENCHMARK(BM_no_arg_name)->Arg(3);
ADD_CASES(TC_ConsoleOut, {{"^BM_no_arg_name/3 %console_report$"}});
ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_no_arg_name/3\",$"},
+ {"\"family_index\": 9,$", MR_Next},
+ {"\"per_family_instance_index\": 0,$", MR_Next},
{"\"run_name\": \"BM_no_arg_name/3\",$", MR_Next},
{"\"run_type\": \"iteration\",$", MR_Next},
- {"\"repetitions\": 0,$", MR_Next},
+ {"\"repetitions\": 1,$", MR_Next},
{"\"repetition_index\": 0,$", MR_Next},
{"\"threads\": 1,$", MR_Next}});
ADD_CASES(TC_CSVOut, {{"^\"BM_no_arg_name/3\",%csv_report$"}});
// ========================================================================= //
-// ------------------------ Testing Arg Name Output ----------------------- //
+// ------------------------ Testing Arg Name Output ------------------------ //
// ========================================================================= //
void BM_arg_name(benchmark::State& state) {
@@ -307,9 +330,11 @@ void BM_arg_name(benchmark::State& state) {
BENCHMARK(BM_arg_name)->ArgName("first")->Arg(3);
ADD_CASES(TC_ConsoleOut, {{"^BM_arg_name/first:3 %console_report$"}});
ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_arg_name/first:3\",$"},
+ {"\"family_index\": 10,$", MR_Next},
+ {"\"per_family_instance_index\": 0,$", MR_Next},
{"\"run_name\": \"BM_arg_name/first:3\",$", MR_Next},
{"\"run_type\": \"iteration\",$", MR_Next},
- {"\"repetitions\": 0,$", MR_Next},
+ {"\"repetitions\": 1,$", MR_Next},
{"\"repetition_index\": 0,$", MR_Next},
{"\"threads\": 1,$", MR_Next}});
ADD_CASES(TC_CSVOut, {{"^\"BM_arg_name/first:3\",%csv_report$"}});
@@ -327,14 +352,42 @@ ADD_CASES(TC_ConsoleOut,
{{"^BM_arg_names/first:2/5/third:4 %console_report$"}});
ADD_CASES(TC_JSONOut,
{{"\"name\": \"BM_arg_names/first:2/5/third:4\",$"},
+ {"\"family_index\": 11,$", MR_Next},
+ {"\"per_family_instance_index\": 0,$", MR_Next},
{"\"run_name\": \"BM_arg_names/first:2/5/third:4\",$", MR_Next},
{"\"run_type\": \"iteration\",$", MR_Next},
- {"\"repetitions\": 0,$", MR_Next},
+ {"\"repetitions\": 1,$", MR_Next},
{"\"repetition_index\": 0,$", MR_Next},
{"\"threads\": 1,$", MR_Next}});
ADD_CASES(TC_CSVOut, {{"^\"BM_arg_names/first:2/5/third:4\",%csv_report$"}});
// ========================================================================= //
+// ------------------------ Testing Name Output ---------------------------- //
+// ========================================================================= //
+
+void BM_name(benchmark::State& state) {
+ for (auto _ : state) {
+ }
+}
+BENCHMARK(BM_name)->Name("BM_custom_name");
+
+ADD_CASES(TC_ConsoleOut, {{"^BM_custom_name %console_report$"}});
+ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_custom_name\",$"},
+ {"\"family_index\": 12,$", MR_Next},
+ {"\"per_family_instance_index\": 0,$", MR_Next},
+ {"\"run_name\": \"BM_custom_name\",$", MR_Next},
+ {"\"run_type\": \"iteration\",$", MR_Next},
+ {"\"repetitions\": 1,$", MR_Next},
+ {"\"repetition_index\": 0,$", MR_Next},
+ {"\"threads\": 1,$", MR_Next},
+ {"\"iterations\": %int,$", MR_Next},
+ {"\"real_time\": %float,$", MR_Next},
+ {"\"cpu_time\": %float,$", MR_Next},
+ {"\"time_unit\": \"ns\"$", MR_Next},
+ {"}", MR_Next}});
+ADD_CASES(TC_CSVOut, {{"^\"BM_custom_name\",%csv_report$"}});
+
+// ========================================================================= //
// ------------------------ Testing Big Args Output ------------------------ //
// ========================================================================= //
@@ -353,7 +406,8 @@ ADD_CASES(TC_ConsoleOut, {{"^BM_BigArgs/1073741824 %console_report$"},
void BM_Complexity_O1(benchmark::State& state) {
for (auto _ : state) {
// This test requires a non-zero CPU time to avoid divide-by-zero
- benchmark::DoNotOptimize(state.iterations());
+ auto iterations = state.iterations();
+ benchmark::DoNotOptimize(iterations);
}
state.SetComplexityN(state.range(0));
}
@@ -381,37 +435,50 @@ ADD_CASES(TC_ConsoleOut,
{"^BM_Repeat/repeats:2_median %console_time_only_report [ ]*2$"},
{"^BM_Repeat/repeats:2_stddev %console_time_only_report [ ]*2$"}});
ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Repeat/repeats:2\",$"},
+ {"\"family_index\": 15,$", MR_Next},
+ {"\"per_family_instance_index\": 0,$", MR_Next},
{"\"run_name\": \"BM_Repeat/repeats:2\"", MR_Next},
{"\"run_type\": \"iteration\",$", MR_Next},
{"\"repetitions\": 2,$", MR_Next},
{"\"repetition_index\": 0,$", MR_Next},
{"\"threads\": 1,$", MR_Next},
{"\"name\": \"BM_Repeat/repeats:2\",$"},
+ {"\"family_index\": 15,$", MR_Next},
+ {"\"per_family_instance_index\": 0,$", MR_Next},
{"\"run_name\": \"BM_Repeat/repeats:2\",$", MR_Next},
{"\"run_type\": \"iteration\",$", MR_Next},
{"\"repetitions\": 2,$", MR_Next},
{"\"repetition_index\": 1,$", MR_Next},
{"\"threads\": 1,$", MR_Next},
{"\"name\": \"BM_Repeat/repeats:2_mean\",$"},
+ {"\"family_index\": 15,$", MR_Next},
+ {"\"per_family_instance_index\": 0,$", MR_Next},
{"\"run_name\": \"BM_Repeat/repeats:2\",$", MR_Next},
{"\"run_type\": \"aggregate\",$", MR_Next},
{"\"repetitions\": 2,$", MR_Next},
{"\"threads\": 1,$", MR_Next},
{"\"aggregate_name\": \"mean\",$", MR_Next},
+ {"\"aggregate_unit\": \"time\",$", MR_Next},
{"\"iterations\": 2,$", MR_Next},
{"\"name\": \"BM_Repeat/repeats:2_median\",$"},
+ {"\"family_index\": 15,$", MR_Next},
+ {"\"per_family_instance_index\": 0,$", MR_Next},
{"\"run_name\": \"BM_Repeat/repeats:2\",$", MR_Next},
{"\"run_type\": \"aggregate\",$", MR_Next},
{"\"repetitions\": 2,$", MR_Next},
{"\"threads\": 1,$", MR_Next},
{"\"aggregate_name\": \"median\",$", MR_Next},
+ {"\"aggregate_unit\": \"time\",$", MR_Next},
{"\"iterations\": 2,$", MR_Next},
{"\"name\": \"BM_Repeat/repeats:2_stddev\",$"},
+ {"\"family_index\": 15,$", MR_Next},
+ {"\"per_family_instance_index\": 0,$", MR_Next},
{"\"run_name\": \"BM_Repeat/repeats:2\",$", MR_Next},
{"\"run_type\": \"aggregate\",$", MR_Next},
{"\"repetitions\": 2,$", MR_Next},
{"\"threads\": 1,$", MR_Next},
{"\"aggregate_name\": \"stddev\",$", MR_Next},
+ {"\"aggregate_unit\": \"time\",$", MR_Next},
{"\"iterations\": 2,$", MR_Next}});
ADD_CASES(TC_CSVOut, {{"^\"BM_Repeat/repeats:2\",%csv_report$"},
{"^\"BM_Repeat/repeats:2\",%csv_report$"},
@@ -428,43 +495,58 @@ ADD_CASES(TC_ConsoleOut,
{"^BM_Repeat/repeats:3_median %console_time_only_report [ ]*3$"},
{"^BM_Repeat/repeats:3_stddev %console_time_only_report [ ]*3$"}});
ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Repeat/repeats:3\",$"},
+ {"\"family_index\": 16,$", MR_Next},
+ {"\"per_family_instance_index\": 0,$", MR_Next},
{"\"run_name\": \"BM_Repeat/repeats:3\",$", MR_Next},
{"\"run_type\": \"iteration\",$", MR_Next},
{"\"repetitions\": 3,$", MR_Next},
{"\"repetition_index\": 0,$", MR_Next},
{"\"threads\": 1,$", MR_Next},
{"\"name\": \"BM_Repeat/repeats:3\",$"},
+ {"\"family_index\": 16,$", MR_Next},
+ {"\"per_family_instance_index\": 0,$", MR_Next},
{"\"run_name\": \"BM_Repeat/repeats:3\",$", MR_Next},
{"\"run_type\": \"iteration\",$", MR_Next},
{"\"repetitions\": 3,$", MR_Next},
{"\"repetition_index\": 1,$", MR_Next},
{"\"threads\": 1,$", MR_Next},
{"\"name\": \"BM_Repeat/repeats:3\",$"},
+ {"\"family_index\": 16,$", MR_Next},
+ {"\"per_family_instance_index\": 0,$", MR_Next},
{"\"run_name\": \"BM_Repeat/repeats:3\",$", MR_Next},
{"\"run_type\": \"iteration\",$", MR_Next},
{"\"repetitions\": 3,$", MR_Next},
{"\"repetition_index\": 2,$", MR_Next},
{"\"threads\": 1,$", MR_Next},
{"\"name\": \"BM_Repeat/repeats:3_mean\",$"},
+ {"\"family_index\": 16,$", MR_Next},
+ {"\"per_family_instance_index\": 0,$", MR_Next},
{"\"run_name\": \"BM_Repeat/repeats:3\",$", MR_Next},
{"\"run_type\": \"aggregate\",$", MR_Next},
{"\"repetitions\": 3,$", MR_Next},
{"\"threads\": 1,$", MR_Next},
{"\"aggregate_name\": \"mean\",$", MR_Next},
+ {"\"aggregate_unit\": \"time\",$", MR_Next},
{"\"iterations\": 3,$", MR_Next},
{"\"name\": \"BM_Repeat/repeats:3_median\",$"},
+ {"\"family_index\": 16,$", MR_Next},
+ {"\"per_family_instance_index\": 0,$", MR_Next},
{"\"run_name\": \"BM_Repeat/repeats:3\",$", MR_Next},
{"\"run_type\": \"aggregate\",$", MR_Next},
{"\"repetitions\": 3,$", MR_Next},
{"\"threads\": 1,$", MR_Next},
{"\"aggregate_name\": \"median\",$", MR_Next},
+ {"\"aggregate_unit\": \"time\",$", MR_Next},
{"\"iterations\": 3,$", MR_Next},
{"\"name\": \"BM_Repeat/repeats:3_stddev\",$"},
+ {"\"family_index\": 16,$", MR_Next},
+ {"\"per_family_instance_index\": 0,$", MR_Next},
{"\"run_name\": \"BM_Repeat/repeats:3\",$", MR_Next},
{"\"run_type\": \"aggregate\",$", MR_Next},
{"\"repetitions\": 3,$", MR_Next},
{"\"threads\": 1,$", MR_Next},
{"\"aggregate_name\": \"stddev\",$", MR_Next},
+ {"\"aggregate_unit\": \"time\",$", MR_Next},
{"\"iterations\": 3,$", MR_Next}});
ADD_CASES(TC_CSVOut, {{"^\"BM_Repeat/repeats:3\",%csv_report$"},
{"^\"BM_Repeat/repeats:3\",%csv_report$"},
@@ -483,49 +565,66 @@ ADD_CASES(TC_ConsoleOut,
{"^BM_Repeat/repeats:4_median %console_time_only_report [ ]*4$"},
{"^BM_Repeat/repeats:4_stddev %console_time_only_report [ ]*4$"}});
ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Repeat/repeats:4\",$"},
+ {"\"family_index\": 17,$", MR_Next},
+ {"\"per_family_instance_index\": 0,$", MR_Next},
{"\"run_name\": \"BM_Repeat/repeats:4\",$", MR_Next},
{"\"run_type\": \"iteration\",$", MR_Next},
{"\"repetitions\": 4,$", MR_Next},
{"\"repetition_index\": 0,$", MR_Next},
{"\"threads\": 1,$", MR_Next},
{"\"name\": \"BM_Repeat/repeats:4\",$"},
+ {"\"family_index\": 17,$", MR_Next},
+ {"\"per_family_instance_index\": 0,$", MR_Next},
{"\"run_name\": \"BM_Repeat/repeats:4\",$", MR_Next},
{"\"run_type\": \"iteration\",$", MR_Next},
{"\"repetitions\": 4,$", MR_Next},
{"\"repetition_index\": 1,$", MR_Next},
{"\"threads\": 1,$", MR_Next},
{"\"name\": \"BM_Repeat/repeats:4\",$"},
+ {"\"family_index\": 17,$", MR_Next},
+ {"\"per_family_instance_index\": 0,$", MR_Next},
{"\"run_name\": \"BM_Repeat/repeats:4\",$", MR_Next},
{"\"run_type\": \"iteration\",$", MR_Next},
{"\"repetitions\": 4,$", MR_Next},
{"\"repetition_index\": 2,$", MR_Next},
{"\"threads\": 1,$", MR_Next},
{"\"name\": \"BM_Repeat/repeats:4\",$"},
+ {"\"family_index\": 17,$", MR_Next},
+ {"\"per_family_instance_index\": 0,$", MR_Next},
{"\"run_name\": \"BM_Repeat/repeats:4\",$", MR_Next},
{"\"run_type\": \"iteration\",$", MR_Next},
{"\"repetitions\": 4,$", MR_Next},
{"\"repetition_index\": 3,$", MR_Next},
{"\"threads\": 1,$", MR_Next},
{"\"name\": \"BM_Repeat/repeats:4_mean\",$"},
+ {"\"family_index\": 17,$", MR_Next},
+ {"\"per_family_instance_index\": 0,$", MR_Next},
{"\"run_name\": \"BM_Repeat/repeats:4\",$", MR_Next},
{"\"run_type\": \"aggregate\",$", MR_Next},
{"\"repetitions\": 4,$", MR_Next},
{"\"threads\": 1,$", MR_Next},
{"\"aggregate_name\": \"mean\",$", MR_Next},
+ {"\"aggregate_unit\": \"time\",$", MR_Next},
{"\"iterations\": 4,$", MR_Next},
{"\"name\": \"BM_Repeat/repeats:4_median\",$"},
+ {"\"family_index\": 17,$", MR_Next},
+ {"\"per_family_instance_index\": 0,$", MR_Next},
{"\"run_name\": \"BM_Repeat/repeats:4\",$", MR_Next},
{"\"run_type\": \"aggregate\",$", MR_Next},
{"\"repetitions\": 4,$", MR_Next},
{"\"threads\": 1,$", MR_Next},
{"\"aggregate_name\": \"median\",$", MR_Next},
+ {"\"aggregate_unit\": \"time\",$", MR_Next},
{"\"iterations\": 4,$", MR_Next},
{"\"name\": \"BM_Repeat/repeats:4_stddev\",$"},
+ {"\"family_index\": 17,$", MR_Next},
+ {"\"per_family_instance_index\": 0,$", MR_Next},
{"\"run_name\": \"BM_Repeat/repeats:4\",$", MR_Next},
{"\"run_type\": \"aggregate\",$", MR_Next},
{"\"repetitions\": 4,$", MR_Next},
{"\"threads\": 1,$", MR_Next},
{"\"aggregate_name\": \"stddev\",$", MR_Next},
+ {"\"aggregate_unit\": \"time\",$", MR_Next},
{"\"iterations\": 4,$", MR_Next}});
ADD_CASES(TC_CSVOut, {{"^\"BM_Repeat/repeats:4\",%csv_report$"},
{"^\"BM_Repeat/repeats:4\",%csv_report$"},
@@ -544,6 +643,8 @@ void BM_RepeatOnce(benchmark::State& state) {
BENCHMARK(BM_RepeatOnce)->Repetitions(1)->ReportAggregatesOnly();
ADD_CASES(TC_ConsoleOut, {{"^BM_RepeatOnce/repeats:1 %console_report$"}});
ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_RepeatOnce/repeats:1\",$"},
+ {"\"family_index\": 18,$", MR_Next},
+ {"\"per_family_instance_index\": 0,$", MR_Next},
{"\"run_name\": \"BM_RepeatOnce/repeats:1\",$", MR_Next},
{"\"run_type\": \"iteration\",$", MR_Next},
{"\"repetitions\": 1,$", MR_Next},
@@ -566,25 +667,34 @@ ADD_CASES(
ADD_CASES(TC_JSONOut,
{{".*BM_SummaryRepeat/repeats:3 ", MR_Not},
{"\"name\": \"BM_SummaryRepeat/repeats:3_mean\",$"},
+ {"\"family_index\": 19,$", MR_Next},
+ {"\"per_family_instance_index\": 0,$", MR_Next},
{"\"run_name\": \"BM_SummaryRepeat/repeats:3\",$", MR_Next},
{"\"run_type\": \"aggregate\",$", MR_Next},
{"\"repetitions\": 3,$", MR_Next},
{"\"threads\": 1,$", MR_Next},
{"\"aggregate_name\": \"mean\",$", MR_Next},
+ {"\"aggregate_unit\": \"time\",$", MR_Next},
{"\"iterations\": 3,$", MR_Next},
{"\"name\": \"BM_SummaryRepeat/repeats:3_median\",$"},
+ {"\"family_index\": 19,$", MR_Next},
+ {"\"per_family_instance_index\": 0,$", MR_Next},
{"\"run_name\": \"BM_SummaryRepeat/repeats:3\",$", MR_Next},
{"\"run_type\": \"aggregate\",$", MR_Next},
{"\"repetitions\": 3,$", MR_Next},
{"\"threads\": 1,$", MR_Next},
{"\"aggregate_name\": \"median\",$", MR_Next},
+ {"\"aggregate_unit\": \"time\",$", MR_Next},
{"\"iterations\": 3,$", MR_Next},
{"\"name\": \"BM_SummaryRepeat/repeats:3_stddev\",$"},
+ {"\"family_index\": 19,$", MR_Next},
+ {"\"per_family_instance_index\": 0,$", MR_Next},
{"\"run_name\": \"BM_SummaryRepeat/repeats:3\",$", MR_Next},
{"\"run_type\": \"aggregate\",$", MR_Next},
{"\"repetitions\": 3,$", MR_Next},
{"\"threads\": 1,$", MR_Next},
{"\"aggregate_name\": \"stddev\",$", MR_Next},
+ {"\"aggregate_unit\": \"time\",$", MR_Next},
{"\"iterations\": 3,$", MR_Next}});
ADD_CASES(TC_CSVOut, {{".*BM_SummaryRepeat/repeats:3 ", MR_Not},
{"^\"BM_SummaryRepeat/repeats:3_mean\",%csv_report$"},
@@ -608,25 +718,34 @@ ADD_CASES(
ADD_CASES(TC_JSONOut,
{{".*BM_SummaryDisplay/repeats:2 ", MR_Not},
{"\"name\": \"BM_SummaryDisplay/repeats:2_mean\",$"},
+ {"\"family_index\": 20,$", MR_Next},
+ {"\"per_family_instance_index\": 0,$", MR_Next},
{"\"run_name\": \"BM_SummaryDisplay/repeats:2\",$", MR_Next},
{"\"run_type\": \"aggregate\",$", MR_Next},
{"\"repetitions\": 2,$", MR_Next},
{"\"threads\": 1,$", MR_Next},
{"\"aggregate_name\": \"mean\",$", MR_Next},
+ {"\"aggregate_unit\": \"time\",$", MR_Next},
{"\"iterations\": 2,$", MR_Next},
{"\"name\": \"BM_SummaryDisplay/repeats:2_median\",$"},
+ {"\"family_index\": 20,$", MR_Next},
+ {"\"per_family_instance_index\": 0,$", MR_Next},
{"\"run_name\": \"BM_SummaryDisplay/repeats:2\",$", MR_Next},
{"\"run_type\": \"aggregate\",$", MR_Next},
{"\"repetitions\": 2,$", MR_Next},
{"\"threads\": 1,$", MR_Next},
{"\"aggregate_name\": \"median\",$", MR_Next},
+ {"\"aggregate_unit\": \"time\",$", MR_Next},
{"\"iterations\": 2,$", MR_Next},
{"\"name\": \"BM_SummaryDisplay/repeats:2_stddev\",$"},
+ {"\"family_index\": 20,$", MR_Next},
+ {"\"per_family_instance_index\": 0,$", MR_Next},
{"\"run_name\": \"BM_SummaryDisplay/repeats:2\",$", MR_Next},
{"\"run_type\": \"aggregate\",$", MR_Next},
{"\"repetitions\": 2,$", MR_Next},
{"\"threads\": 1,$", MR_Next},
{"\"aggregate_name\": \"stddev\",$", MR_Next},
+ {"\"aggregate_unit\": \"time\",$", MR_Next},
{"\"iterations\": 2,$", MR_Next}});
ADD_CASES(TC_CSVOut,
{{".*BM_SummaryDisplay/repeats:2 ", MR_Not},
@@ -654,27 +773,36 @@ ADD_CASES(
ADD_CASES(TC_JSONOut,
{{".*BM_RepeatTimeUnit/repeats:3 ", MR_Not},
{"\"name\": \"BM_RepeatTimeUnit/repeats:3_mean\",$"},
+ {"\"family_index\": 21,$", MR_Next},
+ {"\"per_family_instance_index\": 0,$", MR_Next},
{"\"run_name\": \"BM_RepeatTimeUnit/repeats:3\",$", MR_Next},
{"\"run_type\": \"aggregate\",$", MR_Next},
{"\"repetitions\": 3,$", MR_Next},
{"\"threads\": 1,$", MR_Next},
{"\"aggregate_name\": \"mean\",$", MR_Next},
+ {"\"aggregate_unit\": \"time\",$", MR_Next},
{"\"iterations\": 3,$", MR_Next},
{"\"time_unit\": \"us\",?$"},
{"\"name\": \"BM_RepeatTimeUnit/repeats:3_median\",$"},
+ {"\"family_index\": 21,$", MR_Next},
+ {"\"per_family_instance_index\": 0,$", MR_Next},
{"\"run_name\": \"BM_RepeatTimeUnit/repeats:3\",$", MR_Next},
{"\"run_type\": \"aggregate\",$", MR_Next},
{"\"repetitions\": 3,$", MR_Next},
{"\"threads\": 1,$", MR_Next},
{"\"aggregate_name\": \"median\",$", MR_Next},
+ {"\"aggregate_unit\": \"time\",$", MR_Next},
{"\"iterations\": 3,$", MR_Next},
{"\"time_unit\": \"us\",?$"},
{"\"name\": \"BM_RepeatTimeUnit/repeats:3_stddev\",$"},
+ {"\"family_index\": 21,$", MR_Next},
+ {"\"per_family_instance_index\": 0,$", MR_Next},
{"\"run_name\": \"BM_RepeatTimeUnit/repeats:3\",$", MR_Next},
{"\"run_type\": \"aggregate\",$", MR_Next},
{"\"repetitions\": 3,$", MR_Next},
{"\"threads\": 1,$", MR_Next},
{"\"aggregate_name\": \"stddev\",$", MR_Next},
+ {"\"aggregate_unit\": \"time\",$", MR_Next},
{"\"iterations\": 3,$", MR_Next},
{"\"time_unit\": \"us\",?$"}});
ADD_CASES(TC_CSVOut,
@@ -722,6 +850,8 @@ ADD_CASES(TC_ConsoleOut, {{"^BM_UserStats/iterations:5/repeats:3/manual_time [ "
ADD_CASES(
TC_JSONOut,
{{"\"name\": \"BM_UserStats/iterations:5/repeats:3/manual_time\",$"},
+ {"\"family_index\": 22,$", MR_Next},
+ {"\"per_family_instance_index\": 0,$", MR_Next},
{"\"run_name\": \"BM_UserStats/iterations:5/repeats:3/manual_time\",$",
MR_Next},
{"\"run_type\": \"iteration\",$", MR_Next},
@@ -731,6 +861,8 @@ ADD_CASES(
{"\"iterations\": 5,$", MR_Next},
{"\"real_time\": 1\\.5(0)*e\\+(0)*2,$", MR_Next},
{"\"name\": \"BM_UserStats/iterations:5/repeats:3/manual_time\",$"},
+ {"\"family_index\": 22,$", MR_Next},
+ {"\"per_family_instance_index\": 0,$", MR_Next},
{"\"run_name\": \"BM_UserStats/iterations:5/repeats:3/manual_time\",$",
MR_Next},
{"\"run_type\": \"iteration\",$", MR_Next},
@@ -740,6 +872,8 @@ ADD_CASES(
{"\"iterations\": 5,$", MR_Next},
{"\"real_time\": 1\\.5(0)*e\\+(0)*2,$", MR_Next},
{"\"name\": \"BM_UserStats/iterations:5/repeats:3/manual_time\",$"},
+ {"\"family_index\": 22,$", MR_Next},
+ {"\"per_family_instance_index\": 0,$", MR_Next},
{"\"run_name\": \"BM_UserStats/iterations:5/repeats:3/manual_time\",$",
MR_Next},
{"\"run_type\": \"iteration\",$", MR_Next},
@@ -749,39 +883,51 @@ ADD_CASES(
{"\"iterations\": 5,$", MR_Next},
{"\"real_time\": 1\\.5(0)*e\\+(0)*2,$", MR_Next},
{"\"name\": \"BM_UserStats/iterations:5/repeats:3/manual_time_mean\",$"},
+ {"\"family_index\": 22,$", MR_Next},
+ {"\"per_family_instance_index\": 0,$", MR_Next},
{"\"run_name\": \"BM_UserStats/iterations:5/repeats:3/manual_time\",$",
MR_Next},
{"\"run_type\": \"aggregate\",$", MR_Next},
{"\"repetitions\": 3,$", MR_Next},
{"\"threads\": 1,$", MR_Next},
{"\"aggregate_name\": \"mean\",$", MR_Next},
+ {"\"aggregate_unit\": \"time\",$", MR_Next},
{"\"iterations\": 3,$", MR_Next},
{"\"real_time\": 1\\.5(0)*e\\+(0)*2,$", MR_Next},
{"\"name\": \"BM_UserStats/iterations:5/repeats:3/manual_time_median\",$"},
+ {"\"family_index\": 22,$", MR_Next},
+ {"\"per_family_instance_index\": 0,$", MR_Next},
{"\"run_name\": \"BM_UserStats/iterations:5/repeats:3/manual_time\",$",
MR_Next},
{"\"run_type\": \"aggregate\",$", MR_Next},
{"\"repetitions\": 3,$", MR_Next},
{"\"threads\": 1,$", MR_Next},
{"\"aggregate_name\": \"median\",$", MR_Next},
+ {"\"aggregate_unit\": \"time\",$", MR_Next},
{"\"iterations\": 3,$", MR_Next},
{"\"real_time\": 1\\.5(0)*e\\+(0)*2,$", MR_Next},
{"\"name\": \"BM_UserStats/iterations:5/repeats:3/manual_time_stddev\",$"},
+ {"\"family_index\": 22,$", MR_Next},
+ {"\"per_family_instance_index\": 0,$", MR_Next},
{"\"run_name\": \"BM_UserStats/iterations:5/repeats:3/manual_time\",$",
MR_Next},
{"\"run_type\": \"aggregate\",$", MR_Next},
{"\"repetitions\": 3,$", MR_Next},
{"\"threads\": 1,$", MR_Next},
{"\"aggregate_name\": \"stddev\",$", MR_Next},
+ {"\"aggregate_unit\": \"time\",$", MR_Next},
{"\"iterations\": 3,$", MR_Next},
{"\"real_time\": %float,$", MR_Next},
{"\"name\": \"BM_UserStats/iterations:5/repeats:3/manual_time_\",$"},
+ {"\"family_index\": 22,$", MR_Next},
+ {"\"per_family_instance_index\": 0,$", MR_Next},
{"\"run_name\": \"BM_UserStats/iterations:5/repeats:3/manual_time\",$",
MR_Next},
{"\"run_type\": \"aggregate\",$", MR_Next},
{"\"repetitions\": 3,$", MR_Next},
{"\"threads\": 1,$", MR_Next},
{"\"aggregate_name\": \"\",$", MR_Next},
+ {"\"aggregate_unit\": \"time\",$", MR_Next},
{"\"iterations\": 3,$", MR_Next},
{"\"real_time\": 1\\.5(0)*e\\+(0)*2,$", MR_Next}});
ADD_CASES(
@@ -797,6 +943,154 @@ ADD_CASES(
{"^\"BM_UserStats/iterations:5/repeats:3/manual_time_\",%csv_report$"}});
// ========================================================================= //
+// ------------- Testing relative standard deviation statistics ------------ //
+// ========================================================================= //
+
+const auto UserPercentStatistics = [](const std::vector<double>&) {
+ return 1. / 100.;
+};
+void BM_UserPercentStats(benchmark::State& state) {
+ for (auto _ : state) {
+ state.SetIterationTime(150 / 10e8);
+ }
+}
+// clang-format off
+BENCHMARK(BM_UserPercentStats)
+ ->Repetitions(3)
+ ->Iterations(5)
+ ->UseManualTime()
+ ->Unit(benchmark::TimeUnit::kNanosecond)
+ ->ComputeStatistics("", UserPercentStatistics, benchmark::StatisticUnit::kPercentage);
+// clang-format on
+
+// Check that the UserPercentStatistics-provided stat is calculated, and that
+// it is reported after the default ones. The empty string as its name is
+// intentional: it would sort before anything else.
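+// (The lambda above always returns 1./100.; with StatisticUnit::kPercentage
+// this is expected to show up as "1.00 %" in the console expectations and as
+// 1e-2 in the JSON "real_time" field checked below.)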
+ADD_CASES(TC_ConsoleOut,
+ {{"^BM_UserPercentStats/iterations:5/repeats:3/manual_time [ "
+ "]* 150 ns %time [ ]*5$"},
+ {"^BM_UserPercentStats/iterations:5/repeats:3/manual_time [ "
+ "]* 150 ns %time [ ]*5$"},
+ {"^BM_UserPercentStats/iterations:5/repeats:3/manual_time [ "
+ "]* 150 ns %time [ ]*5$"},
+ {"^BM_UserPercentStats/iterations:5/repeats:3/"
+ "manual_time_mean [ ]* 150 ns %time [ ]*3$"},
+ {"^BM_UserPercentStats/iterations:5/repeats:3/"
+ "manual_time_median [ ]* 150 ns %time [ ]*3$"},
+ {"^BM_UserPercentStats/iterations:5/repeats:3/"
+ "manual_time_stddev [ ]* 0.000 ns %time [ ]*3$"},
+ {"^BM_UserPercentStats/iterations:5/repeats:3/manual_time_ "
+ "[ ]* 1.00 % [ ]* 1.00 %[ ]*3$"}});
+ADD_CASES(
+ TC_JSONOut,
+ {{"\"name\": \"BM_UserPercentStats/iterations:5/repeats:3/manual_time\",$"},
+ {"\"family_index\": 23,$", MR_Next},
+ {"\"per_family_instance_index\": 0,$", MR_Next},
+ {"\"run_name\": "
+ "\"BM_UserPercentStats/iterations:5/repeats:3/manual_time\",$",
+ MR_Next},
+ {"\"run_type\": \"iteration\",$", MR_Next},
+ {"\"repetitions\": 3,$", MR_Next},
+ {"\"repetition_index\": 0,$", MR_Next},
+ {"\"threads\": 1,$", MR_Next},
+ {"\"iterations\": 5,$", MR_Next},
+ {"\"real_time\": 1\\.5(0)*e\\+(0)*2,$", MR_Next},
+ {"\"name\": \"BM_UserPercentStats/iterations:5/repeats:3/manual_time\",$"},
+ {"\"family_index\": 23,$", MR_Next},
+ {"\"per_family_instance_index\": 0,$", MR_Next},
+ {"\"run_name\": "
+ "\"BM_UserPercentStats/iterations:5/repeats:3/manual_time\",$",
+ MR_Next},
+ {"\"run_type\": \"iteration\",$", MR_Next},
+ {"\"repetitions\": 3,$", MR_Next},
+ {"\"repetition_index\": 1,$", MR_Next},
+ {"\"threads\": 1,$", MR_Next},
+ {"\"iterations\": 5,$", MR_Next},
+ {"\"real_time\": 1\\.5(0)*e\\+(0)*2,$", MR_Next},
+ {"\"name\": \"BM_UserPercentStats/iterations:5/repeats:3/manual_time\",$"},
+ {"\"family_index\": 23,$", MR_Next},
+ {"\"per_family_instance_index\": 0,$", MR_Next},
+ {"\"run_name\": "
+ "\"BM_UserPercentStats/iterations:5/repeats:3/manual_time\",$",
+ MR_Next},
+ {"\"run_type\": \"iteration\",$", MR_Next},
+ {"\"repetitions\": 3,$", MR_Next},
+ {"\"repetition_index\": 2,$", MR_Next},
+ {"\"threads\": 1,$", MR_Next},
+ {"\"iterations\": 5,$", MR_Next},
+ {"\"real_time\": 1\\.5(0)*e\\+(0)*2,$", MR_Next},
+ {"\"name\": "
+ "\"BM_UserPercentStats/iterations:5/repeats:3/manual_time_mean\",$"},
+ {"\"family_index\": 23,$", MR_Next},
+ {"\"per_family_instance_index\": 0,$", MR_Next},
+ {"\"run_name\": "
+ "\"BM_UserPercentStats/iterations:5/repeats:3/manual_time\",$",
+ MR_Next},
+ {"\"run_type\": \"aggregate\",$", MR_Next},
+ {"\"repetitions\": 3,$", MR_Next},
+ {"\"threads\": 1,$", MR_Next},
+ {"\"aggregate_name\": \"mean\",$", MR_Next},
+ {"\"aggregate_unit\": \"time\",$", MR_Next},
+ {"\"iterations\": 3,$", MR_Next},
+ {"\"real_time\": 1\\.5(0)*e\\+(0)*2,$", MR_Next},
+ {"\"name\": "
+ "\"BM_UserPercentStats/iterations:5/repeats:3/manual_time_median\",$"},
+ {"\"family_index\": 23,$", MR_Next},
+ {"\"per_family_instance_index\": 0,$", MR_Next},
+ {"\"run_name\": "
+ "\"BM_UserPercentStats/iterations:5/repeats:3/manual_time\",$",
+ MR_Next},
+ {"\"run_type\": \"aggregate\",$", MR_Next},
+ {"\"repetitions\": 3,$", MR_Next},
+ {"\"threads\": 1,$", MR_Next},
+ {"\"aggregate_name\": \"median\",$", MR_Next},
+ {"\"aggregate_unit\": \"time\",$", MR_Next},
+ {"\"iterations\": 3,$", MR_Next},
+ {"\"real_time\": 1\\.5(0)*e\\+(0)*2,$", MR_Next},
+ {"\"name\": "
+ "\"BM_UserPercentStats/iterations:5/repeats:3/manual_time_stddev\",$"},
+ {"\"family_index\": 23,$", MR_Next},
+ {"\"per_family_instance_index\": 0,$", MR_Next},
+ {"\"run_name\": "
+ "\"BM_UserPercentStats/iterations:5/repeats:3/manual_time\",$",
+ MR_Next},
+ {"\"run_type\": \"aggregate\",$", MR_Next},
+ {"\"repetitions\": 3,$", MR_Next},
+ {"\"threads\": 1,$", MR_Next},
+ {"\"aggregate_name\": \"stddev\",$", MR_Next},
+ {"\"aggregate_unit\": \"time\",$", MR_Next},
+ {"\"iterations\": 3,$", MR_Next},
+ {"\"real_time\": %float,$", MR_Next},
+ {"\"name\": "
+ "\"BM_UserPercentStats/iterations:5/repeats:3/manual_time_\",$"},
+ {"\"family_index\": 23,$", MR_Next},
+ {"\"per_family_instance_index\": 0,$", MR_Next},
+ {"\"run_name\": "
+ "\"BM_UserPercentStats/iterations:5/repeats:3/manual_time\",$",
+ MR_Next},
+ {"\"run_type\": \"aggregate\",$", MR_Next},
+ {"\"repetitions\": 3,$", MR_Next},
+ {"\"threads\": 1,$", MR_Next},
+ {"\"aggregate_name\": \"\",$", MR_Next},
+ {"\"aggregate_unit\": \"percentage\",$", MR_Next},
+ {"\"iterations\": 3,$", MR_Next},
+ {"\"real_time\": 1\\.(0)*e-(0)*2,$", MR_Next}});
+ADD_CASES(TC_CSVOut, {{"^\"BM_UserPercentStats/iterations:5/repeats:3/"
+ "manual_time\",%csv_report$"},
+ {"^\"BM_UserPercentStats/iterations:5/repeats:3/"
+ "manual_time\",%csv_report$"},
+ {"^\"BM_UserPercentStats/iterations:5/repeats:3/"
+ "manual_time\",%csv_report$"},
+ {"^\"BM_UserPercentStats/iterations:5/repeats:3/"
+ "manual_time_mean\",%csv_report$"},
+ {"^\"BM_UserPercentStats/iterations:5/repeats:3/"
+ "manual_time_median\",%csv_report$"},
+ {"^\"BM_UserPercentStats/iterations:5/repeats:3/"
+ "manual_time_stddev\",%csv_report$"},
+ {"^\"BM_UserPercentStats/iterations:5/repeats:3/"
+ "manual_time_\",%csv_report$"}});
+
+// ========================================================================= //
// ------------------------- Testing StrEscape JSON ------------------------ //
// ========================================================================= //
#if 0 // enable when csv testing code correctly handles multi-line fields
@@ -807,9 +1101,11 @@ void BM_JSON_Format(benchmark::State& state) {
}
BENCHMARK(BM_JSON_Format);
ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_JSON_Format\",$"},
+ {"\"family_index\": 23,$", MR_Next},
+{"\"per_family_instance_index\": 0,$", MR_Next},
{"\"run_name\": \"BM_JSON_Format\",$", MR_Next},
{"\"run_type\": \"iteration\",$", MR_Next},
- {"\"repetitions\": 0,$", MR_Next},
+ {"\"repetitions\": 1,$", MR_Next},
{"\"repetition_index\": 0,$", MR_Next},
{"\"threads\": 1,$", MR_Next},
{"\"error_occurred\": true,$", MR_Next},
diff --git a/test/skip_with_error_test.cc b/test/skip_with_error_test.cc
index 97a2e3c..b4c5e15 100644
--- a/test/skip_with_error_test.cc
+++ b/test/skip_with_error_test.cc
@@ -10,17 +10,17 @@ namespace {
class TestReporter : public benchmark::ConsoleReporter {
public:
- virtual bool ReportContext(const Context& context) {
+ bool ReportContext(const Context& context) override {
return ConsoleReporter::ReportContext(context);
};
- virtual void ReportRuns(const std::vector<Run>& report) {
+ void ReportRuns(const std::vector<Run>& report) override {
all_runs_.insert(all_runs_.end(), begin(report), end(report));
ConsoleReporter::ReportRuns(report);
}
TestReporter() {}
- virtual ~TestReporter() {}
+ ~TestReporter() override {}
mutable std::vector<Run> all_runs_;
};
@@ -33,21 +33,23 @@ struct TestCase {
typedef benchmark::BenchmarkReporter::Run Run;
void CheckRun(Run const& run) const {
- CHECK(name == run.benchmark_name())
+ BM_CHECK(name == run.benchmark_name())
<< "expected " << name << " got " << run.benchmark_name();
- CHECK(error_occurred == run.error_occurred);
- CHECK(error_message == run.error_message);
+ BM_CHECK_EQ(error_occurred,
+ benchmark::internal::SkippedWithError == run.skipped);
+ BM_CHECK(error_message == run.skip_message);
if (error_occurred) {
- // CHECK(run.iterations == 0);
+ // BM_CHECK(run.iterations == 0);
} else {
- CHECK(run.iterations != 0);
+ BM_CHECK(run.iterations != 0);
}
}
};
std::vector<TestCase> ExpectedResults;
-int AddCases(const char* base_name, std::initializer_list<TestCase> const& v) {
+int AddCases(const std::string& base_name,
+ std::initializer_list<TestCase> const& v) {
for (auto TC : v) {
TC.name = base_name + TC.name;
ExpectedResults.push_back(std::move(TC));
@@ -97,7 +99,7 @@ ADD_CASES("BM_error_before_running_range_for", {{"", true, "error message"}});
void BM_error_during_running(benchmark::State& state) {
int first_iter = true;
while (state.KeepRunning()) {
- if (state.range(0) == 1 && state.thread_index <= (state.threads / 2)) {
+ if (state.range(0) == 1 && state.thread_index() <= (state.threads() / 2)) {
assert(first_iter);
first_iter = false;
state.SkipWithError("error message");
@@ -119,12 +121,13 @@ ADD_CASES("BM_error_during_running", {{"/1/threads:1", true, "error message"},
void BM_error_during_running_ranged_for(benchmark::State& state) {
assert(state.max_iterations > 3 && "test requires at least a few iterations");
- int first_iter = true;
+ bool first_iter = true;
// NOTE: Users should not write the for loop explicitly.
for (auto It = state.begin(), End = state.end(); It != End; ++It) {
if (state.range(0) == 1) {
assert(first_iter);
first_iter = false;
+ (void)first_iter;
state.SkipWithError("error message");
// Test the unfortunate but documented behavior that the ranged-for loop
// doesn't automatically terminate when SkipWithError is set.
@@ -140,9 +143,10 @@ ADD_CASES("BM_error_during_running_ranged_for",
void BM_error_after_running(benchmark::State& state) {
for (auto _ : state) {
- benchmark::DoNotOptimize(state.iterations());
+ auto iterations = state.iterations();
+ benchmark::DoNotOptimize(iterations);
}
- if (state.thread_index <= (state.threads / 2))
+ if (state.thread_index() <= (state.threads() / 2))
state.SkipWithError("error message");
}
BENCHMARK(BM_error_after_running)->ThreadRange(1, 8);
@@ -154,7 +158,7 @@ ADD_CASES("BM_error_after_running", {{"/threads:1", true, "error message"},
void BM_error_while_paused(benchmark::State& state) {
bool first_iter = true;
while (state.KeepRunning()) {
- if (state.range(0) == 1 && state.thread_index <= (state.threads / 2)) {
+ if (state.range(0) == 1 && state.thread_index() <= (state.threads() / 2)) {
assert(first_iter);
first_iter = false;
state.PauseTiming();
diff --git a/test/spec_arg_test.cc b/test/spec_arg_test.cc
new file mode 100644
index 0000000..06aafbe
--- /dev/null
+++ b/test/spec_arg_test.cc
@@ -0,0 +1,105 @@
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <cstdlib>
+#include <cstring>
+#include <iostream>
+#include <limits>
+#include <string>
+#include <vector>
+
+#include "benchmark/benchmark.h"
+
+// Tests that the benchmark spec from FLAGS_benchmark_filter can be overridden
+// by the argument passed to RunSpecifiedBenchmarks(...).
+
+namespace {
+
+class TestReporter : public benchmark::ConsoleReporter {
+ public:
+ bool ReportContext(const Context& context) override {
+ return ConsoleReporter::ReportContext(context);
+ };
+
+ void ReportRuns(const std::vector<Run>& report) override {
+ assert(report.size() == 1);
+ matched_functions.push_back(report[0].run_name.function_name);
+ ConsoleReporter::ReportRuns(report);
+ };
+
+ TestReporter() {}
+
+ ~TestReporter() override {}
+
+ const std::vector<std::string>& GetMatchedFunctions() const {
+ return matched_functions;
+ }
+
+ private:
+ std::vector<std::string> matched_functions;
+};
+
+} // end namespace
+
+static void BM_NotChosen(benchmark::State& state) {
+ assert(false && "SHOULD NOT BE CALLED");
+ for (auto _ : state) {
+ }
+}
+BENCHMARK(BM_NotChosen);
+
+static void BM_Chosen(benchmark::State& state) {
+ for (auto _ : state) {
+ }
+}
+BENCHMARK(BM_Chosen);
+
+int main(int argc, char** argv) {
+ const std::string flag = "BM_NotChosen";
+
+ // Verify that argv specify --benchmark_filter=BM_NotChosen.
+ bool found = false;
+ for (int i = 0; i < argc; ++i) {
+ if (strcmp("--benchmark_filter=BM_NotChosen", argv[i]) == 0) {
+ found = true;
+ break;
+ }
+ }
+ assert(found);
+
+ benchmark::Initialize(&argc, argv);
+
+ // Check that the current flag value is reported accurately via the
+ // GetBenchmarkFilter() function.
+ if (flag != benchmark::GetBenchmarkFilter()) {
+ std::cerr
+ << "Seeing different value for flags. GetBenchmarkFilter() returns ["
+ << benchmark::GetBenchmarkFilter() << "] expected flag=[" << flag
+ << "]\n";
+ return 1;
+ }
+ TestReporter test_reporter;
+ const char* const spec = "BM_Chosen";
+ const size_t returned_count =
+ benchmark::RunSpecifiedBenchmarks(&test_reporter, spec);
+ assert(returned_count == 1);
+ const std::vector<std::string> matched_functions =
+ test_reporter.GetMatchedFunctions();
+ assert(matched_functions.size() == 1);
+ if (strcmp(spec, matched_functions.front().c_str()) != 0) {
+ std::cerr << "Expected benchmark [" << spec << "] to run, but got ["
+ << matched_functions.front() << "]\n";
+ return 2;
+ }
+
+ // Test that SetBenchmarkFilter works.
+ const std::string golden_value = "golden_value";
+ benchmark::SetBenchmarkFilter(golden_value);
+ std::string current_value = benchmark::GetBenchmarkFilter();
+ if (golden_value != current_value) {
+ std::cerr << "Expected [" << golden_value
+ << "] for --benchmark_filter but got [" << current_value << "]\n";
+ return 3;
+ }
+ return 0;
+}
diff --git a/test/spec_arg_verbosity_test.cc b/test/spec_arg_verbosity_test.cc
new file mode 100644
index 0000000..8f8eb6d
--- /dev/null
+++ b/test/spec_arg_verbosity_test.cc
@@ -0,0 +1,43 @@
+#include <string.h>
+
+#include <iostream>
+
+#include "benchmark/benchmark.h"
+
+// Tests that the user-specified verbosity level can be retrieved.
+static void BM_Verbosity(benchmark::State& state) {
+ for (auto _ : state) {
+ }
+}
+BENCHMARK(BM_Verbosity);
+
+int main(int argc, char** argv) {
+ const int32_t flagv = 42;
+
+ // Verify that argv specify --v=42.
+ bool found = false;
+ for (int i = 0; i < argc; ++i) {
+ if (strcmp("--v=42", argv[i]) == 0) {
+ found = true;
+ break;
+ }
+ }
+ if (!found) {
+ std::cerr << "This test requires '--v=42' to be passed as a command-line "
+ << "argument.\n";
+ return 1;
+ }
+
+ benchmark::Initialize(&argc, argv);
+
+ // Check that the current flag value is reported accurately via the
+ // GetBenchmarkVerbosity() function.
+ if (flagv != benchmark::GetBenchmarkVerbosity()) {
+ std::cerr
+ << "Seeing different value for flags. GetBenchmarkVerbosity() returns ["
+ << benchmark::GetBenchmarkVerbosity() << "] expected flag=[" << flagv
+ << "]\n";
+ return 1;
+ }
+ return 0;
+}
diff --git a/test/statistics_gtest.cc b/test/statistics_gtest.cc
index 3ddc72d..1de2d87 100644
--- a/test/statistics_gtest.cc
+++ b/test/statistics_gtest.cc
@@ -25,4 +25,11 @@ TEST(StatisticsTest, StdDev) {
1.151086443322134);
}
+TEST(StatisticsTest, CV) {
+ EXPECT_DOUBLE_EQ(benchmark::StatisticsCV({101, 101, 101, 101}), 0.0);
+ EXPECT_DOUBLE_EQ(benchmark::StatisticsCV({1, 2, 3}), 1. / 2.);
+ EXPECT_DOUBLE_EQ(benchmark::StatisticsCV({2.5, 2.4, 3.3, 4.2, 5.1}),
+ 0.32888184094918121);
+}
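+// Worked check for the {1, 2, 3} case above: the mean is 2 and the sample
+// (n-1) standard deviation is sqrt(((1-2)^2 + (2-2)^2 + (3-2)^2) / 2) = 1,
+// so the coefficient of variation is 1 / 2.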
+
} // end namespace
diff --git a/test/string_util_gtest.cc b/test/string_util_gtest.cc
index 01bf155..67b4bc0 100644
--- a/test/string_util_gtest.cc
+++ b/test/string_util_gtest.cc
@@ -1,9 +1,12 @@
//===---------------------------------------------------------------------===//
-// statistics_test - Unit tests for src/statistics.cc
+// string_util_test - Unit tests for src/string_util.cc
//===---------------------------------------------------------------------===//
-#include "../src/string_util.h"
+#include <tuple>
+
#include "../src/internal_macros.h"
+#include "../src/string_util.h"
+#include "gmock/gmock.h"
#include "gtest/gtest.h"
namespace {
@@ -32,7 +35,8 @@ TEST(StringUtilTest, stoul) {
#elif ULONG_MAX == 0xFFFFFFFFFFFFFFFFul
{
size_t pos = 0;
- EXPECT_EQ(0xFFFFFFFFFFFFFFFFul, benchmark::stoul("18446744073709551615", &pos));
+ EXPECT_EQ(0xFFFFFFFFFFFFFFFFul,
+ benchmark::stoul("18446744073709551615", &pos));
EXPECT_EQ(20ul, pos);
}
#endif
@@ -63,91 +67,133 @@ TEST(StringUtilTest, stoul) {
}
#ifndef BENCHMARK_HAS_NO_EXCEPTIONS
{
- ASSERT_THROW(benchmark::stoul("this is a test"), std::invalid_argument);
+ ASSERT_THROW(std::ignore = benchmark::stoul("this is a test"),
+ std::invalid_argument);
}
#endif
}
-TEST(StringUtilTest, stoi) {
- {
- size_t pos = 0;
- EXPECT_EQ(0, benchmark::stoi("0", &pos));
- EXPECT_EQ(1ul, pos);
- }
- {
- size_t pos = 0;
- EXPECT_EQ(-17, benchmark::stoi("-17", &pos));
- EXPECT_EQ(3ul, pos);
- }
- {
- size_t pos = 0;
- EXPECT_EQ(1357, benchmark::stoi("1357", &pos));
- EXPECT_EQ(4ul, pos);
- }
- {
- size_t pos = 0;
- EXPECT_EQ(10, benchmark::stoi("1010", &pos, 2));
- EXPECT_EQ(4ul, pos);
- }
- {
- size_t pos = 0;
- EXPECT_EQ(520, benchmark::stoi("1010", &pos, 8));
- EXPECT_EQ(4ul, pos);
- }
- {
- size_t pos = 0;
- EXPECT_EQ(1010, benchmark::stoi("1010", &pos, 10));
- EXPECT_EQ(4ul, pos);
- }
- {
- size_t pos = 0;
- EXPECT_EQ(4112, benchmark::stoi("1010", &pos, 16));
- EXPECT_EQ(4ul, pos);
- }
- {
- size_t pos = 0;
- EXPECT_EQ(0xBEEF, benchmark::stoi("BEEF", &pos, 16));
- EXPECT_EQ(4ul, pos);
- }
+TEST(StringUtilTest, stoi) {
+{
+  size_t pos = 0;
+  EXPECT_EQ(0, benchmark::stoi("0", &pos));
+  EXPECT_EQ(1ul, pos);
+}
+{
+ size_t pos = 0;
+ EXPECT_EQ(-17, benchmark::stoi("-17", &pos));
+ EXPECT_EQ(3ul, pos);
+}
+{
+ size_t pos = 0;
+ EXPECT_EQ(1357, benchmark::stoi("1357", &pos));
+ EXPECT_EQ(4ul, pos);
+}
+{
+ size_t pos = 0;
+ EXPECT_EQ(10, benchmark::stoi("1010", &pos, 2));
+ EXPECT_EQ(4ul, pos);
+}
+{
+ size_t pos = 0;
+ EXPECT_EQ(520, benchmark::stoi("1010", &pos, 8));
+ EXPECT_EQ(4ul, pos);
+}
+{
+ size_t pos = 0;
+ EXPECT_EQ(1010, benchmark::stoi("1010", &pos, 10));
+ EXPECT_EQ(4ul, pos);
+}
+{
+ size_t pos = 0;
+ EXPECT_EQ(4112, benchmark::stoi("1010", &pos, 16));
+ EXPECT_EQ(4ul, pos);
+}
+{
+ size_t pos = 0;
+ EXPECT_EQ(0xBEEF, benchmark::stoi("BEEF", &pos, 16));
+ EXPECT_EQ(4ul, pos);
+}
#ifndef BENCHMARK_HAS_NO_EXCEPTIONS
- {
- ASSERT_THROW(benchmark::stoi("this is a test"), std::invalid_argument);
- }
+{
+ ASSERT_THROW(std::ignore = benchmark::stoi("this is a test"),
+ std::invalid_argument);
+}
#endif
}
-TEST(StringUtilTest, stod) {
- {
- size_t pos = 0;
- EXPECT_EQ(0.0, benchmark::stod("0", &pos));
- EXPECT_EQ(1ul, pos);
- }
- {
- size_t pos = 0;
- EXPECT_EQ(-84.0, benchmark::stod("-84", &pos));
- EXPECT_EQ(3ul, pos);
- }
- {
- size_t pos = 0;
- EXPECT_EQ(1234.0, benchmark::stod("1234", &pos));
- EXPECT_EQ(4ul, pos);
- }
- {
- size_t pos = 0;
- EXPECT_EQ(1.5, benchmark::stod("1.5", &pos));
- EXPECT_EQ(3ul, pos);
- }
- {
- size_t pos = 0;
- /* Note: exactly representable as double */
- EXPECT_EQ(-1.25e+9, benchmark::stod("-1.25e+9", &pos));
- EXPECT_EQ(8ul, pos);
- }
+TEST(StringUtilTest, stod) {
+{
+  size_t pos = 0;
+  EXPECT_EQ(0.0, benchmark::stod("0", &pos));
+  EXPECT_EQ(1ul, pos);
+}
+{
+ size_t pos = 0;
+ EXPECT_EQ(-84.0, benchmark::stod("-84", &pos));
+ EXPECT_EQ(3ul, pos);
+}
+{
+ size_t pos = 0;
+ EXPECT_EQ(1234.0, benchmark::stod("1234", &pos));
+ EXPECT_EQ(4ul, pos);
+}
+{
+ size_t pos = 0;
+ EXPECT_EQ(1.5, benchmark::stod("1.5", &pos));
+ EXPECT_EQ(3ul, pos);
+}
+{
+ size_t pos = 0;
+ /* Note: exactly representable as double */
+ EXPECT_EQ(-1.25e+9, benchmark::stod("-1.25e+9", &pos));
+ EXPECT_EQ(8ul, pos);
+}
#ifndef BENCHMARK_HAS_NO_EXCEPTIONS
- {
- ASSERT_THROW(benchmark::stod("this is a test"), std::invalid_argument);
- }
+{
+ ASSERT_THROW(std::ignore = benchmark::stod("this is a test"),
+ std::invalid_argument);
+}
#endif
}
+TEST(StringUtilTest, StrSplit) {
+ EXPECT_EQ(benchmark::StrSplit("", ','), std::vector<std::string>{});
+ EXPECT_EQ(benchmark::StrSplit("hello", ','),
+ std::vector<std::string>({"hello"}));
+ EXPECT_EQ(benchmark::StrSplit("hello,there,is,more", ','),
+ std::vector<std::string>({"hello", "there", "is", "more"}));
+}
+
+using HumanReadableFixture = ::testing::TestWithParam<
+ std::tuple<double, benchmark::Counter::OneK, std::string>>;
+
+INSTANTIATE_TEST_SUITE_P(
+ HumanReadableTests, HumanReadableFixture,
+ ::testing::Values(
+ std::make_tuple(0.0, benchmark::Counter::kIs1024, "0"),
+ std::make_tuple(999.0, benchmark::Counter::kIs1024, "999"),
+ std::make_tuple(1000.0, benchmark::Counter::kIs1024, "1000"),
+ std::make_tuple(1024.0, benchmark::Counter::kIs1024, "1Ki"),
+ std::make_tuple(1000 * 1000.0, benchmark::Counter::kIs1024,
+ "976\\.56.Ki"),
+ std::make_tuple(1024 * 1024.0, benchmark::Counter::kIs1024, "1Mi"),
+ std::make_tuple(1000 * 1000 * 1000.0, benchmark::Counter::kIs1024,
+ "953\\.674Mi"),
+ std::make_tuple(1024 * 1024 * 1024.0, benchmark::Counter::kIs1024,
+ "1Gi"),
+ std::make_tuple(0.0, benchmark::Counter::kIs1000, "0"),
+ std::make_tuple(999.0, benchmark::Counter::kIs1000, "999"),
+ std::make_tuple(1000.0, benchmark::Counter::kIs1000, "1k"),
+ std::make_tuple(1024.0, benchmark::Counter::kIs1000, "1.024k"),
+ std::make_tuple(1000 * 1000.0, benchmark::Counter::kIs1000, "1M"),
+ std::make_tuple(1024 * 1024.0, benchmark::Counter::kIs1000,
+ "1\\.04858M"),
+ std::make_tuple(1000 * 1000 * 1000.0, benchmark::Counter::kIs1000,
+ "1G"),
+ std::make_tuple(1024 * 1024 * 1024.0, benchmark::Counter::kIs1000,
+ "1\\.07374G")));
+
+TEST_P(HumanReadableFixture, HumanReadableNumber) {
+ std::string str = benchmark::HumanReadableNumber(std::get<0>(GetParam()),
+ std::get<1>(GetParam()));
+ ASSERT_THAT(str, ::testing::MatchesRegex(std::get<2>(GetParam())));
+}
+
} // end namespace
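The new HumanReadableFixture above feeds (value, base, expected) tuples through gtest's TEST_P/INSTANTIATE_TEST_SUITE_P machinery; the expected strings are regular expressions (hence the escaped dots) because the check uses ::testing::MatchesRegex. Below is a minimal, self-contained sketch of the same pattern; the fixture and test names are illustrative, and the "../src/string_util.h" include path for benchmark::HumanReadableNumber is an assumption about the test layout rather than something stated in this hunk.

    #include <string>
    #include <tuple>

    #include "../src/string_util.h"  // benchmark::HumanReadableNumber (assumed path)
    #include "benchmark/benchmark.h"
    #include "gmock/gmock.h"
    #include "gtest/gtest.h"

    using Param = std::tuple<double, benchmark::Counter::OneK, std::string>;
    class SuffixFixture : public ::testing::TestWithParam<Param> {};

    // (value, base, regex) triples; '.' is escaped because the expected
    // strings are matched as regular expressions, not plain text.
    INSTANTIATE_TEST_SUITE_P(
        Sketch, SuffixFixture,
        ::testing::Values(
            Param{1024.0, benchmark::Counter::kIs1024, "1Ki"},
            Param{1000.0 * 1000.0, benchmark::Counter::kIs1024, "976\\.56.Ki"},
            Param{1000.0, benchmark::Counter::kIs1000, "1k"}));

    TEST_P(SuffixFixture, FormatsWithExpectedSuffix) {
      const std::string s = benchmark::HumanReadableNumber(
          std::get<0>(GetParam()), std::get<1>(GetParam()));
      EXPECT_THAT(s, ::testing::MatchesRegex(std::get<2>(GetParam())));
    }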
diff --git a/test/templated_fixture_test.cc b/test/templated_fixture_test.cc
index fe9865c..af239c3 100644
--- a/test/templated_fixture_test.cc
+++ b/test/templated_fixture_test.cc
@@ -1,9 +1,9 @@
-#include "benchmark/benchmark.h"
-
#include <cassert>
#include <memory>
+#include "benchmark/benchmark.h"
+
template <typename T>
class MyFixture : public ::benchmark::Fixture {
public:
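A templated fixture like MyFixture is normally instantiated and registered through the BENCHMARK_TEMPLATE_F family of macros; a hedged sketch with illustrative names follows (only the macro, DoNotOptimize and ::benchmark::Fixture are library-provided, the fixture/method names are made up here).

    #include "benchmark/benchmark.h"

    template <typename T>
    class AccumulateFixture : public ::benchmark::Fixture {
     public:
      T total{};  // fixture members are visible inside the benchmark body
    };

    // Instantiates AccumulateFixture<int> and registers the benchmark.
    BENCHMARK_TEMPLATE_F(AccumulateFixture, Sum, int)(benchmark::State& st) {
      for (auto _ : st) {
        total += 1;
        benchmark::DoNotOptimize(total);
      }
    }

    BENCHMARK_MAIN();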
diff --git a/test/time_unit_gtest.cc b/test/time_unit_gtest.cc
new file mode 100644
index 0000000..484ecbc
--- /dev/null
+++ b/test/time_unit_gtest.cc
@@ -0,0 +1,37 @@
+#include "../include/benchmark/benchmark.h"
+#include "gtest/gtest.h"
+
+namespace benchmark {
+namespace internal {
+
+namespace {
+
+class DummyBenchmark : public Benchmark {
+ public:
+ DummyBenchmark() : Benchmark("dummy") {}
+ void Run(State&) override {}
+};
+
+TEST(DefaultTimeUnitTest, TimeUnitIsNotSet) {
+ DummyBenchmark benchmark;
+ EXPECT_EQ(benchmark.GetTimeUnit(), kNanosecond);
+}
+
+TEST(DefaultTimeUnitTest, DefaultIsSet) {
+ DummyBenchmark benchmark;
+ EXPECT_EQ(benchmark.GetTimeUnit(), kNanosecond);
+ SetDefaultTimeUnit(kMillisecond);
+ EXPECT_EQ(benchmark.GetTimeUnit(), kMillisecond);
+}
+
+TEST(DefaultTimeUnitTest, DefaultAndExplicitUnitIsSet) {
+ DummyBenchmark benchmark;
+ benchmark.Unit(kMillisecond);
+ SetDefaultTimeUnit(kMicrosecond);
+
+ EXPECT_EQ(benchmark.GetTimeUnit(), kMillisecond);
+}
+
+} // namespace
+} // namespace internal
+} // namespace benchmark
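This new test covers the interaction between the process-wide default time unit and a per-benchmark Unit() override: the explicit unit wins, otherwise the default applies. A hedged sketch of the user-facing side, assuming the SetDefaultTimeUnit() hook exercised here is exposed as benchmark::SetDefaultTimeUnit in benchmark.h:

    #include "benchmark/benchmark.h"

    static void BM_Spin(benchmark::State& state) {
      for (auto _ : state) {
        int x = 0;
        benchmark::DoNotOptimize(x);
      }
    }
    BENCHMARK(BM_Spin);  // no ->Unit(): inherits the default set in main()

    int main(int argc, char** argv) {
      // Report in milliseconds unless a benchmark sets ->Unit() explicitly.
      benchmark::SetDefaultTimeUnit(benchmark::kMillisecond);
      benchmark::Initialize(&argc, argv);
      benchmark::RunSpecifiedBenchmarks();
      benchmark::Shutdown();
      return 0;
    }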
diff --git a/test/user_counters_tabular_test.cc b/test/user_counters_tabular_test.cc
index 18373c0..c98b769 100644
--- a/test/user_counters_tabular_test.cc
+++ b/test/user_counters_tabular_test.cc
@@ -7,19 +7,25 @@
// @todo: <jpmag> this checks the full output at once; the rule for
// CounterSet1 was failing because it was not matching "^[-]+$".
// @todo: <jpmag> check that the counters are vertically aligned.
-ADD_CASES(
- TC_ConsoleOut,
- {
- // keeping these lines long improves readability, so:
- // clang-format off
+ADD_CASES(TC_ConsoleOut,
+ {
+ // keeping these lines long improves readability, so:
+ // clang-format off
{"^[-]+$", MR_Next},
{"^Benchmark %s Time %s CPU %s Iterations %s Bar %s Bat %s Baz %s Foo %s Frob %s Lob$", MR_Next},
{"^[-]+$", MR_Next},
- {"^BM_Counters_Tabular/threads:%int %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next},
- {"^BM_Counters_Tabular/threads:%int %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next},
- {"^BM_Counters_Tabular/threads:%int %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next},
- {"^BM_Counters_Tabular/threads:%int %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next},
- {"^BM_Counters_Tabular/threads:%int %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next},
+ {"^BM_Counters_Tabular/repeats:2/threads:1 %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next},
+ {"^BM_Counters_Tabular/repeats:2/threads:1 %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next},
+ {"^BM_Counters_Tabular/repeats:2/threads:1_mean %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next},
+ {"^BM_Counters_Tabular/repeats:2/threads:1_median %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next},
+ {"^BM_Counters_Tabular/repeats:2/threads:1_stddev %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next},
+ {"^BM_Counters_Tabular/repeats:2/threads:1_cv %console_percentage_report [ ]*%percentage[ ]*% [ ]*%percentage[ ]*% [ ]*%percentage[ ]*% [ ]*%percentage[ ]*% [ ]*%percentage[ ]*% [ ]*%percentage[ ]*%$", MR_Next},
+ {"^BM_Counters_Tabular/repeats:2/threads:2 %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next},
+ {"^BM_Counters_Tabular/repeats:2/threads:2 %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next},
+ {"^BM_Counters_Tabular/repeats:2/threads:2_mean %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next},
+ {"^BM_Counters_Tabular/repeats:2/threads:2_median %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next},
+ {"^BM_Counters_Tabular/repeats:2/threads:2_stddev %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next},
+ {"^BM_Counters_Tabular/repeats:2/threads:2_cv %console_percentage_report [ ]*%percentage[ ]*% [ ]*%percentage[ ]*% [ ]*%percentage[ ]*% [ ]*%percentage[ ]*% [ ]*%percentage[ ]*% [ ]*%percentage[ ]*%$", MR_Next},
{"^BM_CounterRates_Tabular/threads:%int %console_report [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s$", MR_Next},
{"^BM_CounterRates_Tabular/threads:%int %console_report [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s$", MR_Next},
{"^BM_CounterRates_Tabular/threads:%int %console_report [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s [ ]*%hrfloat/s$", MR_Next},
@@ -46,8 +52,8 @@ ADD_CASES(
{"^BM_CounterSet2_Tabular/threads:%int %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next},
{"^BM_CounterSet2_Tabular/threads:%int %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$", MR_Next},
{"^BM_CounterSet2_Tabular/threads:%int %console_report [ ]*%hrfloat [ ]*%hrfloat [ ]*%hrfloat$"},
- // clang-format on
- });
+ // clang-format on
+ });
ADD_CASES(TC_CSVOut, {{"%csv_header,"
"\"Bar\",\"Bat\",\"Baz\",\"Foo\",\"Frob\",\"Lob\""}});
@@ -68,12 +74,15 @@ void BM_Counters_Tabular(benchmark::State& state) {
{"Lob", {32, bm::Counter::kAvgThreads}},
});
}
-BENCHMARK(BM_Counters_Tabular)->ThreadRange(1, 16);
+BENCHMARK(BM_Counters_Tabular)->ThreadRange(1, 2)->Repetitions(2);
ADD_CASES(TC_JSONOut,
- {{"\"name\": \"BM_Counters_Tabular/threads:%int\",$"},
- {"\"run_name\": \"BM_Counters_Tabular/threads:%int\",$", MR_Next},
+ {{"\"name\": \"BM_Counters_Tabular/repeats:2/threads:1\",$"},
+ {"\"family_index\": 0,$", MR_Next},
+ {"\"per_family_instance_index\": 0,$", MR_Next},
+ {"\"run_name\": \"BM_Counters_Tabular/repeats:2/threads:1\",$",
+ MR_Next},
{"\"run_type\": \"iteration\",$", MR_Next},
- {"\"repetitions\": 0,$", MR_Next},
+ {"\"repetitions\": 2,$", MR_Next},
{"\"repetition_index\": 0,$", MR_Next},
{"\"threads\": 1,$", MR_Next},
{"\"iterations\": %int,$", MR_Next},
@@ -87,8 +96,260 @@ ADD_CASES(TC_JSONOut,
{"\"Frob\": %float,$", MR_Next},
{"\"Lob\": %float$", MR_Next},
{"}", MR_Next}});
-ADD_CASES(TC_CSVOut, {{"^\"BM_Counters_Tabular/threads:%int\",%csv_report,"
- "%float,%float,%float,%float,%float,%float$"}});
+ADD_CASES(TC_JSONOut,
+ {{"\"name\": \"BM_Counters_Tabular/repeats:2/threads:1\",$"},
+ {"\"family_index\": 0,$", MR_Next},
+ {"\"per_family_instance_index\": 0,$", MR_Next},
+ {"\"run_name\": \"BM_Counters_Tabular/repeats:2/threads:1\",$",
+ MR_Next},
+ {"\"run_type\": \"iteration\",$", MR_Next},
+ {"\"repetitions\": 2,$", MR_Next},
+ {"\"repetition_index\": 1,$", MR_Next},
+ {"\"threads\": 1,$", MR_Next},
+ {"\"iterations\": %int,$", MR_Next},
+ {"\"real_time\": %float,$", MR_Next},
+ {"\"cpu_time\": %float,$", MR_Next},
+ {"\"time_unit\": \"ns\",$", MR_Next},
+ {"\"Bar\": %float,$", MR_Next},
+ {"\"Bat\": %float,$", MR_Next},
+ {"\"Baz\": %float,$", MR_Next},
+ {"\"Foo\": %float,$", MR_Next},
+ {"\"Frob\": %float,$", MR_Next},
+ {"\"Lob\": %float$", MR_Next},
+ {"}", MR_Next}});
+ADD_CASES(TC_JSONOut,
+ {{"\"name\": \"BM_Counters_Tabular/repeats:2/threads:1_mean\",$"},
+ {"\"family_index\": 0,$", MR_Next},
+ {"\"per_family_instance_index\": 0,$", MR_Next},
+ {"\"run_name\": \"BM_Counters_Tabular/repeats:2/threads:1\",$",
+ MR_Next},
+ {"\"run_type\": \"aggregate\",$", MR_Next},
+ {"\"repetitions\": 2,$", MR_Next},
+ {"\"threads\": 1,$", MR_Next},
+ {"\"aggregate_name\": \"mean\",$", MR_Next},
+ {"\"aggregate_unit\": \"time\",$", MR_Next},
+ {"\"iterations\": %int,$", MR_Next},
+ {"\"real_time\": %float,$", MR_Next},
+ {"\"cpu_time\": %float,$", MR_Next},
+ {"\"time_unit\": \"ns\",$", MR_Next},
+ {"\"Bar\": %float,$", MR_Next},
+ {"\"Bat\": %float,$", MR_Next},
+ {"\"Baz\": %float,$", MR_Next},
+ {"\"Foo\": %float,$", MR_Next},
+ {"\"Frob\": %float,$", MR_Next},
+ {"\"Lob\": %float$", MR_Next},
+ {"}", MR_Next}});
+ADD_CASES(TC_JSONOut,
+ {{"\"name\": \"BM_Counters_Tabular/repeats:2/threads:1_median\",$"},
+ {"\"family_index\": 0,$", MR_Next},
+ {"\"per_family_instance_index\": 0,$", MR_Next},
+ {"\"run_name\": \"BM_Counters_Tabular/repeats:2/threads:1\",$",
+ MR_Next},
+ {"\"run_type\": \"aggregate\",$", MR_Next},
+ {"\"repetitions\": 2,$", MR_Next},
+ {"\"threads\": 1,$", MR_Next},
+ {"\"aggregate_name\": \"median\",$", MR_Next},
+ {"\"aggregate_unit\": \"time\",$", MR_Next},
+ {"\"iterations\": %int,$", MR_Next},
+ {"\"real_time\": %float,$", MR_Next},
+ {"\"cpu_time\": %float,$", MR_Next},
+ {"\"time_unit\": \"ns\",$", MR_Next},
+ {"\"Bar\": %float,$", MR_Next},
+ {"\"Bat\": %float,$", MR_Next},
+ {"\"Baz\": %float,$", MR_Next},
+ {"\"Foo\": %float,$", MR_Next},
+ {"\"Frob\": %float,$", MR_Next},
+ {"\"Lob\": %float$", MR_Next},
+ {"}", MR_Next}});
+ADD_CASES(TC_JSONOut,
+ {{"\"name\": \"BM_Counters_Tabular/repeats:2/threads:1_stddev\",$"},
+ {"\"family_index\": 0,$", MR_Next},
+ {"\"per_family_instance_index\": 0,$", MR_Next},
+ {"\"run_name\": \"BM_Counters_Tabular/repeats:2/threads:1\",$",
+ MR_Next},
+ {"\"run_type\": \"aggregate\",$", MR_Next},
+ {"\"repetitions\": 2,$", MR_Next},
+ {"\"threads\": 1,$", MR_Next},
+ {"\"aggregate_name\": \"stddev\",$", MR_Next},
+ {"\"aggregate_unit\": \"time\",$", MR_Next},
+ {"\"iterations\": %int,$", MR_Next},
+ {"\"real_time\": %float,$", MR_Next},
+ {"\"cpu_time\": %float,$", MR_Next},
+ {"\"time_unit\": \"ns\",$", MR_Next},
+ {"\"Bar\": %float,$", MR_Next},
+ {"\"Bat\": %float,$", MR_Next},
+ {"\"Baz\": %float,$", MR_Next},
+ {"\"Foo\": %float,$", MR_Next},
+ {"\"Frob\": %float,$", MR_Next},
+ {"\"Lob\": %float$", MR_Next},
+ {"}", MR_Next}});
+ADD_CASES(TC_JSONOut,
+ {{"\"name\": \"BM_Counters_Tabular/repeats:2/threads:1_cv\",$"},
+ {"\"family_index\": 0,$", MR_Next},
+ {"\"per_family_instance_index\": 0,$", MR_Next},
+ {"\"run_name\": \"BM_Counters_Tabular/repeats:2/threads:1\",$",
+ MR_Next},
+ {"\"run_type\": \"aggregate\",$", MR_Next},
+ {"\"repetitions\": 2,$", MR_Next},
+ {"\"threads\": 1,$", MR_Next},
+ {"\"aggregate_name\": \"cv\",$", MR_Next},
+ {"\"aggregate_unit\": \"percentage\",$", MR_Next},
+ {"\"iterations\": %int,$", MR_Next},
+ {"\"real_time\": %float,$", MR_Next},
+ {"\"cpu_time\": %float,$", MR_Next},
+ {"\"time_unit\": \"ns\",$", MR_Next},
+ {"\"Bar\": %float,$", MR_Next},
+ {"\"Bat\": %float,$", MR_Next},
+ {"\"Baz\": %float,$", MR_Next},
+ {"\"Foo\": %float,$", MR_Next},
+ {"\"Frob\": %float,$", MR_Next},
+ {"\"Lob\": %float$", MR_Next},
+ {"}", MR_Next}});
+
+ADD_CASES(TC_JSONOut,
+ {{"\"name\": \"BM_Counters_Tabular/repeats:2/threads:2\",$"},
+ {"\"family_index\": 0,$", MR_Next},
+ {"\"per_family_instance_index\": 1,$", MR_Next},
+ {"\"run_name\": \"BM_Counters_Tabular/repeats:2/threads:2\",$",
+ MR_Next},
+ {"\"run_type\": \"iteration\",$", MR_Next},
+ {"\"repetitions\": 2,$", MR_Next},
+ {"\"repetition_index\": 0,$", MR_Next},
+ {"\"threads\": 2,$", MR_Next},
+ {"\"iterations\": %int,$", MR_Next},
+ {"\"real_time\": %float,$", MR_Next},
+ {"\"cpu_time\": %float,$", MR_Next},
+ {"\"time_unit\": \"ns\",$", MR_Next},
+ {"\"Bar\": %float,$", MR_Next},
+ {"\"Bat\": %float,$", MR_Next},
+ {"\"Baz\": %float,$", MR_Next},
+ {"\"Foo\": %float,$", MR_Next},
+ {"\"Frob\": %float,$", MR_Next},
+ {"\"Lob\": %float$", MR_Next},
+ {"}", MR_Next}});
+ADD_CASES(TC_JSONOut,
+ {{"\"name\": \"BM_Counters_Tabular/repeats:2/threads:2\",$"},
+ {"\"family_index\": 0,$", MR_Next},
+ {"\"per_family_instance_index\": 1,$", MR_Next},
+ {"\"run_name\": \"BM_Counters_Tabular/repeats:2/threads:2\",$",
+ MR_Next},
+ {"\"run_type\": \"iteration\",$", MR_Next},
+ {"\"repetitions\": 2,$", MR_Next},
+ {"\"repetition_index\": 1,$", MR_Next},
+ {"\"threads\": 2,$", MR_Next},
+ {"\"iterations\": %int,$", MR_Next},
+ {"\"real_time\": %float,$", MR_Next},
+ {"\"cpu_time\": %float,$", MR_Next},
+ {"\"time_unit\": \"ns\",$", MR_Next},
+ {"\"Bar\": %float,$", MR_Next},
+ {"\"Bat\": %float,$", MR_Next},
+ {"\"Baz\": %float,$", MR_Next},
+ {"\"Foo\": %float,$", MR_Next},
+ {"\"Frob\": %float,$", MR_Next},
+ {"\"Lob\": %float$", MR_Next},
+ {"}", MR_Next}});
+ADD_CASES(TC_JSONOut,
+ {{"\"name\": \"BM_Counters_Tabular/repeats:2/threads:2_median\",$"},
+ {"\"family_index\": 0,$", MR_Next},
+ {"\"per_family_instance_index\": 1,$", MR_Next},
+ {"\"run_name\": \"BM_Counters_Tabular/repeats:2/threads:2\",$",
+ MR_Next},
+ {"\"run_type\": \"aggregate\",$", MR_Next},
+ {"\"repetitions\": 2,$", MR_Next},
+ {"\"threads\": 2,$", MR_Next},
+ {"\"aggregate_name\": \"median\",$", MR_Next},
+ {"\"aggregate_unit\": \"time\",$", MR_Next},
+ {"\"iterations\": %int,$", MR_Next},
+ {"\"real_time\": %float,$", MR_Next},
+ {"\"cpu_time\": %float,$", MR_Next},
+ {"\"time_unit\": \"ns\",$", MR_Next},
+ {"\"Bar\": %float,$", MR_Next},
+ {"\"Bat\": %float,$", MR_Next},
+ {"\"Baz\": %float,$", MR_Next},
+ {"\"Foo\": %float,$", MR_Next},
+ {"\"Frob\": %float,$", MR_Next},
+ {"\"Lob\": %float$", MR_Next},
+ {"}", MR_Next}});
+ADD_CASES(TC_JSONOut,
+ {{"\"name\": \"BM_Counters_Tabular/repeats:2/threads:2_stddev\",$"},
+ {"\"family_index\": 0,$", MR_Next},
+ {"\"per_family_instance_index\": 1,$", MR_Next},
+ {"\"run_name\": \"BM_Counters_Tabular/repeats:2/threads:2\",$",
+ MR_Next},
+ {"\"run_type\": \"aggregate\",$", MR_Next},
+ {"\"repetitions\": 2,$", MR_Next},
+ {"\"threads\": 2,$", MR_Next},
+ {"\"aggregate_name\": \"stddev\",$", MR_Next},
+ {"\"aggregate_unit\": \"time\",$", MR_Next},
+ {"\"iterations\": %int,$", MR_Next},
+ {"\"real_time\": %float,$", MR_Next},
+ {"\"cpu_time\": %float,$", MR_Next},
+ {"\"time_unit\": \"ns\",$", MR_Next},
+ {"\"Bar\": %float,$", MR_Next},
+ {"\"Bat\": %float,$", MR_Next},
+ {"\"Baz\": %float,$", MR_Next},
+ {"\"Foo\": %float,$", MR_Next},
+ {"\"Frob\": %float,$", MR_Next},
+ {"\"Lob\": %float$", MR_Next},
+ {"}", MR_Next}});
+ADD_CASES(TC_JSONOut,
+ {{"\"name\": \"BM_Counters_Tabular/repeats:2/threads:2_cv\",$"},
+ {"\"family_index\": 0,$", MR_Next},
+ {"\"per_family_instance_index\": 1,$", MR_Next},
+ {"\"run_name\": \"BM_Counters_Tabular/repeats:2/threads:2\",$",
+ MR_Next},
+ {"\"run_type\": \"aggregate\",$", MR_Next},
+ {"\"repetitions\": 2,$", MR_Next},
+ {"\"threads\": 2,$", MR_Next},
+ {"\"aggregate_name\": \"cv\",$", MR_Next},
+ {"\"aggregate_unit\": \"percentage\",$", MR_Next},
+ {"\"iterations\": %int,$", MR_Next},
+ {"\"real_time\": %float,$", MR_Next},
+ {"\"cpu_time\": %float,$", MR_Next},
+ {"\"time_unit\": \"ns\",$", MR_Next},
+ {"\"Bar\": %float,$", MR_Next},
+ {"\"Bat\": %float,$", MR_Next},
+ {"\"Baz\": %float,$", MR_Next},
+ {"\"Foo\": %float,$", MR_Next},
+ {"\"Frob\": %float,$", MR_Next},
+ {"\"Lob\": %float$", MR_Next},
+ {"}", MR_Next}});
+ADD_CASES(TC_CSVOut,
+ {{"^\"BM_Counters_Tabular/repeats:2/threads:1\",%csv_report,"
+ "%float,%float,%float,%float,%float,%float$"}});
+ADD_CASES(TC_CSVOut,
+ {{"^\"BM_Counters_Tabular/repeats:2/threads:1\",%csv_report,"
+ "%float,%float,%float,%float,%float,%float$"}});
+ADD_CASES(TC_CSVOut,
+ {{"^\"BM_Counters_Tabular/repeats:2/threads:1_mean\",%csv_report,"
+ "%float,%float,%float,%float,%float,%float$"}});
+ADD_CASES(TC_CSVOut,
+ {{"^\"BM_Counters_Tabular/repeats:2/threads:1_median\",%csv_report,"
+ "%float,%float,%float,%float,%float,%float$"}});
+ADD_CASES(TC_CSVOut,
+ {{"^\"BM_Counters_Tabular/repeats:2/threads:1_stddev\",%csv_report,"
+ "%float,%float,%float,%float,%float,%float$"}});
+ADD_CASES(TC_CSVOut,
+ {{"^\"BM_Counters_Tabular/repeats:2/threads:1_cv\",%csv_report,"
+ "%float,%float,%float,%float,%float,%float$"}});
+ADD_CASES(TC_CSVOut,
+ {{"^\"BM_Counters_Tabular/repeats:2/threads:2\",%csv_report,"
+ "%float,%float,%float,%float,%float,%float$"}});
+ADD_CASES(TC_CSVOut,
+ {{"^\"BM_Counters_Tabular/repeats:2/threads:2\",%csv_report,"
+ "%float,%float,%float,%float,%float,%float$"}});
+ADD_CASES(TC_CSVOut,
+ {{"^\"BM_Counters_Tabular/repeats:2/threads:2_mean\",%csv_report,"
+ "%float,%float,%float,%float,%float,%float$"}});
+ADD_CASES(TC_CSVOut,
+ {{"^\"BM_Counters_Tabular/repeats:2/threads:2_median\",%csv_report,"
+ "%float,%float,%float,%float,%float,%float$"}});
+ADD_CASES(TC_CSVOut,
+ {{"^\"BM_Counters_Tabular/repeats:2/threads:2_stddev\",%csv_report,"
+ "%float,%float,%float,%float,%float,%float$"}});
+ADD_CASES(TC_CSVOut,
+ {{"^\"BM_Counters_Tabular/repeats:2/threads:2_cv\",%csv_report,"
+ "%float,%float,%float,%float,%float,%float$"}});
// VS2013 does not allow this function to be passed as a lambda argument
// to CHECK_BENCHMARK_RESULTS()
void CheckTabular(Results const& e) {
@@ -99,7 +360,10 @@ void CheckTabular(Results const& e) {
CHECK_COUNTER_VALUE(e, int, "Frob", EQ, 16);
CHECK_COUNTER_VALUE(e, int, "Lob", EQ, 32);
}
-CHECK_BENCHMARK_RESULTS("BM_Counters_Tabular/threads:%int", &CheckTabular);
+CHECK_BENCHMARK_RESULTS("BM_Counters_Tabular/repeats:2/threads:1$",
+ &CheckTabular);
+CHECK_BENCHMARK_RESULTS("BM_Counters_Tabular/repeats:2/threads:2$",
+ &CheckTabular);
// ========================================================================= //
// -------------------- Tabular+Rate Counters Output ----------------------- //
@@ -108,7 +372,8 @@ CHECK_BENCHMARK_RESULTS("BM_Counters_Tabular/threads:%int", &CheckTabular);
void BM_CounterRates_Tabular(benchmark::State& state) {
for (auto _ : state) {
// This test requires a non-zero CPU time to avoid divide-by-zero
- benchmark::DoNotOptimize(state.iterations());
+ auto iterations = state.iterations();
+ benchmark::DoNotOptimize(iterations);
}
namespace bm = benchmark;
state.counters.insert({
@@ -123,10 +388,12 @@ void BM_CounterRates_Tabular(benchmark::State& state) {
BENCHMARK(BM_CounterRates_Tabular)->ThreadRange(1, 16);
ADD_CASES(TC_JSONOut,
{{"\"name\": \"BM_CounterRates_Tabular/threads:%int\",$"},
+ {"\"family_index\": 1,$", MR_Next},
+ {"\"per_family_instance_index\": 0,$", MR_Next},
{"\"run_name\": \"BM_CounterRates_Tabular/threads:%int\",$",
MR_Next},
{"\"run_type\": \"iteration\",$", MR_Next},
- {"\"repetitions\": 0,$", MR_Next},
+ {"\"repetitions\": 1,$", MR_Next},
{"\"repetition_index\": 0,$", MR_Next},
{"\"threads\": 1,$", MR_Next},
{"\"iterations\": %int,$", MR_Next},
@@ -174,9 +441,11 @@ void BM_CounterSet0_Tabular(benchmark::State& state) {
BENCHMARK(BM_CounterSet0_Tabular)->ThreadRange(1, 16);
ADD_CASES(TC_JSONOut,
{{"\"name\": \"BM_CounterSet0_Tabular/threads:%int\",$"},
+ {"\"family_index\": 2,$", MR_Next},
+ {"\"per_family_instance_index\": 0,$", MR_Next},
{"\"run_name\": \"BM_CounterSet0_Tabular/threads:%int\",$", MR_Next},
{"\"run_type\": \"iteration\",$", MR_Next},
- {"\"repetitions\": 0,$", MR_Next},
+ {"\"repetitions\": 1,$", MR_Next},
{"\"repetition_index\": 0,$", MR_Next},
{"\"threads\": 1,$", MR_Next},
{"\"iterations\": %int,$", MR_Next},
@@ -212,9 +481,11 @@ void BM_CounterSet1_Tabular(benchmark::State& state) {
BENCHMARK(BM_CounterSet1_Tabular)->ThreadRange(1, 16);
ADD_CASES(TC_JSONOut,
{{"\"name\": \"BM_CounterSet1_Tabular/threads:%int\",$"},
+ {"\"family_index\": 3,$", MR_Next},
+ {"\"per_family_instance_index\": 0,$", MR_Next},
{"\"run_name\": \"BM_CounterSet1_Tabular/threads:%int\",$", MR_Next},
{"\"run_type\": \"iteration\",$", MR_Next},
- {"\"repetitions\": 0,$", MR_Next},
+ {"\"repetitions\": 1,$", MR_Next},
{"\"repetition_index\": 0,$", MR_Next},
{"\"threads\": 1,$", MR_Next},
{"\"iterations\": %int,$", MR_Next},
@@ -254,9 +525,11 @@ void BM_CounterSet2_Tabular(benchmark::State& state) {
BENCHMARK(BM_CounterSet2_Tabular)->ThreadRange(1, 16);
ADD_CASES(TC_JSONOut,
{{"\"name\": \"BM_CounterSet2_Tabular/threads:%int\",$"},
+ {"\"family_index\": 4,$", MR_Next},
+ {"\"per_family_instance_index\": 0,$", MR_Next},
{"\"run_name\": \"BM_CounterSet2_Tabular/threads:%int\",$", MR_Next},
{"\"run_type\": \"iteration\",$", MR_Next},
- {"\"repetitions\": 0,$", MR_Next},
+ {"\"repetitions\": 1,$", MR_Next},
{"\"repetition_index\": 0,$", MR_Next},
{"\"threads\": 1,$", MR_Next},
{"\"iterations\": %int,$", MR_Next},
diff --git a/test/user_counters_test.cc b/test/user_counters_test.cc
index 5699f4f..4cd8ee3 100644
--- a/test/user_counters_test.cc
+++ b/test/user_counters_test.cc
@@ -26,15 +26,17 @@ void BM_Counters_Simple(benchmark::State& state) {
for (auto _ : state) {
}
state.counters["foo"] = 1;
- state.counters["bar"] = 2 * (double)state.iterations();
+ state.counters["bar"] = 2 * static_cast<double>(state.iterations());
}
BENCHMARK(BM_Counters_Simple);
ADD_CASES(TC_ConsoleOut,
{{"^BM_Counters_Simple %console_report bar=%hrfloat foo=%hrfloat$"}});
ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Counters_Simple\",$"},
+ {"\"family_index\": 0,$", MR_Next},
+ {"\"per_family_instance_index\": 0,$", MR_Next},
{"\"run_name\": \"BM_Counters_Simple\",$", MR_Next},
{"\"run_type\": \"iteration\",$", MR_Next},
- {"\"repetitions\": 0,$", MR_Next},
+ {"\"repetitions\": 1,$", MR_Next},
{"\"repetition_index\": 0,$", MR_Next},
{"\"threads\": 1,$", MR_Next},
{"\"iterations\": %int,$", MR_Next},
@@ -65,7 +67,8 @@ int num_calls1 = 0;
void BM_Counters_WithBytesAndItemsPSec(benchmark::State& state) {
for (auto _ : state) {
// This test requires a non-zero CPU time to avoid divide-by-zero
- benchmark::DoNotOptimize(state.iterations());
+ auto iterations = state.iterations();
+ benchmark::DoNotOptimize(iterations);
}
state.counters["foo"] = 1;
state.counters["bar"] = ++num_calls1;
@@ -78,9 +81,11 @@ ADD_CASES(TC_ConsoleOut, {{"^BM_Counters_WithBytesAndItemsPSec %console_report "
"foo=%hrfloat items_per_second=%hrfloat/s$"}});
ADD_CASES(TC_JSONOut,
{{"\"name\": \"BM_Counters_WithBytesAndItemsPSec\",$"},
+ {"\"family_index\": 1,$", MR_Next},
+ {"\"per_family_instance_index\": 0,$", MR_Next},
{"\"run_name\": \"BM_Counters_WithBytesAndItemsPSec\",$", MR_Next},
{"\"run_type\": \"iteration\",$", MR_Next},
- {"\"repetitions\": 0,$", MR_Next},
+ {"\"repetitions\": 1,$", MR_Next},
{"\"repetition_index\": 0,$", MR_Next},
{"\"threads\": 1,$", MR_Next},
{"\"iterations\": %int,$", MR_Next},
@@ -114,7 +119,8 @@ CHECK_BENCHMARK_RESULTS("BM_Counters_WithBytesAndItemsPSec",
void BM_Counters_Rate(benchmark::State& state) {
for (auto _ : state) {
// This test requires a non-zero CPU time to avoid divide-by-zero
- benchmark::DoNotOptimize(state.iterations());
+ auto iterations = state.iterations();
+ benchmark::DoNotOptimize(iterations);
}
namespace bm = benchmark;
state.counters["foo"] = bm::Counter{1, bm::Counter::kIsRate};
@@ -125,9 +131,11 @@ ADD_CASES(
TC_ConsoleOut,
{{"^BM_Counters_Rate %console_report bar=%hrfloat/s foo=%hrfloat/s$"}});
ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Counters_Rate\",$"},
+ {"\"family_index\": 2,$", MR_Next},
+ {"\"per_family_instance_index\": 0,$", MR_Next},
{"\"run_name\": \"BM_Counters_Rate\",$", MR_Next},
{"\"run_type\": \"iteration\",$", MR_Next},
- {"\"repetitions\": 0,$", MR_Next},
+ {"\"repetitions\": 1,$", MR_Next},
{"\"repetition_index\": 0,$", MR_Next},
{"\"threads\": 1,$", MR_Next},
{"\"iterations\": %int,$", MR_Next},
@@ -155,7 +163,8 @@ CHECK_BENCHMARK_RESULTS("BM_Counters_Rate", &CheckRate);
void BM_Invert(benchmark::State& state) {
for (auto _ : state) {
// This test requires a non-zero CPU time to avoid divide-by-zero
- benchmark::DoNotOptimize(state.iterations());
+ auto iterations = state.iterations();
+ benchmark::DoNotOptimize(iterations);
}
namespace bm = benchmark;
state.counters["foo"] = bm::Counter{0.0001, bm::Counter::kInvert};
@@ -165,9 +174,11 @@ BENCHMARK(BM_Invert);
ADD_CASES(TC_ConsoleOut,
{{"^BM_Invert %console_report bar=%hrfloatu foo=%hrfloatk$"}});
ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Invert\",$"},
+ {"\"family_index\": 3,$", MR_Next},
+ {"\"per_family_instance_index\": 0,$", MR_Next},
{"\"run_name\": \"BM_Invert\",$", MR_Next},
{"\"run_type\": \"iteration\",$", MR_Next},
- {"\"repetitions\": 0,$", MR_Next},
+ {"\"repetitions\": 1,$", MR_Next},
{"\"repetition_index\": 0,$", MR_Next},
{"\"threads\": 1,$", MR_Next},
{"\"iterations\": %int,$", MR_Next},
@@ -187,14 +198,14 @@ void CheckInvert(Results const& e) {
CHECK_BENCHMARK_RESULTS("BM_Invert", &CheckInvert);
// ========================================================================= //
-// ------------------------- InvertedRate Counters Output
-// -------------------------- //
+// --------------------- InvertedRate Counters Output ---------------------- //
// ========================================================================= //
void BM_Counters_InvertedRate(benchmark::State& state) {
for (auto _ : state) {
// This test requires a non-zero CPU time to avoid divide-by-zero
- benchmark::DoNotOptimize(state.iterations());
+ auto iterations = state.iterations();
+ benchmark::DoNotOptimize(iterations);
}
namespace bm = benchmark;
state.counters["foo"] =
@@ -207,9 +218,11 @@ ADD_CASES(TC_ConsoleOut, {{"^BM_Counters_InvertedRate %console_report "
"bar=%hrfloats foo=%hrfloats$"}});
ADD_CASES(TC_JSONOut,
{{"\"name\": \"BM_Counters_InvertedRate\",$"},
+ {"\"family_index\": 4,$", MR_Next},
+ {"\"per_family_instance_index\": 0,$", MR_Next},
{"\"run_name\": \"BM_Counters_InvertedRate\",$", MR_Next},
{"\"run_type\": \"iteration\",$", MR_Next},
- {"\"repetitions\": 0,$", MR_Next},
+ {"\"repetitions\": 1,$", MR_Next},
{"\"repetition_index\": 0,$", MR_Next},
{"\"threads\": 1,$", MR_Next},
{"\"iterations\": %int,$", MR_Next},
@@ -246,9 +259,11 @@ ADD_CASES(TC_ConsoleOut, {{"^BM_Counters_Threads/threads:%int %console_report "
"bar=%hrfloat foo=%hrfloat$"}});
ADD_CASES(TC_JSONOut,
{{"\"name\": \"BM_Counters_Threads/threads:%int\",$"},
+ {"\"family_index\": 5,$", MR_Next},
+ {"\"per_family_instance_index\": 0,$", MR_Next},
{"\"run_name\": \"BM_Counters_Threads/threads:%int\",$", MR_Next},
{"\"run_type\": \"iteration\",$", MR_Next},
- {"\"repetitions\": 0,$", MR_Next},
+ {"\"repetitions\": 1,$", MR_Next},
{"\"repetition_index\": 0,$", MR_Next},
{"\"threads\": 1,$", MR_Next},
{"\"iterations\": %int,$", MR_Next},
@@ -285,9 +300,11 @@ ADD_CASES(TC_ConsoleOut, {{"^BM_Counters_AvgThreads/threads:%int "
"%console_report bar=%hrfloat foo=%hrfloat$"}});
ADD_CASES(TC_JSONOut,
{{"\"name\": \"BM_Counters_AvgThreads/threads:%int\",$"},
+ {"\"family_index\": 6,$", MR_Next},
+ {"\"per_family_instance_index\": 0,$", MR_Next},
{"\"run_name\": \"BM_Counters_AvgThreads/threads:%int\",$", MR_Next},
{"\"run_type\": \"iteration\",$", MR_Next},
- {"\"repetitions\": 0,$", MR_Next},
+ {"\"repetitions\": 1,$", MR_Next},
{"\"repetition_index\": 0,$", MR_Next},
{"\"threads\": 1,$", MR_Next},
{"\"iterations\": %int,$", MR_Next},
@@ -316,7 +333,8 @@ CHECK_BENCHMARK_RESULTS("BM_Counters_AvgThreads/threads:%int",
void BM_Counters_AvgThreadsRate(benchmark::State& state) {
for (auto _ : state) {
// This test requires a non-zero CPU time to avoid divide-by-zero
- benchmark::DoNotOptimize(state.iterations());
+ auto iterations = state.iterations();
+ benchmark::DoNotOptimize(iterations);
}
namespace bm = benchmark;
state.counters["foo"] = bm::Counter{1, bm::Counter::kAvgThreadsRate};
@@ -327,10 +345,12 @@ ADD_CASES(TC_ConsoleOut, {{"^BM_Counters_AvgThreadsRate/threads:%int "
"%console_report bar=%hrfloat/s foo=%hrfloat/s$"}});
ADD_CASES(TC_JSONOut,
{{"\"name\": \"BM_Counters_AvgThreadsRate/threads:%int\",$"},
+ {"\"family_index\": 7,$", MR_Next},
+ {"\"per_family_instance_index\": 0,$", MR_Next},
{"\"run_name\": \"BM_Counters_AvgThreadsRate/threads:%int\",$",
MR_Next},
{"\"run_type\": \"iteration\",$", MR_Next},
- {"\"repetitions\": 0,$", MR_Next},
+ {"\"repetitions\": 1,$", MR_Next},
{"\"repetition_index\": 0,$", MR_Next},
{"\"threads\": 1,$", MR_Next},
{"\"iterations\": %int,$", MR_Next},
@@ -367,9 +387,11 @@ ADD_CASES(TC_ConsoleOut, {{"^BM_Counters_IterationInvariant %console_report "
"bar=%hrfloat foo=%hrfloat$"}});
ADD_CASES(TC_JSONOut,
{{"\"name\": \"BM_Counters_IterationInvariant\",$"},
+ {"\"family_index\": 8,$", MR_Next},
+ {"\"per_family_instance_index\": 0,$", MR_Next},
{"\"run_name\": \"BM_Counters_IterationInvariant\",$", MR_Next},
{"\"run_type\": \"iteration\",$", MR_Next},
- {"\"repetitions\": 0,$", MR_Next},
+ {"\"repetitions\": 1,$", MR_Next},
{"\"repetition_index\": 0,$", MR_Next},
{"\"threads\": 1,$", MR_Next},
{"\"iterations\": %int,$", MR_Next},
@@ -399,7 +421,8 @@ CHECK_BENCHMARK_RESULTS("BM_Counters_IterationInvariant",
void BM_Counters_kIsIterationInvariantRate(benchmark::State& state) {
for (auto _ : state) {
// This test requires a non-zero CPU time to avoid divide-by-zero
- benchmark::DoNotOptimize(state.iterations());
+ auto iterations = state.iterations();
+ benchmark::DoNotOptimize(iterations);
}
namespace bm = benchmark;
state.counters["foo"] =
@@ -412,10 +435,12 @@ ADD_CASES(TC_ConsoleOut, {{"^BM_Counters_kIsIterationInvariantRate "
"%console_report bar=%hrfloat/s foo=%hrfloat/s$"}});
ADD_CASES(TC_JSONOut,
{{"\"name\": \"BM_Counters_kIsIterationInvariantRate\",$"},
+ {"\"family_index\": 9,$", MR_Next},
+ {"\"per_family_instance_index\": 0,$", MR_Next},
{"\"run_name\": \"BM_Counters_kIsIterationInvariantRate\",$",
MR_Next},
{"\"run_type\": \"iteration\",$", MR_Next},
- {"\"repetitions\": 0,$", MR_Next},
+ {"\"repetitions\": 1,$", MR_Next},
{"\"repetition_index\": 0,$", MR_Next},
{"\"threads\": 1,$", MR_Next},
{"\"iterations\": %int,$", MR_Next},
@@ -440,7 +465,7 @@ CHECK_BENCHMARK_RESULTS("BM_Counters_kIsIterationInvariantRate",
&CheckIsIterationInvariantRate);
// ========================================================================= //
-// ------------------- AvgIterations Counters Output ------------------ //
+// --------------------- AvgIterations Counters Output --------------------- //
// ========================================================================= //
void BM_Counters_AvgIterations(benchmark::State& state) {
@@ -455,9 +480,11 @@ ADD_CASES(TC_ConsoleOut, {{"^BM_Counters_AvgIterations %console_report "
"bar=%hrfloat foo=%hrfloat$"}});
ADD_CASES(TC_JSONOut,
{{"\"name\": \"BM_Counters_AvgIterations\",$"},
+ {"\"family_index\": 10,$", MR_Next},
+ {"\"per_family_instance_index\": 0,$", MR_Next},
{"\"run_name\": \"BM_Counters_AvgIterations\",$", MR_Next},
{"\"run_type\": \"iteration\",$", MR_Next},
- {"\"repetitions\": 0,$", MR_Next},
+ {"\"repetitions\": 1,$", MR_Next},
{"\"repetition_index\": 0,$", MR_Next},
{"\"threads\": 1,$", MR_Next},
{"\"iterations\": %int,$", MR_Next},
@@ -480,13 +507,14 @@ void CheckAvgIterations(Results const& e) {
CHECK_BENCHMARK_RESULTS("BM_Counters_AvgIterations", &CheckAvgIterations);
// ========================================================================= //
-// ----------------- AvgIterationsRate Counters Output ---------------- //
+// ------------------- AvgIterationsRate Counters Output ------------------- //
// ========================================================================= //
void BM_Counters_kAvgIterationsRate(benchmark::State& state) {
for (auto _ : state) {
// This test requires a non-zero CPU time to avoid divide-by-zero
- benchmark::DoNotOptimize(state.iterations());
+ auto iterations = state.iterations();
+ benchmark::DoNotOptimize(iterations);
}
namespace bm = benchmark;
state.counters["foo"] = bm::Counter{1, bm::Counter::kAvgIterationsRate};
@@ -498,9 +526,11 @@ ADD_CASES(TC_ConsoleOut, {{"^BM_Counters_kAvgIterationsRate "
"%console_report bar=%hrfloat/s foo=%hrfloat/s$"}});
ADD_CASES(TC_JSONOut,
{{"\"name\": \"BM_Counters_kAvgIterationsRate\",$"},
+ {"\"family_index\": 11,$", MR_Next},
+ {"\"per_family_instance_index\": 0,$", MR_Next},
{"\"run_name\": \"BM_Counters_kAvgIterationsRate\",$", MR_Next},
{"\"run_type\": \"iteration\",$", MR_Next},
- {"\"repetitions\": 0,$", MR_Next},
+ {"\"repetitions\": 1,$", MR_Next},
{"\"repetition_index\": 0,$", MR_Next},
{"\"threads\": 1,$", MR_Next},
{"\"iterations\": %int,$", MR_Next},
diff --git a/test/user_counters_thousands_test.cc b/test/user_counters_thousands_test.cc
index 21d8285..fc15383 100644
--- a/test/user_counters_thousands_test.cc
+++ b/test/user_counters_thousands_test.cc
@@ -16,13 +16,13 @@ void BM_Counters_Thousands(benchmark::State& state) {
{"t0_1000000DefaultBase",
bm::Counter(1000 * 1000, bm::Counter::kDefaults)},
{"t1_1000000Base1000", bm::Counter(1000 * 1000, bm::Counter::kDefaults,
- benchmark::Counter::OneK::kIs1000)},
+ bm::Counter::OneK::kIs1000)},
{"t2_1000000Base1024", bm::Counter(1000 * 1000, bm::Counter::kDefaults,
- benchmark::Counter::OneK::kIs1024)},
+ bm::Counter::OneK::kIs1024)},
{"t3_1048576Base1000", bm::Counter(1024 * 1024, bm::Counter::kDefaults,
- benchmark::Counter::OneK::kIs1000)},
+ bm::Counter::OneK::kIs1000)},
{"t4_1048576Base1024", bm::Counter(1024 * 1024, bm::Counter::kDefaults,
- benchmark::Counter::OneK::kIs1024)},
+ bm::Counter::OneK::kIs1024)},
});
}
BENCHMARK(BM_Counters_Thousands)->Repetitions(2);
@@ -30,27 +30,29 @@ ADD_CASES(
TC_ConsoleOut,
{
{"^BM_Counters_Thousands/repeats:2 %console_report "
- "t0_1000000DefaultBase=1000k "
- "t1_1000000Base1000=1000k t2_1000000Base1024=976.56[23]k "
- "t3_1048576Base1000=1048.58k t4_1048576Base1024=1024k$"},
+ "t0_1000000DefaultBase=1M "
+ "t1_1000000Base1000=1M t2_1000000Base1024=976.56[23]Ki "
+ "t3_1048576Base1000=1.04858M t4_1048576Base1024=1Mi$"},
{"^BM_Counters_Thousands/repeats:2 %console_report "
- "t0_1000000DefaultBase=1000k "
- "t1_1000000Base1000=1000k t2_1000000Base1024=976.56[23]k "
- "t3_1048576Base1000=1048.58k t4_1048576Base1024=1024k$"},
+ "t0_1000000DefaultBase=1M "
+ "t1_1000000Base1000=1M t2_1000000Base1024=976.56[23]Ki "
+ "t3_1048576Base1000=1.04858M t4_1048576Base1024=1Mi$"},
{"^BM_Counters_Thousands/repeats:2_mean %console_report "
- "t0_1000000DefaultBase=1000k t1_1000000Base1000=1000k "
- "t2_1000000Base1024=976.56[23]k t3_1048576Base1000=1048.58k "
- "t4_1048576Base1024=1024k$"},
+ "t0_1000000DefaultBase=1M t1_1000000Base1000=1M "
+ "t2_1000000Base1024=976.56[23]Ki t3_1048576Base1000=1.04858M "
+ "t4_1048576Base1024=1Mi$"},
{"^BM_Counters_Thousands/repeats:2_median %console_report "
- "t0_1000000DefaultBase=1000k t1_1000000Base1000=1000k "
- "t2_1000000Base1024=976.56[23]k t3_1048576Base1000=1048.58k "
- "t4_1048576Base1024=1024k$"},
+ "t0_1000000DefaultBase=1M t1_1000000Base1000=1M "
+ "t2_1000000Base1024=976.56[23]Ki t3_1048576Base1000=1.04858M "
+ "t4_1048576Base1024=1Mi$"},
{"^BM_Counters_Thousands/repeats:2_stddev %console_time_only_report [ "
"]*2 t0_1000000DefaultBase=0 t1_1000000Base1000=0 "
"t2_1000000Base1024=0 t3_1048576Base1000=0 t4_1048576Base1024=0$"},
});
ADD_CASES(TC_JSONOut,
{{"\"name\": \"BM_Counters_Thousands/repeats:2\",$"},
+ {"\"family_index\": 0,$", MR_Next},
+ {"\"per_family_instance_index\": 0,$", MR_Next},
{"\"run_name\": \"BM_Counters_Thousands/repeats:2\",$", MR_Next},
{"\"run_type\": \"iteration\",$", MR_Next},
{"\"repetitions\": 2,$", MR_Next},
@@ -68,6 +70,8 @@ ADD_CASES(TC_JSONOut,
{"}", MR_Next}});
ADD_CASES(TC_JSONOut,
{{"\"name\": \"BM_Counters_Thousands/repeats:2\",$"},
+ {"\"family_index\": 0,$", MR_Next},
+ {"\"per_family_instance_index\": 0,$", MR_Next},
{"\"run_name\": \"BM_Counters_Thousands/repeats:2\",$", MR_Next},
{"\"run_type\": \"iteration\",$", MR_Next},
{"\"repetitions\": 2,$", MR_Next},
@@ -85,11 +89,14 @@ ADD_CASES(TC_JSONOut,
{"}", MR_Next}});
ADD_CASES(TC_JSONOut,
{{"\"name\": \"BM_Counters_Thousands/repeats:2_mean\",$"},
+ {"\"family_index\": 0,$", MR_Next},
+ {"\"per_family_instance_index\": 0,$", MR_Next},
{"\"run_name\": \"BM_Counters_Thousands/repeats:2\",$", MR_Next},
{"\"run_type\": \"aggregate\",$", MR_Next},
{"\"repetitions\": 2,$", MR_Next},
{"\"threads\": 1,$", MR_Next},
{"\"aggregate_name\": \"mean\",$", MR_Next},
+ {"\"aggregate_unit\": \"time\",$", MR_Next},
{"\"iterations\": 2,$", MR_Next},
{"\"real_time\": %float,$", MR_Next},
{"\"cpu_time\": %float,$", MR_Next},
@@ -102,11 +109,14 @@ ADD_CASES(TC_JSONOut,
{"}", MR_Next}});
ADD_CASES(TC_JSONOut,
{{"\"name\": \"BM_Counters_Thousands/repeats:2_median\",$"},
+ {"\"family_index\": 0,$", MR_Next},
+ {"\"per_family_instance_index\": 0,$", MR_Next},
{"\"run_name\": \"BM_Counters_Thousands/repeats:2\",$", MR_Next},
{"\"run_type\": \"aggregate\",$", MR_Next},
{"\"repetitions\": 2,$", MR_Next},
{"\"threads\": 1,$", MR_Next},
{"\"aggregate_name\": \"median\",$", MR_Next},
+ {"\"aggregate_unit\": \"time\",$", MR_Next},
{"\"iterations\": 2,$", MR_Next},
{"\"real_time\": %float,$", MR_Next},
{"\"cpu_time\": %float,$", MR_Next},
@@ -119,11 +129,14 @@ ADD_CASES(TC_JSONOut,
{"}", MR_Next}});
ADD_CASES(TC_JSONOut,
{{"\"name\": \"BM_Counters_Thousands/repeats:2_stddev\",$"},
+ {"\"family_index\": 0,$", MR_Next},
+ {"\"per_family_instance_index\": 0,$", MR_Next},
{"\"run_name\": \"BM_Counters_Thousands/repeats:2\",$", MR_Next},
{"\"run_type\": \"aggregate\",$", MR_Next},
{"\"repetitions\": 2,$", MR_Next},
{"\"threads\": 1,$", MR_Next},
{"\"aggregate_name\": \"stddev\",$", MR_Next},
+ {"\"aggregate_unit\": \"time\",$", MR_Next},
{"\"iterations\": 2,$", MR_Next},
{"\"real_time\": %float,$", MR_Next},
{"\"cpu_time\": %float,$", MR_Next},
diff --git a/tools/BUILD.bazel b/tools/BUILD.bazel
index 5895883..d25caa7 100644
--- a/tools/BUILD.bazel
+++ b/tools/BUILD.bazel
@@ -1,4 +1,4 @@
-load("@py_deps//:requirements.bzl", "requirement")
+load("@tools_pip_deps//:requirements.bzl", "requirement")
py_library(
name = "gbench",
@@ -12,7 +12,7 @@ py_library(
py_binary(
name = "compare",
srcs = ["compare.py"],
- python_version = "PY2",
+ python_version = "PY3",
deps = [
":gbench",
],
diff --git a/tools/compare.py b/tools/compare.py
index 66eed93..e5eeb24 100755
--- a/tools/compare.py
+++ b/tools/compare.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
import unittest
"""
@@ -9,25 +9,28 @@ import argparse
from argparse import ArgumentParser
import json
import sys
+import os
import gbench
from gbench import util, report
-from gbench.util import *
def check_inputs(in1, in2, flags):
"""
Perform checking on the user provided inputs and diagnose any abnormalities
"""
- in1_kind, in1_err = classify_input_file(in1)
- in2_kind, in2_err = classify_input_file(in2)
- output_file = find_benchmark_flag('--benchmark_out=', flags)
- output_type = find_benchmark_flag('--benchmark_out_format=', flags)
- if in1_kind == IT_Executable and in2_kind == IT_Executable and output_file:
+ in1_kind, in1_err = util.classify_input_file(in1)
+ in2_kind, in2_err = util.classify_input_file(in2)
+ output_file = util.find_benchmark_flag('--benchmark_out=', flags)
+ output_type = util.find_benchmark_flag('--benchmark_out_format=', flags)
+ if in1_kind == util.IT_Executable and in2_kind == util.IT_Executable and output_file:
print(("WARNING: '--benchmark_out=%s' will be passed to both "
"benchmarks causing it to be overwritten") % output_file)
- if in1_kind == IT_JSON and in2_kind == IT_JSON and len(flags) > 0:
- print("WARNING: passing optional flags has no effect since both "
- "inputs are JSON")
+ if in1_kind == util.IT_JSON and in2_kind == util.IT_JSON:
+ # When both sides are JSON the only supported flag is
+ # --benchmark_filter=
+ for flag in util.remove_benchmark_flags('--benchmark_filter=', flags):
+ print("WARNING: passing %s has no effect since both "
+ "inputs are JSON" % flag)
if output_type is not None and output_type != 'json':
print(("ERROR: passing '--benchmark_out_format=%s' to 'compare.py`"
" is not supported.") % output_type)
@@ -238,10 +241,10 @@ def main():
options_contender = ['--benchmark_filter=%s' % filter_contender]
# Run the benchmarks and report the results
- json1 = json1_orig = gbench.util.run_or_load_benchmark(
- test_baseline, benchmark_options + options_baseline)
- json2 = json2_orig = gbench.util.run_or_load_benchmark(
- test_contender, benchmark_options + options_contender)
+ json1 = json1_orig = gbench.util.sort_benchmark_results(gbench.util.run_or_load_benchmark(
+ test_baseline, benchmark_options + options_baseline))
+ json2 = json2_orig = gbench.util.sort_benchmark_results(gbench.util.run_or_load_benchmark(
+ test_contender, benchmark_options + options_contender))
# Now, filter the benchmarks so that the difference report can work
if filter_baseline and filter_contender:
diff --git a/tools/gbench/Inputs/test1_run1.json b/tools/gbench/Inputs/test1_run1.json
index 601e327..9daed0b 100644
--- a/tools/gbench/Inputs/test1_run1.json
+++ b/tools/gbench/Inputs/test1_run1.json
@@ -114,6 +114,14 @@
"real_time": 1,
"cpu_time": 1,
"time_unit": "s"
+ },
+ {
+ "name": "BM_hasLabel",
+ "label": "a label",
+ "iterations": 1,
+ "real_time": 1,
+ "cpu_time": 1,
+ "time_unit": "s"
}
]
}
diff --git a/tools/gbench/Inputs/test1_run2.json b/tools/gbench/Inputs/test1_run2.json
index 3cbcf39..dc52970 100644
--- a/tools/gbench/Inputs/test1_run2.json
+++ b/tools/gbench/Inputs/test1_run2.json
@@ -114,6 +114,14 @@
"real_time": 1,
"cpu_time": 1,
"time_unit": "ns"
+ },
+ {
+ "name": "BM_hasLabel",
+ "label": "a label",
+ "iterations": 1,
+ "real_time": 1,
+ "cpu_time": 1,
+ "time_unit": "s"
}
]
}
diff --git a/tools/gbench/Inputs/test4_run.json b/tools/gbench/Inputs/test4_run.json
new file mode 100644
index 0000000..eaa005f
--- /dev/null
+++ b/tools/gbench/Inputs/test4_run.json
@@ -0,0 +1,96 @@
+{
+ "benchmarks": [
+ {
+ "name": "99 family 0 instance 0 repetition 0",
+ "run_type": "iteration",
+ "family_index": 0,
+ "per_family_instance_index": 0,
+ "repetition_index": 0
+ },
+ {
+ "name": "98 family 0 instance 0 repetition 1",
+ "run_type": "iteration",
+ "family_index": 0,
+ "per_family_instance_index": 0,
+ "repetition_index": 1
+ },
+ {
+ "name": "97 family 0 instance 0 aggregate",
+ "run_type": "aggregate",
+ "family_index": 0,
+ "per_family_instance_index": 0,
+ "aggregate_name": "9 aggregate"
+ },
+
+
+ {
+ "name": "96 family 0 instance 1 repetition 0",
+ "run_type": "iteration",
+ "family_index": 0,
+ "per_family_instance_index": 1,
+ "repetition_index": 0
+ },
+ {
+ "name": "95 family 0 instance 1 repetition 1",
+ "run_type": "iteration",
+ "family_index": 0,
+ "per_family_instance_index": 1,
+ "repetition_index": 1
+ },
+ {
+ "name": "94 family 0 instance 1 aggregate",
+ "run_type": "aggregate",
+ "family_index": 0,
+ "per_family_instance_index": 1,
+ "aggregate_name": "9 aggregate"
+ },
+
+
+
+
+ {
+ "name": "93 family 1 instance 0 repetition 0",
+ "run_type": "iteration",
+ "family_index": 1,
+ "per_family_instance_index": 0,
+ "repetition_index": 0
+ },
+ {
+ "name": "92 family 1 instance 0 repetition 1",
+ "run_type": "iteration",
+ "family_index": 1,
+ "per_family_instance_index": 0,
+ "repetition_index": 1
+ },
+ {
+ "name": "91 family 1 instance 0 aggregate",
+ "run_type": "aggregate",
+ "family_index": 1,
+ "per_family_instance_index": 0,
+ "aggregate_name": "9 aggregate"
+ },
+
+
+ {
+ "name": "90 family 1 instance 1 repetition 0",
+ "run_type": "iteration",
+ "family_index": 1,
+ "per_family_instance_index": 1,
+ "repetition_index": 0
+ },
+ {
+ "name": "89 family 1 instance 1 repetition 1",
+ "run_type": "iteration",
+ "family_index": 1,
+ "per_family_instance_index": 1,
+ "repetition_index": 1
+ },
+ {
+ "name": "88 family 1 instance 1 aggregate",
+ "run_type": "aggregate",
+ "family_index": 1,
+ "per_family_instance_index": 1,
+ "aggregate_name": "9 aggregate"
+ }
+ ]
+}
diff --git a/tools/gbench/Inputs/test4_run0.json b/tools/gbench/Inputs/test4_run0.json
new file mode 100644
index 0000000..54cf127
--- /dev/null
+++ b/tools/gbench/Inputs/test4_run0.json
@@ -0,0 +1,21 @@
+{
+ "context": {
+ "date": "2016-08-02 17:44:46",
+ "num_cpus": 4,
+ "mhz_per_cpu": 4228,
+ "cpu_scaling_enabled": false,
+ "library_build_type": "release"
+ },
+ "benchmarks": [
+ {
+ "name": "whocares",
+ "run_type": "aggregate",
+ "aggregate_name": "zz",
+ "aggregate_unit": "percentage",
+ "iterations": 1000,
+ "real_time": 0.01,
+ "cpu_time": 0.10,
+ "time_unit": "ns"
+ }
+ ]
+}
diff --git a/tools/gbench/Inputs/test4_run1.json b/tools/gbench/Inputs/test4_run1.json
new file mode 100644
index 0000000..25d5605
--- /dev/null
+++ b/tools/gbench/Inputs/test4_run1.json
@@ -0,0 +1,21 @@
+{
+ "context": {
+ "date": "2016-08-02 17:44:46",
+ "num_cpus": 4,
+ "mhz_per_cpu": 4228,
+ "cpu_scaling_enabled": false,
+ "library_build_type": "release"
+ },
+ "benchmarks": [
+ {
+ "name": "whocares",
+ "run_type": "aggregate",
+ "aggregate_name": "zz",
+ "aggregate_unit": "percentage",
+ "iterations": 1000,
+ "real_time": 0.005,
+ "cpu_time": 0.15,
+ "time_unit": "ns"
+ }
+ ]
+}
diff --git a/tools/gbench/report.py b/tools/gbench/report.py
index bf29492..b2bbfb9 100644
--- a/tools/gbench/report.py
+++ b/tools/gbench/report.py
@@ -1,11 +1,14 @@
-import unittest
"""report.py - Utilities for reporting statistics about benchmark results
"""
+
+import unittest
import os
import re
import copy
+import random
-from scipy.stats import mannwhitneyu
+from scipy.stats import mannwhitneyu, gmean
+from numpy import array
class BenchmarkColor(object):
@@ -39,6 +42,13 @@ UTEST_MIN_REPETITIONS = 2
UTEST_OPTIMAL_REPETITIONS = 9 # Lowest reasonable number, More is better.
UTEST_COL_NAME = "_pvalue"
+_TIME_UNIT_TO_SECONDS_MULTIPLIER = {
+ "s": 1.0,
+ "ms": 1e-3,
+ "us": 1e-6,
+ "ns": 1e-9,
+}
+
def color_format(use_color, fmt_str, *args, **kwargs):
"""
@@ -148,6 +158,30 @@ def partition_benchmarks(json1, json2):
return partitions
+def get_timedelta_field_as_seconds(benchmark, field_name):
+ """
+ Get value of field_name field of benchmark, which is time with time unit
+ time_unit, as time in seconds.
+ """
+ timedelta = benchmark[field_name]
+ time_unit = benchmark.get('time_unit', 's')
+ return timedelta * _TIME_UNIT_TO_SECONDS_MULTIPLIER.get(time_unit)
+
+
+def calculate_geomean(json):
+ """
+ Extract all real/cpu times from all the benchmarks as seconds,
+ and calculate their geomean.
+ """
+ times = []
+ for benchmark in json['benchmarks']:
+ if 'run_type' in benchmark and benchmark['run_type'] == 'aggregate':
+ continue
+ times.append([get_timedelta_field_as_seconds(benchmark, 'real_time'),
+ get_timedelta_field_as_seconds(benchmark, 'cpu_time')])
+ return gmean(times) if times else array([])
+
+
def extract_field(partition, field_name):
# The count of elements may be different. We want *all* of them.
lhs = [x[field_name] for x in partition[0]]
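The OVERALL_GEOMEAN row appended further down is built from these two helpers: every non-aggregate benchmark's real_time and cpu_time are converted to seconds via _TIME_UNIT_TO_SECONDS_MULTIPLIER and collected as rows of an n-by-2 array, and scipy's gmean (default axis=0) then reduces each column. For n benchmarks with per-benchmark times t_i in seconds:

    \[
    \mathrm{geomean}(t_1, \dots, t_n) = \Bigl(\prod_{i=1}^{n} t_i\Bigr)^{1/n}
    \]

so lhs_gmean[0] ends up being the geometric mean of the real times and lhs_gmean[1] that of the CPU times, which is how they are indexed when the OVERALL_GEOMEAN entry is assembled below.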
@@ -172,6 +206,7 @@ def calc_utest(timings_cpu, timings_time):
return (min_rep_cnt >= UTEST_OPTIMAL_REPETITIONS), cpu_pvalue, time_pvalue
+
def print_utest(bc_name, utest, utest_alpha, first_col_width, use_color=True):
def get_utest_color(pval):
return BC_FAIL if pval >= utest_alpha else BC_OKGREEN
@@ -220,6 +255,7 @@ def get_difference_report(
partitions = partition_benchmarks(json1, json2)
for partition in partitions:
benchmark_name = partition[0][0]['name']
+ label = partition[0][0]['label'] if 'label' in partition[0][0] else ''
time_unit = partition[0][0]['time_unit']
measurements = []
utest_results = {}
@@ -240,7 +276,8 @@ def get_difference_report(
if utest:
timings_cpu = extract_field(partition, 'cpu_time')
timings_time = extract_field(partition, 'real_time')
- have_optimal_repetitions, cpu_pvalue, time_pvalue = calc_utest(timings_cpu, timings_time)
+ have_optimal_repetitions, cpu_pvalue, time_pvalue = calc_utest(
+ timings_cpu, timings_time)
if cpu_pvalue and time_pvalue:
utest_results = {
'have_optimal_repetitions': have_optimal_repetitions,
@@ -259,6 +296,7 @@ def get_difference_report(
aggregate_name = partition[0][0]['aggregate_name'] if run_type == 'aggregate' and 'aggregate_name' in partition[0][0] else ''
diff_report.append({
'name': benchmark_name,
+ 'label': label,
'measurements': measurements,
'time_unit': time_unit,
'run_type': run_type,
@@ -266,6 +304,26 @@ def get_difference_report(
'utest': utest_results
})
+ lhs_gmean = calculate_geomean(json1)
+ rhs_gmean = calculate_geomean(json2)
+ if lhs_gmean.any() and rhs_gmean.any():
+ diff_report.append({
+ 'name': 'OVERALL_GEOMEAN',
+ 'label': '',
+ 'measurements': [{
+ 'real_time': lhs_gmean[0],
+ 'cpu_time': lhs_gmean[1],
+ 'real_time_other': rhs_gmean[0],
+ 'cpu_time_other': rhs_gmean[1],
+ 'time': calculate_change(lhs_gmean[0], rhs_gmean[0]),
+ 'cpu': calculate_change(lhs_gmean[1], rhs_gmean[1])
+ }],
+ 'time_unit': 's',
+ 'run_type': 'aggregate',
+ 'aggregate_name': 'geomean',
+ 'utest': {}
+ })
+
return diff_report
@@ -301,26 +359,23 @@ def print_difference_report(
fmt_str = "{}{:<{}s}{endc}{}{:+16.4f}{endc}{}{:+16.4f}{endc}{:14.0f}{:14.0f}{endc}{:14.0f}{:14.0f}"
for benchmark in json_diff_report:
# *If* we were asked to only include aggregates,
- # and if it is non-aggregate, then skip it.
- if include_aggregates_only and 'run_type' in benchmark:
- if benchmark['run_type'] != 'aggregate':
- continue
-
- for measurement in benchmark['measurements']:
- output_strs += [color_format(use_color,
- fmt_str,
- BC_HEADER,
- benchmark['name'],
- first_col_width,
- get_color(measurement['time']),
- measurement['time'],
- get_color(measurement['cpu']),
- measurement['cpu'],
- measurement['real_time'],
- measurement['real_time_other'],
- measurement['cpu_time'],
- measurement['cpu_time_other'],
- endc=BC_ENDC)]
+ # and if it is non-aggregate, then don't print it.
+ if not include_aggregates_only or not 'run_type' in benchmark or benchmark['run_type'] == 'aggregate':
+ for measurement in benchmark['measurements']:
+ output_strs += [color_format(use_color,
+ fmt_str,
+ BC_HEADER,
+ benchmark['name'],
+ first_col_width,
+ get_color(measurement['time']),
+ measurement['time'],
+ get_color(measurement['cpu']),
+ measurement['cpu'],
+ measurement['real_time'],
+ measurement['real_time_other'],
+ measurement['cpu_time'],
+ measurement['cpu_time_other'],
+ endc=BC_ENDC)]
# After processing the measurements, if requested and
# if applicable (e.g. u-test exists for given benchmark),
@@ -404,6 +459,8 @@ class TestReportDifference(unittest.TestCase):
'-0.1000', '100', '110', '100', '90'],
['BM_ThirdFaster', '-0.3333', '-0.3334', '100', '67', '100', '67'],
['BM_NotBadTimeUnit', '-0.9000', '+0.2000', '0', '0', '0', '1'],
+ ['BM_hasLabel', '+0.0000', '+0.0000', '1', '1', '1', '1'],
+ ['OVERALL_GEOMEAN', '-0.8113', '-0.7779', '0', '0', '0', '0']
]
output_lines_with_header = print_difference_report(
self.json_diff_report, use_color=False)
@@ -420,81 +477,137 @@ class TestReportDifference(unittest.TestCase):
expected_output = [
{
'name': 'BM_SameTimes',
- 'measurements': [{'time': 0.0000, 'cpu': 0.0000, 'real_time': 10, 'real_time_other': 10, 'cpu_time': 10, 'cpu_time_other': 10}],
+ 'label': '',
+ 'measurements': [{'time': 0.0000, 'cpu': 0.0000,
+ 'real_time': 10, 'real_time_other': 10,
+ 'cpu_time': 10, 'cpu_time_other': 10}],
'time_unit': 'ns',
'utest': {}
},
{
'name': 'BM_2xFaster',
- 'measurements': [{'time': -0.5000, 'cpu': -0.5000, 'real_time': 50, 'real_time_other': 25, 'cpu_time': 50, 'cpu_time_other': 25}],
+ 'label': '',
+ 'measurements': [{'time': -0.5000, 'cpu': -0.5000,
+ 'real_time': 50, 'real_time_other': 25,
+ 'cpu_time': 50, 'cpu_time_other': 25}],
'time_unit': 'ns',
'utest': {}
},
{
'name': 'BM_2xSlower',
- 'measurements': [{'time': 1.0000, 'cpu': 1.0000, 'real_time': 50, 'real_time_other': 100, 'cpu_time': 50, 'cpu_time_other': 100}],
+ 'label': '',
+ 'measurements': [{'time': 1.0000, 'cpu': 1.0000,
+ 'real_time': 50, 'real_time_other': 100,
+ 'cpu_time': 50, 'cpu_time_other': 100}],
'time_unit': 'ns',
'utest': {}
},
{
'name': 'BM_1PercentFaster',
- 'measurements': [{'time': -0.0100, 'cpu': -0.0100, 'real_time': 100, 'real_time_other': 98.9999999, 'cpu_time': 100, 'cpu_time_other': 98.9999999}],
+ 'label': '',
+ 'measurements': [{'time': -0.0100, 'cpu': -0.0100,
+ 'real_time': 100, 'real_time_other': 98.9999999,
+ 'cpu_time': 100, 'cpu_time_other': 98.9999999}],
'time_unit': 'ns',
'utest': {}
},
{
'name': 'BM_1PercentSlower',
- 'measurements': [{'time': 0.0100, 'cpu': 0.0100, 'real_time': 100, 'real_time_other': 101, 'cpu_time': 100, 'cpu_time_other': 101}],
+ 'label': '',
+ 'measurements': [{'time': 0.0100, 'cpu': 0.0100,
+ 'real_time': 100, 'real_time_other': 101,
+ 'cpu_time': 100, 'cpu_time_other': 101}],
'time_unit': 'ns',
'utest': {}
},
{
'name': 'BM_10PercentFaster',
- 'measurements': [{'time': -0.1000, 'cpu': -0.1000, 'real_time': 100, 'real_time_other': 90, 'cpu_time': 100, 'cpu_time_other': 90}],
+ 'label': '',
+ 'measurements': [{'time': -0.1000, 'cpu': -0.1000,
+ 'real_time': 100, 'real_time_other': 90,
+ 'cpu_time': 100, 'cpu_time_other': 90}],
'time_unit': 'ns',
'utest': {}
},
{
'name': 'BM_10PercentSlower',
- 'measurements': [{'time': 0.1000, 'cpu': 0.1000, 'real_time': 100, 'real_time_other': 110, 'cpu_time': 100, 'cpu_time_other': 110}],
+ 'label': '',
+ 'measurements': [{'time': 0.1000, 'cpu': 0.1000,
+ 'real_time': 100, 'real_time_other': 110,
+ 'cpu_time': 100, 'cpu_time_other': 110}],
'time_unit': 'ns',
'utest': {}
},
{
'name': 'BM_100xSlower',
- 'measurements': [{'time': 99.0000, 'cpu': 99.0000, 'real_time': 100, 'real_time_other': 10000, 'cpu_time': 100, 'cpu_time_other': 10000}],
+ 'label': '',
+ 'measurements': [{'time': 99.0000, 'cpu': 99.0000,
+ 'real_time': 100, 'real_time_other': 10000,
+ 'cpu_time': 100, 'cpu_time_other': 10000}],
'time_unit': 'ns',
'utest': {}
},
{
'name': 'BM_100xFaster',
- 'measurements': [{'time': -0.9900, 'cpu': -0.9900, 'real_time': 10000, 'real_time_other': 100, 'cpu_time': 10000, 'cpu_time_other': 100}],
+ 'label': '',
+ 'measurements': [{'time': -0.9900, 'cpu': -0.9900,
+ 'real_time': 10000, 'real_time_other': 100,
+ 'cpu_time': 10000, 'cpu_time_other': 100}],
'time_unit': 'ns',
'utest': {}
},
{
'name': 'BM_10PercentCPUToTime',
- 'measurements': [{'time': 0.1000, 'cpu': -0.1000, 'real_time': 100, 'real_time_other': 110, 'cpu_time': 100, 'cpu_time_other': 90}],
+ 'label': '',
+ 'measurements': [{'time': 0.1000, 'cpu': -0.1000,
+ 'real_time': 100, 'real_time_other': 110,
+ 'cpu_time': 100, 'cpu_time_other': 90}],
'time_unit': 'ns',
'utest': {}
},
{
'name': 'BM_ThirdFaster',
- 'measurements': [{'time': -0.3333, 'cpu': -0.3334, 'real_time': 100, 'real_time_other': 67, 'cpu_time': 100, 'cpu_time_other': 67}],
+ 'label': '',
+ 'measurements': [{'time': -0.3333, 'cpu': -0.3334,
+ 'real_time': 100, 'real_time_other': 67,
+ 'cpu_time': 100, 'cpu_time_other': 67}],
'time_unit': 'ns',
'utest': {}
},
{
'name': 'BM_NotBadTimeUnit',
- 'measurements': [{'time': -0.9000, 'cpu': 0.2000, 'real_time': 0.4, 'real_time_other': 0.04, 'cpu_time': 0.5, 'cpu_time_other': 0.6}],
+ 'label': '',
+ 'measurements': [{'time': -0.9000, 'cpu': 0.2000,
+ 'real_time': 0.4, 'real_time_other': 0.04,
+ 'cpu_time': 0.5, 'cpu_time_other': 0.6}],
+ 'time_unit': 's',
+ 'utest': {}
+ },
+ {
+ 'name': 'BM_hasLabel',
+ 'label': 'a label',
+ 'measurements': [{'time': 0.0000, 'cpu': 0.0000,
+ 'real_time': 1, 'real_time_other': 1,
+ 'cpu_time': 1, 'cpu_time_other': 1}],
'time_unit': 's',
'utest': {}
},
+ {
+ 'name': 'OVERALL_GEOMEAN',
+ 'label': '',
+ 'measurements': [{'real_time': 3.1622776601683826e-06, 'cpu_time': 3.2130844755623912e-06,
+ 'real_time_other': 1.9768988699420897e-07, 'cpu_time_other': 2.397447755209533e-07,
+ 'time': -0.8112976497120911, 'cpu': -0.7778551721181174}],
+ 'time_unit': 's',
+ 'run_type': 'aggregate',
+ 'aggregate_name': 'geomean', 'utest': {}
+ },
]
self.assertEqual(len(self.json_diff_report), len(expected_output))
for out, expected in zip(
self.json_diff_report, expected_output):
self.assertEqual(out['name'], expected['name'])
+ self.assertEqual(out['label'], expected['label'])
self.assertEqual(out['time_unit'], expected['time_unit'])
assert_utest(self, out, expected)
assert_measurements(self, out, expected)
@@ -525,6 +638,7 @@ class TestReportDifferenceBetweenFamilies(unittest.TestCase):
['./4', '-0.5000', '-0.5000', '40', '20', '40', '20'],
['Prefix/.', '-0.5000', '-0.5000', '20', '10', '20', '10'],
['Prefix/./3', '-0.5000', '-0.5000', '30', '15', '30', '15'],
+ ['OVERALL_GEOMEAN', '-0.5000', '-0.5000', '0', '0', '0', '0']
]
output_lines_with_header = print_difference_report(
self.json_diff_report, use_color=False)
@@ -562,6 +676,16 @@ class TestReportDifferenceBetweenFamilies(unittest.TestCase):
'measurements': [{'time': -0.5, 'cpu': -0.5, 'real_time': 30, 'real_time_other': 15, 'cpu_time': 30, 'cpu_time_other': 15}],
'time_unit': 'ns',
'utest': {}
+ },
+ {
+ 'name': 'OVERALL_GEOMEAN',
+ 'measurements': [{'real_time': 2.213363839400641e-08, 'cpu_time': 2.213363839400641e-08,
+ 'real_time_other': 1.1066819197003185e-08, 'cpu_time_other': 1.1066819197003185e-08,
+ 'time': -0.5000000000000009, 'cpu': -0.5000000000000009}],
+ 'time_unit': 's',
+ 'run_type': 'aggregate',
+ 'aggregate_name': 'geomean',
+ 'utest': {}
}
]
self.assertEqual(len(self.json_diff_report), len(expected_output))
@@ -600,8 +724,8 @@ class TestReportDifferenceWithUTest(unittest.TestCase):
['BM_Two', '+0.1111', '-0.0111', '9', '10', '90', '89'],
['BM_Two', '-0.1250', '-0.1628', '8', '7', '86', '72'],
['BM_Two_pvalue',
- '0.6985',
- '0.6985',
+ '1.0000',
+ '0.6667',
'U',
'Test,',
'Repetitions:',
@@ -618,7 +742,7 @@ class TestReportDifferenceWithUTest(unittest.TestCase):
['short', '-0.4325', '-0.1351', '8', '5', '77', '67'],
['short_pvalue',
'0.7671',
- '0.1489',
+ '0.2000',
'U',
'Test,',
'Repetitions:',
@@ -632,6 +756,7 @@ class TestReportDifferenceWithUTest(unittest.TestCase):
'repetitions',
'recommended.'],
['medium', '-0.3750', '-0.3375', '8', '5', '80', '53'],
+ ['OVERALL_GEOMEAN', '+1.6405', '-0.6985', '0', '0', '0', '0']
]
output_lines_with_header = print_difference_report(
self.json_diff_report, utest=True, utest_alpha=0.05, use_color=False)
@@ -643,6 +768,53 @@ class TestReportDifferenceWithUTest(unittest.TestCase):
parts = [x for x in output_lines[i].split(' ') if x]
self.assertEqual(expect_lines[i], parts)
+ def test_json_diff_report_pretty_printing_aggregates_only(self):
+ expect_lines = [
+ ['BM_One', '-0.1000', '+0.1000', '10', '9', '100', '110'],
+ ['BM_Two_pvalue',
+ '1.0000',
+ '0.6667',
+ 'U',
+ 'Test,',
+ 'Repetitions:',
+ '2',
+ 'vs',
+ '2.',
+ 'WARNING:',
+ 'Results',
+ 'unreliable!',
+ '9+',
+ 'repetitions',
+ 'recommended.'],
+ ['short', '-0.1250', '-0.0625', '8', '7', '80', '75'],
+ ['short', '-0.4325', '-0.1351', '8', '5', '77', '67'],
+ ['short_pvalue',
+ '0.7671',
+ '0.2000',
+ 'U',
+ 'Test,',
+ 'Repetitions:',
+ '2',
+ 'vs',
+ '3.',
+ 'WARNING:',
+ 'Results',
+ 'unreliable!',
+ '9+',
+ 'repetitions',
+ 'recommended.'],
+ ['OVERALL_GEOMEAN', '+1.6405', '-0.6985', '0', '0', '0', '0']
+ ]
+ output_lines_with_header = print_difference_report(
+ self.json_diff_report, include_aggregates_only=True, utest=True, utest_alpha=0.05, use_color=False)
+ output_lines = output_lines_with_header[2:]
+ print("\n")
+ print("\n".join(output_lines_with_header))
+ self.assertEqual(len(output_lines), len(expect_lines))
+ for i in range(0, len(output_lines)):
+ parts = [x for x in output_lines[i].split(' ') if x]
+ self.assertEqual(expect_lines[i], parts)
+
def test_json_diff_report(self):
expected_output = [
{
@@ -672,7 +844,7 @@ class TestReportDifferenceWithUTest(unittest.TestCase):
],
'time_unit': 'ns',
'utest': {
- 'have_optimal_repetitions': False, 'cpu_pvalue': 0.6985353583033387, 'time_pvalue': 0.6985353583033387
+ 'have_optimal_repetitions': False, 'cpu_pvalue': 0.6666666666666666, 'time_pvalue': 1.0
}
},
{
@@ -693,7 +865,7 @@ class TestReportDifferenceWithUTest(unittest.TestCase):
],
'time_unit': 'ns',
'utest': {
- 'have_optimal_repetitions': False, 'cpu_pvalue': 0.14891467317876572, 'time_pvalue': 0.7670968684102772
+ 'have_optimal_repetitions': False, 'cpu_pvalue': 0.2, 'time_pvalue': 0.7670968684102772
}
},
{
@@ -708,6 +880,16 @@ class TestReportDifferenceWithUTest(unittest.TestCase):
],
'time_unit': 'ns',
'utest': {}
+ },
+ {
+ 'name': 'OVERALL_GEOMEAN',
+ 'measurements': [{'real_time': 8.48528137423858e-09, 'cpu_time': 8.441336246629233e-08,
+ 'real_time_other': 2.2405267593145244e-08, 'cpu_time_other': 2.5453661413660466e-08,
+ 'time': 1.6404861082353634, 'cpu': -0.6984640740519662}],
+ 'time_unit': 's',
+ 'run_type': 'aggregate',
+ 'aggregate_name': 'geomean',
+ 'utest': {}
}
]
self.assertEqual(len(self.json_diff_report), len(expected_output))
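The revised p-value expectations in the hunks above (time 1.0000 and CPU 0.6667 for BM_Two) appear to match an exact two-sided Mann-Whitney U test on the two-repetition samples, rather than the previous normal-approximation values. A minimal consistency check follows; it assumes scipy's mannwhitneyu exact method is what the tooling now relies on (scipy is pinned to 1.10.0 later in this patch), and the sample values are simply read off the BM_Two rows shown earlier.

# Consistency check for the new BM_Two expectations (sample values from the rows above).
from scipy.stats import mannwhitneyu

real_run1, real_run2 = [9, 8], [10, 7]    # real_time repetitions, run 1 vs run 2
cpu_run1, cpu_run2 = [90, 86], [89, 72]   # cpu_time repetitions, run 1 vs run 2

print(mannwhitneyu(real_run1, real_run2, alternative='two-sided', method='exact').pvalue)  # 1.0
print(mannwhitneyu(cpu_run1, cpu_run2, alternative='two-sided', method='exact').pvalue)    # ~0.6667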
@@ -747,8 +929,8 @@ class TestReportDifferenceWithUTestWhileDisplayingAggregatesOnly(
['BM_Two', '+0.1111', '-0.0111', '9', '10', '90', '89'],
['BM_Two', '-0.1250', '-0.1628', '8', '7', '86', '72'],
['BM_Two_pvalue',
- '0.6985',
- '0.6985',
+ '1.0000',
+ '0.6667',
'U',
'Test,',
'Repetitions:',
@@ -765,7 +947,7 @@ class TestReportDifferenceWithUTestWhileDisplayingAggregatesOnly(
['short', '-0.4325', '-0.1351', '8', '5', '77', '67'],
['short_pvalue',
'0.7671',
- '0.1489',
+ '0.2000',
'U',
'Test,',
'Repetitions:',
@@ -778,7 +960,8 @@ class TestReportDifferenceWithUTestWhileDisplayingAggregatesOnly(
'9+',
'repetitions',
'recommended.'],
- ['medium', '-0.3750', '-0.3375', '8', '5', '80', '53']
+ ['medium', '-0.3750', '-0.3375', '8', '5', '80', '53'],
+ ['OVERALL_GEOMEAN', '+1.6405', '-0.6985', '0', '0', '0', '0']
]
output_lines_with_header = print_difference_report(
self.json_diff_report,
@@ -820,7 +1003,7 @@ class TestReportDifferenceWithUTestWhileDisplayingAggregatesOnly(
],
'time_unit': 'ns',
'utest': {
- 'have_optimal_repetitions': False, 'cpu_pvalue': 0.6985353583033387, 'time_pvalue': 0.6985353583033387
+ 'have_optimal_repetitions': False, 'cpu_pvalue': 0.6666666666666666, 'time_pvalue': 1.0
}
},
{
@@ -841,7 +1024,7 @@ class TestReportDifferenceWithUTestWhileDisplayingAggregatesOnly(
],
'time_unit': 'ns',
'utest': {
- 'have_optimal_repetitions': False, 'cpu_pvalue': 0.14891467317876572, 'time_pvalue': 0.7670968684102772
+ 'have_optimal_repetitions': False, 'cpu_pvalue': 0.2, 'time_pvalue': 0.7670968684102772
}
},
{
@@ -853,11 +1036,83 @@ class TestReportDifferenceWithUTestWhileDisplayingAggregatesOnly(
'real_time': 8,
'cpu_time_other': 53,
'cpu': -0.3375
- }
+ }
],
'utest': {},
'time_unit': u'ns',
'aggregate_name': ''
+ },
+ {
+ 'name': 'OVERALL_GEOMEAN',
+ 'measurements': [{'real_time': 8.48528137423858e-09, 'cpu_time': 8.441336246629233e-08,
+ 'real_time_other': 2.2405267593145244e-08, 'cpu_time_other': 2.5453661413660466e-08,
+ 'time': 1.6404861082353634, 'cpu': -0.6984640740519662}],
+ 'time_unit': 's',
+ 'run_type': 'aggregate',
+ 'aggregate_name': 'geomean',
+ 'utest': {}
+ }
+ ]
+ self.assertEqual(len(self.json_diff_report), len(expected_output))
+ for out, expected in zip(
+ self.json_diff_report, expected_output):
+ self.assertEqual(out['name'], expected['name'])
+ self.assertEqual(out['time_unit'], expected['time_unit'])
+ assert_utest(self, out, expected)
+ assert_measurements(self, out, expected)
+
+
+class TestReportDifferenceForPercentageAggregates(
+ unittest.TestCase):
+ @classmethod
+ def setUpClass(cls):
+ def load_results():
+ import json
+ testInputs = os.path.join(
+ os.path.dirname(
+ os.path.realpath(__file__)),
+ 'Inputs')
+ testOutput1 = os.path.join(testInputs, 'test4_run0.json')
+ testOutput2 = os.path.join(testInputs, 'test4_run1.json')
+ with open(testOutput1, 'r') as f:
+ json1 = json.load(f)
+ with open(testOutput2, 'r') as f:
+ json2 = json.load(f)
+ return json1, json2
+
+ json1, json2 = load_results()
+ cls.json_diff_report = get_difference_report(
+ json1, json2, utest=True)
+
+ def test_json_diff_report_pretty_printing(self):
+ expect_lines = [
+ ['whocares', '-0.5000', '+0.5000', '0', '0', '0', '0']
+ ]
+ output_lines_with_header = print_difference_report(
+ self.json_diff_report,
+ utest=True, utest_alpha=0.05, use_color=False)
+ output_lines = output_lines_with_header[2:]
+ print("\n")
+ print("\n".join(output_lines_with_header))
+ self.assertEqual(len(output_lines), len(expect_lines))
+ for i in range(0, len(output_lines)):
+ parts = [x for x in output_lines[i].split(' ') if x]
+ self.assertEqual(expect_lines[i], parts)
+
+ def test_json_diff_report(self):
+ expected_output = [
+ {
+ 'name': u'whocares',
+ 'measurements': [
+ {'time': -0.5,
+ 'cpu': 0.5,
+ 'real_time': 0.01,
+ 'real_time_other': 0.005,
+ 'cpu_time': 0.10,
+ 'cpu_time_other': 0.15}
+ ],
+ 'time_unit': 'ns',
+ 'utest': {}
}
]
self.assertEqual(len(self.json_diff_report), len(expected_output))
@@ -869,6 +1124,49 @@ class TestReportDifferenceWithUTestWhileDisplayingAggregatesOnly(
assert_measurements(self, out, expected)
+class TestReportSorting(unittest.TestCase):
+ @classmethod
+ def setUpClass(cls):
+ def load_result():
+ import json
+ testInputs = os.path.join(
+ os.path.dirname(
+ os.path.realpath(__file__)),
+ 'Inputs')
+ testOutput = os.path.join(testInputs, 'test4_run.json')
+ with open(testOutput, 'r') as f:
+                loaded = json.load(f)
+            return loaded
+
+ cls.json = load_result()
+
+ def test_json_diff_report_pretty_printing(self):
+ import util
+
+ expected_names = [
+ "99 family 0 instance 0 repetition 0",
+ "98 family 0 instance 0 repetition 1",
+ "97 family 0 instance 0 aggregate",
+ "96 family 0 instance 1 repetition 0",
+ "95 family 0 instance 1 repetition 1",
+ "94 family 0 instance 1 aggregate",
+ "93 family 1 instance 0 repetition 0",
+ "92 family 1 instance 0 repetition 1",
+ "91 family 1 instance 0 aggregate",
+ "90 family 1 instance 1 repetition 0",
+ "89 family 1 instance 1 repetition 1",
+ "88 family 1 instance 1 aggregate"
+ ]
+
+ for n in range(len(self.json['benchmarks']) ** 2):
+ random.shuffle(self.json['benchmarks'])
+ sorted_benchmarks = util.sort_benchmark_results(self.json)[
+ 'benchmarks']
+ self.assertEqual(len(expected_names), len(sorted_benchmarks))
+ for out, expected in zip(sorted_benchmarks, expected_names):
+ self.assertEqual(out['name'], expected)
+
+
def assert_utest(unittest_instance, lhs, rhs):
if lhs['utest']:
unittest_instance.assertAlmostEqual(
diff --git a/tools/gbench/util.py b/tools/gbench/util.py
index 661c4ba..5e79da8 100644
--- a/tools/gbench/util.py
+++ b/tools/gbench/util.py
@@ -2,9 +2,11 @@
"""
import json
import os
-import tempfile
+import re
import subprocess
import sys
+import tempfile
+
# Input file type enumeration
IT_Invalid = 0
@@ -57,7 +59,7 @@ def classify_input_file(filename):
"""
Return a tuple (type, msg) where 'type' specifies the classified type
of 'filename'. If 'type' is 'IT_Invalid' then 'msg' is a human readable
- string represeting the error.
+ string representing the error.
"""
ftype = IT_Invalid
err_msg = None
@@ -110,13 +112,49 @@ def remove_benchmark_flags(prefix, benchmark_flags):
return [f for f in benchmark_flags if not f.startswith(prefix)]
-def load_benchmark_results(fname):
+def load_benchmark_results(fname, benchmark_filter):
"""
Read benchmark output from a file and return the JSON object.
+
+ Apply benchmark_filter, a regular expression, with nearly the same
+    semantics as the --benchmark_filter argument. May be None.
+ Note: the Python regular expression engine is used instead of the
+ one used by the C++ code, which may produce different results
+ in complex cases.
+
REQUIRES: 'fname' names a file containing JSON benchmark output.
"""
+ def benchmark_wanted(benchmark):
+ if benchmark_filter is None:
+ return True
+ name = benchmark.get('run_name', None) or benchmark['name']
+ if re.search(benchmark_filter, name):
+ return True
+ return False
+
with open(fname, 'r') as f:
- return json.load(f)
+ results = json.load(f)
+ if 'benchmarks' in results:
+ results['benchmarks'] = list(filter(benchmark_wanted,
+ results['benchmarks']))
+ return results
+
+
+def sort_benchmark_results(result):
+ benchmarks = result['benchmarks']
+
+    # Sort from the innermost key to the outermost; stable sorts preserve earlier orderings within ties.
+    benchmarks = sorted(
+        benchmarks, key=lambda benchmark: benchmark.get('repetition_index', -1))
+    benchmarks = sorted(
+        benchmarks, key=lambda benchmark: 1 if benchmark.get('run_type') == "aggregate" else 0)
+    benchmarks = sorted(
+        benchmarks, key=lambda benchmark: benchmark.get('per_family_instance_index', -1))
+    benchmarks = sorted(
+        benchmarks, key=lambda benchmark: benchmark.get('family_index', -1))
+
+ result['benchmarks'] = benchmarks
+ return result
def run_benchmark(exe_name, benchmark_flags):
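Because Python's sort is stable, the four passes in sort_benchmark_results above are equivalent to a single sort on a composite key, outermost key first. A minimal sketch of that equivalence, using the same field names and the same -1 defaults for missing fields:

# Equivalent single-pass ordering for sort_benchmark_results (sketch only, same defaults as above).
def composite_sort_key(benchmark):
    return (
        benchmark.get('family_index', -1),
        benchmark.get('per_family_instance_index', -1),
        1 if benchmark.get('run_type') == 'aggregate' else 0,  # aggregates after repetitions
        benchmark.get('repetition_index', -1),
    )

# result['benchmarks'] = sorted(result['benchmarks'], key=composite_sort_key)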
@@ -142,7 +180,7 @@ def run_benchmark(exe_name, benchmark_flags):
if exitCode != 0:
print('TEST FAILED...')
sys.exit(exitCode)
- json_res = load_benchmark_results(output_name)
+ json_res = load_benchmark_results(output_name, None)
if is_temp_output:
os.unlink(output_name)
return json_res
@@ -157,7 +195,9 @@ def run_or_load_benchmark(filename, benchmark_flags):
"""
ftype = check_input_file(filename)
if ftype == IT_JSON:
- return load_benchmark_results(filename)
+ benchmark_filter = find_benchmark_flag('--benchmark_filter=',
+ benchmark_flags)
+ return load_benchmark_results(filename, benchmark_filter)
if ftype == IT_Executable:
return run_benchmark(filename, benchmark_flags)
raise ValueError('Unknown file type %s' % ftype)
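A short usage sketch of the new filter parameter, assuming the functions defined in util.py above are in scope; the file name and pattern below are placeholders. The regex is matched against run_name when present, otherwise name, using Python's re module rather than the C++ engine.

# Hypothetical example: load a JSON result file, keep only benchmarks matching a
# pattern, and put repetitions/aggregates into a deterministic order.
results = load_benchmark_results('benchmark_output.json', r'^BM_Two')  # placeholder path/pattern
results = sort_benchmark_results(results)
for benchmark in results['benchmarks']:
    print(benchmark['name'])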
diff --git a/tools/libpfm.BUILD.bazel b/tools/libpfm.BUILD.bazel
new file mode 100644
index 0000000..6269534
--- /dev/null
+++ b/tools/libpfm.BUILD.bazel
@@ -0,0 +1,22 @@
+# Build rule for libpfm, which is required to collect performance counters for
+# BENCHMARK_ENABLE_LIBPFM builds.
+
+load("@rules_foreign_cc//foreign_cc:defs.bzl", "make")
+
+filegroup(
+ name = "pfm_srcs",
+ srcs = glob(["**"]),
+)
+
+make(
+ name = "libpfm",
+ lib_source = ":pfm_srcs",
+ lib_name = "libpfm",
+ copts = [
+ "-Wno-format-truncation",
+ "-Wno-use-after-free",
+ ],
+ visibility = [
+ "//visibility:public",
+ ],
+)
diff --git a/tools/requirements.txt b/tools/requirements.txt
index 3b3331b..f32f35b 100644
--- a/tools/requirements.txt
+++ b/tools/requirements.txt
@@ -1 +1,2 @@
-scipy>=1.5.0
\ No newline at end of file
+numpy == 1.25
+scipy == 1.10.0
diff --git a/tools/strip_asm.py b/tools/strip_asm.py
index 9030550..d131dc7 100755
--- a/tools/strip_asm.py
+++ b/tools/strip_asm.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
"""
strip_asm.py - Cleanup ASM output for the specified file