author | Sven <sven@sven-johannsen.de> | 2016-07-15 21:24:25 +0000 |
---|---|---|
committer | android-build-merger <android-build-merger@google.com> | 2016-07-15 21:24:25 +0000 |
commit | db1632fadc79df388a2a7248157c965ee40248a1 (patch) | |
tree | 7bf4634a4e8722e41f0af1bedcfde6072c3c23f9 | |
parent | a7aad22e27abd91b7c88b99bad057d3b72e11ac1 (diff) | |
parent | 01950b6483ba3d71ecff360d11ed83abaea42187 (diff) | |
download | google-benchmark-emu-3.0-release.tar.gz |
Merge remote-tracking branch 'aosp/upstream-master' into mymerge
am: 01950b6483
Change-Id: Id316aa08e6fe20ffbdc32ecf3c69f9010e6b32c8
35 files changed, 2456 insertions, 410 deletions
diff --git a/.travis.yml b/.travis.yml
index 8b138ce..bf26395 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -39,3 +39,5 @@ after_success:
   - if [ "${BUILD_TYPE}" == "Coverage" -a "${TRAVIS_OS_NAME}" == "linux" ]; then
       coveralls --include src --include include --gcov-options '\-lp' --root .. --build-root .;
     fi
+
+sudo: required
diff --git a/AUTHORS b/AUTHORS
--- a/AUTHORS
+++ b/AUTHORS
@@ -8,6 +8,7 @@
 #
 # Please keep the list sorted.
 
+Albert Pretorius <pretoalb@gmail.com>
 Arne Beer <arne@twobeer.de>
 Christopher Seymour <chris.j.seymour@hotmail.com>
 David Coeurjolly <david.coeurjolly@liris.cnrs.fr>
@@ -16,7 +17,9 @@ Eugene Zhuk <eugene.zhuk@gmail.com>
 Evgeny Safronov <division494@gmail.com>
 Felix Homann <linuxaudio@showlabor.de>
 Google Inc.
+Ismael Jimenez Martinez <ismael.jimenez.martinez@gmail.com>
 JianXiong Zhou <zhoujianxiong2@gmail.com>
+Jussi Knuuttila <jussi.knuuttila@gmail.com>
 Kaito Udagawa <umireon@gmail.com>
 Lei Xu <eddyxu@gmail.com>
 Matt Clarkson <mattyclarkson@gmail.com>
diff --git a/Android.bp b/Android.bp
--- a/Android.bp
+++ b/Android.bp
@@ -32,6 +32,7 @@ cc_library_static {
         "src/benchmark.cc",
         "src/colorprint.cc",
         "src/commandlineflags.cc",
+        "src/complexity.cc",
        "src/console_reporter.cc",
         "src/csv_reporter.cc",
         "src/json_reporter.cc",
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 2c72252..a1251e7 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -3,6 +3,7 @@ project (benchmark)
 
 foreach(p
   CMP0054 # CMake 3.1
+  CMP0056 # export EXE_LINKER_FLAGS to try_run
   )
   if(POLICY ${p})
     cmake_policy(SET ${p} NEW)
@@ -33,15 +34,29 @@ include(CXXFeatureCheck)
 
 if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC")
   # Turn compiler warnings up to 11
-  add_cxx_compiler_flag(-W4)
+  string(REGEX REPLACE "[-/]W[1-4]" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /W4")
   add_definitions(-D_CRT_SECURE_NO_WARNINGS)
 
   # Link time optimisation
   if (BENCHMARK_ENABLE_LTO)
-    set(CMAKE_CXX_FLAGS_RELEASE "/GL")
-    set(CMAKE_STATIC_LINKER_FLAGS_RELEASE "/LTCG")
-    set(CMAKE_SHARED_LINKER_FLAGS_RELEASE "/LTCG")
-    set(CMAKE_EXE_LINKER_FLAGS_RELEASE "/LTCG")
+    set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /GL")
+    set(CMAKE_STATIC_LINKER_FLAGS_RELEASE "${CMAKE_STATIC_LINKER_FLAGS_RELEASE} /LTCG")
+    set(CMAKE_SHARED_LINKER_FLAGS_RELEASE "${CMAKE_SHARED_LINKER_FLAGS_RELEASE} /LTCG")
+    set(CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS_RELEASE} /LTCG")
+
+    set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} /GL")
+    string(REGEX REPLACE "[-/]INCREMENTAL" "/INCREMENTAL:NO" CMAKE_STATIC_LINKER_FLAGS_RELWITHDEBINFO "${CMAKE_STATIC_LINKER_FLAGS_RELWITHDEBINFO}")
+    set(CMAKE_STATIC_LINKER_FLAGS_RELWITHDEBINFO "${CMAKE_STATIC_LINKER_FLAGS_RELWITHDEBINFO} /LTCG")
+    string(REGEX REPLACE "[-/]INCREMENTAL" "/INCREMENTAL:NO" CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO "${CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO}")
+    set(CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO "${CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO} /LTCG")
+    string(REGEX REPLACE "[-/]INCREMENTAL" "/INCREMENTAL:NO" CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO "${CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO}")
+    set(CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO "${CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO} /LTCG")
+
+    set(CMAKE_CXX_FLAGS_MINSIZEREL "${CMAKE_CXX_FLAGS_MINSIZEREL} /GL")
+    set(CMAKE_STATIC_LINKER_FLAGS_MINSIZEREL "${CMAKE_STATIC_LINKER_FLAGS_MINSIZEREL} /LTCG")
+    set(CMAKE_SHARED_LINKER_FLAGS_MINSIZEREL "${CMAKE_SHARED_LINKER_FLAGS_MINSIZEREL} /LTCG")
+    set(CMAKE_EXE_LINKER_FLAGS_MINSIZEREL "${CMAKE_EXE_LINKER_FLAGS_MINSIZEREL} /LTCG")
   endif()
 else()
   # Try and enable C++11.
Don't use C++14 because it doesn't work in some @@ -57,6 +72,8 @@ else() add_cxx_compiler_flag(-Wextra) add_cxx_compiler_flag(-Wshadow) add_cxx_compiler_flag(-Werror RELEASE) + add_cxx_compiler_flag(-Werror RELWITHDEBINFO) + add_cxx_compiler_flag(-Werror MINSIZEREL) add_cxx_compiler_flag(-pedantic) add_cxx_compiler_flag(-pedantic-errors) add_cxx_compiler_flag(-Wshorten-64-to-32) diff --git a/CONTRIBUTORS b/CONTRIBUTORS index ed55bcf..4bff126 100644 --- a/CONTRIBUTORS +++ b/CONTRIBUTORS @@ -22,16 +22,22 @@ # # Please keep the list sorted. +Albert Pretorius <pretoalb@gmail.com> Arne Beer <arne@twobeer.de> +Billy Robert O'Neal III <billy.oneal@gmail.com> <bion@microsoft.com> Chris Kennelly <ckennelly@google.com> <ckennelly@ckennelly.com> Christopher Seymour <chris.j.seymour@hotmail.com> David Coeurjolly <david.coeurjolly@liris.cnrs.fr> Dominic Hamon <dma@stripysock.com> +Eric Fiselier <eric@efcs.ca> Eugene Zhuk <eugene.zhuk@gmail.com> Evgeny Safronov <division494@gmail.com> Felix Homann <linuxaudio@showlabor.de> +Ismael Jimenez Martinez <ismael.jimenez.martinez@gmail.com> JianXiong Zhou <zhoujianxiong2@gmail.com> +Jussi Knuuttila <jussi.knuuttila@gmail.com> Kaito Udagawa <umireon@gmail.com> +Kai Wolf <kai.wolf@gmail.com> Lei Xu <eddyxu@gmail.com> Matt Clarkson <mattyclarkson@gmail.com> Oleksandr Sochka <sasha.sochka@gmail.com> @@ -1,5 +1,4 @@ -benchmark -========= +# benchmark [![Build Status](https://travis-ci.org/google/benchmark.svg?branch=master)](https://travis-ci.org/google/benchmark) [![Build status](https://ci.appveyor.com/api/projects/status/u0qsyp7t1tk7cpxs/branch/master?svg=true)](https://ci.appveyor.com/project/google/benchmark/branch/master) [![Coverage Status](https://coveralls.io/repos/google/benchmark/badge.svg)](https://coveralls.io/r/google/benchmark) @@ -10,10 +9,9 @@ Discussion group: https://groups.google.com/d/forum/benchmark-discuss IRC channel: https://freenode.net #googlebenchmark -Example usage -------------- -Define a function that executes the code to be measured a -specified number of times: +## Example usage +### Basic usage +Define a function that executes the code to be measured. ```c++ static void BM_StringCreation(benchmark::State& state) { @@ -34,15 +32,16 @@ BENCHMARK(BM_StringCopy); BENCHMARK_MAIN(); ``` -Sometimes a family of microbenchmarks can be implemented with -just one routine that takes an extra argument to specify which -one of the family of benchmarks to run. For example, the following -code defines a family of microbenchmarks for measuring the speed -of `memcpy()` calls of different lengths: +### Passing arguments +Sometimes a family of benchmarks can be implemented with just one routine that +takes an extra argument to specify which one of the family of benchmarks to +run. For example, the following code defines a family of benchmarks for +measuring the speed of `memcpy()` calls of different lengths: ```c++ static void BM_memcpy(benchmark::State& state) { - char* src = new char[state.range_x()]; char* dst = new char[state.range_x()]; + char* src = new char[state.range_x()]; + char* dst = new char[state.range_x()]; memset(src, 'x', state.range_x()); while (state.KeepRunning()) memcpy(dst, src, state.range_x()); @@ -54,18 +53,26 @@ static void BM_memcpy(benchmark::State& state) { BENCHMARK(BM_memcpy)->Arg(8)->Arg(64)->Arg(512)->Arg(1<<10)->Arg(8<<10); ``` -The preceding code is quite repetitive, and can be replaced with the -following short-hand. 
The following invocation will pick a few -appropriate arguments in the specified range and will generate a -microbenchmark for each such argument. +The preceding code is quite repetitive, and can be replaced with the following +short-hand. The following invocation will pick a few appropriate arguments in +the specified range and will generate a benchmark for each such argument. ```c++ BENCHMARK(BM_memcpy)->Range(8, 8<<10); ``` -You might have a microbenchmark that depends on two inputs. For -example, the following code defines a family of microbenchmarks for -measuring the speed of set insertion. +By default the arguments in the range are generated in multiples of eight and +the command above selects [ 8, 64, 512, 4k, 8k ]. In the following code the +range multiplier is changed to multiples of two. + +```c++ +BENCHMARK(BM_memcpy)->RangeMultiplier(2)->Range(8, 8<<10); +``` +Now arguments generated are [ 8, 16, 32, 64, 128, 256, 512, 1024, 2k, 4k, 8k ]. + +You might have a benchmark that depends on two inputs. For example, the +following code defines a family of benchmarks for measuring the speed of set +insertion. ```c++ static void BM_SetInsert(benchmark::State& state) { @@ -88,19 +95,18 @@ BENCHMARK(BM_SetInsert) ->ArgPair(8<<10, 512); ``` -The preceding code is quite repetitive, and can be replaced with -the following short-hand. The following macro will pick a few -appropriate arguments in the product of the two specified ranges -and will generate a microbenchmark for each such pair. +The preceding code is quite repetitive, and can be replaced with the following +short-hand. The following macro will pick a few appropriate arguments in the +product of the two specified ranges and will generate a benchmark for each such +pair. ```c++ BENCHMARK(BM_SetInsert)->RangePair(1<<10, 8<<10, 1, 512); ``` -For more complex patterns of inputs, passing a custom function -to Apply allows programmatic specification of an -arbitrary set of arguments to run the microbenchmark on. -The following example enumerates a dense range on one parameter, +For more complex patterns of inputs, passing a custom function to `Apply` allows +programmatic specification of an arbitrary set of arguments on which to run the +benchmark. The following example enumerates a dense range on one parameter, and a sparse range on the second. ```c++ @@ -112,9 +118,44 @@ static void CustomArguments(benchmark::internal::Benchmark* b) { BENCHMARK(BM_SetInsert)->Apply(CustomArguments); ``` -Templated microbenchmarks work the same way: -Produce then consume 'size' messages 'iters' times -Measures throughput in the absence of multiprogramming. +### Calculate asymptotic complexity (Big O) +Asymptotic complexity might be calculated for a family of benchmarks. The +following code will calculate the coefficient for the high-order term in the +running time and the normalized root-mean square error of string comparison. + +```c++ +static void BM_StringCompare(benchmark::State& state) { + std::string s1(state.range_x(), '-'); + std::string s2(state.range_x(), '-'); + while (state.KeepRunning()) { + benchmark::DoNotOptimize(s1.compare(s2)); + } + state.SetComplexityN(state.range_x()); +} +BENCHMARK(BM_StringCompare) + ->RangeMultiplier(2)->Range(1<<10, 1<<18)->Complexity(benchmark::oN); +``` + +As shown in the following invocation, asymptotic complexity might also be +calculated automatically. 
+ +```c++ +BENCHMARK(BM_StringCompare) + ->RangeMultiplier(2)->Range(1<<10, 1<<18)->Complexity(); +``` + +The following code will specify asymptotic complexity with a lambda function, +that might be used to customize high-order term calculation. + +```c++ +BENCHMARK(BM_StringCompare)->RangeMultiplier(2) + ->Range(1<<10, 1<<18)->Complexity([](int n)->double{return n; }); +``` + +### Templated benchmarks +Templated benchmarks work the same way: This example produces and consumes +messages of size `sizeof(v)` `range_x` times. It also outputs throughput in the +absence of multiprogramming. ```c++ template <class Q> int BM_Sequential(benchmark::State& state) { @@ -145,11 +186,32 @@ Three macros are provided for adding benchmark templates. #define BENCHMARK_TEMPLATE2(func, arg1, arg2) ``` +## Passing arbitrary arguments to a benchmark +In C++11 it is possible to define a benchmark that takes an arbitrary number +of extra arguments. The `BENCHMARK_CAPTURE(func, test_case_name, ...args)` +macro creates a benchmark that invokes `func` with the `benchmark::State` as +the first argument followed by the specified `args...`. +The `test_case_name` is appended to the name of the benchmark and +should describe the values passed. + +```c++ +template <class ...ExtraArgs>` +void BM_takes_args(benchmark::State& state, ExtraArgs&&... extra_args) { + [...] +} +// Registers a benchmark named "BM_takes_args/int_string_test` that passes +// the specified values to `extra_args`. +BENCHMARK_CAPTURE(BM_takes_args, int_string_test, 42, std::string("abc")); +``` +Note that elements of `...args` may refer to global variables. Users should +avoid modifying global state inside of a benchmark. + +### Multithreaded benchmarks In a multithreaded test (benchmark invoked by multiple threads simultaneously), it is guaranteed that none of the threads will start until all have called -KeepRunning, and all will have finished before KeepRunning returns false. As -such, any global setup or teardown you want to do can be -wrapped in a check against the thread index: +`KeepRunning`, and all will have finished before KeepRunning returns false. As +such, any global setup or teardown can be wrapped in a check against the thread +index: ```c++ static void BM_MultiThreaded(benchmark::State& state) { @@ -176,8 +238,49 @@ BENCHMARK(BM_test)->Range(8, 8<<10)->UseRealTime(); Without `UseRealTime`, CPU time is used by default. + +## Manual timing +For benchmarking something for which neither CPU time nor real-time are +correct or accurate enough, completely manual timing is supported using +the `UseManualTime` function. + +When `UseManualTime` is used, the benchmarked code must call +`SetIterationTime` once per iteration of the `KeepRunning` loop to +report the manually measured time. + +An example use case for this is benchmarking GPU execution (e.g. OpenCL +or CUDA kernels, OpenGL or Vulkan or Direct3D draw calls), which cannot +be accurately measured using CPU time or real-time. Instead, they can be +measured accurately using a dedicated API, and these measurement results +can be reported back with `SetIterationTime`. 
+ +```c++ +static void BM_ManualTiming(benchmark::State& state) { + int microseconds = state.range_x(); + std::chrono::duration<double, std::micro> sleep_duration { + static_cast<double>(microseconds) + }; + + while (state.KeepRunning()) { + auto start = std::chrono::high_resolution_clock::now(); + // Simulate some useful workload with a sleep + std::this_thread::sleep_for(sleep_duration); + auto end = std::chrono::high_resolution_clock::now(); + + auto elapsed_seconds = + std::chrono::duration_cast<std::chrono::duration<double>>( + end - start); + + state.SetIterationTime(elapsed_seconds.count()); + } +} +BENCHMARK(BM_ManualTiming)->Range(1, 1<<17)->UseManualTime(); +``` + +### Preventing optimisation To prevent a value or expression from being optimized away by the compiler -the `benchmark::DoNotOptimize(...)` function can be used. +the `benchmark::DoNotOptimize(...)` and `benchmark::ClobberMemory()` +functions can be used. ```c++ static void BM_test(benchmark::State& state) { @@ -190,8 +293,77 @@ static void BM_test(benchmark::State& state) { } ``` -Benchmark Fixtures ------------------- +`DoNotOptimize(<expr>)` forces the *result* of `<expr>` to be stored in either +memory or a register. For GNU based compilers it acts as read/write barrier +for global memory. More specifically it forces the compiler to flush pending +writes to memory and reload any other values as necessary. + +Note that `DoNotOptimize(<expr>)` does not prevent optimizations on `<expr>` +in any way. `<expr>` may even be removed entirely when the result is already +known. For example: + +```c++ + /* Example 1: `<expr>` is removed entirely. */ + int foo(int x) { return x + 42; } + while (...) DoNotOptimize(foo(0)); // Optimized to DoNotOptimize(42); + + /* Example 2: Result of '<expr>' is only reused */ + int bar(int) __attribute__((const)); + while (...) DoNotOptimize(bar(0)); // Optimized to: + // int __result__ = bar(0); + // while (...) DoNotOptimize(__result__); +``` + +The second tool for preventing optimizations is `ClobberMemory()`. In essence +`ClobberMemory()` forces the compiler to perform all pending writes to global +memory. Memory managed by block scope objects must be "escaped" using +`DoNotOptimize(...)` before it can be clobbered. In the below example +`ClobberMemory()` prevents the call to `v.push_back(42)` from being optimized +away. + +```c++ +static void BM_vector_push_back(benchmark::State& state) { + while (state.KeepRunning()) { + std::vector<int> v; + v.reserve(1); + benchmark::DoNotOptimize(v.data()); // Allow v.data() to be clobbered. + v.push_back(42); + benchmark::ClobberMemory(); // Force 42 to be written to memory. + } +} +``` + +Note that `ClobberMemory()` is only available for GNU based compilers. + +### Set time unit manually +If a benchmark runs a few milliseconds it may be hard to visually compare the +measured times, since the output data is given in nanoseconds per default. In +order to manually set the time unit, you can specify it manually: + +```c++ +BENCHMARK(BM_test)->Unit(benchmark::kMillisecond); +``` + +## Controlling number of iterations +In all cases, the number of iterations for which the benchmark is run is +governed by the amount of time the benchmark takes. Concretely, the number of +iterations is at least one, not more than 1e9, until CPU time is greater than +the minimum time, or the wallclock time is 5x minimum time. The minimum time is +set as a flag `--benchmark_min_time` or per benchmark by calling `MinTime` on +the registered benchmark object. 
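
For example, a minimal sketch of the per-benchmark form (an illustration only; it reuses the placeholder name `BM_test` from earlier in this README together with the `MinTime` setter described above) might look like:

```c++
static void BM_test(benchmark::State& state) {
  while (state.KeepRunning()) {
    // code to benchmark
  }
}
// Keep iterating until at least 2 seconds of benchmark time have accumulated,
// overriding the global --benchmark_min_time flag for this benchmark only.
BENCHMARK(BM_test)->MinTime(2.0);
```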
+ +## Reporting the mean and standard devation by repeated benchmarks +By default each benchmark is run once and that single result is reported. +However benchmarks are often noisy and a single result may not be representative +of the overall behavior. For this reason it's possible to repeatedly rerun the +benchmark. + +The number of runs of each benchmark is specified globally by the +`--benchmark_repetitions` flag or on a per benchmark basis by calling +`Repetitions` on the registered benchmark object. When a benchmark is run +more than once the mean and standard deviation of the runs will be reported. + +## Fixtures Fixture tests are created by first defining a type that derives from ::benchmark::Fixture and then creating/registering the tests using the following macros: @@ -221,10 +393,41 @@ BENCHMARK_REGISTER_F(MyFixture, BarTest)->Threads(2); /* BarTest is now registered */ ``` -Output Formats --------------- +## Exiting Benchmarks in Error + +When errors caused by external influences, such as file I/O and network +communication, occur within a benchmark the +`State::SkipWithError(const char* msg)` function can be used to skip that run +of benchmark and report the error. Note that only future iterations of the +`KeepRunning()` are skipped. Users may explicitly return to exit the +benchmark immediately. + +The `SkipWithError(...)` function may be used at any point within the benchmark, +including before and after the `KeepRunning()` loop. + +For example: + +```c++ +static void BM_test(benchmark::State& state) { + auto resource = GetResource(); + if (!resource.good()) { + state.SkipWithError("Resource is not good!"); + // KeepRunning() loop will not be entered. + } + while (state.KeepRunning()) { + auto data = resource.read_data(); + if (!resource.good()) { + state.SkipWithError("Failed to read data!"); + break; // Needed to skip the rest of the iteration. + } + do_stuff(data); + } +} +``` + +## Output Formats The library supports multiple output formats. Use the -`--benchmark_format=<tabular|json>` flag to set the format type. `tabular` is +`--benchmark_format=<tabular|json|csv>` flag to set the format type. `tabular` is the default format. The Tabular format is intended to be a human readable format. By default @@ -290,8 +493,7 @@ name,iterations,real_time,cpu_time,bytes_per_second,items_per_second,label "BM_SetInsert/1024/10",106365,17238.4,8421.53,4.74973e+06,1.18743e+06, ``` -Debug vs Release ----------------- +## Debug vs Release By default, benchmark builds as a debug library. You will see a warning in the output when this is the case. To build it as a release library instead, use: ``` @@ -304,6 +506,5 @@ To enable link-time optimisation, use cmake -DCMAKE_BUILD_TYPE=Release -DBENCHMARK_ENABLE_LTO=true ``` -Linking against the library ---------------------------- +## Linking against the library When using gcc, it is necessary to link against pthread to avoid runtime exceptions. This is due to how gcc implements std::thread. See [issue #67](https://github.com/google/benchmark/issues/67) for more details. 
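
The fixtures section above refers to registration macros whose full example is not visible in this diff. A small sketch of how they typically fit together could be the following (an illustration only: `MyFixture`, `FooTest`, and `BarTest` are placeholder names, and `BENCHMARK_F`, `BENCHMARK_DEFINE_F`, and `BENCHMARK_REGISTER_F` are assumed to be the macros in question, `BENCHMARK_REGISTER_F` being the one visible in the hunk context above):

```c++
#include "benchmark/benchmark.h"

class MyFixture : public ::benchmark::Fixture {
 public:
  void SetUp(const ::benchmark::State&) { /* per-run setup */ }
  void TearDown(const ::benchmark::State&) { /* per-run teardown */ }
};

// Defines and registers FooTest in a single step.
BENCHMARK_F(MyFixture, FooTest)(benchmark::State& st) {
  while (st.KeepRunning()) {
    // code to benchmark
  }
}

// Defines BarTest without registering it, so that extra options (here a
// thread count) can be attached when it is registered explicitly.
BENCHMARK_DEFINE_F(MyFixture, BarTest)(benchmark::State& st) {
  while (st.KeepRunning()) {
    // code to benchmark
  }
}
BENCHMARK_REGISTER_F(MyFixture, BarTest)->Threads(2);
```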
diff --git a/cmake/CXXFeatureCheck.cmake b/cmake/CXXFeatureCheck.cmake index 23ee8ac..3059024 100644 --- a/cmake/CXXFeatureCheck.cmake +++ b/cmake/CXXFeatureCheck.cmake @@ -21,12 +21,15 @@ function(cxx_feature_check FILE) string(TOLOWER ${FILE} FILE) string(TOUPPER ${FILE} VAR) string(TOUPPER "HAVE_${VAR}" FEATURE) + if (DEFINED HAVE_${VAR}) + return() + endif() message("-- Performing Test ${FEATURE}") try_run(RUN_${FEATURE} COMPILE_${FEATURE} ${CMAKE_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/cmake/${FILE}.cpp) if(RUN_${FEATURE} EQUAL 0) message("-- Performing Test ${FEATURE} -- success") - set(HAVE_${VAR} 1 PARENT_SCOPE) + set(HAVE_${VAR} 1 CACHE INTERNAL "Feature test for ${FILE}" PARENT_SCOPE) add_definitions(-DHAVE_${VAR}) else() if(NOT COMPILE_${FEATURE}) diff --git a/cmake/posix_regex.cpp b/cmake/posix_regex.cpp index a31af80..466dc62 100644 --- a/cmake/posix_regex.cpp +++ b/cmake/posix_regex.cpp @@ -7,6 +7,8 @@ int main() { if (ec != 0) { return ec; } - return regexec(&re, str.c_str(), 0, nullptr, 0) ? -1 : 0; + int ret = regexec(&re, str.c_str(), 0, nullptr, 0) ? -1 : 0; + regfree(&re); + return ret; } diff --git a/include/benchmark/benchmark_api.h b/include/benchmark/benchmark_api.h index 0c5115d..664ca2a 100644 --- a/include/benchmark/benchmark_api.h +++ b/include/benchmark/benchmark_api.h @@ -137,6 +137,13 @@ static void BM_MultiThreaded(benchmark::State& state) { } } BENCHMARK(BM_MultiThreaded)->Threads(4); + + +If a benchmark runs a few milliseconds it may be hard to visually compare the +measured times, since the output data is given in nanoseconds per default. In +order to manually set the time unit, you can specify it manually: + +BENCHMARK(BM_test)->Unit(benchmark::kMillisecond); */ #ifndef BENCHMARK_BENCHMARK_API_H_ @@ -153,10 +160,17 @@ class BenchmarkReporter; void Initialize(int* argc, char** argv); -// Otherwise, run all benchmarks specified by the --benchmark_filter flag, -// and exit after running the benchmarks. -void RunSpecifiedBenchmarks(); -void RunSpecifiedBenchmarks(BenchmarkReporter* reporter); +// Generate a list of benchmarks matching the specified --benchmark_filter flag +// and if --benchmark_list_tests is specified return after printing the name +// of each matching benchmark. Otherwise run each matching benchmark and +// report the results. +// +// The second overload reports the results using the specified 'reporter'. +// +// RETURNS: The number of matching benchmarks. +size_t RunSpecifiedBenchmarks(); +size_t RunSpecifiedBenchmarks(BenchmarkReporter* reporter); + // If this routine is called, peak memory allocation past this point in the // benchmark is reported at the end of the benchmark report line. (It is @@ -193,61 +207,90 @@ Benchmark* RegisterBenchmarkInternal(Benchmark*); // The DoNotOptimize(...) function can be used to prevent a value or // expression from being optimized away by the compiler. This function is -// intented to add little to no overhead. -// See: http://stackoverflow.com/questions/28287064 -#if defined(__clang__) && defined(__GNUC__) -// TODO(ericwf): Clang has a bug where it tries to always use a register -// even if value must be stored in memory. This causes codegen to fail. -// To work around this we remove the "r" modifier so the operand is always -// loaded into memory. +// intended to add little to no overhead. 
+// See: https://youtu.be/nXaxk27zwlk?t=2441 +#if defined(__GNUC__) template <class Tp> inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) { - asm volatile("" : "+m" (const_cast<Tp&>(value))); + asm volatile("" : : "g"(value) : "memory"); } -#elif defined(__GNUC__) -template <class Tp> -inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) { - asm volatile("" : "+rm" (const_cast<Tp&>(value))); +// Force the compiler to flush pending writes to global memory. Acts as an +// effective read/write barrier +inline BENCHMARK_ALWAYS_INLINE void ClobberMemory() { + asm volatile("" : : : "memory"); } #else template <class Tp> inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) { internal::UseCharPointer(&reinterpret_cast<char const volatile&>(value)); } +// FIXME Add ClobberMemory() for non-gnu compilers #endif +// TimeUnit is passed to a benchmark in order to specify the order of magnitude +// for the measured time. +enum TimeUnit { + kNanosecond, + kMicrosecond, + kMillisecond +}; + +// BigO is passed to a benchmark in order to specify the asymptotic computational +// complexity for the benchmark. In case oAuto is selected, complexity will be +// calculated automatically to the best fit. +enum BigO { + oNone, + o1, + oN, + oNSquared, + oNCubed, + oLogN, + oNLogN, + oAuto, + oLambda +}; + +// BigOFunc is passed to a benchmark in order to specify the asymptotic +// computational complexity for the benchmark. +typedef double(BigOFunc)(int); // State is passed to a running Benchmark and contains state for the // benchmark to use. class State { public: - State(size_t max_iters, bool has_x, int x, bool has_y, int y, int thread_i, int n_threads); + State(size_t max_iters, bool has_x, int x, bool has_y, int y, + int thread_i, int n_threads); - // Returns true iff the benchmark should continue through another iteration. + // Returns true if the benchmark should continue through another iteration. // NOTE: A benchmark may not return from the test until KeepRunning() has // returned false. bool KeepRunning() { if (BENCHMARK_BUILTIN_EXPECT(!started_, false)) { - ResumeTiming(); - started_ = true; + assert(!finished_); + started_ = true; + ResumeTiming(); } bool const res = total_iterations_++ < max_iterations; if (BENCHMARK_BUILTIN_EXPECT(!res, false)) { - assert(started_); + assert(started_ && (!finished_ || error_occurred_)); + if (!error_occurred_) { PauseTiming(); - // Total iterations now is one greater than max iterations. Fix this. - total_iterations_ = max_iterations; + } + // Total iterations now is one greater than max iterations. Fix this. + total_iterations_ = max_iterations; + finished_ = true; } return res; } - // REQUIRES: timer is running + // REQUIRES: timer is running and 'SkipWithError(...)' has not been called + // by the current thread. // Stop the benchmark timer. If not called, the timer will be // automatically stopped after KeepRunning() returns false for the first time. // // For threaded benchmarks the PauseTiming() function acts // like a barrier. I.e., the ith call by a particular thread to this - // function will block until all threads have made their ith call. + // function will block until all active threads have made their ith call. // The timer will stop when the last thread has called this function. // // NOTE: PauseTiming()/ResumeTiming() are relatively @@ -255,13 +298,14 @@ public: // within each benchmark iteration, if possible. 
void PauseTiming(); - // REQUIRES: timer is not running + // REQUIRES: timer is not running and 'SkipWithError(...)' has not been called + // by the current thread. // Start the benchmark timer. The timer is NOT running on entrance to the // benchmark function. It begins running after the first call to KeepRunning() // // For threaded benchmarks the ResumeTiming() function acts // like a barrier. I.e., the ith call by a particular thread to this - // function will block until all threads have made their ith call. + // function will block until all active threads have made their ith call. // The timer will start when the last thread has called this function. // // NOTE: PauseTiming()/ResumeTiming() are relatively @@ -269,6 +313,34 @@ public: // within each benchmark iteration, if possible. void ResumeTiming(); + // REQUIRES: 'SkipWithError(...)' has not been called previously by the + // current thread. + // Skip any future iterations of the 'KeepRunning()' loop in the current + // thread and report an error with the specified 'msg'. After this call + // the user may explicitly 'return' from the benchmark. + // + // For threaded benchmarks only the current thread stops executing. If + // multiple threads report an error only the first error message is used. + // The current thread is no longer considered 'active' by + // 'PauseTiming()' and 'ResumingTiming()'. + // + // NOTE: Calling 'SkipWithError(...)' does not cause the benchmark to exit + // the current scope immediately. If the function is called from within + // the 'KeepRunning()' loop the current iteration will finish. It is the users + // responsibility to exit the scope as needed. + void SkipWithError(const char* msg); + + // REQUIRES: called exactly once per iteration of the KeepRunning loop. + // Set the manually measured time for this benchmark iteration, which + // is used instead of automatically measured time if UseManualTime() was + // specified. + // + // For threaded benchmarks the SetIterationTime() function acts + // like a barrier. I.e., the ith call by a particular thread to this + // function will block until all threads have made their ith call. + // The time will be set by the last thread to call this function. + void SetIterationTime(double seconds); + // Set the number of bytes processed by the current benchmark // execution. This routine is typically called once at the end of a // throughput oriented benchmark. If this routine is called with a @@ -286,6 +358,19 @@ public: return bytes_processed_; } + // If this routine is called with complexity_n > 0 and complexity report is requested for the + // family benchmark, then current benchmark will be part of the computation and complexity_n will + // represent the length of N. + BENCHMARK_ALWAYS_INLINE + void SetComplexityN(int complexity_n) { + complexity_n_ = complexity_n; + } + + BENCHMARK_ALWAYS_INLINE + size_t complexity_length_n() { + return complexity_n_; + } + // If this routine is called with items > 0, then an items/s // label is printed on the benchmark report line for the currently // executing benchmark. It is typically called at the end of a processing @@ -305,10 +390,10 @@ public: // If this routine is called, the specified label is printed at the // end of the benchmark report line for the currently executing // benchmark. Example: - // static void BM_Compress(int iters) { + // static void BM_Compress(benchmark::State& state) { // ... 
// double compress = input_size / output_size; - // benchmark::SetLabel(StringPrintf("compress:%.1f%%", 100.0*compression)); + // state.SetLabel(StringPrintf("compress:%.1f%%", 100.0*compression)); // } // Produces output that looks like: // BM_Compress 50 50 14115038 compress:27.3% @@ -346,6 +431,7 @@ public: private: bool started_; + bool finished_; size_t total_iterations_; bool has_range_x_; @@ -357,6 +443,11 @@ private: size_t bytes_processed_; size_t items_processed_; + int complexity_n_; + +public: + // FIXME: Make this private somehow. + bool error_occurred_; public: // Index of the executing thread. Values from [0, threads). const int thread_index; @@ -390,6 +481,9 @@ public: // REQUIRES: The function passed to the constructor must accept an arg1. Benchmark* Arg(int x); + // Run this benchmark with the given time unit for the generated output report + Benchmark* Unit(TimeUnit unit); + // Run this benchmark once for a number of values picked from the // range [start..limit]. (start and limit are always picked.) // REQUIRES: The function passed to the constructor must accept an arg1. @@ -416,10 +510,20 @@ public: // Threads, etc. Benchmark* Apply(void (*func)(Benchmark* benchmark)); + // Set the range multiplier for non-dense range. If not called, the range multiplier + // kRangeMultiplier will be used. + Benchmark* RangeMultiplier(int multiplier); + // Set the minimum amount of time to use when running this benchmark. This // option overrides the `benchmark_min_time` flag. + // REQUIRES: `t > 0` Benchmark* MinTime(double t); + // Specify the amount of times to repeat this benchmark. This option overrides + // the `benchmark_repetitions` flag. + // REQUIRES: `n > 0` + Benchmark* Repetitions(int n); + // If a particular benchmark is I/O bound, runs multiple threads internally or // if for some reason CPU timings are not representative, call this method. If // called, the elapsed time will be used to control how many iterations are @@ -427,6 +531,21 @@ public: // called, the cpu time used by the benchmark will be used. Benchmark* UseRealTime(); + // If a benchmark must measure time manually (e.g. if GPU execution time is being + // measured), call this method. If called, each benchmark iteration should call + // SetIterationTime(seconds) to report the measured time, which will be used + // to control how many iterations are run, and in the printing of items/second + // or MB/second values. + Benchmark* UseManualTime(); + + // Set the asymptotic computational complexity for the benchmark. If called + // the asymptotic computational complexity will be shown on the output. + Benchmark* Complexity(BigO complexity = benchmark::oAuto); + + // Set the asymptotic computational complexity for the benchmark. If called + // the asymptotic computational complexity will be shown on the output. + Benchmark* Complexity(BigOFunc* complexity); + // Support for running multiple copies of the same benchmark concurrently // in multiple threads. This may be useful when measuring the scaling // of some piece of code. 
@@ -491,11 +610,11 @@ public: virtual void Run(State& st) { this->SetUp(st); this->BenchmarkCase(st); - this->TearDown(); + this->TearDown(st); } virtual void SetUp(const State&) {} - virtual void TearDown() {} + virtual void TearDown(const State&) {} protected: virtual void BenchmarkCase(State&) = 0; @@ -534,10 +653,33 @@ protected: // Old-style macros #define BENCHMARK_WITH_ARG(n, a) BENCHMARK(n)->Arg((a)) #define BENCHMARK_WITH_ARG2(n, a1, a2) BENCHMARK(n)->ArgPair((a1), (a2)) +#define BENCHMARK_WITH_UNIT(n, t) BENCHMARK(n)->Unit((t)) #define BENCHMARK_RANGE(n, lo, hi) BENCHMARK(n)->Range((lo), (hi)) #define BENCHMARK_RANGE2(n, l1, h1, l2, h2) \ BENCHMARK(n)->RangePair((l1), (h1), (l2), (h2)) +#if __cplusplus >= 201103L + +// Register a benchmark which invokes the function specified by `func` +// with the additional arguments specified by `...`. +// +// For example: +// +// template <class ...ExtraArgs>` +// void BM_takes_args(benchmark::State& state, ExtraArgs&&... extra_args) { +// [...] +//} +// /* Registers a benchmark named "BM_takes_args/int_string_test` */ +// BENCHMARK_CAPTURE(BM_takes_args, int_string_test, 42, std::string("abc")); +#define BENCHMARK_CAPTURE(func, test_case_name, ...) \ + BENCHMARK_PRIVATE_DECLARE(func) = \ + (::benchmark::internal::RegisterBenchmarkInternal( \ + new ::benchmark::internal::FunctionBenchmark( \ + #func "/" #test_case_name, \ + [](::benchmark::State& st) { func(st, __VA_ARGS__); }))) + +#endif // __cplusplus >= 11 + // This will register a benchmark for a templatized function. For example: // // template<int arg> diff --git a/include/benchmark/macros.h b/include/benchmark/macros.h index 3e9540e..09d13c1 100644 --- a/include/benchmark/macros.h +++ b/include/benchmark/macros.h @@ -28,15 +28,23 @@ # define BENCHMARK_UNUSED __attribute__((unused)) # define BENCHMARK_ALWAYS_INLINE __attribute__((always_inline)) # define BENCHMARK_NOEXCEPT noexcept +# define BENCHMARK_NOEXCEPT_OP(x) noexcept(x) #elif defined(_MSC_VER) && !defined(__clang__) # define BENCHMARK_UNUSED # define BENCHMARK_ALWAYS_INLINE __forceinline -# define BENCHMARK_NOEXCEPT +# if _MSC_VER >= 1900 +# define BENCHMARK_NOEXCEPT noexcept +# define BENCHMARK_NOEXCEPT_OP(x) noexcept(x) +# else +# define BENCHMARK_NOEXCEPT +# define BENCHMARK_NOEXCEPT_OP(x) +# endif # define __func__ __FUNCTION__ #else # define BENCHMARK_UNUSED # define BENCHMARK_ALWAYS_INLINE # define BENCHMARK_NOEXCEPT +# define BENCHMARK_NOEXCEPT_OP(x) #endif #if defined(__GNUC__) diff --git a/include/benchmark/reporter.h b/include/benchmark/reporter.h index d23ab65..22c97a0 100644 --- a/include/benchmark/reporter.h +++ b/include/benchmark/reporter.h @@ -14,11 +14,13 @@ #ifndef BENCHMARK_REPORTER_H_ #define BENCHMARK_REPORTER_H_ +#include <cassert> +#include <iosfwd> #include <string> #include <utility> #include <vector> -#include "benchmark_api.h" // For forward declaration of BenchmarkReporter +#include "benchmark_api.h" // For forward declaration of BenchmarkReporter namespace benchmark { @@ -40,27 +42,62 @@ class BenchmarkReporter { struct Run { Run() : + error_occurred(false), iterations(1), + time_unit(kNanosecond), real_accumulated_time(0), cpu_accumulated_time(0), bytes_per_second(0), items_per_second(0), - max_heapbytes_used(0) {} + max_heapbytes_used(0), + complexity(oNone), + complexity_n(0), + report_big_o(false), + report_rms(false) {} std::string benchmark_name; std::string report_label; // Empty if not set by benchmark. 
+ bool error_occurred; + std::string error_message; + int64_t iterations; + TimeUnit time_unit; double real_accumulated_time; double cpu_accumulated_time; + // Return a value representing the real time per iteration in the unit + // specified by 'time_unit'. + // NOTE: If 'iterations' is zero the returned value represents the + // accumulated time. + double GetAdjustedRealTime() const; + + // Return a value representing the cpu time per iteration in the unit + // specified by 'time_unit'. + // NOTE: If 'iterations' is zero the returned value represents the + // accumulated time. + double GetAdjustedCPUTime() const; + // Zero if not set by benchmark. double bytes_per_second; double items_per_second; // This is set to 0.0 if memory tracing is not enabled. double max_heapbytes_used; + + // Keep track of arguments to compute asymptotic complexity + BigO complexity; + BigOFunc* complexity_lambda; + int complexity_n; + + // Inform print function whether the current run is a complexity report + bool report_big_o; + bool report_rms; }; + // Construct a BenchmarkReporter with the output stream set to 'std::cout' + // and the error stream set to 'std::cerr' + BenchmarkReporter(); + // Called once for every suite of benchmarks run. // The parameter "context" contains information that the // reporter may wish to use when generating its report, for example the @@ -70,18 +107,50 @@ class BenchmarkReporter { virtual bool ReportContext(const Context& context) = 0; // Called once for each group of benchmark runs, gives information about - // cpu-time and heap memory usage during the benchmark run. - // Note that all the grouped benchmark runs should refer to the same - // benchmark, thus have the same name. + // cpu-time and heap memory usage during the benchmark run. If the group + // of runs contained more than two entries then 'report' contains additional + // elements representing the mean and standard deviation of those runs. + // Additionally if this group of runs was the last in a family of benchmarks + // 'reports' contains additional entries representing the asymptotic + // complexity and RMS of that benchmark family. virtual void ReportRuns(const std::vector<Run>& report) = 0; // Called once and only once after ever group of benchmarks is run and // reported. - virtual void Finalize(); + virtual void Finalize() {} + + // REQUIRES: The object referenced by 'out' is valid for the lifetime + // of the reporter. + void SetOutputStream(std::ostream* out) { + assert(out); + output_stream_ = out; + } + + // REQUIRES: The object referenced by 'err' is valid for the lifetime + // of the reporter. + void SetErrorStream(std::ostream* err) { + assert(err); + error_stream_ = err; + } + + std::ostream& GetOutputStream() const { + return *output_stream_; + } + + std::ostream& GetErrorStream() const { + return *error_stream_; + } virtual ~BenchmarkReporter(); -protected: - static void ComputeStats(std::vector<Run> const& reports, Run* mean, Run* stddev); + + // Write a human readable string to 'out' representing the specified + // 'context'. + // REQUIRES: 'out' is non-null. + static void PrintBasicContext(std::ostream* out, Context const& context); + + private: + std::ostream* output_stream_; + std::ostream* error_stream_; }; // Simple reporter that outputs benchmark data to the console. 
This is the @@ -90,33 +159,58 @@ class ConsoleReporter : public BenchmarkReporter { public: virtual bool ReportContext(const Context& context); virtual void ReportRuns(const std::vector<Run>& reports); -protected: + + protected: virtual void PrintRunData(const Run& report); size_t name_field_width_; }; class JSONReporter : public BenchmarkReporter { -public: + public: JSONReporter() : first_report_(true) {} virtual bool ReportContext(const Context& context); virtual void ReportRuns(const std::vector<Run>& reports); virtual void Finalize(); -private: + private: void PrintRunData(const Run& report); bool first_report_; }; class CSVReporter : public BenchmarkReporter { -public: + public: virtual bool ReportContext(const Context& context); virtual void ReportRuns(const std::vector<Run>& reports); -private: + private: void PrintRunData(const Run& report); }; -} // end namespace benchmark -#endif // BENCHMARK_REPORTER_H_ +inline const char* GetTimeUnitString(TimeUnit unit) { + switch (unit) { + case kMillisecond: + return "ms"; + case kMicrosecond: + return "us"; + case kNanosecond: + default: + return "ns"; + } +} + +inline double GetTimeUnitMultiplier(TimeUnit unit) { + switch (unit) { + case kMillisecond: + return 1e3; + case kMicrosecond: + return 1e6; + case kNanosecond: + default: + return 1e9; + } +} + +} // end namespace benchmark +#endif // BENCHMARK_REPORTER_H_ diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 811d075..6dab64b 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -5,7 +5,7 @@ include_directories(${PROJECT_SOURCE_DIR}/src) set(SOURCE_FILES "benchmark.cc" "colorprint.cc" "commandlineflags.cc" "console_reporter.cc" "csv_reporter.cc" "json_reporter.cc" "log.cc" "reporter.cc" "sleep.cc" "string_util.cc" - "sysinfo.cc" "walltime.cc") + "sysinfo.cc" "walltime.cc" "complexity.cc") # Determine the correct regular expression engine to use if(HAVE_STD_REGEX) set(RE_FILES "re_std.cc") diff --git a/src/benchmark.cc b/src/benchmark.cc index 08b180e..cb8e132 100644 --- a/src/benchmark.cc +++ b/src/benchmark.cc @@ -33,6 +33,7 @@ #include "check.h" #include "commandlineflags.h" +#include "complexity.h" #include "log.h" #include "mutex.h" #include "re.h" @@ -64,9 +65,9 @@ DEFINE_int32(benchmark_repetitions, 1, "The number of runs of each benchmark. If greater than 1, the " "mean and standard deviation of the runs will be reported."); -DEFINE_string(benchmark_format, "tabular", +DEFINE_string(benchmark_format, "console", "The format to use for console output. Valid values are " - "'tabular', 'json', or 'csv'."); + "'console', 'json', or 'csv'."); DEFINE_bool(color_print, true, "Enables colorized logging."); @@ -112,13 +113,24 @@ std::string* GetReportLabel() { return &label; } +// Global variable so that a benchmark can report an error as a human readable +// string. If error_message is null no error occurred. +#if defined(_MSC_VER) && _MSC_VER <= 1800 +typedef char* error_message_type; +#else +typedef const char* error_message_type; +#endif + +static std::atomic<error_message_type> error_message = ATOMIC_VAR_INIT(nullptr); + // TODO(ericwf): support MallocCounter. 
//static benchmark::MallocCounter *benchmark_mc; struct ThreadStats { - ThreadStats() : bytes_processed(0), items_processed(0) {} + ThreadStats() : bytes_processed(0), items_processed(0), complexity_n(0) {} int64_t bytes_processed; int64_t items_processed; + int complexity_n; }; // Timer management class @@ -126,13 +138,16 @@ class TimerManager { public: TimerManager(int num_threads, Notification* done) : num_threads_(num_threads), + running_threads_(num_threads), done_(done), running_(false), real_time_used_(0), cpu_time_used_(0), + manual_time_used_(0), num_finalized_(0), phase_number_(0), - entered_(0) { + entered_(0) + { } // Called by each thread @@ -170,6 +185,21 @@ class TimerManager { } // Called by each thread + void SetIterationTime(double seconds) EXCLUDES(lock_) { + bool last_thread = false; + { + MutexLock ml(lock_); + last_thread = Barrier(ml); + if (last_thread) { + manual_time_used_ += seconds; + } + } + if (last_thread) { + phase_condition_.notify_all(); + } + } + + // Called by each thread void Finalize() EXCLUDES(lock_) { MutexLock l(lock_); num_finalized_++; @@ -180,6 +210,15 @@ class TimerManager { } } + void RemoveErroredThread() EXCLUDES(lock_) { + MutexLock ml(lock_); + int last_thread = --running_threads_ == 0; + if (last_thread && running_) + InternalStop(); + else if (!last_thread) + phase_condition_.notify_all(); + } + // REQUIRES: timer is not running double real_time_used() EXCLUDES(lock_) { MutexLock l(lock_); @@ -194,10 +233,18 @@ class TimerManager { return cpu_time_used_; } + // REQUIRES: timer is not running + double manual_time_used() EXCLUDES(lock_) { + MutexLock l(lock_); + CHECK(!running_); + return manual_time_used_; + } + private: Mutex lock_; Condition phase_condition_; int num_threads_; + int running_threads_; Notification* done_; bool running_; // Is the timer running @@ -207,6 +254,8 @@ class TimerManager { // Accumulated time so far (does not contain current slice if running_) double real_time_used_; double cpu_time_used_; + // Manually set iteration time. User sets this with SetIterationTime(seconds). + double manual_time_used_; // How many threads have called Finalize() int num_finalized_; @@ -227,22 +276,24 @@ class TimerManager { // entered the barrier. Returns iff this is the last thread to // enter the barrier. bool Barrier(MutexLock& ml) REQUIRES(lock_) { - CHECK_LT(entered_, num_threads_); + CHECK_LT(entered_, running_threads_); entered_++; - if (entered_ < num_threads_) { + if (entered_ < running_threads_) { // Wait for all threads to enter int phase_number_cp = phase_number_; auto cb = [this, phase_number_cp]() { - return this->phase_number_ > phase_number_cp; + return this->phase_number_ > phase_number_cp || + entered_ == running_threads_; // A thread has aborted in error }; phase_condition_.wait(ml.native_handle(), cb); - return false; // I was not the last one - } else { - // Last thread has reached the barrier - phase_number_++; - entered_ = 0; - return true; + if (phase_number_ > phase_number_cp) + return false; + // else (running_threads_ == entered_) and we are the last thread. 
} + // Last thread has reached the barrier + phase_number_++; + entered_ = 0; + return true; } }; @@ -261,7 +312,14 @@ struct Benchmark::Instance { int arg1; bool has_arg2; int arg2; + TimeUnit time_unit; + int range_multiplier; bool use_real_time; + bool use_manual_time; + BigO complexity; + BigOFunc* complexity_lambda; + bool last_benchmark_instance; + int repetitions; double min_time; int threads; // Number of concurrent threads to use bool multithreaded; // Is benchmark multi-threaded? @@ -294,12 +352,18 @@ public: ~BenchmarkImp(); void Arg(int x); + void Unit(TimeUnit unit); void Range(int start, int limit); void DenseRange(int start, int limit); void ArgPair(int start, int limit); void RangePair(int lo1, int hi1, int lo2, int hi2); + void RangeMultiplier(int multiplier); void MinTime(double n); + void Repetitions(int n); void UseRealTime(); + void UseManualTime(); + void Complexity(BigO complexity); + void ComplexityLambda(BigOFunc* complexity); void Threads(int t); void ThreadRange(int min_threads, int max_threads); void ThreadPerCpu(); @@ -313,8 +377,14 @@ private: std::string name_; int arg_count_; std::vector< std::pair<int, int> > args_; // Args for all benchmark runs + TimeUnit time_unit_; + int range_multiplier_; double min_time_; + int repetitions_; bool use_real_time_; + bool use_manual_time_; + BigO complexity_; + BigOFunc* complexity_lambda_; std::vector<int> thread_counts_; BenchmarkImp& operator=(BenchmarkImp const&); @@ -372,8 +442,14 @@ bool BenchmarkFamilies::FindBenchmarks( instance.arg1 = args.first; instance.has_arg2 = family->arg_count_ == 2; instance.arg2 = args.second; + instance.time_unit = family->time_unit_; + instance.range_multiplier = family->range_multiplier_; instance.min_time = family->min_time_; + instance.repetitions = family->repetitions_; instance.use_real_time = family->use_real_time_; + instance.use_manual_time = family->use_manual_time_; + instance.complexity = family->complexity_; + instance.complexity_lambda = family->complexity_lambda_; instance.threads = num_threads; instance.multithreaded = !(family->thread_counts_.empty()); @@ -387,7 +463,12 @@ bool BenchmarkFamilies::FindBenchmarks( if (!IsZero(family->min_time_)) { instance.name += StringPrintF("/min_time:%0.3f", family->min_time_); } - if (family->use_real_time_) { + if (family->repetitions_ != 0) { + instance.name += StringPrintF("/repeats:%d", family->repetitions_); + } + if (family->use_manual_time_) { + instance.name += "/manual_time"; + } else if (family->use_real_time_) { instance.name += "/real_time"; } @@ -397,6 +478,7 @@ bool BenchmarkFamilies::FindBenchmarks( } if (re.Match(instance.name)) { + instance.last_benchmark_instance = (args == family->args_.back()); benchmarks->push_back(instance); } } @@ -406,8 +488,10 @@ bool BenchmarkFamilies::FindBenchmarks( } BenchmarkImp::BenchmarkImp(const char* name) - : name_(name), arg_count_(-1), - min_time_(0.0), use_real_time_(false) { + : name_(name), arg_count_(-1), time_unit_(kNanosecond), + range_multiplier_(kRangeMultiplier), min_time_(0.0), repetitions_(0), + use_real_time_(false), use_manual_time_(false), + complexity_(oNone) { } BenchmarkImp::~BenchmarkImp() { @@ -419,11 +503,15 @@ void BenchmarkImp::Arg(int x) { args_.emplace_back(x, -1); } +void BenchmarkImp::Unit(TimeUnit unit) { + time_unit_ = unit; +} + void BenchmarkImp::Range(int start, int limit) { CHECK(arg_count_ == -1 || arg_count_ == 1); arg_count_ = 1; std::vector<int> arglist; - AddRange(&arglist, start, limit, kRangeMultiplier); + AddRange(&arglist, start, limit, 
range_multiplier_); for (int i : arglist) { args_.emplace_back(i, -1); @@ -450,8 +538,8 @@ void BenchmarkImp::RangePair(int lo1, int hi1, int lo2, int hi2) { CHECK(arg_count_ == -1 || arg_count_ == 2); arg_count_ = 2; std::vector<int> arglist1, arglist2; - AddRange(&arglist1, lo1, hi1, kRangeMultiplier); - AddRange(&arglist2, lo2, hi2, kRangeMultiplier); + AddRange(&arglist1, lo1, hi1, range_multiplier_); + AddRange(&arglist2, lo2, hi2, range_multiplier_); for (int i : arglist1) { for (int j : arglist2) { @@ -460,15 +548,40 @@ void BenchmarkImp::RangePair(int lo1, int hi1, int lo2, int hi2) { } } +void BenchmarkImp::RangeMultiplier(int multiplier) { + CHECK(multiplier > 1); + range_multiplier_ = multiplier; +} + void BenchmarkImp::MinTime(double t) { CHECK(t > 0.0); min_time_ = t; } + +void BenchmarkImp::Repetitions(int n) { + CHECK(n > 0); + repetitions_ = n; +} + void BenchmarkImp::UseRealTime() { + CHECK(!use_manual_time_) << "Cannot set UseRealTime and UseManualTime simultaneously."; use_real_time_ = true; } +void BenchmarkImp::UseManualTime() { + CHECK(!use_real_time_) << "Cannot set UseRealTime and UseManualTime simultaneously."; + use_manual_time_ = true; +} + +void BenchmarkImp::Complexity(BigO complexity){ + complexity_ = complexity; +} + +void BenchmarkImp::ComplexityLambda(BigOFunc* complexity) { + complexity_lambda_ = complexity; +} + void BenchmarkImp::Threads(int t) { CHECK_GT(t, 0); thread_counts_.push_back(t); @@ -493,6 +606,7 @@ void BenchmarkImp::SetName(const char* name) { void BenchmarkImp::AddRange(std::vector<int>* dst, int lo, int hi, int mult) { CHECK_GE(lo, 0); CHECK_GE(hi, lo); + CHECK_GE(mult, 2); // Add "lo" dst->push_back(lo); @@ -531,6 +645,11 @@ Benchmark* Benchmark::Arg(int x) { return this; } +Benchmark* Benchmark::Unit(TimeUnit unit) { + imp_->Unit(unit); + return this; +} + Benchmark* Benchmark::Range(int start, int limit) { imp_->Range(start, limit); return this; @@ -556,6 +675,17 @@ Benchmark* Benchmark::Apply(void (*custom_arguments)(Benchmark* benchmark)) { return this; } +Benchmark* Benchmark::RangeMultiplier(int multiplier) { + imp_->RangeMultiplier(multiplier); + return this; +} + + +Benchmark* Benchmark::Repetitions(int t) { + imp_->Repetitions(t); + return this; +} + Benchmark* Benchmark::MinTime(double t) { imp_->MinTime(t); return this; @@ -566,6 +696,22 @@ Benchmark* Benchmark::UseRealTime() { return this; } +Benchmark* Benchmark::UseManualTime() { + imp_->UseManualTime(); + return this; +} + +Benchmark* Benchmark::Complexity(BigO complexity) { + imp_->Complexity(complexity); + return this; +} + +Benchmark* Benchmark::Complexity(BigOFunc* complexity) { + imp_->Complexity(oLambda); + imp_->ComplexityLambda(complexity); + return this; +} + Benchmark* Benchmark::Threads(int t) { imp_->Threads(t); return this; @@ -593,7 +739,6 @@ void FunctionBenchmark::Run(State& st) { namespace { - // Execute one thread of benchmark b for the specified number of iterations. // Adds the stats collected for the thread into *total. 
void RunInThread(const benchmark::internal::Benchmark::Instance* b, @@ -607,13 +752,16 @@ void RunInThread(const benchmark::internal::Benchmark::Instance* b, MutexLock l(GetBenchmarkLock()); total->bytes_processed += st.bytes_processed(); total->items_processed += st.items_processed(); + total->complexity_n += st.complexity_length_n(); } timer_manager->Finalize(); } void RunBenchmark(const benchmark::internal::Benchmark::Instance& b, - BenchmarkReporter* br) EXCLUDES(GetBenchmarkLock()) { + BenchmarkReporter* br, + std::vector<BenchmarkReporter::Run>& complexity_reports) + EXCLUDES(GetBenchmarkLock()) { size_t iters = 1; std::vector<BenchmarkReporter::Run> reports; @@ -622,7 +770,9 @@ void RunBenchmark(const benchmark::internal::Benchmark::Instance& b, if (b.multithreaded) pool.resize(b.threads); - for (int i = 0; i < FLAGS_benchmark_repetitions; i++) { + const int repeats = b.repetitions != 0 ? b.repetitions + : FLAGS_benchmark_repetitions; + for (int i = 0; i < repeats; i++) { std::string mem; for (;;) { // Try benchmark @@ -632,6 +782,7 @@ void RunBenchmark(const benchmark::internal::Benchmark::Instance& b, MutexLock l(GetBenchmarkLock()); GetReportLabel()->clear(); } + error_message = nullptr; Notification done; timer_manager = std::unique_ptr<TimerManager>(new TimerManager(b.threads, &done)); @@ -647,7 +798,7 @@ void RunBenchmark(const benchmark::internal::Benchmark::Instance& b, thread.join(); } for (std::size_t ti = 0; ti < pool.size(); ++ti) { - pool[ti] = std::thread(&RunInThread, &b, iters, ti, &total); + pool[ti] = std::thread(&RunInThread, &b, iters, static_cast<int>(ti), &total); } } else { // Run directly in this thread @@ -658,6 +809,7 @@ void RunBenchmark(const benchmark::internal::Benchmark::Instance& b, const double cpu_accumulated_time = timer_manager->cpu_time_used(); const double real_accumulated_time = timer_manager->real_time_used(); + const double manual_accumulated_time = timer_manager->manual_time_used(); timer_manager.reset(); VLOG(2) << "Ran in " << cpu_accumulated_time << "/" @@ -665,7 +817,9 @@ void RunBenchmark(const benchmark::internal::Benchmark::Instance& b, // Base decisions off of real time if requested by this benchmark. double seconds = cpu_accumulated_time; - if (b.use_real_time) { + if (b.use_manual_time) { + seconds = manual_accumulated_time; + } else if (b.use_real_time) { seconds = real_accumulated_time; } @@ -674,35 +828,53 @@ void RunBenchmark(const benchmark::internal::Benchmark::Instance& b, MutexLock l(GetBenchmarkLock()); label = *GetReportLabel(); } + error_message_type error_msg = error_message; const double min_time = !IsZero(b.min_time) ? b.min_time : FLAGS_benchmark_min_time; // If this was the first run, was elapsed time or cpu time large enough? // If this is not the first run, go with the current value of iter. - if ((i > 0) || + if ((i > 0) || (error_msg != nullptr) || (iters >= kMaxIterations) || (seconds >= min_time) || (real_accumulated_time >= 5*min_time)) { - double bytes_per_second = 0; - if (total.bytes_processed > 0 && seconds > 0.0) { - bytes_per_second = (total.bytes_processed / seconds); - } - double items_per_second = 0; - if (total.items_processed > 0 && seconds > 0.0) { - items_per_second = (total.items_processed / seconds); - } // Create report about this benchmark run. BenchmarkReporter::Run report; report.benchmark_name = b.name; + report.error_occurred = error_msg != nullptr; + report.error_message = error_msg != nullptr ? 
error_msg : ""; report.report_label = label; // Report the total iterations across all threads. report.iterations = static_cast<int64_t>(iters) * b.threads; - report.real_accumulated_time = real_accumulated_time; - report.cpu_accumulated_time = cpu_accumulated_time; - report.bytes_per_second = bytes_per_second; - report.items_per_second = items_per_second; + report.time_unit = b.time_unit; + + if (!report.error_occurred) { + double bytes_per_second = 0; + if (total.bytes_processed > 0 && seconds > 0.0) { + bytes_per_second = (total.bytes_processed / seconds); + } + double items_per_second = 0; + if (total.items_processed > 0 && seconds > 0.0) { + items_per_second = (total.items_processed / seconds); + } + + if (b.use_manual_time) { + report.real_accumulated_time = manual_accumulated_time; + } else { + report.real_accumulated_time = real_accumulated_time; + } + report.cpu_accumulated_time = cpu_accumulated_time; + report.bytes_per_second = bytes_per_second; + report.items_per_second = items_per_second; + report.complexity_n = total.complexity_n; + report.complexity = b.complexity; + report.complexity_lambda = b.complexity_lambda; + if(report.complexity != oNone) + complexity_reports.push_back(report); + } + reports.push_back(report); break; } @@ -726,7 +898,19 @@ void RunBenchmark(const benchmark::internal::Benchmark::Instance& b, iters = static_cast<int>(next_iters + 0.5); } } + std::vector<BenchmarkReporter::Run> additional_run_stats = ComputeStats(reports); + reports.insert(reports.end(), additional_run_stats.begin(), + additional_run_stats.end()); + + if((b.complexity != oNone) && b.last_benchmark_instance) { + additional_run_stats = ComputeBigO(complexity_reports); + reports.insert(reports.end(), additional_run_stats.begin(), + additional_run_stats.end()); + complexity_reports.clear(); + } + br->ReportRuns(reports); + if (b.multithreaded) { for (std::thread& thread : pool) thread.join(); @@ -737,10 +921,12 @@ void RunBenchmark(const benchmark::internal::Benchmark::Instance& b, State::State(size_t max_iters, bool has_x, int x, bool has_y, int y, int thread_i, int n_threads) - : started_(false), total_iterations_(0), + : started_(false), finished_(false), total_iterations_(0), has_range_x_(has_x), range_x_(x), has_range_y_(has_y), range_y_(y), bytes_processed_(0), items_processed_(0), + complexity_n_(0), + error_occurred_(false), thread_index(thread_i), threads(n_threads), max_iterations(max_iters) @@ -752,14 +938,33 @@ State::State(size_t max_iters, bool has_x, int x, bool has_y, int y, void State::PauseTiming() { // Add in time accumulated so far CHECK(running_benchmark); + CHECK(started_ && !finished_ && !error_occurred_); timer_manager->StopTimer(); } void State::ResumeTiming() { CHECK(running_benchmark); + CHECK(started_ && !finished_ && !error_occurred_); timer_manager->StartTimer(); } +void State::SkipWithError(const char* msg) { + CHECK(msg); + error_occurred_ = true; + error_message_type expected_no_error_msg = nullptr; + error_message.compare_exchange_weak(expected_no_error_msg, + const_cast<error_message_type>(msg)); + started_ = finished_ = true; + total_iterations_ = max_iterations; + timer_manager->RemoveErroredThread(); +} + +void State::SetIterationTime(double seconds) +{ + CHECK(running_benchmark); + timer_manager->SetIterationTime(seconds); +} + void State::SetLabel(const char* label) { CHECK(running_benchmark); MutexLock l(GetBenchmarkLock()); @@ -769,32 +974,19 @@ void State::SetLabel(const char* label) { namespace internal { namespace { -void PrintBenchmarkList() { - 
std::vector<Benchmark::Instance> benchmarks; - auto families = BenchmarkFamilies::GetInstance(); - if (!families->FindBenchmarks(".", &benchmarks)) return; - - for (const internal::Benchmark::Instance& benchmark : benchmarks) { - std::cout << benchmark.name << "\n"; - } -} - -void RunMatchingBenchmarks(const std::string& spec, +void RunMatchingBenchmarks(const std::vector<Benchmark::Instance>& benchmarks, BenchmarkReporter* reporter) { CHECK(reporter != nullptr); - if (spec.empty()) return; - - std::vector<Benchmark::Instance> benchmarks; - auto families = BenchmarkFamilies::GetInstance(); - if (!families->FindBenchmarks(spec, &benchmarks)) return; // Determine the width of the name field using a minimum width of 10. + bool has_repetitions = FLAGS_benchmark_repetitions > 1; size_t name_field_width = 10; for (const Benchmark::Instance& benchmark : benchmarks) { name_field_width = std::max<size_t>(name_field_width, benchmark.name.size()); + has_repetitions |= benchmark.repetitions > 1; } - if (FLAGS_benchmark_repetitions > 1) + if (has_repetitions) name_field_width += std::strlen("_stddev"); // Print header here @@ -805,16 +997,19 @@ void RunMatchingBenchmarks(const std::string& spec, context.cpu_scaling_enabled = CpuScalingEnabled(); context.name_field_width = name_field_width; + // Keep track of runing times of all instances of current benchmark + std::vector<BenchmarkReporter::Run> complexity_reports; + if (reporter->ReportContext(context)) { for (const auto& benchmark : benchmarks) { - RunBenchmark(benchmark, reporter); + RunBenchmark(benchmark, reporter, complexity_reports); } } } std::unique_ptr<BenchmarkReporter> GetDefaultReporter() { typedef std::unique_ptr<BenchmarkReporter> PtrType; - if (FLAGS_benchmark_format == "tabular") { + if (FLAGS_benchmark_format == "console") { return PtrType(new ConsoleReporter); } else if (FLAGS_benchmark_format == "json") { return PtrType(new JSONReporter); @@ -829,26 +1024,32 @@ std::unique_ptr<BenchmarkReporter> GetDefaultReporter() { } // end namespace } // end namespace internal -void RunSpecifiedBenchmarks() { - RunSpecifiedBenchmarks(nullptr); +size_t RunSpecifiedBenchmarks() { + return RunSpecifiedBenchmarks(nullptr); } -void RunSpecifiedBenchmarks(BenchmarkReporter* reporter) { - if (FLAGS_benchmark_list_tests) { - internal::PrintBenchmarkList(); - return; - } +size_t RunSpecifiedBenchmarks(BenchmarkReporter* reporter) { std::string spec = FLAGS_benchmark_filter; if (spec.empty() || spec == "all") spec = "."; // Regexp that matches all benchmarks - std::unique_ptr<BenchmarkReporter> default_reporter; - if (!reporter) { - default_reporter = internal::GetDefaultReporter(); - reporter = default_reporter.get(); + std::vector<internal::Benchmark::Instance> benchmarks; + auto families = internal::BenchmarkFamilies::GetInstance(); + if (!families->FindBenchmarks(spec, &benchmarks)) return 0; + + if (FLAGS_benchmark_list_tests) { + for (auto const& benchmark : benchmarks) + std::cout << benchmark.name << "\n"; + } else { + std::unique_ptr<BenchmarkReporter> default_reporter; + if (!reporter) { + default_reporter = internal::GetDefaultReporter(); + reporter = default_reporter.get(); + } + internal::RunMatchingBenchmarks(benchmarks, reporter); + reporter->Finalize(); } - internal::RunMatchingBenchmarks(spec, reporter); - reporter->Finalize(); + return benchmarks.size(); } namespace internal { @@ -860,7 +1061,7 @@ void PrintUsageAndExit() { " [--benchmark_filter=<regex>]\n" " [--benchmark_min_time=<min_time>]\n" " 
[--benchmark_repetitions=<num_repetitions>]\n" - " [--benchmark_format=<tabular|json|csv>]\n" + " [--benchmark_format=<console|json|csv>]\n" " [--color_print={true|false}]\n" " [--v=<verbosity>]\n"); exit(0); @@ -891,7 +1092,8 @@ void ParseCommandLineFlags(int* argc, char** argv) { PrintUsageAndExit(); } } - if (FLAGS_benchmark_format != "tabular" && + + if (FLAGS_benchmark_format != "console" && FLAGS_benchmark_format != "json" && FLAGS_benchmark_format != "csv") { PrintUsageAndExit(); diff --git a/src/check.h b/src/check.h index d2c1fda..4572bab 100644 --- a/src/check.h +++ b/src/check.h @@ -10,6 +10,18 @@ namespace benchmark { namespace internal { +typedef void(AbortHandlerT)(); + +inline AbortHandlerT*& GetAbortHandler() { + static AbortHandlerT* handler = &std::abort; + return handler; +} + +BENCHMARK_NORETURN inline void CallAbortHandler() { + GetAbortHandler()(); + std::abort(); // fallback to enforce noreturn +} + // CheckHandler is the class constructed by failing CHECK macros. CheckHandler // will log information about the failures and abort when it is destructed. class CheckHandler { @@ -25,13 +37,13 @@ public: return log_; } - BENCHMARK_NORETURN ~CheckHandler() { + BENCHMARK_NORETURN ~CheckHandler() BENCHMARK_NOEXCEPT_OP(false) { log_ << std::endl; - std::abort(); + CallAbortHandler(); } - CheckHandler & operator=(const CheckHandler&) = delete;
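Since RunSpecifiedBenchmarks() now returns the number of matched benchmarks (see the change above, which the reworked filter_test below relies on), a hand-written main() can act on that count. A minimal sketch, with the exit-code policy purely illustrative:

#include "benchmark/benchmark.h"

int main(int argc, char** argv) {
  benchmark::Initialize(&argc, argv);
  // Returns how many benchmarks matched --benchmark_filter, whether they
  // were run or merely listed via --benchmark_list_tests.
  size_t matched = benchmark::RunSpecifiedBenchmarks();
  return matched == 0 ? 1 : 0;  // illustrative: fail when nothing matched
}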
- CheckHandler(const CheckHandler&) = delete;
+ CheckHandler & operator=(const CheckHandler&) = delete; + CheckHandler(const CheckHandler&) = delete; CheckHandler() = delete; private: std::ostream& log_; diff --git a/src/colorprint.cc b/src/colorprint.cc index 81f917b..efb8626 100644 --- a/src/colorprint.cc +++ b/src/colorprint.cc @@ -16,8 +16,12 @@ #include <cstdarg> #include <cstdio> +#include <cstdarg> +#include <string> +#include <memory> #include "commandlineflags.h" +#include "check.h" #include "internal_macros.h" #ifdef BENCHMARK_OS_WINDOWS @@ -74,14 +78,51 @@ PlatformColorCode GetPlatformColorCode(LogColor color) { }; #endif } + } // end namespace -void ColorPrintf(LogColor color, const char* fmt, ...) { +std::string FormatString(const char *msg, va_list args) { + // we might need a second shot at this, so pre-emptivly make a copy + va_list args_cp; + va_copy(args_cp, args); + + std::size_t size = 256; + char local_buff[256]; + auto ret = std::vsnprintf(local_buff, size, msg, args_cp); + + va_end(args_cp); + + // currently there is no error handling for failure, so this is hack. + CHECK(ret >= 0); + + if (ret == 0) // handle empty expansion + return {}; + else if (static_cast<size_t>(ret) < size) + return local_buff; + else { + // we did not provide a long enough buffer on our first attempt. + size = (size_t)ret + 1; // + 1 for the null byte + std::unique_ptr<char[]> buff(new char[size]); + ret = std::vsnprintf(buff.get(), size, msg, args); + CHECK(ret > 0 && ((size_t)ret) < size); + return buff.get(); + } +} + +std::string FormatString(const char *msg, ...) { + va_list args; + va_start(args, msg); + auto tmp = FormatString(msg, args); + va_end(args); + return tmp; +} + +void ColorPrintf(std::ostream& out, LogColor color, const char* fmt, ...) { va_list args; va_start(args, fmt); if (!FLAGS_color_print) { - vprintf(fmt, args); + out << FormatString(fmt, args); va_end(args); return; } @@ -107,10 +148,11 @@ void ColorPrintf(LogColor color, const char* fmt, ...) { SetConsoleTextAttribute(stdout_handle, old_color_attrs); #else const char* color_code = GetPlatformColorCode(color); - if (color_code) fprintf(stdout, "\033[0;3%sm", color_code); - vprintf(fmt, args); - printf("\033[m"); // Resets the terminal to default. + if (color_code) out << FormatString("\033[0;3%sm", color_code); + out << FormatString(fmt, args) << "\033[m"; #endif + va_end(args); } + } // end namespace benchmark diff --git a/src/colorprint.h b/src/colorprint.h index 54d1f66..2b3c082 100644 --- a/src/colorprint.h +++ b/src/colorprint.h @@ -1,6 +1,10 @@ #ifndef BENCHMARK_COLORPRINT_H_ #define BENCHMARK_COLORPRINT_H_ +#include <cstdarg> +#include <string> +#include <iostream> + namespace benchmark { enum LogColor { COLOR_DEFAULT, @@ -13,7 +17,11 @@ enum LogColor { COLOR_WHITE }; -void ColorPrintf(LogColor color, const char* fmt, ...); +std::string FormatString(const char* msg, va_list args); +std::string FormatString(const char* msg, ...); + +void ColorPrintf(std::ostream& out, LogColor color, const char* fmt, ...); + } // end namespace benchmark #endif // BENCHMARK_COLORPRINT_H_ diff --git a/src/complexity.cc b/src/complexity.cc new file mode 100644 index 0000000..b42bd38 --- /dev/null +++ b/src/complexity.cc @@ -0,0 +1,283 @@ +// Copyright 2016 Ismael Jimenez Martinez. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Source project : https://github.com/ismaelJimenez/cpp.leastsq +// Adapted to be used with google benchmark + +#include "benchmark/benchmark_api.h" + +#include <algorithm> +#include <cmath> +#include "check.h" +#include "complexity.h" +#include "stat.h" + +namespace benchmark { + +// Internal function to calculate the different scalability forms +BigOFunc* FittingCurve(BigO complexity) { + switch (complexity) { + case oN: + return [](int n) -> double { return n; }; + case oNSquared: + return [](int n) -> double { return n * n; }; + case oNCubed: + return [](int n) -> double { return n * n * n; }; + case oLogN: + return [](int n) { return std::log2(n); }; + case oNLogN: + return [](int n) { return n * std::log2(n); }; + case o1: + default: + return [](int) { return 1.0; }; + } +} + +// Function to return an string for the calculated complexity +std::string GetBigOString(BigO complexity) { + switch (complexity) { + case oN: + return "N"; + case oNSquared: + return "N^2"; + case oNCubed: + return "N^3"; + case oLogN: + return "lgN"; + case oNLogN: + return "NlgN"; + case o1: + return "(1)"; + default: + return "f(N)"; + } +} + +// Find the coefficient for the high-order term in the running time, by +// minimizing the sum of squares of relative error, for the fitting curve +// given by the lambda expresion. +// - n : Vector containing the size of the benchmark tests. +// - time : Vector containing the times for the benchmark tests. +// - fitting_curve : lambda expresion (e.g. [](int n) {return n; };). + +// For a deeper explanation on the algorithm logic, look the README file at +// http://github.com/ismaelJimenez/Minimal-Cpp-Least-Squared-Fit + +LeastSq MinimalLeastSq(const std::vector<int>& n, + const std::vector<double>& time, + BigOFunc* fitting_curve) { + double sigma_gn = 0.0; + double sigma_gn_squared = 0.0; + double sigma_time = 0.0; + double sigma_time_gn = 0.0; + + // Calculate least square fitting parameter + for (size_t i = 0; i < n.size(); ++i) { + double gn_i = fitting_curve(n[i]); + sigma_gn += gn_i; + sigma_gn_squared += gn_i * gn_i; + sigma_time += time[i]; + sigma_time_gn += time[i] * gn_i; + } + + LeastSq result; + result.complexity = oLambda; + + // Calculate complexity. + result.coef = sigma_time_gn / sigma_gn_squared; + + // Calculate RMS + double rms = 0.0; + for (size_t i = 0; i < n.size(); ++i) { + double fit = result.coef * fitting_curve(n[i]); + rms += pow((time[i] - fit), 2); + } + + // Normalized RMS by the mean of the observed values + double mean = sigma_time / n.size(); + result.rms = sqrt(rms / n.size()) / mean; + + return result; +} + +// Find the coefficient for the high-order term in the running time, by +// minimizing the sum of squares of relative error. +// - n : Vector containing the size of the benchmark tests. +// - time : Vector containing the times for the benchmark tests. +// - complexity : If different than oAuto, the fitting curve will stick to +// this one. If it is oAuto, it will be calculated the best +// fitting curve. 
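In other words, for a fixed curve f the fit above has a closed form: modelling time_i ~ coef * f(n_i) and minimizing the sum of squared residuals gives coef = sum(time_i * f(n_i)) / sum(f(n_i)^2), and the reported rms is sqrt(sum((time_i - coef * f(n_i))^2) / N) normalized by the mean of the measured times. The overload that follows then handles oAuto by running this fit for every candidate curve (oLogN, oN, oNLogN, oNSquared, oNCubed, with o1 as the baseline) and keeping the one with the smallest normalized RMS, which is what the parameterless ->Complexity() registrations in the new complexity_test exercise.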
+LeastSq MinimalLeastSq(const std::vector<int>& n, + const std::vector<double>& time, + const BigO complexity) { + CHECK_EQ(n.size(), time.size()); + CHECK_GE(n.size(), 2); // Do not compute fitting curve is less than two + // benchmark runs are given + CHECK_NE(complexity, oNone); + + LeastSq best_fit; + + if (complexity == oAuto) { + std::vector<BigO> fit_curves = {oLogN, oN, oNLogN, oNSquared, oNCubed}; + + // Take o1 as default best fitting curve + best_fit = MinimalLeastSq(n, time, FittingCurve(o1)); + best_fit.complexity = o1; + + // Compute all possible fitting curves and stick to the best one + for (const auto& fit : fit_curves) { + LeastSq current_fit = MinimalLeastSq(n, time, FittingCurve(fit)); + if (current_fit.rms < best_fit.rms) { + best_fit = current_fit; + best_fit.complexity = fit; + } + } + } else { + best_fit = MinimalLeastSq(n, time, FittingCurve(complexity)); + best_fit.complexity = complexity; + } + + return best_fit; +} + +std::vector<BenchmarkReporter::Run> ComputeStats( + const std::vector<BenchmarkReporter::Run>& reports) { + typedef BenchmarkReporter::Run Run; + std::vector<Run> results; + + auto error_count = + std::count_if(reports.begin(), reports.end(), + [](Run const& run) { return run.error_occurred; }); + + if (reports.size() - error_count < 2) { + // We don't report aggregated data if there was a single run. + return results; + } + // Accumulators. + Stat1_d real_accumulated_time_stat; + Stat1_d cpu_accumulated_time_stat; + Stat1_d bytes_per_second_stat; + Stat1_d items_per_second_stat; + // All repetitions should be run with the same number of iterations so we + // can take this information from the first benchmark. + int64_t const run_iterations = reports.front().iterations; + + // Populate the accumulators. + for (Run const& run : reports) { + CHECK_EQ(reports[0].benchmark_name, run.benchmark_name); + CHECK_EQ(run_iterations, run.iterations); + if (run.error_occurred) continue; + real_accumulated_time_stat += + Stat1_d(run.real_accumulated_time / run.iterations, run.iterations); + cpu_accumulated_time_stat += + Stat1_d(run.cpu_accumulated_time / run.iterations, run.iterations); + items_per_second_stat += Stat1_d(run.items_per_second, run.iterations); + bytes_per_second_stat += Stat1_d(run.bytes_per_second, run.iterations); + } + + // Get the data from the accumulator to BenchmarkReporter::Run's. 
+ Run mean_data; + mean_data.benchmark_name = reports[0].benchmark_name + "_mean"; + mean_data.iterations = run_iterations; + mean_data.real_accumulated_time = + real_accumulated_time_stat.Mean() * run_iterations; + mean_data.cpu_accumulated_time = + cpu_accumulated_time_stat.Mean() * run_iterations; + mean_data.bytes_per_second = bytes_per_second_stat.Mean(); + mean_data.items_per_second = items_per_second_stat.Mean(); + + // Only add label to mean/stddev if it is same for all runs + mean_data.report_label = reports[0].report_label; + for (std::size_t i = 1; i < reports.size(); i++) { + if (reports[i].report_label != reports[0].report_label) { + mean_data.report_label = ""; + break; + } + } + + Run stddev_data; + stddev_data.benchmark_name = reports[0].benchmark_name + "_stddev"; + stddev_data.report_label = mean_data.report_label; + stddev_data.iterations = 0; + stddev_data.real_accumulated_time = real_accumulated_time_stat.StdDev(); + stddev_data.cpu_accumulated_time = cpu_accumulated_time_stat.StdDev(); + stddev_data.bytes_per_second = bytes_per_second_stat.StdDev(); + stddev_data.items_per_second = items_per_second_stat.StdDev(); + + results.push_back(mean_data); + results.push_back(stddev_data); + return results; +} + +std::vector<BenchmarkReporter::Run> ComputeBigO( + const std::vector<BenchmarkReporter::Run>& reports) { + typedef BenchmarkReporter::Run Run; + std::vector<Run> results; + + if (reports.size() < 2) return results; + + // Accumulators. + std::vector<int> n; + std::vector<double> real_time; + std::vector<double> cpu_time; + + // Populate the accumulators. + for (const Run& run : reports) { + CHECK_GT(run.complexity_n, 0) << "Did you forget to call SetComplexityN?"; + n.push_back(run.complexity_n); + real_time.push_back(run.real_accumulated_time / run.iterations); + cpu_time.push_back(run.cpu_accumulated_time / run.iterations); + } + + LeastSq result_cpu; + LeastSq result_real; + + if (reports[0].complexity == oLambda) { + result_cpu = MinimalLeastSq(n, cpu_time, reports[0].complexity_lambda); + result_real = MinimalLeastSq(n, real_time, reports[0].complexity_lambda); + } else { + result_cpu = MinimalLeastSq(n, cpu_time, reports[0].complexity); + result_real = MinimalLeastSq(n, real_time, result_cpu.complexity); + } + std::string benchmark_name = + reports[0].benchmark_name.substr(0, reports[0].benchmark_name.find('/')); + + // Get the data from the accumulator to BenchmarkReporter::Run's. + Run big_o; + big_o.benchmark_name = benchmark_name + "_BigO"; + big_o.iterations = 0; + big_o.real_accumulated_time = result_real.coef; + big_o.cpu_accumulated_time = result_cpu.coef; + big_o.report_big_o = true; + big_o.complexity = result_cpu.complexity; + + double multiplier = GetTimeUnitMultiplier(reports[0].time_unit); + + // Only add label to mean/stddev if it is same for all runs + Run rms; + big_o.report_label = reports[0].report_label; + rms.benchmark_name = benchmark_name + "_RMS"; + rms.report_label = big_o.report_label; + rms.iterations = 0; + rms.real_accumulated_time = result_real.rms / multiplier; + rms.cpu_accumulated_time = result_cpu.rms / multiplier; + rms.report_rms = true; + rms.complexity = result_cpu.complexity; + + results.push_back(big_o); + results.push_back(rms); + return results; +} + +} // end namespace benchmark diff --git a/src/complexity.h b/src/complexity.h new file mode 100644 index 0000000..85cc125 --- /dev/null +++ b/src/complexity.h @@ -0,0 +1,64 @@ +// Copyright 2016 Ismael Jimenez Martinez. All rights reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Source project : https://github.com/ismaelJimenez/cpp.leastsq +// Adapted to be used with google benchmark + +#ifndef COMPLEXITY_H_ +#define COMPLEXITY_H_ + +#include <string> +#include <vector> + +#include "benchmark/benchmark_api.h" +#include "benchmark/reporter.h" + +namespace benchmark { + +// Return a vector containing the mean and standard devation information for +// the specified list of reports. If 'reports' contains less than two +// non-errored runs an empty vector is returned +std::vector<BenchmarkReporter::Run> ComputeStats( + const std::vector<BenchmarkReporter::Run>& reports); + +// Return a vector containing the bigO and RMS information for the specified +// list of reports. If 'reports.size() < 2' an empty vector is returned. +std::vector<BenchmarkReporter::Run> ComputeBigO( + const std::vector<BenchmarkReporter::Run>& reports); + +// This data structure will contain the result returned by MinimalLeastSq +// - coef : Estimated coeficient for the high-order term as +// interpolated from data. +// - rms : Normalized Root Mean Squared Error. +// - complexity : Scalability form (e.g. oN, oNLogN). In case a scalability +// form has been provided to MinimalLeastSq this will return +// the same value. In case BigO::oAuto has been selected, this +// parameter will return the best fitting curve detected. + +struct LeastSq { + LeastSq() : + coef(0.0), + rms(0.0), + complexity(oNone) {} + + double coef; + double rms; + BigO complexity; +}; + +// Function to return an string for the calculated complexity +std::string GetBigOString(BigO complexity); + +} // end namespace benchmark +#endif // COMPLEXITY_H_ diff --git a/src/console_reporter.cc b/src/console_reporter.cc index 092936d..080c324 100644 --- a/src/console_reporter.cc +++ b/src/console_reporter.cc @@ -13,72 +13,66 @@ // limitations under the License. #include "benchmark/reporter.h" +#include "complexity.h" +#include <algorithm> #include <cstdint> #include <cstdio> #include <iostream> #include <string> +#include <tuple> #include <vector> #include "check.h" #include "colorprint.h" +#include "commandlineflags.h" +#include "internal_macros.h" #include "string_util.h" #include "walltime.h" +DECLARE_bool(color_print); + namespace benchmark { bool ConsoleReporter::ReportContext(const Context& context) { name_field_width_ = context.name_field_width; - std::cerr << "Run on (" << context.num_cpus << " X " << context.mhz_per_cpu - << " MHz CPU " << ((context.num_cpus > 1) ? "s" : "") << ")\n"; - - std::cerr << LocalDateTimeString() << "\n"; + PrintBasicContext(&GetErrorStream(), context); - if (context.cpu_scaling_enabled) { - std::cerr << "***WARNING*** CPU scaling is enabled, the benchmark " - "real time measurements may be noisy and will incur extra " - "overhead.\n"; +#ifdef BENCHMARK_OS_WINDOWS + if (FLAGS_color_print && &std::cout != &GetOutputStream()) { + GetErrorStream() << "Color printing is only supported for stdout on windows." 
+ " Disabling color printing\n"; + FLAGS_color_print = false; } - -#ifndef NDEBUG - std::cerr << "***WARNING*** Library was built as DEBUG. Timings may be " - "affected.\n"; #endif - - int output_width = fprintf(stdout, "%-*s %10s %10s %10s\n", + std::string str = FormatString("%-*s %13s %13s %10s\n", static_cast<int>(name_field_width_), "Benchmark", - "Time(ns)", "CPU(ns)", "Iterations"); - std::cout << std::string(output_width - 1, '-') << "\n"; + "Time", "CPU", "Iterations"); + GetOutputStream() << str << std::string(str.length() - 1, '-') << "\n"; return true; } void ConsoleReporter::ReportRuns(const std::vector<Run>& reports) { - if (reports.empty()) { - return; - } - - for (Run const& run : reports) { - CHECK_EQ(reports[0].benchmark_name, run.benchmark_name); + for (const auto& run : reports) PrintRunData(run); - } - - if (reports.size() < 2) { - // We don't report aggregated data if there was a single run. - return; - } - - Run mean_data; - Run stddev_data; - BenchmarkReporter::ComputeStats(reports, &mean_data, &stddev_data); - - // Output using PrintRun. - PrintRunData(mean_data); - PrintRunData(stddev_data); } void ConsoleReporter::PrintRunData(const Run& result) { + auto& Out = GetOutputStream(); + + auto name_color = + (result.report_big_o || result.report_rms) ? COLOR_BLUE : COLOR_GREEN; + ColorPrintf(Out, name_color, "%-*s ", name_field_width_, + result.benchmark_name.c_str()); + + if (result.error_occurred) { + ColorPrintf(Out, COLOR_RED, "ERROR OCCURRED: \'%s\'", + result.error_message.c_str()); + ColorPrintf(Out, COLOR_DEFAULT, "\n"); + return; + } // Format bytes per second std::string rate; if (result.bytes_per_second > 0) { @@ -90,27 +84,41 @@ void ConsoleReporter::PrintRunData(const Run& result) { if (result.items_per_second > 0) { items = StrCat(" ", HumanReadableNumber(result.items_per_second), " items/s"); + } + + const double real_time = result.GetAdjustedRealTime(); + const double cpu_time = result.GetAdjustedCPUTime(); + + if (result.report_big_o) { + std::string big_o = GetBigOString(result.complexity); + ColorPrintf(Out, COLOR_YELLOW, "%10.2f %s %10.2f %s ", real_time, + big_o.c_str(), cpu_time, big_o.c_str()); + } else if (result.report_rms) { + ColorPrintf(Out, COLOR_YELLOW, "%10.0f %% %10.0f %% ", real_time * 100, + cpu_time * 100); + } else { + const char* timeLabel = GetTimeUnitString(result.time_unit); + ColorPrintf(Out, COLOR_YELLOW, "%10.0f %s %10.0f %s ", real_time, timeLabel, + cpu_time, timeLabel); } - double const multiplier = 1e9; // nano second multiplier - ColorPrintf(COLOR_GREEN, "%-*s ", - name_field_width_, result.benchmark_name.c_str()); - if (result.iterations == 0) { - ColorPrintf(COLOR_YELLOW, "%10.0f %10.0f ", - result.real_accumulated_time * multiplier, - result.cpu_accumulated_time * multiplier); - } else { - ColorPrintf(COLOR_YELLOW, "%10.0f %10.0f ", - (result.real_accumulated_time * multiplier) / - (static_cast<double>(result.iterations)), - (result.cpu_accumulated_time * multiplier) / - (static_cast<double>(result.iterations))); + if (!result.report_big_o && !result.report_rms) { + ColorPrintf(Out, COLOR_CYAN, "%10lld", result.iterations); } - ColorPrintf(COLOR_CYAN, "%10lld", result.iterations); - ColorPrintf(COLOR_DEFAULT, "%*s %*s %s\n", - 13, rate.c_str(), - 18, items.c_str(), - result.report_label.c_str()); + + if (!rate.empty()) { + ColorPrintf(Out, COLOR_DEFAULT, " %*s", 13, rate.c_str()); + } + + if (!items.empty()) { + ColorPrintf(Out, COLOR_DEFAULT, " %*s", 18, items.c_str()); + } + + if (!result.report_label.empty()) { + 
ColorPrintf(Out, COLOR_DEFAULT, " %s", result.report_label.c_str()); + } + + ColorPrintf(Out, COLOR_DEFAULT, "\n"); } } // end namespace benchmark diff --git a/src/csv_reporter.cc b/src/csv_reporter.cc index d78a9df..7bc7ef3 100644 --- a/src/csv_reporter.cc +++ b/src/csv_reporter.cc @@ -13,10 +13,13 @@ // limitations under the License. #include "benchmark/reporter.h" +#include "complexity.h" +#include <algorithm> #include <cstdint> #include <iostream> #include <string> +#include <tuple> #include <vector> #include "string_util.h" @@ -26,80 +29,90 @@ namespace benchmark { -bool CSVReporter::ReportContext(const Context& context) { - std::cerr << "Run on (" << context.num_cpus << " X " << context.mhz_per_cpu - << " MHz CPU " << ((context.num_cpus > 1) ? "s" : "") << ")\n"; +namespace { +std::vector<std::string> elements = { + "name", + "iterations", + "real_time", + "cpu_time", + "time_unit", + "bytes_per_second", + "items_per_second", + "label", + "error_occurred", + "error_message" +}; +} - std::cerr << LocalDateTimeString() << "\n"; +bool CSVReporter::ReportContext(const Context& context) { + PrintBasicContext(&GetErrorStream(), context); - if (context.cpu_scaling_enabled) { - std::cerr << "***WARNING*** CPU scaling is enabled, the benchmark " - "real time measurements may be noisy and will incur extra " - "overhead.\n"; + std::ostream& Out = GetOutputStream(); + for (auto B = elements.begin(); B != elements.end(); ) { + Out << *B++; + if (B != elements.end()) + Out << ","; } - -#ifndef NDEBUG - std::cerr << "***WARNING*** Library was built as DEBUG. Timings may be " - "affected.\n"; -#endif - std::cout << "name,iterations,real_time,cpu_time,bytes_per_second," - "items_per_second,label\n"; + Out << "\n"; return true; } -void CSVReporter::ReportRuns(std::vector<Run> const& reports) { - if (reports.empty()) { - return; - } - - std::vector<Run> reports_cp = reports; - if (reports.size() >= 2) { - Run mean_data; - Run stddev_data; - BenchmarkReporter::ComputeStats(reports, &mean_data, &stddev_data); - reports_cp.push_back(mean_data); - reports_cp.push_back(stddev_data); - } - for (auto it = reports_cp.begin(); it != reports_cp.end(); ++it) { - PrintRunData(*it); - } +void CSVReporter::ReportRuns(const std::vector<Run> & reports) { + for (const auto& run : reports) + PrintRunData(run); } -void CSVReporter::PrintRunData(Run const& run) { - double const multiplier = 1e9; // nano second multiplier - double cpu_time = run.cpu_accumulated_time * multiplier; - double real_time = run.real_accumulated_time * multiplier; - if (run.iterations != 0) { - real_time = real_time / static_cast<double>(run.iterations); - cpu_time = cpu_time / static_cast<double>(run.iterations); - } +void CSVReporter::PrintRunData(const Run & run) { + std::ostream& Out = GetOutputStream(); // Field with embedded double-quote characters must be doubled and the field // delimited with double-quotes. 
std::string name = run.benchmark_name; ReplaceAll(&name, "\"", "\"\""); - std::cout << "\"" << name << "\","; + Out << '"' << name << "\","; + if (run.error_occurred) { + Out << std::string(elements.size() - 3, ','); + Out << "true,"; + std::string msg = run.error_message; + ReplaceAll(&msg, "\"", "\"\""); + Out << '"' << msg << "\"\n"; + return; + } - std::cout << run.iterations << ","; - std::cout << real_time << ","; - std::cout << cpu_time << ","; + // Do not print iteration on bigO and RMS report + if (!run.report_big_o && !run.report_rms) { + Out << run.iterations; + } + Out << ","; + + Out << run.GetAdjustedRealTime() << ","; + Out << run.GetAdjustedCPUTime() << ","; + + // Do not print timeLabel on bigO and RMS report + if (run.report_big_o) { + Out << GetBigOString(run.complexity); + } else if (!run.report_rms) { + Out << GetTimeUnitString(run.time_unit); + } + Out << ","; if (run.bytes_per_second > 0.0) { - std::cout << run.bytes_per_second; + Out << run.bytes_per_second; } - std::cout << ","; + Out << ","; if (run.items_per_second > 0.0) { - std::cout << run.items_per_second; + Out << run.items_per_second; } - std::cout << ","; + Out << ","; if (!run.report_label.empty()) { // Field with embedded double-quote characters must be doubled and the field // delimited with double-quotes. std::string label = run.report_label; ReplaceAll(&label, "\"", "\"\""); - std::cout << "\"" << label << "\""; + Out << "\"" << label << "\""; } - std::cout << '\n'; + Out << ",,"; // for error_occurred and error_message + Out << '\n'; } } // end namespace benchmark diff --git a/src/cycleclock.h b/src/cycleclock.h index 3110804..e4825d4 100644 --- a/src/cycleclock.h +++ b/src/cycleclock.h @@ -113,11 +113,11 @@ inline BENCHMARK_ALWAYS_INLINE int64_t Now() { uint32_t pmuseren; uint32_t pmcntenset; // Read the user mode perf monitor counter access permissions. - asm("mrc p15, 0, %0, c9, c14, 0" : "=r"(pmuseren)); + asm volatile("mrc p15, 0, %0, c9, c14, 0" : "=r"(pmuseren)); if (pmuseren & 1) { // Allows reading perfmon counters for user mode code. - asm("mrc p15, 0, %0, c9, c12, 1" : "=r"(pmcntenset)); + asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r"(pmcntenset)); if (pmcntenset & 0x80000000ul) { // Is it counting? - asm("mrc p15, 0, %0, c9, c13, 0" : "=r"(pmccntr)); + asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r"(pmccntr)); // The counter is set up to count every 64th cycle return static_cast<int64_t>(pmccntr) * 64; // Should optimize to << 6 } diff --git a/src/json_reporter.cc b/src/json_reporter.cc index def50ac..485d305 100644 --- a/src/json_reporter.cc +++ b/src/json_reporter.cc @@ -13,10 +13,13 @@ // limitations under the License. 
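All three reporters touched by this change now write through GetOutputStream()/GetErrorStream() instead of hard-coded std::cout/std::cerr, and the streams are settable (the new complexity_test swaps in std::stringstreams). A minimal sketch of redirecting JSON output to a file; the path is illustrative:

#include <fstream>
#include <iostream>
#include "benchmark/benchmark.h"

int main(int argc, char** argv) {
  benchmark::Initialize(&argc, argv);
  std::ofstream json_out("results.json");  // illustrative output path
  benchmark::JSONReporter reporter;
  reporter.SetOutputStream(&json_out);     // defaults to &std::cout
  reporter.SetErrorStream(&std::cerr);     // context and warnings stay on stderr (the default)
  benchmark::RunSpecifiedBenchmarks(&reporter);
  return 0;
}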
#include "benchmark/reporter.h" +#include "complexity.h" +#include <algorithm> #include <cstdint> #include <iostream> #include <string> +#include <tuple> #include <vector> #include "string_util.h" @@ -51,7 +54,7 @@ int64_t RoundDouble(double v) { } // end namespace bool JSONReporter::ReportContext(const Context& context) { - std::ostream& out = std::cout; + std::ostream& out = GetOutputStream(); out << "{\n"; std::string inner_indent(2, ' '); @@ -90,70 +93,86 @@ void JSONReporter::ReportRuns(std::vector<Run> const& reports) { return; } std::string indent(4, ' '); - std::ostream& out = std::cout; + std::ostream& out = GetOutputStream(); if (!first_report_) { out << ",\n"; } first_report_ = false; - std::vector<Run> reports_cp = reports; - if (reports.size() >= 2) { - Run mean_data; - Run stddev_data; - BenchmarkReporter::ComputeStats(reports, &mean_data, &stddev_data); - reports_cp.push_back(mean_data); - reports_cp.push_back(stddev_data); - } - for (auto it = reports_cp.begin(); it != reports_cp.end(); ++it) { - out << indent << "{\n"; - PrintRunData(*it); - out << indent << '}'; - auto it_cp = it; - if (++it_cp != reports_cp.end()) { - out << ",\n"; - } + + for (auto it = reports.begin(); it != reports.end(); ++it) { + out << indent << "{\n"; + PrintRunData(*it); + out << indent << '}'; + auto it_cp = it; + if (++it_cp != reports.end()) { + out << ",\n"; + } } } void JSONReporter::Finalize() { - // Close the list of benchmarks and the top level object. - std::cout << "\n ]\n}\n"; + // Close the list of benchmarks and the top level object. + GetOutputStream() << "\n ]\n}\n"; } void JSONReporter::PrintRunData(Run const& run) { - double const multiplier = 1e9; // nano second multiplier - double cpu_time = run.cpu_accumulated_time * multiplier; - double real_time = run.real_accumulated_time * multiplier; - if (run.iterations != 0) { - real_time = real_time / static_cast<double>(run.iterations); - cpu_time = cpu_time / static_cast<double>(run.iterations); - } - - std::string indent(6, ' '); - std::ostream& out = std::cout; + std::string indent(6, ' '); + std::ostream& out = GetOutputStream(); out << indent << FormatKV("name", run.benchmark_name) << ",\n"; + if (run.error_occurred) { + out << indent + << FormatKV("error_occurred", run.error_occurred) + << ",\n"; + out << indent + << FormatKV("error_message", run.error_message) + << ",\n"; + } + if (!run.report_big_o && !run.report_rms) { + out << indent + << FormatKV("iterations", run.iterations) + << ",\n"; + out << indent + << FormatKV("real_time", RoundDouble(run.GetAdjustedRealTime())) + << ",\n"; + out << indent + << FormatKV("cpu_time", RoundDouble(run.GetAdjustedCPUTime())); + out << ",\n" << indent + << FormatKV("time_unit", GetTimeUnitString(run.time_unit)); + } else if (run.report_big_o) { out << indent - << FormatKV("iterations", run.iterations) + << FormatKV("cpu_coefficient", RoundDouble(run.GetAdjustedCPUTime())) << ",\n"; out << indent - << FormatKV("real_time", RoundDouble(real_time)) + << FormatKV("real_coefficient", RoundDouble(run.GetAdjustedRealTime())) << ",\n"; out << indent - << FormatKV("cpu_time", RoundDouble(cpu_time)); - if (run.bytes_per_second > 0.0) { - out << ",\n" << indent - << FormatKV("bytes_per_second", RoundDouble(run.bytes_per_second)); - } - if (run.items_per_second > 0.0) { - out << ",\n" << indent - << FormatKV("items_per_second", RoundDouble(run.items_per_second)); - } - if (!run.report_label.empty()) { - out << ",\n" << indent - << FormatKV("label", run.report_label); - } - out << '\n'; + << 
FormatKV("big_o", GetBigOString(run.complexity)) + << ",\n"; + out << indent + << FormatKV("time_unit", GetTimeUnitString(run.time_unit)); + } else if(run.report_rms) { + out << indent + << FormatKV("rms", RoundDouble(run.GetAdjustedCPUTime()*100)) + << '%'; + } + if (run.bytes_per_second > 0.0) { + out << ",\n" + << indent + << FormatKV("bytes_per_second", RoundDouble(run.bytes_per_second)); + } + if (run.items_per_second > 0.0) { + out << ",\n" + << indent + << FormatKV("items_per_second", RoundDouble(run.items_per_second)); + } + if (!run.report_label.empty()) { + out << ",\n" + << indent + << FormatKV("label", run.report_label); + } + out << '\n'; } -} // end namespace benchmark +} // end namespace benchmark diff --git a/src/reporter.cc b/src/reporter.cc index 4b47e3d..5187859 100644 --- a/src/reporter.cc +++ b/src/reporter.cc @@ -13,74 +13,63 @@ // limitations under the License. #include "benchmark/reporter.h" +#include "walltime.h" #include <cstdlib> + +#include <iostream> #include <vector> +#include <tuple> #include "check.h" #include "stat.h" namespace benchmark { -void BenchmarkReporter::ComputeStats( - const std::vector<Run>& reports, - Run* mean_data, Run* stddev_data) { - CHECK(reports.size() >= 2) << "Cannot compute stats for less than 2 reports"; - // Accumulators. - Stat1_d real_accumulated_time_stat; - Stat1_d cpu_accumulated_time_stat; - Stat1_d bytes_per_second_stat; - Stat1_d items_per_second_stat; - // All repetitions should be run with the same number of iterations so we - // can take this information from the first benchmark. - int64_t const run_iterations = reports.front().iterations; - - // Populate the accumulators. - for (Run const& run : reports) { - CHECK_EQ(reports[0].benchmark_name, run.benchmark_name); - CHECK_EQ(run_iterations, run.iterations); - real_accumulated_time_stat += - Stat1_d(run.real_accumulated_time/run.iterations, run.iterations); - cpu_accumulated_time_stat += - Stat1_d(run.cpu_accumulated_time/run.iterations, run.iterations); - items_per_second_stat += Stat1_d(run.items_per_second, run.iterations); - bytes_per_second_stat += Stat1_d(run.bytes_per_second, run.iterations); - } +BenchmarkReporter::BenchmarkReporter() + : output_stream_(&std::cout), error_stream_(&std::cerr) +{ +} + +BenchmarkReporter::~BenchmarkReporter() { +} - // Get the data from the accumulator to BenchmarkReporter::Run's. - mean_data->benchmark_name = reports[0].benchmark_name + "_mean"; - mean_data->iterations = run_iterations; - mean_data->real_accumulated_time = real_accumulated_time_stat.Mean() * - run_iterations; - mean_data->cpu_accumulated_time = cpu_accumulated_time_stat.Mean() * - run_iterations; - mean_data->bytes_per_second = bytes_per_second_stat.Mean(); - mean_data->items_per_second = items_per_second_stat.Mean(); - - // Only add label to mean/stddev if it is same for all runs - mean_data->report_label = reports[0].report_label; - for (std::size_t i = 1; i < reports.size(); i++) { - if (reports[i].report_label != reports[0].report_label) { - mean_data->report_label = ""; - break; - } +void BenchmarkReporter::PrintBasicContext(std::ostream *out_ptr, + Context const &context) { + CHECK(out_ptr) << "cannot be null"; + auto& Out = *out_ptr; + + Out << "Run on (" << context.num_cpus << " X " << context.mhz_per_cpu + << " MHz CPU " << ((context.num_cpus > 1) ? 
"s" : "") << ")\n"; + + Out << LocalDateTimeString() << "\n"; + + if (context.cpu_scaling_enabled) { + Out << "***WARNING*** CPU scaling is enabled, the benchmark " + "real time measurements may be noisy and will incur extra " + "overhead.\n"; } - stddev_data->benchmark_name = reports[0].benchmark_name + "_stddev"; - stddev_data->report_label = mean_data->report_label; - stddev_data->iterations = 0; - stddev_data->real_accumulated_time = - real_accumulated_time_stat.StdDev(); - stddev_data->cpu_accumulated_time = - cpu_accumulated_time_stat.StdDev(); - stddev_data->bytes_per_second = bytes_per_second_stat.StdDev(); - stddev_data->items_per_second = items_per_second_stat.StdDev(); +#ifndef NDEBUG + Out << "***WARNING*** Library was built as DEBUG. Timings may be " + "affected.\n"; +#endif } -void BenchmarkReporter::Finalize() { +double BenchmarkReporter::Run::GetAdjustedRealTime() const { + double new_time = real_accumulated_time * GetTimeUnitMultiplier(time_unit); + if (iterations != 0) + new_time /= static_cast<double>(iterations); + return new_time; } -BenchmarkReporter::~BenchmarkReporter() { +double BenchmarkReporter::Run::GetAdjustedCPUTime() const { + double new_time = cpu_accumulated_time * GetTimeUnitMultiplier(time_unit); + if (iterations != 0) + new_time /= static_cast<double>(iterations); + return new_time; } + + } // end namespace benchmark diff --git a/src/sysinfo.cc b/src/sysinfo.cc index e10e19d..3a5d942 100644 --- a/src/sysinfo.cc +++ b/src/sysinfo.cc @@ -239,6 +239,7 @@ void InitializeSystemInfo() { } // TODO: also figure out cpuinfo_num_cpus + #elif defined BENCHMARK_OS_WINDOWS // In NT, read MHz from the registry. If we fail to do so or we're in win9x // then make a crude estimate. @@ -251,7 +252,10 @@ void InitializeSystemInfo() { cpuinfo_cycles_per_second = static_cast<double>((int64_t)data * (int64_t)(1000 * 1000)); // was mhz else cpuinfo_cycles_per_second = static_cast<double>(EstimateCyclesPerSecond()); -// TODO: also figure out cpuinfo_num_cpus + + SYSTEM_INFO sysinfo = { 0 }; + GetSystemInfo(&sysinfo); + cpuinfo_num_cpus = sysinfo.dwNumberOfProcessors; // number of logical processors in the current group #elif defined BENCHMARK_OS_MACOSX // returning "mach time units" per second. 
the current number of elapsed diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index a10a53a..aeb720a 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -2,10 +2,6 @@ find_package(Threads REQUIRED) -set(CXX03_FLAGS "${CMAKE_CXX_FLAGS}") -string(REPLACE "-std=c++11" "-std=c++03" CXX03_FLAGS "${CXX03_FLAGS}") -string(REPLACE "-std=c++0x" "-std=c++03" CXX03_FLAGS "${CXX03_FLAGS}") - macro(compile_benchmark_test name) add_executable(${name} "${name}.cc") target_link_libraries(${name} benchmark ${CMAKE_THREAD_LIBS_INIT}) @@ -18,6 +14,7 @@ add_test(benchmark benchmark_test --benchmark_min_time=0.01) compile_benchmark_test(filter_test) macro(add_filter_test name filter expect) add_test(${name} filter_test --benchmark_min_time=0.01 --benchmark_filter=${filter} ${expect}) + add_test(${name}_list_only filter_test --benchmark_list_tests --benchmark_filter=${filter} ${expect}) endmacro(add_filter_test) add_filter_test(filter_simple "Foo" 3) @@ -36,16 +33,38 @@ add_test(options_benchmarks options_test --benchmark_min_time=0.01) compile_benchmark_test(basic_test) add_test(basic_benchmark basic_test --benchmark_min_time=0.01) +compile_benchmark_test(diagnostics_test) +add_test(diagnostics_test diagnostics_test --benchmark_min_time=0.01) + +compile_benchmark_test(skip_with_error_test) +add_test(skip_with_error_test skip_with_error_test --benchmark_min_time=0.01) + +compile_benchmark_test(donotoptimize_test) +add_test(donotoptimize_test donotoptimize_test --benchmark_min_time=0.01) + compile_benchmark_test(fixture_test) add_test(fixture_test fixture_test --benchmark_min_time=0.01) compile_benchmark_test(map_test) add_test(map_test map_test --benchmark_min_time=0.01) -compile_benchmark_test(cxx03_test) -set_target_properties(cxx03_test - PROPERTIES COMPILE_FLAGS "${CXX03_FLAGS}") -add_test(cxx03 cxx03_test --benchmark_min_time=0.01) +compile_benchmark_test(reporter_output_test) +add_test(reporter_output_test reporter_output_test --benchmark_min_time=0.01) + +check_cxx_compiler_flag(-std=c++03 BENCHMARK_HAS_CXX03_FLAG) +if (BENCHMARK_HAS_CXX03_FLAG) + set(CXX03_FLAGS "${CMAKE_CXX_FLAGS}") + string(REPLACE "-std=c++11" "-std=c++03" CXX03_FLAGS "${CXX03_FLAGS}") + string(REPLACE "-std=c++0x" "-std=c++03" CXX03_FLAGS "${CXX03_FLAGS}") + + compile_benchmark_test(cxx03_test) + set_target_properties(cxx03_test + PROPERTIES COMPILE_FLAGS "${CXX03_FLAGS}") + add_test(cxx03 cxx03_test --benchmark_min_time=0.01) +endif() + +compile_benchmark_test(complexity_test) +add_test(complexity_benchmark complexity_test --benchmark_min_time=0.01) # Add the coverage command(s) if(CMAKE_BUILD_TYPE) @@ -66,7 +85,7 @@ if (${CMAKE_BUILD_TYPE_LOWER} MATCHES "coverage") COMMAND ${LCOV} -q -a before.lcov -a after.lcov --output-file final.lcov COMMAND ${LCOV} -q -r final.lcov "'${CMAKE_SOURCE_DIR}/test/*'" -o final.lcov COMMAND ${GENHTML} final.lcov -o lcov --demangle-cpp --sort -p "${CMAKE_BINARY_DIR}" -t benchmark - DEPENDS filter_test benchmark_test options_test basic_test fixture_test cxx03_test + DEPENDS filter_test benchmark_test options_test basic_test fixture_test cxx03_test complexity_test WORKING_DIRECTORY ${CMAKE_BINARY_DIR} COMMENT "Running LCOV" ) diff --git a/test/benchmark_test.cc b/test/benchmark_test.cc index 97abb68..66f5956 100644 --- a/test/benchmark_test.cc +++ b/test/benchmark_test.cc @@ -14,6 +14,9 @@ #include <sstream> #include <string> #include <vector> +#include <chrono> +#include <thread> +#include <utility> #if defined(__GNUC__) # define BENCHMARK_NOINLINE __attribute__((noinline)) @@ -174,5 
+177,48 @@ static void BM_ParallelMemset(benchmark::State& state) { } BENCHMARK(BM_ParallelMemset)->Arg(10 << 20)->ThreadRange(1, 4); +static void BM_ManualTiming(benchmark::State& state) { + size_t slept_for = 0; + int microseconds = state.range_x(); + std::chrono::duration<double, std::micro> sleep_duration { + static_cast<double>(microseconds) + }; + + while (state.KeepRunning()) { + auto start = std::chrono::high_resolution_clock::now(); + // Simulate some useful workload with a sleep + std::this_thread::sleep_for(std::chrono::duration_cast< + std::chrono::nanoseconds>(sleep_duration)); + auto end = std::chrono::high_resolution_clock::now(); + + auto elapsed = + std::chrono::duration_cast<std::chrono::duration<double>>( + end - start); + + state.SetIterationTime(elapsed.count()); + slept_for += microseconds; + } + state.SetItemsProcessed(slept_for); +} +BENCHMARK(BM_ManualTiming)->Range(1, 1 << 14)->UseRealTime(); +BENCHMARK(BM_ManualTiming)->Range(1, 1 << 14)->UseManualTime(); + +#if __cplusplus >= 201103L + +template <class ...Args> +void BM_with_args(benchmark::State& state, Args&&...) { + while (state.KeepRunning()) {} +} +BENCHMARK_CAPTURE(BM_with_args, int_test, 42, 43, 44); +BENCHMARK_CAPTURE(BM_with_args, string_and_pair_test, + std::string("abc"), std::pair<int, double>(42, 3.8)); + +void BM_non_template_args(benchmark::State& state, int, double) { + while(state.KeepRunning()) {} +} +BENCHMARK_CAPTURE(BM_non_template_args, basic_test, 0, 0); + +#endif // __cplusplus >= 201103L + BENCHMARK_MAIN() diff --git a/test/complexity_test.cc b/test/complexity_test.cc new file mode 100644 index 0000000..8ab88f9 --- /dev/null +++ b/test/complexity_test.cc @@ -0,0 +1,297 @@ + +#undef NDEBUG +#include "benchmark/benchmark.h" +#include "../src/check.h" // NOTE: check.h is for internal use only! +#include "../src/re.h" // NOTE: re.h is for internal use only +#include <cassert> +#include <cstring> +#include <iostream> +#include <sstream> +#include <vector> +#include <utility> +#include <algorithm> +#include <cmath> + +namespace { + +// ========================================================================= // +// -------------------------- Testing Case --------------------------------- // +// ========================================================================= // + +enum MatchRules { + MR_Default, // Skip non-matching lines until a match is found. + MR_Next // Match must occur on the next line. 
+}; + +struct TestCase { + std::string regex; + int match_rule; + + TestCase(std::string re, int rule = MR_Default) : regex(re), match_rule(rule) {} + + void Check(std::stringstream& remaining_output) const { + benchmark::Regex r; + std::string err_str; + r.Init(regex, &err_str); + CHECK(err_str.empty()) << "Could not construct regex \"" << regex << "\"" + << " got Error: " << err_str; + + std::string line; + while (remaining_output.eof() == false) { + CHECK(remaining_output.good()); + std::getline(remaining_output, line); + if (r.Match(line)) return; + CHECK(match_rule != MR_Next) << "Expected line \"" << line + << "\" to match regex \"" << regex << "\""; + } + + CHECK(remaining_output.eof() == false) + << "End of output reached before match for regex \"" << regex + << "\" was found"; + } +}; + +std::vector<TestCase> ConsoleOutputTests; +std::vector<TestCase> JSONOutputTests; +std::vector<TestCase> CSVOutputTests; + +// ========================================================================= // +// -------------------------- Test Helpers --------------------------------- // +// ========================================================================= // + +class TestReporter : public benchmark::BenchmarkReporter { +public: + TestReporter(std::vector<benchmark::BenchmarkReporter*> reps) + : reporters_(reps) {} + + virtual bool ReportContext(const Context& context) { + bool last_ret = false; + bool first = true; + for (auto rep : reporters_) { + bool new_ret = rep->ReportContext(context); + CHECK(first || new_ret == last_ret) + << "Reports return different values for ReportContext"; + first = false; + last_ret = new_ret; + } + return last_ret; + } + + virtual void ReportRuns(const std::vector<Run>& report) { + for (auto rep : reporters_) + rep->ReportRuns(report); + } + + virtual void Finalize() { + for (auto rep : reporters_) + rep->Finalize(); + } + +private: + std::vector<benchmark::BenchmarkReporter*> reporters_; +}; + + +#define CONCAT2(x, y) x##y +#define CONCAT(x, y) CONCAT2(x, y) + +#define ADD_CASES(...) \ + int CONCAT(dummy, __LINE__) = AddCases(__VA_ARGS__) + +int AddCases(std::vector<TestCase>* out, std::initializer_list<TestCase> const& v) { + for (auto const& TC : v) + out->push_back(TC); + return 0; +} + +template <class First> +std::string join(First f) { return f; } + +template <class First, class ...Args> +std::string join(First f, Args&&... args) { + return std::string(std::move(f)) + "[ ]+" + join(std::forward<Args>(args)...); +} + +std::string dec_re = "[0-9]+\\.[0-9]+"; + +#define ADD_COMPLEXITY_CASES(...) 
\
+  int CONCAT(dummy, __LINE__) = AddComplexityTest(__VA_ARGS__)
+
+int AddComplexityTest(std::vector<TestCase>* console_out, std::vector<TestCase>* json_out,
+                      std::vector<TestCase>* csv_out, std::string big_o_test_name,
+                      std::string rms_test_name, std::string big_o) {
+  std::string big_o_str = dec_re + " " + big_o;
+  AddCases(console_out, {
+    {join("^" + big_o_test_name + "", big_o_str, big_o_str) + "[ ]*$"},
+    {join("^" + rms_test_name + "", "[0-9]+ %", "[0-9]+ %") + "[ ]*$"}
+  });
+  AddCases(json_out, {
+    {"\"name\": \"" + big_o_test_name + "\",$"},
+    {"\"cpu_coefficient\": [0-9]+,$", MR_Next},
+    {"\"real_coefficient\": [0-9]{1,5},$", MR_Next},
+    {"\"big_o\": \"" + big_o + "\",$", MR_Next},
+    {"\"time_unit\": \"ns\"$", MR_Next},
+    {"}", MR_Next},
+    {"\"name\": \"" + rms_test_name + "\",$"},
+    {"\"rms\": [0-9]+%$", MR_Next},
+    {"}", MR_Next}
+  });
+  AddCases(csv_out, {
+    {"^\"" + big_o_test_name + "\",," + dec_re + "," + dec_re + "," + big_o + ",,,,,$"},
+    {"^\"" + rms_test_name + "\",," + dec_re + "," + dec_re + ",,,,,,$"}
+  });
+  return 0;
+}
+
+}  // end namespace
+
+// ========================================================================= //
+// --------------------------- Testing BigO O(1) --------------------------- //
+// ========================================================================= //
+
+void BM_Complexity_O1(benchmark::State& state) {
+  while (state.KeepRunning()) {
+  }
+  state.SetComplexityN(state.range_x());
+}
+BENCHMARK(BM_Complexity_O1) -> Range(1, 1<<18) -> Complexity(benchmark::o1);
+BENCHMARK(BM_Complexity_O1) -> Range(1, 1<<18) -> Complexity([](int){return 1.0; });
+BENCHMARK(BM_Complexity_O1) -> Range(1, 1<<18) -> Complexity();
+
+const char* big_o_1_test_name = "BM_Complexity_O1_BigO";
+const char* rms_o_1_test_name = "BM_Complexity_O1_RMS";
+const char* enum_auto_big_o_1 = "\\([0-9]+\\)";
+const char* lambda_big_o_1 = "f\\(N\\)";
+
+// Add enum tests
+ADD_COMPLEXITY_CASES(&ConsoleOutputTests, &JSONOutputTests, &CSVOutputTests,
+                     big_o_1_test_name, rms_o_1_test_name, enum_auto_big_o_1);
+
+// Add lambda tests
+ADD_COMPLEXITY_CASES(&ConsoleOutputTests, &JSONOutputTests, &CSVOutputTests,
+                     big_o_1_test_name, rms_o_1_test_name, lambda_big_o_1);
+
+// ========================================================================= //
+// --------------------------- Testing BigO O(N) --------------------------- //
+// ========================================================================= //
+
+std::vector<int> ConstructRandomVector(int size) {
+  std::vector<int> v;
+  v.reserve(size);
+  for (int i = 0; i < size; ++i) {
+    v.push_back(rand() % size);
+  }
+  return v;
+}
+
+void BM_Complexity_O_N(benchmark::State& state) {
+  auto v = ConstructRandomVector(state.range_x());
+  const int item_not_in_vector = state.range_x()*2; // Test worst case scenario (item not in vector)
+  while (state.KeepRunning()) {
+    benchmark::DoNotOptimize(std::find(v.begin(), v.end(), item_not_in_vector));
+  }
+  state.SetComplexityN(state.range_x());
+}
+BENCHMARK(BM_Complexity_O_N) -> RangeMultiplier(2) -> Range(1<<10, 1<<16) -> Complexity(benchmark::oN);
+BENCHMARK(BM_Complexity_O_N) -> RangeMultiplier(2) -> Range(1<<10, 1<<16) -> Complexity([](int n) -> double{return n; });
+BENCHMARK(BM_Complexity_O_N) -> RangeMultiplier(2) -> Range(1<<10, 1<<16) -> Complexity();
+
+const char* big_o_n_test_name = "BM_Complexity_O_N_BigO";
+const char* rms_o_n_test_name = "BM_Complexity_O_N_RMS";
+const char* enum_auto_big_o_n = "N";
+const char* lambda_big_o_n = "f\\(N\\)";
+
+// Add enum tests
+ADD_COMPLEXITY_CASES(&ConsoleOutputTests, &JSONOutputTests, &CSVOutputTests,
+                     big_o_n_test_name, rms_o_n_test_name, enum_auto_big_o_n);
+
+// Add lambda tests
+ADD_COMPLEXITY_CASES(&ConsoleOutputTests, &JSONOutputTests, &CSVOutputTests,
+                     big_o_n_test_name, rms_o_n_test_name, lambda_big_o_n);
+
+// ========================================================================= //
+// ------------------------- Testing BigO O(N*lgN) ------------------------- //
+// ========================================================================= //
+
+static void BM_Complexity_O_N_log_N(benchmark::State& state) {
+  auto v = ConstructRandomVector(state.range_x());
+  while (state.KeepRunning()) {
+    std::sort(v.begin(), v.end());
+  }
+  state.SetComplexityN(state.range_x());
+}
+BENCHMARK(BM_Complexity_O_N_log_N) -> RangeMultiplier(2) -> Range(1<<10, 1<<16) -> Complexity(benchmark::oNLogN);
+BENCHMARK(BM_Complexity_O_N_log_N) -> RangeMultiplier(2) -> Range(1<<10, 1<<16) -> Complexity([](int n) {return n * std::log2(n); });
+BENCHMARK(BM_Complexity_O_N_log_N) -> RangeMultiplier(2) -> Range(1<<10, 1<<16) -> Complexity();
+
+const char* big_o_n_lg_n_test_name = "BM_Complexity_O_N_log_N_BigO";
+const char* rms_o_n_lg_n_test_name = "BM_Complexity_O_N_log_N_RMS";
+const char* enum_auto_big_o_n_lg_n = "NlgN";
+const char* lambda_big_o_n_lg_n = "f\\(N\\)";
+
+// Add enum tests
+ADD_COMPLEXITY_CASES(&ConsoleOutputTests, &JSONOutputTests, &CSVOutputTests,
+                     big_o_n_lg_n_test_name, rms_o_n_lg_n_test_name, enum_auto_big_o_n_lg_n);
+
+// Add lambda tests
+ADD_COMPLEXITY_CASES(&ConsoleOutputTests, &JSONOutputTests, &CSVOutputTests,
+                     big_o_n_lg_n_test_name, rms_o_n_lg_n_test_name, lambda_big_o_n_lg_n);
+
+
+// ========================================================================= //
+// --------------------------- TEST CASES END ------------------------------ //
+// ========================================================================= //
+
+
+int main(int argc, char* argv[]) {
+  // Add --color_print=false to argv since we don't want to match color codes.
+  char new_arg[64];
+  char* new_argv[64];
+  std::copy(argv, argv + argc, new_argv);
+  new_argv[argc++] = std::strcpy(new_arg, "--color_print=false");
+  benchmark::Initialize(&argc, new_argv);
+
+  benchmark::ConsoleReporter CR;
+  benchmark::JSONReporter JR;
+  benchmark::CSVReporter CSVR;
+  struct ReporterTest {
+    const char* name;
+    std::vector<TestCase>& output_cases;
+    benchmark::BenchmarkReporter& reporter;
+    std::stringstream out_stream;
+    std::stringstream err_stream;
+
+    ReporterTest(const char* n,
+                 std::vector<TestCase>& out_tc,
+                 benchmark::BenchmarkReporter& br)
+        : name(n), output_cases(out_tc), reporter(br) {
+      reporter.SetOutputStream(&out_stream);
+      reporter.SetErrorStream(&err_stream);
+    }
+  } TestCases[] = {
+      {"ConsoleReporter", ConsoleOutputTests, CR},
+      {"JSONReporter", JSONOutputTests, JR},
+      {"CSVReporter", CSVOutputTests, CSVR}
+  };
+
+  // Create the test reporter and run the benchmarks.
+  std::cout << "Running benchmarks...\n";
+  TestReporter test_rep({&CR, &JR, &CSVR});
+  benchmark::RunSpecifiedBenchmarks(&test_rep);
+
+  for (auto& rep_test : TestCases) {
+    std::string msg = std::string("\nTesting ") + rep_test.name + " Output\n";
+    std::string banner(msg.size() - 1, '-');
+    std::cout << banner << msg << banner << "\n";
+
+    std::cerr << rep_test.err_stream.str();
+    std::cout << rep_test.out_stream.str();
+
+    for (const auto& TC : rep_test.output_cases)
+      TC.Check(rep_test.out_stream);
+
+    std::cout << "\n";
+  }
+  return 0;
+}
+
diff --git a/test/diagnostics_test.cc b/test/diagnostics_test.cc
new file mode 100644
index 0000000..60fa3b1
--- /dev/null
+++ b/test/diagnostics_test.cc
@@ -0,0 +1,61 @@
+// Testing:
+// State::PauseTiming()
+// State::ResumeTiming()
+// Test that CHECK's within these function diagnose when they are called
+// outside of the KeepRunning() loop.
+//
+// NOTE: Users should NOT include or use src/check.h. This is only done in
+// order to test library internals.
+
+#include "benchmark/benchmark_api.h"
+#include "../src/check.h"
+#include <stdexcept>
+#include <cstdlib>
+
+#if defined(__GNUC__) && !defined(__EXCEPTIONS)
+#define TEST_HAS_NO_EXCEPTIONS
+#endif
+
+void TestHandler() {
+#ifndef TEST_HAS_NO_EXCEPTIONS
+  throw std::logic_error("");
+#else
+  std::abort();
+#endif
+}
+
+void try_invalid_pause_resume(benchmark::State& state) {
+#if !defined(NDEBUG) && !defined(TEST_HAS_NO_EXCEPTIONS)
+  try {
+    state.PauseTiming();
+    std::abort();
+  } catch (std::logic_error const&) {}
+  try {
+    state.ResumeTiming();
+    std::abort();
+  } catch (std::logic_error const&) {}
+#else
+  (void)state; // avoid unused warning
+#endif
+}
+
+void BM_diagnostic_test(benchmark::State& state) {
+  static bool called_once = false;
+
+  if (called_once == false) try_invalid_pause_resume(state);
+
+  while (state.KeepRunning()) {
+    benchmark::DoNotOptimize(state.iterations());
+  }
+
+  if (called_once == false) try_invalid_pause_resume(state);
+
+  called_once = true;
+}
+BENCHMARK(BM_diagnostic_test);
+
+int main(int argc, char** argv) {
+  benchmark::internal::GetAbortHandler() = &TestHandler;
+  benchmark::Initialize(&argc, argv);
+  benchmark::RunSpecifiedBenchmarks();
+}
diff --git a/test/donotoptimize_test.cc b/test/donotoptimize_test.cc
new file mode 100644
index 0000000..e4453fb
--- /dev/null
+++ b/test/donotoptimize_test.cc
@@ -0,0 +1,36 @@
+#include "benchmark/benchmark.h"
+
+#include <cstdint>
+
+namespace {
+#if defined(__GNUC__)
+  std::uint64_t double_up(const std::uint64_t x) __attribute__ ((const));
+#endif
+  std::uint64_t double_up(const std::uint64_t x) {
+    return x * 2;
+  }
+}
+
+int main(int, char*[]) {
+
+  // this test verifies compilation of DoNotOptimize() for some types
+
+  char buffer8[8];
+  benchmark::DoNotOptimize(buffer8);
+
+  char buffer20[20];
+  benchmark::DoNotOptimize(buffer20);
+
+  char buffer1024[1024];
+  benchmark::DoNotOptimize(buffer1024);
+  benchmark::DoNotOptimize(&buffer1024[0]);
+
+  int x = 123;
+  benchmark::DoNotOptimize(x);
+  benchmark::DoNotOptimize(&x);
+  benchmark::DoNotOptimize(x += 42);
+
+  benchmark::DoNotOptimize(double_up(x));
+
+  return 0;
+}
diff --git a/test/filter_test.cc b/test/filter_test.cc
index 2a278ff..0ba4071 100644
--- a/test/filter_test.cc
+++ b/test/filter_test.cc
@@ -68,24 +68,38 @@ BENCHMARK(BM_FooBa);
 
-int main(int argc, char* argv[]) {
+int main(int argc, char** argv) {
+  bool list_only = false;
+  for (int i=0; i < argc; ++i)
+    list_only |= std::string(argv[i]).find("--benchmark_list_tests") != std::string::npos;
+
   benchmark::Initialize(&argc, argv);
 
   TestReporter test_reporter;
-  benchmark::RunSpecifiedBenchmarks(&test_reporter);
+  const size_t returned_count = benchmark::RunSpecifiedBenchmarks(&test_reporter);
 
   if (argc == 2) {
     // Make sure we ran all of the tests
     std::stringstream ss(argv[1]);
-    size_t expected;
-    ss >> expected;
+    size_t expected_return;
+    ss >> expected_return;
 
-    const size_t count = test_reporter.GetCount();
-    if (count != expected) {
-      std::cerr << "ERROR: Expected " << expected << " tests to be ran but only "
-                << count << " completed" << std::endl;
+    if (returned_count != expected_return) {
+      std::cerr << "ERROR: Expected " << expected_return
+                << " tests to match the filter but returned_count = "
+                << returned_count << std::endl;
+      return -1;
+    }
+
+    const size_t expected_reports = list_only ? 0 : expected_return;
+    const size_t reports_count = test_reporter.GetCount();
+    if (reports_count != expected_reports) {
+      std::cerr << "ERROR: Expected " << expected_reports
+                << " tests to be run but reported_count = " << reports_count
+                << std::endl;
       return -1;
     }
   }
+
   return 0;
 }
diff --git a/test/fixture_test.cc b/test/fixture_test.cc
index 92fbc4c..bf800fd 100644
--- a/test/fixture_test.cc
+++ b/test/fixture_test.cc
@@ -6,14 +6,18 @@
 
 class MyFixture : public ::benchmark::Fixture {
  public:
-  void SetUp(const ::benchmark::State&) {
-    assert(data.get() == nullptr);
-    data.reset(new int(42));
+  void SetUp(const ::benchmark::State& state) {
+    if (state.thread_index == 0) {
+      assert(data.get() == nullptr);
+      data.reset(new int(42));
+    }
   }
 
-  void TearDown() {
-    assert(data.get() != nullptr);
-    data.release();
+  void TearDown(const ::benchmark::State& state) {
+    if (state.thread_index == 0) {
+      assert(data.get() != nullptr);
+      data.reset();
+    }
   }
 
   ~MyFixture() {
@@ -32,10 +36,17 @@ BENCHMARK_F(MyFixture, Foo)(benchmark::State& st) {
 }
 
 BENCHMARK_DEFINE_F(MyFixture, Bar)(benchmark::State& st) {
+  if (st.thread_index == 0) {
+    assert(data.get() != nullptr);
+    assert(*data == 42);
+  }
   while (st.KeepRunning()) {
+    assert(data.get() != nullptr);
+    assert(*data == 42);
   }
   st.SetItemsProcessed(st.range_x());
 }
 BENCHMARK_REGISTER_F(MyFixture, Bar)->Arg(42);
+BENCHMARK_REGISTER_F(MyFixture, Bar)->Arg(42)->ThreadPerCpu();
 
 BENCHMARK_MAIN()
diff --git a/test/map_test.cc b/test/map_test.cc
index 8d5f6ec..5eccf8d 100644
--- a/test/map_test.cc
+++ b/test/map_test.cc
@@ -1,5 +1,6 @@
 #include "benchmark/benchmark.h"
 
+#include <cstdlib>
 #include <map>
 
 namespace {
@@ -36,7 +37,7 @@ class MapFixture : public ::benchmark::Fixture {
     m = ConstructRandomMap(st.range_x());
   }
 
-  void TearDown() {
+  void TearDown(const ::benchmark::State&) {
     m.clear();
   }
 
diff --git a/test/options_test.cc b/test/options_test.cc
index d4c682d..78cedae 100644
--- a/test/options_test.cc
+++ b/test/options_test.cc
@@ -1,12 +1,29 @@
 #include "benchmark/benchmark_api.h"
 
+#include <chrono>
+#include <thread>
+
 void BM_basic(benchmark::State& state) {
   while (state.KeepRunning()) {
   }
 }
+
+void BM_basic_slow(benchmark::State& state) {
+  std::chrono::milliseconds sleep_duration(state.range_x());
+  while (state.KeepRunning()) {
+    std::this_thread::sleep_for(
+      std::chrono::duration_cast<std::chrono::nanoseconds>(sleep_duration)
+    );
+  }
+}
+
 BENCHMARK(BM_basic);
 BENCHMARK(BM_basic)->Arg(42);
+BENCHMARK(BM_basic_slow)->Arg(10)->Unit(benchmark::kNanosecond);
+BENCHMARK(BM_basic_slow)->Arg(100)->Unit(benchmark::kMicrosecond);
+BENCHMARK(BM_basic_slow)->Arg(1000)->Unit(benchmark::kMillisecond);
 BENCHMARK(BM_basic)->Range(1, 8);
+BENCHMARK(BM_basic)->RangeMultiplier(2)->Range(1, 8);
 BENCHMARK(BM_basic)->DenseRange(10, 15);
 BENCHMARK(BM_basic)->ArgPair(42, 42);
 BENCHMARK(BM_basic)->RangePair(64, 512, 64, 512);
@@ -14,6 +31,7 @@ BENCHMARK(BM_basic)->MinTime(0.7);
 BENCHMARK(BM_basic)->UseRealTime();
 BENCHMARK(BM_basic)->ThreadRange(2, 4);
 BENCHMARK(BM_basic)->ThreadPerCpu();
+BENCHMARK(BM_basic)->Repetitions(3);
 
 void CustomArgs(benchmark::internal::Benchmark* b) {
   for (int i = 0; i < 10; ++i) {
diff --git a/test/reporter_output_test.cc b/test/reporter_output_test.cc
new file mode 100644
index 0000000..b3898ac
--- /dev/null
+++ b/test/reporter_output_test.cc
@@ -0,0 +1,259 @@
+
+#undef NDEBUG
+#include "benchmark/benchmark.h"
+#include "../src/check.h" // NOTE: check.h is for internal use only!
+#include "../src/re.h" // NOTE: re.h is for internal use only
+#include <cassert>
+#include <cstring>
+#include <iostream>
+#include <sstream>
+#include <vector>
+#include <utility>
+
+namespace {
+
+// ========================================================================= //
+// -------------------------- Testing Case --------------------------------- //
+// ========================================================================= //
+
+enum MatchRules {
+  MR_Default, // Skip non-matching lines until a match is found.
+  MR_Next     // Match must occur on the next line.
+};
+
+struct TestCase {
+  std::string regex;
+  int match_rule;
+
+  TestCase(std::string re, int rule = MR_Default) : regex(re), match_rule(rule) {}
+
+  void Check(std::stringstream& remaining_output) const {
+    benchmark::Regex r;
+    std::string err_str;
+    r.Init(regex, &err_str);
+    CHECK(err_str.empty()) << "Could not construct regex \"" << regex << "\""
+                           << " got Error: " << err_str;
+
+    std::string line;
+    while (remaining_output.eof() == false) {
+      CHECK(remaining_output.good());
+      std::getline(remaining_output, line);
+      if (r.Match(line)) return;
+      CHECK(match_rule != MR_Next) << "Expected line \"" << line
+                                   << "\" to match regex \"" << regex << "\"";
+    }
+
+    CHECK(remaining_output.eof() == false)
+        << "End of output reached before match for regex \"" << regex
+        << "\" was found";
+  }
+};
+
+std::vector<TestCase> ConsoleOutputTests;
+std::vector<TestCase> JSONOutputTests;
+std::vector<TestCase> CSVOutputTests;
+
+std::vector<TestCase> ConsoleErrorTests;
+std::vector<TestCase> JSONErrorTests;
+std::vector<TestCase> CSVErrorTests;
+
+// ========================================================================= //
+// -------------------------- Test Helpers --------------------------------- //
+// ========================================================================= //
+
+class TestReporter : public benchmark::BenchmarkReporter {
+public:
+  TestReporter(std::vector<benchmark::BenchmarkReporter*> reps)
+      : reporters_(reps) {}
+
+  virtual bool ReportContext(const Context& context) {
+    bool last_ret = false;
+    bool first = true;
+    for (auto rep : reporters_) {
+      bool new_ret = rep->ReportContext(context);
+      CHECK(first || new_ret == last_ret)
+          << "Reports return different values for ReportContext";
+      first = false;
+      last_ret = new_ret;
+    }
+    return last_ret;
+  }
+
+  virtual void ReportRuns(const std::vector<Run>& report) {
+    for (auto rep : reporters_)
+      rep->ReportRuns(report);
+  }
+
+  virtual void Finalize() {
+    for (auto rep : reporters_)
+      rep->Finalize();
+  }
+
+private:
+  std::vector<benchmark::BenchmarkReporter*> reporters_;
+};
+
+
+#define CONCAT2(x, y) x##y
+#define CONCAT(x, y) CONCAT2(x, y)
+
+#define ADD_CASES(...) \
+  int CONCAT(dummy, __LINE__) = AddCases(__VA_ARGS__)
+
+int AddCases(std::vector<TestCase>* out, std::initializer_list<TestCase> const& v) {
+  for (auto const& TC : v)
+    out->push_back(TC);
+  return 0;
+}
+
+template <class First>
+std::string join(First f) { return f; }
+
+template <class First, class ...Args>
+std::string join(First f, Args&&... args) {
+  return std::string(std::move(f)) + "[ ]+" + join(std::forward<Args>(args)...);
+}
+
+std::string dec_re = "[0-9]+\\.[0-9]+";
+
+}  // end namespace
+
+// ========================================================================= //
+// ---------------------- Testing Prologue Output -------------------------- //
+// ========================================================================= //
+
+ADD_CASES(&ConsoleOutputTests, {
+  {join("^Benchmark", "Time", "CPU", "Iterations$"), MR_Next},
+  {"^[-]+$", MR_Next}
+});
+ADD_CASES(&CSVOutputTests, {
+  {"name,iterations,real_time,cpu_time,time_unit,bytes_per_second,items_per_second,"
+   "label,error_occurred,error_message"}
+});
+
+// ========================================================================= //
+// ------------------------ Testing Basic Output --------------------------- //
+// ========================================================================= //
+
+void BM_basic(benchmark::State& state) {
+  while (state.KeepRunning()) {}
+}
+BENCHMARK(BM_basic);
+
+ADD_CASES(&ConsoleOutputTests, {
+  {"^BM_basic[ ]+[0-9]{1,5} ns[ ]+[0-9]{1,5} ns[ ]+[0-9]+$"}
+});
+ADD_CASES(&JSONOutputTests, {
+  {"\"name\": \"BM_basic\",$"},
+  {"\"iterations\": [0-9]+,$", MR_Next},
+  {"\"real_time\": [0-9]{1,5},$", MR_Next},
+  {"\"cpu_time\": [0-9]{1,5},$", MR_Next},
+  {"\"time_unit\": \"ns\"$", MR_Next},
+  {"}", MR_Next}
+});
+ADD_CASES(&CSVOutputTests, {
+  {"^\"BM_basic\",[0-9]+," + dec_re + "," + dec_re + ",ns,,,,,$"}
+});
+
+// ========================================================================= //
+// ------------------------ Testing Error Output --------------------------- //
+// ========================================================================= //
+
+void BM_error(benchmark::State& state) {
+  state.SkipWithError("message");
+  while(state.KeepRunning()) {}
+}
+BENCHMARK(BM_error);
+ADD_CASES(&ConsoleOutputTests, {
+  {"^BM_error[ ]+ERROR OCCURRED: 'message'$"}
+});
+ADD_CASES(&JSONOutputTests, {
+  {"\"name\": \"BM_error\",$"},
+  {"\"error_occurred\": true,$", MR_Next},
+  {"\"error_message\": \"message\",$", MR_Next}
+});
+
+ADD_CASES(&CSVOutputTests, {
+  {"^\"BM_error\",,,,,,,,true,\"message\"$"}
+});
+
+
+// ========================================================================= //
+// ----------------------- Testing Complexity Output ----------------------- //
+// ========================================================================= //
+
+void BM_Complexity_O1(benchmark::State& state) {
+  while (state.KeepRunning()) {
+  }
+  state.SetComplexityN(state.range_x());
+}
+BENCHMARK(BM_Complexity_O1)->Range(1, 1<<18)->Complexity(benchmark::o1);
+
+std::string bigOStr = "[0-9]+\\.[0-9]+ \\([0-9]+\\)";
+
+ADD_CASES(&ConsoleOutputTests, {
+  {join("^BM_Complexity_O1_BigO", bigOStr, bigOStr) + "[ ]*$"},
+  {join("^BM_Complexity_O1_RMS", "[0-9]+ %", "[0-9]+ %") + "[ ]*$"}
+});
+
+
+// ========================================================================= //
+// --------------------------- TEST CASES END ------------------------------ //
+// ========================================================================= //
+
+
+int main(int argc, char* argv[]) {
+  // Add --color_print=false to argv since we don't want to match color codes.
+  char new_arg[64];
+  char* new_argv[64];
+  std::copy(argv, argv + argc, new_argv);
+  new_argv[argc++] = std::strcpy(new_arg, "--color_print=false");
+  benchmark::Initialize(&argc, new_argv);
+
+  benchmark::ConsoleReporter CR;
+  benchmark::JSONReporter JR;
+  benchmark::CSVReporter CSVR;
+  struct ReporterTest {
+    const char* name;
+    std::vector<TestCase>& output_cases;
+    std::vector<TestCase>& error_cases;
+    benchmark::BenchmarkReporter& reporter;
+    std::stringstream out_stream;
+    std::stringstream err_stream;
+
+    ReporterTest(const char* n,
+                 std::vector<TestCase>& out_tc,
+                 std::vector<TestCase>& err_tc,
+                 benchmark::BenchmarkReporter& br)
+        : name(n), output_cases(out_tc), error_cases(err_tc), reporter(br) {
+      reporter.SetOutputStream(&out_stream);
+      reporter.SetErrorStream(&err_stream);
+    }
+  } TestCases[] = {
+      {"ConsoleReporter", ConsoleOutputTests, ConsoleErrorTests, CR},
+      {"JSONReporter", JSONOutputTests, JSONErrorTests, JR},
+      {"CSVReporter", CSVOutputTests, CSVErrorTests, CSVR}
+  };
+
+  // Create the test reporter and run the benchmarks.
+  std::cout << "Running benchmarks...\n";
+  TestReporter test_rep({&CR, &JR, &CSVR});
+  benchmark::RunSpecifiedBenchmarks(&test_rep);
+
+  for (auto& rep_test : TestCases) {
+    std::string msg = std::string("\nTesting ") + rep_test.name + " Output\n";
+    std::string banner(msg.size() - 1, '-');
+    std::cout << banner << msg << banner << "\n";
+
+    std::cerr << rep_test.err_stream.str();
+    std::cout << rep_test.out_stream.str();
+
+    for (const auto& TC : rep_test.error_cases)
+      TC.Check(rep_test.err_stream);
+    for (const auto& TC : rep_test.output_cases)
+      TC.Check(rep_test.out_stream);
+
+    std::cout << "\n";
+  }
+  return 0;
+}
diff --git a/test/skip_with_error_test.cc b/test/skip_with_error_test.cc
new file mode 100644
index 0000000..dafbd64
--- /dev/null
+++ b/test/skip_with_error_test.cc
@@ -0,0 +1,161 @@
+
+#undef NDEBUG
+#include "benchmark/benchmark.h"
+#include "../src/check.h" // NOTE: check.h is for internal use only!
+#include <cassert>
+#include <vector>
+
+namespace {
+
+class TestReporter : public benchmark::ConsoleReporter {
+ public:
+  virtual bool ReportContext(const Context& context) {
+    return ConsoleReporter::ReportContext(context);
+  };
+
+  virtual void ReportRuns(const std::vector<Run>& report) {
+    all_runs_.insert(all_runs_.end(), begin(report), end(report));
+    ConsoleReporter::ReportRuns(report);
+  }
+
+  TestReporter() {}
+  virtual ~TestReporter() {}
+
+  mutable std::vector<Run> all_runs_;
+};
+
+struct TestCase {
+  std::string name;
+  bool error_occurred;
+  std::string error_message;
+
+  typedef benchmark::BenchmarkReporter::Run Run;
+
+  void CheckRun(Run const& run) const {
+    CHECK(name == run.benchmark_name) << "expected " << name << " got " << run.benchmark_name;
+    CHECK(error_occurred == run.error_occurred);
+    CHECK(error_message == run.error_message);
+    if (error_occurred) {
+      //CHECK(run.iterations == 0);
+    } else {
+      CHECK(run.iterations != 0);
+    }
+  }
+};
+
+std::vector<TestCase> ExpectedResults;
+
+int AddCases(const char* base_name, std::initializer_list<TestCase> const& v) {
+  for (auto TC : v) {
+    TC.name = base_name + TC.name;
+    ExpectedResults.push_back(std::move(TC));
+  }
+  return 0;
+}
+
+#define CONCAT(x, y) CONCAT2(x, y)
+#define CONCAT2(x, y) x##y
+#define ADD_CASES(...) \
+int CONCAT(dummy, __LINE__) = AddCases(__VA_ARGS__)
+
+}  // end namespace
+
+
+void BM_error_before_running(benchmark::State& state) {
+  state.SkipWithError("error message");
+  while (state.KeepRunning()) {
+    assert(false);
+  }
+}
+BENCHMARK(BM_error_before_running);
+ADD_CASES("BM_error_before_running",
+          {{"", true, "error message"}});
+
+void BM_error_during_running(benchmark::State& state) {
+  int first_iter = true;
+  while (state.KeepRunning()) {
+    if (state.range_x() == 1 && state.thread_index <= (state.threads / 2)) {
+      assert(first_iter);
+      first_iter = false;
+      state.SkipWithError("error message");
+    } else {
+      state.PauseTiming();
+      state.ResumeTiming();
+    }
+  }
+}
+BENCHMARK(BM_error_during_running)->Arg(1)->Arg(2)->ThreadRange(1, 8);
+ADD_CASES(
+    "BM_error_during_running",
+    {{"/1/threads:1", true, "error message"},
+     {"/1/threads:2", true, "error message"},
+     {"/1/threads:4", true, "error message"},
+     {"/1/threads:8", true, "error message"},
+     {"/2/threads:1", false, ""},
+     {"/2/threads:2", false, ""},
+     {"/2/threads:4", false, ""},
+     {"/2/threads:8", false, ""}}
+);
+
+void BM_error_after_running(benchmark::State& state) {
+  while (state.KeepRunning()) {
+    benchmark::DoNotOptimize(state.iterations());
+  }
+  if (state.thread_index <= (state.threads / 2))
+    state.SkipWithError("error message");
+}
+BENCHMARK(BM_error_after_running)->ThreadRange(1, 8);
+ADD_CASES(
+    "BM_error_after_running",
+    {{"/threads:1", true, "error message"},
+     {"/threads:2", true, "error message"},
+     {"/threads:4", true, "error message"},
+     {"/threads:8", true, "error message"}}
+);
+
+void BM_error_while_paused(benchmark::State& state) {
+  bool first_iter = true;
+  while (state.KeepRunning()) {
+    if (state.range_x() == 1 && state.thread_index <= (state.threads / 2)) {
+      assert(first_iter);
+      first_iter = false;
+      state.PauseTiming();
+      state.SkipWithError("error message");
+    } else {
+      state.PauseTiming();
+      state.ResumeTiming();
+    }
+  }
+}
+BENCHMARK(BM_error_while_paused)->Arg(1)->Arg(2)->ThreadRange(1, 8);
+ADD_CASES(
+    "BM_error_while_paused",
+    {{"/1/threads:1", true, "error message"},
+     {"/1/threads:2", true, "error message"},
+     {"/1/threads:4", true, "error message"},
+     {"/1/threads:8", true, "error message"},
+     {"/2/threads:1", false, ""},
+     {"/2/threads:2", false, ""},
+     {"/2/threads:4", false, ""},
+     {"/2/threads:8", false, ""}}
+);
+
+
+int main(int argc, char* argv[]) {
+  benchmark::Initialize(&argc, argv);
+
+  TestReporter test_reporter;
+  benchmark::RunSpecifiedBenchmarks(&test_reporter);
+
+  typedef benchmark::BenchmarkReporter::Run Run;
+  auto EB = ExpectedResults.begin();
+
+  for (Run const& run : test_reporter.all_runs_) {
+    assert(EB != ExpectedResults.end());
+    EB->CheckRun(run);
+    ++EB;
+  }
+  assert(EB == ExpectedResults.end());
+
+  return 0;
+}
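The complexity_test.cc additions above exercise the asymptotic-complexity reporting that this merge brings in (src/complexity.cc together with the Complexity() and SetComplexityN() hooks). As a rough usage sketch against this version of the library, and not part of the patch itself, a user benchmark opting into the feature could look like the following; BM_VectorPushBack and the chosen ranges are invented for illustration:

#include "benchmark/benchmark.h"
#include <vector>

// Hypothetical example: fit a complexity curve to a push_back loop.
static void BM_VectorPushBack(benchmark::State& state) {
  while (state.KeepRunning()) {
    std::vector<int> v;
    for (int i = 0; i < state.range_x(); ++i) v.push_back(i);
    benchmark::DoNotOptimize(v.data());
  }
  state.SetComplexityN(state.range_x());  // the "N" used for the curve fit
}
// Let the library deduce the best-fitting curve...
BENCHMARK(BM_VectorPushBack)->RangeMultiplier(2)->Range(1<<10, 1<<16)->Complexity();
// ...or pin a specific one, as complexity_test.cc does with benchmark::oN.
BENCHMARK(BM_VectorPushBack)->RangeMultiplier(2)->Range(1<<10, 1<<16)->Complexity(benchmark::oN);

BENCHMARK_MAIN()

The reporters then emit the fitted-coefficient row (BM_VectorPushBack_BigO) and the error row (BM_VectorPushBack_RMS) that the regexes in the tests above match against.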