diff options
author | Marat Dukhan <maratek@gmail.com> | 2017-02-12 01:32:06 -0500 |
---|---|---|
committer | Marat Dukhan <maratek@gmail.com> | 2017-02-12 01:32:06 -0500 |
commit | 967a027102f1ba35bb5abcacf6d2591b7666dda6 (patch) | |
tree | ff05d9ea2ce3cd3b510e1c923f6b8f196b2f315d | |
parent | 5e42a78ef71524df14d04ec22a5b37ba683fc6d0 (diff) | |
download | FXdiv-967a027102f1ba35bb5abcacf6d2591b7666dda6.tar.gz |
Full set of benchmarks
-rw-r--r-- | README.md | 4 | ||||
-rw-r--r-- | bench/divide.cc | 51 | ||||
-rw-r--r-- | bench/init.cc | 23 | ||||
-rw-r--r-- | bench/multiply.cc | 47 | ||||
-rw-r--r-- | bench/quotient.cc | 47 | ||||
-rw-r--r-- | bench/round-down.cc | 47 | ||||
-rwxr-xr-x | configure.py | 13 | ||||
-rw-r--r-- | confu.yaml | 1 | ||||
-rw-r--r-- | test/multiply-high.cc (renamed from test/MultiplyHigh.cc) | 0 | ||||
-rw-r--r-- | test/quotient.cc (renamed from test/Quotient.cc) | 0 |
10 files changed, 229 insertions, 4 deletions
@@ -9,7 +9,7 @@ On modern CPUs and GPUs integer division is several times slower than multiplica
 - Header-only library, no installation or build required
 - Compatible with C99, C++, OpenCL, and CUDA
 - Uses platform-specific compiler intrinsics for optimal performance
-- Fully covered with unit tests (Google Test framework)
+- Covered with unit tests and microbenchmarks
 ## Example
@@ -45,6 +45,8 @@ Project is in alpha stage. API is unstable. Currently working features:
 | x86 MSVC | Works | Works | Works |
 | ARMv7 gcc | Works | Works | Works |
 | PPC64 gcc | Works | Works | Works |
+| PNaCl clang | Works | Works | Works |
+| Asm.js clang | Works | Works | Works |
 | CUDA | Untested | Untested | Untested |
 | OpenCL | Untested | Untested | Untested |
diff --git a/bench/divide.cc b/bench/divide.cc
new file mode 100644
index 0000000..78aa38c
--- /dev/null
+++ b/bench/divide.cc
@@ -0,0 +1,51 @@
+#include <benchmark/benchmark.h>
+
+#include <fxdiv.h>
+
+static void fxdiv_divide_uint32_t(benchmark::State& state) {
+	const fxdiv_divisor_uint32_t divisor = fxdiv_init_uint32_t(UINT32_C(65537));
+	uint32_t x = 0;
+	while (state.KeepRunning()) {
+		const fxdiv_result_uint32_t result = fxdiv_divide_uint32_t(x++, divisor);
+		benchmark::DoNotOptimize(result);
+	}
+}
+BENCHMARK(fxdiv_divide_uint32_t);
+
+static void fxdiv_divide_uint64_t(benchmark::State& state) {
+	const fxdiv_divisor_uint64_t divisor = fxdiv_init_uint64_t(UINT64_C(4294967311));
+	uint64_t x = 0;
+	while (state.KeepRunning()) {
+		const fxdiv_result_uint64_t result = fxdiv_divide_uint64_t(x++, divisor);
+		benchmark::DoNotOptimize(result);
+	}
+}
+BENCHMARK(fxdiv_divide_uint64_t);
+
+static void native_divide_uint32_t(benchmark::State& state) {
+	uint32_t divisor = UINT32_C(65537);
+	benchmark::DoNotOptimize(&divisor);
+	uint32_t x = 0;
+	while (state.KeepRunning()) {
+		const uint32_t quotient = x / divisor;
+		const uint32_t remainder = x++ % divisor;
+		benchmark::DoNotOptimize(quotient);
+		benchmark::DoNotOptimize(remainder);
+	}
+}
+BENCHMARK(native_divide_uint32_t);
+
+static void native_divide_uint64_t(benchmark::State& state) {
+	uint64_t divisor = UINT64_C(4294967311);
+	benchmark::DoNotOptimize(&divisor);
+	uint64_t x = 0;
+	while (state.KeepRunning()) {
+		const uint64_t quotient = x / divisor;
+		const uint64_t remainder = x++ % divisor;
+		benchmark::DoNotOptimize(quotient);
+		benchmark::DoNotOptimize(remainder);
+	}
+}
+BENCHMARK(native_divide_uint64_t);
+
+BENCHMARK_MAIN();
diff --git a/bench/init.cc b/bench/init.cc
new file mode 100644
index 0000000..060db49
--- /dev/null
+++ b/bench/init.cc
@@ -0,0 +1,23 @@
+#include <benchmark/benchmark.h>
+
+#include <fxdiv.h>
+
+static void init_uint32_t(benchmark::State& state) {
+	uint32_t d = UINT32_C(0x1971DB6B);
+	while (state.KeepRunning()) {
+		const fxdiv_divisor_uint32_t divisor = fxdiv_init_uint32_t(d++);
+		benchmark::DoNotOptimize(divisor);
+	}
+}
+BENCHMARK(init_uint32_t);
+
+static void init_uint64_t(benchmark::State& state) {
+	uint64_t d = UINT64_C(0x425E892B38148FAD);
+	while (state.KeepRunning()) {
+		const fxdiv_divisor_uint64_t divisor = fxdiv_init_uint64_t(d++);
+		benchmark::DoNotOptimize(divisor);
+	}
+}
+BENCHMARK(init_uint64_t);
+
+BENCHMARK_MAIN();
diff --git a/bench/multiply.cc b/bench/multiply.cc
new file mode 100644
index 0000000..c9757a4
--- /dev/null
+++ b/bench/multiply.cc
@@ -0,0 +1,47 @@
+#include <benchmark/benchmark.h>
+
+#include <fxdiv.h>
+
+static void fxdiv_mulext_uint32_t(benchmark::State& state) {
+	uint32_t c = UINT32_C(0x1971DB6B);
+	benchmark::DoNotOptimize(&c);
+	uint32_t d = c;
+	while (state.KeepRunning()) {
+		const uint64_t product = fxdiv_mulext_uint32_t(c, d++);
+		benchmark::DoNotOptimize(product);
+	}
+}
+BENCHMARK(fxdiv_mulext_uint32_t);
+
+static void native_mulext_uint32_t(benchmark::State& state) {
+	uint32_t c = UINT32_C(0x1971DB6B);
+	benchmark::DoNotOptimize(&c);
+	uint32_t d = c;
+	while (state.KeepRunning()) {
+		const uint64_t product = (uint64_t) c * (uint64_t) (d++);
+		benchmark::DoNotOptimize(product);
+	}
+}
+BENCHMARK(native_mulext_uint32_t);
+
+static void fxdiv_mulhi_uint32_t(benchmark::State& state) {
+	const uint32_t c = UINT32_C(0x1971DB6B);
+	uint32_t x = c;
+	while (state.KeepRunning()) {
+		const uint32_t product = fxdiv_mulhi_uint32_t(c, x++);
+		benchmark::DoNotOptimize(product);
+	}
+}
+BENCHMARK(fxdiv_mulhi_uint32_t);
+
+static void fxdiv_mulhi_uint64_t(benchmark::State& state) {
+	const uint64_t c = UINT64_C(0x425E892B38148FAD);
+	uint64_t x = c;
+	while (state.KeepRunning()) {
+		const uint64_t product = fxdiv_mulhi_uint64_t(c, x++);
+		benchmark::DoNotOptimize(product);
+	}
+}
+BENCHMARK(fxdiv_mulhi_uint64_t);
+
+BENCHMARK_MAIN();
diff --git a/bench/quotient.cc b/bench/quotient.cc
new file mode 100644
index 0000000..374ac2b
--- /dev/null
+++ b/bench/quotient.cc
@@ -0,0 +1,47 @@
+#include <benchmark/benchmark.h>
+
+#include <fxdiv.h>
+
+static void fxdiv_quotient_uint32_t(benchmark::State& state) {
+	const fxdiv_divisor_uint32_t divisor = fxdiv_init_uint32_t(UINT32_C(65537));
+	uint32_t x = 0;
+	while (state.KeepRunning()) {
+		uint32_t quotient = fxdiv_quotient_uint32_t(x++, divisor);
+		benchmark::DoNotOptimize(quotient);
+	}
+}
+BENCHMARK(fxdiv_quotient_uint32_t);
+
+static void fxdiv_quotient_uint64_t(benchmark::State& state) {
+	const fxdiv_divisor_uint64_t divisor = fxdiv_init_uint64_t(UINT64_C(4294967311));
+	uint64_t x = 0;
+	while (state.KeepRunning()) {
+		uint64_t quotient = fxdiv_quotient_uint64_t(x++, divisor);
+		benchmark::DoNotOptimize(quotient);
+	}
+}
+BENCHMARK(fxdiv_quotient_uint64_t);
+
+static void native_quotient_uint32_t(benchmark::State& state) {
+	uint32_t divisor = UINT32_C(65537);
+	benchmark::DoNotOptimize(&divisor);
+	uint32_t x = UINT32_MAX;
+	while (state.KeepRunning()) {
+		uint32_t quotient = x-- / divisor;
+		benchmark::DoNotOptimize(quotient);
+	}
+}
+BENCHMARK(native_quotient_uint32_t);
+
+static void native_quotient_uint64_t(benchmark::State& state) {
+	uint64_t divisor = UINT64_C(4294967311);
+	benchmark::DoNotOptimize(&divisor);
+	uint64_t x = UINT64_MAX;
+	while (state.KeepRunning()) {
+		const uint64_t quotient = x-- / divisor;
+		benchmark::DoNotOptimize(quotient);
+	}
+}
+BENCHMARK(native_quotient_uint64_t);
+
+BENCHMARK_MAIN();
diff --git a/bench/round-down.cc b/bench/round-down.cc
new file mode 100644
index 0000000..860ce11
--- /dev/null
+++ b/bench/round-down.cc
@@ -0,0 +1,47 @@
+#include <benchmark/benchmark.h>
+
+#include <fxdiv.h>
+
+static void fxdiv_round_down_uint32_t(benchmark::State& state) {
+	const fxdiv_divisor_uint32_t multiple = fxdiv_init_uint32_t(UINT32_C(65537));
+	uint32_t x = 0;
+	while (state.KeepRunning()) {
+		const uint32_t rounded_x = fxdiv_round_down_uint32_t(x++, multiple);
+		benchmark::DoNotOptimize(rounded_x);
+	}
+}
+BENCHMARK(fxdiv_round_down_uint32_t);
+
+static void fxdiv_round_down_uint64_t(benchmark::State& state) {
+	const fxdiv_divisor_uint64_t multiple = fxdiv_init_uint64_t(UINT64_C(4294967311));
+	uint64_t x = 0;
+	while (state.KeepRunning()) {
+		const uint64_t rounded_x = fxdiv_round_down_uint64_t(x++, multiple);
+		benchmark::DoNotOptimize(rounded_x);
+	}
+}
+BENCHMARK(fxdiv_round_down_uint64_t);
+
+static void native_round_down_uint32_t(benchmark::State& state) {
+	uint32_t multiple = UINT32_C(65537);
+	benchmark::DoNotOptimize(&multiple);
+	uint32_t x = 0;
+	while (state.KeepRunning()) {
+		const uint32_t rounded_x = x++ / multiple * multiple;
+		benchmark::DoNotOptimize(rounded_x);
+	}
+}
+BENCHMARK(native_round_down_uint32_t);
+
+static void native_round_down_uint64_t(benchmark::State& state) {
+	uint64_t multiple = UINT64_C(4294967311);
+	benchmark::DoNotOptimize(&multiple);
+	uint64_t x = 0;
+	while (state.KeepRunning()) {
+		const uint64_t rounded_x = x++ / multiple * multiple;
+		benchmark::DoNotOptimize(rounded_x);
+	}
+}
+BENCHMARK(native_round_down_uint64_t);
+
+BENCHMARK_MAIN();
diff --git a/configure.py b/configure.py
index 4c415bf..e61363f 100755
--- a/configure.py
+++ b/configure.py
@@ -11,9 +11,16 @@ def main(args):
     build.export_cpath("include", ["fxdiv.h"])
-    with build.options(source_dir="test", deps=[build.deps.googletest]):
-        build.unittest("MultiplyHighTest", build.cxx("MultiplyHigh.cc"))
-        build.unittest("QuotientTest", build.cxx("Quotient.cc"))
+    with build.options(source_dir="test", deps=build.deps.googletest):
+        build.unittest("multiply-high-test", build.cxx("multiply-high.cc"))
+        build.unittest("quotient-test", build.cxx("quotient.cc"))
+
+    with build.options(source_dir="bench", deps=build.deps.googlebenchmark):
+        build.benchmark("init-bench", build.cxx("init.cc"))
+        build.benchmark("multiply-bench", build.cxx("multiply.cc"))
+        build.benchmark("divide-bench", build.cxx("divide.cc"))
+        build.benchmark("quotient-bench", build.cxx("quotient.cc"))
+        build.benchmark("round-down-bench", build.cxx("round-down.cc"))
     return build
@@ -3,3 +3,4 @@ title: division via fixed-point multiplication by inverse
 license: MIT
 deps:
   - name: googletest
+  - name: googlebenchmark
diff --git a/test/MultiplyHigh.cc b/test/multiply-high.cc
index 10f9013..10f9013 100644
--- a/test/MultiplyHigh.cc
+++ b/test/multiply-high.cc
diff --git a/test/Quotient.cc b/test/quotient.cc
index ad5cd8e..ad5cd8e 100644
--- a/test/Quotient.cc
+++ b/test/quotient.cc
|