aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Marat Dukhan <maratek@gmail.com> 2017-02-12 01:32:06 -0500
committer Marat Dukhan <maratek@gmail.com> 2017-02-12 01:32:06 -0500
commit 967a027102f1ba35bb5abcacf6d2591b7666dda6 (patch)
tree ff05d9ea2ce3cd3b510e1c923f6b8f196b2f315d
parent 5e42a78ef71524df14d04ec22a5b37ba683fc6d0 (diff)
download FXdiv-967a027102f1ba35bb5abcacf6d2591b7666dda6.tar.gz
Full set of benchmarks
-rw-r--r-- README.md 4
-rw-r--r-- bench/divide.cc 51
-rw-r--r-- bench/init.cc 23
-rw-r--r-- bench/multiply.cc 47
-rw-r--r-- bench/quotient.cc 47
-rw-r--r-- bench/round-down.cc 47
-rwxr-xr-x configure.py 13
-rw-r--r-- confu.yaml 1
-rw-r--r-- test/multiply-high.cc (renamed from test/MultiplyHigh.cc) 0
-rw-r--r-- test/quotient.cc (renamed from test/Quotient.cc) 0
10 files changed, 229 insertions, 4 deletions
diff --git a/README.md b/README.md
index 8144bfc..d0493a4 100644
--- a/README.md
+++ b/README.md
@@ -9,7 +9,7 @@ On modern CPUs and GPUs integer division is several times slower than multiplica
- Header-only library, no installation or build required
- Compatible with C99, C++, OpenCL, and CUDA
- Uses platform-specific compiler intrinsics for optimal performance
-- Fully covered with unit tests (Google Test framework)
+- Covered with unit tests and microbenchmarks
## Example
@@ -45,6 +45,8 @@ Project is in alpha stage. API is unstable. Currently working features:
| x86 MSVC | Works | Works | Works |
| ARMv7 gcc | Works | Works | Works |
| PPC64 gcc | Works | Works | Works |
+| PNaCl clang | Works | Works | Works |
+| Asm.js clang | Works | Works | Works |
| CUDA | Untested | Untested | Untested |
| OpenCL | Untested | Untested | Untested |
diff --git a/bench/divide.cc b/bench/divide.cc
new file mode 100644
index 0000000..78aa38c
--- /dev/null
+++ b/bench/divide.cc
@@ -0,0 +1,51 @@
+#include <benchmark/benchmark.h>
+
+#include <fxdiv.h>
+
+// Benchmark: fxdiv_divide_uint32_t() with a divisor precomputed once from
+// 65537. The dividend starts at 0 and grows by one on every timed iteration.
+static void fxdiv_divide_uint32_t(benchmark::State& state) {
+	const fxdiv_divisor_uint32_t divisor = fxdiv_init_uint32_t(UINT32_C(65537));
+	uint32_t x = 0;
+	while (state.KeepRunning()) {
+		// Deliberately non-const: recent Google Benchmark releases deprecate
+		// the const-reference DoNotOptimize overload because it can still
+		// permit unwanted optimizations.
+		fxdiv_result_uint32_t result = fxdiv_divide_uint32_t(x++, divisor);
+		benchmark::DoNotOptimize(result);
+	}
+}
+BENCHMARK(fxdiv_divide_uint32_t);
+
+// Benchmark: fxdiv_divide_uint64_t() with a divisor precomputed once from
+// 4294967311. The dividend starts at 0 and grows by one on every timed
+// iteration.
+static void fxdiv_divide_uint64_t(benchmark::State& state) {
+	const fxdiv_divisor_uint64_t divisor = fxdiv_init_uint64_t(UINT64_C(4294967311));
+	uint64_t x = 0;
+	while (state.KeepRunning()) {
+		// Deliberately non-const: recent Google Benchmark releases deprecate
+		// the const-reference DoNotOptimize overload.
+		fxdiv_result_uint64_t result = fxdiv_divide_uint64_t(x++, divisor);
+		benchmark::DoNotOptimize(result);
+	}
+}
+BENCHMARK(fxdiv_divide_uint64_t);
+
+// Baseline benchmark: quotient and remainder of a 32-bit dividend computed
+// with the built-in / and % operators. The divisor is 65537; the dividend
+// starts at 0 and grows by one on every timed iteration.
+static void native_divide_uint32_t(benchmark::State& state) {
+	uint32_t divisor = UINT32_C(65537);
+	// The divisor's address is handed to DoNotOptimize - presumably so the
+	// compiler cannot treat the divisor as a compile-time constant.
+	benchmark::DoNotOptimize(&divisor);
+	uint32_t x = 0;
+	while (state.KeepRunning()) {
+		// Results are non-const so the non-deprecated (non-const reference)
+		// DoNotOptimize overload is selected on recent Google Benchmark.
+		uint32_t quotient = x / divisor;
+		uint32_t remainder = x++ % divisor;
+		benchmark::DoNotOptimize(quotient);
+		benchmark::DoNotOptimize(remainder);
+	}
+}
+BENCHMARK(native_divide_uint32_t);
+
+// Baseline benchmark: quotient and remainder of a 64-bit dividend computed
+// with the built-in / and % operators. The divisor is 4294967311; the dividend
+// starts at 0 and grows by one on every timed iteration.
+static void native_divide_uint64_t(benchmark::State& state) {
+	uint64_t divisor = UINT64_C(4294967311);
+	// The divisor's address is handed to DoNotOptimize - presumably so the
+	// compiler cannot treat the divisor as a compile-time constant.
+	benchmark::DoNotOptimize(&divisor);
+	uint64_t x = 0;
+	while (state.KeepRunning()) {
+		// Results are non-const so the non-deprecated (non-const reference)
+		// DoNotOptimize overload is selected on recent Google Benchmark.
+		uint64_t quotient = x / divisor;
+		uint64_t remainder = x++ % divisor;
+		benchmark::DoNotOptimize(quotient);
+		benchmark::DoNotOptimize(remainder);
+	}
+}
+BENCHMARK(native_divide_uint64_t);
+
+BENCHMARK_MAIN();
diff --git a/bench/init.cc b/bench/init.cc
new file mode 100644
index 0000000..060db49
--- /dev/null
+++ b/bench/init.cc
@@ -0,0 +1,23 @@
+#include <benchmark/benchmark.h>
+
+#include <fxdiv.h>
+
+// Benchmark: fxdiv_init_uint32_t(). The value being initialized from starts at
+// 0x1971DB6B and grows by one on every timed iteration.
+static void init_uint32_t(benchmark::State& state) {
+	uint32_t d = UINT32_C(0x1971DB6B);
+	while (state.KeepRunning()) {
+		// Deliberately non-const: recent Google Benchmark releases deprecate
+		// the const-reference DoNotOptimize overload.
+		fxdiv_divisor_uint32_t divisor = fxdiv_init_uint32_t(d++);
+		benchmark::DoNotOptimize(divisor);
+	}
+}
+BENCHMARK(init_uint32_t);
+
+// Benchmark: fxdiv_init_uint64_t(). The value being initialized from starts at
+// 0x425E892B38148FAD and grows by one on every timed iteration.
+static void init_uint64_t(benchmark::State& state) {
+	uint64_t d = UINT64_C(0x425E892B38148FAD);
+	while (state.KeepRunning()) {
+		// Deliberately non-const: recent Google Benchmark releases deprecate
+		// the const-reference DoNotOptimize overload.
+		fxdiv_divisor_uint64_t divisor = fxdiv_init_uint64_t(d++);
+		benchmark::DoNotOptimize(divisor);
+	}
+}
+BENCHMARK(init_uint64_t);
+
+BENCHMARK_MAIN();
diff --git a/bench/multiply.cc b/bench/multiply.cc
new file mode 100644
index 0000000..c9757a4
--- /dev/null
+++ b/bench/multiply.cc
@@ -0,0 +1,47 @@
+#include <benchmark/benchmark.h>
+
+#include <fxdiv.h>
+
+// Benchmark: fxdiv_mulext_uint32_t(), whose result is stored in a uint64_t.
+// The first factor stays at 0x1971DB6B; the second starts at the same value
+// and grows by one on every timed iteration.
+static void fxdiv_mulext_uint32_t(benchmark::State& state) {
+	uint32_t c = UINT32_C(0x1971DB6B);
+	// The factor's address is handed to DoNotOptimize - presumably so the
+	// compiler cannot treat it as a compile-time constant.
+	benchmark::DoNotOptimize(&c);
+	uint32_t d = c;
+	while (state.KeepRunning()) {
+		// Deliberately non-const: recent Google Benchmark releases deprecate
+		// the const-reference DoNotOptimize overload.
+		uint64_t product = fxdiv_mulext_uint32_t(c, d++);
+		benchmark::DoNotOptimize(product);
+	}
+}
+BENCHMARK(fxdiv_mulext_uint32_t);
+
+// Baseline benchmark: 32x32 -> 64-bit multiplication written with the built-in
+// * operator on operands widened to uint64_t. The first factor stays at
+// 0x1971DB6B; the second starts at the same value and grows by one on every
+// timed iteration.
+static void native_mulext_uint32_t(benchmark::State& state) {
+	uint32_t c = UINT32_C(0x1971DB6B);
+	// The factor's address is handed to DoNotOptimize - presumably so the
+	// compiler cannot treat it as a compile-time constant.
+	benchmark::DoNotOptimize(&c);
+	uint32_t d = c;
+	while (state.KeepRunning()) {
+		// Deliberately non-const: recent Google Benchmark releases deprecate
+		// the const-reference DoNotOptimize overload.
+		uint64_t product = static_cast<uint64_t>(c) * static_cast<uint64_t>(d++);
+		benchmark::DoNotOptimize(product);
+	}
+}
+BENCHMARK(native_mulext_uint32_t);
+
+// Benchmark: fxdiv_mulhi_uint32_t(). The first factor is the constant
+// 0x1971DB6B; the second starts at the same value and grows by one on every
+// timed iteration.
+static void fxdiv_mulhi_uint32_t(benchmark::State& state) {
+	const uint32_t c = UINT32_C(0x1971DB6B);
+	uint32_t x = c;
+	while (state.KeepRunning()) {
+		// Deliberately non-const: recent Google Benchmark releases deprecate
+		// the const-reference DoNotOptimize overload.
+		uint32_t product = fxdiv_mulhi_uint32_t(c, x++);
+		benchmark::DoNotOptimize(product);
+	}
+}
+BENCHMARK(fxdiv_mulhi_uint32_t);
+
+// Benchmark: fxdiv_mulhi_uint64_t(). The first factor is the constant
+// 0x425E892B38148FAD; the second starts at the same value and grows by one on
+// every timed iteration.
+static void fxdiv_mulhi_uint64_t(benchmark::State& state) {
+	const uint64_t c = UINT64_C(0x425E892B38148FAD);
+	uint64_t x = c;
+	while (state.KeepRunning()) {
+		// Deliberately non-const: recent Google Benchmark releases deprecate
+		// the const-reference DoNotOptimize overload.
+		uint64_t product = fxdiv_mulhi_uint64_t(c, x++);
+		benchmark::DoNotOptimize(product);
+	}
+}
+BENCHMARK(fxdiv_mulhi_uint64_t);
+
+BENCHMARK_MAIN();
diff --git a/bench/quotient.cc b/bench/quotient.cc
new file mode 100644
index 0000000..374ac2b
--- /dev/null
+++ b/bench/quotient.cc
@@ -0,0 +1,47 @@
+#include <benchmark/benchmark.h>
+
+#include <fxdiv.h>
+
+// Benchmark: fxdiv_quotient_uint32_t() against a divisor precomputed once
+// from 65537. The dividend counts up from 0, one step per timed iteration.
+static void fxdiv_quotient_uint32_t(benchmark::State& state) {
+	const fxdiv_divisor_uint32_t precomputed = fxdiv_init_uint32_t(UINT32_C(65537));
+	uint32_t dividend = 0;
+	while (state.KeepRunning()) {
+		uint32_t q = fxdiv_quotient_uint32_t(dividend, precomputed);
+		dividend += 1;
+		benchmark::DoNotOptimize(q);
+	}
+}
+BENCHMARK(fxdiv_quotient_uint32_t);
+
+// Benchmark: fxdiv_quotient_uint64_t() against a divisor precomputed once
+// from 4294967311. The dividend counts up from 0, one step per timed
+// iteration.
+static void fxdiv_quotient_uint64_t(benchmark::State& state) {
+	const fxdiv_divisor_uint64_t precomputed = fxdiv_init_uint64_t(UINT64_C(4294967311));
+	uint64_t dividend = 0;
+	while (state.KeepRunning()) {
+		uint64_t q = fxdiv_quotient_uint64_t(dividend, precomputed);
+		dividend += 1;
+		benchmark::DoNotOptimize(q);
+	}
+}
+BENCHMARK(fxdiv_quotient_uint64_t);
+
+// Baseline benchmark: 32-bit quotient via the built-in / operator with the
+// divisor 65537. The dividend counts down from UINT32_MAX, one step per timed
+// iteration.
+// NOTE(review): fxdiv_quotient_uint32_t in this file counts its dividend up
+// from 0 instead - confirm the differing input ranges are intended.
+static void native_quotient_uint32_t(benchmark::State& state) {
+	uint32_t divisor = UINT32_C(65537);
+	// Presumably hides the divisor's value from the optimizer - the address
+	// is passed to DoNotOptimize.
+	benchmark::DoNotOptimize(&divisor);
+	uint32_t dividend = UINT32_MAX;
+	while (state.KeepRunning()) {
+		uint32_t q = dividend / divisor;
+		dividend -= 1;
+		benchmark::DoNotOptimize(q);
+	}
+}
+BENCHMARK(native_quotient_uint32_t);
+
+// Baseline benchmark: 64-bit quotient via the built-in / operator with the
+// divisor 4294967311. The dividend starts at UINT64_MAX and decreases by one
+// on every timed iteration.
+static void native_quotient_uint64_t(benchmark::State& state) {
+	uint64_t divisor = UINT64_C(4294967311);
+	// The divisor's address is handed to DoNotOptimize - presumably so the
+	// compiler cannot treat the divisor as a compile-time constant.
+	benchmark::DoNotOptimize(&divisor);
+	uint64_t x = UINT64_MAX;
+	while (state.KeepRunning()) {
+		// Non-const, matching the other quotient benchmarks in this file and
+		// avoiding the const-reference DoNotOptimize overload that recent
+		// Google Benchmark releases deprecate.
+		uint64_t quotient = x-- / divisor;
+		benchmark::DoNotOptimize(quotient);
+	}
+}
+BENCHMARK(native_quotient_uint64_t);
+
+BENCHMARK_MAIN();
diff --git a/bench/round-down.cc b/bench/round-down.cc
new file mode 100644
index 0000000..860ce11
--- /dev/null
+++ b/bench/round-down.cc
@@ -0,0 +1,47 @@
+#include <benchmark/benchmark.h>
+
+#include <fxdiv.h>
+
+// Benchmark: fxdiv_round_down_uint32_t() with a divisor structure precomputed
+// once from 65537. The input starts at 0 and grows by one on every timed
+// iteration.
+static void fxdiv_round_down_uint32_t(benchmark::State& state) {
+	const fxdiv_divisor_uint32_t multiple = fxdiv_init_uint32_t(UINT32_C(65537));
+	uint32_t x = 0;
+	while (state.KeepRunning()) {
+		// Deliberately non-const: recent Google Benchmark releases deprecate
+		// the const-reference DoNotOptimize overload.
+		uint32_t rounded_x = fxdiv_round_down_uint32_t(x++, multiple);
+		benchmark::DoNotOptimize(rounded_x);
+	}
+}
+BENCHMARK(fxdiv_round_down_uint32_t);
+
+// Benchmark: fxdiv_round_down_uint64_t() with a divisor structure precomputed
+// once from 4294967311. The input starts at 0 and grows by one on every timed
+// iteration.
+static void fxdiv_round_down_uint64_t(benchmark::State& state) {
+	const fxdiv_divisor_uint64_t multiple = fxdiv_init_uint64_t(UINT64_C(4294967311));
+	uint64_t x = 0;
+	while (state.KeepRunning()) {
+		// Deliberately non-const: recent Google Benchmark releases deprecate
+		// the const-reference DoNotOptimize overload.
+		uint64_t rounded_x = fxdiv_round_down_uint64_t(x++, multiple);
+		benchmark::DoNotOptimize(rounded_x);
+	}
+}
+BENCHMARK(fxdiv_round_down_uint64_t);
+
+// Baseline benchmark: rounds a 32-bit value down to a multiple of 65537 using
+// built-in integer division followed by multiplication. The input starts at 0
+// and grows by one on every timed iteration.
+static void native_round_down_uint32_t(benchmark::State& state) {
+	uint32_t multiple = UINT32_C(65537);
+	// The address is handed to DoNotOptimize - presumably so the compiler
+	// cannot treat `multiple` as a compile-time constant.
+	benchmark::DoNotOptimize(&multiple);
+	uint32_t x = 0;
+	while (state.KeepRunning()) {
+		// Deliberately non-const: recent Google Benchmark releases deprecate
+		// the const-reference DoNotOptimize overload.
+		uint32_t rounded_x = x++ / multiple * multiple;
+		benchmark::DoNotOptimize(rounded_x);
+	}
+}
+BENCHMARK(native_round_down_uint32_t);
+
+// Baseline benchmark: rounds a 64-bit value down to a multiple of 4294967311
+// using built-in integer division followed by multiplication. The input starts
+// at 0 and grows by one on every timed iteration.
+static void native_round_down_uint64_t(benchmark::State& state) {
+	uint64_t multiple = UINT64_C(4294967311);
+	// The address is handed to DoNotOptimize - presumably so the compiler
+	// cannot treat `multiple` as a compile-time constant.
+	benchmark::DoNotOptimize(&multiple);
+	uint64_t x = 0;
+	while (state.KeepRunning()) {
+		// Deliberately non-const: recent Google Benchmark releases deprecate
+		// the const-reference DoNotOptimize overload.
+		uint64_t rounded_x = x++ / multiple * multiple;
+		benchmark::DoNotOptimize(rounded_x);
+	}
+}
+BENCHMARK(native_round_down_uint64_t);
+
+BENCHMARK_MAIN();
diff --git a/configure.py b/configure.py
index 4c415bf..e61363f 100755
--- a/configure.py
+++ b/configure.py
@@ -11,9 +11,16 @@ def main(args):
build.export_cpath("include", ["fxdiv.h"])
- with build.options(source_dir="test", deps=[build.deps.googletest]):
- build.unittest("MultiplyHighTest", build.cxx("MultiplyHigh.cc"))
- build.unittest("QuotientTest", build.cxx("Quotient.cc"))
+ with build.options(source_dir="test", deps=build.deps.googletest):
+ build.unittest("multiply-high-test", build.cxx("multiply-high.cc"))
+ build.unittest("quotient-test", build.cxx("quotient.cc"))
+
+ with build.options(source_dir="bench", deps=build.deps.googlebenchmark):
+ build.benchmark("init-bench", build.cxx("init.cc"))
+ build.benchmark("multiply-bench", build.cxx("multiply.cc"))
+ build.benchmark("divide-bench", build.cxx("divide.cc"))
+ build.benchmark("quotient-bench", build.cxx("quotient.cc"))
+ build.benchmark("round-down-bench", build.cxx("round-down.cc"))
return build
diff --git a/confu.yaml b/confu.yaml
index 0f0b262..2f1bf46 100644
--- a/confu.yaml
+++ b/confu.yaml
@@ -3,3 +3,4 @@ title: division via fixed-point multiplication by inverse
license: MIT
deps:
- name: googletest
+ - name: googlebenchmark
diff --git a/test/MultiplyHigh.cc b/test/multiply-high.cc
index 10f9013..10f9013 100644
--- a/test/MultiplyHigh.cc
+++ b/test/multiply-high.cc
diff --git a/test/Quotient.cc b/test/quotient.cc
index ad5cd8e..ad5cd8e 100644
--- a/test/Quotient.cc
+++ b/test/quotient.cc