Full set of benchmarks

author: Marat Dukhan <maratek@gmail.com> 2017-02-12 01:32:06 -0500
committer: Marat Dukhan <maratek@gmail.com> 2017-02-12 01:32:06 -0500
commit: 967a027102f1ba35bb5abcacf6d2591b7666dda6 (patch)
tree: ff05d9ea2ce3cd3b510e1c923f6b8f196b2f315d
parent: 5e42a78ef71524df14d04ec22a5b37ba683fc6d0 (diff)
download: FXdiv-967a027102f1ba35bb5abcacf6d2591b7666dda6.tar.gz
10 files changed, 229 insertions, 4 deletions
diff --git a/README.md b/README.md
index 8144bfc..d0493a4 100644
--- a/README.md
+++ b/README.md
@@ -9,7 +9,7 @@ On modern CPUs and GPUs integer division is several times slower than multiplica
 - Header-only library, no installation or build required
 - Compatible with C99, C++, OpenCL, and CUDA
 - Uses platform-specific compiler intrinsics for optimal performance
-- Fully covered with unit tests (Google Test framework)
+- Covered with unit tests and microbenchmarks
 
 ## Example
 
@@ -45,6 +45,8 @@ Project is in alpha stage. API is unstable. Currently working features:
 | x86 MSVC        | Works    | Works    | Works    |
 | ARMv7 gcc       | Works    | Works    | Works    |
 | PPC64 gcc       | Works    | Works    | Works    |
+| PNaCl clang     | Works    | Works    | Works    |
+| Asm.js clang    | Works    | Works    | Works    |
 | CUDA            | Untested | Untested | Untested |
 | OpenCL          | Untested | Untested | Untested |
 
diff --git a/bench/divide.cc b/bench/divide.cc
new file mode 100644
index 0000000..78aa38c
--- /dev/null
+++ b/bench/divide.cc
@@ -0,0 +1,51 @@
+#include <benchmark/benchmark.h>
+
+#include <fxdiv.h>
+
+// Times fxdiv_divide_uint32_t for dividends 0, 1, 2, ... with the divisor fixed at 65537.
+static void fxdiv_divide_uint32_t(benchmark::State& state) {
+	const fxdiv_divisor_uint32_t fixed_divisor = fxdiv_init_uint32_t(UINT32_C(65537));
+	for (uint32_t dividend = 0; state.KeepRunning(); dividend++) {
+		const fxdiv_result_uint32_t divided = fxdiv_divide_uint32_t(dividend, fixed_divisor);
+		benchmark::DoNotOptimize(divided);
+	}
+}
+BENCHMARK(fxdiv_divide_uint32_t);
+
+// Times fxdiv_divide_uint64_t for dividends 0, 1, 2, ... with the divisor fixed at 4294967311.
+static void fxdiv_divide_uint64_t(benchmark::State& state) {
+	const fxdiv_divisor_uint64_t fixed_divisor = fxdiv_init_uint64_t(UINT64_C(4294967311));
+	for (uint64_t dividend = 0; state.KeepRunning(); dividend++) {
+		const fxdiv_result_uint64_t divided = fxdiv_divide_uint64_t(dividend, fixed_divisor);
+		benchmark::DoNotOptimize(divided);
+	}
+}
+BENCHMARK(fxdiv_divide_uint64_t);
+
+// Times the built-in 32-bit / and % by 65537; the divisor's address is passed to DoNotOptimize.
+static void native_divide_uint32_t(benchmark::State& state) {
+	uint32_t denom = UINT32_C(65537);
+	benchmark::DoNotOptimize(&denom);
+	for (uint32_t numer = 0; state.KeepRunning(); numer++) {
+		const uint32_t q = numer / denom;
+		const uint32_t r = numer % denom;
+		benchmark::DoNotOptimize(q);
+		benchmark::DoNotOptimize(r);
+	}
+}
+BENCHMARK(native_divide_uint32_t);
+
+// Times the built-in 64-bit / and % by 4294967311; the divisor's address is passed to DoNotOptimize.
+static void native_divide_uint64_t(benchmark::State& state) {
+	uint64_t denom = UINT64_C(4294967311);
+	benchmark::DoNotOptimize(&denom);
+	for (uint64_t numer = 0; state.KeepRunning(); numer++) {
+		const uint64_t q = numer / denom;
+		const uint64_t r = numer % denom;
+		benchmark::DoNotOptimize(q);
+		benchmark::DoNotOptimize(r);
+	}
+}
+BENCHMARK(native_divide_uint64_t);
+
+BENCHMARK_MAIN();
diff --git a/bench/init.cc b/bench/init.cc
new file mode 100644
index 0000000..060db49
--- /dev/null
+++ b/bench/init.cc
@@ -0,0 +1,23 @@
+#include <benchmark/benchmark.h>
+
+#include <fxdiv.h>
+
+// Times fxdiv_init_uint32_t on consecutive divisors counting up from 0x1971DB6B.
+static void init_uint32_t(benchmark::State& state) {
+	for (uint32_t next = UINT32_C(0x1971DB6B); state.KeepRunning(); next++) {
+		const fxdiv_divisor_uint32_t prepared = fxdiv_init_uint32_t(next);
+		benchmark::DoNotOptimize(prepared);
+	}
+}
+BENCHMARK(init_uint32_t);
+
+// Times fxdiv_init_uint64_t on consecutive divisors counting up from 0x425E892B38148FAD.
+static void init_uint64_t(benchmark::State& state) {
+	for (uint64_t next = UINT64_C(0x425E892B38148FAD); state.KeepRunning(); next++) {
+		const fxdiv_divisor_uint64_t prepared = fxdiv_init_uint64_t(next);
+		benchmark::DoNotOptimize(prepared);
+	}
+}
+BENCHMARK(init_uint64_t);
+
+BENCHMARK_MAIN();
diff --git a/bench/multiply.cc b/bench/multiply.cc
new file mode 100644
index 0000000..c9757a4
--- /dev/null
+++ b/bench/multiply.cc
@@ -0,0 +1,47 @@
+#include <benchmark/benchmark.h>
+
+#include <fxdiv.h>
+
+// Times fxdiv_mulext_uint32_t(lhs, rhs): lhs stays 0x1971DB6B, rhs counts up from that value.
+static void fxdiv_mulext_uint32_t(benchmark::State& state) {
+	uint32_t lhs = UINT32_C(0x1971DB6B);
+	benchmark::DoNotOptimize(&lhs);
+	for (uint32_t rhs = lhs; state.KeepRunning(); rhs++) {
+		const uint64_t wide = fxdiv_mulext_uint32_t(lhs, rhs);
+		benchmark::DoNotOptimize(wide);
+	}
+}
+BENCHMARK(fxdiv_mulext_uint32_t);
+
+// Times a plain 64-bit product of two uint32_t values, each widened with static_cast first.
+static void native_mulext_uint32_t(benchmark::State& state) {
+	uint32_t c = UINT32_C(0x1971DB6B);
+	benchmark::DoNotOptimize(&c);
+	for (uint32_t d = c; state.KeepRunning(); d++) {
+		const uint64_t product = static_cast<uint64_t>(c) * static_cast<uint64_t>(d);
+		benchmark::DoNotOptimize(product);
+	}
+}
+BENCHMARK(native_mulext_uint32_t);
+
+// Times fxdiv_mulhi_uint32_t(factor, other): factor is the constant 0x1971DB6B, other counts up from it.
+static void fxdiv_mulhi_uint32_t(benchmark::State& state) {
+	const uint32_t factor = UINT32_C(0x1971DB6B);
+	for (uint32_t other = factor; state.KeepRunning(); other++) {
+		const uint32_t high = fxdiv_mulhi_uint32_t(factor, other);
+		benchmark::DoNotOptimize(high);
+	}
+}
+BENCHMARK(fxdiv_mulhi_uint32_t);
+
+// Times fxdiv_mulhi_uint64_t(factor, other): factor is the constant 0x425E892B38148FAD, other counts up from it.
+static void fxdiv_mulhi_uint64_t(benchmark::State& state) {
+	const uint64_t factor = UINT64_C(0x425E892B38148FAD);
+	for (uint64_t other = factor; state.KeepRunning(); other++) {
+		const uint64_t high = fxdiv_mulhi_uint64_t(factor, other);
+		benchmark::DoNotOptimize(high);
+	}
+}
+BENCHMARK(fxdiv_mulhi_uint64_t);
+
+BENCHMARK_MAIN();
diff --git a/bench/quotient.cc b/bench/quotient.cc
new file mode 100644
index 0000000..374ac2b
--- /dev/null
+++ b/bench/quotient.cc
@@ -0,0 +1,47 @@
+#include <benchmark/benchmark.h>
+
+#include <fxdiv.h>
+
+// Times fxdiv_quotient_uint32_t for dividends 0, 1, 2, ... with the divisor fixed at 65537.
+static void fxdiv_quotient_uint32_t(benchmark::State& state) {
+	const fxdiv_divisor_uint32_t fixed_divisor = fxdiv_init_uint32_t(UINT32_C(65537));
+	for (uint32_t dividend = 0; state.KeepRunning(); dividend++) {
+		uint32_t q = fxdiv_quotient_uint32_t(dividend, fixed_divisor);
+		benchmark::DoNotOptimize(q);
+	}
+}
+BENCHMARK(fxdiv_quotient_uint32_t);
+
+// Times fxdiv_quotient_uint64_t for dividends 0, 1, 2, ... with the divisor fixed at 4294967311.
+static void fxdiv_quotient_uint64_t(benchmark::State& state) {
+	const fxdiv_divisor_uint64_t fixed_divisor = fxdiv_init_uint64_t(UINT64_C(4294967311));
+	for (uint64_t dividend = 0; state.KeepRunning(); dividend++) {
+		uint64_t q = fxdiv_quotient_uint64_t(dividend, fixed_divisor);
+		benchmark::DoNotOptimize(q);
+	}
+}
+BENCHMARK(fxdiv_quotient_uint64_t);
+
+// Times the built-in 32-bit / by 65537 for dividends counting down from UINT32_MAX.
+static void native_quotient_uint32_t(benchmark::State& state) {
+	uint32_t denom = UINT32_C(65537);
+	benchmark::DoNotOptimize(&denom);
+	for (uint32_t numer = UINT32_MAX; state.KeepRunning(); numer--) {
+		uint32_t q = numer / denom;
+		benchmark::DoNotOptimize(q);
+	}
+}
+BENCHMARK(native_quotient_uint32_t);
+
+// Times the built-in 64-bit / by 4294967311 for dividends counting down from UINT64_MAX.
+static void native_quotient_uint64_t(benchmark::State& state) {
+	uint64_t divisor = UINT64_C(4294967311);
+	benchmark::DoNotOptimize(&divisor);
+	for (uint64_t x = UINT64_MAX; state.KeepRunning(); x--) {
+		const uint64_t quotient = x / divisor;
+		benchmark::DoNotOptimize(quotient);
+	}
+}
+BENCHMARK(native_quotient_uint64_t);
+
+BENCHMARK_MAIN();
diff --git a/bench/round-down.cc b/bench/round-down.cc
new file mode 100644
index 0000000..860ce11
--- /dev/null
+++ b/bench/round-down.cc
@@ -0,0 +1,47 @@
+#include <benchmark/benchmark.h>
+
+#include <fxdiv.h>
+
+// Times fxdiv_round_down_uint32_t for inputs 0, 1, 2, ... with the granularity fixed at 65537.
+static void fxdiv_round_down_uint32_t(benchmark::State& state) {
+	const fxdiv_divisor_uint32_t granularity = fxdiv_init_uint32_t(UINT32_C(65537));
+	for (uint32_t value = 0; state.KeepRunning(); value++) {
+		const uint32_t floored = fxdiv_round_down_uint32_t(value, granularity);
+		benchmark::DoNotOptimize(floored);
+	}
+}
+BENCHMARK(fxdiv_round_down_uint32_t);
+
+// Times fxdiv_round_down_uint64_t for inputs 0, 1, 2, ... with the granularity fixed at 4294967311.
+static void fxdiv_round_down_uint64_t(benchmark::State& state) {
+	const fxdiv_divisor_uint64_t granularity = fxdiv_init_uint64_t(UINT64_C(4294967311));
+	for (uint64_t value = 0; state.KeepRunning(); value++) {
+		const uint64_t floored = fxdiv_round_down_uint64_t(value, granularity);
+		benchmark::DoNotOptimize(floored);
+	}
+}
+BENCHMARK(fxdiv_round_down_uint64_t);
+
+// Times the built-in 32-bit expression (value / step) * step with step 65537 and value 0, 1, 2, ...
+static void native_round_down_uint32_t(benchmark::State& state) {
+	uint32_t step = UINT32_C(65537);
+	benchmark::DoNotOptimize(&step);
+	for (uint32_t value = 0; state.KeepRunning(); value++) {
+		const uint32_t floored = (value / step) * step;
+		benchmark::DoNotOptimize(floored);
+	}
+}
+BENCHMARK(native_round_down_uint32_t);
+
+// Times the built-in 64-bit expression (value / step) * step with step 4294967311 and value 0, 1, 2, ...
+static void native_round_down_uint64_t(benchmark::State& state) {
+	uint64_t step = UINT64_C(4294967311);
+	benchmark::DoNotOptimize(&step);
+	for (uint64_t value = 0; state.KeepRunning(); value++) {
+		const uint64_t floored = (value / step) * step;
+		benchmark::DoNotOptimize(floored);
+	}
+}
+BENCHMARK(native_round_down_uint64_t);
+
+BENCHMARK_MAIN();
diff --git a/configure.py b/configure.py
index 4c415bf..e61363f 100755
--- a/configure.py
+++ b/configure.py
@@ -11,9 +11,16 @@ def main(args):
 
     build.export_cpath("include", ["fxdiv.h"])
 
-    with build.options(source_dir="test", deps=[build.deps.googletest]):
-        build.unittest("MultiplyHighTest", build.cxx("MultiplyHigh.cc"))
-        build.unittest("QuotientTest", build.cxx("Quotient.cc"))
+    with build.options(source_dir="test", deps=build.deps.googletest):
+        build.unittest("multiply-high-test", build.cxx("multiply-high.cc"))
+        build.unittest("quotient-test", build.cxx("quotient.cc"))
+
+    with build.options(source_dir="bench", deps=build.deps.googlebenchmark):
+        build.benchmark("init-bench", build.cxx("init.cc"))
+        build.benchmark("multiply-bench", build.cxx("multiply.cc"))
+        build.benchmark("divide-bench", build.cxx("divide.cc"))
+        build.benchmark("quotient-bench", build.cxx("quotient.cc"))
+        build.benchmark("round-down-bench", build.cxx("round-down.cc"))
 
     return build
 
diff --git a/confu.yaml b/confu.yaml
index 0f0b262..2f1bf46 100644
--- a/confu.yaml
+++ b/confu.yaml
@@ -3,3 +3,4 @@ title: division via fixed-point multiplication by inverse
 license: MIT
 deps:
   - name: googletest
+  - name: googlebenchmark
diff --git a/test/MultiplyHigh.cc b/test/multiply-high.cc
index 10f9013..10f9013 100644
--- a/test/MultiplyHigh.cc
+++ b/test/multiply-high.cc
diff --git a/test/Quotient.cc b/test/quotient.cc
index ad5cd8e..ad5cd8e 100644
--- a/test/Quotient.cc
+++ b/test/quotient.cc
author	Marat Dukhan <maratek@gmail.com>	2017-02-12 01:32:06 -0500
committer	Marat Dukhan <maratek@gmail.com>	2017-02-12 01:32:06 -0500
commit	967a027102f1ba35bb5abcacf6d2591b7666dda6 (patch)
tree	ff05d9ea2ce3cd3b510e1c923f6b8f196b2f315d
parent	5e42a78ef71524df14d04ec22a5b37ba683fc6d0 (diff)
download	FXdiv-967a027102f1ba35bb5abcacf6d2591b7666dda6.tar.gz