Diffstat (limited to 'test/perf_counters_gtest.cc')
-rw-r--r--   test/perf_counters_gtest.cc   307
1 file changed, 307 insertions, 0 deletions
diff --git a/test/perf_counters_gtest.cc b/test/perf_counters_gtest.cc
new file mode 100644
index 0000000..54c7863
--- /dev/null
+++ b/test/perf_counters_gtest.cc
@@ -0,0 +1,307 @@
+#include <random>
+#include <thread>
+
+#include "../src/perf_counters.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
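+// Fallback for older googletest releases that do not provide GTEST_SKIP:
+// stream the skip message to stdout and return from the test body.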
+#ifndef GTEST_SKIP
+struct MsgHandler {
+ void operator=(std::ostream&) {}
+};
+#define GTEST_SKIP() return MsgHandler() = std::cout
+#endif
+
+using benchmark::internal::PerfCounters;
+using benchmark::internal::PerfCountersMeasurement;
+using benchmark::internal::PerfCounterValues;
+using ::testing::AllOf;
+using ::testing::Gt;
+using ::testing::Lt;
+
+namespace {
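+// Generic event names that libpfm maps to the platform's hardware counters.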
+const char kGenericPerfEvent1[] = "CYCLES";
+const char kGenericPerfEvent2[] = "INSTRUCTIONS";
+
+TEST(PerfCountersTest, Init) {
+ EXPECT_EQ(PerfCounters::Initialize(), PerfCounters::kSupported);
+}
+
+TEST(PerfCountersTest, OneCounter) {
+ if (!PerfCounters::kSupported) {
+ GTEST_SKIP() << "Performance counters not supported.\n";
+ }
+ EXPECT_TRUE(PerfCounters::Initialize());
+ EXPECT_EQ(PerfCounters::Create({kGenericPerfEvent1}).num_counters(), 1);
+}
+
+TEST(PerfCountersTest, NegativeTest) {
+ if (!PerfCounters::kSupported) {
+ EXPECT_FALSE(PerfCounters::Initialize());
+ return;
+ }
+ EXPECT_TRUE(PerfCounters::Initialize());
+  // Sanity checks
+  // Create() will always return a valid object, even if passed no
+  // arguments or invalid ones, since the behavior is to warn about and
+  // drop any unsupported counters.
+ EXPECT_EQ(PerfCounters::Create({}).num_counters(), 0);
+ EXPECT_EQ(PerfCounters::Create({""}).num_counters(), 0);
+ EXPECT_EQ(PerfCounters::Create({"not a counter name"}).num_counters(), 0);
+ {
+ // Try sneaking in a bad egg to see if it is filtered out. The
+ // number of counters has to be two, not zero
+ auto counter =
+ PerfCounters::Create({kGenericPerfEvent2, "", kGenericPerfEvent1});
+ EXPECT_EQ(counter.num_counters(), 2);
+ EXPECT_EQ(counter.names(), std::vector<std::string>(
+ {kGenericPerfEvent2, kGenericPerfEvent1}));
+ }
+ {
+ // Try sneaking in an outrageous counter, like a fat finger mistake
+ auto counter = PerfCounters::Create(
+ {kGenericPerfEvent2, "not a counter name", kGenericPerfEvent1});
+ EXPECT_EQ(counter.num_counters(), 2);
+ EXPECT_EQ(counter.names(), std::vector<std::string>(
+ {kGenericPerfEvent2, kGenericPerfEvent1}));
+ }
+ {
+    // Finally try a golden input - it should accept both of them
+ EXPECT_EQ(PerfCounters::Create({kGenericPerfEvent1, kGenericPerfEvent2})
+ .num_counters(),
+ 2);
+ }
+ {
+ // Add a bad apple in the end of the chain to check the edges
+ auto counter = PerfCounters::Create(
+ {kGenericPerfEvent1, kGenericPerfEvent2, "bad event name"});
+ EXPECT_EQ(counter.num_counters(), 2);
+ EXPECT_EQ(counter.names(), std::vector<std::string>(
+ {kGenericPerfEvent1, kGenericPerfEvent2}));
+ }
+}
+
+TEST(PerfCountersTest, Read1Counter) {
+ if (!PerfCounters::kSupported) {
+ GTEST_SKIP() << "Test skipped because libpfm is not supported.\n";
+ }
+ EXPECT_TRUE(PerfCounters::Initialize());
+ auto counters = PerfCounters::Create({kGenericPerfEvent1});
+ EXPECT_EQ(counters.num_counters(), 1);
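+  // Take two consecutive snapshots; the cycle count accumulates, so the
+  // second reading must be strictly greater than the first.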
+ PerfCounterValues values1(1);
+ EXPECT_TRUE(counters.Snapshot(&values1));
+ EXPECT_GT(values1[0], 0);
+ PerfCounterValues values2(1);
+ EXPECT_TRUE(counters.Snapshot(&values2));
+ EXPECT_GT(values2[0], 0);
+ EXPECT_GT(values2[0], values1[0]);
+}
+
+TEST(PerfCountersTest, Read2Counters) {
+ if (!PerfCounters::kSupported) {
+ GTEST_SKIP() << "Test skipped because libpfm is not supported.\n";
+ }
+ EXPECT_TRUE(PerfCounters::Initialize());
+ auto counters =
+ PerfCounters::Create({kGenericPerfEvent1, kGenericPerfEvent2});
+ EXPECT_EQ(counters.num_counters(), 2);
+ PerfCounterValues values1(2);
+ EXPECT_TRUE(counters.Snapshot(&values1));
+ EXPECT_GT(values1[0], 0);
+ EXPECT_GT(values1[1], 0);
+ PerfCounterValues values2(2);
+ EXPECT_TRUE(counters.Snapshot(&values2));
+ EXPECT_GT(values2[0], 0);
+ EXPECT_GT(values2[1], 0);
+}
+
+TEST(PerfCountersTest, ReopenExistingCounters) {
+  // This test works on recent and old Intel hardware, Pixel 3, and Pixel 6.
+  // However, because of Pixel 6, we cannot assume more than 2 HW counters.
+ if (!PerfCounters::kSupported) {
+ GTEST_SKIP() << "Test skipped because libpfm is not supported.\n";
+ }
+ EXPECT_TRUE(PerfCounters::Initialize());
+ std::vector<std::string> kMetrics({kGenericPerfEvent1});
+ std::vector<PerfCounters> counters(2);
+ for (auto& counter : counters) {
+ counter = PerfCounters::Create(kMetrics);
+ }
+ PerfCounterValues values(1);
+ EXPECT_TRUE(counters[0].Snapshot(&values));
+ EXPECT_TRUE(counters[1].Snapshot(&values));
+}
+
+TEST(PerfCountersTest, CreateExistingMeasurements) {
+  // Like ReopenExistingCounters above, this test relies on the hardware
+  // capabilities assumed at this date (i.e. a small number (2) of hardware
+  // counters) to make the read fail.
+ if (!PerfCounters::kSupported) {
+ GTEST_SKIP() << "Test skipped because libpfm is not supported.\n";
+ }
+ EXPECT_TRUE(PerfCounters::Initialize());
+
+  // This means we will try 10 counters, but at this time we can only
+  // guarantee that a few of them will work. Perhaps in the future we could
+  // use libpfm to query the hardware limits of this particular platform.
+ const int kMaxCounters = 10;
+ const int kMinValidCounters = 2;
+
+ // Let's use a ubiquitous counter that is guaranteed to work
+ // on all platforms
+ const std::vector<std::string> kMetrics{"cycles"};
+
+ // Cannot create a vector of actual objects because the
+ // copy constructor of PerfCounters is deleted - and so is
+ // implicitly deleted on PerfCountersMeasurement too
+ std::vector<std::unique_ptr<PerfCountersMeasurement>>
+ perf_counter_measurements;
+
+ perf_counter_measurements.reserve(kMaxCounters);
+ for (int j = 0; j < kMaxCounters; ++j) {
+ perf_counter_measurements.emplace_back(
+ new PerfCountersMeasurement(kMetrics));
+ }
+
+ std::vector<std::pair<std::string, double>> measurements;
+
+ // Start all counters together to see if they hold
+ size_t max_counters = kMaxCounters;
+ for (size_t i = 0; i < kMaxCounters; ++i) {
+ auto& counter(*perf_counter_measurements[i]);
+ EXPECT_EQ(counter.num_counters(), 1);
+ if (!counter.Start()) {
+ max_counters = i;
+ break;
+    }
+ }
+
+ ASSERT_GE(max_counters, kMinValidCounters);
+
+  // Stop all counters together
+ for (size_t i = 0; i < max_counters; ++i) {
+ auto& counter(*perf_counter_measurements[i]);
+ EXPECT_TRUE(counter.Stop(measurements) || (i >= kMinValidCounters));
+ }
+
+ // Start/stop individually
+ for (size_t i = 0; i < max_counters; ++i) {
+ auto& counter(*perf_counter_measurements[i]);
+ measurements.clear();
+ counter.Start();
+ EXPECT_TRUE(counter.Stop(measurements) || (i >= kMinValidCounters));
+ }
+}
+
+// We try to do some meaningful work here, but the compiler
+// insists on optimizing away our loop, so we had to add a
+// no-optimize macro. In case that fails, we also added some
+// entropy to this pool.
+
+BENCHMARK_DONT_OPTIMIZE size_t do_work() {
+ static std::mt19937 rd{std::random_device{}()};
+ static std::uniform_int_distribution<size_t> mrand(0, 10);
+ const size_t kNumLoops = 1000000;
+ size_t sum = 0;
+ for (size_t j = 0; j < kNumLoops; ++j) {
+ sum += mrand(rd);
+ }
+ benchmark::DoNotOptimize(sum);
+ return sum;
+}
+
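+// Runs `threadcount` threads of do_work() and snapshots the combined
+// cycle/instruction counters right before the threads start and right
+// after they are joined. The counters are created first so the worker
+// threads inherit them.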
+void measure(size_t threadcount, PerfCounterValues* before,
+ PerfCounterValues* after) {
+ BM_CHECK_NE(before, nullptr);
+ BM_CHECK_NE(after, nullptr);
+ std::vector<std::thread> threads(threadcount);
+ auto work = [&]() { BM_CHECK(do_work() > 1000); };
+
+ // We need to first set up the counters, then start the threads, so the
+ // threads would inherit the counters. But later, we need to first destroy
+ // the thread pool (so all the work finishes), then measure the counters. So
+ // the scopes overlap, and we need to explicitly control the scope of the
+ // threadpool.
+ auto counters =
+ PerfCounters::Create({kGenericPerfEvent1, kGenericPerfEvent2});
+ for (auto& t : threads) t = std::thread(work);
+ counters.Snapshot(before);
+ for (auto& t : threads) t.join();
+ counters.Snapshot(after);
+}
+
+TEST(PerfCountersTest, MultiThreaded) {
+ if (!PerfCounters::kSupported) {
+ GTEST_SKIP() << "Test skipped because libpfm is not supported.";
+ }
+ EXPECT_TRUE(PerfCounters::Initialize());
+ PerfCounterValues before(2);
+ PerfCounterValues after(2);
+
+  // Notice that this test will work even if we taskset it to a single CPU.
+  // In that case the threads will run sequentially.
+  // Start two threads and measure the number of combined cycles and
+  // instructions.
+ measure(2, &before, &after);
+ std::vector<double> Elapsed2Threads{
+ static_cast<double>(after[0] - before[0]),
+ static_cast<double>(after[1] - before[1])};
+
+ // Start four threads and measure the number of combined cycles and
+ // instructions
+ measure(4, &before, &after);
+ std::vector<double> Elapsed4Threads{
+ static_cast<double>(after[0] - before[0]),
+ static_cast<double>(after[1] - before[1])};
+
+  // Expecting 4 threads to consume more cycles than 2 fails (at least on a
+  // beefy workstation with lots of cpus) - it seems that in some
+  // circumstances the runtime with 4 threads can even be better than with 2.
+  // So instead of expecting 4 threads to be slower, let's just make sure the
+  // two runs do not differ too much in general (neither is more than 10x
+  // the other).
+ EXPECT_THAT(Elapsed4Threads[0] / Elapsed2Threads[0], AllOf(Gt(0.1), Lt(10)));
+ EXPECT_THAT(Elapsed4Threads[1] / Elapsed2Threads[1], AllOf(Gt(0.1), Lt(10)));
+}
+
+TEST(PerfCountersTest, HardwareLimits) {
+  // Like ReopenExistingCounters above, this test relies on the hardware
+  // capabilities assumed at this date (i.e. a small number (3-4) of
+  // hardware counters) to make the read fail.
+ if (!PerfCounters::kSupported) {
+ GTEST_SKIP() << "Test skipped because libpfm is not supported.\n";
+ }
+ EXPECT_TRUE(PerfCounters::Initialize());
+
+  // Taken from `perf list`, but focuses only on those HW events that were
+  // actually reported when running `sudo perf stat -a sleep 10`, intersected
+  // over several platforms. All HW events listed in the first command but not
+  // reported in the second seem not to work. This is sad, as we don't really
+  // get to test the grouping here (groups can contain up to 6 members)...
+ std::vector<std::string> counter_names{
+ "cycles", // leader
+ "instructions", //
+ "branch-misses", //
+ };
+
+  // On the off chance that some of these events are not supported, we
+  // filter them out so the test will complete without failure, although it
+  // might not actually test the grouping on that platform
+ std::vector<std::string> valid_names;
+ for (const std::string& name : counter_names) {
+ if (PerfCounters::IsCounterSupported(name)) {
+ valid_names.push_back(name);
+ }
+ }
+ PerfCountersMeasurement counter(valid_names);
+
+ std::vector<std::pair<std::string, double>> measurements;
+
+ counter.Start();
+ EXPECT_TRUE(counter.Stop(measurements));
+}
+
+} // namespace