aboutsummaryrefslogtreecommitdiff
path: root/src/perf_counters.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/perf_counters.h')
-rw-r--r--src/perf_counters.h200
1 files changed, 200 insertions, 0 deletions
diff --git a/src/perf_counters.h b/src/perf_counters.h
new file mode 100644
index 0000000..bf5eb6b
--- /dev/null
+++ b/src/perf_counters.h
@@ -0,0 +1,200 @@
+// Copyright 2021 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef BENCHMARK_PERF_COUNTERS_H
+#define BENCHMARK_PERF_COUNTERS_H
+
+#include <array>
+#include <cstdint>
+#include <cstring>
+#include <memory>
+#include <vector>
+
+#include "benchmark/benchmark.h"
+#include "check.h"
+#include "log.h"
+#include "mutex.h"
+
+#ifndef BENCHMARK_OS_WINDOWS
+#include <unistd.h>
+#endif
+
+#if defined(_MSC_VER)
+#pragma warning(push)
+// C4251: <symbol> needs to have dll-interface to be used by clients of class
+#pragma warning(disable : 4251)
+#endif
+
+namespace benchmark {
+namespace internal {
+
+// Inline storage for the values produced by one read of a set of perf
+// counters.
+//
+// Typically, only a small number of counters can be read. When multiple
+// counters are read with one syscall (which is desirable), there is also
+// padding preceding the counter values. PerfCounterValues abstracts these
+// details: the storage is inlined in the object, and operator[] allows
+// 0-based indexing into the counter values.
+//
+// The object is used in conjunction with a PerfCounters object, by passing it
+// to PerfCounters::Snapshot(). Read() relocates the individual reads,
+// discarding the initial padding from each group leader in the values buffer,
+// such that all user accesses through operator[] are correct.
+class BENCHMARK_EXPORT PerfCounterValues {
+ public:
+  // Creates storage for `nr_counters` values. `nr_counters` is checked
+  // (BM_CHECK_LE) to be at most kMaxCounters. The buffer starts zero-filled.
+  explicit PerfCounterValues(size_t nr_counters) : nr_counters_(nr_counters) {
+    BM_CHECK_LE(nr_counters_, kMaxCounters);
+  }
+
+  // Returns the value stored at 0-based index `pos`. The padding is dealt with
+  // while reading, so no offset is applied here. `pos` is not range-checked.
+  uint64_t operator[](size_t pos) const { return values_[pos]; }
+
+  // Maximum number of counters one object can hold. It can be as large as 32
+  // only because the buffer is backed by a std::array<>.
+  static constexpr size_t kMaxCounters = 32;
+
+ private:
+  friend class PerfCounters;
+
+  // Returns the byte buffer in which perf counters can be captured, together
+  // with its usable size in bytes: room for kPadding + nr_counters_ 64-bit
+  // words.
+  std::pair<char*, size_t> get_data_buffer() {
+    return {reinterpret_cast<char*>(values_.data()),
+            sizeof(uint64_t) * (kPadding + nr_counters_)};
+  }
+
+  // Reads the counters for the given group leaders into this object. Defined
+  // out of line. The reading process is complex, and since the goal of this
+  // class is to abstract away the intricacies of that process, this is the
+  // better place for it.
+  // NOTE(review): PerfCounters::Snapshot() compares the result with the number
+  // of counters, so the return value is presumably the number of values read;
+  // confirm against the definition.
+  size_t Read(const std::vector<int>& leaders);
+
+  // Two words of padding are required by the reading algorithm: the first
+  // padding plus the padding of the read currently in progress.
+  static constexpr size_t kPadding = 2;
+
+  // Value-initialized so that operator[] never observes indeterminate data
+  // when no read has filled the buffer yet (e.g. a snapshot that failed).
+  std::array<uint64_t, kPadding + kMaxCounters> values_{};
+  const size_t nr_counters_;
+};
+
+// Collects PMU counters. Once constructed, the object is ready to be used by
+// calling Snapshot(). PMU counter collection is enabled from the time Create()
+// is called to obtain the object, until the object's destructor is called.
+class BENCHMARK_EXPORT PerfCounters final {
+ public:
+  // True iff this platform supports performance counters.
+  static const bool kSupported;
+
+  // Move-only type: copying is disabled, moving is allowed.
+  PerfCounters() = default;
+  PerfCounters(const PerfCounters&) = delete;
+  PerfCounters(PerfCounters&&) = default;
+  PerfCounters& operator=(const PerfCounters&) = delete;
+  PerfCounters& operator=(PerfCounters&&) noexcept;
+  // Closes the counters through CloseCounters().
+  ~PerfCounters() { CloseCounters(); }
+
+  // Returns an empty (default-constructed) object.
+  static PerfCounters NoCounters() { return PerfCounters(); }
+
+  // Hook for platform-specific implementations, which may choose to perform
+  // library initialization here.
+  static bool Initialize();
+
+  // Reports whether the counter called `name` is supported, for applications
+  // that want to check before passing the name on.
+  static bool IsCounterSupported(const std::string& name);
+
+  // Returns a PerfCounters object ready to read the counters with the given
+  // names. The values are user-mode only. The counter name format is
+  // implementation and OS specific.
+  // On failure the worst case is an empty object, whose state is still valid.
+  static PerfCounters Create(const std::vector<std::string>& counter_names);
+
+  // Takes a snapshot of the current value of the counters into the provided
+  // PerfCounterValues storage, which must be valid (non-null). The values are
+  // populated such that names()[i]'s value is (*values)[i].
+  // Returns true iff the read yielded as many values as there are counters.
+  // Always returns false on Windows.
+  BENCHMARK_ALWAYS_INLINE bool Snapshot(PerfCounterValues* values) const {
+#ifdef BENCHMARK_OS_WINDOWS
+    (void)values;
+    return false;
+#else
+    assert(values != nullptr);
+    const size_t values_read = values->Read(leader_ids_);
+    return values_read == counter_ids_.size();
+#endif
+  }
+
+  // Number of counter names held by this object.
+  size_t num_counters() const { return counter_names_.size(); }
+  // The counter names, in the order used to index snapshot values.
+  const std::vector<std::string>& names() const { return counter_names_; }
+
+ private:
+  // Builds an object from a copy of the names and by taking ownership of the
+  // counter and group leader id vectors.
+  PerfCounters(const std::vector<std::string>& counter_names,
+               std::vector<int>&& counter_ids, std::vector<int>&& leader_ids)
+      : counter_ids_(std::move(counter_ids)),
+        leader_ids_(std::move(leader_ids)),
+        counter_names_(counter_names) {}
+
+  // Invoked by the destructor; defined out of line.
+  void CloseCounters() const;
+
+  std::vector<int> counter_ids_;
+  std::vector<int> leader_ids_;
+  std::vector<std::string> counter_names_;
+};
+
+// Typical usage of the above primitives: owns a PerfCounters object plus a
+// start and an end snapshot, and reports the difference between the two.
+class BENCHMARK_EXPORT PerfCountersMeasurement final {
+ public:
+  PerfCountersMeasurement(const std::vector<std::string>& counter_names);
+
+  // Returns a copy of the names of the counters being measured.
+  std::vector<std::string> names() const { return counters_.names(); }
+
+  // Number of counters being measured.
+  size_t num_counters() const { return counters_.num_counters(); }
+
+  // Records the starting snapshot. Returns true when there are no counters;
+  // otherwise returns the accumulated validity of all reads made so far.
+  BENCHMARK_ALWAYS_INLINE bool Start() {
+    if (num_counters() != 0) {
+      // The ClobberMemory() calls tell the compiler not to move instructions
+      // above/below the point where the snapshot is taken.
+      ClobberMemory();
+      const bool snapshot_ok = counters_.Snapshot(&start_values_);
+      valid_read_ = valid_read_ && snapshot_ok;
+      ClobberMemory();
+      return valid_read_;
+    }
+    return true;
+  }
+
+  // Records the ending snapshot and appends one (name, end - start) pair per
+  // counter to `measurements`. The pairs are appended whether or not the
+  // reads succeeded, so callers should check the return value. Returns true
+  // when there are no counters (nothing is appended); otherwise returns the
+  // accumulated validity of all reads made so far.
+  BENCHMARK_ALWAYS_INLINE bool Stop(
+      std::vector<std::pair<std::string, double>>& measurements) {
+    if (num_counters() == 0) {
+      return true;
+    }
+
+    // The ClobberMemory() calls tell the compiler not to move instructions
+    // above/below the point where the snapshot is taken.
+    ClobberMemory();
+    const bool snapshot_ok = counters_.Snapshot(&end_values_);
+    valid_read_ = valid_read_ && snapshot_ok;
+    ClobberMemory();
+
+    const std::vector<std::string>& counter_names = counters_.names();
+    const size_t counter_count = counter_names.size();
+    for (size_t idx = 0; idx != counter_count; ++idx) {
+      const double last_reading = static_cast<double>(end_values_[idx]);
+      const double first_reading = static_cast<double>(start_values_[idx]);
+      measurements.emplace_back(counter_names[idx],
+                                last_reading - first_reading);
+    }
+
+    return valid_read_;
+  }
+
+ private:
+  PerfCounters counters_;
+  // Cleared by the first failed snapshot; nothing in this class sets it back
+  // to true.
+  bool valid_read_ = true;
+  PerfCounterValues start_values_;
+  PerfCounterValues end_values_;
+};
+
+} // namespace internal
+} // namespace benchmark
+
+#if defined(_MSC_VER)
+#pragma warning(pop)
+#endif
+
+#endif // BENCHMARK_PERF_COUNTERS_H