diff options
Diffstat (limited to 'src/perf_counters.h')
-rw-r--r-- | src/perf_counters.h | 200 |
1 files changed, 200 insertions, 0 deletions
diff --git a/src/perf_counters.h b/src/perf_counters.h new file mode 100644 index 0000000..bf5eb6b --- /dev/null +++ b/src/perf_counters.h @@ -0,0 +1,200 @@ +// Copyright 2021 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef BENCHMARK_PERF_COUNTERS_H +#define BENCHMARK_PERF_COUNTERS_H + +#include <array> +#include <cstdint> +#include <cstring> +#include <memory> +#include <vector> + +#include "benchmark/benchmark.h" +#include "check.h" +#include "log.h" +#include "mutex.h" + +#ifndef BENCHMARK_OS_WINDOWS +#include <unistd.h> +#endif + +#if defined(_MSC_VER) +#pragma warning(push) +// C4251: <symbol> needs to have dll-interface to be used by clients of class +#pragma warning(disable : 4251) +#endif + +namespace benchmark { +namespace internal { + +// Typically, we can only read a small number of counters. There is also a +// padding preceding counter values, when reading multiple counters with one +// syscall (which is desirable). PerfCounterValues abstracts these details. +// The implementation ensures the storage is inlined, and allows 0-based +// indexing into the counter values. +// The object is used in conjunction with a PerfCounters object, by passing it +// to Snapshot(). The Read() method relocates individual reads, discarding +// the initial padding from each group leader in the values buffer such that +// all user accesses through the [] operator are correct. +class BENCHMARK_EXPORT PerfCounterValues { + public: + explicit PerfCounterValues(size_t nr_counters) : nr_counters_(nr_counters) { + BM_CHECK_LE(nr_counters_, kMaxCounters); + } + + // We are reading correctly now so the values don't need to skip padding + uint64_t operator[](size_t pos) const { return values_[pos]; } + + // Increased the maximum to 32 only since the buffer + // is std::array<> backed + static constexpr size_t kMaxCounters = 32; + + private: + friend class PerfCounters; + // Get the byte buffer in which perf counters can be captured. + // This is used by PerfCounters::Read + std::pair<char*, size_t> get_data_buffer() { + return {reinterpret_cast<char*>(values_.data()), + sizeof(uint64_t) * (kPadding + nr_counters_)}; + } + + // This reading is complex and as the goal of this class is to + // abstract away the intrincacies of the reading process, this is + // a better place for it + size_t Read(const std::vector<int>& leaders); + + // Move the padding to 2 due to the reading algorithm (1st padding plus a + // current read padding) + static constexpr size_t kPadding = 2; + std::array<uint64_t, kPadding + kMaxCounters> values_; + const size_t nr_counters_; +}; + +// Collect PMU counters. The object, once constructed, is ready to be used by +// calling read(). PMU counter collection is enabled from the time create() is +// called, to obtain the object, until the object's destructor is called. +class BENCHMARK_EXPORT PerfCounters final { + public: + // True iff this platform supports performance counters. + static const bool kSupported; + + // Returns an empty object + static PerfCounters NoCounters() { return PerfCounters(); } + + ~PerfCounters() { CloseCounters(); } + PerfCounters() = default; + PerfCounters(PerfCounters&&) = default; + PerfCounters(const PerfCounters&) = delete; + PerfCounters& operator=(PerfCounters&&) noexcept; + PerfCounters& operator=(const PerfCounters&) = delete; + + // Platform-specific implementations may choose to do some library + // initialization here. + static bool Initialize(); + + // Check if the given counter is supported, if the app wants to + // check before passing + static bool IsCounterSupported(const std::string& name); + + // Return a PerfCounters object ready to read the counters with the names + // specified. The values are user-mode only. The counter name format is + // implementation and OS specific. + // In case of failure, this method will in the worst case return an + // empty object whose state will still be valid. + static PerfCounters Create(const std::vector<std::string>& counter_names); + + // Take a snapshot of the current value of the counters into the provided + // valid PerfCounterValues storage. The values are populated such that: + // names()[i]'s value is (*values)[i] + BENCHMARK_ALWAYS_INLINE bool Snapshot(PerfCounterValues* values) const { +#ifndef BENCHMARK_OS_WINDOWS + assert(values != nullptr); + return values->Read(leader_ids_) == counter_ids_.size(); +#else + (void)values; + return false; +#endif + } + + const std::vector<std::string>& names() const { return counter_names_; } + size_t num_counters() const { return counter_names_.size(); } + + private: + PerfCounters(const std::vector<std::string>& counter_names, + std::vector<int>&& counter_ids, std::vector<int>&& leader_ids) + : counter_ids_(std::move(counter_ids)), + leader_ids_(std::move(leader_ids)), + counter_names_(counter_names) {} + + void CloseCounters() const; + + std::vector<int> counter_ids_; + std::vector<int> leader_ids_; + std::vector<std::string> counter_names_; +}; + +// Typical usage of the above primitives. +class BENCHMARK_EXPORT PerfCountersMeasurement final { + public: + PerfCountersMeasurement(const std::vector<std::string>& counter_names); + + size_t num_counters() const { return counters_.num_counters(); } + + std::vector<std::string> names() const { return counters_.names(); } + + BENCHMARK_ALWAYS_INLINE bool Start() { + if (num_counters() == 0) return true; + // Tell the compiler to not move instructions above/below where we take + // the snapshot. + ClobberMemory(); + valid_read_ &= counters_.Snapshot(&start_values_); + ClobberMemory(); + + return valid_read_; + } + + BENCHMARK_ALWAYS_INLINE bool Stop( + std::vector<std::pair<std::string, double>>& measurements) { + if (num_counters() == 0) return true; + // Tell the compiler to not move instructions above/below where we take + // the snapshot. + ClobberMemory(); + valid_read_ &= counters_.Snapshot(&end_values_); + ClobberMemory(); + + for (size_t i = 0; i < counters_.names().size(); ++i) { + double measurement = static_cast<double>(end_values_[i]) - + static_cast<double>(start_values_[i]); + measurements.push_back({counters_.names()[i], measurement}); + } + + return valid_read_; + } + + private: + PerfCounters counters_; + bool valid_read_ = true; + PerfCounterValues start_values_; + PerfCounterValues end_values_; +}; + +} // namespace internal +} // namespace benchmark + +#if defined(_MSC_VER) +#pragma warning(pop) +#endif + +#endif // BENCHMARK_PERF_COUNTERS_H |