diff options
Diffstat (limited to 'src/profiling/perf/event_config.cc')
-rw-r--r-- | src/profiling/perf/event_config.cc | 240 |
1 files changed, 165 insertions, 75 deletions
diff --git a/src/profiling/perf/event_config.cc b/src/profiling/perf/event_config.cc index a7b6533f0..8506a1c55 100644 --- a/src/profiling/perf/event_config.cc +++ b/src/profiling/perf/event_config.cc @@ -25,7 +25,6 @@ #include "perfetto/base/flat_set.h" #include "perfetto/ext/base/optional.h" #include "perfetto/ext/base/utils.h" -#include "perfetto/profiling/normalize.h" #include "src/profiling/perf/regs_parsing.h" #include "protos/perfetto/common/perf_events.gen.h" @@ -40,23 +39,6 @@ constexpr uint32_t kDefaultDataPagesPerRingBuffer = 256; // 1 MB: 256x 4k pages constexpr uint32_t kDefaultReadTickPeriodMs = 100; constexpr uint32_t kDefaultRemoteDescriptorTimeoutMs = 100; -base::Optional<std::string> Normalize(const std::string& src) { - // Construct a null-terminated string that will be mutated by the normalizer. - std::vector<char> base(src.size() + 1); - memcpy(base.data(), src.data(), src.size()); - base[src.size()] = '\0'; - - char* new_start = base.data(); - ssize_t new_sz = NormalizeCmdLine(&new_start, base.size()); - if (new_sz < 0) { - PERFETTO_ELOG("Failed to normalize config cmdline [%s], aborting", - base.data()); - return base::nullopt; - } - return base::make_optional<std::string>(new_start, - static_cast<size_t>(new_sz)); -} - // Acceptable forms: "sched/sched_switch" or "sched:sched_switch". std::pair<std::string, std::string> SplitTracepointString( const std::string& input) { @@ -100,40 +82,29 @@ base::Optional<uint32_t> ParseTracepointAndResolveId( return base::make_optional(tracepoint_id); } -// Returns |base::nullopt| if any of the input cmdlines couldn't be normalized. // |T| is either gen::PerfEventConfig or gen::PerfEventConfig::Scope. +// Note: the semantics of target_cmdline and exclude_cmdline were changed since +// their original introduction. They used to be put through a canonicalization +// function that simplified them to the binary name alone. We no longer do this, +// regardless of whether we're parsing an old-style config. The overall outcome +// shouldn't change for almost all existing uses. template <typename T> -base::Optional<TargetFilter> ParseTargetFilter(const T& cfg) { +TargetFilter ParseTargetFilter(const T& cfg) { TargetFilter filter; for (const auto& str : cfg.target_cmdline()) { - base::Optional<std::string> opt = Normalize(str); - if (!opt.has_value()) { - PERFETTO_ELOG("Failure normalizing cmdline: [%s]", str.c_str()); - return base::nullopt; - } - filter.cmdlines.insert(std::move(opt.value())); + filter.cmdlines.push_back(str); } - for (const auto& str : cfg.exclude_cmdline()) { - base::Optional<std::string> opt = Normalize(str); - if (!opt.has_value()) { - PERFETTO_ELOG("Failure normalizing cmdline: [%s]", str.c_str()); - return base::nullopt; - } - filter.exclude_cmdlines.insert(std::move(opt.value())); + filter.exclude_cmdlines.push_back(str); } - for (const int32_t pid : cfg.target_pid()) { filter.pids.insert(pid); } - for (const int32_t pid : cfg.exclude_pid()) { filter.exclude_pids.insert(pid); } - filter.additional_cmdline_count = cfg.additional_cmdline_count(); - - return base::make_optional(std::move(filter)); + return filter; } constexpr bool IsPowerOfTwo(size_t v) { @@ -156,23 +127,91 @@ base::Optional<uint32_t> ChooseActualRingBufferPages(uint32_t config_value) { } base::Optional<PerfCounter> ToPerfCounter( + std::string name, protos::gen::PerfEvents::Counter pb_enum) { using protos::gen::PerfEvents; switch (static_cast<int>(pb_enum)) { // cast to pacify -Wswitch-enum case PerfEvents::SW_CPU_CLOCK: - return PerfCounter::Counter(PerfEvents::SW_CPU_CLOCK, PERF_TYPE_SOFTWARE, - PERF_COUNT_SW_CPU_CLOCK); + return PerfCounter::BuiltinCounter(name, PerfEvents::SW_CPU_CLOCK, + PERF_TYPE_SOFTWARE, + PERF_COUNT_SW_CPU_CLOCK); case PerfEvents::SW_PAGE_FAULTS: - return PerfCounter::Counter(PerfEvents::SW_PAGE_FAULTS, - PERF_TYPE_SOFTWARE, - PERF_COUNT_SW_PAGE_FAULTS); + return PerfCounter::BuiltinCounter(name, PerfEvents::SW_PAGE_FAULTS, + PERF_TYPE_SOFTWARE, + PERF_COUNT_SW_PAGE_FAULTS); + case PerfEvents::SW_TASK_CLOCK: + return PerfCounter::BuiltinCounter(name, PerfEvents::SW_TASK_CLOCK, + PERF_TYPE_SOFTWARE, + PERF_COUNT_SW_TASK_CLOCK); + case PerfEvents::SW_CONTEXT_SWITCHES: + return PerfCounter::BuiltinCounter(name, PerfEvents::SW_CONTEXT_SWITCHES, + PERF_TYPE_SOFTWARE, + PERF_COUNT_SW_CONTEXT_SWITCHES); + case PerfEvents::SW_CPU_MIGRATIONS: + return PerfCounter::BuiltinCounter(name, PerfEvents::SW_CPU_MIGRATIONS, + PERF_TYPE_SOFTWARE, + PERF_COUNT_SW_CPU_MIGRATIONS); + case PerfEvents::SW_PAGE_FAULTS_MIN: + return PerfCounter::BuiltinCounter(name, PerfEvents::SW_PAGE_FAULTS_MIN, + PERF_TYPE_SOFTWARE, + PERF_COUNT_SW_PAGE_FAULTS_MIN); + case PerfEvents::SW_PAGE_FAULTS_MAJ: + return PerfCounter::BuiltinCounter(name, PerfEvents::SW_PAGE_FAULTS_MAJ, + PERF_TYPE_SOFTWARE, + PERF_COUNT_SW_PAGE_FAULTS_MAJ); + case PerfEvents::SW_ALIGNMENT_FAULTS: + return PerfCounter::BuiltinCounter(name, PerfEvents::SW_ALIGNMENT_FAULTS, + PERF_TYPE_SOFTWARE, + PERF_COUNT_SW_ALIGNMENT_FAULTS); + case PerfEvents::SW_EMULATION_FAULTS: + return PerfCounter::BuiltinCounter(name, PerfEvents::SW_EMULATION_FAULTS, + PERF_TYPE_SOFTWARE, + PERF_COUNT_SW_EMULATION_FAULTS); + case PerfEvents::SW_DUMMY: + return PerfCounter::BuiltinCounter( + name, PerfEvents::SW_DUMMY, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_DUMMY); + case PerfEvents::HW_CPU_CYCLES: - return PerfCounter::Counter(PerfEvents::HW_CPU_CYCLES, PERF_TYPE_HARDWARE, - PERF_COUNT_HW_CPU_CYCLES); + return PerfCounter::BuiltinCounter(name, PerfEvents::HW_CPU_CYCLES, + PERF_TYPE_HARDWARE, + PERF_COUNT_HW_CPU_CYCLES); case PerfEvents::HW_INSTRUCTIONS: - return PerfCounter::Counter(PerfEvents::HW_INSTRUCTIONS, - PERF_TYPE_HARDWARE, - PERF_COUNT_HW_INSTRUCTIONS); + return PerfCounter::BuiltinCounter(name, PerfEvents::HW_INSTRUCTIONS, + PERF_TYPE_HARDWARE, + PERF_COUNT_HW_INSTRUCTIONS); + case PerfEvents::HW_CACHE_REFERENCES: + return PerfCounter::BuiltinCounter(name, PerfEvents::HW_CACHE_REFERENCES, + PERF_TYPE_HARDWARE, + PERF_COUNT_HW_CACHE_REFERENCES); + case PerfEvents::HW_CACHE_MISSES: + return PerfCounter::BuiltinCounter(name, PerfEvents::HW_CACHE_MISSES, + PERF_TYPE_HARDWARE, + PERF_COUNT_HW_CACHE_MISSES); + case PerfEvents::HW_BRANCH_INSTRUCTIONS: + return PerfCounter::BuiltinCounter( + name, PerfEvents::HW_BRANCH_INSTRUCTIONS, PERF_TYPE_HARDWARE, + PERF_COUNT_HW_BRANCH_INSTRUCTIONS); + case PerfEvents::HW_BRANCH_MISSES: + return PerfCounter::BuiltinCounter(name, PerfEvents::HW_BRANCH_MISSES, + PERF_TYPE_HARDWARE, + PERF_COUNT_HW_BRANCH_MISSES); + case PerfEvents::HW_BUS_CYCLES: + return PerfCounter::BuiltinCounter(name, PerfEvents::HW_BUS_CYCLES, + PERF_TYPE_HARDWARE, + PERF_COUNT_HW_BUS_CYCLES); + case PerfEvents::HW_STALLED_CYCLES_FRONTEND: + return PerfCounter::BuiltinCounter( + name, PerfEvents::HW_STALLED_CYCLES_FRONTEND, PERF_TYPE_HARDWARE, + PERF_COUNT_HW_STALLED_CYCLES_FRONTEND); + case PerfEvents::HW_STALLED_CYCLES_BACKEND: + return PerfCounter::BuiltinCounter( + name, PerfEvents::HW_STALLED_CYCLES_BACKEND, PERF_TYPE_HARDWARE, + PERF_COUNT_HW_STALLED_CYCLES_BACKEND); + case PerfEvents::HW_REF_CPU_CYCLES: + return PerfCounter::BuiltinCounter(name, PerfEvents::HW_REF_CPU_CYCLES, + PERF_TYPE_HARDWARE, + PERF_COUNT_HW_REF_CPU_CYCLES); + default: PERFETTO_ELOG("Unrecognised PerfEvents::Counter enum value: %zu", static_cast<size_t>(pb_enum)); @@ -180,27 +219,74 @@ base::Optional<PerfCounter> ToPerfCounter( } } +int32_t ToClockId(protos::gen::PerfEvents::PerfClock pb_enum) { + using protos::gen::PerfEvents; + switch (static_cast<int>(pb_enum)) { // cast to pacify -Wswitch-enum + case PerfEvents::PERF_CLOCK_REALTIME: + return CLOCK_REALTIME; + case PerfEvents::PERF_CLOCK_MONOTONIC: + return CLOCK_MONOTONIC; + case PerfEvents::PERF_CLOCK_MONOTONIC_RAW: + return CLOCK_MONOTONIC_RAW; + case PerfEvents::PERF_CLOCK_BOOTTIME: + return CLOCK_BOOTTIME; + // Default to a monotonic clock since it should be compatible with all types + // of events. Whereas boottime cannot be used with hardware events due to + // potential access within non-maskable interrupts. + default: + return CLOCK_MONOTONIC_RAW; + } +} + } // namespace // static -PerfCounter PerfCounter::Counter(protos::gen::PerfEvents::Counter counter, - uint32_t type, - uint32_t config) { +PerfCounter PerfCounter::BuiltinCounter( + std::string name, + protos::gen::PerfEvents::Counter counter, + uint32_t type, + uint64_t config) { PerfCounter ret; + ret.type = PerfCounter::Type::kBuiltinCounter; ret.counter = counter; - ret.type = type; - ret.config = config; + ret.name = std::move(name); + + ret.attr_type = type; + ret.attr_config = config; + // none of the builtin counters require config1 and config2 at the moment + return ret; +} + +// static +PerfCounter PerfCounter::Tracepoint(std::string name, + std::string tracepoint_name, + std::string tracepoint_filter, + uint64_t id) { + PerfCounter ret; + ret.type = PerfCounter::Type::kTracepoint; + ret.tracepoint_name = std::move(tracepoint_name); + ret.tracepoint_filter = std::move(tracepoint_filter); + ret.name = std::move(name); + + ret.attr_type = PERF_TYPE_TRACEPOINT; + ret.attr_config = id; return ret; } // static -PerfCounter PerfCounter::Tracepoint( - protos::gen::PerfEvents::Tracepoint tracepoint, - uint32_t id) { +PerfCounter PerfCounter::RawEvent(std::string name, + uint32_t type, + uint64_t config, + uint64_t config1, + uint64_t config2) { PerfCounter ret; - ret.tracepoint = std::move(tracepoint); - ret.type = PERF_TYPE_TRACEPOINT; - ret.config = id; + ret.type = PerfCounter::Type::kRawEvent; + ret.name = std::move(name); + + ret.attr_type = type; + ret.attr_config = config; + ret.attr_config1 = config1; + ret.attr_config2 = config2; return ret; } @@ -237,8 +323,10 @@ base::Optional<EventConfig> EventConfig::Create( // Timebase event. Default: CPU timer. PerfCounter timebase_event; + std::string timebase_name = pb_config.timebase().name(); if (pb_config.timebase().has_counter()) { - auto maybe_counter = ToPerfCounter(pb_config.timebase().counter()); + auto maybe_counter = + ToPerfCounter(timebase_name, pb_config.timebase().counter()); if (!maybe_counter) return base::nullopt; timebase_event = *maybe_counter; @@ -249,12 +337,18 @@ base::Optional<EventConfig> EventConfig::Create( ParseTracepointAndResolveId(tracepoint_pb, tracepoint_id_lookup); if (!maybe_id) return base::nullopt; - timebase_event = PerfCounter::Tracepoint(tracepoint_pb, *maybe_id); + timebase_event = PerfCounter::Tracepoint( + timebase_name, tracepoint_pb.name(), tracepoint_pb.filter(), *maybe_id); + + } else if (pb_config.timebase().has_raw_event()) { + const auto& raw = pb_config.timebase().raw_event(); + timebase_event = PerfCounter::RawEvent( + timebase_name, raw.type(), raw.config(), raw.config1(), raw.config2()); } else { - timebase_event = - PerfCounter::Counter(protos::gen::PerfEvents::PerfEvents::SW_CPU_CLOCK, - PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_CLOCK); + timebase_event = PerfCounter::BuiltinCounter( + timebase_name, protos::gen::PerfEvents::PerfEvents::SW_CPU_CLOCK, + PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_CLOCK); } // Callstack sampling. @@ -266,14 +360,10 @@ base::Optional<EventConfig> EventConfig::Create( sample_callstacks = true; // Process scoping. - auto maybe_filter = + target_filter = pb_config.callstack_sampling().has_scope() ? ParseTargetFilter(pb_config.callstack_sampling().scope()) : ParseTargetFilter(pb_config); // backwards compatibility - if (!maybe_filter.has_value()) - return base::nullopt; - - target_filter = std::move(maybe_filter.value()); // Inclusion of kernel callchains. kernel_frames = pb_config.callstack_sampling().kernel_frames() || @@ -298,7 +388,7 @@ base::Optional<EventConfig> EventConfig::Create( // expected = rate * period, with a conversion of period from ms to s: uint64_t expected_samples_per_tick = 1 + (sampling_frequency * read_tick_period_ms) / 1000; - // Double the the limit to account of actual sample rate uncertainties, as + // Double the limit to account of actual sample rate uncertainties, as // well as any other factors: samples_per_tick_limit = 2 * expected_samples_per_tick; } else { // sampling_period @@ -332,8 +422,10 @@ base::Optional<EventConfig> EventConfig::Create( pe.disabled = 1; // will be activated via ioctl // Sampling timebase. - pe.type = timebase_event.type; - pe.config = timebase_event.config; + pe.type = timebase_event.attr_type; + pe.config = timebase_event.attr_config; + pe.config1 = timebase_event.attr_config1; + pe.config2 = timebase_event.attr_config2; if (sampling_frequency) { pe.freq = true; pe.sample_freq = sampling_frequency; @@ -344,9 +436,7 @@ base::Optional<EventConfig> EventConfig::Create( // What the samples will contain. pe.sample_type = PERF_SAMPLE_TID | PERF_SAMPLE_TIME | PERF_SAMPLE_READ; // PERF_SAMPLE_TIME: - // We used to use CLOCK_BOOTTIME, but that is not nmi-safe, and therefore - // works only for software events. - pe.clockid = CLOCK_MONOTONIC_RAW; + pe.clockid = ToClockId(pb_config.timebase().timestamp_clock()); pe.use_clockid = true; if (sample_callstacks) { |