diff options
Diffstat (limited to 'src/profiling/perf/perf_producer.cc')
-rw-r--r-- | src/profiling/perf/perf_producer.cc | 410 |
1 files changed, 92 insertions, 318 deletions
diff --git a/src/profiling/perf/perf_producer.cc b/src/profiling/perf/perf_producer.cc index b1c7e1f27..e0dde42d5 100644 --- a/src/profiling/perf/perf_producer.cc +++ b/src/profiling/perf/perf_producer.cc @@ -19,6 +19,7 @@ #include <random> #include <utility> +#include <malloc.h> #include <unistd.h> #include <unwindstack/Error.h> @@ -27,7 +28,6 @@ #include "perfetto/base/logging.h" #include "perfetto/base/task_runner.h" #include "perfetto/ext/base/metatrace.h" -#include "perfetto/ext/base/utils.h" #include "perfetto/ext/base/weak_ptr.h" #include "perfetto/ext/tracing/core/basic_types.h" #include "perfetto/ext/tracing/core/producer.h" @@ -37,19 +37,13 @@ #include "perfetto/tracing/core/data_source_descriptor.h" #include "src/profiling/common/callstack_trie.h" #include "src/profiling/common/proc_utils.h" -#include "src/profiling/common/producer_support.h" -#include "src/profiling/common/profiler_guardrails.h" #include "src/profiling/common/unwind_support.h" #include "src/profiling/perf/common_types.h" #include "src/profiling/perf/event_reader.h" -#include "protos/perfetto/common/builtin_clock.pbzero.h" -#include "protos/perfetto/common/perf_events.gen.h" -#include "protos/perfetto/common/perf_events.pbzero.h" -#include "protos/perfetto/config/profiling/perf_event_config.gen.h" +#include "protos/perfetto/config/profiling/perf_event_config.pbzero.h" #include "protos/perfetto/trace/profiling/profile_packet.pbzero.h" #include "protos/perfetto/trace/trace_packet.pbzero.h" -#include "protos/perfetto/trace/trace_packet_defaults.pbzero.h" namespace perfetto { namespace profiling { @@ -68,8 +62,6 @@ namespace { // The proper fix is in the platform, see bug for progress. constexpr uint32_t kProcDescriptorsAndroidDelayMs = 50; -constexpr uint32_t kMemoryLimitCheckPeriodMs = 5 * 1000; - constexpr uint32_t kInitialConnectionBackoffMs = 100; constexpr uint32_t kMaxConnectionBackoffMs = 30 * 1000; @@ -80,54 +72,6 @@ size_t NumberOfCpus() { return static_cast<size_t>(sysconf(_SC_NPROCESSORS_CONF)); } -TraceWriter::TracePacketHandle StartTracePacket(TraceWriter* trace_writer) { - auto packet = trace_writer->NewTracePacket(); - packet->set_sequence_flags( - protos::pbzero::TracePacket::SEQ_NEEDS_INCREMENTAL_STATE); - return packet; -} - -void WritePerfEventDefaultsPacket(const EventConfig& event_config, - TraceWriter* trace_writer) { - auto packet = trace_writer->NewTracePacket(); - packet->set_timestamp(static_cast<uint64_t>(base::GetBootTimeNs().count())); - packet->set_timestamp_clock_id(protos::pbzero::BUILTIN_CLOCK_BOOTTIME); - - // start new incremental state generation: - packet->set_sequence_flags( - protos::pbzero::TracePacket::SEQ_INCREMENTAL_STATE_CLEARED); - - // default packet timestamp clockid: - auto* defaults = packet->set_trace_packet_defaults(); - defaults->set_timestamp_clock_id(protos::pbzero::BUILTIN_CLOCK_MONOTONIC_RAW); - PERFETTO_DCHECK(event_config.perf_attr()->clockid == CLOCK_MONOTONIC_RAW); - - auto* perf_defaults = defaults->set_perf_sample_defaults(); - auto* timebase_pb = perf_defaults->set_timebase(); - - // frequency/period: - perf_event_attr* perf_attr = event_config.perf_attr(); - if (perf_attr->freq) { - timebase_pb->set_frequency(perf_attr->sample_freq); - } else { - timebase_pb->set_period(perf_attr->sample_period); - } - - // event: - const PerfCounter& timebase = event_config.timebase_event(); - if (timebase.is_counter()) { - timebase_pb->set_counter( - static_cast<protos::pbzero::PerfEvents::Counter>(timebase.counter)); - } else { - PERFETTO_DCHECK(timebase.is_tracepoint()); - // TODO(rsavitski): reconsider using a struct with two strings instead - // of the ::gen::Tracepoint class in the C++ code. - auto* tracepoint_pb = timebase_pb->set_tracepoint(); - tracepoint_pb->set_name(timebase.tracepoint.name()); - tracepoint_pb->set_filter(timebase.tracepoint.filter()); - } -} - uint32_t TimeToNextReadTickMs(DataSourceInstanceID ds_id, uint32_t period_ms) { // Normally, we'd schedule the next tick at the next |period_ms| // boundary of the boot clock. However, to avoid aligning the read tasks of @@ -141,54 +85,41 @@ uint32_t TimeToNextReadTickMs(DataSourceInstanceID ds_id, uint32_t period_ms) { return period_ms - ((now_ms - ds_period_offset) % period_ms); } -bool ShouldRejectDueToFilter(pid_t pid, - base::FlatSet<std::string>* additional_cmdlines, - const TargetFilter& filter) { - PERFETTO_CHECK(additional_cmdlines); +bool ShouldRejectDueToFilter(pid_t pid, const TargetFilter& filter) { + bool reject_cmd = false; std::string cmdline; - bool have_cmdline = GetCmdlineForPID(pid, &cmdline); // normalized form - if (!have_cmdline) { + if (GetCmdlineForPID(pid, &cmdline)) { // normalized form + // reject if absent from non-empty whitelist, or present in blacklist + reject_cmd = (filter.cmdlines.size() && !filter.cmdlines.count(cmdline)) || + filter.exclude_cmdlines.count(cmdline); + } else { PERFETTO_DLOG("Failed to look up cmdline for pid [%d]", static_cast<int>(pid)); + // reject only if there's a whitelist present + reject_cmd = filter.cmdlines.size() > 0; } - if (have_cmdline && filter.exclude_cmdlines.count(cmdline)) { - PERFETTO_DLOG("Explicitly rejecting samples for pid [%d] due to cmdline", - static_cast<int>(pid)); - return true; - } - if (filter.exclude_pids.count(pid)) { - PERFETTO_DLOG("Explicitly rejecting samples for pid [%d] due to pid", - static_cast<int>(pid)); - return true; - } + bool reject_pid = (filter.pids.size() && !filter.pids.count(pid)) || + filter.exclude_pids.count(pid); - if (have_cmdline && filter.cmdlines.count(cmdline)) { - return false; - } - if (filter.pids.count(pid)) { - return false; - } - if (filter.cmdlines.empty() && filter.pids.empty() && - !filter.additional_cmdline_count) { - // If no filters are set allow everything. - return false; - } + if (reject_cmd || reject_pid) { + PERFETTO_DLOG( + "Rejecting samples for pid [%d] due to cmdline(%d) or pid(%d)", + static_cast<int>(pid), reject_cmd, reject_pid); - // If we didn't read the command line that's a good prediction we will not be - // able to profile either. - if (have_cmdline) { - if (additional_cmdlines->count(cmdline)) { - return false; - } - if (additional_cmdlines->size() < filter.additional_cmdline_count) { - additional_cmdlines->insert(cmdline); - return false; - } + return true; } + return false; +} - PERFETTO_DLOG("Rejecting samples for pid [%d]", static_cast<int>(pid)); - return true; +void MaybeReleaseAllocatorMemToOS() { +#if defined(__BIONIC__) + // TODO(b/152414415): libunwindstack's volume of small allocations is + // adverarial to scudo, which doesn't automatically release small + // allocation regions back to the OS. Forceful purge does reclaim all size + // classes. + mallopt(M_PURGE, 0); +#endif } protos::pbzero::Profiling::CpuMode ToCpuModeEnum(uint16_t perf_cpu_mode) { @@ -229,12 +160,6 @@ protos::pbzero::Profiling::StackUnwindError ToProtoEnum( return Profiling::UNWIND_ERROR_REPEATED_FRAME; case unwindstack::ERROR_INVALID_ELF: return Profiling::UNWIND_ERROR_INVALID_ELF; - case unwindstack::ERROR_SYSTEM_CALL: - return Profiling::UNWIND_ERROR_SYSTEM_CALL; - case unwindstack::ERROR_THREAD_TIMEOUT: - return Profiling::UNWIND_ERROR_THREAD_TIMEOUT; - case unwindstack::ERROR_THREAD_DOES_NOT_EXIST: - return Profiling::UNWIND_ERROR_THREAD_DOES_NOT_EXIST; } return Profiling::UNWIND_ERROR_UNKNOWN; } @@ -250,16 +175,18 @@ PerfProducer::PerfProducer(ProcDescriptorGetter* proc_fd_getter, proc_fd_getter->SetDelegate(this); } +// TODO(rsavitski): consider configure at setup + enable at start instead. void PerfProducer::SetupDataSource(DataSourceInstanceID, const DataSourceConfig&) {} -void PerfProducer::StartDataSource(DataSourceInstanceID ds_id, +void PerfProducer::StartDataSource(DataSourceInstanceID instance_id, const DataSourceConfig& config) { - PERFETTO_LOG("StartDataSource(%zu, %s)", static_cast<size_t>(ds_id), + PERFETTO_LOG("StartDataSource(%zu, %s)", static_cast<size_t>(instance_id), config.name().c_str()); if (config.name() == MetatraceWriter::kDataSourceName) { - StartMetatraceSource(ds_id, static_cast<BufferID>(config.target_buffer())); + StartMetatraceSource(instance_id, + static_cast<BufferID>(config.target_buffer())); return; } @@ -267,28 +194,17 @@ void PerfProducer::StartDataSource(DataSourceInstanceID ds_id, if (config.name() != kDataSourceName) return; - // Tracepoint name -> id lookup in case the config asks for tracepoints: - auto tracepoint_id_lookup = [this](const std::string& group, - const std::string& name) { - if (!tracefs_) // lazy init or retry - tracefs_ = FtraceProcfs::CreateGuessingMountPoint(); - if (!tracefs_) // still didn't find an accessible tracefs - return 0u; - return tracefs_->ReadEventId(group, name); - }; - - protos::gen::PerfEventConfig event_config_pb; - if (!event_config_pb.ParseFromString(config.perf_event_config_raw())) { - PERFETTO_ELOG("PerfEventConfig could not be parsed."); - return; - } - base::Optional<EventConfig> event_config = - EventConfig::Create(event_config_pb, config, tracepoint_id_lookup); + base::Optional<EventConfig> event_config = EventConfig::Create(config); if (!event_config.has_value()) { PERFETTO_ELOG("PerfEventConfig rejected."); return; } + // TODO(rsavitski): consider supporting specific cpu subsets. + if (!event_config->target_all_cpus()) { + PERFETTO_ELOG("PerfEventConfig{all_cpus} required"); + return; + } size_t num_cpus = NumberOfCpus(); std::vector<EventReader> per_cpu_readers; for (uint32_t cpu = 0; cpu < num_cpus; cpu++) { @@ -310,100 +226,50 @@ void PerfProducer::StartDataSource(DataSourceInstanceID ds_id, std::map<DataSourceInstanceID, DataSourceState>::iterator ds_it; bool inserted; std::tie(ds_it, inserted) = data_sources_.emplace( - std::piecewise_construct, std::forward_as_tuple(ds_id), + std::piecewise_construct, std::forward_as_tuple(instance_id), std::forward_as_tuple(event_config.value(), std::move(writer), std::move(per_cpu_readers))); PERFETTO_CHECK(inserted); DataSourceState& ds = ds_it->second; - // Start the configured events. - for (auto& per_cpu_reader : ds.per_cpu_readers) { - per_cpu_reader.EnableEvents(); - } - - WritePerfEventDefaultsPacket(ds.event_config, ds.trace_writer.get()); - + // Write out a packet to initialize the incremental state for this sequence. InterningOutputTracker::WriteFixedInterningsPacket( - ds_it->second.trace_writer.get(), - protos::pbzero::TracePacket::SEQ_NEEDS_INCREMENTAL_STATE); + ds_it->second.trace_writer.get()); // Inform unwinder of the new data source instance, and optionally start a // periodic task to clear its cached state. - unwinding_worker_->PostStartDataSource(ds_id, - ds.event_config.kernel_frames()); + unwinding_worker_->PostStartDataSource(instance_id); if (ds.event_config.unwind_state_clear_period_ms()) { unwinding_worker_->PostClearCachedStatePeriodic( - ds_id, ds.event_config.unwind_state_clear_period_ms()); + instance_id, ds.event_config.unwind_state_clear_period_ms()); } // Kick off periodic read task. auto tick_period_ms = ds.event_config.read_tick_period_ms(); auto weak_this = weak_factory_.GetWeakPtr(); task_runner_->PostDelayedTask( - [weak_this, ds_id] { - if (weak_this) - weak_this->TickDataSourceRead(ds_id); - }, - TimeToNextReadTickMs(ds_id, tick_period_ms)); - - // Optionally kick off periodic memory footprint limit check. - uint32_t max_daemon_memory_kb = event_config_pb.max_daemon_memory_kb(); - if (max_daemon_memory_kb > 0) { - task_runner_->PostDelayedTask( - [weak_this, ds_id, max_daemon_memory_kb] { - if (weak_this) - weak_this->CheckMemoryFootprintPeriodic(ds_id, - max_daemon_memory_kb); - }, - kMemoryLimitCheckPeriodMs); - } -} - -void PerfProducer::CheckMemoryFootprintPeriodic(DataSourceInstanceID ds_id, - uint32_t max_daemon_memory_kb) { - auto ds_it = data_sources_.find(ds_id); - if (ds_it == data_sources_.end()) - return; // stop recurring - - GuardrailConfig gconfig = {}; - gconfig.memory_guardrail_kb = max_daemon_memory_kb; - - ProfilerMemoryGuardrails footprint_snapshot; - if (footprint_snapshot.IsOverMemoryThreshold(gconfig)) { - PurgeDataSource(ds_id); - return; // stop recurring - } - - // repost - auto weak_this = weak_factory_.GetWeakPtr(); - task_runner_->PostDelayedTask( - [weak_this, ds_id, max_daemon_memory_kb] { + [weak_this, instance_id] { if (weak_this) - weak_this->CheckMemoryFootprintPeriodic(ds_id, max_daemon_memory_kb); + weak_this->TickDataSourceRead(instance_id); }, - kMemoryLimitCheckPeriodMs); + TimeToNextReadTickMs(instance_id, tick_period_ms)); } -void PerfProducer::StopDataSource(DataSourceInstanceID ds_id) { - PERFETTO_LOG("StopDataSource(%zu)", static_cast<size_t>(ds_id)); +void PerfProducer::StopDataSource(DataSourceInstanceID instance_id) { + PERFETTO_LOG("StopDataSource(%zu)", static_cast<size_t>(instance_id)); // Metatrace: stop immediately (will miss the events from the // asynchronous shutdown of the primary data source). - auto meta_it = metatrace_writers_.find(ds_id); + auto meta_it = metatrace_writers_.find(instance_id); if (meta_it != metatrace_writers_.end()) { meta_it->second.WriteAllAndFlushTraceWriter([] {}); metatrace_writers_.erase(meta_it); return; } - auto ds_it = data_sources_.find(ds_id); - if (ds_it == data_sources_.end()) { - // Most likely, the source is missing due to an abrupt stop (via - // |PurgeDataSource|). Tell the service that we've stopped the source now, - // so that it doesn't wait for the ack until the timeout. - endpoint_->NotifyDataSourceStopped(ds_id); + auto ds_it = data_sources_.find(instance_id); + if (ds_it == data_sources_.end()) return; - } // Start shutting down the reading frontend, which will propagate the stop // further as the intermediate buffers are cleared. @@ -419,7 +285,7 @@ void PerfProducer::StopDataSource(DataSourceInstanceID ds_id) { void PerfProducer::Flush(FlushRequestID flush_id, const DataSourceInstanceID* data_source_ids, size_t num_data_sources) { - // Flush metatracing if requested. + bool should_ack_flush = false; for (size_t i = 0; i < num_data_sources; i++) { auto ds_id = data_source_ids[i]; PERFETTO_DLOG("Flush(%zu)", static_cast<size_t>(ds_id)); @@ -427,10 +293,14 @@ void PerfProducer::Flush(FlushRequestID flush_id, auto meta_it = metatrace_writers_.find(ds_id); if (meta_it != metatrace_writers_.end()) { meta_it->second.WriteAllAndFlushTraceWriter([] {}); + should_ack_flush = true; + } + if (data_sources_.find(ds_id) != data_sources_.end()) { + should_ack_flush = true; } } - - endpoint_->NotifyFlushComplete(flush_id); + if (should_ack_flush) + endpoint_->NotifyFlushComplete(flush_id); } void PerfProducer::ClearIncrementalState( @@ -451,13 +321,9 @@ void PerfProducer::ClearIncrementalState( } DataSourceState& ds = ds_it->second; - WritePerfEventDefaultsPacket(ds.event_config, ds.trace_writer.get()); - // Forget which incremental state we've emitted before. ds.interning_output.ClearHistory(); - InterningOutputTracker::WriteFixedInterningsPacket( - ds.trace_writer.get(), - protos::pbzero::TracePacket::SEQ_NEEDS_INCREMENTAL_STATE); + InterningOutputTracker::WriteFixedInterningsPacket(ds.trace_writer.get()); // Drop the cross-datasource callstack interning trie. This is not // necessary for correctness (the preceding step is sufficient). However, @@ -483,7 +349,7 @@ void PerfProducer::TickDataSourceRead(DataSourceInstanceID ds_id) { PERFETTO_METATRACE_SCOPED(TAG_PRODUCER, PROFILER_READ_TICK); // Make a pass over all per-cpu readers. - uint64_t max_samples = ds.event_config.samples_per_tick_limit(); + uint32_t max_samples = ds.event_config.samples_per_tick_limit(); bool more_records_available = false; for (EventReader& reader : ds.per_cpu_readers) { if (ReadAndParsePerCpuBuffer(&reader, max_samples, ds_id, &ds)) { @@ -511,7 +377,7 @@ void PerfProducer::TickDataSourceRead(DataSourceInstanceID ds_id) { } bool PerfProducer::ReadAndParsePerCpuBuffer(EventReader* reader, - uint64_t max_samples, + uint32_t max_samples, DataSourceInstanceID ds_id, DataSourceState* ds) { PERFETTO_METATRACE_SCOPED(TAG_PRODUCER, PROFILER_READ_CPU); @@ -526,36 +392,26 @@ bool PerfProducer::ReadAndParsePerCpuBuffer(EventReader* reader, }); }; - for (uint64_t i = 0; i < max_samples; i++) { + for (uint32_t i = 0; i < max_samples; i++) { base::Optional<ParsedSample> sample = reader->ReadUntilSample(records_lost_callback); if (!sample) { return false; // caught up to the writer } - // Counter-only mode: skip the unwinding stage, enqueue the sample for - // output immediately. - if (!ds->event_config.sample_callstacks()) { - CompletedSample output; - output.common = sample->common; - EmitSample(ds_id, std::move(output)); - continue; - } - - // If sampling callstacks, we're not interested in kernel threads/workers. if (!sample->regs) { - continue; + continue; // skip kernel threads/workers } // Request proc-fds for the process if this is the first time we see it. - pid_t pid = sample->common.pid; + pid_t pid = sample->pid; auto& process_state = ds->process_states[pid]; // insert if new if (process_state == ProcessTrackingStatus::kExpired) { PERFETTO_DLOG("Skipping sample for previously expired pid [%d]", static_cast<int>(pid)); - EmitSkippedSample(ds_id, std::move(sample.value()), - SampleSkipReason::kReadStage); + PostEmitSkippedSample(ds_id, std::move(sample.value()), + SampleSkipReason::kReadStage); continue; } @@ -571,9 +427,9 @@ bool PerfProducer::ReadAndParsePerCpuBuffer(EventReader* reader, PERFETTO_DLOG("New pid: [%d]", static_cast<int>(pid)); // Check whether samples for this new process should be - // dropped due to the target filtering. + // dropped due to the target whitelist/blacklist. const TargetFilter& filter = ds->event_config.filter(); - if (ShouldRejectDueToFilter(pid, &ds->additional_cmdlines, filter)) { + if (ShouldRejectDueToFilter(pid, filter)) { process_state = ProcessTrackingStatus::kRejected; continue; } @@ -588,21 +444,6 @@ bool PerfProducer::ReadAndParsePerCpuBuffer(EventReader* reader, PERFETTO_CHECK(process_state == ProcessTrackingStatus::kResolved || process_state == ProcessTrackingStatus::kResolving); - // Optionally: drop sample if above a given threshold of sampled stacks - // that are waiting in the unwinding queue. - uint64_t max_footprint_bytes = - ds->event_config.max_enqueued_footprint_bytes(); - uint64_t sample_stack_size = sample->stack.size(); - if (max_footprint_bytes) { - uint64_t footprint_bytes = unwinding_worker_->GetEnqueuedFootprint(); - if (footprint_bytes + sample_stack_size >= max_footprint_bytes) { - PERFETTO_DLOG("Skipping sample enqueueing due to footprint limit."); - EmitSkippedSample(ds_id, std::move(sample.value()), - SampleSkipReason::kUnwindEnqueue); - continue; - } - } - // Push the sample into the unwinding queue if there is room. auto& queue = unwinding_worker_->unwind_queue(); WriteView write_view = queue.BeginWrite(); @@ -610,11 +451,10 @@ bool PerfProducer::ReadAndParsePerCpuBuffer(EventReader* reader, queue.at(write_view.write_pos) = UnwindEntry{ds_id, std::move(sample.value())}; queue.CommitWrite(); - unwinding_worker_->IncrementEnqueuedFootprint(sample_stack_size); } else { PERFETTO_DLOG("Unwinder queue full, skipping sample"); - EmitSkippedSample(ds_id, std::move(sample.value()), - SampleSkipReason::kUnwindEnqueue); + PostEmitSkippedSample(ds_id, std::move(sample.value()), + SampleSkipReason::kUnwindEnqueue); } } @@ -626,7 +466,6 @@ bool PerfProducer::ReadAndParsePerCpuBuffer(EventReader* reader, // Note: first-fit makes descriptor request fulfillment not true FIFO. But the // edge-cases where it matters are very unlikely. void PerfProducer::OnProcDescriptors(pid_t pid, - uid_t uid, base::ScopedFile maps_fd, base::ScopedFile mem_fd) { // Find first-fit data source that requested descriptors for the process. @@ -636,14 +475,6 @@ void PerfProducer::OnProcDescriptors(pid_t pid, if (proc_status_it == ds.process_states.end()) continue; - if (!CanProfile(ds.event_config.raw_ds_config(), uid, - ds.event_config.target_installed_by())) { - PERFETTO_DLOG("Not profileable: pid [%d], uid [%d] for DS [%zu]", - static_cast<int>(pid), static_cast<int>(uid), - static_cast<size_t>(it.first)); - continue; - } - // Match against either resolving, or expired state. In the latter // case, it means that the async response was slow enough that we've marked // the lookup as expired (but can now recover for future samples). @@ -736,21 +567,17 @@ void PerfProducer::PostEmitSample(DataSourceInstanceID ds_id, void PerfProducer::EmitSample(DataSourceInstanceID ds_id, CompletedSample sample) { auto ds_it = data_sources_.find(ds_id); - if (ds_it == data_sources_.end()) { - PERFETTO_DLOG("EmitSample(ds: %zu): source gone", - static_cast<size_t>(ds_id)); - return; - } + PERFETTO_CHECK(ds_it != data_sources_.end()); DataSourceState& ds = ds_it->second; // intern callsite GlobalCallstackTrie::Node* callstack_root = - callstack_trie_.CreateCallsite(sample.frames, sample.build_ids); + callstack_trie_.CreateCallsite(sample.frames); uint64_t callstack_iid = callstack_root->id(); - // start packet, timestamp domain defaults to monotonic_raw - auto packet = StartTracePacket(ds.trace_writer.get()); - packet->set_timestamp(sample.common.timestamp); + // start packet + auto packet = ds.trace_writer->NewTracePacket(); + packet->set_timestamp(sample.timestamp); // write new interning data (if any) protos::pbzero::InternedData* interned_out = packet->set_interned_data(); @@ -759,11 +586,10 @@ void PerfProducer::EmitSample(DataSourceInstanceID ds_id, // write the sample itself auto* perf_sample = packet->set_perf_sample(); - perf_sample->set_cpu(sample.common.cpu); - perf_sample->set_pid(static_cast<uint32_t>(sample.common.pid)); - perf_sample->set_tid(static_cast<uint32_t>(sample.common.tid)); - perf_sample->set_cpu_mode(ToCpuModeEnum(sample.common.cpu_mode)); - perf_sample->set_timebase_count(sample.common.timebase_count); + perf_sample->set_cpu(sample.cpu); + perf_sample->set_pid(static_cast<uint32_t>(sample.pid)); + perf_sample->set_tid(static_cast<uint32_t>(sample.tid)); + perf_sample->set_cpu_mode(ToCpuModeEnum(sample.cpu_mode)); perf_sample->set_callstack_iid(callstack_iid); if (sample.unwind_error != unwindstack::ERROR_NONE) { perf_sample->set_unwind_error(ToProtoEnum(sample.unwind_error)); @@ -774,8 +600,7 @@ void PerfProducer::EmitRingBufferLoss(DataSourceInstanceID ds_id, size_t cpu, uint64_t records_lost) { auto ds_it = data_sources_.find(ds_id); - if (ds_it == data_sources_.end()) - return; + PERFETTO_CHECK(ds_it != data_sources_.end()); DataSourceState& ds = ds_it->second; PERFETTO_DLOG("DataSource(%zu): cpu%zu lost [%" PRIu64 "] records", static_cast<size_t>(ds_id), cpu, records_lost); @@ -786,10 +611,8 @@ void PerfProducer::EmitRingBufferLoss(DataSourceInstanceID ds_id, // We timestamp the packet with the boot clock for packet ordering purposes, // but it no longer has a (precise) interpretation relative to the sample // stream from that per-cpu buffer. See the proto comments for more details. - auto packet = StartTracePacket(ds.trace_writer.get()); + auto packet = ds.trace_writer->NewTracePacket(); packet->set_timestamp(static_cast<uint64_t>(base::GetBootTimeNs().count())); - packet->set_timestamp_clock_id( - protos::pbzero::BuiltinClock::BUILTIN_CLOCK_BOOTTIME); auto* perf_sample = packet->set_perf_sample(); perf_sample->set_cpu(static_cast<uint32_t>(cpu)); @@ -819,19 +642,16 @@ void PerfProducer::EmitSkippedSample(DataSourceInstanceID ds_id, ParsedSample sample, SampleSkipReason reason) { auto ds_it = data_sources_.find(ds_id); - if (ds_it == data_sources_.end()) - return; + PERFETTO_CHECK(ds_it != data_sources_.end()); DataSourceState& ds = ds_it->second; - // Note: timestamp defaults to the monotonic_raw domain. - auto packet = StartTracePacket(ds.trace_writer.get()); - packet->set_timestamp(sample.common.timestamp); + auto packet = ds.trace_writer->NewTracePacket(); + packet->set_timestamp(sample.timestamp); auto* perf_sample = packet->set_perf_sample(); - perf_sample->set_cpu(sample.common.cpu); - perf_sample->set_pid(static_cast<uint32_t>(sample.common.pid)); - perf_sample->set_tid(static_cast<uint32_t>(sample.common.tid)); - perf_sample->set_cpu_mode(ToCpuModeEnum(sample.common.cpu_mode)); - perf_sample->set_timebase_count(sample.common.timebase_count); + perf_sample->set_cpu(sample.cpu); + perf_sample->set_pid(static_cast<uint32_t>(sample.pid)); + perf_sample->set_tid(static_cast<uint32_t>(sample.tid)); + perf_sample->set_cpu_mode(ToCpuModeEnum(sample.cpu_mode)); using PerfSample = protos::pbzero::PerfSample; switch (reason) { @@ -856,7 +676,7 @@ void PerfProducer::InitiateReaderStop(DataSourceState* ds) { ds->status = DataSourceState::Status::kShuttingDown; for (auto& event_reader : ds->per_cpu_readers) { - event_reader.DisableEvents(); + event_reader.PauseEvents(); } } @@ -871,11 +691,7 @@ void PerfProducer::PostFinishDataSourceStop(DataSourceInstanceID ds_id) { void PerfProducer::FinishDataSourceStop(DataSourceInstanceID ds_id) { PERFETTO_LOG("FinishDataSourceStop(%zu)", static_cast<size_t>(ds_id)); auto ds_it = data_sources_.find(ds_id); - if (ds_it == data_sources_.end()) { - PERFETTO_DLOG("FinishDataSourceStop(%zu): source gone", - static_cast<size_t>(ds_id)); - return; - } + PERFETTO_CHECK(ds_it != data_sources_.end()); DataSourceState& ds = ds_it->second; PERFETTO_CHECK(ds.status == DataSourceState::Status::kShuttingDown); @@ -887,49 +703,7 @@ void PerfProducer::FinishDataSourceStop(DataSourceInstanceID ds_id) { // Clean up resources if there are no more active sources. if (data_sources_.empty()) { callstack_trie_.ClearTrie(); // purge internings - base::MaybeReleaseAllocatorMemToOS(); - } -} - -// TODO(rsavitski): maybe make the tracing service respect premature -// producer-driven stops, and then issue a NotifyDataSourceStopped here. -// Alternatively (and at the expense of higher complexity) introduce a new data -// source status of "tombstoned", and propagate it until the source is stopped -// by the service (this would technically allow for stricter lifetime checking -// of data sources, and help with discarding periodic flushes). -// TODO(rsavitski): Purging while stopping will currently leave the stop -// unacknowledged. Consider checking whether the DS is stopping here, and if so, -// notifying immediately after erasing. -void PerfProducer::PurgeDataSource(DataSourceInstanceID ds_id) { - auto ds_it = data_sources_.find(ds_id); - if (ds_it == data_sources_.end()) - return; - DataSourceState& ds = ds_it->second; - - PERFETTO_LOG("Stopping DataSource(%zu) prematurely", - static_cast<size_t>(ds_id)); - - unwinding_worker_->PostPurgeDataSource(ds_id); - - // Write a packet indicating the abrupt stop. - { - auto packet = StartTracePacket(ds.trace_writer.get()); - packet->set_timestamp(static_cast<uint64_t>(base::GetBootTimeNs().count())); - packet->set_timestamp_clock_id( - protos::pbzero::BuiltinClock::BUILTIN_CLOCK_BOOTTIME); - auto* perf_sample = packet->set_perf_sample(); - auto* producer_event = perf_sample->set_producer_event(); - producer_event->set_source_stop_reason( - protos::pbzero::PerfSample::ProducerEvent::PROFILER_STOP_GUARDRAIL); - } - - ds.trace_writer->Flush(); - data_sources_.erase(ds_it); - - // Clean up resources if there are no more active sources. - if (data_sources_.empty()) { - callstack_trie_.ClearTrie(); // purge internings - base::MaybeReleaseAllocatorMemToOS(); + MaybeReleaseAllocatorMemToOS(); } } |