1 files changed, 92 insertions, 318 deletions
diff --git a/src/profiling/perf/perf_producer.cc b/src/profiling/perf/perf_producer.cc
index b1c7e1f27..e0dde42d5 100644
--- a/src/profiling/perf/perf_producer.cc
+++ b/src/profiling/perf/perf_producer.cc
@@ -19,6 +19,7 @@
 #include <random>
 #include <utility>
 
+#include <malloc.h>
 #include <unistd.h>
 
 #include <unwindstack/Error.h>
@@ -27,7 +28,6 @@
 #include "perfetto/base/logging.h"
 #include "perfetto/base/task_runner.h"
 #include "perfetto/ext/base/metatrace.h"
-#include "perfetto/ext/base/utils.h"
 #include "perfetto/ext/base/weak_ptr.h"
 #include "perfetto/ext/tracing/core/basic_types.h"
 #include "perfetto/ext/tracing/core/producer.h"
@@ -37,19 +37,13 @@
 #include "perfetto/tracing/core/data_source_descriptor.h"
 #include "src/profiling/common/callstack_trie.h"
 #include "src/profiling/common/proc_utils.h"
-#include "src/profiling/common/producer_support.h"
-#include "src/profiling/common/profiler_guardrails.h"
 #include "src/profiling/common/unwind_support.h"
 #include "src/profiling/perf/common_types.h"
 #include "src/profiling/perf/event_reader.h"
 
-#include "protos/perfetto/common/builtin_clock.pbzero.h"
-#include "protos/perfetto/common/perf_events.gen.h"
-#include "protos/perfetto/common/perf_events.pbzero.h"
-#include "protos/perfetto/config/profiling/perf_event_config.gen.h"
+#include "protos/perfetto/config/profiling/perf_event_config.pbzero.h"
 #include "protos/perfetto/trace/profiling/profile_packet.pbzero.h"
 #include "protos/perfetto/trace/trace_packet.pbzero.h"
-#include "protos/perfetto/trace/trace_packet_defaults.pbzero.h"
 
 namespace perfetto {
 namespace profiling {
@@ -68,8 +62,6 @@ namespace {
 // The proper fix is in the platform, see bug for progress.
 constexpr uint32_t kProcDescriptorsAndroidDelayMs = 50;
 
-constexpr uint32_t kMemoryLimitCheckPeriodMs = 5 * 1000;
-
 constexpr uint32_t kInitialConnectionBackoffMs = 100;
 constexpr uint32_t kMaxConnectionBackoffMs = 30 * 1000;
 
@@ -80,54 +72,6 @@ size_t NumberOfCpus() {
   return static_cast<size_t>(sysconf(_SC_NPROCESSORS_CONF));
 }
 
-TraceWriter::TracePacketHandle StartTracePacket(TraceWriter* trace_writer) {
-  auto packet = trace_writer->NewTracePacket();
-  packet->set_sequence_flags(
-      protos::pbzero::TracePacket::SEQ_NEEDS_INCREMENTAL_STATE);
-  return packet;
-}
-
-void WritePerfEventDefaultsPacket(const EventConfig& event_config,
-                                  TraceWriter* trace_writer) {
-  auto packet = trace_writer->NewTracePacket();
-  packet->set_timestamp(static_cast<uint64_t>(base::GetBootTimeNs().count()));
-  packet->set_timestamp_clock_id(protos::pbzero::BUILTIN_CLOCK_BOOTTIME);
-
-  // start new incremental state generation:
-  packet->set_sequence_flags(
-      protos::pbzero::TracePacket::SEQ_INCREMENTAL_STATE_CLEARED);
-
-  // default packet timestamp clockid:
-  auto* defaults = packet->set_trace_packet_defaults();
-  defaults->set_timestamp_clock_id(protos::pbzero::BUILTIN_CLOCK_MONOTONIC_RAW);
-  PERFETTO_DCHECK(event_config.perf_attr()->clockid == CLOCK_MONOTONIC_RAW);
-
-  auto* perf_defaults = defaults->set_perf_sample_defaults();
-  auto* timebase_pb = perf_defaults->set_timebase();
-
-  // frequency/period:
-  perf_event_attr* perf_attr = event_config.perf_attr();
-  if (perf_attr->freq) {
-    timebase_pb->set_frequency(perf_attr->sample_freq);
-  } else {
-    timebase_pb->set_period(perf_attr->sample_period);
-  }
-
-  // event:
-  const PerfCounter& timebase = event_config.timebase_event();
-  if (timebase.is_counter()) {
-    timebase_pb->set_counter(
-        static_cast<protos::pbzero::PerfEvents::Counter>(timebase.counter));
-  } else {
-    PERFETTO_DCHECK(timebase.is_tracepoint());
-    // TODO(rsavitski): reconsider using a struct with two strings instead
-    // of the ::gen::Tracepoint class in the C++ code.
-    auto* tracepoint_pb = timebase_pb->set_tracepoint();
-    tracepoint_pb->set_name(timebase.tracepoint.name());
-    tracepoint_pb->set_filter(timebase.tracepoint.filter());
-  }
-}
-
 uint32_t TimeToNextReadTickMs(DataSourceInstanceID ds_id, uint32_t period_ms) {
   // Normally, we'd schedule the next tick at the next |period_ms|
   // boundary of the boot clock. However, to avoid aligning the read tasks of
@@ -141,54 +85,41 @@ uint32_t TimeToNextReadTickMs(DataSourceInstanceID ds_id, uint32_t period_ms) {
   return period_ms - ((now_ms - ds_period_offset) % period_ms);
 }
 
-bool ShouldRejectDueToFilter(pid_t pid,
-                             base::FlatSet<std::string>* additional_cmdlines,
-                             const TargetFilter& filter) {
-  PERFETTO_CHECK(additional_cmdlines);
+bool ShouldRejectDueToFilter(pid_t pid, const TargetFilter& filter) {
+  bool reject_cmd = false;
   std::string cmdline;
-  bool have_cmdline = GetCmdlineForPID(pid, &cmdline);  // normalized form
-  if (!have_cmdline) {
+  if (GetCmdlineForPID(pid, &cmdline)) {  // normalized form
+    // reject if absent from non-empty whitelist, or present in blacklist
+    reject_cmd = (filter.cmdlines.size() && !filter.cmdlines.count(cmdline)) ||
+                 filter.exclude_cmdlines.count(cmdline);
+  } else {
     PERFETTO_DLOG("Failed to look up cmdline for pid [%d]",
                   static_cast<int>(pid));
+    // reject only if there's a whitelist present
+    reject_cmd = filter.cmdlines.size() > 0;
   }
 
-  if (have_cmdline && filter.exclude_cmdlines.count(cmdline)) {
-    PERFETTO_DLOG("Explicitly rejecting samples for pid [%d] due to cmdline",
-                  static_cast<int>(pid));
-    return true;
-  }
-  if (filter.exclude_pids.count(pid)) {
-    PERFETTO_DLOG("Explicitly rejecting samples for pid [%d] due to pid",
-                  static_cast<int>(pid));
-    return true;
-  }
+  bool reject_pid = (filter.pids.size() && !filter.pids.count(pid)) ||
+                    filter.exclude_pids.count(pid);
 
-  if (have_cmdline && filter.cmdlines.count(cmdline)) {
-    return false;
-  }
-  if (filter.pids.count(pid)) {
-    return false;
-  }
-  if (filter.cmdlines.empty() && filter.pids.empty() &&
-      !filter.additional_cmdline_count) {
-    // If no filters are set allow everything.
-    return false;
-  }
+  if (reject_cmd || reject_pid) {
+    PERFETTO_DLOG(
+        "Rejecting samples for pid [%d] due to cmdline(%d) or pid(%d)",
+        static_cast<int>(pid), reject_cmd, reject_pid);
 
-  // If we didn't read the command line that's a good prediction we will not be
-  // able to profile either.
-  if (have_cmdline) {
-    if (additional_cmdlines->count(cmdline)) {
-      return false;
-    }
-    if (additional_cmdlines->size() < filter.additional_cmdline_count) {
-      additional_cmdlines->insert(cmdline);
-      return false;
-    }
+    return true;
   }
+  return false;
+}
 
-  PERFETTO_DLOG("Rejecting samples for pid [%d]", static_cast<int>(pid));
-  return true;
+void MaybeReleaseAllocatorMemToOS() {
+#if defined(__BIONIC__)
+  // TODO(b/152414415): libunwindstack's volume of small allocations is
+  // adverarial to scudo, which doesn't automatically release small
+  // allocation regions back to the OS. Forceful purge does reclaim all size
+  // classes.
+  mallopt(M_PURGE, 0);
+#endif
 }
 
 protos::pbzero::Profiling::CpuMode ToCpuModeEnum(uint16_t perf_cpu_mode) {
@@ -229,12 +160,6 @@ protos::pbzero::Profiling::StackUnwindError ToProtoEnum(
       return Profiling::UNWIND_ERROR_REPEATED_FRAME;
     case unwindstack::ERROR_INVALID_ELF:
       return Profiling::UNWIND_ERROR_INVALID_ELF;
-    case unwindstack::ERROR_SYSTEM_CALL:
-      return Profiling::UNWIND_ERROR_SYSTEM_CALL;
-    case unwindstack::ERROR_THREAD_TIMEOUT:
-      return Profiling::UNWIND_ERROR_THREAD_TIMEOUT;
-    case unwindstack::ERROR_THREAD_DOES_NOT_EXIST:
-      return Profiling::UNWIND_ERROR_THREAD_DOES_NOT_EXIST;
   }
   return Profiling::UNWIND_ERROR_UNKNOWN;
 }
@@ -250,16 +175,18 @@ PerfProducer::PerfProducer(ProcDescriptorGetter* proc_fd_getter,
   proc_fd_getter->SetDelegate(this);
 }
 
+// TODO(rsavitski): consider configure at setup + enable at start instead.
 void PerfProducer::SetupDataSource(DataSourceInstanceID,
                                    const DataSourceConfig&) {}
 
-void PerfProducer::StartDataSource(DataSourceInstanceID ds_id,
+void PerfProducer::StartDataSource(DataSourceInstanceID instance_id,
                                    const DataSourceConfig& config) {
-  PERFETTO_LOG("StartDataSource(%zu, %s)", static_cast<size_t>(ds_id),
+  PERFETTO_LOG("StartDataSource(%zu, %s)", static_cast<size_t>(instance_id),
                config.name().c_str());
 
   if (config.name() == MetatraceWriter::kDataSourceName) {
-    StartMetatraceSource(ds_id, static_cast<BufferID>(config.target_buffer()));
+    StartMetatraceSource(instance_id,
+                         static_cast<BufferID>(config.target_buffer()));
     return;
   }
 
@@ -267,28 +194,17 @@ void PerfProducer::StartDataSource(DataSourceInstanceID ds_id,
   if (config.name() != kDataSourceName)
     return;
 
-  // Tracepoint name -> id lookup in case the config asks for tracepoints:
-  auto tracepoint_id_lookup = [this](const std::string& group,
-                                     const std::string& name) {
-    if (!tracefs_)  // lazy init or retry
-      tracefs_ = FtraceProcfs::CreateGuessingMountPoint();
-    if (!tracefs_)  // still didn't find an accessible tracefs
-      return 0u;
-    return tracefs_->ReadEventId(group, name);
-  };
-
-  protos::gen::PerfEventConfig event_config_pb;
-  if (!event_config_pb.ParseFromString(config.perf_event_config_raw())) {
-    PERFETTO_ELOG("PerfEventConfig could not be parsed.");
-    return;
-  }
-  base::Optional<EventConfig> event_config =
-      EventConfig::Create(event_config_pb, config, tracepoint_id_lookup);
+  base::Optional<EventConfig> event_config = EventConfig::Create(config);
   if (!event_config.has_value()) {
     PERFETTO_ELOG("PerfEventConfig rejected.");
     return;
   }
 
+  // TODO(rsavitski): consider supporting specific cpu subsets.
+  if (!event_config->target_all_cpus()) {
+    PERFETTO_ELOG("PerfEventConfig{all_cpus} required");
+    return;
+  }
   size_t num_cpus = NumberOfCpus();
   std::vector<EventReader> per_cpu_readers;
   for (uint32_t cpu = 0; cpu < num_cpus; cpu++) {
@@ -310,100 +226,50 @@ void PerfProducer::StartDataSource(DataSourceInstanceID ds_id,
   std::map<DataSourceInstanceID, DataSourceState>::iterator ds_it;
   bool inserted;
   std::tie(ds_it, inserted) = data_sources_.emplace(
-      std::piecewise_construct, std::forward_as_tuple(ds_id),
+      std::piecewise_construct, std::forward_as_tuple(instance_id),
       std::forward_as_tuple(event_config.value(), std::move(writer),
                             std::move(per_cpu_readers)));
   PERFETTO_CHECK(inserted);
   DataSourceState& ds = ds_it->second;
 
-  // Start the configured events.
-  for (auto& per_cpu_reader : ds.per_cpu_readers) {
-    per_cpu_reader.EnableEvents();
-  }
-
-  WritePerfEventDefaultsPacket(ds.event_config, ds.trace_writer.get());
-
+  // Write out a packet to initialize the incremental state for this sequence.
   InterningOutputTracker::WriteFixedInterningsPacket(
-      ds_it->second.trace_writer.get(),
-      protos::pbzero::TracePacket::SEQ_NEEDS_INCREMENTAL_STATE);
+      ds_it->second.trace_writer.get());
 
   // Inform unwinder of the new data source instance, and optionally start a
   // periodic task to clear its cached state.
-  unwinding_worker_->PostStartDataSource(ds_id,
-                                         ds.event_config.kernel_frames());
+  unwinding_worker_->PostStartDataSource(instance_id);
   if (ds.event_config.unwind_state_clear_period_ms()) {
     unwinding_worker_->PostClearCachedStatePeriodic(
-        ds_id, ds.event_config.unwind_state_clear_period_ms());
+        instance_id, ds.event_config.unwind_state_clear_period_ms());
   }
 
   // Kick off periodic read task.
   auto tick_period_ms = ds.event_config.read_tick_period_ms();
   auto weak_this = weak_factory_.GetWeakPtr();
   task_runner_->PostDelayedTask(
-      [weak_this, ds_id] {
-        if (weak_this)
-          weak_this->TickDataSourceRead(ds_id);
-      },
-      TimeToNextReadTickMs(ds_id, tick_period_ms));
-
-  // Optionally kick off periodic memory footprint limit check.
-  uint32_t max_daemon_memory_kb = event_config_pb.max_daemon_memory_kb();
-  if (max_daemon_memory_kb > 0) {
-    task_runner_->PostDelayedTask(
-        [weak_this, ds_id, max_daemon_memory_kb] {
-          if (weak_this)
-            weak_this->CheckMemoryFootprintPeriodic(ds_id,
-                                                    max_daemon_memory_kb);
-        },
-        kMemoryLimitCheckPeriodMs);
-  }
-}
-
-void PerfProducer::CheckMemoryFootprintPeriodic(DataSourceInstanceID ds_id,
-                                                uint32_t max_daemon_memory_kb) {
-  auto ds_it = data_sources_.find(ds_id);
-  if (ds_it == data_sources_.end())
-    return;  // stop recurring
-
-  GuardrailConfig gconfig = {};
-  gconfig.memory_guardrail_kb = max_daemon_memory_kb;
-
-  ProfilerMemoryGuardrails footprint_snapshot;
-  if (footprint_snapshot.IsOverMemoryThreshold(gconfig)) {
-    PurgeDataSource(ds_id);
-    return;  // stop recurring
-  }
-
-  // repost
-  auto weak_this = weak_factory_.GetWeakPtr();
-  task_runner_->PostDelayedTask(
-      [weak_this, ds_id, max_daemon_memory_kb] {
+      [weak_this, instance_id] {
         if (weak_this)
-          weak_this->CheckMemoryFootprintPeriodic(ds_id, max_daemon_memory_kb);
+          weak_this->TickDataSourceRead(instance_id);
       },
-      kMemoryLimitCheckPeriodMs);
+      TimeToNextReadTickMs(instance_id, tick_period_ms));
 }
 
-void PerfProducer::StopDataSource(DataSourceInstanceID ds_id) {
-  PERFETTO_LOG("StopDataSource(%zu)", static_cast<size_t>(ds_id));
+void PerfProducer::StopDataSource(DataSourceInstanceID instance_id) {
+  PERFETTO_LOG("StopDataSource(%zu)", static_cast<size_t>(instance_id));
 
   // Metatrace: stop immediately (will miss the events from the
   // asynchronous shutdown of the primary data source).
-  auto meta_it = metatrace_writers_.find(ds_id);
+  auto meta_it = metatrace_writers_.find(instance_id);
   if (meta_it != metatrace_writers_.end()) {
     meta_it->second.WriteAllAndFlushTraceWriter([] {});
     metatrace_writers_.erase(meta_it);
     return;
   }
 
-  auto ds_it = data_sources_.find(ds_id);
-  if (ds_it == data_sources_.end()) {
-    // Most likely, the source is missing due to an abrupt stop (via
-    // |PurgeDataSource|). Tell the service that we've stopped the source now,
-    // so that it doesn't wait for the ack until the timeout.
-    endpoint_->NotifyDataSourceStopped(ds_id);
+  auto ds_it = data_sources_.find(instance_id);
+  if (ds_it == data_sources_.end())
     return;
-  }
 
   // Start shutting down the reading frontend, which will propagate the stop
   // further as the intermediate buffers are cleared.
@@ -419,7 +285,7 @@ void PerfProducer::StopDataSource(DataSourceInstanceID ds_id) {
 void PerfProducer::Flush(FlushRequestID flush_id,
                          const DataSourceInstanceID* data_source_ids,
                          size_t num_data_sources) {
-  // Flush metatracing if requested.
+  bool should_ack_flush = false;
   for (size_t i = 0; i < num_data_sources; i++) {
     auto ds_id = data_source_ids[i];
     PERFETTO_DLOG("Flush(%zu)", static_cast<size_t>(ds_id));
@@ -427,10 +293,14 @@ void PerfProducer::Flush(FlushRequestID flush_id,
     auto meta_it = metatrace_writers_.find(ds_id);
     if (meta_it != metatrace_writers_.end()) {
       meta_it->second.WriteAllAndFlushTraceWriter([] {});
+      should_ack_flush = true;
+    }
+    if (data_sources_.find(ds_id) != data_sources_.end()) {
+      should_ack_flush = true;
     }
   }
-
-  endpoint_->NotifyFlushComplete(flush_id);
+  if (should_ack_flush)
+    endpoint_->NotifyFlushComplete(flush_id);
 }
 
 void PerfProducer::ClearIncrementalState(
@@ -451,13 +321,9 @@ void PerfProducer::ClearIncrementalState(
     }
     DataSourceState& ds = ds_it->second;
 
-    WritePerfEventDefaultsPacket(ds.event_config, ds.trace_writer.get());
-
     // Forget which incremental state we've emitted before.
     ds.interning_output.ClearHistory();
-    InterningOutputTracker::WriteFixedInterningsPacket(
-        ds.trace_writer.get(),
-        protos::pbzero::TracePacket::SEQ_NEEDS_INCREMENTAL_STATE);
+    InterningOutputTracker::WriteFixedInterningsPacket(ds.trace_writer.get());
 
     // Drop the cross-datasource callstack interning trie. This is not
     // necessary for correctness (the preceding step is sufficient). However,
@@ -483,7 +349,7 @@ void PerfProducer::TickDataSourceRead(DataSourceInstanceID ds_id) {
   PERFETTO_METATRACE_SCOPED(TAG_PRODUCER, PROFILER_READ_TICK);
 
   // Make a pass over all per-cpu readers.
-  uint64_t max_samples = ds.event_config.samples_per_tick_limit();
+  uint32_t max_samples = ds.event_config.samples_per_tick_limit();
   bool more_records_available = false;
   for (EventReader& reader : ds.per_cpu_readers) {
     if (ReadAndParsePerCpuBuffer(&reader, max_samples, ds_id, &ds)) {
@@ -511,7 +377,7 @@ void PerfProducer::TickDataSourceRead(DataSourceInstanceID ds_id) {
 }
 
 bool PerfProducer::ReadAndParsePerCpuBuffer(EventReader* reader,
-                                            uint64_t max_samples,
+                                            uint32_t max_samples,
                                             DataSourceInstanceID ds_id,
                                             DataSourceState* ds) {
   PERFETTO_METATRACE_SCOPED(TAG_PRODUCER, PROFILER_READ_CPU);
@@ -526,36 +392,26 @@ bool PerfProducer::ReadAndParsePerCpuBuffer(EventReader* reader,
     });
   };
 
-  for (uint64_t i = 0; i < max_samples; i++) {
+  for (uint32_t i = 0; i < max_samples; i++) {
     base::Optional<ParsedSample> sample =
         reader->ReadUntilSample(records_lost_callback);
     if (!sample) {
       return false;  // caught up to the writer
     }
 
-    // Counter-only mode: skip the unwinding stage, enqueue the sample for
-    // output immediately.
-    if (!ds->event_config.sample_callstacks()) {
-      CompletedSample output;
-      output.common = sample->common;
-      EmitSample(ds_id, std::move(output));
-      continue;
-    }
-
-    // If sampling callstacks, we're not interested in kernel threads/workers.
     if (!sample->regs) {
-      continue;
+      continue;  // skip kernel threads/workers
     }
 
     // Request proc-fds for the process if this is the first time we see it.
-    pid_t pid = sample->common.pid;
+    pid_t pid = sample->pid;
     auto& process_state = ds->process_states[pid];  // insert if new
 
     if (process_state == ProcessTrackingStatus::kExpired) {
       PERFETTO_DLOG("Skipping sample for previously expired pid [%d]",
                     static_cast<int>(pid));
-      EmitSkippedSample(ds_id, std::move(sample.value()),
-                        SampleSkipReason::kReadStage);
+      PostEmitSkippedSample(ds_id, std::move(sample.value()),
+                            SampleSkipReason::kReadStage);
       continue;
     }
 
@@ -571,9 +427,9 @@ bool PerfProducer::ReadAndParsePerCpuBuffer(EventReader* reader,
       PERFETTO_DLOG("New pid: [%d]", static_cast<int>(pid));
 
       // Check whether samples for this new process should be
-      // dropped due to the target filtering.
+      // dropped due to the target whitelist/blacklist.
       const TargetFilter& filter = ds->event_config.filter();
-      if (ShouldRejectDueToFilter(pid, &ds->additional_cmdlines, filter)) {
+      if (ShouldRejectDueToFilter(pid, filter)) {
         process_state = ProcessTrackingStatus::kRejected;
         continue;
       }
@@ -588,21 +444,6 @@ bool PerfProducer::ReadAndParsePerCpuBuffer(EventReader* reader,
     PERFETTO_CHECK(process_state == ProcessTrackingStatus::kResolved ||
                    process_state == ProcessTrackingStatus::kResolving);
 
-    // Optionally: drop sample if above a given threshold of sampled stacks
-    // that are waiting in the unwinding queue.
-    uint64_t max_footprint_bytes =
-        ds->event_config.max_enqueued_footprint_bytes();
-    uint64_t sample_stack_size = sample->stack.size();
-    if (max_footprint_bytes) {
-      uint64_t footprint_bytes = unwinding_worker_->GetEnqueuedFootprint();
-      if (footprint_bytes + sample_stack_size >= max_footprint_bytes) {
-        PERFETTO_DLOG("Skipping sample enqueueing due to footprint limit.");
-        EmitSkippedSample(ds_id, std::move(sample.value()),
-                          SampleSkipReason::kUnwindEnqueue);
-        continue;
-      }
-    }
-
     // Push the sample into the unwinding queue if there is room.
     auto& queue = unwinding_worker_->unwind_queue();
     WriteView write_view = queue.BeginWrite();
@@ -610,11 +451,10 @@ bool PerfProducer::ReadAndParsePerCpuBuffer(EventReader* reader,
       queue.at(write_view.write_pos) =
           UnwindEntry{ds_id, std::move(sample.value())};
       queue.CommitWrite();
-      unwinding_worker_->IncrementEnqueuedFootprint(sample_stack_size);
     } else {
       PERFETTO_DLOG("Unwinder queue full, skipping sample");
-      EmitSkippedSample(ds_id, std::move(sample.value()),
-                        SampleSkipReason::kUnwindEnqueue);
+      PostEmitSkippedSample(ds_id, std::move(sample.value()),
+                            SampleSkipReason::kUnwindEnqueue);
     }
   }
 
@@ -626,7 +466,6 @@ bool PerfProducer::ReadAndParsePerCpuBuffer(EventReader* reader,
 // Note: first-fit makes descriptor request fulfillment not true FIFO. But the
 // edge-cases where it matters are very unlikely.
 void PerfProducer::OnProcDescriptors(pid_t pid,
-                                     uid_t uid,
                                      base::ScopedFile maps_fd,
                                      base::ScopedFile mem_fd) {
   // Find first-fit data source that requested descriptors for the process.
@@ -636,14 +475,6 @@ void PerfProducer::OnProcDescriptors(pid_t pid,
     if (proc_status_it == ds.process_states.end())
       continue;
 
-    if (!CanProfile(ds.event_config.raw_ds_config(), uid,
-                    ds.event_config.target_installed_by())) {
-      PERFETTO_DLOG("Not profileable: pid [%d], uid [%d] for DS [%zu]",
-                    static_cast<int>(pid), static_cast<int>(uid),
-                    static_cast<size_t>(it.first));
-      continue;
-    }
-
     // Match against either resolving, or expired state. In the latter
     // case, it means that the async response was slow enough that we've marked
     // the lookup as expired (but can now recover for future samples).
@@ -736,21 +567,17 @@ void PerfProducer::PostEmitSample(DataSourceInstanceID ds_id,
 void PerfProducer::EmitSample(DataSourceInstanceID ds_id,
                               CompletedSample sample) {
   auto ds_it = data_sources_.find(ds_id);
-  if (ds_it == data_sources_.end()) {
-    PERFETTO_DLOG("EmitSample(ds: %zu): source gone",
-                  static_cast<size_t>(ds_id));
-    return;
-  }
+  PERFETTO_CHECK(ds_it != data_sources_.end());
   DataSourceState& ds = ds_it->second;
 
   // intern callsite
   GlobalCallstackTrie::Node* callstack_root =
-      callstack_trie_.CreateCallsite(sample.frames, sample.build_ids);
+      callstack_trie_.CreateCallsite(sample.frames);
   uint64_t callstack_iid = callstack_root->id();
 
-  // start packet, timestamp domain defaults to monotonic_raw
-  auto packet = StartTracePacket(ds.trace_writer.get());
-  packet->set_timestamp(sample.common.timestamp);
+  // start packet
+  auto packet = ds.trace_writer->NewTracePacket();
+  packet->set_timestamp(sample.timestamp);
 
   // write new interning data (if any)
   protos::pbzero::InternedData* interned_out = packet->set_interned_data();
@@ -759,11 +586,10 @@ void PerfProducer::EmitSample(DataSourceInstanceID ds_id,
 
   // write the sample itself
   auto* perf_sample = packet->set_perf_sample();
-  perf_sample->set_cpu(sample.common.cpu);
-  perf_sample->set_pid(static_cast<uint32_t>(sample.common.pid));
-  perf_sample->set_tid(static_cast<uint32_t>(sample.common.tid));
-  perf_sample->set_cpu_mode(ToCpuModeEnum(sample.common.cpu_mode));
-  perf_sample->set_timebase_count(sample.common.timebase_count);
+  perf_sample->set_cpu(sample.cpu);
+  perf_sample->set_pid(static_cast<uint32_t>(sample.pid));
+  perf_sample->set_tid(static_cast<uint32_t>(sample.tid));
+  perf_sample->set_cpu_mode(ToCpuModeEnum(sample.cpu_mode));
   perf_sample->set_callstack_iid(callstack_iid);
   if (sample.unwind_error != unwindstack::ERROR_NONE) {
     perf_sample->set_unwind_error(ToProtoEnum(sample.unwind_error));
@@ -774,8 +600,7 @@ void PerfProducer::EmitRingBufferLoss(DataSourceInstanceID ds_id,
                                       size_t cpu,
                                       uint64_t records_lost) {
   auto ds_it = data_sources_.find(ds_id);
-  if (ds_it == data_sources_.end())
-    return;
+  PERFETTO_CHECK(ds_it != data_sources_.end());
   DataSourceState& ds = ds_it->second;
   PERFETTO_DLOG("DataSource(%zu): cpu%zu lost [%" PRIu64 "] records",
                 static_cast<size_t>(ds_id), cpu, records_lost);
@@ -786,10 +611,8 @@ void PerfProducer::EmitRingBufferLoss(DataSourceInstanceID ds_id,
   // We timestamp the packet with the boot clock for packet ordering purposes,
   // but it no longer has a (precise) interpretation relative to the sample
   // stream from that per-cpu buffer. See the proto comments for more details.
-  auto packet = StartTracePacket(ds.trace_writer.get());
+  auto packet = ds.trace_writer->NewTracePacket();
   packet->set_timestamp(static_cast<uint64_t>(base::GetBootTimeNs().count()));
-  packet->set_timestamp_clock_id(
-      protos::pbzero::BuiltinClock::BUILTIN_CLOCK_BOOTTIME);
 
   auto* perf_sample = packet->set_perf_sample();
   perf_sample->set_cpu(static_cast<uint32_t>(cpu));
@@ -819,19 +642,16 @@ void PerfProducer::EmitSkippedSample(DataSourceInstanceID ds_id,
                                      ParsedSample sample,
                                      SampleSkipReason reason) {
   auto ds_it = data_sources_.find(ds_id);
-  if (ds_it == data_sources_.end())
-    return;
+  PERFETTO_CHECK(ds_it != data_sources_.end());
   DataSourceState& ds = ds_it->second;
 
-  // Note: timestamp defaults to the monotonic_raw domain.
-  auto packet = StartTracePacket(ds.trace_writer.get());
-  packet->set_timestamp(sample.common.timestamp);
+  auto packet = ds.trace_writer->NewTracePacket();
+  packet->set_timestamp(sample.timestamp);
   auto* perf_sample = packet->set_perf_sample();
-  perf_sample->set_cpu(sample.common.cpu);
-  perf_sample->set_pid(static_cast<uint32_t>(sample.common.pid));
-  perf_sample->set_tid(static_cast<uint32_t>(sample.common.tid));
-  perf_sample->set_cpu_mode(ToCpuModeEnum(sample.common.cpu_mode));
-  perf_sample->set_timebase_count(sample.common.timebase_count);
+  perf_sample->set_cpu(sample.cpu);
+  perf_sample->set_pid(static_cast<uint32_t>(sample.pid));
+  perf_sample->set_tid(static_cast<uint32_t>(sample.tid));
+  perf_sample->set_cpu_mode(ToCpuModeEnum(sample.cpu_mode));
 
   using PerfSample = protos::pbzero::PerfSample;
   switch (reason) {
@@ -856,7 +676,7 @@ void PerfProducer::InitiateReaderStop(DataSourceState* ds) {
 
   ds->status = DataSourceState::Status::kShuttingDown;
   for (auto& event_reader : ds->per_cpu_readers) {
-    event_reader.DisableEvents();
+    event_reader.PauseEvents();
   }
 }
 
@@ -871,11 +691,7 @@ void PerfProducer::PostFinishDataSourceStop(DataSourceInstanceID ds_id) {
 void PerfProducer::FinishDataSourceStop(DataSourceInstanceID ds_id) {
   PERFETTO_LOG("FinishDataSourceStop(%zu)", static_cast<size_t>(ds_id));
   auto ds_it = data_sources_.find(ds_id);
-  if (ds_it == data_sources_.end()) {
-    PERFETTO_DLOG("FinishDataSourceStop(%zu): source gone",
-                  static_cast<size_t>(ds_id));
-    return;
-  }
+  PERFETTO_CHECK(ds_it != data_sources_.end());
   DataSourceState& ds = ds_it->second;
   PERFETTO_CHECK(ds.status == DataSourceState::Status::kShuttingDown);
 
@@ -887,49 +703,7 @@ void PerfProducer::FinishDataSourceStop(DataSourceInstanceID ds_id) {
   // Clean up resources if there are no more active sources.
   if (data_sources_.empty()) {
     callstack_trie_.ClearTrie();  // purge internings
-    base::MaybeReleaseAllocatorMemToOS();
-  }
-}
-
-// TODO(rsavitski): maybe make the tracing service respect premature
-// producer-driven stops, and then issue a NotifyDataSourceStopped here.
-// Alternatively (and at the expense of higher complexity) introduce a new data
-// source status of "tombstoned", and propagate it until the source is stopped
-// by the service (this would technically allow for stricter lifetime checking
-// of data sources, and help with discarding periodic flushes).
-// TODO(rsavitski): Purging while stopping will currently leave the stop
-// unacknowledged. Consider checking whether the DS is stopping here, and if so,
-// notifying immediately after erasing.
-void PerfProducer::PurgeDataSource(DataSourceInstanceID ds_id) {
-  auto ds_it = data_sources_.find(ds_id);
-  if (ds_it == data_sources_.end())
-    return;
-  DataSourceState& ds = ds_it->second;
-
-  PERFETTO_LOG("Stopping DataSource(%zu) prematurely",
-               static_cast<size_t>(ds_id));
-
-  unwinding_worker_->PostPurgeDataSource(ds_id);
-
-  // Write a packet indicating the abrupt stop.
-  {
-    auto packet = StartTracePacket(ds.trace_writer.get());
-    packet->set_timestamp(static_cast<uint64_t>(base::GetBootTimeNs().count()));
-    packet->set_timestamp_clock_id(
-        protos::pbzero::BuiltinClock::BUILTIN_CLOCK_BOOTTIME);
-    auto* perf_sample = packet->set_perf_sample();
-    auto* producer_event = perf_sample->set_producer_event();
-    producer_event->set_source_stop_reason(
-        protos::pbzero::PerfSample::ProducerEvent::PROFILER_STOP_GUARDRAIL);
-  }
-
-  ds.trace_writer->Flush();
-  data_sources_.erase(ds_it);
-
-  // Clean up resources if there are no more active sources.
-  if (data_sources_.empty()) {
-    callstack_trie_.ClearTrie();  // purge internings
-    base::MaybeReleaseAllocatorMemToOS();
+    MaybeReleaseAllocatorMemToOS();
   }
 }