diff options
author | Android Build Coastguard Worker <android-build-coastguard-worker@google.com> | 2023-12-19 20:16:01 +0000 |
---|---|---|
committer | Android Build Coastguard Worker <android-build-coastguard-worker@google.com> | 2023-12-19 20:16:01 +0000 |
commit | 1436b03ce8d197748019aee86199430407375a98 (patch) | |
tree | 804c11ed028f172747f45d366701c70c1ec3795a | |
parent | 1bfb479ba18126d544fb7270675f1b938073e44e (diff) | |
parent | 48b70bc1d2f881b7a8a9ad34bbec0b1304485975 (diff) | |
download | perfetto-android14-qpr2-s1-release.tar.gz |
Merge cherrypicks of ['android-review.googlesource.com/2873556'] into 24Q1-release.android-14.0.0_r37android-14.0.0_r36android-14.0.0_r35android-14.0.0_r34android-14.0.0_r33android-14.0.0_r32android-14.0.0_r31android-14.0.0_r30android-14.0.0_r29android14-qpr2-s5-releaseandroid14-qpr2-s4-releaseandroid14-qpr2-s3-releaseandroid14-qpr2-s2-releaseandroid14-qpr2-s1-releaseandroid14-qpr2-release
Change-Id: I4e924e7d4032b1713c5c1d73d217fce35f818ecd
-rw-r--r-- | protos/perfetto/config/perfetto_config.proto | 24 | ||||
-rw-r--r-- | protos/perfetto/config/trace_config.proto | 24 | ||||
-rw-r--r-- | protos/perfetto/trace/perfetto_trace.proto | 24 | ||||
-rw-r--r-- | src/protozero/filtering/string_filter.cc | 18 | ||||
-rw-r--r-- | src/protozero/filtering/string_filter.h | 1 | ||||
-rw-r--r-- | src/protozero/filtering/string_filter_benchmark.cc | 39 | ||||
-rw-r--r-- | src/protozero/filtering/string_filter_unittest.cc | 52 | ||||
-rw-r--r-- | src/tools/proto_filter/proto_filter.cc | 2 | ||||
-rw-r--r-- | src/tracing/core/tracing_service_impl.cc | 2 |
9 files changed, 184 insertions, 2 deletions
diff --git a/protos/perfetto/config/perfetto_config.proto b/protos/perfetto/config/perfetto_config.proto index 96aa5c81a..230c56d3d 100644 --- a/protos/perfetto/config/perfetto_config.proto +++ b/protos/perfetto/config/perfetto_config.proto @@ -3652,6 +3652,30 @@ message TraceConfig { // |atrace_post_tgid_starts_with|. The regex matching is only performed if // this check succeeds. SFP_ATRACE_MATCH_BREAK = 4; + + // Tries to repeatedly search (i.e. find substrings of) the string field + // with |regex_pattern|. For each match, redacts any matching groups (i.e. + // replaced with a constant string). Once there are no further matches, + // filtering is terminated (i.e. no further rules are checked). + // + // Note that this is policy is a "search" policy not a "match" policy + // unlike the above policies: + // * Match policies require matching the full string i.e. there is an + // implicit leading `^` and trailing `$`. + // * Search policies perform repeated partial matching of the string + // e.g. + // - String: `foo=aaa,bar=123,foo=bbb,baz=456` + // - Pattern: `foo=(\d+)` + // - Output: `foo=P6O,bar=123,foo=P6O,baz=456` + // where P6O is the redaction string + // + // All of this is only performed after some pre-work where we try to parse + // the string field as an atrace tracepoint and check if the post-tgid + // field starts with |atrace_post_tgid_starts_with|. + // + // If there are no partial matches, the string is left unchanged and the + // next rule in chain is considered. + SFP_ATRACE_REPEATED_SEARCH_REDACT_GROUPS = 5; } // A rule specifies how strings should be filtered. diff --git a/protos/perfetto/config/trace_config.proto b/protos/perfetto/config/trace_config.proto index 30f2e295a..cca996b90 100644 --- a/protos/perfetto/config/trace_config.proto +++ b/protos/perfetto/config/trace_config.proto @@ -575,6 +575,30 @@ message TraceConfig { // |atrace_post_tgid_starts_with|. The regex matching is only performed if // this check succeeds. SFP_ATRACE_MATCH_BREAK = 4; + + // Tries to repeatedly search (i.e. find substrings of) the string field + // with |regex_pattern|. For each match, redacts any matching groups (i.e. + // replaced with a constant string). Once there are no further matches, + // filtering is terminated (i.e. no further rules are checked). + // + // Note that this is policy is a "search" policy not a "match" policy + // unlike the above policies: + // * Match policies require matching the full string i.e. there is an + // implicit leading `^` and trailing `$`. + // * Search policies perform repeated partial matching of the string + // e.g. + // - String: `foo=aaa,bar=123,foo=bbb,baz=456` + // - Pattern: `foo=(\d+)` + // - Output: `foo=P6O,bar=123,foo=P6O,baz=456` + // where P6O is the redaction string + // + // All of this is only performed after some pre-work where we try to parse + // the string field as an atrace tracepoint and check if the post-tgid + // field starts with |atrace_post_tgid_starts_with|. + // + // If there are no partial matches, the string is left unchanged and the + // next rule in chain is considered. + SFP_ATRACE_REPEATED_SEARCH_REDACT_GROUPS = 5; } // A rule specifies how strings should be filtered. diff --git a/protos/perfetto/trace/perfetto_trace.proto b/protos/perfetto/trace/perfetto_trace.proto index 8cfa67ef7..32ab9d07d 100644 --- a/protos/perfetto/trace/perfetto_trace.proto +++ b/protos/perfetto/trace/perfetto_trace.proto @@ -3652,6 +3652,30 @@ message TraceConfig { // |atrace_post_tgid_starts_with|. The regex matching is only performed if // this check succeeds. SFP_ATRACE_MATCH_BREAK = 4; + + // Tries to repeatedly search (i.e. find substrings of) the string field + // with |regex_pattern|. For each match, redacts any matching groups (i.e. + // replaced with a constant string). Once there are no further matches, + // filtering is terminated (i.e. no further rules are checked). + // + // Note that this is policy is a "search" policy not a "match" policy + // unlike the above policies: + // * Match policies require matching the full string i.e. there is an + // implicit leading `^` and trailing `$`. + // * Search policies perform repeated partial matching of the string + // e.g. + // - String: `foo=aaa,bar=123,foo=bbb,baz=456` + // - Pattern: `foo=(\d+)` + // - Output: `foo=P6O,bar=123,foo=P6O,baz=456` + // where P6O is the redaction string + // + // All of this is only performed after some pre-work where we try to parse + // the string field as an atrace tracepoint and check if the post-tgid + // field starts with |atrace_post_tgid_starts_with|. + // + // If there are no partial matches, the string is left unchanged and the + // next rule in chain is considered. + SFP_ATRACE_REPEATED_SEARCH_REDACT_GROUPS = 5; } // A rule specifies how strings should be filtered. diff --git a/src/protozero/filtering/string_filter.cc b/src/protozero/filtering/string_filter.cc index c6642f474..90fa40ba5 100644 --- a/src/protozero/filtering/string_filter.cc +++ b/src/protozero/filtering/string_filter.cc @@ -135,6 +135,24 @@ bool StringFilter::MaybeFilterInternal(char* ptr, size_t len) const { return true; } break; + case Policy::kAtraceRepeatedSearchRedactGroups: + atrace_payload_ptr = atrace_find_tried + ? atrace_payload_ptr + : FindAtracePayloadPtr(ptr, ptr + len); + atrace_find_tried = true; + if (atrace_payload_ptr && StartsWith(atrace_payload_ptr, ptr + len, + rule.atrace_payload_starts_with)) { + auto beg = std::regex_iterator<char*>(ptr, ptr + len, rule.pattern); + auto end = std::regex_iterator<char*>(); + bool has_any_matches = beg != end; + for (auto it = std::move(beg); it != end; ++it) { + RedactMatches(*it); + } + if (has_any_matches) { + return true; + } + } + break; } } return false; diff --git a/src/protozero/filtering/string_filter.h b/src/protozero/filtering/string_filter.h index f435d020b..e7ed33113 100644 --- a/src/protozero/filtering/string_filter.h +++ b/src/protozero/filtering/string_filter.h @@ -32,6 +32,7 @@ class StringFilter { kAtraceMatchRedactGroups = 2, kMatchBreak = 3, kAtraceMatchBreak = 4, + kAtraceRepeatedSearchRedactGroups = 5, }; // Adds a new rule for filtering strings. diff --git a/src/protozero/filtering/string_filter_benchmark.cc b/src/protozero/filtering/string_filter_benchmark.cc index 867f69574..2b976c679 100644 --- a/src/protozero/filtering/string_filter_benchmark.cc +++ b/src/protozero/filtering/string_filter_benchmark.cc @@ -62,10 +62,12 @@ void Benchmark(benchmark::State& state, std::vector<char> storage; auto strs = LoadTraceStrings(state, storage); + uint32_t match = 0; for (auto _ : state) { - uint32_t match = 0; + match = 0; + std::vector<char> local = storage; for (auto& str : strs) { - match += rewriter.MaybeFilter(storage.data() + str.first, str.second); + match += rewriter.MaybeFilter(local.data() + str.first, str.second); } benchmark::DoNotOptimize(match); } @@ -73,6 +75,12 @@ void Benchmark(benchmark::State& state, benchmark::Counter(static_cast<double>(strs.size()), benchmark::Counter::kIsIterationInvariantRate | benchmark::Counter::kInvert); + state.counters["time/redaction"] = + benchmark::Counter(static_cast<double>(match), + benchmark::Counter::kIsIterationInvariantRate | + benchmark::Counter::kInvert); + state.counters["redactions"] = benchmark::Counter( + static_cast<double>(match), benchmark::Counter::kDefaults); } } // namespace @@ -111,6 +119,15 @@ BENCHMARK(BM_ProtozeroStringRewriterAtraceRedactRare) ->Unit(benchmark::kMillisecond) ->Arg(10); +static void BM_ProtozeroStringRewriterAtraceSearchSingleRedactRare( + benchmark::State& state) { + Benchmark(state, Policy::kAtraceRepeatedSearchRedactGroups, + R"(VerifyClass (.*)\n)", "VerifyClass"); +} +BENCHMARK(BM_ProtozeroStringRewriterAtraceSearchSingleRedactRare) + ->Unit(benchmark::kMillisecond) + ->Arg(10); + static void BM_ProtozeroStringRewriterRedactCommon(benchmark::State& state) { Benchmark(state, Policy::kMatchRedactGroups, R"(B\|[^|]+\|Lock contention on a monitor lock (.*)\n)", ""); @@ -128,3 +145,21 @@ static void BM_ProtozeroStringRewriterAtraceRedactCommon( BENCHMARK(BM_ProtozeroStringRewriterAtraceRedactCommon) ->Unit(benchmark::kMillisecond) ->Arg(10); + +static void BM_ProtozeroStringRewriterAtraceRedactSpammy( + benchmark::State& state) { + Benchmark(state, Policy::kAtraceMatchRedactGroups, + R"(C\|[^|]+\|Heap size \(KB\)\|(\d+)\n)", "Heap size (KB)"); +} +BENCHMARK(BM_ProtozeroStringRewriterAtraceRedactSpammy) + ->Unit(benchmark::kMillisecond) + ->Arg(10); + +static void BM_ProtozeroStringRewriterAtraceSearchSingleRedactSpammy( + benchmark::State& state) { + Benchmark(state, Policy::kAtraceRepeatedSearchRedactGroups, + R"(Heap size \(KB\)\|(\d+))", "Heap size (KB)"); +} +BENCHMARK(BM_ProtozeroStringRewriterAtraceSearchSingleRedactSpammy) + ->Unit(benchmark::kMillisecond) + ->Arg(10); diff --git a/src/protozero/filtering/string_filter_unittest.cc b/src/protozero/filtering/string_filter_unittest.cc index 4e42c0f63..06000fda0 100644 --- a/src/protozero/filtering/string_filter_unittest.cc +++ b/src/protozero/filtering/string_filter_unittest.cc @@ -180,6 +180,58 @@ TEST(StringFilterTest, AtraceBreak) { ASSERT_EQ(res, "B|1234|foo 1234"); } +TEST(StringFilterTest, AtraceSearch) { + StringFilter filter; + filter.AddRule(StringFilter::Policy::kAtraceRepeatedSearchRedactGroups, + R"(x:(\d+))", "foo"); + + std::string res = "B|1234|foo x:1234 x:494 y:4904 x:dfja x:239039"; + ASSERT_TRUE(filter.MaybeFilter(res.data(), res.size())); + ASSERT_EQ(res, "B|1234|foo x:P60R x:P60 y:4904 x:dfja x:P60RED"); +} + +TEST(StringFilterTest, AtraceSearchBreaks) { + StringFilter filter; + filter.AddRule(StringFilter::Policy::kAtraceRepeatedSearchRedactGroups, + R"(x:(\d+))", "foo"); + filter.AddRule(StringFilter::Policy::kAtraceRepeatedSearchRedactGroups, + R"(y:(\d+))", "foo"); + + std::string res = "B|1234|foo x:1234 x:494 y:4904 x:dfja x:239039"; + ASSERT_TRUE(filter.MaybeFilter(res.data(), res.size())); + ASSERT_EQ(res, "B|1234|foo x:P60R x:P60 y:4904 x:dfja x:P60RED"); +} + +TEST(StringFilterTest, AtraceSearchReturnsFalseOnNoMatch) { + StringFilter filter; + filter.AddRule(StringFilter::Policy::kAtraceRepeatedSearchRedactGroups, + R"(x:(\d+))", "foo"); + + std::string res = "B|1234|foo x:dfja"; + ASSERT_FALSE(filter.MaybeFilter(res.data(), res.size())); + ASSERT_EQ(res, "B|1234|foo x:dfja"); +} + +TEST(StringFilterTest, AtraceSearchMultipleGroups) { + StringFilter filter; + filter.AddRule(StringFilter::Policy::kAtraceRepeatedSearchRedactGroups, + R"(x:(\d+)|y:(\d+))", "foo"); + + std::string res = "B|1234|foo x:1234 x:494 y:4904 x:dfja x:239039"; + ASSERT_TRUE(filter.MaybeFilter(res.data(), res.size())); + ASSERT_EQ(res, "B|1234|foo x:P60R x:P60 y:P60R x:dfja x:P60RED"); +} + +TEST(StringFilterTest, AtraceSearchRecursive) { + StringFilter filter; + filter.AddRule(StringFilter::Policy::kAtraceRepeatedSearchRedactGroups, + R"(x:([^\s-]*))", "foo"); + + std::string res = "B|1234|foo x:1234 x:494 y:4904 x:dfja x:239039"; + ASSERT_TRUE(filter.MaybeFilter(res.data(), res.size())); + ASSERT_EQ(res, "B|1234|foo x:P60R x:P60 y:4904 x:P60R x:P60RED"); +} + TEST(StringFilterTest, RegexRedactionNonUtf) { StringFilter filter; filter.AddRule(StringFilter::Policy::kMatchRedactGroups, diff --git a/src/tools/proto_filter/proto_filter.cc b/src/tools/proto_filter/proto_filter.cc index d63d4c838..0ccdecc91 100644 --- a/src/tools/proto_filter/proto_filter.cc +++ b/src/tools/proto_filter/proto_filter.cc @@ -140,6 +140,8 @@ std::optional<protozero::StringFilter::Policy> ConvertPolicy( return protozero::StringFilter::Policy::kMatchBreak; case TraceFilter::SFP_ATRACE_MATCH_BREAK: return protozero::StringFilter::Policy::kAtraceMatchBreak; + case TraceFilter::SFP_ATRACE_REPEATED_SEARCH_REDACT_GROUPS: + return protozero::StringFilter::Policy::kAtraceRepeatedSearchRedactGroups; } return std::nullopt; } diff --git a/src/tracing/core/tracing_service_impl.cc b/src/tracing/core/tracing_service_impl.cc index 1d469ac75..08af82df7 100644 --- a/src/tracing/core/tracing_service_impl.cc +++ b/src/tracing/core/tracing_service_impl.cc @@ -315,6 +315,8 @@ std::optional<protozero::StringFilter::Policy> ConvertPolicy( return protozero::StringFilter::Policy::kMatchBreak; case TraceFilter::SFP_ATRACE_MATCH_BREAK: return protozero::StringFilter::Policy::kAtraceMatchBreak; + case TraceFilter::SFP_ATRACE_REPEATED_SEARCH_REDACT_GROUPS: + return protozero::StringFilter::Policy::kAtraceRepeatedSearchRedactGroups; } return std::nullopt; } |