aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndroid Build Coastguard Worker <android-build-coastguard-worker@google.com>2023-12-19 20:16:01 +0000
committerAndroid Build Coastguard Worker <android-build-coastguard-worker@google.com>2023-12-19 20:16:01 +0000
commit1436b03ce8d197748019aee86199430407375a98 (patch)
tree804c11ed028f172747f45d366701c70c1ec3795a
parent1bfb479ba18126d544fb7270675f1b938073e44e (diff)
parent48b70bc1d2f881b7a8a9ad34bbec0b1304485975 (diff)
downloadperfetto-android14-qpr2-s1-release.tar.gz
Change-Id: I4e924e7d4032b1713c5c1d73d217fce35f818ecd
-rw-r--r--protos/perfetto/config/perfetto_config.proto24
-rw-r--r--protos/perfetto/config/trace_config.proto24
-rw-r--r--protos/perfetto/trace/perfetto_trace.proto24
-rw-r--r--src/protozero/filtering/string_filter.cc18
-rw-r--r--src/protozero/filtering/string_filter.h1
-rw-r--r--src/protozero/filtering/string_filter_benchmark.cc39
-rw-r--r--src/protozero/filtering/string_filter_unittest.cc52
-rw-r--r--src/tools/proto_filter/proto_filter.cc2
-rw-r--r--src/tracing/core/tracing_service_impl.cc2
9 files changed, 184 insertions, 2 deletions
diff --git a/protos/perfetto/config/perfetto_config.proto b/protos/perfetto/config/perfetto_config.proto
index 96aa5c81a..230c56d3d 100644
--- a/protos/perfetto/config/perfetto_config.proto
+++ b/protos/perfetto/config/perfetto_config.proto
@@ -3652,6 +3652,30 @@ message TraceConfig {
// |atrace_post_tgid_starts_with|. The regex matching is only performed if
// this check succeeds.
SFP_ATRACE_MATCH_BREAK = 4;
+
+ // Tries to repeatedly search (i.e. find substrings of) the string field
+ // with |regex_pattern|. For each match, redacts any matching groups (i.e.
+ // replaced with a constant string). Once there are no further matches,
+ // filtering is terminated (i.e. no further rules are checked).
+ //
+ // Note that this is policy is a "search" policy not a "match" policy
+ // unlike the above policies:
+ // * Match policies require matching the full string i.e. there is an
+ // implicit leading `^` and trailing `$`.
+ // * Search policies perform repeated partial matching of the string
+ // e.g.
+ // - String: `foo=aaa,bar=123,foo=bbb,baz=456`
+ // - Pattern: `foo=(\d+)`
+ // - Output: `foo=P6O,bar=123,foo=P6O,baz=456`
+ // where P6O is the redaction string
+ //
+ // All of this is only performed after some pre-work where we try to parse
+ // the string field as an atrace tracepoint and check if the post-tgid
+ // field starts with |atrace_post_tgid_starts_with|.
+ //
+ // If there are no partial matches, the string is left unchanged and the
+ // next rule in chain is considered.
+ SFP_ATRACE_REPEATED_SEARCH_REDACT_GROUPS = 5;
}
// A rule specifies how strings should be filtered.
diff --git a/protos/perfetto/config/trace_config.proto b/protos/perfetto/config/trace_config.proto
index 30f2e295a..cca996b90 100644
--- a/protos/perfetto/config/trace_config.proto
+++ b/protos/perfetto/config/trace_config.proto
@@ -575,6 +575,30 @@ message TraceConfig {
// |atrace_post_tgid_starts_with|. The regex matching is only performed if
// this check succeeds.
SFP_ATRACE_MATCH_BREAK = 4;
+
+ // Tries to repeatedly search (i.e. find substrings of) the string field
+ // with |regex_pattern|. For each match, redacts any matching groups (i.e.
+ // replaced with a constant string). Once there are no further matches,
+ // filtering is terminated (i.e. no further rules are checked).
+ //
+ // Note that this is policy is a "search" policy not a "match" policy
+ // unlike the above policies:
+ // * Match policies require matching the full string i.e. there is an
+ // implicit leading `^` and trailing `$`.
+ // * Search policies perform repeated partial matching of the string
+ // e.g.
+ // - String: `foo=aaa,bar=123,foo=bbb,baz=456`
+ // - Pattern: `foo=(\d+)`
+ // - Output: `foo=P6O,bar=123,foo=P6O,baz=456`
+ // where P6O is the redaction string
+ //
+ // All of this is only performed after some pre-work where we try to parse
+ // the string field as an atrace tracepoint and check if the post-tgid
+ // field starts with |atrace_post_tgid_starts_with|.
+ //
+ // If there are no partial matches, the string is left unchanged and the
+ // next rule in chain is considered.
+ SFP_ATRACE_REPEATED_SEARCH_REDACT_GROUPS = 5;
}
// A rule specifies how strings should be filtered.
diff --git a/protos/perfetto/trace/perfetto_trace.proto b/protos/perfetto/trace/perfetto_trace.proto
index 8cfa67ef7..32ab9d07d 100644
--- a/protos/perfetto/trace/perfetto_trace.proto
+++ b/protos/perfetto/trace/perfetto_trace.proto
@@ -3652,6 +3652,30 @@ message TraceConfig {
// |atrace_post_tgid_starts_with|. The regex matching is only performed if
// this check succeeds.
SFP_ATRACE_MATCH_BREAK = 4;
+
+ // Tries to repeatedly search (i.e. find substrings of) the string field
+ // with |regex_pattern|. For each match, redacts any matching groups (i.e.
+ // replaced with a constant string). Once there are no further matches,
+ // filtering is terminated (i.e. no further rules are checked).
+ //
+ // Note that this is policy is a "search" policy not a "match" policy
+ // unlike the above policies:
+ // * Match policies require matching the full string i.e. there is an
+ // implicit leading `^` and trailing `$`.
+ // * Search policies perform repeated partial matching of the string
+ // e.g.
+ // - String: `foo=aaa,bar=123,foo=bbb,baz=456`
+ // - Pattern: `foo=(\d+)`
+ // - Output: `foo=P6O,bar=123,foo=P6O,baz=456`
+ // where P6O is the redaction string
+ //
+ // All of this is only performed after some pre-work where we try to parse
+ // the string field as an atrace tracepoint and check if the post-tgid
+ // field starts with |atrace_post_tgid_starts_with|.
+ //
+ // If there are no partial matches, the string is left unchanged and the
+ // next rule in chain is considered.
+ SFP_ATRACE_REPEATED_SEARCH_REDACT_GROUPS = 5;
}
// A rule specifies how strings should be filtered.
diff --git a/src/protozero/filtering/string_filter.cc b/src/protozero/filtering/string_filter.cc
index c6642f474..90fa40ba5 100644
--- a/src/protozero/filtering/string_filter.cc
+++ b/src/protozero/filtering/string_filter.cc
@@ -135,6 +135,24 @@ bool StringFilter::MaybeFilterInternal(char* ptr, size_t len) const {
return true;
}
break;
+ case Policy::kAtraceRepeatedSearchRedactGroups:
+ atrace_payload_ptr = atrace_find_tried
+ ? atrace_payload_ptr
+ : FindAtracePayloadPtr(ptr, ptr + len);
+ atrace_find_tried = true;
+ if (atrace_payload_ptr && StartsWith(atrace_payload_ptr, ptr + len,
+ rule.atrace_payload_starts_with)) {
+ auto beg = std::regex_iterator<char*>(ptr, ptr + len, rule.pattern);
+ auto end = std::regex_iterator<char*>();
+ bool has_any_matches = beg != end;
+ for (auto it = std::move(beg); it != end; ++it) {
+ RedactMatches(*it);
+ }
+ if (has_any_matches) {
+ return true;
+ }
+ }
+ break;
}
}
return false;
diff --git a/src/protozero/filtering/string_filter.h b/src/protozero/filtering/string_filter.h
index f435d020b..e7ed33113 100644
--- a/src/protozero/filtering/string_filter.h
+++ b/src/protozero/filtering/string_filter.h
@@ -32,6 +32,7 @@ class StringFilter {
kAtraceMatchRedactGroups = 2,
kMatchBreak = 3,
kAtraceMatchBreak = 4,
+ kAtraceRepeatedSearchRedactGroups = 5,
};
// Adds a new rule for filtering strings.
diff --git a/src/protozero/filtering/string_filter_benchmark.cc b/src/protozero/filtering/string_filter_benchmark.cc
index 867f69574..2b976c679 100644
--- a/src/protozero/filtering/string_filter_benchmark.cc
+++ b/src/protozero/filtering/string_filter_benchmark.cc
@@ -62,10 +62,12 @@ void Benchmark(benchmark::State& state,
std::vector<char> storage;
auto strs = LoadTraceStrings(state, storage);
+ uint32_t match = 0;
for (auto _ : state) {
- uint32_t match = 0;
+ match = 0;
+ std::vector<char> local = storage;
for (auto& str : strs) {
- match += rewriter.MaybeFilter(storage.data() + str.first, str.second);
+ match += rewriter.MaybeFilter(local.data() + str.first, str.second);
}
benchmark::DoNotOptimize(match);
}
@@ -73,6 +75,12 @@ void Benchmark(benchmark::State& state,
benchmark::Counter(static_cast<double>(strs.size()),
benchmark::Counter::kIsIterationInvariantRate |
benchmark::Counter::kInvert);
+ state.counters["time/redaction"] =
+ benchmark::Counter(static_cast<double>(match),
+ benchmark::Counter::kIsIterationInvariantRate |
+ benchmark::Counter::kInvert);
+ state.counters["redactions"] = benchmark::Counter(
+ static_cast<double>(match), benchmark::Counter::kDefaults);
}
} // namespace
@@ -111,6 +119,15 @@ BENCHMARK(BM_ProtozeroStringRewriterAtraceRedactRare)
->Unit(benchmark::kMillisecond)
->Arg(10);
+static void BM_ProtozeroStringRewriterAtraceSearchSingleRedactRare(
+ benchmark::State& state) {
+ Benchmark(state, Policy::kAtraceRepeatedSearchRedactGroups,
+ R"(VerifyClass (.*)\n)", "VerifyClass");
+}
+BENCHMARK(BM_ProtozeroStringRewriterAtraceSearchSingleRedactRare)
+ ->Unit(benchmark::kMillisecond)
+ ->Arg(10);
+
static void BM_ProtozeroStringRewriterRedactCommon(benchmark::State& state) {
Benchmark(state, Policy::kMatchRedactGroups,
R"(B\|[^|]+\|Lock contention on a monitor lock (.*)\n)", "");
@@ -128,3 +145,21 @@ static void BM_ProtozeroStringRewriterAtraceRedactCommon(
BENCHMARK(BM_ProtozeroStringRewriterAtraceRedactCommon)
->Unit(benchmark::kMillisecond)
->Arg(10);
+
+static void BM_ProtozeroStringRewriterAtraceRedactSpammy(
+ benchmark::State& state) {
+ Benchmark(state, Policy::kAtraceMatchRedactGroups,
+ R"(C\|[^|]+\|Heap size \(KB\)\|(\d+)\n)", "Heap size (KB)");
+}
+BENCHMARK(BM_ProtozeroStringRewriterAtraceRedactSpammy)
+ ->Unit(benchmark::kMillisecond)
+ ->Arg(10);
+
+static void BM_ProtozeroStringRewriterAtraceSearchSingleRedactSpammy(
+ benchmark::State& state) {
+ Benchmark(state, Policy::kAtraceRepeatedSearchRedactGroups,
+ R"(Heap size \(KB\)\|(\d+))", "Heap size (KB)");
+}
+BENCHMARK(BM_ProtozeroStringRewriterAtraceSearchSingleRedactSpammy)
+ ->Unit(benchmark::kMillisecond)
+ ->Arg(10);
diff --git a/src/protozero/filtering/string_filter_unittest.cc b/src/protozero/filtering/string_filter_unittest.cc
index 4e42c0f63..06000fda0 100644
--- a/src/protozero/filtering/string_filter_unittest.cc
+++ b/src/protozero/filtering/string_filter_unittest.cc
@@ -180,6 +180,58 @@ TEST(StringFilterTest, AtraceBreak) {
ASSERT_EQ(res, "B|1234|foo 1234");
}
+TEST(StringFilterTest, AtraceSearch) {
+ StringFilter filter;
+ filter.AddRule(StringFilter::Policy::kAtraceRepeatedSearchRedactGroups,
+ R"(x:(\d+))", "foo");
+
+ std::string res = "B|1234|foo x:1234 x:494 y:4904 x:dfja x:239039";
+ ASSERT_TRUE(filter.MaybeFilter(res.data(), res.size()));
+ ASSERT_EQ(res, "B|1234|foo x:P60R x:P60 y:4904 x:dfja x:P60RED");
+}
+
+TEST(StringFilterTest, AtraceSearchBreaks) {
+ StringFilter filter;
+ filter.AddRule(StringFilter::Policy::kAtraceRepeatedSearchRedactGroups,
+ R"(x:(\d+))", "foo");
+ filter.AddRule(StringFilter::Policy::kAtraceRepeatedSearchRedactGroups,
+ R"(y:(\d+))", "foo");
+
+ std::string res = "B|1234|foo x:1234 x:494 y:4904 x:dfja x:239039";
+ ASSERT_TRUE(filter.MaybeFilter(res.data(), res.size()));
+ ASSERT_EQ(res, "B|1234|foo x:P60R x:P60 y:4904 x:dfja x:P60RED");
+}
+
+TEST(StringFilterTest, AtraceSearchReturnsFalseOnNoMatch) {
+ StringFilter filter;
+ filter.AddRule(StringFilter::Policy::kAtraceRepeatedSearchRedactGroups,
+ R"(x:(\d+))", "foo");
+
+ std::string res = "B|1234|foo x:dfja";
+ ASSERT_FALSE(filter.MaybeFilter(res.data(), res.size()));
+ ASSERT_EQ(res, "B|1234|foo x:dfja");
+}
+
+TEST(StringFilterTest, AtraceSearchMultipleGroups) {
+ StringFilter filter;
+ filter.AddRule(StringFilter::Policy::kAtraceRepeatedSearchRedactGroups,
+ R"(x:(\d+)|y:(\d+))", "foo");
+
+ std::string res = "B|1234|foo x:1234 x:494 y:4904 x:dfja x:239039";
+ ASSERT_TRUE(filter.MaybeFilter(res.data(), res.size()));
+ ASSERT_EQ(res, "B|1234|foo x:P60R x:P60 y:P60R x:dfja x:P60RED");
+}
+
+TEST(StringFilterTest, AtraceSearchRecursive) {
+ StringFilter filter;
+ filter.AddRule(StringFilter::Policy::kAtraceRepeatedSearchRedactGroups,
+ R"(x:([^\s-]*))", "foo");
+
+ std::string res = "B|1234|foo x:1234 x:494 y:4904 x:dfja x:239039";
+ ASSERT_TRUE(filter.MaybeFilter(res.data(), res.size()));
+ ASSERT_EQ(res, "B|1234|foo x:P60R x:P60 y:4904 x:P60R x:P60RED");
+}
+
TEST(StringFilterTest, RegexRedactionNonUtf) {
StringFilter filter;
filter.AddRule(StringFilter::Policy::kMatchRedactGroups,
diff --git a/src/tools/proto_filter/proto_filter.cc b/src/tools/proto_filter/proto_filter.cc
index d63d4c838..0ccdecc91 100644
--- a/src/tools/proto_filter/proto_filter.cc
+++ b/src/tools/proto_filter/proto_filter.cc
@@ -140,6 +140,8 @@ std::optional<protozero::StringFilter::Policy> ConvertPolicy(
return protozero::StringFilter::Policy::kMatchBreak;
case TraceFilter::SFP_ATRACE_MATCH_BREAK:
return protozero::StringFilter::Policy::kAtraceMatchBreak;
+ case TraceFilter::SFP_ATRACE_REPEATED_SEARCH_REDACT_GROUPS:
+ return protozero::StringFilter::Policy::kAtraceRepeatedSearchRedactGroups;
}
return std::nullopt;
}
diff --git a/src/tracing/core/tracing_service_impl.cc b/src/tracing/core/tracing_service_impl.cc
index 1d469ac75..08af82df7 100644
--- a/src/tracing/core/tracing_service_impl.cc
+++ b/src/tracing/core/tracing_service_impl.cc
@@ -315,6 +315,8 @@ std::optional<protozero::StringFilter::Policy> ConvertPolicy(
return protozero::StringFilter::Policy::kMatchBreak;
case TraceFilter::SFP_ATRACE_MATCH_BREAK:
return protozero::StringFilter::Policy::kAtraceMatchBreak;
+ case TraceFilter::SFP_ATRACE_REPEATED_SEARCH_REDACT_GROUPS:
+ return protozero::StringFilter::Policy::kAtraceRepeatedSearchRedactGroups;
}
return std::nullopt;
}