about | summary | refs | log | tree | commit | diff
path: root/src/protozero/filtering/string_filter.cc
diff options
context:
space:
mode:
Diffstat (limited to 'src/protozero/filtering/string_filter.cc')
-rw-r--r-- src/protozero/filtering/string_filter.cc 143
1 files changed, 143 insertions, 0 deletions
diff --git a/src/protozero/filtering/string_filter.cc b/src/protozero/filtering/string_filter.cc
new file mode 100644
index 000000000..32542076d
--- /dev/null
+++ b/src/protozero/filtering/string_filter.cc
@@ -0,0 +1,143 @@
+/*
+ * Copyright (C) 2023 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "src/protozero/filtering/string_filter.h"
+
+#include <cstring>
+#include <regex>
+#include <string_view>
+
+#include "perfetto/base/compiler.h"
+#include "perfetto/base/logging.h"
+#include "perfetto/ext/base/string_view.h"
+#include "perfetto/public/compiler.h"
+
+namespace protozero {
+namespace {
+
+// Regex match results whose iterators are mutable |char*|, so that the
+// matched groups can be overwritten directly in the matched buffer.
+using Matches = std::match_results<char*>;
+
+// Text written over the start of every redacted group (truncated if the
+// group is shorter than this marker).
+constexpr std::string_view kRedacted{"P60REDACTED"};
+
+// Filler written over whatever is left of a redacted group once |kRedacted|
+// has been fully written.
+constexpr char kRedactedDash{'-'};
+
+// Returns a pointer to the first character after the tgid pipe character in
+// the atrace string given by [ptr, end). Returns null if no such character
+// exists.
+//
+// If the pipe following the tgid is the very last character of the string,
+// the returned pointer is equal to |end| and must not be dereferenced.
+//
+// Examples:
+// E|1024 -> nullptr
+// foobarbaz -> nullptr
+// B|1024|x -> pointer to x
+// B|1024| -> |end|
+const char* FindAtracePayloadPtr(const char* ptr, const char* end) {
+  // Don't even bother checking any strings which are so short that they could
+  // not contain a post-tgid section. This filters out strings like "E|" which
+  // are emitted by Bionic.
+  //
+  // Also filter out any other strings starting with "E" as they never contain
+  // anything past the tgid: this removes >half of the strings for ~zero cost.
+  //
+  // The length is compared *before* any pointer is advanced: forming
+  // |ptr + 2| for a string shorter than two characters would create a pointer
+  // beyond one-past-the-end of the buffer, which is undefined behaviour.
+  static constexpr int kEarliestSecondPipeIndex = 2;
+  if (end - ptr <= kEarliestSecondPipeIndex || *ptr == 'E') {
+    return nullptr;
+  }
+
+  // We skip past the first '|' character by starting at the character at
+  // index 2. Just find the next pipe character (i.e. the one after tgid) using
+  // memchr.
+  const char* search_start = ptr + kEarliestSecondPipeIndex;
+  const char* pipe = static_cast<const char*>(
+      memchr(search_start, '|', static_cast<size_t>(end - search_start)));
+  return pipe ? pipe + 1 : nullptr;
+}
+
+// Returns true if the character range [ptr, end) begins with |starts_with|.
+// An empty |starts_with| matches every range, including an empty one.
+bool StartsWith(const char* ptr,
+                const char* end,
+                const std::string& starts_with) {
+  // Verify that the atrace string has enough characters to match against all
+  // the characters in the "starts with" string. Lengths are compared rather
+  // than checking |ptr + starts_with.size() <= end|, because forming a pointer
+  // past the end of the buffer is undefined behaviour when the prefix is
+  // longer than the remaining input.
+  const auto available = end - ptr;
+  if (available < 0) {
+    return false;
+  }
+  if (starts_with.size() > static_cast<size_t>(available)) {
+    return false;
+  }
+
+  // Enough characters are available: memcmp to check if they all match.
+  return memcmp(ptr, starts_with.data(), starts_with.size()) == 0;
+}
+
+// Overwrites, in place, the characters of every capture group in |matches|.
+// Each group receives as much of |kRedacted| as fits, followed by
+// |kRedactedDash| repeated until the end of the group. The length of the
+// underlying buffer never changes.
+void RedactMatches(const Matches& matches) {
+  // Entry 0 is skipped: only the groups at index 1 and above are rewritten.
+  const size_t group_count = matches.size();
+  for (size_t group = 1; group < group_count; ++group) {
+    const auto& span = matches[group];
+    char* const begin = span.first;
+    char* const finish = span.second;
+    PERFETTO_CHECK(finish >= begin);
+
+    // Write the marker first. A group shorter than the marker simply gets a
+    // truncated copy of it.
+    const size_t span_len = static_cast<size_t>(finish - begin);
+    const size_t marker_len =
+        span_len < kRedacted.size() ? span_len : kRedacted.size();
+    memcpy(begin, kRedacted.data(), marker_len);
+
+    // Pad the rest of the group (if any) with dashes.
+    memset(begin + marker_len, kRedactedDash, span_len - marker_len);
+  }
+}
+
+} // namespace
+
+// Appends a new rule to |rules_|. |pattern_str| is compiled once, here, as an
+// ECMAScript regex with the |optimize| flag; |atrace_payload_starts_with| is
+// moved into the stored rule.
+void StringFilter::AddRule(Policy policy,
+                           std::string_view pattern_str,
+                           std::string atrace_payload_starts_with) {
+  const auto regex_flags =
+      std::regex::ECMAScript | std::regex_constants::optimize;
+  std::regex compiled(pattern_str.begin(), pattern_str.end(), regex_flags);
+  rules_.emplace_back(StringFilter::Rule{
+      policy, std::move(compiled), std::move(atrace_payload_starts_with)});
+}
+
+// Runs the rules in |rules_|, in order, against the string [ptr, ptr + len).
+// The first rule that matches decides the outcome:
+//  - a "break" policy stops processing and returns false without touching the
+//    string;
+//  - a "redact groups" policy overwrites the regex groups in place via
+//    RedactMatches() and returns true.
+// Returns false if no rule matches.
+bool StringFilter::MaybeFilterInternal(char* ptr, size_t len) {
+  char* const end = ptr + len;
+  std::match_results<char*> groups;
+
+  // The atrace payload position depends only on the string, not on the rule,
+  // so it is looked up at most once and only if an atrace rule is reached.
+  bool payload_looked_up = false;
+  const char* payload = nullptr;
+
+  for (const Rule& rule : rules_) {
+    switch (rule.policy) {
+      case Policy::kMatchRedactGroups:
+      case Policy::kMatchBreak:
+        if (!std::regex_match(ptr, end, groups, rule.pattern)) {
+          break;  // No match: move on to the next rule.
+        }
+        if (rule.policy == Policy::kMatchBreak) {
+          return false;
+        }
+        RedactMatches(groups);
+        return true;
+
+      case Policy::kAtraceMatchRedactGroups:
+      case Policy::kAtraceMatchBreak:
+        if (!payload_looked_up) {
+          payload = FindAtracePayloadPtr(ptr, end);
+          payload_looked_up = true;
+        }
+        // Cheap checks first: the regex only runs when the string has an
+        // atrace payload which begins with the prefix required by the rule.
+        if (payload == nullptr) {
+          break;
+        }
+        if (!StartsWith(payload, end, rule.atrace_payload_starts_with)) {
+          break;
+        }
+        if (!std::regex_match(ptr, end, groups, rule.pattern)) {
+          break;
+        }
+        if (rule.policy == Policy::kAtraceMatchBreak) {
+          return false;
+        }
+        RedactMatches(groups);
+        return true;
+    }
+  }
+  return false;
+}
+
+} // namespace protozero