aboutsummaryrefslogtreecommitdiff
path: root/src/protozero/filtering/string_filter.cc
blob: 90fa40ba52e3a34e51e49f63a42b1050f7652906 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
/*
 * Copyright (C) 2023 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "src/protozero/filtering/string_filter.h"

#include <cstring>
#include <regex>
#include <string_view>

#include "perfetto/base/compiler.h"
#include "perfetto/base/logging.h"
#include "perfetto/ext/base/string_view.h"
#include "perfetto/public/compiler.h"

namespace protozero {
namespace {

// Match results whose sub-match iterators are |char*|, i.e. they point into a
// mutable character buffer so matched text can be overwritten in place.
using Matches = std::match_results<char*>;

// |kRedacted| is the replacement text written over a redacted capture group;
// |kRedactedDash| pads whatever part of the group extends past that text.
constexpr std::string_view kRedacted{"P60REDACTED"};
constexpr char kRedactedDash{'-'};

// Returns a pointer to the first character after the tgid pipe character in
// the atrace string given by [ptr, end). Returns null if no such character
// exists, including when the range is empty.
//
// Note that if the tgid pipe is the last character of the string, the returned
// pointer is equal to |end| (i.e. an empty payload).
//
// Examples:
// E|1024 -> nullptr
// foobarbaz -> nullptr
// B|1024|x -> pointer to x
const char* FindAtracePayloadPtr(const char* ptr, const char* end) {
  // Don't even bother checking any strings which are so short that they could
  // not contain a post-tgid section. This filters out strings like "E|" which
  // are emitted by Bionic.
  //
  // The length is checked *before* any pointer past |ptr| is formed: computing
  // |ptr + 2| for a string shorter than two characters would create a pointer
  // beyond one-past-the-end, which is undefined behaviour.
  static constexpr size_t kEarliestSecondPipeIndex = 2;
  if (ptr >= end) {
    return nullptr;
  }
  const size_t len = static_cast<size_t>(end - ptr);

  // Also filter out any other strings starting with "E" as they never contain
  // anything past the tgid: this removes >half of the strings for ~zero cost.
  if (len <= kEarliestSecondPipeIndex || *ptr == 'E') {
    return nullptr;
  }

  // We skipped past the first '|' character by starting at the character at
  // index 2. Just find the next pipe character (i.e. the one after tgid) using
  // memchr.
  const char* search_start = ptr + kEarliestSecondPipeIndex;
  const char* pipe = static_cast<const char*>(
      memchr(search_start, '|', len - kEarliestSecondPipeIndex));
  return pipe ? pipe + 1 : nullptr;
}

// Returns true if the character range [ptr, end) begins with |starts_with|.
// An empty |starts_with| matches any valid (possibly empty) range. Returns
// false if |ptr| is past |end|.
bool StartsWith(const char* ptr,
                const char* end,
                const std::string& starts_with) {
  if (ptr > end) {
    return false;
  }

  // Verify that the atrace string has enough characters to match against all
  // the characters in the "starts with" string. The check compares sizes
  // rather than computing |ptr + starts_with.size()|, as the latter would form
  // an out-of-bounds pointer (undefined behaviour) whenever the prefix is
  // longer than the remaining input.
  const size_t available = static_cast<size_t>(end - ptr);
  if (starts_with.size() > available) {
    return false;
  }

  // Enough characters are available: memcmp to check if they all match.
  return memcmp(ptr, starts_with.data(), starts_with.size()) == 0;
}

// Overwrites, in place, the text of every capture group in |matches|. Each
// group's length is preserved: it is filled with a prefix of |kRedacted| and,
// if the group is longer than |kRedacted|, padded with |kRedactedDash|.
void RedactMatches(const Matches& matches) {
  // Index 0 is the overall match, so only the groups from index 1 onwards are
  // rewritten.
  const size_t group_count = matches.size();
  for (size_t group = 1; group < group_count; ++group) {
    const auto& match = matches[group];
    PERFETTO_CHECK(match.second >= match.first);

    char* const group_begin = match.first;
    const size_t group_len = static_cast<size_t>(match.second - group_begin);

    // Copy as much of |kRedacted| as fits; a short group simply receives a
    // truncated |kRedacted|.
    const size_t copied_len = std::min(group_len, kRedacted.size());
    memcpy(group_begin, kRedacted.data(), copied_len);

    // Whatever remains of the group after |kRedacted| becomes dashes.
    const size_t padding_len = group_len - copied_len;
    memset(group_begin + copied_len, kRedactedDash, padding_len);
  }
}

}  // namespace

// Appends a rule to |rules_|. |pattern_str| is compiled as an ECMAScript regex
// (with the |optimize| flag set) and stored together with |policy| and
// |atrace_payload_starts_with|.
void StringFilter::AddRule(Policy policy,
                           std::string_view pattern_str,
                           std::string atrace_payload_starts_with) {
  const auto regex_flags =
      std::regex::ECMAScript | std::regex_constants::optimize;

  // Compile the pattern first so the rule list is only touched once a valid
  // regex object exists.
  std::regex compiled_pattern(pattern_str.begin(), pattern_str.end(),
                              regex_flags);

  rules_.emplace_back(
      StringFilter::Rule{policy, std::move(compiled_pattern),
                         std::move(atrace_payload_starts_with)});
}

bool StringFilter::MaybeFilterInternal(char* ptr, size_t len) const {
  std::match_results<char*> matches;
  bool atrace_find_tried = false;
  const char* atrace_payload_ptr = nullptr;
  for (const Rule& rule : rules_) {
    switch (rule.policy) {
      case Policy::kMatchRedactGroups:
      case Policy::kMatchBreak:
        if (std::regex_match(ptr, ptr + len, matches, rule.pattern)) {
          if (rule.policy == Policy::kMatchBreak) {
            return false;
          }
          RedactMatches(matches);
          return true;
        }
        break;
      case Policy::kAtraceMatchRedactGroups:
      case Policy::kAtraceMatchBreak:
        atrace_payload_ptr = atrace_find_tried
                                 ? atrace_payload_ptr
                                 : FindAtracePayloadPtr(ptr, ptr + len);
        atrace_find_tried = true;
        if (atrace_payload_ptr &&
            StartsWith(atrace_payload_ptr, ptr + len,
                       rule.atrace_payload_starts_with) &&
            std::regex_match(ptr, ptr + len, matches, rule.pattern)) {
          if (rule.policy == Policy::kAtraceMatchBreak) {
            return false;
          }
          RedactMatches(matches);
          return true;
        }
        break;
      case Policy::kAtraceRepeatedSearchRedactGroups:
        atrace_payload_ptr = atrace_find_tried
                                 ? atrace_payload_ptr
                                 : FindAtracePayloadPtr(ptr, ptr + len);
        atrace_find_tried = true;
        if (atrace_payload_ptr && StartsWith(atrace_payload_ptr, ptr + len,
                                             rule.atrace_payload_starts_with)) {
          auto beg = std::regex_iterator<char*>(ptr, ptr + len, rule.pattern);
          auto end = std::regex_iterator<char*>();
          bool has_any_matches = beg != end;
          for (auto it = std::move(beg); it != end; ++it) {
            RedactMatches(*it);
          }
          if (has_any_matches) {
            return true;
          }
        }
        break;
    }
  }
  return false;
}

}  // namespace protozero