aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLalit Maganti <lalitm@google.com>2023-06-20 14:36:48 +0100
committerLalit Maganti <lalitm@google.com>2023-06-21 10:34:54 +0100
commit589fc100e38f1ae9f07b7a19d177e47b931b9bd8 (patch)
tree0bb71831e7c854d1cd9918d929e54c84baef4a30
parentd0d520995cd4491cf5fe0a59d5045b2c06cedc8d (diff)
downloadperfetto-589fc100e38f1ae9f07b7a19d177e47b931b9bd8.tar.gz
protozero: add filtering bytecode v2 with support for string filtering
This CL introduces a new bytecode v2 format which adds a new opcode for filtering strings in protos. This CL does not implement the actual filtering logic yet, instead making the field a noop at the moment. (cherry-picked from aosp/2616329) Bug: 283246642 Change-Id: I4e1dfbeafa7b7728faf3dc063f868c9a36f1672a Merged-In: I4e1dfbeafa7b7728faf3dc063f868c9a36f1672a
-rw-r--r--protos/perfetto/config/perfetto_config.proto12
-rw-r--r--protos/perfetto/config/trace_config.proto12
-rw-r--r--protos/perfetto/trace/perfetto_trace.proto12
-rw-r--r--src/protozero/filtering/filter_bytecode_common.h7
-rw-r--r--src/protozero/filtering/filter_bytecode_generator.cc8
-rw-r--r--src/protozero/filtering/filter_bytecode_generator.h3
-rw-r--r--src/protozero/filtering/filter_bytecode_parser.cc8
-rw-r--r--src/protozero/filtering/filter_bytecode_parser.h20
-rw-r--r--src/protozero/filtering/message_filter.cc23
-rw-r--r--src/protozero/filtering/message_filter.h19
10 files changed, 105 insertions, 19 deletions
diff --git a/protos/perfetto/config/perfetto_config.proto b/protos/perfetto/config/perfetto_config.proto
index 5fbcf8dc2..14dac1010 100644
--- a/protos/perfetto/config/perfetto_config.proto
+++ b/protos/perfetto/config/perfetto_config.proto
@@ -3170,9 +3170,19 @@ message TraceConfig {
// design.
//
// Introduced in Android S, but it was broken (b/195065199). Reintroduced in
- // Android T with a different field number.
+ // Android T with a different field number. Updated in Android U with a new
+ // bytecode version which supports string filtering.
message TraceFilter {
+ // =========================
+ // Filter bytecode.
+ // =========================
+
+ // The bytecode as implemented in Android T.
optional bytes bytecode = 1;
+
+ // The bytecode as implemented in Android U. Adds support for string
+ // filtering.
+ optional bytes bytecode_v2 = 2;
}
// old field number for trace_filter
reserved 32;
diff --git a/protos/perfetto/config/trace_config.proto b/protos/perfetto/config/trace_config.proto
index 67c5f3d87..05d4520c9 100644
--- a/protos/perfetto/config/trace_config.proto
+++ b/protos/perfetto/config/trace_config.proto
@@ -491,9 +491,19 @@ message TraceConfig {
// design.
//
// Introduced in Android S, but it was broken (b/195065199). Reintroduced in
- // Android T with a different field number.
+ // Android T with a different field number. Updated in Android U with a new
+ // bytecode version which supports string filtering.
message TraceFilter {
+ // =========================
+ // Filter bytecode.
+ // =========================
+
+ // The bytecode as implemented in Android T.
optional bytes bytecode = 1;
+
+ // The bytecode as implemented in Android U. Adds support for string
+ // filtering.
+ optional bytes bytecode_v2 = 2;
}
// old field number for trace_filter
reserved 32;
diff --git a/protos/perfetto/trace/perfetto_trace.proto b/protos/perfetto/trace/perfetto_trace.proto
index 77ff04114..4ed8d5a6c 100644
--- a/protos/perfetto/trace/perfetto_trace.proto
+++ b/protos/perfetto/trace/perfetto_trace.proto
@@ -3170,9 +3170,19 @@ message TraceConfig {
// design.
//
// Introduced in Android S, but it was broken (b/195065199). Reintroduced in
- // Android T with a different field number.
+ // Android T with a different field number. Updated in Android U with a new
+ // bytecode version which supports string filtering.
message TraceFilter {
+ // =========================
+ // Filter bytecode.
+ // =========================
+
+ // The bytecode as implemented in Android T.
optional bytes bytecode = 1;
+
+ // The bytecode as implemented in Android U. Adds support for string
+ // filtering.
+ optional bytes bytecode_v2 = 2;
}
// old field number for trace_filter
reserved 32;
diff --git a/src/protozero/filtering/filter_bytecode_common.h b/src/protozero/filtering/filter_bytecode_common.h
index 9c0e6f238..02a6cd02c 100644
--- a/src/protozero/filtering/filter_bytecode_common.h
+++ b/src/protozero/filtering/filter_bytecode_common.h
@@ -36,7 +36,14 @@ enum FilterOpcode : uint32_t {
// (without any shifting) is the index of the filter that should be used to
// recurse into the nested message.
kFilterOpcode_NestedField = 3,
+
+ // The imediate value is the id of the allowed field. The behaviour of this
+ // opcode is the same as kFilterOpcode_SimpleField, with the further semantic
+ // that the field is a string and needs to be processed using the string
+ // filtering fules.
+ kFilterOpcode_FilterString = 4,
};
+
} // namespace protozero
#endif // SRC_PROTOZERO_FILTERING_FILTER_BYTECODE_COMMON_H_
diff --git a/src/protozero/filtering/filter_bytecode_generator.cc b/src/protozero/filtering/filter_bytecode_generator.cc
index b8a059aca..487534b84 100644
--- a/src/protozero/filtering/filter_bytecode_generator.cc
+++ b/src/protozero/filtering/filter_bytecode_generator.cc
@@ -43,6 +43,14 @@ void FilterBytecodeGenerator::AddSimpleField(uint32_t field_id) {
endmessage_called_ = false;
}
+// Allows a string field which needs to be rewritten using the given chain.
+void FilterBytecodeGenerator::AddFilterStringField(uint32_t field_id) {
+ PERFETTO_CHECK(field_id > last_field_id_);
+ bytecode_.push_back(field_id << 3 | kFilterOpcode_FilterString);
+ last_field_id_ = field_id;
+ endmessage_called_ = false;
+}
+
// Allows a range of simple fields. |range_start| is the id of the first field
// in range, |range_len| the number of fields in the range.
// AddSimpleFieldRange(N,1) is semantically equivalent to AddSimpleField(N).
diff --git a/src/protozero/filtering/filter_bytecode_generator.h b/src/protozero/filtering/filter_bytecode_generator.h
index 809799c24..ecff87e11 100644
--- a/src/protozero/filtering/filter_bytecode_generator.h
+++ b/src/protozero/filtering/filter_bytecode_generator.h
@@ -43,6 +43,9 @@ class FilterBytecodeGenerator {
// Allows a simple field (varint, fixed32/64, string or bytes).
void AddSimpleField(uint32_t field_id);
+ // Allows a string field which needs to be filtered.
+ void AddFilterStringField(uint32_t field_id);
+
// Allows a range of simple fields. |range_start| is the id of the first field
// in range, |range_len| the number of fields in the range.
// AddSimpleFieldRange(N,1) is semantically equivalent to AddSimpleField(N)
diff --git a/src/protozero/filtering/filter_bytecode_parser.cc b/src/protozero/filtering/filter_bytecode_parser.cc
index e8712c771..6c22722fd 100644
--- a/src/protozero/filtering/filter_bytecode_parser.cc
+++ b/src/protozero/filtering/filter_bytecode_parser.cc
@@ -105,15 +105,19 @@ bool FilterBytecodeParser::LoadInternal(const uint8_t* bytecode_data,
}
if (opcode == kFilterOpcode_SimpleField ||
- opcode == kFilterOpcode_NestedField) {
+ opcode == kFilterOpcode_NestedField ||
+ opcode == kFilterOpcode_FilterString) {
// Field words are organized as follow:
// MSB: 1 if allowed, 0 if not allowed.
// Remaining bits:
// Message index in the case of nested (non-simple) messages.
- // 0x7f..f in the case of simple messages.
+ // 0x7f..e in the case of string fields which need filtering.
+ // 0x7f..f in the case of simple fields.
uint32_t msg_id;
if (opcode == kFilterOpcode_SimpleField) {
msg_id = kSimpleField;
+ } else if (opcode == kFilterOpcode_FilterString) {
+ msg_id = kFilterStringField;
} else { // FILTER_OPCODE_NESTED_FIELD
// The next word in the bytecode contains the message index.
if (!has_next_word) {
diff --git a/src/protozero/filtering/filter_bytecode_parser.h b/src/protozero/filtering/filter_bytecode_parser.h
index 8744a06a7..fa07be2eb 100644
--- a/src/protozero/filtering/filter_bytecode_parser.h
+++ b/src/protozero/filtering/filter_bytecode_parser.h
@@ -20,6 +20,7 @@
#include <stddef.h>
#include <stdint.h>
+#include <optional>
#include <vector>
namespace protozero {
@@ -55,10 +56,20 @@ class FilterBytecodeParser {
// fixed32/64, string or byte).
bool simple_field() const { return nested_msg_index == kSimpleField; }
+ // If |allowed|==true, specifies if this field is a string field that needs
+ // to be filtered.
+ bool filter_string_field() const {
+ return nested_msg_index == kFilterStringField;
+ }
+
// If |allowed|==true, specifies if the field is a nested field that needs
// recursion. The caller is expected to use |nested_msg_index| for the next
// Query() calls.
- bool nested_msg_field() const { return nested_msg_index < kSimpleField; }
+ bool nested_msg_field() const {
+ static_assert(kFilterStringField < kSimpleField,
+ "kFilterStringField < kSimpleField");
+ return nested_msg_index < kFilterStringField;
+ }
};
// Loads a filter. The filter data consists of a sequence of varints which
@@ -77,6 +88,7 @@ class FilterBytecodeParser {
static constexpr uint32_t kDirectlyIndexLimit = 128;
static constexpr uint32_t kAllowed = 1u << 31u;
static constexpr uint32_t kSimpleField = 0x7fffffff;
+ static constexpr uint32_t kFilterStringField = 0x7ffffffe;
bool LoadInternal(const uint8_t* filter_data, size_t len);
@@ -101,17 +113,17 @@ class FilterBytecodeParser {
// [N + 6] -> Field state for fields in range 2 (below)
// The "field state" word is as follows:
- // Bit 31: 0 if the field is disallowed, 1 if allowed.
+ // Bit 31: 1 if the field is allowed, 0 if disallowed.
// Only directly indexed fields can be 0 (it doesn't make sense to add
// a range and then say "btw it's NOT allowed".. don't add it then.
// 0 is only used for filling gaps in the directly indexed bucket.
// Bits [30..0] (only when MSB == allowed):
// 0x7fffffff: The field is "simple" (varint, fixed32/64, string, bytes) and
// can be directly passed through in output. No recursion is needed.
- // [0, 7ffffffe]: The field is a nested submessage. The value is the index
+ // 0x7ffffffe: The field is string field which needs to be filtered.
+ // [0, 7ffffffd]: The field is a nested submessage. The value is the index
// that must be passed as first argument to the next Query() calls.
// Note that the message index is purely a monotonic counter in the
- // filter bytecode, has no proto-equivalent match (unlike field ids).
std::vector<uint32_t> words_;
// One entry for each message index stored in the filter plus a sentinel at
diff --git a/src/protozero/filtering/message_filter.cc b/src/protozero/filtering/message_filter.cc
index fe5e1c526..ff283f869 100644
--- a/src/protozero/filtering/message_filter.cc
+++ b/src/protozero/filtering/message_filter.cc
@@ -154,11 +154,17 @@ void MessageFilter::FilterOneByte(uint8_t octet) {
if (state->eat_next_bytes > 0) {
// This is the case where the previous tokenizer_.Push() call returned a
// length delimited message which is NOT a submessage (a string or a bytes
- // field). We just want to consume it, and pass it through in output
+ // field). We just want to consume it, and pass it through/filter strings
// if the field was allowed.
--state->eat_next_bytes;
- if (state->passthrough_eaten_bytes)
+ if (state->action == StackState::kPassthrough) {
*(out_++) = octet;
+ } else if (state->action == StackState::kFilterString) {
+ *(out_++) = octet;
+ if (state->eat_next_bytes == 0) {
+ // TODO(lalitm): do the filtering using |filter_string_ptr|.
+ }
+ }
} else {
MessageTokenizer::Token token = tokenizer_.Push(octet);
// |token| will not be valid() in most cases and this is WAI. When pushing
@@ -224,9 +230,16 @@ void MessageFilter::FilterOneByte(uint8_t octet) {
} else {
// A string or bytes field, or a 0 length submessage.
state->eat_next_bytes = submessage_len;
- state->passthrough_eaten_bytes = filter.allowed;
- if (filter.allowed)
+ if (filter.allowed && filter.filter_string_field()) {
+ state->action = StackState::kFilterString;
+ AppendLenDelim(token.field_id, submessage_len, &out_);
+ state->filter_string_ptr = out_;
+ } else if (filter.allowed) {
+ state->action = StackState::kPassthrough;
AppendLenDelim(token.field_id, submessage_len, &out_);
+ } else {
+ state->action = StackState::kDrop;
+ }
}
break;
} // switch(type)
@@ -278,7 +291,7 @@ void MessageFilter::SetUnrecoverableErrorState() {
auto& state = stack_[0];
state.eat_next_bytes = UINT32_MAX;
state.in_bytes_limit = UINT32_MAX;
- state.passthrough_eaten_bytes = false;
+ state.action = StackState::kDrop;
out_ = out_buf_.get(); // Reset the write pointer.
}
diff --git a/src/protozero/filtering/message_filter.h b/src/protozero/filtering/message_filter.h
index c752b2a78..e67467b6b 100644
--- a/src/protozero/filtering/message_filter.h
+++ b/src/protozero/filtering/message_filter.h
@@ -181,11 +181,20 @@ class MessageFilter {
uint8_t* size_field = nullptr;
uint32_t size_field_len = 0;
- // When true the next |eat_next_bytes| are copied as-is in output.
- // It seems that keeping this field at the end rather than next to
- // |eat_next_bytes| makes the filter a little (but measurably) faster.
- // (likely something related with struct layout vs cache sizes).
- bool passthrough_eaten_bytes = false;
+ // The pointer to the start of the string to update the string if it is
+ // filtered.
+ uint8_t* filter_string_ptr = nullptr;
+
+ // How |eat_next_bytes| should be handled. It seems that keeping this field
+ // at the end rather than next to |eat_next_bytes| makes the filter a little
+ // (but measurably) faster. (likely something related with struct layout vs
+ // cache sizes).
+ enum FilterAction {
+ kDrop,
+ kPassthrough,
+ kFilterString,
+ };
+ FilterAction action = FilterAction::kDrop;
};
uint32_t out_written() { return static_cast<uint32_t>(out_ - &out_buf_[0]); }