diff options
author | Lalit Maganti <lalitm@google.com> | 2023-06-20 14:36:48 +0100 |
---|---|---|
committer | Lalit Maganti <lalitm@google.com> | 2023-06-21 10:34:54 +0100 |
commit | 589fc100e38f1ae9f07b7a19d177e47b931b9bd8 (patch) | |
tree | 0bb71831e7c854d1cd9918d929e54c84baef4a30 | |
parent | d0d520995cd4491cf5fe0a59d5045b2c06cedc8d (diff) | |
download | perfetto-589fc100e38f1ae9f07b7a19d177e47b931b9bd8.tar.gz |
protozero: add filtering bytecode v2 with support for string filtering
This CL introduces a new bytecode v2 format which adds a new opcode
for filtering strings in protos. This CL does not implement the actual
filtering logic yet, instead making the field a noop at the moment.
(cherry-picked from aosp/2616329)
Bug: 283246642
Change-Id: I4e1dfbeafa7b7728faf3dc063f868c9a36f1672a
Merged-In: I4e1dfbeafa7b7728faf3dc063f868c9a36f1672a
-rw-r--r-- | protos/perfetto/config/perfetto_config.proto | 12 | ||||
-rw-r--r-- | protos/perfetto/config/trace_config.proto | 12 | ||||
-rw-r--r-- | protos/perfetto/trace/perfetto_trace.proto | 12 | ||||
-rw-r--r-- | src/protozero/filtering/filter_bytecode_common.h | 7 | ||||
-rw-r--r-- | src/protozero/filtering/filter_bytecode_generator.cc | 8 | ||||
-rw-r--r-- | src/protozero/filtering/filter_bytecode_generator.h | 3 | ||||
-rw-r--r-- | src/protozero/filtering/filter_bytecode_parser.cc | 8 | ||||
-rw-r--r-- | src/protozero/filtering/filter_bytecode_parser.h | 20 | ||||
-rw-r--r-- | src/protozero/filtering/message_filter.cc | 23 | ||||
-rw-r--r-- | src/protozero/filtering/message_filter.h | 19 |
10 files changed, 105 insertions, 19 deletions
diff --git a/protos/perfetto/config/perfetto_config.proto b/protos/perfetto/config/perfetto_config.proto index 5fbcf8dc2..14dac1010 100644 --- a/protos/perfetto/config/perfetto_config.proto +++ b/protos/perfetto/config/perfetto_config.proto @@ -3170,9 +3170,19 @@ message TraceConfig { // design. // // Introduced in Android S, but it was broken (b/195065199). Reintroduced in - // Android T with a different field number. + // Android T with a different field number. Updated in Android U with a new + // bytecode version which supports string filtering. message TraceFilter { + // ========================= + // Filter bytecode. + // ========================= + + // The bytecode as implemented in Android T. optional bytes bytecode = 1; + + // The bytecode as implemented in Android U. Adds support for string + // filtering. + optional bytes bytecode_v2 = 2; } // old field number for trace_filter reserved 32; diff --git a/protos/perfetto/config/trace_config.proto b/protos/perfetto/config/trace_config.proto index 67c5f3d87..05d4520c9 100644 --- a/protos/perfetto/config/trace_config.proto +++ b/protos/perfetto/config/trace_config.proto @@ -491,9 +491,19 @@ message TraceConfig { // design. // // Introduced in Android S, but it was broken (b/195065199). Reintroduced in - // Android T with a different field number. + // Android T with a different field number. Updated in Android U with a new + // bytecode version which supports string filtering. message TraceFilter { + // ========================= + // Filter bytecode. + // ========================= + + // The bytecode as implemented in Android T. optional bytes bytecode = 1; + + // The bytecode as implemented in Android U. Adds support for string + // filtering. + optional bytes bytecode_v2 = 2; } // old field number for trace_filter reserved 32; diff --git a/protos/perfetto/trace/perfetto_trace.proto b/protos/perfetto/trace/perfetto_trace.proto index 77ff04114..4ed8d5a6c 100644 --- a/protos/perfetto/trace/perfetto_trace.proto +++ b/protos/perfetto/trace/perfetto_trace.proto @@ -3170,9 +3170,19 @@ message TraceConfig { // design. // // Introduced in Android S, but it was broken (b/195065199). Reintroduced in - // Android T with a different field number. + // Android T with a different field number. Updated in Android U with a new + // bytecode version which supports string filtering. message TraceFilter { + // ========================= + // Filter bytecode. + // ========================= + + // The bytecode as implemented in Android T. optional bytes bytecode = 1; + + // The bytecode as implemented in Android U. Adds support for string + // filtering. + optional bytes bytecode_v2 = 2; } // old field number for trace_filter reserved 32; diff --git a/src/protozero/filtering/filter_bytecode_common.h b/src/protozero/filtering/filter_bytecode_common.h index 9c0e6f238..02a6cd02c 100644 --- a/src/protozero/filtering/filter_bytecode_common.h +++ b/src/protozero/filtering/filter_bytecode_common.h @@ -36,7 +36,14 @@ enum FilterOpcode : uint32_t { // (without any shifting) is the index of the filter that should be used to // recurse into the nested message. kFilterOpcode_NestedField = 3, + + // The imediate value is the id of the allowed field. The behaviour of this + // opcode is the same as kFilterOpcode_SimpleField, with the further semantic + // that the field is a string and needs to be processed using the string + // filtering fules. + kFilterOpcode_FilterString = 4, }; + } // namespace protozero #endif // SRC_PROTOZERO_FILTERING_FILTER_BYTECODE_COMMON_H_ diff --git a/src/protozero/filtering/filter_bytecode_generator.cc b/src/protozero/filtering/filter_bytecode_generator.cc index b8a059aca..487534b84 100644 --- a/src/protozero/filtering/filter_bytecode_generator.cc +++ b/src/protozero/filtering/filter_bytecode_generator.cc @@ -43,6 +43,14 @@ void FilterBytecodeGenerator::AddSimpleField(uint32_t field_id) { endmessage_called_ = false; } +// Allows a string field which needs to be rewritten using the given chain. +void FilterBytecodeGenerator::AddFilterStringField(uint32_t field_id) { + PERFETTO_CHECK(field_id > last_field_id_); + bytecode_.push_back(field_id << 3 | kFilterOpcode_FilterString); + last_field_id_ = field_id; + endmessage_called_ = false; +} + // Allows a range of simple fields. |range_start| is the id of the first field // in range, |range_len| the number of fields in the range. // AddSimpleFieldRange(N,1) is semantically equivalent to AddSimpleField(N). diff --git a/src/protozero/filtering/filter_bytecode_generator.h b/src/protozero/filtering/filter_bytecode_generator.h index 809799c24..ecff87e11 100644 --- a/src/protozero/filtering/filter_bytecode_generator.h +++ b/src/protozero/filtering/filter_bytecode_generator.h @@ -43,6 +43,9 @@ class FilterBytecodeGenerator { // Allows a simple field (varint, fixed32/64, string or bytes). void AddSimpleField(uint32_t field_id); + // Allows a string field which needs to be filtered. + void AddFilterStringField(uint32_t field_id); + // Allows a range of simple fields. |range_start| is the id of the first field // in range, |range_len| the number of fields in the range. // AddSimpleFieldRange(N,1) is semantically equivalent to AddSimpleField(N) diff --git a/src/protozero/filtering/filter_bytecode_parser.cc b/src/protozero/filtering/filter_bytecode_parser.cc index e8712c771..6c22722fd 100644 --- a/src/protozero/filtering/filter_bytecode_parser.cc +++ b/src/protozero/filtering/filter_bytecode_parser.cc @@ -105,15 +105,19 @@ bool FilterBytecodeParser::LoadInternal(const uint8_t* bytecode_data, } if (opcode == kFilterOpcode_SimpleField || - opcode == kFilterOpcode_NestedField) { + opcode == kFilterOpcode_NestedField || + opcode == kFilterOpcode_FilterString) { // Field words are organized as follow: // MSB: 1 if allowed, 0 if not allowed. // Remaining bits: // Message index in the case of nested (non-simple) messages. - // 0x7f..f in the case of simple messages. + // 0x7f..e in the case of string fields which need filtering. + // 0x7f..f in the case of simple fields. uint32_t msg_id; if (opcode == kFilterOpcode_SimpleField) { msg_id = kSimpleField; + } else if (opcode == kFilterOpcode_FilterString) { + msg_id = kFilterStringField; } else { // FILTER_OPCODE_NESTED_FIELD // The next word in the bytecode contains the message index. if (!has_next_word) { diff --git a/src/protozero/filtering/filter_bytecode_parser.h b/src/protozero/filtering/filter_bytecode_parser.h index 8744a06a7..fa07be2eb 100644 --- a/src/protozero/filtering/filter_bytecode_parser.h +++ b/src/protozero/filtering/filter_bytecode_parser.h @@ -20,6 +20,7 @@ #include <stddef.h> #include <stdint.h> +#include <optional> #include <vector> namespace protozero { @@ -55,10 +56,20 @@ class FilterBytecodeParser { // fixed32/64, string or byte). bool simple_field() const { return nested_msg_index == kSimpleField; } + // If |allowed|==true, specifies if this field is a string field that needs + // to be filtered. + bool filter_string_field() const { + return nested_msg_index == kFilterStringField; + } + // If |allowed|==true, specifies if the field is a nested field that needs // recursion. The caller is expected to use |nested_msg_index| for the next // Query() calls. - bool nested_msg_field() const { return nested_msg_index < kSimpleField; } + bool nested_msg_field() const { + static_assert(kFilterStringField < kSimpleField, + "kFilterStringField < kSimpleField"); + return nested_msg_index < kFilterStringField; + } }; // Loads a filter. The filter data consists of a sequence of varints which @@ -77,6 +88,7 @@ class FilterBytecodeParser { static constexpr uint32_t kDirectlyIndexLimit = 128; static constexpr uint32_t kAllowed = 1u << 31u; static constexpr uint32_t kSimpleField = 0x7fffffff; + static constexpr uint32_t kFilterStringField = 0x7ffffffe; bool LoadInternal(const uint8_t* filter_data, size_t len); @@ -101,17 +113,17 @@ class FilterBytecodeParser { // [N + 6] -> Field state for fields in range 2 (below) // The "field state" word is as follows: - // Bit 31: 0 if the field is disallowed, 1 if allowed. + // Bit 31: 1 if the field is allowed, 0 if disallowed. // Only directly indexed fields can be 0 (it doesn't make sense to add // a range and then say "btw it's NOT allowed".. don't add it then. // 0 is only used for filling gaps in the directly indexed bucket. // Bits [30..0] (only when MSB == allowed): // 0x7fffffff: The field is "simple" (varint, fixed32/64, string, bytes) and // can be directly passed through in output. No recursion is needed. - // [0, 7ffffffe]: The field is a nested submessage. The value is the index + // 0x7ffffffe: The field is string field which needs to be filtered. + // [0, 7ffffffd]: The field is a nested submessage. The value is the index // that must be passed as first argument to the next Query() calls. // Note that the message index is purely a monotonic counter in the - // filter bytecode, has no proto-equivalent match (unlike field ids). std::vector<uint32_t> words_; // One entry for each message index stored in the filter plus a sentinel at diff --git a/src/protozero/filtering/message_filter.cc b/src/protozero/filtering/message_filter.cc index fe5e1c526..ff283f869 100644 --- a/src/protozero/filtering/message_filter.cc +++ b/src/protozero/filtering/message_filter.cc @@ -154,11 +154,17 @@ void MessageFilter::FilterOneByte(uint8_t octet) { if (state->eat_next_bytes > 0) { // This is the case where the previous tokenizer_.Push() call returned a // length delimited message which is NOT a submessage (a string or a bytes - // field). We just want to consume it, and pass it through in output + // field). We just want to consume it, and pass it through/filter strings // if the field was allowed. --state->eat_next_bytes; - if (state->passthrough_eaten_bytes) + if (state->action == StackState::kPassthrough) { *(out_++) = octet; + } else if (state->action == StackState::kFilterString) { + *(out_++) = octet; + if (state->eat_next_bytes == 0) { + // TODO(lalitm): do the filtering using |filter_string_ptr|. + } + } } else { MessageTokenizer::Token token = tokenizer_.Push(octet); // |token| will not be valid() in most cases and this is WAI. When pushing @@ -224,9 +230,16 @@ void MessageFilter::FilterOneByte(uint8_t octet) { } else { // A string or bytes field, or a 0 length submessage. state->eat_next_bytes = submessage_len; - state->passthrough_eaten_bytes = filter.allowed; - if (filter.allowed) + if (filter.allowed && filter.filter_string_field()) { + state->action = StackState::kFilterString; + AppendLenDelim(token.field_id, submessage_len, &out_); + state->filter_string_ptr = out_; + } else if (filter.allowed) { + state->action = StackState::kPassthrough; AppendLenDelim(token.field_id, submessage_len, &out_); + } else { + state->action = StackState::kDrop; + } } break; } // switch(type) @@ -278,7 +291,7 @@ void MessageFilter::SetUnrecoverableErrorState() { auto& state = stack_[0]; state.eat_next_bytes = UINT32_MAX; state.in_bytes_limit = UINT32_MAX; - state.passthrough_eaten_bytes = false; + state.action = StackState::kDrop; out_ = out_buf_.get(); // Reset the write pointer. } diff --git a/src/protozero/filtering/message_filter.h b/src/protozero/filtering/message_filter.h index c752b2a78..e67467b6b 100644 --- a/src/protozero/filtering/message_filter.h +++ b/src/protozero/filtering/message_filter.h @@ -181,11 +181,20 @@ class MessageFilter { uint8_t* size_field = nullptr; uint32_t size_field_len = 0; - // When true the next |eat_next_bytes| are copied as-is in output. - // It seems that keeping this field at the end rather than next to - // |eat_next_bytes| makes the filter a little (but measurably) faster. - // (likely something related with struct layout vs cache sizes). - bool passthrough_eaten_bytes = false; + // The pointer to the start of the string to update the string if it is + // filtered. + uint8_t* filter_string_ptr = nullptr; + + // How |eat_next_bytes| should be handled. It seems that keeping this field + // at the end rather than next to |eat_next_bytes| makes the filter a little + // (but measurably) faster. (likely something related with struct layout vs + // cache sizes). + enum FilterAction { + kDrop, + kPassthrough, + kFilterString, + }; + FilterAction action = FilterAction::kDrop; }; uint32_t out_written() { return static_cast<uint32_t>(out_ - &out_buf_[0]); } |