Diffstat (limited to 'pw_tokenizer/rust/pw_tokenizer_macro.rs')
-rw-r--r--  pw_tokenizer/rust/pw_tokenizer_macro.rs  214
1 file changed, 214 insertions, 0 deletions
diff --git a/pw_tokenizer/rust/pw_tokenizer_macro.rs b/pw_tokenizer/rust/pw_tokenizer_macro.rs
new file mode 100644
index 000000000..b271ec433
--- /dev/null
+++ b/pw_tokenizer/rust/pw_tokenizer_macro.rs
@@ -0,0 +1,214 @@
+// Copyright 2023 The Pigweed Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not
+// use this file except in compliance with the License. You may obtain a copy of
+// the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+// License for the specific language governing permissions and limitations under
+// the License.
+
+// This proc macro crate is a private API for the `pw_tokenizer` crate.
+#![doc(hidden)]
+
+use std::ffi::CString;
+
+use proc_macro::TokenStream;
+use proc_macro2::Ident;
+use quote::{format_ident, quote};
+use syn::{
+ parse::{Parse, ParseStream},
+ parse_macro_input, Expr, LitStr, Token,
+};
+
+use pw_format::macros::{generate_printf, FormatAndArgs, PrintfFormatMacroGenerator, Result};
+use pw_tokenizer_core::{hash_string, TOKENIZER_ENTRY_MAGIC};
+
+type TokenStream2 = proc_macro2::TokenStream;
+
+// Handles tokenizing (hashing) `string` and adding it to the token database
+// with the specified `domain`. A detailed description of what is happening
+// here can be found in the docs for the [`pw_tokenizer::token`] macro.
+fn token_backend(domain: &str, string: &str) -> TokenStream2 {
+ let hash = hash_string(string);
+
+ // Line number is omitted as getting that info requires an experimental API:
+ // https://doc.rust-lang.org/proc_macro/struct.Span.html#method.start
+ let ident = format_ident!("_PW_TOKENIZER_STRING_ENTRY_{:08X}", hash);
+
+ // pw_tokenizer is intended for use with ELF files only. Mach-O files (macOS
+ // executables) do not support section names longer than 16 characters, so a
+ // short, unused section name is used on macOS.
+ let section = if cfg!(target_os = "macos") {
+ ",pw,".to_string()
+ } else {
+ format!(".pw_tokenizer.entries.{:08X}", hash)
+ };
+
+ let string = CString::new(string).unwrap();
+ let string_bytes = string.as_bytes_with_nul();
+ let string_bytes_len = string_bytes.len();
+
+ let domain = CString::new(domain).unwrap();
+ let domain_bytes = domain.as_bytes_with_nul();
+ let domain_bytes_len = domain_bytes.len();
+
+ quote! {
+ // Use an inner scope to avoid identifier collision. Name mangling
+ // will disambiguate these in the symbol table.
+ {
+ #[repr(C, packed(1))]
+ struct TokenEntry {
+ magic: u32,
+ token: u32,
+ domain_size: u32,
+ string_length: u32,
+ domain: [u8; #domain_bytes_len],
+ string: [u8; #string_bytes_len],
+ };
+ // This is currently manually verified to be correct.
+ // TODO: b/287132907 - Add integration tests for token database.
+ #[link_section = #section ]
+ static #ident: TokenEntry = TokenEntry {
+ magic: #TOKENIZER_ENTRY_MAGIC,
+ token: #hash,
+ domain_size: #domain_bytes_len as u32,
+ string_length: #string_bytes_len as u32,
+ domain: [ #(#domain_bytes),* ],
+ string: [ #(#string_bytes),* ],
+ };
+
+ #hash
+ }
+ }
+}
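+
+// As a rough illustration (not the literal expansion), `token_backend` emits
+// a block shaped like the following, with `XXXXXXXX` standing in for the
+// 32-bit hash in hex:
+//
+// {
+//     #[repr(C, packed(1))]
+//     struct TokenEntry { /* magic, token, sizes, domain, string */ };
+//     #[link_section = ".pw_tokenizer.entries.XXXXXXXX"]
+//     static _PW_TOKENIZER_STRING_ENTRY_XXXXXXXX: TokenEntry = /* ... */;
+//     0xXXXXXXXX // the block evaluates to the token value
+// }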
+
+// Documented in `pw_tokenizer::token`.
+#[proc_macro]
+pub fn _token(tokens: TokenStream) -> TokenStream {
+ let input = parse_macro_input!(tokens as LitStr);
+ token_backend("", &input.value()).into()
+}
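+
+// Illustrative use through the public wrapper documented above (the string
+// literal here is made up): `pw_tokenizer::token!` expands to the `u32`
+// token for its literal argument.
+//
+// let token: u32 = pw_tokenizer::token!("The temperature is %d");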
+
+// Arguments to `_tokenize_to_buffer`, parsed according to the pattern:
+//   ($buffer:expr, $format_string:literal, $($args:expr),*)
+#[derive(Debug)]
+struct TokenizeToBufferArgs {
+ buffer: Expr,
+ format_and_args: FormatAndArgs,
+}
+
+impl Parse for TokenizeToBufferArgs {
+ fn parse(input: ParseStream) -> syn::parse::Result<Self> {
+ let buffer: Expr = input.parse()?;
+ input.parse::<Token![,]>()?;
+ let format_and_args: FormatAndArgs = input.parse()?;
+
+ Ok(TokenizeToBufferArgs {
+ buffer,
+ format_and_args,
+ })
+ }
+}
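+
+// For example, a (hypothetical) invocation like
+//
+//     _tokenize_to_buffer!(my_buf, "%d: %s", count, name)
+//
+// parses into `buffer = my_buf`, with the format string and remaining
+// arguments handled by `FormatAndArgs`.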
+
+struct TokenizeToBufferGenerator<'a> {
+ domain: &'a str,
+ buffer: &'a Expr,
+ encoding_fragments: Vec<TokenStream2>,
+}
+
+impl<'a> TokenizeToBufferGenerator<'a> {
+ fn new(domain: &'a str, buffer: &'a Expr) -> Self {
+ Self {
+ domain,
+ buffer,
+ encoding_fragments: Vec::new(),
+ }
+ }
+}
+
+impl<'a> PrintfFormatMacroGenerator for TokenizeToBufferGenerator<'a> {
+ fn finalize(self, format_string: String) -> Result<TokenStream2> {
+ // Locally scoped aliases so we can refer to them in `quote!()`
+ let buffer = self.buffer;
+ let encoding_fragments = self.encoding_fragments;
+
+        // `token_backend` returns a `TokenStream2` that both registers the
+        // string in the token database and evaluates to the hash value.
+ let token = token_backend(self.domain, &format_string);
+
+ Ok(quote! {
+ {
+            // Wrap the code in an internal function so that `?` can be used
+            // even though the surrounding function does not return a Result.
+ fn _pw_tokenizer_internal_encode(
+ buffer: &mut [u8],
+ token: u32
+ ) -> __pw_tokenizer_crate::Result<usize> {
+            // Use pw_tokenizer's private re-exports of these pw_stream items
+            // so they can be referenced without needing `pw_stream` in scope.
+ use __pw_tokenizer_crate::{Cursor, Seek, WriteInteger, WriteVarint};
+ let mut cursor = Cursor::new(buffer);
+ cursor.write_u32_le(&token)?;
+ #(#encoding_fragments);*;
+ Ok(cursor.stream_position()? as usize)
+ }
+ _pw_tokenizer_internal_encode(#buffer, #token)
+ }
+ })
+ }
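+
+    // Sketch of what `finalize` emits for a single integer argument
+    // (simplified; the real expansion also embeds the token entry generated
+    // by `token_backend`):
+    //
+    // {
+    //     fn _pw_tokenizer_internal_encode(buffer: &mut [u8], token: u32)
+    //         -> __pw_tokenizer_crate::Result<usize> {
+    //         use __pw_tokenizer_crate::{Cursor, Seek, WriteInteger, WriteVarint};
+    //         let mut cursor = Cursor::new(buffer);
+    //         cursor.write_u32_le(&token)?;
+    //         cursor.write_signed_varint(i32::from(42) as i64)?;
+    //         Ok(cursor.stream_position()? as usize)
+    //     }
+    //     _pw_tokenizer_internal_encode(buf, /* token */)
+    // }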
+
+ fn string_fragment(&mut self, _string: &str) -> Result<()> {
+ // String fragments are encoded directly into the format string.
+ Ok(())
+ }
+
+ fn integer_conversion(&mut self, ty: Ident, expression: Expr) -> Result<Option<String>> {
+ self.encoding_fragments.push(quote! {
+ // pw_tokenizer always uses signed packing for all integers.
+ cursor.write_signed_varint(#ty::from(#expression) as i64)?;
+ });
+
+ Ok(None)
+ }
+
+ fn string_conversion(&mut self, expression: Expr) -> Result<Option<String>> {
+ self.encoding_fragments.push(quote! {
+ __pw_tokenizer_crate::internal::encode_string(&mut cursor, #expression)?;
+ });
+ Ok(None)
+ }
+
+ fn char_conversion(&mut self, expression: Expr) -> Result<Option<String>> {
+ self.encoding_fragments.push(quote! {
+ cursor.write_u8_le(&u8::from(#expression))?;
+ });
+ Ok(None)
+ }
+}
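+
+// Taken together, the conversion callbacks above map format specifiers onto
+// the wire encoding roughly as follows (an illustrative summary, not an
+// exhaustive list of specifiers):
+//
+//   integers (e.g. %d, %u) -> cursor.write_signed_varint(value as i64)
+//   strings  (%s)          -> internal::encode_string(&mut cursor, value)
+//   chars    (%c)          -> cursor.write_u8_le(&u8::from(value))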
+
+// Generates code to marshal a tokenized string and its arguments into a
+// buffer. See [`pw_tokenizer::tokenize_to_buffer`] for details on behavior.
+//
+// Internally the buffer is wrapped in a [`pw_stream::Cursor`] so it can be
+// filled incrementally.
+#[proc_macro]
+pub fn _tokenize_to_buffer(tokens: TokenStream) -> TokenStream {
+ let input = parse_macro_input!(tokens as TokenizeToBufferArgs);
+
+    // The domain is currently hard-coded to "".
+ let generator = TokenizeToBufferGenerator::new("", &input.buffer);
+
+ match generate_printf(generator, input.format_and_args) {
+ Ok(token_stream) => token_stream.into(),
+ Err(e) => e.to_compile_error().into(),
+ }
+}
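+
+// Hypothetical end-to-end use via the public `pw_tokenizer` wrapper macro
+// (the buffer size and arguments are made up for illustration):
+//
+// let mut buffer = [0u8; 32];
+// let len = pw_tokenizer::tokenize_to_buffer!(&mut buffer, "temp: %d", temp)?;
+// // buffer[..len] now holds the little-endian token followed by the
+// // varint-encoded argument.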
+
+// The macros themselves are tested in the `pw_tokenizer` crate.
+#[cfg(test)]
+mod tests {}
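+
+// A minimal sanity-check sketch for the non-proc-macro helper above: the
+// generated `TokenStream` should reference the token entry identifier. The
+// module and test names here are illustrative.
+#[cfg(test)]
+mod token_backend_tests {
+    use super::*;
+
+    #[test]
+    fn generated_code_references_entry_ident() {
+        let generated = token_backend("", "hello pigweed").to_string();
+        assert!(generated.contains("_PW_TOKENIZER_STRING_ENTRY_"));
+    }
+}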