diff options
author | Android Build Coastguard Worker <android-build-coastguard-worker@google.com> | 2023-07-07 04:55:47 +0000 |
---|---|---|
committer | Android Build Coastguard Worker <android-build-coastguard-worker@google.com> | 2023-07-07 04:55:47 +0000 |
commit | eaef328854585f125d2a2b600146e870e23c19a7 (patch) | |
tree | 2b233959d3883a8dc027030b9cbcfb61ecc43301 | |
parent | 9287bf18a96259c11e11e9b4c911959584d30936 (diff) | |
parent | c86350b6cc347006c9b1bad6bfbd9056f8bc8aa8 (diff) | |
download | pest_generator-aml_med_341619000.tar.gz |
Snap for 10453563 from c86350b6cc347006c9b1bad6bfbd9056f8bc8aa8 to mainline-media-releaseaml_med_341711000aml_med_341619000aml_med_341513600aml_med_341312300aml_med_341312020aml_med_341111000aml_med_341011000aml_med_340922010android14-mainline-media-release
Change-Id: Ic678281ab1deadaf772da0d18318e549ddb8ea32
-rw-r--r-- | Android.bp | 10 | ||||
-rw-r--r-- | Cargo.toml | 36 | ||||
-rw-r--r-- | Cargo.toml.orig | 19 | ||||
-rw-r--r-- | METADATA | 15 | ||||
-rw-r--r-- | _README.md | 66 | ||||
-rw-r--r-- | src/docs.rs | 122 | ||||
-rw-r--r-- | src/generator.rs | 186 | ||||
-rw-r--r-- | src/lib.rs | 209 | ||||
-rw-r--r-- | src/macros.rs | 14 | ||||
-rw-r--r-- | tests/test.pest | 23 |
10 files changed, 523 insertions, 177 deletions
@@ -1,8 +1,6 @@ // This file is generated by cargo2android.py --run --host-first-multilib. // Do not modify this file as changes will be overridden on upgrade. - - package { default_applicable_licenses: [ "external_rust_crates_pest_generator_license", @@ -45,9 +43,13 @@ rust_library_host { name: "libpest_generator", crate_name: "pest_generator", cargo_env_compat: true, - cargo_pkg_version: "2.1.3", + cargo_pkg_version: "2.5.5", srcs: ["src/lib.rs"], - edition: "2015", + edition: "2021", + features: [ + "default", + "std", + ], rustlibs: [ "libpest", "libpest_meta", @@ -3,30 +3,36 @@ # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies -# to registry (e.g., crates.io) dependencies +# to registry (e.g., crates.io) dependencies. # -# If you believe there's an error in this file please file an -# issue against the rust-lang/cargo repository. If you're -# editing this file be aware that the upstream Cargo.toml -# will likely look very different (and much more reasonable) +# If you are reading this file be aware that the original Cargo.toml +# will likely look very different (and much more reasonable). +# See Cargo.toml.orig for the original contents. [package] +edition = "2021" +rust-version = "1.56" name = "pest_generator" -version = "2.1.3" +version = "2.5.5" authors = ["DragoČ™ Tiselice <dragostiselice@gmail.com>"] description = "pest code generator" -homepage = "https://pest-parser.github.io/" +homepage = "https://pest.rs/" documentation = "https://docs.rs/pest" readme = "_README.md" -keywords = ["pest", "generator"] +keywords = [ + "pest", + "generator", +] categories = ["parsing"] license = "MIT/Apache-2.0" repository = "https://github.com/pest-parser/pest" + [dependencies.pest] -version = "2.1.0" +version = "2.5.5" +default-features = false [dependencies.pest_meta] -version = "2.1.0" +version = "2.5.5" [dependencies.proc-macro2] version = "1.0" @@ -36,11 +42,7 @@ version = "1.0" [dependencies.syn] version = "1.0" -[badges.codecov] -repository = "pest-parser/pest" - -[badges.maintenance] -status = "actively-developed" -[badges.travis-ci] -repository = "pest-parser/pest" +[features] +default = ["std"] +std = ["pest/std"] diff --git a/Cargo.toml.orig b/Cargo.toml.orig index a70cb5f..ab98cb2 100644 --- a/Cargo.toml.orig +++ b/Cargo.toml.orig @@ -1,24 +1,25 @@ [package] name = "pest_generator" description = "pest code generator" -version = "2.1.3" +version = "2.5.5" +edition = "2021" authors = ["DragoČ™ Tiselice <dragostiselice@gmail.com>"] -homepage = "https://pest-parser.github.io/" +homepage = "https://pest.rs/" repository = "https://github.com/pest-parser/pest" documentation = "https://docs.rs/pest" keywords = ["pest", "generator"] categories = ["parsing"] license = "MIT/Apache-2.0" readme = "_README.md" +rust-version = "1.56" + +[features] +default = ["std"] +std = ["pest/std"] [dependencies] -pest = { path = "../pest", version = "2.1.0" } -pest_meta = { path = "../meta", version = "2.1.0" } +pest = { path = "../pest", version = "2.5.5", default-features = false } +pest_meta = { path = "../meta", version = "2.5.5" } proc-macro2 = "1.0" quote = "1.0" syn = "1.0" - -[badges] -codecov = { repository = "pest-parser/pest" } -maintenance = { status = "actively-developed" } -travis-ci = { repository = "pest-parser/pest" } @@ -1,3 +1,7 @@ +# This project was upgraded with external_updater. +# Usage: tools/external_updater/updater.sh update rust/crates/pest_generator +# For more info, check https://cs.android.com/android/platform/superproject/+/master:tools/external_updater/README.md + name: "pest_generator" description: "pest code generator" third_party { @@ -7,14 +11,13 @@ third_party { } url { type: ARCHIVE - value: "https://static.crates.io/crates/pest_generator/pest_generator-2.1.3.crate" + value: "https://static.crates.io/crates/pest_generator/pest_generator-2.5.5.crate" } - version: "2.1.3" - # Dual-licensed, using the least restrictive per go/thirdpartylicenses#same. + version: "2.5.5" license_type: NOTICE last_upgrade_date { - year: 2022 - month: 1 - day: 27 + year: 2023 + month: 2 + day: 16 } } @@ -1,16 +1,18 @@ + <p align="center"> <img src="https://raw.github.com/pest-parser/pest/master/pest-logo.svg?sanitize=true" width="80%"/> </p> # pest. The Elegant Parser -[![Join the chat at https://gitter.im/dragostis/pest](https://badges.gitter.im/dragostis/pest.svg)](https://gitter.im/dragostis/pest?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) -[![Book](https://img.shields.io/badge/book-WIP-4d76ae.svg)](https://pest-parser.github.io/book) +[![Join the chat at https://gitter.im/pest-parser/pest](https://badges.gitter.im/dragostis/pest.svg)](https://gitter.im/pest-parser/pest?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) +[![Book](https://img.shields.io/badge/book-WIP-4d76ae.svg)](https://pest.rs/book) [![Docs](https://docs.rs/pest/badge.svg)](https://docs.rs/pest) -[![Build Status](https://travis-ci.org/pest-parser/pest.svg?branch=master)](https://travis-ci.org/pest-parser/pest) +[![pest Continuous Integration](https://github.com/pest-parser/pest/actions/workflows/ci.yml/badge.svg)](https://github.com/pest-parser/pest/actions/workflows/ci.yml) [![codecov](https://codecov.io/gh/pest-parser/pest/branch/master/graph/badge.svg)](https://codecov.io/gh/pest-parser/pest) -[![Fuzzit Status](https://app.fuzzit.dev/badge?org_id=pest-parser)](https://app.fuzzit.dev/orgs/pest-parser/dashboard) +<a href="https://blog.rust-lang.org/2021/11/01/Rust-1.56.1.html"><img alt="Rustc Version 1.56.1+" src="https://img.shields.io/badge/rustc-1.56.1%2B-lightgrey.svg"/></a> + [![Crates.io](https://img.shields.io/crates/d/pest.svg)](https://crates.io/crates/pest) [![Crates.io](https://img.shields.io/crates/v/pest.svg)](https://crates.io/crates/pest) @@ -29,25 +31,28 @@ Other helpful resources: * API reference on [docs.rs] * play with grammars and share them on our [fiddle] -* leave feedback, ask questions, or greet us on [Gitter] +* find previous common questions answered or ask questions on [GitHub Discussions] +* leave feedback, ask questions, or greet us on [Gitter] or [Discord] -[book]: https://pest-parser.github.io/book +[book]: https://pest.rs/book [docs.rs]: https://docs.rs/pest -[fiddle]: https://pest-parser.github.io/#editor -[Gitter]: https://gitter.im/dragostis/pest +[fiddle]: https://pest.rs/#editor +[Gitter]: https://gitter.im/pest-parser/pest +[Discord]: https://discord.gg/XEGACtWpT2 +[GitHub Discussions]: https://github.com/pest-parser/pest/discussions ## Example -The following is an example of a grammar for a list of alpha-numeric identifiers -where the first identifier does not start with a digit: +The following is an example of a grammar for a list of alphanumeric identifiers +where all identifiers don't start with a digit: ```rust alpha = { 'a'..'z' | 'A'..'Z' } digit = { '0'..'9' } -ident = { (alpha | digit)+ } +ident = { !digit ~ (alpha | digit)+ } -ident_list = _{ !digit ~ ident ~ (" " ~ ident)+ } +ident_list = _{ ident ~ (" " ~ ident)* } // ^ // ident_list rule is silent which means it produces no tokens ``` @@ -79,6 +84,9 @@ thread 'main' panicked at ' --> 1:1 = expected ident', src/main.rs:12 ``` +These error messages can be obtained from their default `Display` implementation, +e.g. `panic!("{}", parser_result.unwrap_err())` or `println!("{}", e)`. + ## Pairs API The grammar can be used to derive a `Parser` implementation automatically. @@ -131,6 +139,25 @@ Letter: b Digit: 2 ``` +### Defining multiple parsers in a single file +The current automatic `Parser` derivation will produce the `Rule` enum +which would have name conflicts if one tried to define multiple such structs +that automatically derive `Parser`. One possible way around it is to put each +parser struct in a separate namespace: + +```rust +mod a { + #[derive(Parser)] + #[grammar = "a.pest"] + pub struct ParserA; +} +mod b { + #[derive(Parser)] + #[grammar = "b.pest"] + pub struct ParserB; +} +``` + ## Other features * Precedence climbing @@ -143,16 +170,18 @@ Digit: 2 * [pest_meta](https://github.com/pest-parser/pest/blob/master/meta/src/grammar.pest) (bootstrapped) * [AshPaper](https://github.com/shnewto/ashpaper) * [brain](https://github.com/brain-lang/brain) -* [Chelone](https://github.com/Aaronepower/chelone) +* [cicada](https://github.com/mitnk/cicada) * [comrak](https://github.com/kivikakk/comrak) * [elastic-rs](https://github.com/cch123/elastic-rs) * [graphql-parser](https://github.com/Keats/graphql-parser) * [handlebars-rust](https://github.com/sunng87/handlebars-rust) * [hexdino](https://github.com/Luz/hexdino) * [Huia](https://gitlab.com/jimsy/huia/) +* [insta](https://github.com/mitsuhiko/insta) * [jql](https://github.com/yamafaktory/jql) * [json5-rs](https://github.com/callum-oakley/json5-rs) * [mt940](https://github.com/svenstaro/mt940-rs) +* [Myoxine](https://github.com/d3bate/myoxine) * [py_literal](https://github.com/jturner314/py_literal) * [rouler](https://github.com/jarcane/rouler) * [RuSh](https://github.com/lwandrebeck/RuSh) @@ -162,6 +191,17 @@ Digit: 2 * [ui_gen](https://github.com/emoon/ui_gen) * [ukhasnet-parser](https://github.com/adamgreig/ukhasnet-parser) * [ZoKrates](https://github.com/ZoKrates/ZoKrates) +* [Vector](https://github.com/timberio/vector) +* [AutoCorrect](https://github.com/huacnlee/autocorrect) +* [yaml-peg](https://github.com/aofdev/yaml-peg) +* [qubit](https://github.com/abhimanyu003/qubit) +* [caith](https://github.com/Geobert/caith) (a dice roller crate) +* [Melody](https://github.com/yoav-lavi/melody) + +## Minimum Supported Rust Version (MSRV) + +This library should always compile with default features on **Rust 1.56.1** +or **Rust 1.61** with `const_prec_climber`. ## Special thanks diff --git a/src/docs.rs b/src/docs.rs new file mode 100644 index 0000000..f1ce188 --- /dev/null +++ b/src/docs.rs @@ -0,0 +1,122 @@ +use pest::iterators::Pairs; +use pest_meta::parser::Rule; +use std::collections::HashMap; + +#[derive(Debug)] +pub(crate) struct DocComment { + pub grammar_doc: String, + + /// HashMap for store all doc_comments for rules. + /// key is rule name, value is doc_comment. + pub line_docs: HashMap<String, String>, +} + +/// Consume pairs to matches `Rule::grammar_doc`, `Rule::line_doc` into `DocComment` +/// +/// e.g. +/// +/// a pest file: +/// +/// ```ignore +/// //! This is a grammar doc +/// /// line doc 1 +/// /// line doc 2 +/// foo = {} +/// +/// /// line doc 3 +/// bar = {} +/// ``` +/// +/// Then will get: +/// +/// ```ignore +/// grammar_doc = "This is a grammar doc" +/// line_docs = { "foo": "line doc 1\nline doc 2", "bar": "line doc 3" } +/// ``` +pub(crate) fn consume(pairs: Pairs<'_, Rule>) -> DocComment { + let mut grammar_doc = String::new(); + + let mut line_docs: HashMap<String, String> = HashMap::new(); + let mut line_doc = String::new(); + + for pair in pairs { + match pair.as_rule() { + Rule::grammar_doc => { + // grammar_doc > inner_doc + let inner_doc = pair.into_inner().next().unwrap(); + grammar_doc.push_str(inner_doc.as_str()); + grammar_doc.push('\n'); + } + Rule::grammar_rule => { + if let Some(inner) = pair.into_inner().next() { + // grammar_rule > line_doc | identifier + match inner.as_rule() { + Rule::line_doc => { + if let Some(inner_doc) = inner.into_inner().next() { + line_doc.push_str(inner_doc.as_str()); + line_doc.push('\n'); + } + } + Rule::identifier => { + if !line_doc.is_empty() { + let rule_name = inner.as_str().to_owned(); + + // Remove last \n + line_doc.pop(); + line_docs.insert(rule_name, line_doc.clone()); + line_doc.clear(); + } + } + _ => (), + } + } + } + _ => (), + } + } + + if !grammar_doc.is_empty() { + // Remove last \n + grammar_doc.pop(); + } + + DocComment { + grammar_doc, + line_docs, + } +} + +#[cfg(test)] +mod tests { + use std::collections::HashMap; + + use pest_meta::parser; + use pest_meta::parser::Rule; + + #[test] + fn test_doc_comment() { + let pairs = match parser::parse(Rule::grammar_rules, include_str!("../tests/test.pest")) { + Ok(pairs) => pairs, + Err(_) => panic!("error parsing tests/test.pest"), + }; + + let doc_comment = super::consume(pairs); + + let mut expected = HashMap::new(); + expected.insert("foo".to_owned(), "Matches foo str, e.g.: `foo`".to_owned()); + expected.insert( + "bar".to_owned(), + "Matches bar str\n\n Indent 2, e.g: `bar` or `foobar`".to_owned(), + ); + expected.insert( + "dar".to_owned(), + "Matches dar\n\nMatch dar description\n".to_owned(), + ); + assert_eq!(expected, doc_comment.line_docs); + + assert_eq!( + "A parser for JSON file.\nAnd this is a example for JSON parser.\n\n indent-4-space\n", + doc_comment.grammar_doc + ); + } +} diff --git a/src/generator.rs b/src/generator.rs index bed56f3..87d1f00 100644 --- a/src/generator.rs +++ b/src/generator.rs @@ -9,21 +9,23 @@ use std::path::PathBuf; -use proc_macro2::{Span, TokenStream}; +use proc_macro2::TokenStream; use quote::{ToTokens, TokenStreamExt}; use syn::{self, Generics, Ident}; +use pest::unicode::unicode_property_names; use pest_meta::ast::*; use pest_meta::optimizer::*; -use pest_meta::UNICODE_PROPERTY_NAMES; -#[allow(clippy::needless_pass_by_value)] -pub fn generate( +use crate::docs::DocComment; + +pub(crate) fn generate( name: Ident, generics: &Generics, path: Option<PathBuf>, rules: Vec<OptimizedRule>, defaults: Vec<&str>, + doc_comment: &DocComment, include_grammar: bool, ) -> TokenStream { let uses_eoi = defaults.iter().any(|name| *name == "EOI"); @@ -37,7 +39,7 @@ pub fn generate( } else { quote!() }; - let rule_enum = generate_enum(&rules, uses_eoi); + let rule_enum = generate_enum(&rules, doc_comment, uses_eoi); let patterns = generate_patterns(&rules, uses_eoi); let skip = generate_skip(&rules); @@ -52,17 +54,20 @@ pub fn generate( let (impl_generics, ty_generics, where_clause) = generics.split_for_impl(); + let result = result_type(); + let parser_impl = quote! { #[allow(clippy::all)] impl #impl_generics ::pest::Parser<Rule> for #name #ty_generics #where_clause { fn parse<'i>( rule: Rule, input: &'i str - ) -> ::std::result::Result< + ) -> #result< ::pest::iterators::Pairs<'i, Rule>, ::pest::error::Error<Rule> > { mod rules { + #![allow(clippy::upper_case_acronyms)] pub mod hidden { use super::super::Rule; #skip @@ -98,7 +103,7 @@ fn generate_builtin_rules() -> Vec<(&'static str, TokenStream)> { let mut builtins = Vec::new(); insert_builtin!(builtins, ANY, state.skip(1)); - insert_public_builtin!( + insert_builtin!( builtins, EOI, state.rule(Rule::EOI, |state| state.end_of_input()) @@ -149,13 +154,15 @@ fn generate_builtin_rules() -> Vec<(&'static str, TokenStream)> { .or_else(|state| state.match_string("\r")) ); - for property in UNICODE_PROPERTY_NAMES { + let box_ty = box_type(); + + for property in unicode_property_names() { let property_ident: Ident = syn::parse_str(property).unwrap(); // insert manually for #property substitution builtins.push((property, quote! { #[inline] #[allow(dead_code, non_snake_case, unused_variables)] - fn #property_ident(state: Box<::pest::ParserState<Rule>>) -> ::pest::ParseResult<Box<::pest::ParserState<Rule>>> { + fn #property_ident(state: #box_ty<::pest::ParserState<'_, Rule>>) -> ::pest::ParseResult<#box_ty<::pest::ParserState<'_, Rule>>> { state.match_char_by(::pest::unicode::#property_ident) } })); @@ -165,7 +172,7 @@ fn generate_builtin_rules() -> Vec<(&'static str, TokenStream)> { // Needed because Cargo doesn't watch for changes in grammars. fn generate_include(name: &Ident, path: &str) -> TokenStream { - let const_name = Ident::new(&format!("_PEST_GRAMMAR_{}", name), Span::call_site()); + let const_name = format_ident!("_PEST_GRAMMAR_{}", name); // Need to make this relative to the current directory since the path to the file // is derived from the CARGO_MANIFEST_DIR environment variable let mut current_dir = std::env::current_dir().expect("Unable to get current directory"); @@ -177,13 +184,26 @@ fn generate_include(name: &Ident, path: &str) -> TokenStream { } } -fn generate_enum(rules: &[OptimizedRule], uses_eoi: bool) -> TokenStream { - let rules = rules - .iter() - .map(|rule| Ident::new(rule.name.as_str(), Span::call_site())); +fn generate_enum(rules: &[OptimizedRule], doc_comment: &DocComment, uses_eoi: bool) -> TokenStream { + let rules = rules.iter().map(|rule| { + let rule_name = format_ident!("r#{}", rule.name); + + match doc_comment.line_docs.get(&rule.name) { + Some(doc) => quote! { + #[doc = #doc] + #rule_name + }, + None => quote! { + #rule_name + }, + } + }); + + let grammar_doc = &doc_comment.grammar_doc; if uses_eoi { quote! { - #[allow(dead_code, non_camel_case_types)] + #[doc = #grammar_doc] + #[allow(dead_code, non_camel_case_types, clippy::upper_case_acronyms)] #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] pub enum Rule { EOI, @@ -192,7 +212,8 @@ fn generate_enum(rules: &[OptimizedRule], uses_eoi: bool) -> TokenStream { } } else { quote! { - #[allow(dead_code, non_camel_case_types)] + #[doc = #grammar_doc] + #[allow(dead_code, non_camel_case_types, clippy::upper_case_acronyms)] #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] pub enum Rule { #( #rules ),* @@ -205,7 +226,8 @@ fn generate_patterns(rules: &[OptimizedRule], uses_eoi: bool) -> TokenStream { let mut rules: Vec<TokenStream> = rules .iter() .map(|rule| { - let rule = Ident::new(rule.name.as_str(), Span::call_site()); + let rule = format_ident!("r#{}", rule.name); + quote! { Rule::#rule => rules::#rule(state) } @@ -224,10 +246,10 @@ fn generate_patterns(rules: &[OptimizedRule], uses_eoi: bool) -> TokenStream { } fn generate_rule(rule: OptimizedRule) -> TokenStream { - let name = Ident::new(&rule.name, Span::call_site()); + let name = format_ident!("r#{}", rule.name); let expr = if rule.ty == RuleType::Atomic || rule.ty == RuleType::CompoundAtomic { generate_expr_atomic(rule.expr) - } else if name == "WHITESPACE" || name == "COMMENT" { + } else if rule.name == "WHITESPACE" || rule.name == "COMMENT" { let atomic = generate_expr_atomic(rule.expr); quote! { @@ -239,11 +261,13 @@ fn generate_rule(rule: OptimizedRule) -> TokenStream { generate_expr(rule.expr) }; + let box_ty = box_type(); + match rule.ty { RuleType::Normal => quote! { #[inline] #[allow(non_snake_case, unused_variables)] - pub fn #name(state: Box<::pest::ParserState<Rule>>) -> ::pest::ParseResult<Box<::pest::ParserState<Rule>>> { + pub fn #name(state: #box_ty<::pest::ParserState<'_, Rule>>) -> ::pest::ParseResult<#box_ty<::pest::ParserState<'_, Rule>>> { state.rule(Rule::#name, |state| { #expr }) @@ -252,14 +276,14 @@ fn generate_rule(rule: OptimizedRule) -> TokenStream { RuleType::Silent => quote! { #[inline] #[allow(non_snake_case, unused_variables)] - pub fn #name(state: Box<::pest::ParserState<Rule>>) -> ::pest::ParseResult<Box<::pest::ParserState<Rule>>> { + pub fn #name(state: #box_ty<::pest::ParserState<'_, Rule>>) -> ::pest::ParseResult<#box_ty<::pest::ParserState<'_, Rule>>> { #expr } }, RuleType::Atomic => quote! { #[inline] #[allow(non_snake_case, unused_variables)] - pub fn #name(state: Box<::pest::ParserState<Rule>>) -> ::pest::ParseResult<Box<::pest::ParserState<Rule>>> { + pub fn #name(state: #box_ty<::pest::ParserState<'_, Rule>>) -> ::pest::ParseResult<#box_ty<::pest::ParserState<'_, Rule>>> { state.rule(Rule::#name, |state| { state.atomic(::pest::Atomicity::Atomic, |state| { #expr @@ -270,7 +294,7 @@ fn generate_rule(rule: OptimizedRule) -> TokenStream { RuleType::CompoundAtomic => quote! { #[inline] #[allow(non_snake_case, unused_variables)] - pub fn #name(state: Box<::pest::ParserState<Rule>>) -> ::pest::ParseResult<Box<::pest::ParserState<Rule>>> { + pub fn #name(state: #box_ty<::pest::ParserState<'_, Rule>>) -> ::pest::ParseResult<#box_ty<::pest::ParserState<'_, Rule>>> { state.atomic(::pest::Atomicity::CompoundAtomic, |state| { state.rule(Rule::#name, |state| { #expr @@ -281,7 +305,7 @@ fn generate_rule(rule: OptimizedRule) -> TokenStream { RuleType::NonAtomic => quote! { #[inline] #[allow(non_snake_case, unused_variables)] - pub fn #name(state: Box<::pest::ParserState<Rule>>) -> ::pest::ParseResult<Box<::pest::ParserState<Rule>>> { + pub fn #name(state: #box_ty<::pest::ParserState<'_, Rule>>) -> ::pest::ParseResult<#box_ty<::pest::ParserState<'_, Rule>>> { state.atomic(::pest::Atomicity::NonAtomic, |state| { state.rule(Rule::#name, |state| { #expr @@ -358,7 +382,7 @@ fn generate_expr(expr: OptimizedExpr) -> TokenStream { } } OptimizedExpr::Ident(ident) => { - let ident = Ident::new(&ident, Span::call_site()); + let ident = format_ident!("r#{}", ident); quote! { self::#ident(state) } } OptimizedExpr::PeekSlice(start, end_) => { @@ -504,7 +528,7 @@ fn generate_expr_atomic(expr: OptimizedExpr) -> TokenStream { } } OptimizedExpr::Ident(ident) => { - let ident = Ident::new(&ident, Span::call_site()); + let ident = format_ident!("r#{}", ident); quote! { self::#ident(state) } } OptimizedExpr::PeekSlice(start, end_) => { @@ -619,17 +643,45 @@ struct QuoteOption<T>(Option<T>); impl<T: ToTokens> ToTokens for QuoteOption<T> { fn to_tokens(&self, tokens: &mut TokenStream) { + let option = option_type(); tokens.append_all(match self.0 { - Some(ref t) => quote! { ::std::option::Option::Some(#t) }, - None => quote! { ::std::option::Option::None }, + Some(ref t) => quote! { #option::Some(#t) }, + None => quote! { #option::None }, }); } } +fn box_type() -> TokenStream { + #[cfg(feature = "std")] + quote! { ::std::boxed::Box } + + #[cfg(not(feature = "std"))] + quote! { ::alloc::boxed::Box } +} + +fn result_type() -> TokenStream { + #[cfg(feature = "std")] + quote! { ::std::result::Result } + + #[cfg(not(feature = "std"))] + quote! { ::core::result::Result } +} + +fn option_type() -> TokenStream { + #[cfg(feature = "std")] + quote! { ::std::option::Option } + + #[cfg(not(feature = "std"))] + quote! { ::core::option::Option } +} + #[cfg(test)] mod tests { use super::*; + use proc_macro2::Span; + use std::collections::HashMap; + #[test] fn rule_enum_simple() { let rules = vec![OptimizedRule { @@ -638,13 +690,23 @@ mod tests { expr: OptimizedExpr::Ident("g".to_owned()), }]; + let mut line_docs = HashMap::new(); + line_docs.insert("f".to_owned(), "This is rule comment".to_owned()); + + let doc_comment = &DocComment { + grammar_doc: "Rule doc\nhello".to_owned(), + line_docs, + }; + assert_eq!( - generate_enum(&rules, false).to_string(), + generate_enum(&rules, doc_comment, false).to_string(), quote! { - #[allow(dead_code, non_camel_case_types)] + #[doc = "Rule doc\nhello"] + #[allow(dead_code, non_camel_case_types, clippy::upper_case_acronyms)] #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] pub enum Rule { - f + #[doc = "This is rule comment"] + r#f } } .to_string() @@ -832,7 +894,7 @@ mod tests { assert_eq!( generate_expr(expr).to_string(), quote! { - self::a(state).or_else(|state| { + self::r#a(state).or_else(|state| { state.sequence(|state| { state.match_range('a'..'b').and_then(|state| { super::hidden::skip(state) @@ -898,7 +960,7 @@ mod tests { assert_eq!( generate_expr_atomic(expr).to_string(), quote! { - self::a(state).or_else(|state| { + self::r#a(state).or_else(|state| { state.sequence(|state| { state.match_range('a'..'b').and_then(|state| { state.lookahead(false, |state| { @@ -926,28 +988,50 @@ mod tests { } #[test] - fn generate_complete() { + fn test_generate_complete() { let name = Ident::new("MyParser", Span::call_site()); let generics = Generics::default(); - let rules = vec![OptimizedRule { - name: "a".to_owned(), - ty: RuleType::Silent, - expr: OptimizedExpr::Str("b".to_owned()), - }]; + + let rules = vec![ + OptimizedRule { + name: "a".to_owned(), + ty: RuleType::Silent, + expr: OptimizedExpr::Str("b".to_owned()), + }, + OptimizedRule { + name: "if".to_owned(), + ty: RuleType::Silent, + expr: OptimizedExpr::Ident("a".to_owned()), + }, + ]; + + let mut line_docs = HashMap::new(); + line_docs.insert("if".to_owned(), "If statement".to_owned()); + + let doc_comment = &DocComment { + line_docs, + grammar_doc: "This is Rule doc\nThis is second line".to_owned(), + }; + let defaults = vec!["ANY"]; + let result = result_type(); + let box_ty = box_type(); let mut current_dir = std::env::current_dir().expect("Unable to get current directory"); current_dir.push("test.pest"); let test_path = current_dir.to_str().expect("path contains invalid unicode"); assert_eq!( - generate(name, &generics, Some(PathBuf::from("test.pest")), rules, defaults, true).to_string(), + generate(name, &generics, Some(PathBuf::from("test.pest")), rules, defaults, doc_comment, true).to_string(), quote! { #[allow(non_upper_case_globals)] const _PEST_GRAMMAR_MyParser: &'static str = include_str!(#test_path); - #[allow(dead_code, non_camel_case_types)] + #[doc = "This is Rule doc\nThis is second line"] + #[allow(dead_code, non_camel_case_types, clippy::upper_case_acronyms)] #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] pub enum Rule { - a + r#a, + #[doc = "If statement"] + r#if } #[allow(clippy::all)] @@ -955,17 +1039,18 @@ mod tests { fn parse<'i>( rule: Rule, input: &'i str - ) -> ::std::result::Result< + ) -> #result< ::pest::iterators::Pairs<'i, Rule>, ::pest::error::Error<Rule> > { mod rules { + #![allow(clippy::upper_case_acronyms)] pub mod hidden { use super::super::Rule; #[inline] #[allow(dead_code, non_snake_case, unused_variables)] - pub fn skip(state: Box<::pest::ParserState<Rule>>) -> ::pest::ParseResult<Box<::pest::ParserState<Rule>>> { + pub fn skip(state: #box_ty<::pest::ParserState<'_, Rule>>) -> ::pest::ParseResult<#box_ty<::pest::ParserState<'_, Rule>>> { Ok(state) } } @@ -975,13 +1060,19 @@ mod tests { #[inline] #[allow(non_snake_case, unused_variables)] - pub fn a(state: Box<::pest::ParserState<Rule>>) -> ::pest::ParseResult<Box<::pest::ParserState<Rule>>> { + pub fn r#a(state: #box_ty<::pest::ParserState<'_, Rule>>) -> ::pest::ParseResult<#box_ty<::pest::ParserState<'_, Rule>>> { state.match_string("b") } #[inline] + #[allow(non_snake_case, unused_variables)] + pub fn r#if(state: #box_ty<::pest::ParserState<'_, Rule>>) -> ::pest::ParseResult<#box_ty<::pest::ParserState<'_, Rule>>> { + self::r#a(state) + } + + #[inline] #[allow(dead_code, non_snake_case, unused_variables)] - pub fn ANY(state: Box<::pest::ParserState<Rule>>) -> ::pest::ParseResult<Box<::pest::ParserState<Rule>>> { + pub fn ANY(state: #box_ty<::pest::ParserState<'_, Rule>>) -> ::pest::ParseResult<#box_ty<::pest::ParserState<'_, Rule>>> { state.skip(1) } } @@ -991,7 +1082,8 @@ mod tests { ::pest::state(input, |state| { match rule { - Rule::a => rules::a(state) + Rule::r#a => rules::r#a(state), + Rule::r#if => rules::r#if(state) } }) } @@ -7,17 +7,19 @@ // option. All files in the project carrying such notice may not be copied, // modified, or distributed except according to those terms. -#![doc(html_root_url = "https://docs.rs/pest_derive")] +#![doc( + html_root_url = "https://docs.rs/pest_derive", + html_logo_url = "https://raw.githubusercontent.com/pest-parser/pest/master/pest-logo.svg", + html_favicon_url = "https://raw.githubusercontent.com/pest-parser/pest/master/pest-logo.svg" +)] +#![warn(missing_docs, rust_2018_idioms, unused_qualifications)] #![recursion_limit = "256"] +//! # pest generator +//! +//! This crate generates code from ASTs (which is used in the `pest_derive` crate). -extern crate pest; -extern crate pest_meta; - -extern crate proc_macro; -extern crate proc_macro2; #[macro_use] extern crate quote; -extern crate syn; use std::env; use std::fs::File; @@ -29,73 +31,80 @@ use syn::{Attribute, DeriveInput, Generics, Ident, Lit, Meta}; #[macro_use] mod macros; +mod docs; mod generator; -use pest_meta::parser::{self, Rule}; +use pest_meta::parser::{self, rename_meta_rule, Rule}; use pest_meta::{optimizer, unwrap_or_report, validator}; +/// Processes the derive/proc macro input and generates the corresponding parser based +/// on the parsed grammar. If `include_grammar` is set to true, it'll generate an explicit +/// "include_str" statement (done in pest_derive, but turned off in the local bootstrap). pub fn derive_parser(input: TokenStream, include_grammar: bool) -> TokenStream { let ast: DeriveInput = syn::parse2(input).unwrap(); - let (name, generics, content) = parse_derive(ast); - - let (data, path) = match content { - GrammarSource::File(ref path) => { - let root = env::var("CARGO_MANIFEST_DIR").unwrap_or_else(|_| ".".into()); - let path = Path::new(&root).join("src/").join(&path); - let file_name = match path.file_name() { - Some(file_name) => file_name, - None => panic!("grammar attribute should point to a file"), - }; - - let data = match read_file(&path) { - Ok(data) => data, - Err(error) => panic!("error opening {:?}: {}", file_name, error), - }; - (data, Some(path.clone())) + let (name, generics, contents) = parse_derive(ast); + + let mut data = String::new(); + let mut path = None; + + for content in contents { + let (_data, _path) = match content { + GrammarSource::File(ref path) => { + let root = env::var("CARGO_MANIFEST_DIR").unwrap_or_else(|_| ".".into()); + + // Check whether we can find a file at the path relative to the CARGO_MANIFEST_DIR + // first. + // + // If we cannot find the expected file over there, fallback to the + // `CARGO_MANIFEST_DIR/src`, which is the old default and kept for convenience + // reasons. + // TODO: This could be refactored once `std::path::absolute()` get's stabilized. + // https://doc.rust-lang.org/std/path/fn.absolute.html + let path = if Path::new(&root).join(path).exists() { + Path::new(&root).join(path) + } else { + Path::new(&root).join("src/").join(path) + }; + + let file_name = match path.file_name() { + Some(file_name) => file_name, + None => panic!("grammar attribute should point to a file"), + }; + + let data = match read_file(&path) { + Ok(data) => data, + Err(error) => panic!("error opening {:?}: {}", file_name, error), + }; + (data, Some(path.clone())) + } + GrammarSource::Inline(content) => (content, None), + }; + + data.push_str(&_data); + if _path.is_some() { + path = _path; } - GrammarSource::Inline(content) => (content, None), - }; + } let pairs = match parser::parse(Rule::grammar_rules, &data) { Ok(pairs) => pairs, - Err(error) => panic!( - "error parsing \n{}", - error.renamed_rules(|rule| match *rule { - Rule::grammar_rule => "rule".to_owned(), - Rule::_push => "PUSH".to_owned(), - Rule::assignment_operator => "`=`".to_owned(), - Rule::silent_modifier => "`_`".to_owned(), - Rule::atomic_modifier => "`@`".to_owned(), - Rule::compound_atomic_modifier => "`$`".to_owned(), - Rule::non_atomic_modifier => "`!`".to_owned(), - Rule::opening_brace => "`{`".to_owned(), - Rule::closing_brace => "`}`".to_owned(), - Rule::opening_brack => "`[`".to_owned(), - Rule::closing_brack => "`]`".to_owned(), - Rule::opening_paren => "`(`".to_owned(), - Rule::positive_predicate_operator => "`&`".to_owned(), - Rule::negative_predicate_operator => "`!`".to_owned(), - Rule::sequence_operator => "`&`".to_owned(), - Rule::choice_operator => "`|`".to_owned(), - Rule::optional_operator => "`?`".to_owned(), - Rule::repeat_operator => "`*`".to_owned(), - Rule::repeat_once_operator => "`+`".to_owned(), - Rule::comma => "`,`".to_owned(), - Rule::closing_paren => "`)`".to_owned(), - Rule::quote => "`\"`".to_owned(), - Rule::insensitive_string => "`^`".to_owned(), - Rule::range_operator => "`..`".to_owned(), - Rule::single_quote => "`'`".to_owned(), - other_rule => format!("{:?}", other_rule), - }) - ), + Err(error) => panic!("error parsing \n{}", error.renamed_rules(rename_meta_rule)), }; let defaults = unwrap_or_report(validator::validate_pairs(pairs.clone())); + let doc_comment = docs::consume(pairs.clone()); let ast = unwrap_or_report(parser::consume_rules(pairs)); let optimized = optimizer::optimize(ast); - generator::generate(name, &generics, path, optimized, defaults, include_grammar) + generator::generate( + name, + &generics, + path, + optimized, + defaults, + &doc_comment, + include_grammar, + ) } fn read_file<P: AsRef<Path>>(path: P) -> io::Result<String> { @@ -111,7 +120,7 @@ enum GrammarSource { Inline(String), } -fn parse_derive(ast: DeriveInput) -> (Ident, Generics, GrammarSource) { +fn parse_derive(ast: DeriveInput) -> (Ident, Generics, Vec<GrammarSource>) { let name = ast.ident; let generics = ast.generics; @@ -126,13 +135,16 @@ fn parse_derive(ast: DeriveInput) -> (Ident, Generics, GrammarSource) { }) .collect(); - let argument = match grammar.len() { - 0 => panic!("a grammar file needs to be provided with the #[grammar = \"PATH\"] or #[grammar_inline = \"GRAMMAR CONTENTS\"] attribute"), - 1 => get_attribute(grammar[0]), - _ => panic!("only 1 grammar file can be provided"), - }; + if grammar.is_empty() { + panic!("a grammar file needs to be provided with the #[grammar = \"PATH\"] or #[grammar_inline = \"GRAMMAR CONTENTS\"] attribute"); + } - (name, generics, argument) + let mut grammar_sources = Vec::with_capacity(grammar.len()); + for attr in grammar { + grammar_sources.push(get_attribute(attr)) + } + + (name, generics, grammar_sources) } fn get_attribute(attr: &Attribute) -> GrammarSource { @@ -155,7 +167,6 @@ fn get_attribute(attr: &Attribute) -> GrammarSource { mod tests { use super::parse_derive; use super::GrammarSource; - use syn; #[test] fn derive_inline_file() { @@ -165,8 +176,8 @@ mod tests { pub struct MyParser<'a, T>; "; let ast = syn::parse_str(definition).unwrap(); - let (_, _, filename) = parse_derive(ast); - assert_eq!(filename, GrammarSource::Inline("GRAMMAR".to_string())); + let (_, _, filenames) = parse_derive(ast); + assert_eq!(filenames, [GrammarSource::Inline("GRAMMAR".to_string())]); } #[test] @@ -177,12 +188,11 @@ mod tests { pub struct MyParser<'a, T>; "; let ast = syn::parse_str(definition).unwrap(); - let (_, _, filename) = parse_derive(ast); - assert_eq!(filename, GrammarSource::File("myfile.pest".to_string())); + let (_, _, filenames) = parse_derive(ast); + assert_eq!(filenames, [GrammarSource::File("myfile.pest".to_string())]); } #[test] - #[should_panic(expected = "only 1 grammar file can be provided")] fn derive_multiple_grammars() { let definition = " #[other_attr] @@ -191,7 +201,14 @@ mod tests { pub struct MyParser<'a, T>; "; let ast = syn::parse_str(definition).unwrap(); - parse_derive(ast); + let (_, _, filenames) = parse_derive(ast); + assert_eq!( + filenames, + [ + GrammarSource::File("myfile1.pest".to_string()), + GrammarSource::File("myfile2.pest".to_string()) + ] + ); } #[test] @@ -205,4 +222,52 @@ mod tests { let ast = syn::parse_str(definition).unwrap(); parse_derive(ast); } + + #[test] + #[should_panic( + expected = "a grammar file needs to be provided with the #[grammar = \"PATH\"] or #[grammar_inline = \"GRAMMAR CONTENTS\"] attribute" + )] + fn derive_no_grammar() { + let definition = " + #[other_attr] + pub struct MyParser<'a, T>; + "; + let ast = syn::parse_str(definition).unwrap(); + parse_derive(ast); + } + + #[doc = "Matches dar\n\nMatch dar description\n"] + #[test] + fn test_generate_doc() { + let input = quote! { + #[derive(Parser)] + #[grammar = "../tests/test.pest"] + pub struct TestParser; + }; + + let token = super::derive_parser(input, true); + + let expected = quote! { + #[doc = "A parser for JSON file.\nAnd this is a example for JSON parser.\n\n indent-4-space\n"] + #[allow(dead_code, non_camel_case_types, clippy::upper_case_acronyms)] + #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] + + pub enum Rule { + #[doc = "Matches foo str, e.g.: `foo`"] + r#foo, + #[doc = "Matches bar str\n\n Indent 2, e.g: `bar` or `foobar`"] + r#bar, + r#bar1, + #[doc = "Matches dar\n\nMatch dar description\n"] + r#dar + } + }; + + assert!( + token.to_string().contains(expected.to_string().as_str()), + "{}\n\nExpected to contains:\n{}", + token, + expected + ); + } } diff --git a/src/macros.rs b/src/macros.rs index 9d02725..377f66e 100644 --- a/src/macros.rs +++ b/src/macros.rs @@ -13,30 +13,26 @@ macro_rules! insert_builtin { }; } -macro_rules! insert_public_builtin { - ($builtin: expr, $name: ident, $pattern: expr) => { - $builtin.push((stringify!($name), generate_public_rule!($name, $pattern))); - }; -} - +#[cfg(feature = "std")] macro_rules! generate_rule { ($name: ident, $pattern: expr) => { quote! { #[inline] #[allow(dead_code, non_snake_case, unused_variables)] - pub fn $name(state: Box<::pest::ParserState<Rule>>) -> ::pest::ParseResult<Box<::pest::ParserState<Rule>>> { + pub fn $name(state: ::std::boxed::Box<::pest::ParserState<'_, Rule>>) -> ::pest::ParseResult<::std::boxed::Box<::pest::ParserState<'_, Rule>>> { $pattern } } } } -macro_rules! generate_public_rule { +#[cfg(not(feature = "std"))] +macro_rules! generate_rule { ($name: ident, $pattern: expr) => { quote! { #[inline] #[allow(dead_code, non_snake_case, unused_variables)] - pub fn $name(state: Box<::pest::ParserState<Rule>>) -> ::pest::ParseResult<Box<::pest::ParserState<Rule>>> { + pub fn $name(state: ::alloc::boxed::Box<::pest::ParserState<'_, Rule>>) -> ::pest::ParseResult<::alloc::boxed::Box<::pest::ParserState<'_, Rule>>> { $pattern } } diff --git a/tests/test.pest b/tests/test.pest new file mode 100644 index 0000000..95d484e --- /dev/null +++ b/tests/test.pest @@ -0,0 +1,23 @@ +//! A parser for JSON file. +//! And this is a example for JSON parser. +//! +//! indent-4-space +//! + +/// Matches foo str, e.g.: `foo` +foo = { "foo" } + +/// Matches bar str +/// +/// Indent 2, e.g: `bar` or `foobar` + +bar = { "bar" | "foobar" } + +bar1 = { "bar1" } + +/// Matches dar +/// +/// Match dar description +/// + +dar = { "da" }
\ No newline at end of file |