summary refs log tree commit diff
path: root/src/grammar.pest
diff options
context:
space:
mode:
Diffstat (limited to 'src/grammar.pest')
-rw-r--r-- src/grammar.pest 165
1 files changed, 107 insertions, 58 deletions
diff --git a/src/grammar.pest b/src/grammar.pest
index 405ab39..d97caba 100644
--- a/src/grammar.pest
+++ b/src/grammar.pest
@@ -1,6 +1,6 @@
// pest. The Elegant Parser
// Copyright (c) 2018 Dragoș Tiselice
-//
+//
// Licensed under the Apache License, Version 2.0
// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
@@ -16,97 +16,126 @@
//! future (e.g. by increasing MSRV and non_exhaustive annotations).
/// The top-level rule of a grammar.
-grammar_rules = _{ SOI ~ grammar_doc* ~ (grammar_rule)+ ~ EOI }
+grammar_rules = _{ SOI ~ grammar_doc* ~ grammar_rule* ~ EOI }
/// A rule of a grammar.
grammar_rule = {
- identifier ~ assignment_operator ~ modifier? ~
- opening_brace ~ expression ~ closing_brace |
- line_doc
+ identifier ~ assignment_operator ~ modifier? ~ opening_brace ~ expression ~ closing_brace
+ | line_doc
}
/// Assignment operator.
assignment_operator = { "=" }
+
/// Opening brace for a rule.
-opening_brace = { "{" }
+opening_brace = { "{" }
+
/// Closing brace for a rule.
-closing_brace = { "}" }
+closing_brace = { "}" }
+
/// Opening parenthesis for a branch, PUSH, etc.
-opening_paren = { "(" }
+opening_paren = { "(" }
+
/// Closing parenthesis for a branch, PUSH, etc.
-closing_paren = { ")" }
+closing_paren = { ")" }
+
/// Opening bracket for PEEK (slice inside).
-opening_brack = { "[" }
+opening_brack = { "[" }
+
/// Closing bracket for PEEK (slice inside).
-closing_brack = { "]" }
+closing_brack = { "]" }
/// A rule modifier.
modifier = _{
- silent_modifier |
- atomic_modifier |
- compound_atomic_modifier |
- non_atomic_modifier
+ silent_modifier
+ | atomic_modifier
+ | compound_atomic_modifier
+ | non_atomic_modifier
}
/// Silent rule prefix.
-silent_modifier = { "_" }
+silent_modifier = { "_" }
+
/// Atomic rule prefix.
-atomic_modifier = { "@" }
+atomic_modifier = { "@" }
+
/// Compound atomic rule prefix.
compound_atomic_modifier = { "$" }
+
/// Non-atomic rule prefix.
-non_atomic_modifier = { "!" }
+non_atomic_modifier = { "!" }
+
+/// A tag label.
+tag_id = @{ "#" ~ ("_" | alpha) ~ ("_" | alpha_num)* }
+
+/// For assigning labels to nodes.
+node_tag = _{ tag_id ~ assignment_operator }
/// A rule expression.
-expression = { choice_operator? ~ term ~ (infix_operator ~ term)* }
+expression = { choice_operator? ~ term ~ (infix_operator ~ term)* }
+
/// A rule term.
-term = { prefix_operator* ~ node ~ postfix_operator* }
+term = { node_tag? ~ prefix_operator* ~ node ~ postfix_operator* }
+
/// A rule node (inside terms).
-node = _{ opening_paren ~ expression ~ closing_paren | terminal }
+node = _{ opening_paren ~ expression ~ closing_paren | terminal }
+
/// A terminal expression.
-terminal = _{ _push | peek_slice | identifier | string | insensitive_string | range }
+terminal = _{ _push | peek_slice | identifier | string | insensitive_string | range }
/// Possible predicates for a rule.
-prefix_operator = _{ positive_predicate_operator | negative_predicate_operator }
+prefix_operator = _{ positive_predicate_operator | negative_predicate_operator }
+
/// Branches or sequences.
-infix_operator = _{ sequence_operator | choice_operator }
+infix_operator = _{ sequence_operator | choice_operator }
+
/// Possible modifiers for a rule.
postfix_operator = _{
- optional_operator |
- repeat_operator |
- repeat_once_operator |
- repeat_exact |
- repeat_min |
- repeat_max |
- repeat_min_max
+ optional_operator
+ | repeat_operator
+ | repeat_once_operator
+ | repeat_exact
+ | repeat_min
+ | repeat_max
+ | repeat_min_max
}
/// A positive predicate.
positive_predicate_operator = { "&" }
+
/// A negative predicate.
negative_predicate_operator = { "!" }
+
/// A sequence operator.
-sequence_operator = { "~" }
+sequence_operator = { "~" }
+
/// A choice operator.
-choice_operator = { "|" }
+choice_operator = { "|" }
+
/// An optional operator.
-optional_operator = { "?" }
+optional_operator = { "?" }
+
/// A repeat operator.
-repeat_operator = { "*" }
+repeat_operator = { "*" }
+
/// A repeat at least once operator.
-repeat_once_operator = { "+" }
+repeat_once_operator = { "+" }
/// A repeat exact times.
-repeat_exact = { opening_brace ~ number ~ closing_brace }
+repeat_exact = { opening_brace ~ number ~ closing_brace }
+
/// A repeat at least times.
-repeat_min = { opening_brace ~ number ~ comma ~ closing_brace }
+repeat_min = { opening_brace ~ number ~ comma ~ closing_brace }
+
/// A repeat at most times.
-repeat_max = { opening_brace ~ comma ~ number ~ closing_brace }
+repeat_max = { opening_brace ~ comma ~ number ~ closing_brace }
+
/// A repeat in a range.
repeat_min_max = { opening_brace ~ number ~ comma ~ number ~ closing_brace }
/// A number.
number = @{ '0'..'9'+ }
+
/// An integer number (positive or negative).
integer = @{ number | "-" ~ "0"* ~ '1'..'9' ~ number? }
@@ -115,62 +144,82 @@ comma = { "," }
/// A PUSH expression.
_push = { "PUSH" ~ opening_paren ~ expression ~ closing_paren }
+
/// A PEEK expression.
peek_slice = { "PEEK" ~ opening_brack ~ integer? ~ range_operator ~ integer? ~ closing_brack }
/// An identifier.
identifier = @{ !"PUSH" ~ ("_" | alpha) ~ ("_" | alpha_num)* }
+
/// An alpha character.
-alpha = _{ 'a'..'z' | 'A'..'Z' }
+alpha = _{ 'a'..'z' | 'A'..'Z' }
+
/// An alphanumeric character.
-alpha_num = _{ alpha | '0'..'9' }
+alpha_num = _{ alpha | '0'..'9' }
/// A string.
-string = ${ quote ~ inner_str ~ quote }
+string = ${ quote ~ inner_str ~ quote }
+
/// An insensitive string.
-insensitive_string = { "^" ~ string }
+insensitive_string = { "^" ~ string }
+
/// A character range.
-range = { character ~ range_operator ~ character }
+range = { character ~ range_operator ~ character }
+
/// A single quoted character
-character = ${ single_quote ~ inner_chr ~ single_quote }
+character = ${ single_quote ~ inner_chr ~ single_quote }
/// A quoted string.
inner_str = @{ (!("\"" | "\\") ~ ANY)* ~ (escape ~ inner_str)? }
+
/// An escaped or any character.
inner_chr = @{ escape | ANY }
+
/// An escape sequence.
-escape = @{ "\\" ~ ("\"" | "\\" | "r" | "n" | "t" | "0" | "'" | code | unicode) }
+escape = @{ "\\" ~ ("\"" | "\\" | "r" | "n" | "t" | "0" | "'" | code | unicode) }
+
/// A hexadecimal code.
-code = @{ "x" ~ hex_digit{2} }
+code = @{ "x" ~ hex_digit{2} }
+
/// A unicode code.
-unicode = @{ "u" ~ opening_brace ~ hex_digit{2, 6} ~ closing_brace }
+unicode = @{ "u" ~ opening_brace ~ hex_digit{2, 6} ~ closing_brace }
+
/// A hexadecimal digit.
hex_digit = @{ '0'..'9' | 'a'..'f' | 'A'..'F' }
/// A double quote.
-quote = { "\"" }
+quote = { "\"" }
+
/// A single quote.
-single_quote = { "'" }
+single_quote = { "'" }
+
/// A range operator.
range_operator = { ".." }
/// A newline character.
-newline = _{ "\n" | "\r\n" }
+newline = _{ "\n" | "\r\n" }
+
/// A whitespace character.
-WHITESPACE = _{ " " | "\t" | newline }
+WHITESPACE = _{ " " | "\t" | newline }
+
/// A single line comment.
-line_comment = _{ ("//" ~ !("/" | "!") ~ (!newline ~ ANY)*) }
+line_comment = _{ ("//" ~ !("/" | "!") ~ (!newline ~ ANY)*) }
+
/// A multi-line comment.
-block_comment = _{ "/*" ~ (block_comment | !"*/" ~ ANY)* ~ "*/" }
+block_comment = _{ "/*" ~ (block_comment | !"*/" ~ ANY)* ~ "*/" }
+
/// A grammar comment.
-COMMENT = _{ block_comment | line_comment }
+COMMENT = _{ block_comment | line_comment }
// ref: https://doc.rust-lang.org/reference/comments.html
/// A space character.
-space = _{ " " | "\t" }
+space = _{ " " | "\t" }
+
/// A top-level comment.
grammar_doc = ${ "//!" ~ space? ~ inner_doc }
+
/// A rule comment.
-line_doc = ${ "///" ~ space? ~ !"/" ~ inner_doc }
+line_doc = ${ "///" ~ space? ~ inner_doc }
+
/// A comment content.
-inner_doc = @{ (!newline ~ ANY)* }
+inner_doc = @{ (!newline ~ ANY)* }