summaryrefslogtreecommitdiff
path: root/src/grammar.pest
blob: 405ab39612151d50de054eac87c642c1ee7fb349 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
// pest. The Elegant Parser
// Copyright (c) 2018 Dragoș Tiselice
//
// Licensed under the Apache License, Version 2.0
// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. All files in the project carrying such notice may not be copied,
// modified, or distributed except according to those terms.
//! Pest meta-grammar
//!
//! # Warning: Semantic Versioning
//! There may be non-breaking changes to the meta-grammar
//! between minor versions. Those non-breaking changes, however,
//! may translate into semver-breaking changes due to the additional variants
//! added to the `Rule` enum. This is a known issue and will be fixed in the
//! future (e.g. by raising the MSRV and adding `non_exhaustive` annotations).

/// Entry point: a whole grammar file, from start of input to end of input.
/// Optional `//!` documentation lines come first, then one or more rules or rule docs.
grammar_rules = _{
    SOI ~ grammar_doc* ~ grammar_rule+ ~ EOI
}

/// One top-level item of a grammar: either a rule definition
/// (name, `=`, optional modifier, braced expression) or a `///` documentation line.
grammar_rule = {
    identifier ~ assignment_operator ~ modifier? ~ opening_brace ~ expression ~ closing_brace
  | line_doc
}

/// The `=` between a rule's name and its body.
assignment_operator = { "=" }

/// The `{` that starts a rule body or a braced repetition count.
opening_brace = { "{" }
/// The `}` that ends a rule body or a braced repetition count.
closing_brace = { "}" }

/// The `(` that starts a grouped expression or the argument of `PUSH`.
opening_paren = { "(" }
/// The `)` that ends a grouped expression or the argument of `PUSH`.
closing_paren = { ")" }

/// The `[` that starts the slice of a `PEEK`.
opening_brack = { "[" }
/// The `]` that ends the slice of a `PEEK`.
closing_brack = { "]" }

/// The optional marker that may appear between `=` and `{` in a rule definition (silent).
modifier = _{
    silent_modifier
  | atomic_modifier
  | compound_atomic_modifier
  | non_atomic_modifier
}

/// `_` - prefix marking a rule as silent.
silent_modifier = { "_" }

/// `@` - prefix marking a rule as atomic.
atomic_modifier = { "@" }

/// `$` - prefix marking a rule as compound atomic.
compound_atomic_modifier = { "$" }

/// `!` - prefix marking a rule as non-atomic.
non_atomic_modifier = { "!" }

/// A rule body: one or more terms joined by infix operators,
/// optionally introduced by a leading `|`.
expression = {
    choice_operator? ~ term ~ (infix_operator ~ term)*
}

/// A node surrounded by any number of prefix and postfix operators.
term = {
    prefix_operator* ~ node ~ postfix_operator*
}

/// Either a parenthesized sub-expression or a terminal (silent).
node = _{
    opening_paren ~ expression ~ closing_paren
  | terminal
}

/// The leaves of an expression (silent).
/// Alternatives are tried in order, so `PUSH(...)` and `PEEK[...]` are
/// attempted before a plain identifier.
terminal = _{
    _push
  | peek_slice
  | identifier
  | string
  | insensitive_string
  | range
}

/// A predicate written in front of a node: `&` or `!` (silent).
prefix_operator = _{
    positive_predicate_operator
  | negative_predicate_operator
}

/// An operator joining two terms: sequence (`~`) or choice (`|`) (silent).
infix_operator = _{
    sequence_operator
  | choice_operator
}

/// A suffix written after a node: `?`, `*`, `+` or a braced repetition count (silent).
postfix_operator = _{
    optional_operator
  | repeat_operator
  | repeat_once_operator
  | repeat_exact
  | repeat_min
  | repeat_max
  | repeat_min_max
}

/// `&` - the positive predicate prefix.
positive_predicate_operator = { "&" }
/// `!` - the negative predicate prefix.
negative_predicate_operator = { "!" }

/// `~` - the sequence operator.
sequence_operator = { "~" }
/// `|` - the choice operator.
choice_operator = { "|" }

/// `?` - the optional suffix.
optional_operator = { "?" }
/// `*` - the repeat suffix.
repeat_operator = { "*" }
/// `+` - the repeat-at-least-once suffix.
repeat_once_operator = { "+" }

/// `{n}` - repeat exactly `n` times.
repeat_exact = {
    opening_brace ~ number ~ closing_brace
}
/// `{n,}` - repeat at least `n` times.
repeat_min = {
    opening_brace ~ number ~ comma ~ closing_brace
}
/// `{,n}` - repeat at most `n` times.
repeat_max = {
    opening_brace ~ comma ~ number ~ closing_brace
}
/// `{m,n}` - repeat between `m` and `n` times.
repeat_min_max = {
    opening_brace ~ number ~ comma ~ number ~ closing_brace
}

/// One or more decimal digits (leading zeros are allowed).
number = @{ ('0'..'9')+ }

/// An integer: either a plain `number`, or `-` followed by optional zeros and
/// then a non-zero digit, so a negative zero such as `-0` is not matched.
integer = @{
    number
  | "-" ~ "0"* ~ '1'..'9' ~ number?
}

/// The `,` separating the bounds of a braced repetition.
comma = { "," }

/// `PUSH(expression)`.
_push = {
    "PUSH" ~ opening_paren ~ expression ~ closing_paren
}

/// `PEEK[start..end]`, where either bound is an optional integer.
peek_slice = {
    "PEEK" ~ opening_brack ~ integer? ~ range_operator ~ integer? ~ closing_brack
}

// NOTE(review): the `!"PUSH"` lookahead rejects every identifier that begins
// with `PUSH` (not only the bare keyword) - confirm that this is intended.
/// A name: a letter or `_`, followed by any number of letters, digits or `_`.
/// It must not begin with `PUSH`.
identifier = @{
    !"PUSH" ~ ("_" | alpha) ~ ("_" | alpha_num)*
}

/// An ASCII letter, lower- or uppercase (silent).
alpha = _{ 'a'..'z' | 'A'..'Z' }

/// An ASCII letter or a decimal digit (silent).
alpha_num = _{ alpha | '0'..'9' }

/// A double-quoted string literal.
string = ${ quote ~ inner_str ~ quote }

/// An insensitive string literal: `^` followed by a string.
insensitive_string = { "^" ~ string }

/// A range between two character literals, e.g. `'a'..'z'`.
range = { character ~ range_operator ~ character }

/// A single-quoted character literal.
character = ${ single_quote ~ inner_chr ~ single_quote }

/// The contents of a string literal (the text between the quotes).
/// Consumes characters other than `"` and `\`; if an escape sequence follows,
/// it is matched and the rule recurses for the remainder.
inner_str = @{ (!("\"" | "\\") ~ ANY)* ~ (escape ~ inner_str)? }
/// The contents of a character literal: one escape sequence, or any single character.
inner_chr = @{ escape | ANY }
/// A backslash escape: `\"`, `\\`, `\r`, `\n`, `\t`, `\0`, `\'`, a `\xNN` code
/// or a `\u{...}` code.
escape    = @{ "\\" ~ ("\"" | "\\" | "r" | "n" | "t" | "0" | "'" | code | unicode) }
/// The tail of a `\xNN` escape: `x` followed by exactly two hexadecimal digits.
code      = @{ "x" ~ hex_digit{2} }
/// The tail of a `\u{...}` escape: `u` and two to six hexadecimal digits in braces.
unicode   = @{ "u" ~ opening_brace ~ hex_digit{2, 6} ~ closing_brace }
/// One hexadecimal digit, in either case.
hex_digit = @{ '0'..'9' | 'a'..'f' | 'A'..'F' }

/// `"` - delimiter of a string literal.
quote = { "\"" }

/// `'` - delimiter of a character literal.
single_quote = { "'" }

/// `..` - separator between the two ends of a range or of a `PEEK` slice.
range_operator = { ".." }

/// A line break: LF or CRLF (silent).
newline = _{
    "\n"
  | "\r\n"
}

/// A single whitespace character: space, tab or a line break (silent).
WHITESPACE = _{
    " "
  | "\t"
  | newline
}
/// A single-line comment: `//` and everything up to the end of the line.
/// Lines that start with exactly three slashes or with `//!` are documentation
/// and are not matched here. Four or more slashes form an ordinary comment again,
/// as in Rust; previously such lines matched neither this rule nor `line_doc`.
line_comment   = _{
    "//" ~ (&"//" | !("/" | "!")) ~ (!newline ~ ANY)*
}
/// A `/* ... */` comment; block comments may be nested (silent).
block_comment = _{
    "/*" ~ (block_comment | !"*/" ~ ANY)* ~ "*/"
}

/// Any grammar comment: a block comment or a single-line comment (silent).
COMMENT = _{
    block_comment
  | line_comment
}

// Documentation comments are modelled on Rust's comment rules:
// https://doc.rust-lang.org/reference/comments.html
/// A single space or tab (silent).
space = _{ " " | "\t" }

/// A grammar-level documentation line: `//!`, an optional space or tab,
/// then the text up to the end of the line.
grammar_doc = ${
    "//!" ~ space? ~ inner_doc
}
/// A rule documentation line: exactly three slashes, an optional space or tab,
/// then the text up to the end of the line.
/// The "no fourth slash" check runs immediately after the three slashes, so
/// documentation text that itself begins with a slash (after the space) is accepted.
line_doc    = ${ "///" ~ !"/" ~ space? ~ inner_doc }
/// The text of a documentation line: everything up to, but not including, the line break.
inner_doc   = @{ (!newline ~ ANY)* }