summaryrefslogtreecommitdiff
path: root/src/iterators/pairs.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/iterators/pairs.rs')
-rw-r--r--src/iterators/pairs.rs259
1 files changed, 253 insertions, 6 deletions
diff --git a/src/iterators/pairs.rs b/src/iterators/pairs.rs
index c21a7fa..ed6a9a1 100644
--- a/src/iterators/pairs.rs
+++ b/src/iterators/pairs.rs
@@ -13,6 +13,7 @@ use alloc::string::String;
use alloc::vec::Vec;
use core::fmt;
use core::hash::{Hash, Hasher};
+use core::iter::Filter;
use core::ptr;
use core::str;
@@ -33,30 +34,44 @@ use crate::RuleType;
/// [`Pair::into_inner`]: struct.Pair.html#method.into_inner
#[derive(Clone)]
pub struct Pairs<'i, R> {
- queue: Rc<Vec<QueueableToken<R>>>,
+ queue: Rc<Vec<QueueableToken<'i, R>>>,
input: &'i str,
start: usize,
end: usize,
+ // Number of pairs this iterator has yet to yield; this is the value reported by
+ // `ExactSizeIterator::len` and it is decremented each time a pair is handed out.
+ pairs_count: usize,
line_index: Rc<LineIndex>,
}
-pub fn new<R: RuleType>(
- queue: Rc<Vec<QueueableToken<R>>>,
- input: &str,
+pub fn new<'i, R: RuleType>(
+ queue: Rc<Vec<QueueableToken<'i, R>>>,
+ input: &'i str,
line_index: Option<Rc<LineIndex>>,
start: usize,
end: usize,
-) -> Pairs<'_, R> {
+) -> Pairs<'i, R> {
// Reuse the caller's line index when one is supplied; otherwise build it from `input`.
let line_index = match line_index {
Some(line_index) => line_index,
None => Rc::new(LineIndex::new(input)),
};
+ // Count the pairs in `start..end` once, up front, so that `len()` only has to
+ // read a stored field. Each step expects a `Start` token at `cursor` and jumps
+ // to the index one past that token's `end_token_index`.
+ let mut pairs_count = 0;
+ let mut cursor = start;
+ while cursor < end {
+ cursor = match queue[cursor] {
+ QueueableToken::Start {
+ end_token_index, ..
+ } => end_token_index,
+ // Any other token at a pair boundary is treated as a bug and panics.
+ _ => unreachable!(),
+ } + 1;
+ pairs_count += 1;
+ }
+
Pairs {
queue,
input,
start,
end,
+ pairs_count,
line_index,
}
}
@@ -99,6 +114,40 @@ impl<'i, R: RuleType> Pairs<'i, R> {
}
}
+ /// Returns the input string of `Pairs`.
+ ///
+ /// This function returns the input string of `Pairs` as a `&str`. This is the source string
+ /// from which `Pairs` was created. The returned `&str` can be used to examine the contents of
+ /// `Pairs` or to perform further processing on the string.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// # use std::rc::Rc;
+ /// # use pest;
+ /// # #[allow(non_camel_case_types)]
+ /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
+ /// enum Rule {
+ /// a,
+ /// b
+ /// }
+ ///
+ /// // Example: Get input string from Pairs
+ ///
+ /// let input = "a b";
+ /// let pairs = pest::state(input, |state| {
+ /// // generating Token pairs with Rule::a and Rule::b ...
+ /// # state.rule(Rule::a, |s| s.match_string("a")).and_then(|s| s.skip(1))
+ /// # .and_then(|s| s.rule(Rule::b, |s| s.match_string("b")))
+ /// }).unwrap();
+ ///
+ /// assert_eq!(pairs.as_str(), "a b");
+ /// assert_eq!(input, pairs.get_input());
+ /// ```
+ pub fn get_input(&self) -> &'i str {
+ // Returned with the input's own lifetime `'i`, not the lifetime of the `&self` borrow.
+ self.input
+ }
+
/// Captures inner token `Pair`s and concatenates resulting `&str`s. This does not capture
/// the input between token `Pair`s.
///
@@ -159,6 +208,114 @@ impl<'i, R: RuleType> Pairs<'i, R> {
unsafe { flat_pairs::new(self.queue, self.input, self.start, self.end) }
}
+ /// Finds the first pair that has its node or branch tagged with the provided
+ /// label. Searches in the flattened [`Pairs`] iterator.
+ ///
+ /// # Examples
+ ///
+ /// Determine which branch, `add` or `mul`, matched the input:
+ /// ```
+ /// use pest::{state, ParseResult, ParserState};
+ /// #[allow(non_camel_case_types)]
+ /// #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
+ /// enum Rule {
+ /// number, // 0..9
+ /// add, // num + num
+ /// mul, // num * num
+ /// }
+ /// fn mark_branch(
+ /// state: Box<ParserState<'_, Rule>>,
+ /// ) -> ParseResult<Box<ParserState<'_, Rule>>> {
+ /// expr(state, Rule::mul, "*")
+ /// .and_then(|state| state.tag_node("mul"))
+ /// .or_else(|state| expr(state, Rule::add, "+"))
+ /// .and_then(|state| state.tag_node("add"))
+ /// }
+ /// fn expr<'a>(
+ /// state: Box<ParserState<'a, Rule>>,
+ /// r: Rule,
+ /// o: &'static str,
+ /// ) -> ParseResult<Box<ParserState<'a, Rule>>> {
+ /// state.rule(r, |state| {
+ /// state.sequence(|state| {
+ /// number(state)
+ /// .and_then(|state| state.tag_node("lhs"))
+ /// .and_then(|state| state.match_string(o))
+ /// .and_then(number)
+ /// .and_then(|state| state.tag_node("rhs"))
+ /// })
+ /// })
+ /// }
+ /// fn number(state: Box<ParserState<'_, Rule>>) -> ParseResult<Box<ParserState<'_, Rule>>> {
+ /// state.rule(Rule::number, |state| state.match_range('0'..'9'))
+ /// }
+ /// let input = "1+2";
+ /// let pairs = state(input, mark_branch).unwrap();
+ /// assert_eq!(pairs.find_first_tagged("add").unwrap().as_rule(), Rule::add);
+ /// assert_eq!(pairs.find_first_tagged("mul"), None);
+ /// ```
+    #[inline]
+    pub fn find_first_tagged(&self, tag: &'i str) -> Option<Pair<'i, R>> {
+        // `find_tagged` consumes its receiver, so the search runs on a clone and
+        // leaves `self` untouched.
+        let mut tagged = self.clone().find_tagged(tag);
+        tagged.next()
+    }
+
+ /// Returns the iterator over pairs that have their node or branch tagged
+ /// with the provided label. The iterator is built from a flattened [`Pairs`] iterator.
+ ///
+ /// # Examples
+ ///
+ /// Tell the left-hand-side node apart from the right-hand-side node:
+ /// ```
+ /// use pest::{state, ParseResult, ParserState};
+ /// #[allow(non_camel_case_types)]
+ /// #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
+ /// enum Rule {
+ /// number, // 0..9
+ /// add, // num + num
+ /// mul, // num * num
+ /// }
+ /// fn mark_branch(
+ /// state: Box<ParserState<'_, Rule>>,
+ /// ) -> ParseResult<Box<ParserState<'_, Rule>>> {
+ /// expr(state, Rule::mul, "*")
+ /// .and_then(|state| state.tag_node("mul"))
+ /// .or_else(|state| expr(state, Rule::add, "+"))
+ /// .and_then(|state| state.tag_node("add"))
+ /// }
+ /// fn expr<'a>(
+ /// state: Box<ParserState<'a, Rule>>,
+ /// r: Rule,
+ /// o: &'static str,
+ /// ) -> ParseResult<Box<ParserState<'a, Rule>>> {
+ /// state.rule(r, |state| {
+ /// state.sequence(|state| {
+ /// number(state)
+ /// .and_then(|state| state.tag_node("lhs"))
+ /// .and_then(|state| state.match_string(o))
+ /// .and_then(number)
+ /// .and_then(|state| state.tag_node("rhs"))
+ /// })
+ /// })
+ /// }
+ /// fn number(state: Box<ParserState<'_, Rule>>) -> ParseResult<Box<ParserState<'_, Rule>>> {
+ /// state.rule(Rule::number, |state| state.match_range('0'..'9'))
+ /// }
+ ///
+ /// let input = "1+2";
+ /// let pairs = state(input, mark_branch).unwrap();
+ /// let mut left_numbers = pairs.find_tagged("lhs");
+ /// assert_eq!(left_numbers.next().unwrap().as_str(), "1");
+ /// assert_eq!(left_numbers.next(), None);
+ /// ```
+    #[inline]
+    pub fn find_tagged(
+        self,
+        tag: &'i str,
+    ) -> Filter<FlatPairs<'i, R>, impl FnMut(&Pair<'i, R>) -> bool + '_> {
+        // Keep only the pairs whose node tag is present and equal to `tag`.
+        let carries_tag =
+            move |pair: &Pair<'i, R>| pair.as_node_tag().map_or(false, |nt| nt == tag);
+        self.flatten().filter(carries_tag)
+    }
+
/// Returns the `Tokens` for the `Pairs`.
///
/// # Examples
@@ -237,6 +394,13 @@ impl<'i, R: RuleType> Pairs<'i, R> {
}
}
+// `pairs_count` is computed when the `Pairs` is constructed and decremented each time
+// a pair is yielded from either end, so the remaining length is a plain field read.
+impl<'i, R: RuleType> ExactSizeIterator for Pairs<'i, R> {
+ #[inline]
+ fn len(&self) -> usize {
+ self.pairs_count
+ }
+}
+
impl<'i, R: RuleType> Iterator for Pairs<'i, R> {
type Item = Pair<'i, R>;
@@ -244,8 +408,14 @@ impl<'i, R: RuleType> Iterator for Pairs<'i, R> {
let pair = self.peek()?;
self.start = self.pair() + 1;
+ self.pairs_count -= 1;
Some(pair)
}
+
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        // The remaining count is known exactly, so both bounds are the same value.
+        let remaining = ExactSizeIterator::len(self);
+        (remaining, Some(remaining))
+    }
}
impl<'i, R: RuleType> DoubleEndedIterator for Pairs<'i, R> {
@@ -255,6 +425,7 @@ impl<'i, R: RuleType> DoubleEndedIterator for Pairs<'i, R> {
}
self.end = self.pair_from_end();
+ self.pairs_count -= 1;
let pair = unsafe {
pair::new(
@@ -301,7 +472,7 @@ impl<'i, R: Eq> Eq for Pairs<'i, R> {}
impl<'i, R: Hash> Hash for Pairs<'i, R> {
fn hash<H: Hasher>(&self, state: &mut H) {
- (&*self.queue as *const Vec<QueueableToken<R>>).hash(state);
+ (&*self.queue as *const Vec<QueueableToken<'i, R>>).hash(state);
(self.input as *const str).hash(state);
self.start.hash(state);
self.end.hash(state);
@@ -330,6 +501,7 @@ mod tests {
use super::super::super::macros::tests::*;
use super::super::super::Parser;
use alloc::borrow::ToOwned;
+ use alloc::boxed::Box;
use alloc::format;
use alloc::vec;
use alloc::vec::Vec;
@@ -390,6 +562,14 @@ mod tests {
}
#[test]
+    fn get_input_of_pairs() {
+        // The string handed to the parser must come back unchanged from `get_input`.
+        let source = "abcde";
+        let parsed = AbcParser::parse(Rule::a, source).unwrap();
+
+        assert_eq!(parsed.get_input(), source);
+    }
+
+ #[test]
fn as_str_empty() {
let mut pairs = AbcParser::parse(Rule::a, "abcde").unwrap();
@@ -479,4 +659,71 @@ mod tests {
assert_eq!(pair.as_str(), "abc");
assert_eq!(pair.line_col(), (1, 1));
}
+
+ #[test]
+ // false positive: pest uses `..` as a complete range (historically)
+ #[allow(clippy::almost_complete_range)]
+ fn test_tag_node_branch() {
+ use crate::{state, ParseResult, ParserState};
+ #[allow(non_camel_case_types)]
+ #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
+ enum Rule {
+ number, // 0..9
+ add, // num + num
+ mul, // num * num
+ }
+ // Attempts a `mul` expression first; on failure falls back to an `add` expression.
+ fn mark_branch(
+ state: Box<ParserState<'_, Rule>>,
+ ) -> ParseResult<Box<ParserState<'_, Rule>>> {
+ expr(state, Rule::mul, "*")
+ .and_then(|state| state.tag_node("mul"))
+ .or_else(|state| expr(state, Rule::add, "+"))
+ .and_then(|state| state.tag_node("add"))
+ }
+ // Parses `number o number` as rule `r`, tagging the two operands `lhs` and `rhs`.
+ fn expr<'a>(
+ state: Box<ParserState<'a, Rule>>,
+ r: Rule,
+ o: &'static str,
+ ) -> ParseResult<Box<ParserState<'a, Rule>>> {
+ state.rule(r, |state| {
+ state.sequence(|state| {
+ number(state)
+ .and_then(|state| state.tag_node("lhs"))
+ .and_then(|state| state.match_string(o))
+ .and_then(number)
+ .and_then(|state| state.tag_node("rhs"))
+ })
+ })
+ }
+ // Matches a single character in the range used for `Rule::number`.
+ fn number(state: Box<ParserState<'_, Rule>>) -> ParseResult<Box<ParserState<'_, Rule>>> {
+ state.rule(Rule::number, |state| state.match_range('0'..'9'))
+ }
+ let input = "1+2";
+ let pairs = state(input, mark_branch).unwrap();
+ // The input uses `+`, so an `add`-tagged pair is expected and no `mul`-tagged one.
+ assert_eq!(pairs.find_first_tagged("add").unwrap().as_rule(), Rule::add);
+ assert_eq!(pairs.find_first_tagged("mul"), None);
+
+ // `find_tagged` takes `self` by value, hence the clone for the first search.
+ let mut left_numbers = pairs.clone().find_tagged("lhs");
+
+ assert_eq!(left_numbers.next().unwrap().as_str(), "1");
+ assert_eq!(left_numbers.next(), None);
+ let mut right_numbers = pairs.find_tagged("rhs");
+
+ assert_eq!(right_numbers.next().unwrap().as_str(), "2");
+ assert_eq!(right_numbers.next(), None);
+ }
+
+    #[test]
+    fn exact_size_iter_for_pairs() {
+        // Forward: the reported length equals the number of pairs actually yielded.
+        let forward = AbcParser::parse(Rule::a, "abc\nefgh").unwrap();
+        let reported = forward.len();
+        assert_eq!(reported, forward.count());
+
+        // Reversed: the same must hold when iterating from the back.
+        let backward = AbcParser::parse(Rule::a, "abc\nefgh").unwrap().rev();
+        let reported = backward.len();
+        assert_eq!(reported, backward.count());
+
+        // Partially consumed: taking one pair leaves exactly one fewer to count.
+        let mut remaining = AbcParser::parse(Rule::a, "abc\nefgh").unwrap();
+        let initial_len = remaining.len();
+        let _ = remaining.next().unwrap();
+        assert_eq!(remaining.count() + 1, initial_len);
+    }
}