diff options
Diffstat (limited to 'src/iterators/pairs.rs')
-rw-r--r-- | src/iterators/pairs.rs | 259 |
1 file changed, 253 insertions, 6 deletions
diff --git a/src/iterators/pairs.rs b/src/iterators/pairs.rs index c21a7fa..ed6a9a1 100644 --- a/src/iterators/pairs.rs +++ b/src/iterators/pairs.rs @@ -13,6 +13,7 @@ use alloc::string::String; use alloc::vec::Vec; use core::fmt; use core::hash::{Hash, Hasher}; +use core::iter::Filter; use core::ptr; use core::str; @@ -33,30 +34,44 @@ use crate::RuleType; /// [`Pair::into_inner`]: struct.Pair.html#method.into_inner #[derive(Clone)] pub struct Pairs<'i, R> { - queue: Rc<Vec<QueueableToken<R>>>, + queue: Rc<Vec<QueueableToken<'i, R>>>, input: &'i str, start: usize, end: usize, + pairs_count: usize, line_index: Rc<LineIndex>, } -pub fn new<R: RuleType>( - queue: Rc<Vec<QueueableToken<R>>>, - input: &str, +pub fn new<'i, R: RuleType>( + queue: Rc<Vec<QueueableToken<'i, R>>>, + input: &'i str, line_index: Option<Rc<LineIndex>>, start: usize, end: usize, -) -> Pairs<'_, R> { +) -> Pairs<'i, R> { let line_index = match line_index { Some(line_index) => line_index, None => Rc::new(LineIndex::new(input)), }; + let mut pairs_count = 0; + let mut cursor = start; + while cursor < end { + cursor = match queue[cursor] { + QueueableToken::Start { + end_token_index, .. + } => end_token_index, + _ => unreachable!(), + } + 1; + pairs_count += 1; + } + Pairs { queue, input, start, end, + pairs_count, line_index, } } @@ -99,6 +114,40 @@ impl<'i, R: RuleType> Pairs<'i, R> { } } + /// Returns the input string of `Pairs`. + /// + /// This function returns the input string of `Pairs` as a `&str`. This is the source string + /// from which `Pairs` was created. The returned `&str` can be used to examine the contents of + /// `Pairs` or to perform further processing on the string. 
+ /// + /// # Examples + /// + /// ``` + /// # use std::rc::Rc; + /// # use pest; + /// # #[allow(non_camel_case_types)] + /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] + /// enum Rule { + /// a, + /// b + /// } + /// + /// // Example: Get input string from Pairs + /// + /// let input = "a b"; + /// let pairs = pest::state(input, |state| { + /// // generating Token pairs with Rule::a and Rule::b ... + /// # state.rule(Rule::a, |s| s.match_string("a")).and_then(|s| s.skip(1)) + /// # .and_then(|s| s.rule(Rule::b, |s| s.match_string("b"))) + /// }).unwrap(); + /// + /// assert_eq!(pairs.as_str(), "a b"); + /// assert_eq!(input, pairs.get_input()); + /// ``` + pub fn get_input(&self) -> &'i str { + self.input + } + /// Captures inner token `Pair`s and concatenates resulting `&str`s. This does not capture /// the input between token `Pair`s. /// @@ -159,6 +208,114 @@ impl<'i, R: RuleType> Pairs<'i, R> { unsafe { flat_pairs::new(self.queue, self.input, self.start, self.end) } } + /// Finds the first pair that has its node or branch tagged with the provided + /// label. Searches in the flattened [`Pairs`] iterator. 
+ /// + /// # Examples + /// + /// Try to recognize the branch between add and mul + /// ``` + /// use pest::{state, ParseResult, ParserState}; + /// #[allow(non_camel_case_types)] + /// #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] + /// enum Rule { + /// number, // 0..9 + /// add, // num + num + /// mul, // num * num + /// } + /// fn mark_branch( + /// state: Box<ParserState<'_, Rule>>, + /// ) -> ParseResult<Box<ParserState<'_, Rule>>> { + /// expr(state, Rule::mul, "*") + /// .and_then(|state| state.tag_node("mul")) + /// .or_else(|state| expr(state, Rule::add, "+")) + /// .and_then(|state| state.tag_node("add")) + /// } + /// fn expr<'a>( + /// state: Box<ParserState<'a, Rule>>, + /// r: Rule, + /// o: &'static str, + /// ) -> ParseResult<Box<ParserState<'a, Rule>>> { + /// state.rule(r, |state| { + /// state.sequence(|state| { + /// number(state) + /// .and_then(|state| state.tag_node("lhs")) + /// .and_then(|state| state.match_string(o)) + /// .and_then(number) + /// .and_then(|state| state.tag_node("rhs")) + /// }) + /// }) + /// } + /// fn number(state: Box<ParserState<'_, Rule>>) -> ParseResult<Box<ParserState<'_, Rule>>> { + /// state.rule(Rule::number, |state| state.match_range('0'..'9')) + /// } + /// let input = "1+2"; + /// let pairs = state(input, mark_branch).unwrap(); + /// assert_eq!(pairs.find_first_tagged("add").unwrap().as_rule(), Rule::add); + /// assert_eq!(pairs.find_first_tagged("mul"), None); + /// ``` + #[inline] + pub fn find_first_tagged(&self, tag: &'i str) -> Option<Pair<'i, R>> { + self.clone().find_tagged(tag).next() + } + + /// Returns the iterator over pairs that have their node or branch tagged + /// with the provided label. The iterator is built from a flattened [`Pairs`] iterator. 
+ /// + /// # Examples + /// + /// Try to recognize the node between left and right hand side + /// ``` + /// use pest::{state, ParseResult, ParserState}; + /// #[allow(non_camel_case_types)] + /// #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] + /// enum Rule { + /// number, // 0..9 + /// add, // num + num + /// mul, // num * num + /// } + /// fn mark_branch( + /// state: Box<ParserState<'_, Rule>>, + /// ) -> ParseResult<Box<ParserState<'_, Rule>>> { + /// expr(state, Rule::mul, "*") + /// .and_then(|state| state.tag_node("mul")) + /// .or_else(|state| expr(state, Rule::add, "+")) + /// .and_then(|state| state.tag_node("add")) + /// } + /// fn expr<'a>( + /// state: Box<ParserState<'a, Rule>>, + /// r: Rule, + /// o: &'static str, + /// ) -> ParseResult<Box<ParserState<'a, Rule>>> { + /// state.rule(r, |state| { + /// state.sequence(|state| { + /// number(state) + /// .and_then(|state| state.tag_node("lhs")) + /// .and_then(|state| state.match_string(o)) + /// .and_then(number) + /// .and_then(|state| state.tag_node("rhs")) + /// }) + /// }) + /// } + /// fn number(state: Box<ParserState<'_, Rule>>) -> ParseResult<Box<ParserState<'_, Rule>>> { + /// state.rule(Rule::number, |state| state.match_range('0'..'9')) + /// } + /// + /// let input = "1+2"; + /// let pairs = state(input, mark_branch).unwrap(); + /// let mut left_numbers = pairs.find_tagged("lhs"); + /// assert_eq!(left_numbers.next().unwrap().as_str(), "1"); + /// assert_eq!(left_numbers.next(), None); + /// ``` + #[inline] + pub fn find_tagged( + self, + tag: &'i str, + ) -> Filter<FlatPairs<'i, R>, impl FnMut(&Pair<'i, R>) -> bool + '_> { + self.flatten() + .filter(move |pair: &Pair<'i, R>| matches!(pair.as_node_tag(), Some(nt) if nt == tag)) + } + /// Returns the `Tokens` for the `Pairs`. 
/// /// # Examples @@ -237,6 +394,13 @@ impl<'i, R: RuleType> Pairs<'i, R> { } } +impl<'i, R: RuleType> ExactSizeIterator for Pairs<'i, R> { + #[inline] + fn len(&self) -> usize { + self.pairs_count + } +} + impl<'i, R: RuleType> Iterator for Pairs<'i, R> { type Item = Pair<'i, R>; @@ -244,8 +408,14 @@ impl<'i, R: RuleType> Iterator for Pairs<'i, R> { let pair = self.peek()?; self.start = self.pair() + 1; + self.pairs_count -= 1; Some(pair) } + + fn size_hint(&self) -> (usize, Option<usize>) { + let len = <Self as ExactSizeIterator>::len(self); + (len, Some(len)) + } } impl<'i, R: RuleType> DoubleEndedIterator for Pairs<'i, R> { @@ -255,6 +425,7 @@ impl<'i, R: RuleType> DoubleEndedIterator for Pairs<'i, R> { } self.end = self.pair_from_end(); + self.pairs_count -= 1; let pair = unsafe { pair::new( @@ -301,7 +472,7 @@ impl<'i, R: Eq> Eq for Pairs<'i, R> {} impl<'i, R: Hash> Hash for Pairs<'i, R> { fn hash<H: Hasher>(&self, state: &mut H) { - (&*self.queue as *const Vec<QueueableToken<R>>).hash(state); + (&*self.queue as *const Vec<QueueableToken<'i, R>>).hash(state); (self.input as *const str).hash(state); self.start.hash(state); self.end.hash(state); @@ -330,6 +501,7 @@ mod tests { use super::super::super::macros::tests::*; use super::super::super::Parser; use alloc::borrow::ToOwned; + use alloc::boxed::Box; use alloc::format; use alloc::vec; use alloc::vec::Vec; @@ -390,6 +562,14 @@ mod tests { } #[test] + fn get_input_of_pairs() { + let input = "abcde"; + let pairs = AbcParser::parse(Rule::a, input).unwrap(); + + assert_eq!(pairs.get_input(), input); + } + + #[test] fn as_str_empty() { let mut pairs = AbcParser::parse(Rule::a, "abcde").unwrap(); @@ -479,4 +659,71 @@ mod tests { assert_eq!(pair.as_str(), "abc"); assert_eq!(pair.line_col(), (1, 1)); } + + #[test] + // false positive: pest uses `..` as a complete range (historically) + #[allow(clippy::almost_complete_range)] + fn test_tag_node_branch() { + use crate::{state, ParseResult, ParserState}; + 
#[allow(non_camel_case_types)] + #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] + enum Rule { + number, // 0..9 + add, // num + num + mul, // num * num + } + fn mark_branch( + state: Box<ParserState<'_, Rule>>, + ) -> ParseResult<Box<ParserState<'_, Rule>>> { + expr(state, Rule::mul, "*") + .and_then(|state| state.tag_node("mul")) + .or_else(|state| expr(state, Rule::add, "+")) + .and_then(|state| state.tag_node("add")) + } + fn expr<'a>( + state: Box<ParserState<'a, Rule>>, + r: Rule, + o: &'static str, + ) -> ParseResult<Box<ParserState<'a, Rule>>> { + state.rule(r, |state| { + state.sequence(|state| { + number(state) + .and_then(|state| state.tag_node("lhs")) + .and_then(|state| state.match_string(o)) + .and_then(number) + .and_then(|state| state.tag_node("rhs")) + }) + }) + } + fn number(state: Box<ParserState<'_, Rule>>) -> ParseResult<Box<ParserState<'_, Rule>>> { + state.rule(Rule::number, |state| state.match_range('0'..'9')) + } + let input = "1+2"; + let pairs = state(input, mark_branch).unwrap(); + assert_eq!(pairs.find_first_tagged("add").unwrap().as_rule(), Rule::add); + assert_eq!(pairs.find_first_tagged("mul"), None); + + let mut left_numbers = pairs.clone().find_tagged("lhs"); + + assert_eq!(left_numbers.next().unwrap().as_str(), "1"); + assert_eq!(left_numbers.next(), None); + let mut right_numbers = pairs.find_tagged("rhs"); + + assert_eq!(right_numbers.next().unwrap().as_str(), "2"); + assert_eq!(right_numbers.next(), None); + } + + #[test] + fn exact_size_iter_for_pairs() { + let pairs = AbcParser::parse(Rule::a, "abc\nefgh").unwrap(); + assert_eq!(pairs.len(), pairs.count()); + + let pairs = AbcParser::parse(Rule::a, "abc\nefgh").unwrap().rev(); + assert_eq!(pairs.len(), pairs.count()); + + let mut pairs = AbcParser::parse(Rule::a, "abc\nefgh").unwrap(); + let pairs_len = pairs.len(); + let _ = pairs.next().unwrap(); + assert_eq!(pairs.count() + 1, pairs_len); + } } |