diff options
Diffstat (limited to 'src/hir/translate.rs')
-rw-r--r-- | src/hir/translate.rs | 88 |
1 files changed, 63 insertions, 25 deletions
diff --git a/src/hir/translate.rs b/src/hir/translate.rs index 99c9493..890e160 100644 --- a/src/hir/translate.rs +++ b/src/hir/translate.rs @@ -434,20 +434,14 @@ impl<'t, 'p> Visitor for TranslatorI<'t, 'p> { } ast::ClassSetItem::Ascii(ref x) => { if self.flags().unicode() { + let xcls = self.hir_ascii_unicode_class(x)?; let mut cls = self.pop().unwrap().unwrap_class_unicode(); - for &(s, e) in ascii_class(&x.kind) { - cls.push(hir::ClassUnicodeRange::new(s, e)); - } - self.unicode_fold_and_negate( - &x.span, x.negated, &mut cls, - )?; + cls.union(&xcls); self.push(HirFrame::ClassUnicode(cls)); } else { + let xcls = self.hir_ascii_byte_class(x)?; let mut cls = self.pop().unwrap().unwrap_class_bytes(); - for &(s, e) in ascii_class(&x.kind) { - cls.push(hir::ClassBytesRange::new(s as u8, e as u8)); - } - self.bytes_fold_and_negate(&x.span, x.negated, &mut cls)?; + cls.union(&xcls); self.push(HirFrame::ClassBytes(cls)); } } @@ -595,7 +589,7 @@ struct TranslatorI<'t, 'p> { impl<'t, 'p> TranslatorI<'t, 'p> { /// Build a new internal translator. fn new(trans: &'t Translator, pattern: &'p str) -> TranslatorI<'t, 'p> { - TranslatorI { trans: trans, pattern: pattern } + TranslatorI { trans, pattern } } /// Return a reference to the underlying translator. @@ -615,7 +609,7 @@ impl<'t, 'p> TranslatorI<'t, 'p> { /// Create a new error with the given span and error type. fn error(&self, span: Span, kind: ErrorKind) -> Error { - Error { kind: kind, pattern: self.pattern.to_string(), span: span } + Error { kind, pattern: self.pattern.to_string(), span } } /// Return a copy of the active flags. @@ -785,7 +779,7 @@ impl<'t, 'p> TranslatorI<'t, 'p> { } ast::GroupKind::NonCapturing(_) => hir::GroupKind::NonCapturing, }; - Hir::group(hir::Group { kind: kind, hir: Box::new(expr) }) + Hir::group(hir::Group { kind, hir: Box::new(expr) }) } fn hir_repetition(&self, rep: &ast::Repetition, expr: Hir) -> Hir { @@ -808,11 +802,7 @@ impl<'t, 'p> TranslatorI<'t, 'p> { }; let greedy = if self.flags().swap_greed() { !rep.greedy } else { rep.greedy }; - Hir::repetition(hir::Repetition { - kind: kind, - greedy: greedy, - hir: Box::new(expr), - }) + Hir::repetition(hir::Repetition { kind, greedy, hir: Box::new(expr) }) } fn hir_unicode_class( @@ -853,6 +843,32 @@ impl<'t, 'p> TranslatorI<'t, 'p> { result } + fn hir_ascii_unicode_class( + &self, + ast: &ast::ClassAscii, + ) -> Result<hir::ClassUnicode> { + let mut cls = hir::ClassUnicode::new( + ascii_class(&ast.kind) + .iter() + .map(|&(s, e)| hir::ClassUnicodeRange::new(s, e)), + ); + self.unicode_fold_and_negate(&ast.span, ast.negated, &mut cls)?; + Ok(cls) + } + + fn hir_ascii_byte_class( + &self, + ast: &ast::ClassAscii, + ) -> Result<hir::ClassBytes> { + let mut cls = hir::ClassBytes::new( + ascii_class(&ast.kind) + .iter() + .map(|&(s, e)| hir::ClassBytesRange::new(s as u8, e as u8)), + ); + self.bytes_fold_and_negate(&ast.span, ast.negated, &mut cls)?; + Ok(cls) + } + fn hir_perl_unicode_class( &self, ast_class: &ast::ClassPerl, @@ -948,7 +964,7 @@ impl<'t, 'p> TranslatorI<'t, 'p> { class: &mut hir::ClassBytes, ) -> Result<()> { // Note that we must apply case folding before negation! - // Consider `(?i)[^x]`. If we applied negation field, then + // Consider `(?i)[^x]`. If we applied negation first, then // the result would be the character class that matched any // Unicode scalar value. if self.flags().case_insensitive() { @@ -1218,7 +1234,7 @@ mod tests { fn hir_quest(greedy: bool, expr: Hir) -> Hir { Hir::repetition(hir::Repetition { kind: hir::RepetitionKind::ZeroOrOne, - greedy: greedy, + greedy, hir: Box::new(expr), }) } @@ -1226,7 +1242,7 @@ mod tests { fn hir_star(greedy: bool, expr: Hir) -> Hir { Hir::repetition(hir::Repetition { kind: hir::RepetitionKind::ZeroOrMore, - greedy: greedy, + greedy, hir: Box::new(expr), }) } @@ -1234,7 +1250,7 @@ mod tests { fn hir_plus(greedy: bool, expr: Hir) -> Hir { Hir::repetition(hir::Repetition { kind: hir::RepetitionKind::OneOrMore, - greedy: greedy, + greedy, hir: Box::new(expr), }) } @@ -1242,7 +1258,7 @@ mod tests { fn hir_range(greedy: bool, range: hir::RepetitionRange, expr: Hir) -> Hir { Hir::repetition(hir::Repetition { kind: hir::RepetitionKind::Range(range), - greedy: greedy, + greedy, hir: Box::new(expr), }) } @@ -1944,6 +1960,25 @@ mod tests { } #[test] + fn class_ascii_multiple() { + // See: https://github.com/rust-lang/regex/issues/680 + assert_eq!( + t("[[:alnum:][:^ascii:]]"), + hir_union( + hir_uclass(ascii_class(&ast::ClassAsciiKind::Alnum)), + hir_uclass(&[('\u{80}', '\u{10FFFF}')]), + ), + ); + assert_eq!( + t_bytes("(?-u)[[:alnum:][:^ascii:]]"), + hir_union( + hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Alnum)), + hir_bclass(&[(0x80, 0xFF)]), + ), + ); + } + + #[test] #[cfg(feature = "unicode-perl")] fn class_perl() { // Unicode @@ -3100,6 +3135,9 @@ mod tests { assert!(t(r"\pL*").is_match_empty()); assert!(t(r"a*|b").is_match_empty()); assert!(t(r"b|a*").is_match_empty()); + assert!(t(r"a|").is_match_empty()); + assert!(t(r"|a").is_match_empty()); + assert!(t(r"a||b").is_match_empty()); assert!(t(r"a*a?(abcd)*").is_match_empty()); assert!(t(r"^").is_match_empty()); assert!(t(r"$").is_match_empty()); @@ -3109,6 +3147,8 @@ mod tests { assert!(t(r"\z").is_match_empty()); assert!(t(r"\B").is_match_empty()); assert!(t_bytes(r"(?-u)\B").is_match_empty()); + assert!(t(r"\b").is_match_empty()); + assert!(t(r"(?-u)\b").is_match_empty()); // Negative examples. assert!(!t(r"a+").is_match_empty()); @@ -3118,8 +3158,6 @@ mod tests { assert!(!t(r"a{1,10}").is_match_empty()); assert!(!t(r"b|a").is_match_empty()); assert!(!t(r"a*a+(abcd)*").is_match_empty()); - assert!(!t(r"\b").is_match_empty()); - assert!(!t(r"(?-u)\b").is_match_empty()); } #[test] |