diff options
author | Philip Hazel <Philip.Hazel@gmail.com> | 2023-11-25 17:10:35 +0000 |
---|---|---|
committer | Philip Hazel <Philip.Hazel@gmail.com> | 2023-11-25 17:10:35 +0000 |
commit | 198379ca8faaeb60a6677daebfc52480189704c8 (patch) | |
tree | 345c3f7c4edb9e946fceb685a3732a0815366753 | |
parent | 630b1cd68f51339a6ef4ff60142d9d66373d5f4d (diff) | |
download | pcre-198379ca8faaeb60a6677daebfc52480189704c8.tar.gz |
Add some additional tests that I found lying around.
-rw-r--r-- | testdata/testinput1 | 91 | ||||
-rw-r--r-- | testdata/testinput2 | 7 | ||||
-rw-r--r-- | testdata/testoutput1 | 120 | ||||
-rw-r--r-- | testdata/testoutput2 | 13 |
4 files changed, 231 insertions, 0 deletions
diff --git a/testdata/testinput1 b/testdata/testinput1 index 533389dc..c0da415e 100644 --- a/testdata/testinput1 +++ b/testdata/testinput1 @@ -6557,4 +6557,95 @@ ef) x/x,mark /A{ 3, }/ BBAAAAAACC +# This pattern validates regular expression patterns. The original that I was +# sent was this: +# /^((?:(?:[^?+*{}()[\]\\|]+|\\.|\[(?:\^?\\.|\^[^\\]|[^\\^])(?:[^\]\\]+|\\.)*\]|\((?:\?[:=!]|\?<[=!]|\?>)?(?1)??\)|\(\?(?:R|[+-]?\d+)\))(?:(?:[?+*]|\{\d+(?:,\d*)?\})[?+]?)?|\|)*)$/ +# This is not very readable, and also does not handle all features. I have done +# some work on it. + +/^ +(?<re> +# A regular expression is zero or more of these items. + (?: + # An item is one of these: + (?: + [^?+*{}()\[\]\\|]++| # Non-meta characters or unquoted . + \\.| # Quoted . + + \[ # Class, which is [ + (?: # Followed by + \^?\\.| # Optional ^ and any escaped character + \^[^\\]| # OR ^ and not escaped character + [^\\^] # OR neither ^ nor \ + ) # Followed by + (?:[^\]\\]+|\\.)*+ # Zero or more (not ] or \) OR escaped dot + \]| # Class ends with ] + + \( # Parenthesized group + (?: # Start with optional + \?[:=!]| # ? followed by : = ! + \?<[=!]| # OR ?< followed by = or ! + \?> # OR ?> + )? + (?&re)?? # Then a nested <re> + \)| # End parenthesized group + + \(\? # Other parenthesized items + (?: # (? followed by + R| # R + [+-]?\d++ # Or optional +- and digits + ) + \)| # End parens + + \(\* # Verbs + (?: + COMMIT| + FAIL| + MARK:[^)]*| + (?:PRUNE|SKIP|THEN)(?::[^\)]*+)? + ) + \) + ) # End list of items + + # Followed by an optional quantifier + + (?: + (?: + [?+*] # ?+* + | # OR + \{\d+ # { digits + (?:,\d*)? # optionally followed by ,digits + \} # then closing } + | # OR + \{,\d+} # {,digits} + ) + [?+]? # optional ungreedy or possessive + )? + + | # OR an "item" is a branch ending + + \| + + )* # Zero or more top-level items. +) # End regex group. +$/x + [abcdef] + [abc\\]def] + a.b|abcd + ab()d + ab{1,3}d + ab{,3}d + ab(*FAIL)d(*COMMIT)(*SKIP)(*THEN:abc) + ab(*MARK:xyz) + (?=.*[A-Z])(?=.*[a-z])(?=.*[0-9])(?=.*[,;:])(?=.{8,16})(?!.*[\\s]) + abcd\\t\\n\\r\\f\\a\\e\\071\\x3b\\^\\\\\\?caxyz + a*abc?xyz+pqr{3}ab{2,}xy{4,5}pq{0,6}AB{0,}zz + \\G(?:(?=(\\1.|)(.))){1,13}?(?!.*\\2.*\\2)\\1\\K\\2 +\= Expect no match + ab)d + ab(d + {4,5} + a[]b + (a)(?(1)a|b|c) + # End of testinput1 diff --git a/testdata/testinput2 b/testdata/testinput2 index b874f20c..f4597397 100644 --- a/testdata/testinput2 +++ b/testdata/testinput2 @@ -6059,4 +6059,11 @@ a)"xI a a\=noteol +# This matches a character that only exists once in the subject, sort of like a +# hypothetical "(.)(?<!\1.+)(?!.*\1)". That has unlimited variable length +# lookbehind, so is invalid. This pattern doesn't work in Perl 5.38.0. + +/\G(?:(?=(\1.|)(.))){1,13}?(?!.*\2.*\2)\1\K\2/g + aaabcccdeee + # End of testinput2 diff --git a/testdata/testoutput1 b/testdata/testoutput1 index bedd9241..84fe0c6f 100644 --- a/testdata/testoutput1 +++ b/testdata/testoutput1 @@ -10375,4 +10375,124 @@ No match BBAAAAAACC 0: AAAAAA +# This pattern validates regular expression patterns. The original that I was +# sent was this: +# /^((?:(?:[^?+*{}()[\]\\|]+|\\.|\[(?:\^?\\.|\^[^\\]|[^\\^])(?:[^\]\\]+|\\.)*\]|\((?:\?[:=!]|\?<[=!]|\?>)?(?1)??\)|\(\?(?:R|[+-]?\d+)\))(?:(?:[?+*]|\{\d+(?:,\d*)?\})[?+]?)?|\|)*)$/ +# This is not very readable, and also does not handle all features. I have done +# some work on it. + +/^ +(?<re> +# A regular expression is zero or more of these items. + (?: + # An item is one of these: + (?: + [^?+*{}()\[\]\\|]++| # Non-meta characters or unquoted . + \\.| # Quoted . + + \[ # Class, which is [ + (?: # Followed by + \^?\\.| # Optional ^ and any escaped character + \^[^\\]| # OR ^ and not escaped character + [^\\^] # OR neither ^ nor \ + ) # Followed by + (?:[^\]\\]+|\\.)*+ # Zero or more (not ] or \) OR escaped dot + \]| # Class ends with ] + + \( # Parenthesized group + (?: # Start with optional + \?[:=!]| # ? followed by : = ! + \?<[=!]| # OR ?< followed by = or ! + \?> # OR ?> + )? + (?&re)?? # Then a nested <re> + \)| # End parenthesized group + + \(\? # Other parenthesized items + (?: # (? followed by + R| # R + [+-]?\d++ # Or optional +- and digits + ) + \)| # End parens + + \(\* # Verbs + (?: + COMMIT| + FAIL| + MARK:[^)]*| + (?:PRUNE|SKIP|THEN)(?::[^\)]*+)? + ) + \) + ) # End list of items + + # Followed by an optional quantifier + + (?: + (?: + [?+*] # ?+* + | # OR + \{\d+ # { digits + (?:,\d*)? # optionally followed by ,digits + \} # then closing } + | # OR + \{,\d+} # {,digits} + ) + [?+]? # optional ungreedy or possessive + )? + + | # OR an "item" is a branch ending + + \| + + )* # Zero or more top-level items. +) # End regex group. +$/x + [abcdef] + 0: [abcdef] + 1: [abcdef] + [abc\\]def] + 0: [abc\]def] + 1: [abc\]def] + a.b|abcd + 0: a.b|abcd + 1: a.b|abcd + ab()d + 0: ab()d + 1: ab()d + ab{1,3}d + 0: ab{1,3}d + 1: ab{1,3}d + ab{,3}d + 0: ab{,3}d + 1: ab{,3}d + ab(*FAIL)d(*COMMIT)(*SKIP)(*THEN:abc) + 0: ab(*FAIL)d(*COMMIT)(*SKIP)(*THEN:abc) + 1: ab(*FAIL)d(*COMMIT)(*SKIP)(*THEN:abc) + ab(*MARK:xyz) + 0: ab(*MARK:xyz) + 1: ab(*MARK:xyz) + (?=.*[A-Z])(?=.*[a-z])(?=.*[0-9])(?=.*[,;:])(?=.{8,16})(?!.*[\\s]) + 0: (?=.*[A-Z])(?=.*[a-z])(?=.*[0-9])(?=.*[,;:])(?=.{8,16})(?!.*[\s]) + 1: (?=.*[A-Z])(?=.*[a-z])(?=.*[0-9])(?=.*[,;:])(?=.{8,16})(?!.*[\s]) + abcd\\t\\n\\r\\f\\a\\e\\071\\x3b\\^\\\\\\?caxyz + 0: abcd\t\n\r\f\a\e\071\x3b\^\\\?caxyz + 1: abcd\t\n\r\f\a\e\071\x3b\^\\\?caxyz + a*abc?xyz+pqr{3}ab{2,}xy{4,5}pq{0,6}AB{0,}zz + 0: a*abc?xyz+pqr{3}ab{2,}xy{4,5}pq{0,6}AB{0,}zz + 1: a*abc?xyz+pqr{3}ab{2,}xy{4,5}pq{0,6}AB{0,}zz + \\G(?:(?=(\\1.|)(.))){1,13}?(?!.*\\2.*\\2)\\1\\K\\2 + 0: \G(?:(?=(\1.|)(.))){1,13}?(?!.*\2.*\2)\1\K\2 + 1: \G(?:(?=(\1.|)(.))){1,13}?(?!.*\2.*\2)\1\K\2 +\= Expect no match + ab)d +No match + ab(d +No match + {4,5} +No match + a[]b +No match + (a)(?(1)a|b|c) +No match + # End of testinput1 diff --git a/testdata/testoutput2 b/testdata/testoutput2 index c1bc0e64..f3861ed3 100644 --- a/testdata/testoutput2 +++ b/testdata/testoutput2 @@ -17952,6 +17952,19 @@ No match a\=noteol 0: a +# This matches a character that only exists once in the subject, sort of like a +# hypothetical "(.)(?<!\1.+)(?!.*\1)". That has unlimited variable length +# lookbehind, so is invalid. This pattern doesn't work in Perl 5.38.0. + +/\G(?:(?=(\1.|)(.))){1,13}?(?!.*\2.*\2)\1\K\2/g + aaabcccdeee + 0: b + 1: aaa + 2: b + 0: d + 1: ccc + 2: d + # End of testinput2 Error -70: PCRE2_ERROR_BADDATA (unknown error number) Error -62: bad serialized data |