aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Philip Hazel <Philip.Hazel@gmail.com> 2023-11-25 17:10:35 +0000
committer Philip Hazel <Philip.Hazel@gmail.com> 2023-11-25 17:10:35 +0000
commit 198379ca8faaeb60a6677daebfc52480189704c8 (patch)
tree 345c3f7c4edb9e946fceb685a3732a0815366753
parent 630b1cd68f51339a6ef4ff60142d9d66373d5f4d (diff)
download pcre-198379ca8faaeb60a6677daebfc52480189704c8.tar.gz
Add some additional tests that I found lying around.
-rw-r--r-- testdata/testinput1 91
-rw-r--r-- testdata/testinput2 7
-rw-r--r-- testdata/testoutput1 120
-rw-r--r-- testdata/testoutput2 13
4 files changed, 231 insertions, 0 deletions
diff --git a/testdata/testinput1 b/testdata/testinput1
index 533389dc..c0da415e 100644
--- a/testdata/testinput1
+++ b/testdata/testinput1
@@ -6557,4 +6557,95 @@ ef) x/x,mark
/A{ 3, }/
BBAAAAAACC
+# This pattern validates regular expression patterns. The original that I was
+# sent was this:
+# /^((?:(?:[^?+*{}()[\]\\|]+|\\.|\[(?:\^?\\.|\^[^\\]|[^\\^])(?:[^\]\\]+|\\.)*\]|\((?:\?[:=!]|\?<[=!]|\?>)?(?1)??\)|\(\?(?:R|[+-]?\d+)\))(?:(?:[?+*]|\{\d+(?:,\d*)?\})[?+]?)?|\|)*)$/
+# This is not very readable, and also does not handle all features. I have done
+# some work on it.
+
+/^
+(?<re>
+# A regular expression is zero or more of these items.
+ (?:
+ # An item is one of these:
+ (?:
+ [^?+*{}()\[\]\\|]++| # Non-meta characters or unquoted .
+ \\.| # Quoted .
+
+ \[ # Class, which is [
+ (?: # Followed by
+ \^?\\.| # Optional ^ and any escaped character
+ \^[^\\]| # OR ^ and not escaped character
+ [^\\^] # OR neither ^ nor \
+ ) # Followed by
+ (?:[^\]\\]+|\\.)*+ # Zero or more (not ] or \) OR escaped dot
+ \]| # Class ends with ]
+
+ \( # Parenthesized group
+ (?: # Start with optional
+ \?[:=!]| # ? followed by : = !
+ \?<[=!]| # OR ?< followed by = or !
+ \?> # OR ?>
+ )?
+ (?&re)?? # Then a nested <re>
+ \)| # End parenthesized group
+
+ \(\? # Other parenthesized items
+ (?: # (? followed by
+ R| # R
+ [+-]?\d++ # Or optional +- and digits
+ )
+ \)| # End parens
+
+ \(\* # Verbs
+ (?:
+ COMMIT|
+ FAIL|
+ MARK:[^)]*|
+ (?:PRUNE|SKIP|THEN)(?::[^\)]*+)?
+ )
+ \)
+ ) # End list of items
+
+ # Followed by an optional quantifier
+
+ (?:
+ (?:
+ [?+*] # ?+*
+ | # OR
+ \{\d+ # { digits
+ (?:,\d*)? # optionally followed by ,digits
+ \} # then closing }
+ | # OR
+ \{,\d+} # {,digits}
+ )
+ [?+]? # optional ungreedy or possessive
+ )?
+
+ | # OR an "item" is a branch ending
+
+ \|
+
+ )* # Zero or more top-level items.
+) # End regex group.
+$/x
+ [abcdef]
+ [abc\\]def]
+ a.b|abcd
+ ab()d
+ ab{1,3}d
+ ab{,3}d
+ ab(*FAIL)d(*COMMIT)(*SKIP)(*THEN:abc)
+ ab(*MARK:xyz)
+ (?=.*[A-Z])(?=.*[a-z])(?=.*[0-9])(?=.*[,;:])(?=.{8,16})(?!.*[\\s])
+ abcd\\t\\n\\r\\f\\a\\e\\071\\x3b\\^\\\\\\?caxyz
+ a*abc?xyz+pqr{3}ab{2,}xy{4,5}pq{0,6}AB{0,}zz
+ \\G(?:(?=(\\1.|)(.))){1,13}?(?!.*\\2.*\\2)\\1\\K\\2
+\= Expect no match
+ ab)d
+ ab(d
+ {4,5}
+ a[]b
+ (a)(?(1)a|b|c)
+
# End of testinput1
diff --git a/testdata/testinput2 b/testdata/testinput2
index b874f20c..f4597397 100644
--- a/testdata/testinput2
+++ b/testdata/testinput2
@@ -6059,4 +6059,11 @@ a)"xI
a
a\=noteol
+# This matches a character that only exists once in the subject, sort of like a
+# hypothetical "(.)(?<!\1.+)(?!.*\1)". That has unlimited variable length
+# lookbehind, so is invalid. This pattern doesn't work in Perl 5.38.0.
+
+/\G(?:(?=(\1.|)(.))){1,13}?(?!.*\2.*\2)\1\K\2/g
+ aaabcccdeee
+
# End of testinput2
diff --git a/testdata/testoutput1 b/testdata/testoutput1
index bedd9241..84fe0c6f 100644
--- a/testdata/testoutput1
+++ b/testdata/testoutput1
@@ -10375,4 +10375,124 @@ No match
BBAAAAAACC
0: AAAAAA
+# This pattern validates regular expression patterns. The original that I was
+# sent was this:
+# /^((?:(?:[^?+*{}()[\]\\|]+|\\.|\[(?:\^?\\.|\^[^\\]|[^\\^])(?:[^\]\\]+|\\.)*\]|\((?:\?[:=!]|\?<[=!]|\?>)?(?1)??\)|\(\?(?:R|[+-]?\d+)\))(?:(?:[?+*]|\{\d+(?:,\d*)?\})[?+]?)?|\|)*)$/
+# This is not very readable, and also does not handle all features. I have done
+# some work on it.
+
+/^
+(?<re>
+# A regular expression is zero or more of these items.
+ (?:
+ # An item is one of these:
+ (?:
+ [^?+*{}()\[\]\\|]++| # Non-meta characters or unquoted .
+ \\.| # Quoted .
+
+ \[ # Class, which is [
+ (?: # Followed by
+ \^?\\.| # Optional ^ and any escaped character
+ \^[^\\]| # OR ^ and not escaped character
+ [^\\^] # OR neither ^ nor \
+ ) # Followed by
+ (?:[^\]\\]+|\\.)*+ # Zero or more (not ] or \) OR escaped dot
+ \]| # Class ends with ]
+
+ \( # Parenthesized group
+ (?: # Start with optional
+ \?[:=!]| # ? followed by : = !
+ \?<[=!]| # OR ?< followed by = or !
+ \?> # OR ?>
+ )?
+ (?&re)?? # Then a nested <re>
+ \)| # End parenthesized group
+
+ \(\? # Other parenthesized items
+ (?: # (? followed by
+ R| # R
+ [+-]?\d++ # Or optional +- and digits
+ )
+ \)| # End parens
+
+ \(\* # Verbs
+ (?:
+ COMMIT|
+ FAIL|
+ MARK:[^)]*|
+ (?:PRUNE|SKIP|THEN)(?::[^\)]*+)?
+ )
+ \)
+ ) # End list of items
+
+ # Followed by an optional quantifier
+
+ (?:
+ (?:
+ [?+*] # ?+*
+ | # OR
+ \{\d+ # { digits
+ (?:,\d*)? # optionally followed by ,digits
+ \} # then closing }
+ | # OR
+ \{,\d+} # {,digits}
+ )
+ [?+]? # optional ungreedy or possessive
+ )?
+
+ | # OR an "item" is a branch ending
+
+ \|
+
+ )* # Zero or more top-level items.
+) # End regex group.
+$/x
+ [abcdef]
+ 0: [abcdef]
+ 1: [abcdef]
+ [abc\\]def]
+ 0: [abc\]def]
+ 1: [abc\]def]
+ a.b|abcd
+ 0: a.b|abcd
+ 1: a.b|abcd
+ ab()d
+ 0: ab()d
+ 1: ab()d
+ ab{1,3}d
+ 0: ab{1,3}d
+ 1: ab{1,3}d
+ ab{,3}d
+ 0: ab{,3}d
+ 1: ab{,3}d
+ ab(*FAIL)d(*COMMIT)(*SKIP)(*THEN:abc)
+ 0: ab(*FAIL)d(*COMMIT)(*SKIP)(*THEN:abc)
+ 1: ab(*FAIL)d(*COMMIT)(*SKIP)(*THEN:abc)
+ ab(*MARK:xyz)
+ 0: ab(*MARK:xyz)
+ 1: ab(*MARK:xyz)
+ (?=.*[A-Z])(?=.*[a-z])(?=.*[0-9])(?=.*[,;:])(?=.{8,16})(?!.*[\\s])
+ 0: (?=.*[A-Z])(?=.*[a-z])(?=.*[0-9])(?=.*[,;:])(?=.{8,16})(?!.*[\s])
+ 1: (?=.*[A-Z])(?=.*[a-z])(?=.*[0-9])(?=.*[,;:])(?=.{8,16})(?!.*[\s])
+ abcd\\t\\n\\r\\f\\a\\e\\071\\x3b\\^\\\\\\?caxyz
+ 0: abcd\t\n\r\f\a\e\071\x3b\^\\\?caxyz
+ 1: abcd\t\n\r\f\a\e\071\x3b\^\\\?caxyz
+ a*abc?xyz+pqr{3}ab{2,}xy{4,5}pq{0,6}AB{0,}zz
+ 0: a*abc?xyz+pqr{3}ab{2,}xy{4,5}pq{0,6}AB{0,}zz
+ 1: a*abc?xyz+pqr{3}ab{2,}xy{4,5}pq{0,6}AB{0,}zz
+ \\G(?:(?=(\\1.|)(.))){1,13}?(?!.*\\2.*\\2)\\1\\K\\2
+ 0: \G(?:(?=(\1.|)(.))){1,13}?(?!.*\2.*\2)\1\K\2
+ 1: \G(?:(?=(\1.|)(.))){1,13}?(?!.*\2.*\2)\1\K\2
+\= Expect no match
+ ab)d
+No match
+ ab(d
+No match
+ {4,5}
+No match
+ a[]b
+No match
+ (a)(?(1)a|b|c)
+No match
+
# End of testinput1
diff --git a/testdata/testoutput2 b/testdata/testoutput2
index c1bc0e64..f3861ed3 100644
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@@ -17952,6 +17952,19 @@ No match
a\=noteol
0: a
+# This matches a character that only exists once in the subject, sort of like a
+# hypothetical "(.)(?<!\1.+)(?!.*\1)". That has unlimited variable length
+# lookbehind, so is invalid. This pattern doesn't work in Perl 5.38.0.
+
+/\G(?:(?=(\1.|)(.))){1,13}?(?!.*\2.*\2)\1\K\2/g
+ aaabcccdeee
+ 0: b
+ 1: aaa
+ 2: b
+ 0: d
+ 1: ccc
+ 2: d
+
# End of testinput2
Error -70: PCRE2_ERROR_BADDATA (unknown error number)
Error -62: bad serialized data