diff options
author | Philip Hazel <Philip.Hazel@gmail.com> | 2023-11-22 11:34:27 +0000 |
---|---|---|
committer | Philip Hazel <Philip.Hazel@gmail.com> | 2023-11-22 11:35:40 +0000 |
commit | 57ee073252dc826dbe412846a83421d2bb4483bc (patch) | |
tree | 2da3d65a3cc09edc70384ce67c384ec1f753b3c9 | |
parent | 45dcb3de900b77583f4e9daa663004c55fad4794 (diff) | |
download | pcre-57ee073252dc826dbe412846a83421d2bb4483bc.tar.gz |
Fix bad patch in 05206d66. The interpreter was handling NOTEOL incorrectly in general after trying to fix it in invalid UTF subjects.
-rw-r--r-- | src/pcre2_intmodedep.h | 3 | ||||
-rw-r--r-- | src/pcre2_match.c | 7 | ||||
-rw-r--r-- | testdata/testinput2 | 4 | ||||
-rw-r--r-- | testdata/testoutput2 | 6 |
4 files changed, 15 insertions, 5 deletions
```diff
diff --git a/src/pcre2_intmodedep.h b/src/pcre2_intmodedep.h
index 5e7e10d2..423764d2 100644
--- a/src/pcre2_intmodedep.h
+++ b/src/pcre2_intmodedep.h
@@ -880,7 +880,8 @@ typedef struct match_block {
   PCRE2_SPTR start_code;          /* For use when recursing */
   PCRE2_SPTR start_subject;       /* Start of the subject string */
   PCRE2_SPTR check_subject;       /* Where UTF-checked from */
-  PCRE2_SPTR end_subject;         /* End of the subject string */
+  PCRE2_SPTR end_subject;         /* Usable end of the subject string */
+  PCRE2_SPTR true_end_subject;    /* Actual end of the subject string */
   PCRE2_SPTR end_match_ptr;       /* Subject position at end match */
   PCRE2_SPTR start_used_ptr;      /* Earliest consulted character */
   PCRE2_SPTR last_used_ptr;       /* Latest consulted character */
diff --git a/src/pcre2_match.c b/src/pcre2_match.c
index ea039765..c5e84cea 100644
--- a/src/pcre2_match.c
+++ b/src/pcre2_match.c
@@ -6076,12 +6076,10 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode,
     if ((mb->poptions & PCRE2_DOLLAR_ENDONLY) == 0) goto ASSERT_NL_OR_EOS;
 
     /* Fall through */
-    /* Unconditional end of subject assertion (\z). We must check NOTEOL
-    because it gets set for invalid UTF fragments. */
+    /* Unconditional end of subject assertion (\z). */
 
     case OP_EOD:
-    if (Feptr < mb->end_subject || (mb->moptions & PCRE2_NOTEOL) != 0)
-      RRETURN(MATCH_NOMATCH);
+    if (Feptr < mb->true_end_subject) RRETURN(MATCH_NOMATCH);
     if (mb->partial != 0)
       {
       mb->hitend = TRUE;
@@ -6891,6 +6889,7 @@ mb->callout_data = mcontext->callout_data;
 mb->start_subject = subject;
 mb->start_offset = start_offset;
 mb->end_subject = end_subject;
+mb->true_end_subject = true_end_subject;
 mb->hasthen = (re->flags & PCRE2_HASTHEN) != 0;
 mb->allowemptypartial = (re->max_lookbehind > 0) ||
     (re->flags & PCRE2_MATCH_EMPTY) != 0;
diff --git a/testdata/testinput2 b/testdata/testinput2
index 0e24e78e..b874f20c 100644
--- a/testdata/testinput2
+++ b/testdata/testinput2
@@ -6055,4 +6055,8 @@ a)"xI
 
 /(*ACCEPT)+/B,auto_callout
 
+/a\z/
+    a
+    a\=noteol
+
 # End of testinput2
diff --git a/testdata/testoutput2 b/testdata/testoutput2
index 68800fb4..c1bc0e64 100644
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@@ -17946,6 +17946,12 @@ No match
 End
 ------------------------------------------------------------------
 
+/a\z/
+    a
+ 0: a
+    a\=noteol
+ 0: a
+
 # End of testinput2
 Error -70: PCRE2_ERROR_BADDATA (unknown error number)
 Error -62: bad serialized data
```