diff options
author | Ćukasz Langa <lukasz@langa.pl> | 2023-11-06 18:45:08 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-11-06 18:45:08 +0100 |
commit | c1c6bedfd3ee66ad208e0cd9cdd732374c95b83d (patch) | |
tree | f0be10d5934ea02c92aac5d4b248259150341283 | |
parent | edf701c4d83f399bff8612c34fef95da7663945e (diff) | |
download | cpython3-c1c6bedfd3ee66ad208e0cd9cdd732374c95b83d.tar.gz |
[3.8] gh-101180: Fix a bug where iso2022_jp_3 and iso2022_jp_2004 codecs read out of bounds (gh-111695) (gh-111781)
(cherry picked from commit c8faa3568afd255708096f6aa8df0afa80cf7697)
Co-authored-by: Masayuki Moriyama <masayuki.moriyama@miraclelinux.com>
-rw-r--r-- | Lib/test/test_codecencodings_iso2022.py | 46 | ||||
-rw-r--r-- | Misc/NEWS.d/next/Core and Builtins/2023-10-27-19-38-33.gh-issue-102388.vd5YUZ.rst | 1 | ||||
-rw-r--r-- | Modules/cjkcodecs/_codecs_iso2022.c | 9 |
3 files changed, 53 insertions, 3 deletions
diff --git a/Lib/test/test_codecencodings_iso2022.py b/Lib/test/test_codecencodings_iso2022.py index 00ea1c39dd..027dbecc61 100644 --- a/Lib/test/test_codecencodings_iso2022.py +++ b/Lib/test/test_codecencodings_iso2022.py @@ -24,6 +24,52 @@ class Test_ISO2022_JP2(multibytecodec_support.TestBase, unittest.TestCase): (b'ab\x1BNdef', 'replace', 'abdef'), ) +class Test_ISO2022_JP3(multibytecodec_support.TestBase, unittest.TestCase): + encoding = 'iso2022_jp_3' + tstring = multibytecodec_support.load_teststring('iso2022_jp') + codectests = COMMON_CODEC_TESTS + ( + (b'ab\x1BNdef', 'replace', 'ab\x1BNdef'), + (b'\x1B$(O\x2E\x23\x1B(B', 'strict', '\u3402' ), + (b'\x1B$(O\x2E\x22\x1B(B', 'strict', '\U0002000B' ), + (b'\x1B$(O\x24\x77\x1B(B', 'strict', '\u304B\u309A'), + (b'\x1B$(P\x21\x22\x1B(B', 'strict', '\u4E02' ), + (b'\x1B$(P\x7E\x76\x1B(B', 'strict', '\U0002A6B2' ), + ('\u3402', 'strict', b'\x1B$(O\x2E\x23\x1B(B'), + ('\U0002000B', 'strict', b'\x1B$(O\x2E\x22\x1B(B'), + ('\u304B\u309A', 'strict', b'\x1B$(O\x24\x77\x1B(B'), + ('\u4E02', 'strict', b'\x1B$(P\x21\x22\x1B(B'), + ('\U0002A6B2', 'strict', b'\x1B$(P\x7E\x76\x1B(B'), + (b'ab\x1B$(O\x2E\x21\x1B(Bdef', 'replace', 'ab\uFFFDdef'), + ('ab\u4FF1def', 'replace', b'ab?def'), + ) + xmlcharnametest = ( + '\xAB\u211C\xBB = \u2329\u1234\u232A', + b'\x1B$(O\x29\x28\x1B(Bℜ\x1B$(O\x29\x32\x1B(B = ⟨ሴ⟩' + ) + +class Test_ISO2022_JP2004(multibytecodec_support.TestBase, unittest.TestCase): + encoding = 'iso2022_jp_2004' + tstring = multibytecodec_support.load_teststring('iso2022_jp') + codectests = COMMON_CODEC_TESTS + ( + (b'ab\x1BNdef', 'replace', 'ab\x1BNdef'), + (b'\x1B$(Q\x2E\x23\x1B(B', 'strict', '\u3402' ), + (b'\x1B$(Q\x2E\x22\x1B(B', 'strict', '\U0002000B' ), + (b'\x1B$(Q\x24\x77\x1B(B', 'strict', '\u304B\u309A'), + (b'\x1B$(P\x21\x22\x1B(B', 'strict', '\u4E02' ), + (b'\x1B$(P\x7E\x76\x1B(B', 'strict', '\U0002A6B2' ), + ('\u3402', 'strict', b'\x1B$(Q\x2E\x23\x1B(B'), + ('\U0002000B', 'strict', b'\x1B$(Q\x2E\x22\x1B(B'), + ('\u304B\u309A', 'strict', b'\x1B$(Q\x24\x77\x1B(B'), + ('\u4E02', 'strict', b'\x1B$(P\x21\x22\x1B(B'), + ('\U0002A6B2', 'strict', b'\x1B$(P\x7E\x76\x1B(B'), + (b'ab\x1B$(Q\x2E\x21\x1B(Bdef', 'replace', 'ab\u4FF1def'), + ('ab\u4FF1def', 'replace', b'ab\x1B$(Q\x2E\x21\x1B(Bdef'), + ) + xmlcharnametest = ( + '\xAB\u211C\xBB = \u2329\u1234\u232A', + b'\x1B$(Q\x29\x28\x1B(Bℜ\x1B$(Q\x29\x32\x1B(B = ⟨ሴ⟩' + ) + class Test_ISO2022_KR(multibytecodec_support.TestBase, unittest.TestCase): encoding = 'iso2022_kr' tstring = multibytecodec_support.load_teststring('iso2022_kr') diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-10-27-19-38-33.gh-issue-102388.vd5YUZ.rst b/Misc/NEWS.d/next/Core and Builtins/2023-10-27-19-38-33.gh-issue-102388.vd5YUZ.rst new file mode 100644 index 0000000000..268a3d310f --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2023-10-27-19-38-33.gh-issue-102388.vd5YUZ.rst @@ -0,0 +1 @@ +Fix a bug where ``iso2022_jp_3`` and ``iso2022_jp_2004`` codecs read out of bounds diff --git a/Modules/cjkcodecs/_codecs_iso2022.c b/Modules/cjkcodecs/_codecs_iso2022.c index 7394cf67e0..6d906ecdd3 100644 --- a/Modules/cjkcodecs/_codecs_iso2022.c +++ b/Modules/cjkcodecs/_codecs_iso2022.c @@ -181,8 +181,9 @@ ENCODER(iso2022) encoded = MAP_UNMAPPABLE; for (dsg = CONFIG_DESIGNATIONS; dsg->mark; dsg++) { + Py_UCS4 buf[2] = {c, 0}; Py_ssize_t length = 1; - encoded = dsg->encoder(&c, &length); + encoded = dsg->encoder(buf, &length); if (encoded == MAP_MULTIPLE_AVAIL) { /* this implementation won't work for pair * of non-bmp characters. */ @@ -191,9 +192,11 @@ ENCODER(iso2022) return MBERR_TOOFEW; length = -1; } - else + else { + buf[1] = INCHAR2; length = 2; - encoded = dsg->encoder(&c, &length); + } + encoded = dsg->encoder(buf, &length); if (encoded != MAP_UNMAPPABLE) { insize = length; break; |