aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorŁukasz Langa <lukasz@langa.pl>2023-11-06 18:45:08 +0100
committerGitHub <noreply@github.com>2023-11-06 18:45:08 +0100
commitc1c6bedfd3ee66ad208e0cd9cdd732374c95b83d (patch)
treef0be10d5934ea02c92aac5d4b248259150341283
parentedf701c4d83f399bff8612c34fef95da7663945e (diff)
downloadcpython3-c1c6bedfd3ee66ad208e0cd9cdd732374c95b83d.tar.gz
[3.8] gh-101180: Fix a bug where iso2022_jp_3 and iso2022_jp_2004 codecs read out of bounds (gh-111695) (gh-111781)
(cherry picked from commit c8faa3568afd255708096f6aa8df0afa80cf7697) Co-authored-by: Masayuki Moriyama <masayuki.moriyama@miraclelinux.com>
-rw-r--r--Lib/test/test_codecencodings_iso2022.py46
-rw-r--r--Misc/NEWS.d/next/Core and Builtins/2023-10-27-19-38-33.gh-issue-102388.vd5YUZ.rst1
-rw-r--r--Modules/cjkcodecs/_codecs_iso2022.c9
3 files changed, 53 insertions, 3 deletions
diff --git a/Lib/test/test_codecencodings_iso2022.py b/Lib/test/test_codecencodings_iso2022.py
index 00ea1c39dd..027dbecc61 100644
--- a/Lib/test/test_codecencodings_iso2022.py
+++ b/Lib/test/test_codecencodings_iso2022.py
@@ -24,6 +24,52 @@ class Test_ISO2022_JP2(multibytecodec_support.TestBase, unittest.TestCase):
(b'ab\x1BNdef', 'replace', 'abdef'),
)
+class Test_ISO2022_JP3(multibytecodec_support.TestBase, unittest.TestCase):
+ encoding = 'iso2022_jp_3'
+ tstring = multibytecodec_support.load_teststring('iso2022_jp')
+ codectests = COMMON_CODEC_TESTS + (
+ (b'ab\x1BNdef', 'replace', 'ab\x1BNdef'),
+ (b'\x1B$(O\x2E\x23\x1B(B', 'strict', '\u3402' ),
+ (b'\x1B$(O\x2E\x22\x1B(B', 'strict', '\U0002000B' ),
+ (b'\x1B$(O\x24\x77\x1B(B', 'strict', '\u304B\u309A'),
+ (b'\x1B$(P\x21\x22\x1B(B', 'strict', '\u4E02' ),
+ (b'\x1B$(P\x7E\x76\x1B(B', 'strict', '\U0002A6B2' ),
+ ('\u3402', 'strict', b'\x1B$(O\x2E\x23\x1B(B'),
+ ('\U0002000B', 'strict', b'\x1B$(O\x2E\x22\x1B(B'),
+ ('\u304B\u309A', 'strict', b'\x1B$(O\x24\x77\x1B(B'),
+ ('\u4E02', 'strict', b'\x1B$(P\x21\x22\x1B(B'),
+ ('\U0002A6B2', 'strict', b'\x1B$(P\x7E\x76\x1B(B'),
+ (b'ab\x1B$(O\x2E\x21\x1B(Bdef', 'replace', 'ab\uFFFDdef'),
+ ('ab\u4FF1def', 'replace', b'ab?def'),
+ )
+ xmlcharnametest = (
+ '\xAB\u211C\xBB = \u2329\u1234\u232A',
+ b'\x1B$(O\x29\x28\x1B(B&real;\x1B$(O\x29\x32\x1B(B = &lang;&#4660;&rang;'
+ )
+
+class Test_ISO2022_JP2004(multibytecodec_support.TestBase, unittest.TestCase):
+ encoding = 'iso2022_jp_2004'
+ tstring = multibytecodec_support.load_teststring('iso2022_jp')
+ codectests = COMMON_CODEC_TESTS + (
+ (b'ab\x1BNdef', 'replace', 'ab\x1BNdef'),
+ (b'\x1B$(Q\x2E\x23\x1B(B', 'strict', '\u3402' ),
+ (b'\x1B$(Q\x2E\x22\x1B(B', 'strict', '\U0002000B' ),
+ (b'\x1B$(Q\x24\x77\x1B(B', 'strict', '\u304B\u309A'),
+ (b'\x1B$(P\x21\x22\x1B(B', 'strict', '\u4E02' ),
+ (b'\x1B$(P\x7E\x76\x1B(B', 'strict', '\U0002A6B2' ),
+ ('\u3402', 'strict', b'\x1B$(Q\x2E\x23\x1B(B'),
+ ('\U0002000B', 'strict', b'\x1B$(Q\x2E\x22\x1B(B'),
+ ('\u304B\u309A', 'strict', b'\x1B$(Q\x24\x77\x1B(B'),
+ ('\u4E02', 'strict', b'\x1B$(P\x21\x22\x1B(B'),
+ ('\U0002A6B2', 'strict', b'\x1B$(P\x7E\x76\x1B(B'),
+ (b'ab\x1B$(Q\x2E\x21\x1B(Bdef', 'replace', 'ab\u4FF1def'),
+ ('ab\u4FF1def', 'replace', b'ab\x1B$(Q\x2E\x21\x1B(Bdef'),
+ )
+ xmlcharnametest = (
+ '\xAB\u211C\xBB = \u2329\u1234\u232A',
+ b'\x1B$(Q\x29\x28\x1B(B&real;\x1B$(Q\x29\x32\x1B(B = &lang;&#4660;&rang;'
+ )
+
class Test_ISO2022_KR(multibytecodec_support.TestBase, unittest.TestCase):
encoding = 'iso2022_kr'
tstring = multibytecodec_support.load_teststring('iso2022_kr')
diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-10-27-19-38-33.gh-issue-102388.vd5YUZ.rst b/Misc/NEWS.d/next/Core and Builtins/2023-10-27-19-38-33.gh-issue-102388.vd5YUZ.rst
new file mode 100644
index 0000000000..268a3d310f
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2023-10-27-19-38-33.gh-issue-102388.vd5YUZ.rst
@@ -0,0 +1 @@
+Fix a bug where ``iso2022_jp_3`` and ``iso2022_jp_2004`` codecs read out of bounds
diff --git a/Modules/cjkcodecs/_codecs_iso2022.c b/Modules/cjkcodecs/_codecs_iso2022.c
index 7394cf67e0..6d906ecdd3 100644
--- a/Modules/cjkcodecs/_codecs_iso2022.c
+++ b/Modules/cjkcodecs/_codecs_iso2022.c
@@ -181,8 +181,9 @@ ENCODER(iso2022)
encoded = MAP_UNMAPPABLE;
for (dsg = CONFIG_DESIGNATIONS; dsg->mark; dsg++) {
+ Py_UCS4 buf[2] = {c, 0};
Py_ssize_t length = 1;
- encoded = dsg->encoder(&c, &length);
+ encoded = dsg->encoder(buf, &length);
if (encoded == MAP_MULTIPLE_AVAIL) {
/* this implementation won't work for pair
* of non-bmp characters. */
@@ -191,9 +192,11 @@ ENCODER(iso2022)
return MBERR_TOOFEW;
length = -1;
}
- else
+ else {
+ buf[1] = INCHAR2;
length = 2;
- encoded = dsg->encoder(&c, &length);
+ }
+ encoded = dsg->encoder(buf, &length);
if (encoded != MAP_UNMAPPABLE) {
insize = length;
break;