From 62031cd8f4a04600c11a4b294a6f786581ff6069 Mon Sep 17 00:00:00 2001 From: Endeavour233 <43426962+Endeavour233@users.noreply.github.com> Date: Mon, 13 May 2024 21:42:37 +0800 Subject: fix UTF-32 BOM (#8407) --- okhttp/src/main/kotlin/okhttp3/internal/-UtilCommon.kt | 6 +++--- okhttp/src/main/kotlin/okhttp3/internal/-UtilJvm.kt | 10 +++++++--- okhttp/src/test/java/okhttp3/ResponseBodyJvmTest.kt | 10 +++++----- 3 files changed, 15 insertions(+), 11 deletions(-) diff --git a/okhttp/src/main/kotlin/okhttp3/internal/-UtilCommon.kt b/okhttp/src/main/kotlin/okhttp3/internal/-UtilCommon.kt index 3db8d43a9..16d1321b5 100644 --- a/okhttp/src/main/kotlin/okhttp3/internal/-UtilCommon.kt +++ b/okhttp/src/main/kotlin/okhttp3/internal/-UtilCommon.kt @@ -55,12 +55,12 @@ internal val UNICODE_BOMS = "efbbbf".decodeHex(), // UTF-16BE. "feff".decodeHex(), + // UTF-32LE. + "fffe0000".decodeHex(), // UTF-16LE. "fffe".decodeHex(), // UTF-32BE. - "0000ffff".decodeHex(), - // UTF-32LE. - "ffff0000".decodeHex(), + "0000feff".decodeHex(), ) /** diff --git a/okhttp/src/main/kotlin/okhttp3/internal/-UtilJvm.kt b/okhttp/src/main/kotlin/okhttp3/internal/-UtilJvm.kt index 1f1c4f4e3..71014a703 100644 --- a/okhttp/src/main/kotlin/okhttp3/internal/-UtilJvm.kt +++ b/okhttp/src/main/kotlin/okhttp3/internal/-UtilJvm.kt @@ -94,14 +94,18 @@ internal fun format( return String.format(Locale.US, format, *args) } +/** + * will also strip BOM from the source + */ @Throws(IOException::class) internal fun BufferedSource.readBomAsCharset(default: Charset): Charset { return when (select(UNICODE_BOMS)) { + // a mapping from the index of encoding methods in UNICODE_BOMS to its corresponding encoding method 0 -> UTF_8 1 -> UTF_16BE - 2 -> UTF_16LE - 3 -> UTF_32BE - 4 -> UTF_32LE + 2 -> UTF_32LE + 3 -> UTF_16LE + 4 -> UTF_32BE -1 -> default else -> throw AssertionError() } diff --git a/okhttp/src/test/java/okhttp3/ResponseBodyJvmTest.kt b/okhttp/src/test/java/okhttp3/ResponseBodyJvmTest.kt index a7967e92f..730a77bed 100644 --- a/okhttp/src/test/java/okhttp3/ResponseBodyJvmTest.kt +++ b/okhttp/src/test/java/okhttp3/ResponseBodyJvmTest.kt @@ -62,7 +62,7 @@ class ResponseBodyJvmTest { @Test fun stringBomOverridesExplicitCharset() { - val body = body("0000ffff00000068000000650000006c0000006c0000006f", "utf-8") + val body = body("0000feff00000068000000650000006c0000006c0000006f", "utf-8") assertThat(body.string()).isEqualTo("hello") } @@ -86,13 +86,13 @@ class ResponseBodyJvmTest { @Test fun stringBomUtf32Be() { - val body = body("0000ffff00000068000000650000006c0000006c0000006f") + val body = body("0000feff00000068000000650000006c0000006c0000006f") assertThat(body.string()).isEqualTo("hello") } @Test fun stringBomUtf32Le() { - val body = body("ffff000068000000650000006c0000006c0000006f000000") + val body = body("fffe000068000000650000006c0000006c0000006f000000") assertThat(body.string()).isEqualTo("hello") } @@ -168,13 +168,13 @@ class ResponseBodyJvmTest { @Test fun readerBomUtf32Be() { - val body = body("0000ffff00000068000000650000006c0000006c0000006f") + val body = body("0000feff00000068000000650000006c0000006c0000006f") assertThat(exhaust(body.charStream())).isEqualTo("hello") } @Test fun readerBomUtf32Le() { - val body = body("ffff000068000000650000006c0000006c0000006f000000") + val body = body("fffe000068000000650000006c0000006c0000006f000000") assertThat(exhaust(body.charStream())).isEqualTo("hello") } -- cgit v1.2.3