summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Sergey Shanshin <sergey.shanshin@jetbrains.com> 2022-09-09 14:05:55 +0300
committer GitHub <noreply@github.com> 2022-09-09 14:05:55 +0300
commit 79de734f60d4efd86d9c3174ba5f212f276ee125 (patch)
tree b9b26977bb7066744f1b1f76493afa1dc13fe5c3
parent 2fe2efa7e97f4a3093e4437a85bfe8c0b2204992 (diff)
download kotlinx.serialization-79de734f60d4efd86d9c3174ba5f212f276ee125.tar.gz
Added support of UTF-16 surrogate pairs to okio streams
Fixes #2030
-rw-r--r-- formats/json-okio/commonMain/src/kotlinx/serialization/json/okio/internal/OkioJsonStreams.kt | 46
-rw-r--r-- formats/json-tests/commonTest/src/kotlinx/serialization/features/EmojiTest.kt | 22
-rw-r--r-- formats/json-tests/jvmTest/src/kotlinx/serialization/test/JsonHelpers.kt | 2
3 files changed, 67 insertions, 3 deletions
diff --git a/formats/json-okio/commonMain/src/kotlinx/serialization/json/okio/internal/OkioJsonStreams.kt b/formats/json-okio/commonMain/src/kotlinx/serialization/json/okio/internal/OkioJsonStreams.kt
index ae8de471..6d3c6c6d 100644
--- a/formats/json-okio/commonMain/src/kotlinx/serialization/json/okio/internal/OkioJsonStreams.kt
+++ b/formats/json-okio/commonMain/src/kotlinx/serialization/json/okio/internal/OkioJsonStreams.kt
@@ -46,12 +46,54 @@ internal class JsonToOkioStreamWriter(private val target: BufferedSink) : JsonWr
}
}
+// Largest code point that fits in a single Char; anything above it is written as a UTF-16 surrogate pair
+private const val SINGLE_CHAR_MAX_CODEPOINT = Char.MAX_VALUE.code
+// Added to (codePoint ushr 10) to form the high surrogate; the 0x010000 code point offset is pre-subtracted here
+private const val HIGH_SURROGATE_HEADER = 0xd800 - (0x010000 ushr 10)
+// Added to (codePoint and 0x03ff) to form the low surrogate
+private const val LOW_SURROGATE_HEADER = 0xdc00
+
+
internal class OkioSerialReader(private val source: BufferedSource): SerialReader {
+ /*
+ A sequence of code points is read from UTF-8; some of them take 2 chars when stored as UTF-16.
+ If the last code point requires 2 chars and the array is already full, the second char is buffered here until the next read.
+ */
+ private var bufferedChar: Char? = null
+
override fun read(buffer: CharArray, bufferOffset: Int, count: Int): Int {
var i = 0
- while (i < count && !source.exhausted()) {
- buffer[bufferOffset + i] = source.readUtf8CodePoint().toChar()
+
+ if (bufferedChar != null) { // emit the low surrogate held over from the previous call first
+ buffer[bufferOffset + i] = bufferedChar!!
i++
+ bufferedChar = null
+ }
+
+ while (i < count && !source.exhausted()) {
+ val codePoint = source.readUtf8CodePoint()
+ if (codePoint <= SINGLE_CHAR_MAX_CODEPOINT) { // code point fits in a single Char
+ buffer[bufferOffset + i] = codePoint.toChar()
+ i++
+ } else {
+ // Surrogate-pair encoding adapted from the okio library with minor changes, see https://github.com/square/okio
+ // UTF-16 high surrogate: 110110xxxxxxxxxx (10 bits)
+ // UTF-16 low surrogate: 110111yyyyyyyyyy (10 bits)
+ // Unicode code point: 00010000000000000000 + xxxxxxxxxxyyyyyyyyyy (21 bits)
+ val upChar = ((codePoint ushr 10) + HIGH_SURROGATE_HEADER).toChar()
+ val lowChar = ((codePoint and 0x03ff) + LOW_SURROGATE_HEADER).toChar()
+
+ buffer[bufferOffset + i] = upChar
+ i++
+
+ if (i < count) {
+ buffer[bufferOffset + i] = lowChar
+ i++
+ } else {
+ // The requested range is full: keep the low surrogate in bufferedChar so the next read() call emits it first
+ bufferedChar = lowChar
+ }
+ }
}
return if (i > 0) i else -1
}
diff --git a/formats/json-tests/commonTest/src/kotlinx/serialization/features/EmojiTest.kt b/formats/json-tests/commonTest/src/kotlinx/serialization/features/EmojiTest.kt
new file mode 100644
index 00000000..1e3904ab
--- /dev/null
+++ b/formats/json-tests/commonTest/src/kotlinx/serialization/features/EmojiTest.kt
@@ -0,0 +1,22 @@
+/*
+ * Copyright 2017-2022 JetBrains s.r.o. Use of this source code is governed by the Apache 2.0 license.
+ */
+
+package kotlinx.serialization.features
+
+import kotlinx.serialization.builtins.serializer
+import kotlinx.serialization.json.JsonTestBase
+import kotlin.test.Test
+
+
+class EmojiTest : JsonTestBase() {
+
+ @Test
+ fun testEmojiString() {
+ assertJsonFormAndRestored(
+ String.serializer(),
+ "\uD83C\uDF34",
+ "\"\uD83C\uDF34\""
+ )
+ }
+}
diff --git a/formats/json-tests/jvmTest/src/kotlinx/serialization/test/JsonHelpers.kt b/formats/json-tests/jvmTest/src/kotlinx/serialization/test/JsonHelpers.kt
index ebb49c35..9220bbd3 100644
--- a/formats/json-tests/jvmTest/src/kotlinx/serialization/test/JsonHelpers.kt
+++ b/formats/json-tests/jvmTest/src/kotlinx/serialization/test/JsonHelpers.kt
@@ -11,7 +11,7 @@ actual fun <T> Json.encodeViaStream(
): String {
val output = ByteArrayOutputStream()
encodeToStream(serializer, value, output)
- return output.toString()
+ return output.toString(Charsets.UTF_8.name())
}
actual fun <T> Json.decodeViaStream(