summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLeonid Startsev <sandwwraith@users.noreply.github.com>2021-11-10 13:59:05 +0300
committerGitHub <noreply@github.com>2021-11-10 13:59:05 +0300
commit3999818903da417138b297ceb32db37d51a0a139 (patch)
treed63659fd38097dbd30441aaf506107011a2ee4d1
parente721ebea476ac0c81d6642d396a1bcb10a34d301 (diff)
downloadkotlinx.serialization-3999818903da417138b297ceb32db37d51a0a139.tar.gz
Correctly handle buffer boundaries while decoding escape sequences from json stream (#1706)
Fixes #1702
-rw-r--r--formats/json/commonMain/src/kotlinx/serialization/json/internal/lexer/AbstractJsonLexer.kt32
-rw-r--r--formats/json/commonMain/src/kotlinx/serialization/json/internal/lexer/StringJsonLexer.kt6
-rw-r--r--formats/json/commonTest/src/kotlinx/serialization/json/JsonUnicodeTest.kt24
-rw-r--r--formats/json/commonTest/src/kotlinx/serialization/test/TestHelpers.kt20
-rw-r--r--formats/json/jvmMain/src/kotlinx/serialization/json/internal/JsonLexerJvm.kt8
-rw-r--r--formats/json/jvmTest/src/kotlinx/serialization/features/JsonJvmStreamsTest.kt28
6 files changed, 77 insertions, 41 deletions
diff --git a/formats/json/commonMain/src/kotlinx/serialization/json/internal/lexer/AbstractJsonLexer.kt b/formats/json/commonMain/src/kotlinx/serialization/json/internal/lexer/AbstractJsonLexer.kt
index bc72ec3c..5ff983d0 100644
--- a/formats/json/commonMain/src/kotlinx/serialization/json/internal/lexer/AbstractJsonLexer.kt
+++ b/formats/json/commonMain/src/kotlinx/serialization/json/internal/lexer/AbstractJsonLexer.kt
@@ -140,7 +140,7 @@ internal abstract class AbstractJsonLexer {
open fun ensureHaveChars() {}
// Used as bound check in loops
- abstract fun definitelyNotEof(position: Int): Int
+ abstract fun prefetchOrEof(position: Int): Int
abstract fun tryConsumeComma(): Boolean
@@ -182,7 +182,7 @@ internal abstract class AbstractJsonLexer {
val source = source
var cpos = currentPosition
while (true) {
- cpos = definitelyNotEof(cpos)
+ cpos = prefetchOrEof(cpos)
if (cpos == -1) break // could be inline function but KT-1436
val c = source[cpos++]
if (c == ' ' || c == '\n' || c == '\r' || c == '\t') continue
@@ -223,7 +223,7 @@ internal abstract class AbstractJsonLexer {
val source = source
var cpos = currentPosition
while (true) {
- cpos = definitelyNotEof(cpos)
+ cpos = prefetchOrEof(cpos)
if (cpos == -1) break
val ch = source[cpos]
if (ch == ' ' || ch == '\n' || ch == '\r' || ch == '\t') {
@@ -244,7 +244,7 @@ internal abstract class AbstractJsonLexer {
*/
fun tryConsumeNotNull(): Boolean {
var current = skipWhitespaces()
- current = definitelyNotEof(current)
+ current = prefetchOrEof(current)
// Cannot consume null due to EOF, maybe something else
val len = source.length - current
if (len < 4 || current == -1) return true
@@ -264,7 +264,7 @@ internal abstract class AbstractJsonLexer {
var current = currentPosition
// Skip whitespaces
while (true) {
- current = definitelyNotEof(current)
+ current = prefetchOrEof(current)
if (current == -1) break
val c = source[current]
// Faster than char2TokenClass actually
@@ -317,13 +317,15 @@ internal abstract class AbstractJsonLexer {
while (char != STRING) {
if (char == STRING_ESC) {
usedAppend = true
- currentPosition = appendEscape(lastPosition, currentPosition)
+ currentPosition = prefetchOrEof(appendEscape(lastPosition, currentPosition))
+ if (currentPosition == -1)
+ fail("EOF", currentPosition)
lastPosition = currentPosition
} else if (++currentPosition >= source.length) {
usedAppend = true
// end of chunk
appendRange(lastPosition, currentPosition)
- currentPosition = definitelyNotEof(currentPosition)
+ currentPosition = prefetchOrEof(currentPosition)
if (currentPosition == -1)
fail("EOF", currentPosition)
lastPosition = currentPosition
@@ -395,7 +397,7 @@ internal abstract class AbstractJsonLexer {
if (current >= source.length) {
usedAppend = true
appendRange(currentPosition, current)
- val eof = definitelyNotEof(current)
+ val eof = prefetchOrEof(current)
if (eof == -1) {
// to handle plain lenient strings, such as top-level
currentPosition = current
@@ -421,7 +423,7 @@ internal abstract class AbstractJsonLexer {
private fun appendEsc(startPosition: Int): Int {
var currentPosition = startPosition
- currentPosition = definitelyNotEof(currentPosition)
+ currentPosition = prefetchOrEof(currentPosition)
if (currentPosition == -1) fail("Expected escape sequence to continue, got EOF")
val currentChar = source[currentPosition++]
if (currentChar == UNICODE_ESC) {
@@ -435,7 +437,13 @@ internal abstract class AbstractJsonLexer {
}
private fun appendHex(source: CharSequence, startPos: Int): Int {
- if (startPos + 4 >= source.length) fail("Unexpected EOF during unicode escape")
+ if (startPos + 4 >= source.length) {
+ currentPosition = startPos
+ ensureHaveChars()
+ if (currentPosition + 4 >= source.length)
+ fail("Unexpected EOF during unicode escape")
+ return appendHex(source, currentPosition)
+ }
escapedString.append(
((fromHexChar(source, startPos) shl 12) +
(fromHexChar(source, startPos + 1) shl 8) +
@@ -520,7 +528,7 @@ internal abstract class AbstractJsonLexer {
* that doesn't allocate and also doesn't support any radix but 10
*/
var current = skipWhitespaces()
- current = definitelyNotEof(current)
+ current = prefetchOrEof(current)
if (current >= source.length || current == -1) fail("EOF")
val hasQuotation = if (source[current] == STRING) {
// Check it again
@@ -598,7 +606,7 @@ internal abstract class AbstractJsonLexer {
* in 6-th bit and we leverage this fact, our implementation consumes boolean literals
* in a case-insensitive manner.
*/
- var current = definitelyNotEof(start)
+ var current = prefetchOrEof(start)
if (current >= source.length || current == -1) fail("EOF")
return when (source[current++].code or asciiCaseMask) {
't'.code -> {
diff --git a/formats/json/commonMain/src/kotlinx/serialization/json/internal/lexer/StringJsonLexer.kt b/formats/json/commonMain/src/kotlinx/serialization/json/internal/lexer/StringJsonLexer.kt
index fc6a6c1a..0ff980a2 100644
--- a/formats/json/commonMain/src/kotlinx/serialization/json/internal/lexer/StringJsonLexer.kt
+++ b/formats/json/commonMain/src/kotlinx/serialization/json/internal/lexer/StringJsonLexer.kt
@@ -1,8 +1,12 @@
+/*
+ * Copyright 2017-2021 JetBrains s.r.o. Use of this source code is governed by the Apache 2.0 license.
+ */
+
package kotlinx.serialization.json.internal
internal class StringJsonLexer(override val source: String) : AbstractJsonLexer() {
- override fun definitelyNotEof(position: Int): Int = if (position < source.length) position else -1
+ override fun prefetchOrEof(position: Int): Int = if (position < source.length) position else -1
override fun consumeNextToken(): Byte {
val source = source
diff --git a/formats/json/commonTest/src/kotlinx/serialization/json/JsonUnicodeTest.kt b/formats/json/commonTest/src/kotlinx/serialization/json/JsonUnicodeTest.kt
index 51da529a..1f6f814f 100644
--- a/formats/json/commonTest/src/kotlinx/serialization/json/JsonUnicodeTest.kt
+++ b/formats/json/commonTest/src/kotlinx/serialization/json/JsonUnicodeTest.kt
@@ -1,8 +1,11 @@
+/*
+ * Copyright 2017-2021 JetBrains s.r.o. Use of this source code is governed by the Apache 2.0 license.
+ */
+
package kotlinx.serialization.json
import kotlinx.serialization.*
import kotlinx.serialization.builtins.*
-import kotlinx.serialization.json.internal.*
import kotlinx.serialization.test.*
import kotlin.random.*
import kotlin.test.*
@@ -59,7 +62,7 @@ class JsonUnicodeTest : JsonTestBase() {
@Test
fun testRandomEscapeSequences() = noJs { // Too slow on JS
repeat(10_000) {
- val s = generateRandomString()
+ val s = generateRandomUnicodeString(Random.nextInt(1, 2047))
try {
assertSerializedAndRestored(s, String.serializer())
} catch (e: Throwable) {
@@ -68,21 +71,4 @@ class JsonUnicodeTest : JsonTestBase() {
}
}
}
-
- private fun generateRandomString(): String {
- val size = Random.nextInt(1, 2047)
- return buildString(size) {
- repeat(size) {
- val pickEscape = Random.nextBoolean()
- if (pickEscape) {
- // Definitely escape symbol
- // null can be appended as well, completely okay
- append(ESCAPE_STRINGS.random())
- } else {
- // Any symbol, including escaping one
- append(Char(Random.nextInt(Char.MIN_VALUE.code..Char.MAX_VALUE.code)))
- }
- }
- }
- }
}
diff --git a/formats/json/commonTest/src/kotlinx/serialization/test/TestHelpers.kt b/formats/json/commonTest/src/kotlinx/serialization/test/TestHelpers.kt
index 9829503b..d178c871 100644
--- a/formats/json/commonTest/src/kotlinx/serialization/test/TestHelpers.kt
+++ b/formats/json/commonTest/src/kotlinx/serialization/test/TestHelpers.kt
@@ -1,10 +1,13 @@
/*
- * Copyright 2017-2020 JetBrains s.r.o. Use of this source code is governed by the Apache 2.0 license.
+ * Copyright 2017-2021 JetBrains s.r.o. Use of this source code is governed by the Apache 2.0 license.
*/
package kotlinx.serialization.test
import kotlinx.serialization.*
import kotlinx.serialization.descriptors.*
+import kotlinx.serialization.json.internal.ESCAPE_STRINGS
+import kotlin.random.Random
+import kotlin.random.nextInt
import kotlin.test.*
fun SerialDescriptor.assertDescriptorEqualsTo(other: SerialDescriptor) {
@@ -40,3 +43,18 @@ inline fun assertFailsWithMissingField(block: () -> Unit) {
val e = assertFailsWith<SerializationException>(block = block)
assertTrue(e.message?.contains("but it was missing") ?: false)
}
+
+fun generateRandomUnicodeString(size: Int): String {
+ return buildString(size) {
+ repeat(size) {
+ val pickEscape = Random.nextBoolean()
+ if (pickEscape) {
+ // Definitely an escape symbol
+ append(ESCAPE_STRINGS.random().takeIf { it != null } ?: 'N')
+ } else {
+ // Any symbol, including escaping one
+ append(Char(Random.nextInt(Char.MIN_VALUE.code..Char.MAX_VALUE.code)).takeIf { it.isDefined() && !it.isSurrogate()} ?: 'U')
+ }
+ }
+ }
+}
diff --git a/formats/json/jvmMain/src/kotlinx/serialization/json/internal/JsonLexerJvm.kt b/formats/json/jvmMain/src/kotlinx/serialization/json/internal/JsonLexerJvm.kt
index 6a4c19c7..eabfd088 100644
--- a/formats/json/jvmMain/src/kotlinx/serialization/json/internal/JsonLexerJvm.kt
+++ b/formats/json/jvmMain/src/kotlinx/serialization/json/internal/JsonLexerJvm.kt
@@ -58,7 +58,7 @@ internal class ReaderJsonLexer(
ensureHaveChars()
var current = currentPosition
while (true) {
- current = definitelyNotEof(current)
+ current = prefetchOrEof(current)
if (current == -1) break // could be inline function but KT-1436
val c = source[current]
// Inlined skipWhitespaces without field spill and nested loop. Also faster then char2TokenClass
@@ -93,7 +93,7 @@ internal class ReaderJsonLexer(
currentPosition = 0
}
- override fun definitelyNotEof(position: Int): Int {
+ override fun prefetchOrEof(position: Int): Int {
if (position < source.length) return position
currentPosition = position
ensureHaveChars()
@@ -106,7 +106,7 @@ internal class ReaderJsonLexer(
val source = source
var cpos = currentPosition
while (true) {
- cpos = definitelyNotEof(cpos)
+ cpos = prefetchOrEof(cpos)
if (cpos == -1) break
val ch = source[cpos++]
return when (val tc = charToTokenClass(ch)) {
@@ -141,7 +141,7 @@ internal class ReaderJsonLexer(
var current = currentPosition
val closingQuote = indexOf('"', current)
if (closingQuote == -1) {
- current = definitelyNotEof(current)
+ current = prefetchOrEof(current)
if (current == -1) fail(TC_STRING)
// it's also possible just to resize buffer,
// instead of falling back to slow path,
diff --git a/formats/json/jvmTest/src/kotlinx/serialization/features/JsonJvmStreamsTest.kt b/formats/json/jvmTest/src/kotlinx/serialization/features/JsonJvmStreamsTest.kt
index 1ed4078f..b576a2c1 100644
--- a/formats/json/jvmTest/src/kotlinx/serialization/features/JsonJvmStreamsTest.kt
+++ b/formats/json/jvmTest/src/kotlinx/serialization/features/JsonJvmStreamsTest.kt
@@ -4,13 +4,15 @@
package kotlinx.serialization.features
-import kotlinx.serialization.*
+import kotlinx.serialization.SerializationException
+import kotlinx.serialization.StringData
import kotlinx.serialization.builtins.serializer
-import kotlinx.serialization.json.Json
+import kotlinx.serialization.json.*
import kotlinx.serialization.json.internal.BATCH_SIZE
-import kotlinx.serialization.test.decodeViaStream
-import kotlinx.serialization.test.encodeViaStream
+import kotlinx.serialization.test.*
import org.junit.Test
+import java.io.ByteArrayInputStream
+import java.io.ByteArrayOutputStream
import kotlin.test.assertEquals
import kotlin.test.assertFailsWith
@@ -65,4 +67,22 @@ class JsonJvmStreamsTest {
Json.decodeViaStream(String.serializer(), "\"")
}
}
+
+ @Test
+ fun testRandomEscapeSequences() {
+ repeat(1000) {
+ val s = generateRandomUnicodeString(strLen)
+ try {
+ val serializer = String.serializer()
+ val b = ByteArrayOutputStream()
+ Json.encodeToStream(serializer, s, b)
+ val restored = Json.decodeFromStream(serializer, ByteArrayInputStream(b.toByteArray()))
+ assertEquals(s, restored)
+ } catch (e: Throwable) {
+ // Not assertion error to preserve cause
+ throw IllegalStateException("Unexpectedly failed test, cause string: $s", e)
+ }
+ }
+ }
+
}