author     Sergey Shanshin <sergey.shanshin@jetbrains.com>   2022-08-15 18:52:44 +0300
committer  GitHub <noreply@github.com>                       2022-08-15 18:52:44 +0300
commit     0c0648e8894a95ff57a2cb1c9e52f8e6831c4361 (patch)
tree       06650548ea474140e85842bd7d88cae30c30902f
parent     83b6e332df01c035f2cee55fc232ab6f55fea5ed (diff)
download   kotlinx.serialization-0c0648e8894a95ff57a2cb1c9e52f8e6831c4361.tar.gz
Fixed decoding of huge JSON data for okio streams
Fixes #2006

Co-authored-by: Leonid Startsev <sandwwraith@users.noreply.github.com>
-rw-r--r--  formats/json-okio/commonMain/src/kotlinx/serialization/json/okio/internal/OkioJsonStreams.kt      |  2
-rw-r--r--  formats/json-tests/commonTest/src/kotlinx/serialization/json/JsonHugeDataSerializationTest.kt     | 40
-rw-r--r--  formats/json/commonMain/src/kotlinx/serialization/json/internal/lexer/JsonLexer.kt                | 52
3 files changed, 71 insertions, 23 deletions
diff --git a/formats/json-okio/commonMain/src/kotlinx/serialization/json/okio/internal/OkioJsonStreams.kt b/formats/json-okio/commonMain/src/kotlinx/serialization/json/okio/internal/OkioJsonStreams.kt
index 2d5485c1..ae8de471 100644
--- a/formats/json-okio/commonMain/src/kotlinx/serialization/json/okio/internal/OkioJsonStreams.kt
+++ b/formats/json-okio/commonMain/src/kotlinx/serialization/json/okio/internal/OkioJsonStreams.kt
@@ -50,7 +50,7 @@ internal class OkioSerialReader(private val source: BufferedSource): SerialReader
override fun read(buffer: CharArray, bufferOffset: Int, count: Int): Int {
var i = 0
while (i < count && !source.exhausted()) {
- buffer[i] = source.readUtf8CodePoint().toChar()
+ buffer[bufferOffset + i] = source.readUtf8CodePoint().toChar()
i++
}
return if (i > 0) i else -1
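
The one-character change above is the actual bug fix: OkioSerialReader ignored `bufferOffset` and always wrote decoded characters starting at index 0, so whenever the lexer refilled only the tail of its buffer (keeping an unprocessed prefix at the front), the freshly read characters overwrote that prefix and large inputs failed to decode. A minimal standalone sketch of the reader contract, assuming a hypothetical `CharProvider` in place of okio's BufferedSource (illustrative names, not library code):

    // Illustrative only: a reader that honors bufferOffset, mirroring the fixed line above.
    interface CharProvider {
        fun exhausted(): Boolean
        fun nextChar(): Char
    }

    class OffsetAwareReader(private val source: CharProvider) {
        // Fills buffer[bufferOffset until bufferOffset + count) and returns the number
        // of characters written, or -1 if the source was already exhausted.
        fun read(buffer: CharArray, bufferOffset: Int, count: Int): Int {
            var i = 0
            while (i < count && !source.exhausted()) {
                // Writing to buffer[i] instead of buffer[bufferOffset + i] (the old code)
                // clobbers the already-buffered prefix whenever bufferOffset > 0.
                buffer[bufferOffset + i] = source.nextChar()
                i++
            }
            return if (i > 0) i else -1
        }
    }
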
diff --git a/formats/json-tests/commonTest/src/kotlinx/serialization/json/JsonHugeDataSerializationTest.kt b/formats/json-tests/commonTest/src/kotlinx/serialization/json/JsonHugeDataSerializationTest.kt
new file mode 100644
index 00000000..0a633268
--- /dev/null
+++ b/formats/json-tests/commonTest/src/kotlinx/serialization/json/JsonHugeDataSerializationTest.kt
@@ -0,0 +1,40 @@
+/*
+ * Copyright 2017-2022 JetBrains s.r.o. Use of this source code is governed by the Apache 2.0 license.
+ */
+
+package kotlinx.serialization.json
+
+import kotlinx.serialization.Serializable
+import kotlin.test.Test
+
+class JsonHugeDataSerializationTest : JsonTestBase() {
+
+ @Serializable
+ private data class Node(
+ val children: List<Node>
+ )
+
+ private fun createNodes(count: Int, depth: Int): List<Node> {
+ val ret = mutableListOf<Node>()
+ if (depth == 0) return ret
+ for (i in 0 until count) {
+ ret.add(Node(createNodes(1, depth - 1)))
+ }
+ return ret
+ }
+
+ @Test
+ fun test() {
+ // create some huge instance
+ val rootNode = Node(createNodes(1000, 10))
+
+ val expectedJson = Json.encodeToString(Node.serializer(), rootNode)
+
+ /*
+ When checking the encoding, assertJsonFormAndRestored calls Json.encodeToString(...) for `JsonTestingMode.STREAMING`.
+ Since the string `expectedJson` was generated by that same function, the test will always consider the STREAMING
+ encoding correct, even if it is actually broken. So only the TREE, JAVA_STREAMS and OKIO modes are really tested here.
+ */
+ assertJsonFormAndRestored(Node.serializer(), rootNode, expectedJson)
+ }
+}
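
The new test drives the okio path only indirectly, through JsonTestBase. For reference, a hedged usage sketch of what that path looks like from user code, assuming the experimental `encodeToBufferedSink` / `decodeFromBufferedSource` extensions from the json-okio module (exact names and parameter order may differ between versions), with `Node` standing in for the test's serializable class:

    import kotlinx.serialization.ExperimentalSerializationApi
    import kotlinx.serialization.json.Json
    import kotlinx.serialization.json.okio.decodeFromBufferedSource
    import kotlinx.serialization.json.okio.encodeToBufferedSink
    import okio.Buffer

    @OptIn(ExperimentalSerializationApi::class)
    fun roundTripThroughOkio(root: Node): Node {
        // okio.Buffer implements both BufferedSink and BufferedSource,
        // so it can stand in for a file or network stream in a test.
        val buffer = Buffer()
        Json.encodeToBufferedSink(Node.serializer(), root, buffer)
        // Before this fix, inputs larger than one lexer batch could fail to decode here.
        return Json.decodeFromBufferedSource(Node.serializer(), buffer)
    }
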
diff --git a/formats/json/commonMain/src/kotlinx/serialization/json/internal/lexer/JsonLexer.kt b/formats/json/commonMain/src/kotlinx/serialization/json/internal/lexer/JsonLexer.kt
index e02364ee..83483eac 100644
--- a/formats/json/commonMain/src/kotlinx/serialization/json/internal/lexer/JsonLexer.kt
+++ b/formats/json/commonMain/src/kotlinx/serialization/json/internal/lexer/JsonLexer.kt
@@ -14,23 +14,31 @@ private const val DEFAULT_THRESHOLD = 128
* For some reason this hand-rolled implementation is faster than
* fun ArrayAsSequence(s: CharArray): CharSequence = java.nio.CharBuffer.wrap(s, 0, length)
*/
-private class ArrayAsSequence(private val source: CharArray) : CharSequence {
- override val length: Int = source.size
+internal class ArrayAsSequence(val buffer: CharArray) : CharSequence {
+ override var length: Int = buffer.size
- override fun get(index: Int): Char = source[index]
+ override fun get(index: Int): Char = buffer[index]
override fun subSequence(startIndex: Int, endIndex: Int): CharSequence {
- return source.concatToString(startIndex, endIndex)
+ return buffer.concatToString(startIndex, minOf(endIndex, length))
+ }
+
+ fun substring(startIndex: Int, endIndex: Int): String {
+ return buffer.concatToString(startIndex, minOf(endIndex, length))
+ }
+
+ fun trim(newSize: Int) {
+ length = minOf(buffer.size, newSize)
}
}
internal class ReaderJsonLexer(
private val reader: SerialReader,
- private var _source: CharArray = CharArray(BATCH_SIZE)
+ charsBuffer: CharArray = CharArray(BATCH_SIZE)
) : AbstractJsonLexer() {
private var threshold: Int = DEFAULT_THRESHOLD // chars
- override var source: CharSequence = ArrayAsSequence(_source)
+ override val source: ArrayAsSequence = ArrayAsSequence(charsBuffer)
init {
preload(0)
@@ -65,22 +73,22 @@ internal class ReaderJsonLexer(
return false
}
- private fun preload(spaceLeft: Int) {
- val buffer = _source
- buffer.copyInto(buffer, 0, currentPosition, currentPosition + spaceLeft)
- var read = spaceLeft
- val sizeTotal = _source.size
- while (read != sizeTotal) {
- val actual = reader.read(buffer, read, sizeTotal - read)
+ private fun preload(unprocessedCount: Int) {
+ val buffer = source.buffer
+ if (unprocessedCount != 0) {
+ buffer.copyInto(buffer, 0, currentPosition, currentPosition + unprocessedCount)
+ }
+ var filledCount = unprocessedCount
+ val sizeTotal = source.length
+ while (filledCount != sizeTotal) {
+ val actual = reader.read(buffer, filledCount, sizeTotal - filledCount)
if (actual == -1) {
// EOF, resizing the array so it matches input size
- // Can also be done by extracting source.length to a separate var
- _source = _source.copyOf(read)
- source = ArrayAsSequence(_source)
+ source.trim(filledCount)
threshold = -1
break
}
- read += actual
+ filledCount += actual
}
currentPosition = 0
}
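
The renamed preload(unprocessedCount) keeps the same sliding-window scheme: slide the not-yet-consumed tail of the buffer to index 0, refill the rest from the SerialReader, and on EOF shrink the logical length instead of reallocating the array. A simplified standalone sketch of that scheme (illustrative names, no threshold handling, not the library class):

    // `read` follows the SerialReader contract above: fill buffer[offset until offset + count)
    // and return the number of characters read, or -1 on EOF.
    class SlidingWindow(size: Int, private val read: (CharArray, Int, Int) -> Int) {
        val buffer = CharArray(size)
        var length = 0      // logical length of valid data; may end up smaller than buffer.size after EOF
            private set
        var position = 0    // index of the next unprocessed character

        fun refill() {
            val unprocessed = length - position
            if (unprocessed != 0) {
                // Slide the not-yet-consumed tail to the front of the buffer.
                buffer.copyInto(buffer, 0, position, position + unprocessed)
            }
            var filled = unprocessed
            while (filled != buffer.size) {
                val actual = read(buffer, filled, buffer.size - filled)
                if (actual == -1) break   // EOF: stop refilling; length below records the trimmed size
                filled += actual
            }
            length = filled
            position = 0
        }
    }

A consumer reads buffer[position until length), advances position, and calls refill() once fewer characters than some threshold remain, much like ensureHaveChars() below.
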
@@ -115,7 +123,7 @@ internal class ReaderJsonLexer(
override fun ensureHaveChars() {
val cur = currentPosition
- val oldSize = _source.size
+ val oldSize = source.length
val spaceLeft = oldSize - cur
if (spaceLeft > threshold) return
// warning: current position is not updated during string consumption
@@ -152,19 +160,19 @@ internal class ReaderJsonLexer(
}
override fun indexOf(char: Char, startPos: Int): Int {
- val src = _source
- for (i in startPos until src.size) {
+ val src = source
+ for (i in startPos until src.length) {
if (src[i] == char) return i
}
return -1
}
override fun substring(startPos: Int, endPos: Int): String {
- return _source.concatToString(startPos, endPos)
+ return source.substring(startPos, endPos)
}
override fun appendRange(fromIndex: Int, toIndex: Int) {
- escapedString.appendRange(_source, fromIndex, toIndex)
+ escapedString.appendRange(source.buffer, fromIndex, toIndex)
}
// Can be carefully implemented but postponed for now
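
The remaining hunks replace direct uses of the old `_source` array with the ArrayAsSequence wrapper, so substring and indexOf respect the trimmed logical length rather than the full array capacity. A small standalone illustration of why clamping against that length matters (a demonstration copy, not the library class):

    class TrimmableCharSequence(val buffer: CharArray) : CharSequence {
        override var length: Int = buffer.size

        override fun get(index: Int): Char = buffer[index]

        override fun subSequence(startIndex: Int, endIndex: Int): CharSequence =
            buffer.concatToString(startIndex, minOf(endIndex, length))

        // After EOF the backing array keeps its capacity, but the logical length shrinks
        // so that stale characters past the end can never leak into substrings.
        fun trim(newSize: Int) {
            length = minOf(buffer.size, newSize)
        }
    }

    fun demo() {
        val seq = TrimmableCharSequence(charArrayOf('h', 'u', 'g', 'e', 'X', 'X'))
        seq.trim(4)                     // pretend only 4 characters were read before EOF
        println(seq.subSequence(0, 6))  // prints "huge": endIndex is clamped to the logical length
    }
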