summaryrefslogtreecommitdiff
path: root/formats/json/commonMain/src/kotlinx/serialization/json/internal/lexer/ReaderJsonLexer.kt
diff options
context:
space:
mode:
Diffstat (limited to 'formats/json/commonMain/src/kotlinx/serialization/json/internal/lexer/ReaderJsonLexer.kt')
-rw-r--r--formats/json/commonMain/src/kotlinx/serialization/json/internal/lexer/ReaderJsonLexer.kt184
1 files changed, 184 insertions, 0 deletions
diff --git a/formats/json/commonMain/src/kotlinx/serialization/json/internal/lexer/ReaderJsonLexer.kt b/formats/json/commonMain/src/kotlinx/serialization/json/internal/lexer/ReaderJsonLexer.kt
new file mode 100644
index 00000000..24e5b472
--- /dev/null
+++ b/formats/json/commonMain/src/kotlinx/serialization/json/internal/lexer/ReaderJsonLexer.kt
@@ -0,0 +1,184 @@
+/*
+ * Copyright 2017-2021 JetBrains s.r.o. Use of this source code is governed by the Apache 2.0 license.
+ */
+
+package kotlinx.serialization.json.internal
+
+internal const val BATCH_SIZE: Int = 16 * 1024
+private const val DEFAULT_THRESHOLD = 128
+
+/**
+ * For some reason this hand-rolled implementation is faster than
+ * fun ArrayAsSequence(s: CharArray): CharSequence = java.nio.CharBuffer.wrap(s, 0, length)
+ */
+internal class ArrayAsSequence(internal val buffer: CharArray) : CharSequence {
+ override var length: Int = buffer.size
+
+ override fun get(index: Int): Char = buffer[index]
+
+ override fun subSequence(startIndex: Int, endIndex: Int): CharSequence {
+ return buffer.concatToString(startIndex, minOf(endIndex, length))
+ }
+
+ fun substring(startIndex: Int, endIndex: Int): String {
+ return buffer.concatToString(startIndex, minOf(endIndex, length))
+ }
+
+ fun trim(newSize: Int) {
+ length = minOf(buffer.size, newSize)
+ }
+
+ // source.toString() is used in JsonDecodingException
+ override fun toString(): String = substring(0, length)
+}
+
+internal class ReaderJsonLexer(
+ private val reader: InternalJsonReader,
+ private val buffer: CharArray = CharArrayPoolBatchSize.take()
+) : AbstractJsonLexer() {
+ private var threshold: Int = DEFAULT_THRESHOLD // chars
+
+ override val source: ArrayAsSequence = ArrayAsSequence(buffer)
+
+ init {
+ preload(0)
+ }
+
+ override fun tryConsumeComma(): Boolean {
+ val current = skipWhitespaces()
+ if (current >= source.length || current == -1) return false
+ if (source[current] == ',') {
+ ++currentPosition
+ return true
+ }
+ return false
+ }
+
+ override fun canConsumeValue(): Boolean {
+ ensureHaveChars()
+ var current = currentPosition
+ while (true) {
+ current = prefetchOrEof(current)
+ if (current == -1) break // could be inline function but KT-1436
+ val c = source[current]
+ // Inlined skipWhitespaces without field spill and nested loop. Also faster then char2TokenClass
+ if (c == ' ' || c == '\n' || c == '\r' || c == '\t') {
+ ++current
+ continue
+ }
+ currentPosition = current
+ return isValidValueStart(c)
+ }
+ currentPosition = current
+ return false
+ }
+
+ private fun preload(unprocessedCount: Int) {
+ val buffer = source.buffer
+ if (unprocessedCount != 0) {
+ buffer.copyInto(buffer, 0, currentPosition, currentPosition + unprocessedCount)
+ }
+ var filledCount = unprocessedCount
+ val sizeTotal = source.length
+ while (filledCount != sizeTotal) {
+ val actual = reader.read(buffer, filledCount, sizeTotal - filledCount)
+ if (actual == -1) {
+ // EOF, resizing the array so it matches input size
+ source.trim(filledCount)
+ threshold = -1
+ break
+ }
+ filledCount += actual
+ }
+ currentPosition = 0
+ }
+
+ override fun prefetchOrEof(position: Int): Int {
+ if (position < source.length) return position
+ currentPosition = position
+ ensureHaveChars()
+ if (currentPosition != 0 || source.isEmpty()) return -1 // if something was loaded, then it would be zero.
+ return 0
+ }
+
+ override fun consumeNextToken(): Byte {
+ ensureHaveChars()
+ val source = source
+ var cpos = currentPosition
+ while (true) {
+ cpos = prefetchOrEof(cpos)
+ if (cpos == -1) break
+ val ch = source[cpos++]
+ return when (val tc = charToTokenClass(ch)) {
+ TC_WHITESPACE -> continue
+ else -> {
+ currentPosition = cpos
+ tc
+ }
+ }
+ }
+ currentPosition = cpos
+ return TC_EOF
+ }
+
+ override fun ensureHaveChars() {
+ val cur = currentPosition
+ val oldSize = source.length
+ val spaceLeft = oldSize - cur
+ if (spaceLeft > threshold) return
+ // warning: current position is not updated during string consumption
+ // resizing
+ preload(spaceLeft)
+ }
+
+ override fun consumeKeyString(): String {
+ /*
+ * For strings we assume that escaped symbols are rather an exception, so firstly
+ * we optimistically scan for closing quote via intrinsified and blazing-fast 'indexOf',
+ * than do our pessimistic check for backslash and fallback to slow-path if necessary.
+ */
+ consumeNextToken(STRING)
+ var current = currentPosition
+ val closingQuote = indexOf('"', current)
+ if (closingQuote == -1) {
+ current = prefetchOrEof(current)
+ if (current == -1) fail(TC_STRING)
+ // it's also possible just to resize buffer,
+ // instead of falling back to slow path,
+ // not sure what is better
+ else return consumeString(source, currentPosition, current)
+ }
+ // Now we _optimistically_ know where the string ends (it might have been an escaped quote)
+ for (i in current until closingQuote) {
+ // Encountered escape sequence, should fallback to "slow" path and symmbolic scanning
+ if (source[i] == STRING_ESC) {
+ return consumeString(source, currentPosition, i)
+ }
+ }
+ this.currentPosition = closingQuote + 1
+ return substring(current, closingQuote)
+ }
+
+ override fun indexOf(char: Char, startPos: Int): Int {
+ val src = source
+ for (i in startPos until src.length) {
+ if (src[i] == char) return i
+ }
+ return -1
+ }
+
+ override fun substring(startPos: Int, endPos: Int): String {
+ return source.substring(startPos, endPos)
+ }
+
+ override fun appendRange(fromIndex: Int, toIndex: Int) {
+ escapedString.appendRange(source.buffer, fromIndex, toIndex)
+ }
+
+ // Can be carefully implemented but postponed for now
+ override fun peekLeadingMatchingValue(keyToMatch: String, isLenient: Boolean): String? = null
+
+ fun release() {
+ CharArrayPoolBatchSize.release(buffer)
+ }
+}