summaryrefslogtreecommitdiff
path: root/formats/json/commonMain/src/kotlinx/serialization/json/internal/lexer/StringJsonLexer.kt
blob: 9f2e519020c2cc902c18f4be35a7770dec546d31 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
/*
 * Copyright 2017-2021 JetBrains s.r.o. Use of this source code is governed by the Apache 2.0 license.
 */

package kotlinx.serialization.json.internal

/**
 * [AbstractJsonLexer] implementation that tokenizes JSON held entirely in an in-memory [String].
 *
 * Throughout this class a [currentPosition] of `-1` is treated as "end of input already reached".
 * Whitespace is recognized by direct comparison against `' '`, `'\n'`, `'\r'` and `'\t'`.
 */
internal class StringJsonLexer(override val source: String) : AbstractJsonLexer() {

    /**
     * Returns [position] unchanged while it still points inside [source], or `-1` once it is at or past the end.
     */
    override fun prefetchOrEof(position: Int): Int {
        return if (position < source.length) position else -1
    }

    /**
     * Consumes characters up to and including the next non-whitespace one and returns its token class.
     *
     * @return the token class of the consumed character, or [TC_EOF] when the input is exhausted
     * (or [currentPosition] is already `-1`).
     */
    override fun consumeNextToken(): Byte {
        val input = source
        while (currentPosition != -1 && currentPosition < input.length) {
            val tokenClass = charToTokenClass(input[currentPosition++])
            // Whitespace is consumed silently; anything else is the token we were looking for
            if (tokenClass != TC_WHITESPACE) return tokenClass
        }
        return TC_EOF
    }

    /**
     * Skips whitespace and consumes a single `,` if it is the next character.
     *
     * @return `true` if a comma was consumed; `false` at end of input or when the next character is
     * not a comma (the skipped whitespace stays consumed in that case).
     */
    override fun tryConsumeComma(): Boolean {
        val position = skipWhitespaces()
        val atEnd = position == -1 || position == source.length
        if (atEnd || source[position] != ',') return false
        ++currentPosition
        return true
    }

    /**
     * Skips leading whitespace and reports whether the first non-whitespace character passes
     * [isValidValueStart]. That character itself is not consumed.
     *
     * @return `false` when the input is exhausted, otherwise the result of [isValidValueStart].
     */
    override fun canConsumeValue(): Boolean {
        var index = currentPosition
        if (index == -1) return false
        val input = source
        // Whitespace skipping is inlined here on purpose: a local cursor avoids writing the field on
        // every step and avoids a nested loop; direct comparisons are also faster than charToTokenClass.
        while (index < input.length) {
            when (val ch = input[index]) {
                ' ', '\n', '\r', '\t' -> ++index
                else -> {
                    currentPosition = index
                    return isValidValueStart(ch)
                }
            }
        }
        currentPosition = index
        return false
    }

    /**
     * Advances [currentPosition] past any run of whitespace.
     *
     * @return the new [currentPosition]: the index of the first non-whitespace character,
     * `source.length` if only whitespace remained, or `-1` if the lexer was already at end of input.
     */
    override fun skipWhitespaces(): Int {
        var index = currentPosition
        if (index == -1) return index
        while (index < source.length) {
            val ch = source[index]
            // Direct comparisons are faster than going through charToTokenClass
            if (ch != ' ' && ch != '\n' && ch != '\r' && ch != '\t') break
            ++index
        }
        currentPosition = index
        return index
    }

    /**
     * Skips whitespace and consumes the next character, which is required to be [expected].
     *
     * Any other character, or end of input, is reported through [unexpectedToken].
     */
    override fun consumeNextToken(expected: Char) {
        if (currentPosition == -1) unexpectedToken(expected)
        val input = source
        while (currentPosition < input.length) {
            when (input[currentPosition++]) {
                ' ', '\n', '\r', '\t' -> continue
                expected -> return
                else -> unexpectedToken(expected)
            }
        }
        // Ran off the end of the input: mark EOF so that the position is reported correctly
        currentPosition = -1
        unexpectedToken(expected)
    }

    /**
     * Consumes a quoted string and returns its contents.
     *
     * Escape sequences are assumed to be rare, so the closing quote is first located optimistically
     * with the fast `indexOf`; only then is the candidate range checked for a backslash. When one is
     * found, the work is handed over to the slower character-by-character [consumeString] overload.
     */
    override fun consumeKeyString(): String {
        consumeNextToken(STRING)
        val start = currentPosition
        val quoteIndex = source.indexOf('"', start)
        if (quoteIndex == -1) {
            // No closing quote at all. Move currentPosition to the token following the would-be string
            // so the error message points at a sensible place (only a guess: `:` and `,` may legally
            // appear inside a string).
            consumeStringLenient()
            fail(TC_STRING, wasConsumed = false)
        }
        // The quote found above is only _optimistically_ the end of the string: it may be an escaped one
        var i = start
        while (i < quoteIndex) {
            // An escape sequence forces the fallback to the slow path, starting from the backslash
            if (source[i] == STRING_ESC) return consumeString(source, currentPosition, i)
            i++
        }
        currentPosition = quoteIndex + 1
        return source.substring(start, quoteIndex)
    }

    /**
     * Consumes a whole string (leniently when [isLenient] is set) and passes it to [consumeChunk]
     * in consecutive pieces of at most [BATCH_SIZE] characters.
     */
    override fun consumeStringChunked(isLenient: Boolean, consumeChunk: (stringChunk: String) -> Unit) {
        val value = if (isLenient) consumeStringLenient() else consumeString()
        for (chunk in value.chunked(BATCH_SIZE)) {
            consumeChunk(chunk)
        }
    }

    /**
     * Looks ahead into an upcoming JSON object and, if its first key equals [keyToMatch], returns the
     * string peeked right after the following colon.
     *
     * [currentPosition] is restored and the peeked state is discarded before returning, whatever the outcome.
     *
     * @return the peeked value, or `null` if the next token does not open an object, the first key
     * differs from [keyToMatch], or the key is not followed by a colon.
     */
    override fun peekLeadingMatchingValue(keyToMatch: String, isLenient: Boolean): String? {
        val savedPosition = currentPosition
        try {
            // Not an object at all: malformed for our purposes, bail out
            if (consumeNextToken() != TC_BEGIN_OBJ) return null
            if (peekString(isLenient) != keyToMatch) return null
            // Actually consume the key that has just been peeked
            discardPeeked()
            return if (consumeNextToken() == TC_COLON) peekString(isLenient) else null
        } finally {
            // Roll the lexer back to where it was before the lookahead
            currentPosition = savedPosition
            discardPeeked()
        }
    }
}