summaryrefslogtreecommitdiff
path: root/formats/json/jvmMain/src/kotlinx/serialization/json/internal/JsonToStringWriter.kt
diff options
context:
space:
mode:
Diffstat (limited to 'formats/json/jvmMain/src/kotlinx/serialization/json/internal/JsonToStringWriter.kt')
-rw-r--r--formats/json/jvmMain/src/kotlinx/serialization/json/internal/JsonToStringWriter.kt136
1 files changed, 136 insertions, 0 deletions
diff --git a/formats/json/jvmMain/src/kotlinx/serialization/json/internal/JsonToStringWriter.kt b/formats/json/jvmMain/src/kotlinx/serialization/json/internal/JsonToStringWriter.kt
new file mode 100644
index 00000000..56667248
--- /dev/null
+++ b/formats/json/jvmMain/src/kotlinx/serialization/json/internal/JsonToStringWriter.kt
@@ -0,0 +1,136 @@
+package kotlinx.serialization.json.internal
+
+/**
+ * Optimized version of StringBuilder that is specific to JSON-encoding.
+ *
+ * ## Implementation note
+ *
+ * To encode a single string, it has to be processed symbol by symbol
+ * in order to detect and escape unicode symbols.
+ *
+ * Done naively, this drastically slows down string processing due to two factors:
+ * * Byte-by-byte copying that does not leverage optimized array copying
+ * * A lot of range and flags checks due to Java's compact strings
+ *
+ * The following technique is used:
+ * 1) Instead of storing intermediate result in `StringBuilder`, we store it in
+ * `CharArray` directly, skipping compact strings checks in `StringBuilder`
+ * 2) Instead of copying symbols one-by-one, we optimistically copy them in a batch using
+ * the optimized and intrinsified `string.toCharArray(destination)`.
+ * It copies the content up to 8 times faster.
+ * Then we iterate over the char array and execute a single check on
+ * each character, which is easily unrolled and vectorized by the inliner.
+ * If an escape character is found, we fall back to per-symbol processing.
+ *
+ * 3) We pool char arrays in order to avoid excess resizes, allocations
+ * and zeroing-out of arrays.
+ */
+internal actual class JsonToStringWriter : InternalJsonWriter {
+    // Output buffer taken from CharArrayPool; swapped for a larger copy whenever it runs out of room
+    private var array: CharArray = CharArrayPool.take()
+    // Number of chars already written into the buffer
+    private var size = 0
+
+    /** Appends [value] in its `toString()` form. */
+    actual override fun writeLong(value: Long) {
+        // Hand-rolling the conversion is possible, but takes a lot of code and corner-case handling
+        write(value.toString())
+    }
+
+    /** Appends a single [char] verbatim, without any escaping. */
+    actual override fun writeChar(char: Char) {
+        ensureAdditionalCapacity(1)
+        val position = size
+        size = position + 1
+        array[position] = char
+    }
+
+    /** Appends [text] verbatim, without quoting or escaping; an empty string is a no-op. */
+    actual override fun write(text: String) {
+        val count = text.length
+        if (count > 0) {
+            ensureAdditionalCapacity(count)
+            text.toCharArray(array, size, 0, count)
+            size += count
+        }
+    }
+
+    /**
+     * Appends [text] surrounded by double quotes.
+     *
+     * The whole string is first bulk-copied into the buffer and then scanned; as soon as a char
+     * flagged in `ESCAPE_MARKERS` shows up, the remainder is handed over to [appendStringSlowPath].
+     */
+    actual override fun writeQuoted(text: String) {
+        val textLength = text.length
+        ensureAdditionalCapacity(textLength + 2)
+        val buffer = array
+        val contentStart = size + 1
+        val contentEnd = contentStart + textLength
+        buffer[contentStart - 1] = '"'
+        text.toCharArray(buffer, contentStart, 0, textLength)
+        var cursor = contentStart
+        while (cursor < contentEnd) {
+            val code = buffer[cursor].code
+            // A non-zero marker means this char cannot be emitted as is
+            if (code < ESCAPE_MARKERS.size && ESCAPE_MARKERS[code] != 0.toByte()) {
+                appendStringSlowPath(cursor - contentStart, cursor, text)
+                return
+            }
+            cursor++
+        }
+        // No resize needed here: room for both quotes was reserved up front and nothing was escaped
+        buffer[contentEnd] = '"'
+        size = contentEnd + 1
+    }
+
+    /**
+     * Per-char continuation of [writeQuoted], entered at the first char that needs escaping.
+     *
+     * @param firstEscapedChar index in [string] to resume from
+     * @param currentSize buffer offset at which that char has to be (re)written
+     * @param string the text being quoted
+     */
+    private fun appendStringSlowPath(firstEscapedChar: Int, currentSize: Int, string: String) {
+        var position = currentSize
+        for (index in firstEscapedChar..string.lastIndex) {
+            // We are already on the slow path, so the up-front capacity guess no longer holds:
+            // reserve room for a two-char backslash escape before handling every symbol
+            position = ensureTotalCapacity(position, 2)
+            val symbol = string[index]
+            val code = symbol.code
+            if (code >= ESCAPE_MARKERS.size) {
+                // Outside the marker table: copied through unchanged
+                array[position++] = symbol
+                continue
+            }
+            /*
+             * Markers hold the letter that follows the backslash for backslash-escaped symbols,
+             * e.g. ESCAPE_MARKERS['\b'] == 'b'.toByte(); every other entry is either
+             * zero (no escape) or one (unicode escape)
+             */
+            val marker = ESCAPE_MARKERS[code]
+            if (marker == 0.toByte()) {
+                array[position++] = symbol
+            } else if (marker == 1.toByte()) {
+                val replacement = ESCAPE_STRINGS[code]!!
+                position = ensureTotalCapacity(position, replacement.length)
+                replacement.toCharArray(array, position, 0, replacement.length)
+                position += replacement.length
+                size = position // Keep the field in sync with the local position
+            } else {
+                array[position] = '\\'
+                array[position + 1] = marker.toInt().toChar()
+                position += 2
+                size = position // Keep the field in sync with the local position
+            }
+        }
+        position = ensureTotalCapacity(position, 1)
+        array[position] = '"'
+        size = position + 1
+    }
+
+    /** Hands the current buffer back to `CharArrayPool`. */
+    actual override fun release() {
+        CharArrayPool.release(array)
+    }
+
+    /** Builds a string from the chars written so far. */
+    actual override fun toString(): String = String(array, 0, size)
+
+    /** Makes sure [expected] more chars fit after the current [size]. */
+    private fun ensureAdditionalCapacity(expected: Int) {
+        ensureTotalCapacity(size, expected)
+    }
+
+    /**
+     * Grows [array] when `oldSize + additional` reaches its length; the new length is that sum
+     * or twice [oldSize], whichever is larger. Always returns [oldSize]: the size is passed in
+     * and handed back so that callers can work on a local instead of re-reading the [size] field.
+     */
+    private fun ensureTotalCapacity(oldSize: Int, additional: Int): Int {
+        val required = oldSize + additional
+        if (required >= array.size) {
+            array = array.copyOf(maxOf(required, oldSize * 2))
+        }
+        return oldSize
+    }
+}