changeset 28339:0860b12c5788 v8.2.4695

patch 8.2.4695: JSON encoding could be faster

Commit: https://github.com/vim/vim/commit/beb0ef1ab2dbd9760345e3e03647b93914591d56
Author: LemonBoy <thatlemon@gmail.com>
Date: Tue Apr 5 15:07:32 2022 +0100

    patch 8.2.4695: JSON encoding could be faster
    Problem: JSON encoding could be faster.
    Solution: Optimize encoding JSON strings. (closes https://github.com/vim/vim/issues/10086)
author Bram Moolenaar <Bram@vim.org>
date Tue, 05 Apr 2022 16:15:03 +0200
parents fcac4b4be0c3
children e2084b13a5c3
files src/json.c src/testdir/test_json.vim src/version.c
diffstat 3 files changed, 94 insertions(+), 36 deletions(-) [+]
line wrap: on
line diff
--- a/src/json.c
+++ b/src/json.c
@@ -114,37 +114,72 @@ json_encode_lsp_msg(typval_T *val)
 }
 #endif
 
+/*
+ * Lookup table indexed by ASCII value: non-zero if it must be escaped in JSON.
+ */
+static const char ascii_needs_escape[128] = {
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x0. control characters
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x1. control characters
+    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x2. 0x22 is '"'
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x3.
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x4.
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, // 0x5. 0x5c is a backslash
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x6.
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x7.
+};
+
+/*
+ * Append "str" to "gap" as a quoted, escaped JSON string in utf-8.
+ */
     static void
 write_string(garray_T *gap, char_u *str)
 {
     char_u	*res = str;
     char_u	numbuf[NUMBUFLEN];
+    char_u	*from;
+#if defined(USE_ICONV)
+    vimconv_T   conv;
+    char_u	*converted = NULL;
+#endif
+    int		c;
 
     if (res == NULL)
+    {
 	ga_concat(gap, (char_u *)"\"\"");
-    else
-    {
+	return;
+    }
+
 #if defined(USE_ICONV)
-	vimconv_T   conv;
-	char_u	    *converted = NULL;
-
-	if (!enc_utf8)
+    if (!enc_utf8)
+    {
+	// Convert the text from 'encoding' to utf-8, because a JSON string is
+	// always utf-8.
+	conv.vc_type = CONV_NONE;
+	convert_setup(&conv, p_enc, (char_u*)"utf-8");
+	if (conv.vc_type != CONV_NONE)
+	    converted = res = string_convert(&conv, res, NULL);
+	convert_setup(&conv, NULL, NULL);
+    }
+#endif
+    ga_append(gap, '"');
+    // `from` is the beginning of a sequence of bytes we can directly copy from
+    // the input string, avoiding the overhead associated to decoding/encoding
+    // them.
+    from = res;
+    while ((c = *res) != NUL)
+    {
+	// always use utf-8 encoding, ignore 'encoding'
+	if (c < 0x80)
 	{
-	    // Convert the text from 'encoding' to utf-8, the JSON string is
-	    // always utf-8.
-	    conv.vc_type = CONV_NONE;
-	    convert_setup(&conv, p_enc, (char_u*)"utf-8");
-	    if (conv.vc_type != CONV_NONE)
-		converted = res = string_convert(&conv, res, NULL);
-	    convert_setup(&conv, NULL, NULL);
-	}
-#endif
-	ga_append(gap, '"');
-	while (*res != NUL)
-	{
-	    int c;
-	    // always use utf-8 encoding, ignore 'encoding'
-	    c = utf_ptr2char(res);
+	    if (!ascii_needs_escape[c])
+	    {
+		res += 1;
+		continue;
+	    }
+
+	    if (res != from)
+		ga_concat_len(gap, from, res - from);
+	    from = res + 1;
 
 	    switch (c)
 	    {
@@ -164,25 +199,43 @@ write_string(garray_T *gap, char_u *str)
 		    ga_append(gap, c);
 		    break;
 		default:
-		    if (c >= 0x20)
-		    {
-			numbuf[utf_char2bytes(c, numbuf)] = NUL;
-			ga_concat(gap, numbuf);
-		    }
-		    else
-		    {
-			vim_snprintf((char *)numbuf, NUMBUFLEN,
-							 "\\u%04lx", (long)c);
-			ga_concat(gap, numbuf);
-		    }
+		    vim_snprintf((char *)numbuf, NUMBUFLEN, "\\u%04lx",
+								      (long)c);
+		    ga_concat(gap, numbuf);
+	    }
+
+	    res += 1;
+	}
+	else
+	{
+	    int l = utf_ptr2len(res);
+
+	    if (l > 1)
+	    {
+		res += l;
+		continue;
 	    }
-	    res += utf_ptr2len(res);
+
+	    // Invalid utf-8 sequence, replace it with the Unicode replacement
+	    // character U+FFFD.
+	    if (res != from)
+		ga_concat_len(gap, from, res - from);
+	    from = res + 1;
+
+	    numbuf[utf_char2bytes(0xFFFD, numbuf)] = NUL;
+	    ga_concat(gap, numbuf);
+
+	    res += l;
 	}
-	ga_append(gap, '"');
+    }
+
+    if (res != from)
+	ga_concat_len(gap, from, res - from);
+
+    ga_append(gap, '"');
 #if defined(USE_ICONV)
-	vim_free(converted);
+    vim_free(converted);
 #endif
-    }
 }
 
 /*
--- a/src/testdir/test_json.vim
+++ b/src/testdir/test_json.vim
@@ -107,6 +107,9 @@ func Test_json_encode()
   call assert_equal('"café"', json_encode("caf\xe9"))
   let &encoding = save_encoding
 
+  " Invalid utf-8 sequences are replaced with U+FFFD (replacement character)
+  call assert_equal('"foo' . "\ufffd" . '"', json_encode("foo\xAB"))
+
   call assert_fails('echo json_encode(function("tr"))', 'E1161: Cannot json encode a func')
   call assert_fails('echo json_encode([function("tr")])', 'E1161: Cannot json encode a func')
 
--- a/src/version.c
+++ b/src/version.c
@@ -747,6 +747,8 @@ static char *(features[]) =
 static int included_patches[] =
 {   /* Add new patch number below this line */
 /**/
+    4695,
+/**/
     4694,
 /**/
     4693,