# HG changeset patch # User Bram Moolenaar # Date 1649168103 -7200 # Node ID 0860b12c578856581e2b3ad785b1361c654ce0e2 # Parent fcac4b4be0c34add5ac696bcbdb9d45cb2f577a0 patch 8.2.4695: JSON encoding could be faster Commit: https://github.com/vim/vim/commit/beb0ef1ab2dbd9760345e3e03647b93914591d56 Author: LemonBoy Date: Tue Apr 5 15:07:32 2022 +0100 patch 8.2.4695: JSON encoding could be faster Problem: JSON encoding could be faster. Solution: Optimize encoding JSON strings. (closes https://github.com/vim/vim/issues/10086) diff --git a/src/json.c b/src/json.c --- a/src/json.c +++ b/src/json.c @@ -114,37 +114,72 @@ json_encode_lsp_msg(typval_T *val) } #endif +/* + * Lookup table to quickly know if the given ASCII character must be escaped. + */ +static const char ascii_needs_escape[128] = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x0. + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x1. + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x2. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x3. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x4. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, // 0x5. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x6. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x7. +}; + +/* + * Encode the utf-8 encoded string "str" into "gap". + */ static void write_string(garray_T *gap, char_u *str) { char_u *res = str; char_u numbuf[NUMBUFLEN]; + char_u *from; +#if defined(USE_ICONV) + vimconv_T conv; + char_u *converted = NULL; +#endif + int c; if (res == NULL) + { ga_concat(gap, (char_u *)"\"\""); - else - { + return; + } + #if defined(USE_ICONV) - vimconv_T conv; - char_u *converted = NULL; - - if (!enc_utf8) + if (!enc_utf8) + { + // Convert the text from 'encoding' to utf-8, because a JSON string is + // always utf-8. + conv.vc_type = CONV_NONE; + convert_setup(&conv, p_enc, (char_u*)"utf-8"); + if (conv.vc_type != CONV_NONE) + converted = res = string_convert(&conv, res, NULL); + convert_setup(&conv, NULL, NULL); + } +#endif + ga_append(gap, '"'); + // `from` is the beginning of a sequence of bytes we can directly copy from + // the input string, avoiding the overhead associated to decoding/encoding + // them. + from = res; + while ((c = *res) != NUL) + { + // always use utf-8 encoding, ignore 'encoding' + if (c < 0x80) { - // Convert the text from 'encoding' to utf-8, the JSON string is - // always utf-8. - conv.vc_type = CONV_NONE; - convert_setup(&conv, p_enc, (char_u*)"utf-8"); - if (conv.vc_type != CONV_NONE) - converted = res = string_convert(&conv, res, NULL); - convert_setup(&conv, NULL, NULL); - } -#endif - ga_append(gap, '"'); - while (*res != NUL) - { - int c; - // always use utf-8 encoding, ignore 'encoding' - c = utf_ptr2char(res); + if (!ascii_needs_escape[c]) + { + res += 1; + continue; + } + + if (res != from) + ga_concat_len(gap, from, res - from); + from = res + 1; switch (c) { @@ -164,25 +199,43 @@ write_string(garray_T *gap, char_u *str) ga_append(gap, c); break; default: - if (c >= 0x20) - { - numbuf[utf_char2bytes(c, numbuf)] = NUL; - ga_concat(gap, numbuf); - } - else - { - vim_snprintf((char *)numbuf, NUMBUFLEN, - "\\u%04lx", (long)c); - ga_concat(gap, numbuf); - } + vim_snprintf((char *)numbuf, NUMBUFLEN, "\\u%04lx", + (long)c); + ga_concat(gap, numbuf); + } + + res += 1; + } + else + { + int l = utf_ptr2len(res); + + if (l > 1) + { + res += l; + continue; } - res += utf_ptr2len(res); + + // Invalid utf-8 sequence, replace it with the Unicode replacement + // character U+FFFD. + if (res != from) + ga_concat_len(gap, from, res - from); + from = res + 1; + + numbuf[utf_char2bytes(0xFFFD, numbuf)] = NUL; + ga_concat(gap, numbuf); + + res += l; } - ga_append(gap, '"'); + } + + if (res != from) + ga_concat_len(gap, from, res - from); + + ga_append(gap, '"'); #if defined(USE_ICONV) - vim_free(converted); + vim_free(converted); #endif - } } /* diff --git a/src/testdir/test_json.vim b/src/testdir/test_json.vim --- a/src/testdir/test_json.vim +++ b/src/testdir/test_json.vim @@ -107,6 +107,9 @@ func Test_json_encode() call assert_equal('"café"', json_encode("caf\xe9")) let &encoding = save_encoding + " Invalid utf-8 sequences are replaced with U+FFFD (replacement character) + call assert_equal('"foo' . "\ufffd" . '"', json_encode("foo\xAB")) + call assert_fails('echo json_encode(function("tr"))', 'E1161: Cannot json encode a func') call assert_fails('echo json_encode([function("tr")])', 'E1161: Cannot json encode a func') diff --git a/src/version.c b/src/version.c --- a/src/version.c +++ b/src/version.c @@ -747,6 +747,8 @@ static char *(features[]) = static int included_patches[] = { /* Add new patch number below this line */ /**/ + 4695, +/**/ 4694, /**/ 4693,