comparison src/json.c @ 28339:0860b12c5788 v8.2.4695

patch 8.2.4695: JSON encoding could be faster

Commit: https://github.com/vim/vim/commit/beb0ef1ab2dbd9760345e3e03647b93914591d56
Author: LemonBoy <thatlemon@gmail.com>
Date: Tue Apr 5 15:07:32 2022 +0100

    patch 8.2.4695: JSON encoding could be faster
    Problem: JSON encoding could be faster.
    Solution: Optimize encoding JSON strings.
    (closes https://github.com/vim/vim/issues/10086)
author Bram Moolenaar <Bram@vim.org>
date Tue, 05 Apr 2022 16:15:03 +0200
parents 62cc3b60493b
children 029c59bf78f1
comparison
equal deleted inserted replaced
28338:fcac4b4be0c3 28339:0860b12c5788
112 ga_clear(&ga); 112 ga_clear(&ga);
113 return lspga.ga_data; 113 return lspga.ga_data;
114 } 114 }
115 #endif 115 #endif
116 116
117 /*
118 * Lookup table to quickly know if the given ASCII character must be escaped.
119 */
120 static const char ascii_needs_escape[128] = {
121 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x0.
122 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x1.
123 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x2.
124 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x3.
125 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x4.
126 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, // 0x5.
127 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x6.
128 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x7.
129 };
130
131 /*
132 * Encode the utf-8 encoded string "str" into "gap".
133 */
117 static void 134 static void
118 write_string(garray_T *gap, char_u *str) 135 write_string(garray_T *gap, char_u *str)
119 { 136 {
120 char_u *res = str; 137 char_u *res = str;
121 char_u numbuf[NUMBUFLEN]; 138 char_u numbuf[NUMBUFLEN];
139 char_u *from;
140 #if defined(USE_ICONV)
141 vimconv_T conv;
142 char_u *converted = NULL;
143 #endif
144 int c;
122 145
123 if (res == NULL) 146 if (res == NULL)
147 {
124 ga_concat(gap, (char_u *)"\"\""); 148 ga_concat(gap, (char_u *)"\"\"");
125 else 149 return;
126 { 150 }
151
127 #if defined(USE_ICONV) 152 #if defined(USE_ICONV)
128 vimconv_T conv; 153 if (!enc_utf8)
129 char_u *converted = NULL; 154 {
130 155 // Convert the text from 'encoding' to utf-8, because a JSON string is
131 if (!enc_utf8) 156 // always utf-8.
157 conv.vc_type = CONV_NONE;
158 convert_setup(&conv, p_enc, (char_u*)"utf-8");
159 if (conv.vc_type != CONV_NONE)
160 converted = res = string_convert(&conv, res, NULL);
161 convert_setup(&conv, NULL, NULL);
162 }
163 #endif
164 ga_append(gap, '"');
165 // `from` is the beginning of a sequence of bytes we can directly copy from
166 // the input string, avoiding the overhead associated to decoding/encoding
167 // them.
168 from = res;
169 while ((c = *res) != NUL)
170 {
171 // always use utf-8 encoding, ignore 'encoding'
172 if (c < 0x80)
132 { 173 {
133 // Convert the text from 'encoding' to utf-8, the JSON string is 174 if (!ascii_needs_escape[c])
134 // always utf-8. 175 {
135 conv.vc_type = CONV_NONE; 176 res += 1;
136 convert_setup(&conv, p_enc, (char_u*)"utf-8"); 177 continue;
137 if (conv.vc_type != CONV_NONE) 178 }
138 converted = res = string_convert(&conv, res, NULL); 179
139 convert_setup(&conv, NULL, NULL); 180 if (res != from)
140 } 181 ga_concat_len(gap, from, res - from);
141 #endif 182 from = res + 1;
142 ga_append(gap, '"');
143 while (*res != NUL)
144 {
145 int c;
146 // always use utf-8 encoding, ignore 'encoding'
147 c = utf_ptr2char(res);
148 183
149 switch (c) 184 switch (c)
150 { 185 {
151 case 0x08: 186 case 0x08:
152 ga_append(gap, '\\'); ga_append(gap, 'b'); break; 187 ga_append(gap, '\\'); ga_append(gap, 'b'); break;
162 case 0x5c: // backslash 197 case 0x5c: // backslash
163 ga_append(gap, '\\'); 198 ga_append(gap, '\\');
164 ga_append(gap, c); 199 ga_append(gap, c);
165 break; 200 break;
166 default: 201 default:
167 if (c >= 0x20) 202 vim_snprintf((char *)numbuf, NUMBUFLEN, "\\u%04lx",
168 { 203 (long)c);
169 numbuf[utf_char2bytes(c, numbuf)] = NUL; 204 ga_concat(gap, numbuf);
170 ga_concat(gap, numbuf); 205 }
171 } 206
172 else 207 res += 1;
173 {
174 vim_snprintf((char *)numbuf, NUMBUFLEN,
175 "\\u%04lx", (long)c);
176 ga_concat(gap, numbuf);
177 }
178 }
179 res += utf_ptr2len(res);
180 } 208 }
181 ga_append(gap, '"'); 209 else
210 {
211 int l = utf_ptr2len(res);
212
213 if (l > 1)
214 {
215 res += l;
216 continue;
217 }
218
219 // Invalid utf-8 sequence, replace it with the Unicode replacement
220 // character U+FFFD.
221 if (res != from)
222 ga_concat_len(gap, from, res - from);
223 from = res + 1;
224
225 numbuf[utf_char2bytes(0xFFFD, numbuf)] = NUL;
226 ga_concat(gap, numbuf);
227
228 res += l;
229 }
230 }
231
232 if (res != from)
233 ga_concat_len(gap, from, res - from);
234
235 ga_append(gap, '"');
182 #if defined(USE_ICONV) 236 #if defined(USE_ICONV)
183 vim_free(converted); 237 vim_free(converted);
184 #endif 238 #endif
185 }
186 } 239 }
187 240
188 /* 241 /*
189 * Return TRUE if "key" can be used without quotes. 242 * Return TRUE if "key" can be used without quotes.
190 * That is when it starts with a letter and only contains letters, digits and 243 * That is when it starts with a letter and only contains letters, digits and