Mercurial > vim
comparison src/json.c @ 28339:0860b12c5788 v8.2.4695
patch 8.2.4695: JSON encoding could be faster
Commit: https://github.com/vim/vim/commit/beb0ef1ab2dbd9760345e3e03647b93914591d56
Author: LemonBoy <thatlemon@gmail.com>
Date: Tue Apr 5 15:07:32 2022 +0100
patch 8.2.4695: JSON encoding could be faster
Problem: JSON encoding could be faster.
Solution: Optimize encoding JSON strings. (closes https://github.com/vim/vim/issues/10086)
author | Bram Moolenaar <Bram@vim.org> |
---|---|
date | Tue, 05 Apr 2022 16:15:03 +0200 |
parents | 62cc3b60493b |
children | 029c59bf78f1 |
comparison
equal
deleted
inserted
replaced
28338:fcac4b4be0c3 | 28339:0860b12c5788 |
---|---|
112 ga_clear(&ga); | 112 ga_clear(&ga); |
113 return lspga.ga_data; | 113 return lspga.ga_data; |
114 } | 114 } |
115 #endif | 115 #endif |
116 | 116 |
117 /* | |
118 * Lookup table to quickly know if the given ASCII character must be escaped. | |
119 */ | |
120 static const char ascii_needs_escape[128] = { | |
121 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x0. | |
122 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x1. | |
123 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x2. | |
124 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x3. | |
125 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x4. | |
126 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, // 0x5. | |
127 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x6. | |
128 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x7. | |
129 }; | |
130 | |
131 /* | |
132 * Encode the utf-8 encoded string "str" into "gap". | |
133 */ | |
117 static void | 134 static void |
118 write_string(garray_T *gap, char_u *str) | 135 write_string(garray_T *gap, char_u *str) |
119 { | 136 { |
120 char_u *res = str; | 137 char_u *res = str; |
121 char_u numbuf[NUMBUFLEN]; | 138 char_u numbuf[NUMBUFLEN]; |
139 char_u *from; | |
140 #if defined(USE_ICONV) | |
141 vimconv_T conv; | |
142 char_u *converted = NULL; | |
143 #endif | |
144 int c; | |
122 | 145 |
123 if (res == NULL) | 146 if (res == NULL) |
147 { | |
124 ga_concat(gap, (char_u *)"\"\""); | 148 ga_concat(gap, (char_u *)"\"\""); |
125 else | 149 return; |
126 { | 150 } |
151 | |
127 #if defined(USE_ICONV) | 152 #if defined(USE_ICONV) |
128 vimconv_T conv; | 153 if (!enc_utf8) |
129 char_u *converted = NULL; | 154 { |
130 | 155 // Convert the text from 'encoding' to utf-8, because a JSON string is |
131 if (!enc_utf8) | 156 // always utf-8. |
157 conv.vc_type = CONV_NONE; | |
158 convert_setup(&conv, p_enc, (char_u*)"utf-8"); | |
159 if (conv.vc_type != CONV_NONE) | |
160 converted = res = string_convert(&conv, res, NULL); | |
161 convert_setup(&conv, NULL, NULL); | |
162 } | |
163 #endif | |
164 ga_append(gap, '"'); | |
165 // `from` is the beginning of a sequence of bytes we can directly copy from | |
166 // the input string, avoiding the overhead associated to decoding/encoding | |
167 // them. | |
168 from = res; | |
169 while ((c = *res) != NUL) | |
170 { | |
171 // always use utf-8 encoding, ignore 'encoding' | |
172 if (c < 0x80) | |
132 { | 173 { |
133 // Convert the text from 'encoding' to utf-8, the JSON string is | 174 if (!ascii_needs_escape[c]) |
134 // always utf-8. | 175 { |
135 conv.vc_type = CONV_NONE; | 176 res += 1; |
136 convert_setup(&conv, p_enc, (char_u*)"utf-8"); | 177 continue; |
137 if (conv.vc_type != CONV_NONE) | 178 } |
138 converted = res = string_convert(&conv, res, NULL); | 179 |
139 convert_setup(&conv, NULL, NULL); | 180 if (res != from) |
140 } | 181 ga_concat_len(gap, from, res - from); |
141 #endif | 182 from = res + 1; |
142 ga_append(gap, '"'); | |
143 while (*res != NUL) | |
144 { | |
145 int c; | |
146 // always use utf-8 encoding, ignore 'encoding' | |
147 c = utf_ptr2char(res); | |
148 | 183 |
149 switch (c) | 184 switch (c) |
150 { | 185 { |
151 case 0x08: | 186 case 0x08: |
152 ga_append(gap, '\\'); ga_append(gap, 'b'); break; | 187 ga_append(gap, '\\'); ga_append(gap, 'b'); break; |
162 case 0x5c: // backslash | 197 case 0x5c: // backslash |
163 ga_append(gap, '\\'); | 198 ga_append(gap, '\\'); |
164 ga_append(gap, c); | 199 ga_append(gap, c); |
165 break; | 200 break; |
166 default: | 201 default: |
167 if (c >= 0x20) | 202 vim_snprintf((char *)numbuf, NUMBUFLEN, "\\u%04lx", |
168 { | 203 (long)c); |
169 numbuf[utf_char2bytes(c, numbuf)] = NUL; | 204 ga_concat(gap, numbuf); |
170 ga_concat(gap, numbuf); | 205 } |
171 } | 206 |
172 else | 207 res += 1; |
173 { | |
174 vim_snprintf((char *)numbuf, NUMBUFLEN, | |
175 "\\u%04lx", (long)c); | |
176 ga_concat(gap, numbuf); | |
177 } | |
178 } | |
179 res += utf_ptr2len(res); | |
180 } | 208 } |
181 ga_append(gap, '"'); | 209 else |
210 { | |
211 int l = utf_ptr2len(res); | |
212 | |
213 if (l > 1) | |
214 { | |
215 res += l; | |
216 continue; | |
217 } | |
218 | |
219 // Invalid utf-8 sequence, replace it with the Unicode replacement | |
220 // character U+FFFD. | |
221 if (res != from) | |
222 ga_concat_len(gap, from, res - from); | |
223 from = res + 1; | |
224 | |
225 numbuf[utf_char2bytes(0xFFFD, numbuf)] = NUL; | |
226 ga_concat(gap, numbuf); | |
227 | |
228 res += l; | |
229 } | |
230 } | |
231 | |
232 if (res != from) | |
233 ga_concat_len(gap, from, res - from); | |
234 | |
235 ga_append(gap, '"'); | |
182 #if defined(USE_ICONV) | 236 #if defined(USE_ICONV) |
183 vim_free(converted); | 237 vim_free(converted); |
184 #endif | 238 #endif |
185 } | |
186 } | 239 } |
187 | 240 |
188 /* | 241 /* |
189 * Return TRUE if "key" can be used without quotes. | 242 * Return TRUE if "key" can be used without quotes. |
190 * That is when it starts with a letter and only contains letters, digits and | 243 * That is when it starts with a letter and only contains letters, digits and |