Mercurial > vim
changeset 8283:b8a56d4d83e0 v7.4.1434
commit https://github.com/vim/vim/commit/b6ff81188d27fae774d9ad2dfb498f596d697d4b
Author: Bram Moolenaar <Bram@vim.org>
Date: Sat Feb 27 18:41:27 2016 +0100
patch 7.4.1434
Problem: JSON encoding doesn't handle surrogate pairs.
Solution: Improve multi-byte handling of JSON. (Yasuhiro Matsumoto)
author | Christian Brabandt <cb@256bit.org> |
---|---|
date | Sat, 27 Feb 2016 18:45:03 +0100 |
parents | 979b6b144d70 |
children | ded553b5751a |
files | src/json.c src/testdir/test_json.vim src/version.c |
diffstat | 3 files changed, 79 insertions(+), 5 deletions(-) [+] |
line wrap: on
line diff
--- a/src/json.c
+++ b/src/json.c
@@ -97,10 +97,26 @@ write_string(garray_T *gap, char_u *str)
         ga_concat(gap, (char_u *)"null");
     else
     {
+#if defined(FEAT_MBYTE) && defined(USE_ICONV)
+        vimconv_T conv;
+        char_u *converted = NULL;
+
+        convert_setup(&conv, p_enc, (char_u*)"utf-8");
+        if (conv.vc_type != CONV_NONE)
+            converted = res = string_convert(&conv, res, NULL);
+        convert_setup(&conv, NULL, NULL);
+#endif
+
         ga_append(gap, '"');
         while (*res != NUL)
         {
-            int c = PTR2CHAR(res);
+            int c;
+#ifdef FEAT_MBYTE
+            /* always use utf-8 encoding, ignore 'encoding' */
+            c = utf_ptr2char(res);
+#else
+            c = (int)*(p);
+#endif
 
             switch (c)
             {
@@ -123,7 +139,7 @@ write_string(garray_T *gap, char_u *str)
                     if (c >= 0x20)
                     {
 #ifdef FEAT_MBYTE
-                        numbuf[mb_char2bytes(c, numbuf)] = NUL;
+                        numbuf[utf_char2bytes(c, numbuf)] = NUL;
 #else
                         numbuf[0] = c;
                         numbuf[1] = NUL;
@@ -137,9 +153,16 @@ write_string(garray_T *gap, char_u *str)
                         ga_concat(gap, numbuf);
                     }
             }
-            mb_cptr_adv(res);
+#ifdef FEAT_MBYTE
+            res += utf_ptr2len(res);
+#else
+            ++p;
+#endif
         }
         ga_append(gap, '"');
+#if defined(FEAT_MBYTE) && defined(USE_ICONV)
+        vim_free(converted);
+#endif
     }
 }
 
@@ -525,11 +548,21 @@ json_decode_string(js_read_T *reader, ty
     int c;
     long nr;
     char_u buf[NUMBUFLEN];
+#if defined(FEAT_MBYTE) && defined(USE_ICONV)
+    vimconv_T conv;
+    char_u *converted = NULL;
+#endif
 
     if (res != NULL)
         ga_init2(&ga, 1, 200);
 
     p = reader->js_buf + reader->js_used + 1; /* skip over " */
+#if defined(FEAT_MBYTE) && defined(USE_ICONV)
+    convert_setup(&conv, (char_u*)"utf-8", p_enc);
+    if (conv.vc_type != CONV_NONE)
+        converted = p = string_convert(&conv, p, NULL);
+    convert_setup(&conv, NULL, NULL);
+#endif
     while (*p != '"')
     {
         if (*p == NUL || p[1] == NUL
@@ -573,13 +606,32 @@ json_decode_string(js_read_T *reader, ty
                                                   + STRLEN(reader->js_buf);
                         }
                     }
+                    nr = 0;
+                    len = 0;
                     vim_str2nr(p + 2, NULL, &len,
                                      STR2NR_HEX + STR2NR_FORCE, &nr, NULL, 4);
                     p += len + 2;
+                    if (0xd800 <= nr && nr <= 0xdfff
+                            && (int)(reader->js_end - p) >= 6
+                            && *p == '\\' && *(p+1) == 'u')
+                    {
+                        long nr2 = 0;
+
+                        /* decode surrogate pair: \ud812\u3456 */
+                        len = 0;
+                        vim_str2nr(p + 2, NULL, &len,
+                                     STR2NR_HEX + STR2NR_FORCE, &nr2, NULL, 4);
+                        if (0xdc00 <= nr2 && nr2 <= 0xdfff)
+                        {
+                            p += len + 2;
+                            nr = (((nr - 0xd800) << 10) |
+                                ((nr2 - 0xdc00) & 0x3ff)) + 0x10000;
+                        }
+                    }
                     if (res != NULL)
                     {
 #ifdef FEAT_MBYTE
-                        buf[(*mb_char2bytes)((int)nr, buf)] = NUL;
+                        buf[utf_char2bytes((int)nr, buf)] = NUL;
                         ga_concat(&ga, buf);
 #else
                         ga_append(&ga, nr);
@@ -600,12 +652,19 @@ json_decode_string(js_read_T *reader, ty
         }
         else
         {
-            len = MB_PTR2LEN(p);
+#ifdef FEAT_MBYTE
+            len = utf_ptr2len(p);
+#else
+            len = 1;
+#endif
             if (res != NULL)
             {
                 if (ga_grow(&ga, len) == FAIL)
                 {
                     ga_clear(&ga);
+#if defined(FEAT_MBYTE) && defined(USE_ICONV)
+                    vim_free(converted);
+#endif
                     return FAIL;
                 }
                 mch_memmove((char *)ga.ga_data + ga.ga_len, p, (size_t)len);
@@ -614,6 +673,9 @@ json_decode_string(js_read_T *reader, ty
             p += len;
         }
     }
+#if defined(FEAT_MBYTE) && defined(USE_ICONV)
+    vim_free(converted);
+#endif
 
     reader->js_used = (int)(p - reader->js_buf);
     if (*p == '"')
--- a/src/testdir/test_json.vim
+++ b/src/testdir/test_json.vim
@@ -12,6 +12,12 @@ let s:var4 = "\x10\x11\x12\x13\x14\x15\x
 let s:json5 = '"\u0018\u0019\u001a\u001b\u001c\u001d\u001e\u001f"'
 let s:var5 = "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
 
+" surrogate pair
+let s:jsonsp1 = '"\ud83c\udf63"'
+let s:varsp1 = "\xf0\x9f\x8d\xa3"
+let s:jsonsp2 = '"\ud83c\u00a0"'
+let s:varsp2 = "\ud83c\u00a0"
+
 let s:jsonmb = '"s¢cĴgё"'
 let s:varmb = "s¢cĴgё"
 let s:jsonnr = '1234'
@@ -69,6 +75,8 @@ func Test_json_encode()
 
   if has('multi_byte')
     call assert_equal(s:jsonmb, json_encode(s:varmb))
+    call assert_equal(s:varsp1, json_decode(s:jsonsp1))
+    call assert_equal(s:varsp2, json_decode(s:jsonsp2))
   endif
 
   call assert_equal(s:jsonnr, json_encode(s:varnr))
@@ -105,6 +113,8 @@ func Test_json_decode()
 
   if has('multi_byte')
     call assert_equal(s:varmb, json_decode(s:jsonmb))
+    call assert_equal(s:varsp1, js_decode(s:jsonsp1))
+    call assert_equal(s:varsp2, js_decode(s:jsonsp2))
   endif
 
   call assert_equal(s:varnr, json_decode(s:jsonnr))