Mercurial > vim
comparison src/json.c @ 8283:b8a56d4d83e0 v7.4.1434
commit https://github.com/vim/vim/commit/b6ff81188d27fae774d9ad2dfb498f596d697d4b
Author: Bram Moolenaar <Bram@vim.org>
Date: Sat Feb 27 18:41:27 2016 +0100
patch 7.4.1434
Problem: JSON encoding doesn't handle surrogate pairs.
Solution: Improve multi-byte handling of JSON. (Yasuhiro Matsumoto)
author | Christian Brabandt <cb@256bit.org> |
---|---|
date | Sat, 27 Feb 2016 18:45:03 +0100 |
parents | ff900e499f79 |
children | 6ae3fb4fe7c1 |
comparison
equal
deleted
inserted
replaced
8282:979b6b144d70 | 8283:b8a56d4d83e0 |
---|---|
95 | 95 |
96 if (res == NULL) | 96 if (res == NULL) |
97 ga_concat(gap, (char_u *)"null"); | 97 ga_concat(gap, (char_u *)"null"); |
98 else | 98 else |
99 { | 99 { |
100 #if defined(FEAT_MBYTE) && defined(USE_ICONV) | |
101 vimconv_T conv; | |
102 char_u *converted = NULL; | |
103 | |
104 convert_setup(&conv, p_enc, (char_u*)"utf-8"); | |
105 if (conv.vc_type != CONV_NONE) | |
106 converted = res = string_convert(&conv, res, NULL); | |
107 convert_setup(&conv, NULL, NULL); | |
108 #endif | |
109 | |
100 ga_append(gap, '"'); | 110 ga_append(gap, '"'); |
101 while (*res != NUL) | 111 while (*res != NUL) |
102 { | 112 { |
103 int c = PTR2CHAR(res); | 113 int c; |
114 #ifdef FEAT_MBYTE | |
115 /* always use utf-8 encoding, ignore 'encoding' */ | |
116 c = utf_ptr2char(res); | |
117 #else | |
118 c = (int)*(p); | |
119 #endif | |
104 | 120 |
105 switch (c) | 121 switch (c) |
106 { | 122 { |
107 case 0x08: | 123 case 0x08: |
108 ga_append(gap, '\\'); ga_append(gap, 'b'); break; | 124 ga_append(gap, '\\'); ga_append(gap, 'b'); break; |
121 break; | 137 break; |
122 default: | 138 default: |
123 if (c >= 0x20) | 139 if (c >= 0x20) |
124 { | 140 { |
125 #ifdef FEAT_MBYTE | 141 #ifdef FEAT_MBYTE |
126 numbuf[mb_char2bytes(c, numbuf)] = NUL; | 142 numbuf[utf_char2bytes(c, numbuf)] = NUL; |
127 #else | 143 #else |
128 numbuf[0] = c; | 144 numbuf[0] = c; |
129 numbuf[1] = NUL; | 145 numbuf[1] = NUL; |
130 #endif | 146 #endif |
131 ga_concat(gap, numbuf); | 147 ga_concat(gap, numbuf); |
135 vim_snprintf((char *)numbuf, NUMBUFLEN, | 151 vim_snprintf((char *)numbuf, NUMBUFLEN, |
136 "\\u%04lx", (long)c); | 152 "\\u%04lx", (long)c); |
137 ga_concat(gap, numbuf); | 153 ga_concat(gap, numbuf); |
138 } | 154 } |
139 } | 155 } |
140 mb_cptr_adv(res); | 156 #ifdef FEAT_MBYTE |
157 res += utf_ptr2len(res); | |
158 #else | |
159 ++p; | |
160 #endif | |
141 } | 161 } |
142 ga_append(gap, '"'); | 162 ga_append(gap, '"'); |
163 #if defined(FEAT_MBYTE) && defined(USE_ICONV) | |
164 vim_free(converted); | |
165 #endif | |
143 } | 166 } |
144 } | 167 } |
145 | 168 |
146 /* | 169 /* |
147 * Return TRUE if "key" can be used without quotes. | 170 * Return TRUE if "key" can be used without quotes. |
523 int len; | 546 int len; |
524 char_u *p; | 547 char_u *p; |
525 int c; | 548 int c; |
526 long nr; | 549 long nr; |
527 char_u buf[NUMBUFLEN]; | 550 char_u buf[NUMBUFLEN]; |
551 #if defined(FEAT_MBYTE) && defined(USE_ICONV) | |
552 vimconv_T conv; | |
553 char_u *converted = NULL; | |
554 #endif | |
528 | 555 |
529 if (res != NULL) | 556 if (res != NULL) |
530 ga_init2(&ga, 1, 200); | 557 ga_init2(&ga, 1, 200); |
531 | 558 |
532 p = reader->js_buf + reader->js_used + 1; /* skip over " */ | 559 p = reader->js_buf + reader->js_used + 1; /* skip over " */ |
560 #if defined(FEAT_MBYTE) && defined(USE_ICONV) | |
561 convert_setup(&conv, (char_u*)"utf-8", p_enc); | |
562 if (conv.vc_type != CONV_NONE) | |
563 converted = p = string_convert(&conv, p, NULL); | |
564 convert_setup(&conv, NULL, NULL); | |
565 #endif | |
533 while (*p != '"') | 566 while (*p != '"') |
534 { | 567 { |
535 if (*p == NUL || p[1] == NUL | 568 if (*p == NUL || p[1] == NUL |
536 #ifdef FEAT_MBYTE | 569 #ifdef FEAT_MBYTE |
537 || utf_ptr2len(p) < utf_byte2len(*p) | 570 || utf_ptr2len(p) < utf_byte2len(*p) |
571 p = reader->js_buf + reader->js_used; | 604 p = reader->js_buf + reader->js_used; |
572 reader->js_end = reader->js_buf | 605 reader->js_end = reader->js_buf |
573 + STRLEN(reader->js_buf); | 606 + STRLEN(reader->js_buf); |
574 } | 607 } |
575 } | 608 } |
609 nr = 0; | |
610 len = 0; | |
576 vim_str2nr(p + 2, NULL, &len, | 611 vim_str2nr(p + 2, NULL, &len, |
577 STR2NR_HEX + STR2NR_FORCE, &nr, NULL, 4); | 612 STR2NR_HEX + STR2NR_FORCE, &nr, NULL, 4); |
578 p += len + 2; | 613 p += len + 2; |
614 if (0xd800 <= nr && nr <= 0xdfff | |
615 && (int)(reader->js_end - p) >= 6 | |
616 && *p == '\\' && *(p+1) == 'u') | |
617 { | |
618 long nr2 = 0; | |
619 | |
620 /* decode surrogate pair: \ud812\u3456 */ | |
621 len = 0; | |
622 vim_str2nr(p + 2, NULL, &len, | |
623 STR2NR_HEX + STR2NR_FORCE, &nr2, NULL, 4); | |
624 if (0xdc00 <= nr2 && nr2 <= 0xdfff) | |
625 { | |
626 p += len + 2; | |
627 nr = (((nr - 0xd800) << 10) | | |
628 ((nr2 - 0xdc00) & 0x3ff)) + 0x10000; | |
629 } | |
630 } | |
579 if (res != NULL) | 631 if (res != NULL) |
580 { | 632 { |
581 #ifdef FEAT_MBYTE | 633 #ifdef FEAT_MBYTE |
582 buf[(*mb_char2bytes)((int)nr, buf)] = NUL; | 634 buf[utf_char2bytes((int)nr, buf)] = NUL; |
583 ga_concat(&ga, buf); | 635 ga_concat(&ga, buf); |
584 #else | 636 #else |
585 ga_append(&ga, nr); | 637 ga_append(&ga, nr); |
586 #endif | 638 #endif |
587 } | 639 } |
598 ga_append(&ga, c); | 650 ga_append(&ga, c); |
599 } | 651 } |
600 } | 652 } |
601 else | 653 else |
602 { | 654 { |
603 len = MB_PTR2LEN(p); | 655 #ifdef FEAT_MBYTE |
656 len = utf_ptr2len(p); | |
657 #else | |
658 len = 1; | |
659 #endif | |
604 if (res != NULL) | 660 if (res != NULL) |
605 { | 661 { |
606 if (ga_grow(&ga, len) == FAIL) | 662 if (ga_grow(&ga, len) == FAIL) |
607 { | 663 { |
608 ga_clear(&ga); | 664 ga_clear(&ga); |
665 #if defined(FEAT_MBYTE) && defined(USE_ICONV) | |
666 vim_free(converted); | |
667 #endif | |
609 return FAIL; | 668 return FAIL; |
610 } | 669 } |
611 mch_memmove((char *)ga.ga_data + ga.ga_len, p, (size_t)len); | 670 mch_memmove((char *)ga.ga_data + ga.ga_len, p, (size_t)len); |
612 ga.ga_len += len; | 671 ga.ga_len += len; |
613 } | 672 } |
614 p += len; | 673 p += len; |
615 } | 674 } |
616 } | 675 } |
676 #if defined(FEAT_MBYTE) && defined(USE_ICONV) | |
677 vim_free(converted); | |
678 #endif | |
617 | 679 |
618 reader->js_used = (int)(p - reader->js_buf); | 680 reader->js_used = (int)(p - reader->js_buf); |
619 if (*p == '"') | 681 if (*p == '"') |
620 { | 682 { |
621 ++reader->js_used; | 683 ++reader->js_used; |