comparison src/json.c @ 8283:b8a56d4d83e0 v7.4.1434

commit https://github.com/vim/vim/commit/b6ff81188d27fae774d9ad2dfb498f596d697d4b Author: Bram Moolenaar <Bram@vim.org> Date: Sat Feb 27 18:41:27 2016 +0100 patch 7.4.1434 Problem: JSON encoding doesn't handle surrogate pair. Solution: Improve multi-byte handling of JSON. (Yasuhiro Matsumoto)
author Christian Brabandt <cb@256bit.org>
date Sat, 27 Feb 2016 18:45:03 +0100
parents ff900e499f79
children 6ae3fb4fe7c1
comparison
equal deleted inserted replaced
8282:979b6b144d70 8283:b8a56d4d83e0
95 95
96 if (res == NULL) 96 if (res == NULL)
97 ga_concat(gap, (char_u *)"null"); 97 ga_concat(gap, (char_u *)"null");
98 else 98 else
99 { 99 {
100 #if defined(FEAT_MBYTE) && defined(USE_ICONV)
101 vimconv_T conv;
102 char_u *converted = NULL;
103
104 convert_setup(&conv, p_enc, (char_u*)"utf-8");
105 if (conv.vc_type != CONV_NONE)
106 converted = res = string_convert(&conv, res, NULL);
107 convert_setup(&conv, NULL, NULL);
108 #endif
109
100 ga_append(gap, '"'); 110 ga_append(gap, '"');
101 while (*res != NUL) 111 while (*res != NUL)
102 { 112 {
103 int c = PTR2CHAR(res); 113 int c;
114 #ifdef FEAT_MBYTE
115 /* always use utf-8 encoding, ignore 'encoding' */
116 c = utf_ptr2char(res);
117 #else
118 c = (int)*(p);
119 #endif
104 120
105 switch (c) 121 switch (c)
106 { 122 {
107 case 0x08: 123 case 0x08:
108 ga_append(gap, '\\'); ga_append(gap, 'b'); break; 124 ga_append(gap, '\\'); ga_append(gap, 'b'); break;
121 break; 137 break;
122 default: 138 default:
123 if (c >= 0x20) 139 if (c >= 0x20)
124 { 140 {
125 #ifdef FEAT_MBYTE 141 #ifdef FEAT_MBYTE
126 numbuf[mb_char2bytes(c, numbuf)] = NUL; 142 numbuf[utf_char2bytes(c, numbuf)] = NUL;
127 #else 143 #else
128 numbuf[0] = c; 144 numbuf[0] = c;
129 numbuf[1] = NUL; 145 numbuf[1] = NUL;
130 #endif 146 #endif
131 ga_concat(gap, numbuf); 147 ga_concat(gap, numbuf);
135 vim_snprintf((char *)numbuf, NUMBUFLEN, 151 vim_snprintf((char *)numbuf, NUMBUFLEN,
136 "\\u%04lx", (long)c); 152 "\\u%04lx", (long)c);
137 ga_concat(gap, numbuf); 153 ga_concat(gap, numbuf);
138 } 154 }
139 } 155 }
140 mb_cptr_adv(res); 156 #ifdef FEAT_MBYTE
157 res += utf_ptr2len(res);
158 #else
159 ++p;
160 #endif
141 } 161 }
142 ga_append(gap, '"'); 162 ga_append(gap, '"');
163 #if defined(FEAT_MBYTE) && defined(USE_ICONV)
164 vim_free(converted);
165 #endif
143 } 166 }
144 } 167 }
145 168
146 /* 169 /*
147 * Return TRUE if "key" can be used without quotes. 170 * Return TRUE if "key" can be used without quotes.
523 int len; 546 int len;
524 char_u *p; 547 char_u *p;
525 int c; 548 int c;
526 long nr; 549 long nr;
527 char_u buf[NUMBUFLEN]; 550 char_u buf[NUMBUFLEN];
551 #if defined(FEAT_MBYTE) && defined(USE_ICONV)
552 vimconv_T conv;
553 char_u *converted = NULL;
554 #endif
528 555
529 if (res != NULL) 556 if (res != NULL)
530 ga_init2(&ga, 1, 200); 557 ga_init2(&ga, 1, 200);
531 558
532 p = reader->js_buf + reader->js_used + 1; /* skip over " */ 559 p = reader->js_buf + reader->js_used + 1; /* skip over " */
560 #if defined(FEAT_MBYTE) && defined(USE_ICONV)
561 convert_setup(&conv, (char_u*)"utf-8", p_enc);
562 if (conv.vc_type != CONV_NONE)
563 converted = p = string_convert(&conv, p, NULL);
564 convert_setup(&conv, NULL, NULL);
565 #endif
533 while (*p != '"') 566 while (*p != '"')
534 { 567 {
535 if (*p == NUL || p[1] == NUL 568 if (*p == NUL || p[1] == NUL
536 #ifdef FEAT_MBYTE 569 #ifdef FEAT_MBYTE
537 || utf_ptr2len(p) < utf_byte2len(*p) 570 || utf_ptr2len(p) < utf_byte2len(*p)
571 p = reader->js_buf + reader->js_used; 604 p = reader->js_buf + reader->js_used;
572 reader->js_end = reader->js_buf 605 reader->js_end = reader->js_buf
573 + STRLEN(reader->js_buf); 606 + STRLEN(reader->js_buf);
574 } 607 }
575 } 608 }
609 nr = 0;
610 len = 0;
576 vim_str2nr(p + 2, NULL, &len, 611 vim_str2nr(p + 2, NULL, &len,
577 STR2NR_HEX + STR2NR_FORCE, &nr, NULL, 4); 612 STR2NR_HEX + STR2NR_FORCE, &nr, NULL, 4);
578 p += len + 2; 613 p += len + 2;
614 if (0xd800 <= nr && nr <= 0xdfff
615 && (int)(reader->js_end - p) >= 6
616 && *p == '\\' && *(p+1) == 'u')
617 {
618 long nr2 = 0;
619
620 /* decode surrogate pair: \ud812\u3456 */
621 len = 0;
622 vim_str2nr(p + 2, NULL, &len,
623 STR2NR_HEX + STR2NR_FORCE, &nr2, NULL, 4);
624 if (0xdc00 <= nr2 && nr2 <= 0xdfff)
625 {
626 p += len + 2;
627 nr = (((nr - 0xd800) << 10) |
628 ((nr2 - 0xdc00) & 0x3ff)) + 0x10000;
629 }
630 }
579 if (res != NULL) 631 if (res != NULL)
580 { 632 {
581 #ifdef FEAT_MBYTE 633 #ifdef FEAT_MBYTE
582 buf[(*mb_char2bytes)((int)nr, buf)] = NUL; 634 buf[utf_char2bytes((int)nr, buf)] = NUL;
583 ga_concat(&ga, buf); 635 ga_concat(&ga, buf);
584 #else 636 #else
585 ga_append(&ga, nr); 637 ga_append(&ga, nr);
586 #endif 638 #endif
587 } 639 }
598 ga_append(&ga, c); 650 ga_append(&ga, c);
599 } 651 }
600 } 652 }
601 else 653 else
602 { 654 {
603 len = MB_PTR2LEN(p); 655 #ifdef FEAT_MBYTE
656 len = utf_ptr2len(p);
657 #else
658 len = 1;
659 #endif
604 if (res != NULL) 660 if (res != NULL)
605 { 661 {
606 if (ga_grow(&ga, len) == FAIL) 662 if (ga_grow(&ga, len) == FAIL)
607 { 663 {
608 ga_clear(&ga); 664 ga_clear(&ga);
665 #if defined(FEAT_MBYTE) && defined(USE_ICONV)
666 vim_free(converted);
667 #endif
609 return FAIL; 668 return FAIL;
610 } 669 }
611 mch_memmove((char *)ga.ga_data + ga.ga_len, p, (size_t)len); 670 mch_memmove((char *)ga.ga_data + ga.ga_len, p, (size_t)len);
612 ga.ga_len += len; 671 ga.ga_len += len;
613 } 672 }
614 p += len; 673 p += len;
615 } 674 }
616 } 675 }
676 #if defined(FEAT_MBYTE) && defined(USE_ICONV)
677 vim_free(converted);
678 #endif
617 679
618 reader->js_used = (int)(p - reader->js_buf); 680 reader->js_used = (int)(p - reader->js_buf);
619 if (*p == '"') 681 if (*p == '"')
620 { 682 {
621 ++reader->js_used; 683 ++reader->js_used;