changeset 8283:b8a56d4d83e0 v7.4.1434

commit https://github.com/vim/vim/commit/b6ff81188d27fae774d9ad2dfb498f596d697d4b Author: Bram Moolenaar <Bram@vim.org> Date: Sat Feb 27 18:41:27 2016 +0100 patch 7.4.1434 Problem: JSON encoding doesn't handle surrogate pairs. Solution: Improve multi-byte handling of JSON. (Yasuhiro Matsumoto)
author Christian Brabandt <cb@256bit.org>
date Sat, 27 Feb 2016 18:45:03 +0100
parents 979b6b144d70
children ded553b5751a
files src/json.c src/testdir/test_json.vim src/version.c
diffstat 3 files changed, 79 insertions(+), 5 deletions(-) [+]
line wrap: on
line diff
--- a/src/json.c
+++ b/src/json.c
@@ -97,10 +97,26 @@ write_string(garray_T *gap, char_u *str)
 	ga_concat(gap, (char_u *)"null");
     else
     {
+#if defined(FEAT_MBYTE) && defined(USE_ICONV)
+	vimconv_T   conv;
+	char_u	    *converted = NULL;
+
+	convert_setup(&conv, p_enc, (char_u*)"utf-8");
+	if (conv.vc_type != CONV_NONE)
+	    converted = res = string_convert(&conv, res, NULL);
+	convert_setup(&conv, NULL, NULL);
+#endif
+
 	ga_append(gap, '"');
 	while (*res != NUL)
 	{
-	    int c = PTR2CHAR(res);
+	    int c;
+#ifdef FEAT_MBYTE
+	    /* always use utf-8 encoding, ignore 'encoding' */
+	    c = utf_ptr2char(res);
+#else
+	    c = (int)*(p);
+#endif
 
 	    switch (c)
 	    {
@@ -123,7 +139,7 @@ write_string(garray_T *gap, char_u *str)
 		    if (c >= 0x20)
 		    {
 #ifdef FEAT_MBYTE
-			numbuf[mb_char2bytes(c, numbuf)] = NUL;
+			numbuf[utf_char2bytes(c, numbuf)] = NUL;
 #else
 			numbuf[0] = c;
 			numbuf[1] = NUL;
@@ -137,9 +153,16 @@ write_string(garray_T *gap, char_u *str)
 			ga_concat(gap, numbuf);
 		    }
 	    }
-	    mb_cptr_adv(res);
+#ifdef FEAT_MBYTE
+	    res += utf_ptr2len(res);
+#else
+	    ++p;
+#endif
 	}
 	ga_append(gap, '"');
+#if defined(FEAT_MBYTE) && defined(USE_ICONV)
+	vim_free(converted);
+#endif
     }
 }
 
@@ -525,11 +548,21 @@ json_decode_string(js_read_T *reader, ty
     int		c;
     long	nr;
     char_u	buf[NUMBUFLEN];
+#if defined(FEAT_MBYTE) && defined(USE_ICONV)
+    vimconv_T   conv;
+    char_u	*converted = NULL;
+#endif
 
     if (res != NULL)
 	ga_init2(&ga, 1, 200);
 
     p = reader->js_buf + reader->js_used + 1; /* skip over " */
+#if defined(FEAT_MBYTE) && defined(USE_ICONV)
+    convert_setup(&conv, (char_u*)"utf-8", p_enc);
+    if (conv.vc_type != CONV_NONE)
+	converted = p = string_convert(&conv, p, NULL);
+    convert_setup(&conv, NULL, NULL);
+#endif
     while (*p != '"')
     {
 	if (*p == NUL || p[1] == NUL
@@ -573,13 +606,32 @@ json_decode_string(js_read_T *reader, ty
 						     + STRLEN(reader->js_buf);
 			}
 		    }
+		    nr = 0;
+		    len = 0;
 		    vim_str2nr(p + 2, NULL, &len,
 				     STR2NR_HEX + STR2NR_FORCE, &nr, NULL, 4);
 		    p += len + 2;
+		    if (0xd800 <= nr && nr <= 0xdfff
+			    && (int)(reader->js_end - p) >= 6
+			    && *p == '\\' && *(p+1) == 'u')
+		    {
+			long	nr2 = 0;
+
+			/* decode surrogate pair, e.g. \ud83c\udf63 */
+			len = 0;
+			vim_str2nr(p + 2, NULL, &len,
+				     STR2NR_HEX + STR2NR_FORCE, &nr2, NULL, 4);
+			if (0xdc00 <= nr2 && nr2 <= 0xdfff)
+			{
+			    p += len + 2;
+			    nr = (((nr - 0xd800) << 10) |
+				((nr2 - 0xdc00) & 0x3ff)) + 0x10000;
+			}
+		    }
 		    if (res != NULL)
 		    {
 #ifdef FEAT_MBYTE
-			buf[(*mb_char2bytes)((int)nr, buf)] = NUL;
+			buf[utf_char2bytes((int)nr, buf)] = NUL;
 			ga_concat(&ga, buf);
 #else
 			ga_append(&ga, nr);
@@ -600,12 +652,19 @@ json_decode_string(js_read_T *reader, ty
 	}
 	else
 	{
-	    len = MB_PTR2LEN(p);
+#ifdef FEAT_MBYTE
+	    len = utf_ptr2len(p);
+#else
+	    len = 1;
+#endif
 	    if (res != NULL)
 	    {
 		if (ga_grow(&ga, len) == FAIL)
 		{
 		    ga_clear(&ga);
+#if defined(FEAT_MBYTE) && defined(USE_ICONV)
+		    vim_free(converted);
+#endif
 		    return FAIL;
 		}
 		mch_memmove((char *)ga.ga_data + ga.ga_len, p, (size_t)len);
@@ -614,6 +673,9 @@ json_decode_string(js_read_T *reader, ty
 	    p += len;
 	}
     }
+#if defined(FEAT_MBYTE) && defined(USE_ICONV)
+    vim_free(converted);
+#endif
 
     reader->js_used = (int)(p - reader->js_buf);
     if (*p == '"')
--- a/src/testdir/test_json.vim
+++ b/src/testdir/test_json.vim
@@ -12,6 +12,12 @@ let s:var4 = "\x10\x11\x12\x13\x14\x15\x
 let s:json5 = '"\u0018\u0019\u001a\u001b\u001c\u001d\u001e\u001f"'
 let s:var5 = "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
 
+" surrogate pair
+let s:jsonsp1 = '"\ud83c\udf63"'
+let s:varsp1 = "\xf0\x9f\x8d\xa3"
+let s:jsonsp2 = '"\ud83c\u00a0"'
+let s:varsp2 = "\ud83c\u00a0"
+
 let s:jsonmb = '"s¢cĴgё"'
 let s:varmb = "s¢cĴgё"
 let s:jsonnr = '1234'
@@ -69,6 +75,8 @@ func Test_json_encode()
 
   if has('multi_byte')
     call assert_equal(s:jsonmb, json_encode(s:varmb))
+    call assert_equal(s:varsp1, json_decode(s:jsonsp1))
+    call assert_equal(s:varsp2, json_decode(s:jsonsp2))
   endif
 
   call assert_equal(s:jsonnr, json_encode(s:varnr))
@@ -105,6 +113,8 @@ func Test_json_decode()
 
   if has('multi_byte')
     call assert_equal(s:varmb, json_decode(s:jsonmb))
+    call assert_equal(s:varsp1, js_decode(s:jsonsp1))
+    call assert_equal(s:varsp2, js_decode(s:jsonsp2))
   endif
 
   call assert_equal(s:varnr, json_decode(s:jsonnr))
--- a/src/version.c
+++ b/src/version.c
@@ -744,6 +744,8 @@ static char *(features[]) =
 static int included_patches[] =
 {   /* Add new patch number below this line */
 /**/
+    1434,
+/**/
     1433,
 /**/
     1432,