diff src/mbyte.c @ 8680:131e651fb347 v7.4.1629

commit https://github.com/vim/vim/commit/b86f10ee10bdf932df02bdaf601dffa671518a47 Author: Bram Moolenaar <Bram@vim.org> Date: Mon Mar 21 22:09:44 2016 +0100 patch 7.4.1629 Problem: Handling emoji characters as full width has problems with backwards compatibility. Solution: Remove ambiguous and double width characters from the emoji table. Use a separate table for the character class. (partly by Yasuhiro Matsumoto)
author Christian Brabandt <cb@256bit.org>
date Mon, 21 Mar 2016 22:15:08 +0100
parents a931160ffc41
children 4ce551bd5024
line wrap: on
line diff
--- a/src/mbyte.c
+++ b/src/mbyte.c
@@ -1210,148 +1210,6 @@ intable(struct interval *table, size_t s
     return FALSE;
 }
 
-/* Sorted list of non-overlapping intervals of Emoji characters,
- * based on http://unicode.org/emoji/charts/emoji-list.html */
-static struct interval emoji_tab[] =
-{
-    {0x203c, 0x203c},
-    {0x2049, 0x2049},
-    {0x2122, 0x2122},
-    {0x2139, 0x2139},
-    {0x2194, 0x2199},
-    {0x21a9, 0x21aa},
-    {0x231a, 0x231b},
-    {0x2328, 0x2328},
-    {0x23cf, 0x23cf},
-    {0x23e9, 0x23f3},
-    {0x24c2, 0x24c2},
-    {0x25aa, 0x25ab},
-    {0x25b6, 0x25b6},
-    {0x25c0, 0x25c0},
-    {0x25fb, 0x25fe},
-    {0x2600, 0x2604},
-    {0x260e, 0x260e},
-    {0x2611, 0x2611},
-    {0x2614, 0x2615},
-    {0x2618, 0x2618},
-    {0x261d, 0x261d},
-    {0x2620, 0x2620},
-    {0x2622, 0x2623},
-    {0x2626, 0x2626},
-    {0x262a, 0x262a},
-    {0x262e, 0x262f},
-    {0x2638, 0x263a},
-    {0x2648, 0x2653},
-    {0x2660, 0x2660},
-    {0x2663, 0x2663},
-    {0x2665, 0x2666},
-    {0x2668, 0x2668},
-    {0x267b, 0x267b},
-    {0x267f, 0x267f},
-    {0x2692, 0x2694},
-    {0x2696, 0x2697},
-    {0x2699, 0x2699},
-    {0x269b, 0x269c},
-    {0x26a0, 0x26a1},
-    {0x26aa, 0x26ab},
-    {0x26b0, 0x26b1},
-    {0x26bd, 0x26be},
-    {0x26c4, 0x26c5},
-    {0x26c8, 0x26c8},
-    {0x26ce, 0x26ce},
-    {0x26cf, 0x26cf},
-    {0x26d1, 0x26d1},
-    {0x26d3, 0x26d4},
-    {0x26e9, 0x26ea},
-    {0x26f0, 0x26f5},
-    {0x26f7, 0x26fa},
-    {0x26fd, 0x26fd},
-    {0x2702, 0x2702},
-    {0x2705, 0x2705},
-    {0x2708, 0x2709},
-    {0x270a, 0x270b},
-    {0x270c, 0x270d},
-    {0x270f, 0x270f},
-    {0x2712, 0x2712},
-    {0x2714, 0x2714},
-    {0x2716, 0x2716},
-    {0x271d, 0x271d},
-    {0x2721, 0x2721},
-    {0x2728, 0x2728},
-    {0x2733, 0x2734},
-    {0x2744, 0x2744},
-    {0x2747, 0x2747},
-    {0x274c, 0x274c},
-    {0x274e, 0x274e},
-    {0x2753, 0x2755},
-    {0x2757, 0x2757},
-    {0x2763, 0x2764},
-    {0x2795, 0x2797},
-    {0x27a1, 0x27a1},
-    {0x27b0, 0x27b0},
-    {0x27bf, 0x27bf},
-    {0x2934, 0x2935},
-    {0x2b05, 0x2b07},
-    {0x2b1b, 0x2b1c},
-    {0x2b50, 0x2b50},
-    {0x2b55, 0x2b55},
-    {0x3030, 0x3030},
-    {0x303d, 0x303d},
-    {0x3297, 0x3297},
-    {0x3299, 0x3299},
-    {0x1f004, 0x1f004},
-    {0x1f0cf, 0x1f0cf},
-    {0x1f170, 0x1f171},
-    {0x1f17e, 0x1f17e},
-    {0x1f17f, 0x1f17f},
-    {0x1f18e, 0x1f18e},
-    {0x1f191, 0x1f19a},
-    {0x1f1e6, 0x1f1ff},
-    {0x1f201, 0x1f202},
-    {0x1f21a, 0x1f21a},
-    {0x1f22f, 0x1f22f},
-    {0x1f232, 0x1f23a},
-    {0x1f250, 0x1f251},
-    {0x1f300, 0x1f320},
-    {0x1f330, 0x1f335},
-    {0x1f337, 0x1f37c},
-    {0x1f380, 0x1f393},
-    {0x1f3a0, 0x1f3c4},
-    {0x1f3c6, 0x1f3ca},
-    {0x1f3e0, 0x1f3f0},
-    {0x1f400, 0x1f43e},
-    {0x1f440, 0x1f440},
-    {0x1f442, 0x1f4f7},
-    {0x1f4f9, 0x1f4fc},
-    {0x1f500, 0x1f53d},
-    {0x1f550, 0x1f567},
-    {0x1f5fb, 0x1f5ff},
-    {0x1f600, 0x1f600},
-    {0x1f601, 0x1f610},
-    {0x1f611, 0x1f611},
-    {0x1f612, 0x1f614},
-    {0x1f615, 0x1f615},
-    {0x1f616, 0x1f616},
-    {0x1f617, 0x1f617},
-    {0x1f618, 0x1f618},
-    {0x1f619, 0x1f619},
-    {0x1f61a, 0x1f61a},
-    {0x1f61b, 0x1f61b},
-    {0x1f61c, 0x1f61e},
-    {0x1f61f, 0x1f61f},
-    {0x1f620, 0x1f625},
-    {0x1f626, 0x1f627},
-    {0x1f628, 0x1f62b},
-    {0x1f62c, 0x1f62c},
-    {0x1f62d, 0x1f62d},
-    {0x1f62e, 0x1f62f},
-    {0x1f630, 0x1f633},
-    {0x1f634, 0x1f634},
-    {0x1f635, 0x1f640},
-    {0x1f645, 0x1f64f},
-    {0x1f680, 0x1f6c5}
-};
-
 /*
  * For UTF-8 character "c" return 2 for a double-width character, 1 for others.
  * Returns 4 or 6 for an unprintable character.
@@ -1577,6 +1435,90 @@ utf_char2cells(int c)
 	{0x100000, 0x10fffd}
     };
 
+    /* Sorted list of non-overlapping intervals of Emoji characters that are
+     * neither ambiguous-width nor double-width; a hit gives 2 cells if p_emoji
+     * is set.  Based on http://unicode.org/emoji/charts/emoji-list.html */
+    static struct interval emoji_width[] =
+    {
+	{0x203c, 0x203c},
+	{0x2049, 0x2049},
+	{0x2139, 0x2139},
+	{0x21a9, 0x21aa},
+	{0x231a, 0x231b},
+	{0x2328, 0x2328},
+	{0x23cf, 0x23cf},
+	{0x23e9, 0x23f3},
+	{0x25aa, 0x25ab},
+	{0x25fb, 0x25fe},
+	{0x2600, 0x2604},
+	{0x2611, 0x2611},
+	{0x2618, 0x2618},
+	{0x261d, 0x261d},
+	{0x2620, 0x2620},
+	{0x2622, 0x2623},
+	{0x2626, 0x2626},
+	{0x262a, 0x262a},
+	{0x262e, 0x262f},
+	{0x2638, 0x263a},
+	{0x2648, 0x2653},
+	{0x2666, 0x2666},
+	{0x267b, 0x267b},
+	{0x267f, 0x267f},
+	{0x2692, 0x2694},
+	{0x2696, 0x2697},
+	{0x2699, 0x2699},
+	{0x269b, 0x269c},
+	{0x26a0, 0x26a1},
+	{0x26aa, 0x26ab},
+	{0x26b0, 0x26b1},
+	{0x26bd, 0x26bd},
+	{0x26ce, 0x26ce},
+	{0x2702, 0x2702},
+	{0x2705, 0x2705},
+	{0x2708, 0x270d},
+	{0x270f, 0x270f},
+	{0x2712, 0x2712},
+	{0x2714, 0x2714},
+	{0x2716, 0x2716},
+	{0x271d, 0x271d},
+	{0x2721, 0x2721},
+	{0x2728, 0x2728},
+	{0x2733, 0x2734},
+	{0x2744, 0x2744},
+	{0x2747, 0x2747},
+	{0x274c, 0x274c},
+	{0x274e, 0x274e},
+	{0x2753, 0x2755},
+	{0x2763, 0x2764},
+	{0x2795, 0x2797},
+	{0x27a1, 0x27a1},
+	{0x27b0, 0x27b0},
+	{0x27bf, 0x27bf},
+	{0x2934, 0x2935},
+	{0x2b05, 0x2b07},
+	{0x2b1b, 0x2b1c},
+	{0x2b50, 0x2b50},
+	{0x1f004, 0x1f004},
+	{0x1f0cf, 0x1f0cf},
+	{0x1f1e6, 0x1f1ff},
+	{0x1f300, 0x1f320},
+	{0x1f330, 0x1f335},
+	{0x1f337, 0x1f37c},
+	{0x1f380, 0x1f393},
+	{0x1f3a0, 0x1f3c4},
+	{0x1f3c6, 0x1f3ca},
+	{0x1f3e0, 0x1f3f0},
+	{0x1f400, 0x1f43e},
+	{0x1f440, 0x1f440},
+	{0x1f442, 0x1f4f7},
+	{0x1f4f9, 0x1f4fc},
+	{0x1f500, 0x1f53d},
+	{0x1f550, 0x1f567},
+	{0x1f5fb, 0x1f640},
+	{0x1f645, 0x1f64f},
+	{0x1f680, 0x1f6c5}
+    };
+
     if (c >= 0x100)
     {
 #ifdef USE_WCHAR_FUNCTIONS
@@ -1596,7 +1538,7 @@ utf_char2cells(int c)
 	if (intable(doublewidth, sizeof(doublewidth), c))
 	    return 2;
 #endif
-	if (p_emoji && intable(emoji_tab, sizeof(emoji_tab), c))
+	if (p_emoji && intable(emoji_width, sizeof(emoji_width), c))
 	    return 2;
     }
 
@@ -2674,6 +2616,123 @@ utf_class(int c)
 	{0x2b740, 0x2b81f, 0x4e00},	/* CJK Ideographs */
 	{0x2f800, 0x2fa1f, 0x4e00},	/* CJK Ideographs */
     };
+
+    /* Sorted list of non-overlapping intervals of all Emoji characters,
+     * based on http://unicode.org/emoji/charts/emoji-list.html */
+    static struct interval emoji_all[] =  /* emoji check: a hit returns 3 */
+    {
+	{0x203c, 0x203c},
+	{0x2049, 0x2049},
+	{0x2122, 0x2122},
+	{0x2139, 0x2139},
+	{0x2194, 0x2199},
+	{0x21a9, 0x21aa},
+	{0x231a, 0x231b},
+	{0x2328, 0x2328},
+	{0x23cf, 0x23cf},
+	{0x23e9, 0x23f3},
+	{0x24c2, 0x24c2},
+	{0x25aa, 0x25ab},
+	{0x25b6, 0x25b6},
+	{0x25c0, 0x25c0},
+	{0x25fb, 0x25fe},
+	{0x2600, 0x2604},
+	{0x260e, 0x260e},
+	{0x2611, 0x2611},
+	{0x2614, 0x2615},
+	{0x2618, 0x2618},
+	{0x261d, 0x261d},
+	{0x2620, 0x2620},
+	{0x2622, 0x2623},
+	{0x2626, 0x2626},
+	{0x262a, 0x262a},
+	{0x262e, 0x262f},
+	{0x2638, 0x263a},
+	{0x2648, 0x2653},
+	{0x2660, 0x2660},
+	{0x2663, 0x2663},
+	{0x2665, 0x2666},
+	{0x2668, 0x2668},
+	{0x267b, 0x267b},
+	{0x267f, 0x267f},
+	{0x2692, 0x2694},
+	{0x2696, 0x2697},
+	{0x2699, 0x2699},
+	{0x269b, 0x269c},
+	{0x26a0, 0x26a1},
+	{0x26aa, 0x26ab},
+	{0x26b0, 0x26b1},
+	{0x26bd, 0x26be},
+	{0x26c4, 0x26c5},
+	{0x26c8, 0x26c8},
+	{0x26ce, 0x26cf},
+	{0x26d1, 0x26d1},
+	{0x26d3, 0x26d4},
+	{0x26e9, 0x26ea},
+	{0x26f0, 0x26f5},
+	{0x26f7, 0x26fa},
+	{0x26fd, 0x26fd},
+	{0x2702, 0x2702},
+	{0x2705, 0x2705},
+	{0x2708, 0x270d},
+	{0x270f, 0x270f},
+	{0x2712, 0x2712},
+	{0x2714, 0x2714},
+	{0x2716, 0x2716},
+	{0x271d, 0x271d},
+	{0x2721, 0x2721},
+	{0x2728, 0x2728},
+	{0x2733, 0x2734},
+	{0x2744, 0x2744},
+	{0x2747, 0x2747},
+	{0x274c, 0x274c},
+	{0x274e, 0x274e},
+	{0x2753, 0x2755},
+	{0x2757, 0x2757},
+	{0x2763, 0x2764},
+	{0x2795, 0x2797},
+	{0x27a1, 0x27a1},
+	{0x27b0, 0x27b0},
+	{0x27bf, 0x27bf},
+	{0x2934, 0x2935},
+	{0x2b05, 0x2b07},
+	{0x2b1b, 0x2b1c},
+	{0x2b50, 0x2b50},
+	{0x2b55, 0x2b55},
+	{0x3030, 0x3030},
+	{0x303d, 0x303d},
+	{0x3297, 0x3297},
+	{0x3299, 0x3299},
+	{0x1f004, 0x1f004},
+	{0x1f0cf, 0x1f0cf},
+	{0x1f170, 0x1f171},
+	{0x1f17e, 0x1f17f},
+	{0x1f18e, 0x1f18e},
+	{0x1f191, 0x1f19a},
+	{0x1f1e6, 0x1f1ff},
+	{0x1f201, 0x1f202},
+	{0x1f21a, 0x1f21a},
+	{0x1f22f, 0x1f22f},
+	{0x1f232, 0x1f23a},
+	{0x1f250, 0x1f251},
+	{0x1f300, 0x1f320},
+	{0x1f330, 0x1f335},
+	{0x1f337, 0x1f37c},
+	{0x1f380, 0x1f393},
+	{0x1f3a0, 0x1f3c4},
+	{0x1f3c6, 0x1f3ca},
+	{0x1f3e0, 0x1f3f0},
+	{0x1f400, 0x1f43e},
+	{0x1f440, 0x1f440},
+	{0x1f442, 0x1f4f7},
+	{0x1f4f9, 0x1f4fc},
+	{0x1f500, 0x1f53d},
+	{0x1f550, 0x1f567},
+	{0x1f5fb, 0x1f640},
+	{0x1f645, 0x1f64f},
+	{0x1f680, 0x1f6c5}
+    };
+
     int bot = 0;
     int top = sizeof(classes) / sizeof(struct clinterval) - 1;
     int mid;
@@ -2701,7 +2760,7 @@ utf_class(int c)
     }
 
     /* emoji */
-    if (intable(emoji_tab, sizeof(emoji_tab), c))
+    if (intable(emoji_all, sizeof(emoji_all), c))
 	return 3;
 
     /* most other characters are "word" characters */