comparison src/mbyte.c @ 8680:131e651fb347 v7.4.1629

commit https://github.com/vim/vim/commit/b86f10ee10bdf932df02bdaf601dffa671518a47
Author: Bram Moolenaar <Bram@vim.org>
Date: Mon Mar 21 22:09:44 2016 +0100
patch 7.4.1629
Problem: Handling emoji characters as full width has problems with backwards compatibility.
Solution: Remove ambiguous and double width characters from the emoji table. Use a separate table for the character class. (partly by Yasuhiro Matsumoto)
author Christian Brabandt <cb@256bit.org>
date Mon, 21 Mar 2016 22:15:08 +0100
parents a931160ffc41
children 4ce551bd5024
comparison
equal deleted inserted replaced
8679:a73ff2ec40f2 8680:131e651fb347
1207 else 1207 else
1208 return TRUE; 1208 return TRUE;
1209 } 1209 }
1210 return FALSE; 1210 return FALSE;
1211 } 1211 }
1212
1213 /* Sorted list of non-overlapping intervals of Emoji characters,
1214 * based on http://unicode.org/emoji/charts/emoji-list.html */
1215 static struct interval emoji_tab[] =
1216 {
1217 {0x203c, 0x203c},
1218 {0x2049, 0x2049},
1219 {0x2122, 0x2122},
1220 {0x2139, 0x2139},
1221 {0x2194, 0x2199},
1222 {0x21a9, 0x21aa},
1223 {0x231a, 0x231b},
1224 {0x2328, 0x2328},
1225 {0x23cf, 0x23cf},
1226 {0x23e9, 0x23f3},
1227 {0x24c2, 0x24c2},
1228 {0x25aa, 0x25ab},
1229 {0x25b6, 0x25b6},
1230 {0x25c0, 0x25c0},
1231 {0x25fb, 0x25fe},
1232 {0x2600, 0x2604},
1233 {0x260e, 0x260e},
1234 {0x2611, 0x2611},
1235 {0x2614, 0x2615},
1236 {0x2618, 0x2618},
1237 {0x261d, 0x261d},
1238 {0x2620, 0x2620},
1239 {0x2622, 0x2623},
1240 {0x2626, 0x2626},
1241 {0x262a, 0x262a},
1242 {0x262e, 0x262f},
1243 {0x2638, 0x263a},
1244 {0x2648, 0x2653},
1245 {0x2660, 0x2660},
1246 {0x2663, 0x2663},
1247 {0x2665, 0x2666},
1248 {0x2668, 0x2668},
1249 {0x267b, 0x267b},
1250 {0x267f, 0x267f},
1251 {0x2692, 0x2694},
1252 {0x2696, 0x2697},
1253 {0x2699, 0x2699},
1254 {0x269b, 0x269c},
1255 {0x26a0, 0x26a1},
1256 {0x26aa, 0x26ab},
1257 {0x26b0, 0x26b1},
1258 {0x26bd, 0x26be},
1259 {0x26c4, 0x26c5},
1260 {0x26c8, 0x26c8},
1261 {0x26ce, 0x26ce},
1262 {0x26cf, 0x26cf},
1263 {0x26d1, 0x26d1},
1264 {0x26d3, 0x26d4},
1265 {0x26e9, 0x26ea},
1266 {0x26f0, 0x26f5},
1267 {0x26f7, 0x26fa},
1268 {0x26fd, 0x26fd},
1269 {0x2702, 0x2702},
1270 {0x2705, 0x2705},
1271 {0x2708, 0x2709},
1272 {0x270a, 0x270b},
1273 {0x270c, 0x270d},
1274 {0x270f, 0x270f},
1275 {0x2712, 0x2712},
1276 {0x2714, 0x2714},
1277 {0x2716, 0x2716},
1278 {0x271d, 0x271d},
1279 {0x2721, 0x2721},
1280 {0x2728, 0x2728},
1281 {0x2733, 0x2734},
1282 {0x2744, 0x2744},
1283 {0x2747, 0x2747},
1284 {0x274c, 0x274c},
1285 {0x274e, 0x274e},
1286 {0x2753, 0x2755},
1287 {0x2757, 0x2757},
1288 {0x2763, 0x2764},
1289 {0x2795, 0x2797},
1290 {0x27a1, 0x27a1},
1291 {0x27b0, 0x27b0},
1292 {0x27bf, 0x27bf},
1293 {0x2934, 0x2935},
1294 {0x2b05, 0x2b07},
1295 {0x2b1b, 0x2b1c},
1296 {0x2b50, 0x2b50},
1297 {0x2b55, 0x2b55},
1298 {0x3030, 0x3030},
1299 {0x303d, 0x303d},
1300 {0x3297, 0x3297},
1301 {0x3299, 0x3299},
1302 {0x1f004, 0x1f004},
1303 {0x1f0cf, 0x1f0cf},
1304 {0x1f170, 0x1f171},
1305 {0x1f17e, 0x1f17e},
1306 {0x1f17f, 0x1f17f},
1307 {0x1f18e, 0x1f18e},
1308 {0x1f191, 0x1f19a},
1309 {0x1f1e6, 0x1f1ff},
1310 {0x1f201, 0x1f202},
1311 {0x1f21a, 0x1f21a},
1312 {0x1f22f, 0x1f22f},
1313 {0x1f232, 0x1f23a},
1314 {0x1f250, 0x1f251},
1315 {0x1f300, 0x1f320},
1316 {0x1f330, 0x1f335},
1317 {0x1f337, 0x1f37c},
1318 {0x1f380, 0x1f393},
1319 {0x1f3a0, 0x1f3c4},
1320 {0x1f3c6, 0x1f3ca},
1321 {0x1f3e0, 0x1f3f0},
1322 {0x1f400, 0x1f43e},
1323 {0x1f440, 0x1f440},
1324 {0x1f442, 0x1f4f7},
1325 {0x1f4f9, 0x1f4fc},
1326 {0x1f500, 0x1f53d},
1327 {0x1f550, 0x1f567},
1328 {0x1f5fb, 0x1f5ff},
1329 {0x1f600, 0x1f600},
1330 {0x1f601, 0x1f610},
1331 {0x1f611, 0x1f611},
1332 {0x1f612, 0x1f614},
1333 {0x1f615, 0x1f615},
1334 {0x1f616, 0x1f616},
1335 {0x1f617, 0x1f617},
1336 {0x1f618, 0x1f618},
1337 {0x1f619, 0x1f619},
1338 {0x1f61a, 0x1f61a},
1339 {0x1f61b, 0x1f61b},
1340 {0x1f61c, 0x1f61e},
1341 {0x1f61f, 0x1f61f},
1342 {0x1f620, 0x1f625},
1343 {0x1f626, 0x1f627},
1344 {0x1f628, 0x1f62b},
1345 {0x1f62c, 0x1f62c},
1346 {0x1f62d, 0x1f62d},
1347 {0x1f62e, 0x1f62f},
1348 {0x1f630, 0x1f633},
1349 {0x1f634, 0x1f634},
1350 {0x1f635, 0x1f640},
1351 {0x1f645, 0x1f64f},
1352 {0x1f680, 0x1f6c5}
1353 };
1354 1212
1355 /* 1213 /*
1356 * For UTF-8 character "c" return 2 for a double-width character, 1 for others. 1214 * For UTF-8 character "c" return 2 for a double-width character, 1 for others.
1357 * Returns 4 or 6 for an unprintable character. 1215 * Returns 4 or 6 for an unprintable character.
1358 * Is only correct for characters >= 0x80. 1216 * Is only correct for characters >= 0x80.
1575 {0xe0100, 0xe01ef}, 1433 {0xe0100, 0xe01ef},
1576 {0xf0000, 0xffffd}, 1434 {0xf0000, 0xffffd},
1577 {0x100000, 0x10fffd} 1435 {0x100000, 0x10fffd}
1578 }; 1436 };
1579 1437
1438 /* Sorted list of non-overlapping intervals of Emoji characters that don't
1439 * have ambiguous or double width,
1440 * based on http://unicode.org/emoji/charts/emoji-list.html */
1441 static struct interval emoji_width[] =
1442 {
1443 {0x203c, 0x203c},
1444 {0x2049, 0x2049},
1445 {0x2139, 0x2139},
1446 {0x21a9, 0x21aa},
1447 {0x231a, 0x231b},
1448 {0x2328, 0x2328},
1449 {0x23cf, 0x23cf},
1450 {0x23e9, 0x23f3},
1451 {0x25aa, 0x25ab},
1452 {0x25fb, 0x25fe},
1453 {0x2600, 0x2604},
1454 {0x2611, 0x2611},
1455 {0x2618, 0x2618},
1456 {0x261d, 0x261d},
1457 {0x2620, 0x2620},
1458 {0x2622, 0x2623},
1459 {0x2626, 0x2626},
1460 {0x262a, 0x262a},
1461 {0x262e, 0x262f},
1462 {0x2638, 0x263a},
1463 {0x2648, 0x2653},
1464 {0x2666, 0x2666},
1465 {0x267b, 0x267b},
1466 {0x267f, 0x267f},
1467 {0x2692, 0x2694},
1468 {0x2696, 0x2697},
1469 {0x2699, 0x2699},
1470 {0x269b, 0x269c},
1471 {0x26a0, 0x26a1},
1472 {0x26aa, 0x26ab},
1473 {0x26b0, 0x26b1},
1474 {0x26bd, 0x26bd},
1475 {0x26ce, 0x26ce},
1476 {0x2702, 0x2702},
1477 {0x2705, 0x2705},
1478 {0x2708, 0x270d},
1479 {0x270f, 0x270f},
1480 {0x2712, 0x2712},
1481 {0x2714, 0x2714},
1482 {0x2716, 0x2716},
1483 {0x271d, 0x271d},
1484 {0x2721, 0x2721},
1485 {0x2728, 0x2728},
1486 {0x2733, 0x2734},
1487 {0x2744, 0x2744},
1488 {0x2747, 0x2747},
1489 {0x274c, 0x274c},
1490 {0x274e, 0x274e},
1491 {0x2753, 0x2755},
1492 {0x2763, 0x2764},
1493 {0x2795, 0x2797},
1494 {0x27a1, 0x27a1},
1495 {0x27b0, 0x27b0},
1496 {0x27bf, 0x27bf},
1497 {0x2934, 0x2935},
1498 {0x2b05, 0x2b07},
1499 {0x2b1b, 0x2b1c},
1500 {0x2b50, 0x2b50},
1501 {0x1f004, 0x1f004},
1502 {0x1f0cf, 0x1f0cf},
1503 {0x1f1e6, 0x1f1ff},
1504 {0x1f300, 0x1f320},
1505 {0x1f330, 0x1f335},
1506 {0x1f337, 0x1f37c},
1507 {0x1f380, 0x1f393},
1508 {0x1f3a0, 0x1f3c4},
1509 {0x1f3c6, 0x1f3ca},
1510 {0x1f3e0, 0x1f3f0},
1511 {0x1f400, 0x1f43e},
1512 {0x1f440, 0x1f440},
1513 {0x1f442, 0x1f4f7},
1514 {0x1f4f9, 0x1f4fc},
1515 {0x1f500, 0x1f53d},
1516 {0x1f550, 0x1f567},
1517 {0x1f5fb, 0x1f640},
1518 {0x1f645, 0x1f64f},
1519 {0x1f680, 0x1f6c5}
1520 };
1521
1580 if (c >= 0x100) 1522 if (c >= 0x100)
1581 { 1523 {
1582 #ifdef USE_WCHAR_FUNCTIONS 1524 #ifdef USE_WCHAR_FUNCTIONS
1583 /* 1525 /*
1584 * Assume the library function wcwidth() works better than our own 1526 * Assume the library function wcwidth() works better than our own
1594 if (!utf_printable(c)) 1536 if (!utf_printable(c))
1595 return 6; /* unprintable, displays <xxxx> */ 1537 return 6; /* unprintable, displays <xxxx> */
1596 if (intable(doublewidth, sizeof(doublewidth), c)) 1538 if (intable(doublewidth, sizeof(doublewidth), c))
1597 return 2; 1539 return 2;
1598 #endif 1540 #endif
1599 if (p_emoji && intable(emoji_tab, sizeof(emoji_tab), c)) 1541 if (p_emoji && intable(emoji_width, sizeof(emoji_width), c))
1600 return 2; 1542 return 2;
1601 } 1543 }
1602 1544
1603 /* Characters below 0x100 are influenced by 'isprint' option */ 1545 /* Characters below 0x100 are influenced by 'isprint' option */
1604 else if (c >= 0x80 && !vim_isprintc(c)) 1546 else if (c >= 0x80 && !vim_isprintc(c))
2672 {0x20000, 0x2a6df, 0x4e00}, /* CJK Ideographs */ 2614 {0x20000, 0x2a6df, 0x4e00}, /* CJK Ideographs */
2673 {0x2a700, 0x2b73f, 0x4e00}, /* CJK Ideographs */ 2615 {0x2a700, 0x2b73f, 0x4e00}, /* CJK Ideographs */
2674 {0x2b740, 0x2b81f, 0x4e00}, /* CJK Ideographs */ 2616 {0x2b740, 0x2b81f, 0x4e00}, /* CJK Ideographs */
2675 {0x2f800, 0x2fa1f, 0x4e00}, /* CJK Ideographs */ 2617 {0x2f800, 0x2fa1f, 0x4e00}, /* CJK Ideographs */
2676 }; 2618 };
2619
2620 /* Sorted list of non-overlapping intervals of all Emoji characters,
2621 * based on http://unicode.org/emoji/charts/emoji-list.html */
2622 static struct interval emoji_all[] =
2623 {
2624 {0x203c, 0x203c},
2625 {0x2049, 0x2049},
2626 {0x2122, 0x2122},
2627 {0x2139, 0x2139},
2628 {0x2194, 0x2199},
2629 {0x21a9, 0x21aa},
2630 {0x231a, 0x231b},
2631 {0x2328, 0x2328},
2632 {0x23cf, 0x23cf},
2633 {0x23e9, 0x23f3},
2634 {0x24c2, 0x24c2},
2635 {0x25aa, 0x25ab},
2636 {0x25b6, 0x25b6},
2637 {0x25c0, 0x25c0},
2638 {0x25fb, 0x25fe},
2639 {0x2600, 0x2604},
2640 {0x260e, 0x260e},
2641 {0x2611, 0x2611},
2642 {0x2614, 0x2615},
2643 {0x2618, 0x2618},
2644 {0x261d, 0x261d},
2645 {0x2620, 0x2620},
2646 {0x2622, 0x2623},
2647 {0x2626, 0x2626},
2648 {0x262a, 0x262a},
2649 {0x262e, 0x262f},
2650 {0x2638, 0x263a},
2651 {0x2648, 0x2653},
2652 {0x2660, 0x2660},
2653 {0x2663, 0x2663},
2654 {0x2665, 0x2666},
2655 {0x2668, 0x2668},
2656 {0x267b, 0x267b},
2657 {0x267f, 0x267f},
2658 {0x2692, 0x2694},
2659 {0x2696, 0x2697},
2660 {0x2699, 0x2699},
2661 {0x269b, 0x269c},
2662 {0x26a0, 0x26a1},
2663 {0x26aa, 0x26ab},
2664 {0x26b0, 0x26b1},
2665 {0x26bd, 0x26be},
2666 {0x26c4, 0x26c5},
2667 {0x26c8, 0x26c8},
2668 {0x26ce, 0x26cf},
2669 {0x26d1, 0x26d1},
2670 {0x26d3, 0x26d4},
2671 {0x26e9, 0x26ea},
2672 {0x26f0, 0x26f5},
2673 {0x26f7, 0x26fa},
2674 {0x26fd, 0x26fd},
2675 {0x2702, 0x2702},
2676 {0x2705, 0x2705},
2677 {0x2708, 0x270d},
2678 {0x270f, 0x270f},
2679 {0x2712, 0x2712},
2680 {0x2714, 0x2714},
2681 {0x2716, 0x2716},
2682 {0x271d, 0x271d},
2683 {0x2721, 0x2721},
2684 {0x2728, 0x2728},
2685 {0x2733, 0x2734},
2686 {0x2744, 0x2744},
2687 {0x2747, 0x2747},
2688 {0x274c, 0x274c},
2689 {0x274e, 0x274e},
2690 {0x2753, 0x2755},
2691 {0x2757, 0x2757},
2692 {0x2763, 0x2764},
2693 {0x2795, 0x2797},
2694 {0x27a1, 0x27a1},
2695 {0x27b0, 0x27b0},
2696 {0x27bf, 0x27bf},
2697 {0x2934, 0x2935},
2698 {0x2b05, 0x2b07},
2699 {0x2b1b, 0x2b1c},
2700 {0x2b50, 0x2b50},
2701 {0x2b55, 0x2b55},
2702 {0x3030, 0x3030},
2703 {0x303d, 0x303d},
2704 {0x3297, 0x3297},
2705 {0x3299, 0x3299},
2706 {0x1f004, 0x1f004},
2707 {0x1f0cf, 0x1f0cf},
2708 {0x1f170, 0x1f171},
2709 {0x1f17e, 0x1f17f},
2710 {0x1f18e, 0x1f18e},
2711 {0x1f191, 0x1f19a},
2712 {0x1f1e6, 0x1f1ff},
2713 {0x1f201, 0x1f202},
2714 {0x1f21a, 0x1f21a},
2715 {0x1f22f, 0x1f22f},
2716 {0x1f232, 0x1f23a},
2717 {0x1f250, 0x1f251},
2718 {0x1f300, 0x1f320},
2719 {0x1f330, 0x1f335},
2720 {0x1f337, 0x1f37c},
2721 {0x1f380, 0x1f393},
2722 {0x1f3a0, 0x1f3c4},
2723 {0x1f3c6, 0x1f3ca},
2724 {0x1f3e0, 0x1f3f0},
2725 {0x1f400, 0x1f43e},
2726 {0x1f440, 0x1f440},
2727 {0x1f442, 0x1f4f7},
2728 {0x1f4f9, 0x1f4fc},
2729 {0x1f500, 0x1f53d},
2730 {0x1f550, 0x1f567},
2731 {0x1f5fb, 0x1f640},
2732 {0x1f645, 0x1f64f},
2733 {0x1f680, 0x1f6c5}
2734 };
2735
2677 int bot = 0; 2736 int bot = 0;
2678 int top = sizeof(classes) / sizeof(struct clinterval) - 1; 2737 int top = sizeof(classes) / sizeof(struct clinterval) - 1;
2679 int mid; 2738 int mid;
2680 2739
2681 /* First quick check for Latin1 characters, use 'iskeyword'. */ 2740 /* First quick check for Latin1 characters, use 'iskeyword'. */
2699 else 2758 else
2700 return (int)classes[mid].class; 2759 return (int)classes[mid].class;
2701 } 2760 }
2702 2761
2703 /* emoji */ 2762 /* emoji */
2704 if (intable(emoji_tab, sizeof(emoji_tab), c)) 2763 if (intable(emoji_all, sizeof(emoji_all), c))
2705 return 3; 2764 return 3;
2706 2765
2707 /* most other characters are "word" characters */ 2766 /* most other characters are "word" characters */
2708 return 2; 2767 return 2;
2709 } 2768 }