Mercurial > vim
comparison src/mbyte.c @ 8680:131e651fb347 v7.4.1629
commit https://github.com/vim/vim/commit/b86f10ee10bdf932df02bdaf601dffa671518a47
Author: Bram Moolenaar <Bram@vim.org>
Date: Mon Mar 21 22:09:44 2016 +0100
patch 7.4.1629
Problem: Handling emoji characters as full width has problems with
backwards compatibility.
Solution: Remove ambiguous and double width characters from the emoji table.
Use a separate table for the character class.
(partly by Yasuhiro Matsumoto)
author | Christian Brabandt <cb@256bit.org> |
---|---|
date | Mon, 21 Mar 2016 22:15:08 +0100 |
parents | a931160ffc41 |
children | 4ce551bd5024 |
Comparison legend: equal, deleted, inserted, replaced
8679:a73ff2ec40f2 | 8680:131e651fb347 |
---|---|
1207 else | 1207 else |
1208 return TRUE; | 1208 return TRUE; |
1209 } | 1209 } |
1210 return FALSE; | 1210 return FALSE; |
1211 } | 1211 } |
1212 | |
1213 /* Sorted list of non-overlapping intervals of Emoji characters, | |
1214 * based on http://unicode.org/emoji/charts/emoji-list.html */ | |
1215 static struct interval emoji_tab[] = | |
1216 { | |
1217 {0x203c, 0x203c}, | |
1218 {0x2049, 0x2049}, | |
1219 {0x2122, 0x2122}, | |
1220 {0x2139, 0x2139}, | |
1221 {0x2194, 0x2199}, | |
1222 {0x21a9, 0x21aa}, | |
1223 {0x231a, 0x231b}, | |
1224 {0x2328, 0x2328}, | |
1225 {0x23cf, 0x23cf}, | |
1226 {0x23e9, 0x23f3}, | |
1227 {0x24c2, 0x24c2}, | |
1228 {0x25aa, 0x25ab}, | |
1229 {0x25b6, 0x25b6}, | |
1230 {0x25c0, 0x25c0}, | |
1231 {0x25fb, 0x25fe}, | |
1232 {0x2600, 0x2604}, | |
1233 {0x260e, 0x260e}, | |
1234 {0x2611, 0x2611}, | |
1235 {0x2614, 0x2615}, | |
1236 {0x2618, 0x2618}, | |
1237 {0x261d, 0x261d}, | |
1238 {0x2620, 0x2620}, | |
1239 {0x2622, 0x2623}, | |
1240 {0x2626, 0x2626}, | |
1241 {0x262a, 0x262a}, | |
1242 {0x262e, 0x262f}, | |
1243 {0x2638, 0x263a}, | |
1244 {0x2648, 0x2653}, | |
1245 {0x2660, 0x2660}, | |
1246 {0x2663, 0x2663}, | |
1247 {0x2665, 0x2666}, | |
1248 {0x2668, 0x2668}, | |
1249 {0x267b, 0x267b}, | |
1250 {0x267f, 0x267f}, | |
1251 {0x2692, 0x2694}, | |
1252 {0x2696, 0x2697}, | |
1253 {0x2699, 0x2699}, | |
1254 {0x269b, 0x269c}, | |
1255 {0x26a0, 0x26a1}, | |
1256 {0x26aa, 0x26ab}, | |
1257 {0x26b0, 0x26b1}, | |
1258 {0x26bd, 0x26be}, | |
1259 {0x26c4, 0x26c5}, | |
1260 {0x26c8, 0x26c8}, | |
1261 {0x26ce, 0x26ce}, | |
1262 {0x26cf, 0x26cf}, | |
1263 {0x26d1, 0x26d1}, | |
1264 {0x26d3, 0x26d4}, | |
1265 {0x26e9, 0x26ea}, | |
1266 {0x26f0, 0x26f5}, | |
1267 {0x26f7, 0x26fa}, | |
1268 {0x26fd, 0x26fd}, | |
1269 {0x2702, 0x2702}, | |
1270 {0x2705, 0x2705}, | |
1271 {0x2708, 0x2709}, | |
1272 {0x270a, 0x270b}, | |
1273 {0x270c, 0x270d}, | |
1274 {0x270f, 0x270f}, | |
1275 {0x2712, 0x2712}, | |
1276 {0x2714, 0x2714}, | |
1277 {0x2716, 0x2716}, | |
1278 {0x271d, 0x271d}, | |
1279 {0x2721, 0x2721}, | |
1280 {0x2728, 0x2728}, | |
1281 {0x2733, 0x2734}, | |
1282 {0x2744, 0x2744}, | |
1283 {0x2747, 0x2747}, | |
1284 {0x274c, 0x274c}, | |
1285 {0x274e, 0x274e}, | |
1286 {0x2753, 0x2755}, | |
1287 {0x2757, 0x2757}, | |
1288 {0x2763, 0x2764}, | |
1289 {0x2795, 0x2797}, | |
1290 {0x27a1, 0x27a1}, | |
1291 {0x27b0, 0x27b0}, | |
1292 {0x27bf, 0x27bf}, | |
1293 {0x2934, 0x2935}, | |
1294 {0x2b05, 0x2b07}, | |
1295 {0x2b1b, 0x2b1c}, | |
1296 {0x2b50, 0x2b50}, | |
1297 {0x2b55, 0x2b55}, | |
1298 {0x3030, 0x3030}, | |
1299 {0x303d, 0x303d}, | |
1300 {0x3297, 0x3297}, | |
1301 {0x3299, 0x3299}, | |
1302 {0x1f004, 0x1f004}, | |
1303 {0x1f0cf, 0x1f0cf}, | |
1304 {0x1f170, 0x1f171}, | |
1305 {0x1f17e, 0x1f17e}, | |
1306 {0x1f17f, 0x1f17f}, | |
1307 {0x1f18e, 0x1f18e}, | |
1308 {0x1f191, 0x1f19a}, | |
1309 {0x1f1e6, 0x1f1ff}, | |
1310 {0x1f201, 0x1f202}, | |
1311 {0x1f21a, 0x1f21a}, | |
1312 {0x1f22f, 0x1f22f}, | |
1313 {0x1f232, 0x1f23a}, | |
1314 {0x1f250, 0x1f251}, | |
1315 {0x1f300, 0x1f320}, | |
1316 {0x1f330, 0x1f335}, | |
1317 {0x1f337, 0x1f37c}, | |
1318 {0x1f380, 0x1f393}, | |
1319 {0x1f3a0, 0x1f3c4}, | |
1320 {0x1f3c6, 0x1f3ca}, | |
1321 {0x1f3e0, 0x1f3f0}, | |
1322 {0x1f400, 0x1f43e}, | |
1323 {0x1f440, 0x1f440}, | |
1324 {0x1f442, 0x1f4f7}, | |
1325 {0x1f4f9, 0x1f4fc}, | |
1326 {0x1f500, 0x1f53d}, | |
1327 {0x1f550, 0x1f567}, | |
1328 {0x1f5fb, 0x1f5ff}, | |
1329 {0x1f600, 0x1f600}, | |
1330 {0x1f601, 0x1f610}, | |
1331 {0x1f611, 0x1f611}, | |
1332 {0x1f612, 0x1f614}, | |
1333 {0x1f615, 0x1f615}, | |
1334 {0x1f616, 0x1f616}, | |
1335 {0x1f617, 0x1f617}, | |
1336 {0x1f618, 0x1f618}, | |
1337 {0x1f619, 0x1f619}, | |
1338 {0x1f61a, 0x1f61a}, | |
1339 {0x1f61b, 0x1f61b}, | |
1340 {0x1f61c, 0x1f61e}, | |
1341 {0x1f61f, 0x1f61f}, | |
1342 {0x1f620, 0x1f625}, | |
1343 {0x1f626, 0x1f627}, | |
1344 {0x1f628, 0x1f62b}, | |
1345 {0x1f62c, 0x1f62c}, | |
1346 {0x1f62d, 0x1f62d}, | |
1347 {0x1f62e, 0x1f62f}, | |
1348 {0x1f630, 0x1f633}, | |
1349 {0x1f634, 0x1f634}, | |
1350 {0x1f635, 0x1f640}, | |
1351 {0x1f645, 0x1f64f}, | |
1352 {0x1f680, 0x1f6c5} | |
1353 }; | |
1354 | 1212 |
1355 /* | 1213 /* |
1356 * For UTF-8 character "c" return 2 for a double-width character, 1 for others. | 1214 * For UTF-8 character "c" return 2 for a double-width character, 1 for others. |
1357 * Returns 4 or 6 for an unprintable character. | 1215 * Returns 4 or 6 for an unprintable character. |
1358 * Is only correct for characters >= 0x80. | 1216 * Is only correct for characters >= 0x80. |
1575 {0xe0100, 0xe01ef}, | 1433 {0xe0100, 0xe01ef}, |
1576 {0xf0000, 0xffffd}, | 1434 {0xf0000, 0xffffd}, |
1577 {0x100000, 0x10fffd} | 1435 {0x100000, 0x10fffd} |
1578 }; | 1436 }; |
1579 | 1437 |
1438 /* Sorted list of non-overlapping intervals of Emoji characters that don't | |
1439 * have ambiguous or double width, | |
1440 * based on http://unicode.org/emoji/charts/emoji-list.html */ | |
1441 static struct interval emoji_width[] = | |
1442 { | |
1443 {0x203c, 0x203c}, | |
1444 {0x2049, 0x2049}, | |
1445 {0x2139, 0x2139}, | |
1446 {0x21a9, 0x21aa}, | |
1447 {0x231a, 0x231b}, | |
1448 {0x2328, 0x2328}, | |
1449 {0x23cf, 0x23cf}, | |
1450 {0x23e9, 0x23f3}, | |
1451 {0x25aa, 0x25ab}, | |
1452 {0x25fb, 0x25fe}, | |
1453 {0x2600, 0x2604}, | |
1454 {0x2611, 0x2611}, | |
1455 {0x2618, 0x2618}, | |
1456 {0x261d, 0x261d}, | |
1457 {0x2620, 0x2620}, | |
1458 {0x2622, 0x2623}, | |
1459 {0x2626, 0x2626}, | |
1460 {0x262a, 0x262a}, | |
1461 {0x262e, 0x262f}, | |
1462 {0x2638, 0x263a}, | |
1463 {0x2648, 0x2653}, | |
1464 {0x2666, 0x2666}, | |
1465 {0x267b, 0x267b}, | |
1466 {0x267f, 0x267f}, | |
1467 {0x2692, 0x2694}, | |
1468 {0x2696, 0x2697}, | |
1469 {0x2699, 0x2699}, | |
1470 {0x269b, 0x269c}, | |
1471 {0x26a0, 0x26a1}, | |
1472 {0x26aa, 0x26ab}, | |
1473 {0x26b0, 0x26b1}, | |
1474 {0x26bd, 0x26bd}, | |
1475 {0x26ce, 0x26ce}, | |
1476 {0x2702, 0x2702}, | |
1477 {0x2705, 0x2705}, | |
1478 {0x2708, 0x270d}, | |
1479 {0x270f, 0x270f}, | |
1480 {0x2712, 0x2712}, | |
1481 {0x2714, 0x2714}, | |
1482 {0x2716, 0x2716}, | |
1483 {0x271d, 0x271d}, | |
1484 {0x2721, 0x2721}, | |
1485 {0x2728, 0x2728}, | |
1486 {0x2733, 0x2734}, | |
1487 {0x2744, 0x2744}, | |
1488 {0x2747, 0x2747}, | |
1489 {0x274c, 0x274c}, | |
1490 {0x274e, 0x274e}, | |
1491 {0x2753, 0x2755}, | |
1492 {0x2763, 0x2764}, | |
1493 {0x2795, 0x2797}, | |
1494 {0x27a1, 0x27a1}, | |
1495 {0x27b0, 0x27b0}, | |
1496 {0x27bf, 0x27bf}, | |
1497 {0x2934, 0x2935}, | |
1498 {0x2b05, 0x2b07}, | |
1499 {0x2b1b, 0x2b1c}, | |
1500 {0x2b50, 0x2b50}, | |
1501 {0x1f004, 0x1f004}, | |
1502 {0x1f0cf, 0x1f0cf}, | |
1503 {0x1f1e6, 0x1f1ff}, | |
1504 {0x1f300, 0x1f320}, | |
1505 {0x1f330, 0x1f335}, | |
1506 {0x1f337, 0x1f37c}, | |
1507 {0x1f380, 0x1f393}, | |
1508 {0x1f3a0, 0x1f3c4}, | |
1509 {0x1f3c6, 0x1f3ca}, | |
1510 {0x1f3e0, 0x1f3f0}, | |
1511 {0x1f400, 0x1f43e}, | |
1512 {0x1f440, 0x1f440}, | |
1513 {0x1f442, 0x1f4f7}, | |
1514 {0x1f4f9, 0x1f4fc}, | |
1515 {0x1f500, 0x1f53d}, | |
1516 {0x1f550, 0x1f567}, | |
1517 {0x1f5fb, 0x1f640}, | |
1518 {0x1f645, 0x1f64f}, | |
1519 {0x1f680, 0x1f6c5} | |
1520 }; | |
1521 | |
1580 if (c >= 0x100) | 1522 if (c >= 0x100) |
1581 { | 1523 { |
1582 #ifdef USE_WCHAR_FUNCTIONS | 1524 #ifdef USE_WCHAR_FUNCTIONS |
1583 /* | 1525 /* |
1584 * Assume the library function wcwidth() works better than our own | 1526 * Assume the library function wcwidth() works better than our own |
1594 if (!utf_printable(c)) | 1536 if (!utf_printable(c)) |
1595 return 6; /* unprintable, displays <xxxx> */ | 1537 return 6; /* unprintable, displays <xxxx> */ |
1596 if (intable(doublewidth, sizeof(doublewidth), c)) | 1538 if (intable(doublewidth, sizeof(doublewidth), c)) |
1597 return 2; | 1539 return 2; |
1598 #endif | 1540 #endif |
1599 if (p_emoji && intable(emoji_tab, sizeof(emoji_tab), c)) | 1541 if (p_emoji && intable(emoji_width, sizeof(emoji_width), c)) |
1600 return 2; | 1542 return 2; |
1601 } | 1543 } |
1602 | 1544 |
1603 /* Characters below 0x100 are influenced by 'isprint' option */ | 1545 /* Characters below 0x100 are influenced by 'isprint' option */ |
1604 else if (c >= 0x80 && !vim_isprintc(c)) | 1546 else if (c >= 0x80 && !vim_isprintc(c)) |
2672 {0x20000, 0x2a6df, 0x4e00}, /* CJK Ideographs */ | 2614 {0x20000, 0x2a6df, 0x4e00}, /* CJK Ideographs */ |
2673 {0x2a700, 0x2b73f, 0x4e00}, /* CJK Ideographs */ | 2615 {0x2a700, 0x2b73f, 0x4e00}, /* CJK Ideographs */ |
2674 {0x2b740, 0x2b81f, 0x4e00}, /* CJK Ideographs */ | 2616 {0x2b740, 0x2b81f, 0x4e00}, /* CJK Ideographs */ |
2675 {0x2f800, 0x2fa1f, 0x4e00}, /* CJK Ideographs */ | 2617 {0x2f800, 0x2fa1f, 0x4e00}, /* CJK Ideographs */ |
2676 }; | 2618 }; |
2619 | |
2620 /* Sorted list of non-overlapping intervals of all Emoji characters, | |
2621 * based on http://unicode.org/emoji/charts/emoji-list.html */ | |
2622 static struct interval emoji_all[] = | |
2623 { | |
2624 {0x203c, 0x203c}, | |
2625 {0x2049, 0x2049}, | |
2626 {0x2122, 0x2122}, | |
2627 {0x2139, 0x2139}, | |
2628 {0x2194, 0x2199}, | |
2629 {0x21a9, 0x21aa}, | |
2630 {0x231a, 0x231b}, | |
2631 {0x2328, 0x2328}, | |
2632 {0x23cf, 0x23cf}, | |
2633 {0x23e9, 0x23f3}, | |
2634 {0x24c2, 0x24c2}, | |
2635 {0x25aa, 0x25ab}, | |
2636 {0x25b6, 0x25b6}, | |
2637 {0x25c0, 0x25c0}, | |
2638 {0x25fb, 0x25fe}, | |
2639 {0x2600, 0x2604}, | |
2640 {0x260e, 0x260e}, | |
2641 {0x2611, 0x2611}, | |
2642 {0x2614, 0x2615}, | |
2643 {0x2618, 0x2618}, | |
2644 {0x261d, 0x261d}, | |
2645 {0x2620, 0x2620}, | |
2646 {0x2622, 0x2623}, | |
2647 {0x2626, 0x2626}, | |
2648 {0x262a, 0x262a}, | |
2649 {0x262e, 0x262f}, | |
2650 {0x2638, 0x263a}, | |
2651 {0x2648, 0x2653}, | |
2652 {0x2660, 0x2660}, | |
2653 {0x2663, 0x2663}, | |
2654 {0x2665, 0x2666}, | |
2655 {0x2668, 0x2668}, | |
2656 {0x267b, 0x267b}, | |
2657 {0x267f, 0x267f}, | |
2658 {0x2692, 0x2694}, | |
2659 {0x2696, 0x2697}, | |
2660 {0x2699, 0x2699}, | |
2661 {0x269b, 0x269c}, | |
2662 {0x26a0, 0x26a1}, | |
2663 {0x26aa, 0x26ab}, | |
2664 {0x26b0, 0x26b1}, | |
2665 {0x26bd, 0x26be}, | |
2666 {0x26c4, 0x26c5}, | |
2667 {0x26c8, 0x26c8}, | |
2668 {0x26ce, 0x26cf}, | |
2669 {0x26d1, 0x26d1}, | |
2670 {0x26d3, 0x26d4}, | |
2671 {0x26e9, 0x26ea}, | |
2672 {0x26f0, 0x26f5}, | |
2673 {0x26f7, 0x26fa}, | |
2674 {0x26fd, 0x26fd}, | |
2675 {0x2702, 0x2702}, | |
2676 {0x2705, 0x2705}, | |
2677 {0x2708, 0x270d}, | |
2678 {0x270f, 0x270f}, | |
2679 {0x2712, 0x2712}, | |
2680 {0x2714, 0x2714}, | |
2681 {0x2716, 0x2716}, | |
2682 {0x271d, 0x271d}, | |
2683 {0x2721, 0x2721}, | |
2684 {0x2728, 0x2728}, | |
2685 {0x2733, 0x2734}, | |
2686 {0x2744, 0x2744}, | |
2687 {0x2747, 0x2747}, | |
2688 {0x274c, 0x274c}, | |
2689 {0x274e, 0x274e}, | |
2690 {0x2753, 0x2755}, | |
2691 {0x2757, 0x2757}, | |
2692 {0x2763, 0x2764}, | |
2693 {0x2795, 0x2797}, | |
2694 {0x27a1, 0x27a1}, | |
2695 {0x27b0, 0x27b0}, | |
2696 {0x27bf, 0x27bf}, | |
2697 {0x2934, 0x2935}, | |
2698 {0x2b05, 0x2b07}, | |
2699 {0x2b1b, 0x2b1c}, | |
2700 {0x2b50, 0x2b50}, | |
2701 {0x2b55, 0x2b55}, | |
2702 {0x3030, 0x3030}, | |
2703 {0x303d, 0x303d}, | |
2704 {0x3297, 0x3297}, | |
2705 {0x3299, 0x3299}, | |
2706 {0x1f004, 0x1f004}, | |
2707 {0x1f0cf, 0x1f0cf}, | |
2708 {0x1f170, 0x1f171}, | |
2709 {0x1f17e, 0x1f17f}, | |
2710 {0x1f18e, 0x1f18e}, | |
2711 {0x1f191, 0x1f19a}, | |
2712 {0x1f1e6, 0x1f1ff}, | |
2713 {0x1f201, 0x1f202}, | |
2714 {0x1f21a, 0x1f21a}, | |
2715 {0x1f22f, 0x1f22f}, | |
2716 {0x1f232, 0x1f23a}, | |
2717 {0x1f250, 0x1f251}, | |
2718 {0x1f300, 0x1f320}, | |
2719 {0x1f330, 0x1f335}, | |
2720 {0x1f337, 0x1f37c}, | |
2721 {0x1f380, 0x1f393}, | |
2722 {0x1f3a0, 0x1f3c4}, | |
2723 {0x1f3c6, 0x1f3ca}, | |
2724 {0x1f3e0, 0x1f3f0}, | |
2725 {0x1f400, 0x1f43e}, | |
2726 {0x1f440, 0x1f440}, | |
2727 {0x1f442, 0x1f4f7}, | |
2728 {0x1f4f9, 0x1f4fc}, | |
2729 {0x1f500, 0x1f53d}, | |
2730 {0x1f550, 0x1f567}, | |
2731 {0x1f5fb, 0x1f640}, | |
2732 {0x1f645, 0x1f64f}, | |
2733 {0x1f680, 0x1f6c5} | |
2734 }; | |
2735 | |
2677 int bot = 0; | 2736 int bot = 0; |
2678 int top = sizeof(classes) / sizeof(struct clinterval) - 1; | 2737 int top = sizeof(classes) / sizeof(struct clinterval) - 1; |
2679 int mid; | 2738 int mid; |
2680 | 2739 |
2681 /* First quick check for Latin1 characters, use 'iskeyword'. */ | 2740 /* First quick check for Latin1 characters, use 'iskeyword'. */ |
2699 else | 2758 else |
2700 return (int)classes[mid].class; | 2759 return (int)classes[mid].class; |
2701 } | 2760 } |
2702 | 2761 |
2703 /* emoji */ | 2762 /* emoji */ |
2704 if (intable(emoji_tab, sizeof(emoji_tab), c)) | 2763 if (intable(emoji_all, sizeof(emoji_all), c)) |
2705 return 3; | 2764 return 3; |
2706 | 2765 |
2707 /* most other characters are "word" characters */ | 2766 /* most other characters are "word" characters */ |
2708 return 2; | 2767 return 2; |
2709 } | 2768 } |