Mercurial > vim
comparison src/mbyte.c @ 20695:cea8ae407452 v8.2.0901
patch 8.2.0901: formatting CJK text isn't optimal
Commit: https://github.com/vim/vim/commit/e52702f00322c8a8861efd0bd6a3775e685e5685
Author: Bram Moolenaar <Bram@vim.org>
Date: Thu Jun 4 18:22:13 2020 +0200
patch 8.2.0901: formatting CJK text isn't optimal
Problem: Formatting CJK text isn't optimal.
Solution: Properly break CJK lines. (closes https://github.com/vim/vim/issues/3875)
author | Bram Moolenaar <Bram@vim.org> |
---|---|
date | Thu, 04 Jun 2020 18:30:04 +0200 |
parents | 6c5b11458f31 |
children | 0bc43a704f56 |
comparison
equal
deleted
inserted
replaced
20694:3a049f4bdaa2 | 20695:cea8ae407452 |
---|---|
3841 | 3841 |
3842 return (int)(p - q); | 3842 return (int)(p - q); |
3843 } | 3843 } |
3844 | 3844 |
/*
 * Whether space is NOT allowed before/after "cc".
 *
 * Returns non-zero when "cc" lies in one of the punctuation ranges listed
 * below, zero otherwise.
 */
int
utf_eat_space(int cc)
{
    // Inclusive code point ranges; each entry is {first, last}.
    static const int punct_ranges[][2] =
    {
	{0x2000, 0x206f},	// General punctuations
	{0x2e00, 0x2e7f},	// Supplemental punctuations
	{0x3000, 0x303f},	// CJK symbols and punctuations
	{0xff01, 0xff0f},	// Full width ASCII punctuations
	{0xff1a, 0xff20},	// ..
	{0xff3b, 0xff40},	// ..
	{0xff5b, 0xff65},	// ..
    };
    const int	range_count = (int)(sizeof(punct_ranges)
						 / sizeof(punct_ranges[0]));
    int		in_range = 0;
    int		idx;

    // Stop at the first range that contains "cc".
    for (idx = 0; idx < range_count && !in_range; ++idx)
	in_range = (cc >= punct_ranges[idx][0] && cc <= punct_ranges[idx][1]);

    return in_range;
}
3859 | |
/*
 * Whether line break is allowed before "cc".
 *
 * Returns zero when "cc" is listed in BOL_prohibition_punct[] (no break may
 * be placed before such a character), non-zero for every other value.
 */
int
utf_allow_break_before(int cc)
{
    // Must be kept sorted in ascending order: it is binary-searched below.
    static const int BOL_prohibition_punct[] =
    {
	'!',
	'%',
	')',
	',',
	':',
	';',
	'>',
	'?',
	']',
	'}',
	0x2019, // ’ right single quotation mark
	0x201d, // ” right double quotation mark
	0x2020, // † dagger
	0x2021, // ‡ double dagger
	0x2026, // … horizontal ellipsis
	0x2030, // ‰ per mille sign
	0x2031, // ‱ per ten thousand sign
	0x203c, // ‼ double exclamation mark
	0x2047, // ⁇ double question mark
	0x2048, // ⁈ question exclamation mark
	0x2049, // ⁉ exclamation question mark
	0x2103, // ℃ degree celsius
	0x2109, // ℉ degree fahrenheit
	0x3001, // 、 ideographic comma
	0x3002, // 。 ideographic full stop
	0x3009, // 〉 right angle bracket
	0x300b, // 》 right double angle bracket
	0x300d, // 」 right corner bracket
	0x300f, // 』 right white corner bracket
	0x3011, // 】 right black lenticular bracket
	0x3015, // 〕 right tortoise shell bracket
	0x3017, // 〗 right white lenticular bracket
	0x3019, // 〙 right white tortoise shell bracket
	0x301b, // 〛 right white square bracket
	0xff01, // ！ fullwidth exclamation mark
	0xff09, // ） fullwidth right parenthesis
	0xff0c, // ， fullwidth comma
	0xff0e, // ． fullwidth full stop
	0xff1a, // ： fullwidth colon
	0xff1b, // ； fullwidth semicolon
	0xff1f, // ？ fullwidth question mark
	0xff3d, // ］ fullwidth right square bracket
	0xff5d, // ｝ fullwidth right curly bracket
    };

    // Count is derived from the array's own element size, so it stays
    // correct if the element type is ever changed.
    const int	count = (int)(sizeof(BOL_prohibition_punct)
					 / sizeof(BOL_prohibition_punct[0]));
    int		low = 0;
    int		high = count;

    // Lower-bound search on the half-open range [low, high): when the loop
    // ends "low" is the index of the first entry >= "cc", or "count" when
    // every entry is smaller.  The indexes never leave [0, count].
    while (low < high)
    {
	int mid = low + (high - low) / 2;

	if (BOL_prohibition_punct[mid] < cc)
	    low = mid + 1;
	else
	    high = mid;
    }

    // Break is allowed unless "cc" was found in the table.
    return !(low < count && BOL_prohibition_punct[low] == cc);
}
3931 | |
/*
 * Whether line break is allowed after "cc".
 *
 * Returns zero when "cc" is listed in EOL_prohibition_punct[] (no break may
 * be placed after such a character), non-zero for every other value.
 */
static int
utf_allow_break_after(int cc)
{
    // Must be kept sorted in ascending order: it is binary-searched below.
    static const int EOL_prohibition_punct[] =
    {
	'(',
	'<',
	'[',
	'`',
	'{',
	//0x2014, // — em dash
	0x2018, // ‘ left single quotation mark
	0x201c, // “ left double quotation mark
	//0x2053, // ⁓ swung dash
	0x3008, // 〈 left angle bracket
	0x300a, // 《 left double angle bracket
	0x300c, // 「 left corner bracket
	0x300e, // 『 left white corner bracket
	0x3010, // 【 left black lenticular bracket
	0x3014, // 〔 left tortoise shell bracket
	0x3016, // 〖 left white lenticular bracket
	0x3018, // 〘 left white tortoise shell bracket
	0x301a, // 〚 left white square bracket
	0xff08, // （ fullwidth left parenthesis
	0xff3b, // ［ fullwidth left square bracket
	0xff5b, // ｛ fullwidth left curly bracket
    };

    // Count is derived from the array's own element size, so it stays
    // correct if the element type is ever changed.
    const int	count = (int)(sizeof(EOL_prohibition_punct)
					 / sizeof(EOL_prohibition_punct[0]));
    int		low = 0;
    int		high = count;

    // Lower-bound search on the half-open range [low, high): when the loop
    // ends "low" is the index of the first entry >= "cc", or "count" when
    // every entry is smaller.  The indexes never leave [0, count].
    while (low < high)
    {
	int mid = low + (high - low) / 2;

	if (EOL_prohibition_punct[mid] < cc)
	    low = mid + 1;
	else
	    high = mid;
    }

    // Break is allowed unless "cc" was found in the table.
    return !(low < count && EOL_prohibition_punct[low] == cc);
}
3981 | |
3982 /* | |
3983 * Whether line break is allowed between "cc" and "ncc". | |
3984 */ | |
3985 int | |
3986 utf_allow_break(int cc, int ncc) | |
3987 { | |
3988 // don't break between two-letter punctuations | |
3989 if (cc == ncc | |
3990 && (cc == 0x2014 // em dash | |
3991 || cc == 0x2026)) // horizontal ellipsis | |
3992 return FALSE; | |
3993 | |
3994 return utf_allow_break_after(cc) && utf_allow_break_before(ncc); | |
3995 } | |
3996 | |
3997 /* | |
3846 * Copy a character from "*fp" to "*tp" and advance the pointers. | 3998 * Copy a character from "*fp" to "*tp" and advance the pointers. |
3847 */ | 3999 */ |
3848 void | 4000 void |
3849 mb_copy_char(char_u **fp, char_u **tp) | 4001 mb_copy_char(char_u **fp, char_u **tp) |
3850 { | 4002 { |