# HG changeset patch # User Bram Moolenaar # Date 1598646604 -7200 # Node ID 85add08e6a2d32b16b92e0a8cf30cb8f060ff90d # Parent 10438c4900d1a48a917cf1678043205456068265 patch 8.2.1536: cannot get the class of a character; emoji widths are wrong Commit: https://github.com/vim/vim/commit/4e4473c927167fd24e5c8df90e0e8035080cf2da Author: Bram Moolenaar Date: Fri Aug 28 22:24:57 2020 +0200 patch 8.2.1536: cannot get the class of a character; emoji widths are wrong Problem: Cannot get the class of a character; emoji widths are wrong in some environments. Solution: Add charclass(). Update some emoji widths. Add script to check emoji widths. diff --git a/Filelist b/Filelist --- a/Filelist +++ b/Filelist @@ -197,6 +197,7 @@ SRC_ALL = \ src/testdir/samples/*.txt \ src/testdir/samples/test000 \ src/testdir/color_ramp.vim \ + src/testdir/emoji_list.vim \ src/testdir/silent.wav \ src/testdir/popupbounce.vim \ src/proto.h \ diff --git a/runtime/doc/eval.txt b/runtime/doc/eval.txt --- a/runtime/doc/eval.txt +++ b/runtime/doc/eval.txt @@ -2425,6 +2425,7 @@ ch_status({handle} [, {options}]) String status of channel {handle} changenr() Number current change number char2nr({expr} [, {utf8}]) Number ASCII/UTF8 value of first char in {expr} +charclass({string}) Number character class of {string} chdir({dir}) String change current working directory cindent({lnum}) Number C indent for line {lnum} clearmatches([{win}]) none clear all matches @@ -3520,6 +3521,18 @@ char2nr({expr} [, {utf8}]) *char2nr( Can also be used as a |method|: > GetChar()->char2nr() + +charclass({string}) *charclass()* + Return the character class of the first character in {string}. + The character class is one of: + 0 blank + 1 punctuation + 2 word character + 3 emoji + other specific Unicode class + The class is used in patterns and word motions. + + chdir({dir}) *chdir()* Change the current working directory to {dir}. The scope of the directory change depends on the directory of the current diff --git a/runtime/doc/usr_41.txt b/runtime/doc/usr_41.txt --- a/runtime/doc/usr_41.txt +++ b/runtime/doc/usr_41.txt @@ -600,6 +600,7 @@ String manipulation: *string-functio strtrans() translate a string to make it printable tolower() turn a string to lowercase toupper() turn a string to uppercase + charclass() class of a character match() position where a pattern matches in a string matchend() position where a pattern match ends in a string matchstr() match of a pattern in a string diff --git a/src/evalfunc.c b/src/evalfunc.c --- a/src/evalfunc.c +++ b/src/evalfunc.c @@ -564,6 +564,7 @@ static funcentry_T global_functions[] = {"ch_status", 1, 2, FEARG_1, ret_string, JOB_FUNC(f_ch_status)}, {"changenr", 0, 0, 0, ret_number, f_changenr}, {"char2nr", 1, 2, FEARG_1, ret_number, f_char2nr}, + {"charclass", 1, 1, FEARG_1, ret_number, f_charclass}, {"chdir", 1, 1, FEARG_1, ret_string, f_chdir}, {"cindent", 1, 1, FEARG_1, ret_number, f_cindent}, {"clearmatches", 0, 1, FEARG_1, ret_void, f_clearmatches}, diff --git a/src/mbyte.c b/src/mbyte.c --- a/src/mbyte.c +++ b/src/mbyte.c @@ -132,7 +132,9 @@ static int dbcs_char2cells(int c); static int dbcs_ptr2cells_len(char_u *p, int size); static int dbcs_ptr2char(char_u *p); static int dbcs_head_off(char_u *base, char_u *p); +#ifdef FEAT_EVAL static int cw_value(int c); +#endif /* * Lookup table to quickly get the length in bytes of a UTF-8 character from @@ -1388,8 +1390,7 @@ utf_char2cells(int c) {0x26ce, 0x26ce}, {0x26d4, 0x26d4}, {0x26ea, 0x26ea}, - {0x26f2, 0x26f3}, - {0x26f5, 0x26f5}, + {0x26f2, 0x26f5}, {0x26fa, 0x26fa}, {0x26fd, 0x26fd}, {0x2705, 0x2705}, @@ -1490,6 +1491,21 @@ utf_char2cells(int c) // based on http://unicode.org/emoji/charts/emoji-list.html static struct interval emoji_wide[] = { + {0x23ed, 0x23ef}, + {0x23f1, 0x23f2}, + {0x23f8, 0x23fa}, + {0x24c2, 0x24c2}, + {0x261d, 0x261d}, + {0x26c8, 0x26c8}, + {0x26cf, 0x26cf}, + {0x26d1, 0x26d1}, + {0x26d3, 0x26d3}, + {0x26e9, 0x26e9}, + {0x26f0, 0x26f1}, + {0x26f7, 0x26f9}, + {0x270c, 0x270d}, + {0x2934, 0x2935}, + {0x1f170, 0x1f189}, {0x1f1e6, 0x1f1ff}, {0x1f321, 0x1f321}, {0x1f324, 0x1f32c}, @@ -1533,11 +1549,15 @@ utf_char2cells(int c) if (c >= 0x100) { +#if defined(FEAT_EVAL) || defined(USE_WCHAR_FUNCTIONS) int n; - +#endif + +#ifdef FEAT_EVAL n = cw_value(c); if (n != 0) return n; +#endif #ifdef USE_WCHAR_FUNCTIONS /* @@ -2667,8 +2687,7 @@ static struct interval emoji_all[] = {0x3299, 0x3299}, {0x1f004, 0x1f004}, {0x1f0cf, 0x1f0cf}, - {0x1f170, 0x1f171}, - {0x1f17e, 0x1f17f}, + {0x1f170, 0x1f189}, {0x1f18e, 0x1f18e}, {0x1f191, 0x1f19a}, {0x1f1e6, 0x1f1ff}, @@ -2835,6 +2854,10 @@ utf_class_buf(int c, buf_T *buf) return 1; // punctuation } + // emoji + if (intable(emoji_all, sizeof(emoji_all), c)) + return 3; + // binary search in table while (top >= bot) { @@ -2847,10 +2870,6 @@ utf_class_buf(int c, buf_T *buf) return (int)classes[mid].class; } - // emoji - if (intable(emoji_all, sizeof(emoji_all), c)) - return 3; - // most other characters are "word" characters return 2; } @@ -5352,6 +5371,8 @@ string_convert_ext( return retval; } +#if defined(FEAT_EVAL) || defined(PROTO) + /* * Table set by setcellwidths(). */ @@ -5525,3 +5546,17 @@ f_setcellwidths(typval_T *argvars, typva cw_table = table; cw_table_size = l->lv_len; } + + void +f_charclass(typval_T *argvars, typval_T *rettv UNUSED) +{ + if (argvars[0].v_type != VAR_STRING + || argvars[0].vval.v_string == NULL + || *argvars[0].vval.v_string == NUL) + { + emsg(_(e_stringreq)); + return; + } + rettv->vval.v_number = mb_get_class(argvars[0].vval.v_string); +} +#endif diff --git a/src/proto/mbyte.pro b/src/proto/mbyte.pro --- a/src/proto/mbyte.pro +++ b/src/proto/mbyte.pro @@ -85,4 +85,5 @@ int convert_input_safe(char_u *ptr, int char_u *string_convert(vimconv_T *vcp, char_u *ptr, int *lenp); char_u *string_convert_ext(vimconv_T *vcp, char_u *ptr, int *lenp, int *unconvlenp); void f_setcellwidths(typval_T *argvars, typval_T *rettv); +void f_charclass(typval_T *argvars, typval_T *rettv); /* vim: set ft=c : */ diff --git a/src/testdir/emoji_list.vim b/src/testdir/emoji_list.vim new file mode 100644 --- /dev/null +++ b/src/testdir/emoji_list.vim @@ -0,0 +1,22 @@ +" Script to fill the window with emoji characters, one per line. + +if &modified + new +else + enew +endif + +" Use a compiled Vim9 function for speed +def DoIt() + let lnum = 1 + for c in range(0x100, 0x1ffff) + let cs = nr2char(c) + if charclass(cs) == 3 + setline(lnum, '|' .. cs .. '| ' .. strwidth(cs)) + lnum += 1 + endif + endfor +enddef + +call DoIt() +set nomodified diff --git a/src/testdir/test_functions.vim b/src/testdir/test_functions.vim --- a/src/testdir/test_functions.vim +++ b/src/testdir/test_functions.vim @@ -2077,6 +2077,13 @@ func Test_char2nr() set encoding=utf-8 endfunc +func Test_charclass() + call assert_equal(0, charclass(' ')) + call assert_equal(1, charclass('.')) + call assert_equal(2, charclass('x')) + call assert_equal(3, charclass("\u203c")) +endfunc + func Test_eventhandler() call assert_equal(0, eventhandler()) endfunc diff --git a/src/version.c b/src/version.c --- a/src/version.c +++ b/src/version.c @@ -755,6 +755,8 @@ static char *(features[]) = static int included_patches[] = { /* Add new patch number below this line */ /**/ + 1536, +/**/ 1535, /**/ 1534,