# HG changeset patch # User Bram Moolenaar # Date 1615748402 -3600 # Node ID 512f48dc7100785a09da7e120aa82059ce9b18e8 # Parent 81f596485c08c4d6d1a99186cefca2a438945dc2 patch 8.2.2607: strcharpart() cannot include composing characters Commit: https://github.com/vim/vim/commit/02b4d9b18a03549b68e364e428392b7a62766c74 Author: Bram Moolenaar Date: Sun Mar 14 19:46:45 2021 +0100 patch 8.2.2607: strcharpart() cannot include composing characters Problem: strcharpart() cannot include composing characters. Solution: Add the {skipcc} argument. diff --git a/runtime/doc/eval.txt b/runtime/doc/eval.txt --- a/runtime/doc/eval.txt +++ b/runtime/doc/eval.txt @@ -1187,7 +1187,8 @@ byte under the cursor: > In Vim9 script: If expr8 is a String this results in a String that contains the expr1'th -single character from expr8. To use byte indexes use |strpart()|. +single character (including any composing characters) from expr8. To use byte +indexes use |strpart()|. Index zero gives the first byte or character. Careful: text column numbers start with one! @@ -1217,8 +1218,9 @@ In legacy Vim script the indexes are byt multibyte encodings, see |byteidx()| for computing the indexes. If expr8 is a Number it is first converted to a String. -In Vim9 script the indexes are character indexes. To use byte indexes use -|strpart()|. +In Vim9 script the indexes are character indexes and include composing +characters. To use byte indexes use |strpart()|. To use character indexes +without including composing characters use |strcharpart()|. The item at index expr1b is included, it is inclusive. For an exclusive index use the |slice()| function. @@ -2924,7 +2926,7 @@ str2list({expr} [, {utf8}]) List convert str2nr({expr} [, {base} [, {quoted}]]) Number convert String to Number strcharlen({expr}) Number character length of the String {expr} -strcharpart({str}, {start} [, {len}]) +strcharpart({str}, {start} [, {len} [, {skipcc}]]) String {len} characters of {str} at character {start} strchars({expr} [, {skipcc}]) Number character count of the String {expr} @@ -9919,7 +9921,7 @@ slice({expr}, {start} [, {end}]) *slic Similar to using a |slice| "expr[start : end]", but "end" is used exclusive. And for a string the indexes are used as character indexes instead of byte indexes, like in - |vim9script|. + |vim9script|. Also, composing characters are not counted. When {end} is omitted the slice continues to the last item. When {end} is -1 the last item is omitted. @@ -10290,12 +10292,16 @@ strcharlen({expr}) *strcharlen()* GetText()->strcharlen() -strcharpart({src}, {start} [, {len}]) *strcharpart()* +strcharpart({src}, {start} [, {len} [, {skipcc}]]) *strcharpart()* Like |strpart()| but using character index and length instead - of byte index and length. Composing characters are counted - separately. + of byte index and length. + When {skipcc} is omitted or zero, composing characters are + counted separately. + When {skipcc} set to 1, Composing characters are ignored, + similar to |slice()|. When a character index is used where a character does not - exist it is assumed to be one character. For example: > + exist it is omitted and counted as one character. For + example: > strcharpart('abc', -1, 2) < results in 'a'. @@ -10309,7 +10315,7 @@ strchars({expr} [, {skipcc}]) *strch When {skipcc} is omitted or zero, composing characters are counted separately. When {skipcc} set to 1, Composing characters are ignored. - |strcharlen()| does the same. + |strcharlen()| always does this. Also see |strlen()|, |strdisplaywidth()| and |strwidth()|. diff --git a/src/evalfunc.c b/src/evalfunc.c --- a/src/evalfunc.c +++ b/src/evalfunc.c @@ -1575,7 +1575,7 @@ static funcentry_T global_functions[] = ret_number, f_str2nr}, {"strcharlen", 1, 1, FEARG_1, NULL, ret_number, f_strcharlen}, - {"strcharpart", 2, 3, FEARG_1, NULL, + {"strcharpart", 2, 4, FEARG_1, NULL, ret_string, f_strcharpart}, {"strchars", 1, 2, FEARG_1, NULL, ret_number, f_strchars}, @@ -9316,6 +9316,7 @@ f_strcharpart(typval_T *argvars, typval_ int nchar; int nbyte = 0; int charlen; + int skipcc = FALSE; int len = 0; int slen; int error = FALSE; @@ -9326,10 +9327,24 @@ f_strcharpart(typval_T *argvars, typval_ nchar = (int)tv_get_number_chk(&argvars[1], &error); if (!error) { + if (argvars[2].v_type != VAR_UNKNOWN + && argvars[3].v_type != VAR_UNKNOWN) + { + skipcc = tv_get_bool(&argvars[3]); + if (skipcc < 0 || skipcc > 1) + { + semsg(_(e_using_number_as_bool_nr), skipcc); + return; + } + } + if (nchar > 0) while (nchar > 0 && nbyte < slen) { - nbyte += MB_CPTR2LEN(p + nbyte); + if (skipcc) + nbyte += mb_ptr2len(p + nbyte); + else + nbyte += MB_CPTR2LEN(p + nbyte); --nchar; } else @@ -9344,7 +9359,12 @@ f_strcharpart(typval_T *argvars, typval_ if (off < 0) len += 1; else - len += MB_CPTR2LEN(p + off); + { + if (skipcc) + len += mb_ptr2len(p + off); + else + len += MB_CPTR2LEN(p + off); + } --charlen; } } diff --git a/src/testdir/test_expr_utf8.vim b/src/testdir/test_expr_utf8.vim --- a/src/testdir/test_expr_utf8.vim +++ b/src/testdir/test_expr_utf8.vim @@ -31,6 +31,14 @@ func Test_strcharpart() call assert_equal('a', strcharpart('àxb', 0, 1)) call assert_equal('̀', strcharpart('àxb', 1, 1)) call assert_equal('x', strcharpart('àxb', 2, 1)) + + + call assert_equal('a', strcharpart('àxb', 0, 1, 0)) + call assert_equal('à', strcharpart('àxb', 0, 1, 1)) + call assert_equal('x', strcharpart('àxb', 1, 1, 1)) + + call assert_fails("let v = strcharpart('abc', 0, 0, [])", 'E745:') + call assert_fails("let v = strcharpart('abc', 0, 0, 2)", 'E1023:') endfunc " vim: shiftwidth=2 sts=2 expandtab diff --git a/src/version.c b/src/version.c --- a/src/version.c +++ b/src/version.c @@ -751,6 +751,8 @@ static char *(features[]) = static int included_patches[] = { /* Add new patch number below this line */ /**/ + 2607, +/**/ 2606, /**/ 2605,