changeset 24132:512f48dc7100 v8.2.2607

patch 8.2.2607: strcharpart() cannot include composing characters Commit: https://github.com/vim/vim/commit/02b4d9b18a03549b68e364e428392b7a62766c74 Author: Bram Moolenaar <Bram@vim.org> Date: Sun Mar 14 19:46:45 2021 +0100 patch 8.2.2607: strcharpart() cannot include composing characters Problem: strcharpart() cannot include composing characters. Solution: Add the {skipcc} argument.
author Bram Moolenaar <Bram@vim.org>
date Sun, 14 Mar 2021 20:00:02 +0100
parents 81f596485c08
children e06b3799d913
files runtime/doc/eval.txt src/evalfunc.c src/testdir/test_expr_utf8.vim src/version.c
diffstat 4 files changed, 49 insertions(+), 13 deletions(-) [+]
line wrap: on
line diff
--- a/runtime/doc/eval.txt
+++ b/runtime/doc/eval.txt
@@ -1187,7 +1187,8 @@ byte under the cursor: >
 
 In Vim9 script:
 If expr8 is a String this results in a String that contains the expr1'th
-single character from expr8.  To use byte indexes use |strpart()|.
+single character (including any composing characters) from expr8.  To use byte
+indexes use |strpart()|.
 
 Index zero gives the first byte or character.  Careful: text column numbers
 start with one!
@@ -1217,8 +1218,9 @@ In legacy Vim script the indexes are byt
 multibyte encodings, see |byteidx()| for computing the indexes.  If expr8 is
 a Number it is first converted to a String.
 
-In Vim9 script the indexes are character indexes.  To use byte indexes use
-|strpart()|.
+In Vim9 script the indexes are character indexes, and each character includes
+its composing characters.  To use byte indexes use |strpart()|.  To use
+character indexes without including composing characters use |strcharpart()|.
 
 The item at index expr1b is included, it is inclusive.  For an exclusive index
 use the |slice()| function.
@@ -2924,7 +2926,7 @@ str2list({expr} [, {utf8}])	List	convert
 str2nr({expr} [, {base} [, {quoted}]])
 				Number	convert String to Number
 strcharlen({expr})		Number	character length of the String {expr}
-strcharpart({str}, {start} [, {len}])
+strcharpart({str}, {start} [, {len} [, {skipcc}]])
 				String	{len} characters of {str} at
 					character {start}
 strchars({expr} [, {skipcc}])	Number	character count of the String {expr}
@@ -9919,7 +9921,7 @@ slice({expr}, {start} [, {end}])			*slic
 		Similar to using a |slice| "expr[start : end]", but "end" is
 		used exclusive.  And for a string the indexes are used as
 		character indexes instead of byte indexes, like in
-		|vim9script|.
+		|vim9script|.  Also, composing characters are not counted.
 		When {end} is omitted the slice continues to the last item.
 		When {end} is -1 the last item is omitted.
 
@@ -10290,12 +10292,16 @@ strcharlen({expr})					*strcharlen()*
 			GetText()->strcharlen()
 
 
-strcharpart({src}, {start} [, {len}])			*strcharpart()*
+strcharpart({src}, {start} [, {len} [, {skipcc}]])		*strcharpart()*
 		Like |strpart()| but using character index and length instead
-		of byte index and length.  Composing characters are counted
-		separately.
+		of byte index and length.
+		When {skipcc} is omitted or zero, composing characters are
+		counted separately.
+		When {skipcc} is set to 1, composing characters are ignored,
+		similar to |slice()|.
 		When a character index is used where a character does not
-		exist it is assumed to be one character.  For example: >
+		exist it is omitted and counted as one character.  For
+		example: >
 			strcharpart('abc', -1, 2)
 <		results in 'a'.
 
@@ -10309,7 +10315,7 @@ strchars({expr} [, {skipcc}])					*strch
 		When {skipcc} is omitted or zero, composing characters are
 		counted separately.
 		When {skipcc} set to 1, Composing characters are ignored.
-		|strcharlen()| does the same.
+		|strcharlen()| always does this.
 
 		Also see |strlen()|, |strdisplaywidth()| and |strwidth()|.
 
--- a/src/evalfunc.c
+++ b/src/evalfunc.c
@@ -1575,7 +1575,7 @@ static funcentry_T global_functions[] =
 			ret_number,	    f_str2nr},
     {"strcharlen",	1, 1, FEARG_1,	    NULL,
 			ret_number,	    f_strcharlen},
-    {"strcharpart",	2, 3, FEARG_1,	    NULL,
+    {"strcharpart",	2, 4, FEARG_1,	    NULL,
 			ret_string,	    f_strcharpart},
     {"strchars",	1, 2, FEARG_1,	    NULL,
 			ret_number,	    f_strchars},
@@ -9316,6 +9316,7 @@ f_strcharpart(typval_T *argvars, typval_
     int		nchar;
     int		nbyte = 0;
     int		charlen;
+    int		skipcc = FALSE;
     int		len = 0;
     int		slen;
     int		error = FALSE;
@@ -9326,10 +9327,24 @@ f_strcharpart(typval_T *argvars, typval_
     nchar = (int)tv_get_number_chk(&argvars[1], &error);
     if (!error)
     {
+	if (argvars[2].v_type != VAR_UNKNOWN
+					   && argvars[3].v_type != VAR_UNKNOWN)
+	{
+	    skipcc = tv_get_bool(&argvars[3]);
+	    if (skipcc < 0 || skipcc > 1)
+	    {
+		semsg(_(e_using_number_as_bool_nr), skipcc);
+		return;
+	    }
+	}
+
 	if (nchar > 0)
 	    while (nchar > 0 && nbyte < slen)
 	    {
-		nbyte += MB_CPTR2LEN(p + nbyte);
+		if (skipcc)
+		    nbyte += mb_ptr2len(p + nbyte);
+		else
+		    nbyte += MB_CPTR2LEN(p + nbyte);
 		--nchar;
 	    }
 	else
@@ -9344,7 +9359,12 @@ f_strcharpart(typval_T *argvars, typval_
 		if (off < 0)
 		    len += 1;
 		else
-		    len += MB_CPTR2LEN(p + off);
+		{
+		    if (skipcc)
+			len += mb_ptr2len(p + off);
+		    else
+			len += MB_CPTR2LEN(p + off);
+		}
 		--charlen;
 	    }
 	}
--- a/src/testdir/test_expr_utf8.vim
+++ b/src/testdir/test_expr_utf8.vim
@@ -31,6 +31,14 @@ func Test_strcharpart()
   call assert_equal('a', strcharpart('àxb', 0, 1))
   call assert_equal('̀', strcharpart('àxb', 1, 1))
   call assert_equal('x', strcharpart('àxb', 2, 1))
+
+
+  call assert_equal('a', strcharpart('àxb', 0, 1, 0))
+  call assert_equal('à', strcharpart('àxb', 0, 1, 1))
+  call assert_equal('x', strcharpart('àxb', 1, 1, 1))
+
+  call assert_fails("let v = strcharpart('abc', 0, 0, [])", 'E745:')
+  call assert_fails("let v = strcharpart('abc', 0, 0, 2)", 'E1023:')
 endfunc
 
 " vim: shiftwidth=2 sts=2 expandtab
--- a/src/version.c
+++ b/src/version.c
@@ -751,6 +751,8 @@ static char *(features[]) =
 static int included_patches[] =
 {   /* Add new patch number below this line */
 /**/
+    2607,
+/**/
     2606,
 /**/
     2605,