changeset 23380:2351b40af967 v8.2.2233

patch 8.2.2233: cannot convert a byte index into a character index
Commit: https://github.com/vim/vim/commit/17793ef23aae0bc94539390ccfe5e63b0ad39ff2
Author: Bram Moolenaar <Bram@vim.org>
Date: Mon Dec 28 12:56:58 2020 +0100

patch 8.2.2233: cannot convert a byte index into a character index
Problem: Cannot convert a byte index into a character index.
Solution: Add charidx(). (Yegappan Lakshmanan, closes https://github.com/vim/vim/issues/7561)
author Bram Moolenaar <Bram@vim.org>
date Mon, 28 Dec 2020 13:00:07 +0100
parents 200f510d1b72
children 9d640db3536e
files runtime/doc/eval.txt runtime/doc/usr_41.txt src/evalfunc.c src/testdir/test_functions.vim src/version.c
diffstat 5 files changed, 109 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- a/runtime/doc/eval.txt
+++ b/runtime/doc/eval.txt
@@ -2475,6 +2475,8 @@ ch_status({handle} [, {options}])
 changenr()			Number	current change number
 char2nr({expr} [, {utf8}])	Number	ASCII/UTF8 value of first char in {expr}
 charclass({string})		Number	character class of {string}
+charidx({string}, {idx} [, {countcc}])
+				Number  char index of byte {idx} in {string}
 chdir({dir})			String	change current working directory
 cindent({lnum})			Number	C indent for line {lnum}
 clearmatches([{win}])		none	clear all matches
@@ -3588,6 +3590,31 @@ charclass({string})					*charclass()*
 			other	specific Unicode class
 		The class is used in patterns and word motions.
 
+							*charidx()*
+charidx({string}, {idx} [, {countcc}])
+		Return the character index of the byte at {idx} in {string}.
+		The index of the first character is zero.
+		If there are no multibyte characters the returned value is
+		equal to {idx}.
+		When {countcc} is omitted or zero, composing characters are
+		not counted separately; their byte length is added to the
+		preceding base character.
+		When {countcc} is set to 1, composing characters are counted
+		as separate characters.
+		Returns -1 if the arguments are invalid or if {idx} is greater
+		than the index of the last byte in {string}.  An error is
+		given if the first argument is not a string, the second
+		argument is not a number, or the third argument is present
+		and is not zero or one.
+		See |byteidx()| and |byteidxcomp()| for getting the byte index
+		from the character index.
+		Examples: >
+			echo charidx('áb́ć', 3)		returns 1
+			echo charidx('áb́ć', 6, 1)	returns 4
+			echo charidx('áb́ć', 16)		returns -1
+<
+		Can also be used as a |method|: >
+			GetName()->charidx(idx)
 
 chdir({dir})						*chdir()*
 		Change the current working directory to {dir}.  The scope of
--- a/runtime/doc/usr_41.txt
+++ b/runtime/doc/usr_41.txt
@@ -625,6 +625,7 @@ String manipulation:					*string-functio
 	iconv()			convert text from one encoding to another
 	byteidx()		byte index of a character in a string
 	byteidxcomp()		like byteidx() but count composing characters
+	charidx()		character index of a byte in a string
 	repeat()		repeat a string multiple times
 	eval()			evaluate a string expression
 	execute()		execute an Ex command and get the output
--- a/src/evalfunc.c
+++ b/src/evalfunc.c
@@ -47,6 +47,7 @@ static void f_ceil(typval_T *argvars, ty
 #endif
 static void f_changenr(typval_T *argvars, typval_T *rettv);
 static void f_char2nr(typval_T *argvars, typval_T *rettv);
+static void f_charidx(typval_T *argvars, typval_T *rettv);
 static void f_col(typval_T *argvars, typval_T *rettv);
 static void f_confirm(typval_T *argvars, typval_T *rettv);
 static void f_copy(typval_T *argvars, typval_T *rettv);
@@ -789,6 +790,8 @@ static funcentry_T global_functions[] =
 			ret_number,	    f_char2nr},
     {"charclass",	1, 1, FEARG_1,	    NULL,
 			ret_number,	    f_charclass},
+    {"charidx",		2, 3, FEARG_1,	    NULL,
+			ret_number,	    f_charidx},
     {"chdir",		1, 1, FEARG_1,	    NULL,
 			ret_string,	    f_chdir},
     {"cindent",		1, 1, FEARG_1,	    NULL,
@@ -2420,6 +2423,57 @@ f_char2nr(typval_T *argvars, typval_T *r
 	rettv->vval.v_number = tv_get_string(&argvars[0])[0];
 }
 
+/*
+ * "charidx()" function: character index of the byte at {idx} in {string}
+ */
+    static void
+f_charidx(typval_T *argvars, typval_T *rettv)
+{
+    char_u	*str;
+    varnumber_T	idx;
+    int		countcc = FALSE;
+    char_u	*p;
+    int		len;
+    int		(*ptr2len)(char_u *);
+
+    rettv->vval.v_number = -1;  // result for every early return below
+
+    if (argvars[0].v_type != VAR_STRING || argvars[1].v_type != VAR_NUMBER
+	    || (argvars[2].v_type != VAR_UNKNOWN
+					   && argvars[2].v_type != VAR_NUMBER))
+    {
+	emsg(_(e_invarg));  // wrong argument type
+	return;
+    }
+
+    str = tv_get_string_chk(&argvars[0]);
+    idx = tv_get_number_chk(&argvars[1], NULL);
+    if (str == NULL || idx < 0)  // negative {idx}: -1, no message given here
+	return;
+
+    if (argvars[2].v_type != VAR_UNKNOWN)  // optional {countcc} was passed
+	countcc = (int)tv_get_bool(&argvars[2]);
+    if (countcc < 0 || countcc > 1)  // only zero and one are accepted
+    {
+	semsg(_(e_using_number_as_bool_nr), countcc);
+	return;
+    }
+
+    if (enc_utf8 && countcc)  // {countcc}: composing chars counted separately
+	ptr2len = utf_ptr2len;
+    else
+	ptr2len = mb_ptr2len;
+
+    for (p = str, len = 0; p <= str + idx; len++)  // one character per step
+    {
+	if (*p == NUL)  // string ends at or before byte idx: keep -1
+	    return;
+	p += ptr2len(p);
+    }
+
+    rettv->vval.v_number = len > 0 ? len - 1 : 0;  // len counts chars stepped
+}
+
     win_T *
 get_optional_window(typval_T *argvars, int idx)
 {
--- a/src/testdir/test_functions.vim
+++ b/src/testdir/test_functions.vim
@@ -1132,6 +1132,31 @@ func Test_byteidx()
   call assert_fails("call byteidxcomp([], 0)", 'E730:')
 endfunc
 
+" Test for charidx(): character index of a byte index in a string
+func Test_charidx()
+  let a = 'xáb́y'
+  call assert_equal(0, charidx(a, 0))
+  call assert_equal(1, charidx(a, 3))
+  call assert_equal(2, charidx(a, 4))
+  call assert_equal(3, charidx(a, 7))
+  call assert_equal(-1, charidx(a, 8))
+  call assert_equal(-1, charidx('', 0))
+
+  " count composing characters as separate characters ({countcc} is 1)
+  call assert_equal(0, charidx(a, 0, 1))
+  call assert_equal(2, charidx(a, 2, 1))
+  call assert_equal(3, charidx(a, 4, 1))
+  call assert_equal(5, charidx(a, 7, 1))
+  call assert_equal(-1, charidx(a, 8, 1))
+  call assert_equal(-1, charidx('', 0, 1))
+  " wrong argument types give E474, a {countcc} other than 0 or 1 gives E1023
+  call assert_fails('let x = charidx([], 1)', 'E474:')
+  call assert_fails('let x = charidx("abc", [])', 'E474:')
+  call assert_fails('let x = charidx("abc", 1, [])', 'E474:')
+  call assert_fails('let x = charidx("abc", 1, -1)', 'E1023:')
+  call assert_fails('let x = charidx("abc", 1, 2)', 'E1023:')
+endfunc
+
 func Test_count()
   let l = ['a', 'a', 'A', 'b']
   call assert_equal(2, count(l, 'a'))
--- a/src/version.c
+++ b/src/version.c
@@ -751,6 +751,8 @@ static char *(features[]) =
 static int included_patches[] =
 {   /* Add new patch number below this line */
 /**/
+    2233,
+/**/
     2232,
 /**/
     2231,