changeset 5413:3109053ce4e3 v7.4.057

updated for version 7.4.057 Problem: byteidx() does not work for composing characters. Solution: Add byteidxcomp().
author Bram Moolenaar <bram@vim.org>
date Sat, 02 Nov 2013 23:29:26 +0100
parents 5126b80647a2
children 68331356e02e
files runtime/doc/eval.txt src/eval.c src/testdir/test69.in src/testdir/test69.ok src/version.c
diffstat 5 files changed, 79 insertions(+), 10 deletions(-) [+]
line wrap: on
line diff
--- a/runtime/doc/eval.txt
+++ b/runtime/doc/eval.txt
@@ -1713,6 +1713,7 @@ bufnr( {expr})			Number	Number of the bu
 bufwinnr( {expr})		Number	window number of buffer {expr}
 byte2line( {byte})		Number	line number at byte count {byte}
 byteidx( {expr}, {nr})		Number	byte index of {nr}'th char in {expr}
+byteidxcomp( {expr}, {nr})	Number	byte index of {nr}'th char in {expr}
 call( {func}, {arglist} [, {dict}])
 				any	call {func} with arguments {arglist}
 ceil( {expr})			Float	round {expr} up
@@ -2261,7 +2262,10 @@ byteidx({expr}, {nr})					*byteidx()*
 		{expr}.  Use zero for the first character, it returns zero.
 		This function is only useful when there are multibyte
 		characters, otherwise the returned value is equal to {nr}.
-		Composing characters are counted as a separate character.
+		Composing characters are not counted separately, their byte
+		length is added to the preceding base character.  See
+		|byteidxcomp()| below for counting composing characters
+		separately.
 		Example : >
 			echo matchstr(str, ".", byteidx(str, 3))
 <		will display the fourth character.  Another way to do the
@@ -2270,7 +2274,20 @@ byteidx({expr}, {nr})					*byteidx()*
 			echo strpart(s, 0, byteidx(s, 1))
 <		If there are less than {nr} characters -1 is returned.
 		If there are exactly {nr} characters the length of the string
-		is returned.
+		in bytes is returned.
+
+byteidxcomp({expr}, {nr})					*byteidxcomp()*
+		Like byteidx(), except that a composing character is counted
+		as a separate character.  Example: >
+			let s = 'e' . nr2char(0x301)
+			echo byteidx(s, 1)
+			echo byteidxcomp(s, 1)
+			echo byteidxcomp(s, 2)
+<		The first and third echo result in 3 ('e' plus composing
+		character is 3 bytes), the second echo results in 1 ('e' is
+		one byte).
+		Only works different from byteidx() when 'encoding' is set to
+		a Unicode encoding.
 
 call({func}, {arglist} [, {dict}])			*call()* *E699*
 		Call function {func} with the items in |List| {arglist} as
--- a/src/eval.c
+++ b/src/eval.c
@@ -474,7 +474,9 @@ static void f_bufname __ARGS((typval_T *
 static void f_bufnr __ARGS((typval_T *argvars, typval_T *rettv));
 static void f_bufwinnr __ARGS((typval_T *argvars, typval_T *rettv));
 static void f_byte2line __ARGS((typval_T *argvars, typval_T *rettv));
+static void byteidx __ARGS((typval_T *argvars, typval_T *rettv, int comp));
 static void f_byteidx __ARGS((typval_T *argvars, typval_T *rettv));
+static void f_byteidxcomp __ARGS((typval_T *argvars, typval_T *rettv));
 static void f_call __ARGS((typval_T *argvars, typval_T *rettv));
 #ifdef FEAT_FLOAT
 static void f_ceil __ARGS((typval_T *argvars, typval_T *rettv));
@@ -7861,6 +7863,7 @@ static struct fst
     {"bufwinnr",	1, 1, f_bufwinnr},
     {"byte2line",	1, 1, f_byte2line},
     {"byteidx",		2, 2, f_byteidx},
+    {"byteidxcomp",	2, 2, f_byteidxcomp},
     {"call",		2, 3, f_call},
 #ifdef FEAT_FLOAT
     {"ceil",		1, 1, f_ceil},
@@ -9177,13 +9180,11 @@ f_byte2line(argvars, rettv)
 #endif
 }
 
-/*
- * "byteidx()" function
- */
-    static void
-f_byteidx(argvars, rettv)
-    typval_T	*argvars;
-    typval_T	*rettv;
+    static void
+byteidx(argvars, rettv, comp)
+    typval_T	*argvars;
+    typval_T	*rettv;
+    int		comp;
 {
 #ifdef FEAT_MBYTE
     char_u	*t;
@@ -9203,7 +9204,10 @@ f_byteidx(argvars, rettv)
     {
 	if (*t == NUL)		/* EOL reached */
 	    return;
-	t += (*mb_ptr2len)(t);
+	if (enc_utf8 && comp)
+	    t += utf_ptr2len(t);
+	else
+	    t += (*mb_ptr2len)(t);
     }
     rettv->vval.v_number = (varnumber_T)(t - str);
 #else
@@ -9212,6 +9216,28 @@ f_byteidx(argvars, rettv)
 #endif
 }
 
+/*
+ * "byteidx()" function
+ */
+    static void
+f_byteidx(argvars, rettv)
+    typval_T	*argvars;
+    typval_T	*rettv;
+{
+    byteidx(argvars, rettv, FALSE);
+}
+
+/*
+ * "byteidxcomp()" function
+ */
+    static void
+f_byteidxcomp(argvars, rettv)
+    typval_T	*argvars;
+    typval_T	*rettv;
+{
+    byteidx(argvars, rettv, TRUE);
+}
+
     int
 func_call(name, args, selfdict, rettv)
     char_u	*name;
--- a/src/testdir/test69.in
+++ b/src/testdir/test69.in
@@ -1,6 +1,7 @@
 Test for multi-byte text formatting.
 Also test, that 'mps' with multibyte chars works.
 And test "ra" on multi-byte characters.
+Also test byteidx() and byteidxcomp()
 
 STARTTEST
 :so mbyte.vim
@@ -154,6 +155,21 @@ ra test
 aab
 
 STARTTEST
+:let a = '.é.' " one char of two bytes
+:let b = '.é.' " normal e with composing char
+/^byteidx
+:put =string([byteidx(a, 0), byteidx(a, 1), byteidx(a, 2), byteidx(a, 3), byteidx(a, 4)])
+:put =string([byteidx(b, 0), byteidx(b, 1), byteidx(b, 2), byteidx(b, 3), byteidx(b, 4)])
+/^byteidxcomp
+:put =string([byteidxcomp(a, 0), byteidxcomp(a, 1), byteidxcomp(a, 2), byteidxcomp(a, 3), byteidxcomp(a, 4)])
+:let b = '.é.'
+:put =string([byteidxcomp(b, 0), byteidxcomp(b, 1), byteidxcomp(b, 2), byteidxcomp(b, 3), byteidxcomp(b, 4), byteidxcomp(b, 5)])
+ENDTEST
+
+byteidx
+byteidxcomp
+
+STARTTEST
 :g/^STARTTEST/.,/^ENDTEST/d
 :1;/^Results/,$wq! test.out
 ENDTEST
--- a/src/testdir/test69.ok
+++ b/src/testdir/test69.ok
@@ -149,3 +149,11 @@ ra test
 aaaa
 aaa
 
+
+byteidx
+[0, 1, 3, 4, -1]
+[0, 1, 4, 5, -1]
+byteidxcomp
+[0, 1, 3, 4, -1]
+[0, 1, 2, 4, 5, -1]
+
--- a/src/version.c
+++ b/src/version.c
@@ -739,6 +739,8 @@ static char *(features[]) =
 static int included_patches[] =
 {   /* Add new patch number below this line */
 /**/
+    57,
+/**/
     56,
 /**/
     55,