changeset 24128:fcbb1d4df15b v8.2.2605

patch 8.2.2605: Vim9: string index and slice does not include composing chars Commit: https://github.com/vim/vim/commit/0289a093a4d65c6280a3be118d1d3696d1aa74da Author: Bram Moolenaar <Bram@vim.org> Date: Sun Mar 14 18:40:19 2021 +0100 patch 8.2.2605: Vim9: string index and slice does not include composing chars Problem: Vim9: string index and slice does not include composing chars. Solution: Include composing characters. (issue https://github.com/vim/vim/issues/6563)
author Bram Moolenaar <Bram@vim.org>
date Sun, 14 Mar 2021 18:45:03 +0100
parents 8c03eefca0c1
children 147967e557f7
files runtime/doc/vim9.txt src/testdir/test_vim9_expr.vim src/version.c src/vim9execute.c
diffstat 4 files changed, 54 insertions(+), 16 deletions(-) [+]
line wrap: on
line diff
--- a/runtime/doc/vim9.txt
+++ b/runtime/doc/vim9.txt
@@ -96,8 +96,8 @@ script and `:def` functions; details are
 	def CallMe(count: number, message: string): bool
 - Call functions without `:call`: >
 	writefile(['done'], 'file.txt')
-- You cannot use `:xit`, `:t`, `:k`, `:append`, `:change`, `:insert` or
-  curly-braces names.
+- You cannot use `:xit`, `:t`, `:k`, `:append`, `:change`, `:insert`, `:open`
+  or curly-braces names.
 - A range before a command must be prefixed with a colon: >
 	:%s/this/that
 - Unless mentioned specifically, the highest |scriptversion| is used.
@@ -341,7 +341,8 @@ Functions can be called without `:call`:
 Using `:call` is still possible, but this is discouraged.
 
 A method call without `eval` is possible, so long as the start is an
-identifier or can't be an Ex command.  Examples: >
+identifier or can't be an Ex command.  For a function either "(" or "->" must
+follow, without a line break.  Examples: >
 	myList->add(123)
 	g:myList->add(123)
 	[1, 2, 3]->Process()
@@ -696,8 +697,9 @@ for v:null.  When converting a boolean t
 used, not "v:false" and "v:true" like in legacy script.  "v:none" is not
 changed, it is only used in JSON and has no equivalent in other languages.
 
-Indexing a string with [idx] or [idx : idx] uses character indexes instead of
-byte indexes. Example: >
+Indexing a string with [idx] or taking a slice with [idx : idx] uses character
+indexes instead of byte indexes.  Composing characters are included.
+Example: >
 	echo 'bár'[1]
 In legacy script this results in the character 0xc3 (an illegal byte), in Vim9
 script this results in the string 'á'.
@@ -845,6 +847,8 @@ THIS IS STILL UNDER DEVELOPMENT - ANYTHI
 :enddef			End of a function defined with `:def`. It should be on
 			a line by its own.
 
+You may also find this wiki useful.  It was written by an early adopter of
+Vim9 script: https://github.com/lacygoill/wiki/blob/master/vim/vim9.md
 
 If the script the function is defined in is Vim9 script, then script-local
 variables can be accessed without the "s:" prefix.  They must be defined
--- a/src/testdir/test_vim9_expr.vim
+++ b/src/testdir/test_vim9_expr.vim
@@ -2367,6 +2367,35 @@ def Test_expr7_any_index_slice()
     assert_equal('abcd', g:teststring[: -3])
     assert_equal('', g:teststring[: -9])
 
+    # composing characters are included
+    g:teststring = 'àéû'
+    assert_equal('à', g:teststring[0])
+    assert_equal('é', g:teststring[1])
+    assert_equal('û', g:teststring[2])
+    assert_equal('', g:teststring[3])
+    assert_equal('', g:teststring[4])
+
+    assert_equal('û', g:teststring[-1])
+    assert_equal('é', g:teststring[-2])
+    assert_equal('à', g:teststring[-3])
+    assert_equal('', g:teststring[-4])
+    assert_equal('', g:teststring[-5])
+
+    assert_equal('à', g:teststring[0 : 0])
+    assert_equal('é', g:teststring[1 : 1])
+    assert_equal('àé', g:teststring[0 : 1])
+    assert_equal('àéû', g:teststring[0 : -1])
+    assert_equal('àé', g:teststring[0 : -2])
+    assert_equal('à', g:teststring[0 : -3])
+    assert_equal('', g:teststring[0 : -4])
+    assert_equal('', g:teststring[0 : -5])
+    assert_equal('àéû', g:teststring[ : ])
+    assert_equal('àéû', g:teststring[0 : ])
+    assert_equal('éû', g:teststring[1 : ])
+    assert_equal('û', g:teststring[2 : ])
+    assert_equal('', g:teststring[3 : ])
+    assert_equal('', g:teststring[4 : ])
+
     # blob index cannot be out of range
     g:testblob = 0z01ab
     assert_equal(0x01, g:testblob[0])
--- a/src/version.c
+++ b/src/version.c
@@ -751,6 +751,8 @@ static char *(features[]) =
 static int included_patches[] =
 {   /* Add new patch number below this line */
 /**/
+    2605,
+/**/
     2604,
 /**/
     2603,
--- a/src/vim9execute.c
+++ b/src/vim9execute.c
@@ -985,8 +985,9 @@ allocate_if_null(typval_T *tv)
 }
 
 /*
- * Return the character "str[index]" where "index" is the character index.  If
- * "index" is out of range NULL is returned.
+ * Return the character "str[index]" where "index" is the character index,
+ * including composing characters.
+ * If "index" is out of range NULL is returned.
  */
     char_u *
 char_from_string(char_u *str, varnumber_T index)
@@ -1005,7 +1006,7 @@ char_from_string(char_u *str, varnumber_
 	int	clen = 0;
 
 	for (nbyte = 0; nbyte < slen; ++clen)
-	    nbyte += MB_CPTR2LEN(str + nbyte);
+	    nbyte += mb_ptr2len(str + nbyte);
 	nchar = clen + index;
 	if (nchar < 0)
 	    // unlike list: index out of range results in empty string
@@ -1013,15 +1014,15 @@ char_from_string(char_u *str, varnumber_
     }
 
     for (nbyte = 0; nchar > 0 && nbyte < slen; --nchar)
-	nbyte += MB_CPTR2LEN(str + nbyte);
+	nbyte += mb_ptr2len(str + nbyte);
     if (nbyte >= slen)
 	return NULL;
-    return vim_strnsave(str + nbyte, MB_CPTR2LEN(str + nbyte));
+    return vim_strnsave(str + nbyte, mb_ptr2len(str + nbyte));
 }
 
 /*
  * Get the byte index for character index "idx" in string "str" with length
- * "str_len".
+ * "str_len".  Composing characters are included.
  * If going over the end return "str_len".
  * If "idx" is negative count from the end, -1 is the last character.
  * When going over the start return -1.
@@ -1036,7 +1037,7 @@ char_idx2byte(char_u *str, size_t str_le
     {
 	while (nchar > 0 && nbyte < str_len)
 	{
-	    nbyte += MB_CPTR2LEN(str + nbyte);
+	    nbyte += mb_ptr2len(str + nbyte);
 	    --nchar;
 	}
     }
@@ -1056,7 +1057,8 @@ char_idx2byte(char_u *str, size_t str_le
 }
 
 /*
- * Return the slice "str[first:last]" using character indexes.
+ * Return the slice "str[first : last]" using character indexes.  Composing
+ * characters are included.
  * "exclusive" is TRUE for slice().
  * Return NULL when the result is empty.
  */
@@ -1079,7 +1081,7 @@ string_slice(char_u *str, varnumber_T fi
 	end_byte = char_idx2byte(str, slen, last);
 	if (!exclusive && end_byte >= 0 && end_byte < (long)slen)
 	    // end index is inclusive
-	    end_byte += MB_CPTR2LEN(str + end_byte);
+	    end_byte += mb_ptr2len(str + end_byte);
     }
 
     if (start_byte >= (long)slen || end_byte <= start_byte)
@@ -3249,8 +3251,9 @@ call_def_function(
 			res = string_slice(tv->vval.v_string, n1, n2, FALSE);
 		    else
 			// Index: The resulting variable is a string of a
-			// single character.  If the index is too big or
-			// negative the result is empty.
+			// single character (including composing characters).
+			// If the index is out of range in either direction the
+			// result is empty.
 			res = char_from_string(tv->vval.v_string, n2);
 		    vim_free(tv->vval.v_string);
 		    tv->vval.v_string = res;