# HG changeset patch # User Bram Moolenaar # Date 1615743903 -3600 # Node ID fcbb1d4df15bccc719f26cfa15d00721fd82cf99 # Parent 8c03eefca0c10ec8231af574219e2b212c7e90fe patch 8.2.2605: Vim9: string index and slice does not include composing chars Commit: https://github.com/vim/vim/commit/0289a093a4d65c6280a3be118d1d3696d1aa74da Author: Bram Moolenaar Date: Sun Mar 14 18:40:19 2021 +0100 patch 8.2.2605: Vim9: string index and slice does not include composing chars Problem: Vim9: string index and slice does not include composing chars. Solution: Include composing characters. (issue https://github.com/vim/vim/issues/6563) diff --git a/runtime/doc/vim9.txt b/runtime/doc/vim9.txt --- a/runtime/doc/vim9.txt +++ b/runtime/doc/vim9.txt @@ -96,8 +96,8 @@ script and `:def` functions; details are def CallMe(count: number, message: string): bool - Call functions without `:call`: > writefile(['done'], 'file.txt') -- You cannot use `:xit`, `:t`, `:k`, `:append`, `:change`, `:insert` or - curly-braces names. +- You cannot use `:xit`, `:t`, `:k`, `:append`, `:change`, `:insert`, `:open` + or curly-braces names. - A range before a command must be prefixed with a colon: > :%s/this/that - Unless mentioned specifically, the highest |scriptversion| is used. @@ -341,7 +341,8 @@ Functions can be called without `:call`: Using `:call` is still possible, but this is discouraged. A method call without `eval` is possible, so long as the start is an -identifier or can't be an Ex command. Examples: > +identifier or can't be an Ex command. For a function either "(" or "->" must +be following, without a line break. Examples: > myList->add(123) g:myList->add(123) [1, 2, 3]->Process() @@ -696,8 +697,9 @@ for v:null. When converting a boolean t used, not "v:false" and "v:true" like in legacy script. "v:none" is not changed, it is only used in JSON and has no equivalent in other languages. -Indexing a string with [idx] or [idx : idx] uses character indexes instead of -byte indexes. 
Example: > +Indexing a string with [idx] or taking a slice with [idx : idx] uses character +indexes instead of byte indexes. Composing characters are included. +Example: > echo 'bár'[1] In legacy script this results in the character 0xc3 (an illegal byte), in Vim9 script this results in the string 'á'. @@ -845,6 +847,8 @@ THIS IS STILL UNDER DEVELOPMENT - ANYTHI :enddef End of a function defined with `:def`. It should be on a line by its own. +You may also find this wiki useful. It was written by an early adopter of +Vim9 script: https://github.com/lacygoill/wiki/blob/master/vim/vim9.md If the script the function is defined in is Vim9 script, then script-local variables can be accessed without the "s:" prefix. They must be defined diff --git a/src/testdir/test_vim9_expr.vim b/src/testdir/test_vim9_expr.vim --- a/src/testdir/test_vim9_expr.vim +++ b/src/testdir/test_vim9_expr.vim @@ -2367,6 +2367,35 @@ def Test_expr7_any_index_slice() assert_equal('abcd', g:teststring[: -3]) assert_equal('', g:teststring[: -9]) + # composing characters are included + g:teststring = 'àéû' + assert_equal('à', g:teststring[0]) + assert_equal('é', g:teststring[1]) + assert_equal('û', g:teststring[2]) + assert_equal('', g:teststring[3]) + assert_equal('', g:teststring[4]) + + assert_equal('û', g:teststring[-1]) + assert_equal('é', g:teststring[-2]) + assert_equal('à', g:teststring[-3]) + assert_equal('', g:teststring[-4]) + assert_equal('', g:teststring[-5]) + + assert_equal('à', g:teststring[0 : 0]) + assert_equal('é', g:teststring[1 : 1]) + assert_equal('àé', g:teststring[0 : 1]) + assert_equal('àéû', g:teststring[0 : -1]) + assert_equal('àé', g:teststring[0 : -2]) + assert_equal('à', g:teststring[0 : -3]) + assert_equal('', g:teststring[0 : -4]) + assert_equal('', g:teststring[0 : -5]) + assert_equal('àéû', g:teststring[ : ]) + assert_equal('àéû', g:teststring[0 : ]) + assert_equal('éû', g:teststring[1 : ]) + assert_equal('û', g:teststring[2 : ]) + assert_equal('', g:teststring[3 : ]) 
+ assert_equal('', g:teststring[4 : ]) + # blob index cannot be out of range g:testblob = 0z01ab assert_equal(0x01, g:testblob[0]) diff --git a/src/version.c b/src/version.c --- a/src/version.c +++ b/src/version.c @@ -751,6 +751,8 @@ static char *(features[]) = static int included_patches[] = { /* Add new patch number below this line */ /**/ + 2605, +/**/ 2604, /**/ 2603, diff --git a/src/vim9execute.c b/src/vim9execute.c --- a/src/vim9execute.c +++ b/src/vim9execute.c @@ -985,8 +985,9 @@ allocate_if_null(typval_T *tv) } /* - * Return the character "str[index]" where "index" is the character index. If - * "index" is out of range NULL is returned. + * Return the character "str[index]" where "index" is the character index, + * including composing characters. + * If "index" is out of range NULL is returned. */ char_u * char_from_string(char_u *str, varnumber_T index) @@ -1005,7 +1006,7 @@ char_from_string(char_u *str, varnumber_ int clen = 0; for (nbyte = 0; nbyte < slen; ++clen) - nbyte += MB_CPTR2LEN(str + nbyte); + nbyte += mb_ptr2len(str + nbyte); nchar = clen + index; if (nchar < 0) // unlike list: index out of range results in empty string @@ -1013,15 +1014,15 @@ char_from_string(char_u *str, varnumber_ } for (nbyte = 0; nchar > 0 && nbyte < slen; --nchar) - nbyte += MB_CPTR2LEN(str + nbyte); + nbyte += mb_ptr2len(str + nbyte); if (nbyte >= slen) return NULL; - return vim_strnsave(str + nbyte, MB_CPTR2LEN(str + nbyte)); + return vim_strnsave(str + nbyte, mb_ptr2len(str + nbyte)); } /* * Get the byte index for character index "idx" in string "str" with length - * "str_len". + * "str_len". Composing characters are included. * If going over the end return "str_len". * If "idx" is negative count from the end, -1 is the last character. * When going over the start return -1. 
@@ -1036,7 +1037,7 @@ char_idx2byte(char_u *str, size_t str_le { while (nchar > 0 && nbyte < str_len) { - nbyte += MB_CPTR2LEN(str + nbyte); + nbyte += mb_ptr2len(str + nbyte); --nchar; } } @@ -1056,7 +1057,8 @@ char_idx2byte(char_u *str, size_t str_le } /* - * Return the slice "str[first:last]" using character indexes. + * Return the slice "str[first : last]" using character indexes. Composing + * characters are included. * "exclusive" is TRUE for slice(). * Return NULL when the result is empty. */ @@ -1079,7 +1081,7 @@ string_slice(char_u *str, varnumber_T fi end_byte = char_idx2byte(str, slen, last); if (!exclusive && end_byte >= 0 && end_byte < (long)slen) // end index is inclusive - end_byte += MB_CPTR2LEN(str + end_byte); + end_byte += mb_ptr2len(str + end_byte); } if (start_byte >= (long)slen || end_byte <= start_byte) @@ -3249,8 +3251,9 @@ call_def_function( res = string_slice(tv->vval.v_string, n1, n2, FALSE); else // Index: The resulting variable is a string of a - // single character. If the index is too big or - // negative the result is empty. + // single character (including composing characters). + // If the index is too big or negative the result is + // empty. res = char_from_string(tv->vval.v_string, n2); vim_free(tv->vval.v_string); tv->vval.v_string = res;