# HG changeset patch # User Bram Moolenaar # Date 1597509904 -7200 # Node ID b1f3d8a44ab621070bdbac8022e517610856b251 # Parent 1ff3fa258bf9b9c0910fa3ca9b7a04bbbfc1ec42 patch 8.2.1461: Vim9: string indexes are counted in bytes Commit: https://github.com/vim/vim/commit/e3c37d8ebf9dbbf210fde4a5fb28eb1f2a492a34 Author: Bram Moolenaar Date: Sat Aug 15 18:39:05 2020 +0200 patch 8.2.1461: Vim9: string indexes are counted in bytes Problem: Vim9: string indexes are counted in bytes. Solution: Use character indexes. (closes https://github.com/vim/vim/issues/6574) diff --git a/runtime/doc/eval.txt b/runtime/doc/eval.txt --- a/runtime/doc/eval.txt +++ b/runtime/doc/eval.txt @@ -1131,19 +1131,25 @@ Evaluation is always from left to right. expr8[expr1] item of String or |List| *expr-[]* *E111* *E909* *subscript* +In legacy Vim script: If expr8 is a Number or String this results in a String that contains the -expr1'th single byte from expr8. expr8 is used as a String, expr1 as a -Number. This doesn't recognize multi-byte encodings, see `byteidx()` for -an alternative, or use `split()` to turn the string into a list of characters. - -Index zero gives the first byte. This is like it works in C. Careful: -text column numbers start with one! Example, to get the byte under the -cursor: > +expr1'th single byte from expr8. expr8 is used as a String (a number is +automatically converted to a String), expr1 as a Number. This doesn't +recognize multi-byte encodings, see `byteidx()` for an alternative, or use +`split()` to turn the string into a list of characters. Example, to get the +byte under the cursor: > :let c = getline(".")[col(".") - 1] +In Vim9 script: +If expr8 is a String this results in a String that contains the expr1'th +single character from expr8. To use byte indexes use |strpart()|. + +Index zero gives the first byte or character. Careful: text column numbers +start with one! + If the length of the String is less than the index, the result is an empty String. 
A negative index always results in an empty string (reason: backward -compatibility). Use [-1:] to get the last byte. +compatibility). Use [-1:] to get the last byte or character. If expr8 is a |List| then it results the item at index expr1. See |list-index| for possible index values. If the index is out of range this results in an @@ -1157,10 +1163,16 @@ error. expr8[expr1a : expr1b] substring or sublist *expr-[:]* -If expr8 is a Number or String this results in the substring with the bytes -from expr1a to and including expr1b. expr8 is used as a String, expr1a and -expr1b are used as a Number. This doesn't recognize multi-byte encodings, see -|byteidx()| for computing the indexes. +If expr8 is a String this results in the substring with the bytes or characters from expr1a +to and including expr1b. expr8 is used as a String, expr1a and expr1b are +used as a Number. + +In legacy Vim script the indexes are byte indexes. This doesn't recognize +multi-byte encodings, see |byteidx()| for computing the indexes. If expr8 is +a Number it is first converted to a String. + +In Vim9 script the indexes are character indexes. To use byte indexes use +|strpart()|. If expr1a is omitted zero is used. If expr1b is omitted the length of the string minus one is used. diff --git a/src/eval.c b/src/eval.c --- a/src/eval.c +++ b/src/eval.c @@ -3718,6 +3718,10 @@ eval_index( else s = vim_strnsave(s + n1, n2 - n1 + 1); } + else if (in_vim9script()) + { + s = char_from_string(s, n1); + } else { // The resulting variable is a string of a single @@ -5285,6 +5289,30 @@ eval_isdictc(int c) } /* + * Return the character "str[index]" where "index" is the character index. If + * "index" is out of range NULL is returned. 
+ */ + char_u * +char_from_string(char_u *str, varnumber_T index) +{ + size_t nbyte = 0; + varnumber_T nchar = index; + size_t slen; + + if (str == NULL || index < 0) + return NULL; + slen = STRLEN(str); + while (nchar > 0 && nbyte < slen) + { + nbyte += MB_CPTR2LEN(str + nbyte); + --nchar; + } + if (nbyte >= slen) + return NULL; + return vim_strnsave(str + nbyte, MB_CPTR2LEN(str + nbyte)); +} + +/* * Handle: * - expr[expr], expr[expr:expr] subscript * - ".name" lookup diff --git a/src/proto/eval.pro b/src/proto/eval.pro --- a/src/proto/eval.pro +++ b/src/proto/eval.pro @@ -59,6 +59,7 @@ char_u *find_name_end(char_u *arg, char_ int eval_isnamec(int c); int eval_isnamec1(int c); int eval_isdictc(int c); +char_u *char_from_string(char_u *str, varnumber_T index); int handle_subscript(char_u **arg, typval_T *rettv, evalarg_T *evalarg, int verbose); int item_copy(typval_T *from, typval_T *to, int deep, int copyID); void echo_one(typval_T *rettv, int with_space, int *atstart, int *needclr); diff --git a/src/testdir/test_vim9_expr.vim b/src/testdir/test_vim9_expr.vim --- a/src/testdir/test_vim9_expr.vim +++ b/src/testdir/test_vim9_expr.vim @@ -2075,12 +2075,28 @@ def Test_expr7_trailing() enddef def Test_expr7_subscript() - let text = 'abcdef' - assert_equal('', text[-1]) - assert_equal('a', text[0]) - assert_equal('e', text[4]) - assert_equal('f', text[5]) - assert_equal('', text[6]) + let lines =<< trim END + let text = 'abcdef' + assert_equal('', text[-1]) + assert_equal('a', text[0]) + assert_equal('e', text[4]) + assert_equal('f', text[5]) + assert_equal('', text[6]) + + text = 'ábçdëf' + assert_equal('', text[-999]) + assert_equal('', text[-1]) + assert_equal('á', text[0]) + assert_equal('b', text[1]) + assert_equal('ç', text[2]) + assert_equal('d', text[3]) + assert_equal('ë', text[4]) + assert_equal('f', text[5]) + assert_equal('', text[6]) + assert_equal('', text[999]) + END + CheckDefSuccess(lines) + CheckScriptSuccess(['vim9script'] + lines) enddef def 
Test_expr7_subscript_linebreak() diff --git a/src/version.c b/src/version.c --- a/src/version.c +++ b/src/version.c @@ -755,6 +755,8 @@ static char *(features[]) = static int included_patches[] = { /* Add new patch number below this line */ /**/ + 1461, +/**/ 1460, /**/ 1459, diff --git a/src/vim9execute.c b/src/vim9execute.c --- a/src/vim9execute.c +++ b/src/vim9execute.c @@ -2233,7 +2233,6 @@ call_def_function( case ISN_STRINDEX: { - char_u *s; varnumber_T n; char_u *res; @@ -2245,7 +2244,6 @@ call_def_function( emsg(_(e_stringreq)); goto on_error; } - s = tv->vval.v_string; tv = STACK_TV_BOT(-1); if (tv->v_type != VAR_NUMBER) @@ -2259,12 +2257,9 @@ call_def_function( // The resulting variable is a string of a single // character. If the index is too big or negative the // result is empty. - if (n < 0 || n >= (varnumber_T)STRLEN(s)) - res = NULL; - else - res = vim_strnsave(s + n, 1); --ectx.ec_stack.ga_len; tv = STACK_TV_BOT(-1); + res = char_from_string(tv->vval.v_string, n); vim_free(tv->vval.v_string); tv->vval.v_string = res; }