# HG changeset patch # User Bram Moolenaar # Date 1591724703 -7200 # Node ID 097f5b5c907b0fdf057616ced7dfd5c6e657984c # Parent 8aa004f7e48fab0462f24e4bfb58dc02b5d23893 patch 8.2.0938: NFA regexp uses tolower() to compare ignore-case Commit: https://github.com/vim/vim/commit/59de417b904bbd204e313f015839317b577bd124 Author: Bram Moolenaar Date: Tue Jun 9 19:34:54 2020 +0200 patch 8.2.0938: NFA regexp uses tolower() to compare ignore-case Problem: NFA regexp uses tolower() to compare ignore-case. (Thayne McCombs) Solution: Use utf_fold() when possible. (ref. neovim https://github.com/neovim/neovim/issues/12456) diff --git a/src/diff.c b/src/diff.c --- a/src/diff.c +++ b/src/diff.c @@ -747,7 +747,7 @@ diff_write_buffer(buf_T *buf, diffin_T * // xdiff doesn't support ignoring case, fold-case the text. c = PTR2CHAR(s); - c = enc_utf8 ? utf_fold(c) : MB_TOLOWER(c); + c = MB_CASEFOLD(c); orig_len = mb_ptr2len(s); if (mb_char2bytes(c, cbuf) != orig_len) // TODO: handle byte length difference diff --git a/src/macros.h b/src/macros.h --- a/src/macros.h +++ b/src/macros.h @@ -93,6 +93,7 @@ #define MB_ISUPPER(c) vim_isupper(c) #define MB_TOLOWER(c) vim_tolower(c) #define MB_TOUPPER(c) vim_toupper(c) +#define MB_CASEFOLD(c) (enc_utf8 ? utf_fold(c) : MB_TOLOWER(c)) // Use our own isdigit() replacement, because on MS-Windows isdigit() returns // non-zero for superscript 1. 
Also avoids that isdigit() crashes for numbers diff --git a/src/regexp_nfa.c b/src/regexp_nfa.c --- a/src/regexp_nfa.c +++ b/src/regexp_nfa.c @@ -5459,7 +5459,7 @@ find_match_text(colnr_T startcol, int re { c1 = PTR2CHAR(match_text + len1); c2 = PTR2CHAR(rex.line + col + len2); - if (c1 != c2 && (!rex.reg_ic || MB_TOLOWER(c1) != MB_TOLOWER(c2))) + if (c1 != c2 && (!rex.reg_ic || MB_CASEFOLD(c1) != MB_CASEFOLD(c2))) { match = FALSE; break; @@ -6271,11 +6271,11 @@ nfa_regmatch( } if (rex.reg_ic) { - int curc_low = MB_TOLOWER(curc); + int curc_low = MB_CASEFOLD(curc); int done = FALSE; for ( ; c1 <= c2; ++c1) - if (MB_TOLOWER(c1) == curc_low) + if (MB_CASEFOLD(c1) == curc_low) { result = result_if_matched; done = TRUE; @@ -6287,8 +6287,8 @@ nfa_regmatch( } else if (state->c < 0 ? check_char_class(state->c, curc) : (curc == state->c - || (rex.reg_ic && MB_TOLOWER(curc) - == MB_TOLOWER(state->c)))) + || (rex.reg_ic && MB_CASEFOLD(curc) + == MB_CASEFOLD(state->c)))) { result = result_if_matched; break; @@ -6713,7 +6713,7 @@ nfa_regmatch( result = (c == curc); if (!result && rex.reg_ic) - result = MB_TOLOWER(c) == MB_TOLOWER(curc); + result = MB_CASEFOLD(c) == MB_CASEFOLD(curc); // If rex.reg_icombine is not set only skip over the character // itself. When it is set skip over composing characters. if (result && enc_utf8 && !rex.reg_icombine) @@ -6882,7 +6882,7 @@ nfa_regmatch( // cheaper than adding a state that won't match. 
c = PTR2CHAR(rex.input + clen); if (c != prog->regstart && (!rex.reg_ic - || MB_TOLOWER(c) != MB_TOLOWER(prog->regstart))) + || MB_CASEFOLD(c) != MB_CASEFOLD(prog->regstart))) { #ifdef ENABLE_LOG fprintf(log_fd, " Skipping start state, regstart does not match\n"); diff --git a/src/testdir/test_regexp_utf8.vim b/src/testdir/test_regexp_utf8.vim --- a/src/testdir/test_regexp_utf8.vim +++ b/src/testdir/test_regexp_utf8.vim @@ -355,4 +355,23 @@ func Test_ambiwidth() set regexpengine& ambiwidth& endfunc +func Run_regexp_ignore_case() + call assert_equal('iIİ', substitute('iIİ', '\([iIİ]\)', '\1', 'g')) + + call assert_equal('iIx', substitute('iIİ', '\c\([İ]\)', 'x', 'g')) + call assert_equal('xxİ', substitute('iIİ', '\(i\c\)', 'x', 'g')) + call assert_equal('iIx', substitute('iIİ', '\(İ\c\)', 'x', 'g')) + call assert_equal('iIx', substitute('iIİ', '\c\(\%u0130\)', 'x', 'g')) + call assert_equal('iIx', substitute('iIİ', '\c\([\u0130]\)', 'x', 'g')) + call assert_equal('iIx', substitute('iIİ', '\c\([\u012f-\u0131]\)', 'x', 'g')) +endfunc + +func Test_regexp_ignore_case() + set regexpengine=1 + call Run_regexp_ignore_case() + set regexpengine=2 + call Run_regexp_ignore_case() + set regexpengine& +endfunc + " vim: shiftwidth=2 sts=2 expandtab diff --git a/src/version.c b/src/version.c --- a/src/version.c +++ b/src/version.c @@ -755,6 +755,8 @@ static char *(features[]) = static int included_patches[] = { /* Add new patch number below this line */ /**/ + 938, +/**/ 937, /**/ 936,