changeset 20772:097f5b5c907b v8.2.0938

patch 8.2.0938: NFA regexp uses tolower() to compare ignore-case Commit: https://github.com/vim/vim/commit/59de417b904bbd204e313f015839317b577bd124 Author: Bram Moolenaar <Bram@vim.org> Date: Tue Jun 9 19:34:54 2020 +0200 patch 8.2.0938: NFA regexp uses tolower() to compare ignore-case Problem: NFA regexp uses tolower() to compare ignore-case. (Thayne McCombs) Solution: Use utf_fold() when possible. (ref. neovim https://github.com/vim/vim/issues/12456)
author Bram Moolenaar <Bram@vim.org>
date Tue, 09 Jun 2020 19:45:03 +0200
parents 8aa004f7e48f
children 764cb6c1e56c
files src/diff.c src/macros.h src/regexp_nfa.c src/testdir/test_regexp_utf8.vim src/version.c
diffstat 5 files changed, 30 insertions(+), 8 deletions(-) [+]
line wrap: on
line diff
--- a/src/diff.c
+++ b/src/diff.c
@@ -747,7 +747,7 @@ diff_write_buffer(buf_T *buf, diffin_T *
 
 		// xdiff doesn't support ignoring case, fold-case the text.
 		c = PTR2CHAR(s);
-		c = enc_utf8 ? utf_fold(c) : MB_TOLOWER(c);
+		c = MB_CASEFOLD(c);
 		orig_len = mb_ptr2len(s);
 		if (mb_char2bytes(c, cbuf) != orig_len)
 		    // TODO: handle byte length difference
--- a/src/macros.h
+++ b/src/macros.h
@@ -93,6 +93,7 @@
 #define MB_ISUPPER(c)	vim_isupper(c)
 #define MB_TOLOWER(c)	vim_tolower(c)
 #define MB_TOUPPER(c)	vim_toupper(c)
+#define MB_CASEFOLD(c)	(enc_utf8 ? utf_fold(c) : MB_TOLOWER(c))
 
 // Use our own isdigit() replacement, because on MS-Windows isdigit() returns
 // non-zero for superscript 1.  Also avoids that isdigit() crashes for numbers
--- a/src/regexp_nfa.c
+++ b/src/regexp_nfa.c
@@ -5459,7 +5459,7 @@ find_match_text(colnr_T startcol, int re
 	{
 	    c1 = PTR2CHAR(match_text + len1);
 	    c2 = PTR2CHAR(rex.line + col + len2);
-	    if (c1 != c2 && (!rex.reg_ic || MB_TOLOWER(c1) != MB_TOLOWER(c2)))
+	    if (c1 != c2 && (!rex.reg_ic || MB_CASEFOLD(c1) != MB_CASEFOLD(c2)))
 	    {
 		match = FALSE;
 		break;
@@ -6271,11 +6271,11 @@ nfa_regmatch(
 			}
 			if (rex.reg_ic)
 			{
-			    int curc_low = MB_TOLOWER(curc);
+			    int curc_low = MB_CASEFOLD(curc);
 			    int done = FALSE;
 
 			    for ( ; c1 <= c2; ++c1)
-				if (MB_TOLOWER(c1) == curc_low)
+				if (MB_CASEFOLD(c1) == curc_low)
 				{
 				    result = result_if_matched;
 				    done = TRUE;
@@ -6287,8 +6287,8 @@ nfa_regmatch(
 		    }
 		    else if (state->c < 0 ? check_char_class(state->c, curc)
 			       : (curc == state->c
-				   || (rex.reg_ic && MB_TOLOWER(curc)
-						    == MB_TOLOWER(state->c))))
+				   || (rex.reg_ic && MB_CASEFOLD(curc)
+						    == MB_CASEFOLD(state->c))))
 		    {
 			result = result_if_matched;
 			break;
@@ -6713,7 +6713,7 @@ nfa_regmatch(
 		result = (c == curc);
 
 		if (!result && rex.reg_ic)
-		    result = MB_TOLOWER(c) == MB_TOLOWER(curc);
+		    result = MB_CASEFOLD(c) == MB_CASEFOLD(curc);
 		// If rex.reg_icombine is not set only skip over the character
 		// itself.  When it is set skip over composing characters.
 		if (result && enc_utf8 && !rex.reg_icombine)
@@ -6882,7 +6882,7 @@ nfa_regmatch(
 			// cheaper than adding a state that won't match.
 			c = PTR2CHAR(rex.input + clen);
 			if (c != prog->regstart && (!rex.reg_ic
-			       || MB_TOLOWER(c) != MB_TOLOWER(prog->regstart)))
+			     || MB_CASEFOLD(c) != MB_CASEFOLD(prog->regstart)))
 			{
 #ifdef ENABLE_LOG
 			    fprintf(log_fd, "  Skipping start state, regstart does not match\n");
--- a/src/testdir/test_regexp_utf8.vim
+++ b/src/testdir/test_regexp_utf8.vim
@@ -355,4 +355,23 @@ func Test_ambiwidth()
   set regexpengine& ambiwidth&
 endfunc
 
+func Run_regexp_ignore_case()
+  call assert_equal('iIİ', substitute('iIİ', '\([iIİ]\)', '\1', 'g'))
+
+  call assert_equal('iIx', substitute('iIİ', '\c\([İ]\)', 'x', 'g'))
+  call assert_equal('xxİ', substitute('iIİ', '\(i\c\)', 'x', 'g'))
+  call assert_equal('iIx', substitute('iIİ', '\(İ\c\)', 'x', 'g'))
+  call assert_equal('iIx', substitute('iIİ', '\c\(\%u0130\)', 'x', 'g'))
+  call assert_equal('iIx', substitute('iIİ', '\c\([\u0130]\)', 'x', 'g'))
+  call assert_equal('iIx', substitute('iIİ', '\c\([\u012f-\u0131]\)', 'x', 'g'))
+endfunc
+
+func Test_regexp_ignore_case()
+  set regexpengine=1
+  call Run_regexp_ignore_case()
+  set regexpengine=2
+  call Run_regexp_ignore_case()
+  set regexpengine&
+endfunc
+
 " vim: shiftwidth=2 sts=2 expandtab
--- a/src/version.c
+++ b/src/version.c
@@ -755,6 +755,8 @@ static char *(features[]) =
 static int included_patches[] =
 {   /* Add new patch number below this line */
 /**/
+    938,
+/**/
     937,
 /**/
     936,