changeset 32950:d415dfae6977 v9.0.1777

patch 9.0.1777: patch 9.0.1771 causes problems

Commit: https://github.com/vim/vim/commit/be07caa071ea93c07b1b2204a17237133f38b2bd
Author: Christian Brabandt <cb@256bit.org>
Date:   Sun Aug 20 22:26:15 2023 +0200

    patch 9.0.1777: patch 9.0.1771 causes problems

    Problem:  patch 9.0.1771 causes problems
    Solution: revert it

    Revert "patch 9.0.1771: regex: combining chars in collections not handled"

    This reverts commit ca22fc36a4e8a315f199893ee8ff6253573f5fbe.

    Signed-off-by: Christian Brabandt <cb@256bit.org>
author Christian Brabandt <cb@256bit.org>
date Sun, 20 Aug 2023 22:30:03 +0200
parents 6e346800670c
children 6c57606378bd
files src/regexp_bt.c src/regexp_nfa.c src/testdir/test_regexp_utf8.vim src/version.c
diffstat 4 files changed, 10 insertions(+), 146 deletions(-) [+]
line wrap: on
line diff
--- a/src/regexp_bt.c
+++ b/src/regexp_bt.c
@@ -3743,38 +3743,13 @@ regmatch(
 
 	  case ANYOF:
 	  case ANYBUT:
-	    {
-		char_u  *q = OPERAND(scan);
-
-		if (c == NUL)
-		    status = RA_NOMATCH;
-		else if ((cstrchr(q, c) == NULL) == (op == ANYOF))
-		    status = RA_NOMATCH;
-		else
-		{
-		    // Check following combining characters
-		    int	len = 0;
-		    int i;
-
-		    if (enc_utf8)
-			len = utfc_ptr2len(q) - utf_ptr2len(q);
-
-		    MB_CPTR_ADV(rex.input);
-		    MB_CPTR_ADV(q);
-
-		    if (!enc_utf8 || len == 0)
-			break;
-
-		    for (i = 0; i < len; ++i)
-			if (q[i] != rex.input[i])
-			{
-			    status = RA_NOMATCH;
-			    break;
-			}
-		    rex.input += len;
-		}
-		break;
-	    }
+	    if (c == NUL)
+		status = RA_NOMATCH;
+	    else if ((cstrchr(OPERAND(scan), c) == NULL) == (op == ANYOF))
+		status = RA_NOMATCH;
+	    else
+		ADVANCE_REGINPUT();
+	    break;
 
 	  case MULTIBYTECODE:
 	    if (has_mbyte)
--- a/src/regexp_nfa.c
+++ b/src/regexp_nfa.c
@@ -1764,7 +1764,6 @@ collection:
 	    endp = skip_anyof(p);
 	    if (*endp == ']')
 	    {
-		int plen;
 		/*
 		 * Try to reverse engineer character classes. For example,
 		 * recognize that [0-9] stands for \d and [A-Za-z_] for \h,
@@ -2036,34 +2035,11 @@ collection:
 			    if (got_coll_char == TRUE && startc == 0)
 				EMIT(0x0a);
 			    else
-			    {
 				EMIT(startc);
-				if (!(enc_utf8 && (utf_ptr2len(regparse) != (plen = utfc_ptr2len(regparse)))))
-				{
-				    EMIT(NFA_CONCAT);
-				}
-			    }
+			    EMIT(NFA_CONCAT);
 			}
 		    }
 
-		    if (enc_utf8 && (utf_ptr2len(regparse) != (plen = utfc_ptr2len(regparse))))
-		    {
-			int i = utf_ptr2len(regparse);
-
-			c = utf_ptr2char(regparse + i);
-
-			// Add composing characters
-			for (;;)
-			{
-			    EMIT(c);
-			    EMIT(NFA_CONCAT);
-			    if ((i += utf_char2len(c)) >= plen)
-				break;
-			    c = utf_ptr2char(regparse + i);
-			}
-			EMIT(NFA_COMPOSING);
-			EMIT(NFA_CONCAT);
-		    }
 		    MB_PTR_ADV(regparse);
 		} // while (p < endp)
 
@@ -6442,84 +6418,6 @@ nfa_regmatch(
 		result_if_matched = (t->state->c == NFA_START_COLL);
 		for (;;)
 		{
-		    if (state->c == NFA_COMPOSING)
-		    {
-			int	    mc = curc;
-			int	    len = 0;
-			nfa_state_T *end;
-			nfa_state_T *sta;
-			int	    cchars[MAX_MCO];
-			int	    ccount = 0;
-			int	    j;
-
-			sta = t->state->out->out;
-			len = 0;
-			if (utf_iscomposing(sta->c))
-			{
-			    // Only match composing character(s), ignore base
-			    // character.  Used for ".{composing}" and "{composing}"
-			    // (no preceding character).
-			    len += mb_char2len(mc);
-			}
-			if (rex.reg_icombine && len == 0)
-			{
-			    // If \Z was present, then ignore composing characters.
-			    // When ignoring the base character this always matches.
-			    if (sta->c != curc)
-				result = FAIL;
-			    else
-				result = OK;
-			    while (sta->c != NFA_END_COMPOSING)
-				sta = sta->out;
-			}
-			// Check base character matches first, unless ignored.
-			else if (len > 0 || mc == sta->c)
-//			if (len > 0 || mc == sta->c)
-			{
-			    if (len == 0)
-			    {
-				len += mb_char2len(mc);
-				sta = sta->out;
-			    }
-
-			    // We don't care about the order of composing characters.
-			    // Get them into cchars[] first.
-			    while (len < clen)
-			    {
-				mc = mb_ptr2char(rex.input + len);
-				cchars[ccount++] = mc;
-				len += mb_char2len(mc);
-				if (ccount == MAX_MCO)
-				    break;
-			    }
-
-			    // Check that each composing char in the pattern matches a
-			    // composing char in the text.  We do not check if all
-			    // composing chars are matched.
-			    result = OK;
-			    while (sta->c != NFA_END_COMPOSING)
-			    {
-				for (j = 0; j < ccount; ++j)
-				    if (cchars[j] == sta->c)
-					break;
-				if (j == ccount)
-				{
-				    result = FAIL;
-				    break;
-				}
-				sta = sta->out;
-			    }
-			}
-			else
-			    result = FAIL;
-
-			if (t->state->out->out1->c == NFA_END_COMPOSING)
-			{
-			    end = t->state->out->out1;
-			    ADD_STATE_IF_MATCH(end);
-			}
-			break;
-		    }
 		    if (state->c == NFA_END_COLL)
 		    {
 			result = !result_if_matched;
--- a/src/testdir/test_regexp_utf8.vim
+++ b/src/testdir/test_regexp_utf8.vim
@@ -575,16 +575,5 @@ func Test_match_too_complicated()
   set regexpengine=0
 endfunc
 
-func Test_combining_chars_in_collection()
-  new
-  for i in range(0,2)
-    exe "set re=".i
-    put =['ɔ̃', 'ɔ',  '̃  ã', 'abcd']
-    :%s/[ɔ̃]//
-    call assert_equal(['', '', 'ɔ', '̃  ã', 'abcd'], getline(1,'$'))
-    %d
-  endfor
-  bw!
-endfunc
 
 " vim: shiftwidth=2 sts=2 expandtab
--- a/src/version.c
+++ b/src/version.c
@@ -700,6 +700,8 @@ static char *(features[]) =
 static int included_patches[] =
 {   /* Add new patch number below this line */
 /**/
+    1777,
+/**/
     1776,
 /**/
     1775,