# HG changeset patch # User Christian Brabandt # Date 1692563403 -7200 # Node ID d415dfae697712e94b18be21a874c048b1f3c63f # Parent 6e346800670ca29781adfd643ade0d64be114bd5 patch 9.0.1777: patch 9.0.1771 causes problems Commit: https://github.com/vim/vim/commit/be07caa071ea93c07b1b2204a17237133f38b2bd Author: Christian Brabandt Date: Sun Aug 20 22:26:15 2023 +0200 patch 9.0.1777: patch 9.0.1771 causes problems Problem: patch 9.0.1771 causes problems Solution: revert it Revert "patch 9.0.1771: regex: combining chars in collections not handled" This reverts commit ca22fc36a4e8a315f199893ee8ff6253573f5fbe. Signed-off-by: Christian Brabandt diff --git a/src/regexp_bt.c b/src/regexp_bt.c --- a/src/regexp_bt.c +++ b/src/regexp_bt.c @@ -3743,38 +3743,13 @@ regmatch( case ANYOF: case ANYBUT: - { - char_u *q = OPERAND(scan); - - if (c == NUL) - status = RA_NOMATCH; - else if ((cstrchr(q, c) == NULL) == (op == ANYOF)) - status = RA_NOMATCH; - else - { - // Check following combining characters - int len = 0; - int i; - - if (enc_utf8) - len = utfc_ptr2len(q) - utf_ptr2len(q); - - MB_CPTR_ADV(rex.input); - MB_CPTR_ADV(q); - - if (!enc_utf8 || len == 0) - break; - - for (i = 0; i < len; ++i) - if (q[i] != rex.input[i]) - { - status = RA_NOMATCH; - break; - } - rex.input += len; - } - break; - } + if (c == NUL) + status = RA_NOMATCH; + else if ((cstrchr(OPERAND(scan), c) == NULL) == (op == ANYOF)) + status = RA_NOMATCH; + else + ADVANCE_REGINPUT(); + break; case MULTIBYTECODE: if (has_mbyte) diff --git a/src/regexp_nfa.c b/src/regexp_nfa.c --- a/src/regexp_nfa.c +++ b/src/regexp_nfa.c @@ -1764,7 +1764,6 @@ collection: endp = skip_anyof(p); if (*endp == ']') { - int plen; /* * Try to reverse engineer character classes. For example, * recognize that [0-9] stands for \d and [A-Za-z_] for \h, @@ -2036,34 +2035,11 @@ collection: if (got_coll_char == TRUE && startc == 0) EMIT(0x0a); else - { EMIT(startc); - if (!(enc_utf8 && (utf_ptr2len(regparse) != (plen = utfc_ptr2len(regparse))))) - { - EMIT(NFA_CONCAT); - } - } + EMIT(NFA_CONCAT); } } - if (enc_utf8 && (utf_ptr2len(regparse) != (plen = utfc_ptr2len(regparse)))) - { - int i = utf_ptr2len(regparse); - - c = utf_ptr2char(regparse + i); - - // Add composing characters - for (;;) - { - EMIT(c); - EMIT(NFA_CONCAT); - if ((i += utf_char2len(c)) >= plen) - break; - c = utf_ptr2char(regparse + i); - } - EMIT(NFA_COMPOSING); - EMIT(NFA_CONCAT); - } MB_PTR_ADV(regparse); } // while (p < endp) @@ -6442,84 +6418,6 @@ nfa_regmatch( result_if_matched = (t->state->c == NFA_START_COLL); for (;;) { - if (state->c == NFA_COMPOSING) - { - int mc = curc; - int len = 0; - nfa_state_T *end; - nfa_state_T *sta; - int cchars[MAX_MCO]; - int ccount = 0; - int j; - - sta = t->state->out->out; - len = 0; - if (utf_iscomposing(sta->c)) - { - // Only match composing character(s), ignore base - // character. Used for ".{composing}" and "{composing}" - // (no preceding character). - len += mb_char2len(mc); - } - if (rex.reg_icombine && len == 0) - { - // If \Z was present, then ignore composing characters. - // When ignoring the base character this always matches. - if (sta->c != curc) - result = FAIL; - else - result = OK; - while (sta->c != NFA_END_COMPOSING) - sta = sta->out; - } - // Check base character matches first, unless ignored. - else if (len > 0 || mc == sta->c) -// if (len > 0 || mc == sta->c) - { - if (len == 0) - { - len += mb_char2len(mc); - sta = sta->out; - } - - // We don't care about the order of composing characters. - // Get them into cchars[] first. - while (len < clen) - { - mc = mb_ptr2char(rex.input + len); - cchars[ccount++] = mc; - len += mb_char2len(mc); - if (ccount == MAX_MCO) - break; - } - - // Check that each composing char in the pattern matches a - // composing char in the text. We do not check if all - // composing chars are matched. - result = OK; - while (sta->c != NFA_END_COMPOSING) - { - for (j = 0; j < ccount; ++j) - if (cchars[j] == sta->c) - break; - if (j == ccount) - { - result = FAIL; - break; - } - sta = sta->out; - } - } - else - result = FAIL; - - if (t->state->out->out1->c == NFA_END_COMPOSING) - { - end = t->state->out->out1; - ADD_STATE_IF_MATCH(end); - } - break; - } if (state->c == NFA_END_COLL) { result = !result_if_matched; diff --git a/src/testdir/test_regexp_utf8.vim b/src/testdir/test_regexp_utf8.vim --- a/src/testdir/test_regexp_utf8.vim +++ b/src/testdir/test_regexp_utf8.vim @@ -575,16 +575,5 @@ func Test_match_too_complicated() set regexpengine=0 endfunc -func Test_combining_chars_in_collection() - new - for i in range(0,2) - exe "set re=".i - put =['ɔ̃', 'ɔ', '̃ ã', 'abcd'] - :%s/[ɔ̃]// - call assert_equal(['', '', 'ɔ', '̃ ã', 'abcd'], getline(1,'$')) - %d - endfor - bw! -endfunc " vim: shiftwidth=2 sts=2 expandtab diff --git a/src/version.c b/src/version.c --- a/src/version.c +++ b/src/version.c @@ -700,6 +700,8 @@ static char *(features[]) = static int included_patches[] = { /* Add new patch number below this line */ /**/ + 1777, +/**/ 1776, /**/ 1775,