# HG changeset patch # User Bram Moolenaar # Date 1609605903 -3600 # Node ID a7cdfc8e4b6ef079650376b98e709be8739fabb5 # Parent 0d0ed7100454fc49a0b3426c7bfb10ed3f0e32a5 patch 8.2.2278: falling back to old regexp engine can break some patterns Commit: https://github.com/vim/vim/commit/66c50c565321d4d49d8d5620912e5e8fe4825644 Author: Bram Moolenaar Date: Sat Jan 2 17:43:49 2021 +0100 patch 8.2.2278: falling back to old regexp engine can break some patterns Problem: Falling back to old regexp engine can break some patterns. Solution: Do not fall back once [[:lower:]] or [[:upper:]] is used. (Christian Brabandt, closes #7572) diff --git a/src/regexp.c b/src/regexp.c --- a/src/regexp.c +++ b/src/regexp.c @@ -294,6 +294,7 @@ init_class_tab(void) static char_u *regparse; // Input-scan pointer. static int regnpar; // () count. +static int wants_nfa; // regex should use NFA engine #ifdef FEAT_SYN_HL static int regnzpar; // \z() count. static int re_has_z; // \z item detected @@ -381,6 +382,9 @@ static int cstrncmp(char_u *s1, char_u * static char_u *cstrchr(char_u *, int); static int re_mult_next(char *what); static int reg_iswordc(int); +#ifdef FEAT_EVAL +static void report_re_switch(char_u *pat); +#endif static regengine_T bt_regengine; static regengine_T nfa_regengine; @@ -2662,7 +2666,7 @@ vim_regcomp(char_u *expr_arg, int re_fla if (prog == NULL) { #ifdef BT_REGEXP_DEBUG_LOG - if (regexp_engine != BACKTRACKING_ENGINE) // debugging log for NFA + if (regexp_engine == BACKTRACKING_ENGINE) // debugging log for BT engine { FILE *f; f = fopen(BT_REGEXP_DEBUG_LOG_NAME, "a"); @@ -2686,6 +2690,9 @@ vim_regcomp(char_u *expr_arg, int re_fla && called_emsg == called_emsg_before) { regexp_engine = BACKTRACKING_ENGINE; +#ifdef FEAT_EVAL + report_re_switch(expr); +#endif prog = bt_regengine.regcomp(expr, re_flags); } } diff --git a/src/regexp_nfa.c b/src/regexp_nfa.c --- a/src/regexp_nfa.c +++ b/src/regexp_nfa.c @@ -253,6 +253,12 @@ static int nfa_re_flags; // re_flags pas static int *post_start; // 
holds the postfix form of r.e. static int *post_end; static int *post_ptr; + +// Set when the pattern should use the NFA engine. +// E.g. [[:upper:]] only allows 8bit characters for BT engine, +// while NFA engine handles multibyte characters correctly. +static int wants_nfa; + static int nstate; // Number of states in the NFA. static int istate; // Index in the state vector, used in alloc_state() @@ -306,6 +312,7 @@ nfa_regcomp_start( return FAIL; post_ptr = post_start; post_end = post_start + nstate_max; + wants_nfa = FALSE; rex.nfa_has_zend = FALSE; rex.nfa_has_backref = FALSE; @@ -1707,6 +1714,7 @@ collection: EMIT(NFA_CLASS_GRAPH); break; case CLASS_LOWER: + wants_nfa = TRUE; EMIT(NFA_CLASS_LOWER); break; case CLASS_PRINT: @@ -1719,6 +1727,7 @@ collection: EMIT(NFA_CLASS_SPACE); break; case CLASS_UPPER: + wants_nfa = TRUE; EMIT(NFA_CLASS_UPPER); break; case CLASS_XDIGIT: @@ -2137,9 +2146,15 @@ nfa_regpiece(void) // The engine is very inefficient (uses too many states) when the // maximum is much larger than the minimum and when the maximum is - // large. Bail out if we can use the other engine. + // large. However, when maxval is MAX_LIMIT, it is okay, as this + // will emit NFA_STAR. + // Bail out if we can use the other engine, but only when the + // pattern does not need the NFA engine (e.g. [[:upper:]]\{2,\} + // does not work with characters > 8 bit with the BT engine) if ((nfa_re_flags & RE_AUTO) - && (maxval > 500 || maxval > minval + 200) + && (maxval > 500 || maxval > minval + 200) + && (maxval != MAX_LIMIT && minval < 200) + && !wants_nfa) return FAIL; // Ignore previous call to nfa_regatom() diff --git a/src/testdir/test_regexp_utf8.vim b/src/testdir/test_regexp_utf8.vim --- a/src/testdir/test_regexp_utf8.vim +++ b/src/testdir/test_regexp_utf8.vim @@ -510,6 +510,52 @@ func Test_match_start_of_line_combining( bwipe! 
endfunc +" Check that [[:upper:]] matches for automatic engine +func Test_match_char_class_upper() + new + let _engine=&regexpengine + " Test 1: [[:upper:]]\{2,\} + set regexpengine=0 + call setline(1, ['05. ПЕСНЯ О ГЕРОЯХ муз. А. Давиденко, М. Коваля и Б. Шехтера ...', '05. PJESNJA O GJEROJAKH mus. A. Davidjenko, M. Kovalja i B. Shjekhtjera ...']) + call cursor(1,1) + let search_cmd='norm /\<[[:upper:]]\{2,\}\>' .. "\<CR>" + exe search_cmd + call assert_equal(4, searchcount().total, 'TEST 1') + set regexpengine=1 + exe search_cmd + call assert_equal(2, searchcount().total, 'TEST 1') + set regexpengine=2 + exe search_cmd + call assert_equal(4, searchcount().total, 'TEST 1') + + " Test 2: [[:upper:]].\+ + let search_cmd='norm /\<[[:upper:]].\+\>' .. "\<CR>" + set regexpengine=0 + exe search_cmd + call assert_equal(2, searchcount().total, 'TEST 2') + set regexpengine=1 + exe search_cmd + call assert_equal(1, searchcount().total, 'TEST 2') + set regexpengine=2 + exe search_cmd + call assert_equal(2, searchcount().total, 'TEST 2') + + " Test 3: [[:lower:]]\+ + let search_cmd='norm /\<[[:lower:]]\+\>' .. "\<CR>" + set regexpengine=0 + exe search_cmd + call assert_equal(4, searchcount().total, 'TEST 3 lower') + set regexpengine=1 + exe search_cmd + call assert_equal(2, searchcount().total, 'TEST 3 lower') + set regexpengine=2 + exe search_cmd + call assert_equal(4, searchcount().total, 'TEST 3 lower') + + " clean up + let &regexpengine=_engine + bwipe! +endfunc " vim: shiftwidth=2 sts=2 expandtab diff --git a/src/version.c b/src/version.c --- a/src/version.c +++ b/src/version.c @@ -751,6 +751,8 @@ static char *(features[]) = static int included_patches[] = { /* Add new patch number below this line */ /**/ + 2278, +/**/ 2277, /**/ 2276,