Mercurial > vim
diff src/regexp_nfa.c @ 23471:a7cdfc8e4b6e v8.2.2278
patch 8.2.2278: falling back to old regexp engine can break some patterns
Commit: https://github.com/vim/vim/commit/66c50c565321d4d49d8d5620912e5e8fe4825644
Author: Bram Moolenaar <Bram@vim.org>
Date: Sat Jan 2 17:43:49 2021 +0100
patch 8.2.2278: falling back to old regexp engine can break some patterns
Problem: Falling back to the old regexp engine can break some patterns.
Solution: Do not fall back once [[:lower:]] or [[:upper:]] is used.
(Christian Brabandt, closes #7572)
author | Bram Moolenaar <Bram@vim.org> |
---|---|
date | Sat, 02 Jan 2021 17:45:03 +0100 |
parents | 22d0c25869d8 |
children | 585695c70392 |
line wrap: on
line diff
--- a/src/regexp_nfa.c +++ b/src/regexp_nfa.c @@ -253,6 +253,12 @@ static int nfa_re_flags; // re_flags pas static int *post_start; // holds the postfix form of r.e. static int *post_end; static int *post_ptr; + +// Set when the pattern should use the NFA engine. +// E.g. [[:upper:]] only allows 8bit characters for BT engine, +// while NFA engine handles multibyte characters correctly. +static int wants_nfa; + static int nstate; // Number of states in the NFA. static int istate; // Index in the state vector, used in alloc_state() @@ -306,6 +312,7 @@ nfa_regcomp_start( return FAIL; post_ptr = post_start; post_end = post_start + nstate_max; + wants_nfa = FALSE; rex.nfa_has_zend = FALSE; rex.nfa_has_backref = FALSE; @@ -1707,6 +1714,7 @@ collection: EMIT(NFA_CLASS_GRAPH); break; case CLASS_LOWER: + wants_nfa = TRUE; EMIT(NFA_CLASS_LOWER); break; case CLASS_PRINT: @@ -1719,6 +1727,7 @@ collection: EMIT(NFA_CLASS_SPACE); break; case CLASS_UPPER: + wants_nfa = TRUE; EMIT(NFA_CLASS_UPPER); break; case CLASS_XDIGIT: @@ -2137,9 +2146,15 @@ nfa_regpiece(void) // The engine is very inefficient (uses too many states) when the // maximum is much larger than the minimum and when the maximum is - // large. Bail out if we can use the other engine. + // large. However, when maxval is MAX_LIMIT, it is okay, as this + // will emit NFA_STAR. + // Bail out if we can use the other engine, but only, when the + // pattern does not need the NFA engine like (e.g. [[:upper:]]\{2,\} + // does not work with with characters > 8 bit with the BT engine) if ((nfa_re_flags & RE_AUTO) - && (maxval > 500 || maxval > minval + 200)) + && (maxval > 500 || maxval > minval + 200) + && (maxval != MAX_LIMIT && minval < 200) + && !wants_nfa) return FAIL; // Ignore previous call to nfa_regatom()