diff src/regexp_nfa.c @ 23471:a7cdfc8e4b6e v8.2.2278

patch 8.2.2278: falling back to old regexp engine can fail for some patterns Commit: https://github.com/vim/vim/commit/66c50c565321d4d49d8d5620912e5e8fe4825644 Author: Bram Moolenaar <Bram@vim.org> Date: Sat Jan 2 17:43:49 2021 +0100 patch 8.2.2278: falling back to old regexp engine can fail for some patterns Problem: Falling back to old regexp engine can fail for some patterns. Solution: Do not fall back once [[:lower:]] or [[:upper:]] is used. (Christian Brabandt, closes #7572)
author Bram Moolenaar <Bram@vim.org>
date Sat, 02 Jan 2021 17:45:03 +0100
parents 22d0c25869d8
children 585695c70392
line wrap: on
line diff
--- a/src/regexp_nfa.c
+++ b/src/regexp_nfa.c
@@ -253,6 +253,12 @@ static int nfa_re_flags; // re_flags pas
 static int *post_start;  // holds the postfix form of r.e.
 static int *post_end;
 static int *post_ptr;
+
+// Set when the pattern should use the NFA engine.
+// E.g. [[:upper:]] only allows 8bit characters for BT engine,
+// while NFA engine handles multibyte characters correctly.
+static int wants_nfa;
+
 static int nstate;	// Number of states in the NFA.
 static int istate;	// Index in the state vector, used in alloc_state()
 
@@ -306,6 +312,7 @@ nfa_regcomp_start(
 	return FAIL;
     post_ptr = post_start;
     post_end = post_start + nstate_max;
+    wants_nfa = FALSE;
     rex.nfa_has_zend = FALSE;
     rex.nfa_has_backref = FALSE;
 
@@ -1707,6 +1714,7 @@ collection:
 				    EMIT(NFA_CLASS_GRAPH);
 				    break;
 				case CLASS_LOWER:
+				    wants_nfa = TRUE;
 				    EMIT(NFA_CLASS_LOWER);
 				    break;
 				case CLASS_PRINT:
@@ -1719,6 +1727,7 @@ collection:
 				    EMIT(NFA_CLASS_SPACE);
 				    break;
 				case CLASS_UPPER:
+				    wants_nfa = TRUE;
 				    EMIT(NFA_CLASS_UPPER);
 				    break;
 				case CLASS_XDIGIT:
@@ -2137,9 +2146,15 @@ nfa_regpiece(void)
 
 	    // The engine is very inefficient (uses too many states) when the
 	    // maximum is much larger than the minimum and when the maximum is
-	    // large.  Bail out if we can use the other engine.
+	    // large.  However, when maxval is MAX_LIMIT, it is okay, as this
+	    // will emit NFA_STAR.
+	    // Bail out if we can use the other engine, but only when the
+	    // pattern does not need the NFA engine (e.g. [[:upper:]]\{2,\}
+	    // does not work with characters > 8 bit with the BT engine).
 	    if ((nfa_re_flags & RE_AUTO)
-				   && (maxval > 500 || maxval > minval + 200))
+				   && (maxval > 500 || maxval > minval + 200)
+				   && (maxval != MAX_LIMIT && minval < 200)
+				   && !wants_nfa)
 		return FAIL;
 
 	    // Ignore previous call to nfa_regatom()