changeset 23471:a7cdfc8e4b6e v8.2.2278

patch 8.2.2278: falling back to old regexp engine can break some patterns Commit: https://github.com/vim/vim/commit/66c50c565321d4d49d8d5620912e5e8fe4825644 Author: Bram Moolenaar <Bram@vim.org> Date: Sat Jan 2 17:43:49 2021 +0100 patch 8.2.2278: falling back to old regexp engine can break some patterns Problem: Falling back to old regexp engine can break some patterns. Solution: Do not fall back once [[:lower:]] or [[:upper:]] is used. (Christian Brabandt, closes #7572)
author Bram Moolenaar <Bram@vim.org>
date Sat, 02 Jan 2021 17:45:03 +0100
parents 0d0ed7100454
children 55cfcf83ae29
files src/regexp.c src/regexp_nfa.c src/testdir/test_regexp_utf8.vim src/version.c
diffstat 4 files changed, 73 insertions(+), 3 deletions(-) [+]
line wrap: on
line diff
--- a/src/regexp.c
+++ b/src/regexp.c
@@ -294,6 +294,7 @@ init_class_tab(void)
 
 static char_u	*regparse;	// Input-scan pointer.
 static int	regnpar;	// () count.
+static int	wants_nfa;	// regex should use NFA engine
 #ifdef FEAT_SYN_HL
 static int	regnzpar;	// \z() count.
 static int	re_has_z;	// \z item detected
@@ -381,6 +382,9 @@ static int	cstrncmp(char_u *s1, char_u *
 static char_u	*cstrchr(char_u *, int);
 static int	re_mult_next(char *what);
 static int	reg_iswordc(int);
+#ifdef FEAT_EVAL
+static void report_re_switch(char_u *pat);
+#endif
 
 static regengine_T bt_regengine;
 static regengine_T nfa_regengine;
@@ -2662,7 +2666,7 @@ vim_regcomp(char_u *expr_arg, int re_fla
     if (prog == NULL)
     {
 #ifdef BT_REGEXP_DEBUG_LOG
-	if (regexp_engine != BACKTRACKING_ENGINE)   // debugging log for NFA
+	if (regexp_engine == BACKTRACKING_ENGINE)   // debugging log for BT engine
 	{
 	    FILE *f;
 	    f = fopen(BT_REGEXP_DEBUG_LOG_NAME, "a");
@@ -2686,6 +2690,9 @@ vim_regcomp(char_u *expr_arg, int re_fla
 					  && called_emsg == called_emsg_before)
 	{
 	    regexp_engine = BACKTRACKING_ENGINE;
+#ifdef FEAT_EVAL
+	    report_re_switch(expr);
+#endif
 	    prog = bt_regengine.regcomp(expr, re_flags);
 	}
     }
--- a/src/regexp_nfa.c
+++ b/src/regexp_nfa.c
@@ -253,6 +253,12 @@ static int nfa_re_flags; // re_flags pas
 static int *post_start;  // holds the postfix form of r.e.
 static int *post_end;
 static int *post_ptr;
+
+// Set when the pattern should use the NFA engine.
+// E.g. [[:upper:]] only allows 8bit characters for BT engine,
+// while NFA engine handles multibyte characters correctly.
+static int wants_nfa;
+
 static int nstate;	// Number of states in the NFA.
 static int istate;	// Index in the state vector, used in alloc_state()
 
@@ -306,6 +312,7 @@ nfa_regcomp_start(
 	return FAIL;
     post_ptr = post_start;
     post_end = post_start + nstate_max;
+    wants_nfa = FALSE;
     rex.nfa_has_zend = FALSE;
     rex.nfa_has_backref = FALSE;
 
@@ -1707,6 +1714,7 @@ collection:
 				    EMIT(NFA_CLASS_GRAPH);
 				    break;
 				case CLASS_LOWER:
+				    wants_nfa = TRUE;
 				    EMIT(NFA_CLASS_LOWER);
 				    break;
 				case CLASS_PRINT:
@@ -1719,6 +1727,7 @@ collection:
 				    EMIT(NFA_CLASS_SPACE);
 				    break;
 				case CLASS_UPPER:
+				    wants_nfa = TRUE;
 				    EMIT(NFA_CLASS_UPPER);
 				    break;
 				case CLASS_XDIGIT:
@@ -2137,9 +2146,15 @@ nfa_regpiece(void)
 
 	    // The engine is very inefficient (uses too many states) when the
 	    // maximum is much larger than the minimum and when the maximum is
-	    // large.  Bail out if we can use the other engine.
+	    // large.  However, when maxval is MAX_LIMIT, it is okay, as this
+	    // will emit NFA_STAR.
+	    // Bail out if we can use the other engine, but only when the
+	    // pattern does not need the NFA engine (e.g. [[:upper:]]\{2,\}
+	    // does not work with characters > 8 bit with the BT engine).
 	    if ((nfa_re_flags & RE_AUTO)
-				   && (maxval > 500 || maxval > minval + 200))
+				   && (maxval > 500 || maxval > minval + 200)
+				   && (maxval != MAX_LIMIT && minval < 200)
+				   && !wants_nfa)
 		return FAIL;
 
 	    // Ignore previous call to nfa_regatom()
--- a/src/testdir/test_regexp_utf8.vim
+++ b/src/testdir/test_regexp_utf8.vim
@@ -510,6 +510,52 @@ func Test_match_start_of_line_combining(
   bwipe!
 endfunc
 
+" Check that [[:upper:]] matches for automatic engine
+func Test_match_char_class_upper()
+  new
+  let _engine=&regexpengine
 
+  " Test 1: [[:upper:]]\{2,\}
+  set regexpengine=0
+  call setline(1, ['05. ПЕСНЯ О ГЕРОЯХ муз. А. Давиденко, М. Коваля и Б. Шехтера ...', '05. PJESNJA O GJEROJAKH mus. A. Davidjenko, M. Kovalja i B. Shjekhtjera ...'])
+  call cursor(1,1)
+  let search_cmd='norm /\<[[:upper:]]\{2,\}\>' .. "\<CR>"
+  exe search_cmd
+  call assert_equal(4, searchcount().total, 'TEST 1')
+  set regexpengine=1
+  exe search_cmd
+  call assert_equal(2, searchcount().total, 'TEST 1')
+  set regexpengine=2
+  exe search_cmd
+  call assert_equal(4, searchcount().total, 'TEST 1')
+
+  " Test 2: [[:upper:]].\+
+  let search_cmd='norm /\<[[:upper:]].\+\>' .. "\<CR>"
+  set regexpengine=0
+  exe search_cmd
+  call assert_equal(2, searchcount().total, 'TEST 2')
+  set regexpengine=1
+  exe search_cmd
+  call assert_equal(1, searchcount().total, 'TEST 2')
+  set regexpengine=2
+  exe search_cmd
+  call assert_equal(2, searchcount().total, 'TEST 2')
+
+  " Test 3: [[:lower:]]\+
+  let search_cmd='norm /\<[[:lower:]]\+\>' .. "\<CR>"
+  set regexpengine=0
+  exe search_cmd
+  call assert_equal(4, searchcount().total, 'TEST 3 lower')
+  set regexpengine=1
+  exe search_cmd
+  call assert_equal(2, searchcount().total, 'TEST 3 lower')
+  set regexpengine=2
+  exe search_cmd
+  call assert_equal(4, searchcount().total, 'TEST 3 lower')
+
+  " clean up
+  let &regexpengine=_engine
+  bwipe!
+endfunc
 
 " vim: shiftwidth=2 sts=2 expandtab
--- a/src/version.c
+++ b/src/version.c
@@ -751,6 +751,8 @@ static char *(features[]) =
 static int included_patches[] =
 {   /* Add new patch number below this line */
 /**/
+    2278,
+/**/
     2277,
 /**/
     2276,