changeset 31235:7fb4e244b16e v9.0.0951

patch 9.0.0951: trying every character position for a match is inefficient

Commit: https://github.com/vim/vim/commit/01105b37a108022515d364201767f7f111ec4222
Author: Bram Moolenaar <Bram@vim.org>
Date:   Sat Nov 26 11:47:10 2022 +0000

    patch 9.0.0951: trying every character position for a match is inefficient

    Problem:    Trying every character position for a match is inefficient.
    Solution:   Use the start position of the match ignoring "\zs".
author Bram Moolenaar <Bram@vim.org>
date Sat, 26 Nov 2022 13:00:05 +0100
parents e841d465ff3d
children c5c19125df73
files src/regexp.c src/regexp.h src/regexp_bt.c src/regexp_nfa.c src/version.c
diffstat 5 files changed, 34 insertions(+), 2 deletions(-) [+]
line wrap: on
line diff
--- a/src/regexp.c
+++ b/src/regexp.c
@@ -1123,10 +1123,12 @@ static unsigned	reg_tofreelen;
 typedef struct {
     regmatch_T		*reg_match;
     regmmatch_T		*reg_mmatch;
+
     char_u		**reg_startp;
     char_u		**reg_endp;
     lpos_T		*reg_startpos;
     lpos_T		*reg_endpos;
+
     win_T		*reg_win;
     buf_T		*reg_buf;
     linenr_T		reg_firstlnum;
--- a/src/regexp.h
+++ b/src/regexp.h
@@ -133,6 +133,8 @@ typedef struct
     regprog_T		*regprog;
     char_u		*startp[NSUBEXP];
     char_u		*endp[NSUBEXP];
+
+    colnr_T		rm_matchcol;   // match start without "\zs"
     int			rm_ic;
 } regmatch_T;
 
@@ -149,6 +151,8 @@ typedef struct
     regprog_T		*regprog;
     lpos_T		startpos[NSUBEXP];
     lpos_T		endpos[NSUBEXP];
+
+    colnr_T		rmm_matchcol;   // match start without "\zs"
     int			rmm_ic;
     colnr_T		rmm_maxcol;	// when not zero: maximum column
 } regmmatch_T;
--- a/src/regexp_bt.c
+++ b/src/regexp_bt.c
@@ -4842,11 +4842,12 @@ regtry(
     static long
 bt_regexec_both(
     char_u	*line,
-    colnr_T	col,		// column to start looking for match
+    colnr_T	startcol,	// column to start looking for match
     int		*timed_out)	// flag set on timeout or NULL
 {
     bt_regprog_T    *prog;
     char_u	    *s;
+    colnr_T	    col = startcol;
     long	    retval = 0L;
 
     // Create "regstack" and "backpos" if they are not allocated yet.
@@ -5042,11 +5043,19 @@ theend:
 	    if (end->lnum < start->lnum
 			|| (end->lnum == start->lnum && end->col < start->col))
 		rex.reg_mmatch->endpos[0] = rex.reg_mmatch->startpos[0];
+
+	    // startpos[0] may be set by "\zs", also return the column where
+	    // the whole pattern matched.
+	    rex.reg_mmatch->rmm_matchcol = col;
 	}
 	else
 	{
 	    if (rex.reg_match->endp[0] < rex.reg_match->startp[0])
 		rex.reg_match->endp[0] = rex.reg_match->startp[0];
+
+	    // startpos[0] may be set by "\zs", also return the column where
+	    // the whole pattern matched.
+	    rex.reg_match->rm_matchcol = col;
 	}
     }
 
--- a/src/regexp_nfa.c
+++ b/src/regexp_nfa.c
@@ -7378,7 +7378,14 @@ nfa_regexec_both(
 	// If match_text is set it contains the full text that must match.
 	// Nothing else to try. Doesn't handle combining chars well.
 	if (prog->match_text != NULL && !rex.reg_icombine)
-	    return find_match_text(col, prog->regstart, prog->match_text);
+	{
+	    retval = find_match_text(col, prog->regstart, prog->match_text);
+	    if (REG_MULTI)
+		rex.reg_mmatch->rmm_matchcol = col;
+	    else
+		rex.reg_match->rm_matchcol = col;
+	    return retval;
+	}
     }
 
     // If the start column is past the maximum column: no need to try.
@@ -7414,11 +7421,19 @@ theend:
 	    if (end->lnum < start->lnum
 			|| (end->lnum == start->lnum && end->col < start->col))
 		rex.reg_mmatch->endpos[0] = rex.reg_mmatch->startpos[0];
+
+	    // startpos[0] may be set by "\zs", also return the column where
+	    // the whole pattern matched.
+	    rex.reg_mmatch->rmm_matchcol = col;
 	}
 	else
 	{
 	    if (rex.reg_match->endp[0] < rex.reg_match->startp[0])
 		rex.reg_match->endp[0] = rex.reg_match->startp[0];
+
+	    // startpos[0] may be set by "\zs", also return the column where
+	    // the whole pattern matched.
+	    rex.reg_match->rm_matchcol = col;
 	}
     }
 
--- a/src/version.c
+++ b/src/version.c
@@ -696,6 +696,8 @@ static char *(features[]) =
 static int included_patches[] =
 {   /* Add new patch number below this line */
 /**/
+    951,
+/**/
     950,
 /**/
     949,