# HG changeset patch # User Bram Moolenaar # Date 1669464005 -3600 # Node ID 7fb4e244b16efe74439ab8079a49999b7dcf6a4c # Parent e841d465ff3d7c5295c3f157ec88c707aedc761d patch 9.0.0951: trying every character position for a match is inefficient Commit: https://github.com/vim/vim/commit/01105b37a108022515d364201767f7f111ec4222 Author: Bram Moolenaar Date: Sat Nov 26 11:47:10 2022 +0000 patch 9.0.0951: trying every character position for a match is inefficient Problem: Trying every character position for a match is inefficient. Solution: Use the start position of the match ignoring "\zs". diff --git a/src/regexp.c b/src/regexp.c --- a/src/regexp.c +++ b/src/regexp.c @@ -1123,10 +1123,12 @@ static unsigned reg_tofreelen; typedef struct { regmatch_T *reg_match; regmmatch_T *reg_mmatch; + char_u **reg_startp; char_u **reg_endp; lpos_T *reg_startpos; lpos_T *reg_endpos; + win_T *reg_win; buf_T *reg_buf; linenr_T reg_firstlnum; diff --git a/src/regexp.h b/src/regexp.h --- a/src/regexp.h +++ b/src/regexp.h @@ -133,6 +133,8 @@ typedef struct regprog_T *regprog; char_u *startp[NSUBEXP]; char_u *endp[NSUBEXP]; + + colnr_T rm_matchcol; // match start without "\zs" int rm_ic; } regmatch_T; @@ -149,6 +151,8 @@ typedef struct regprog_T *regprog; lpos_T startpos[NSUBEXP]; lpos_T endpos[NSUBEXP]; + + colnr_T rmm_matchcol; // match start without "\zs" int rmm_ic; colnr_T rmm_maxcol; // when not zero: maximum column } regmmatch_T; diff --git a/src/regexp_bt.c b/src/regexp_bt.c --- a/src/regexp_bt.c +++ b/src/regexp_bt.c @@ -4842,11 +4842,12 @@ regtry( static long bt_regexec_both( char_u *line, - colnr_T col, // column to start looking for match + colnr_T startcol, // column to start looking for match int *timed_out) // flag set on timeout or NULL { bt_regprog_T *prog; char_u *s; + colnr_T col = startcol; long retval = 0L; // Create "regstack" and "backpos" if they are not allocated yet. 
@@ -5042,11 +5043,19 @@ theend: if (end->lnum < start->lnum || (end->lnum == start->lnum && end->col < start->col)) rex.reg_mmatch->endpos[0] = rex.reg_mmatch->startpos[0]; + + // startpos[0] may be set by "\zs", also return the column where + // the whole pattern matched. + rex.reg_mmatch->rmm_matchcol = col; } else { if (rex.reg_match->endp[0] < rex.reg_match->startp[0]) rex.reg_match->endp[0] = rex.reg_match->startp[0]; + + // startp[0] may be set by "\zs", also return the column where + // the whole pattern matched. + rex.reg_match->rm_matchcol = col; } } diff --git a/src/regexp_nfa.c b/src/regexp_nfa.c --- a/src/regexp_nfa.c +++ b/src/regexp_nfa.c @@ -7378,7 +7378,14 @@ nfa_regexec_both( // If match_text is set it contains the full text that must match. // Nothing else to try. Doesn't handle combining chars well. if (prog->match_text != NULL && !rex.reg_icombine) - return find_match_text(col, prog->regstart, prog->match_text); + { + retval = find_match_text(col, prog->regstart, prog->match_text); + if (REG_MULTI) + rex.reg_mmatch->rmm_matchcol = col; + else + rex.reg_match->rm_matchcol = col; + return retval; + } } // If the start column is past the maximum column: no need to try. @@ -7414,11 +7421,19 @@ theend: if (end->lnum < start->lnum || (end->lnum == start->lnum && end->col < start->col)) rex.reg_mmatch->endpos[0] = rex.reg_mmatch->startpos[0]; + + // startpos[0] may be set by "\zs", also return the column where + // the whole pattern matched. + rex.reg_mmatch->rmm_matchcol = col; } else { if (rex.reg_match->endp[0] < rex.reg_match->startp[0]) rex.reg_match->endp[0] = rex.reg_match->startp[0]; + + // startp[0] may be set by "\zs", also return the column where + // the whole pattern matched. 
+ rex.reg_match->rm_matchcol = col; } } diff --git a/src/version.c b/src/version.c --- a/src/version.c +++ b/src/version.c @@ -696,6 +696,8 @@ static char *(features[]) = static int included_patches[] = { /* Add new patch number below this line */ /**/ + 951, +/**/ 950, /**/ 949,