Mercurial > vim
changeset 31235:7fb4e244b16e v9.0.0951
patch 9.0.0951: trying every character position for a match is inefficient
Commit: https://github.com/vim/vim/commit/01105b37a108022515d364201767f7f111ec4222
Author: Bram Moolenaar <Bram@vim.org>
Date: Sat Nov 26 11:47:10 2022 +0000
patch 9.0.0951: trying every character position for a match is inefficient
Problem: Trying every character position for a match is inefficient.
Solution: Use the start position of the match ignoring "\zs".
author | Bram Moolenaar <Bram@vim.org> |
---|---|
date | Sat, 26 Nov 2022 13:00:05 +0100 |
parents | e841d465ff3d |
children | c5c19125df73 |
files | src/regexp.c src/regexp.h src/regexp_bt.c src/regexp_nfa.c src/version.c |
diffstat | 5 files changed, 34 insertions(+), 2 deletions(-) [+] |
line wrap: on
line diff
--- a/src/regexp.c
+++ b/src/regexp.c
@@ -1123,10 +1123,12 @@ static unsigned reg_tofreelen;
 typedef struct {
     regmatch_T *reg_match;
     regmmatch_T *reg_mmatch;
+
     char_u **reg_startp;
     char_u **reg_endp;
     lpos_T *reg_startpos;
     lpos_T *reg_endpos;
+
     win_T *reg_win;
     buf_T *reg_buf;
     linenr_T reg_firstlnum;
--- a/src/regexp.h
+++ b/src/regexp.h
@@ -133,6 +133,8 @@ typedef struct
     regprog_T *regprog;
     char_u *startp[NSUBEXP];
     char_u *endp[NSUBEXP];
+
+    colnr_T rm_matchcol; // match start without "\zs"
     int rm_ic;
 } regmatch_T;
 
@@ -149,6 +151,8 @@ typedef struct
     regprog_T *regprog;
     lpos_T startpos[NSUBEXP];
     lpos_T endpos[NSUBEXP];
+
+    colnr_T rmm_matchcol; // match start without "\zs"
     int rmm_ic;
     colnr_T rmm_maxcol; // when not zero: maximum column
 } regmmatch_T;
--- a/src/regexp_bt.c
+++ b/src/regexp_bt.c
@@ -4842,11 +4842,12 @@ regtry(
 static long
 bt_regexec_both(
     char_u *line,
-    colnr_T col, // column to start looking for match
+    colnr_T startcol, // column to start looking for match
     int *timed_out) // flag set on timeout or NULL
 {
     bt_regprog_T *prog;
     char_u *s;
+    colnr_T col = startcol;
     long retval = 0L;
 
     // Create "regstack" and "backpos" if they are not allocated yet.
@@ -5042,11 +5043,19 @@ theend:
         if (end->lnum < start->lnum
                 || (end->lnum == start->lnum && end->col < start->col))
             rex.reg_mmatch->endpos[0] = rex.reg_mmatch->startpos[0];
+
+        // startpos[0] may be set by "\zs", also return the column where
+        // the whole pattern matched.
+        rex.reg_mmatch->rmm_matchcol = col;
     }
     else
     {
         if (rex.reg_match->endp[0] < rex.reg_match->startp[0])
             rex.reg_match->endp[0] = rex.reg_match->startp[0];
+
+        // startpos[0] may be set by "\zs", also return the column where
+        // the whole pattern matched.
+        rex.reg_match->rm_matchcol = col;
     }
 }
 
--- a/src/regexp_nfa.c
+++ b/src/regexp_nfa.c
@@ -7378,7 +7378,14 @@ nfa_regexec_both(
         // If match_text is set it contains the full text that must match.
         // Nothing else to try. Doesn't handle combining chars well.
         if (prog->match_text != NULL && !rex.reg_icombine)
-            return find_match_text(col, prog->regstart, prog->match_text);
+        {
+            retval = find_match_text(col, prog->regstart, prog->match_text);
+            if (REG_MULTI)
+                rex.reg_mmatch->rmm_matchcol = col;
+            else
+                rex.reg_match->rm_matchcol = col;
+            return retval;
+        }
     }
 
     // If the start column is past the maximum column: no need to try.
@@ -7414,11 +7421,19 @@ theend:
         if (end->lnum < start->lnum
                 || (end->lnum == start->lnum && end->col < start->col))
             rex.reg_mmatch->endpos[0] = rex.reg_mmatch->startpos[0];
+
+        // startpos[0] may be set by "\zs", also return the column where
+        // the whole pattern matched.
+        rex.reg_mmatch->rmm_matchcol = col;
     }
     else
     {
         if (rex.reg_match->endp[0] < rex.reg_match->startp[0])
             rex.reg_match->endp[0] = rex.reg_match->startp[0];
+
+        // startpos[0] may be set by "\zs", also return the column where
+        // the whole pattern matched.
+        rex.reg_match->rm_matchcol = col;
     }
 }
 