# HG changeset patch # User Christian Brabandt # Date 1531799105 -7200 # Node ID ffd834f893aa28660da92fd82b88a5bad39efd51 # Parent 35d1c7f3293b289837ca6d159195ce9153c6fc8c patch 8.1.0192: executing regexp recursively fails with a crash commit https://github.com/vim/vim/commit/0270f38e1ae484c31a80c813a08691c47a207f1a Author: Bram Moolenaar Date: Tue Jul 17 05:43:58 2018 +0200 patch 8.1.0192: executing regexp recursively fails with a crash Problem: Executing regexp recursively fails with a crash. Solution: Move global variables into "rex". diff --git a/src/regexp.c b/src/regexp.c --- a/src/regexp.c +++ b/src/regexp.c @@ -344,7 +344,6 @@ toggle_Magic(int x) #define MAX_LIMIT (32767L << 16L) -static int re_multi_type(int); static int cstrncmp(char_u *s1, char_u *s2, int *n); static char_u *cstrchr(char_u *, int); @@ -371,6 +370,8 @@ static char_u e_z1_not_allowed[] = N_("E #endif static char_u e_missing_sb[] = N_("E69: Missing ] after %s%%["); static char_u e_empty_sb[] = N_("E70: Empty %s%%[]"); +static char_u e_recursive[] = N_("E956: Cannot use pattern recursively"); + #define NOT_MULTI 0 #define MULTI_ONE 1 #define MULTI_MULT 2 @@ -426,14 +427,6 @@ static char_u *reg_prev_sub = NULL; static char_u REGEXP_INRANGE[] = "]^-n\\"; static char_u REGEXP_ABBR[] = "nrtebdoxuU"; -static int backslash_trans(int c); -static int get_char_class(char_u **pp); -static int get_equi_class(char_u **pp); -static void reg_equi_class(int c); -static int get_coll_element(char_u **pp); -static char_u *skip_anyof(char_u *p); -static void init_class_tab(void); - /* * Translate '\x' to its control character, except "\n", which is Magic. */ @@ -688,8 +681,6 @@ typedef struct * Forward declarations for vim_regcomp()'s friends. */ static void initchr(char_u *); -static void save_parse_state(parse_state_T *ps); -static void restore_parse_state(parse_state_T *ps); static int getchr(void); static void skipchr_keepstart(void); static int peekchr(void); @@ -1171,7 +1162,6 @@ get_coll_element(char_u **pp) return 0; } -static void get_cpo_flags(void); static int reg_cpo_lit; /* 'cpoptions' contains 'l' flag */ static int reg_cpo_bsl; /* 'cpoptions' contains '\' flag */ @@ -1322,9 +1312,6 @@ seen_endbrace(int refnum) return TRUE; } -static regprog_T *bt_regcomp(char_u *expr, int re_flags); -static void bt_regfree(regprog_T *prog); - /* * bt_regcomp() - compile a regular expression into internal code for the * traditional back track matcher. @@ -1373,6 +1360,7 @@ bt_regcomp(char_u *expr, int re_flags) r = (bt_regprog_T *)lalloc(sizeof(bt_regprog_T) + regsize, TRUE); if (r == NULL) return NULL; + r->re_in_use = FALSE; /* * Second pass: emit code. @@ -1525,9 +1513,9 @@ vim_regcomp_had_eol(void) } #endif -/* variables for parsing reginput */ -static int at_start; /* True when on the first character */ -static int prev_at_start; /* True when on the second character */ +// variables used for parsing +static int at_start; // True when on the first character +static int prev_at_start; // True when on the second character /* * Parse regular expression, i.e. main body or parenthesized thing. @@ -3443,18 +3431,6 @@ read_limits(long *minval, long *maxval) * Global work variables for vim_regexec(). */ -/* The current match-position is remembered with these variables: */ -static linenr_T reglnum; /* line number, relative to first line */ -static char_u *regline; /* start of current line */ -static char_u *reginput; /* current input, points into "regline" */ - -static int need_clear_subexpr; /* subexpressions still need to be - * cleared */ -#ifdef FEAT_SYN_HL -static int need_clear_zsubexpr = FALSE; /* extmatch subexpressions - * still need to be cleared */ -#endif - /* * Structure used to save the current input state, when it needs to be * restored after trying a match. Used by reg_save() and reg_restore(). @@ -3464,8 +3440,8 @@ typedef struct { union { - char_u *ptr; /* reginput pointer, for single-line regexp */ - lpos_T pos; /* reginput pos, for multi-line regexp */ + char_u *ptr; /* rex.input pointer, for single-line regexp */ + lpos_T pos; /* rex.input pos, for multi-line regexp */ } rs_u; int rs_len; } regsave_T; @@ -3564,13 +3540,24 @@ typedef struct { linenr_T reg_maxline; int reg_line_lbr; /* "\n" in string is line break */ + // The current match-position is stord in these variables: + linenr_T lnum; // line number, relative to first line + char_u *line; // start of current line + char_u *input; // current input, points into "regline" + + int need_clear_subexpr; // subexpressions still need to be cleared +#ifdef FEAT_SYN_HL + int need_clear_zsubexpr; // extmatch subexpressions still need to be + // cleared +#endif + /* Internal copy of 'ignorecase'. It is set at each call to vim_regexec(). * Normally it gets the value of "rm_ic" or "rmm_ic", but when the pattern * contains '\c' or '\C' the value is overruled. */ int reg_ic; #ifdef FEAT_MBYTE - /* Similar to rex.reg_ic, but only for 'combining' characters. Set with \Z + /* Similar to "reg_ic", but only for 'combining' characters. Set with \Z * flag in the regexp. Defaults to false, always. */ int reg_icombine; #endif @@ -3578,6 +3565,22 @@ typedef struct { /* Copy of "rmm_maxcol": maximum column to search for a match. Zero when * there is no maximum. */ colnr_T reg_maxcol; + + // State for the NFA engine regexec. + int nfa_has_zend; // NFA regexp \ze operator encountered. + int nfa_has_backref; // NFA regexp \1 .. \9 encountered. + int nfa_nsubexpr; // Number of sub expressions actually being used + // during execution. 1 if only the whole match + // (subexpr 0) is used. + // listid is global, so that it increases on recursive calls to + // nfa_regmatch(), which means we don't have to clear the lastlist field of + // all the states. + int nfa_listid; + int nfa_alt_listid; + +#ifdef FEAT_SYN_HL + int nfa_has_zsubexpr; // NFA regexp has \z( ), set zsubexpr. +#endif } regexec_T; static regexec_T rex; @@ -3619,7 +3622,7 @@ typedef struct regitem_S { save_se_T sesave; regsave_T regsave; - } rs_un; /* room for saving reginput */ + } rs_un; /* room for saving rex.input */ short rs_no; /* submatch nr or BEHIND/NOBEHIND */ } regitem_T; @@ -3896,8 +3899,8 @@ bt_regexec_both( goto theend; } - regline = line; - reglnum = 0; + rex.line = line; + rex.lnum = 0; reg_toolong = FALSE; /* Simplest case: Anchored match need be tried only once. */ @@ -3907,10 +3910,10 @@ bt_regexec_both( #ifdef FEAT_MBYTE if (has_mbyte) - c = (*mb_ptr2char)(regline + col); + c = (*mb_ptr2char)(rex.line + col); else #endif - c = regline[col]; + c = rex.line[col]; if (prog->regstart == NUL || prog->regstart == c || (rex.reg_ic && (( @@ -3940,15 +3943,15 @@ bt_regexec_both( && !has_mbyte #endif ) - s = vim_strbyte(regline + col, prog->regstart); + s = vim_strbyte(rex.line + col, prog->regstart); else - s = cstrchr(regline + col, prog->regstart); + s = cstrchr(rex.line + col, prog->regstart); if (s == NULL) { retval = 0; break; } - col = (int)(s - regline); + col = (int)(s - rex.line); } /* Check for maximum column to try. */ @@ -3963,16 +3966,16 @@ bt_regexec_both( break; /* if not currently on the first line, get it again */ - if (reglnum != 0) + if (rex.lnum != 0) { - reglnum = 0; - regline = reg_getline((linenr_T)0); + rex.lnum = 0; + rex.line = reg_getline((linenr_T)0); } - if (regline[col] == NUL) + if (rex.line[col] == NUL) break; #ifdef FEAT_MBYTE if (has_mbyte) - col += (*mb_ptr2len)(regline + col); + col += (*mb_ptr2len)(rex.line + col); else #endif ++col; @@ -4052,7 +4055,7 @@ unref_extmatch(reg_extmatch_T *em) #endif /* - * regtry - try match of "prog" with at regline["col"]. + * regtry - try match of "prog" with at rex.line["col"]. * Returns 0 for failure, number of lines contained in the match otherwise. */ static long @@ -4062,12 +4065,11 @@ regtry( proftime_T *tm, /* timeout limit or NULL */ int *timed_out) /* flag set on timeout or NULL */ { - reginput = regline + col; - need_clear_subexpr = TRUE; + rex.input = rex.line + col; + rex.need_clear_subexpr = TRUE; #ifdef FEAT_SYN_HL - /* Clear the external match subpointers if necessary. */ - if (prog->reghasz == REX_SET) - need_clear_zsubexpr = TRUE; + // Clear the external match subpointers if necessary. + rex.need_clear_zsubexpr = (prog->reghasz == REX_SET); #endif if (regmatch(prog->program + 1, tm, timed_out) == 0) @@ -4083,19 +4085,19 @@ regtry( } if (rex.reg_endpos[0].lnum < 0) { - rex.reg_endpos[0].lnum = reglnum; - rex.reg_endpos[0].col = (int)(reginput - regline); + rex.reg_endpos[0].lnum = rex.lnum; + rex.reg_endpos[0].col = (int)(rex.input - rex.line); } else /* Use line number of "\ze". */ - reglnum = rex.reg_endpos[0].lnum; + rex.lnum = rex.reg_endpos[0].lnum; } else { if (rex.reg_startp[0] == NULL) - rex.reg_startp[0] = regline + col; + rex.reg_startp[0] = rex.line + col; if (rex.reg_endp[0] == NULL) - rex.reg_endp[0] = reginput; + rex.reg_endp[0] = rex.input; } #ifdef FEAT_SYN_HL /* Package any found \z(...\) matches for export. Default is none. */ @@ -4131,7 +4133,7 @@ regtry( } } #endif - return 1 + reglnum; + return 1 + rex.lnum; } #ifdef FEAT_MBYTE @@ -4143,9 +4145,9 @@ static int reg_prev_class(void); static int reg_prev_class(void) { - if (reginput > regline) - return mb_get_class_buf(reginput - 1 - - (*mb_head_off)(regline, reginput - 1), rex.reg_buf); + if (rex.input > rex.line) + return mb_get_class_buf(rex.input - 1 + - (*mb_head_off)(rex.line, rex.input - 1), rex.reg_buf); return -1; } #endif @@ -4153,7 +4155,7 @@ reg_prev_class(void) static int reg_match_visual(void); /* - * Return TRUE if the current reginput position matches the Visual area. + * Return TRUE if the current rex.input position matches the Visual area. */ static int reg_match_visual(void) @@ -4199,13 +4201,13 @@ reg_match_visual(void) } mode = curbuf->b_visual.vi_mode; } - lnum = reglnum + rex.reg_firstlnum; + lnum = rex.lnum + rex.reg_firstlnum; if (lnum < top.lnum || lnum > bot.lnum) return FALSE; if (mode == 'v') { - col = (colnr_T)(reginput - regline); + col = (colnr_T)(rex.input - rex.line); if ((lnum == top.lnum && col < top.col) || (lnum == bot.lnum && col >= bot.col + (*p_sel != 'e'))) return FALSE; @@ -4220,14 +4222,14 @@ reg_match_visual(void) end = end2; if (top.col == MAXCOL || bot.col == MAXCOL) end = MAXCOL; - cols = win_linetabsize(wp, regline, (colnr_T)(reginput - regline)); + cols = win_linetabsize(wp, rex.line, (colnr_T)(rex.input - rex.line)); if (cols < start || cols > end - (*p_sel == 'e')) return FALSE; } return TRUE; } -#define ADVANCE_REGINPUT() MB_PTR_ADV(reginput) +#define ADVANCE_REGINPUT() MB_PTR_ADV(rex.input) /* * The arguments from BRACE_LIMITS are stored here. They are actually local @@ -4247,9 +4249,9 @@ static long bl_maxval; * (that don't need to know whether the rest of the match failed) by a nested * loop. * - * Returns TRUE when there is a match. Leaves reginput and reglnum just after + * Returns TRUE when there is a match. Leaves rex.input and rex.lnum just after * the last matched character. - * Returns FALSE when there is no match. Leaves reginput and reglnum in an + * Returns FALSE when there is no match. Leaves rex.input and rex.lnum in an * undefined state! */ static int @@ -4349,11 +4351,11 @@ regmatch( op = OP(scan); /* Check for character class with NL added. */ if (!rex.reg_line_lbr && WITH_NL(op) && REG_MULTI - && *reginput == NUL && reglnum <= rex.reg_maxline) + && *rex.input == NUL && rex.lnum <= rex.reg_maxline) { reg_nextline(); } - else if (rex.reg_line_lbr && WITH_NL(op) && *reginput == '\n') + else if (rex.reg_line_lbr && WITH_NL(op) && *rex.input == '\n') { ADVANCE_REGINPUT(); } @@ -4363,14 +4365,14 @@ regmatch( op -= ADD_NL; #ifdef FEAT_MBYTE if (has_mbyte) - c = (*mb_ptr2char)(reginput); + c = (*mb_ptr2char)(rex.input); else #endif - c = *reginput; + c = *rex.input; switch (op) { case BOL: - if (reginput != regline) + if (rex.input != rex.line) status = RA_NOMATCH; break; @@ -4383,13 +4385,13 @@ regmatch( /* We're not at the beginning of the file when below the first * line where we started, not at the start of the line or we * didn't start at the first line of the buffer. */ - if (reglnum != 0 || reginput != regline + if (rex.lnum != 0 || rex.input != rex.line || (REG_MULTI && rex.reg_firstlnum > 1)) status = RA_NOMATCH; break; case RE_EOF: - if (reglnum != rex.reg_maxline || c != NUL) + if (rex.lnum != rex.reg_maxline || c != NUL) status = RA_NOMATCH; break; @@ -4397,9 +4399,9 @@ regmatch( /* Check if the buffer is in a window and compare the * rex.reg_win->w_cursor position to the match position. */ if (rex.reg_win == NULL - || (reglnum + rex.reg_firstlnum + || (rex.lnum + rex.reg_firstlnum != rex.reg_win->w_cursor.lnum) - || ((colnr_T)(reginput - regline) + || ((colnr_T)(rex.input - rex.line) != rex.reg_win->w_cursor.col)) status = RA_NOMATCH; break; @@ -4414,13 +4416,13 @@ regmatch( pos = getmark_buf(rex.reg_buf, mark, FALSE); if (pos == NULL /* mark doesn't exist */ || pos->lnum <= 0 /* mark isn't set in reg_buf */ - || (pos->lnum == reglnum + rex.reg_firstlnum - ? (pos->col == (colnr_T)(reginput - regline) + || (pos->lnum == rex.lnum + rex.reg_firstlnum + ? (pos->col == (colnr_T)(rex.input - rex.line) ? (cmp == '<' || cmp == '>') - : (pos->col < (colnr_T)(reginput - regline) + : (pos->col < (colnr_T)(rex.input - rex.line) ? cmp != '>' : cmp != '<')) - : (pos->lnum < reglnum + rex.reg_firstlnum + : (pos->lnum < rex.lnum + rex.reg_firstlnum ? cmp != '>' : cmp != '<'))) status = RA_NOMATCH; @@ -4433,24 +4435,24 @@ regmatch( break; case RE_LNUM: - if (!REG_MULTI || !re_num_cmp((long_u)(reglnum + rex.reg_firstlnum), + if (!REG_MULTI || !re_num_cmp((long_u)(rex.lnum + rex.reg_firstlnum), scan)) status = RA_NOMATCH; break; case RE_COL: - if (!re_num_cmp((long_u)(reginput - regline) + 1, scan)) + if (!re_num_cmp((long_u)(rex.input - rex.line) + 1, scan)) status = RA_NOMATCH; break; case RE_VCOL: if (!re_num_cmp((long_u)win_linetabsize( rex.reg_win == NULL ? curwin : rex.reg_win, - regline, (colnr_T)(reginput - regline)) + 1, scan)) + rex.line, (colnr_T)(rex.input - rex.line)) + 1, scan)) status = RA_NOMATCH; break; - case BOW: /* \ regline - && vim_iswordc_buf(reginput[-1], rex.reg_buf))) + if (!vim_iswordc_buf(c, rex.reg_buf) || (rex.input > rex.line + && vim_iswordc_buf(rex.input[-1], rex.reg_buf))) status = RA_NOMATCH; } break; - case EOW: /* word\>; reginput points after d */ - if (reginput == regline) /* Can't match at start of line */ + case EOW: /* word\>; rex.input points after d */ + if (rex.input == rex.line) /* Can't match at start of line */ status = RA_NOMATCH; #ifdef FEAT_MBYTE else if (has_mbyte) @@ -4483,7 +4485,7 @@ regmatch( int this_class, prev_class; /* Get class of current and previous char (if it exists). */ - this_class = mb_get_class_buf(reginput, rex.reg_buf); + this_class = mb_get_class_buf(rex.input, rex.reg_buf); prev_class = reg_prev_class(); if (this_class == prev_class || prev_class == 0 || prev_class == 1) @@ -4492,8 +4494,8 @@ regmatch( #endif else { - if (!vim_iswordc_buf(reginput[-1], rex.reg_buf) - || (reginput[0] != NUL + if (!vim_iswordc_buf(rex.input[-1], rex.reg_buf) + || (rex.input[0] != NUL && vim_iswordc_buf(c, rex.reg_buf))) status = RA_NOMATCH; } @@ -4515,22 +4517,22 @@ regmatch( break; case SIDENT: - if (VIM_ISDIGIT(*reginput) || !vim_isIDc(c)) + if (VIM_ISDIGIT(*rex.input) || !vim_isIDc(c)) status = RA_NOMATCH; else ADVANCE_REGINPUT(); break; case KWORD: - if (!vim_iswordp_buf(reginput, rex.reg_buf)) + if (!vim_iswordp_buf(rex.input, rex.reg_buf)) status = RA_NOMATCH; else ADVANCE_REGINPUT(); break; case SKWORD: - if (VIM_ISDIGIT(*reginput) - || !vim_iswordp_buf(reginput, rex.reg_buf)) + if (VIM_ISDIGIT(*rex.input) + || !vim_iswordp_buf(rex.input, rex.reg_buf)) status = RA_NOMATCH; else ADVANCE_REGINPUT(); @@ -4544,21 +4546,21 @@ regmatch( break; case SFNAME: - if (VIM_ISDIGIT(*reginput) || !vim_isfilec(c)) + if (VIM_ISDIGIT(*rex.input) || !vim_isfilec(c)) status = RA_NOMATCH; else ADVANCE_REGINPUT(); break; case PRINT: - if (!vim_isprintc(PTR2CHAR(reginput))) + if (!vim_isprintc(PTR2CHAR(rex.input))) status = RA_NOMATCH; else ADVANCE_REGINPUT(); break; case SPRINT: - if (VIM_ISDIGIT(*reginput) || !vim_isprintc(PTR2CHAR(reginput))) + if (VIM_ISDIGIT(*rex.input) || !vim_isprintc(PTR2CHAR(rex.input))) status = RA_NOMATCH; else ADVANCE_REGINPUT(); @@ -4697,12 +4699,12 @@ regmatch( opnd = OPERAND(scan); /* Inline the first byte, for speed. */ - if (*opnd != *reginput + if (*opnd != *rex.input && (!rex.reg_ic || ( #ifdef FEAT_MBYTE !enc_utf8 && #endif - MB_TOLOWER(*opnd) != MB_TOLOWER(*reginput)))) + MB_TOLOWER(*opnd) != MB_TOLOWER(*rex.input)))) status = RA_NOMATCH; else if (*opnd == NUL) { @@ -4723,7 +4725,7 @@ regmatch( { /* Need to match first byte again for multi-byte. */ len = (int)STRLEN(opnd); - if (cstrncmp(opnd, reginput, &len) != 0) + if (cstrncmp(opnd, rex.input, &len) != 0) status = RA_NOMATCH; } #ifdef FEAT_MBYTE @@ -4731,7 +4733,7 @@ regmatch( * follows (skips over all composing chars). */ if (status != RA_NOMATCH && enc_utf8 - && UTF_COMPOSINGLIKE(reginput, reginput + len) + && UTF_COMPOSINGLIKE(rex.input, rex.input + len) && !rex.reg_icombine && OP(next) != RE_COMPOSING) { @@ -4742,7 +4744,7 @@ regmatch( } #endif if (status != RA_NOMATCH) - reginput += len; + rex.input += len; } } break; @@ -4780,10 +4782,10 @@ regmatch( /* When only a composing char is given match at any * position where that composing char appears. */ status = RA_NOMATCH; - for (i = 0; reginput[i] != NUL; - i += utf_ptr2len(reginput + i)) + for (i = 0; rex.input[i] != NUL; + i += utf_ptr2len(rex.input + i)) { - inpc = utf_ptr2char(reginput + i); + inpc = utf_ptr2char(rex.input + i); if (!utf_iscomposing(inpc)) { if (i > 0) @@ -4792,7 +4794,7 @@ regmatch( else if (opndc == inpc) { /* Include all following composing chars. */ - len = i + utfc_ptr2len(reginput + i); + len = i + utfc_ptr2len(rex.input + i); status = RA_MATCH; break; } @@ -4800,12 +4802,12 @@ regmatch( } else for (i = 0; i < len; ++i) - if (opnd[i] != reginput[i]) + if (opnd[i] != rex.input[i]) { status = RA_NOMATCH; break; } - reginput += len; + rex.input += len; } else status = RA_NOMATCH; @@ -4816,8 +4818,8 @@ regmatch( if (enc_utf8) { /* Skip composing characters. */ - while (utf_iscomposing(utf_ptr2char(reginput))) - MB_CPTR_ADV(reginput); + while (utf_iscomposing(utf_ptr2char(rex.input))) + MB_CPTR_ADV(rex.input); } #endif break; @@ -5003,7 +5005,7 @@ regmatch( /* Compare current input with back-ref in the same * line. */ len = (int)(rex.reg_endp[no] - rex.reg_startp[no]); - if (cstrncmp(rex.reg_startp[no], reginput, &len) != 0) + if (cstrncmp(rex.reg_startp[no], rex.input, &len) != 0) status = RA_NOMATCH; } } @@ -5017,14 +5019,14 @@ regmatch( } else { - if (rex.reg_startpos[no].lnum == reglnum - && rex.reg_endpos[no].lnum == reglnum) + if (rex.reg_startpos[no].lnum == rex.lnum + && rex.reg_endpos[no].lnum == rex.lnum) { /* Compare back-ref within the current line. */ len = rex.reg_endpos[no].col - rex.reg_startpos[no].col; - if (cstrncmp(regline + rex.reg_startpos[no].col, - reginput, &len) != 0) + if (cstrncmp(rex.line + rex.reg_startpos[no].col, + rex.input, &len) != 0) status = RA_NOMATCH; } else @@ -5045,7 +5047,7 @@ regmatch( } /* Matched the backref, skip over it. */ - reginput += len; + rex.input += len; } break; @@ -5069,10 +5071,10 @@ regmatch( { len = (int)STRLEN(re_extmatch_in->matches[no]); if (cstrncmp(re_extmatch_in->matches[no], - reginput, &len) != 0) + rex.input, &len) != 0) status = RA_NOMATCH; else - reginput += len; + rex.input += len; } else { @@ -5319,16 +5321,16 @@ regmatch( case BHPOS: if (REG_MULTI) { - if (behind_pos.rs_u.pos.col != (colnr_T)(reginput - regline) - || behind_pos.rs_u.pos.lnum != reglnum) + if (behind_pos.rs_u.pos.col != (colnr_T)(rex.input - rex.line) + || behind_pos.rs_u.pos.lnum != rex.lnum) status = RA_NOMATCH; } - else if (behind_pos.rs_u.ptr != reginput) + else if (behind_pos.rs_u.ptr != rex.input) status = RA_NOMATCH; break; case NEWL: - if ((c != NUL || !REG_MULTI || reglnum > rex.reg_maxline + if ((c != NUL || !REG_MULTI || rex.lnum > rex.reg_maxline || rex.reg_line_lbr) && (c != '\n' || !rex.reg_line_lbr)) status = RA_NOMATCH; @@ -5562,7 +5564,7 @@ regmatch( if (limit > 0 && ((rp->rs_un.regsave.rs_u.pos.lnum < behind_pos.rs_u.pos.lnum - ? (colnr_T)STRLEN(regline) + ? (colnr_T)STRLEN(rex.line) : behind_pos.rs_u.pos.col) - rp->rs_un.regsave.rs_u.pos.col >= limit)) no = FAIL; @@ -5578,7 +5580,7 @@ regmatch( { reg_restore(&rp->rs_un.regsave, &backpos); rp->rs_un.regsave.rs_u.pos.col = - (colnr_T)STRLEN(regline); + (colnr_T)STRLEN(rex.line); } } else @@ -5600,11 +5602,11 @@ regmatch( } else { - if (rp->rs_un.regsave.rs_u.ptr == regline) + if (rp->rs_un.regsave.rs_u.ptr == rex.line) no = FAIL; else { - MB_PTR_BACK(regline, rp->rs_un.regsave.rs_u.ptr); + MB_PTR_BACK(rex.line, rp->rs_un.regsave.rs_u.ptr); if (limit > 0 && (long)(behind_pos.rs_u.ptr - rp->rs_un.regsave.rs_u.ptr) > limit) no = FAIL; @@ -5678,20 +5680,20 @@ regmatch( * didn't match -- back up one char. */ if (--rst->count < rst->minval) break; - if (reginput == regline) + if (rex.input == rex.line) { /* backup to last char of previous line */ - --reglnum; - regline = reg_getline(reglnum); + --rex.lnum; + rex.line = reg_getline(rex.lnum); /* Just in case regrepeat() didn't count * right. */ - if (regline == NULL) + if (rex.line == NULL) break; - reginput = regline + STRLEN(regline); + rex.input = rex.line + STRLEN(rex.line); fast_breakcheck(); } else - MB_PTR_BACK(regline, reginput); + MB_PTR_BACK(rex.line, rex.input); } else { @@ -5711,8 +5713,8 @@ regmatch( status = RA_NOMATCH; /* If it could match, try it. */ - if (rst->nextb == NUL || *reginput == rst->nextb - || *reginput == rst->nextb_ic) + if (rst->nextb == NUL || *rex.input == rst->nextb + || *rex.input == rst->nextb_ic) { reg_save(&rp->rs_un.regsave, &backpos); scan = regnext(rp->rs_scan); @@ -5807,7 +5809,7 @@ regstack_pop(char_u **scan) /* * regrepeat - repeatedly match something simple, return how many. - * Advances reginput (and reglnum) to just after the matched chars. + * Advances rex.input (and rex.lnum) to just after the matched chars. */ static int regrepeat( @@ -5820,7 +5822,7 @@ regrepeat( int mask; int testval = 0; - scan = reginput; /* Make local copy of reginput for speed. */ + scan = rex.input; /* Make local copy of rex.input for speed. */ opnd = OPERAND(p); switch (OP(p)) { @@ -5835,12 +5837,12 @@ regrepeat( ++count; MB_PTR_ADV(scan); } - if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > rex.reg_maxline + if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline || rex.reg_line_lbr || count == maxcount) break; ++count; /* count the line-break */ reg_nextline(); - scan = reginput; + scan = rex.input; if (got_int) break; } @@ -5860,11 +5862,11 @@ regrepeat( } else if (*scan == NUL) { - if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > rex.reg_maxline + if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline || rex.reg_line_lbr) break; reg_nextline(); - scan = reginput; + scan = rex.input; if (got_int) break; } @@ -5891,11 +5893,11 @@ regrepeat( } else if (*scan == NUL) { - if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > rex.reg_maxline + if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline || rex.reg_line_lbr) break; reg_nextline(); - scan = reginput; + scan = rex.input; if (got_int) break; } @@ -5921,11 +5923,11 @@ regrepeat( } else if (*scan == NUL) { - if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > rex.reg_maxline + if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline || rex.reg_line_lbr) break; reg_nextline(); - scan = reginput; + scan = rex.input; if (got_int) break; } @@ -5947,11 +5949,11 @@ regrepeat( { if (*scan == NUL) { - if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > rex.reg_maxline + if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline || rex.reg_line_lbr) break; reg_nextline(); - scan = reginput; + scan = rex.input; if (got_int) break; } @@ -5979,11 +5981,11 @@ do_class: #endif if (*scan == NUL) { - if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > rex.reg_maxline + if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline || rex.reg_line_lbr) break; reg_nextline(); - scan = reginput; + scan = rex.input; if (got_int) break; } @@ -6144,11 +6146,11 @@ do_class: #endif if (*scan == NUL) { - if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > rex.reg_maxline + if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline || rex.reg_line_lbr) break; reg_nextline(); - scan = reginput; + scan = rex.input; if (got_int) break; } @@ -6174,7 +6176,7 @@ do_class: case NEWL: while (count < maxcount - && ((*scan == NUL && reglnum <= rex.reg_maxline + && ((*scan == NUL && rex.lnum <= rex.reg_maxline && !rex.reg_line_lbr && REG_MULTI) || (*scan == '\n' && rex.reg_line_lbr))) { @@ -6183,7 +6185,7 @@ do_class: ADVANCE_REGINPUT(); else reg_nextline(); - scan = reginput; + scan = rex.input; if (got_int) break; } @@ -6197,7 +6199,7 @@ do_class: break; } - reginput = scan; + rex.input = scan; return (int)count; } @@ -6255,7 +6257,7 @@ prog_magic_wrong(void) static void cleanup_subexpr(void) { - if (need_clear_subexpr) + if (rex.need_clear_subexpr) { if (REG_MULTI) { @@ -6268,7 +6270,7 @@ cleanup_subexpr(void) vim_memset(rex.reg_startp, 0, sizeof(char_u *) * NSUBEXP); vim_memset(rex.reg_endp, 0, sizeof(char_u *) * NSUBEXP); } - need_clear_subexpr = FALSE; + rex.need_clear_subexpr = FALSE; } } @@ -6276,7 +6278,7 @@ cleanup_subexpr(void) static void cleanup_zsubexpr(void) { - if (need_clear_zsubexpr) + if (rex.need_clear_zsubexpr) { if (REG_MULTI) { @@ -6289,7 +6291,7 @@ cleanup_zsubexpr(void) vim_memset(reg_startzp, 0, sizeof(char_u *) * NSUBEXP); vim_memset(reg_endzp, 0, sizeof(char_u *) * NSUBEXP); } - need_clear_zsubexpr = FALSE; + rex.need_clear_zsubexpr = FALSE; } } #endif @@ -6303,10 +6305,10 @@ save_subexpr(regbehind_T *bp) { int i; - /* When "need_clear_subexpr" is set we don't need to save the values, only + /* When "rex.need_clear_subexpr" is set we don't need to save the values, only * remember that this flag needs to be set again when restoring. */ - bp->save_need_clear_subexpr = need_clear_subexpr; - if (!need_clear_subexpr) + bp->save_need_clear_subexpr = rex.need_clear_subexpr; + if (!rex.need_clear_subexpr) { for (i = 0; i < NSUBEXP; ++i) { @@ -6333,8 +6335,8 @@ restore_subexpr(regbehind_T *bp) int i; /* Only need to restore saved values when they are not to be cleared. */ - need_clear_subexpr = bp->save_need_clear_subexpr; - if (!need_clear_subexpr) + rex.need_clear_subexpr = bp->save_need_clear_subexpr; + if (!rex.need_clear_subexpr) { for (i = 0; i < NSUBEXP; ++i) { @@ -6353,13 +6355,13 @@ restore_subexpr(regbehind_T *bp) } /* - * Advance reglnum, regline and reginput to the next line. + * Advance rex.lnum, rex.line and rex.input to the next line. */ static void reg_nextline(void) { - regline = reg_getline(++reglnum); - reginput = regline; + rex.line = reg_getline(++rex.lnum); + rex.input = rex.line; fast_breakcheck(); } @@ -6371,11 +6373,11 @@ reg_save(regsave_T *save, garray_T *gap) { if (REG_MULTI) { - save->rs_u.pos.col = (colnr_T)(reginput - regline); - save->rs_u.pos.lnum = reglnum; + save->rs_u.pos.col = (colnr_T)(rex.input - rex.line); + save->rs_u.pos.lnum = rex.lnum; } else - save->rs_u.ptr = reginput; + save->rs_u.ptr = rex.input; save->rs_len = gap->ga_len; } @@ -6387,17 +6389,17 @@ reg_restore(regsave_T *save, garray_T *g { if (REG_MULTI) { - if (reglnum != save->rs_u.pos.lnum) + if (rex.lnum != save->rs_u.pos.lnum) { /* only call reg_getline() when the line number changed to save * a bit of time */ - reglnum = save->rs_u.pos.lnum; - regline = reg_getline(reglnum); + rex.lnum = save->rs_u.pos.lnum; + rex.line = reg_getline(rex.lnum); } - reginput = regline + save->rs_u.pos.col; + rex.input = rex.line + save->rs_u.pos.col; } else - reginput = save->rs_u.ptr; + rex.input = save->rs_u.ptr; gap->ga_len = save->rs_len; } @@ -6408,9 +6410,9 @@ reg_restore(regsave_T *save, garray_T *g reg_save_equal(regsave_T *save) { if (REG_MULTI) - return reglnum == save->rs_u.pos.lnum - && reginput == regline + save->rs_u.pos.col; - return reginput == save->rs_u.ptr; + return rex.lnum == save->rs_u.pos.lnum + && rex.input == rex.line + save->rs_u.pos.col; + return rex.input == save->rs_u.ptr; } /* @@ -6424,15 +6426,15 @@ reg_save_equal(regsave_T *save) save_se_multi(save_se_T *savep, lpos_T *posp) { savep->se_u.pos = *posp; - posp->lnum = reglnum; - posp->col = (colnr_T)(reginput - regline); + posp->lnum = rex.lnum; + posp->col = (colnr_T)(rex.input - rex.line); } static void save_se_one(save_se_T *savep, char_u **pp) { savep->se_u.ptr = *pp; - *pp = reginput; + *pp = rex.input; } /* @@ -6475,9 +6477,9 @@ match_with_backref( { /* Since getting one line may invalidate the other, need to make copy. * Slow! */ - if (regline != reg_tofree) + if (rex.line != reg_tofree) { - len = (int)STRLEN(regline); + len = (int)STRLEN(rex.line); if (reg_tofree == NULL || len >= (int)reg_tofreelen) { len += 50; /* get some extra */ @@ -6487,9 +6489,9 @@ match_with_backref( return RA_FAIL; /* out of memory!*/ reg_tofreelen = len; } - STRCPY(reg_tofree, regline); - reginput = reg_tofree + (reginput - regline); - regline = reg_tofree; + STRCPY(reg_tofree, rex.line); + rex.input = reg_tofree + (rex.input - rex.line); + rex.line = reg_tofree; } /* Get the line to compare with. */ @@ -6499,13 +6501,13 @@ match_with_backref( else len = (int)STRLEN(p + ccol); - if (cstrncmp(p + ccol, reginput, &len) != 0) + if (cstrncmp(p + ccol, rex.input, &len) != 0) return RA_NOMATCH; /* doesn't match */ if (bytelen != NULL) *bytelen += len; if (clnum == end_lnum) break; /* match and at end! */ - if (reglnum >= rex.reg_maxline) + if (rex.lnum >= rex.reg_maxline) return RA_NOMATCH; /* text too short */ /* Advance to next line. */ @@ -6518,7 +6520,7 @@ match_with_backref( return RA_FAIL; } - /* found a match! Note that regline may now point to a copy of the line, + /* found a match! Note that rex.line may now point to a copy of the line, * that should not matter. */ return RA_MATCH; } @@ -8144,8 +8146,10 @@ vim_regcomp(char_u *expr_arg, int re_fla regexp_engine = AUTOMATIC_ENGINE; } } +#ifdef DEBUG bt_regengine.expr = expr; nfa_regengine.expr = expr; +#endif /* * First try the NFA engine, unless backtracking was requested. @@ -8243,10 +8247,19 @@ vim_regexec_string( regexec_T rex_save; int rex_in_use_save = rex_in_use; + // Cannot use the same prog recursively, it contains state. + if (rmp->regprog->re_in_use) + { + EMSG(_(e_recursive)); + return FALSE; + } + rmp->regprog->re_in_use = TRUE; + if (rex_in_use) - /* Being called recursively, save the state. */ + // Being called recursively, save the state. rex_save = rex; rex_in_use = TRUE; + rex.reg_startp = NULL; rex.reg_endp = NULL; rex.reg_startpos = NULL; @@ -8281,6 +8294,7 @@ vim_regexec_string( rex_in_use = rex_in_use_save; if (rex_in_use) rex = rex_save; + rmp->regprog->re_in_use = FALSE; return result > 0; } @@ -8353,6 +8367,14 @@ vim_regexec_multi( regexec_T rex_save; int rex_in_use_save = rex_in_use; + // Cannot use the same prog recursively, it contains state. + if (rmp->regprog->re_in_use) + { + EMSG(_(e_recursive)); + return FALSE; + } + rmp->regprog->re_in_use = TRUE; + if (rex_in_use) /* Being called recursively, save the state. */ rex_save = rex; @@ -8397,6 +8419,7 @@ vim_regexec_multi( rex_in_use = rex_in_use_save; if (rex_in_use) rex = rex_save; + rmp->regprog->re_in_use = FALSE; return result <= 0 ? 0 : result; } diff --git a/src/regexp.h b/src/regexp.h --- a/src/regexp.h +++ b/src/regexp.h @@ -50,8 +50,9 @@ typedef struct regprog { regengine_T *engine; unsigned regflags; - unsigned re_engine; /* automatic, backtracking or nfa engine */ - unsigned re_flags; /* second argument for vim_regcomp() */ + unsigned re_engine; // automatic, backtracking or nfa engine + unsigned re_flags; // second argument for vim_regcomp() + int re_in_use; // prog is being executed } regprog_T; /* @@ -65,7 +66,8 @@ typedef struct regengine_T *engine; unsigned regflags; unsigned re_engine; - unsigned re_flags; /* second argument for vim_regcomp() */ + unsigned re_flags; + int re_in_use; int regstart; char_u reganch; @@ -101,7 +103,8 @@ typedef struct regengine_T *engine; unsigned regflags; unsigned re_engine; - unsigned re_flags; /* second argument for vim_regcomp() */ + unsigned re_flags; + int re_in_use; nfa_state_T *start; /* points into state[] */ diff --git a/src/regexp_nfa.c b/src/regexp_nfa.c --- a/src/regexp_nfa.c +++ b/src/regexp_nfa.c @@ -244,41 +244,17 @@ static char_u e_nul_found[] = N_("E865: static char_u e_misplaced[] = N_("E866: (NFA regexp) Misplaced %c"); static char_u e_ill_char_class[] = N_("E877: (NFA regexp) Invalid character class: %ld"); -/* re_flags passed to nfa_regcomp() */ -static int nfa_re_flags; - -/* NFA regexp \ze operator encountered. */ -static int nfa_has_zend; - -/* NFA regexp \1 .. \9 encountered. */ -static int nfa_has_backref; - -#ifdef FEAT_SYN_HL -/* NFA regexp has \z( ), set zsubexpr. */ -static int nfa_has_zsubexpr; -#endif - -/* Number of sub expressions actually being used during execution. 1 if only - * the whole match (subexpr 0) is used. */ -static int nfa_nsubexpr; - -static int *post_start; /* holds the postfix form of r.e. */ +// Variables only used in nfa_regcomp() and descendants. +static int nfa_re_flags; // re_flags passed to nfa_regcomp() +static int *post_start; // holds the postfix form of r.e. static int *post_end; static int *post_ptr; - -static int nstate; /* Number of states in the NFA. Also used when - * executing. */ -static int istate; /* Index in the state vector, used in alloc_state() */ +static int nstate; // Number of states in the NFA. +static int istate; // Index in the state vector, used in alloc_state() /* If not NULL match must end at this position */ static save_se_T *nfa_endp = NULL; -/* listid is global, so that it increases on recursive calls to - * nfa_regmatch(), which means we don't have to clear the lastlist field of - * all the states. */ -static int nfa_listid; -static int nfa_alt_listid; - /* 0 for first call to nfa_regmatch(), 1 for recursive call. */ static int nfa_ll_index = 0; @@ -326,8 +302,8 @@ nfa_regcomp_start( return FAIL; post_ptr = post_start; post_end = post_start + nstate_max; - nfa_has_zend = FALSE; - nfa_has_backref = FALSE; + rex.nfa_has_zend = FALSE; + rex.nfa_has_backref = FALSE; /* shared with BT engine */ regcomp_start(expr, re_flags); @@ -1422,7 +1398,7 @@ nfa_regatom(void) if (!seen_endbrace(refnum + 1)) return FAIL; EMIT(NFA_BACKREF1 + refnum); - nfa_has_backref = TRUE; + rex.nfa_has_backref = TRUE; } break; @@ -1437,7 +1413,7 @@ nfa_regatom(void) break; case 'e': EMIT(NFA_ZEND); - nfa_has_zend = TRUE; + rex.nfa_has_zend = TRUE; if (re_mult_next("\\ze") == FAIL) return FAIL; break; @@ -1455,7 +1431,7 @@ nfa_regatom(void) if ((reg_do_extmatch & REX_USE) == 0) EMSG_RET_FAIL(_(e_z1_not_allowed)); EMIT(NFA_ZREF1 + (no_Magic(c) - '1')); - /* No need to set nfa_has_backref, the sub-matches don't + /* No need to set rex.nfa_has_backref, the sub-matches don't * change when \z1 .. \z9 matches or not. */ re_has_z = REX_USE; break; @@ -2920,11 +2896,11 @@ st_error(int *postfix UNUSED, int *end U if (df) { fprintf(df, "Error popping the stack!\n"); -#ifdef DEBUG +# ifdef DEBUG fprintf(df, "Current regexp is \"%s\"\n", nfa_regengine.expr); -#endif +# endif fprintf(df, "Postfix form is: "); -#ifdef DEBUG +# ifdef DEBUG for (p2 = postfix; p2 < end; p2++) { nfa_set_code(*p2); @@ -2937,7 +2913,7 @@ st_error(int *postfix UNUSED, int *end U nfa_set_code(*p2); fprintf(df, "%s, ", code); } -#else +# else for (p2 = postfix; p2 < end; p2++) { fprintf(df, "%d, ", *p2); @@ -2947,7 +2923,7 @@ st_error(int *postfix UNUSED, int *end U { fprintf(df, "%d, ", *p2); } -#endif +# endif fprintf(df, "\n--------------------------\n"); fclose(df); } @@ -3887,7 +3863,7 @@ log_subsexpr(regsubs_T *subs) { log_subexpr(&subs->norm); # ifdef FEAT_SYN_HL - if (nfa_has_zsubexpr) + if (rex.nfa_has_zsubexpr) log_subexpr(&subs->synt); # endif } @@ -3927,7 +3903,7 @@ pim_info(nfa_pim_T *pim) else { sprintf(buf, " PIM col %d", REG_MULTI ? (int)pim->end.pos.col - : (int)(pim->end.ptr - reginput)); + : (int)(pim->end.ptr - rex.input)); } return buf; } @@ -3955,7 +3931,7 @@ copy_pim(nfa_pim_T *to, nfa_pim_T *from) to->state = from->state; copy_sub(&to->subs.norm, &from->subs.norm); #ifdef FEAT_SYN_HL - if (nfa_has_zsubexpr) + if (rex.nfa_has_zsubexpr) copy_sub(&to->subs.synt, &from->subs.synt); #endif to->end = from->end; @@ -3967,9 +3943,10 @@ clear_sub(regsub_T *sub) if (REG_MULTI) /* Use 0xff to set lnum to -1 */ vim_memset(sub->list.multi, 0xff, - sizeof(struct multipos) * nfa_nsubexpr); + sizeof(struct multipos) * rex.nfa_nsubexpr); else - vim_memset(sub->list.line, 0, sizeof(struct linepos) * nfa_nsubexpr); + vim_memset(sub->list.line, 0, + sizeof(struct linepos) * rex.nfa_nsubexpr); sub->in_use = 0; } @@ -4022,7 +3999,7 @@ copy_sub_off(regsub_T *to, regsub_T *fro static void copy_ze_off(regsub_T *to, regsub_T *from) { - if (nfa_has_zend) + if (rex.nfa_has_zend) { if (REG_MULTI) { @@ -4073,7 +4050,7 @@ sub_equal(regsub_T *sub1, regsub_T *sub2 != sub2->list.multi[i].start_col) return FALSE; - if (nfa_has_backref) + if (rex.nfa_has_backref) { if (i < sub1->in_use) s1 = sub1->list.multi[i].end_lnum; @@ -4105,7 +4082,7 @@ sub_equal(regsub_T *sub1, regsub_T *sub2 sp2 = NULL; if (sp1 != sp2) return FALSE; - if (nfa_has_backref) + if (rex.nfa_has_backref) { if (i < sub1->in_use) sp1 = sub1->list.line[i].end; @@ -4139,7 +4116,7 @@ report_state(char *action, else if (REG_MULTI) col = sub->list.multi[0].start_col; else - col = (int)(sub->list.line[0].start - regline); + col = (int)(sub->list.line[0].start - rex.line); nfa_set_code(state->c); fprintf(log_fd, "> %s state %d to list %d. char %d: %s (start col %d)%s\n", action, abs(state->id), lid, state->c, code, col, @@ -4167,7 +4144,7 @@ has_state_with_pos( if (thread->state->id == state->id && sub_equal(&thread->subs.norm, &subs->norm) #ifdef FEAT_SYN_HL - && (!nfa_has_zsubexpr + && (!rex.nfa_has_zsubexpr || sub_equal(&thread->subs.synt, &subs->synt)) #endif && pim_equal(&thread->pim, pim)) @@ -4306,7 +4283,7 @@ state_in_list( { if (state->lastlist[nfa_ll_index] == l->id) { - if (!nfa_has_backref || has_state_with_pos(l, state, subs, NULL)) + if (!rex.nfa_has_backref || has_state_with_pos(l, state, subs, NULL)) return TRUE; } return FALSE; @@ -4390,11 +4367,11 @@ addstate( /* "^" won't match past end-of-line, don't bother trying. * Except when at the end of the line, or when we are going to the * next line for a look-behind match. */ - if (reginput > regline - && *reginput != NUL + if (rex.input > rex.line + && *rex.input != NUL && (nfa_endp == NULL || !REG_MULTI - || reglnum == nfa_endp->se_u.pos.lnum)) + || rex.lnum == nfa_endp->se_u.pos.lnum)) goto skip_add; /* FALLTHROUGH */ @@ -4432,7 +4409,7 @@ addstate( * unless it is an MOPEN that is used for a backreference or * when there is a PIM. For NFA_MATCH check the position, * lower position is preferred. */ - if (!nfa_has_backref && pim == NULL && !l->has_pim + if (!rex.nfa_has_backref && pim == NULL && !l->has_pim && state->c != NFA_MATCH) { /* When called from addstate_here() do insert before @@ -4477,7 +4454,7 @@ skip_add: * copy before it becomes invalid. */ copy_sub(&temp_subs.norm, &subs->norm); #ifdef FEAT_SYN_HL - if (nfa_has_zsubexpr) + if (rex.nfa_has_zsubexpr) copy_sub(&temp_subs.synt, &subs->synt); #endif subs = &temp_subs; @@ -4501,7 +4478,7 @@ skip_add: } copy_sub(&thread->subs.norm, &subs->norm); #ifdef FEAT_SYN_HL - if (nfa_has_zsubexpr) + if (rex.nfa_has_zsubexpr) copy_sub(&thread->subs.synt, &subs->synt); #endif #ifdef ENABLE_LOG @@ -4597,14 +4574,14 @@ skip_add: } if (off == -1) { - sub->list.multi[subidx].start_lnum = reglnum + 1; + sub->list.multi[subidx].start_lnum = rex.lnum + 1; sub->list.multi[subidx].start_col = 0; } else { - sub->list.multi[subidx].start_lnum = reglnum; + sub->list.multi[subidx].start_lnum = rex.lnum; sub->list.multi[subidx].start_col = - (colnr_T)(reginput - regline + off); + (colnr_T)(rex.input - rex.line + off); } sub->list.multi[subidx].end_lnum = -1; } @@ -4625,7 +4602,7 @@ skip_add: } sub->in_use = subidx + 1; } - sub->list.line[subidx].start = reginput + off; + sub->list.line[subidx].start = rex.input + off; } subs = addstate(l, state->out, subs, pim, off_arg); @@ -4649,7 +4626,7 @@ skip_add: break; case NFA_MCLOSE: - if (nfa_has_zend && (REG_MULTI + if (rex.nfa_has_zend && (REG_MULTI ? subs->norm.list.multi[0].end_lnum >= 0 : subs->norm.list.line[0].end != NULL)) { @@ -4708,14 +4685,14 @@ skip_add: save_multipos = sub->list.multi[subidx]; if (off == -1) { - sub->list.multi[subidx].end_lnum = reglnum + 1; + sub->list.multi[subidx].end_lnum = rex.lnum + 1; sub->list.multi[subidx].end_col = 0; } else { - sub->list.multi[subidx].end_lnum = reglnum; + sub->list.multi[subidx].end_lnum = rex.lnum; sub->list.multi[subidx].end_col = - (colnr_T)(reginput - regline + off); + (colnr_T)(rex.input - rex.line + off); } /* avoid compiler warnings */ save_ptr = NULL; @@ -4723,7 +4700,7 @@ skip_add: else { save_ptr = sub->list.line[subidx].end; - sub->list.line[subidx].end = reginput + off; + sub->list.line[subidx].end = rex.input + off; /* avoid compiler warnings */ vim_memset(&save_multipos, 0, sizeof(save_multipos)); } @@ -4929,13 +4906,13 @@ retempty: if (sub->list.multi[subidx].start_lnum < 0 || sub->list.multi[subidx].end_lnum < 0) goto retempty; - if (sub->list.multi[subidx].start_lnum == reglnum - && sub->list.multi[subidx].end_lnum == reglnum) + if (sub->list.multi[subidx].start_lnum == rex.lnum + && sub->list.multi[subidx].end_lnum == rex.lnum) { len = sub->list.multi[subidx].end_col - sub->list.multi[subidx].start_col; - if (cstrncmp(regline + sub->list.multi[subidx].start_col, - reginput, &len) == 0) + if (cstrncmp(rex.line + sub->list.multi[subidx].start_col, + rex.input, &len) == 0) { *bytelen = len; return TRUE; @@ -4958,7 +4935,7 @@ retempty: || sub->list.line[subidx].end == NULL) goto retempty; len = (int)(sub->list.line[subidx].end - sub->list.line[subidx].start); - if (cstrncmp(sub->list.line[subidx].start, reginput, &len) == 0) + if (cstrncmp(sub->list.line[subidx].start, rex.input, &len) == 0) { *bytelen = len; return TRUE; @@ -4989,7 +4966,7 @@ match_zref( } len = (int)STRLEN(re_extmatch_in->matches[subidx]); - if (cstrncmp(re_extmatch_in->matches[subidx], reginput, &len) == 0) + if (cstrncmp(re_extmatch_in->matches[subidx], rex.input, &len) == 0) { *bytelen = len; return TRUE; @@ -5061,10 +5038,10 @@ recursive_regmatch( int **listids, int *listids_len) { - int save_reginput_col = (int)(reginput - regline); - int save_reglnum = reglnum; + int save_reginput_col = (int)(rex.input - rex.line); + int save_reglnum = rex.lnum; int save_nfa_match = nfa_match; - int save_nfa_listid = nfa_listid; + int save_nfa_listid = rex.nfa_listid; save_se_T *save_nfa_endp = nfa_endp; save_se_T endpos; save_se_T *endposp = NULL; @@ -5075,9 +5052,9 @@ recursive_regmatch( { /* start at the position where the postponed match was */ if (REG_MULTI) - reginput = regline + pim->end.pos.col; + rex.input = rex.line + pim->end.pos.col; else - reginput = pim->end.ptr; + rex.input = pim->end.ptr; } if (state->c == NFA_START_INVISIBLE_BEFORE @@ -5092,8 +5069,8 @@ recursive_regmatch( { if (pim == NULL) { - endpos.se_u.pos.col = (int)(reginput - regline); - endpos.se_u.pos.lnum = reglnum; + endpos.se_u.pos.col = (int)(rex.input - rex.line); + endpos.se_u.pos.lnum = rex.lnum; } else endpos.se_u.pos = pim->end.pos; @@ -5101,7 +5078,7 @@ recursive_regmatch( else { if (pim == NULL) - endpos.se_u.ptr = reginput; + endpos.se_u.ptr = rex.input; else endpos.se_u.ptr = pim->end.ptr; } @@ -5114,39 +5091,39 @@ recursive_regmatch( { if (REG_MULTI) { - regline = reg_getline(--reglnum); - if (regline == NULL) + rex.line = reg_getline(--rex.lnum); + if (rex.line == NULL) /* can't go before the first line */ - regline = reg_getline(++reglnum); + rex.line = reg_getline(++rex.lnum); } - reginput = regline; + rex.input = rex.line; } else { - if (REG_MULTI && (int)(reginput - regline) < state->val) + if (REG_MULTI && (int)(rex.input - rex.line) < state->val) { /* Not enough bytes in this line, go to end of * previous line. */ - regline = reg_getline(--reglnum); - if (regline == NULL) + rex.line = reg_getline(--rex.lnum); + if (rex.line == NULL) { /* can't go before the first line */ - regline = reg_getline(++reglnum); - reginput = regline; + rex.line = reg_getline(++rex.lnum); + rex.input = rex.line; } else - reginput = regline + STRLEN(regline); + rex.input = rex.line + STRLEN(rex.line); } - if ((int)(reginput - regline) >= state->val) + if ((int)(rex.input - rex.line) >= state->val) { - reginput -= state->val; + rex.input -= state->val; #ifdef FEAT_MBYTE if (has_mbyte) - reginput -= mb_head_off(regline, reginput); + rex.input -= mb_head_off(rex.line, rex.input); #endif } else - reginput = regline; + rex.input = rex.line; } } @@ -5161,29 +5138,29 @@ recursive_regmatch( { /* Already calling nfa_regmatch() recursively. Save the lastlist[1] * values and clear them. */ - if (*listids == NULL || *listids_len < nstate) + if (*listids == NULL || *listids_len < prog->nstate) { vim_free(*listids); - *listids = (int *)lalloc(sizeof(int) * nstate, TRUE); + *listids = (int *)lalloc(sizeof(int) * prog->nstate, TRUE); if (*listids == NULL) { EMSG(_("E878: (NFA) Could not allocate memory for branch traversal!")); return 0; } - *listids_len = nstate; + *listids_len = prog->nstate; } nfa_save_listids(prog, *listids); need_restore = TRUE; - /* any value of nfa_listid will do */ + /* any value of rex.nfa_listid will do */ } else { /* First recursive nfa_regmatch() call, switch to the second lastlist - * entry. Make sure nfa_listid is different from a previous recursive - * call, because some states may still have this ID. */ + * entry. Make sure rex.nfa_listid is different from a previous + * recursive call, because some states may still have this ID. */ ++nfa_ll_index; - if (nfa_listid <= nfa_alt_listid) - nfa_listid = nfa_alt_listid; + if (rex.nfa_listid <= rex.nfa_alt_listid) + rex.nfa_listid = rex.nfa_alt_listid; } /* Call nfa_regmatch() to check if the current concat matches at this @@ -5196,18 +5173,18 @@ recursive_regmatch( else { --nfa_ll_index; - nfa_alt_listid = nfa_listid; + rex.nfa_alt_listid = rex.nfa_listid; } /* restore position in input text */ - reglnum = save_reglnum; + rex.lnum = save_reglnum; if (REG_MULTI) - regline = reg_getline(reglnum); - reginput = regline + save_reginput_col; + rex.line = reg_getline(rex.lnum); + rex.input = rex.line + save_reginput_col; if (result != NFA_TOO_EXPENSIVE) { nfa_match = save_nfa_match; - nfa_listid = save_nfa_listid; + rex.nfa_listid = save_nfa_listid; } nfa_endp = save_nfa_endp; @@ -5407,12 +5384,12 @@ skip_to_start(int c, colnr_T *colp) && !has_mbyte #endif ) - s = vim_strbyte(regline + *colp, c); + s = vim_strbyte(rex.line + *colp, c); else - s = cstrchr(regline + *colp, c); + s = cstrchr(rex.line + *colp, c); if (s == NULL) return FAIL; - *colp = (int)(s - regline); + *colp = (int)(s - rex.line); return OK; } @@ -5436,7 +5413,7 @@ find_match_text(colnr_T startcol, int re for (len1 = 0; match_text[len1] != NUL; len1 += MB_CHAR2LEN(c1)) { c1 = PTR2CHAR(match_text + len1); - c2 = PTR2CHAR(regline + col + len2); + c2 = PTR2CHAR(rex.line + col + len2); if (c1 != c2 && (!rex.reg_ic || MB_TOLOWER(c1) != MB_TOLOWER(c2))) { match = FALSE; @@ -5448,22 +5425,22 @@ find_match_text(colnr_T startcol, int re #ifdef FEAT_MBYTE /* check that no composing char follows */ && !(enc_utf8 - && utf_iscomposing(PTR2CHAR(regline + col + len2))) + && utf_iscomposing(PTR2CHAR(rex.line + col + len2))) #endif ) { cleanup_subexpr(); if (REG_MULTI) { - rex.reg_startpos[0].lnum = reglnum; + rex.reg_startpos[0].lnum = rex.lnum; rex.reg_startpos[0].col = col; - rex.reg_endpos[0].lnum = reglnum; + rex.reg_endpos[0].lnum = rex.lnum; rex.reg_endpos[0].col = col + len2; } else { - rex.reg_startp[0] = regline + col; - rex.reg_endp[0] = regline + col + len2; + rex.reg_startp[0] = rex.line + col; + rex.reg_endp[0] = rex.line + col + len2; } return 1L; } @@ -5493,7 +5470,7 @@ nfa_did_time_out() /* * Main matching routine. * - * Run NFA to determine whether it matches reginput. + * Run NFA to determine whether it matches rex.input. * * When "nfa_endp" is not NULL it is a required end-of-match position. * @@ -5549,12 +5526,12 @@ nfa_regmatch( nfa_match = FALSE; /* Allocate memory for the lists of nodes. */ - size = (nstate + 1) * sizeof(nfa_thread_T); + size = (prog->nstate + 1) * sizeof(nfa_thread_T); list[0].t = (nfa_thread_T *)lalloc(size, TRUE); - list[0].len = nstate + 1; + list[0].len = prog->nstate + 1; list[1].t = (nfa_thread_T *)lalloc(size, TRUE); - list[1].len = nstate + 1; + list[1].len = prog->nstate + 1; if (list[0].t == NULL || list[1].t == NULL) goto theend; @@ -5584,7 +5561,7 @@ nfa_regmatch( #ifdef ENABLE_LOG fprintf(log_fd, "(---) STARTSTATE first\n"); #endif - thislist->id = nfa_listid + 1; + thislist->id = rex.nfa_listid + 1; /* Inline optimized code for addstate(thislist, start, m, 0) if we know * it's the first MOPEN. */ @@ -5592,11 +5569,11 @@ nfa_regmatch( { if (REG_MULTI) { - m->norm.list.multi[0].start_lnum = reglnum; - m->norm.list.multi[0].start_col = (colnr_T)(reginput - regline); + m->norm.list.multi[0].start_lnum = rex.lnum; + m->norm.list.multi[0].start_col = (colnr_T)(rex.input - rex.line); } else - m->norm.list.line[0].start = reginput; + m->norm.list.line[0].start = rex.input; m->norm.in_use = 1; addstate(thislist, start->out, m, NULL, 0); } @@ -5620,13 +5597,13 @@ nfa_regmatch( #ifdef FEAT_MBYTE if (has_mbyte) { - curc = (*mb_ptr2char)(reginput); - clen = (*mb_ptr2len)(reginput); + curc = (*mb_ptr2char)(rex.input); + clen = (*mb_ptr2len)(rex.input); } else #endif { - curc = *reginput; + curc = *rex.input; clen = 1; } if (curc == NUL) @@ -5640,9 +5617,9 @@ nfa_regmatch( nextlist = &list[flag ^= 1]; nextlist->n = 0; /* clear nextlist */ nextlist->has_pim = FALSE; - ++nfa_listid; + ++rex.nfa_listid; if (prog->re_engine == AUTOMATIC_ENGINE - && (nfa_listid >= NFA_MAX_STATES + && (rex.nfa_listid >= NFA_MAX_STATES # ifdef FEAT_EVAL || nfa_fail_for_testing # endif @@ -5653,12 +5630,12 @@ nfa_regmatch( goto theend; } - thislist->id = nfa_listid; - nextlist->id = nfa_listid + 1; + thislist->id = rex.nfa_listid; + nextlist->id = rex.nfa_listid + 1; #ifdef ENABLE_LOG fprintf(log_fd, "------------------------------------------\n"); - fprintf(log_fd, ">>> Reginput is \"%s\"\n", reginput); + fprintf(log_fd, ">>> Reginput is \"%s\"\n", rex.input); fprintf(log_fd, ">>> Advanced one character... Current char is %c (code %d) \n", curc, (int)curc); fprintf(log_fd, ">>> Thislist has %d states available: ", thislist->n); { @@ -5710,7 +5687,7 @@ nfa_regmatch( else if (REG_MULTI) col = t->subs.norm.list.multi[0].start_col; else - col = (int)(t->subs.norm.list.line[0].start - regline); + col = (int)(t->subs.norm.list.line[0].start - rex.line); nfa_set_code(t->state->c); fprintf(log_fd, "(%d) char %d %s (start col %d)%s... \n", abs(t->state->id), (int)t->state->c, code, col, @@ -5738,7 +5715,7 @@ nfa_regmatch( nfa_match = TRUE; copy_sub(&submatch->norm, &t->subs.norm); #ifdef FEAT_SYN_HL - if (nfa_has_zsubexpr) + if (rex.nfa_has_zsubexpr) copy_sub(&submatch->synt, &t->subs.synt); #endif #ifdef ENABLE_LOG @@ -5746,7 +5723,7 @@ nfa_regmatch( #endif /* Found the left-most longest match, do not look at any other * states at this position. When the list of states is going - * to be empty quit without advancing, so that "reginput" is + * to be empty quit without advancing, so that "rex.input" is * correct. */ if (nextlist->n == 0) clen = 0; @@ -5772,23 +5749,23 @@ nfa_regmatch( { if (REG_MULTI) fprintf(log_fd, "Current lnum: %d, endp lnum: %d; current col: %d, endp col: %d\n", - (int)reglnum, + (int)rex.lnum, (int)nfa_endp->se_u.pos.lnum, - (int)(reginput - regline), + (int)(rex.input - rex.line), nfa_endp->se_u.pos.col); else fprintf(log_fd, "Current col: %d, endp col: %d\n", - (int)(reginput - regline), - (int)(nfa_endp->se_u.ptr - reginput)); + (int)(rex.input - rex.line), + (int)(nfa_endp->se_u.ptr - rex.input)); } #endif /* If "nfa_endp" is set it's only a match if it ends at * "nfa_endp" */ if (nfa_endp != NULL && (REG_MULTI - ? (reglnum != nfa_endp->se_u.pos.lnum - || (int)(reginput - regline) + ? (rex.lnum != nfa_endp->se_u.pos.lnum + || (int)(rex.input - rex.line) != nfa_endp->se_u.pos.col) - : reginput != nfa_endp->se_u.ptr)) + : rex.input != nfa_endp->se_u.ptr)) break; /* do not set submatches for \@! */ @@ -5796,7 +5773,7 @@ nfa_regmatch( { copy_sub(&m->norm, &t->subs.norm); #ifdef FEAT_SYN_HL - if (nfa_has_zsubexpr) + if (rex.nfa_has_zsubexpr) copy_sub(&m->synt, &t->subs.synt); #endif } @@ -5838,7 +5815,7 @@ nfa_regmatch( * of what happens on success below. */ copy_sub_off(&m->norm, &t->subs.norm); #ifdef FEAT_SYN_HL - if (nfa_has_zsubexpr) + if (rex.nfa_has_zsubexpr) copy_sub_off(&m->synt, &t->subs.synt); #endif @@ -5866,7 +5843,7 @@ nfa_regmatch( /* Copy submatch info from the recursive call */ copy_sub_off(&t->subs.norm, &m->norm); #ifdef FEAT_SYN_HL - if (nfa_has_zsubexpr) + if (rex.nfa_has_zsubexpr) copy_sub_off(&t->subs.synt, &m->synt); #endif /* If the pattern has \ze and it matched in the @@ -5899,11 +5876,11 @@ nfa_regmatch( #endif if (REG_MULTI) { - pim.end.pos.col = (int)(reginput - regline); - pim.end.pos.lnum = reglnum; + pim.end.pos.col = (int)(rex.input - rex.line); + pim.end.pos.lnum = rex.lnum; } else - pim.end.ptr = reginput; + pim.end.ptr = rex.input; /* t->state->out1 is the corresponding END_INVISIBLE * node; Add its out to the current list (zero-width @@ -5959,7 +5936,7 @@ nfa_regmatch( * happens afterwards. */ copy_sub_off(&m->norm, &t->subs.norm); #ifdef FEAT_SYN_HL - if (nfa_has_zsubexpr) + if (rex.nfa_has_zsubexpr) copy_sub_off(&m->synt, &t->subs.synt); #endif @@ -5982,7 +5959,7 @@ nfa_regmatch( /* Copy submatch info from the recursive call */ copy_sub_off(&t->subs.norm, &m->norm); #ifdef FEAT_SYN_HL - if (nfa_has_zsubexpr) + if (rex.nfa_has_zsubexpr) copy_sub_off(&t->subs.synt, &m->synt); #endif /* Now we need to skip over the matched text and then @@ -5990,9 +5967,9 @@ nfa_regmatch( if (REG_MULTI) /* TODO: multi-line match */ bytelen = m->norm.list.multi[0].end_col - - (int)(reginput - regline); + - (int)(rex.input - rex.line); else - bytelen = (int)(m->norm.list.line[0].end - reginput); + bytelen = (int)(m->norm.list.line[0].end - rex.input); #ifdef ENABLE_LOG fprintf(log_fd, "NFA_START_PATTERN length: %d\n", bytelen); @@ -6025,7 +6002,7 @@ nfa_regmatch( } case NFA_BOL: - if (reginput == regline) + if (rex.input == rex.line) { add_here = TRUE; add_state = t->state->out; @@ -6051,7 +6028,7 @@ nfa_regmatch( int this_class; /* Get class of current and previous char (if it exists). */ - this_class = mb_get_class_buf(reginput, rex.reg_buf); + this_class = mb_get_class_buf(rex.input, rex.reg_buf); if (this_class <= 1) result = FALSE; else if (reg_prev_class() == this_class) @@ -6059,8 +6036,8 @@ nfa_regmatch( } #endif else if (!vim_iswordc_buf(curc, rex.reg_buf) - || (reginput > regline - && vim_iswordc_buf(reginput[-1], rex.reg_buf))) + || (rex.input > rex.line + && vim_iswordc_buf(rex.input[-1], rex.reg_buf))) result = FALSE; if (result) { @@ -6071,7 +6048,7 @@ nfa_regmatch( case NFA_EOW: result = TRUE; - if (reginput == regline) + if (rex.input == rex.line) result = FALSE; #ifdef FEAT_MBYTE else if (has_mbyte) @@ -6079,15 +6056,15 @@ nfa_regmatch( int this_class, prev_class; /* Get class of current and previous char (if it exists). */ - this_class = mb_get_class_buf(reginput, rex.reg_buf); + this_class = mb_get_class_buf(rex.input, rex.reg_buf); prev_class = reg_prev_class(); if (this_class == prev_class || prev_class == 0 || prev_class == 1) result = FALSE; } #endif - else if (!vim_iswordc_buf(reginput[-1], rex.reg_buf) - || (reginput[0] != NUL + else if (!vim_iswordc_buf(rex.input[-1], rex.reg_buf) + || (rex.input[0] != NUL && vim_iswordc_buf(curc, rex.reg_buf))) result = FALSE; if (result) @@ -6098,7 +6075,7 @@ nfa_regmatch( break; case NFA_BOF: - if (reglnum == 0 && reginput == regline + if (rex.lnum == 0 && rex.input == rex.line && (!REG_MULTI || rex.reg_firstlnum == 1)) { add_here = TRUE; @@ -6107,7 +6084,7 @@ nfa_regmatch( break; case NFA_EOF: - if (reglnum == rex.reg_maxline && curc == NUL) + if (rex.lnum == rex.reg_maxline && curc == NUL) { add_here = TRUE; add_state = t->state->out; @@ -6159,7 +6136,7 @@ nfa_regmatch( * Get them into cchars[] first. */ while (len < clen) { - mc = mb_ptr2char(reginput + len); + mc = mb_ptr2char(rex.input + len); cchars[ccount++] = mc; len += mb_char2len(mc); if (ccount == MAX_MCO) @@ -6194,7 +6171,7 @@ nfa_regmatch( case NFA_NEWL: if (curc == NUL && !rex.reg_line_lbr && REG_MULTI - && reglnum <= rex.reg_maxline) + && rex.lnum <= rex.reg_maxline) { go_to_nextline = TRUE; /* Pass -1 for the offset, which means taking the position @@ -6323,13 +6300,13 @@ nfa_regmatch( break; case NFA_KWORD: /* \k */ - result = vim_iswordp_buf(reginput, rex.reg_buf); + result = vim_iswordp_buf(rex.input, rex.reg_buf); ADD_STATE_IF_MATCH(t->state); break; case NFA_SKWORD: /* \K */ result = !VIM_ISDIGIT(curc) - && vim_iswordp_buf(reginput, rex.reg_buf); + && vim_iswordp_buf(rex.input, rex.reg_buf); ADD_STATE_IF_MATCH(t->state); break; @@ -6344,12 +6321,12 @@ nfa_regmatch( break; case NFA_PRINT: /* \p */ - result = vim_isprintc(PTR2CHAR(reginput)); + result = vim_isprintc(PTR2CHAR(rex.input)); ADD_STATE_IF_MATCH(t->state); break; case NFA_SPRINT: /* \P */ - result = !VIM_ISDIGIT(curc) && vim_isprintc(PTR2CHAR(reginput)); + result = !VIM_ISDIGIT(curc) && vim_isprintc(PTR2CHAR(rex.input)); ADD_STATE_IF_MATCH(t->state); break; @@ -6552,7 +6529,7 @@ nfa_regmatch( case NFA_LNUM_LT: result = (REG_MULTI && nfa_re_num_cmp(t->state->val, t->state->c - NFA_LNUM, - (long_u)(reglnum + rex.reg_firstlnum))); + (long_u)(rex.lnum + rex.reg_firstlnum))); if (result) { add_here = TRUE; @@ -6564,7 +6541,7 @@ nfa_regmatch( case NFA_COL_GT: case NFA_COL_LT: result = nfa_re_num_cmp(t->state->val, t->state->c - NFA_COL, - (long_u)(reginput - regline) + 1); + (long_u)(rex.input - rex.line) + 1); if (result) { add_here = TRUE; @@ -6577,7 +6554,7 @@ nfa_regmatch( case NFA_VCOL_LT: { int op = t->state->c - NFA_VCOL; - colnr_T col = (colnr_T)(reginput - regline); + colnr_T col = (colnr_T)(rex.input - rex.line); win_T *wp = rex.reg_win == NULL ? curwin : rex.reg_win; /* Bail out quickly when there can't be a match, avoid the @@ -6601,7 +6578,7 @@ nfa_regmatch( } if (!result) result = nfa_re_num_cmp(t->state->val, op, - (long_u)win_linetabsize(wp, regline, col) + 1); + (long_u)win_linetabsize(wp, rex.line, col) + 1); if (result) { add_here = TRUE; @@ -6619,13 +6596,13 @@ nfa_regmatch( /* Compare the mark position to the match position. */ result = (pos != NULL /* mark doesn't exist */ && pos->lnum > 0 /* mark isn't set in reg_buf */ - && (pos->lnum == reglnum + rex.reg_firstlnum - ? (pos->col == (colnr_T)(reginput - regline) + && (pos->lnum == rex.lnum + rex.reg_firstlnum + ? (pos->col == (colnr_T)(rex.input - rex.line) ? t->state->c == NFA_MARK - : (pos->col < (colnr_T)(reginput - regline) + : (pos->col < (colnr_T)(rex.input - rex.line) ? t->state->c == NFA_MARK_GT : t->state->c == NFA_MARK_LT)) - : (pos->lnum < reglnum + rex.reg_firstlnum + : (pos->lnum < rex.lnum + rex.reg_firstlnum ? t->state->c == NFA_MARK_GT : t->state->c == NFA_MARK_LT))); if (result) @@ -6638,9 +6615,9 @@ nfa_regmatch( case NFA_CURSOR: result = (rex.reg_win != NULL - && (reglnum + rex.reg_firstlnum + && (rex.lnum + rex.reg_firstlnum == rex.reg_win->w_cursor.lnum) - && ((colnr_T)(reginput - regline) + && ((colnr_T)(rex.input - rex.line) == rex.reg_win->w_cursor.col)); if (result) { @@ -6701,7 +6678,7 @@ nfa_regmatch( /* If rex.reg_icombine is not set only skip over the character * itself. When it is set skip over composing characters. */ if (result && enc_utf8 && !rex.reg_icombine) - clen = utf_ptr2len(reginput); + clen = utf_ptr2len(rex.input); #endif ADD_STATE_IF_MATCH(t->state); break; @@ -6746,7 +6723,7 @@ nfa_regmatch( /* Copy submatch info from the recursive call */ copy_sub_off(&pim->subs.norm, &m->norm); #ifdef FEAT_SYN_HL - if (nfa_has_zsubexpr) + if (rex.nfa_has_zsubexpr) copy_sub_off(&pim->subs.synt, &m->synt); #endif } @@ -6773,7 +6750,7 @@ nfa_regmatch( /* Copy submatch info from the recursive call */ copy_sub_off(&t->subs.norm, &pim->subs.norm); #ifdef FEAT_SYN_HL - if (nfa_has_zsubexpr) + if (rex.nfa_has_zsubexpr) copy_sub_off(&t->subs.synt, &pim->subs.synt); #endif } @@ -6817,17 +6794,17 @@ nfa_regmatch( * Also don't start a match past the first line. */ if (nfa_match == FALSE && ((toplevel - && reglnum == 0 + && rex.lnum == 0 && clen != 0 && (rex.reg_maxcol == 0 - || (colnr_T)(reginput - regline) < rex.reg_maxcol)) + || (colnr_T)(rex.input - rex.line) < rex.reg_maxcol)) || (nfa_endp != NULL && (REG_MULTI - ? (reglnum < nfa_endp->se_u.pos.lnum - || (reglnum == nfa_endp->se_u.pos.lnum - && (int)(reginput - regline) + ? (rex.lnum < nfa_endp->se_u.pos.lnum + || (rex.lnum == nfa_endp->se_u.pos.lnum + && (int)(rex.input - rex.line) < nfa_endp->se_u.pos.col)) - : reginput < nfa_endp->se_u.ptr)))) + : rex.input < nfa_endp->se_u.ptr)))) { #ifdef ENABLE_LOG fprintf(log_fd, "(---) STARTSTATE\n"); @@ -6843,7 +6820,7 @@ nfa_regmatch( { if (nextlist->n == 0) { - colnr_T col = (colnr_T)(reginput - regline) + clen; + colnr_T col = (colnr_T)(rex.input - rex.line) + clen; /* Nextlist is empty, we can skip ahead to the * character that must appear at the start. */ @@ -6851,15 +6828,15 @@ nfa_regmatch( break; #ifdef ENABLE_LOG fprintf(log_fd, " Skipping ahead %d bytes to regstart\n", - col - ((colnr_T)(reginput - regline) + clen)); -#endif - reginput = regline + col - clen; + col - ((colnr_T)(rex.input - rex.line) + clen)); +#endif + rex.input = rex.line + col - clen; } else { /* Checking if the required start character matches is * cheaper than adding a state that won't match. */ - c = PTR2CHAR(reginput + clen); + c = PTR2CHAR(rex.input + clen); if (c != prog->regstart && (!rex.reg_ic || MB_TOLOWER(c) != MB_TOLOWER(prog->regstart))) { @@ -6875,9 +6852,9 @@ nfa_regmatch( { if (REG_MULTI) m->norm.list.multi[0].start_col = - (colnr_T)(reginput - regline) + clen; + (colnr_T)(rex.input - rex.line) + clen; else - m->norm.list.line[0].start = reginput + clen; + m->norm.list.line[0].start = rex.input + clen; addstate(nextlist, start->out, m, NULL, clen); } } @@ -6900,9 +6877,9 @@ nextchar: /* Advance to the next character, or advance to the next line, or * finish. */ if (clen != 0) - reginput += clen; + rex.input += clen; else if (go_to_nextline || (nfa_endp != NULL && REG_MULTI - && reglnum < nfa_endp->se_u.pos.lnum)) + && rex.lnum < nfa_endp->se_u.pos.lnum)) reg_nextline(); else break; @@ -6942,7 +6919,7 @@ theend: } /* - * Try match of "prog" with at regline["col"]. + * Try match of "prog" with at rex.line["col"]. * Returns <= 0 for failure, number of lines contained in the match otherwise. */ static long @@ -6960,7 +6937,7 @@ nfa_regtry( FILE *f; #endif - reginput = regline + col; + rex.input = rex.line + col; #ifdef FEAT_RELTIME nfa_time_limit = tm; nfa_timed_out = timed_out; @@ -6975,7 +6952,7 @@ nfa_regtry( #ifdef DEBUG fprintf(f, "\tRegexp is \"%s\"\n", nfa_regengine.expr); #endif - fprintf(f, "\tInput text is \"%s\" \n", reginput); + fprintf(f, "\tInput text is \"%s\" \n", rex.input); fprintf(f, "\t=======================================================\n\n"); nfa_print_state(f, start); fprintf(f, "\n\n"); @@ -7018,12 +6995,12 @@ nfa_regtry( if (rex.reg_endpos[0].lnum < 0) { /* pattern has a \ze but it didn't match, use current end */ - rex.reg_endpos[0].lnum = reglnum; - rex.reg_endpos[0].col = (int)(reginput - regline); + rex.reg_endpos[0].lnum = rex.lnum; + rex.reg_endpos[0].col = (int)(rex.input - rex.line); } else /* Use line number of "\ze". */ - reglnum = rex.reg_endpos[0].lnum; + rex.lnum = rex.reg_endpos[0].lnum; } else { @@ -7034,9 +7011,9 @@ nfa_regtry( } if (rex.reg_startp[0] == NULL) - rex.reg_startp[0] = regline + col; + rex.reg_startp[0] = rex.line + col; if (rex.reg_endp[0] == NULL) - rex.reg_endp[0] = reginput; + rex.reg_endp[0] = rex.input; } #ifdef FEAT_SYN_HL @@ -7077,7 +7054,7 @@ nfa_regtry( } #endif - return 1 + reglnum; + return 1 + rex.lnum; } /* @@ -7131,29 +7108,34 @@ nfa_regexec_both( rex.reg_icombine = TRUE; #endif - regline = line; - reglnum = 0; /* relative to line */ - - nfa_has_zend = prog->has_zend; - nfa_has_backref = prog->has_backref; - nfa_nsubexpr = prog->nsubexp; - nfa_listid = 1; - nfa_alt_listid = 2; + rex.line = line; + rex.lnum = 0; /* relative to line */ + + rex.nfa_has_zend = prog->has_zend; + rex.nfa_has_backref = prog->has_backref; + rex.nfa_nsubexpr = prog->nsubexp; + rex.nfa_listid = 1; + rex.nfa_alt_listid = 2; +#ifdef DEBUG nfa_regengine.expr = prog->pattern; +#endif if (prog->reganch && col > 0) return 0L; - need_clear_subexpr = TRUE; + rex.need_clear_subexpr = TRUE; #ifdef FEAT_SYN_HL /* Clear the external match subpointers if necessary. */ if (prog->reghasz == REX_SET) { - nfa_has_zsubexpr = TRUE; - need_clear_zsubexpr = TRUE; + rex.nfa_has_zsubexpr = TRUE; + rex.need_clear_zsubexpr = TRUE; } else - nfa_has_zsubexpr = FALSE; + { + rex.nfa_has_zsubexpr = FALSE; + rex.need_clear_zsubexpr = FALSE; + } #endif if (prog->regstart != NUL) @@ -7177,8 +7159,10 @@ nfa_regexec_both( if (rex.reg_maxcol > 0 && col >= rex.reg_maxcol) goto theend; - nstate = prog->nstate; - for (i = 0; i < nstate; ++i) + // Set the "nstate" used by nfa_regcomp() to zero to trigger an error when + // it's accidentally used during execution. + nstate = 0; + for (i = 0; i < prog->nstate; ++i) { prog->state[i].id = i; prog->state[i].lastlist[0] = 0; @@ -7187,7 +7171,9 @@ nfa_regexec_both( retval = nfa_regtry(prog, col, tm, timed_out); +#ifdef DEBUG nfa_regengine.expr = NULL; +#endif theend: return retval; @@ -7207,7 +7193,9 @@ nfa_regcomp(char_u *expr, int re_flags) if (expr == NULL) return NULL; +#ifdef DEBUG nfa_regengine.expr = expr; +#endif nfa_re_flags = re_flags; init_class_tab(); @@ -7255,6 +7243,7 @@ nfa_regcomp(char_u *expr, int re_flags) if (prog == NULL) goto fail; state_ptr = prog->state; + prog->re_in_use = FALSE; /* * PASS 2 @@ -7267,8 +7256,8 @@ nfa_regcomp(char_u *expr, int re_flags) prog->regflags = regflags; prog->engine = &nfa_regengine; prog->nstate = nstate; - prog->has_zend = nfa_has_zend; - prog->has_backref = nfa_has_backref; + prog->has_zend = rex.nfa_has_zend; + prog->has_backref = rex.nfa_has_backref; prog->nsubexp = regnpar; nfa_postprocess(prog); @@ -7286,7 +7275,9 @@ nfa_regcomp(char_u *expr, int re_flags) prog->reghasz = re_has_z; #endif prog->pattern = vim_strsave(expr); +#ifdef DEBUG nfa_regengine.expr = NULL; +#endif out: VIM_CLEAR(post_start); @@ -7299,7 +7290,9 @@ fail: #ifdef ENABLE_LOG nfa_postfix_dump(expr, FAIL); #endif +#ifdef DEBUG nfa_regengine.expr = NULL; +#endif goto out; } diff --git a/src/version.c b/src/version.c --- a/src/version.c +++ b/src/version.c @@ -790,6 +790,8 @@ static char *(features[]) = static int included_patches[] = { /* Add new patch number below this line */ /**/ + 192, +/**/ 191, /**/ 190,