# HG changeset patch # User Bram Moolenaar # Date 1380120114 -7200 # Node ID 90e2f0729a0df249931a2dbe5f4310ba6c91cab4 # Parent 5bfefa47d4a05a62fc3d74c9c44f51bb547ea8d4 updated for version 7.4.036 Problem: NFA engine does not capture group correctly when using \@>. (ZyX) Solution: Copy submatches before doing the recursive match. diff --git a/src/regexp_nfa.c b/src/regexp_nfa.c --- a/src/regexp_nfa.c +++ b/src/regexp_nfa.c @@ -36,7 +36,7 @@ enum { NFA_SPLIT = -1024, NFA_MATCH, - NFA_SKIP_CHAR, /* matches a 0-length char */ + NFA_EMPTY, /* matches 0-length */ NFA_START_COLL, /* [abc] start */ NFA_END_COLL, /* [abc] end */ @@ -2005,8 +2005,8 @@ nfa_regpiece() { /* Ignore result of previous call to nfa_regatom() */ post_ptr = post_start + my_post_start; - /* NFA_SKIP_CHAR has 0-length and works everywhere */ - EMIT(NFA_SKIP_CHAR); + /* NFA_EMPTY is 0-length and works everywhere */ + EMIT(NFA_EMPTY); return OK; } @@ -2170,16 +2170,16 @@ nfa_regbranch() old_post_pos = (int)(post_ptr - post_start); if (nfa_regconcat() == FAIL) return FAIL; - /* if concat is empty, skip a input char. But do emit a node */ + /* if concat is empty do emit a node */ if (old_post_pos == (int)(post_ptr - post_start)) - EMIT(NFA_SKIP_CHAR); + EMIT(NFA_EMPTY); EMIT(NFA_CONCAT); ch = peekchr(); } - /* Even if a branch is empty, emit one node for it */ + /* if a branch is empty, emit one node for it */ if (old_post_pos == (int)(post_ptr - post_start)) - EMIT(NFA_SKIP_CHAR); + EMIT(NFA_EMPTY); return OK; } @@ -2423,7 +2423,7 @@ nfa_set_code(c) case NFA_STAR_NONGREEDY: STRCPY(code, "NFA_STAR_NONGREEDY "); break; case NFA_QUEST: STRCPY(code, "NFA_QUEST"); break; case NFA_QUEST_NONGREEDY: STRCPY(code, "NFA_QUEST_NON_GREEDY"); break; - case NFA_SKIP_CHAR: STRCPY(code, "NFA_SKIP_CHAR"); break; + case NFA_EMPTY: STRCPY(code, "NFA_EMPTY"); break; case NFA_OR: STRCPY(code, "NFA_OR"); break; case NFA_START_COLL: STRCPY(code, "NFA_START_COLL"); break; @@ -3067,7 +3067,7 @@ nfa_max_width(startstate, depth) case NFA_ZSTART: case NFA_ZEND: case NFA_OPT_CHARS: - case NFA_SKIP_CHAR: + case NFA_EMPTY: case NFA_START_PATTERN: case NFA_END_PATTERN: case NFA_COMPOSING: @@ -3265,15 +3265,14 @@ post2nfa(postfix, end, nfa_calc_size) PUSH(frag(e1.start, e2.out)); break; - case NFA_SKIP_CHAR: - /* Symbol of 0-length, Used in a repetition - * with max/min count of 0 */ + case NFA_EMPTY: + /* 0-length, used in a repetition with max/min count of 0 */ if (nfa_calc_size == TRUE) { nstate++; break; } - s = alloc_state(NFA_SKIP_CHAR, NULL, NULL); + s = alloc_state(NFA_EMPTY, NULL, NULL); if (s == NULL) goto theend; PUSH(frag(s, list1(&s->out))); @@ -4209,7 +4208,7 @@ addstate(l, state, subs_arg, pim, off) case NFA_MOPEN: case NFA_ZEND: case NFA_SPLIT: - case NFA_SKIP_CHAR: + case NFA_EMPTY: /* These nodes are not added themselves but their "out" and/or * "out1" may be added below. */ break; @@ -4337,7 +4336,7 @@ skip_add: subs = addstate(l, state->out1, subs, pim, off); break; - case NFA_SKIP_CHAR: + case NFA_EMPTY: case NFA_NOPEN: case NFA_NCLOSE: subs = addstate(l, state->out, subs, pim, off); @@ -5604,9 +5603,13 @@ nfa_regmatch(prog, start, submatch, m) { int in_use = m->norm.in_use; - /* Copy submatch info for the recursive call, so that - * \1 can be matched. */ + /* Copy submatch info for the recursive call, opposite + * of what happens on success below. */ copy_sub_off(&m->norm, &t->subs.norm); +#ifdef FEAT_SYN_HL + if (nfa_has_zsubexpr) + copy_sub_off(&m->synt, &t->subs.synt); +#endif /* * First try matching the invisible match, then what @@ -5713,6 +5716,13 @@ nfa_regmatch(prog, start, submatch, m) #endif break; } + /* Copy submatch info to the recursive call, opposite of what + * happens afterwards. */ + copy_sub_off(&m->norm, &t->subs.norm); +#ifdef FEAT_SYN_HL + if (nfa_has_zsubexpr) + copy_sub_off(&m->synt, &t->subs.synt); +#endif /* First try matching the pattern. */ result = recursive_regmatch(t->state, NULL, prog, diff --git a/src/testdir/test64.in b/src/testdir/test64.in --- a/src/testdir/test64.in +++ b/src/testdir/test64.in @@ -430,6 +430,7 @@ STARTTEST :call add(tl, [2, '\(a*\)\@>a', 'aaaa']) :call add(tl, [2, '\(a*\)\@>b', 'aaab', 'aaab', 'aaa']) :call add(tl, [2, '^\(.\{-}b\)\@>.', ' abcbd', ' abc', ' ab']) +:call add(tl, [2, '\(.\{-}\)\(\)\@>$', 'abc', 'abc', 'abc', '']) :" TODO: BT engine does not restore submatch after failure :call add(tl, [1, '\(a*\)\@>a\|a\+', 'aaaa', 'aaaa']) :" diff --git a/src/testdir/test64.ok b/src/testdir/test64.ok --- a/src/testdir/test64.ok +++ b/src/testdir/test64.ok @@ -992,6 +992,9 @@ OK 2 - \(a*\)\@>b OK 0 - ^\(.\{-}b\)\@>. OK 1 - ^\(.\{-}b\)\@>. OK 2 - ^\(.\{-}b\)\@>. +OK 0 - \(.\{-}\)\(\)\@>$ +OK 1 - \(.\{-}\)\(\)\@>$ +OK 2 - \(.\{-}\)\(\)\@>$ OK 0 - \(a*\)\@>a\|a\+ OK 2 - \(a*\)\@>a\|a\+ OK 0 - \_[^8-9]\+ diff --git a/src/version.c b/src/version.c --- a/src/version.c +++ b/src/version.c @@ -739,6 +739,8 @@ static char *(features[]) = static int included_patches[] = { /* Add new patch number below this line */ /**/ + 36, +/**/ 35, /**/ 34,