changeset 5370:90e2f0729a0d v7.4.036

updated for version 7.4.036 Problem: NFA engine does not capture group correctly when using \@>. (ZyX) Solution: Copy submatches before doing the recursive match.
author Bram Moolenaar <bram@vim.org>
date Wed, 25 Sep 2013 16:41:54 +0200
parents 5bfefa47d4a0
children 2238fb040256
files src/regexp_nfa.c src/testdir/test64.in src/testdir/test64.ok src/version.c
diffstat 4 files changed, 33 insertions(+), 17 deletions(-) [+]
line wrap: on
line diff
--- a/src/regexp_nfa.c
+++ b/src/regexp_nfa.c
@@ -36,7 +36,7 @@ enum
 {
     NFA_SPLIT = -1024,
     NFA_MATCH,
-    NFA_SKIP_CHAR,		    /* matches a 0-length char */
+    NFA_EMPTY,			    /* matches 0-length */
 
     NFA_START_COLL,		    /* [abc] start */
     NFA_END_COLL,		    /* [abc] end */
@@ -2005,8 +2005,8 @@ nfa_regpiece()
 	    {
 		/* Ignore result of previous call to nfa_regatom() */
 		post_ptr = post_start + my_post_start;
-		/* NFA_SKIP_CHAR has 0-length and works everywhere */
-		EMIT(NFA_SKIP_CHAR);
+		/* NFA_EMPTY is 0-length and works everywhere */
+		EMIT(NFA_EMPTY);
 		return OK;
 	    }
 
@@ -2170,16 +2170,16 @@ nfa_regbranch()
 	old_post_pos = (int)(post_ptr - post_start);
 	if (nfa_regconcat() == FAIL)
 	    return FAIL;
-	/* if concat is empty, skip a input char. But do emit a node */
+	/* if concat is empty do emit a node */
 	if (old_post_pos == (int)(post_ptr - post_start))
-	    EMIT(NFA_SKIP_CHAR);
+	    EMIT(NFA_EMPTY);
 	EMIT(NFA_CONCAT);
 	ch = peekchr();
     }
 
-    /* Even if a branch is empty, emit one node for it */
+    /* if a branch is empty, emit one node for it */
     if (old_post_pos == (int)(post_ptr - post_start))
-	EMIT(NFA_SKIP_CHAR);
+	EMIT(NFA_EMPTY);
 
     return OK;
 }
@@ -2423,7 +2423,7 @@ nfa_set_code(c)
 	case NFA_STAR_NONGREEDY: STRCPY(code, "NFA_STAR_NONGREEDY "); break;
 	case NFA_QUEST:		STRCPY(code, "NFA_QUEST"); break;
 	case NFA_QUEST_NONGREEDY: STRCPY(code, "NFA_QUEST_NON_GREEDY"); break;
-	case NFA_SKIP_CHAR:	STRCPY(code, "NFA_SKIP_CHAR"); break;
+	case NFA_EMPTY:		STRCPY(code, "NFA_EMPTY"); break;
 	case NFA_OR:		STRCPY(code, "NFA_OR"); break;
 
 	case NFA_START_COLL:	STRCPY(code, "NFA_START_COLL"); break;
@@ -3067,7 +3067,7 @@ nfa_max_width(startstate, depth)
 	    case NFA_ZSTART:
 	    case NFA_ZEND:
 	    case NFA_OPT_CHARS:
-	    case NFA_SKIP_CHAR:
+	    case NFA_EMPTY:
 	    case NFA_START_PATTERN:
 	    case NFA_END_PATTERN:
 	    case NFA_COMPOSING:
@@ -3265,15 +3265,14 @@ post2nfa(postfix, end, nfa_calc_size)
 	    PUSH(frag(e1.start, e2.out));
 	    break;
 
-	case NFA_SKIP_CHAR:
-	    /* Symbol of 0-length, Used in a repetition
-	     * with max/min count of 0 */
+	case NFA_EMPTY:
+	    /* 0-length, used in a repetition with max/min count of 0 */
 	    if (nfa_calc_size == TRUE)
 	    {
 		nstate++;
 		break;
 	    }
-	    s = alloc_state(NFA_SKIP_CHAR, NULL, NULL);
+	    s = alloc_state(NFA_EMPTY, NULL, NULL);
 	    if (s == NULL)
 		goto theend;
 	    PUSH(frag(s, list1(&s->out)));
@@ -4209,7 +4208,7 @@ addstate(l, state, subs_arg, pim, off)
 	case NFA_MOPEN:
 	case NFA_ZEND:
 	case NFA_SPLIT:
-	case NFA_SKIP_CHAR:
+	case NFA_EMPTY:
 	    /* These nodes are not added themselves but their "out" and/or
 	     * "out1" may be added below.  */
 	    break;
@@ -4337,7 +4336,7 @@ skip_add:
 	    subs = addstate(l, state->out1, subs, pim, off);
 	    break;
 
-	case NFA_SKIP_CHAR:
+	case NFA_EMPTY:
 	case NFA_NOPEN:
 	case NFA_NCLOSE:
 	    subs = addstate(l, state->out, subs, pim, off);
@@ -5604,9 +5603,13 @@ nfa_regmatch(prog, start, submatch, m)
 		    {
 			int in_use = m->norm.in_use;
 
-			/* Copy submatch info for the recursive call, so that
-			 * \1 can be matched. */
+			/* Copy submatch info for the recursive call, opposite
+			 * of what happens on success below. */
 			copy_sub_off(&m->norm, &t->subs.norm);
+#ifdef FEAT_SYN_HL
+			if (nfa_has_zsubexpr)
+			    copy_sub_off(&m->synt, &t->subs.synt);
+#endif
 
 			/*
 			 * First try matching the invisible match, then what
@@ -5713,6 +5716,13 @@ nfa_regmatch(prog, start, submatch, m)
 #endif
 		    break;
 		}
+		/* Copy submatch info to the recursive call, opposite of what
+		 * happens afterwards. */
+		copy_sub_off(&m->norm, &t->subs.norm);
+#ifdef FEAT_SYN_HL
+		if (nfa_has_zsubexpr)
+		    copy_sub_off(&m->synt, &t->subs.synt);
+#endif
 
 		/* First try matching the pattern. */
 		result = recursive_regmatch(t->state, NULL, prog,
--- a/src/testdir/test64.in
+++ b/src/testdir/test64.in
@@ -430,6 +430,7 @@ STARTTEST
 :call add(tl, [2, '\(a*\)\@>a', 'aaaa'])
 :call add(tl, [2, '\(a*\)\@>b', 'aaab', 'aaab', 'aaa'])
 :call add(tl, [2, '^\(.\{-}b\)\@>.', '  abcbd', '  abc', '  ab'])
+:call add(tl, [2, '\(.\{-}\)\(\)\@>$', 'abc', 'abc', 'abc', ''])
 :" TODO: BT engine does not restore submatch after failure
 :call add(tl, [1, '\(a*\)\@>a\|a\+', 'aaaa', 'aaaa'])
 :"
--- a/src/testdir/test64.ok
+++ b/src/testdir/test64.ok
@@ -992,6 +992,9 @@ OK 2 - \(a*\)\@>b
 OK 0 - ^\(.\{-}b\)\@>.
 OK 1 - ^\(.\{-}b\)\@>.
 OK 2 - ^\(.\{-}b\)\@>.
+OK 0 - \(.\{-}\)\(\)\@>$
+OK 1 - \(.\{-}\)\(\)\@>$
+OK 2 - \(.\{-}\)\(\)\@>$
 OK 0 - \(a*\)\@>a\|a\+
 OK 2 - \(a*\)\@>a\|a\+
 OK 0 - \_[^8-9]\+
--- a/src/version.c
+++ b/src/version.c
@@ -739,6 +739,8 @@ static char *(features[]) =
 static int included_patches[] =
 {   /* Add new patch number below this line */
 /**/
+    36,
+/**/
     35,
 /**/
     34,