changeset 4657:93b7ed814bec v7.3.1076

Updated for version 7.3.1076. Problem: New regexp engine: \@= and \& don't work. Solution: Make these items work. Add column info to logging.
author Bram Moolenaar <bram@vim.org>
date Thu, 30 May 2013 21:42:13 +0200
parents 1a4b98208569
children 3be23951aa25
files src/regexp_nfa.c src/testdir/test64.in src/testdir/test64.ok src/version.c
diffstat 4 files changed, 85 insertions(+), 35 deletions(-) [+]
line wrap: on
line diff
--- a/src/regexp_nfa.c
+++ b/src/regexp_nfa.c
@@ -1740,8 +1740,8 @@ nfa_set_code(c)
 			    STRCPY(code, "NFA_PREV_ATOM_NO_WIDTH"); break;
 	case NFA_PREV_ATOM_NO_WIDTH_NEG:
 			    STRCPY(code, "NFA_PREV_ATOM_NO_WIDTH_NEG"); break;
-	case NFA_NOPEN:		    STRCPY(code, "NFA_MOPEN_INVISIBLE"); break;
-	case NFA_NCLOSE:	    STRCPY(code, "NFA_MCLOSE_INVISIBLE"); break;
+	case NFA_NOPEN:		    STRCPY(code, "NFA_NOPEN"); break;
+	case NFA_NCLOSE:	    STRCPY(code, "NFA_NCLOSE"); break;
 	case NFA_START_INVISIBLE:   STRCPY(code, "NFA_START_INVISIBLE"); break;
 	case NFA_END_INVISIBLE:	    STRCPY(code, "NFA_END_INVISIBLE"); break;
 
@@ -2373,12 +2373,9 @@ post2nfa(postfix, end, nfa_calc_size)
 	    break;
 
 	case NFA_PREV_ATOM_NO_WIDTH:
-	    /* The \@= operator: match the preceding atom with 0 width.
+	    /* The \@= operator: match the preceding atom with zero width.
 	     * Surrounds the preceding atom with START_INVISIBLE and
-	     * END_INVISIBLE, similarly to MOPEN.
-	     */
-	    /* TODO: Maybe this drops the speed? */
-	    goto theend;
+	     * END_INVISIBLE, similarly to MOPEN. */
 
 	    if (nfa_calc_size == TRUE)
 	    {
@@ -2745,6 +2742,9 @@ addstate(l, state, sub, off)
     int			save_in_use;
     char_u		*save_ptr;
     int			i;
+#ifdef ENABLE_LOG
+    int			did_print = FALSE;
+#endif
 
     if (l == NULL || state == NULL)
 	return;
@@ -2782,7 +2782,7 @@ addstate(l, state, sub, off)
 	    /* These nodes do not need to be added, but we need to bail out
 	     * when it was tried to be added to this list before. */
 	    if (state->lastlist == l->id)
-		return;
+		goto skip_add;
 	    state->lastlist = l->id;
 	    break;
 
@@ -2792,7 +2792,15 @@ addstate(l, state, sub, off)
 		/* This state is already in the list, don't add it again,
 		 * unless it is an MOPEN that is used for a backreference. */
 		if (!nfa_has_backref)
+		{
+skip_add:
+#ifdef ENABLE_LOG
+		    nfa_set_code(state->c);
+		    fprintf(log_fd, "> Not adding state %d to list %d. char %d: %s\n",
+			    abs(state->id), l->id, state->c, code);
+#endif
 		    return;
+		}
 
 		/* See if the same state is already in the list with the same
 		 * positions. */
@@ -2801,7 +2809,7 @@ addstate(l, state, sub, off)
 		    thread = &l->t[i];
 		    if (thread->state->id == state->id
 					  && sub_equal(&thread->sub, sub))
-			return;
+			goto skip_add;
 		}
 	    }
 
@@ -2832,12 +2840,39 @@ addstate(l, state, sub, off)
 				&sub->list.line[0],
 				sizeof(struct linepos) * sub->in_use);
 	    }
+#ifdef ENABLE_LOG
+	    {
+		int col;
+
+		if (thread->sub.in_use <= 0)
+		    col = -1;
+		else if (REG_MULTI)
+		    col = thread->sub.list.multi[0].start.col;
+		else
+		    col = (int)(thread->sub.list.line[0].start - regline);
+		nfa_set_code(state->c);
+		fprintf(log_fd, "> Adding state %d to list %d. char %d: %s (start col %d)\n",
+		        abs(state->id), l->id, state->c, code, col);
+		did_print = TRUE;
+	    }
+#endif
     }
 
 #ifdef ENABLE_LOG
-    nfa_set_code(state->c);
-    fprintf(log_fd, "> Adding state %d to list. Character %d: %s\n",
-	abs(state->id), state->c, code);
+    if (!did_print)
+    {
+	int col;
+
+	if (sub->in_use <= 0)
+	    col = -1;
+	else if (REG_MULTI)
+	    col = sub->list.multi[0].start.col;
+	else
+	    col = (int)(sub->list.line[0].start - regline);
+	nfa_set_code(state->c);
+	fprintf(log_fd, "> Processing state %d for list %d. char %d: %s (start col %d)\n",
+		abs(state->id), l->id, state->c, code, col);
+    }
 #endif
     switch (state->c)
     {
@@ -2873,14 +2908,6 @@ addstate(l, state, sub, off)
 	    addstate(l, state->out, sub, off);
 	    break;
 
-	/* If this state is reached, then a recursive call of nfa_regmatch()
-	 * succeeded. the next call saves the found submatches in the
-	 * first state after the "invisible" branch. */
-#if 0
-	case NFA_END_INVISIBLE:
-	    break;
-#endif
-
 	case NFA_MOPEN + 0:
 	case NFA_MOPEN + 1:
 	case NFA_MOPEN + 2:
@@ -3450,9 +3477,19 @@ nfa_regmatch(start, submatch, m)
 	    fprintf(debug, "%s, ", code);
 #endif
 #ifdef ENABLE_LOG
-	    nfa_set_code(t->state->c);
-	    fprintf(log_fd, "(%d) %s, code %d ... \n", abs(t->state->id),
-						      code, (int)t->state->c);
+	    {
+		int col;
+
+		if (t->sub.in_use <= 0)
+		    col = -1;
+		else if (REG_MULTI)
+		    col = t->sub.list.multi[0].start.col;
+		else
+		    col = (int)(t->sub.list.line[0].start - regline);
+		nfa_set_code(t->state->c);
+		fprintf(log_fd, "(%d) char %d %s (start col %d) ... \n",
+			abs(t->state->id), (int)t->state->c, code, col);
+	    }
 #endif
 
 	    /*
@@ -3504,6 +3541,7 @@ nfa_regmatch(start, submatch, m)
 		    addstate_here(thislist, t->state->out, &t->sub, &listidx);
 		else
 		{
+		    /* TODO: only copy positions in use. */
 		    *m = t->sub;
 		    nfa_match = TRUE;
 		}
@@ -3538,6 +3576,7 @@ nfa_regmatch(start, submatch, m)
 		result = nfa_regmatch(t->state->out, submatch, m);
 		nfa_set_neg_listids(start);
 		nfa_restore_listids(start, listids);
+		nfa_match = FALSE;
 
 #ifdef ENABLE_LOG
 		log_fd = fopen(NFA_REGEXP_RUN_LOG, "a");
@@ -3575,9 +3614,11 @@ nfa_regmatch(start, submatch, m)
 			    t->sub.list.line[j].start = m->list.line[j].start;
 			    t->sub.list.line[j].end = m->list.line[j].end;
 			}
-		    t->sub.in_use = m->in_use;
-
-		    /* t->state->out1 is the corresponding END_INVISIBLE node */
+		    if (m->in_use > t->sub.in_use)
+			t->sub.in_use = m->in_use;
+
+		    /* t->state->out1 is the corresponding END_INVISIBLE node;
+		     * Add it to the current list (zero-width match). */
 		    addstate_here(thislist, t->state->out1->out, &t->sub,
 								    &listidx);
 		}
@@ -4146,7 +4187,7 @@ nfa_regtry(start, col)
 	fprintf(f, "\tRegexp is \"%s\"\n", nfa_regengine.expr);
 #endif
 	fprintf(f, "\tInput text is \"%s\" \n", reginput);
-	fprintf(f, "		=======================================================\n\n\n\n\n\n\n");
+	fprintf(f, "		=======================================================\n\n");
 	nfa_print_state(f, start);
 	fprintf(f, "\n\n");
 	fclose(f);
--- a/src/testdir/test64.in
+++ b/src/testdir/test64.in
@@ -294,22 +294,22 @@ STARTTEST
 :call add(tl, [2, '\v(a \zsif .*){2}', 'a if then a if last', 'if last', 'a if last'])
 :call add(tl, [2, '\>\zs.', 'aword. ', '.'])
 :"
-:"""" Tests for \@ features
-:call add(tl, [0, 'abc\@=', 'abc', 'ab'])
-:call add(tl, [0, 'abc\@=cd', 'abcd', 'abcd'])
-:call add(tl, [0, 'abc\@=', 'ababc', 'ab'])
+:"""" Tests for \@= and \& features
+:call add(tl, [2, 'abc\@=', 'abc', 'ab'])
+:call add(tl, [2, 'abc\@=cd', 'abcd', 'abcd'])
+:call add(tl, [2, 'abc\@=', 'ababc', 'ab'])
 :" will never match, no matter the input text
 :call add(tl, [2, 'abcd\@=e', 'abcd'])
 :" will never match
 :call add(tl, [2, 'abcd\@=e', 'any text in here ... '])
-:call add(tl, [0, '\v(abc)@=..', 'xabcd', 'ab', 'abc'])
+:call add(tl, [2, '\v(abc)@=..', 'xabcd', 'ab', 'abc'])
 :" no match
 :call add(tl, [2, '\(.*John\)\@=.*Bob', 'here is John, and here is B'])
-:call add(tl, [0, '\(John.*\)\@=.*Bob', 'John is Bobs friend', 'John is Bob', 'John is Bobs friend'])
+:call add(tl, [2, '\(John.*\)\@=.*Bob', 'John is Bobs friend', 'John is Bob', 'John is Bobs friend'])
 :" no match
 :call add(tl, [2, '.*John\&.*Bob', 'here is John, and here is B'])
-:call add(tl, [0, '.*John\&.*Bob', 'John is Bobs friend', 'John is Bob'])
-:call add(tl, [0, '\v(test1)@=.*yep', 'this is a test1, yep it is', 'test1, yep', 'test1'])
+:call add(tl, [2, '.*John\&.*Bob', 'John is Bobs friend', 'John is Bob'])
+:call add(tl, [2, '\v(test1)@=.*yep', 'this is a test1, yep it is', 'test1, yep', 'test1'])
 :"
 :"""" Combining different tests and features
 :call add(tl, [2, '[[:alpha:]]\{-2,6}', '787abcdiuhsasiuhb4', 'ab'])
--- a/src/testdir/test64.ok
+++ b/src/testdir/test64.ok
@@ -647,10 +647,13 @@ OK 1 - \>\zs.
 OK 2 - \>\zs.
 OK 0 - abc\@=
 OK 1 - abc\@=
+OK 2 - abc\@=
 OK 0 - abc\@=cd
 OK 1 - abc\@=cd
+OK 2 - abc\@=cd
 OK 0 - abc\@=
 OK 1 - abc\@=
+OK 2 - abc\@=
 OK 0 - abcd\@=e
 OK 1 - abcd\@=e
 OK 2 - abcd\@=e
@@ -659,18 +662,22 @@ OK 1 - abcd\@=e
 OK 2 - abcd\@=e
 OK 0 - \v(abc)@=..
 OK 1 - \v(abc)@=..
+OK 2 - \v(abc)@=..
 OK 0 - \(.*John\)\@=.*Bob
 OK 1 - \(.*John\)\@=.*Bob
 OK 2 - \(.*John\)\@=.*Bob
 OK 0 - \(John.*\)\@=.*Bob
 OK 1 - \(John.*\)\@=.*Bob
+OK 2 - \(John.*\)\@=.*Bob
 OK 0 - .*John\&.*Bob
 OK 1 - .*John\&.*Bob
 OK 2 - .*John\&.*Bob
 OK 0 - .*John\&.*Bob
 OK 1 - .*John\&.*Bob
+OK 2 - .*John\&.*Bob
 OK 0 - \v(test1)@=.*yep
 OK 1 - \v(test1)@=.*yep
+OK 2 - \v(test1)@=.*yep
 OK 0 - [[:alpha:]]\{-2,6}
 OK 1 - [[:alpha:]]\{-2,6}
 OK 2 - [[:alpha:]]\{-2,6}
--- a/src/version.c
+++ b/src/version.c
@@ -729,6 +729,8 @@ static char *(features[]) =
 static int included_patches[] =
 {   /* Add new patch number below this line */
 /**/
+    1076,
+/**/
     1075,
 /**/
     1074,