changeset 5255:3c6e2b89875f v7.4b.004

updated for version 7.4b.004 Problem: Regexp crash on pattern "@\%[\w\-]*". (Axel Kielhorn) Solution: Add \%(\) around \%[] internally.
author Bram Moolenaar <bram@vim.org>
date Thu, 01 Aug 2013 18:38:26 +0200
parents d17b182a73a4
children 74301ef4297a
files src/regexp_nfa.c src/testdir/test64.in src/testdir/test64.ok src/version.c
diffstat 4 files changed, 30 insertions(+), 4 deletions(-) [+]
line wrap: on
line diff
--- a/src/regexp_nfa.c
+++ b/src/regexp_nfa.c
@@ -1166,6 +1166,15 @@ nfa_regatom()
 						      reg_magic == MAGIC_ALL);
 			EMIT(NFA_OPT_CHARS);
 			EMIT(n);
+
+			/* Emit as "\%(\%[abc]\)" to be able to handle
+			 * "\%[abc]*" which would cause the empty string to be
+			 * matched an unlimited number of times. NFA_NOPEN is
+			 * added only once at a position, while NFA_SPLIT is
+			 * added multiple times.  This is more efficient than
+			 * not allowing NFA_SPLIT multiple times, it is used
+			 * a lot. */
+			EMIT(NFA_NOPEN);
 			break;
 		    }
 
@@ -1641,7 +1650,7 @@ nfa_regpiece()
 	     * engine interprets the plus as "try matching one more time", and
 	     * a* matches a second time at the end of the input, the empty
 	     * string.
-	     * The submatch will the empty string.
+	     * The submatch will be the empty string.
 	     *
 	     * In order to be consistent with the old engine, we replace
 	     * <atom>+ with <atom><atom>*
@@ -2242,13 +2251,13 @@ nfa_postfix_dump(expr, retval)
 	else if (retval == OK)
 	    fprintf(f, ">>> NFA engine succeeded !\n");
 	fprintf(f, "Regexp: \"%s\"\nPostfix notation (char): \"", expr);
-	for (p = post_start; *p && p < post_end; p++)
+	for (p = post_start; *p && p < post_ptr; p++)
 	{
 	    nfa_set_code(*p);
 	    fprintf(f, "%s, ", code);
 	}
 	fprintf(f, "\"\nPostfix notation (int): ");
-	for (p = post_start; *p && p < post_end; p++)
+	for (p = post_start; *p && p < post_ptr; p++)
 		fprintf(f, "%d ", *p);
 	fprintf(f, "\n\n");
 	fclose(f);
@@ -3005,7 +3014,18 @@ post2nfa(postfix, end, nfa_calc_size)
 	  {
 	    int    n;
 
-	    /* \%[abc] */
+	    /* \%[abc] implemented as:
+	     *    NFA_SPLIT
+	     *    +-CHAR(a)
+	     *    | +-NFA_SPLIT
+	     *    |   +-CHAR(b)
+	     *    |   | +-NFA_SPLIT
+	     *    |   |   +-CHAR(c)
+	     *    |   |   | +-next
+	     *    |   |   +- next
+	     *    |   +- next
+	     *    +- next
+	     */
 	    n = *++p; /* get number of characters */
 	    if (nfa_calc_size == TRUE)
 	    {
--- a/src/testdir/test64.in
+++ b/src/testdir/test64.in
@@ -373,6 +373,7 @@ STARTTEST
 :call add(tl, [2, '\%[bar]x', 'xxx', 'x'])
 :call add(tl, [2, 'b\%[[ao]r]', 'bar bor', 'bar'])
 :call add(tl, [2, 'b\%[[]]r]', 'b]r bor', 'b]r'])
+:call add(tl, [2, '@\%[\w\-]*', '<http://john.net/pandoc/>[@pandoc]', '@pandoc'])
 :"
 :"""" Alternatives, must use first longest match
 :call add(tl, [2, 'goo\|go', 'google', 'goo'])
--- a/src/testdir/test64.ok
+++ b/src/testdir/test64.ok
@@ -857,6 +857,9 @@ OK 2 - b\%[[ao]r]
 OK 0 - b\%[[]]r]
 OK 1 - b\%[[]]r]
 OK 2 - b\%[[]]r]
+OK 0 - @\%[\w\-]*
+OK 1 - @\%[\w\-]*
+OK 2 - @\%[\w\-]*
 OK 0 - goo\|go
 OK 1 - goo\|go
 OK 2 - goo\|go
--- a/src/version.c
+++ b/src/version.c
@@ -728,6 +728,8 @@ static char *(features[]) =
 static int included_patches[] =
 {   /* Add new patch number below this line */
 /**/
+    4,
+/**/
     3,
 /**/
     2,