changeset 4673:05d57d7c2d55 v7.3.1084

updated for version 7.3.1084 Problem: New regexp engine: only accepts up to \{,10}. Solution: Remove upper limit. Remove dead code with NFA_PLUS.
author Bram Moolenaar <bram@vim.org>
date Fri, 31 May 2013 23:18:00 +0200
parents eada32f930a3
children 7099f98528b4
files src/regexp_nfa.c src/testdir/test64.in src/testdir/test64.ok src/version.c
diffstat 4 files changed, 35 insertions(+), 34 deletions(-) [+]
line wrap: on
line diff
--- a/src/regexp_nfa.c
+++ b/src/regexp_nfa.c
@@ -29,11 +29,6 @@
 # define NFA_REGEXP_DEBUG_LOG	"nfa_regexp_debug.log"
 #endif
 
-/* Upper limit allowed for {m,n} repetitions handled by NFA */
-#define	    NFA_BRACES_MAXLIMIT		    10
-/* For allocating space for the postfix representation */
-#define	    NFA_POSTFIX_MULTIPLIER	    (NFA_BRACES_MAXLIMIT + 2)*2
-
 enum
 {
     NFA_SPLIT = -1024,
@@ -44,7 +39,6 @@ enum
     NFA_CONCAT,
     NFA_OR,
     NFA_STAR,
-    NFA_PLUS,
     NFA_QUEST,
     NFA_QUEST_NONGREEDY,	    /* Non-greedy version of \? */
     NFA_NOT,			    /* used for [^ab] negated char ranges */
@@ -253,7 +247,7 @@ nfa_regcomp_start(expr, re_flags)
     nstate = 0;
     istate = 0;
     /* A reasonable estimation for maximum size */
-    nstate_max = (int)(STRLEN(expr) + 1) * NFA_POSTFIX_MULTIPLIER;
+    nstate_max = (int)(STRLEN(expr) + 1) * 25;
 
     /* Some items blow up in size, such as [A-z].  Add more space for that.
      * When it is still not enough realloc_post_list() will be used. */
@@ -1365,10 +1359,9 @@ nfa_regpiece()
 	     * string.
 	     * The submatch will the empty string.
 	     *
-	     * In order to be consistent with the old engine, we disable
-	     * NFA_PLUS, and replace <atom>+ with <atom><atom>*
+	     * In order to be consistent with the old engine, we replace
+	     * <atom>+ with <atom><atom>*
 	     */
-	    /*	EMIT(NFA_PLUS);	 */
 	    regnpar = old_regnpar;
 	    regparse = old_regparse;
 	    curchr = -1;
@@ -1443,12 +1436,9 @@ nfa_regpiece()
 		break;
 	    }
 
-	    if (maxval > NFA_BRACES_MAXLIMIT)
-	    {
-		/* This would yield a huge automaton and use too much memory.
-		 * Revert to old engine */
+	    /* TODO: \{-} doesn't work yet */
+	    if (maxval == MAX_LIMIT && !greedy)
 		return FAIL;
-	    }
 
 	    /* Special case: x{0} or x{-0} */
 	    if (maxval == 0)
@@ -1478,9 +1468,16 @@ nfa_regpiece()
 		    return FAIL;
 		/* after "minval" times, atoms are optional */
 		if (i + 1 > minval)
-		    EMIT(quest);
+		{
+		    if (maxval == MAX_LIMIT)
+			EMIT(NFA_STAR);
+		    else
+			EMIT(quest);
+		}
 		if (old_post_pos != my_post_start)
 		    EMIT(NFA_CONCAT);
+		if (i + 1 > minval && maxval == MAX_LIMIT)
+		    break;
 	    }
 
 	    /* Go to just after the repeated atom and the \{} */
@@ -1779,7 +1776,6 @@ nfa_set_code(c)
 	case NFA_EOF:		STRCPY(code, "NFA_EOF "); break;
 	case NFA_BOF:		STRCPY(code, "NFA_BOF "); break;
 	case NFA_STAR:		STRCPY(code, "NFA_STAR "); break;
-	case NFA_PLUS:		STRCPY(code, "NFA_PLUS "); break;
 	case NFA_NOT:		STRCPY(code, "NFA_NOT "); break;
 	case NFA_SKIP_CHAR:	STRCPY(code, "NFA_SKIP_CHAR"); break;
 	case NFA_OR:		STRCPY(code, "NFA_OR"); break;
@@ -2343,21 +2339,6 @@ post2nfa(postfix, end, nfa_calc_size)
 	    PUSH(frag(s, append(e.out, list1(&s->out))));
 	    break;
 
-	case NFA_PLUS:
-	    /* One or more */
-	    if (nfa_calc_size == TRUE)
-	    {
-		nstate++;
-		break;
-	    }
-	    e = POP();
-	    s = new_state(NFA_SPLIT, e.start, NULL);
-	    if (s == NULL)
-		goto theend;
-	    patch(e.out, s);
-	    PUSH(frag(e.start, list1(&s->out1)));
-	    break;
-
 	case NFA_SKIP_CHAR:
 	    /* Symbol of 0-length, Used in a repetition
 	     * with max/min count of 0 */
--- a/src/testdir/test64.in
+++ b/src/testdir/test64.in
@@ -182,7 +182,9 @@ STARTTEST
 :call add(tl, [2, 'a\{0,}', 'oij sdigfusnf', ''])
 :call add(tl, [2, 'a\{0,}', 'aaaaa aa', 'aaaaa'])
 :call add(tl, [2, 'a\{2,}', 'sdfiougjdsafg'])
-:call add(tl, [0, 'a\{2,}', 'aaaaasfoij ', 'aaaaa'])
+:call add(tl, [2, 'a\{2,}', 'aaaaasfoij ', 'aaaaa'])
+:call add(tl, [2, 'a\{5,}', 'xxaaaaxxx '])
+:call add(tl, [2, 'a\{5,}', 'xxaaaaaxxx ', 'aaaaa'])
 :call add(tl, [2, 'a\{,0}', 'oidfguih iuhi hiu aaaa', ''])
 :call add(tl, [2, 'a\{,5}', 'abcd', 'a'])
 :call add(tl, [2, 'a\{,5}', 'aaaaaaaaaa', 'aaaaa'])
@@ -225,7 +227,9 @@ STARTTEST
 :"
 :" Test greedy-ness and lazy-ness
 :call add(tl, [2, 'a\{-2,7}','aaaaaaaaaaaaa', 'aa'])
+:call add(tl, [2, 'a\{-2,7}x','aaaaaaaaax', 'aaaaaaax'])
 :call add(tl, [2, 'a\{2,7}','aaaaaaaaaaaaaaaaaaaa', 'aaaaaaa'])
+:call add(tl, [2, 'a\{2,7}x','aaaaaaaaax', 'aaaaaaax'])
 :call add(tl, [2, '\vx(.{-,8})yz(.*)','xayxayzxayzxayz','xayxayzxayzxayz','ayxa','xayzxayz'])
 :call add(tl, [2, '\vx(.*)yz(.*)','xayxayzxayzxayz','xayxayzxayzxayz', 'ayxayzxayzxa',''])
 :call add(tl, [2, '\v(a{1,2}){-2,3}','aaaaaaa','aaaa','aa'])
@@ -366,7 +370,7 @@ STARTTEST
 :call add(tl, [2, '\_[^a]\+', "asfi\n9888", "sfi\n9888"])
 :"
 :"""" Requiring lots of states.
-:call add(tl, [0, '[0-9a-zA-Z]\{8}-\([0-9a-zA-Z]\{4}-\)\{3}[0-9a-zA-Z]\{12}', " 12345678-1234-1234-1234-123456789012 ", "12345678-1234-1234-1234-123456789012", "1234-"])
+:call add(tl, [2, '[0-9a-zA-Z]\{8}-\([0-9a-zA-Z]\{4}-\)\{3}[0-9a-zA-Z]\{12}', " 12345678-1234-1234-1234-123456789012 ", "12345678-1234-1234-1234-123456789012", "1234-"])
 :"
 :"
 :"""" Run the tests
--- a/src/testdir/test64.ok
+++ b/src/testdir/test64.ok
@@ -389,6 +389,13 @@ OK 1 - a\{2,}
 OK 2 - a\{2,}
 OK 0 - a\{2,}
 OK 1 - a\{2,}
+OK 2 - a\{2,}
+OK 0 - a\{5,}
+OK 1 - a\{5,}
+OK 2 - a\{5,}
+OK 0 - a\{5,}
+OK 1 - a\{5,}
+OK 2 - a\{5,}
 OK 0 - a\{,0}
 OK 1 - a\{,0}
 OK 2 - a\{,0}
@@ -486,9 +493,15 @@ OK 2 - \v(a|b*)+
 OK 0 - a\{-2,7}
 OK 1 - a\{-2,7}
 OK 2 - a\{-2,7}
+OK 0 - a\{-2,7}x
+OK 1 - a\{-2,7}x
+OK 2 - a\{-2,7}x
 OK 0 - a\{2,7}
 OK 1 - a\{2,7}
 OK 2 - a\{2,7}
+OK 0 - a\{2,7}x
+OK 1 - a\{2,7}x
+OK 2 - a\{2,7}x
 OK 0 - \vx(.{-,8})yz(.*)
 OK 1 - \vx(.{-,8})yz(.*)
 OK 2 - \vx(.{-,8})yz(.*)
@@ -803,6 +816,7 @@ OK 1 - \_[^a]\+
 OK 2 - \_[^a]\+
 OK 0 - [0-9a-zA-Z]\{8}-\([0-9a-zA-Z]\{4}-\)\{3}[0-9a-zA-Z]\{12}
 OK 1 - [0-9a-zA-Z]\{8}-\([0-9a-zA-Z]\{4}-\)\{3}[0-9a-zA-Z]\{12}
+OK 2 - [0-9a-zA-Z]\{8}-\([0-9a-zA-Z]\{4}-\)\{3}[0-9a-zA-Z]\{12}
 192.168.0.1
 192.168.0.1
 192.168.0.1
--- a/src/version.c
+++ b/src/version.c
@@ -729,6 +729,8 @@ static char *(features[]) =
 static int included_patches[] =
 {   /* Add new patch number below this line */
 /**/
+    1084,
+/**/
     1083,
 /**/
     1082,