changeset 4675:811a4c9b51d8 v7.3.1085

Updated for version 7.3.1085. Problem: New regexp engine: Non-greedy multi doesn't work. Solution: Implement \{-}.
author Bram Moolenaar <bram@vim.org>
date Sat, 01 Jun 2013 12:40:20 +0200
parents 7099f98528b4
children 9d3768ec9d74
files src/regexp_nfa.c src/testdir/test64.in src/testdir/test64.ok src/version.c
diffstat 4 files changed, 52 insertions(+), 20 deletions(-) [+]
line wrap: on
line diff
--- a/src/regexp_nfa.c
+++ b/src/regexp_nfa.c
@@ -38,9 +38,10 @@ enum
 
     NFA_CONCAT,
     NFA_OR,
-    NFA_STAR,
-    NFA_QUEST,
-    NFA_QUEST_NONGREEDY,	    /* Non-greedy version of \? */
+    NFA_STAR,			    /* greedy * */
+    NFA_STAR_NONGREEDY,		    /* non-greedy * */
+    NFA_QUEST,			    /* greedy \? */
+    NFA_QUEST_NONGREEDY,	    /* non-greedy \? */
     NFA_NOT,			    /* used for [^ab] negated char ranges */
 
     NFA_BOL,			    /* ^    Begin line */
@@ -1430,16 +1431,17 @@ nfa_regpiece()
 	    }
 	    /*  <atom>{0,inf}, <atom>{0,} and <atom>{}  are equivalent to
 	     *  <atom>*  */
-	    if (minval == 0 && maxval == MAX_LIMIT && greedy)
+	    if (minval == 0 && maxval == MAX_LIMIT)
 	    {
-		EMIT(NFA_STAR);
+		if (greedy)
+		    /* \{}, \{0,} */
+		    EMIT(NFA_STAR);
+		else
+		    /* \{-}, \{-0,} */
+		    EMIT(NFA_STAR_NONGREEDY);
 		break;
 	    }
 
-	    /* TODO: \{-} doesn't work yet */
-	    if (maxval == MAX_LIMIT && !greedy)
-		return FAIL;
-
 	    /* Special case: x{0} or x{-0} */
 	    if (maxval == 0)
 	    {
@@ -1470,7 +1472,12 @@ nfa_regpiece()
 		if (i + 1 > minval)
 		{
 		    if (maxval == MAX_LIMIT)
-			EMIT(NFA_STAR);
+		    {
+			if (greedy)
+			    EMIT(NFA_STAR);
+			else
+			    EMIT(NFA_STAR_NONGREEDY);
+		    }
 		    else
 			EMIT(quest);
 		}
@@ -1776,11 +1783,12 @@ nfa_set_code(c)
 	case NFA_EOF:		STRCPY(code, "NFA_EOF "); break;
 	case NFA_BOF:		STRCPY(code, "NFA_BOF "); break;
 	case NFA_STAR:		STRCPY(code, "NFA_STAR "); break;
+	case NFA_STAR_NONGREEDY: STRCPY(code, "NFA_STAR_NONGREEDY "); break;
+	case NFA_QUEST:		STRCPY(code, "NFA_QUEST"); break;
+	case NFA_QUEST_NONGREEDY: STRCPY(code, "NFA_QUEST_NON_GREEDY"); break;
 	case NFA_NOT:		STRCPY(code, "NFA_NOT "); break;
 	case NFA_SKIP_CHAR:	STRCPY(code, "NFA_SKIP_CHAR"); break;
 	case NFA_OR:		STRCPY(code, "NFA_OR"); break;
-	case NFA_QUEST:		STRCPY(code, "NFA_QUEST"); break;
-	case NFA_QUEST_NONGREEDY: STRCPY(code, "NFA_QUEST_NON_GREEDY"); break;
 	case NFA_END_NEG_RANGE:	STRCPY(code, "NFA_END_NEG_RANGE"); break;
 	case NFA_CLASS_ALNUM:	STRCPY(code, "NFA_CLASS_ALNUM"); break;
 	case NFA_CLASS_ALPHA:	STRCPY(code, "NFA_CLASS_ALPHA"); break;
@@ -2297,7 +2305,7 @@ post2nfa(postfix, end, nfa_calc_size)
 	    break;
 
 	case NFA_STAR:
-	    /* Zero or more */
+	    /* Zero or more, prefer more */
 	    if (nfa_calc_size == TRUE)
 	    {
 		nstate++;
@@ -2311,6 +2319,21 @@ post2nfa(postfix, end, nfa_calc_size)
 	    PUSH(frag(s, list1(&s->out1)));
 	    break;
 
+	case NFA_STAR_NONGREEDY:
+	    /* Zero or more, prefer zero */
+	    if (nfa_calc_size == TRUE)
+	    {
+		nstate++;
+		break;
+	    }
+	    e = POP();
+	    s = new_state(NFA_SPLIT, NULL, e.start);
+	    if (s == NULL)
+		goto theend;
+	    patch(e.out, s);
+	    PUSH(frag(s, list1(&s->out)));
+	    break;
+
 	case NFA_QUEST:
 	    /* one or zero atoms=> greedy match */
 	    if (nfa_calc_size == TRUE)
--- a/src/testdir/test64.in
+++ b/src/testdir/test64.in
@@ -23,8 +23,8 @@ STARTTEST
 :call add(tl, [2, 'ab', 'aab', 'ab'])
 :call add(tl, [2, 'b', 'abcdef', 'b'])
 :call add(tl, [2, 'bc*', 'abccccdef', 'bcccc'])
-:call add(tl, [0, 'bc\{-}', 'abccccdef', 'b'])
-:call add(tl, [0, 'bc\{-}\(d\)', 'abccccdef', 'bccccd', 'd'])
+:call add(tl, [2, 'bc\{-}', 'abccccdef', 'b'])
+:call add(tl, [2, 'bc\{-}\(d\)', 'abccccdef', 'bccccd', 'd'])
 :call add(tl, [2, 'bc*', 'abbdef', 'b'])
 :call add(tl, [2, 'c*', 'ccc', 'ccc'])
 :call add(tl, [2, 'bc*', 'abdef', 'b'])
@@ -201,16 +201,16 @@ STARTTEST
 :call add(tl, [2, 'a\{-0}', 'asoiuj', ''])
 :call add(tl, [2, 'a\{-2}', 'aaaa', 'aa'])
 :call add(tl, [2, 'a\{-2}', 'abcdefghijklmnopqrestuvwxyz1234567890'])
-:call add(tl, [0, 'a\{-0,}', 'oij sdigfusnf', ''])
-:call add(tl, [0, 'a\{-0,}', 'aaaaa aa', ''])
+:call add(tl, [2, 'a\{-0,}', 'oij sdigfusnf', ''])
+:call add(tl, [2, 'a\{-0,}', 'aaaaa aa', ''])
 :call add(tl, [2, 'a\{-2,}', 'sdfiougjdsafg'])
-:call add(tl, [0, 'a\{-2,}', 'aaaaasfoij ', 'aa'])
+:call add(tl, [2, 'a\{-2,}', 'aaaaasfoij ', 'aa'])
 :call add(tl, [2, 'a\{-,0}', 'oidfguih iuhi hiu aaaa', ''])
 :call add(tl, [2, 'a\{-,5}', 'abcd', ''])
 :call add(tl, [2, 'a\{-,5}', 'aaaaaaaaaa', ''])
 :" anti-greedy version of 'a*'
-:call add(tl, [0, 'a\{-}', 'bbbcddiuhfcd', ''])
-:call add(tl, [0, 'a\{-}', 'aaaaioudfh coisf jda', ''])
+:call add(tl, [2, 'a\{-}', 'bbbcddiuhfcd', ''])
+:call add(tl, [2, 'a\{-}', 'aaaaioudfh coisf jda', ''])
 :"
 :" Test groups of characters and submatches
 :call add(tl, [2, '\(abc\)*', 'abcabcabc', 'abcabcabc', 'abc'])
--- a/src/testdir/test64.ok
+++ b/src/testdir/test64.ok
@@ -10,8 +10,10 @@ OK 1 - bc*
 OK 2 - bc*
 OK 0 - bc\{-}
 OK 1 - bc\{-}
+OK 2 - bc\{-}
 OK 0 - bc\{-}\(d\)
 OK 1 - bc\{-}\(d\)
+OK 2 - bc\{-}\(d\)
 OK 0 - bc*
 OK 1 - bc*
 OK 2 - bc*
@@ -437,13 +439,16 @@ OK 1 - a\{-2}
 OK 2 - a\{-2}
 OK 0 - a\{-0,}
 OK 1 - a\{-0,}
+OK 2 - a\{-0,}
 OK 0 - a\{-0,}
 OK 1 - a\{-0,}
+OK 2 - a\{-0,}
 OK 0 - a\{-2,}
 OK 1 - a\{-2,}
 OK 2 - a\{-2,}
 OK 0 - a\{-2,}
 OK 1 - a\{-2,}
+OK 2 - a\{-2,}
 OK 0 - a\{-,0}
 OK 1 - a\{-,0}
 OK 2 - a\{-,0}
@@ -455,8 +460,10 @@ OK 1 - a\{-,5}
 OK 2 - a\{-,5}
 OK 0 - a\{-}
 OK 1 - a\{-}
+OK 2 - a\{-}
 OK 0 - a\{-}
 OK 1 - a\{-}
+OK 2 - a\{-}
 OK 0 - \(abc\)*
 OK 1 - \(abc\)*
 OK 2 - \(abc\)*
--- a/src/version.c
+++ b/src/version.c
@@ -729,6 +729,8 @@ static char *(features[]) =
 static int included_patches[] =
 {   /* Add new patch number below this line */
 /**/
+    1085,
+/**/
     1084,
 /**/
     1083,