changeset 4651:f10f63aaec5c v7.3.1073

updated for version 7.3.1073 Problem: New regexp engine may run out of states. Solution: Allocate states dynamically. Also make the test report errors.
author Bram Moolenaar <bram@vim.org>
date Thu, 30 May 2013 18:45:23 +0200
parents e030bcb72325
children b1c4a737f6d1
files src/regexp_nfa.c src/testdir/test64.in src/testdir/test64.ok src/testdir/test95.in src/version.c
diffstat 5 files changed, 58 insertions(+), 17 deletions(-) [+]
line wrap: on
line diff
--- a/src/regexp_nfa.c
+++ b/src/regexp_nfa.c
@@ -233,7 +233,7 @@ static long nfa_regexec_multi __ARGS((re
 
 /* helper functions used when doing re2post() ... regatom() parsing */
 #define EMIT(c)	do {				\
-		    if (post_ptr >= post_end)	\
+		    if (post_ptr >= post_end && realloc_post_list() == FAIL) \
 			return FAIL;		\
 		    *post_ptr++ = c;		\
 		} while (0)
@@ -256,11 +256,11 @@ nfa_regcomp_start(expr, re_flags)
     nstate_max = (int)(STRLEN(expr) + 1) * NFA_POSTFIX_MULTIPLIER;
 
     /* Some items blow up in size, such as [A-z].  Add more space for that.
-     * TODO: some patterns may still fail. */
+     * When it is still not enough realloc_post_list() will be used. */
     nstate_max += 1000;
 
     /* Size for postfix representation of expr. */
-    postfix_size = sizeof(*post_start) * nstate_max;
+    postfix_size = sizeof(int) * nstate_max;
 
     post_start = (int *)lalloc(postfix_size, TRUE);
     if (post_start == NULL)
@@ -277,6 +277,31 @@ nfa_regcomp_start(expr, re_flags)
 }
 
 /*
+ * Allocate more space for post_start.  Called when
+ * running above the estimated number of states.
+ */
+    static int
+realloc_post_list()
+{
+    int   nstate_max = post_end - post_start;
+    int   new_max = nstate_max + 1000;
+    int   *new_start;
+    int	  *old_start;
+
+    new_start = (int *)lalloc(new_max * sizeof(int), TRUE);
+    if (new_start == NULL)
+	return FAIL;
+    mch_memmove(new_start, post_start, nstate_max * sizeof(int));
+    vim_memset(new_start + nstate_max, 0, 1000 * sizeof(int));
+    old_start = post_start;
+    post_start = new_start;
+    post_ptr = new_start + (post_ptr - old_start);
+    post_end = post_start + new_max;
+    vim_free(old_start);
+    return OK;
+}
+
+/*
  * Search between "start" and "end" and try to recognize a
  * character class in expanded form. For example [0-9].
  * On success, return the id the character class to be emitted.
@@ -1306,7 +1331,8 @@ nfa_regpiece()
     int		greedy = TRUE;      /* Braces are prefixed with '-' ? */
     char_u	*old_regparse, *new_regparse;
     int		c2;
-    int		*old_post_ptr, *my_post_start;
+    int		old_post_pos;
+    int		my_post_start;
     int		old_regnpar;
     int		quest;
 
@@ -1317,7 +1343,7 @@ nfa_regpiece()
      * <atom>{m,n} is next */
     old_regnpar = regnpar;
     /* store current pos in the postfix form, for \{m,n} involving 0s */
-    my_post_start = post_ptr;
+    my_post_start = (int)(post_ptr - post_start);
 
     ret = nfa_regatom();
     if (ret == FAIL)
@@ -1430,14 +1456,14 @@ nfa_regpiece()
 	    if (maxval == 0)
 	    {
 		/* Ignore result of previous call to nfa_regatom() */
-		post_ptr = my_post_start;
+		post_ptr = post_start + my_post_start;
 		/* NFA_SKIP_CHAR has 0-length and works everywhere */
 		EMIT(NFA_SKIP_CHAR);
 		return OK;
 	    }
 
 	    /* Ignore previous call to nfa_regatom() */
-	    post_ptr = my_post_start;
+	    post_ptr = post_start + my_post_start;
 	    /* Save pos after the repeated atom and the \{} */
 	    new_regparse = regparse;
 
@@ -1449,13 +1475,13 @@ nfa_regpiece()
 		curchr = -1;
 		/* Restore count of parenthesis */
 		regnpar = old_regnpar;
-		old_post_ptr = post_ptr;
+		old_post_pos = (int)(post_ptr - post_start);
 		if (nfa_regatom() == FAIL)
 		    return FAIL;
 		/* after "minval" times, atoms are optional */
 		if (i + 1 > minval)
 		    EMIT(quest);
-		if (old_post_ptr != my_post_start)
+		if (old_post_pos != my_post_start)
 		    EMIT(NFA_CONCAT);
 	    }
 
@@ -1572,9 +1598,9 @@ nfa_regconcat()
 nfa_regbranch()
 {
     int		ch;
-    int		*old_post_ptr;
-
-    old_post_ptr = post_ptr;
+    int		old_post_pos;
+
+    old_post_pos = (int)(post_ptr - post_start);
 
     /* First branch, possibly the only one */
     if (nfa_regconcat() == FAIL)
@@ -1587,18 +1613,18 @@ nfa_regbranch()
 	skipchr();
 	EMIT(NFA_NOPEN);
 	EMIT(NFA_PREV_ATOM_NO_WIDTH);
-	old_post_ptr = post_ptr;
+	old_post_pos = (int)(post_ptr - post_start);
 	if (nfa_regconcat() == FAIL)
 	    return FAIL;
 	/* if concat is empty, skip a input char. But do emit a node */
-	if (old_post_ptr == post_ptr)
+	if (old_post_pos == (int)(post_ptr - post_start))
 	    EMIT(NFA_SKIP_CHAR);
 	EMIT(NFA_CONCAT);
 	ch = peekchr();
     }
 
     /* Even if a branch is empty, emit one node for it */
-    if (old_post_ptr == post_ptr)
+    if (old_post_pos == (int)(post_ptr - post_start))
 	EMIT(NFA_SKIP_CHAR);
 
     return OK;
--- a/src/testdir/test64.in
+++ b/src/testdir/test64.in
@@ -348,6 +348,9 @@ STARTTEST
 :call add(tl, [2, '\_[^8-9]\+', "asfi\n9888", "asfi\n"])
 :call add(tl, [2, '\_[^a]\+', "asfi\n9888", "sfi\n9888"])
 :"
+:"""" Requiring lots of states.
+:call add(tl, [0, '[0-9a-zA-Z]\{8}-\([0-9a-zA-Z]\{4}-\)\{3}[0-9a-zA-Z]\{12}', " 12345678-1234-1234-1234-123456789012 ", "12345678-1234-1234-1234-123456789012", "1234-"])
+:"
 :"
 :"""" Run the tests
 :"
@@ -361,7 +364,11 @@ STARTTEST
 :      continue
 :    endif
 :    let &regexpengine = engine
-:    let l = matchlist(text, pat)
+:    try
+:      let l = matchlist(text, pat)
+:    catch
+:      $put ='ERROR: pat: \"' . pat . '\", text: \"' . text . '\", caused an exception: \"' . v:exception . '\"'
+:    endtry
 :" check the match itself
 :    if len(l) == 0 && len(t) > matchidx
 :      $put ='ERROR: pat: \"' . pat . '\", text: \"' . text . '\", did not match, expected: \"' . t[matchidx] . '\"'
--- a/src/testdir/test64.ok
+++ b/src/testdir/test64.ok
@@ -740,6 +740,8 @@ OK 2 - \_[^8-9]\+
 OK 0 - \_[^a]\+
 OK 1 - \_[^a]\+
 OK 2 - \_[^a]\+
+OK 0 - [0-9a-zA-Z]\{8}-\([0-9a-zA-Z]\{4}-\)\{3}[0-9a-zA-Z]\{12}
+OK 1 - [0-9a-zA-Z]\{8}-\([0-9a-zA-Z]\{4}-\)\{3}[0-9a-zA-Z]\{12}
 192.168.0.1
 192.168.0.1
 192.168.0.1
--- a/src/testdir/test95.in
+++ b/src/testdir/test95.in
@@ -85,7 +85,11 @@ STARTTEST
 :      continue
 :    endif
 :    let &regexpengine = engine
-:    let l = matchlist(text, pat)
+:    try
+:      let l = matchlist(text, pat)
+:    catch
+:      $put ='ERROR: pat: \"' . pat . '\", text: \"' . text . '\", caused an exception: \"' . v:exception . '\"'
+:    endtry
 :" check the match itself
 :    if len(l) == 0 && len(t) > matchidx
 :      $put ='ERROR: pat: \"' . pat . '\", text: \"' . text . '\", did not match, expected: \"' . t[matchidx] . '\"'
--- a/src/version.c
+++ b/src/version.c
@@ -729,6 +729,8 @@ static char *(features[]) =
 static int included_patches[] =
 {   /* Add new patch number below this line */
 /**/
+    1073,
+/**/
     1072,
 /**/
     1071,