changeset 4809:4d7e3df04256 v7.3.1151

updated for version 7.3.1151 Problem: New regexp engine: Slow when a look-behind match is followed by a zero-width match. Solution: Postpone the look-behind match more often.
author Bram Moolenaar <bram@vim.org>
date Sat, 08 Jun 2013 23:26:27 +0200
parents 0979fb86ff47
children 47466036eff2
files src/regexp_nfa.c src/version.c
diffstat 2 files changed, 91 insertions(+), 3 deletions(-) [+]
line wrap: on
line diff
--- a/src/regexp_nfa.c
+++ b/src/regexp_nfa.c
@@ -2794,6 +2794,7 @@ nfa_max_width(startstate, depth)
     /* unrecognized */
     return -1;
 }
+
 /*
  * Convert a postfix form into its equivalent NFA.
  * Return the NFA start state on success, NULL otherwise.
@@ -3433,6 +3434,7 @@ static void copy_sub __ARGS((regsub_T *t
 static void copy_sub_off __ARGS((regsub_T *to, regsub_T *from));
 static int sub_equal __ARGS((regsub_T *sub1, regsub_T *sub2));
 static int has_state_with_pos __ARGS((nfa_list_T *l, nfa_state_T *state, regsubs_T *subs));
+static int match_follows __ARGS((nfa_state_T *startstate, int depth));
 static int state_in_list __ARGS((nfa_list_T *l, nfa_state_T *state, regsubs_T *subs));
 static void addstate __ARGS((nfa_list_T *l, nfa_state_T *state, regsubs_T *subs, int off));
 static void addstate_here __ARGS((nfa_list_T *l, nfa_state_T *state, regsubs_T *subs, nfa_pim_T *pim, int *ip));
@@ -3626,6 +3628,88 @@ has_state_with_pos(l, state, subs)
 }
 
 /*
+ * Return TRUE if "state" leads to a NFA_MATCH without advancing the input.
+ */
+    static int
+match_follows(startstate, depth)
+    nfa_state_T *startstate;
+    int		depth;
+{
+    nfa_state_T	    *state = startstate;
+
+    /* avoid too much recursion */
+    if (depth > 10)
+	return FALSE;
+
+    for (;;)
+    {
+	switch (state->c)
+	{
+	    case NFA_MATCH:
+		return TRUE;
+
+	    case NFA_SPLIT:
+		return match_follows(state->out, depth + 1)
+				     || match_follows(state->out1, depth + 1);
+
+	    case NFA_START_INVISIBLE:
+	    case NFA_START_INVISIBLE_BEFORE:
+	    case NFA_START_INVISIBLE_NEG:
+	    case NFA_START_INVISIBLE_BEFORE_NEG:
+	    case NFA_COMPOSING:
+		/* skip ahead to next state */
+		state = state->out1->out;
+		break;
+
+	    case NFA_ANY:
+	    case NFA_IDENT:
+	    case NFA_SIDENT:
+	    case NFA_KWORD:
+	    case NFA_SKWORD:
+	    case NFA_FNAME:
+	    case NFA_SFNAME:
+	    case NFA_PRINT:
+	    case NFA_SPRINT:
+	    case NFA_WHITE:
+	    case NFA_NWHITE:
+	    case NFA_DIGIT:
+	    case NFA_NDIGIT:
+	    case NFA_HEX:
+	    case NFA_NHEX:
+	    case NFA_OCTAL:
+	    case NFA_NOCTAL:
+	    case NFA_WORD:
+	    case NFA_NWORD:
+	    case NFA_HEAD:
+	    case NFA_NHEAD:
+	    case NFA_ALPHA:
+	    case NFA_NALPHA:
+	    case NFA_LOWER:
+	    case NFA_NLOWER:
+	    case NFA_UPPER:
+	    case NFA_NUPPER:
+	    case NFA_START_COLL:
+	    case NFA_START_NEG_COLL:
+	    case NFA_NEWL:
+		/* state will advance input */
+		return FALSE;
+
+	    default:
+		if (state->c > 0)
+		    /* state will advance input */
+		    return FALSE;
+
+		/* Others: zero-width or possibly zero-width, might still find
+		 * a match at the same position, keep looking. */
+		break;
+	}
+	state = state->out;
+    }
+    return FALSE;
+}
+
+
+/*
  * Return TRUE if "state" is already in list "l".
  */
     static int
@@ -5714,9 +5798,11 @@ nfa_regmatch(prog, start, submatch, m)
 
 	    if (add_state != NULL)
 	    {
-		if (t->pim != NULL)
+		/* Handle the postponed invisible match before advancing to
+		 * the next character and for a zero-width match if the match
+		 * might end without advancing. */
+		if (t->pim != NULL && (!add_here || match_follows(add_state, 0)))
 		{
-		    /* postponed invisible match */
 		    if (t->pim->result == NFA_PIM_TODO)
 		    {
 #ifdef ENABLE_LOG
@@ -5773,7 +5859,7 @@ nfa_regmatch(prog, start, submatch, m)
 		}
 
 		if (add_here)
-		    addstate_here(thislist, add_state, &t->subs, NULL, &listidx);
+		    addstate_here(thislist, add_state, &t->subs, t->pim, &listidx);
 		else
 		{
 		    addstate(nextlist, add_state, &t->subs, add_off);
--- a/src/version.c
+++ b/src/version.c
@@ -729,6 +729,8 @@ static char *(features[]) =
 static int included_patches[] =
 {   /* Add new patch number below this line */
 /**/
+    1151,
+/**/
     1150,
 /**/
     1149,