changeset 4801:3cd3cc1e9119 v7.3.1147

Updated for version 7.3.1147. Problem: New regexp engine: regstart is only used to find the first match. Solution: Use regstart whenever adding the start state.
author Bram Moolenaar <bram@vim.org>
date Sat, 08 Jun 2013 14:38:27 +0200
parents 339cf2070eb7
children 8adbe7dae7fd
files src/regexp_nfa.c src/version.c
diffstat 2 files changed, 74 insertions(+), 21 deletions(-) [+]
line wrap: on
line diff
--- a/src/regexp_nfa.c
+++ b/src/regexp_nfa.c
@@ -4153,6 +4153,7 @@ recursive_regmatch(state, prog, submatch
 }
 
 static int failure_chance __ARGS((nfa_state_T *state, int depth));
+static int skip_to_start __ARGS((int c, colnr_T *colp));
 
 /*
  * Estimate the chance of a match with "state" failing.
@@ -4305,6 +4306,31 @@ failure_chance(state, depth)
 }
 
 /*
+ * Skip until the char "c" we know a match must start with.
+ */
+    static int
+skip_to_start(c, colp)
+    int		c;
+    colnr_T	*colp;
+{
+    char_u *s;
+
+    /* Used often, do some work to avoid call overhead. */
+    if (!ireg_ic
+#ifdef FEAT_MBYTE
+		&& !has_mbyte
+#endif
+		)
+	s = vim_strbyte(regline + *colp, c);
+    else
+	s = cstrchr(regline + *colp, c);
+    if (s == NULL)
+	return FAIL;
+    *colp = (int)(s - regline);
+    return OK;
+}
+
+/*
  * Main matching routine.
  *
  * Run NFA to determine whether it matches reginput.
@@ -5449,12 +5475,50 @@ nfa_regmatch(prog, start, submatch, m)
 	     * the first MOPEN. */
 	    if (toplevel)
 	    {
-		if (REG_MULTI)
-		    m->norm.list.multi[0].start.col =
+		int add = TRUE;
+		int c;
+
+		if (prog->regstart != NUL && clen != 0)
+		{
+		    if (nextlist->n == 0)
+		    {
+			colnr_T col = (colnr_T)(reginput - regline) + clen;
+
+			/* Nextlist is empty, we can skip ahead to the
+			 * character that must appear at the start. */
+			if (skip_to_start(prog->regstart, &col) == FAIL)
+			    break;
+#ifdef ENABLE_LOG
+			fprintf(log_fd, "  Skipping ahead %d bytes to regstart\n",
+				col - ((colnr_T)(reginput - regline) + clen));
+#endif
+			reginput = regline + col - clen;
+		    }
+		    else
+		    {
+			/* Checking if the required start character matches is
+			 * cheaper than adding a state that won't match. */
+			c = PTR2CHAR(reginput + clen);
+			if (c != prog->regstart && (!ireg_ic || MB_TOLOWER(c)
+					       != MB_TOLOWER(prog->regstart)))
+			{
+#ifdef ENABLE_LOG
+			    fprintf(log_fd, "  Skipping start state, regstart does not match\n");
+#endif
+			    add = FALSE;
+			}
+		    }
+		}
+
+		if (add)
+		{
+		    if (REG_MULTI)
+			m->norm.list.multi[0].start.col =
 					 (colnr_T)(reginput - regline) + clen;
-		else
-		    m->norm.list.line[0].start = reginput + clen;
-		addstate(nextlist, start->out, m, clen);
+		    else
+			m->norm.list.line[0].start = reginput + clen;
+		    addstate(nextlist, start->out, m, clen);
+		}
 	    }
 	    else
 		addstate(nextlist, start, m, clen);
@@ -5701,23 +5765,10 @@ nfa_regexec_both(line, startcol)
 	return 0L;
 
     if (prog->regstart != NUL)
-    {
-	char_u *s;
-
-	/* Skip until the char we know it must start with.
-	 * Used often, do some work to avoid call overhead. */
-	if (!ireg_ic
-#ifdef FEAT_MBYTE
-		    && !has_mbyte
-#endif
-		    )
-	    s = vim_strbyte(regline + col, prog->regstart);
-	else
-	    s = cstrchr(regline + col, prog->regstart);
-	if (s == NULL)
+	/* Skip ahead until a character we know the match must start with.
+	 * When there is none there is no match. */
+	if (skip_to_start(prog->regstart, &col) == FAIL)
 	    return 0L;
-	col = (int)(s - regline);
-    }
 
     /* If the start column is past the maximum column: no need to try. */
     if (ireg_maxcol > 0 && col >= ireg_maxcol)
--- a/src/version.c
+++ b/src/version.c
@@ -729,6 +729,8 @@ static char *(features[]) =
 static int included_patches[] =
 {   /* Add new patch number below this line */
 /**/
+    1147,
+/**/
     1146,
 /**/
     1145,