changeset 4571:b2a8e3a66f8c v7.3.1033

updated for version 7.3.1033 Problem: "\1" .. "\9" are not supported in the new regexp engine. Solution: Implement them. Add a few more tests.
author Bram Moolenaar <bram@vim.org>
date Tue, 28 May 2013 22:03:20 +0200
parents 7f988965bc43
children ef476bdc9274
files src/regexp.h src/regexp_nfa.c src/testdir/test64.in src/testdir/test64.ok src/version.c
diffstat 5 files changed, 359 insertions(+), 164 deletions(-) [+]
line wrap: on
line diff
--- a/src/regexp.h
+++ b/src/regexp.h
@@ -71,7 +71,6 @@ struct nfa_state
     nfa_state_T		*out1;
     int			id;
     int			lastlist;
-    int			visits;
     int			negated;
 };
 
--- a/src/regexp_nfa.c
+++ b/src/regexp_nfa.c
@@ -73,6 +73,17 @@ enum
     NFA_PREV_ATOM_JUST_BEFORE_NEG,  /* Used for \@<! */
     NFA_PREV_ATOM_LIKE_PATTERN,	    /* Used for \@> */
 
+    NFA_BACKREF1,		    /* \1 */
+    NFA_BACKREF2,		    /* \2 */
+    NFA_BACKREF3,		    /* \3 */
+    NFA_BACKREF4,		    /* \4 */
+    NFA_BACKREF5,		    /* \5 */
+    NFA_BACKREF6,		    /* \6 */
+    NFA_BACKREF7,		    /* \7 */
+    NFA_BACKREF8,		    /* \8 */
+    NFA_BACKREF9,		    /* \9 */
+    NFA_SKIP,			    /* Skip characters */
+
     NFA_MOPEN,
     NFA_MCLOSE = NFA_MOPEN + NSUBEXP,
 
@@ -709,7 +720,8 @@ nfa_regatom()
 	    p = vim_strchr(classchars, no_Magic(c));
 	    if (p == NULL)
 	    {
-		return FAIL;	    /* runtime error */
+		EMSGN("INTERNAL: Unknown character class char: %ld", c);
+		return FAIL;
 	    }
 #ifdef FEAT_MBYTE
 	    /* When '.' is followed by a composing char ignore the dot, so that
@@ -766,20 +778,18 @@ nfa_regatom()
 	    return FAIL;
 
 	case Magic('~'):		/* previous substitute pattern */
-	    /* Not supported yet */
+	    /* TODO: Not supported yet */
 	    return FAIL;
 
-	case Magic('1'):
-	case Magic('2'):
-	case Magic('3'):
-	case Magic('4'):
-	case Magic('5'):
-	case Magic('6'):
-	case Magic('7'):
-	case Magic('8'):
-	case Magic('9'):
-	    /* not supported yet */
-	    return FAIL;
+	case Magic('1'): EMIT(NFA_BACKREF1); break;
+	case Magic('2'): EMIT(NFA_BACKREF2); break;
+	case Magic('3'): EMIT(NFA_BACKREF3); break;
+	case Magic('4'): EMIT(NFA_BACKREF4); break;
+	case Magic('5'): EMIT(NFA_BACKREF5); break;
+	case Magic('6'): EMIT(NFA_BACKREF6); break;
+	case Magic('7'): EMIT(NFA_BACKREF7); break;
+	case Magic('8'): EMIT(NFA_BACKREF8); break;
+	case Magic('9'): EMIT(NFA_BACKREF9); break;
 
 	case Magic('z'):
 	    c = no_Magic(getchr());
@@ -802,7 +812,7 @@ nfa_regatom()
 		case '8':
 		case '9':
 		case '(':
-		    /* \z1...\z9 and \z( not yet supported */
+		    /* TODO: \z1...\z9 and \z( not yet supported */
 		    return FAIL;
 		default:
 		    syntax_error = TRUE;
@@ -854,32 +864,50 @@ nfa_regatom()
 		 * pattern -- regardless of whether or not it makes sense. */
 		case '^':
 		    EMIT(NFA_BOF);
-		    /* Not yet supported */
+		    /* TODO: Not yet supported */
 		    return FAIL;
 		    break;
 
 		case '$':
 		    EMIT(NFA_EOF);
-		    /* Not yet supported */
+		    /* TODO: Not yet supported */
 		    return FAIL;
 		    break;
 
 		case '#':
-		    /* not supported yet */
+		    /* TODO: not supported yet */
 		    return FAIL;
 		    break;
 
 		case 'V':
-		    /* not supported yet */
+		    /* TODO: not supported yet */
 		    return FAIL;
 		    break;
 
 		case '[':
-		    /* \%[abc] not supported yet */
+		    /* TODO: \%[abc] not supported yet */
+		    return FAIL;
+
+		case '0':
+		case '1':
+		case '2':
+		case '3':
+		case '4':
+		case '5':
+		case '6':
+		case '7':
+		case '8':
+		case '9':
+		case '<':
+		case '>':
+		case '\'':
+		    /* TODO: not supported yet */
 		    return FAIL;
 
 		default:
-		    /* not supported yet */
+		    syntax_error = TRUE;
+		    EMSGN(_("E867: (NFA) Unknown operator '\\%%%c'"),
+								 no_Magic(c));
 		    return FAIL;
 	    }
 	    break;
@@ -1672,6 +1700,17 @@ nfa_set_code(c)
 	case NFA_ZSTART:    STRCPY(code, "NFA_ZSTART"); break;
 	case NFA_ZEND:	    STRCPY(code, "NFA_ZEND"); break;
 
+	case NFA_BACKREF1:  STRCPY(code, "NFA_BACKREF1"); break;
+	case NFA_BACKREF2:  STRCPY(code, "NFA_BACKREF2"); break;
+	case NFA_BACKREF3:  STRCPY(code, "NFA_BACKREF3"); break;
+	case NFA_BACKREF4:  STRCPY(code, "NFA_BACKREF4"); break;
+	case NFA_BACKREF5:  STRCPY(code, "NFA_BACKREF5"); break;
+	case NFA_BACKREF6:  STRCPY(code, "NFA_BACKREF6"); break;
+	case NFA_BACKREF7:  STRCPY(code, "NFA_BACKREF7"); break;
+	case NFA_BACKREF8:  STRCPY(code, "NFA_BACKREF8"); break;
+	case NFA_BACKREF9:  STRCPY(code, "NFA_BACKREF9"); break;
+	case NFA_SKIP:	    STRCPY(code, "NFA_SKIP"); break;
+
 	case NFA_PREV_ATOM_NO_WIDTH:
 			    STRCPY(code, "NFA_PREV_ATOM_NO_WIDTH"); break;
 	case NFA_NOPEN:		    STRCPY(code, "NFA_MOPEN_INVISIBLE"); break;
@@ -1949,7 +1988,6 @@ new_state(c, out, out1)
 
     s->id   = istate;
     s->lastlist = 0;
-    s->visits = 0;
     s->negated = FALSE;
 
     return s;
@@ -2416,6 +2454,30 @@ post2nfa(postfix, end, nfa_calc_size)
 	    PUSH(frag(s, list1(&s1->out)));
 	    break;
 
+	case NFA_BACKREF1:
+	case NFA_BACKREF2:
+	case NFA_BACKREF3:
+	case NFA_BACKREF4:
+	case NFA_BACKREF5:
+	case NFA_BACKREF6:
+	case NFA_BACKREF7:
+	case NFA_BACKREF8:
+	case NFA_BACKREF9:
+	    if (nfa_calc_size == TRUE)
+	    {
+		nstate += 2;
+		break;
+	    }
+	    s = new_state(*p, NULL, NULL);
+	    if (s == NULL)
+		goto theend;
+	    s1 = new_state(NFA_SKIP, NULL, NULL);
+	    if (s1 == NULL)
+		goto theend;
+	    patch(list1(&s->out), s1);
+	    PUSH(frag(s, list1(&s1->out)));
+	    break;
+
 	case NFA_ZSTART:
 	case NFA_ZEND:
 	default:
@@ -2495,29 +2557,54 @@ typedef struct
 typedef struct
 {
     nfa_state_T	*state;
+    int		count;
     regsub_T	sub;		/* submatch info, only party used */
 } nfa_thread_T;
 
 /* nfa_list_T contains the alternative NFA execution states. */
 typedef struct
 {
-    nfa_thread_T    *t;
-    int		    n;
+    nfa_thread_T    *t;		/* allocated array of states */
+    int		    n;		/* nr of states in "t" */
+    int		    id;		/* ID of the list */
 } nfa_list_T;
 
+#ifdef ENABLE_LOG
+    static void
+log_subexpr(sub)
+    regsub_T *sub;
+{
+    int j;
+
+    for (j = 0; j < sub->in_use; j++)
+	if (REG_MULTI)
+	    fprintf(log_fd, "\n *** group %d, start: c=%d, l=%d, end: c=%d, l=%d",
+		    j,
+		    sub->multilist[j].start.col,
+		    (int)sub->multilist[j].start.lnum,
+		    sub->multilist[j].end.col,
+		    (int)sub->multilist[j].end.lnum);
+	else
+	    fprintf(log_fd, "\n *** group %d, start: \"%s\", end: \"%s\"",
+		    j,
+		    (char *)sub->linelist[j].start,
+		    (char *)sub->linelist[j].end);
+    fprintf(log_fd, "\n");
+}
+#endif
+
 /* Used during execution: whether a match has been found. */
 static int nfa_match;
 
-static void addstate __ARGS((nfa_list_T *l, nfa_state_T *state, regsub_T *m, int off, int lid));
-static void addstate_here __ARGS((nfa_list_T *l, nfa_state_T *state, regsub_T *m, int lid, int *ip));
+static void addstate __ARGS((nfa_list_T *l, nfa_state_T *state, regsub_T *sub, int off));
+static void addstate_here __ARGS((nfa_list_T *l, nfa_state_T *state, regsub_T *sub, int *ip));
 
     static void
-addstate(l, state, m, off, lid)
+addstate(l, state, sub, off)
     nfa_list_T		*l;	/* runtime state list */
     nfa_state_T		*state;	/* state to update */
-    regsub_T		*m;	/* pointers to subexpressions */
+    regsub_T		*sub;	/* pointers to subexpressions */
     int			off;	/* byte offset, when -1 go to next line */
-    int			lid;
 {
     int			subidx;
     nfa_thread_T	*lastthread;
@@ -2545,41 +2632,58 @@ addstate(l, state, m, off, lid)
 	case NFA_MCLOSE + 7:
 	case NFA_MCLOSE + 8:
 	case NFA_MCLOSE + 9:
-	    /* Do not remember these nodes in list "thislist" or "nextlist" */
+	    /* These nodes are not added themselves but their "out" and/or
+	     * "out1" may be added below.  */
+	    break;
+
+	case NFA_MOPEN:
+	case NFA_MOPEN + 1:
+	case NFA_MOPEN + 2:
+	case NFA_MOPEN + 3:
+	case NFA_MOPEN + 4:
+	case NFA_MOPEN + 5:
+	case NFA_MOPEN + 6:
+	case NFA_MOPEN + 7:
+	case NFA_MOPEN + 8:
+	case NFA_MOPEN + 9:
+	    /* These nodes do not need to be added, but we need to bail out
+	     * when it was tried to be added to this list before. */
+	    if (state->lastlist == l->id)
+		return;
+	    state->lastlist = l->id;
 	    break;
 
 	default:
-	    if (state->lastlist == lid)
-	    {
-		if (++state->visits > 2)
-		    return;
-	    }
-	    else
+	    if (state->lastlist == l->id)
 	    {
-		/* add the state to the list */
-		state->lastlist = lid;
-		lastthread = &l->t[l->n++];
-		lastthread->state = state;
-		lastthread->sub.in_use = m->in_use;
-		if (m->in_use > 0)
-		{
-		    /* Copy the match start and end positions. */
-		    if (REG_MULTI)
-			mch_memmove(&lastthread->sub.multilist[0],
-				    &m->multilist[0],
-				    sizeof(struct multipos) * m->in_use);
-		    else
-			mch_memmove(&lastthread->sub.linelist[0],
-				    &m->linelist[0],
-				    sizeof(struct linepos) * m->in_use);
-		}
+		/* This state is already in the list, don't add it again,
+		 * unless it is an MOPEN that is used for a backreference. */
+		return;
+	    }
+
+	    /* add the state to the list */
+	    state->lastlist = l->id;
+	    lastthread = &l->t[l->n++];
+	    lastthread->state = state;
+	    lastthread->sub.in_use = sub->in_use;
+	    if (sub->in_use > 0)
+	    {
+		/* Copy the match start and end positions. */
+		if (REG_MULTI)
+		    mch_memmove(&lastthread->sub.multilist[0],
+				&sub->multilist[0],
+				sizeof(struct multipos) * sub->in_use);
+		else
+		    mch_memmove(&lastthread->sub.linelist[0],
+				&sub->linelist[0],
+				sizeof(struct linepos) * sub->in_use);
 	    }
     }
 
 #ifdef ENABLE_LOG
     nfa_set_code(state->c);
-    fprintf(log_fd, "> Adding state %d to list. Character %s, code %d\n",
-	abs(state->id), code, state->c);
+    fprintf(log_fd, "> Adding state %d to list. Character %d: %s\n",
+	abs(state->id), state->c, code);
 #endif
     switch (state->c)
     {
@@ -2588,12 +2692,8 @@ addstate(l, state, m, off, lid)
 	    break;
 
 	case NFA_SPLIT:
-	    addstate(l, state->out, m, off, lid);
-	    addstate(l, state->out1, m, off, lid);
-	    break;
-
-	case NFA_SKIP_CHAR:
-	    addstate(l, state->out, m, off, lid);
+	    addstate(l, state->out, sub, off);
+	    addstate(l, state->out1, sub, off);
 	    break;
 
 #if 0
@@ -2613,9 +2713,10 @@ addstate(l, state, m, off, lid)
 	    break;
 #endif
 
+	case NFA_SKIP_CHAR:
 	case NFA_NOPEN:
 	case NFA_NCLOSE:
-	    addstate(l, state->out, m, off, lid);
+	    addstate(l, state->out, sub, off);
 	    break;
 
 	/* If this state is reached, then a recursive call of nfa_regmatch()
@@ -2646,64 +2747,64 @@ addstate(l, state, m, off, lid)
 	     * restore it when it was in use.  Otherwise fill any gap. */
 	    if (REG_MULTI)
 	    {
-		if (subidx < m->in_use)
+		if (subidx < sub->in_use)
 		{
-		    save_lpos = m->multilist[subidx].start;
+		    save_lpos = sub->multilist[subidx].start;
 		    save_in_use = -1;
 		}
 		else
 		{
-		    save_in_use = m->in_use;
-		    for (i = m->in_use; i < subidx; ++i)
+		    save_in_use = sub->in_use;
+		    for (i = sub->in_use; i < subidx; ++i)
 		    {
-			m->multilist[i].start.lnum = -1;
-			m->multilist[i].end.lnum = -1;
+			sub->multilist[i].start.lnum = -1;
+			sub->multilist[i].end.lnum = -1;
 		    }
-		    m->in_use = subidx + 1;
+		    sub->in_use = subidx + 1;
 		}
 		if (off == -1)
 		{
-		    m->multilist[subidx].start.lnum = reglnum + 1;
-		    m->multilist[subidx].start.col = 0;
+		    sub->multilist[subidx].start.lnum = reglnum + 1;
+		    sub->multilist[subidx].start.col = 0;
 		}
 		else
 		{
-		    m->multilist[subidx].start.lnum = reglnum;
-		    m->multilist[subidx].start.col =
+		    sub->multilist[subidx].start.lnum = reglnum;
+		    sub->multilist[subidx].start.col =
 					  (colnr_T)(reginput - regline + off);
 		}
 	    }
 	    else
 	    {
-		if (subidx < m->in_use)
+		if (subidx < sub->in_use)
 		{
-		    save_ptr = m->linelist[subidx].start;
+		    save_ptr = sub->linelist[subidx].start;
 		    save_in_use = -1;
 		}
 		else
 		{
-		    save_in_use = m->in_use;
-		    for (i = m->in_use; i < subidx; ++i)
+		    save_in_use = sub->in_use;
+		    for (i = sub->in_use; i < subidx; ++i)
 		    {
-			m->linelist[i].start = NULL;
-			m->linelist[i].end = NULL;
+			sub->linelist[i].start = NULL;
+			sub->linelist[i].end = NULL;
 		    }
-		    m->in_use = subidx + 1;
+		    sub->in_use = subidx + 1;
 		}
-		m->linelist[subidx].start = reginput + off;
+		sub->linelist[subidx].start = reginput + off;
 	    }
 
-	    addstate(l, state->out, m, off, lid);
+	    addstate(l, state->out, sub, off);
 
 	    if (save_in_use == -1)
 	    {
 		if (REG_MULTI)
-		    m->multilist[subidx].start = save_lpos;
+		    sub->multilist[subidx].start = save_lpos;
 		else
-		    m->linelist[subidx].start = save_ptr;
+		    sub->linelist[subidx].start = save_ptr;
 	    }
 	    else
-		m->in_use = save_in_use;
+		sub->in_use = save_in_use;
 	    break;
 
 	case NFA_MCLOSE + 0:
@@ -2711,7 +2812,7 @@ addstate(l, state, m, off, lid)
 	    {
 		/* Do not overwrite the position set by \ze. If no \ze
 		 * encountered end will be set in nfa_regtry(). */
-		addstate(l, state->out, m, off, lid);
+		addstate(l, state->out, sub, off);
 		break;
 	    }
 	case NFA_MCLOSE + 1:
@@ -2731,37 +2832,37 @@ addstate(l, state, m, off, lid)
 
 	    /* We don't fill in gaps here, there must have been an MOPEN that
 	     * has done that. */
-	    save_in_use = m->in_use;
-	    if (m->in_use <= subidx)
-		m->in_use = subidx + 1;
+	    save_in_use = sub->in_use;
+	    if (sub->in_use <= subidx)
+		sub->in_use = subidx + 1;
 	    if (REG_MULTI)
 	    {
-		save_lpos = m->multilist[subidx].end;
+		save_lpos = sub->multilist[subidx].end;
 		if (off == -1)
 		{
-		    m->multilist[subidx].end.lnum = reglnum + 1;
-		    m->multilist[subidx].end.col = 0;
+		    sub->multilist[subidx].end.lnum = reglnum + 1;
+		    sub->multilist[subidx].end.col = 0;
 		}
 		else
 		{
-		    m->multilist[subidx].end.lnum = reglnum;
-		    m->multilist[subidx].end.col =
+		    sub->multilist[subidx].end.lnum = reglnum;
+		    sub->multilist[subidx].end.col =
 					  (colnr_T)(reginput - regline + off);
 		}
 	    }
 	    else
 	    {
-		save_ptr = m->linelist[subidx].end;
-		m->linelist[subidx].end = reginput + off;
+		save_ptr = sub->linelist[subidx].end;
+		sub->linelist[subidx].end = reginput + off;
 	    }
 
-	    addstate(l, state->out, m, off, lid);
+	    addstate(l, state->out, sub, off);
 
 	    if (REG_MULTI)
-		m->multilist[subidx].end = save_lpos;
+		sub->multilist[subidx].end = save_lpos;
 	    else
-		m->linelist[subidx].end = save_ptr;
-	    m->in_use = save_in_use;
+		sub->linelist[subidx].end = save_ptr;
+	    sub->in_use = save_in_use;
 	    break;
     }
 }
@@ -2773,11 +2874,10 @@ addstate(l, state, m, off, lid)
  * matters for alternatives.
  */
     static void
-addstate_here(l, state, m, lid, ip)
+addstate_here(l, state, sub, ip)
     nfa_list_T		*l;	/* runtime state list */
     nfa_state_T		*state;	/* state to update */
-    regsub_T		*m;	/* pointers to subexpressions */
-    int			lid;
+    regsub_T		*sub;	/* pointers to subexpressions */
     int			*ip;
 {
     int tlen = l->n;
@@ -2785,7 +2885,7 @@ addstate_here(l, state, m, lid, ip)
     int i = *ip;
 
     /* first add the state(s) at the end, so that we know how many there are */
-    addstate(l, state, m, 0, lid);
+    addstate(l, state, sub, 0);
 
     /* when "*ip" was at the end of the list, nothing to do */
     if (i + 1 == tlen)
@@ -2895,6 +2995,58 @@ check_char_class(class, c)
     return FAIL;
 }
 
+static int match_backref __ARGS((regsub_T *sub, int subidx, int *bytelen));
+
+/*
+ * Check for a match with subexpression "subidx".
+ * return TRUE if it matches.
+ */
+    static int
+match_backref(sub, subidx, bytelen)
+    regsub_T	*sub;	    /* pointers to subexpressions */
+    int		subidx;
+    int		*bytelen;   /* out: length of match in bytes */
+{
+    int		len;
+
+    if (sub->in_use <= subidx)
+    {
+retempty:
+	/* backref was not set, match an empty string */
+	*bytelen = 0;
+	return TRUE;
+    }
+
+    if (REG_MULTI)
+    {
+	if (sub->multilist[subidx].start.lnum < 0
+				       || sub->multilist[subidx].end.lnum < 0)
+	    goto retempty;
+	/* TODO: line breaks */
+	len = sub->multilist[subidx].end.col
+					 - sub->multilist[subidx].start.col;
+	if (cstrncmp(regline + sub->multilist[subidx].start.col,
+							reginput, &len) == 0)
+	{
+	    *bytelen = len;
+	    return TRUE;
+	}
+    }
+    else
+    {
+	if (sub->linelist[subidx].start == NULL
+					 || sub->linelist[subidx].end == NULL)
+	    goto retempty;
+	len = (int)(sub->linelist[subidx].end - sub->linelist[subidx].start);
+	if (cstrncmp(sub->linelist[subidx].start, reginput, &len) == 0)
+	{
+	    *bytelen = len;
+	    return TRUE;
+	}
+    }
+    return FALSE;
+}
+
 /*
  * Set all NFA nodes' list ID equal to -1.
  */
@@ -2902,9 +3054,7 @@ check_char_class(class, c)
 nfa_set_neg_listids(start)
     nfa_state_T	    *start;
 {
-    if (start == NULL)
-	return;
-    if (start->lastlist >= 0)
+    if (start != NULL && start->lastlist >= 0)
     {
 	start->lastlist = -1;
 	nfa_set_neg_listids(start->out);
@@ -2919,9 +3069,7 @@ nfa_set_neg_listids(start)
 nfa_set_null_listids(start)
     nfa_state_T	    *start;
 {
-    if (start == NULL)
-	return;
-    if (start->lastlist == -1)
+    if (start != NULL && start->lastlist == -1)
     {
 	start->lastlist = 0;
 	nfa_set_null_listids(start->out);
@@ -2937,9 +3085,7 @@ nfa_save_listids(start, list)
     nfa_state_T	    *start;
     int		    *list;
 {
-    if (start == NULL)
-	return;
-    if (start->lastlist != -1)
+    if (start != NULL && start->lastlist != -1)
     {
 	list[abs(start->id)] = start->lastlist;
 	start->lastlist = -1;
@@ -2956,9 +3102,7 @@ nfa_restore_listids(start, list)
     nfa_state_T	    *start;
     int		    *list;
 {
-    if (start == NULL)
-	return;
-    if (start->lastlist == -1)
+    if (start != NULL && start->lastlist == -1)
     {
 	start->lastlist = list[abs(start->id)];
 	nfa_restore_listids(start->out, list);
@@ -3047,7 +3191,8 @@ nfa_regmatch(start, submatch, m)
 #ifdef ENABLE_LOG
     fprintf(log_fd, "(---) STARTSTATE\n");
 #endif
-    addstate(thislist, start, m, 0, listid);
+    thislist->id = listid;
+    addstate(thislist, start, m, 0);
 
     /* There are two cases when the NFA advances: 1. input char matches the
      * NFA node and 2. input char does not match the NFA node, but the next
@@ -3060,7 +3205,7 @@ nfa_regmatch(start, submatch, m)
 #define	ADD_POS_NEG_STATE(node)						    \
     ll = listtbl[result ? 1 : 0][node->negated];			    \
     if (ll != NULL)							    \
-	addstate(ll, node->out , &t->sub, clen, listid + 1);
+	addstate(ll, node->out , &t->sub, clen);
 
 
     /*
@@ -3092,9 +3237,12 @@ nfa_regmatch(start, submatch, m)
 	/* swap lists */
 	thislist = &list[flag];
 	nextlist = &list[flag ^= 1];
-	nextlist->n = 0;	    /* `clear' nextlist */
+	nextlist->n = 0;	    /* clear nextlist */
 	listtbl[1][0] = nextlist;
 	++listid;
+	thislist->id = listid;
+	nextlist->id = listid + 1;
+	neglist->id = listid + 1;
 
 #ifdef ENABLE_LOG
 	fprintf(log_fd, "------------------------------------------\n");
@@ -3156,7 +3304,8 @@ nfa_regmatch(start, submatch, m)
 		if (REG_MULTI)
 		    for (j = 0; j < submatch->in_use; j++)
 		    {
-			submatch->multilist[j].start = t->sub.multilist[j].start;
+			submatch->multilist[j].start =
+						    t->sub.multilist[j].start;
 			submatch->multilist[j].end = t->sub.multilist[j].end;
 		    }
 		else
@@ -3166,20 +3315,7 @@ nfa_regmatch(start, submatch, m)
 			submatch->linelist[j].end = t->sub.linelist[j].end;
 		    }
 #ifdef ENABLE_LOG
-		for (j = 0; j < t->sub.in_use; j++)
-		    if (REG_MULTI)
-			fprintf(log_fd, "\n *** group %d, start: c=%d, l=%d, end: c=%d, l=%d",
-				j,
-				t->sub.multilist[j].start.col,
-				(int)t->sub.multilist[j].start.lnum,
-				t->sub.multilist[j].end.col,
-				(int)t->sub.multilist[j].end.lnum);
-		    else
-			fprintf(log_fd, "\n *** group %d, start: \"%s\", end: \"%s\"",
-				j,
-				(char *)t->sub.linelist[j].start,
-				(char *)t->sub.linelist[j].end);
-		fprintf(log_fd, "\n");
+		log_subexpr(&t->sub);
 #endif
 		/* Found the left-most longest match, do not look at any other
 		 * states at this position.  When the list of states is going
@@ -3198,8 +3334,7 @@ nfa_regmatch(start, submatch, m)
 		 * nfa_regmatch().  Submatches are stored in *m, and used in
 		 * the parent call. */
 		if (start->c == NFA_MOPEN + 0)
-		    addstate_here(thislist, t->state->out, &t->sub, listid,
-								    &listidx);
+		    addstate_here(thislist, t->state->out, &t->sub, &listidx);
 		else
 		{
 		    *m = t->sub;
@@ -3277,7 +3412,7 @@ nfa_regmatch(start, submatch, m)
 
 		    /* t->state->out1 is the corresponding END_INVISIBLE node */
 		    addstate_here(thislist, t->state->out1->out, &t->sub,
-							    listid, &listidx);
+								    &listidx);
 		}
 		else
 		{
@@ -3288,14 +3423,12 @@ nfa_regmatch(start, submatch, m)
 
 	    case NFA_BOL:
 		if (reginput == regline)
-		    addstate_here(thislist, t->state->out, &t->sub, listid,
-								    &listidx);
+		    addstate_here(thislist, t->state->out, &t->sub, &listidx);
 		break;
 
 	    case NFA_EOL:
 		if (curc == NUL)
-		    addstate_here(thislist, t->state->out, &t->sub, listid,
-								    &listidx);
+		    addstate_here(thislist, t->state->out, &t->sub, &listidx);
 		break;
 
 	    case NFA_BOW:
@@ -3322,8 +3455,7 @@ nfa_regmatch(start, submatch, m)
 				   && vim_iswordc_buf(reginput[-1], reg_buf)))
 		    bow = FALSE;
 		if (bow)
-		    addstate_here(thislist, t->state->out, &t->sub, listid,
-								    &listidx);
+		    addstate_here(thislist, t->state->out, &t->sub, &listidx);
 		break;
 	    }
 
@@ -3351,8 +3483,7 @@ nfa_regmatch(start, submatch, m)
 					   && vim_iswordc_buf(curc, reg_buf)))
 		    eow = FALSE;
 		if (eow)
-		    addstate_here(thislist, t->state->out, &t->sub, listid,
-								    &listidx);
+		    addstate_here(thislist, t->state->out, &t->sub, &listidx);
 		break;
 	    }
 
@@ -3442,12 +3573,12 @@ nfa_regmatch(start, submatch, m)
 		    go_to_nextline = TRUE;
 		    /* Pass -1 for the offset, which means taking the position
 		     * at the start of the next line. */
-		    addstate(nextlist, t->state->out, &t->sub, -1, listid + 1);
+		    addstate(nextlist, t->state->out, &t->sub, -1);
 		}
 		else if (curc == '\n' && reg_line_lbr)
 		{
 		    /* match \n as if it is an ordinary character */
-		    addstate(nextlist, t->state->out, &t->sub, 1, listid + 1);
+		    addstate(nextlist, t->state->out, &t->sub, 1);
 		}
 		break;
 
@@ -3475,15 +3606,13 @@ nfa_regmatch(start, submatch, m)
 		/* This follows a series of negated nodes, like:
 		 * CHAR(x), NFA_NOT, CHAR(y), NFA_NOT etc. */
 		if (curc > 0)
-		    addstate(nextlist, t->state->out, &t->sub, clen,
-								  listid + 1);
+		    addstate(nextlist, t->state->out, &t->sub, clen);
 		break;
 
 	    case NFA_ANY:
 		/* Any char except '\0', (end of input) does not match. */
 		if (curc > 0)
-		    addstate(nextlist, t->state->out, &t->sub, clen,
-								  listid + 1);
+		    addstate(nextlist, t->state->out, &t->sub, clen);
 		break;
 
 	    /*
@@ -3620,18 +3749,74 @@ nfa_regmatch(start, submatch, m)
 		ADD_POS_NEG_STATE(t->state);
 		break;
 
-	    case NFA_MOPEN + 0:
-	    case NFA_MOPEN + 1:
-	    case NFA_MOPEN + 2:
-	    case NFA_MOPEN + 3:
-	    case NFA_MOPEN + 4:
-	    case NFA_MOPEN + 5:
-	    case NFA_MOPEN + 6:
-	    case NFA_MOPEN + 7:
-	    case NFA_MOPEN + 8:
-	    case NFA_MOPEN + 9:
-		/* handled below */
+	    case NFA_BACKREF1:
+	    case NFA_BACKREF2:
+	    case NFA_BACKREF3:
+	    case NFA_BACKREF4:
+	    case NFA_BACKREF5:
+	    case NFA_BACKREF6:
+	    case NFA_BACKREF7:
+	    case NFA_BACKREF8:
+	    case NFA_BACKREF9:
+		/* \1 .. \9 */
+	      {
+		int subidx = t->state->c - NFA_BACKREF1 + 1;
+		int bytelen;
+
+		result = match_backref(&t->sub, subidx, &bytelen);
+		if (result)
+		{
+		    if (bytelen == 0)
+		    {
+			/* empty match always works, add NFA_SKIP with zero to
+			 * be used next */
+			addstate_here(thislist, t->state->out, &t->sub,
+								    &listidx);
+			thislist->t[listidx + 1].count = 0;
+		    }
+		    else if (bytelen <= clen)
+		    {
+			/* match current character, jump ahead to out of
+			 * NFA_SKIP */
+			addstate(nextlist, t->state->out->out, &t->sub, clen);
+#ifdef ENABLE_LOG
+			log_subexpr(&nextlist->t[nextlist->n - 1].sub);
+#endif
+		    }
+		    else
+		    {
+			/* skip ofer the matched characters, set character
+			 * count in NFA_SKIP */
+			addstate(nextlist, t->state->out, &t->sub, bytelen);
+			nextlist->t[nextlist->n - 1].count = bytelen - clen;
+#ifdef ENABLE_LOG
+			log_subexpr(&nextlist->t[nextlist->n - 1].sub);
+#endif
+		    }
+
+		}
 		break;
+	      }
+	    case NFA_SKIP:
+	      /* charater of previous matching \1 .. \9 */
+	      if (t->count - clen <= 0)
+	      {
+		  /* end of match, go to what follows */
+		  addstate(nextlist, t->state->out, &t->sub, clen);
+#ifdef ENABLE_LOG
+		  log_subexpr(&nextlist->t[nextlist->n - 1].sub);
+#endif
+	      }
+	      else
+	      {
+		  /* add state again with decremented count */
+		  addstate(nextlist, t->state, &t->sub, 0);
+		  nextlist->t[nextlist->n - 1].count = t->count - clen;
+#ifdef ENABLE_LOG
+		  log_subexpr(&nextlist->t[nextlist->n - 1].sub);
+#endif
+	      }
+	      break;
 
 	    case NFA_SKIP_CHAR:
 	    case NFA_ZSTART:
@@ -3680,7 +3865,7 @@ nfa_regmatch(start, submatch, m)
 #ifdef ENABLE_LOG
 	    fprintf(log_fd, "(---) STARTSTATE\n");
 #endif
-	    addstate(nextlist, start, m, clen, listid + 1);
+	    addstate(nextlist, start, m, clen);
 	}
 
 #ifdef ENABLE_LOG
@@ -3884,7 +4069,6 @@ nfa_regexec_both(line, col)
     {
 	prog->state[i].id = i;
 	prog->state[i].lastlist = 0;
-	prog->state[i].visits = 0;
     }
 
     retval = nfa_regtry(prog->start, col);
--- a/src/testdir/test64.in
+++ b/src/testdir/test64.in
@@ -331,6 +331,10 @@ STARTTEST
 :call add(tl, [2, '\<goo\|\<go', 'google', 'goo'])
 :call add(tl, [2, '\<goo\|go', 'google', 'goo'])
 :"
+:"""" Back references
+:call add(tl, [2, '\(\i\+\) \1', ' abc abc', 'abc abc', 'abc'])
+:"call add(tl, [2, '\(\i\+\) \1', 'xgoo goox', 'goo goo', 'goo'])
+:call add(tl, [2, '\(a\)\(b\)\(c\)\(dd\)\(e\)\(f\)\(g\)\(h\)\(i\)\1\2\3\4\5\6\7\8\9', 'xabcddefghiabcddefghix', 'abcddefghiabcddefghi', 'a', 'b', 'c', 'dd', 'e', 'f', 'g', 'h', 'i'])
 :"
 :"""" Run the tests
 :"
--- a/src/testdir/test64.ok
+++ b/src/testdir/test64.ok
@@ -713,6 +713,12 @@ OK 2 - \<goo\|\<go
 OK 0 - \<goo\|go
 OK 1 - \<goo\|go
 OK 2 - \<goo\|go
+OK 0 - \(\i\+\) \1
+OK 1 - \(\i\+\) \1
+OK 2 - \(\i\+\) \1
+OK 0 - \(a\)\(b\)\(c\)\(dd\)\(e\)\(f\)\(g\)\(h\)\(i\)\1\2\3\4\5\6\7\8\9
+OK 1 - \(a\)\(b\)\(c\)\(dd\)\(e\)\(f\)\(g\)\(h\)\(i\)\1\2\3\4\5\6\7\8\9
+OK 2 - \(a\)\(b\)\(c\)\(dd\)\(e\)\(f\)\(g\)\(h\)\(i\)\1\2\3\4\5\6\7\8\9
 192.168.0.1
 192.168.0.1
 192.168.0.1
--- a/src/version.c
+++ b/src/version.c
@@ -729,6 +729,8 @@ static char *(features[]) =
 static int included_patches[] =
 {   /* Add new patch number below this line */
 /**/
+    1033,
+/**/
     1032,
 /**/
     1031,