changeset 4547:fc997f05cbc7 v7.3.1021

Updated for version 7.3.1021. Problem: The new regexp engine does not ignore the order of composing characters. Solution: Ignore the order of composing characters.
author Bram Moolenaar <bram@vim.org>
date Sun, 26 May 2013 14:32:05 +0200
parents c10cff3ab921
children f070418bd2ce
files src/regexp_nfa.c src/testdir/test95.in src/testdir/test95.ok src/version.c
diffstat 4 files changed, 68 insertions(+), 14 deletions(-) [+]
line wrap: on
line diff
--- a/src/regexp_nfa.c
+++ b/src/regexp_nfa.c
@@ -3275,8 +3275,10 @@ nfa_regmatch(start, submatch, m)
 		int	    len = 0;
 		nfa_state_T *end;
 		nfa_state_T *sta;
-
-		result = OK;
+		int	    cchars[MAX_MCO];
+		int	    ccount = 0;
+		int	    j;
+
 		sta = t->state->out;
 		len = 0;
 		if (utf_iscomposing(sta->c))
@@ -3293,24 +3295,52 @@ nfa_regmatch(start, submatch, m)
 		    /* TODO: How about negated? */
 		    if (len == 0 && sta->c != c)
 			result = FAIL;
-		    len = n;
+		    else
+			result = OK;
 		    while (sta->c != NFA_END_COMPOSING)
 			sta = sta->out;
 		}
-		else
-		    while (sta->c != NFA_END_COMPOSING && len < n)
+
+		/* Check base character matches first, unless ignored. */
+		else if (len > 0 || mc == sta->c)
+		{
+		    if (len == 0)
 		    {
-			if (len > 0)
-			    mc = mb_ptr2char(reginput + len);
-			if (mc != sta->c)
-			    break;
 			len += mb_char2len(mc);
 			sta = sta->out;
 		    }
 
-		/* if input char length doesn't match regexp char length */
-		if (len < n || sta->c != NFA_END_COMPOSING)
+		    /* We don't care about the order of composing characters.
+		     * Get them into cchars[] first. */
+		    while (len < n)
+		    {
+			mc = mb_ptr2char(reginput + len);
+			cchars[ccount++] = mc;
+			len += mb_char2len(mc);
+			if (ccount == MAX_MCO)
+			    break;
+		    }
+
+		    /* Check that each composing char in the pattern matches a
+		     * composing char in the text.  We do not check if all
+		     * composing chars are matched. */
+		    result = OK;
+		    while (sta->c != NFA_END_COMPOSING)
+		    {
+			for (j = 0; j < ccount; ++j)
+			    if (cchars[j] == sta->c)
+				break;
+			if (j == ccount)
+			{
+			    result = FAIL;
+			    break;
+			}
+			sta = sta->out;
+		    }
+		}
+		else
 		    result = FAIL;
+
 		end = t->state->out1;	    /* NFA_END_COMPOSING */
 		ADD_POS_NEG_STATE(end);
 		break;
--- a/src/testdir/test95.in
+++ b/src/testdir/test95.in
@@ -9,6 +9,7 @@ STARTTEST
 :so mbyte.vim
 :set nocp encoding=utf-8 viminfo+=nviminfo nomore
 :" tl is a List of Lists with:
+:"    2: test auto/old/new  0: test auto/old  1: test auto/new
 :"    regexp pattern
 :"    text to test the pattern on
 :"    expected match (optional)
@@ -40,10 +41,14 @@ STARTTEST
 :call add(tl, [2, ".\u05b9", " x\u05b9 ", "x\u05b9"])
 :call add(tl, [2, "\u05b9\u05bb", " x\u05b9\u05bb ", "x\u05b9\u05bb"])
 :call add(tl, [2, ".\u05b9\u05bb", " x\u05b9\u05bb ", "x\u05b9\u05bb"])
-:"call add(tl, [2, "\u05bb\u05b9", " x\u05b9\u05bb "])
-:"call add(tl, [2, ".\u05bb\u05b9", " x\u05b9\u05bb "])
+:call add(tl, [2, "\u05bb\u05b9", " x\u05b9\u05bb ", "x\u05b9\u05bb"])
+:call add(tl, [2, ".\u05bb\u05b9", " x\u05b9\u05bb ", "x\u05b9\u05bb"])
 :call add(tl, [2, "\u05b9", " y\u05bb x\u05b9 ", "x\u05b9"])
 :call add(tl, [2, ".\u05b9", " y\u05bb x\u05b9 ", "x\u05b9"])
+:call add(tl, [2, "\u05b9", " y\u05bb\u05b9 x\u05b9 ", "y\u05bb\u05b9"])
+:call add(tl, [2, ".\u05b9", " y\u05bb\u05b9 x\u05b9 ", "y\u05bb\u05b9"])
+:call add(tl, [1, "\u05b9\u05bb", " y\u05b9 x\u05b9\u05bb ", "x\u05b9\u05bb"])
+:call add(tl, [2, ".\u05b9\u05bb", " y\u05bb x\u05b9\u05bb ", "x\u05b9\u05bb"])
 
 
 :"""" Test \Z
@@ -74,7 +79,7 @@ STARTTEST
 :  let text = t[2]
 :  let matchidx = 3
 :  for engine in [0, 1, 2]
-:    if engine == 2 && !re
+:    if engine == 2 && re == 0 || engine == 1 && re == 1
 :      continue
 :    endif
 :    let &regexpengine = engine
--- a/src/testdir/test95.ok
+++ b/src/testdir/test95.ok
@@ -41,12 +41,29 @@ OK 2 - ֹֻ
 OK 0 - .ֹֻ
 OK 1 - .ֹֻ
 OK 2 - .ֹֻ
+OK 0 - ֹֻ
+OK 1 - ֹֻ
+OK 2 - ֹֻ
+OK 0 - .ֹֻ
+OK 1 - .ֹֻ
+OK 2 - .ֹֻ
 OK 0 - ֹ
 OK 1 - ֹ
 OK 2 - ֹ
 OK 0 - .ֹ
 OK 1 - .ֹ
 OK 2 - .ֹ
+OK 0 - ֹ
+OK 1 - ֹ
+OK 2 - ֹ
+OK 0 - .ֹ
+OK 1 - .ֹ
+OK 2 - .ֹ
+OK 0 - ֹֻ
+OK 2 - ֹֻ
+OK 0 - .ֹֻ
+OK 1 - .ֹֻ
+OK 2 - .ֹֻ
 OK 0 - ú\Z
 OK 1 - ú\Z
 OK 2 - ú\Z
--- a/src/version.c
+++ b/src/version.c
@@ -729,6 +729,8 @@ static char *(features[]) =
 static int included_patches[] =
 {   /* Add new patch number below this line */
 /**/
+    1021,
+/**/
     1020,
 /**/
     1019,