changeset 4535:45f97c349537 v7.3.1015

Updated for version 7.3.1015. Problem: New regexp engine: Matching composing characters is wrong. Solution: Fix matching composing characters.
author Bram Moolenaar <bram@vim.org>
date Sat, 25 May 2013 14:42:03 +0200
parents 3a03d5347dcd
children 16961a4f00ea
files src/regexp_nfa.c src/testdir/test95.in src/testdir/test95.ok src/version.c
diffstat 4 files changed, 38 insertions(+), 5 deletions(-) [+]
line wrap: on
line diff
--- a/src/regexp_nfa.c
+++ b/src/regexp_nfa.c
@@ -716,6 +716,7 @@ nfa_regatom()
 	     * the composing char is matched here. */
 	    if (enc_utf8 && c == Magic('.') && utf_iscomposing(peekchr()))
 	    {
+		old_regparse = regparse;
 		c = getchr();
 		goto nfa_do_multibyte;
 	    }
@@ -1217,9 +1218,11 @@ collection:
 
 nfa_do_multibyte:
 		/* Length of current char with composing chars. */
-		if (enc_utf8 && clen != (plen = (*mb_ptr2len)(old_regparse)))
+		if (enc_utf8 && (clen != (plen = (*mb_ptr2len)(old_regparse))
+			    || utf_iscomposing(c)))
 		{
-		    /* A base character plus composing characters.
+		    /* A base character plus composing characters, or just one
+		     * or more composing characters.
 		     * This requires creating a separate atom as if enclosing
 		     * the characters in (), where NFA_COMPOSING is the ( and
 		     * NFA_END_COMPOSING is the ). Note that right now we are
@@ -1400,7 +1403,6 @@ nfa_regpiece()
 	    /* Save pos after the repeated atom and the \{} */
 	    new_regparse = regparse;
 
-	    new_regparse = regparse;
 	    quest = (greedy == TRUE? NFA_QUEST : NFA_QUEST_NONGREEDY);
 	    for (i = 0; i < maxval; i++)
 	    {
@@ -3218,11 +3220,19 @@ nfa_regmatch(start, submatch, m)
 		result = OK;
 		sta = t->state->out;
 		len = 0;
+		if (utf_iscomposing(sta->c))
+		{
+		    /* Only match composing character(s), ignore base
+		     * character.  Used for ".{composing}" and "{composing}"
+		     * (no preceding character). */
+		    len += mb_char2len(c);
+		}
 		if (ireg_icombine)
 		{
-		    /* If \Z was present, then ignore composing characters. */
+		    /* If \Z was present, then ignore composing characters.
+		     * When ignoring the base character this always matches. */
 		    /* TODO: How about negated? */
-		    if (sta->c != c)
+		    if (len == 0 && sta->c != c)
 			result = FAIL;
 		    len = n;
 		    while (sta->c != NFA_END_COMPOSING)
--- a/src/testdir/test95.in
+++ b/src/testdir/test95.in
@@ -38,6 +38,15 @@ STARTTEST
 :"""" Test composing character matching
 :call add(tl, ['.ม', 'xม่x yมy', 'yม'])
 :call add(tl, ['.ม่', 'xม่x yมy', 'xม่'])
+:call add(tl, ["\u05b9", " x\u05b9 ", "x\u05b9"])
+:call add(tl, [".\u05b9", " x\u05b9 ", "x\u05b9"])
+:call add(tl, ["\u05b9\u05bb", " x\u05b9\u05bb ", "x\u05b9\u05bb"])
+:call add(tl, [".\u05b9\u05bb", " x\u05b9\u05bb ", "x\u05b9\u05bb"])
+:call add(tl, ["\u05bb\u05b9", " x\u05b9\u05bb "])
+:call add(tl, [".\u05bb\u05b9", " x\u05b9\u05bb "])
+:call add(tl, ["\u05b9", " y\u05bb x\u05b9 ", "x\u05b9"])
+:call add(tl, [".\u05b9", " y\u05bb x\u05b9 ", "x\u05b9"])
+
 
 :"""" Test \Z
 :call add(tl, ['ú\Z', 'x'])
@@ -50,6 +59,8 @@ STARTTEST
 :call add(tl, ["ק\u200d\u05b9x\\Z", "xק\u200dxy", "ק\u200dx"])
 :call add(tl, ["ק\u200dx\\Z", "xק\u200d\u05b9xy", "ק\u200d\u05b9x"])
 :call add(tl, ["ק\u200dx\\Z", "xק\u200dxy", "ק\u200dx"])
+:call add(tl, ["\u05b9\\+\\Z", "xyz", "xyz"])
+:call add(tl, ["\\Z\u05b9\\+", "xyz", "xyz"])
 
 :"""" Combining different tests and features
 :call add(tl, ['[^[=a=]]\+', 'ddaãâbcd', 'dd'])
--- a/src/testdir/test95.ok
+++ b/src/testdir/test95.ok
@@ -11,6 +11,14 @@ OK - \f\+
 OK - \%#=1\f\+
 OK - .ม
 OK - .ม่
+OK - ֹ
+OK - .ֹ
+OK - ֹֻ
+OK - .ֹֻ
+OK - ֹֻ
+OK - .ֹֻ
+OK - ֹ
+OK - .ֹ
 OK - ú\Z
 OK - יהוה\Z
 OK - יְהוָה\Z
@@ -21,4 +29,6 @@ OK - ק‍ֹx\Z
 OK - ק‍ֹx\Z
 OK - ק‍x\Z
 OK - ק‍x\Z
+OK - ֹ\+\Z
+OK - \Zֹ\+
 OK - [^[=a=]]\+
--- a/src/version.c
+++ b/src/version.c
@@ -729,6 +729,8 @@ static char *(features[]) =
 static int included_patches[] =
 {   /* Add new patch number below this line */
 /**/
+    1015,
+/**/
     1014,
 /**/
     1013,