changeset 5221:9982ec574beb v7.4a.036

Updated for version 7.4a.036. Problem: "\p" in a regexp does not match double-width characters. (Yukihiro Nakadaira) Solution: Don't count display cells, use vim_isprintc().
author Bram Moolenaar <bram@vim.org>
date Sun, 21 Jul 2013 17:06:00 +0200
parents 050893d44c33
children c8559a2d8e5f
files src/regexp.c src/regexp_nfa.c src/testdir/test64.in src/testdir/test64.ok src/testdir/test95.in src/testdir/test95.ok src/version.c
diffstat 7 files changed, 30 insertions(+), 5 deletions(-) [+]
line wrap: on
line diff
--- a/src/regexp.c
+++ b/src/regexp.c
@@ -4563,14 +4563,14 @@ regmatch(scan)
 	    break;
 
 	  case PRINT:
-	    if (ptr2cells(reginput) != 1)
+	    if (!vim_isprintc(PTR2CHAR(reginput)))
 		status = RA_NOMATCH;
 	    else
 		ADVANCE_REGINPUT();
 	    break;
 
 	  case SPRINT:
-	    if (VIM_ISDIGIT(*reginput) || ptr2cells(reginput) != 1)
+	    if (VIM_ISDIGIT(*reginput) || !vim_isprintc(PTR2CHAR(reginput)))
 		status = RA_NOMATCH;
 	    else
 		ADVANCE_REGINPUT();
@@ -5944,7 +5944,8 @@ regrepeat(p, maxcount)
 		if (got_int)
 		    break;
 	    }
-	    else if (ptr2cells(scan) == 1 && (testval || !VIM_ISDIGIT(*scan)))
+	    else if (vim_isprintc(PTR2CHAR(scan)) == 1
+					  && (testval || !VIM_ISDIGIT(*scan)))
 	    {
 		mb_ptr_adv(scan);
 	    }
--- a/src/regexp_nfa.c
+++ b/src/regexp_nfa.c
@@ -5749,12 +5749,12 @@ nfa_regmatch(prog, start, submatch, m)
 		break;
 
 	    case NFA_PRINT:	/*  \p	*/
-		result = ptr2cells(reginput) == 1;
+		result = vim_isprintc(PTR2CHAR(reginput));
 		ADD_STATE_IF_MATCH(t->state);
 		break;
 
 	    case NFA_SPRINT:	/*  \P	*/
-		result = !VIM_ISDIGIT(curc) && ptr2cells(reginput) == 1;
+		result = !VIM_ISDIGIT(curc) && vim_isprintc(PTR2CHAR(reginput));
 		ADD_STATE_IF_MATCH(t->state);
 		break;
 
--- a/src/testdir/test64.in
+++ b/src/testdir/test64.in
@@ -228,6 +228,7 @@ STARTTEST
 :call add(tl, [2, '\v((ab)|c*)+', 'abcccaba', 'abcccab', '', 'ab'])
 :call add(tl, [2, '\v(a(c*)+b)+', 'acbababaaa', 'acbabab', 'ab', ''])
 :call add(tl, [2, '\v(a|b*)+', 'aaaa', 'aaaa', ''])
+:call add(tl, [2, '\p*', 'aá 	', 'aá '])
 :"
 :" Test greedy-ness and lazy-ness
 :call add(tl, [2, 'a\{-2,7}','aaaaaaaaaaaaa', 'aa'])
--- a/src/testdir/test64.ok
+++ b/src/testdir/test64.ok
@@ -506,6 +506,9 @@ OK 2 - \v(a(c*)+b)+
 OK 0 - \v(a|b*)+
 OK 1 - \v(a|b*)+
 OK 2 - \v(a|b*)+
+OK 0 - \p*
+OK 1 - \p*
+OK 2 - \p*
 OK 0 - a\{-2,7}
 OK 1 - a\{-2,7}
 OK 2 - a\{-2,7}
--- a/src/testdir/test95.in
+++ b/src/testdir/test95.in
@@ -29,6 +29,7 @@ STARTTEST
 
 :" this is not a normal "i" but 0xec
 :call add(tl, [2, '\p\+', 'ìa', 'ìa'])
+:call add(tl, [2, '\p*', 'aあ', 'aあ'])
 
 :"""" Test recognition of some character classes
 :call add(tl, [2, '\i\+', '&*¨xx ', 'xx'])
@@ -118,6 +119,16 @@ STARTTEST
 :endfor
 :unlet t tl e l
 
+:" check that 'ambiwidth' does not change the meaning of \p
+:set regexpengine=1 ambiwidth=single
+:$put ='eng 1 ambi single: ' . match(\"\u00EC\", '\p')
+:set regexpengine=1 ambiwidth=double
+:$put ='eng 1 ambi double: ' . match(\"\u00EC\", '\p')
+:set regexpengine=2 ambiwidth=single
+:$put ='eng 2 ambi single: ' . match(\"\u00EC\", '\p')
+:set regexpengine=2 ambiwidth=double
+:$put ='eng 2 ambi double: ' . match(\"\u00EC\", '\p')
+
 :/\%#=1^Results/,$wq! test.out
 ENDTEST
 
--- a/src/testdir/test95.ok
+++ b/src/testdir/test95.ok
@@ -17,6 +17,9 @@ OK 2 - [ม[:alpha:][=a=]]\+
 OK 0 - \p\+
 OK 1 - \p\+
 OK 2 - \p\+
+OK 0 - \p*
+OK 1 - \p*
+OK 2 - \p*
 OK 0 - \i\+
 OK 1 - \i\+
 OK 2 - \i\+
@@ -113,3 +116,7 @@ OK 2 - \Zֹ\+
 OK 0 - [^[=a=]]\+
 OK 1 - [^[=a=]]\+
 OK 2 - [^[=a=]]\+
+eng 1 ambi single: 0
+eng 1 ambi double: 0
+eng 2 ambi single: 0
+eng 2 ambi double: 0
--- a/src/version.c
+++ b/src/version.c
@@ -728,6 +728,8 @@ static char *(features[]) =
 static int included_patches[] =
 {   /* Add new patch number below this line */
 /**/
+    36,
+/**/
     35,
 /**/
     34,