changeset 5794:a63d0cd691dc v7.4.241

updated for version 7.4.241 Problem: The string returned by submatch() does not distinguish between a NL from a line break and a NL that stands for a NUL character. Solution: Add a second argument to return a list. (ZyX)
author Bram Moolenaar <bram@vim.org>
date Wed, 02 Apr 2014 19:00:58 +0200
parents 3b08d14e08a3
children 64e4633f0751
files runtime/doc/eval.txt src/eval.c src/proto/regexp.pro src/regexp.c src/testdir/test79.in src/testdir/test79.ok src/testdir/test80.in src/testdir/test80.ok src/version.c
diffstat 9 files changed, 128 insertions(+), 6 deletions(-) [+]
line wrap: on
line diff
--- a/runtime/doc/eval.txt
+++ b/runtime/doc/eval.txt
@@ -1990,7 +1990,8 @@ strridx( {haystack}, {needle} [, {start}
 				Number	last index of {needle} in {haystack}
 strtrans( {expr})		String	translate string to make it printable
 strwidth( {expr})		Number	display cell length of the String {expr}
-submatch( {nr})			String	specific match in ":s" or substitute()
+submatch( {nr}[, {list}])	String or List
+					specific match in ":s" or substitute()
 substitute( {expr}, {pat}, {sub}, {flags})
 				String	all {pat} in {expr} replaced with {sub}
 synID( {lnum}, {col}, {trans})	Number	syntax ID at {lnum} and {col}
@@ -5797,12 +5798,23 @@ strwidth({expr})					*strwidth()*
 		Ambiguous, this function's return value depends on 'ambiwidth'.
 		Also see |strlen()|, |strdisplaywidth()| and |strchars()|.
 
-submatch({nr})						*submatch()*
+submatch({nr}[, {list}])				*submatch()*
 		Only for an expression in a |:substitute| command or
 		substitute() function.
 		Returns the {nr}'th submatch of the matched text.  When {nr}
 		is 0 the whole matched text is returned.
+		Note that a NL in the string can stand for a line break of a
+		multi-line match or a NUL character in the text.
 		Also see |sub-replace-expression|.
+
+		If {list} is present and non-zero then submatch() returns
+		a list of strings, similar to |getline()| with two arguments.
+		NL characters in the items represent NUL characters in the
+		text.
+		More than one item is only returned for |:substitute|; inside
+		|substitute()| this list will always contain one or zero
+		items, since there are no real line breaks.
+
 		Example: >
 			:s/\d\+/\=submatch(0) + 1/
 <		This finds the first number in the line and adds one to it.
--- a/src/eval.c
+++ b/src/eval.c
@@ -8129,7 +8129,7 @@ static struct fst
     {"strridx",		2, 3, f_strridx},
     {"strtrans",	1, 1, f_strtrans},
     {"strwidth",	1, 1, f_strwidth},
-    {"submatch",	1, 1, f_submatch},
+    {"submatch",	1, 2, f_submatch},
     {"substitute",	4, 4, f_substitute},
     {"synID",		3, 3, f_synID},
     {"synIDattr",	2, 3, f_synIDattr},
@@ -17890,9 +17890,32 @@ f_submatch(argvars, rettv)
     typval_T	*argvars;
     typval_T	*rettv;
 {
-    rettv->v_type = VAR_STRING;
-    rettv->vval.v_string =
-		    reg_submatch((int)get_tv_number_chk(&argvars[0], NULL));
+    int		error = FALSE;
+    char_u	**match;
+    char_u	**s;
+    listitem_T	*li;
+    int		no;
+    int		retList = 0;
+
+    no = (int)get_tv_number_chk(&argvars[0], &error);
+    if (error)
+	return;
+    error = FALSE;
+    if (argvars[1].v_type != VAR_UNKNOWN)
+	retList = get_tv_number_chk(&argvars[1], &error);
+    if (error)
+	return;
+
+    if (retList == 0)
+    {
+	rettv->v_type = VAR_STRING;
+	rettv->vval.v_string = reg_submatch(no);
+    }
+    else
+    {
+	rettv->v_type = VAR_LIST;
+	rettv->vval.v_list = reg_submatch_list(no);
+    }
 }
 
 /*
--- a/src/proto/regexp.pro
+++ b/src/proto/regexp.pro
@@ -10,6 +10,7 @@ char_u *regtilde __ARGS((char_u *source,
 int vim_regsub __ARGS((regmatch_T *rmp, char_u *source, char_u *dest, int copy, int magic, int backslash));
 int vim_regsub_multi __ARGS((regmmatch_T *rmp, linenr_T lnum, char_u *source, char_u *dest, int copy, int magic, int backslash));
 char_u *reg_submatch __ARGS((int no));
+list_T *reg_submatch_list __ARGS((int no));
 regprog_T *vim_regcomp __ARGS((char_u *expr_arg, int re_flags));
 void vim_regfree __ARGS((regprog_T *prog));
 int vim_regexec __ARGS((regmatch_T *rmp, char_u *line, colnr_T col));
--- a/src/regexp.c
+++ b/src/regexp.c
@@ -7897,6 +7897,85 @@ reg_submatch(no)
 
     return retval;
 }
+
+/*
+ * Used for the submatch() function with a non-zero second argument: return
+ * submatch "no" as a list of allocated strings, one item per matched line,
+ * so that a NL inside an item can only stand for a NUL in the text.
+ * Returns NULL when not in a ":s" command, when "no" is outside the range
+ * 0 .. NSUBEXP - 1, for a non-existing submatch and for any error.
+ */
+    list_T *
+reg_submatch_list(no)
+    int		no;
+{
+    char_u	*s;
+    linenr_T	slnum;
+    linenr_T	elnum;
+    colnr_T	scol;
+    colnr_T	ecol;
+    int		i;
+    list_T	*list;
+    int		error = FALSE;
+
+    if (!can_f_submatch || no < 0 || no >= NSUBEXP)
+	return NULL;	/* "no" is used as an array index below */
+
+    if (submatch_match == NULL)	    /* positions are line/column pairs */
+    {
+	slnum = submatch_mmatch->startpos[no].lnum;
+	elnum = submatch_mmatch->endpos[no].lnum;
+	if (slnum < 0 || elnum < 0)
+	    return NULL;
+
+	scol = submatch_mmatch->startpos[no].col;
+	ecol = submatch_mmatch->endpos[no].col;
+
+	list = list_alloc();
+	if (list == NULL)
+	    return NULL;
+
+	s = reg_getline_submatch(slnum) + scol;
+	if (slnum == elnum)		    /* match within a single line */
+	{
+	    if (list_append_string(list, s, ecol - scol) == FAIL)
+		error = TRUE;
+	}
+	else				    /* match spans several lines */
+	{
+	    if (list_append_string(list, s, -1) == FAIL)    /* first line */
+		error = TRUE;
+	    for (i = 1; i < elnum - slnum; i++)	    /* lines in between */
+	    {
+		s = reg_getline_submatch(slnum + i);
+		if (list_append_string(list, s, -1) == FAIL)
+		    error = TRUE;
+	    }
+	    s = reg_getline_submatch(elnum);	    /* last line, up to ecol */
+	    if (list_append_string(list, s, ecol) == FAIL)
+		error = TRUE;
+	}
+    }
+    else			    /* positions are start/end pointers */
+    {
+	s = submatch_match->startp[no];
+	if (s == NULL || submatch_match->endp[no] == NULL)
+	    return NULL;
+	list = list_alloc();
+	if (list == NULL)
+	    return NULL;
+	if (list_append_string(list, s,
+				 (int)(submatch_match->endp[no] - s)) == FAIL)
+	    error = TRUE;
+    }
+
+    if (error)		    /* an append failed: do not return a partial list */
+    {
+	list_free(list, TRUE);
+	return NULL;
+    }
+    return list;
+}
 #endif
 
 static regengine_T bt_regengine =
index f15ecc0f8dc07e351bf7ac6e1a8e1cba63db1fbb..56955c23182a8809b9aa233afbc8f6d1148d07c1
GIT binary patch
literal 3123
zc%1E4OK;jh5av8mt~t9Xi6KqA<(;_VK)@uBG%X3Fhfax%G2mcq;zwGkYJYpztRJk?
z3OMvuA+5e|cV<01-!eOW-Svct?Fh%NKRR{vrKxKAsk_uQLp6^D)f)3F`uC-yyRKnd
z$AWDMGF@{$!}C2|f_}6Yom{A%Hp}HXn{|_8**$!{1Kjr;cm2L{-v`;X1$UvEru1Sg
zK)G$LX5mhEebaLVdm?yruUbLoopeuFs8d5@P+QtZK!(8@fhXA|4RUnE&;`K|H8Ce)
ze%pcIJ3PlTtYH-^SjG|-vA{NJkud`b`Ncq+V2CaW`qL@HR|E$$RA!8I2%4P^!(D<m
zT~xY^T~oU`>T||!$o?Kf9|$@O-4cu@KN7SU`b2QVP>-O;&>cZ!=$@d)P@n8)7#h&e
zqVErUG*KC_xd-}jG#_3b&`0HkBnkxMBTah)E}&T`mdcfC?d%*{pg<cg;R-s?g=;Wx
zOjvwafOTtu{n3WyrzJf1p24|uz`b|D>w7SOhga%Z_OANj;=h8sD_Hzz3a<1!`m2Cg
z7g3Bgq$Fa^fbH*zkrcsNOciDt^Y)56Vyq2y&4{r&fonuNp=-puD{hIgMD;8>*+`;*
zS=n^9N;0So6-E~$(3%>ITVv1_9VYDwOfRP}yPAQ~G2p5D1oP|pDs?-jB`JitxPZRP
zp9rEP(gA}(%qNN4V9wb$H`>$0uGzOX@6S6ZTLi>=v^RS=>-LwMkC0inzuMvmoI_`u
zBXAzw%^ude{q-jM;2y5o-)y$GB76S2mjCs2-Lc@-*J2l>q(3<4*cr!a9IJAy!m%>P
zN*pV4tPuTwc8=e33~QSElAPm2C{P)alce}riq}%Sn&OodFQ<4Z#fvFk2>91m7okF{
zd{W^r68IOnBatK7-`1o0q(+tJFj+mVN99F}>c(L@f@=8A(C1~#EB06mzbWw-Ck)_^
zvrUPK<T<M4=0df2vGP%ElhA5yZLQ#=gg;JyDqDGwVebHfXZy<#t*<>kQYkyJ?U5Qh
zHF8#Hyx7K=cE5t|IJ6%gIl4M`g|QuUwKOx!pf~<(SY!K}E2UCeqbc2^NzKr-W@utF
z!PA|A@TWkJ{?>CnCBv8z>4Xf!?Z_D9!h13ddunoW@a_4{s;!o6-ve2r-M28+Y4oa-
I%f~_d21281DF6Tf
index bb30d140525facb55d0b400578271a9d90da5534..0f6ea4545106dbc331076866a7effa74ebcd0927
GIT binary patch
literal 556
zc$}?}J9C0S6ooC5Gn4;76SlG8jZ_h2@~TYKh9wzAP|<+GV(D+s)rB-MUFMtN+`Yp&
zXW=cao1LhYGE+oa3rF(+Ov3lcCr!g4R0NuCn3nChgMsVVmTBl3mc$8<D?F{Sc}D(%
z*DZF$7g5|5D0?OLH+vjz52$#BI;kOg0@J3M;y9ZfyB6Jps>1)H<@8T2hKD(yW5M|X
z39;l9o^p;98Lu*|h&2b*&~AgA7df_&Wo?AnMYIW*fU_U1{o+2G@ylz`ue|boQN;Y8
zHH-U7#9DEsI8tmWmK0NpAw`#>u{iG9Y`Q6t;J({9)KB<C#Db3p76&wpXy_|!#9}&x
M@^I}L(hFk(-&MncWB>pF
--- a/src/testdir/test80.in
+++ b/src/testdir/test80.in
@@ -117,6 +117,7 @@ STARTTEST
 :set cpo&
 :$put =\"\n\nTEST_5:\"
 :$put =substitute('A123456789', 'A\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)', '\=submatch(0) . submatch(9) . submatch(8) . submatch(7) . submatch(6) . submatch(5) . submatch(4) . submatch(3) . submatch(2) . submatch(1)', '')
+:$put =substitute('A123456789', 'A\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)\(.\)', '\=string([submatch(0, 1), submatch(9, 1), submatch(8, 1), submatch(7, 1), submatch(6, 1), submatch(5, 1), submatch(4, 1), submatch(3, 1), submatch(2, 1), submatch(1, 1)])', '')
 /^TEST_6
 ENDTEST
 
@@ -142,6 +143,7 @@ STARTTEST
 :$put =\"\n\nTEST_7:\"
 :$put =substitute('A
A', 'A.', '\=submatch(0)', '')
 :$put =substitute(\"B\nB\", 'B.', '\=submatch(0)', '')
+:$put =substitute(\"B\nB\", 'B.', '\=string(submatch(0, 1))', '')
 :$put =substitute('-bb', '\zeb', 'a', 'g')
 :$put =substitute('-bb', '\ze', 'c', 'g')
 /^TEST_8
--- a/src/testdir/test80.ok
+++ b/src/testdir/test80.ok
@@ -90,6 +90,7 @@ l
 
 TEST_5:
 A123456789987654321
+[['A123456789'], ['9'], ['8'], ['7'], ['6'], ['5'], ['4'], ['3'], ['2'], ['1']]
 
 
 TEST_6:
@@ -103,6 +104,8 @@ TEST_7:
 A
A
 B
 B
+['B
+']B
 -abab
 c-cbcbc
 
--- a/src/version.c
+++ b/src/version.c
@@ -735,6 +735,8 @@ static char *(features[]) =
 static int included_patches[] =
 {   /* Add new patch number below this line */
 /**/
+    241,
+/**/
     240,
 /**/
     239,