comparison src/regexp.c @ 35166:0b259135fb3a v9.1.0409

patch 9.1.0409: too many strlen() calls in the regexp engine Commit: https://github.com/vim/vim/commit/82792db6315f7c7b0e299cdde1566f2932a463f8 Author: John Marriott <basilisk@internode.on.net> Date: Sun May 12 00:07:17 2024 +0200 patch 9.1.0409: too many strlen() calls in the regexp engine Problem: too many strlen() calls in the regexp engine Solution: refactor code to retrieve strlen differently, make use of bsearch() for getting the character class (John Marriott) closes: #14648 Signed-off-by: John Marriott <basilisk@internode.on.net> Signed-off-by: Christian Brabandt <cb@256bit.org>
author Christian Brabandt <cb@256bit.org>
date Sun, 12 May 2024 00:15:04 +0200
parents 3f8444c5a6f3
children 4aad918ac113
comparison
equal deleted inserted replaced
35165:d0498ef60b5b 35166:0b259135fb3a
159 return MULTI_MULT; 159 return MULTI_MULT;
160 return NOT_MULTI; 160 return NOT_MULTI;
161 } 161 }
162 162
163 static char_u *reg_prev_sub = NULL; 163 static char_u *reg_prev_sub = NULL;
164 static size_t reg_prev_sublen = 0;
164 165
165 /* 166 /*
166 * REGEXP_INRANGE contains all characters which are always special in a [] 167 * REGEXP_INRANGE contains all characters which are always special in a []
167 * range after '\'. 168 * range after '\'.
168 * REGEXP_ABBR contains all characters which act as abbreviations after '\'. 169 * REGEXP_ABBR contains all characters which act as abbreviations after '\'.
195 case 'b': return BS; 196 case 'b': return BS;
196 } 197 }
197 return c; 198 return c;
198 } 199 }
199 200
201 enum
202 {
203 CLASS_ALNUM = 0,
204 CLASS_ALPHA,
205 CLASS_BLANK,
206 CLASS_CNTRL,
207 CLASS_DIGIT,
208 CLASS_GRAPH,
209 CLASS_LOWER,
210 CLASS_PRINT,
211 CLASS_PUNCT,
212 CLASS_SPACE,
213 CLASS_UPPER,
214 CLASS_XDIGIT,
215 CLASS_TAB,
216 CLASS_RETURN,
217 CLASS_BACKSPACE,
218 CLASS_ESCAPE,
219 CLASS_IDENT,
220 CLASS_KEYWORD,
221 CLASS_FNAME,
222 CLASS_NONE = 99
223 };
224
200 /* 225 /*
201 * Check for a character class name "[:name:]". "pp" points to the '['. 226 * Check for a character class name "[:name:]". "pp" points to the '['.
202 * Returns one of the CLASS_ items. CLASS_NONE means that no item was 227 * Returns one of the CLASS_ items. CLASS_NONE means that no item was
203 * recognized. Otherwise "pp" is advanced to after the item. 228 * recognized. Otherwise "pp" is advanced to after the item.
204 */ 229 */
205 static int 230 static int
206 get_char_class(char_u **pp) 231 get_char_class(char_u **pp)
207 { 232 {
208 static const char *(class_names[]) = 233 // must be sorted by the 'value' field because it is used by bsearch()!
209 { 234 static keyvalue_T char_class_tab[] =
210 "alnum:]", 235 {
211 #define CLASS_ALNUM 0 236 KEYVALUE_ENTRY(CLASS_ALNUM, "alnum:]"),
212 "alpha:]", 237 KEYVALUE_ENTRY(CLASS_ALPHA, "alpha:]"),
213 #define CLASS_ALPHA 1 238 KEYVALUE_ENTRY(CLASS_BACKSPACE, "backspace:]"),
214 "blank:]", 239 KEYVALUE_ENTRY(CLASS_BLANK, "blank:]"),
215 #define CLASS_BLANK 2 240 KEYVALUE_ENTRY(CLASS_CNTRL, "cntrl:]"),
216 "cntrl:]", 241 KEYVALUE_ENTRY(CLASS_DIGIT, "digit:]"),
217 #define CLASS_CNTRL 3 242 KEYVALUE_ENTRY(CLASS_ESCAPE, "escape:]"),
218 "digit:]", 243 KEYVALUE_ENTRY(CLASS_FNAME, "fname:]"),
219 #define CLASS_DIGIT 4 244 KEYVALUE_ENTRY(CLASS_GRAPH, "graph:]"),
220 "graph:]", 245 KEYVALUE_ENTRY(CLASS_IDENT, "ident:]"),
221 #define CLASS_GRAPH 5 246 KEYVALUE_ENTRY(CLASS_KEYWORD, "keyword:]"),
222 "lower:]", 247 KEYVALUE_ENTRY(CLASS_LOWER, "lower:]"),
223 #define CLASS_LOWER 6 248 KEYVALUE_ENTRY(CLASS_PRINT, "print:]"),
224 "print:]", 249 KEYVALUE_ENTRY(CLASS_PUNCT, "punct:]"),
225 #define CLASS_PRINT 7 250 KEYVALUE_ENTRY(CLASS_RETURN, "return:]"),
226 "punct:]", 251 KEYVALUE_ENTRY(CLASS_SPACE, "space:]"),
227 #define CLASS_PUNCT 8 252 KEYVALUE_ENTRY(CLASS_TAB, "tab:]"),
228 "space:]", 253 KEYVALUE_ENTRY(CLASS_UPPER, "upper:]"),
229 #define CLASS_SPACE 9 254 KEYVALUE_ENTRY(CLASS_XDIGIT, "xdigit:]")
230 "upper:]",
231 #define CLASS_UPPER 10
232 "xdigit:]",
233 #define CLASS_XDIGIT 11
234 "tab:]",
235 #define CLASS_TAB 12
236 "return:]",
237 #define CLASS_RETURN 13
238 "backspace:]",
239 #define CLASS_BACKSPACE 14
240 "escape:]",
241 #define CLASS_ESCAPE 15
242 "ident:]",
243 #define CLASS_IDENT 16
244 "keyword:]",
245 #define CLASS_KEYWORD 17
246 "fname:]",
247 #define CLASS_FNAME 18
248 }; 255 };
249 #define CLASS_NONE 99 256
250 int i; 257 // check that the value of "pp" has a chance of matching
251 258 if ((*pp)[1] == ':' && ASCII_ISLOWER((*pp)[2])
252 if ((*pp)[1] == ':') 259 && ASCII_ISLOWER((*pp)[3]) && ASCII_ISLOWER((*pp)[4]))
253 { 260 {
254 for (i = 0; i < (int)ARRAY_LENGTH(class_names); ++i) 261 keyvalue_T target;
255 if (STRNCMP(*pp + 2, class_names[i], STRLEN(class_names[i])) == 0) 262 keyvalue_T *entry;
256 { 263 // this function can be called repeatedly with the same value for "pp"
257 *pp += STRLEN(class_names[i]) + 2; 264 // so we cache the last found entry.
258 return i; 265 static keyvalue_T *last_entry = NULL;
259 } 266
267 target.key = 0;
268 target.value = (char *)*pp + 2;
269 target.length = 0; // not used, see cmp_keyvalue_value_n()
270
271 if (last_entry != NULL && cmp_keyvalue_value_n(&target, last_entry) == 0)
272 entry = last_entry;
273 else
274 entry = (keyvalue_T *)bsearch(&target, &char_class_tab,
275 ARRAY_LENGTH(char_class_tab),
276 sizeof(char_class_tab[0]), cmp_keyvalue_value_n);
277 if (entry != NULL)
278 {
279 last_entry = entry;
280 *pp += entry->length + 2;
281 return entry->key;
282 }
260 } 283 }
261 return CLASS_NONE; 284 return CLASS_NONE;
262 } 285 }
263 286
264 /* 287 /*
617 } 640 }
618 else if (p[0] == '\\' && p[1] != NUL) 641 else if (p[0] == '\\' && p[1] != NUL)
619 { 642 {
620 if (dirc == '?' && newp != NULL && p[1] == '?') 643 if (dirc == '?' && newp != NULL && p[1] == '?')
621 { 644 {
645 size_t startplen;
646
622 // change "\?" to "?", make a copy first. 647 // change "\?" to "?", make a copy first.
623 if (*newp == NULL) 648 if (*newp == NULL)
624 { 649 {
625 *newp = vim_strsave(startp); 650 startplen = STRLEN(startp);
651 *newp = vim_strnsave(startp, startplen);
626 if (*newp != NULL) 652 if (*newp != NULL)
627 p = *newp + (p - startp); 653 p = *newp + (p - startp);
628 } 654 }
629 if (dropped != NULL) 655 if (dropped != NULL)
630 ++*dropped; 656 ++*dropped;
631 if (*newp != NULL) 657 if (*newp != NULL)
632 STRMOVE(p, p + 1); 658 mch_memmove(p, p + 1, (startplen - ((p + 1) - *newp)) + 1);
633 else 659 else
634 ++p; 660 ++p;
635 } 661 }
636 else 662 else
637 ++p; // skip next character 663 ++p; // skip next character
1187 reg_iswordc(int c) 1213 reg_iswordc(int c)
1188 { 1214 {
1189 return vim_iswordc_buf(c, rex.reg_buf); 1215 return vim_iswordc_buf(c, rex.reg_buf);
1190 } 1216 }
1191 1217
1218 #ifdef FEAT_EVAL
1219 static int can_f_submatch = FALSE; // TRUE when submatch() can be used
1220
1221 // This struct is used for reg_submatch(). Needed for when the
1222 // substitution string is an expression that contains a call to substitute()
1223 // and submatch().
1224 typedef struct {
1225 regmatch_T *sm_match;
1226 regmmatch_T *sm_mmatch;
1227 linenr_T sm_firstlnum;
1228 linenr_T sm_maxline;
1229 int sm_line_lbr;
1230 } regsubmatch_T;
1231
1232 static regsubmatch_T rsm; // can only be used when can_f_submatch is TRUE
1233 #endif
1234
1235 typedef enum
1236 {
1237 RGLF_LINE = 0x01,
1238 RGLF_LENGTH = 0x02
1239 #ifdef FEAT_EVAL
1240 ,
1241 RGLF_SUBMATCH = 0x04
1242 #endif
1243 } reg_getline_flags_T;
1244
1245 //
1246 // common code for reg_getline(), reg_getline_len(), reg_getline_submatch() and
1247 // reg_getline_submatch_len().
1248 // the flags argument (which is a bitmask) controls what info is to be returned and whether
1249 // or not submatch is in effect.
1250 // note:
1251 // submatch is available only if FEAT_EVAL is defined.
1252 static void
1253 reg_getline_common(linenr_T lnum, reg_getline_flags_T flags, char_u **line, colnr_T *length)
1254 {
1255 int get_line = flags & RGLF_LINE;
1256 int get_length = flags & RGLF_LENGTH;
1257 linenr_T firstlnum;
1258 linenr_T maxline;
1259
1260 #ifdef FEAT_EVAL
1261 if (flags & RGLF_SUBMATCH)
1262 {
1263 firstlnum = rsm.sm_firstlnum + lnum;
1264 maxline = rsm.sm_maxline;
1265 }
1266 else
1267 #endif
1268 {
1269 firstlnum = rex.reg_firstlnum + lnum;
1270 maxline = rex.reg_maxline;
1271 }
1272
1273 // when looking behind for a match/no-match lnum is negative. But we
1274 // can't go before line 1.
1275 if (firstlnum < 1)
1276 {
1277 if (get_line)
1278 *line = NULL;
1279 if (get_length)
1280 *length = 0;
1281
1282 return;
1283 }
1284
1285 if (lnum > maxline)
1286 {
1287 // must have matched the "\n" in the last line.
1288 if (get_line)
1289 *line = (char_u *)"";
1290 if (get_length)
1291 *length = 0;
1292
1293 return;
1294 }
1295
1296 if (get_line)
1297 *line = ml_get_buf(rex.reg_buf, firstlnum, FALSE);
1298 if (get_length)
1299 *length = ml_get_buf_len(rex.reg_buf, firstlnum);
1300 }
1301
1192 /* 1302 /*
1193 * Get pointer to the line "lnum", which is relative to "reg_firstlnum". 1303 * Get pointer to the line "lnum", which is relative to "reg_firstlnum".
1194 */ 1304 */
1195 static char_u * 1305 static char_u *
1196 reg_getline(linenr_T lnum) 1306 reg_getline(linenr_T lnum)
1197 { 1307 {
1198 // when looking behind for a match/no-match lnum is negative. But we 1308 char_u *line;
1199 // can't go before line 1 1309
1200 if (rex.reg_firstlnum + lnum < 1) 1310 reg_getline_common(lnum, RGLF_LINE, &line, NULL);
1201 return NULL; 1311
1202 if (lnum > rex.reg_maxline) 1312 return line;
1203 // Must have matched the "\n" in the last line. 1313 }
1204 return (char_u *)""; 1314
1205 return ml_get_buf(rex.reg_buf, rex.reg_firstlnum + lnum, FALSE); 1315 /*
1316 * Get length of line "lnum", which is relative to "reg_firstlnum".
1317 */
1318 static colnr_T
1319 reg_getline_len(linenr_T lnum)
1320 {
1321 colnr_T length;
1322
1323 reg_getline_common(lnum, RGLF_LENGTH, NULL, &length);
1324
1325 return length;
1206 } 1326 }
1207 1327
1208 #ifdef FEAT_SYN_HL 1328 #ifdef FEAT_SYN_HL
1209 static char_u *reg_startzp[NSUBEXP]; // Workspace to mark beginning 1329 static char_u *reg_startzp[NSUBEXP]; // Workspace to mark beginning
1210 static char_u *reg_endzp[NSUBEXP]; // and end of \z(...\) matches 1330 static char_u *reg_endzp[NSUBEXP]; // and end of \z(...\) matches
1482 // Get the line to compare with. 1602 // Get the line to compare with.
1483 p = reg_getline(clnum); 1603 p = reg_getline(clnum);
1484 if (clnum == end_lnum) 1604 if (clnum == end_lnum)
1485 len = end_col - ccol; 1605 len = end_col - ccol;
1486 else 1606 else
1487 len = (int)STRLEN(p + ccol); 1607 len = (int)reg_getline_len(clnum) - ccol;
1488 1608
1489 if (cstrncmp(p + ccol, rex.input, &len) != 0) 1609 if (cstrncmp(p + ccol, rex.input, &len) != 0)
1490 return RA_NOMATCH; // doesn't match 1610 return RA_NOMATCH; // doesn't match
1491 if (bytelen != NULL) 1611 if (bytelen != NULL)
1492 *bytelen += len; 1612 *bytelen += len;
1743 char_u * 1863 char_u *
1744 regtilde(char_u *source, int magic) 1864 regtilde(char_u *source, int magic)
1745 { 1865 {
1746 char_u *newsub = source; 1866 char_u *newsub = source;
1747 char_u *p; 1867 char_u *p;
1868 size_t newsublen = 0;
1869 char_u tilde[3] = {'~', NUL, NUL};
1870 size_t tildelen = 1;
1871 int error = FALSE;
1872
1873 if (!magic)
1874 {
1875 tilde[0] = '\\';
1876 tilde[1] = '~';
1877 tilde[2] = NUL;
1878 tildelen = 2;
1879 }
1748 1880
1749 for (p = newsub; *p; ++p) 1881 for (p = newsub; *p; ++p)
1750 { 1882 {
1751 if ((*p == '~' && magic) || (*p == '\\' && *(p + 1) == '~' && !magic)) 1883 if (STRNCMP(p, tilde, tildelen) == 0)
1752 { 1884 {
1753 if (reg_prev_sub != NULL) 1885 size_t prefixlen = p - newsub; // not including the tilde
1886 char_u *postfix = p + tildelen;
1887 size_t postfixlen;
1888 size_t tmpsublen;
1889
1890 if (newsublen == 0)
1891 newsublen = STRLEN(newsub);
1892 newsublen -= tildelen;
1893 postfixlen = newsublen - prefixlen;
1894 tmpsublen = prefixlen + reg_prev_sublen + postfixlen;
1895
1896 if (tmpsublen > 0 && reg_prev_sub != NULL)
1754 { 1897 {
1755 // length = len(newsub) - 1 + len(prev_sub) + 1 1898 char_u *tmpsub;
1899
1756 // Avoid making the text longer than MAXCOL, it will cause 1900 // Avoid making the text longer than MAXCOL, it will cause
1757 // trouble at some point. 1901 // trouble at some point.
1758 size_t prevsublen = STRLEN(reg_prev_sub); 1902 if (tmpsublen > MAXCOL)
1759 size_t newsublen = STRLEN(newsub);
1760 if (prevsublen > MAXCOL || newsublen > MAXCOL
1761 || newsublen + prevsublen > MAXCOL)
1762 { 1903 {
1763 emsg(_(e_resulting_text_too_long)); 1904 emsg(_(e_resulting_text_too_long));
1905 error = TRUE;
1764 break; 1906 break;
1765 } 1907 }
1766 1908
1767 char_u *tmpsub = alloc(newsublen + prevsublen); 1909 tmpsub = alloc(tmpsublen + 1);
1768 if (tmpsub != NULL) 1910 if (tmpsub == NULL)
1769 { 1911 {
1770 // copy prefix 1912 emsg(_(e_out_of_memory));
1771 size_t prefixlen = p - newsub; // not including ~ 1913 error = TRUE;
1772 mch_memmove(tmpsub, newsub, prefixlen); 1914 break;
1773 // interpret tilde
1774 mch_memmove(tmpsub + prefixlen, reg_prev_sub,
1775 prevsublen);
1776 // copy postfix
1777 if (!magic)
1778 ++p; // back off backslash
1779 STRCPY(tmpsub + prefixlen + prevsublen, p + 1);
1780
1781 if (newsub != source) // allocated newsub before
1782 vim_free(newsub);
1783 newsub = tmpsub;
1784 p = newsub + prefixlen + prevsublen;
1785 } 1915 }
1916
1917 // copy prefix
1918 mch_memmove(tmpsub, newsub, prefixlen);
1919 // interpret tilde
1920 mch_memmove(tmpsub + prefixlen, reg_prev_sub, reg_prev_sublen);
1921 // copy postfix
1922 STRCPY(tmpsub + prefixlen + reg_prev_sublen, postfix);
1923
1924 if (newsub != source) // allocated newsub before
1925 vim_free(newsub);
1926 newsub = tmpsub;
1927 newsublen = tmpsublen;
1928 p = newsub + prefixlen + reg_prev_sublen;
1786 } 1929 }
1787 else if (magic)
1788 STRMOVE(p, p + 1); // remove '~'
1789 else 1930 else
1790 STRMOVE(p, p + 2); // remove '\~' 1931 mch_memmove(p, postfix, postfixlen + 1); // remove the tilde (+1 for the NUL)
1932
1791 --p; 1933 --p;
1792 } 1934 }
1793 else 1935 else
1794 { 1936 {
1795 if (*p == '\\' && p[1]) // skip escaped characters 1937 if (*p == '\\' && p[1]) // skip escaped characters
1797 if (has_mbyte) 1939 if (has_mbyte)
1798 p += (*mb_ptr2len)(p) - 1; 1940 p += (*mb_ptr2len)(p) - 1;
1799 } 1941 }
1800 } 1942 }
1801 1943
1944 if (error)
1945 {
1946 if (newsub != source)
1947 vim_free(newsub);
1948 return source;
1949 }
1950
1802 // Store a copy of newsub in reg_prev_sub. It is always allocated, 1951 // Store a copy of newsub in reg_prev_sub. It is always allocated,
1803 // because recursive calls may make the returned string invalid. 1952 // because recursive calls may make the returned string invalid.
1804 vim_free(reg_prev_sub); 1953 // Only store it if there is something to store.
1805 reg_prev_sub = vim_strsave(newsub); 1954 newsublen = p - newsub;
1955 if (newsublen == 0)
1956 VIM_CLEAR(reg_prev_sub);
1957 else
1958 {
1959 vim_free(reg_prev_sub);
1960 reg_prev_sub = vim_strnsave(newsub, newsublen);
1961 }
1962
1963 if (reg_prev_sub == NULL)
1964 reg_prev_sublen = 0;
1965 else
1966 reg_prev_sublen = newsublen;
1806 1967
1807 return newsub; 1968 return newsub;
1808 } 1969 }
1809
1810 #ifdef FEAT_EVAL
1811 static int can_f_submatch = FALSE; // TRUE when submatch() can be used
1812
1813 // These pointers are used for reg_submatch(). Needed for when the
1814 // substitution string is an expression that contains a call to substitute()
1815 // and submatch().
1816 typedef struct {
1817 regmatch_T *sm_match;
1818 regmmatch_T *sm_mmatch;
1819 linenr_T sm_firstlnum;
1820 linenr_T sm_maxline;
1821 int sm_line_lbr;
1822 } regsubmatch_T;
1823
1824 static regsubmatch_T rsm; // can only be used when can_f_submatch is TRUE
1825 #endif
1826 1970
1827 #ifdef FEAT_EVAL 1971 #ifdef FEAT_EVAL
1828 1972
1829 /* 1973 /*
1830 * Put the submatches in "argv[argskip]" which is a list passed into 1974 * Put the submatches in "argv[argskip]" which is a list passed into
2026 // resulting string is saved from the call with 2170 // resulting string is saved from the call with
2027 // "flags & REGSUB_COPY" == 0 to the call with 2171 // "flags & REGSUB_COPY" == 0 to the call with
2028 // "flags & REGSUB_COPY" != 0. 2172 // "flags & REGSUB_COPY" != 0.
2029 if (copy) 2173 if (copy)
2030 { 2174 {
2031 if (eval_result[nested] != NULL && 2175 if (eval_result[nested] != NULL)
2032 (int)STRLEN(eval_result[nested]) < destlen)
2033 { 2176 {
2034 STRCPY(dest, eval_result[nested]); 2177 int eval_len = (int)STRLEN(eval_result[nested]);
2035 dst += STRLEN(eval_result[nested]); 2178
2036 VIM_CLEAR(eval_result[nested]); 2179 if (eval_len < destlen)
2180 {
2181 STRCPY(dest, eval_result[nested]);
2182 dst += eval_len;
2183 VIM_CLEAR(eval_result[nested]);
2184 }
2037 } 2185 }
2038 } 2186 }
2039 else 2187 else
2040 { 2188 {
2041 int prev_can_f_submatch = can_f_submatch; 2189 int prev_can_f_submatch = can_f_submatch;
2323 s = reg_getline(clnum) + rex.reg_mmatch->startpos[no].col; 2471 s = reg_getline(clnum) + rex.reg_mmatch->startpos[no].col;
2324 if (rex.reg_mmatch->endpos[no].lnum == clnum) 2472 if (rex.reg_mmatch->endpos[no].lnum == clnum)
2325 len = rex.reg_mmatch->endpos[no].col 2473 len = rex.reg_mmatch->endpos[no].col
2326 - rex.reg_mmatch->startpos[no].col; 2474 - rex.reg_mmatch->startpos[no].col;
2327 else 2475 else
2328 len = (int)STRLEN(s); 2476 len = (int)reg_getline_len(clnum) - rex.reg_mmatch->startpos[no].col;
2329 } 2477 }
2330 } 2478 }
2331 else 2479 else
2332 { 2480 {
2333 s = rex.reg_match->startp[no]; 2481 s = rex.reg_match->startp[no];
2358 ++dst; 2506 ++dst;
2359 s = reg_getline(++clnum); 2507 s = reg_getline(++clnum);
2360 if (rex.reg_mmatch->endpos[no].lnum == clnum) 2508 if (rex.reg_mmatch->endpos[no].lnum == clnum)
2361 len = rex.reg_mmatch->endpos[no].col; 2509 len = rex.reg_mmatch->endpos[no].col;
2362 else 2510 else
2363 len = (int)STRLEN(s); 2511 len = (int)reg_getline_len(clnum);
2364 } 2512 }
2365 else 2513 else
2366 break; 2514 break;
2367 } 2515 }
2368 else if (*s == NUL) // we hit NUL. 2516 else if (*s == NUL) // we hit NUL.
2463 exit: 2611 exit:
2464 return (int)((dst - dest) + 1); 2612 return (int)((dst - dest) + 1);
2465 } 2613 }
2466 2614
2467 #ifdef FEAT_EVAL 2615 #ifdef FEAT_EVAL
2468 /* 2616
2469 * Call reg_getline() with the line numbers from the submatch. If a
2470 * substitute() was used the reg_maxline and other values have been
2471 * overwritten.
2472 */
2473 static char_u * 2617 static char_u *
2474 reg_getline_submatch(linenr_T lnum) 2618 reg_getline_submatch(linenr_T lnum)
2475 { 2619 {
2476 char_u *s; 2620 char_u *line;
2477 linenr_T save_first = rex.reg_firstlnum; 2621
2478 linenr_T save_max = rex.reg_maxline; 2622 reg_getline_common(lnum, RGLF_LINE | RGLF_SUBMATCH, &line, NULL);
2479 2623
2480 rex.reg_firstlnum = rsm.sm_firstlnum; 2624 return line;
2481 rex.reg_maxline = rsm.sm_maxline; 2625 }
2482 2626
2483 s = reg_getline(lnum); 2627 static colnr_T
2484 2628 reg_getline_submatch_len(linenr_T lnum)
2485 rex.reg_firstlnum = save_first; 2629 {
2486 rex.reg_maxline = save_max; 2630 colnr_T length;
2487 return s; 2631
2632 reg_getline_common(lnum, RGLF_LENGTH | RGLF_SUBMATCH, NULL, &length);
2633
2634 return length;
2488 } 2635 }
2489 2636
2490 /* 2637 /*
2491 * Used for the submatch() function: get the string from the n'th submatch in 2638 * Used for the submatch() function: get the string from the n'th submatch in
2492 * allocated memory. 2639 * allocated memory.
2531 } 2678 }
2532 else 2679 else
2533 { 2680 {
2534 // Multiple lines: take start line from start col, middle 2681 // Multiple lines: take start line from start col, middle
2535 // lines completely and end line up to end col. 2682 // lines completely and end line up to end col.
2536 len = (int)STRLEN(s); 2683 len = (int)reg_getline_submatch_len(lnum) - rsm.sm_mmatch->startpos[no].col;
2537 if (round == 2) 2684 if (round == 2)
2538 { 2685 {
2539 STRCPY(retval, s); 2686 STRCPY(retval, s);
2540 retval[len] = '\n'; 2687 retval[len] = '\n';
2541 } 2688 }
2542 ++len; 2689 ++len;
2543 ++lnum; 2690 ++lnum;
2544 while (lnum < rsm.sm_mmatch->endpos[no].lnum) 2691 while (lnum < rsm.sm_mmatch->endpos[no].lnum)
2545 { 2692 {
2546 s = reg_getline_submatch(lnum++); 2693 s = reg_getline_submatch(lnum);
2547 if (round == 2) 2694 if (round == 2)
2548 STRCPY(retval + len, s); 2695 STRCPY(retval + len, s);
2549 len += (int)STRLEN(s); 2696 len += (int)reg_getline_submatch_len(lnum);
2550 if (round == 2) 2697 if (round == 2)
2551 retval[len] = '\n'; 2698 retval[len] = '\n';
2552 ++len; 2699 ++len;
2700 ++lnum;
2553 } 2701 }
2554 if (round == 2) 2702 if (round == 2)
2555 STRNCPY(retval + len, reg_getline_submatch(lnum), 2703 STRNCPY(retval + len, reg_getline_submatch(lnum),
2556 rsm.sm_mmatch->endpos[no].col); 2704 rsm.sm_mmatch->endpos[no].col);
2557 len += rsm.sm_mmatch->endpos[no].col; 2705 len += rsm.sm_mmatch->endpos[no].col;
2622 if (list_append_string(list, s, ecol - scol) == FAIL) 2770 if (list_append_string(list, s, ecol - scol) == FAIL)
2623 error = TRUE; 2771 error = TRUE;
2624 } 2772 }
2625 else 2773 else
2626 { 2774 {
2775 int max_lnum = elnum - slnum;
2776
2627 if (list_append_string(list, s, -1) == FAIL) 2777 if (list_append_string(list, s, -1) == FAIL)
2628 error = TRUE; 2778 error = TRUE;
2629 for (i = 1; i < elnum - slnum; i++) 2779 for (i = 1; i < max_lnum; i++)
2630 { 2780 {
2631 s = reg_getline_submatch(slnum + i); 2781 s = reg_getline_submatch(slnum + i);
2632 if (list_append_string(list, s, -1) == FAIL) 2782 if (list_append_string(list, s, -1) == FAIL)
2633 error = TRUE; 2783 error = TRUE;
2634 } 2784 }