Mercurial > vim
comparison src/regexp.c @ 35166:0b259135fb3a v9.1.0409
patch 9.1.0409: too many strlen() calls in the regexp engine
Commit: https://github.com/vim/vim/commit/82792db6315f7c7b0e299cdde1566f2932a463f8
Author: John Marriott <basilisk@internode.on.net>
Date: Sun May 12 00:07:17 2024 +0200
patch 9.1.0409: too many strlen() calls in the regexp engine
Problem: too many strlen() calls in the regexp engine
Solution: refactor code to retrieve strlen differently, make use
of bsearch() for getting the character class
(John Marriott)
closes: #14648
Signed-off-by: John Marriott <basilisk@internode.on.net>
Signed-off-by: Christian Brabandt <cb@256bit.org>
author | Christian Brabandt <cb@256bit.org> |
---|---|
date | Sun, 12 May 2024 00:15:04 +0200 |
parents | 3f8444c5a6f3 |
children | 4aad918ac113 |
comparison
equal
deleted
inserted
replaced
35165:d0498ef60b5b | 35166:0b259135fb3a |
---|---|
159 return MULTI_MULT; | 159 return MULTI_MULT; |
160 return NOT_MULTI; | 160 return NOT_MULTI; |
161 } | 161 } |
162 | 162 |
163 static char_u *reg_prev_sub = NULL; | 163 static char_u *reg_prev_sub = NULL; |
164 static size_t reg_prev_sublen = 0; | |
164 | 165 |
165 /* | 166 /* |
166 * REGEXP_INRANGE contains all characters which are always special in a [] | 167 * REGEXP_INRANGE contains all characters which are always special in a [] |
167 * range after '\'. | 168 * range after '\'. |
168 * REGEXP_ABBR contains all characters which act as abbreviations after '\'. | 169 * REGEXP_ABBR contains all characters which act as abbreviations after '\'. |
195 case 'b': return BS; | 196 case 'b': return BS; |
196 } | 197 } |
197 return c; | 198 return c; |
198 } | 199 } |
199 | 200 |
201 enum | |
202 { | |
203 CLASS_ALNUM = 0, | |
204 CLASS_ALPHA, | |
205 CLASS_BLANK, | |
206 CLASS_CNTRL, | |
207 CLASS_DIGIT, | |
208 CLASS_GRAPH, | |
209 CLASS_LOWER, | |
210 CLASS_PRINT, | |
211 CLASS_PUNCT, | |
212 CLASS_SPACE, | |
213 CLASS_UPPER, | |
214 CLASS_XDIGIT, | |
215 CLASS_TAB, | |
216 CLASS_RETURN, | |
217 CLASS_BACKSPACE, | |
218 CLASS_ESCAPE, | |
219 CLASS_IDENT, | |
220 CLASS_KEYWORD, | |
221 CLASS_FNAME, | |
222 CLASS_NONE = 99 | |
223 }; | |
224 | |
200 /* | 225 /* |
201 * Check for a character class name "[:name:]". "pp" points to the '['. | 226 * Check for a character class name "[:name:]". "pp" points to the '['. |
202 * Returns one of the CLASS_ items. CLASS_NONE means that no item was | 227 * Returns one of the CLASS_ items. CLASS_NONE means that no item was |
203 * recognized. Otherwise "pp" is advanced to after the item. | 228 * recognized. Otherwise "pp" is advanced to after the item. |
204 */ | 229 */ |
205 static int | 230 static int |
206 get_char_class(char_u **pp) | 231 get_char_class(char_u **pp) |
207 { | 232 { |
208 static const char *(class_names[]) = | 233 // must be sorted by the 'value' field because it is used by bsearch()! |
209 { | 234 static keyvalue_T char_class_tab[] = |
210 "alnum:]", | 235 { |
211 #define CLASS_ALNUM 0 | 236 KEYVALUE_ENTRY(CLASS_ALNUM, "alnum:]"), |
212 "alpha:]", | 237 KEYVALUE_ENTRY(CLASS_ALPHA, "alpha:]"), |
213 #define CLASS_ALPHA 1 | 238 KEYVALUE_ENTRY(CLASS_BACKSPACE, "backspace:]"), |
214 "blank:]", | 239 KEYVALUE_ENTRY(CLASS_BLANK, "blank:]"), |
215 #define CLASS_BLANK 2 | 240 KEYVALUE_ENTRY(CLASS_CNTRL, "cntrl:]"), |
216 "cntrl:]", | 241 KEYVALUE_ENTRY(CLASS_DIGIT, "digit:]"), |
217 #define CLASS_CNTRL 3 | 242 KEYVALUE_ENTRY(CLASS_ESCAPE, "escape:]"), |
218 "digit:]", | 243 KEYVALUE_ENTRY(CLASS_FNAME, "fname:]"), |
219 #define CLASS_DIGIT 4 | 244 KEYVALUE_ENTRY(CLASS_GRAPH, "graph:]"), |
220 "graph:]", | 245 KEYVALUE_ENTRY(CLASS_IDENT, "ident:]"), |
221 #define CLASS_GRAPH 5 | 246 KEYVALUE_ENTRY(CLASS_KEYWORD, "keyword:]"), |
222 "lower:]", | 247 KEYVALUE_ENTRY(CLASS_LOWER, "lower:]"), |
223 #define CLASS_LOWER 6 | 248 KEYVALUE_ENTRY(CLASS_PRINT, "print:]"), |
224 "print:]", | 249 KEYVALUE_ENTRY(CLASS_PUNCT, "punct:]"), |
225 #define CLASS_PRINT 7 | 250 KEYVALUE_ENTRY(CLASS_RETURN, "return:]"), |
226 "punct:]", | 251 KEYVALUE_ENTRY(CLASS_SPACE, "space:]"), |
227 #define CLASS_PUNCT 8 | 252 KEYVALUE_ENTRY(CLASS_TAB, "tab:]"), |
228 "space:]", | 253 KEYVALUE_ENTRY(CLASS_UPPER, "upper:]"), |
229 #define CLASS_SPACE 9 | 254 KEYVALUE_ENTRY(CLASS_XDIGIT, "xdigit:]") |
230 "upper:]", | |
231 #define CLASS_UPPER 10 | |
232 "xdigit:]", | |
233 #define CLASS_XDIGIT 11 | |
234 "tab:]", | |
235 #define CLASS_TAB 12 | |
236 "return:]", | |
237 #define CLASS_RETURN 13 | |
238 "backspace:]", | |
239 #define CLASS_BACKSPACE 14 | |
240 "escape:]", | |
241 #define CLASS_ESCAPE 15 | |
242 "ident:]", | |
243 #define CLASS_IDENT 16 | |
244 "keyword:]", | |
245 #define CLASS_KEYWORD 17 | |
246 "fname:]", | |
247 #define CLASS_FNAME 18 | |
248 }; | 255 }; |
249 #define CLASS_NONE 99 | 256 |
250 int i; | 257 // check that the value of "pp" has a chance of matching |
251 | 258 if ((*pp)[1] == ':' && ASCII_ISLOWER((*pp)[2]) |
252 if ((*pp)[1] == ':') | 259 && ASCII_ISLOWER((*pp)[3]) && ASCII_ISLOWER((*pp)[4])) |
253 { | 260 { |
254 for (i = 0; i < (int)ARRAY_LENGTH(class_names); ++i) | 261 keyvalue_T target; |
255 if (STRNCMP(*pp + 2, class_names[i], STRLEN(class_names[i])) == 0) | 262 keyvalue_T *entry; |
256 { | 263 // this function can be called repeatedly with the same value for "pp" |
257 *pp += STRLEN(class_names[i]) + 2; | 264 // so we cache the last found entry. |
258 return i; | 265 static keyvalue_T *last_entry = NULL; |
259 } | 266 |
267 target.key = 0; | |
268 target.value = (char *)*pp + 2; | |
269 target.length = 0; // not used, see cmp_keyvalue_value_n() | |
270 | |
271 if (last_entry != NULL && cmp_keyvalue_value_n(&target, last_entry) == 0) | |
272 entry = last_entry; | |
273 else | |
274 entry = (keyvalue_T *)bsearch(&target, &char_class_tab, | |
275 ARRAY_LENGTH(char_class_tab), | |
276 sizeof(char_class_tab[0]), cmp_keyvalue_value_n); | |
277 if (entry != NULL) | |
278 { | |
279 last_entry = entry; | |
280 *pp += entry->length + 2; | |
281 return entry->key; | |
282 } | |
260 } | 283 } |
261 return CLASS_NONE; | 284 return CLASS_NONE; |
262 } | 285 } |
263 | 286 |
264 /* | 287 /* |
617 } | 640 } |
618 else if (p[0] == '\\' && p[1] != NUL) | 641 else if (p[0] == '\\' && p[1] != NUL) |
619 { | 642 { |
620 if (dirc == '?' && newp != NULL && p[1] == '?') | 643 if (dirc == '?' && newp != NULL && p[1] == '?') |
621 { | 644 { |
645 size_t startplen; | |
646 | |
622 // change "\?" to "?", make a copy first. | 647 // change "\?" to "?", make a copy first. |
623 if (*newp == NULL) | 648 if (*newp == NULL) |
624 { | 649 { |
625 *newp = vim_strsave(startp); | 650 startplen = STRLEN(startp); |
651 *newp = vim_strnsave(startp, startplen); | |
626 if (*newp != NULL) | 652 if (*newp != NULL) |
627 p = *newp + (p - startp); | 653 p = *newp + (p - startp); |
628 } | 654 } |
629 if (dropped != NULL) | 655 if (dropped != NULL) |
630 ++*dropped; | 656 ++*dropped; |
631 if (*newp != NULL) | 657 if (*newp != NULL) |
632 STRMOVE(p, p + 1); | 658 mch_memmove(p, p + 1, (startplen - ((p + 1) - *newp)) + 1); |
633 else | 659 else |
634 ++p; | 660 ++p; |
635 } | 661 } |
636 else | 662 else |
637 ++p; // skip next character | 663 ++p; // skip next character |
1187 reg_iswordc(int c) | 1213 reg_iswordc(int c) |
1188 { | 1214 { |
1189 return vim_iswordc_buf(c, rex.reg_buf); | 1215 return vim_iswordc_buf(c, rex.reg_buf); |
1190 } | 1216 } |
1191 | 1217 |
1218 #ifdef FEAT_EVAL | |
1219 static int can_f_submatch = FALSE; // TRUE when submatch() can be used | |
1220 | |
1221 // This struct is used for reg_submatch(). Needed for when the | |
1222 // substitution string is an expression that contains a call to substitute() | |
1223 // and submatch(). | |
1224 typedef struct { | |
1225 regmatch_T *sm_match; | |
1226 regmmatch_T *sm_mmatch; | |
1227 linenr_T sm_firstlnum; | |
1228 linenr_T sm_maxline; | |
1229 int sm_line_lbr; | |
1230 } regsubmatch_T; | |
1231 | |
1232 static regsubmatch_T rsm; // can only be used when can_f_submatch is TRUE | |
1233 #endif | |
1234 | |
1235 typedef enum | |
1236 { | |
1237 RGLF_LINE = 0x01, | |
1238 RGLF_LENGTH = 0x02 | |
1239 #ifdef FEAT_EVAL | |
1240 , | |
1241 RGLF_SUBMATCH = 0x04 | |
1242 #endif | |
1243 } reg_getline_flags_T; | |
1244 | |
1245 // | |
1246 // common code for reg_getline(), reg_getline_len(), reg_getline_submatch() and | |
1247 // reg_getline_submatch_len(). | |
1248 // the flags argument (which is a bitmask) controls what info is to be returned and whether | |
1249 // or not submatch is in effect. | |
1250 // note: | |
1251 // submatch is available only if FEAT_EVAL is defined. | |
1252 static void | |
1253 reg_getline_common(linenr_T lnum, reg_getline_flags_T flags, char_u **line, colnr_T *length) | |
1254 { | |
1255 int get_line = flags & RGLF_LINE; | |
1256 int get_length = flags & RGLF_LENGTH; | |
1257 linenr_T firstlnum; | |
1258 linenr_T maxline; | |
1259 | |
1260 #ifdef FEAT_EVAL | |
1261 if (flags & RGLF_SUBMATCH) | |
1262 { | |
1263 firstlnum = rsm.sm_firstlnum + lnum; | |
1264 maxline = rsm.sm_maxline; | |
1265 } | |
1266 else | |
1267 #endif | |
1268 { | |
1269 firstlnum = rex.reg_firstlnum + lnum; | |
1270 maxline = rex.reg_maxline; | |
1271 } | |
1272 | |
1273 // when looking behind for a match/no-match lnum is negative. But we | |
1274 // can't go before line 1. | |
1275 if (firstlnum < 1) | |
1276 { | |
1277 if (get_line) | |
1278 *line = NULL; | |
1279 if (get_length) | |
1280 *length = 0; | |
1281 | |
1282 return; | |
1283 } | |
1284 | |
1285 if (lnum > maxline) | |
1286 { | |
1287 // must have matched the "\n" in the last line. | |
1288 if (get_line) | |
1289 *line = (char_u *)""; | |
1290 if (get_length) | |
1291 *length = 0; | |
1292 | |
1293 return; | |
1294 } | |
1295 | |
1296 if (get_line) | |
1297 *line = ml_get_buf(rex.reg_buf, firstlnum, FALSE); | |
1298 if (get_length) | |
1299 *length = ml_get_buf_len(rex.reg_buf, firstlnum); | |
1300 } | |
1301 | |
1192 /* | 1302 /* |
1193 * Get pointer to the line "lnum", which is relative to "reg_firstlnum". | 1303 * Get pointer to the line "lnum", which is relative to "reg_firstlnum". |
1194 */ | 1304 */ |
1195 static char_u * | 1305 static char_u * |
1196 reg_getline(linenr_T lnum) | 1306 reg_getline(linenr_T lnum) |
1197 { | 1307 { |
1198 // when looking behind for a match/no-match lnum is negative. But we | 1308 char_u *line; |
1199 // can't go before line 1 | 1309 |
1200 if (rex.reg_firstlnum + lnum < 1) | 1310 reg_getline_common(lnum, RGLF_LINE, &line, NULL); |
1201 return NULL; | 1311 |
1202 if (lnum > rex.reg_maxline) | 1312 return line; |
1203 // Must have matched the "\n" in the last line. | 1313 } |
1204 return (char_u *)""; | 1314 |
1205 return ml_get_buf(rex.reg_buf, rex.reg_firstlnum + lnum, FALSE); | 1315 /* |
1316 * Get length of line "lnum", which is relative to "reg_firstlnum". | |
1317 */ | |
1318 static colnr_T | |
1319 reg_getline_len(linenr_T lnum) | |
1320 { | |
1321 colnr_T length; | |
1322 | |
1323 reg_getline_common(lnum, RGLF_LENGTH, NULL, &length); | |
1324 | |
1325 return length; | |
1206 } | 1326 } |
1207 | 1327 |
1208 #ifdef FEAT_SYN_HL | 1328 #ifdef FEAT_SYN_HL |
1209 static char_u *reg_startzp[NSUBEXP]; // Workspace to mark beginning | 1329 static char_u *reg_startzp[NSUBEXP]; // Workspace to mark beginning |
1210 static char_u *reg_endzp[NSUBEXP]; // and end of \z(...\) matches | 1330 static char_u *reg_endzp[NSUBEXP]; // and end of \z(...\) matches |
1482 // Get the line to compare with. | 1602 // Get the line to compare with. |
1483 p = reg_getline(clnum); | 1603 p = reg_getline(clnum); |
1484 if (clnum == end_lnum) | 1604 if (clnum == end_lnum) |
1485 len = end_col - ccol; | 1605 len = end_col - ccol; |
1486 else | 1606 else |
1487 len = (int)STRLEN(p + ccol); | 1607 len = (int)reg_getline_len(clnum) - ccol; |
1488 | 1608 |
1489 if (cstrncmp(p + ccol, rex.input, &len) != 0) | 1609 if (cstrncmp(p + ccol, rex.input, &len) != 0) |
1490 return RA_NOMATCH; // doesn't match | 1610 return RA_NOMATCH; // doesn't match |
1491 if (bytelen != NULL) | 1611 if (bytelen != NULL) |
1492 *bytelen += len; | 1612 *bytelen += len; |
1743 char_u * | 1863 char_u * |
1744 regtilde(char_u *source, int magic) | 1864 regtilde(char_u *source, int magic) |
1745 { | 1865 { |
1746 char_u *newsub = source; | 1866 char_u *newsub = source; |
1747 char_u *p; | 1867 char_u *p; |
1868 size_t newsublen = 0; | |
1869 char_u tilde[3] = {'~', NUL, NUL}; | |
1870 size_t tildelen = 1; | |
1871 int error = FALSE; | |
1872 | |
1873 if (!magic) | |
1874 { | |
1875 tilde[0] = '\\'; | |
1876 tilde[1] = '~'; | |
1877 tilde[2] = NUL; | |
1878 tildelen = 2; | |
1879 } | |
1748 | 1880 |
1749 for (p = newsub; *p; ++p) | 1881 for (p = newsub; *p; ++p) |
1750 { | 1882 { |
1751 if ((*p == '~' && magic) || (*p == '\\' && *(p + 1) == '~' && !magic)) | 1883 if (STRNCMP(p, tilde, tildelen) == 0) |
1752 { | 1884 { |
1753 if (reg_prev_sub != NULL) | 1885 size_t prefixlen = p - newsub; // not including the tilde |
1886 char_u *postfix = p + tildelen; | |
1887 size_t postfixlen; | |
1888 size_t tmpsublen; | |
1889 | |
1890 if (newsublen == 0) | |
1891 newsublen = STRLEN(newsub); | |
1892 newsublen -= tildelen; | |
1893 postfixlen = newsublen - prefixlen; | |
1894 tmpsublen = prefixlen + reg_prev_sublen + postfixlen; | |
1895 | |
1896 if (tmpsublen > 0 && reg_prev_sub != NULL) | |
1754 { | 1897 { |
1755 // length = len(newsub) - 1 + len(prev_sub) + 1 | 1898 char_u *tmpsub; |
1899 | |
1756 // Avoid making the text longer than MAXCOL, it will cause | 1900 // Avoid making the text longer than MAXCOL, it will cause |
1757 // trouble at some point. | 1901 // trouble at some point. |
1758 size_t prevsublen = STRLEN(reg_prev_sub); | 1902 if (tmpsublen > MAXCOL) |
1759 size_t newsublen = STRLEN(newsub); | |
1760 if (prevsublen > MAXCOL || newsublen > MAXCOL | |
1761 || newsublen + prevsublen > MAXCOL) | |
1762 { | 1903 { |
1763 emsg(_(e_resulting_text_too_long)); | 1904 emsg(_(e_resulting_text_too_long)); |
1905 error = TRUE; | |
1764 break; | 1906 break; |
1765 } | 1907 } |
1766 | 1908 |
1767 char_u *tmpsub = alloc(newsublen + prevsublen); | 1909 tmpsub = alloc(tmpsublen + 1); |
1768 if (tmpsub != NULL) | 1910 if (tmpsub == NULL) |
1769 { | 1911 { |
1770 // copy prefix | 1912 emsg(_(e_out_of_memory)); |
1771 size_t prefixlen = p - newsub; // not including ~ | 1913 error = TRUE; |
1772 mch_memmove(tmpsub, newsub, prefixlen); | 1914 break; |
1773 // interpret tilde | |
1774 mch_memmove(tmpsub + prefixlen, reg_prev_sub, | |
1775 prevsublen); | |
1776 // copy postfix | |
1777 if (!magic) | |
1778 ++p; // back off backslash | |
1779 STRCPY(tmpsub + prefixlen + prevsublen, p + 1); | |
1780 | |
1781 if (newsub != source) // allocated newsub before | |
1782 vim_free(newsub); | |
1783 newsub = tmpsub; | |
1784 p = newsub + prefixlen + prevsublen; | |
1785 } | 1915 } |
1916 | |
1917 // copy prefix | |
1918 mch_memmove(tmpsub, newsub, prefixlen); | |
1919 // interpret tilde | |
1920 mch_memmove(tmpsub + prefixlen, reg_prev_sub, reg_prev_sublen); | |
1921 // copy postfix | |
1922 STRCPY(tmpsub + prefixlen + reg_prev_sublen, postfix); | |
1923 | |
1924 if (newsub != source) // allocated newsub before | |
1925 vim_free(newsub); | |
1926 newsub = tmpsub; | |
1927 newsublen = tmpsublen; | |
1928 p = newsub + prefixlen + reg_prev_sublen; | |
1786 } | 1929 } |
1787 else if (magic) | |
1788 STRMOVE(p, p + 1); // remove '~' | |
1789 else | 1930 else |
1790 STRMOVE(p, p + 2); // remove '\~' | 1931 mch_memmove(p, postfix, postfixlen + 1); // remove the tilde (+1 for the NUL) |
1932 | |
1791 --p; | 1933 --p; |
1792 } | 1934 } |
1793 else | 1935 else |
1794 { | 1936 { |
1795 if (*p == '\\' && p[1]) // skip escaped characters | 1937 if (*p == '\\' && p[1]) // skip escaped characters |
1797 if (has_mbyte) | 1939 if (has_mbyte) |
1798 p += (*mb_ptr2len)(p) - 1; | 1940 p += (*mb_ptr2len)(p) - 1; |
1799 } | 1941 } |
1800 } | 1942 } |
1801 | 1943 |
1944 if (error) | |
1945 { | |
1946 if (newsub != source) | |
1947 vim_free(newsub); | |
1948 return source; | |
1949 } | |
1950 | |
1802 // Store a copy of newsub in reg_prev_sub. It is always allocated, | 1951 // Store a copy of newsub in reg_prev_sub. It is always allocated, |
1803 // because recursive calls may make the returned string invalid. | 1952 // because recursive calls may make the returned string invalid. |
1804 vim_free(reg_prev_sub); | 1953 // Only store it if there is something to store. |
1805 reg_prev_sub = vim_strsave(newsub); | 1954 newsublen = p - newsub; |
1955 if (newsublen == 0) | |
1956 VIM_CLEAR(reg_prev_sub); | |
1957 else | |
1958 { | |
1959 vim_free(reg_prev_sub); | |
1960 reg_prev_sub = vim_strnsave(newsub, newsublen); | |
1961 } | |
1962 | |
1963 if (reg_prev_sub == NULL) | |
1964 reg_prev_sublen = 0; | |
1965 else | |
1966 reg_prev_sublen = newsublen; | |
1806 | 1967 |
1807 return newsub; | 1968 return newsub; |
1808 } | 1969 } |
1809 | |
1810 #ifdef FEAT_EVAL | |
1811 static int can_f_submatch = FALSE; // TRUE when submatch() can be used | |
1812 | |
1813 // These pointers are used for reg_submatch(). Needed for when the | |
1814 // substitution string is an expression that contains a call to substitute() | |
1815 // and submatch(). | |
1816 typedef struct { | |
1817 regmatch_T *sm_match; | |
1818 regmmatch_T *sm_mmatch; | |
1819 linenr_T sm_firstlnum; | |
1820 linenr_T sm_maxline; | |
1821 int sm_line_lbr; | |
1822 } regsubmatch_T; | |
1823 | |
1824 static regsubmatch_T rsm; // can only be used when can_f_submatch is TRUE | |
1825 #endif | |
1826 | 1970 |
1827 #ifdef FEAT_EVAL | 1971 #ifdef FEAT_EVAL |
1828 | 1972 |
1829 /* | 1973 /* |
1830 * Put the submatches in "argv[argskip]" which is a list passed into | 1974 * Put the submatches in "argv[argskip]" which is a list passed into |
2026 // resulting string is saved from the call with | 2170 // resulting string is saved from the call with |
2027 // "flags & REGSUB_COPY" == 0 to the call with | 2171 // "flags & REGSUB_COPY" == 0 to the call with |
2028 // "flags & REGSUB_COPY" != 0. | 2172 // "flags & REGSUB_COPY" != 0. |
2029 if (copy) | 2173 if (copy) |
2030 { | 2174 { |
2031 if (eval_result[nested] != NULL && | 2175 if (eval_result[nested] != NULL) |
2032 (int)STRLEN(eval_result[nested]) < destlen) | |
2033 { | 2176 { |
2034 STRCPY(dest, eval_result[nested]); | 2177 int eval_len = (int)STRLEN(eval_result[nested]); |
2035 dst += STRLEN(eval_result[nested]); | 2178 |
2036 VIM_CLEAR(eval_result[nested]); | 2179 if (eval_len < destlen) |
2180 { | |
2181 STRCPY(dest, eval_result[nested]); | |
2182 dst += eval_len; | |
2183 VIM_CLEAR(eval_result[nested]); | |
2184 } | |
2037 } | 2185 } |
2038 } | 2186 } |
2039 else | 2187 else |
2040 { | 2188 { |
2041 int prev_can_f_submatch = can_f_submatch; | 2189 int prev_can_f_submatch = can_f_submatch; |
2323 s = reg_getline(clnum) + rex.reg_mmatch->startpos[no].col; | 2471 s = reg_getline(clnum) + rex.reg_mmatch->startpos[no].col; |
2324 if (rex.reg_mmatch->endpos[no].lnum == clnum) | 2472 if (rex.reg_mmatch->endpos[no].lnum == clnum) |
2325 len = rex.reg_mmatch->endpos[no].col | 2473 len = rex.reg_mmatch->endpos[no].col |
2326 - rex.reg_mmatch->startpos[no].col; | 2474 - rex.reg_mmatch->startpos[no].col; |
2327 else | 2475 else |
2328 len = (int)STRLEN(s); | 2476 len = (int)reg_getline_len(clnum) - rex.reg_mmatch->startpos[no].col; |
2329 } | 2477 } |
2330 } | 2478 } |
2331 else | 2479 else |
2332 { | 2480 { |
2333 s = rex.reg_match->startp[no]; | 2481 s = rex.reg_match->startp[no]; |
2358 ++dst; | 2506 ++dst; |
2359 s = reg_getline(++clnum); | 2507 s = reg_getline(++clnum); |
2360 if (rex.reg_mmatch->endpos[no].lnum == clnum) | 2508 if (rex.reg_mmatch->endpos[no].lnum == clnum) |
2361 len = rex.reg_mmatch->endpos[no].col; | 2509 len = rex.reg_mmatch->endpos[no].col; |
2362 else | 2510 else |
2363 len = (int)STRLEN(s); | 2511 len = (int)reg_getline_len(clnum); |
2364 } | 2512 } |
2365 else | 2513 else |
2366 break; | 2514 break; |
2367 } | 2515 } |
2368 else if (*s == NUL) // we hit NUL. | 2516 else if (*s == NUL) // we hit NUL. |
2463 exit: | 2611 exit: |
2464 return (int)((dst - dest) + 1); | 2612 return (int)((dst - dest) + 1); |
2465 } | 2613 } |
2466 | 2614 |
2467 #ifdef FEAT_EVAL | 2615 #ifdef FEAT_EVAL |
2468 /* | 2616 |
2469 * Call reg_getline() with the line numbers from the submatch. If a | |
2470 * substitute() was used the reg_maxline and other values have been | |
2471 * overwritten. | |
2472 */ | |
2473 static char_u * | 2617 static char_u * |
2474 reg_getline_submatch(linenr_T lnum) | 2618 reg_getline_submatch(linenr_T lnum) |
2475 { | 2619 { |
2476 char_u *s; | 2620 char_u *line; |
2477 linenr_T save_first = rex.reg_firstlnum; | 2621 |
2478 linenr_T save_max = rex.reg_maxline; | 2622 reg_getline_common(lnum, RGLF_LINE | RGLF_SUBMATCH, &line, NULL); |
2479 | 2623 |
2480 rex.reg_firstlnum = rsm.sm_firstlnum; | 2624 return line; |
2481 rex.reg_maxline = rsm.sm_maxline; | 2625 } |
2482 | 2626 |
2483 s = reg_getline(lnum); | 2627 static colnr_T |
2484 | 2628 reg_getline_submatch_len(linenr_T lnum) |
2485 rex.reg_firstlnum = save_first; | 2629 { |
2486 rex.reg_maxline = save_max; | 2630 colnr_T length; |
2487 return s; | 2631 |
2632 reg_getline_common(lnum, RGLF_LENGTH | RGLF_SUBMATCH, NULL, &length); | |
2633 | |
2634 return length; | |
2488 } | 2635 } |
2489 | 2636 |
2490 /* | 2637 /* |
2491 * Used for the submatch() function: get the string from the n'th submatch in | 2638 * Used for the submatch() function: get the string from the n'th submatch in |
2492 * allocated memory. | 2639 * allocated memory. |
2531 } | 2678 } |
2532 else | 2679 else |
2533 { | 2680 { |
2534 // Multiple lines: take start line from start col, middle | 2681 // Multiple lines: take start line from start col, middle |
2535 // lines completely and end line up to end col. | 2682 // lines completely and end line up to end col. |
2536 len = (int)STRLEN(s); | 2683 len = (int)reg_getline_submatch_len(lnum) - rsm.sm_mmatch->startpos[no].col; |
2537 if (round == 2) | 2684 if (round == 2) |
2538 { | 2685 { |
2539 STRCPY(retval, s); | 2686 STRCPY(retval, s); |
2540 retval[len] = '\n'; | 2687 retval[len] = '\n'; |
2541 } | 2688 } |
2542 ++len; | 2689 ++len; |
2543 ++lnum; | 2690 ++lnum; |
2544 while (lnum < rsm.sm_mmatch->endpos[no].lnum) | 2691 while (lnum < rsm.sm_mmatch->endpos[no].lnum) |
2545 { | 2692 { |
2546 s = reg_getline_submatch(lnum++); | 2693 s = reg_getline_submatch(lnum); |
2547 if (round == 2) | 2694 if (round == 2) |
2548 STRCPY(retval + len, s); | 2695 STRCPY(retval + len, s); |
2549 len += (int)STRLEN(s); | 2696 len += (int)reg_getline_submatch_len(lnum); |
2550 if (round == 2) | 2697 if (round == 2) |
2551 retval[len] = '\n'; | 2698 retval[len] = '\n'; |
2552 ++len; | 2699 ++len; |
2700 ++lnum; | |
2553 } | 2701 } |
2554 if (round == 2) | 2702 if (round == 2) |
2555 STRNCPY(retval + len, reg_getline_submatch(lnum), | 2703 STRNCPY(retval + len, reg_getline_submatch(lnum), |
2556 rsm.sm_mmatch->endpos[no].col); | 2704 rsm.sm_mmatch->endpos[no].col); |
2557 len += rsm.sm_mmatch->endpos[no].col; | 2705 len += rsm.sm_mmatch->endpos[no].col; |
2622 if (list_append_string(list, s, ecol - scol) == FAIL) | 2770 if (list_append_string(list, s, ecol - scol) == FAIL) |
2623 error = TRUE; | 2771 error = TRUE; |
2624 } | 2772 } |
2625 else | 2773 else |
2626 { | 2774 { |
2775 int max_lnum = elnum - slnum; | |
2776 | |
2627 if (list_append_string(list, s, -1) == FAIL) | 2777 if (list_append_string(list, s, -1) == FAIL) |
2628 error = TRUE; | 2778 error = TRUE; |
2629 for (i = 1; i < elnum - slnum; i++) | 2779 for (i = 1; i < max_lnum; i++) |
2630 { | 2780 { |
2631 s = reg_getline_submatch(slnum + i); | 2781 s = reg_getline_submatch(slnum + i); |
2632 if (list_append_string(list, s, -1) == FAIL) | 2782 if (list_append_string(list, s, -1) == FAIL) |
2633 error = TRUE; | 2783 error = TRUE; |
2634 } | 2784 } |