comparison src/regexp.c @ 29048:c98fc7a4dde4 v8.2.5046

patch 8.2.5046: vim_regsub() can overwrite the destination Commit: https://github.com/vim/vim/commit/4aaf3e7f4db599932d01d87e5bbcdc342cccee27 Author: Bram Moolenaar <Bram@vim.org> Date: Mon May 30 20:58:55 2022 +0100 patch 8.2.5046: vim_regsub() can overwrite the destination Problem: vim_regsub() can overwrite the destination. Solution: Pass the destination length, give an error when it doesn't fit.
author Bram Moolenaar <Bram@vim.org>
date Mon, 30 May 2022 22:00:03 +0200
parents bfd8e25fa207
children b90bca860b5a
comparison
equal deleted inserted replaced
29047:fd46c946d9bf 29048:c98fc7a4dde4
1647 * This is impossible, so we declare a pointer to a function returning a 1647 * This is impossible, so we declare a pointer to a function returning a
1648 * void pointer. This should work for all compilers. 1648 * void pointer. This should work for all compilers.
1649 */ 1649 */
1650 typedef void (*(*fptr_T)(int *, int)); 1650 typedef void (*(*fptr_T)(int *, int));
1651 1651
1652 static int vim_regsub_both(char_u *source, typval_T *expr, char_u *dest, int copy, int magic, int backslash); 1652 static int vim_regsub_both(char_u *source, typval_T *expr, char_u *dest, int destlen, int flags);
1653 1653
1654 static fptr_T 1654 static fptr_T
1655 do_upper(int *d, int c) 1655 do_upper(int *d, int c)
1656 { 1656 {
1657 *d = MB_TOUPPER(c); 1657 *d = MB_TOUPPER(c);
1820 1820
1821 /* 1821 /*
1822 * vim_regsub() - perform substitutions after a vim_regexec() or 1822 * vim_regsub() - perform substitutions after a vim_regexec() or
1823 * vim_regexec_multi() match. 1823 * vim_regexec_multi() match.
1824 * 1824 *
1825 * If "copy" is TRUE really copy into "dest". 1825 * If "flags" has REGSUB_COPY really copy into "dest[destlen]".
1826 * If "copy" is FALSE nothing is copied, this is just to find out the length 1826 * Otherwise nothing is copied, only compute the length of the result.
1827 * of the result.
1828 * 1827 *
1829 * If "backslash" is TRUE, a backslash will be removed later, need to double 1828 * If "flags" has REGSUB_MAGIC then behave like 'magic' is set.
1830 * them to keep them, and insert a backslash before a CR to avoid it being 1829 *
1831 * replaced with a line break later. 1830 * If "flags" has REGSUB_BACKSLASH a backslash will be removed later, need to
1831 * double them to keep them, and insert a backslash before a CR to avoid it
1832 * being replaced with a line break later.
1832 * 1833 *
1833 * Note: The matched text must not change between the call of 1834 * Note: The matched text must not change between the call of
1834 * vim_regexec()/vim_regexec_multi() and vim_regsub()! It would make the back 1835 * vim_regexec()/vim_regexec_multi() and vim_regsub()! It would make the back
1835 * references invalid! 1836 * references invalid!
1836 * 1837 *
1840 vim_regsub( 1841 vim_regsub(
1841 regmatch_T *rmp, 1842 regmatch_T *rmp,
1842 char_u *source, 1843 char_u *source,
1843 typval_T *expr, 1844 typval_T *expr,
1844 char_u *dest, 1845 char_u *dest,
1845 int copy, 1846 int destlen,
1846 int magic, 1847 int flags)
1847 int backslash)
1848 { 1848 {
1849 int result; 1849 int result;
1850 regexec_T rex_save; 1850 regexec_T rex_save;
1851 int rex_in_use_save = rex_in_use; 1851 int rex_in_use_save = rex_in_use;
1852 1852
1858 rex.reg_match = rmp; 1858 rex.reg_match = rmp;
1859 rex.reg_mmatch = NULL; 1859 rex.reg_mmatch = NULL;
1860 rex.reg_maxline = 0; 1860 rex.reg_maxline = 0;
1861 rex.reg_buf = curbuf; 1861 rex.reg_buf = curbuf;
1862 rex.reg_line_lbr = TRUE; 1862 rex.reg_line_lbr = TRUE;
1863 result = vim_regsub_both(source, expr, dest, copy, magic, backslash); 1863 result = vim_regsub_both(source, expr, dest, destlen, flags);
1864 1864
1865 rex_in_use = rex_in_use_save; 1865 rex_in_use = rex_in_use_save;
1866 if (rex_in_use) 1866 if (rex_in_use)
1867 rex = rex_save; 1867 rex = rex_save;
1868 1868
1873 vim_regsub_multi( 1873 vim_regsub_multi(
1874 regmmatch_T *rmp, 1874 regmmatch_T *rmp,
1875 linenr_T lnum, 1875 linenr_T lnum,
1876 char_u *source, 1876 char_u *source,
1877 char_u *dest, 1877 char_u *dest,
1878 int copy, 1878 int destlen,
1879 int magic, 1879 int flags)
1880 int backslash)
1881 { 1880 {
1882 int result; 1881 int result;
1883 regexec_T rex_save; 1882 regexec_T rex_save;
1884 int rex_in_use_save = rex_in_use; 1883 int rex_in_use_save = rex_in_use;
1885 1884
1892 rex.reg_mmatch = rmp; 1891 rex.reg_mmatch = rmp;
1893 rex.reg_buf = curbuf; // always works on the current buffer! 1892 rex.reg_buf = curbuf; // always works on the current buffer!
1894 rex.reg_firstlnum = lnum; 1893 rex.reg_firstlnum = lnum;
1895 rex.reg_maxline = curbuf->b_ml.ml_line_count - lnum; 1894 rex.reg_maxline = curbuf->b_ml.ml_line_count - lnum;
1896 rex.reg_line_lbr = FALSE; 1895 rex.reg_line_lbr = FALSE;
1897 result = vim_regsub_both(source, NULL, dest, copy, magic, backslash); 1896 result = vim_regsub_both(source, NULL, dest, destlen, flags);
1898 1897
1899 rex_in_use = rex_in_use_save; 1898 rex_in_use = rex_in_use_save;
1900 if (rex_in_use) 1899 if (rex_in_use)
1901 rex = rex_save; 1900 rex = rex_save;
1902 1901
1906 static int 1905 static int
1907 vim_regsub_both( 1906 vim_regsub_both(
1908 char_u *source, 1907 char_u *source,
1909 typval_T *expr, 1908 typval_T *expr,
1910 char_u *dest, 1909 char_u *dest,
1911 int copy, 1910 int destlen,
1912 int magic, 1911 int flags)
1913 int backslash)
1914 { 1912 {
1915 char_u *src; 1913 char_u *src;
1916 char_u *dst; 1914 char_u *dst;
1917 char_u *s; 1915 char_u *s;
1918 int c; 1916 int c;
1923 linenr_T clnum = 0; // init for GCC 1921 linenr_T clnum = 0; // init for GCC
1924 int len = 0; // init for GCC 1922 int len = 0; // init for GCC
1925 #ifdef FEAT_EVAL 1923 #ifdef FEAT_EVAL
1926 static char_u *eval_result = NULL; 1924 static char_u *eval_result = NULL;
1927 #endif 1925 #endif
1926 int copy = flags & REGSUB_COPY;
1928 1927
1929 // Be paranoid... 1928 // Be paranoid...
1930 if ((source == NULL && expr == NULL) || dest == NULL) 1929 if ((source == NULL && expr == NULL) || dest == NULL)
1931 { 1930 {
1932 emsg(_(e_null_argument)); 1931 emsg(_(e_null_argument));
1943 if (expr != NULL || (source[0] == '\\' && source[1] == '=')) 1942 if (expr != NULL || (source[0] == '\\' && source[1] == '='))
1944 { 1943 {
1945 #ifdef FEAT_EVAL 1944 #ifdef FEAT_EVAL
1946 // To make sure that the length doesn't change between checking the 1945 // To make sure that the length doesn't change between checking the
1947 // length and copying the string, and to speed up things, the 1946 // length and copying the string, and to speed up things, the
1948 // resulting string is saved from the call with "copy" == FALSE to the 1947 // resulting string is saved from the call with "flags & REGSUB_COPY"
1949 // call with "copy" == TRUE. 1948 // == 0 to the call with "flags & REGSUB_COPY" != 0.
1950 if (copy) 1949 if (copy)
1951 { 1950 {
1952 if (eval_result != NULL) 1951 if (eval_result != NULL)
1953 { 1952 {
1954 STRCPY(dest, eval_result); 1953 STRCPY(dest, eval_result);
2052 if (*s == NL && !rsm.sm_line_lbr) 2051 if (*s == NL && !rsm.sm_line_lbr)
2053 *s = CAR; 2052 *s = CAR;
2054 had_backslash = TRUE; 2053 had_backslash = TRUE;
2055 } 2054 }
2056 } 2055 }
2057 if (had_backslash && backslash) 2056 if (had_backslash && (flags & REGSUB_BACKSLASH))
2058 { 2057 {
2059 // Backslashes will be consumed, need to double them. 2058 // Backslashes will be consumed, need to double them.
2060 s = vim_strsave_escaped(eval_result, (char_u *)"\\"); 2059 s = vim_strsave_escaped(eval_result, (char_u *)"\\");
2061 if (s != NULL) 2060 if (s != NULL)
2062 { 2061 {
2075 #endif 2074 #endif
2076 } 2075 }
2077 else 2076 else
2078 while ((c = *src++) != NUL) 2077 while ((c = *src++) != NUL)
2079 { 2078 {
2080 if (c == '&' && magic) 2079 if (c == '&' && (flags & REGSUB_MAGIC))
2081 no = 0; 2080 no = 0;
2082 else if (c == '\\' && *src != NUL) 2081 else if (c == '\\' && *src != NUL)
2083 { 2082 {
2084 if (*src == '&' && !magic) 2083 if (*src == '&' && !(flags & REGSUB_MAGIC))
2085 { 2084 {
2086 ++src; 2085 ++src;
2087 no = 0; 2086 no = 0;
2088 } 2087 }
2089 else if ('0' <= *src && *src <= '9') 2088 else if ('0' <= *src && *src <= '9')
2113 if (c == K_SPECIAL && src[0] != NUL && src[1] != NUL) 2112 if (c == K_SPECIAL && src[0] != NUL && src[1] != NUL)
2114 { 2113 {
2115 // Copy a special key as-is. 2114 // Copy a special key as-is.
2116 if (copy) 2115 if (copy)
2117 { 2116 {
2117 if (dst + 3 > dest + destlen)
2118 {
2119 iemsg("vim_regsub_both(): not enough space");
2120 return 0;
2121 }
2118 *dst++ = c; 2122 *dst++ = c;
2119 *dst++ = *src++; 2123 *dst++ = *src++;
2120 *dst++ = *src++; 2124 *dst++ = *src++;
2121 } 2125 }
2122 else 2126 else
2139 // case 'e': c = ESC; ++src; break; 2143 // case 'e': c = ESC; ++src; break;
2140 case 'b': c = Ctrl_H; ++src; break; 2144 case 'b': c = Ctrl_H; ++src; break;
2141 2145
2142 // If "backslash" is TRUE the backslash will be removed 2146 // If "flags" has REGSUB_BACKSLASH the backslash will be removed
2143 // later. Used to insert a literal CR. 2147 // later. Used to insert a literal CR.
2144 default: if (backslash) 2148 default: if (flags & REGSUB_BACKSLASH)
2145 { 2149 {
2146 if (copy) 2150 if (copy)
2151 {
2152 if (dst + 1 > dest + destlen)
2153 {
2154 iemsg("vim_regsub_both(): not enough space");
2155 return 0;
2156 }
2147 *dst = '\\'; 2157 *dst = '\\';
2158 }
2148 ++dst; 2159 ++dst;
2149 } 2160 }
2150 c = *src++; 2161 c = *src++;
2151 } 2162 }
2152 } 2163 }
2164 cc = c; 2175 cc = c;
2165 2176
2166 if (has_mbyte) 2177 if (has_mbyte)
2167 { 2178 {
2168 int totlen = mb_ptr2len(src - 1); 2179 int totlen = mb_ptr2len(src - 1);
2180 int charlen = mb_char2len(cc);
2169 2181
2170 if (copy) 2182 if (copy)
2183 {
2184 if (dst + charlen > dest + destlen)
2185 {
2186 iemsg("vim_regsub_both(): not enough space");
2187 return 0;
2188 }
2171 mb_char2bytes(cc, dst); 2189 mb_char2bytes(cc, dst);
2172 dst += mb_char2len(cc) - 1; 2190 }
2191 dst += charlen - 1;
2173 if (enc_utf8) 2192 if (enc_utf8)
2174 { 2193 {
2175 int clen = utf_ptr2len(src - 1); 2194 int clen = utf_ptr2len(src - 1);
2176 2195
2177 // If the character length is shorter than "totlen", there 2196 // If the character length is shorter than "totlen", there
2178 // are composing characters; copy them as-is. 2197 // are composing characters; copy them as-is.
2179 if (clen < totlen) 2198 if (clen < totlen)
2180 { 2199 {
2181 if (copy) 2200 if (copy)
2201 {
2202 if (dst + totlen - clen > dest + destlen)
2203 {
2204 iemsg("vim_regsub_both(): not enough space");
2205 return 0;
2206 }
2182 mch_memmove(dst + 1, src - 1 + clen, 2207 mch_memmove(dst + 1, src - 1 + clen,
2183 (size_t)(totlen - clen)); 2208 (size_t)(totlen - clen));
2209 }
2184 dst += totlen - clen; 2210 dst += totlen - clen;
2185 } 2211 }
2186 } 2212 }
2187 src += totlen - 1; 2213 src += totlen - 1;
2188 } 2214 }
2189 else if (copy) 2215 else if (copy)
2190 *dst = cc; 2216 {
2217 if (dst + 1 > dest + destlen)
2218 {
2219 iemsg("vim_regsub_both(): not enough space");
2220 return 0;
2221 }
2222 *dst = cc;
2223 }
2191 dst++; 2224 dst++;
2192 } 2225 }
2193 else 2226 else
2194 { 2227 {
2195 if (REG_MULTI) 2228 if (REG_MULTI)
2224 if (REG_MULTI) 2257 if (REG_MULTI)
2225 { 2258 {
2226 if (rex.reg_mmatch->endpos[no].lnum == clnum) 2259 if (rex.reg_mmatch->endpos[no].lnum == clnum)
2227 break; 2260 break;
2228 if (copy) 2261 if (copy)
2262 {
2263 if (dst + 1 > dest + destlen)
2264 {
2265 iemsg("vim_regsub_both(): not enough space");
2266 return 0;
2267 }
2229 *dst = CAR; 2268 *dst = CAR;
2269 }
2230 ++dst; 2270 ++dst;
2231 s = reg_getline(++clnum); 2271 s = reg_getline(++clnum);
2232 if (rex.reg_mmatch->endpos[no].lnum == clnum) 2272 if (rex.reg_mmatch->endpos[no].lnum == clnum)
2233 len = rex.reg_mmatch->endpos[no].col; 2273 len = rex.reg_mmatch->endpos[no].col;
2234 else 2274 else
2243 iemsg(_(e_damaged_match_string)); 2283 iemsg(_(e_damaged_match_string));
2244 goto exit; 2284 goto exit;
2245 } 2285 }
2246 else 2286 else
2247 { 2287 {
2248 if (backslash && (*s == CAR || *s == '\\')) 2288 if ((flags & REGSUB_BACKSLASH)
2289 && (*s == CAR || *s == '\\'))
2249 { 2290 {
2250 /* 2291 /*
2251 * Insert a backslash in front of a CR, otherwise 2292 * Insert a backslash in front of a CR, otherwise
2252 * it will be replaced by a line break. 2293 * it will be replaced by a line break.
2253 * Number of backslashes will be halved later, 2294 * Number of backslashes will be halved later,
2254 * double them here. 2295 * double them here.
2255 */ 2296 */
2256 if (copy) 2297 if (copy)
2257 { 2298 {
2299 if (dst + 2 > dest + destlen)
2300 {
2301 iemsg("vim_regsub_both(): not enough space");
2302 return 0;
2303 }
2258 dst[0] = '\\'; 2304 dst[0] = '\\';
2259 dst[1] = *s; 2305 dst[1] = *s;
2260 } 2306 }
2261 dst += 2; 2307 dst += 2;
2262 } 2308 }
2277 cc = c; 2323 cc = c;
2278 2324
2279 if (has_mbyte) 2325 if (has_mbyte)
2280 { 2326 {
2281 int l; 2327 int l;
2328 int charlen;
2282 2329
2283 // Copy composing characters separately, one 2330 // Copy composing characters separately, one
2284 // at a time. 2331 // at a time.
2285 if (enc_utf8) 2332 if (enc_utf8)
2286 l = utf_ptr2len(s) - 1; 2333 l = utf_ptr2len(s) - 1;
2287 else 2334 else
2288 l = mb_ptr2len(s) - 1; 2335 l = mb_ptr2len(s) - 1;
2289 2336
2290 s += l; 2337 s += l;
2291 len -= l; 2338 len -= l;
2339 charlen = mb_char2len(cc);
2292 if (copy) 2340 if (copy)
2341 {
2342 if (dst + charlen > dest + destlen)
2343 {
2344 iemsg("vim_regsub_both(): not enough space");
2345 return 0;
2346 }
2293 mb_char2bytes(cc, dst); 2347 mb_char2bytes(cc, dst);
2294 dst += mb_char2len(cc) - 1; 2348 }
2349 dst += charlen - 1;
2295 } 2350 }
2296 else if (copy) 2351 else if (copy)
2297 *dst = cc; 2352 {
2353 if (dst + 1 > dest + destlen)
2354 {
2355 iemsg("vim_regsub_both(): not enough space");
2356 return 0;
2357 }
2358 *dst = cc;
2359 }
2298 dst++; 2360 dst++;
2299 } 2361 }
2300 2362
2301 ++s; 2363 ++s;
2302 --len; 2364 --len;
2709 } 2771 }
2710 #endif 2772 #endif
2711 2773
2712 /* 2774 /*
2713 * Match a regexp against a string. 2775 * Match a regexp against a string.
2714 * "rmp->regprog" is a compiled regexp as returned by vim_regcomp(). 2776 * "rmp->regprog" must be a compiled regexp as returned by vim_regcomp().
2715 * Note: "rmp->regprog" may be freed and changed. 2777 * Note: "rmp->regprog" may be freed and changed.
2716 * Uses curbuf for line count and 'iskeyword'. 2778 * Uses curbuf for line count and 'iskeyword'.
2717 * When "nl" is TRUE consider a "\n" in "line" to be a line break. 2779 * When "nl" is TRUE consider a "\n" in "line" to be a line break.
2718 * 2780 *
2719 * Return TRUE if there is a match, FALSE if not. 2781 * Return TRUE if there is a match, FALSE if not.