Mercurial > vim
comparison src/regexp.c @ 29048:c98fc7a4dde4 v8.2.5046
patch 8.2.5046: vim_regsub() can overwrite the destination
Commit: https://github.com/vim/vim/commit/4aaf3e7f4db599932d01d87e5bbcdc342cccee27
Author: Bram Moolenaar <Bram@vim.org>
Date: Mon May 30 20:58:55 2022 +0100
patch 8.2.5046: vim_regsub() can overwrite the destination
Problem: vim_regsub() can overwrite the destination.
Solution: Pass the destination length, give an error when it doesn't fit.
author | Bram Moolenaar <Bram@vim.org> |
---|---|
date | Mon, 30 May 2022 22:00:03 +0200 |
parents | bfd8e25fa207 |
children | b90bca860b5a |
comparison
equal
deleted
inserted
replaced
29047:fd46c946d9bf | 29048:c98fc7a4dde4 |
---|---|
1647 * This is impossible, so we declare a pointer to a function returning a | 1647 * This is impossible, so we declare a pointer to a function returning a |
1648 * void pointer. This should work for all compilers. | 1648 * void pointer. This should work for all compilers. |
1649 */ | 1649 */ |
1650 typedef void (*(*fptr_T)(int *, int)); | 1650 typedef void (*(*fptr_T)(int *, int)); |
1651 | 1651 |
1652 static int vim_regsub_both(char_u *source, typval_T *expr, char_u *dest, int copy, int magic, int backslash); | 1652 static int vim_regsub_both(char_u *source, typval_T *expr, char_u *dest, int destlen, int flags); |
1653 | 1653 |
1654 static fptr_T | 1654 static fptr_T |
1655 do_upper(int *d, int c) | 1655 do_upper(int *d, int c) |
1656 { | 1656 { |
1657 *d = MB_TOUPPER(c); | 1657 *d = MB_TOUPPER(c); |
1820 | 1820 |
1821 /* | 1821 /* |
1822 * vim_regsub() - perform substitutions after a vim_regexec() or | 1822 * vim_regsub() - perform substitutions after a vim_regexec() or |
1823 * vim_regexec_multi() match. | 1823 * vim_regexec_multi() match. |
1824 * | 1824 * |
1825 * If "copy" is TRUE really copy into "dest". | 1825 * If "flags" has REGSUB_COPY really copy into "dest[destlen]". |
1826 * If "copy" is FALSE nothing is copied, this is just to find out the length | 1826 * Otherwise nothing is copied, only compute the length of the result. |
1827 * of the result. | |
1828 * | 1827 * |
1829 * If "backslash" is TRUE, a backslash will be removed later, need to double | 1828 * If "flags" has REGSUB_MAGIC then behave like 'magic' is set. |
1830 * them to keep them, and insert a backslash before a CR to avoid it being | 1829 * |
1831 * replaced with a line break later. | 1830 * If "flags" has REGSUB_BACKSLASH a backslash will be removed later, need to |
1831 * double them to keep them, and insert a backslash before a CR to avoid it | |
1832 * being replaced with a line break later. | |
1832 * | 1833 * |
1833 * Note: The matched text must not change between the call of | 1834 * Note: The matched text must not change between the call of |
1834 * vim_regexec()/vim_regexec_multi() and vim_regsub()! It would make the back | 1835 * vim_regexec()/vim_regexec_multi() and vim_regsub()! It would make the back |
1835 * references invalid! | 1836 * references invalid! |
1836 * | 1837 * |
1840 vim_regsub( | 1841 vim_regsub( |
1841 regmatch_T *rmp, | 1842 regmatch_T *rmp, |
1842 char_u *source, | 1843 char_u *source, |
1843 typval_T *expr, | 1844 typval_T *expr, |
1844 char_u *dest, | 1845 char_u *dest, |
1845 int copy, | 1846 int destlen, |
1846 int magic, | 1847 int flags) |
1847 int backslash) | |
1848 { | 1848 { |
1849 int result; | 1849 int result; |
1850 regexec_T rex_save; | 1850 regexec_T rex_save; |
1851 int rex_in_use_save = rex_in_use; | 1851 int rex_in_use_save = rex_in_use; |
1852 | 1852 |
1858 rex.reg_match = rmp; | 1858 rex.reg_match = rmp; |
1859 rex.reg_mmatch = NULL; | 1859 rex.reg_mmatch = NULL; |
1860 rex.reg_maxline = 0; | 1860 rex.reg_maxline = 0; |
1861 rex.reg_buf = curbuf; | 1861 rex.reg_buf = curbuf; |
1862 rex.reg_line_lbr = TRUE; | 1862 rex.reg_line_lbr = TRUE; |
1863 result = vim_regsub_both(source, expr, dest, copy, magic, backslash); | 1863 result = vim_regsub_both(source, expr, dest, destlen, flags); |
1864 | 1864 |
1865 rex_in_use = rex_in_use_save; | 1865 rex_in_use = rex_in_use_save; |
1866 if (rex_in_use) | 1866 if (rex_in_use) |
1867 rex = rex_save; | 1867 rex = rex_save; |
1868 | 1868 |
1873 vim_regsub_multi( | 1873 vim_regsub_multi( |
1874 regmmatch_T *rmp, | 1874 regmmatch_T *rmp, |
1875 linenr_T lnum, | 1875 linenr_T lnum, |
1876 char_u *source, | 1876 char_u *source, |
1877 char_u *dest, | 1877 char_u *dest, |
1878 int copy, | 1878 int destlen, |
1879 int magic, | 1879 int flags) |
1880 int backslash) | |
1881 { | 1880 { |
1882 int result; | 1881 int result; |
1883 regexec_T rex_save; | 1882 regexec_T rex_save; |
1884 int rex_in_use_save = rex_in_use; | 1883 int rex_in_use_save = rex_in_use; |
1885 | 1884 |
1892 rex.reg_mmatch = rmp; | 1891 rex.reg_mmatch = rmp; |
1893 rex.reg_buf = curbuf; // always works on the current buffer! | 1892 rex.reg_buf = curbuf; // always works on the current buffer! |
1894 rex.reg_firstlnum = lnum; | 1893 rex.reg_firstlnum = lnum; |
1895 rex.reg_maxline = curbuf->b_ml.ml_line_count - lnum; | 1894 rex.reg_maxline = curbuf->b_ml.ml_line_count - lnum; |
1896 rex.reg_line_lbr = FALSE; | 1895 rex.reg_line_lbr = FALSE; |
1897 result = vim_regsub_both(source, NULL, dest, copy, magic, backslash); | 1896 result = vim_regsub_both(source, NULL, dest, destlen, flags); |
1898 | 1897 |
1899 rex_in_use = rex_in_use_save; | 1898 rex_in_use = rex_in_use_save; |
1900 if (rex_in_use) | 1899 if (rex_in_use) |
1901 rex = rex_save; | 1900 rex = rex_save; |
1902 | 1901 |
1906 static int | 1905 static int |
1907 vim_regsub_both( | 1906 vim_regsub_both( |
1908 char_u *source, | 1907 char_u *source, |
1909 typval_T *expr, | 1908 typval_T *expr, |
1910 char_u *dest, | 1909 char_u *dest, |
1911 int copy, | 1910 int destlen, |
1912 int magic, | 1911 int flags) |
1913 int backslash) | |
1914 { | 1912 { |
1915 char_u *src; | 1913 char_u *src; |
1916 char_u *dst; | 1914 char_u *dst; |
1917 char_u *s; | 1915 char_u *s; |
1918 int c; | 1916 int c; |
1923 linenr_T clnum = 0; // init for GCC | 1921 linenr_T clnum = 0; // init for GCC |
1924 int len = 0; // init for GCC | 1922 int len = 0; // init for GCC |
1925 #ifdef FEAT_EVAL | 1923 #ifdef FEAT_EVAL |
1926 static char_u *eval_result = NULL; | 1924 static char_u *eval_result = NULL; |
1927 #endif | 1925 #endif |
1926 int copy = flags & REGSUB_COPY; | |
1928 | 1927 |
1929 // Be paranoid... | 1928 // Be paranoid... |
1930 if ((source == NULL && expr == NULL) || dest == NULL) | 1929 if ((source == NULL && expr == NULL) || dest == NULL) |
1931 { | 1930 { |
1932 emsg(_(e_null_argument)); | 1931 emsg(_(e_null_argument)); |
1943 if (expr != NULL || (source[0] == '\\' && source[1] == '=')) | 1942 if (expr != NULL || (source[0] == '\\' && source[1] == '=')) |
1944 { | 1943 { |
1945 #ifdef FEAT_EVAL | 1944 #ifdef FEAT_EVAL |
1946 // To make sure that the length doesn't change between checking the | 1945 // To make sure that the length doesn't change between checking the |
1947 // length and copying the string, and to speed up things, the | 1946 // length and copying the string, and to speed up things, the |
1948 // resulting string is saved from the call with "copy" == FALSE to the | 1947 // resulting string is saved from the call with "flags & REGSUB_COPY" |
1949 // call with "copy" == TRUE. | 1948 // == 0 to the call with "flags & REGSUB_COPY" != 0. |
1950 if (copy) | 1949 if (copy) |
1951 { | 1950 { |
1952 if (eval_result != NULL) | 1951 if (eval_result != NULL) |
1953 { | 1952 { |
1954 STRCPY(dest, eval_result); | 1953 STRCPY(dest, eval_result); |
2052 if (*s == NL && !rsm.sm_line_lbr) | 2051 if (*s == NL && !rsm.sm_line_lbr) |
2053 *s = CAR; | 2052 *s = CAR; |
2054 had_backslash = TRUE; | 2053 had_backslash = TRUE; |
2055 } | 2054 } |
2056 } | 2055 } |
2057 if (had_backslash && backslash) | 2056 if (had_backslash && (flags & REGSUB_BACKSLASH)) |
2058 { | 2057 { |
2059 // Backslashes will be consumed, need to double them. | 2058 // Backslashes will be consumed, need to double them. |
2060 s = vim_strsave_escaped(eval_result, (char_u *)"\\"); | 2059 s = vim_strsave_escaped(eval_result, (char_u *)"\\"); |
2061 if (s != NULL) | 2060 if (s != NULL) |
2062 { | 2061 { |
2075 #endif | 2074 #endif |
2076 } | 2075 } |
2077 else | 2076 else |
2078 while ((c = *src++) != NUL) | 2077 while ((c = *src++) != NUL) |
2079 { | 2078 { |
2080 if (c == '&' && magic) | 2079 if (c == '&' && (flags & REGSUB_MAGIC)) |
2081 no = 0; | 2080 no = 0; |
2082 else if (c == '\\' && *src != NUL) | 2081 else if (c == '\\' && *src != NUL) |
2083 { | 2082 { |
2084 if (*src == '&' && !magic) | 2083 if (*src == '&' && !(flags & REGSUB_MAGIC)) |
2085 { | 2084 { |
2086 ++src; | 2085 ++src; |
2087 no = 0; | 2086 no = 0; |
2088 } | 2087 } |
2089 else if ('0' <= *src && *src <= '9') | 2088 else if ('0' <= *src && *src <= '9') |
2113 if (c == K_SPECIAL && src[0] != NUL && src[1] != NUL) | 2112 if (c == K_SPECIAL && src[0] != NUL && src[1] != NUL) |
2114 { | 2113 { |
2115 // Copy a special key as-is. | 2114 // Copy a special key as-is. |
2116 if (copy) | 2115 if (copy) |
2117 { | 2116 { |
2117 if (dst + 3 > dest + destlen) | |
2118 { | |
2119 iemsg("vim_regsub_both(): not enough space"); | |
2120 return 0; | |
2121 } | |
2118 *dst++ = c; | 2122 *dst++ = c; |
2119 *dst++ = *src++; | 2123 *dst++ = *src++; |
2120 *dst++ = *src++; | 2124 *dst++ = *src++; |
2121 } | 2125 } |
2122 else | 2126 else |
2139 // case 'e': c = ESC; ++src; break; | 2143 // case 'e': c = ESC; ++src; break; |
2140 case 'b': c = Ctrl_H; ++src; break; | 2144 case 'b': c = Ctrl_H; ++src; break; |
2141 | 2145 |
2142 // If "backslash" is TRUE the backslash will be removed | 2146 // If "flags" has REGSUB_BACKSLASH the backslash will be removed |
2143 // later. Used to insert a literal CR. | 2147 // later. Used to insert a literal CR. |
2144 default: if (backslash) | 2148 default: if (flags & REGSUB_BACKSLASH) |
2145 { | 2149 { |
2146 if (copy) | 2150 if (copy) |
2151 { | |
2152 if (dst + 1 > dest + destlen) | |
2153 { | |
2154 iemsg("vim_regsub_both(): not enough space"); | |
2155 return 0; | |
2156 } | |
2147 *dst = '\\'; | 2157 *dst = '\\'; |
2158 } | |
2148 ++dst; | 2159 ++dst; |
2149 } | 2160 } |
2150 c = *src++; | 2161 c = *src++; |
2151 } | 2162 } |
2152 } | 2163 } |
2164 cc = c; | 2175 cc = c; |
2165 | 2176 |
2166 if (has_mbyte) | 2177 if (has_mbyte) |
2167 { | 2178 { |
2168 int totlen = mb_ptr2len(src - 1); | 2179 int totlen = mb_ptr2len(src - 1); |
2180 int charlen = mb_char2len(cc); | |
2169 | 2181 |
2170 if (copy) | 2182 if (copy) |
2183 { | |
2184 if (dst + charlen > dest + destlen) | |
2185 { | |
2186 iemsg("vim_regsub_both(): not enough space"); | |
2187 return 0; | |
2188 } | |
2171 mb_char2bytes(cc, dst); | 2189 mb_char2bytes(cc, dst); |
2172 dst += mb_char2len(cc) - 1; | 2190 } |
2191 dst += charlen - 1; | |
2173 if (enc_utf8) | 2192 if (enc_utf8) |
2174 { | 2193 { |
2175 int clen = utf_ptr2len(src - 1); | 2194 int clen = utf_ptr2len(src - 1); |
2176 | 2195 |
2177 // If the character length is shorter than "totlen", there | 2196 // If the character length is shorter than "totlen", there |
2178 // are composing characters; copy them as-is. | 2197 // are composing characters; copy them as-is. |
2179 if (clen < totlen) | 2198 if (clen < totlen) |
2180 { | 2199 { |
2181 if (copy) | 2200 if (copy) |
2201 { | |
2202 if (dst + totlen - clen > dest + destlen) | |
2203 { | |
2204 iemsg("vim_regsub_both(): not enough space"); | |
2205 return 0; | |
2206 } | |
2182 mch_memmove(dst + 1, src - 1 + clen, | 2207 mch_memmove(dst + 1, src - 1 + clen, |
2183 (size_t)(totlen - clen)); | 2208 (size_t)(totlen - clen)); |
2209 } | |
2184 dst += totlen - clen; | 2210 dst += totlen - clen; |
2185 } | 2211 } |
2186 } | 2212 } |
2187 src += totlen - 1; | 2213 src += totlen - 1; |
2188 } | 2214 } |
2189 else if (copy) | 2215 else if (copy) |
2190 *dst = cc; | 2216 { |
2217 if (dst + 1 > dest + destlen) | |
2218 { | |
2219 iemsg("vim_regsub_both(): not enough space"); | |
2220 return 0; | |
2221 } | |
2222 *dst = cc; | |
2223 } | |
2191 dst++; | 2224 dst++; |
2192 } | 2225 } |
2193 else | 2226 else |
2194 { | 2227 { |
2195 if (REG_MULTI) | 2228 if (REG_MULTI) |
2224 if (REG_MULTI) | 2257 if (REG_MULTI) |
2225 { | 2258 { |
2226 if (rex.reg_mmatch->endpos[no].lnum == clnum) | 2259 if (rex.reg_mmatch->endpos[no].lnum == clnum) |
2227 break; | 2260 break; |
2228 if (copy) | 2261 if (copy) |
2262 { | |
2263 if (dst + 1 > dest + destlen) | |
2264 { | |
2265 iemsg("vim_regsub_both(): not enough space"); | |
2266 return 0; | |
2267 } | |
2229 *dst = CAR; | 2268 *dst = CAR; |
2269 } | |
2230 ++dst; | 2270 ++dst; |
2231 s = reg_getline(++clnum); | 2271 s = reg_getline(++clnum); |
2232 if (rex.reg_mmatch->endpos[no].lnum == clnum) | 2272 if (rex.reg_mmatch->endpos[no].lnum == clnum) |
2233 len = rex.reg_mmatch->endpos[no].col; | 2273 len = rex.reg_mmatch->endpos[no].col; |
2234 else | 2274 else |
2243 iemsg(_(e_damaged_match_string)); | 2283 iemsg(_(e_damaged_match_string)); |
2244 goto exit; | 2284 goto exit; |
2245 } | 2285 } |
2246 else | 2286 else |
2247 { | 2287 { |
2248 if (backslash && (*s == CAR || *s == '\\')) | 2288 if ((flags & REGSUB_BACKSLASH) |
2289 && (*s == CAR || *s == '\\')) | |
2249 { | 2290 { |
2250 /* | 2291 /* |
2251 * Insert a backslash in front of a CR, otherwise | 2292 * Insert a backslash in front of a CR, otherwise |
2252 * it will be replaced by a line break. | 2293 * it will be replaced by a line break. |
2253 * Number of backslashes will be halved later, | 2294 * Number of backslashes will be halved later, |
2254 * double them here. | 2295 * double them here. |
2255 */ | 2296 */ |
2256 if (copy) | 2297 if (copy) |
2257 { | 2298 { |
2299 if (dst + 2 > dest + destlen) | |
2300 { | |
2301 iemsg("vim_regsub_both(): not enough space"); | |
2302 return 0; | |
2303 } | |
2258 dst[0] = '\\'; | 2304 dst[0] = '\\'; |
2259 dst[1] = *s; | 2305 dst[1] = *s; |
2260 } | 2306 } |
2261 dst += 2; | 2307 dst += 2; |
2262 } | 2308 } |
2277 cc = c; | 2323 cc = c; |
2278 | 2324 |
2279 if (has_mbyte) | 2325 if (has_mbyte) |
2280 { | 2326 { |
2281 int l; | 2327 int l; |
2328 int charlen; | |
2282 | 2329 |
2283 // Copy composing characters separately, one | 2330 // Copy composing characters separately, one |
2284 // at a time. | 2331 // at a time. |
2285 if (enc_utf8) | 2332 if (enc_utf8) |
2286 l = utf_ptr2len(s) - 1; | 2333 l = utf_ptr2len(s) - 1; |
2287 else | 2334 else |
2288 l = mb_ptr2len(s) - 1; | 2335 l = mb_ptr2len(s) - 1; |
2289 | 2336 |
2290 s += l; | 2337 s += l; |
2291 len -= l; | 2338 len -= l; |
2339 charlen = mb_char2len(cc); | |
2292 if (copy) | 2340 if (copy) |
2341 { | |
2342 if (dst + charlen > dest + destlen) | |
2343 { | |
2344 iemsg("vim_regsub_both(): not enough space"); | |
2345 return 0; | |
2346 } | |
2293 mb_char2bytes(cc, dst); | 2347 mb_char2bytes(cc, dst); |
2294 dst += mb_char2len(cc) - 1; | 2348 } |
2349 dst += charlen - 1; | |
2295 } | 2350 } |
2296 else if (copy) | 2351 else if (copy) |
2297 *dst = cc; | 2352 { |
2353 if (dst + 1 > dest + destlen) | |
2354 { | |
2355 iemsg("vim_regsub_both(): not enough space"); | |
2356 return 0; | |
2357 } | |
2358 *dst = cc; | |
2359 } | |
2298 dst++; | 2360 dst++; |
2299 } | 2361 } |
2300 | 2362 |
2301 ++s; | 2363 ++s; |
2302 --len; | 2364 --len; |
2709 } | 2771 } |
2710 #endif | 2772 #endif |
2711 | 2773 |
2712 /* | 2774 /* |
2713 * Match a regexp against a string. | 2775 * Match a regexp against a string. |
2714 * "rmp->regprog" is a compiled regexp as returned by vim_regcomp(). | 2776 * "rmp->regprog" must be a compiled regexp as returned by vim_regcomp(). |
2715 * Note: "rmp->regprog" may be freed and changed. | 2777 * Note: "rmp->regprog" may be freed and changed. |
2716 * Uses curbuf for line count and 'iskeyword'. | 2778 * Uses curbuf for line count and 'iskeyword'. |
2717 * When "nl" is TRUE consider a "\n" in "line" to be a line break. | 2779 * When "nl" is TRUE consider a "\n" in "line" to be a line break. |
2718 * | 2780 * |
2719 * Return TRUE if there is a match, FALSE if not. | 2781 * Return TRUE if there is a match, FALSE if not. |