Mercurial > vim
comparison src/regexp_nfa.c @ 23471:a7cdfc8e4b6e v8.2.2278
patch 8.2.2278: falling back to old regexp engine can break some patterns
Commit: https://github.com/vim/vim/commit/66c50c565321d4d49d8d5620912e5e8fe4825644
Author: Bram Moolenaar <Bram@vim.org>
Date: Sat Jan 2 17:43:49 2021 +0100
patch 8.2.2278: falling back to old regexp engine can break some patterns
Problem: Falling back to old regexp engine can break some patterns.
Solution: Do not fall back once [[:lower:]] or [[:upper:]] is used.
(Christian Brabandt, closes #7572)
author | Bram Moolenaar <Bram@vim.org> |
---|---|
date | Sat, 02 Jan 2021 17:45:03 +0100 |
parents | 22d0c25869d8 |
children | 585695c70392 |
comparison
equal
deleted
inserted
replaced
23470:0d0ed7100454 | 23471:a7cdfc8e4b6e |
---|---|
251 // Variables only used in nfa_regcomp() and descendants. | 251 // Variables only used in nfa_regcomp() and descendants. |
252 static int nfa_re_flags; // re_flags passed to nfa_regcomp() | 252 static int nfa_re_flags; // re_flags passed to nfa_regcomp() |
253 static int *post_start; // holds the postfix form of r.e. | 253 static int *post_start; // holds the postfix form of r.e. |
254 static int *post_end; | 254 static int *post_end; |
255 static int *post_ptr; | 255 static int *post_ptr; |
256 | |
257 // Set when the pattern should use the NFA engine. | |
258 // E.g. [[:upper:]] only allows 8bit characters for BT engine, | |
259 // while NFA engine handles multibyte characters correctly. | |
260 static int wants_nfa; | |
261 | |
256 static int nstate; // Number of states in the NFA. | 262 static int nstate; // Number of states in the NFA. |
257 static int istate; // Index in the state vector, used in alloc_state() | 263 static int istate; // Index in the state vector, used in alloc_state() |
258 | 264 |
259 // If not NULL match must end at this position | 265 // If not NULL match must end at this position |
260 static save_se_T *nfa_endp = NULL; | 266 static save_se_T *nfa_endp = NULL; |
304 post_start = alloc(postfix_size); | 310 post_start = alloc(postfix_size); |
305 if (post_start == NULL) | 311 if (post_start == NULL) |
306 return FAIL; | 312 return FAIL; |
307 post_ptr = post_start; | 313 post_ptr = post_start; |
308 post_end = post_start + nstate_max; | 314 post_end = post_start + nstate_max; |
315 wants_nfa = FALSE; | |
309 rex.nfa_has_zend = FALSE; | 316 rex.nfa_has_zend = FALSE; |
310 rex.nfa_has_backref = FALSE; | 317 rex.nfa_has_backref = FALSE; |
311 | 318 |
312 // shared with BT engine | 319 // shared with BT engine |
313 regcomp_start(expr, re_flags); | 320 regcomp_start(expr, re_flags); |
1705 break; | 1712 break; |
1706 case CLASS_GRAPH: | 1713 case CLASS_GRAPH: |
1707 EMIT(NFA_CLASS_GRAPH); | 1714 EMIT(NFA_CLASS_GRAPH); |
1708 break; | 1715 break; |
1709 case CLASS_LOWER: | 1716 case CLASS_LOWER: |
1717 wants_nfa = TRUE; | |
1710 EMIT(NFA_CLASS_LOWER); | 1718 EMIT(NFA_CLASS_LOWER); |
1711 break; | 1719 break; |
1712 case CLASS_PRINT: | 1720 case CLASS_PRINT: |
1713 EMIT(NFA_CLASS_PRINT); | 1721 EMIT(NFA_CLASS_PRINT); |
1714 break; | 1722 break; |
1717 break; | 1725 break; |
1718 case CLASS_SPACE: | 1726 case CLASS_SPACE: |
1719 EMIT(NFA_CLASS_SPACE); | 1727 EMIT(NFA_CLASS_SPACE); |
1720 break; | 1728 break; |
1721 case CLASS_UPPER: | 1729 case CLASS_UPPER: |
1730 wants_nfa = TRUE; | |
1722 EMIT(NFA_CLASS_UPPER); | 1731 EMIT(NFA_CLASS_UPPER); |
1723 break; | 1732 break; |
1724 case CLASS_XDIGIT: | 1733 case CLASS_XDIGIT: |
1725 EMIT(NFA_CLASS_XDIGIT); | 1734 EMIT(NFA_CLASS_XDIGIT); |
1726 break; | 1735 break; |
2135 return OK; | 2144 return OK; |
2136 } | 2145 } |
2137 | 2146 |
2138 // The engine is very inefficient (uses too many states) when the | 2147 // The engine is very inefficient (uses too many states) when the |
2139 // maximum is much larger than the minimum and when the maximum is | 2148 // maximum is much larger than the minimum and when the maximum is |
2140 // large. Bail out if we can use the other engine. | 2149 // large. However, when maxval is MAX_LIMIT, it is okay, as this |
2150 // will emit NFA_STAR. | |
2151 // Bail out if we can use the other engine, but only when the | |
2152 // pattern does not need the NFA engine (e.g. [[:upper:]]\{2,\} | |
2153 // does not work with characters > 8 bit with the BT engine) | |
2141 if ((nfa_re_flags & RE_AUTO) | 2154 if ((nfa_re_flags & RE_AUTO) |
2142 && (maxval > 500 || maxval > minval + 200)) | 2155 && (maxval > 500 || maxval > minval + 200) |
2156 && (maxval != MAX_LIMIT && minval < 200) | |
2157 && !wants_nfa) | |
2143 return FAIL; | 2158 return FAIL; |
2144 | 2159 |
2145 // Ignore previous call to nfa_regatom() | 2160 // Ignore previous call to nfa_regatom() |
2146 post_ptr = post_start + my_post_start; | 2161 post_ptr = post_start + my_post_start; |
2147 // Save parse state after the repeated atom and the \{} | 2162 // Save parse state after the repeated atom and the \{} |