comparison src/regexp_nfa.c @ 23471:a7cdfc8e4b6e v8.2.2278

patch 8.2.2278: falling back to old regexp engine can break some patterns Commit: https://github.com/vim/vim/commit/66c50c565321d4d49d8d5620912e5e8fe4825644 Author: Bram Moolenaar <Bram@vim.org> Date: Sat Jan 2 17:43:49 2021 +0100 patch 8.2.2278: falling back to old regexp engine can break some patterns Problem: Falling back to old regexp engine can break some patterns. Solution: Do not fall back once [[:lower:]] or [[:upper:]] is used. (Christian Brabandt, closes #7572)
author Bram Moolenaar <Bram@vim.org>
date Sat, 02 Jan 2021 17:45:03 +0100
parents 22d0c25869d8
children 585695c70392
comparison
equal deleted inserted replaced
23470:0d0ed7100454 23471:a7cdfc8e4b6e
251 // Variables only used in nfa_regcomp() and descendants. 251 // Variables only used in nfa_regcomp() and descendants.
252 static int nfa_re_flags; // re_flags passed to nfa_regcomp() 252 static int nfa_re_flags; // re_flags passed to nfa_regcomp()
253 static int *post_start; // holds the postfix form of r.e. 253 static int *post_start; // holds the postfix form of r.e.
254 static int *post_end; 254 static int *post_end;
255 static int *post_ptr; 255 static int *post_ptr;
256
257 // Set when the pattern should use the NFA engine.
258 // E.g. [[:upper:]] only allows 8bit characters for BT engine,
259 // while NFA engine handles multibyte characters correctly.
260 static int wants_nfa;
261
256 static int nstate; // Number of states in the NFA. 262 static int nstate; // Number of states in the NFA.
257 static int istate; // Index in the state vector, used in alloc_state() 263 static int istate; // Index in the state vector, used in alloc_state()
258 264
259 // If not NULL match must end at this position 265 // If not NULL match must end at this position
260 static save_se_T *nfa_endp = NULL; 266 static save_se_T *nfa_endp = NULL;
304 post_start = alloc(postfix_size); 310 post_start = alloc(postfix_size);
305 if (post_start == NULL) 311 if (post_start == NULL)
306 return FAIL; 312 return FAIL;
307 post_ptr = post_start; 313 post_ptr = post_start;
308 post_end = post_start + nstate_max; 314 post_end = post_start + nstate_max;
315 wants_nfa = FALSE;
309 rex.nfa_has_zend = FALSE; 316 rex.nfa_has_zend = FALSE;
310 rex.nfa_has_backref = FALSE; 317 rex.nfa_has_backref = FALSE;
311 318
312 // shared with BT engine 319 // shared with BT engine
313 regcomp_start(expr, re_flags); 320 regcomp_start(expr, re_flags);
1705 break; 1712 break;
1706 case CLASS_GRAPH: 1713 case CLASS_GRAPH:
1707 EMIT(NFA_CLASS_GRAPH); 1714 EMIT(NFA_CLASS_GRAPH);
1708 break; 1715 break;
1709 case CLASS_LOWER: 1716 case CLASS_LOWER:
1717 wants_nfa = TRUE;
1710 EMIT(NFA_CLASS_LOWER); 1718 EMIT(NFA_CLASS_LOWER);
1711 break; 1719 break;
1712 case CLASS_PRINT: 1720 case CLASS_PRINT:
1713 EMIT(NFA_CLASS_PRINT); 1721 EMIT(NFA_CLASS_PRINT);
1714 break; 1722 break;
1717 break; 1725 break;
1718 case CLASS_SPACE: 1726 case CLASS_SPACE:
1719 EMIT(NFA_CLASS_SPACE); 1727 EMIT(NFA_CLASS_SPACE);
1720 break; 1728 break;
1721 case CLASS_UPPER: 1729 case CLASS_UPPER:
1730 wants_nfa = TRUE;
1722 EMIT(NFA_CLASS_UPPER); 1731 EMIT(NFA_CLASS_UPPER);
1723 break; 1732 break;
1724 case CLASS_XDIGIT: 1733 case CLASS_XDIGIT:
1725 EMIT(NFA_CLASS_XDIGIT); 1734 EMIT(NFA_CLASS_XDIGIT);
1726 break; 1735 break;
2135 return OK; 2144 return OK;
2136 } 2145 }
2137 2146
2138 // The engine is very inefficient (uses too many states) when the 2147 // The engine is very inefficient (uses too many states) when the
2139 // maximum is much larger than the minimum and when the maximum is 2148 // maximum is much larger than the minimum and when the maximum is
2140 // large. Bail out if we can use the other engine. 2149 // large. However, when maxval is MAX_LIMIT, it is okay, as this
2150 // will emit NFA_STAR.
2151 // Bail out if we can use the other engine, but only when the
2152 // pattern does not need the NFA engine (e.g. [[:upper:]]\{2,\}
2153 // does not work with characters > 8 bit with the BT engine).
2141 if ((nfa_re_flags & RE_AUTO) 2154 if ((nfa_re_flags & RE_AUTO)
2142 && (maxval > 500 || maxval > minval + 200)) 2155 && (maxval > 500 || maxval > minval + 200)
2156 && (maxval != MAX_LIMIT && minval < 200)
2157 && !wants_nfa)
2143 return FAIL; 2158 return FAIL;
2144 2159
2145 // Ignore previous call to nfa_regatom() 2160 // Ignore previous call to nfa_regatom()
2146 post_ptr = post_start + my_post_start; 2161 post_ptr = post_start + my_post_start;
2147 // Save parse state after the repeated atom and the \{} 2162 // Save parse state after the repeated atom and the \{}