comparison src/regexp.c @ 6328:adfbffe1e642 v7.4.497

updated for version 7.4.497 Problem: With some regexp patterns the NFA engine uses many states and becomes very slow. To the user it looks like Vim freezes. Solution: When the number of states reaches a limit fall back to the old engine. (Christian Brabandt)
author Bram Moolenaar <bram@vim.org>
date Wed, 05 Nov 2014 14:27:36 +0100
parents 8515b42f939c
children 27a36d1013a6
comparison
equal deleted inserted replaced
6327:896d12bf73c1 6328:adfbffe1e642
8009 static regengine_T bt_regengine = 8009 static regengine_T bt_regengine =
8010 { 8010 {
8011 bt_regcomp, 8011 bt_regcomp,
8012 bt_regfree, 8012 bt_regfree,
8013 bt_regexec_nl, 8013 bt_regexec_nl,
8014 bt_regexec_multi 8014 bt_regexec_multi,
8015 #ifdef DEBUG 8015 (char_u *)""
8016 ,(char_u *)""
8017 #endif
8018 }; 8016 };
8019
8020 8017
8021 #include "regexp_nfa.c" 8018 #include "regexp_nfa.c"
8022 8019
8023 static regengine_T nfa_regengine = 8020 static regengine_T nfa_regengine =
8024 { 8021 {
8025 nfa_regcomp, 8022 nfa_regcomp,
8026 nfa_regfree, 8023 nfa_regfree,
8027 nfa_regexec_nl, 8024 nfa_regexec_nl,
8028 nfa_regexec_multi 8025 nfa_regexec_multi,
8029 #ifdef DEBUG 8026 (char_u *)""
8030 ,(char_u *)""
8031 #endif
8032 }; 8027 };
8033 8028
8034 /* Which regexp engine to use? Needed for vim_regcomp(). 8029 /* Which regexp engine to use? Needed for vim_regcomp().
8035 * Must match with 'regexpengine'. */ 8030 * Must match with 'regexpengine'. */
8036 static int regexp_engine = 0; 8031 static int regexp_engine = 0;
8037 #define AUTOMATIC_ENGINE 0 8032
8038 #define BACKTRACKING_ENGINE 1
8039 #define NFA_ENGINE 2
8040 #ifdef DEBUG 8033 #ifdef DEBUG
8041 static char_u regname[][30] = { 8034 static char_u regname[][30] = {
8042 "AUTOMATIC Regexp Engine", 8035 "AUTOMATIC Regexp Engine",
8043 "BACKTRACKING Regexp Engine", 8036 "BACKTRACKING Regexp Engine",
8044 "NFA Regexp Engine" 8037 "NFA Regexp Engine"
8081 { 8074 {
8082 EMSG(_("E864: \\%#= can only be followed by 0, 1, or 2. The automatic engine will be used ")); 8075 EMSG(_("E864: \\%#= can only be followed by 0, 1, or 2. The automatic engine will be used "));
8083 regexp_engine = AUTOMATIC_ENGINE; 8076 regexp_engine = AUTOMATIC_ENGINE;
8084 } 8077 }
8085 } 8078 }
8086 #ifdef DEBUG
8087 bt_regengine.expr = expr; 8079 bt_regengine.expr = expr;
8088 nfa_regengine.expr = expr; 8080 nfa_regengine.expr = expr;
8089 #endif
8090 8081
8091 /* 8082 /*
8092 * First try the NFA engine, unless backtracking was requested. 8083 * First try the NFA engine, unless backtracking was requested.
8093 */ 8084 */
8094 if (regexp_engine != BACKTRACKING_ENGINE) 8085 if (regexp_engine != BACKTRACKING_ENGINE)
8095 prog = nfa_regengine.regcomp(expr, re_flags); 8086 prog = nfa_regengine.regcomp(expr, re_flags);
8096 else 8087 else
8097 prog = bt_regengine.regcomp(expr, re_flags); 8088 prog = bt_regengine.regcomp(expr, re_flags);
8098 8089
8099 if (prog == NULL) /* error compiling regexp with initial engine */ 8090 /* Check for error compiling regexp with initial engine. */
8091 if (prog == NULL)
8100 { 8092 {
8101 #ifdef BT_REGEXP_DEBUG_LOG 8093 #ifdef BT_REGEXP_DEBUG_LOG
8102 if (regexp_engine != BACKTRACKING_ENGINE) /* debugging log for NFA */ 8094 if (regexp_engine != BACKTRACKING_ENGINE) /* debugging log for NFA */
8103 { 8095 {
8104 FILE *f; 8096 FILE *f;
8112 EMSG2("(NFA) Could not open \"%s\" to write !!!", 8104 EMSG2("(NFA) Could not open \"%s\" to write !!!",
8113 BT_REGEXP_DEBUG_LOG_NAME); 8105 BT_REGEXP_DEBUG_LOG_NAME);
8114 } 8106 }
8115 #endif 8107 #endif
8116 /* 8108 /*
8117 * If the NFA engine failed, the backtracking engine won't work either. 8109 * If the NFA engine failed, try the backtracking engine.
8110 * Disabled for now, both engines fail on the same patterns.
8111 * Re-enable when regcomp() fails when the pattern would work better
8112 * with the other engine.
8118 * 8113 *
8119 if (regexp_engine == AUTOMATIC_ENGINE) 8114 if (regexp_engine == AUTOMATIC_ENGINE)
8115 {
8120 prog = bt_regengine.regcomp(expr, re_flags); 8116 prog = bt_regengine.regcomp(expr, re_flags);
8117 regexp_engine == BACKTRACKING_ENGINE;
8118 }
8121 */ 8119 */
8120 }
8121
8122 if (prog != NULL)
8123 {
8124 /* Store the info needed to call regcomp() again when the engine turns
8125 * out to be very slow when executing it. */
8126 prog->re_engine = regexp_engine;
8127 prog->re_flags = re_flags;
8122 } 8128 }
8123 8129
8124 return prog; 8130 return prog;
8125 } 8131 }
8126 8132
8133 { 8139 {
8134 if (prog != NULL) 8140 if (prog != NULL)
8135 prog->engine->regfree(prog); 8141 prog->engine->regfree(prog);
8136 } 8142 }
8137 8143
8144 #ifdef FEAT_EVAL
8145 static void report_re_switch __ARGS((char_u *pat));
8146
8147 static void
8148 report_re_switch(pat)
8149 char_u *pat;
8150 {
8151 if (p_verbose > 0)
8152 {
8153 verbose_enter();
8154 MSG_PUTS(_("Switching to backtracking RE engine for pattern: "));
8155 MSG_PUTS(pat);
8156 verbose_leave();
8157 }
8158 }
8159 #endif
8160
8161 static int vim_regexec_both __ARGS((regmatch_T *rmp, char_u *line, colnr_T col, int nl));
8162
8138 /* 8163 /*
8139 * Match a regexp against a string. 8164 * Match a regexp against a string.
8140 * "rmp->regprog" is a compiled regexp as returned by vim_regcomp(). 8165 * "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
8141 * Uses curbuf for line count and 'iskeyword'. 8166 * Uses curbuf for line count and 'iskeyword'.
8167 * When "nl" is TRUE consider a "\n" in "line" to be a line break.
8142 * 8168 *
8143 * Return TRUE if there is a match, FALSE if not. 8169 * Return TRUE if there is a match, FALSE if not.
8144 */ 8170 */
8171 static int
8172 vim_regexec_both(rmp, line, col, nl)
8173 regmatch_T *rmp;
8174 char_u *line; /* string to match against */
8175 colnr_T col; /* column to start looking for match */
8176 int nl;
8177 {
8178 int result = rmp->regprog->engine->regexec_nl(rmp, line, col, nl);
8179
8180 /* NFA engine aborted because it's very slow. */
8181 if (rmp->regprog->re_engine == AUTOMATIC_ENGINE
8182 && result == NFA_TOO_EXPENSIVE)
8183 {
8184 int save_p_re = p_re;
8185 int re_flags = rmp->regprog->re_flags;
8186 char_u *pat = vim_strsave(((nfa_regprog_T *)rmp->regprog)->pattern);
8187
8188 p_re = BACKTRACKING_ENGINE;
8189 vim_regfree(rmp->regprog);
8190 if (pat != NULL)
8191 {
8192 #ifdef FEAT_EVAL
8193 report_re_switch(pat);
8194 #endif
8195 rmp->regprog = vim_regcomp(pat, re_flags);
8196 if (rmp->regprog != NULL)
8197 result = rmp->regprog->engine->regexec_nl(rmp, line, col, nl);
8198 vim_free(pat);
8199 }
8200
8201 p_re = save_p_re;
8202 }
8203 return result;
8204 }
8205
8145 int 8206 int
8146 vim_regexec(rmp, line, col) 8207 vim_regexec(rmp, line, col)
8147 regmatch_T *rmp; 8208 regmatch_T *rmp;
8148 char_u *line; /* string to match against */ 8209 char_u *line;
8149 colnr_T col; /* column to start looking for match */ 8210 colnr_T col;
8150 { 8211 {
8151 return rmp->regprog->engine->regexec_nl(rmp, line, col, FALSE); 8212 return vim_regexec_both(rmp, line, col, FALSE);
8152 } 8213 }
8153 8214
8154 #if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) \ 8215 #if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) \
8155 || defined(FIND_REPLACE_DIALOG) || defined(PROTO) 8216 || defined(FIND_REPLACE_DIALOG) || defined(PROTO)
8156 /* 8217 /*
8157 * Like vim_regexec(), but consider a "\n" in "line" to be a line break. 8218 * Like vim_regexec(), but consider a "\n" in "line" to be a line break.
8158 */ 8219 */
8159 int 8220 int
8160 vim_regexec_nl(rmp, line, col) 8221 vim_regexec_nl(rmp, line, col)
8161 regmatch_T *rmp; 8222 regmatch_T *rmp;
8162 char_u *line; 8223 char_u *line;
8163 colnr_T col; 8224 colnr_T col;
8164 { 8225 {
8165 return rmp->regprog->engine->regexec_nl(rmp, line, col, TRUE); 8226 return vim_regexec_both(rmp, line, col, TRUE);
8166 } 8227 }
8167 #endif 8228 #endif
8168 8229
8169 /* 8230 /*
8170 * Match a regexp against multiple lines. 8231 * Match a regexp against multiple lines.
8181 buf_T *buf; /* buffer in which to search */ 8242 buf_T *buf; /* buffer in which to search */
8182 linenr_T lnum; /* nr of line to start looking for match */ 8243 linenr_T lnum; /* nr of line to start looking for match */
8183 colnr_T col; /* column to start looking for match */ 8244 colnr_T col; /* column to start looking for match */
8184 proftime_T *tm; /* timeout limit or NULL */ 8245 proftime_T *tm; /* timeout limit or NULL */
8185 { 8246 {
8186 return rmp->regprog->engine->regexec_multi(rmp, win, buf, lnum, col, tm); 8247 int result = rmp->regprog->engine->regexec_multi(
8187 } 8248 rmp, win, buf, lnum, col, tm);
8249
8250 /* NFA engine aborted because it's very slow. */
8251 if (rmp->regprog->re_engine == AUTOMATIC_ENGINE
8252 && result == NFA_TOO_EXPENSIVE)
8253 {
8254 int save_p_re = p_re;
8255 int re_flags = rmp->regprog->re_flags;
8256 char_u *pat = vim_strsave(((nfa_regprog_T *)rmp->regprog)->pattern);
8257
8258 p_re = BACKTRACKING_ENGINE;
8259 vim_regfree(rmp->regprog);
8260 if (pat != NULL)
8261 {
8262 #ifdef FEAT_EVAL
8263 report_re_switch(pat);
8264 #endif
8265 rmp->regprog = vim_regcomp(pat, re_flags);
8266 if (rmp->regprog != NULL)
8267 result = rmp->regprog->engine->regexec_multi(
8268 rmp, win, buf, lnum, col, tm);
8269 vim_free(pat);
8270 }
8271 p_re = save_p_re;
8272 }
8273
8274 return result;
8275 }