Mercurial > vim
comparison src/regexp.c @ 6328:adfbffe1e642 v7.4.497
updated for version 7.4.497
Problem: With some regexp patterns the NFA engine uses many states and
becomes very slow. To the user it looks like Vim freezes.
Solution: When the number of states reaches a limit fall back to the old
engine. (Christian Brabandt)
author | Bram Moolenaar <bram@vim.org> |
---|---|
date | Wed, 05 Nov 2014 14:27:36 +0100 |
parents | 8515b42f939c |
children | 27a36d1013a6 |
comparison
equal
deleted
inserted
replaced
6327:896d12bf73c1 | 6328:adfbffe1e642 |
---|---|
8009 static regengine_T bt_regengine = | 8009 static regengine_T bt_regengine = |
8010 { | 8010 { |
8011 bt_regcomp, | 8011 bt_regcomp, |
8012 bt_regfree, | 8012 bt_regfree, |
8013 bt_regexec_nl, | 8013 bt_regexec_nl, |
8014 bt_regexec_multi | 8014 bt_regexec_multi, |
8015 #ifdef DEBUG | 8015 (char_u *)"" |
8016 ,(char_u *)"" | |
8017 #endif | |
8018 }; | 8016 }; |
8019 | |
8020 | 8017 |
8021 #include "regexp_nfa.c" | 8018 #include "regexp_nfa.c" |
8022 | 8019 |
8023 static regengine_T nfa_regengine = | 8020 static regengine_T nfa_regengine = |
8024 { | 8021 { |
8025 nfa_regcomp, | 8022 nfa_regcomp, |
8026 nfa_regfree, | 8023 nfa_regfree, |
8027 nfa_regexec_nl, | 8024 nfa_regexec_nl, |
8028 nfa_regexec_multi | 8025 nfa_regexec_multi, |
8029 #ifdef DEBUG | 8026 (char_u *)"" |
8030 ,(char_u *)"" | |
8031 #endif | |
8032 }; | 8027 }; |
8033 | 8028 |
8034 /* Which regexp engine to use? Needed for vim_regcomp(). | 8029 /* Which regexp engine to use? Needed for vim_regcomp(). |
8035 * Must match with 'regexpengine'. */ | 8030 * Must match with 'regexpengine'. */ |
8036 static int regexp_engine = 0; | 8031 static int regexp_engine = 0; |
8037 #define AUTOMATIC_ENGINE 0 | 8032 |
8038 #define BACKTRACKING_ENGINE 1 | |
8039 #define NFA_ENGINE 2 | |
8040 #ifdef DEBUG | 8033 #ifdef DEBUG |
8041 static char_u regname[][30] = { | 8034 static char_u regname[][30] = { |
8042 "AUTOMATIC Regexp Engine", | 8035 "AUTOMATIC Regexp Engine", |
8043 "BACKTRACKING Regexp Engine", | 8036 "BACKTRACKING Regexp Engine", |
8044 "NFA Regexp Engine" | 8037 "NFA Regexp Engine" |
8081 { | 8074 { |
8082 EMSG(_("E864: \\%#= can only be followed by 0, 1, or 2. The automatic engine will be used ")); | 8075 EMSG(_("E864: \\%#= can only be followed by 0, 1, or 2. The automatic engine will be used ")); |
8083 regexp_engine = AUTOMATIC_ENGINE; | 8076 regexp_engine = AUTOMATIC_ENGINE; |
8084 } | 8077 } |
8085 } | 8078 } |
8086 #ifdef DEBUG | |
8087 bt_regengine.expr = expr; | 8079 bt_regengine.expr = expr; |
8088 nfa_regengine.expr = expr; | 8080 nfa_regengine.expr = expr; |
8089 #endif | |
8090 | 8081 |
8091 /* | 8082 /* |
8092 * First try the NFA engine, unless backtracking was requested. | 8083 * First try the NFA engine, unless backtracking was requested. |
8093 */ | 8084 */ |
8094 if (regexp_engine != BACKTRACKING_ENGINE) | 8085 if (regexp_engine != BACKTRACKING_ENGINE) |
8095 prog = nfa_regengine.regcomp(expr, re_flags); | 8086 prog = nfa_regengine.regcomp(expr, re_flags); |
8096 else | 8087 else |
8097 prog = bt_regengine.regcomp(expr, re_flags); | 8088 prog = bt_regengine.regcomp(expr, re_flags); |
8098 | 8089 |
8099 if (prog == NULL) /* error compiling regexp with initial engine */ | 8090 /* Check for error compiling regexp with initial engine. */ |
8091 if (prog == NULL) | |
8100 { | 8092 { |
8101 #ifdef BT_REGEXP_DEBUG_LOG | 8093 #ifdef BT_REGEXP_DEBUG_LOG |
8102 if (regexp_engine != BACKTRACKING_ENGINE) /* debugging log for NFA */ | 8094 if (regexp_engine != BACKTRACKING_ENGINE) /* debugging log for NFA */ |
8103 { | 8095 { |
8104 FILE *f; | 8096 FILE *f; |
8112 EMSG2("(NFA) Could not open \"%s\" to write !!!", | 8104 EMSG2("(NFA) Could not open \"%s\" to write !!!", |
8113 BT_REGEXP_DEBUG_LOG_NAME); | 8105 BT_REGEXP_DEBUG_LOG_NAME); |
8114 } | 8106 } |
8115 #endif | 8107 #endif |
8116 /* | 8108 /* |
8117 * If the NFA engine failed, the backtracking engine won't work either. | 8109 * If the NFA engine failed, try the backtracking engine. |
8110 * Disabled for now, both engines fail on the same patterns. | |
8111 * Re-enable when regcomp() fails when the pattern would work better | |
8112 * with the other engine. | |
8118 * | 8113 * |
8119 if (regexp_engine == AUTOMATIC_ENGINE) | 8114 if (regexp_engine == AUTOMATIC_ENGINE) |
8115 { | |
8120 prog = bt_regengine.regcomp(expr, re_flags); | 8116 prog = bt_regengine.regcomp(expr, re_flags); |
8117 regexp_engine == BACKTRACKING_ENGINE; | |
8118 } | |
8121 */ | 8119 */ |
8120 } | |
8121 | |
8122 if (prog != NULL) | |
8123 { | |
8124 /* Store the info needed to call regcomp() again when the engine turns | |
8125 * out to be very slow when executing it. */ | |
8126 prog->re_engine = regexp_engine; | |
8127 prog->re_flags = re_flags; | |
8122 } | 8128 } |
8123 | 8129 |
8124 return prog; | 8130 return prog; |
8125 } | 8131 } |
8126 | 8132 |
8133 { | 8139 { |
8134 if (prog != NULL) | 8140 if (prog != NULL) |
8135 prog->engine->regfree(prog); | 8141 prog->engine->regfree(prog); |
8136 } | 8142 } |
8137 | 8143 |
8144 #ifdef FEAT_EVAL | |
8145 static void report_re_switch __ARGS((char_u *pat)); | |
8146 | |
8147 static void | |
8148 report_re_switch(pat) | |
8149 char_u *pat; | |
8150 { | |
8151 if (p_verbose > 0) | |
8152 { | |
8153 verbose_enter(); | |
8154 MSG_PUTS(_("Switching to backtracking RE engine for pattern: ")); | |
8155 MSG_PUTS(pat); | |
8156 verbose_leave(); | |
8157 } | |
8158 } | |
8159 #endif | |
8160 | |
8161 static int vim_regexec_both __ARGS((regmatch_T *rmp, char_u *line, colnr_T col, int nl)); | |
8162 | |
8138 /* | 8163 /* |
8139 * Match a regexp against a string. | 8164 * Match a regexp against a string. |
8140 * "rmp->regprog" is a compiled regexp as returned by vim_regcomp(). | 8165 * "rmp->regprog" is a compiled regexp as returned by vim_regcomp(). |
8141 * Uses curbuf for line count and 'iskeyword'. | 8166 * Uses curbuf for line count and 'iskeyword'. |
8167 * When "nl" is TRUE consider a "\n" in "line" to be a line break. | |
8142 * | 8168 * |
8143 * Return TRUE if there is a match, FALSE if not. | 8169 * Return TRUE if there is a match, FALSE if not. |
8144 */ | 8170 */ |
8171 static int | |
8172 vim_regexec_both(rmp, line, col, nl) | |
8173 regmatch_T *rmp; | |
8174 char_u *line; /* string to match against */ | |
8175 colnr_T col; /* column to start looking for match */ | |
8176 int nl; | |
8177 { | |
8178 int result = rmp->regprog->engine->regexec_nl(rmp, line, col, nl); | |
8179 | |
8180 /* NFA engine aborted because it's very slow. */ | |
8181 if (rmp->regprog->re_engine == AUTOMATIC_ENGINE | |
8182 && result == NFA_TOO_EXPENSIVE) | |
8183 { | |
8184 int save_p_re = p_re; | |
8185 int re_flags = rmp->regprog->re_flags; | |
8186 char_u *pat = vim_strsave(((nfa_regprog_T *)rmp->regprog)->pattern); | |
8187 | |
8188 p_re = BACKTRACKING_ENGINE; | |
8189 vim_regfree(rmp->regprog); | |
8190 if (pat != NULL) | |
8191 { | |
8192 #ifdef FEAT_EVAL | |
8193 report_re_switch(pat); | |
8194 #endif | |
8195 rmp->regprog = vim_regcomp(pat, re_flags); | |
8196 if (rmp->regprog != NULL) | |
8197 result = rmp->regprog->engine->regexec_nl(rmp, line, col, nl); | |
8198 vim_free(pat); | |
8199 } | |
8200 | |
8201 p_re = save_p_re; | |
8202 } | |
8203 return result; | |
8204 } | |
8205 | |
8145 int | 8206 int |
8146 vim_regexec(rmp, line, col) | 8207 vim_regexec(rmp, line, col) |
8147 regmatch_T *rmp; | 8208 regmatch_T *rmp; |
8148 char_u *line; /* string to match against */ | 8209 char_u *line; |
8149 colnr_T col; /* column to start looking for match */ | 8210 colnr_T col; |
8150 { | 8211 { |
8151 return rmp->regprog->engine->regexec_nl(rmp, line, col, FALSE); | 8212 return vim_regexec_both(rmp, line, col, FALSE); |
8152 } | 8213 } |
8153 | 8214 |
8154 #if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) \ | 8215 #if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) \ |
8155 || defined(FIND_REPLACE_DIALOG) || defined(PROTO) | 8216 || defined(FIND_REPLACE_DIALOG) || defined(PROTO) |
8156 /* | 8217 /* |
8157 * Like vim_regexec(), but consider a "\n" in "line" to be a line break. | 8218 * Like vim_regexec(), but consider a "\n" in "line" to be a line break. |
8158 */ | 8219 */ |
8159 int | 8220 int |
8160 vim_regexec_nl(rmp, line, col) | 8221 vim_regexec_nl(rmp, line, col) |
8161 regmatch_T *rmp; | 8222 regmatch_T *rmp; |
8162 char_u *line; | 8223 char_u *line; |
8163 colnr_T col; | 8224 colnr_T col; |
8164 { | 8225 { |
8165 return rmp->regprog->engine->regexec_nl(rmp, line, col, TRUE); | 8226 return vim_regexec_both(rmp, line, col, TRUE); |
8166 } | 8227 } |
8167 #endif | 8228 #endif |
8168 | 8229 |
8169 /* | 8230 /* |
8170 * Match a regexp against multiple lines. | 8231 * Match a regexp against multiple lines. |
8181 buf_T *buf; /* buffer in which to search */ | 8242 buf_T *buf; /* buffer in which to search */ |
8182 linenr_T lnum; /* nr of line to start looking for match */ | 8243 linenr_T lnum; /* nr of line to start looking for match */ |
8183 colnr_T col; /* column to start looking for match */ | 8244 colnr_T col; /* column to start looking for match */ |
8184 proftime_T *tm; /* timeout limit or NULL */ | 8245 proftime_T *tm; /* timeout limit or NULL */ |
8185 { | 8246 { |
8186 return rmp->regprog->engine->regexec_multi(rmp, win, buf, lnum, col, tm); | 8247 int result = rmp->regprog->engine->regexec_multi( |
8187 } | 8248 rmp, win, buf, lnum, col, tm); |
8249 | |
8250 /* NFA engine aborted because it's very slow. */ | |
8251 if (rmp->regprog->re_engine == AUTOMATIC_ENGINE | |
8252 && result == NFA_TOO_EXPENSIVE) | |
8253 { | |
8254 int save_p_re = p_re; | |
8255 int re_flags = rmp->regprog->re_flags; | |
8256 char_u *pat = vim_strsave(((nfa_regprog_T *)rmp->regprog)->pattern); | |
8257 | |
8258 p_re = BACKTRACKING_ENGINE; | |
8259 vim_regfree(rmp->regprog); | |
8260 if (pat != NULL) | |
8261 { | |
8262 #ifdef FEAT_EVAL | |
8263 report_re_switch(pat); | |
8264 #endif | |
8265 rmp->regprog = vim_regcomp(pat, re_flags); | |
8266 if (rmp->regprog != NULL) | |
8267 result = rmp->regprog->engine->regexec_multi( | |
8268 rmp, win, buf, lnum, col, tm); | |
8269 vim_free(pat); | |
8270 } | |
8271 p_re = save_p_re; | |
8272 } | |
8273 | |
8274 return result; | |
8275 } |