# HG changeset patch # User Bram Moolenaar # Date 1415194056 -3600 # Node ID adfbffe1e642d8779872ced560c13f3e62a39819 # Parent 896d12bf73c1eab6290c85fea15052844bd0ee16 updated for version 7.4.497 Problem: With some regexp patterns the NFA engine uses many states and becomes very slow. To the user it looks like Vim freezes. Solution: When the number of states reaches a limit fall back to the old engine. (Christian Brabandt) diff --git a/Filelist b/Filelist --- a/Filelist +++ b/Filelist @@ -102,6 +102,9 @@ SRC_ALL = \ src/testdir/pythonx/topmodule/submodule/subsubmodule/subsubsubmodule.py \ src/testdir/python_after/*.py \ src/testdir/python_before/*.py \ + src/testdir/bench*.in \ + src/testdir/bench*.vim \ + src/testdir/samples.*.txt \ src/proto.h \ src/proto/blowfish.pro \ src/proto/buffer.pro \ diff --git a/runtime/doc/options.txt b/runtime/doc/options.txt --- a/runtime/doc/options.txt +++ b/runtime/doc/options.txt @@ -5626,6 +5626,10 @@ A jump table for the options with a shor Note that when using the NFA engine and the pattern contains something that is not supported the pattern will not match. This is only useful for debugging the regexp engine. + Using automatic selection enables Vim to switch the engine, if the + default engine becomes too costly. E.g., when the NFA engine uses too + many states. This should prevent Vim from hanging on a combination of + a complex pattern with long text. *'relativenumber'* *'rnu'* *'norelativenumber'* *'nornu'* 'relativenumber' 'rnu' boolean (default off) diff --git a/src/Makefile b/src/Makefile --- a/src/Makefile +++ b/src/Makefile @@ -1879,6 +1879,9 @@ test check: cd testdir; $(MAKE) -f Makefile $(GUI_TESTTARGET) VIMPROG=../$(VIMTARGET) $(GUI_TESTARG) SCRIPTSOURCE=../$(SCRIPTSOURCE) $(MAKE) -f Makefile unittest +benchmark: + cd testdir; $(MAKE) -f Makefile benchmark VIMPROG=../$(VIMTARGET) SCRIPTSOURCE=../$(SCRIPTSOURCE) + unittesttargets: $(MAKE) -f Makefile $(UNITTEST_TARGETS) diff --git a/src/regexp.c b/src/regexp.c --- a/src/regexp.c +++ b/src/regexp.c @@ -8011,13 +8011,10 @@ static regengine_T bt_regengine = bt_regcomp, bt_regfree, bt_regexec_nl, - bt_regexec_multi -#ifdef DEBUG - ,(char_u *)"" -#endif + bt_regexec_multi, + (char_u *)"" }; - #include "regexp_nfa.c" static regengine_T nfa_regengine = @@ -8025,18 +8022,14 @@ static regengine_T nfa_regengine = nfa_regcomp, nfa_regfree, nfa_regexec_nl, - nfa_regexec_multi -#ifdef DEBUG - ,(char_u *)"" -#endif + nfa_regexec_multi, + (char_u *)"" }; /* Which regexp engine to use? Needed for vim_regcomp(). * Must match with 'regexpengine'. */ static int regexp_engine = 0; -#define AUTOMATIC_ENGINE 0 -#define BACKTRACKING_ENGINE 1 -#define NFA_ENGINE 2 + #ifdef DEBUG static char_u regname[][30] = { "AUTOMATIC Regexp Engine", @@ -8083,10 +8076,8 @@ vim_regcomp(expr_arg, re_flags) regexp_engine = AUTOMATIC_ENGINE; } } -#ifdef DEBUG bt_regengine.expr = expr; nfa_regengine.expr = expr; -#endif /* * First try the NFA engine, unless backtracking was requested. @@ -8096,7 +8087,8 @@ vim_regcomp(expr_arg, re_flags) else prog = bt_regengine.regcomp(expr, re_flags); - if (prog == NULL) /* error compiling regexp with initial engine */ + /* Check for error compiling regexp with initial engine. */ + if (prog == NULL) { #ifdef BT_REGEXP_DEBUG_LOG if (regexp_engine != BACKTRACKING_ENGINE) /* debugging log for NFA */ @@ -8114,13 +8106,27 @@ vim_regcomp(expr_arg, re_flags) } #endif /* - * If the NFA engine failed, the backtracking engine won't work either. + * If the NFA engine failed, try the backtracking engine. + * Disabled for now, both engines fail on the same patterns. + * Re-enable when regcomp() fails when the pattern would work better + * with the other engine. * if (regexp_engine == AUTOMATIC_ENGINE) + { prog = bt_regengine.regcomp(expr, re_flags); + regexp_engine == BACKTRACKING_ENGINE; + } */ } + if (prog != NULL) + { + /* Store the info needed to call regcomp() again when the engine turns + * out to be very slow when executing it. */ + prog->re_engine = regexp_engine; + prog->re_flags = re_flags; + } + return prog; } @@ -8135,20 +8141,75 @@ vim_regfree(prog) prog->engine->regfree(prog); } +#ifdef FEAT_EVAL +static void report_re_switch __ARGS((char_u *pat)); + + static void +report_re_switch(pat) + char_u *pat; +{ + if (p_verbose > 0) + { + verbose_enter(); + MSG_PUTS(_("Switching to backtracking RE engine for pattern: ")); + MSG_PUTS(pat); + verbose_leave(); + } +} +#endif + +static int vim_regexec_both __ARGS((regmatch_T *rmp, char_u *line, colnr_T col, int nl)); + /* * Match a regexp against a string. * "rmp->regprog" is a compiled regexp as returned by vim_regcomp(). * Uses curbuf for line count and 'iskeyword'. + * When "nl" is TRUE consider a "\n" in "line" to be a line break. * * Return TRUE if there is a match, FALSE if not. */ + static int +vim_regexec_both(rmp, line, col, nl) + regmatch_T *rmp; + char_u *line; /* string to match against */ + colnr_T col; /* column to start looking for match */ + int nl; +{ + int result = rmp->regprog->engine->regexec_nl(rmp, line, col, nl); + + /* NFA engine aborted because it's very slow. */ + if (rmp->regprog->re_engine == AUTOMATIC_ENGINE + && result == NFA_TOO_EXPENSIVE) + { + int save_p_re = p_re; + int re_flags = rmp->regprog->re_flags; + char_u *pat = vim_strsave(((nfa_regprog_T *)rmp->regprog)->pattern); + + p_re = BACKTRACKING_ENGINE; + vim_regfree(rmp->regprog); + if (pat != NULL) + { +#ifdef FEAT_EVAL + report_re_switch(pat); +#endif + rmp->regprog = vim_regcomp(pat, re_flags); + if (rmp->regprog != NULL) + result = rmp->regprog->engine->regexec_nl(rmp, line, col, nl); + vim_free(pat); + } + + p_re = save_p_re; + } + return result; +} + int vim_regexec(rmp, line, col) - regmatch_T *rmp; - char_u *line; /* string to match against */ - colnr_T col; /* column to start looking for match */ + regmatch_T *rmp; + char_u *line; + colnr_T col; { - return rmp->regprog->engine->regexec_nl(rmp, line, col, FALSE); + return vim_regexec_both(rmp, line, col, FALSE); } #if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) \ @@ -8158,11 +8219,11 @@ vim_regexec(rmp, line, col) */ int vim_regexec_nl(rmp, line, col) - regmatch_T *rmp; - char_u *line; - colnr_T col; + regmatch_T *rmp; + char_u *line; + colnr_T col; { - return rmp->regprog->engine->regexec_nl(rmp, line, col, TRUE); + return vim_regexec_both(rmp, line, col, TRUE); } #endif @@ -8183,5 +8244,32 @@ vim_regexec_multi(rmp, win, buf, lnum, c colnr_T col; /* column to start looking for match */ proftime_T *tm; /* timeout limit or NULL */ { - return rmp->regprog->engine->regexec_multi(rmp, win, buf, lnum, col, tm); + int result = rmp->regprog->engine->regexec_multi( + rmp, win, buf, lnum, col, tm); + + /* NFA engine aborted because it's very slow. */ + if (rmp->regprog->re_engine == AUTOMATIC_ENGINE + && result == NFA_TOO_EXPENSIVE) + { + int save_p_re = p_re; + int re_flags = rmp->regprog->re_flags; + char_u *pat = vim_strsave(((nfa_regprog_T *)rmp->regprog)->pattern); + + p_re = BACKTRACKING_ENGINE; + vim_regfree(rmp->regprog); + if (pat != NULL) + { +#ifdef FEAT_EVAL + report_re_switch(pat); +#endif + rmp->regprog = vim_regcomp(pat, re_flags); + if (rmp->regprog != NULL) + result = rmp->regprog->engine->regexec_multi( + rmp, win, buf, lnum, col, tm); + vim_free(pat); + } + p_re = save_p_re; + } + + return result; } diff --git a/src/regexp.h b/src/regexp.h --- a/src/regexp.h +++ b/src/regexp.h @@ -27,6 +27,18 @@ */ #define NFA_MAX_BRACES 20 +/* + * In the NFA engine: how many states are allowed + */ +#define NFA_MAX_STATES 100000 +#define NFA_TOO_EXPENSIVE -1 + +/* Which regexp engine to use? Needed for vim_regcomp(). + * Must match with 'regexpengine'. */ +#define AUTOMATIC_ENGINE 0 +#define BACKTRACKING_ENGINE 1 +#define NFA_ENGINE 2 + typedef struct regengine regengine_T; /* @@ -38,6 +50,8 @@ typedef struct regprog { regengine_T *engine; unsigned regflags; + unsigned re_engine; /* automatic, backtracking or nfa engine */ + unsigned re_flags; /* second argument for vim_regcomp() */ } regprog_T; /* @@ -47,9 +61,11 @@ typedef struct regprog */ typedef struct { - /* These two members implement regprog_T */ + /* These four members implement regprog_T */ regengine_T *engine; unsigned regflags; + unsigned re_engine; + unsigned re_flags; /* second argument for vim_regcomp() */ int regstart; char_u reganch; @@ -81,9 +97,11 @@ struct nfa_state */ typedef struct { - /* These two members implement regprog_T */ + /* These three members implement regprog_T */ regengine_T *engine; unsigned regflags; + unsigned re_engine; + unsigned re_flags; /* second argument for vim_regcomp() */ nfa_state_T *start; /* points into state[] */ @@ -96,9 +114,7 @@ typedef struct #ifdef FEAT_SYN_HL int reghasz; #endif -#ifdef DEBUG char_u *pattern; -#endif int nsubexp; /* number of () */ int nstate; nfa_state_T state[1]; /* actually longer.. */ @@ -151,9 +167,7 @@ struct regengine void (*regfree)(regprog_T *); int (*regexec_nl)(regmatch_T*, char_u*, colnr_T, int); long (*regexec_multi)(regmmatch_T*, win_T*, buf_T*, linenr_T, colnr_T, proftime_T*); -#ifdef DEBUG char_u *expr; -#endif }; #endif /* _REGEXP_H */ diff --git a/src/regexp_nfa.c b/src/regexp_nfa.c --- a/src/regexp_nfa.c +++ b/src/regexp_nfa.c @@ -5522,6 +5522,13 @@ nfa_regmatch(prog, start, submatch, m) nextlist->n = 0; /* clear nextlist */ nextlist->has_pim = FALSE; ++nfa_listid; + if (prog->re_engine == AUTOMATIC_ENGINE && nfa_listid >= NFA_MAX_STATES) + { + /* too many states, retry with old engine */ + nfa_match = NFA_TOO_EXPENSIVE; + goto theend; + } + thislist->id = nfa_listid; nextlist->id = nfa_listid + 1; @@ -5704,6 +5711,11 @@ nfa_regmatch(prog, start, submatch, m) */ result = recursive_regmatch(t->state, NULL, prog, submatch, m, &listids); + if (result == NFA_TOO_EXPENSIVE) + { + nfa_match = result; + goto theend; + } /* for \@! and \@state, NULL, prog, submatch, m, &listids); + if (result == NFA_TOO_EXPENSIVE) + { + nfa_match = result; + goto theend; + } if (result) { int bytelen; @@ -6760,6 +6777,7 @@ nfa_regtry(prog, col) int i; regsubs_T subs, m; nfa_state_T *start = prog->start; + int result; #ifdef ENABLE_LOG FILE *f; #endif @@ -6791,8 +6809,11 @@ nfa_regtry(prog, col) clear_sub(&m.synt); #endif - if (nfa_regmatch(prog, start, &subs, &m) == FALSE) + result = nfa_regmatch(prog, start, &subs, &m); + if (result == FALSE) return 0; + else if (result == NFA_TOO_EXPENSIVE) + return result; cleanup_subexpr(); if (REG_MULTI) @@ -6929,9 +6950,7 @@ nfa_regexec_both(line, startcol) nfa_nsubexpr = prog->nsubexp; nfa_listid = 1; nfa_alt_listid = 2; -#ifdef DEBUG nfa_regengine.expr = prog->pattern; -#endif if (prog->reganch && col > 0) return 0L; @@ -6979,9 +6998,7 @@ nfa_regexec_both(line, startcol) retval = nfa_regtry(prog, col); -#ifdef DEBUG nfa_regengine.expr = NULL; -#endif theend: return retval; @@ -7003,9 +7020,7 @@ nfa_regcomp(expr, re_flags) if (expr == NULL) return NULL; -#ifdef DEBUG nfa_regengine.expr = expr; -#endif init_class_tab(); @@ -7082,10 +7097,8 @@ nfa_regcomp(expr, re_flags) /* Remember whether this pattern has any \z specials in it. */ prog->reghasz = re_has_z; #endif -#ifdef DEBUG prog->pattern = vim_strsave(expr); nfa_regengine.expr = NULL; -#endif out: vim_free(post_start); @@ -7099,9 +7112,7 @@ fail: #ifdef ENABLE_LOG nfa_postfix_dump(expr, FAIL); #endif -#ifdef DEBUG nfa_regengine.expr = NULL; -#endif goto out; } @@ -7115,9 +7126,7 @@ nfa_regfree(prog) if (prog != NULL) { vim_free(((nfa_regprog_T *)prog)->match_text); -#ifdef DEBUG vim_free(((nfa_regprog_T *)prog)->pattern); -#endif vim_free(prog); } } diff --git a/src/testdir/Make_dos.mak b/src/testdir/Make_dos.mak --- a/src/testdir/Make_dos.mak +++ b/src/testdir/Make_dos.mak @@ -87,6 +87,7 @@ clean: -if exist Xfind rd /s /q Xfind -if exist viminfo del viminfo -del test.log + -if exists benchmark.out del benchmark.out .in.out: -if exist $*.failed del $*.failed @@ -103,3 +104,11 @@ clean: nolog: -del test.log + +benchmark: + bench_re_freeze.out + +bench_re_freeze.out: bench_re_freeze.vim + -if exist benchmark.out del benchmark.out + $(VIMPROG) -u dos.vim -U NONE --noplugin $*.in + @IF EXIST benchmark.out ( type benchmark.out ) diff --git a/src/testdir/Make_ming.mak b/src/testdir/Make_ming.mak --- a/src/testdir/Make_ming.mak +++ b/src/testdir/Make_ming.mak @@ -12,11 +12,13 @@ ifneq (sh.exe, $(SHELL)) DEL = rm -f MV = mv CP = cp +CAT = cat DIRSLASH = / else DEL = del MV = rename CP = copy +CAT = type DIRSLASH = \\ endif @@ -72,6 +74,8 @@ SCRIPTS32 = test50.out test70.out SCRIPTS_GUI = test16.out +SCRIPTS_BENCH = bench_re_freeze.out + .SUFFIXES: .in .out vimall: fixff $(SCRIPTS16) $(SCRIPTS) $(SCRIPTS_GUI) $(SCRIPTS32) @@ -80,6 +84,8 @@ vimall: fixff $(SCRIPTS16) $(SCRIPTS) $( nongui: fixff $(SCRIPTS16) $(SCRIPTS) echo ALL DONE +benchmark: $(SCRIPTS_BENCH) + small: echo ALL DONE @@ -114,3 +120,8 @@ clean: -$(DEL) X* -$(DEL) test.ok -$(DEL) viminfo + +bench_re_freeze.out: bench_re_freeze.vim + -$(DEL) benchmark.out + $(VIMPROG) -u dos.vim -U NONE --noplugin $*.in + $(CAT) benchmark.out diff --git a/src/testdir/Make_os2.mak b/src/testdir/Make_os2.mak --- a/src/testdir/Make_os2.mak +++ b/src/testdir/Make_os2.mak @@ -50,6 +50,8 @@ SCRIPTS = test1.out test3.out test4.out test_signs.out \ test_utf8.out +SCRIPTS_BENCH = bench_re_freeze.out + .SUFFIXES: .in .out all: /tmp $(SCRIPTS) @@ -57,6 +59,8 @@ all: /tmp $(SCRIPTS) $(SCRIPTS): $(VIMPROG) +benchmark: $(SCRIPTS_BENCH) + clean: -rm -rf *.out Xdotest test.ok tiny.vim small.vim mbyte.vim viminfo @@ -75,3 +79,10 @@ clean: # Create a directory for temp files /tmp: -mkdir /tmp + +bench_re_freeze.out: bench_re_freeze.vim + -del $*.failed test.ok benchmark.out + copy $*.ok test.ok + $(VIMPROG) -u os2.vim --noplugin -s dotest.in $*.in + type benchmark.out + diff --git a/src/testdir/Makefile b/src/testdir/Makefile --- a/src/testdir/Makefile +++ b/src/testdir/Makefile @@ -48,12 +48,16 @@ SCRIPTS = test1.out test2.out test3.out SCRIPTS_GUI = test16.out +SCRIPTS_BENCH = bench_re_freeze.out + .SUFFIXES: .in .out nongui: nolog $(SCRIPTS) report gui: nolog $(SCRIPTS) $(SCRIPTS_GUI) report +benchmark: $(SCRIPTS_BENCH) + report: @echo @echo 'Test results:' @@ -65,7 +69,7 @@ report: $(SCRIPTS) $(SCRIPTS_GUI): $(VIMPROG) RM_ON_RUN = test.out X* viminfo -RM_ON_START = tiny.vim small.vim mbyte.vim mzscheme.vim lua.vim test.ok +RM_ON_START = tiny.vim small.vim mbyte.vim mzscheme.vim lua.vim test.ok benchmark.out RUN_VIM = VIMRUNTIME=$(SCRIPTSOURCE); export VIMRUNTIME; $(VALGRIND) $(VIMPROG) -u unix.vim -U NONE --noplugin -s dotest.in clean: @@ -120,5 +124,14 @@ test49.out: test49.vim test60.out: test60.vim +bench_re_freeze.out: bench_re_freeze.vim + -rm -rf benchmark.out $(RM_ON_RUN) + # Sleep a moment to avoid that the xterm title is messed up. + # 200 msec is sufficient, but only modern sleep supports a fraction of + # a second, fall back to a second if it fails. + @-/bin/sh -c "sleep .2 > /dev/null 2>&1 || sleep 1" + -$(RUN_VIM) $*.in + @/bin/sh -c "if test -f benchmark.out; then cat benchmark.out; fi" + nolog: -rm -f test.log diff --git a/src/testdir/bench_re_freeze.in b/src/testdir/bench_re_freeze.in new file mode 100644 --- /dev/null +++ b/src/testdir/bench_re_freeze.in @@ -0,0 +1,13 @@ +Test for Benchmarking RE engine + +STARTTEST +:so small.vim +:if !has("reltime") | qa! | endif +:set nocp cpo&vim +:so bench_re_freeze.vim +:call Measure('samples/re.freeze.txt', '\s\+\%#\@55555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555 + diff --git a/src/version.c b/src/version.c --- a/src/version.c +++ b/src/version.c @@ -742,6 +742,8 @@ static char *(features[]) = static int included_patches[] = { /* Add new patch number below this line */ /**/ + 497, +/**/ 496, /**/ 495,