Mercurial > vim
changeset 4673:05d57d7c2d55 v7.3.1084
updated for version 7.3.1084
Problem: New regexp engine: only accepts up to \{,10}.
Solution: Remove upper limit. Remove dead code with NFA_PLUS.
author | Bram Moolenaar <bram@vim.org> |
---|---|
date | Fri, 31 May 2013 23:18:00 +0200 |
parents | eada32f930a3 |
children | 7099f98528b4 |
files | src/regexp_nfa.c src/testdir/test64.in src/testdir/test64.ok src/version.c |
diffstat | 4 files changed, 35 insertions(+), 34 deletions(-) [+] |
line wrap: on
line diff
--- a/src/regexp_nfa.c +++ b/src/regexp_nfa.c @@ -29,11 +29,6 @@ # define NFA_REGEXP_DEBUG_LOG "nfa_regexp_debug.log" #endif -/* Upper limit allowed for {m,n} repetitions handled by NFA */ -#define NFA_BRACES_MAXLIMIT 10 -/* For allocating space for the postfix representation */ -#define NFA_POSTFIX_MULTIPLIER (NFA_BRACES_MAXLIMIT + 2)*2 - enum { NFA_SPLIT = -1024, @@ -44,7 +39,6 @@ enum NFA_CONCAT, NFA_OR, NFA_STAR, - NFA_PLUS, NFA_QUEST, NFA_QUEST_NONGREEDY, /* Non-greedy version of \? */ NFA_NOT, /* used for [^ab] negated char ranges */ @@ -253,7 +247,7 @@ nfa_regcomp_start(expr, re_flags) nstate = 0; istate = 0; /* A reasonable estimation for maximum size */ - nstate_max = (int)(STRLEN(expr) + 1) * NFA_POSTFIX_MULTIPLIER; + nstate_max = (int)(STRLEN(expr) + 1) * 25; /* Some items blow up in size, such as [A-z]. Add more space for that. * When it is still not enough realloc_post_list() will be used. */ @@ -1365,10 +1359,9 @@ nfa_regpiece() * string. * The submatch will the empty string. * - * In order to be consistent with the old engine, we disable - * NFA_PLUS, and replace <atom>+ with <atom><atom>* + * In order to be consistent with the old engine, we replace + * <atom>+ with <atom><atom>* */ - /* EMIT(NFA_PLUS); */ regnpar = old_regnpar; regparse = old_regparse; curchr = -1; @@ -1443,12 +1436,9 @@ nfa_regpiece() break; } - if (maxval > NFA_BRACES_MAXLIMIT) - { - /* This would yield a huge automaton and use too much memory. - * Revert to old engine */ + /* TODO: \{-} doesn't work yet */ + if (maxval == MAX_LIMIT && !greedy) return FAIL; - } /* Special case: x{0} or x{-0} */ if (maxval == 0) @@ -1478,9 +1468,16 @@ nfa_regpiece() return FAIL; /* after "minval" times, atoms are optional */ if (i + 1 > minval) - EMIT(quest); + { + if (maxval == MAX_LIMIT) + EMIT(NFA_STAR); + else + EMIT(quest); + } if (old_post_pos != my_post_start) EMIT(NFA_CONCAT); + if (i + 1 > minval && maxval == MAX_LIMIT) + break; } /* Go to just after the repeated atom and the \{} */ @@ -1779,7 +1776,6 @@ nfa_set_code(c) case NFA_EOF: STRCPY(code, "NFA_EOF "); break; case NFA_BOF: STRCPY(code, "NFA_BOF "); break; case NFA_STAR: STRCPY(code, "NFA_STAR "); break; - case NFA_PLUS: STRCPY(code, "NFA_PLUS "); break; case NFA_NOT: STRCPY(code, "NFA_NOT "); break; case NFA_SKIP_CHAR: STRCPY(code, "NFA_SKIP_CHAR"); break; case NFA_OR: STRCPY(code, "NFA_OR"); break; @@ -2343,21 +2339,6 @@ post2nfa(postfix, end, nfa_calc_size) PUSH(frag(s, append(e.out, list1(&s->out)))); break; - case NFA_PLUS: - /* One or more */ - if (nfa_calc_size == TRUE) - { - nstate++; - break; - } - e = POP(); - s = new_state(NFA_SPLIT, e.start, NULL); - if (s == NULL) - goto theend; - patch(e.out, s); - PUSH(frag(e.start, list1(&s->out1))); - break; - case NFA_SKIP_CHAR: /* Symbol of 0-length, Used in a repetition * with max/min count of 0 */
--- a/src/testdir/test64.in +++ b/src/testdir/test64.in @@ -182,7 +182,9 @@ STARTTEST :call add(tl, [2, 'a\{0,}', 'oij sdigfusnf', '']) :call add(tl, [2, 'a\{0,}', 'aaaaa aa', 'aaaaa']) :call add(tl, [2, 'a\{2,}', 'sdfiougjdsafg']) -:call add(tl, [0, 'a\{2,}', 'aaaaasfoij ', 'aaaaa']) +:call add(tl, [2, 'a\{2,}', 'aaaaasfoij ', 'aaaaa']) +:call add(tl, [2, 'a\{5,}', 'xxaaaaxxx ']) +:call add(tl, [2, 'a\{5,}', 'xxaaaaaxxx ', 'aaaaa']) :call add(tl, [2, 'a\{,0}', 'oidfguih iuhi hiu aaaa', '']) :call add(tl, [2, 'a\{,5}', 'abcd', 'a']) :call add(tl, [2, 'a\{,5}', 'aaaaaaaaaa', 'aaaaa']) @@ -225,7 +227,9 @@ STARTTEST :" :" Test greedy-ness and lazy-ness :call add(tl, [2, 'a\{-2,7}','aaaaaaaaaaaaa', 'aa']) +:call add(tl, [2, 'a\{-2,7}x','aaaaaaaaax', 'aaaaaaax']) :call add(tl, [2, 'a\{2,7}','aaaaaaaaaaaaaaaaaaaa', 'aaaaaaa']) +:call add(tl, [2, 'a\{2,7}x','aaaaaaaaax', 'aaaaaaax']) :call add(tl, [2, '\vx(.{-,8})yz(.*)','xayxayzxayzxayz','xayxayzxayzxayz','ayxa','xayzxayz']) :call add(tl, [2, '\vx(.*)yz(.*)','xayxayzxayzxayz','xayxayzxayzxayz', 'ayxayzxayzxa','']) :call add(tl, [2, '\v(a{1,2}){-2,3}','aaaaaaa','aaaa','aa']) @@ -366,7 +370,7 @@ STARTTEST :call add(tl, [2, '\_[^a]\+', "asfi\n9888", "sfi\n9888"]) :" :"""" Requiring lots of states. -:call add(tl, [0, '[0-9a-zA-Z]\{8}-\([0-9a-zA-Z]\{4}-\)\{3}[0-9a-zA-Z]\{12}', " 12345678-1234-1234-1234-123456789012 ", "12345678-1234-1234-1234-123456789012", "1234-"]) +:call add(tl, [2, '[0-9a-zA-Z]\{8}-\([0-9a-zA-Z]\{4}-\)\{3}[0-9a-zA-Z]\{12}', " 12345678-1234-1234-1234-123456789012 ", "12345678-1234-1234-1234-123456789012", "1234-"]) :" :" :"""" Run the tests
--- a/src/testdir/test64.ok +++ b/src/testdir/test64.ok @@ -389,6 +389,13 @@ OK 1 - a\{2,} OK 2 - a\{2,} OK 0 - a\{2,} OK 1 - a\{2,} +OK 2 - a\{2,} +OK 0 - a\{5,} +OK 1 - a\{5,} +OK 2 - a\{5,} +OK 0 - a\{5,} +OK 1 - a\{5,} +OK 2 - a\{5,} OK 0 - a\{,0} OK 1 - a\{,0} OK 2 - a\{,0} @@ -486,9 +493,15 @@ OK 2 - \v(a|b*)+ OK 0 - a\{-2,7} OK 1 - a\{-2,7} OK 2 - a\{-2,7} +OK 0 - a\{-2,7}x +OK 1 - a\{-2,7}x +OK 2 - a\{-2,7}x OK 0 - a\{2,7} OK 1 - a\{2,7} OK 2 - a\{2,7} +OK 0 - a\{2,7}x +OK 1 - a\{2,7}x +OK 2 - a\{2,7}x OK 0 - \vx(.{-,8})yz(.*) OK 1 - \vx(.{-,8})yz(.*) OK 2 - \vx(.{-,8})yz(.*) @@ -803,6 +816,7 @@ OK 1 - \_[^a]\+ OK 2 - \_[^a]\+ OK 0 - [0-9a-zA-Z]\{8}-\([0-9a-zA-Z]\{4}-\)\{3}[0-9a-zA-Z]\{12} OK 1 - [0-9a-zA-Z]\{8}-\([0-9a-zA-Z]\{4}-\)\{3}[0-9a-zA-Z]\{12} +OK 2 - [0-9a-zA-Z]\{8}-\([0-9a-zA-Z]\{4}-\)\{3}[0-9a-zA-Z]\{12} 192.168.0.1 192.168.0.1 192.168.0.1