Mercurial > vim
changeset 4675:811a4c9b51d8 v7.3.1085
updated for version 7.3.1085
Problem: New regexp engine: Non-greedy multi doesn't work.
Solution: Implement \{-}.
author | Bram Moolenaar <bram@vim.org> |
---|---|
date | Sat, 01 Jun 2013 12:40:20 +0200 |
parents | 7099f98528b4 |
children | 9d3768ec9d74 |
files | src/regexp_nfa.c src/testdir/test64.in src/testdir/test64.ok src/version.c |
diffstat | 4 files changed, 52 insertions(+), 20 deletions(-) [+] |
line wrap: on
line diff
--- a/src/regexp_nfa.c
+++ b/src/regexp_nfa.c
@@ -38,9 +38,10 @@ enum
 
     NFA_CONCAT,
     NFA_OR,
-    NFA_STAR,
-    NFA_QUEST,
-    NFA_QUEST_NONGREEDY, /* Non-greedy version of \? */
+    NFA_STAR, /* greedy * */
+    NFA_STAR_NONGREEDY, /* non-greedy * */
+    NFA_QUEST, /* greedy \? */
+    NFA_QUEST_NONGREEDY, /* non-greedy \? */
     NFA_NOT, /* used for [^ab] negated char ranges */
 
     NFA_BOL, /* ^ Begin line */
@@ -1430,16 +1431,17 @@ nfa_regpiece()
             }
             /* <atom>{0,inf}, <atom>{0,} and <atom>{} are equivalent to
              * <atom>* */
-            if (minval == 0 && maxval == MAX_LIMIT && greedy)
+            if (minval == 0 && maxval == MAX_LIMIT)
             {
-                EMIT(NFA_STAR);
+                if (greedy)
+                    /* \{}, \{0,} */
+                    EMIT(NFA_STAR);
+                else
+                    /* \{-}, \{-0,} */
+                    EMIT(NFA_STAR_NONGREEDY);
                 break;
             }
 
-            /* TODO: \{-} doesn't work yet */
-            if (maxval == MAX_LIMIT && !greedy)
-                return FAIL;
-
             /* Special case: x{0} or x{-0} */
             if (maxval == 0)
             {
@@ -1470,7 +1472,12 @@ nfa_regpiece()
                 if (i + 1 > minval)
                 {
                     if (maxval == MAX_LIMIT)
-                        EMIT(NFA_STAR);
+                    {
+                        if (greedy)
+                            EMIT(NFA_STAR);
+                        else
+                            EMIT(NFA_STAR_NONGREEDY);
+                    }
                     else
                         EMIT(quest);
                 }
@@ -1776,11 +1783,12 @@ nfa_set_code(c)
         case NFA_EOF: STRCPY(code, "NFA_EOF "); break;
         case NFA_BOF: STRCPY(code, "NFA_BOF "); break;
         case NFA_STAR: STRCPY(code, "NFA_STAR "); break;
+        case NFA_STAR_NONGREEDY: STRCPY(code, "NFA_STAR_NONGREEDY "); break;
+        case NFA_QUEST: STRCPY(code, "NFA_QUEST"); break;
+        case NFA_QUEST_NONGREEDY: STRCPY(code, "NFA_QUEST_NON_GREEDY"); break;
         case NFA_NOT: STRCPY(code, "NFA_NOT "); break;
         case NFA_SKIP_CHAR: STRCPY(code, "NFA_SKIP_CHAR"); break;
         case NFA_OR: STRCPY(code, "NFA_OR"); break;
-        case NFA_QUEST: STRCPY(code, "NFA_QUEST"); break;
-        case NFA_QUEST_NONGREEDY: STRCPY(code, "NFA_QUEST_NON_GREEDY"); break;
         case NFA_END_NEG_RANGE: STRCPY(code, "NFA_END_NEG_RANGE"); break;
         case NFA_CLASS_ALNUM: STRCPY(code, "NFA_CLASS_ALNUM"); break;
         case NFA_CLASS_ALPHA: STRCPY(code, "NFA_CLASS_ALPHA"); break;
@@ -2297,7 +2305,7 @@ post2nfa(postfix, end, nfa_calc_size)
             break;
 
         case NFA_STAR:
-            /* Zero or more */
+            /* Zero or more, prefer more */
             if (nfa_calc_size == TRUE)
             {
                 nstate++;
@@ -2311,6 +2319,21 @@ post2nfa(postfix, end, nfa_calc_size)
             PUSH(frag(s, list1(&s->out1)));
             break;
 
+        case NFA_STAR_NONGREEDY:
+            /* Zero or more, prefer zero */
+            if (nfa_calc_size == TRUE)
+            {
+                nstate++;
+                break;
+            }
+            e = POP();
+            s = new_state(NFA_SPLIT, NULL, e.start);
+            if (s == NULL)
+                goto theend;
+            patch(e.out, s);
+            PUSH(frag(s, list1(&s->out)));
+            break;
+
         case NFA_QUEST:
             /* one or zero atoms=> greedy match */
             if (nfa_calc_size == TRUE)
--- a/src/testdir/test64.in
+++ b/src/testdir/test64.in
@@ -23,8 +23,8 @@ STARTTEST
 :call add(tl, [2, 'ab', 'aab', 'ab'])
 :call add(tl, [2, 'b', 'abcdef', 'b'])
 :call add(tl, [2, 'bc*', 'abccccdef', 'bcccc'])
-:call add(tl, [0, 'bc\{-}', 'abccccdef', 'b'])
-:call add(tl, [0, 'bc\{-}\(d\)', 'abccccdef', 'bccccd', 'd'])
+:call add(tl, [2, 'bc\{-}', 'abccccdef', 'b'])
+:call add(tl, [2, 'bc\{-}\(d\)', 'abccccdef', 'bccccd', 'd'])
 :call add(tl, [2, 'bc*', 'abbdef', 'b'])
 :call add(tl, [2, 'c*', 'ccc', 'ccc'])
 :call add(tl, [2, 'bc*', 'abdef', 'b'])
@@ -201,16 +201,16 @@ STARTTEST
 :call add(tl, [2, 'a\{-0}', 'asoiuj', ''])
 :call add(tl, [2, 'a\{-2}', 'aaaa', 'aa'])
 :call add(tl, [2, 'a\{-2}', 'abcdefghijklmnopqrestuvwxyz1234567890'])
-:call add(tl, [0, 'a\{-0,}', 'oij sdigfusnf', ''])
-:call add(tl, [0, 'a\{-0,}', 'aaaaa aa', ''])
+:call add(tl, [2, 'a\{-0,}', 'oij sdigfusnf', ''])
+:call add(tl, [2, 'a\{-0,}', 'aaaaa aa', ''])
 :call add(tl, [2, 'a\{-2,}', 'sdfiougjdsafg'])
-:call add(tl, [0, 'a\{-2,}', 'aaaaasfoij ', 'aa'])
+:call add(tl, [2, 'a\{-2,}', 'aaaaasfoij ', 'aa'])
 :call add(tl, [2, 'a\{-,0}', 'oidfguih iuhi hiu aaaa', ''])
 :call add(tl, [2, 'a\{-,5}', 'abcd', ''])
 :call add(tl, [2, 'a\{-,5}', 'aaaaaaaaaa', ''])
 :" anti-greedy version of 'a*'
-:call add(tl, [0, 'a\{-}', 'bbbcddiuhfcd', ''])
-:call add(tl, [0, 'a\{-}', 'aaaaioudfh coisf jda', ''])
+:call add(tl, [2, 'a\{-}', 'bbbcddiuhfcd', ''])
+:call add(tl, [2, 'a\{-}', 'aaaaioudfh coisf jda', ''])
 :"
 :" Test groups of characters and submatches
 :call add(tl, [2, '\(abc\)*', 'abcabcabc', 'abcabcabc', 'abc'])
--- a/src/testdir/test64.ok
+++ b/src/testdir/test64.ok
@@ -10,8 +10,10 @@ OK 1 - bc*
 OK 2 - bc*
 OK 0 - bc\{-}
 OK 1 - bc\{-}
+OK 2 - bc\{-}
 OK 0 - bc\{-}\(d\)
 OK 1 - bc\{-}\(d\)
+OK 2 - bc\{-}\(d\)
 OK 0 - bc*
 OK 1 - bc*
 OK 2 - bc*
@@ -437,13 +439,16 @@ OK 1 - a\{-2}
 OK 2 - a\{-2}
 OK 0 - a\{-0,}
 OK 1 - a\{-0,}
+OK 2 - a\{-0,}
 OK 0 - a\{-0,}
 OK 1 - a\{-0,}
+OK 2 - a\{-0,}
 OK 0 - a\{-2,}
 OK 1 - a\{-2,}
 OK 2 - a\{-2,}
 OK 0 - a\{-2,}
 OK 1 - a\{-2,}
+OK 2 - a\{-2,}
 OK 0 - a\{-,0}
 OK 1 - a\{-,0}
 OK 2 - a\{-,0}
@@ -455,8 +460,10 @@ OK 1 - a\{-,5}
 OK 2 - a\{-,5}
 OK 0 - a\{-}
 OK 1 - a\{-}
+OK 2 - a\{-}
 OK 0 - a\{-}
 OK 1 - a\{-}
+OK 2 - a\{-}
 OK 0 - \(abc\)*
 OK 1 - \(abc\)*
 OK 2 - \(abc\)*