Mercurial > vim
changeset 4740:97560c16ca99 v7.3.1117
updated for version 7.3.1117
Problem: New regexp engine: \%[abc] not supported.
Solution: Implement \%[abc]. Add tests.
author | Bram Moolenaar <bram@vim.org> |
---|---|
date | Wed, 05 Jun 2013 11:05:17 +0200 |
parents | a74d6e9f580c |
children | 72ab0f628a55 |
files | src/regexp_nfa.c src/testdir/test64.in src/testdir/test64.ok src/version.c |
diffstat | 4 files changed, 116 insertions(+), 12 deletions(-) [+] |
line wrap: on
line diff
--- a/src/regexp_nfa.c +++ b/src/regexp_nfa.c @@ -61,6 +61,7 @@ enum NFA_COMPOSING, /* Next nodes in NFA are part of the composing multibyte char */ NFA_END_COMPOSING, /* End of a composing char in the NFA */ + NFA_OPT_CHARS, /* \%[abc] */ /* The following are used only in the postfix form, not in the NFA */ NFA_PREV_ATOM_NO_WIDTH, /* Used for \@= */ @@ -972,8 +973,21 @@ nfa_regatom() #endif case '[': - /* TODO: \%[abc] not supported yet */ - return FAIL; + { + int n; + + /* \%[abc] */ + for (n = 0; (c = getchr()) != ']'; ++n) + { + if (c == NUL) + EMSG2_RET_FAIL(_(e_missing_sb), + reg_magic == MAGIC_ALL); + EMIT(c); + } + EMIT(NFA_OPT_CHARS); + EMIT(n); + break; + } default: { @@ -989,7 +1003,6 @@ nfa_regatom() } if (c == 'l' || c == 'c' || c == 'v') { - EMIT(n); if (c == 'l') /* \%{n}l \%{n}<l \%{n}>l */ EMIT(cmp == '<' ? NFA_LNUM_LT : @@ -1002,14 +1015,15 @@ nfa_regatom() /* \%{n}v \%{n}<v \%{n}>v */ EMIT(cmp == '<' ? NFA_VCOL_LT : cmp == '>' ? NFA_VCOL_GT : NFA_VCOL); + EMIT(n); break; } else if (c == '\'' && n == 0) { /* \%'m \%<'m \%>'m */ - EMIT(getchr()); EMIT(cmp == '<' ? NFA_MARK_LT : cmp == '>' ? 
NFA_MARK_GT : NFA_MARK); + EMIT(getchr()); break; } } @@ -1885,6 +1899,7 @@ nfa_set_code(c) case NFA_COMPOSING: STRCPY(code, "NFA_COMPOSING"); break; case NFA_END_COMPOSING: STRCPY(code, "NFA_END_COMPOSING"); break; + case NFA_OPT_CHARS: STRCPY(code, "NFA_OPT_CHARS"); break; case NFA_MOPEN: case NFA_MOPEN1: @@ -2558,10 +2573,49 @@ post2nfa(postfix, end, nfa_calc_size) PUSH(frag(s, list1(&s->out))); break; + case NFA_OPT_CHARS: + { + int n; + + /* \%[abc] */ + n = *++p; /* get number of characters */ + if (nfa_calc_size == TRUE) + { + nstate += n; + break; + } + e1.out = NULL; /* stores list with out1's */ + s1 = NULL; /* previous NFA_SPLIT to connect to */ + while (n-- > 0) + { + e = POP(); /* get character */ + s = alloc_state(NFA_SPLIT, e.start, NULL); + if (s == NULL) + goto theend; + if (e1.out == NULL) + e1 = e; + patch(e.out, s1); + append(e1.out, list1(&s->out1)); + s1 = s; + } + PUSH(frag(s, e1.out)); + break; + } + case NFA_PREV_ATOM_NO_WIDTH: case NFA_PREV_ATOM_NO_WIDTH_NEG: case NFA_PREV_ATOM_JUST_BEFORE: case NFA_PREV_ATOM_JUST_BEFORE_NEG: + { + int neg = (*p == NFA_PREV_ATOM_NO_WIDTH_NEG + || *p == NFA_PREV_ATOM_JUST_BEFORE_NEG); + int before = (*p == NFA_PREV_ATOM_JUST_BEFORE + || *p == NFA_PREV_ATOM_JUST_BEFORE_NEG); + int n; + + if (before) + n = *++p; /* get the count */ + /* The \@= operator: match the preceding atom with zero width. * The \@! operator: no match for the preceding atom. * The \@<= operator: match for the preceding atom. 
@@ -2583,21 +2637,20 @@ post2nfa(postfix, end, nfa_calc_size) s = alloc_state(NFA_START_INVISIBLE, e.start, s1); if (s == NULL) goto theend; - if (*p == NFA_PREV_ATOM_NO_WIDTH_NEG - || *p == NFA_PREV_ATOM_JUST_BEFORE_NEG) + if (neg) { s->negated = TRUE; s1->negated = TRUE; } - if (*p == NFA_PREV_ATOM_JUST_BEFORE - || *p == NFA_PREV_ATOM_JUST_BEFORE_NEG) + if (before) { - s->val = *++p; /* get the count */ + s->val = n; /* store the count */ ++s->c; /* NFA_START_INVISIBLE -> NFA_START_INVISIBLE_BEFORE */ } PUSH(frag(s, list1(&s1->out))); break; + } #ifdef FEAT_MBYTE case NFA_COMPOSING: /* char with composing char */ @@ -2750,18 +2803,21 @@ post2nfa(postfix, end, nfa_calc_size) case NFA_MARK: case NFA_MARK_GT: case NFA_MARK_LT: + { + int n = *++p; /* lnum, col or mark name */ + if (nfa_calc_size == TRUE) { nstate += 1; break; } - e1 = POP(); - s = alloc_state(*p, NULL, NULL); + s = alloc_state(p[-1], NULL, NULL); if (s == NULL) goto theend; - s->val = e1.start->c; /* lnum, col or mark name */ + s->val = n; PUSH(frag(s, list1(&s->out))); break; + } case NFA_ZSTART: case NFA_ZEND:
--- a/src/testdir/test64.in
+++ b/src/testdir/test64.in
@@ -352,6 +352,19 @@ STARTTEST
 :call add(tl, [2, '\%u0020', 'yes no', ' '])
 :call add(tl, [2, '\%U00000020', 'yes no', ' '])
 :"
+:""""" \%[abc]
+:call add(tl, [2, 'foo\%[bar]', 'fobar'])
+:call add(tl, [2, 'foo\%[bar]', 'foobar', 'foobar'])
+:call add(tl, [2, 'foo\%[bar]', 'fooxx', 'foo'])
+:call add(tl, [2, 'foo\%[bar]', 'foobxx', 'foob'])
+:call add(tl, [2, 'foo\%[bar]', 'foobaxx', 'fooba'])
+:call add(tl, [2, 'foo\%[bar]', 'foobarxx', 'foobar'])
+:call add(tl, [2, 'foo\%[bar]x', 'foobxx', 'foobx'])
+:call add(tl, [2, 'foo\%[bar]x', 'foobarxx', 'foobarx'])
+:call add(tl, [2, '\%[bar]x', 'barxx', 'barx'])
+:call add(tl, [2, '\%[bar]x', 'bxx', 'bx'])
+:call add(tl, [2, '\%[bar]x', 'xxx', 'x'])
+:"
 :"""" Alternatives, must use first longest match
 :call add(tl, [2, 'goo\|go', 'google', 'goo'])
 :call add(tl, [2, '\<goo\|\<go', 'google', 'goo'])
--- a/src/testdir/test64.ok
+++ b/src/testdir/test64.ok
@@ -797,6 +797,39 @@ OK 2 - \%u0020
 OK 0 - \%U00000020
 OK 1 - \%U00000020
 OK 2 - \%U00000020
+OK 0 - foo\%[bar]
+OK 1 - foo\%[bar]
+OK 2 - foo\%[bar]
+OK 0 - foo\%[bar]
+OK 1 - foo\%[bar]
+OK 2 - foo\%[bar]
+OK 0 - foo\%[bar]
+OK 1 - foo\%[bar]
+OK 2 - foo\%[bar]
+OK 0 - foo\%[bar]
+OK 1 - foo\%[bar]
+OK 2 - foo\%[bar]
+OK 0 - foo\%[bar]
+OK 1 - foo\%[bar]
+OK 2 - foo\%[bar]
+OK 0 - foo\%[bar]
+OK 1 - foo\%[bar]
+OK 2 - foo\%[bar]
+OK 0 - foo\%[bar]x
+OK 1 - foo\%[bar]x
+OK 2 - foo\%[bar]x
+OK 0 - foo\%[bar]x
+OK 1 - foo\%[bar]x
+OK 2 - foo\%[bar]x
+OK 0 - \%[bar]x
+OK 1 - \%[bar]x
+OK 2 - \%[bar]x
+OK 0 - \%[bar]x
+OK 1 - \%[bar]x
+OK 2 - \%[bar]x
+OK 0 - \%[bar]x
+OK 1 - \%[bar]x
+OK 2 - \%[bar]x
 OK 0 - goo\|go
 OK 1 - goo\|go
 OK 2 - goo\|go