# HG changeset patch # User Bram Moolenaar # Date 1375375106 -7200 # Node ID 3c6e2b89875f39c8417021aa369695ec1ba762df # Parent d17b182a73a468c1d8badd914b59fd9e9ad99b69 updated for version 7.4b.004 Problem: Regexp crash on pattern "@\%[\w\-]*". (Axel Kielhorn) Solution: Add \%(\) around \%[] internally. diff --git a/src/regexp_nfa.c b/src/regexp_nfa.c --- a/src/regexp_nfa.c +++ b/src/regexp_nfa.c @@ -1166,6 +1166,15 @@ nfa_regatom() reg_magic == MAGIC_ALL); EMIT(NFA_OPT_CHARS); EMIT(n); + + /* Emit as "\%(\%[abc]\)" to be able to handle + * "\%[abc]*" which would cause the empty string to be + * matched an unlimited number of times. NFA_NOPEN is + * added only once at a position, while NFA_SPLIT is + * added multiple times. This is more efficient than + * not allowing NFA_SPLIT multiple times, it is used + * a lot. */ + EMIT(NFA_NOPEN); break; } @@ -1641,7 +1650,7 @@ nfa_regpiece() * engine interprets the plus as "try matching one more time", and * a* matches a second time at the end of the input, the empty * string. - * The submatch will the empty string. + * The submatch will be the empty string. 
* * In order to be consistent with the old engine, we replace * + with * @@ -2242,13 +2251,13 @@ nfa_postfix_dump(expr, retval) else if (retval == OK) fprintf(f, ">>> NFA engine succeeded !\n"); fprintf(f, "Regexp: \"%s\"\nPostfix notation (char): \"", expr); - for (p = post_start; *p && p < post_end; p++) + for (p = post_start; *p && p < post_ptr; p++) { nfa_set_code(*p); fprintf(f, "%s, ", code); } fprintf(f, "\"\nPostfix notation (int): "); - for (p = post_start; *p && p < post_end; p++) + for (p = post_start; *p && p < post_ptr; p++) fprintf(f, "%d ", *p); fprintf(f, "\n\n"); fclose(f); @@ -3005,7 +3014,18 @@ post2nfa(postfix, end, nfa_calc_size) { int n; - /* \%[abc] */ + /* \%[abc] implemented as: + * NFA_SPLIT + * +-CHAR(a) + * | +-NFA_SPLIT + * | +-CHAR(b) + * | | +-NFA_SPLIT + * | | +-CHAR(c) + * | | | +-next + * | | +- next + * | +- next + * +- next + */ n = *++p; /* get number of characters */ if (nfa_calc_size == TRUE) { diff --git a/src/testdir/test64.in b/src/testdir/test64.in --- a/src/testdir/test64.in +++ b/src/testdir/test64.in @@ -373,6 +373,7 @@ STARTTEST :call add(tl, [2, '\%[bar]x', 'xxx', 'x']) :call add(tl, [2, 'b\%[[ao]r]', 'bar bor', 'bar']) :call add(tl, [2, 'b\%[[]]r]', 'b]r bor', 'b]r']) +:call add(tl, [2, '@\%[\w\-]*', '[@pandoc]', '@pandoc']) :" :"""" Alternatives, must use first longest match :call add(tl, [2, 'goo\|go', 'google', 'goo']) diff --git a/src/testdir/test64.ok b/src/testdir/test64.ok --- a/src/testdir/test64.ok +++ b/src/testdir/test64.ok @@ -857,6 +857,9 @@ OK 2 - b\%[[ao]r] OK 0 - b\%[[]]r] OK 1 - b\%[[]]r] OK 2 - b\%[[]]r] +OK 0 - @\%[\w\-]* +OK 1 - @\%[\w\-]* +OK 2 - @\%[\w\-]* OK 0 - goo\|go OK 1 - goo\|go OK 2 - goo\|go diff --git a/src/version.c b/src/version.c --- a/src/version.c +++ b/src/version.c @@ -728,6 +728,8 @@ static char *(features[]) = static int included_patches[] = { /* Add new patch number below this line */ /**/ + 4, +/**/ 3, /**/ 2,