Mercurial > vim
diff src/regexp_nfa.c @ 4805:66803af09906 v7.3.1149
updated for version 7.3.1149
Problem: New regexp engine: Matching plain text could be faster.
Solution: Detect a plain text match and handle it specifically. Add
vim_regfree().
author | Bram Moolenaar <bram@vim.org> |
---|---|
date | Sat, 08 Jun 2013 18:19:48 +0200 |
parents | 3cd3cc1e9119 |
children | 3dbd251777de |
line wrap: on
line diff
--- a/src/regexp_nfa.c +++ b/src/regexp_nfa.c @@ -270,6 +270,7 @@ static int nfa_ll_index = 0; static int nfa_regcomp_start __ARGS((char_u *expr, int re_flags)); static int nfa_get_reganch __ARGS((nfa_state_T *start, int depth)); static int nfa_get_regstart __ARGS((nfa_state_T *start, int depth)); +static char_u *nfa_get_match_text __ARGS((nfa_state_T *start)); static int nfa_recognize_char_class __ARGS((char_u *start, char_u *end, int extra_newl)); static int nfa_emit_equi_class __ARGS((int c)); static int nfa_regatom __ARGS((void)); @@ -295,6 +296,7 @@ static int nfa_re_num_cmp __ARGS((long_u static long nfa_regtry __ARGS((nfa_regprog_T *prog, colnr_T col)); static long nfa_regexec_both __ARGS((char_u *line, colnr_T col)); static regprog_T *nfa_regcomp __ARGS((char_u *expr, int re_flags)); +static void nfa_regfree __ARGS((regprog_T *prog)); static int nfa_regexec __ARGS((regmatch_T *rmp, char_u *line, colnr_T col)); static long nfa_regexec_multi __ARGS((regmmatch_T *rmp, win_T *win, buf_T *buf, linenr_T lnum, colnr_T col, proftime_T *tm)); @@ -493,6 +495,52 @@ nfa_get_regstart(start, depth) } /* + * Figure out if the NFA state list contains just literal text and nothing + * else. If so return a string with what must match after regstart. + * Otherwise return NULL. + */ + static char_u * +nfa_get_match_text(start) + nfa_state_T *start; +{ + nfa_state_T *p = start; + int len = 0; + char_u *ret; + char_u *s; + + if (p->c != NFA_MOPEN) + return NULL; /* just in case */ + p = p->out; + while (p->c > 0) + { + len += MB_CHAR2LEN(p->c); + p = p->out; + } + if (p->c != NFA_MCLOSE || p->out->c != NFA_MATCH) + return NULL; + + ret = alloc(len); + if (ret != NULL) + { + len = 0; + p = start->out->out; /* skip first char, it goes into regstart */ + s = ret; + while (p->c > 0) + { +#ifdef FEAT_MBYTE + if (has_mbyte) + s += (*mb_char2bytes)(p->c, s); + else +#endif + *s++ = p->c; + p = p->out; + } + *s = NUL; + } + return ret; +} + +/* * Allocate more space for post_start. Called when * running above the estimated number of states. */ @@ -2280,8 +2328,13 @@ nfa_dump(prog) { nfa_print_state(debugf, prog->start); - fprintf(debugf, "reganch: %d\n", prog->reganch); - fprintf(debugf, "regstart: %d\n", prog->regstart); + if (prog->reganch) + fprintf(debugf, "reganch: %d\n", prog->reganch); + if (prog->regstart != NUL) + fprintf(debugf, "regstart: %c (decimal: %d)\n", + prog->regstart, prog->regstart); + if (prog->match_text != NULL) + fprintf(debugf, "match_text: \"%s\"\n", prog->match_text); fclose(debugf); } @@ -4154,6 +4207,7 @@ recursive_regmatch(state, prog, submatch static int failure_chance __ARGS((nfa_state_T *state, int depth)); static int skip_to_start __ARGS((int c, colnr_T *colp)); +static long find_match_text __ARGS((colnr_T startcol, int regstart, char_u *match_text)); /* * Estimate the chance of a match with "state" failing. @@ -4331,6 +4385,69 @@ skip_to_start(c, colp) } /* + * Check for a match with match_text. + * Called after skip_to_start() has find regstart. + * Returns zero for no match, 1 for a match. + */ + static long +find_match_text(startcol, regstart, match_text) + colnr_T startcol; + int regstart; + char_u *match_text; +{ + colnr_T col = startcol; + int c1, c2; + int len1, len2; + int match; + + for (;;) + { + match = TRUE; + len2 = MB_CHAR2LEN(regstart); /* skip regstart */ + for (len1 = 0; match_text[len1] != NUL; len1 += MB_CHAR2LEN(c1)) + { + c1 = PTR2CHAR(match_text + len1); + c2 = PTR2CHAR(regline + col + len2); + if (c1 != c2 && (!ireg_ic || MB_TOLOWER(c1) != MB_TOLOWER(c2))) + { + match = FALSE; + break; + } + len2 += MB_CHAR2LEN(c2); + } + if (match +#ifdef FEAT_MBYTE + /* check that no composing char follows */ + && !(enc_utf8 + && utf_iscomposing(PTR2CHAR(regline + col + len2))) +#endif + ) + { + cleanup_subexpr(); + if (REG_MULTI) + { + reg_startpos[0].lnum = reglnum; + reg_startpos[0].col = col; + reg_endpos[0].lnum = reglnum; + reg_endpos[0].col = col + len2; + } + else + { + reg_startp[0] = regline + col; + reg_endp[0] = regline + col + len2; + } + return 1L; + } + + /* Try finding regstart after the current match. */ + col += MB_CHAR2LEN(regstart); /* skip regstart */ + if (skip_to_start(regstart, &col) == FAIL) + break; + } + return 0L; +} + +/* * Main matching routine. * * Run NFA to determine whether it matches reginput. @@ -5584,17 +5701,6 @@ nfa_regtry(prog, col) #endif reginput = regline + col; - need_clear_subexpr = TRUE; -#ifdef FEAT_SYN_HL - /* Clear the external match subpointers if necessary. */ - if (prog->reghasz == REX_SET) - { - nfa_has_zsubexpr = TRUE; - need_clear_zsubexpr = TRUE; - } - else - nfa_has_zsubexpr = FALSE; -#endif #ifdef ENABLE_LOG f = fopen(NFA_REGEXP_RUN_LOG, "a"); @@ -5764,12 +5870,31 @@ nfa_regexec_both(line, startcol) if (prog->reganch && col > 0) return 0L; + need_clear_subexpr = TRUE; +#ifdef FEAT_SYN_HL + /* Clear the external match subpointers if necessary. */ + if (prog->reghasz == REX_SET) + { + nfa_has_zsubexpr = TRUE; + need_clear_zsubexpr = TRUE; + } + else + nfa_has_zsubexpr = FALSE; +#endif + if (prog->regstart != NUL) + { /* Skip ahead until a character we know the match must start with. * When there is none there is no match. */ if (skip_to_start(prog->regstart, &col) == FAIL) return 0L; + /* If match_text is set it contains the full text that must match. + * Nothing else to try. Doesn't handle combining chars well. */ + if (prog->match_text != NULL && !ireg_icombine) + return find_match_text(col, prog->regstart, prog->match_text); + } + /* If the start column is past the maximum column: no need to try. */ if (ireg_maxcol > 0 && col >= ireg_maxcol) goto theend; @@ -5876,6 +6001,8 @@ nfa_regcomp(expr, re_flags) prog->reganch = nfa_get_reganch(prog->start, 0); prog->regstart = nfa_get_regstart(prog->start, 0); + prog->match_text = nfa_get_match_text(prog->start); + #ifdef ENABLE_LOG nfa_postfix_dump(expr, OK); nfa_dump(prog); @@ -5885,7 +6012,7 @@ nfa_regcomp(expr, re_flags) prog->reghasz = re_has_z; #endif #ifdef DEBUG - prog->pattern = vim_strsave(expr); /* memory will leak */ + prog->pattern = vim_strsave(expr); nfa_regengine.expr = NULL; #endif @@ -5907,6 +6034,22 @@ fail: goto out; } +/* + * Free a compiled regexp program, returned by nfa_regcomp(). + */ + static void +nfa_regfree(prog) + regprog_T *prog; +{ + if (prog != NULL) + { + vim_free(((nfa_regprog_T *)prog)->match_text); +#ifdef DEBUG + vim_free(((nfa_regprog_T *)prog)->pattern); +#endif + vim_free(prog); + } +} /* * Match a regexp against a string.