Mercurial > vim
comparison src/regexp_nfa.c @ 4712:832bf8136d86 v7.3.1103
updated for version 7.3.1103
Problem: New regexp engine: overhead in saving and restoring.
Solution: Make saving and restoring list IDs faster. Don't copy or check \z
subexpressions when they are not used.
author | Bram Moolenaar <bram@vim.org> |
---|---|
date | Sun, 02 Jun 2013 21:30:04 +0200 |
parents | ed4e689bbea1 |
children | fc4d7f02ea3a |
comparison
equal
deleted
inserted
replaced
4711:515ce0356f88 | 4712:832bf8136d86 |
---|---|
235 static int nfa_has_zend; | 235 static int nfa_has_zend; |
236 | 236 |
237 /* NFA regexp \1 .. \9 encountered. */ | 237 /* NFA regexp \1 .. \9 encountered. */ |
238 static int nfa_has_backref; | 238 static int nfa_has_backref; |
239 | 239 |
240 /* NFA regexp has \z( ), set zsubexpr. */ | |
241 static int nfa_has_zsubexpr; | |
242 | |
240 /* Number of sub expressions actually being used during execution. 1 if only | 243 /* Number of sub expressions actually being used during execution. 1 if only |
241 * the whole match (subexpr 0) is used. */ | 244 * the whole match (subexpr 0) is used. */ |
242 static int nfa_nsubexpr; | 245 static int nfa_nsubexpr; |
243 | 246 |
244 static int *post_start; /* holds the postfix form of r.e. */ | 247 static int *post_start; /* holds the postfix form of r.e. */ |
270 static int *re2post __ARGS((void)); | 273 static int *re2post __ARGS((void)); |
271 static nfa_state_T *alloc_state __ARGS((int c, nfa_state_T *out, nfa_state_T *out1)); | 274 static nfa_state_T *alloc_state __ARGS((int c, nfa_state_T *out, nfa_state_T *out1)); |
272 static nfa_state_T *post2nfa __ARGS((int *postfix, int *end, int nfa_calc_size)); | 275 static nfa_state_T *post2nfa __ARGS((int *postfix, int *end, int nfa_calc_size)); |
273 static int check_char_class __ARGS((int class, int c)); | 276 static int check_char_class __ARGS((int class, int c)); |
274 static void st_error __ARGS((int *postfix, int *end, int *p)); | 277 static void st_error __ARGS((int *postfix, int *end, int *p)); |
275 static void nfa_set_neg_listids __ARGS((nfa_state_T *start)); | 278 static void nfa_save_listids __ARGS((nfa_regprog_T *prog, int *list)); |
276 static void nfa_set_null_listids __ARGS((nfa_state_T *start)); | 279 static void nfa_restore_listids __ARGS((nfa_regprog_T *prog, int *list)); |
277 static void nfa_save_listids __ARGS((nfa_state_T *start, int *list)); | |
278 static void nfa_restore_listids __ARGS((nfa_state_T *start, int *list)); | |
279 static int nfa_re_num_cmp __ARGS((long_u val, int op, long_u pos)); | 280 static int nfa_re_num_cmp __ARGS((long_u val, int op, long_u pos)); |
280 static long nfa_regtry __ARGS((nfa_regprog_T *prog, colnr_T col)); | 281 static long nfa_regtry __ARGS((nfa_regprog_T *prog, colnr_T col)); |
281 static long nfa_regexec_both __ARGS((char_u *line, colnr_T col)); | 282 static long nfa_regexec_both __ARGS((char_u *line, colnr_T col)); |
282 static regprog_T *nfa_regcomp __ARGS((char_u *expr, int re_flags)); | 283 static regprog_T *nfa_regcomp __ARGS((char_u *expr, int re_flags)); |
283 static int nfa_regexec __ARGS((regmatch_T *rmp, char_u *line, colnr_T col)); | 284 static int nfa_regexec __ARGS((regmatch_T *rmp, char_u *line, colnr_T col)); |
2998 } | 2999 } |
2999 | 3000 |
3000 return TRUE; | 3001 return TRUE; |
3001 } | 3002 } |
3002 | 3003 |
3004 #ifdef ENABLE_LOG | |
3005 static void | |
3006 report_state(char *action, regsub_T *sub, nfa_state_T *state, int lid); | |
3007 { | |
3008 int col; | |
3009 | |
3010 if (sub->in_use <= 0) | |
3011 col = -1; | |
3012 else if (REG_MULTI) | |
3013 col = sub->list.multi[0].start.col; | |
3014 else | |
3015 col = (int)(sub->list.line[0].start - regline); | |
3016 nfa_set_code(state->c); | |
3017 fprintf(log_fd, "> %s state %d to list %d. char %d: %s (start col %d)\n", | |
3018 action, abs(state->id), lid, state->c, code, col); | |
3019 } | |
3020 #endif | |
3021 | |
3003 static void | 3022 static void |
3004 addstate(l, state, subs, off) | 3023 addstate(l, state, subs, off) |
3005 nfa_list_T *l; /* runtime state list */ | 3024 nfa_list_T *l; /* runtime state list */ |
3006 nfa_state_T *state; /* state to update */ | 3025 nfa_state_T *state; /* state to update */ |
3007 regsubs_T *subs; /* pointers to subexpressions */ | 3026 regsubs_T *subs; /* pointers to subexpressions */ |
3116 { | 3135 { |
3117 thread = &l->t[i]; | 3136 thread = &l->t[i]; |
3118 if (thread->state->id == state->id | 3137 if (thread->state->id == state->id |
3119 && sub_equal(&thread->subs.norm, &subs->norm) | 3138 && sub_equal(&thread->subs.norm, &subs->norm) |
3120 #ifdef FEAT_SYN_HL | 3139 #ifdef FEAT_SYN_HL |
3121 && sub_equal(&thread->subs.synt, &subs->synt) | 3140 && (!nfa_has_zsubexpr || |
3141 sub_equal(&thread->subs.synt, &subs->synt)) | |
3122 #endif | 3142 #endif |
3123 ) | 3143 ) |
3124 goto skip_add; | 3144 goto skip_add; |
3125 } | 3145 } |
3126 } | 3146 } |
3139 state->lastlist = l->id; | 3159 state->lastlist = l->id; |
3140 thread = &l->t[l->n++]; | 3160 thread = &l->t[l->n++]; |
3141 thread->state = state; | 3161 thread->state = state; |
3142 copy_sub(&thread->subs.norm, &subs->norm); | 3162 copy_sub(&thread->subs.norm, &subs->norm); |
3143 #ifdef FEAT_SYN_HL | 3163 #ifdef FEAT_SYN_HL |
3144 copy_sub(&thread->subs.synt, &subs->synt); | 3164 if (nfa_has_zsubexpr) |
3165 copy_sub(&thread->subs.synt, &subs->synt); | |
3145 #endif | 3166 #endif |
3146 #ifdef ENABLE_LOG | 3167 #ifdef ENABLE_LOG |
3147 { | 3168 report_state("Adding", &thread->subs.norm, state, l->id); |
3148 int col; | 3169 did_print = TRUE; |
3149 | |
3150 if (thread->subs.norm.in_use <= 0) | |
3151 col = -1; | |
3152 else if (REG_MULTI) | |
3153 col = thread->subs.norm.list.multi[0].start.col; | |
3154 else | |
3155 col = (int)(thread->subs.norm.list.line[0].start - regline); | |
3156 nfa_set_code(state->c); | |
3157 fprintf(log_fd, "> Adding state %d to list %d. char %d: %s (start col %d)\n", | |
3158 abs(state->id), l->id, state->c, code, col); | |
3159 did_print = TRUE; | |
3160 } | |
3161 #endif | 3170 #endif |
3162 } | 3171 } |
3163 | 3172 |
3164 #ifdef ENABLE_LOG | 3173 #ifdef ENABLE_LOG |
3165 if (!did_print) | 3174 if (!did_print) |
3166 { | 3175 report_state("Processing", &subs->norm, state, l->id); |
3167 int col; | |
3168 | |
3169 if (subs->norm.in_use <= 0) | |
3170 col = -1; | |
3171 else if (REG_MULTI) | |
3172 col = subs->norm.list.multi[0].start.col; | |
3173 else | |
3174 col = (int)(subs->norm.list.line[0].start - regline); | |
3175 nfa_set_code(state->c); | |
3176 fprintf(log_fd, "> Processing state %d for list %d. char %d: %s (start col %d)\n", | |
3177 abs(state->id), l->id, state->c, code, col); | |
3178 } | |
3179 #endif | 3176 #endif |
3180 switch (state->c) | 3177 switch (state->c) |
3181 { | 3178 { |
3182 case NFA_MATCH: | 3179 case NFA_MATCH: |
3183 nfa_match = TRUE; | 3180 nfa_match = TRUE; |
3598 return FALSE; | 3595 return FALSE; |
3599 } | 3596 } |
3600 #endif | 3597 #endif |
3601 | 3598 |
3602 /* | 3599 /* |
3603 * Set all NFA nodes' list ID equal to -1. | 3600 * Save list IDs for all NFA states of "prog" into "list". |
3601 * Also reset the IDs to zero. | |
3604 */ | 3602 */ |
3605 static void | 3603 static void |
3606 nfa_set_neg_listids(start) | 3604 nfa_save_listids(prog, list) |
3607 nfa_state_T *start; | 3605 nfa_regprog_T *prog; |
3608 { | |
3609 if (start != NULL && start->lastlist >= 0) | |
3610 { | |
3611 start->lastlist = -1; | |
3612 nfa_set_neg_listids(start->out); | |
3613 nfa_set_neg_listids(start->out1); | |
3614 } | |
3615 } | |
3616 | |
3617 /* | |
3618 * Set all NFA nodes' list ID equal to 0. | |
3619 */ | |
3620 static void | |
3621 nfa_set_null_listids(start) | |
3622 nfa_state_T *start; | |
3623 { | |
3624 if (start != NULL && start->lastlist == -1) | |
3625 { | |
3626 start->lastlist = 0; | |
3627 nfa_set_null_listids(start->out); | |
3628 nfa_set_null_listids(start->out1); | |
3629 } | |
3630 } | |
3631 | |
3632 /* | |
3633 * Save list IDs for all NFA states in "list". | |
3634 */ | |
3635 static void | |
3636 nfa_save_listids(start, list) | |
3637 nfa_state_T *start; | |
3638 int *list; | 3606 int *list; |
3639 { | 3607 { |
3640 if (start != NULL && start->lastlist != -1) | 3608 int i; |
3641 { | 3609 nfa_state_T *p; |
3642 list[abs(start->id)] = start->lastlist; | 3610 |
3643 start->lastlist = -1; | 3611 /* Order in the list is reverse, it's a bit faster that way. */ |
3644 nfa_save_listids(start->out, list); | 3612 p = &prog->state[0]; |
3645 nfa_save_listids(start->out1, list); | 3613 for (i = prog->nstate; --i >= 0; ) |
3614 { | |
3615 list[i] = p->lastlist; | |
3616 p->lastlist = 0; | |
3617 ++p; | |
3646 } | 3618 } |
3647 } | 3619 } |
3648 | 3620 |
3649 /* | 3621 /* |
3650 * Restore list IDs from "list" to all NFA states. | 3622 * Restore list IDs from "list" to all NFA states. |
3651 */ | 3623 */ |
3652 static void | 3624 static void |
3653 nfa_restore_listids(start, list) | 3625 nfa_restore_listids(prog, list) |
3654 nfa_state_T *start; | 3626 nfa_regprog_T *prog; |
3655 int *list; | 3627 int *list; |
3656 { | 3628 { |
3657 if (start != NULL && start->lastlist == -1) | 3629 int i; |
3658 { | 3630 nfa_state_T *p; |
3659 start->lastlist = list[abs(start->id)]; | 3631 |
3660 nfa_restore_listids(start->out, list); | 3632 p = &prog->state[0]; |
3661 nfa_restore_listids(start->out1, list); | 3633 for (i = prog->nstate; --i >= 0; ) |
3634 { | |
3635 p->lastlist = list[i]; | |
3636 ++p; | |
3662 } | 3637 } |
3663 } | 3638 } |
3664 | 3639 |
3665 static int | 3640 static int |
3666 nfa_re_num_cmp(val, op, pos) | 3641 nfa_re_num_cmp(val, op, pos) |
3671 if (op == 1) return pos > val; | 3646 if (op == 1) return pos > val; |
3672 if (op == 2) return pos < val; | 3647 if (op == 2) return pos < val; |
3673 return val == pos; | 3648 return val == pos; |
3674 } | 3649 } |
3675 | 3650 |
3676 static int nfa_regmatch __ARGS((nfa_state_T *start, regsubs_T *submatch, regsubs_T *m)); | 3651 static int nfa_regmatch __ARGS((nfa_regprog_T *prog, nfa_state_T *start, regsubs_T *submatch, regsubs_T *m)); |
3677 | 3652 |
3678 /* | 3653 /* |
3679 * Main matching routine. | 3654 * Main matching routine. |
3680 * | 3655 * |
3681 * Run NFA to determine whether it matches reginput. | 3656 * Run NFA to determine whether it matches reginput. |
3684 * | 3659 * |
3685 * Return TRUE if there is a match, FALSE otherwise. | 3660 * Return TRUE if there is a match, FALSE otherwise. |
3686 * Note: Caller must ensure that: start != NULL. | 3661 * Note: Caller must ensure that: start != NULL. |
3687 */ | 3662 */ |
3688 static int | 3663 static int |
3689 nfa_regmatch(start, submatch, m) | 3664 nfa_regmatch(prog, start, submatch, m) |
3665 nfa_regprog_T *prog; | |
3690 nfa_state_T *start; | 3666 nfa_state_T *start; |
3691 regsubs_T *submatch; | 3667 regsubs_T *submatch; |
3692 regsubs_T *m; | 3668 regsubs_T *m; |
3693 { | 3669 { |
3694 int result; | 3670 int result; |
3870 case NFA_MATCH: | 3846 case NFA_MATCH: |
3871 { | 3847 { |
3872 nfa_match = TRUE; | 3848 nfa_match = TRUE; |
3873 copy_sub(&submatch->norm, &t->subs.norm); | 3849 copy_sub(&submatch->norm, &t->subs.norm); |
3874 #ifdef FEAT_SYN_HL | 3850 #ifdef FEAT_SYN_HL |
3875 copy_sub(&submatch->synt, &t->subs.synt); | 3851 if (nfa_has_zsubexpr) |
3852 copy_sub(&submatch->synt, &t->subs.synt); | |
3876 #endif | 3853 #endif |
3877 #ifdef ENABLE_LOG | 3854 #ifdef ENABLE_LOG |
3878 log_subsexpr(&t->subs); | 3855 log_subsexpr(&t->subs); |
3879 #endif | 3856 #endif |
3880 /* Found the left-most longest match, do not look at any other | 3857 /* Found the left-most longest match, do not look at any other |
3926 /* do not set submatches for \@! */ | 3903 /* do not set submatches for \@! */ |
3927 if (!t->state->negated) | 3904 if (!t->state->negated) |
3928 { | 3905 { |
3929 copy_sub(&m->norm, &t->subs.norm); | 3906 copy_sub(&m->norm, &t->subs.norm); |
3930 #ifdef FEAT_SYN_HL | 3907 #ifdef FEAT_SYN_HL |
3931 copy_sub(&m->synt, &t->subs.synt); | 3908 if (nfa_has_zsubexpr) |
3909 copy_sub(&m->synt, &t->subs.synt); | |
3932 #endif | 3910 #endif |
3933 } | 3911 } |
3934 nfa_match = TRUE; | 3912 nfa_match = TRUE; |
3935 } | 3913 } |
3936 break; | 3914 break; |
4022 log_fd = NULL; | 4000 log_fd = NULL; |
4023 #endif | 4001 #endif |
4024 /* Have to clear the listid field of the NFA nodes, so that | 4002 /* Have to clear the listid field of the NFA nodes, so that |
4025 * nfa_regmatch() and addstate() can run properly after | 4003 * nfa_regmatch() and addstate() can run properly after |
4026 * recursion. */ | 4004 * recursion. */ |
4027 nfa_save_listids(start, listids); | 4005 nfa_save_listids(prog, listids); |
4028 nfa_set_null_listids(start); | |
4029 nfa_endp = endposp; | 4006 nfa_endp = endposp; |
4030 result = nfa_regmatch(t->state->out, submatch, m); | 4007 result = nfa_regmatch(prog, t->state->out, submatch, m); |
4031 nfa_set_neg_listids(start); | 4008 nfa_restore_listids(prog, listids); |
4032 nfa_restore_listids(start, listids); | |
4033 | 4009 |
4034 /* restore position in input text */ | 4010 /* restore position in input text */ |
4035 reginput = save_reginput; | 4011 reginput = save_reginput; |
4036 regline = save_regline; | 4012 regline = save_regline; |
4037 reglnum = save_reglnum; | 4013 reglnum = save_reglnum; |
4663 reginput = regline + col; | 4639 reginput = regline + col; |
4664 need_clear_subexpr = TRUE; | 4640 need_clear_subexpr = TRUE; |
4665 #ifdef FEAT_SYN_HL | 4641 #ifdef FEAT_SYN_HL |
4666 /* Clear the external match subpointers if necessary. */ | 4642 /* Clear the external match subpointers if necessary. */ |
4667 if (prog->reghasz == REX_SET) | 4643 if (prog->reghasz == REX_SET) |
4644 { | |
4645 nfa_has_zsubexpr = TRUE; | |
4668 need_clear_zsubexpr = TRUE; | 4646 need_clear_zsubexpr = TRUE; |
4647 } | |
4648 else | |
4649 nfa_has_zsubexpr = FALSE; | |
4669 #endif | 4650 #endif |
4670 | 4651 |
4671 #ifdef ENABLE_LOG | 4652 #ifdef ENABLE_LOG |
4672 f = fopen(NFA_REGEXP_RUN_LOG, "a"); | 4653 f = fopen(NFA_REGEXP_RUN_LOG, "a"); |
4673 if (f != NULL) | 4654 if (f != NULL) |
4692 #ifdef FEAT_SYN_HL | 4673 #ifdef FEAT_SYN_HL |
4693 clear_sub(&subs.synt); | 4674 clear_sub(&subs.synt); |
4694 clear_sub(&m.synt); | 4675 clear_sub(&m.synt); |
4695 #endif | 4676 #endif |
4696 | 4677 |
4697 if (nfa_regmatch(start, &subs, &m) == FALSE) | 4678 if (nfa_regmatch(prog, start, &subs, &m) == FALSE) |
4698 return 0; | 4679 return 0; |
4699 | 4680 |
4700 cleanup_subexpr(); | 4681 cleanup_subexpr(); |
4701 if (REG_MULTI) | 4682 if (REG_MULTI) |
4702 { | 4683 { |