comparison src/regexp_nfa.c @ 4712:832bf8136d86 v7.3.1103

updated for version 7.3.1103 Problem: New regexp engine: overhead in saving and restoring. Solution: Make saving and restoring list IDs faster. Don't copy or check \z subexpressions when they are not used.
author Bram Moolenaar <bram@vim.org>
date Sun, 02 Jun 2013 21:30:04 +0200
parents ed4e689bbea1
children fc4d7f02ea3a
comparison
equal deleted inserted replaced
4711:515ce0356f88 4712:832bf8136d86
235 static int nfa_has_zend; 235 static int nfa_has_zend;
236 236
237 /* NFA regexp \1 .. \9 encountered. */ 237 /* NFA regexp \1 .. \9 encountered. */
238 static int nfa_has_backref; 238 static int nfa_has_backref;
239 239
240 /* NFA regexp has \z( ), set zsubexpr. */
241 static int nfa_has_zsubexpr;
242
240 /* Number of sub expressions actually being used during execution. 1 if only 243 /* Number of sub expressions actually being used during execution. 1 if only
241 * the whole match (subexpr 0) is used. */ 244 * the whole match (subexpr 0) is used. */
242 static int nfa_nsubexpr; 245 static int nfa_nsubexpr;
243 246
244 static int *post_start; /* holds the postfix form of r.e. */ 247 static int *post_start; /* holds the postfix form of r.e. */
270 static int *re2post __ARGS((void)); 273 static int *re2post __ARGS((void));
271 static nfa_state_T *alloc_state __ARGS((int c, nfa_state_T *out, nfa_state_T *out1)); 274 static nfa_state_T *alloc_state __ARGS((int c, nfa_state_T *out, nfa_state_T *out1));
272 static nfa_state_T *post2nfa __ARGS((int *postfix, int *end, int nfa_calc_size)); 275 static nfa_state_T *post2nfa __ARGS((int *postfix, int *end, int nfa_calc_size));
273 static int check_char_class __ARGS((int class, int c)); 276 static int check_char_class __ARGS((int class, int c));
274 static void st_error __ARGS((int *postfix, int *end, int *p)); 277 static void st_error __ARGS((int *postfix, int *end, int *p));
275 static void nfa_set_neg_listids __ARGS((nfa_state_T *start)); 278 static void nfa_save_listids __ARGS((nfa_regprog_T *prog, int *list));
276 static void nfa_set_null_listids __ARGS((nfa_state_T *start)); 279 static void nfa_restore_listids __ARGS((nfa_regprog_T *prog, int *list));
277 static void nfa_save_listids __ARGS((nfa_state_T *start, int *list));
278 static void nfa_restore_listids __ARGS((nfa_state_T *start, int *list));
279 static int nfa_re_num_cmp __ARGS((long_u val, int op, long_u pos)); 280 static int nfa_re_num_cmp __ARGS((long_u val, int op, long_u pos));
280 static long nfa_regtry __ARGS((nfa_regprog_T *prog, colnr_T col)); 281 static long nfa_regtry __ARGS((nfa_regprog_T *prog, colnr_T col));
281 static long nfa_regexec_both __ARGS((char_u *line, colnr_T col)); 282 static long nfa_regexec_both __ARGS((char_u *line, colnr_T col));
282 static regprog_T *nfa_regcomp __ARGS((char_u *expr, int re_flags)); 283 static regprog_T *nfa_regcomp __ARGS((char_u *expr, int re_flags));
283 static int nfa_regexec __ARGS((regmatch_T *rmp, char_u *line, colnr_T col)); 284 static int nfa_regexec __ARGS((regmatch_T *rmp, char_u *line, colnr_T col));
2998 } 2999 }
2999 3000
3000 return TRUE; 3001 return TRUE;
3001 } 3002 }
3002 3003
3004 #ifdef ENABLE_LOG
3005 static void
3006 report_state(char *action, regsub_T *sub, nfa_state_T *state, int lid);
3007 {
3008 int col;
3009
3010 if (sub->in_use <= 0)
3011 col = -1;
3012 else if (REG_MULTI)
3013 col = sub->list.multi[0].start.col;
3014 else
3015 col = (int)(sub->list.line[0].start - regline);
3016 nfa_set_code(state->c);
3017 fprintf(log_fd, "> %s state %d to list %d. char %d: %s (start col %d)\n",
3018 action, abs(state->id), lid, state->c, code, col);
3019 }
3020 #endif
3021
3003 static void 3022 static void
3004 addstate(l, state, subs, off) 3023 addstate(l, state, subs, off)
3005 nfa_list_T *l; /* runtime state list */ 3024 nfa_list_T *l; /* runtime state list */
3006 nfa_state_T *state; /* state to update */ 3025 nfa_state_T *state; /* state to update */
3007 regsubs_T *subs; /* pointers to subexpressions */ 3026 regsubs_T *subs; /* pointers to subexpressions */
3116 { 3135 {
3117 thread = &l->t[i]; 3136 thread = &l->t[i];
3118 if (thread->state->id == state->id 3137 if (thread->state->id == state->id
3119 && sub_equal(&thread->subs.norm, &subs->norm) 3138 && sub_equal(&thread->subs.norm, &subs->norm)
3120 #ifdef FEAT_SYN_HL 3139 #ifdef FEAT_SYN_HL
3121 && sub_equal(&thread->subs.synt, &subs->synt) 3140 && (!nfa_has_zsubexpr ||
3141 sub_equal(&thread->subs.synt, &subs->synt))
3122 #endif 3142 #endif
3123 ) 3143 )
3124 goto skip_add; 3144 goto skip_add;
3125 } 3145 }
3126 } 3146 }
3139 state->lastlist = l->id; 3159 state->lastlist = l->id;
3140 thread = &l->t[l->n++]; 3160 thread = &l->t[l->n++];
3141 thread->state = state; 3161 thread->state = state;
3142 copy_sub(&thread->subs.norm, &subs->norm); 3162 copy_sub(&thread->subs.norm, &subs->norm);
3143 #ifdef FEAT_SYN_HL 3163 #ifdef FEAT_SYN_HL
3144 copy_sub(&thread->subs.synt, &subs->synt); 3164 if (nfa_has_zsubexpr)
3165 copy_sub(&thread->subs.synt, &subs->synt);
3145 #endif 3166 #endif
3146 #ifdef ENABLE_LOG 3167 #ifdef ENABLE_LOG
3147 { 3168 report_state("Adding", &thread->subs.norm, state, l->id);
3148 int col; 3169 did_print = TRUE;
3149
3150 if (thread->subs.norm.in_use <= 0)
3151 col = -1;
3152 else if (REG_MULTI)
3153 col = thread->subs.norm.list.multi[0].start.col;
3154 else
3155 col = (int)(thread->subs.norm.list.line[0].start - regline);
3156 nfa_set_code(state->c);
3157 fprintf(log_fd, "> Adding state %d to list %d. char %d: %s (start col %d)\n",
3158 abs(state->id), l->id, state->c, code, col);
3159 did_print = TRUE;
3160 }
3161 #endif 3170 #endif
3162 } 3171 }
3163 3172
3164 #ifdef ENABLE_LOG 3173 #ifdef ENABLE_LOG
3165 if (!did_print) 3174 if (!did_print)
3166 { 3175 report_state("Processing", &subs->norm, state, l->id);
3167 int col;
3168
3169 if (subs->norm.in_use <= 0)
3170 col = -1;
3171 else if (REG_MULTI)
3172 col = subs->norm.list.multi[0].start.col;
3173 else
3174 col = (int)(subs->norm.list.line[0].start - regline);
3175 nfa_set_code(state->c);
3176 fprintf(log_fd, "> Processing state %d for list %d. char %d: %s (start col %d)\n",
3177 abs(state->id), l->id, state->c, code, col);
3178 }
3179 #endif 3176 #endif
3180 switch (state->c) 3177 switch (state->c)
3181 { 3178 {
3182 case NFA_MATCH: 3179 case NFA_MATCH:
3183 nfa_match = TRUE; 3180 nfa_match = TRUE;
3598 return FALSE; 3595 return FALSE;
3599 } 3596 }
3600 #endif 3597 #endif
3601 3598
3602 /* 3599 /*
3603 * Set all NFA nodes' list ID equal to -1. 3600 * Save list IDs for all NFA states of "prog" into "list".
3601 * Also reset the IDs to zero.
3604 */ 3602 */
3605 static void 3603 static void
3606 nfa_set_neg_listids(start) 3604 nfa_save_listids(prog, list)
3607 nfa_state_T *start; 3605 nfa_regprog_T *prog;
3608 {
3609 if (start != NULL && start->lastlist >= 0)
3610 {
3611 start->lastlist = -1;
3612 nfa_set_neg_listids(start->out);
3613 nfa_set_neg_listids(start->out1);
3614 }
3615 }
3616
3617 /*
3618 * Set all NFA nodes' list ID equal to 0.
3619 */
3620 static void
3621 nfa_set_null_listids(start)
3622 nfa_state_T *start;
3623 {
3624 if (start != NULL && start->lastlist == -1)
3625 {
3626 start->lastlist = 0;
3627 nfa_set_null_listids(start->out);
3628 nfa_set_null_listids(start->out1);
3629 }
3630 }
3631
3632 /*
3633 * Save list IDs for all NFA states in "list".
3634 */
3635 static void
3636 nfa_save_listids(start, list)
3637 nfa_state_T *start;
3638 int *list; 3606 int *list;
3639 { 3607 {
3640 if (start != NULL && start->lastlist != -1) 3608 int i;
3641 { 3609 nfa_state_T *p;
3642 list[abs(start->id)] = start->lastlist; 3610
3643 start->lastlist = -1; 3611 /* Order in the list is reverse, it's a bit faster that way. */
3644 nfa_save_listids(start->out, list); 3612 p = &prog->state[0];
3645 nfa_save_listids(start->out1, list); 3613 for (i = prog->nstate; --i >= 0; )
3614 {
3615 list[i] = p->lastlist;
3616 p->lastlist = 0;
3617 ++p;
3646 } 3618 }
3647 } 3619 }
3648 3620
3649 /* 3621 /*
3650 * Restore list IDs from "list" to all NFA states. 3622 * Restore list IDs from "list" to all NFA states.
3651 */ 3623 */
3652 static void 3624 static void
3653 nfa_restore_listids(start, list) 3625 nfa_restore_listids(prog, list)
3654 nfa_state_T *start; 3626 nfa_regprog_T *prog;
3655 int *list; 3627 int *list;
3656 { 3628 {
3657 if (start != NULL && start->lastlist == -1) 3629 int i;
3658 { 3630 nfa_state_T *p;
3659 start->lastlist = list[abs(start->id)]; 3631
3660 nfa_restore_listids(start->out, list); 3632 p = &prog->state[0];
3661 nfa_restore_listids(start->out1, list); 3633 for (i = prog->nstate; --i >= 0; )
3634 {
3635 p->lastlist = list[i];
3636 ++p;
3662 } 3637 }
3663 } 3638 }
3664 3639
3665 static int 3640 static int
3666 nfa_re_num_cmp(val, op, pos) 3641 nfa_re_num_cmp(val, op, pos)
3671 if (op == 1) return pos > val; 3646 if (op == 1) return pos > val;
3672 if (op == 2) return pos < val; 3647 if (op == 2) return pos < val;
3673 return val == pos; 3648 return val == pos;
3674 } 3649 }
3675 3650
3676 static int nfa_regmatch __ARGS((nfa_state_T *start, regsubs_T *submatch, regsubs_T *m)); 3651 static int nfa_regmatch __ARGS((nfa_regprog_T *prog, nfa_state_T *start, regsubs_T *submatch, regsubs_T *m));
3677 3652
3678 /* 3653 /*
3679 * Main matching routine. 3654 * Main matching routine.
3680 * 3655 *
3681 * Run NFA to determine whether it matches reginput. 3656 * Run NFA to determine whether it matches reginput.
3684 * 3659 *
3685 * Return TRUE if there is a match, FALSE otherwise. 3660 * Return TRUE if there is a match, FALSE otherwise.
3686 * Note: Caller must ensure that: start != NULL. 3661 * Note: Caller must ensure that: start != NULL.
3687 */ 3662 */
3688 static int 3663 static int
3689 nfa_regmatch(start, submatch, m) 3664 nfa_regmatch(prog, start, submatch, m)
3665 nfa_regprog_T *prog;
3690 nfa_state_T *start; 3666 nfa_state_T *start;
3691 regsubs_T *submatch; 3667 regsubs_T *submatch;
3692 regsubs_T *m; 3668 regsubs_T *m;
3693 { 3669 {
3694 int result; 3670 int result;
3870 case NFA_MATCH: 3846 case NFA_MATCH:
3871 { 3847 {
3872 nfa_match = TRUE; 3848 nfa_match = TRUE;
3873 copy_sub(&submatch->norm, &t->subs.norm); 3849 copy_sub(&submatch->norm, &t->subs.norm);
3874 #ifdef FEAT_SYN_HL 3850 #ifdef FEAT_SYN_HL
3875 copy_sub(&submatch->synt, &t->subs.synt); 3851 if (nfa_has_zsubexpr)
3852 copy_sub(&submatch->synt, &t->subs.synt);
3876 #endif 3853 #endif
3877 #ifdef ENABLE_LOG 3854 #ifdef ENABLE_LOG
3878 log_subsexpr(&t->subs); 3855 log_subsexpr(&t->subs);
3879 #endif 3856 #endif
3880 /* Found the left-most longest match, do not look at any other 3857 /* Found the left-most longest match, do not look at any other
3926 /* do not set submatches for \@! */ 3903 /* do not set submatches for \@! */
3927 if (!t->state->negated) 3904 if (!t->state->negated)
3928 { 3905 {
3929 copy_sub(&m->norm, &t->subs.norm); 3906 copy_sub(&m->norm, &t->subs.norm);
3930 #ifdef FEAT_SYN_HL 3907 #ifdef FEAT_SYN_HL
3931 copy_sub(&m->synt, &t->subs.synt); 3908 if (nfa_has_zsubexpr)
3909 copy_sub(&m->synt, &t->subs.synt);
3932 #endif 3910 #endif
3933 } 3911 }
3934 nfa_match = TRUE; 3912 nfa_match = TRUE;
3935 } 3913 }
3936 break; 3914 break;
4022 log_fd = NULL; 4000 log_fd = NULL;
4023 #endif 4001 #endif
4024 /* Have to clear the listid field of the NFA nodes, so that 4002 /* Have to clear the listid field of the NFA nodes, so that
4025 * nfa_regmatch() and addstate() can run properly after 4003 * nfa_regmatch() and addstate() can run properly after
4026 * recursion. */ 4004 * recursion. */
4027 nfa_save_listids(start, listids); 4005 nfa_save_listids(prog, listids);
4028 nfa_set_null_listids(start);
4029 nfa_endp = endposp; 4006 nfa_endp = endposp;
4030 result = nfa_regmatch(t->state->out, submatch, m); 4007 result = nfa_regmatch(prog, t->state->out, submatch, m);
4031 nfa_set_neg_listids(start); 4008 nfa_restore_listids(prog, listids);
4032 nfa_restore_listids(start, listids);
4033 4009
4034 /* restore position in input text */ 4010 /* restore position in input text */
4035 reginput = save_reginput; 4011 reginput = save_reginput;
4036 regline = save_regline; 4012 regline = save_regline;
4037 reglnum = save_reglnum; 4013 reglnum = save_reglnum;
4663 reginput = regline + col; 4639 reginput = regline + col;
4664 need_clear_subexpr = TRUE; 4640 need_clear_subexpr = TRUE;
4665 #ifdef FEAT_SYN_HL 4641 #ifdef FEAT_SYN_HL
4666 /* Clear the external match subpointers if necessary. */ 4642 /* Clear the external match subpointers if necessary. */
4667 if (prog->reghasz == REX_SET) 4643 if (prog->reghasz == REX_SET)
4644 {
4645 nfa_has_zsubexpr = TRUE;
4668 need_clear_zsubexpr = TRUE; 4646 need_clear_zsubexpr = TRUE;
4647 }
4648 else
4649 nfa_has_zsubexpr = FALSE;
4669 #endif 4650 #endif
4670 4651
4671 #ifdef ENABLE_LOG 4652 #ifdef ENABLE_LOG
4672 f = fopen(NFA_REGEXP_RUN_LOG, "a"); 4653 f = fopen(NFA_REGEXP_RUN_LOG, "a");
4673 if (f != NULL) 4654 if (f != NULL)
4692 #ifdef FEAT_SYN_HL 4673 #ifdef FEAT_SYN_HL
4693 clear_sub(&subs.synt); 4674 clear_sub(&subs.synt);
4694 clear_sub(&m.synt); 4675 clear_sub(&m.synt);
4695 #endif 4676 #endif
4696 4677
4697 if (nfa_regmatch(start, &subs, &m) == FALSE) 4678 if (nfa_regmatch(prog, start, &subs, &m) == FALSE)
4698 return 0; 4679 return 0;
4699 4680
4700 cleanup_subexpr(); 4681 cleanup_subexpr();
4701 if (REG_MULTI) 4682 if (REG_MULTI)
4702 { 4683 {