comparison src/regexp_nfa.c @ 4726:3849c811cc0b v7.3.1110

updated for version 7.3.1110 Problem: New regexp matching: Using \@= and the like can be slow. Solution: Decide whether to first try matching the zero-wdith part or what follows, whatever is more likely to fail.
author Bram Moolenaar <bram@vim.org>
date Tue, 04 Jun 2013 14:23:05 +0200
parents bd6bef0bd0fb
children 43de4ebbe7ad
comparison
equal deleted inserted replaced
4725:b58729d9fc5f 4726:3849c811cc0b
2822 #ifdef FEAT_SYN_HL 2822 #ifdef FEAT_SYN_HL
2823 regsub_T synt; /* \z( .. \) matches */ 2823 regsub_T synt; /* \z( .. \) matches */
2824 #endif 2824 #endif
2825 } regsubs_T; 2825 } regsubs_T;
2826 2826
2827 /* nfa_pim_T stores a Postponed Invisible Match. */
2828 typedef struct nfa_pim_S nfa_pim_T;
2829 struct nfa_pim_S
2830 {
2831 nfa_state_T *state;
2832 int result; /* NFA_PIM_TODO, NFA_PIM_[NO]MATCH */
2833 nfa_pim_T *pim; /* another PIM at the same position */
2834 regsubs_T subs; /* submatch info, only party used */
2835 };
2836
2837 /* Values for done in nfa_pim_T. */
2838 #define NFA_PIM_TODO 0
2839 #define NFA_PIM_MATCH 1
2840 #define NFA_PIM_NOMATCH -1
2841
2842
2827 /* nfa_thread_T contains execution information of a NFA state */ 2843 /* nfa_thread_T contains execution information of a NFA state */
2828 typedef struct 2844 typedef struct
2829 { 2845 {
2830 nfa_state_T *state; 2846 nfa_state_T *state;
2831 int count; 2847 int count;
2848 nfa_pim_T *pim; /* if not NULL: postponed invisible match */
2832 regsubs_T subs; /* submatch info, only party used */ 2849 regsubs_T subs; /* submatch info, only party used */
2833 } nfa_thread_T; 2850 } nfa_thread_T;
2834 2851
2835 /* nfa_list_T contains the alternative NFA execution states. */ 2852 /* nfa_list_T contains the alternative NFA execution states. */
2836 typedef struct 2853 typedef struct
2884 static void clear_sub __ARGS((regsub_T *sub)); 2901 static void clear_sub __ARGS((regsub_T *sub));
2885 static void copy_sub __ARGS((regsub_T *to, regsub_T *from)); 2902 static void copy_sub __ARGS((regsub_T *to, regsub_T *from));
2886 static void copy_sub_off __ARGS((regsub_T *to, regsub_T *from)); 2903 static void copy_sub_off __ARGS((regsub_T *to, regsub_T *from));
2887 static int sub_equal __ARGS((regsub_T *sub1, regsub_T *sub2)); 2904 static int sub_equal __ARGS((regsub_T *sub1, regsub_T *sub2));
2888 static void addstate __ARGS((nfa_list_T *l, nfa_state_T *state, regsubs_T *subs, int off)); 2905 static void addstate __ARGS((nfa_list_T *l, nfa_state_T *state, regsubs_T *subs, int off));
2889 static void addstate_here __ARGS((nfa_list_T *l, nfa_state_T *state, regsubs_T *subs, int *ip)); 2906 static void addstate_here __ARGS((nfa_list_T *l, nfa_state_T *state, regsubs_T *subs, nfa_pim_T *pim, int *ip));
2890 2907
2891 static void 2908 static void
2892 clear_sub(sub) 2909 clear_sub(sub)
2893 regsub_T *sub; 2910 regsub_T *sub;
2894 { 2911 {
3030 return TRUE; 3047 return TRUE;
3031 } 3048 }
3032 3049
3033 #ifdef ENABLE_LOG 3050 #ifdef ENABLE_LOG
3034 static void 3051 static void
3035 report_state(char *action, regsub_T *sub, nfa_state_T *state, int lid); 3052 report_state(char *action, regsub_T *sub, nfa_state_T *state, int lid)
3036 { 3053 {
3037 int col; 3054 int col;
3038 3055
3039 if (sub->in_use <= 0) 3056 if (sub->in_use <= 0)
3040 col = -1; 3057 col = -1;
3172 ) 3189 )
3173 goto skip_add; 3190 goto skip_add;
3174 } 3191 }
3175 } 3192 }
3176 3193
3177 /* when there are backreferences the number of states may be (a 3194 /* when there are backreferences or look-behind matches the number
3178 * lot) bigger */ 3195 * of states may be (a lot) bigger */
3179 if (nfa_has_backref && l->n == l->len) 3196 if (nfa_has_backref && l->n == l->len)
3180 { 3197 {
3181 int newlen = l->len * 3 / 2 + 50; 3198 int newlen = l->len * 3 / 2 + 50;
3182 3199
3183 l->t = vim_realloc(l->t, newlen * sizeof(nfa_thread_T)); 3200 l->t = vim_realloc(l->t, newlen * sizeof(nfa_thread_T));
3186 3203
3187 /* add the state to the list */ 3204 /* add the state to the list */
3188 state->lastlist[nfa_ll_index] = l->id; 3205 state->lastlist[nfa_ll_index] = l->id;
3189 thread = &l->t[l->n++]; 3206 thread = &l->t[l->n++];
3190 thread->state = state; 3207 thread->state = state;
3208 thread->pim = NULL;
3191 copy_sub(&thread->subs.norm, &subs->norm); 3209 copy_sub(&thread->subs.norm, &subs->norm);
3192 #ifdef FEAT_SYN_HL 3210 #ifdef FEAT_SYN_HL
3193 if (nfa_has_zsubexpr) 3211 if (nfa_has_zsubexpr)
3194 copy_sub(&thread->subs.synt, &subs->synt); 3212 copy_sub(&thread->subs.synt, &subs->synt);
3195 #endif 3213 #endif
3417 * Used for zero-width matches, next state to use is the added one. 3435 * Used for zero-width matches, next state to use is the added one.
3418 * This makes sure the order of states to be tried does not change, which 3436 * This makes sure the order of states to be tried does not change, which
3419 * matters for alternatives. 3437 * matters for alternatives.
3420 */ 3438 */
3421 static void 3439 static void
3422 addstate_here(l, state, subs, ip) 3440 addstate_here(l, state, subs, pim, ip)
3423 nfa_list_T *l; /* runtime state list */ 3441 nfa_list_T *l; /* runtime state list */
3424 nfa_state_T *state; /* state to update */ 3442 nfa_state_T *state; /* state to update */
3425 regsubs_T *subs; /* pointers to subexpressions */ 3443 regsubs_T *subs; /* pointers to subexpressions */
3444 nfa_pim_T *pim; /* postponed look-behind match */
3426 int *ip; 3445 int *ip;
3427 { 3446 {
3428 int tlen = l->n; 3447 int tlen = l->n;
3429 int count; 3448 int count;
3430 int i = *ip; 3449 int listidx = *ip;
3450 int i;
3431 3451
3432 /* first add the state(s) at the end, so that we know how many there are */ 3452 /* first add the state(s) at the end, so that we know how many there are */
3433 addstate(l, state, subs, 0); 3453 addstate(l, state, subs, 0);
3434 3454
3455 /* fill in the "pim" field in the new states */
3456 if (pim != NULL)
3457 for (i = tlen; i < l->n; ++i)
3458 l->t[i].pim = pim;
3459
3435 /* when "*ip" was at the end of the list, nothing to do */ 3460 /* when "*ip" was at the end of the list, nothing to do */
3436 if (i + 1 == tlen) 3461 if (listidx + 1 == tlen)
3437 return; 3462 return;
3438 3463
3439 /* re-order to put the new state at the current position */ 3464 /* re-order to put the new state at the current position */
3440 count = l->n - tlen; 3465 count = l->n - tlen;
3441 if (count == 1) 3466 if (count == 1)
3442 { 3467 {
3443 /* overwrite the current state */ 3468 /* overwrite the current state */
3444 l->t[i] = l->t[l->n - 1]; 3469 l->t[listidx] = l->t[l->n - 1];
3445 } 3470 }
3446 else if (count > 1) 3471 else if (count > 1)
3447 { 3472 {
3448 /* make space for new states, then move them from the 3473 /* make space for new states, then move them from the
3449 * end to the current position */ 3474 * end to the current position */
3450 mch_memmove(&(l->t[i + count]), 3475 mch_memmove(&(l->t[listidx + count]),
3451 &(l->t[i + 1]), 3476 &(l->t[listidx + 1]),
3452 sizeof(nfa_thread_T) * (l->n - i - 1)); 3477 sizeof(nfa_thread_T) * (l->n - listidx - 1));
3453 mch_memmove(&(l->t[i]), 3478 mch_memmove(&(l->t[listidx]),
3454 &(l->t[l->n - 1]), 3479 &(l->t[l->n - 1]),
3455 sizeof(nfa_thread_T) * count); 3480 sizeof(nfa_thread_T) * count);
3456 } 3481 }
3457 --l->n; 3482 --l->n;
3458 *ip = i - 1; 3483 *ip = listidx - 1;
3459 } 3484 }
3460 3485
3461 /* 3486 /*
3462 * Check character class "class" against current character c. 3487 * Check character class "class" against current character c.
3463 */ 3488 */
3830 log_fd = stderr; 3855 log_fd = stderr;
3831 } 3856 }
3832 #endif 3857 #endif
3833 3858
3834 return result; 3859 return result;
3860 }
3861
3862 static int failure_chance __ARGS((nfa_state_T *state, int depth));
3863
3864 /*
3865 * Estimate the chance of a match with "state" failing.
3866 * NFA_ANY: 1
3867 * specific character: 99
3868 */
3869 static int
3870 failure_chance(state, depth)
3871 nfa_state_T *state;
3872 int depth;
3873 {
3874 int c = state->c;
3875 int l, r;
3876
3877 /* detect looping */
3878 if (depth > 4)
3879 return 1;
3880
3881 if (c == NFA_SPLIT)
3882 {
3883 if (state->out->c == NFA_SPLIT || state->out1->c == NFA_SPLIT)
3884 return 1;
3885 l = failure_chance(state->out, depth + 1);
3886 r = failure_chance(state->out1, depth + 1);
3887 return l < r ? l : r;
3888 }
3889 if (c == NFA_ANY)
3890 return 1;
3891 if (c > 0)
3892 return 99;
3893 if ((c >= NFA_MOPEN && c <= NFA_MOPEN9)
3894 || (c >= NFA_ZOPEN && c <= NFA_ZOPEN9)
3895 || c == NFA_NOPEN)
3896 return failure_chance(state->out, depth + 1);
3897 /* something else */
3898 return 50;
3835 } 3899 }
3836 3900
3837 /* 3901 /*
3838 * Main matching routine. 3902 * Main matching routine.
3839 * 3903 *
3862 int listidx; 3926 int listidx;
3863 nfa_list_T *thislist; 3927 nfa_list_T *thislist;
3864 nfa_list_T *nextlist; 3928 nfa_list_T *nextlist;
3865 nfa_list_T *neglist; 3929 nfa_list_T *neglist;
3866 int *listids = NULL; 3930 int *listids = NULL;
3931 nfa_state_T *add_state;
3932 int add_count;
3933 int add_off;
3934 garray_T pimlist;
3867 #ifdef NFA_REGEXP_DEBUG_LOG 3935 #ifdef NFA_REGEXP_DEBUG_LOG
3868 FILE *debug = fopen(NFA_REGEXP_DEBUG_LOG, "a"); 3936 FILE *debug = fopen(NFA_REGEXP_DEBUG_LOG, "a");
3869 3937
3870 if (debug == NULL) 3938 if (debug == NULL)
3871 { 3939 {
3872 EMSG2(_("(NFA) COULD NOT OPEN %s !"), NFA_REGEXP_DEBUG_LOG); 3940 EMSG2(_("(NFA) COULD NOT OPEN %s !"), NFA_REGEXP_DEBUG_LOG);
3873 return FALSE; 3941 return FALSE;
3874 } 3942 }
3875 #endif 3943 #endif
3876 nfa_match = FALSE; 3944 nfa_match = FALSE;
3945 ga_init2(&pimlist, sizeof(nfa_pim_T), 5);
3877 3946
3878 /* Allocate memory for the lists of nodes. */ 3947 /* Allocate memory for the lists of nodes. */
3879 size = (nstate + 1) * sizeof(nfa_thread_T); 3948 size = (nstate + 1) * sizeof(nfa_thread_T);
3880 list[0].t = (nfa_thread_T *)lalloc_clear(size, TRUE); 3949 list[0].t = (nfa_thread_T *)lalloc_clear(size, TRUE);
3881 list[0].len = nstate + 1; 3950 list[0].len = nstate + 1;
3921 * these rules. It is used A LOT, so use the "listtbl" table for speed */ 3990 * these rules. It is used A LOT, so use the "listtbl" table for speed */
3922 listtbl[0][0] = NULL; 3991 listtbl[0][0] = NULL;
3923 listtbl[0][1] = neglist; 3992 listtbl[0][1] = neglist;
3924 listtbl[1][0] = nextlist; 3993 listtbl[1][0] = nextlist;
3925 listtbl[1][1] = NULL; 3994 listtbl[1][1] = NULL;
3926 #define ADD_POS_NEG_STATE(node) \ 3995 #define ADD_POS_NEG_STATE(state) \
3927 ll = listtbl[result ? 1 : 0][node->negated]; \ 3996 ll = listtbl[result ? 1 : 0][state->negated]; \
3928 if (ll != NULL) \ 3997 if (ll != NULL) { \
3929 addstate(ll, node->out , &t->subs, clen); 3998 add_state = state->out; \
3930 3999 add_off = clen; \
4000 }
3931 4001
3932 /* 4002 /*
3933 * Run for each character. 4003 * Run for each character.
3934 */ 4004 */
3935 for (;;) 4005 for (;;)
3963 ++nfa_listid; 4033 ++nfa_listid;
3964 thislist->id = nfa_listid; 4034 thislist->id = nfa_listid;
3965 nextlist->id = nfa_listid + 1; 4035 nextlist->id = nfa_listid + 1;
3966 neglist->id = nfa_listid + 1; 4036 neglist->id = nfa_listid + 1;
3967 4037
4038 pimlist.ga_len = 0;
4039
3968 #ifdef ENABLE_LOG 4040 #ifdef ENABLE_LOG
3969 fprintf(log_fd, "------------------------------------------\n"); 4041 fprintf(log_fd, "------------------------------------------\n");
3970 fprintf(log_fd, ">>> Reginput is \"%s\"\n", reginput); 4042 fprintf(log_fd, ">>> Reginput is \"%s\"\n", reginput);
3971 fprintf(log_fd, ">>> Advanced one character ... Current char is %c (code %d) \n", curc, (int)curc); 4043 fprintf(log_fd, ">>> Advanced one character ... Current char is %c (code %d) \n", curc, (int)curc);
3972 fprintf(log_fd, ">>> Thislist has %d states available: ", thislist->n); 4044 fprintf(log_fd, ">>> Thislist has %d states available: ", thislist->n);
4022 4094
4023 /* 4095 /*
4024 * Handle the possible codes of the current state. 4096 * Handle the possible codes of the current state.
4025 * The most important is NFA_MATCH. 4097 * The most important is NFA_MATCH.
4026 */ 4098 */
4099 add_state = NULL;
4100 add_count = 0;
4027 switch (t->state->c) 4101 switch (t->state->c)
4028 { 4102 {
4029 case NFA_MATCH: 4103 case NFA_MATCH:
4030 { 4104 {
4031 nfa_match = TRUE; 4105 nfa_match = TRUE;
4093 nfa_match = TRUE; 4167 nfa_match = TRUE;
4094 break; 4168 break;
4095 4169
4096 case NFA_START_INVISIBLE: 4170 case NFA_START_INVISIBLE:
4097 case NFA_START_INVISIBLE_BEFORE: 4171 case NFA_START_INVISIBLE_BEFORE:
4098 result = recursive_regmatch(t->state, prog, submatch, m, 4172 /* If invisible match has a higher chance to fail, do it
4099 &listids); 4173 * right away. Otherwise postpone it until what follows is
4100 4174 * matching and causes addstate(nextlist, ..) to be called.
4101 /* for \@! it is a match when result is FALSE */ 4175 * This is indicated by the "pim" field. */
4102 if (result != t->state->negated)
4103 { 4176 {
4104 /* Copy submatch info from the recursive call */ 4177 nfa_pim_T *pim;
4105 copy_sub_off(&t->subs.norm, &m->norm); 4178 int cout = t->state->out1->out->c;
4179
4180 /* Do it directly when what follows is possibly end of
4181 * match (closing paren).
4182 * Postpone when it is \@<= or \@<!, these are expensive.
4183 * TODO: remove the check for t->pim and check multiple
4184 * where it's used?
4185 * Otherwise first do the one that has the highest chance
4186 * of failing. */
4187 if ((cout >= NFA_MCLOSE && cout <= NFA_MCLOSE9)
4188 || (cout >= NFA_ZCLOSE && cout <= NFA_ZCLOSE9)
4189 || cout == NFA_NCLOSE
4190 || t->pim != NULL
4191 || (t->state->c != NFA_START_INVISIBLE_BEFORE
4192 && failure_chance(t->state->out1->out, 0)
4193 < failure_chance(t->state->out, 0)))
4194 {
4195 /*
4196 * First try matching the invisible match, then what
4197 * follows.
4198 */
4199 result = recursive_regmatch(t->state, prog,
4200 submatch, m, &listids);
4201
4202 /* for \@! it is a match when result is FALSE */
4203 if (result != t->state->negated)
4204 {
4205 /* Copy submatch info from the recursive call */
4206 copy_sub_off(&t->subs.norm, &m->norm);
4106 #ifdef FEAT_SYN_HL 4207 #ifdef FEAT_SYN_HL
4107 copy_sub_off(&t->subs.synt, &m->synt); 4208 copy_sub_off(&t->subs.synt, &m->synt);
4108 #endif 4209 #endif
4109 4210
4110 /* t->state->out1 is the corresponding END_INVISIBLE node; 4211 /* t->state->out1 is the corresponding
4111 * Add its out to the current list (zero-width match). */ 4212 * END_INVISIBLE node; Add its out to the current
4112 addstate_here(thislist, t->state->out1->out, &t->subs, 4213 * list (zero-width match). */
4113 &listidx); 4214 addstate_here(thislist, t->state->out1->out,
4215 &t->subs, t->pim, &listidx);
4216 }
4217 }
4218 else
4219 {
4220 /*
4221 * First try matching what follows at the current
4222 * position. Only if a match is found, addstate() is
4223 * called, then verify the invisible match matches.
4224 * Add a nfa_pim_T to the following states, it
4225 * contains info about the invisible match.
4226 */
4227 if (ga_grow(&pimlist, 1) == FAIL)
4228 goto theend;
4229 pim = (nfa_pim_T *)pimlist.ga_data + pimlist.ga_len;
4230 ++pimlist.ga_len;
4231 pim->state = t->state;
4232 pim->pim = NULL;
4233 pim->result = NFA_PIM_TODO;
4234
4235 /* t->state->out1 is the corresponding END_INVISIBLE
4236 * node; Add its out to the current list (zero-width
4237 * match). */
4238 addstate_here(thislist, t->state->out1->out, &t->subs,
4239 pim, &listidx);
4240 }
4114 } 4241 }
4115 break; 4242 break;
4116 4243
4117 case NFA_BOL: 4244 case NFA_BOL:
4118 if (reginput == regline) 4245 if (reginput == regline)
4119 addstate_here(thislist, t->state->out, &t->subs, &listidx); 4246 addstate_here(thislist, t->state->out, &t->subs,
4247 t->pim, &listidx);
4120 break; 4248 break;
4121 4249
4122 case NFA_EOL: 4250 case NFA_EOL:
4123 if (curc == NUL) 4251 if (curc == NUL)
4124 addstate_here(thislist, t->state->out, &t->subs, &listidx); 4252 addstate_here(thislist, t->state->out, &t->subs,
4253 t->pim, &listidx);
4125 break; 4254 break;
4126 4255
4127 case NFA_BOW: 4256 case NFA_BOW:
4128 { 4257 {
4129 int bow = TRUE; 4258 int bow = TRUE;
4146 else if (!vim_iswordc_buf(curc, reg_buf) 4275 else if (!vim_iswordc_buf(curc, reg_buf)
4147 || (reginput > regline 4276 || (reginput > regline
4148 && vim_iswordc_buf(reginput[-1], reg_buf))) 4277 && vim_iswordc_buf(reginput[-1], reg_buf)))
4149 bow = FALSE; 4278 bow = FALSE;
4150 if (bow) 4279 if (bow)
4151 addstate_here(thislist, t->state->out, &t->subs, &listidx); 4280 addstate_here(thislist, t->state->out, &t->subs,
4281 t->pim, &listidx);
4152 break; 4282 break;
4153 } 4283 }
4154 4284
4155 case NFA_EOW: 4285 case NFA_EOW:
4156 { 4286 {
4174 else if (!vim_iswordc_buf(reginput[-1], reg_buf) 4304 else if (!vim_iswordc_buf(reginput[-1], reg_buf)
4175 || (reginput[0] != NUL 4305 || (reginput[0] != NUL
4176 && vim_iswordc_buf(curc, reg_buf))) 4306 && vim_iswordc_buf(curc, reg_buf)))
4177 eow = FALSE; 4307 eow = FALSE;
4178 if (eow) 4308 if (eow)
4179 addstate_here(thislist, t->state->out, &t->subs, &listidx); 4309 addstate_here(thislist, t->state->out, &t->subs,
4310 t->pim, &listidx);
4180 break; 4311 break;
4181 } 4312 }
4182 4313
4183 case NFA_BOF: 4314 case NFA_BOF:
4184 if (reglnum == 0 && reginput == regline 4315 if (reglnum == 0 && reginput == regline
4185 && (!REG_MULTI || reg_firstlnum == 1)) 4316 && (!REG_MULTI || reg_firstlnum == 1))
4186 addstate_here(thislist, t->state->out, &t->subs, &listidx); 4317 addstate_here(thislist, t->state->out, &t->subs,
4318 t->pim, &listidx);
4187 break; 4319 break;
4188 4320
4189 case NFA_EOF: 4321 case NFA_EOF:
4190 if (reglnum == reg_maxline && curc == NUL) 4322 if (reglnum == reg_maxline && curc == NUL)
4191 addstate_here(thislist, t->state->out, &t->subs, &listidx); 4323 addstate_here(thislist, t->state->out, &t->subs,
4324 t->pim, &listidx);
4192 break; 4325 break;
4193 4326
4194 #ifdef FEAT_MBYTE 4327 #ifdef FEAT_MBYTE
4195 case NFA_COMPOSING: 4328 case NFA_COMPOSING:
4196 { 4329 {
4275 && reglnum <= reg_maxline) 4408 && reglnum <= reg_maxline)
4276 { 4409 {
4277 go_to_nextline = TRUE; 4410 go_to_nextline = TRUE;
4278 /* Pass -1 for the offset, which means taking the position 4411 /* Pass -1 for the offset, which means taking the position
4279 * at the start of the next line. */ 4412 * at the start of the next line. */
4280 addstate(nextlist, t->state->out, &t->subs, -1); 4413 ll = nextlist;
4414 add_state = t->state->out;
4415 add_off = -1;
4281 } 4416 }
4282 else if (curc == '\n' && reg_line_lbr) 4417 else if (curc == '\n' && reg_line_lbr)
4283 { 4418 {
4284 /* match \n as if it is an ordinary character */ 4419 /* match \n as if it is an ordinary character */
4285 addstate(nextlist, t->state->out, &t->subs, 1); 4420 ll = nextlist;
4421 add_state = t->state->out;
4422 add_off = 1;
4286 } 4423 }
4287 break; 4424 break;
4288 4425
4289 case NFA_CLASS_ALNUM: 4426 case NFA_CLASS_ALNUM:
4290 case NFA_CLASS_ALPHA: 4427 case NFA_CLASS_ALPHA:
4308 4445
4309 case NFA_END_NEG_RANGE: 4446 case NFA_END_NEG_RANGE:
4310 /* This follows a series of negated nodes, like: 4447 /* This follows a series of negated nodes, like:
4311 * CHAR(x), NFA_NOT, CHAR(y), NFA_NOT etc. */ 4448 * CHAR(x), NFA_NOT, CHAR(y), NFA_NOT etc. */
4312 if (curc > 0) 4449 if (curc > 0)
4313 addstate(nextlist, t->state->out, &t->subs, clen); 4450 {
4451 ll = nextlist;
4452 add_state = t->state->out;
4453 add_off = clen;
4454 }
4314 break; 4455 break;
4315 4456
4316 case NFA_ANY: 4457 case NFA_ANY:
4317 /* Any char except '\0', (end of input) does not match. */ 4458 /* Any char except '\0', (end of input) does not match. */
4318 if (curc > 0) 4459 if (curc > 0)
4319 addstate(nextlist, t->state->out, &t->subs, clen); 4460 {
4461 ll = nextlist;
4462 add_state = t->state->out;
4463 add_off = clen;
4464 }
4320 break; 4465 break;
4321 4466
4322 /* 4467 /*
4323 * Character classes like \a for alpha, \d for digit etc. 4468 * Character classes like \a for alpha, \d for digit etc.
4324 */ 4469 */
4496 if (bytelen == 0) 4641 if (bytelen == 0)
4497 { 4642 {
4498 /* empty match always works, output of NFA_SKIP to be 4643 /* empty match always works, output of NFA_SKIP to be
4499 * used next */ 4644 * used next */
4500 addstate_here(thislist, t->state->out->out, &t->subs, 4645 addstate_here(thislist, t->state->out->out, &t->subs,
4501 &listidx); 4646 t->pim, &listidx);
4502 } 4647 }
4503 else if (bytelen <= clen) 4648 else if (bytelen <= clen)
4504 { 4649 {
4505 /* match current character, jump ahead to out of 4650 /* match current character, jump ahead to out of
4506 * NFA_SKIP */ 4651 * NFA_SKIP */
4507 addstate(nextlist, t->state->out->out, &t->subs, clen); 4652 ll = nextlist;
4653 add_state = t->state->out->out;
4654 add_off = clen;
4508 #ifdef ENABLE_LOG 4655 #ifdef ENABLE_LOG
4509 log_subsexpr(&nextlist->t[nextlist->n - 1].subs); 4656 log_subsexpr(&nextlist->t[nextlist->n - 1].subs);
4510 #endif 4657 #endif
4511 } 4658 }
4512 else 4659 else
4513 { 4660 {
4514 /* skip ofer the matched characters, set character 4661 /* skip ofer the matched characters, set character
4515 * count in NFA_SKIP */ 4662 * count in NFA_SKIP */
4516 addstate(nextlist, t->state->out, &t->subs, bytelen); 4663 ll = nextlist;
4517 nextlist->t[nextlist->n - 1].count = bytelen - clen; 4664 add_state = t->state->out;
4665 add_off = bytelen;
4666 add_count = bytelen - clen;
4518 #ifdef ENABLE_LOG 4667 #ifdef ENABLE_LOG
4519 log_subsexpr(&nextlist->t[nextlist->n - 1].subs); 4668 log_subsexpr(&nextlist->t[nextlist->n - 1].subs);
4520 #endif 4669 #endif
4521 } 4670 }
4522 4671
4526 case NFA_SKIP: 4675 case NFA_SKIP:
4527 /* charater of previous matching \1 .. \9 */ 4676 /* charater of previous matching \1 .. \9 */
4528 if (t->count - clen <= 0) 4677 if (t->count - clen <= 0)
4529 { 4678 {
4530 /* end of match, go to what follows */ 4679 /* end of match, go to what follows */
4531 addstate(nextlist, t->state->out, &t->subs, clen); 4680 ll = nextlist;
4681 add_state = t->state->out;
4682 add_off = clen;
4532 #ifdef ENABLE_LOG 4683 #ifdef ENABLE_LOG
4533 log_subsexpr(&nextlist->t[nextlist->n - 1].subs); 4684 log_subsexpr(&nextlist->t[nextlist->n - 1].subs);
4534 #endif 4685 #endif
4535 } 4686 }
4536 else 4687 else
4537 { 4688 {
4538 /* add state again with decremented count */ 4689 /* add state again with decremented count */
4539 addstate(nextlist, t->state, &t->subs, 0); 4690 ll = nextlist;
4540 nextlist->t[nextlist->n - 1].count = t->count - clen; 4691 add_state = t->state;
4692 add_off = 0;
4693 add_count = t->count - clen;
4541 #ifdef ENABLE_LOG 4694 #ifdef ENABLE_LOG
4542 log_subsexpr(&nextlist->t[nextlist->n - 1].subs); 4695 log_subsexpr(&nextlist->t[nextlist->n - 1].subs);
4543 #endif 4696 #endif
4544 } 4697 }
4545 break; 4698 break;
4555 case NFA_LNUM_LT: 4708 case NFA_LNUM_LT:
4556 result = (REG_MULTI && 4709 result = (REG_MULTI &&
4557 nfa_re_num_cmp(t->state->val, t->state->c - NFA_LNUM, 4710 nfa_re_num_cmp(t->state->val, t->state->c - NFA_LNUM,
4558 (long_u)(reglnum + reg_firstlnum))); 4711 (long_u)(reglnum + reg_firstlnum)));
4559 if (result) 4712 if (result)
4560 addstate_here(thislist, t->state->out, &t->subs, &listidx); 4713 addstate_here(thislist, t->state->out, &t->subs,
4714 t->pim, &listidx);
4561 break; 4715 break;
4562 4716
4563 case NFA_COL: 4717 case NFA_COL:
4564 case NFA_COL_GT: 4718 case NFA_COL_GT:
4565 case NFA_COL_LT: 4719 case NFA_COL_LT:
4566 result = nfa_re_num_cmp(t->state->val, t->state->c - NFA_COL, 4720 result = nfa_re_num_cmp(t->state->val, t->state->c - NFA_COL,
4567 (long_u)(reginput - regline) + 1); 4721 (long_u)(reginput - regline) + 1);
4568 if (result) 4722 if (result)
4569 addstate_here(thislist, t->state->out, &t->subs, &listidx); 4723 addstate_here(thislist, t->state->out, &t->subs,
4724 t->pim, &listidx);
4570 break; 4725 break;
4571 4726
4572 case NFA_VCOL: 4727 case NFA_VCOL:
4573 case NFA_VCOL_GT: 4728 case NFA_VCOL_GT:
4574 case NFA_VCOL_LT: 4729 case NFA_VCOL_LT:
4575 result = nfa_re_num_cmp(t->state->val, t->state->c - NFA_VCOL, 4730 result = nfa_re_num_cmp(t->state->val, t->state->c - NFA_VCOL,
4576 (long_u)win_linetabsize( 4731 (long_u)win_linetabsize(
4577 reg_win == NULL ? curwin : reg_win, 4732 reg_win == NULL ? curwin : reg_win,
4578 regline, (colnr_T)(reginput - regline)) + 1); 4733 regline, (colnr_T)(reginput - regline)) + 1);
4579 if (result) 4734 if (result)
4580 addstate_here(thislist, t->state->out, &t->subs, &listidx); 4735 addstate_here(thislist, t->state->out, &t->subs,
4736 t->pim, &listidx);
4581 break; 4737 break;
4582 4738
4583 case NFA_CURSOR: 4739 case NFA_CURSOR:
4584 result = (reg_win != NULL 4740 result = (reg_win != NULL
4585 && (reglnum + reg_firstlnum == reg_win->w_cursor.lnum) 4741 && (reglnum + reg_firstlnum == reg_win->w_cursor.lnum)
4586 && ((colnr_T)(reginput - regline) 4742 && ((colnr_T)(reginput - regline)
4587 == reg_win->w_cursor.col)); 4743 == reg_win->w_cursor.col));
4588 if (result) 4744 if (result)
4589 addstate_here(thislist, t->state->out, &t->subs, &listidx); 4745 addstate_here(thislist, t->state->out, &t->subs,
4746 t->pim, &listidx);
4590 break; 4747 break;
4591 4748
4592 default: /* regular character */ 4749 default: /* regular character */
4593 { 4750 {
4594 int c = t->state->c; 4751 int c = t->state->c;
4611 result = FALSE; 4768 result = FALSE;
4612 #endif 4769 #endif
4613 ADD_POS_NEG_STATE(t->state); 4770 ADD_POS_NEG_STATE(t->state);
4614 break; 4771 break;
4615 } 4772 }
4773
4774 } /* switch (t->state->c) */
4775
4776 if (add_state != NULL)
4777 {
4778 if (t->pim != NULL)
4779 {
4780 /* postponed invisible match */
4781 /* TODO: also do t->pim->pim recursively? */
4782 if (t->pim->result == NFA_PIM_TODO)
4783 {
4784 #ifdef ENABLE_LOG
4785 fprintf(log_fd, "\n");
4786 fprintf(log_fd, "==================================\n");
4787 fprintf(log_fd, "Postponed recursive nfa_regmatch()\n");
4788 fprintf(log_fd, "\n");
4789 #endif
4790 result = recursive_regmatch(t->pim->state,
4791 prog, submatch, m, &listids);
4792 t->pim->result = result ? NFA_PIM_MATCH
4793 : NFA_PIM_NOMATCH;
4794 /* for \@! it is a match when result is FALSE */
4795 if (result != t->pim->state->negated)
4796 {
4797 /* Copy submatch info from the recursive call */
4798 copy_sub_off(&t->pim->subs.norm, &m->norm);
4799 #ifdef FEAT_SYN_HL
4800 copy_sub_off(&t->pim->subs.synt, &m->synt);
4801 #endif
4802 }
4803 }
4804 else
4805 {
4806 result = (t->pim->result == NFA_PIM_MATCH);
4807 #ifdef ENABLE_LOG
4808 fprintf(log_fd, "\n");
4809 fprintf(log_fd, "Using previous recursive nfa_regmatch() result, result == %d\n", t->pim->result);
4810 fprintf(log_fd, "MATCH = %s\n", result == TRUE ? "OK" : "FALSE");
4811 fprintf(log_fd, "\n");
4812 #endif
4813 }
4814
4815 /* for \@! it is a match when result is FALSE */
4816 if (result != t->pim->state->negated)
4817 {
4818 /* Copy submatch info from the recursive call */
4819 copy_sub_off(&t->subs.norm, &t->pim->subs.norm);
4820 #ifdef FEAT_SYN_HL
4821 copy_sub_off(&t->subs.synt, &t->pim->subs.synt);
4822 #endif
4823 }
4824 else
4825 /* look-behind match failed, don't add the state */
4826 continue;
4827 }
4828
4829 addstate(ll, add_state, &t->subs, add_off);
4830 if (add_count > 0)
4831 nextlist->t[ll->n - 1].count = add_count;
4616 } 4832 }
4617 4833
4618 } /* for (thislist = thislist; thislist->state; thislist++) */ 4834 } /* for (thislist = thislist; thislist->state; thislist++) */
4619 4835
4620 /* Look for the start of a match in the current position by adding the 4836 /* Look for the start of a match in the current position by adding the
4678 /* Free memory */ 4894 /* Free memory */
4679 vim_free(list[0].t); 4895 vim_free(list[0].t);
4680 vim_free(list[1].t); 4896 vim_free(list[1].t);
4681 vim_free(list[2].t); 4897 vim_free(list[2].t);
4682 vim_free(listids); 4898 vim_free(listids);
4899 ga_clear(&pimlist);
4683 #undef ADD_POS_NEG_STATE 4900 #undef ADD_POS_NEG_STATE
4684 #ifdef NFA_REGEXP_DEBUG_LOG 4901 #ifdef NFA_REGEXP_DEBUG_LOG
4685 fclose(debug); 4902 fclose(debug);
4686 #endif 4903 #endif
4687 4904