Mercurial > vim
comparison src/regexp_nfa.c @ 4726:3849c811cc0b v7.3.1110
updated for version 7.3.1110
Problem: New regexp matching: Using \@= and the like can be slow.
Solution: Decide whether to first try matching the zero-wdith part or what
follows, whatever is more likely to fail.
author | Bram Moolenaar <bram@vim.org> |
---|---|
date | Tue, 04 Jun 2013 14:23:05 +0200 |
parents | bd6bef0bd0fb |
children | 43de4ebbe7ad |
comparison
equal
deleted
inserted
replaced
4725:b58729d9fc5f | 4726:3849c811cc0b |
---|---|
2822 #ifdef FEAT_SYN_HL | 2822 #ifdef FEAT_SYN_HL |
2823 regsub_T synt; /* \z( .. \) matches */ | 2823 regsub_T synt; /* \z( .. \) matches */ |
2824 #endif | 2824 #endif |
2825 } regsubs_T; | 2825 } regsubs_T; |
2826 | 2826 |
2827 /* nfa_pim_T stores a Postponed Invisible Match. */ | |
2828 typedef struct nfa_pim_S nfa_pim_T; | |
2829 struct nfa_pim_S | |
2830 { | |
2831 nfa_state_T *state; | |
2832 int result; /* NFA_PIM_TODO, NFA_PIM_[NO]MATCH */ | |
2833 nfa_pim_T *pim; /* another PIM at the same position */ | |
2834 regsubs_T subs; /* submatch info, only party used */ | |
2835 }; | |
2836 | |
2837 /* Values for done in nfa_pim_T. */ | |
2838 #define NFA_PIM_TODO 0 | |
2839 #define NFA_PIM_MATCH 1 | |
2840 #define NFA_PIM_NOMATCH -1 | |
2841 | |
2842 | |
2827 /* nfa_thread_T contains execution information of a NFA state */ | 2843 /* nfa_thread_T contains execution information of a NFA state */ |
2828 typedef struct | 2844 typedef struct |
2829 { | 2845 { |
2830 nfa_state_T *state; | 2846 nfa_state_T *state; |
2831 int count; | 2847 int count; |
2848 nfa_pim_T *pim; /* if not NULL: postponed invisible match */ | |
2832 regsubs_T subs; /* submatch info, only party used */ | 2849 regsubs_T subs; /* submatch info, only party used */ |
2833 } nfa_thread_T; | 2850 } nfa_thread_T; |
2834 | 2851 |
2835 /* nfa_list_T contains the alternative NFA execution states. */ | 2852 /* nfa_list_T contains the alternative NFA execution states. */ |
2836 typedef struct | 2853 typedef struct |
2884 static void clear_sub __ARGS((regsub_T *sub)); | 2901 static void clear_sub __ARGS((regsub_T *sub)); |
2885 static void copy_sub __ARGS((regsub_T *to, regsub_T *from)); | 2902 static void copy_sub __ARGS((regsub_T *to, regsub_T *from)); |
2886 static void copy_sub_off __ARGS((regsub_T *to, regsub_T *from)); | 2903 static void copy_sub_off __ARGS((regsub_T *to, regsub_T *from)); |
2887 static int sub_equal __ARGS((regsub_T *sub1, regsub_T *sub2)); | 2904 static int sub_equal __ARGS((regsub_T *sub1, regsub_T *sub2)); |
2888 static void addstate __ARGS((nfa_list_T *l, nfa_state_T *state, regsubs_T *subs, int off)); | 2905 static void addstate __ARGS((nfa_list_T *l, nfa_state_T *state, regsubs_T *subs, int off)); |
2889 static void addstate_here __ARGS((nfa_list_T *l, nfa_state_T *state, regsubs_T *subs, int *ip)); | 2906 static void addstate_here __ARGS((nfa_list_T *l, nfa_state_T *state, regsubs_T *subs, nfa_pim_T *pim, int *ip)); |
2890 | 2907 |
2891 static void | 2908 static void |
2892 clear_sub(sub) | 2909 clear_sub(sub) |
2893 regsub_T *sub; | 2910 regsub_T *sub; |
2894 { | 2911 { |
3030 return TRUE; | 3047 return TRUE; |
3031 } | 3048 } |
3032 | 3049 |
3033 #ifdef ENABLE_LOG | 3050 #ifdef ENABLE_LOG |
3034 static void | 3051 static void |
3035 report_state(char *action, regsub_T *sub, nfa_state_T *state, int lid); | 3052 report_state(char *action, regsub_T *sub, nfa_state_T *state, int lid) |
3036 { | 3053 { |
3037 int col; | 3054 int col; |
3038 | 3055 |
3039 if (sub->in_use <= 0) | 3056 if (sub->in_use <= 0) |
3040 col = -1; | 3057 col = -1; |
3172 ) | 3189 ) |
3173 goto skip_add; | 3190 goto skip_add; |
3174 } | 3191 } |
3175 } | 3192 } |
3176 | 3193 |
3177 /* when there are backreferences the number of states may be (a | 3194 /* when there are backreferences or look-behind matches the number |
3178 * lot) bigger */ | 3195 * of states may be (a lot) bigger */ |
3179 if (nfa_has_backref && l->n == l->len) | 3196 if (nfa_has_backref && l->n == l->len) |
3180 { | 3197 { |
3181 int newlen = l->len * 3 / 2 + 50; | 3198 int newlen = l->len * 3 / 2 + 50; |
3182 | 3199 |
3183 l->t = vim_realloc(l->t, newlen * sizeof(nfa_thread_T)); | 3200 l->t = vim_realloc(l->t, newlen * sizeof(nfa_thread_T)); |
3186 | 3203 |
3187 /* add the state to the list */ | 3204 /* add the state to the list */ |
3188 state->lastlist[nfa_ll_index] = l->id; | 3205 state->lastlist[nfa_ll_index] = l->id; |
3189 thread = &l->t[l->n++]; | 3206 thread = &l->t[l->n++]; |
3190 thread->state = state; | 3207 thread->state = state; |
3208 thread->pim = NULL; | |
3191 copy_sub(&thread->subs.norm, &subs->norm); | 3209 copy_sub(&thread->subs.norm, &subs->norm); |
3192 #ifdef FEAT_SYN_HL | 3210 #ifdef FEAT_SYN_HL |
3193 if (nfa_has_zsubexpr) | 3211 if (nfa_has_zsubexpr) |
3194 copy_sub(&thread->subs.synt, &subs->synt); | 3212 copy_sub(&thread->subs.synt, &subs->synt); |
3195 #endif | 3213 #endif |
3417 * Used for zero-width matches, next state to use is the added one. | 3435 * Used for zero-width matches, next state to use is the added one. |
3418 * This makes sure the order of states to be tried does not change, which | 3436 * This makes sure the order of states to be tried does not change, which |
3419 * matters for alternatives. | 3437 * matters for alternatives. |
3420 */ | 3438 */ |
3421 static void | 3439 static void |
3422 addstate_here(l, state, subs, ip) | 3440 addstate_here(l, state, subs, pim, ip) |
3423 nfa_list_T *l; /* runtime state list */ | 3441 nfa_list_T *l; /* runtime state list */ |
3424 nfa_state_T *state; /* state to update */ | 3442 nfa_state_T *state; /* state to update */ |
3425 regsubs_T *subs; /* pointers to subexpressions */ | 3443 regsubs_T *subs; /* pointers to subexpressions */ |
3444 nfa_pim_T *pim; /* postponed look-behind match */ | |
3426 int *ip; | 3445 int *ip; |
3427 { | 3446 { |
3428 int tlen = l->n; | 3447 int tlen = l->n; |
3429 int count; | 3448 int count; |
3430 int i = *ip; | 3449 int listidx = *ip; |
3450 int i; | |
3431 | 3451 |
3432 /* first add the state(s) at the end, so that we know how many there are */ | 3452 /* first add the state(s) at the end, so that we know how many there are */ |
3433 addstate(l, state, subs, 0); | 3453 addstate(l, state, subs, 0); |
3434 | 3454 |
3455 /* fill in the "pim" field in the new states */ | |
3456 if (pim != NULL) | |
3457 for (i = tlen; i < l->n; ++i) | |
3458 l->t[i].pim = pim; | |
3459 | |
3435 /* when "*ip" was at the end of the list, nothing to do */ | 3460 /* when "*ip" was at the end of the list, nothing to do */ |
3436 if (i + 1 == tlen) | 3461 if (listidx + 1 == tlen) |
3437 return; | 3462 return; |
3438 | 3463 |
3439 /* re-order to put the new state at the current position */ | 3464 /* re-order to put the new state at the current position */ |
3440 count = l->n - tlen; | 3465 count = l->n - tlen; |
3441 if (count == 1) | 3466 if (count == 1) |
3442 { | 3467 { |
3443 /* overwrite the current state */ | 3468 /* overwrite the current state */ |
3444 l->t[i] = l->t[l->n - 1]; | 3469 l->t[listidx] = l->t[l->n - 1]; |
3445 } | 3470 } |
3446 else if (count > 1) | 3471 else if (count > 1) |
3447 { | 3472 { |
3448 /* make space for new states, then move them from the | 3473 /* make space for new states, then move them from the |
3449 * end to the current position */ | 3474 * end to the current position */ |
3450 mch_memmove(&(l->t[i + count]), | 3475 mch_memmove(&(l->t[listidx + count]), |
3451 &(l->t[i + 1]), | 3476 &(l->t[listidx + 1]), |
3452 sizeof(nfa_thread_T) * (l->n - i - 1)); | 3477 sizeof(nfa_thread_T) * (l->n - listidx - 1)); |
3453 mch_memmove(&(l->t[i]), | 3478 mch_memmove(&(l->t[listidx]), |
3454 &(l->t[l->n - 1]), | 3479 &(l->t[l->n - 1]), |
3455 sizeof(nfa_thread_T) * count); | 3480 sizeof(nfa_thread_T) * count); |
3456 } | 3481 } |
3457 --l->n; | 3482 --l->n; |
3458 *ip = i - 1; | 3483 *ip = listidx - 1; |
3459 } | 3484 } |
3460 | 3485 |
3461 /* | 3486 /* |
3462 * Check character class "class" against current character c. | 3487 * Check character class "class" against current character c. |
3463 */ | 3488 */ |
3830 log_fd = stderr; | 3855 log_fd = stderr; |
3831 } | 3856 } |
3832 #endif | 3857 #endif |
3833 | 3858 |
3834 return result; | 3859 return result; |
3860 } | |
3861 | |
3862 static int failure_chance __ARGS((nfa_state_T *state, int depth)); | |
3863 | |
3864 /* | |
3865 * Estimate the chance of a match with "state" failing. | |
3866 * NFA_ANY: 1 | |
3867 * specific character: 99 | |
3868 */ | |
3869 static int | |
3870 failure_chance(state, depth) | |
3871 nfa_state_T *state; | |
3872 int depth; | |
3873 { | |
3874 int c = state->c; | |
3875 int l, r; | |
3876 | |
3877 /* detect looping */ | |
3878 if (depth > 4) | |
3879 return 1; | |
3880 | |
3881 if (c == NFA_SPLIT) | |
3882 { | |
3883 if (state->out->c == NFA_SPLIT || state->out1->c == NFA_SPLIT) | |
3884 return 1; | |
3885 l = failure_chance(state->out, depth + 1); | |
3886 r = failure_chance(state->out1, depth + 1); | |
3887 return l < r ? l : r; | |
3888 } | |
3889 if (c == NFA_ANY) | |
3890 return 1; | |
3891 if (c > 0) | |
3892 return 99; | |
3893 if ((c >= NFA_MOPEN && c <= NFA_MOPEN9) | |
3894 || (c >= NFA_ZOPEN && c <= NFA_ZOPEN9) | |
3895 || c == NFA_NOPEN) | |
3896 return failure_chance(state->out, depth + 1); | |
3897 /* something else */ | |
3898 return 50; | |
3835 } | 3899 } |
3836 | 3900 |
3837 /* | 3901 /* |
3838 * Main matching routine. | 3902 * Main matching routine. |
3839 * | 3903 * |
3862 int listidx; | 3926 int listidx; |
3863 nfa_list_T *thislist; | 3927 nfa_list_T *thislist; |
3864 nfa_list_T *nextlist; | 3928 nfa_list_T *nextlist; |
3865 nfa_list_T *neglist; | 3929 nfa_list_T *neglist; |
3866 int *listids = NULL; | 3930 int *listids = NULL; |
3931 nfa_state_T *add_state; | |
3932 int add_count; | |
3933 int add_off; | |
3934 garray_T pimlist; | |
3867 #ifdef NFA_REGEXP_DEBUG_LOG | 3935 #ifdef NFA_REGEXP_DEBUG_LOG |
3868 FILE *debug = fopen(NFA_REGEXP_DEBUG_LOG, "a"); | 3936 FILE *debug = fopen(NFA_REGEXP_DEBUG_LOG, "a"); |
3869 | 3937 |
3870 if (debug == NULL) | 3938 if (debug == NULL) |
3871 { | 3939 { |
3872 EMSG2(_("(NFA) COULD NOT OPEN %s !"), NFA_REGEXP_DEBUG_LOG); | 3940 EMSG2(_("(NFA) COULD NOT OPEN %s !"), NFA_REGEXP_DEBUG_LOG); |
3873 return FALSE; | 3941 return FALSE; |
3874 } | 3942 } |
3875 #endif | 3943 #endif |
3876 nfa_match = FALSE; | 3944 nfa_match = FALSE; |
3945 ga_init2(&pimlist, sizeof(nfa_pim_T), 5); | |
3877 | 3946 |
3878 /* Allocate memory for the lists of nodes. */ | 3947 /* Allocate memory for the lists of nodes. */ |
3879 size = (nstate + 1) * sizeof(nfa_thread_T); | 3948 size = (nstate + 1) * sizeof(nfa_thread_T); |
3880 list[0].t = (nfa_thread_T *)lalloc_clear(size, TRUE); | 3949 list[0].t = (nfa_thread_T *)lalloc_clear(size, TRUE); |
3881 list[0].len = nstate + 1; | 3950 list[0].len = nstate + 1; |
3921 * these rules. It is used A LOT, so use the "listtbl" table for speed */ | 3990 * these rules. It is used A LOT, so use the "listtbl" table for speed */ |
3922 listtbl[0][0] = NULL; | 3991 listtbl[0][0] = NULL; |
3923 listtbl[0][1] = neglist; | 3992 listtbl[0][1] = neglist; |
3924 listtbl[1][0] = nextlist; | 3993 listtbl[1][0] = nextlist; |
3925 listtbl[1][1] = NULL; | 3994 listtbl[1][1] = NULL; |
3926 #define ADD_POS_NEG_STATE(node) \ | 3995 #define ADD_POS_NEG_STATE(state) \ |
3927 ll = listtbl[result ? 1 : 0][node->negated]; \ | 3996 ll = listtbl[result ? 1 : 0][state->negated]; \ |
3928 if (ll != NULL) \ | 3997 if (ll != NULL) { \ |
3929 addstate(ll, node->out , &t->subs, clen); | 3998 add_state = state->out; \ |
3930 | 3999 add_off = clen; \ |
4000 } | |
3931 | 4001 |
3932 /* | 4002 /* |
3933 * Run for each character. | 4003 * Run for each character. |
3934 */ | 4004 */ |
3935 for (;;) | 4005 for (;;) |
3963 ++nfa_listid; | 4033 ++nfa_listid; |
3964 thislist->id = nfa_listid; | 4034 thislist->id = nfa_listid; |
3965 nextlist->id = nfa_listid + 1; | 4035 nextlist->id = nfa_listid + 1; |
3966 neglist->id = nfa_listid + 1; | 4036 neglist->id = nfa_listid + 1; |
3967 | 4037 |
4038 pimlist.ga_len = 0; | |
4039 | |
3968 #ifdef ENABLE_LOG | 4040 #ifdef ENABLE_LOG |
3969 fprintf(log_fd, "------------------------------------------\n"); | 4041 fprintf(log_fd, "------------------------------------------\n"); |
3970 fprintf(log_fd, ">>> Reginput is \"%s\"\n", reginput); | 4042 fprintf(log_fd, ">>> Reginput is \"%s\"\n", reginput); |
3971 fprintf(log_fd, ">>> Advanced one character ... Current char is %c (code %d) \n", curc, (int)curc); | 4043 fprintf(log_fd, ">>> Advanced one character ... Current char is %c (code %d) \n", curc, (int)curc); |
3972 fprintf(log_fd, ">>> Thislist has %d states available: ", thislist->n); | 4044 fprintf(log_fd, ">>> Thislist has %d states available: ", thislist->n); |
4022 | 4094 |
4023 /* | 4095 /* |
4024 * Handle the possible codes of the current state. | 4096 * Handle the possible codes of the current state. |
4025 * The most important is NFA_MATCH. | 4097 * The most important is NFA_MATCH. |
4026 */ | 4098 */ |
4099 add_state = NULL; | |
4100 add_count = 0; | |
4027 switch (t->state->c) | 4101 switch (t->state->c) |
4028 { | 4102 { |
4029 case NFA_MATCH: | 4103 case NFA_MATCH: |
4030 { | 4104 { |
4031 nfa_match = TRUE; | 4105 nfa_match = TRUE; |
4093 nfa_match = TRUE; | 4167 nfa_match = TRUE; |
4094 break; | 4168 break; |
4095 | 4169 |
4096 case NFA_START_INVISIBLE: | 4170 case NFA_START_INVISIBLE: |
4097 case NFA_START_INVISIBLE_BEFORE: | 4171 case NFA_START_INVISIBLE_BEFORE: |
4098 result = recursive_regmatch(t->state, prog, submatch, m, | 4172 /* If invisible match has a higher chance to fail, do it |
4099 &listids); | 4173 * right away. Otherwise postpone it until what follows is |
4100 | 4174 * matching and causes addstate(nextlist, ..) to be called. |
4101 /* for \@! it is a match when result is FALSE */ | 4175 * This is indicated by the "pim" field. */ |
4102 if (result != t->state->negated) | |
4103 { | 4176 { |
4104 /* Copy submatch info from the recursive call */ | 4177 nfa_pim_T *pim; |
4105 copy_sub_off(&t->subs.norm, &m->norm); | 4178 int cout = t->state->out1->out->c; |
4179 | |
4180 /* Do it directly when what follows is possibly end of | |
4181 * match (closing paren). | |
4182 * Postpone when it is \@<= or \@<!, these are expensive. | |
4183 * TODO: remove the check for t->pim and check multiple | |
4184 * where it's used? | |
4185 * Otherwise first do the one that has the highest chance | |
4186 * of failing. */ | |
4187 if ((cout >= NFA_MCLOSE && cout <= NFA_MCLOSE9) | |
4188 || (cout >= NFA_ZCLOSE && cout <= NFA_ZCLOSE9) | |
4189 || cout == NFA_NCLOSE | |
4190 || t->pim != NULL | |
4191 || (t->state->c != NFA_START_INVISIBLE_BEFORE | |
4192 && failure_chance(t->state->out1->out, 0) | |
4193 < failure_chance(t->state->out, 0))) | |
4194 { | |
4195 /* | |
4196 * First try matching the invisible match, then what | |
4197 * follows. | |
4198 */ | |
4199 result = recursive_regmatch(t->state, prog, | |
4200 submatch, m, &listids); | |
4201 | |
4202 /* for \@! it is a match when result is FALSE */ | |
4203 if (result != t->state->negated) | |
4204 { | |
4205 /* Copy submatch info from the recursive call */ | |
4206 copy_sub_off(&t->subs.norm, &m->norm); | |
4106 #ifdef FEAT_SYN_HL | 4207 #ifdef FEAT_SYN_HL |
4107 copy_sub_off(&t->subs.synt, &m->synt); | 4208 copy_sub_off(&t->subs.synt, &m->synt); |
4108 #endif | 4209 #endif |
4109 | 4210 |
4110 /* t->state->out1 is the corresponding END_INVISIBLE node; | 4211 /* t->state->out1 is the corresponding |
4111 * Add its out to the current list (zero-width match). */ | 4212 * END_INVISIBLE node; Add its out to the current |
4112 addstate_here(thislist, t->state->out1->out, &t->subs, | 4213 * list (zero-width match). */ |
4113 &listidx); | 4214 addstate_here(thislist, t->state->out1->out, |
4215 &t->subs, t->pim, &listidx); | |
4216 } | |
4217 } | |
4218 else | |
4219 { | |
4220 /* | |
4221 * First try matching what follows at the current | |
4222 * position. Only if a match is found, addstate() is | |
4223 * called, then verify the invisible match matches. | |
4224 * Add a nfa_pim_T to the following states, it | |
4225 * contains info about the invisible match. | |
4226 */ | |
4227 if (ga_grow(&pimlist, 1) == FAIL) | |
4228 goto theend; | |
4229 pim = (nfa_pim_T *)pimlist.ga_data + pimlist.ga_len; | |
4230 ++pimlist.ga_len; | |
4231 pim->state = t->state; | |
4232 pim->pim = NULL; | |
4233 pim->result = NFA_PIM_TODO; | |
4234 | |
4235 /* t->state->out1 is the corresponding END_INVISIBLE | |
4236 * node; Add its out to the current list (zero-width | |
4237 * match). */ | |
4238 addstate_here(thislist, t->state->out1->out, &t->subs, | |
4239 pim, &listidx); | |
4240 } | |
4114 } | 4241 } |
4115 break; | 4242 break; |
4116 | 4243 |
4117 case NFA_BOL: | 4244 case NFA_BOL: |
4118 if (reginput == regline) | 4245 if (reginput == regline) |
4119 addstate_here(thislist, t->state->out, &t->subs, &listidx); | 4246 addstate_here(thislist, t->state->out, &t->subs, |
4247 t->pim, &listidx); | |
4120 break; | 4248 break; |
4121 | 4249 |
4122 case NFA_EOL: | 4250 case NFA_EOL: |
4123 if (curc == NUL) | 4251 if (curc == NUL) |
4124 addstate_here(thislist, t->state->out, &t->subs, &listidx); | 4252 addstate_here(thislist, t->state->out, &t->subs, |
4253 t->pim, &listidx); | |
4125 break; | 4254 break; |
4126 | 4255 |
4127 case NFA_BOW: | 4256 case NFA_BOW: |
4128 { | 4257 { |
4129 int bow = TRUE; | 4258 int bow = TRUE; |
4146 else if (!vim_iswordc_buf(curc, reg_buf) | 4275 else if (!vim_iswordc_buf(curc, reg_buf) |
4147 || (reginput > regline | 4276 || (reginput > regline |
4148 && vim_iswordc_buf(reginput[-1], reg_buf))) | 4277 && vim_iswordc_buf(reginput[-1], reg_buf))) |
4149 bow = FALSE; | 4278 bow = FALSE; |
4150 if (bow) | 4279 if (bow) |
4151 addstate_here(thislist, t->state->out, &t->subs, &listidx); | 4280 addstate_here(thislist, t->state->out, &t->subs, |
4281 t->pim, &listidx); | |
4152 break; | 4282 break; |
4153 } | 4283 } |
4154 | 4284 |
4155 case NFA_EOW: | 4285 case NFA_EOW: |
4156 { | 4286 { |
4174 else if (!vim_iswordc_buf(reginput[-1], reg_buf) | 4304 else if (!vim_iswordc_buf(reginput[-1], reg_buf) |
4175 || (reginput[0] != NUL | 4305 || (reginput[0] != NUL |
4176 && vim_iswordc_buf(curc, reg_buf))) | 4306 && vim_iswordc_buf(curc, reg_buf))) |
4177 eow = FALSE; | 4307 eow = FALSE; |
4178 if (eow) | 4308 if (eow) |
4179 addstate_here(thislist, t->state->out, &t->subs, &listidx); | 4309 addstate_here(thislist, t->state->out, &t->subs, |
4310 t->pim, &listidx); | |
4180 break; | 4311 break; |
4181 } | 4312 } |
4182 | 4313 |
4183 case NFA_BOF: | 4314 case NFA_BOF: |
4184 if (reglnum == 0 && reginput == regline | 4315 if (reglnum == 0 && reginput == regline |
4185 && (!REG_MULTI || reg_firstlnum == 1)) | 4316 && (!REG_MULTI || reg_firstlnum == 1)) |
4186 addstate_here(thislist, t->state->out, &t->subs, &listidx); | 4317 addstate_here(thislist, t->state->out, &t->subs, |
4318 t->pim, &listidx); | |
4187 break; | 4319 break; |
4188 | 4320 |
4189 case NFA_EOF: | 4321 case NFA_EOF: |
4190 if (reglnum == reg_maxline && curc == NUL) | 4322 if (reglnum == reg_maxline && curc == NUL) |
4191 addstate_here(thislist, t->state->out, &t->subs, &listidx); | 4323 addstate_here(thislist, t->state->out, &t->subs, |
4324 t->pim, &listidx); | |
4192 break; | 4325 break; |
4193 | 4326 |
4194 #ifdef FEAT_MBYTE | 4327 #ifdef FEAT_MBYTE |
4195 case NFA_COMPOSING: | 4328 case NFA_COMPOSING: |
4196 { | 4329 { |
4275 && reglnum <= reg_maxline) | 4408 && reglnum <= reg_maxline) |
4276 { | 4409 { |
4277 go_to_nextline = TRUE; | 4410 go_to_nextline = TRUE; |
4278 /* Pass -1 for the offset, which means taking the position | 4411 /* Pass -1 for the offset, which means taking the position |
4279 * at the start of the next line. */ | 4412 * at the start of the next line. */ |
4280 addstate(nextlist, t->state->out, &t->subs, -1); | 4413 ll = nextlist; |
4414 add_state = t->state->out; | |
4415 add_off = -1; | |
4281 } | 4416 } |
4282 else if (curc == '\n' && reg_line_lbr) | 4417 else if (curc == '\n' && reg_line_lbr) |
4283 { | 4418 { |
4284 /* match \n as if it is an ordinary character */ | 4419 /* match \n as if it is an ordinary character */ |
4285 addstate(nextlist, t->state->out, &t->subs, 1); | 4420 ll = nextlist; |
4421 add_state = t->state->out; | |
4422 add_off = 1; | |
4286 } | 4423 } |
4287 break; | 4424 break; |
4288 | 4425 |
4289 case NFA_CLASS_ALNUM: | 4426 case NFA_CLASS_ALNUM: |
4290 case NFA_CLASS_ALPHA: | 4427 case NFA_CLASS_ALPHA: |
4308 | 4445 |
4309 case NFA_END_NEG_RANGE: | 4446 case NFA_END_NEG_RANGE: |
4310 /* This follows a series of negated nodes, like: | 4447 /* This follows a series of negated nodes, like: |
4311 * CHAR(x), NFA_NOT, CHAR(y), NFA_NOT etc. */ | 4448 * CHAR(x), NFA_NOT, CHAR(y), NFA_NOT etc. */ |
4312 if (curc > 0) | 4449 if (curc > 0) |
4313 addstate(nextlist, t->state->out, &t->subs, clen); | 4450 { |
4451 ll = nextlist; | |
4452 add_state = t->state->out; | |
4453 add_off = clen; | |
4454 } | |
4314 break; | 4455 break; |
4315 | 4456 |
4316 case NFA_ANY: | 4457 case NFA_ANY: |
4317 /* Any char except '\0', (end of input) does not match. */ | 4458 /* Any char except '\0', (end of input) does not match. */ |
4318 if (curc > 0) | 4459 if (curc > 0) |
4319 addstate(nextlist, t->state->out, &t->subs, clen); | 4460 { |
4461 ll = nextlist; | |
4462 add_state = t->state->out; | |
4463 add_off = clen; | |
4464 } | |
4320 break; | 4465 break; |
4321 | 4466 |
4322 /* | 4467 /* |
4323 * Character classes like \a for alpha, \d for digit etc. | 4468 * Character classes like \a for alpha, \d for digit etc. |
4324 */ | 4469 */ |
4496 if (bytelen == 0) | 4641 if (bytelen == 0) |
4497 { | 4642 { |
4498 /* empty match always works, output of NFA_SKIP to be | 4643 /* empty match always works, output of NFA_SKIP to be |
4499 * used next */ | 4644 * used next */ |
4500 addstate_here(thislist, t->state->out->out, &t->subs, | 4645 addstate_here(thislist, t->state->out->out, &t->subs, |
4501 &listidx); | 4646 t->pim, &listidx); |
4502 } | 4647 } |
4503 else if (bytelen <= clen) | 4648 else if (bytelen <= clen) |
4504 { | 4649 { |
4505 /* match current character, jump ahead to out of | 4650 /* match current character, jump ahead to out of |
4506 * NFA_SKIP */ | 4651 * NFA_SKIP */ |
4507 addstate(nextlist, t->state->out->out, &t->subs, clen); | 4652 ll = nextlist; |
4653 add_state = t->state->out->out; | |
4654 add_off = clen; | |
4508 #ifdef ENABLE_LOG | 4655 #ifdef ENABLE_LOG |
4509 log_subsexpr(&nextlist->t[nextlist->n - 1].subs); | 4656 log_subsexpr(&nextlist->t[nextlist->n - 1].subs); |
4510 #endif | 4657 #endif |
4511 } | 4658 } |
4512 else | 4659 else |
4513 { | 4660 { |
4514 /* skip ofer the matched characters, set character | 4661 /* skip ofer the matched characters, set character |
4515 * count in NFA_SKIP */ | 4662 * count in NFA_SKIP */ |
4516 addstate(nextlist, t->state->out, &t->subs, bytelen); | 4663 ll = nextlist; |
4517 nextlist->t[nextlist->n - 1].count = bytelen - clen; | 4664 add_state = t->state->out; |
4665 add_off = bytelen; | |
4666 add_count = bytelen - clen; | |
4518 #ifdef ENABLE_LOG | 4667 #ifdef ENABLE_LOG |
4519 log_subsexpr(&nextlist->t[nextlist->n - 1].subs); | 4668 log_subsexpr(&nextlist->t[nextlist->n - 1].subs); |
4520 #endif | 4669 #endif |
4521 } | 4670 } |
4522 | 4671 |
4526 case NFA_SKIP: | 4675 case NFA_SKIP: |
4527 /* charater of previous matching \1 .. \9 */ | 4676 /* charater of previous matching \1 .. \9 */ |
4528 if (t->count - clen <= 0) | 4677 if (t->count - clen <= 0) |
4529 { | 4678 { |
4530 /* end of match, go to what follows */ | 4679 /* end of match, go to what follows */ |
4531 addstate(nextlist, t->state->out, &t->subs, clen); | 4680 ll = nextlist; |
4681 add_state = t->state->out; | |
4682 add_off = clen; | |
4532 #ifdef ENABLE_LOG | 4683 #ifdef ENABLE_LOG |
4533 log_subsexpr(&nextlist->t[nextlist->n - 1].subs); | 4684 log_subsexpr(&nextlist->t[nextlist->n - 1].subs); |
4534 #endif | 4685 #endif |
4535 } | 4686 } |
4536 else | 4687 else |
4537 { | 4688 { |
4538 /* add state again with decremented count */ | 4689 /* add state again with decremented count */ |
4539 addstate(nextlist, t->state, &t->subs, 0); | 4690 ll = nextlist; |
4540 nextlist->t[nextlist->n - 1].count = t->count - clen; | 4691 add_state = t->state; |
4692 add_off = 0; | |
4693 add_count = t->count - clen; | |
4541 #ifdef ENABLE_LOG | 4694 #ifdef ENABLE_LOG |
4542 log_subsexpr(&nextlist->t[nextlist->n - 1].subs); | 4695 log_subsexpr(&nextlist->t[nextlist->n - 1].subs); |
4543 #endif | 4696 #endif |
4544 } | 4697 } |
4545 break; | 4698 break; |
4555 case NFA_LNUM_LT: | 4708 case NFA_LNUM_LT: |
4556 result = (REG_MULTI && | 4709 result = (REG_MULTI && |
4557 nfa_re_num_cmp(t->state->val, t->state->c - NFA_LNUM, | 4710 nfa_re_num_cmp(t->state->val, t->state->c - NFA_LNUM, |
4558 (long_u)(reglnum + reg_firstlnum))); | 4711 (long_u)(reglnum + reg_firstlnum))); |
4559 if (result) | 4712 if (result) |
4560 addstate_here(thislist, t->state->out, &t->subs, &listidx); | 4713 addstate_here(thislist, t->state->out, &t->subs, |
4714 t->pim, &listidx); | |
4561 break; | 4715 break; |
4562 | 4716 |
4563 case NFA_COL: | 4717 case NFA_COL: |
4564 case NFA_COL_GT: | 4718 case NFA_COL_GT: |
4565 case NFA_COL_LT: | 4719 case NFA_COL_LT: |
4566 result = nfa_re_num_cmp(t->state->val, t->state->c - NFA_COL, | 4720 result = nfa_re_num_cmp(t->state->val, t->state->c - NFA_COL, |
4567 (long_u)(reginput - regline) + 1); | 4721 (long_u)(reginput - regline) + 1); |
4568 if (result) | 4722 if (result) |
4569 addstate_here(thislist, t->state->out, &t->subs, &listidx); | 4723 addstate_here(thislist, t->state->out, &t->subs, |
4724 t->pim, &listidx); | |
4570 break; | 4725 break; |
4571 | 4726 |
4572 case NFA_VCOL: | 4727 case NFA_VCOL: |
4573 case NFA_VCOL_GT: | 4728 case NFA_VCOL_GT: |
4574 case NFA_VCOL_LT: | 4729 case NFA_VCOL_LT: |
4575 result = nfa_re_num_cmp(t->state->val, t->state->c - NFA_VCOL, | 4730 result = nfa_re_num_cmp(t->state->val, t->state->c - NFA_VCOL, |
4576 (long_u)win_linetabsize( | 4731 (long_u)win_linetabsize( |
4577 reg_win == NULL ? curwin : reg_win, | 4732 reg_win == NULL ? curwin : reg_win, |
4578 regline, (colnr_T)(reginput - regline)) + 1); | 4733 regline, (colnr_T)(reginput - regline)) + 1); |
4579 if (result) | 4734 if (result) |
4580 addstate_here(thislist, t->state->out, &t->subs, &listidx); | 4735 addstate_here(thislist, t->state->out, &t->subs, |
4736 t->pim, &listidx); | |
4581 break; | 4737 break; |
4582 | 4738 |
4583 case NFA_CURSOR: | 4739 case NFA_CURSOR: |
4584 result = (reg_win != NULL | 4740 result = (reg_win != NULL |
4585 && (reglnum + reg_firstlnum == reg_win->w_cursor.lnum) | 4741 && (reglnum + reg_firstlnum == reg_win->w_cursor.lnum) |
4586 && ((colnr_T)(reginput - regline) | 4742 && ((colnr_T)(reginput - regline) |
4587 == reg_win->w_cursor.col)); | 4743 == reg_win->w_cursor.col)); |
4588 if (result) | 4744 if (result) |
4589 addstate_here(thislist, t->state->out, &t->subs, &listidx); | 4745 addstate_here(thislist, t->state->out, &t->subs, |
4746 t->pim, &listidx); | |
4590 break; | 4747 break; |
4591 | 4748 |
4592 default: /* regular character */ | 4749 default: /* regular character */ |
4593 { | 4750 { |
4594 int c = t->state->c; | 4751 int c = t->state->c; |
4611 result = FALSE; | 4768 result = FALSE; |
4612 #endif | 4769 #endif |
4613 ADD_POS_NEG_STATE(t->state); | 4770 ADD_POS_NEG_STATE(t->state); |
4614 break; | 4771 break; |
4615 } | 4772 } |
4773 | |
4774 } /* switch (t->state->c) */ | |
4775 | |
4776 if (add_state != NULL) | |
4777 { | |
4778 if (t->pim != NULL) | |
4779 { | |
4780 /* postponed invisible match */ | |
4781 /* TODO: also do t->pim->pim recursively? */ | |
4782 if (t->pim->result == NFA_PIM_TODO) | |
4783 { | |
4784 #ifdef ENABLE_LOG | |
4785 fprintf(log_fd, "\n"); | |
4786 fprintf(log_fd, "==================================\n"); | |
4787 fprintf(log_fd, "Postponed recursive nfa_regmatch()\n"); | |
4788 fprintf(log_fd, "\n"); | |
4789 #endif | |
4790 result = recursive_regmatch(t->pim->state, | |
4791 prog, submatch, m, &listids); | |
4792 t->pim->result = result ? NFA_PIM_MATCH | |
4793 : NFA_PIM_NOMATCH; | |
4794 /* for \@! it is a match when result is FALSE */ | |
4795 if (result != t->pim->state->negated) | |
4796 { | |
4797 /* Copy submatch info from the recursive call */ | |
4798 copy_sub_off(&t->pim->subs.norm, &m->norm); | |
4799 #ifdef FEAT_SYN_HL | |
4800 copy_sub_off(&t->pim->subs.synt, &m->synt); | |
4801 #endif | |
4802 } | |
4803 } | |
4804 else | |
4805 { | |
4806 result = (t->pim->result == NFA_PIM_MATCH); | |
4807 #ifdef ENABLE_LOG | |
4808 fprintf(log_fd, "\n"); | |
4809 fprintf(log_fd, "Using previous recursive nfa_regmatch() result, result == %d\n", t->pim->result); | |
4810 fprintf(log_fd, "MATCH = %s\n", result == TRUE ? "OK" : "FALSE"); | |
4811 fprintf(log_fd, "\n"); | |
4812 #endif | |
4813 } | |
4814 | |
4815 /* for \@! it is a match when result is FALSE */ | |
4816 if (result != t->pim->state->negated) | |
4817 { | |
4818 /* Copy submatch info from the recursive call */ | |
4819 copy_sub_off(&t->subs.norm, &t->pim->subs.norm); | |
4820 #ifdef FEAT_SYN_HL | |
4821 copy_sub_off(&t->subs.synt, &t->pim->subs.synt); | |
4822 #endif | |
4823 } | |
4824 else | |
4825 /* look-behind match failed, don't add the state */ | |
4826 continue; | |
4827 } | |
4828 | |
4829 addstate(ll, add_state, &t->subs, add_off); | |
4830 if (add_count > 0) | |
4831 nextlist->t[ll->n - 1].count = add_count; | |
4616 } | 4832 } |
4617 | 4833 |
4618 } /* for (thislist = thislist; thislist->state; thislist++) */ | 4834 } /* for (thislist = thislist; thislist->state; thislist++) */ |
4619 | 4835 |
4620 /* Look for the start of a match in the current position by adding the | 4836 /* Look for the start of a match in the current position by adding the |
4678 /* Free memory */ | 4894 /* Free memory */ |
4679 vim_free(list[0].t); | 4895 vim_free(list[0].t); |
4680 vim_free(list[1].t); | 4896 vim_free(list[1].t); |
4681 vim_free(list[2].t); | 4897 vim_free(list[2].t); |
4682 vim_free(listids); | 4898 vim_free(listids); |
4899 ga_clear(&pimlist); | |
4683 #undef ADD_POS_NEG_STATE | 4900 #undef ADD_POS_NEG_STATE |
4684 #ifdef NFA_REGEXP_DEBUG_LOG | 4901 #ifdef NFA_REGEXP_DEBUG_LOG |
4685 fclose(debug); | 4902 fclose(debug); |
4686 #endif | 4903 #endif |
4687 | 4904 |