Mercurial > vim
comparison src/regexp_nfa.c @ 4799:e3f9e33fb28c v7.3.1146
updated for version 7.3.1146
Problem: New regexp engine: a look-behind match is not checked when it is
followed by a zero-width match.
Solution: Do the look-behind match before adding the zero-width state.
author | Bram Moolenaar <bram@vim.org> |
---|---|
date | Sat, 08 Jun 2013 13:33:37 +0200 |
parents | a30e3762957d |
children | 3cd3cc1e9119 |
comparison legend:
- equal
- deleted
- inserted
- replaced
4798:b4b5c5eadd9a | 4799:e3f9e33fb28c |
---|---|
4330 int listidx; | 4330 int listidx; |
4331 nfa_list_T *thislist; | 4331 nfa_list_T *thislist; |
4332 nfa_list_T *nextlist; | 4332 nfa_list_T *nextlist; |
4333 int *listids = NULL; | 4333 int *listids = NULL; |
4334 nfa_state_T *add_state; | 4334 nfa_state_T *add_state; |
4335 int add_here; | |
4335 int add_count; | 4336 int add_count; |
4336 int add_off; | 4337 int add_off; |
4337 garray_T pimlist; | 4338 garray_T pimlist; |
4338 int toplevel = start->c == NFA_MOPEN; | 4339 int toplevel = start->c == NFA_MOPEN; |
4339 #ifdef NFA_REGEXP_DEBUG_LOG | 4340 #ifdef NFA_REGEXP_DEBUG_LOG |
4493 /* | 4494 /* |
4494 * Handle the possible codes of the current state. | 4495 * Handle the possible codes of the current state. |
4495 * The most important is NFA_MATCH. | 4496 * The most important is NFA_MATCH. |
4496 */ | 4497 */ |
4497 add_state = NULL; | 4498 add_state = NULL; |
4499 add_here = FALSE; | |
4498 add_count = 0; | 4500 add_count = 0; |
4499 switch (t->state->c) | 4501 switch (t->state->c) |
4500 { | 4502 { |
4501 case NFA_MATCH: | 4503 case NFA_MATCH: |
4502 { | 4504 { |
4619 #endif | 4621 #endif |
4620 | 4622 |
4621 /* t->state->out1 is the corresponding | 4623 /* t->state->out1 is the corresponding |
4622 * END_INVISIBLE node; Add its out to the current | 4624 * END_INVISIBLE node; Add its out to the current |
4623 * list (zero-width match). */ | 4625 * list (zero-width match). */ |
4624 addstate_here(thislist, t->state->out1->out, | 4626 add_here = TRUE; |
4625 &t->subs, t->pim, &listidx); | 4627 add_state = t->state->out1->out; |
4626 } | 4628 } |
4627 } | 4629 } |
4628 else | 4630 else |
4629 { | 4631 { |
4630 /* | 4632 /* |
4631 * First try matching what follows at the current | 4633 * First try matching what follows at the current |
4632 * position. Only if a match is found, addstate() is | 4634 * position. Only if a match is found, before |
4633 * called, then verify the invisible match matches. | 4635 * addstate() is called, then verify the invisible |
4634 * Add a nfa_pim_T to the following states, it | 4636 * match matches. Add a nfa_pim_T to the following |
4635 * contains info about the invisible match. | 4637 * states, it contains info about the invisible match. |
4636 */ | 4638 */ |
4637 if (ga_grow(&pimlist, 1) == FAIL) | 4639 if (ga_grow(&pimlist, 1) == FAIL) |
4638 goto theend; | 4640 goto theend; |
4639 pim = (nfa_pim_T *)pimlist.ga_data + pimlist.ga_len; | 4641 pim = (nfa_pim_T *)pimlist.ga_data + pimlist.ga_len; |
4640 ++pimlist.ga_len; | 4642 ++pimlist.ga_len; |
4725 if (bytelen == 0) | 4727 if (bytelen == 0) |
4726 { | 4728 { |
4727 /* empty match, output of corresponding | 4729 /* empty match, output of corresponding |
4728 * NFA_END_PATTERN/NFA_SKIP to be used at current | 4730 * NFA_END_PATTERN/NFA_SKIP to be used at current |
4729 * position */ | 4731 * position */ |
4730 addstate_here(thislist, t->state->out1->out->out, | 4732 add_here = TRUE; |
4731 &t->subs, t->pim, &listidx); | 4733 add_state = t->state->out1->out->out; |
4732 } | 4734 } |
4733 else if (bytelen <= clen) | 4735 else if (bytelen <= clen) |
4734 { | 4736 { |
4735 /* match current character, output of corresponding | 4737 /* match current character, output of corresponding |
4736 * NFA_END_PATTERN to be used at next position. */ | 4738 * NFA_END_PATTERN to be used at next position. */ |
4749 break; | 4751 break; |
4750 } | 4752 } |
4751 | 4753 |
4752 case NFA_BOL: | 4754 case NFA_BOL: |
4753 if (reginput == regline) | 4755 if (reginput == regline) |
4754 addstate_here(thislist, t->state->out, &t->subs, | 4756 { |
4755 t->pim, &listidx); | 4757 add_here = TRUE; |
4758 add_state = t->state->out; | |
4759 } | |
4756 break; | 4760 break; |
4757 | 4761 |
4758 case NFA_EOL: | 4762 case NFA_EOL: |
4759 if (curc == NUL) | 4763 if (curc == NUL) |
4760 addstate_here(thislist, t->state->out, &t->subs, | 4764 { |
4761 t->pim, &listidx); | 4765 add_here = TRUE; |
4766 add_state = t->state->out; | |
4767 } | |
4762 break; | 4768 break; |
4763 | 4769 |
4764 case NFA_BOW: | 4770 case NFA_BOW: |
4765 result = TRUE; | 4771 result = TRUE; |
4766 | 4772 |
4782 else if (!vim_iswordc_buf(curc, reg_buf) | 4788 else if (!vim_iswordc_buf(curc, reg_buf) |
4783 || (reginput > regline | 4789 || (reginput > regline |
4784 && vim_iswordc_buf(reginput[-1], reg_buf))) | 4790 && vim_iswordc_buf(reginput[-1], reg_buf))) |
4785 result = FALSE; | 4791 result = FALSE; |
4786 if (result) | 4792 if (result) |
4787 addstate_here(thislist, t->state->out, &t->subs, | 4793 { |
4788 t->pim, &listidx); | 4794 add_here = TRUE; |
4795 add_state = t->state->out; | |
4796 } | |
4789 break; | 4797 break; |
4790 | 4798 |
4791 case NFA_EOW: | 4799 case NFA_EOW: |
4792 result = TRUE; | 4800 result = TRUE; |
4793 if (reginput == regline) | 4801 if (reginput == regline) |
4808 else if (!vim_iswordc_buf(reginput[-1], reg_buf) | 4816 else if (!vim_iswordc_buf(reginput[-1], reg_buf) |
4809 || (reginput[0] != NUL | 4817 || (reginput[0] != NUL |
4810 && vim_iswordc_buf(curc, reg_buf))) | 4818 && vim_iswordc_buf(curc, reg_buf))) |
4811 result = FALSE; | 4819 result = FALSE; |
4812 if (result) | 4820 if (result) |
4813 addstate_here(thislist, t->state->out, &t->subs, | 4821 { |
4814 t->pim, &listidx); | 4822 add_here = TRUE; |
4823 add_state = t->state->out; | |
4824 } | |
4815 break; | 4825 break; |
4816 | 4826 |
4817 case NFA_BOF: | 4827 case NFA_BOF: |
4818 if (reglnum == 0 && reginput == regline | 4828 if (reglnum == 0 && reginput == regline |
4819 && (!REG_MULTI || reg_firstlnum == 1)) | 4829 && (!REG_MULTI || reg_firstlnum == 1)) |
4820 addstate_here(thislist, t->state->out, &t->subs, | 4830 { |
4821 t->pim, &listidx); | 4831 add_here = TRUE; |
4832 add_state = t->state->out; | |
4833 } | |
4822 break; | 4834 break; |
4823 | 4835 |
4824 case NFA_EOF: | 4836 case NFA_EOF: |
4825 if (reglnum == reg_maxline && curc == NUL) | 4837 if (reglnum == reg_maxline && curc == NUL) |
4826 addstate_here(thislist, t->state->out, &t->subs, | 4838 { |
4827 t->pim, &listidx); | 4839 add_here = TRUE; |
4840 add_state = t->state->out; | |
4841 } | |
4828 break; | 4842 break; |
4829 | 4843 |
4830 #ifdef FEAT_MBYTE | 4844 #ifdef FEAT_MBYTE |
4831 case NFA_COMPOSING: | 4845 case NFA_COMPOSING: |
4832 { | 4846 { |
5181 { | 5195 { |
5182 if (bytelen == 0) | 5196 if (bytelen == 0) |
5183 { | 5197 { |
5184 /* empty match always works, output of NFA_SKIP to be | 5198 /* empty match always works, output of NFA_SKIP to be |
5185 * used next */ | 5199 * used next */ |
5186 addstate_here(thislist, t->state->out->out, &t->subs, | 5200 add_here = TRUE; |
5187 t->pim, &listidx); | 5201 add_state = t->state->out->out; |
5188 } | 5202 } |
5189 else if (bytelen <= clen) | 5203 else if (bytelen <= clen) |
5190 { | 5204 { |
5191 /* match current character, jump ahead to out of | 5205 /* match current character, jump ahead to out of |
5192 * NFA_SKIP */ | 5206 * NFA_SKIP */ |
5226 case NFA_LNUM_LT: | 5240 case NFA_LNUM_LT: |
5227 result = (REG_MULTI && | 5241 result = (REG_MULTI && |
5228 nfa_re_num_cmp(t->state->val, t->state->c - NFA_LNUM, | 5242 nfa_re_num_cmp(t->state->val, t->state->c - NFA_LNUM, |
5229 (long_u)(reglnum + reg_firstlnum))); | 5243 (long_u)(reglnum + reg_firstlnum))); |
5230 if (result) | 5244 if (result) |
5231 addstate_here(thislist, t->state->out, &t->subs, | 5245 { |
5232 t->pim, &listidx); | 5246 add_here = TRUE; |
5247 add_state = t->state->out; | |
5248 } | |
5233 break; | 5249 break; |
5234 | 5250 |
5235 case NFA_COL: | 5251 case NFA_COL: |
5236 case NFA_COL_GT: | 5252 case NFA_COL_GT: |
5237 case NFA_COL_LT: | 5253 case NFA_COL_LT: |
5238 result = nfa_re_num_cmp(t->state->val, t->state->c - NFA_COL, | 5254 result = nfa_re_num_cmp(t->state->val, t->state->c - NFA_COL, |
5239 (long_u)(reginput - regline) + 1); | 5255 (long_u)(reginput - regline) + 1); |
5240 if (result) | 5256 if (result) |
5241 addstate_here(thislist, t->state->out, &t->subs, | 5257 { |
5242 t->pim, &listidx); | 5258 add_here = TRUE; |
5259 add_state = t->state->out; | |
5260 } | |
5243 break; | 5261 break; |
5244 | 5262 |
5245 case NFA_VCOL: | 5263 case NFA_VCOL: |
5246 case NFA_VCOL_GT: | 5264 case NFA_VCOL_GT: |
5247 case NFA_VCOL_LT: | 5265 case NFA_VCOL_LT: |
5248 result = nfa_re_num_cmp(t->state->val, t->state->c - NFA_VCOL, | 5266 result = nfa_re_num_cmp(t->state->val, t->state->c - NFA_VCOL, |
5249 (long_u)win_linetabsize( | 5267 (long_u)win_linetabsize( |
5250 reg_win == NULL ? curwin : reg_win, | 5268 reg_win == NULL ? curwin : reg_win, |
5251 regline, (colnr_T)(reginput - regline)) + 1); | 5269 regline, (colnr_T)(reginput - regline)) + 1); |
5252 if (result) | 5270 if (result) |
5253 addstate_here(thislist, t->state->out, &t->subs, | 5271 { |
5254 t->pim, &listidx); | 5272 add_here = TRUE; |
5273 add_state = t->state->out; | |
5274 } | |
5255 break; | 5275 break; |
5256 | 5276 |
5257 case NFA_MARK: | 5277 case NFA_MARK: |
5258 case NFA_MARK_GT: | 5278 case NFA_MARK_GT: |
5259 case NFA_MARK_LT: | 5279 case NFA_MARK_LT: |
5271 : t->state->c == NFA_MARK_LT)) | 5291 : t->state->c == NFA_MARK_LT)) |
5272 : (pos->lnum < reglnum + reg_firstlnum | 5292 : (pos->lnum < reglnum + reg_firstlnum |
5273 ? t->state->c == NFA_MARK_GT | 5293 ? t->state->c == NFA_MARK_GT |
5274 : t->state->c == NFA_MARK_LT))); | 5294 : t->state->c == NFA_MARK_LT))); |
5275 if (result) | 5295 if (result) |
5276 addstate_here(thislist, t->state->out, &t->subs, | 5296 { |
5277 t->pim, &listidx); | 5297 add_here = TRUE; |
5298 add_state = t->state->out; | |
5299 } | |
5278 break; | 5300 break; |
5279 } | 5301 } |
5280 | 5302 |
5281 case NFA_CURSOR: | 5303 case NFA_CURSOR: |
5282 result = (reg_win != NULL | 5304 result = (reg_win != NULL |
5283 && (reglnum + reg_firstlnum == reg_win->w_cursor.lnum) | 5305 && (reglnum + reg_firstlnum == reg_win->w_cursor.lnum) |
5284 && ((colnr_T)(reginput - regline) | 5306 && ((colnr_T)(reginput - regline) |
5285 == reg_win->w_cursor.col)); | 5307 == reg_win->w_cursor.col)); |
5286 if (result) | 5308 if (result) |
5287 addstate_here(thislist, t->state->out, &t->subs, | 5309 { |
5288 t->pim, &listidx); | 5310 add_here = TRUE; |
5311 add_state = t->state->out; | |
5312 } | |
5289 break; | 5313 break; |
5290 | 5314 |
5291 case NFA_VISUAL: | 5315 case NFA_VISUAL: |
5292 #ifdef FEAT_VISUAL | 5316 #ifdef FEAT_VISUAL |
5293 result = reg_match_visual(); | 5317 result = reg_match_visual(); |
5294 if (result) | 5318 if (result) |
5295 addstate_here(thislist, t->state->out, &t->subs, | 5319 { |
5296 t->pim, &listidx); | 5320 add_here = TRUE; |
5321 add_state = t->state->out; | |
5322 } | |
5297 #endif | 5323 #endif |
5298 break; | 5324 break; |
5299 | 5325 |
5300 default: /* regular character */ | 5326 default: /* regular character */ |
5301 { | 5327 { |
5325 if (add_state != NULL) | 5351 if (add_state != NULL) |
5326 { | 5352 { |
5327 if (t->pim != NULL) | 5353 if (t->pim != NULL) |
5328 { | 5354 { |
5329 /* postponed invisible match */ | 5355 /* postponed invisible match */ |
5330 /* TODO: also do t->pim->pim recursively? */ | |
5331 if (t->pim->result == NFA_PIM_TODO) | 5356 if (t->pim->result == NFA_PIM_TODO) |
5332 { | 5357 { |
5333 #ifdef ENABLE_LOG | 5358 #ifdef ENABLE_LOG |
5334 fprintf(log_fd, "\n"); | 5359 fprintf(log_fd, "\n"); |
5335 fprintf(log_fd, "==================================\n"); | 5360 fprintf(log_fd, "==================================\n"); |
5381 else | 5406 else |
5382 /* look-behind match failed, don't add the state */ | 5407 /* look-behind match failed, don't add the state */ |
5383 continue; | 5408 continue; |
5384 } | 5409 } |
5385 | 5410 |
5386 addstate(nextlist, add_state, &t->subs, add_off); | 5411 if (add_here) |
5387 if (add_count > 0) | 5412 addstate_here(thislist, add_state, &t->subs, NULL, &listidx); |
5388 nextlist->t[nextlist->n - 1].count = add_count; | 5413 else |
5414 { | |
5415 addstate(nextlist, add_state, &t->subs, add_off); | |
5416 if (add_count > 0) | |
5417 nextlist->t[nextlist->n - 1].count = add_count; | |
5418 } | |
5389 } | 5419 } |
5390 | 5420 |
5391 } /* for (thislist = thislist; thislist->state; thislist++) */ | 5421 } /* for (thislist = thislist; thislist->state; thislist++) */ |
5392 | 5422 |
5393 /* Look for the start of a match in the current position by adding the | 5423 /* Look for the start of a match in the current position by adding the |