comparison src/regexp_nfa.c @ 4799:e3f9e33fb28c v7.3.1146

Updated for version 7.3.1146. Problem: New regexp engine: look-behind match is not checked when followed by a zero-width match. Solution: Do the look-behind match before adding the zero-width state.
author Bram Moolenaar <bram@vim.org>
date Sat, 08 Jun 2013 13:33:37 +0200
parents a30e3762957d
children 3cd3cc1e9119
comparison
Legend: equal, deleted, inserted, replaced
4798:b4b5c5eadd9a 4799:e3f9e33fb28c
4330 int listidx; 4330 int listidx;
4331 nfa_list_T *thislist; 4331 nfa_list_T *thislist;
4332 nfa_list_T *nextlist; 4332 nfa_list_T *nextlist;
4333 int *listids = NULL; 4333 int *listids = NULL;
4334 nfa_state_T *add_state; 4334 nfa_state_T *add_state;
4335 int add_here;
4335 int add_count; 4336 int add_count;
4336 int add_off; 4337 int add_off;
4337 garray_T pimlist; 4338 garray_T pimlist;
4338 int toplevel = start->c == NFA_MOPEN; 4339 int toplevel = start->c == NFA_MOPEN;
4339 #ifdef NFA_REGEXP_DEBUG_LOG 4340 #ifdef NFA_REGEXP_DEBUG_LOG
4493 /* 4494 /*
4494 * Handle the possible codes of the current state. 4495 * Handle the possible codes of the current state.
4495 * The most important is NFA_MATCH. 4496 * The most important is NFA_MATCH.
4496 */ 4497 */
4497 add_state = NULL; 4498 add_state = NULL;
4499 add_here = FALSE;
4498 add_count = 0; 4500 add_count = 0;
4499 switch (t->state->c) 4501 switch (t->state->c)
4500 { 4502 {
4501 case NFA_MATCH: 4503 case NFA_MATCH:
4502 { 4504 {
4619 #endif 4621 #endif
4620 4622
4621 /* t->state->out1 is the corresponding 4623 /* t->state->out1 is the corresponding
4622 * END_INVISIBLE node; Add its out to the current 4624 * END_INVISIBLE node; Add its out to the current
4623 * list (zero-width match). */ 4625 * list (zero-width match). */
4624 addstate_here(thislist, t->state->out1->out, 4626 add_here = TRUE;
4625 &t->subs, t->pim, &listidx); 4627 add_state = t->state->out1->out;
4626 } 4628 }
4627 } 4629 }
4628 else 4630 else
4629 { 4631 {
4630 /* 4632 /*
4631 * First try matching what follows at the current 4633 * First try matching what follows at the current
4632 * position. Only if a match is found, addstate() is 4634 * position. Only if a match is found, before
4633 * called, then verify the invisible match matches. 4635 * addstate() is called, then verify the invisible
4634 * Add a nfa_pim_T to the following states, it 4636 * match matches. Add a nfa_pim_T to the following
4635 * contains info about the invisible match. 4637 * states, it contains info about the invisible match.
4636 */ 4638 */
4637 if (ga_grow(&pimlist, 1) == FAIL) 4639 if (ga_grow(&pimlist, 1) == FAIL)
4638 goto theend; 4640 goto theend;
4639 pim = (nfa_pim_T *)pimlist.ga_data + pimlist.ga_len; 4641 pim = (nfa_pim_T *)pimlist.ga_data + pimlist.ga_len;
4640 ++pimlist.ga_len; 4642 ++pimlist.ga_len;
4725 if (bytelen == 0) 4727 if (bytelen == 0)
4726 { 4728 {
4727 /* empty match, output of corresponding 4729 /* empty match, output of corresponding
4728 * NFA_END_PATTERN/NFA_SKIP to be used at current 4730 * NFA_END_PATTERN/NFA_SKIP to be used at current
4729 * position */ 4731 * position */
4730 addstate_here(thislist, t->state->out1->out->out, 4732 add_here = TRUE;
4731 &t->subs, t->pim, &listidx); 4733 add_state = t->state->out1->out->out;
4732 } 4734 }
4733 else if (bytelen <= clen) 4735 else if (bytelen <= clen)
4734 { 4736 {
4735 /* match current character, output of corresponding 4737 /* match current character, output of corresponding
4736 * NFA_END_PATTERN to be used at next position. */ 4738 * NFA_END_PATTERN to be used at next position. */
4749 break; 4751 break;
4750 } 4752 }
4751 4753
4752 case NFA_BOL: 4754 case NFA_BOL:
4753 if (reginput == regline) 4755 if (reginput == regline)
4754 addstate_here(thislist, t->state->out, &t->subs, 4756 {
4755 t->pim, &listidx); 4757 add_here = TRUE;
4758 add_state = t->state->out;
4759 }
4756 break; 4760 break;
4757 4761
4758 case NFA_EOL: 4762 case NFA_EOL:
4759 if (curc == NUL) 4763 if (curc == NUL)
4760 addstate_here(thislist, t->state->out, &t->subs, 4764 {
4761 t->pim, &listidx); 4765 add_here = TRUE;
4766 add_state = t->state->out;
4767 }
4762 break; 4768 break;
4763 4769
4764 case NFA_BOW: 4770 case NFA_BOW:
4765 result = TRUE; 4771 result = TRUE;
4766 4772
4782 else if (!vim_iswordc_buf(curc, reg_buf) 4788 else if (!vim_iswordc_buf(curc, reg_buf)
4783 || (reginput > regline 4789 || (reginput > regline
4784 && vim_iswordc_buf(reginput[-1], reg_buf))) 4790 && vim_iswordc_buf(reginput[-1], reg_buf)))
4785 result = FALSE; 4791 result = FALSE;
4786 if (result) 4792 if (result)
4787 addstate_here(thislist, t->state->out, &t->subs, 4793 {
4788 t->pim, &listidx); 4794 add_here = TRUE;
4795 add_state = t->state->out;
4796 }
4789 break; 4797 break;
4790 4798
4791 case NFA_EOW: 4799 case NFA_EOW:
4792 result = TRUE; 4800 result = TRUE;
4793 if (reginput == regline) 4801 if (reginput == regline)
4808 else if (!vim_iswordc_buf(reginput[-1], reg_buf) 4816 else if (!vim_iswordc_buf(reginput[-1], reg_buf)
4809 || (reginput[0] != NUL 4817 || (reginput[0] != NUL
4810 && vim_iswordc_buf(curc, reg_buf))) 4818 && vim_iswordc_buf(curc, reg_buf)))
4811 result = FALSE; 4819 result = FALSE;
4812 if (result) 4820 if (result)
4813 addstate_here(thislist, t->state->out, &t->subs, 4821 {
4814 t->pim, &listidx); 4822 add_here = TRUE;
4823 add_state = t->state->out;
4824 }
4815 break; 4825 break;
4816 4826
4817 case NFA_BOF: 4827 case NFA_BOF:
4818 if (reglnum == 0 && reginput == regline 4828 if (reglnum == 0 && reginput == regline
4819 && (!REG_MULTI || reg_firstlnum == 1)) 4829 && (!REG_MULTI || reg_firstlnum == 1))
4820 addstate_here(thislist, t->state->out, &t->subs, 4830 {
4821 t->pim, &listidx); 4831 add_here = TRUE;
4832 add_state = t->state->out;
4833 }
4822 break; 4834 break;
4823 4835
4824 case NFA_EOF: 4836 case NFA_EOF:
4825 if (reglnum == reg_maxline && curc == NUL) 4837 if (reglnum == reg_maxline && curc == NUL)
4826 addstate_here(thislist, t->state->out, &t->subs, 4838 {
4827 t->pim, &listidx); 4839 add_here = TRUE;
4840 add_state = t->state->out;
4841 }
4828 break; 4842 break;
4829 4843
4830 #ifdef FEAT_MBYTE 4844 #ifdef FEAT_MBYTE
4831 case NFA_COMPOSING: 4845 case NFA_COMPOSING:
4832 { 4846 {
5181 { 5195 {
5182 if (bytelen == 0) 5196 if (bytelen == 0)
5183 { 5197 {
5184 /* empty match always works, output of NFA_SKIP to be 5198 /* empty match always works, output of NFA_SKIP to be
5185 * used next */ 5199 * used next */
5186 addstate_here(thislist, t->state->out->out, &t->subs, 5200 add_here = TRUE;
5187 t->pim, &listidx); 5201 add_state = t->state->out->out;
5188 } 5202 }
5189 else if (bytelen <= clen) 5203 else if (bytelen <= clen)
5190 { 5204 {
5191 /* match current character, jump ahead to out of 5205 /* match current character, jump ahead to out of
5192 * NFA_SKIP */ 5206 * NFA_SKIP */
5226 case NFA_LNUM_LT: 5240 case NFA_LNUM_LT:
5227 result = (REG_MULTI && 5241 result = (REG_MULTI &&
5228 nfa_re_num_cmp(t->state->val, t->state->c - NFA_LNUM, 5242 nfa_re_num_cmp(t->state->val, t->state->c - NFA_LNUM,
5229 (long_u)(reglnum + reg_firstlnum))); 5243 (long_u)(reglnum + reg_firstlnum)));
5230 if (result) 5244 if (result)
5231 addstate_here(thislist, t->state->out, &t->subs, 5245 {
5232 t->pim, &listidx); 5246 add_here = TRUE;
5247 add_state = t->state->out;
5248 }
5233 break; 5249 break;
5234 5250
5235 case NFA_COL: 5251 case NFA_COL:
5236 case NFA_COL_GT: 5252 case NFA_COL_GT:
5237 case NFA_COL_LT: 5253 case NFA_COL_LT:
5238 result = nfa_re_num_cmp(t->state->val, t->state->c - NFA_COL, 5254 result = nfa_re_num_cmp(t->state->val, t->state->c - NFA_COL,
5239 (long_u)(reginput - regline) + 1); 5255 (long_u)(reginput - regline) + 1);
5240 if (result) 5256 if (result)
5241 addstate_here(thislist, t->state->out, &t->subs, 5257 {
5242 t->pim, &listidx); 5258 add_here = TRUE;
5259 add_state = t->state->out;
5260 }
5243 break; 5261 break;
5244 5262
5245 case NFA_VCOL: 5263 case NFA_VCOL:
5246 case NFA_VCOL_GT: 5264 case NFA_VCOL_GT:
5247 case NFA_VCOL_LT: 5265 case NFA_VCOL_LT:
5248 result = nfa_re_num_cmp(t->state->val, t->state->c - NFA_VCOL, 5266 result = nfa_re_num_cmp(t->state->val, t->state->c - NFA_VCOL,
5249 (long_u)win_linetabsize( 5267 (long_u)win_linetabsize(
5250 reg_win == NULL ? curwin : reg_win, 5268 reg_win == NULL ? curwin : reg_win,
5251 regline, (colnr_T)(reginput - regline)) + 1); 5269 regline, (colnr_T)(reginput - regline)) + 1);
5252 if (result) 5270 if (result)
5253 addstate_here(thislist, t->state->out, &t->subs, 5271 {
5254 t->pim, &listidx); 5272 add_here = TRUE;
5273 add_state = t->state->out;
5274 }
5255 break; 5275 break;
5256 5276
5257 case NFA_MARK: 5277 case NFA_MARK:
5258 case NFA_MARK_GT: 5278 case NFA_MARK_GT:
5259 case NFA_MARK_LT: 5279 case NFA_MARK_LT:
5271 : t->state->c == NFA_MARK_LT)) 5291 : t->state->c == NFA_MARK_LT))
5272 : (pos->lnum < reglnum + reg_firstlnum 5292 : (pos->lnum < reglnum + reg_firstlnum
5273 ? t->state->c == NFA_MARK_GT 5293 ? t->state->c == NFA_MARK_GT
5274 : t->state->c == NFA_MARK_LT))); 5294 : t->state->c == NFA_MARK_LT)));
5275 if (result) 5295 if (result)
5276 addstate_here(thislist, t->state->out, &t->subs, 5296 {
5277 t->pim, &listidx); 5297 add_here = TRUE;
5298 add_state = t->state->out;
5299 }
5278 break; 5300 break;
5279 } 5301 }
5280 5302
5281 case NFA_CURSOR: 5303 case NFA_CURSOR:
5282 result = (reg_win != NULL 5304 result = (reg_win != NULL
5283 && (reglnum + reg_firstlnum == reg_win->w_cursor.lnum) 5305 && (reglnum + reg_firstlnum == reg_win->w_cursor.lnum)
5284 && ((colnr_T)(reginput - regline) 5306 && ((colnr_T)(reginput - regline)
5285 == reg_win->w_cursor.col)); 5307 == reg_win->w_cursor.col));
5286 if (result) 5308 if (result)
5287 addstate_here(thislist, t->state->out, &t->subs, 5309 {
5288 t->pim, &listidx); 5310 add_here = TRUE;
5311 add_state = t->state->out;
5312 }
5289 break; 5313 break;
5290 5314
5291 case NFA_VISUAL: 5315 case NFA_VISUAL:
5292 #ifdef FEAT_VISUAL 5316 #ifdef FEAT_VISUAL
5293 result = reg_match_visual(); 5317 result = reg_match_visual();
5294 if (result) 5318 if (result)
5295 addstate_here(thislist, t->state->out, &t->subs, 5319 {
5296 t->pim, &listidx); 5320 add_here = TRUE;
5321 add_state = t->state->out;
5322 }
5297 #endif 5323 #endif
5298 break; 5324 break;
5299 5325
5300 default: /* regular character */ 5326 default: /* regular character */
5301 { 5327 {
5325 if (add_state != NULL) 5351 if (add_state != NULL)
5326 { 5352 {
5327 if (t->pim != NULL) 5353 if (t->pim != NULL)
5328 { 5354 {
5329 /* postponed invisible match */ 5355 /* postponed invisible match */
5330 /* TODO: also do t->pim->pim recursively? */
5331 if (t->pim->result == NFA_PIM_TODO) 5356 if (t->pim->result == NFA_PIM_TODO)
5332 { 5357 {
5333 #ifdef ENABLE_LOG 5358 #ifdef ENABLE_LOG
5334 fprintf(log_fd, "\n"); 5359 fprintf(log_fd, "\n");
5335 fprintf(log_fd, "==================================\n"); 5360 fprintf(log_fd, "==================================\n");
5381 else 5406 else
5382 /* look-behind match failed, don't add the state */ 5407 /* look-behind match failed, don't add the state */
5383 continue; 5408 continue;
5384 } 5409 }
5385 5410
5386 addstate(nextlist, add_state, &t->subs, add_off); 5411 if (add_here)
5387 if (add_count > 0) 5412 addstate_here(thislist, add_state, &t->subs, NULL, &listidx);
5388 nextlist->t[nextlist->n - 1].count = add_count; 5413 else
5414 {
5415 addstate(nextlist, add_state, &t->subs, add_off);
5416 if (add_count > 0)
5417 nextlist->t[nextlist->n - 1].count = add_count;
5418 }
5389 } 5419 }
5390 5420
5391 } /* for (thislist = thislist; thislist->state; thislist++) */ 5421 } /* for (thislist = thislist; thislist->state; thislist++) */
5392 5422
5393 /* Look for the start of a match in the current position by adding the 5423 /* Look for the start of a match in the current position by adding the