comparison src/regexp_nfa.c @ 4801:3cd3cc1e9119 v7.3.1147

Updated for version 7.3.1147. Problem: New regexp engine: regstart is only used to find the first match. Solution: Use regstart whenever adding the start state.
author Bram Moolenaar <bram@vim.org>
date Sat, 08 Jun 2013 14:38:27 +0200
parents e3f9e33fb28c
children 66803af09906
comparison
equal deleted inserted replaced
4800:339cf2070eb7 4801:3cd3cc1e9119
4151 4151
4152 return result; 4152 return result;
4153 } 4153 }
4154 4154
4155 static int failure_chance __ARGS((nfa_state_T *state, int depth)); 4155 static int failure_chance __ARGS((nfa_state_T *state, int depth));
4156 static int skip_to_start __ARGS((int c, colnr_T *colp));
4156 4157
4157 /* 4158 /*
4158 * Estimate the chance of a match with "state" failing. 4159 * Estimate the chance of a match with "state" failing.
4159 * NFA_ANY: 1 4160 * NFA_ANY: 1
4160 * specific character: 99 4161 * specific character: 99
4300 return 95; 4301 return 95;
4301 } 4302 }
4302 4303
4303 /* something else, includes character classes */ 4304 /* something else, includes character classes */
4304 return 50; 4305 return 50;
4306 }
4307
4308 /*
4309 * Skip until the char "c" we know a match must start with.
4310 */
4311 static int
4312 skip_to_start(c, colp)
4313 int c;
4314 colnr_T *colp;
4315 {
4316 char_u *s;
4317
4318 /* Used often, do some work to avoid call overhead. */
4319 if (!ireg_ic
4320 #ifdef FEAT_MBYTE
4321 && !has_mbyte
4322 #endif
4323 )
4324 s = vim_strbyte(regline + *colp, c);
4325 else
4326 s = cstrchr(regline + *colp, c);
4327 if (s == NULL)
4328 return FAIL;
4329 *colp = (int)(s - regline);
4330 return OK;
4305 } 4331 }
4306 4332
4307 /* 4333 /*
4308 * Main matching routine. 4334 * Main matching routine.
4309 * 4335 *
5447 #endif 5473 #endif
5448 /* Inline optimized code for addstate() if we know the state is 5474 /* Inline optimized code for addstate() if we know the state is
5449 * the first MOPEN. */ 5475 * the first MOPEN. */
5450 if (toplevel) 5476 if (toplevel)
5451 { 5477 {
5452 if (REG_MULTI) 5478 int add = TRUE;
5453 m->norm.list.multi[0].start.col = 5479 int c;
5480
5481 if (prog->regstart != NUL && clen != 0)
5482 {
5483 if (nextlist->n == 0)
5484 {
5485 colnr_T col = (colnr_T)(reginput - regline) + clen;
5486
5487 /* Nextlist is empty, we can skip ahead to the
5488 * character that must appear at the start. */
5489 if (skip_to_start(prog->regstart, &col) == FAIL)
5490 break;
5491 #ifdef ENABLE_LOG
5492 fprintf(log_fd, " Skipping ahead %d bytes to regstart\n",
5493 col - ((colnr_T)(reginput - regline) + clen));
5494 #endif
5495 reginput = regline + col - clen;
5496 }
5497 else
5498 {
5499 /* Checking if the required start character matches is
5500 * cheaper than adding a state that won't match. */
5501 c = PTR2CHAR(reginput + clen);
5502 if (c != prog->regstart && (!ireg_ic || MB_TOLOWER(c)
5503 != MB_TOLOWER(prog->regstart)))
5504 {
5505 #ifdef ENABLE_LOG
5506 fprintf(log_fd, " Skipping start state, regstart does not match\n");
5507 #endif
5508 add = FALSE;
5509 }
5510 }
5511 }
5512
5513 if (add)
5514 {
5515 if (REG_MULTI)
5516 m->norm.list.multi[0].start.col =
5454 (colnr_T)(reginput - regline) + clen; 5517 (colnr_T)(reginput - regline) + clen;
5455 else 5518 else
5456 m->norm.list.line[0].start = reginput + clen; 5519 m->norm.list.line[0].start = reginput + clen;
5457 addstate(nextlist, start->out, m, clen); 5520 addstate(nextlist, start->out, m, clen);
5521 }
5458 } 5522 }
5459 else 5523 else
5460 addstate(nextlist, start, m, clen); 5524 addstate(nextlist, start, m, clen);
5461 } 5525 }
5462 5526
5699 5763
5700 if (prog->reganch && col > 0) 5764 if (prog->reganch && col > 0)
5701 return 0L; 5765 return 0L;
5702 5766
5703 if (prog->regstart != NUL) 5767 if (prog->regstart != NUL)
5704 { 5768 /* Skip ahead until a character we know the match must start with.
5705 char_u *s; 5769 * When there is none there is no match. */
5706 5770 if (skip_to_start(prog->regstart, &col) == FAIL)
5707 /* Skip until the char we know it must start with.
5708 * Used often, do some work to avoid call overhead. */
5709 if (!ireg_ic
5710 #ifdef FEAT_MBYTE
5711 && !has_mbyte
5712 #endif
5713 )
5714 s = vim_strbyte(regline + col, prog->regstart);
5715 else
5716 s = cstrchr(regline + col, prog->regstart);
5717 if (s == NULL)
5718 return 0L; 5771 return 0L;
5719 col = (int)(s - regline);
5720 }
5721 5772
5722 /* If the start column is past the maximum column: no need to try. */ 5773 /* If the start column is past the maximum column: no need to try. */
5723 if (ireg_maxcol > 0 && col >= ireg_maxcol) 5774 if (ireg_maxcol > 0 && col >= ireg_maxcol)
5724 goto theend; 5775 goto theend;
5725 5776