Mercurial > vim
comparison src/regexp_nfa.c @ 4801:3cd3cc1e9119 v7.3.1147
updated for version 7.3.1147
Problem: New regexp engine: regstart is only used to find the first match.
Solution: Use regstart whenever adding the start state.
author | Bram Moolenaar <bram@vim.org> |
---|---|
date | Sat, 08 Jun 2013 14:38:27 +0200 |
parents | e3f9e33fb28c |
children | 66803af09906 |
comparison
equal
deleted
inserted
replaced
4800:339cf2070eb7 | 4801:3cd3cc1e9119 |
---|---|
4151 | 4151 |
4152 return result; | 4152 return result; |
4153 } | 4153 } |
4154 | 4154 |
4155 static int failure_chance __ARGS((nfa_state_T *state, int depth)); | 4155 static int failure_chance __ARGS((nfa_state_T *state, int depth)); |
4156 static int skip_to_start __ARGS((int c, colnr_T *colp)); | |
4156 | 4157 |
4157 /* | 4158 /* |
4158 * Estimate the chance of a match with "state" failing. | 4159 * Estimate the chance of a match with "state" failing. |
4159 * NFA_ANY: 1 | 4160 * NFA_ANY: 1 |
4160 * specific character: 99 | 4161 * specific character: 99 |
4300 return 95; | 4301 return 95; |
4301 } | 4302 } |
4302 | 4303 |
4303 /* something else, includes character classes */ | 4304 /* something else, includes character classes */ |
4304 return 50; | 4305 return 50; |
4306 } | |
4307 | |
4308 /* | |
4309 * Skip until the char "c" we know a match must start with. | |
4310 */ | |
4311 static int | |
4312 skip_to_start(c, colp) | |
4313 int c; | |
4314 colnr_T *colp; | |
4315 { | |
4316 char_u *s; | |
4317 | |
4318 /* Used often, do some work to avoid call overhead. */ | |
4319 if (!ireg_ic | |
4320 #ifdef FEAT_MBYTE | |
4321 && !has_mbyte | |
4322 #endif | |
4323 ) | |
4324 s = vim_strbyte(regline + *colp, c); | |
4325 else | |
4326 s = cstrchr(regline + *colp, c); | |
4327 if (s == NULL) | |
4328 return FAIL; | |
4329 *colp = (int)(s - regline); | |
4330 return OK; | |
4305 } | 4331 } |
4306 | 4332 |
4307 /* | 4333 /* |
4308 * Main matching routine. | 4334 * Main matching routine. |
4309 * | 4335 * |
5447 #endif | 5473 #endif |
5448 /* Inline optimized code for addstate() if we know the state is | 5474 /* Inline optimized code for addstate() if we know the state is |
5449 * the first MOPEN. */ | 5475 * the first MOPEN. */ |
5450 if (toplevel) | 5476 if (toplevel) |
5451 { | 5477 { |
5452 if (REG_MULTI) | 5478 int add = TRUE; |
5453 m->norm.list.multi[0].start.col = | 5479 int c; |
5480 | |
5481 if (prog->regstart != NUL && clen != 0) | |
5482 { | |
5483 if (nextlist->n == 0) | |
5484 { | |
5485 colnr_T col = (colnr_T)(reginput - regline) + clen; | |
5486 | |
5487 /* Nextlist is empty, we can skip ahead to the | |
5488 * character that must appear at the start. */ | |
5489 if (skip_to_start(prog->regstart, &col) == FAIL) | |
5490 break; | |
5491 #ifdef ENABLE_LOG | |
5492 fprintf(log_fd, " Skipping ahead %d bytes to regstart\n", | |
5493 col - ((colnr_T)(reginput - regline) + clen)); | |
5494 #endif | |
5495 reginput = regline + col - clen; | |
5496 } | |
5497 else | |
5498 { | |
5499 /* Checking if the required start character matches is | |
5500 * cheaper than adding a state that won't match. */ | |
5501 c = PTR2CHAR(reginput + clen); | |
5502 if (c != prog->regstart && (!ireg_ic || MB_TOLOWER(c) | |
5503 != MB_TOLOWER(prog->regstart))) | |
5504 { | |
5505 #ifdef ENABLE_LOG | |
5506 fprintf(log_fd, " Skipping start state, regstart does not match\n"); | |
5507 #endif | |
5508 add = FALSE; | |
5509 } | |
5510 } | |
5511 } | |
5512 | |
5513 if (add) | |
5514 { | |
5515 if (REG_MULTI) | |
5516 m->norm.list.multi[0].start.col = | |
5454 (colnr_T)(reginput - regline) + clen; | 5517 (colnr_T)(reginput - regline) + clen; |
5455 else | 5518 else |
5456 m->norm.list.line[0].start = reginput + clen; | 5519 m->norm.list.line[0].start = reginput + clen; |
5457 addstate(nextlist, start->out, m, clen); | 5520 addstate(nextlist, start->out, m, clen); |
5521 } | |
5458 } | 5522 } |
5459 else | 5523 else |
5460 addstate(nextlist, start, m, clen); | 5524 addstate(nextlist, start, m, clen); |
5461 } | 5525 } |
5462 | 5526 |
5699 | 5763 |
5700 if (prog->reganch && col > 0) | 5764 if (prog->reganch && col > 0) |
5701 return 0L; | 5765 return 0L; |
5702 | 5766 |
5703 if (prog->regstart != NUL) | 5767 if (prog->regstart != NUL) |
5704 { | 5768 /* Skip ahead until a character we know the match must start with. |
5705 char_u *s; | 5769 * When there is none there is no match. */ |
5706 | 5770 if (skip_to_start(prog->regstart, &col) == FAIL) |
5707 /* Skip until the char we know it must start with. | |
5708 * Used often, do some work to avoid call overhead. */ | |
5709 if (!ireg_ic | |
5710 #ifdef FEAT_MBYTE | |
5711 && !has_mbyte | |
5712 #endif | |
5713 ) | |
5714 s = vim_strbyte(regline + col, prog->regstart); | |
5715 else | |
5716 s = cstrchr(regline + col, prog->regstart); | |
5717 if (s == NULL) | |
5718 return 0L; | 5771 return 0L; |
5719 col = (int)(s - regline); | |
5720 } | |
5721 | 5772 |
5722 /* If the start column is past the maximum column: no need to try. */ | 5773 /* If the start column is past the maximum column: no need to try. */ |
5723 if (ireg_maxcol > 0 && col >= ireg_maxcol) | 5774 if (ireg_maxcol > 0 && col >= ireg_maxcol) |
5724 goto theend; | 5775 goto theend; |
5725 | 5776 |