Mercurial > vim
comparison src/regexp_nfa.c @ 4750:7793e737ec87 v7.3.1122
updated for version 7.3.1122
Problem: New regexp engine: \%> not supported.
Solution: Implement \%>.
author | Bram Moolenaar <bram@vim.org> |
---|---|
date | Wed, 05 Jun 2013 18:52:40 +0200 |
parents | 4b9503f0c7d3 |
children | 96f3348f9f11 |
comparison
equal
deleted
inserted
replaced
4749:31754f244d6e | 4750:7793e737ec87 |
---|---|
55 NFA_ZEND, /* Used for \ze */ | 55 NFA_ZEND, /* Used for \ze */ |
56 NFA_NOPEN, /* Start of subexpression marked with \%( */ | 56 NFA_NOPEN, /* Start of subexpression marked with \%( */ |
57 NFA_NCLOSE, /* End of subexpr. marked with \%( ... \) */ | 57 NFA_NCLOSE, /* End of subexpr. marked with \%( ... \) */ |
58 NFA_START_INVISIBLE, | 58 NFA_START_INVISIBLE, |
59 NFA_START_INVISIBLE_BEFORE, | 59 NFA_START_INVISIBLE_BEFORE, |
60 NFA_START_PATTERN, | |
60 NFA_END_INVISIBLE, | 61 NFA_END_INVISIBLE, |
62 NFA_END_PATTERN, | |
61 NFA_COMPOSING, /* Next nodes in NFA are part of the | 63 NFA_COMPOSING, /* Next nodes in NFA are part of the |
62 composing multibyte char */ | 64 composing multibyte char */ |
63 NFA_END_COMPOSING, /* End of a composing char in the NFA */ | 65 NFA_END_COMPOSING, /* End of a composing char in the NFA */ |
64 NFA_OPT_CHARS, /* \%[abc] */ | 66 NFA_OPT_CHARS, /* \%[abc] */ |
65 | 67 |
1503 else if (op == '!') | 1505 else if (op == '!') |
1504 /* \@<! */ | 1506 /* \@<! */ |
1505 i = NFA_PREV_ATOM_JUST_BEFORE_NEG; | 1507 i = NFA_PREV_ATOM_JUST_BEFORE_NEG; |
1506 break; | 1508 break; |
1507 case '>': | 1509 case '>': |
1508 /* \@> Not supported yet */ | 1510 /* \@> */ |
1509 /* i = NFA_PREV_ATOM_LIKE_PATTERN; */ | 1511 i = NFA_PREV_ATOM_LIKE_PATTERN; |
1510 return FAIL; | 1512 break; |
1511 } | 1513 } |
1512 if (i == 0) | 1514 if (i == 0) |
1513 { | 1515 { |
1514 syntax_error = TRUE; | 1516 syntax_error = TRUE; |
1515 EMSGN(_("E869: (NFA) Unknown operator '\\@%c'"), op); | 1517 EMSGN(_("E869: (NFA) Unknown operator '\\@%c'"), op); |
1883 STRCPY(code, "NFA_PREV_ATOM_NO_WIDTH_NEG"); break; | 1885 STRCPY(code, "NFA_PREV_ATOM_NO_WIDTH_NEG"); break; |
1884 case NFA_PREV_ATOM_JUST_BEFORE: | 1886 case NFA_PREV_ATOM_JUST_BEFORE: |
1885 STRCPY(code, "NFA_PREV_ATOM_JUST_BEFORE"); break; | 1887 STRCPY(code, "NFA_PREV_ATOM_JUST_BEFORE"); break; |
1886 case NFA_PREV_ATOM_JUST_BEFORE_NEG: | 1888 case NFA_PREV_ATOM_JUST_BEFORE_NEG: |
1887 STRCPY(code, "NFA_PREV_ATOM_JUST_BEFORE_NEG"); break; | 1889 STRCPY(code, "NFA_PREV_ATOM_JUST_BEFORE_NEG"); break; |
1890 case NFA_PREV_ATOM_LIKE_PATTERN: | |
1891 STRCPY(code, "NFA_PREV_ATOM_LIKE_PATTERN"); break; | |
1892 | |
1888 case NFA_NOPEN: STRCPY(code, "NFA_NOPEN"); break; | 1893 case NFA_NOPEN: STRCPY(code, "NFA_NOPEN"); break; |
1889 case NFA_NCLOSE: STRCPY(code, "NFA_NCLOSE"); break; | 1894 case NFA_NCLOSE: STRCPY(code, "NFA_NCLOSE"); break; |
1890 case NFA_START_INVISIBLE: STRCPY(code, "NFA_START_INVISIBLE"); break; | 1895 case NFA_START_INVISIBLE: STRCPY(code, "NFA_START_INVISIBLE"); break; |
1891 case NFA_START_INVISIBLE_BEFORE: | 1896 case NFA_START_INVISIBLE_BEFORE: |
1892 STRCPY(code, "NFA_START_INVISIBLE_BEFORE"); break; | 1897 STRCPY(code, "NFA_START_INVISIBLE_BEFORE"); break; |
1898 case NFA_START_PATTERN: STRCPY(code, "NFA_START_PATTERN"); break; | |
1893 case NFA_END_INVISIBLE: STRCPY(code, "NFA_END_INVISIBLE"); break; | 1899 case NFA_END_INVISIBLE: STRCPY(code, "NFA_END_INVISIBLE"); break; |
1900 case NFA_END_PATTERN: STRCPY(code, "NFA_END_PATTERN"); break; | |
1894 | 1901 |
1895 case NFA_COMPOSING: STRCPY(code, "NFA_COMPOSING"); break; | 1902 case NFA_COMPOSING: STRCPY(code, "NFA_COMPOSING"); break; |
1896 case NFA_END_COMPOSING: STRCPY(code, "NFA_END_COMPOSING"); break; | 1903 case NFA_END_COMPOSING: STRCPY(code, "NFA_END_COMPOSING"); break; |
1897 case NFA_OPT_CHARS: STRCPY(code, "NFA_OPT_CHARS"); break; | 1904 case NFA_OPT_CHARS: STRCPY(code, "NFA_OPT_CHARS"); break; |
1898 | 1905 |
2599 | 2606 |
2600 case NFA_PREV_ATOM_NO_WIDTH: | 2607 case NFA_PREV_ATOM_NO_WIDTH: |
2601 case NFA_PREV_ATOM_NO_WIDTH_NEG: | 2608 case NFA_PREV_ATOM_NO_WIDTH_NEG: |
2602 case NFA_PREV_ATOM_JUST_BEFORE: | 2609 case NFA_PREV_ATOM_JUST_BEFORE: |
2603 case NFA_PREV_ATOM_JUST_BEFORE_NEG: | 2610 case NFA_PREV_ATOM_JUST_BEFORE_NEG: |
2611 case NFA_PREV_ATOM_LIKE_PATTERN: | |
2604 { | 2612 { |
2605 int neg = (*p == NFA_PREV_ATOM_NO_WIDTH_NEG | 2613 int neg = (*p == NFA_PREV_ATOM_NO_WIDTH_NEG |
2606 || *p == NFA_PREV_ATOM_JUST_BEFORE_NEG); | 2614 || *p == NFA_PREV_ATOM_JUST_BEFORE_NEG); |
2607 int before = (*p == NFA_PREV_ATOM_JUST_BEFORE | 2615 int before = (*p == NFA_PREV_ATOM_JUST_BEFORE |
2608 || *p == NFA_PREV_ATOM_JUST_BEFORE_NEG); | 2616 || *p == NFA_PREV_ATOM_JUST_BEFORE_NEG); |
2609 int n; | 2617 int pattern = (*p == NFA_PREV_ATOM_LIKE_PATTERN); |
2618 int start_state = NFA_START_INVISIBLE; | |
2619 int end_state = NFA_END_INVISIBLE; | |
2620 int n = 0; | |
2621 nfa_state_T *zend; | |
2622 nfa_state_T *skip; | |
2623 | |
2624 if (before) | |
2625 start_state = NFA_START_INVISIBLE_BEFORE; | |
2626 else if (pattern) | |
2627 { | |
2628 start_state = NFA_START_PATTERN; | |
2629 end_state = NFA_END_PATTERN; | |
2630 } | |
2610 | 2631 |
2611 if (before) | 2632 if (before) |
2612 n = *++p; /* get the count */ | 2633 n = *++p; /* get the count */ |
2613 | 2634 |
2614 /* The \@= operator: match the preceding atom with zero width. | 2635 /* The \@= operator: match the preceding atom with zero width. |
2618 * Surrounds the preceding atom with START_INVISIBLE and | 2639 * Surrounds the preceding atom with START_INVISIBLE and |
2619 * END_INVISIBLE, similarly to MOPEN. */ | 2640 * END_INVISIBLE, similarly to MOPEN. */ |
2620 | 2641 |
2621 if (nfa_calc_size == TRUE) | 2642 if (nfa_calc_size == TRUE) |
2622 { | 2643 { |
2623 nstate += 2; | 2644 nstate += pattern ? 4 : 2; |
2624 break; | 2645 break; |
2625 } | 2646 } |
2626 e = POP(); | 2647 e = POP(); |
2627 s1 = alloc_state(NFA_END_INVISIBLE, NULL, NULL); | 2648 s1 = alloc_state(end_state, NULL, NULL); |
2628 if (s1 == NULL) | 2649 if (s1 == NULL) |
2629 goto theend; | 2650 goto theend; |
2630 patch(e.out, s1); | 2651 |
2631 | 2652 s = alloc_state(start_state, e.start, s1); |
2632 s = alloc_state(NFA_START_INVISIBLE, e.start, s1); | |
2633 if (s == NULL) | 2653 if (s == NULL) |
2634 goto theend; | 2654 goto theend; |
2635 if (neg) | 2655 if (neg) |
2636 { | 2656 { |
2637 s->negated = TRUE; | 2657 s->negated = TRUE; |
2638 s1->negated = TRUE; | 2658 s1->negated = TRUE; |
2639 } | 2659 } |
2640 if (before) | 2660 if (before) |
2641 { | |
2642 s->val = n; /* store the count */ | 2661 s->val = n; /* store the count */ |
2643 ++s->c; /* NFA_START_INVISIBLE -> NFA_START_INVISIBLE_BEFORE */ | 2662 if (pattern) |
2644 } | 2663 { |
2645 | 2664 /* NFA_ZEND -> NFA_END_PATTERN -> NFA_SKIP -> what follows. */ |
2646 PUSH(frag(s, list1(&s1->out))); | 2665 skip = alloc_state(NFA_SKIP, NULL, NULL); |
2666 zend = alloc_state(NFA_ZEND, s1, NULL); | |
2667 s1->out= skip; | |
2668 patch(e.out, zend); | |
2669 PUSH(frag(s, list1(&skip->out))); | |
2670 } | |
2671 else | |
2672 { | |
2673 patch(e.out, s1); | |
2674 PUSH(frag(s, list1(&s1->out))); | |
2675 } | |
2647 break; | 2676 break; |
2648 } | 2677 } |
2649 | 2678 |
2650 #ifdef FEAT_MBYTE | 2679 #ifdef FEAT_MBYTE |
2651 case NFA_COMPOSING: /* char with composing char */ | 2680 case NFA_COMPOSING: /* char with composing char */ |
2951 { | 2980 { |
2952 int j; | 2981 int j; |
2953 | 2982 |
2954 for (j = 0; j < sub->in_use; j++) | 2983 for (j = 0; j < sub->in_use; j++) |
2955 if (REG_MULTI) | 2984 if (REG_MULTI) |
2956 fprintf(log_fd, "\n *** group %d, start: c=%d, l=%d, end: c=%d, l=%d", | 2985 fprintf(log_fd, "*** group %d, start: c=%d, l=%d, end: c=%d, l=%d\n", |
2957 j, | 2986 j, |
2958 sub->list.multi[j].start.col, | 2987 sub->list.multi[j].start.col, |
2959 (int)sub->list.multi[j].start.lnum, | 2988 (int)sub->list.multi[j].start.lnum, |
2960 sub->list.multi[j].end.col, | 2989 sub->list.multi[j].end.col, |
2961 (int)sub->list.multi[j].end.lnum); | 2990 (int)sub->list.multi[j].end.lnum); |
2962 else | 2991 else |
2963 { | 2992 { |
2964 char *s = (char *)sub->list.line[j].start; | 2993 char *s = (char *)sub->list.line[j].start; |
2965 char *e = (char *)sub->list.line[j].end; | 2994 char *e = (char *)sub->list.line[j].end; |
2966 | 2995 |
2967 fprintf(log_fd, "\n *** group %d, start: \"%s\", end: \"%s\"", | 2996 fprintf(log_fd, "*** group %d, start: \"%s\", end: \"%s\"\n", |
2968 j, | 2997 j, |
2969 s == NULL ? "NULL" : s, | 2998 s == NULL ? "NULL" : s, |
2970 e == NULL ? "NULL" : e); | 2999 e == NULL ? "NULL" : e); |
2971 } | 3000 } |
2972 fprintf(log_fd, "\n"); | |
2973 } | 3001 } |
2974 #endif | 3002 #endif |
2975 | 3003 |
2976 /* Used during execution: whether a match has been found. */ | 3004 /* Used during execution: whether a match has been found. */ |
2977 static int nfa_match; | 3005 static int nfa_match; |
4315 clen = 0; | 4343 clen = 0; |
4316 goto nextchar; | 4344 goto nextchar; |
4317 } | 4345 } |
4318 | 4346 |
4319 case NFA_END_INVISIBLE: | 4347 case NFA_END_INVISIBLE: |
4348 case NFA_END_PATTERN: | |
4320 /* | 4349 /* |
4321 * This is only encountered after a NFA_START_INVISIBLE or | 4350 * This is only encountered after a NFA_START_INVISIBLE or |
4322 * NFA_START_INVISIBLE_BEFORE node. | 4351 * NFA_START_INVISIBLE_BEFORE node. |
4323 * They surround a zero-width group, used with "\@=", "\&", | 4352 * They surround a zero-width group, used with "\@=", "\&", |
4324 * "\@!", "\@<=" and "\@<!". | 4353 * "\@!", "\@<=" and "\@<!". |
4341 fprintf(log_fd, "Current col: %d, endp col: %d\n", | 4370 fprintf(log_fd, "Current col: %d, endp col: %d\n", |
4342 (int)(reginput - regline), | 4371 (int)(reginput - regline), |
4343 (int)(nfa_endp->se_u.ptr - reginput)); | 4372 (int)(nfa_endp->se_u.ptr - reginput)); |
4344 } | 4373 } |
4345 #endif | 4374 #endif |
4346 /* It's only a match if it ends at "nfa_endp" */ | 4375 /* If "nfa_endp" is set it's only a match if it ends at |
4376 * "nfa_endp" */ | |
4347 if (nfa_endp != NULL && (REG_MULTI | 4377 if (nfa_endp != NULL && (REG_MULTI |
4348 ? (reglnum != nfa_endp->se_u.pos.lnum | 4378 ? (reglnum != nfa_endp->se_u.pos.lnum |
4349 || (int)(reginput - regline) | 4379 || (int)(reginput - regline) |
4350 != nfa_endp->se_u.pos.col) | 4380 != nfa_endp->se_u.pos.col) |
4351 : reginput != nfa_endp->se_u.ptr)) | 4381 : reginput != nfa_endp->se_u.ptr)) |
4358 #ifdef FEAT_SYN_HL | 4388 #ifdef FEAT_SYN_HL |
4359 if (nfa_has_zsubexpr) | 4389 if (nfa_has_zsubexpr) |
4360 copy_sub(&m->synt, &t->subs.synt); | 4390 copy_sub(&m->synt, &t->subs.synt); |
4361 #endif | 4391 #endif |
4362 } | 4392 } |
4393 #ifdef ENABLE_LOG | |
4394 fprintf(log_fd, "Match found:\n"); | |
4395 log_subsexpr(m); | |
4396 #endif | |
4363 nfa_match = TRUE; | 4397 nfa_match = TRUE; |
4364 break; | 4398 break; |
4365 | 4399 |
4366 case NFA_START_INVISIBLE: | 4400 case NFA_START_INVISIBLE: |
4367 case NFA_START_INVISIBLE_BEFORE: | 4401 case NFA_START_INVISIBLE_BEFORE: |
4433 pim, &listidx); | 4467 pim, &listidx); |
4434 } | 4468 } |
4435 } | 4469 } |
4436 break; | 4470 break; |
4437 | 4471 |
4472 case NFA_START_PATTERN: | |
4473 /* First try matching the pattern. */ | |
4474 result = recursive_regmatch(t->state, prog, | |
4475 submatch, m, &listids); | |
4476 if (result) | |
4477 { | |
4478 int bytelen; | |
4479 | |
4480 #ifdef ENABLE_LOG | |
4481 fprintf(log_fd, "NFA_START_PATTERN matches:\n"); | |
4482 log_subsexpr(m); | |
4483 #endif | |
4484 /* Copy submatch info from the recursive call */ | |
4485 copy_sub_off(&t->subs.norm, &m->norm); | |
4486 #ifdef FEAT_SYN_HL | |
4487 copy_sub_off(&t->subs.synt, &m->synt); | |
4488 #endif | |
4489 /* Now we need to skip over the matched text and then | |
4490 * continue with what follows. */ | |
4491 if (REG_MULTI) | |
4492 /* TODO: multi-line match */ | |
4493 bytelen = m->norm.list.multi[0].end.col | |
4494 - (int)(reginput - regline); | |
4495 else | |
4496 bytelen = (int)(m->norm.list.line[0].end - reginput); | |
4497 | |
4498 #ifdef ENABLE_LOG | |
4499 fprintf(log_fd, "NFA_START_PATTERN length: %d\n", bytelen); | |
4500 #endif | |
4501 if (bytelen == 0) | |
4502 { | |
4503 /* empty match, output of corresponding | |
4504 * NFA_END_PATTERN/NFA_SKIP to be used at current | |
4505 * position */ | |
4506 addstate_here(thislist, t->state->out1->out->out, | |
4507 &t->subs, t->pim, &listidx); | |
4508 } | |
4509 else if (bytelen <= clen) | |
4510 { | |
4511 /* match current character, output of corresponding | |
4512 * NFA_END_PATTERN to be used at next position. */ | |
4513 ll = nextlist; | |
4514 add_state = t->state->out1->out->out; | |
4515 add_off = clen; | |
4516 } | |
4517 else | |
4518 { | |
4519 /* skip over the matched characters, set character | |
4520 * count in NFA_SKIP */ | |
4521 ll = nextlist; | |
4522 add_state = t->state->out1->out; | |
4523 add_off = bytelen; | |
4524 add_count = bytelen - clen; | |
4525 } | |
4526 } | |
4527 break; | |
4528 | |
4438 case NFA_BOL: | 4529 case NFA_BOL: |
4439 if (reginput == regline) | 4530 if (reginput == regline) |
4440 addstate_here(thislist, t->state->out, &t->subs, | 4531 addstate_here(thislist, t->state->out, &t->subs, |
4441 t->pim, &listidx); | 4532 t->pim, &listidx); |
4442 break; | 4533 break; |
4844 /* match current character, jump ahead to out of | 4935 /* match current character, jump ahead to out of |
4845 * NFA_SKIP */ | 4936 * NFA_SKIP */ |
4846 ll = nextlist; | 4937 ll = nextlist; |
4847 add_state = t->state->out->out; | 4938 add_state = t->state->out->out; |
4848 add_off = clen; | 4939 add_off = clen; |
4849 #ifdef ENABLE_LOG | |
4850 log_subsexpr(&nextlist->t[nextlist->n - 1].subs); | |
4851 #endif | |
4852 } | 4940 } |
4853 else | 4941 else |
4854 { | 4942 { |
4855 /* skip over the matched characters, set character | 4943 /* skip over the matched characters, set character |
4856 * count in NFA_SKIP */ | 4944 * count in NFA_SKIP */ |
4857 ll = nextlist; | 4945 ll = nextlist; |
4858 add_state = t->state->out; | 4946 add_state = t->state->out; |
4859 add_off = bytelen; | 4947 add_off = bytelen; |
4860 add_count = bytelen - clen; | 4948 add_count = bytelen - clen; |
4861 #ifdef ENABLE_LOG | |
4862 log_subsexpr(&nextlist->t[nextlist->n - 1].subs); | |
4863 #endif | |
4864 } | 4949 } |
4865 } | 4950 } |
4866 break; | 4951 break; |
4867 } | 4952 } |
4868 case NFA_SKIP: | 4953 case NFA_SKIP: |
4871 { | 4956 { |
4872 /* end of match, go to what follows */ | 4957 /* end of match, go to what follows */ |
4873 ll = nextlist; | 4958 ll = nextlist; |
4874 add_state = t->state->out; | 4959 add_state = t->state->out; |
4875 add_off = clen; | 4960 add_off = clen; |
4876 #ifdef ENABLE_LOG | |
4877 log_subsexpr(&nextlist->t[nextlist->n - 1].subs); | |
4878 #endif | |
4879 } | 4961 } |
4880 else | 4962 else |
4881 { | 4963 { |
4882 /* add state again with decremented count */ | 4964 /* add state again with decremented count */ |
4883 ll = nextlist; | 4965 ll = nextlist; |
4884 add_state = t->state; | 4966 add_state = t->state; |
4885 add_off = 0; | 4967 add_off = 0; |
4886 add_count = t->count - clen; | 4968 add_count = t->count - clen; |
4887 #ifdef ENABLE_LOG | |
4888 log_subsexpr(&nextlist->t[nextlist->n - 1].subs); | |
4889 #endif | |
4890 } | 4969 } |
4891 break; | 4970 break; |
4892 | 4971 |
4893 case NFA_LNUM: | 4972 case NFA_LNUM: |
4894 case NFA_LNUM_GT: | 4973 case NFA_LNUM_GT: |
5156 | 5235 |
5157 #ifdef ENABLE_LOG | 5236 #ifdef ENABLE_LOG |
5158 f = fopen(NFA_REGEXP_RUN_LOG, "a"); | 5237 f = fopen(NFA_REGEXP_RUN_LOG, "a"); |
5159 if (f != NULL) | 5238 if (f != NULL) |
5160 { | 5239 { |
5161 fprintf(f, "\n\n\n\n\n\n\t\t=======================================================\n"); | 5240 fprintf(f, "\n\n\t=======================================================\n"); |
5162 fprintf(f, " =======================================================\n"); | |
5163 #ifdef DEBUG | 5241 #ifdef DEBUG |
5164 fprintf(f, "\tRegexp is \"%s\"\n", nfa_regengine.expr); | 5242 fprintf(f, "\tRegexp is \"%s\"\n", nfa_regengine.expr); |
5165 #endif | 5243 #endif |
5166 fprintf(f, "\tInput text is \"%s\" \n", reginput); | 5244 fprintf(f, "\tInput text is \"%s\" \n", reginput); |
5167 fprintf(f, " =======================================================\n\n"); | 5245 fprintf(f, "\t=======================================================\n\n"); |
5168 nfa_print_state(f, start); | 5246 nfa_print_state(f, start); |
5169 fprintf(f, "\n\n"); | 5247 fprintf(f, "\n\n"); |
5170 fclose(f); | 5248 fclose(f); |
5171 } | 5249 } |
5172 else | 5250 else |