Mercurial > vim
comparison src/regexp_nfa.c @ 4657:93b7ed814bec v7.3.1076
updated for version 7.3.1076
Problem: New regexp engine: \@= and \& don't work.
Solution: Make these items work. Add column info to logging.
author | Bram Moolenaar <bram@vim.org> |
---|---|
date | Thu, 30 May 2013 21:42:13 +0200 |
parents | 779ca415f8e1 |
children | 0dce3d812e7a |
comparison
equal
deleted
inserted
replaced
4656:1a4b98208569 | 4657:93b7ed814bec |
---|---|
1738 | 1738 |
1739 case NFA_PREV_ATOM_NO_WIDTH: | 1739 case NFA_PREV_ATOM_NO_WIDTH: |
1740 STRCPY(code, "NFA_PREV_ATOM_NO_WIDTH"); break; | 1740 STRCPY(code, "NFA_PREV_ATOM_NO_WIDTH"); break; |
1741 case NFA_PREV_ATOM_NO_WIDTH_NEG: | 1741 case NFA_PREV_ATOM_NO_WIDTH_NEG: |
1742 STRCPY(code, "NFA_PREV_ATOM_NO_WIDTH_NEG"); break; | 1742 STRCPY(code, "NFA_PREV_ATOM_NO_WIDTH_NEG"); break; |
1743 case NFA_NOPEN: STRCPY(code, "NFA_MOPEN_INVISIBLE"); break; | 1743 case NFA_NOPEN: STRCPY(code, "NFA_NOPEN"); break; |
1744 case NFA_NCLOSE: STRCPY(code, "NFA_MCLOSE_INVISIBLE"); break; | 1744 case NFA_NCLOSE: STRCPY(code, "NFA_NCLOSE"); break; |
1745 case NFA_START_INVISIBLE: STRCPY(code, "NFA_START_INVISIBLE"); break; | 1745 case NFA_START_INVISIBLE: STRCPY(code, "NFA_START_INVISIBLE"); break; |
1746 case NFA_END_INVISIBLE: STRCPY(code, "NFA_END_INVISIBLE"); break; | 1746 case NFA_END_INVISIBLE: STRCPY(code, "NFA_END_INVISIBLE"); break; |
1747 | 1747 |
1748 case NFA_COMPOSING: STRCPY(code, "NFA_COMPOSING"); break; | 1748 case NFA_COMPOSING: STRCPY(code, "NFA_COMPOSING"); break; |
1749 case NFA_END_COMPOSING: STRCPY(code, "NFA_END_COMPOSING"); break; | 1749 case NFA_END_COMPOSING: STRCPY(code, "NFA_END_COMPOSING"); break; |
2371 goto theend; | 2371 goto theend; |
2372 PUSH(frag(s, list1(&s->out))); | 2372 PUSH(frag(s, list1(&s->out))); |
2373 break; | 2373 break; |
2374 | 2374 |
2375 case NFA_PREV_ATOM_NO_WIDTH: | 2375 case NFA_PREV_ATOM_NO_WIDTH: |
2376 /* The \@= operator: match the preceding atom with 0 width. | 2376 /* The \@= operator: match the preceding atom with zero width. |
2377 * Surrounds the preceding atom with START_INVISIBLE and | 2377 * Surrounds the preceding atom with START_INVISIBLE and |
2378 * END_INVISIBLE, similarly to MOPEN. | 2378 * END_INVISIBLE, similarly to MOPEN. */ |
2379 */ | |
2380 /* TODO: Maybe this drops the speed? */ | |
2381 goto theend; | |
2382 | 2379 |
2383 if (nfa_calc_size == TRUE) | 2380 if (nfa_calc_size == TRUE) |
2384 { | 2381 { |
2385 nstate += 2; | 2382 nstate += 2; |
2386 break; | 2383 break; |
2743 nfa_thread_T *thread; | 2740 nfa_thread_T *thread; |
2744 lpos_T save_lpos; | 2741 lpos_T save_lpos; |
2745 int save_in_use; | 2742 int save_in_use; |
2746 char_u *save_ptr; | 2743 char_u *save_ptr; |
2747 int i; | 2744 int i; |
2745 #ifdef ENABLE_LOG | |
2746 int did_print = FALSE; | |
2747 #endif | |
2748 | 2748 |
2749 if (l == NULL || state == NULL) | 2749 if (l == NULL || state == NULL) |
2750 return; | 2750 return; |
2751 | 2751 |
2752 switch (state->c) | 2752 switch (state->c) |
2780 case NFA_MOPEN + 8: | 2780 case NFA_MOPEN + 8: |
2781 case NFA_MOPEN + 9: | 2781 case NFA_MOPEN + 9: |
2782 /* These nodes do not need to be added, but we need to bail out | 2782 /* These nodes do not need to be added, but we need to bail out |
2783 * when it was tried to be added to this list before. */ | 2783 * when it was tried to be added to this list before. */ |
2784 if (state->lastlist == l->id) | 2784 if (state->lastlist == l->id) |
2785 return; | 2785 goto skip_add; |
2786 state->lastlist = l->id; | 2786 state->lastlist = l->id; |
2787 break; | 2787 break; |
2788 | 2788 |
2789 default: | 2789 default: |
2790 if (state->lastlist == l->id) | 2790 if (state->lastlist == l->id) |
2791 { | 2791 { |
2792 /* This state is already in the list, don't add it again, | 2792 /* This state is already in the list, don't add it again, |
2793 * unless it is an MOPEN that is used for a backreference. */ | 2793 * unless it is an MOPEN that is used for a backreference. */ |
2794 if (!nfa_has_backref) | 2794 if (!nfa_has_backref) |
2795 { | |
2796 skip_add: | |
2797 #ifdef ENABLE_LOG | |
2798 nfa_set_code(state->c); | |
2799 fprintf(log_fd, "> Not adding state %d to list %d. char %d: %s\n", | |
2800 abs(state->id), l->id, state->c, code); | |
2801 #endif | |
2795 return; | 2802 return; |
2803 } | |
2796 | 2804 |
2797 /* See if the same state is already in the list with the same | 2805 /* See if the same state is already in the list with the same |
2798 * positions. */ | 2806 * positions. */ |
2799 for (i = 0; i < l->n; ++i) | 2807 for (i = 0; i < l->n; ++i) |
2800 { | 2808 { |
2801 thread = &l->t[i]; | 2809 thread = &l->t[i]; |
2802 if (thread->state->id == state->id | 2810 if (thread->state->id == state->id |
2803 && sub_equal(&thread->sub, sub)) | 2811 && sub_equal(&thread->sub, sub)) |
2804 return; | 2812 goto skip_add; |
2805 } | 2813 } |
2806 } | 2814 } |
2807 | 2815 |
2808 /* when there are backreferences the number of states may be (a | 2816 /* when there are backreferences the number of states may be (a |
2809 * lot) bigger */ | 2817 * lot) bigger */ |
2830 else | 2838 else |
2831 mch_memmove(&thread->sub.list.line[0], | 2839 mch_memmove(&thread->sub.list.line[0], |
2832 &sub->list.line[0], | 2840 &sub->list.line[0], |
2833 sizeof(struct linepos) * sub->in_use); | 2841 sizeof(struct linepos) * sub->in_use); |
2834 } | 2842 } |
2835 } | |
2836 | |
2837 #ifdef ENABLE_LOG | 2843 #ifdef ENABLE_LOG |
2838 nfa_set_code(state->c); | 2844 { |
2839 fprintf(log_fd, "> Adding state %d to list. Character %d: %s\n", | 2845 int col; |
2840 abs(state->id), state->c, code); | 2846 |
2847 if (thread->sub.in_use <= 0) | |
2848 col = -1; | |
2849 else if (REG_MULTI) | |
2850 col = thread->sub.list.multi[0].start.col; | |
2851 else | |
2852 col = (int)(thread->sub.list.line[0].start - regline); | |
2853 nfa_set_code(state->c); | |
2854 fprintf(log_fd, "> Adding state %d to list %d. char %d: %s (start col %d)\n", | |
2855 abs(state->id), l->id, state->c, code, col); | |
2856 did_print = TRUE; | |
2857 } | |
2858 #endif | |
2859 } | |
2860 | |
2861 #ifdef ENABLE_LOG | |
2862 if (!did_print) | |
2863 { | |
2864 int col; | |
2865 | |
2866 if (sub->in_use <= 0) | |
2867 col = -1; | |
2868 else if (REG_MULTI) | |
2869 col = sub->list.multi[0].start.col; | |
2870 else | |
2871 col = (int)(sub->list.line[0].start - regline); | |
2872 nfa_set_code(state->c); | |
2873 fprintf(log_fd, "> Processing state %d for list %d. char %d: %s (start col %d)\n", | |
2874 abs(state->id), l->id, state->c, code, col); | |
2875 } | |
2841 #endif | 2876 #endif |
2842 switch (state->c) | 2877 switch (state->c) |
2843 { | 2878 { |
2844 case NFA_MATCH: | 2879 case NFA_MATCH: |
2845 nfa_match = TRUE; | 2880 nfa_match = TRUE; |
2870 case NFA_SKIP_CHAR: | 2905 case NFA_SKIP_CHAR: |
2871 case NFA_NOPEN: | 2906 case NFA_NOPEN: |
2872 case NFA_NCLOSE: | 2907 case NFA_NCLOSE: |
2873 addstate(l, state->out, sub, off); | 2908 addstate(l, state->out, sub, off); |
2874 break; | 2909 break; |
2875 | |
2876 /* If this state is reached, then a recursive call of nfa_regmatch() | |
2877 * succeeded. the next call saves the found submatches in the | |
2878 * first state after the "invisible" branch. */ | |
2879 #if 0 | |
2880 case NFA_END_INVISIBLE: | |
2881 break; | |
2882 #endif | |
2883 | 2910 |
2884 case NFA_MOPEN + 0: | 2911 case NFA_MOPEN + 0: |
2885 case NFA_MOPEN + 1: | 2912 case NFA_MOPEN + 1: |
2886 case NFA_MOPEN + 2: | 2913 case NFA_MOPEN + 2: |
2887 case NFA_MOPEN + 3: | 2914 case NFA_MOPEN + 3: |
3448 #ifdef NFA_REGEXP_DEBUG_LOG | 3475 #ifdef NFA_REGEXP_DEBUG_LOG |
3449 nfa_set_code(t->state->c); | 3476 nfa_set_code(t->state->c); |
3450 fprintf(debug, "%s, ", code); | 3477 fprintf(debug, "%s, ", code); |
3451 #endif | 3478 #endif |
3452 #ifdef ENABLE_LOG | 3479 #ifdef ENABLE_LOG |
3453 nfa_set_code(t->state->c); | 3480 { |
3454 fprintf(log_fd, "(%d) %s, code %d ... \n", abs(t->state->id), | 3481 int col; |
3455 code, (int)t->state->c); | 3482 |
3483 if (t->sub.in_use <= 0) | |
3484 col = -1; | |
3485 else if (REG_MULTI) | |
3486 col = t->sub.list.multi[0].start.col; | |
3487 else | |
3488 col = (int)(t->sub.list.line[0].start - regline); | |
3489 nfa_set_code(t->state->c); | |
3490 fprintf(log_fd, "(%d) char %d %s (start col %d) ... \n", | |
3491 abs(t->state->id), (int)t->state->c, code, col); | |
3492 } | |
3456 #endif | 3493 #endif |
3457 | 3494 |
3458 /* | 3495 /* |
3459 * Handle the possible codes of the current state. | 3496 * Handle the possible codes of the current state. |
3460 * The most important is NFA_MATCH. | 3497 * The most important is NFA_MATCH. |
3502 * the parent call. */ | 3539 * the parent call. */ |
3503 if (start->c == NFA_MOPEN + 0) | 3540 if (start->c == NFA_MOPEN + 0) |
3504 addstate_here(thislist, t->state->out, &t->sub, &listidx); | 3541 addstate_here(thislist, t->state->out, &t->sub, &listidx); |
3505 else | 3542 else |
3506 { | 3543 { |
3544 /* TODO: only copy positions in use. */ | |
3507 *m = t->sub; | 3545 *m = t->sub; |
3508 nfa_match = TRUE; | 3546 nfa_match = TRUE; |
3509 } | 3547 } |
3510 break; | 3548 break; |
3511 | 3549 |
3536 nfa_save_listids(start, listids); | 3574 nfa_save_listids(start, listids); |
3537 nfa_set_null_listids(start); | 3575 nfa_set_null_listids(start); |
3538 result = nfa_regmatch(t->state->out, submatch, m); | 3576 result = nfa_regmatch(t->state->out, submatch, m); |
3539 nfa_set_neg_listids(start); | 3577 nfa_set_neg_listids(start); |
3540 nfa_restore_listids(start, listids); | 3578 nfa_restore_listids(start, listids); |
3579 nfa_match = FALSE; | |
3541 | 3580 |
3542 #ifdef ENABLE_LOG | 3581 #ifdef ENABLE_LOG |
3543 log_fd = fopen(NFA_REGEXP_RUN_LOG, "a"); | 3582 log_fd = fopen(NFA_REGEXP_RUN_LOG, "a"); |
3544 if (log_fd != NULL) | 3583 if (log_fd != NULL) |
3545 { | 3584 { |
3573 for (j = 1; j < m->in_use; j++) | 3612 for (j = 1; j < m->in_use; j++) |
3574 { | 3613 { |
3575 t->sub.list.line[j].start = m->list.line[j].start; | 3614 t->sub.list.line[j].start = m->list.line[j].start; |
3576 t->sub.list.line[j].end = m->list.line[j].end; | 3615 t->sub.list.line[j].end = m->list.line[j].end; |
3577 } | 3616 } |
3578 t->sub.in_use = m->in_use; | 3617 if (m->in_use > t->sub.in_use) |
3579 | 3618 t->sub.in_use = m->in_use; |
3580 /* t->state->out1 is the corresponding END_INVISIBLE node */ | 3619 |
3620 /* t->state->out1 is the corresponding END_INVISIBLE node; | |
3621 * Add it to the current list (zero-width match). */ | |
3581 addstate_here(thislist, t->state->out1->out, &t->sub, | 3622 addstate_here(thislist, t->state->out1->out, &t->sub, |
3582 &listidx); | 3623 &listidx); |
3583 } | 3624 } |
3584 else | 3625 else |
3585 { | 3626 { |
4144 fprintf(f, " =======================================================\n"); | 4185 fprintf(f, " =======================================================\n"); |
4145 #ifdef DEBUG | 4186 #ifdef DEBUG |
4146 fprintf(f, "\tRegexp is \"%s\"\n", nfa_regengine.expr); | 4187 fprintf(f, "\tRegexp is \"%s\"\n", nfa_regengine.expr); |
4147 #endif | 4188 #endif |
4148 fprintf(f, "\tInput text is \"%s\" \n", reginput); | 4189 fprintf(f, "\tInput text is \"%s\" \n", reginput); |
4149 fprintf(f, " =======================================================\n\n\n\n\n\n\n"); | 4190 fprintf(f, " =======================================================\n\n"); |
4150 nfa_print_state(f, start); | 4191 nfa_print_state(f, start); |
4151 fprintf(f, "\n\n"); | 4192 fprintf(f, "\n\n"); |
4152 fclose(f); | 4193 fclose(f); |
4153 } | 4194 } |
4154 else | 4195 else |