comparison src/regexp_nfa.c @ 4657:93b7ed814bec v7.3.1076

updated for version 7.3.1076 Problem: New regexp engine: \@= and \& don't work. Solution: Make these items work. Add column info to logging.
author Bram Moolenaar <bram@vim.org>
date Thu, 30 May 2013 21:42:13 +0200
parents 779ca415f8e1
children 0dce3d812e7a
comparison
equal deleted inserted replaced
4656:1a4b98208569 4657:93b7ed814bec
1738 1738
1739 case NFA_PREV_ATOM_NO_WIDTH: 1739 case NFA_PREV_ATOM_NO_WIDTH:
1740 STRCPY(code, "NFA_PREV_ATOM_NO_WIDTH"); break; 1740 STRCPY(code, "NFA_PREV_ATOM_NO_WIDTH"); break;
1741 case NFA_PREV_ATOM_NO_WIDTH_NEG: 1741 case NFA_PREV_ATOM_NO_WIDTH_NEG:
1742 STRCPY(code, "NFA_PREV_ATOM_NO_WIDTH_NEG"); break; 1742 STRCPY(code, "NFA_PREV_ATOM_NO_WIDTH_NEG"); break;
1743 case NFA_NOPEN: STRCPY(code, "NFA_MOPEN_INVISIBLE"); break; 1743 case NFA_NOPEN: STRCPY(code, "NFA_NOPEN"); break;
1744 case NFA_NCLOSE: STRCPY(code, "NFA_MCLOSE_INVISIBLE"); break; 1744 case NFA_NCLOSE: STRCPY(code, "NFA_NCLOSE"); break;
1745 case NFA_START_INVISIBLE: STRCPY(code, "NFA_START_INVISIBLE"); break; 1745 case NFA_START_INVISIBLE: STRCPY(code, "NFA_START_INVISIBLE"); break;
1746 case NFA_END_INVISIBLE: STRCPY(code, "NFA_END_INVISIBLE"); break; 1746 case NFA_END_INVISIBLE: STRCPY(code, "NFA_END_INVISIBLE"); break;
1747 1747
1748 case NFA_COMPOSING: STRCPY(code, "NFA_COMPOSING"); break; 1748 case NFA_COMPOSING: STRCPY(code, "NFA_COMPOSING"); break;
1749 case NFA_END_COMPOSING: STRCPY(code, "NFA_END_COMPOSING"); break; 1749 case NFA_END_COMPOSING: STRCPY(code, "NFA_END_COMPOSING"); break;
2371 goto theend; 2371 goto theend;
2372 PUSH(frag(s, list1(&s->out))); 2372 PUSH(frag(s, list1(&s->out)));
2373 break; 2373 break;
2374 2374
2375 case NFA_PREV_ATOM_NO_WIDTH: 2375 case NFA_PREV_ATOM_NO_WIDTH:
2376 /* The \@= operator: match the preceding atom with 0 width. 2376 /* The \@= operator: match the preceding atom with zero width.
2377 * Surrounds the preceding atom with START_INVISIBLE and 2377 * Surrounds the preceding atom with START_INVISIBLE and
2378 * END_INVISIBLE, similarly to MOPEN. 2378 * END_INVISIBLE, similarly to MOPEN. */
2379 */
2380 /* TODO: Maybe this drops the speed? */
2381 goto theend;
2382 2379
2383 if (nfa_calc_size == TRUE) 2380 if (nfa_calc_size == TRUE)
2384 { 2381 {
2385 nstate += 2; 2382 nstate += 2;
2386 break; 2383 break;
2743 nfa_thread_T *thread; 2740 nfa_thread_T *thread;
2744 lpos_T save_lpos; 2741 lpos_T save_lpos;
2745 int save_in_use; 2742 int save_in_use;
2746 char_u *save_ptr; 2743 char_u *save_ptr;
2747 int i; 2744 int i;
2745 #ifdef ENABLE_LOG
2746 int did_print = FALSE;
2747 #endif
2748 2748
2749 if (l == NULL || state == NULL) 2749 if (l == NULL || state == NULL)
2750 return; 2750 return;
2751 2751
2752 switch (state->c) 2752 switch (state->c)
2780 case NFA_MOPEN + 8: 2780 case NFA_MOPEN + 8:
2781 case NFA_MOPEN + 9: 2781 case NFA_MOPEN + 9:
2782 /* These nodes do not need to be added, but we need to bail out 2782 /* These nodes do not need to be added, but we need to bail out
2783 * when it was tried to be added to this list before. */ 2783 * when it was tried to be added to this list before. */
2784 if (state->lastlist == l->id) 2784 if (state->lastlist == l->id)
2785 return; 2785 goto skip_add;
2786 state->lastlist = l->id; 2786 state->lastlist = l->id;
2787 break; 2787 break;
2788 2788
2789 default: 2789 default:
2790 if (state->lastlist == l->id) 2790 if (state->lastlist == l->id)
2791 { 2791 {
2792 /* This state is already in the list, don't add it again, 2792 /* This state is already in the list, don't add it again,
2793 * unless it is an MOPEN that is used for a backreference. */ 2793 * unless it is an MOPEN that is used for a backreference. */
2794 if (!nfa_has_backref) 2794 if (!nfa_has_backref)
2795 {
2796 skip_add:
2797 #ifdef ENABLE_LOG
2798 nfa_set_code(state->c);
2799 fprintf(log_fd, "> Not adding state %d to list %d. char %d: %s\n",
2800 abs(state->id), l->id, state->c, code);
2801 #endif
2795 return; 2802 return;
2803 }
2796 2804
2797 /* See if the same state is already in the list with the same 2805 /* See if the same state is already in the list with the same
2798 * positions. */ 2806 * positions. */
2799 for (i = 0; i < l->n; ++i) 2807 for (i = 0; i < l->n; ++i)
2800 { 2808 {
2801 thread = &l->t[i]; 2809 thread = &l->t[i];
2802 if (thread->state->id == state->id 2810 if (thread->state->id == state->id
2803 && sub_equal(&thread->sub, sub)) 2811 && sub_equal(&thread->sub, sub))
2804 return; 2812 goto skip_add;
2805 } 2813 }
2806 } 2814 }
2807 2815
2808 /* when there are backreferences the number of states may be (a 2816 /* when there are backreferences the number of states may be (a
2809 * lot) bigger */ 2817 * lot) bigger */
2830 else 2838 else
2831 mch_memmove(&thread->sub.list.line[0], 2839 mch_memmove(&thread->sub.list.line[0],
2832 &sub->list.line[0], 2840 &sub->list.line[0],
2833 sizeof(struct linepos) * sub->in_use); 2841 sizeof(struct linepos) * sub->in_use);
2834 } 2842 }
2835 }
2836
2837 #ifdef ENABLE_LOG 2843 #ifdef ENABLE_LOG
2838 nfa_set_code(state->c); 2844 {
2839 fprintf(log_fd, "> Adding state %d to list. Character %d: %s\n", 2845 int col;
2840 abs(state->id), state->c, code); 2846
2847 if (thread->sub.in_use <= 0)
2848 col = -1;
2849 else if (REG_MULTI)
2850 col = thread->sub.list.multi[0].start.col;
2851 else
2852 col = (int)(thread->sub.list.line[0].start - regline);
2853 nfa_set_code(state->c);
2854 fprintf(log_fd, "> Adding state %d to list %d. char %d: %s (start col %d)\n",
2855 abs(state->id), l->id, state->c, code, col);
2856 did_print = TRUE;
2857 }
2858 #endif
2859 }
2860
2861 #ifdef ENABLE_LOG
2862 if (!did_print)
2863 {
2864 int col;
2865
2866 if (sub->in_use <= 0)
2867 col = -1;
2868 else if (REG_MULTI)
2869 col = sub->list.multi[0].start.col;
2870 else
2871 col = (int)(sub->list.line[0].start - regline);
2872 nfa_set_code(state->c);
2873 fprintf(log_fd, "> Processing state %d for list %d. char %d: %s (start col %d)\n",
2874 abs(state->id), l->id, state->c, code, col);
2875 }
2841 #endif 2876 #endif
2842 switch (state->c) 2877 switch (state->c)
2843 { 2878 {
2844 case NFA_MATCH: 2879 case NFA_MATCH:
2845 nfa_match = TRUE; 2880 nfa_match = TRUE;
2870 case NFA_SKIP_CHAR: 2905 case NFA_SKIP_CHAR:
2871 case NFA_NOPEN: 2906 case NFA_NOPEN:
2872 case NFA_NCLOSE: 2907 case NFA_NCLOSE:
2873 addstate(l, state->out, sub, off); 2908 addstate(l, state->out, sub, off);
2874 break; 2909 break;
2875
2876 /* If this state is reached, then a recursive call of nfa_regmatch()
2877 * succeeded. the next call saves the found submatches in the
2878 * first state after the "invisible" branch. */
2879 #if 0
2880 case NFA_END_INVISIBLE:
2881 break;
2882 #endif
2883 2910
2884 case NFA_MOPEN + 0: 2911 case NFA_MOPEN + 0:
2885 case NFA_MOPEN + 1: 2912 case NFA_MOPEN + 1:
2886 case NFA_MOPEN + 2: 2913 case NFA_MOPEN + 2:
2887 case NFA_MOPEN + 3: 2914 case NFA_MOPEN + 3:
3448 #ifdef NFA_REGEXP_DEBUG_LOG 3475 #ifdef NFA_REGEXP_DEBUG_LOG
3449 nfa_set_code(t->state->c); 3476 nfa_set_code(t->state->c);
3450 fprintf(debug, "%s, ", code); 3477 fprintf(debug, "%s, ", code);
3451 #endif 3478 #endif
3452 #ifdef ENABLE_LOG 3479 #ifdef ENABLE_LOG
3453 nfa_set_code(t->state->c); 3480 {
3454 fprintf(log_fd, "(%d) %s, code %d ... \n", abs(t->state->id), 3481 int col;
3455 code, (int)t->state->c); 3482
3483 if (t->sub.in_use <= 0)
3484 col = -1;
3485 else if (REG_MULTI)
3486 col = t->sub.list.multi[0].start.col;
3487 else
3488 col = (int)(t->sub.list.line[0].start - regline);
3489 nfa_set_code(t->state->c);
3490 fprintf(log_fd, "(%d) char %d %s (start col %d) ... \n",
3491 abs(t->state->id), (int)t->state->c, code, col);
3492 }
3456 #endif 3493 #endif
3457 3494
3458 /* 3495 /*
3459 * Handle the possible codes of the current state. 3496 * Handle the possible codes of the current state.
3460 * The most important is NFA_MATCH. 3497 * The most important is NFA_MATCH.
3502 * the parent call. */ 3539 * the parent call. */
3503 if (start->c == NFA_MOPEN + 0) 3540 if (start->c == NFA_MOPEN + 0)
3504 addstate_here(thislist, t->state->out, &t->sub, &listidx); 3541 addstate_here(thislist, t->state->out, &t->sub, &listidx);
3505 else 3542 else
3506 { 3543 {
3544 /* TODO: only copy positions in use. */
3507 *m = t->sub; 3545 *m = t->sub;
3508 nfa_match = TRUE; 3546 nfa_match = TRUE;
3509 } 3547 }
3510 break; 3548 break;
3511 3549
3536 nfa_save_listids(start, listids); 3574 nfa_save_listids(start, listids);
3537 nfa_set_null_listids(start); 3575 nfa_set_null_listids(start);
3538 result = nfa_regmatch(t->state->out, submatch, m); 3576 result = nfa_regmatch(t->state->out, submatch, m);
3539 nfa_set_neg_listids(start); 3577 nfa_set_neg_listids(start);
3540 nfa_restore_listids(start, listids); 3578 nfa_restore_listids(start, listids);
3579 nfa_match = FALSE;
3541 3580
3542 #ifdef ENABLE_LOG 3581 #ifdef ENABLE_LOG
3543 log_fd = fopen(NFA_REGEXP_RUN_LOG, "a"); 3582 log_fd = fopen(NFA_REGEXP_RUN_LOG, "a");
3544 if (log_fd != NULL) 3583 if (log_fd != NULL)
3545 { 3584 {
3573 for (j = 1; j < m->in_use; j++) 3612 for (j = 1; j < m->in_use; j++)
3574 { 3613 {
3575 t->sub.list.line[j].start = m->list.line[j].start; 3614 t->sub.list.line[j].start = m->list.line[j].start;
3576 t->sub.list.line[j].end = m->list.line[j].end; 3615 t->sub.list.line[j].end = m->list.line[j].end;
3577 } 3616 }
3578 t->sub.in_use = m->in_use; 3617 if (m->in_use > t->sub.in_use)
3579 3618 t->sub.in_use = m->in_use;
3580 /* t->state->out1 is the corresponding END_INVISIBLE node */ 3619
3620 /* t->state->out1 is the corresponding END_INVISIBLE node;
3621 * Add it to the current list (zero-width match). */
3581 addstate_here(thislist, t->state->out1->out, &t->sub, 3622 addstate_here(thislist, t->state->out1->out, &t->sub,
3582 &listidx); 3623 &listidx);
3583 } 3624 }
3584 else 3625 else
3585 { 3626 {
4144 fprintf(f, " =======================================================\n"); 4185 fprintf(f, " =======================================================\n");
4145 #ifdef DEBUG 4186 #ifdef DEBUG
4146 fprintf(f, "\tRegexp is \"%s\"\n", nfa_regengine.expr); 4187 fprintf(f, "\tRegexp is \"%s\"\n", nfa_regengine.expr);
4147 #endif 4188 #endif
4148 fprintf(f, "\tInput text is \"%s\" \n", reginput); 4189 fprintf(f, "\tInput text is \"%s\" \n", reginput);
4149 fprintf(f, " =======================================================\n\n\n\n\n\n\n"); 4190 fprintf(f, " =======================================================\n\n");
4150 nfa_print_state(f, start); 4191 nfa_print_state(f, start);
4151 fprintf(f, "\n\n"); 4192 fprintf(f, "\n\n");
4152 fclose(f); 4193 fclose(f);
4153 } 4194 }
4154 else 4195 else