comparison src/regexp_nfa.c @ 4555:b2946c06d1b6 v7.3.1025

updated for version 7.3.1025 Problem: New regexp: not matching newline in string. (Marc Weber) Solution: Check for "\n" character.
author Bram Moolenaar <bram@vim.org>
date Sun, 26 May 2013 17:45:49 +0200
parents 7b835b2969af
children 888c12c899e5
comparison
equal deleted inserted replaced
4554:0bd4cf62bcdc 4555:b2946c06d1b6
163 163
164 static int *post_start; /* holds the postfix form of r.e. */ 164 static int *post_start; /* holds the postfix form of r.e. */
165 static int *post_end; 165 static int *post_end;
166 static int *post_ptr; 166 static int *post_ptr;
167 167
168 static int nstate; /* Number of states in the NFA. */ 168 static int nstate; /* Number of states in the NFA. Also used when
169 * executing. */
169 static int istate; /* Index in the state vector, used in new_state() */ 170 static int istate; /* Index in the state vector, used in new_state() */
170 static int nstate_max; /* Upper bound of estimated number of states. */
171 171
172 172
173 static int nfa_regcomp_start __ARGS((char_u*expr, int re_flags)); 173 static int nfa_regcomp_start __ARGS((char_u*expr, int re_flags));
174 static int nfa_recognize_char_class __ARGS((char_u *start, char_u *end, int extra_newl)); 174 static int nfa_recognize_char_class __ARGS((char_u *start, char_u *end, int extra_newl));
175 static int nfa_emit_equi_class __ARGS((int c, int neg)); 175 static int nfa_emit_equi_class __ARGS((int c, int neg));
217 nfa_regcomp_start(expr, re_flags) 217 nfa_regcomp_start(expr, re_flags)
218 char_u *expr; 218 char_u *expr;
219 int re_flags; /* see vim_regcomp() */ 219 int re_flags; /* see vim_regcomp() */
220 { 220 {
221 size_t postfix_size; 221 size_t postfix_size;
222 int nstate_max;
222 223
223 nstate = 0; 224 nstate = 0;
224 istate = 0; 225 istate = 0;
225 /* A reasonable estimation for size */ 226 /* A reasonable estimation for maximum size */
226 nstate_max = (int)(STRLEN(expr) + 1) * NFA_POSTFIX_MULTIPLIER; 227 nstate_max = (int)(STRLEN(expr) + 1) * NFA_POSTFIX_MULTIPLIER;
227 228
228 /* Some items blow up in size, such as [A-z]. Add more space for that. 229 /* Some items blow up in size, such as [A-z]. Add more space for that.
229 * TODO: some patterns may still fail. */ 230 * TODO: some patterns may still fail. */
230 nstate_max += 1000; 231 nstate_max += 1000;
1966 * A partially built NFA without the matching state filled in. 1967 * A partially built NFA without the matching state filled in.
1967 * Frag_T.start points at the start state. 1968 * Frag_T.start points at the start state.
1968 * Frag_T.out is a list of places that need to be set to the 1969 * Frag_T.out is a list of places that need to be set to the
1969 * next state for this fragment. 1970 * next state for this fragment.
1970 */ 1971 */
1972
1973 /* Since the out pointers in the list are always
1974 * uninitialized, we use the pointers themselves
1975 * as storage for the Ptrlists. */
1971 typedef union Ptrlist Ptrlist; 1976 typedef union Ptrlist Ptrlist;
1977 union Ptrlist
1978 {
1979 Ptrlist *next;
1980 nfa_state_T *s;
1981 };
1982
1972 struct Frag 1983 struct Frag
1973 { 1984 {
1974 nfa_state_T *start; 1985 nfa_state_T *start;
1975 Ptrlist *out; 1986 Ptrlist *out;
1976 }; 1987 };
1977 typedef struct Frag Frag_T; 1988 typedef struct Frag Frag_T;
1978 1989
1979 static Frag_T frag __ARGS((nfa_state_T *start, Ptrlist *out)); 1990 static Frag_T frag __ARGS((nfa_state_T *start, Ptrlist *out));
1995 2006
1996 n.start = start; 2007 n.start = start;
1997 n.out = out; 2008 n.out = out;
1998 return n; 2009 return n;
1999 } 2010 }
2000
2001 /*
2002 * Since the out pointers in the list are always
2003 * uninitialized, we use the pointers themselves
2004 * as storage for the Ptrlists.
2005 */
2006 union Ptrlist
2007 {
2008 Ptrlist *next;
2009 nfa_state_T *s;
2010 };
2011 2011
2012 /* 2012 /*
2013 * Create singleton list containing just outp. 2013 * Create singleton list containing just outp.
2014 */ 2014 */
2015 static Ptrlist * 2015 static Ptrlist *
3356 break; 3356 break;
3357 } 3357 }
3358 #endif 3358 #endif
3359 3359
3360 case NFA_NEWL: 3360 case NFA_NEWL:
3361 if (!reg_line_lbr && REG_MULTI 3361 if (curc == NUL && !reg_line_lbr && REG_MULTI
3362 && curc == NUL && reglnum <= reg_maxline) 3362 && reglnum <= reg_maxline)
3363 { 3363 {
3364 go_to_nextline = TRUE; 3364 go_to_nextline = TRUE;
3365 /* Pass -1 for the offset, which means taking the position 3365 /* Pass -1 for the offset, which means taking the position
3366 * at the start of the next line. */ 3366 * at the start of the next line. */
3367 addstate(nextlist, t->state->out, &t->sub, -1, 3367 addstate(nextlist, t->state->out, &t->sub, -1,
3368 listid + 1, &match);
3369 }
3370 else if (curc == '\n' && reg_line_lbr)
3371 {
3372 /* match \n as if it is an ordinary character */
3373 addstate(nextlist, t->state->out, &t->sub, 1,
3368 listid + 1, &match); 3374 listid + 1, &match);
3369 } 3375 }
3370 break; 3376 break;
3371 3377
3372 case NFA_CLASS_ALNUM: 3378 case NFA_CLASS_ALNUM:
3830 3836
3831 /* Build postfix form of the regexp. Needed to build the NFA 3837 /* Build postfix form of the regexp. Needed to build the NFA
3832 * (and count its size). */ 3838 * (and count its size). */
3833 postfix = re2post(); 3839 postfix = re2post();
3834 if (postfix == NULL) 3840 if (postfix == NULL)
3841 {
3842 /* TODO: only give this error for debugging? */
3843 if (post_ptr >= post_end)
3844 EMSGN("Internal error: estimated max number of states insufficient: %ld", post_end - post_start);
3835 goto fail; /* Cascaded (syntax?) error */ 3845 goto fail; /* Cascaded (syntax?) error */
3846 }
3836 3847
3837 /* 3848 /*
3838 * In order to build the NFA, we parse the input regexp twice: 3849 * In order to build the NFA, we parse the input regexp twice:
3839 * 1. first pass to count size (so we can allocate space) 3850 * 1. first pass to count size (so we can allocate space)
3840 * 2. second to emit code 3851 * 2. second to emit code