Mercurial > vim
comparison src/regexp_nfa.c @ 4555:b2946c06d1b6 v7.3.1025
updated for version 7.3.1025
Problem: New regexp: not matching newline in string. (Marc Weber)
Solution: Check for "\n" character.
author | Bram Moolenaar <bram@vim.org> |
---|---|
date | Sun, 26 May 2013 17:45:49 +0200 |
parents | 7b835b2969af |
children | 888c12c899e5 |
comparison
equal
deleted
inserted
replaced
4554:0bd4cf62bcdc | 4555:b2946c06d1b6 |
---|---|
163 | 163 |
164 static int *post_start; /* holds the postfix form of r.e. */ | 164 static int *post_start; /* holds the postfix form of r.e. */ |
165 static int *post_end; | 165 static int *post_end; |
166 static int *post_ptr; | 166 static int *post_ptr; |
167 | 167 |
168 static int nstate; /* Number of states in the NFA. */ | 168 static int nstate; /* Number of states in the NFA. Also used when |
169 * executing. */ | |
169 static int istate; /* Index in the state vector, used in new_state() */ | 170 static int istate; /* Index in the state vector, used in new_state() */ |
170 static int nstate_max; /* Upper bound of estimated number of states. */ | |
171 | 171 |
172 | 172 |
173 static int nfa_regcomp_start __ARGS((char_u*expr, int re_flags)); | 173 static int nfa_regcomp_start __ARGS((char_u*expr, int re_flags)); |
174 static int nfa_recognize_char_class __ARGS((char_u *start, char_u *end, int extra_newl)); | 174 static int nfa_recognize_char_class __ARGS((char_u *start, char_u *end, int extra_newl)); |
175 static int nfa_emit_equi_class __ARGS((int c, int neg)); | 175 static int nfa_emit_equi_class __ARGS((int c, int neg)); |
217 nfa_regcomp_start(expr, re_flags) | 217 nfa_regcomp_start(expr, re_flags) |
218 char_u *expr; | 218 char_u *expr; |
219 int re_flags; /* see vim_regcomp() */ | 219 int re_flags; /* see vim_regcomp() */ |
220 { | 220 { |
221 size_t postfix_size; | 221 size_t postfix_size; |
222 int nstate_max; | |
222 | 223 |
223 nstate = 0; | 224 nstate = 0; |
224 istate = 0; | 225 istate = 0; |
225 /* A reasonable estimation for size */ | 226 /* A reasonable estimation for maximum size */ |
226 nstate_max = (int)(STRLEN(expr) + 1) * NFA_POSTFIX_MULTIPLIER; | 227 nstate_max = (int)(STRLEN(expr) + 1) * NFA_POSTFIX_MULTIPLIER; |
227 | 228 |
228 /* Some items blow up in size, such as [A-z]. Add more space for that. | 229 /* Some items blow up in size, such as [A-z]. Add more space for that. |
229 * TODO: some patterns may still fail. */ | 230 * TODO: some patterns may still fail. */ |
230 nstate_max += 1000; | 231 nstate_max += 1000; |
1966 * A partially built NFA without the matching state filled in. | 1967 * A partially built NFA without the matching state filled in. |
1967 * Frag_T.start points at the start state. | 1968 * Frag_T.start points at the start state. |
1968 * Frag_T.out is a list of places that need to be set to the | 1969 * Frag_T.out is a list of places that need to be set to the |
1969 * next state for this fragment. | 1970 * next state for this fragment. |
1970 */ | 1971 */ |
1972 | |
1973 /* Since the out pointers in the list are always | |
1974 * uninitialized, we use the pointers themselves | |
1975 * as storage for the Ptrlists. */ | |
1971 typedef union Ptrlist Ptrlist; | 1976 typedef union Ptrlist Ptrlist; |
1977 union Ptrlist | |
1978 { | |
1979 Ptrlist *next; | |
1980 nfa_state_T *s; | |
1981 }; | |
1982 | |
1972 struct Frag | 1983 struct Frag |
1973 { | 1984 { |
1974 nfa_state_T *start; | 1985 nfa_state_T *start; |
1975 Ptrlist *out; | 1986 Ptrlist *out; |
1976 }; | 1987 }; |
1977 typedef struct Frag Frag_T; | 1988 typedef struct Frag Frag_T; |
1978 | 1989 |
1979 static Frag_T frag __ARGS((nfa_state_T *start, Ptrlist *out)); | 1990 static Frag_T frag __ARGS((nfa_state_T *start, Ptrlist *out)); |
1995 | 2006 |
1996 n.start = start; | 2007 n.start = start; |
1997 n.out = out; | 2008 n.out = out; |
1998 return n; | 2009 return n; |
1999 } | 2010 } |
2000 | |
2001 /* | |
2002 * Since the out pointers in the list are always | |
2003 * uninitialized, we use the pointers themselves | |
2004 * as storage for the Ptrlists. | |
2005 */ | |
2006 union Ptrlist | |
2007 { | |
2008 Ptrlist *next; | |
2009 nfa_state_T *s; | |
2010 }; | |
2011 | 2011 |
2012 /* | 2012 /* |
2013 * Create singleton list containing just outp. | 2013 * Create singleton list containing just outp. |
2014 */ | 2014 */ |
2015 static Ptrlist * | 2015 static Ptrlist * |
3356 break; | 3356 break; |
3357 } | 3357 } |
3358 #endif | 3358 #endif |
3359 | 3359 |
3360 case NFA_NEWL: | 3360 case NFA_NEWL: |
3361 if (!reg_line_lbr && REG_MULTI | 3361 if (curc == NUL && !reg_line_lbr && REG_MULTI |
3362 && curc == NUL && reglnum <= reg_maxline) | 3362 && reglnum <= reg_maxline) |
3363 { | 3363 { |
3364 go_to_nextline = TRUE; | 3364 go_to_nextline = TRUE; |
3365 /* Pass -1 for the offset, which means taking the position | 3365 /* Pass -1 for the offset, which means taking the position |
3366 * at the start of the next line. */ | 3366 * at the start of the next line. */ |
3367 addstate(nextlist, t->state->out, &t->sub, -1, | 3367 addstate(nextlist, t->state->out, &t->sub, -1, |
3368 listid + 1, &match); | |
3369 } | |
3370 else if (curc == '\n' && reg_line_lbr) | |
3371 { | |
3372 /* match \n as if it is an ordinary character */ | |
3373 addstate(nextlist, t->state->out, &t->sub, 1, | |
3368 listid + 1, &match); | 3374 listid + 1, &match); |
3369 } | 3375 } |
3370 break; | 3376 break; |
3371 | 3377 |
3372 case NFA_CLASS_ALNUM: | 3378 case NFA_CLASS_ALNUM: |
3830 | 3836 |
3831 /* Build postfix form of the regexp. Needed to build the NFA | 3837 /* Build postfix form of the regexp. Needed to build the NFA |
3832 * (and count its size). */ | 3838 * (and count its size). */ |
3833 postfix = re2post(); | 3839 postfix = re2post(); |
3834 if (postfix == NULL) | 3840 if (postfix == NULL) |
3841 { | |
3842 /* TODO: only give this error for debugging? */ | |
3843 if (post_ptr >= post_end) | |
3844 EMSGN("Internal error: estimated max number of states insufficient: %ld", post_end - post_start); | |
3835 goto fail; /* Cascaded (syntax?) error */ | 3845 goto fail; /* Cascaded (syntax?) error */ |
3846 } | |
3836 | 3847 |
3837 /* | 3848 /* |
3838 * In order to build the NFA, we parse the input regexp twice: | 3849 * In order to build the NFA, we parse the input regexp twice: |
3839 * 1. first pass to count size (so we can allocate space) | 3850 * 1. first pass to count size (so we can allocate space) |
3840 * 2. second to emit code | 3851 * 2. second to emit code |