annotate src/regexp.h @ 4444:ccecb03e5e8b v7.3.970

updated for version 7.3.970 Problem: Syntax highlighting can be slow. Solution: Include the NFA regexp engine. Add the 'regexpengine' option to select which one is used. (various authors, including Ken Takata, Andrei Aiordachioaie, Russ Cox, Xiaozhou Liua, Ian Young)
author Bram Moolenaar <bram@vim.org>
date Sun, 19 May 2013 19:40:29 +0200
parents 84825cc6f049
children 5cc98a5898cf
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
7
3fc0f57ecb91 updated for version 7.0001
vimboss
parents:
diff changeset
1 /* vi:set ts=8 sts=4 sw=4:
3fc0f57ecb91 updated for version 7.0001
vimboss
parents:
diff changeset
2 *
3fc0f57ecb91 updated for version 7.0001
vimboss
parents:
diff changeset
3 * NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE
3fc0f57ecb91 updated for version 7.0001
vimboss
parents:
diff changeset
4 *
3fc0f57ecb91 updated for version 7.0001
vimboss
parents:
diff changeset
5 * This is NOT the original regular expression code as written by Henry
3fc0f57ecb91 updated for version 7.0001
vimboss
parents:
diff changeset
6 * Spencer. This code has been modified specifically for use with Vim, and
3fc0f57ecb91 updated for version 7.0001
vimboss
parents:
diff changeset
7 * should not be used apart from compiling Vim. If you want a good regular
3fc0f57ecb91 updated for version 7.0001
vimboss
parents:
diff changeset
8 * expression library, get the original code.
3fc0f57ecb91 updated for version 7.0001
vimboss
parents:
diff changeset
9 *
3fc0f57ecb91 updated for version 7.0001
vimboss
parents:
diff changeset
10 * NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE
3fc0f57ecb91 updated for version 7.0001
vimboss
parents:
diff changeset
11 */
3fc0f57ecb91 updated for version 7.0001
vimboss
parents:
diff changeset
12
3fc0f57ecb91 updated for version 7.0001
vimboss
parents:
diff changeset
13 #ifndef _REGEXP_H
3fc0f57ecb91 updated for version 7.0001
vimboss
parents:
diff changeset
14 #define _REGEXP_H
3fc0f57ecb91 updated for version 7.0001
vimboss
parents:
diff changeset
15
3fc0f57ecb91 updated for version 7.0001
vimboss
parents:
diff changeset
16 /*
3fc0f57ecb91 updated for version 7.0001
vimboss
parents:
diff changeset
17 * The number of sub-matches is limited to 10.
3fc0f57ecb91 updated for version 7.0001
vimboss
parents:
diff changeset
18 * The first one (index 0) is the whole match, referenced with "\0".
3fc0f57ecb91 updated for version 7.0001
vimboss
parents:
diff changeset
19 * The second one (index 1) is the first sub-match, referenced with "\1".
3fc0f57ecb91 updated for version 7.0001
vimboss
parents:
diff changeset
20 * This goes up to the tenth (index 9), referenced with "\9".
3fc0f57ecb91 updated for version 7.0001
vimboss
parents:
diff changeset
21 */
3fc0f57ecb91 updated for version 7.0001
vimboss
parents:
diff changeset
22 #define NSUBEXP 10 /* entries per match array: index 0 (whole match, "\0") plus "\1" .. "\9" */
3fc0f57ecb91 updated for version 7.0001
vimboss
parents:
diff changeset
23
3fc0f57ecb91 updated for version 7.0001
vimboss
parents:
diff changeset
24 /*
4444
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents: 418
diff changeset
25 * In the NFA engine: how many braces are allowed.
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents: 418
diff changeset
26 * TODO(RE): Use dynamic memory allocation instead of a static limit like this one.
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents: 418
diff changeset
27 */
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents: 418
diff changeset
28 #define NFA_MAX_BRACES 20 /* fixed compile-time limit; NOTE(review): "braces" presumably means \{n,m} items -- confirm */
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents: 418
diff changeset
29
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents: 418
diff changeset
30 typedef struct regengine regengine_T; /* forward declaration; struct regengine is defined near the end of this header */
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents: 418
diff changeset
31
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents: 418
diff changeset
32 typedef struct thread thread_T; /* forward declaration only; struct thread is not defined in this header */
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents: 418
diff changeset
33
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents: 418
diff changeset
34 /*
7
3fc0f57ecb91 updated for version 7.0001
vimboss
parents:
diff changeset
35 * Structure returned by vim_regcomp() to pass on to vim_regexec().
4444
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents: 418
diff changeset
36 * This is the general structure. For the actual matcher, two specific
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents: 418
diff changeset
37 * structures are used. See code below.
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents: 418
diff changeset
38 */
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents: 418
diff changeset
39 typedef struct regprog /* general part; bt_regprog_T and nfa_regprog_T start with these same two members */
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents: 418
diff changeset
40 {
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents: 418
diff changeset
41 regengine_T *engine; /* points to a struct regengine: the table of regcomp/regexec function pointers */
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents: 418
diff changeset
42 unsigned regflags; /* NOTE(review): presumably a bit mask of flags; the values are not defined in this header */
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents: 418
diff changeset
43 } regprog_T;
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents: 418
diff changeset
44
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents: 418
diff changeset
45 /*
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents: 418
diff changeset
46 * Structure used by the backtracking matcher.
7
3fc0f57ecb91 updated for version 7.0001
vimboss
parents:
diff changeset
47 * These fields are only to be used in regexp.c!
4444
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents: 418
diff changeset
48 * See regexp.c for an explanation.
7
3fc0f57ecb91 updated for version 7.0001
vimboss
parents:
diff changeset
49 */
3fc0f57ecb91 updated for version 7.0001
vimboss
parents:
diff changeset
50 typedef struct
3fc0f57ecb91 updated for version 7.0001
vimboss
parents:
diff changeset
51 {
4444
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents: 418
diff changeset
52 /* These two members implement regprog_T: same types and order as in regprog_T. NOTE(review): presumably so a pointer to this struct can be used as a regprog_T pointer -- confirm */
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents: 418
diff changeset
53 regengine_T *engine;
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents: 418
diff changeset
54 unsigned regflags;
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents: 418
diff changeset
55
7
3fc0f57ecb91 updated for version 7.0001
vimboss
parents:
diff changeset
56 int regstart; /* this and the following reg* members are explained in regexp.c, not here */
3fc0f57ecb91 updated for version 7.0001
vimboss
parents:
diff changeset
57 char_u reganch;
3fc0f57ecb91 updated for version 7.0001
vimboss
parents:
diff changeset
58 char_u *regmust;
3fc0f57ecb91 updated for version 7.0001
vimboss
parents:
diff changeset
59 int regmlen; /* NOTE(review): presumably the length of regmust -- confirm in regexp.c */
4444
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents: 418
diff changeset
60 char_u reghasz;
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents: 418
diff changeset
61 char_u program[1]; /* actually longer..: declared with one element, must stay the last member */
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents: 418
diff changeset
62 } bt_regprog_T;
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents: 418
diff changeset
63
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents: 418
diff changeset
64 /*
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents: 418
diff changeset
65 * Structure representing an NFA state.
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents: 418
diff changeset
66 * An NFA state may have no outgoing edge when it is an NFA_MATCH state.
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents: 418
diff changeset
67 */
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents: 418
diff changeset
68 typedef struct nfa_state nfa_state_T; /* declared before the struct body so the members can point to other states */
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents: 418
diff changeset
69 struct nfa_state
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents: 418
diff changeset
70 {
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents: 418
diff changeset
71 int c; /* NOTE(review): presumably the character or state kind (such as NFA_MATCH) -- confirm in the NFA matcher */
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents: 418
diff changeset
72 nfa_state_T *out; /* NOTE(review): presumably the first outgoing edge -- confirm */
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents: 418
diff changeset
73 nfa_state_T *out1; /* NOTE(review): presumably the second outgoing edge -- confirm */
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents: 418
diff changeset
74 int id;
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents: 418
diff changeset
75 int lastlist; /* meaning of this and the members below is not documented in this header */
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents: 418
diff changeset
76 int visits;
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents: 418
diff changeset
77 thread_T *lastthread; /* thread_T is only forward-declared in this header */
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents: 418
diff changeset
78 int negated;
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents: 418
diff changeset
79 };
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents: 418
diff changeset
80
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents: 418
diff changeset
81 /*
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents: 418
diff changeset
82 * Structure used by the NFA matcher.
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents: 418
diff changeset
83 */
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents: 418
diff changeset
84 typedef struct
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents: 418
diff changeset
85 {
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents: 418
diff changeset
86 /* These two members implement regprog_T: same types and order as in regprog_T */
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents: 418
diff changeset
87 regengine_T *engine;
7
3fc0f57ecb91 updated for version 7.0001
vimboss
parents:
diff changeset
88 unsigned regflags;
4444
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents: 418
diff changeset
89
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents: 418
diff changeset
90 regprog_T regprog; /* NOTE(review): repeats the two members above and looks redundant -- check for users before removing */
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents: 418
diff changeset
91 nfa_state_T *start; /* NOTE(review): presumably the start state of the NFA -- confirm */
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents: 418
diff changeset
92 int nstate; /* NOTE(review): presumably the number of entries in state[] -- confirm */
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents: 418
diff changeset
93 nfa_state_T state[1]; /* actually longer..; [1] as in bt_regprog_T, a zero-length array is not ISO C; must stay the last member */
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents: 418
diff changeset
94 } nfa_regprog_T;
7
3fc0f57ecb91 updated for version 7.0001
vimboss
parents:
diff changeset
95
3fc0f57ecb91 updated for version 7.0001
vimboss
parents:
diff changeset
96 /*
3fc0f57ecb91 updated for version 7.0001
vimboss
parents:
diff changeset
97 * Structure to be used for single-line matching.
3fc0f57ecb91 updated for version 7.0001
vimboss
parents:
diff changeset
98 * Sub-match "no" starts at "startp[no]" and ends just before "endp[no]".
3fc0f57ecb91 updated for version 7.0001
vimboss
parents:
diff changeset
99 * When there is no match, the pointer is NULL.
3fc0f57ecb91 updated for version 7.0001
vimboss
parents:
diff changeset
100 */
3fc0f57ecb91 updated for version 7.0001
vimboss
parents:
diff changeset
101 typedef struct
3fc0f57ecb91 updated for version 7.0001
vimboss
parents:
diff changeset
102 {
3fc0f57ecb91 updated for version 7.0001
vimboss
parents:
diff changeset
103 regprog_T *regprog; /* the structure returned by vim_regcomp() */
3fc0f57ecb91 updated for version 7.0001
vimboss
parents:
diff changeset
104 char_u *startp[NSUBEXP]; /* startp[no]: start of sub-match "no"; NULL when there is no match */
3fc0f57ecb91 updated for version 7.0001
vimboss
parents:
diff changeset
105 char_u *endp[NSUBEXP]; /* endp[no]: sub-match "no" ends just before this; NULL when there is no match */
3fc0f57ecb91 updated for version 7.0001
vimboss
parents:
diff changeset
106 int rm_ic; /* NOTE(review): presumably the ignore-case setting for the match -- confirm */
3fc0f57ecb91 updated for version 7.0001
vimboss
parents:
diff changeset
107 } regmatch_T;
3fc0f57ecb91 updated for version 7.0001
vimboss
parents:
diff changeset
108
3fc0f57ecb91 updated for version 7.0001
vimboss
parents:
diff changeset
109 /*
3fc0f57ecb91 updated for version 7.0001
vimboss
parents:
diff changeset
110 * Structure to be used for multi-line matching.
3fc0f57ecb91 updated for version 7.0001
vimboss
parents:
diff changeset
111 * Sub-match "no" starts in line "startpos[no].lnum" column "startpos[no].col"
3fc0f57ecb91 updated for version 7.0001
vimboss
parents:
diff changeset
112 * and ends in line "endpos[no].lnum" just before column "endpos[no].col".
3fc0f57ecb91 updated for version 7.0001
vimboss
parents:
diff changeset
113 * The line numbers are relative to the first line, thus startpos[0].lnum is
3fc0f57ecb91 updated for version 7.0001
vimboss
parents:
diff changeset
114 * always 0.
3fc0f57ecb91 updated for version 7.0001
vimboss
parents:
diff changeset
115 * When there is no match, the line number is -1.
3fc0f57ecb91 updated for version 7.0001
vimboss
parents:
diff changeset
116 */
3fc0f57ecb91 updated for version 7.0001
vimboss
parents:
diff changeset
117 typedef struct
3fc0f57ecb91 updated for version 7.0001
vimboss
parents:
diff changeset
118 {
3fc0f57ecb91 updated for version 7.0001
vimboss
parents:
diff changeset
119 regprog_T *regprog; /* the structure returned by vim_regcomp() */
3fc0f57ecb91 updated for version 7.0001
vimboss
parents:
diff changeset
120 lpos_T startpos[NSUBEXP]; /* start of each sub-match; lnum is relative to the first line, -1 when there is no match */
3fc0f57ecb91 updated for version 7.0001
vimboss
parents:
diff changeset
121 lpos_T endpos[NSUBEXP]; /* each sub-match ends just before this column; lnum is -1 when there is no match */
3fc0f57ecb91 updated for version 7.0001
vimboss
parents:
diff changeset
122 int rmm_ic; /* NOTE(review): presumably the ignore-case setting for the match -- confirm */
418
84825cc6f049 updated for version 7.0109
vimboss
parents: 410
diff changeset
123 colnr_T rmm_maxcol; /* when not zero: maximum column */
7
3fc0f57ecb91 updated for version 7.0001
vimboss
parents:
diff changeset
124 } regmmatch_T;
3fc0f57ecb91 updated for version 7.0001
vimboss
parents:
diff changeset
125
3fc0f57ecb91 updated for version 7.0001
vimboss
parents:
diff changeset
126 /*
3fc0f57ecb91 updated for version 7.0001
vimboss
parents:
diff changeset
127 * Structure used to store external references: "\z\(\)" to "\z\1".
3fc0f57ecb91 updated for version 7.0001
vimboss
parents:
diff changeset
128 * Use a reference count to avoid the need to copy this around. When it goes
3fc0f57ecb91 updated for version 7.0001
vimboss
parents:
diff changeset
129 * from 1 to zero the matches need to be freed.
3fc0f57ecb91 updated for version 7.0001
vimboss
parents:
diff changeset
130 */
3fc0f57ecb91 updated for version 7.0001
vimboss
parents:
diff changeset
131 typedef struct
3fc0f57ecb91 updated for version 7.0001
vimboss
parents:
diff changeset
132 {
3fc0f57ecb91 updated for version 7.0001
vimboss
parents:
diff changeset
133 short refcnt; /* reference count; when it goes from 1 to zero the matches need to be freed */
3fc0f57ecb91 updated for version 7.0001
vimboss
parents:
diff changeset
134 char_u *matches[NSUBEXP]; /* the stored external references ("\z\(\)" items) */
3fc0f57ecb91 updated for version 7.0001
vimboss
parents:
diff changeset
135 } reg_extmatch_T;
3fc0f57ecb91 updated for version 7.0001
vimboss
parents:
diff changeset
136
4444
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents: 418
diff changeset
137 struct regengine /* table of function pointers for one regexp engine; regprog_T.engine points to one of these */
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents: 418
diff changeset
138 {
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents: 418
diff changeset
139 regprog_T *(*regcomp)(char_u*, int); /* returns a regprog_T pointer; NOTE(review): arguments are presumably the pattern and flags -- confirm */
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents: 418
diff changeset
140 int (*regexec)(regmatch_T*, char_u*, colnr_T); /* takes a regmatch_T, the structure for single-line matching */
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents: 418
diff changeset
141 #if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) \
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents: 418
diff changeset
142 || defined(FIND_REPLACE_DIALOG) || defined(PROTO)
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents: 418
diff changeset
143 int (*regexec_nl)(regmatch_T*, char_u*, colnr_T); /* same signature as regexec; only present when one of the features above is defined */
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents: 418
diff changeset
144 #endif
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents: 418
diff changeset
145 long (*regexec_multi)(regmmatch_T*, win_T*, buf_T*, linenr_T, colnr_T, proftime_T*); /* takes a regmmatch_T, the structure for multi-line matching */
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents: 418
diff changeset
146 #ifdef DEBUG
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents: 418
diff changeset
147 char_u *expr; /* only present in DEBUG builds; NOTE(review): presumably the pattern text -- confirm */
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents: 418
diff changeset
148 #endif
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents: 418
diff changeset
149 };
ccecb03e5e8b updated for version 7.3.970
Bram Moolenaar <bram@vim.org>
parents: 418
diff changeset
150
7
3fc0f57ecb91 updated for version 7.0001
vimboss
parents:
diff changeset
151 #endif /* _REGEXP_H */