Mercurial > vim
annotate src/regexp.h @ 31965:b6a1e17f049d
Added tag v9.0.1314 for changeset c0a9bc376b54d4a62cd485c983ec34fe03fd7352
author | Bram Moolenaar <Bram@vim.org> |
---|---|
date | Thu, 16 Feb 2023 16:15:05 +0100 |
parents | 7fb4e244b16e |
children | 90063f44c99a |
rev | line source |
---|---|
10042
4aead6a9b7a9
commit https://github.com/vim/vim/commit/edf3f97ae2af024708ebb4ac614227327033ca47
Christian Brabandt <cb@256bit.org>
parents:
6328
diff
changeset
|
1 /* vi:set ts=8 sts=4 sw=4 noet: |
7 | 2 * |
3 * NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE | |
4 * | |
5 * This is NOT the original regular expression code as written by Henry | |
6 * Spencer. This code has been modified specifically for use with Vim, and | |
7 * should not be used apart from compiling Vim. If you want a good regular | |
8 * expression library, get the original code. | |
9 * | |
10 * NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE | |
11 */ | |
12 | |
13 #ifndef _REGEXP_H | |
14 #define _REGEXP_H | |
15 | |
16 /* | |
17 * The number of sub-matches is limited to 10. | |
18 * The first one (index 0) is the whole match, referenced with "\0". | |
19 * The second one (index 1) is the first sub-match, referenced with "\1". | |
20 * This goes up to the tenth (index 9), referenced with "\9". | |
21 */ | |
22 #define NSUBEXP 10 | |
23 | |
24 /* | |
4444 | 25 * In the NFA engine: how many braces are allowed. |
26 * TODO(RE): Use dynamic memory allocation instead of the static limit used here | |
27 */ | |
28 #define NFA_MAX_BRACES 20 | |
29 | |
6328 | 30 /* |
31 * In the NFA engine: how many states are allowed | |
32 */ | |
33 #define NFA_MAX_STATES 100000 | |
28226
89c181c99e23
patch 8.2.4639: not sufficient parenthesis in preprocessor macros
Bram Moolenaar <Bram@vim.org>
parents:
18753
diff
changeset
|
34 #define NFA_TOO_EXPENSIVE (-1) |
6328 | 35 |
18753
6e3dc2d630c2
patch 8.1.2366: using old C style comments
Bram Moolenaar <Bram@vim.org>
parents:
16378
diff
changeset
|
36 // Which regexp engine to use? Needed for vim_regcomp(). |
6e3dc2d630c2
patch 8.1.2366: using old C style comments
Bram Moolenaar <Bram@vim.org>
parents:
16378
diff
changeset
|
37 // Must match with 'regexpengine'. |
6328 | 38 #define AUTOMATIC_ENGINE 0 |
39 #define BACKTRACKING_ENGINE 1 | |
40 #define NFA_ENGINE 2 | |
41 | |
4444 | 42 typedef struct regengine regengine_T; |
43 | |
44 /* | |
7 | 45 * Structure returned by vim_regcomp() to pass on to vim_regexec(). |
4444 | 46 * This is the general structure. For the actual matcher, two specific |
47 * structures are used. See code below. | |
48 */ | |
49 typedef struct regprog | |
50 { | |
51 regengine_T *engine; | |
52 unsigned regflags; | |
14354
ffd834f893aa
patch 8.1.0192: executing regexp recursively fails with a crash
Christian Brabandt <cb@256bit.org>
parents:
11521
diff
changeset
|
53 unsigned re_engine; // automatic, backtracking or nfa engine |
ffd834f893aa
patch 8.1.0192: executing regexp recursively fails with a crash
Christian Brabandt <cb@256bit.org>
parents:
11521
diff
changeset
|
54 unsigned re_flags; // second argument for vim_regcomp() |
ffd834f893aa
patch 8.1.0192: executing regexp recursively fails with a crash
Christian Brabandt <cb@256bit.org>
parents:
11521
diff
changeset
|
55 int re_in_use; // prog is being executed |
4444 | 56 } regprog_T; |
57 | |
58 /* | |
59 * Structure used by the backtracking matcher. | |
7 | 60 * These fields are only to be used in regexp.c! |
4444 | 61 * See regexp.c for an explanation. |
7 | 62 */ |
63 typedef struct | |
64 { | |
18753
6e3dc2d630c2
patch 8.1.2366: using old C style comments
Bram Moolenaar <Bram@vim.org>
parents:
16378
diff
changeset
|
65 // These five members implement regprog_T |
4444 | 66 regengine_T *engine; |
67 unsigned regflags; | |
6328 | 68 unsigned re_engine; |
14354
ffd834f893aa
patch 8.1.0192: executing regexp recursively fails with a crash
Christian Brabandt <cb@256bit.org>
parents:
11521
diff
changeset
|
69 unsigned re_flags; |
ffd834f893aa
patch 8.1.0192: executing regexp recursively fails with a crash
Christian Brabandt <cb@256bit.org>
parents:
11521
diff
changeset
|
70 int re_in_use; |
4444 | 71 |
7 | 72 int regstart; |
73 char_u reganch; | |
74 char_u *regmust; | |
75 int regmlen; | |
4686
8db697ae406a
updated for version 7.3.1090
Bram Moolenaar <bram@vim.org>
parents:
4647
diff
changeset
|
76 #ifdef FEAT_SYN_HL |
4444 | 77 char_u reghasz; |
4686
8db697ae406a
updated for version 7.3.1090
Bram Moolenaar <bram@vim.org>
parents:
4647
diff
changeset
|
78 #endif |
18753
6e3dc2d630c2
patch 8.1.2366: using old C style comments
Bram Moolenaar <Bram@vim.org>
parents:
16378
diff
changeset
|
79 char_u program[1]; // actually longer.. |
4444 | 80 } bt_regprog_T; |
81 | |
82 /* | |
83 * Structure representing an NFA state. | |
16378
3d6b282e2d6e
patch 8.1.1194: typos and small problems in source files
Bram Moolenaar <Bram@vim.org>
parents:
14354
diff
changeset
|
84 * An NFA state may have no outgoing edge when it is an NFA_MATCH state. |
4444 | 85 */ |
86 typedef struct nfa_state nfa_state_T; | |
87 struct nfa_state | |
88 { | |
89 int c; | |
90 nfa_state_T *out; | |
91 nfa_state_T *out1; | |
92 int id; | |
18753
6e3dc2d630c2
patch 8.1.2366: using old C style comments
Bram Moolenaar <Bram@vim.org>
parents:
16378
diff
changeset
|
93 int lastlist[2]; // 0: normal, 1: recursive |
4583
321cfbef9431
updated for version 7.3.1039
Bram Moolenaar <bram@vim.org>
parents:
4571
diff
changeset
|
94 int val; |
4444 | 95 }; |
96 | |
97 /* | |
98 * Structure used by the NFA matcher. | |
99 */ | |
100 typedef struct | |
101 { | |
18753
6e3dc2d630c2
patch 8.1.2366: using old C style comments
Bram Moolenaar <Bram@vim.org>
parents:
16378
diff
changeset
|
102 // These five members implement regprog_T |
4444 | 103 regengine_T *engine; |
7 | 104 unsigned regflags; |
6328 | 105 unsigned re_engine; |
14354
ffd834f893aa
patch 8.1.0192: executing regexp recursively fails with a crash
Christian Brabandt <cb@256bit.org>
parents:
11521
diff
changeset
|
106 unsigned re_flags; |
ffd834f893aa
patch 8.1.0192: executing regexp recursively fails with a crash
Christian Brabandt <cb@256bit.org>
parents:
11521
diff
changeset
|
107 int re_in_use; |
4444 | 108 |
18753
6e3dc2d630c2
patch 8.1.2366: using old C style comments
Bram Moolenaar <Bram@vim.org>
parents:
16378
diff
changeset
|
109 nfa_state_T *start; // points into state[] |
4772
03375ccf28a2
updated for version 7.3.1133
Bram Moolenaar <bram@vim.org>
parents:
4718
diff
changeset
|
110 |
18753
6e3dc2d630c2
patch 8.1.2366: using old C style comments
Bram Moolenaar <Bram@vim.org>
parents:
16378
diff
changeset
|
111 int reganch; // pattern starts with ^ |
6e3dc2d630c2
patch 8.1.2366: using old C style comments
Bram Moolenaar <Bram@vim.org>
parents:
16378
diff
changeset
|
112 int regstart; // char at start of pattern |
6e3dc2d630c2
patch 8.1.2366: using old C style comments
Bram Moolenaar <Bram@vim.org>
parents:
16378
diff
changeset
|
113 char_u *match_text; // plain text to match with |
4772
03375ccf28a2
updated for version 7.3.1133
Bram Moolenaar <bram@vim.org>
parents:
4718
diff
changeset
|
114 |
18753
6e3dc2d630c2
patch 8.1.2366: using old C style comments
Bram Moolenaar <Bram@vim.org>
parents:
16378
diff
changeset
|
115 int has_zend; // pattern contains \ze |
6e3dc2d630c2
patch 8.1.2366: using old C style comments
Bram Moolenaar <Bram@vim.org>
parents:
16378
diff
changeset
|
116 int has_backref; // pattern contains \1 .. \9 |
4686
8db697ae406a
updated for version 7.3.1090
Bram Moolenaar <bram@vim.org>
parents:
4647
diff
changeset
|
117 #ifdef FEAT_SYN_HL |
8db697ae406a
updated for version 7.3.1090
Bram Moolenaar <bram@vim.org>
parents:
4647
diff
changeset
|
118 int reghasz; |
8db697ae406a
updated for version 7.3.1090
Bram Moolenaar <bram@vim.org>
parents:
4647
diff
changeset
|
119 #endif |
4690
9d97a0c045ef
updated for version 7.3.1092
Bram Moolenaar <bram@vim.org>
parents:
4686
diff
changeset
|
120 char_u *pattern; |
18753
6e3dc2d630c2
patch 8.1.2366: using old C style comments
Bram Moolenaar <Bram@vim.org>
parents:
16378
diff
changeset
|
121 int nsubexp; // number of () |
4444 | 122 int nstate; |
18753
6e3dc2d630c2
patch 8.1.2366: using old C style comments
Bram Moolenaar <Bram@vim.org>
parents:
16378
diff
changeset
|
123 nfa_state_T state[1]; // actually longer.. |
4444 | 124 } nfa_regprog_T; |
7 | 125 |
126 /* | |
127 * Structure to be used for single-line matching. | |
128 * Sub-match "no" starts at "startp[no]" and ends just before "endp[no]". | |
129 * When there is no match, the pointer is NULL. | |
130 */ | |
131 typedef struct | |
132 { | |
133 regprog_T *regprog; | |
134 char_u *startp[NSUBEXP]; | |
135 char_u *endp[NSUBEXP]; | |
31235
7fb4e244b16e
patch 9.0.0951: trying every character position for a match is inefficient
Bram Moolenaar <Bram@vim.org>
parents:
29071
diff
changeset
|
136 |
7fb4e244b16e
patch 9.0.0951: trying every character position for a match is inefficient
Bram Moolenaar <Bram@vim.org>
parents:
29071
diff
changeset
|
137 colnr_T rm_matchcol; // match start without "\zs" |
7 | 138 int rm_ic; |
139 } regmatch_T; | |
140 | |
141 /* | |
142 * Structure to be used for multi-line matching. | |
143 * Sub-match "no" starts in line "startpos[no].lnum" column "startpos[no].col" | |
144 * and ends in line "endpos[no].lnum" just before column "endpos[no].col". | |
145 * The line numbers are relative to the first line, thus startpos[0].lnum is | |
146 * always 0. | |
147 * When there is no match, the line number is -1. | |
148 */ | |
149 typedef struct | |
150 { | |
151 regprog_T *regprog; | |
152 lpos_T startpos[NSUBEXP]; | |
153 lpos_T endpos[NSUBEXP]; | |
31235
7fb4e244b16e
patch 9.0.0951: trying every character position for a match is inefficient
Bram Moolenaar <Bram@vim.org>
parents:
29071
diff
changeset
|
154 |
7fb4e244b16e
patch 9.0.0951: trying every character position for a match is inefficient
Bram Moolenaar <Bram@vim.org>
parents:
29071
diff
changeset
|
155 colnr_T rmm_matchcol; // match start without "\zs" |
7 | 156 int rmm_ic; |
18753
6e3dc2d630c2
patch 8.1.2366: using old C style comments
Bram Moolenaar <Bram@vim.org>
parents:
16378
diff
changeset
|
157 colnr_T rmm_maxcol; // when not zero: maximum column |
7 | 158 } regmmatch_T; |
159 | |
160 /* | |
161 * Structure used to store external references: "\z\(\)" to "\z\1". | |
162 * Use a reference count to avoid the need to copy this around. When it goes | |
163 * from 1 to zero the matches need to be freed. | |
164 */ | |
165 typedef struct | |
166 { | |
167 short refcnt; | |
168 char_u *matches[NSUBEXP]; | |
169 } reg_extmatch_T; | |
170 | |
4444 | 171 struct regengine |
172 { | |
28568
53f25723f378
patch 8.2.4808: unused item in engine struct
Bram Moolenaar <Bram@vim.org>
parents:
28226
diff
changeset
|
173 // bt_regcomp or nfa_regcomp |
4444 | 174 regprog_T *(*regcomp)(char_u*, int); |
28568
53f25723f378
patch 8.2.4808: unused item in engine struct
Bram Moolenaar <Bram@vim.org>
parents:
28226
diff
changeset
|
175 // bt_regfree or nfa_regfree |
4805
66803af09906
updated for version 7.3.1149
Bram Moolenaar <bram@vim.org>
parents:
4785
diff
changeset
|
176 void (*regfree)(regprog_T *); |
28568
53f25723f378
patch 8.2.4808: unused item in engine struct
Bram Moolenaar <Bram@vim.org>
parents:
28226
diff
changeset
|
177 // bt_regexec_nl or nfa_regexec_nl |
11521
578df034735d
patch 8.0.0643: when a pattern search is slow Vim becomes unusable
Christian Brabandt <cb@256bit.org>
parents:
10042
diff
changeset
|
178 int (*regexec_nl)(regmatch_T *, char_u *, colnr_T, int); |
28568
53f25723f378
patch 8.2.4808: unused item in engine struct
Bram Moolenaar <Bram@vim.org>
parents:
28226
diff
changeset
|
179 // bt_regexec_mult or nfa_regexec_mult |
29071
b90bca860b5a
patch 8.2.5057: using gettimeofday() for timeout is very inefficient
Bram Moolenaar <Bram@vim.org>
parents:
29048
diff
changeset
|
180 long (*regexec_multi)(regmmatch_T *, win_T *, buf_T *, linenr_T, colnr_T, int *); |
28568
53f25723f378
patch 8.2.4808: unused item in engine struct
Bram Moolenaar <Bram@vim.org>
parents:
28226
diff
changeset
|
181 //char_u *expr; |
4444 | 182 }; |
183 | |
29048
c98fc7a4dde4
patch 8.2.5046: vim_regsub() can overwrite the destination
Bram Moolenaar <Bram@vim.org>
parents:
28568
diff
changeset
|
184 // Flags used by vim_regsub() and vim_regsub_both() |
c98fc7a4dde4
patch 8.2.5046: vim_regsub() can overwrite the destination
Bram Moolenaar <Bram@vim.org>
parents:
28568
diff
changeset
|
185 #define REGSUB_COPY 1 |
c98fc7a4dde4
patch 8.2.5046: vim_regsub() can overwrite the destination
Bram Moolenaar <Bram@vim.org>
parents:
28568
diff
changeset
|
186 #define REGSUB_MAGIC 2 |
c98fc7a4dde4
patch 8.2.5046: vim_regsub() can overwrite the destination
Bram Moolenaar <Bram@vim.org>
parents:
28568
diff
changeset
|
187 #define REGSUB_BACKSLASH 4 |
c98fc7a4dde4
patch 8.2.5046: vim_regsub() can overwrite the destination
Bram Moolenaar <Bram@vim.org>
parents:
28568
diff
changeset
|
188 |
18753
6e3dc2d630c2
patch 8.1.2366: using old C style comments
Bram Moolenaar <Bram@vim.org>
parents:
16378
diff
changeset
|
189 #endif // _REGEXP_H |