Mercurial > vim
annotate src/regexp.c @ 5899:60cdaa05a6ad v7.4.292
updated for version 7.4.292
Problem: Searching for "a" does not match accented "a" with new regexp
engine, does match with old engine. (David Bürgin)
"ca" does not match "ca" with accented "a" with either engine.
Solution: Change the old engine, check for following composing character
also for single-byte patterns.
author | Bram Moolenaar <bram@vim.org> |
---|---|
date | Tue, 13 May 2014 18:04:00 +0200 |
parents | b5972833add9 |
children | 10fc95f48546 |
rev | line source |
---|---|
7 | 1 /* vi:set ts=8 sts=4 sw=4: |
2 * | |
3 * Handling of regular expressions: vim_regcomp(), vim_regexec(), vim_regsub() | |
4 * | |
5 * NOTICE: | |
6 * | |
7 * This is NOT the original regular expression code as written by Henry | |
8 * Spencer. This code has been modified specifically for use with the VIM | |
9 * editor, and should not be used separately from Vim. If you want a good | |
10 * regular expression library, get the original code. The copyright notice | |
11 * that follows is from the original. | |
12 * | |
13 * END NOTICE | |
14 * | |
15 * Copyright (c) 1986 by University of Toronto. | |
16 * Written by Henry Spencer. Not derived from licensed software. | |
17 * | |
18 * Permission is granted to anyone to use this software for any | |
19 * purpose on any computer system, and to redistribute it freely, | |
20 * subject to the following restrictions: | |
21 * | |
22 * 1. The author is not responsible for the consequences of use of | |
23 * this software, no matter how awful, even if they arise | |
24 * from defects in it. | |
25 * | |
26 * 2. The origin of this software must not be misrepresented, either | |
27 * by explicit claim or by omission. | |
28 * | |
29 * 3. Altered versions must be plainly marked as such, and must not | |
30 * be misrepresented as being the original software. | |
31 * | |
32 * Beware that some of this code is subtly aware of the way operator | |
33 * precedence is structured in regular expressions. Serious changes in | |
34 * regular-expression syntax might require a total rethink. | |
35 * | |
24 | 36 * Changes have been made by Tony Andrews, Olaf 'Rhialto' Seibert, Robert |
37 * Webb, Ciaran McCreesh and Bram Moolenaar. | |
7 | 38 * Named character class support added by Walter Briscoe (1998 Jul 01) |
39 */ | |
40 | |
4444 | 41 /* Uncomment the first if you do not want to see debugging logs or files |
42 * related to regular expressions, even when compiling with -DDEBUG. | |
43 * Uncomment the second to get the regexp debugging. */ | |
44 /* #undef DEBUG */ | |
45 /* #define DEBUG */ | |
46 | |
7 | 47 #include "vim.h" |
48 | |
4444 | 49 #ifdef DEBUG |
50 /* show/save debugging data when BT engine is used */ | |
51 # define BT_REGEXP_DUMP | |
52 /* save the debugging data to a file instead of displaying it */ | |
53 # define BT_REGEXP_LOG | |
4460 | 54 # define BT_REGEXP_DEBUG_LOG |
55 # define BT_REGEXP_DEBUG_LOG_NAME "bt_regexp_debug.log" | |
4444 | 56 #endif |
7 | 57 |
58 /* | |
59 * The "internal use only" fields in regexp.h are present to pass info from | |
60 * compile to execute that permits the execute phase to run lots faster on | |
61 * simple cases. They are: | |
62 * | |
63 * regstart char that must begin a match; NUL if none obvious; Can be a | |
64 * multi-byte character. | |
65 * reganch is the match anchored (at beginning-of-line only)? | |
66 * regmust string (pointer into program) that match must include, or NULL | |
67 * regmlen length of regmust string | |
68 * regflags RF_ values or'ed together | |
69 * | |
70 * Regstart and reganch permit very fast decisions on suitable starting points | |
71 * for a match, cutting down the work a lot. Regmust permits fast rejection | |
72 * of lines that cannot possibly match. The regmust tests are costly enough | |
73 * that vim_regcomp() supplies a regmust only if the r.e. contains something | |
74 * potentially expensive (at present, the only such thing detected is * or + | |
75 * at the start of the r.e., which can involve a lot of backup). Regmlen is | |
76 * supplied because the test in vim_regexec() needs it and vim_regcomp() is | |
77 * computing it anyway. | |
78 */ | |
79 | |
80 /* | |
81 * Structure for regexp "program". This is essentially a linear encoding | |
82 * of a nondeterministic finite-state machine (aka syntax charts or | |
83 * "railroad normal form" in parsing technology). Each node is an opcode | |
84 * plus a "next" pointer, possibly plus an operand. "Next" pointers of | |
85 * all nodes except BRANCH and BRACE_COMPLEX implement concatenation; a "next" | |
86 * pointer with a BRANCH on both ends of it is connecting two alternatives. | |
87 * (Here we have one of the subtle syntax dependencies: an individual BRANCH | |
88 * (as opposed to a collection of them) is never concatenated with anything | |
89 * because of operator precedence). The "next" pointer of a BRACE_COMPLEX | |
167 | 90 * node points to the node after the stuff to be repeated. |
91 * The operand of some types of node is a literal string; for others, it is a | |
92 * node leading into a sub-FSM. In particular, the operand of a BRANCH node | |
93 * is the first node of the branch. | |
94 * (NB this is *not* a tree structure: the tail of the branch connects to the | |
95 * thing following the set of BRANCHes.) | |
7 | 96 * |
97 * pattern is coded like: | |
98 * | |
99 * +-----------------+ | |
100 * | V | |
101 * <aa>\|<bb> BRANCH <aa> BRANCH <bb> --> END | |
102 * | ^ | ^ | |
103 * +------+ +----------+ | |
104 * | |
105 * | |
106 * +------------------+ | |
107 * V | | |
108 * <aa>* BRANCH BRANCH <aa> --> BACK BRANCH --> NOTHING --> END | |
109 * | | ^ ^ | |
110 * | +---------------+ | | |
111 * +---------------------------------------------+ | |
112 * | |
113 * | |
167 | 114 * +----------------------+ |
115 * V | | |
233 | 116 * <aa>\+ BRANCH <aa> --> BRANCH --> BACK BRANCH --> NOTHING --> END |
856 | 117 * | | ^ ^ |
118 * | +-----------+ | | |
179 | 119 * +--------------------------------------------------+ |
167 | 120 * |
121 * | |
7 | 122 * +-------------------------+ |
123 * V | | |
124 * <aa>\{} BRANCH BRACE_LIMITS --> BRACE_COMPLEX <aa> --> BACK END | |
125 * | | ^ | |
126 * | +----------------+ | |
127 * +-----------------------------------------------+ | |
128 * | |
129 * | |
130 * <aa>\@!<bb> BRANCH NOMATCH <aa> --> END <bb> --> END | |
131 * | | ^ ^ | |
132 * | +----------------+ | | |
133 * +--------------------------------+ | |
134 * | |
135 * +---------+ | |
136 * | V | |
137 * \z[abc] BRANCH BRANCH a BRANCH b BRANCH c BRANCH NOTHING --> END | |
138 * | | | | ^ ^ | |
139 * | | | +-----+ | | |
140 * | | +----------------+ | | |
141 * | +---------------------------+ | | |
142 * +------------------------------------------------------+ | |
143 * | |
1209 | 144 * They all start with a BRANCH for "\|" alternatives, even when there is only |
7 | 145 * one alternative. |
146 */ | |
147 | |
148 /* | |
149 * The opcodes are: | |
150 */ | |
151 | |
152 /* definition number opnd? meaning */ | |
153 #define END 0 /* End of program or NOMATCH operand. */ | |
154 #define BOL 1 /* Match "" at beginning of line. */ | |
155 #define EOL 2 /* Match "" at end of line. */ | |
156 #define BRANCH 3 /* node Match this alternative, or the | |
157 * next... */ | |
158 #define BACK 4 /* Match "", "next" ptr points backward. */ | |
159 #define EXACTLY 5 /* str Match this string. */ | |
160 #define NOTHING 6 /* Match empty string. */ | |
161 #define STAR 7 /* node Match this (simple) thing 0 or more | |
162 * times. */ | |
163 #define PLUS 8 /* node Match this (simple) thing 1 or more | |
164 * times. */ | |
165 #define MATCH 9 /* node match the operand zero-width */ | |
166 #define NOMATCH 10 /* node check for no match with operand */ | |
167 #define BEHIND 11 /* node look behind for a match with operand */ | |
168 #define NOBEHIND 12 /* node look behind for no match with operand */ | |
169 #define SUBPAT 13 /* node match the operand here */ | |
170 #define BRACE_SIMPLE 14 /* node Match this (simple) thing between m and | |
171 * n times (\{m,n\}). */ | |
172 #define BOW 15 /* Match "" after [^a-zA-Z0-9_] */ | |
173 #define EOW 16 /* Match "" at [^a-zA-Z0-9_] */ | |
174 #define BRACE_LIMITS 17 /* nr nr define the min & max for BRACE_SIMPLE | |
175 * and BRACE_COMPLEX. */ | |
176 #define NEWL 18 /* Match line-break */ | |
177 #define BHPOS 19 /* End position for BEHIND or NOBEHIND */ | |
178 | |
179 | |
/*
 * Character class opcodes.
 *
 * Opcodes 20-48 are the plain classes.  Adding ADD_NL to one of them gives
 * the variant that also matches a line-break, so those occupy 50-78.
 *
 * FIRST_NL and LAST_NL are sums, so their replacement lists are wrapped in
 * parentheses: without them an expression such as "2 * FIRST_NL" or
 * "LAST_NL - FIRST_NL" would bind to only part of the sum.
 */
#define ADD_NL		30
#define FIRST_NL	(ANY + ADD_NL)
#define ANY		20	/*	Match any one character. */
#define ANYOF		21	/* str	Match any character in this string. */
#define ANYBUT		22	/* str	Match any character not in this
				 *	string. */
#define IDENT		23	/*	Match identifier char */
#define SIDENT		24	/*	Match identifier char but no digit */
#define KWORD		25	/*	Match keyword char */
#define SKWORD		26	/*	Match word char but no digit */
#define FNAME		27	/*	Match file name char */
#define SFNAME		28	/*	Match file name char but no digit */
#define PRINT		29	/*	Match printable char */
#define SPRINT		30	/*	Match printable char but no digit */
#define WHITE		31	/*	Match whitespace char */
#define NWHITE		32	/*	Match non-whitespace char */
#define DIGIT		33	/*	Match digit char */
#define NDIGIT		34	/*	Match non-digit char */
#define HEX		35	/*	Match hex char */
#define NHEX		36	/*	Match non-hex char */
#define OCTAL		37	/*	Match octal char */
#define NOCTAL		38	/*	Match non-octal char */
#define WORD		39	/*	Match word char */
#define NWORD		40	/*	Match non-word char */
#define HEAD		41	/*	Match head char */
#define NHEAD		42	/*	Match non-head char */
#define ALPHA		43	/*	Match alpha char */
#define NALPHA		44	/*	Match non-alpha char */
#define LOWER		45	/*	Match lowercase char */
#define NLOWER		46	/*	Match non-lowercase char */
#define UPPER		47	/*	Match uppercase char */
#define NUPPER		48	/*	Match non-uppercase char */
#define LAST_NL		(NUPPER + ADD_NL)
/* Non-zero when "op" lies in the range FIRST_NL .. LAST_NL (inclusive). */
#define WITH_NL(op)	((op) >= FIRST_NL && (op) <= LAST_NL)
215 | |
216 #define MOPEN 80 /* -89 Mark this point in input as start of | |
217 * \( subexpr. MOPEN + 0 marks start of | |
218 * match. */ | |
219 #define MCLOSE 90 /* -99 Analogous to MOPEN. MCLOSE + 0 marks | |
220 * end of match. */ | |
221 #define BACKREF 100 /* -109 node Match same string again \1-\9 */ | |
222 | |
223 #ifdef FEAT_SYN_HL | |
224 # define ZOPEN 110 /* -119 Mark this point in input as start of | |
225 * \z( subexpr. */ | |
226 # define ZCLOSE 120 /* -129 Analogous to ZOPEN. */ | |
227 # define ZREF 130 /* -139 node Match external submatch \z1-\z9 */ | |
228 #endif | |
229 | |
230 #define BRACE_COMPLEX 140 /* -149 node Match nodes between m & n times */ | |
231 | |
232 #define NOPEN 150 /* Mark this point in input as start of | |
233 \%( subexpr. */ | |
234 #define NCLOSE 151 /* Analogous to NOPEN. */ | |
235 | |
236 #define MULTIBYTECODE 200 /* mbc Match one multi-byte character */ | |
237 #define RE_BOF 201 /* Match "" at beginning of file. */ | |
238 #define RE_EOF 202 /* Match "" at end of file. */ | |
239 #define CURSOR 203 /* Match location of cursor. */ | |
240 | |
241 #define RE_LNUM 204 /* nr cmp Match line number */ | |
242 #define RE_COL 205 /* nr cmp Match column number */ | |
243 #define RE_VCOL 206 /* nr cmp Match virtual column number */ | |
244 | |
639 | 245 #define RE_MARK 207 /* mark cmp Match mark position */ |
246 #define RE_VISUAL 208 /* Match Visual area */ | |
247 | |
7 | 248 /* |
249 * Magic characters have a special meaning, they don't match literally. | |
250 * Magic characters are negative. This separates them from literal characters | |
251 * (possibly multi-byte). Only ASCII characters can be Magic. | |
252 */ | |
253 #define Magic(x) ((int)(x) - 256) | |
254 #define un_Magic(x) ((x) + 256) | |
255 #define is_Magic(x) ((x) < 0) | |
256 | |
257 static int no_Magic __ARGS((int x)); | |
258 static int toggle_Magic __ARGS((int x)); | |
259 | |
/*
 * Strip the Magic-ness from "x": a Magic (negative) value is converted back
 * with un_Magic(); any other value is returned unchanged.
 */
static int
no_Magic(x)
    int		x;
{
    return is_Magic(x) ? un_Magic(x) : x;
}
268 | |
/*
 * Flip the Magic-ness of "x": a Magic (negative) value is passed through
 * un_Magic(), any other value is passed through Magic().
 */
static int
toggle_Magic(x)
    int		x;
{
    return is_Magic(x) ? un_Magic(x) : Magic(x);
}
277 | |
278 /* | |
279 * The first byte of the regexp internal "program" is actually this magic | |
280 * number; the start node begins in the second byte. It's used to catch the | |
281 * most severe mutilation of the program by the caller. | |
282 */ | |
283 | |
284 #define REGMAGIC 0234 | |
285 | |
286 /* | |
287 * Opcode notes: | |
288 * | |
289 * BRANCH The set of branches constituting a single choice are hooked | |
290 * together with their "next" pointers, since precedence prevents | |
291 * anything being concatenated to any individual branch. The | |
292 * "next" pointer of the last BRANCH in a choice points to the | |
293 * thing following the whole choice. This is also where the | |
294 * final "next" pointer of each individual branch points; each | |
295 * branch starts with the operand node of a BRANCH node. | |
296 * | |
297 * BACK Normal "next" pointers all implicitly point forward; BACK | |
298 * exists to make loop structures possible. | |
299 * | |
300 * STAR,PLUS '=', and complex '*' and '+', are implemented as circular | |
301 * BRANCH structures using BACK. Simple cases (one character | |
302 * per match) are implemented with STAR and PLUS for speed | |
303 * and to minimize recursive plunges. | |
304 * | |
305 * BRACE_LIMITS This is always followed by a BRACE_SIMPLE or BRACE_COMPLEX | |
306 * node, and defines the min and max limits to be used for that | |
307 * node. | |
308 * | |
309 * MOPEN,MCLOSE ...are numbered at compile time. | |
310 * ZOPEN,ZCLOSE ...ditto | |
311 */ | |
312 | |
313 /* | |
314 * A node is one char of opcode followed by two chars of "next" pointer. | |
315 * "Next" pointers are stored as two 8-bit bytes, high order first. The | |
316 * value is a positive offset from the opcode of the node containing it. | |
317 * An operand, if any, simply follows the node. (Note that much of the | |
318 * code generation knows about this implicit relationship.) | |
319 * | |
320 * Using two bytes for the "next" pointer is vast overkill for most things, | |
321 * but allows patterns to get big without disasters. | |
322 */ | |
323 #define OP(p) ((int)*(p)) | |
324 #define NEXT(p) (((*((p) + 1) & 0377) << 8) + (*((p) + 2) & 0377)) | |
325 #define OPERAND(p) ((p) + 3) | |
326 /* Obtain an operand that was stored as four bytes, MSB first. */ | |
327 #define OPERAND_MIN(p) (((long)(p)[3] << 24) + ((long)(p)[4] << 16) \ | |
328 + ((long)(p)[5] << 8) + (long)(p)[6]) | |
329 /* Obtain a second operand stored as four bytes. */ | |
330 #define OPERAND_MAX(p) OPERAND_MIN((p) + 4) | |
331 /* Obtain a second single-byte operand stored after a four bytes operand. */ | |
332 #define OPERAND_CMP(p) (p)[7] | |
333 | |
334 /* | |
335 * Utility definitions. | |
336 */ | |
337 #define UCHARAT(p) ((int)*(char_u *)(p)) | |
338 | |
339 /* Used for an error (down from) vim_regcomp(): give the error message, set | |
340 * rc_did_emsg and return NULL */ | |
653 | 341 #define EMSG_RET_NULL(m) return (EMSG(m), rc_did_emsg = TRUE, (void *)NULL) |
308 | 342 #define EMSG_RET_FAIL(m) return (EMSG(m), rc_did_emsg = TRUE, FAIL) |
4444 | 343 #define EMSG2_RET_NULL(m, c) return (EMSG2((m), (c) ? "" : "\\"), rc_did_emsg = TRUE, (void *)NULL) |
344 #define EMSG2_RET_FAIL(m, c) return (EMSG2((m), (c) ? "" : "\\"), rc_did_emsg = TRUE, FAIL) | |
345 #define EMSG_ONE_RET_NULL EMSG2_RET_NULL(_("E369: invalid item in %s%%[]"), reg_magic == MAGIC_ALL) | |
7 | 346 |
347 #define MAX_LIMIT (32767L << 16L) | |
348 | |
349 static int re_multi_type __ARGS((int)); | |
350 static int cstrncmp __ARGS((char_u *s1, char_u *s2, int *n)); | |
351 static char_u *cstrchr __ARGS((char_u *, int)); | |
352 | |
4444 | 353 #ifdef BT_REGEXP_DUMP |
354 static void regdump __ARGS((char_u *, bt_regprog_T *)); | |
355 #endif | |
7 | 356 #ifdef DEBUG |
357 static char_u *regprop __ARGS((char_u *)); | |
358 #endif | |
359 | |
4444 | 360 static char_u e_missingbracket[] = N_("E769: Missing ] after %s["); |
361 static char_u e_unmatchedpp[] = N_("E53: Unmatched %s%%("); | |
362 static char_u e_unmatchedp[] = N_("E54: Unmatched %s("); | |
363 static char_u e_unmatchedpar[] = N_("E55: Unmatched %s)"); | |
4720
bd6bef0bd0fb
updated for version 7.3.1107
Bram Moolenaar <bram@vim.org>
parents:
4688
diff
changeset
|
364 #ifdef FEAT_SYN_HL |
4688
371cc0c44097
updated for version 7.3.1091
Bram Moolenaar <bram@vim.org>
parents:
4682
diff
changeset
|
365 static char_u e_z_not_allowed[] = N_("E66: \\z( not allowed here"); |
371cc0c44097
updated for version 7.3.1091
Bram Moolenaar <bram@vim.org>
parents:
4682
diff
changeset
|
366 static char_u e_z1_not_allowed[] = N_("E67: \\z1 et al. not allowed here"); |
4720
bd6bef0bd0fb
updated for version 7.3.1107
Bram Moolenaar <bram@vim.org>
parents:
4688
diff
changeset
|
367 #endif |
4744
a62695305e03
updated for version 7.3.1119
Bram Moolenaar <bram@vim.org>
parents:
4732
diff
changeset
|
368 static char_u e_missing_sb[] = N_("E69: Missing ] after %s%%["); |
4760
532a9855bd30
updated for version 7.3.1127
Bram Moolenaar <bram@vim.org>
parents:
4746
diff
changeset
|
369 static char_u e_empty_sb[] = N_("E70: Empty %s%%[]"); |
7 | 370 #define NOT_MULTI 0 |
371 #define MULTI_ONE 1 | |
372 #define MULTI_MULT 2 | |
373 /* | |
374 * Return NOT_MULTI if c is not a "multi" operator. | |
375 * Return MULTI_ONE if c is a single "multi" operator. | |
376 * Return MULTI_MULT if c is a multi "multi" operator. | |
377 */ | |
378 static int | |
379 re_multi_type(c) | |
380 int c; | |
381 { | |
382 if (c == Magic('@') || c == Magic('=') || c == Magic('?')) | |
383 return MULTI_ONE; | |
384 if (c == Magic('*') || c == Magic('+') || c == Magic('{')) | |
385 return MULTI_MULT; | |
386 return NOT_MULTI; | |
387 } | |
388 | |
389 /* | |
390 * Flags to be passed up and down. | |
391 */ | |
392 #define HASWIDTH 0x1 /* Known never to match null string. */ | |
393 #define SIMPLE 0x2 /* Simple enough to be STAR/PLUS operand. */ | |
394 #define SPSTART 0x4 /* Starts with * or +. */ | |
395 #define HASNL 0x8 /* Contains some \n. */ | |
396 #define HASLOOKBH 0x10 /* Contains "\@<=" or "\@<!". */ | |
397 #define WORST 0 /* Worst case. */ | |
398 | |
399 /* | |
400 * When regcode is set to this value, code is not emitted and size is computed | |
401 * instead. | |
402 */ | |
403 #define JUST_CALC_SIZE ((char_u *) -1) | |
404 | |
359 | 405 static char_u *reg_prev_sub = NULL; |
406 | |
7 | 407 /* |
408 * REGEXP_INRANGE contains all characters which are always special in a [] | |
409 * range after '\'. | |
410 * REGEXP_ABBR contains all characters which act as abbreviations after '\'. | |
411 * These are: | |
412 * \n - New line (NL). | |
413 * \r - Carriage Return (CR). | |
414 * \t - Tab (TAB). | |
415 * \e - Escape (ESC). | |
416 * \b - Backspace (Ctrl_H). | |
24 | 417 * \d - Character code in decimal, eg \d123 |
418 * \o - Character code in octal, eg \o112 | |
419 * \x - Character code in hex, eg \x4a | |
420 * \u - Multibyte character code, eg \u20ac | |
421 * \U - Long multibyte character code, eg \U12345678 | |
7 | 422 */ |
423 static char_u REGEXP_INRANGE[] = "]^-n\\"; | |
24 | 424 static char_u REGEXP_ABBR[] = "nrtebdoxuU"; |
7 | 425 |
426 static int backslash_trans __ARGS((int c)); | |
167 | 427 static int get_char_class __ARGS((char_u **pp)); |
428 static int get_equi_class __ARGS((char_u **pp)); | |
429 static void reg_equi_class __ARGS((int c)); | |
430 static int get_coll_element __ARGS((char_u **pp)); | |
7 | 431 static char_u *skip_anyof __ARGS((char_u *p)); |
432 static void init_class_tab __ARGS((void)); | |
433 | |
434 /* | |
435 * Translate '\x' to its control character, except "\n", which is Magic. | |
436 */ | |
437 static int | |
438 backslash_trans(c) | |
439 int c; | |
440 { | |
441 switch (c) | |
442 { | |
443 case 'r': return CAR; | |
444 case 't': return TAB; | |
445 case 'e': return ESC; | |
446 case 'b': return BS; | |
447 } | |
448 return c; | |
449 } | |
450 | |
451 /* | |
167 | 452 * Check for a character class name "[:name:]". "pp" points to the '['. |
7 | 453 * Returns one of the CLASS_ items. CLASS_NONE means that no item was |
454 * recognized. Otherwise "pp" is advanced to after the item. | |
455 */ | |
456 static int | |
167 | 457 get_char_class(pp) |
7 | 458 char_u **pp; |
459 { | |
460 static const char *(class_names[]) = | |
461 { | |
462 "alnum:]", | |
463 #define CLASS_ALNUM 0 | |
464 "alpha:]", | |
465 #define CLASS_ALPHA 1 | |
466 "blank:]", | |
467 #define CLASS_BLANK 2 | |
468 "cntrl:]", | |
469 #define CLASS_CNTRL 3 | |
470 "digit:]", | |
471 #define CLASS_DIGIT 4 | |
472 "graph:]", | |
473 #define CLASS_GRAPH 5 | |
474 "lower:]", | |
475 #define CLASS_LOWER 6 | |
476 "print:]", | |
477 #define CLASS_PRINT 7 | |
478 "punct:]", | |
479 #define CLASS_PUNCT 8 | |
480 "space:]", | |
481 #define CLASS_SPACE 9 | |
482 "upper:]", | |
483 #define CLASS_UPPER 10 | |
484 "xdigit:]", | |
485 #define CLASS_XDIGIT 11 | |
486 "tab:]", | |
487 #define CLASS_TAB 12 | |
488 "return:]", | |
489 #define CLASS_RETURN 13 | |
490 "backspace:]", | |
491 #define CLASS_BACKSPACE 14 | |
492 "escape:]", | |
493 #define CLASS_ESCAPE 15 | |
494 }; | |
495 #define CLASS_NONE 99 | |
496 int i; | |
497 | |
498 if ((*pp)[1] == ':') | |
499 { | |
1877 | 500 for (i = 0; i < (int)(sizeof(class_names) / sizeof(*class_names)); ++i) |
7 | 501 if (STRNCMP(*pp + 2, class_names[i], STRLEN(class_names[i])) == 0) |
502 { | |
503 *pp += STRLEN(class_names[i]) + 2; | |
504 return i; | |
505 } | |
506 } | |
507 return CLASS_NONE; | |
508 } | |
509 | |
510 /* | |
511 * Specific version of character class functions. | |
512 * Using a table to keep this fast. | |
513 */ | |
514 static short class_tab[256]; | |
515 | |
516 #define RI_DIGIT 0x01 | |
517 #define RI_HEX 0x02 | |
518 #define RI_OCTAL 0x04 | |
519 #define RI_WORD 0x08 | |
520 #define RI_HEAD 0x10 | |
521 #define RI_ALPHA 0x20 | |
522 #define RI_LOWER 0x40 | |
523 #define RI_UPPER 0x80 | |
524 #define RI_WHITE 0x100 | |
525 | |
526 static void | |
527 init_class_tab() | |
528 { | |
529 int i; | |
530 static int done = FALSE; | |
531 | |
532 if (done) | |
533 return; | |
534 | |
535 for (i = 0; i < 256; ++i) | |
536 { | |
537 if (i >= '0' && i <= '7') | |
538 class_tab[i] = RI_DIGIT + RI_HEX + RI_OCTAL + RI_WORD; | |
539 else if (i >= '8' && i <= '9') | |
540 class_tab[i] = RI_DIGIT + RI_HEX + RI_WORD; | |
541 else if (i >= 'a' && i <= 'f') | |
542 class_tab[i] = RI_HEX + RI_WORD + RI_HEAD + RI_ALPHA + RI_LOWER; | |
543 #ifdef EBCDIC | |
544 else if ((i >= 'g' && i <= 'i') || (i >= 'j' && i <= 'r') | |
545 || (i >= 's' && i <= 'z')) | |
546 #else | |
547 else if (i >= 'g' && i <= 'z') | |
548 #endif | |
549 class_tab[i] = RI_WORD + RI_HEAD + RI_ALPHA + RI_LOWER; | |
550 else if (i >= 'A' && i <= 'F') | |
551 class_tab[i] = RI_HEX + RI_WORD + RI_HEAD + RI_ALPHA + RI_UPPER; | |
552 #ifdef EBCDIC | |
553 else if ((i >= 'G' && i <= 'I') || ( i >= 'J' && i <= 'R') | |
554 || (i >= 'S' && i <= 'Z')) | |
555 #else | |
556 else if (i >= 'G' && i <= 'Z') | |
557 #endif | |
558 class_tab[i] = RI_WORD + RI_HEAD + RI_ALPHA + RI_UPPER; | |
559 else if (i == '_') | |
560 class_tab[i] = RI_WORD + RI_HEAD; | |
561 else | |
562 class_tab[i] = 0; | |
563 } | |
564 class_tab[' '] |= RI_WHITE; | |
565 class_tab['\t'] |= RI_WHITE; | |
566 done = TRUE; | |
567 } | |
568 | |
/*
 * Character class tests based on the RI_ flags stored in class_tab[].
 *
 * With FEAT_MBYTE every test first rejects values of 0x100 and above, so
 * that the 256-entry table is not indexed with a larger value.  The argument
 * is parenthesized in that comparison so an expression argument such as
 * "a ? b : c" is compared as a whole.
 *
 * NOTE: with FEAT_MBYTE "c" is evaluated twice; do not pass an expression
 * with side effects.
 */
#ifdef FEAT_MBYTE
# define ri_digit(c)	((c) < 0x100 && (class_tab[c] & RI_DIGIT))
# define ri_hex(c)	((c) < 0x100 && (class_tab[c] & RI_HEX))
# define ri_octal(c)	((c) < 0x100 && (class_tab[c] & RI_OCTAL))
# define ri_word(c)	((c) < 0x100 && (class_tab[c] & RI_WORD))
# define ri_head(c)	((c) < 0x100 && (class_tab[c] & RI_HEAD))
# define ri_alpha(c)	((c) < 0x100 && (class_tab[c] & RI_ALPHA))
# define ri_lower(c)	((c) < 0x100 && (class_tab[c] & RI_LOWER))
# define ri_upper(c)	((c) < 0x100 && (class_tab[c] & RI_UPPER))
# define ri_white(c)	((c) < 0x100 && (class_tab[c] & RI_WHITE))
#else
# define ri_digit(c)	(class_tab[c] & RI_DIGIT)
# define ri_hex(c)	(class_tab[c] & RI_HEX)
# define ri_octal(c)	(class_tab[c] & RI_OCTAL)
# define ri_word(c)	(class_tab[c] & RI_WORD)
# define ri_head(c)	(class_tab[c] & RI_HEAD)
# define ri_alpha(c)	(class_tab[c] & RI_ALPHA)
# define ri_lower(c)	(class_tab[c] & RI_LOWER)
# define ri_upper(c)	(class_tab[c] & RI_UPPER)
# define ri_white(c)	(class_tab[c] & RI_WHITE)
#endif
590 | |
591 /* flags for regflags */ | |
592 #define RF_ICASE 1 /* ignore case */ | |
593 #define RF_NOICASE 2 /* don't ignore case */ | |
594 #define RF_HASNL 4 /* can match a NL */ | |
595 #define RF_ICOMBINE 8 /* ignore combining characters */ | |
596 #define RF_LOOKBH 16 /* uses "\@<=" or "\@<!" */ | |
597 | |
598 /* | |
599 * Global work variables for vim_regcomp(). | |
600 */ | |
601 | |
602 static char_u *regparse; /* Input-scan pointer. */ | |
603 static int prevchr_len; /* byte length of previous char */ | |
604 static int num_complex_braces; /* Complex \{...} count */ | |
605 static int regnpar; /* () count. */ | |
606 #ifdef FEAT_SYN_HL | |
607 static int regnzpar; /* \z() count. */ | |
608 static int re_has_z; /* \z item detected */ | |
609 #endif | |
610 static char_u *regcode; /* Code-emit pointer, or JUST_CALC_SIZE */ | |
611 static long regsize; /* Code size. */ | |
2010 | 612 static int reg_toolong; /* TRUE when offset out of range */ |
7 | 613 static char_u had_endbrace[NSUBEXP]; /* flags, TRUE if end of () found */ |
614 static unsigned regflags; /* RF_ flags for prog */ | |
615 static long brace_min[10]; /* Minimums for complex brace repeats */ | |
616 static long brace_max[10]; /* Maximums for complex brace repeats */ | |
617 static int brace_count[10]; /* Current counts for complex brace repeats */ | |
618 #if defined(FEAT_SYN_HL) || defined(PROTO) | |
619 static int had_eol; /* TRUE when EOL found by vim_regcomp() */ | |
620 #endif | |
621 static int one_exactly = FALSE; /* only do one char for EXACTLY */ | |
622 | |
623 static int reg_magic; /* magicness of the pattern: */ | |
624 #define MAGIC_NONE 1 /* "\V" very unmagic */ | |
625 #define MAGIC_OFF 2 /* "\M" or 'magic' off */ | |
626 #define MAGIC_ON 3 /* "\m" or 'magic' */ | |
627 #define MAGIC_ALL 4 /* "\v" very magic */ | |
628 | |
629 static int reg_string; /* matching with a string instead of a buffer | |
630 line */ | |
481 | 631 static int reg_strict; /* "[abc" is illegal */ |
7 | 632 |
633 /* | |
634 * META contains all characters that may be magic, except '^' and '$'. | |
635 */ | |
636 | |
637 #ifdef EBCDIC | |
638 static char_u META[] = "%&()*+.123456789<=>?@ACDFHIKLMOPSUVWX[_acdfhiklmnopsuvwxz{|~"; | |
639 #else | |
640 /* META[] is used often enough to justify turning it into a table. */ | |
641 static char_u META_flags[] = { | |
642 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
643 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
644 /* % & ( ) * + . */ | |
645 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, | |
646 /* 1 2 3 4 5 6 7 8 9 < = > ? */ | |
647 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, | |
648 /* @ A C D F H I K L M O */ | |
649 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, | |
650 /* P S U V W X Z [ _ */ | |
651 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, | |
652 /* a c d f h i k l m n o */ | |
653 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, | |
654 /* p s u v w x z { | ~ */ | |
655 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1 | |
656 }; | |
657 #endif | |
658 | |
4444 | 659 static int curchr; /* currently parsed character */ |
660 /* Previous character. Note: prevchr is sometimes -1 when we are not at the | |
661 * start, eg in /[ ^I]^ the pattern was never found even if it existed, | |
662 * because ^ was taken to be magic -- webb */ | |
663 static int prevchr; | |
664 static int prevprevchr; /* previous-previous character */ | |
665 static int nextchr; /* used for ungetchr() */ | |
7 | 666 |
667 /* arguments for reg() */ | |
668 #define REG_NOPAREN 0 /* toplevel reg() */ | |
669 #define REG_PAREN 1 /* \(\) */ | |
670 #define REG_ZPAREN 2 /* \z(\) */ | |
671 #define REG_NPAREN 3 /* \%(\) */ | |
672 | |
4679
4d92b873acef
updated for version 7.3.1087
Bram Moolenaar <bram@vim.org>
parents:
4579
diff
changeset
|
673 typedef struct |
4d92b873acef
updated for version 7.3.1087
Bram Moolenaar <bram@vim.org>
parents:
4579
diff
changeset
|
674 { |
4d92b873acef
updated for version 7.3.1087
Bram Moolenaar <bram@vim.org>
parents:
4579
diff
changeset
|
675 char_u *regparse; |
4d92b873acef
updated for version 7.3.1087
Bram Moolenaar <bram@vim.org>
parents:
4579
diff
changeset
|
676 int prevchr_len; |
4d92b873acef
updated for version 7.3.1087
Bram Moolenaar <bram@vim.org>
parents:
4579
diff
changeset
|
677 int curchr; |
4d92b873acef
updated for version 7.3.1087
Bram Moolenaar <bram@vim.org>
parents:
4579
diff
changeset
|
678 int prevchr; |
4d92b873acef
updated for version 7.3.1087
Bram Moolenaar <bram@vim.org>
parents:
4579
diff
changeset
|
679 int prevprevchr; |
4d92b873acef
updated for version 7.3.1087
Bram Moolenaar <bram@vim.org>
parents:
4579
diff
changeset
|
680 int nextchr; |
4d92b873acef
updated for version 7.3.1087
Bram Moolenaar <bram@vim.org>
parents:
4579
diff
changeset
|
681 int at_start; |
4d92b873acef
updated for version 7.3.1087
Bram Moolenaar <bram@vim.org>
parents:
4579
diff
changeset
|
682 int prev_at_start; |
4d92b873acef
updated for version 7.3.1087
Bram Moolenaar <bram@vim.org>
parents:
4579
diff
changeset
|
683 int regnpar; |
4d92b873acef
updated for version 7.3.1087
Bram Moolenaar <bram@vim.org>
parents:
4579
diff
changeset
|
684 } parse_state_T; |
4d92b873acef
updated for version 7.3.1087
Bram Moolenaar <bram@vim.org>
parents:
4579
diff
changeset
|
685 |
/*
 * Forward declarations for vim_regcomp()'s friends.
 */
static void initchr __ARGS((char_u *));
static void save_parse_state __ARGS((parse_state_T *ps));
static void restore_parse_state __ARGS((parse_state_T *ps));
static int getchr __ARGS((void));
static void skipchr_keepstart __ARGS((void));
static int peekchr __ARGS((void));
static void skipchr __ARGS((void));
static void ungetchr __ARGS((void));
static int gethexchrs __ARGS((int maxinputlen));
static int getoctchrs __ARGS((void));
static int getdecchrs __ARGS((void));
static int coll_get_char __ARGS((void));
static void regcomp_start __ARGS((char_u *expr, int flags));
static char_u *reg __ARGS((int, int *));
static char_u *regbranch __ARGS((int *flagp));
static char_u *regconcat __ARGS((int *flagp));
static char_u *regpiece __ARGS((int *));
static char_u *regatom __ARGS((int *));
static char_u *regnode __ARGS((int));
#ifdef FEAT_MBYTE
static int use_multibytecode __ARGS((int c));
#endif
static int prog_magic_wrong __ARGS((void));
static char_u *regnext __ARGS((char_u *));
static void regc __ARGS((int b));
/*
 * REGMBC(x) and CASEMBC(x) are only meaningful with FEAT_MBYTE:
 * - REGMBC(x) expands to the complete statement "regmbc(x);" (the semicolon
 *   is part of the macro, so uses must not add another one);
 * - CASEMBC(x) expands to the label "case x:".
 * Without FEAT_MBYTE both expand to nothing and regmbc() is an alias for
 * regc().
 */
#ifdef FEAT_MBYTE
static void regmbc __ARGS((int c));
# define REGMBC(x) regmbc(x);
# define CASEMBC(x) case x:
#else
# define regmbc(c) regc(c)
# define REGMBC(x)
# define CASEMBC(x)
#endif
static void reginsert __ARGS((int, char_u *));
static void reginsert_nr __ARGS((int op, long val, char_u *opnd));
static void reginsert_limits __ARGS((int, long, long, char_u *));
static char_u *re_put_long __ARGS((char_u *pr, long_u val));
static int read_limits __ARGS((long *, long *));
static void regtail __ARGS((char_u *, char_u *));
static void regoptail __ARGS((char_u *, char_u *));

/* The two engine descriptors; bt_regcomp() stores &bt_regengine in the
 * programs it returns. */
static regengine_T bt_regengine;
static regengine_T nfa_regengine;
733 | |
7 | 734 /* |
735 * Return TRUE if compiled regular expression "prog" can match a line break. | |
736 */ | |
737 int | |
738 re_multiline(prog) | |
739 regprog_T *prog; | |
740 { | |
741 return (prog->regflags & RF_HASNL); | |
742 } | |
743 | |
744 /* | |
745 * Return TRUE if compiled regular expression "prog" looks before the start | |
746 * position (pattern contains "\@<=" or "\@<!"). | |
747 */ | |
748 int | |
749 re_lookbehind(prog) | |
750 regprog_T *prog; | |
751 { | |
752 return (prog->regflags & RF_LOOKBH); | |
753 } | |
754 | |
755 /* | |
167 | 756 * Check for an equivalence class name "[=a=]". "pp" points to the '['. |
757 * Returns a character representing the class. Zero means that no item was | |
758 * recognized. Otherwise "pp" is advanced to after the item. | |
759 */ | |
760 static int | |
761 get_equi_class(pp) | |
762 char_u **pp; | |
763 { | |
764 int c; | |
765 int l = 1; | |
766 char_u *p = *pp; | |
767 | |
768 if (p[1] == '=') | |
769 { | |
770 #ifdef FEAT_MBYTE | |
771 if (has_mbyte) | |
474 | 772 l = (*mb_ptr2len)(p + 2); |
167 | 773 #endif |
774 if (p[l + 2] == '=' && p[l + 3] == ']') | |
775 { | |
776 #ifdef FEAT_MBYTE | |
777 if (has_mbyte) | |
778 c = mb_ptr2char(p + 2); | |
779 else | |
780 #endif | |
781 c = p[2]; | |
782 *pp += l + 4; | |
783 return c; | |
784 } | |
785 } | |
786 return 0; | |
787 } | |
788 | |
2247
c40cd9aad546
Add patch to improve support of z/OS (OS/390). (Ralf Schandl)
Bram Moolenaar <bram@vim.org>
parents:
2173
diff
changeset
|
#ifdef EBCDIC
/*
 * Table for equivalence class "c". (IBM-1047)
 *
 * One string per class, 16 classes in total.  Each string starts with the
 * plain letter and is followed by the other members of that class
 * (presumably the accented forms of the letter in IBM-1047 - not verified
 * here).  reg_equi_class() looks for a character in each string with
 * vim_strchr() and, when found, emits every byte of that string.
 */
char *EQUIVAL_CLASS_C[16] = {
    "A\x62\x63\x64\x65\x66\x67",
    "C\x68",
    "E\x71\x72\x73\x74",
    "I\x75\x76\x77\x78",
    "N\x69",
    "O\xEB\xEC\xED\xEE\xEF",
    "U\xFB\xFC\xFD\xFE",
    "Y\xBA",
    "a\x42\x43\x44\x45\x46\x47",
    "c\x48",
    "e\x51\x52\x53\x54",
    "i\x55\x56\x57\x58",
    "n\x49",
    "o\xCB\xCC\xCD\xCE\xCF",
    "u\xDB\xDC\xDD\xDE",
    "y\x8D\xDF",
};
#endif
c40cd9aad546
Add patch to improve support of z/OS (OS/390). (Ralf Schandl)
Bram Moolenaar <bram@vim.org>
parents:
2173
diff
changeset
|
812 |
167 | 813 /* |
814 * Produce the bytes for equivalence class "c". | |
815 * Currently only handles latin1, latin9 and utf-8. | |
4444 | 816 * NOTE: When changing this function, also change nfa_emit_equi_class() |
167 | 817 */ |
818 static void | |
819 reg_equi_class(c) | |
820 int c; | |
821 { | |
822 #ifdef FEAT_MBYTE | |
823 if (enc_utf8 || STRCMP(p_enc, "latin1") == 0 | |
492 | 824 || STRCMP(p_enc, "iso-8859-15") == 0) |
167 | 825 #endif |
826 { | |
2247
c40cd9aad546
Add patch to improve support of z/OS (OS/390). (Ralf Schandl)
Bram Moolenaar <bram@vim.org>
parents:
2173
diff
changeset
|
827 #ifdef EBCDIC |
c40cd9aad546
Add patch to improve support of z/OS (OS/390). (Ralf Schandl)
Bram Moolenaar <bram@vim.org>
parents:
2173
diff
changeset
|
828 int i; |
c40cd9aad546
Add patch to improve support of z/OS (OS/390). (Ralf Schandl)
Bram Moolenaar <bram@vim.org>
parents:
2173
diff
changeset
|
829 |
c40cd9aad546
Add patch to improve support of z/OS (OS/390). (Ralf Schandl)
Bram Moolenaar <bram@vim.org>
parents:
2173
diff
changeset
|
830 /* This might be slower than switch/case below. */ |
c40cd9aad546
Add patch to improve support of z/OS (OS/390). (Ralf Schandl)
Bram Moolenaar <bram@vim.org>
parents:
2173
diff
changeset
|
831 for (i = 0; i < 16; i++) |
c40cd9aad546
Add patch to improve support of z/OS (OS/390). (Ralf Schandl)
Bram Moolenaar <bram@vim.org>
parents:
2173
diff
changeset
|
832 { |
c40cd9aad546
Add patch to improve support of z/OS (OS/390). (Ralf Schandl)
Bram Moolenaar <bram@vim.org>
parents:
2173
diff
changeset
|
833 if (vim_strchr(EQUIVAL_CLASS_C[i], c) != NULL) |
c40cd9aad546
Add patch to improve support of z/OS (OS/390). (Ralf Schandl)
Bram Moolenaar <bram@vim.org>
parents:
2173
diff
changeset
|
834 { |
c40cd9aad546
Add patch to improve support of z/OS (OS/390). (Ralf Schandl)
Bram Moolenaar <bram@vim.org>
parents:
2173
diff
changeset
|
835 char *p = EQUIVAL_CLASS_C[i]; |
c40cd9aad546
Add patch to improve support of z/OS (OS/390). (Ralf Schandl)
Bram Moolenaar <bram@vim.org>
parents:
2173
diff
changeset
|
836 |
c40cd9aad546
Add patch to improve support of z/OS (OS/390). (Ralf Schandl)
Bram Moolenaar <bram@vim.org>
parents:
2173
diff
changeset
|
837 while (*p != 0) |
c40cd9aad546
Add patch to improve support of z/OS (OS/390). (Ralf Schandl)
Bram Moolenaar <bram@vim.org>
parents:
2173
diff
changeset
|
838 regmbc(*p++); |
c40cd9aad546
Add patch to improve support of z/OS (OS/390). (Ralf Schandl)
Bram Moolenaar <bram@vim.org>
parents:
2173
diff
changeset
|
839 return; |
c40cd9aad546
Add patch to improve support of z/OS (OS/390). (Ralf Schandl)
Bram Moolenaar <bram@vim.org>
parents:
2173
diff
changeset
|
840 } |
c40cd9aad546
Add patch to improve support of z/OS (OS/390). (Ralf Schandl)
Bram Moolenaar <bram@vim.org>
parents:
2173
diff
changeset
|
841 } |
c40cd9aad546
Add patch to improve support of z/OS (OS/390). (Ralf Schandl)
Bram Moolenaar <bram@vim.org>
parents:
2173
diff
changeset
|
842 #else |
167 | 843 switch (c) |
844 { | |
236 | 845 case 'A': case '\300': case '\301': case '\302': |
2974 | 846 CASEMBC(0x100) CASEMBC(0x102) CASEMBC(0x104) CASEMBC(0x1cd) |
847 CASEMBC(0x1de) CASEMBC(0x1e0) CASEMBC(0x1ea2) | |
236 | 848 case '\303': case '\304': case '\305': |
849 regmbc('A'); regmbc('\300'); regmbc('\301'); | |
850 regmbc('\302'); regmbc('\303'); regmbc('\304'); | |
851 regmbc('\305'); | |
2974 | 852 REGMBC(0x100) REGMBC(0x102) REGMBC(0x104) |
853 REGMBC(0x1cd) REGMBC(0x1de) REGMBC(0x1e0) | |
854 REGMBC(0x1ea2) | |
855 return; | |
856 case 'B': CASEMBC(0x1e02) CASEMBC(0x1e06) | |
857 regmbc('B'); REGMBC(0x1e02) REGMBC(0x1e06) | |
167 | 858 return; |
236 | 859 case 'C': case '\307': |
2974 | 860 CASEMBC(0x106) CASEMBC(0x108) CASEMBC(0x10a) CASEMBC(0x10c) |
236 | 861 regmbc('C'); regmbc('\307'); |
2974 | 862 REGMBC(0x106) REGMBC(0x108) REGMBC(0x10a) |
863 REGMBC(0x10c) | |
864 return; | |
865 case 'D': CASEMBC(0x10e) CASEMBC(0x110) CASEMBC(0x1e0a) | |
866 CASEMBC(0x1e0e) CASEMBC(0x1e10) | |
867 regmbc('D'); REGMBC(0x10e) REGMBC(0x110) | |
868 REGMBC(0x1e0a) REGMBC(0x1e0e) REGMBC(0x1e10) | |
167 | 869 return; |
236 | 870 case 'E': case '\310': case '\311': case '\312': case '\313': |
2974 | 871 CASEMBC(0x112) CASEMBC(0x114) CASEMBC(0x116) CASEMBC(0x118) |
872 CASEMBC(0x11a) CASEMBC(0x1eba) CASEMBC(0x1ebc) | |
236 | 873 regmbc('E'); regmbc('\310'); regmbc('\311'); |
874 regmbc('\312'); regmbc('\313'); | |
2974 | 875 REGMBC(0x112) REGMBC(0x114) REGMBC(0x116) |
876 REGMBC(0x118) REGMBC(0x11a) REGMBC(0x1eba) | |
877 REGMBC(0x1ebc) | |
878 return; | |
879 case 'F': CASEMBC(0x1e1e) | |
880 regmbc('F'); REGMBC(0x1e1e) | |
881 return; | |
882 case 'G': CASEMBC(0x11c) CASEMBC(0x11e) CASEMBC(0x120) | |
883 CASEMBC(0x122) CASEMBC(0x1e4) CASEMBC(0x1e6) CASEMBC(0x1f4) | |
884 CASEMBC(0x1e20) | |
885 regmbc('G'); REGMBC(0x11c) REGMBC(0x11e) | |
886 REGMBC(0x120) REGMBC(0x122) REGMBC(0x1e4) | |
887 REGMBC(0x1e6) REGMBC(0x1f4) REGMBC(0x1e20) | |
888 return; | |
889 case 'H': CASEMBC(0x124) CASEMBC(0x126) CASEMBC(0x1e22) | |
890 CASEMBC(0x1e26) CASEMBC(0x1e28) | |
891 regmbc('H'); REGMBC(0x124) REGMBC(0x126) | |
892 REGMBC(0x1e22) REGMBC(0x1e26) REGMBC(0x1e28) | |
167 | 893 return; |
236 | 894 case 'I': case '\314': case '\315': case '\316': case '\317': |
2974 | 895 CASEMBC(0x128) CASEMBC(0x12a) CASEMBC(0x12c) CASEMBC(0x12e) |
896 CASEMBC(0x130) CASEMBC(0x1cf) CASEMBC(0x1ec8) | |
236 | 897 regmbc('I'); regmbc('\314'); regmbc('\315'); |
898 regmbc('\316'); regmbc('\317'); | |
2974 | 899 REGMBC(0x128) REGMBC(0x12a) REGMBC(0x12c) |
900 REGMBC(0x12e) REGMBC(0x130) REGMBC(0x1cf) | |
901 REGMBC(0x1ec8) | |
902 return; | |
903 case 'J': CASEMBC(0x134) | |
904 regmbc('J'); REGMBC(0x134) | |
905 return; | |
906 case 'K': CASEMBC(0x136) CASEMBC(0x1e8) CASEMBC(0x1e30) | |
907 CASEMBC(0x1e34) | |
908 regmbc('K'); REGMBC(0x136) REGMBC(0x1e8) | |
909 REGMBC(0x1e30) REGMBC(0x1e34) | |
910 return; | |
911 case 'L': CASEMBC(0x139) CASEMBC(0x13b) CASEMBC(0x13d) | |
912 CASEMBC(0x13f) CASEMBC(0x141) CASEMBC(0x1e3a) | |
913 regmbc('L'); REGMBC(0x139) REGMBC(0x13b) | |
914 REGMBC(0x13d) REGMBC(0x13f) REGMBC(0x141) | |
915 REGMBC(0x1e3a) | |
916 return; | |
917 case 'M': CASEMBC(0x1e3e) CASEMBC(0x1e40) | |
918 regmbc('M'); REGMBC(0x1e3e) REGMBC(0x1e40) | |
167 | 919 return; |
236 | 920 case 'N': case '\321': |
2974 | 921 CASEMBC(0x143) CASEMBC(0x145) CASEMBC(0x147) CASEMBC(0x1e44) |
922 CASEMBC(0x1e48) | |
236 | 923 regmbc('N'); regmbc('\321'); |
2974 | 924 REGMBC(0x143) REGMBC(0x145) REGMBC(0x147) |
925 REGMBC(0x1e44) REGMBC(0x1e48) | |
167 | 926 return; |
236 | 927 case 'O': case '\322': case '\323': case '\324': case '\325': |
2974 | 928 case '\326': case '\330': |
929 CASEMBC(0x14c) CASEMBC(0x14e) CASEMBC(0x150) CASEMBC(0x1a0) | |
930 CASEMBC(0x1d1) CASEMBC(0x1ea) CASEMBC(0x1ec) CASEMBC(0x1ece) | |
236 | 931 regmbc('O'); regmbc('\322'); regmbc('\323'); |
932 regmbc('\324'); regmbc('\325'); regmbc('\326'); | |
2974 | 933 regmbc('\330'); |
934 REGMBC(0x14c) REGMBC(0x14e) REGMBC(0x150) | |
935 REGMBC(0x1a0) REGMBC(0x1d1) REGMBC(0x1ea) | |
936 REGMBC(0x1ec) REGMBC(0x1ece) | |
937 return; | |
938 case 'P': case 0x1e54: case 0x1e56: | |
939 regmbc('P'); REGMBC(0x1e54) REGMBC(0x1e56) | |
940 return; | |
941 case 'R': CASEMBC(0x154) CASEMBC(0x156) CASEMBC(0x158) | |
942 CASEMBC(0x1e58) CASEMBC(0x1e5e) | |
943 regmbc('R'); REGMBC(0x154) REGMBC(0x156) REGMBC(0x158) | |
944 REGMBC(0x1e58) REGMBC(0x1e5e) | |
945 return; | |
946 case 'S': CASEMBC(0x15a) CASEMBC(0x15c) CASEMBC(0x15e) | |
947 CASEMBC(0x160) CASEMBC(0x1e60) | |
948 regmbc('S'); REGMBC(0x15a) REGMBC(0x15c) | |
949 REGMBC(0x15e) REGMBC(0x160) REGMBC(0x1e60) | |
950 return; | |
951 case 'T': CASEMBC(0x162) CASEMBC(0x164) CASEMBC(0x166) | |
952 CASEMBC(0x1e6a) CASEMBC(0x1e6e) | |
953 regmbc('T'); REGMBC(0x162) REGMBC(0x164) | |
954 REGMBC(0x166) REGMBC(0x1e6a) REGMBC(0x1e6e) | |
167 | 955 return; |
236 | 956 case 'U': case '\331': case '\332': case '\333': case '\334': |
2974 | 957 CASEMBC(0x168) CASEMBC(0x16a) CASEMBC(0x16c) CASEMBC(0x16e) |
958 CASEMBC(0x170) CASEMBC(0x172) CASEMBC(0x1af) CASEMBC(0x1d3) | |
959 CASEMBC(0x1ee6) | |
236 | 960 regmbc('U'); regmbc('\331'); regmbc('\332'); |
961 regmbc('\333'); regmbc('\334'); | |
2974 | 962 REGMBC(0x168) REGMBC(0x16a) REGMBC(0x16c) |
963 REGMBC(0x16e) REGMBC(0x170) REGMBC(0x172) | |
964 REGMBC(0x1af) REGMBC(0x1d3) REGMBC(0x1ee6) | |
965 return; | |
966 case 'V': CASEMBC(0x1e7c) | |
967 regmbc('V'); REGMBC(0x1e7c) | |
968 return; | |
969 case 'W': CASEMBC(0x174) CASEMBC(0x1e80) CASEMBC(0x1e82) | |
970 CASEMBC(0x1e84) CASEMBC(0x1e86) | |
971 regmbc('W'); REGMBC(0x174) REGMBC(0x1e80) | |
972 REGMBC(0x1e82) REGMBC(0x1e84) REGMBC(0x1e86) | |
973 return; | |
974 case 'X': CASEMBC(0x1e8a) CASEMBC(0x1e8c) | |
975 regmbc('X'); REGMBC(0x1e8a) REGMBC(0x1e8c) | |
167 | 976 return; |
236 | 977 case 'Y': case '\335': |
2974 | 978 CASEMBC(0x176) CASEMBC(0x178) CASEMBC(0x1e8e) CASEMBC(0x1ef2) |
979 CASEMBC(0x1ef6) CASEMBC(0x1ef8) | |
236 | 980 regmbc('Y'); regmbc('\335'); |
2974 | 981 REGMBC(0x176) REGMBC(0x178) REGMBC(0x1e8e) |
982 REGMBC(0x1ef2) REGMBC(0x1ef6) REGMBC(0x1ef8) | |
983 return; | |
984 case 'Z': CASEMBC(0x179) CASEMBC(0x17b) CASEMBC(0x17d) | |
985 CASEMBC(0x1b5) CASEMBC(0x1e90) CASEMBC(0x1e94) | |
986 regmbc('Z'); REGMBC(0x179) REGMBC(0x17b) | |
987 REGMBC(0x17d) REGMBC(0x1b5) REGMBC(0x1e90) | |
988 REGMBC(0x1e94) | |
167 | 989 return; |
236 | 990 case 'a': case '\340': case '\341': case '\342': |
991 case '\343': case '\344': case '\345': | |
2974 | 992 CASEMBC(0x101) CASEMBC(0x103) CASEMBC(0x105) CASEMBC(0x1ce) |
993 CASEMBC(0x1df) CASEMBC(0x1e1) CASEMBC(0x1ea3) | |
236 | 994 regmbc('a'); regmbc('\340'); regmbc('\341'); |
995 regmbc('\342'); regmbc('\343'); regmbc('\344'); | |
996 regmbc('\345'); | |
2974 | 997 REGMBC(0x101) REGMBC(0x103) REGMBC(0x105) |
998 REGMBC(0x1ce) REGMBC(0x1df) REGMBC(0x1e1) | |
999 REGMBC(0x1ea3) | |
1000 return; | |
1001 case 'b': CASEMBC(0x1e03) CASEMBC(0x1e07) | |
1002 regmbc('b'); REGMBC(0x1e03) REGMBC(0x1e07) | |
167 | 1003 return; |
236 | 1004 case 'c': case '\347': |
2974 | 1005 CASEMBC(0x107) CASEMBC(0x109) CASEMBC(0x10b) CASEMBC(0x10d) |
236 | 1006 regmbc('c'); regmbc('\347'); |
2974 | 1007 REGMBC(0x107) REGMBC(0x109) REGMBC(0x10b) |
1008 REGMBC(0x10d) | |
1009 return; | |
1010 case 'd': CASEMBC(0x10f) CASEMBC(0x111) CASEMBC(0x1d0b) | |
1011 CASEMBC(0x1e11) | |
1012 regmbc('d'); REGMBC(0x10f) REGMBC(0x111) | |
1013 REGMBC(0x1e0b) REGMBC(0x01e0f) REGMBC(0x1e11) | |
167 | 1014 return; |
236 | 1015 case 'e': case '\350': case '\351': case '\352': case '\353': |
2974 | 1016 CASEMBC(0x113) CASEMBC(0x115) CASEMBC(0x117) CASEMBC(0x119) |
1017 CASEMBC(0x11b) CASEMBC(0x1ebb) CASEMBC(0x1ebd) | |
236 | 1018 regmbc('e'); regmbc('\350'); regmbc('\351'); |
1019 regmbc('\352'); regmbc('\353'); | |
2974 | 1020 REGMBC(0x113) REGMBC(0x115) REGMBC(0x117) |
1021 REGMBC(0x119) REGMBC(0x11b) REGMBC(0x1ebb) | |
1022 REGMBC(0x1ebd) | |
1023 return; | |
1024 case 'f': CASEMBC(0x1e1f) | |
1025 regmbc('f'); REGMBC(0x1e1f) | |
1026 return; | |
1027 case 'g': CASEMBC(0x11d) CASEMBC(0x11f) CASEMBC(0x121) | |
1028 CASEMBC(0x123) CASEMBC(0x1e5) CASEMBC(0x1e7) CASEMBC(0x1f5) | |
1029 CASEMBC(0x1e21) | |
1030 regmbc('g'); REGMBC(0x11d) REGMBC(0x11f) | |
1031 REGMBC(0x121) REGMBC(0x123) REGMBC(0x1e5) | |
1032 REGMBC(0x1e7) REGMBC(0x1f5) REGMBC(0x1e21) | |
1033 return; | |
1034 case 'h': CASEMBC(0x125) CASEMBC(0x127) CASEMBC(0x1e23) | |
1035 CASEMBC(0x1e27) CASEMBC(0x1e29) CASEMBC(0x1e96) | |
1036 regmbc('h'); REGMBC(0x125) REGMBC(0x127) | |
1037 REGMBC(0x1e23) REGMBC(0x1e27) REGMBC(0x1e29) | |
1038 REGMBC(0x1e96) | |
167 | 1039 return; |
236 | 1040 case 'i': case '\354': case '\355': case '\356': case '\357': |
2974 | 1041 CASEMBC(0x129) CASEMBC(0x12b) CASEMBC(0x12d) CASEMBC(0x12f) |
1042 CASEMBC(0x1d0) CASEMBC(0x1ec9) | |
236 | 1043 regmbc('i'); regmbc('\354'); regmbc('\355'); |
1044 regmbc('\356'); regmbc('\357'); | |
2974 | 1045 REGMBC(0x129) REGMBC(0x12b) REGMBC(0x12d) |
1046 REGMBC(0x12f) REGMBC(0x1d0) REGMBC(0x1ec9) | |
1047 return; | |
1048 case 'j': CASEMBC(0x135) CASEMBC(0x1f0) | |
1049 regmbc('j'); REGMBC(0x135) REGMBC(0x1f0) | |
1050 return; | |
1051 case 'k': CASEMBC(0x137) CASEMBC(0x1e9) CASEMBC(0x1e31) | |
1052 CASEMBC(0x1e35) | |
1053 regmbc('k'); REGMBC(0x137) REGMBC(0x1e9) | |
1054 REGMBC(0x1e31) REGMBC(0x1e35) | |
1055 return; | |
1056 case 'l': CASEMBC(0x13a) CASEMBC(0x13c) CASEMBC(0x13e) | |
1057 CASEMBC(0x140) CASEMBC(0x142) CASEMBC(0x1e3b) | |
1058 regmbc('l'); REGMBC(0x13a) REGMBC(0x13c) | |
1059 REGMBC(0x13e) REGMBC(0x140) REGMBC(0x142) | |
1060 REGMBC(0x1e3b) | |
1061 return; | |
1062 case 'm': CASEMBC(0x1e3f) CASEMBC(0x1e41) | |
1063 regmbc('m'); REGMBC(0x1e3f) REGMBC(0x1e41) | |
167 | 1064 return; |
236 | 1065 case 'n': case '\361': |
2974 | 1066 CASEMBC(0x144) CASEMBC(0x146) CASEMBC(0x148) CASEMBC(0x149) |
1067 CASEMBC(0x1e45) CASEMBC(0x1e49) | |
236 | 1068 regmbc('n'); regmbc('\361'); |
2974 | 1069 REGMBC(0x144) REGMBC(0x146) REGMBC(0x148) |
1070 REGMBC(0x149) REGMBC(0x1e45) REGMBC(0x1e49) | |
167 | 1071 return; |
236 | 1072 case 'o': case '\362': case '\363': case '\364': case '\365': |
2974 | 1073 case '\366': case '\370': |
1074 CASEMBC(0x14d) CASEMBC(0x14f) CASEMBC(0x151) CASEMBC(0x1a1) | |
1075 CASEMBC(0x1d2) CASEMBC(0x1eb) CASEMBC(0x1ed) CASEMBC(0x1ecf) | |
236 | 1076 regmbc('o'); regmbc('\362'); regmbc('\363'); |
1077 regmbc('\364'); regmbc('\365'); regmbc('\366'); | |
2974 | 1078 regmbc('\370'); |
1079 REGMBC(0x14d) REGMBC(0x14f) REGMBC(0x151) | |
1080 REGMBC(0x1a1) REGMBC(0x1d2) REGMBC(0x1eb) | |
1081 REGMBC(0x1ed) REGMBC(0x1ecf) | |
1082 return; | |
1083 case 'p': CASEMBC(0x1e55) CASEMBC(0x1e57) | |
1084 regmbc('p'); REGMBC(0x1e55) REGMBC(0x1e57) | |
1085 return; | |
1086 case 'r': CASEMBC(0x155) CASEMBC(0x157) CASEMBC(0x159) | |
1087 CASEMBC(0x1e59) CASEMBC(0x1e5f) | |
1088 regmbc('r'); REGMBC(0x155) REGMBC(0x157) REGMBC(0x159) | |
1089 REGMBC(0x1e59) REGMBC(0x1e5f) | |
1090 return; | |
1091 case 's': CASEMBC(0x15b) CASEMBC(0x15d) CASEMBC(0x15f) | |
1092 CASEMBC(0x161) CASEMBC(0x1e61) | |
1093 regmbc('s'); REGMBC(0x15b) REGMBC(0x15d) | |
1094 REGMBC(0x15f) REGMBC(0x161) REGMBC(0x1e61) | |
1095 return; | |
1096 case 't': CASEMBC(0x163) CASEMBC(0x165) CASEMBC(0x167) | |
1097 CASEMBC(0x1e6b) CASEMBC(0x1e6f) CASEMBC(0x1e97) | |
1098 regmbc('t'); REGMBC(0x163) REGMBC(0x165) REGMBC(0x167) | |
1099 REGMBC(0x1e6b) REGMBC(0x1e6f) REGMBC(0x1e97) | |
167 | 1100 return; |
236 | 1101 case 'u': case '\371': case '\372': case '\373': case '\374': |
2974 | 1102 CASEMBC(0x169) CASEMBC(0x16b) CASEMBC(0x16d) CASEMBC(0x16f) |
1103 CASEMBC(0x171) CASEMBC(0x173) CASEMBC(0x1b0) CASEMBC(0x1d4) | |
1104 CASEMBC(0x1ee7) | |
236 | 1105 regmbc('u'); regmbc('\371'); regmbc('\372'); |
1106 regmbc('\373'); regmbc('\374'); | |
2974 | 1107 REGMBC(0x169) REGMBC(0x16b) REGMBC(0x16d) |
1108 REGMBC(0x16f) REGMBC(0x171) REGMBC(0x173) | |
1109 REGMBC(0x1b0) REGMBC(0x1d4) REGMBC(0x1ee7) | |
1110 return; | |
1111 case 'v': CASEMBC(0x1e7d) | |
1112 regmbc('v'); REGMBC(0x1e7d) | |
1113 return; | |
1114 case 'w': CASEMBC(0x175) CASEMBC(0x1e81) CASEMBC(0x1e83) | |
1115 CASEMBC(0x1e85) CASEMBC(0x1e87) CASEMBC(0x1e98) | |
1116 regmbc('w'); REGMBC(0x175) REGMBC(0x1e81) | |
1117 REGMBC(0x1e83) REGMBC(0x1e85) REGMBC(0x1e87) | |
1118 REGMBC(0x1e98) | |
1119 return; | |
1120 case 'x': CASEMBC(0x1e8b) CASEMBC(0x1e8d) | |
1121 regmbc('x'); REGMBC(0x1e8b) REGMBC(0x1e8d) | |
167 | 1122 return; |
236 | 1123 case 'y': case '\375': case '\377': |
2974 | 1124 CASEMBC(0x177) CASEMBC(0x1e8f) CASEMBC(0x1e99) |
1125 CASEMBC(0x1ef3) CASEMBC(0x1ef7) CASEMBC(0x1ef9) | |
236 | 1126 regmbc('y'); regmbc('\375'); regmbc('\377'); |
2974 | 1127 REGMBC(0x177) REGMBC(0x1e8f) REGMBC(0x1e99) |
1128 REGMBC(0x1ef3) REGMBC(0x1ef7) REGMBC(0x1ef9) | |
1129 return; | |
1130 case 'z': CASEMBC(0x17a) CASEMBC(0x17c) CASEMBC(0x17e) | |
1131 CASEMBC(0x1b6) CASEMBC(0x1e91) CASEMBC(0x1e95) | |
1132 regmbc('z'); REGMBC(0x17a) REGMBC(0x17c) | |
1133 REGMBC(0x17e) REGMBC(0x1b6) REGMBC(0x1e91) | |
1134 REGMBC(0x1e95) | |
167 | 1135 return; |
1136 } | |
2247
c40cd9aad546
Add patch to improve support of z/OS (OS/390). (Ralf Schandl)
Bram Moolenaar <bram@vim.org>
parents:
2173
diff
changeset
|
1137 #endif |
167 | 1138 } |
1139 regmbc(c); | |
1140 } | |
1141 | |
1142 /* | |
1143 * Check for a collating element "[.a.]". "pp" points to the '['. | |
1144 * Returns a character. Zero means that no item was recognized. Otherwise | |
1145 * "pp" is advanced to after the item. | |
1146 * Currently only single characters are recognized! | |
1147 */ | |
1148 static int | |
1149 get_coll_element(pp) | |
1150 char_u **pp; | |
1151 { | |
1152 int c; | |
1153 int l = 1; | |
1154 char_u *p = *pp; | |
1155 | |
1156 if (p[1] == '.') | |
1157 { | |
1158 #ifdef FEAT_MBYTE | |
1159 if (has_mbyte) | |
474 | 1160 l = (*mb_ptr2len)(p + 2); |
167 | 1161 #endif |
1162 if (p[l + 2] == '.' && p[l + 3] == ']') | |
1163 { | |
1164 #ifdef FEAT_MBYTE | |
1165 if (has_mbyte) | |
1166 c = mb_ptr2char(p + 2); | |
1167 else | |
1168 #endif | |
1169 c = p[2]; | |
1170 *pp += l + 4; | |
1171 return c; | |
1172 } | |
1173 } | |
1174 return 0; | |
1175 } | |
1176 | |
4744
a62695305e03
updated for version 7.3.1119
Bram Moolenaar <bram@vim.org>
parents:
4732
diff
changeset
|
1177 static void get_cpo_flags __ARGS((void)); |
a62695305e03
updated for version 7.3.1119
Bram Moolenaar <bram@vim.org>
parents:
4732
diff
changeset
|
1178 static int reg_cpo_lit; /* 'cpoptions' contains 'l' flag */ |
a62695305e03
updated for version 7.3.1119
Bram Moolenaar <bram@vim.org>
parents:
4732
diff
changeset
|
1179 static int reg_cpo_bsl; /* 'cpoptions' contains '\' flag */ |
a62695305e03
updated for version 7.3.1119
Bram Moolenaar <bram@vim.org>
parents:
4732
diff
changeset
|
1180 |
a62695305e03
updated for version 7.3.1119
Bram Moolenaar <bram@vim.org>
parents:
4732
diff
changeset
|
1181 static void |
a62695305e03
updated for version 7.3.1119
Bram Moolenaar <bram@vim.org>
parents:
4732
diff
changeset
|
1182 get_cpo_flags() |
a62695305e03
updated for version 7.3.1119
Bram Moolenaar <bram@vim.org>
parents:
4732
diff
changeset
|
1183 { |
a62695305e03
updated for version 7.3.1119
Bram Moolenaar <bram@vim.org>
parents:
4732
diff
changeset
|
1184 reg_cpo_lit = vim_strchr(p_cpo, CPO_LITERAL) != NULL; |
a62695305e03
updated for version 7.3.1119
Bram Moolenaar <bram@vim.org>
parents:
4732
diff
changeset
|
1185 reg_cpo_bsl = vim_strchr(p_cpo, CPO_BACKSL) != NULL; |
a62695305e03
updated for version 7.3.1119
Bram Moolenaar <bram@vim.org>
parents:
4732
diff
changeset
|
1186 } |
167 | 1187 |
1188 /* | |
1189 * Skip over a "[]" range. | |
1190 * "p" must point to the character after the '['. | |
1191 * The returned pointer is on the matching ']', or the terminating NUL. | |
1192 */ | |
1193 static char_u * | |
1194 skip_anyof(p) | |
1195 char_u *p; | |
1196 { | |
1197 #ifdef FEAT_MBYTE | |
1198 int l; | |
1199 #endif | |
1200 | |
1201 if (*p == '^') /* Complement of range. */ | |
1202 ++p; | |
1203 if (*p == ']' || *p == '-') | |
1204 ++p; | |
1205 while (*p != NUL && *p != ']') | |
1206 { | |
1207 #ifdef FEAT_MBYTE | |
474 | 1208 if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1) |
167 | 1209 p += l; |
1210 else | |
1211 #endif | |
1212 if (*p == '-') | |
1213 { | |
1214 ++p; | |
1215 if (*p != ']' && *p != NUL) | |
1216 mb_ptr_adv(p); | |
1217 } | |
1218 else if (*p == '\\' | |
4744
a62695305e03
updated for version 7.3.1119
Bram Moolenaar <bram@vim.org>
parents:
4732
diff
changeset
|
1219 && !reg_cpo_bsl |
167 | 1220 && (vim_strchr(REGEXP_INRANGE, p[1]) != NULL |
4744
a62695305e03
updated for version 7.3.1119
Bram Moolenaar <bram@vim.org>
parents:
4732
diff
changeset
|
1221 || (!reg_cpo_lit && vim_strchr(REGEXP_ABBR, p[1]) != NULL))) |
167 | 1222 p += 2; |
1223 else if (*p == '[') | |
1224 { | |
1225 if (get_char_class(&p) == CLASS_NONE | |
1226 && get_equi_class(&p) == 0 | |
1227 && get_coll_element(&p) == 0) | |
1228 ++p; /* It was not a class name */ | |
1229 } | |
1230 else | |
1231 ++p; | |
1232 } | |
1233 | |
1234 return p; | |
1235 } | |
1236 | |
1237 /* | |
7 | 1238 * Skip past regular expression. |
153 | 1239 * Stop at end of "startp" or where "dirc" is found ('/', '?', etc). |
7 | 1240 * Take care of characters with a backslash in front of it. |
1241 * Skip strings inside [ and ]. | |
1242 * When "newp" is not NULL and "dirc" is '?', make an allocated copy of the | |
1243 * expression and change "\?" to "?". If "*newp" is not NULL the expression | |
1244 * is changed in-place. | |
1245 */ | |
1246 char_u * | |
1247 skip_regexp(startp, dirc, magic, newp) | |
1248 char_u *startp; | |
1249 int dirc; | |
1250 int magic; | |
1251 char_u **newp; | |
1252 { | |
1253 int mymagic; | |
1254 char_u *p = startp; | |
1255 | |
1256 if (magic) | |
1257 mymagic = MAGIC_ON; | |
1258 else | |
1259 mymagic = MAGIC_OFF; | |
4744
a62695305e03
updated for version 7.3.1119
Bram Moolenaar <bram@vim.org>
parents:
4732
diff
changeset
|
1260 get_cpo_flags(); |
7 | 1261 |
39 | 1262 for (; p[0] != NUL; mb_ptr_adv(p)) |
7 | 1263 { |
1264 if (p[0] == dirc) /* found end of regexp */ | |
1265 break; | |
1266 if ((p[0] == '[' && mymagic >= MAGIC_ON) | |
1267 || (p[0] == '\\' && p[1] == '[' && mymagic <= MAGIC_OFF)) | |
1268 { | |
1269 p = skip_anyof(p + 1); | |
1270 if (p[0] == NUL) | |
1271 break; | |
1272 } | |
1273 else if (p[0] == '\\' && p[1] != NUL) | |
1274 { | |
1275 if (dirc == '?' && newp != NULL && p[1] == '?') | |
1276 { | |
1277 /* change "\?" to "?", make a copy first. */ | |
1278 if (*newp == NULL) | |
1279 { | |
1280 *newp = vim_strsave(startp); | |
1281 if (*newp != NULL) | |
1282 p = *newp + (p - startp); | |
1283 } | |
1284 if (*newp != NULL) | |
1621 | 1285 STRMOVE(p, p + 1); |
7 | 1286 else |
1287 ++p; | |
1288 } | |
1289 else | |
1290 ++p; /* skip next character */ | |
1291 if (*p == 'v') | |
1292 mymagic = MAGIC_ALL; | |
1293 else if (*p == 'V') | |
1294 mymagic = MAGIC_NONE; | |
1295 } | |
1296 } | |
1297 return p; | |
1298 } | |
1299 | |
4805
66803af09906
updated for version 7.3.1149
Bram Moolenaar <bram@vim.org>
parents:
4770
diff
changeset
|
1300 static regprog_T *bt_regcomp __ARGS((char_u *expr, int re_flags)); |
66803af09906
updated for version 7.3.1149
Bram Moolenaar <bram@vim.org>
parents:
4770
diff
changeset
|
1301 static void bt_regfree __ARGS((regprog_T *prog)); |
4444 | 1302 |
7 | 1303 /* |
4444 | 1304 * bt_regcomp() - compile a regular expression into internal code for the |
1305 * traditional back track matcher. | |
41 | 1306 * Returns the program in allocated space. Returns NULL for an error. |
7 | 1307 * |
1308 * We can't allocate space until we know how big the compiled form will be, | |
1309 * but we can't compile it (and thus know how big it is) until we've got a | |
1310 * place to put the code. So we cheat: we compile it twice, once with code | |
1311 * generation turned off and size counting turned on, and once "for real". | |
1312 * This also means that we don't allocate space until we are sure that the | |
1313 * thing really will compile successfully, and we never have to move the | |
1314 * code and thus invalidate pointers into it. (Note that it has to be in | |
1315 * one piece because vim_free() must be able to free it all.) | |
1316 * | |
1317 * Whether upper/lower case is to be ignored is decided when executing the | |
1318 * program, it does not matter here. | |
1319 * | |
1320 * Beware that the optimization-preparation code in here knows about some | |
1321 * of the structure of the compiled regexp. | |
1322 * "re_flags": RE_MAGIC and/or RE_STRING. | |
1323 */ | |
4444 | 1324 static regprog_T * |
1325 bt_regcomp(expr, re_flags) | |
7 | 1326 char_u *expr; |
1327 int re_flags; | |
1328 { | |
4444 | 1329 bt_regprog_T *r; |
7 | 1330 char_u *scan; |
1331 char_u *longest; | |
1332 int len; | |
1333 int flags; | |
1334 | |
1335 if (expr == NULL) | |
1336 EMSG_RET_NULL(_(e_null)); | |
1337 | |
1338 init_class_tab(); | |
1339 | |
1340 /* | |
1341 * First pass: determine size, legality. | |
1342 */ | |
1343 regcomp_start(expr, re_flags); | |
1344 regcode = JUST_CALC_SIZE; | |
1345 regc(REGMAGIC); | |
1346 if (reg(REG_NOPAREN, &flags) == NULL) | |
1347 return NULL; | |
1348 | |
1349 /* Small enough for pointer-storage convention? */ | |
1350 #ifdef SMALL_MALLOC /* 16 bit storage allocation */ | |
1351 if (regsize >= 65536L - 256L) | |
1352 EMSG_RET_NULL(_("E339: Pattern too long")); | |
1353 #endif | |
1354 | |
1355 /* Allocate space. */ | |
4444 | 1356 r = (bt_regprog_T *)lalloc(sizeof(bt_regprog_T) + regsize, TRUE); |
7 | 1357 if (r == NULL) |
1358 return NULL; | |
1359 | |
1360 /* | |
1361 * Second pass: emit code. | |
1362 */ | |
1363 regcomp_start(expr, re_flags); | |
1364 regcode = r->program; | |
1365 regc(REGMAGIC); | |
2010 | 1366 if (reg(REG_NOPAREN, &flags) == NULL || reg_toolong) |
7 | 1367 { |
1368 vim_free(r); | |
2010 | 1369 if (reg_toolong) |
1370 EMSG_RET_NULL(_("E339: Pattern too long")); | |
7 | 1371 return NULL; |
1372 } | |
1373 | |
1374 /* Dig out information for optimizations. */ | |
1375 r->regstart = NUL; /* Worst-case defaults. */ | |
1376 r->reganch = 0; | |
1377 r->regmust = NULL; | |
1378 r->regmlen = 0; | |
1379 r->regflags = regflags; | |
1380 if (flags & HASNL) | |
1381 r->regflags |= RF_HASNL; | |
1382 if (flags & HASLOOKBH) | |
1383 r->regflags |= RF_LOOKBH; | |
1384 #ifdef FEAT_SYN_HL | |
1385 /* Remember whether this pattern has any \z specials in it. */ | |
1386 r->reghasz = re_has_z; | |
1387 #endif | |
1388 scan = r->program + 1; /* First BRANCH. */ | |
1389 if (OP(regnext(scan)) == END) /* Only one top-level choice. */ | |
1390 { | |
1391 scan = OPERAND(scan); | |
1392 | |
1393 /* Starting-point info. */ | |
1394 if (OP(scan) == BOL || OP(scan) == RE_BOF) | |
1395 { | |
1396 r->reganch++; | |
1397 scan = regnext(scan); | |
1398 } | |
1399 | |
1400 if (OP(scan) == EXACTLY) | |
1401 { | |
1402 #ifdef FEAT_MBYTE | |
1403 if (has_mbyte) | |
1404 r->regstart = (*mb_ptr2char)(OPERAND(scan)); | |
1405 else | |
1406 #endif | |
1407 r->regstart = *OPERAND(scan); | |
1408 } | |
1409 else if ((OP(scan) == BOW | |
1410 || OP(scan) == EOW | |
1411 || OP(scan) == NOTHING | |
1412 || OP(scan) == MOPEN + 0 || OP(scan) == NOPEN | |
1413 || OP(scan) == MCLOSE + 0 || OP(scan) == NCLOSE) | |
1414 && OP(regnext(scan)) == EXACTLY) | |
1415 { | |
1416 #ifdef FEAT_MBYTE | |
1417 if (has_mbyte) | |
1418 r->regstart = (*mb_ptr2char)(OPERAND(regnext(scan))); | |
1419 else | |
1420 #endif | |
1421 r->regstart = *OPERAND(regnext(scan)); | |
1422 } | |
1423 | |
1424 /* | |
1425 * If there's something expensive in the r.e., find the longest | |
1426 * literal string that must appear and make it the regmust. Resolve | |
1427 * ties in favor of later strings, since the regstart check works | |
1428 * with the beginning of the r.e. and avoiding duplication | |
1429 * strengthens checking. Not a strong reason, but sufficient in the | |
1430 * absence of others. | |
1431 */ | |
1432 /* | |
1433 * When the r.e. starts with BOW, it is faster to look for a regmust | |
1434 * first. Used a lot for "#" and "*" commands. (Added by mool). | |
1435 */ | |
1436 if ((flags & SPSTART || OP(scan) == BOW || OP(scan) == EOW) | |
1437 && !(flags & HASNL)) | |
1438 { | |
1439 longest = NULL; | |
1440 len = 0; | |
1441 for (; scan != NULL; scan = regnext(scan)) | |
1442 if (OP(scan) == EXACTLY && STRLEN(OPERAND(scan)) >= (size_t)len) | |
1443 { | |
1444 longest = OPERAND(scan); | |
1445 len = (int)STRLEN(OPERAND(scan)); | |
1446 } | |
1447 r->regmust = longest; | |
1448 r->regmlen = len; | |
1449 } | |
1450 } | |
4444 | 1451 #ifdef BT_REGEXP_DUMP |
7 | 1452 regdump(expr, r); |
1453 #endif | |
4444 | 1454 r->engine = &bt_regengine; |
1455 return (regprog_T *)r; | |
7 | 1456 } |
1457 | |
1458 /* | |
4805
66803af09906
updated for version 7.3.1149
Bram Moolenaar <bram@vim.org>
parents:
4770
diff
changeset
|
1459 * Free a compiled regexp program, returned by bt_regcomp(). |
66803af09906
updated for version 7.3.1149
Bram Moolenaar <bram@vim.org>
parents:
4770
diff
changeset
|
1460 */ |
66803af09906
updated for version 7.3.1149
Bram Moolenaar <bram@vim.org>
parents:
4770
diff
changeset
|
1461 static void |
66803af09906
updated for version 7.3.1149
Bram Moolenaar <bram@vim.org>
parents:
4770
diff
changeset
|
1462 bt_regfree(prog) |
66803af09906
updated for version 7.3.1149
Bram Moolenaar <bram@vim.org>
parents:
4770
diff
changeset
|
1463 regprog_T *prog; |
66803af09906
updated for version 7.3.1149
Bram Moolenaar <bram@vim.org>
parents:
4770
diff
changeset
|
1464 { |
66803af09906
updated for version 7.3.1149
Bram Moolenaar <bram@vim.org>
parents:
4770
diff
changeset
|
1465 vim_free(prog); |
66803af09906
updated for version 7.3.1149
Bram Moolenaar <bram@vim.org>
parents:
4770
diff
changeset
|
1466 } |
66803af09906
updated for version 7.3.1149
Bram Moolenaar <bram@vim.org>
parents:
4770
diff
changeset
|
1467 |
66803af09906
updated for version 7.3.1149
Bram Moolenaar <bram@vim.org>
parents:
4770
diff
changeset
|
1468 /* |
7 | 1469 * Setup to parse the regexp. Used once to get the length and once to do it. |
1470 */ | |
1471 static void | |
1472 regcomp_start(expr, re_flags) | |
1473 char_u *expr; | |
1474 int re_flags; /* see vim_regcomp() */ | |
1475 { | |
1476 initchr(expr); | |
1477 if (re_flags & RE_MAGIC) | |
1478 reg_magic = MAGIC_ON; | |
1479 else | |
1480 reg_magic = MAGIC_OFF; | |
1481 reg_string = (re_flags & RE_STRING); | |
481 | 1482 reg_strict = (re_flags & RE_STRICT); |
4744
a62695305e03
updated for version 7.3.1119
Bram Moolenaar <bram@vim.org>
parents:
4732
diff
changeset
|
1483 get_cpo_flags(); |
7 | 1484 |
1485 num_complex_braces = 0; | |
1486 regnpar = 1; | |
1487 vim_memset(had_endbrace, 0, sizeof(had_endbrace)); | |
1488 #ifdef FEAT_SYN_HL | |
1489 regnzpar = 1; | |
1490 re_has_z = 0; | |
1491 #endif | |
1492 regsize = 0L; | |
2010 | 1493 reg_toolong = FALSE; |
7 | 1494 regflags = 0; |
1495 #if defined(FEAT_SYN_HL) || defined(PROTO) | |
1496 had_eol = FALSE; | |
1497 #endif | |
1498 } | |
1499 | |
#if defined(FEAT_SYN_HL) || defined(PROTO)
/*
 * Tell whether the EOL item "$" was encountered during the most recent call
 * to vim_regcomp().  The answer is kept in a file-level flag; not elegant,
 * but it does the job.
 */
int
vim_regcomp_had_eol()
{
    return had_eol;
}
#endif
1511 | |
1512 /* | |
4444 | 1513 * Parse regular expression, i.e. main body or parenthesized thing. |
7 | 1514 * |
1515 * Caller must absorb opening parenthesis. | |
1516 * | |
1517 * Combining parenthesis handling with the base level of regular expression | |
1518 * is a trifle forced, but the need to tie the tails of the branches to what | |
1519 * follows makes it hard to avoid. | |
1520 */ | |
1521 static char_u * | |
1522 reg(paren, flagp) | |
1523 int paren; /* REG_NOPAREN, REG_PAREN, REG_NPAREN or REG_ZPAREN */ | |
1524 int *flagp; | |
1525 { | |
1526 char_u *ret; | |
1527 char_u *br; | |
1528 char_u *ender; | |
1529 int parno = 0; | |
1530 int flags; | |
1531 | |
1532 *flagp = HASWIDTH; /* Tentatively. */ | |
1533 | |
1534 #ifdef FEAT_SYN_HL | |
1535 if (paren == REG_ZPAREN) | |
1536 { | |
1537 /* Make a ZOPEN node. */ | |
1538 if (regnzpar >= NSUBEXP) | |
1539 EMSG_RET_NULL(_("E50: Too many \\z(")); | |
1540 parno = regnzpar; | |
1541 regnzpar++; | |
1542 ret = regnode(ZOPEN + parno); | |
1543 } | |
1544 else | |
1545 #endif | |
1546 if (paren == REG_PAREN) | |
1547 { | |
1548 /* Make a MOPEN node. */ | |
1549 if (regnpar >= NSUBEXP) | |
4444 | 1550 EMSG2_RET_NULL(_("E51: Too many %s("), reg_magic == MAGIC_ALL); |
7 | 1551 parno = regnpar; |
1552 ++regnpar; | |
1553 ret = regnode(MOPEN + parno); | |
1554 } | |
1555 else if (paren == REG_NPAREN) | |
1556 { | |
1557 /* Make a NOPEN node. */ | |
1558 ret = regnode(NOPEN); | |
1559 } | |
1560 else | |
1561 ret = NULL; | |
1562 | |
1563 /* Pick up the branches, linking them together. */ | |
1564 br = regbranch(&flags); | |
1565 if (br == NULL) | |
1566 return NULL; | |
1567 if (ret != NULL) | |
1568 regtail(ret, br); /* [MZ]OPEN -> first. */ | |
1569 else | |
1570 ret = br; | |
1571 /* If one of the branches can be zero-width, the whole thing can. | |
1572 * If one of the branches has * at start or matches a line-break, the | |
1573 * whole thing can. */ | |
1574 if (!(flags & HASWIDTH)) | |
1575 *flagp &= ~HASWIDTH; | |
1576 *flagp |= flags & (SPSTART | HASNL | HASLOOKBH); | |
1577 while (peekchr() == Magic('|')) | |
1578 { | |
1579 skipchr(); | |
1580 br = regbranch(&flags); | |
2010 | 1581 if (br == NULL || reg_toolong) |
7 | 1582 return NULL; |
1583 regtail(ret, br); /* BRANCH -> BRANCH. */ | |
1584 if (!(flags & HASWIDTH)) | |
1585 *flagp &= ~HASWIDTH; | |
1586 *flagp |= flags & (SPSTART | HASNL | HASLOOKBH); | |
1587 } | |
1588 | |
1589 /* Make a closing node, and hook it on the end. */ | |
1590 ender = regnode( | |
1591 #ifdef FEAT_SYN_HL | |
1592 paren == REG_ZPAREN ? ZCLOSE + parno : | |
1593 #endif | |
1594 paren == REG_PAREN ? MCLOSE + parno : | |
1595 paren == REG_NPAREN ? NCLOSE : END); | |
1596 regtail(ret, ender); | |
1597 | |
1598 /* Hook the tails of the branches to the closing node. */ | |
1599 for (br = ret; br != NULL; br = regnext(br)) | |
1600 regoptail(br, ender); | |
1601 | |
1602 /* Check for proper termination. */ | |
1603 if (paren != REG_NOPAREN && getchr() != Magic(')')) | |
1604 { | |
1605 #ifdef FEAT_SYN_HL | |
1606 if (paren == REG_ZPAREN) | |
308 | 1607 EMSG_RET_NULL(_("E52: Unmatched \\z(")); |
7 | 1608 else |
1609 #endif | |
1610 if (paren == REG_NPAREN) | |
4444 | 1611 EMSG2_RET_NULL(_(e_unmatchedpp), reg_magic == MAGIC_ALL); |
7 | 1612 else |
4444 | 1613 EMSG2_RET_NULL(_(e_unmatchedp), reg_magic == MAGIC_ALL); |
7 | 1614 } |
1615 else if (paren == REG_NOPAREN && peekchr() != NUL) | |
1616 { | |
1617 if (curchr == Magic(')')) | |
4444 | 1618 EMSG2_RET_NULL(_(e_unmatchedpar), reg_magic == MAGIC_ALL); |
7 | 1619 else |
308 | 1620 EMSG_RET_NULL(_(e_trailing)); /* "Can't happen". */ |
7 | 1621 /* NOTREACHED */ |
1622 } | |
1623 /* | |
1624 * Here we set the flag allowing back references to this set of | |
1625 * parentheses. | |
1626 */ | |
1627 if (paren == REG_PAREN) | |
1628 had_endbrace[parno] = TRUE; /* have seen the close paren */ | |
1629 return ret; | |
1630 } | |
1631 | |
1632 /* | |
4444 | 1633 * Parse one alternative of an | operator. |
7 | 1634 * Implements the & operator. |
1635 */ | |
1636 static char_u * | |
1637 regbranch(flagp) | |
1638 int *flagp; | |
1639 { | |
1640 char_u *ret; | |
1641 char_u *chain = NULL; | |
1642 char_u *latest; | |
1643 int flags; | |
1644 | |
1645 *flagp = WORST | HASNL; /* Tentatively. */ | |
1646 | |
1647 ret = regnode(BRANCH); | |
1648 for (;;) | |
1649 { | |
1650 latest = regconcat(&flags); | |
1651 if (latest == NULL) | |
1652 return NULL; | |
1653 /* If one of the branches has width, the whole thing has. If one of | |
1654 * the branches anchors at start-of-line, the whole thing does. | |
1655 * If one of the branches uses look-behind, the whole thing does. */ | |
1656 *flagp |= flags & (HASWIDTH | SPSTART | HASLOOKBH); | |
1657 /* If one of the branches doesn't match a line-break, the whole thing | |
1658 * doesn't. */ | |
1659 *flagp &= ~HASNL | (flags & HASNL); | |
1660 if (chain != NULL) | |
1661 regtail(chain, latest); | |
1662 if (peekchr() != Magic('&')) | |
1663 break; | |
1664 skipchr(); | |
1665 regtail(latest, regnode(END)); /* operand ends */ | |
2010 | 1666 if (reg_toolong) |
1667 break; | |
7 | 1668 reginsert(MATCH, latest); |
1669 chain = latest; | |
1670 } | |
1671 | |
1672 return ret; | |
1673 } | |
1674 | |
1675 /* | |
4444 | 1676 * Parse one alternative of an | or & operator. |
7 | 1677 * Implements the concatenation operator. |
1678 */ | |
1679 static char_u * | |
1680 regconcat(flagp) | |
1681 int *flagp; | |
1682 { | |
1683 char_u *first = NULL; | |
1684 char_u *chain = NULL; | |
1685 char_u *latest; | |
1686 int flags; | |
1687 int cont = TRUE; | |
1688 | |
1689 *flagp = WORST; /* Tentatively. */ | |
1690 | |
1691 while (cont) | |
1692 { | |
1693 switch (peekchr()) | |
1694 { | |
1695 case NUL: | |
1696 case Magic('|'): | |
1697 case Magic('&'): | |
1698 case Magic(')'): | |
1699 cont = FALSE; | |
1700 break; | |
1701 case Magic('Z'): | |
1702 #ifdef FEAT_MBYTE | |
1703 regflags |= RF_ICOMBINE; | |
1704 #endif | |
1705 skipchr_keepstart(); | |
1706 break; | |
1707 case Magic('c'): | |
1708 regflags |= RF_ICASE; | |
1709 skipchr_keepstart(); | |
1710 break; | |
1711 case Magic('C'): | |
1712 regflags |= RF_NOICASE; | |
1713 skipchr_keepstart(); | |
1714 break; | |
1715 case Magic('v'): | |
1716 reg_magic = MAGIC_ALL; | |
1717 skipchr_keepstart(); | |
1718 curchr = -1; | |
1719 break; | |
1720 case Magic('m'): | |
1721 reg_magic = MAGIC_ON; | |
1722 skipchr_keepstart(); | |
1723 curchr = -1; | |
1724 break; | |
1725 case Magic('M'): | |
1726 reg_magic = MAGIC_OFF; | |
1727 skipchr_keepstart(); | |
1728 curchr = -1; | |
1729 break; | |
1730 case Magic('V'): | |
1731 reg_magic = MAGIC_NONE; | |
1732 skipchr_keepstart(); | |
1733 curchr = -1; | |
1734 break; | |
1735 default: | |
1736 latest = regpiece(&flags); | |
2010 | 1737 if (latest == NULL || reg_toolong) |
7 | 1738 return NULL; |
1739 *flagp |= flags & (HASWIDTH | HASNL | HASLOOKBH); | |
1740 if (chain == NULL) /* First piece. */ | |
1741 *flagp |= flags & SPSTART; | |
1742 else | |
1743 regtail(chain, latest); | |
1744 chain = latest; | |
1745 if (first == NULL) | |
1746 first = latest; | |
1747 break; | |
1748 } | |
1749 } | |
1750 if (first == NULL) /* Loop ran zero times. */ | |
1751 first = regnode(NOTHING); | |
1752 return first; | |
1753 } | |
1754 | |
1755 /* | |
4444 | 1756 * Parse something followed by possible [*+=]. |
7 | 1757 * |
1758 * Note that the branching code sequences used for = and the general cases | |
1759 * of * and + are somewhat optimized: they use the same NOTHING node as | |
1760 * both the endmarker for their branch list and the body of the last branch. | |
1761 * It might seem that this node could be dispensed with entirely, but the | |
1762 * endmarker role is not redundant. | |
1763 */ | |
1764 static char_u * | |
1765 regpiece(flagp) | |
1766 int *flagp; | |
1767 { | |
1768 char_u *ret; | |
1769 int op; | |
1770 char_u *next; | |
1771 int flags; | |
1772 long minval; | |
1773 long maxval; | |
1774 | |
1775 ret = regatom(&flags); | |
1776 if (ret == NULL) | |
1777 return NULL; | |
1778 | |
1779 op = peekchr(); | |
1780 if (re_multi_type(op) == NOT_MULTI) | |
1781 { | |
1782 *flagp = flags; | |
1783 return ret; | |
1784 } | |
1785 /* default flags */ | |
1786 *flagp = (WORST | SPSTART | (flags & (HASNL | HASLOOKBH))); | |
1787 | |
1788 skipchr(); | |
1789 switch (op) | |
1790 { | |
1791 case Magic('*'): | |
1792 if (flags & SIMPLE) | |
1793 reginsert(STAR, ret); | |
1794 else | |
1795 { | |
1796 /* Emit x* as (x&|), where & means "self". */ | |
1797 reginsert(BRANCH, ret); /* Either x */ | |
1798 regoptail(ret, regnode(BACK)); /* and loop */ | |
1799 regoptail(ret, ret); /* back */ | |
1800 regtail(ret, regnode(BRANCH)); /* or */ | |
1801 regtail(ret, regnode(NOTHING)); /* null. */ | |
1802 } | |
1803 break; | |
1804 | |
1805 case Magic('+'): | |
1806 if (flags & SIMPLE) | |
1807 reginsert(PLUS, ret); | |
1808 else | |
1809 { | |
1810 /* Emit x+ as x(&|), where & means "self". */ | |
1811 next = regnode(BRANCH); /* Either */ | |
1812 regtail(ret, next); | |
233 | 1813 regtail(regnode(BACK), ret); /* loop back */ |
7 | 1814 regtail(next, regnode(BRANCH)); /* or */ |
1815 regtail(ret, regnode(NOTHING)); /* null. */ | |
1816 } | |
1817 *flagp = (WORST | HASWIDTH | (flags & (HASNL | HASLOOKBH))); | |
1818 break; | |
1819 | |
1820 case Magic('@'): | |
1821 { | |
1822 int lop = END; | |
4579
7a2be4a39423
updated for version 7.3.1037
Bram Moolenaar <bram@vim.org>
parents:
4505
diff
changeset
|
1823 int nr; |
7a2be4a39423
updated for version 7.3.1037
Bram Moolenaar <bram@vim.org>
parents:
4505
diff
changeset
|
1824 |
7a2be4a39423
updated for version 7.3.1037
Bram Moolenaar <bram@vim.org>
parents:
4505
diff
changeset
|
1825 nr = getdecchrs(); |
7 | 1826 switch (no_Magic(getchr())) |
1827 { | |
1828 case '=': lop = MATCH; break; /* \@= */ | |
1829 case '!': lop = NOMATCH; break; /* \@! */ | |
1830 case '>': lop = SUBPAT; break; /* \@> */ | |
1831 case '<': switch (no_Magic(getchr())) | |
1832 { | |
1833 case '=': lop = BEHIND; break; /* \@<= */ | |
1834 case '!': lop = NOBEHIND; break; /* \@<! */ | |
1835 } | |
1836 } | |
1837 if (lop == END) | |
4444 | 1838 EMSG2_RET_NULL(_("E59: invalid character after %s@"), |
7 | 1839 reg_magic == MAGIC_ALL); |
1840 /* Look behind must match with behind_pos. */ | |
1841 if (lop == BEHIND || lop == NOBEHIND) | |
1842 { | |
1843 regtail(ret, regnode(BHPOS)); | |
1844 *flagp |= HASLOOKBH; | |
1845 } | |
1846 regtail(ret, regnode(END)); /* operand ends */ | |
4579
7a2be4a39423
updated for version 7.3.1037
Bram Moolenaar <bram@vim.org>
parents:
4505
diff
changeset
|
1847 if (lop == BEHIND || lop == NOBEHIND) |
7a2be4a39423
updated for version 7.3.1037
Bram Moolenaar <bram@vim.org>
parents:
4505
diff
changeset
|
1848 { |
7a2be4a39423
updated for version 7.3.1037
Bram Moolenaar <bram@vim.org>
parents:
4505
diff
changeset
|
1849 if (nr < 0) |
7a2be4a39423
updated for version 7.3.1037
Bram Moolenaar <bram@vim.org>
parents:
4505
diff
changeset
|
1850 nr = 0; /* no limit is same as zero limit */ |
7a2be4a39423
updated for version 7.3.1037
Bram Moolenaar <bram@vim.org>
parents:
4505
diff
changeset
|
1851 reginsert_nr(lop, nr, ret); |
7a2be4a39423
updated for version 7.3.1037
Bram Moolenaar <bram@vim.org>
parents:
4505
diff
changeset
|
1852 } |
7a2be4a39423
updated for version 7.3.1037
Bram Moolenaar <bram@vim.org>
parents:
4505
diff
changeset
|
1853 else |
7a2be4a39423
updated for version 7.3.1037
Bram Moolenaar <bram@vim.org>
parents:
4505
diff
changeset
|
1854 reginsert(lop, ret); |
7 | 1855 break; |
1856 } | |
1857 | |
1858 case Magic('?'): | |
1859 case Magic('='): | |
1860 /* Emit x= as (x|) */ | |
1861 reginsert(BRANCH, ret); /* Either x */ | |
1862 regtail(ret, regnode(BRANCH)); /* or */ | |
1863 next = regnode(NOTHING); /* null. */ | |
1864 regtail(ret, next); | |
1865 regoptail(ret, next); | |
1866 break; | |
1867 | |
1868 case Magic('{'): | |
1869 if (!read_limits(&minval, &maxval)) | |
1870 return NULL; | |
1871 if (flags & SIMPLE) | |
1872 { | |
1873 reginsert(BRACE_SIMPLE, ret); | |
1874 reginsert_limits(BRACE_LIMITS, minval, maxval, ret); | |
1875 } | |
1876 else | |
1877 { | |
1878 if (num_complex_braces >= 10) | |
4444 | 1879 EMSG2_RET_NULL(_("E60: Too many complex %s{...}s"), |
7 | 1880 reg_magic == MAGIC_ALL); |
1881 reginsert(BRACE_COMPLEX + num_complex_braces, ret); | |
1882 regoptail(ret, regnode(BACK)); | |
1883 regoptail(ret, ret); | |
1884 reginsert_limits(BRACE_LIMITS, minval, maxval, ret); | |
1885 ++num_complex_braces; | |
1886 } | |
1887 if (minval > 0 && maxval > 0) | |
1888 *flagp = (HASWIDTH | (flags & (HASNL | HASLOOKBH))); | |
1889 break; | |
1890 } | |
1891 if (re_multi_type(peekchr()) != NOT_MULTI) | |
1892 { | |
1893 /* Can't have a multi follow a multi. */ | |
1894 if (peekchr() == Magic('*')) | |
1895 sprintf((char *)IObuff, _("E61: Nested %s*"), | |
1896 reg_magic >= MAGIC_ON ? "" : "\\"); | |
1897 else | |
1898 sprintf((char *)IObuff, _("E62: Nested %s%c"), | |
1899 reg_magic == MAGIC_ALL ? "" : "\\", no_Magic(peekchr())); | |
1900 EMSG_RET_NULL(IObuff); | |
1901 } | |
1902 | |
1903 return ret; | |
1904 } | |
1905 | |
4444 | 1906 /* When making changes to classchars also change nfa_classcodes. */ |
1907 static char_u *classchars = (char_u *)".iIkKfFpPsSdDxXoOwWhHaAlLuU"; | |
1908 static int classcodes[] = { | |
1909 ANY, IDENT, SIDENT, KWORD, SKWORD, | |
1910 FNAME, SFNAME, PRINT, SPRINT, | |
1911 WHITE, NWHITE, DIGIT, NDIGIT, | |
1912 HEX, NHEX, OCTAL, NOCTAL, | |
1913 WORD, NWORD, HEAD, NHEAD, | |
1914 ALPHA, NALPHA, LOWER, NLOWER, | |
1915 UPPER, NUPPER | |
1916 }; | |
1917 | |
7 | 1918 /* |
4444 | 1919 * Parse the lowest level. |
7 | 1920 * |
1921 * Optimization: gobbles an entire sequence of ordinary characters so that | |
1922 * it can turn them into a single node, which is smaller to store and | |
1923 * faster to run. Don't do this when one_exactly is set. | |
1924 */ | |
1925 static char_u * | |
1926 regatom(flagp) | |
1927 int *flagp; | |
1928 { | |
1929 char_u *ret; | |
1930 int flags; | |
1931 int c; | |
1932 char_u *p; | |
1933 int extra = 0; | |
1934 | |
1935 *flagp = WORST; /* Tentatively. */ | |
1936 | |
1937 c = getchr(); | |
1938 switch (c) | |
1939 { | |
1940 case Magic('^'): | |
1941 ret = regnode(BOL); | |
1942 break; | |
1943 | |
1944 case Magic('$'): | |
1945 ret = regnode(EOL); | |
1946 #if defined(FEAT_SYN_HL) || defined(PROTO) | |
1947 had_eol = TRUE; | |
1948 #endif | |
1949 break; | |
1950 | |
1951 case Magic('<'): | |
1952 ret = regnode(BOW); | |
1953 break; | |
1954 | |
1955 case Magic('>'): | |
1956 ret = regnode(EOW); | |
1957 break; | |
1958 | |
1959 case Magic('_'): | |
1960 c = no_Magic(getchr()); | |
1961 if (c == '^') /* "\_^" is start-of-line */ | |
1962 { | |
1963 ret = regnode(BOL); | |
1964 break; | |
1965 } | |
1966 if (c == '$') /* "\_$" is end-of-line */ | |
1967 { | |
1968 ret = regnode(EOL); | |
1969 #if defined(FEAT_SYN_HL) || defined(PROTO) | |
1970 had_eol = TRUE; | |
1971 #endif | |
1972 break; | |
1973 } | |
1974 | |
1975 extra = ADD_NL; | |
1976 *flagp |= HASNL; | |
1977 | |
1978 /* "\_[" is character range plus newline */ | |
1979 if (c == '[') | |
1980 goto collection; | |
1981 | |
1982 /* "\_x" is character class plus newline */ | |
1983 /*FALLTHROUGH*/ | |
1984 | |
1985 /* | |
1986 * Character classes. | |
1987 */ | |
1988 case Magic('.'): | |
1989 case Magic('i'): | |
1990 case Magic('I'): | |
1991 case Magic('k'): | |
1992 case Magic('K'): | |
1993 case Magic('f'): | |
1994 case Magic('F'): | |
1995 case Magic('p'): | |
1996 case Magic('P'): | |
1997 case Magic('s'): | |
1998 case Magic('S'): | |
1999 case Magic('d'): | |
2000 case Magic('D'): | |
2001 case Magic('x'): | |
2002 case Magic('X'): | |
2003 case Magic('o'): | |
2004 case Magic('O'): | |
2005 case Magic('w'): | |
2006 case Magic('W'): | |
2007 case Magic('h'): | |
2008 case Magic('H'): | |
2009 case Magic('a'): | |
2010 case Magic('A'): | |
2011 case Magic('l'): | |
2012 case Magic('L'): | |
2013 case Magic('u'): | |
2014 case Magic('U'): | |
2015 p = vim_strchr(classchars, no_Magic(c)); | |
2016 if (p == NULL) | |
2017 EMSG_RET_NULL(_("E63: invalid use of \\_")); | |
714 | 2018 #ifdef FEAT_MBYTE |
2019 /* When '.' is followed by a composing char ignore the dot, so that | |
2020 * the composing char is matched here. */ | |
2021 if (enc_utf8 && c == Magic('.') && utf_iscomposing(peekchr())) | |
2022 { | |
2023 c = getchr(); | |
2024 goto do_multibyte; | |
2025 } | |
2026 #endif | |
7 | 2027 ret = regnode(classcodes[p - classchars] + extra); |
2028 *flagp |= HASWIDTH | SIMPLE; | |
2029 break; | |
2030 | |
2031 case Magic('n'): | |
2032 if (reg_string) | |
2033 { | |
2034 /* In a string "\n" matches a newline character. */ | |
2035 ret = regnode(EXACTLY); | |
2036 regc(NL); | |
2037 regc(NUL); | |
2038 *flagp |= HASWIDTH | SIMPLE; | |
2039 } | |
2040 else | |
2041 { | |
2042 /* In buffer text "\n" matches the end of a line. */ | |
2043 ret = regnode(NEWL); | |
2044 *flagp |= HASWIDTH | HASNL; | |
2045 } | |
2046 break; | |
2047 | |
2048 case Magic('('): | |
2049 if (one_exactly) | |
2050 EMSG_ONE_RET_NULL; | |
2051 ret = reg(REG_PAREN, &flags); | |
2052 if (ret == NULL) | |
2053 return NULL; | |
2054 *flagp |= flags & (HASWIDTH | SPSTART | HASNL | HASLOOKBH); | |
2055 break; | |
2056 | |
2057 case NUL: | |
2058 case Magic('|'): | |
2059 case Magic('&'): | |
2060 case Magic(')'): | |
1468 | 2061 if (one_exactly) |
2062 EMSG_ONE_RET_NULL; | |
7 | 2063 EMSG_RET_NULL(_(e_internal)); /* Supposed to be caught earlier. */ |
2064 /* NOTREACHED */ | |
2065 | |
2066 case Magic('='): | |
2067 case Magic('?'): | |
2068 case Magic('+'): | |
2069 case Magic('@'): | |
2070 case Magic('{'): | |
2071 case Magic('*'): | |
2072 c = no_Magic(c); | |
2073 sprintf((char *)IObuff, _("E64: %s%c follows nothing"), | |
2074 (c == '*' ? reg_magic >= MAGIC_ON : reg_magic == MAGIC_ALL) | |
2075 ? "" : "\\", c); | |
2076 EMSG_RET_NULL(IObuff); | |
2077 /* NOTREACHED */ | |
2078 | |
2079 case Magic('~'): /* previous substitute pattern */ | |
359 | 2080 if (reg_prev_sub != NULL) |
7 | 2081 { |
2082 char_u *lp; | |
2083 | |
2084 ret = regnode(EXACTLY); | |
2085 lp = reg_prev_sub; | |
2086 while (*lp != NUL) | |
2087 regc(*lp++); | |
2088 regc(NUL); | |
2089 if (*reg_prev_sub != NUL) | |
2090 { | |
2091 *flagp |= HASWIDTH; | |
2092 if ((lp - reg_prev_sub) == 1) | |
2093 *flagp |= SIMPLE; | |
2094 } | |
2095 } | |
2096 else | |
2097 EMSG_RET_NULL(_(e_nopresub)); | |
2098 break; | |
2099 | |
2100 case Magic('1'): | |
2101 case Magic('2'): | |
2102 case Magic('3'): | |
2103 case Magic('4'): | |
2104 case Magic('5'): | |
2105 case Magic('6'): | |
2106 case Magic('7'): | |
2107 case Magic('8'): | |
2108 case Magic('9'): | |
2109 { | |
2110 int refnum; | |
2111 | |
2112 refnum = c - Magic('0'); | |
2113 /* | |
2114 * Check if the back reference is legal. We must have seen the | |
2115 * close brace. | |
2116 * TODO: Should also check that we don't refer to something | |
2117 * that is repeated (+*=): what instance of the repetition | |
2118 * should we match? | |
2119 */ | |
2120 if (!had_endbrace[refnum]) | |
2121 { | |
2122 /* Trick: check if "@<=" or "@<!" follows, in which case | |
2123 * the \1 can appear before the referenced match. */ | |
2124 for (p = regparse; *p != NUL; ++p) | |
2125 if (p[0] == '@' && p[1] == '<' | |
2126 && (p[2] == '!' || p[2] == '=')) | |
2127 break; | |
2128 if (*p == NUL) | |
2129 EMSG_RET_NULL(_("E65: Illegal back reference")); | |
2130 } | |
2131 ret = regnode(BACKREF + refnum); | |
2132 } | |
2133 break; | |
2134 | |
2135 case Magic('z'): | |
2136 { | |
2137 c = no_Magic(getchr()); | |
2138 switch (c) | |
2139 { | |
741 | 2140 #ifdef FEAT_SYN_HL |
7 | 2141 case '(': if (reg_do_extmatch != REX_SET) |
4688
371cc0c44097
updated for version 7.3.1091
Bram Moolenaar <bram@vim.org>
parents:
4682
diff
changeset
|
2142 EMSG_RET_NULL(_(e_z_not_allowed)); |
7 | 2143 if (one_exactly) |
2144 EMSG_ONE_RET_NULL; | |
2145 ret = reg(REG_ZPAREN, &flags); | |
2146 if (ret == NULL) | |
2147 return NULL; | |
2148 *flagp |= flags & (HASWIDTH|SPSTART|HASNL|HASLOOKBH); | |
2149 re_has_z = REX_SET; | |
2150 break; | |
2151 | |
2152 case '1': | |
2153 case '2': | |
2154 case '3': | |
2155 case '4': | |
2156 case '5': | |
2157 case '6': | |
2158 case '7': | |
2159 case '8': | |
2160 case '9': if (reg_do_extmatch != REX_USE) | |
4688
371cc0c44097
updated for version 7.3.1091
Bram Moolenaar <bram@vim.org>
parents:
4682
diff
changeset
|
2161 EMSG_RET_NULL(_(e_z1_not_allowed)); |
7 | 2162 ret = regnode(ZREF + c - '0'); |
2163 re_has_z = REX_USE; | |
2164 break; | |
741 | 2165 #endif |
7 | 2166 |
2167 case 's': ret = regnode(MOPEN + 0); | |
2168 break; | |
2169 | |
2170 case 'e': ret = regnode(MCLOSE + 0); | |
2171 break; | |
2172 | |
2173 default: EMSG_RET_NULL(_("E68: Invalid character after \\z")); | |
2174 } | |
2175 } | |
2176 break; | |
2177 | |
2178 case Magic('%'): | |
2179 { | |
2180 c = no_Magic(getchr()); | |
2181 switch (c) | |
2182 { | |
2183 /* () without a back reference */ | |
2184 case '(': | |
2185 if (one_exactly) | |
2186 EMSG_ONE_RET_NULL; | |
2187 ret = reg(REG_NPAREN, &flags); | |
2188 if (ret == NULL) | |
2189 return NULL; | |
2190 *flagp |= flags & (HASWIDTH | SPSTART | HASNL | HASLOOKBH); | |
2191 break; | |
2192 | |
2193 /* Catch \%^ and \%$ regardless of where they appear in the | |
2194 * pattern -- regardless of whether or not it makes sense. */ | |
2195 case '^': | |
2196 ret = regnode(RE_BOF); | |
2197 break; | |
2198 | |
2199 case '$': | |
2200 ret = regnode(RE_EOF); | |
2201 break; | |
2202 | |
2203 case '#': | |
2204 ret = regnode(CURSOR); | |
2205 break; | |
2206 | |
639 | 2207 case 'V': |
2208 ret = regnode(RE_VISUAL); | |
2209 break; | |
2210 | |
7 | 2211 /* \%[abc]: Emit as a list of branches, all ending at the last |
2212 * branch which matches nothing. */ | |
2213 case '[': | |
2214 if (one_exactly) /* doesn't nest */ | |
2215 EMSG_ONE_RET_NULL; | |
2216 { | |
2217 char_u *lastbranch; | |
2218 char_u *lastnode = NULL; | |
2219 char_u *br; | |
2220 | |
2221 ret = NULL; | |
2222 while ((c = getchr()) != ']') | |
2223 { | |
2224 if (c == NUL) | |
4744
a62695305e03
updated for version 7.3.1119
Bram Moolenaar <bram@vim.org>
parents:
4732
diff
changeset
|
2225 EMSG2_RET_NULL(_(e_missing_sb), |
7 | 2226 reg_magic == MAGIC_ALL); |
2227 br = regnode(BRANCH); | |
2228 if (ret == NULL) | |
2229 ret = br; | |
2230 else | |
2231 regtail(lastnode, br); | |
2232 | |
2233 ungetchr(); | |
2234 one_exactly = TRUE; | |
2235 lastnode = regatom(flagp); | |
2236 one_exactly = FALSE; | |
2237 if (lastnode == NULL) | |
2238 return NULL; | |
2239 } | |
2240 if (ret == NULL) | |
4760
532a9855bd30
updated for version 7.3.1127
Bram Moolenaar <bram@vim.org>
parents:
4746
diff
changeset
|
2241 EMSG2_RET_NULL(_(e_empty_sb), |
7 | 2242 reg_magic == MAGIC_ALL); |
2243 lastbranch = regnode(BRANCH); | |
2244 br = regnode(NOTHING); | |
2245 if (ret != JUST_CALC_SIZE) | |
2246 { | |
2247 regtail(lastnode, br); | |
2248 regtail(lastbranch, br); | |
2249 /* connect all branches to the NOTHING | |
2250 * branch at the end */ | |
2251 for (br = ret; br != lastnode; ) | |
2252 { | |
2253 if (OP(br) == BRANCH) | |
2254 { | |
2255 regtail(br, lastbranch); | |
2256 br = OPERAND(br); | |
2257 } | |
2258 else | |
2259 br = regnext(br); | |
2260 } | |
2261 } | |
1701 | 2262 *flagp &= ~(HASWIDTH | SIMPLE); |
7 | 2263 break; |
2264 } | |
2265 | |
24 | 2266 case 'd': /* %d123 decimal */ |
2267 case 'o': /* %o123 octal */ | |
2268 case 'x': /* %xab hex 2 */ | |
2269 case 'u': /* %uabcd hex 4 */ | |
2270 case 'U': /* %U1234abcd hex 8 */ | |
2271 { | |
2272 int i; | |
2273 | |
2274 switch (c) | |
2275 { | |
2276 case 'd': i = getdecchrs(); break; | |
2277 case 'o': i = getoctchrs(); break; | |
2278 case 'x': i = gethexchrs(2); break; | |
2279 case 'u': i = gethexchrs(4); break; | |
2280 case 'U': i = gethexchrs(8); break; | |
2281 default: i = -1; break; | |
2282 } | |
2283 | |
2284 if (i < 0) | |
4444 | 2285 EMSG2_RET_NULL( |
24 | 2286 _("E678: Invalid character after %s%%[dxouU]"), |
2287 reg_magic == MAGIC_ALL); | |
714 | 2288 #ifdef FEAT_MBYTE |
2289 if (use_multibytecode(i)) | |
2290 ret = regnode(MULTIBYTECODE); | |
2291 else | |
2292 #endif | |
2293 ret = regnode(EXACTLY); | |
24 | 2294 if (i == 0) |
2295 regc(0x0a); | |
2296 else | |
2297 #ifdef FEAT_MBYTE | |
2298 regmbc(i); | |
2299 #else | |
2300 regc(i); | |
2301 #endif | |
2302 regc(NUL); | |
2303 *flagp |= HASWIDTH; | |
2304 break; | |
2305 } | |
2306 | |
7 | 2307 default: |
639 | 2308 if (VIM_ISDIGIT(c) || c == '<' || c == '>' |
2309 || c == '\'') | |
7 | 2310 { |
2311 long_u n = 0; | |
2312 int cmp; | |
2313 | |
2314 cmp = c; | |
2315 if (cmp == '<' || cmp == '>') | |
2316 c = getchr(); | |
2317 while (VIM_ISDIGIT(c)) | |
2318 { | |
2319 n = n * 10 + (c - '0'); | |
2320 c = getchr(); | |
2321 } | |
639 | 2322 if (c == '\'' && n == 0) |
2323 { | |
2324 /* "\%'m", "\%<'m" and "\%>'m": Mark */ | |
2325 c = getchr(); | |
2326 ret = regnode(RE_MARK); | |
2327 if (ret == JUST_CALC_SIZE) | |
2328 regsize += 2; | |
2329 else | |
2330 { | |
2331 *regcode++ = c; | |
2332 *regcode++ = cmp; | |
2333 } | |
2334 break; | |
2335 } | |
2336 else if (c == 'l' || c == 'c' || c == 'v') | |
7 | 2337 { |
2338 if (c == 'l') | |
2339 ret = regnode(RE_LNUM); | |
2340 else if (c == 'c') | |
2341 ret = regnode(RE_COL); | |
2342 else | |
2343 ret = regnode(RE_VCOL); | |
2344 if (ret == JUST_CALC_SIZE) | |
2345 regsize += 5; | |
2346 else | |
2347 { | |
2348 /* put the number and the optional | |
2349 * comparator after the opcode */ | |
2350 regcode = re_put_long(regcode, n); | |
2351 *regcode++ = cmp; | |
2352 } | |
2353 break; | |
2354 } | |
2355 } | |
2356 | |
4444 | 2357 EMSG2_RET_NULL(_("E71: Invalid character after %s%%"), |
7 | 2358 reg_magic == MAGIC_ALL); |
2359 } | |
2360 } | |
2361 break; | |
2362 | |
2363 case Magic('['): | |
2364 collection: | |
2365 { | |
2366 char_u *lp; | |
2367 | |
2368 /* | |
2369 * If there is no matching ']', we assume the '[' is a normal | |
2370 * character. This makes 'incsearch' and ":help [" work. | |
2371 */ | |
2372 lp = skip_anyof(regparse); | |
2373 if (*lp == ']') /* there is a matching ']' */ | |
2374 { | |
2375 int startc = -1; /* > 0 when next '-' is a range */ | |
2376 int endc; | |
2377 | |
2378 /* | |
2379 * In a character class, different parsing rules apply. | |
2380 * Not even \ is special anymore, nothing is. | |
2381 */ | |
2382 if (*regparse == '^') /* Complement of range. */ | |
2383 { | |
2384 ret = regnode(ANYBUT + extra); | |
2385 regparse++; | |
2386 } | |
2387 else | |
2388 ret = regnode(ANYOF + extra); | |
2389 | |
2390 /* At the start ']' and '-' mean the literal character. */ | |
2391 if (*regparse == ']' || *regparse == '-') | |
167 | 2392 { |
2393 startc = *regparse; | |
7 | 2394 regc(*regparse++); |
167 | 2395 } |
7 | 2396 |
2397 while (*regparse != NUL && *regparse != ']') | |
2398 { | |
2399 if (*regparse == '-') | |
2400 { | |
2401 ++regparse; | |
2402 /* The '-' is not used for a range at the end and | |
2403 * after or before a '\n'. */ | |
2404 if (*regparse == ']' || *regparse == NUL | |
2405 || startc == -1 | |
2406 || (regparse[0] == '\\' && regparse[1] == 'n')) | |
2407 { | |
2408 regc('-'); | |
2409 startc = '-'; /* [--x] is a range */ | |
2410 } | |
2411 else | |
2412 { | |
167 | 2413 /* Also accept "a-[.z.]" */ |
2414 endc = 0; | |
2415 if (*regparse == '[') | |
2416 endc = get_coll_element(&regparse); | |
2417 if (endc == 0) | |
2418 { | |
7 | 2419 #ifdef FEAT_MBYTE |
167 | 2420 if (has_mbyte) |
2421 endc = mb_ptr2char_adv(&regparse); | |
2422 else | |
7 | 2423 #endif |
167 | 2424 endc = *regparse++; |
2425 } | |
24 | 2426 |
2427 /* Handle \o40, \x20 and \u20AC style sequences */ | |
4744
a62695305e03
updated for version 7.3.1119
Bram Moolenaar <bram@vim.org>
parents:
4732
diff
changeset
|
2428 if (endc == '\\' && !reg_cpo_lit && !reg_cpo_bsl) |
24 | 2429 endc = coll_get_char(); |
2430 | |
7 | 2431 if (startc > endc) |
2432 EMSG_RET_NULL(_(e_invrange)); | |
2433 #ifdef FEAT_MBYTE | |
2434 if (has_mbyte && ((*mb_char2len)(startc) > 1 | |
2435 || (*mb_char2len)(endc) > 1)) | |
2436 { | |
2437 /* Limit to a range of 256 chars */ | |
2438 if (endc > startc + 256) | |
2439 EMSG_RET_NULL(_(e_invrange)); | |
2440 while (++startc <= endc) | |
2441 regmbc(startc); | |
2442 } | |
2443 else | |
2444 #endif | |
2445 { | |
2446 #ifdef EBCDIC | |
2447 int alpha_only = FALSE; | |
2448 | |
2449 /* for alphabetical range skip the gaps | |
2450 * 'i'-'j', 'r'-'s', 'I'-'J' and 'R'-'S'. */ | |
2451 if (isalpha(startc) && isalpha(endc)) | |
2452 alpha_only = TRUE; | |
2453 #endif | |
2454 while (++startc <= endc) | |
2455 #ifdef EBCDIC | |
2456 if (!alpha_only || isalpha(startc)) | |
2457 #endif | |
2458 regc(startc); | |
2459 } | |
2460 startc = -1; | |
2461 } | |
2462 } | |
2463 /* | |
2464 * Only "\]", "\^", "\-" and "\\" are special in Vi. Vim | |
2465 * accepts "\t", "\e", etc., but only when the 'l' flag in | |
2466 * 'cpoptions' is not included. | |
167 | 2467 * Posix doesn't recognize backslash at all. |
7 | 2468 */ |
2469 else if (*regparse == '\\' | |
4744
a62695305e03
updated for version 7.3.1119
Bram Moolenaar <bram@vim.org>
parents:
4732
diff
changeset
|
2470 && !reg_cpo_bsl |
7 | 2471 && (vim_strchr(REGEXP_INRANGE, regparse[1]) != NULL |
4744
a62695305e03
updated for version 7.3.1119
Bram Moolenaar <bram@vim.org>
parents:
4732
diff
changeset
|
2472 || (!reg_cpo_lit |
7 | 2473 && vim_strchr(REGEXP_ABBR, |
2474 regparse[1]) != NULL))) | |
2475 { | |
2476 regparse++; | |
2477 if (*regparse == 'n') | |
2478 { | |
2479 /* '\n' in range: also match NL */ | |
2480 if (ret != JUST_CALC_SIZE) | |
2481 { | |
4084 | 2482 /* Using \n inside [^] does not change what |
2483 * matches. "[^\n]" is the same as ".". */ | |
2484 if (*ret == ANYOF) | |
2485 { | |
7 | 2486 *ret = ANYOF + ADD_NL; |
4084 | 2487 *flagp |= HASNL; |
2488 } | |
7 | 2489 /* else: must have had a \n already */ |
2490 } | |
2491 regparse++; | |
2492 startc = -1; | |
2493 } | |
24 | 2494 else if (*regparse == 'd' |
2495 || *regparse == 'o' | |
2496 || *regparse == 'x' | |
2497 || *regparse == 'u' | |
2498 || *regparse == 'U') | |
2499 { | |
2500 startc = coll_get_char(); | |
2501 if (startc == 0) | |
2502 regc(0x0a); | |
2503 else | |
2504 #ifdef FEAT_MBYTE | |
2505 regmbc(startc); | |
2506 #else | |
2507 regc(startc); | |
2508 #endif | |
2509 } | |
7 | 2510 else |
2511 { | |
2512 startc = backslash_trans(*regparse++); | |
2513 regc(startc); | |
2514 } | |
2515 } | |
2516 else if (*regparse == '[') | |
2517 { | |
2518 int c_class; | |
2519 int cu; | |
2520 | |
167 | 2521 c_class = get_char_class(®parse); |
7 | 2522 startc = -1; |
2523 /* Characters assumed to be 8 bits! */ | |
2524 switch (c_class) | |
2525 { | |
2526 case CLASS_NONE: | |
167 | 2527 c_class = get_equi_class(®parse); |
2528 if (c_class != 0) | |
2529 { | |
2530 /* produce equivalence class */ | |
2531 reg_equi_class(c_class); | |
2532 } | |
2533 else if ((c_class = | |
2534 get_coll_element(®parse)) != 0) | |
2535 { | |
2536 /* produce a collating element */ | |
2537 regmbc(c_class); | |
2538 } | |
2539 else | |
2540 { | |
2541 /* literal '[', allow [[-x] as a range */ | |
2542 startc = *regparse++; | |
2543 regc(startc); | |
2544 } | |
7 | 2545 break; |
2546 case CLASS_ALNUM: | |
2547 for (cu = 1; cu <= 255; cu++) | |
2548 if (isalnum(cu)) | |
2549 regc(cu); | |
2550 break; | |
2551 case CLASS_ALPHA: | |
2552 for (cu = 1; cu <= 255; cu++) | |
2553 if (isalpha(cu)) | |
2554 regc(cu); | |
2555 break; | |
2556 case CLASS_BLANK: | |
2557 regc(' '); | |
2558 regc('\t'); | |
2559 break; | |
2560 case CLASS_CNTRL: | |
2561 for (cu = 1; cu <= 255; cu++) | |
2562 if (iscntrl(cu)) | |
2563 regc(cu); | |
2564 break; | |
2565 case CLASS_DIGIT: | |
2566 for (cu = 1; cu <= 255; cu++) | |
2567 if (VIM_ISDIGIT(cu)) | |
2568 regc(cu); | |
2569 break; | |
2570 case CLASS_GRAPH: | |
2571 for (cu = 1; cu <= 255; cu++) | |
2572 if (isgraph(cu)) | |
2573 regc(cu); | |
2574 break; | |
2575 case CLASS_LOWER: | |
2576 for (cu = 1; cu <= 255; cu++) | |
1347 | 2577 if (MB_ISLOWER(cu)) |
7 | 2578 regc(cu); |
2579 break; | |
2580 case CLASS_PRINT: | |
2581 for (cu = 1; cu <= 255; cu++) | |
2582 if (vim_isprintc(cu)) | |
2583 regc(cu); | |
2584 break; | |
2585 case CLASS_PUNCT: | |
2586 for (cu = 1; cu <= 255; cu++) | |
2587 if (ispunct(cu)) | |
2588 regc(cu); | |
2589 break; | |
2590 case CLASS_SPACE: | |
2591 for (cu = 9; cu <= 13; cu++) | |
2592 regc(cu); | |
2593 regc(' '); | |
2594 break; | |
2595 case CLASS_UPPER: | |
2596 for (cu = 1; cu <= 255; cu++) | |
1347 | 2597 if (MB_ISUPPER(cu)) |
7 | 2598 regc(cu); |
2599 break; | |
2600 case CLASS_XDIGIT: | |
2601 for (cu = 1; cu <= 255; cu++) | |
2602 if (vim_isxdigit(cu)) | |
2603 regc(cu); | |
2604 break; | |
2605 case CLASS_TAB: | |
2606 regc('\t'); | |
2607 break; | |
2608 case CLASS_RETURN: | |
2609 regc('\r'); | |
2610 break; | |
2611 case CLASS_BACKSPACE: | |
2612 regc('\b'); | |
2613 break; | |
2614 case CLASS_ESCAPE: | |
2615 regc('\033'); | |
2616 break; | |
2617 } | |
2618 } | |
2619 else | |
2620 { | |
2621 #ifdef FEAT_MBYTE | |
2622 if (has_mbyte) | |
2623 { | |
2624 int len; | |
2625 | |
2626 /* produce a multibyte character, including any | |
2627 * following composing characters */ | |
2628 startc = mb_ptr2char(regparse); | |
474 | 2629 len = (*mb_ptr2len)(regparse); |
7 | 2630 if (enc_utf8 && utf_char2len(startc) != len) |
2631 startc = -1; /* composing chars */ | |
2632 while (--len >= 0) | |
2633 regc(*regparse++); | |
2634 } | |
2635 else | |
2636 #endif | |
2637 { | |
2638 startc = *regparse++; | |
2639 regc(startc); | |
2640 } | |
2641 } | |
2642 } | |
2643 regc(NUL); | |
2644 prevchr_len = 1; /* last char was the ']' */ | |
2645 if (*regparse != ']') | |
2646 EMSG_RET_NULL(_(e_toomsbra)); /* Cannot happen? */ | |
2647 skipchr(); /* let's be friends with the lexer again */ | |
2648 *flagp |= HASWIDTH | SIMPLE; | |
2649 break; | |
2650 } | |
481 | 2651 else if (reg_strict) |
4444 | 2652 EMSG2_RET_NULL(_(e_missingbracket), reg_magic > MAGIC_OFF); |
7 | 2653 } |
2654 /* FALLTHROUGH */ | |
2655 | |
2656 default: | |
2657 { | |
2658 int len; | |
2659 | |
2660 #ifdef FEAT_MBYTE | |
2661 /* A multi-byte character is handled as a separate atom if it's | |
714 | 2662 * before a multi and when it's a composing char. */ |
2663 if (use_multibytecode(c)) | |
7 | 2664 { |
714 | 2665 do_multibyte: |
7 | 2666 ret = regnode(MULTIBYTECODE); |
2667 regmbc(c); | |
2668 *flagp |= HASWIDTH | SIMPLE; | |
2669 break; | |
2670 } | |
2671 #endif | |
2672 | |
2673 ret = regnode(EXACTLY); | |
2674 | |
2675 /* | |
2676 * Append characters as long as: | |
2677 * - there is no following multi, we then need the character in | |
2678 * front of it as a single character operand | |
2679 * - not running into a Magic character | |
2680 * - "one_exactly" is not set | |
2681 * But always emit at least one character. Might be a Multi, | |
2682 * e.g., a "[" without matching "]". | |
2683 */ | |
2684 for (len = 0; c != NUL && (len == 0 | |
2685 || (re_multi_type(peekchr()) == NOT_MULTI | |
2686 && !one_exactly | |
2687 && !is_Magic(c))); ++len) | |
2688 { | |
2689 c = no_Magic(c); | |
2690 #ifdef FEAT_MBYTE | |
2691 if (has_mbyte) | |
2692 { | |
2693 regmbc(c); | |
2694 if (enc_utf8) | |
2695 { | |
2696 int l; | |
2697 | |
714 | 2698 /* Need to get composing character too. */ |
7 | 2699 for (;;) |
2700 { | |
714 | 2701 l = utf_ptr2len(regparse); |
2702 if (!UTF_COMPOSINGLIKE(regparse, regparse + l)) | |
7 | 2703 break; |
714 | 2704 regmbc(utf_ptr2char(regparse)); |
2705 skipchr(); | |
7 | 2706 } |
2707 } | |
2708 } | |
2709 else | |
2710 #endif | |
2711 regc(c); | |
2712 c = getchr(); | |
2713 } | |
2714 ungetchr(); | |
2715 | |
2716 regc(NUL); | |
2717 *flagp |= HASWIDTH; | |
2718 if (len == 1) | |
2719 *flagp |= SIMPLE; | |
2720 } | |
2721 break; | |
2722 } | |
2723 | |
2724 return ret; | |
2725 } | |
2726 | |
714 | 2727 #ifdef FEAT_MBYTE |
2728 /* | |
2729 * Return TRUE if MULTIBYTECODE should be used instead of EXACTLY for | |
2730 * character "c". | |
2731 */ | |
2732 static int | |
2733 use_multibytecode(c) | |
2734 int c; | |
2735 { | |
2736 return has_mbyte && (*mb_char2len)(c) > 1 | |
2737 && (re_multi_type(peekchr()) != NOT_MULTI | |
2738 || (enc_utf8 && utf_iscomposing(c))); | |
2739 } | |
2740 #endif | |
2741 | |
7 | 2742 /* |
4444 | 2743 * Emit a node. |
7 | 2744 * Return pointer to generated code. |
2745 */ | |
2746 static char_u * | |
2747 regnode(op) | |
2748 int op; | |
2749 { | |
2750 char_u *ret; | |
2751 | |
2752 ret = regcode; | |
2753 if (ret == JUST_CALC_SIZE) | |
2754 regsize += 3; | |
2755 else | |
2756 { | |
2757 *regcode++ = op; | |
2758 *regcode++ = NUL; /* Null "next" pointer. */ | |
2759 *regcode++ = NUL; | |
2760 } | |
2761 return ret; | |
2762 } | |
2763 | |
2764 /* | |
2765 * Emit (if appropriate) a byte of code | |
2766 */ | |
2767 static void | |
2768 regc(b) | |
2769 int b; | |
2770 { | |
2771 if (regcode == JUST_CALC_SIZE) | |
2772 regsize++; | |
2773 else | |
2774 *regcode++ = b; | |
2775 } | |
2776 | |
2777 #ifdef FEAT_MBYTE | |
2778 /* | |
2779 * Emit (if appropriate) a multi-byte character of code | |
2780 */ | |
2781 static void | |
2782 regmbc(c) | |
2783 int c; | |
2784 { | |
2974 | 2785 if (!has_mbyte && c > 0xff) |
2786 return; | |
7 | 2787 if (regcode == JUST_CALC_SIZE) |
2788 regsize += (*mb_char2len)(c); | |
2789 else | |
2790 regcode += (*mb_char2bytes)(c, regcode); | |
2791 } | |
2792 #endif | |
2793 | |
2794 /* | |
4444 | 2795 * Insert an operator in front of already-emitted operand |
7 | 2796 * |
2797 * Means relocating the operand. | |
2798 */ | |
2799 static void | |
2800 reginsert(op, opnd) | |
2801 int op; | |
2802 char_u *opnd; | |
2803 { | |
2804 char_u *src; | |
2805 char_u *dst; | |
2806 char_u *place; | |
2807 | |
2808 if (regcode == JUST_CALC_SIZE) | |
2809 { | |
2810 regsize += 3; | |
2811 return; | |
2812 } | |
2813 src = regcode; | |
2814 regcode += 3; | |
2815 dst = regcode; | |
2816 while (src > opnd) | |
2817 *--dst = *--src; | |
2818 | |
2819 place = opnd; /* Op node, where operand used to be. */ | |
2820 *place++ = op; | |
2821 *place++ = NUL; | |
2822 *place = NUL; | |
2823 } | |
2824 | |
2825 /* | |
4444 | 2826 * Insert an operator in front of already-emitted operand. |
4579
7a2be4a39423
updated for version 7.3.1037
Bram Moolenaar <bram@vim.org>
parents:
4505
diff
changeset
|
2827 * Add a number to the operator. |
7a2be4a39423
updated for version 7.3.1037
Bram Moolenaar <bram@vim.org>
parents:
4505
diff
changeset
|
2828 */ |
7a2be4a39423
updated for version 7.3.1037
Bram Moolenaar <bram@vim.org>
parents:
4505
diff
changeset
|
2829 static void |
7a2be4a39423
updated for version 7.3.1037
Bram Moolenaar <bram@vim.org>
parents:
4505
diff
changeset
|
2830 reginsert_nr(op, val, opnd) |
7a2be4a39423
updated for version 7.3.1037
Bram Moolenaar <bram@vim.org>
parents:
4505
diff
changeset
|
2831 int op; |
7a2be4a39423
updated for version 7.3.1037
Bram Moolenaar <bram@vim.org>
parents:
4505
diff
changeset
|
2832 long val; |
7a2be4a39423
updated for version 7.3.1037
Bram Moolenaar <bram@vim.org>
parents:
4505
diff
changeset
|
2833 char_u *opnd; |
7a2be4a39423
updated for version 7.3.1037
Bram Moolenaar <bram@vim.org>
parents:
4505
diff
changeset
|
2834 { |
7a2be4a39423
updated for version 7.3.1037
Bram Moolenaar <bram@vim.org>
parents:
4505
diff
changeset
|
2835 char_u *src; |
7a2be4a39423
updated for version 7.3.1037
Bram Moolenaar <bram@vim.org>
parents:
4505
diff
changeset
|
2836 char_u *dst; |
7a2be4a39423
updated for version 7.3.1037
Bram Moolenaar <bram@vim.org>
parents:
4505
diff
changeset
|
2837 char_u *place; |
7a2be4a39423
updated for version 7.3.1037
Bram Moolenaar <bram@vim.org>
parents:
4505
diff
changeset
|
2838 |
7a2be4a39423
updated for version 7.3.1037
Bram Moolenaar <bram@vim.org>
parents:
4505
diff
changeset
|
2839 if (regcode == JUST_CALC_SIZE) |
7a2be4a39423
updated for version 7.3.1037
Bram Moolenaar <bram@vim.org>
parents:
4505
diff
changeset
|
2840 { |
7a2be4a39423
updated for version 7.3.1037
Bram Moolenaar <bram@vim.org>
parents:
4505
diff
changeset
|
2841 regsize += 7; |
7a2be4a39423
updated for version 7.3.1037
Bram Moolenaar <bram@vim.org>
parents:
4505
diff
changeset
|
2842 return; |
7a2be4a39423
updated for version 7.3.1037
Bram Moolenaar <bram@vim.org>
parents:
4505
diff
changeset
|
2843 } |
7a2be4a39423
updated for version 7.3.1037
Bram Moolenaar <bram@vim.org>
parents:
4505
diff
changeset
|
2844 src = regcode; |
7a2be4a39423
updated for version 7.3.1037
Bram Moolenaar <bram@vim.org>
parents:
4505
diff
changeset
|
2845 regcode += 7; |
7a2be4a39423
updated for version 7.3.1037
Bram Moolenaar <bram@vim.org>
parents:
4505
diff
changeset
|
2846 dst = regcode; |
7a2be4a39423
updated for version 7.3.1037
Bram Moolenaar <bram@vim.org>
parents:
4505
diff
changeset
|
2847 while (src > opnd) |
7a2be4a39423
updated for version 7.3.1037
Bram Moolenaar <bram@vim.org>
parents:
4505
diff
changeset
|
2848 *--dst = *--src; |
7a2be4a39423
updated for version 7.3.1037
Bram Moolenaar <bram@vim.org>
parents:
4505
diff
changeset
|
2849 |
7a2be4a39423
updated for version 7.3.1037
Bram Moolenaar <bram@vim.org>
parents:
4505
diff
changeset
|
2850 place = opnd; /* Op node, where operand used to be. */ |
7a2be4a39423
updated for version 7.3.1037
Bram Moolenaar <bram@vim.org>
parents:
4505
diff
changeset
|
2851 *place++ = op; |
7a2be4a39423
updated for version 7.3.1037
Bram Moolenaar <bram@vim.org>
parents:
4505
diff
changeset
|
2852 *place++ = NUL; |
7a2be4a39423
updated for version 7.3.1037
Bram Moolenaar <bram@vim.org>
parents:
4505
diff
changeset
|
2853 *place++ = NUL; |
7a2be4a39423
updated for version 7.3.1037
Bram Moolenaar <bram@vim.org>
parents:
4505
diff
changeset
|
2854 place = re_put_long(place, (long_u)val); |
7a2be4a39423
updated for version 7.3.1037
Bram Moolenaar <bram@vim.org>
parents:
4505
diff
changeset
|
2855 } |
7a2be4a39423
updated for version 7.3.1037
Bram Moolenaar <bram@vim.org>
parents:
4505
diff
changeset
|
2856 |
7a2be4a39423
updated for version 7.3.1037
Bram Moolenaar <bram@vim.org>
parents:
4505
diff
changeset
|
2857 /* |
7a2be4a39423
updated for version 7.3.1037
Bram Moolenaar <bram@vim.org>
parents:
4505
diff
changeset
|
2858 * Insert an operator in front of already-emitted operand. |
7 | 2859 * The operator has the given limit values as operands. Also set next pointer. |
2860 * | |
2861 * Means relocating the operand. | |
2862 */ | |
2863 static void | |
2864 reginsert_limits(op, minval, maxval, opnd) | |
2865 int op; | |
2866 long minval; | |
2867 long maxval; | |
2868 char_u *opnd; | |
2869 { | |
2870 char_u *src; | |
2871 char_u *dst; | |
2872 char_u *place; | |
2873 | |
2874 if (regcode == JUST_CALC_SIZE) | |
2875 { | |
2876 regsize += 11; | |
2877 return; | |
2878 } | |
2879 src = regcode; | |
2880 regcode += 11; | |
2881 dst = regcode; | |
2882 while (src > opnd) | |
2883 *--dst = *--src; | |
2884 | |
2885 place = opnd; /* Op node, where operand used to be. */ | |
2886 *place++ = op; | |
2887 *place++ = NUL; | |
2888 *place++ = NUL; | |
2889 place = re_put_long(place, (long_u)minval); | |
2890 place = re_put_long(place, (long_u)maxval); | |
2891 regtail(opnd, place); | |
2892 } | |
2893 | |
2894 /* | |
2895 * Write a long as four bytes at "p" and return pointer to the next char. | |
2896 */ | |
2897 static char_u * | |
2898 re_put_long(p, val) | |
2899 char_u *p; | |
2900 long_u val; | |
2901 { | |
2902 *p++ = (char_u) ((val >> 24) & 0377); | |
2903 *p++ = (char_u) ((val >> 16) & 0377); | |
2904 *p++ = (char_u) ((val >> 8) & 0377); | |
2905 *p++ = (char_u) (val & 0377); | |
2906 return p; | |
2907 } | |
2908 | |
2909 /* | |
4444 | 2910 * Set the next-pointer at the end of a node chain. |
7 | 2911 */ |
2912 static void | |
2913 regtail(p, val) | |
2914 char_u *p; | |
2915 char_u *val; | |
2916 { | |
2917 char_u *scan; | |
2918 char_u *temp; | |
2919 int offset; | |
2920 | |
2921 if (p == JUST_CALC_SIZE) | |
2922 return; | |
2923 | |
2924 /* Find last node. */ | |
2925 scan = p; | |
2926 for (;;) | |
2927 { | |
2928 temp = regnext(scan); | |
2929 if (temp == NULL) | |
2930 break; | |
2931 scan = temp; | |
2932 } | |
2933 | |
233 | 2934 if (OP(scan) == BACK) |
7 | 2935 offset = (int)(scan - val); |
2936 else | |
2937 offset = (int)(val - scan); | |
2010 | 2938 /* When the offset uses more than 16 bits it can no longer fit in the two |
2974 | 2939 * bytes available. Use a global flag to avoid having to check return |
2010 | 2940 * values in too many places. */ |
2941 if (offset > 0xffff) | |
2942 reg_toolong = TRUE; | |
2943 else | |
2944 { | |
2945 *(scan + 1) = (char_u) (((unsigned)offset >> 8) & 0377); | |
2946 *(scan + 2) = (char_u) (offset & 0377); | |
2947 } | |
7 | 2948 } |
2949 | |
2950 /* | |
4444 | 2951 * Like regtail, on item after a BRANCH; nop if none. |
7 | 2952 */ |
2953 static void | |
2954 regoptail(p, val) | |
2955 char_u *p; | |
2956 char_u *val; | |
2957 { | |
2958 /* When op is neither BRANCH nor BRACE_COMPLEX0-9, it is "operandless" */ | |
2959 if (p == NULL || p == JUST_CALC_SIZE | |
2960 || (OP(p) != BRANCH | |
2961 && (OP(p) < BRACE_COMPLEX || OP(p) > BRACE_COMPLEX + 9))) | |
2962 return; | |
2963 regtail(OPERAND(p), val); | |
2964 } | |
2965 | |
/*
 * Functions for getting characters from the regexp input.
 */

/* True when on the first character */
static int      at_start;
/* True when on the second character */
static int      prev_at_start;
2972 | |
4444 | 2973 /* |
2974 * Start parsing at "str". | |
2975 */ | |
7 | 2976 static void |
2977 initchr(str) | |
2978 char_u *str; | |
2979 { | |
2980 regparse = str; | |
2981 prevchr_len = 0; | |
2982 curchr = prevprevchr = prevchr = nextchr = -1; | |
2983 at_start = TRUE; | |
2984 prev_at_start = FALSE; | |
2985 } | |
2986 | |
4444 | 2987 /* |
4679
4d92b873acef
updated for version 7.3.1087
Bram Moolenaar <bram@vim.org>
parents:
4579
diff
changeset
|
2988 * Save the current parse state, so that it can be restored and parsing |
4d92b873acef
updated for version 7.3.1087
Bram Moolenaar <bram@vim.org>
parents:
4579
diff
changeset
|
2989 * starts in the same state again. |
4d92b873acef
updated for version 7.3.1087
Bram Moolenaar <bram@vim.org>
parents:
4579
diff
changeset
|
2990 */ |
4d92b873acef
updated for version 7.3.1087
Bram Moolenaar <bram@vim.org>
parents:
4579
diff
changeset
|
2991 static void |
4d92b873acef
updated for version 7.3.1087
Bram Moolenaar <bram@vim.org>
parents:
4579
diff
changeset
|
2992 save_parse_state(ps) |
4d92b873acef
updated for version 7.3.1087
Bram Moolenaar <bram@vim.org>
parents:
4579
diff
changeset
|
2993 parse_state_T *ps; |
4d92b873acef
updated for version 7.3.1087
Bram Moolenaar <bram@vim.org>
parents:
4579
diff
changeset
|
2994 { |
4d92b873acef
updated for version 7.3.1087
Bram Moolenaar <bram@vim.org>
parents:
4579
diff
changeset
|
2995 ps->regparse = regparse; |
4d92b873acef
updated for version 7.3.1087
Bram Moolenaar <bram@vim.org>
parents:
4579
diff
changeset
|
2996 ps->prevchr_len = prevchr_len; |
4d92b873acef
updated for version 7.3.1087
Bram Moolenaar <bram@vim.org>
parents:
4579
diff
changeset
|
2997 ps->curchr = curchr; |
4d92b873acef
updated for version 7.3.1087
Bram Moolenaar <bram@vim.org>
parents:
4579
diff
changeset
|
2998 ps->prevchr = prevchr; |
4d92b873acef
updated for version 7.3.1087
Bram Moolenaar <bram@vim.org>
parents:
4579
diff
changeset
|
2999 ps->prevprevchr = prevprevchr; |
4d92b873acef
updated for version 7.3.1087
Bram Moolenaar <bram@vim.org>
parents:
4579
diff
changeset
|
3000 ps->nextchr = nextchr; |
4d92b873acef
updated for version 7.3.1087
Bram Moolenaar <bram@vim.org>
parents:
4579
diff
changeset
|
3001 ps->at_start = at_start; |
4d92b873acef
updated for version 7.3.1087
Bram Moolenaar <bram@vim.org>
parents:
4579
diff
changeset
|
3002 ps->prev_at_start = prev_at_start; |
4d92b873acef
updated for version 7.3.1087
Bram Moolenaar <bram@vim.org>
parents:
4579
diff
changeset
|
3003 ps->regnpar = regnpar; |
4d92b873acef
updated for version 7.3.1087
Bram Moolenaar <bram@vim.org>
parents:
4579
diff
changeset
|
3004 } |
4d92b873acef
updated for version 7.3.1087
Bram Moolenaar <bram@vim.org>
parents:
4579
diff
changeset
|
3005 |
4d92b873acef
updated for version 7.3.1087
Bram Moolenaar <bram@vim.org>
parents:
4579
diff
changeset
|
3006 /* |
4d92b873acef
updated for version 7.3.1087
Bram Moolenaar <bram@vim.org>
parents:
4579
diff
changeset
|
3007 * Restore a previously saved parse state. |
4d92b873acef
updated for version 7.3.1087
Bram Moolenaar <bram@vim.org>
parents:
4579
diff
changeset
|
3008 */ |
4d92b873acef
updated for version 7.3.1087
Bram Moolenaar <bram@vim.org>
parents:
4579
diff
changeset
|
3009 static void |
4d92b873acef
updated for version 7.3.1087
Bram Moolenaar <bram@vim.org>
parents:
4579
diff
changeset
|
3010 restore_parse_state(ps) |
4d92b873acef
updated for version 7.3.1087
Bram Moolenaar <bram@vim.org>
parents:
4579
diff
changeset
|
3011 parse_state_T *ps; |
4d92b873acef
updated for version 7.3.1087
Bram Moolenaar <bram@vim.org>
parents:
4579
diff
changeset
|
3012 { |
4d92b873acef
updated for version 7.3.1087
Bram Moolenaar <bram@vim.org>
parents:
4579
diff
changeset
|
3013 regparse = ps->regparse; |
4d92b873acef
updated for version 7.3.1087
Bram Moolenaar <bram@vim.org>
parents:
4579
diff
changeset
|
3014 prevchr_len = ps->prevchr_len; |
4d92b873acef
updated for version 7.3.1087
Bram Moolenaar <bram@vim.org>
parents:
4579
diff
changeset
|
3015 curchr = ps->curchr; |
4d92b873acef
updated for version 7.3.1087
Bram Moolenaar <bram@vim.org>
parents:
4579
diff
changeset
|
3016 prevchr = ps->prevchr; |
4d92b873acef
updated for version 7.3.1087
Bram Moolenaar <bram@vim.org>
parents:
4579
diff
changeset
|
3017 prevprevchr = ps->prevprevchr; |
4d92b873acef
updated for version 7.3.1087
Bram Moolenaar <bram@vim.org>
parents:
4579
diff
changeset
|
3018 nextchr = ps->nextchr; |
4d92b873acef
updated for version 7.3.1087
Bram Moolenaar <bram@vim.org>
parents:
4579
diff
changeset
|
3019 at_start = ps->at_start; |
4d92b873acef
updated for version 7.3.1087
Bram Moolenaar <bram@vim.org>
parents:
4579
diff
changeset
|
3020 prev_at_start = ps->prev_at_start; |
4d92b873acef
updated for version 7.3.1087
Bram Moolenaar <bram@vim.org>
parents:
4579
diff
changeset
|
3021 regnpar = ps->regnpar; |
4d92b873acef
updated for version 7.3.1087
Bram Moolenaar <bram@vim.org>
parents:
4579
diff
changeset
|
3022 } |
4d92b873acef
updated for version 7.3.1087
Bram Moolenaar <bram@vim.org>
parents:
4579
diff
changeset
|
3023 |
4d92b873acef
updated for version 7.3.1087
Bram Moolenaar <bram@vim.org>
parents:
4579
diff
changeset
|
3024 |
4d92b873acef
updated for version 7.3.1087
Bram Moolenaar <bram@vim.org>
parents:
4579
diff
changeset
|
3025 /* |
4444 | 3026 * Get the next character without advancing. |
3027 */ | |
7 | 3028 static int |
3029 peekchr() | |
3030 { | |
167 | 3031 static int after_slash = FALSE; |
3032 | |
7 | 3033 if (curchr == -1) |
3034 { | |
3035 switch (curchr = regparse[0]) | |
3036 { | |
3037 case '.': | |
3038 case '[': | |
3039 case '~': | |
3040 /* magic when 'magic' is on */ | |
3041 if (reg_magic >= MAGIC_ON) | |
3042 curchr = Magic(curchr); | |
3043 break; | |
3044 case '(': | |
3045 case ')': | |
3046 case '{': | |
3047 case '%': | |
3048 case '+': | |
3049 case '=': | |
3050 case '?': | |
3051 case '@': | |
3052 case '!': | |
3053 case '&': | |
3054 case '|': | |
3055 case '<': | |
3056 case '>': | |
3057 case '#': /* future ext. */ | |
3058 case '"': /* future ext. */ | |
3059 case '\'': /* future ext. */ | |
3060 case ',': /* future ext. */ | |
3061 case '-': /* future ext. */ | |
3062 case ':': /* future ext. */ | |
3063 case ';': /* future ext. */ | |
3064 case '`': /* future ext. */ | |
3065 case '/': /* Can't be used in / command */ | |
3066 /* magic only after "\v" */ | |
3067 if (reg_magic == MAGIC_ALL) | |
3068 curchr = Magic(curchr); | |
3069 break; | |
3070 case '*': | |
167 | 3071 /* * is not magic as the very first character, eg "?*ptr", when |
3072 * after '^', eg "/^*ptr" and when after "\(", "\|", "\&". But | |
3073 * "\(\*" is not magic, thus must be magic if "after_slash" */ | |
3074 if (reg_magic >= MAGIC_ON | |
3075 && !at_start | |
3076 && !(prev_at_start && prevchr == Magic('^')) | |
3077 && (after_slash | |
3078 || (prevchr != Magic('(') | |
3079 && prevchr != Magic('&') | |
3080 && prevchr != Magic('|')))) | |
7 | 3081 curchr = Magic('*'); |
3082 break; | |
3083 case '^': | |
3084 /* '^' is only magic as the very first character and if it's after | |
3085 * "\(", "\|", "\&' or "\n" */ | |
3086 if (reg_magic >= MAGIC_OFF | |
3087 && (at_start | |
3088 || reg_magic == MAGIC_ALL | |
3089 || prevchr == Magic('(') | |
3090 || prevchr == Magic('|') | |
3091 || prevchr == Magic('&') | |
3092 || prevchr == Magic('n') | |
3093 || (no_Magic(prevchr) == '(' | |
3094 && prevprevchr == Magic('%')))) | |
3095 { | |
3096 curchr = Magic('^'); | |
3097 at_start = TRUE; | |
3098 prev_at_start = FALSE; | |
3099 } | |
3100 break; | |
3101 case '$': | |
3102 /* '$' is only magic as the very last char and if it's in front of | |
3103 * either "\|", "\)", "\&", or "\n" */ | |
3104 if (reg_magic >= MAGIC_OFF) | |
3105 { | |
3106 char_u *p = regparse + 1; | |
3107 | |
3108 /* ignore \c \C \m and \M after '$' */ | |
3109 while (p[0] == '\\' && (p[1] == 'c' || p[1] == 'C' | |
3110 || p[1] == 'm' || p[1] == 'M' || p[1] == 'Z')) | |
3111 p += 2; | |
3112 if (p[0] == NUL | |
3113 || (p[0] == '\\' | |
3114 && (p[1] == '|' || p[1] == '&' || p[1] == ')' | |
3115 || p[1] == 'n')) | |
3116 || reg_magic == MAGIC_ALL) | |
3117 curchr = Magic('$'); | |
3118 } | |
3119 break; | |
3120 case '\\': | |
3121 { | |
3122 int c = regparse[1]; | |
3123 | |
3124 if (c == NUL) | |
3125 curchr = '\\'; /* trailing '\' */ | |
3126 else if ( | |
3127 #ifdef EBCDIC | |
3128 vim_strchr(META, c) | |
3129 #else | |
3130 c <= '~' && META_flags[c] | |
3131 #endif | |
3132 ) | |
3133 { | |
3134 /* | |
3135 * META contains everything that may be magic sometimes, | |
3136 * except ^ and $ ("\^" and "\$" are only magic after | |
3137 * "\v"). We now fetch the next character and toggle its | |
3138 * magicness. Therefore, \ is so meta-magic that it is | |
3139 * not in META. | |
3140 */ | |
3141 curchr = -1; | |
3142 prev_at_start = at_start; | |
3143 at_start = FALSE; /* be able to say "/\*ptr" */ | |
3144 ++regparse; | |
167 | 3145 ++after_slash; |
7 | 3146 peekchr(); |
3147 --regparse; | |
167 | 3148 --after_slash; |
7 | 3149 curchr = toggle_Magic(curchr); |
3150 } | |
3151 else if (vim_strchr(REGEXP_ABBR, c)) | |
3152 { | |
3153 /* | |
3154 * Handle abbreviations, like "\t" for TAB -- webb | |
3155 */ | |
3156 curchr = backslash_trans(c); | |
3157 } | |
3158 else if (reg_magic == MAGIC_NONE && (c == '$' || c == '^')) | |
3159 curchr = toggle_Magic(c); | |
3160 else | |
3161 { | |
3162 /* | |
3163 * Next character can never be (made) magic? | |
3164 * Then backslashing it won't do anything. | |
3165 */ | |
3166 #ifdef FEAT_MBYTE | |
3167 if (has_mbyte) | |
3168 curchr = (*mb_ptr2char)(regparse + 1); | |
3169 else | |
3170 #endif | |
3171 curchr = c; | |
3172 } | |
3173 break; | |
3174 } | |
3175 | |
3176 #ifdef FEAT_MBYTE | |
3177 default: | |
3178 if (has_mbyte) | |
3179 curchr = (*mb_ptr2char)(regparse); | |
3180 #endif | |
3181 } | |
3182 } | |
3183 | |
3184 return curchr; | |
3185 } | |
3186 | |
3187 /* | |
3188 * Eat one lexed character. Do this in a way that we can undo it. | |
3189 */ | |
3190 static void | |
3191 skipchr() | |
3192 { | |
3193 /* peekchr() eats a backslash, do the same here */ | |
3194 if (*regparse == '\\') | |
3195 prevchr_len = 1; | |
3196 else | |
3197 prevchr_len = 0; | |
3198 if (regparse[prevchr_len] != NUL) | |
3199 { | |
3200 #ifdef FEAT_MBYTE | |
714 | 3201 if (enc_utf8) |
1449 | 3202 /* exclude composing chars that mb_ptr2len does include */ |
3203 prevchr_len += utf_ptr2len(regparse + prevchr_len); | |
714 | 3204 else if (has_mbyte) |
474 | 3205 prevchr_len += (*mb_ptr2len)(regparse + prevchr_len); |
7 | 3206 else |
3207 #endif | |
3208 ++prevchr_len; | |
3209 } | |
3210 regparse += prevchr_len; | |
3211 prev_at_start = at_start; | |
3212 at_start = FALSE; | |
3213 prevprevchr = prevchr; | |
3214 prevchr = curchr; | |
3215 curchr = nextchr; /* use previously unget char, or -1 */ | |
3216 nextchr = -1; | |
3217 } | |
3218 | |
3219 /* | |
3220 * Skip a character while keeping the value of prev_at_start for at_start. | |
3221 * prevchr and prevprevchr are also kept. | |
3222 */ | |
3223 static void | |
3224 skipchr_keepstart() | |
3225 { | |
3226 int as = prev_at_start; | |
3227 int pr = prevchr; | |
3228 int prpr = prevprevchr; | |
3229 | |
3230 skipchr(); | |
3231 at_start = as; | |
3232 prevchr = pr; | |
3233 prevprevchr = prpr; | |
3234 } | |
3235 | |
/*
 * Get the next character from the pattern and advance past it.  We know
 * about magic and such, so therefore we need a lexical analyzer: the
 * character is lexed by peekchr() and then consumed with skipchr().
 */
static int
getchr()
{
    int lexed;

    lexed = peekchr();
    skipchr();
    return lexed;
}
3248 | |
3249 /* | |
3250 * put character back. Works only once! | |
3251 */ | |
3252 static void | |
3253 ungetchr() | |
3254 { | |
3255 nextchr = curchr; | |
3256 curchr = prevchr; | |
3257 prevchr = prevprevchr; | |
3258 at_start = prev_at_start; | |
3259 prev_at_start = FALSE; | |
3260 | |
3261 /* Backup regparse, so that it's at the same position as before the | |
3262 * getchr(). */ | |
3263 regparse -= prevchr_len; | |
3264 } | |
3265 | |
3266 /* | |
29 | 3267 * Get and return the value of the hex string at the current position. |
3268 * Return -1 if there is no valid hex number. | |
3269 * The position is updated: | |
24 | 3270 * blahblah\%x20asdf |
856 | 3271 * before-^ ^-after |
24 | 3272 * The parameter controls the maximum number of input characters. This will be |
3273 * 2 when reading a \%x20 sequence and 4 when reading a \%u20AC sequence. | |
3274 */ | |
3275 static int | |
3276 gethexchrs(maxinputlen) | |
3277 int maxinputlen; | |
3278 { | |
3279 int nr = 0; | |
3280 int c; | |
3281 int i; | |
3282 | |
3283 for (i = 0; i < maxinputlen; ++i) | |
3284 { | |
3285 c = regparse[0]; | |
3286 if (!vim_isxdigit(c)) | |
3287 break; | |
3288 nr <<= 4; | |
3289 nr |= hex2nr(c); | |
3290 ++regparse; | |
3291 } | |
3292 | |
3293 if (i == 0) | |
3294 return -1; | |
3295 return nr; | |
3296 } | |
3297 | |
3298 /* | |
4579
7a2be4a39423
updated for version 7.3.1037
Bram Moolenaar <bram@vim.org>
parents:
4505
diff
changeset
|
3299 * Get and return the value of the decimal string immediately after the |
24 | 3300 * current position. Return -1 for invalid. Consumes all digits. |
3301 */ | |
3302 static int | |
3303 getdecchrs() | |
3304 { | |
3305 int nr = 0; | |
3306 int c; | |
3307 int i; | |
3308 | |
3309 for (i = 0; ; ++i) | |
3310 { | |
3311 c = regparse[0]; | |
3312 if (c < '0' || c > '9') | |
3313 break; | |
3314 nr *= 10; | |
3315 nr += c - '0'; | |
3316 ++regparse; | |
4579
7a2be4a39423
updated for version 7.3.1037
Bram Moolenaar <bram@vim.org>
parents:
4505
diff
changeset
|
3317 curchr = -1; /* no longer valid */ |
24 | 3318 } |
3319 | |
3320 if (i == 0) | |
3321 return -1; | |
3322 return nr; | |
3323 } | |
3324 | |
3325 /* | |
3326 * get and return the value of the octal string immediately after the current | |
3327 * position. Return -1 for invalid, or 0-255 for valid. Smart enough to handle | |
3328 * numbers > 377 correctly (for example, 400 is treated as 40) and doesn't | |
3329 * treat 8 or 9 as recognised characters. Position is updated: | |
3330 * blahblah\%o210asdf | |
856 | 3331 * before-^ ^-after |
24 | 3332 */ |
3333 static int | |
3334 getoctchrs() | |
3335 { | |
3336 int nr = 0; | |
3337 int c; | |
3338 int i; | |
3339 | |
3340 for (i = 0; i < 3 && nr < 040; ++i) | |
3341 { | |
3342 c = regparse[0]; | |
3343 if (c < '0' || c > '7') | |
3344 break; | |
3345 nr <<= 3; | |
3346 nr |= hex2nr(c); | |
3347 ++regparse; | |
3348 } | |
3349 | |
3350 if (i == 0) | |
3351 return -1; | |
3352 return nr; | |
3353 } | |
3354 | |
3355 /* | |
3356 * Get a number after a backslash that is inside []. | |
3357 * When nothing is recognized return a backslash. | |
3358 */ | |
3359 static int | |
3360 coll_get_char() | |
3361 { | |
3362 int nr = -1; | |
3363 | |
3364 switch (*regparse++) | |
3365 { | |
3366 case 'd': nr = getdecchrs(); break; | |
3367 case 'o': nr = getoctchrs(); break; | |
3368 case 'x': nr = gethexchrs(2); break; | |
3369 case 'u': nr = gethexchrs(4); break; | |
3370 case 'U': nr = gethexchrs(8); break; | |
3371 } | |
3372 if (nr < 0) | |
3373 { | |
3374 /* If getting the number fails be backwards compatible: the character | |
3375 * is a backslash. */ | |
3376 --regparse; | |
3377 nr = '\\'; | |
3378 } | |
3379 return nr; | |
3380 } | |
3381 | |
3382 /* | |
7 | 3383 * read_limits - Read two integers to be taken as a minimum and maximum. |
3384 * If the first character is '-', then the range is reversed. | |
3385 * Should end with 'end'. If minval is missing, zero is default, if maxval is | |
3386 * missing, a very big number is the default. | |
3387 */ | |
3388 static int | |
3389 read_limits(minval, maxval) | |
3390 long *minval; | |
3391 long *maxval; | |
3392 { | |
3393 int reverse = FALSE; | |
3394 char_u *first_char; | |
3395 long tmp; | |
3396 | |
3397 if (*regparse == '-') | |
3398 { | |
3399 /* Starts with '-', so reverse the range later */ | |
3400 regparse++; | |
3401 reverse = TRUE; | |
3402 } | |
3403 first_char = regparse; | |
3404 *minval = getdigits(®parse); | |
3405 if (*regparse == ',') /* There is a comma */ | |
3406 { | |
3407 if (vim_isdigit(*++regparse)) | |
3408 *maxval = getdigits(®parse); | |
3409 else | |
3410 *maxval = MAX_LIMIT; | |
3411 } | |
3412 else if (VIM_ISDIGIT(*first_char)) | |
3413 *maxval = *minval; /* It was \{n} or \{-n} */ | |
3414 else | |
3415 *maxval = MAX_LIMIT; /* It was \{} or \{-} */ | |
3416 if (*regparse == '\\') | |
3417 regparse++; /* Allow either \{...} or \{...\} */ | |
167 | 3418 if (*regparse != '}') |
7 | 3419 { |
3420 sprintf((char *)IObuff, _("E554: Syntax error in %s{...}"), | |
3421 reg_magic == MAGIC_ALL ? "" : "\\"); | |
3422 EMSG_RET_FAIL(IObuff); | |
3423 } | |
3424 | |
3425 /* | |
3426 * Reverse the range if there was a '-', or make sure it is in the right | |
3427 * order otherwise. | |
3428 */ | |
3429 if ((!reverse && *minval > *maxval) || (reverse && *minval < *maxval)) | |
3430 { | |
3431 tmp = *minval; | |
3432 *minval = *maxval; | |
3433 *maxval = tmp; | |
3434 } | |
3435 skipchr(); /* let's be friends with the lexer again */ | |
3436 return OK; | |
3437 } | |
3438 | |
3439 /* | |
3440 * vim_regexec and friends | |
3441 */ | |
3442 | |
3443 /* | |
3444 * Global work variables for vim_regexec(). | |
3445 */ | |
3446 | |
3447 /* The current match-position is remembered with these variables: */ | |
3448 static linenr_T reglnum; /* line number, relative to first line */ | |
3449 static char_u *regline; /* start of current line */ | |
3450 static char_u *reginput; /* current input, points into "regline" */ | |
3451 | |
3452 static int need_clear_subexpr; /* subexpressions still need to be | |
3453 * cleared */ | |
3454 #ifdef FEAT_SYN_HL | |
3455 static int need_clear_zsubexpr = FALSE; /* extmatch subexpressions | |
3456 * still need to be cleared */ | |
3457 #endif | |
3458 | |
3459 /* | |
3460 * Structure used to save the current input state, when it needs to be | |
3461 * restored after trying a match. Used by reg_save() and reg_restore(). | |
233 | 3462 * Also stores the length of "backpos". |
7 | 3463 */ |
3464 typedef struct | |
3465 { | |
3466 union | |
3467 { | |
3468 char_u *ptr; /* reginput pointer, for single-line regexp */ | |
3469 lpos_T pos; /* reginput pos, for multi-line regexp */ | |
3470 } rs_u; | |
233 | 3471 int rs_len; |
7 | 3472 } regsave_T; |
3473 | |
3474 /* struct to save start/end pointer/position in for \(\) */ | |
3475 typedef struct | |
3476 { | |
3477 union | |
3478 { | |
3479 char_u *ptr; | |
3480 lpos_T pos; | |
3481 } se_u; | |
3482 } save_se_T; | |
3483 | |
1579 | 3484 /* used for BEHIND and NOBEHIND matching */ |
3485 typedef struct regbehind_S | |
3486 { | |
3487 regsave_T save_after; | |
3488 regsave_T save_behind; | |
1602 | 3489 int save_need_clear_subexpr; |
1579 | 3490 save_se_T save_start[NSUBEXP]; |
3491 save_se_T save_end[NSUBEXP]; | |
3492 } regbehind_T; | |
3493 | |
7 | 3494 static char_u *reg_getline __ARGS((linenr_T lnum)); |
4444 | 3495 static long bt_regexec_both __ARGS((char_u *line, colnr_T col, proftime_T *tm)); |
3496 static long regtry __ARGS((bt_regprog_T *prog, colnr_T col)); | |
7 | 3497 static void cleanup_subexpr __ARGS((void)); |
3498 #ifdef FEAT_SYN_HL | |
3499 static void cleanup_zsubexpr __ARGS((void)); | |
3500 #endif | |
1579 | 3501 static void save_subexpr __ARGS((regbehind_T *bp)); |
3502 static void restore_subexpr __ARGS((regbehind_T *bp)); | |
7 | 3503 static void reg_nextline __ARGS((void)); |
233 | 3504 static void reg_save __ARGS((regsave_T *save, garray_T *gap)); |
3505 static void reg_restore __ARGS((regsave_T *save, garray_T *gap)); | |
7 | 3506 static int reg_save_equal __ARGS((regsave_T *save)); |
3507 static void save_se_multi __ARGS((save_se_T *savep, lpos_T *posp)); | |
3508 static void save_se_one __ARGS((save_se_T *savep, char_u **pp)); | |
3509 | |
3510 /* Save the sub-expressions before attempting a match. */ | |
3511 #define save_se(savep, posp, pp) \ | |
3512 REG_MULTI ? save_se_multi((savep), (posp)) : save_se_one((savep), (pp)) | |
3513 | |
3514 /* After a failed match restore the sub-expressions. */ | |
3515 #define restore_se(savep, posp, pp) { \ | |
3516 if (REG_MULTI) \ | |
3517 *(posp) = (savep)->se_u.pos; \ | |
3518 else \ | |
3519 *(pp) = (savep)->se_u.ptr; } | |
3520 | |
3521 static int re_num_cmp __ARGS((long_u val, char_u *scan)); | |
4891
4c42efb4c098
updated for version 7.3.1191
Bram Moolenaar <bram@vim.org>
parents:
4805
diff
changeset
|
3522 static int match_with_backref __ARGS((linenr_T start_lnum, colnr_T start_col, linenr_T end_lnum, colnr_T end_col, int *bytelen)); |
180 | 3523 static int regmatch __ARGS((char_u *prog)); |
7 | 3524 static int regrepeat __ARGS((char_u *p, long maxcount)); |
3525 | |
3526 #ifdef DEBUG | |
3527 int regnarrate = 0; | |
3528 #endif | |
3529 | |
3530 /* | |
3531 * Internal copy of 'ignorecase'. It is set at each call to vim_regexec(). | |
3532 * Normally it gets the value of "rm_ic" or "rmm_ic", but when the pattern | |
3533 * contains '\c' or '\C' the value is overruled. | |
3534 */ | |
3535 static int ireg_ic; | |
3536 | |
3537 #ifdef FEAT_MBYTE | |
3538 /* | |
3539 * Similar to ireg_ic, but only for 'combining' characters. Set with \Z flag | |
3540 * in the regexp. Defaults to false, always. | |
3541 */ | |
3542 static int ireg_icombine; | |
3543 #endif | |
3544 | |
3545 /* | |
410 | 3546 * Copy of "rmm_maxcol": maximum column to search for a match. Zero when |
3547 * there is no maximum. | |
3548 */ | |
418 | 3549 static colnr_T ireg_maxcol; |
410 | 3550 |
3551 /* | |
7 | 3552 * Sometimes need to save a copy of a line. Since alloc()/free() is very |
3553 * slow, we keep one allocated piece of memory and only re-allocate it when | |
4444 | 3554 * it's too small. It's freed in bt_regexec_both() when finished. |
7 | 3555 */ |
1468 | 3556 static char_u *reg_tofree = NULL; |
7 | 3557 static unsigned reg_tofreelen; |
3558 | |
3559 /* | |
3560 * These variables are set when executing a regexp to speed up the execution. | |
1209 | 3561 * Which ones are set depends on whether a single-line or multi-line match is |
7 | 3562 * done: |
3563 * single-line multi-line | |
3564 * reg_match ®match_T NULL | |
3565 * reg_mmatch NULL ®mmatch_T | |
3566 * reg_startp reg_match->startp <invalid> | |
3567 * reg_endp reg_match->endp <invalid> | |
3568 * reg_startpos <invalid> reg_mmatch->startpos | |
3569 * reg_endpos <invalid> reg_mmatch->endpos | |
3570 * reg_win NULL window in which to search | |
4061 | 3571 * reg_buf curbuf buffer in which to search |
7 | 3572 * reg_firstlnum <invalid> first line in which to search |
3573 * reg_maxline 0 last line nr | |
3574 * reg_line_lbr FALSE or TRUE FALSE | |
3575 */ | |
3576 static regmatch_T *reg_match; | |
3577 static regmmatch_T *reg_mmatch; | |
3578 static char_u **reg_startp = NULL; | |
3579 static char_u **reg_endp = NULL; | |
3580 static lpos_T *reg_startpos = NULL; | |
3581 static lpos_T *reg_endpos = NULL; | |
3582 static win_T *reg_win; | |
3583 static buf_T *reg_buf; | |
3584 static linenr_T reg_firstlnum; | |
3585 static linenr_T reg_maxline; | |
3586 static int reg_line_lbr; /* "\n" in string is line break */ | |
3587 | |
/* Values for rs_state in regitem_T. */
typedef enum regstate_E
{
    RS_NOPEN = 0,       /* NOPEN and NCLOSE */
    RS_MOPEN,           /* MOPEN + [0-9] */
    RS_MCLOSE,          /* MCLOSE + [0-9] */
#ifdef FEAT_SYN_HL
    RS_ZOPEN,           /* ZOPEN + [0-9] */
    RS_ZCLOSE,          /* ZCLOSE + [0-9] */
#endif
    RS_BRANCH,          /* BRANCH */
    RS_BRCPLX_MORE,     /* BRACE_COMPLEX and trying one more match */
    RS_BRCPLX_LONG,     /* BRACE_COMPLEX and trying longest match */
    RS_BRCPLX_SHORT,    /* BRACE_COMPLEX and trying shortest match */
    RS_NOMATCH,         /* NOMATCH */
    RS_BEHIND1,         /* BEHIND / NOBEHIND matching rest */
    RS_BEHIND2,         /* BEHIND / NOBEHIND matching behind part */
    RS_STAR_LONG,       /* STAR/PLUS/BRACE_SIMPLE longest match */
    RS_STAR_SHORT       /* STAR/PLUS/BRACE_SIMPLE shortest match */
} regstate_T;
3608 | |
3609 /* | |
3610 * When there are alternatives a regstate_T is put on the regstack to remember | |
3611 * what we are doing. | |
3612 * Before it may be another type of item, depending on rs_state, to remember | |
3613 * more things. | |
3614 */ | |
3615 typedef struct regitem_S | |
3616 { | |
3617 regstate_T rs_state; /* what we are doing, one of RS_ above */ | |
3618 char_u *rs_scan; /* current node in program */ | |
3619 union | |
3620 { | |
3621 save_se_T sesave; | |
3622 regsave_T regsave; | |
3623 } rs_un; /* room for saving reginput */ | |
1579 | 3624 short rs_no; /* submatch nr or BEHIND/NOBEHIND */ |
270 | 3625 } regitem_T; |
3626 | |
3627 static regitem_T *regstack_push __ARGS((regstate_T state, char_u *scan)); | |
3628 static void regstack_pop __ARGS((char_u **scan)); | |
3629 | |
/* Bookkeeping for STAR, PLUS and BRACE_SIMPLE matching. */
typedef struct regstar_S
{
    int         nextb;          /* next byte */
    int         nextb_ic;       /* next byte reverse case */
    long        count;
    long        minval;
    long        maxval;
} regstar_T;
3639 | |
3640 /* used to store input position when a BACK was encountered, so that we now if | |
3641 * we made any progress since the last time. */ | |
3642 typedef struct backpos_S | |
3643 { | |
3644 char_u *bp_scan; /* "scan" where BACK was encountered */ | |
3645 regsave_T bp_pos; /* last input position */ | |
3646 } backpos_T; | |
3647 | |
3648 /* | |
1520 | 3649 * "regstack" and "backpos" are used by regmatch(). They are kept over calls |
3650 * to avoid invoking malloc() and free() often. | |
3651 * "regstack" is a stack with regitem_T items, sometimes preceded by regstar_T | |
3652 * or regbehind_T. | |
3653 * "backpos_T" is a table with backpos_T for BACK | |
270 | 3654 */ |
1520 | 3655 static garray_T regstack = {0, 0, 0, 0, NULL}; |
3656 static garray_T backpos = {0, 0, 0, 0, NULL}; | |
3657 | |
3658 /* | |
3659 * Both for regstack and backpos tables we use the following strategy of | |
3660 * allocation (to reduce malloc/free calls): | |
3661 * - Initial size is fairly small. | |
3662 * - When needed, the tables are grown bigger (8 times at first, double after | |
3663 * that). | |
3664 * - After executing the match we free the memory only if the array has grown. | |
3665 * Thus the memory is kept allocated when it's at the initial size. | |
3666 * This makes it fast while not keeping a lot of memory allocated. | |
3667 * A three times speed increase was observed when using many simple patterns. | |
3668 */ | |
3669 #define REGSTACK_INITIAL 2048 | |
3670 #define BACKPOS_INITIAL 64 | |
3671 | |
3672 #if defined(EXITFREE) || defined(PROTO) | |
3673 void | |
3674 free_regexp_stuff() | |
3675 { | |
3676 ga_clear(®stack); | |
3677 ga_clear(&backpos); | |
3678 vim_free(reg_tofree); | |
3679 vim_free(reg_prev_sub); | |
3680 } | |
3681 #endif | |
270 | 3682 |
7 | 3683 /* |
3684 * Get pointer to the line "lnum", which is relative to "reg_firstlnum". | |
3685 */ | |
3686 static char_u * | |
3687 reg_getline(lnum) | |
3688 linenr_T lnum; | |
3689 { | |
3690 /* when looking behind for a match/no-match lnum is negative. But we | |
3691 * can't go before line 1 */ | |
3692 if (reg_firstlnum + lnum < 1) | |
3693 return NULL; | |
482 | 3694 if (lnum > reg_maxline) |
481 | 3695 /* Must have matched the "\n" in the last line. */ |
3696 return (char_u *)""; | |
7 | 3697 return ml_get_buf(reg_buf, reg_firstlnum + lnum, FALSE); |
3698 } | |
3699 | |
3700 static regsave_T behind_pos; | |
3701 | |
3702 #ifdef FEAT_SYN_HL | |
3703 static char_u *reg_startzp[NSUBEXP]; /* Workspace to mark beginning */ | |
3704 static char_u *reg_endzp[NSUBEXP]; /* and end of \z(...\) matches */ | |
3705 static lpos_T reg_startzpos[NSUBEXP]; /* idem, beginning pos */ | |
3706 static lpos_T reg_endzpos[NSUBEXP]; /* idem, end pos */ | |
3707 #endif | |
3708 | |
3709 /* TRUE if using multi-line regexp. */ | |
3710 #define REG_MULTI (reg_match == NULL) | |
3711 | |
5838 | 3712 static int bt_regexec_nl __ARGS((regmatch_T *rmp, char_u *line, colnr_T col, int line_lbr)); |
3713 | |
4444 | 3714 |
7 | 3715 /* |
3716 * Match a regexp against a string. | |
3717 * "rmp->regprog" is a compiled regexp as returned by vim_regcomp(). | |
3718 * Uses curbuf for line count and 'iskeyword'. | |
5838 | 3719 * if "line_lbr" is TRUE consider a "\n" in "line" to be a line break. |
7 | 3720 * |
3721 * Return TRUE if there is a match, FALSE if not. | |
3722 */ | |
4444 | 3723 static int |
5838 | 3724 bt_regexec_nl(rmp, line, col, line_lbr) |
7 | 3725 regmatch_T *rmp; |
3726 char_u *line; /* string to match against */ | |
3727 colnr_T col; /* column to start looking for match */ | |
5838 | 3728 int line_lbr; |
7 | 3729 { |
3730 reg_match = rmp; | |
3731 reg_mmatch = NULL; | |
3732 reg_maxline = 0; | |
5838 | 3733 reg_line_lbr = line_lbr; |
4061 | 3734 reg_buf = curbuf; |
7 | 3735 reg_win = NULL; |
3736 ireg_ic = rmp->rm_ic; | |
3737 #ifdef FEAT_MBYTE | |
3738 ireg_icombine = FALSE; | |
3739 #endif | |
410 | 3740 ireg_maxcol = 0; |
4444 | 3741 return (bt_regexec_both(line, col, NULL) != 0); |
7 | 3742 } |
3743 | |
4444 | 3744 static long bt_regexec_multi __ARGS((regmmatch_T *rmp, win_T *win, buf_T *buf, linenr_T lnum, colnr_T col, proftime_T *tm)); |
3745 | |
7 | 3746 /* |
3747 * Match a regexp against multiple lines. | |
3748 * "rmp->regprog" is a compiled regexp as returned by vim_regcomp(). | |
3749 * Uses curbuf for line count and 'iskeyword'. | |
3750 * | |
3751 * Return zero if there is no match. Return number of lines contained in the | |
3752 * match otherwise. | |
3753 */ | |
4444 | 3754 static long |
3755 bt_regexec_multi(rmp, win, buf, lnum, col, tm) | |
7 | 3756 regmmatch_T *rmp; |
3757 win_T *win; /* window in which to search or NULL */ | |
3758 buf_T *buf; /* buffer in which to search */ | |
3759 linenr_T lnum; /* nr of line to start looking for match */ | |
3760 colnr_T col; /* column to start looking for match */ | |
1521 | 3761 proftime_T *tm; /* timeout limit or NULL */ |
7 | 3762 { |
3763 long r; | |
3764 | |
3765 reg_match = NULL; | |
3766 reg_mmatch = rmp; | |
3767 reg_buf = buf; | |
3768 reg_win = win; | |
3769 reg_firstlnum = lnum; | |
3770 reg_maxline = reg_buf->b_ml.ml_line_count - lnum; | |
3771 reg_line_lbr = FALSE; | |
3772 ireg_ic = rmp->rmm_ic; | |
3773 #ifdef FEAT_MBYTE | |
3774 ireg_icombine = FALSE; | |
3775 #endif | |
410 | 3776 ireg_maxcol = rmp->rmm_maxcol; |
7 | 3777 |
4444 | 3778 r = bt_regexec_both(NULL, col, tm); |
7 | 3779 |
3780 return r; | |
3781 } | |
3782 | |
3783 /* | |
3784 * Match a regexp against a string ("line" points to the string) or multiple | |
3785 * lines ("line" is NULL, use reg_getline()). | |
3786 */ | |
3787 static long | |
4444 | 3788 bt_regexec_both(line, col, tm) |
7 | 3789 char_u *line; |
3790 colnr_T col; /* column to start looking for match */ | |
1877 | 3791 proftime_T *tm UNUSED; /* timeout limit or NULL */ |
7 | 3792 { |
4444 | 3793 bt_regprog_T *prog; |
7 | 3794 char_u *s; |
189 | 3795 long retval = 0L; |
7 | 3796 |
1520 | 3797 /* Create "regstack" and "backpos" if they are not allocated yet. |
3798 * We allocate *_INITIAL amount of bytes first and then set the grow size | |
3799 * to much bigger value to avoid many malloc calls in case of deep regular | |
3800 * expressions. */ | |
3801 if (regstack.ga_data == NULL) | |
3802 { | |
3803 /* Use an item size of 1 byte, since we push different things | |
3804 * onto the regstack. */ | |
3805 ga_init2(®stack, 1, REGSTACK_INITIAL); | |
3806 ga_grow(®stack, REGSTACK_INITIAL); | |
3807 regstack.ga_growsize = REGSTACK_INITIAL * 8; | |
3808 } | |
3809 | |
3810 if (backpos.ga_data == NULL) | |
3811 { | |
3812 ga_init2(&backpos, sizeof(backpos_T), BACKPOS_INITIAL); | |
3813 ga_grow(&backpos, BACKPOS_INITIAL); | |
3814 backpos.ga_growsize = BACKPOS_INITIAL * 8; | |
3815 } | |
270 | 3816 |
7 | 3817 if (REG_MULTI) |
3818 { | |
4444 | 3819 prog = (bt_regprog_T *)reg_mmatch->regprog; |
7 | 3820 line = reg_getline((linenr_T)0); |
3821 reg_startpos = reg_mmatch->startpos; | |
3822 reg_endpos = reg_mmatch->endpos; | |
3823 } | |
3824 else | |
3825 { | |
4444 | 3826 prog = (bt_regprog_T *)reg_match->regprog; |
7 | 3827 reg_startp = reg_match->startp; |
3828 reg_endp = reg_match->endp; | |
3829 } | |
3830 | |
3831 /* Be paranoid... */ | |
3832 if (prog == NULL || line == NULL) | |
3833 { | |
3834 EMSG(_(e_null)); | |
3835 goto theend; | |
3836 } | |
3837 | |
3838 /* Check validity of program. */ | |
3839 if (prog_magic_wrong()) | |
3840 goto theend; | |
3841 | |
410 | 3842 /* If the start column is past the maximum column: no need to try. */ |
3843 if (ireg_maxcol > 0 && col >= ireg_maxcol) | |
3844 goto theend; | |
3845 | |
7 | 3846 /* If pattern contains "\c" or "\C": overrule value of ireg_ic */ |
3847 if (prog->regflags & RF_ICASE) | |
3848 ireg_ic = TRUE; | |
3849 else if (prog->regflags & RF_NOICASE) | |
3850 ireg_ic = FALSE; | |
3851 | |
3852 #ifdef FEAT_MBYTE | |
3853 /* If pattern contains "\Z" overrule value of ireg_icombine */ | |
3854 if (prog->regflags & RF_ICOMBINE) | |
3855 ireg_icombine = TRUE; | |
3856 #endif | |
3857 | |
3858 /* If there is a "must appear" string, look for it. */ | |
3859 if (prog->regmust != NULL) | |
3860 { | |
3861 int c; | |
3862 | |
3863 #ifdef FEAT_MBYTE | |
3864 if (has_mbyte) | |
3865 c = (*mb_ptr2char)(prog->regmust); | |
3866 else | |
3867 #endif | |
3868 c = *prog->regmust; | |
3869 s = line + col; | |
170 | 3870 |
3871 /* | |
3872 * This is used very often, esp. for ":global". Use three versions of | |
3873 * the loop to avoid overhead of conditions. | |
3874 */ | |
3875 if (!ireg_ic | |
3876 #ifdef FEAT_MBYTE | |
3877 && !has_mbyte | |
3878 #endif | |
3879 ) | |
3880 while ((s = vim_strbyte(s, c)) != NULL) | |
3881 { | |
3882 if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0) | |
3883 break; /* Found it. */ | |
3884 ++s; | |
3885 } | |
3886 #ifdef FEAT_MBYTE | |
3887 else if (!ireg_ic || (!enc_utf8 && mb_char2len(c) > 1)) | |
3888 while ((s = vim_strchr(s, c)) != NULL) | |
3889 { | |
3890 if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0) | |
3891 break; /* Found it. */ | |
3892 mb_ptr_adv(s); | |
3893 } | |
3894 #endif | |
3895 else | |
3896 while ((s = cstrchr(s, c)) != NULL) | |
3897 { | |
3898 if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0) | |
3899 break; /* Found it. */ | |
3900 mb_ptr_adv(s); | |
3901 } | |
7 | 3902 if (s == NULL) /* Not present. */ |
3903 goto theend; | |
3904 } | |
3905 | |
3906 regline = line; | |
3907 reglnum = 0; | |
2578 | 3908 reg_toolong = FALSE; |
7 | 3909 |
3910 /* Simplest case: Anchored match need be tried only once. */ | |
3911 if (prog->reganch) | |
3912 { | |
3913 int c; | |
3914 | |
3915 #ifdef FEAT_MBYTE | |
3916 if (has_mbyte) | |
3917 c = (*mb_ptr2char)(regline + col); | |
3918 else | |
3919 #endif | |
3920 c = regline[col]; | |
3921 if (prog->regstart == NUL | |
3922 || prog->regstart == c | |
3923 || (ireg_ic && (( | |
3924 #ifdef FEAT_MBYTE | |
3925 (enc_utf8 && utf_fold(prog->regstart) == utf_fold(c))) | |
3926 || (c < 255 && prog->regstart < 255 && | |
3927 #endif | |
1347 | 3928 MB_TOLOWER(prog->regstart) == MB_TOLOWER(c))))) |
7 | 3929 retval = regtry(prog, col); |
3930 else | |
3931 retval = 0; | |
3932 } | |
3933 else | |
3934 { | |
1521 | 3935 #ifdef FEAT_RELTIME |
3936 int tm_count = 0; | |
3937 #endif | |
7 | 3938 /* Messy cases: unanchored match. */ |
180 | 3939 while (!got_int) |
7 | 3940 { |
3941 if (prog->regstart != NUL) | |
3942 { | |
170 | 3943 /* Skip until the char we know it must start with. |
3944 * Used often, do some work to avoid call overhead. */ | |
3945 if (!ireg_ic | |
3946 #ifdef FEAT_MBYTE | |
3947 && !has_mbyte | |
3948 #endif | |
3949 ) | |
3950 s = vim_strbyte(regline + col, prog->regstart); | |
3951 else | |
3952 s = cstrchr(regline + col, prog->regstart); | |
7 | 3953 if (s == NULL) |
3954 { | |
3955 retval = 0; | |
3956 break; | |
3957 } | |
3958 col = (int)(s - regline); | |
3959 } | |
3960 | |
410 | 3961 /* Check for maximum column to try. */ |
3962 if (ireg_maxcol > 0 && col >= ireg_maxcol) | |
3963 { | |
3964 retval = 0; | |
3965 break; | |
3966 } | |
3967 | |
7 | 3968 retval = regtry(prog, col); |
3969 if (retval > 0) | |
3970 break; | |
3971 | |
3972 /* if not currently on the first line, get it again */ | |
3973 if (reglnum != 0) | |
3974 { | |
481 | 3975 reglnum = 0; |
7 | 3976 regline = reg_getline((linenr_T)0); |
3977 } | |
3978 if (regline[col] == NUL) | |
3979 break; | |
3980 #ifdef FEAT_MBYTE | |
3981 if (has_mbyte) | |
474 | 3982 col += (*mb_ptr2len)(regline + col); |
7 | 3983 else |
3984 #endif | |
3985 ++col; | |
1521 | 3986 #ifdef FEAT_RELTIME |
3987 /* Check for timeout once in a twenty times to avoid overhead. */ | |
3988 if (tm != NULL && ++tm_count == 20) | |
3989 { | |
3990 tm_count = 0; | |
3991 if (profile_passed_limit(tm)) | |
3992 break; | |
3993 } | |
3994 #endif | |
7 | 3995 } |
3996 } | |
3997 | |
3998 theend: | |
1520 | 3999 /* Free "reg_tofree" when it's a bit big. |
4000 * Free regstack and backpos if they are bigger than their initial size. */ | |
4001 if (reg_tofreelen > 400) | |
4002 { | |
4003 vim_free(reg_tofree); | |
4004 reg_tofree = NULL; | |
4005 } | |
4006 if (regstack.ga_maxlen > REGSTACK_INITIAL) | |
4007 ga_clear(®stack); | |
4008 if (backpos.ga_maxlen > BACKPOS_INITIAL) | |
4009 ga_clear(&backpos); | |
270 | 4010 |
7 | 4011 return retval; |
4012 } | |
4013 | |
4014 #ifdef FEAT_SYN_HL | |
4015 static reg_extmatch_T *make_extmatch __ARGS((void)); | |
4016 | |
4017 /* | |
4018 * Create a new extmatch and mark it as referenced once. | |
4019 */ | |
4020 static reg_extmatch_T * | |
4021 make_extmatch() | |
4022 { | |
4023 reg_extmatch_T *em; | |
4024 | |
4025 em = (reg_extmatch_T *)alloc_clear((unsigned)sizeof(reg_extmatch_T)); | |
4026 if (em != NULL) | |
4027 em->refcnt = 1; | |
4028 return em; | |
4029 } | |
4030 | |
4031 /* | |
4032 * Add a reference to an extmatch. | |
4033 */ | |
4034 reg_extmatch_T * | |
4035 ref_extmatch(em) | |
4036 reg_extmatch_T *em; | |
4037 { | |
4038 if (em != NULL) | |
4039 em->refcnt++; | |
4040 return em; | |
4041 } | |
4042 | |
4043 /* | |
4044 * Remove a reference to an extmatch. If there are no references left, free | |
4045 * the info. | |
4046 */ | |
4047 void | |
4048 unref_extmatch(em) | |
4049 reg_extmatch_T *em; | |
4050 { | |
4051 int i; | |
4052 | |
4053 if (em != NULL && --em->refcnt <= 0) | |
4054 { | |
4055 for (i = 0; i < NSUBEXP; ++i) | |
4056 vim_free(em->matches[i]); | |
4057 vim_free(em); | |
4058 } | |
4059 } | |
4060 #endif | |
4061 | |
4062 /* | |
4063 * regtry - try match of "prog" with at regline["col"]. | |
4064 * Returns 0 for failure, number of lines contained in the match otherwise. | |
4065 */ | |
4066 static long | |
4067 regtry(prog, col) | |
4444 | 4068 bt_regprog_T *prog; |
7 | 4069 colnr_T col; |
4070 { | |
4071 reginput = regline + col; | |
4072 need_clear_subexpr = TRUE; | |
4073 #ifdef FEAT_SYN_HL | |
4074 /* Clear the external match subpointers if necessary. */ | |
4075 if (prog->reghasz == REX_SET) | |
4076 need_clear_zsubexpr = TRUE; | |
4077 #endif | |
4078 | |
189 | 4079 if (regmatch(prog->program + 1) == 0) |
4080 return 0; | |
4081 | |
4082 cleanup_subexpr(); | |
4083 if (REG_MULTI) | |
7 | 4084 { |
189 | 4085 if (reg_startpos[0].lnum < 0) |
7 | 4086 { |
189 | 4087 reg_startpos[0].lnum = 0; |
4088 reg_startpos[0].col = col; | |
4089 } | |
4090 if (reg_endpos[0].lnum < 0) | |
4091 { | |
4092 reg_endpos[0].lnum = reglnum; | |
4093 reg_endpos[0].col = (int)(reginput - regline); | |
7 | 4094 } |
4095 else | |
189 | 4096 /* Use line number of "\ze". */ |
4097 reglnum = reg_endpos[0].lnum; | |
4098 } | |
4099 else | |
4100 { | |
4101 if (reg_startp[0] == NULL) | |
4102 reg_startp[0] = regline + col; | |
4103 if (reg_endp[0] == NULL) | |
4104 reg_endp[0] = reginput; | |
4105 } | |
7 | 4106 #ifdef FEAT_SYN_HL |
189 | 4107 /* Package any found \z(...\) matches for export. Default is none. */ |
4108 unref_extmatch(re_extmatch_out); | |
4109 re_extmatch_out = NULL; | |
4110 | |
4111 if (prog->reghasz == REX_SET) | |
4112 { | |
4113 int i; | |
4114 | |
4115 cleanup_zsubexpr(); | |
4116 re_extmatch_out = make_extmatch(); | |
4117 for (i = 0; i < NSUBEXP; i++) | |
7 | 4118 { |
189 | 4119 if (REG_MULTI) |
7 | 4120 { |
189 | 4121 /* Only accept single line matches. */ |
4122 if (reg_startzpos[i].lnum >= 0 | |
5820 | 4123 && reg_endzpos[i].lnum == reg_startzpos[i].lnum |
4124 && reg_endzpos[i].col >= reg_startzpos[i].col) | |
189 | 4125 re_extmatch_out->matches[i] = |
4126 vim_strnsave(reg_getline(reg_startzpos[i].lnum) | |
7 | 4127 + reg_startzpos[i].col, |
189 | 4128 reg_endzpos[i].col - reg_startzpos[i].col); |
4129 } | |
4130 else | |
4131 { | |
4132 if (reg_startzp[i] != NULL && reg_endzp[i] != NULL) | |
4133 re_extmatch_out->matches[i] = | |
7 | 4134 vim_strnsave(reg_startzp[i], |
189 | 4135 (int)(reg_endzp[i] - reg_startzp[i])); |
7 | 4136 } |
4137 } | |
189 | 4138 } |
7 | 4139 #endif |
189 | 4140 return 1 + reglnum; |
7 | 4141 } |
4142 | |
4143 #ifdef FEAT_MBYTE | |
4144 static int reg_prev_class __ARGS((void)); | |
4145 | |
4146 /* | |
4147 * Get class of previous character. | |
4148 */ | |
4149 static int | |
4150 reg_prev_class() | |
4151 { | |
4152 if (reginput > regline) | |
4069 | 4153 return mb_get_class_buf(reginput - 1 |
4154 - (*mb_head_off)(regline, reginput - 1), reg_buf); | |
7 | 4155 return -1; |
4156 } | |
5735 | 4157 #endif |
4158 | |
4730
749e2b2755d5
updated for version 7.3.1112
Bram Moolenaar <bram@vim.org>
parents:
4720
diff
changeset
|
4159 static int reg_match_visual __ARGS((void)); |
749e2b2755d5
updated for version 7.3.1112
Bram Moolenaar <bram@vim.org>
parents:
4720
diff
changeset
|
4160 |
749e2b2755d5
updated for version 7.3.1112
Bram Moolenaar <bram@vim.org>
parents:
4720
diff
changeset
|
4161 /* |
749e2b2755d5
updated for version 7.3.1112
Bram Moolenaar <bram@vim.org>
parents:
4720
diff
changeset
|
4162 * Return TRUE if the current reginput position matches the Visual area. |
749e2b2755d5
updated for version 7.3.1112
Bram Moolenaar <bram@vim.org>
parents:
4720
diff
changeset
|
4163 */ |
749e2b2755d5
updated for version 7.3.1112
Bram Moolenaar <bram@vim.org>
parents:
4720
diff
changeset
|
4164 static int |
749e2b2755d5
updated for version 7.3.1112
Bram Moolenaar <bram@vim.org>
parents:
4720
diff
changeset
|
4165 reg_match_visual() |
749e2b2755d5
updated for version 7.3.1112
Bram Moolenaar <bram@vim.org>
parents:
4720
diff
changeset
|
4166 { |
749e2b2755d5
updated for version 7.3.1112
Bram Moolenaar <bram@vim.org>
parents:
4720
diff
changeset
|
4167 pos_T top, bot; |
749e2b2755d5
updated for version 7.3.1112
Bram Moolenaar <bram@vim.org>
parents:
4720
diff
changeset
|
4168 linenr_T lnum; |
749e2b2755d5
updated for version 7.3.1112
Bram Moolenaar <bram@vim.org>
parents:
4720
diff
changeset
|
4169 colnr_T col; |
749e2b2755d5
updated for version 7.3.1112
Bram Moolenaar <bram@vim.org>
parents:
4720
diff
changeset
|
4170 win_T *wp = reg_win == NULL ? curwin : reg_win; |
749e2b2755d5
updated for version 7.3.1112
Bram Moolenaar <bram@vim.org>
parents:
4720
diff
changeset
|
4171 int mode; |
749e2b2755d5
updated for version 7.3.1112
Bram Moolenaar <bram@vim.org>
parents:
4720
diff
changeset
|
4172 colnr_T start, end; |
749e2b2755d5
updated for version 7.3.1112
Bram Moolenaar <bram@vim.org>
parents:
4720
diff
changeset
|
4173 colnr_T start2, end2; |
749e2b2755d5
updated for version 7.3.1112
Bram Moolenaar <bram@vim.org>
parents:
4720
diff
changeset
|
4174 colnr_T cols; |
749e2b2755d5
updated for version 7.3.1112
Bram Moolenaar <bram@vim.org>
parents:
4720
diff
changeset
|
4175 |
749e2b2755d5
updated for version 7.3.1112
Bram Moolenaar <bram@vim.org>
parents:
4720
diff
changeset
|
4176 /* Check if the buffer is the current buffer. */ |
749e2b2755d5
updated for version 7.3.1112
Bram Moolenaar <bram@vim.org>
parents:
4720
diff
changeset
|
4177 if (reg_buf != curbuf || VIsual.lnum == 0) |
749e2b2755d5
updated for version 7.3.1112
Bram Moolenaar <bram@vim.org>
parents:
4720
diff
changeset
|
4178 return FALSE; |
749e2b2755d5
updated for version 7.3.1112
Bram Moolenaar <bram@vim.org>
parents:
4720
diff
changeset
|
4179 |
749e2b2755d5
updated for version 7.3.1112
Bram Moolenaar <bram@vim.org>
parents:
4720
diff
changeset
|
4180 if (VIsual_active) |
749e2b2755d5
updated for version 7.3.1112
Bram Moolenaar <bram@vim.org>
parents:
4720
diff
changeset
|
4181 { |
749e2b2755d5
updated for version 7.3.1112
Bram Moolenaar <bram@vim.org>
parents:
4720
diff
changeset
|
4182 if (lt(VIsual, wp->w_cursor)) |
749e2b2755d5
updated for version 7.3.1112
Bram Moolenaar <bram@vim.org>
parents:
4720
diff
changeset
|
4183 { |
749e2b2755d5
updated for version 7.3.1112
Bram Moolenaar <bram@vim.org>
parents:
4720
diff
changeset
|
4184 top = VIsual; |
749e2b2755d5
updated for version 7.3.1112
Bram Moolenaar <bram@vim.org>
parents:
4720
diff
changeset
|
4185 bot = wp->w_cursor; |
749e2b2755d5
updated for version 7.3.1112
Bram Moolenaar <bram@vim.org>
parents:
4720
diff
changeset
|
4186 } |
749e2b2755d5
updated for version 7.3.1112
Bram Moolenaar <bram@vim.org>
parents:
4720
diff
changeset
|
4187 else |
749e2b2755d5
updated for version 7.3.1112
Bram Moolenaar <bram@vim.org>
parents:
4720
diff
changeset
|
4188 { |
749e2b2755d5
updated for version 7.3.1112
Bram Moolenaar <bram@vim.org>
parents:
4720
diff
changeset
|
4189 top = wp->w_cursor; |
749e2b2755d5
updated for version 7.3.1112
Bram Moolenaar <bram@vim.org>
parents:
4720
diff
changeset
|
4190 bot = VIsual; |
749e2b2755d5
updated for version 7.3.1112
Bram Moolenaar <bram@vim.org>
parents:
4720
diff
changeset
|
4191 } |
749e2b2755d5
updated for version 7.3.1112
Bram Moolenaar <bram@vim.org>
parents:
4720
diff
changeset
|
4192 mode = VIsual_mode; |
749e2b2755d5
updated for version 7.3.1112
Bram Moolenaar <bram@vim.org>
parents:
4720
diff
changeset
|
4193 } |
749e2b2755d5
updated for version 7.3.1112
Bram Moolenaar <bram@vim.org>
parents:
4720
diff
changeset
|
4194 else |
749e2b2755d5
updated for version 7.3.1112
Bram Moolenaar <bram@vim.org>
parents:
4720
diff
changeset
|
4195 { |
749e2b2755d5
updated for version 7.3.1112
Bram Moolenaar <bram@vim.org>
parents:
4720
diff
changeset
|
4196 if (lt(curbuf->b_visual.vi_start, curbuf->b_visual.vi_end)) |
749e2b2755d5
updated for version 7.3.1112
Bram Moolenaar <bram@vim.org>
parents:
4720
diff
changeset
|
4197 { |
749e2b2755d5
updated for version 7.3.1112
Bram Moolenaar <bram@vim.org>
parents:
4720
diff
changeset
|
4198 top = curbuf->b_visual.vi_start; |
749e2b2755d5
updated for version 7.3.1112
Bram Moolenaar <bram@vim.org>
parents:
4720
diff
changeset
|
4199 bot = curbuf->b_visual.vi_end; |
749e2b2755d5
updated for version 7.3.1112
Bram Moolenaar <bram@vim.org>
parents:
4720
diff
changeset
|
4200 } |
749e2b2755d5
updated for version 7.3.1112
Bram Moolenaar <bram@vim.org>
parents:
4720
diff
changeset
|
4201 else |
749e2b2755d5
updated for version 7.3.1112
Bram Moolenaar <bram@vim.org>
parents:
4720
diff
changeset
|
4202 { |
749e2b2755d5
updated for version 7.3.1112
Bram Moolenaar <bram@vim.org>
parents:
4720
diff
changeset
|
4203 top = curbuf->b_visual.vi_end; |
749e2b2755d5
updated for version 7.3.1112
Bram Moolenaar <bram@vim.org>
parents:
4720
diff
changeset
|
4204 bot = curbuf->b_visual.vi_start; |
749e2b2755d5
updated for version 7.3.1112
Bram Moolenaar <bram@vim.org>
parents:
4720
diff
changeset
|
4205 } |
749e2b2755d5
updated for version 7.3.1112
Bram Moolenaar <bram@vim.org>
parents:
4720
diff
changeset
|
4206 mode = curbuf->b_visual.vi_mode; |
749e2b2755d5
updated for version 7.3.1112
Bram Moolenaar <bram@vim.org>
parents:
4720
diff
changeset
|
4207 } |
749e2b2755d5
updated for version 7.3.1112
Bram Moolenaar <bram@vim.org>
parents:
4720
diff
changeset
|
4208 lnum = reglnum + reg_firstlnum; |
749e2b2755d5
updated for version 7.3.1112
Bram Moolenaar <bram@vim.org>
parents:
4720
diff
changeset
|
4209 if (lnum < top.lnum || lnum > bot.lnum) |
749e2b2755d5
updated for version 7.3.1112
Bram Moolenaar <bram@vim.org>
parents:
4720
diff
changeset
|
4210 return FALSE; |
749e2b2755d5
updated for version 7.3.1112
Bram Moolenaar <bram@vim.org>
parents:
4720
diff
changeset
|
4211 |
749e2b2755d5
updated for version 7.3.1112
Bram Moolenaar <bram@vim.org>
parents:
4720
diff
changeset
|
4212 if (mode == 'v') |
749e2b2755d5
updated for version 7.3.1112
Bram Moolenaar <bram@vim.org>
parents:
4720
diff
changeset
|
4213 { |
749e2b2755d5
updated for version 7.3.1112
Bram Moolenaar <bram@vim.org>
parents:
4720
diff
changeset
|
4214 col = (colnr_T)(reginput - regline); |
749e2b2755d5
updated for version 7.3.1112
Bram Moolenaar <bram@vim.org>
parents:
4720
diff
changeset
|
4215 if ((lnum == top.lnum && col < top.col) |
749e2b2755d5
updated for version 7.3.1112
Bram Moolenaar <bram@vim.org>
parents:
4720
diff
changeset
|
4216 || (lnum == bot.lnum && col >= bot.col + (*p_sel != 'e'))) |
749e2b2755d5
updated for version 7.3.1112
Bram Moolenaar <bram@vim.org>
parents:
4720
diff
changeset
|
4217 return FALSE; |
749e2b2755d5
updated for version 7.3.1112
Bram Moolenaar <bram@vim.org>
parents:
4720
diff
changeset
|
4218 } |
749e2b2755d5
updated for version 7.3.1112
Bram Moolenaar <bram@vim.org>
parents:
4720
diff
changeset
|
4219 else if (mode == Ctrl_V) |
749e2b2755d5
updated for version 7.3.1112
Bram Moolenaar <bram@vim.org>
parents:
4720
diff
changeset
|
4220 { |
749e2b2755d5
updated for version 7.3.1112
Bram Moolenaar <bram@vim.org>
parents:
4720
diff
changeset
|
4221 getvvcol(wp, &top, &start, NULL, &end); |
749e2b2755d5
updated for version 7.3.1112
Bram Moolenaar <bram@vim.org>
parents:
4720
diff
changeset
|
4222 getvvcol(wp, &bot, &start2, NULL, &end2); |
749e2b2755d5
updated for version 7.3.1112
Bram Moolenaar <bram@vim.org>
parents:
4720
diff
changeset
|
4223 if (start2 < start) |
749e2b2755d5
updated for version 7.3.1112
Bram Moolenaar <bram@vim.org>
parents:
4720
diff
changeset
|
4224 start = start2; |
749e2b2755d5
updated for version 7.3.1112
Bram Moolenaar <bram@vim.org>
parents:
4720
diff
changeset
|
4225 if (end2 > end) |
749e2b2755d5
updated for version 7.3.1112
Bram Moolenaar <bram@vim.org>
parents:
4720
diff
changeset
|
4226 end = end2; |
749e2b2755d5
updated for version 7.3.1112
Bram Moolenaar <bram@vim.org>
parents:
4720
diff
changeset
|
4227 if (top.col == MAXCOL || bot.col == MAXCOL) |
749e2b2755d5
updated for version 7.3.1112
Bram Moolenaar <bram@vim.org>
parents:
4720
diff
changeset
|
4228 end = MAXCOL; |
749e2b2755d5
updated for version 7.3.1112
Bram Moolenaar <bram@vim.org>
parents:
4720
diff
changeset
|
4229 cols = win_linetabsize(wp, regline, (colnr_T)(reginput - regline)); |
749e2b2755d5
updated for version 7.3.1112
Bram Moolenaar <bram@vim.org>
parents:
4720
diff
changeset
|
4230 if (cols < start || cols > end - (*p_sel == 'e')) |
749e2b2755d5
updated for version 7.3.1112
Bram Moolenaar <bram@vim.org>
parents:
4720
diff
changeset
|
4231 return FALSE; |
749e2b2755d5
updated for version 7.3.1112
Bram Moolenaar <bram@vim.org>
parents:
4720
diff
changeset
|
4232 } |
749e2b2755d5
updated for version 7.3.1112
Bram Moolenaar <bram@vim.org>
parents:
4720
diff
changeset
|
4233 return TRUE; |
749e2b2755d5
updated for version 7.3.1112
Bram Moolenaar <bram@vim.org>
parents:
4720
diff
changeset
|
4234 } |
749e2b2755d5
updated for version 7.3.1112
Bram Moolenaar <bram@vim.org>
parents:
4720
diff
changeset
|
4235 |
39 | 4236 #define ADVANCE_REGINPUT() mb_ptr_adv(reginput) /* step "reginput" forward with mb_ptr_adv(); used by regmatch() after a single-character item matched */ |
7 | 4237 |
4238 /* | |
4239 * The arguments from BRACE_LIMITS are stored here. They are actually local | |
4240 * to regmatch(), but they are here to reduce the amount of stack space used | |
4241 * (it can be called recursively many times). | |
4242 */ | |
4243 static long bl_minval; /* lower limit: set by BRACE_LIMITS when followed by BRACE_SIMPLE, which copies it to rst.minval */ | |
4244 static long bl_maxval; /* upper limit: set by BRACE_LIMITS when followed by BRACE_SIMPLE, which copies it to rst.maxval */ | |
4245 | |
4246 /* | |
4247 * regmatch - main matching routine | |
4248 * | |
180 | 4249 * Conceptually the strategy is simple: Check to see whether the current node |
4250 * matches, push an item onto the regstack and loop to see whether the rest | |
4251 * matches, and then act accordingly. In practice we make some effort to | |
4252 * avoid using the regstack, in particular by going through "ordinary" nodes | |
4253 * (that don't need to know whether the rest of the match failed) by a nested | |
4254 * loop. | |
7 | 4255 * |
4256 * Returns TRUE when there is a match. Leaves reginput and reglnum just after | |
4257 * the last matched character. | |
4258 * Returns FALSE when there is no match. Leaves reginput and reglnum in an | |
4259 * undefined state! | |
4260 */ | |
4261 static int | |
180 | 4262 regmatch(scan) |
7 | 4263 char_u *scan; /* Current node. */ |
4264 { | |
180 | 4265 char_u *next; /* Next node. */ |
4266 int op; | |
4267 int c; | |
4268 regitem_T *rp; | |
4269 int no; | |
4270 int status; /* one of the RA_ values: */ | |
4271 #define RA_FAIL 1 /* something failed, abort */ | |
4272 #define RA_CONT 2 /* continue in inner loop */ | |
4273 #define RA_BREAK 3 /* break inner loop */ | |
4274 #define RA_MATCH 4 /* successful match */ | |
4275 #define RA_NOMATCH 5 /* didn't match */ | |
270 | 4276 |
1520 | 4277 /* Make "regstack" and "backpos" empty. They are allocated and freed in |
4444 | 4278 * bt_regexec_both() to reduce malloc()/free() calls. */ |
270 | 4279 regstack.ga_len = 0; |
4280 backpos.ga_len = 0; | |
233 | 4281 |
180 | 4282 /* |
233 | 4283 * Repeat until "regstack" is empty. |
180 | 4284 */ |
4285 for (;;) | |
4286 { | |
5310 | 4287 /* Some patterns may take a long time to match, e.g., "\([a-z]\+\)\+Q". |
4288 * Allow interrupting them with CTRL-C. */ | |
7 | 4289 fast_breakcheck(); |
4290 | |
4291 #ifdef DEBUG | |
4292 if (scan != NULL && regnarrate) | |
4293 { | |
4444 | 4294 mch_errmsg((char *)regprop(scan)); |
7 | 4295 mch_errmsg("(\n"); |
4296 } | |
4297 #endif | |
180 | 4298 |
4299 /* | |
233 | 4300 * Repeat for items that can be matched sequentially, without using the |
180 | 4301 * regstack. |
4302 */ | |
4303 for (;;) | |
7 | 4304 { |
180 | 4305 if (got_int || scan == NULL) |
4306 { | |
4307 status = RA_FAIL; | |
4308 break; | |
4309 } | |
4310 status = RA_CONT; | |
4311 | |
7 | 4312 #ifdef DEBUG |
4313 if (regnarrate) | |
4314 { | |
4444 | 4315 mch_errmsg((char *)regprop(scan)); |
7 | 4316 mch_errmsg("...\n"); |
4317 # ifdef FEAT_SYN_HL | |
4318 if (re_extmatch_in != NULL) | |
4319 { | |
4320 int i; | |
4321 | |
4322 mch_errmsg(_("External submatches:\n")); | |
4323 for (i = 0; i < NSUBEXP; i++) | |
4324 { | |
4325 mch_errmsg(" \""); | |
4326 if (re_extmatch_in->matches[i] != NULL) | |
4444 | 4327 mch_errmsg((char *)re_extmatch_in->matches[i]); |
7 | 4328 mch_errmsg("\"\n"); |
4329 } | |
4330 } | |
4331 # endif | |
4332 } | |
4333 #endif | |
4334 next = regnext(scan); | |
4335 | |
4336 op = OP(scan); | |
4337 /* Check for character class with NL added. */ | |
1018 | 4338 if (!reg_line_lbr && WITH_NL(op) && REG_MULTI |
4339 && *reginput == NUL && reglnum <= reg_maxline) | |
7 | 4340 { |
4341 reg_nextline(); | |
4342 } | |
4343 else if (reg_line_lbr && WITH_NL(op) && *reginput == '\n') | |
4344 { | |
4345 ADVANCE_REGINPUT(); | |
4346 } | |
4347 else | |
4348 { | |
4349 if (WITH_NL(op)) | |
180 | 4350 op -= ADD_NL; |
7 | 4351 #ifdef FEAT_MBYTE |
4352 if (has_mbyte) | |
4353 c = (*mb_ptr2char)(reginput); | |
4354 else | |
4355 #endif | |
4356 c = *reginput; | |
4357 switch (op) | |
4358 { | |
4359 case BOL: | |
4360 if (reginput != regline) | |
180 | 4361 status = RA_NOMATCH; |
7 | 4362 break; |
4363 | |
4364 case EOL: | |
4365 if (c != NUL) | |
180 | 4366 status = RA_NOMATCH; |
7 | 4367 break; |
4368 | |
4369 case RE_BOF: | |
1458 | 4370 /* We're not at the beginning of the file when below the first |
4371 * line where we started, not at the start of the line or we | |
4372 * didn't start at the first line of the buffer. */ | |
7 | 4373 if (reglnum != 0 || reginput != regline |
1458 | 4374 || (REG_MULTI && reg_firstlnum > 1)) |
180 | 4375 status = RA_NOMATCH; |
7 | 4376 break; |
4377 | |
4378 case RE_EOF: | |
4379 if (reglnum != reg_maxline || c != NUL) | |
180 | 4380 status = RA_NOMATCH; |
7 | 4381 break; |
4382 | |
4383 case CURSOR: | |
4384 /* Check if the buffer is in a window and compare the | |
4385 * reg_win->w_cursor position to the match position. */ | |
4386 if (reg_win == NULL | |
4387 || (reglnum + reg_firstlnum != reg_win->w_cursor.lnum) | |
4388 || ((colnr_T)(reginput - regline) != reg_win->w_cursor.col)) | |
180 | 4389 status = RA_NOMATCH; |
7 | 4390 break; |
4391 | |
639 | 4392 case RE_MARK: |
4732
0798b096bab3
updated for version 7.3.1113
Bram Moolenaar <bram@vim.org>
parents:
4730
diff
changeset
|
4393 /* Compare the mark position to the match position. */ |
639 | 4394 { |
4395 int mark = OPERAND(scan)[0]; | |
4396 int cmp = OPERAND(scan)[1]; | |
4397 pos_T *pos; | |
4398 | |
4043 | 4399 pos = getmark_buf(reg_buf, mark, FALSE); |
1148 | 4400 if (pos == NULL /* mark doesn't exist */ |
4732
0798b096bab3
updated for version 7.3.1113
Bram Moolenaar <bram@vim.org>
parents:
4730
diff
changeset
|
4401 || pos->lnum <= 0 /* mark isn't set in reg_buf */ |
639 | 4402 || (pos->lnum == reglnum + reg_firstlnum |
4403 ? (pos->col == (colnr_T)(reginput - regline) | |
4404 ? (cmp == '<' || cmp == '>') | |
4405 : (pos->col < (colnr_T)(reginput - regline) | |
4406 ? cmp != '>' | |
4407 : cmp != '<')) | |
4408 : (pos->lnum < reglnum + reg_firstlnum | |
4409 ? cmp != '>' | |
4410 : cmp != '<'))) | |
4411 status = RA_NOMATCH; | |
4412 } | |
4413 break; | |
4414 | |
4415 case RE_VISUAL: | |
4730
749e2b2755d5
updated for version 7.3.1112
Bram Moolenaar <bram@vim.org>
parents:
4720
diff
changeset
|
4416 if (!reg_match_visual()) |
639 | 4417 status = RA_NOMATCH; |
4418 break; | |
4419 | |
7 | 4420 case RE_LNUM: |
4421 if (!REG_MULTI || !re_num_cmp((long_u)(reglnum + reg_firstlnum), | |
4422 scan)) | |
180 | 4423 status = RA_NOMATCH; |
7 | 4424 break; |
4425 | |
4426 case RE_COL: | |
4427 if (!re_num_cmp((long_u)(reginput - regline) + 1, scan)) | |
180 | 4428 status = RA_NOMATCH; |
7 | 4429 break; |
4430 | |
4431 case RE_VCOL: | |
4432 if (!re_num_cmp((long_u)win_linetabsize( | |
4433 reg_win == NULL ? curwin : reg_win, | |
4434 regline, (colnr_T)(reginput - regline)) + 1, scan)) | |
180 | 4435 status = RA_NOMATCH; |
7 | 4436 break; |
4437 | |
4438 case BOW: /* \<word; reginput points to w */ | |
4439 if (c == NUL) /* Can't match at end of line */ | |
180 | 4440 status = RA_NOMATCH; |
7 | 4441 #ifdef FEAT_MBYTE |
180 | 4442 else if (has_mbyte) |
7 | 4443 { |
4444 int this_class; | |
4445 | |
4446 /* Get class of current and previous char (if it exists). */ | |
4069 | 4447 this_class = mb_get_class_buf(reginput, reg_buf); |
7 | 4448 if (this_class <= 1) |
180 | 4449 status = RA_NOMATCH; /* not on a word at all */ |
4450 else if (reg_prev_class() == this_class) | |
4451 status = RA_NOMATCH; /* previous char is in same word */ | |
7 | 4452 } |
4453 #endif | |
4454 else | |
4455 { | |
4061 | 4456 if (!vim_iswordc_buf(c, reg_buf) || (reginput > regline |
4457 && vim_iswordc_buf(reginput[-1], reg_buf))) | |
180 | 4458 status = RA_NOMATCH; |
7 | 4459 } |
4460 break; | |
4461 | |
4462 case EOW: /* word\>; reginput points after d */ | |
4463 if (reginput == regline) /* Can't match at start of line */ | |
180 | 4464 status = RA_NOMATCH; |
7 | 4465 #ifdef FEAT_MBYTE |
180 | 4466 else if (has_mbyte) |
7 | 4467 { |
4468 int this_class, prev_class; | |
4469 | |
4470 /* Get class of current and previous char (if it exists). */ | |
4069 | 4471 this_class = mb_get_class_buf(reginput, reg_buf); |
7 | 4472 prev_class = reg_prev_class(); |
180 | 4473 if (this_class == prev_class |
4474 || prev_class == 0 || prev_class == 1) | |
4475 status = RA_NOMATCH; | |
7 | 4476 } |
180 | 4477 #endif |
7 | 4478 else |
4479 { | |
4043 | 4480 if (!vim_iswordc_buf(reginput[-1], reg_buf) |
4481 || (reginput[0] != NUL && vim_iswordc_buf(c, reg_buf))) | |
180 | 4482 status = RA_NOMATCH; |
7 | 4483 } |
4484 break; /* Matched with EOW */ | |
4485 | |
4486 case ANY: | |
4084 | 4487 /* ANY does not match new lines. */ |
7 | 4488 if (c == NUL) |
180 | 4489 status = RA_NOMATCH; |
4490 else | |
4491 ADVANCE_REGINPUT(); | |
7 | 4492 break; |
4493 | |
4494 case IDENT: | |
4495 if (!vim_isIDc(c)) | |
180 | 4496 status = RA_NOMATCH; |
4497 else | |
4498 ADVANCE_REGINPUT(); | |
7 | 4499 break; |
4500 | |
4501 case SIDENT: | |
4502 if (VIM_ISDIGIT(*reginput) || !vim_isIDc(c)) | |
180 | 4503 status = RA_NOMATCH; |
4504 else | |
4505 ADVANCE_REGINPUT(); | |
7 | 4506 break; |
4507 | |
4508 case KWORD: | |
4069 | 4509 if (!vim_iswordp_buf(reginput, reg_buf)) |
180 | 4510 status = RA_NOMATCH; |
4511 else | |
4512 ADVANCE_REGINPUT(); | |
7 | 4513 break; |
4514 | |
4515 case SKWORD: | |
4069 | 4516 if (VIM_ISDIGIT(*reginput) || !vim_iswordp_buf(reginput, reg_buf)) |
180 | 4517 status = RA_NOMATCH; |
4518 else | |
4519 ADVANCE_REGINPUT(); | |
7 | 4520 break; |
4521 | |
4522 case FNAME: | |
4523 if (!vim_isfilec(c)) | |
180 | 4524 status = RA_NOMATCH; |
4525 else | |
4526 ADVANCE_REGINPUT(); | |
7 | 4527 break; |
4528 | |
4529 case SFNAME: | |
4530 if (VIM_ISDIGIT(*reginput) || !vim_isfilec(c)) | |
180 | 4531 status = RA_NOMATCH; |
4532 else | |
4533 ADVANCE_REGINPUT(); | |
7 | 4534 break; |
4535 | |
4536 case PRINT: | |
5221
9982ec574beb
updated for version 7.4a.036
Bram Moolenaar <bram@vim.org>
parents:
4901
diff
changeset
|
4537 if (!vim_isprintc(PTR2CHAR(reginput))) |
180 | 4538 status = RA_NOMATCH; |
4539 else | |
4540 ADVANCE_REGINPUT(); | |
7 | 4541 break; |
4542 | |
4543 case SPRINT: | |
5221
9982ec574beb
updated for version 7.4a.036
Bram Moolenaar <bram@vim.org>
parents:
4901
diff
changeset
|
4544 if (VIM_ISDIGIT(*reginput) || !vim_isprintc(PTR2CHAR(reginput))) |
180 | 4545 status = RA_NOMATCH; |
4546 else | |
4547 ADVANCE_REGINPUT(); | |
7 | 4548 break; |
4549 | |
4550 case WHITE: | |
4551 if (!vim_iswhite(c)) | |
180 | 4552 status = RA_NOMATCH; |
4553 else | |
4554 ADVANCE_REGINPUT(); | |
7 | 4555 break; |
4556 | |
4557 case NWHITE: | |
4558 if (c == NUL || vim_iswhite(c)) | |
180 | 4559 status = RA_NOMATCH; |
4560 else | |
4561 ADVANCE_REGINPUT(); | |
7 | 4562 break; |
4563 | |
4564 case DIGIT: | |
4565 if (!ri_digit(c)) | |
180 | 4566 status = RA_NOMATCH; |
4567 else | |
4568 ADVANCE_REGINPUT(); | |
7 | 4569 break; |
4570 | |
4571 case NDIGIT: | |
4572 if (c == NUL || ri_digit(c)) | |
180 | 4573 status = RA_NOMATCH; |
4574 else | |
4575 ADVANCE_REGINPUT(); | |
7 | 4576 break; |
4577 | |
4578 case HEX: | |
4579 if (!ri_hex(c)) | |
180 | 4580 status = RA_NOMATCH; |
4581 else | |
4582 ADVANCE_REGINPUT(); | |
7 | 4583 break; |
4584 | |
4585 case NHEX: | |
4586 if (c == NUL || ri_hex(c)) | |
180 | 4587 status = RA_NOMATCH; |
4588 else | |
4589 ADVANCE_REGINPUT(); | |
7 | 4590 break; |
4591 | |
4592 case OCTAL: | |
4593 if (!ri_octal(c)) | |
180 | 4594 status = RA_NOMATCH; |
4595 else | |
4596 ADVANCE_REGINPUT(); | |
7 | 4597 break; |
4598 | |
4599 case NOCTAL: | |
4600 if (c == NUL || ri_octal(c)) | |
180 | 4601 status = RA_NOMATCH; |
4602 else | |
4603 ADVANCE_REGINPUT(); | |
7 | 4604 break; |
4605 | |
4606 case WORD: | |
4607 if (!ri_word(c)) | |
180 | 4608 status = RA_NOMATCH; |
4609 else | |
4610 ADVANCE_REGINPUT(); | |
7 | 4611 break; |
4612 | |
4613 case NWORD: | |
4614 if (c == NUL || ri_word(c)) | |
180 | 4615 status = RA_NOMATCH; |
4616 else | |
4617 ADVANCE_REGINPUT(); | |
7 | 4618 break; |
4619 | |
4620 case HEAD: | |
4621 if (!ri_head(c)) | |
180 | 4622 status = RA_NOMATCH; |
4623 else | |
4624 ADVANCE_REGINPUT(); | |
7 | 4625 break; |
4626 | |
4627 case NHEAD: | |
4628 if (c == NUL || ri_head(c)) | |
180 | 4629 status = RA_NOMATCH; |
4630 else | |
4631 ADVANCE_REGINPUT(); | |
7 | 4632 break; |
4633 | |
4634 case ALPHA: | |
4635 if (!ri_alpha(c)) | |
180 | 4636 status = RA_NOMATCH; |
4637 else | |
4638 ADVANCE_REGINPUT(); | |
7 | 4639 break; |
4640 | |
4641 case NALPHA: | |
4642 if (c == NUL || ri_alpha(c)) | |
180 | 4643 status = RA_NOMATCH; |
4644 else | |
4645 ADVANCE_REGINPUT(); | |
7 | 4646 break; |
4647 | |
4648 case LOWER: | |
4649 if (!ri_lower(c)) | |
180 | 4650 status = RA_NOMATCH; |
4651 else | |
4652 ADVANCE_REGINPUT(); | |
7 | 4653 break; |
4654 | |
4655 case NLOWER: | |
4656 if (c == NUL || ri_lower(c)) | |
180 | 4657 status = RA_NOMATCH; |
4658 else | |
4659 ADVANCE_REGINPUT(); | |
7 | 4660 break; |
4661 | |
4662 case UPPER: | |
4663 if (!ri_upper(c)) | |
180 | 4664 status = RA_NOMATCH; |
4665 else | |
4666 ADVANCE_REGINPUT(); | |
7 | 4667 break; |
4668 | |
4669 case NUPPER: | |
4670 if (c == NUL || ri_upper(c)) | |
180 | 4671 status = RA_NOMATCH; |
4672 else | |
4673 ADVANCE_REGINPUT(); | |
7 | 4674 break; |
4675 | |
4676 case EXACTLY: | |
4677 { | |
4678 int len; | |
4679 char_u *opnd; | |
4680 | |
4681 opnd = OPERAND(scan); | |
4682 /* Inline the first byte, for speed. */ | |
4683 if (*opnd != *reginput | |
4684 && (!ireg_ic || ( | |
4685 #ifdef FEAT_MBYTE | |
4686 !enc_utf8 && | |
4687 #endif | |
1347 | 4688 MB_TOLOWER(*opnd) != MB_TOLOWER(*reginput)))) |
180 | 4689 status = RA_NOMATCH; |
4690 else if (*opnd == NUL) | |
7 | 4691 { |
4692 /* match empty string always works; happens when "~" is | |
4693 * empty. */ | |
4694 } | |
5899 | 4695 else |
4696 { | |
4697 if (opnd[1] == NUL | |
7 | 4698 #ifdef FEAT_MBYTE |
4699 && !(enc_utf8 && ireg_ic) | |
4700 #endif | |
4701 ) | |
5899 | 4702 { |
4703 len = 1; /* matched a single byte above */ | |
4704 } | |
4705 else | |
4706 { | |
4707 /* Need to match first byte again for multi-byte. */ | |
4708 len = (int)STRLEN(opnd); | |
4709 if (cstrncmp(opnd, reginput, &len) != 0) | |
4710 status = RA_NOMATCH; | |
4711 } | |
7 | 4712 #ifdef FEAT_MBYTE |
4713 /* Check for following composing character. */ | |
5899 | 4714 if (status != RA_NOMATCH |
4715 && enc_utf8 | |
4716 && UTF_COMPOSINGLIKE(reginput, reginput + len) | |
4717 && !ireg_icombine) | |
7 | 4718 { |
4719 /* raaron: This code makes a composing character get | |
4720 * ignored, which is the correct behavior (sometimes) | |
4721 * for voweled Hebrew texts. */ | |
5899 | 4722 status = RA_NOMATCH; |
7 | 4723 } |
180 | 4724 #endif |
5899 | 4725 if (status != RA_NOMATCH) |
180 | 4726 reginput += len; |
7 | 4727 } |
4728 } | |
4729 break; | |
4730 | |
4731 case ANYOF: | |
4732 case ANYBUT: | |
4733 if (c == NUL) | |
180 | 4734 status = RA_NOMATCH; |
4735 else if ((cstrchr(OPERAND(scan), c) == NULL) == (op == ANYOF)) | |
4736 status = RA_NOMATCH; | |
4737 else | |
4738 ADVANCE_REGINPUT(); | |
7 | 4739 break; |
4740 | |
4741 #ifdef FEAT_MBYTE | |
4742 case MULTIBYTECODE: | |
4743 if (has_mbyte) | |
4744 { | |
4745 int i, len; | |
4746 char_u *opnd; | |
944 | 4747 int opndc = 0, inpc; |
7 | 4748 |
4749 opnd = OPERAND(scan); | |
4750 /* Safety check (just in case 'encoding' was changed since | |
4751 * compiling the program). */ | |
474 | 4752 if ((len = (*mb_ptr2len)(opnd)) < 2) |
180 | 4753 { |
4754 status = RA_NOMATCH; | |
4755 break; | |
4756 } | |
714 | 4757 if (enc_utf8) |
4758 opndc = mb_ptr2char(opnd); | |
4759 if (enc_utf8 && utf_iscomposing(opndc)) | |
4760 { | |
4761 /* When only a composing char is given match at any | |
4762 * position where that composing char appears. */ | |
4763 status = RA_NOMATCH; | |
4764 for (i = 0; reginput[i] != NUL; i += utf_char2len(inpc)) | |
180 | 4765 { |
714 | 4766 inpc = mb_ptr2char(reginput + i); |
4767 if (!utf_iscomposing(inpc)) | |
4768 { | |
4769 if (i > 0) | |
4770 break; | |
4771 } | |
4772 else if (opndc == inpc) | |
4773 { | |
4774 /* Include all following composing chars. */ | |
4775 len = i + mb_ptr2len(reginput + i); | |
4776 status = RA_MATCH; | |
4777 break; | |
4778 } | |
180 | 4779 } |
714 | 4780 } |
4781 else | |
4782 for (i = 0; i < len; ++i) | |
4783 if (opnd[i] != reginput[i]) | |
4784 { | |
4785 status = RA_NOMATCH; | |
4786 break; | |
4787 } | |
7 | 4788 reginput += len; |
4789 } | |
4790 else | |
180 | 4791 status = RA_NOMATCH; |
7 | 4792 break; |
4793 #endif | |
4794 | |
4795 case NOTHING: | |
4796 break; | |
4797 | |
4798 case BACK: | |
233 | 4799 { |
4800 int i; | |
4801 backpos_T *bp; | |
4802 | |
4803 /* | |
4804 * When we run into BACK we need to check if we don't keep | |
4805 * looping without matching any input. The second and later | |
4806 * times a BACK is encountered it fails if the input is still | |
4807 * at the same position as the previous time. | |
4808 * The positions are stored in "backpos" and found by the | |
4809 * current value of "scan", the position in the RE program. | |
4810 */ | |
4811 bp = (backpos_T *)backpos.ga_data; | |
4812 for (i = 0; i < backpos.ga_len; ++i) | |
4813 if (bp[i].bp_scan == scan) | |
4814 break; | |
4815 if (i == backpos.ga_len) | |
4816 { | |
4817 /* First time at this BACK, make room to store the pos. */ | |
4818 if (ga_grow(&backpos, 1) == FAIL) | |
4819 status = RA_FAIL; | |
4820 else | |
4821 { | |
4822 /* get "ga_data" again, it may have changed */ | |
4823 bp = (backpos_T *)backpos.ga_data; | |
4824 bp[i].bp_scan = scan; | |
4825 ++backpos.ga_len; | |
4826 } | |
4827 } | |
4828 else if (reg_save_equal(&bp[i].bp_pos)) | |
4829 /* Still at same position as last time, fail. */ | |
4830 status = RA_NOMATCH; | |
4831 | |
4832 if (status != RA_FAIL && status != RA_NOMATCH) | |
4833 reg_save(&bp[i].bp_pos, &backpos); | |
4834 } | |
179 | 4835 break; |
4836 | |
7 | 4837 case MOPEN + 0: /* Match start: \zs */ |
4838 case MOPEN + 1: /* \( */ | |
4839 case MOPEN + 2: | |
4840 case MOPEN + 3: | |
4841 case MOPEN + 4: | |
4842 case MOPEN + 5: | |
4843 case MOPEN + 6: | |
4844 case MOPEN + 7: | |
4845 case MOPEN + 8: | |
4846 case MOPEN + 9: | |
4847 { | |
4848 no = op - MOPEN; | |
4849 cleanup_subexpr(); | |
270 | 4850 rp = regstack_push(RS_MOPEN, scan); |
180 | 4851 if (rp == NULL) |
4852 status = RA_FAIL; | |
4853 else | |
4854 { | |
4855 rp->rs_no = no; | |
4856 save_se(&rp->rs_un.sesave, ®_startpos[no], | |
4857 ®_startp[no]); | |
4858 /* We simply continue and handle the result when done. */ | |
4859 } | |
7 | 4860 } |
180 | 4861 break; |
7 | 4862 |
4863 case NOPEN: /* \%( */ | |
4864 case NCLOSE: /* \) after \%( */ | |
270 | 4865 if (regstack_push(RS_NOPEN, scan) == NULL) |
180 | 4866 status = RA_FAIL; |
4867 /* We simply continue and handle the result when done. */ | |
4868 break; | |
7 | 4869 |
4870 #ifdef FEAT_SYN_HL | |
4871 case ZOPEN + 1: | |
4872 case ZOPEN + 2: | |
4873 case ZOPEN + 3: | |
4874 case ZOPEN + 4: | |
4875 case ZOPEN + 5: | |
4876 case ZOPEN + 6: | |
4877 case ZOPEN + 7: | |
4878 case ZOPEN + 8: | |
4879 case ZOPEN + 9: | |
4880 { | |
4881 no = op - ZOPEN; | |
4882 cleanup_zsubexpr(); | |
270 | 4883 rp = regstack_push(RS_ZOPEN, scan); |
180 | 4884 if (rp == NULL) |
4885 status = RA_FAIL; | |
4886 else | |
4887 { | |
4888 rp->rs_no = no; | |
4889 save_se(&rp->rs_un.sesave, ®_startzpos[no], | |
4890 ®_startzp[no]); | |
4891 /* We simply continue and handle the result when done. */ | |
4892 } | |
7 | 4893 } |
180 | 4894 break; |
7 | 4895 #endif |
4896 | |
4897 case MCLOSE + 0: /* Match end: \ze */ | |
4898 case MCLOSE + 1: /* \) */ | |
4899 case MCLOSE + 2: | |
4900 case MCLOSE + 3: | |
4901 case MCLOSE + 4: | |
4902 case MCLOSE + 5: | |
4903 case MCLOSE + 6: | |
4904 case MCLOSE + 7: | |
4905 case MCLOSE + 8: | |
4906 case MCLOSE + 9: | |
4907 { | |
4908 no = op - MCLOSE; | |
4909 cleanup_subexpr(); | |
270 | 4910 rp = regstack_push(RS_MCLOSE, scan); |
180 | 4911 if (rp == NULL) |
4912 status = RA_FAIL; | |
4913 else | |
4914 { | |
4915 rp->rs_no = no; | |
4916 save_se(&rp->rs_un.sesave, ®_endpos[no], ®_endp[no]); | |
4917 /* We simply continue and handle the result when done. */ | |
4918 } | |
7 | 4919 } |
180 | 4920 break; |
7 | 4921 |
4922 #ifdef FEAT_SYN_HL | |
4923 case ZCLOSE + 1: /* \) after \z( */ | |
4924 case ZCLOSE + 2: | |
4925 case ZCLOSE + 3: | |
4926 case ZCLOSE + 4: | |
4927 case ZCLOSE + 5: | |
4928 case ZCLOSE + 6: | |
4929 case ZCLOSE + 7: | |
4930 case ZCLOSE + 8: | |
4931 case ZCLOSE + 9: | |
4932 { | |
4933 no = op - ZCLOSE; | |
4934 cleanup_zsubexpr(); | |
270 | 4935 rp = regstack_push(RS_ZCLOSE, scan); |
180 | 4936 if (rp == NULL) |
4937 status = RA_FAIL; | |
4938 else | |
4939 { | |
4940 rp->rs_no = no; | |
4941 save_se(&rp->rs_un.sesave, ®_endzpos[no], | |
4942 ®_endzp[no]); | |
4943 /* We simply continue and handle the result when done. */ | |
4944 } | |
7 | 4945 } |
180 | 4946 break; |
7 | 4947 #endif |
4948 | |
4949 case BACKREF + 1: | |
4950 case BACKREF + 2: | |
4951 case BACKREF + 3: | |
4952 case BACKREF + 4: | |
4953 case BACKREF + 5: | |
4954 case BACKREF + 6: | |
4955 case BACKREF + 7: | |
4956 case BACKREF + 8: | |
4957 case BACKREF + 9: | |
4958 { | |
4959 int len; | |
4960 | |
4961 no = op - BACKREF; | |
4962 cleanup_subexpr(); | |
4963 if (!REG_MULTI) /* Single-line regexp */ | |
4964 { | |
1815 | 4965 if (reg_startp[no] == NULL || reg_endp[no] == NULL) |
7 | 4966 { |
4967 /* Backref was not set: Match an empty string. */ | |
4968 len = 0; | |
4969 } | |
4970 else | |
4971 { | |
4972 /* Compare current input with back-ref in the same | |
4973 * line. */ | |
4974 len = (int)(reg_endp[no] - reg_startp[no]); | |
4975 if (cstrncmp(reg_startp[no], reginput, &len) != 0) | |
180 | 4976 status = RA_NOMATCH; |
7 | 4977 } |
4978 } | |
4979 else /* Multi-line regexp */ | |
4980 { | |
1815 | 4981 if (reg_startpos[no].lnum < 0 || reg_endpos[no].lnum < 0) |
7 | 4982 { |
4983 /* Backref was not set: Match an empty string. */ | |
4984 len = 0; | |
4985 } | |
4986 else | |
4987 { | |
4988 if (reg_startpos[no].lnum == reglnum | |
4989 && reg_endpos[no].lnum == reglnum) | |
4990 { | |
4991 /* Compare back-ref within the current line. */ | |
4992 len = reg_endpos[no].col - reg_startpos[no].col; | |
4993 if (cstrncmp(regline + reg_startpos[no].col, | |
4994 reginput, &len) != 0) | |
180 | 4995 status = RA_NOMATCH; |
7 | 4996 } |
4997 else | |
4998 { | |
4999 /* Messy situation: Need to compare between two | |
5000 * lines. */ | |
4901
56fbf60e9476
updated for version 7.3.1196
Bram Moolenaar <bram@vim.org>
parents:
4899
diff
changeset
|
5001 int r = match_with_backref( |
4891
4c42efb4c098
updated for version 7.3.1191
Bram Moolenaar <bram@vim.org>
parents:
4805
diff
changeset
|
5002 reg_startpos[no].lnum, |
4c42efb4c098
updated for version 7.3.1191
Bram Moolenaar <bram@vim.org>
parents:
4805
diff
changeset
|
5003 reg_startpos[no].col, |
4c42efb4c098
updated for version 7.3.1191
Bram Moolenaar <bram@vim.org>
parents:
4805
diff
changeset
|
5004 reg_endpos[no].lnum, |
4c42efb4c098
updated for version 7.3.1191
Bram Moolenaar <bram@vim.org>
parents:
4805
diff
changeset
|
5005 reg_endpos[no].col, |
4899
4837fd61be52
updated for version 7.3.1195
Bram Moolenaar <bram@vim.org>
parents:
4891
diff
changeset
|
5006 &len); |
4901
56fbf60e9476
updated for version 7.3.1196
Bram Moolenaar <bram@vim.org>
parents:
4899
diff
changeset
|
5007 |
56fbf60e9476
updated for version 7.3.1196
Bram Moolenaar <bram@vim.org>
parents:
4899
diff
changeset
|
5008 if (r != RA_MATCH) |
56fbf60e9476
updated for version 7.3.1196
Bram Moolenaar <bram@vim.org>
parents:
4899
diff
changeset
|
5009 status = r; |
7 | 5010 } |
5011 } | |
5012 } | |
5013 | |
5014 /* Matched the backref, skip over it. */ | |
5015 reginput += len; | |
5016 } | |
5017 break; | |
5018 | |
5019 #ifdef FEAT_SYN_HL | |
5020 case ZREF + 1: | |
5021 case ZREF + 2: | |
5022 case ZREF + 3: | |
5023 case ZREF + 4: | |
5024 case ZREF + 5: | |
5025 case ZREF + 6: | |
5026 case ZREF + 7: | |
5027 case ZREF + 8: | |
5028 case ZREF + 9: | |
5029 { | |
5030 int len; | |
5031 | |
5032 cleanup_zsubexpr(); | |
5033 no = op - ZREF; | |
5034 if (re_extmatch_in != NULL | |
5035 && re_extmatch_in->matches[no] != NULL) | |
5036 { | |
5037 len = (int)STRLEN(re_extmatch_in->matches[no]); | |
5038 if (cstrncmp(re_extmatch_in->matches[no], | |
5039 reginput, &len) != 0) | |
180 | 5040 status = RA_NOMATCH; |
5041 else | |
5042 reginput += len; | |
7 | 5043 } |
5044 else | |
5045 { | |
5046 /* Backref was not set: Match an empty string. */ | |
5047 } | |
5048 } | |
5049 break; | |
5050 #endif | |
5051 | |
5052 case BRANCH: | |
5053 { | |
5054 if (OP(next) != BRANCH) /* No choice. */ | |
5055 next = OPERAND(scan); /* Avoid recursion. */ | |
5056 else | |
5057 { | |
270 | 5058 rp = regstack_push(RS_BRANCH, scan); |
180 | 5059 if (rp == NULL) |
5060 status = RA_FAIL; | |
5061 else | |
5062 status = RA_BREAK; /* rest is below */ | |
7 | 5063 } |
5064 } | |
5065 break; | |
5066 | |
5067 case BRACE_LIMITS: | |
5068 { | |
5069 if (OP(next) == BRACE_SIMPLE) | |
5070 { | |
5071 bl_minval = OPERAND_MIN(scan); | |
5072 bl_maxval = OPERAND_MAX(scan); | |
5073 } | |
5074 else if (OP(next) >= BRACE_COMPLEX | |
5075 && OP(next) < BRACE_COMPLEX + 10) | |
5076 { | |
5077 no = OP(next) - BRACE_COMPLEX; | |
5078 brace_min[no] = OPERAND_MIN(scan); | |
5079 brace_max[no] = OPERAND_MAX(scan); | |
5080 brace_count[no] = 0; | |
5081 } | |
5082 else | |
5083 { | |
5084 EMSG(_(e_internal)); /* Shouldn't happen */ | |
180 | 5085 status = RA_FAIL; |
7 | 5086 } |
5087 } | |
5088 break; | |
5089 | |
5090 case BRACE_COMPLEX + 0: | |
5091 case BRACE_COMPLEX + 1: | |
5092 case BRACE_COMPLEX + 2: | |
5093 case BRACE_COMPLEX + 3: | |
5094 case BRACE_COMPLEX + 4: | |
5095 case BRACE_COMPLEX + 5: | |
5096 case BRACE_COMPLEX + 6: | |
5097 case BRACE_COMPLEX + 7: | |
5098 case BRACE_COMPLEX + 8: | |
5099 case BRACE_COMPLEX + 9: | |
5100 { | |
5101 no = op - BRACE_COMPLEX; | |
5102 ++brace_count[no]; | |
5103 | |
5104 /* If not matched enough times yet, try one more */ | |
5105 if (brace_count[no] <= (brace_min[no] <= brace_max[no] | |
180 | 5106 ? brace_min[no] : brace_max[no])) |
7 | 5107 { |
270 | 5108 rp = regstack_push(RS_BRCPLX_MORE, scan); |
180 | 5109 if (rp == NULL) |
5110 status = RA_FAIL; | |
5111 else | |
5112 { | |
5113 rp->rs_no = no; | |
233 | 5114 reg_save(&rp->rs_un.regsave, &backpos); |
180 | 5115 next = OPERAND(scan); |
5116 /* We continue and handle the result when done. */ | |
5117 } | |
5118 break; | |
7 | 5119 } |
5120 | |
5121 /* If matched enough times, may try matching some more */ | |
5122 if (brace_min[no] <= brace_max[no]) | |
5123 { | |
5124 /* Range is the normal way around, use longest match */ | |
5125 if (brace_count[no] <= brace_max[no]) | |
5126 { | |
270 | 5127 rp = regstack_push(RS_BRCPLX_LONG, scan); |
180 | 5128 if (rp == NULL) |
5129 status = RA_FAIL; | |
5130 else | |
5131 { | |
5132 rp->rs_no = no; | |
233 | 5133 reg_save(&rp->rs_un.regsave, &backpos); |
180 | 5134 next = OPERAND(scan); |
5135 /* We continue and handle the result when done. */ | |
5136 } | |
7 | 5137 } |
5138 } | |
5139 else | |
5140 { | |
5141 /* Range is backwards, use shortest match first */ | |
5142 if (brace_count[no] <= brace_min[no]) | |
5143 { | |
270 | 5144 rp = regstack_push(RS_BRCPLX_SHORT, scan); |
180 | 5145 if (rp == NULL) |
5146 status = RA_FAIL; | |
5147 else | |
5148 { | |
233 | 5149 reg_save(&rp->rs_un.regsave, &backpos); |
180 | 5150 /* We continue and handle the result when done. */ |
5151 } | |
7 | 5152 } |
5153 } | |
5154 } | |
5155 break; | |
5156 | |
5157 case BRACE_SIMPLE: | |
5158 case STAR: | |
5159 case PLUS: | |
5160 { | |
180 | 5161 regstar_T rst; |
7 | 5162 |
5163 /* | |
5164 * Lookahead to avoid useless match attempts when we know | |
5165 * what character comes next. | |
5166 */ | |
5167 if (OP(next) == EXACTLY) | |
5168 { | |
180 | 5169 rst.nextb = *OPERAND(next); |
7 | 5170 if (ireg_ic) |
5171 { | |
1347 | 5172 if (MB_ISUPPER(rst.nextb)) |
5173 rst.nextb_ic = MB_TOLOWER(rst.nextb); | |
7 | 5174 else |
1347 | 5175 rst.nextb_ic = MB_TOUPPER(rst.nextb); |
7 | 5176 } |
5177 else | |
180 | 5178 rst.nextb_ic = rst.nextb; |
7 | 5179 } |
5180 else | |
5181 { | |
180 | 5182 rst.nextb = NUL; |
5183 rst.nextb_ic = NUL; | |
7 | 5184 } |
5185 if (op != BRACE_SIMPLE) | |
5186 { | |
180 | 5187 rst.minval = (op == STAR) ? 0 : 1; |
5188 rst.maxval = MAX_LIMIT; | |
7 | 5189 } |
5190 else | |
5191 { | |
180 | 5192 rst.minval = bl_minval; |
5193 rst.maxval = bl_maxval; | |
7 | 5194 } |
5195 | |
5196 /* | |
5197 * When maxval > minval, try matching as much as possible, up | |
5198 * to maxval. When maxval < minval, try matching at least the | |
5199 * minimal number (since the range is backwards, that's also | |
5200 * maxval!). | |
5201 */ | |
180 | 5202 rst.count = regrepeat(OPERAND(scan), rst.maxval); |
7 | 5203 if (got_int) |
180 | 5204 { |
5205 status = RA_FAIL; | |
5206 break; | |
5207 } | |
5208 if (rst.minval <= rst.maxval | |
5209 ? rst.count >= rst.minval : rst.count >= rst.maxval) | |
7 | 5210 { |
180 | 5211 /* It could match. Prepare for trying to match what |
5212 * follows. The code is below. Parameters are stored in | |
5213 * a regstar_T on the regstack. */ | |
212 | 5214 if ((long)((unsigned)regstack.ga_len >> 10) >= p_mmp) |
189 | 5215 { |
5216 EMSG(_(e_maxmempat)); | |
5217 status = RA_FAIL; | |
5218 } | |
5219 else if (ga_grow(®stack, sizeof(regstar_T)) == FAIL) | |
180 | 5220 status = RA_FAIL; |
5221 else | |
7 | 5222 { |
180 | 5223 regstack.ga_len += sizeof(regstar_T); |
270 | 5224 rp = regstack_push(rst.minval <= rst.maxval |
233 | 5225 ? RS_STAR_LONG : RS_STAR_SHORT, scan); |
180 | 5226 if (rp == NULL) |
5227 status = RA_FAIL; | |
5228 else | |
7 | 5229 { |
180 | 5230 *(((regstar_T *)rp) - 1) = rst; |
5231 status = RA_BREAK; /* skip the restore bits */ | |
7 | 5232 } |
5233 } | |
5234 } | |
5235 else | |
180 | 5236 status = RA_NOMATCH; |
5237 | |
7 | 5238 } |
180 | 5239 break; |
7 | 5240 |
5241 case NOMATCH: | |
5242 case MATCH: | |
5243 case SUBPAT: | |
270 | 5244 rp = regstack_push(RS_NOMATCH, scan); |
180 | 5245 if (rp == NULL) |
5246 status = RA_FAIL; | |
5247 else | |
7 | 5248 { |
180 | 5249 rp->rs_no = op; |
233 | 5250 reg_save(&rp->rs_un.regsave, &backpos); |
180 | 5251 next = OPERAND(scan); |
5252 /* We continue and handle the result when done. */ | |
7 | 5253 } |
5254 break; | |
5255 | |
5256 case BEHIND: | |
5257 case NOBEHIND: | |
180 | 5258 /* Need a bit of room to store extra positions. */ |
212 | 5259 if ((long)((unsigned)regstack.ga_len >> 10) >= p_mmp) |
189 | 5260 { |
5261 EMSG(_(e_maxmempat)); | |
5262 status = RA_FAIL; | |
5263 } | |
5264 else if (ga_grow(®stack, sizeof(regbehind_T)) == FAIL) | |
180 | 5265 status = RA_FAIL; |
5266 else | |
7 | 5267 { |
180 | 5268 regstack.ga_len += sizeof(regbehind_T); |
270 | 5269 rp = regstack_push(RS_BEHIND1, scan); |
180 | 5270 if (rp == NULL) |
5271 status = RA_FAIL; | |
5272 else | |
7 | 5273 { |
1579 | 5274 /* Need to save the subexpr to be able to restore them |
5275 * when there is a match but we don't use it. */ | |
5276 save_subexpr(((regbehind_T *)rp) - 1); | |
5277 | |
180 | 5278 rp->rs_no = op; |
233 | 5279 reg_save(&rp->rs_un.regsave, &backpos); |
180 | 5280 /* First try if what follows matches. If it does then we |
5281 * check the behind match by looping. */ | |
7 | 5282 } |
5283 } | |
180 | 5284 break; |
7 | 5285 |
5286 case BHPOS: | |
5287 if (REG_MULTI) | |
5288 { | |
5289 if (behind_pos.rs_u.pos.col != (colnr_T)(reginput - regline) | |
5290 || behind_pos.rs_u.pos.lnum != reglnum) | |
180 | 5291 status = RA_NOMATCH; |
7 | 5292 } |
5293 else if (behind_pos.rs_u.ptr != reginput) | |
180 | 5294 status = RA_NOMATCH; |
7 | 5295 break; |
5296 | |
5297 case NEWL: | |
1018 | 5298 if ((c != NUL || !REG_MULTI || reglnum > reg_maxline |
5299 || reg_line_lbr) && (c != '\n' || !reg_line_lbr)) | |
180 | 5300 status = RA_NOMATCH; |
5301 else if (reg_line_lbr) | |
7 | 5302 ADVANCE_REGINPUT(); |
5303 else | |
5304 reg_nextline(); | |
5305 break; | |
5306 | |
5307 case END: | |
180 | 5308 status = RA_MATCH; /* Success! */ |
5309 break; | |
7 | 5310 |
5311 default: | |
5312 EMSG(_(e_re_corr)); | |
5313 #ifdef DEBUG | |
5314 printf("Illegal op code %d\n", op); | |
5315 #endif | |
180 | 5316 status = RA_FAIL; |
5317 break; | |
7 | 5318 } |
5319 } | |
5320 | |
180 | 5321 /* If we can't continue sequentially, break the inner loop. */ |
5322 if (status != RA_CONT) | |
5323 break; | |
5324 | |
5325 /* Continue in inner loop, advance to next item. */ | |
7 | 5326 scan = next; |
180 | 5327 |
5328 } /* end of inner loop */ | |
7 | 5329 |
5330 /* | |
180 | 5331 * If there is something on the regstack execute the code for the state. |
233 | 5332 * If the state is popped then loop and use the older state. |
7 | 5333 */ |
180 | 5334 while (regstack.ga_len > 0 && status != RA_FAIL) |
5335 { | |
5336 rp = (regitem_T *)((char *)regstack.ga_data + regstack.ga_len) - 1; | |
5337 switch (rp->rs_state) | |
5338 { | |
5339 case RS_NOPEN: | |
5340 /* Result is passed on as-is, simply pop the state. */ | |
270 | 5341 regstack_pop(&scan); |
180 | 5342 break; |
5343 | |
5344 case RS_MOPEN: | |
5345 /* Pop the state. Restore pointers when there is no match. */ | |
5346 if (status == RA_NOMATCH) | |
5347 restore_se(&rp->rs_un.sesave, ®_startpos[rp->rs_no], | |
5348 ®_startp[rp->rs_no]); | |
270 | 5349 regstack_pop(&scan); |
180 | 5350 break; |
5351 | |
5352 #ifdef FEAT_SYN_HL | |
5353 case RS_ZOPEN: | |
5354 /* Pop the state. Restore pointers when there is no match. */ | |
5355 if (status == RA_NOMATCH) | |
5356 restore_se(&rp->rs_un.sesave, ®_startzpos[rp->rs_no], | |
5357 ®_startzp[rp->rs_no]); | |
270 | 5358 regstack_pop(&scan); |
180 | 5359 break; |
5360 #endif | |
5361 | |
5362 case RS_MCLOSE: | |
5363 /* Pop the state. Restore pointers when there is no match. */ | |
5364 if (status == RA_NOMATCH) | |
5365 restore_se(&rp->rs_un.sesave, ®_endpos[rp->rs_no], | |
5366 ®_endp[rp->rs_no]); | |
270 | 5367 regstack_pop(&scan); |
180 | 5368 break; |
5369 | |
5370 #ifdef FEAT_SYN_HL | |
5371 case RS_ZCLOSE: | |
5372 /* Pop the state. Restore pointers when there is no match. */ | |
5373 if (status == RA_NOMATCH) | |
5374 restore_se(&rp->rs_un.sesave, ®_endzpos[rp->rs_no], | |
5375 ®_endzp[rp->rs_no]); | |
270 | 5376 regstack_pop(&scan); |
180 | 5377 break; |
5378 #endif | |
5379 | |
5380 case RS_BRANCH: | |
5381 if (status == RA_MATCH) | |
5382 /* this branch matched, use it */ | |
270 | 5383 regstack_pop(&scan); |
180 | 5384 else |
5385 { | |
5386 if (status != RA_BREAK) | |
5387 { | |
5388 /* After a non-matching branch: try next one. */ | |
233 | 5389 reg_restore(&rp->rs_un.regsave, &backpos); |
180 | 5390 scan = rp->rs_scan; |
5391 } | |
5392 if (scan == NULL || OP(scan) != BRANCH) | |
5393 { | |
5394 /* no more branches, didn't find a match */ | |
5395 status = RA_NOMATCH; | |
270 | 5396 regstack_pop(&scan); |
180 | 5397 } |
5398 else | |
5399 { | |
5400 /* Prepare to try a branch. */ | |
5401 rp->rs_scan = regnext(scan); | |
233 | 5402 reg_save(&rp->rs_un.regsave, &backpos); |
180 | 5403 scan = OPERAND(scan); |
5404 } | |
5405 } | |
5406 break; | |
5407 | |
5408 case RS_BRCPLX_MORE: | |
5409 /* Pop the state. Restore pointers when there is no match. */ | |
5410 if (status == RA_NOMATCH) | |
5411 { | |
233 | 5412 reg_restore(&rp->rs_un.regsave, &backpos); |
180 | 5413 --brace_count[rp->rs_no]; /* decrement match count */ |
5414 } | |
270 | 5415 regstack_pop(&scan); |
180 | 5416 break; |
5417 | |
5418 case RS_BRCPLX_LONG: | |
5419 /* Pop the state. Restore pointers when there is no match. */ | |
5420 if (status == RA_NOMATCH) | |
5421 { | |
5422 /* There was no match, but we did find enough matches. */ | |
233 | 5423 reg_restore(&rp->rs_un.regsave, &backpos); |
180 | 5424 --brace_count[rp->rs_no]; |
5425 /* continue with the items after "\{}" */ | |
5426 status = RA_CONT; | |
5427 } | |
270 | 5428 regstack_pop(&scan); |
180 | 5429 if (status == RA_CONT) |
5430 scan = regnext(scan); | |
5431 break; | |
5432 | |
5433 case RS_BRCPLX_SHORT: | |
5434 /* Pop the state. Restore pointers when there is no match. */ | |
5435 if (status == RA_NOMATCH) | |
5436 /* There was no match, try to match one more item. */ | |
233 | 5437 reg_restore(&rp->rs_un.regsave, &backpos); |
270 | 5438 regstack_pop(&scan); |
180 | 5439 if (status == RA_NOMATCH) |
5440 { | |
5441 scan = OPERAND(scan); | |
5442 status = RA_CONT; | |
5443 } | |
5444 break; | |
5445 | |
5446 case RS_NOMATCH: | |
5447 /* Pop the state. If the operand matches for NOMATCH or | |
5448 * doesn't match for MATCH/SUBPAT, we fail. Otherwise backup, | |
5449 * except for SUBPAT, and continue with the next item. */ | |
5450 if (status == (rp->rs_no == NOMATCH ? RA_MATCH : RA_NOMATCH)) | |
5451 status = RA_NOMATCH; | |
5452 else | |
5453 { | |
5454 status = RA_CONT; | |
233 | 5455 if (rp->rs_no != SUBPAT) /* zero-width */ |
5456 reg_restore(&rp->rs_un.regsave, &backpos); | |
180 | 5457 } |
270 | 5458 regstack_pop(&scan); |
180 | 5459 if (status == RA_CONT) |
5460 scan = regnext(scan); | |
5461 break; | |
5462 | |
5463 case RS_BEHIND1: | |
5464 if (status == RA_NOMATCH) | |
5465 { | |
270 | 5466 regstack_pop(&scan); |
180 | 5467 regstack.ga_len -= sizeof(regbehind_T); |
5468 } | |
5469 else | |
5470 { | |
5471 /* The stuff after BEHIND/NOBEHIND matches. Now try if | |
5472 * the behind part does (not) match before the current | |
5473 * position in the input. This must be done at every | |
5474 * position in the input and checking if the match ends at | |
5475 * the current position. */ | |
5476 | |
5477 /* save the position after the found match for next */ | |
233 | 5478 reg_save(&(((regbehind_T *)rp) - 1)->save_after, &backpos); |
180 | 5479 |
4579
7a2be4a39423
updated for version 7.3.1037
Bram Moolenaar <bram@vim.org>
parents:
4505
diff
changeset
|
5480 /* Start looking for a match with operand at the current |
1209 | 5481 * position. Go back one character until we find the |
180 | 5482 * result, hitting the start of the line or the previous |
5483 * line (for multi-line matching). | |
5484 * Set behind_pos to where the match should end, BHPOS | |
5485 * will match it. Save the current value. */ | |
5486 (((regbehind_T *)rp) - 1)->save_behind = behind_pos; | |
5487 behind_pos = rp->rs_un.regsave; | |
5488 | |
5489 rp->rs_state = RS_BEHIND2; | |
5490 | |
233 | 5491 reg_restore(&rp->rs_un.regsave, &backpos); |
4579
7a2be4a39423
updated for version 7.3.1037
Bram Moolenaar <bram@vim.org>
parents:
4505
diff
changeset
|
5492 scan = OPERAND(rp->rs_scan) + 4; |
180 | 5493 } |
5494 break; | |
5495 | |
5496 case RS_BEHIND2: | |
5497 /* | |
5498 * Looping for BEHIND / NOBEHIND match. | |
5499 */ | |
5500 if (status == RA_MATCH && reg_save_equal(&behind_pos)) | |
5501 { | |
5502 /* found a match that ends where "next" started */ | |
5503 behind_pos = (((regbehind_T *)rp) - 1)->save_behind; | |
5504 if (rp->rs_no == BEHIND) | |
233 | 5505 reg_restore(&(((regbehind_T *)rp) - 1)->save_after, |
5506 &backpos); | |
180 | 5507 else |
1579 | 5508 { |
5509 /* But we didn't want a match. Need to restore the | |
5510 * subexpr, because what follows matched, so they have | |
5511 * been set. */ | |
180 | 5512 status = RA_NOMATCH; |
1579 | 5513 restore_subexpr(((regbehind_T *)rp) - 1); |
5514 } | |
270 | 5515 regstack_pop(&scan); |
180 | 5516 regstack.ga_len -= sizeof(regbehind_T); |
5517 } | |
5518 else | |
5519 { | |
4579
7a2be4a39423
updated for version 7.3.1037
Bram Moolenaar <bram@vim.org>
parents:
4505
diff
changeset
|
5520 long limit; |
7a2be4a39423
updated for version 7.3.1037
Bram Moolenaar <bram@vim.org>
parents:
4505
diff
changeset
|
5521 |
1579 | 5522 /* No match or a match that doesn't end where we want it: Go |
5523 * back one character. May go to previous line once. */ | |
180 | 5524 no = OK; |
4579
7a2be4a39423
updated for version 7.3.1037
Bram Moolenaar <bram@vim.org>
parents:
4505
diff
changeset
|
5525 limit = OPERAND_MIN(rp->rs_scan); |
180 | 5526 if (REG_MULTI) |
5527 { | |
4682
2f51ee8825db
updated for version 7.3.1088
Bram Moolenaar <bram@vim.org>
parents:
4679
diff
changeset
|
5528 if (limit > 0 |
2f51ee8825db
updated for version 7.3.1088
Bram Moolenaar <bram@vim.org>
parents:
4679
diff
changeset
|
5529 && ((rp->rs_un.regsave.rs_u.pos.lnum |
2f51ee8825db
updated for version 7.3.1088
Bram Moolenaar <bram@vim.org>
parents:
4679
diff
changeset
|
5530 < behind_pos.rs_u.pos.lnum |
2f51ee8825db
updated for version 7.3.1088
Bram Moolenaar <bram@vim.org>
parents:
4679
diff
changeset
|
5531 ? (colnr_T)STRLEN(regline) |
2f51ee8825db
updated for version 7.3.1088
Bram Moolenaar <bram@vim.org>
parents:
4679
diff
changeset
|
5532 : behind_pos.rs_u.pos.col) |
2f51ee8825db
updated for version 7.3.1088
Bram Moolenaar <bram@vim.org>
parents:
4679
diff
changeset
|
5533 - rp->rs_un.regsave.rs_u.pos.col >= limit)) |
2f51ee8825db
updated for version 7.3.1088
Bram Moolenaar <bram@vim.org>
parents:
4679
diff
changeset
|
5534 no = FAIL; |
2f51ee8825db
updated for version 7.3.1088
Bram Moolenaar <bram@vim.org>
parents:
4679
diff
changeset
|
5535 else if (rp->rs_un.regsave.rs_u.pos.col == 0) |
180 | 5536 { |
5537 if (rp->rs_un.regsave.rs_u.pos.lnum | |
5538 < behind_pos.rs_u.pos.lnum | |
5539 || reg_getline( | |
5540 --rp->rs_un.regsave.rs_u.pos.lnum) | |
5541 == NULL) | |
5542 no = FAIL; | |
5543 else | |
5544 { | |
233 | 5545 reg_restore(&rp->rs_un.regsave, &backpos); |
180 | 5546 rp->rs_un.regsave.rs_u.pos.col = |
5547 (colnr_T)STRLEN(regline); | |
5548 } | |
5549 } | |
5550 else | |
4579
7a2be4a39423
updated for version 7.3.1037
Bram Moolenaar <bram@vim.org>
parents:
4505
diff
changeset
|
5551 { |
4176 | 5552 #ifdef FEAT_MBYTE |
4579
7a2be4a39423
updated for version 7.3.1037
Bram Moolenaar <bram@vim.org>
parents:
4505
diff
changeset
|
5553 if (has_mbyte) |
7a2be4a39423
updated for version 7.3.1037
Bram Moolenaar <bram@vim.org>
parents:
4505
diff
changeset
|
5554 rp->rs_un.regsave.rs_u.pos.col -= |
7a2be4a39423
updated for version 7.3.1037
Bram Moolenaar <bram@vim.org>
parents:
4505
diff
changeset
|
5555 (*mb_head_off)(regline, regline |
4176 | 5556 + rp->rs_un.regsave.rs_u.pos.col - 1) + 1; |
4579
7a2be4a39423
updated for version 7.3.1037
Bram Moolenaar <bram@vim.org>
parents:
4505
diff
changeset
|
5557 else |
7a2be4a39423
updated for version 7.3.1037
Bram Moolenaar <bram@vim.org>
parents:
4505
diff
changeset
|
5558 #endif |
7a2be4a39423
updated for version 7.3.1037
Bram Moolenaar <bram@vim.org>
parents:
4505
diff
changeset
|
5559 --rp->rs_un.regsave.rs_u.pos.col; |
7a2be4a39423
updated for version 7.3.1037
Bram Moolenaar <bram@vim.org>
parents:
4505
diff
changeset
|
5560 } |
180 | 5561 } |
5562 else | |
5563 { | |
5564 if (rp->rs_un.regsave.rs_u.ptr == regline) | |
5565 no = FAIL; | |
5566 else | |
4579
7a2be4a39423
updated for version 7.3.1037
Bram Moolenaar <bram@vim.org>
parents:
4505
diff
changeset
|
5567 { |
7a2be4a39423
updated for version 7.3.1037
Bram Moolenaar <bram@vim.org>
parents:
4505
diff
changeset
|
5568 mb_ptr_back(regline, rp->rs_un.regsave.rs_u.ptr); |
7a2be4a39423
updated for version 7.3.1037
Bram Moolenaar <bram@vim.org>
parents:
4505
diff
changeset
|
5569 if (limit > 0 && (long)(behind_pos.rs_u.ptr |
7a2be4a39423
updated for version 7.3.1037
Bram Moolenaar <bram@vim.org>
parents:
4505
diff
changeset
|
5570 - rp->rs_un.regsave.rs_u.ptr) > limit) |
7a2be4a39423
updated for version 7.3.1037
Bram Moolenaar <bram@vim.org>
parents:
4505
diff
changeset
|
5571 no = FAIL; |
7a2be4a39423
updated for version 7.3.1037
Bram Moolenaar <bram@vim.org>
parents:
4505
diff
changeset
|
5572 } |
180 | 5573 } |
5574 if (no == OK) | |
5575 { | |
5576 /* Advanced, prepare for finding match again. */ | |
233 | 5577 reg_restore(&rp->rs_un.regsave, &backpos); |
4579
7a2be4a39423
updated for version 7.3.1037
Bram Moolenaar <bram@vim.org>
parents:
4505
diff
changeset
|
5578 scan = OPERAND(rp->rs_scan) + 4; |
1579 | 5579 if (status == RA_MATCH) |
5580 { | |
5581 /* We did match, so subexpr may have been changed, | |
5582 * need to restore them for the next try. */ | |
5583 status = RA_NOMATCH; | |
5584 restore_subexpr(((regbehind_T *)rp) - 1); | |
5585 } | |
180 | 5586 } |
5587 else | |
5588 { | |
5589 /* Can't advance. For NOBEHIND that's a match. */ | |
5590 behind_pos = (((regbehind_T *)rp) - 1)->save_behind; | |
5591 if (rp->rs_no == NOBEHIND) | |
5592 { | |
233 | 5593 reg_restore(&(((regbehind_T *)rp) - 1)->save_after, |
5594 &backpos); | |
180 | 5595 status = RA_MATCH; |
5596 } | |
5597 else | |
1579 | 5598 { |
5599 /* We do want a proper match. Need to restore the | |
5600 * subexpr if we had a match, because they may have | |
5601 * been set. */ | |
5602 if (status == RA_MATCH) | |
5603 { | |
5604 status = RA_NOMATCH; | |
5605 restore_subexpr(((regbehind_T *)rp) - 1); | |
5606 } | |
5607 } | |
270 | 5608 regstack_pop(&scan); |
180 | 5609 regstack.ga_len -= sizeof(regbehind_T); |
5610 } | |
5611 } | |
5612 break; | |
5613 | |
5614 case RS_STAR_LONG: | |
5615 case RS_STAR_SHORT: | |
5616 { | |
5617 regstar_T *rst = ((regstar_T *)rp) - 1; | |
5618 | |
5619 if (status == RA_MATCH) | |
5620 { | |
270 | 5621 regstack_pop(&scan); |
180 | 5622 regstack.ga_len -= sizeof(regstar_T); |
5623 break; | |
5624 } | |
5625 | |
5626 /* Tried once already, restore input pointers. */ | |
5627 if (status != RA_BREAK) | |
233 | 5628 reg_restore(&rp->rs_un.regsave, &backpos); |
180 | 5629 |
5630 /* Repeat until we found a position where it could match. */ | |
5631 for (;;) | |
5632 { | |
5633 if (status != RA_BREAK) | |
5634 { | |
5635 /* Tried first position already, advance. */ | |
5636 if (rp->rs_state == RS_STAR_LONG) | |
5637 { | |
685 | 5638 /* Trying for longest match, but couldn't or |
5639 * didn't match -- back up one char. */ | |
180 | 5640 if (--rst->count < rst->minval) |
5641 break; | |
5642 if (reginput == regline) | |
5643 { | |
5644 /* backup to last char of previous line */ | |
5645 --reglnum; | |
5646 regline = reg_getline(reglnum); | |
5647 /* Just in case regrepeat() didn't count | |
5648 * right. */ | |
5649 if (regline == NULL) | |
5650 break; | |
5651 reginput = regline + STRLEN(regline); | |
5652 fast_breakcheck(); | |
5653 } | |
5654 else | |
5655 mb_ptr_back(regline, reginput); | |
5656 } | |
5657 else | |
5658 { | |
5659 /* Range is backwards, use shortest match first. | |
5660 * Careful: maxval and minval are exchanged! | |
5661 * Couldn't or didn't match: try advancing one | |
5662 * char. */ | |
5663 if (rst->count == rst->minval | |
5664 || regrepeat(OPERAND(rp->rs_scan), 1L) == 0) | |
5665 break; | |
5666 ++rst->count; | |
5667 } | |
5668 if (got_int) | |
5669 break; | |
5670 } | |
5671 else | |
5672 status = RA_NOMATCH; | |
5673 | |
5674 /* If it could match, try it. */ | |
5675 if (rst->nextb == NUL || *reginput == rst->nextb | |
5676 || *reginput == rst->nextb_ic) | |
5677 { | |
233 | 5678 reg_save(&rp->rs_un.regsave, &backpos); |
180 | 5679 scan = regnext(rp->rs_scan); |
5680 status = RA_CONT; | |
5681 break; | |
5682 } | |
5683 } | |
5684 if (status != RA_CONT) | |
5685 { | |
5686 /* Failed. */ | |
270 | 5687 regstack_pop(&scan); |
180 | 5688 regstack.ga_len -= sizeof(regstar_T); |
5689 status = RA_NOMATCH; | |
5690 } | |
5691 } | |
5692 break; | |
5693 } | |
5694 | |
685 | 5695 /* If we want to continue the inner loop or didn't pop a state |
5696 * continue matching loop */ | |
180 | 5697 if (status == RA_CONT || rp == (regitem_T *) |
5698 ((char *)regstack.ga_data + regstack.ga_len) - 1) | |
5699 break; | |
5700 } | |
5701 | |
189 | 5702 /* May need to continue with the inner loop, starting at "scan". */ |
180 | 5703 if (status == RA_CONT) |
5704 continue; | |
5705 | |
5706 /* | |
5707 * If the regstack is empty or something failed we are done. | |
5708 */ | |
5709 if (regstack.ga_len == 0 || status == RA_FAIL) | |
5710 { | |
5711 if (scan == NULL) | |
5712 { | |
5713 /* | |
5714 * We get here only if there's trouble -- normally "case END" is | |
5715 * the terminating point. | |
5716 */ | |
5717 EMSG(_(e_re_corr)); | |
7 | 5718 #ifdef DEBUG |
180 | 5719 printf("Premature EOL\n"); |
7 | 5720 #endif |
180 | 5721 } |
189 | 5722 if (status == RA_FAIL) |
5723 got_int = TRUE; | |
180 | 5724 return (status == RA_MATCH); |
5725 } | |
5726 | |
5727 } /* End of loop until the regstack is empty. */ | |
5728 | |
5729 /* NOTREACHED */ | |
5730 } | |
5731 | |
5732 /* | |
5733 * Push an item onto the regstack. | |
5734 * Returns pointer to new item. Returns NULL when out of memory. | |
5735 */ | |
5736 static regitem_T * | |
270 | 5737 regstack_push(state, scan) |
180 | 5738 regstate_T state; |
5739 char_u *scan; | |
5740 { | |
5741 regitem_T *rp; | |
5742 | |
270 | 5743 if ((long)((unsigned)regstack.ga_len >> 10) >= p_mmp) |
189 | 5744 { |
5745 EMSG(_(e_maxmempat)); | |
5746 return NULL; | |
5747 } | |
270 | 5748 if (ga_grow(®stack, sizeof(regitem_T)) == FAIL) |
180 | 5749 return NULL; |
5750 | |
270 | 5751 rp = (regitem_T *)((char *)regstack.ga_data + regstack.ga_len); |
180 | 5752 rp->rs_state = state; |
5753 rp->rs_scan = scan; | |
5754 | |
270 | 5755 regstack.ga_len += sizeof(regitem_T); |
180 | 5756 return rp; |
5757 } | |
5758 | |
5759 /* | |
5760 * Pop an item from the regstack. | |
5761 */ | |
5762 static void | |
270 | 5763 regstack_pop(scan) |
180 | 5764 char_u **scan; |
5765 { | |
5766 regitem_T *rp; | |
5767 | |
270 | 5768 rp = (regitem_T *)((char *)regstack.ga_data + regstack.ga_len) - 1; |
180 | 5769 *scan = rp->rs_scan; |
5770 | |
270 | 5771 regstack.ga_len -= sizeof(regitem_T); |
7 | 5772 } |
5773 | |
5774 /* | |
5775 * regrepeat - repeatedly match something simple, return how many. | |
5776 * Advances reginput (and reglnum) to just after the matched chars. | |
5777 */ | |
5778 static int | |
5779 regrepeat(p, maxcount) | |
5780 char_u *p; | |
5781 long maxcount; /* maximum number of matches allowed */ | |
5782 { | |
5783 long count = 0; | |
5784 char_u *scan; | |
5785 char_u *opnd; | |
5786 int mask; | |
5787 int testval = 0; | |
5788 | |
5789 scan = reginput; /* Make local copy of reginput for speed. */ | |
5790 opnd = OPERAND(p); | |
5791 switch (OP(p)) | |
5792 { | |
5793 case ANY: | |
5794 case ANY + ADD_NL: | |
5795 while (count < maxcount) | |
5796 { | |
5797 /* Matching anything means we continue until end-of-line (or | |
5798 * end-of-file for ANY + ADD_NL), only limited by maxcount. */ | |
5799 while (*scan != NUL && count < maxcount) | |
5800 { | |
5801 ++count; | |
39 | 5802 mb_ptr_adv(scan); |
7 | 5803 } |
1018 | 5804 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline |
5805 || reg_line_lbr || count == maxcount) | |
7 | 5806 break; |
5807 ++count; /* count the line-break */ | |
5808 reg_nextline(); | |
5809 scan = reginput; | |
5810 if (got_int) | |
5811 break; | |
5812 } | |
5813 break; | |
5814 | |
5815 case IDENT: | |
5816 case IDENT + ADD_NL: | |
5817 testval = TRUE; | |
5818 /*FALLTHROUGH*/ | |
5819 case SIDENT: | |
5820 case SIDENT + ADD_NL: | |
5821 while (count < maxcount) | |
5822 { | |
4466 | 5823 if (vim_isIDc(PTR2CHAR(scan)) && (testval || !VIM_ISDIGIT(*scan))) |
7 | 5824 { |
39 | 5825 mb_ptr_adv(scan); |
7 | 5826 } |
5827 else if (*scan == NUL) | |
5828 { | |
1018 | 5829 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline |
5830 || reg_line_lbr) | |
7 | 5831 break; |
5832 reg_nextline(); | |
5833 scan = reginput; | |
5834 if (got_int) | |
5835 break; | |
5836 } | |
5837 else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p))) | |
5838 ++scan; | |
5839 else | |
5840 break; | |
5841 ++count; | |
5842 } | |
5843 break; | |
5844 | |
5845 case KWORD: | |
5846 case KWORD + ADD_NL: | |
5847 testval = TRUE; | |
5848 /*FALLTHROUGH*/ | |
5849 case SKWORD: | |
5850 case SKWORD + ADD_NL: | |
5851 while (count < maxcount) | |
5852 { | |
4069 | 5853 if (vim_iswordp_buf(scan, reg_buf) |
5854 && (testval || !VIM_ISDIGIT(*scan))) | |
7 | 5855 { |
39 | 5856 mb_ptr_adv(scan); |
7 | 5857 } |
5858 else if (*scan == NUL) | |
5859 { | |
1018 | 5860 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline |
5861 || reg_line_lbr) | |
7 | 5862 break; |
5863 reg_nextline(); | |
5864 scan = reginput; | |
5865 if (got_int) | |
5866 break; | |
5867 } | |
5868 else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p))) | |
5869 ++scan; | |
5870 else | |
5871 break; | |
5872 ++count; | |
5873 } | |
5874 break; | |
5875 | |
5876 case FNAME: | |
5877 case FNAME + ADD_NL: | |
5878 testval = TRUE; | |
5879 /*FALLTHROUGH*/ | |
5880 case SFNAME: | |
5881 case SFNAME + ADD_NL: | |
5882 while (count < maxcount) | |
5883 { | |
4466 | 5884 if (vim_isfilec(PTR2CHAR(scan)) && (testval || !VIM_ISDIGIT(*scan))) |
7 | 5885 { |
39 | 5886 mb_ptr_adv(scan); |
7 | 5887 } |
5888 else if (*scan == NUL) | |
5889 { | |
1018 | 5890 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline |
5891 || reg_line_lbr) | |
7 | 5892 break; |
5893 reg_nextline(); | |
5894 scan = reginput; | |
5895 if (got_int) | |
5896 break; | |
5897 } | |
5898 else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p))) | |
5899 ++scan; | |
5900 else | |
5901 break; | |
5902 ++count; | |
5903 } | |
5904 break; | |
5905 | |
5906 case PRINT: | |
5907 case PRINT + ADD_NL: | |
5908 testval = TRUE; | |
5909 /*FALLTHROUGH*/ | |
5910 case SPRINT: | |
5911 case SPRINT + ADD_NL: | |
5912 while (count < maxcount) | |
5913 { | |
5914 if (*scan == NUL) | |
5915 { | |
1018 | 5916 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline |
5917 || reg_line_lbr) | |
7 | 5918 break; |
5919 reg_nextline(); | |
5920 scan = reginput; | |
5921 if (got_int) | |
5922 break; | |
5923 } | |
5221
9982ec574beb
updated for version 7.4a.036
Bram Moolenaar <bram@vim.org>
parents:
4901
diff
changeset
|
5924 else if (vim_isprintc(PTR2CHAR(scan)) == 1 |
9982ec574beb
updated for version 7.4a.036
Bram Moolenaar <bram@vim.org>
parents:
4901
diff
changeset
|
5925 && (testval || !VIM_ISDIGIT(*scan))) |
7 | 5926 { |
39 | 5927 mb_ptr_adv(scan); |
7 | 5928 } |
5929 else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p))) | |
5930 ++scan; | |
5931 else | |
5932 break; | |
5933 ++count; | |
5934 } | |
5935 break; | |
5936 | |
5937 case WHITE: | |
5938 case WHITE + ADD_NL: | |
5939 testval = mask = RI_WHITE; | |
5940 do_class: | |
5941 while (count < maxcount) | |
5942 { | |
5943 #ifdef FEAT_MBYTE | |
5944 int l; | |
5945 #endif | |
5946 if (*scan == NUL) | |
5947 { | |
1018 | 5948 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline |
5949 || reg_line_lbr) | |
7 | 5950 break; |
5951 reg_nextline(); | |
5952 scan = reginput; | |
5953 if (got_int) | |
5954 break; | |
5955 } | |
5956 #ifdef FEAT_MBYTE | |
474 | 5957 else if (has_mbyte && (l = (*mb_ptr2len)(scan)) > 1) |
7 | 5958 { |
5959 if (testval != 0) | |
5960 break; | |
5961 scan += l; | |
5962 } | |
5963 #endif | |
5964 else if ((class_tab[*scan] & mask) == testval) | |
5965 ++scan; | |
5966 else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p))) | |
5967 ++scan; | |
5968 else | |
5969 break; | |
5970 ++count; | |
5971 } | |
5972 break; | |
5973 | |
5974 case NWHITE: | |
5975 case NWHITE + ADD_NL: | |
5976 mask = RI_WHITE; | |
5977 goto do_class; | |
5978 case DIGIT: | |
5979 case DIGIT + ADD_NL: | |
5980 testval = mask = RI_DIGIT; | |
5981 goto do_class; | |
5982 case NDIGIT: | |
5983 case NDIGIT + ADD_NL: | |
5984 mask = RI_DIGIT; | |
5985 goto do_class; | |
5986 case HEX: | |
5987 case HEX + ADD_NL: | |
5988 testval = mask = RI_HEX; | |
5989 goto do_class; | |
5990 case NHEX: | |
5991 case NHEX + ADD_NL: | |
5992 mask = RI_HEX; | |
5993 goto do_class; | |
5994 case OCTAL: | |
5995 case OCTAL + ADD_NL: | |
5996 testval = mask = RI_OCTAL; | |
5997 goto do_class; | |
5998 case NOCTAL: | |
5999 case NOCTAL + ADD_NL: | |
6000 mask = RI_OCTAL; | |
6001 goto do_class; | |
6002 case WORD: | |
6003 case WORD + ADD_NL: | |
6004 testval = mask = RI_WORD; | |
6005 goto do_class; | |
6006 case NWORD: | |
6007 case NWORD + ADD_NL: | |
6008 mask = RI_WORD; | |
6009 goto do_class; | |
6010 case HEAD: | |
6011 case HEAD + ADD_NL: | |
6012 testval = mask = RI_HEAD; | |
6013 goto do_class; | |
6014 case NHEAD: | |
6015 case NHEAD + ADD_NL: | |
6016 mask = RI_HEAD; | |
6017 goto do_class; | |
6018 case ALPHA: | |
6019 case ALPHA + ADD_NL: | |
6020 testval = mask = RI_ALPHA; | |
6021 goto do_class; | |
6022 case NALPHA: | |
6023 case NALPHA + ADD_NL: | |
6024 mask = RI_ALPHA; | |
6025 goto do_class; | |
6026 case LOWER: | |
6027 case LOWER + ADD_NL: | |
6028 testval = mask = RI_LOWER; | |
6029 goto do_class; | |
6030 case NLOWER: | |
6031 case NLOWER + ADD_NL: | |
6032 mask = RI_LOWER; | |
6033 goto do_class; | |
6034 case UPPER: | |
6035 case UPPER + ADD_NL: | |
6036 testval = mask = RI_UPPER; | |
6037 goto do_class; | |
6038 case NUPPER: | |
6039 case NUPPER + ADD_NL: | |
6040 mask = RI_UPPER; | |
6041 goto do_class; | |
6042 | |
6043 case EXACTLY: | |
6044 { | |
6045 int cu, cl; | |
6046 | |
6047 /* This doesn't do a multi-byte character, because a MULTIBYTECODE | |
1347 | 6048 * would have been used for it. It does handle single-byte |
6049 * characters, such as latin1. */ | |
7 | 6050 if (ireg_ic) |
6051 { | |
1347 | 6052 cu = MB_TOUPPER(*opnd); |
6053 cl = MB_TOLOWER(*opnd); | |
7 | 6054 while (count < maxcount && (*scan == cu || *scan == cl)) |
6055 { | |
6056 count++; | |
6057 scan++; | |
6058 } | |
6059 } | |
6060 else | |
6061 { | |
6062 cu = *opnd; | |
6063 while (count < maxcount && *scan == cu) | |
6064 { | |
6065 count++; | |
6066 scan++; | |
6067 } | |
6068 } | |
6069 break; | |
6070 } | |
6071 | |
6072 #ifdef FEAT_MBYTE | |
6073 case MULTIBYTECODE: | |
6074 { | |
6075 int i, len, cf = 0; | |
6076 | |
6077 /* Safety check (just in case 'encoding' was changed since | |
6078 * compiling the program). */ | |
474 | 6079 if ((len = (*mb_ptr2len)(opnd)) > 1) |
7 | 6080 { |
6081 if (ireg_ic && enc_utf8) | |
6082 cf = utf_fold(utf_ptr2char(opnd)); | |
6083 while (count < maxcount) | |
6084 { | |
6085 for (i = 0; i < len; ++i) | |
6086 if (opnd[i] != scan[i]) | |
6087 break; | |
6088 if (i < len && (!ireg_ic || !enc_utf8 | |
6089 || utf_fold(utf_ptr2char(scan)) != cf)) | |
6090 break; | |
6091 scan += len; | |
6092 ++count; | |
6093 } | |
6094 } | |
6095 } | |
6096 break; | |
6097 #endif | |
6098 | |
6099 case ANYOF: | |
6100 case ANYOF + ADD_NL: | |
6101 testval = TRUE; | |
6102 /*FALLTHROUGH*/ | |
6103 | |
6104 case ANYBUT: | |
6105 case ANYBUT + ADD_NL: | |
6106 while (count < maxcount) | |
6107 { | |
6108 #ifdef FEAT_MBYTE | |
6109 int len; | |
6110 #endif | |
6111 if (*scan == NUL) | |
6112 { | |
1018 | 6113 if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline |
6114 || reg_line_lbr) | |
7 | 6115 break; |
6116 reg_nextline(); | |
6117 scan = reginput; | |
6118 if (got_int) | |
6119 break; | |
6120 } | |
6121 else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p))) | |
6122 ++scan; | |
6123 #ifdef FEAT_MBYTE | |
474 | 6124 else if (has_mbyte && (len = (*mb_ptr2len)(scan)) > 1) |
7 | 6125 { |
6126 if ((cstrchr(opnd, (*mb_ptr2char)(scan)) == NULL) == testval) | |
6127 break; | |
6128 scan += len; | |
6129 } | |
6130 #endif | |
6131 else | |
6132 { | |
6133 if ((cstrchr(opnd, *scan) == NULL) == testval) | |
6134 break; | |
6135 ++scan; | |
6136 } | |
6137 ++count; | |
6138 } | |
6139 break; | |
6140 | |
6141 case NEWL: | |
6142 while (count < maxcount | |
1018 | 6143 && ((*scan == NUL && reglnum <= reg_maxline && !reg_line_lbr |
6144 && REG_MULTI) || (*scan == '\n' && reg_line_lbr))) | |
7 | 6145 { |
6146 count++; | |
6147 if (reg_line_lbr) | |
6148 ADVANCE_REGINPUT(); | |
6149 else | |
6150 reg_nextline(); | |
6151 scan = reginput; | |
6152 if (got_int) | |
6153 break; | |
6154 } | |
6155 break; | |
6156 | |
6157 default: /* Oh dear. Called inappropriately. */ | |
6158 EMSG(_(e_re_corr)); | |
6159 #ifdef DEBUG | |
6160 printf("Called regrepeat with op code %d\n", OP(p)); | |
6161 #endif | |
6162 break; | |
6163 } | |
6164 | |
6165 reginput = scan; | |
6166 | |
6167 return (int)count; | |
6168 } | |
6169 | |
6170 /* | |
6171 * regnext - dig the "next" pointer out of a node | |
2010 | 6172 * Returns NULL when calculating size, when there is no next item and when |
6173 * there is an error. | |
7 | 6174 */ |
6175 static char_u * | |
6176 regnext(p) | |
6177 char_u *p; | |
6178 { | |
6179 int offset; | |
6180 | |
2010 | 6181 if (p == JUST_CALC_SIZE || reg_toolong) |
7 | 6182 return NULL; |
6183 | |
6184 offset = NEXT(p); | |
6185 if (offset == 0) | |
6186 return NULL; | |
6187 | |
233 | 6188 if (OP(p) == BACK) |
7 | 6189 return p - offset; |
6190 else | |
6191 return p + offset; | |
6192 } | |
6193 | |
6194 /* | |
6195 * Check the regexp program for its magic number. | |
6196 * Return TRUE if it's wrong. | |
6197 */ | |
6198 static int | |
6199 prog_magic_wrong() | |
6200 { | |
4444 | 6201 regprog_T *prog; |
6202 | |
6203 prog = REG_MULTI ? reg_mmatch->regprog : reg_match->regprog; | |
6204 if (prog->engine == &nfa_regengine) | |
6205 /* For NFA matcher we don't check the magic */ | |
6206 return FALSE; | |
6207 | |
6208 if (UCHARAT(((bt_regprog_T *)prog)->program) != REGMAGIC) | |
7 | 6209 { |
6210 EMSG(_(e_re_corr)); | |
6211 return TRUE; | |
6212 } | |
6213 return FALSE; | |
6214 } | |
6215 | |
6216 /* | |
6217 * Cleanup the subexpressions, if this wasn't done yet. | |
6218 * This construction is used to clear the subexpressions only when they are | |
6219 * used (to increase speed). | |
6220 */ | |
6221 static void | |
6222 cleanup_subexpr() | |
6223 { | |
6224 if (need_clear_subexpr) | |
6225 { | |
6226 if (REG_MULTI) | |
6227 { | |
6228 /* Use 0xff to set lnum to -1 */ | |
6229 vim_memset(reg_startpos, 0xff, sizeof(lpos_T) * NSUBEXP); | |
6230 vim_memset(reg_endpos, 0xff, sizeof(lpos_T) * NSUBEXP); | |
6231 } | |
6232 else | |
6233 { | |
6234 vim_memset(reg_startp, 0, sizeof(char_u *) * NSUBEXP); | |
6235 vim_memset(reg_endp, 0, sizeof(char_u *) * NSUBEXP); | |
6236 } | |
6237 need_clear_subexpr = FALSE; | |
6238 } | |
6239 } | |
6240 | |
#ifdef FEAT_SYN_HL
/*
 * Clear the "z" subexpression start/end info, unless that was already done.
 * Works like cleanup_subexpr(), but on the reg_startz / reg_endz arrays.
 */
    static void
cleanup_zsubexpr()
{
    if (!need_clear_zsubexpr)
        return;

    if (REG_MULTI)
    {
        /* Filling with 0xff bytes sets every lnum to -1. */
        vim_memset(reg_startzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
        vim_memset(reg_endzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
    }
    else
    {
        vim_memset(reg_startzp, 0, sizeof(char_u *) * NSUBEXP);
        vim_memset(reg_endzp, 0, sizeof(char_u *) * NSUBEXP);
    }
    need_clear_zsubexpr = FALSE;
}
#endif
6262 | |
6263 /* | |
1579 | 6264 * Save the current subexpr to "bp", so that they can be restored |
6265 * later by restore_subexpr(). | |
6266 */ | |
6267 static void | |
6268 save_subexpr(bp) | |
6269 regbehind_T *bp; | |
6270 { | |
6271 int i; | |
6272 | |
1602 | 6273 /* When "need_clear_subexpr" is set we don't need to save the values, only |
6274 * remember that this flag needs to be set again when restoring. */ | |
6275 bp->save_need_clear_subexpr = need_clear_subexpr; | |
6276 if (!need_clear_subexpr) | |
1579 | 6277 { |
1602 | 6278 for (i = 0; i < NSUBEXP; ++i) |
1579 | 6279 { |
1602 | 6280 if (REG_MULTI) |
6281 { | |
6282 bp->save_start[i].se_u.pos = reg_startpos[i]; | |
6283 bp->save_end[i].se_u.pos = reg_endpos[i]; | |
6284 } | |
6285 else | |
6286 { | |
6287 bp->save_start[i].se_u.ptr = reg_startp[i]; | |
6288 bp->save_end[i].se_u.ptr = reg_endp[i]; | |
6289 } | |
1579 | 6290 } |
6291 } | |
6292 } | |
6293 | |
6294 /* | |
6295 * Restore the subexpr from "bp". | |
6296 */ | |
6297 static void | |
6298 restore_subexpr(bp) | |
6299 regbehind_T *bp; | |
6300 { | |
6301 int i; | |
6302 | |
1602 | 6303 /* Only need to restore saved values when they are not to be cleared. */ |
6304 need_clear_subexpr = bp->save_need_clear_subexpr; | |
6305 if (!need_clear_subexpr) | |
1579 | 6306 { |
1602 | 6307 for (i = 0; i < NSUBEXP; ++i) |
1579 | 6308 { |
1602 | 6309 if (REG_MULTI) |
6310 { | |
6311 reg_startpos[i] = bp->save_start[i].se_u.pos; | |
6312 reg_endpos[i] = bp->save_end[i].se_u.pos; | |
6313 } | |
6314 else | |
6315 { | |
6316 reg_startp[i] = bp->save_start[i].se_u.ptr; | |
6317 reg_endp[i] = bp->save_end[i].se_u.ptr; | |
6318 } | |
1579 | 6319 } |
6320 } | |
6321 } | |
6322 | |
6323 /* | |
7 | 6324 * Advance reglnum, regline and reginput to the next line. |
6325 */ | |
6326 static void | |
6327 reg_nextline() | |
6328 { | |
6329 regline = reg_getline(++reglnum); | |
6330 reginput = regline; | |
6331 fast_breakcheck(); | |
6332 } | |
6333 | |
6334 /* | |
6335 * Save the input line and position in a regsave_T. | |
6336 */ | |
6337 static void | |
233 | 6338 reg_save(save, gap) |
7 | 6339 regsave_T *save; |
233 | 6340 garray_T *gap; |
7 | 6341 { |
6342 if (REG_MULTI) | |
6343 { | |
6344 save->rs_u.pos.col = (colnr_T)(reginput - regline); | |
6345 save->rs_u.pos.lnum = reglnum; | |
6346 } | |
6347 else | |
6348 save->rs_u.ptr = reginput; | |
233 | 6349 save->rs_len = gap->ga_len; |
7 | 6350 } |
6351 | |
6352 /* | |
6353 * Restore the input line and position from a regsave_T. | |
6354 */ | |
6355 static void | |
233 | 6356 reg_restore(save, gap) |
7 | 6357 regsave_T *save; |
233 | 6358 garray_T *gap; |
7 | 6359 { |
6360 if (REG_MULTI) | |
6361 { | |
6362 if (reglnum != save->rs_u.pos.lnum) | |
6363 { | |
6364 /* only call reg_getline() when the line number changed to save | |
6365 * a bit of time */ | |
6366 reglnum = save->rs_u.pos.lnum; | |
6367 regline = reg_getline(reglnum); | |
6368 } | |
6369 reginput = regline + save->rs_u.pos.col; | |
6370 } | |
6371 else | |
6372 reginput = save->rs_u.ptr; | |
233 | 6373 gap->ga_len = save->rs_len; |
7 | 6374 } |
6375 | |
6376 /* | |
6377 * Return TRUE if current position is equal to saved position. | |
6378 */ | |
6379 static int | |
6380 reg_save_equal(save) | |
6381 regsave_T *save; | |
6382 { | |
6383 if (REG_MULTI) | |
6384 return reglnum == save->rs_u.pos.lnum | |
6385 && reginput == regline + save->rs_u.pos.col; | |
6386 return reginput == save->rs_u.ptr; | |
6387 } | |
6388 | |
6389 /* | |
6390 * Tentatively set the sub-expression start to the current position (after | |
6391 * calling regmatch() they will have changed). Need to save the existing | |
6392 * values for when there is no match. | |
6393 * Use se_save() to use pointer (save_se_multi()) or position (save_se_one()), | |
6394 * depending on REG_MULTI. | |
6395 */ | |
6396 static void | |
6397 save_se_multi(savep, posp) | |
6398 save_se_T *savep; | |
6399 lpos_T *posp; | |
6400 { | |
6401 savep->se_u.pos = *posp; | |
6402 posp->lnum = reglnum; | |
6403 posp->col = (colnr_T)(reginput - regline); | |
6404 } | |
6405 | |
6406 static void | |
6407 save_se_one(savep, pp) | |
6408 save_se_T *savep; | |
6409 char_u **pp; | |
6410 { | |
6411 savep->se_u.ptr = *pp; | |
6412 *pp = reginput; | |
6413 } | |
6414 | |
6415 /* | |
6416 * Compare a number with the operand of RE_LNUM, RE_COL or RE_VCOL. | |
6417 */ | |
6418 static int | |
6419 re_num_cmp(val, scan) | |
6420 long_u val; | |
6421 char_u *scan; | |
6422 { | |
6423 long_u n = OPERAND_MIN(scan); | |
6424 | |
6425 if (OPERAND_CMP(scan) == '>') | |
6426 return val > n; | |
6427 if (OPERAND_CMP(scan) == '<') | |
6428 return val < n; | |
6429 return val == n; | |
6430 } | |
6431 | |
4891
4c42efb4c098
updated for version 7.3.1191
Bram Moolenaar <bram@vim.org>
parents:
4805
diff
changeset
|
6432 /* |
4c42efb4c098
updated for version 7.3.1191
Bram Moolenaar <bram@vim.org>
parents:
4805
diff
changeset
|
6433 * Check whether a backreference matches. |
4c42efb4c098
updated for version 7.3.1191
Bram Moolenaar <bram@vim.org>
parents:
4805
diff
changeset
|
6434 * Returns RA_FAIL, RA_NOMATCH or RA_MATCH. |
5504 | 6435 * If "bytelen" is not NULL, it is set to the byte length of the match in the |
6436 * last line. | |
4891
4c42efb4c098
updated for version 7.3.1191
Bram Moolenaar <bram@vim.org>
parents:
4805
diff
changeset
|
6437 */ |
4c42efb4c098
updated for version 7.3.1191
Bram Moolenaar <bram@vim.org>
parents:
4805
diff
changeset
|
6438 static int |
4c42efb4c098
updated for version 7.3.1191
Bram Moolenaar <bram@vim.org>
parents:
4805
diff
changeset
|
6439 match_with_backref(start_lnum, start_col, end_lnum, end_col, bytelen) |
4c42efb4c098
updated for version 7.3.1191
Bram Moolenaar <bram@vim.org>
parents:
4805
diff
changeset
|
6440 linenr_T start_lnum; |
4c42efb4c098
updated for version 7.3.1191
Bram Moolenaar <bram@vim.org>
parents:
4805
diff
changeset
|
6441 colnr_T start_col; |
4c42efb4c098
updated for version 7.3.1191
Bram Moolenaar <bram@vim.org>
parents:
4805
diff
changeset
|
6442 linenr_T end_lnum; |
4c42efb4c098
updated for version 7.3.1191
Bram Moolenaar <bram@vim.org>
parents:
4805
diff
changeset
|
6443 colnr_T end_col; |
4c42efb4c098
updated for version 7.3.1191
Bram Moolenaar <bram@vim.org>
parents:
4805
diff
changeset
|
6444 int *bytelen; |
4c42efb4c098
updated for version 7.3.1191
Bram Moolenaar <bram@vim.org>
parents:
4805
diff
changeset
|
6445 { |
4c42efb4c098
updated for version 7.3.1191
Bram Moolenaar <bram@vim.org>
parents:
4805
diff
changeset
|
6446 linenr_T clnum = start_lnum; |
4c42efb4c098
updated for version 7.3.1191
Bram Moolenaar <bram@vim.org>
parents:
4805
diff
changeset
|
6447 colnr_T ccol = start_col; |
4c42efb4c098
updated for version 7.3.1191
Bram Moolenaar <bram@vim.org>
parents:
4805
diff
changeset
|
6448 int len; |
4c42efb4c098
updated for version 7.3.1191
Bram Moolenaar <bram@vim.org>
parents:
4805
diff
changeset
|
6449 char_u *p; |
4c42efb4c098
updated for version 7.3.1191
Bram Moolenaar <bram@vim.org>
parents:
4805
diff
changeset
|
6450 |
4c42efb4c098
updated for version 7.3.1191
Bram Moolenaar <bram@vim.org>
parents:
4805
diff
changeset
|
6451 if (bytelen != NULL) |
4c42efb4c098
updated for version 7.3.1191
Bram Moolenaar <bram@vim.org>
parents:
4805
diff
changeset
|
6452 *bytelen = 0; |
4c42efb4c098
updated for version 7.3.1191
Bram Moolenaar <bram@vim.org>
parents:
4805
diff
changeset
|
6453 for (;;) |
4c42efb4c098
updated for version 7.3.1191
Bram Moolenaar <bram@vim.org>
parents:
4805
diff
changeset
|
6454 { |
4c42efb4c098
updated for version 7.3.1191
Bram Moolenaar <bram@vim.org>
parents:
4805
diff
changeset
|
6455 /* Since getting one line may invalidate the other, need to make copy. |
4c42efb4c098
updated for version 7.3.1191
Bram Moolenaar <bram@vim.org>
parents:
4805
diff
changeset
|
6456 * Slow! */ |
4c42efb4c098
updated for version 7.3.1191
Bram Moolenaar <bram@vim.org>
parents:
4805
diff
changeset
|
6457 if (regline != reg_tofree) |
4c42efb4c098
updated for version 7.3.1191
Bram Moolenaar <bram@vim.org>
parents:
4805
diff
changeset
|
6458 { |
4c42efb4c098
updated for version 7.3.1191
Bram Moolenaar <bram@vim.org>
parents:
4805
diff
changeset
|
6459 len = (int)STRLEN(regline); |
4c42efb4c098
updated for version 7.3.1191
Bram Moolenaar <bram@vim.org>
parents:
4805
diff
changeset
|
6460 if (reg_tofree == NULL || len >= (int)reg_tofreelen) |
4c42efb4c098
updated for version 7.3.1191
Bram Moolenaar <bram@vim.org>
parents:
4805
diff
changeset
|
6461 { |
4c42efb4c098
updated for version 7.3.1191
Bram Moolenaar <bram@vim.org>
parents:
4805
diff
changeset
|
6462 len += 50; /* get some extra */ |
4c42efb4c098
updated for version 7.3.1191
Bram Moolenaar <bram@vim.org>
parents:
4805
diff
changeset
|
6463 vim_free(reg_tofree); |
4c42efb4c098
updated for version 7.3.1191
Bram Moolenaar <bram@vim.org>
parents:
4805
diff
changeset
|
6464 reg_tofree = alloc(len); |
4c42efb4c098
updated for version 7.3.1191
Bram Moolenaar <bram@vim.org>
parents:
4805
diff
changeset
|
6465 if (reg_tofree == NULL) |
4c42efb4c098
updated for version 7.3.1191
Bram Moolenaar <bram@vim.org>
parents:
4805
diff
changeset
|
6466 return RA_FAIL; /* out of memory!*/ |
4c42efb4c098
updated for version 7.3.1191
Bram Moolenaar <bram@vim.org>
parents:
4805
diff
changeset
|
6467 reg_tofreelen = len; |
4c42efb4c098
updated for version 7.3.1191
Bram Moolenaar <bram@vim.org>
parents:
4805
diff
changeset
|
6468 } |
4c42efb4c098
updated for version 7.3.1191
Bram Moolenaar <bram@vim.org>
parents:
4805
diff
changeset
|
6469 STRCPY(reg_tofree, regline); |
4c42efb4c098
updated for version 7.3.1191
Bram Moolenaar <bram@vim.org>
parents:
4805
diff
changeset
|
6470 reginput = reg_tofree + (reginput - regline); |
4c42efb4c098
updated for version 7.3.1191
Bram Moolenaar <bram@vim.org>
parents:
4805
diff
changeset
|
6471 regline = reg_tofree; |
4c42efb4c098
updated for version 7.3.1191
Bram Moolenaar <bram@vim.org>
parents:
4805
diff
changeset
|
6472 } |
4c42efb4c098
updated for version 7.3.1191
Bram Moolenaar <bram@vim.org>
parents:
4805
diff
changeset
|
6473 |
4c42efb4c098
updated for version 7.3.1191
Bram Moolenaar <bram@vim.org>
parents:
4805
diff
changeset
|
6474 /* Get the line to compare with. */ |
4c42efb4c098
updated for version 7.3.1191
Bram Moolenaar <bram@vim.org>
parents:
4805
diff
changeset
|
6475 p = reg_getline(clnum); |
4c42efb4c098
updated for version 7.3.1191
Bram Moolenaar <bram@vim.org>
parents:
4805
diff
changeset
|
6476 if (clnum == end_lnum) |
4c42efb4c098
updated for version 7.3.1191
Bram Moolenaar <bram@vim.org>
parents:
4805
diff
changeset
|
6477 len = end_col - ccol; |
4c42efb4c098
updated for version 7.3.1191
Bram Moolenaar <bram@vim.org>
parents:
4805
diff
changeset
|
6478 else |
4c42efb4c098
updated for version 7.3.1191
Bram Moolenaar <bram@vim.org>
parents:
4805
diff
changeset
|
6479 len = (int)STRLEN(p + ccol); |
4c42efb4c098
updated for version 7.3.1191
Bram Moolenaar <bram@vim.org>
parents:
4805
diff
changeset
|
6480 |
4c42efb4c098
updated for version 7.3.1191
Bram Moolenaar <bram@vim.org>
parents:
4805
diff
changeset
|
6481 if (cstrncmp(p + ccol, reginput, &len) != 0) |
4c42efb4c098
updated for version 7.3.1191
Bram Moolenaar <bram@vim.org>
parents:
4805
diff
changeset
|
6482 return RA_NOMATCH; /* doesn't match */ |
4c42efb4c098
updated for version 7.3.1191
Bram Moolenaar <bram@vim.org>
parents:
4805
diff
changeset
|
6483 if (bytelen != NULL) |
4c42efb4c098
updated for version 7.3.1191
Bram Moolenaar <bram@vim.org>
parents:
4805
diff
changeset
|
6484 *bytelen += len; |
4c42efb4c098
updated for version 7.3.1191
Bram Moolenaar <bram@vim.org>
parents:
4805
diff
changeset
|
6485 if (clnum == end_lnum) |
4c42efb4c098
updated for version 7.3.1191
Bram Moolenaar <bram@vim.org>
parents:
4805
diff
changeset
|
6486 break; /* match and at end! */ |
4c42efb4c098
updated for version 7.3.1191
Bram Moolenaar <bram@vim.org>
parents:
4805
diff
changeset
|
6487 if (reglnum >= reg_maxline) |
4c42efb4c098
updated for version 7.3.1191
Bram Moolenaar <bram@vim.org>
parents:
4805
diff
changeset
|
6488 return RA_NOMATCH; /* text too short */ |
4c42efb4c098
updated for version 7.3.1191
Bram Moolenaar <bram@vim.org>
parents:
4805
diff
changeset
|
6489 |
4c42efb4c098
updated for version 7.3.1191
Bram Moolenaar <bram@vim.org>
parents:
4805
diff
changeset
|
6490 /* Advance to next line. */ |
4c42efb4c098
updated for version 7.3.1191
Bram Moolenaar <bram@vim.org>
parents:
4805
diff
changeset
|
6491 reg_nextline(); |
5504 | 6492 if (bytelen != NULL) |
6493 *bytelen = 0; | |
4891
4c42efb4c098
updated for version 7.3.1191
Bram Moolenaar <bram@vim.org>
parents:
4805
diff
changeset
|
6494 ++clnum; |
4c42efb4c098
updated for version 7.3.1191
Bram Moolenaar <bram@vim.org>
parents:
4805
diff
changeset
|
6495 ccol = 0; |
4c42efb4c098
updated for version 7.3.1191
Bram Moolenaar <bram@vim.org>
parents:
4805
diff
changeset
|
6496 if (got_int) |
4c42efb4c098
updated for version 7.3.1191
Bram Moolenaar <bram@vim.org>
parents:
4805
diff
changeset
|
6497 return RA_FAIL; |
4c42efb4c098
updated for version 7.3.1191
Bram Moolenaar <bram@vim.org>
parents:
4805
diff
changeset
|
6498 } |
4c42efb4c098
updated for version 7.3.1191
Bram Moolenaar <bram@vim.org>
parents:
4805
diff
changeset
|
6499 |
4c42efb4c098
updated for version 7.3.1191
Bram Moolenaar <bram@vim.org>
parents:
4805
diff
changeset
|
6500 /* found a match! Note that regline may now point to a copy of the line, |
4c42efb4c098
updated for version 7.3.1191
Bram Moolenaar <bram@vim.org>
parents:
4805
diff
changeset
|
6501 * that should not matter. */ |
4c42efb4c098
updated for version 7.3.1191
Bram Moolenaar <bram@vim.org>
parents:
4805
diff
changeset
|
6502 return RA_MATCH; |
4c42efb4c098
updated for version 7.3.1191
Bram Moolenaar <bram@vim.org>
parents:
4805
diff
changeset
|
6503 } |
7 | 6504 |
#ifdef BT_REGEXP_DUMP

/*
 * regdump - write a vaguely readable listing of the compiled program "r"
 * for "pattern".
 * The listing goes to stdout, or is appended to "bt_regexp_log.log" when
 * BT_REGEXP_LOG is defined.
 */
    static void
regdump(pattern, r)
    char_u      *pattern;
    bt_regprog_T *r;
{
    char_u      *node;
    char_u      *next;
    char_u      *last_ref = NULL;   /* furthest "next" pointer seen */
    int         op = EXACTLY;       /* Arbitrary non-END op. */
    FILE        *f;

#ifdef BT_REGEXP_LOG
    f = fopen("bt_regexp_log.log", "a");
#else
    f = stdout;
#endif
    if (f == NULL)
        return;
    fprintf(f, "-------------------------------------\n\r\nregcomp(%s):\r\n", pattern);

    /*
     * An END may also appear inside a NOMATCH operand, thus only stop at
     * an END that is past every "next" pointer seen so far.
     */
    for (node = r->program + 1; op != END || node <= last_ref; )
    {
        op = OP(node);

        /* Where and what. */
        fprintf(f, "%2d%s", (int)(node - r->program), regprop(node));

        /* Offset of the next node in the program. */
        next = regnext(node);
        if (next != NULL)
            fprintf(f, "(%d)", (int)(next - r->program));
        else
            fprintf(f, "(0)");
        if (last_ref < next)
            last_ref = next;

        /* Numeric operands that come with some of the nodes. */
        switch (op)
        {
            case BRACE_LIMITS:
                /* Two ints */
                fprintf(f, " minval %ld, maxval %ld", OPERAND_MIN(node), OPERAND_MAX(node));
                node += 8;
                break;

            case BEHIND:
            case NOBEHIND:
                /* one int */
                fprintf(f, " count %ld", OPERAND_MIN(node));
                node += 4;
                break;

            case RE_LNUM:
            case RE_COL:
            case RE_VCOL:
                /* one int plus comparator */
                fprintf(f, " count %ld", OPERAND_MIN(node));
                node += 5;
                break;

            default:
                break;
        }
        node += 3;

        switch (op)
        {
            case ANYOF:
            case ANYOF + ADD_NL:
            case ANYBUT:
            case ANYBUT + ADD_NL:
            case EXACTLY:
                /* Literal string, where present. */
                fprintf(f, "\nxxxxxxxxx\n");
                for ( ; *node != NUL; ++node)
                    fprintf(f, "%c", *node);
                fprintf(f, "\nxxxxxxxxx\n");
                node++;
                break;

            default:
                break;
        }
        fprintf(f, "\r\n");
    }

    /* Header fields of interest. */
    if (r->regstart != NUL)
        fprintf(f, "start `%s' 0x%x; ", r->regstart < 256
                ? (char *)transchar(r->regstart)
                : "multibyte", r->regstart);
    if (r->reganch)
        fprintf(f, "anchored; ");
    if (r->regmust != NULL)
        fprintf(f, "must have \"%s\"", r->regmust);
    fprintf(f, "\r\n");

#ifdef BT_REGEXP_LOG
    fclose(f);
#endif
}
#endif      /* BT_REGEXP_DUMP */
6595 | |
#ifdef DEBUG
/*
 * regprop - return a printable name for the opcode of node "op".
 * The result is ":" followed by the name.  It is built in a static buffer,
 * thus it is only valid until the next call.
 * An opcode that is not recognized is shown as "corrupt" plus its number.
 */
    static char_u *
regprop(op)
    char_u      *op;
{
    char        *p = NULL;      /* fixed name; NULL when written with sprintf() */
    static char buf[50];

    STRCPY(buf, ":");

    switch ((int) OP(op))
    {
        case BOL:               p = "BOL"; break;
        case EOL:               p = "EOL"; break;
        case RE_BOF:            p = "BOF"; break;
        case RE_EOF:            p = "EOF"; break;
        case CURSOR:            p = "CURSOR"; break;
        case RE_VISUAL:         p = "RE_VISUAL"; break;
        case RE_LNUM:           p = "RE_LNUM"; break;
        case RE_MARK:           p = "RE_MARK"; break;
        case RE_COL:            p = "RE_COL"; break;
        case RE_VCOL:           p = "RE_VCOL"; break;
        case BOW:               p = "BOW"; break;
        case EOW:               p = "EOW"; break;
        case ANY:               p = "ANY"; break;
        case ANY + ADD_NL:      p = "ANY+NL"; break;
        case ANYOF:             p = "ANYOF"; break;
        case ANYOF + ADD_NL:    p = "ANYOF+NL"; break;
        case ANYBUT:            p = "ANYBUT"; break;
        case ANYBUT + ADD_NL:   p = "ANYBUT+NL"; break;
        case IDENT:             p = "IDENT"; break;
        case IDENT + ADD_NL:    p = "IDENT+NL"; break;
        case SIDENT:            p = "SIDENT"; break;
        case SIDENT + ADD_NL:   p = "SIDENT+NL"; break;
        case KWORD:             p = "KWORD"; break;
        case KWORD + ADD_NL:    p = "KWORD+NL"; break;
        case SKWORD:            p = "SKWORD"; break;
        case SKWORD + ADD_NL:   p = "SKWORD+NL"; break;
        case FNAME:             p = "FNAME"; break;
        case FNAME + ADD_NL:    p = "FNAME+NL"; break;
        case SFNAME:            p = "SFNAME"; break;
        case SFNAME + ADD_NL:   p = "SFNAME+NL"; break;
        case PRINT:             p = "PRINT"; break;
        case PRINT + ADD_NL:    p = "PRINT+NL"; break;
        case SPRINT:            p = "SPRINT"; break;
        case SPRINT + ADD_NL:   p = "SPRINT+NL"; break;
        case WHITE:             p = "WHITE"; break;
        case WHITE + ADD_NL:    p = "WHITE+NL"; break;
        case NWHITE:            p = "NWHITE"; break;
        case NWHITE + ADD_NL:   p = "NWHITE+NL"; break;
        case DIGIT:             p = "DIGIT"; break;
        case DIGIT + ADD_NL:    p = "DIGIT+NL"; break;
        case NDIGIT:            p = "NDIGIT"; break;
        case NDIGIT + ADD_NL:   p = "NDIGIT+NL"; break;
        case HEX:               p = "HEX"; break;
        case HEX + ADD_NL:      p = "HEX+NL"; break;
        case NHEX:              p = "NHEX"; break;
        case NHEX + ADD_NL:     p = "NHEX+NL"; break;
        case OCTAL:             p = "OCTAL"; break;
        case OCTAL + ADD_NL:    p = "OCTAL+NL"; break;
        case NOCTAL:            p = "NOCTAL"; break;
        case NOCTAL + ADD_NL:   p = "NOCTAL+NL"; break;
        case WORD:              p = "WORD"; break;
        case WORD + ADD_NL:     p = "WORD+NL"; break;
        case NWORD:             p = "NWORD"; break;
        case NWORD + ADD_NL:    p = "NWORD+NL"; break;
        case HEAD:              p = "HEAD"; break;
        case HEAD + ADD_NL:     p = "HEAD+NL"; break;
        case NHEAD:             p = "NHEAD"; break;
        case NHEAD + ADD_NL:    p = "NHEAD+NL"; break;
        case ALPHA:             p = "ALPHA"; break;
        case ALPHA + ADD_NL:    p = "ALPHA+NL"; break;
        case NALPHA:            p = "NALPHA"; break;
        case NALPHA + ADD_NL:   p = "NALPHA+NL"; break;
        case LOWER:             p = "LOWER"; break;
        case LOWER + ADD_NL:    p = "LOWER+NL"; break;
        case NLOWER:            p = "NLOWER"; break;
        case NLOWER + ADD_NL:   p = "NLOWER+NL"; break;
        case UPPER:             p = "UPPER"; break;
        case UPPER + ADD_NL:    p = "UPPER+NL"; break;
        case NUPPER:            p = "NUPPER"; break;
        case NUPPER + ADD_NL:   p = "NUPPER+NL"; break;
        case BRANCH:            p = "BRANCH"; break;
        case EXACTLY:           p = "EXACTLY"; break;
        case NOTHING:           p = "NOTHING"; break;
        case BACK:              p = "BACK"; break;
        case END:               p = "END"; break;

        case MOPEN + 0:         p = "MATCH START"; break;
        case MOPEN + 1: case MOPEN + 2: case MOPEN + 3:
        case MOPEN + 4: case MOPEN + 5: case MOPEN + 6:
        case MOPEN + 7: case MOPEN + 8: case MOPEN + 9:
            sprintf(buf + STRLEN(buf), "MOPEN%d", OP(op) - MOPEN);
            break;

        case MCLOSE + 0:        p = "MATCH END"; break;
        case MCLOSE + 1: case MCLOSE + 2: case MCLOSE + 3:
        case MCLOSE + 4: case MCLOSE + 5: case MCLOSE + 6:
        case MCLOSE + 7: case MCLOSE + 8: case MCLOSE + 9:
            sprintf(buf + STRLEN(buf), "MCLOSE%d", OP(op) - MCLOSE);
            break;

        case BACKREF + 1: case BACKREF + 2: case BACKREF + 3:
        case BACKREF + 4: case BACKREF + 5: case BACKREF + 6:
        case BACKREF + 7: case BACKREF + 8: case BACKREF + 9:
            sprintf(buf + STRLEN(buf), "BACKREF%d", OP(op) - BACKREF);
            break;

        case NOPEN:             p = "NOPEN"; break;
        case NCLOSE:            p = "NCLOSE"; break;

#ifdef FEAT_SYN_HL
        case ZOPEN + 1: case ZOPEN + 2: case ZOPEN + 3:
        case ZOPEN + 4: case ZOPEN + 5: case ZOPEN + 6:
        case ZOPEN + 7: case ZOPEN + 8: case ZOPEN + 9:
            sprintf(buf + STRLEN(buf), "ZOPEN%d", OP(op) - ZOPEN);
            break;

        case ZCLOSE + 1: case ZCLOSE + 2: case ZCLOSE + 3:
        case ZCLOSE + 4: case ZCLOSE + 5: case ZCLOSE + 6:
        case ZCLOSE + 7: case ZCLOSE + 8: case ZCLOSE + 9:
            sprintf(buf + STRLEN(buf), "ZCLOSE%d", OP(op) - ZCLOSE);
            break;

        case ZREF + 1: case ZREF + 2: case ZREF + 3:
        case ZREF + 4: case ZREF + 5: case ZREF + 6:
        case ZREF + 7: case ZREF + 8: case ZREF + 9:
            sprintf(buf + STRLEN(buf), "ZREF%d", OP(op) - ZREF);
            break;
#endif

        case STAR:              p = "STAR"; break;
        case PLUS:              p = "PLUS"; break;
        case NOMATCH:           p = "NOMATCH"; break;
        case MATCH:             p = "MATCH"; break;
        case BEHIND:            p = "BEHIND"; break;
        case NOBEHIND:          p = "NOBEHIND"; break;
        case SUBPAT:            p = "SUBPAT"; break;
        case BRACE_LIMITS:      p = "BRACE_LIMITS"; break;
        case BRACE_SIMPLE:      p = "BRACE_SIMPLE"; break;

        case BRACE_COMPLEX + 0: case BRACE_COMPLEX + 1:
        case BRACE_COMPLEX + 2: case BRACE_COMPLEX + 3:
        case BRACE_COMPLEX + 4: case BRACE_COMPLEX + 5:
        case BRACE_COMPLEX + 6: case BRACE_COMPLEX + 7:
        case BRACE_COMPLEX + 8: case BRACE_COMPLEX + 9:
            sprintf(buf + STRLEN(buf), "BRACE_COMPLEX%d", OP(op) - BRACE_COMPLEX);
            break;

#ifdef FEAT_MBYTE
        case MULTIBYTECODE:     p = "MULTIBYTECODE"; break;
#endif
        case NEWL:              p = "NEWL"; break;

        default:
            sprintf(buf + STRLEN(buf), "corrupt %d", OP(op));
            break;
    }

    /* Names with a number were already written into "buf" above. */
    if (p != NULL)
        STRCAT(buf, p);
    return (char_u *)buf;
}
#endif      /* DEBUG */
7 | 6980 |
6981 #ifdef FEAT_MBYTE | |
6982 static void mb_decompose __ARGS((int c, int *c1, int *c2, int *c3)); | |
6983 | |
/* One decomposition: up to three characters, unused ones are zero. */
typedef struct
{
    int a, b, c;
} decomp_T;


/*
 * Decompositions for the characters 0xfb20 - 0xfb4f, indexed by the
 * character minus 0xfb20.  Entries marked UNUSED map to themselves.
 * The table is only ever read, thus it is const.
 */
static const decomp_T decomp_table[0xfb4f-0xfb20+1] =
{
    {0x5e2,0,0},                /* 0xfb20       alt ayin */
    {0x5d0,0,0},                /* 0xfb21       alt alef */
    {0x5d3,0,0},                /* 0xfb22       alt dalet */
    {0x5d4,0,0},                /* 0xfb23       alt he */
    {0x5db,0,0},                /* 0xfb24       alt kaf */
    {0x5dc,0,0},                /* 0xfb25       alt lamed */
    {0x5dd,0,0},                /* 0xfb26       alt mem-sofit */
    {0x5e8,0,0},                /* 0xfb27       alt resh */
    {0x5ea,0,0},                /* 0xfb28       alt tav */
    {'+', 0, 0},                /* 0xfb29       alt plus */
    {0x5e9, 0x5c1, 0},          /* 0xfb2a       shin+shin-dot */
    {0x5e9, 0x5c2, 0},          /* 0xfb2b       shin+sin-dot */
    {0x5e9, 0x5c1, 0x5bc},      /* 0xfb2c       shin+shin-dot+dagesh */
    {0x5e9, 0x5c2, 0x5bc},      /* 0xfb2d       shin+sin-dot+dagesh */
    {0x5d0, 0x5b7, 0},          /* 0xfb2e       alef+patah */
    {0x5d0, 0x5b8, 0},          /* 0xfb2f       alef+qamats */
    {0x5d0, 0x5b4, 0},          /* 0xfb30       alef+hiriq */
    {0x5d1, 0x5bc, 0},          /* 0xfb31       bet+dagesh */
    {0x5d2, 0x5bc, 0},          /* 0xfb32       gimel+dagesh */
    {0x5d3, 0x5bc, 0},          /* 0xfb33       dalet+dagesh */
    {0x5d4, 0x5bc, 0},          /* 0xfb34       he+dagesh */
    {0x5d5, 0x5bc, 0},          /* 0xfb35       vav+dagesh */
    {0x5d6, 0x5bc, 0},          /* 0xfb36       zayin+dagesh */
    {0xfb37, 0, 0},             /* 0xfb37 -- UNUSED */
    {0x5d8, 0x5bc, 0},          /* 0xfb38       tet+dagesh */
    {0x5d9, 0x5bc, 0},          /* 0xfb39       yud+dagesh */
    {0x5da, 0x5bc, 0},          /* 0xfb3a       kaf sofit+dagesh */
    {0x5db, 0x5bc, 0},          /* 0xfb3b       kaf+dagesh */
    {0x5dc, 0x5bc, 0},          /* 0xfb3c       lamed+dagesh */
    {0xfb3d, 0, 0},             /* 0xfb3d -- UNUSED */
    {0x5de, 0x5bc, 0},          /* 0xfb3e       mem+dagesh */
    {0xfb3f, 0, 0},             /* 0xfb3f -- UNUSED */
    {0x5e0, 0x5bc, 0},          /* 0xfb40       nun+dagesh */
    {0x5e1, 0x5bc, 0},          /* 0xfb41       samech+dagesh */
    {0xfb42, 0, 0},             /* 0xfb42 -- UNUSED */
    {0x5e3, 0x5bc, 0},          /* 0xfb43       pe sofit+dagesh */
    {0x5e4, 0x5bc,0},           /* 0xfb44       pe+dagesh */
    {0xfb45, 0, 0},             /* 0xfb45 -- UNUSED */
    {0x5e6, 0x5bc, 0},          /* 0xfb46       tsadi+dagesh */
    {0x5e7, 0x5bc, 0},          /* 0xfb47       qof+dagesh */
    {0x5e8, 0x5bc, 0},          /* 0xfb48       resh+dagesh */
    {0x5e9, 0x5bc, 0},          /* 0xfb49       shin+dagesh */
    {0x5ea, 0x5bc, 0},          /* 0xfb4a       tav+dagesh */
    {0x5d5, 0x5b9, 0},          /* 0xfb4b       vav+holam */
    {0x5d1, 0x5bf, 0},          /* 0xfb4c       bet+rafe */
    {0x5db, 0x5bf, 0},          /* 0xfb4d       kaf+rafe */
    {0x5e4, 0x5bf, 0},          /* 0xfb4e       pe+rafe */
    {0x5d0, 0x5dc, 0}           /* 0xfb4f       alef-lamed */
};

/*
 * Decompose character "c" into up to three characters, which are stored in
 * "*c1", "*c2" and "*c3".
 * Only 0xfb20 - 0xfb4f are decomposed, using decomp_table[].  Any other
 * character is returned unchanged in "*c1", with "*c2" and "*c3" set to
 * zero.
 */
    static void
mb_decompose(c, c1, c2, c3)
    int c, *c1, *c2, *c3;
{
    const decomp_T  *d;

    if (c >= 0xfb20 && c <= 0xfb4f)
    {
        /* Read the entry in place; there is no need to copy it first. */
        d = &decomp_table[c - 0xfb20];
        *c1 = d->a;
        *c2 = d->b;
        *c3 = d->c;
    }
    else
    {
        *c1 = c;
        *c2 = *c3 = 0;
    }
}
7062 #endif | |
7063 | |
7064 /* | |
7065 * Compare two strings, ignore case if ireg_ic set. | |
7066 * Return 0 if strings match, non-zero otherwise. | |
7067 * Correct the length "*n" when composing characters are ignored. | |
7068 */ | |
7069 static int | |
7070 cstrncmp(s1, s2, n) | |
7071 char_u *s1, *s2; | |
7072 int *n; | |
7073 { | |
7074 int result; | |
7075 | |
7076 if (!ireg_ic) | |
7077 result = STRNCMP(s1, s2, *n); | |
7078 else | |
7079 result = MB_STRNICMP(s1, s2, *n); | |
7080 | |
7081 #ifdef FEAT_MBYTE | |
7082 /* if it failed and it's utf8 and we want to combineignore: */ | |
7083 if (result != 0 && enc_utf8 && ireg_icombine) | |
7084 { | |
7085 char_u *str1, *str2; | |
7086 int c1, c2, c11, c12; | |
7087 int junk; | |
7088 | |
7089 /* we have to handle the strcmp ourselves, since it is necessary to | |
7090 * deal with the composing characters by ignoring them: */ | |
7091 str1 = s1; | |
7092 str2 = s2; | |
7093 c1 = c2 = 0; | |
507 | 7094 while ((int)(str1 - s1) < *n) |
7 | 7095 { |
7096 c1 = mb_ptr2char_adv(&str1); | |
7097 c2 = mb_ptr2char_adv(&str2); | |
7098 | |
7099 /* decompose the character if necessary, into 'base' characters | |
7100 * because I don't care about Arabic, I will hard-code the Hebrew | |
7101 * which I *do* care about! So sue me... */ | |
7102 if (c1 != c2 && (!ireg_ic || utf_fold(c1) != utf_fold(c2))) | |
7103 { | |
7104 /* decomposition necessary? */ | |
7105 mb_decompose(c1, &c11, &junk, &junk); | |
7106 mb_decompose(c2, &c12, &junk, &junk); | |
7107 c1 = c11; | |
7108 c2 = c12; | |
7109 if (c11 != c12 && (!ireg_ic || utf_fold(c11) != utf_fold(c12))) | |
7110 break; | |
7111 } | |
7112 } | |
7113 result = c2 - c1; | |
7114 if (result == 0) | |
7115 *n = (int)(str2 - s2); | |
7116 } | |
7117 #endif | |
7118 | |
7119 return result; | |
7120 } | |
7121 | |
7122 /* | |
7123 * cstrchr: This function is used a lot for simple searches, keep it fast! | |
7124 */ | |
7125 static char_u * | |
7126 cstrchr(s, c) | |
7127 char_u *s; | |
7128 int c; | |
7129 { | |
7130 char_u *p; | |
7131 int cc; | |
7132 | |
7133 if (!ireg_ic | |
7134 #ifdef FEAT_MBYTE | |
7135 || (!enc_utf8 && mb_char2len(c) > 1) | |
7136 #endif | |
7137 ) | |
7138 return vim_strchr(s, c); | |
7139 | |
7140 /* tolower() and toupper() can be slow, comparing twice should be a lot | |
7141 * faster (esp. when using MS Visual C++!). | |
7142 * For UTF-8 need to use folded case. */ | |
7143 #ifdef FEAT_MBYTE | |
7144 if (enc_utf8 && c > 0x80) | |
7145 cc = utf_fold(c); | |
7146 else | |
7147 #endif | |
1347 | 7148 if (MB_ISUPPER(c)) |
7149 cc = MB_TOLOWER(c); | |
7150 else if (MB_ISLOWER(c)) | |
7151 cc = MB_TOUPPER(c); | |
7 | 7152 else |
7153 return vim_strchr(s, c); | |
7154 | |
7155 #ifdef FEAT_MBYTE | |
7156 if (has_mbyte) | |
7157 { | |
474 | 7158 for (p = s; *p != NUL; p += (*mb_ptr2len)(p)) |
7 | 7159 { |
7160 if (enc_utf8 && c > 0x80) | |
7161 { | |
7162 if (utf_fold(utf_ptr2char(p)) == cc) | |
7163 return p; | |
7164 } | |
7165 else if (*p == c || *p == cc) | |
7166 return p; | |
7167 } | |
7168 } | |
7169 else | |
7170 #endif | |
7171 /* Faster version for when there are no multi-byte characters. */ | |
7172 for (p = s; *p != NUL; ++p) | |
7173 if (*p == c || *p == cc) | |
7174 return p; | |
7175 | |
7176 return NULL; | |
7177 } | |
7178 | |
7179 /*************************************************************** | |
7180 * regsub stuff * | |
7181 ***************************************************************/ | |
7182 | |
7183 /* This stuff below really confuses cc on an SGI -- webb */ | |
7184 #ifdef __sgi | |
7185 # undef __ARGS | |
7186 # define __ARGS(x) () | |
7187 #endif | |
7188 | |
7189 /* | |
7190 * We should define fptr as a pointer to a function returning a pointer to | |
7191 * a function returning a pointer to a function ... | |
7192 * This is impossible, so we declare a pointer to a function returning a | |
7193 * pointer to a function returning void. This should work for all compilers. | |
7194 */ | |
772 | 7195 typedef void (*(*fptr_T) __ARGS((int *, int)))(); |
7196 | |
7197 static fptr_T do_upper __ARGS((int *, int)); | |
7198 static fptr_T do_Upper __ARGS((int *, int)); | |
7199 static fptr_T do_lower __ARGS((int *, int)); | |
7200 static fptr_T do_Lower __ARGS((int *, int)); | |
7 | 7201 |
7202 static int vim_regsub_both __ARGS((char_u *source, char_u *dest, int copy, int magic, int backslash)); | |
7203 | |
772 | 7204 static fptr_T |
7 | 7205 do_upper(d, c) |
772 | 7206 int *d; |
7207 int c; | |
7 | 7208 { |
772 | 7209 *d = MB_TOUPPER(c); |
7210 | |
7211 return (fptr_T)NULL; | |
7 | 7212 } |
7213 | |
772 | 7214 static fptr_T |
7215 do_Upper(d, c) | |
7216 int *d; | |
7 | 7217 int c; |
7218 { | |
772 | 7219 *d = MB_TOUPPER(c); |
7220 | |
7221 return (fptr_T)do_Upper; | |
7222 } | |
7223 | |
7224 static fptr_T | |
7225 do_lower(d, c) | |
7226 int *d; | |
7227 int c; | |
7228 { | |
7229 *d = MB_TOLOWER(c); | |
7230 | |
7231 return (fptr_T)NULL; | |
7232 } | |
7233 | |
7234 static fptr_T | |
7235 do_Lower(d, c) | |
7236 int *d; | |
7237 int c; | |
7238 { | |
7239 *d = MB_TOLOWER(c); | |
7240 | |
7241 return (fptr_T)do_Lower; | |
7 | 7242 } |
7243 | |
7244 /* | |
7245 * regtilde(): Replace tildes in the pattern by the old pattern. | |
7246 * | |
7247 * Short explanation of the tilde: It stands for the previous replacement | |
7248 * pattern. If that previous pattern also contains a ~ we should go back a | |
7249 * step further... But we insert the previous pattern into the current one | |
7250 * and remember that. | |
772 | 7251 * This still does not handle the case where "magic" changes. So require the |
7252 * user to keep his hands off of "magic". | |
7 | 7253 * |
7254 * The tildes are parsed once before the first call to vim_regsub(). | |
7255 */ | |
7256 char_u * | |
7257 regtilde(source, magic) | |
7258 char_u *source; | |
7259 int magic; | |
7260 { | |
7261 char_u *newsub = source; | |
7262 char_u *tmpsub; | |
7263 char_u *p; | |
7264 int len; | |
7265 int prevlen; | |
7266 | |
7267 for (p = newsub; *p; ++p) | |
7268 { | |
7269 if ((*p == '~' && magic) || (*p == '\\' && *(p + 1) == '~' && !magic)) | |
7270 { | |
7271 if (reg_prev_sub != NULL) | |
7272 { | |
7273 /* length = len(newsub) - 1 + len(prev_sub) + 1 */ | |
7274 prevlen = (int)STRLEN(reg_prev_sub); | |
7275 tmpsub = alloc((unsigned)(STRLEN(newsub) + prevlen)); | |
7276 if (tmpsub != NULL) | |
7277 { | |
7278 /* copy prefix */ | |
7279 len = (int)(p - newsub); /* not including ~ */ | |
7280 mch_memmove(tmpsub, newsub, (size_t)len); | |
1209 | 7281 /* interpret tilde */ |
7 | 7282 mch_memmove(tmpsub + len, reg_prev_sub, (size_t)prevlen); |
7283 /* copy postfix */ | |
7284 if (!magic) | |
7285 ++p; /* back off \ */ | |
7286 STRCPY(tmpsub + len + prevlen, p + 1); | |
7287 | |
7288 if (newsub != source) /* already allocated newsub */ | |
7289 vim_free(newsub); | |
7290 newsub = tmpsub; | |
7291 p = newsub + len + prevlen; | |
7292 } | |
7293 } | |
7294 else if (magic) | |
1621 | 7295 STRMOVE(p, p + 1); /* remove '~' */ |
7 | 7296 else |
1621 | 7297 STRMOVE(p, p + 2); /* remove '\~' */ |
7 | 7298 --p; |
7299 } | |
7300 else | |
7301 { | |
7302 if (*p == '\\' && p[1]) /* skip escaped characters */ | |
7303 ++p; | |
7304 #ifdef FEAT_MBYTE | |
7305 if (has_mbyte) | |
474 | 7306 p += (*mb_ptr2len)(p) - 1; |
7 | 7307 #endif |
7308 } | |
7309 } | |
7310 | |
7311 vim_free(reg_prev_sub); | |
7312 if (newsub != source) /* newsub was allocated, just keep it */ | |
7313 reg_prev_sub = newsub; | |
7314 else /* no ~ found, need to save newsub */ | |
7315 reg_prev_sub = vim_strsave(newsub); | |
7316 return newsub; | |
7317 } | |
7318 | |
7319 #ifdef FEAT_EVAL | |
7320 static int can_f_submatch = FALSE; /* TRUE when submatch() can be used */ | |
7321 | |
7322 /* These pointers are used instead of reg_match and reg_mmatch for | |
7323 * reg_submatch(). Needed for when the substitution string is an expression | |
7324 * that contains a call to substitute() and submatch(). */ | |
7325 static regmatch_T *submatch_match; | |
7326 static regmmatch_T *submatch_mmatch; | |
2011 | 7327 static linenr_T submatch_firstlnum; |
7328 static linenr_T submatch_maxline; | |
2904 | 7329 static int submatch_line_lbr; |
7 | 7330 #endif |
7331 | |
7332 #if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) || defined(PROTO) | |
7333 /* | |
7334 * vim_regsub() - perform substitutions after a vim_regexec() or | |
7335 * vim_regexec_multi() match. | |
7336 * | |
7337 * If "copy" is TRUE really copy into "dest". | |
7338 * If "copy" is FALSE nothing is copied, this is just to find out the length | |
7339 * of the result. | |
7340 * | |
7341 * If "backslash" is TRUE, a backslash will be removed later, need to double | |
7342 * them to keep them, and insert a backslash before a CR to avoid it being | |
7343 * replaced with a line break later. | |
7344 * | |
7345 * Note: The matched text must not change between the call of | |
7346 * vim_regexec()/vim_regexec_multi() and vim_regsub()! It would make the back | |
7347 * references invalid! | |
7348 * | |
7349 * Returns the size of the replacement, including terminating NUL. | |
7350 */ | |
7351 int | |
7352 vim_regsub(rmp, source, dest, copy, magic, backslash) | |
7353 regmatch_T *rmp; | |
7354 char_u *source; | |
7355 char_u *dest; | |
7356 int copy; | |
7357 int magic; | |
7358 int backslash; | |
7359 { | |
7360 reg_match = rmp; | |
7361 reg_mmatch = NULL; | |
7362 reg_maxline = 0; | |
4061 | 7363 reg_buf = curbuf; |
5836 | 7364 reg_line_lbr = TRUE; |
7 | 7365 return vim_regsub_both(source, dest, copy, magic, backslash); |
7366 } | |
7367 #endif | |
7368 | |
7369 int | |
7370 vim_regsub_multi(rmp, lnum, source, dest, copy, magic, backslash) | |
7371 regmmatch_T *rmp; | |
7372 linenr_T lnum; | |
7373 char_u *source; | |
7374 char_u *dest; | |
7375 int copy; | |
7376 int magic; | |
7377 int backslash; | |
7378 { | |
7379 reg_match = NULL; | |
7380 reg_mmatch = rmp; | |
7381 reg_buf = curbuf; /* always works on the current buffer! */ | |
7382 reg_firstlnum = lnum; | |
7383 reg_maxline = curbuf->b_ml.ml_line_count - lnum; | |
5836 | 7384 reg_line_lbr = FALSE; |
7 | 7385 return vim_regsub_both(source, dest, copy, magic, backslash); |
7386 } | |
7387 | |
7388 static int | |
7389 vim_regsub_both(source, dest, copy, magic, backslash) | |
7390 char_u *source; | |
7391 char_u *dest; | |
7392 int copy; | |
7393 int magic; | |
7394 int backslash; | |
7395 { | |
7396 char_u *src; | |
7397 char_u *dst; | |
7398 char_u *s; | |
7399 int c; | |
772 | 7400 int cc; |
7 | 7401 int no = -1; |
4244 | 7402 fptr_T func_all = (fptr_T)NULL; |
7403 fptr_T func_one = (fptr_T)NULL; | |
7 | 7404 linenr_T clnum = 0; /* init for GCC */ |
7405 int len = 0; /* init for GCC */ | |
7406 #ifdef FEAT_EVAL | |
7407 static char_u *eval_result = NULL; | |
7408 #endif | |
7409 | |
7410 /* Be paranoid... */ | |
7411 if (source == NULL || dest == NULL) | |
7412 { | |
7413 EMSG(_(e_null)); | |
7414 return 0; | |
7415 } | |
7416 if (prog_magic_wrong()) | |
7417 return 0; | |
7418 src = source; | |
7419 dst = dest; | |
7420 | |
7421 /* | |
7422 * When the substitute part starts with "\=" evaluate it as an expression. | |
7423 */ | |
7424 if (source[0] == '\\' && source[1] == '=' | |
7425 #ifdef FEAT_EVAL | |
7426 && !can_f_submatch /* can't do this recursively */ | |
7427 #endif | |
7428 ) | |
7429 { | |
7430 #ifdef FEAT_EVAL | |
7431 /* To make sure that the length doesn't change between checking the | |
7432 * length and copying the string, and to speed up things, the | |
7433 * resulting string is saved from the call with "copy" == FALSE to the | |
7434 * call with "copy" == TRUE. */ | |
7435 if (copy) | |
7436 { | |
7437 if (eval_result != NULL) | |
7438 { | |
7439 STRCPY(dest, eval_result); | |
7440 dst += STRLEN(eval_result); | |
7441 vim_free(eval_result); | |
7442 eval_result = NULL; | |
7443 } | |
7444 } | |
7445 else | |
7446 { | |
7447 win_T *save_reg_win; | |
7448 int save_ireg_ic; | |
7449 | |
7450 vim_free(eval_result); | |
7451 | |
7452 /* The expression may contain substitute(), which calls us | |
7453 * recursively. Make sure submatch() gets the text from the first | |
7454 * level. Don't need to save "reg_buf", because | |
7455 * vim_regexec_multi() can't be called recursively. */ | |
7456 submatch_match = reg_match; | |
7457 submatch_mmatch = reg_mmatch; | |
2011 | 7458 submatch_firstlnum = reg_firstlnum; |
7459 submatch_maxline = reg_maxline; | |
2904 | 7460 submatch_line_lbr = reg_line_lbr; |
7 | 7461 save_reg_win = reg_win; |
7462 save_ireg_ic = ireg_ic; | |
7463 can_f_submatch = TRUE; | |
7464 | |
714 | 7465 eval_result = eval_to_string(source + 2, NULL, TRUE); |
7 | 7466 if (eval_result != NULL) |
7467 { | |
2125
b8744d1982d1
updated for version 7.2.407
Bram Moolenaar <bram@zimbu.org>
parents:
2012
diff
changeset
|
7468 int had_backslash = FALSE; |
b8744d1982d1
updated for version 7.2.407
Bram Moolenaar <bram@zimbu.org>
parents:
2012
diff
changeset
|
7469 |
39 | 7470 for (s = eval_result; *s != NUL; mb_ptr_adv(s)) |
7 | 7471 { |
2904 | 7472 /* Change NL to CR, so that it becomes a line break, |
7473 * unless called from vim_regexec_nl(). | |
7 | 7474 * Skip over a backslashed character. */ |
2904 | 7475 if (*s == NL && !submatch_line_lbr) |
7 | 7476 *s = CAR; |
7477 else if (*s == '\\' && s[1] != NUL) | |
2125
b8744d1982d1
updated for version 7.2.407
Bram Moolenaar <bram@zimbu.org>
parents:
2012
diff
changeset
|
7478 { |
7 | 7479 ++s; |
2173 | 7480 /* Change NL to CR here too, so that this works: |
7481 * :s/abc\\\ndef/\="aaa\\\nbbb"/ on text: | |
7482 * abc\ | |
7483 * def | |
2904 | 7484 * Not when called from vim_regexec_nl(). |
2173 | 7485 */ |
2904 | 7486 if (*s == NL && !submatch_line_lbr) |
2173 | 7487 *s = CAR; |
2125
b8744d1982d1
updated for version 7.2.407
Bram Moolenaar <bram@zimbu.org>
parents:
2012
diff
changeset
|
7488 had_backslash = TRUE; |
b8744d1982d1
updated for version 7.2.407
Bram Moolenaar <bram@zimbu.org>
parents:
2012
diff
changeset
|
7489 } |
b8744d1982d1
updated for version 7.2.407
Bram Moolenaar <bram@zimbu.org>
parents:
2012
diff
changeset
|
7490 } |
b8744d1982d1
updated for version 7.2.407
Bram Moolenaar <bram@zimbu.org>
parents:
2012
diff
changeset
|
7491 if (had_backslash && backslash) |
b8744d1982d1
updated for version 7.2.407
Bram Moolenaar <bram@zimbu.org>
parents:
2012
diff
changeset
|
7492 { |
b8744d1982d1
updated for version 7.2.407
Bram Moolenaar <bram@zimbu.org>
parents:
2012
diff
changeset
|
7493 /* Backslashes will be consumed, need to double them. */ |
b8744d1982d1
updated for version 7.2.407
Bram Moolenaar <bram@zimbu.org>
parents:
2012
diff
changeset
|
7494 s = vim_strsave_escaped(eval_result, (char_u *)"\\"); |
b8744d1982d1
updated for version 7.2.407
Bram Moolenaar <bram@zimbu.org>
parents:
2012
diff
changeset
|
7495 if (s != NULL) |
b8744d1982d1
updated for version 7.2.407
Bram Moolenaar <bram@zimbu.org>
parents:
2012
diff
changeset
|
7496 { |
b8744d1982d1
updated for version 7.2.407
Bram Moolenaar <bram@zimbu.org>
parents:
2012
diff
changeset
|
7497 vim_free(eval_result); |
b8744d1982d1
updated for version 7.2.407
Bram Moolenaar <bram@zimbu.org>
parents:
2012
diff
changeset
|
7498 eval_result = s; |
b8744d1982d1
updated for version 7.2.407
Bram Moolenaar <bram@zimbu.org>
parents:
2012
diff
changeset
|
7499 } |
7 | 7500 } |
7501 | |
7502 dst += STRLEN(eval_result); | |
7503 } | |
7504 | |
7505 reg_match = submatch_match; | |
7506 reg_mmatch = submatch_mmatch; | |
2011 | 7507 reg_firstlnum = submatch_firstlnum; |
7508 reg_maxline = submatch_maxline; | |
2904 | 7509 reg_line_lbr = submatch_line_lbr; |
7 | 7510 reg_win = save_reg_win; |
7511 ireg_ic = save_ireg_ic; | |
7512 can_f_submatch = FALSE; | |
7513 } | |
7514 #endif | |
7515 } | |
7516 else | |
7517 while ((c = *src++) != NUL) | |
7518 { | |
7519 if (c == '&' && magic) | |
7520 no = 0; | |
7521 else if (c == '\\' && *src != NUL) | |
7522 { | |
7523 if (*src == '&' && !magic) | |
7524 { | |
7525 ++src; | |
7526 no = 0; | |
7527 } | |
7528 else if ('0' <= *src && *src <= '9') | |
7529 { | |
7530 no = *src++ - '0'; | |
7531 } | |
7532 else if (vim_strchr((char_u *)"uUlLeE", *src)) | |
7533 { | |
7534 switch (*src++) | |
7535 { | |
4244 | 7536 case 'u': func_one = (fptr_T)do_upper; |
7 | 7537 continue; |
4244 | 7538 case 'U': func_all = (fptr_T)do_Upper; |
7 | 7539 continue; |
4244 | 7540 case 'l': func_one = (fptr_T)do_lower; |
7 | 7541 continue; |
4244 | 7542 case 'L': func_all = (fptr_T)do_Lower; |
7 | 7543 continue; |
7544 case 'e': | |
4244 | 7545 case 'E': func_one = func_all = (fptr_T)NULL; |
7 | 7546 continue; |
7547 } | |
7548 } | |
7549 } | |
7550 if (no < 0) /* Ordinary character. */ | |
7551 { | |
798 | 7552 if (c == K_SPECIAL && src[0] != NUL && src[1] != NUL) |
7553 { | |
1209 | 7554 /* Copy a special key as-is. */ |
798 | 7555 if (copy) |
7556 { | |
7557 *dst++ = c; | |
7558 *dst++ = *src++; | |
7559 *dst++ = *src++; | |
7560 } | |
7561 else | |
7562 { | |
7563 dst += 3; | |
7564 src += 2; | |
7565 } | |
7566 continue; | |
7567 } | |
7568 | |
7 | 7569 if (c == '\\' && *src != NUL) |
7570 { | |
7571 /* Check for abbreviations -- webb */ | |
7572 switch (*src) | |
7573 { | |
7574 case 'r': c = CAR; ++src; break; | |
7575 case 'n': c = NL; ++src; break; | |
7576 case 't': c = TAB; ++src; break; | |
7577 /* Oh no! \e already has meaning in subst pat :-( */ | |
7578 /* case 'e': c = ESC; ++src; break; */ | |
7579 case 'b': c = Ctrl_H; ++src; break; | |
7580 | |
7581 /* If "backslash" is TRUE the backslash will be removed | |
7582 * later. Used to insert a literal CR. */ | |
7583 default: if (backslash) | |
7584 { | |
7585 if (copy) | |
7586 *dst = '\\'; | |
7587 ++dst; | |
7588 } | |
7589 c = *src++; | |
7590 } | |
7591 } | |
798 | 7592 #ifdef FEAT_MBYTE |
7593 else if (has_mbyte) | |
7594 c = mb_ptr2char(src - 1); | |
7595 #endif | |
7 | 7596 |
7597 /* Write to buffer, if copy is set. */ | |
4244 | 7598 if (func_one != (fptr_T)NULL) |
7599 /* Turbo C complains without the typecast */ | |
7600 func_one = (fptr_T)(func_one(&cc, c)); | |
7601 else if (func_all != (fptr_T)NULL) | |
7602 /* Turbo C complains without the typecast */ | |
7603 func_all = (fptr_T)(func_all(&cc, c)); | |
7604 else /* just copy */ | |
772 | 7605 cc = c; |
7606 | |
7607 #ifdef FEAT_MBYTE | |
7608 if (has_mbyte) | |
7 | 7609 { |
2307
81527f127fb1
Fix: Composing characters in :s substitute text were dropped.
Bram Moolenaar <bram@vim.org>
parents:
2247
diff
changeset
|
7610 int totlen = mb_ptr2len(src - 1); |
81527f127fb1
Fix: Composing characters in :s substitute text were dropped.
Bram Moolenaar <bram@vim.org>
parents:
2247
diff
changeset
|
7611 |
7 | 7612 if (copy) |
772 | 7613 mb_char2bytes(cc, dst); |
7614 dst += mb_char2len(cc) - 1; | |
2307
81527f127fb1
Fix: Composing characters in :s substitute text were dropped.
Bram Moolenaar <bram@vim.org>
parents:
2247
diff
changeset
|
7615 if (enc_utf8) |
81527f127fb1
Fix: Composing characters in :s substitute text were dropped.
Bram Moolenaar <bram@vim.org>
parents:
2247
diff
changeset
|
7616 { |
81527f127fb1
Fix: Composing characters in :s substitute text were dropped.
Bram Moolenaar <bram@vim.org>
parents:
2247
diff
changeset
|
7617 int clen = utf_ptr2len(src - 1); |
81527f127fb1
Fix: Composing characters in :s substitute text were dropped.
Bram Moolenaar <bram@vim.org>
parents:
2247
diff
changeset
|
7618 |
81527f127fb1
Fix: Composing characters in :s substitute text were dropped.
Bram Moolenaar <bram@vim.org>
parents:
2247
diff
changeset
|
7619 /* If the character length is shorter than "totlen", there |
81527f127fb1
Fix: Composing characters in :s substitute text were dropped.
Bram Moolenaar <bram@vim.org>
parents:
2247
diff
changeset
|
7620 * are composing characters; copy them as-is. */ |
81527f127fb1
Fix: Composing characters in :s substitute text were dropped.
Bram Moolenaar <bram@vim.org>
parents:
2247
diff
changeset
|
7621 if (clen < totlen) |
81527f127fb1
Fix: Composing characters in :s substitute text were dropped.
Bram Moolenaar <bram@vim.org>
parents:
2247
diff
changeset
|
7622 { |
81527f127fb1
Fix: Composing characters in :s substitute text were dropped.
Bram Moolenaar <bram@vim.org>
parents:
2247
diff
changeset
|
7623 if (copy) |
81527f127fb1
Fix: Composing characters in :s substitute text were dropped.
Bram Moolenaar <bram@vim.org>
parents:
2247
diff
changeset
|
7624 mch_memmove(dst + 1, src - 1 + clen, |
81527f127fb1
Fix: Composing characters in :s substitute text were dropped.
Bram Moolenaar <bram@vim.org>
parents:
2247
diff
changeset
|
7625 (size_t)(totlen - clen)); |
81527f127fb1
Fix: Composing characters in :s substitute text were dropped.
Bram Moolenaar <bram@vim.org>
parents:
2247
diff
changeset
|
7626 dst += totlen - clen; |
81527f127fb1
Fix: Composing characters in :s substitute text were dropped.
Bram Moolenaar <bram@vim.org>
parents:
2247
diff
changeset
|
7627 } |
81527f127fb1
Fix: Composing characters in :s substitute text were dropped.
Bram Moolenaar <bram@vim.org>
parents:
2247
diff
changeset
|
7628 } |
81527f127fb1
Fix: Composing characters in :s substitute text were dropped.
Bram Moolenaar <bram@vim.org>
parents:
2247
diff
changeset
|
7629 src += totlen - 1; |
7 | 7630 } |
7631 else | |
7632 #endif | |
7633 if (copy) | |
772 | 7634 *dst = cc; |
7 | 7635 dst++; |
7636 } | |
7637 else | |
7638 { | |
7639 if (REG_MULTI) | |
7640 { | |
7641 clnum = reg_mmatch->startpos[no].lnum; | |
7642 if (clnum < 0 || reg_mmatch->endpos[no].lnum < 0) | |
7643 s = NULL; | |
7644 else | |
7645 { | |
7646 s = reg_getline(clnum) + reg_mmatch->startpos[no].col; | |
7647 if (reg_mmatch->endpos[no].lnum == clnum) | |
7648 len = reg_mmatch->endpos[no].col | |
7649 - reg_mmatch->startpos[no].col; | |
7650 else | |
7651 len = (int)STRLEN(s); | |
7652 } | |
7653 } | |
7654 else | |
7655 { | |
7656 s = reg_match->startp[no]; | |
7657 if (reg_match->endp[no] == NULL) | |
7658 s = NULL; | |
7659 else | |
7660 len = (int)(reg_match->endp[no] - s); | |
7661 } | |
7662 if (s != NULL) | |
7663 { | |
7664 for (;;) | |
7665 { | |
7666 if (len == 0) | |
7667 { | |
7668 if (REG_MULTI) | |
7669 { | |
7670 if (reg_mmatch->endpos[no].lnum == clnum) | |
7671 break; | |
7672 if (copy) | |
7673 *dst = CAR; | |
7674 ++dst; | |
7675 s = reg_getline(++clnum); | |
7676 if (reg_mmatch->endpos[no].lnum == clnum) | |
7677 len = reg_mmatch->endpos[no].col; | |
7678 else | |
7679 len = (int)STRLEN(s); | |
7680 } | |
7681 else | |
7682 break; | |
7683 } | |
7684 else if (*s == NUL) /* we hit NUL. */ | |
7685 { | |
7686 if (copy) | |
7687 EMSG(_(e_re_damg)); | |
7688 goto exit; | |
7689 } | |
7690 else | |
7691 { | |
7692 if (backslash && (*s == CAR || *s == '\\')) | |
7693 { | |
7694 /* | |
7695 * Insert a backslash in front of a CR, otherwise | |
7696 * it will be replaced by a line break. | |
7697 * Number of backslashes will be halved later, | |
7698 * double them here. | |
7699 */ | |
7700 if (copy) | |
7701 { | |
7702 dst[0] = '\\'; | |
7703 dst[1] = *s; | |
7704 } | |
7705 dst += 2; | |
7706 } | |
7707 else | |
7708 { | |
772 | 7709 #ifdef FEAT_MBYTE |
7710 if (has_mbyte) | |
7711 c = mb_ptr2char(s); | |
7712 else | |
7713 #endif | |
7714 c = *s; | |
7715 | |
4244 | 7716 if (func_one != (fptr_T)NULL) |
7717 /* Turbo C complains without the typecast */ | |
7718 func_one = (fptr_T)(func_one(&cc, c)); | |
7719 else if (func_all != (fptr_T)NULL) | |
7720 /* Turbo C complains without the typecast */ | |
7721 func_all = (fptr_T)(func_all(&cc, c)); | |
7722 else /* just copy */ | |
772 | 7723 cc = c; |
7724 | |
7725 #ifdef FEAT_MBYTE | |
7726 if (has_mbyte) | |
7 | 7727 { |
1332 | 7728 int l; |
7729 | |
7730 /* Copy composing characters separately, one | |
7731 * at a time. */ | |
7732 if (enc_utf8) | |
7733 l = utf_ptr2len(s) - 1; | |
7734 else | |
7735 l = mb_ptr2len(s) - 1; | |
772 | 7736 |
7737 s += l; | |
7738 len -= l; | |
7739 if (copy) | |
7740 mb_char2bytes(cc, dst); | |
7741 dst += mb_char2len(cc) - 1; | |
7 | 7742 } |
772 | 7743 else |
7744 #endif | |
7745 if (copy) | |
7746 *dst = cc; | |
7747 dst++; | |
7 | 7748 } |
772 | 7749 |
7 | 7750 ++s; |
7751 --len; | |
7752 } | |
7753 } | |
7754 } | |
7755 no = -1; | |
7756 } | |
7757 } | |
7758 if (copy) | |
7759 *dst = NUL; | |
7760 | |
7761 exit: | |
7762 return (int)((dst - dest) + 1); | |
7763 } | |
7764 | |
7765 #ifdef FEAT_EVAL | |
2012 | 7766 static char_u *reg_getline_submatch __ARGS((linenr_T lnum)); |
7767 | |
7 | 7768 /* |
2011 | 7769 * Call reg_getline() with the line numbers from the submatch. If a |
7770 * substitute() was used the reg_maxline and other values have been | |
7771 * overwritten. | |
7772 */ | |
7773 static char_u * | |
7774 reg_getline_submatch(lnum) | |
7775 linenr_T lnum; | |
7776 { | |
7777 char_u *s; | |
7778 linenr_T save_first = reg_firstlnum; | |
7779 linenr_T save_max = reg_maxline; | |
7780 | |
7781 reg_firstlnum = submatch_firstlnum; | |
7782 reg_maxline = submatch_maxline; | |
7783 | |
7784 s = reg_getline(lnum); | |
7785 | |
7786 reg_firstlnum = save_first; | |
7787 reg_maxline = save_max; | |
7788 return s; | |
7789 } | |
7790 | |
7791 /* | |
1209 | 7792 * Used for the submatch() function: get the string from the n'th submatch in |
7 | 7793 * allocated memory. |
7794 * Returns NULL when not in a ":s" command and for a non-existing submatch. | |
7795 */ | |
7796 char_u * | |
7797 reg_submatch(no) | |
7798 int no; | |
7799 { | |
7800 char_u *retval = NULL; | |
7801 char_u *s; | |
7802 int len; | |
7803 int round; | |
7804 linenr_T lnum; | |
7805 | |
840 | 7806 if (!can_f_submatch || no < 0) |
7 | 7807 return NULL; |
7808 | |
7809 if (submatch_match == NULL) | |
7810 { | |
7811 /* | |
7812 * First round: compute the length and allocate memory. | |
7813 * Second round: copy the text. | |
7814 */ | |
7815 for (round = 1; round <= 2; ++round) | |
7816 { | |
7817 lnum = submatch_mmatch->startpos[no].lnum; | |
7818 if (lnum < 0 || submatch_mmatch->endpos[no].lnum < 0) | |
7819 return NULL; | |
7820 | |
2011 | 7821 s = reg_getline_submatch(lnum) + submatch_mmatch->startpos[no].col; |
7 | 7822 if (s == NULL) /* anti-crash check, cannot happen? */ |
7823 break; | |
7824 if (submatch_mmatch->endpos[no].lnum == lnum) | |
7825 { | |
7826 /* Within one line: take form start to end col. */ | |
7827 len = submatch_mmatch->endpos[no].col | |
7828 - submatch_mmatch->startpos[no].col; | |
7829 if (round == 2) | |
418 | 7830 vim_strncpy(retval, s, len); |
7 | 7831 ++len; |
7832 } | |
7833 else | |
7834 { | |
7835 /* Multiple lines: take start line from start col, middle | |
7836 * lines completely and end line up to end col. */ | |
7837 len = (int)STRLEN(s); | |
7838 if (round == 2) | |
7839 { | |
7840 STRCPY(retval, s); | |
7841 retval[len] = '\n'; | |
7842 } | |
7843 ++len; | |
7844 ++lnum; | |
7845 while (lnum < submatch_mmatch->endpos[no].lnum) | |
7846 { | |
2011 | 7847 s = reg_getline_submatch(lnum++); |
7 | 7848 if (round == 2) |
7849 STRCPY(retval + len, s); | |
7850 len += (int)STRLEN(s); | |
7851 if (round == 2) | |
7852 retval[len] = '\n'; | |
7853 ++len; | |
7854 } | |
7855 if (round == 2) | |
2011 | 7856 STRNCPY(retval + len, reg_getline_submatch(lnum), |
7 | 7857 submatch_mmatch->endpos[no].col); |
7858 len += submatch_mmatch->endpos[no].col; | |
7859 if (round == 2) | |
7860 retval[len] = NUL; | |
7861 ++len; | |
7862 } | |
7863 | |
840 | 7864 if (retval == NULL) |
7 | 7865 { |
7866 retval = lalloc((long_u)len, TRUE); | |
840 | 7867 if (retval == NULL) |
7 | 7868 return NULL; |
7869 } | |
7870 } | |
7871 } | |
7872 else | |
7873 { | |
1815 | 7874 s = submatch_match->startp[no]; |
7875 if (s == NULL || submatch_match->endp[no] == NULL) | |
7 | 7876 retval = NULL; |
7877 else | |
7878 retval = vim_strnsave(s, (int)(submatch_match->endp[no] - s)); | |
7879 } | |
7880 | |
7881 return retval; | |
7882 } | |
5794 | 7883 |
7884 /* | |
7885 * Used for the submatch() function with the optional non-zero argument: get | |
7886 * the list of strings from the n'th submatch in allocated memory with NULs | |
7887 * represented in NLs. | |
7888 * Returns a list of allocated strings. Returns NULL when not in a ":s" | |
7889 * command, for a non-existing submatch and for any error. | |
7890 */ | |
7891 list_T * | |
7892 reg_submatch_list(no) | |
7893 int no; | |
7894 { | |
7895 char_u *s; | |
7896 linenr_T slnum; | |
7897 linenr_T elnum; | |
7898 colnr_T scol; | |
7899 colnr_T ecol; | |
7900 int i; | |
7901 list_T *list; | |
7902 int error = FALSE; | |
7903 | |
7904 if (!can_f_submatch || no < 0) | |
7905 return NULL; | |
7906 | |
7907 if (submatch_match == NULL) | |
7908 { | |
7909 slnum = submatch_mmatch->startpos[no].lnum; | |
7910 elnum = submatch_mmatch->endpos[no].lnum; | |
7911 if (slnum < 0 || elnum < 0) | |
7912 return NULL; | |
7913 | |
7914 scol = submatch_mmatch->startpos[no].col; | |
7915 ecol = submatch_mmatch->endpos[no].col; | |
7916 | |
7917 list = list_alloc(); | |
7918 if (list == NULL) | |
7919 return NULL; | |
7920 | |
7921 s = reg_getline_submatch(slnum) + scol; | |
7922 if (slnum == elnum) | |
7923 { | |
7924 if (list_append_string(list, s, ecol - scol) == FAIL) | |
7925 error = TRUE; | |
7926 } | |
7927 else | |
7928 { | |
7929 if (list_append_string(list, s, -1) == FAIL) | |
7930 error = TRUE; | |
7931 for (i = 1; i < elnum - slnum; i++) | |
7932 { | |
7933 s = reg_getline_submatch(slnum + i); | |
7934 if (list_append_string(list, s, -1) == FAIL) | |
7935 error = TRUE; | |
7936 } | |
7937 s = reg_getline_submatch(elnum); | |
7938 if (list_append_string(list, s, ecol) == FAIL) | |
7939 error = TRUE; | |
7940 } | |
7941 } | |
7942 else | |
7943 { | |
7944 s = submatch_match->startp[no]; | |
7945 if (s == NULL || submatch_match->endp[no] == NULL) | |
7946 return NULL; | |
7947 list = list_alloc(); | |
7948 if (list == NULL) | |
7949 return NULL; | |
7950 if (list_append_string(list, s, | |
7951 (int)(submatch_match->endp[no] - s)) == FAIL) | |
7952 error = TRUE; | |
7953 } | |
7954 | |
7955 if (error) | |
7956 { | |
7957 list_free(list, TRUE); | |
7958 return NULL; | |
7959 } | |
7960 return list; | |
7961 } | |
7 | 7962 #endif |
4444 | 7963 |
7964 static regengine_T bt_regengine = | |
7965 { | |
7966 bt_regcomp, | |
4805
66803af09906
updated for version 7.3.1149
Bram Moolenaar <bram@vim.org>
parents:
4770
diff
changeset
|
7967 bt_regfree, |
4444 | 7968 bt_regexec_nl, |
7969 bt_regexec_multi | |
7970 #ifdef DEBUG | |
7971 ,(char_u *)"" | |
7972 #endif | |
7973 }; | |
7974 | |
7975 | |
7976 #include "regexp_nfa.c" | |
7977 | |
7978 static regengine_T nfa_regengine = | |
7979 { | |
7980 nfa_regcomp, | |
4805
66803af09906
updated for version 7.3.1149
Bram Moolenaar <bram@vim.org>
parents:
4770
diff
changeset
|
7981 nfa_regfree, |
4444 | 7982 nfa_regexec_nl, |
7983 nfa_regexec_multi | |
7984 #ifdef DEBUG | |
7985 ,(char_u *)"" | |
7986 #endif | |
7987 }; | |
7988 | |
/*
 * Values that select a regexp engine.  They must stay in sync with the
 * values of the 'regexpengine' option.
 */
#define	    AUTOMATIC_ENGINE	0
#define	    BACKTRACKING_ENGINE	1
#define	    NFA_ENGINE		2

/* The engine selected for the pattern at hand; assigned in vim_regcomp(). */
static int regexp_engine = AUTOMATIC_ENGINE;

#ifdef DEBUG
/* Printable engine names, indexed by the engine selection value. */
static char_u regname[][30] = {
    "AUTOMATIC Regexp Engine",
    "BACKTRACKING Regexp Engine",
    "NFA Regexp Engine"
};
#endif
8002 | |
8003 /* | |
8004 * Compile a regular expression into internal code. | |
4805
66803af09906
updated for version 7.3.1149
Bram Moolenaar <bram@vim.org>
parents:
4770
diff
changeset
|
8005 * Returns the program in allocated memory. |
66803af09906
updated for version 7.3.1149
Bram Moolenaar <bram@vim.org>
parents:
4770
diff
changeset
|
8006 * Use vim_regfree() to free the memory. |
66803af09906
updated for version 7.3.1149
Bram Moolenaar <bram@vim.org>
parents:
4770
diff
changeset
|
8007 * Returns NULL for an error. |
4444 | 8008 */ |
8009 regprog_T * | |
8010 vim_regcomp(expr_arg, re_flags) | |
8011 char_u *expr_arg; | |
8012 int re_flags; | |
8013 { | |
8014 regprog_T *prog = NULL; | |
8015 char_u *expr = expr_arg; | |
8016 | |
8017 regexp_engine = p_re; | |
8018 | |
8019 /* Check for prefix "\%#=", that sets the regexp engine */ | |
8020 if (STRNCMP(expr, "\\%#=", 4) == 0) | |
8021 { | |
8022 int newengine = expr[4] - '0'; | |
8023 | |
8024 if (newengine == AUTOMATIC_ENGINE | |
8025 || newengine == BACKTRACKING_ENGINE | |
8026 || newengine == NFA_ENGINE) | |
8027 { | |
8028 regexp_engine = expr[4] - '0'; | |
8029 expr += 5; | |
8030 #ifdef DEBUG | |
5897 | 8031 smsg((char_u *)"New regexp mode selected (%d): %s", |
8032 regexp_engine, regname[newengine]); | |
4444 | 8033 #endif |
8034 } | |
8035 else | |
8036 { | |
8037 EMSG(_("E864: \\%#= can only be followed by 0, 1, or 2. The automatic engine will be used ")); | |
8038 regexp_engine = AUTOMATIC_ENGINE; | |
8039 } | |
8040 } | |
8041 #ifdef DEBUG | |
8042 bt_regengine.expr = expr; | |
8043 nfa_regengine.expr = expr; | |
8044 #endif | |
8045 | |
8046 /* | |
8047 * First try the NFA engine, unless backtracking was requested. | |
8048 */ | |
8049 if (regexp_engine != BACKTRACKING_ENGINE) | |
8050 prog = nfa_regengine.regcomp(expr, re_flags); | |
8051 else | |
8052 prog = bt_regengine.regcomp(expr, re_flags); | |
8053 | |
8054 if (prog == NULL) /* error compiling regexp with initial engine */ | |
8055 { | |
4460 | 8056 #ifdef BT_REGEXP_DEBUG_LOG |
4444 | 8057 if (regexp_engine != BACKTRACKING_ENGINE) /* debugging log for NFA */ |
8058 { | |
8059 FILE *f; | |
4460 | 8060 f = fopen(BT_REGEXP_DEBUG_LOG_NAME, "a"); |
4444 | 8061 if (f) |
8062 { | |
4762
47906f888725
updated for version 7.3.1128
Bram Moolenaar <bram@vim.org>
parents:
4760
diff
changeset
|
8063 fprintf(f, "Syntax error in \"%s\"\n", expr); |
4444 | 8064 fclose(f); |
8065 } | |
8066 else | |
4460 | 8067 EMSG2("(NFA) Could not open \"%s\" to write !!!", |
8068 BT_REGEXP_DEBUG_LOG_NAME); | |
4444 | 8069 } |
8070 #endif | |
8071 /* | |
5353 | 8072 * If the NFA engine failed, the backtracking engine won't work either. |
8073 * | |
4444 | 8074 if (regexp_engine == AUTOMATIC_ENGINE) |
4762
47906f888725
updated for version 7.3.1128
Bram Moolenaar <bram@vim.org>
parents:
4760
diff
changeset
|
8075 prog = bt_regengine.regcomp(expr, re_flags); |
5353 | 8076 */ |
4762
47906f888725
updated for version 7.3.1128
Bram Moolenaar <bram@vim.org>
parents:
4760
diff
changeset
|
8077 } |
4444 | 8078 |
8079 return prog; | |
8080 } | |
8081 | |
8082 /* | |
4805
66803af09906
updated for version 7.3.1149
Bram Moolenaar <bram@vim.org>
parents:
4770
diff
changeset
|
8083 * Free a compiled regexp program, returned by vim_regcomp(). |
66803af09906
updated for version 7.3.1149
Bram Moolenaar <bram@vim.org>
parents:
4770
diff
changeset
|
8084 */ |
66803af09906
updated for version 7.3.1149
Bram Moolenaar <bram@vim.org>
parents:
4770
diff
changeset
|
8085 void |
66803af09906
updated for version 7.3.1149
Bram Moolenaar <bram@vim.org>
parents:
4770
diff
changeset
|
8086 vim_regfree(prog) |
66803af09906
updated for version 7.3.1149
Bram Moolenaar <bram@vim.org>
parents:
4770
diff
changeset
|
8087 regprog_T *prog; |
66803af09906
updated for version 7.3.1149
Bram Moolenaar <bram@vim.org>
parents:
4770
diff
changeset
|
8088 { |
66803af09906
updated for version 7.3.1149
Bram Moolenaar <bram@vim.org>
parents:
4770
diff
changeset
|
8089 if (prog != NULL) |
66803af09906
updated for version 7.3.1149
Bram Moolenaar <bram@vim.org>
parents:
4770
diff
changeset
|
8090 prog->engine->regfree(prog); |
66803af09906
updated for version 7.3.1149
Bram Moolenaar <bram@vim.org>
parents:
4770
diff
changeset
|
8091 } |
66803af09906
updated for version 7.3.1149
Bram Moolenaar <bram@vim.org>
parents:
4770
diff
changeset
|
8092 |
66803af09906
updated for version 7.3.1149
Bram Moolenaar <bram@vim.org>
parents:
4770
diff
changeset
|
8093 /* |
4444 | 8094 * Match a regexp against a string. |
8095 * "rmp->regprog" is a compiled regexp as returned by vim_regcomp(). | |
8096 * Uses curbuf for line count and 'iskeyword'. | |
8097 * | |
8098 * Return TRUE if there is a match, FALSE if not. | |
8099 */ | |
8100 int | |
8101 vim_regexec(rmp, line, col) | |
8102 regmatch_T *rmp; | |
8103 char_u *line; /* string to match against */ | |
8104 colnr_T col; /* column to start looking for match */ | |
8105 { | |
5838 | 8106 return rmp->regprog->engine->regexec_nl(rmp, line, col, FALSE); |
4444 | 8107 } |
8108 | |
8109 #if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) \ | |
8110 || defined(FIND_REPLACE_DIALOG) || defined(PROTO) | |
8111 /* | |
8112 * Like vim_regexec(), but consider a "\n" in "line" to be a line break. | |
8113 */ | |
8114 int | |
8115 vim_regexec_nl(rmp, line, col) | |
8116 regmatch_T *rmp; | |
8117 char_u *line; | |
8118 colnr_T col; | |
8119 { | |
5838 | 8120 return rmp->regprog->engine->regexec_nl(rmp, line, col, TRUE); |
4444 | 8121 } |
8122 #endif | |
8123 | |
8124 /* | |
8125 * Match a regexp against multiple lines. | |
8126 * "rmp->regprog" is a compiled regexp as returned by vim_regcomp(). | |
8127 * Uses curbuf for line count and 'iskeyword'. | |
8128 * | |
8129 * Return zero if there is no match. Return number of lines contained in the | |
8130 * match otherwise. | |
8131 */ | |
8132 long | |
8133 vim_regexec_multi(rmp, win, buf, lnum, col, tm) | |
8134 regmmatch_T *rmp; | |
8135 win_T *win; /* window in which to search or NULL */ | |
8136 buf_T *buf; /* buffer in which to search */ | |
8137 linenr_T lnum; /* nr of line to start looking for match */ | |
8138 colnr_T col; /* column to start looking for match */ | |
8139 proftime_T *tm; /* timeout limit or NULL */ | |
8140 { | |
8141 return rmp->regprog->engine->regexec_multi(rmp, win, buf, lnum, col, tm); | |
8142 } |