comparison src/regexp.c @ 167:c93c9cad9618

updated for version 7.0051
author vimboss
date Tue, 22 Feb 2005 08:39:57 +0000
parents 19670b05ee32
children 8c60f65311fa
comparison
equal deleted inserted replaced
166:3a28ed993bbe 167:c93c9cad9618
72 * all nodes except BRANCH and BRACE_COMPLEX implement concatenation; a "next" 72 * all nodes except BRANCH and BRACE_COMPLEX implement concatenation; a "next"
73 * pointer with a BRANCH on both ends of it is connecting two alternatives. 73 * pointer with a BRANCH on both ends of it is connecting two alternatives.
74 * (Here we have one of the subtle syntax dependencies: an individual BRANCH 74 * (Here we have one of the subtle syntax dependencies: an individual BRANCH
75 * (as opposed to a collection of them) is never concatenated with anything 75 * (as opposed to a collection of them) is never concatenated with anything
76 * because of operator precedence). The "next" pointer of a BRACE_COMPLEX 76 * because of operator precedence). The "next" pointer of a BRACE_COMPLEX
77 * node points to the node after the stuff to be repeated. The operand of some 77 * node points to the node after the stuff to be repeated.
78 * types of node is a literal string; for others, it is a node leading into a 78 * The operand of some types of node is a literal string; for others, it is a
79 * sub-FSM. In particular, the operand of a BRANCH node is the first node of 79 * node leading into a sub-FSM. In particular, the operand of a BRANCH node
80 * the branch. (NB this is *not* a tree structure: the tail of the branch 80 * is the first node of the branch.
81 * connects to the thing following the set of BRANCHes.) 81 * (NB this is *not* a tree structure: the tail of the branch connects to the
82 * thing following the set of BRANCHes.)
82 * 83 *
83 * pattern is coded like: 84 * pattern is coded like:
84 * 85 *
85 * +-----------------+ 86 * +-----------------+
86 * | V 87 * | V
93 * V | 94 * V |
94 * <aa>* BRANCH BRANCH <aa> --> BACK BRANCH --> NOTHING --> END 95 * <aa>* BRANCH BRANCH <aa> --> BACK BRANCH --> NOTHING --> END
95 * | | ^ ^ 96 * | | ^ ^
96 * | +---------------+ | 97 * | +---------------+ |
97 * +---------------------------------------------+ 98 * +---------------------------------------------+
99 *
100 *
101 * +----------------------+
102 * V |
103 * <aa>\+ BRANCH <aa> --> BRANCH --> BACK BRANCH --> NOTHING --> END
104 * | | ^ ^
105 * | +----------+ |
106 * +-------------------------------------------------+
98 * 107 *
99 * 108 *
100 * +-------------------------+ 109 * +-------------------------+
101 * V | 110 * V |
102 * <aa>\{} BRANCH BRACE_LIMITS --> BRACE_COMPLEX <aa> --> BACK END 111 * <aa>\{} BRANCH BRACE_LIMITS --> BRACE_COMPLEX <aa> --> BACK END
384 */ 393 */
385 static char_u REGEXP_INRANGE[] = "]^-n\\"; 394 static char_u REGEXP_INRANGE[] = "]^-n\\";
386 static char_u REGEXP_ABBR[] = "nrtebdoxuU"; 395 static char_u REGEXP_ABBR[] = "nrtebdoxuU";
387 396
388 static int backslash_trans __ARGS((int c)); 397 static int backslash_trans __ARGS((int c));
389 static int skip_class_name __ARGS((char_u **pp)); 398 static int get_char_class __ARGS((char_u **pp));
399 static int get_equi_class __ARGS((char_u **pp));
400 static void reg_equi_class __ARGS((int c));
401 static int get_coll_element __ARGS((char_u **pp));
390 static char_u *skip_anyof __ARGS((char_u *p)); 402 static char_u *skip_anyof __ARGS((char_u *p));
391 static void init_class_tab __ARGS((void)); 403 static void init_class_tab __ARGS((void));
392 404
393 /* 405 /*
394 * Translate '\x' to its control character, except "\n", which is Magic. 406 * Translate '\x' to its control character, except "\n", which is Magic.
406 } 418 }
407 return c; 419 return c;
408 } 420 }
409 421
410 /* 422 /*
411 * Check for a character class name. "pp" points to the '['. 423 * Check for a character class name "[:name:]". "pp" points to the '['.
412 * Returns one of the CLASS_ items. CLASS_NONE means that no item was 424 * Returns one of the CLASS_ items. CLASS_NONE means that no item was
413 * recognized. Otherwise "pp" is advanced to after the item. 425 * recognized. Otherwise "pp" is advanced to after the item.
414 */ 426 */
415 static int 427 static int
416 skip_class_name(pp) 428 get_char_class(pp)
417 char_u **pp; 429 char_u **pp;
418 { 430 {
419 static const char *(class_names[]) = 431 static const char *(class_names[]) =
420 { 432 {
421 "alnum:]", 433 "alnum:]",
465 } 477 }
466 return CLASS_NONE; 478 return CLASS_NONE;
467 } 479 }
468 480
469 /* 481 /*
470 * Skip over a "[]" range.
471 * "p" must point to the character after the '['.
472 * The returned pointer is on the matching ']', or the terminating NUL.
473 */
474 static char_u *
475 skip_anyof(p)
476 char_u *p;
477 {
478 int cpo_lit; /* 'cpoptions' contains 'l' flag */
479 #ifdef FEAT_MBYTE
480 int l;
481 #endif
482
483 cpo_lit = (!reg_syn && vim_strchr(p_cpo, CPO_LITERAL) != NULL);
484
485 if (*p == '^') /* Complement of range. */
486 ++p;
487 if (*p == ']' || *p == '-')
488 ++p;
489 while (*p != NUL && *p != ']')
490 {
491 #ifdef FEAT_MBYTE
492 if (has_mbyte && (l = (*mb_ptr2len_check)(p)) > 1)
493 p += l;
494 else
495 #endif
496 if (*p == '-')
497 {
498 ++p;
499 if (*p != ']' && *p != NUL)
500 mb_ptr_adv(p);
501 }
502 else if (*p == '\\'
503 && (vim_strchr(REGEXP_INRANGE, p[1]) != NULL
504 || (!cpo_lit && vim_strchr(REGEXP_ABBR, p[1]) != NULL)))
505 p += 2;
506 else if (*p == '[')
507 {
508 if (skip_class_name(&p) == CLASS_NONE)
509 ++p; /* It was not a class name */
510 }
511 else
512 ++p;
513 }
514
515 return p;
516 }
517
518 /*
519 * Specific version of character class functions. 482 * Specific version of character class functions.
520 * Using a table to keep this fast. 483 * Using a table to keep this fast.
521 */ 484 */
522 static short class_tab[256]; 485 static short class_tab[256];
523 486
693 static int prog_magic_wrong __ARGS((void)); 656 static int prog_magic_wrong __ARGS((void));
694 static char_u *regnext __ARGS((char_u *)); 657 static char_u *regnext __ARGS((char_u *));
695 static void regc __ARGS((int b)); 658 static void regc __ARGS((int b));
696 #ifdef FEAT_MBYTE 659 #ifdef FEAT_MBYTE
697 static void regmbc __ARGS((int c)); 660 static void regmbc __ARGS((int c));
661 #else
662 # define regmbc(c) regc(c)
698 #endif 663 #endif
699 static void reginsert __ARGS((int, char_u *)); 664 static void reginsert __ARGS((int, char_u *));
700 static void reginsert_limits __ARGS((int, long, long, char_u *)); 665 static void reginsert_limits __ARGS((int, long, long, char_u *));
701 static char_u *re_put_long __ARGS((char_u *pr, long_u val)); 666 static char_u *re_put_long __ARGS((char_u *pr, long_u val));
702 static int read_limits __ARGS((long *, long *)); 667 static int read_limits __ARGS((long *, long *));
720 int 685 int
721 re_lookbehind(prog) 686 re_lookbehind(prog)
722 regprog_T *prog; 687 regprog_T *prog;
723 { 688 {
724 return (prog->regflags & RF_LOOKBH); 689 return (prog->regflags & RF_LOOKBH);
690 }
691
692 /*
693 * Check for an equivalence class name "[=a=]". "pp" points to the '['.
694 * Returns a character representing the class. Zero means that no item was
695 * recognized. Otherwise "pp" is advanced to after the item.
696 */
697 static int
698 get_equi_class(pp)
699 char_u **pp;
700 {
701 int c;
702 int l = 1;
703 char_u *p = *pp;
704
705 if (p[1] == '=')
706 {
707 #ifdef FEAT_MBYTE
708 if (has_mbyte)
709 l = mb_ptr2len_check(p + 2);
710 #endif
711 if (p[l + 2] == '=' && p[l + 3] == ']')
712 {
713 #ifdef FEAT_MBYTE
714 if (has_mbyte)
715 c = mb_ptr2char(p + 2);
716 else
717 #endif
718 c = p[2];
719 *pp += l + 4;
720 return c;
721 }
722 }
723 return 0;
724 }
725
726 /*
727 * Produce the bytes for equivalence class "c".
728 * Currently only handles latin1, latin9 and utf-8.
729 */
730 static void
731 reg_equi_class(c)
732 int c;
733 {
734 #ifdef FEAT_MBYTE
735 if (enc_utf8 || STRCMP(p_enc, "latin1") == 0
736 || STRCMP(p_enc, "latin9") == 0)
737 #endif
738 {
739 switch (c)
740 {
741 case 'A': case 'À': case 'Á': case 'Â':
742 case 'Ã': case 'Ä': case 'Å':
743 regmbc('A'); regmbc('À'); regmbc('Á'); regmbc('Â');
744 regmbc('Ã'); regmbc('Ä'); regmbc('Å');
745 return;
746 case 'C': case 'Ç':
747 regmbc('C'); regmbc('Ç');
748 return;
749 case 'E': case 'È': case 'É': case 'Ê': case 'Ë':
750 regmbc('E'); regmbc('È'); regmbc('É'); regmbc('Ê');
751 regmbc('Ë');
752 return;
753 case 'I': case 'Ì': case 'Í': case 'Î': case 'Ï':
754 regmbc('I'); regmbc('Ì'); regmbc('Í'); regmbc('Î');
755 regmbc('Ï');
756 return;
757 case 'N': case 'Ñ':
758 regmbc('N'); regmbc('Ñ');
759 return;
760 case 'O': case 'Ò': case 'Ó': case 'Ô': case 'Õ': case 'Ö':
761 regmbc('O'); regmbc('Ò'); regmbc('Ó'); regmbc('Ô');
762 regmbc('Õ'); regmbc('Ö');
763 return;
764 case 'U': case 'Ù': case 'Ú': case 'Û': case 'Ü':
765 regmbc('U'); regmbc('Ù'); regmbc('Ú'); regmbc('Û');
766 regmbc('Ü');
767 return;
768 case 'Y': case 'Ý':
769 regmbc('Y'); regmbc('Ý');
770 return;
771 case 'a': case 'à': case 'á': case 'â':
772 case 'ã': case 'ä': case 'å':
773 regmbc('a'); regmbc('à'); regmbc('á'); regmbc('â');
774 regmbc('ã'); regmbc('ä'); regmbc('å');
775 return;
776 case 'c': case 'ç':
777 regmbc('c'); regmbc('ç');
778 return;
779 case 'e': case 'è': case 'é': case 'ê': case 'ë':
780 regmbc('e'); regmbc('è'); regmbc('é'); regmbc('ê');
781 regmbc('ë');
782 return;
783 case 'i': case 'ì': case 'í': case 'î': case 'ï':
784 regmbc('i'); regmbc('ì'); regmbc('í'); regmbc('î');
785 regmbc('ï');
786 return;
787 case 'n': case 'ñ':
788 regmbc('n'); regmbc('ñ');
789 return;
790 case 'o': case 'ò': case 'ó': case 'ô': case 'õ': case 'ö':
791 regmbc('o'); regmbc('ò'); regmbc('ó'); regmbc('ô');
792 regmbc('õ'); regmbc('ö');
793 return;
794 case 'u': case 'ù': case 'ú': case 'û': case 'ü':
795 regmbc('u'); regmbc('ù'); regmbc('ú'); regmbc('û');
796 regmbc('ü');
797 return;
798 case 'y': case 'ý': case 'ÿ':
799 regmbc('y'); regmbc('ý'); regmbc('ÿ');
800 return;
801 }
802 }
803 regmbc(c);
804 }
805
806 /*
807 * Check for a collating element "[.a.]". "pp" points to the '['.
808 * Returns a character. Zero means that no item was recognized. Otherwise
809 * "pp" is advanced to after the item.
810 * Currently only single characters are recognized!
811 */
812 static int
813 get_coll_element(pp)
814 char_u **pp;
815 {
816 int c;
817 int l = 1;
818 char_u *p = *pp;
819
820 if (p[1] == '.')
821 {
822 #ifdef FEAT_MBYTE
823 if (has_mbyte)
824 l = mb_ptr2len_check(p + 2);
825 #endif
826 if (p[l + 2] == '.' && p[l + 3] == ']')
827 {
828 #ifdef FEAT_MBYTE
829 if (has_mbyte)
830 c = mb_ptr2char(p + 2);
831 else
832 #endif
833 c = p[2];
834 *pp += l + 4;
835 return c;
836 }
837 }
838 return 0;
839 }
840
841
842 /*
843 * Skip over a "[]" range.
844 * "p" must point to the character after the '['.
845 * The returned pointer is on the matching ']', or the terminating NUL.
846 */
847 static char_u *
848 skip_anyof(p)
849 char_u *p;
850 {
851 int cpo_lit; /* 'cpoptions' contains 'l' flag */
852 int cpo_bsl; /* 'cpoptions' contains '\' flag */
853 #ifdef FEAT_MBYTE
854 int l;
855 #endif
856
857 cpo_lit = (!reg_syn && vim_strchr(p_cpo, CPO_LITERAL) != NULL);
858 cpo_bsl = (!reg_syn && vim_strchr(p_cpo, CPO_BACKSL) != NULL);
859
860 if (*p == '^') /* Complement of range. */
861 ++p;
862 if (*p == ']' || *p == '-')
863 ++p;
864 while (*p != NUL && *p != ']')
865 {
866 #ifdef FEAT_MBYTE
867 if (has_mbyte && (l = (*mb_ptr2len_check)(p)) > 1)
868 p += l;
869 else
870 #endif
871 if (*p == '-')
872 {
873 ++p;
874 if (*p != ']' && *p != NUL)
875 mb_ptr_adv(p);
876 }
877 else if (*p == '\\'
878 && !cpo_bsl
879 && (vim_strchr(REGEXP_INRANGE, p[1]) != NULL
880 || (!cpo_lit && vim_strchr(REGEXP_ABBR, p[1]) != NULL)))
881 p += 2;
882 else if (*p == '[')
883 {
884 if (get_char_class(&p) == CLASS_NONE
885 && get_equi_class(&p) == 0
886 && get_coll_element(&p) == 0)
887 ++p; /* It was not a class name */
888 }
889 else
890 ++p;
891 }
892
893 return p;
725 } 894 }
726 895
727 /* 896 /*
728 * Skip past regular expression. 897 * Skip past regular expression.
729 * Stop at end of "startp" or where "dirc" is found ('/', '?', etc). 898 * Stop at end of "startp" or where "dirc" is found ('/', '?', etc).
1249 if (re_multi_type(op) == NOT_MULTI) 1418 if (re_multi_type(op) == NOT_MULTI)
1250 { 1419 {
1251 *flagp = flags; 1420 *flagp = flags;
1252 return ret; 1421 return ret;
1253 } 1422 }
1254 if (!(flags & HASWIDTH) && re_multi_type(op) == MULTI_MULT)
1255 {
1256 if (op == Magic('*'))
1257 EMSG_M_RET_NULL(_("E56: %s* operand could be empty"),
1258 reg_magic >= MAGIC_ON);
1259 if (op == Magic('+'))
1260 EMSG_M_RET_NULL(_("E57: %s+ operand could be empty"),
1261 reg_magic == MAGIC_ALL);
1262 /* "\{}" is checked below, it's allowed when there is an upper limit */
1263 }
1264 /* default flags */ 1423 /* default flags */
1265 *flagp = (WORST | SPSTART | (flags & (HASNL | HASLOOKBH))); 1424 *flagp = (WORST | SPSTART | (flags & (HASNL | HASLOOKBH)));
1266 1425
1267 skipchr(); 1426 skipchr();
1268 switch (op) 1427 switch (op)
1336 break; 1495 break;
1337 1496
1338 case Magic('{'): 1497 case Magic('{'):
1339 if (!read_limits(&minval, &maxval)) 1498 if (!read_limits(&minval, &maxval))
1340 return NULL; 1499 return NULL;
1341 if (!(flags & HASWIDTH) && (maxval > minval
1342 ? maxval >= MAX_LIMIT : minval >= MAX_LIMIT))
1343 EMSG_M_RET_NULL(_("E58: %s{ operand could be empty"),
1344 reg_magic == MAGIC_ALL);
1345 if (flags & SIMPLE) 1500 if (flags & SIMPLE)
1346 { 1501 {
1347 reginsert(BRACE_SIMPLE, ret); 1502 reginsert(BRACE_SIMPLE, ret);
1348 reginsert_limits(BRACE_LIMITS, minval, maxval, ret); 1503 reginsert_limits(BRACE_LIMITS, minval, maxval, ret);
1349 } 1504 }
1389 int *flagp; 1544 int *flagp;
1390 { 1545 {
1391 char_u *ret; 1546 char_u *ret;
1392 int flags; 1547 int flags;
1393 int cpo_lit; /* 'cpoptions' contains 'l' flag */ 1548 int cpo_lit; /* 'cpoptions' contains 'l' flag */
1549 int cpo_bsl; /* 'cpoptions' contains '\' flag */
1394 int c; 1550 int c;
1395 static char_u *classchars = (char_u *)".iIkKfFpPsSdDxXoOwWhHaAlLuU"; 1551 static char_u *classchars = (char_u *)".iIkKfFpPsSdDxXoOwWhHaAlLuU";
1396 static int classcodes[] = {ANY, IDENT, SIDENT, KWORD, SKWORD, 1552 static int classcodes[] = {ANY, IDENT, SIDENT, KWORD, SKWORD,
1397 FNAME, SFNAME, PRINT, SPRINT, 1553 FNAME, SFNAME, PRINT, SPRINT,
1398 WHITE, NWHITE, DIGIT, NDIGIT, 1554 WHITE, NWHITE, DIGIT, NDIGIT,
1404 char_u *p; 1560 char_u *p;
1405 int extra = 0; 1561 int extra = 0;
1406 1562
1407 *flagp = WORST; /* Tentatively. */ 1563 *flagp = WORST; /* Tentatively. */
1408 cpo_lit = (!reg_syn && vim_strchr(p_cpo, CPO_LITERAL) != NULL); 1564 cpo_lit = (!reg_syn && vim_strchr(p_cpo, CPO_LITERAL) != NULL);
1565 cpo_bsl = (!reg_syn && vim_strchr(p_cpo, CPO_BACKSL) != NULL);
1409 1566
1410 c = getchr(); 1567 c = getchr();
1411 switch (c) 1568 switch (c)
1412 { 1569 {
1413 case Magic('^'): 1570 case Magic('^'):
1825 else 1982 else
1826 ret = regnode(ANYOF + extra); 1983 ret = regnode(ANYOF + extra);
1827 1984
1828 /* At the start ']' and '-' mean the literal character. */ 1985 /* At the start ']' and '-' mean the literal character. */
1829 if (*regparse == ']' || *regparse == '-') 1986 if (*regparse == ']' || *regparse == '-')
1987 {
1988 startc = *regparse;
1830 regc(*regparse++); 1989 regc(*regparse++);
1990 }
1831 1991
1832 while (*regparse != NUL && *regparse != ']') 1992 while (*regparse != NUL && *regparse != ']')
1833 { 1993 {
1834 if (*regparse == '-') 1994 if (*regparse == '-')
1835 { 1995 {
1843 regc('-'); 2003 regc('-');
1844 startc = '-'; /* [--x] is a range */ 2004 startc = '-'; /* [--x] is a range */
1845 } 2005 }
1846 else 2006 else
1847 { 2007 {
2008 /* Also accept "a-[.z.]" */
2009 endc = 0;
2010 if (*regparse == '[')
2011 endc = get_coll_element(&regparse);
2012 if (endc == 0)
2013 {
1848 #ifdef FEAT_MBYTE 2014 #ifdef FEAT_MBYTE
1849 if (has_mbyte) 2015 if (has_mbyte)
1850 endc = mb_ptr2char_adv(&regparse); 2016 endc = mb_ptr2char_adv(&regparse);
1851 else 2017 else
1852 #endif 2018 #endif
1853 endc = *regparse++; 2019 endc = *regparse++;
2020 }
1854 2021
1855 /* Handle \o40, \x20 and \u20AC style sequences */ 2022 /* Handle \o40, \x20 and \u20AC style sequences */
1856 if (endc == '\\' && !cpo_lit) 2023 if (endc == '\\' && !cpo_lit && !cpo_bsl)
1857 endc = coll_get_char(); 2024 endc = coll_get_char();
1858 2025
1859 if (startc > endc) 2026 if (startc > endc)
1860 EMSG_RET_NULL(_(e_invrange)); 2027 EMSG_RET_NULL(_(e_invrange));
1861 #ifdef FEAT_MBYTE 2028 #ifdef FEAT_MBYTE
1890 } 2057 }
1891 /* 2058 /*
1892 * Only "\]", "\^", "\-" and "\\" are special in Vi. Vim 2059 * Only "\]", "\^", "\-" and "\\" are special in Vi. Vim
1893 * accepts "\t", "\e", etc., but only when the 'l' flag in 2060 * accepts "\t", "\e", etc., but only when the 'l' flag in
1894 * 'cpoptions' is not included. 2061 * 'cpoptions' is not included.
2062 * Posix doesn't recognize backslash at all.
1895 */ 2063 */
1896 else if (*regparse == '\\' 2064 else if (*regparse == '\\'
2065 && !cpo_bsl
1897 && (vim_strchr(REGEXP_INRANGE, regparse[1]) != NULL 2066 && (vim_strchr(REGEXP_INRANGE, regparse[1]) != NULL
1898 || (!cpo_lit 2067 || (!cpo_lit
1899 && vim_strchr(REGEXP_ABBR, 2068 && vim_strchr(REGEXP_ABBR,
1900 regparse[1]) != NULL))) 2069 regparse[1]) != NULL)))
1901 { 2070 {
1940 else if (*regparse == '[') 2109 else if (*regparse == '[')
1941 { 2110 {
1942 int c_class; 2111 int c_class;
1943 int cu; 2112 int cu;
1944 2113
1945 c_class = skip_class_name(&regparse); 2114 c_class = get_char_class(&regparse);
1946 startc = -1; 2115 startc = -1;
1947 /* Characters assumed to be 8 bits! */ 2116 /* Characters assumed to be 8 bits! */
1948 switch (c_class) 2117 switch (c_class)
1949 { 2118 {
1950 case CLASS_NONE: 2119 case CLASS_NONE:
1951 /* literal '[', allow [[-x] as a range */ 2120 c_class = get_equi_class(&regparse);
1952 startc = *regparse++; 2121 if (c_class != 0)
1953 regc(startc); 2122 {
2123 /* produce equivalence class */
2124 reg_equi_class(c_class);
2125 }
2126 else if ((c_class =
2127 get_coll_element(&regparse)) != 0)
2128 {
2129 /* produce a collating element */
2130 regmbc(c_class);
2131 }
2132 else
2133 {
2134 /* literal '[', allow [[-x] as a range */
2135 startc = *regparse++;
2136 regc(startc);
2137 }
1954 break; 2138 break;
1955 case CLASS_ALNUM: 2139 case CLASS_ALNUM:
1956 for (cu = 1; cu <= 255; cu++) 2140 for (cu = 1; cu <= 255; cu++)
1957 if (isalnum(cu)) 2141 if (isalnum(cu))
1958 regc(cu); 2142 regc(cu);
2352 } 2536 }
2353 2537
2354 static int 2538 static int
2355 peekchr() 2539 peekchr()
2356 { 2540 {
2541 static int after_slash = FALSE;
2542
2357 if (curchr == -1) 2543 if (curchr == -1)
2358 { 2544 {
2359 switch (curchr = regparse[0]) 2545 switch (curchr = regparse[0])
2360 { 2546 {
2361 case '.': 2547 case '.':
2390 /* magic only after "\v" */ 2576 /* magic only after "\v" */
2391 if (reg_magic == MAGIC_ALL) 2577 if (reg_magic == MAGIC_ALL)
2392 curchr = Magic(curchr); 2578 curchr = Magic(curchr);
2393 break; 2579 break;
2394 case '*': 2580 case '*':
2395 /* * is not magic as the very first character, eg "?*ptr" and when 2581 /* * is not magic as the very first character, eg "?*ptr", when
2396 * after '^', eg "/^*ptr" */ 2582 * after '^', eg "/^*ptr" and when after "\(", "\|", "\&". But
2397 if (reg_magic >= MAGIC_ON && !at_start 2583 * "\(\*" is not magic, thus must be magic if "after_slash" */
2398 && !(prev_at_start && prevchr == Magic('^'))) 2584 if (reg_magic >= MAGIC_ON
2585 && !at_start
2586 && !(prev_at_start && prevchr == Magic('^'))
2587 && (after_slash
2588 || (prevchr != Magic('(')
2589 && prevchr != Magic('&')
2590 && prevchr != Magic('|'))))
2399 curchr = Magic('*'); 2591 curchr = Magic('*');
2400 break; 2592 break;
2401 case '^': 2593 case '^':
2402 /* '^' is only magic as the very first character and if it's after 2594 /* '^' is only magic as the very first character and if it's after
2403 * "\(", "\|", "\&" or "\n" */ 2595 * "\(", "\|", "\&" or "\n" */
2458 */ 2650 */
2459 curchr = -1; 2651 curchr = -1;
2460 prev_at_start = at_start; 2652 prev_at_start = at_start;
2461 at_start = FALSE; /* be able to say "/\*ptr" */ 2653 at_start = FALSE; /* be able to say "/\*ptr" */
2462 ++regparse; 2654 ++regparse;
2655 ++after_slash;
2463 peekchr(); 2656 peekchr();
2464 --regparse; 2657 --regparse;
2658 --after_slash;
2465 curchr = toggle_Magic(curchr); 2659 curchr = toggle_Magic(curchr);
2466 } 2660 }
2467 else if (vim_strchr(REGEXP_ABBR, c)) 2661 else if (vim_strchr(REGEXP_ABBR, c))
2468 { 2662 {
2469 /* 2663 /*
2721 *maxval = *minval; /* It was \{n} or \{-n} */ 2915 *maxval = *minval; /* It was \{n} or \{-n} */
2722 else 2916 else
2723 *maxval = MAX_LIMIT; /* It was \{} or \{-} */ 2917 *maxval = MAX_LIMIT; /* It was \{} or \{-} */
2724 if (*regparse == '\\') 2918 if (*regparse == '\\')
2725 regparse++; /* Allow either \{...} or \{...\} */ 2919 regparse++; /* Allow either \{...} or \{...\} */
2726 if (*regparse != '}' || (*maxval == 0 && *minval == 0)) 2920 if (*regparse != '}')
2727 { 2921 {
2728 sprintf((char *)IObuff, _("E554: Syntax error in %s{...}"), 2922 sprintf((char *)IObuff, _("E554: Syntax error in %s{...}"),
2729 reg_magic == MAGIC_ALL ? "" : "\\"); 2923 reg_magic == MAGIC_ALL ? "" : "\\");
2730 EMSG_RET_FAIL(IObuff); 2924 EMSG_RET_FAIL(IObuff);
2731 } 2925 }
2813 *(posp) = (savep)->se_u.pos; \ 3007 *(posp) = (savep)->se_u.pos; \
2814 else \ 3008 else \
2815 *(pp) = (savep)->se_u.ptr; } 3009 *(pp) = (savep)->se_u.ptr; }
2816 3010
2817 static int re_num_cmp __ARGS((long_u val, char_u *scan)); 3011 static int re_num_cmp __ARGS((long_u val, char_u *scan));
2818 static int regmatch __ARGS((char_u *prog)); 3012 static int regmatch __ARGS((char_u *prog, regsave_T *startp));
2819 static int regrepeat __ARGS((char_u *p, long maxcount)); 3013 static int regrepeat __ARGS((char_u *p, long maxcount));
2820 3014
2821 #ifdef DEBUG 3015 #ifdef DEBUG
2822 int regnarrate = 0; 3016 int regnarrate = 0;
2823 #endif 3017 #endif
3271 /* Clear the external match subpointers if necessary. */ 3465 /* Clear the external match subpointers if necessary. */
3272 if (prog->reghasz == REX_SET) 3466 if (prog->reghasz == REX_SET)
3273 need_clear_zsubexpr = TRUE; 3467 need_clear_zsubexpr = TRUE;
3274 #endif 3468 #endif
3275 3469
3276 if (regmatch(prog->program + 1)) 3470 if (regmatch(prog->program + 1, NULL))
3277 { 3471 {
3278 cleanup_subexpr(); 3472 cleanup_subexpr();
3279 if (REG_MULTI) 3473 if (REG_MULTI)
3280 { 3474 {
3281 if (reg_startpos[0].lnum < 0) 3475 if (reg_startpos[0].lnum < 0)
3377 * the last matched character. 3571 * the last matched character.
3378 * Returns FALSE when there is no match. Leaves reginput and reglnum in an 3572 * Returns FALSE when there is no match. Leaves reginput and reglnum in an
3379 * undefined state! 3573 * undefined state!
3380 */ 3574 */
3381 static int 3575 static int
3382 regmatch(scan) 3576 regmatch(scan, startp)
3383 char_u *scan; /* Current node. */ 3577 char_u *scan; /* Current node. */
3578 regsave_T *startp; /* start position for BACK */
3384 { 3579 {
3385 char_u *next; /* Next node. */ 3580 char_u *next; /* Next node. */
3386 int op; 3581 int op;
3387 int c; 3582 int c;
3388 3583
3801 3996
3802 case NOTHING: 3997 case NOTHING:
3803 break; 3998 break;
3804 3999
3805 case BACK: 4000 case BACK:
4001 /* When we run into BACK without matching something non-empty, we
4002 * fail. */
4003 if (startp != NULL && reg_save_equal(startp))
4004 return FALSE;
3806 break; 4005 break;
3807 4006
3808 case MOPEN + 0: /* Match start: \zs */ 4007 case MOPEN + 0: /* Match start: \zs */
3809 case MOPEN + 1: /* \( */ 4008 case MOPEN + 1: /* \( */
3810 case MOPEN + 2: 4009 case MOPEN + 2:
3821 4020
3822 no = op - MOPEN; 4021 no = op - MOPEN;
3823 cleanup_subexpr(); 4022 cleanup_subexpr();
3824 save_se(&save, &reg_startpos[no], &reg_startp[no]); 4023 save_se(&save, &reg_startpos[no], &reg_startp[no]);
3825 4024
3826 if (regmatch(next)) 4025 if (regmatch(next, startp))
3827 return TRUE; 4026 return TRUE;
3828 4027
3829 restore_se(&save, &reg_startpos[no], &reg_startp[no]); 4028 restore_se(&save, &reg_startpos[no], &reg_startp[no]);
3830 return FALSE; 4029 return FALSE;
3831 } 4030 }
3832 /* break; Not Reached */ 4031 /* break; Not Reached */
3833 4032
3834 case NOPEN: /* \%( */ 4033 case NOPEN: /* \%( */
3835 case NCLOSE: /* \) after \%( */ 4034 case NCLOSE: /* \) after \%( */
3836 if (regmatch(next)) 4035 if (regmatch(next, startp))
3837 return TRUE; 4036 return TRUE;
3838 return FALSE; 4037 return FALSE;
3839 /* break; Not Reached */ 4038 /* break; Not Reached */
3840 4039
3841 #ifdef FEAT_SYN_HL 4040 #ifdef FEAT_SYN_HL
3854 4053
3855 no = op - ZOPEN; 4054 no = op - ZOPEN;
3856 cleanup_zsubexpr(); 4055 cleanup_zsubexpr();
3857 save_se(&save, &reg_startzpos[no], &reg_startzp[no]); 4056 save_se(&save, &reg_startzpos[no], &reg_startzp[no]);
3858 4057
3859 if (regmatch(next)) 4058 if (regmatch(next, startp))
3860 return TRUE; 4059 return TRUE;
3861 4060
3862 restore_se(&save, &reg_startzpos[no], &reg_startzp[no]); 4061 restore_se(&save, &reg_startzpos[no], &reg_startzp[no]);
3863 return FALSE; 4062 return FALSE;
3864 } 4063 }
3881 4080
3882 no = op - MCLOSE; 4081 no = op - MCLOSE;
3883 cleanup_subexpr(); 4082 cleanup_subexpr();
3884 save_se(&save, &reg_endpos[no], &reg_endp[no]); 4083 save_se(&save, &reg_endpos[no], &reg_endp[no]);
3885 4084
3886 if (regmatch(next)) 4085 if (regmatch(next, startp))
3887 return TRUE; 4086 return TRUE;
3888 4087
3889 restore_se(&save, &reg_endpos[no], &reg_endp[no]); 4088 restore_se(&save, &reg_endpos[no], &reg_endp[no]);
3890 return FALSE; 4089 return FALSE;
3891 } 4090 }
3907 4106
3908 no = op - ZCLOSE; 4107 no = op - ZCLOSE;
3909 cleanup_zsubexpr(); 4108 cleanup_zsubexpr();
3910 save_se(&save, &reg_endzpos[no], &reg_endzp[no]); 4109 save_se(&save, &reg_endzpos[no], &reg_endzp[no]);
3911 4110
3912 if (regmatch(next)) 4111 if (regmatch(next, startp))
3913 return TRUE; 4112 return TRUE;
3914 4113
3915 restore_se(&save, &reg_endzpos[no], &reg_endzp[no]); 4114 restore_se(&save, &reg_endzpos[no], &reg_endzp[no]);
3916 return FALSE; 4115 return FALSE;
3917 } 4116 }
4074 regsave_T save; 4273 regsave_T save;
4075 4274
4076 do 4275 do
4077 { 4276 {
4078 reg_save(&save); 4277 reg_save(&save);
4079 if (regmatch(OPERAND(scan))) 4278 if (regmatch(OPERAND(scan), &save))
4080 return TRUE; 4279 return TRUE;
4081 reg_restore(&save); 4280 reg_restore(&save);
4082 scan = regnext(scan); 4281 scan = regnext(scan);
4083 } while (scan != NULL && OP(scan) == BRANCH); 4282 } while (scan != NULL && OP(scan) == BRANCH);
4084 return FALSE; 4283 return FALSE;
4132 /* If not matched enough times yet, try one more */ 4331 /* If not matched enough times yet, try one more */
4133 if (brace_count[no] <= (brace_min[no] <= brace_max[no] 4332 if (brace_count[no] <= (brace_min[no] <= brace_max[no]
4134 ? brace_min[no] : brace_max[no])) 4333 ? brace_min[no] : brace_max[no]))
4135 { 4334 {
4136 reg_save(&save); 4335 reg_save(&save);
4137 if (regmatch(OPERAND(scan))) 4336 if (regmatch(OPERAND(scan), &save))
4138 return TRUE; 4337 return TRUE;
4139 reg_restore(&save); 4338 reg_restore(&save);
4140 --brace_count[no]; /* failed, decrement match count */ 4339 --brace_count[no]; /* failed, decrement match count */
4141 return FALSE; 4340 return FALSE;
4142 } 4341 }
4146 { 4345 {
4147 /* Range is the normal way around, use longest match */ 4346 /* Range is the normal way around, use longest match */
4148 if (brace_count[no] <= brace_max[no]) 4347 if (brace_count[no] <= brace_max[no])
4149 { 4348 {
4150 reg_save(&save); 4349 reg_save(&save);
4151 if (regmatch(OPERAND(scan))) 4350 if (regmatch(OPERAND(scan), &save))
4152 return TRUE; /* matched some more times */ 4351 return TRUE; /* matched some more times */
4153 reg_restore(&save); 4352 reg_restore(&save);
4154 --brace_count[no]; /* matched just enough times */ 4353 --brace_count[no]; /* matched just enough times */
4155 /* continue with the items after \{} */ 4354 /* { continue with the items after \{} */
4156 } 4355 }
4157 } 4356 }
4158 else 4357 else
4159 { 4358 {
4160 /* Range is backwards, use shortest match first */ 4359 /* Range is backwards, use shortest match first */
4161 if (brace_count[no] <= brace_min[no]) 4360 if (brace_count[no] <= brace_min[no])
4162 { 4361 {
4163 reg_save(&save); 4362 reg_save(&save);
4164 if (regmatch(next)) 4363 if (regmatch(next, &save))
4165 return TRUE; 4364 return TRUE;
4166 reg_restore(&save); 4365 reg_restore(&save);
4167 next = OPERAND(scan); 4366 next = OPERAND(scan);
4168 /* must try to match one more item */ 4367 /* must try to match one more item */
4169 } 4368 }
4232 /* If it could match, try it. */ 4431 /* If it could match, try it. */
4233 if (nextb == NUL || *reginput == nextb 4432 if (nextb == NUL || *reginput == nextb
4234 || *reginput == nextb_ic) 4433 || *reginput == nextb_ic)
4235 { 4434 {
4236 reg_save(&save); 4435 reg_save(&save);
4237 if (regmatch(next)) 4436 if (regmatch(next, startp))
4238 return TRUE; 4437 return TRUE;
4239 reg_restore(&save); 4438 reg_restore(&save);
4240 } 4439 }
4241 /* Couldn't or didn't match -- back up one char. */ 4440 /* Couldn't or didn't match -- back up one char. */
4242 if (--count < minval) 4441 if (--count < minval)
4269 /* If it could work, try it. */ 4468 /* If it could work, try it. */
4270 if (nextb == NUL || *reginput == nextb 4469 if (nextb == NUL || *reginput == nextb
4271 || *reginput == nextb_ic) 4470 || *reginput == nextb_ic)
4272 { 4471 {
4273 reg_save(&save); 4472 reg_save(&save);
4274 if (regmatch(next)) 4473 if (regmatch(next, &save))
4275 return TRUE; 4474 return TRUE;
4276 reg_restore(&save); 4475 reg_restore(&save);
4277 } 4476 }
4278 /* Couldn't or didn't match: try advancing one char. */ 4477 /* Couldn't or didn't match: try advancing one char. */
4279 if (count == minval 4478 if (count == minval
4293 regsave_T save; 4492 regsave_T save;
4294 4493
4295 /* If the operand matches, we fail. Otherwise backup and 4494 /* If the operand matches, we fail. Otherwise backup and
4296 * continue with the next item. */ 4495 * continue with the next item. */
4297 reg_save(&save); 4496 reg_save(&save);
4298 if (regmatch(OPERAND(scan))) 4497 if (regmatch(OPERAND(scan), startp))
4299 return FALSE; 4498 return FALSE;
4300 reg_restore(&save); 4499 reg_restore(&save);
4301 } 4500 }
4302 break; 4501 break;
4303 4502
4307 regsave_T save; 4506 regsave_T save;
4308 4507
4309 /* If the operand doesn't match, we fail. Otherwise backup 4508 /* If the operand doesn't match, we fail. Otherwise backup
4310 * and continue with the next item. */ 4509 * and continue with the next item. */
4311 reg_save(&save); 4510 reg_save(&save);
4312 if (!regmatch(OPERAND(scan))) 4511 if (!regmatch(OPERAND(scan), startp))
4313 return FALSE; 4512 return FALSE;
4314 if (op == MATCH) /* zero-width */ 4513 if (op == MATCH) /* zero-width */
4315 reg_restore(&save); 4514 reg_restore(&save);
4316 } 4515 }
4317 break; 4516 break;
4329 * the match ends at the current position. 4528 * the match ends at the current position.
4330 * First check if the next item matches, that's probably 4529 * First check if the next item matches, that's probably
4331 * faster. 4530 * faster.
4332 */ 4531 */
4333 reg_save(&save_start); 4532 reg_save(&save_start);
4334 if (regmatch(next)) 4533 if (regmatch(next, startp))
4335 { 4534 {
4336 /* save the position after the found match for next */ 4535 /* save the position after the found match for next */
4337 reg_save(&save_after); 4536 reg_save(&save_after);
4338 4537
4339 /* start looking for a match with operand at the current 4538 /* start looking for a match with operand at the current
4345 save_behind_pos = behind_pos; 4544 save_behind_pos = behind_pos;
4346 behind_pos = save_start; 4545 behind_pos = save_start;
4347 for (;;) 4546 for (;;)
4348 { 4547 {
4349 reg_restore(&save_start); 4548 reg_restore(&save_start);
4350 if (regmatch(OPERAND(scan)) 4549 if (regmatch(OPERAND(scan), startp)
4351 && reg_save_equal(&behind_pos)) 4550 && reg_save_equal(&behind_pos))
4352 { 4551 {
4353 behind_pos = save_behind_pos; 4552 behind_pos = save_behind_pos;
4354 /* found a match that ends where "next" started */ 4553 /* found a match that ends where "next" started */
4355 if (needmatch) 4554 if (needmatch)