Mercurial > vim
comparison src/regexp.c @ 167:c93c9cad9618
updated for version 7.0051
author | vimboss |
---|---|
date | Tue, 22 Feb 2005 08:39:57 +0000 |
parents | 19670b05ee32 |
children | 8c60f65311fa |
comparison
equal
deleted
inserted
replaced
166:3a28ed993bbe | 167:c93c9cad9618 |
---|---|
72 * all nodes except BRANCH and BRACES_COMPLEX implement concatenation; a "next" | 72 * all nodes except BRANCH and BRACES_COMPLEX implement concatenation; a "next" |
73 * pointer with a BRANCH on both ends of it is connecting two alternatives. | 73 * pointer with a BRANCH on both ends of it is connecting two alternatives. |
74 * (Here we have one of the subtle syntax dependencies: an individual BRANCH | 74 * (Here we have one of the subtle syntax dependencies: an individual BRANCH |
75 * (as opposed to a collection of them) is never concatenated with anything | 75 * (as opposed to a collection of them) is never concatenated with anything |
76 * because of operator precedence). The "next" pointer of a BRACES_COMPLEX | 76 * because of operator precedence). The "next" pointer of a BRACES_COMPLEX |
77 * node points to the node after the stuff to be repeated. The operand of some | 77 * node points to the node after the stuff to be repeated. |
78 * types of node is a literal string; for others, it is a node leading into a | 78 * The operand of some types of node is a literal string; for others, it is a |
79 * sub-FSM. In particular, the operand of a BRANCH node is the first node of | 79 * node leading into a sub-FSM. In particular, the operand of a BRANCH node |
80 * the branch. (NB this is *not* a tree structure: the tail of the branch | 80 * is the first node of the branch. |
81 * connects to the thing following the set of BRANCHes.) | 81 * (NB this is *not* a tree structure: the tail of the branch connects to the |
82 * thing following the set of BRANCHes.) | |
82 * | 83 * |
83 * pattern is coded like: | 84 * pattern is coded like: |
84 * | 85 * |
85 * +-----------------+ | 86 * +-----------------+ |
86 * | V | 87 * | V |
93 * V | | 94 * V | |
94 * <aa>* BRANCH BRANCH <aa> --> BACK BRANCH --> NOTHING --> END | 95 * <aa>* BRANCH BRANCH <aa> --> BACK BRANCH --> NOTHING --> END |
95 * | | ^ ^ | 96 * | | ^ ^ |
96 * | +---------------+ | | 97 * | +---------------+ | |
97 * +---------------------------------------------+ | 98 * +---------------------------------------------+ |
99 * | |
100 * | |
101 * +----------------------+ | |
102 * V | | |
103 * <aa>\+ BRANCH <aa> --> BRANCH --> BACK BRANCH --> NOTHING --> END | |
104 * | | ^ ^ | |
105 * | +----------+ | | |
106 * +-------------------------------------------------+ | |
98 * | 107 * |
99 * | 108 * |
100 * +-------------------------+ | 109 * +-------------------------+ |
101 * V | | 110 * V | |
102 * <aa>\{} BRANCH BRACE_LIMITS --> BRACE_COMPLEX <aa> --> BACK END | 111 * <aa>\{} BRANCH BRACE_LIMITS --> BRACE_COMPLEX <aa> --> BACK END |
384 */ | 393 */ |
385 static char_u REGEXP_INRANGE[] = "]^-n\\"; | 394 static char_u REGEXP_INRANGE[] = "]^-n\\"; |
386 static char_u REGEXP_ABBR[] = "nrtebdoxuU"; | 395 static char_u REGEXP_ABBR[] = "nrtebdoxuU"; |
387 | 396 |
388 static int backslash_trans __ARGS((int c)); | 397 static int backslash_trans __ARGS((int c)); |
389 static int skip_class_name __ARGS((char_u **pp)); | 398 static int get_char_class __ARGS((char_u **pp)); |
399 static int get_equi_class __ARGS((char_u **pp)); | |
400 static void reg_equi_class __ARGS((int c)); | |
401 static int get_coll_element __ARGS((char_u **pp)); | |
390 static char_u *skip_anyof __ARGS((char_u *p)); | 402 static char_u *skip_anyof __ARGS((char_u *p)); |
391 static void init_class_tab __ARGS((void)); | 403 static void init_class_tab __ARGS((void)); |
392 | 404 |
393 /* | 405 /* |
394 * Translate '\x' to its control character, except "\n", which is Magic. | 406 * Translate '\x' to its control character, except "\n", which is Magic. |
406 } | 418 } |
407 return c; | 419 return c; |
408 } | 420 } |
409 | 421 |
410 /* | 422 /* |
411 * Check for a character class name. "pp" points to the '['. | 423 * Check for a character class name "[:name:]". "pp" points to the '['. |
412 * Returns one of the CLASS_ items. CLASS_NONE means that no item was | 424 * Returns one of the CLASS_ items. CLASS_NONE means that no item was |
413 * recognized. Otherwise "pp" is advanced to after the item. | 425 * recognized. Otherwise "pp" is advanced to after the item. |
414 */ | 426 */ |
415 static int | 427 static int |
416 skip_class_name(pp) | 428 get_char_class(pp) |
417 char_u **pp; | 429 char_u **pp; |
418 { | 430 { |
419 static const char *(class_names[]) = | 431 static const char *(class_names[]) = |
420 { | 432 { |
421 "alnum:]", | 433 "alnum:]", |
465 } | 477 } |
466 return CLASS_NONE; | 478 return CLASS_NONE; |
467 } | 479 } |
468 | 480 |
469 /* | 481 /* |
470 * Skip over a "[]" range. | |
471 * "p" must point to the character after the '['. | |
472 * The returned pointer is on the matching ']', or the terminating NUL. | |
473 */ | |
474 static char_u * | |
475 skip_anyof(p) | |
476 char_u *p; | |
477 { | |
478 int cpo_lit; /* 'cpoptions' contains 'l' flag */ | |
479 #ifdef FEAT_MBYTE | |
480 int l; | |
481 #endif | |
482 | |
483 cpo_lit = (!reg_syn && vim_strchr(p_cpo, CPO_LITERAL) != NULL); | |
484 | |
485 if (*p == '^') /* Complement of range. */ | |
486 ++p; | |
487 if (*p == ']' || *p == '-') | |
488 ++p; | |
489 while (*p != NUL && *p != ']') | |
490 { | |
491 #ifdef FEAT_MBYTE | |
492 if (has_mbyte && (l = (*mb_ptr2len_check)(p)) > 1) | |
493 p += l; | |
494 else | |
495 #endif | |
496 if (*p == '-') | |
497 { | |
498 ++p; | |
499 if (*p != ']' && *p != NUL) | |
500 mb_ptr_adv(p); | |
501 } | |
502 else if (*p == '\\' | |
503 && (vim_strchr(REGEXP_INRANGE, p[1]) != NULL | |
504 || (!cpo_lit && vim_strchr(REGEXP_ABBR, p[1]) != NULL))) | |
505 p += 2; | |
506 else if (*p == '[') | |
507 { | |
508 if (skip_class_name(&p) == CLASS_NONE) | |
509 ++p; /* It was not a class name */ | |
510 } | |
511 else | |
512 ++p; | |
513 } | |
514 | |
515 return p; | |
516 } | |
517 | |
518 /* | |
519 * Specific version of character class functions. | 482 * Specific version of character class functions. |
520 * Using a table to keep this fast. | 483 * Using a table to keep this fast. |
521 */ | 484 */ |
522 static short class_tab[256]; | 485 static short class_tab[256]; |
523 | 486 |
693 static int prog_magic_wrong __ARGS((void)); | 656 static int prog_magic_wrong __ARGS((void)); |
694 static char_u *regnext __ARGS((char_u *)); | 657 static char_u *regnext __ARGS((char_u *)); |
695 static void regc __ARGS((int b)); | 658 static void regc __ARGS((int b)); |
696 #ifdef FEAT_MBYTE | 659 #ifdef FEAT_MBYTE |
697 static void regmbc __ARGS((int c)); | 660 static void regmbc __ARGS((int c)); |
661 #else | |
662 # define regmbc(c) regc(c) | |
698 #endif | 663 #endif |
699 static void reginsert __ARGS((int, char_u *)); | 664 static void reginsert __ARGS((int, char_u *)); |
700 static void reginsert_limits __ARGS((int, long, long, char_u *)); | 665 static void reginsert_limits __ARGS((int, long, long, char_u *)); |
701 static char_u *re_put_long __ARGS((char_u *pr, long_u val)); | 666 static char_u *re_put_long __ARGS((char_u *pr, long_u val)); |
702 static int read_limits __ARGS((long *, long *)); | 667 static int read_limits __ARGS((long *, long *)); |
720 int | 685 int |
721 re_lookbehind(prog) | 686 re_lookbehind(prog) |
722 regprog_T *prog; | 687 regprog_T *prog; |
723 { | 688 { |
724 return (prog->regflags & RF_LOOKBH); | 689 return (prog->regflags & RF_LOOKBH); |
690 } | |
691 | |
692 /* | |
693 * Check for an equivalence class name "[=a=]". "pp" points to the '['. | |
694 * Returns a character representing the class. Zero means that no item was | |
695 * recognized. Otherwise "pp" is advanced to after the item. | |
696 */ | |
697 static int | |
698 get_equi_class(pp) | |
699 char_u **pp; | |
700 { | |
701 int c; | |
702 int l = 1; | |
703 char_u *p = *pp; | |
704 | |
705 if (p[1] == '=') | |
706 { | |
707 #ifdef FEAT_MBYTE | |
708 if (has_mbyte) | |
709 l = mb_ptr2len_check(p + 2); | |
710 #endif | |
711 if (p[l + 2] == '=' && p[l + 3] == ']') | |
712 { | |
713 #ifdef FEAT_MBYTE | |
714 if (has_mbyte) | |
715 c = mb_ptr2char(p + 2); | |
716 else | |
717 #endif | |
718 c = p[2]; | |
719 *pp += l + 4; | |
720 return c; | |
721 } | |
722 } | |
723 return 0; | |
724 } | |
725 | |
726 /* | |
727 * Produce the bytes for equivalence class "c". | |
728 * Currently only handles latin1, latin9 and utf-8. | |
729 */ | |
730 static void | |
731 reg_equi_class(c) | |
732 int c; | |
733 { | |
734 #ifdef FEAT_MBYTE | |
735 if (enc_utf8 || STRCMP(p_enc, "latin1") == 0 | |
736 || STRCMP(p_enc, "latin9") == 0) | |
737 #endif | |
738 { | |
739 switch (c) | |
740 { | |
741 case 'A': case 'À': case 'Á': case 'Â': | |
742 case 'Ã': case 'Ä': case 'Å': | |
743 regmbc('A'); regmbc('À'); regmbc('Á'); regmbc('Â'); | |
744 regmbc('Ã'); regmbc('Ä'); regmbc('Å'); | |
745 return; | |
746 case 'C': case 'Ç': | |
747 regmbc('C'); regmbc('Ç'); | |
748 return; | |
749 case 'E': case 'È': case 'É': case 'Ê': case 'Ë': | |
750 regmbc('E'); regmbc('È'); regmbc('É'); regmbc('Ê'); | |
751 regmbc('Ë'); | |
752 return; | |
753 case 'I': case 'Ì': case 'Í': case 'Î': case 'Ï': | |
754 regmbc('I'); regmbc('Ì'); regmbc('Í'); regmbc('Î'); | |
755 regmbc('Ï'); | |
756 return; | |
757 case 'N': case 'Ñ': | |
758 regmbc('N'); regmbc('Ñ'); | |
759 return; | |
760 case 'O': case 'Ò': case 'Ó': case 'Ô': case 'Õ': case 'Ö': | |
761 regmbc('O'); regmbc('Ò'); regmbc('Ó'); regmbc('Ô'); | |
762 regmbc('Õ'); regmbc('Ö'); | |
763 return; | |
764 case 'U': case 'Ù': case 'Ú': case 'Û': case 'Ü': | |
765 regmbc('U'); regmbc('Ù'); regmbc('Ú'); regmbc('Û'); | |
766 regmbc('Ü'); | |
767 return; | |
768 case 'Y': case 'Ý': | |
769 regmbc('Y'); regmbc('Ý'); | |
770 return; | |
771 case 'a': case 'à': case 'á': case 'â': | |
772 case 'ã': case 'ä': case 'å': | |
773 regmbc('a'); regmbc('à'); regmbc('á'); regmbc('â'); | |
774 regmbc('ã'); regmbc('ä'); regmbc('å'); | |
775 return; | |
776 case 'c': case 'ç': | |
777 regmbc('c'); regmbc('ç'); | |
778 return; | |
779 case 'e': case 'è': case 'é': case 'ê': case 'ë': | |
780 regmbc('e'); regmbc('è'); regmbc('é'); regmbc('ê'); | |
781 regmbc('ë'); | |
782 return; | |
783 case 'i': case 'ì': case 'í': case 'î': case 'ï': | |
784 regmbc('i'); regmbc('ì'); regmbc('í'); regmbc('î'); | |
785 regmbc('ï'); | |
786 return; | |
787 case 'n': case 'ñ': | |
788 regmbc('n'); regmbc('ñ'); | |
789 return; | |
790 case 'o': case 'ò': case 'ó': case 'ô': case 'õ': case 'ö': | |
791 regmbc('o'); regmbc('ò'); regmbc('ó'); regmbc('ô'); | |
792 regmbc('õ'); regmbc('ö'); | |
793 return; | |
794 case 'u': case 'ù': case 'ú': case 'û': case 'ü': | |
795 regmbc('u'); regmbc('ù'); regmbc('ú'); regmbc('û'); | |
796 regmbc('ü'); | |
797 return; | |
798 case 'y': case 'ý': case 'ÿ': | |
799 regmbc('y'); regmbc('ý'); regmbc('ÿ'); | |
800 return; | |
801 } | |
802 } | |
803 regmbc(c); | |
804 } | |
805 | |
806 /* | |
807 * Check for a collating element "[.a.]". "pp" points to the '['. | |
808 * Returns a character. Zero means that no item was recognized. Otherwise | |
809 * "pp" is advanced to after the item. | |
810 * Currently only single characters are recognized! | |
811 */ | |
812 static int | |
813 get_coll_element(pp) | |
814 char_u **pp; | |
815 { | |
816 int c; | |
817 int l = 1; | |
818 char_u *p = *pp; | |
819 | |
820 if (p[1] == '.') | |
821 { | |
822 #ifdef FEAT_MBYTE | |
823 if (has_mbyte) | |
824 l = mb_ptr2len_check(p + 2); | |
825 #endif | |
826 if (p[l + 2] == '.' && p[l + 3] == ']') | |
827 { | |
828 #ifdef FEAT_MBYTE | |
829 if (has_mbyte) | |
830 c = mb_ptr2char(p + 2); | |
831 else | |
832 #endif | |
833 c = p[2]; | |
834 *pp += l + 4; | |
835 return c; | |
836 } | |
837 } | |
838 return 0; | |
839 } | |
840 | |
841 | |
842 /* | |
843 * Skip over a "[]" range. | |
844 * "p" must point to the character after the '['. | |
845 * The returned pointer is on the matching ']', or the terminating NUL. | |
846 */ | |
847 static char_u * | |
848 skip_anyof(p) | |
849 char_u *p; | |
850 { | |
851 int cpo_lit; /* 'cpoptions' contains 'l' flag */ | |
852 int cpo_bsl; /* 'cpoptions' contains '\' flag */ | |
853 #ifdef FEAT_MBYTE | |
854 int l; | |
855 #endif | |
856 | |
857 cpo_lit = (!reg_syn && vim_strchr(p_cpo, CPO_LITERAL) != NULL); | |
858 cpo_bsl = (!reg_syn && vim_strchr(p_cpo, CPO_BACKSL) != NULL); | |
859 | |
860 if (*p == '^') /* Complement of range. */ | |
861 ++p; | |
862 if (*p == ']' || *p == '-') | |
863 ++p; | |
864 while (*p != NUL && *p != ']') | |
865 { | |
866 #ifdef FEAT_MBYTE | |
867 if (has_mbyte && (l = (*mb_ptr2len_check)(p)) > 1) | |
868 p += l; | |
869 else | |
870 #endif | |
871 if (*p == '-') | |
872 { | |
873 ++p; | |
874 if (*p != ']' && *p != NUL) | |
875 mb_ptr_adv(p); | |
876 } | |
877 else if (*p == '\\' | |
878 && !cpo_bsl | |
879 && (vim_strchr(REGEXP_INRANGE, p[1]) != NULL | |
880 || (!cpo_lit && vim_strchr(REGEXP_ABBR, p[1]) != NULL))) | |
881 p += 2; | |
882 else if (*p == '[') | |
883 { | |
884 if (get_char_class(&p) == CLASS_NONE | |
885 && get_equi_class(&p) == 0 | |
886 && get_coll_element(&p) == 0) | |
887 ++p; /* It was not a class name */ | |
888 } | |
889 else | |
890 ++p; | |
891 } | |
892 | |
893 return p; | |
725 } | 894 } |
726 | 895 |
727 /* | 896 /* |
728 * Skip past regular expression. | 897 * Skip past regular expression. |
729 * Stop at end of "startp" or where "dirc" is found ('/', '?', etc). | 898 * Stop at end of "startp" or where "dirc" is found ('/', '?', etc). |
1249 if (re_multi_type(op) == NOT_MULTI) | 1418 if (re_multi_type(op) == NOT_MULTI) |
1250 { | 1419 { |
1251 *flagp = flags; | 1420 *flagp = flags; |
1252 return ret; | 1421 return ret; |
1253 } | 1422 } |
1254 if (!(flags & HASWIDTH) && re_multi_type(op) == MULTI_MULT) | |
1255 { | |
1256 if (op == Magic('*')) | |
1257 EMSG_M_RET_NULL(_("E56: %s* operand could be empty"), | |
1258 reg_magic >= MAGIC_ON); | |
1259 if (op == Magic('+')) | |
1260 EMSG_M_RET_NULL(_("E57: %s+ operand could be empty"), | |
1261 reg_magic == MAGIC_ALL); | |
1262 /* "\{}" is checked below, it's allowed when there is an upper limit */ | |
1263 } | |
1264 /* default flags */ | 1423 /* default flags */ |
1265 *flagp = (WORST | SPSTART | (flags & (HASNL | HASLOOKBH))); | 1424 *flagp = (WORST | SPSTART | (flags & (HASNL | HASLOOKBH))); |
1266 | 1425 |
1267 skipchr(); | 1426 skipchr(); |
1268 switch (op) | 1427 switch (op) |
1336 break; | 1495 break; |
1337 | 1496 |
1338 case Magic('{'): | 1497 case Magic('{'): |
1339 if (!read_limits(&minval, &maxval)) | 1498 if (!read_limits(&minval, &maxval)) |
1340 return NULL; | 1499 return NULL; |
1341 if (!(flags & HASWIDTH) && (maxval > minval | |
1342 ? maxval >= MAX_LIMIT : minval >= MAX_LIMIT)) | |
1343 EMSG_M_RET_NULL(_("E58: %s{ operand could be empty"), | |
1344 reg_magic == MAGIC_ALL); | |
1345 if (flags & SIMPLE) | 1500 if (flags & SIMPLE) |
1346 { | 1501 { |
1347 reginsert(BRACE_SIMPLE, ret); | 1502 reginsert(BRACE_SIMPLE, ret); |
1348 reginsert_limits(BRACE_LIMITS, minval, maxval, ret); | 1503 reginsert_limits(BRACE_LIMITS, minval, maxval, ret); |
1349 } | 1504 } |
1389 int *flagp; | 1544 int *flagp; |
1390 { | 1545 { |
1391 char_u *ret; | 1546 char_u *ret; |
1392 int flags; | 1547 int flags; |
1393 int cpo_lit; /* 'cpoptions' contains 'l' flag */ | 1548 int cpo_lit; /* 'cpoptions' contains 'l' flag */ |
1549 int cpo_bsl; /* 'cpoptions' contains '\' flag */ | |
1394 int c; | 1550 int c; |
1395 static char_u *classchars = (char_u *)".iIkKfFpPsSdDxXoOwWhHaAlLuU"; | 1551 static char_u *classchars = (char_u *)".iIkKfFpPsSdDxXoOwWhHaAlLuU"; |
1396 static int classcodes[] = {ANY, IDENT, SIDENT, KWORD, SKWORD, | 1552 static int classcodes[] = {ANY, IDENT, SIDENT, KWORD, SKWORD, |
1397 FNAME, SFNAME, PRINT, SPRINT, | 1553 FNAME, SFNAME, PRINT, SPRINT, |
1398 WHITE, NWHITE, DIGIT, NDIGIT, | 1554 WHITE, NWHITE, DIGIT, NDIGIT, |
1404 char_u *p; | 1560 char_u *p; |
1405 int extra = 0; | 1561 int extra = 0; |
1406 | 1562 |
1407 *flagp = WORST; /* Tentatively. */ | 1563 *flagp = WORST; /* Tentatively. */ |
1408 cpo_lit = (!reg_syn && vim_strchr(p_cpo, CPO_LITERAL) != NULL); | 1564 cpo_lit = (!reg_syn && vim_strchr(p_cpo, CPO_LITERAL) != NULL); |
1565 cpo_bsl = (!reg_syn && vim_strchr(p_cpo, CPO_BACKSL) != NULL); | |
1409 | 1566 |
1410 c = getchr(); | 1567 c = getchr(); |
1411 switch (c) | 1568 switch (c) |
1412 { | 1569 { |
1413 case Magic('^'): | 1570 case Magic('^'): |
1825 else | 1982 else |
1826 ret = regnode(ANYOF + extra); | 1983 ret = regnode(ANYOF + extra); |
1827 | 1984 |
1828 /* At the start ']' and '-' mean the literal character. */ | 1985 /* At the start ']' and '-' mean the literal character. */ |
1829 if (*regparse == ']' || *regparse == '-') | 1986 if (*regparse == ']' || *regparse == '-') |
1987 { | |
1988 startc = *regparse; | |
1830 regc(*regparse++); | 1989 regc(*regparse++); |
1990 } | |
1831 | 1991 |
1832 while (*regparse != NUL && *regparse != ']') | 1992 while (*regparse != NUL && *regparse != ']') |
1833 { | 1993 { |
1834 if (*regparse == '-') | 1994 if (*regparse == '-') |
1835 { | 1995 { |
1843 regc('-'); | 2003 regc('-'); |
1844 startc = '-'; /* [--x] is a range */ | 2004 startc = '-'; /* [--x] is a range */ |
1845 } | 2005 } |
1846 else | 2006 else |
1847 { | 2007 { |
2008 /* Also accept "a-[.z.]" */ | |
2009 endc = 0; | |
2010 if (*regparse == '[') | |
2011 endc = get_coll_element(&regparse); | |
2012 if (endc == 0) | |
2013 { | |
1848 #ifdef FEAT_MBYTE | 2014 #ifdef FEAT_MBYTE |
1849 if (has_mbyte) | 2015 if (has_mbyte) |
1850 endc = mb_ptr2char_adv(&regparse); | 2016 endc = mb_ptr2char_adv(&regparse); |
1851 else | 2017 else |
1852 #endif | 2018 #endif |
1853 endc = *regparse++; | 2019 endc = *regparse++; |
2020 } | |
1854 | 2021 |
1855 /* Handle \o40, \x20 and \u20AC style sequences */ | 2022 /* Handle \o40, \x20 and \u20AC style sequences */ |
1856 if (endc == '\\' && !cpo_lit) | 2023 if (endc == '\\' && !cpo_lit && !cpo_bsl) |
1857 endc = coll_get_char(); | 2024 endc = coll_get_char(); |
1858 | 2025 |
1859 if (startc > endc) | 2026 if (startc > endc) |
1860 EMSG_RET_NULL(_(e_invrange)); | 2027 EMSG_RET_NULL(_(e_invrange)); |
1861 #ifdef FEAT_MBYTE | 2028 #ifdef FEAT_MBYTE |
1890 } | 2057 } |
1891 /* | 2058 /* |
1892 * Only "\]", "\^", "\]" and "\\" are special in Vi. Vim | 2059 * Only "\]", "\^", "\]" and "\\" are special in Vi. Vim |
1893 * accepts "\t", "\e", etc., but only when the 'l' flag in | 2060 * accepts "\t", "\e", etc., but only when the 'l' flag in |
1894 * 'cpoptions' is not included. | 2061 * 'cpoptions' is not included. |
2062 * Posix doesn't recognize backslash at all. | |
1895 */ | 2063 */ |
1896 else if (*regparse == '\\' | 2064 else if (*regparse == '\\' |
2065 && !cpo_bsl | |
1897 && (vim_strchr(REGEXP_INRANGE, regparse[1]) != NULL | 2066 && (vim_strchr(REGEXP_INRANGE, regparse[1]) != NULL |
1898 || (!cpo_lit | 2067 || (!cpo_lit |
1899 && vim_strchr(REGEXP_ABBR, | 2068 && vim_strchr(REGEXP_ABBR, |
1900 regparse[1]) != NULL))) | 2069 regparse[1]) != NULL))) |
1901 { | 2070 { |
1940 else if (*regparse == '[') | 2109 else if (*regparse == '[') |
1941 { | 2110 { |
1942 int c_class; | 2111 int c_class; |
1943 int cu; | 2112 int cu; |
1944 | 2113 |
1945 c_class = skip_class_name(&regparse); | 2114 c_class = get_char_class(&regparse); |
1946 startc = -1; | 2115 startc = -1; |
1947 /* Characters assumed to be 8 bits! */ | 2116 /* Characters assumed to be 8 bits! */ |
1948 switch (c_class) | 2117 switch (c_class) |
1949 { | 2118 { |
1950 case CLASS_NONE: | 2119 case CLASS_NONE: |
1951 /* literal '[', allow [[-x] as a range */ | 2120 c_class = get_equi_class(&regparse); |
1952 startc = *regparse++; | 2121 if (c_class != 0) |
1953 regc(startc); | 2122 { |
2123 /* produce equivalence class */ | |
2124 reg_equi_class(c_class); | |
2125 } | |
2126 else if ((c_class = | |
2127 get_coll_element(&regparse)) != 0) | |
2128 { | |
2129 /* produce a collating element */ | |
2130 regmbc(c_class); | |
2131 } | |
2132 else | |
2133 { | |
2134 /* literal '[', allow [[-x] as a range */ | |
2135 startc = *regparse++; | |
2136 regc(startc); | |
2137 } | |
1954 break; | 2138 break; |
1955 case CLASS_ALNUM: | 2139 case CLASS_ALNUM: |
1956 for (cu = 1; cu <= 255; cu++) | 2140 for (cu = 1; cu <= 255; cu++) |
1957 if (isalnum(cu)) | 2141 if (isalnum(cu)) |
1958 regc(cu); | 2142 regc(cu); |
2352 } | 2536 } |
2353 | 2537 |
2354 static int | 2538 static int |
2355 peekchr() | 2539 peekchr() |
2356 { | 2540 { |
2541 static int after_slash = FALSE; | |
2542 | |
2357 if (curchr == -1) | 2543 if (curchr == -1) |
2358 { | 2544 { |
2359 switch (curchr = regparse[0]) | 2545 switch (curchr = regparse[0]) |
2360 { | 2546 { |
2361 case '.': | 2547 case '.': |
2390 /* magic only after "\v" */ | 2576 /* magic only after "\v" */ |
2391 if (reg_magic == MAGIC_ALL) | 2577 if (reg_magic == MAGIC_ALL) |
2392 curchr = Magic(curchr); | 2578 curchr = Magic(curchr); |
2393 break; | 2579 break; |
2394 case '*': | 2580 case '*': |
2395 /* * is not magic as the very first character, eg "?*ptr" and when | 2581 /* * is not magic as the very first character, eg "?*ptr", when |
2396 * after '^', eg "/^*ptr" */ | 2582 * after '^', eg "/^*ptr" and when after "\(", "\|", "\&". But |
2397 if (reg_magic >= MAGIC_ON && !at_start | 2583 * "\(\*" is not magic, thus must be magic if "after_slash" */ |
2398 && !(prev_at_start && prevchr == Magic('^'))) | 2584 if (reg_magic >= MAGIC_ON |
2585 && !at_start | |
2586 && !(prev_at_start && prevchr == Magic('^')) | |
2587 && (after_slash | |
2588 || (prevchr != Magic('(') | |
2589 && prevchr != Magic('&') | |
2590 && prevchr != Magic('|')))) | |
2399 curchr = Magic('*'); | 2591 curchr = Magic('*'); |
2400 break; | 2592 break; |
2401 case '^': | 2593 case '^': |
2402 /* '^' is only magic as the very first character and if it's after | 2594 /* '^' is only magic as the very first character and if it's after |
2403 * "\(", "\|", "\&' or "\n" */ | 2595 * "\(", "\|", "\&' or "\n" */ |
2458 */ | 2650 */ |
2459 curchr = -1; | 2651 curchr = -1; |
2460 prev_at_start = at_start; | 2652 prev_at_start = at_start; |
2461 at_start = FALSE; /* be able to say "/\*ptr" */ | 2653 at_start = FALSE; /* be able to say "/\*ptr" */ |
2462 ++regparse; | 2654 ++regparse; |
2655 ++after_slash; | |
2463 peekchr(); | 2656 peekchr(); |
2464 --regparse; | 2657 --regparse; |
2658 --after_slash; | |
2465 curchr = toggle_Magic(curchr); | 2659 curchr = toggle_Magic(curchr); |
2466 } | 2660 } |
2467 else if (vim_strchr(REGEXP_ABBR, c)) | 2661 else if (vim_strchr(REGEXP_ABBR, c)) |
2468 { | 2662 { |
2469 /* | 2663 /* |
2721 *maxval = *minval; /* It was \{n} or \{-n} */ | 2915 *maxval = *minval; /* It was \{n} or \{-n} */ |
2722 else | 2916 else |
2723 *maxval = MAX_LIMIT; /* It was \{} or \{-} */ | 2917 *maxval = MAX_LIMIT; /* It was \{} or \{-} */ |
2724 if (*regparse == '\\') | 2918 if (*regparse == '\\') |
2725 regparse++; /* Allow either \{...} or \{...\} */ | 2919 regparse++; /* Allow either \{...} or \{...\} */ |
2726 if (*regparse != '}' || (*maxval == 0 && *minval == 0)) | 2920 if (*regparse != '}') |
2727 { | 2921 { |
2728 sprintf((char *)IObuff, _("E554: Syntax error in %s{...}"), | 2922 sprintf((char *)IObuff, _("E554: Syntax error in %s{...}"), |
2729 reg_magic == MAGIC_ALL ? "" : "\\"); | 2923 reg_magic == MAGIC_ALL ? "" : "\\"); |
2730 EMSG_RET_FAIL(IObuff); | 2924 EMSG_RET_FAIL(IObuff); |
2731 } | 2925 } |
2813 *(posp) = (savep)->se_u.pos; \ | 3007 *(posp) = (savep)->se_u.pos; \ |
2814 else \ | 3008 else \ |
2815 *(pp) = (savep)->se_u.ptr; } | 3009 *(pp) = (savep)->se_u.ptr; } |
2816 | 3010 |
2817 static int re_num_cmp __ARGS((long_u val, char_u *scan)); | 3011 static int re_num_cmp __ARGS((long_u val, char_u *scan)); |
2818 static int regmatch __ARGS((char_u *prog)); | 3012 static int regmatch __ARGS((char_u *prog, regsave_T *startp)); |
2819 static int regrepeat __ARGS((char_u *p, long maxcount)); | 3013 static int regrepeat __ARGS((char_u *p, long maxcount)); |
2820 | 3014 |
2821 #ifdef DEBUG | 3015 #ifdef DEBUG |
2822 int regnarrate = 0; | 3016 int regnarrate = 0; |
2823 #endif | 3017 #endif |
3271 /* Clear the external match subpointers if necessary. */ | 3465 /* Clear the external match subpointers if necessary. */ |
3272 if (prog->reghasz == REX_SET) | 3466 if (prog->reghasz == REX_SET) |
3273 need_clear_zsubexpr = TRUE; | 3467 need_clear_zsubexpr = TRUE; |
3274 #endif | 3468 #endif |
3275 | 3469 |
3276 if (regmatch(prog->program + 1)) | 3470 if (regmatch(prog->program + 1, NULL)) |
3277 { | 3471 { |
3278 cleanup_subexpr(); | 3472 cleanup_subexpr(); |
3279 if (REG_MULTI) | 3473 if (REG_MULTI) |
3280 { | 3474 { |
3281 if (reg_startpos[0].lnum < 0) | 3475 if (reg_startpos[0].lnum < 0) |
3377 * the last matched character. | 3571 * the last matched character. |
3378 * Returns FALSE when there is no match. Leaves reginput and reglnum in an | 3572 * Returns FALSE when there is no match. Leaves reginput and reglnum in an |
3379 * undefined state! | 3573 * undefined state! |
3380 */ | 3574 */ |
3381 static int | 3575 static int |
3382 regmatch(scan) | 3576 regmatch(scan, startp) |
3383 char_u *scan; /* Current node. */ | 3577 char_u *scan; /* Current node. */ |
3578 regsave_T *startp; /* start position for BACK */ | |
3384 { | 3579 { |
3385 char_u *next; /* Next node. */ | 3580 char_u *next; /* Next node. */ |
3386 int op; | 3581 int op; |
3387 int c; | 3582 int c; |
3388 | 3583 |
3801 | 3996 |
3802 case NOTHING: | 3997 case NOTHING: |
3803 break; | 3998 break; |
3804 | 3999 |
3805 case BACK: | 4000 case BACK: |
4001 /* When we run into BACK without matching something non-empty, we | |
4002 * fail. */ | |
4003 if (startp != NULL && reg_save_equal(startp)) | |
4004 return FALSE; | |
3806 break; | 4005 break; |
3807 | 4006 |
3808 case MOPEN + 0: /* Match start: \zs */ | 4007 case MOPEN + 0: /* Match start: \zs */ |
3809 case MOPEN + 1: /* \( */ | 4008 case MOPEN + 1: /* \( */ |
3810 case MOPEN + 2: | 4009 case MOPEN + 2: |
3821 | 4020 |
3822 no = op - MOPEN; | 4021 no = op - MOPEN; |
3823 cleanup_subexpr(); | 4022 cleanup_subexpr(); |
3824 save_se(&save, &reg_startpos[no], &reg_startp[no]); | 4023 save_se(&save, &reg_startpos[no], &reg_startp[no]); |
3825 | 4024 |
3826 if (regmatch(next)) | 4025 if (regmatch(next, startp)) |
3827 return TRUE; | 4026 return TRUE; |
3828 | 4027 |
3829 restore_se(&save, &reg_startpos[no], &reg_startp[no]); | 4028 restore_se(&save, &reg_startpos[no], &reg_startp[no]); |
3830 return FALSE; | 4029 return FALSE; |
3831 } | 4030 } |
3832 /* break; Not Reached */ | 4031 /* break; Not Reached */ |
3833 | 4032 |
3834 case NOPEN: /* \%( */ | 4033 case NOPEN: /* \%( */ |
3835 case NCLOSE: /* \) after \%( */ | 4034 case NCLOSE: /* \) after \%( */ |
3836 if (regmatch(next)) | 4035 if (regmatch(next, startp)) |
3837 return TRUE; | 4036 return TRUE; |
3838 return FALSE; | 4037 return FALSE; |
3839 /* break; Not Reached */ | 4038 /* break; Not Reached */ |
3840 | 4039 |
3841 #ifdef FEAT_SYN_HL | 4040 #ifdef FEAT_SYN_HL |
3854 | 4053 |
3855 no = op - ZOPEN; | 4054 no = op - ZOPEN; |
3856 cleanup_zsubexpr(); | 4055 cleanup_zsubexpr(); |
3857 save_se(&save, &reg_startzpos[no], &reg_startzp[no]); | 4056 save_se(&save, &reg_startzpos[no], &reg_startzp[no]); |
3858 | 4057 |
3859 if (regmatch(next)) | 4058 if (regmatch(next, startp)) |
3860 return TRUE; | 4059 return TRUE; |
3861 | 4060 |
3862 restore_se(&save, &reg_startzpos[no], &reg_startzp[no]); | 4061 restore_se(&save, &reg_startzpos[no], &reg_startzp[no]); |
3863 return FALSE; | 4062 return FALSE; |
3864 } | 4063 } |
3881 | 4080 |
3882 no = op - MCLOSE; | 4081 no = op - MCLOSE; |
3883 cleanup_subexpr(); | 4082 cleanup_subexpr(); |
3884 save_se(&save, &reg_endpos[no], &reg_endp[no]); | 4083 save_se(&save, &reg_endpos[no], &reg_endp[no]); |
3885 | 4084 |
3886 if (regmatch(next)) | 4085 if (regmatch(next, startp)) |
3887 return TRUE; | 4086 return TRUE; |
3888 | 4087 |
3889 restore_se(&save, &reg_endpos[no], &reg_endp[no]); | 4088 restore_se(&save, &reg_endpos[no], &reg_endp[no]); |
3890 return FALSE; | 4089 return FALSE; |
3891 } | 4090 } |
3907 | 4106 |
3908 no = op - ZCLOSE; | 4107 no = op - ZCLOSE; |
3909 cleanup_zsubexpr(); | 4108 cleanup_zsubexpr(); |
3910 save_se(&save, &reg_endzpos[no], &reg_endzp[no]); | 4109 save_se(&save, &reg_endzpos[no], &reg_endzp[no]); |
3911 | 4110 |
3912 if (regmatch(next)) | 4111 if (regmatch(next, startp)) |
3913 return TRUE; | 4112 return TRUE; |
3914 | 4113 |
3915 restore_se(&save, &reg_endzpos[no], &reg_endzp[no]); | 4114 restore_se(&save, &reg_endzpos[no], &reg_endzp[no]); |
3916 return FALSE; | 4115 return FALSE; |
3917 } | 4116 } |
4074 regsave_T save; | 4273 regsave_T save; |
4075 | 4274 |
4076 do | 4275 do |
4077 { | 4276 { |
4078 reg_save(&save); | 4277 reg_save(&save); |
4079 if (regmatch(OPERAND(scan))) | 4278 if (regmatch(OPERAND(scan), &save)) |
4080 return TRUE; | 4279 return TRUE; |
4081 reg_restore(&save); | 4280 reg_restore(&save); |
4082 scan = regnext(scan); | 4281 scan = regnext(scan); |
4083 } while (scan != NULL && OP(scan) == BRANCH); | 4282 } while (scan != NULL && OP(scan) == BRANCH); |
4084 return FALSE; | 4283 return FALSE; |
4132 /* If not matched enough times yet, try one more */ | 4331 /* If not matched enough times yet, try one more */ |
4133 if (brace_count[no] <= (brace_min[no] <= brace_max[no] | 4332 if (brace_count[no] <= (brace_min[no] <= brace_max[no] |
4134 ? brace_min[no] : brace_max[no])) | 4333 ? brace_min[no] : brace_max[no])) |
4135 { | 4334 { |
4136 reg_save(&save); | 4335 reg_save(&save); |
4137 if (regmatch(OPERAND(scan))) | 4336 if (regmatch(OPERAND(scan), &save)) |
4138 return TRUE; | 4337 return TRUE; |
4139 reg_restore(&save); | 4338 reg_restore(&save); |
4140 --brace_count[no]; /* failed, decrement match count */ | 4339 --brace_count[no]; /* failed, decrement match count */ |
4141 return FALSE; | 4340 return FALSE; |
4142 } | 4341 } |
4146 { | 4345 { |
4147 /* Range is the normal way around, use longest match */ | 4346 /* Range is the normal way around, use longest match */ |
4148 if (brace_count[no] <= brace_max[no]) | 4347 if (brace_count[no] <= brace_max[no]) |
4149 { | 4348 { |
4150 reg_save(&save); | 4349 reg_save(&save); |
4151 if (regmatch(OPERAND(scan))) | 4350 if (regmatch(OPERAND(scan), &save)) |
4152 return TRUE; /* matched some more times */ | 4351 return TRUE; /* matched some more times */ |
4153 reg_restore(&save); | 4352 reg_restore(&save); |
4154 --brace_count[no]; /* matched just enough times */ | 4353 --brace_count[no]; /* matched just enough times */ |
4155 /* continue with the items after \{} */ | 4354 /* { continue with the items after \{} */ |
4156 } | 4355 } |
4157 } | 4356 } |
4158 else | 4357 else |
4159 { | 4358 { |
4160 /* Range is backwards, use shortest match first */ | 4359 /* Range is backwards, use shortest match first */ |
4161 if (brace_count[no] <= brace_min[no]) | 4360 if (brace_count[no] <= brace_min[no]) |
4162 { | 4361 { |
4163 reg_save(&save); | 4362 reg_save(&save); |
4164 if (regmatch(next)) | 4363 if (regmatch(next, &save)) |
4165 return TRUE; | 4364 return TRUE; |
4166 reg_restore(&save); | 4365 reg_restore(&save); |
4167 next = OPERAND(scan); | 4366 next = OPERAND(scan); |
4168 /* must try to match one more item */ | 4367 /* must try to match one more item */ |
4169 } | 4368 } |
4232 /* If it could match, try it. */ | 4431 /* If it could match, try it. */ |
4233 if (nextb == NUL || *reginput == nextb | 4432 if (nextb == NUL || *reginput == nextb |
4234 || *reginput == nextb_ic) | 4433 || *reginput == nextb_ic) |
4235 { | 4434 { |
4236 reg_save(&save); | 4435 reg_save(&save); |
4237 if (regmatch(next)) | 4436 if (regmatch(next, startp)) |
4238 return TRUE; | 4437 return TRUE; |
4239 reg_restore(&save); | 4438 reg_restore(&save); |
4240 } | 4439 } |
4241 /* Couldn't or didn't match -- back up one char. */ | 4440 /* Couldn't or didn't match -- back up one char. */ |
4242 if (--count < minval) | 4441 if (--count < minval) |
4269 /* If it could work, try it. */ | 4468 /* If it could work, try it. */ |
4270 if (nextb == NUL || *reginput == nextb | 4469 if (nextb == NUL || *reginput == nextb |
4271 || *reginput == nextb_ic) | 4470 || *reginput == nextb_ic) |
4272 { | 4471 { |
4273 reg_save(&save); | 4472 reg_save(&save); |
4274 if (regmatch(next)) | 4473 if (regmatch(next, &save)) |
4275 return TRUE; | 4474 return TRUE; |
4276 reg_restore(&save); | 4475 reg_restore(&save); |
4277 } | 4476 } |
4278 /* Couldn't or didn't match: try advancing one char. */ | 4477 /* Couldn't or didn't match: try advancing one char. */ |
4279 if (count == minval | 4478 if (count == minval |
4293 regsave_T save; | 4492 regsave_T save; |
4294 | 4493 |
4295 /* If the operand matches, we fail. Otherwise backup and | 4494 /* If the operand matches, we fail. Otherwise backup and |
4296 * continue with the next item. */ | 4495 * continue with the next item. */ |
4297 reg_save(&save); | 4496 reg_save(&save); |
4298 if (regmatch(OPERAND(scan))) | 4497 if (regmatch(OPERAND(scan), startp)) |
4299 return FALSE; | 4498 return FALSE; |
4300 reg_restore(&save); | 4499 reg_restore(&save); |
4301 } | 4500 } |
4302 break; | 4501 break; |
4303 | 4502 |
4307 regsave_T save; | 4506 regsave_T save; |
4308 | 4507 |
4309 /* If the operand doesn't match, we fail. Otherwise backup | 4508 /* If the operand doesn't match, we fail. Otherwise backup |
4310 * and continue with the next item. */ | 4509 * and continue with the next item. */ |
4311 reg_save(&save); | 4510 reg_save(&save); |
4312 if (!regmatch(OPERAND(scan))) | 4511 if (!regmatch(OPERAND(scan), startp)) |
4313 return FALSE; | 4512 return FALSE; |
4314 if (op == MATCH) /* zero-width */ | 4513 if (op == MATCH) /* zero-width */ |
4315 reg_restore(&save); | 4514 reg_restore(&save); |
4316 } | 4515 } |
4317 break; | 4516 break; |
4329 * the match ends at the current position. | 4528 * the match ends at the current position. |
4330 * First check if the next item matches, that's probably | 4529 * First check if the next item matches, that's probably |
4331 * faster. | 4530 * faster. |
4332 */ | 4531 */ |
4333 reg_save(&save_start); | 4532 reg_save(&save_start); |
4334 if (regmatch(next)) | 4533 if (regmatch(next, startp)) |
4335 { | 4534 { |
4336 /* save the position after the found match for next */ | 4535 /* save the position after the found match for next */ |
4337 reg_save(&save_after); | 4536 reg_save(&save_after); |
4338 | 4537 |
4339 /* start looking for a match with operand at the current | 4538 /* start looking for a match with operand at the current |
4345 save_behind_pos = behind_pos; | 4544 save_behind_pos = behind_pos; |
4346 behind_pos = save_start; | 4545 behind_pos = save_start; |
4347 for (;;) | 4546 for (;;) |
4348 { | 4547 { |
4349 reg_restore(&save_start); | 4548 reg_restore(&save_start); |
4350 if (regmatch(OPERAND(scan)) | 4549 if (regmatch(OPERAND(scan), startp) |
4351 && reg_save_equal(&behind_pos)) | 4550 && reg_save_equal(&behind_pos)) |
4352 { | 4551 { |
4353 behind_pos = save_behind_pos; | 4552 behind_pos = save_behind_pos; |
4354 /* found a match that ends where "next" started */ | 4553 /* found a match that ends where "next" started */ |
4355 if (needmatch) | 4554 if (needmatch) |