comparison src/regexp_nfa.c @ 4543:08ac46980953 v7.3.1019

Updated for version 7.3.1019. Problem: These do not work with the new regexp engine: \%o123, \%x123, \%d123, \%u123 and \%U123. Solution: Implement these items.
author Bram Moolenaar <bram@vim.org>
date Sat, 25 May 2013 22:04:23 +0200
parents 80170d61a85c
children fc997f05cbc7
comparison
equal deleted inserted replaced
4542:c02661ad95f8 4543:08ac46980953
602 int got_coll_char; 602 int got_coll_char;
603 char_u *p; 603 char_u *p;
604 char_u *endp; 604 char_u *endp;
605 #ifdef FEAT_MBYTE 605 #ifdef FEAT_MBYTE
606 char_u *old_regparse = regparse; 606 char_u *old_regparse = regparse;
607 int clen;
608 int i; 607 int i;
609 #endif 608 #endif
610 int extra = 0; 609 int extra = 0;
611 int first; 610 int first;
612 int emit_range; 611 int emit_range;
621 620
622 cpo_lit = vim_strchr(p_cpo, CPO_LITERAL) != NULL; 621 cpo_lit = vim_strchr(p_cpo, CPO_LITERAL) != NULL;
623 cpo_bsl = vim_strchr(p_cpo, CPO_BACKSL) != NULL; 622 cpo_bsl = vim_strchr(p_cpo, CPO_BACKSL) != NULL;
624 623
625 c = getchr(); 624 c = getchr();
626
627 #ifdef FEAT_MBYTE
628 /* clen has the length of the current char, without composing chars */
629 clen = (*mb_char2len)(c);
630 if (has_mbyte && clen > 1)
631 goto nfa_do_multibyte;
632 #endif
633 switch (c) 625 switch (c)
634 { 626 {
627 case NUL:
628 syntax_error = TRUE;
629 EMSG_RET_FAIL(_("E865: (NFA) Regexp end encountered prematurely"));
630
635 case Magic('^'): 631 case Magic('^'):
636 EMIT(NFA_BOL); 632 EMIT(NFA_BOL);
637 break; 633 break;
638 634
639 case Magic('$'): 635 case Magic('$'):
745 case Magic('('): 741 case Magic('('):
746 if (nfa_reg(REG_PAREN) == FAIL) 742 if (nfa_reg(REG_PAREN) == FAIL)
747 return FAIL; /* cascaded error */ 743 return FAIL; /* cascaded error */
748 break; 744 break;
749 745
750 case NUL:
751 syntax_error = TRUE;
752 EMSG_RET_FAIL(_("E865: (NFA) Regexp end encountered prematurely"));
753
754 case Magic('|'): 746 case Magic('|'):
755 case Magic('&'): 747 case Magic('&'):
756 case Magic(')'): 748 case Magic(')'):
757 syntax_error = TRUE; 749 syntax_error = TRUE;
758 EMSGN(_(e_misplaced), no_Magic(c)); 750 EMSGN(_(e_misplaced), no_Magic(c));
832 case 'd': /* %d123 decimal */ 824 case 'd': /* %d123 decimal */
833 case 'o': /* %o123 octal */ 825 case 'o': /* %o123 octal */
834 case 'x': /* %xab hex 2 */ 826 case 'x': /* %xab hex 2 */
835 case 'u': /* %uabcd hex 4 */ 827 case 'u': /* %uabcd hex 4 */
836 case 'U': /* %U1234abcd hex 8 */ 828 case 'U': /* %U1234abcd hex 8 */
837 /* Not yet supported */ 829 {
838 return FAIL; 830 int i;
839 831
840 c = coll_get_char(); 832 switch (c)
841 EMIT(c); 833 {
834 case 'd': i = getdecchrs(); break;
835 case 'o': i = getoctchrs(); break;
836 case 'x': i = gethexchrs(2); break;
837 case 'u': i = gethexchrs(4); break;
838 case 'U': i = gethexchrs(8); break;
839 default: i = -1; break;
840 }
841
842 if (i < 0)
843 EMSG2_RET_FAIL(
844 _("E678: Invalid character after %s%%[dxouU]"),
845 reg_magic == MAGIC_ALL);
846 /* TODO: what if a composing character follows? */
847 EMIT(i);
848 }
842 break; 849 break;
843 850
844 /* Catch \%^ and \%$ regardless of where they appear in the 851 /* Catch \%^ and \%$ regardless of where they appear in the
845 * pattern -- regardless of whether or not it makes sense. */ 852 * pattern -- regardless of whether or not it makes sense. */
846 case '^': 853 case '^':
1215 { 1222 {
1216 #ifdef FEAT_MBYTE 1223 #ifdef FEAT_MBYTE
1217 int plen; 1224 int plen;
1218 1225
1219 nfa_do_multibyte: 1226 nfa_do_multibyte:
1220 /* Length of current char with composing chars. */ 1227 /* plen is length of current char with composing chars */
1221 if (enc_utf8 && (clen != (plen = (*mb_ptr2len)(old_regparse)) 1228 if (enc_utf8 && ((*mb_char2len)(c)
1222 || utf_iscomposing(c))) 1229 != (plen = (*mb_ptr2len)(old_regparse))
1230 || utf_iscomposing(c)))
1223 { 1231 {
1224 /* A base character plus composing characters, or just one 1232 /* A base character plus composing characters, or just one
1225 * or more composing characters. 1233 * or more composing characters.
1226 * This requires creating a separate atom as if enclosing 1234 * This requires creating a separate atom as if enclosing
1227 * the characters in (), where NFA_COMPOSING is the ( and 1235 * the characters in (), where NFA_COMPOSING is the ( and