Mercurial > vim
comparison src/regexp.c @ 15603:639b8318472c v8.1.0809
patch 8.1.0809: too many #ifdefs
commit https://github.com/vim/vim/commit/a12a161b8ce09d024ed71c2134149fa323f8ee8e
Author: Bram Moolenaar <Bram@vim.org>
Date: Thu Jan 24 16:39:02 2019 +0100
patch 8.1.0809: too many #ifdefs
Problem: Too many #ifdefs.
Solution: Graduate FEAT_MBYTE, part 3.
author | Bram Moolenaar <Bram@vim.org> |
---|---|
date | Thu, 24 Jan 2019 16:45:06 +0100 |
parents | d89c5b339c2a |
children | 2e2f07561f4b |
comparison
equal
deleted
inserted
replaced
15602:2ef2028868e2 | 15603:639b8318472c |
---|---|
357 | 357 |
358 static int re_mult_next(char *what); | 358 static int re_mult_next(char *what); |
359 | 359 |
360 static char_u e_missingbracket[] = N_("E769: Missing ] after %s["); | 360 static char_u e_missingbracket[] = N_("E769: Missing ] after %s["); |
361 static char_u e_reverse_range[] = N_("E944: Reverse range in character class"); | 361 static char_u e_reverse_range[] = N_("E944: Reverse range in character class"); |
362 #ifdef FEAT_MBYTE | |
363 static char_u e_large_class[] = N_("E945: Range too large in character class"); | 362 static char_u e_large_class[] = N_("E945: Range too large in character class"); |
364 #endif | |
365 static char_u e_unmatchedpp[] = N_("E53: Unmatched %s%%("); | 363 static char_u e_unmatchedpp[] = N_("E53: Unmatched %s%%("); |
366 static char_u e_unmatchedp[] = N_("E54: Unmatched %s("); | 364 static char_u e_unmatchedp[] = N_("E54: Unmatched %s("); |
367 static char_u e_unmatchedpar[] = N_("E55: Unmatched %s)"); | 365 static char_u e_unmatchedpar[] = N_("E55: Unmatched %s)"); |
368 #ifdef FEAT_SYN_HL | 366 #ifdef FEAT_SYN_HL |
369 static char_u e_z_not_allowed[] = N_("E66: \\z( not allowed here"); | 367 static char_u e_z_not_allowed[] = N_("E66: \\z( not allowed here"); |
559 class_tab[' '] |= RI_WHITE; | 557 class_tab[' '] |= RI_WHITE; |
560 class_tab['\t'] |= RI_WHITE; | 558 class_tab['\t'] |= RI_WHITE; |
561 done = TRUE; | 559 done = TRUE; |
562 } | 560 } |
563 | 561 |
564 #ifdef FEAT_MBYTE | 562 #define ri_digit(c) (c < 0x100 && (class_tab[c] & RI_DIGIT)) |
565 # define ri_digit(c) (c < 0x100 && (class_tab[c] & RI_DIGIT)) | 563 #define ri_hex(c) (c < 0x100 && (class_tab[c] & RI_HEX)) |
566 # define ri_hex(c) (c < 0x100 && (class_tab[c] & RI_HEX)) | 564 #define ri_octal(c) (c < 0x100 && (class_tab[c] & RI_OCTAL)) |
567 # define ri_octal(c) (c < 0x100 && (class_tab[c] & RI_OCTAL)) | 565 #define ri_word(c) (c < 0x100 && (class_tab[c] & RI_WORD)) |
568 # define ri_word(c) (c < 0x100 && (class_tab[c] & RI_WORD)) | 566 #define ri_head(c) (c < 0x100 && (class_tab[c] & RI_HEAD)) |
569 # define ri_head(c) (c < 0x100 && (class_tab[c] & RI_HEAD)) | 567 #define ri_alpha(c) (c < 0x100 && (class_tab[c] & RI_ALPHA)) |
570 # define ri_alpha(c) (c < 0x100 && (class_tab[c] & RI_ALPHA)) | 568 #define ri_lower(c) (c < 0x100 && (class_tab[c] & RI_LOWER)) |
571 # define ri_lower(c) (c < 0x100 && (class_tab[c] & RI_LOWER)) | 569 #define ri_upper(c) (c < 0x100 && (class_tab[c] & RI_UPPER)) |
572 # define ri_upper(c) (c < 0x100 && (class_tab[c] & RI_UPPER)) | 570 #define ri_white(c) (c < 0x100 && (class_tab[c] & RI_WHITE)) |
573 # define ri_white(c) (c < 0x100 && (class_tab[c] & RI_WHITE)) | |
574 #else | |
575 # define ri_digit(c) (class_tab[c] & RI_DIGIT) | |
576 # define ri_hex(c) (class_tab[c] & RI_HEX) | |
577 # define ri_octal(c) (class_tab[c] & RI_OCTAL) | |
578 # define ri_word(c) (class_tab[c] & RI_WORD) | |
579 # define ri_head(c) (class_tab[c] & RI_HEAD) | |
580 # define ri_alpha(c) (class_tab[c] & RI_ALPHA) | |
581 # define ri_lower(c) (class_tab[c] & RI_LOWER) | |
582 # define ri_upper(c) (class_tab[c] & RI_UPPER) | |
583 # define ri_white(c) (class_tab[c] & RI_WHITE) | |
584 #endif | |
585 | 571 |
586 /* flags for regflags */ | 572 /* flags for regflags */ |
587 #define RF_ICASE 1 /* ignore case */ | 573 #define RF_ICASE 1 /* ignore case */ |
588 #define RF_NOICASE 2 /* don't ignore case */ | 574 #define RF_NOICASE 2 /* don't ignore case */ |
589 #define RF_HASNL 4 /* can match a NL */ | 575 #define RF_HASNL 4 /* can match a NL */ |
696 static char_u *regbranch(int *flagp); | 682 static char_u *regbranch(int *flagp); |
697 static char_u *regconcat(int *flagp); | 683 static char_u *regconcat(int *flagp); |
698 static char_u *regpiece(int *); | 684 static char_u *regpiece(int *); |
699 static char_u *regatom(int *); | 685 static char_u *regatom(int *); |
700 static char_u *regnode(int); | 686 static char_u *regnode(int); |
701 #ifdef FEAT_MBYTE | |
702 static int use_multibytecode(int c); | 687 static int use_multibytecode(int c); |
703 #endif | |
704 static int prog_magic_wrong(void); | 688 static int prog_magic_wrong(void); |
705 static char_u *regnext(char_u *); | 689 static char_u *regnext(char_u *); |
706 static void regc(int b); | 690 static void regc(int b); |
707 #ifdef FEAT_MBYTE | |
708 static void regmbc(int c); | 691 static void regmbc(int c); |
709 # define REGMBC(x) regmbc(x); | 692 #define REGMBC(x) regmbc(x); |
710 # define CASEMBC(x) case x: | 693 #define CASEMBC(x) case x: |
711 #else | |
712 # define regmbc(c) regc(c) | |
713 # define REGMBC(x) | |
714 # define CASEMBC(x) | |
715 #endif | |
716 static void reginsert(int, char_u *); | 694 static void reginsert(int, char_u *); |
717 static void reginsert_nr(int op, long val, char_u *opnd); | 695 static void reginsert_nr(int op, long val, char_u *opnd); |
718 static void reginsert_limits(int, long, long, char_u *); | 696 static void reginsert_limits(int, long, long, char_u *); |
719 static char_u *re_put_long(char_u *pr, long_u val); | 697 static char_u *re_put_long(char_u *pr, long_u val); |
720 static int read_limits(long *, long *); | 698 static int read_limits(long *, long *); |
745 int l = 1; | 723 int l = 1; |
746 char_u *p = *pp; | 724 char_u *p = *pp; |
747 | 725 |
748 if (p[1] == '=') | 726 if (p[1] == '=') |
749 { | 727 { |
750 #ifdef FEAT_MBYTE | |
751 if (has_mbyte) | 728 if (has_mbyte) |
752 l = (*mb_ptr2len)(p + 2); | 729 l = (*mb_ptr2len)(p + 2); |
753 #endif | |
754 if (p[l + 2] == '=' && p[l + 3] == ']') | 730 if (p[l + 2] == '=' && p[l + 3] == ']') |
755 { | 731 { |
756 #ifdef FEAT_MBYTE | |
757 if (has_mbyte) | 732 if (has_mbyte) |
758 c = mb_ptr2char(p + 2); | 733 c = mb_ptr2char(p + 2); |
759 else | 734 else |
760 #endif | |
761 c = p[2]; | 735 c = p[2]; |
762 *pp += l + 4; | 736 *pp += l + 4; |
763 return c; | 737 return c; |
764 } | 738 } |
765 } | 739 } |
796 * NOTE: When changing this function, also change nfa_emit_equi_class() | 770 * NOTE: When changing this function, also change nfa_emit_equi_class() |
797 */ | 771 */ |
798 static void | 772 static void |
799 reg_equi_class(int c) | 773 reg_equi_class(int c) |
800 { | 774 { |
801 #ifdef FEAT_MBYTE | |
802 if (enc_utf8 || STRCMP(p_enc, "latin1") == 0 | 775 if (enc_utf8 || STRCMP(p_enc, "latin1") == 0 |
803 || STRCMP(p_enc, "iso-8859-15") == 0) | 776 || STRCMP(p_enc, "iso-8859-15") == 0) |
804 #endif | |
805 { | 777 { |
806 #ifdef EBCDIC | 778 #ifdef EBCDIC |
807 int i; | 779 int i; |
808 | 780 |
809 /* This might be slower than switch/case below. */ | 781 /* This might be slower than switch/case below. */ |
1132 int l = 1; | 1104 int l = 1; |
1133 char_u *p = *pp; | 1105 char_u *p = *pp; |
1134 | 1106 |
1135 if (p[0] != NUL && p[1] == '.') | 1107 if (p[0] != NUL && p[1] == '.') |
1136 { | 1108 { |
1137 #ifdef FEAT_MBYTE | |
1138 if (has_mbyte) | 1109 if (has_mbyte) |
1139 l = (*mb_ptr2len)(p + 2); | 1110 l = (*mb_ptr2len)(p + 2); |
1140 #endif | |
1141 if (p[l + 2] == '.' && p[l + 3] == ']') | 1111 if (p[l + 2] == '.' && p[l + 3] == ']') |
1142 { | 1112 { |
1143 #ifdef FEAT_MBYTE | |
1144 if (has_mbyte) | 1113 if (has_mbyte) |
1145 c = mb_ptr2char(p + 2); | 1114 c = mb_ptr2char(p + 2); |
1146 else | 1115 else |
1147 #endif | |
1148 c = p[2]; | 1116 c = p[2]; |
1149 *pp += l + 4; | 1117 *pp += l + 4; |
1150 return c; | 1118 return c; |
1151 } | 1119 } |
1152 } | 1120 } |
1169 * The returned pointer is on the matching ']', or the terminating NUL. | 1137 * The returned pointer is on the matching ']', or the terminating NUL. |
1170 */ | 1138 */ |
1171 static char_u * | 1139 static char_u * |
1172 skip_anyof(char_u *p) | 1140 skip_anyof(char_u *p) |
1173 { | 1141 { |
1174 #ifdef FEAT_MBYTE | |
1175 int l; | 1142 int l; |
1176 #endif | |
1177 | 1143 |
1178 if (*p == '^') /* Complement of range. */ | 1144 if (*p == '^') /* Complement of range. */ |
1179 ++p; | 1145 ++p; |
1180 if (*p == ']' || *p == '-') | 1146 if (*p == ']' || *p == '-') |
1181 ++p; | 1147 ++p; |
1182 while (*p != NUL && *p != ']') | 1148 while (*p != NUL && *p != ']') |
1183 { | 1149 { |
1184 #ifdef FEAT_MBYTE | |
1185 if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1) | 1150 if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1) |
1186 p += l; | 1151 p += l; |
1187 else | 1152 else |
1188 #endif | |
1189 if (*p == '-') | 1153 if (*p == '-') |
1190 { | 1154 { |
1191 ++p; | 1155 ++p; |
1192 if (*p != ']' && *p != NUL) | 1156 if (*p != ']' && *p != NUL) |
1193 MB_PTR_ADV(p); | 1157 MB_PTR_ADV(p); |
1393 scan = regnext(scan); | 1357 scan = regnext(scan); |
1394 } | 1358 } |
1395 | 1359 |
1396 if (OP(scan) == EXACTLY) | 1360 if (OP(scan) == EXACTLY) |
1397 { | 1361 { |
1398 #ifdef FEAT_MBYTE | |
1399 if (has_mbyte) | 1362 if (has_mbyte) |
1400 r->regstart = (*mb_ptr2char)(OPERAND(scan)); | 1363 r->regstart = (*mb_ptr2char)(OPERAND(scan)); |
1401 else | 1364 else |
1402 #endif | |
1403 r->regstart = *OPERAND(scan); | 1365 r->regstart = *OPERAND(scan); |
1404 } | 1366 } |
1405 else if ((OP(scan) == BOW | 1367 else if ((OP(scan) == BOW |
1406 || OP(scan) == EOW | 1368 || OP(scan) == EOW |
1407 || OP(scan) == NOTHING | 1369 || OP(scan) == NOTHING |
1408 || OP(scan) == MOPEN + 0 || OP(scan) == NOPEN | 1370 || OP(scan) == MOPEN + 0 || OP(scan) == NOPEN |
1409 || OP(scan) == MCLOSE + 0 || OP(scan) == NCLOSE) | 1371 || OP(scan) == MCLOSE + 0 || OP(scan) == NCLOSE) |
1410 && OP(regnext(scan)) == EXACTLY) | 1372 && OP(regnext(scan)) == EXACTLY) |
1411 { | 1373 { |
1412 #ifdef FEAT_MBYTE | |
1413 if (has_mbyte) | 1374 if (has_mbyte) |
1414 r->regstart = (*mb_ptr2char)(OPERAND(regnext(scan))); | 1375 r->regstart = (*mb_ptr2char)(OPERAND(regnext(scan))); |
1415 else | 1376 else |
1416 #endif | |
1417 r->regstart = *OPERAND(regnext(scan)); | 1377 r->regstart = *OPERAND(regnext(scan)); |
1418 } | 1378 } |
1419 | 1379 |
1420 /* | 1380 /* |
1421 * If there's something expensive in the r.e., find the longest | 1381 * If there's something expensive in the r.e., find the longest |
1694 case Magic('&'): | 1654 case Magic('&'): |
1695 case Magic(')'): | 1655 case Magic(')'): |
1696 cont = FALSE; | 1656 cont = FALSE; |
1697 break; | 1657 break; |
1698 case Magic('Z'): | 1658 case Magic('Z'): |
1699 #ifdef FEAT_MBYTE | |
1700 regflags |= RF_ICOMBINE; | 1659 regflags |= RF_ICOMBINE; |
1701 #endif | |
1702 skipchr_keepstart(); | 1660 skipchr_keepstart(); |
1703 break; | 1661 break; |
1704 case Magic('c'): | 1662 case Magic('c'): |
1705 regflags |= RF_ICASE; | 1663 regflags |= RF_ICASE; |
1706 skipchr_keepstart(); | 1664 skipchr_keepstart(); |
2006 case Magic('u'): | 1964 case Magic('u'): |
2007 case Magic('U'): | 1965 case Magic('U'): |
2008 p = vim_strchr(classchars, no_Magic(c)); | 1966 p = vim_strchr(classchars, no_Magic(c)); |
2009 if (p == NULL) | 1967 if (p == NULL) |
2010 EMSG_RET_NULL(_("E63: invalid use of \\_")); | 1968 EMSG_RET_NULL(_("E63: invalid use of \\_")); |
2011 #ifdef FEAT_MBYTE | 1969 |
2012 /* When '.' is followed by a composing char ignore the dot, so that | 1970 /* When '.' is followed by a composing char ignore the dot, so that |
2013 * the composing char is matched here. */ | 1971 * the composing char is matched here. */ |
2014 if (enc_utf8 && c == Magic('.') && utf_iscomposing(peekchr())) | 1972 if (enc_utf8 && c == Magic('.') && utf_iscomposing(peekchr())) |
2015 { | 1973 { |
2016 c = getchr(); | 1974 c = getchr(); |
2017 goto do_multibyte; | 1975 goto do_multibyte; |
2018 } | 1976 } |
2019 #endif | |
2020 ret = regnode(classcodes[p - classchars] + extra); | 1977 ret = regnode(classcodes[p - classchars] + extra); |
2021 *flagp |= HASWIDTH | SIMPLE; | 1978 *flagp |= HASWIDTH | SIMPLE; |
2022 break; | 1979 break; |
2023 | 1980 |
2024 case Magic('n'): | 1981 case Magic('n'): |
2266 | 2223 |
2267 if (i < 0) | 2224 if (i < 0) |
2268 EMSG2_RET_NULL( | 2225 EMSG2_RET_NULL( |
2269 _("E678: Invalid character after %s%%[dxouU]"), | 2226 _("E678: Invalid character after %s%%[dxouU]"), |
2270 reg_magic == MAGIC_ALL); | 2227 reg_magic == MAGIC_ALL); |
2271 #ifdef FEAT_MBYTE | |
2272 if (use_multibytecode(i)) | 2228 if (use_multibytecode(i)) |
2273 ret = regnode(MULTIBYTECODE); | 2229 ret = regnode(MULTIBYTECODE); |
2274 else | 2230 else |
2275 #endif | |
2276 ret = regnode(EXACTLY); | 2231 ret = regnode(EXACTLY); |
2277 if (i == 0) | 2232 if (i == 0) |
2278 regc(0x0a); | 2233 regc(0x0a); |
2279 else | 2234 else |
2280 #ifdef FEAT_MBYTE | |
2281 regmbc(i); | 2235 regmbc(i); |
2282 #else | |
2283 regc(i); | |
2284 #endif | |
2285 regc(NUL); | 2236 regc(NUL); |
2286 *flagp |= HASWIDTH; | 2237 *flagp |= HASWIDTH; |
2287 break; | 2238 break; |
2288 } | 2239 } |
2289 | 2240 |
2401 endc = 0; | 2352 endc = 0; |
2402 if (*regparse == '[') | 2353 if (*regparse == '[') |
2403 endc = get_coll_element(&regparse); | 2354 endc = get_coll_element(&regparse); |
2404 if (endc == 0) | 2355 if (endc == 0) |
2405 { | 2356 { |
2406 #ifdef FEAT_MBYTE | |
2407 if (has_mbyte) | 2357 if (has_mbyte) |
2408 endc = mb_ptr2char_adv(&regparse); | 2358 endc = mb_ptr2char_adv(&regparse); |
2409 else | 2359 else |
2410 #endif | |
2411 endc = *regparse++; | 2360 endc = *regparse++; |
2412 } | 2361 } |
2413 | 2362 |
2414 /* Handle \o40, \x20 and \u20AC style sequences */ | 2363 /* Handle \o40, \x20 and \u20AC style sequences */ |
2415 if (endc == '\\' && !reg_cpo_lit && !reg_cpo_bsl) | 2364 if (endc == '\\' && !reg_cpo_lit && !reg_cpo_bsl) |
2416 endc = coll_get_char(); | 2365 endc = coll_get_char(); |
2417 | 2366 |
2418 if (startc > endc) | 2367 if (startc > endc) |
2419 EMSG_RET_NULL(_(e_reverse_range)); | 2368 EMSG_RET_NULL(_(e_reverse_range)); |
2420 #ifdef FEAT_MBYTE | |
2421 if (has_mbyte && ((*mb_char2len)(startc) > 1 | 2369 if (has_mbyte && ((*mb_char2len)(startc) > 1 |
2422 || (*mb_char2len)(endc) > 1)) | 2370 || (*mb_char2len)(endc) > 1)) |
2423 { | 2371 { |
2424 /* Limit to a range of 256 chars. */ | 2372 /* Limit to a range of 256 chars. */ |
2425 if (endc > startc + 256) | 2373 if (endc > startc + 256) |
2426 EMSG_RET_NULL(_(e_large_class)); | 2374 EMSG_RET_NULL(_(e_large_class)); |
2427 while (++startc <= endc) | 2375 while (++startc <= endc) |
2428 regmbc(startc); | 2376 regmbc(startc); |
2429 } | 2377 } |
2430 else | 2378 else |
2431 #endif | |
2432 { | 2379 { |
2433 #ifdef EBCDIC | 2380 #ifdef EBCDIC |
2434 int alpha_only = FALSE; | 2381 int alpha_only = FALSE; |
2435 | 2382 |
2436 /* for alphabetical range skip the gaps | 2383 /* for alphabetical range skip the gaps |
2486 { | 2433 { |
2487 startc = coll_get_char(); | 2434 startc = coll_get_char(); |
2488 if (startc == 0) | 2435 if (startc == 0) |
2489 regc(0x0a); | 2436 regc(0x0a); |
2490 else | 2437 else |
2491 #ifdef FEAT_MBYTE | |
2492 regmbc(startc); | 2438 regmbc(startc); |
2493 #else | |
2494 regc(startc); | |
2495 #endif | |
2496 } | 2439 } |
2497 else | 2440 else |
2498 { | 2441 { |
2499 startc = backslash_trans(*regparse++); | 2442 startc = backslash_trans(*regparse++); |
2500 regc(startc); | 2443 regc(startc); |
2604 break; | 2547 break; |
2605 } | 2548 } |
2606 } | 2549 } |
2607 else | 2550 else |
2608 { | 2551 { |
2609 #ifdef FEAT_MBYTE | |
2610 if (has_mbyte) | 2552 if (has_mbyte) |
2611 { | 2553 { |
2612 int len; | 2554 int len; |
2613 | 2555 |
2614 /* produce a multibyte character, including any | 2556 /* produce a multibyte character, including any |
2619 startc = -1; /* composing chars */ | 2561 startc = -1; /* composing chars */ |
2620 while (--len >= 0) | 2562 while (--len >= 0) |
2621 regc(*regparse++); | 2563 regc(*regparse++); |
2622 } | 2564 } |
2623 else | 2565 else |
2624 #endif | |
2625 { | 2566 { |
2626 startc = *regparse++; | 2567 startc = *regparse++; |
2627 regc(startc); | 2568 regc(startc); |
2628 } | 2569 } |
2629 } | 2570 } |
2643 | 2584 |
2644 default: | 2585 default: |
2645 { | 2586 { |
2646 int len; | 2587 int len; |
2647 | 2588 |
2648 #ifdef FEAT_MBYTE | |
2649 /* A multi-byte character is handled as a separate atom if it's | 2589 /* A multi-byte character is handled as a separate atom if it's |
2650 * before a multi and when it's a composing char. */ | 2590 * before a multi and when it's a composing char. */ |
2651 if (use_multibytecode(c)) | 2591 if (use_multibytecode(c)) |
2652 { | 2592 { |
2653 do_multibyte: | 2593 do_multibyte: |
2654 ret = regnode(MULTIBYTECODE); | 2594 ret = regnode(MULTIBYTECODE); |
2655 regmbc(c); | 2595 regmbc(c); |
2656 *flagp |= HASWIDTH | SIMPLE; | 2596 *flagp |= HASWIDTH | SIMPLE; |
2657 break; | 2597 break; |
2658 } | 2598 } |
2659 #endif | |
2660 | 2599 |
2661 ret = regnode(EXACTLY); | 2600 ret = regnode(EXACTLY); |
2662 | 2601 |
2663 /* | 2602 /* |
2664 * Append characters as long as: | 2603 * Append characters as long as: |
2673 || (re_multi_type(peekchr()) == NOT_MULTI | 2612 || (re_multi_type(peekchr()) == NOT_MULTI |
2674 && !one_exactly | 2613 && !one_exactly |
2675 && !is_Magic(c))); ++len) | 2614 && !is_Magic(c))); ++len) |
2676 { | 2615 { |
2677 c = no_Magic(c); | 2616 c = no_Magic(c); |
2678 #ifdef FEAT_MBYTE | |
2679 if (has_mbyte) | 2617 if (has_mbyte) |
2680 { | 2618 { |
2681 regmbc(c); | 2619 regmbc(c); |
2682 if (enc_utf8) | 2620 if (enc_utf8) |
2683 { | 2621 { |
2693 skipchr(); | 2631 skipchr(); |
2694 } | 2632 } |
2695 } | 2633 } |
2696 } | 2634 } |
2697 else | 2635 else |
2698 #endif | |
2699 regc(c); | 2636 regc(c); |
2700 c = getchr(); | 2637 c = getchr(); |
2701 } | 2638 } |
2702 ungetchr(); | 2639 ungetchr(); |
2703 | 2640 |
2710 } | 2647 } |
2711 | 2648 |
2712 return ret; | 2649 return ret; |
2713 } | 2650 } |
2714 | 2651 |
2715 #ifdef FEAT_MBYTE | |
2716 /* | 2652 /* |
2717 * Return TRUE if MULTIBYTECODE should be used instead of EXACTLY for | 2653 * Return TRUE if MULTIBYTECODE should be used instead of EXACTLY for |
2718 * character "c". | 2654 * character "c". |
2719 */ | 2655 */ |
2720 static int | 2656 static int |
2722 { | 2658 { |
2723 return has_mbyte && (*mb_char2len)(c) > 1 | 2659 return has_mbyte && (*mb_char2len)(c) > 1 |
2724 && (re_multi_type(peekchr()) != NOT_MULTI | 2660 && (re_multi_type(peekchr()) != NOT_MULTI |
2725 || (enc_utf8 && utf_iscomposing(c))); | 2661 || (enc_utf8 && utf_iscomposing(c))); |
2726 } | 2662 } |
2727 #endif | |
2728 | 2663 |
2729 /* | 2664 /* |
2730 * Emit a node. | 2665 * Emit a node. |
2731 * Return pointer to generated code. | 2666 * Return pointer to generated code. |
2732 */ | 2667 */ |
2757 regsize++; | 2692 regsize++; |
2758 else | 2693 else |
2759 *regcode++ = b; | 2694 *regcode++ = b; |
2760 } | 2695 } |
2761 | 2696 |
2762 #ifdef FEAT_MBYTE | |
2763 /* | 2697 /* |
2764 * Emit (if appropriate) a multi-byte character of code | 2698 * Emit (if appropriate) a multi-byte character of code |
2765 */ | 2699 */ |
2766 static void | 2700 static void |
2767 regmbc(int c) | 2701 regmbc(int c) |
2771 if (regcode == JUST_CALC_SIZE) | 2705 if (regcode == JUST_CALC_SIZE) |
2772 regsize += (*mb_char2len)(c); | 2706 regsize += (*mb_char2len)(c); |
2773 else | 2707 else |
2774 regcode += (*mb_char2bytes)(c, regcode); | 2708 regcode += (*mb_char2bytes)(c, regcode); |
2775 } | 2709 } |
2776 #endif | |
2777 | 2710 |
2778 /* | 2711 /* |
2779 * Insert an operator in front of already-emitted operand | 2712 * Insert an operator in front of already-emitted operand |
2780 * | 2713 * |
2781 * Means relocating the operand. | 2714 * Means relocating the operand. |
3137 { | 3070 { |
3138 /* | 3071 /* |
3139 * Next character can never be (made) magic? | 3072 * Next character can never be (made) magic? |
3140 * Then backslashing it won't do anything. | 3073 * Then backslashing it won't do anything. |
3141 */ | 3074 */ |
3142 #ifdef FEAT_MBYTE | |
3143 if (has_mbyte) | 3075 if (has_mbyte) |
3144 curchr = (*mb_ptr2char)(regparse + 1); | 3076 curchr = (*mb_ptr2char)(regparse + 1); |
3145 else | 3077 else |
3146 #endif | |
3147 curchr = c; | 3078 curchr = c; |
3148 } | 3079 } |
3149 break; | 3080 break; |
3150 } | 3081 } |
3151 | 3082 |
3152 #ifdef FEAT_MBYTE | |
3153 default: | 3083 default: |
3154 if (has_mbyte) | 3084 if (has_mbyte) |
3155 curchr = (*mb_ptr2char)(regparse); | 3085 curchr = (*mb_ptr2char)(regparse); |
3156 #endif | |
3157 } | 3086 } |
3158 } | 3087 } |
3159 | 3088 |
3160 return curchr; | 3089 return curchr; |
3161 } | 3090 } |
3171 prevchr_len = 1; | 3100 prevchr_len = 1; |
3172 else | 3101 else |
3173 prevchr_len = 0; | 3102 prevchr_len = 0; |
3174 if (regparse[prevchr_len] != NUL) | 3103 if (regparse[prevchr_len] != NUL) |
3175 { | 3104 { |
3176 #ifdef FEAT_MBYTE | |
3177 if (enc_utf8) | 3105 if (enc_utf8) |
3178 /* exclude composing chars that mb_ptr2len does include */ | 3106 /* exclude composing chars that mb_ptr2len does include */ |
3179 prevchr_len += utf_ptr2len(regparse + prevchr_len); | 3107 prevchr_len += utf_ptr2len(regparse + prevchr_len); |
3180 else if (has_mbyte) | 3108 else if (has_mbyte) |
3181 prevchr_len += (*mb_ptr2len)(regparse + prevchr_len); | 3109 prevchr_len += (*mb_ptr2len)(regparse + prevchr_len); |
3182 else | 3110 else |
3183 #endif | |
3184 ++prevchr_len; | 3111 ++prevchr_len; |
3185 } | 3112 } |
3186 regparse += prevchr_len; | 3113 regparse += prevchr_len; |
3187 prev_at_start = at_start; | 3114 prev_at_start = at_start; |
3188 at_start = FALSE; | 3115 at_start = FALSE; |
3536 /* Internal copy of 'ignorecase'. It is set at each call to vim_regexec(). | 3463 /* Internal copy of 'ignorecase'. It is set at each call to vim_regexec(). |
3537 * Normally it gets the value of "rm_ic" or "rmm_ic", but when the pattern | 3464 * Normally it gets the value of "rm_ic" or "rmm_ic", but when the pattern |
3538 * contains '\c' or '\C' the value is overruled. */ | 3465 * contains '\c' or '\C' the value is overruled. */ |
3539 int reg_ic; | 3466 int reg_ic; |
3540 | 3467 |
3541 #ifdef FEAT_MBYTE | |
3542 /* Similar to "reg_ic", but only for 'combining' characters. Set with \Z | 3468 /* Similar to "reg_ic", but only for 'combining' characters. Set with \Z |
3543 * flag in the regexp. Defaults to false, always. */ | 3469 * flag in the regexp. Defaults to false, always. */ |
3544 int reg_icombine; | 3470 int reg_icombine; |
3545 #endif | |
3546 | 3471 |
3547 /* Copy of "rmm_maxcol": maximum column to search for a match. Zero when | 3472 /* Copy of "rmm_maxcol": maximum column to search for a match. Zero when |
3548 * there is no maximum. */ | 3473 * there is no maximum. */ |
3549 colnr_T reg_maxcol; | 3474 colnr_T reg_maxcol; |
3550 | 3475 |
3712 rex.reg_maxline = 0; | 3637 rex.reg_maxline = 0; |
3713 rex.reg_line_lbr = line_lbr; | 3638 rex.reg_line_lbr = line_lbr; |
3714 rex.reg_buf = curbuf; | 3639 rex.reg_buf = curbuf; |
3715 rex.reg_win = NULL; | 3640 rex.reg_win = NULL; |
3716 rex.reg_ic = rmp->rm_ic; | 3641 rex.reg_ic = rmp->rm_ic; |
3717 #ifdef FEAT_MBYTE | |
3718 rex.reg_icombine = FALSE; | 3642 rex.reg_icombine = FALSE; |
3719 #endif | |
3720 rex.reg_maxcol = 0; | 3643 rex.reg_maxcol = 0; |
3721 | 3644 |
3722 return bt_regexec_both(line, col, NULL, NULL); | 3645 return bt_regexec_both(line, col, NULL, NULL); |
3723 } | 3646 } |
3724 | 3647 |
3746 rex.reg_win = win; | 3669 rex.reg_win = win; |
3747 rex.reg_firstlnum = lnum; | 3670 rex.reg_firstlnum = lnum; |
3748 rex.reg_maxline = rex.reg_buf->b_ml.ml_line_count - lnum; | 3671 rex.reg_maxline = rex.reg_buf->b_ml.ml_line_count - lnum; |
3749 rex.reg_line_lbr = FALSE; | 3672 rex.reg_line_lbr = FALSE; |
3750 rex.reg_ic = rmp->rmm_ic; | 3673 rex.reg_ic = rmp->rmm_ic; |
3751 #ifdef FEAT_MBYTE | |
3752 rex.reg_icombine = FALSE; | 3674 rex.reg_icombine = FALSE; |
3753 #endif | |
3754 rex.reg_maxcol = rmp->rmm_maxcol; | 3675 rex.reg_maxcol = rmp->rmm_maxcol; |
3755 | 3676 |
3756 return bt_regexec_both(NULL, col, tm, timed_out); | 3677 return bt_regexec_both(NULL, col, tm, timed_out); |
3757 } | 3678 } |
3758 | 3679 |
3825 if (prog->regflags & RF_ICASE) | 3746 if (prog->regflags & RF_ICASE) |
3826 rex.reg_ic = TRUE; | 3747 rex.reg_ic = TRUE; |
3827 else if (prog->regflags & RF_NOICASE) | 3748 else if (prog->regflags & RF_NOICASE) |
3828 rex.reg_ic = FALSE; | 3749 rex.reg_ic = FALSE; |
3829 | 3750 |
3830 #ifdef FEAT_MBYTE | |
3831 /* If pattern contains "\Z" overrule value of rex.reg_icombine */ | 3751 /* If pattern contains "\Z" overrule value of rex.reg_icombine */ |
3832 if (prog->regflags & RF_ICOMBINE) | 3752 if (prog->regflags & RF_ICOMBINE) |
3833 rex.reg_icombine = TRUE; | 3753 rex.reg_icombine = TRUE; |
3834 #endif | |
3835 | 3754 |
3836 /* If there is a "must appear" string, look for it. */ | 3755 /* If there is a "must appear" string, look for it. */ |
3837 if (prog->regmust != NULL) | 3756 if (prog->regmust != NULL) |
3838 { | 3757 { |
3839 int c; | 3758 int c; |
3840 | 3759 |
3841 #ifdef FEAT_MBYTE | |
3842 if (has_mbyte) | 3760 if (has_mbyte) |
3843 c = (*mb_ptr2char)(prog->regmust); | 3761 c = (*mb_ptr2char)(prog->regmust); |
3844 else | 3762 else |
3845 #endif | |
3846 c = *prog->regmust; | 3763 c = *prog->regmust; |
3847 s = line + col; | 3764 s = line + col; |
3848 | 3765 |
3849 /* | 3766 /* |
3850 * This is used very often, esp. for ":global". Use three versions of | 3767 * This is used very often, esp. for ":global". Use three versions of |
3851 * the loop to avoid overhead of conditions. | 3768 * the loop to avoid overhead of conditions. |
3852 */ | 3769 */ |
3853 if (!rex.reg_ic | 3770 if (!rex.reg_ic && !has_mbyte) |
3854 #ifdef FEAT_MBYTE | |
3855 && !has_mbyte | |
3856 #endif | |
3857 ) | |
3858 while ((s = vim_strbyte(s, c)) != NULL) | 3771 while ((s = vim_strbyte(s, c)) != NULL) |
3859 { | 3772 { |
3860 if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0) | 3773 if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0) |
3861 break; /* Found it. */ | 3774 break; /* Found it. */ |
3862 ++s; | 3775 ++s; |
3863 } | 3776 } |
3864 #ifdef FEAT_MBYTE | |
3865 else if (!rex.reg_ic || (!enc_utf8 && mb_char2len(c) > 1)) | 3777 else if (!rex.reg_ic || (!enc_utf8 && mb_char2len(c) > 1)) |
3866 while ((s = vim_strchr(s, c)) != NULL) | 3778 while ((s = vim_strchr(s, c)) != NULL) |
3867 { | 3779 { |
3868 if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0) | 3780 if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0) |
3869 break; /* Found it. */ | 3781 break; /* Found it. */ |
3870 MB_PTR_ADV(s); | 3782 MB_PTR_ADV(s); |
3871 } | 3783 } |
3872 #endif | |
3873 else | 3784 else |
3874 while ((s = cstrchr(s, c)) != NULL) | 3785 while ((s = cstrchr(s, c)) != NULL) |
3875 { | 3786 { |
3876 if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0) | 3787 if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0) |
3877 break; /* Found it. */ | 3788 break; /* Found it. */ |
3888 /* Simplest case: Anchored match need be tried only once. */ | 3799 /* Simplest case: Anchored match need be tried only once. */ |
3889 if (prog->reganch) | 3800 if (prog->reganch) |
3890 { | 3801 { |
3891 int c; | 3802 int c; |
3892 | 3803 |
3893 #ifdef FEAT_MBYTE | |
3894 if (has_mbyte) | 3804 if (has_mbyte) |
3895 c = (*mb_ptr2char)(rex.line + col); | 3805 c = (*mb_ptr2char)(rex.line + col); |
3896 else | 3806 else |
3897 #endif | |
3898 c = rex.line[col]; | 3807 c = rex.line[col]; |
3899 if (prog->regstart == NUL | 3808 if (prog->regstart == NUL |
3900 || prog->regstart == c | 3809 || prog->regstart == c |
3901 || (rex.reg_ic && (( | 3810 || (rex.reg_ic |
3902 #ifdef FEAT_MBYTE | 3811 && (((enc_utf8 && utf_fold(prog->regstart) == utf_fold(c))) |
3903 (enc_utf8 && utf_fold(prog->regstart) == utf_fold(c))) | |
3904 || (c < 255 && prog->regstart < 255 && | 3812 || (c < 255 && prog->regstart < 255 && |
3905 #endif | |
3906 MB_TOLOWER(prog->regstart) == MB_TOLOWER(c))))) | 3813 MB_TOLOWER(prog->regstart) == MB_TOLOWER(c))))) |
3907 retval = regtry(prog, col, tm, timed_out); | 3814 retval = regtry(prog, col, tm, timed_out); |
3908 else | 3815 else |
3909 retval = 0; | 3816 retval = 0; |
3910 } | 3817 } |
3918 { | 3825 { |
3919 if (prog->regstart != NUL) | 3826 if (prog->regstart != NUL) |
3920 { | 3827 { |
3921 /* Skip until the char we know it must start with. | 3828 /* Skip until the char we know it must start with. |
3922 * Used often, do some work to avoid call overhead. */ | 3829 * Used often, do some work to avoid call overhead. */ |
3923 if (!rex.reg_ic | 3830 if (!rex.reg_ic && !has_mbyte) |
3924 #ifdef FEAT_MBYTE | |
3925 && !has_mbyte | |
3926 #endif | |
3927 ) | |
3928 s = vim_strbyte(rex.line + col, prog->regstart); | 3831 s = vim_strbyte(rex.line + col, prog->regstart); |
3929 else | 3832 else |
3930 s = cstrchr(rex.line + col, prog->regstart); | 3833 s = cstrchr(rex.line + col, prog->regstart); |
3931 if (s == NULL) | 3834 if (s == NULL) |
3932 { | 3835 { |
3953 rex.lnum = 0; | 3856 rex.lnum = 0; |
3954 rex.line = reg_getline((linenr_T)0); | 3857 rex.line = reg_getline((linenr_T)0); |
3955 } | 3858 } |
3956 if (rex.line[col] == NUL) | 3859 if (rex.line[col] == NUL) |
3957 break; | 3860 break; |
3958 #ifdef FEAT_MBYTE | |
3959 if (has_mbyte) | 3861 if (has_mbyte) |
3960 col += (*mb_ptr2len)(rex.line + col); | 3862 col += (*mb_ptr2len)(rex.line + col); |
3961 else | 3863 else |
3962 #endif | |
3963 ++col; | 3864 ++col; |
3964 #ifdef FEAT_RELTIME | 3865 #ifdef FEAT_RELTIME |
3965 /* Check for timeout once in a twenty times to avoid overhead. */ | 3866 /* Check for timeout once in a twenty times to avoid overhead. */ |
3966 if (tm != NULL && ++tm_count == 20) | 3867 if (tm != NULL && ++tm_count == 20) |
3967 { | 3868 { |
4114 } | 4015 } |
4115 #endif | 4016 #endif |
4116 return 1 + rex.lnum; | 4017 return 1 + rex.lnum; |
4117 } | 4018 } |
4118 | 4019 |
4119 #ifdef FEAT_MBYTE | |
4120 /* | 4020 /* |
4121 * Get class of previous character. | 4021 * Get class of previous character. |
4122 */ | 4022 */ |
4123 static int | 4023 static int |
4124 reg_prev_class(void) | 4024 reg_prev_class(void) |
4125 { | 4025 { |
4126 if (rex.input > rex.line) | 4026 if (rex.input > rex.line) |
4127 return mb_get_class_buf(rex.input - 1 | 4027 return mb_get_class_buf(rex.input - 1 |
4128 - (*mb_head_off)(rex.line, rex.input - 1), rex.reg_buf); | 4028 - (*mb_head_off)(rex.line, rex.input - 1), rex.reg_buf); |
4129 return -1; | 4029 return -1; |
4130 } | 4030 } |
4131 #endif | |
4132 | 4031 |
4133 /* | 4032 /* |
4134 * Return TRUE if the current rex.input position matches the Visual area. | 4033 * Return TRUE if the current rex.input position matches the Visual area. |
4135 */ | 4034 */ |
4136 static int | 4035 static int |
4337 } | 4236 } |
4338 else | 4237 else |
4339 { | 4238 { |
4340 if (WITH_NL(op)) | 4239 if (WITH_NL(op)) |
4341 op -= ADD_NL; | 4240 op -= ADD_NL; |
4342 #ifdef FEAT_MBYTE | |
4343 if (has_mbyte) | 4241 if (has_mbyte) |
4344 c = (*mb_ptr2char)(rex.input); | 4242 c = (*mb_ptr2char)(rex.input); |
4345 else | 4243 else |
4346 #endif | |
4347 c = *rex.input; | 4244 c = *rex.input; |
4348 switch (op) | 4245 switch (op) |
4349 { | 4246 { |
4350 case BOL: | 4247 case BOL: |
4351 if (rex.input != rex.line) | 4248 if (rex.input != rex.line) |
4429 break; | 4326 break; |
4430 | 4327 |
4431 case BOW: /* \<word; rex.input points to w */ | 4328 case BOW: /* \<word; rex.input points to w */ |
4432 if (c == NUL) /* Can't match at end of line */ | 4329 if (c == NUL) /* Can't match at end of line */ |
4433 status = RA_NOMATCH; | 4330 status = RA_NOMATCH; |
4434 #ifdef FEAT_MBYTE | |
4435 else if (has_mbyte) | 4331 else if (has_mbyte) |
4436 { | 4332 { |
4437 int this_class; | 4333 int this_class; |
4438 | 4334 |
4439 /* Get class of current and previous char (if it exists). */ | 4335 /* Get class of current and previous char (if it exists). */ |
4441 if (this_class <= 1) | 4337 if (this_class <= 1) |
4442 status = RA_NOMATCH; /* not on a word at all */ | 4338 status = RA_NOMATCH; /* not on a word at all */ |
4443 else if (reg_prev_class() == this_class) | 4339 else if (reg_prev_class() == this_class) |
4444 status = RA_NOMATCH; /* previous char is in same word */ | 4340 status = RA_NOMATCH; /* previous char is in same word */ |
4445 } | 4341 } |
4446 #endif | |
4447 else | 4342 else |
4448 { | 4343 { |
4449 if (!vim_iswordc_buf(c, rex.reg_buf) || (rex.input > rex.line | 4344 if (!vim_iswordc_buf(c, rex.reg_buf) || (rex.input > rex.line |
4450 && vim_iswordc_buf(rex.input[-1], rex.reg_buf))) | 4345 && vim_iswordc_buf(rex.input[-1], rex.reg_buf))) |
4451 status = RA_NOMATCH; | 4346 status = RA_NOMATCH; |
4453 break; | 4348 break; |
4454 | 4349 |
4455 case EOW: /* word\>; rex.input points after d */ | 4350 case EOW: /* word\>; rex.input points after d */ |
4456 if (rex.input == rex.line) /* Can't match at start of line */ | 4351 if (rex.input == rex.line) /* Can't match at start of line */ |
4457 status = RA_NOMATCH; | 4352 status = RA_NOMATCH; |
4458 #ifdef FEAT_MBYTE | |
4459 else if (has_mbyte) | 4353 else if (has_mbyte) |
4460 { | 4354 { |
4461 int this_class, prev_class; | 4355 int this_class, prev_class; |
4462 | 4356 |
4463 /* Get class of current and previous char (if it exists). */ | 4357 /* Get class of current and previous char (if it exists). */ |
4465 prev_class = reg_prev_class(); | 4359 prev_class = reg_prev_class(); |
4466 if (this_class == prev_class | 4360 if (this_class == prev_class |
4467 || prev_class == 0 || prev_class == 1) | 4361 || prev_class == 0 || prev_class == 1) |
4468 status = RA_NOMATCH; | 4362 status = RA_NOMATCH; |
4469 } | 4363 } |
4470 #endif | |
4471 else | 4364 else |
4472 { | 4365 { |
4473 if (!vim_iswordc_buf(rex.input[-1], rex.reg_buf) | 4366 if (!vim_iswordc_buf(rex.input[-1], rex.reg_buf) |
4474 || (rex.input[0] != NUL | 4367 || (rex.input[0] != NUL |
4475 && vim_iswordc_buf(c, rex.reg_buf))) | 4368 && vim_iswordc_buf(c, rex.reg_buf))) |
4674 char_u *opnd; | 4567 char_u *opnd; |
4675 | 4568 |
4676 opnd = OPERAND(scan); | 4569 opnd = OPERAND(scan); |
4677 /* Inline the first byte, for speed. */ | 4570 /* Inline the first byte, for speed. */ |
4678 if (*opnd != *rex.input | 4571 if (*opnd != *rex.input |
4679 && (!rex.reg_ic || ( | 4572 && (!rex.reg_ic |
4680 #ifdef FEAT_MBYTE | 4573 || (!enc_utf8 |
4681 !enc_utf8 && | 4574 && MB_TOLOWER(*opnd) != MB_TOLOWER(*rex.input)))) |
4682 #endif | |
4683 MB_TOLOWER(*opnd) != MB_TOLOWER(*rex.input)))) | |
4684 status = RA_NOMATCH; | 4575 status = RA_NOMATCH; |
4685 else if (*opnd == NUL) | 4576 else if (*opnd == NUL) |
4686 { | 4577 { |
4687 /* match empty string always works; happens when "~" is | 4578 /* match empty string always works; happens when "~" is |
4688 * empty. */ | 4579 * empty. */ |
4689 } | 4580 } |
4690 else | 4581 else |
4691 { | 4582 { |
4692 if (opnd[1] == NUL | 4583 if (opnd[1] == NUL && !(enc_utf8 && rex.reg_ic)) |
4693 #ifdef FEAT_MBYTE | |
4694 && !(enc_utf8 && rex.reg_ic) | |
4695 #endif | |
4696 ) | |
4697 { | 4584 { |
4698 len = 1; /* matched a single byte above */ | 4585 len = 1; /* matched a single byte above */ |
4699 } | 4586 } |
4700 else | 4587 else |
4701 { | 4588 { |
4702 /* Need to match first byte again for multi-byte. */ | 4589 /* Need to match first byte again for multi-byte. */ |
4703 len = (int)STRLEN(opnd); | 4590 len = (int)STRLEN(opnd); |
4704 if (cstrncmp(opnd, rex.input, &len) != 0) | 4591 if (cstrncmp(opnd, rex.input, &len) != 0) |
4705 status = RA_NOMATCH; | 4592 status = RA_NOMATCH; |
4706 } | 4593 } |
4707 #ifdef FEAT_MBYTE | |
4708 /* Check for following composing character, unless %C | 4594 /* Check for following composing character, unless %C |
4709 * follows (skips over all composing chars). */ | 4595 * follows (skips over all composing chars). */ |
4710 if (status != RA_NOMATCH | 4596 if (status != RA_NOMATCH |
4711 && enc_utf8 | 4597 && enc_utf8 |
4712 && UTF_COMPOSINGLIKE(rex.input, rex.input + len) | 4598 && UTF_COMPOSINGLIKE(rex.input, rex.input + len) |
4716 /* raaron: This code makes a composing character get | 4602 /* raaron: This code makes a composing character get |
4717 * ignored, which is the correct behavior (sometimes) | 4603 * ignored, which is the correct behavior (sometimes) |
4718 * for voweled Hebrew texts. */ | 4604 * for voweled Hebrew texts. */ |
4719 status = RA_NOMATCH; | 4605 status = RA_NOMATCH; |
4720 } | 4606 } |
4721 #endif | |
4722 if (status != RA_NOMATCH) | 4607 if (status != RA_NOMATCH) |
4723 rex.input += len; | 4608 rex.input += len; |
4724 } | 4609 } |
4725 } | 4610 } |
4726 break; | 4611 break; |
4733 status = RA_NOMATCH; | 4618 status = RA_NOMATCH; |
4734 else | 4619 else |
4735 ADVANCE_REGINPUT(); | 4620 ADVANCE_REGINPUT(); |
4736 break; | 4621 break; |
4737 | 4622 |
4738 #ifdef FEAT_MBYTE | |
4739 case MULTIBYTECODE: | 4623 case MULTIBYTECODE: |
4740 if (has_mbyte) | 4624 if (has_mbyte) |
4741 { | 4625 { |
4742 int i, len; | 4626 int i, len; |
4743 char_u *opnd; | 4627 char_u *opnd; |
4786 rex.input += len; | 4670 rex.input += len; |
4787 } | 4671 } |
4788 else | 4672 else |
4789 status = RA_NOMATCH; | 4673 status = RA_NOMATCH; |
4790 break; | 4674 break; |
4791 #endif | |
4792 case RE_COMPOSING: | 4675 case RE_COMPOSING: |
4793 #ifdef FEAT_MBYTE | |
4794 if (enc_utf8) | 4676 if (enc_utf8) |
4795 { | 4677 { |
4796 /* Skip composing characters. */ | 4678 /* Skip composing characters. */ |
4797 while (utf_iscomposing(utf_ptr2char(rex.input))) | 4679 while (utf_iscomposing(utf_ptr2char(rex.input))) |
4798 MB_CPTR_ADV(rex.input); | 4680 MB_CPTR_ADV(rex.input); |
4799 } | 4681 } |
4800 #endif | |
4801 break; | 4682 break; |
4802 | 4683 |
4803 case NOTHING: | 4684 case NOTHING: |
4804 break; | 4685 break; |
4805 | 4686 |
5559 (colnr_T)STRLEN(rex.line); | 5440 (colnr_T)STRLEN(rex.line); |
5560 } | 5441 } |
5561 } | 5442 } |
5562 else | 5443 else |
5563 { | 5444 { |
5564 #ifdef FEAT_MBYTE | |
5565 if (has_mbyte) | 5445 if (has_mbyte) |
5566 { | 5446 { |
5567 char_u *line = | 5447 char_u *line = |
5568 reg_getline(rp->rs_un.regsave.rs_u.pos.lnum); | 5448 reg_getline(rp->rs_un.regsave.rs_u.pos.lnum); |
5569 | 5449 |
5570 rp->rs_un.regsave.rs_u.pos.col -= | 5450 rp->rs_un.regsave.rs_u.pos.col -= |
5571 (*mb_head_off)(line, line | 5451 (*mb_head_off)(line, line |
5572 + rp->rs_un.regsave.rs_u.pos.col - 1) + 1; | 5452 + rp->rs_un.regsave.rs_u.pos.col - 1) + 1; |
5573 } | 5453 } |
5574 else | 5454 else |
5575 #endif | |
5576 --rp->rs_un.regsave.rs_u.pos.col; | 5455 --rp->rs_un.regsave.rs_u.pos.col; |
5577 } | 5456 } |
5578 } | 5457 } |
5579 else | 5458 else |
5580 { | 5459 { |
5950 case WHITE + ADD_NL: | 5829 case WHITE + ADD_NL: |
5951 testval = mask = RI_WHITE; | 5830 testval = mask = RI_WHITE; |
5952 do_class: | 5831 do_class: |
5953 while (count < maxcount) | 5832 while (count < maxcount) |
5954 { | 5833 { |
5955 #ifdef FEAT_MBYTE | |
5956 int l; | 5834 int l; |
5957 #endif | 5835 |
5958 if (*scan == NUL) | 5836 if (*scan == NUL) |
5959 { | 5837 { |
5960 if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline | 5838 if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline |
5961 || rex.reg_line_lbr) | 5839 || rex.reg_line_lbr) |
5962 break; | 5840 break; |
5963 reg_nextline(); | 5841 reg_nextline(); |
5964 scan = rex.input; | 5842 scan = rex.input; |
5965 if (got_int) | 5843 if (got_int) |
5966 break; | 5844 break; |
5967 } | 5845 } |
5968 #ifdef FEAT_MBYTE | |
5969 else if (has_mbyte && (l = (*mb_ptr2len)(scan)) > 1) | 5846 else if (has_mbyte && (l = (*mb_ptr2len)(scan)) > 1) |
5970 { | 5847 { |
5971 if (testval != 0) | 5848 if (testval != 0) |
5972 break; | 5849 break; |
5973 scan += l; | 5850 scan += l; |
5974 } | 5851 } |
5975 #endif | |
5976 else if ((class_tab[*scan] & mask) == testval) | 5852 else if ((class_tab[*scan] & mask) == testval) |
5977 ++scan; | 5853 ++scan; |
5978 else if (rex.reg_line_lbr && *scan == '\n' && WITH_NL(OP(p))) | 5854 else if (rex.reg_line_lbr && *scan == '\n' && WITH_NL(OP(p))) |
5979 ++scan; | 5855 ++scan; |
5980 else | 5856 else |
6079 } | 5955 } |
6080 } | 5956 } |
6081 break; | 5957 break; |
6082 } | 5958 } |
6083 | 5959 |
6084 #ifdef FEAT_MBYTE | |
6085 case MULTIBYTECODE: | 5960 case MULTIBYTECODE: |
6086 { | 5961 { |
6087 int i, len, cf = 0; | 5962 int i, len, cf = 0; |
6088 | 5963 |
6089 /* Safety check (just in case 'encoding' was changed since | 5964 /* Safety check (just in case 'encoding' was changed since |
6104 ++count; | 5979 ++count; |
6105 } | 5980 } |
6106 } | 5981 } |
6107 } | 5982 } |
6108 break; | 5983 break; |
6109 #endif | |
6110 | 5984 |
6111 case ANYOF: | 5985 case ANYOF: |
6112 case ANYOF + ADD_NL: | 5986 case ANYOF + ADD_NL: |
6113 testval = TRUE; | 5987 testval = TRUE; |
6114 /* FALLTHROUGH */ | 5988 /* FALLTHROUGH */ |
6115 | 5989 |
6116 case ANYBUT: | 5990 case ANYBUT: |
6117 case ANYBUT + ADD_NL: | 5991 case ANYBUT + ADD_NL: |
6118 while (count < maxcount) | 5992 while (count < maxcount) |
6119 { | 5993 { |
6120 #ifdef FEAT_MBYTE | |
6121 int len; | 5994 int len; |
6122 #endif | 5995 |
6123 if (*scan == NUL) | 5996 if (*scan == NUL) |
6124 { | 5997 { |
6125 if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline | 5998 if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline |
6126 || rex.reg_line_lbr) | 5999 || rex.reg_line_lbr) |
6127 break; | 6000 break; |
6130 if (got_int) | 6003 if (got_int) |
6131 break; | 6004 break; |
6132 } | 6005 } |
6133 else if (rex.reg_line_lbr && *scan == '\n' && WITH_NL(OP(p))) | 6006 else if (rex.reg_line_lbr && *scan == '\n' && WITH_NL(OP(p))) |
6134 ++scan; | 6007 ++scan; |
6135 #ifdef FEAT_MBYTE | |
6136 else if (has_mbyte && (len = (*mb_ptr2len)(scan)) > 1) | 6008 else if (has_mbyte && (len = (*mb_ptr2len)(scan)) > 1) |
6137 { | 6009 { |
6138 if ((cstrchr(opnd, (*mb_ptr2char)(scan)) == NULL) == testval) | 6010 if ((cstrchr(opnd, (*mb_ptr2char)(scan)) == NULL) == testval) |
6139 break; | 6011 break; |
6140 scan += len; | 6012 scan += len; |
6141 } | 6013 } |
6142 #endif | |
6143 else | 6014 else |
6144 { | 6015 { |
6145 if ((cstrchr(opnd, *scan) == NULL) == testval) | 6016 if ((cstrchr(opnd, *scan) == NULL) == testval) |
6146 break; | 6017 break; |
6147 ++scan; | 6018 ++scan; |
6953 case BRACE_COMPLEX + 8: | 6824 case BRACE_COMPLEX + 8: |
6954 case BRACE_COMPLEX + 9: | 6825 case BRACE_COMPLEX + 9: |
6955 sprintf(buf + STRLEN(buf), "BRACE_COMPLEX%d", OP(op) - BRACE_COMPLEX); | 6826 sprintf(buf + STRLEN(buf), "BRACE_COMPLEX%d", OP(op) - BRACE_COMPLEX); |
6956 p = NULL; | 6827 p = NULL; |
6957 break; | 6828 break; |
6958 #ifdef FEAT_MBYTE | |
6959 case MULTIBYTECODE: | 6829 case MULTIBYTECODE: |
6960 p = "MULTIBYTECODE"; | 6830 p = "MULTIBYTECODE"; |
6961 break; | 6831 break; |
6962 #endif | |
6963 case NEWL: | 6832 case NEWL: |
6964 p = "NEWL"; | 6833 p = "NEWL"; |
6965 break; | 6834 break; |
6966 default: | 6835 default: |
6967 sprintf(buf + STRLEN(buf), "corrupt %d", OP(op)); | 6836 sprintf(buf + STRLEN(buf), "corrupt %d", OP(op)); |
6987 return FAIL; | 6856 return FAIL; |
6988 } | 6857 } |
6989 return OK; | 6858 return OK; |
6990 } | 6859 } |
6991 | 6860 |
6992 #ifdef FEAT_MBYTE | |
6993 typedef struct | 6861 typedef struct |
6994 { | 6862 { |
6995 int a, b, c; | 6863 int a, b, c; |
6996 } decomp_T; | 6864 } decomp_T; |
6997 | 6865 |
7065 { | 6933 { |
7066 *c1 = c; | 6934 *c1 = c; |
7067 *c2 = *c3 = 0; | 6935 *c2 = *c3 = 0; |
7068 } | 6936 } |
7069 } | 6937 } |
7070 #endif | |
7071 | 6938 |
7072 /* | 6939 /* |
7073 * Compare two strings, ignore case if rex.reg_ic set. | 6940 * Compare two strings, ignore case if rex.reg_ic set. |
7074 * Return 0 if strings match, non-zero otherwise. | 6941 * Return 0 if strings match, non-zero otherwise. |
7075 * Correct the length "*n" when composing characters are ignored. | 6942 * Correct the length "*n" when composing characters are ignored. |
7082 if (!rex.reg_ic) | 6949 if (!rex.reg_ic) |
7083 result = STRNCMP(s1, s2, *n); | 6950 result = STRNCMP(s1, s2, *n); |
7084 else | 6951 else |
7085 result = MB_STRNICMP(s1, s2, *n); | 6952 result = MB_STRNICMP(s1, s2, *n); |
7086 | 6953 |
7087 #ifdef FEAT_MBYTE | |
7088 /* if it failed and it's utf8 and we want to combineignore: */ | 6954 /* if it failed and it's utf8 and we want to combineignore: */ |
7089 if (result != 0 && enc_utf8 && rex.reg_icombine) | 6955 if (result != 0 && enc_utf8 && rex.reg_icombine) |
7090 { | 6956 { |
7091 char_u *str1, *str2; | 6957 char_u *str1, *str2; |
7092 int c1, c2, c11, c12; | 6958 int c1, c2, c11, c12; |
7119 } | 6985 } |
7120 result = c2 - c1; | 6986 result = c2 - c1; |
7121 if (result == 0) | 6987 if (result == 0) |
7122 *n = (int)(str2 - s2); | 6988 *n = (int)(str2 - s2); |
7123 } | 6989 } |
7124 #endif | |
7125 | 6990 |
7126 return result; | 6991 return result; |
7127 } | 6992 } |
7128 | 6993 |
7129 /* | 6994 /* |
7133 cstrchr(char_u *s, int c) | 6998 cstrchr(char_u *s, int c) |
7134 { | 6999 { |
7135 char_u *p; | 7000 char_u *p; |
7136 int cc; | 7001 int cc; |
7137 | 7002 |
7138 if (!rex.reg_ic | 7003 if (!rex.reg_ic || (!enc_utf8 && mb_char2len(c) > 1)) |
7139 #ifdef FEAT_MBYTE | |
7140 || (!enc_utf8 && mb_char2len(c) > 1) | |
7141 #endif | |
7142 ) | |
7143 return vim_strchr(s, c); | 7004 return vim_strchr(s, c); |
7144 | 7005 |
7145 /* tolower() and toupper() can be slow, comparing twice should be a lot | 7006 /* tolower() and toupper() can be slow, comparing twice should be a lot |
7146 * faster (esp. when using MS Visual C++!). | 7007 * faster (esp. when using MS Visual C++!). |
7147 * For UTF-8 need to use folded case. */ | 7008 * For UTF-8 need to use folded case. */ |
7148 #ifdef FEAT_MBYTE | |
7149 if (enc_utf8 && c > 0x80) | 7009 if (enc_utf8 && c > 0x80) |
7150 cc = utf_fold(c); | 7010 cc = utf_fold(c); |
7151 else | 7011 else |
7152 #endif | |
7153 if (MB_ISUPPER(c)) | 7012 if (MB_ISUPPER(c)) |
7154 cc = MB_TOLOWER(c); | 7013 cc = MB_TOLOWER(c); |
7155 else if (MB_ISLOWER(c)) | 7014 else if (MB_ISLOWER(c)) |
7156 cc = MB_TOUPPER(c); | 7015 cc = MB_TOUPPER(c); |
7157 else | 7016 else |
7158 return vim_strchr(s, c); | 7017 return vim_strchr(s, c); |
7159 | 7018 |
7160 #ifdef FEAT_MBYTE | |
7161 if (has_mbyte) | 7019 if (has_mbyte) |
7162 { | 7020 { |
7163 for (p = s; *p != NUL; p += (*mb_ptr2len)(p)) | 7021 for (p = s; *p != NUL; p += (*mb_ptr2len)(p)) |
7164 { | 7022 { |
7165 if (enc_utf8 && c > 0x80) | 7023 if (enc_utf8 && c > 0x80) |
7170 else if (*p == c || *p == cc) | 7028 else if (*p == c || *p == cc) |
7171 return p; | 7029 return p; |
7172 } | 7030 } |
7173 } | 7031 } |
7174 else | 7032 else |
7175 #endif | |
7176 /* Faster version for when there are no multi-byte characters. */ | 7033 /* Faster version for when there are no multi-byte characters. */ |
7177 for (p = s; *p != NUL; ++p) | 7034 for (p = s; *p != NUL; ++p) |
7178 if (*p == c || *p == cc) | 7035 if (*p == c || *p == cc) |
7179 return p; | 7036 return p; |
7180 | 7037 |
7283 } | 7140 } |
7284 else | 7141 else |
7285 { | 7142 { |
7286 if (*p == '\\' && p[1]) /* skip escaped characters */ | 7143 if (*p == '\\' && p[1]) /* skip escaped characters */ |
7287 ++p; | 7144 ++p; |
7288 #ifdef FEAT_MBYTE | |
7289 if (has_mbyte) | 7145 if (has_mbyte) |
7290 p += (*mb_ptr2len)(p) - 1; | 7146 p += (*mb_ptr2len)(p) - 1; |
7291 #endif | |
7292 } | 7147 } |
7293 } | 7148 } |
7294 | 7149 |
7295 vim_free(reg_prev_sub); | 7150 vim_free(reg_prev_sub); |
7296 if (newsub != source) /* newsub was allocated, just keep it */ | 7151 if (newsub != source) /* newsub was allocated, just keep it */ |
7680 ++dst; | 7535 ++dst; |
7681 } | 7536 } |
7682 c = *src++; | 7537 c = *src++; |
7683 } | 7538 } |
7684 } | 7539 } |
7685 #ifdef FEAT_MBYTE | |
7686 else if (has_mbyte) | 7540 else if (has_mbyte) |
7687 c = mb_ptr2char(src - 1); | 7541 c = mb_ptr2char(src - 1); |
7688 #endif | |
7689 | 7542 |
7690 /* Write to buffer, if copy is set. */ | 7543 /* Write to buffer, if copy is set. */ |
7691 if (func_one != (fptr_T)NULL) | 7544 if (func_one != (fptr_T)NULL) |
7692 /* Turbo C complains without the typecast */ | 7545 /* Turbo C complains without the typecast */ |
7693 func_one = (fptr_T)(func_one(&cc, c)); | 7546 func_one = (fptr_T)(func_one(&cc, c)); |
7695 /* Turbo C complains without the typecast */ | 7548 /* Turbo C complains without the typecast */ |
7696 func_all = (fptr_T)(func_all(&cc, c)); | 7549 func_all = (fptr_T)(func_all(&cc, c)); |
7697 else /* just copy */ | 7550 else /* just copy */ |
7698 cc = c; | 7551 cc = c; |
7699 | 7552 |
7700 #ifdef FEAT_MBYTE | |
7701 if (has_mbyte) | 7553 if (has_mbyte) |
7702 { | 7554 { |
7703 int totlen = mb_ptr2len(src - 1); | 7555 int totlen = mb_ptr2len(src - 1); |
7704 | 7556 |
7705 if (copy) | 7557 if (copy) |
7719 dst += totlen - clen; | 7571 dst += totlen - clen; |
7720 } | 7572 } |
7721 } | 7573 } |
7722 src += totlen - 1; | 7574 src += totlen - 1; |
7723 } | 7575 } |
7724 else | 7576 else if (copy) |
7725 #endif | |
7726 if (copy) | |
7727 *dst = cc; | 7577 *dst = cc; |
7728 dst++; | 7578 dst++; |
7729 } | 7579 } |
7730 else | 7580 else |
7731 { | 7581 { |
7797 } | 7647 } |
7798 dst += 2; | 7648 dst += 2; |
7799 } | 7649 } |
7800 else | 7650 else |
7801 { | 7651 { |
7802 #ifdef FEAT_MBYTE | |
7803 if (has_mbyte) | 7652 if (has_mbyte) |
7804 c = mb_ptr2char(s); | 7653 c = mb_ptr2char(s); |
7805 else | 7654 else |
7806 #endif | |
7807 c = *s; | 7655 c = *s; |
7808 | 7656 |
7809 if (func_one != (fptr_T)NULL) | 7657 if (func_one != (fptr_T)NULL) |
7810 /* Turbo C complains without the typecast */ | 7658 /* Turbo C complains without the typecast */ |
7811 func_one = (fptr_T)(func_one(&cc, c)); | 7659 func_one = (fptr_T)(func_one(&cc, c)); |
7813 /* Turbo C complains without the typecast */ | 7661 /* Turbo C complains without the typecast */ |
7814 func_all = (fptr_T)(func_all(&cc, c)); | 7662 func_all = (fptr_T)(func_all(&cc, c)); |
7815 else /* just copy */ | 7663 else /* just copy */ |
7816 cc = c; | 7664 cc = c; |
7817 | 7665 |
7818 #ifdef FEAT_MBYTE | |
7819 if (has_mbyte) | 7666 if (has_mbyte) |
7820 { | 7667 { |
7821 int l; | 7668 int l; |
7822 | 7669 |
7823 /* Copy composing characters separately, one | 7670 /* Copy composing characters separately, one |
7831 len -= l; | 7678 len -= l; |
7832 if (copy) | 7679 if (copy) |
7833 mb_char2bytes(cc, dst); | 7680 mb_char2bytes(cc, dst); |
7834 dst += mb_char2len(cc) - 1; | 7681 dst += mb_char2len(cc) - 1; |
7835 } | 7682 } |
7836 else | 7683 else if (copy) |
7837 #endif | |
7838 if (copy) | |
7839 *dst = cc; | 7684 *dst = cc; |
7840 dst++; | 7685 dst++; |
7841 } | 7686 } |
7842 | 7687 |
7843 ++s; | 7688 ++s; |