comparison src/regexp.c @ 15603:639b8318472c v8.1.0809

patch 8.1.0809: too many #ifdefs

commit https://github.com/vim/vim/commit/a12a161b8ce09d024ed71c2134149fa323f8ee8e
Author: Bram Moolenaar <Bram@vim.org>
Date: Thu Jan 24 16:39:02 2019 +0100

    patch 8.1.0809: too many #ifdefs
    Problem: Too many #ifdefs.
    Solution: Graduate FEAT_MBYTE, part 3.
author Bram Moolenaar <Bram@vim.org>
date Thu, 24 Jan 2019 16:45:06 +0100
parents d89c5b339c2a
children 2e2f07561f4b
comparison
equal deleted inserted replaced
15602:2ef2028868e2 15603:639b8318472c
357 357
358 static int re_mult_next(char *what); 358 static int re_mult_next(char *what);
359 359
360 static char_u e_missingbracket[] = N_("E769: Missing ] after %s["); 360 static char_u e_missingbracket[] = N_("E769: Missing ] after %s[");
361 static char_u e_reverse_range[] = N_("E944: Reverse range in character class"); 361 static char_u e_reverse_range[] = N_("E944: Reverse range in character class");
362 #ifdef FEAT_MBYTE
363 static char_u e_large_class[] = N_("E945: Range too large in character class"); 362 static char_u e_large_class[] = N_("E945: Range too large in character class");
364 #endif
365 static char_u e_unmatchedpp[] = N_("E53: Unmatched %s%%("); 363 static char_u e_unmatchedpp[] = N_("E53: Unmatched %s%%(");
366 static char_u e_unmatchedp[] = N_("E54: Unmatched %s("); 364 static char_u e_unmatchedp[] = N_("E54: Unmatched %s(");
367 static char_u e_unmatchedpar[] = N_("E55: Unmatched %s)"); 365 static char_u e_unmatchedpar[] = N_("E55: Unmatched %s)");
368 #ifdef FEAT_SYN_HL 366 #ifdef FEAT_SYN_HL
369 static char_u e_z_not_allowed[] = N_("E66: \\z( not allowed here"); 367 static char_u e_z_not_allowed[] = N_("E66: \\z( not allowed here");
559 class_tab[' '] |= RI_WHITE; 557 class_tab[' '] |= RI_WHITE;
560 class_tab['\t'] |= RI_WHITE; 558 class_tab['\t'] |= RI_WHITE;
561 done = TRUE; 559 done = TRUE;
562 } 560 }
563 561
564 #ifdef FEAT_MBYTE 562 #define ri_digit(c) (c < 0x100 && (class_tab[c] & RI_DIGIT))
565 # define ri_digit(c) (c < 0x100 && (class_tab[c] & RI_DIGIT)) 563 #define ri_hex(c) (c < 0x100 && (class_tab[c] & RI_HEX))
566 # define ri_hex(c) (c < 0x100 && (class_tab[c] & RI_HEX)) 564 #define ri_octal(c) (c < 0x100 && (class_tab[c] & RI_OCTAL))
567 # define ri_octal(c) (c < 0x100 && (class_tab[c] & RI_OCTAL)) 565 #define ri_word(c) (c < 0x100 && (class_tab[c] & RI_WORD))
568 # define ri_word(c) (c < 0x100 && (class_tab[c] & RI_WORD)) 566 #define ri_head(c) (c < 0x100 && (class_tab[c] & RI_HEAD))
569 # define ri_head(c) (c < 0x100 && (class_tab[c] & RI_HEAD)) 567 #define ri_alpha(c) (c < 0x100 && (class_tab[c] & RI_ALPHA))
570 # define ri_alpha(c) (c < 0x100 && (class_tab[c] & RI_ALPHA)) 568 #define ri_lower(c) (c < 0x100 && (class_tab[c] & RI_LOWER))
571 # define ri_lower(c) (c < 0x100 && (class_tab[c] & RI_LOWER)) 569 #define ri_upper(c) (c < 0x100 && (class_tab[c] & RI_UPPER))
572 # define ri_upper(c) (c < 0x100 && (class_tab[c] & RI_UPPER)) 570 #define ri_white(c) (c < 0x100 && (class_tab[c] & RI_WHITE))
573 # define ri_white(c) (c < 0x100 && (class_tab[c] & RI_WHITE))
574 #else
575 # define ri_digit(c) (class_tab[c] & RI_DIGIT)
576 # define ri_hex(c) (class_tab[c] & RI_HEX)
577 # define ri_octal(c) (class_tab[c] & RI_OCTAL)
578 # define ri_word(c) (class_tab[c] & RI_WORD)
579 # define ri_head(c) (class_tab[c] & RI_HEAD)
580 # define ri_alpha(c) (class_tab[c] & RI_ALPHA)
581 # define ri_lower(c) (class_tab[c] & RI_LOWER)
582 # define ri_upper(c) (class_tab[c] & RI_UPPER)
583 # define ri_white(c) (class_tab[c] & RI_WHITE)
584 #endif
585 571
586 /* flags for regflags */ 572 /* flags for regflags */
587 #define RF_ICASE 1 /* ignore case */ 573 #define RF_ICASE 1 /* ignore case */
588 #define RF_NOICASE 2 /* don't ignore case */ 574 #define RF_NOICASE 2 /* don't ignore case */
589 #define RF_HASNL 4 /* can match a NL */ 575 #define RF_HASNL 4 /* can match a NL */
696 static char_u *regbranch(int *flagp); 682 static char_u *regbranch(int *flagp);
697 static char_u *regconcat(int *flagp); 683 static char_u *regconcat(int *flagp);
698 static char_u *regpiece(int *); 684 static char_u *regpiece(int *);
699 static char_u *regatom(int *); 685 static char_u *regatom(int *);
700 static char_u *regnode(int); 686 static char_u *regnode(int);
701 #ifdef FEAT_MBYTE
702 static int use_multibytecode(int c); 687 static int use_multibytecode(int c);
703 #endif
704 static int prog_magic_wrong(void); 688 static int prog_magic_wrong(void);
705 static char_u *regnext(char_u *); 689 static char_u *regnext(char_u *);
706 static void regc(int b); 690 static void regc(int b);
707 #ifdef FEAT_MBYTE
708 static void regmbc(int c); 691 static void regmbc(int c);
709 # define REGMBC(x) regmbc(x); 692 #define REGMBC(x) regmbc(x);
710 # define CASEMBC(x) case x: 693 #define CASEMBC(x) case x:
711 #else
712 # define regmbc(c) regc(c)
713 # define REGMBC(x)
714 # define CASEMBC(x)
715 #endif
716 static void reginsert(int, char_u *); 694 static void reginsert(int, char_u *);
717 static void reginsert_nr(int op, long val, char_u *opnd); 695 static void reginsert_nr(int op, long val, char_u *opnd);
718 static void reginsert_limits(int, long, long, char_u *); 696 static void reginsert_limits(int, long, long, char_u *);
719 static char_u *re_put_long(char_u *pr, long_u val); 697 static char_u *re_put_long(char_u *pr, long_u val);
720 static int read_limits(long *, long *); 698 static int read_limits(long *, long *);
745 int l = 1; 723 int l = 1;
746 char_u *p = *pp; 724 char_u *p = *pp;
747 725
748 if (p[1] == '=') 726 if (p[1] == '=')
749 { 727 {
750 #ifdef FEAT_MBYTE
751 if (has_mbyte) 728 if (has_mbyte)
752 l = (*mb_ptr2len)(p + 2); 729 l = (*mb_ptr2len)(p + 2);
753 #endif
754 if (p[l + 2] == '=' && p[l + 3] == ']') 730 if (p[l + 2] == '=' && p[l + 3] == ']')
755 { 731 {
756 #ifdef FEAT_MBYTE
757 if (has_mbyte) 732 if (has_mbyte)
758 c = mb_ptr2char(p + 2); 733 c = mb_ptr2char(p + 2);
759 else 734 else
760 #endif
761 c = p[2]; 735 c = p[2];
762 *pp += l + 4; 736 *pp += l + 4;
763 return c; 737 return c;
764 } 738 }
765 } 739 }
796 * NOTE: When changing this function, also change nfa_emit_equi_class() 770 * NOTE: When changing this function, also change nfa_emit_equi_class()
797 */ 771 */
798 static void 772 static void
799 reg_equi_class(int c) 773 reg_equi_class(int c)
800 { 774 {
801 #ifdef FEAT_MBYTE
802 if (enc_utf8 || STRCMP(p_enc, "latin1") == 0 775 if (enc_utf8 || STRCMP(p_enc, "latin1") == 0
803 || STRCMP(p_enc, "iso-8859-15") == 0) 776 || STRCMP(p_enc, "iso-8859-15") == 0)
804 #endif
805 { 777 {
806 #ifdef EBCDIC 778 #ifdef EBCDIC
807 int i; 779 int i;
808 780
809 /* This might be slower than switch/case below. */ 781 /* This might be slower than switch/case below. */
1132 int l = 1; 1104 int l = 1;
1133 char_u *p = *pp; 1105 char_u *p = *pp;
1134 1106
1135 if (p[0] != NUL && p[1] == '.') 1107 if (p[0] != NUL && p[1] == '.')
1136 { 1108 {
1137 #ifdef FEAT_MBYTE
1138 if (has_mbyte) 1109 if (has_mbyte)
1139 l = (*mb_ptr2len)(p + 2); 1110 l = (*mb_ptr2len)(p + 2);
1140 #endif
1141 if (p[l + 2] == '.' && p[l + 3] == ']') 1111 if (p[l + 2] == '.' && p[l + 3] == ']')
1142 { 1112 {
1143 #ifdef FEAT_MBYTE
1144 if (has_mbyte) 1113 if (has_mbyte)
1145 c = mb_ptr2char(p + 2); 1114 c = mb_ptr2char(p + 2);
1146 else 1115 else
1147 #endif
1148 c = p[2]; 1116 c = p[2];
1149 *pp += l + 4; 1117 *pp += l + 4;
1150 return c; 1118 return c;
1151 } 1119 }
1152 } 1120 }
1169 * The returned pointer is on the matching ']', or the terminating NUL. 1137 * The returned pointer is on the matching ']', or the terminating NUL.
1170 */ 1138 */
1171 static char_u * 1139 static char_u *
1172 skip_anyof(char_u *p) 1140 skip_anyof(char_u *p)
1173 { 1141 {
1174 #ifdef FEAT_MBYTE
1175 int l; 1142 int l;
1176 #endif
1177 1143
1178 if (*p == '^') /* Complement of range. */ 1144 if (*p == '^') /* Complement of range. */
1179 ++p; 1145 ++p;
1180 if (*p == ']' || *p == '-') 1146 if (*p == ']' || *p == '-')
1181 ++p; 1147 ++p;
1182 while (*p != NUL && *p != ']') 1148 while (*p != NUL && *p != ']')
1183 { 1149 {
1184 #ifdef FEAT_MBYTE
1185 if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1) 1150 if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
1186 p += l; 1151 p += l;
1187 else 1152 else
1188 #endif
1189 if (*p == '-') 1153 if (*p == '-')
1190 { 1154 {
1191 ++p; 1155 ++p;
1192 if (*p != ']' && *p != NUL) 1156 if (*p != ']' && *p != NUL)
1193 MB_PTR_ADV(p); 1157 MB_PTR_ADV(p);
1393 scan = regnext(scan); 1357 scan = regnext(scan);
1394 } 1358 }
1395 1359
1396 if (OP(scan) == EXACTLY) 1360 if (OP(scan) == EXACTLY)
1397 { 1361 {
1398 #ifdef FEAT_MBYTE
1399 if (has_mbyte) 1362 if (has_mbyte)
1400 r->regstart = (*mb_ptr2char)(OPERAND(scan)); 1363 r->regstart = (*mb_ptr2char)(OPERAND(scan));
1401 else 1364 else
1402 #endif
1403 r->regstart = *OPERAND(scan); 1365 r->regstart = *OPERAND(scan);
1404 } 1366 }
1405 else if ((OP(scan) == BOW 1367 else if ((OP(scan) == BOW
1406 || OP(scan) == EOW 1368 || OP(scan) == EOW
1407 || OP(scan) == NOTHING 1369 || OP(scan) == NOTHING
1408 || OP(scan) == MOPEN + 0 || OP(scan) == NOPEN 1370 || OP(scan) == MOPEN + 0 || OP(scan) == NOPEN
1409 || OP(scan) == MCLOSE + 0 || OP(scan) == NCLOSE) 1371 || OP(scan) == MCLOSE + 0 || OP(scan) == NCLOSE)
1410 && OP(regnext(scan)) == EXACTLY) 1372 && OP(regnext(scan)) == EXACTLY)
1411 { 1373 {
1412 #ifdef FEAT_MBYTE
1413 if (has_mbyte) 1374 if (has_mbyte)
1414 r->regstart = (*mb_ptr2char)(OPERAND(regnext(scan))); 1375 r->regstart = (*mb_ptr2char)(OPERAND(regnext(scan)));
1415 else 1376 else
1416 #endif
1417 r->regstart = *OPERAND(regnext(scan)); 1377 r->regstart = *OPERAND(regnext(scan));
1418 } 1378 }
1419 1379
1420 /* 1380 /*
1421 * If there's something expensive in the r.e., find the longest 1381 * If there's something expensive in the r.e., find the longest
1694 case Magic('&'): 1654 case Magic('&'):
1695 case Magic(')'): 1655 case Magic(')'):
1696 cont = FALSE; 1656 cont = FALSE;
1697 break; 1657 break;
1698 case Magic('Z'): 1658 case Magic('Z'):
1699 #ifdef FEAT_MBYTE
1700 regflags |= RF_ICOMBINE; 1659 regflags |= RF_ICOMBINE;
1701 #endif
1702 skipchr_keepstart(); 1660 skipchr_keepstart();
1703 break; 1661 break;
1704 case Magic('c'): 1662 case Magic('c'):
1705 regflags |= RF_ICASE; 1663 regflags |= RF_ICASE;
1706 skipchr_keepstart(); 1664 skipchr_keepstart();
2006 case Magic('u'): 1964 case Magic('u'):
2007 case Magic('U'): 1965 case Magic('U'):
2008 p = vim_strchr(classchars, no_Magic(c)); 1966 p = vim_strchr(classchars, no_Magic(c));
2009 if (p == NULL) 1967 if (p == NULL)
2010 EMSG_RET_NULL(_("E63: invalid use of \\_")); 1968 EMSG_RET_NULL(_("E63: invalid use of \\_"));
2011 #ifdef FEAT_MBYTE 1969
2012 /* When '.' is followed by a composing char ignore the dot, so that 1970 /* When '.' is followed by a composing char ignore the dot, so that
2013 * the composing char is matched here. */ 1971 * the composing char is matched here. */
2014 if (enc_utf8 && c == Magic('.') && utf_iscomposing(peekchr())) 1972 if (enc_utf8 && c == Magic('.') && utf_iscomposing(peekchr()))
2015 { 1973 {
2016 c = getchr(); 1974 c = getchr();
2017 goto do_multibyte; 1975 goto do_multibyte;
2018 } 1976 }
2019 #endif
2020 ret = regnode(classcodes[p - classchars] + extra); 1977 ret = regnode(classcodes[p - classchars] + extra);
2021 *flagp |= HASWIDTH | SIMPLE; 1978 *flagp |= HASWIDTH | SIMPLE;
2022 break; 1979 break;
2023 1980
2024 case Magic('n'): 1981 case Magic('n'):
2266 2223
2267 if (i < 0) 2224 if (i < 0)
2268 EMSG2_RET_NULL( 2225 EMSG2_RET_NULL(
2269 _("E678: Invalid character after %s%%[dxouU]"), 2226 _("E678: Invalid character after %s%%[dxouU]"),
2270 reg_magic == MAGIC_ALL); 2227 reg_magic == MAGIC_ALL);
2271 #ifdef FEAT_MBYTE
2272 if (use_multibytecode(i)) 2228 if (use_multibytecode(i))
2273 ret = regnode(MULTIBYTECODE); 2229 ret = regnode(MULTIBYTECODE);
2274 else 2230 else
2275 #endif
2276 ret = regnode(EXACTLY); 2231 ret = regnode(EXACTLY);
2277 if (i == 0) 2232 if (i == 0)
2278 regc(0x0a); 2233 regc(0x0a);
2279 else 2234 else
2280 #ifdef FEAT_MBYTE
2281 regmbc(i); 2235 regmbc(i);
2282 #else
2283 regc(i);
2284 #endif
2285 regc(NUL); 2236 regc(NUL);
2286 *flagp |= HASWIDTH; 2237 *flagp |= HASWIDTH;
2287 break; 2238 break;
2288 } 2239 }
2289 2240
2401 endc = 0; 2352 endc = 0;
2402 if (*regparse == '[') 2353 if (*regparse == '[')
2403 endc = get_coll_element(&regparse); 2354 endc = get_coll_element(&regparse);
2404 if (endc == 0) 2355 if (endc == 0)
2405 { 2356 {
2406 #ifdef FEAT_MBYTE
2407 if (has_mbyte) 2357 if (has_mbyte)
2408 endc = mb_ptr2char_adv(&regparse); 2358 endc = mb_ptr2char_adv(&regparse);
2409 else 2359 else
2410 #endif
2411 endc = *regparse++; 2360 endc = *regparse++;
2412 } 2361 }
2413 2362
2414 /* Handle \o40, \x20 and \u20AC style sequences */ 2363 /* Handle \o40, \x20 and \u20AC style sequences */
2415 if (endc == '\\' && !reg_cpo_lit && !reg_cpo_bsl) 2364 if (endc == '\\' && !reg_cpo_lit && !reg_cpo_bsl)
2416 endc = coll_get_char(); 2365 endc = coll_get_char();
2417 2366
2418 if (startc > endc) 2367 if (startc > endc)
2419 EMSG_RET_NULL(_(e_reverse_range)); 2368 EMSG_RET_NULL(_(e_reverse_range));
2420 #ifdef FEAT_MBYTE
2421 if (has_mbyte && ((*mb_char2len)(startc) > 1 2369 if (has_mbyte && ((*mb_char2len)(startc) > 1
2422 || (*mb_char2len)(endc) > 1)) 2370 || (*mb_char2len)(endc) > 1))
2423 { 2371 {
2424 /* Limit to a range of 256 chars. */ 2372 /* Limit to a range of 256 chars. */
2425 if (endc > startc + 256) 2373 if (endc > startc + 256)
2426 EMSG_RET_NULL(_(e_large_class)); 2374 EMSG_RET_NULL(_(e_large_class));
2427 while (++startc <= endc) 2375 while (++startc <= endc)
2428 regmbc(startc); 2376 regmbc(startc);
2429 } 2377 }
2430 else 2378 else
2431 #endif
2432 { 2379 {
2433 #ifdef EBCDIC 2380 #ifdef EBCDIC
2434 int alpha_only = FALSE; 2381 int alpha_only = FALSE;
2435 2382
2436 /* for alphabetical range skip the gaps 2383 /* for alphabetical range skip the gaps
2486 { 2433 {
2487 startc = coll_get_char(); 2434 startc = coll_get_char();
2488 if (startc == 0) 2435 if (startc == 0)
2489 regc(0x0a); 2436 regc(0x0a);
2490 else 2437 else
2491 #ifdef FEAT_MBYTE
2492 regmbc(startc); 2438 regmbc(startc);
2493 #else
2494 regc(startc);
2495 #endif
2496 } 2439 }
2497 else 2440 else
2498 { 2441 {
2499 startc = backslash_trans(*regparse++); 2442 startc = backslash_trans(*regparse++);
2500 regc(startc); 2443 regc(startc);
2604 break; 2547 break;
2605 } 2548 }
2606 } 2549 }
2607 else 2550 else
2608 { 2551 {
2609 #ifdef FEAT_MBYTE
2610 if (has_mbyte) 2552 if (has_mbyte)
2611 { 2553 {
2612 int len; 2554 int len;
2613 2555
2614 /* produce a multibyte character, including any 2556 /* produce a multibyte character, including any
2619 startc = -1; /* composing chars */ 2561 startc = -1; /* composing chars */
2620 while (--len >= 0) 2562 while (--len >= 0)
2621 regc(*regparse++); 2563 regc(*regparse++);
2622 } 2564 }
2623 else 2565 else
2624 #endif
2625 { 2566 {
2626 startc = *regparse++; 2567 startc = *regparse++;
2627 regc(startc); 2568 regc(startc);
2628 } 2569 }
2629 } 2570 }
2643 2584
2644 default: 2585 default:
2645 { 2586 {
2646 int len; 2587 int len;
2647 2588
2648 #ifdef FEAT_MBYTE
2649 /* A multi-byte character is handled as a separate atom if it's 2589 /* A multi-byte character is handled as a separate atom if it's
2650 * before a multi and when it's a composing char. */ 2590 * before a multi and when it's a composing char. */
2651 if (use_multibytecode(c)) 2591 if (use_multibytecode(c))
2652 { 2592 {
2653 do_multibyte: 2593 do_multibyte:
2654 ret = regnode(MULTIBYTECODE); 2594 ret = regnode(MULTIBYTECODE);
2655 regmbc(c); 2595 regmbc(c);
2656 *flagp |= HASWIDTH | SIMPLE; 2596 *flagp |= HASWIDTH | SIMPLE;
2657 break; 2597 break;
2658 } 2598 }
2659 #endif
2660 2599
2661 ret = regnode(EXACTLY); 2600 ret = regnode(EXACTLY);
2662 2601
2663 /* 2602 /*
2664 * Append characters as long as: 2603 * Append characters as long as:
2673 || (re_multi_type(peekchr()) == NOT_MULTI 2612 || (re_multi_type(peekchr()) == NOT_MULTI
2674 && !one_exactly 2613 && !one_exactly
2675 && !is_Magic(c))); ++len) 2614 && !is_Magic(c))); ++len)
2676 { 2615 {
2677 c = no_Magic(c); 2616 c = no_Magic(c);
2678 #ifdef FEAT_MBYTE
2679 if (has_mbyte) 2617 if (has_mbyte)
2680 { 2618 {
2681 regmbc(c); 2619 regmbc(c);
2682 if (enc_utf8) 2620 if (enc_utf8)
2683 { 2621 {
2693 skipchr(); 2631 skipchr();
2694 } 2632 }
2695 } 2633 }
2696 } 2634 }
2697 else 2635 else
2698 #endif
2699 regc(c); 2636 regc(c);
2700 c = getchr(); 2637 c = getchr();
2701 } 2638 }
2702 ungetchr(); 2639 ungetchr();
2703 2640
2710 } 2647 }
2711 2648
2712 return ret; 2649 return ret;
2713 } 2650 }
2714 2651
2715 #ifdef FEAT_MBYTE
2716 /* 2652 /*
2717 * Return TRUE if MULTIBYTECODE should be used instead of EXACTLY for 2653 * Return TRUE if MULTIBYTECODE should be used instead of EXACTLY for
2718 * character "c". 2654 * character "c".
2719 */ 2655 */
2720 static int 2656 static int
2722 { 2658 {
2723 return has_mbyte && (*mb_char2len)(c) > 1 2659 return has_mbyte && (*mb_char2len)(c) > 1
2724 && (re_multi_type(peekchr()) != NOT_MULTI 2660 && (re_multi_type(peekchr()) != NOT_MULTI
2725 || (enc_utf8 && utf_iscomposing(c))); 2661 || (enc_utf8 && utf_iscomposing(c)));
2726 } 2662 }
2727 #endif
2728 2663
2729 /* 2664 /*
2730 * Emit a node. 2665 * Emit a node.
2731 * Return pointer to generated code. 2666 * Return pointer to generated code.
2732 */ 2667 */
2757 regsize++; 2692 regsize++;
2758 else 2693 else
2759 *regcode++ = b; 2694 *regcode++ = b;
2760 } 2695 }
2761 2696
2762 #ifdef FEAT_MBYTE
2763 /* 2697 /*
2764 * Emit (if appropriate) a multi-byte character of code 2698 * Emit (if appropriate) a multi-byte character of code
2765 */ 2699 */
2766 static void 2700 static void
2767 regmbc(int c) 2701 regmbc(int c)
2771 if (regcode == JUST_CALC_SIZE) 2705 if (regcode == JUST_CALC_SIZE)
2772 regsize += (*mb_char2len)(c); 2706 regsize += (*mb_char2len)(c);
2773 else 2707 else
2774 regcode += (*mb_char2bytes)(c, regcode); 2708 regcode += (*mb_char2bytes)(c, regcode);
2775 } 2709 }
2776 #endif
2777 2710
2778 /* 2711 /*
2779 * Insert an operator in front of already-emitted operand 2712 * Insert an operator in front of already-emitted operand
2780 * 2713 *
2781 * Means relocating the operand. 2714 * Means relocating the operand.
3137 { 3070 {
3138 /* 3071 /*
3139 * Next character can never be (made) magic? 3072 * Next character can never be (made) magic?
3140 * Then backslashing it won't do anything. 3073 * Then backslashing it won't do anything.
3141 */ 3074 */
3142 #ifdef FEAT_MBYTE
3143 if (has_mbyte) 3075 if (has_mbyte)
3144 curchr = (*mb_ptr2char)(regparse + 1); 3076 curchr = (*mb_ptr2char)(regparse + 1);
3145 else 3077 else
3146 #endif
3147 curchr = c; 3078 curchr = c;
3148 } 3079 }
3149 break; 3080 break;
3150 } 3081 }
3151 3082
3152 #ifdef FEAT_MBYTE
3153 default: 3083 default:
3154 if (has_mbyte) 3084 if (has_mbyte)
3155 curchr = (*mb_ptr2char)(regparse); 3085 curchr = (*mb_ptr2char)(regparse);
3156 #endif
3157 } 3086 }
3158 } 3087 }
3159 3088
3160 return curchr; 3089 return curchr;
3161 } 3090 }
3171 prevchr_len = 1; 3100 prevchr_len = 1;
3172 else 3101 else
3173 prevchr_len = 0; 3102 prevchr_len = 0;
3174 if (regparse[prevchr_len] != NUL) 3103 if (regparse[prevchr_len] != NUL)
3175 { 3104 {
3176 #ifdef FEAT_MBYTE
3177 if (enc_utf8) 3105 if (enc_utf8)
3178 /* exclude composing chars that mb_ptr2len does include */ 3106 /* exclude composing chars that mb_ptr2len does include */
3179 prevchr_len += utf_ptr2len(regparse + prevchr_len); 3107 prevchr_len += utf_ptr2len(regparse + prevchr_len);
3180 else if (has_mbyte) 3108 else if (has_mbyte)
3181 prevchr_len += (*mb_ptr2len)(regparse + prevchr_len); 3109 prevchr_len += (*mb_ptr2len)(regparse + prevchr_len);
3182 else 3110 else
3183 #endif
3184 ++prevchr_len; 3111 ++prevchr_len;
3185 } 3112 }
3186 regparse += prevchr_len; 3113 regparse += prevchr_len;
3187 prev_at_start = at_start; 3114 prev_at_start = at_start;
3188 at_start = FALSE; 3115 at_start = FALSE;
3536 /* Internal copy of 'ignorecase'. It is set at each call to vim_regexec(). 3463 /* Internal copy of 'ignorecase'. It is set at each call to vim_regexec().
3537 * Normally it gets the value of "rm_ic" or "rmm_ic", but when the pattern 3464 * Normally it gets the value of "rm_ic" or "rmm_ic", but when the pattern
3538 * contains '\c' or '\C' the value is overruled. */ 3465 * contains '\c' or '\C' the value is overruled. */
3539 int reg_ic; 3466 int reg_ic;
3540 3467
3541 #ifdef FEAT_MBYTE
3542 /* Similar to "reg_ic", but only for 'combining' characters. Set with \Z 3468 /* Similar to "reg_ic", but only for 'combining' characters. Set with \Z
3543 * flag in the regexp. Defaults to false, always. */ 3469 * flag in the regexp. Defaults to false, always. */
3544 int reg_icombine; 3470 int reg_icombine;
3545 #endif
3546 3471
3547 /* Copy of "rmm_maxcol": maximum column to search for a match. Zero when 3472 /* Copy of "rmm_maxcol": maximum column to search for a match. Zero when
3548 * there is no maximum. */ 3473 * there is no maximum. */
3549 colnr_T reg_maxcol; 3474 colnr_T reg_maxcol;
3550 3475
3712 rex.reg_maxline = 0; 3637 rex.reg_maxline = 0;
3713 rex.reg_line_lbr = line_lbr; 3638 rex.reg_line_lbr = line_lbr;
3714 rex.reg_buf = curbuf; 3639 rex.reg_buf = curbuf;
3715 rex.reg_win = NULL; 3640 rex.reg_win = NULL;
3716 rex.reg_ic = rmp->rm_ic; 3641 rex.reg_ic = rmp->rm_ic;
3717 #ifdef FEAT_MBYTE
3718 rex.reg_icombine = FALSE; 3642 rex.reg_icombine = FALSE;
3719 #endif
3720 rex.reg_maxcol = 0; 3643 rex.reg_maxcol = 0;
3721 3644
3722 return bt_regexec_both(line, col, NULL, NULL); 3645 return bt_regexec_both(line, col, NULL, NULL);
3723 } 3646 }
3724 3647
3746 rex.reg_win = win; 3669 rex.reg_win = win;
3747 rex.reg_firstlnum = lnum; 3670 rex.reg_firstlnum = lnum;
3748 rex.reg_maxline = rex.reg_buf->b_ml.ml_line_count - lnum; 3671 rex.reg_maxline = rex.reg_buf->b_ml.ml_line_count - lnum;
3749 rex.reg_line_lbr = FALSE; 3672 rex.reg_line_lbr = FALSE;
3750 rex.reg_ic = rmp->rmm_ic; 3673 rex.reg_ic = rmp->rmm_ic;
3751 #ifdef FEAT_MBYTE
3752 rex.reg_icombine = FALSE; 3674 rex.reg_icombine = FALSE;
3753 #endif
3754 rex.reg_maxcol = rmp->rmm_maxcol; 3675 rex.reg_maxcol = rmp->rmm_maxcol;
3755 3676
3756 return bt_regexec_both(NULL, col, tm, timed_out); 3677 return bt_regexec_both(NULL, col, tm, timed_out);
3757 } 3678 }
3758 3679
3825 if (prog->regflags & RF_ICASE) 3746 if (prog->regflags & RF_ICASE)
3826 rex.reg_ic = TRUE; 3747 rex.reg_ic = TRUE;
3827 else if (prog->regflags & RF_NOICASE) 3748 else if (prog->regflags & RF_NOICASE)
3828 rex.reg_ic = FALSE; 3749 rex.reg_ic = FALSE;
3829 3750
3830 #ifdef FEAT_MBYTE
3831 /* If pattern contains "\Z" overrule value of rex.reg_icombine */ 3751 /* If pattern contains "\Z" overrule value of rex.reg_icombine */
3832 if (prog->regflags & RF_ICOMBINE) 3752 if (prog->regflags & RF_ICOMBINE)
3833 rex.reg_icombine = TRUE; 3753 rex.reg_icombine = TRUE;
3834 #endif
3835 3754
3836 /* If there is a "must appear" string, look for it. */ 3755 /* If there is a "must appear" string, look for it. */
3837 if (prog->regmust != NULL) 3756 if (prog->regmust != NULL)
3838 { 3757 {
3839 int c; 3758 int c;
3840 3759
3841 #ifdef FEAT_MBYTE
3842 if (has_mbyte) 3760 if (has_mbyte)
3843 c = (*mb_ptr2char)(prog->regmust); 3761 c = (*mb_ptr2char)(prog->regmust);
3844 else 3762 else
3845 #endif
3846 c = *prog->regmust; 3763 c = *prog->regmust;
3847 s = line + col; 3764 s = line + col;
3848 3765
3849 /* 3766 /*
3850 * This is used very often, esp. for ":global". Use three versions of 3767 * This is used very often, esp. for ":global". Use three versions of
3851 * the loop to avoid overhead of conditions. 3768 * the loop to avoid overhead of conditions.
3852 */ 3769 */
3853 if (!rex.reg_ic 3770 if (!rex.reg_ic && !has_mbyte)
3854 #ifdef FEAT_MBYTE
3855 && !has_mbyte
3856 #endif
3857 )
3858 while ((s = vim_strbyte(s, c)) != NULL) 3771 while ((s = vim_strbyte(s, c)) != NULL)
3859 { 3772 {
3860 if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0) 3773 if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
3861 break; /* Found it. */ 3774 break; /* Found it. */
3862 ++s; 3775 ++s;
3863 } 3776 }
3864 #ifdef FEAT_MBYTE
3865 else if (!rex.reg_ic || (!enc_utf8 && mb_char2len(c) > 1)) 3777 else if (!rex.reg_ic || (!enc_utf8 && mb_char2len(c) > 1))
3866 while ((s = vim_strchr(s, c)) != NULL) 3778 while ((s = vim_strchr(s, c)) != NULL)
3867 { 3779 {
3868 if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0) 3780 if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
3869 break; /* Found it. */ 3781 break; /* Found it. */
3870 MB_PTR_ADV(s); 3782 MB_PTR_ADV(s);
3871 } 3783 }
3872 #endif
3873 else 3784 else
3874 while ((s = cstrchr(s, c)) != NULL) 3785 while ((s = cstrchr(s, c)) != NULL)
3875 { 3786 {
3876 if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0) 3787 if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
3877 break; /* Found it. */ 3788 break; /* Found it. */
3888 /* Simplest case: Anchored match need be tried only once. */ 3799 /* Simplest case: Anchored match need be tried only once. */
3889 if (prog->reganch) 3800 if (prog->reganch)
3890 { 3801 {
3891 int c; 3802 int c;
3892 3803
3893 #ifdef FEAT_MBYTE
3894 if (has_mbyte) 3804 if (has_mbyte)
3895 c = (*mb_ptr2char)(rex.line + col); 3805 c = (*mb_ptr2char)(rex.line + col);
3896 else 3806 else
3897 #endif
3898 c = rex.line[col]; 3807 c = rex.line[col];
3899 if (prog->regstart == NUL 3808 if (prog->regstart == NUL
3900 || prog->regstart == c 3809 || prog->regstart == c
3901 || (rex.reg_ic && (( 3810 || (rex.reg_ic
3902 #ifdef FEAT_MBYTE 3811 && (((enc_utf8 && utf_fold(prog->regstart) == utf_fold(c)))
3903 (enc_utf8 && utf_fold(prog->regstart) == utf_fold(c)))
3904 || (c < 255 && prog->regstart < 255 && 3812 || (c < 255 && prog->regstart < 255 &&
3905 #endif
3906 MB_TOLOWER(prog->regstart) == MB_TOLOWER(c))))) 3813 MB_TOLOWER(prog->regstart) == MB_TOLOWER(c)))))
3907 retval = regtry(prog, col, tm, timed_out); 3814 retval = regtry(prog, col, tm, timed_out);
3908 else 3815 else
3909 retval = 0; 3816 retval = 0;
3910 } 3817 }
3918 { 3825 {
3919 if (prog->regstart != NUL) 3826 if (prog->regstart != NUL)
3920 { 3827 {
3921 /* Skip until the char we know it must start with. 3828 /* Skip until the char we know it must start with.
3922 * Used often, do some work to avoid call overhead. */ 3829 * Used often, do some work to avoid call overhead. */
3923 if (!rex.reg_ic 3830 if (!rex.reg_ic && !has_mbyte)
3924 #ifdef FEAT_MBYTE
3925 && !has_mbyte
3926 #endif
3927 )
3928 s = vim_strbyte(rex.line + col, prog->regstart); 3831 s = vim_strbyte(rex.line + col, prog->regstart);
3929 else 3832 else
3930 s = cstrchr(rex.line + col, prog->regstart); 3833 s = cstrchr(rex.line + col, prog->regstart);
3931 if (s == NULL) 3834 if (s == NULL)
3932 { 3835 {
3953 rex.lnum = 0; 3856 rex.lnum = 0;
3954 rex.line = reg_getline((linenr_T)0); 3857 rex.line = reg_getline((linenr_T)0);
3955 } 3858 }
3956 if (rex.line[col] == NUL) 3859 if (rex.line[col] == NUL)
3957 break; 3860 break;
3958 #ifdef FEAT_MBYTE
3959 if (has_mbyte) 3861 if (has_mbyte)
3960 col += (*mb_ptr2len)(rex.line + col); 3862 col += (*mb_ptr2len)(rex.line + col);
3961 else 3863 else
3962 #endif
3963 ++col; 3864 ++col;
3964 #ifdef FEAT_RELTIME 3865 #ifdef FEAT_RELTIME
3965 /* Check for timeout once in a twenty times to avoid overhead. */ 3866 /* Check for timeout once in a twenty times to avoid overhead. */
3966 if (tm != NULL && ++tm_count == 20) 3867 if (tm != NULL && ++tm_count == 20)
3967 { 3868 {
4114 } 4015 }
4115 #endif 4016 #endif
4116 return 1 + rex.lnum; 4017 return 1 + rex.lnum;
4117 } 4018 }
4118 4019
4119 #ifdef FEAT_MBYTE
4120 /* 4020 /*
4121 * Get class of previous character. 4021 * Get class of previous character.
4122 */ 4022 */
4123 static int 4023 static int
4124 reg_prev_class(void) 4024 reg_prev_class(void)
4125 { 4025 {
4126 if (rex.input > rex.line) 4026 if (rex.input > rex.line)
4127 return mb_get_class_buf(rex.input - 1 4027 return mb_get_class_buf(rex.input - 1
4128 - (*mb_head_off)(rex.line, rex.input - 1), rex.reg_buf); 4028 - (*mb_head_off)(rex.line, rex.input - 1), rex.reg_buf);
4129 return -1; 4029 return -1;
4130 } 4030 }
4131 #endif
4132 4031
4133 /* 4032 /*
4134 * Return TRUE if the current rex.input position matches the Visual area. 4033 * Return TRUE if the current rex.input position matches the Visual area.
4135 */ 4034 */
4136 static int 4035 static int
4337 } 4236 }
4338 else 4237 else
4339 { 4238 {
4340 if (WITH_NL(op)) 4239 if (WITH_NL(op))
4341 op -= ADD_NL; 4240 op -= ADD_NL;
4342 #ifdef FEAT_MBYTE
4343 if (has_mbyte) 4241 if (has_mbyte)
4344 c = (*mb_ptr2char)(rex.input); 4242 c = (*mb_ptr2char)(rex.input);
4345 else 4243 else
4346 #endif
4347 c = *rex.input; 4244 c = *rex.input;
4348 switch (op) 4245 switch (op)
4349 { 4246 {
4350 case BOL: 4247 case BOL:
4351 if (rex.input != rex.line) 4248 if (rex.input != rex.line)
4429 break; 4326 break;
4430 4327
4431 case BOW: /* \<word; rex.input points to w */ 4328 case BOW: /* \<word; rex.input points to w */
4432 if (c == NUL) /* Can't match at end of line */ 4329 if (c == NUL) /* Can't match at end of line */
4433 status = RA_NOMATCH; 4330 status = RA_NOMATCH;
4434 #ifdef FEAT_MBYTE
4435 else if (has_mbyte) 4331 else if (has_mbyte)
4436 { 4332 {
4437 int this_class; 4333 int this_class;
4438 4334
4439 /* Get class of current and previous char (if it exists). */ 4335 /* Get class of current and previous char (if it exists). */
4441 if (this_class <= 1) 4337 if (this_class <= 1)
4442 status = RA_NOMATCH; /* not on a word at all */ 4338 status = RA_NOMATCH; /* not on a word at all */
4443 else if (reg_prev_class() == this_class) 4339 else if (reg_prev_class() == this_class)
4444 status = RA_NOMATCH; /* previous char is in same word */ 4340 status = RA_NOMATCH; /* previous char is in same word */
4445 } 4341 }
4446 #endif
4447 else 4342 else
4448 { 4343 {
4449 if (!vim_iswordc_buf(c, rex.reg_buf) || (rex.input > rex.line 4344 if (!vim_iswordc_buf(c, rex.reg_buf) || (rex.input > rex.line
4450 && vim_iswordc_buf(rex.input[-1], rex.reg_buf))) 4345 && vim_iswordc_buf(rex.input[-1], rex.reg_buf)))
4451 status = RA_NOMATCH; 4346 status = RA_NOMATCH;
4453 break; 4348 break;
4454 4349
4455 case EOW: /* word\>; rex.input points after d */ 4350 case EOW: /* word\>; rex.input points after d */
4456 if (rex.input == rex.line) /* Can't match at start of line */ 4351 if (rex.input == rex.line) /* Can't match at start of line */
4457 status = RA_NOMATCH; 4352 status = RA_NOMATCH;
4458 #ifdef FEAT_MBYTE
4459 else if (has_mbyte) 4353 else if (has_mbyte)
4460 { 4354 {
4461 int this_class, prev_class; 4355 int this_class, prev_class;
4462 4356
4463 /* Get class of current and previous char (if it exists). */ 4357 /* Get class of current and previous char (if it exists). */
4465 prev_class = reg_prev_class(); 4359 prev_class = reg_prev_class();
4466 if (this_class == prev_class 4360 if (this_class == prev_class
4467 || prev_class == 0 || prev_class == 1) 4361 || prev_class == 0 || prev_class == 1)
4468 status = RA_NOMATCH; 4362 status = RA_NOMATCH;
4469 } 4363 }
4470 #endif
4471 else 4364 else
4472 { 4365 {
4473 if (!vim_iswordc_buf(rex.input[-1], rex.reg_buf) 4366 if (!vim_iswordc_buf(rex.input[-1], rex.reg_buf)
4474 || (rex.input[0] != NUL 4367 || (rex.input[0] != NUL
4475 && vim_iswordc_buf(c, rex.reg_buf))) 4368 && vim_iswordc_buf(c, rex.reg_buf)))
4674 char_u *opnd; 4567 char_u *opnd;
4675 4568
4676 opnd = OPERAND(scan); 4569 opnd = OPERAND(scan);
4677 /* Inline the first byte, for speed. */ 4570 /* Inline the first byte, for speed. */
4678 if (*opnd != *rex.input 4571 if (*opnd != *rex.input
4679 && (!rex.reg_ic || ( 4572 && (!rex.reg_ic
4680 #ifdef FEAT_MBYTE 4573 || (!enc_utf8
4681 !enc_utf8 && 4574 && MB_TOLOWER(*opnd) != MB_TOLOWER(*rex.input))))
4682 #endif
4683 MB_TOLOWER(*opnd) != MB_TOLOWER(*rex.input))))
4684 status = RA_NOMATCH; 4575 status = RA_NOMATCH;
4685 else if (*opnd == NUL) 4576 else if (*opnd == NUL)
4686 { 4577 {
4687 /* match empty string always works; happens when "~" is 4578 /* match empty string always works; happens when "~" is
4688 * empty. */ 4579 * empty. */
4689 } 4580 }
4690 else 4581 else
4691 { 4582 {
4692 if (opnd[1] == NUL 4583 if (opnd[1] == NUL && !(enc_utf8 && rex.reg_ic))
4693 #ifdef FEAT_MBYTE
4694 && !(enc_utf8 && rex.reg_ic)
4695 #endif
4696 )
4697 { 4584 {
4698 len = 1; /* matched a single byte above */ 4585 len = 1; /* matched a single byte above */
4699 } 4586 }
4700 else 4587 else
4701 { 4588 {
4702 /* Need to match first byte again for multi-byte. */ 4589 /* Need to match first byte again for multi-byte. */
4703 len = (int)STRLEN(opnd); 4590 len = (int)STRLEN(opnd);
4704 if (cstrncmp(opnd, rex.input, &len) != 0) 4591 if (cstrncmp(opnd, rex.input, &len) != 0)
4705 status = RA_NOMATCH; 4592 status = RA_NOMATCH;
4706 } 4593 }
4707 #ifdef FEAT_MBYTE
4708 /* Check for following composing character, unless %C 4594 /* Check for following composing character, unless %C
4709 * follows (skips over all composing chars). */ 4595 * follows (skips over all composing chars). */
4710 if (status != RA_NOMATCH 4596 if (status != RA_NOMATCH
4711 && enc_utf8 4597 && enc_utf8
4712 && UTF_COMPOSINGLIKE(rex.input, rex.input + len) 4598 && UTF_COMPOSINGLIKE(rex.input, rex.input + len)
4716 /* raaron: This code makes a composing character get 4602 /* raaron: This code makes a composing character get
4717 * ignored, which is the correct behavior (sometimes) 4603 * ignored, which is the correct behavior (sometimes)
4718 * for voweled Hebrew texts. */ 4604 * for voweled Hebrew texts. */
4719 status = RA_NOMATCH; 4605 status = RA_NOMATCH;
4720 } 4606 }
4721 #endif
4722 if (status != RA_NOMATCH) 4607 if (status != RA_NOMATCH)
4723 rex.input += len; 4608 rex.input += len;
4724 } 4609 }
4725 } 4610 }
4726 break; 4611 break;
4733 status = RA_NOMATCH; 4618 status = RA_NOMATCH;
4734 else 4619 else
4735 ADVANCE_REGINPUT(); 4620 ADVANCE_REGINPUT();
4736 break; 4621 break;
4737 4622
4738 #ifdef FEAT_MBYTE
4739 case MULTIBYTECODE: 4623 case MULTIBYTECODE:
4740 if (has_mbyte) 4624 if (has_mbyte)
4741 { 4625 {
4742 int i, len; 4626 int i, len;
4743 char_u *opnd; 4627 char_u *opnd;
4786 rex.input += len; 4670 rex.input += len;
4787 } 4671 }
4788 else 4672 else
4789 status = RA_NOMATCH; 4673 status = RA_NOMATCH;
4790 break; 4674 break;
4791 #endif
4792 case RE_COMPOSING: 4675 case RE_COMPOSING:
4793 #ifdef FEAT_MBYTE
4794 if (enc_utf8) 4676 if (enc_utf8)
4795 { 4677 {
4796 /* Skip composing characters. */ 4678 /* Skip composing characters. */
4797 while (utf_iscomposing(utf_ptr2char(rex.input))) 4679 while (utf_iscomposing(utf_ptr2char(rex.input)))
4798 MB_CPTR_ADV(rex.input); 4680 MB_CPTR_ADV(rex.input);
4799 } 4681 }
4800 #endif
4801 break; 4682 break;
4802 4683
4803 case NOTHING: 4684 case NOTHING:
4804 break; 4685 break;
4805 4686
5559 (colnr_T)STRLEN(rex.line); 5440 (colnr_T)STRLEN(rex.line);
5560 } 5441 }
5561 } 5442 }
5562 else 5443 else
5563 { 5444 {
5564 #ifdef FEAT_MBYTE
5565 if (has_mbyte) 5445 if (has_mbyte)
5566 { 5446 {
5567 char_u *line = 5447 char_u *line =
5568 reg_getline(rp->rs_un.regsave.rs_u.pos.lnum); 5448 reg_getline(rp->rs_un.regsave.rs_u.pos.lnum);
5569 5449
5570 rp->rs_un.regsave.rs_u.pos.col -= 5450 rp->rs_un.regsave.rs_u.pos.col -=
5571 (*mb_head_off)(line, line 5451 (*mb_head_off)(line, line
5572 + rp->rs_un.regsave.rs_u.pos.col - 1) + 1; 5452 + rp->rs_un.regsave.rs_u.pos.col - 1) + 1;
5573 } 5453 }
5574 else 5454 else
5575 #endif
5576 --rp->rs_un.regsave.rs_u.pos.col; 5455 --rp->rs_un.regsave.rs_u.pos.col;
5577 } 5456 }
5578 } 5457 }
5579 else 5458 else
5580 { 5459 {
5950 case WHITE + ADD_NL: 5829 case WHITE + ADD_NL:
5951 testval = mask = RI_WHITE; 5830 testval = mask = RI_WHITE;
5952 do_class: 5831 do_class:
5953 while (count < maxcount) 5832 while (count < maxcount)
5954 { 5833 {
5955 #ifdef FEAT_MBYTE
5956 int l; 5834 int l;
5957 #endif 5835
5958 if (*scan == NUL) 5836 if (*scan == NUL)
5959 { 5837 {
5960 if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline 5838 if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline
5961 || rex.reg_line_lbr) 5839 || rex.reg_line_lbr)
5962 break; 5840 break;
5963 reg_nextline(); 5841 reg_nextline();
5964 scan = rex.input; 5842 scan = rex.input;
5965 if (got_int) 5843 if (got_int)
5966 break; 5844 break;
5967 } 5845 }
5968 #ifdef FEAT_MBYTE
5969 else if (has_mbyte && (l = (*mb_ptr2len)(scan)) > 1) 5846 else if (has_mbyte && (l = (*mb_ptr2len)(scan)) > 1)
5970 { 5847 {
5971 if (testval != 0) 5848 if (testval != 0)
5972 break; 5849 break;
5973 scan += l; 5850 scan += l;
5974 } 5851 }
5975 #endif
5976 else if ((class_tab[*scan] & mask) == testval) 5852 else if ((class_tab[*scan] & mask) == testval)
5977 ++scan; 5853 ++scan;
5978 else if (rex.reg_line_lbr && *scan == '\n' && WITH_NL(OP(p))) 5854 else if (rex.reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
5979 ++scan; 5855 ++scan;
5980 else 5856 else
6079 } 5955 }
6080 } 5956 }
6081 break; 5957 break;
6082 } 5958 }
6083 5959
6084 #ifdef FEAT_MBYTE
6085 case MULTIBYTECODE: 5960 case MULTIBYTECODE:
6086 { 5961 {
6087 int i, len, cf = 0; 5962 int i, len, cf = 0;
6088 5963
6089 /* Safety check (just in case 'encoding' was changed since 5964 /* Safety check (just in case 'encoding' was changed since
6104 ++count; 5979 ++count;
6105 } 5980 }
6106 } 5981 }
6107 } 5982 }
6108 break; 5983 break;
6109 #endif
6110 5984
6111 case ANYOF: 5985 case ANYOF:
6112 case ANYOF + ADD_NL: 5986 case ANYOF + ADD_NL:
6113 testval = TRUE; 5987 testval = TRUE;
6114 /* FALLTHROUGH */ 5988 /* FALLTHROUGH */
6115 5989
6116 case ANYBUT: 5990 case ANYBUT:
6117 case ANYBUT + ADD_NL: 5991 case ANYBUT + ADD_NL:
6118 while (count < maxcount) 5992 while (count < maxcount)
6119 { 5993 {
6120 #ifdef FEAT_MBYTE
6121 int len; 5994 int len;
6122 #endif 5995
6123 if (*scan == NUL) 5996 if (*scan == NUL)
6124 { 5997 {
6125 if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline 5998 if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline
6126 || rex.reg_line_lbr) 5999 || rex.reg_line_lbr)
6127 break; 6000 break;
6130 if (got_int) 6003 if (got_int)
6131 break; 6004 break;
6132 } 6005 }
6133 else if (rex.reg_line_lbr && *scan == '\n' && WITH_NL(OP(p))) 6006 else if (rex.reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
6134 ++scan; 6007 ++scan;
6135 #ifdef FEAT_MBYTE
6136 else if (has_mbyte && (len = (*mb_ptr2len)(scan)) > 1) 6008 else if (has_mbyte && (len = (*mb_ptr2len)(scan)) > 1)
6137 { 6009 {
6138 if ((cstrchr(opnd, (*mb_ptr2char)(scan)) == NULL) == testval) 6010 if ((cstrchr(opnd, (*mb_ptr2char)(scan)) == NULL) == testval)
6139 break; 6011 break;
6140 scan += len; 6012 scan += len;
6141 } 6013 }
6142 #endif
6143 else 6014 else
6144 { 6015 {
6145 if ((cstrchr(opnd, *scan) == NULL) == testval) 6016 if ((cstrchr(opnd, *scan) == NULL) == testval)
6146 break; 6017 break;
6147 ++scan; 6018 ++scan;
6953 case BRACE_COMPLEX + 8: 6824 case BRACE_COMPLEX + 8:
6954 case BRACE_COMPLEX + 9: 6825 case BRACE_COMPLEX + 9:
6955 sprintf(buf + STRLEN(buf), "BRACE_COMPLEX%d", OP(op) - BRACE_COMPLEX); 6826 sprintf(buf + STRLEN(buf), "BRACE_COMPLEX%d", OP(op) - BRACE_COMPLEX);
6956 p = NULL; 6827 p = NULL;
6957 break; 6828 break;
6958 #ifdef FEAT_MBYTE
6959 case MULTIBYTECODE: 6829 case MULTIBYTECODE:
6960 p = "MULTIBYTECODE"; 6830 p = "MULTIBYTECODE";
6961 break; 6831 break;
6962 #endif
6963 case NEWL: 6832 case NEWL:
6964 p = "NEWL"; 6833 p = "NEWL";
6965 break; 6834 break;
6966 default: 6835 default:
6967 sprintf(buf + STRLEN(buf), "corrupt %d", OP(op)); 6836 sprintf(buf + STRLEN(buf), "corrupt %d", OP(op));
6987 return FAIL; 6856 return FAIL;
6988 } 6857 }
6989 return OK; 6858 return OK;
6990 } 6859 }
6991 6860
6992 #ifdef FEAT_MBYTE
6993 typedef struct 6861 typedef struct
6994 { 6862 {
6995 int a, b, c; 6863 int a, b, c;
6996 } decomp_T; 6864 } decomp_T;
6997 6865
7065 { 6933 {
7066 *c1 = c; 6934 *c1 = c;
7067 *c2 = *c3 = 0; 6935 *c2 = *c3 = 0;
7068 } 6936 }
7069 } 6937 }
7070 #endif
7071 6938
7072 /* 6939 /*
7073 * Compare two strings, ignore case if rex.reg_ic set. 6940 * Compare two strings, ignore case if rex.reg_ic set.
7074 * Return 0 if strings match, non-zero otherwise. 6941 * Return 0 if strings match, non-zero otherwise.
7075 * Correct the length "*n" when composing characters are ignored. 6942 * Correct the length "*n" when composing characters are ignored.
7082 if (!rex.reg_ic) 6949 if (!rex.reg_ic)
7083 result = STRNCMP(s1, s2, *n); 6950 result = STRNCMP(s1, s2, *n);
7084 else 6951 else
7085 result = MB_STRNICMP(s1, s2, *n); 6952 result = MB_STRNICMP(s1, s2, *n);
7086 6953
7087 #ifdef FEAT_MBYTE
7088 /* if it failed and it's utf8 and we want to combineignore: */ 6954 /* if it failed and it's utf8 and we want to combineignore: */
7089 if (result != 0 && enc_utf8 && rex.reg_icombine) 6955 if (result != 0 && enc_utf8 && rex.reg_icombine)
7090 { 6956 {
7091 char_u *str1, *str2; 6957 char_u *str1, *str2;
7092 int c1, c2, c11, c12; 6958 int c1, c2, c11, c12;
7119 } 6985 }
7120 result = c2 - c1; 6986 result = c2 - c1;
7121 if (result == 0) 6987 if (result == 0)
7122 *n = (int)(str2 - s2); 6988 *n = (int)(str2 - s2);
7123 } 6989 }
7124 #endif
7125 6990
7126 return result; 6991 return result;
7127 } 6992 }
7128 6993
7129 /* 6994 /*
7133 cstrchr(char_u *s, int c) 6998 cstrchr(char_u *s, int c)
7134 { 6999 {
7135 char_u *p; 7000 char_u *p;
7136 int cc; 7001 int cc;
7137 7002
7138 if (!rex.reg_ic 7003 if (!rex.reg_ic || (!enc_utf8 && mb_char2len(c) > 1))
7139 #ifdef FEAT_MBYTE
7140 || (!enc_utf8 && mb_char2len(c) > 1)
7141 #endif
7142 )
7143 return vim_strchr(s, c); 7004 return vim_strchr(s, c);
7144 7005
7145 /* tolower() and toupper() can be slow, comparing twice should be a lot 7006 /* tolower() and toupper() can be slow, comparing twice should be a lot
7146 * faster (esp. when using MS Visual C++!). 7007 * faster (esp. when using MS Visual C++!).
7147 * For UTF-8 need to use folded case. */ 7008 * For UTF-8 need to use folded case. */
7148 #ifdef FEAT_MBYTE
7149 if (enc_utf8 && c > 0x80) 7009 if (enc_utf8 && c > 0x80)
7150 cc = utf_fold(c); 7010 cc = utf_fold(c);
7151 else 7011 else
7152 #endif
7153 if (MB_ISUPPER(c)) 7012 if (MB_ISUPPER(c))
7154 cc = MB_TOLOWER(c); 7013 cc = MB_TOLOWER(c);
7155 else if (MB_ISLOWER(c)) 7014 else if (MB_ISLOWER(c))
7156 cc = MB_TOUPPER(c); 7015 cc = MB_TOUPPER(c);
7157 else 7016 else
7158 return vim_strchr(s, c); 7017 return vim_strchr(s, c);
7159 7018
7160 #ifdef FEAT_MBYTE
7161 if (has_mbyte) 7019 if (has_mbyte)
7162 { 7020 {
7163 for (p = s; *p != NUL; p += (*mb_ptr2len)(p)) 7021 for (p = s; *p != NUL; p += (*mb_ptr2len)(p))
7164 { 7022 {
7165 if (enc_utf8 && c > 0x80) 7023 if (enc_utf8 && c > 0x80)
7170 else if (*p == c || *p == cc) 7028 else if (*p == c || *p == cc)
7171 return p; 7029 return p;
7172 } 7030 }
7173 } 7031 }
7174 else 7032 else
7175 #endif
7176 /* Faster version for when there are no multi-byte characters. */ 7033 /* Faster version for when there are no multi-byte characters. */
7177 for (p = s; *p != NUL; ++p) 7034 for (p = s; *p != NUL; ++p)
7178 if (*p == c || *p == cc) 7035 if (*p == c || *p == cc)
7179 return p; 7036 return p;
7180 7037
7283 } 7140 }
7284 else 7141 else
7285 { 7142 {
7286 if (*p == '\\' && p[1]) /* skip escaped characters */ 7143 if (*p == '\\' && p[1]) /* skip escaped characters */
7287 ++p; 7144 ++p;
7288 #ifdef FEAT_MBYTE
7289 if (has_mbyte) 7145 if (has_mbyte)
7290 p += (*mb_ptr2len)(p) - 1; 7146 p += (*mb_ptr2len)(p) - 1;
7291 #endif
7292 } 7147 }
7293 } 7148 }
7294 7149
7295 vim_free(reg_prev_sub); 7150 vim_free(reg_prev_sub);
7296 if (newsub != source) /* newsub was allocated, just keep it */ 7151 if (newsub != source) /* newsub was allocated, just keep it */
7680 ++dst; 7535 ++dst;
7681 } 7536 }
7682 c = *src++; 7537 c = *src++;
7683 } 7538 }
7684 } 7539 }
7685 #ifdef FEAT_MBYTE
7686 else if (has_mbyte) 7540 else if (has_mbyte)
7687 c = mb_ptr2char(src - 1); 7541 c = mb_ptr2char(src - 1);
7688 #endif
7689 7542
7690 /* Write to buffer, if copy is set. */ 7543 /* Write to buffer, if copy is set. */
7691 if (func_one != (fptr_T)NULL) 7544 if (func_one != (fptr_T)NULL)
7692 /* Turbo C complains without the typecast */ 7545 /* Turbo C complains without the typecast */
7693 func_one = (fptr_T)(func_one(&cc, c)); 7546 func_one = (fptr_T)(func_one(&cc, c));
7695 /* Turbo C complains without the typecast */ 7548 /* Turbo C complains without the typecast */
7696 func_all = (fptr_T)(func_all(&cc, c)); 7549 func_all = (fptr_T)(func_all(&cc, c));
7697 else /* just copy */ 7550 else /* just copy */
7698 cc = c; 7551 cc = c;
7699 7552
7700 #ifdef FEAT_MBYTE
7701 if (has_mbyte) 7553 if (has_mbyte)
7702 { 7554 {
7703 int totlen = mb_ptr2len(src - 1); 7555 int totlen = mb_ptr2len(src - 1);
7704 7556
7705 if (copy) 7557 if (copy)
7719 dst += totlen - clen; 7571 dst += totlen - clen;
7720 } 7572 }
7721 } 7573 }
7722 src += totlen - 1; 7574 src += totlen - 1;
7723 } 7575 }
7724 else 7576 else if (copy)
7725 #endif
7726 if (copy)
7727 *dst = cc; 7577 *dst = cc;
7728 dst++; 7578 dst++;
7729 } 7579 }
7730 else 7580 else
7731 { 7581 {
7797 } 7647 }
7798 dst += 2; 7648 dst += 2;
7799 } 7649 }
7800 else 7650 else
7801 { 7651 {
7802 #ifdef FEAT_MBYTE
7803 if (has_mbyte) 7652 if (has_mbyte)
7804 c = mb_ptr2char(s); 7653 c = mb_ptr2char(s);
7805 else 7654 else
7806 #endif
7807 c = *s; 7655 c = *s;
7808 7656
7809 if (func_one != (fptr_T)NULL) 7657 if (func_one != (fptr_T)NULL)
7810 /* Turbo C complains without the typecast */ 7658 /* Turbo C complains without the typecast */
7811 func_one = (fptr_T)(func_one(&cc, c)); 7659 func_one = (fptr_T)(func_one(&cc, c));
7813 /* Turbo C complains without the typecast */ 7661 /* Turbo C complains without the typecast */
7814 func_all = (fptr_T)(func_all(&cc, c)); 7662 func_all = (fptr_T)(func_all(&cc, c));
7815 else /* just copy */ 7663 else /* just copy */
7816 cc = c; 7664 cc = c;
7817 7665
7818 #ifdef FEAT_MBYTE
7819 if (has_mbyte) 7666 if (has_mbyte)
7820 { 7667 {
7821 int l; 7668 int l;
7822 7669
7823 /* Copy composing characters separately, one 7670 /* Copy composing characters separately, one
7831 len -= l; 7678 len -= l;
7832 if (copy) 7679 if (copy)
7833 mb_char2bytes(cc, dst); 7680 mb_char2bytes(cc, dst);
7834 dst += mb_char2len(cc) - 1; 7681 dst += mb_char2len(cc) - 1;
7835 } 7682 }
7836 else 7683 else if (copy)
7837 #endif
7838 if (copy)
7839 *dst = cc; 7684 *dst = cc;
7840 dst++; 7685 dst++;
7841 } 7686 }
7842 7687
7843 ++s; 7688 ++s;