comparison src/spell.c @ 376:d2bc505a6d91

updated for version 7.0098
author vimboss
date Thu, 30 Jun 2005 21:52:39 +0000
parents f14cbd913415
children 997a094e44d2
comparison
equal deleted inserted replaced
375:f14cbd913415 376:d2bc505a6d91
316 char_u *sl_pbyts; /* prefix tree word bytes */ 316 char_u *sl_pbyts; /* prefix tree word bytes */
317 idx_T *sl_pidxs; /* prefix tree word indexes */ 317 idx_T *sl_pidxs; /* prefix tree word indexes */
318 318
319 char_u sl_regions[17]; /* table with up to 8 region names plus NUL */ 319 char_u sl_regions[17]; /* table with up to 8 region names plus NUL */
320 320
321 char_u *sl_midword; /* MIDWORD string or NULL */
322
321 int sl_prefixcnt; /* number of items in "sl_prefprog" */ 323 int sl_prefixcnt; /* number of items in "sl_prefprog" */
322 regprog_T **sl_prefprog; /* table with regprogs for prefixes */ 324 regprog_T **sl_prefprog; /* table with regprogs for prefixes */
323 325
324 garray_T sl_rep; /* list of fromto_T entries from REP lines */ 326 garray_T sl_rep; /* list of fromto_T entries from REP lines */
325 short sl_rep_first[256]; /* indexes where byte first appears, -1 if 327 short sl_rep_first[256]; /* indexes where byte first appears, -1 if
462 int mi_prefixlen; /* byte length of prefix */ 464 int mi_prefixlen; /* byte length of prefix */
463 465
464 /* others */ 466 /* others */
465 int mi_result; /* result so far: SP_BAD, SP_OK, etc. */ 467 int mi_result; /* result so far: SP_BAD, SP_OK, etc. */
466 int mi_capflags; /* WF_ONECAP WF_ALLCAP WF_KEEPCAP */ 468 int mi_capflags; /* WF_ONECAP WF_ALLCAP WF_KEEPCAP */
469 buf_T *mi_buf; /* buffer being checked */
467 } matchinf_T; 470 } matchinf_T;
468 471
469 /* 472 /*
470 * The tables used for recognizing word characters according to spelling. 473 * The tables used for recognizing word characters according to spelling.
471 * These are only used for the first 256 characters of 'encoding'. 474 * These are only used for the first 256 characters of 'encoding'.
478 char_u st_upper[256]; /* chars: upper case */ 481 char_u st_upper[256]; /* chars: upper case */
479 } spelltab_T; 482 } spelltab_T;
480 483
481 static spelltab_T spelltab; 484 static spelltab_T spelltab;
482 static int did_set_spelltab; 485 static int did_set_spelltab;
483 static char_u spell_ismw[256]; /* flags: is midword char */
484 #ifdef FEAT_MBYTE
485 static char_u *spell_ismw_mb = NULL; /* multi-byte midword chars */
486 #endif
487 486
488 #define CF_WORD 0x01 487 #define CF_WORD 0x01
489 #define CF_UPPER 0x02 488 #define CF_UPPER 0x02
490 489
491 static void clear_spell_chartab __ARGS((spelltab_T *sp)); 490 static void clear_spell_chartab __ARGS((spelltab_T *sp));
492 static int set_spell_finish __ARGS((spelltab_T *new_st)); 491 static int set_spell_finish __ARGS((spelltab_T *new_st));
493 static int spell_iswordp __ARGS((char_u *p)); 492 static int spell_iswordp __ARGS((char_u *p, buf_T *buf));
493 static int spell_iswordp_nmw __ARGS((char_u *p));
494 #ifdef FEAT_MBYTE
495 static int spell_iswordp_w __ARGS((int *p, buf_T *buf));
496 #endif
494 static void write_spell_prefcond __ARGS((FILE *fd, garray_T *gap)); 497 static void write_spell_prefcond __ARGS((FILE *fd, garray_T *gap));
495 498
496 /* 499 /*
497 * Return TRUE if "p" points to a word character. Like spell_iswordp() but 500 * Return TRUE if "p" points to a word character. Like spell_iswordp() but
498 * without the special handling of a single quote. 501 * without the special handling of a single quote.
569 /* mode values for find_word */ 572 /* mode values for find_word */
570 #define FIND_FOLDWORD 0 /* find word case-folded */ 573 #define FIND_FOLDWORD 0 /* find word case-folded */
571 #define FIND_KEEPWORD 1 /* find keep-case word */ 574 #define FIND_KEEPWORD 1 /* find keep-case word */
572 #define FIND_PREFIX 2 /* find word after prefix */ 575 #define FIND_PREFIX 2 /* find word after prefix */
573 576
577 /* values for read_cnt_string() */
578 #define ERR_NOMEM -1
579 #define ERR_TRUNC -2
580
574 static slang_T *slang_alloc __ARGS((char_u *lang)); 581 static slang_T *slang_alloc __ARGS((char_u *lang));
575 static void slang_free __ARGS((slang_T *lp)); 582 static void slang_free __ARGS((slang_T *lp));
576 static void slang_clear __ARGS((slang_T *lp)); 583 static void slang_clear __ARGS((slang_T *lp));
577 static void find_word __ARGS((matchinf_T *mip, int mode)); 584 static void find_word __ARGS((matchinf_T *mip, int mode));
578 static int valid_word_prefix __ARGS((int totprefcnt, int arridx, int prefid, char_u *word, slang_T *slang)); 585 static int valid_word_prefix __ARGS((int totprefcnt, int arridx, int prefid, char_u *word, slang_T *slang));
582 static int no_spell_checking __ARGS((void)); 589 static int no_spell_checking __ARGS((void));
583 static void spell_load_lang __ARGS((char_u *lang)); 590 static void spell_load_lang __ARGS((char_u *lang));
584 static char_u *spell_enc __ARGS((void)); 591 static char_u *spell_enc __ARGS((void));
585 static void spell_load_cb __ARGS((char_u *fname, void *cookie)); 592 static void spell_load_cb __ARGS((char_u *fname, void *cookie));
586 static slang_T *spell_load_file __ARGS((char_u *fname, char_u *lang, slang_T *old_lp, int silent)); 593 static slang_T *spell_load_file __ARGS((char_u *fname, char_u *lang, slang_T *old_lp, int silent));
594 static char_u *read_cnt_string __ARGS((FILE *fd, int cnt_bytes, int *errp));
587 #ifdef FEAT_MBYTE 595 #ifdef FEAT_MBYTE
588 static int *mb_str2wide __ARGS((char_u *s)); 596 static int *mb_str2wide __ARGS((char_u *s));
589 #endif 597 #endif
590 static idx_T read_tree __ARGS((FILE *fd, char_u *byts, idx_T *idxs, int maxidx, int startidx, int prefixtree, int maxprefcondnr)); 598 static idx_T read_tree __ARGS((FILE *fd, char_u *byts, idx_T *idxs, int maxidx, int startidx, int prefixtree, int maxprefcondnr));
599 static void clear_midword __ARGS((buf_T *buf));
600 static void use_midword __ARGS((slang_T *lp, buf_T *buf));
591 static int find_region __ARGS((char_u *rp, char_u *region)); 601 static int find_region __ARGS((char_u *rp, char_u *region));
592 static int captype __ARGS((char_u *word, char_u *end)); 602 static int captype __ARGS((char_u *word, char_u *end));
593 static void spell_reload_one __ARGS((char_u *fname, int added_word)); 603 static void spell_reload_one __ARGS((char_u *fname, int added_word));
594 static int set_spell_charflags __ARGS((char_u *flags, int cnt, char_u *upp)); 604 static int set_spell_charflags __ARGS((char_u *flags, int cnt, char_u *upp));
595 static int set_spell_chartab __ARGS((char_u *fol, char_u *low, char_u *upp)); 605 static int set_spell_chartab __ARGS((char_u *fol, char_u *low, char_u *upp));
712 else 722 else
713 { 723 {
714 mi.mi_end = skipdigits(ptr); 724 mi.mi_end = skipdigits(ptr);
715 nrlen = mi.mi_end - ptr; 725 nrlen = mi.mi_end - ptr;
716 } 726 }
717 if (!spell_iswordp(mi.mi_end)) 727 if (!spell_iswordp(mi.mi_end, wp->w_buffer))
718 return (int)(mi.mi_end - ptr); 728 return (int)(mi.mi_end - ptr);
719 729
720 /* Try including the digits in the word. */ 730 /* Try including the digits in the word. */
721 mi.mi_fend = ptr + nrlen; 731 mi.mi_fend = ptr + nrlen;
722 } 732 }
723 else 733 else
724 mi.mi_fend = ptr; 734 mi.mi_fend = ptr;
725 735
726 /* Find the normal end of the word (until the next non-word character). */ 736 /* Find the normal end of the word (until the next non-word character). */
727 mi.mi_word = ptr; 737 mi.mi_word = ptr;
728 if (spell_iswordp(mi.mi_fend)) 738 if (spell_iswordp(mi.mi_fend, wp->w_buffer))
729 { 739 {
730 do 740 do
731 { 741 {
732 mb_ptr_adv(mi.mi_fend); 742 mb_ptr_adv(mi.mi_fend);
733 } while (*mi.mi_fend != NUL && spell_iswordp(mi.mi_fend)); 743 } while (*mi.mi_fend != NUL && spell_iswordp(mi.mi_fend, wp->w_buffer));
734 } 744 }
735 745
736 /* We always use the characters up to the next non-word character, 746 /* We always use the characters up to the next non-word character,
737 * also for bad words. */ 747 * also for bad words. */
738 mi.mi_end = mi.mi_fend; 748 mi.mi_end = mi.mi_fend;
739 749
740 /* Check caps type later. */ 750 /* Check caps type later. */
741 mi.mi_capflags = 0; 751 mi.mi_capflags = 0;
742 mi.mi_cend = NULL; 752 mi.mi_cend = NULL;
753 mi.mi_buf = wp->w_buffer;
743 754
744 /* Include one non-word character so that we can check for the 755 /* Include one non-word character so that we can check for the
745 * word end. */ 756 * word end. */
746 if (*mi.mi_fend != NUL) 757 if (*mi.mi_fend != NUL)
747 mb_ptr_adv(mi.mi_fend); 758 mb_ptr_adv(mi.mi_fend);
964 975
965 #ifdef FEAT_MBYTE 976 #ifdef FEAT_MBYTE
966 if ((*mb_head_off)(ptr, ptr + wlen) > 0) 977 if ((*mb_head_off)(ptr, ptr + wlen) > 0)
967 continue; /* not at first byte of character */ 978 continue; /* not at first byte of character */
968 #endif 979 #endif
969 if (spell_iswordp(ptr + wlen)) 980 if (spell_iswordp(ptr + wlen, mip->mi_buf))
970 continue; /* next char is a word character */ 981 continue; /* next char is a word character */
971 982
972 #ifdef FEAT_MBYTE 983 #ifdef FEAT_MBYTE
973 if (mode != FIND_KEEPWORD && has_mbyte) 984 if (mode != FIND_KEEPWORD && has_mbyte)
974 { 985 {
1225 1236
1226 p = mip->mi_fend; 1237 p = mip->mi_fend;
1227 do 1238 do
1228 { 1239 {
1229 mb_ptr_adv(mip->mi_fend); 1240 mb_ptr_adv(mip->mi_fend);
1230 } while (*mip->mi_fend != NUL && spell_iswordp(mip->mi_fend)); 1241 } while (*mip->mi_fend != NUL && spell_iswordp(mip->mi_fend, mip->mi_buf));
1231 1242
1232 /* Include the non-word character so that we can check for the 1243 /* Include the non-word character so that we can check for the
1233 * word end. */ 1244 * word end. */
1234 if (*mip->mi_fend != NUL) 1245 if (*mip->mi_fend != NUL)
1235 mb_ptr_adv(mip->mi_fend); 1246 mb_ptr_adv(mip->mi_fend);
1448 p = skipwhite(p + 1); 1459 p = skipwhite(p + 1);
1449 1460
1450 if (*p != NUL) 1461 if (*p != NUL)
1451 { 1462 {
1452 *buf = ' '; 1463 *buf = ' ';
1453 vim_strncpy(buf + 1, line, maxlen - 1); 1464 vim_strncpy(buf + 1, line, maxlen - 2);
1454 n = p - line; 1465 n = p - line;
1455 if (n >= maxlen) 1466 if (n >= maxlen)
1456 n = maxlen - 1; 1467 n = maxlen - 1;
1457 vim_memset(buf + 1, ' ', n); 1468 vim_memset(buf + 1, ' ', n);
1458 } 1469 }
1583 } 1594 }
1584 ga_clear(gap); 1595 ga_clear(gap);
1585 1596
1586 gap = &lp->sl_sal; 1597 gap = &lp->sl_sal;
1587 if (lp->sl_sofo) 1598 if (lp->sl_sofo)
1588 /* SOFOFROM and SOFOTO items: free lists of wide characters. */ 1599 {
1589 for (i = 0; i < gap->ga_len; ++i) 1600 /* "ga_len" is set to 1 without adding an item for latin1 */
1590 vim_free(((int **)gap->ga_data)[i]); 1601 if (gap->ga_data != NULL)
1602 /* SOFOFROM and SOFOTO items: free lists of wide characters. */
1603 for (i = 0; i < gap->ga_len; ++i)
1604 vim_free(((int **)gap->ga_data)[i]);
1605 }
1591 else 1606 else
1592 /* SAL items: free salitem_T items */ 1607 /* SAL items: free salitem_T items */
1593 while (gap->ga_len > 0) 1608 while (gap->ga_len > 0)
1594 { 1609 {
1595 smp = &((salitem_T *)gap->ga_data)[--gap->ga_len]; 1610 smp = &((salitem_T *)gap->ga_data)[--gap->ga_len];
1604 } 1619 }
1605 ga_clear(gap); 1620 ga_clear(gap);
1606 1621
1607 for (i = 0; i < lp->sl_prefixcnt; ++i) 1622 for (i = 0; i < lp->sl_prefixcnt; ++i)
1608 vim_free(lp->sl_prefprog[i]); 1623 vim_free(lp->sl_prefprog[i]);
1624 lp->sl_prefixcnt = 0;
1609 vim_free(lp->sl_prefprog); 1625 vim_free(lp->sl_prefprog);
1626 lp->sl_prefprog = NULL;
1627
1628 vim_free(lp->sl_midword);
1629 lp->sl_midword = NULL;
1610 1630
1611 #ifdef FEAT_MBYTE 1631 #ifdef FEAT_MBYTE
1612 { 1632 {
1613 int todo = lp->sl_map_hash.ht_used; 1633 int todo = lp->sl_map_hash.ht_used;
1614 hashitem_T *hi; 1634 hashitem_T *hi;
1759 if (p == NULL) 1779 if (p == NULL)
1760 goto endFAIL; 1780 goto endFAIL;
1761 for (i = 0; i < cnt; ++i) 1781 for (i = 0; i < cnt; ++i)
1762 p[i] = getc(fd); /* <charflags> */ 1782 p[i] = getc(fd); /* <charflags> */
1763 1783
1764 ccnt = (getc(fd) << 8) + getc(fd); /* <fcharslen> */ 1784 /* <fcharslen> <fchars> */
1765 if (ccnt <= 0) 1785 fol = read_cnt_string(fd, 2, &ccnt);
1786 if (ccnt != 0)
1766 { 1787 {
1767 vim_free(p); 1788 vim_free(p);
1768 goto formerr; 1789 if (ccnt == ERR_NOMEM)
1769 } 1790 goto endFAIL;
1770 fol = alloc((unsigned)ccnt + 1); 1791 if (ccnt == ERR_TRUNC)
1771 if (fol == NULL) 1792 goto formerr;
1772 { 1793 }
1773 vim_free(p);
1774 goto endFAIL;
1775 }
1776 for (i = 0; i < ccnt; ++i)
1777 fol[i] = getc(fd); /* <fchars> */
1778 fol[i] = NUL;
1779 1794
1780 /* Set the word-char flags and fill SPELL_ISUPPER() table. */ 1795 /* Set the word-char flags and fill SPELL_ISUPPER() table. */
1781 i = set_spell_charflags(p, cnt, fol); 1796 i = set_spell_charflags(p, cnt, fol);
1782 vim_free(p); 1797 vim_free(p);
1783 vim_free(fol); 1798 vim_free(fol);
1793 if (cnt != 0) 1808 if (cnt != 0)
1794 goto formerr; 1809 goto formerr;
1795 } 1810 }
1796 1811
1797 /* <midwordlen> <midword> */ 1812 /* <midwordlen> <midword> */
1798 cnt = (getc(fd) << 8) + getc(fd); 1813 lp->sl_midword = read_cnt_string(fd, 2, &cnt);
1799 if (cnt < 0) 1814 if (cnt == ERR_TRUNC)
1800 goto truncerr; 1815 goto truncerr;
1801 if (cnt > 0) 1816 if (cnt == ERR_NOMEM)
1802 { 1817 goto endFAIL;
1803 for (i = 0; i < cnt; ++i)
1804 if (i < MAXWLEN) /* truncate at reasonable length */
1805 buf[i] = getc(fd);
1806 if (i < MAXWLEN)
1807 buf[i] = NUL;
1808 else
1809 buf[MAXWLEN] = NUL;
1810
1811 /* The midword characters add up to any midword characters from other
1812 * .spel files. */
1813 for (p = buf; *p != NUL; )
1814 #ifdef FEAT_MBYTE
1815 if (has_mbyte)
1816 {
1817 c = mb_ptr2char(p);
1818 i = mb_ptr2len_check(p);
1819 if (c < 256)
1820 spell_ismw[c] = TRUE;
1821 else if (spell_ismw_mb == NULL)
1822 /* First multi-byte char in "spell_ismw_mb". */
1823 spell_ismw_mb = vim_strnsave(p, i);
1824 else
1825 {
1826 /* Append multi-byte chars to "spell_ismw_mb". */
1827 n = STRLEN(spell_ismw_mb);
1828 bp = vim_strnsave(spell_ismw_mb, n + i);
1829 if (bp != NULL)
1830 {
1831 vim_free(spell_ismw_mb);
1832 spell_ismw_mb = bp;
1833 vim_strncpy(bp + n, p, i);
1834 }
1835 }
1836 p += i;
1837 }
1838 else
1839 #endif
1840 spell_ismw[*p++] = TRUE;
1841 }
1842 1818
1843 /* <prefcondcnt> <prefcond> ... */ 1819 /* <prefcondcnt> <prefcond> ... */
1844 cnt = (getc(fd) << 8) + getc(fd); /* <prefcondcnt> */ 1820 cnt = (getc(fd) << 8) + getc(fd); /* <prefcondcnt> */
1845 if (cnt > 0) 1821 if (cnt > 0)
1846 { 1822 {
2289 sourcing_name = save_sourcing_name; 2265 sourcing_name = save_sourcing_name;
2290 sourcing_lnum = save_sourcing_lnum; 2266 sourcing_lnum = save_sourcing_lnum;
2291 2267
2292 return lp; 2268 return lp;
2293 } 2269 }
2270
2271 /*
2272 * Read a length field from "fd" in "cnt_bytes" bytes.
2273 * Allocate memory and read the string into it.
2274 * Returns NULL when the count is zero.
2275 * Sets "errp" to ERR_TRUNC when reading failed, ERR_NOMEM when out of
2276 * memory, zero when OK.
2277 */
2278 static char_u *
2279 read_cnt_string(fd, cnt_bytes, errp)
2280 FILE *fd;
2281 int cnt_bytes;
2282 int *errp;
2283 {
2284 int cnt = 0;
2285 int i;
2286 char_u *str;
2287
2288 /* read the length bytes, MSB first */
2289 for (i = 0; i < cnt_bytes; ++i)
2290 cnt = (cnt << 8) + getc(fd);
2291 if (cnt < 0)
2292 {
2293 *errp = ERR_TRUNC;
2294 return NULL;
2295 }
2296
2297 /* allocate memory */
2298 str = alloc((unsigned)cnt + 1);
2299 if (str == NULL)
2300 {
2301 *errp = ERR_NOMEM;
2302 return NULL;
2303 }
2304 *errp = 0;
2305
2306 /* Read the string. Doesn't check for truncated file. */
2307 for (i = 0; i < cnt; ++i)
2308 str[i] = getc(fd);
2309 str[i] = NUL;
2310
2311 return str;
2312 }
2313
2294 2314
2295 #ifdef FEAT_MBYTE 2315 #ifdef FEAT_MBYTE
2296 /* 2316 /*
2297 * Turn a multi-byte string into a wide character string. 2317 * Turn a multi-byte string into a wide character string.
2298 * Return it in allocated memory (NULL for out-of-memory) 2318 * Return it in allocated memory (NULL for out-of-memory)
2452 int load_spf; 2472 int load_spf;
2453 int len; 2473 int len;
2454 char_u *p; 2474 char_u *p;
2455 2475
2456 ga_init2(&ga, sizeof(langp_T), 2); 2476 ga_init2(&ga, sizeof(langp_T), 2);
2477 clear_midword(buf);
2457 2478
2458 /* Make the name of the .spl file associated with 'spellfile'. */ 2479 /* Make the name of the .spl file associated with 'spellfile'. */
2459 if (*buf->b_p_spf == NUL) 2480 if (*buf->b_p_spf == NUL)
2460 load_spf = FALSE; 2481 load_spf = FALSE;
2461 else 2482 else
2540 return e_outofmem; 2561 return e_outofmem;
2541 } 2562 }
2542 LANGP_ENTRY(ga, ga.ga_len)->lp_slang = lp; 2563 LANGP_ENTRY(ga, ga.ga_len)->lp_slang = lp;
2543 LANGP_ENTRY(ga, ga.ga_len)->lp_region = region_mask; 2564 LANGP_ENTRY(ga, ga.ga_len)->lp_region = region_mask;
2544 ++ga.ga_len; 2565 ++ga.ga_len;
2566 use_midword(lp, buf);
2545 2567
2546 /* Check if this is the spell file related to 'spellfile'. */ 2568 /* Check if this is the spell file related to 'spellfile'. */
2547 if (load_spf && fullpathcmp(spf_name, lp->sl_fname, FALSE) 2569 if (load_spf && fullpathcmp(spf_name, lp->sl_fname, FALSE)
2548 == FPC_SAME) 2570 == FPC_SAME)
2549 load_spf = FALSE; 2571 load_spf = FALSE;
2573 if (lp != NULL && ga_grow(&ga, 1) == OK) 2595 if (lp != NULL && ga_grow(&ga, 1) == OK)
2574 { 2596 {
2575 LANGP_ENTRY(ga, ga.ga_len)->lp_slang = lp; 2597 LANGP_ENTRY(ga, ga.ga_len)->lp_slang = lp;
2576 LANGP_ENTRY(ga, ga.ga_len)->lp_region = REGION_ALL; 2598 LANGP_ENTRY(ga, ga.ga_len)->lp_region = REGION_ALL;
2577 ++ga.ga_len; 2599 ++ga.ga_len;
2600 use_midword(lp, buf);
2578 } 2601 }
2579 } 2602 }
2580 2603
2581 /* Add a NULL entry to mark the end of the list. */ 2604 /* Add a NULL entry to mark the end of the list. */
2582 if (ga_grow(&ga, 1) == FAIL) 2605 if (ga_grow(&ga, 1) == FAIL)
2590 /* Everything is fine, store the new b_langp value. */ 2613 /* Everything is fine, store the new b_langp value. */
2591 ga_clear(&buf->b_langp); 2614 ga_clear(&buf->b_langp);
2592 buf->b_langp = ga; 2615 buf->b_langp = ga;
2593 2616
2594 return NULL; 2617 return NULL;
2618 }
2619
2620 /*
2621 * Clear the midword characters for buffer "buf".
2622 */
2623 static void
2624 clear_midword(buf)
2625 buf_T *buf;
2626 {
2627 vim_memset(buf->b_spell_ismw, 0, 256);
2628 #ifdef FEAT_MBYTE
2629 vim_free(buf->b_spell_ismw_mb);
2630 buf->b_spell_ismw_mb = NULL;
2631 #endif
2632 }
2633
2634 /*
2635 * Use the "sl_midword" field of language "lp" for buffer "buf".
2636 * They add up to any currently used midword characters.
2637 */
2638 static void
2639 use_midword(lp, buf)
2640 slang_T *lp;
2641 buf_T *buf;
2642 {
2643 char_u *p;
2644
2645 for (p = lp->sl_midword; *p != NUL; )
2646 #ifdef FEAT_MBYTE
2647 if (has_mbyte)
2648 {
2649 int c, l, n;
2650 char_u *bp;
2651
2652 c = mb_ptr2char(p);
2653 l = mb_ptr2len_check(p);
2654 if (c < 256)
2655 buf->b_spell_ismw[c] = TRUE;
2656 else if (buf->b_spell_ismw_mb == NULL)
2657 /* First multi-byte char in "b_spell_ismw_mb". */
2658 buf->b_spell_ismw_mb = vim_strnsave(p, l);
2659 else
2660 {
2661 /* Append multi-byte chars to "b_spell_ismw_mb". */
2662 n = STRLEN(buf->b_spell_ismw_mb);
2663 bp = vim_strnsave(buf->b_spell_ismw_mb, n + l);
2664 if (bp != NULL)
2665 {
2666 vim_free(buf->b_spell_ismw_mb);
2667 buf->b_spell_ismw_mb = bp;
2668 vim_strncpy(bp + n, p, l);
2669 }
2670 }
2671 p += l;
2672 }
2673 else
2674 #endif
2675 buf->b_spell_ismw[*p++] = TRUE;
2595 } 2676 }
2596 2677
2597 /* 2678 /*
2598 * Find the region "region[2]" in "rp" (points to "sl_regions"). 2679 * Find the region "region[2]" in "rp" (points to "sl_regions").
2599 * Each region is simply stored as the two characters of its name. 2680 * Each region is simply stored as the two characters of its name.
2633 int firstcap; 2714 int firstcap;
2634 int allcap; 2715 int allcap;
2635 int past_second = FALSE; /* past second word char */ 2716 int past_second = FALSE; /* past second word char */
2636 2717
2637 /* find first letter */ 2718 /* find first letter */
2638 for (p = word; !spell_iswordp(p); mb_ptr_adv(p)) 2719 for (p = word; !spell_iswordp_nmw(p); mb_ptr_adv(p))
2639 if (end == NULL ? *p == NUL : p >= end) 2720 if (end == NULL ? *p == NUL : p >= end)
2640 return 0; /* only non-word characters, illegal word */ 2721 return 0; /* only non-word characters, illegal word */
2641 #ifdef FEAT_MBYTE 2722 #ifdef FEAT_MBYTE
2642 if (has_mbyte) 2723 if (has_mbyte)
2643 c = mb_ptr2char_adv(&p); 2724 c = mb_ptr2char_adv(&p);
2649 /* 2730 /*
2650 * Need to check all letters to find a word with mixed upper/lower. 2731 * Need to check all letters to find a word with mixed upper/lower.
2651 * But a word with an upper char only at start is a ONECAP. 2732 * But a word with an upper char only at start is a ONECAP.
2652 */ 2733 */
2653 for ( ; end == NULL ? *p != NUL : p < end; mb_ptr_adv(p)) 2734 for ( ; end == NULL ? *p != NUL : p < end; mb_ptr_adv(p))
2654 if (spell_iswordp(p)) 2735 if (spell_iswordp_nmw(p))
2655 { 2736 {
2656 #ifdef FEAT_MBYTE 2737 #ifdef FEAT_MBYTE
2657 c = mb_ptr2char(p); 2738 c = mb_ptr2char(p);
2658 #else 2739 #else
2659 c = *p; 2740 c = *p;
3835 STRCPY(newword, word); 3916 STRCPY(newword, word);
3836 if (ae->ae_chop != NULL) 3917 if (ae->ae_chop != NULL)
3837 { 3918 {
3838 /* Remove chop string. */ 3919 /* Remove chop string. */
3839 p = newword + STRLEN(newword); 3920 p = newword + STRLEN(newword);
3840 #ifdef FEAT_MBYTE 3921 i = MB_CHARLEN(ae->ae_chop);
3841 if (has_mbyte)
3842 i = mb_charlen(ae->ae_chop);
3843 else
3844 #endif
3845 i = STRLEN(ae->ae_chop);
3846 for ( ; i > 0; --i) 3922 for ( ; i > 0; --i)
3847 mb_ptr_back(newword, p); 3923 mb_ptr_back(newword, p);
3848 *p = NUL; 3924 *p = NUL;
3849 } 3925 }
3850 if (ae->ae_add != NULL) 3926 if (ae->ae_add != NULL)
5273 { 5349 {
5274 int i; 5350 int i;
5275 5351
5276 did_set_spelltab = FALSE; 5352 did_set_spelltab = FALSE;
5277 clear_spell_chartab(&spelltab); 5353 clear_spell_chartab(&spelltab);
5278 vim_memset(spell_ismw, FALSE, sizeof(spell_ismw));
5279 #ifdef FEAT_MBYTE 5354 #ifdef FEAT_MBYTE
5280 vim_free(spell_ismw_mb);
5281 spell_ismw_mb = NULL;
5282
5283 if (enc_dbcs) 5355 if (enc_dbcs)
5284 { 5356 {
5285 /* DBCS: assume double-wide characters are word characters. */ 5357 /* DBCS: assume double-wide characters are word characters. */
5286 for (i = 128; i <= 255; ++i) 5358 for (i = 128; i <= 255; ++i)
5287 if (MB_BYTE2LEN(i) == 2) 5359 if (MB_BYTE2LEN(i) == 2)
5473 * As a special case we see "midword" characters as word character when it is 5545 * As a special case we see "midword" characters as word character when it is
5474 * followed by a word character. This finds they'there but not 'they there'. 5546 * followed by a word character. This finds they'there but not 'they there'.
5475 * Thus this only works properly when past the first character of the word. 5547 * Thus this only works properly when past the first character of the word.
5476 */ 5548 */
5477 static int 5549 static int
5478 spell_iswordp(p) 5550 spell_iswordp(p, buf)
5479 char_u *p; 5551 char_u *p;
5552 buf_T *buf; /* buffer used */
5480 { 5553 {
5481 #ifdef FEAT_MBYTE 5554 #ifdef FEAT_MBYTE
5482 char_u *s; 5555 char_u *s;
5483 int l; 5556 int l;
5484 int c; 5557 int c;
5488 l = MB_BYTE2LEN(*p); 5561 l = MB_BYTE2LEN(*p);
5489 s = p; 5562 s = p;
5490 if (l == 1) 5563 if (l == 1)
5491 { 5564 {
5492 /* be quick for ASCII */ 5565 /* be quick for ASCII */
5493 if (spell_ismw[*p]) 5566 if (buf->b_spell_ismw[*p])
5494 { 5567 {
5495 s = p + 1; /* skip a mid-word character */ 5568 s = p + 1; /* skip a mid-word character */
5496 l = MB_BYTE2LEN(*s); 5569 l = MB_BYTE2LEN(*s);
5497 } 5570 }
5498 } 5571 }
5499 else 5572 else
5500 { 5573 {
5501 c = mb_ptr2char(p); 5574 c = mb_ptr2char(p);
5502 if (c < 256 ? spell_ismw[c] : (spell_ismw_mb != NULL 5575 if (c < 256 ? buf->b_spell_ismw[c]
5503 && vim_strchr(spell_ismw_mb, c) != NULL)) 5576 : (buf->b_spell_ismw_mb != NULL
5577 && vim_strchr(buf->b_spell_ismw_mb, c) != NULL))
5504 { 5578 {
5505 s = p + l; 5579 s = p + l;
5506 l = MB_BYTE2LEN(*s); 5580 l = MB_BYTE2LEN(*s);
5507 } 5581 }
5508 } 5582 }
5511 return mb_get_class(s) >= 2; 5585 return mb_get_class(s) >= 2;
5512 return spelltab.st_isw[*s]; 5586 return spelltab.st_isw[*s];
5513 } 5587 }
5514 #endif 5588 #endif
5515 5589
5516 return spelltab.st_isw[spell_ismw[*p] ? p[1] : p[0]]; 5590 return spelltab.st_isw[buf->b_spell_ismw[*p] ? p[1] : p[0]];
5591 }
5592
5593 /*
5594 * Return TRUE if "p" points to a word character.
5595 * Unlike spell_iswordp() this doesn't check for "midword" characters.
5596 */
5597 static int
5598 spell_iswordp_nmw(p)
5599 char_u *p;
5600 {
5601 #ifdef FEAT_MBYTE
5602 if (has_mbyte && MB_BYTE2LEN(*p) > 1)
5603 return mb_get_class(p) >= 2;
5604 #endif
5605
5606 return spelltab.st_isw[*p];
5517 } 5607 }
5518 5608
5519 #ifdef FEAT_MBYTE 5609 #ifdef FEAT_MBYTE
5520 /* 5610 /*
5521 * Return TRUE if "p" points to a word character. 5611 * Return TRUE if "p" points to a word character.
5522 * Wide version of spell_iswordp(). 5612 * Wide version of spell_iswordp().
5523 */ 5613 */
5524 static int 5614 static int
5525 spell_iswordp_w(p) 5615 spell_iswordp_w(p, buf)
5526 int *p; 5616 int *p;
5617 buf_T *buf;
5527 { 5618 {
5528 int *s; 5619 int *s;
5529 5620
5530 if (*p < 256 ? spell_ismw[*p] : (spell_ismw_mb != NULL 5621 if (*p < 256 ? buf->b_spell_ismw[*p]
5531 && vim_strchr(spell_ismw_mb, *p) != NULL)) 5622 : (buf->b_spell_ismw_mb != NULL
5623 && vim_strchr(buf->b_spell_ismw_mb, *p) != NULL))
5532 s = p + 1; 5624 s = p + 1;
5533 else 5625 else
5534 s = p; 5626 s = p;
5535 5627
5536 if (mb_char2len(*s) > 1) 5628 if (mb_char2len(*s) > 1)
6265 #endif 6357 #endif
6266 { 6358 {
6267 l = 1; 6359 l = 1;
6268 wcopy[0] = c; 6360 wcopy[0] = c;
6269 } 6361 }
6270 vim_strncpy(wcopy + l, p, MAXWLEN - l); 6362 vim_strncpy(wcopy + l, p, MAXWLEN - l - 1);
6271 } 6363 }
6272 6364
6273 /* 6365 /*
6274 * Make a copy of "word" with all the letters upper cased into 6366 * Make a copy of "word" with all the letters upper cased into
6275 * "wcopy[MAXWLEN]". The result is NUL terminated. 6367 * "wcopy[MAXWLEN]". The result is NUL terminated.
6318 static void 6410 static void
6319 suggest_try_special(su) 6411 suggest_try_special(su)
6320 suginfo_T *su; 6412 suginfo_T *su;
6321 { 6413 {
6322 char_u *p; 6414 char_u *p;
6323 int len; 6415 size_t len;
6324 int c; 6416 int c;
6325 char_u word[MAXWLEN]; 6417 char_u word[MAXWLEN];
6326 6418
6327 /* 6419 /*
6328 * Recognize a word that is repeated: "the the". 6420 * Recognize a word that is repeated: "the the".
6574 if (!spell_valid_case(su->su_badflags, 6666 if (!spell_valid_case(su->su_badflags,
6575 captype(preword + prewordlen, NULL))) 6667 captype(preword + prewordlen, NULL)))
6576 newscore += SCORE_ICASE; 6668 newscore += SCORE_ICASE;
6577 6669
6578 if ((fword[sp->ts_fidx] == NUL 6670 if ((fword[sp->ts_fidx] == NUL
6579 || !spell_iswordp(fword + sp->ts_fidx)) 6671 || !spell_iswordp(fword + sp->ts_fidx, curbuf))
6580 && sp->ts_fidx >= sp->ts_fidxtry) 6672 && sp->ts_fidx >= sp->ts_fidxtry)
6581 { 6673 {
6582 /* The badword also ends: add suggestions. Give a penalty 6674 /* The badword also ends: add suggestions. Give a penalty
6583 * when changing non-word char to word char, e.g., "thes," 6675 * when changing non-word char to word char, e.g., "thes,"
6584 * -> "these". */ 6676 * -> "these". */
6587 if (has_mbyte) 6679 if (has_mbyte)
6588 mb_ptr_back(fword, p); 6680 mb_ptr_back(fword, p);
6589 else 6681 else
6590 #endif 6682 #endif
6591 --p; 6683 --p;
6592 if (!spell_iswordp(p)) 6684 if (!spell_iswordp(p, curbuf))
6593 { 6685 {
6594 p = preword + STRLEN(preword); 6686 p = preword + STRLEN(preword);
6595 #ifdef FEAT_MBYTE 6687 #ifdef FEAT_MBYTE
6596 if (has_mbyte) 6688 if (has_mbyte)
6597 mb_ptr_back(preword, p); 6689 mb_ptr_back(preword, p);
6598 else 6690 else
6599 #endif 6691 #endif
6600 --p; 6692 --p;
6601 if (spell_iswordp(p)) 6693 if (spell_iswordp(p, curbuf))
6602 newscore += SCORE_NONWORD; 6694 newscore += SCORE_NONWORD;
6603 } 6695 }
6604 6696
6605 add_suggestion(su, &su->su_ga, preword, 6697 add_suggestion(su, &su->su_ga, preword,
6606 sp->ts_fidx - repextra, 6698 sp->ts_fidx - repextra,
6620 { 6712 {
6621 /* Save things to be restored at STATE_SPLITUNDO. */ 6713 /* Save things to be restored at STATE_SPLITUNDO. */
6622 sp->ts_save_prewordlen = prewordlen; 6714 sp->ts_save_prewordlen = prewordlen;
6623 sp->ts_save_badflags = su->su_badflags; 6715 sp->ts_save_badflags = su->su_badflags;
6624 sp->ts_save_splitoff = splitoff; 6716 sp->ts_save_splitoff = splitoff;
6717 sp->ts_state = STATE_SPLITUNDO;
6718
6719 ++depth;
6720 sp = &stack[depth];
6625 6721
6626 /* Append a space to preword. */ 6722 /* Append a space to preword. */
6627 STRCAT(preword, " "); 6723 STRCAT(preword, " ");
6628 prewordlen = STRLEN(preword); 6724 prewordlen = STRLEN(preword);
6629 splitoff = sp->ts_twordlen; 6725 splitoff = sp->ts_twordlen;
6726
6727 /* If the badword has a non-word character at this
6728 * position skip it. That means replacing the
6729 * non-word character with a space. */
6730 if (!spell_iswordp_nmw(fword + sp->ts_fidx))
6731 {
6732 sp->ts_score -= SCORE_SPLIT - SCORE_SUBST;
6733 #ifdef FEAT_MBYTE
6734 if (has_mbyte)
6735 sp->ts_fidx += MB_BYTE2LEN(fword[sp->ts_fidx]);
6736 else
6737 #endif
6738 ++sp->ts_fidx;
6739 }
6630 #ifdef FEAT_MBYTE 6740 #ifdef FEAT_MBYTE
6631 if (has_mbyte) 6741 if (has_mbyte)
6632 { 6742 {
6633 int i = 0; 6743 int i = 0;
6634 6744
6635 /* Case-folding may change the number of bytes: 6745 /* Case-folding may change the number of bytes:
6636 * Count nr of chars in fword[sp->ts_fidx] and 6746 * Count nr of chars in fword[ts_fidx] and
6637 * advance that many chars in su->su_badptr. */ 6747 * advance that many chars in su->su_badptr. */
6638 for (p = fword; p < fword + sp->ts_fidx; 6748 for (p = fword; p < fword + sp->ts_fidx;
6639 mb_ptr_adv(p)) 6749 mb_ptr_adv(p))
6640 ++i; 6750 ++i;
6641 for (p = su->su_badptr; i > 0; mb_ptr_adv(p)) 6751 for (p = su->su_badptr; i > 0; mb_ptr_adv(p))
6645 #endif 6755 #endif
6646 p = su->su_badptr + sp->ts_fidx; 6756 p = su->su_badptr + sp->ts_fidx;
6647 su->su_badflags = captype(p, su->su_badptr 6757 su->su_badflags = captype(p, su->su_badptr
6648 + su->su_badlen); 6758 + su->su_badlen);
6649 6759
6650 sp->ts_state = STATE_SPLITUNDO;
6651 ++depth;
6652 /* Restart at top of the tree. */ 6760 /* Restart at top of the tree. */
6653 stack[depth].ts_arridx = 0; 6761 sp->ts_arridx = 0;
6654 } 6762 }
6655 } 6763 }
6656 break; 6764 break;
6657 6765
6658 case STATE_SPLITUNDO: 6766 case STATE_SPLITUNDO:
7973 else if (i < 0) 8081 else if (i < 0)
7974 { 8082 {
7975 /* When replacing part of the word check that we actually change 8083 /* When replacing part of the word check that we actually change
7976 * something. For "the the" a suggestion can be replacing the first 8084 * something. For "the the" a suggestion can be replacing the first
7977 * "the" with itself, since "the" wasn't banned. */ 8085 * "the" with itself, since "the" wasn't banned. */
7978 if (badlen == STRLEN(goodword) 8086 if (badlen == (int)STRLEN(goodword)
7979 && STRNCMP(su->su_badword, goodword, badlen) == 0) 8087 && STRNCMP(su->su_badword, goodword, badlen) == 0)
7980 return; 8088 return;
7981 } 8089 }
7982 8090
7983 8091
7993 { 8101 {
7994 /* Found it. Remember the lowest score. */ 8102 /* Found it. Remember the lowest score. */
7995 if (stp[i].st_score > score) 8103 if (stp[i].st_score > score)
7996 { 8104 {
7997 stp[i].st_score = score; 8105 stp[i].st_score = score;
8106 stp[i].st_altscore = altscore;
7998 stp[i].st_had_bonus = had_bonus; 8107 stp[i].st_had_bonus = had_bonus;
7999 } 8108 }
8000 break; 8109 break;
8001 } 8110 }
8002 8111
8353 *t++ = ' '; 8462 *t++ = ' ';
8354 s = skipwhite(s); 8463 s = skipwhite(s);
8355 } 8464 }
8356 else 8465 else
8357 { 8466 {
8358 if (spell_iswordp(s)) 8467 if (spell_iswordp_nmw(s))
8359 *t++ = *s; 8468 *t++ = *s;
8360 ++s; 8469 ++s;
8361 } 8470 }
8362 } 8471 }
8363 *t = NUL; 8472 *t = NUL;
8431 s++; 8540 s++;
8432 8541
8433 if (*s == NUL 8542 if (*s == NUL
8434 || (*s == '^' 8543 || (*s == '^'
8435 && (i == 0 || !(word[i - 1] == ' ' 8544 && (i == 0 || !(word[i - 1] == ' '
8436 || spell_iswordp(word + i - 1))) 8545 || spell_iswordp(word + i - 1, curbuf)))
8437 && (*(s + 1) != '$' 8546 && (*(s + 1) != '$'
8438 || (!spell_iswordp(word + i + k0)))) 8547 || (!spell_iswordp(word + i + k0, curbuf))))
8439 || (*s == '$' && i > 0 8548 || (*s == '$' && i > 0
8440 && spell_iswordp(word + i - 1) 8549 && spell_iswordp(word + i - 1, curbuf)
8441 && (!spell_iswordp(word + i + k0)))) 8550 && (!spell_iswordp(word + i + k0, curbuf))))
8442 { 8551 {
8443 /* search for followup rules, if: */ 8552 /* search for followup rules, if: */
8444 /* followup and k > 1 and NO '-' in searchstring */ 8553 /* followup and k > 1 and NO '-' in searchstring */
8445 c0 = word[i + k - 1]; 8554 c0 = word[i + k - 1];
8446 n0 = slang->sl_sal_first[c0]; 8555 n0 = slang->sl_sal_first[c0];
8498 } 8607 }
8499 8608
8500 if (*s == NUL 8609 if (*s == NUL
8501 /* *s == '^' cuts */ 8610 /* *s == '^' cuts */
8502 || (*s == '$' 8611 || (*s == '$'
8503 && !spell_iswordp(word + i + k0))) 8612 && !spell_iswordp(word + i + k0,
8613 curbuf)))
8504 { 8614 {
8505 if (k0 == k) 8615 if (k0 == k)
8506 /* this is just a piece of the string */ 8616 /* this is just a piece of the string */
8507 continue; 8617 continue;
8508 8618
8645 did_white = TRUE; 8755 did_white = TRUE;
8646 } 8756 }
8647 else 8757 else
8648 { 8758 {
8649 did_white = FALSE; 8759 did_white = FALSE;
8650 if (!spell_iswordp(t)) 8760 if (!spell_iswordp_nmw(t))
8651 continue; 8761 continue;
8652 } 8762 }
8653 } 8763 }
8654 word[n++] = c; 8764 word[n++] = c;
8655 } 8765 }
8722 s++; 8832 s++;
8723 8833
8724 if (*s == NUL 8834 if (*s == NUL
8725 || (*s == '^' 8835 || (*s == '^'
8726 && (i == 0 || !(word[i - 1] == ' ' 8836 && (i == 0 || !(word[i - 1] == ' '
8727 || spell_iswordp_w(word + i - 1))) 8837 || spell_iswordp_w(word + i - 1, curbuf)))
8728 && (*(s + 1) != '$' 8838 && (*(s + 1) != '$'
8729 || (!spell_iswordp_w(word + i + k0)))) 8839 || (!spell_iswordp_w(word + i + k0, curbuf))))
8730 || (*s == '$' && i > 0 8840 || (*s == '$' && i > 0
8731 && spell_iswordp_w(word + i - 1) 8841 && spell_iswordp_w(word + i - 1, curbuf)
8732 && (!spell_iswordp_w(word + i + k0)))) 8842 && (!spell_iswordp_w(word + i + k0, curbuf))))
8733 { 8843 {
8734 /* search for followup rules, if: */ 8844 /* search for followup rules, if: */
8735 /* followup and k > 1 and NO '-' in searchstring */ 8845 /* followup and k > 1 and NO '-' in searchstring */
8736 c0 = word[i + k - 1]; 8846 c0 = word[i + k - 1];
8737 n0 = slang->sl_sal_first[c0 & 0xff]; 8847 n0 = slang->sl_sal_first[c0 & 0xff];
8793 } 8903 }
8794 8904
8795 if (*s == NUL 8905 if (*s == NUL
8796 /* *s == '^' cuts */ 8906 /* *s == '^' cuts */
8797 || (*s == '$' 8907 || (*s == '$'
8798 && !spell_iswordp_w(word + i + k0))) 8908 && !spell_iswordp_w(word + i + k0,
8909 curbuf)))
8799 { 8910 {
8800 if (k0 == k) 8911 if (k0 == k)
8801 /* this is just a piece of the string */ 8912 /* this is just a piece of the string */
8802 continue; 8913 continue;
8803 8914
9453 curi[depth] += i - 1; 9564 curi[depth] += i - 1;
9454 9565
9455 i = valid_word_prefix(i, n, prefid, word, slang); 9566 i = valid_word_prefix(i, n, prefid, word, slang);
9456 if (i != 0) 9567 if (i != 0)
9457 { 9568 {
9458 vim_strncpy(prefix + depth, word, MAXWLEN - depth); 9569 vim_strncpy(prefix + depth, word, MAXWLEN - depth - 1);
9459 dump_word(prefix, round, 9570 dump_word(prefix, round,
9460 (i & WF_RAREPFX) ? (flags | WF_RARE) 9571 (i & WF_RAREPFX) ? (flags | WF_RARE)
9461 : flags, lnum++); 9572 : flags, lnum++);
9462 } 9573 }
9463 } 9574 }