comparison src/spell.c @ 366:b498dee21bd3

updated for version 7.0095
author vimboss
date Mon, 27 Jun 2005 22:27:46 +0000
parents e111db373ca4
children a698eb686ded
comparison
equal deleted inserted replaced
365:bda4394122e0 366:b498dee21bd3
84 * 84 *
85 * <HEADER>: <fileID> 85 * <HEADER>: <fileID>
86 * <regioncnt> <regionname> ... 86 * <regioncnt> <regionname> ...
87 * <charflagslen> <charflags> 87 * <charflagslen> <charflags>
88 * <fcharslen> <fchars> 88 * <fcharslen> <fchars>
89 * <midwordlen> <midword>
89 * <prefcondcnt> <prefcond> ... 90 * <prefcondcnt> <prefcond> ...
90 * 91 *
91 * <fileID> 10 bytes "VIMspell07" 92 * <fileID> 10 bytes "VIMspell08"
92 * <regioncnt> 1 byte number of regions following (8 supported) 93 * <regioncnt> 1 byte number of regions following (8 supported)
93 * <regionname> 2 bytes Region name: ca, au, etc. Lower case. 94 * <regionname> 2 bytes Region name: ca, au, etc. Lower case.
94 * First <regionname> is region 1. 95 * First <regionname> is region 1.
95 * 96 *
96 * <charflagslen> 1 byte Number of bytes in <charflags> (should be 128). 97 * <charflagslen> 1 byte Number of bytes in <charflags> (should be 128).
97 * <charflags> N bytes List of flags (first one is for character 128): 98 * <charflags> N bytes List of flags (first one is for character 128):
98 * 0x01 word character CF_WORD 99 * 0x01 word character CF_WORD
99 * 0x02 upper-case character CF_UPPER 100 * 0x02 upper-case character CF_UPPER
100 * <fcharslen> 2 bytes Number of bytes in <fchars>. 101 * <fcharslen> 2 bytes Number of bytes in <fchars>.
101 * <fchars> N bytes Folded characters, first one is for character 128. 102 * <fchars> N bytes Folded characters, first one is for character 128.
103 *
104 * <midwordlen> 2 bytes Number of bytes in <midword>.
105 * <midword> N bytes Characters that are word characters only when used
106 * in the middle of a word.
102 * 107 *
103 * <prefcondcnt> 2 bytes Number of <prefcond> items following. 108 * <prefcondcnt> 2 bytes Number of <prefcond> items following.
104 * 109 *
105 * <prefcond> : <condlen> <condstr> 110 * <prefcond> : <condlen> <condstr>
106 * 111 *
167 * | <prefixID> <prefcondnr> ] 172 * | <prefixID> <prefcondnr> ]
168 * 173 *
169 * <byte> 1 byte Byte value of the sibling. Special cases: 174 * <byte> 1 byte Byte value of the sibling. Special cases:
170 * BY_NOFLAGS: End of word without flags and for all 175 * BY_NOFLAGS: End of word without flags and for all
171 * regions. 176 * regions.
172 * BY_FLAGS: End of word, <flags> follow. For 177 * For PREFIXTREE <prefixID> and
173 * PREFIXTREE <prefixID> and <prefcondnr> 178 * <prefcondnr> follow.
174 * follow. 179 * BY_FLAGS: End of word, <flags> follow.
180 * For PREFIXTREE <prefixID> and
181 * <prefcondnr> follow for rare prefix.
175 * BY_INDEX: Child of sibling is shared, <nodeidx> 182 * BY_INDEX: Child of sibling is shared, <nodeidx>
176 * and <xbyte> follow. 183 * and <xbyte> follow.
177 * 184 *
178 * <nodeidx> 3 bytes Index of child for this sibling, MSB first. 185 * <nodeidx> 3 bytes Index of child for this sibling, MSB first.
179 * 186 *
233 #define WF_PFX 0x20 /* prefix ID list follows */ 240 #define WF_PFX 0x20 /* prefix ID list follows */
234 #define WF_KEEPCAP 0x80 /* keep-case word */ 241 #define WF_KEEPCAP 0x80 /* keep-case word */
235 242
236 #define WF_CAPMASK (WF_ONECAP | WF_ALLCAP | WF_KEEPCAP) 243 #define WF_CAPMASK (WF_ONECAP | WF_ALLCAP | WF_KEEPCAP)
237 244
245 #define WF_RAREPFX 0x1000000 /* in sl_pidxs: flag for rare postponed
246 prefix; must be above prefixID (one byte)
247 and prefcondnr (two bytes) */
248
238 #define BY_NOFLAGS 0 /* end of word without flags or region */ 249 #define BY_NOFLAGS 0 /* end of word without flags or region */
239 #define BY_FLAGS 1 /* end of word, flag byte follows */ 250 #define BY_FLAGS 1 /* end of word, flag byte follows */
240 #define BY_INDEX 2 /* child is shared, index follows */ 251 #define BY_INDEX 2 /* child is shared, index follows */
241 #define BY_SPECIAL BY_INDEX /* highest special byte value */ 252 #define BY_SPECIAL BY_INDEX /* highest special byte value */
242 253
341 #define SP_OK 0 352 #define SP_OK 0
342 #define SP_RARE 1 353 #define SP_RARE 1
343 #define SP_LOCAL 2 354 #define SP_LOCAL 2
344 #define SP_BAD 3 355 #define SP_BAD 3
345 356
346 #define VIMSPELLMAGIC "VIMspell07" /* string at start of Vim spell file */ 357 #define VIMSPELLMAGIC "VIMspell08" /* string at start of Vim spell file */
347 #define VIMSPELLMAGICL 10 358 #define VIMSPELLMAGICL 10
348 359
349 /* 360 /*
350 * Information used when looking for suggestions. 361 * Information used when looking for suggestions.
351 */ 362 */
397 #define SCORE_SIMILAR 33 /* substitute a similar character */ 408 #define SCORE_SIMILAR 33 /* substitute a similar character */
398 #define SCORE_DEL 94 /* delete a character */ 409 #define SCORE_DEL 94 /* delete a character */
399 #define SCORE_DELDUP 64 /* delete a duplicated character */ 410 #define SCORE_DELDUP 64 /* delete a duplicated character */
400 #define SCORE_INS 96 /* insert a character */ 411 #define SCORE_INS 96 /* insert a character */
401 #define SCORE_INSDUP 66 /* insert a duplicate character */ 412 #define SCORE_INSDUP 66 /* insert a duplicate character */
413 #define SCORE_NONWORD 103 /* change non-word to word char */
402 414
403 #define SCORE_MAXINIT 350 /* Initial maximum score: higher == slower. 415 #define SCORE_MAXINIT 350 /* Initial maximum score: higher == slower.
404 * 350 allows for about three changes. */ 416 * 350 allows for about three changes. */
405 417
406 #define SCORE_BIG SCORE_INS * 3 /* big difference */ 418 #define SCORE_BIG SCORE_INS * 3 /* big difference */
447 char_u st_upper[256]; /* chars: upper case */ 459 char_u st_upper[256]; /* chars: upper case */
448 } spelltab_T; 460 } spelltab_T;
449 461
450 static spelltab_T spelltab; 462 static spelltab_T spelltab;
451 static int did_set_spelltab; 463 static int did_set_spelltab;
464 static char_u spell_ismw[256]; /* flags: is midword char */
465 #ifdef FEAT_MBYTE
466 static char_u *spell_ismw_mb = NULL; /* multi-byte midword chars */
467 #endif
452 468
453 #define CF_WORD 0x01 469 #define CF_WORD 0x01
454 #define CF_UPPER 0x02 470 #define CF_UPPER 0x02
455 471
456 static void clear_spell_chartab __ARGS((spelltab_T *sp)); 472 static void clear_spell_chartab __ARGS((spelltab_T *sp));
959 continue; 975 continue;
960 } 976 }
961 977
962 /* When mode is FIND_PREFIX the word must support the prefix: 978 /* When mode is FIND_PREFIX the word must support the prefix:
963 * check the prefix ID and the condition. Do that for the list at 979 * check the prefix ID and the condition. Do that for the list at
964 * mip->mi_prefarridx. */ 980 * mip->mi_prefarridx that find_prefix() filled. */
965 if (mode == FIND_PREFIX) 981 if (mode == FIND_PREFIX)
966 { 982 {
967 /* The prefix ID is stored two bytes above the flags. */ 983 /* The prefix ID is stored two bytes above the flags. */
968 prefid = (unsigned)flags >> 16; 984 prefid = (unsigned)flags >> 16;
969 if (!valid_word_prefix(mip->mi_prefcnt, mip->mi_prefarridx, 985 c = valid_word_prefix(mip->mi_prefcnt, mip->mi_prefarridx,
970 prefid, mip->mi_fword + mip->mi_prefixlen, 986 prefid, mip->mi_fword + mip->mi_prefixlen,
971 slang)) 987 slang);
988 if (c == 0)
972 continue; 989 continue;
990
991 /* Use the WF_RARE flag for a rare prefix. */
992 if (c & WF_RAREPFX)
993 flags |= WF_RARE;
973 } 994 }
974 995
975 if (flags & WF_BANNED) 996 if (flags & WF_BANNED)
976 res = SP_BANNED; 997 res = SP_BANNED;
977 else if (flags & WF_REGION) 998 else if (flags & WF_REGION)
1004 break; 1025 break;
1005 } 1026 }
1006 } 1027 }
1007 1028
1008 /* 1029 /*
1009 * Return TRUE if the prefix indicated by "mip->mi_prefarridx" matches with 1030 * Return non-zero if the prefix indicated by "mip->mi_prefarridx" matches
1010 * the prefix ID "prefid" for the word "word". 1031 * with the prefix ID "prefid" for the word "word".
1032 * The WF_RAREPFX flag is included in the return value for a rare prefix.
1011 */ 1033 */
1012 static int 1034 static int
1013 valid_word_prefix(totprefcnt, arridx, prefid, word, slang) 1035 valid_word_prefix(totprefcnt, arridx, prefid, word, slang)
1014 int totprefcnt; /* nr of prefix IDs */ 1036 int totprefcnt; /* nr of prefix IDs */
1015 int arridx; /* idx in sl_pidxs[] */ 1037 int arridx; /* idx in sl_pidxs[] */
1029 /* Check the prefix ID. */ 1051 /* Check the prefix ID. */
1030 if (prefid != (pidx & 0xff)) 1052 if (prefid != (pidx & 0xff))
1031 continue; 1053 continue;
1032 1054
1033 /* Check the condition, if there is one. The condition index is 1055 /* Check the condition, if there is one. The condition index is
1034 * stored above the prefix ID byte. */ 1056 * stored in the two bytes above the prefix ID byte. */
1035 rp = slang->sl_prefprog[(unsigned)pidx >> 8]; 1057 rp = slang->sl_prefprog[((unsigned)pidx >> 8) & 0xffff];
1036 if (rp != NULL) 1058 if (rp != NULL)
1037 { 1059 {
1038 regmatch.regprog = rp; 1060 regmatch.regprog = rp;
1039 regmatch.rm_ic = FALSE; 1061 regmatch.rm_ic = FALSE;
1040 if (!vim_regexec(&regmatch, word, 0)) 1062 if (!vim_regexec(&regmatch, word, 0))
1041 continue; 1063 continue;
1042 } 1064 }
1043 1065
1044 /* It's a match! */ 1066 /* It's a match! Return the WF_RAREPFX flag. */
1045 return TRUE; 1067 return pidx;
1046 } 1068 }
1047 return FALSE; 1069 return 0;
1048 } 1070 }
1049 1071
1050 /* 1072 /*
1051 * Check if the word at "mip->mi_word" has a matching prefix. 1073 * Check if the word at "mip->mi_word" has a matching prefix.
1052 * If it does, then check the following word. 1074 * If it does, then check the following word.
1645 1667
1646 /* <HEADER>: <fileID> 1668 /* <HEADER>: <fileID>
1647 * <regioncnt> <regionname> ... 1669 * <regioncnt> <regionname> ...
1648 * <charflagslen> <charflags> 1670 * <charflagslen> <charflags>
1649 * <fcharslen> <fchars> 1671 * <fcharslen> <fchars>
1672 * <midwordlen> <midword>
1650 * <prefcondcnt> <prefcond> ... 1673 * <prefcondcnt> <prefcond> ...
1651 */ 1674 */
1652 for (i = 0; i < VIMSPELLMAGICL; ++i) 1675 for (i = 0; i < VIMSPELLMAGICL; ++i)
1653 buf[i] = getc(fd); /* <fileID> */ 1676 buf[i] = getc(fd); /* <fileID> */
1654 if (STRNCMP(buf, VIMSPELLMAGIC, VIMSPELLMAGICL) != 0) 1677 if (STRNCMP(buf, VIMSPELLMAGIC, VIMSPELLMAGICL) != 0)
1715 { 1738 {
1716 /* When <charflagslen> is zero then <fcharlen> must also be zero. */ 1739 /* When <charflagslen> is zero then <fcharlen> must also be zero. */
1717 cnt = (getc(fd) << 8) + getc(fd); 1740 cnt = (getc(fd) << 8) + getc(fd);
1718 if (cnt != 0) 1741 if (cnt != 0)
1719 goto formerr; 1742 goto formerr;
1743 }
1744
1745 /* <midwordlen> <midword> */
1746 cnt = (getc(fd) << 8) + getc(fd);
1747 if (cnt < 0)
1748 goto truncerr;
1749 if (cnt > 0)
1750 {
1751 for (i = 0; i < cnt; ++i)
1752 if (i < MAXWLEN) /* truncate at reasonable length */
1753 buf[i] = getc(fd);
1754 if (i < MAXWLEN)
1755 buf[i] = NUL;
1756 else
1757 buf[MAXWLEN] = NUL;
1758
1759 /* The midword characters add up to any midword characters from other
1760 * .spl files. */
1761 for (p = buf; *p != NUL; )
1762 #ifdef FEAT_MBYTE
1763 if (has_mbyte)
1764 {
1765 c = mb_ptr2char(p);
1766 i = mb_ptr2len_check(p);
1767 if (c < 256)
1768 spell_ismw[c] = TRUE;
1769 else if (spell_ismw_mb == NULL)
1770 /* First multi-byte char in "spell_ismw_mb". */
1771 spell_ismw_mb = vim_strnsave(p, i);
1772 else
1773 {
1774 /* Append multi-byte chars to "spell_ismw_mb". */
1775 n = STRLEN(spell_ismw_mb);
1776 bp = vim_strnsave(spell_ismw_mb, n + i);
1777 if (bp != NULL)
1778 {
1779 vim_free(spell_ismw_mb);
1780 spell_ismw_mb = bp;
1781 vim_strncpy(bp + n, p, i);
1782 }
1783 }
1784 p += i;
1785 }
1786 else
1787 #endif
1788 spell_ismw[*p++] = TRUE;
1720 } 1789 }
1721 1790
1722 /* <prefcondcnt> <prefcond> ... */ 1791 /* <prefcondcnt> <prefcond> ... */
1723 cnt = (getc(fd) << 8) + getc(fd); /* <prefcondcnt> */ 1792 cnt = (getc(fd) << 8) + getc(fd); /* <prefcondcnt> */
1724 if (cnt > 0) 1793 if (cnt > 0)
2002 int len; 2071 int len;
2003 int i; 2072 int i;
2004 int n; 2073 int n;
2005 idx_T idx = startidx; 2074 idx_T idx = startidx;
2006 int c; 2075 int c;
2076 int c2;
2007 #define SHARED_MASK 0x8000000 2077 #define SHARED_MASK 0x8000000
2008 2078
2009 len = getc(fd); /* <siblingcount> */ 2079 len = getc(fd); /* <siblingcount> */
2010 if (len <= 0) 2080 if (len <= 0)
2011 return -1; 2081 return -1;
2020 c = getc(fd); /* <byte> */ 2090 c = getc(fd); /* <byte> */
2021 if (c < 0) 2091 if (c < 0)
2022 return -1; 2092 return -1;
2023 if (c <= BY_SPECIAL) 2093 if (c <= BY_SPECIAL)
2024 { 2094 {
2025 if (c == BY_NOFLAGS) 2095 if (c == BY_NOFLAGS && !prefixtree)
2026 { 2096 {
2027 /* No flags, all regions. */ 2097 /* No flags, all regions. */
2028 idxs[idx] = 0; 2098 idxs[idx] = 0;
2029 c = 0; 2099 c = 0;
2030 } 2100 }
2031 else if (c == BY_FLAGS) 2101 else if (c == BY_FLAGS || c == BY_NOFLAGS)
2032 { 2102 {
2033 if (prefixtree) 2103 if (prefixtree)
2034 { 2104 {
2035 /* Read the prefix ID and the condition nr. In idxs[] 2105 /* Read the prefix ID and the condition nr. In idxs[]
2036 * store the prefix ID in the low byte, the condition 2106 * store the prefix ID in the low byte, the condition
2037 * index shifted up 8 bits. */ 2107 * index shifted up 8 bits. */
2038 c = getc(fd); /* <prefixID> */ 2108 c2 = getc(fd); /* <prefixID> */
2039 n = (getc(fd) << 8) + getc(fd); /* <prefcondnr> */ 2109 n = (getc(fd) << 8) + getc(fd); /* <prefcondnr> */
2040 if (n >= maxprefcondnr) 2110 if (n >= maxprefcondnr)
2041 return -2; 2111 return -2;
2042 c = (n << 8) + c; 2112 c2 += (n << 8);
2113 if (c == BY_NOFLAGS)
2114 c = c2;
2115 else
2116 c = c2 | WF_RAREPFX;
2043 } 2117 }
2044 else 2118 else
2045 { 2119 {
2046 /* Read flags and optional region and prefix ID. In 2120 /* Read flags and optional region and prefix ID. In
2047 * idxs[] the flags go in the low byte, region above that 2121 * idxs[] the flags go in the low byte, region above that
2354 { 2428 {
2355 lp = first_lang; 2429 lp = first_lang;
2356 first_lang = lp->sl_next; 2430 first_lang = lp->sl_next;
2357 slang_free(lp); 2431 slang_free(lp);
2358 } 2432 }
2433
2434 init_spell_chartab();
2359 } 2435 }
2360 # endif 2436 # endif
2361 2437
2362 # if defined(FEAT_MBYTE) || defined(PROTO) 2438 # if defined(FEAT_MBYTE) || defined(PROTO)
2363 /* 2439 /*
2450 affentry_T *ae_next; /* next affix with same name/number */ 2526 affentry_T *ae_next; /* next affix with same name/number */
2451 char_u *ae_chop; /* text to chop off basic word (can be NULL) */ 2527 char_u *ae_chop; /* text to chop off basic word (can be NULL) */
2452 char_u *ae_add; /* text to add to basic word (can be NULL) */ 2528 char_u *ae_add; /* text to add to basic word (can be NULL) */
2453 char_u *ae_cond; /* condition (NULL for ".") */ 2529 char_u *ae_cond; /* condition (NULL for ".") */
2454 regprog_T *ae_prog; /* regexp program for ae_cond or NULL */ 2530 regprog_T *ae_prog; /* regexp program for ae_cond or NULL */
2531 int ae_rare; /* rare affix */
2455 }; 2532 };
2456 2533
2457 /* Affix header from ".aff" file. Used for af_pref and af_suff. */ 2534 /* Affix header from ".aff" file. Used for af_pref and af_suff. */
2458 typedef struct affheader_S 2535 typedef struct affheader_S
2459 { 2536 {
2534 garray_T si_sal; /* list of fromto_T entries from SAL lines */ 2611 garray_T si_sal; /* list of fromto_T entries from SAL lines */
2535 int si_followup; /* soundsalike: ? */ 2612 int si_followup; /* soundsalike: ? */
2536 int si_collapse; /* soundsalike: ? */ 2613 int si_collapse; /* soundsalike: ? */
2537 int si_rem_accents; /* soundsalike: remove accents */ 2614 int si_rem_accents; /* soundsalike: remove accents */
2538 garray_T si_map; /* MAP info concatenated */ 2615 garray_T si_map; /* MAP info concatenated */
2616 char_u *si_midword; /* MIDWORD chars, alloc'ed string or NULL */
2539 garray_T si_prefcond; /* table with conditions for postponed 2617 garray_T si_prefcond; /* table with conditions for postponed
2540 * prefixes, each stored as a string */ 2618 * prefixes, each stored as a string */
2541 int si_newID; /* current value for ah_newID */ 2619 int si_newID; /* current value for ah_newID */
2542 } spellinfo_T; 2620 } spellinfo_T;
2543 2621
2593 char_u *upp = NULL; 2671 char_u *upp = NULL;
2594 static char *e_affname = N_("Affix name too long in %s line %d: %s"); 2672 static char *e_affname = N_("Affix name too long in %s line %d: %s");
2595 int do_rep; 2673 int do_rep;
2596 int do_sal; 2674 int do_sal;
2597 int do_map; 2675 int do_map;
2676 int do_midword;
2598 int found_map = FALSE; 2677 int found_map = FALSE;
2599 hashitem_T *hi; 2678 hashitem_T *hi;
2600 2679
2601 /* 2680 /*
2602 * Open the file. 2681 * Open the file.
2610 2689
2611 if (spin->si_verbose || p_verbose > 2) 2690 if (spin->si_verbose || p_verbose > 2)
2612 { 2691 {
2613 if (!spin->si_verbose) 2692 if (!spin->si_verbose)
2614 verbose_enter(); 2693 verbose_enter();
2615 smsg((char_u *)_("Reading affix file %s..."), fname); 2694 smsg((char_u *)_("Reading affix file %s ..."), fname);
2616 out_flush(); 2695 out_flush();
2617 if (!spin->si_verbose) 2696 if (!spin->si_verbose)
2618 verbose_leave(); 2697 verbose_leave();
2619 } 2698 }
2620 2699
2624 /* Only do SAL lines when not done in another .aff file already. */ 2703 /* Only do SAL lines when not done in another .aff file already. */
2625 do_sal = spin->si_sal.ga_len == 0; 2704 do_sal = spin->si_sal.ga_len == 0;
2626 2705
2627 /* Only do MAP lines when not done in another .aff file already. */ 2706 /* Only do MAP lines when not done in another .aff file already. */
2628 do_map = spin->si_map.ga_len == 0; 2707 do_map = spin->si_map.ga_len == 0;
2708
2709 /* Only do MIDWORD line when not done in another .aff file already */
2710 do_midword = spin->si_midword == NULL;
2629 2711
2630 /* 2712 /*
2631 * Allocate and init the afffile_T structure. 2713 * Allocate and init the afffile_T structure.
2632 */ 2714 */
2633 aff = (afffile_T *)getroom(&spin->si_blocks, sizeof(afffile_T)); 2715 aff = (afffile_T *)getroom(&spin->si_blocks, sizeof(afffile_T));
2703 smsg((char_u *)_("Conversion in %s not supported: from %s to %s"), 2785 smsg((char_u *)_("Conversion in %s not supported: from %s to %s"),
2704 fname, aff->af_enc, p_enc); 2786 fname, aff->af_enc, p_enc);
2705 #else 2787 #else
2706 smsg((char_u *)_("Conversion in %s not supported"), fname); 2788 smsg((char_u *)_("Conversion in %s not supported"), fname);
2707 #endif 2789 #endif
2790 }
2791 else if (STRCMP(items[0], "MIDWORD") == 0 && itemcnt == 2)
2792 {
2793 if (do_midword)
2794 spin->si_midword = vim_strsave(items[1]);
2708 } 2795 }
2709 else if (STRCMP(items[0], "NOSPLITSUGS") == 0 && itemcnt == 1) 2796 else if (STRCMP(items[0], "NOSPLITSUGS") == 0 && itemcnt == 1)
2710 { 2797 {
2711 /* ignored, we always split */ 2798 /* ignored, we always split */
2712 } 2799 }
2791 && aff_todo > 0 2878 && aff_todo > 0
2792 && STRCMP(cur_aff->ah_key, items[1]) == 0 2879 && STRCMP(cur_aff->ah_key, items[1]) == 0
2793 && itemcnt >= 5) 2880 && itemcnt >= 5)
2794 { 2881 {
2795 affentry_T *aff_entry; 2882 affentry_T *aff_entry;
2883 int rare = FALSE;
2884 int lasti = 5;
2885
2886 /* Check for "rare" after the other info. */
2887 if (itemcnt > 5 && STRICMP(items[5], "rare") == 0)
2888 {
2889 rare = TRUE;
2890 lasti = 6;
2891 }
2796 2892
2797 /* Myspell allows extra text after the item, but that might 2893 /* Myspell allows extra text after the item, but that might
2798 * mean mistakes go unnoticed. Require a comment-starter. */ 2894 * mean mistakes go unnoticed. Require a comment-starter. */
2799 if (itemcnt > 5 && *items[5] != '#') 2895 if (itemcnt > lasti && *items[lasti] != '#')
2800 smsg((char_u *)_("Trailing text in %s line %d: %s"), 2896 smsg((char_u *)_("Trailing text in %s line %d: %s"),
2801 fname, lnum, items[5]); 2897 fname, lnum, items[lasti]);
2802 2898
2803 /* New item for an affix letter. */ 2899 /* New item for an affix letter. */
2804 --aff_todo; 2900 --aff_todo;
2805 aff_entry = (affentry_T *)getroom(&spin->si_blocks, 2901 aff_entry = (affentry_T *)getroom(&spin->si_blocks,
2806 sizeof(affentry_T)); 2902 sizeof(affentry_T));
2807 if (aff_entry == NULL) 2903 if (aff_entry == NULL)
2808 break; 2904 break;
2905 aff_entry->ae_rare = rare;
2809 2906
2810 if (STRCMP(items[2], "0") != 0) 2907 if (STRCMP(items[2], "0") != 0)
2811 aff_entry->ae_chop = getroom_save(&spin->si_blocks, 2908 aff_entry->ae_chop = getroom_save(&spin->si_blocks,
2812 items[2]); 2909 items[2]);
2813 if (STRCMP(items[3], "0") != 0) 2910 if (STRCMP(items[3], "0") != 0)
2866 /* Add the prefix to the prefix tree. */ 2963 /* Add the prefix to the prefix tree. */
2867 if (aff_entry->ae_add == NULL) 2964 if (aff_entry->ae_add == NULL)
2868 p = (char_u *)""; 2965 p = (char_u *)"";
2869 else 2966 else
2870 p = aff_entry->ae_add; 2967 p = aff_entry->ae_add;
2871 tree_add_word(p, spin->si_prefroot, -1, idx, 2968 tree_add_word(p, spin->si_prefroot, rare ? -2 : -1,
2872 cur_aff->ah_newID, &spin->si_blocks); 2969 idx, cur_aff->ah_newID, &spin->si_blocks);
2873 } 2970 }
2874 } 2971 }
2875 } 2972 }
2876 else if (STRCMP(items[0], "FOL") == 0 && itemcnt == 2) 2973 else if (STRCMP(items[0], "FOL") == 0 && itemcnt == 2)
2877 { 2974 {
3158 3255
3159 if (spin->si_verbose || p_verbose > 2) 3256 if (spin->si_verbose || p_verbose > 2)
3160 { 3257 {
3161 if (!spin->si_verbose) 3258 if (!spin->si_verbose)
3162 verbose_enter(); 3259 verbose_enter();
3163 smsg((char_u *)_("Reading dictionary file %s..."), fname); 3260 smsg((char_u *)_("Reading dictionary file %s ..."), fname);
3164 out_flush(); 3261 out_flush();
3165 if (!spin->si_verbose) 3262 if (!spin->si_verbose)
3166 verbose_leave(); 3263 verbose_leave();
3167 } 3264 }
3168 3265
3382 regmatch_T regmatch; 3479 regmatch_T regmatch;
3383 char_u newword[MAXWLEN]; 3480 char_u newword[MAXWLEN];
3384 int retval = OK; 3481 int retval = OK;
3385 int i; 3482 int i;
3386 char_u *p; 3483 char_u *p;
3484 int use_flags;
3387 3485
3388 todo = ht->ht_used; 3486 todo = ht->ht_used;
3389 for (hi = ht->ht_array; todo > 0 && retval == OK; ++hi) 3487 for (hi = ht->ht_array; todo > 0 && retval == OK; ++hi)
3390 { 3488 {
3391 if (!HASHITEM_EMPTY(hi)) 3489 if (!HASHITEM_EMPTY(hi))
3458 } 3556 }
3459 if (ae->ae_add != NULL) 3557 if (ae->ae_add != NULL)
3460 STRCAT(newword, ae->ae_add); 3558 STRCAT(newword, ae->ae_add);
3461 } 3559 }
3462 3560
3561 /* Obey the "rare" flag of the affix. */
3562 if (ae->ae_rare)
3563 use_flags = flags | WF_RARE;
3564 else
3565 use_flags = flags;
3566
3463 /* Store the modified word. */ 3567 /* Store the modified word. */
3464 if (store_word(newword, spin, 3568 if (store_word(newword, spin, use_flags,
3465 flags, spin->si_region, pfxlist) == FAIL) 3569 spin->si_region, pfxlist) == FAIL)
3466 retval = FAIL; 3570 retval = FAIL;
3467 3571
3468 /* When added a suffix and combining is allowed also 3572 /* When added a suffix and combining is allowed also
3469 * try adding prefixes additionally. */ 3573 * try adding prefixes additionally. */
3470 if (xht != NULL && ah->ah_combine) 3574 if (xht != NULL && ah->ah_combine)
3471 if (store_aff_word(newword, spin, afflist, affile, 3575 if (store_aff_word(newword, spin, afflist, affile,
3472 xht, NULL, TRUE, flags, pfxlist) == FAIL) 3576 xht, NULL, TRUE, use_flags, pfxlist)
3577 == FAIL)
3473 retval = FAIL; 3578 retval = FAIL;
3474 } 3579 }
3475 } 3580 }
3476 } 3581 }
3477 } 3582 }
3512 3617
3513 if (spin->si_verbose || p_verbose > 2) 3618 if (spin->si_verbose || p_verbose > 2)
3514 { 3619 {
3515 if (!spin->si_verbose) 3620 if (!spin->si_verbose)
3516 verbose_enter(); 3621 verbose_enter();
3517 smsg((char_u *)_("Reading word file %s..."), fname); 3622 smsg((char_u *)_("Reading word file %s ..."), fname);
3518 out_flush(); 3623 out_flush();
3519 if (!spin->si_verbose) 3624 if (!spin->si_verbose)
3520 verbose_leave(); 3625 verbose_leave();
3521 } 3626 }
3522 3627
3815 return res; 3920 return res;
3816 } 3921 }
3817 3922
3818 /* 3923 /*
3819 * Add word "word" to a word tree at "root". 3924 * Add word "word" to a word tree at "root".
3820 * When "flags" is -1 we are adding to the prefix tree where flags don't 3925 * When "flags" < 0 we are adding to the prefix tree where flags is used for
3821 * matter and "region" is the condition nr. 3926 * "rare" and "region" is the condition nr.
3822 * Returns FAIL when out of memory. 3927 * Returns FAIL when out of memory.
3823 */ 3928 */
3824 static int 3929 static int
3825 tree_add_word(word, root, flags, region, prefixID, blp) 3930 tree_add_word(word, root, flags, region, prefixID, blp)
3826 char_u *word; 3931 char_u *word;
4107 } 4212 }
4108 4213
4109 /* <HEADER>: <fileID> <regioncnt> <regionname> ... 4214 /* <HEADER>: <fileID> <regioncnt> <regionname> ...
4110 * <charflagslen> <charflags> 4215 * <charflagslen> <charflags>
4111 * <fcharslen> <fchars> 4216 * <fcharslen> <fchars>
4217 * <midwordlen> <midword>
4112 * <prefcondcnt> <prefcond> ... */ 4218 * <prefcondcnt> <prefcond> ... */
4113 4219
4114 /* <fileID> */ 4220 /* <fileID> */
4115 if (fwrite(VIMSPELLMAGIC, VIMSPELLMAGICL, (size_t)1, fd) != 1) 4221 if (fwrite(VIMSPELLMAGIC, VIMSPELLMAGICL, (size_t)1, fd) != 1)
4116 EMSG(_(e_write)); 4222 EMSG(_(e_write));
4144 putc(0, fd); 4250 putc(0, fd);
4145 } 4251 }
4146 else 4252 else
4147 write_spell_chartab(fd); 4253 write_spell_chartab(fd);
4148 4254
4255
4256 if (spin->si_midword == NULL)
4257 put_bytes(fd, 0L, 2); /* <midwordlen> */
4258 else
4259 {
4260 i = STRLEN(spin->si_midword);
4261 put_bytes(fd, (long_u)i, 2); /* <midwordlen> */
4262 fwrite(spin->si_midword, (size_t)i, (size_t)1, fd); /* <midword> */
4263 }
4264
4265
4149 /* Write the prefix conditions. */ 4266 /* Write the prefix conditions. */
4150 write_spell_prefcond(fd, &spin->si_prefcond); 4267 write_spell_prefcond(fd, &spin->si_prefcond);
4268
4269 /* <SUGGEST> : <repcount> <rep> ...
4270 * <salflags> <salcount> <sal> ...
4271 * <maplen> <mapstr> */
4151 4272
4152 /* Sort the REP items. */ 4273 /* Sort the REP items. */
4153 qsort(spin->si_rep.ga_data, (size_t)spin->si_rep.ga_len, 4274 qsort(spin->si_rep.ga_data, (size_t)spin->si_rep.ga_len,
4154 sizeof(fromto_T), rep_compare); 4275 sizeof(fromto_T), rep_compare);
4155 4276
4156 /* <SUGGEST> : <repcount> <rep> ...
4157 * <salflags> <salcount> <sal> ...
4158 * <maplen> <mapstr> */
4159 for (round = 1; round <= 2; ++round) 4277 for (round = 1; round <= 2; ++round)
4160 { 4278 {
4161 if (round == 1) 4279 if (round == 1)
4162 gap = &spin->si_rep; 4280 gap = &spin->si_rep;
4163 else 4281 else
4300 /* For a NUL byte (end of word) write the flags etc. */ 4418 /* For a NUL byte (end of word) write the flags etc. */
4301 if (prefixtree) 4419 if (prefixtree)
4302 { 4420 {
4303 /* In PREFIXTREE write the required prefixID and the 4421 /* In PREFIXTREE write the required prefixID and the
4304 * associated condition nr (stored in wn_region). */ 4422 * associated condition nr (stored in wn_region). */
4305 putc(BY_FLAGS, fd); /* <byte> */ 4423 if (np->wn_flags == (char_u)-2)
4424 putc(BY_FLAGS, fd); /* <byte> rare */
4425 else
4426 putc(BY_NOFLAGS, fd); /* <byte> */
4306 putc(np->wn_prefixID, fd); /* <prefixID> */ 4427 putc(np->wn_prefixID, fd); /* <prefixID> */
4307 put_bytes(fd, (long_u)np->wn_region, 2); /* <prefcondnr> */ 4428 put_bytes(fd, (long_u)np->wn_region, 2); /* <prefcondnr> */
4308 } 4429 }
4309 else 4430 else
4310 { 4431 {
4447 * "path/en.latin1.add.spl". */ 4568 * "path/en.latin1.add.spl". */
4448 innames = &fnames[0]; 4569 innames = &fnames[0];
4449 incount = 1; 4570 incount = 1;
4450 vim_snprintf((char *)wfname, sizeof(wfname), "%s.spl", fnames[0]); 4571 vim_snprintf((char *)wfname, sizeof(wfname), "%s.spl", fnames[0]);
4451 } 4572 }
4573 else if (fcount == 1)
4574 {
4575 /* For ":mkspell path/vim" output file is "path/vim.latin1.spl". */
4576 innames = &fnames[0];
4577 incount = 1;
4578 vim_snprintf((char *)wfname, sizeof(wfname), "%s.%s.spl", fnames[0],
4579 spin.si_ascii ? (char_u *)"ascii" : spell_enc());
4580 }
4452 else if (len > 4 && STRCMP(fnames[0] + len - 4, ".spl") == 0) 4581 else if (len > 4 && STRCMP(fnames[0] + len - 4, ".spl") == 0)
4453 { 4582 {
4454 /* Name ends in ".spl", use as the file name. */ 4583 /* Name ends in ".spl", use as the file name. */
4455 vim_strncpy(wfname, fnames[0], sizeof(wfname) - 1); 4584 vim_strncpy(wfname, fnames[0], sizeof(wfname) - 1);
4456 } 4585 }
4606 */ 4735 */
4607 if (!added_word || p_verbose > 2) 4736 if (!added_word || p_verbose > 2)
4608 { 4737 {
4609 if (added_word) 4738 if (added_word)
4610 verbose_enter(); 4739 verbose_enter();
4611 smsg((char_u *)_("Writing spell file %s..."), wfname); 4740 smsg((char_u *)_("Writing spell file %s ..."), wfname);
4612 out_flush(); 4741 out_flush();
4613 if (added_word) 4742 if (added_word)
4614 verbose_leave(); 4743 verbose_leave();
4615 } 4744 }
4616 4745
4635 /* Free the allocated memory. */ 4764 /* Free the allocated memory. */
4636 ga_clear(&spin.si_rep); 4765 ga_clear(&spin.si_rep);
4637 ga_clear(&spin.si_sal); 4766 ga_clear(&spin.si_sal);
4638 ga_clear(&spin.si_map); 4767 ga_clear(&spin.si_map);
4639 ga_clear(&spin.si_prefcond); 4768 ga_clear(&spin.si_prefcond);
4769 vim_free(spin.si_midword);
4640 4770
4641 /* Free the .aff file structures. */ 4771 /* Free the .aff file structures. */
4642 for (i = 0; i < incount; ++i) 4772 for (i = 0; i < incount; ++i)
4643 if (afile[i] != NULL) 4773 if (afile[i] != NULL)
4644 spell_free_aff(afile[i]); 4774 spell_free_aff(afile[i]);
4827 { 4957 {
4828 int i; 4958 int i;
4829 4959
4830 did_set_spelltab = FALSE; 4960 did_set_spelltab = FALSE;
4831 clear_spell_chartab(&spelltab); 4961 clear_spell_chartab(&spelltab);
4832 4962 vim_memset(spell_ismw, FALSE, sizeof(spell_ismw));
4833 #ifdef FEAT_MBYTE 4963 #ifdef FEAT_MBYTE
4964 vim_free(spell_ismw_mb);
4965 spell_ismw_mb = NULL;
4966
4834 if (enc_dbcs) 4967 if (enc_dbcs)
4835 { 4968 {
4836 /* DBCS: assume double-wide characters are word characters. */ 4969 /* DBCS: assume double-wide characters are word characters. */
4837 for (i = 128; i <= 255; ++i) 4970 for (i = 128; i <= 255; ++i)
4838 if (MB_BYTE2LEN(i) == 2) 4971 if (MB_BYTE2LEN(i) == 2)
5019 return OK; 5152 return OK;
5020 } 5153 }
5021 5154
5022 /* 5155 /*
5023 * Return TRUE if "p" points to a word character. 5156 * Return TRUE if "p" points to a word character.
5024 * As a special case we see a single quote as a word character when it is 5157 * As a special case we see a "midword" character as a word character when it is
5025 * followed by a word character. This finds they'there but not 'they there'. 5158 * followed by a word character. This finds they'there but not 'they there'.
5159 * Thus this only works properly when past the first character of the word.
5026 */ 5160 */
5027 static int 5161 static int
5028 spell_iswordp(p) 5162 spell_iswordp(p)
5029 char_u *p; 5163 char_u *p;
5030 { 5164 {
5165 #ifdef FEAT_MBYTE
5031 char_u *s; 5166 char_u *s;
5032 5167 int l;
5033 if (*p == '\'') 5168 int c;
5034 s = p + 1; 5169
5035 else 5170 if (has_mbyte)
5171 {
5172 l = MB_BYTE2LEN(*p);
5036 s = p; 5173 s = p;
5037 #ifdef FEAT_MBYTE 5174 if (l == 1)
5038 if (has_mbyte && MB_BYTE2LEN(*s) > 1) 5175 {
5039 return mb_get_class(s) >= 2; 5176 /* be quick for ASCII */
5177 if (spell_ismw[*p])
5178 {
5179 s = p + 1; /* skip a mid-word character */
5180 l = MB_BYTE2LEN(*s);
5181 }
5182 }
5183 else
5184 {
5185 c = mb_ptr2char(p);
5186 if (c < 256 ? spell_ismw[c] : (spell_ismw_mb != NULL
5187 && vim_strchr(spell_ismw_mb, c) != NULL))
5188 {
5189 s = p + l;
5190 l = MB_BYTE2LEN(*s);
5191 }
5192 }
5193
5194 if (l > 1)
5195 return mb_get_class(s) >= 2;
5196 return spelltab.st_isw[*s];
5197 }
5040 #endif 5198 #endif
5041 return spelltab.st_isw[*s]; 5199
5200 return spelltab.st_isw[spell_ismw[*p] ? p[1] : p[0]];
5042 } 5201 }
5043 5202
5044 /* 5203 /*
5045 * Write the table with prefix conditions to the .spl file. 5204 * Write the table with prefix conditions to the .spl file.
5046 */ 5205 */
5716 5875
5717 if ((fword[sp->ts_fidx] == NUL 5876 if ((fword[sp->ts_fidx] == NUL
5718 || !spell_iswordp(fword + sp->ts_fidx)) 5877 || !spell_iswordp(fword + sp->ts_fidx))
5719 && sp->ts_fidx >= sp->ts_fidxtry) 5878 && sp->ts_fidx >= sp->ts_fidxtry)
5720 { 5879 {
5721 /* The badword also ends: add suggestions, */ 5880 /* The badword also ends: add suggestions. Give a penalty
5881 * when changing non-word char to word char, e.g., "thes,"
5882 * -> "these". */
5883 p = fword + sp->ts_fidx;
5884 #ifdef FEAT_MBYTE
5885 if (has_mbyte)
5886 mb_ptr_back(fword, p);
5887 else
5888 #endif
5889 --p;
5890 if (!spell_iswordp(p))
5891 {
5892 p = preword + STRLEN(preword);
5893 #ifdef FEAT_MBYTE
5894 if (has_mbyte)
5895 mb_ptr_back(preword, p);
5896 else
5897 #endif
5898 --p;
5899 if (spell_iswordp(p))
5900 newscore += SCORE_NONWORD;
5901 }
5902
5722 add_suggestion(su, &su->su_ga, preword, 5903 add_suggestion(su, &su->su_ga, preword,
5723 sp->ts_fidx - repextra, 5904 sp->ts_fidx - repextra,
5724 sp->ts_score + newscore, 0, FALSE); 5905 sp->ts_score + newscore, 0, FALSE);
5725 } 5906 }
5726 else if (sp->ts_fidx >= sp->ts_fidxtry 5907 else if (sp->ts_fidx >= sp->ts_fidxtry
7091 p = NULL; 7272 p = NULL;
7092 } 7273 }
7093 7274
7094 if (score <= su->su_maxscore) 7275 if (score <= su->su_maxscore)
7095 { 7276 {
7096 /* Check if the word is already there. */ 7277 /* Check if the word is already there. Also check the length that is
7278 * being replaced: "thes," -> "these" is a different suggestion from
7279 * "thes" -> "these". */
7097 stp = &SUG(*gap, 0); 7280 stp = &SUG(*gap, 0);
7098 for (i = gap->ga_len - 1; i >= 0; --i) 7281 for (i = gap->ga_len - 1; i >= 0; --i)
7099 if (STRCMP(stp[i].st_word, goodword) == 0) 7282 if (STRCMP(stp[i].st_word, goodword) == 0
7283 && stp[i].st_orglen == badlen)
7100 { 7284 {
7101 /* Found it. Remember the lowest score. */ 7285 /* Found it. Remember the lowest score. */
7102 if (stp[i].st_score > score) 7286 if (stp[i].st_score > score)
7103 { 7287 {
7104 stp[i].st_score = score; 7288 stp[i].st_score = score;
8162 for (i = 1; i < len; ++i) 8346 for (i = 1; i < len; ++i)
8163 if (byts[n + i] != 0) 8347 if (byts[n + i] != 0)
8164 break; 8348 break;
8165 curi[depth] += i - 1; 8349 curi[depth] += i - 1;
8166 8350
8167 if (valid_word_prefix(i, n, prefid, word, slang)) 8351 i = valid_word_prefix(i, n, prefid, word, slang);
8352 if (i != 0)
8168 { 8353 {
8169 vim_strncpy(prefix + depth, word, MAXWLEN - depth); 8354 vim_strncpy(prefix + depth, word, MAXWLEN - depth);
8170 dump_word(prefix, round, flags, lnum++); 8355 dump_word(prefix, round,
8356 (i & WF_RAREPFX) ? (flags | WF_RARE)
8357 : flags, lnum++);
8171 } 8358 }
8172 } 8359 }
8173 else 8360 else
8174 { 8361 {
8175 /* Normal char, go one level deeper. */ 8362 /* Normal char, go one level deeper. */