Mercurial > vim
comparison src/spell.c @ 366:b498dee21bd3
updated for version 7.0095
author | vimboss |
---|---|
date | Mon, 27 Jun 2005 22:27:46 +0000 |
parents | e111db373ca4 |
children | a698eb686ded |
comparison
equal
deleted
inserted
replaced
365:bda4394122e0 | 366:b498dee21bd3 |
---|---|
84 * | 84 * |
85 * <HEADER>: <fileID> | 85 * <HEADER>: <fileID> |
86 * <regioncnt> <regionname> ... | 86 * <regioncnt> <regionname> ... |
87 * <charflagslen> <charflags> | 87 * <charflagslen> <charflags> |
88 * <fcharslen> <fchars> | 88 * <fcharslen> <fchars> |
89 * <midwordlen> <midword> | |
89 * <prefcondcnt> <prefcond> ... | 90 * <prefcondcnt> <prefcond> ... |
90 * | 91 * |
91 * <fileID> 10 bytes "VIMspell07" | 92 * <fileID> 10 bytes "VIMspell08" |
92 * <regioncnt> 1 byte number of regions following (8 supported) | 93 * <regioncnt> 1 byte number of regions following (8 supported) |
93 * <regionname> 2 bytes Region name: ca, au, etc. Lower case. | 94 * <regionname> 2 bytes Region name: ca, au, etc. Lower case. |
94 * First <regionname> is region 1. | 95 * First <regionname> is region 1. |
95 * | 96 * |
96 * <charflagslen> 1 byte Number of bytes in <charflags> (should be 128). | 97 * <charflagslen> 1 byte Number of bytes in <charflags> (should be 128). |
97 * <charflags> N bytes List of flags (first one is for character 128): | 98 * <charflags> N bytes List of flags (first one is for character 128): |
98 * 0x01 word character CF_WORD | 99 * 0x01 word character CF_WORD |
99 * 0x02 upper-case character CF_UPPER | 100 * 0x02 upper-case character CF_UPPER |
100 * <fcharslen> 2 bytes Number of bytes in <fchars>. | 101 * <fcharslen> 2 bytes Number of bytes in <fchars>. |
101 * <fchars> N bytes Folded characters, first one is for character 128. | 102 * <fchars> N bytes Folded characters, first one is for character 128. |
103 * | |
104 * <midwordlen> 2 bytes Number of bytes in <midword>. | |
105 * <midword> N bytes Characters that are word characters only when used | |
106 * in the middle of a word. | |
102 * | 107 * |
103 * <prefcondcnt> 2 bytes Number of <prefcond> items following. | 108 * <prefcondcnt> 2 bytes Number of <prefcond> items following. |
104 * | 109 * |
105 * <prefcond> : <condlen> <condstr> | 110 * <prefcond> : <condlen> <condstr> |
106 * | 111 * |
167 * | <prefixID> <prefcondnr> ] | 172 * | <prefixID> <prefcondnr> ] |
168 * | 173 * |
169 * <byte> 1 byte Byte value of the sibling. Special cases: | 174 * <byte> 1 byte Byte value of the sibling. Special cases: |
170 * BY_NOFLAGS: End of word without flags and for all | 175 * BY_NOFLAGS: End of word without flags and for all |
171 * regions. | 176 * regions. |
172 * BY_FLAGS: End of word, <flags> follow. For | 177 * For PREFIXTREE <prefixID> and |
173 * PREFIXTREE <prefixID> and <prefcondnr> | 178 * <prefcondnr> follow. |
174 * follow. | 179 * BY_FLAGS: End of word, <flags> follow. |
180 * For PREFIXTREE <prefixID> and | |
181 * <prefcondnr> follow for rare prefix. | |
175 * BY_INDEX: Child of sibling is shared, <nodeidx> | 182 * BY_INDEX: Child of sibling is shared, <nodeidx> |
176 * and <xbyte> follow. | 183 * and <xbyte> follow. |
177 * | 184 * |
178 * <nodeidx> 3 bytes Index of child for this sibling, MSB first. | 185 * <nodeidx> 3 bytes Index of child for this sibling, MSB first. |
179 * | 186 * |
233 #define WF_PFX 0x20 /* prefix ID list follows */ | 240 #define WF_PFX 0x20 /* prefix ID list follows */ |
234 #define WF_KEEPCAP 0x80 /* keep-case word */ | 241 #define WF_KEEPCAP 0x80 /* keep-case word */ |
235 | 242 |
236 #define WF_CAPMASK (WF_ONECAP | WF_ALLCAP | WF_KEEPCAP) | 243 #define WF_CAPMASK (WF_ONECAP | WF_ALLCAP | WF_KEEPCAP) |
237 | 244 |
245 #define WF_RAREPFX 0x1000000 /* in sl_pidxs: flag for rare postponed | |
246 prefix; must be above prefixID (one byte) | |
247 and prefcondnr (two bytes) */ | |
248 | |
238 #define BY_NOFLAGS 0 /* end of word without flags or region */ | 249 #define BY_NOFLAGS 0 /* end of word without flags or region */ |
239 #define BY_FLAGS 1 /* end of word, flag byte follows */ | 250 #define BY_FLAGS 1 /* end of word, flag byte follows */ |
240 #define BY_INDEX 2 /* child is shared, index follows */ | 251 #define BY_INDEX 2 /* child is shared, index follows */ |
241 #define BY_SPECIAL BY_INDEX /* highest special byte value */ | 252 #define BY_SPECIAL BY_INDEX /* highest special byte value */ |
242 | 253 |
341 #define SP_OK 0 | 352 #define SP_OK 0 |
342 #define SP_RARE 1 | 353 #define SP_RARE 1 |
343 #define SP_LOCAL 2 | 354 #define SP_LOCAL 2 |
344 #define SP_BAD 3 | 355 #define SP_BAD 3 |
345 | 356 |
346 #define VIMSPELLMAGIC "VIMspell07" /* string at start of Vim spell file */ | 357 #define VIMSPELLMAGIC "VIMspell08" /* string at start of Vim spell file */ |
347 #define VIMSPELLMAGICL 10 | 358 #define VIMSPELLMAGICL 10 |
348 | 359 |
349 /* | 360 /* |
350 * Information used when looking for suggestions. | 361 * Information used when looking for suggestions. |
351 */ | 362 */ |
397 #define SCORE_SIMILAR 33 /* substitute a similar character */ | 408 #define SCORE_SIMILAR 33 /* substitute a similar character */ |
398 #define SCORE_DEL 94 /* delete a character */ | 409 #define SCORE_DEL 94 /* delete a character */ |
399 #define SCORE_DELDUP 64 /* delete a duplicated character */ | 410 #define SCORE_DELDUP 64 /* delete a duplicated character */ |
400 #define SCORE_INS 96 /* insert a character */ | 411 #define SCORE_INS 96 /* insert a character */ |
401 #define SCORE_INSDUP 66 /* insert a duplicate character */ | 412 #define SCORE_INSDUP 66 /* insert a duplicate character */ |
413 #define SCORE_NONWORD 103 /* change non-word to word char */ | |
402 | 414 |
403 #define SCORE_MAXINIT 350 /* Initial maximum score: higher == slower. | 415 #define SCORE_MAXINIT 350 /* Initial maximum score: higher == slower. |
404 * 350 allows for about three changes. */ | 416 * 350 allows for about three changes. */ |
405 | 417 |
406 #define SCORE_BIG SCORE_INS * 3 /* big difference */ | 418 #define SCORE_BIG SCORE_INS * 3 /* big difference */ |
447 char_u st_upper[256]; /* chars: upper case */ | 459 char_u st_upper[256]; /* chars: upper case */ |
448 } spelltab_T; | 460 } spelltab_T; |
449 | 461 |
450 static spelltab_T spelltab; | 462 static spelltab_T spelltab; |
451 static int did_set_spelltab; | 463 static int did_set_spelltab; |
464 static char_u spell_ismw[256]; /* flags: is midword char */ | |
465 #ifdef FEAT_MBYTE | |
466 static char_u *spell_ismw_mb = NULL; /* multi-byte midword chars */ | |
467 #endif | |
452 | 468 |
453 #define CF_WORD 0x01 | 469 #define CF_WORD 0x01 |
454 #define CF_UPPER 0x02 | 470 #define CF_UPPER 0x02 |
455 | 471 |
456 static void clear_spell_chartab __ARGS((spelltab_T *sp)); | 472 static void clear_spell_chartab __ARGS((spelltab_T *sp)); |
959 continue; | 975 continue; |
960 } | 976 } |
961 | 977 |
962 /* When mode is FIND_PREFIX the word must support the prefix: | 978 /* When mode is FIND_PREFIX the word must support the prefix: |
963 * check the prefix ID and the condition. Do that for the list at | 979 * check the prefix ID and the condition. Do that for the list at |
964 * mip->mi_prefarridx. */ | 980 * mip->mi_prefarridx that find_prefix() filled. */ |
965 if (mode == FIND_PREFIX) | 981 if (mode == FIND_PREFIX) |
966 { | 982 { |
967 /* The prefix ID is stored two bytes above the flags. */ | 983 /* The prefix ID is stored two bytes above the flags. */ |
968 prefid = (unsigned)flags >> 16; | 984 prefid = (unsigned)flags >> 16; |
969 if (!valid_word_prefix(mip->mi_prefcnt, mip->mi_prefarridx, | 985 c = valid_word_prefix(mip->mi_prefcnt, mip->mi_prefarridx, |
970 prefid, mip->mi_fword + mip->mi_prefixlen, | 986 prefid, mip->mi_fword + mip->mi_prefixlen, |
971 slang)) | 987 slang); |
988 if (c == 0) | |
972 continue; | 989 continue; |
990 | |
991 /* Use the WF_RARE flag for a rare prefix. */ | |
992 if (c & WF_RAREPFX) | |
993 flags |= WF_RARE; | |
973 } | 994 } |
974 | 995 |
975 if (flags & WF_BANNED) | 996 if (flags & WF_BANNED) |
976 res = SP_BANNED; | 997 res = SP_BANNED; |
977 else if (flags & WF_REGION) | 998 else if (flags & WF_REGION) |
1004 break; | 1025 break; |
1005 } | 1026 } |
1006 } | 1027 } |
1007 | 1028 |
1008 /* | 1029 /* |
1009 * Return TRUE if the prefix indicated by "mip->mi_prefarridx" matches with | 1030 * Return non-zero if the prefix indicated by "mip->mi_prefarridx" matches |
1010 * the prefix ID "prefid" for the word "word". | 1031 * with the prefix ID "prefid" for the word "word". |
1032 * The WF_RAREPFX flag is included in the return value for a rare prefix. | |
1011 */ | 1033 */ |
1012 static int | 1034 static int |
1013 valid_word_prefix(totprefcnt, arridx, prefid, word, slang) | 1035 valid_word_prefix(totprefcnt, arridx, prefid, word, slang) |
1014 int totprefcnt; /* nr of prefix IDs */ | 1036 int totprefcnt; /* nr of prefix IDs */ |
1015 int arridx; /* idx in sl_pidxs[] */ | 1037 int arridx; /* idx in sl_pidxs[] */ |
1029 /* Check the prefix ID. */ | 1051 /* Check the prefix ID. */ |
1030 if (prefid != (pidx & 0xff)) | 1052 if (prefid != (pidx & 0xff)) |
1031 continue; | 1053 continue; |
1032 | 1054 |
1033 /* Check the condition, if there is one. The condition index is | 1055 /* Check the condition, if there is one. The condition index is |
1034 * stored above the prefix ID byte. */ | 1056 * stored in the two bytes above the prefix ID byte. */ |
1035 rp = slang->sl_prefprog[(unsigned)pidx >> 8]; | 1057 rp = slang->sl_prefprog[((unsigned)pidx >> 8) & 0xffff]; |
1036 if (rp != NULL) | 1058 if (rp != NULL) |
1037 { | 1059 { |
1038 regmatch.regprog = rp; | 1060 regmatch.regprog = rp; |
1039 regmatch.rm_ic = FALSE; | 1061 regmatch.rm_ic = FALSE; |
1040 if (!vim_regexec(&regmatch, word, 0)) | 1062 if (!vim_regexec(&regmatch, word, 0)) |
1041 continue; | 1063 continue; |
1042 } | 1064 } |
1043 | 1065 |
1044 /* It's a match! */ | 1066 /* It's a match! Return the WF_RAREPFX flag. */ |
1045 return TRUE; | 1067 return pidx; |
1046 } | 1068 } |
1047 return FALSE; | 1069 return 0; |
1048 } | 1070 } |
1049 | 1071 |
1050 /* | 1072 /* |
1051 * Check if the word at "mip->mi_word" has a matching prefix. | 1073 * Check if the word at "mip->mi_word" has a matching prefix. |
1052 * If it does, then check the following word. | 1074 * If it does, then check the following word. |
1645 | 1667 |
1646 /* <HEADER>: <fileID> | 1668 /* <HEADER>: <fileID> |
1647 * <regioncnt> <regionname> ... | 1669 * <regioncnt> <regionname> ... |
1648 * <charflagslen> <charflags> | 1670 * <charflagslen> <charflags> |
1649 * <fcharslen> <fchars> | 1671 * <fcharslen> <fchars> |
1672 * <midwordlen> <midword> | |
1650 * <prefcondcnt> <prefcond> ... | 1673 * <prefcondcnt> <prefcond> ... |
1651 */ | 1674 */ |
1652 for (i = 0; i < VIMSPELLMAGICL; ++i) | 1675 for (i = 0; i < VIMSPELLMAGICL; ++i) |
1653 buf[i] = getc(fd); /* <fileID> */ | 1676 buf[i] = getc(fd); /* <fileID> */ |
1654 if (STRNCMP(buf, VIMSPELLMAGIC, VIMSPELLMAGICL) != 0) | 1677 if (STRNCMP(buf, VIMSPELLMAGIC, VIMSPELLMAGICL) != 0) |
1715 { | 1738 { |
1716 /* When <charflagslen> is zero then <fcharslen> must also be zero. */ | 1739 /* When <charflagslen> is zero then <fcharslen> must also be zero. */ |
1717 cnt = (getc(fd) << 8) + getc(fd); | 1740 cnt = (getc(fd) << 8) + getc(fd); |
1718 if (cnt != 0) | 1741 if (cnt != 0) |
1719 goto formerr; | 1742 goto formerr; |
1743 } | |
1744 | |
1745 /* <midwordlen> <midword> */ | |
1746 cnt = (getc(fd) << 8) + getc(fd); | |
1747 if (cnt < 0) | |
1748 goto truncerr; | |
1749 if (cnt > 0) | |
1750 { | |
1751 for (i = 0; i < cnt; ++i) | |
1752 if (i < MAXWLEN) /* truncate at reasonable length */ | |
1753 buf[i] = getc(fd); | |
1754 if (i < MAXWLEN) | |
1755 buf[i] = NUL; | |
1756 else | |
1757 buf[MAXWLEN] = NUL; | |
1758 | |
1759 /* The midword characters add up to any midword characters from other | |
1760 * .spl files. */ | |
1761 for (p = buf; *p != NUL; ) | |
1762 #ifdef FEAT_MBYTE | |
1763 if (has_mbyte) | |
1764 { | |
1765 c = mb_ptr2char(p); | |
1766 i = mb_ptr2len_check(p); | |
1767 if (c < 256) | |
1768 spell_ismw[c] = TRUE; | |
1769 else if (spell_ismw_mb == NULL) | |
1770 /* First multi-byte char in "spell_ismw_mb". */ | |
1771 spell_ismw_mb = vim_strnsave(p, i); | |
1772 else | |
1773 { | |
1774 /* Append multi-byte chars to "spell_ismw_mb". */ | |
1775 n = STRLEN(spell_ismw_mb); | |
1776 bp = vim_strnsave(spell_ismw_mb, n + i); | |
1777 if (bp != NULL) | |
1778 { | |
1779 vim_free(spell_ismw_mb); | |
1780 spell_ismw_mb = bp; | |
1781 vim_strncpy(bp + n, p, i); | |
1782 } | |
1783 } | |
1784 p += i; | |
1785 } | |
1786 else | |
1787 #endif | |
1788 spell_ismw[*p++] = TRUE; | |
1720 } | 1789 } |
1721 | 1790 |
1722 /* <prefcondcnt> <prefcond> ... */ | 1791 /* <prefcondcnt> <prefcond> ... */ |
1723 cnt = (getc(fd) << 8) + getc(fd); /* <prefcondcnt> */ | 1792 cnt = (getc(fd) << 8) + getc(fd); /* <prefcondcnt> */ |
1724 if (cnt > 0) | 1793 if (cnt > 0) |
2002 int len; | 2071 int len; |
2003 int i; | 2072 int i; |
2004 int n; | 2073 int n; |
2005 idx_T idx = startidx; | 2074 idx_T idx = startidx; |
2006 int c; | 2075 int c; |
2076 int c2; | |
2007 #define SHARED_MASK 0x8000000 | 2077 #define SHARED_MASK 0x8000000 |
2008 | 2078 |
2009 len = getc(fd); /* <siblingcount> */ | 2079 len = getc(fd); /* <siblingcount> */ |
2010 if (len <= 0) | 2080 if (len <= 0) |
2011 return -1; | 2081 return -1; |
2020 c = getc(fd); /* <byte> */ | 2090 c = getc(fd); /* <byte> */ |
2021 if (c < 0) | 2091 if (c < 0) |
2022 return -1; | 2092 return -1; |
2023 if (c <= BY_SPECIAL) | 2093 if (c <= BY_SPECIAL) |
2024 { | 2094 { |
2025 if (c == BY_NOFLAGS) | 2095 if (c == BY_NOFLAGS && !prefixtree) |
2026 { | 2096 { |
2027 /* No flags, all regions. */ | 2097 /* No flags, all regions. */ |
2028 idxs[idx] = 0; | 2098 idxs[idx] = 0; |
2029 c = 0; | 2099 c = 0; |
2030 } | 2100 } |
2031 else if (c == BY_FLAGS) | 2101 else if (c == BY_FLAGS || c == BY_NOFLAGS) |
2032 { | 2102 { |
2033 if (prefixtree) | 2103 if (prefixtree) |
2034 { | 2104 { |
2035 /* Read the prefix ID and the condition nr. In idxs[] | 2105 /* Read the prefix ID and the condition nr. In idxs[] |
2036 * store the prefix ID in the low byte, the condition | 2106 * store the prefix ID in the low byte, the condition |
2037 * index shifted up 8 bits. */ | 2107 * index shifted up 8 bits. */ |
2038 c = getc(fd); /* <prefixID> */ | 2108 c2 = getc(fd); /* <prefixID> */ |
2039 n = (getc(fd) << 8) + getc(fd); /* <prefcondnr> */ | 2109 n = (getc(fd) << 8) + getc(fd); /* <prefcondnr> */ |
2040 if (n >= maxprefcondnr) | 2110 if (n >= maxprefcondnr) |
2041 return -2; | 2111 return -2; |
2042 c = (n << 8) + c; | 2112 c2 += (n << 8); |
2113 if (c == BY_NOFLAGS) | |
2114 c = c2; | |
2115 else | |
2116 c = c2 | WF_RAREPFX; | |
2043 } | 2117 } |
2044 else | 2118 else |
2045 { | 2119 { |
2046 /* Read flags and optional region and prefix ID. In | 2120 /* Read flags and optional region and prefix ID. In |
2047 * idxs[] the flags go in the low byte, region above that | 2121 * idxs[] the flags go in the low byte, region above that |
2354 { | 2428 { |
2355 lp = first_lang; | 2429 lp = first_lang; |
2356 first_lang = lp->sl_next; | 2430 first_lang = lp->sl_next; |
2357 slang_free(lp); | 2431 slang_free(lp); |
2358 } | 2432 } |
2433 | |
2434 init_spell_chartab(); | |
2359 } | 2435 } |
2360 # endif | 2436 # endif |
2361 | 2437 |
2362 # if defined(FEAT_MBYTE) || defined(PROTO) | 2438 # if defined(FEAT_MBYTE) || defined(PROTO) |
2363 /* | 2439 /* |
2450 affentry_T *ae_next; /* next affix with same name/number */ | 2526 affentry_T *ae_next; /* next affix with same name/number */ |
2451 char_u *ae_chop; /* text to chop off basic word (can be NULL) */ | 2527 char_u *ae_chop; /* text to chop off basic word (can be NULL) */ |
2452 char_u *ae_add; /* text to add to basic word (can be NULL) */ | 2528 char_u *ae_add; /* text to add to basic word (can be NULL) */ |
2453 char_u *ae_cond; /* condition (NULL for ".") */ | 2529 char_u *ae_cond; /* condition (NULL for ".") */ |
2454 regprog_T *ae_prog; /* regexp program for ae_cond or NULL */ | 2530 regprog_T *ae_prog; /* regexp program for ae_cond or NULL */ |
2531 int ae_rare; /* rare affix */ | |
2455 }; | 2532 }; |
2456 | 2533 |
2457 /* Affix header from ".aff" file. Used for af_pref and af_suff. */ | 2534 /* Affix header from ".aff" file. Used for af_pref and af_suff. */ |
2458 typedef struct affheader_S | 2535 typedef struct affheader_S |
2459 { | 2536 { |
2534 garray_T si_sal; /* list of fromto_T entries from SAL lines */ | 2611 garray_T si_sal; /* list of fromto_T entries from SAL lines */ |
2535 int si_followup; /* soundsalike: ? */ | 2612 int si_followup; /* soundsalike: ? */ |
2536 int si_collapse; /* soundsalike: ? */ | 2613 int si_collapse; /* soundsalike: ? */ |
2537 int si_rem_accents; /* soundsalike: remove accents */ | 2614 int si_rem_accents; /* soundsalike: remove accents */ |
2538 garray_T si_map; /* MAP info concatenated */ | 2615 garray_T si_map; /* MAP info concatenated */ |
2616 char_u *si_midword; /* MIDWORD chars, alloc'ed string or NULL */ | |
2539 garray_T si_prefcond; /* table with conditions for postponed | 2617 garray_T si_prefcond; /* table with conditions for postponed |
2540 * prefixes, each stored as a string */ | 2618 * prefixes, each stored as a string */ |
2541 int si_newID; /* current value for ah_newID */ | 2619 int si_newID; /* current value for ah_newID */ |
2542 } spellinfo_T; | 2620 } spellinfo_T; |
2543 | 2621 |
2593 char_u *upp = NULL; | 2671 char_u *upp = NULL; |
2594 static char *e_affname = N_("Affix name too long in %s line %d: %s"); | 2672 static char *e_affname = N_("Affix name too long in %s line %d: %s"); |
2595 int do_rep; | 2673 int do_rep; |
2596 int do_sal; | 2674 int do_sal; |
2597 int do_map; | 2675 int do_map; |
2676 int do_midword; | |
2598 int found_map = FALSE; | 2677 int found_map = FALSE; |
2599 hashitem_T *hi; | 2678 hashitem_T *hi; |
2600 | 2679 |
2601 /* | 2680 /* |
2602 * Open the file. | 2681 * Open the file. |
2610 | 2689 |
2611 if (spin->si_verbose || p_verbose > 2) | 2690 if (spin->si_verbose || p_verbose > 2) |
2612 { | 2691 { |
2613 if (!spin->si_verbose) | 2692 if (!spin->si_verbose) |
2614 verbose_enter(); | 2693 verbose_enter(); |
2615 smsg((char_u *)_("Reading affix file %s..."), fname); | 2694 smsg((char_u *)_("Reading affix file %s ..."), fname); |
2616 out_flush(); | 2695 out_flush(); |
2617 if (!spin->si_verbose) | 2696 if (!spin->si_verbose) |
2618 verbose_leave(); | 2697 verbose_leave(); |
2619 } | 2698 } |
2620 | 2699 |
2624 /* Only do SAL lines when not done in another .aff file already. */ | 2703 /* Only do SAL lines when not done in another .aff file already. */ |
2625 do_sal = spin->si_sal.ga_len == 0; | 2704 do_sal = spin->si_sal.ga_len == 0; |
2626 | 2705 |
2627 /* Only do MAP lines when not done in another .aff file already. */ | 2706 /* Only do MAP lines when not done in another .aff file already. */ |
2628 do_map = spin->si_map.ga_len == 0; | 2707 do_map = spin->si_map.ga_len == 0; |
2708 | |
2709 /* Only do MIDWORD line when not done in another .aff file already */ | |
2710 do_midword = spin->si_midword == NULL; | |
2629 | 2711 |
2630 /* | 2712 /* |
2631 * Allocate and init the afffile_T structure. | 2713 * Allocate and init the afffile_T structure. |
2632 */ | 2714 */ |
2633 aff = (afffile_T *)getroom(&spin->si_blocks, sizeof(afffile_T)); | 2715 aff = (afffile_T *)getroom(&spin->si_blocks, sizeof(afffile_T)); |
2703 smsg((char_u *)_("Conversion in %s not supported: from %s to %s"), | 2785 smsg((char_u *)_("Conversion in %s not supported: from %s to %s"), |
2704 fname, aff->af_enc, p_enc); | 2786 fname, aff->af_enc, p_enc); |
2705 #else | 2787 #else |
2706 smsg((char_u *)_("Conversion in %s not supported"), fname); | 2788 smsg((char_u *)_("Conversion in %s not supported"), fname); |
2707 #endif | 2789 #endif |
2790 } | |
2791 else if (STRCMP(items[0], "MIDWORD") == 0 && itemcnt == 2) | |
2792 { | |
2793 if (do_midword) | |
2794 spin->si_midword = vim_strsave(items[1]); | |
2708 } | 2795 } |
2709 else if (STRCMP(items[0], "NOSPLITSUGS") == 0 && itemcnt == 1) | 2796 else if (STRCMP(items[0], "NOSPLITSUGS") == 0 && itemcnt == 1) |
2710 { | 2797 { |
2711 /* ignored, we always split */ | 2798 /* ignored, we always split */ |
2712 } | 2799 } |
2791 && aff_todo > 0 | 2878 && aff_todo > 0 |
2792 && STRCMP(cur_aff->ah_key, items[1]) == 0 | 2879 && STRCMP(cur_aff->ah_key, items[1]) == 0 |
2793 && itemcnt >= 5) | 2880 && itemcnt >= 5) |
2794 { | 2881 { |
2795 affentry_T *aff_entry; | 2882 affentry_T *aff_entry; |
2883 int rare = FALSE; | |
2884 int lasti = 5; | |
2885 | |
2886 /* Check for "rare" after the other info. */ | |
2887 if (itemcnt > 5 && STRICMP(items[5], "rare") == 0) | |
2888 { | |
2889 rare = TRUE; | |
2890 lasti = 6; | |
2891 } | |
2796 | 2892 |
2797 /* Myspell allows extra text after the item, but that might | 2893 /* Myspell allows extra text after the item, but that might |
2798 * mean mistakes go unnoticed. Require a comment-starter. */ | 2894 * mean mistakes go unnoticed. Require a comment-starter. */ |
2799 if (itemcnt > 5 && *items[5] != '#') | 2895 if (itemcnt > lasti && *items[lasti] != '#') |
2800 smsg((char_u *)_("Trailing text in %s line %d: %s"), | 2896 smsg((char_u *)_("Trailing text in %s line %d: %s"), |
2801 fname, lnum, items[5]); | 2897 fname, lnum, items[lasti]); |
2802 | 2898 |
2803 /* New item for an affix letter. */ | 2899 /* New item for an affix letter. */ |
2804 --aff_todo; | 2900 --aff_todo; |
2805 aff_entry = (affentry_T *)getroom(&spin->si_blocks, | 2901 aff_entry = (affentry_T *)getroom(&spin->si_blocks, |
2806 sizeof(affentry_T)); | 2902 sizeof(affentry_T)); |
2807 if (aff_entry == NULL) | 2903 if (aff_entry == NULL) |
2808 break; | 2904 break; |
2905 aff_entry->ae_rare = rare; | |
2809 | 2906 |
2810 if (STRCMP(items[2], "0") != 0) | 2907 if (STRCMP(items[2], "0") != 0) |
2811 aff_entry->ae_chop = getroom_save(&spin->si_blocks, | 2908 aff_entry->ae_chop = getroom_save(&spin->si_blocks, |
2812 items[2]); | 2909 items[2]); |
2813 if (STRCMP(items[3], "0") != 0) | 2910 if (STRCMP(items[3], "0") != 0) |
2866 /* Add the prefix to the prefix tree. */ | 2963 /* Add the prefix to the prefix tree. */ |
2867 if (aff_entry->ae_add == NULL) | 2964 if (aff_entry->ae_add == NULL) |
2868 p = (char_u *)""; | 2965 p = (char_u *)""; |
2869 else | 2966 else |
2870 p = aff_entry->ae_add; | 2967 p = aff_entry->ae_add; |
2871 tree_add_word(p, spin->si_prefroot, -1, idx, | 2968 tree_add_word(p, spin->si_prefroot, rare ? -2 : -1, |
2872 cur_aff->ah_newID, &spin->si_blocks); | 2969 idx, cur_aff->ah_newID, &spin->si_blocks); |
2873 } | 2970 } |
2874 } | 2971 } |
2875 } | 2972 } |
2876 else if (STRCMP(items[0], "FOL") == 0 && itemcnt == 2) | 2973 else if (STRCMP(items[0], "FOL") == 0 && itemcnt == 2) |
2877 { | 2974 { |
3158 | 3255 |
3159 if (spin->si_verbose || p_verbose > 2) | 3256 if (spin->si_verbose || p_verbose > 2) |
3160 { | 3257 { |
3161 if (!spin->si_verbose) | 3258 if (!spin->si_verbose) |
3162 verbose_enter(); | 3259 verbose_enter(); |
3163 smsg((char_u *)_("Reading dictionary file %s..."), fname); | 3260 smsg((char_u *)_("Reading dictionary file %s ..."), fname); |
3164 out_flush(); | 3261 out_flush(); |
3165 if (!spin->si_verbose) | 3262 if (!spin->si_verbose) |
3166 verbose_leave(); | 3263 verbose_leave(); |
3167 } | 3264 } |
3168 | 3265 |
3382 regmatch_T regmatch; | 3479 regmatch_T regmatch; |
3383 char_u newword[MAXWLEN]; | 3480 char_u newword[MAXWLEN]; |
3384 int retval = OK; | 3481 int retval = OK; |
3385 int i; | 3482 int i; |
3386 char_u *p; | 3483 char_u *p; |
3484 int use_flags; | |
3387 | 3485 |
3388 todo = ht->ht_used; | 3486 todo = ht->ht_used; |
3389 for (hi = ht->ht_array; todo > 0 && retval == OK; ++hi) | 3487 for (hi = ht->ht_array; todo > 0 && retval == OK; ++hi) |
3390 { | 3488 { |
3391 if (!HASHITEM_EMPTY(hi)) | 3489 if (!HASHITEM_EMPTY(hi)) |
3458 } | 3556 } |
3459 if (ae->ae_add != NULL) | 3557 if (ae->ae_add != NULL) |
3460 STRCAT(newword, ae->ae_add); | 3558 STRCAT(newword, ae->ae_add); |
3461 } | 3559 } |
3462 | 3560 |
3561 /* Obey the "rare" flag of the affix. */ | |
3562 if (ae->ae_rare) | |
3563 use_flags = flags | WF_RARE; | |
3564 else | |
3565 use_flags = flags; | |
3566 | |
3463 /* Store the modified word. */ | 3567 /* Store the modified word. */ |
3464 if (store_word(newword, spin, | 3568 if (store_word(newword, spin, use_flags, |
3465 flags, spin->si_region, pfxlist) == FAIL) | 3569 spin->si_region, pfxlist) == FAIL) |
3466 retval = FAIL; | 3570 retval = FAIL; |
3467 | 3571 |
3468 /* When added a suffix and combining is allowed also | 3572 /* When added a suffix and combining is allowed also |
3469 * try adding prefixes additionally. */ | 3573 * try adding prefixes additionally. */ |
3470 if (xht != NULL && ah->ah_combine) | 3574 if (xht != NULL && ah->ah_combine) |
3471 if (store_aff_word(newword, spin, afflist, affile, | 3575 if (store_aff_word(newword, spin, afflist, affile, |
3472 xht, NULL, TRUE, flags, pfxlist) == FAIL) | 3576 xht, NULL, TRUE, use_flags, pfxlist) |
3577 == FAIL) | |
3473 retval = FAIL; | 3578 retval = FAIL; |
3474 } | 3579 } |
3475 } | 3580 } |
3476 } | 3581 } |
3477 } | 3582 } |
3512 | 3617 |
3513 if (spin->si_verbose || p_verbose > 2) | 3618 if (spin->si_verbose || p_verbose > 2) |
3514 { | 3619 { |
3515 if (!spin->si_verbose) | 3620 if (!spin->si_verbose) |
3516 verbose_enter(); | 3621 verbose_enter(); |
3517 smsg((char_u *)_("Reading word file %s..."), fname); | 3622 smsg((char_u *)_("Reading word file %s ..."), fname); |
3518 out_flush(); | 3623 out_flush(); |
3519 if (!spin->si_verbose) | 3624 if (!spin->si_verbose) |
3520 verbose_leave(); | 3625 verbose_leave(); |
3521 } | 3626 } |
3522 | 3627 |
3815 return res; | 3920 return res; |
3816 } | 3921 } |
3817 | 3922 |
3818 /* | 3923 /* |
3819 * Add word "word" to a word tree at "root". | 3924 * Add word "word" to a word tree at "root". |
3820 * When "flags" is -1 we are adding to the prefix tree where flags don't | 3925 * When "flags" < 0 we are adding to the prefix tree where flags is used for |
3821 * matter and "region" is the condition nr. | 3926 * "rare" and "region" is the condition nr. |
3822 * Returns FAIL when out of memory. | 3927 * Returns FAIL when out of memory. |
3823 */ | 3928 */ |
3824 static int | 3929 static int |
3825 tree_add_word(word, root, flags, region, prefixID, blp) | 3930 tree_add_word(word, root, flags, region, prefixID, blp) |
3826 char_u *word; | 3931 char_u *word; |
4107 } | 4212 } |
4108 | 4213 |
4109 /* <HEADER>: <fileID> <regioncnt> <regionname> ... | 4214 /* <HEADER>: <fileID> <regioncnt> <regionname> ... |
4110 * <charflagslen> <charflags> | 4215 * <charflagslen> <charflags> |
4111 * <fcharslen> <fchars> | 4216 * <fcharslen> <fchars> |
4217 * <midwordlen> <midword> | |
4112 * <prefcondcnt> <prefcond> ... */ | 4218 * <prefcondcnt> <prefcond> ... */ |
4113 | 4219 |
4114 /* <fileID> */ | 4220 /* <fileID> */ |
4115 if (fwrite(VIMSPELLMAGIC, VIMSPELLMAGICL, (size_t)1, fd) != 1) | 4221 if (fwrite(VIMSPELLMAGIC, VIMSPELLMAGICL, (size_t)1, fd) != 1) |
4116 EMSG(_(e_write)); | 4222 EMSG(_(e_write)); |
4144 putc(0, fd); | 4250 putc(0, fd); |
4145 } | 4251 } |
4146 else | 4252 else |
4147 write_spell_chartab(fd); | 4253 write_spell_chartab(fd); |
4148 | 4254 |
4255 | |
4256 if (spin->si_midword == NULL) | |
4257 put_bytes(fd, 0L, 2); /* <midwordlen> */ | |
4258 else | |
4259 { | |
4260 i = STRLEN(spin->si_midword); | |
4261 put_bytes(fd, (long_u)i, 2); /* <midwordlen> */ | |
4262 fwrite(spin->si_midword, (size_t)i, (size_t)1, fd); /* <midword> */ | |
4263 } | |
4264 | |
4265 | |
4149 /* Write the prefix conditions. */ | 4266 /* Write the prefix conditions. */ |
4150 write_spell_prefcond(fd, &spin->si_prefcond); | 4267 write_spell_prefcond(fd, &spin->si_prefcond); |
4268 | |
4269 /* <SUGGEST> : <repcount> <rep> ... | |
4270 * <salflags> <salcount> <sal> ... | |
4271 * <maplen> <mapstr> */ | |
4151 | 4272 |
4152 /* Sort the REP items. */ | 4273 /* Sort the REP items. */ |
4153 qsort(spin->si_rep.ga_data, (size_t)spin->si_rep.ga_len, | 4274 qsort(spin->si_rep.ga_data, (size_t)spin->si_rep.ga_len, |
4154 sizeof(fromto_T), rep_compare); | 4275 sizeof(fromto_T), rep_compare); |
4155 | 4276 |
4156 /* <SUGGEST> : <repcount> <rep> ... | |
4157 * <salflags> <salcount> <sal> ... | |
4158 * <maplen> <mapstr> */ | |
4159 for (round = 1; round <= 2; ++round) | 4277 for (round = 1; round <= 2; ++round) |
4160 { | 4278 { |
4161 if (round == 1) | 4279 if (round == 1) |
4162 gap = &spin->si_rep; | 4280 gap = &spin->si_rep; |
4163 else | 4281 else |
4300 /* For a NUL byte (end of word) write the flags etc. */ | 4418 /* For a NUL byte (end of word) write the flags etc. */ |
4301 if (prefixtree) | 4419 if (prefixtree) |
4302 { | 4420 { |
4303 /* In PREFIXTREE write the required prefixID and the | 4421 /* In PREFIXTREE write the required prefixID and the |
4304 * associated condition nr (stored in wn_region). */ | 4422 * associated condition nr (stored in wn_region). */ |
4305 putc(BY_FLAGS, fd); /* <byte> */ | 4423 if (np->wn_flags == (char_u)-2) |
4424 putc(BY_FLAGS, fd); /* <byte> rare */ | |
4425 else | |
4426 putc(BY_NOFLAGS, fd); /* <byte> */ | |
4306 putc(np->wn_prefixID, fd); /* <prefixID> */ | 4427 putc(np->wn_prefixID, fd); /* <prefixID> */ |
4307 put_bytes(fd, (long_u)np->wn_region, 2); /* <prefcondnr> */ | 4428 put_bytes(fd, (long_u)np->wn_region, 2); /* <prefcondnr> */ |
4308 } | 4429 } |
4309 else | 4430 else |
4310 { | 4431 { |
4447 * "path/en.latin1.add.spl". */ | 4568 * "path/en.latin1.add.spl". */ |
4448 innames = &fnames[0]; | 4569 innames = &fnames[0]; |
4449 incount = 1; | 4570 incount = 1; |
4450 vim_snprintf((char *)wfname, sizeof(wfname), "%s.spl", fnames[0]); | 4571 vim_snprintf((char *)wfname, sizeof(wfname), "%s.spl", fnames[0]); |
4451 } | 4572 } |
4573 else if (fcount == 1) | |
4574 { | |
4575 /* For ":mkspell path/vim" output file is "path/vim.latin1.spl". */ | |
4576 innames = &fnames[0]; | |
4577 incount = 1; | |
4578 vim_snprintf((char *)wfname, sizeof(wfname), "%s.%s.spl", fnames[0], | |
4579 spin.si_ascii ? (char_u *)"ascii" : spell_enc()); | |
4580 } | |
4452 else if (len > 4 && STRCMP(fnames[0] + len - 4, ".spl") == 0) | 4581 else if (len > 4 && STRCMP(fnames[0] + len - 4, ".spl") == 0) |
4453 { | 4582 { |
4454 /* Name ends in ".spl", use as the file name. */ | 4583 /* Name ends in ".spl", use as the file name. */ |
4455 vim_strncpy(wfname, fnames[0], sizeof(wfname) - 1); | 4584 vim_strncpy(wfname, fnames[0], sizeof(wfname) - 1); |
4456 } | 4585 } |
4606 */ | 4735 */ |
4607 if (!added_word || p_verbose > 2) | 4736 if (!added_word || p_verbose > 2) |
4608 { | 4737 { |
4609 if (added_word) | 4738 if (added_word) |
4610 verbose_enter(); | 4739 verbose_enter(); |
4611 smsg((char_u *)_("Writing spell file %s..."), wfname); | 4740 smsg((char_u *)_("Writing spell file %s ..."), wfname); |
4612 out_flush(); | 4741 out_flush(); |
4613 if (added_word) | 4742 if (added_word) |
4614 verbose_leave(); | 4743 verbose_leave(); |
4615 } | 4744 } |
4616 | 4745 |
4635 /* Free the allocated memory. */ | 4764 /* Free the allocated memory. */ |
4636 ga_clear(&spin.si_rep); | 4765 ga_clear(&spin.si_rep); |
4637 ga_clear(&spin.si_sal); | 4766 ga_clear(&spin.si_sal); |
4638 ga_clear(&spin.si_map); | 4767 ga_clear(&spin.si_map); |
4639 ga_clear(&spin.si_prefcond); | 4768 ga_clear(&spin.si_prefcond); |
4769 vim_free(spin.si_midword); | |
4640 | 4770 |
4641 /* Free the .aff file structures. */ | 4771 /* Free the .aff file structures. */ |
4642 for (i = 0; i < incount; ++i) | 4772 for (i = 0; i < incount; ++i) |
4643 if (afile[i] != NULL) | 4773 if (afile[i] != NULL) |
4644 spell_free_aff(afile[i]); | 4774 spell_free_aff(afile[i]); |
4827 { | 4957 { |
4828 int i; | 4958 int i; |
4829 | 4959 |
4830 did_set_spelltab = FALSE; | 4960 did_set_spelltab = FALSE; |
4831 clear_spell_chartab(&spelltab); | 4961 clear_spell_chartab(&spelltab); |
4832 | 4962 vim_memset(spell_ismw, FALSE, sizeof(spell_ismw)); |
4833 #ifdef FEAT_MBYTE | 4963 #ifdef FEAT_MBYTE |
4964 vim_free(spell_ismw_mb); | |
4965 spell_ismw_mb = NULL; | |
4966 | |
4834 if (enc_dbcs) | 4967 if (enc_dbcs) |
4835 { | 4968 { |
4836 /* DBCS: assume double-wide characters are word characters. */ | 4969 /* DBCS: assume double-wide characters are word characters. */ |
4837 for (i = 128; i <= 255; ++i) | 4970 for (i = 128; i <= 255; ++i) |
4838 if (MB_BYTE2LEN(i) == 2) | 4971 if (MB_BYTE2LEN(i) == 2) |
5019 return OK; | 5152 return OK; |
5020 } | 5153 } |
5021 | 5154 |
5022 /* | 5155 /* |
5023 * Return TRUE if "p" points to a word character. | 5156 * Return TRUE if "p" points to a word character. |
5024 * As a special case we see a single quote as a word character when it is | 5157 * As a special case we see "midword" characters as word characters when they are |
5025 * followed by a word character. This finds they'there but not 'they there'. | 5158 * followed by a word character. This finds they'there but not 'they there'. |
5159 * Thus this only works properly when past the first character of the word. | |
5026 */ | 5160 */ |
5027 static int | 5161 static int |
5028 spell_iswordp(p) | 5162 spell_iswordp(p) |
5029 char_u *p; | 5163 char_u *p; |
5030 { | 5164 { |
5165 #ifdef FEAT_MBYTE | |
5031 char_u *s; | 5166 char_u *s; |
5032 | 5167 int l; |
5033 if (*p == '\'') | 5168 int c; |
5034 s = p + 1; | 5169 |
5035 else | 5170 if (has_mbyte) |
5171 { | |
5172 l = MB_BYTE2LEN(*p); | |
5036 s = p; | 5173 s = p; |
5037 #ifdef FEAT_MBYTE | 5174 if (l == 1) |
5038 if (has_mbyte && MB_BYTE2LEN(*s) > 1) | 5175 { |
5039 return mb_get_class(s) >= 2; | 5176 /* be quick for ASCII */ |
5177 if (spell_ismw[*p]) | |
5178 { | |
5179 s = p + 1; /* skip a mid-word character */ | |
5180 l = MB_BYTE2LEN(*s); | |
5181 } | |
5182 } | |
5183 else | |
5184 { | |
5185 c = mb_ptr2char(p); | |
5186 if (c < 256 ? spell_ismw[c] : (spell_ismw_mb != NULL | |
5187 && vim_strchr(spell_ismw_mb, c) != NULL)) | |
5188 { | |
5189 s = p + l; | |
5190 l = MB_BYTE2LEN(*s); | |
5191 } | |
5192 } | |
5193 | |
5194 if (l > 1) | |
5195 return mb_get_class(s) >= 2; | |
5196 return spelltab.st_isw[*s]; | |
5197 } | |
5040 #endif | 5198 #endif |
5041 return spelltab.st_isw[*s]; | 5199 |
5200 return spelltab.st_isw[spell_ismw[*p] ? p[1] : p[0]]; | |
5042 } | 5201 } |
5043 | 5202 |
5044 /* | 5203 /* |
5045 * Write the table with prefix conditions to the .spl file. | 5204 * Write the table with prefix conditions to the .spl file. |
5046 */ | 5205 */ |
5716 | 5875 |
5717 if ((fword[sp->ts_fidx] == NUL | 5876 if ((fword[sp->ts_fidx] == NUL |
5718 || !spell_iswordp(fword + sp->ts_fidx)) | 5877 || !spell_iswordp(fword + sp->ts_fidx)) |
5719 && sp->ts_fidx >= sp->ts_fidxtry) | 5878 && sp->ts_fidx >= sp->ts_fidxtry) |
5720 { | 5879 { |
5721 /* The badword also ends: add suggestions, */ | 5880 /* The badword also ends: add suggestions. Give a penalty |
5881 * when changing non-word char to word char, e.g., "thes," | |
5882 * -> "these". */ | |
5883 p = fword + sp->ts_fidx; | |
5884 #ifdef FEAT_MBYTE | |
5885 if (has_mbyte) | |
5886 mb_ptr_back(fword, p); | |
5887 else | |
5888 #endif | |
5889 --p; | |
5890 if (!spell_iswordp(p)) | |
5891 { | |
5892 p = preword + STRLEN(preword); | |
5893 #ifdef FEAT_MBYTE | |
5894 if (has_mbyte) | |
5895 mb_ptr_back(preword, p); | |
5896 else | |
5897 #endif | |
5898 --p; | |
5899 if (spell_iswordp(p)) | |
5900 newscore += SCORE_NONWORD; | |
5901 } | |
5902 | |
5722 add_suggestion(su, &su->su_ga, preword, | 5903 add_suggestion(su, &su->su_ga, preword, |
5723 sp->ts_fidx - repextra, | 5904 sp->ts_fidx - repextra, |
5724 sp->ts_score + newscore, 0, FALSE); | 5905 sp->ts_score + newscore, 0, FALSE); |
5725 } | 5906 } |
5726 else if (sp->ts_fidx >= sp->ts_fidxtry | 5907 else if (sp->ts_fidx >= sp->ts_fidxtry |
7091 p = NULL; | 7272 p = NULL; |
7092 } | 7273 } |
7093 | 7274 |
7094 if (score <= su->su_maxscore) | 7275 if (score <= su->su_maxscore) |
7095 { | 7276 { |
7096 /* Check if the word is already there. */ | 7277 /* Check if the word is already there. Also check the length that is |
7278 * being replaced: "thes," -> "these" is a different suggestion from | |
7279 * "thes" -> "these". */ | |
7097 stp = &SUG(*gap, 0); | 7280 stp = &SUG(*gap, 0); |
7098 for (i = gap->ga_len - 1; i >= 0; --i) | 7281 for (i = gap->ga_len - 1; i >= 0; --i) |
7099 if (STRCMP(stp[i].st_word, goodword) == 0) | 7282 if (STRCMP(stp[i].st_word, goodword) == 0 |
7283 && stp[i].st_orglen == badlen) | |
7100 { | 7284 { |
7101 /* Found it. Remember the lowest score. */ | 7285 /* Found it. Remember the lowest score. */ |
7102 if (stp[i].st_score > score) | 7286 if (stp[i].st_score > score) |
7103 { | 7287 { |
7104 stp[i].st_score = score; | 7288 stp[i].st_score = score; |
8162 for (i = 1; i < len; ++i) | 8346 for (i = 1; i < len; ++i) |
8163 if (byts[n + i] != 0) | 8347 if (byts[n + i] != 0) |
8164 break; | 8348 break; |
8165 curi[depth] += i - 1; | 8349 curi[depth] += i - 1; |
8166 | 8350 |
8167 if (valid_word_prefix(i, n, prefid, word, slang)) | 8351 i = valid_word_prefix(i, n, prefid, word, slang); |
8352 if (i != 0) | |
8168 { | 8353 { |
8169 vim_strncpy(prefix + depth, word, MAXWLEN - depth); | 8354 vim_strncpy(prefix + depth, word, MAXWLEN - depth); |
8170 dump_word(prefix, round, flags, lnum++); | 8355 dump_word(prefix, round, |
8356 (i & WF_RAREPFX) ? (flags | WF_RARE) | |
8357 : flags, lnum++); | |
8171 } | 8358 } |
8172 } | 8359 } |
8173 else | 8360 else |
8174 { | 8361 { |
8175 /* Normal char, go one level deeper. */ | 8362 /* Normal char, go one level deeper. */ |