# HG changeset patch # User vimboss # Date 1119911266 0 # Node ID b498dee21bd3f97108eb2fee8efbd28c90262485 # Parent bda4394122e05d8816f4ade578c4ae1ae25bb569 updated for version 7.0095 diff --git a/src/spell.c b/src/spell.c --- a/src/spell.c +++ b/src/spell.c @@ -86,9 +86,10 @@ * ... * * + * * ... * - * 10 bytes "VIMspell07" + * 10 bytes "VIMspell08" * 1 byte number of regions following (8 supported) * 2 bytes Region name: ca, au, etc. Lower case. * First is region 1. @@ -100,6 +101,10 @@ * 2 bytes Number of bytes in . * N bytes Folded characters, first one is for character 128. * + * 2 bytes Number of bytes in . + * N bytes Characters that are word characters only when used + * in the middle of a word. + * * 2 bytes Number of items following. * * : @@ -169,9 +174,11 @@ * 1 byte Byte value of the sibling. Special cases: * BY_NOFLAGS: End of word without flags and for all * regions. - * BY_FLAGS: End of word, follow. For - * PREFIXTREE and - * follow. + * For PREFIXTREE and + * follow. + * BY_FLAGS: End of word, follow. + * For PREFIXTREE and + * follow for rare prefix. * BY_INDEX: Child of sibling is shared, * and follow. * @@ -235,6 +242,10 @@ typedef long idx_T; #define WF_CAPMASK (WF_ONECAP | WF_ALLCAP | WF_KEEPCAP) +#define WF_RAREPFX 0x1000000 /* in sl_pidxs: flag for rare postponed + prefix; must be above prefixID (one byte) + and prefcondnr (two bytes) */ + #define BY_NOFLAGS 0 /* end of word without flags or region */ #define BY_FLAGS 1 /* end of word, flag byte follows */ #define BY_INDEX 2 /* child is shared, index follows */ @@ -343,7 +354,7 @@ typedef struct langp_S #define SP_LOCAL 2 #define SP_BAD 3 -#define VIMSPELLMAGIC "VIMspell07" /* string at start of Vim spell file */ +#define VIMSPELLMAGIC "VIMspell08" /* string at start of Vim spell file */ #define VIMSPELLMAGICL 10 /* @@ -399,6 +410,7 @@ typedef struct suggest_S #define SCORE_DELDUP 64 /* delete a duplicated character */ #define SCORE_INS 96 /* insert a character */ #define SCORE_INSDUP 66 /* insert a duplicate character */ +#define SCORE_NONWORD 103 /* change non-word to word char */ #define SCORE_MAXINIT 350 /* Initial maximum score: higher == slower. * 350 allows for about three changes. */ @@ -449,6 +461,10 @@ typedef struct spelltab_S static spelltab_T spelltab; static int did_set_spelltab; +static char_u spell_ismw[256]; /* flags: is midword char */ +#ifdef FEAT_MBYTE +static char_u *spell_ismw_mb = NULL; /* multi-byte midword chars */ +#endif #define CF_WORD 0x01 #define CF_UPPER 0x02 @@ -961,15 +977,20 @@ find_word(mip, mode) /* When mode is FIND_PREFIX the word must support the prefix: * check the prefix ID and the condition. Do that for the list at - * mip->mi_prefarridx. */ + * mip->mi_prefarridx that find_prefix() filled. */ if (mode == FIND_PREFIX) { /* The prefix ID is stored two bytes above the flags. */ prefid = (unsigned)flags >> 16; - if (!valid_word_prefix(mip->mi_prefcnt, mip->mi_prefarridx, + c = valid_word_prefix(mip->mi_prefcnt, mip->mi_prefarridx, prefid, mip->mi_fword + mip->mi_prefixlen, - slang)) + slang); + if (c == 0) continue; + + /* Use the WF_RARE flag for a rare prefix. */ + if (c & WF_RAREPFX) + flags |= WF_RARE; } if (flags & WF_BANNED) @@ -1006,8 +1027,9 @@ find_word(mip, mode) } /* - * Return TRUE if the prefix indicated by "mip->mi_prefarridx" matches with - * the prefix ID "prefid" for the word "word". + * Return non-zero if the prefix indicated by "mip->mi_prefarridx" matches + * with the prefix ID "prefid" for the word "word". + * The WF_RAREPFX flag is included in the return value for a rare prefix. */ static int valid_word_prefix(totprefcnt, arridx, prefid, word, slang) @@ -1031,8 +1053,8 @@ valid_word_prefix(totprefcnt, arridx, pr continue; /* Check the condition, if there is one. The condition index is - * stored above the prefix ID byte. */ - rp = slang->sl_prefprog[(unsigned)pidx >> 8]; + * stored in the two bytes above the prefix ID byte. */ + rp = slang->sl_prefprog[((unsigned)pidx >> 8) & 0xffff]; if (rp != NULL) { regmatch.regprog = rp; @@ -1041,10 +1063,10 @@ valid_word_prefix(totprefcnt, arridx, pr continue; } - /* It's a match! */ - return TRUE; - } - return FALSE; + /* It's a match! Return the WF_RAREPFX flag. */ + return pidx; + } + return 0; } /* @@ -1647,6 +1669,7 @@ spell_load_file(fname, lang, old_lp, sil * ... * * + * * ... */ for (i = 0; i < VIMSPELLMAGICL; ++i) @@ -1719,6 +1742,52 @@ formerr: goto formerr; } + /* */ + cnt = (getc(fd) << 8) + getc(fd); + if (cnt < 0) + goto truncerr; + if (cnt > 0) + { + for (i = 0; i < cnt; ++i) + if (i < MAXWLEN) /* truncate at reasonable length */ + buf[i] = getc(fd); + if (i < MAXWLEN) + buf[i] = NUL; + else + buf[MAXWLEN] = NUL; + + /* The midword characters add up to any midword characters from other + * .spel files. */ + for (p = buf; *p != NUL; ) +#ifdef FEAT_MBYTE + if (has_mbyte) + { + c = mb_ptr2char(p); + i = mb_ptr2len_check(p); + if (c < 256) + spell_ismw[c] = TRUE; + else if (spell_ismw_mb == NULL) + /* First multi-byte char in "spell_ismw_mb". */ + spell_ismw_mb = vim_strnsave(p, i); + else + { + /* Append multi-byte chars to "spell_ismw_mb". */ + n = STRLEN(spell_ismw_mb); + bp = vim_strnsave(spell_ismw_mb, n + i); + if (bp != NULL) + { + vim_free(spell_ismw_mb); + spell_ismw_mb = bp; + vim_strncpy(bp + n, p, i); + } + } + p += i; + } + else +#endif + spell_ismw[*p++] = TRUE; + } + /* ... */ cnt = (getc(fd) << 8) + getc(fd); /* */ if (cnt > 0) @@ -2004,6 +2073,7 @@ read_tree(fd, byts, idxs, maxidx, starti int n; idx_T idx = startidx; int c; + int c2; #define SHARED_MASK 0x8000000 len = getc(fd); /* */ @@ -2022,24 +2092,28 @@ read_tree(fd, byts, idxs, maxidx, starti return -1; if (c <= BY_SPECIAL) { - if (c == BY_NOFLAGS) + if (c == BY_NOFLAGS && !prefixtree) { /* No flags, all regions. */ idxs[idx] = 0; c = 0; } - else if (c == BY_FLAGS) + else if (c == BY_FLAGS || c == BY_NOFLAGS) { if (prefixtree) { /* Read the prefix ID and the condition nr. In idxs[] * store the prefix ID in the low byte, the condition * index shifted up 8 bits. */ - c = getc(fd); /* */ + c2 = getc(fd); /* */ n = (getc(fd) << 8) + getc(fd); /* */ if (n >= maxprefcondnr) return -2; - c = (n << 8) + c; + c2 += (n << 8); + if (c == BY_NOFLAGS) + c = c2; + else + c = c2 | WF_RAREPFX; } else { @@ -2356,6 +2430,8 @@ spell_free_all() first_lang = lp->sl_next; slang_free(lp); } + + init_spell_chartab(); } # endif @@ -2452,6 +2528,7 @@ struct affentry_S char_u *ae_add; /* text to add to basic word (can be NULL) */ char_u *ae_cond; /* condition (NULL for ".") */ regprog_T *ae_prog; /* regexp program for ae_cond or NULL */ + int ae_rare; /* rare affix */ }; /* Affix header from ".aff" file. Used for af_pref and af_suff. */ @@ -2536,6 +2613,7 @@ typedef struct spellinfo_S int si_collapse; /* soundsalike: ? */ int si_rem_accents; /* soundsalike: remove accents */ garray_T si_map; /* MAP info concatenated */ + char_u *si_midword; /* MIDWORD chars, alloc'ed string or NULL */ garray_T si_prefcond; /* table with conditions for postponed * prefixes, each stored as a string */ int si_newID; /* current value for ah_newID */ @@ -2595,6 +2673,7 @@ spell_read_aff(fname, spin) int do_rep; int do_sal; int do_map; + int do_midword; int found_map = FALSE; hashitem_T *hi; @@ -2612,7 +2691,7 @@ spell_read_aff(fname, spin) { if (!spin->si_verbose) verbose_enter(); - smsg((char_u *)_("Reading affix file %s..."), fname); + smsg((char_u *)_("Reading affix file %s ..."), fname); out_flush(); if (!spin->si_verbose) verbose_leave(); @@ -2627,6 +2706,9 @@ spell_read_aff(fname, spin) /* Only do MAP lines when not done in another .aff file already. */ do_map = spin->si_map.ga_len == 0; + /* Only do MIDWORD line when not done in another .aff file already */ + do_midword = spin->si_midword == NULL; + /* * Allocate and init the afffile_T structure. */ @@ -2706,6 +2788,11 @@ spell_read_aff(fname, spin) smsg((char_u *)_("Conversion in %s not supported"), fname); #endif } + else if (STRCMP(items[0], "MIDWORD") == 0 && itemcnt == 2) + { + if (do_midword) + spin->si_midword = vim_strsave(items[1]); + } else if (STRCMP(items[0], "NOSPLITSUGS") == 0 && itemcnt == 1) { /* ignored, we always split */ @@ -2793,12 +2880,21 @@ spell_read_aff(fname, spin) && itemcnt >= 5) { affentry_T *aff_entry; + int rare = FALSE; + int lasti = 5; + + /* Check for "rare" after the other info. */ + if (itemcnt > 5 && STRICMP(items[5], "rare") == 0) + { + rare = TRUE; + lasti = 6; + } /* Myspell allows extra text after the item, but that might * mean mistakes go unnoticed. Require a comment-starter. */ - if (itemcnt > 5 && *items[5] != '#') + if (itemcnt > lasti && *items[lasti] != '#') smsg((char_u *)_("Trailing text in %s line %d: %s"), - fname, lnum, items[5]); + fname, lnum, items[lasti]); /* New item for an affix letter. */ --aff_todo; @@ -2806,6 +2902,7 @@ spell_read_aff(fname, spin) sizeof(affentry_T)); if (aff_entry == NULL) break; + aff_entry->ae_rare = rare; if (STRCMP(items[2], "0") != 0) aff_entry->ae_chop = getroom_save(&spin->si_blocks, @@ -2868,8 +2965,8 @@ spell_read_aff(fname, spin) p = (char_u *)""; else p = aff_entry->ae_add; - tree_add_word(p, spin->si_prefroot, -1, idx, - cur_aff->ah_newID, &spin->si_blocks); + tree_add_word(p, spin->si_prefroot, rare ? -2 : -1, + idx, cur_aff->ah_newID, &spin->si_blocks); } } } @@ -3160,7 +3257,7 @@ spell_read_dic(fname, spin, affile) { if (!spin->si_verbose) verbose_enter(); - smsg((char_u *)_("Reading dictionary file %s..."), fname); + smsg((char_u *)_("Reading dictionary file %s ..."), fname); out_flush(); if (!spin->si_verbose) verbose_leave(); @@ -3384,6 +3481,7 @@ store_aff_word(word, spin, afflist, affi int retval = OK; int i; char_u *p; + int use_flags; todo = ht->ht_used; for (hi = ht->ht_array; todo > 0 && retval == OK; ++hi) @@ -3460,16 +3558,23 @@ store_aff_word(word, spin, afflist, affi STRCAT(newword, ae->ae_add); } + /* Obey the "rare" flag of the affix. */ + if (ae->ae_rare) + use_flags = flags | WF_RARE; + else + use_flags = flags; + /* Store the modified word. */ - if (store_word(newword, spin, - flags, spin->si_region, pfxlist) == FAIL) + if (store_word(newword, spin, use_flags, + spin->si_region, pfxlist) == FAIL) retval = FAIL; /* When added a suffix and combining is allowed also * try adding prefixes additionally. */ if (xht != NULL && ah->ah_combine) if (store_aff_word(newword, spin, afflist, affile, - xht, NULL, TRUE, flags, pfxlist) == FAIL) + xht, NULL, TRUE, use_flags, pfxlist) + == FAIL) retval = FAIL; } } @@ -3514,7 +3619,7 @@ spell_read_wordfile(fname, spin) { if (!spin->si_verbose) verbose_enter(); - smsg((char_u *)_("Reading word file %s..."), fname); + smsg((char_u *)_("Reading word file %s ..."), fname); out_flush(); if (!spin->si_verbose) verbose_leave(); @@ -3817,8 +3922,8 @@ store_word(word, spin, flags, region, pf /* * Add word "word" to a word tree at "root". - * When "flags" is -1 we are adding to the prefix tree where flags don't - * matter and "region" is the condition nr. + * When "flags" < 0 we are adding to the prefix tree where flags is used for + * "rare" and "region" is the condition nr. * Returns FAIL when out of memory. */ static int @@ -4109,6 +4214,7 @@ write_vim_spell(fname, spin) /*
: ... * * + * * ... */ /* */ @@ -4146,16 +4252,28 @@ write_vim_spell(fname, spin) else write_spell_chartab(fd); + + if (spin->si_midword == NULL) + put_bytes(fd, 0L, 2); /* */ + else + { + i = STRLEN(spin->si_midword); + put_bytes(fd, (long_u)i, 2); /* */ + fwrite(spin->si_midword, (size_t)i, (size_t)1, fd); /* */ + } + + /* Write the prefix conditions. */ write_spell_prefcond(fd, &spin->si_prefcond); + /* : ... + * ... + * */ + /* Sort the REP items. */ qsort(spin->si_rep.ga_data, (size_t)spin->si_rep.ga_len, sizeof(fromto_T), rep_compare); - /* : ... - * ... - * */ for (round = 1; round <= 2; ++round) { if (round == 1) @@ -4302,7 +4420,10 @@ put_node(fd, node, index, regionmask, pr { /* In PREFIXTREE write the required prefixID and the * associated condition nr (stored in wn_region). */ - putc(BY_FLAGS, fd); /* */ + if (np->wn_flags == (char_u)-2) + putc(BY_FLAGS, fd); /* rare */ + else + putc(BY_NOFLAGS, fd); /* */ putc(np->wn_prefixID, fd); /* */ put_bytes(fd, (long_u)np->wn_region, 2); /* */ } @@ -4449,6 +4570,14 @@ mkspell(fcount, fnames, ascii, overwrite incount = 1; vim_snprintf((char *)wfname, sizeof(wfname), "%s.spl", fnames[0]); } + else if (fcount == 1) + { + /* For ":mkspell path/vim" output file is "path/vim.latin1.spl". */ + innames = &fnames[0]; + incount = 1; + vim_snprintf((char *)wfname, sizeof(wfname), "%s.%s.spl", fnames[0], + spin.si_ascii ? (char_u *)"ascii" : spell_enc()); + } else if (len > 4 && STRCMP(fnames[0] + len - 4, ".spl") == 0) { /* Name ends in ".spl", use as the file name. */ @@ -4608,7 +4737,7 @@ mkspell(fcount, fnames, ascii, overwrite { if (added_word) verbose_enter(); - smsg((char_u *)_("Writing spell file %s..."), wfname); + smsg((char_u *)_("Writing spell file %s ..."), wfname); out_flush(); if (added_word) verbose_leave(); @@ -4637,6 +4766,7 @@ mkspell(fcount, fnames, ascii, overwrite ga_clear(&spin.si_sal); ga_clear(&spin.si_map); ga_clear(&spin.si_prefcond); + vim_free(spin.si_midword); /* Free the .aff file structures. */ for (i = 0; i < incount; ++i) @@ -4829,8 +4959,11 @@ init_spell_chartab() did_set_spelltab = FALSE; clear_spell_chartab(&spelltab); - + vim_memset(spell_ismw, FALSE, sizeof(spell_ismw)); #ifdef FEAT_MBYTE + vim_free(spell_ismw_mb); + spell_ismw_mb = NULL; + if (enc_dbcs) { /* DBCS: assume double-wide characters are word characters. */ @@ -5021,24 +5154,50 @@ set_spell_finish(new_st) /* * Return TRUE if "p" points to a word character. - * As a special case we see a single quote as a word character when it is + * As a special case we see "midword" characters as word character when it is * followed by a word character. This finds they'there but not 'they there'. + * Thus this only works properly when past the first character of the word. */ static int spell_iswordp(p) char_u *p; { +#ifdef FEAT_MBYTE char_u *s; - - if (*p == '\'') - s = p + 1; - else + int l; + int c; + + if (has_mbyte) + { + l = MB_BYTE2LEN(*p); s = p; -#ifdef FEAT_MBYTE - if (has_mbyte && MB_BYTE2LEN(*s) > 1) - return mb_get_class(s) >= 2; + if (l == 1) + { + /* be quick for ASCII */ + if (spell_ismw[*p]) + { + s = p + 1; /* skip a mid-word character */ + l = MB_BYTE2LEN(*s); + } + } + else + { + c = mb_ptr2char(p); + if (c < 256 ? spell_ismw[c] : (spell_ismw_mb != NULL + && vim_strchr(spell_ismw_mb, c) != NULL)) + { + s = p + l; + l = MB_BYTE2LEN(*s); + } + } + + if (l > 1) + return mb_get_class(s) >= 2; + return spelltab.st_isw[*s]; + } #endif - return spelltab.st_isw[*s]; + + return spelltab.st_isw[spell_ismw[*p] ? p[1] : p[0]]; } /* @@ -5718,7 +5877,29 @@ suggest_try_change(su) || !spell_iswordp(fword + sp->ts_fidx)) && sp->ts_fidx >= sp->ts_fidxtry) { - /* The badword also ends: add suggestions, */ + /* The badword also ends: add suggestions. Give a penalty + * when changing non-word char to word char, e.g., "thes," + * -> "these". */ + p = fword + sp->ts_fidx; +#ifdef FEAT_MBYTE + if (has_mbyte) + mb_ptr_back(fword, p); + else +#endif + --p; + if (!spell_iswordp(p)) + { + p = preword + STRLEN(preword); +#ifdef FEAT_MBYTE + if (has_mbyte) + mb_ptr_back(preword, p); + else +#endif + --p; + if (spell_iswordp(p)) + newscore += SCORE_NONWORD; + } + add_suggestion(su, &su->su_ga, preword, sp->ts_fidx - repextra, sp->ts_score + newscore, 0, FALSE); @@ -7093,10 +7274,13 @@ add_suggestion(su, gap, goodword, badlen if (score <= su->su_maxscore) { - /* Check if the word is already there. */ + /* Check if the word is already there. Also check the length that is + * being replaced "thes," -> "these" is a different suggestion from + * "thes" -> "these". */ stp = &SUG(*gap, 0); for (i = gap->ga_len - 1; i >= 0; --i) - if (STRCMP(stp[i].st_word, goodword) == 0) + if (STRCMP(stp[i].st_word, goodword) == 0 + && stp[i].st_orglen == badlen) { /* Found it. Remember the lowest score. */ if (stp[i].st_score > score) @@ -8164,10 +8348,13 @@ apply_prefixes(slang, word, round, flags break; curi[depth] += i - 1; - if (valid_word_prefix(i, n, prefid, word, slang)) + i = valid_word_prefix(i, n, prefid, word, slang); + if (i != 0) { vim_strncpy(prefix + depth, word, MAXWLEN - depth); - dump_word(prefix, round, flags, lnum++); + dump_word(prefix, round, + (i & WF_RAREPFX) ? (flags | WF_RARE) + : flags, lnum++); } } else