Mercurial > vim
diff src/spell.c @ 255:c8742c8da9ab
updated for version 7.0070
author | vimboss |
---|---|
date | Sat, 23 Apr 2005 20:42:23 +0000 |
parents | f146656fb903 |
children | ed33f83b42d8 |
line wrap: on
line diff
--- a/src/spell.c +++ b/src/spell.c @@ -97,6 +97,8 @@ static affitem_T dumai; /* * Structure used to store words and other info for one language, loaded from * a .spl file. + * The main access is through hashtable "sl_word", using the case-folded + * word as the key. This finds a linked list of fword_T. */ typedef struct slang_S slang_T; struct slang_S @@ -149,6 +151,8 @@ static addword_T dumaw; /* * Structure to store a basic word. * There are many of these, keep it small! + * The list of prefix and suffix NRs is stored after "fw_word" to avoid the + * need for two extra pointers. */ typedef struct fword_S fword_T; struct fword_S @@ -223,7 +227,7 @@ typedef struct langp_S * (Needed to keep ADD_ flags in one byte.) */ #define ADD2BWF(x) (((x) & 0x0f) | (((x) & 0xf0) << 4)) -#define VIMSPELLMAGIC "VIMspell03" /* string at start of Vim spell file */ +#define VIMSPELLMAGIC "VIMspell04" /* string at start of Vim spell file */ #define VIMSPELLMAGICL 10 /* @@ -307,7 +311,7 @@ spell_check(wp, line, ptr, attrp) return (int)(mi.mi_end - ptr); /* Make case-folded copy of the word. */ - (void)str_foldcase(ptr, mi.mi_end - ptr, mi.mi_fword, MAXWLEN + 1); + (void)spell_casefold(ptr, mi.mi_end - ptr, mi.mi_fword, MAXWLEN + 1); mi.mi_cword = mi.mi_fword; mi.mi_fendlen = STRLEN(mi.mi_fword); mi.mi_faddlen = 0; @@ -404,6 +408,8 @@ word_match(mip) * "d'", "de-", "'s-", "l'de-". But not "'s". * Also need to do this when a matching word was already found, because we * might find a longer match this way (French: "qu" and "qu'a-t-elle"). + * The check above may have added characters to mi_fword, thus we need to + * truncate it after the basic word for the hash lookup. 
*/ cc = mip->mi_fword[mip->mi_fendlen]; mip->mi_fword[mip->mi_fendlen] = NUL; @@ -772,7 +778,7 @@ fold_addchars(mip, addlen) else #endif l = 1; - (void)str_foldcase(mip->mi_fend, l, p + mip->mi_faddlen, + (void)spell_casefold(mip->mi_fend, l, p + mip->mi_faddlen, MAXWLEN - mip->mi_fendlen - mip->mi_faddlen); mip->mi_fend += l; mip->mi_faddlen += STRLEN(p + mip->mi_faddlen); @@ -992,6 +998,8 @@ suffix_match(mip) * Stop checking if there are no suffixes with so many characters. */ sufp = endw; + *endw = NUL; /* truncate after possible suffix */ + for (charlen = 0; charlen <= mip->mi_slang->sl_sufftab.ga_len; ++charlen) { /* Move the pointer to the possible suffix back one character, unless @@ -1012,13 +1020,11 @@ suffix_match(mip) if (ht->ht_used == 0) continue; - *endw = NUL; /* truncate after possible suffix */ hi = hash_find(ht, sufp); if (HASHITEM_EMPTY(hi)) ai = NULL; else ai = HI2AI(hi); - *endw = endw_c; } if (ai != NULL) @@ -1027,6 +1033,7 @@ suffix_match(mip) * we can use. */ tlen = sufp - mip->mi_cword; /* length of word without suffix */ mch_memmove(pword, mip->mi_cword, tlen); + *endw = endw_c; for ( ; ai != NULL; ai = ai->ai_next) { @@ -1068,9 +1075,12 @@ suffix_match(mip) } } } + + *endw = NUL; /* truncate after possible suffix */ } } + *endw = endw_c; mip->mi_capflags = capflags_save; return FALSE; } @@ -1115,7 +1125,7 @@ match_caps(flags, caseword, mip, cword, else #endif c = *p++; - if (MB_ISUPPER(c)) + if (spell_isupper(c)) { if (capflags == 0 || (capflags & BWF_ONECAP)) { @@ -1460,7 +1470,7 @@ spell_load_file(fname, cookie) int round; char_u *save_sourcing_name = sourcing_name; linenr_T save_sourcing_lnum = sourcing_lnum; - int cnt; + int cnt, ccnt; int choplen; int addlen; int leadlen; @@ -1474,39 +1484,41 @@ spell_load_file(fname, cookie) addword_T *aw, *naw; int flen; int xlen; + char_u *fol; fd = fopen((char *)fname, "r"); if (fd == NULL) { EMSG2(_(e_notopen), fname); - goto errorend; + goto endFAIL; } /* Set sourcing_name, so that error 
messages mention the file name. */ sourcing_name = fname; sourcing_lnum = 0; - /* <HEADER>: <fileID> <regioncnt> <regionname> ... */ + /* <HEADER>: <fileID> <regioncnt> <regionname> ... + * <charflagslen> <charflags> <fcharslen> <fchars> */ for (i = 0; i < VIMSPELLMAGICL; ++i) buf[i] = getc(fd); /* <fileID> */ if (STRNCMP(buf, VIMSPELLMAGIC, VIMSPELLMAGICL) != 0) { EMSG(_("E757: Wrong file ID in spell file")); - goto errorend; + goto endFAIL; } cnt = getc(fd); /* <regioncnt> */ - if (cnt == EOF) + if (cnt < 0) { truncerr: EMSG(_("E758: Truncated spell file")); - goto errorend; + goto endFAIL; } if (cnt > 8) { formerr: EMSG(_("E759: Format error in spell file")); - goto errorend; + goto endFAIL; } for (i = 0; i < cnt; ++i) { @@ -1515,8 +1527,39 @@ formerr: } lp->sl_regions[cnt * 2] = NUL; - /* round 1: <PREFIXLIST>: <affcount> <afftotcnt> <affix> ... - * round 2: <SUFFIXLIST>: <affcount> <afftotcnt> <affix> ... */ + cnt = getc(fd); /* <charflagslen> */ + if (cnt > 0) + { + p = (char_u *)getroom(lp, &bl_used, cnt); + if (p == NULL) + goto endFAIL; + for (i = 0; i < cnt; ++i) + p[i] = getc(fd); /* <charflags> */ + + ccnt = (getc(fd) << 8) + getc(fd); /* <fcharslen> */ + if (ccnt <= 0) + goto formerr; + fol = (char_u *)getroom(lp, &bl_used, ccnt + 1); + if (fol == NULL) + goto endFAIL; + for (i = 0; i < ccnt; ++i) + fol[i] = getc(fd); /* <fchars> */ + fol[i] = NUL; + + /* Set the word-char flags and fill spell_isupper() table. */ + if (set_spell_charflags(p, cnt, fol) == FAIL) + goto formerr; + } + else + { + /* When <charflagslen> is zero then <fcharlen> must also be zero. */ + cnt = (getc(fd) << 8) + getc(fd); + if (cnt != 0) + goto formerr; + } + + /* round 1: <PREFIXLIST>: <affcount> <affix> ... + * round 2: <SUFFIXLIST>: <affcount> <affix> ... */ for (round = 1; round <= 2; ++round) { affcount = (getc(fd) << 8) + getc(fd); /* <affcount> */ @@ -1537,9 +1580,6 @@ formerr: suffm = affcount > 256 ? 
2 : 1; } - i = (getc(fd) << 8) + getc(fd); /* <afftotcnt> */ - /* afftotcnt is not used */ - /* * For each affix NR there can be several affixes. */ @@ -1555,7 +1595,7 @@ formerr: * <affaddlen> <affadd> */ affflags = getc(fd); /* <affflags> */ choplen = getc(fd); /* <affchoplen> */ - if (choplen == EOF) + if (choplen < 0) goto truncerr; if (choplen >= MAXWLEN) goto formerr; @@ -1563,7 +1603,7 @@ formerr: buf[i] = getc(fd); buf[i] = NUL; addlen = getc(fd); /* <affaddlen> */ - if (addlen == EOF) + if (addlen < 0) goto truncerr; if (affflags & AFF_PREWORD) xlen = addlen + 2; /* space for lead and trail string */ @@ -1571,12 +1611,11 @@ formerr: xlen = 0; /* Get room to store the affitem_T, chop and add strings. */ - p = (char_u *)getroom(lp, &bl_used, + ai = (affitem_T *)getroom(lp, &bl_used, sizeof(affitem_T) + addlen + choplen + 1 + xlen); - if (p == NULL) - goto errorend; - - ai = (affitem_T *)p; + if (ai == NULL) + goto endFAIL; + ai->ai_nr = affnr; ai->ai_flags = affflags; ai->ai_choplen = choplen; @@ -1596,8 +1635,12 @@ formerr: int l, leadoff, trailoff; /* - * Separate lead and trail string, put word at ai_add, so - * that it can be used as hashtable key. + * A preword is a prefix that's recognized as a word: it + * contains a word characters folled by a non-word + * character. + * <affadd> is the whole prefix. Separate lead and trail + * string, put the word itself at ai_add, so that it can + * be used as hashtable key. */ /* lead string: up to first word char */ while (*p != NUL && !spell_iswordc(p)) @@ -1623,13 +1666,13 @@ formerr: hi = hash_lookup(&lp->sl_prewords, ai->ai_add, hash); if (HASHITEM_EMPTY(hi)) { - /* First affix with this word, add to hashtable. */ + /* First preword with this word, add to hashtable. */ hash_add_item(&lp->sl_prewords, hi, ai->ai_add, hash); ai->ai_next = NULL; } else { - /* There already is an affix with this word, link in + /* There already is a preword with this word, link in * the list. 
*/ ai2 = HI2AI(hi); ai->ai_next = ai2->ai_next; @@ -1660,7 +1703,7 @@ formerr: { /* Longer affix, need more hashtables. */ if (ga_grow(gap, addlen - gap->ga_len) == FAIL) - goto errorend; + goto endFAIL; /* Re-allocating ga_data means that an ht_array * pointing to ht_smallarray becomes invalid. We @@ -1733,14 +1776,14 @@ formerr: */ /* Use <nr> bytes from the previous word. */ wlen = getc(fd); /* <nr> */ - if (wlen == EOF) + if (wlen < 0) { if (widx >= wordcount) /* normal way to end the file */ break; goto truncerr; } - /* Read further word bytes until one below 0x20, that must be the + /* Read further word bytes until one below 0x20, that one must be the * flags. Keep this fast! */ for (;;) { @@ -1760,10 +1803,12 @@ formerr: { /* Read <caselen> and <caseword> first, its length may differ from * the case-folded word. Note: this should only happen after the - * basic word! */ + * basic word without KEEPCAP! */ wlen = getc(fd); if (wlen < 0) goto truncerr; + if (wlen >= MAXWLEN) + goto formerr; for (i = 0; i < wlen; ++i) cbuf[i] = getc(fd); cbuf[i] = NUL; @@ -1800,7 +1845,7 @@ formerr: fw = (fword_T *)getroom(lp, &bl_used, (int)sizeof(fword_T) + wlen + (p - affixbuf)); if (fw == NULL) - goto errorend; + goto endFAIL; mch_memmove(fw->fw_word, (flags & BWF_KEEPCAP) ? cbuf : buf, wlen + 1); /* Put the affix NRs just after the word, if any. */ @@ -1811,12 +1856,15 @@ formerr: fw->fw_prefixcnt = prefixcnt; fw->fw_suffixcnt = suffixcnt; + /* We store the word in the hashtable case-folded. For a KEEPCAP word + * the entry must already exist, because fw_word can't be used as the + * key, it differs from "buf"! 
*/ hash = hash_hash(buf); hi = hash_lookup(&lp->sl_words, buf, hash); if (HASHITEM_EMPTY(hi)) { if (hash_add_item(&lp->sl_words, hi, fw->fw_word, hash) == FAIL) - goto errorend; + goto endFAIL; fw->fw_next = NULL; } else @@ -1826,7 +1874,7 @@ formerr: fw2 = HI2FWORD(hi); fw->fw_next = fw2->fw_next; fw2->fw_next = fw; - --widx; /* don't count this one */ + --widx; /* don't count this one as a basic word */ } if (flags & BWF_REGION) @@ -1841,15 +1889,20 @@ formerr: adds = (getc(fd) << 8) + getc(fd); /* <addcnt> */ else adds = getc(fd); /* <addcnt> */ + if (adds < 0) + goto formerr; if (adds > 30) { - /* Use a hashtable to loopup the part until the next word end. + /* Use a hashtable to lookup the part until the next word end. + * Thus for "de-bur-die" "de" is the basic word, "-bur" is key + * in the addition hashtable, "-bur<NUL>die" the whole + * addition and "aw_saveb" is '-'. * This uses more memory and involves some overhead, thus only - * do it when there are many additions (e.g., for French). */ + * do it when there are many additions (e.g., for French). */ ht = (hashtab_T *)getroom(lp, &bl_used, sizeof(hashtab_T)); if (ht == NULL) - goto errorend; + goto endFAIL; hash_init(ht); fw->fw_adds = (addword_T *)ht; fw->fw_flags |= BWF_ADDHASH; @@ -1860,19 +1913,26 @@ formerr: else ht = NULL; + /* + * Note: uses cbuf[] to copy bytes from previous addition. + */ while (--adds >= 0) { /* <add>: <addflags> <addlen> [<leadlen>] [<copylen>] * [<addstring>] [<region>] */ flags = getc(fd); /* <addflags> */ addlen = getc(fd); /* <addlen> */ - if (addlen == EOF) + if (addlen < 0) goto truncerr; if (addlen >= MAXWLEN) goto formerr; if (flags & ADD_LEADLEN) + { leadlen = getc(fd); /* <leadlen> */ + if (leadlen > addlen) + goto formerr; + } else leadlen = 0; @@ -1891,7 +1951,7 @@ formerr: { /* <addstring> is in original case, need to get * case-folded word too. 
*/ - (void)str_foldcase(cbuf, addlen, fbuf, MAXWLEN); + (void)spell_casefold(cbuf, addlen, fbuf, MAXWLEN); flen = addlen - leadlen + 1; addlen = STRLEN(fbuf); } @@ -1901,7 +1961,7 @@ formerr: aw = (addword_T *)getroom(lp, &bl_used, sizeof(addword_T) + addlen + flen); if (aw == NULL) - goto errorend; + goto endFAIL; if (flags & ADD_KEEPCAP) { @@ -1954,7 +2014,7 @@ formerr: naw = (addword_T *)getroom(lp, &bl_used, sizeof(addword_T) + STRLEN(NOWC_KEY)); if (naw == NULL) - goto errorend; + goto endFAIL; STRCPY(naw->aw_word, NOWC_KEY); hash_add_item(ht, hi, naw->aw_word, hash); naw->aw_next = aw; @@ -1994,11 +2054,12 @@ formerr: } } } - goto end_OK; - -errorend: + goto endOK; + +endFAIL: lp->sl_error = TRUE; -end_OK: + +endOK: if (fd != NULL) fclose(fd); hash_unlock(&lp->sl_words); @@ -2187,7 +2248,7 @@ captype(word, end) #else c = *p++; #endif - firstcap = allcap = MB_ISUPPER(c); + firstcap = allcap = spell_isupper(c); /* * Need to check all letters to find a word with mixed upper/lower. @@ -2201,7 +2262,7 @@ captype(word, end) #else c = *p; #endif - if (!MB_ISUPPER(c)) + if (!spell_isupper(c)) { /* UUl -> KEEPCAP */ if (past_second && allcap) @@ -2345,9 +2406,9 @@ struct basicword_S garray_T bw_prefix; /* table with prefix numbers */ garray_T bw_suffix; /* table with suffix numbers */ int bw_region; /* region bits */ - char_u *bw_caseword; /* keep-case word */ - char_u *bw_leadstring; /* must come before bw_word */ - char_u *bw_addstring; /* must come after bw_word */ + char_u *bw_caseword; /* keep-case word or NULL */ + char_u *bw_leadstring; /* must come before bw_word or NULL */ + char_u *bw_addstring; /* must come after bw_word or NULL */ char_u bw_word[1]; /* actually longer: word case folded */ }; @@ -2391,12 +2452,12 @@ static void add_affhash __ARGS((hashtab_ static void clear_affhash __ARGS((hashtab_T *ht)); static void trans_affixes __ARGS((dicword_T *dw, basicword_T *bw, afffile_T *oldaff, hashtab_T *newwords)); static int build_wordlist __ARGS((hashtab_T 
*newwords, hashtab_T *oldwords, afffile_T *oldaff, int regionmask)); +static basicword_T *get_basicword __ARGS((char_u *word, int asize)); static void combine_regions __ARGS((hashtab_T *newwords)); static int same_affixes __ARGS((basicword_T *bw, basicword_T *nbw)); -static void expand_affixes __ARGS((hashtab_T *newwords, garray_T *prefgap, garray_T *suffgap)); -static void expand_one_aff __ARGS((basicword_T *bw, garray_T *add_words, affentry_T *pae, affentry_T *sae)); -static void add_to_wordlist __ARGS((hashtab_T *newwords, basicword_T *bw)); -static void put_bytes __ARGS((FILE *fd, long_u nr, int len)); +static int expand_affixes __ARGS((hashtab_T *newwords, garray_T *prefgap, garray_T *suffgap)); +static int expand_one_aff __ARGS((basicword_T *bw, garray_T *add_words, affentry_T *pae, affentry_T *sae)); +static int add_to_wordlist __ARGS((hashtab_T *newwords, basicword_T *bw)); static void write_affix __ARGS((FILE *fd, affheader_T *ah)); static void write_affixlist __ARGS((FILE *fd, garray_T *aff, int bytes)); static void write_vim_spell __ARGS((char_u *fname, garray_T *prefga, garray_T *suffga, hashtab_T *newwords, int regcount, char_u *regchars)); @@ -2428,6 +2489,9 @@ spell_read_aff(fname, conv, ascii) affheader_T *cur_aff = NULL; int aff_todo = 0; hashtab_T *tp; + char_u *low = NULL; + char_u *fol = NULL; + char_u *upp = NULL; fd = fopen((char *)fname, "r"); if (fd == NULL) @@ -2449,8 +2513,9 @@ spell_read_aff(fname, conv, ascii) /* * Read all the lines in the file one by one. */ - while (!vim_fgets(rline, MAXLINELEN, fd)) + while (!vim_fgets(rline, MAXLINELEN, fd) && !got_int) { + line_breakcheck(); ++lnum; /* Skip comment lines. 
*/ @@ -2462,6 +2527,12 @@ spell_read_aff(fname, conv, ascii) if (conv->vc_type != CONV_NONE) { pc = string_convert(conv, rline, NULL); + if (pc == NULL) + { + smsg((char_u *)_("Conversion failure for word in %s line %d: %s"), + fname, lnum, rline); + continue; + } line = pc; } else @@ -2587,6 +2658,30 @@ spell_read_aff(fname, conv, ascii) cur_aff->ah_first = aff_entry; } } + else if (STRCMP(items[0], "FOL") == 0 && itemcnt == 2) + { + if (fol != NULL) + smsg((char_u *)_("Duplicate FOL in %s line %d"), + fname, lnum); + else + fol = vim_strsave(items[1]); + } + else if (STRCMP(items[0], "LOW") == 0 && itemcnt == 2) + { + if (low != NULL) + smsg((char_u *)_("Duplicate LOW in %s line %d"), + fname, lnum); + else + low = vim_strsave(items[1]); + } + else if (STRCMP(items[0], "UPP") == 0 && itemcnt == 2) + { + if (upp != NULL) + smsg((char_u *)_("Duplicate UPP in %s line %d"), + fname, lnum); + else + upp = vim_strsave(items[1]); + } else if (STRCMP(items[0], "REP") == 0 && itemcnt == 2) /* Ignore REP count */; else if (STRCMP(items[0], "REP") == 0 && itemcnt == 3) @@ -2608,6 +2703,18 @@ spell_read_aff(fname, conv, ascii) } + if (fol != NULL || low != NULL || upp != NULL) + { + if (fol == NULL || low == NULL || upp == NULL) + smsg((char_u *)_("Missing FOL/LOW/UPP line in %s"), fname); + else + set_spell_chartab(fol, low, upp); + + vim_free(fol); + vim_free(low); + vim_free(upp); + } + vim_free(pc); fclose(fd); return aff; @@ -2720,8 +2827,9 @@ spell_read_dic(ht, fname, conv, ascii) * The words are converted to 'encoding' here, before being added to * the hashtable. */ - while (!vim_fgets(line, MAXLINELEN, fd)) + while (!vim_fgets(line, MAXLINELEN, fd) && !got_int) { + line_breakcheck(); ++lnum; /* Remove CR, LF and white space from end. 
*/ @@ -2745,6 +2853,12 @@ spell_read_dic(ht, fname, conv, ascii) if (conv->vc_type != CONV_NONE) { pc = string_convert(conv, line, NULL); + if (pc == NULL) + { + smsg((char_u *)_("Conversion failure for word in %s line %d: %s"), + fname, lnum, line); + continue; + } w = pc; } else @@ -2756,7 +2870,10 @@ spell_read_dic(ht, fname, conv, ascii) dw = (dicword_T *)alloc_clear((unsigned)sizeof(dicword_T) + STRLEN(w)); if (dw == NULL) + { + vim_free(pc); break; + } STRCPY(dw->dw_word, w); vim_free(pc); @@ -3136,7 +3253,7 @@ trans_affixes(dw, bw, oldaff, newwords) char_u key[2]; char_u *p; char_u *affnm; - garray_T *gap; + garray_T *gap, *agap; hashitem_T *aff_hi; affheader_T *ah; affentry_T *ae; @@ -3144,7 +3261,6 @@ trans_affixes(dw, bw, oldaff, newwords) int i; basicword_T *nbw; int alen; - int wlen; garray_T suffixga; /* list of words with non-word suffixes */ garray_T prefixga; /* list of words with non-word prefixes */ char_u nword[MAXWLEN]; @@ -3179,7 +3295,7 @@ trans_affixes(dw, bw, oldaff, newwords) for (ae = ah->ah_first; ae != NULL; ae = ae->ae_next) { /* Setup for regexp matching. Note that we don't ignore case. - * This is weird, because he rules in an .aff file don't care + * This is weird, because the rules in an .aff file don't care * about case, but it's necessary for compatibility with Myspell. */ regmatch.regprog = ae->ae_prog; @@ -3190,23 +3306,19 @@ trans_affixes(dw, bw, oldaff, newwords) if ((ae->ae_add_nw != NULL || ae->ae_add_pw != NULL) && (gap != &bw->bw_suffix || bw->bw_addstring == NULL)) { - /* Affix has a non-word character and isn't prepended to + /* + * Affix has a non-word character and isn't prepended to * leader or appended to addition. Need to use another - * word with an addition. It's a copy of the basicword_T - * "bw". */ - if (gap == &bw->bw_suffix) + * word with a leadstring and/or addstring. 
+ */ + if (gap == &bw->bw_suffix || ae->ae_add_nw == NULL) { - alen = ae->ae_add_nw - ae->ae_add; - nbw = (basicword_T *)alloc((unsigned)( - sizeof(basicword_T) + STRLEN(bw->bw_word) - + alen + 1)); - if (nbw != NULL) + /* Suffix or prefix with only non-word chars. + * Build the new basic word in "nword": Remove chop + * string and append/prepend addition. */ + if (gap == &bw->bw_suffix) { - *nbw = *bw; - ga_init2(&nbw->bw_prefix, sizeof(short_u), 1); - ga_init2(&nbw->bw_suffix, sizeof(short_u), 1); - - /* Adding the suffix may change the caps. */ + /* suffix goes at the end of the word */ STRCPY(nword, dw->dw_word); if (ae->ae_chop != NULL) { @@ -3217,64 +3329,11 @@ trans_affixes(dw, bw, oldaff, newwords) *p = NUL; } STRCAT(nword, ae->ae_add); - flags = captype(nword, nword + STRLEN(nword)); - if (flags & BWF_KEEPCAP) - { - /* "caseword" excludes the addition */ - nword[STRLEN(dw->dw_word) + alen] = NUL; - nbw->bw_caseword = vim_strsave(nword); - } - nbw->bw_flags &= ~(BWF_ONECAP | BWF_ALLCAP - | BWF_KEEPCAP); - nbw->bw_flags |= flags; - - if (bw->bw_leadstring != NULL) - nbw->bw_leadstring = - vim_strsave(bw->bw_leadstring); - nbw->bw_addstring = vim_strsave(ae->ae_add_nw); - - STRCPY(nbw->bw_word, bw->bw_word); - if (alen > 0 || ae->ae_chop != NULL) - { - /* Suffix starts with word character and/or - * chop off something. Append it to the word. - * Add new word entry. */ - wlen = STRLEN(nbw->bw_word); - if (ae->ae_chop != NULL) - wlen -= STRLEN(ae->ae_chop); - mch_memmove(nbw->bw_word + wlen, ae->ae_add, - alen); - nbw->bw_word[wlen + alen] = NUL; - add_to_wordlist(newwords, nbw); - } - else - /* Basic word is the same, link "nbw" after - * "bw". */ - bw->bw_next = nbw; - - /* Remember this word, we need to set bw_prefix - * and bw_prefix later. 
*/ - if (ga_grow(&suffixga, 1) == OK) - ((basicword_T **)suffixga.ga_data) - [suffixga.ga_len++] = nbw; + agap = &suffixga; } - } - else if (ae->ae_add_nw == NULL) - { - /* Prefix that starts with non-word char(s) and may be - * followed by word chars: Make a leadstring and - * prepend word chars before the word. */ - alen = STRLEN(ae->ae_add_pw); - nbw = (basicword_T *)alloc((unsigned)( - sizeof(basicword_T) + STRLEN(bw->bw_word) - + alen + 1)); - if (nbw != NULL) + else { - *nbw = *bw; - ga_init2(&nbw->bw_prefix, sizeof(short_u), 1); - ga_init2(&nbw->bw_suffix, sizeof(short_u), 1); - - /* Adding the prefix may change the caps. */ + /* prefix goes before the word */ STRCPY(nword, ae->ae_add); p = dw->dw_word; if (ae->ae_chop != NULL) @@ -3282,51 +3341,33 @@ trans_affixes(dw, bw, oldaff, newwords) for (i = mb_charlen(ae->ae_chop); i > 0; --i) mb_ptr_adv( p); STRCAT(nword, p); - - flags = captype(nword, nword + STRLEN(nword)); - if (flags & BWF_KEEPCAP) - /* "caseword" excludes the addition */ - nbw->bw_caseword = vim_strsave(nword - + (ae->ae_add_pw - ae->ae_add)); - else - nbw->bw_caseword = NULL; - nbw->bw_flags &= ~(BWF_ONECAP | BWF_ALLCAP - | BWF_KEEPCAP); - nbw->bw_flags |= flags; - - if (bw->bw_addstring != NULL) - nbw->bw_addstring = - vim_strsave(bw->bw_addstring); - else - nbw->bw_addstring = NULL; - nbw->bw_leadstring = vim_strnsave(ae->ae_add, - ae->ae_add_pw - ae->ae_add); - - if (alen > 0 || ae->ae_chop != NULL) - { - /* Prefix ends in word character and/or chop - * off something. Prepend it to the word. - * Add new word entry. */ - STRCPY(nbw->bw_word, ae->ae_add_pw); - p = bw->bw_word; - if (ae->ae_chop != NULL) - p += STRLEN(ae->ae_chop); - STRCAT(nbw->bw_word, p); - add_to_wordlist(newwords, nbw); - } + agap = &prefixga; + } + + /* Create a basicword_T from the word. 
*/ + nbw = get_basicword(nword, 1); + if (nbw != NULL) + { + nbw->bw_region = bw->bw_region; + nbw->bw_flags |= bw->bw_flags + & ~(BWF_ONECAP | BWF_ALLCAP | BWF_KEEPCAP); + + if (STRCMP(bw->bw_word, nbw->bw_word) != 0) + /* Basic word differs, add new word entry. */ + (void)add_to_wordlist(newwords, nbw); else { /* Basic word is the same, link "nbw" after * "bw". */ - STRCPY(nbw->bw_word, bw->bw_word); + nbw->bw_next = bw->bw_next; bw->bw_next = nbw; } - /* Remember this word, we need to set bw_suffix - * and bw_suffix later. */ - if (ga_grow(&prefixga, 1) == OK) - ((basicword_T **)prefixga.ga_data) - [prefixga.ga_len++] = nbw; + /* Remember this word, we need to set bw_prefix + * or bw_suffix later. */ + if (ga_grow(agap, 1) == OK) + ((basicword_T **)agap->ga_data) + [agap->ga_len++] = nbw; } } else @@ -3345,7 +3386,7 @@ trans_affixes(dw, bw, oldaff, newwords) #else n = 1; #endif - (void)str_foldcase(p, n, nword + alen, + (void)spell_casefold(p, n, nword + alen, MAXWLEN - alen); alen += STRLEN(nword + alen); } @@ -3393,7 +3434,7 @@ trans_affixes(dw, bw, oldaff, newwords) else nbw->bw_leadstring = NULL; - add_to_wordlist(newwords, nbw); + (void)add_to_wordlist(newwords, nbw); /* Remember this word, we need to set bw_suffix * and bw_suffix later. */ @@ -3482,17 +3523,6 @@ build_wordlist(newwords, oldwords, oldaf hashitem_T *old_hi; dicword_T *dw; basicword_T *bw; - char_u foldword[MAXLINELEN]; - int leadlen; - char_u leadstring[MAXLINELEN]; - int addlen; - char_u addstring[MAXLINELEN]; - int dwlen; - char_u *p; - int clen; - int flags; - char_u *cp = NULL; - int l; char_u message[MAXLINELEN + MAXWLEN]; todo = oldwords->ht_used; @@ -3519,107 +3549,15 @@ build_wordlist(newwords, oldwords, oldaf break; } - /* The basic words are always stored with folded case. */ - dwlen = STRLEN(dw->dw_word); - (void)str_foldcase(dw->dw_word, dwlen, foldword, MAXLINELEN); - flags = captype(dw->dw_word, dw->dw_word + dwlen); - - /* Check for non-word characters before the word. 
*/ - clen = 0; - leadlen = 0; - if (!spell_iswordc(foldword)) - { - p = foldword; - for (;;) - { - mb_ptr_adv(p); - ++clen; - if (*p == NUL) /* Only non-word chars (bad word!) */ - { - if (p_verbose > 0) - smsg((char_u *)_("Warning: word without word characters: \"%s\""), - foldword); - break; - } - if (spell_iswordc(p)) - { - /* Move the leader to "leadstring" and remove it from - * "foldword". */ - leadlen = p - foldword; - mch_memmove(leadstring, foldword, leadlen); - leadstring[leadlen] = NUL; - mch_memmove(foldword, p, STRLEN(p) + 1); - break; - } - } - } - - /* Check for non-word characters after word characters. */ - addlen = 0; - for (p = foldword; spell_iswordc(p); mb_ptr_adv(p)) - { - if (*p == NUL) - break; - ++clen; - } - if (*p != NUL) - { - /* Move the addition to "addstring" and truncate "foldword". */ - if (flags & BWF_KEEPCAP) - { - /* Preserve caps, need to skip the right number of - * characters in the original word (case folding may - * change the byte count). */ - l = 0; - for (cp = dw->dw_word; l < clen; mb_ptr_adv(cp)) - ++l; - addlen = STRLEN(cp); - mch_memmove(addstring, cp, addlen + 1); - } - else - { - addlen = STRLEN(p); - mch_memmove(addstring, p, addlen + 1); - } - *p = NUL; - } - - bw = (basicword_T *)alloc_clear((unsigned)sizeof(basicword_T) - + STRLEN(foldword)); + bw = get_basicword(dw->dw_word, 10); if (bw == NULL) break; - STRCPY(bw->bw_word, foldword); bw->bw_region = regionmask; - if (leadlen > 0) - bw->bw_leadstring = vim_strsave(leadstring); - else - bw->bw_leadstring = NULL; - if (addlen > 0) - bw->bw_addstring = vim_strsave(addstring); - else - bw->bw_addstring = NULL; - - add_to_wordlist(newwords, bw); - - if (flags & BWF_KEEPCAP) - { - if (addlen == 0) - /* use the whole word */ - bw->bw_caseword = vim_strsave(dw->dw_word + leadlen); - else - /* use only up to the addition */ - bw->bw_caseword = vim_strnsave(dw->dw_word + leadlen, - cp - dw->dw_word - leadlen); - if (bw->bw_caseword == NULL) /* out of memory */ - flags &= 
~BWF_KEEPCAP; - } - bw->bw_flags = flags; + (void)add_to_wordlist(newwords, bw); /* Deal with any affix names on the old word, translate them * into affix numbers. */ - ga_init2(&bw->bw_prefix, sizeof(short_u), 10); - ga_init2(&bw->bw_suffix, sizeof(short_u), 10); if (dw->dw_affnm != NULL) trans_affixes(dw, bw, oldaff, newwords); } @@ -3630,6 +3568,128 @@ build_wordlist(newwords, oldwords, oldaf } /* + * Get a basicword_T from a word in original case. + * Caller must set bw_region. + * Returns NULL when something fails. + */ + static basicword_T * +get_basicword(word, asize) + char_u *word; + int asize; /* growsize for affix garray */ +{ + int dwlen; + char_u foldword[MAXLINELEN]; + int flags; + int clen; + int leadlen; + char_u *p; + char_u leadstring[MAXLINELEN]; + int addlen; + char_u addstring[MAXLINELEN]; + char_u *cp = NULL; + int l; + basicword_T *bw; + + /* The basic words are always stored with folded case. */ + dwlen = STRLEN(word); + (void)spell_casefold(word, dwlen, foldword, MAXLINELEN); + flags = captype(word, word + dwlen); + + /* Check for non-word characters before the word. */ + clen = 0; + leadlen = 0; + if (!spell_iswordc(foldword)) + { + p = foldword; + for (;;) + { + mb_ptr_adv(p); + ++clen; + if (*p == NUL) /* Only non-word chars (bad word!) */ + { + if (p_verbose > 0) + smsg((char_u *)_("Warning: word without word characters: \"%s\""), + foldword); + break; + } + if (spell_iswordc(p)) + { + /* Move the leader to "leadstring" and remove it from + * "foldword". */ + leadlen = p - foldword; + mch_memmove(leadstring, foldword, leadlen); + leadstring[leadlen] = NUL; + mch_memmove(foldword, p, STRLEN(p) + 1); + break; + } + } + } + + /* Check for non-word characters after word characters. */ + addlen = 0; + for (p = foldword; spell_iswordc(p); mb_ptr_adv(p)) + { + if (*p == NUL) + break; + ++clen; + } + if (*p != NUL) + { + /* Move the addition to "addstring" and truncate "foldword". 
*/ + if (flags & BWF_KEEPCAP) + { + /* Preserve caps, need to skip the right number of + * characters in the original word (case folding may + * change the byte count). */ + l = 0; + for (cp = word; l < clen; mb_ptr_adv(cp)) + ++l; + addlen = STRLEN(cp); + mch_memmove(addstring, cp, addlen + 1); + } + else + { + addlen = STRLEN(p); + mch_memmove(addstring, p, addlen + 1); + } + *p = NUL; + } + + bw = (basicword_T *)alloc_clear((unsigned)sizeof(basicword_T) + + STRLEN(foldword)); + if (bw == NULL) + return NULL; + + STRCPY(bw->bw_word, foldword); + + if (leadlen > 0) + bw->bw_leadstring = vim_strsave(leadstring); + else + bw->bw_leadstring = NULL; + if (addlen > 0) + bw->bw_addstring = vim_strsave(addstring); + else + bw->bw_addstring = NULL; + + if (flags & BWF_KEEPCAP) + { + if (addlen == 0) + /* use the whole word */ + bw->bw_caseword = vim_strsave(word + leadlen); + else + /* use only up to the addition */ + bw->bw_caseword = vim_strnsave(word + leadlen, + cp - word - leadlen); + } + + bw->bw_flags = flags; + ga_init2(&bw->bw_prefix, sizeof(short_u), asize); + ga_init2(&bw->bw_suffix, sizeof(short_u), asize); + + return bw; +} + +/* * Go through the list of words and combine the ones that are identical except * for the region. 
*/ @@ -3662,14 +3722,16 @@ combine_regions(newwords) && (bw->bw_addstring == NULL) == (nbw->bw_addstring == NULL) && ((bw->bw_flags & BWF_KEEPCAP) == 0 - || (STRCMP(bw->bw_caseword, - nbw->bw_caseword) == 0)) + || bw->bw_caseword == NULL + || nbw->bw_caseword == NULL + || STRCMP(bw->bw_caseword, + nbw->bw_caseword) == 0) && (bw->bw_leadstring == NULL - || (STRCMP(bw->bw_leadstring, - nbw->bw_leadstring) == 0)) + || STRCMP(bw->bw_leadstring, + nbw->bw_leadstring) == 0) && (bw->bw_addstring == NULL - || (STRCMP(bw->bw_addstring, - nbw->bw_addstring) == 0)) + || STRCMP(bw->bw_addstring, + nbw->bw_addstring) == 0) && same_affixes(bw, nbw) ) { @@ -3716,8 +3778,10 @@ same_affixes(bw, nbw) * This is also needed when a word with an addition has a prefix and the word * with prefix also exists. E.g., "blurp's/D" (D is prefix "de") and * "deblurp". "deblurp" would match and no prefix would be tried. + * + * Returns FAIL when out of memory. */ - static void + static int expand_affixes(newwords, prefgap, suffgap) hashtab_T *newwords; garray_T *prefgap; @@ -3731,6 +3795,7 @@ expand_affixes(newwords, prefgap, suffga garray_T add_words; int n; char_u message[MAXLINELEN + MAXWLEN]; + int retval = OK; ga_init2(&add_words, sizeof(basicword_T *), 10); @@ -3806,7 +3871,12 @@ expand_affixes(newwords, prefgap, suffga { /* Expand the word for this combination of * prefixes and affixes. */ - expand_one_aff(bw, &add_words, pae, sae); + if (expand_one_aff(bw, &add_words, + pae, sae) == FAIL) + { + retval = FAIL; + goto theend; + } /* Advance to next suffix entry, if there * is one. */ @@ -3831,9 +3901,16 @@ expand_affixes(newwords, prefgap, suffga * all its items. 
*/ for (pi = 0; pi < add_words.ga_len; ++pi) - add_to_wordlist(newwords, ((basicword_T **)add_words.ga_data)[pi]); - + { + retval = add_to_wordlist(newwords, + ((basicword_T **)add_words.ga_data)[pi]); + if (retval == FAIL) + break; + } + +theend: ga_clear(&add_words); + return retval; } /* @@ -3841,8 +3918,9 @@ expand_affixes(newwords, prefgap, suffga * prefix "pae" and suffix "sae". Either "pae" or "sae" can be NULL. * Don't do this when not necessary: * - no leadstring and adding prefix doesn't result in existing word. + * Returns FAIL when out of memory. */ - static void + static int expand_one_aff(bw, add_words, pae, sae) basicword_T *bw; garray_T *add_words; @@ -3873,7 +3951,7 @@ expand_one_aff(bw, add_words, pae, sae) STRCPY(word + l, bw->bw_word + choplen); /* Do the same for bw_caseword, if it's there. */ - if (bw->bw_flags & BWF_KEEPCAP) + if ((bw->bw_flags & BWF_KEEPCAP) && bw->bw_caseword != NULL) { if (l > 0) mch_memmove(caseword, pae->ae_add, l); @@ -3907,112 +3985,116 @@ expand_one_aff(bw, add_words, pae, sae) nbw = (basicword_T *)alloc_clear((unsigned) sizeof(basicword_T) + STRLEN(word)); - if (nbw != NULL) + if (nbw == NULL) + return FAIL; + + /* Add the new word to the list of words to be added later. */ + if (ga_grow(add_words, 1) == FAIL) { - /* Add the new word to the list of words to be added later. */ - if (ga_grow(add_words, 1) == FAIL) + vim_free(nbw); + return FAIL; + } + ((basicword_T **)add_words->ga_data)[add_words->ga_len++] = nbw; + + /* Copy the (modified) basic word, flags and region. */ + STRCPY(nbw->bw_word, word); + nbw->bw_flags = bw->bw_flags; + nbw->bw_region = bw->bw_region; + + /* Set the (modified) caseword. 
*/ + if (bw->bw_flags & BWF_KEEPCAP) + nbw->bw_caseword = vim_strsave(caseword); + else + nbw->bw_caseword = NULL; + + if (bw->bw_leadstring != NULL) + { + if (pae != NULL) { - vim_free(nbw); - return; - } - ((basicword_T **)add_words->ga_data)[add_words->ga_len++] = nbw; - - /* Copy the (modified) basic word, flags and region. */ - STRCPY(nbw->bw_word, word); - nbw->bw_flags = bw->bw_flags; - nbw->bw_region = bw->bw_region; - - /* Set the (modified) caseword. */ - if (bw->bw_flags & BWF_KEEPCAP) - if ((nbw->bw_caseword = vim_strsave(caseword)) == NULL) - nbw->bw_flags &= ~BWF_KEEPCAP; - - if (bw->bw_leadstring != NULL) - { - if (pae != NULL) + /* Prepend prefix to leadstring. */ + ll = STRLEN(bw->bw_leadstring); + l = choplen = 0; + if (pae->ae_add != NULL) + l = STRLEN(pae->ae_add); + if (pae->ae_chop != NULL) { - /* Prepend prefix to leadstring. */ - ll = STRLEN(bw->bw_leadstring); - l = choplen = 0; - if (pae->ae_add != NULL) - l = STRLEN(pae->ae_add); - if (pae->ae_chop != NULL) - { - choplen = STRLEN(pae->ae_chop); - if (choplen > ll) /* TODO: error? */ - choplen = ll; - } - nbw->bw_leadstring = alloc((unsigned)(ll + l - choplen + 1)); - if (nbw->bw_leadstring != NULL) - { - if (l > 0) - mch_memmove(nbw->bw_leadstring, pae->ae_add, l); - STRCPY(nbw->bw_leadstring + l, bw->bw_leadstring + choplen); - } + choplen = STRLEN(pae->ae_chop); + if (choplen > ll) /* TODO: error? */ + choplen = ll; } - else - nbw->bw_leadstring = vim_strsave(bw->bw_leadstring); - } - else if (bw->bw_prefix.ga_len > 0) - { - /* There is no leadstring, copy the list of possible prefixes. 
*/ - ga_init2(&nbw->bw_prefix, sizeof(short_u), 1); - if (ga_grow(&nbw->bw_prefix, bw->bw_prefix.ga_len) == OK) + nbw->bw_leadstring = alloc((unsigned)(ll + l - choplen + 1)); + if (nbw->bw_leadstring != NULL) { - mch_memmove(nbw->bw_prefix.ga_data, bw->bw_prefix.ga_data, - bw->bw_prefix.ga_len * sizeof(short_u)); - nbw->bw_prefix.ga_len = bw->bw_prefix.ga_len; + if (l > 0) + mch_memmove(nbw->bw_leadstring, pae->ae_add, l); + STRCPY(nbw->bw_leadstring + l, bw->bw_leadstring + choplen); } } - - if (bw->bw_addstring != NULL) + else + nbw->bw_leadstring = vim_strsave(bw->bw_leadstring); + } + else if (bw->bw_prefix.ga_len > 0) + { + /* There is no leadstring, copy the list of possible prefixes. */ + ga_init2(&nbw->bw_prefix, sizeof(short_u), 1); + if (ga_grow(&nbw->bw_prefix, bw->bw_prefix.ga_len) == OK) { - if (sae != NULL) - { - /* Append suffix to addstring. */ - l = STRLEN(bw->bw_addstring); - if (sae->ae_chop != NULL) - { - l -= STRLEN(sae->ae_chop); - if (l < 0) /* TODO: error? */ - l = 0; - } - if (sae->ae_add == NULL) - ll = 0; - else - ll = STRLEN(sae->ae_add); - nbw->bw_addstring = alloc((unsigned)(ll + l - choplen + 1)); - if (nbw->bw_addstring != NULL) - { - STRCPY(nbw->bw_addstring, bw->bw_addstring); - if (sae->ae_add == NULL) - nbw->bw_addstring[l] = NUL; - else - STRCPY(nbw->bw_addstring + l, sae->ae_add); - } - } - else - nbw->bw_addstring = vim_strsave(bw->bw_addstring); + mch_memmove(nbw->bw_prefix.ga_data, bw->bw_prefix.ga_data, + bw->bw_prefix.ga_len * sizeof(short_u)); + nbw->bw_prefix.ga_len = bw->bw_prefix.ga_len; } } + + if (bw->bw_addstring != NULL) + { + if (sae != NULL) + { + /* Append suffix to addstring. */ + l = STRLEN(bw->bw_addstring); + if (sae->ae_chop != NULL) + { + l -= STRLEN(sae->ae_chop); + if (l < 0) /* TODO: error? 
*/ + l = 0; + } + if (sae->ae_add == NULL) + ll = 0; + else + ll = STRLEN(sae->ae_add); + nbw->bw_addstring = alloc((unsigned)(ll + l - choplen + 1)); + if (nbw->bw_addstring != NULL) + { + STRCPY(nbw->bw_addstring, bw->bw_addstring); + if (sae->ae_add == NULL) + nbw->bw_addstring[l] = NUL; + else + STRCPY(nbw->bw_addstring + l, sae->ae_add); + } + } + else + nbw->bw_addstring = vim_strsave(bw->bw_addstring); + } + + return OK; } /* * Add basicword_T "*bw" to wordlist "newwords". */ - static void + static int add_to_wordlist(newwords, bw) hashtab_T *newwords; basicword_T *bw; { hashitem_T *hi; basicword_T *bw2; + int retval = OK; hi = hash_find(newwords, bw->bw_word); if (HASHITEM_EMPTY(hi)) { /* New entry, add to hashlist. */ - hash_add(newwords, bw->bw_word); + retval = hash_add(newwords, bw->bw_word); bw->bw_next = NULL; } else @@ -4022,12 +4104,13 @@ add_to_wordlist(newwords, bw) bw->bw_next = bw2->bw_next; bw2->bw_next = bw; } + return retval; } /* * Write a number to file "fd", MSB first, in "len" bytes. */ - static void + void put_bytes(fd, nr, len) FILE *fd; long_u nr; @@ -4105,22 +4188,29 @@ write_affixlist(fd, aff, bytes) * <SUGGEST> <WORDLIST> * * <HEADER>: <fileID> <regioncnt> <regionname> ... + * <charflagslen> <charflags> <fcharslen> <fchars> * - * <fileID> 10 bytes "VIMspell03" + * <fileID> 10 bytes "VIMspell04" * <regioncnt> 1 byte number of regions following (8 supported) * <regionname> 2 bytes Region name: ca, au, etc. * First <regionname> is region 1. * + * <charflagslen> 1 byte Number of bytes in <charflags> (should be 128). + * <charflags> N bytes List of flags (first one is for character 128): + * 0x01 word character + * 0x02 upper-case character + * <fcharslen> 2 bytes Number of bytes in <fchars>. + * <fchars> N bytes Folded characters, first one is for character 128. * - * <PREFIXLIST>: <affcount> <affix> ... - * <SUFFIXLIST>: <affcount> <affix> ... + * + * <PREFIXLIST>: <affcount> <affix> ...
+ * <SUFFIXLIST>: <affcount> <affix> ... * list of possible affixes: prefixes and suffixes. * * <affcount> 2 bytes Number of affixes (MSB comes first). * When more than 256 an affixNR is 2 bytes. * This is separate for prefixes and suffixes! * First affixNR is 0. - * <afftotcnt> 2 bytes Total number of affix items (MSB comes first). * * <affix>: <affitemcnt> <affitem> ... * @@ -4228,8 +4318,6 @@ write_vim_spell(fname, prefga, suffga, n int flags, aflags; basicword_T *bw, *bwf, *bw2 = NULL; int i; - int cnt; - affentry_T *ae; int round; garray_T bwga; @@ -4242,12 +4330,14 @@ write_vim_spell(fname, prefga, suffga, n return; } - fwrite(VIMSPELLMAGIC, VIMSPELLMAGICL, (size_t)1, wif.wif_fd); + /* <HEADER>: <fileID> <regioncnt> <regionname> ... + * <charflagslen> <charflags> <fcharslen> <fchars> */ + fwrite(VIMSPELLMAGIC, VIMSPELLMAGICL, (size_t)1, wif.wif_fd); /* <fileID> */ /* write the region names if there is more than one */ if (regcount > 1) { - putc(regcount, wif.wif_fd); + putc(regcount, wif.wif_fd); /* <regioncnt> <regionname> ... */ fwrite(regchars, (size_t)(regcount * 2), (size_t)1, wif.wif_fd); wif.wif_regionmask = (1 << regcount) - 1; } @@ -4257,20 +4347,17 @@ write_vim_spell(fname, prefga, suffga, n wif.wif_regionmask = 0; } - /* Write the prefix and suffix lists. */ + /* Write the table with character flags and table for case folding. + * <charflagslen> <charflags> <fcharslen> <fchars> */ + write_spell_chartab(wif.wif_fd); + + /* <PREFIXLIST>: <affcount> <affix> ... + * <SUFFIXLIST>: <affcount> <affix> ... */ for (round = 1; round <= 2; ++round) { gap = round == 1 ? prefga : suffga; put_bytes(wif.wif_fd, (long_u)gap->ga_len, 2); /* <affcount> */ - /* Count the total number of affix items.
*/ - cnt = 0; - for (i = 0; i < gap->ga_len; ++i) - for (ae = ((affheader_T *)gap->ga_data + i)->ah_first; - ae != NULL; ae = ae->ae_next) - ++cnt; - put_bytes(wif.wif_fd, (long_u)cnt, 2); /* <afftotcnt> */ - for (i = 0; i < gap->ga_len; ++i) write_affix(wif.wif_fd, (affheader_T *)gap->ga_data + i); } @@ -4279,12 +4366,14 @@ write_vim_spell(fname, prefga, suffga, n wif.wif_prefm = (prefga->ga_len > 256) ? 2 : 1; wif.wif_suffm = (suffga->ga_len > 256) ? 2 : 1; - /* Write the suggest info. TODO */ - put_bytes(wif.wif_fd, 0L, 4); + /* <SUGGEST> : <suggestlen> <more> ... + * TODO. Only write a zero length for now. */ + put_bytes(wif.wif_fd, 0L, 4); /* <suggestlen> */ /* - * Write the word list. <wordcount> <worditem> ... + * <WORDLIST>: <wordcount> <worditem> ... */ + /* number of basic words in 4 bytes */ put_bytes(wif.wif_fd, newwords->ht_used, 4); /* <wordcount> */ @@ -4333,8 +4422,10 @@ write_vim_spell(fname, prefga, suffga, n | BWF_ALLCAP); if (flags == aflags && ((flags & BWF_KEEPCAP) == 0 - || (STRCMP(bw->bw_caseword, - bw2->bw_caseword) == 0)) + || bw->bw_caseword == NULL + || bw2->bw_caseword == NULL + || STRCMP(bw->bw_caseword, + bw2->bw_caseword) == 0) && same_affixes(bw, bw2)) break; } @@ -4385,6 +4476,7 @@ write_vim_spell(fname, prefga, suffga, n } ga_clear(&bwga); + vim_free(wtab); } fclose(wif.wif_fd); @@ -4548,7 +4640,7 @@ write_bword(wif, bwf, lowcap) if (lowcap) return; - if (flags & BWF_KEEPCAP) + if ((flags & BWF_KEEPCAP) && bw->bw_caseword != NULL) { len = STRLEN(bw->bw_caseword); putc(len, fd); /* <caselen> */ @@ -4684,6 +4776,7 @@ write_bword(wif, bwf, lowcap) bw2 = bw; } + vim_free(wtab); } } @@ -4710,6 +4803,7 @@ ex_mkspell(eap) vimconv_T conv; int ascii = FALSE; char_u *arg = eap->arg; + int error = FALSE; if (STRNCMP(arg, "-ascii", 6) == 0) { @@ -4766,6 +4860,10 @@ ex_mkspell(eap) } } + /* Clear the char type tables, don't want to use any of the currently + * used spell properties. 
*/ + init_spell_chartab(); + /* * Read all the .aff and .dic files. * Text is converted to 'encoding'. @@ -4846,15 +4944,18 @@ ex_mkspell(eap) */ MSG(_("Processing words...")); out_flush(); - expand_affixes(&newwords, &prefga, &suffga); - - /* Write the info in the spell file. */ - smsg((char_u *)_("Writing spell file %s..."), wfname); - out_flush(); - write_vim_spell(wfname, &prefga, &suffga, &newwords, + error = expand_affixes(&newwords, &prefga, &suffga) == FAIL; + + if (!error) + { + /* Write the info in the spell file. */ + smsg((char_u *)_("Writing spell file %s..."), wfname); + out_flush(); + write_vim_spell(wfname, &prefga, &suffga, &newwords, fcount - 1, region_name); - MSG(_("Done!")); - out_flush(); + MSG(_("Done!")); + out_flush(); + } /* Free the allocated stuff. */ free_wordtable(&newwords);