vim: src/spell.c comparison

comparison src/spell.c @ 255:c8742c8da9ab

updated for version 7.0070

author	vimboss
date	Sat, 23 Apr 2005 20:42:23 +0000
parents	f146656fb903
children	ed33f83b42d8

comparison

equal deleted inserted replaced

-:c50c82c5e230
+:c8742c8da9ab
 #define AFF_PREWORD	0x02	/* prefix includes word */
 /*
 * Structure used to store words and other info for one language, loaded from
 * a .spl file.
+* The main access is through hashtable "sl_words", using the case-folded
+* word as the key.  This finds a linked list of fword_T.
 */
 typedef struct slang_S slang_T;
 struct slang_S
 {
 slang_T	*sl_next;	/* next language */
 #define HI2ADDWORD(hi)	((addword_T *)((hi)->hi_key - (dumaw.aw_word - (char_u *)&dumaw)))
 /*
 * Structure to store a basic word.
 * There are many of these, keep it small!
+* The list of prefix and suffix NRs is stored after "fw_word" to avoid the
+* need for two extra pointers.
 */
 typedef struct fword_S fword_T;
 struct fword_S
 {
 fword_T	*fw_next;	/* same basic word with different caps and/or
 /* Translate ADD_ flags to BWF_ flags.
 * (Needed to keep ADD_ flags in one byte.) */
 #define ADD2BWF(x)	(((x) & 0x0f) | (((x) & 0xf0) << 4))
-#define VIMSPELLMAGIC "VIMspell03"  /* string at start of Vim spell file */
+#define VIMSPELLMAGIC "VIMspell04"  /* string at start of Vim spell file */
 #define VIMSPELLMAGICL 10
 /*
 * Structure to store info for word matching.
 */
 /* A word starting with a number is always OK. */
 if (*ptr >= '0' && *ptr <= '9')
 	return (int)(mi.mi_end - ptr);
 /* Make case-folded copy of the word. */
-(void)str_foldcase(ptr, mi.mi_end - ptr, mi.mi_fword, MAXWLEN + 1);
+(void)spell_casefold(ptr, mi.mi_end - ptr, mi.mi_fword, MAXWLEN + 1);
 mi.mi_cword = mi.mi_fword;
 mi.mi_fendlen = STRLEN(mi.mi_fword);
 mi.mi_faddlen = 0;
 mi.mi_fend = mi.mi_end;
 * Try finding a matching preword for "mip->mi_word".  These are
 * prefixes that have a non-word character after a word character:
 * "d'", "de-", "'s-", "l'de-".  But not "'s".
 * Also need to do this when a matching word was already found, because we
 * might find a longer match this way (French: "qu" and "qu'a-t-elle").
+* The check above may have added characters to mi_fword, thus we need to
+* truncate it after the basic word for the hash lookup.
 */
 cc = mip->mi_fword[mip->mi_fendlen];
 mip->mi_fword[mip->mi_fendlen] = NUL;
 hi = hash_lookup(&mip->mi_slang->sl_prewords, mip->mi_fword, fhash);
 mip->mi_fword[mip->mi_fendlen] = cc;
 	if (has_mbyte)
 	    l = (*mb_ptr2len_check)(mip->mi_fend);
 	else
 #endif
 	    l = 1;
-	(void)str_foldcase(mip->mi_fend, l, p + mip->mi_faddlen,
+	(void)spell_casefold(mip->mi_fend, l, p + mip->mi_faddlen,
 				 MAXWLEN - mip->mi_fendlen - mip->mi_faddlen);
 	mip->mi_fend += l;
 	mip->mi_faddlen += STRLEN(p + mip->mi_faddlen);
 }
 }
 * Try suffixes of different length, starting with an empty suffix (chop
 * only, thus adds something).
 * Stop checking if there are no suffixes with so many characters.
 */
 sufp = endw;
+*endw = NUL;	/* truncate after possible suffix */
 for (charlen = 0; charlen <= mip->mi_slang->sl_sufftab.ga_len; ++charlen)
 {
 	/* Move the pointer to the possible suffix back one character, unless
 	 * doing the first round (empty suffix). */
 	if (charlen > 0)
 	    /* Get pointer to hashtab for suffix of this many chars. */
 	    ht = ((hashtab_T *)mip->mi_slang->sl_sufftab.ga_data) + charlen - 1;
 	    if (ht->ht_used == 0)
 		continue;
-	    *endw = NUL;	/* truncate after possible suffix */
 	    hi = hash_find(ht, sufp);
 	    if (HASHITEM_EMPTY(hi))
 		ai = NULL;
 	    else
 		ai = HI2AI(hi);
-	    *endw = endw_c;
 	}
 	if (ai != NULL)
 	{
 	    /* Found a list of matching suffixes.  Now check that there is one
 	     * we can use. */
 	    tlen = sufp - mip->mi_cword;    /* length of word without suffix */
 	    mch_memmove(pword, mip->mi_cword, tlen);
+	    *endw = endw_c;
 	    for ( ; ai != NULL; ai = ai->ai_next)
 	    {
 		/* Found a matching suffix.  Create the basic word by removing
 		 * the suffix and adding the chop string. */
 			mip->mi_capflags = capflags_save;
 			return TRUE;
 		    }
 		}
 	    }
-	}
-}
+	    *endw = NUL;	/* truncate after possible suffix */
+	}
+}
+*endw = endw_c;
 mip->mi_capflags = capflags_save;
 return FALSE;
 }
 /*
 		if (has_mbyte)
 		    c = mb_ptr2char_adv(&p);
 		else
 #endif
 		    c = *p++;
-		if (MB_ISUPPER(c))
+		if (spell_isupper(c))
 		{
 		    if (capflags == 0 || (capflags & BWF_ONECAP))
 		    {
 			capflags = BWF_KEEPCAP;	/* lU or UlU */
 			break;
 int		flags;
 affitem_T	*ai, *ai2, **aip;
 int		round;
 char_u	*save_sourcing_name = sourcing_name;
 linenr_T	save_sourcing_lnum = sourcing_lnum;
-int		cnt;
+int		cnt, ccnt;
 int		choplen;
 int		addlen;
 int		leadlen;
 int		wordcount;
 fword_T	*fw, *fw2;
 hash_T	hash;
 int		adds;
 addword_T	*aw, *naw;
 int		flen;
 int		xlen;
+char_u	*fol;
 fd = fopen((char *)fname, "r");
 if (fd == NULL)
 {
 	EMSG2(_(e_notopen), fname);
-	goto errorend;
+	goto endFAIL;
 }
 /* Set sourcing_name, so that error messages mention the file name. */
 sourcing_name = fname;
 sourcing_lnum = 0;
-/* <HEADER>: <fileID> <regioncnt> <regionname> ... */
+/* <HEADER>: <fileID> <regioncnt> <regionname> ...
+*		 <charflagslen> <charflags>  <fcharslen> <fchars> */
 for (i = 0; i < VIMSPELLMAGICL; ++i)
 	buf[i] = getc(fd);				/* <fileID> */
 if (STRNCMP(buf, VIMSPELLMAGIC, VIMSPELLMAGICL) != 0)
 {
 	EMSG(_("E757: Wrong file ID in spell file"));
-	goto errorend;
+	goto endFAIL;
 }
 cnt = getc(fd);					/* <regioncnt> */
-if (cnt == EOF)
+if (cnt < 0)
 {
 truncerr:
 	EMSG(_("E758: Truncated spell file"));
-	goto errorend;
+	goto endFAIL;
 }
 if (cnt > 8)
 {
 formerr:
 	EMSG(_("E759: Format error in spell file"));
-	goto errorend;
+	goto endFAIL;
 }
 for (i = 0; i < cnt; ++i)
 {
 	lp->sl_regions[i * 2] = getc(fd);		/* <regionname> */
 	lp->sl_regions[i * 2 + 1] = getc(fd);
 }
 lp->sl_regions[cnt * 2] = NUL;
-/* round 1: <PREFIXLIST>: <affcount> <afftotcnt> <affix> ...
+cnt = getc(fd);					/* <charflagslen> */
-* round 2: <SUFFIXLIST>: <affcount> <afftotcnt> <affix> ...  */
+if (cnt > 0)
+{
+	p = (char_u *)getroom(lp, &bl_used, cnt);
+	if (p == NULL)
+	    goto endFAIL;
+	for (i = 0; i < cnt; ++i)
+	    p[i] = getc(fd);				/* <charflags> */
+	ccnt = (getc(fd) << 8) + getc(fd);		/* <fcharslen> */
+	if (ccnt <= 0)
+	    goto formerr;
+	fol = (char_u *)getroom(lp, &bl_used, ccnt + 1);
+	if (fol == NULL)
+	    goto endFAIL;
+	for (i = 0; i < ccnt; ++i)
+	    fol[i] = getc(fd);				/* <fchars> */
+	fol[i] = NUL;
+	/* Set the word-char flags and fill spell_isupper() table. */
+	if (set_spell_charflags(p, cnt, fol) == FAIL)
+	    goto formerr;
+}
+else
+{
+	/* When <charflagslen> is zero then <fcharslen> must also be zero. */
+	cnt = (getc(fd) << 8) + getc(fd);
+	if (cnt != 0)
+	    goto formerr;
+}
+/* round 1: <PREFIXLIST>: <affcount> <affix> ...
+* round 2: <SUFFIXLIST>: <affcount> <affix> ...  */
 for (round = 1; round <= 2; ++round)
 {
 	affcount = (getc(fd) << 8) + getc(fd);		/* <affcount> */
 	if (affcount < 0)
 	    goto truncerr;
 	    gap = &lp->sl_sufftab;
 	    aip = &lp->sl_suffzero;
 	    lp->sl_suffcnt = affcount;
 	    suffm = affcount > 256 ? 2 : 1;
 	}
-	i = (getc(fd) << 8) + getc(fd);		/* <afftotcnt> */
-	/* afftotcnt is not used */
 	/*
 	 * For each affix NR there can be several affixes.
 	 */
 	for (affnr = 0; affnr < affcount; ++affnr)
 	    {
 		/* <affitem>: <affflags> <affchoplen> <affchop>
 		 *				    <affaddlen> <affadd> */
 		affflags = getc(fd);			/* <affflags> */
 		choplen = getc(fd);			/* <affchoplen> */
-		if (choplen == EOF)
+		if (choplen < 0)
 		    goto truncerr;
 		if (choplen >= MAXWLEN)
 		    goto formerr;
 		for (i = 0; i < choplen; ++i)		/* <affchop> */
 		    buf[i] = getc(fd);
 		buf[i] = NUL;
 		addlen = getc(fd);			/* <affaddlen> */
-		if (addlen == EOF)
+		if (addlen < 0)
 		    goto truncerr;
 		if (affflags & AFF_PREWORD)
 		    xlen = addlen + 2;	/* space for lead and trail string */
 		else
 		    xlen = 0;
 		/* Get room to store the affitem_T, chop and add strings. */
-		p = (char_u *)getroom(lp, &bl_used,
+		ai = (affitem_T *)getroom(lp, &bl_used,
 			     sizeof(affitem_T) + addlen + choplen + 1 + xlen);
-		if (p == NULL)
+		if (ai == NULL)
-		    goto errorend;
+		    goto endFAIL;
-		ai = (affitem_T *)p;
 		ai->ai_nr = affnr;
 		ai->ai_flags = affflags;
 		ai->ai_choplen = choplen;
 		ai->ai_addlen = addlen;
 		if (affflags & AFF_PREWORD)
 		{
 		    int	    l, leadoff, trailoff;
 		    /*
-		     * Separate lead and trail string, put word at ai_add, so
+		     * A preword is a prefix that's recognized as a word: it
-		     * that it can be used as hashtable key.
+		     * contains word characters followed by a non-word
+		     * character.
+		     * <affadd> is the whole prefix.  Separate lead and trail
+		     * string, put the word itself at ai_add, so that it can
+		     * be used as hashtable key.
 		     */
 		    /* lead string: up to first word char */
 		    while (*p != NUL && !spell_iswordc(p))
 			mb_ptr_adv(p);
 		    ai->ai_leadlen = p - ai->ai_add;
 		    ai->ai_add[l] = NUL;
 		    hash = hash_hash(ai->ai_add);
 		    hi = hash_lookup(&lp->sl_prewords, ai->ai_add, hash);
 		    if (HASHITEM_EMPTY(hi))
 		    {
-			/* First affix with this word, add to hashtable. */
+			/* First preword with this word, add to hashtable. */
 			hash_add_item(&lp->sl_prewords, hi, ai->ai_add, hash);
 			ai->ai_next = NULL;
 		    }
 		    else
 		    {
-			/* There already is an affix with this word, link in
+			/* There already is a preword with this word, link in
 			 * the list.  */
 			ai2 = HI2AI(hi);
 			ai->ai_next = ai2->ai_next;
 			ai2->ai_next = ai;
 		    }
 		    {
 			if (gap->ga_len < addlen)
 			{
 			    /* Longer affix, need more hashtables. */
 			    if (ga_grow(gap, addlen - gap->ga_len) == FAIL)
-				goto errorend;
+				goto endFAIL;
 			    /* Re-allocating ga_data means that an ht_array
 			     * pointing to ht_smallarray becomes invalid.  We
 			     * can recognize this: ht_mask is at its init
 			     * value. */
 	 *			  [<region>]
 	 *			  [<addcnt> <add> ...]
 	 */
 	/* Use <nr> bytes from the previous word. */
 	wlen = getc(fd);				/* <nr> */
-	if (wlen == EOF)
+	if (wlen < 0)
 	{
 	    if (widx >= wordcount)	/* normal way to end the file */
 		break;
 	    goto truncerr;
 	}
-	/* Read further word bytes until one below 0x20, that must be the
+	/* Read further word bytes until one below 0x20, that one must be the
 	 * flags.  Keep this fast! */
 	for (;;)
 	{
 	    if ((buf[wlen] = getc(fd)) < 0x20)		/* <string> */
 		break;
 	if (flags & BWF_KEEPCAP)
 	{
 	    /* Read <caselen> and <caseword> first, its length may differ from
 	     * the case-folded word.  Note: this should only happen after the
-	     * basic word! */
+	     * basic word without KEEPCAP! */
 	    wlen = getc(fd);
 	    if (wlen < 0)
 		goto truncerr;
+	    if (wlen >= MAXWLEN)
+		goto formerr;
 	    for (i = 0; i < wlen; ++i)
 		cbuf[i] = getc(fd);
 	    cbuf[i] = NUL;
 	}
 	/* Find room to store the word in an fword_T. */
 	fw = (fword_T *)getroom(lp, &bl_used, (int)sizeof(fword_T) + wlen
 							    + (p - affixbuf));
 	if (fw == NULL)
-	    goto errorend;
+	    goto endFAIL;
 	mch_memmove(fw->fw_word, (flags & BWF_KEEPCAP) ? cbuf : buf, wlen + 1);
 	/* Put the affix NRs just after the word, if any. */
 	if (p > affixbuf)
 	    mch_memmove(fw->fw_word + wlen + 1, affixbuf, p - affixbuf);
 	fw->fw_flags = flags;
 	fw->fw_prefixcnt = prefixcnt;
 	fw->fw_suffixcnt = suffixcnt;
+	/* We store the word in the hashtable case-folded.  For a KEEPCAP word
+	 * the entry must already exist, because fw_word can't be used as the
+	 * key, it differs from "buf"! */
 	hash = hash_hash(buf);
 	hi = hash_lookup(&lp->sl_words, buf, hash);
 	if (HASHITEM_EMPTY(hi))
 	{
 	    if (hash_add_item(&lp->sl_words, hi, fw->fw_word, hash) == FAIL)
-		goto errorend;
+		goto endFAIL;
 	    fw->fw_next = NULL;
 	}
 	else
 	{
 	    /* Already have this basic word in the hashtable, this one will
 	     * have different case flags and/or affixes. */
 	    fw2 = HI2FWORD(hi);
 	    fw->fw_next = fw2->fw_next;
 	    fw2->fw_next = fw;
-	    --widx;			/* don't count this one */
+	    --widx;		/* don't count this one as a basic word */
 	}
 	if (flags & BWF_REGION)
 	    fw->fw_region = getc(fd);			/* <region> */
 	else
 	{
 	    if (flags & BWF_ADDS_M)
 		adds = (getc(fd) << 8) + getc(fd);	/* <addcnt> */
 	    else
 		adds = getc(fd);			/* <addcnt> */
+	    if (adds < 0)
+		goto formerr;
 	    if (adds > 30)
 	    {
-		/* Use a hashtable to loopup the part until the next word end.
+		/* Use a hashtable to lookup the part until the next word end.
+		 * Thus for "de-bur-die" "de" is the basic word, "-bur" is key
+		 * in the addition hashtable, "-bur<NUL>die" the whole
+		 * addition and "aw_saveb" is '-'.
 		 * This uses more memory and involves some overhead, thus only
-		 * do it when there are many additions (e.g., for French).  */
+		 * do it when there are many additions (e.g., for French). */
 		ht = (hashtab_T *)getroom(lp, &bl_used, sizeof(hashtab_T));
 		if (ht == NULL)
-		    goto errorend;
+		    goto endFAIL;
 		hash_init(ht);
 		fw->fw_adds = (addword_T *)ht;
 		fw->fw_flags |= BWF_ADDHASH;
 		/* Preset the size of the hashtable. It's never unlocked. */
 		hash_lock_size(ht, adds + 1);
 	    }
 	    else
 		ht = NULL;
+	    /*
+	     * Note: uses cbuf[] to copy bytes from previous addition.
+	     */
 	    while (--adds >= 0)
 	    {
 		/* <add>: <addflags> <addlen> [<leadlen>] [<copylen>]
 		 *				[<addstring>] [<region>] */
 		flags = getc(fd);			/* <addflags> */
 		addlen = getc(fd);			/* <addlen> */
-		if (addlen == EOF)
+		if (addlen < 0)
 		    goto truncerr;
 		if (addlen >= MAXWLEN)
 		    goto formerr;
 		if (flags & ADD_LEADLEN)
+		{
 		    leadlen = getc(fd);			/* <leadlen> */
+		    if (leadlen > addlen)
+			goto formerr;
+		}
 		else
 		    leadlen = 0;
 		if (addlen > 0)
 		{
 		if (flags & ADD_KEEPCAP)
 		{
 		    /* <addstring> is in original case, need to get
 		     * case-folded word too. */
-		    (void)str_foldcase(cbuf, addlen, fbuf, MAXWLEN);
+		    (void)spell_casefold(cbuf, addlen, fbuf, MAXWLEN);
 		    flen = addlen - leadlen + 1;
 		    addlen = STRLEN(fbuf);
 		}
 		else
 		    flen = 0;
 		aw = (addword_T *)getroom(lp, &bl_used,
 					   sizeof(addword_T) + addlen + flen);
 		if (aw == NULL)
-		    goto errorend;
+		    goto endFAIL;
 		if (flags & ADD_KEEPCAP)
 		{
 		    /* Put the addition in original case after the case-folded
 		     * string. */
 			{
 			    /* we use a dummy item as the list header */
 			    naw = (addword_T *)getroom(lp, &bl_used,
 					sizeof(addword_T) + STRLEN(NOWC_KEY));
 			    if (naw == NULL)
-				goto errorend;
+				goto endFAIL;
 			    STRCPY(naw->aw_word, NOWC_KEY);
 			    hash_add_item(ht, hi, naw->aw_word, hash);
 			    naw->aw_next = aw;
 			    aw->aw_next = NULL;
 			}
 		    }
 		}
 	    }
 	}
 }
-goto end_OK;
+goto endOK;
-errorend:
+endFAIL:
 lp->sl_error = TRUE;
-end_OK:
+endOK:
 if (fd != NULL)
 	fclose(fd);
 hash_unlock(&lp->sl_words);
 sourcing_name = save_sourcing_name;
 sourcing_lnum = save_sourcing_lnum;
 #ifdef FEAT_MBYTE
 c = mb_ptr2char_adv(&p);
 #else
 c = *p++;
 #endif
-firstcap = allcap = MB_ISUPPER(c);
+firstcap = allcap = spell_isupper(c);
 /*
 * Need to check all letters to find a word with mixed upper/lower.
 * But a word with an upper char only at start is a ONECAP.
 */
 #ifdef FEAT_MBYTE
 	    c = mb_ptr2char(p);
 #else
 	    c = *p;
 #endif
-	    if (!MB_ISUPPER(c))
+	    if (!spell_isupper(c))
 	    {
 		/* UUl -> KEEPCAP */
 		if (past_second && allcap)
 		    return BWF_KEEPCAP;
 		allcap = FALSE;
 basicword_T	*bw_cnext;	/* next word with same caps */
 int		bw_flags;	/* BWF_ flags */
 garray_T	bw_prefix;	/* table with prefix numbers */
 garray_T	bw_suffix;	/* table with suffix numbers */
 int		bw_region;	/* region bits */
-char_u	*bw_caseword;	/* keep-case word */
+char_u	*bw_caseword;	/* keep-case word or NULL */
-char_u	*bw_leadstring;	/* must come before bw_word */
+char_u	*bw_leadstring;	/* must come before bw_word or NULL */
-char_u	*bw_addstring;	/* must come after bw_word */
+char_u	*bw_addstring;	/* must come after bw_word or NULL */
 char_u	bw_word[1];	/* actually longer: word case folded */
 };
 static basicword_T dumbw;
 #define KEY2BW(p)	((basicword_T *)((p) - (dumbw.bw_word - (char_u *)&dumbw)))
 static int same_affentries __ARGS((affheader_T *ah1, affheader_T *ah2));
 static void add_affhash __ARGS((hashtab_T *ht, char_u *key, int newnr));
 static void clear_affhash __ARGS((hashtab_T *ht));
 static void trans_affixes __ARGS((dicword_T *dw, basicword_T *bw, afffile_T *oldaff, hashtab_T *newwords));
 static int build_wordlist __ARGS((hashtab_T *newwords, hashtab_T *oldwords, afffile_T *oldaff, int regionmask));
+static basicword_T *get_basicword __ARGS((char_u *word, int asize));
 static void combine_regions __ARGS((hashtab_T *newwords));
 static int same_affixes __ARGS((basicword_T *bw, basicword_T *nbw));
-static void expand_affixes __ARGS((hashtab_T *newwords, garray_T *prefgap, garray_T *suffgap));
+static int expand_affixes __ARGS((hashtab_T *newwords, garray_T *prefgap, garray_T *suffgap));
-static void expand_one_aff __ARGS((basicword_T *bw, garray_T *add_words, affentry_T *pae, affentry_T *sae));
+static int expand_one_aff __ARGS((basicword_T *bw, garray_T *add_words, affentry_T *pae, affentry_T *sae));
-static void add_to_wordlist __ARGS((hashtab_T *newwords, basicword_T *bw));
+static int add_to_wordlist __ARGS((hashtab_T *newwords, basicword_T *bw));
-static void put_bytes __ARGS((FILE *fd, long_u nr, int len));
 static void write_affix __ARGS((FILE *fd, affheader_T *ah));
 static void write_affixlist __ARGS((FILE *fd, garray_T *aff, int bytes));
 static void write_vim_spell __ARGS((char_u *fname, garray_T *prefga, garray_T *suffga, hashtab_T *newwords, int regcount, char_u *regchars));
 static void write_bword __ARGS((winfo_T *wif, basicword_T *bw, int lowcap));
 static void free_wordtable __ARGS((hashtab_T *ht));
 char_u	*p;
 int		lnum = 0;
 affheader_T	*cur_aff = NULL;
 int		aff_todo = 0;
 hashtab_T	*tp;
+char_u	*low = NULL;
+char_u	*fol = NULL;
+char_u	*upp = NULL;
 fd = fopen((char *)fname, "r");
 if (fd == NULL)
 {
 	EMSG2(_(e_notopen), fname);
 ga_init2(&aff->af_rep, (int)sizeof(repentry_T), 20);
 /*
 * Read all the lines in the file one by one.
 */
-while (!vim_fgets(rline, MAXLINELEN, fd))
+while (!vim_fgets(rline, MAXLINELEN, fd) && !got_int)
 {
+	line_breakcheck();
 	++lnum;
 	/* Skip comment lines. */
 	if (*rline == '#')
 	    continue;
 	/* Convert from "SET" to 'encoding' when needed. */
 	vim_free(pc);
 	if (conv->vc_type != CONV_NONE)
 	{
 	    pc = string_convert(conv, rline, NULL);
+	    if (pc == NULL)
+	    {
+		smsg((char_u *)_("Conversion failure for word in %s line %d: %s"),
+							   fname, lnum, rline);
+		continue;
+	    }
 	    line = pc;
 	}
 	else
 	{
 	    pc = NULL;
 		{
 		    aff_entry->ae_next = cur_aff->ah_first;
 		    cur_aff->ah_first = aff_entry;
 		}
 	    }
+	    else if (STRCMP(items[0], "FOL") == 0 && itemcnt == 2)
+	    {
+		if (fol != NULL)
+		    smsg((char_u *)_("Duplicate FOL in %s line %d"),
+								 fname, lnum);
+		else
+		    fol = vim_strsave(items[1]);
+	    }
+	    else if (STRCMP(items[0], "LOW") == 0 && itemcnt == 2)
+	    {
+		if (low != NULL)
+		    smsg((char_u *)_("Duplicate LOW in %s line %d"),
+								 fname, lnum);
+		else
+		    low = vim_strsave(items[1]);
+	    }
+	    else if (STRCMP(items[0], "UPP") == 0 && itemcnt == 2)
+	    {
+		if (upp != NULL)
+		    smsg((char_u *)_("Duplicate UPP in %s line %d"),
+								 fname, lnum);
+		else
+		    upp = vim_strsave(items[1]);
+	    }
 	    else if (STRCMP(items[0], "REP") == 0 && itemcnt == 2)
 		/* Ignore REP count */;
 	    else if (STRCMP(items[0], "REP") == 0 && itemcnt == 3)
 	    {
 		repentry_T  *rp;
 	    else if (p_verbose > 0)
 		smsg((char_u *)_("Unrecognized item in %s line %d: %s"),
 						       fname, lnum, items[0]);
 	}
+}
+if (fol != NULL || low != NULL || upp != NULL)
+{
+	if (fol == NULL || low == NULL || upp == NULL)
+	    smsg((char_u *)_("Missing FOL/LOW/UPP line in %s"), fname);
+	else
+	    set_spell_chartab(fol, low, upp);
+	vim_free(fol);
+	vim_free(low);
+	vim_free(upp);
 }
 vim_free(pc);
 fclose(fd);
 return aff;
 /*
 * Read all the lines in the file one by one.
 * The words are converted to 'encoding' here, before being added to
 * the hashtable.
 */
-while (!vim_fgets(line, MAXLINELEN, fd))
+while (!vim_fgets(line, MAXLINELEN, fd) && !got_int)
 {
+	line_breakcheck();
 	++lnum;
 	/* Remove CR, LF and white space from end. */
 	l = STRLEN(line);
 	while (l > 0 && line[l - 1] <= ' ')
 	/* Convert from "SET" to 'encoding' when needed. */
 	if (conv->vc_type != CONV_NONE)
 	{
 	    pc = string_convert(conv, line, NULL);
+	    if (pc == NULL)
+	    {
+		smsg((char_u *)_("Conversion failure for word in %s line %d: %s"),
+						       fname, lnum, line);
+		continue;
+	    }
 	    w = pc;
 	}
 	else
 	{
 	    pc = NULL;
 	}
 	dw = (dicword_T *)alloc_clear((unsigned)sizeof(dicword_T)
 							     + STRLEN(w));
 	if (dw == NULL)
+	{
+	    vim_free(pc);
 	    break;
+	}
 	STRCPY(dw->dw_word, w);
 	vim_free(pc);
 	hash = hash_hash(dw->dw_word);
 	hi = hash_lookup(ht, dw->dw_word, hash);
 hashtab_T	*newwords;	/* table with words */
 {
 char_u	key[2];
 char_u	*p;
 char_u	*affnm;
-garray_T	*gap;
+garray_T	*gap, *agap;
 hashitem_T	*aff_hi;
 affheader_T	*ah;
 affentry_T	*ae;
 regmatch_T	regmatch;
 int		i;
 basicword_T *nbw;
 int		alen;
-int		wlen;
 garray_T	suffixga;	/* list of words with non-word suffixes */
 garray_T	prefixga;	/* list of words with non-word prefixes */
 char_u	nword[MAXWLEN];
 int		flags;
 int		n;
 	/* Loop over all the affix entries for this affix name. */
 	ah = HI2AH(aff_hi);
 	for (ae = ah->ah_first; ae != NULL; ae = ae->ae_next)
 	{
 	    /* Setup for regexp matching.  Note that we don't ignore case.
-	     * This is weird, because he rules in an .aff file don't care
+	     * This is weird, because the rules in an .aff file don't care
 	     * about case, but it's necessary for compatibility with Myspell.
 	     */
 	    regmatch.regprog = ae->ae_prog;
 	    regmatch.rm_ic = FALSE;
 	    if (ae->ae_prog == NULL
 			   || vim_regexec(&regmatch, dw->dw_word, (colnr_T)0))
 	    {
 		if ((ae->ae_add_nw != NULL || ae->ae_add_pw != NULL)
 			&& (gap != &bw->bw_suffix || bw->bw_addstring == NULL))
 		{
-		    /* Affix has a non-word character and isn't prepended to
+		    /*
+		     * Affix has a non-word character and isn't prepended to
 		     * leader or appended to addition.  Need to use another
-		     * word with an addition.  It's a copy of the basicword_T
+		     * word with a leadstring and/or addstring.
-		     * "bw". */
+		     */
-		    if (gap == &bw->bw_suffix)
+		    if (gap == &bw->bw_suffix || ae->ae_add_nw == NULL)
 		    {
-			alen = ae->ae_add_nw - ae->ae_add;
+			/* Suffix or prefix with only non-word chars.
-			nbw = (basicword_T *)alloc((unsigned)(
+			 * Build the new basic word in "nword": Remove chop
-				    sizeof(basicword_T) + STRLEN(bw->bw_word)
+			 * string and append/prepend addition. */
-								 + alen + 1));
+			if (gap == &bw->bw_suffix)
-			if (nbw != NULL)
 			{
-			    *nbw = *bw;
+			    /* suffix goes at the end of the word */
-			    ga_init2(&nbw->bw_prefix, sizeof(short_u), 1);
-			    ga_init2(&nbw->bw_suffix, sizeof(short_u), 1);
-			    /* Adding the suffix may change the caps. */
 			    STRCPY(nword, dw->dw_word);
 			    if (ae->ae_chop != NULL)
 			    {
 				/* Remove chop string. */
 				p = nword + STRLEN(nword);
 				for (i = mb_charlen(ae->ae_chop); i > 0; --i)
 				    mb_ptr_back(nword, p);
 				*p = NUL;
 			    }
 			    STRCAT(nword, ae->ae_add);
-			    flags = captype(nword, nword + STRLEN(nword));
+			    agap = &suffixga;
-			    if (flags & BWF_KEEPCAP)
-			    {
-				/* "caseword" excludes the addition */
-				nword[STRLEN(dw->dw_word) + alen] = NUL;
-				nbw->bw_caseword = vim_strsave(nword);
-			    }
-			    nbw->bw_flags &= ~(BWF_ONECAP | BWF_ALLCAP
-							       | BWF_KEEPCAP);
-			    nbw->bw_flags |= flags;
-			    if (bw->bw_leadstring != NULL)
-				nbw->bw_leadstring =
-					       vim_strsave(bw->bw_leadstring);
-			    nbw->bw_addstring = vim_strsave(ae->ae_add_nw);
-			    STRCPY(nbw->bw_word, bw->bw_word);
-			    if (alen > 0 || ae->ae_chop != NULL)
-			    {
-				/* Suffix starts with word character and/or
-				 * chop off something.  Append it to the word.
-				 * Add new word entry. */
-				wlen = STRLEN(nbw->bw_word);
-				if (ae->ae_chop != NULL)
-				    wlen -= STRLEN(ae->ae_chop);
-				mch_memmove(nbw->bw_word + wlen, ae->ae_add,
-									alen);
-				nbw->bw_word[wlen + alen] = NUL;
-				add_to_wordlist(newwords, nbw);
-			    }
-			    else
-				/* Basic word is the same, link "nbw" after
-				 * "bw". */
-				bw->bw_next = nbw;
-			    /* Remember this word, we need to set bw_prefix
-			     * and bw_prefix later. */
-			    if (ga_grow(&suffixga, 1) == OK)
-				((basicword_T **)suffixga.ga_data)
-						    [suffixga.ga_len++] = nbw;
 			}
-		    }
+			else
-		    else if (ae->ae_add_nw == NULL)
-		    {
-			/* Prefix that starts with non-word char(s) and may be
-			 * followed by word chars: Make a leadstring and
-			 * prepend word chars before the word. */
-			alen = STRLEN(ae->ae_add_pw);
-			nbw = (basicword_T *)alloc((unsigned)(
-				    sizeof(basicword_T) + STRLEN(bw->bw_word)
-								 + alen + 1));
-			if (nbw != NULL)
 			{
-			    *nbw = *bw;
+			    /* prefix goes before the word */
-			    ga_init2(&nbw->bw_prefix, sizeof(short_u), 1);
-			    ga_init2(&nbw->bw_suffix, sizeof(short_u), 1);
-			    /* Adding the prefix may change the caps. */
 			    STRCPY(nword, ae->ae_add);
 			    p = dw->dw_word;
 			    if (ae->ae_chop != NULL)
 				/* Skip chop string. */
 				for (i = mb_charlen(ae->ae_chop); i > 0; --i)
 				    mb_ptr_adv( p);
 			    STRCAT(nword, p);
+			    agap = &prefixga;
-			    flags = captype(nword, nword + STRLEN(nword));
+			}
-			    if (flags & BWF_KEEPCAP)
-				/* "caseword" excludes the addition */
+			/* Create a basicword_T from the word. */
-				nbw->bw_caseword = vim_strsave(nword
+			nbw = get_basicword(nword, 1);
-					      + (ae->ae_add_pw - ae->ae_add));
+			if (nbw != NULL)
-			    else
+			{
-				nbw->bw_caseword = NULL;
+			    nbw->bw_region = bw->bw_region;
-			    nbw->bw_flags &= ~(BWF_ONECAP | BWF_ALLCAP
+			    nbw->bw_flags |= bw->bw_flags
-							       | BWF_KEEPCAP);
+				   & ~(BWF_ONECAP | BWF_ALLCAP | BWF_KEEPCAP);
-			    nbw->bw_flags |= flags;
+			    if (STRCMP(bw->bw_word, nbw->bw_word) != 0)
-			    if (bw->bw_addstring != NULL)
+				/* Basic word differs, add new word entry. */
-				nbw->bw_addstring =
+				(void)add_to_wordlist(newwords, nbw);
-					       vim_strsave(bw->bw_addstring);
-			    else
-				nbw->bw_addstring = NULL;
-			    nbw->bw_leadstring = vim_strnsave(ae->ae_add,
-						  ae->ae_add_pw - ae->ae_add);
-			    if (alen > 0 || ae->ae_chop != NULL)
-			    {
-				/* Prefix ends in word character and/or chop
-				 * off something.  Prepend it to the word.
-				 * Add new word entry. */
-				STRCPY(nbw->bw_word, ae->ae_add_pw);
-				p = bw->bw_word;
-				if (ae->ae_chop != NULL)
-				    p += STRLEN(ae->ae_chop);
-				STRCAT(nbw->bw_word, p);
-				add_to_wordlist(newwords, nbw);
-			    }
 			    else
 			    {
 				/* Basic word is the same, link "nbw" after
 				 * "bw". */
-				STRCPY(nbw->bw_word, bw->bw_word);
+				nbw->bw_next = bw->bw_next;
 				bw->bw_next = nbw;
 			    }
-			    /* Remember this word, we need to set bw_suffix
+			    /* Remember this word, we need to set bw_prefix
-			     * and bw_suffix later. */
+			     * or bw_suffix later. */
-			    if (ga_grow(&prefixga, 1) == OK)
+			    if (ga_grow(agap, 1) == OK)
-				((basicword_T **)prefixga.ga_data)
+				((basicword_T **)agap->ga_data)
-						    [prefixga.ga_len++] = nbw;
+						       [agap->ga_len++] = nbw;
 			}
 		    }
 		    else
 		    {
 			/* Prefix with both non-word and word characters: Turn
 #ifdef FEAT_MBYTE
 			    n = (*mb_ptr2len_check)(p);
 #else
 			    n = 1;
 #endif
-			    (void)str_foldcase(p, n, nword + alen,
+			    (void)spell_casefold(p, n, nword + alen,
 							      MAXWLEN - alen);
 			    alen += STRLEN(nword + alen);
 			}
 			/* Allocate a new word entry. */
 				nbw->bw_leadstring = vim_strnsave(ae->ae_add,
 						  ae->ae_add_pw - ae->ae_add);
 			    else
 				nbw->bw_leadstring = NULL;
-			    add_to_wordlist(newwords, nbw);
+			    (void)add_to_wordlist(newwords, nbw);
 			    /* Remember this word, we need to set bw_suffix
 			     * later. */
 			    if (ga_grow(&prefixga, 1) == OK)
 				((basicword_T **)prefixga.ga_data)
 {
 int		todo;
 hashitem_T	*old_hi;
 dicword_T	*dw;
 basicword_T *bw;
-char_u	foldword[MAXLINELEN];
-int		leadlen;
-char_u	leadstring[MAXLINELEN];
-int		addlen;
-char_u	addstring[MAXLINELEN];
-int		dwlen;
-char_u	*p;
-int		clen;
-int		flags;
-char_u	*cp = NULL;
-int		l;
 char_u	message[MAXLINELEN + MAXWLEN];
 todo = oldwords->ht_used;
 for (old_hi = oldwords->ht_array; todo > 0; ++old_hi)
 {
 		ui_breakcheck();
 		if (got_int)
 		    break;
 	    }
-	    /* The basic words are always stored with folded case. */
+	    bw = get_basicword(dw->dw_word, 10);
-	    dwlen = STRLEN(dw->dw_word);
-	    (void)str_foldcase(dw->dw_word, dwlen, foldword, MAXLINELEN);
-	    flags = captype(dw->dw_word, dw->dw_word + dwlen);
-	    /* Check for non-word characters before the word. */
-	    clen = 0;
-	    leadlen = 0;
-	    if (!spell_iswordc(foldword))
-	    {
-		p = foldword;
-		for (;;)
-		{
-		    mb_ptr_adv(p);
-		    ++clen;
-		    if (*p == NUL)	/* Only non-word chars (bad word!) */
-		    {
-			if (p_verbose > 0)
-			    smsg((char_u *)_("Warning: word without word characters: \"%s\""),
-								    foldword);
-			break;
-		    }
-		    if (spell_iswordc(p))
-		    {
-			/* Move the leader to "leadstring" and remove it from
-			 * "foldword". */
-			leadlen = p - foldword;
-			mch_memmove(leadstring, foldword, leadlen);
-			leadstring[leadlen] = NUL;
-			mch_memmove(foldword, p, STRLEN(p) + 1);
-			break;
-		    }
-		}
-	    }
-	    /* Check for non-word characters after word characters. */
-	    addlen = 0;
-	    for (p = foldword; spell_iswordc(p); mb_ptr_adv(p))
-	    {
-		if (*p == NUL)
-		    break;
-		++clen;
-	    }
-	    if (*p != NUL)
-	    {
-		/* Move the addition to "addstring" and truncate "foldword". */
-		if (flags & BWF_KEEPCAP)
-		{
-		    /* Preserve caps, need to skip the right number of
-		     * characters in the original word (case folding may
-		     * change the byte count). */
-		    l = 0;
-		    for (cp = dw->dw_word; l < clen; mb_ptr_adv(cp))
-			++l;
-		    addlen = STRLEN(cp);
-		    mch_memmove(addstring, cp, addlen + 1);
-		}
-		else
-		{
-		    addlen = STRLEN(p);
-		    mch_memmove(addstring, p, addlen + 1);
-		}
-		*p = NUL;
-	    }
-	    bw = (basicword_T *)alloc_clear((unsigned)sizeof(basicword_T)
-							  + STRLEN(foldword));
 	    if (bw == NULL)
 		break;
-	    STRCPY(bw->bw_word, foldword);
 	    bw->bw_region = regionmask;
-	    if (leadlen > 0)
+	    (void)add_to_wordlist(newwords, bw);
-		bw->bw_leadstring = vim_strsave(leadstring);
-	    else
-		bw->bw_leadstring = NULL;
-	    if (addlen > 0)
-		bw->bw_addstring = vim_strsave(addstring);
-	    else
-		bw->bw_addstring = NULL;
-	    add_to_wordlist(newwords, bw);
-	    if (flags & BWF_KEEPCAP)
-	    {
-		if (addlen == 0)
-		    /* use the whole word */
-		    bw->bw_caseword = vim_strsave(dw->dw_word + leadlen);
-		else
-		    /* use only up to the addition */
-		    bw->bw_caseword = vim_strnsave(dw->dw_word + leadlen,
-						  cp - dw->dw_word - leadlen);
-		if (bw->bw_caseword == NULL)	/* out of memory */
-		    flags &= ~BWF_KEEPCAP;
-	    }
-	    bw->bw_flags = flags;
 	    /* Deal with any affix names on the old word, translate them
 	     * into affix numbers. */
-	    ga_init2(&bw->bw_prefix, sizeof(short_u), 10);
-	    ga_init2(&bw->bw_suffix, sizeof(short_u), 10);
 	    if (dw->dw_affnm != NULL)
 		trans_affixes(dw, bw, oldaff, newwords);
 	}
 }
 if (todo > 0)
 	return FAIL;
 return OK;
+}
+/*
+* Get a basicword_T from a word in original case.
+* The word is case-folded first.  Non-word characters before the first word
+* character are moved to "bw_leadstring", everything from the first non-word
+* character after the word characters is moved to "bw_addstring" and what
+* remains is stored in "bw_word".
+* When captype() sets BWF_KEEPCAP the addition and "bw_caseword" are taken
+* from the original-case word instead of the folded one.  The results of the
+* string copies (leadstring, addstring, caseword) are not checked here, they
+* can be NULL.
+* Caller must set bw_region.
+* Returns NULL when allocating the basicword_T fails.
+*/
+static basicword_T *
+get_basicword(word, asize)
+char_u	*word;
+int		asize;	    /* growsize for affix garray */
+{
+int		dwlen;
+char_u	foldword[MAXLINELEN];
+int		flags;
+int		clen;
+int		leadlen;
+char_u	*p;
+char_u	leadstring[MAXLINELEN];
+int		addlen;
+char_u	addstring[MAXLINELEN];
+char_u	*cp = NULL;
+int		l;
+basicword_T *bw;
+/* The basic words are always stored with folded case.
+* The flags are computed from the word in its original case. */
+dwlen = STRLEN(word);
+(void)spell_casefold(word, dwlen, foldword, MAXLINELEN);
+flags = captype(word, word + dwlen);
+/* Check for non-word characters before the word.
+* "clen" counts the characters skipped here plus the word characters
+* counted further down; it is used to find the matching position in the
+* original-case word. */
+clen = 0;
+leadlen = 0;
+if (!spell_iswordc(foldword))
+{
+	p = foldword;
+	for (;;)
+	{
+	    mb_ptr_adv(p);
+	    ++clen;
+	    if (*p == NUL)	/* Only non-word chars (bad word!) */
+	    {
+		if (p_verbose > 0)
+		    smsg((char_u *)_("Warning: word without word characters: \"%s\""),
+							    foldword);
+		break;
+	    }
+	    if (spell_iswordc(p))
+	    {
+		/* Move the leader to "leadstring" and remove it from
+		 * "foldword". */
+		leadlen = p - foldword;
+		mch_memmove(leadstring, foldword, leadlen);
+		leadstring[leadlen] = NUL;
+		mch_memmove(foldword, p, STRLEN(p) + 1);
+		break;
+	    }
+	}
+}
+/* Check for non-word characters after word characters. */
+addlen = 0;
+for (p = foldword; spell_iswordc(p); mb_ptr_adv(p))
+{
+	if (*p == NUL)
+	    break;
+	++clen;
+}
+if (*p != NUL)
+{
+	/* Move the addition to "addstring" and truncate "foldword". */
+	if (flags & BWF_KEEPCAP)
+	{
+	    /* Preserve caps, need to skip the right number of
+	     * characters in the original word (case folding may
+	     * change the byte count). */
+	    l = 0;
+	    for (cp = word; l < clen; mb_ptr_adv(cp))
+		++l;
+	    addlen = STRLEN(cp);
+	    mch_memmove(addstring, cp, addlen + 1);
+	}
+	else
+	{
+	    addlen = STRLEN(p);
+	    mch_memmove(addstring, p, addlen + 1);
+	}
+	*p = NUL;
+}
+bw = (basicword_T *)alloc_clear((unsigned)sizeof(basicword_T)
+							  + STRLEN(foldword));
+if (bw == NULL)
+	return NULL;
+STRCPY(bw->bw_word, foldword);
+if (leadlen > 0)
+	bw->bw_leadstring = vim_strsave(leadstring);
+else
+	bw->bw_leadstring = NULL;
+if (addlen > 0)
+	bw->bw_addstring = vim_strsave(addstring);
+else
+	bw->bw_addstring = NULL;
+if (flags & BWF_KEEPCAP)
+{
+	if (addlen == 0)
+	    /* use the whole word
+	     * NOTE(review): "leadlen" is a byte count measured in the folded
+	     * word but is applied to the original-case word here and below;
+	     * confirm this is right when case folding changes byte lengths. */
+	    bw->bw_caseword = vim_strsave(word + leadlen);
+	else
+	    /* use only up to the addition */
+	    bw->bw_caseword = vim_strnsave(word + leadlen,
+							 cp - word - leadlen);
+}
+bw->bw_flags = flags;
+ga_init2(&bw->bw_prefix, sizeof(short_u), asize);
+ga_init2(&bw->bw_suffix, sizeof(short_u), asize);
+return bw;
 }
 /*
 * Go through the list of words and combine the ones that are identical except
 * for the region.
 			    && (bw->bw_leadstring == NULL)
 					       == (nbw->bw_leadstring == NULL)
 			    && (bw->bw_addstring == NULL)
 						== (nbw->bw_addstring == NULL)
 			    && ((bw->bw_flags & BWF_KEEPCAP) == 0
-				|| (STRCMP(bw->bw_caseword,
+				|| bw->bw_caseword == NULL
-						      nbw->bw_caseword) == 0))
+				|| nbw->bw_caseword == NULL
+				|| STRCMP(bw->bw_caseword,
+						      nbw->bw_caseword) == 0)
 			    && (bw->bw_leadstring == NULL
-				|| (STRCMP(bw->bw_leadstring,
+				|| STRCMP(bw->bw_leadstring,
-						    nbw->bw_leadstring) == 0))
+						    nbw->bw_leadstring) == 0)
 			    && (bw->bw_addstring == NULL
-				|| (STRCMP(bw->bw_addstring,
+				|| STRCMP(bw->bw_addstring,
-						     nbw->bw_addstring) == 0))
+						     nbw->bw_addstring) == 0)
 			    && same_affixes(bw, nbw)
 			    )
 		    {
 			/* Match, combine regions and delete "nbw". */
 			pbw->bw_next = nbw->bw_next;
 * The result is that no affixes apply to the additions or leadstring of a
 * word.
 * This is also needed when a word with an addition has a prefix and the word
 * with prefix also exists.  E.g., "blurp's/D" (D is prefix "de") and
 * "deblurp".  "deblurp" would match and no prefix would be tried.
-*/
+*
-static void
+* Returns FAIL when out of memory.
+*/
+static int
 expand_affixes(newwords, prefgap, suffgap)
 hashtab_T	*newwords;
 garray_T	*prefgap;
 garray_T	*suffgap;
 {
 int		pi, si;
 affentry_T	*pae, *sae;
 garray_T	add_words;
 int		n;
 char_u	message[MAXLINELEN + MAXWLEN];
+int		retval = OK;
 ga_init2(&add_words, sizeof(basicword_T *), 10);
 todo = newwords->ht_used;
 for (hi = newwords->ht_array; todo > 0; ++hi)
 				 */
 				do
 				{
 				    /* Expand the word for this combination of
 				     * prefixes and affixes. */
-				    expand_one_aff(bw, &add_words, pae, sae);
+				    if (expand_one_aff(bw, &add_words,
+							    pae, sae) == FAIL)
+				    {
+					retval = FAIL;
+					goto theend;
+				    }
 				    /* Advance to next suffix entry, if there
 				     * is one. */
 				    if (sae != NULL)
 					sae = sae->ae_next;
 /*
 * Add the new words afterwards, can't change "newwords" while going over
 * all its items.
 */
 for (pi = 0; pi < add_words.ga_len; ++pi)
-	add_to_wordlist(newwords, ((basicword_T **)add_words.ga_data)[pi]);
+{
+	retval = add_to_wordlist(newwords,
+				     ((basicword_T **)add_words.ga_data)[pi]);
+	if (retval == FAIL)
+	    break;
+}
+theend:
 ga_clear(&add_words);
+return retval;
 }
 /*
 * Add one word to "add_words" for basic word "bw" with additions, adding
 * prefix "pae" and suffix "sae".  Either "pae" or "sae" can be NULL.
 * Don't do this when not necessary:
 * - no leadstring and adding prefix doesn't result in existing word.
-*/
+* Returns FAIL when out of memory.
-static void
+*/
+static int
 expand_one_aff(bw, add_words, pae, sae)
 basicword_T	    *bw;
 garray_T	    *add_words;
 affentry_T	    *pae;
 affentry_T	    *sae;
 /* Copy the body of the word. */
 STRCPY(word + l, bw->bw_word + choplen);
 /* Do the same for bw_caseword, if it's there. */
-if (bw->bw_flags & BWF_KEEPCAP)
+if ((bw->bw_flags & BWF_KEEPCAP) && bw->bw_caseword != NULL)
 {
 	if (l > 0)
 	    mch_memmove(caseword, pae->ae_add, l);
 	STRCPY(caseword + l, bw->bw_caseword + choplen);
 }
 	}
 }
 nbw = (basicword_T *)alloc_clear((unsigned)
 					  sizeof(basicword_T) + STRLEN(word));
-if (nbw != NULL)
+if (nbw == NULL)
-{
+	return FAIL;
-	/* Add the new word to the list of words to be added later. */
-	if (ga_grow(add_words, 1) == FAIL)
+/* Add the new word to the list of words to be added later. */
-	{
+if (ga_grow(add_words, 1) == FAIL)
-	    vim_free(nbw);
+{
-	    return;
+	vim_free(nbw);
-	}
+	return FAIL;
-	((basicword_T **)add_words->ga_data)[add_words->ga_len++] = nbw;
+}
+((basicword_T **)add_words->ga_data)[add_words->ga_len++] = nbw;
-	/* Copy the (modified) basic word, flags and region. */
-	STRCPY(nbw->bw_word, word);
+/* Copy the (modified) basic word, flags and region. */
-	nbw->bw_flags = bw->bw_flags;
+STRCPY(nbw->bw_word, word);
-	nbw->bw_region = bw->bw_region;
+nbw->bw_flags = bw->bw_flags;
+nbw->bw_region = bw->bw_region;
-	/* Set the (modified) caseword. */
-	if (bw->bw_flags & BWF_KEEPCAP)
+/* Set the (modified) caseword. */
-	    if ((nbw->bw_caseword = vim_strsave(caseword)) == NULL)
+if (bw->bw_flags & BWF_KEEPCAP)
-		nbw->bw_flags &= ~BWF_KEEPCAP;
+	nbw->bw_caseword = vim_strsave(caseword);
+else
-	if (bw->bw_leadstring != NULL)
+	nbw->bw_caseword = NULL;
-	{
-	    if (pae != NULL)
+if (bw->bw_leadstring != NULL)
-	    {
+{
-		/* Prepend prefix to leadstring. */
+	if (pae != NULL)
-		ll = STRLEN(bw->bw_leadstring);
+	{
-		l = choplen = 0;
+	    /* Prepend prefix to leadstring. */
-		if (pae->ae_add != NULL)
+	    ll = STRLEN(bw->bw_leadstring);
-		    l = STRLEN(pae->ae_add);
+	    l = choplen = 0;
-		if (pae->ae_chop != NULL)
+	    if (pae->ae_add != NULL)
-		{
+		l = STRLEN(pae->ae_add);
-		    choplen = STRLEN(pae->ae_chop);
+	    if (pae->ae_chop != NULL)
-		    if (choplen > ll)	    /* TODO: error? */
+	    {
-			choplen = ll;
+		choplen = STRLEN(pae->ae_chop);
-		}
+		if (choplen > ll)	    /* TODO: error? */
-		nbw->bw_leadstring = alloc((unsigned)(ll + l - choplen + 1));
+		    choplen = ll;
-		if (nbw->bw_leadstring != NULL)
+	    }
-		{
+	    nbw->bw_leadstring = alloc((unsigned)(ll + l - choplen + 1));
-		    if (l > 0)
+	    if (nbw->bw_leadstring != NULL)
-			mch_memmove(nbw->bw_leadstring, pae->ae_add, l);
+	    {
-		    STRCPY(nbw->bw_leadstring + l, bw->bw_leadstring + choplen);
+		if (l > 0)
-		}
+		    mch_memmove(nbw->bw_leadstring, pae->ae_add, l);
-	    }
+		STRCPY(nbw->bw_leadstring + l, bw->bw_leadstring + choplen);
+	    }
+	}
+	else
+	    nbw->bw_leadstring = vim_strsave(bw->bw_leadstring);
+}
+else if (bw->bw_prefix.ga_len > 0)
+{
+	/* There is no leadstring, copy the list of possible prefixes. */
+	ga_init2(&nbw->bw_prefix, sizeof(short_u), 1);
+	if (ga_grow(&nbw->bw_prefix, bw->bw_prefix.ga_len) == OK)
+	{
+	    mch_memmove(nbw->bw_prefix.ga_data, bw->bw_prefix.ga_data,
+				  bw->bw_prefix.ga_len * sizeof(short_u));
+	    nbw->bw_prefix.ga_len = bw->bw_prefix.ga_len;
+	}
+}
+if (bw->bw_addstring != NULL)
+{
+	if (sae != NULL)
+	{
+	    /* Append suffix to addstring. */
+	    l = STRLEN(bw->bw_addstring);
+	    if (sae->ae_chop != NULL)
+	    {
+		l -= STRLEN(sae->ae_chop);
+		if (l < 0)	    /* TODO: error? */
+		    l = 0;
+	    }
+	    if (sae->ae_add == NULL)
+		ll = 0;
 	    else
-		nbw->bw_leadstring = vim_strsave(bw->bw_leadstring);
+		ll = STRLEN(sae->ae_add);
-	}
+	    nbw->bw_addstring = alloc((unsigned)(ll + l - choplen + 1));
-	else if (bw->bw_prefix.ga_len > 0)
+	    if (nbw->bw_addstring != NULL)
-	{
+	    {
-	    /* There is no leadstring, copy the list of possible prefixes. */
+		STRCPY(nbw->bw_addstring, bw->bw_addstring);
-	    ga_init2(&nbw->bw_prefix, sizeof(short_u), 1);
-	    if (ga_grow(&nbw->bw_prefix, bw->bw_prefix.ga_len) == OK)
-	    {
-		mch_memmove(nbw->bw_prefix.ga_data, bw->bw_prefix.ga_data,
-				      bw->bw_prefix.ga_len * sizeof(short_u));
-		nbw->bw_prefix.ga_len = bw->bw_prefix.ga_len;
-	    }
-	}
-	if (bw->bw_addstring != NULL)
-	{
-	    if (sae != NULL)
-	    {
-		/* Append suffix to addstring. */
-		l = STRLEN(bw->bw_addstring);
-		if (sae->ae_chop != NULL)
-		{
-		    l -= STRLEN(sae->ae_chop);
-		    if (l < 0)	    /* TODO: error? */
-			l = 0;
-		}
 		if (sae->ae_add == NULL)
-		    ll = 0;
+		    nbw->bw_addstring[l] = NUL;
 		else
-		    ll = STRLEN(sae->ae_add);
+		    STRCPY(nbw->bw_addstring + l, sae->ae_add);
-		nbw->bw_addstring = alloc((unsigned)(ll + l - choplen + 1));
+	    }
-		if (nbw->bw_addstring != NULL)
+	}
-		{
+	else
-		    STRCPY(nbw->bw_addstring, bw->bw_addstring);
+	    nbw->bw_addstring = vim_strsave(bw->bw_addstring);
-		    if (sae->ae_add == NULL)
+}
-			nbw->bw_addstring[l] = NUL;
-		    else
+return OK;
-			STRCPY(nbw->bw_addstring + l, sae->ae_add);
-		}
-	    }
-	    else
-		nbw->bw_addstring = vim_strsave(bw->bw_addstring);
-	}
-}
 }
 /*
 * Add basicword_T "*bw" to wordlist "newwords".
 */
-static void
+static int
 add_to_wordlist(newwords, bw)
 hashtab_T	*newwords;
 basicword_T	*bw;
 {
 hashitem_T	*hi;
 basicword_T *bw2;
+int		retval = OK;
 hi = hash_find(newwords, bw->bw_word);
 if (HASHITEM_EMPTY(hi))
 {
 	/* New entry, add to hashlist. */
-	hash_add(newwords, bw->bw_word);
+	retval = hash_add(newwords, bw->bw_word);
 	bw->bw_next = NULL;
 }
 else
 {
 	/* Existing entry, append to list of basic words. */
 	bw2 = HI2BW(hi);
 	bw->bw_next = bw2->bw_next;
 	bw2->bw_next = bw;
 }
+return retval;
 }
 /*
 * Write a number to file "fd", MSB first, in "len" bytes.
 */
-static void
+void
 put_bytes(fd, nr, len)
 FILE    *fd;
 long_u  nr;
 int	    len;
 {
 /*
 * Vim spell file format:  <HEADER> <PREFIXLIST> <SUFFIXLIST>
 *						    <SUGGEST> <WORDLIST>
 *
 * <HEADER>: <fileID> <regioncnt> <regionname> ...
+*		 <charflagslen> <charflags> <fcharslen> <fchars>
 *
-* <fileID>     10 bytes    "VIMspell03"
+* <fileID>     10 bytes    "VIMspell04"
 * <regioncnt>  1 byte	    number of regions following (8 supported)
 * <regionname>	2 bytes     Region name: ca, au, etc.
 *			    First <regionname> is region 1.
 *
+* <charflagslen> 1 byte    Number of bytes in <charflags> (should be 128).
+* <charflags>  N bytes     List of flags (first one is for character 128):
+*			    0x01  word character
+*			    0x02  upper-case character
+* <fcharslen>  2 bytes     Number of bytes in <fchars>.
+* <fchars>     N bytes	    Folded characters, first one is for character 128.
 *
-* <PREFIXLIST>: <affcount> <afftotcnt> <affix> ...
+*
-* <SUFFIXLIST>: <affcount> <afftotcnt> <affix> ...
+* <PREFIXLIST>: <affcount> <affix> ...
+* <SUFFIXLIST>: <affcount> <affix> ...
 *		list of possible affixes: prefixes and suffixes.
 *
 * <affcount>	2 bytes	    Number of affixes (MSB comes first).
 *                          When more than 256 an affixNR is 2 bytes.
 *                          This is separate for prefixes and suffixes!
 *                          First affixNR is 0.
-* <afftotcnt>	2 bytes	    Total number of affix items (MSB comes first).
 *
 * <affix>: <affitemcnt> <affitem> ...
 *
 * <affitemcnt> 2 bytes	    Number of affixes with this affixNR (MSB first).
 *
 char_u	**wtab;
 int		todo;
 int		flags, aflags;
 basicword_T	*bw, *bwf, *bw2 = NULL;
 int		i;
-int		cnt;
-affentry_T	*ae;
 int		round;
 garray_T	bwga;
 vim_memset(&wif, 0, sizeof(winfo_T));
 {
 	EMSG2(_(e_notopen), fname);
 	return;
 }
-fwrite(VIMSPELLMAGIC, VIMSPELLMAGICL, (size_t)1, wif.wif_fd);
+/* <HEADER>: <fileID> <regioncnt> <regionname> ...
+*		 <charflagslen> <charflags> <fcharslen> <fchars> */
+fwrite(VIMSPELLMAGIC, VIMSPELLMAGICL, (size_t)1, wif.wif_fd); /* <fileID> */
 /* write the region names if there is more than one */
 if (regcount > 1)
 {
-	putc(regcount, wif.wif_fd);
+	putc(regcount, wif.wif_fd);	    /* <regioncnt> <regionname> ... */
 	fwrite(regchars, (size_t)(regcount * 2), (size_t)1, wif.wif_fd);
 	wif.wif_regionmask = (1 << regcount) - 1;
 }
 else
 {
 	putc(0, wif.wif_fd);
 	wif.wif_regionmask = 0;
 }
-/* Write the prefix and suffix lists. */
+/* Write the table with character flags and table for case folding.
+* <charflagslen> <charflags> <fcharslen> <fchars> */
+write_spell_chartab(wif.wif_fd);
+/* <PREFIXLIST>: <affcount> <affix> ...
+* <SUFFIXLIST>: <affcount> <affix> ... */
 for (round = 1; round <= 2; ++round)
 {
 	gap = round == 1 ? prefga : suffga;
 	put_bytes(wif.wif_fd, (long_u)gap->ga_len, 2);	    /* <affcount> */
-	/* Count the total number of affix items. */
-	cnt = 0;
-	for (i = 0; i < gap->ga_len; ++i)
-	    for (ae = ((affheader_T *)gap->ga_data + i)->ah_first;
-						 ae != NULL; ae = ae->ae_next)
-		++cnt;
-	put_bytes(wif.wif_fd, (long_u)cnt, 2);		    /* <afftotcnt> */
 	for (i = 0; i < gap->ga_len; ++i)
 	    write_affix(wif.wif_fd, (affheader_T *)gap->ga_data + i);
 }
 /* Number of bytes used for affix NR depends on affix count. */
 wif.wif_prefm = (prefga->ga_len > 256) ? 2 : 1;
 wif.wif_suffm = (suffga->ga_len > 256) ? 2 : 1;
-/* Write the suggest info. TODO */
+/* <SUGGEST> : <suggestlen> <more> ...
-put_bytes(wif.wif_fd, 0L, 4);
+*  TODO.  Only write a zero length for now. */
+put_bytes(wif.wif_fd, 0L, 4);			    /* <suggestlen> */
 /*
-* Write the word list.  <wordcount> <worditem> ...
+* <WORDLIST>: <wordcount> <worditem> ...
 */
 /* number of basic words in 4 bytes */
 put_bytes(wif.wif_fd, newwords->ht_used, 4);	    /* <wordcount> */
 /*
 * Sort the word list, so that we can copy as many bytes as possible from
 		    bw2 = ((basicword_T **)bwga.ga_data)[i];
 		    aflags = bw2->bw_flags & (BWF_ONECAP | BWF_KEEPCAP
 								| BWF_ALLCAP);
 		    if (flags == aflags
 			    && ((flags & BWF_KEEPCAP) == 0
-				|| (STRCMP(bw->bw_caseword,
+				|| bw->bw_caseword == NULL
-						     bw2->bw_caseword) == 0))
+				|| bw2->bw_caseword == NULL
+				|| STRCMP(bw->bw_caseword,
+						       bw2->bw_caseword) == 0)
 			    && same_affixes(bw, bw2))
 			break;
 		}
 		if (i == bwga.ga_len)
 		{
 		    write_bword(&wif, bw2, FALSE);
 	    }
 	}
 	ga_clear(&bwga);
+	vim_free(wtab);
 }
 fclose(wif.wif_fd);
 /* Print a few statistics. */
 /* First dummy word doesn't need anything but flags. */
 if (lowcap)
 	return;
-if (flags & BWF_KEEPCAP)
+if ((flags & BWF_KEEPCAP) && bw->bw_caseword != NULL)
 {
 	len = STRLEN(bw->bw_caseword);
 	putc(len, fd);			/* <caselen> */
 	for (i = 0; i < len; ++i)
 	    putc(bw->bw_caseword[i], fd);	/* <caseword> */
 	    if (aflags & ADD_REGION)
 		putc(bw->bw_region, fd);		/* <region> */
 	    bw2 = bw;
 	}
 	vim_free(wtab);
 }
 }
 struct stat	st;
 int		round;
 vimconv_T	conv;
 int		ascii = FALSE;
 char_u	*arg = eap->arg;
+int		error = FALSE;
 if (STRNCMP(arg, "-ascii", 6) == 0)
 {
 	ascii = TRUE;
 	arg = skipwhite(arg + 6);
 					      TOLOWER_ASC(fnames[i][len - 1]);
 		}
 	    }
 	}
+	/* Clear the char type tables, don't want to use any of the currently
+	 * used spell properties. */
+	init_spell_chartab();
 	/*
 	 * Read all the .aff and .dic files.
 	 * Text is converted to 'encoding'.
 	 */
 	for (i = 1; i < fcount; ++i)
 	     * inefficient searching.  Turn the affixes into additions and/or
 	     * the expanded word.
 	     */
 	    MSG(_("Processing words..."));
 	    out_flush();
-	    expand_affixes(&newwords, &prefga, &suffga);
+	    error = expand_affixes(&newwords, &prefga, &suffga) == FAIL;
-	    /* Write the info in the spell file. */
+	    if (!error)
-	    smsg((char_u *)_("Writing spell file %s..."), wfname);
+	    {
-	    out_flush();
+		/* Write the info in the spell file. */
-	    write_vim_spell(wfname, &prefga, &suffga, &newwords,
+		smsg((char_u *)_("Writing spell file %s..."), wfname);
+		out_flush();
+		write_vim_spell(wfname, &prefga, &suffga, &newwords,
 						     fcount - 1, region_name);
-	    MSG(_("Done!"));
+		MSG(_("Done!"));
-	    out_flush();
+		out_flush();
+	    }
 	    /* Free the allocated stuff. */
 	    free_wordtable(&newwords);
 	    for (round = 1; round <= 2; ++round)
 	    {

Mercurial > vim

comparison src/spell.c @ 255:c8742c8da9ab