diff src/spell.c @ 534:c6296b0ad9ea v7.0151

updated for version 7.0151
author vimboss
date Thu, 29 Sep 2005 18:26:07 +0000
parents 7052f11a3dc9
children 792d1b60e546
line wrap: on
line diff
--- a/src/spell.c
+++ b/src/spell.c
@@ -477,6 +477,8 @@ typedef struct suginfo_S
     int		su_badflags;	    /* caps flags for bad word */
     char_u	su_badword[MAXWLEN]; /* bad word truncated at su_badlen */
     char_u	su_fbadword[MAXWLEN]; /* su_badword case-folded */
+    char_u	su_sal_badword[MAXWLEN]; /* su_badword soundfolded */
+    slang_T	*su_slang_first;    /* slang_T used for su_sal_badword */
     hashtab_T	su_banned;	    /* table with banned words */
     slang_T	*su_sallang;	    /* default language for sound folding */
 } suginfo_T;
@@ -749,6 +751,7 @@ static void add_banned __ARGS((suginfo_T
 static int was_banned __ARGS((suginfo_T *su, char_u *word));
 static void free_banned __ARGS((suginfo_T *su));
 static void rescore_suggestions __ARGS((suginfo_T *su));
+static void rescore_one __ARGS((suginfo_T *su, suggest_T *stp));
 static int cleanup_suggestions __ARGS((garray_T *gap, int maxscore, int keep));
 static void spell_soundfold __ARGS((slang_T *slang, char_u *inword, int folded, char_u *res));
 static void spell_soundfold_sofo __ARGS((slang_T *slang, char_u *inword, char_u *res));
@@ -815,8 +818,8 @@ static char *msg_compressing = N_("Compr
 /*
  * Main spell-checking function.
  * "ptr" points to a character that could be the start of a word.
- * "*attrp" is set to the attributes for a badly spelled word.  For a non-word
- * or when it's OK it remains unchanged.
+ * "*attrp" is set to the highlight index for a badly spelled word.  For a
+ * non-word or when it's OK it remains unchanged.
  * This must only be called when 'spelllang' is not empty.
  *
  * "capcol" is used to check for a Capitalised word after the end of a
@@ -831,7 +834,7 @@ static char *msg_compressing = N_("Compr
 spell_check(wp, ptr, attrp, capcol)
     win_T	*wp;		/* current window */
     char_u	*ptr;
-    int		*attrp;
+    hlf_T	*attrp;
     int		*capcol;	/* column to check for Capital */
 {
     matchinf_T	mi;		/* Most things are put in "mi" so that it can
@@ -1008,17 +1011,17 @@ spell_check(wp, ptr, attrp, capcol)
 	}
 
 	if (mi.mi_result == SP_BAD || mi.mi_result == SP_BANNED)
-	    *attrp = highlight_attr[HLF_SPB];
+	    *attrp = HLF_SPB;
 	else if (mi.mi_result == SP_RARE)
-	    *attrp = highlight_attr[HLF_SPR];
+	    *attrp = HLF_SPR;
 	else
-	    *attrp = highlight_attr[HLF_SPL];
+	    *attrp = HLF_SPL;
     }
 
     if (wrongcaplen > 0 && (mi.mi_result == SP_OK || mi.mi_result == SP_RARE))
     {
 	/* Report SpellCap only when the word isn't badly spelled. */
-	*attrp = highlight_attr[HLF_SPC];
+	*attrp = HLF_SPC;
 	return wrongcaplen;
     }
 
@@ -1822,7 +1825,8 @@ spell_move_to(wp, dir, allwords, curline
     int		dir;		/* FORWARD or BACKWARD */
     int		allwords;	/* TRUE for "[s"/"]s", FALSE for "[S"/"]S" */
     int		curline;
-    int		*attrp;		/* return: attributes of bad word or NULL */
+    hlf_T	*attrp;		/* return: highlight index of bad word or NULL
+				   (only when "dir" is FORWARD) */
 {
     linenr_T	lnum;
     pos_T	found_pos;
@@ -1830,7 +1834,7 @@ spell_move_to(wp, dir, allwords, curline
     char_u	*line;
     char_u	*p;
     char_u	*endp;
-    int		attr;
+    hlf_T	attr;
     int		len;
     int		has_syntax = syntax_present(wp->w_buffer);
     int		col;
@@ -1900,13 +1904,13 @@ spell_move_to(wp, dir, allwords, curline
 		break;
 
 	    /* start of word */
-	    attr = 0;
+	    attr = HLF_COUNT;
 	    len = spell_check(wp, p, &attr, &capcol);
 
-	    if (attr != 0)
+	    if (attr != HLF_COUNT)
 	    {
 		/* We found a bad word.  Check the attribute. */
-		if (allwords || attr == highlight_attr[HLF_SPB])
+		if (allwords || attr == HLF_SPB)
 		{
 		    found_one = TRUE;
 
@@ -2017,7 +2021,7 @@ spell_move_to(wp, dir, allwords, curline
 
 	    /* Skip the characters at the start of the next line that were
 	     * included in a match crossing line boundaries. */
-	    if (attr == 0)
+	    if (attr == HLF_COUNT)
 		skip = p - endp;
 	    else
 		skip = 0;
@@ -5098,7 +5102,9 @@ spell_read_aff(spin, fname)
 		    ga_append(&spin->si_map, '/');
 		}
 	    }
-	    else if (STRCMP(items[0], "SAL") == 0 && itemcnt == 3)
+	    /* Accept "SAL from to" and "SAL from to # comment". */
+	    else if (STRCMP(items[0], "SAL") == 0
+		    && (itemcnt == 3 || (itemcnt > 3 && items[3][0] == '#')))
 	    {
 		if (do_sal)
 		{
@@ -8769,7 +8775,7 @@ spell_find_suggest(badptr, su, maxcount,
     int		banbadword;	/* don't include badword in suggestions */
     int		need_cap;	/* word should start with capital */
 {
-    int		attr = 0;
+    hlf_T	attr = HLF_COUNT;
     char_u	buf[MAXPATHL];
     char_u	*p;
     int		do_combine = FALSE;
@@ -8821,11 +8827,17 @@ spell_find_suggest(badptr, su, maxcount,
 	}
     }
 
+    /* Soundfold the bad word with the default sound folding, so that we don't
+     * have to do this many times. */
+    if (su->su_sallang != NULL)
+	spell_soundfold(su->su_sallang, su->su_fbadword, TRUE,
+							  su->su_sal_badword);
+
     /* If the word is not capitalised and spell_check() doesn't consider the
      * word to be bad then it might need to be capitalised.  Add a suggestion
      * for that. */
     c = PTR2CHAR(su->su_badptr);
-    if (!SPELL_ISUPPER(c) && attr == 0)
+    if (!SPELL_ISUPPER(c) && attr == HLF_COUNT)
     {
 	make_case_word(su->su_badword, buf, WF_ONECAP);
 	add_suggestion(su, &su->su_ga, buf, su->su_badlen, SCORE_ICASE,
@@ -9173,8 +9185,11 @@ suggest_try_special(su)
 	su->su_fbadword[len] = NUL;
 	make_case_word(su->su_fbadword, word, su->su_badflags);
 	su->su_fbadword[len] = c;
-	add_suggestion(su, &su->su_ga, word, su->su_badlen, SCORE_DEL,
-						     0, TRUE, su->su_sallang);
+
+	/* Give a soundalike score of 0, compute the score as if deleting one
+	 * character. */
+	add_suggestion(su, &su->su_ga, word, su->su_badlen,
+			      RESCORE(SCORE_REP, 0), 0, TRUE, su->su_sallang);
     }
 }
 
@@ -9226,6 +9241,8 @@ suggest_try_change(su)
     slang_T	*slang;
     int		fword_ends;
     int		lpi;
+    int		maysplit;
+    int		goodword_ends;
 
     /* We make a copy of the case-folded bad word, so that we can modify it
      * to find matches (esp. REP items).  Append some more text, changing
@@ -9401,10 +9418,13 @@ suggest_try_change(su)
 		    }
 		}
 
-		/* Check NEEDCOMPOUND: can't use word without compounding. */
+		/* Check NEEDCOMPOUND: can't use word without compounding.  Do
+		 * try appending another compound word below. */
 		if (sp->ts_complen == sp->ts_compsplit && fword_ends
 						     && (flags & WF_NEEDCOMP))
-		    break;
+		    goodword_ends = FALSE;
+		else
+		    goodword_ends = TRUE;
 
 		if (sp->ts_complen > sp->ts_compsplit)
 		{
@@ -9508,9 +9528,15 @@ suggest_try_change(su)
 		    add_banned(su, preword + sp->ts_prewordlen);
 		    break;
 		}
-		if (was_banned(su, preword + sp->ts_prewordlen)
+		if ((sp->ts_complen == sp->ts_compsplit
+			    && was_banned(su, preword + sp->ts_prewordlen))
 						   || was_banned(su, preword))
-		    break;
+		{
+		    if (slang->sl_compprog == NULL)
+			break;
+		    /* the word so far was banned but we may try compounding */
+		    goodword_ends = FALSE;
+		}
 
 		newscore = 0;
 		if ((flags & WF_REGION)
@@ -9523,7 +9549,9 @@ suggest_try_change(su)
 				  captype(preword + sp->ts_prewordlen, NULL)))
 		    newscore += SCORE_ICASE;
 
-		if (fword_ends && sp->ts_fidx >= sp->ts_fidxtry)
+		maysplit = TRUE;
+		if (fword_ends && goodword_ends
+					     && sp->ts_fidx >= sp->ts_fidxtry)
 		{
 		    /* The badword also ends: add suggestions.  Give a penalty
 		     * when changing non-word char to word char, e.g., "thes,"
@@ -9549,11 +9577,20 @@ suggest_try_change(su)
 		    }
 
 		    add_suggestion(su, &su->su_ga, preword,
-			    sp->ts_fidx - repextra,
-				     sp->ts_score + newscore, 0, FALSE,
-				     lp->lp_sallang);
-		}
-		else if ((sp->ts_fidx >= sp->ts_fidxtry || fword_ends)
+					sp->ts_fidx - repextra,
+					sp->ts_score + newscore, 0, FALSE,
+					lp->lp_sallang);
+
+		    /* When the bad word doesn't end yet, try changing the
+		     * next word.  E.g., find suggestions for "the the" where
+		     * the second "the" is different.  It's done like a split.
+		     */
+		    if (sp->ts_fidx - repextra >= su->su_badlen)
+			maysplit = FALSE;
+		}
+
+		if (maysplit
+			&& (sp->ts_fidx >= sp->ts_fidxtry || fword_ends)
 #ifdef FEAT_MBYTE
 			/* Don't split halfway a character. */
 			&& (!has_mbyte || sp->ts_tcharlen == 0)
@@ -9574,7 +9611,7 @@ suggest_try_change(su)
 		     *    the following word is valid.
 		     */
 		    try_compound = FALSE;
-		    if (!fword_ends
+		    if ((!fword_ends || !goodword_ends)
 			    && slang->sl_compprog != NULL
 			    && ((unsigned)flags >> 24) != 0
 			    && sp->ts_twordlen - sp->ts_splitoff
@@ -9618,7 +9655,7 @@ suggest_try_change(su)
 		    else
 			sp->ts_flags &= ~TSF_DIDSPLIT;
 
-		    if (!try_compound && !fword_ends)
+		    if (!try_compound && (!fword_ends || !goodword_ends))
 		    {
 			/* If we're going to split need to check that the
 			 * words so far are valid for compounding.  If there
@@ -9656,10 +9693,12 @@ suggest_try_change(su)
 			/* If the badword has a non-word character at this
 			 * position skip it.  That means replacing the
 			 * non-word character with a space.  Always skip a
-			 * character when the word ends. */
-			if ((!try_compound
-				   && !spell_iswordp_nmw(fword + sp->ts_fidx))
+			 * character when the word ends.  But only when the
+			 * good word can end. */
+			if (((!try_compound
+				    && !spell_iswordp_nmw(fword + sp->ts_fidx))
 				|| fword_ends)
+			    && goodword_ends)
 			{
 			    int	    l;
 
@@ -10726,12 +10765,17 @@ stp_sal_score(stp, su, slang, badsound)
     char_u	*badsound;	/* sound-folded badword */
 {
     char_u	*p;
+    char_u	*pbad;
+    char_u	*pgood;
     char_u	badsound2[MAXWLEN];
     char_u	fword[MAXWLEN];
     char_u	goodsound[MAXWLEN];
-
-    if (stp->st_orglen <= su->su_badlen)
-	p = badsound;
+    char_u	goodword[MAXWLEN];
+    int		lendiff;
+
+    lendiff = (int)(su->su_badlen - stp->st_orglen);
+    if (lendiff >= 0)
+	pbad = badsound;
     else
     {
 	/* soundfold the bad word with more characters following */
@@ -10747,13 +10791,24 @@ stp_sal_score(stp, su, slang, badsound)
 		mch_memmove(p, p + 1, STRLEN(p));
 
 	spell_soundfold(slang, fword, TRUE, badsound2);
-	p = badsound2;
-    }
+	pbad = badsound2;
+    }
+
+    if (lendiff > 0)
+    {
+	/* Add part of the bad word to the good word, so that we soundfold
+	 * what replaces the bad word. */
+	STRCPY(goodword, stp->st_word);
+	STRNCAT(goodword, su->su_badptr + su->su_badlen - lendiff, lendiff);
+	pgood = goodword;
+    }
+    else
+	pgood = stp->st_word;
 
     /* Sound-fold the word and compute the score for the difference. */
-    spell_soundfold(slang, stp->st_word, FALSE, goodsound);
-
-    return soundalike_score(goodsound, p);
+    spell_soundfold(slang, pgood, FALSE, goodsound);
+
+    return soundalike_score(goodsound, pbad);
 }
 
 /*
@@ -11081,23 +11136,24 @@ similar_chars(slang, c1, c2)
  * with spell_edit_score().
  */
     static void
-add_suggestion(su, gap, goodword, badlen, score, altscore, had_bonus, slang)
+add_suggestion(su, gap, goodword, badlenarg, score, altscore, had_bonus, slang)
     suginfo_T	*su;
     garray_T	*gap;
     char_u	*goodword;
-    int		badlen;		/* length of bad word used */
+    int		badlenarg;	/* len of bad word replaced with "goodword" */
     int		score;
     int		altscore;
     int		had_bonus;	/* value for st_had_bonus */
     slang_T	*slang;		/* language for sound folding */
 {
-    int		goodlen = STRLEN(goodword);
+    int		goodlen = STRLEN(goodword); /* len of goodword changed */
+    int		badlen = badlenarg;	    /* len of bad word changed */
     suggest_T   *stp;
+    suggest_T   new_sug;
     int		i;
-    char_u	*p = NULL;
-    int		c = 0;
-    int		attr = 0;
+    hlf_T	attr = HLF_COUNT;
     char_u	longword[MAXWLEN + 1];
+    char_u	*pgood, *pbad;
 
     /* Check that the word really is valid.  Esp. for banned words and for
      * split words, such as "the the".  Need to append what follows to check
@@ -11105,36 +11161,34 @@ add_suggestion(su, gap, goodword, badlen
     STRCPY(longword, goodword);
     vim_strncpy(longword + goodlen, su->su_badptr + badlen, MAXWLEN - goodlen);
     (void)spell_check(curwin, longword, &attr, NULL);
-    if (attr != 0)
+    if (attr != HLF_COUNT)
 	return;
 
-    /* If past "su_badlen" and the rest is identical stop at "su_badlen".
-     * Remove the common part from "goodword". */
-    i = badlen - su->su_badlen;
-    if (i > 0)
-    {
-	/* This assumes there was no case folding or it didn't change the
-	 * length... */
-	p = goodword + goodlen - i;
-	if (p > goodword && STRNICMP(su->su_badptr + su->su_badlen, p, i) == 0)
-	{
-	    badlen = su->su_badlen;
-	    c = *p;
-	    *p = NUL;
+    /* Minimize "badlen" for consistency.  Avoids that changing "the the" to
+     * "thee the" is added next to changing the first "the" to "thee".  */
+    pgood = goodword + STRLEN(goodword);
+    pbad = su->su_badptr + badlen;
+    while (pgood > goodword && pbad > su->su_badptr)
+    {
+	mb_ptr_back(goodword, pgood);
+	mb_ptr_back(su->su_badptr, pbad);
+#ifdef FEAT_MBYTE
+	if (has_mbyte)
+	{
+	    if (mb_ptr2char(pgood) != mb_ptr2char(pbad))
+		break;
 	}
 	else
-	    p = NULL;
-    }
-    else if (i < 0)
-    {
-	/* When replacing part of the word check that we actually change
-	 * something.  For "the the" a suggestion can be replacing the first
-	 * "the" with itself, since "the" wasn't banned. */
-	if (badlen == (int)goodlen
-			    && STRNCMP(su->su_badword, goodword, badlen) == 0)
-	    return;
-    }
-
+#endif
+	    if (*pgood != *pbad)
+		break;
+	badlen = pbad - su->su_badptr;
+	goodlen = pgood - goodword;
+    }
+    if (badlen == 0 && goodlen == 0)
+	/* goodword doesn't change anything; may happen for "the the" changing
+	 * the first "the" to itself. */
+	return;
 
     if (score <= su->su_maxscore)
     {
@@ -11143,18 +11197,44 @@ add_suggestion(su, gap, goodword, badlen
 	 * "thes" -> "these". */
 	stp = &SUG(*gap, 0);
 	for (i = gap->ga_len - 1; i >= 0; --i)
-	    if (STRCMP(stp[i].st_word, goodword) == 0
+	    if (STRLEN(stp[i].st_word) == goodlen
+			&& STRNCMP(stp[i].st_word, goodword, goodlen) == 0
 						&& stp[i].st_orglen == badlen)
 	    {
-		/* Found it.  Remember the lowest score. */
-		if (stp[i].st_score > score)
-		{
-		    stp[i].st_score = score;
-		    stp[i].st_altscore = altscore;
-		    stp[i].st_had_bonus = had_bonus;
-		}
+		/*
+		 * Found it.  Remember the lowest score.
+		 */
 		if (stp[i].st_slang == NULL)
 		    stp[i].st_slang = slang;
+
+		new_sug.st_score = score;
+		new_sug.st_altscore = altscore;
+		new_sug.st_had_bonus = had_bonus;
+
+		if (stp[i].st_had_bonus != had_bonus)
+		{
+		    /* Only one of the two had the soundalike score computed.
+		     * Need to do that for the other one now, otherwise the
+		     * scores can't be compared.  This happens because
+		     * suggest_try_change() doesn't compute the soundalike
+		     * word to keep it fast. */
+		    if (had_bonus)
+			rescore_one(su, &stp[i]);
+		    else
+		    {
+			new_sug.st_word = goodword;
+			new_sug.st_slang = stp[i].st_slang;
+			new_sug.st_orglen = badlen;
+			rescore_one(su, &new_sug);
+		    }
+		}
+
+		if (stp[i].st_score > new_sug.st_score)
+		{
+		    stp[i].st_score = new_sug.st_score;
+		    stp[i].st_altscore = new_sug.st_altscore;
+		    stp[i].st_had_bonus = new_sug.st_had_bonus;
+		}
 		break;
 	    }
 
@@ -11162,7 +11242,7 @@ add_suggestion(su, gap, goodword, badlen
 	{
 	    /* Add a suggestion. */
 	    stp = &SUG(*gap, gap->ga_len);
-	    stp->st_word = vim_strsave(goodword);
+	    stp->st_word = vim_strnsave(goodword, goodlen);
 	    if (stp->st_word != NULL)
 	    {
 		stp->st_score = score;
@@ -11180,9 +11260,6 @@ add_suggestion(su, gap, goodword, badlen
 	    }
 	}
     }
-
-    if (p != NULL)
-	*p = c;		/* restore "goodword" */
 }
 
 /*
@@ -11244,62 +11321,47 @@ free_banned(su)
 }
 
 /*
- * Recompute the score if sound-folding is possible.  This is slow,
- * thus only done for the final results.
+ * Recompute the score for all suggestions if sound-folding is possible.  This
+ * is slow, thus only done for the final results.
  */
     static void
 rescore_suggestions(su)
     suginfo_T	*su;
 {
-    langp_T	*lp;
-    suggest_T	*stp;
-    char_u	sal_badword[MAXWLEN];
-    char_u	sal_badword2[MAXWLEN];
     int		i;
-    int		lpi;
-    slang_T	*slang_first = NULL;
-    slang_T	*slang;
-
-    for (lpi = 0; lpi < curbuf->b_langp.ga_len; ++lpi)
-    {
-	lp = LANGP_ENTRY(curbuf->b_langp, lpi);
-	if (lp->lp_slang->sl_sal.ga_len > 0)
-	{
-	    /* soundfold the bad word */
-	    slang_first = lp->lp_slang;
-	    spell_soundfold(slang_first, su->su_fbadword, TRUE, sal_badword);
-	    break;
-	}
-    }
-
-    if (slang_first != NULL)
-    {
+
+    if (su->su_sallang != NULL)
 	for (i = 0; i < su->su_ga.ga_len; ++i)
-	{
-	    /* Only rescore suggestions that have no sal score yet and do have
-	     * a language. */
-	    stp = &SUG(su->su_ga, i);
-	    if (!stp->st_had_bonus && stp->st_slang != NULL)
-	    {
-		slang = stp->st_slang;
-		if (slang->sl_sal.ga_len > 0)
-		{
-		    if (slang == slang_first)
-			stp->st_altscore = stp_sal_score(stp, su,
-							  slang, sal_badword);
-		    else
-		    {
-			spell_soundfold(slang, su->su_fbadword,
-							  TRUE, sal_badword2);
-			stp->st_altscore = stp_sal_score(stp, su,
-							 slang, sal_badword2);
-		    }
-		    if (stp->st_altscore == SCORE_MAXMAX)
-			stp->st_altscore = SCORE_BIG;
-		    stp->st_score = RESCORE(stp->st_score, stp->st_altscore);
-		}
-	    }
-	}
+	    rescore_one(su, &SUG(su->su_ga, i));
+}
+
+/*
+ * Recompute the score for one suggestion if sound-folding is possible.
+ */
+    static void
+rescore_one(su, stp)
+    suginfo_T *su;
+    suggest_T *stp;
+{
+    slang_T	*slang = stp->st_slang;
+    char_u	sal_badword[MAXWLEN];
+
+    /* Only rescore suggestions that have no sal score yet and do have a
+     * language. */
+    if (slang != NULL && slang->sl_sal.ga_len > 0 && !stp->st_had_bonus)
+    {
+	if (slang == su->su_sallang)
+	    stp->st_altscore = stp_sal_score(stp, su,
+						   slang, su->su_sal_badword);
+	else
+	{
+	    spell_soundfold(slang, su->su_fbadword, TRUE, sal_badword);
+	    stp->st_altscore = stp_sal_score(stp, su, slang, sal_badword);
+	}
+	if (stp->st_altscore == SCORE_MAXMAX)
+	    stp->st_altscore = SCORE_BIG;
+	stp->st_score = RESCORE(stp->st_score, stp->st_altscore);
+	stp->st_had_bonus = TRUE;
     }
 }