diff src/spell.c @ 358:e111db373ca4

updated for version 7.0093
author vimboss
date Sat, 25 Jun 2005 22:49:46 +0000
parents 644578c9e219
children b498dee21bd3
line wrap: on
line diff
--- a/src/spell.c
+++ b/src/spell.c
@@ -396,7 +396,9 @@ typedef struct suggest_S
 #define SCORE_SUBST	93	/* substitute a character */
 #define SCORE_SIMILAR	33	/* substitute a similar character */
 #define SCORE_DEL	94	/* delete a character */
+#define SCORE_DELDUP	64	/* delete a duplicated character */
 #define SCORE_INS	96	/* insert a character */
+#define SCORE_INSDUP	66	/* insert a duplicate character */
 
 #define SCORE_MAXINIT	350	/* Initial maximum score: higher == slower.
 				 * 350 allows for about three changes. */
@@ -453,11 +455,12 @@ static int	    did_set_spelltab;
 
 static void clear_spell_chartab __ARGS((spelltab_T *sp));
 static int set_spell_finish __ARGS((spelltab_T	*new_st));
+static int spell_iswordp __ARGS((char_u *p));
 static void write_spell_prefcond __ARGS((FILE *fd, garray_T *gap));
 
 /*
- * Return TRUE if "p" points to a word character or "c" is a word character
- * for spelling.
+ * Return TRUE if "p" points to a word character.  Like spell_iswordp() but
+ * without the special handling of a single quote.
  * Checking for a word character is done very often, avoid the function call
  * overhead.
  */
@@ -547,7 +550,7 @@ static int set_spell_charflags __ARGS((c
 static int set_spell_chartab __ARGS((char_u *fol, char_u *low, char_u *upp));
 static void write_spell_chartab __ARGS((FILE *fd));
 static int spell_casefold __ARGS((char_u *p, int len, char_u *buf, int buflen));
-static void spell_find_suggest __ARGS((char_u *badptr, suginfo_T *su, int maxcount));
+static void spell_find_suggest __ARGS((char_u *badptr, suginfo_T *su, int maxcount, int banbadword));
 static void spell_find_cleanup __ARGS((suginfo_T *su));
 static void onecap_copy __ARGS((char_u *word, char_u *wcopy, int upper));
 static void allcap_copy __ARGS((char_u *word, char_u *wcopy));
@@ -656,7 +659,7 @@ spell_check(wp, ptr, attrp)
 	    mi.mi_end = skipdigits(ptr);
 	    nrlen = mi.mi_end - ptr;
 	}
-	if (!SPELL_ISWORDP(mi.mi_end))
+	if (!spell_iswordp(mi.mi_end))
 	    return (int)(mi.mi_end - ptr);
 
 	/* Try including the digits in the word. */
@@ -667,12 +670,12 @@ spell_check(wp, ptr, attrp)
 
     /* Find the normal end of the word (until the next non-word character). */
     mi.mi_word = ptr;
-    if (SPELL_ISWORDP(mi.mi_fend))
+    if (spell_iswordp(mi.mi_fend))
     {
 	do
 	{
 	    mb_ptr_adv(mi.mi_fend);
-	} while (*mi.mi_fend != NUL && SPELL_ISWORDP(mi.mi_fend));
+	} while (*mi.mi_fend != NUL && spell_iswordp(mi.mi_fend));
     }
 
     /* We always use the characters up to the next non-word character,
@@ -908,7 +911,7 @@ find_word(mip, mode)
 	if ((*mb_head_off)(ptr, ptr + wlen) > 0)
 	    continue;	    /* not at first byte of character */
 #endif
-	if (SPELL_ISWORDP(ptr + wlen))
+	if (spell_iswordp(ptr + wlen))
 	    continue;	    /* next char is a word character */
 
 #ifdef FEAT_MBYTE
@@ -1163,7 +1166,7 @@ fold_more(mip)
     do
     {
 	mb_ptr_adv(mip->mi_fend);
-    } while (*mip->mi_fend != NUL && SPELL_ISWORDP(mip->mi_fend));
+    } while (*mip->mi_fend != NUL && spell_iswordp(mip->mi_fend));
 
     /* Include the non-word character so that we can check for the
      * word end. */
@@ -2290,7 +2293,7 @@ captype(word, end)
     int		past_second = FALSE;	/* past second word char */
 
     /* find first letter */
-    for (p = word; !SPELL_ISWORDP(p); mb_ptr_adv(p))
+    for (p = word; !spell_iswordp(p); mb_ptr_adv(p))
 	if (end == NULL ? *p == NUL : p >= end)
 	    return 0;	    /* only non-word characters, illegal word */
 #ifdef FEAT_MBYTE
@@ -2306,7 +2309,7 @@ captype(word, end)
      * But a word with an upper char only at start is a ONECAP.
      */
     for ( ; end == NULL ? *p != NUL : p < end; mb_ptr_adv(p))
-	if (SPELL_ISWORDP(p))
+	if (spell_iswordp(p))
 	{
 #ifdef FEAT_MBYTE
 	    c = mb_ptr2char(p);
@@ -2367,7 +2370,7 @@ spell_reload()
     buf_T	*buf;
     win_T	*wp;
 
-    /* Initialize the table for SPELL_ISWORDP(). */
+    /* Initialize the table for spell_iswordp(). */
     init_spell_chartab();
 
     /* Unload all allocated memory. */
@@ -5017,6 +5020,28 @@ set_spell_finish(new_st)
 }
 
 /*
+ * Return TRUE if "p" points to a word character.
+ * As a special case we see a single quote as a word character when it is
+ * followed by a word character.  This finds they'there but not 'they there'.
+ */
+    static int
+spell_iswordp(p)
+    char_u	*p;
+{
+    char_u	*s;
+
+    if (*p == '\'')
+	s = p + 1;
+    else
+	s = p;
+#ifdef FEAT_MBYTE
+    if (has_mbyte && MB_BYTE2LEN(*s) > 1)
+	return mb_get_class(s) >= 2;
+#endif
+    return spelltab.st_isw[*s];
+}
+
+/*
  * Write the table with prefix conditions to the .spl file.
  */
     static void
@@ -5181,7 +5206,7 @@ spell_suggest()
     line = ml_get_curline();
 
     /* Get the list of suggestions */
-    spell_find_suggest(line + curwin->w_cursor.col, &sug, (int)Rows - 2);
+    spell_find_suggest(line + curwin->w_cursor.col, &sug, (int)Rows - 2, TRUE);
 
     if (sug.su_ga.ga_len == 0)
 	MSG(_("Sorry, no suggestions"));
@@ -5281,7 +5306,7 @@ spell_suggest_list(gap, word, maxcount)
     suggest_T	*stp;
     char_u	*wcopy;
 
-    spell_find_suggest(word, &sug, maxcount);
+    spell_find_suggest(word, &sug, maxcount, FALSE);
 
     /* Make room in "gap". */
     ga_init2(gap, sizeof(char_u *), sug.su_ga.ga_len + 1);
@@ -5314,10 +5339,11 @@ spell_suggest_list(gap, word, maxcount)
  * This is based on the mechanisms of Aspell, but completely reimplemented.
  */
     static void
-spell_find_suggest(badptr, su, maxcount)
+spell_find_suggest(badptr, su, maxcount, banbadword)
     char_u	*badptr;
     suginfo_T	*su;
     int		maxcount;
+    int		banbadword;	/* don't include badword in suggestions */
 {
     int		attr;
 
@@ -5344,7 +5370,8 @@ spell_find_suggest(badptr, su, maxcount)
     su->su_badflags = captype(su->su_badptr, su->su_badptr + su->su_badlen);
 
     /* Ban the bad word itself.  It may appear in another region. */
-    add_banned(su, su->su_badword);
+    if (banbadword)
+	add_banned(su, su->su_badword);
 
     /*
      * 1. Try special cases, such as repeating a word: "the the" -> "the".
@@ -5688,7 +5715,7 @@ suggest_try_change(su)
 		    newscore += SCORE_ICASE;
 
 		if ((fword[sp->ts_fidx] == NUL
-				       || !SPELL_ISWORDP(fword + sp->ts_fidx))
+				       || !spell_iswordp(fword + sp->ts_fidx))
 			&& sp->ts_fidx >= sp->ts_fidxtry)
 		{
 		    /* The badword also ends: add suggestions, */
@@ -5851,6 +5878,20 @@ suggest_try_change(su)
 					sp->ts_score -=
 						  SCORE_SUBST - SCORE_SIMILAR;
 				}
+				else if (sp->ts_isdiff == DIFF_INSERT
+					&& sp->ts_twordlen > sp->ts_tcharlen)
+				{
+				    /* If the previous character was the same,
+				     * thus doubling a character, give a bonus
+				     * to the score. */
+				    p = tword + sp->ts_twordlen
+							    - sp->ts_tcharlen;
+				    c = mb_ptr2char(p);
+				    mb_ptr_back(tword, p);
+				    if (c == mb_ptr2char(p))
+					sp->ts_score -= SCORE_INS
+							       - SCORE_INSDUP;
+				}
 
 				/* Starting a new char, reset the length. */
 				sp->ts_tcharlen = 0;
@@ -5891,12 +5932,25 @@ suggest_try_change(su)
 			&& try_deeper(su, stack, depth, SCORE_DEL))
 		{
 		    ++depth;
+
+		    /* Advance over the character in fword[]. Give a bonus to
+		     * the score if the same character is following "nn" ->
+		     * "n". */
 #ifdef FEAT_MBYTE
 		    if (has_mbyte)
+		    {
+			c = mb_ptr2char(fword + sp->ts_fidx);
 			stack[depth].ts_fidx += MB_BYTE2LEN(fword[sp->ts_fidx]);
+			if (c == mb_ptr2char(fword + stack[depth].ts_fidx))
+			    stack[depth].ts_score -= SCORE_DEL - SCORE_DELDUP;
+		    }
 		    else
 #endif
+		    {
 			++stack[depth].ts_fidx;
+			if (fword[sp->ts_fidx] == fword[sp->ts_fidx + 1])
+			    stack[depth].ts_score -= SCORE_DEL - SCORE_DELDUP;
+		    }
 		    break;
 		}
 		/*FALLTHROUGH*/
@@ -5935,7 +5989,18 @@ suggest_try_change(su)
 				sp->ts_isdiff = DIFF_INSERT;
 			    }
 			}
+			else
+			    fl = 1;
+			if (fl == 1)
 #endif
+			{
+			    /* If the previous character was the same, thus
+			     * doubling a character, give a bonus to the
+			     * score. */
+			    if (sp->ts_twordlen >= 2
+					   && tword[sp->ts_twordlen - 2] == c)
+				sp->ts_score -= SCORE_INS - SCORE_INSDUP;
+			}
 		    }
 		}
 		break;
@@ -7264,7 +7329,7 @@ spell_soundfold(slang, inword, res)
 	    else if (has_mbyte)
 	    {
 		l = mb_ptr2len_check(s);
-		if (SPELL_ISWORDP(s))
+		if (spell_iswordp(s))
 		{
 		    mch_memmove(t, s, l);
 		    t += l;
@@ -7276,7 +7341,7 @@ spell_soundfold(slang, inword, res)
 #endif
 	    else
 	    {
-		if (SPELL_ISWORDP(s))
+		if (spell_iswordp(s))
 		    *t++ = *s;
 		++s;
 	    }
@@ -7376,12 +7441,12 @@ spell_soundfold(slang, inword, res)
 		if (*s == NUL
 			|| (*s == '^'
 			    && (i == 0 || !(word[i - 1] == ' '
-					      || SPELL_ISWORDP(word + i - 1)))
+					      || spell_iswordp(word + i - 1)))
 			    && (*(s + 1) != '$'
-				|| (!SPELL_ISWORDP(word + i + k0))))
+				|| (!spell_iswordp(word + i + k0))))
 			|| (*s == '$' && i > 0
-			    && SPELL_ISWORDP(word + i - 1)
-			    && (!SPELL_ISWORDP(word + i + k0))))
+			    && spell_iswordp(word + i - 1)
+			    && (!spell_iswordp(word + i + k0))))
 		{
 		    /* search for followup rules, if:    */
 		    /* followup and k > 1  and  NO '-' in searchstring */
@@ -7443,7 +7508,7 @@ spell_soundfold(slang, inword, res)
 			    if (*s == NUL
 				    /* *s == '^' cuts */
 				    || (*s == '$'
-					    && !SPELL_ISWORDP(word + i + k0)))
+					    && !spell_iswordp(word + i + k0)))
 			    {
 				if (k0 == k)
 				    /* this is just a piece of the string */