comparison src/spell.c @ 534:c6296b0ad9ea v7.0151

updated for version 7.0151
author vimboss
date Thu, 29 Sep 2005 18:26:07 +0000
parents 7052f11a3dc9
children 792d1b60e546
comparison
equal deleted inserted replaced
533:c8b6b7e1005d 534:c6296b0ad9ea
475 char_u *su_badptr; /* start of bad word in line */ 475 char_u *su_badptr; /* start of bad word in line */
476 int su_badlen; /* length of detected bad word in line */ 476 int su_badlen; /* length of detected bad word in line */
477 int su_badflags; /* caps flags for bad word */ 477 int su_badflags; /* caps flags for bad word */
478 char_u su_badword[MAXWLEN]; /* bad word truncated at su_badlen */ 478 char_u su_badword[MAXWLEN]; /* bad word truncated at su_badlen */
479 char_u su_fbadword[MAXWLEN]; /* su_badword case-folded */ 479 char_u su_fbadword[MAXWLEN]; /* su_badword case-folded */
480 char_u su_sal_badword[MAXWLEN]; /* su_badword soundfolded */
481 slang_T *su_slang_first; /* slang_T used for su_sal_badword */
480 hashtab_T su_banned; /* table with banned words */ 482 hashtab_T su_banned; /* table with banned words */
481 slang_T *su_sallang; /* default language for sound folding */ 483 slang_T *su_sallang; /* default language for sound folding */
482 } suginfo_T; 484 } suginfo_T;
483 485
484 /* One word suggestion. Used in "si_ga". */ 486 /* One word suggestion. Used in "si_ga". */
747 static void add_suggestion __ARGS((suginfo_T *su, garray_T *gap, char_u *goodword, int badlen, int score, int altscore, int had_bonus, slang_T *slang)); 749 static void add_suggestion __ARGS((suginfo_T *su, garray_T *gap, char_u *goodword, int badlen, int score, int altscore, int had_bonus, slang_T *slang));
748 static void add_banned __ARGS((suginfo_T *su, char_u *word)); 750 static void add_banned __ARGS((suginfo_T *su, char_u *word));
749 static int was_banned __ARGS((suginfo_T *su, char_u *word)); 751 static int was_banned __ARGS((suginfo_T *su, char_u *word));
750 static void free_banned __ARGS((suginfo_T *su)); 752 static void free_banned __ARGS((suginfo_T *su));
751 static void rescore_suggestions __ARGS((suginfo_T *su)); 753 static void rescore_suggestions __ARGS((suginfo_T *su));
754 static void rescore_one __ARGS((suginfo_T *su, suggest_T *stp));
752 static int cleanup_suggestions __ARGS((garray_T *gap, int maxscore, int keep)); 755 static int cleanup_suggestions __ARGS((garray_T *gap, int maxscore, int keep));
753 static void spell_soundfold __ARGS((slang_T *slang, char_u *inword, int folded, char_u *res)); 756 static void spell_soundfold __ARGS((slang_T *slang, char_u *inword, int folded, char_u *res));
754 static void spell_soundfold_sofo __ARGS((slang_T *slang, char_u *inword, char_u *res)); 757 static void spell_soundfold_sofo __ARGS((slang_T *slang, char_u *inword, char_u *res));
755 static void spell_soundfold_sal __ARGS((slang_T *slang, char_u *inword, char_u *res)); 758 static void spell_soundfold_sal __ARGS((slang_T *slang, char_u *inword, char_u *res));
756 #ifdef FEAT_MBYTE 759 #ifdef FEAT_MBYTE
813 static char *msg_compressing = N_("Compressing word tree..."); 816 static char *msg_compressing = N_("Compressing word tree...");
814 817
815 /* 818 /*
816 * Main spell-checking function. 819 * Main spell-checking function.
817 * "ptr" points to a character that could be the start of a word. 820 * "ptr" points to a character that could be the start of a word.
818 * "*attrp" is set to the attributes for a badly spelled word. For a non-word 821 * "*attrp" is set to the highlight index for a badly spelled word. For a
819 * or when it's OK it remains unchanged. 822 * non-word or when it's OK it remains unchanged.
820 * This must only be called when 'spelllang' is not empty. 823 * This must only be called when 'spelllang' is not empty.
821 * 824 *
822 * "capcol" is used to check for a Capitalised word after the end of a 825 * "capcol" is used to check for a Capitalised word after the end of a
823 * sentence. If it's zero then perform the check. Return the column where to 826 * sentence. If it's zero then perform the check. Return the column where to
824 * check next, or -1 when no sentence end was found. If it's NULL then don't 827 * check next, or -1 when no sentence end was found. If it's NULL then don't
829 */ 832 */
830 int 833 int
831 spell_check(wp, ptr, attrp, capcol) 834 spell_check(wp, ptr, attrp, capcol)
832 win_T *wp; /* current window */ 835 win_T *wp; /* current window */
833 char_u *ptr; 836 char_u *ptr;
834 int *attrp; 837 hlf_T *attrp;
835 int *capcol; /* column to check for Capital */ 838 int *capcol; /* column to check for Capital */
836 { 839 {
837 matchinf_T mi; /* Most things are put in "mi" so that it can 840 matchinf_T mi; /* Most things are put in "mi" so that it can
838 be passed to functions quickly. */ 841 be passed to functions quickly. */
839 int nrlen = 0; /* found a number first */ 842 int nrlen = 0; /* found a number first */
1006 mi.mi_result = save_result; 1009 mi.mi_result = save_result;
1007 } 1010 }
1008 } 1011 }
1009 1012
1010 if (mi.mi_result == SP_BAD || mi.mi_result == SP_BANNED) 1013 if (mi.mi_result == SP_BAD || mi.mi_result == SP_BANNED)
1011 *attrp = highlight_attr[HLF_SPB]; 1014 *attrp = HLF_SPB;
1012 else if (mi.mi_result == SP_RARE) 1015 else if (mi.mi_result == SP_RARE)
1013 *attrp = highlight_attr[HLF_SPR]; 1016 *attrp = HLF_SPR;
1014 else 1017 else
1015 *attrp = highlight_attr[HLF_SPL]; 1018 *attrp = HLF_SPL;
1016 } 1019 }
1017 1020
1018 if (wrongcaplen > 0 && (mi.mi_result == SP_OK || mi.mi_result == SP_RARE)) 1021 if (wrongcaplen > 0 && (mi.mi_result == SP_OK || mi.mi_result == SP_RARE))
1019 { 1022 {
1020 /* Report SpellCap only when the word isn't badly spelled. */ 1023 /* Report SpellCap only when the word isn't badly spelled. */
1021 *attrp = highlight_attr[HLF_SPC]; 1024 *attrp = HLF_SPC;
1022 return wrongcaplen; 1025 return wrongcaplen;
1023 } 1026 }
1024 1027
1025 return (int)(mi.mi_end - ptr); 1028 return (int)(mi.mi_end - ptr);
1026 } 1029 }
1820 spell_move_to(wp, dir, allwords, curline, attrp) 1823 spell_move_to(wp, dir, allwords, curline, attrp)
1821 win_T *wp; 1824 win_T *wp;
1822 int dir; /* FORWARD or BACKWARD */ 1825 int dir; /* FORWARD or BACKWARD */
1823 int allwords; /* TRUE for "[s"/"]s", FALSE for "[S"/"]S" */ 1826 int allwords; /* TRUE for "[s"/"]s", FALSE for "[S"/"]S" */
1824 int curline; 1827 int curline;
1825 int *attrp; /* return: attributes of bad word or NULL */ 1828 hlf_T *attrp; /* return: attributes of bad word or NULL
1829 (only when "dir" is FORWARD) */
1826 { 1830 {
1827 linenr_T lnum; 1831 linenr_T lnum;
1828 pos_T found_pos; 1832 pos_T found_pos;
1829 int found_len = 0; 1833 int found_len = 0;
1830 char_u *line; 1834 char_u *line;
1831 char_u *p; 1835 char_u *p;
1832 char_u *endp; 1836 char_u *endp;
1833 int attr; 1837 hlf_T attr;
1834 int len; 1838 int len;
1835 int has_syntax = syntax_present(wp->w_buffer); 1839 int has_syntax = syntax_present(wp->w_buffer);
1836 int col; 1840 int col;
1837 int can_spell; 1841 int can_spell;
1838 char_u *buf = NULL; 1842 char_u *buf = NULL;
1898 && !wrapped 1902 && !wrapped
1899 && (colnr_T)(p - buf) >= wp->w_cursor.col) 1903 && (colnr_T)(p - buf) >= wp->w_cursor.col)
1900 break; 1904 break;
1901 1905
1902 /* start of word */ 1906 /* start of word */
1903 attr = 0; 1907 attr = HLF_COUNT;
1904 len = spell_check(wp, p, &attr, &capcol); 1908 len = spell_check(wp, p, &attr, &capcol);
1905 1909
1906 if (attr != 0) 1910 if (attr != HLF_COUNT)
1907 { 1911 {
1908 /* We found a bad word. Check the attribute. */ 1912 /* We found a bad word. Check the attribute. */
1909 if (allwords || attr == highlight_attr[HLF_SPB]) 1913 if (allwords || attr == HLF_SPB)
1910 { 1914 {
1911 found_one = TRUE; 1915 found_one = TRUE;
1912 1916
1913 /* When searching forward only accept a bad word after 1917 /* When searching forward only accept a bad word after
1914 * the cursor. */ 1918 * the cursor. */
2015 if (lnum == wp->w_cursor.lnum && !found_one) 2019 if (lnum == wp->w_cursor.lnum && !found_one)
2016 break; 2020 break;
2017 2021
2018 /* Skip the characters at the start of the next line that were 2022 /* Skip the characters at the start of the next line that were
2019 * included in a match crossing line boundaries. */ 2023 * included in a match crossing line boundaries. */
2020 if (attr == 0) 2024 if (attr == HLF_COUNT)
2021 skip = p - endp; 2025 skip = p - endp;
2022 else 2026 else
2023 skip = 0; 2027 skip = 0;
2024 2028
2025 /* Capscol skips over the inserted space. */ 2029 /* Capscol skips over the inserted space. */
5096 * slashes. */ 5100 * slashes. */
5097 ga_concat(&spin->si_map, items[1]); 5101 ga_concat(&spin->si_map, items[1]);
5098 ga_append(&spin->si_map, '/'); 5102 ga_append(&spin->si_map, '/');
5099 } 5103 }
5100 } 5104 }
5101 else if (STRCMP(items[0], "SAL") == 0 && itemcnt == 3) 5105 /* Accept "SAL from to" and "SAL from to # comment". */
5106 else if (STRCMP(items[0], "SAL") == 0
5107 && (itemcnt == 3 || (itemcnt > 3 && items[3][0] == '#')))
5102 { 5108 {
5103 if (do_sal) 5109 if (do_sal)
5104 { 5110 {
5105 /* SAL item (sounds-a-like) 5111 /* SAL item (sounds-a-like)
5106 * Either one of the known keys or a from-to pair. */ 5112 * Either one of the known keys or a from-to pair. */
8767 suginfo_T *su; 8773 suginfo_T *su;
8768 int maxcount; 8774 int maxcount;
8769 int banbadword; /* don't include badword in suggestions */ 8775 int banbadword; /* don't include badword in suggestions */
8770 int need_cap; /* word should start with capital */ 8776 int need_cap; /* word should start with capital */
8771 { 8777 {
8772 int attr = 0; 8778 hlf_T attr = HLF_COUNT;
8773 char_u buf[MAXPATHL]; 8779 char_u buf[MAXPATHL];
8774 char_u *p; 8780 char_u *p;
8775 int do_combine = FALSE; 8781 int do_combine = FALSE;
8776 char_u *sps_copy; 8782 char_u *sps_copy;
8777 #ifdef FEAT_EVAL 8783 #ifdef FEAT_EVAL
8819 su->su_sallang = lp->lp_sallang; 8825 su->su_sallang = lp->lp_sallang;
8820 break; 8826 break;
8821 } 8827 }
8822 } 8828 }
8823 8829
8830 /* Soundfold the bad word with the default sound folding, so that we don't
8831 * have to do this many times. */
8832 if (su->su_sallang != NULL)
8833 spell_soundfold(su->su_sallang, su->su_fbadword, TRUE,
8834 su->su_sal_badword);
8835
8824 /* If the word is not capitalised and spell_check() doesn't consider the 8836 /* If the word is not capitalised and spell_check() doesn't consider the
8825 * word to be bad then it might need to be capitalised. Add a suggestion 8837 * word to be bad then it might need to be capitalised. Add a suggestion
8826 * for that. */ 8838 * for that. */
8827 c = PTR2CHAR(su->su_badptr); 8839 c = PTR2CHAR(su->su_badptr);
8828 if (!SPELL_ISUPPER(c) && attr == 0) 8840 if (!SPELL_ISUPPER(c) && attr == HLF_COUNT)
8829 { 8841 {
8830 make_case_word(su->su_badword, buf, WF_ONECAP); 8842 make_case_word(su->su_badword, buf, WF_ONECAP);
8831 add_suggestion(su, &su->su_ga, buf, su->su_badlen, SCORE_ICASE, 8843 add_suggestion(su, &su->su_ga, buf, su->su_badlen, SCORE_ICASE,
8832 0, TRUE, su->su_sallang); 8844 0, TRUE, su->su_sallang);
8833 } 8845 }
9171 * use that for the goodword too: "The the" -> "The". */ 9183 * use that for the goodword too: "The the" -> "The". */
9172 c = su->su_fbadword[len]; 9184 c = su->su_fbadword[len];
9173 su->su_fbadword[len] = NUL; 9185 su->su_fbadword[len] = NUL;
9174 make_case_word(su->su_fbadword, word, su->su_badflags); 9186 make_case_word(su->su_fbadword, word, su->su_badflags);
9175 su->su_fbadword[len] = c; 9187 su->su_fbadword[len] = c;
9176 add_suggestion(su, &su->su_ga, word, su->su_badlen, SCORE_DEL, 9188
9177 0, TRUE, su->su_sallang); 9189 /* Give a soundalike score of 0, compute the score as if deleting one
9190 * character. */
9191 add_suggestion(su, &su->su_ga, word, su->su_badlen,
9192 RESCORE(SCORE_REP, 0), 0, TRUE, su->su_sallang);
9178 } 9193 }
9179 } 9194 }
9180 9195
9181 /* 9196 /*
9182 * Try finding suggestions by adding/removing/swapping letters. 9197 * Try finding suggestions by adding/removing/swapping letters.
9224 int fl = 0, tl; 9239 int fl = 0, tl;
9225 int repextra = 0; /* extra bytes in fword[] from REP item */ 9240 int repextra = 0; /* extra bytes in fword[] from REP item */
9226 slang_T *slang; 9241 slang_T *slang;
9227 int fword_ends; 9242 int fword_ends;
9228 int lpi; 9243 int lpi;
9244 int maysplit;
9245 int goodword_ends;
9229 9246
9230 /* We make a copy of the case-folded bad word, so that we can modify it 9247 /* We make a copy of the case-folded bad word, so that we can modify it
9231 * to find matches (esp. REP items). Append some more text, changing 9248 * to find matches (esp. REP items). Append some more text, changing
9232 * chars after the bad word may help. */ 9249 * chars after the bad word may help. */
9233 STRCPY(fword, su->su_fbadword); 9250 STRCPY(fword, su->su_fbadword);
9399 * when arriving at a compound flag. */ 9416 * when arriving at a compound flag. */
9400 sp->ts_flags |= TSF_PREFIXOK; 9417 sp->ts_flags |= TSF_PREFIXOK;
9401 } 9418 }
9402 } 9419 }
9403 9420
9404 /* Check NEEDCOMPOUND: can't use word without compounding. */ 9421 /* Check NEEDCOMPOUND: can't use word without compounding. Do
9422 * try appending another compound word below. */
9405 if (sp->ts_complen == sp->ts_compsplit && fword_ends 9423 if (sp->ts_complen == sp->ts_compsplit && fword_ends
9406 && (flags & WF_NEEDCOMP)) 9424 && (flags & WF_NEEDCOMP))
9407 break; 9425 goodword_ends = FALSE;
9426 else
9427 goodword_ends = TRUE;
9408 9428
9409 if (sp->ts_complen > sp->ts_compsplit) 9429 if (sp->ts_complen > sp->ts_compsplit)
9410 { 9430 {
9411 if (slang->sl_nobreak) 9431 if (slang->sl_nobreak)
9412 { 9432 {
9506 if (flags & WF_BANNED) 9526 if (flags & WF_BANNED)
9507 { 9527 {
9508 add_banned(su, preword + sp->ts_prewordlen); 9528 add_banned(su, preword + sp->ts_prewordlen);
9509 break; 9529 break;
9510 } 9530 }
9511 if (was_banned(su, preword + sp->ts_prewordlen) 9531 if ((sp->ts_complen == sp->ts_compsplit
9532 && was_banned(su, preword + sp->ts_prewordlen))
9512 || was_banned(su, preword)) 9533 || was_banned(su, preword))
9513 break; 9534 {
9535 if (slang->sl_compprog == NULL)
9536 break;
9537 /* the word so far was banned but we may try compounding */
9538 goodword_ends = FALSE;
9539 }
9514 9540
9515 newscore = 0; 9541 newscore = 0;
9516 if ((flags & WF_REGION) 9542 if ((flags & WF_REGION)
9517 && (((unsigned)flags >> 16) & lp->lp_region) == 0) 9543 && (((unsigned)flags >> 16) & lp->lp_region) == 0)
9518 newscore += SCORE_REGION; 9544 newscore += SCORE_REGION;
9521 9547
9522 if (!spell_valid_case(su->su_badflags, 9548 if (!spell_valid_case(su->su_badflags,
9523 captype(preword + sp->ts_prewordlen, NULL))) 9549 captype(preword + sp->ts_prewordlen, NULL)))
9524 newscore += SCORE_ICASE; 9550 newscore += SCORE_ICASE;
9525 9551
9526 if (fword_ends && sp->ts_fidx >= sp->ts_fidxtry) 9552 maysplit = TRUE;
9553 if (fword_ends && goodword_ends
9554 && sp->ts_fidx >= sp->ts_fidxtry)
9527 { 9555 {
9528 /* The badword also ends: add suggestions. Give a penalty 9556 /* The badword also ends: add suggestions. Give a penalty
9529 * when changing non-word char to word char, e.g., "thes," 9557 * when changing non-word char to word char, e.g., "thes,"
9530 * -> "these". */ 9558 * -> "these". */
9531 p = fword + sp->ts_fidx; 9559 p = fword + sp->ts_fidx;
9547 if (spell_iswordp(p, curbuf)) 9575 if (spell_iswordp(p, curbuf))
9548 newscore += SCORE_NONWORD; 9576 newscore += SCORE_NONWORD;
9549 } 9577 }
9550 9578
9551 add_suggestion(su, &su->su_ga, preword, 9579 add_suggestion(su, &su->su_ga, preword,
9552 sp->ts_fidx - repextra, 9580 sp->ts_fidx - repextra,
9553 sp->ts_score + newscore, 0, FALSE, 9581 sp->ts_score + newscore, 0, FALSE,
9554 lp->lp_sallang); 9582 lp->lp_sallang);
9583
9584 /* When the bad word doesn't end yet, try changing the
9585 * next word. E.g., find suggestions for "the the" where
9586 * the second "the" is different. It's done like a split.
9587 */
9588 if (sp->ts_fidx - repextra >= su->su_badlen)
9589 maysplit = FALSE;
9555 } 9590 }
9556 else if ((sp->ts_fidx >= sp->ts_fidxtry || fword_ends) 9591
9592 if (maysplit
9593 && (sp->ts_fidx >= sp->ts_fidxtry || fword_ends)
9557 #ifdef FEAT_MBYTE 9594 #ifdef FEAT_MBYTE
9558 /* Don't split halfway through a character. */ 9595 /* Don't split halfway through a character. */
9559 && (!has_mbyte || sp->ts_tcharlen == 0) 9596 && (!has_mbyte || sp->ts_tcharlen == 0)
9560 #endif 9597 #endif
9561 ) 9598 )
9572 * 2. The badword does end, but it was due to a change 9609 * 2. The badword does end, but it was due to a change
9573 * (e.g., a swap). No need to split, but do check that 9610 * (e.g., a swap). No need to split, but do check that
9574 * the following word is valid. 9611 * the following word is valid.
9575 */ 9612 */
9576 try_compound = FALSE; 9613 try_compound = FALSE;
9577 if (!fword_ends 9614 if ((!fword_ends || !goodword_ends)
9578 && slang->sl_compprog != NULL 9615 && slang->sl_compprog != NULL
9579 && ((unsigned)flags >> 24) != 0 9616 && ((unsigned)flags >> 24) != 0
9580 && sp->ts_twordlen - sp->ts_splitoff 9617 && sp->ts_twordlen - sp->ts_splitoff
9581 >= slang->sl_compminlen 9618 >= slang->sl_compminlen
9582 #ifdef FEAT_MBYTE 9619 #ifdef FEAT_MBYTE
9616 compflags[sp->ts_complen] = NUL; 9653 compflags[sp->ts_complen] = NUL;
9617 } 9654 }
9618 else 9655 else
9619 sp->ts_flags &= ~TSF_DIDSPLIT; 9656 sp->ts_flags &= ~TSF_DIDSPLIT;
9620 9657
9621 if (!try_compound && !fword_ends) 9658 if (!try_compound && (!fword_ends || !goodword_ends))
9622 { 9659 {
9623 /* If we're going to split need to check that the 9660 /* If we're going to split need to check that the
9624 * words so far are valid for compounding. If there 9661 * words so far are valid for compounding. If there
9625 * is only one word it must not have the NEEDCOMPOUND 9662 * is only one word it must not have the NEEDCOMPOUND
9626 * flag. */ 9663 * flag. */
9654 sp->ts_splitfidx = sp->ts_fidx; 9691 sp->ts_splitfidx = sp->ts_fidx;
9655 9692
9656 /* If the badword has a non-word character at this 9693 /* If the badword has a non-word character at this
9657 * position skip it. That means replacing the 9694 * position skip it. That means replacing the
9658 * non-word character with a space. Always skip a 9695 * non-word character with a space. Always skip a
9659 * character when the word ends. */ 9696 * character when the word ends. But only when the
9660 if ((!try_compound 9697 * good word can end. */
9661 && !spell_iswordp_nmw(fword + sp->ts_fidx)) 9698 if (((!try_compound
9699 && !spell_iswordp_nmw(fword + sp->ts_fidx))
9662 || fword_ends) 9700 || fword_ends)
9701 && goodword_ends)
9663 { 9702 {
9664 int l; 9703 int l;
9665 9704
9666 #ifdef FEAT_MBYTE 9705 #ifdef FEAT_MBYTE
9667 if (has_mbyte) 9706 if (has_mbyte)
10724 suginfo_T *su; 10763 suginfo_T *su;
10725 slang_T *slang; 10764 slang_T *slang;
10726 char_u *badsound; /* sound-folded badword */ 10765 char_u *badsound; /* sound-folded badword */
10727 { 10766 {
10728 char_u *p; 10767 char_u *p;
10768 char_u *pbad;
10769 char_u *pgood;
10729 char_u badsound2[MAXWLEN]; 10770 char_u badsound2[MAXWLEN];
10730 char_u fword[MAXWLEN]; 10771 char_u fword[MAXWLEN];
10731 char_u goodsound[MAXWLEN]; 10772 char_u goodsound[MAXWLEN];
10732 10773 char_u goodword[MAXWLEN];
10733 if (stp->st_orglen <= su->su_badlen) 10774 int lendiff;
10734 p = badsound; 10775
10776 lendiff = (int)(su->su_badlen - stp->st_orglen);
10777 if (lendiff >= 0)
10778 pbad = badsound;
10735 else 10779 else
10736 { 10780 {
10737 /* soundfold the bad word with more characters following */ 10781 /* soundfold the bad word with more characters following */
10738 (void)spell_casefold(su->su_badptr, stp->st_orglen, fword, MAXWLEN); 10782 (void)spell_casefold(su->su_badptr, stp->st_orglen, fword, MAXWLEN);
10739 10783
10745 && *skiptowhite(stp->st_word) == NUL) 10789 && *skiptowhite(stp->st_word) == NUL)
10746 for (p = fword; *(p = skiptowhite(p)) != NUL; ) 10790 for (p = fword; *(p = skiptowhite(p)) != NUL; )
10747 mch_memmove(p, p + 1, STRLEN(p)); 10791 mch_memmove(p, p + 1, STRLEN(p));
10748 10792
10749 spell_soundfold(slang, fword, TRUE, badsound2); 10793 spell_soundfold(slang, fword, TRUE, badsound2);
10750 p = badsound2; 10794 pbad = badsound2;
10751 } 10795 }
10796
10797 if (lendiff > 0)
10798 {
10799 /* Add part of the bad word to the good word, so that we soundfold
10800 * what replaces the bad word. */
10801 STRCPY(goodword, stp->st_word);
10802 STRNCAT(goodword, su->su_badptr + su->su_badlen - lendiff, lendiff);
10803 pgood = goodword;
10804 }
10805 else
10806 pgood = stp->st_word;
10752 10807
10753 /* Sound-fold the word and compute the score for the difference. */ 10808 /* Sound-fold the word and compute the score for the difference. */
10754 spell_soundfold(slang, stp->st_word, FALSE, goodsound); 10809 spell_soundfold(slang, pgood, FALSE, goodsound);
10755 10810
10756 return soundalike_score(goodsound, p); 10811 return soundalike_score(goodsound, pbad);
10757 } 10812 }
10758 10813
10759 /* 10814 /*
10760 * Find suggestions by comparing the word in a sound-a-like form. 10815 * Find suggestions by comparing the word in a sound-a-like form.
10761 * Note: This doesn't support postponed prefixes. 10816 * Note: This doesn't support postponed prefixes.
11079 * Do not add a duplicate suggestion or suggestions with a bad score. 11134 * Do not add a duplicate suggestion or suggestions with a bad score.
11080 * When "use_score" is not zero it's used, otherwise the score is computed 11135 * When "use_score" is not zero it's used, otherwise the score is computed
11081 * with spell_edit_score(). 11136 * with spell_edit_score().
11082 */ 11137 */
11083 static void 11138 static void
11084 add_suggestion(su, gap, goodword, badlen, score, altscore, had_bonus, slang) 11139 add_suggestion(su, gap, goodword, badlenarg, score, altscore, had_bonus, slang)
11085 suginfo_T *su; 11140 suginfo_T *su;
11086 garray_T *gap; 11141 garray_T *gap;
11087 char_u *goodword; 11142 char_u *goodword;
11088 int badlen; /* length of bad word used */ 11143 int badlenarg; /* len of bad word replaced with "goodword" */
11089 int score; 11144 int score;
11090 int altscore; 11145 int altscore;
11091 int had_bonus; /* value for st_had_bonus */ 11146 int had_bonus; /* value for st_had_bonus */
11092 slang_T *slang; /* language for sound folding */ 11147 slang_T *slang; /* language for sound folding */
11093 { 11148 {
11094 int goodlen = STRLEN(goodword); 11149 int goodlen = STRLEN(goodword); /* len of goodword changed */
11150 int badlen = badlenarg; /* len of bad word changed */
11095 suggest_T *stp; 11151 suggest_T *stp;
11152 suggest_T new_sug;
11096 int i; 11153 int i;
11097 char_u *p = NULL; 11154 hlf_T attr = HLF_COUNT;
11098 int c = 0;
11099 int attr = 0;
11100 char_u longword[MAXWLEN + 1]; 11155 char_u longword[MAXWLEN + 1];
11156 char_u *pgood, *pbad;
11101 11157
11102 /* Check that the word really is valid. Esp. for banned words and for 11158 /* Check that the word really is valid. Esp. for banned words and for
11103 * split words, such as "the the". Need to append what follows to check 11159 * split words, such as "the the". Need to append what follows to check
11104 * for that. */ 11160 * for that. */
11105 STRCPY(longword, goodword); 11161 STRCPY(longword, goodword);
11106 vim_strncpy(longword + goodlen, su->su_badptr + badlen, MAXWLEN - goodlen); 11162 vim_strncpy(longword + goodlen, su->su_badptr + badlen, MAXWLEN - goodlen);
11107 (void)spell_check(curwin, longword, &attr, NULL); 11163 (void)spell_check(curwin, longword, &attr, NULL);
11108 if (attr != 0) 11164 if (attr != HLF_COUNT)
11109 return; 11165 return;
11110 11166
11111 /* If past "su_badlen" and the rest is identical stop at "su_badlen". 11167 /* Minimize "badlen" for consistency. Avoids that changing "the the" to
11112 * Remove the common part from "goodword". */ 11168 * "thee the" is added next to changing the first "the" to "thee". */
11113 i = badlen - su->su_badlen; 11169 pgood = goodword + STRLEN(goodword);
11114 if (i > 0) 11170 pbad = su->su_badptr + badlen;
11115 { 11171 while (pgood > goodword && pbad > su->su_badptr)
11116 /* This assumes there was no case folding or it didn't change the 11172 {
11117 * length... */ 11173 mb_ptr_back(goodword, pgood);
11118 p = goodword + goodlen - i; 11174 mb_ptr_back(su->su_badptr, pbad);
11119 if (p > goodword && STRNICMP(su->su_badptr + su->su_badlen, p, i) == 0) 11175 #ifdef FEAT_MBYTE
11120 { 11176 if (has_mbyte)
11121 badlen = su->su_badlen; 11177 {
11122 c = *p; 11178 if (mb_ptr2char(pgood) != mb_ptr2char(pbad))
11123 *p = NUL; 11179 break;
11124 } 11180 }
11125 else 11181 else
11126 p = NULL; 11182 #endif
11127 } 11183 if (*pgood != *pbad)
11128 else if (i < 0) 11184 break;
11129 { 11185 badlen = pbad - su->su_badptr;
11130 /* When replacing part of the word check that we actually change 11186 goodlen = pgood - goodword;
11131 * something. For "the the" a suggestion can be replacing the first 11187 }
11132 * "the" with itself, since "the" wasn't banned. */ 11188 if (badlen == 0 && goodlen == 0)
11133 if (badlen == (int)goodlen 11189 /* goodword doesn't change anything; may happen for "the the" changing
11134 && STRNCMP(su->su_badword, goodword, badlen) == 0) 11190 * the first "the" to itself. */
11135 return; 11191 return;
11136 }
11137
11138 11192
11139 if (score <= su->su_maxscore) 11193 if (score <= su->su_maxscore)
11140 { 11194 {
11141 /* Check if the word is already there. Also check the length that is 11195 /* Check if the word is already there. Also check the length that is
11142 * being replaced "thes," -> "these" is a different suggestion from 11196 * being replaced "thes," -> "these" is a different suggestion from
11143 * "thes" -> "these". */ 11197 * "thes" -> "these". */
11144 stp = &SUG(*gap, 0); 11198 stp = &SUG(*gap, 0);
11145 for (i = gap->ga_len - 1; i >= 0; --i) 11199 for (i = gap->ga_len - 1; i >= 0; --i)
11146 if (STRCMP(stp[i].st_word, goodword) == 0 11200 if (STRLEN(stp[i].st_word) == goodlen
11201 && STRNCMP(stp[i].st_word, goodword, goodlen) == 0
11147 && stp[i].st_orglen == badlen) 11202 && stp[i].st_orglen == badlen)
11148 { 11203 {
11149 /* Found it. Remember the lowest score. */ 11204 /*
11150 if (stp[i].st_score > score) 11205 * Found it. Remember the lowest score.
11151 { 11206 */
11152 stp[i].st_score = score;
11153 stp[i].st_altscore = altscore;
11154 stp[i].st_had_bonus = had_bonus;
11155 }
11156 if (stp[i].st_slang == NULL) 11207 if (stp[i].st_slang == NULL)
11157 stp[i].st_slang = slang; 11208 stp[i].st_slang = slang;
11209
11210 new_sug.st_score = score;
11211 new_sug.st_altscore = altscore;
11212 new_sug.st_had_bonus = had_bonus;
11213
11214 if (stp[i].st_had_bonus != had_bonus)
11215 {
11216 /* Only one of the two had the soundalike score computed.
11217 * Need to do that for the other one now, otherwise the
11218 * scores can't be compared. This happens because
11219 * suggest_try_change() doesn't compute the soundalike
11220 * word to keep it fast. */
11221 if (had_bonus)
11222 rescore_one(su, &stp[i]);
11223 else
11224 {
11225 new_sug.st_word = goodword;
11226 new_sug.st_slang = stp[i].st_slang;
11227 new_sug.st_orglen = badlen;
11228 rescore_one(su, &new_sug);
11229 }
11230 }
11231
11232 if (stp[i].st_score > new_sug.st_score)
11233 {
11234 stp[i].st_score = new_sug.st_score;
11235 stp[i].st_altscore = new_sug.st_altscore;
11236 stp[i].st_had_bonus = new_sug.st_had_bonus;
11237 }
11158 break; 11238 break;
11159 } 11239 }
11160 11240
11161 if (i < 0 && ga_grow(gap, 1) == OK) 11241 if (i < 0 && ga_grow(gap, 1) == OK)
11162 { 11242 {
11163 /* Add a suggestion. */ 11243 /* Add a suggestion. */
11164 stp = &SUG(*gap, gap->ga_len); 11244 stp = &SUG(*gap, gap->ga_len);
11165 stp->st_word = vim_strsave(goodword); 11245 stp->st_word = vim_strnsave(goodword, goodlen);
11166 if (stp->st_word != NULL) 11246 if (stp->st_word != NULL)
11167 { 11247 {
11168 stp->st_score = score; 11248 stp->st_score = score;
11169 stp->st_altscore = altscore; 11249 stp->st_altscore = altscore;
11170 stp->st_had_bonus = had_bonus; 11250 stp->st_had_bonus = had_bonus;
11178 su->su_maxscore = cleanup_suggestions(gap, su->su_maxscore, 11258 su->su_maxscore = cleanup_suggestions(gap, su->su_maxscore,
11179 SUG_CLEAN_COUNT(su)); 11259 SUG_CLEAN_COUNT(su));
11180 } 11260 }
11181 } 11261 }
11182 } 11262 }
11183
11184 if (p != NULL)
11185 *p = c; /* restore "goodword" */
11186 } 11263 }
11187 11264
11188 /* 11265 /*
11189 * Add a word to be banned. 11266 * Add a word to be banned.
11190 */ 11267 */
11242 } 11319 }
11243 hash_clear(&su->su_banned); 11320 hash_clear(&su->su_banned);
11244 } 11321 }
11245 11322
11246 /* 11323 /*
11247 * Recompute the score if sound-folding is possible. This is slow, 11324 * Recompute the score for all suggestions if sound-folding is possible. This
11248 * thus only done for the final results. 11325 * is slow, thus only done for the final results.
11249 */ 11326 */
11250 static void 11327 static void
11251 rescore_suggestions(su) 11328 rescore_suggestions(su)
11252 suginfo_T *su; 11329 suginfo_T *su;
11253 { 11330 {
11254 langp_T *lp; 11331 int i;
11255 suggest_T *stp; 11332
11333 if (su->su_sallang != NULL)
11334 for (i = 0; i < su->su_ga.ga_len; ++i)
11335 rescore_one(su, &SUG(su->su_ga, i));
11336 }
11337
11338 /*
11339 * Recompute the score for one suggestion if sound-folding is possible.
11340 */
11341 static void
11342 rescore_one(su, stp)
11343 suginfo_T *su;
11344 suggest_T *stp;
11345 {
11346 slang_T *slang = stp->st_slang;
11256 char_u sal_badword[MAXWLEN]; 11347 char_u sal_badword[MAXWLEN];
11257 char_u sal_badword2[MAXWLEN]; 11348
11258 int i; 11349 /* Only rescore suggestions that have no sal score yet and do have a
11259 int lpi; 11350 * language. */
11260 slang_T *slang_first = NULL; 11351 if (slang != NULL && slang->sl_sal.ga_len > 0 && !stp->st_had_bonus)
11261 slang_T *slang; 11352 {
11262 11353 if (slang == su->su_sallang)
11263 for (lpi = 0; lpi < curbuf->b_langp.ga_len; ++lpi) 11354 stp->st_altscore = stp_sal_score(stp, su,
11264 { 11355 slang, su->su_sal_badword);
11265 lp = LANGP_ENTRY(curbuf->b_langp, lpi); 11356 else
11266 if (lp->lp_slang->sl_sal.ga_len > 0) 11357 {
11267 { 11358 spell_soundfold(slang, su->su_fbadword, TRUE, sal_badword);
11268 /* soundfold the bad word */ 11359 stp->st_altscore = stp_sal_score(stp, su, slang, sal_badword);
11269 slang_first = lp->lp_slang; 11360 }
11270 spell_soundfold(slang_first, su->su_fbadword, TRUE, sal_badword); 11361 if (stp->st_altscore == SCORE_MAXMAX)
11271 break; 11362 stp->st_altscore = SCORE_BIG;
11272 } 11363 stp->st_score = RESCORE(stp->st_score, stp->st_altscore);
11273 } 11364 stp->st_had_bonus = TRUE;
11274
11275 if (slang_first != NULL)
11276 {
11277 for (i = 0; i < su->su_ga.ga_len; ++i)
11278 {
11279 /* Only rescore suggestions that have no sal score yet and do have
11280 * a language. */
11281 stp = &SUG(su->su_ga, i);
11282 if (!stp->st_had_bonus && stp->st_slang != NULL)
11283 {
11284 slang = stp->st_slang;
11285 if (slang->sl_sal.ga_len > 0)
11286 {
11287 if (slang == slang_first)
11288 stp->st_altscore = stp_sal_score(stp, su,
11289 slang, sal_badword);
11290 else
11291 {
11292 spell_soundfold(slang, su->su_fbadword,
11293 TRUE, sal_badword2);
11294 stp->st_altscore = stp_sal_score(stp, su,
11295 slang, sal_badword2);
11296 }
11297 if (stp->st_altscore == SCORE_MAXMAX)
11298 stp->st_altscore = SCORE_BIG;
11299 stp->st_score = RESCORE(stp->st_score, stp->st_altscore);
11300 }
11301 }
11302 }
11303 } 11365 }
11304 } 11366 }
11305 11367
11306 static int 11368 static int
11307 #ifdef __BORLANDC__ 11369 #ifdef __BORLANDC__