Mercurial > vim
comparison src/spell.c @ 534:c6296b0ad9ea v7.0151
updated for version 7.0151
author | vimboss |
---|---|
date | Thu, 29 Sep 2005 18:26:07 +0000 |
parents | 7052f11a3dc9 |
children | 792d1b60e546 |
comparison
equal
deleted
inserted
replaced
533:c8b6b7e1005d | 534:c6296b0ad9ea |
---|---|
475 char_u *su_badptr; /* start of bad word in line */ | 475 char_u *su_badptr; /* start of bad word in line */ |
476 int su_badlen; /* length of detected bad word in line */ | 476 int su_badlen; /* length of detected bad word in line */ |
477 int su_badflags; /* caps flags for bad word */ | 477 int su_badflags; /* caps flags for bad word */ |
478 char_u su_badword[MAXWLEN]; /* bad word truncated at su_badlen */ | 478 char_u su_badword[MAXWLEN]; /* bad word truncated at su_badlen */ |
479 char_u su_fbadword[MAXWLEN]; /* su_badword case-folded */ | 479 char_u su_fbadword[MAXWLEN]; /* su_badword case-folded */ |
480 char_u su_sal_badword[MAXWLEN]; /* su_badword soundfolded */ | |
481 slang_T *su_slang_first; /* slang_T used for su_sal_badword */ | |
480 hashtab_T su_banned; /* table with banned words */ | 482 hashtab_T su_banned; /* table with banned words */ |
481 slang_T *su_sallang; /* default language for sound folding */ | 483 slang_T *su_sallang; /* default language for sound folding */ |
482 } suginfo_T; | 484 } suginfo_T; |
483 | 485 |
484 /* One word suggestion. Used in "si_ga". */ | 486 /* One word suggestion. Used in "si_ga". */ |
747 static void add_suggestion __ARGS((suginfo_T *su, garray_T *gap, char_u *goodword, int badlen, int score, int altscore, int had_bonus, slang_T *slang)); | 749 static void add_suggestion __ARGS((suginfo_T *su, garray_T *gap, char_u *goodword, int badlen, int score, int altscore, int had_bonus, slang_T *slang)); |
748 static void add_banned __ARGS((suginfo_T *su, char_u *word)); | 750 static void add_banned __ARGS((suginfo_T *su, char_u *word)); |
749 static int was_banned __ARGS((suginfo_T *su, char_u *word)); | 751 static int was_banned __ARGS((suginfo_T *su, char_u *word)); |
750 static void free_banned __ARGS((suginfo_T *su)); | 752 static void free_banned __ARGS((suginfo_T *su)); |
751 static void rescore_suggestions __ARGS((suginfo_T *su)); | 753 static void rescore_suggestions __ARGS((suginfo_T *su)); |
754 static void rescore_one __ARGS((suginfo_T *su, suggest_T *stp)); | |
752 static int cleanup_suggestions __ARGS((garray_T *gap, int maxscore, int keep)); | 755 static int cleanup_suggestions __ARGS((garray_T *gap, int maxscore, int keep)); |
753 static void spell_soundfold __ARGS((slang_T *slang, char_u *inword, int folded, char_u *res)); | 756 static void spell_soundfold __ARGS((slang_T *slang, char_u *inword, int folded, char_u *res)); |
754 static void spell_soundfold_sofo __ARGS((slang_T *slang, char_u *inword, char_u *res)); | 757 static void spell_soundfold_sofo __ARGS((slang_T *slang, char_u *inword, char_u *res)); |
755 static void spell_soundfold_sal __ARGS((slang_T *slang, char_u *inword, char_u *res)); | 758 static void spell_soundfold_sal __ARGS((slang_T *slang, char_u *inword, char_u *res)); |
756 #ifdef FEAT_MBYTE | 759 #ifdef FEAT_MBYTE |
813 static char *msg_compressing = N_("Compressing word tree..."); | 816 static char *msg_compressing = N_("Compressing word tree..."); |
814 | 817 |
815 /* | 818 /* |
816 * Main spell-checking function. | 819 * Main spell-checking function. |
817 * "ptr" points to a character that could be the start of a word. | 820 * "ptr" points to a character that could be the start of a word. |
818 * "*attrp" is set to the attributes for a badly spelled word. For a non-word | 821 * "*attrp" is set to the highlight index for a badly spelled word. For a |
819 * or when it's OK it remains unchanged. | 822 * non-word or when it's OK it remains unchanged. |
820 * This must only be called when 'spelllang' is not empty. | 823 * This must only be called when 'spelllang' is not empty. |
821 * | 824 * |
822 * "capcol" is used to check for a Capitalised word after the end of a | 825 * "capcol" is used to check for a Capitalised word after the end of a |
823 * sentence. If it's zero then perform the check. Return the column where to | 826 * sentence. If it's zero then perform the check. Return the column where to |
824 * check next, or -1 when no sentence end was found. If it's NULL then don't | 827 * check next, or -1 when no sentence end was found. If it's NULL then don't |
829 */ | 832 */ |
830 int | 833 int |
831 spell_check(wp, ptr, attrp, capcol) | 834 spell_check(wp, ptr, attrp, capcol) |
832 win_T *wp; /* current window */ | 835 win_T *wp; /* current window */ |
833 char_u *ptr; | 836 char_u *ptr; |
834 int *attrp; | 837 hlf_T *attrp; |
835 int *capcol; /* column to check for Capital */ | 838 int *capcol; /* column to check for Capital */ |
836 { | 839 { |
837 matchinf_T mi; /* Most things are put in "mi" so that it can | 840 matchinf_T mi; /* Most things are put in "mi" so that it can |
838 be passed to functions quickly. */ | 841 be passed to functions quickly. */ |
839 int nrlen = 0; /* found a number first */ | 842 int nrlen = 0; /* found a number first */ |
1006 mi.mi_result = save_result; | 1009 mi.mi_result = save_result; |
1007 } | 1010 } |
1008 } | 1011 } |
1009 | 1012 |
1010 if (mi.mi_result == SP_BAD || mi.mi_result == SP_BANNED) | 1013 if (mi.mi_result == SP_BAD || mi.mi_result == SP_BANNED) |
1011 *attrp = highlight_attr[HLF_SPB]; | 1014 *attrp = HLF_SPB; |
1012 else if (mi.mi_result == SP_RARE) | 1015 else if (mi.mi_result == SP_RARE) |
1013 *attrp = highlight_attr[HLF_SPR]; | 1016 *attrp = HLF_SPR; |
1014 else | 1017 else |
1015 *attrp = highlight_attr[HLF_SPL]; | 1018 *attrp = HLF_SPL; |
1016 } | 1019 } |
1017 | 1020 |
1018 if (wrongcaplen > 0 && (mi.mi_result == SP_OK || mi.mi_result == SP_RARE)) | 1021 if (wrongcaplen > 0 && (mi.mi_result == SP_OK || mi.mi_result == SP_RARE)) |
1019 { | 1022 { |
1020 /* Report SpellCap only when the word isn't badly spelled. */ | 1023 /* Report SpellCap only when the word isn't badly spelled. */ |
1021 *attrp = highlight_attr[HLF_SPC]; | 1024 *attrp = HLF_SPC; |
1022 return wrongcaplen; | 1025 return wrongcaplen; |
1023 } | 1026 } |
1024 | 1027 |
1025 return (int)(mi.mi_end - ptr); | 1028 return (int)(mi.mi_end - ptr); |
1026 } | 1029 } |
1820 spell_move_to(wp, dir, allwords, curline, attrp) | 1823 spell_move_to(wp, dir, allwords, curline, attrp) |
1821 win_T *wp; | 1824 win_T *wp; |
1822 int dir; /* FORWARD or BACKWARD */ | 1825 int dir; /* FORWARD or BACKWARD */ |
1823 int allwords; /* TRUE for "[s"/"]s", FALSE for "[S"/"]S" */ | 1826 int allwords; /* TRUE for "[s"/"]s", FALSE for "[S"/"]S" */ |
1824 int curline; | 1827 int curline; |
1825 int *attrp; /* return: attributes of bad word or NULL */ | 1828 hlf_T *attrp; /* return: attributes of bad word or NULL |
1829 (only when "dir" is FORWARD) */ | |
1826 { | 1830 { |
1827 linenr_T lnum; | 1831 linenr_T lnum; |
1828 pos_T found_pos; | 1832 pos_T found_pos; |
1829 int found_len = 0; | 1833 int found_len = 0; |
1830 char_u *line; | 1834 char_u *line; |
1831 char_u *p; | 1835 char_u *p; |
1832 char_u *endp; | 1836 char_u *endp; |
1833 int attr; | 1837 hlf_T attr; |
1834 int len; | 1838 int len; |
1835 int has_syntax = syntax_present(wp->w_buffer); | 1839 int has_syntax = syntax_present(wp->w_buffer); |
1836 int col; | 1840 int col; |
1837 int can_spell; | 1841 int can_spell; |
1838 char_u *buf = NULL; | 1842 char_u *buf = NULL; |
1898 && !wrapped | 1902 && !wrapped |
1899 && (colnr_T)(p - buf) >= wp->w_cursor.col) | 1903 && (colnr_T)(p - buf) >= wp->w_cursor.col) |
1900 break; | 1904 break; |
1901 | 1905 |
1902 /* start of word */ | 1906 /* start of word */ |
1903 attr = 0; | 1907 attr = HLF_COUNT; |
1904 len = spell_check(wp, p, &attr, &capcol); | 1908 len = spell_check(wp, p, &attr, &capcol); |
1905 | 1909 |
1906 if (attr != 0) | 1910 if (attr != HLF_COUNT) |
1907 { | 1911 { |
1908 /* We found a bad word. Check the attribute. */ | 1912 /* We found a bad word. Check the attribute. */ |
1909 if (allwords || attr == highlight_attr[HLF_SPB]) | 1913 if (allwords || attr == HLF_SPB) |
1910 { | 1914 { |
1911 found_one = TRUE; | 1915 found_one = TRUE; |
1912 | 1916 |
1913 /* When searching forward only accept a bad word after | 1917 /* When searching forward only accept a bad word after |
1914 * the cursor. */ | 1918 * the cursor. */ |
2015 if (lnum == wp->w_cursor.lnum && !found_one) | 2019 if (lnum == wp->w_cursor.lnum && !found_one) |
2016 break; | 2020 break; |
2017 | 2021 |
2018 /* Skip the characters at the start of the next line that were | 2022 /* Skip the characters at the start of the next line that were |
2019 * included in a match crossing line boundaries. */ | 2023 * included in a match crossing line boundaries. */ |
2020 if (attr == 0) | 2024 if (attr == HLF_COUNT) |
2021 skip = p - endp; | 2025 skip = p - endp; |
2022 else | 2026 else |
2023 skip = 0; | 2027 skip = 0; |
2024 | 2028 |
2025 /* Capscol skips over the inserted space. */ | 2029 /* Capscol skips over the inserted space. */ |
5096 * slashes. */ | 5100 * slashes. */ |
5097 ga_concat(&spin->si_map, items[1]); | 5101 ga_concat(&spin->si_map, items[1]); |
5098 ga_append(&spin->si_map, '/'); | 5102 ga_append(&spin->si_map, '/'); |
5099 } | 5103 } |
5100 } | 5104 } |
5101 else if (STRCMP(items[0], "SAL") == 0 && itemcnt == 3) | 5105 /* Accept "SAL from to" and "SAL from to # comment". */ |
5106 else if (STRCMP(items[0], "SAL") == 0 | |
5107 && (itemcnt == 3 || (itemcnt > 3 && items[3][0] == '#'))) | |
5102 { | 5108 { |
5103 if (do_sal) | 5109 if (do_sal) |
5104 { | 5110 { |
5105 /* SAL item (sounds-a-like) | 5111 /* SAL item (sounds-a-like) |
5106 * Either one of the known keys or a from-to pair. */ | 5112 * Either one of the known keys or a from-to pair. */ |
8767 suginfo_T *su; | 8773 suginfo_T *su; |
8768 int maxcount; | 8774 int maxcount; |
8769 int banbadword; /* don't include badword in suggestions */ | 8775 int banbadword; /* don't include badword in suggestions */ |
8770 int need_cap; /* word should start with capital */ | 8776 int need_cap; /* word should start with capital */ |
8771 { | 8777 { |
8772 int attr = 0; | 8778 hlf_T attr = HLF_COUNT; |
8773 char_u buf[MAXPATHL]; | 8779 char_u buf[MAXPATHL]; |
8774 char_u *p; | 8780 char_u *p; |
8775 int do_combine = FALSE; | 8781 int do_combine = FALSE; |
8776 char_u *sps_copy; | 8782 char_u *sps_copy; |
8777 #ifdef FEAT_EVAL | 8783 #ifdef FEAT_EVAL |
8819 su->su_sallang = lp->lp_sallang; | 8825 su->su_sallang = lp->lp_sallang; |
8820 break; | 8826 break; |
8821 } | 8827 } |
8822 } | 8828 } |
8823 | 8829 |
8830 /* Soundfold the bad word with the default sound folding, so that we don't | |
8831 * have to do this many times. */ | |
8832 if (su->su_sallang != NULL) | |
8833 spell_soundfold(su->su_sallang, su->su_fbadword, TRUE, | |
8834 su->su_sal_badword); | |
8835 | |
8824 /* If the word is not capitalised and spell_check() doesn't consider the | 8836 /* If the word is not capitalised and spell_check() doesn't consider the |
8825 * word to be bad then it might need to be capitalised. Add a suggestion | 8837 * word to be bad then it might need to be capitalised. Add a suggestion |
8826 * for that. */ | 8838 * for that. */ |
8827 c = PTR2CHAR(su->su_badptr); | 8839 c = PTR2CHAR(su->su_badptr); |
8828 if (!SPELL_ISUPPER(c) && attr == 0) | 8840 if (!SPELL_ISUPPER(c) && attr == HLF_COUNT) |
8829 { | 8841 { |
8830 make_case_word(su->su_badword, buf, WF_ONECAP); | 8842 make_case_word(su->su_badword, buf, WF_ONECAP); |
8831 add_suggestion(su, &su->su_ga, buf, su->su_badlen, SCORE_ICASE, | 8843 add_suggestion(su, &su->su_ga, buf, su->su_badlen, SCORE_ICASE, |
8832 0, TRUE, su->su_sallang); | 8844 0, TRUE, su->su_sallang); |
8833 } | 8845 } |
9171 * use that for the goodword too: "The the" -> "The". */ | 9183 * use that for the goodword too: "The the" -> "The". */ |
9172 c = su->su_fbadword[len]; | 9184 c = su->su_fbadword[len]; |
9173 su->su_fbadword[len] = NUL; | 9185 su->su_fbadword[len] = NUL; |
9174 make_case_word(su->su_fbadword, word, su->su_badflags); | 9186 make_case_word(su->su_fbadword, word, su->su_badflags); |
9175 su->su_fbadword[len] = c; | 9187 su->su_fbadword[len] = c; |
9176 add_suggestion(su, &su->su_ga, word, su->su_badlen, SCORE_DEL, | 9188 |
9177 0, TRUE, su->su_sallang); | 9189 /* Give a soundalike score of 0, compute the score as if deleting one |
9190 * character. */ | |
9191 add_suggestion(su, &su->su_ga, word, su->su_badlen, | |
9192 RESCORE(SCORE_REP, 0), 0, TRUE, su->su_sallang); | |
9178 } | 9193 } |
9179 } | 9194 } |
9180 | 9195 |
9181 /* | 9196 /* |
9182 * Try finding suggestions by adding/removing/swapping letters. | 9197 * Try finding suggestions by adding/removing/swapping letters. |
9224 int fl = 0, tl; | 9239 int fl = 0, tl; |
9225 int repextra = 0; /* extra bytes in fword[] from REP item */ | 9240 int repextra = 0; /* extra bytes in fword[] from REP item */ |
9226 slang_T *slang; | 9241 slang_T *slang; |
9227 int fword_ends; | 9242 int fword_ends; |
9228 int lpi; | 9243 int lpi; |
9244 int maysplit; | |
9245 int goodword_ends; | |
9229 | 9246 |
9230 /* We make a copy of the case-folded bad word, so that we can modify it | 9247 /* We make a copy of the case-folded bad word, so that we can modify it |
9231 * to find matches (esp. REP items). Append some more text, changing | 9248 * to find matches (esp. REP items). Append some more text, changing |
9232 * chars after the bad word may help. */ | 9249 * chars after the bad word may help. */ |
9233 STRCPY(fword, su->su_fbadword); | 9250 STRCPY(fword, su->su_fbadword); |
9399 * when arriving at a compound flag. */ | 9416 * when arriving at a compound flag. */ |
9400 sp->ts_flags |= TSF_PREFIXOK; | 9417 sp->ts_flags |= TSF_PREFIXOK; |
9401 } | 9418 } |
9402 } | 9419 } |
9403 | 9420 |
9404 /* Check NEEDCOMPOUND: can't use word without compounding. */ | 9421 /* Check NEEDCOMPOUND: can't use word without compounding. Do |
9422 * try appending another compound word below. */ | |
9405 if (sp->ts_complen == sp->ts_compsplit && fword_ends | 9423 if (sp->ts_complen == sp->ts_compsplit && fword_ends |
9406 && (flags & WF_NEEDCOMP)) | 9424 && (flags & WF_NEEDCOMP)) |
9407 break; | 9425 goodword_ends = FALSE; |
9426 else | |
9427 goodword_ends = TRUE; | |
9408 | 9428 |
9409 if (sp->ts_complen > sp->ts_compsplit) | 9429 if (sp->ts_complen > sp->ts_compsplit) |
9410 { | 9430 { |
9411 if (slang->sl_nobreak) | 9431 if (slang->sl_nobreak) |
9412 { | 9432 { |
9506 if (flags & WF_BANNED) | 9526 if (flags & WF_BANNED) |
9507 { | 9527 { |
9508 add_banned(su, preword + sp->ts_prewordlen); | 9528 add_banned(su, preword + sp->ts_prewordlen); |
9509 break; | 9529 break; |
9510 } | 9530 } |
9511 if (was_banned(su, preword + sp->ts_prewordlen) | 9531 if ((sp->ts_complen == sp->ts_compsplit |
9532 && was_banned(su, preword + sp->ts_prewordlen)) | |
9512 || was_banned(su, preword)) | 9533 || was_banned(su, preword)) |
9513 break; | 9534 { |
9535 if (slang->sl_compprog == NULL) | |
9536 break; | |
9537 /* the word so far was banned but we may try compounding */ | |
9538 goodword_ends = FALSE; | |
9539 } | |
9514 | 9540 |
9515 newscore = 0; | 9541 newscore = 0; |
9516 if ((flags & WF_REGION) | 9542 if ((flags & WF_REGION) |
9517 && (((unsigned)flags >> 16) & lp->lp_region) == 0) | 9543 && (((unsigned)flags >> 16) & lp->lp_region) == 0) |
9518 newscore += SCORE_REGION; | 9544 newscore += SCORE_REGION; |
9521 | 9547 |
9522 if (!spell_valid_case(su->su_badflags, | 9548 if (!spell_valid_case(su->su_badflags, |
9523 captype(preword + sp->ts_prewordlen, NULL))) | 9549 captype(preword + sp->ts_prewordlen, NULL))) |
9524 newscore += SCORE_ICASE; | 9550 newscore += SCORE_ICASE; |
9525 | 9551 |
9526 if (fword_ends && sp->ts_fidx >= sp->ts_fidxtry) | 9552 maysplit = TRUE; |
9553 if (fword_ends && goodword_ends | |
9554 && sp->ts_fidx >= sp->ts_fidxtry) | |
9527 { | 9555 { |
9528 /* The badword also ends: add suggestions. Give a penalty | 9556 /* The badword also ends: add suggestions. Give a penalty |
9529 * when changing non-word char to word char, e.g., "thes," | 9557 * when changing non-word char to word char, e.g., "thes," |
9530 * -> "these". */ | 9558 * -> "these". */ |
9531 p = fword + sp->ts_fidx; | 9559 p = fword + sp->ts_fidx; |
9547 if (spell_iswordp(p, curbuf)) | 9575 if (spell_iswordp(p, curbuf)) |
9548 newscore += SCORE_NONWORD; | 9576 newscore += SCORE_NONWORD; |
9549 } | 9577 } |
9550 | 9578 |
9551 add_suggestion(su, &su->su_ga, preword, | 9579 add_suggestion(su, &su->su_ga, preword, |
9552 sp->ts_fidx - repextra, | 9580 sp->ts_fidx - repextra, |
9553 sp->ts_score + newscore, 0, FALSE, | 9581 sp->ts_score + newscore, 0, FALSE, |
9554 lp->lp_sallang); | 9582 lp->lp_sallang); |
9583 | |
9584 /* When the bad word doesn't end yet, try changing the | |
9585 * next word. E.g., find suggestions for "the the" where | |
9586 * the second "the" is different. It's done like a split. | |
9587 */ | |
9588 if (sp->ts_fidx - repextra >= su->su_badlen) | |
9589 maysplit = FALSE; | |
9555 } | 9590 } |
9556 else if ((sp->ts_fidx >= sp->ts_fidxtry || fword_ends) | 9591 |
9592 if (maysplit | |
9593 && (sp->ts_fidx >= sp->ts_fidxtry || fword_ends) | |
9557 #ifdef FEAT_MBYTE | 9594 #ifdef FEAT_MBYTE |
9558 /* Don't split halfway a character. */ | 9595 /* Don't split halfway a character. */ |
9559 && (!has_mbyte || sp->ts_tcharlen == 0) | 9596 && (!has_mbyte || sp->ts_tcharlen == 0) |
9560 #endif | 9597 #endif |
9561 ) | 9598 ) |
9572 * 2. The badword does end, but it was due to a change | 9609 * 2. The badword does end, but it was due to a change |
9573 * (e.g., a swap). No need to split, but do check that | 9610 * (e.g., a swap). No need to split, but do check that |
9574 * the following word is valid. | 9611 * the following word is valid. |
9575 */ | 9612 */ |
9576 try_compound = FALSE; | 9613 try_compound = FALSE; |
9577 if (!fword_ends | 9614 if ((!fword_ends || !goodword_ends) |
9578 && slang->sl_compprog != NULL | 9615 && slang->sl_compprog != NULL |
9579 && ((unsigned)flags >> 24) != 0 | 9616 && ((unsigned)flags >> 24) != 0 |
9580 && sp->ts_twordlen - sp->ts_splitoff | 9617 && sp->ts_twordlen - sp->ts_splitoff |
9581 >= slang->sl_compminlen | 9618 >= slang->sl_compminlen |
9582 #ifdef FEAT_MBYTE | 9619 #ifdef FEAT_MBYTE |
9616 compflags[sp->ts_complen] = NUL; | 9653 compflags[sp->ts_complen] = NUL; |
9617 } | 9654 } |
9618 else | 9655 else |
9619 sp->ts_flags &= ~TSF_DIDSPLIT; | 9656 sp->ts_flags &= ~TSF_DIDSPLIT; |
9620 | 9657 |
9621 if (!try_compound && !fword_ends) | 9658 if (!try_compound && (!fword_ends || !goodword_ends)) |
9622 { | 9659 { |
9623 /* If we're going to split need to check that the | 9660 /* If we're going to split need to check that the |
9624 * words so far are valid for compounding. If there | 9661 * words so far are valid for compounding. If there |
9625 * is only one word it must not have the NEEDCOMPOUND | 9662 * is only one word it must not have the NEEDCOMPOUND |
9626 * flag. */ | 9663 * flag. */ |
9654 sp->ts_splitfidx = sp->ts_fidx; | 9691 sp->ts_splitfidx = sp->ts_fidx; |
9655 | 9692 |
9656 /* If the badword has a non-word character at this | 9693 /* If the badword has a non-word character at this |
9657 * position skip it. That means replacing the | 9694 * position skip it. That means replacing the |
9658 * non-word character with a space. Always skip a | 9695 * non-word character with a space. Always skip a |
9659 * character when the word ends. */ | 9696 * character when the word ends. But only when the |
9660 if ((!try_compound | 9697 * good word can end. */ |
9661 && !spell_iswordp_nmw(fword + sp->ts_fidx)) | 9698 if (((!try_compound |
9699 && !spell_iswordp_nmw(fword + sp->ts_fidx)) | |
9662 || fword_ends) | 9700 || fword_ends) |
9701 && goodword_ends) | |
9663 { | 9702 { |
9664 int l; | 9703 int l; |
9665 | 9704 |
9666 #ifdef FEAT_MBYTE | 9705 #ifdef FEAT_MBYTE |
9667 if (has_mbyte) | 9706 if (has_mbyte) |
10724 suginfo_T *su; | 10763 suginfo_T *su; |
10725 slang_T *slang; | 10764 slang_T *slang; |
10726 char_u *badsound; /* sound-folded badword */ | 10765 char_u *badsound; /* sound-folded badword */ |
10727 { | 10766 { |
10728 char_u *p; | 10767 char_u *p; |
10768 char_u *pbad; | |
10769 char_u *pgood; | |
10729 char_u badsound2[MAXWLEN]; | 10770 char_u badsound2[MAXWLEN]; |
10730 char_u fword[MAXWLEN]; | 10771 char_u fword[MAXWLEN]; |
10731 char_u goodsound[MAXWLEN]; | 10772 char_u goodsound[MAXWLEN]; |
10732 | 10773 char_u goodword[MAXWLEN]; |
10733 if (stp->st_orglen <= su->su_badlen) | 10774 int lendiff; |
10734 p = badsound; | 10775 |
10776 lendiff = (int)(su->su_badlen - stp->st_orglen); | |
10777 if (lendiff >= 0) | |
10778 pbad = badsound; | |
10735 else | 10779 else |
10736 { | 10780 { |
10737 /* soundfold the bad word with more characters following */ | 10781 /* soundfold the bad word with more characters following */ |
10738 (void)spell_casefold(su->su_badptr, stp->st_orglen, fword, MAXWLEN); | 10782 (void)spell_casefold(su->su_badptr, stp->st_orglen, fword, MAXWLEN); |
10739 | 10783 |
10745 && *skiptowhite(stp->st_word) == NUL) | 10789 && *skiptowhite(stp->st_word) == NUL) |
10746 for (p = fword; *(p = skiptowhite(p)) != NUL; ) | 10790 for (p = fword; *(p = skiptowhite(p)) != NUL; ) |
10747 mch_memmove(p, p + 1, STRLEN(p)); | 10791 mch_memmove(p, p + 1, STRLEN(p)); |
10748 | 10792 |
10749 spell_soundfold(slang, fword, TRUE, badsound2); | 10793 spell_soundfold(slang, fword, TRUE, badsound2); |
10750 p = badsound2; | 10794 pbad = badsound2; |
10751 } | 10795 } |
10796 | |
10797 if (lendiff > 0) | |
10798 { | |
10799 /* Add part of the bad word to the good word, so that we soundfold | |
10800 * what replaces the bad word. */ | |
10801 STRCPY(goodword, stp->st_word); | |
10802 STRNCAT(goodword, su->su_badptr + su->su_badlen - lendiff, lendiff); | |
10803 pgood = goodword; | |
10804 } | |
10805 else | |
10806 pgood = stp->st_word; | |
10752 | 10807 |
10753 /* Sound-fold the word and compute the score for the difference. */ | 10808 /* Sound-fold the word and compute the score for the difference. */ |
10754 spell_soundfold(slang, stp->st_word, FALSE, goodsound); | 10809 spell_soundfold(slang, pgood, FALSE, goodsound); |
10755 | 10810 |
10756 return soundalike_score(goodsound, p); | 10811 return soundalike_score(goodsound, pbad); |
10757 } | 10812 } |
10758 | 10813 |
10759 /* | 10814 /* |
10760 * Find suggestions by comparing the word in a sound-a-like form. | 10815 * Find suggestions by comparing the word in a sound-a-like form. |
10761 * Note: This doesn't support postponed prefixes. | 10816 * Note: This doesn't support postponed prefixes. |
11079 * Do not add a duplicate suggestion or suggestions with a bad score. | 11134 * Do not add a duplicate suggestion or suggestions with a bad score. |
11080 * When "use_score" is not zero it's used, otherwise the score is computed | 11135 * When "use_score" is not zero it's used, otherwise the score is computed |
11081 * with spell_edit_score(). | 11136 * with spell_edit_score(). |
11082 */ | 11137 */ |
11083 static void | 11138 static void |
11084 add_suggestion(su, gap, goodword, badlen, score, altscore, had_bonus, slang) | 11139 add_suggestion(su, gap, goodword, badlenarg, score, altscore, had_bonus, slang) |
11085 suginfo_T *su; | 11140 suginfo_T *su; |
11086 garray_T *gap; | 11141 garray_T *gap; |
11087 char_u *goodword; | 11142 char_u *goodword; |
11088 int badlen; /* length of bad word used */ | 11143 int badlenarg; /* len of bad word replaced with "goodword" */ |
11089 int score; | 11144 int score; |
11090 int altscore; | 11145 int altscore; |
11091 int had_bonus; /* value for st_had_bonus */ | 11146 int had_bonus; /* value for st_had_bonus */ |
11092 slang_T *slang; /* language for sound folding */ | 11147 slang_T *slang; /* language for sound folding */ |
11093 { | 11148 { |
11094 int goodlen = STRLEN(goodword); | 11149 int goodlen = STRLEN(goodword); /* len of goodword changed */ |
11150 int badlen = badlenarg; /* len of bad word changed */ | |
11095 suggest_T *stp; | 11151 suggest_T *stp; |
11152 suggest_T new_sug; | |
11096 int i; | 11153 int i; |
11097 char_u *p = NULL; | 11154 hlf_T attr = HLF_COUNT; |
11098 int c = 0; | |
11099 int attr = 0; | |
11100 char_u longword[MAXWLEN + 1]; | 11155 char_u longword[MAXWLEN + 1]; |
11156 char_u *pgood, *pbad; | |
11101 | 11157 |
11102 /* Check that the word really is valid. Esp. for banned words and for | 11158 /* Check that the word really is valid. Esp. for banned words and for |
11103 * split words, such as "the the". Need to append what follows to check | 11159 * split words, such as "the the". Need to append what follows to check |
11104 * for that. */ | 11160 * for that. */ |
11105 STRCPY(longword, goodword); | 11161 STRCPY(longword, goodword); |
11106 vim_strncpy(longword + goodlen, su->su_badptr + badlen, MAXWLEN - goodlen); | 11162 vim_strncpy(longword + goodlen, su->su_badptr + badlen, MAXWLEN - goodlen); |
11107 (void)spell_check(curwin, longword, &attr, NULL); | 11163 (void)spell_check(curwin, longword, &attr, NULL); |
11108 if (attr != 0) | 11164 if (attr != HLF_COUNT) |
11109 return; | 11165 return; |
11110 | 11166 |
11111 /* If past "su_badlen" and the rest is identical stop at "su_badlen". | 11167 /* Minimize "badlen" for consistency. Avoids that changing "the the" to |
11112 * Remove the common part from "goodword". */ | 11168 * "thee the" is added next to changing the first "the" to "thee". */ |
11113 i = badlen - su->su_badlen; | 11169 pgood = goodword + STRLEN(goodword); |
11114 if (i > 0) | 11170 pbad = su->su_badptr + badlen; |
11115 { | 11171 while (pgood > goodword && pbad > su->su_badptr) |
11116 /* This assumes there was no case folding or it didn't change the | 11172 { |
11117 * length... */ | 11173 mb_ptr_back(goodword, pgood); |
11118 p = goodword + goodlen - i; | 11174 mb_ptr_back(su->su_badptr, pbad); |
11119 if (p > goodword && STRNICMP(su->su_badptr + su->su_badlen, p, i) == 0) | 11175 #ifdef FEAT_MBYTE |
11120 { | 11176 if (has_mbyte) |
11121 badlen = su->su_badlen; | 11177 { |
11122 c = *p; | 11178 if (mb_ptr2char(pgood) != mb_ptr2char(pbad)) |
11123 *p = NUL; | 11179 break; |
11124 } | 11180 } |
11125 else | 11181 else |
11126 p = NULL; | 11182 #endif |
11127 } | 11183 if (*pgood != *pbad) |
11128 else if (i < 0) | 11184 break; |
11129 { | 11185 badlen = pbad - su->su_badptr; |
11130 /* When replacing part of the word check that we actually change | 11186 goodlen = pgood - goodword; |
11131 * something. For "the the" a suggestion can be replacing the first | 11187 } |
11132 * "the" with itself, since "the" wasn't banned. */ | 11188 if (badlen == 0 && goodlen == 0) |
11133 if (badlen == (int)goodlen | 11189 /* goodword doesn't change anything; may happen for "the the" changing |
11134 && STRNCMP(su->su_badword, goodword, badlen) == 0) | 11190 * the first "the" to itself. */ |
11135 return; | 11191 return; |
11136 } | |
11137 | |
11138 | 11192 |
11139 if (score <= su->su_maxscore) | 11193 if (score <= su->su_maxscore) |
11140 { | 11194 { |
11141 /* Check if the word is already there. Also check the length that is | 11195 /* Check if the word is already there. Also check the length that is |
11142 * being replaced "thes," -> "these" is a different suggestion from | 11196 * being replaced "thes," -> "these" is a different suggestion from |
11143 * "thes" -> "these". */ | 11197 * "thes" -> "these". */ |
11144 stp = &SUG(*gap, 0); | 11198 stp = &SUG(*gap, 0); |
11145 for (i = gap->ga_len - 1; i >= 0; --i) | 11199 for (i = gap->ga_len - 1; i >= 0; --i) |
11146 if (STRCMP(stp[i].st_word, goodword) == 0 | 11200 if (STRLEN(stp[i].st_word) == goodlen |
11201 && STRNCMP(stp[i].st_word, goodword, goodlen) == 0 | |
11147 && stp[i].st_orglen == badlen) | 11202 && stp[i].st_orglen == badlen) |
11148 { | 11203 { |
11149 /* Found it. Remember the lowest score. */ | 11204 /* |
11150 if (stp[i].st_score > score) | 11205 * Found it. Remember the lowest score. |
11151 { | 11206 */ |
11152 stp[i].st_score = score; | |
11153 stp[i].st_altscore = altscore; | |
11154 stp[i].st_had_bonus = had_bonus; | |
11155 } | |
11156 if (stp[i].st_slang == NULL) | 11207 if (stp[i].st_slang == NULL) |
11157 stp[i].st_slang = slang; | 11208 stp[i].st_slang = slang; |
11209 | |
11210 new_sug.st_score = score; | |
11211 new_sug.st_altscore = altscore; | |
11212 new_sug.st_had_bonus = had_bonus; | |
11213 | |
11214 if (stp[i].st_had_bonus != had_bonus) | |
11215 { | |
11216 /* Only one of the two had the soundalike score computed. | |
11217 * Need to do that for the other one now, otherwise the | |
11218 * scores can't be compared. This happens because | |
11219 * suggest_try_change() doesn't compute the soundalike | |
11220 * word to keep it fast. */ | |
11221 if (had_bonus) | |
11222 rescore_one(su, &stp[i]); | |
11223 else | |
11224 { | |
11225 new_sug.st_word = goodword; | |
11226 new_sug.st_slang = stp[i].st_slang; | |
11227 new_sug.st_orglen = badlen; | |
11228 rescore_one(su, &new_sug); | |
11229 } | |
11230 } | |
11231 | |
11232 if (stp[i].st_score > new_sug.st_score) | |
11233 { | |
11234 stp[i].st_score = new_sug.st_score; | |
11235 stp[i].st_altscore = new_sug.st_altscore; | |
11236 stp[i].st_had_bonus = new_sug.st_had_bonus; | |
11237 } | |
11158 break; | 11238 break; |
11159 } | 11239 } |
11160 | 11240 |
11161 if (i < 0 && ga_grow(gap, 1) == OK) | 11241 if (i < 0 && ga_grow(gap, 1) == OK) |
11162 { | 11242 { |
11163 /* Add a suggestion. */ | 11243 /* Add a suggestion. */ |
11164 stp = &SUG(*gap, gap->ga_len); | 11244 stp = &SUG(*gap, gap->ga_len); |
11165 stp->st_word = vim_strsave(goodword); | 11245 stp->st_word = vim_strnsave(goodword, goodlen); |
11166 if (stp->st_word != NULL) | 11246 if (stp->st_word != NULL) |
11167 { | 11247 { |
11168 stp->st_score = score; | 11248 stp->st_score = score; |
11169 stp->st_altscore = altscore; | 11249 stp->st_altscore = altscore; |
11170 stp->st_had_bonus = had_bonus; | 11250 stp->st_had_bonus = had_bonus; |
11178 su->su_maxscore = cleanup_suggestions(gap, su->su_maxscore, | 11258 su->su_maxscore = cleanup_suggestions(gap, su->su_maxscore, |
11179 SUG_CLEAN_COUNT(su)); | 11259 SUG_CLEAN_COUNT(su)); |
11180 } | 11260 } |
11181 } | 11261 } |
11182 } | 11262 } |
11183 | |
11184 if (p != NULL) | |
11185 *p = c; /* restore "goodword" */ | |
11186 } | 11263 } |
11187 | 11264 |
11188 /* | 11265 /* |
11189 * Add a word to be banned. | 11266 * Add a word to be banned. |
11190 */ | 11267 */ |
11242 } | 11319 } |
11243 hash_clear(&su->su_banned); | 11320 hash_clear(&su->su_banned); |
11244 } | 11321 } |
11245 | 11322 |
11246 /* | 11323 /* |
11247 * Recompute the score if sound-folding is possible. This is slow, | 11324 * Recompute the score for all suggestions if sound-folding is possible. This |
11248 * thus only done for the final results. | 11325 * is slow, thus only done for the final results. |
11249 */ | 11326 */ |
11250 static void | 11327 static void |
11251 rescore_suggestions(su) | 11328 rescore_suggestions(su) |
11252 suginfo_T *su; | 11329 suginfo_T *su; |
11253 { | 11330 { |
11254 langp_T *lp; | 11331 int i; |
11255 suggest_T *stp; | 11332 |
11333 if (su->su_sallang != NULL) | |
11334 for (i = 0; i < su->su_ga.ga_len; ++i) | |
11335 rescore_one(su, &SUG(su->su_ga, i)); | |
11336 } | |
11337 | |
11338 /* | |
11339 * Recompute the score for one suggestion if sound-folding is possible. | |
11340 */ | |
11341 static void | |
11342 rescore_one(su, stp) | |
11343 suginfo_T *su; | |
11344 suggest_T *stp; | |
11345 { | |
11346 slang_T *slang = stp->st_slang; | |
11256 char_u sal_badword[MAXWLEN]; | 11347 char_u sal_badword[MAXWLEN]; |
11257 char_u sal_badword2[MAXWLEN]; | 11348 |
11258 int i; | 11349 /* Only rescore suggestions that have no sal score yet and do have a |
11259 int lpi; | 11350 * language. */ |
11260 slang_T *slang_first = NULL; | 11351 if (slang != NULL && slang->sl_sal.ga_len > 0 && !stp->st_had_bonus) |
11261 slang_T *slang; | 11352 { |
11262 | 11353 if (slang == su->su_sallang) |
11263 for (lpi = 0; lpi < curbuf->b_langp.ga_len; ++lpi) | 11354 stp->st_altscore = stp_sal_score(stp, su, |
11264 { | 11355 slang, su->su_sal_badword); |
11265 lp = LANGP_ENTRY(curbuf->b_langp, lpi); | 11356 else |
11266 if (lp->lp_slang->sl_sal.ga_len > 0) | 11357 { |
11267 { | 11358 spell_soundfold(slang, su->su_fbadword, TRUE, sal_badword); |
11268 /* soundfold the bad word */ | 11359 stp->st_altscore = stp_sal_score(stp, su, slang, sal_badword); |
11269 slang_first = lp->lp_slang; | 11360 } |
11270 spell_soundfold(slang_first, su->su_fbadword, TRUE, sal_badword); | 11361 if (stp->st_altscore == SCORE_MAXMAX) |
11271 break; | 11362 stp->st_altscore = SCORE_BIG; |
11272 } | 11363 stp->st_score = RESCORE(stp->st_score, stp->st_altscore); |
11273 } | 11364 stp->st_had_bonus = TRUE; |
11274 | |
11275 if (slang_first != NULL) | |
11276 { | |
11277 for (i = 0; i < su->su_ga.ga_len; ++i) | |
11278 { | |
11279 /* Only rescore suggestions that have no sal score yet and do have | |
11280 * a language. */ | |
11281 stp = &SUG(su->su_ga, i); | |
11282 if (!stp->st_had_bonus && stp->st_slang != NULL) | |
11283 { | |
11284 slang = stp->st_slang; | |
11285 if (slang->sl_sal.ga_len > 0) | |
11286 { | |
11287 if (slang == slang_first) | |
11288 stp->st_altscore = stp_sal_score(stp, su, | |
11289 slang, sal_badword); | |
11290 else | |
11291 { | |
11292 spell_soundfold(slang, su->su_fbadword, | |
11293 TRUE, sal_badword2); | |
11294 stp->st_altscore = stp_sal_score(stp, su, | |
11295 slang, sal_badword2); | |
11296 } | |
11297 if (stp->st_altscore == SCORE_MAXMAX) | |
11298 stp->st_altscore = SCORE_BIG; | |
11299 stp->st_score = RESCORE(stp->st_score, stp->st_altscore); | |
11300 } | |
11301 } | |
11302 } | |
11303 } | 11365 } |
11304 } | 11366 } |
11305 | 11367 |
11306 static int | 11368 static int |
11307 #ifdef __BORLANDC__ | 11369 #ifdef __BORLANDC__ |