Mercurial > vim
comparison src/spell.c @ 346:8ed2a5098a31
updated for version 7.0090
author | vimboss |
---|---|
date | Wed, 22 Jun 2005 22:26:26 +0000 |
parents | 7033303ea0c0 |
children | a89aebda7f37 |
comparison
equal
deleted
inserted
replaced
345:b3989ac62a21 | 346:8ed2a5098a31 |
---|---|
355 int su_maxcount; /* max. number of suggestions displayed */ | 355 int su_maxcount; /* max. number of suggestions displayed */ |
356 int su_maxscore; /* maximum score for adding to su_ga */ | 356 int su_maxscore; /* maximum score for adding to su_ga */ |
357 garray_T su_sga; /* like su_ga, sound-folded scoring */ | 357 garray_T su_sga; /* like su_ga, sound-folded scoring */ |
358 char_u *su_badptr; /* start of bad word in line */ | 358 char_u *su_badptr; /* start of bad word in line */ |
359 int su_badlen; /* length of detected bad word in line */ | 359 int su_badlen; /* length of detected bad word in line */ |
360 int su_badflags; /* caps flags for bad word */ | |
360 char_u su_badword[MAXWLEN]; /* bad word truncated at su_badlen */ | 361 char_u su_badword[MAXWLEN]; /* bad word truncated at su_badlen */ |
361 char_u su_fbadword[MAXWLEN]; /* su_badword case-folded */ | 362 char_u su_fbadword[MAXWLEN]; /* su_badword case-folded */ |
362 hashtab_T su_banned; /* table with banned words */ | 363 hashtab_T su_banned; /* table with banned words */ |
363 } suginfo_T; | 364 } suginfo_T; |
364 | 365 |
482 STATE_INS, /* Insert a byte in the bad word. */ | 483 STATE_INS, /* Insert a byte in the bad word. */ |
483 STATE_SWAP, /* Swap two bytes. */ | 484 STATE_SWAP, /* Swap two bytes. */ |
484 STATE_UNSWAP, /* Undo swap two characters. */ | 485 STATE_UNSWAP, /* Undo swap two characters. */ |
485 STATE_SWAP3, /* Swap two characters over three. */ | 486 STATE_SWAP3, /* Swap two characters over three. */ |
486 STATE_UNSWAP3, /* Undo Swap two characters over three. */ | 487 STATE_UNSWAP3, /* Undo Swap two characters over three. */ |
487 STATE_ROT3L, /* Rotate three characters left */ | |
488 STATE_UNROT3L, /* Undo rotate three characters left */ | 488 STATE_UNROT3L, /* Undo rotate three characters left */ |
489 STATE_ROT3R, /* Rotate three characters right */ | |
490 STATE_UNROT3R, /* Undo rotate three characters right */ | 489 STATE_UNROT3R, /* Undo rotate three characters right */ |
491 STATE_REP_INI, /* Prepare for using REP items. */ | 490 STATE_REP_INI, /* Prepare for using REP items. */ |
492 STATE_REP, /* Use matching REP items from the .aff file. */ | 491 STATE_REP, /* Use matching REP items from the .aff file. */ |
493 STATE_REP_UNDO, /* Undo a REP item replacement. */ | 492 STATE_REP_UNDO, /* Undo a REP item replacement. */ |
494 STATE_FINAL /* End of this node. */ | 493 STATE_FINAL /* End of this node. */ |
495 } state_T; | 494 } state_T; |
496 | 495 |
497 /* | 496 /* |
498 * Struct to keep the state at each level in spell_try_change(). | 497 * Struct to keep the state at each level in suggest_try_change(). |
499 */ | 498 */ |
500 typedef struct trystate_S | 499 typedef struct trystate_S |
501 { | 500 { |
502 state_T ts_state; /* state at this level, STATE_ */ | 501 state_T ts_state; /* state at this level, STATE_ */ |
503 int ts_score; /* score */ | 502 int ts_score; /* score */ |
512 char_u ts_isdiff; /* DIFF_ values */ | 511 char_u ts_isdiff; /* DIFF_ values */ |
513 char_u ts_fcharstart; /* index in fword where badword char started */ | 512 char_u ts_fcharstart; /* index in fword where badword char started */ |
514 #endif | 513 #endif |
515 char_u ts_save_prewordlen; /* saved "prewordlen" */ | 514 char_u ts_save_prewordlen; /* saved "prewordlen" */ |
516 char_u ts_save_splitoff; /* su_splitoff saved here */ | 515 char_u ts_save_splitoff; /* su_splitoff saved here */ |
517 char_u ts_save_badflags; /* badflags saved here */ | 516 char_u ts_save_badflags; /* su_badflags saved here */ |
518 } trystate_T; | 517 } trystate_T; |
519 | 518 |
520 /* values for ts_isdiff */ | 519 /* values for ts_isdiff */ |
521 #define DIFF_NONE 0 /* no different byte (yet) */ | 520 #define DIFF_NONE 0 /* no different byte (yet) */ |
522 #define DIFF_YES 1 /* different byte found */ | 521 #define DIFF_YES 1 /* different byte found */ |
548 static int spell_casefold __ARGS((char_u *p, int len, char_u *buf, int buflen)); | 547 static int spell_casefold __ARGS((char_u *p, int len, char_u *buf, int buflen)); |
549 static void spell_find_suggest __ARGS((char_u *badptr, suginfo_T *su, int maxcount)); | 548 static void spell_find_suggest __ARGS((char_u *badptr, suginfo_T *su, int maxcount)); |
550 static void spell_find_cleanup __ARGS((suginfo_T *su)); | 549 static void spell_find_cleanup __ARGS((suginfo_T *su)); |
551 static void onecap_copy __ARGS((char_u *word, char_u *wcopy, int upper)); | 550 static void onecap_copy __ARGS((char_u *word, char_u *wcopy, int upper)); |
552 static void allcap_copy __ARGS((char_u *word, char_u *wcopy)); | 551 static void allcap_copy __ARGS((char_u *word, char_u *wcopy)); |
553 static void spell_try_change __ARGS((suginfo_T *su)); | 552 static void suggest_try_special __ARGS((suginfo_T *su)); |
553 static void suggest_try_change __ARGS((suginfo_T *su)); | |
554 static int try_deeper __ARGS((suginfo_T *su, trystate_T *stack, int depth, int score_add)); | 554 static int try_deeper __ARGS((suginfo_T *su, trystate_T *stack, int depth, int score_add)); |
555 static void find_keepcap_word __ARGS((slang_T *slang, char_u *fword, char_u *kword)); | 555 static void find_keepcap_word __ARGS((slang_T *slang, char_u *fword, char_u *kword)); |
556 static void score_comp_sal __ARGS((suginfo_T *su)); | 556 static void score_comp_sal __ARGS((suginfo_T *su)); |
557 static void score_combine __ARGS((suginfo_T *su)); | 557 static void score_combine __ARGS((suginfo_T *su)); |
558 static void spell_try_soundalike __ARGS((suginfo_T *su)); | 558 static void suggest_try_soundalike __ARGS((suginfo_T *su)); |
559 static void make_case_word __ARGS((char_u *fword, char_u *cword, int flags)); | 559 static void make_case_word __ARGS((char_u *fword, char_u *cword, int flags)); |
560 static void set_map_str __ARGS((slang_T *lp, char_u *map)); | 560 static void set_map_str __ARGS((slang_T *lp, char_u *map)); |
561 static int similar_chars __ARGS((slang_T *slang, int c1, int c2)); | 561 static int similar_chars __ARGS((slang_T *slang, int c1, int c2)); |
562 static void add_suggestion __ARGS((suginfo_T *su, garray_T *gap, char_u *goodword, int use_score, int had_bonus)); | 562 static void add_suggestion __ARGS((suginfo_T *su, garray_T *gap, char_u *goodword, int badlen, int use_score, int had_bonus)); |
563 static void add_banned __ARGS((suginfo_T *su, char_u *word)); | 563 static void add_banned __ARGS((suginfo_T *su, char_u *word)); |
564 static int was_banned __ARGS((suginfo_T *su, char_u *word)); | 564 static int was_banned __ARGS((suginfo_T *su, char_u *word)); |
565 static void free_banned __ARGS((suginfo_T *su)); | 565 static void free_banned __ARGS((suginfo_T *su)); |
566 static void rescore_suggestions __ARGS((suginfo_T *su)); | 566 static void rescore_suggestions __ARGS((suginfo_T *su)); |
567 static int cleanup_suggestions __ARGS((garray_T *gap, int maxscore, int keep)); | 567 static int cleanup_suggestions __ARGS((garray_T *gap, int maxscore, int keep)); |
639 * then, skipping over the character. */ | 639 * then, skipping over the character. */ |
640 if (*ptr <= ' ') | 640 if (*ptr <= ' ') |
641 return 1; | 641 return 1; |
642 | 642 |
643 /* A number is always OK. Also skip hexadecimal numbers 0xFF99 and | 643 /* A number is always OK. Also skip hexadecimal numbers 0xFF99 and |
644 * 0X99FF. But when a word character follows do check spelling. */ | 644 * 0X99FF. But when a word character follows do check spelling to find |
645 * "3GPP". */ | |
645 if (*ptr >= '0' && *ptr <= '9') | 646 if (*ptr >= '0' && *ptr <= '9') |
646 { | 647 { |
647 if (*ptr == '0' && (ptr[1] == 'x' || ptr[1] == 'X')) | 648 if (*ptr == '0' && (ptr[1] == 'x' || ptr[1] == 'X')) |
648 mi.mi_end = skiphex(ptr + 2); | 649 mi.mi_end = skiphex(ptr + 2); |
649 else | 650 else |
651 mi.mi_end = skipdigits(ptr); | 652 mi.mi_end = skipdigits(ptr); |
652 nrlen = mi.mi_end - ptr; | 653 nrlen = mi.mi_end - ptr; |
653 } | 654 } |
654 if (!SPELL_ISWORDP(mi.mi_end)) | 655 if (!SPELL_ISWORDP(mi.mi_end)) |
655 return (int)(mi.mi_end - ptr); | 656 return (int)(mi.mi_end - ptr); |
656 } | 657 |
657 | 658 /* Try including the digits in the word. */ |
658 /* Find the end of the word. */ | 659 mi.mi_fend = ptr + nrlen; |
660 } | |
661 else | |
662 mi.mi_fend = ptr; | |
663 | |
664 /* Find the normal end of the word (until the next non-word character). */ | |
659 mi.mi_word = ptr; | 665 mi.mi_word = ptr; |
660 mi.mi_fend = ptr; | |
661 | |
662 if (SPELL_ISWORDP(mi.mi_fend)) | 666 if (SPELL_ISWORDP(mi.mi_fend)) |
663 { | 667 { |
664 /* Make case-folded copy of the characters until the next non-word | |
665 * character. */ | |
666 do | 668 do |
667 { | 669 { |
668 mb_ptr_adv(mi.mi_fend); | 670 mb_ptr_adv(mi.mi_fend); |
669 } while (*mi.mi_fend != NUL && SPELL_ISWORDP(mi.mi_fend)); | 671 } while (*mi.mi_fend != NUL && SPELL_ISWORDP(mi.mi_fend)); |
670 } | 672 } |
707 find_prefix(&mi); | 709 find_prefix(&mi); |
708 } | 710 } |
709 | 711 |
710 if (mi.mi_result != SP_OK) | 712 if (mi.mi_result != SP_OK) |
711 { | 713 { |
712 /* If we found a number skip over it. Allows for "42nd". */ | 714 /* If we found a number skip over it. Allows for "42nd". Do flag |
715 * rare and local words, e.g., "3GPP". */ | |
713 if (nrlen > 0) | 716 if (nrlen > 0) |
714 return nrlen; | 717 { |
718 if (mi.mi_result == SP_BAD || mi.mi_result == SP_BANNED) | |
719 return nrlen; | |
720 } | |
715 | 721 |
716 /* When we are at a non-word character there is no error, just | 722 /* When we are at a non-word character there is no error, just |
717 * skip over the character (try looking for a word after it). */ | 723 * skip over the character (try looking for a word after it). */ |
718 if (!SPELL_ISWORDP(ptr)) | 724 else if (!SPELL_ISWORDP(ptr)) |
719 { | 725 { |
720 #ifdef FEAT_MBYTE | 726 #ifdef FEAT_MBYTE |
721 if (has_mbyte) | 727 if (has_mbyte) |
722 return mb_ptr2len_check(ptr); | 728 return mb_ptr2len_check(ptr); |
723 #endif | 729 #endif |
808 * - we reach the end of the tree, | 814 * - we reach the end of the tree, |
809 * - or we reach the end of the line. | 815 * - or we reach the end of the line. |
810 */ | 816 */ |
811 for (;;) | 817 for (;;) |
812 { | 818 { |
813 if (flen == 0 && *mip->mi_fend != NUL) | 819 if (flen <= 0 && *mip->mi_fend != NUL) |
814 flen = fold_more(mip); | 820 flen = fold_more(mip); |
815 | 821 |
816 len = byts[arridx++]; | 822 len = byts[arridx++]; |
817 | 823 |
818 /* If the first possible byte is a zero the word could end here. | 824 /* If the first possible byte is a zero the word could end here. |
844 if (ptr[wlen] == NUL) | 850 if (ptr[wlen] == NUL) |
845 break; | 851 break; |
846 | 852 |
847 /* Perform a binary search in the list of accepted bytes. */ | 853 /* Perform a binary search in the list of accepted bytes. */ |
848 c = ptr[wlen]; | 854 c = ptr[wlen]; |
855 if (c == TAB) /* <Tab> is handled like <Space> */ | |
856 c = ' '; | |
849 lo = arridx; | 857 lo = arridx; |
850 hi = arridx + len - 1; | 858 hi = arridx + len - 1; |
851 while (lo < hi) | 859 while (lo < hi) |
852 { | 860 { |
853 m = (lo + hi) / 2; | 861 m = (lo + hi) / 2; |
868 | 876 |
869 /* Continue at the child (if there is one). */ | 877 /* Continue at the child (if there is one). */ |
870 arridx = idxs[lo]; | 878 arridx = idxs[lo]; |
871 ++wlen; | 879 ++wlen; |
872 --flen; | 880 --flen; |
881 | |
882 /* One space in the good word may stand for several spaces in the | |
883 * checked word. */ | |
884 if (c == ' ') | |
885 { | |
886 for (;;) | |
887 { | |
888 if (flen <= 0 && *mip->mi_fend != NUL) | |
889 flen = fold_more(mip); | |
890 if (ptr[wlen] != ' ' && ptr[wlen] != TAB) | |
891 break; | |
892 ++wlen; | |
893 --flen; | |
894 } | |
895 } | |
873 } | 896 } |
874 | 897 |
875 /* | 898 /* |
876 * Verify that one of the possible endings is valid. Try the longest | 899 * Verify that one of the possible endings is valid. Try the longest |
877 * first. | 900 * first. |
927 * to do it again. */ | 950 * to do it again. */ |
928 mip->mi_cend = mip->mi_word + wlen; | 951 mip->mi_cend = mip->mi_word + wlen; |
929 mip->mi_capflags = captype(mip->mi_word, mip->mi_cend); | 952 mip->mi_capflags = captype(mip->mi_word, mip->mi_cend); |
930 } | 953 } |
931 | 954 |
932 if (!spell_valid_case(mip->mi_capflags, flags)) | 955 if (mip->mi_capflags == WF_KEEPCAP |
956 || !spell_valid_case(mip->mi_capflags, flags)) | |
933 continue; | 957 continue; |
934 } | 958 } |
935 | 959 |
936 /* When mode is FIND_PREFIX the word must support the prefix: | 960 /* When mode is FIND_PREFIX the word must support the prefix: |
937 * check the prefix ID and the condition. Do that for the list at | 961 * check the prefix ID and the condition. Do that for the list at |
1168 { | 1192 { |
1169 linenr_T lnum; | 1193 linenr_T lnum; |
1170 pos_T found_pos; | 1194 pos_T found_pos; |
1171 char_u *line; | 1195 char_u *line; |
1172 char_u *p; | 1196 char_u *p; |
1173 int attr = 0; | 1197 char_u *endp; |
1198 int attr; | |
1174 int len; | 1199 int len; |
1175 int has_syntax = syntax_present(curbuf); | 1200 int has_syntax = syntax_present(curbuf); |
1176 int col; | 1201 int col; |
1177 int can_spell; | 1202 int can_spell; |
1203 char_u *buf = NULL; | |
1204 int buflen = 0; | |
1205 int skip = 0; | |
1178 | 1206 |
1179 if (!curwin->w_p_spell || *curbuf->b_p_spl == NUL) | 1207 if (!curwin->w_p_spell || *curbuf->b_p_spl == NUL) |
1180 { | 1208 { |
1181 EMSG(_("E756: Spell checking not enabled")); | 1209 EMSG(_("E756: Spell checking not enabled")); |
1182 return FAIL; | 1210 return FAIL; |
1183 } | 1211 } |
1184 | 1212 |
1185 /* | 1213 /* |
1186 * Start looking for bad word at the start of the line, because we can't | 1214 * Start looking for bad word at the start of the line, because we can't |
1187 * start halfway a word, we don't know where it starts or ends. | 1215 * start halfway a word, we don't know where it starts or ends. |
1188 * | 1216 * |
1189 * When searching backwards, we continue in the line to find the last | 1217 * When searching backwards, we continue in the line to find the last |
1190 * bad word (in the cursor line: before the cursor). | 1218 * bad word (in the cursor line: before the cursor). |
1219 * | |
1220 * We concatenate the start of the next line, so that wrapped words work | |
1221 * (e.g. "et<line-break>cetera"). Doesn't work when searching backwards | |
1222 * though... | |
1191 */ | 1223 */ |
1192 lnum = curwin->w_cursor.lnum; | 1224 lnum = curwin->w_cursor.lnum; |
1193 found_pos.lnum = 0; | 1225 found_pos.lnum = 0; |
1194 | 1226 |
1195 while (!got_int) | 1227 while (!got_int) |
1196 { | 1228 { |
1197 line = ml_get(lnum); | 1229 line = ml_get(lnum); |
1198 p = line; | 1230 |
1199 | 1231 len = STRLEN(line); |
1200 while (*p != NUL) | 1232 if (buflen < len + MAXWLEN + 2) |
1233 { | |
1234 vim_free(buf); | |
1235 buflen = len + MAXWLEN + 2; | |
1236 buf = alloc(buflen); | |
1237 if (buf == NULL) | |
1238 break; | |
1239 } | |
1240 | |
1241 /* Copy the line into "buf" and append the start of the next line if | |
1242 * possible. */ | |
1243 STRCPY(buf, line); | |
1244 if (lnum < curbuf->b_ml.ml_line_count) | |
1245 spell_cat_line(buf + STRLEN(buf), ml_get(lnum + 1), MAXWLEN); | |
1246 | |
1247 p = buf + skip; | |
1248 endp = buf + len; | |
1249 while (p < endp) | |
1201 { | 1250 { |
1202 /* When searching backward don't search after the cursor. */ | 1251 /* When searching backward don't search after the cursor. */ |
1203 if (dir == BACKWARD | 1252 if (dir == BACKWARD |
1204 && lnum == curwin->w_cursor.lnum | 1253 && lnum == curwin->w_cursor.lnum |
1205 && (colnr_T)(p - line) >= curwin->w_cursor.col) | 1254 && (colnr_T)(p - buf) >= curwin->w_cursor.col) |
1206 break; | 1255 break; |
1207 | 1256 |
1208 /* start of word */ | 1257 /* start of word */ |
1258 attr = 0; | |
1209 len = spell_check(curwin, p, &attr); | 1259 len = spell_check(curwin, p, &attr); |
1210 | 1260 |
1211 if (attr != 0) | 1261 if (attr != 0) |
1212 { | 1262 { |
1213 /* We found a bad word. Check the attribute. */ | 1263 /* We found a bad word. Check the attribute. */ |
1216 /* When searching forward only accept a bad word after | 1266 /* When searching forward only accept a bad word after |
1217 * the cursor. */ | 1267 * the cursor. */ |
1218 if (dir == BACKWARD | 1268 if (dir == BACKWARD |
1219 || lnum > curwin->w_cursor.lnum | 1269 || lnum > curwin->w_cursor.lnum |
1220 || (lnum == curwin->w_cursor.lnum | 1270 || (lnum == curwin->w_cursor.lnum |
1221 && (colnr_T)(curline ? p - line + len | 1271 && (colnr_T)(curline ? p - buf + len |
1222 : p - line) | 1272 : p - buf) |
1223 > curwin->w_cursor.col)) | 1273 > curwin->w_cursor.col)) |
1224 { | 1274 { |
1225 if (has_syntax) | 1275 if (has_syntax) |
1226 { | 1276 { |
1227 col = p - line; | 1277 col = p - buf; |
1228 (void)syn_get_id(lnum, (colnr_T)col, | 1278 (void)syn_get_id(lnum, (colnr_T)col, |
1229 FALSE, &can_spell); | 1279 FALSE, &can_spell); |
1230 | |
1231 /* have to get the line again, a multi-line | |
1232 * regexp may make it invalid */ | |
1233 line = ml_get(lnum); | |
1234 p = line + col; | |
1235 } | 1280 } |
1236 else | 1281 else |
1237 can_spell = TRUE; | 1282 can_spell = TRUE; |
1238 | 1283 |
1239 if (can_spell) | 1284 if (can_spell) |
1240 { | 1285 { |
1241 found_pos.lnum = lnum; | 1286 found_pos.lnum = lnum; |
1242 found_pos.col = p - line; | 1287 found_pos.col = p - buf; |
1243 #ifdef FEAT_VIRTUALEDIT | 1288 #ifdef FEAT_VIRTUALEDIT |
1244 found_pos.coladd = 0; | 1289 found_pos.coladd = 0; |
1245 #endif | 1290 #endif |
1246 if (dir == FORWARD) | 1291 if (dir == FORWARD) |
1247 { | 1292 { |
1248 /* No need to search further. */ | 1293 /* No need to search further. */ |
1249 curwin->w_cursor = found_pos; | 1294 curwin->w_cursor = found_pos; |
1295 vim_free(buf); | |
1250 return OK; | 1296 return OK; |
1251 } | 1297 } |
1252 } | 1298 } |
1253 } | 1299 } |
1254 } | 1300 } |
1255 attr = 0; | |
1256 } | 1301 } |
1257 | 1302 |
1258 /* advance to character after the word */ | 1303 /* advance to character after the word */ |
1259 p += len; | 1304 p += len; |
1260 if (*p == NUL) | |
1261 break; | |
1262 } | 1305 } |
1263 | 1306 |
1264 if (curline) | 1307 if (curline) |
1265 return FAIL; /* only check cursor line */ | 1308 break; /* only check cursor line */ |
1266 | 1309 |
1267 /* Advance to next line. */ | 1310 /* Advance to next line. */ |
1268 if (dir == BACKWARD) | 1311 if (dir == BACKWARD) |
1269 { | 1312 { |
1270 if (found_pos.lnum != 0) | 1313 if (found_pos.lnum != 0) |
1271 { | 1314 { |
1272 /* Use the last match in the line. */ | 1315 /* Use the last match in the line. */ |
1273 curwin->w_cursor = found_pos; | 1316 curwin->w_cursor = found_pos; |
1317 vim_free(buf); | |
1274 return OK; | 1318 return OK; |
1275 } | 1319 } |
1276 if (lnum == 1) | 1320 if (lnum == 1) |
1277 return FAIL; | 1321 break; |
1278 --lnum; | 1322 --lnum; |
1279 } | 1323 } |
1280 else | 1324 else |
1281 { | 1325 { |
1282 if (lnum == curbuf->b_ml.ml_line_count) | 1326 if (lnum == curbuf->b_ml.ml_line_count) |
1283 return FAIL; | 1327 break; |
1284 ++lnum; | 1328 ++lnum; |
1329 | |
1330 /* Skip the characters at the start of the next line that were | |
1331 * included in a match crossing line boundaries. */ | |
1332 if (attr == 0) | |
1333 skip = p - endp; | |
1334 else | |
1335 skip = 0; | |
1285 } | 1336 } |
1286 | 1337 |
1287 line_breakcheck(); | 1338 line_breakcheck(); |
1288 } | 1339 } |
1289 | 1340 |
1290 return FAIL; /* interrupted */ | 1341 vim_free(buf); |
1342 return FAIL; | |
1343 } | |
1344 | |
1345 /* | |
1346 * For spell checking: concatenate the start of the following line "line" into | |
1347 * "buf", blanking-out special characters. Copy less than "maxlen" bytes. | |
1348 */ | |
1349 void | |
1350 spell_cat_line(buf, line, maxlen) | |
1351 char_u *buf; | |
1352 char_u *line; | |
1353 int maxlen; | |
1354 { | |
1355 char_u *p; | |
1356 int n; | |
1357 | |
1358 p = skipwhite(line); | |
1359 while (vim_strchr((char_u *)"*#/\"\t", *p) != NULL) | |
1360 p = skipwhite(p + 1); | |
1361 | |
1362 if (*p != NUL) | |
1363 { | |
1364 *buf = ' '; | |
1365 vim_strncpy(buf + 1, line, maxlen - 1); | |
1366 n = p - line; | |
1367 if (n >= maxlen) | |
1368 n = maxlen - 1; | |
1369 vim_memset(buf + 1, ' ', n); | |
1370 } | |
1291 } | 1371 } |
1292 | 1372 |
1293 /* | 1373 /* |
1294 * Load word list(s) for "lang" from Vim spell file(s). | 1374 * Load word list(s) for "lang" from Vim spell file(s). |
1295 * "lang" must be the language without the region: e.g., "en". | 1375 * "lang" must be the language without the region: e.g., "en". |
1872 | 1952 |
1873 /* | 1953 /* |
1874 * Read one row of siblings from the spell file and store it in the byte array | 1954 * Read one row of siblings from the spell file and store it in the byte array |
1875 * "byts" and index array "idxs". Recursively read the children. | 1955 * "byts" and index array "idxs". Recursively read the children. |
1876 * | 1956 * |
1877 * NOTE: The code here must match put_tree(). | 1957 * NOTE: The code here must match put_node(). |
1878 * | 1958 * |
1879 * Returns the index following the siblings. | 1959 * Returns the index following the siblings. |
1880 * Returns -1 if the file is shorter than expected. | 1960 * Returns -1 if the file is shorter than expected. |
1881 * Returns -2 if there is a format error. | 1961 * Returns -2 if there is a format error. |
1882 */ | 1962 */ |
2291 typedef struct afffile_S | 2371 typedef struct afffile_S |
2292 { | 2372 { |
2293 char_u *af_enc; /* "SET", normalized, alloc'ed string or NULL */ | 2373 char_u *af_enc; /* "SET", normalized, alloc'ed string or NULL */ |
2294 int af_rar; /* RAR ID for rare word */ | 2374 int af_rar; /* RAR ID for rare word */ |
2295 int af_kep; /* KEP ID for keep-case word */ | 2375 int af_kep; /* KEP ID for keep-case word */ |
2376 int af_bad; /* BAD ID for banned word */ | |
2296 int af_pfxpostpone; /* postpone prefixes without chop string */ | 2377 int af_pfxpostpone; /* postpone prefixes without chop string */ |
2297 hashtab_T af_pref; /* hashtable for prefixes, affheader_T */ | 2378 hashtab_T af_pref; /* hashtable for prefixes, affheader_T */ |
2298 hashtab_T af_suff; /* hashtable for suffixes, affheader_T */ | 2379 hashtab_T af_suff; /* hashtable for suffixes, affheader_T */ |
2299 } afffile_T; | 2380 } afffile_T; |
2300 | 2381 |
2338 * A node in the tree. | 2419 * A node in the tree. |
2339 */ | 2420 */ |
2340 typedef struct wordnode_S wordnode_T; | 2421 typedef struct wordnode_S wordnode_T; |
2341 struct wordnode_S | 2422 struct wordnode_S |
2342 { | 2423 { |
2343 char_u wn_hashkey[6]; /* room for the hash key */ | 2424 union /* shared to save space */ |
2344 wordnode_T *wn_next; /* next node with same hash key */ | 2425 { |
2426 char_u hashkey[6]; /* room for the hash key */ | |
2427 int index; /* index in written nodes (valid after first | |
2428 round) */ | |
2429 } wn_u1; | |
2430 union /* shared to save space */ | |
2431 { | |
2432 wordnode_T *next; /* next node with same hash key */ | |
2433 wordnode_T *wnode; /* parent node that will write this node */ | |
2434 } wn_u2; | |
2345 wordnode_T *wn_child; /* child (next byte in word) */ | 2435 wordnode_T *wn_child; /* child (next byte in word) */ |
2346 wordnode_T *wn_sibling; /* next sibling (alternate byte in word, | 2436 wordnode_T *wn_sibling; /* next sibling (alternate byte in word, |
2347 always sorted) */ | 2437 always sorted) */ |
2348 wordnode_T *wn_wnode; /* parent node that will write this node */ | |
2349 int wn_index; /* index in written nodes (valid after first | |
2350 round) */ | |
2351 char_u wn_byte; /* Byte for this node. NUL for word end */ | 2438 char_u wn_byte; /* Byte for this node. NUL for word end */ |
2352 char_u wn_flags; /* when wn_byte is NUL: WF_ flags */ | 2439 char_u wn_flags; /* when wn_byte is NUL: WF_ flags */ |
2353 short wn_region; /* when wn_byte is NUL: region mask; for | 2440 short wn_region; /* when wn_byte is NUL: region mask; for |
2354 PREFIXTREE it's the prefcondnr */ | 2441 PREFIXTREE it's the prefcondnr */ |
2355 char_u wn_prefixID; /* supported/required prefix ID or 0 */ | 2442 char_u wn_prefixID; /* supported/required prefix ID or 0 */ |
2407 static int tree_add_word __ARGS((char_u *word, wordnode_T *tree, int flags, int region, int prefixID, sblock_T **blp)); | 2494 static int tree_add_word __ARGS((char_u *word, wordnode_T *tree, int flags, int region, int prefixID, sblock_T **blp)); |
2408 static void wordtree_compress __ARGS((wordnode_T *root, spellinfo_T *spin)); | 2495 static void wordtree_compress __ARGS((wordnode_T *root, spellinfo_T *spin)); |
2409 static int node_compress __ARGS((wordnode_T *node, hashtab_T *ht, int *tot)); | 2496 static int node_compress __ARGS((wordnode_T *node, hashtab_T *ht, int *tot)); |
2410 static int node_equal __ARGS((wordnode_T *n1, wordnode_T *n2)); | 2497 static int node_equal __ARGS((wordnode_T *n1, wordnode_T *n2)); |
2411 static void write_vim_spell __ARGS((char_u *fname, spellinfo_T *spin)); | 2498 static void write_vim_spell __ARGS((char_u *fname, spellinfo_T *spin)); |
2412 static int put_tree __ARGS((FILE *fd, wordnode_T *node, int index, int regionmask, int prefixtree)); | 2499 static void clear_node __ARGS((wordnode_T *node)); |
2500 static int put_node __ARGS((FILE *fd, wordnode_T *node, int index, int regionmask, int prefixtree)); | |
2413 static void mkspell __ARGS((int fcount, char_u **fnames, int ascii, int overwrite, int added_word)); | 2501 static void mkspell __ARGS((int fcount, char_u **fnames, int ascii, int overwrite, int added_word)); |
2414 static void init_spellfile __ARGS((void)); | 2502 static void init_spellfile __ARGS((void)); |
2415 | 2503 |
2416 /* | 2504 /* |
2417 * Read the affix file "fname". | 2505 * Read the affix file "fname". |
2573 { | 2661 { |
2574 aff->af_kep = items[1][0]; | 2662 aff->af_kep = items[1][0]; |
2575 if (items[1][1] != NUL) | 2663 if (items[1][1] != NUL) |
2576 smsg((char_u *)_(e_affname), fname, lnum, items[1]); | 2664 smsg((char_u *)_(e_affname), fname, lnum, items[1]); |
2577 } | 2665 } |
2666 else if (STRCMP(items[0], "BAD") == 0 && itemcnt == 2 | |
2667 && aff->af_bad == 0) | |
2668 { | |
2669 aff->af_bad = items[1][0]; | |
2670 if (items[1][1] != NUL) | |
2671 smsg((char_u *)_(e_affname), fname, lnum, items[1]); | |
2672 } | |
2578 else if (STRCMP(items[0], "PFXPOSTPONE") == 0 && itemcnt == 1) | 2673 else if (STRCMP(items[0], "PFXPOSTPONE") == 0 && itemcnt == 1) |
2579 { | 2674 { |
2580 aff->af_pfxpostpone = TRUE; | 2675 aff->af_pfxpostpone = TRUE; |
2581 } | 2676 } |
2582 else if ((STRCMP(items[0], "PFX") == 0 | 2677 else if ((STRCMP(items[0], "PFX") == 0 |
2761 smsg((char_u *)_("Expected MAP count in %s line %d"), | 2856 smsg((char_u *)_("Expected MAP count in %s line %d"), |
2762 fname, lnum); | 2857 fname, lnum); |
2763 } | 2858 } |
2764 else if (do_map) | 2859 else if (do_map) |
2765 { | 2860 { |
2861 int c; | |
2862 | |
2863 /* Check that every character appears only once. */ | |
2864 for (p = items[1]; *p != NUL; ) | |
2865 { | |
2866 #ifdef FEAT_MBYTE | |
2867 c = mb_ptr2char_adv(&p); | |
2868 #else | |
2869 c = *p++; | |
2870 #endif | |
2871 if ((spin->si_map.ga_len > 0 | |
2872 && vim_strchr(spin->si_map.ga_data, c) | |
2873 != NULL) | |
2874 || vim_strchr(p, c) != NULL) | |
2875 smsg((char_u *)_("Duplicate character in MAP in %s line %d"), | |
2876 fname, lnum); | |
2877 } | |
2878 | |
2766 /* We simply concatenate all the MAP strings, separated by | 2879 /* We simply concatenate all the MAP strings, separated by |
2767 * slashes. */ | 2880 * slashes. */ |
2768 ga_concat(&spin->si_map, items[1]); | 2881 ga_concat(&spin->si_map, items[1]); |
2769 ga_append(&spin->si_map, '/'); | 2882 ga_append(&spin->si_map, '/'); |
2770 } | 2883 } |
3076 && vim_strchr(afflist, affile->af_kep) != NULL) | 3189 && vim_strchr(afflist, affile->af_kep) != NULL) |
3077 flags |= WF_KEEPCAP; | 3190 flags |= WF_KEEPCAP; |
3078 if (affile->af_rar != NUL | 3191 if (affile->af_rar != NUL |
3079 && vim_strchr(afflist, affile->af_rar) != NULL) | 3192 && vim_strchr(afflist, affile->af_rar) != NULL) |
3080 flags |= WF_RARE; | 3193 flags |= WF_RARE; |
3194 if (affile->af_bad != NUL | |
3195 && vim_strchr(afflist, affile->af_bad) != NULL) | |
3196 flags |= WF_BANNED; | |
3081 | 3197 |
3082 if (affile->af_pfxpostpone) | 3198 if (affile->af_pfxpostpone) |
3083 /* Need to store the list of prefix IDs with the word. */ | 3199 /* Need to store the list of prefix IDs with the word. */ |
3084 pfxlist = get_pfxlist(affile, afflist, &spin->si_blocks); | 3200 pfxlist = get_pfxlist(affile, afflist, &spin->si_blocks); |
3085 } | 3201 } |
3753 for (np = node; np != NULL; np = np->wn_sibling) | 3869 for (np = node; np != NULL; np = np->wn_sibling) |
3754 { | 3870 { |
3755 ++len; | 3871 ++len; |
3756 if ((child = np->wn_child) != NULL) | 3872 if ((child = np->wn_child) != NULL) |
3757 { | 3873 { |
3758 /* Compress the child. This fills wn_hashkey. */ | 3874 /* Compress the child. This fills hashkey. */ |
3759 compressed += node_compress(child, ht, tot); | 3875 compressed += node_compress(child, ht, tot); |
3760 | 3876 |
3761 /* Try to find an identical child. */ | 3877 /* Try to find an identical child. */ |
3762 hash = hash_hash(child->wn_hashkey); | 3878 hash = hash_hash(child->wn_u1.hashkey); |
3763 hi = hash_lookup(ht, child->wn_hashkey, hash); | 3879 hi = hash_lookup(ht, child->wn_u1.hashkey, hash); |
3764 tp = NULL; | 3880 tp = NULL; |
3765 if (!HASHITEM_EMPTY(hi)) | 3881 if (!HASHITEM_EMPTY(hi)) |
3766 { | 3882 { |
3767 /* There are children with an identical hash value. Now check | 3883 /* There are children with an identical hash value. Now check |
3768 * if there is one that is really identical. */ | 3884 * if there is one that is really identical. */ |
3769 for (tp = HI2WN(hi); tp != NULL; tp = tp->wn_next) | 3885 for (tp = HI2WN(hi); tp != NULL; tp = tp->wn_u2.next) |
3770 if (node_equal(child, tp)) | 3886 if (node_equal(child, tp)) |
3771 { | 3887 { |
3772 /* Found one! Now use that child in place of the | 3888 /* Found one! Now use that child in place of the |
3773 * current one. This means the current child is | 3889 * current one. This means the current child is |
3774 * dropped from the tree. */ | 3890 * dropped from the tree. */ |
3780 { | 3896 { |
3781 /* No other child with this hash value equals the child of | 3897 /* No other child with this hash value equals the child of |
3782 * the node, add it to the linked list after the first | 3898 * the node, add it to the linked list after the first |
3783 * item. */ | 3899 * item. */ |
3784 tp = HI2WN(hi); | 3900 tp = HI2WN(hi); |
3785 child->wn_next = tp->wn_next; | 3901 child->wn_u2.next = tp->wn_u2.next; |
3786 tp->wn_next = child; | 3902 tp->wn_u2.next = child; |
3787 } | 3903 } |
3788 } | 3904 } |
3789 else | 3905 else |
3790 /* No other child has this hash value, add it to the | 3906 /* No other child has this hash value, add it to the |
3791 * hashtable. */ | 3907 * hashtable. */ |
3792 hash_add_item(ht, hi, child->wn_hashkey, hash); | 3908 hash_add_item(ht, hi, child->wn_u1.hashkey, hash); |
3793 } | 3909 } |
3794 } | 3910 } |
3795 *tot += len; | 3911 *tot += len; |
3796 | 3912 |
3797 /* | 3913 /* |
3798 * Make a hash key for the node and its siblings, so that we can quickly | 3914 * Make a hash key for the node and its siblings, so that we can quickly |
3799 * find a lookalike node. This must be done after compressing the sibling | 3915 * find a lookalike node. This must be done after compressing the sibling |
3800 * list, otherwise the hash key would become invalid by the compression. | 3916 * list, otherwise the hash key would become invalid by the compression. |
3801 */ | 3917 */ |
3802 node->wn_hashkey[0] = len; | 3918 node->wn_u1.hashkey[0] = len; |
3803 nr = 0; | 3919 nr = 0; |
3804 for (np = node; np != NULL; np = np->wn_sibling) | 3920 for (np = node; np != NULL; np = np->wn_sibling) |
3805 { | 3921 { |
3806 if (np->wn_byte == NUL) | 3922 if (np->wn_byte == NUL) |
3807 /* end node: use wn_flags, wn_region and wn_prefixID */ | 3923 /* end node: use wn_flags, wn_region and wn_prefixID */ |
3812 nr = nr * 101 + n; | 3928 nr = nr * 101 + n; |
3813 } | 3929 } |
3814 | 3930 |
3815 /* Avoid NUL bytes, it terminates the hash key. */ | 3931 /* Avoid NUL bytes, it terminates the hash key. */ |
3816 n = nr & 0xff; | 3932 n = nr & 0xff; |
3817 node->wn_hashkey[1] = n == 0 ? 1 : n; | 3933 node->wn_u1.hashkey[1] = n == 0 ? 1 : n; |
3818 n = (nr >> 8) & 0xff; | 3934 n = (nr >> 8) & 0xff; |
3819 node->wn_hashkey[2] = n == 0 ? 1 : n; | 3935 node->wn_u1.hashkey[2] = n == 0 ? 1 : n; |
3820 n = (nr >> 16) & 0xff; | 3936 n = (nr >> 16) & 0xff; |
3821 node->wn_hashkey[3] = n == 0 ? 1 : n; | 3937 node->wn_u1.hashkey[3] = n == 0 ? 1 : n; |
3822 n = (nr >> 24) & 0xff; | 3938 n = (nr >> 24) & 0xff; |
3823 node->wn_hashkey[4] = n == 0 ? 1 : n; | 3939 node->wn_u1.hashkey[4] = n == 0 ? 1 : n; |
3824 node->wn_hashkey[5] = NUL; | 3940 node->wn_u1.hashkey[5] = NUL; |
3825 | 3941 |
3826 return compressed; | 3942 return compressed; |
3827 } | 3943 } |
3828 | 3944 |
3829 /* | 3945 /* |
4015 else if (round == 2) | 4131 else if (round == 2) |
4016 tree = spin->si_keeproot; | 4132 tree = spin->si_keeproot; |
4017 else | 4133 else |
4018 tree = spin->si_prefroot; | 4134 tree = spin->si_prefroot; |
4019 | 4135 |
4136 /* Clear the index and wnode fields in the tree. */ | |
4137 clear_node(tree); | |
4138 | |
4020 /* Count the number of nodes. Needed to be able to allocate the | 4139 /* Count the number of nodes. Needed to be able to allocate the |
4021 * memory when reading the nodes. Also fills in the index for shared | 4140 * memory when reading the nodes. Also fills in index for shared |
4022 * nodes. */ | 4141 * nodes. */ |
4023 nodecount = put_tree(NULL, tree, 0, regionmask, round == 3); | 4142 nodecount = put_node(NULL, tree, 0, regionmask, round == 3); |
4024 | 4143 |
4025 /* number of nodes in 4 bytes */ | 4144 /* number of nodes in 4 bytes */ |
4026 put_bytes(fd, (long_u)nodecount, 4); /* <nodecount> */ | 4145 put_bytes(fd, (long_u)nodecount, 4); /* <nodecount> */ |
4027 spin->si_memtot += nodecount + nodecount * sizeof(int); | 4146 spin->si_memtot += nodecount + nodecount * sizeof(int); |
4028 | 4147 |
4029 /* Write the nodes. */ | 4148 /* Write the nodes. */ |
4030 (void)put_tree(fd, tree, 0, regionmask, round == 3); | 4149 (void)put_node(fd, tree, 0, regionmask, round == 3); |
4031 } | 4150 } |
4032 | 4151 |
4033 fclose(fd); | 4152 fclose(fd); |
4034 } | 4153 } |
4154 | |
4155 /* | |
4156 * Clear the index and wnode fields of "node", its siblings and its | |
4157 * children. This is needed because they are a union with other items to save | |
4158 * space. | |
4159 */ | |
4160 static void | |
4161 clear_node(node) | |
4162 wordnode_T *node; | |
4163 { | |
4164 wordnode_T *np; | |
4165 | |
4166 if (node != NULL) | |
4167 for (np = node; np != NULL; np = np->wn_sibling) | |
4168 { | |
4169 np->wn_u1.index = 0; | |
4170 np->wn_u2.wnode = NULL; | |
4171 | |
4172 if (np->wn_byte != NUL) | |
4173 clear_node(np->wn_child); | |
4174 } | |
4175 } | |
4176 | |
4035 | 4177 |
4036 /* | 4178 /* |
4037 * Dump a word tree at node "node". | 4179 * Dump a word tree at node "node". |
4038 * | 4180 * |
4039 * This first writes the list of possible bytes (siblings). Then for each | 4181 * This first writes the list of possible bytes (siblings). Then for each |
4044 * file). | 4186 * file). |
4045 * | 4187 * |
4046 * Returns the number of nodes used. | 4188 * Returns the number of nodes used. |
4047 */ | 4189 */ |
4048 static int | 4190 static int |
4049 put_tree(fd, node, index, regionmask, prefixtree) | 4191 put_node(fd, node, index, regionmask, prefixtree) |
4050 FILE *fd; /* NULL when only counting */ | 4192 FILE *fd; /* NULL when only counting */ |
4051 wordnode_T *node; | 4193 wordnode_T *node; |
4052 int index; | 4194 int index; |
4053 int regionmask; | 4195 int regionmask; |
4054 int prefixtree; /* TRUE for PREFIXTREE */ | 4196 int prefixtree; /* TRUE for PREFIXTREE */ |
4061 /* If "node" is zero the tree is empty. */ | 4203 /* If "node" is zero the tree is empty. */ |
4062 if (node == NULL) | 4204 if (node == NULL) |
4063 return 0; | 4205 return 0; |
4064 | 4206 |
4065 /* Store the index where this node is written. */ | 4207 /* Store the index where this node is written. */ |
4066 node->wn_index = index; | 4208 node->wn_u1.index = index; |
4067 | 4209 |
4068 /* Count the number of siblings. */ | 4210 /* Count the number of siblings. */ |
4069 for (np = node; np != NULL; np = np->wn_sibling) | 4211 for (np = node; np != NULL; np = np->wn_sibling) |
4070 ++siblingcount; | 4212 ++siblingcount; |
4071 | 4213 |
4114 } | 4256 } |
4115 } | 4257 } |
4116 } | 4258 } |
4117 else | 4259 else |
4118 { | 4260 { |
4119 if (np->wn_child->wn_index != 0 && np->wn_child->wn_wnode != node) | 4261 if (np->wn_child->wn_u1.index != 0 |
4262 && np->wn_child->wn_u2.wnode != node) | |
4120 { | 4263 { |
4121 /* The child is written elsewhere, write the reference. */ | 4264 /* The child is written elsewhere, write the reference. */ |
4122 if (fd != NULL) | 4265 if (fd != NULL) |
4123 { | 4266 { |
4124 putc(BY_INDEX, fd); /* <byte> */ | 4267 putc(BY_INDEX, fd); /* <byte> */ |
4125 /* <nodeidx> */ | 4268 /* <nodeidx> */ |
4126 put_bytes(fd, (long_u)np->wn_child->wn_index, 3); | 4269 put_bytes(fd, (long_u)np->wn_child->wn_u1.index, 3); |
4127 } | 4270 } |
4128 } | 4271 } |
4129 else if (np->wn_child->wn_wnode == NULL) | 4272 else if (np->wn_child->wn_u2.wnode == NULL) |
4130 /* We will write the child below and give it an index. */ | 4273 /* We will write the child below and give it an index. */ |
4131 np->wn_child->wn_wnode = node; | 4274 np->wn_child->wn_u2.wnode = node; |
4132 | 4275 |
4133 if (fd != NULL) | 4276 if (fd != NULL) |
4134 if (putc(np->wn_byte, fd) == EOF) /* <byte> or <xbyte> */ | 4277 if (putc(np->wn_byte, fd) == EOF) /* <byte> or <xbyte> */ |
4135 { | 4278 { |
4136 EMSG(_(e_write)); | 4279 EMSG(_(e_write)); |
4143 * the count. */ | 4286 * the count. */ |
4144 newindex += siblingcount + 1; | 4287 newindex += siblingcount + 1; |
4145 | 4288 |
4146 /* Recursively dump the children of each sibling. */ | 4289 /* Recursively dump the children of each sibling. */ |
4147 for (np = node; np != NULL; np = np->wn_sibling) | 4290 for (np = node; np != NULL; np = np->wn_sibling) |
4148 if (np->wn_byte != 0 && np->wn_child->wn_wnode == node) | 4291 if (np->wn_byte != 0 && np->wn_child->wn_u2.wnode == node) |
4149 newindex = put_tree(fd, np->wn_child, newindex, regionmask, | 4292 newindex = put_node(fd, np->wn_child, newindex, regionmask, |
4150 prefixtree); | 4293 prefixtree); |
4151 | 4294 |
4152 return newindex; | 4295 return newindex; |
4153 } | 4296 } |
4154 | 4297 |
4898 int c; | 5041 int c; |
4899 suginfo_T sug; | 5042 suginfo_T sug; |
4900 suggest_T *stp; | 5043 suggest_T *stp; |
4901 | 5044 |
4902 /* Find the start of the badly spelled word. */ | 5045 /* Find the start of the badly spelled word. */ |
4903 if (spell_move_to(FORWARD, TRUE, TRUE) == FAIL) | 5046 if (spell_move_to(FORWARD, TRUE, TRUE) == FAIL |
4904 { | 5047 || curwin->w_cursor.col > prev_cursor.col) |
4905 beep_flush(); | 5048 { |
4906 return; | 5049 if (!curwin->w_p_spell || *curbuf->b_p_spl == NUL) |
5050 return; | |
5051 | |
5052 /* No bad word or it starts after the cursor: use the word under the | |
5053 * cursor. */ | |
5054 curwin->w_cursor = prev_cursor; | |
5055 line = ml_get_curline(); | |
5056 p = line + curwin->w_cursor.col; | |
5057 /* Backup to before start of word. */ | |
5058 while (p > line && SPELL_ISWORDP(p)) | |
5059 mb_ptr_back(line, p); | |
5060 /* Forward to start of word. */ | |
5061 while (!SPELL_ISWORDP(p)) | |
5062 mb_ptr_adv(p); | |
5063 | |
5064 if (!SPELL_ISWORDP(p)) /* No word found. */ | |
5065 { | |
5066 beep_flush(); | |
5067 return; | |
5068 } | |
5069 curwin->w_cursor.col = p - line; | |
4907 } | 5070 } |
4908 | 5071 |
4909 /* Get the word and its length. */ | 5072 /* Get the word and its length. */ |
4910 line = ml_get_curline(); | 5073 line = ml_get_curline(); |
4911 | 5074 |
4921 vim_snprintf((char *)IObuff, IOSIZE, _("Change \"%.*s\" to:"), | 5084 vim_snprintf((char *)IObuff, IOSIZE, _("Change \"%.*s\" to:"), |
4922 sug.su_badlen, sug.su_badptr); | 5085 sug.su_badlen, sug.su_badptr); |
4923 msg_puts(IObuff); | 5086 msg_puts(IObuff); |
4924 msg_clr_eos(); | 5087 msg_clr_eos(); |
4925 msg_putchar('\n'); | 5088 msg_putchar('\n'); |
5089 | |
4926 msg_scroll = TRUE; | 5090 msg_scroll = TRUE; |
4927 for (i = 0; i < sug.su_ga.ga_len; ++i) | 5091 for (i = 0; i < sug.su_ga.ga_len; ++i) |
4928 { | 5092 { |
4929 stp = &SUG(sug.su_ga, i); | 5093 stp = &SUG(sug.su_ga, i); |
4930 | 5094 |
4933 STRCPY(wcopy, stp->st_word); | 5097 STRCPY(wcopy, stp->st_word); |
4934 if (sug.su_badlen > stp->st_orglen) | 5098 if (sug.su_badlen > stp->st_orglen) |
4935 vim_strncpy(wcopy + STRLEN(wcopy), | 5099 vim_strncpy(wcopy + STRLEN(wcopy), |
4936 sug.su_badptr + stp->st_orglen, | 5100 sug.su_badptr + stp->st_orglen, |
4937 sug.su_badlen - stp->st_orglen); | 5101 sug.su_badlen - stp->st_orglen); |
5102 vim_snprintf((char *)IObuff, IOSIZE, _("%2d \"%s\""), i + 1, wcopy); | |
5103 msg_puts(IObuff); | |
5104 | |
5105 /* The word may replace more than "su_badlen". */ | |
5106 if (sug.su_badlen < stp->st_orglen) | |
5107 { | |
5108 vim_snprintf((char *)IObuff, IOSIZE, _(" < \"%.*s\""), | |
5109 stp->st_orglen, sug.su_badptr); | |
5110 msg_puts(IObuff); | |
5111 } | |
5112 | |
4938 if (p_verbose > 0) | 5113 if (p_verbose > 0) |
4939 { | 5114 { |
5115 /* Add the score. */ | |
4940 if (sps_flags & SPS_DOUBLE) | 5116 if (sps_flags & SPS_DOUBLE) |
4941 vim_snprintf((char *)IObuff, IOSIZE, | 5117 vim_snprintf((char *)IObuff, IOSIZE, _(" (%s%d - %d)"), |
4942 _("%2d \"%s\" (%s%d - %d)"), | |
4943 i + 1, wcopy, | |
4944 stp->st_salscore ? "s " : "", | 5118 stp->st_salscore ? "s " : "", |
4945 stp->st_score, stp->st_altscore); | 5119 stp->st_score, stp->st_altscore); |
4946 else | 5120 else |
4947 vim_snprintf((char *)IObuff, IOSIZE, _("%2d \"%s\" (%d)"), | 5121 vim_snprintf((char *)IObuff, IOSIZE, _(" (%d)"), |
4948 i + 1, wcopy, stp->st_score); | 5122 stp->st_score); |
4949 } | 5123 msg_advance(30); |
4950 else | 5124 msg_puts(IObuff); |
4951 vim_snprintf((char *)IObuff, IOSIZE, _("%2d \"%s\""), | 5125 } |
4952 i + 1, wcopy); | |
4953 msg_puts(IObuff); | |
4954 lines_left = 3; /* avoid more prompt */ | 5126 lines_left = 3; /* avoid more prompt */ |
4955 msg_putchar('\n'); | 5127 msg_putchar('\n'); |
4956 } | 5128 } |
4957 | 5129 |
4958 /* Ask for choice. */ | 5130 /* Ask for choice. */ |
5056 if (su->su_badlen >= MAXWLEN) | 5228 if (su->su_badlen >= MAXWLEN) |
5057 su->su_badlen = MAXWLEN - 1; /* just in case */ | 5229 su->su_badlen = MAXWLEN - 1; /* just in case */ |
5058 vim_strncpy(su->su_badword, su->su_badptr, su->su_badlen); | 5230 vim_strncpy(su->su_badword, su->su_badptr, su->su_badlen); |
5059 (void)spell_casefold(su->su_badptr, su->su_badlen, | 5231 (void)spell_casefold(su->su_badptr, su->su_badlen, |
5060 su->su_fbadword, MAXWLEN); | 5232 su->su_fbadword, MAXWLEN); |
5233 /* get caps flags for bad word */ | |
5234 su->su_badflags = captype(su->su_badptr, su->su_badptr + su->su_badlen); | |
5061 | 5235 |
5062 /* Ban the bad word itself. It may appear in another region. */ | 5236 /* Ban the bad word itself. It may appear in another region. */ |
5063 add_banned(su, su->su_badword); | 5237 add_banned(su, su->su_badword); |
5064 | 5238 |
5065 /* | 5239 /* |
5066 * 1. Try inserting/deleting/swapping/changing a letter, use REP entries | 5240 * 1. Try special cases, such as repeating a word: "the the" -> "the". |
5067 * from the .aff file and inserting a space (split the word). | |
5068 * | 5241 * |
5069 * Set a maximum score to limit the combination of operations that is | 5242 * Set a maximum score to limit the combination of operations that is |
5070 * tried. | 5243 * tried. |
5071 */ | 5244 */ |
5072 su->su_maxscore = SCORE_MAXINIT; | 5245 su->su_maxscore = SCORE_MAXINIT; |
5073 spell_try_change(su); | 5246 suggest_try_special(su); |
5247 | |
5248 /* | |
5249 * 2. Try inserting/deleting/swapping/changing a letter, use REP entries | |
5250 * from the .aff file and inserting a space (split the word). | |
5251 */ | |
5252 suggest_try_change(su); | |
5074 | 5253 |
5075 /* For the resulting top-scorers compute the sound-a-like score. */ | 5254 /* For the resulting top-scorers compute the sound-a-like score. */ |
5076 if (sps_flags & SPS_DOUBLE) | 5255 if (sps_flags & SPS_DOUBLE) |
5077 score_comp_sal(su); | 5256 score_comp_sal(su); |
5078 | 5257 |
5079 /* | 5258 /* |
5080 * 2. Try finding sound-a-like words. | 5259 * 3. Try finding sound-a-like words. |
5081 * | 5260 * |
5082 * Only do this when we don't have a lot of suggestions yet, because it's | 5261 * Only do this when we don't have a lot of suggestions yet, because it's |
5083 * very slow and often doesn't find new suggestions. | 5262 * very slow and often doesn't find new suggestions. |
5084 */ | 5263 */ |
5085 if ((sps_flags & SPS_DOUBLE) | 5264 if ((sps_flags & SPS_DOUBLE) |
5086 || (!(sps_flags & SPS_FAST) | 5265 || (!(sps_flags & SPS_FAST) |
5087 && su->su_ga.ga_len < SUG_CLEAN_COUNT(su))) | 5266 && su->su_ga.ga_len < SUG_CLEAN_COUNT(su))) |
5088 { | 5267 { |
5089 /* Allow a higher score now. */ | 5268 /* Allow a higher score now. */ |
5090 su->su_maxscore = SCORE_MAXMAX; | 5269 su->su_maxscore = SCORE_MAXMAX; |
5091 spell_try_soundalike(su); | 5270 suggest_try_soundalike(su); |
5092 } | 5271 } |
5093 | 5272 |
5094 /* When CTRL-C was hit while searching do show the results. */ | 5273 /* When CTRL-C was hit while searching do show the results. */ |
5095 ui_breakcheck(); | 5274 ui_breakcheck(); |
5096 if (got_int) | 5275 if (got_int) |
5215 } | 5394 } |
5216 *d = NUL; | 5395 *d = NUL; |
5217 } | 5396 } |
5218 | 5397 |
5219 /* | 5398 /* |
5399 * Try finding suggestions by recognizing specific situations. | |
5400 */ | |
5401 static void | |
5402 suggest_try_special(su) | |
5403 suginfo_T *su; | |
5404 { | |
5405 char_u *p; | |
5406 int len; | |
5407 int c; | |
5408 char_u word[MAXWLEN]; | |
5409 | |
5410 /* | |
5411 * Recognize a word that is repeated: "the the". | |
5412 */ | |
5413 p = skiptowhite(su->su_fbadword); | |
5414 len = p - su->su_fbadword; | |
5415 p = skipwhite(p); | |
5416 if (STRLEN(p) == len && STRNCMP(su->su_fbadword, p, len) == 0) | |
5417 { | |
5418 /* Include badflags: if the badword is onecap or allcap | |
5419 * use that for the goodword too: "The the" -> "The". */ | |
5420 c = su->su_fbadword[len]; | |
5421 su->su_fbadword[len] = NUL; | |
5422 make_case_word(su->su_fbadword, word, su->su_badflags); | |
5423 su->su_fbadword[len] = c; | |
5424 add_suggestion(su, &su->su_ga, word, su->su_badlen, SCORE_DEL, TRUE); | |
5425 } | |
5426 } | |
5427 | |
5428 /* | |
5220 * Try finding suggestions by adding/removing/swapping letters. | 5429 * Try finding suggestions by adding/removing/swapping letters. |
5221 * | 5430 * |
5222 * This uses a state machine. At each node in the tree we try various | 5431 * This uses a state machine. At each node in the tree we try various |
5223 * operations. When trying whether an operation works "depth" is increased and the | 5432 * operations. When trying whether an operation works "depth" is increased and the |
5224 * stack[] is used to store info. This allows combinations, thus insert one | 5433 * stack[] is used to store info. This allows combinations, thus insert one |
5225 * character, replace one and delete another. The number of changes is | 5434 * character, replace one and delete another. The number of changes is |
5226 * limited by su->su_maxscore, checked in try_deeper(). | 5435 * limited by su->su_maxscore, checked in try_deeper(). |
5227 */ | 5436 */ |
5228 static void | 5437 static void |
5229 spell_try_change(su) | 5438 suggest_try_change(su) |
5230 suginfo_T *su; | 5439 suginfo_T *su; |
5231 { | 5440 { |
5232 char_u fword[MAXWLEN]; /* copy of the bad word, case-folded */ | 5441 char_u fword[MAXWLEN]; /* copy of the bad word, case-folded */ |
5233 char_u tword[MAXWLEN]; /* good word collected so far */ | 5442 char_u tword[MAXWLEN]; /* good word collected so far */ |
5234 trystate_T stack[MAXWLEN]; | 5443 trystate_T stack[MAXWLEN]; |
5243 idx_T *idxs; | 5452 idx_T *idxs; |
5244 int depth; | 5453 int depth; |
5245 int c, c2, c3; | 5454 int c, c2, c3; |
5246 int n = 0; | 5455 int n = 0; |
5247 int flags; | 5456 int flags; |
5248 int badflags; | |
5249 garray_T *gap; | 5457 garray_T *gap; |
5250 idx_T arridx; | 5458 idx_T arridx; |
5251 int len; | 5459 int len; |
5252 char_u *p; | 5460 char_u *p; |
5253 fromto_T *ftp; | 5461 fromto_T *ftp; |
5254 int fl = 0, tl; | 5462 int fl = 0, tl; |
5255 | 5463 int repextra = 0; /* extra bytes in fword[] from REP item */ |
5256 /* get caps flags for bad word */ | |
5257 badflags = captype(su->su_badptr, su->su_badptr + su->su_badlen); | |
5258 | 5464 |
5259 /* We make a copy of the case-folded bad word, so that we can modify it | 5465 /* We make a copy of the case-folded bad word, so that we can modify it |
5260 * to find matches (esp. REP items). */ | 5466 * to find matches (esp. REP items). Append some more text, changing |
5467 * chars after the bad word may help. */ | |
5261 STRCPY(fword, su->su_fbadword); | 5468 STRCPY(fword, su->su_fbadword); |
5262 | 5469 n = STRLEN(fword); |
5470 p = su->su_badptr + su->su_badlen; | |
5471 (void)spell_casefold(p, STRLEN(p), fword + n, MAXWLEN - n); | |
5263 | 5472 |
5264 for (lp = LANGP_ENTRY(curwin->w_buffer->b_langp, 0); | 5473 for (lp = LANGP_ENTRY(curwin->w_buffer->b_langp, 0); |
5265 lp->lp_slang != NULL; ++lp) | 5474 lp->lp_slang != NULL; ++lp) |
5266 { | 5475 { |
5267 /* | 5476 /* |
5304 */ | 5513 */ |
5305 arridx = sp->ts_arridx; /* current node in the tree */ | 5514 arridx = sp->ts_arridx; /* current node in the tree */ |
5306 len = byts[arridx]; /* bytes in this node */ | 5515 len = byts[arridx]; /* bytes in this node */ |
5307 arridx += sp->ts_curi; /* index of current byte */ | 5516 arridx += sp->ts_curi; /* index of current byte */ |
5308 | 5517 |
5309 if (sp->ts_curi > len || (c = byts[arridx]) != 0) | 5518 if (sp->ts_curi > len || byts[arridx] != 0) |
5310 { | 5519 { |
5311 /* Past bytes in node and/or past NUL bytes. */ | 5520 /* Past bytes in node and/or past NUL bytes. */ |
5312 sp->ts_state = STATE_ENDNUL; | 5521 sp->ts_state = STATE_ENDNUL; |
5313 break; | 5522 break; |
5314 } | 5523 } |
5328 if (flags & WF_KEEPCAP) | 5537 if (flags & WF_KEEPCAP) |
5329 /* Must find the word in the keep-case tree. */ | 5538 /* Must find the word in the keep-case tree. */ |
5330 find_keepcap_word(lp->lp_slang, tword + splitoff, | 5539 find_keepcap_word(lp->lp_slang, tword + splitoff, |
5331 preword + prewordlen); | 5540 preword + prewordlen); |
5332 else | 5541 else |
5542 { | |
5333 /* Include badflags: if the badword is onecap or allcap | 5543 /* Include badflags: if the badword is onecap or allcap |
5334 * use that for the goodword too. */ | 5544 * use that for the goodword too. But if the badword is |
5545 * allcap and it's only one char long use onecap. */ | |
5546 c = su->su_badflags; | |
5547 if ((c & WF_ALLCAP) | |
5548 #ifdef FEAT_MBYTE | |
5549 && su->su_badlen == mb_ptr2len_check(su->su_badptr) | |
5550 #else | |
5551 && su->su_badlen == 1 | |
5552 #endif | |
5553 ) | |
5554 c = WF_ONECAP; | |
5335 make_case_word(tword + splitoff, | 5555 make_case_word(tword + splitoff, |
5336 preword + prewordlen, flags | badflags); | 5556 preword + prewordlen, flags | c); |
5557 } | |
5337 | 5558 |
5338 /* Don't use a banned word. It may appear again as a good | 5559 /* Don't use a banned word. It may appear again as a good |
5339 * word, thus remember it. */ | 5560 * word, thus remember it. */ |
5340 if (flags & WF_BANNED) | 5561 if (flags & WF_BANNED) |
5341 { | 5562 { |
5350 && (((unsigned)flags >> 8) & lp->lp_region) == 0) | 5571 && (((unsigned)flags >> 8) & lp->lp_region) == 0) |
5351 newscore += SCORE_REGION; | 5572 newscore += SCORE_REGION; |
5352 if (flags & WF_RARE) | 5573 if (flags & WF_RARE) |
5353 newscore += SCORE_RARE; | 5574 newscore += SCORE_RARE; |
5354 | 5575 |
5355 if (!spell_valid_case(badflags, | 5576 if (!spell_valid_case(su->su_badflags, |
5356 captype(preword + prewordlen, NULL))) | 5577 captype(preword + prewordlen, NULL))) |
5357 newscore += SCORE_ICASE; | 5578 newscore += SCORE_ICASE; |
5358 | 5579 |
5359 if (fword[sp->ts_fidx] == 0) | 5580 if ((fword[sp->ts_fidx] == NUL |
5581 || !SPELL_ISWORDP(fword + sp->ts_fidx)) | |
5582 && sp->ts_fidx >= sp->ts_fidxtry) | |
5360 { | 5583 { |
5361 /* The badword also ends: add suggestions. */ | 5584 /* The badword also ends: add suggestions. */ |
5362 add_suggestion(su, &su->su_ga, preword, | 5585 add_suggestion(su, &su->su_ga, preword, |
5586 sp->ts_fidx - repextra, | |
5363 sp->ts_score + newscore, FALSE); | 5587 sp->ts_score + newscore, FALSE); |
5364 } | 5588 } |
5365 else if (sp->ts_fidx >= sp->ts_fidxtry | 5589 else if (sp->ts_fidx >= sp->ts_fidxtry |
5366 #ifdef FEAT_MBYTE | 5590 #ifdef FEAT_MBYTE |
5367 /* Don't split halfway a character. */ | 5591 /* Don't split halfway a character. */ |
5374 * words starts at fword[sp->ts_fidx]. */ | 5598 * words starts at fword[sp->ts_fidx]. */ |
5375 if (try_deeper(su, stack, depth, newscore + SCORE_SPLIT)) | 5599 if (try_deeper(su, stack, depth, newscore + SCORE_SPLIT)) |
5376 { | 5600 { |
5377 /* Save things to be restored at STATE_SPLITUNDO. */ | 5601 /* Save things to be restored at STATE_SPLITUNDO. */ |
5378 sp->ts_save_prewordlen = prewordlen; | 5602 sp->ts_save_prewordlen = prewordlen; |
5379 sp->ts_save_badflags = badflags; | 5603 sp->ts_save_badflags = su->su_badflags; |
5380 sp->ts_save_splitoff = splitoff; | 5604 sp->ts_save_splitoff = splitoff; |
5381 | 5605 |
5382 /* Append a space to preword. */ | 5606 /* Append a space to preword. */ |
5383 STRCAT(preword, " "); | 5607 STRCAT(preword, " "); |
5384 prewordlen = STRLEN(preword); | 5608 prewordlen = STRLEN(preword); |
5398 --i; | 5622 --i; |
5399 } | 5623 } |
5400 else | 5624 else |
5401 #endif | 5625 #endif |
5402 p = su->su_badptr + sp->ts_fidx; | 5626 p = su->su_badptr + sp->ts_fidx; |
5403 badflags = captype(p, su->su_badptr + su->su_badlen); | 5627 su->su_badflags = captype(p, su->su_badptr |
5628 + su->su_badlen); | |
5404 | 5629 |
5405 sp->ts_state = STATE_SPLITUNDO; | 5630 sp->ts_state = STATE_SPLITUNDO; |
5406 ++depth; | 5631 ++depth; |
5407 /* Restart at top of the tree. */ | 5632 /* Restart at top of the tree. */ |
5408 stack[depth].ts_arridx = 0; | 5633 stack[depth].ts_arridx = 0; |
5409 } | 5634 } |
5410 } | 5635 } |
5411 break; | 5636 break; |
5412 | 5637 |
5413 case STATE_SPLITUNDO: | 5638 case STATE_SPLITUNDO: |
5414 /* Fixup the changes done for word split. */ | 5639 /* Undo the changes done for word split. */ |
5415 badflags = sp->ts_save_badflags; | 5640 su->su_badflags = sp->ts_save_badflags; |
5416 splitoff = sp->ts_save_splitoff; | 5641 splitoff = sp->ts_save_splitoff; |
5417 prewordlen = sp->ts_save_prewordlen; | 5642 prewordlen = sp->ts_save_prewordlen; |
5418 | 5643 |
5419 /* Continue looking for NUL bytes. */ | 5644 /* Continue looking for NUL bytes. */ |
5420 sp->ts_state = STATE_START; | 5645 sp->ts_state = STATE_START; |
5421 break; | 5646 break; |
5422 | 5647 |
5423 case STATE_ENDNUL: | 5648 case STATE_ENDNUL: |
5424 /* Past the NUL bytes in the node. */ | 5649 /* Past the NUL bytes in the node. */ |
5425 if (fword[sp->ts_fidx] == 0) | 5650 if (fword[sp->ts_fidx] == NUL) |
5426 { | 5651 { |
5427 /* The badword ends, can't use the bytes in this node. */ | 5652 /* The badword ends, can't use the bytes in this node. */ |
5428 sp->ts_state = STATE_DEL; | 5653 sp->ts_state = STATE_DEL; |
5429 break; | 5654 break; |
5430 } | 5655 } |
5754 { | 5979 { |
5755 c = *p; | 5980 c = *p; |
5756 *p = p[2]; | 5981 *p = p[2]; |
5757 p[2] = c; | 5982 p[2] = c; |
5758 } | 5983 } |
5759 /*FALLTHROUGH*/ | 5984 |
5760 | |
5761 case STATE_ROT3L: | |
5762 /* Rotate three characters left: "123" -> "231". We change | 5985 /* Rotate three characters left: "123" -> "231". We change |
5763 * "fword" here, it's changed back afterwards. */ | 5986 * "fword" here, it's changed back afterwards. */ |
5764 if (try_deeper(su, stack, depth, SCORE_SWAP3)) | 5987 if (try_deeper(su, stack, depth, SCORE_SWAP3)) |
5765 { | 5988 { |
5766 sp->ts_state = STATE_UNROT3L; | 5989 sp->ts_state = STATE_UNROT3L; |
5790 else | 6013 else |
5791 sp->ts_state = STATE_REP_INI; | 6014 sp->ts_state = STATE_REP_INI; |
5792 break; | 6015 break; |
5793 | 6016 |
5794 case STATE_UNROT3L: | 6017 case STATE_UNROT3L: |
5795 /* Undo STATE_ROT3L: "231" -> "123" */ | 6018 /* Undo ROT3L: "231" -> "123" */ |
5796 p = fword + sp->ts_fidx; | 6019 p = fword + sp->ts_fidx; |
5797 #ifdef FEAT_MBYTE | 6020 #ifdef FEAT_MBYTE |
5798 if (has_mbyte) | 6021 if (has_mbyte) |
5799 { | 6022 { |
5800 n = MB_BYTE2LEN(*p); | 6023 n = MB_BYTE2LEN(*p); |
5810 c = p[2]; | 6033 c = p[2]; |
5811 p[2] = p[1]; | 6034 p[2] = p[1]; |
5812 p[1] = *p; | 6035 p[1] = *p; |
5813 *p = c; | 6036 *p = c; |
5814 } | 6037 } |
5815 /*FALLTHROUGH*/ | 6038 |
5816 | |
5817 case STATE_ROT3R: | |
5818 /* Rotate three bytes right: "123" -> "312". We change | 6039 /* Rotate three bytes right: "123" -> "312". We change |
5819 * "fword" here, it's changed back afterwards. */ | 6040 * "fword" here, it's changed back afterwards. */ |
5820 if (try_deeper(su, stack, depth, SCORE_SWAP3)) | 6041 if (try_deeper(su, stack, depth, SCORE_SWAP3)) |
5821 { | 6042 { |
5822 sp->ts_state = STATE_UNROT3R; | 6043 sp->ts_state = STATE_UNROT3R; |
5846 else | 6067 else |
5847 sp->ts_state = STATE_REP_INI; | 6068 sp->ts_state = STATE_REP_INI; |
5848 break; | 6069 break; |
5849 | 6070 |
5850 case STATE_UNROT3R: | 6071 case STATE_UNROT3R: |
5851 /* Undo STATE_ROT3R: "312" -> "123" */ | 6072 /* Undo ROT3R: "312" -> "123" */ |
5852 p = fword + sp->ts_fidx; | 6073 p = fword + sp->ts_fidx; |
5853 #ifdef FEAT_MBYTE | 6074 #ifdef FEAT_MBYTE |
5854 if (has_mbyte) | 6075 if (has_mbyte) |
5855 { | 6076 { |
5856 c = mb_ptr2char(p); | 6077 c = mb_ptr2char(p); |
5919 /* Change the "from" to the "to" string. */ | 6140 /* Change the "from" to the "to" string. */ |
5920 ++depth; | 6141 ++depth; |
5921 fl = STRLEN(ftp->ft_from); | 6142 fl = STRLEN(ftp->ft_from); |
5922 tl = STRLEN(ftp->ft_to); | 6143 tl = STRLEN(ftp->ft_to); |
5923 if (fl != tl) | 6144 if (fl != tl) |
6145 { | |
5924 mch_memmove(p + tl, p + fl, STRLEN(p + fl) + 1); | 6146 mch_memmove(p + tl, p + fl, STRLEN(p + fl) + 1); |
6147 repextra += tl - fl; | |
6148 } | |
5925 mch_memmove(p, ftp->ft_to, tl); | 6149 mch_memmove(p, ftp->ft_to, tl); |
5926 stack[depth].ts_fidxtry = sp->ts_fidx + tl; | 6150 stack[depth].ts_fidxtry = sp->ts_fidx + tl; |
5927 #ifdef FEAT_MBYTE | 6151 #ifdef FEAT_MBYTE |
5928 stack[depth].ts_tcharlen = 0; | 6152 stack[depth].ts_tcharlen = 0; |
5929 #endif | 6153 #endif |
5943 + sp->ts_curi - 1; | 6167 + sp->ts_curi - 1; |
5944 fl = STRLEN(ftp->ft_from); | 6168 fl = STRLEN(ftp->ft_from); |
5945 tl = STRLEN(ftp->ft_to); | 6169 tl = STRLEN(ftp->ft_to); |
5946 p = fword + sp->ts_fidx; | 6170 p = fword + sp->ts_fidx; |
5947 if (fl != tl) | 6171 if (fl != tl) |
6172 { | |
5948 mch_memmove(p + fl, p + tl, STRLEN(p + tl) + 1); | 6173 mch_memmove(p + fl, p + tl, STRLEN(p + tl) + 1); |
6174 repextra -= tl - fl; | |
6175 } | |
5949 mch_memmove(p, ftp->ft_from, fl); | 6176 mch_memmove(p, ftp->ft_from, fl); |
5950 sp->ts_state = STATE_REP; | 6177 sp->ts_state = STATE_REP; |
5951 break; | 6178 break; |
5952 | 6179 |
5953 default: | 6180 default: |
6215 garray_T *gap; | 6442 garray_T *gap; |
6216 langp_T *lp; | 6443 langp_T *lp; |
6217 suggest_T *stp; | 6444 suggest_T *stp; |
6218 char_u *p; | 6445 char_u *p; |
6219 char_u badsound[MAXWLEN]; | 6446 char_u badsound[MAXWLEN]; |
6447 char_u badsound2[MAXWLEN]; | |
6220 char_u goodsound[MAXWLEN]; | 6448 char_u goodsound[MAXWLEN]; |
6221 char_u fword[MAXWLEN]; | 6449 char_u fword[MAXWLEN]; |
6222 int round; | 6450 int round; |
6223 | 6451 |
6224 /* Add the alternate score to su_ga. */ | 6452 /* Add the alternate score to su_ga. */ |
6232 | 6460 |
6233 for (i = 0; i < su->su_ga.ga_len; ++i) | 6461 for (i = 0; i < su->su_ga.ga_len; ++i) |
6234 { | 6462 { |
6235 stp = &SUG(su->su_ga, i); | 6463 stp = &SUG(su->su_ga, i); |
6236 | 6464 |
6465 if (stp->st_orglen <= su->su_badlen) | |
6466 p = badsound; | |
6467 else | |
6468 { | |
6469 /* soundfold the bad word with a different length */ | |
6470 (void)spell_casefold(su->su_badptr, stp->st_orglen, | |
6471 fword, MAXWLEN); | |
6472 spell_soundfold(lp->lp_slang, fword, badsound2); | |
6473 p = badsound2; | |
6474 } | |
6475 | |
6237 /* Case-fold the word, sound-fold the word and compute the | 6476 /* Case-fold the word, sound-fold the word and compute the |
6238 * score for the difference. */ | 6477 * score for the difference. */ |
6239 (void)spell_casefold(stp->st_word, STRLEN(stp->st_word), | 6478 (void)spell_casefold(stp->st_word, STRLEN(stp->st_word), |
6240 fword, MAXWLEN); | 6479 fword, MAXWLEN); |
6241 spell_soundfold(lp->lp_slang, fword, goodsound); | 6480 spell_soundfold(lp->lp_slang, fword, goodsound); |
6242 stp->st_altscore = soundalike_score(goodsound, badsound); | 6481 |
6482 stp->st_altscore = soundalike_score(goodsound, p); | |
6243 if (stp->st_altscore == SCORE_MAXMAX) | 6483 if (stp->st_altscore == SCORE_MAXMAX) |
6244 stp->st_score = (stp->st_score * 3 + SCORE_BIG) / 4; | 6484 stp->st_score = (stp->st_score * 3 + SCORE_BIG) / 4; |
6245 else | 6485 else |
6246 stp->st_score = (stp->st_score * 3 | 6486 stp->st_score = (stp->st_score * 3 |
6247 + stp->st_altscore) / 4; | 6487 + stp->st_altscore) / 4; |
6310 | 6550 |
6311 /* | 6551 /* |
6312 * Find suggestions by comparing the word in a sound-a-like form. | 6552 * Find suggestions by comparing the word in a sound-a-like form. |
6313 */ | 6553 */ |
6314 static void | 6554 static void |
6315 spell_try_soundalike(su) | 6555 suggest_try_soundalike(su) |
6316 suginfo_T *su; | 6556 suginfo_T *su; |
6317 { | 6557 { |
6318 char_u salword[MAXWLEN]; | 6558 char_u salword[MAXWLEN]; |
6319 char_u tword[MAXWLEN]; | 6559 char_u tword[MAXWLEN]; |
6320 char_u tfword[MAXWLEN]; | 6560 char_u tfword[MAXWLEN]; |
6412 else | 6652 else |
6413 p = tword; | 6653 p = tword; |
6414 | 6654 |
6415 if (sps_flags & SPS_DOUBLE) | 6655 if (sps_flags & SPS_DOUBLE) |
6416 add_suggestion(su, &su->su_sga, p, | 6656 add_suggestion(su, &su->su_sga, p, |
6657 su->su_badlen, | |
6417 sound_score, FALSE); | 6658 sound_score, FALSE); |
6418 else | 6659 else |
6419 { | 6660 { |
6420 /* Compute the score. */ | 6661 /* Compute the score. */ |
6421 score = spell_edit_score( | 6662 score = spell_edit_score( |
6423 if (sps_flags & SPS_BEST) | 6664 if (sps_flags & SPS_BEST) |
6424 /* give a bonus for the good word | 6665 /* give a bonus for the good word |
6425 * sounding the same as the bad | 6666 * sounding the same as the bad |
6426 * word */ | 6667 * word */ |
6427 add_suggestion(su, &su->su_ga, p, | 6668 add_suggestion(su, &su->su_ga, p, |
6669 su->su_badlen, | |
6428 RESCORE(score, sound_score), | 6670 RESCORE(score, sound_score), |
6429 TRUE); | 6671 TRUE); |
6430 else | 6672 else |
6431 add_suggestion(su, &su->su_ga, p, | 6673 add_suggestion(su, &su->su_ga, p, |
6674 su->su_badlen, | |
6432 score + sound_score, FALSE); | 6675 score + sound_score, FALSE); |
6433 } | 6676 } |
6434 } | 6677 } |
6435 } | 6678 } |
6436 | 6679 |
6614 * Do not add a duplicate suggestion or suggestions with a bad score. | 6857 * Do not add a duplicate suggestion or suggestions with a bad score. |
6615 * When "use_score" is not zero it's used, otherwise the score is computed | 6858 * When "use_score" is not zero it's used, otherwise the score is computed |
6616 * with spell_edit_score(). | 6859 * with spell_edit_score(). |
6617 */ | 6860 */ |
6618 static void | 6861 static void |
6619 add_suggestion(su, gap, goodword, score, had_bonus) | 6862 add_suggestion(su, gap, goodword, badlen, score, had_bonus) |
6620 suginfo_T *su; | 6863 suginfo_T *su; |
6621 garray_T *gap; | 6864 garray_T *gap; |
6622 char_u *goodword; | 6865 char_u *goodword; |
6866 int badlen; /* length of bad word used */ | |
6623 int score; | 6867 int score; |
6624 int had_bonus; /* value for st_had_bonus */ | 6868 int had_bonus; /* value for st_had_bonus */ |
6625 { | 6869 { |
6626 suggest_T *stp; | 6870 suggest_T *stp; |
6627 int i; | 6871 int i; |
6872 char_u *p = NULL; | |
6873 int c = 0; | |
6628 | 6874 |
6629 /* Check that the word wasn't banned. */ | 6875 /* Check that the word wasn't banned. */ |
6630 if (was_banned(su, goodword)) | 6876 if (was_banned(su, goodword)) |
6631 return; | 6877 return; |
6878 | |
6879 /* If past "su_badlen" and the rest is identical stop at "su_badlen". | |
6880 * Remove the common part from "goodword". */ | |
6881 i = badlen - su->su_badlen; | |
6882 if (i > 0) | |
6883 { | |
6884 /* This assumes there was no case folding or it didn't change the | |
6885 * length... */ | |
6886 p = goodword + STRLEN(goodword) - i; | |
6887 if (p > goodword && STRNICMP(su->su_badptr + su->su_badlen, p, i) == 0) | |
6888 { | |
6889 badlen = su->su_badlen; | |
6890 c = *p; | |
6891 *p = NUL; | |
6892 } | |
6893 else | |
6894 p = NULL; | |
6895 } | |
6632 | 6896 |
6633 if (score <= su->su_maxscore) | 6897 if (score <= su->su_maxscore) |
6634 { | 6898 { |
6635 /* Check if the word is already there. */ | 6899 /* Check if the word is already there. */ |
6636 stp = &SUG(*gap, 0); | 6900 stp = &SUG(*gap, 0); |
6654 if (stp->st_word != NULL) | 6918 if (stp->st_word != NULL) |
6655 { | 6919 { |
6656 stp->st_score = score; | 6920 stp->st_score = score; |
6657 stp->st_altscore = 0; | 6921 stp->st_altscore = 0; |
6658 stp->st_had_bonus = had_bonus; | 6922 stp->st_had_bonus = had_bonus; |
6659 stp->st_orglen = su->su_badlen; | 6923 stp->st_orglen = badlen; |
6660 ++gap->ga_len; | 6924 ++gap->ga_len; |
6661 | 6925 |
6662 /* If we have too many suggestions now, sort the list and keep | 6926 /* If we have too many suggestions now, sort the list and keep |
6663 * the best suggestions. */ | 6927 * the best suggestions. */ |
6664 if (gap->ga_len > SUG_MAX_COUNT(su)) | 6928 if (gap->ga_len > SUG_MAX_COUNT(su)) |
6665 su->su_maxscore = cleanup_suggestions(gap, su->su_maxscore, | 6929 su->su_maxscore = cleanup_suggestions(gap, su->su_maxscore, |
6666 SUG_CLEAN_COUNT(su)); | 6930 SUG_CLEAN_COUNT(su)); |
6667 } | 6931 } |
6668 } | 6932 } |
6669 } | 6933 } |
6934 | |
6935 if (p != NULL) | |
6936 *p = c; /* restore "goodword" */ | |
6670 } | 6937 } |
6671 | 6938 |
6672 /* | 6939 /* |
6673 * Add a word to be banned. | 6940 * Add a word to be banned. |
6674 */ | 6941 */ |
6734 suginfo_T *su; | 7001 suginfo_T *su; |
6735 { | 7002 { |
6736 langp_T *lp; | 7003 langp_T *lp; |
6737 suggest_T *stp; | 7004 suggest_T *stp; |
6738 char_u sal_badword[MAXWLEN]; | 7005 char_u sal_badword[MAXWLEN]; |
7006 char_u tword[MAXWLEN]; | |
7007 char_u salword[MAXWLEN]; | |
7008 char_u *p; | |
6739 int score; | 7009 int score; |
6740 int i; | 7010 int i; |
6741 | 7011 |
6742 for (lp = LANGP_ENTRY(curwin->w_buffer->b_langp, 0); | 7012 for (lp = LANGP_ENTRY(curwin->w_buffer->b_langp, 0); |
6743 lp->lp_slang != NULL; ++lp) | 7013 lp->lp_slang != NULL; ++lp) |
6750 for (i = 0; i < su->su_ga.ga_len; ++i) | 7020 for (i = 0; i < su->su_ga.ga_len; ++i) |
6751 { | 7021 { |
6752 stp = &SUG(su->su_ga, i); | 7022 stp = &SUG(su->su_ga, i); |
6753 if (!stp->st_had_bonus) | 7023 if (!stp->st_had_bonus) |
6754 { | 7024 { |
6755 score = spell_sound_score(lp->lp_slang, stp->st_word, | 7025 if (stp->st_orglen <= su->su_badlen) |
6756 sal_badword); | 7026 p = sal_badword; |
7027 else | |
7028 { | |
7029 /* soundfold the bad word with a different length */ | |
7030 (void)spell_casefold(su->su_badptr, stp->st_orglen, | |
7031 tword, MAXWLEN); | |
7032 spell_soundfold(lp->lp_slang, tword, salword); | |
7033 p = salword; | |
7034 } | |
7035 score = spell_sound_score(lp->lp_slang, stp->st_word, p); | |
6757 stp->st_score = RESCORE(stp->st_score, score); | 7036 stp->st_score = RESCORE(stp->st_score, score); |
6758 } | 7037 } |
6759 } | 7038 } |
6760 break; | 7039 break; |
6761 } | 7040 } |