comparison src/spell.c @ 346:8ed2a5098a31

updated for version 7.0090
author vimboss
date Wed, 22 Jun 2005 22:26:26 +0000
parents 7033303ea0c0
children a89aebda7f37
comparison
equal deleted inserted replaced
345:b3989ac62a21 346:8ed2a5098a31
355 int su_maxcount; /* max. number of suggestions displayed */ 355 int su_maxcount; /* max. number of suggestions displayed */
356 int su_maxscore; /* maximum score for adding to su_ga */ 356 int su_maxscore; /* maximum score for adding to su_ga */
357 garray_T su_sga; /* like su_ga, sound-folded scoring */ 357 garray_T su_sga; /* like su_ga, sound-folded scoring */
358 char_u *su_badptr; /* start of bad word in line */ 358 char_u *su_badptr; /* start of bad word in line */
359 int su_badlen; /* length of detected bad word in line */ 359 int su_badlen; /* length of detected bad word in line */
360 int su_badflags; /* caps flags for bad word */
360 char_u su_badword[MAXWLEN]; /* bad word truncated at su_badlen */ 361 char_u su_badword[MAXWLEN]; /* bad word truncated at su_badlen */
361 char_u su_fbadword[MAXWLEN]; /* su_badword case-folded */ 362 char_u su_fbadword[MAXWLEN]; /* su_badword case-folded */
362 hashtab_T su_banned; /* table with banned words */ 363 hashtab_T su_banned; /* table with banned words */
363 } suginfo_T; 364 } suginfo_T;
364 365
482 STATE_INS, /* Insert a byte in the bad word. */ 483 STATE_INS, /* Insert a byte in the bad word. */
483 STATE_SWAP, /* Swap two bytes. */ 484 STATE_SWAP, /* Swap two bytes. */
484 STATE_UNSWAP, /* Undo swap two characters. */ 485 STATE_UNSWAP, /* Undo swap two characters. */
485 STATE_SWAP3, /* Swap two characters over three. */ 486 STATE_SWAP3, /* Swap two characters over three. */
486 STATE_UNSWAP3, /* Undo Swap two characters over three. */ 487 STATE_UNSWAP3, /* Undo Swap two characters over three. */
487 STATE_ROT3L, /* Rotate three characters left */
488 STATE_UNROT3L, /* Undo rotate three characters left */ 488 STATE_UNROT3L, /* Undo rotate three characters left */
489 STATE_ROT3R, /* Rotate three characters right */
490 STATE_UNROT3R, /* Undo rotate three characters right */ 489 STATE_UNROT3R, /* Undo rotate three characters right */
491 STATE_REP_INI, /* Prepare for using REP items. */ 490 STATE_REP_INI, /* Prepare for using REP items. */
492 STATE_REP, /* Use matching REP items from the .aff file. */ 491 STATE_REP, /* Use matching REP items from the .aff file. */
493 STATE_REP_UNDO, /* Undo a REP item replacement. */ 492 STATE_REP_UNDO, /* Undo a REP item replacement. */
494 STATE_FINAL /* End of this node. */ 493 STATE_FINAL /* End of this node. */
495 } state_T; 494 } state_T;
496 495
497 /* 496 /*
498 * Struct to keep the state at each level in spell_try_change(). 497 * Struct to keep the state at each level in suggest_try_change().
499 */ 498 */
500 typedef struct trystate_S 499 typedef struct trystate_S
501 { 500 {
502 state_T ts_state; /* state at this level, STATE_ */ 501 state_T ts_state; /* state at this level, STATE_ */
503 int ts_score; /* score */ 502 int ts_score; /* score */
512 char_u ts_isdiff; /* DIFF_ values */ 511 char_u ts_isdiff; /* DIFF_ values */
513 char_u ts_fcharstart; /* index in fword where badword char started */ 512 char_u ts_fcharstart; /* index in fword where badword char started */
514 #endif 513 #endif
515 char_u ts_save_prewordlen; /* saved "prewordlen" */ 514 char_u ts_save_prewordlen; /* saved "prewordlen" */
516 char_u ts_save_splitoff; /* su_splitoff saved here */ 515 char_u ts_save_splitoff; /* su_splitoff saved here */
517 char_u ts_save_badflags; /* badflags saved here */ 516 char_u ts_save_badflags; /* su_badflags saved here */
518 } trystate_T; 517 } trystate_T;
519 518
520 /* values for ts_isdiff */ 519 /* values for ts_isdiff */
521 #define DIFF_NONE 0 /* no different byte (yet) */ 520 #define DIFF_NONE 0 /* no different byte (yet) */
522 #define DIFF_YES 1 /* different byte found */ 521 #define DIFF_YES 1 /* different byte found */
548 static int spell_casefold __ARGS((char_u *p, int len, char_u *buf, int buflen)); 547 static int spell_casefold __ARGS((char_u *p, int len, char_u *buf, int buflen));
549 static void spell_find_suggest __ARGS((char_u *badptr, suginfo_T *su, int maxcount)); 548 static void spell_find_suggest __ARGS((char_u *badptr, suginfo_T *su, int maxcount));
550 static void spell_find_cleanup __ARGS((suginfo_T *su)); 549 static void spell_find_cleanup __ARGS((suginfo_T *su));
551 static void onecap_copy __ARGS((char_u *word, char_u *wcopy, int upper)); 550 static void onecap_copy __ARGS((char_u *word, char_u *wcopy, int upper));
552 static void allcap_copy __ARGS((char_u *word, char_u *wcopy)); 551 static void allcap_copy __ARGS((char_u *word, char_u *wcopy));
553 static void spell_try_change __ARGS((suginfo_T *su)); 552 static void suggest_try_special __ARGS((suginfo_T *su));
553 static void suggest_try_change __ARGS((suginfo_T *su));
554 static int try_deeper __ARGS((suginfo_T *su, trystate_T *stack, int depth, int score_add)); 554 static int try_deeper __ARGS((suginfo_T *su, trystate_T *stack, int depth, int score_add));
555 static void find_keepcap_word __ARGS((slang_T *slang, char_u *fword, char_u *kword)); 555 static void find_keepcap_word __ARGS((slang_T *slang, char_u *fword, char_u *kword));
556 static void score_comp_sal __ARGS((suginfo_T *su)); 556 static void score_comp_sal __ARGS((suginfo_T *su));
557 static void score_combine __ARGS((suginfo_T *su)); 557 static void score_combine __ARGS((suginfo_T *su));
558 static void spell_try_soundalike __ARGS((suginfo_T *su)); 558 static void suggest_try_soundalike __ARGS((suginfo_T *su));
559 static void make_case_word __ARGS((char_u *fword, char_u *cword, int flags)); 559 static void make_case_word __ARGS((char_u *fword, char_u *cword, int flags));
560 static void set_map_str __ARGS((slang_T *lp, char_u *map)); 560 static void set_map_str __ARGS((slang_T *lp, char_u *map));
561 static int similar_chars __ARGS((slang_T *slang, int c1, int c2)); 561 static int similar_chars __ARGS((slang_T *slang, int c1, int c2));
562 static void add_suggestion __ARGS((suginfo_T *su, garray_T *gap, char_u *goodword, int use_score, int had_bonus)); 562 static void add_suggestion __ARGS((suginfo_T *su, garray_T *gap, char_u *goodword, int badlen, int use_score, int had_bonus));
563 static void add_banned __ARGS((suginfo_T *su, char_u *word)); 563 static void add_banned __ARGS((suginfo_T *su, char_u *word));
564 static int was_banned __ARGS((suginfo_T *su, char_u *word)); 564 static int was_banned __ARGS((suginfo_T *su, char_u *word));
565 static void free_banned __ARGS((suginfo_T *su)); 565 static void free_banned __ARGS((suginfo_T *su));
566 static void rescore_suggestions __ARGS((suginfo_T *su)); 566 static void rescore_suggestions __ARGS((suginfo_T *su));
567 static int cleanup_suggestions __ARGS((garray_T *gap, int maxscore, int keep)); 567 static int cleanup_suggestions __ARGS((garray_T *gap, int maxscore, int keep));
639 * then, skipping over the character. */ 639 * then, skipping over the character. */
640 if (*ptr <= ' ') 640 if (*ptr <= ' ')
641 return 1; 641 return 1;
642 642
643 /* A number is always OK. Also skip hexadecimal numbers 0xFF99 and 643 /* A number is always OK. Also skip hexadecimal numbers 0xFF99 and
644 * 0X99FF. But when a word character follows do check spelling. */ 644 * 0X99FF. But when a word character follows do check spelling to find
645 * "3GPP". */
645 if (*ptr >= '0' && *ptr <= '9') 646 if (*ptr >= '0' && *ptr <= '9')
646 { 647 {
647 if (*ptr == '0' && (ptr[1] == 'x' || ptr[1] == 'X')) 648 if (*ptr == '0' && (ptr[1] == 'x' || ptr[1] == 'X'))
648 mi.mi_end = skiphex(ptr + 2); 649 mi.mi_end = skiphex(ptr + 2);
649 else 650 else
651 mi.mi_end = skipdigits(ptr); 652 mi.mi_end = skipdigits(ptr);
652 nrlen = mi.mi_end - ptr; 653 nrlen = mi.mi_end - ptr;
653 } 654 }
654 if (!SPELL_ISWORDP(mi.mi_end)) 655 if (!SPELL_ISWORDP(mi.mi_end))
655 return (int)(mi.mi_end - ptr); 656 return (int)(mi.mi_end - ptr);
656 } 657
657 658 /* Try including the digits in the word. */
658 /* Find the end of the word. */ 659 mi.mi_fend = ptr + nrlen;
660 }
661 else
662 mi.mi_fend = ptr;
663
664 /* Find the normal end of the word (until the next non-word character). */
659 mi.mi_word = ptr; 665 mi.mi_word = ptr;
660 mi.mi_fend = ptr;
661
662 if (SPELL_ISWORDP(mi.mi_fend)) 666 if (SPELL_ISWORDP(mi.mi_fend))
663 { 667 {
664 /* Make case-folded copy of the characters until the next non-word
665 * character. */
666 do 668 do
667 { 669 {
668 mb_ptr_adv(mi.mi_fend); 670 mb_ptr_adv(mi.mi_fend);
669 } while (*mi.mi_fend != NUL && SPELL_ISWORDP(mi.mi_fend)); 671 } while (*mi.mi_fend != NUL && SPELL_ISWORDP(mi.mi_fend));
670 } 672 }
707 find_prefix(&mi); 709 find_prefix(&mi);
708 } 710 }
709 711
710 if (mi.mi_result != SP_OK) 712 if (mi.mi_result != SP_OK)
711 { 713 {
712 /* If we found a number skip over it. Allows for "42nd". */ 714 /* If we found a number skip over it. Allows for "42nd". Do flag
715 * rare and local words, e.g., "3GPP". */
713 if (nrlen > 0) 716 if (nrlen > 0)
714 return nrlen; 717 {
718 if (mi.mi_result == SP_BAD || mi.mi_result == SP_BANNED)
719 return nrlen;
720 }
715 721
716 /* When we are at a non-word character there is no error, just 722 /* When we are at a non-word character there is no error, just
717 * skip over the character (try looking for a word after it). */ 723 * skip over the character (try looking for a word after it). */
718 if (!SPELL_ISWORDP(ptr)) 724 else if (!SPELL_ISWORDP(ptr))
719 { 725 {
720 #ifdef FEAT_MBYTE 726 #ifdef FEAT_MBYTE
721 if (has_mbyte) 727 if (has_mbyte)
722 return mb_ptr2len_check(ptr); 728 return mb_ptr2len_check(ptr);
723 #endif 729 #endif
808 * - we reach the end of the tree, 814 * - we reach the end of the tree,
809 * - or we reach the end of the line. 815 * - or we reach the end of the line.
810 */ 816 */
811 for (;;) 817 for (;;)
812 { 818 {
813 if (flen == 0 && *mip->mi_fend != NUL) 819 if (flen <= 0 && *mip->mi_fend != NUL)
814 flen = fold_more(mip); 820 flen = fold_more(mip);
815 821
816 len = byts[arridx++]; 822 len = byts[arridx++];
817 823
818 /* If the first possible byte is a zero the word could end here. 824 /* If the first possible byte is a zero the word could end here.
844 if (ptr[wlen] == NUL) 850 if (ptr[wlen] == NUL)
845 break; 851 break;
846 852
847 /* Perform a binary search in the list of accepted bytes. */ 853 /* Perform a binary search in the list of accepted bytes. */
848 c = ptr[wlen]; 854 c = ptr[wlen];
855 if (c == TAB) /* <Tab> is handled like <Space> */
856 c = ' ';
849 lo = arridx; 857 lo = arridx;
850 hi = arridx + len - 1; 858 hi = arridx + len - 1;
851 while (lo < hi) 859 while (lo < hi)
852 { 860 {
853 m = (lo + hi) / 2; 861 m = (lo + hi) / 2;
868 876
869 /* Continue at the child (if there is one). */ 877 /* Continue at the child (if there is one). */
870 arridx = idxs[lo]; 878 arridx = idxs[lo];
871 ++wlen; 879 ++wlen;
872 --flen; 880 --flen;
881
882 /* One space in the good word may stand for several spaces in the
883 * checked word. */
884 if (c == ' ')
885 {
886 for (;;)
887 {
888 if (flen <= 0 && *mip->mi_fend != NUL)
889 flen = fold_more(mip);
890 if (ptr[wlen] != ' ' && ptr[wlen] != TAB)
891 break;
892 ++wlen;
893 --flen;
894 }
895 }
873 } 896 }
874 897
875 /* 898 /*
876 * Verify that one of the possible endings is valid. Try the longest 899 * Verify that one of the possible endings is valid. Try the longest
877 * first. 900 * first.
927 * to do it again. */ 950 * to do it again. */
928 mip->mi_cend = mip->mi_word + wlen; 951 mip->mi_cend = mip->mi_word + wlen;
929 mip->mi_capflags = captype(mip->mi_word, mip->mi_cend); 952 mip->mi_capflags = captype(mip->mi_word, mip->mi_cend);
930 } 953 }
931 954
932 if (!spell_valid_case(mip->mi_capflags, flags)) 955 if (mip->mi_capflags == WF_KEEPCAP
956 || !spell_valid_case(mip->mi_capflags, flags))
933 continue; 957 continue;
934 } 958 }
935 959
936 /* When mode is FIND_PREFIX the word must support the prefix: 960 /* When mode is FIND_PREFIX the word must support the prefix:
937 * check the prefix ID and the condition. Do that for the list at 961 * check the prefix ID and the condition. Do that for the list at
1168 { 1192 {
1169 linenr_T lnum; 1193 linenr_T lnum;
1170 pos_T found_pos; 1194 pos_T found_pos;
1171 char_u *line; 1195 char_u *line;
1172 char_u *p; 1196 char_u *p;
1173 int attr = 0; 1197 char_u *endp;
1198 int attr;
1174 int len; 1199 int len;
1175 int has_syntax = syntax_present(curbuf); 1200 int has_syntax = syntax_present(curbuf);
1176 int col; 1201 int col;
1177 int can_spell; 1202 int can_spell;
1203 char_u *buf = NULL;
1204 int buflen = 0;
1205 int skip = 0;
1178 1206
1179 if (!curwin->w_p_spell || *curbuf->b_p_spl == NUL) 1207 if (!curwin->w_p_spell || *curbuf->b_p_spl == NUL)
1180 { 1208 {
1181 EMSG(_("E756: Spell checking not enabled")); 1209 EMSG(_("E756: Spell checking not enabled"));
1182 return FAIL; 1210 return FAIL;
1183 } 1211 }
1184 1212
1185 /* 1213 /*
1186 * Start looking for bad word at the start of the line, because we can't 1214 * Start looking for bad word at the start of the line, because we can't
1187 * start halfway a word, we don't know where it starts or ends. 1215 * start halfway a word, we don't know where it starts or ends.
1188 * 1216 *
1189 * When searching backwards, we continue in the line to find the last 1217 * When searching backwards, we continue in the line to find the last
1190 * bad word (in the cursor line: before the cursor). 1218 * bad word (in the cursor line: before the cursor).
1219 *
1220 * We concatenate the start of the next line, so that wrapped words work
1221 * (e.g. "et<line-break>cetera"). Doesn't work when searching backwards
1222 * though...
1191 */ 1223 */
1192 lnum = curwin->w_cursor.lnum; 1224 lnum = curwin->w_cursor.lnum;
1193 found_pos.lnum = 0; 1225 found_pos.lnum = 0;
1194 1226
1195 while (!got_int) 1227 while (!got_int)
1196 { 1228 {
1197 line = ml_get(lnum); 1229 line = ml_get(lnum);
1198 p = line; 1230
1199 1231 len = STRLEN(line);
1200 while (*p != NUL) 1232 if (buflen < len + MAXWLEN + 2)
1233 {
1234 vim_free(buf);
1235 buflen = len + MAXWLEN + 2;
1236 buf = alloc(buflen);
1237 if (buf == NULL)
1238 break;
1239 }
1240
1241 /* Copy the line into "buf" and append the start of the next line if
1242 * possible. */
1243 STRCPY(buf, line);
1244 if (lnum < curbuf->b_ml.ml_line_count)
1245 spell_cat_line(buf + STRLEN(buf), ml_get(lnum + 1), MAXWLEN);
1246
1247 p = buf + skip;
1248 endp = buf + len;
1249 while (p < endp)
1201 { 1250 {
1202 /* When searching backward don't search after the cursor. */ 1251 /* When searching backward don't search after the cursor. */
1203 if (dir == BACKWARD 1252 if (dir == BACKWARD
1204 && lnum == curwin->w_cursor.lnum 1253 && lnum == curwin->w_cursor.lnum
1205 && (colnr_T)(p - line) >= curwin->w_cursor.col) 1254 && (colnr_T)(p - buf) >= curwin->w_cursor.col)
1206 break; 1255 break;
1207 1256
1208 /* start of word */ 1257 /* start of word */
1258 attr = 0;
1209 len = spell_check(curwin, p, &attr); 1259 len = spell_check(curwin, p, &attr);
1210 1260
1211 if (attr != 0) 1261 if (attr != 0)
1212 { 1262 {
1213 /* We found a bad word. Check the attribute. */ 1263 /* We found a bad word. Check the attribute. */
1216 /* When searching forward only accept a bad word after 1266 /* When searching forward only accept a bad word after
1217 * the cursor. */ 1267 * the cursor. */
1218 if (dir == BACKWARD 1268 if (dir == BACKWARD
1219 || lnum > curwin->w_cursor.lnum 1269 || lnum > curwin->w_cursor.lnum
1220 || (lnum == curwin->w_cursor.lnum 1270 || (lnum == curwin->w_cursor.lnum
1221 && (colnr_T)(curline ? p - line + len 1271 && (colnr_T)(curline ? p - buf + len
1222 : p - line) 1272 : p - buf)
1223 > curwin->w_cursor.col)) 1273 > curwin->w_cursor.col))
1224 { 1274 {
1225 if (has_syntax) 1275 if (has_syntax)
1226 { 1276 {
1227 col = p - line; 1277 col = p - buf;
1228 (void)syn_get_id(lnum, (colnr_T)col, 1278 (void)syn_get_id(lnum, (colnr_T)col,
1229 FALSE, &can_spell); 1279 FALSE, &can_spell);
1230
1231 /* have to get the line again, a multi-line
1232 * regexp may make it invalid */
1233 line = ml_get(lnum);
1234 p = line + col;
1235 } 1280 }
1236 else 1281 else
1237 can_spell = TRUE; 1282 can_spell = TRUE;
1238 1283
1239 if (can_spell) 1284 if (can_spell)
1240 { 1285 {
1241 found_pos.lnum = lnum; 1286 found_pos.lnum = lnum;
1242 found_pos.col = p - line; 1287 found_pos.col = p - buf;
1243 #ifdef FEAT_VIRTUALEDIT 1288 #ifdef FEAT_VIRTUALEDIT
1244 found_pos.coladd = 0; 1289 found_pos.coladd = 0;
1245 #endif 1290 #endif
1246 if (dir == FORWARD) 1291 if (dir == FORWARD)
1247 { 1292 {
1248 /* No need to search further. */ 1293 /* No need to search further. */
1249 curwin->w_cursor = found_pos; 1294 curwin->w_cursor = found_pos;
1295 vim_free(buf);
1250 return OK; 1296 return OK;
1251 } 1297 }
1252 } 1298 }
1253 } 1299 }
1254 } 1300 }
1255 attr = 0;
1256 } 1301 }
1257 1302
1258 /* advance to character after the word */ 1303 /* advance to character after the word */
1259 p += len; 1304 p += len;
1260 if (*p == NUL)
1261 break;
1262 } 1305 }
1263 1306
1264 if (curline) 1307 if (curline)
1265 return FAIL; /* only check cursor line */ 1308 break; /* only check cursor line */
1266 1309
1267 /* Advance to next line. */ 1310 /* Advance to next line. */
1268 if (dir == BACKWARD) 1311 if (dir == BACKWARD)
1269 { 1312 {
1270 if (found_pos.lnum != 0) 1313 if (found_pos.lnum != 0)
1271 { 1314 {
1272 /* Use the last match in the line. */ 1315 /* Use the last match in the line. */
1273 curwin->w_cursor = found_pos; 1316 curwin->w_cursor = found_pos;
1317 vim_free(buf);
1274 return OK; 1318 return OK;
1275 } 1319 }
1276 if (lnum == 1) 1320 if (lnum == 1)
1277 return FAIL; 1321 break;
1278 --lnum; 1322 --lnum;
1279 } 1323 }
1280 else 1324 else
1281 { 1325 {
1282 if (lnum == curbuf->b_ml.ml_line_count) 1326 if (lnum == curbuf->b_ml.ml_line_count)
1283 return FAIL; 1327 break;
1284 ++lnum; 1328 ++lnum;
1329
1330 /* Skip the characters at the start of the next line that were
1331 * included in a match crossing line boundaries. */
1332 if (attr == 0)
1333 skip = p - endp;
1334 else
1335 skip = 0;
1285 } 1336 }
1286 1337
1287 line_breakcheck(); 1338 line_breakcheck();
1288 } 1339 }
1289 1340
1290 return FAIL; /* interrupted */ 1341 vim_free(buf);
1342 return FAIL;
1343 }
1344
1345 /*
1346 * For spell checking: concatenate the start of the following line "line" into
1347 * "buf", blanking-out special characters. Copy less than "maxlen" bytes.
1348 */
1349 void
1350 spell_cat_line(buf, line, maxlen)
1351 char_u *buf;
1352 char_u *line;
1353 int maxlen;
1354 {
1355 char_u *p;
1356 int n;
1357
1358 p = skipwhite(line);
1359 while (vim_strchr((char_u *)"*#/\"\t", *p) != NULL)
1360 p = skipwhite(p + 1);
1361
1362 if (*p != NUL)
1363 {
1364 *buf = ' ';
1365 vim_strncpy(buf + 1, line, maxlen - 1);
1366 n = p - line;
1367 if (n >= maxlen)
1368 n = maxlen - 1;
1369 vim_memset(buf + 1, ' ', n);
1370 }
1291 } 1371 }
1292 1372
1293 /* 1373 /*
1294 * Load word list(s) for "lang" from Vim spell file(s). 1374 * Load word list(s) for "lang" from Vim spell file(s).
1295 * "lang" must be the language without the region: e.g., "en". 1375 * "lang" must be the language without the region: e.g., "en".
1872 1952
1873 /* 1953 /*
1874 * Read one row of siblings from the spell file and store it in the byte array 1954 * Read one row of siblings from the spell file and store it in the byte array
1875 * "byts" and index array "idxs". Recursively read the children. 1955 * "byts" and index array "idxs". Recursively read the children.
1876 * 1956 *
1877 * NOTE: The code here must match put_tree(). 1957 * NOTE: The code here must match put_node().
1878 * 1958 *
1879 * Returns the index following the siblings. 1959 * Returns the index following the siblings.
1880 * Returns -1 if the file is shorter than expected. 1960 * Returns -1 if the file is shorter than expected.
1881 * Returns -2 if there is a format error. 1961 * Returns -2 if there is a format error.
1882 */ 1962 */
2291 typedef struct afffile_S 2371 typedef struct afffile_S
2292 { 2372 {
2293 char_u *af_enc; /* "SET", normalized, alloc'ed string or NULL */ 2373 char_u *af_enc; /* "SET", normalized, alloc'ed string or NULL */
2294 int af_rar; /* RAR ID for rare word */ 2374 int af_rar; /* RAR ID for rare word */
2295 int af_kep; /* KEP ID for keep-case word */ 2375 int af_kep; /* KEP ID for keep-case word */
2376 int af_bad; /* BAD ID for banned word */
2296 int af_pfxpostpone; /* postpone prefixes without chop string */ 2377 int af_pfxpostpone; /* postpone prefixes without chop string */
2297 hashtab_T af_pref; /* hashtable for prefixes, affheader_T */ 2378 hashtab_T af_pref; /* hashtable for prefixes, affheader_T */
2298 hashtab_T af_suff; /* hashtable for suffixes, affheader_T */ 2379 hashtab_T af_suff; /* hashtable for suffixes, affheader_T */
2299 } afffile_T; 2380 } afffile_T;
2300 2381
2338 * A node in the tree. 2419 * A node in the tree.
2339 */ 2420 */
2340 typedef struct wordnode_S wordnode_T; 2421 typedef struct wordnode_S wordnode_T;
2341 struct wordnode_S 2422 struct wordnode_S
2342 { 2423 {
2343 char_u wn_hashkey[6]; /* room for the hash key */ 2424 union /* shared to save space */
2344 wordnode_T *wn_next; /* next node with same hash key */ 2425 {
2426 char_u hashkey[6]; /* room for the hash key */
2427 int index; /* index in written nodes (valid after first
2428 round) */
2429 } wn_u1;
2430 union /* shared to save space */
2431 {
2432 wordnode_T *next; /* next node with same hash key */
2433 wordnode_T *wnode; /* parent node that will write this node */
2434 } wn_u2;
2345 wordnode_T *wn_child; /* child (next byte in word) */ 2435 wordnode_T *wn_child; /* child (next byte in word) */
2346 wordnode_T *wn_sibling; /* next sibling (alternate byte in word, 2436 wordnode_T *wn_sibling; /* next sibling (alternate byte in word,
2347 always sorted) */ 2437 always sorted) */
2348 wordnode_T *wn_wnode; /* parent node that will write this node */
2349 int wn_index; /* index in written nodes (valid after first
2350 round) */
2351 char_u wn_byte; /* Byte for this node. NUL for word end */ 2438 char_u wn_byte; /* Byte for this node. NUL for word end */
2352 char_u wn_flags; /* when wn_byte is NUL: WF_ flags */ 2439 char_u wn_flags; /* when wn_byte is NUL: WF_ flags */
2353 short wn_region; /* when wn_byte is NUL: region mask; for 2440 short wn_region; /* when wn_byte is NUL: region mask; for
2354 PREFIXTREE it's the prefcondnr */ 2441 PREFIXTREE it's the prefcondnr */
2355 char_u wn_prefixID; /* supported/required prefix ID or 0 */ 2442 char_u wn_prefixID; /* supported/required prefix ID or 0 */
2407 static int tree_add_word __ARGS((char_u *word, wordnode_T *tree, int flags, int region, int prefixID, sblock_T **blp)); 2494 static int tree_add_word __ARGS((char_u *word, wordnode_T *tree, int flags, int region, int prefixID, sblock_T **blp));
2408 static void wordtree_compress __ARGS((wordnode_T *root, spellinfo_T *spin)); 2495 static void wordtree_compress __ARGS((wordnode_T *root, spellinfo_T *spin));
2409 static int node_compress __ARGS((wordnode_T *node, hashtab_T *ht, int *tot)); 2496 static int node_compress __ARGS((wordnode_T *node, hashtab_T *ht, int *tot));
2410 static int node_equal __ARGS((wordnode_T *n1, wordnode_T *n2)); 2497 static int node_equal __ARGS((wordnode_T *n1, wordnode_T *n2));
2411 static void write_vim_spell __ARGS((char_u *fname, spellinfo_T *spin)); 2498 static void write_vim_spell __ARGS((char_u *fname, spellinfo_T *spin));
2412 static int put_tree __ARGS((FILE *fd, wordnode_T *node, int index, int regionmask, int prefixtree)); 2499 static void clear_node __ARGS((wordnode_T *node));
2500 static int put_node __ARGS((FILE *fd, wordnode_T *node, int index, int regionmask, int prefixtree));
2413 static void mkspell __ARGS((int fcount, char_u **fnames, int ascii, int overwrite, int added_word)); 2501 static void mkspell __ARGS((int fcount, char_u **fnames, int ascii, int overwrite, int added_word));
2414 static void init_spellfile __ARGS((void)); 2502 static void init_spellfile __ARGS((void));
2415 2503
2416 /* 2504 /*
2417 * Read the affix file "fname". 2505 * Read the affix file "fname".
2573 { 2661 {
2574 aff->af_kep = items[1][0]; 2662 aff->af_kep = items[1][0];
2575 if (items[1][1] != NUL) 2663 if (items[1][1] != NUL)
2576 smsg((char_u *)_(e_affname), fname, lnum, items[1]); 2664 smsg((char_u *)_(e_affname), fname, lnum, items[1]);
2577 } 2665 }
2666 else if (STRCMP(items[0], "BAD") == 0 && itemcnt == 2
2667 && aff->af_bad == 0)
2668 {
2669 aff->af_bad = items[1][0];
2670 if (items[1][1] != NUL)
2671 smsg((char_u *)_(e_affname), fname, lnum, items[1]);
2672 }
2578 else if (STRCMP(items[0], "PFXPOSTPONE") == 0 && itemcnt == 1) 2673 else if (STRCMP(items[0], "PFXPOSTPONE") == 0 && itemcnt == 1)
2579 { 2674 {
2580 aff->af_pfxpostpone = TRUE; 2675 aff->af_pfxpostpone = TRUE;
2581 } 2676 }
2582 else if ((STRCMP(items[0], "PFX") == 0 2677 else if ((STRCMP(items[0], "PFX") == 0
2761 smsg((char_u *)_("Expected MAP count in %s line %d"), 2856 smsg((char_u *)_("Expected MAP count in %s line %d"),
2762 fname, lnum); 2857 fname, lnum);
2763 } 2858 }
2764 else if (do_map) 2859 else if (do_map)
2765 { 2860 {
2861 int c;
2862
2863 /* Check that every character appears only once. */
2864 for (p = items[1]; *p != NUL; )
2865 {
2866 #ifdef FEAT_MBYTE
2867 c = mb_ptr2char_adv(&p);
2868 #else
2869 c = *p++;
2870 #endif
2871 if ((spin->si_map.ga_len > 0
2872 && vim_strchr(spin->si_map.ga_data, c)
2873 != NULL)
2874 || vim_strchr(p, c) != NULL)
2875 smsg((char_u *)_("Duplicate character in MAP in %s line %d"),
2876 fname, lnum);
2877 }
2878
2766 /* We simply concatenate all the MAP strings, separated by 2879 /* We simply concatenate all the MAP strings, separated by
2767 * slashes. */ 2880 * slashes. */
2768 ga_concat(&spin->si_map, items[1]); 2881 ga_concat(&spin->si_map, items[1]);
2769 ga_append(&spin->si_map, '/'); 2882 ga_append(&spin->si_map, '/');
2770 } 2883 }
3076 && vim_strchr(afflist, affile->af_kep) != NULL) 3189 && vim_strchr(afflist, affile->af_kep) != NULL)
3077 flags |= WF_KEEPCAP; 3190 flags |= WF_KEEPCAP;
3078 if (affile->af_rar != NUL 3191 if (affile->af_rar != NUL
3079 && vim_strchr(afflist, affile->af_rar) != NULL) 3192 && vim_strchr(afflist, affile->af_rar) != NULL)
3080 flags |= WF_RARE; 3193 flags |= WF_RARE;
3194 if (affile->af_bad != NUL
3195 && vim_strchr(afflist, affile->af_bad) != NULL)
3196 flags |= WF_BANNED;
3081 3197
3082 if (affile->af_pfxpostpone) 3198 if (affile->af_pfxpostpone)
3083 /* Need to store the list of prefix IDs with the word. */ 3199 /* Need to store the list of prefix IDs with the word. */
3084 pfxlist = get_pfxlist(affile, afflist, &spin->si_blocks); 3200 pfxlist = get_pfxlist(affile, afflist, &spin->si_blocks);
3085 } 3201 }
3753 for (np = node; np != NULL; np = np->wn_sibling) 3869 for (np = node; np != NULL; np = np->wn_sibling)
3754 { 3870 {
3755 ++len; 3871 ++len;
3756 if ((child = np->wn_child) != NULL) 3872 if ((child = np->wn_child) != NULL)
3757 { 3873 {
3758 /* Compress the child. This fills wn_hashkey. */ 3874 /* Compress the child. This fills hashkey. */
3759 compressed += node_compress(child, ht, tot); 3875 compressed += node_compress(child, ht, tot);
3760 3876
3761 /* Try to find an identical child. */ 3877 /* Try to find an identical child. */
3762 hash = hash_hash(child->wn_hashkey); 3878 hash = hash_hash(child->wn_u1.hashkey);
3763 hi = hash_lookup(ht, child->wn_hashkey, hash); 3879 hi = hash_lookup(ht, child->wn_u1.hashkey, hash);
3764 tp = NULL; 3880 tp = NULL;
3765 if (!HASHITEM_EMPTY(hi)) 3881 if (!HASHITEM_EMPTY(hi))
3766 { 3882 {
3767 /* There are children with an identical hash value. Now check 3883 /* There are children with an identical hash value. Now check
3768 * if there is one that is really identical. */ 3884 * if there is one that is really identical. */
3769 for (tp = HI2WN(hi); tp != NULL; tp = tp->wn_next) 3885 for (tp = HI2WN(hi); tp != NULL; tp = tp->wn_u2.next)
3770 if (node_equal(child, tp)) 3886 if (node_equal(child, tp))
3771 { 3887 {
3772 /* Found one! Now use that child in place of the 3888 /* Found one! Now use that child in place of the
3773 * current one. This means the current child is 3889 * current one. This means the current child is
3774 * dropped from the tree. */ 3890 * dropped from the tree. */
3780 { 3896 {
3781 /* No other child with this hash value equals the child of 3897 /* No other child with this hash value equals the child of
3782 * the node, add it to the linked list after the first 3898 * the node, add it to the linked list after the first
3783 * item. */ 3899 * item. */
3784 tp = HI2WN(hi); 3900 tp = HI2WN(hi);
3785 child->wn_next = tp->wn_next; 3901 child->wn_u2.next = tp->wn_u2.next;
3786 tp->wn_next = child; 3902 tp->wn_u2.next = child;
3787 } 3903 }
3788 } 3904 }
3789 else 3905 else
3790 /* No other child has this hash value, add it to the 3906 /* No other child has this hash value, add it to the
3791 * hashtable. */ 3907 * hashtable. */
3792 hash_add_item(ht, hi, child->wn_hashkey, hash); 3908 hash_add_item(ht, hi, child->wn_u1.hashkey, hash);
3793 } 3909 }
3794 } 3910 }
3795 *tot += len; 3911 *tot += len;
3796 3912
3797 /* 3913 /*
3798 * Make a hash key for the node and its siblings, so that we can quickly 3914 * Make a hash key for the node and its siblings, so that we can quickly
3799 * find a lookalike node. This must be done after compressing the sibling 3915 * find a lookalike node. This must be done after compressing the sibling
3800 * list, otherwise the hash key would become invalid by the compression. 3916 * list, otherwise the hash key would become invalid by the compression.
3801 */ 3917 */
3802 node->wn_hashkey[0] = len; 3918 node->wn_u1.hashkey[0] = len;
3803 nr = 0; 3919 nr = 0;
3804 for (np = node; np != NULL; np = np->wn_sibling) 3920 for (np = node; np != NULL; np = np->wn_sibling)
3805 { 3921 {
3806 if (np->wn_byte == NUL) 3922 if (np->wn_byte == NUL)
3807 /* end node: use wn_flags, wn_region and wn_prefixID */ 3923 /* end node: use wn_flags, wn_region and wn_prefixID */
3812 nr = nr * 101 + n; 3928 nr = nr * 101 + n;
3813 } 3929 }
3814 3930
3815 /* Avoid NUL bytes, it terminates the hash key. */ 3931 /* Avoid NUL bytes, it terminates the hash key. */
3816 n = nr & 0xff; 3932 n = nr & 0xff;
3817 node->wn_hashkey[1] = n == 0 ? 1 : n; 3933 node->wn_u1.hashkey[1] = n == 0 ? 1 : n;
3818 n = (nr >> 8) & 0xff; 3934 n = (nr >> 8) & 0xff;
3819 node->wn_hashkey[2] = n == 0 ? 1 : n; 3935 node->wn_u1.hashkey[2] = n == 0 ? 1 : n;
3820 n = (nr >> 16) & 0xff; 3936 n = (nr >> 16) & 0xff;
3821 node->wn_hashkey[3] = n == 0 ? 1 : n; 3937 node->wn_u1.hashkey[3] = n == 0 ? 1 : n;
3822 n = (nr >> 24) & 0xff; 3938 n = (nr >> 24) & 0xff;
3823 node->wn_hashkey[4] = n == 0 ? 1 : n; 3939 node->wn_u1.hashkey[4] = n == 0 ? 1 : n;
3824 node->wn_hashkey[5] = NUL; 3940 node->wn_u1.hashkey[5] = NUL;
3825 3941
3826 return compressed; 3942 return compressed;
3827 } 3943 }
3828 3944
3829 /* 3945 /*
4015 else if (round == 2) 4131 else if (round == 2)
4016 tree = spin->si_keeproot; 4132 tree = spin->si_keeproot;
4017 else 4133 else
4018 tree = spin->si_prefroot; 4134 tree = spin->si_prefroot;
4019 4135
4136 /* Clear the index and wnode fields in the tree. */
4137 clear_node(tree);
4138
4020 /* Count the number of nodes. Needed to be able to allocate the 4139 /* Count the number of nodes. Needed to be able to allocate the
4021 * memory when reading the nodes. Also fills in the index for shared 4140 * memory when reading the nodes. Also fills in index for shared
4022 * nodes. */ 4141 * nodes. */
4023 nodecount = put_tree(NULL, tree, 0, regionmask, round == 3); 4142 nodecount = put_node(NULL, tree, 0, regionmask, round == 3);
4024 4143
4025 /* number of nodes in 4 bytes */ 4144 /* number of nodes in 4 bytes */
4026 put_bytes(fd, (long_u)nodecount, 4); /* <nodecount> */ 4145 put_bytes(fd, (long_u)nodecount, 4); /* <nodecount> */
4027 spin->si_memtot += nodecount + nodecount * sizeof(int); 4146 spin->si_memtot += nodecount + nodecount * sizeof(int);
4028 4147
4029 /* Write the nodes. */ 4148 /* Write the nodes. */
4030 (void)put_tree(fd, tree, 0, regionmask, round == 3); 4149 (void)put_node(fd, tree, 0, regionmask, round == 3);
4031 } 4150 }
4032 4151
4033 fclose(fd); 4152 fclose(fd);
4034 } 4153 }
4154
4155 /*
4156 * Clear the index and wnode fields of "node", its siblings and its
4157 * children. This is needed because they are a union with other items to save
4158 * space.
 * put_node() tests the index against zero and the wnode against NULL, thus
 * both must be reset before a tree is written.
4159 */
4160 static void
4161 clear_node(node)
4162 wordnode_T *node; /* first node of a sibling list; NULL is accepted */
4163 {
4164 wordnode_T *np; /* walks the sibling list that starts at "node" */
4165
4166 if (node != NULL)
4167 for (np = node; np != NULL; np = np->wn_sibling)
4168 {
4169 np->wn_u1.index = 0;
4170 np->wn_u2.wnode = NULL;
4171
4172 if (np->wn_byte != NUL) /* a node with a NUL byte is not descended into */
4173 clear_node(np->wn_child);
4174 }
4175 }
4176
4035 4177
4036 /* 4178 /*
4037 * Dump a word tree at node "node". 4179 * Dump a word tree at node "node".
4038 * 4180 *
4039 * This first writes the list of possible bytes (siblings). Then for each 4181 * This first writes the list of possible bytes (siblings). Then for each
4044 * file). 4186 * file).
4045 * 4187 *
4046 * Returns the number of nodes used. 4188 * Returns the number of nodes used.
4047 */ 4189 */
4048 static int 4190 static int
4049 put_tree(fd, node, index, regionmask, prefixtree) 4191 put_node(fd, node, index, regionmask, prefixtree)
4050 FILE *fd; /* NULL when only counting */ 4192 FILE *fd; /* NULL when only counting */
4051 wordnode_T *node; 4193 wordnode_T *node;
4052 int index; 4194 int index;
4053 int regionmask; 4195 int regionmask;
4054 int prefixtree; /* TRUE for PREFIXTREE */ 4196 int prefixtree; /* TRUE for PREFIXTREE */
4061 /* If "node" is zero the tree is empty. */ 4203 /* If "node" is zero the tree is empty. */
4062 if (node == NULL) 4204 if (node == NULL)
4063 return 0; 4205 return 0;
4064 4206
4065 /* Store the index where this node is written. */ 4207 /* Store the index where this node is written. */
4066 node->wn_index = index; 4208 node->wn_u1.index = index;
4067 4209
4068 /* Count the number of siblings. */ 4210 /* Count the number of siblings. */
4069 for (np = node; np != NULL; np = np->wn_sibling) 4211 for (np = node; np != NULL; np = np->wn_sibling)
4070 ++siblingcount; 4212 ++siblingcount;
4071 4213
4114 } 4256 }
4115 } 4257 }
4116 } 4258 }
4117 else 4259 else
4118 { 4260 {
4119 if (np->wn_child->wn_index != 0 && np->wn_child->wn_wnode != node) 4261 if (np->wn_child->wn_u1.index != 0
4262 && np->wn_child->wn_u2.wnode != node)
4120 { 4263 {
4121 /* The child is written elsewhere, write the reference. */ 4264 /* The child is written elsewhere, write the reference. */
4122 if (fd != NULL) 4265 if (fd != NULL)
4123 { 4266 {
4124 putc(BY_INDEX, fd); /* <byte> */ 4267 putc(BY_INDEX, fd); /* <byte> */
4125 /* <nodeidx> */ 4268 /* <nodeidx> */
4126 put_bytes(fd, (long_u)np->wn_child->wn_index, 3); 4269 put_bytes(fd, (long_u)np->wn_child->wn_u1.index, 3);
4127 } 4270 }
4128 } 4271 }
4129 else if (np->wn_child->wn_wnode == NULL) 4272 else if (np->wn_child->wn_u2.wnode == NULL)
4130 /* We will write the child below and give it an index. */ 4273 /* We will write the child below and give it an index. */
4131 np->wn_child->wn_wnode = node; 4274 np->wn_child->wn_u2.wnode = node;
4132 4275
4133 if (fd != NULL) 4276 if (fd != NULL)
4134 if (putc(np->wn_byte, fd) == EOF) /* <byte> or <xbyte> */ 4277 if (putc(np->wn_byte, fd) == EOF) /* <byte> or <xbyte> */
4135 { 4278 {
4136 EMSG(_(e_write)); 4279 EMSG(_(e_write));
4143 * the count. */ 4286 * the count. */
4144 newindex += siblingcount + 1; 4287 newindex += siblingcount + 1;
4145 4288
4146 /* Recursively dump the children of each sibling. */ 4289 /* Recursively dump the children of each sibling. */
4147 for (np = node; np != NULL; np = np->wn_sibling) 4290 for (np = node; np != NULL; np = np->wn_sibling)
4148 if (np->wn_byte != 0 && np->wn_child->wn_wnode == node) 4291 if (np->wn_byte != 0 && np->wn_child->wn_u2.wnode == node)
4149 newindex = put_tree(fd, np->wn_child, newindex, regionmask, 4292 newindex = put_node(fd, np->wn_child, newindex, regionmask,
4150 prefixtree); 4293 prefixtree);
4151 4294
4152 return newindex; 4295 return newindex;
4153 } 4296 }
4154 4297
4898 int c; 5041 int c;
4899 suginfo_T sug; 5042 suginfo_T sug;
4900 suggest_T *stp; 5043 suggest_T *stp;
4901 5044
4902 /* Find the start of the badly spelled word. */ 5045 /* Find the start of the badly spelled word. */
4903 if (spell_move_to(FORWARD, TRUE, TRUE) == FAIL) 5046 if (spell_move_to(FORWARD, TRUE, TRUE) == FAIL
4904 { 5047 || curwin->w_cursor.col > prev_cursor.col)
4905 beep_flush(); 5048 {
4906 return; 5049 if (!curwin->w_p_spell || *curbuf->b_p_spl == NUL)
5050 return;
5051
5052 /* No bad word or it starts after the cursor: use the word under the
5053 * cursor. */
5054 curwin->w_cursor = prev_cursor;
5055 line = ml_get_curline();
5056 p = line + curwin->w_cursor.col;
5057 /* Backup to before start of word. */
5058 while (p > line && SPELL_ISWORDP(p))
5059 mb_ptr_back(line, p);
5060 /* Forward to start of word. */
5061 while (!SPELL_ISWORDP(p))
5062 mb_ptr_adv(p);
5063
5064 if (!SPELL_ISWORDP(p)) /* No word found. */
5065 {
5066 beep_flush();
5067 return;
5068 }
5069 curwin->w_cursor.col = p - line;
4907 } 5070 }
4908 5071
4909 /* Get the word and its length. */ 5072 /* Get the word and its length. */
4910 line = ml_get_curline(); 5073 line = ml_get_curline();
4911 5074
4921 vim_snprintf((char *)IObuff, IOSIZE, _("Change \"%.*s\" to:"), 5084 vim_snprintf((char *)IObuff, IOSIZE, _("Change \"%.*s\" to:"),
4922 sug.su_badlen, sug.su_badptr); 5085 sug.su_badlen, sug.su_badptr);
4923 msg_puts(IObuff); 5086 msg_puts(IObuff);
4924 msg_clr_eos(); 5087 msg_clr_eos();
4925 msg_putchar('\n'); 5088 msg_putchar('\n');
5089
4926 msg_scroll = TRUE; 5090 msg_scroll = TRUE;
4927 for (i = 0; i < sug.su_ga.ga_len; ++i) 5091 for (i = 0; i < sug.su_ga.ga_len; ++i)
4928 { 5092 {
4929 stp = &SUG(sug.su_ga, i); 5093 stp = &SUG(sug.su_ga, i);
4930 5094
4933 STRCPY(wcopy, stp->st_word); 5097 STRCPY(wcopy, stp->st_word);
4934 if (sug.su_badlen > stp->st_orglen) 5098 if (sug.su_badlen > stp->st_orglen)
4935 vim_strncpy(wcopy + STRLEN(wcopy), 5099 vim_strncpy(wcopy + STRLEN(wcopy),
4936 sug.su_badptr + stp->st_orglen, 5100 sug.su_badptr + stp->st_orglen,
4937 sug.su_badlen - stp->st_orglen); 5101 sug.su_badlen - stp->st_orglen);
5102 vim_snprintf((char *)IObuff, IOSIZE, _("%2d \"%s\""), i + 1, wcopy);
5103 msg_puts(IObuff);
5104
5105 /* The word may replace more than "su_badlen". */
5106 if (sug.su_badlen < stp->st_orglen)
5107 {
5108 vim_snprintf((char *)IObuff, IOSIZE, _(" < \"%.*s\""),
5109 stp->st_orglen, sug.su_badptr);
5110 msg_puts(IObuff);
5111 }
5112
4938 if (p_verbose > 0) 5113 if (p_verbose > 0)
4939 { 5114 {
5115 /* Add the score. */
4940 if (sps_flags & SPS_DOUBLE) 5116 if (sps_flags & SPS_DOUBLE)
4941 vim_snprintf((char *)IObuff, IOSIZE, 5117 vim_snprintf((char *)IObuff, IOSIZE, _(" (%s%d - %d)"),
4942 _("%2d \"%s\" (%s%d - %d)"),
4943 i + 1, wcopy,
4944 stp->st_salscore ? "s " : "", 5118 stp->st_salscore ? "s " : "",
4945 stp->st_score, stp->st_altscore); 5119 stp->st_score, stp->st_altscore);
4946 else 5120 else
4947 vim_snprintf((char *)IObuff, IOSIZE, _("%2d \"%s\" (%d)"), 5121 vim_snprintf((char *)IObuff, IOSIZE, _(" (%d)"),
4948 i + 1, wcopy, stp->st_score); 5122 stp->st_score);
4949 } 5123 msg_advance(30);
4950 else 5124 msg_puts(IObuff);
4951 vim_snprintf((char *)IObuff, IOSIZE, _("%2d \"%s\""), 5125 }
4952 i + 1, wcopy);
4953 msg_puts(IObuff);
4954 lines_left = 3; /* avoid more prompt */ 5126 lines_left = 3; /* avoid more prompt */
4955 msg_putchar('\n'); 5127 msg_putchar('\n');
4956 } 5128 }
4957 5129
4958 /* Ask for choice. */ 5130 /* Ask for choice. */
5056 if (su->su_badlen >= MAXWLEN) 5228 if (su->su_badlen >= MAXWLEN)
5057 su->su_badlen = MAXWLEN - 1; /* just in case */ 5229 su->su_badlen = MAXWLEN - 1; /* just in case */
5058 vim_strncpy(su->su_badword, su->su_badptr, su->su_badlen); 5230 vim_strncpy(su->su_badword, su->su_badptr, su->su_badlen);
5059 (void)spell_casefold(su->su_badptr, su->su_badlen, 5231 (void)spell_casefold(su->su_badptr, su->su_badlen,
5060 su->su_fbadword, MAXWLEN); 5232 su->su_fbadword, MAXWLEN);
5233 /* get caps flags for bad word */
5234 su->su_badflags = captype(su->su_badptr, su->su_badptr + su->su_badlen);
5061 5235
5062 /* Ban the bad word itself. It may appear in another region. */ 5236 /* Ban the bad word itself. It may appear in another region. */
5063 add_banned(su, su->su_badword); 5237 add_banned(su, su->su_badword);
5064 5238
5065 /* 5239 /*
5066 * 1. Try inserting/deleting/swapping/changing a letter, use REP entries 5240 * 1. Try special cases, such as repeating a word: "the the" -> "the".
5067 * from the .aff file and inserting a space (split the word).
5068 * 5241 *
5069 * Set a maximum score to limit the combination of operations that is 5242 * Set a maximum score to limit the combination of operations that is
5070 * tried. 5243 * tried.
5071 */ 5244 */
5072 su->su_maxscore = SCORE_MAXINIT; 5245 su->su_maxscore = SCORE_MAXINIT;
5073 spell_try_change(su); 5246 suggest_try_special(su);
5247
5248 /*
5249 * 2. Try inserting/deleting/swapping/changing a letter, use REP entries
5250 * from the .aff file and inserting a space (split the word).
5251 */
5252 suggest_try_change(su);
5074 5253
5075 /* For the resulting top-scorers compute the sound-a-like score. */ 5254 /* For the resulting top-scorers compute the sound-a-like score. */
5076 if (sps_flags & SPS_DOUBLE) 5255 if (sps_flags & SPS_DOUBLE)
5077 score_comp_sal(su); 5256 score_comp_sal(su);
5078 5257
5079 /* 5258 /*
5080 * 2. Try finding sound-a-like words. 5259 * 3. Try finding sound-a-like words.
5081 * 5260 *
5082 * Only do this when we don't have a lot of suggestions yet, because it's 5261 * Only do this when we don't have a lot of suggestions yet, because it's
5083 * very slow and often doesn't find new suggestions. 5262 * very slow and often doesn't find new suggestions.
5084 */ 5263 */
5085 if ((sps_flags & SPS_DOUBLE) 5264 if ((sps_flags & SPS_DOUBLE)
5086 || (!(sps_flags & SPS_FAST) 5265 || (!(sps_flags & SPS_FAST)
5087 && su->su_ga.ga_len < SUG_CLEAN_COUNT(su))) 5266 && su->su_ga.ga_len < SUG_CLEAN_COUNT(su)))
5088 { 5267 {
5089 /* Allow a higher score now. */ 5268 /* Allow a higher score now. */
5090 su->su_maxscore = SCORE_MAXMAX; 5269 su->su_maxscore = SCORE_MAXMAX;
5091 spell_try_soundalike(su); 5270 suggest_try_soundalike(su);
5092 } 5271 }
5093 5272
5094 /* When CTRL-C was hit while searching do show the results. */ 5273 /* When CTRL-C was hit while searching do show the results. */
5095 ui_breakcheck(); 5274 ui_breakcheck();
5096 if (got_int) 5275 if (got_int)
5215 } 5394 }
5216 *d = NUL; 5395 *d = NUL;
5217 } 5396 }
5218 5397
5219 /* 5398 /*
5399 * Try finding suggestions by recognizing specific situations.
5400 */
5401 static void
5402 suggest_try_special(su)
5403 suginfo_T *su; /* reads su_fbadword, su_badflags, su_badlen; adds to su_ga */
5404 {
5405 char_u *p;
5406 int len; /* byte length of the first word in su_fbadword */
5407 int c; /* byte saved while su_fbadword is truncated */
5408 char_u word[MAXWLEN]; /* first word with the caps of the bad word applied */
5409
5410 /*
5411 * Recognize a word that is repeated: "the the".
5412 */
5413 p = skiptowhite(su->su_fbadword);
5414 len = p - su->su_fbadword;
5415 p = skipwhite(p);
 /* Match when what follows the white space has the same length and the
 * same bytes as the first word. */
5416 if (STRLEN(p) == len && STRNCMP(su->su_fbadword, p, len) == 0)
5417 {
5418 /* Include badflags: if the badword is onecap or allcap
5419 * use that for the goodword too: "The the" -> "The". */
 /* Temporarily end su_fbadword after the first word, so that only that
 * word is passed to make_case_word(); the byte is put back below. */
5420 c = su->su_fbadword[len];
5421 su->su_fbadword[len] = NUL;
5422 make_case_word(su->su_fbadword, word, su->su_badflags);
5423 su->su_fbadword[len] = c;
 /* The suggestion replaces the whole bad word (su_badlen), gets
 * SCORE_DEL as its score and TRUE for "had_bonus". */
5424 add_suggestion(su, &su->su_ga, word, su->su_badlen, SCORE_DEL, TRUE);
5425 }
5426 }
5427
5428 /*
5220 * Try finding suggestions by adding/removing/swapping letters. 5429 * Try finding suggestions by adding/removing/swapping letters.
5221 * 5430 *
5222 * This uses a state machine. At each node in the tree we try various 5431 * This uses a state machine. At each node in the tree we try various
5223 * operations. When trying if an operation works "depth" is increased and the 5432 * operations. When trying if an operation works "depth" is increased and the
5224 * stack[] is used to store info. This allows combinations, thus insert one 5433 * stack[] is used to store info. This allows combinations, thus insert one
5225 * character, replace one and delete another. The number of changes is 5434 * character, replace one and delete another. The number of changes is
5226 * limited by su->su_maxscore, checked in try_deeper(). 5435 * limited by su->su_maxscore, checked in try_deeper().
5227 */ 5436 */
5228 static void 5437 static void
5229 spell_try_change(su) 5438 suggest_try_change(su)
5230 suginfo_T *su; 5439 suginfo_T *su;
5231 { 5440 {
5232 char_u fword[MAXWLEN]; /* copy of the bad word, case-folded */ 5441 char_u fword[MAXWLEN]; /* copy of the bad word, case-folded */
5233 char_u tword[MAXWLEN]; /* good word collected so far */ 5442 char_u tword[MAXWLEN]; /* good word collected so far */
5234 trystate_T stack[MAXWLEN]; 5443 trystate_T stack[MAXWLEN];
5243 idx_T *idxs; 5452 idx_T *idxs;
5244 int depth; 5453 int depth;
5245 int c, c2, c3; 5454 int c, c2, c3;
5246 int n = 0; 5455 int n = 0;
5247 int flags; 5456 int flags;
5248 int badflags;
5249 garray_T *gap; 5457 garray_T *gap;
5250 idx_T arridx; 5458 idx_T arridx;
5251 int len; 5459 int len;
5252 char_u *p; 5460 char_u *p;
5253 fromto_T *ftp; 5461 fromto_T *ftp;
5254 int fl = 0, tl; 5462 int fl = 0, tl;
5255 5463 int repextra = 0; /* extra bytes in fword[] from REP item */
5256 /* get caps flags for bad word */
5257 badflags = captype(su->su_badptr, su->su_badptr + su->su_badlen);
5258 5464
5259 /* We make a copy of the case-folded bad word, so that we can modify it 5465 /* We make a copy of the case-folded bad word, so that we can modify it
5260 * to find matches (esp. REP items). */ 5466 * to find matches (esp. REP items). Append some more text, changing
5467 * chars after the bad word may help. */
5261 STRCPY(fword, su->su_fbadword); 5468 STRCPY(fword, su->su_fbadword);
5262 5469 n = STRLEN(fword);
5470 p = su->su_badptr + su->su_badlen;
5471 (void)spell_casefold(p, STRLEN(p), fword + n, MAXWLEN - n);
5263 5472
5264 for (lp = LANGP_ENTRY(curwin->w_buffer->b_langp, 0); 5473 for (lp = LANGP_ENTRY(curwin->w_buffer->b_langp, 0);
5265 lp->lp_slang != NULL; ++lp) 5474 lp->lp_slang != NULL; ++lp)
5266 { 5475 {
5267 /* 5476 /*
5304 */ 5513 */
5305 arridx = sp->ts_arridx; /* current node in the tree */ 5514 arridx = sp->ts_arridx; /* current node in the tree */
5306 len = byts[arridx]; /* bytes in this node */ 5515 len = byts[arridx]; /* bytes in this node */
5307 arridx += sp->ts_curi; /* index of current byte */ 5516 arridx += sp->ts_curi; /* index of current byte */
5308 5517
5309 if (sp->ts_curi > len || (c = byts[arridx]) != 0) 5518 if (sp->ts_curi > len || byts[arridx] != 0)
5310 { 5519 {
5311 /* Past bytes in node and/or past NUL bytes. */ 5520 /* Past bytes in node and/or past NUL bytes. */
5312 sp->ts_state = STATE_ENDNUL; 5521 sp->ts_state = STATE_ENDNUL;
5313 break; 5522 break;
5314 } 5523 }
5328 if (flags & WF_KEEPCAP) 5537 if (flags & WF_KEEPCAP)
5329 /* Must find the word in the keep-case tree. */ 5538 /* Must find the word in the keep-case tree. */
5330 find_keepcap_word(lp->lp_slang, tword + splitoff, 5539 find_keepcap_word(lp->lp_slang, tword + splitoff,
5331 preword + prewordlen); 5540 preword + prewordlen);
5332 else 5541 else
5542 {
5333 /* Include badflags: if the badword is onecap or allcap 5543 /* Include badflags: if the badword is onecap or allcap
5334 * use that for the goodword too. */ 5544 * use that for the goodword too. But if the badword is
5545 * allcap and it's only one char long use onecap. */
5546 c = su->su_badflags;
5547 if ((c & WF_ALLCAP)
5548 #ifdef FEAT_MBYTE
5549 && su->su_badlen == mb_ptr2len_check(su->su_badptr)
5550 #else
5551 && su->su_badlen == 1
5552 #endif
5553 )
5554 c = WF_ONECAP;
5335 make_case_word(tword + splitoff, 5555 make_case_word(tword + splitoff,
5336 preword + prewordlen, flags | badflags); 5556 preword + prewordlen, flags | c);
5557 }
5337 5558
5338 /* Don't use a banned word. It may appear again as a good 5559 /* Don't use a banned word. It may appear again as a good
5339 * word, thus remember it. */ 5560 * word, thus remember it. */
5340 if (flags & WF_BANNED) 5561 if (flags & WF_BANNED)
5341 { 5562 {
5350 && (((unsigned)flags >> 8) & lp->lp_region) == 0) 5571 && (((unsigned)flags >> 8) & lp->lp_region) == 0)
5351 newscore += SCORE_REGION; 5572 newscore += SCORE_REGION;
5352 if (flags & WF_RARE) 5573 if (flags & WF_RARE)
5353 newscore += SCORE_RARE; 5574 newscore += SCORE_RARE;
5354 5575
5355 if (!spell_valid_case(badflags, 5576 if (!spell_valid_case(su->su_badflags,
5356 captype(preword + prewordlen, NULL))) 5577 captype(preword + prewordlen, NULL)))
5357 newscore += SCORE_ICASE; 5578 newscore += SCORE_ICASE;
5358 5579
5359 if (fword[sp->ts_fidx] == 0) 5580 if ((fword[sp->ts_fidx] == NUL
5581 || !SPELL_ISWORDP(fword + sp->ts_fidx))
5582 && sp->ts_fidx >= sp->ts_fidxtry)
5360 { 5583 {
5361 /* The badword also ends: add suggestions, */ 5584 /* The badword also ends: add suggestions, */
5362 add_suggestion(su, &su->su_ga, preword, 5585 add_suggestion(su, &su->su_ga, preword,
5586 sp->ts_fidx - repextra,
5363 sp->ts_score + newscore, FALSE); 5587 sp->ts_score + newscore, FALSE);
5364 } 5588 }
5365 else if (sp->ts_fidx >= sp->ts_fidxtry 5589 else if (sp->ts_fidx >= sp->ts_fidxtry
5366 #ifdef FEAT_MBYTE 5590 #ifdef FEAT_MBYTE
5367 /* Don't split halfway a character. */ 5591 /* Don't split halfway a character. */
5374 * words starts at fword[sp->ts_fidx]. */ 5598 * words starts at fword[sp->ts_fidx]. */
5375 if (try_deeper(su, stack, depth, newscore + SCORE_SPLIT)) 5599 if (try_deeper(su, stack, depth, newscore + SCORE_SPLIT))
5376 { 5600 {
5377 /* Save things to be restored at STATE_SPLITUNDO. */ 5601 /* Save things to be restored at STATE_SPLITUNDO. */
5378 sp->ts_save_prewordlen = prewordlen; 5602 sp->ts_save_prewordlen = prewordlen;
5379 sp->ts_save_badflags = badflags; 5603 sp->ts_save_badflags = su->su_badflags;
5380 sp->ts_save_splitoff = splitoff; 5604 sp->ts_save_splitoff = splitoff;
5381 5605
5382 /* Append a space to preword. */ 5606 /* Append a space to preword. */
5383 STRCAT(preword, " "); 5607 STRCAT(preword, " ");
5384 prewordlen = STRLEN(preword); 5608 prewordlen = STRLEN(preword);
5398 --i; 5622 --i;
5399 } 5623 }
5400 else 5624 else
5401 #endif 5625 #endif
5402 p = su->su_badptr + sp->ts_fidx; 5626 p = su->su_badptr + sp->ts_fidx;
5403 badflags = captype(p, su->su_badptr + su->su_badlen); 5627 su->su_badflags = captype(p, su->su_badptr
5628 + su->su_badlen);
5404 5629
5405 sp->ts_state = STATE_SPLITUNDO; 5630 sp->ts_state = STATE_SPLITUNDO;
5406 ++depth; 5631 ++depth;
5407 /* Restart at top of the tree. */ 5632 /* Restart at top of the tree. */
5408 stack[depth].ts_arridx = 0; 5633 stack[depth].ts_arridx = 0;
5409 } 5634 }
5410 } 5635 }
5411 break; 5636 break;
5412 5637
5413 case STATE_SPLITUNDO: 5638 case STATE_SPLITUNDO:
5414 /* Fixup the changes done for word split. */ 5639 /* Undo the changes done for word split. */
5415 badflags = sp->ts_save_badflags; 5640 su->su_badflags = sp->ts_save_badflags;
5416 splitoff = sp->ts_save_splitoff; 5641 splitoff = sp->ts_save_splitoff;
5417 prewordlen = sp->ts_save_prewordlen; 5642 prewordlen = sp->ts_save_prewordlen;
5418 5643
5419 /* Continue looking for NUL bytes. */ 5644 /* Continue looking for NUL bytes. */
5420 sp->ts_state = STATE_START; 5645 sp->ts_state = STATE_START;
5421 break; 5646 break;
5422 5647
5423 case STATE_ENDNUL: 5648 case STATE_ENDNUL:
5424 /* Past the NUL bytes in the node. */ 5649 /* Past the NUL bytes in the node. */
5425 if (fword[sp->ts_fidx] == 0) 5650 if (fword[sp->ts_fidx] == NUL)
5426 { 5651 {
5427 /* The badword ends, can't use the bytes in this node. */ 5652 /* The badword ends, can't use the bytes in this node. */
5428 sp->ts_state = STATE_DEL; 5653 sp->ts_state = STATE_DEL;
5429 break; 5654 break;
5430 } 5655 }
5754 { 5979 {
5755 c = *p; 5980 c = *p;
5756 *p = p[2]; 5981 *p = p[2];
5757 p[2] = c; 5982 p[2] = c;
5758 } 5983 }
5759 /*FALLTHROUGH*/ 5984
5760
5761 case STATE_ROT3L:
5762 /* Rotate three characters left: "123" -> "231". We change 5985 /* Rotate three characters left: "123" -> "231". We change
5763 * "fword" here, it's changed back afterwards. */ 5986 * "fword" here, it's changed back afterwards. */
5764 if (try_deeper(su, stack, depth, SCORE_SWAP3)) 5987 if (try_deeper(su, stack, depth, SCORE_SWAP3))
5765 { 5988 {
5766 sp->ts_state = STATE_UNROT3L; 5989 sp->ts_state = STATE_UNROT3L;
5790 else 6013 else
5791 sp->ts_state = STATE_REP_INI; 6014 sp->ts_state = STATE_REP_INI;
5792 break; 6015 break;
5793 6016
5794 case STATE_UNROT3L: 6017 case STATE_UNROT3L:
5795 /* Undo STATE_ROT3L: "231" -> "123" */ 6018 /* Undo ROT3L: "231" -> "123" */
5796 p = fword + sp->ts_fidx; 6019 p = fword + sp->ts_fidx;
5797 #ifdef FEAT_MBYTE 6020 #ifdef FEAT_MBYTE
5798 if (has_mbyte) 6021 if (has_mbyte)
5799 { 6022 {
5800 n = MB_BYTE2LEN(*p); 6023 n = MB_BYTE2LEN(*p);
5810 c = p[2]; 6033 c = p[2];
5811 p[2] = p[1]; 6034 p[2] = p[1];
5812 p[1] = *p; 6035 p[1] = *p;
5813 *p = c; 6036 *p = c;
5814 } 6037 }
5815 /*FALLTHROUGH*/ 6038
5816
5817 case STATE_ROT3R:
5818 /* Rotate three bytes right: "123" -> "312". We change 6039 /* Rotate three bytes right: "123" -> "312". We change
5819 * "fword" here, it's changed back afterwards. */ 6040 * "fword" here, it's changed back afterwards. */
5820 if (try_deeper(su, stack, depth, SCORE_SWAP3)) 6041 if (try_deeper(su, stack, depth, SCORE_SWAP3))
5821 { 6042 {
5822 sp->ts_state = STATE_UNROT3R; 6043 sp->ts_state = STATE_UNROT3R;
5846 else 6067 else
5847 sp->ts_state = STATE_REP_INI; 6068 sp->ts_state = STATE_REP_INI;
5848 break; 6069 break;
5849 6070
5850 case STATE_UNROT3R: 6071 case STATE_UNROT3R:
5851 /* Undo STATE_ROT3R: "312" -> "123" */ 6072 /* Undo ROT3R: "312" -> "123" */
5852 p = fword + sp->ts_fidx; 6073 p = fword + sp->ts_fidx;
5853 #ifdef FEAT_MBYTE 6074 #ifdef FEAT_MBYTE
5854 if (has_mbyte) 6075 if (has_mbyte)
5855 { 6076 {
5856 c = mb_ptr2char(p); 6077 c = mb_ptr2char(p);
5919 /* Change the "from" to the "to" string. */ 6140 /* Change the "from" to the "to" string. */
5920 ++depth; 6141 ++depth;
5921 fl = STRLEN(ftp->ft_from); 6142 fl = STRLEN(ftp->ft_from);
5922 tl = STRLEN(ftp->ft_to); 6143 tl = STRLEN(ftp->ft_to);
5923 if (fl != tl) 6144 if (fl != tl)
6145 {
5924 mch_memmove(p + tl, p + fl, STRLEN(p + fl) + 1); 6146 mch_memmove(p + tl, p + fl, STRLEN(p + fl) + 1);
6147 repextra += tl - fl;
6148 }
5925 mch_memmove(p, ftp->ft_to, tl); 6149 mch_memmove(p, ftp->ft_to, tl);
5926 stack[depth].ts_fidxtry = sp->ts_fidx + tl; 6150 stack[depth].ts_fidxtry = sp->ts_fidx + tl;
5927 #ifdef FEAT_MBYTE 6151 #ifdef FEAT_MBYTE
5928 stack[depth].ts_tcharlen = 0; 6152 stack[depth].ts_tcharlen = 0;
5929 #endif 6153 #endif
5943 + sp->ts_curi - 1; 6167 + sp->ts_curi - 1;
5944 fl = STRLEN(ftp->ft_from); 6168 fl = STRLEN(ftp->ft_from);
5945 tl = STRLEN(ftp->ft_to); 6169 tl = STRLEN(ftp->ft_to);
5946 p = fword + sp->ts_fidx; 6170 p = fword + sp->ts_fidx;
5947 if (fl != tl) 6171 if (fl != tl)
6172 {
5948 mch_memmove(p + fl, p + tl, STRLEN(p + tl) + 1); 6173 mch_memmove(p + fl, p + tl, STRLEN(p + tl) + 1);
6174 repextra -= tl - fl;
6175 }
5949 mch_memmove(p, ftp->ft_from, fl); 6176 mch_memmove(p, ftp->ft_from, fl);
5950 sp->ts_state = STATE_REP; 6177 sp->ts_state = STATE_REP;
5951 break; 6178 break;
5952 6179
5953 default: 6180 default:
6215 garray_T *gap; 6442 garray_T *gap;
6216 langp_T *lp; 6443 langp_T *lp;
6217 suggest_T *stp; 6444 suggest_T *stp;
6218 char_u *p; 6445 char_u *p;
6219 char_u badsound[MAXWLEN]; 6446 char_u badsound[MAXWLEN];
6447 char_u badsound2[MAXWLEN];
6220 char_u goodsound[MAXWLEN]; 6448 char_u goodsound[MAXWLEN];
6221 char_u fword[MAXWLEN]; 6449 char_u fword[MAXWLEN];
6222 int round; 6450 int round;
6223 6451
6224 /* Add the alternate score to su_ga. */ 6452 /* Add the alternate score to su_ga. */
6232 6460
6233 for (i = 0; i < su->su_ga.ga_len; ++i) 6461 for (i = 0; i < su->su_ga.ga_len; ++i)
6234 { 6462 {
6235 stp = &SUG(su->su_ga, i); 6463 stp = &SUG(su->su_ga, i);
6236 6464
6465 if (stp->st_orglen <= su->su_badlen)
6466 p = badsound;
6467 else
6468 {
6469 /* soundfold the bad word with a different length */
6470 (void)spell_casefold(su->su_badptr, stp->st_orglen,
6471 fword, MAXWLEN);
6472 spell_soundfold(lp->lp_slang, fword, badsound2);
6473 p = badsound2;
6474 }
6475
6237 /* Case-fold the word, sound-fold the word and compute the 6476 /* Case-fold the word, sound-fold the word and compute the
6238 * score for the difference. */ 6477 * score for the difference. */
6239 (void)spell_casefold(stp->st_word, STRLEN(stp->st_word), 6478 (void)spell_casefold(stp->st_word, STRLEN(stp->st_word),
6240 fword, MAXWLEN); 6479 fword, MAXWLEN);
6241 spell_soundfold(lp->lp_slang, fword, goodsound); 6480 spell_soundfold(lp->lp_slang, fword, goodsound);
6242 stp->st_altscore = soundalike_score(goodsound, badsound); 6481
6482 stp->st_altscore = soundalike_score(goodsound, p);
6243 if (stp->st_altscore == SCORE_MAXMAX) 6483 if (stp->st_altscore == SCORE_MAXMAX)
6244 stp->st_score = (stp->st_score * 3 + SCORE_BIG) / 4; 6484 stp->st_score = (stp->st_score * 3 + SCORE_BIG) / 4;
6245 else 6485 else
6246 stp->st_score = (stp->st_score * 3 6486 stp->st_score = (stp->st_score * 3
6247 + stp->st_altscore) / 4; 6487 + stp->st_altscore) / 4;
6310 6550
6311 /* 6551 /*
6312 * Find suggestions by comparing the word in a sound-a-like form. 6552 * Find suggestions by comparing the word in a sound-a-like form.
6313 */ 6553 */
6314 static void 6554 static void
6315 spell_try_soundalike(su) 6555 suggest_try_soundalike(su)
6316 suginfo_T *su; 6556 suginfo_T *su;
6317 { 6557 {
6318 char_u salword[MAXWLEN]; 6558 char_u salword[MAXWLEN];
6319 char_u tword[MAXWLEN]; 6559 char_u tword[MAXWLEN];
6320 char_u tfword[MAXWLEN]; 6560 char_u tfword[MAXWLEN];
6412 else 6652 else
6413 p = tword; 6653 p = tword;
6414 6654
6415 if (sps_flags & SPS_DOUBLE) 6655 if (sps_flags & SPS_DOUBLE)
6416 add_suggestion(su, &su->su_sga, p, 6656 add_suggestion(su, &su->su_sga, p,
6657 su->su_badlen,
6417 sound_score, FALSE); 6658 sound_score, FALSE);
6418 else 6659 else
6419 { 6660 {
6420 /* Compute the score. */ 6661 /* Compute the score. */
6421 score = spell_edit_score( 6662 score = spell_edit_score(
6423 if (sps_flags & SPS_BEST) 6664 if (sps_flags & SPS_BEST)
6424 /* give a bonus for the good word 6665 /* give a bonus for the good word
6425 * sounding the same as the bad 6666 * sounding the same as the bad
6426 * word */ 6667 * word */
6427 add_suggestion(su, &su->su_ga, p, 6668 add_suggestion(su, &su->su_ga, p,
6669 su->su_badlen,
6428 RESCORE(score, sound_score), 6670 RESCORE(score, sound_score),
6429 TRUE); 6671 TRUE);
6430 else 6672 else
6431 add_suggestion(su, &su->su_ga, p, 6673 add_suggestion(su, &su->su_ga, p,
6674 su->su_badlen,
6432 score + sound_score, FALSE); 6675 score + sound_score, FALSE);
6433 } 6676 }
6434 } 6677 }
6435 } 6678 }
6436 6679
6614 * Do not add a duplicate suggestion or suggestions with a bad score. 6857 * Do not add a duplicate suggestion or suggestions with a bad score.
6615 * When "use_score" is not zero it's used, otherwise the score is computed 6858 * When "use_score" is not zero it's used, otherwise the score is computed
6616 * with spell_edit_score(). 6859 * with spell_edit_score().
6617 */ 6860 */
6618 static void 6861 static void
6619 add_suggestion(su, gap, goodword, score, had_bonus) 6862 add_suggestion(su, gap, goodword, badlen, score, had_bonus)
6620 suginfo_T *su; 6863 suginfo_T *su;
6621 garray_T *gap; 6864 garray_T *gap;
6622 char_u *goodword; 6865 char_u *goodword;
6866 int badlen; /* length of bad word used */
6623 int score; 6867 int score;
6624 int had_bonus; /* value for st_had_bonus */ 6868 int had_bonus; /* value for st_had_bonus */
6625 { 6869 {
6626 suggest_T *stp; 6870 suggest_T *stp;
6627 int i; 6871 int i;
6872 char_u *p = NULL;
6873 int c = 0;
6628 6874
6629 /* Check that the word wasn't banned. */ 6875 /* Check that the word wasn't banned. */
6630 if (was_banned(su, goodword)) 6876 if (was_banned(su, goodword))
6631 return; 6877 return;
6878
6879 /* If past "su_badlen" and the rest is identical stop at "su_badlen".
6880 * Remove the common part from "goodword". */
6881 i = badlen - su->su_badlen;
6882 if (i > 0)
6883 {
6884 /* This assumes there was no case folding or it didn't change the
6885 * length... */
6886 p = goodword + STRLEN(goodword) - i;
6887 if (p > goodword && STRNICMP(su->su_badptr + su->su_badlen, p, i) == 0)
6888 {
6889 badlen = su->su_badlen;
6890 c = *p;
6891 *p = NUL;
6892 }
6893 else
6894 p = NULL;
6895 }
6632 6896
6633 if (score <= su->su_maxscore) 6897 if (score <= su->su_maxscore)
6634 { 6898 {
6635 /* Check if the word is already there. */ 6899 /* Check if the word is already there. */
6636 stp = &SUG(*gap, 0); 6900 stp = &SUG(*gap, 0);
6654 if (stp->st_word != NULL) 6918 if (stp->st_word != NULL)
6655 { 6919 {
6656 stp->st_score = score; 6920 stp->st_score = score;
6657 stp->st_altscore = 0; 6921 stp->st_altscore = 0;
6658 stp->st_had_bonus = had_bonus; 6922 stp->st_had_bonus = had_bonus;
6659 stp->st_orglen = su->su_badlen; 6923 stp->st_orglen = badlen;
6660 ++gap->ga_len; 6924 ++gap->ga_len;
6661 6925
6662 /* If we have too many suggestions now, sort the list and keep 6926 /* If we have too many suggestions now, sort the list and keep
6663 * the best suggestions. */ 6927 * the best suggestions. */
6664 if (gap->ga_len > SUG_MAX_COUNT(su)) 6928 if (gap->ga_len > SUG_MAX_COUNT(su))
6665 su->su_maxscore = cleanup_suggestions(gap, su->su_maxscore, 6929 su->su_maxscore = cleanup_suggestions(gap, su->su_maxscore,
6666 SUG_CLEAN_COUNT(su)); 6930 SUG_CLEAN_COUNT(su));
6667 } 6931 }
6668 } 6932 }
6669 } 6933 }
6934
6935 if (p != NULL)
6936 *p = c; /* restore "goodword" */
6670 } 6937 }
6671 6938
6672 /* 6939 /*
6673 * Add a word to be banned. 6940 * Add a word to be banned.
6674 */ 6941 */
6734 suginfo_T *su; 7001 suginfo_T *su;
6735 { 7002 {
6736 langp_T *lp; 7003 langp_T *lp;
6737 suggest_T *stp; 7004 suggest_T *stp;
6738 char_u sal_badword[MAXWLEN]; 7005 char_u sal_badword[MAXWLEN];
7006 char_u tword[MAXWLEN];
7007 char_u salword[MAXWLEN];
7008 char_u *p;
6739 int score; 7009 int score;
6740 int i; 7010 int i;
6741 7011
6742 for (lp = LANGP_ENTRY(curwin->w_buffer->b_langp, 0); 7012 for (lp = LANGP_ENTRY(curwin->w_buffer->b_langp, 0);
6743 lp->lp_slang != NULL; ++lp) 7013 lp->lp_slang != NULL; ++lp)
6750 for (i = 0; i < su->su_ga.ga_len; ++i) 7020 for (i = 0; i < su->su_ga.ga_len; ++i)
6751 { 7021 {
6752 stp = &SUG(su->su_ga, i); 7022 stp = &SUG(su->su_ga, i);
6753 if (!stp->st_had_bonus) 7023 if (!stp->st_had_bonus)
6754 { 7024 {
6755 score = spell_sound_score(lp->lp_slang, stp->st_word, 7025 if (stp->st_orglen <= su->su_badlen)
6756 sal_badword); 7026 p = sal_badword;
7027 else
7028 {
7029 /* soundfold the bad word with a different length */
7030 (void)spell_casefold(su->su_badptr, stp->st_orglen,
7031 tword, MAXWLEN);
7032 spell_soundfold(lp->lp_slang, tword, salword);
7033 p = salword;
7034 }
7035 score = spell_sound_score(lp->lp_slang, stp->st_word, p);
6757 stp->st_score = RESCORE(stp->st_score, score); 7036 stp->st_score = RESCORE(stp->st_score, score);
6758 } 7037 }
6759 } 7038 }
6760 break; 7039 break;
6761 } 7040 }