Mercurial > vim
comparison src/spell.c @ 324:548525d9da24
updated for version 7.0085
author | vimboss |
---|---|
date | Tue, 14 Jun 2005 22:01:04 +0000 |
parents | 03b3684919e3 |
children | f76b0d38b6bd |
comparison
equal
deleted
inserted
replaced
323:03b3684919e3 | 324:548525d9da24 |
---|---|
11 * spell.c: code for spell checking | 11 * spell.c: code for spell checking |
12 * | 12 * |
13 * The spell checking mechanism uses a tree (aka trie). Each node in the tree | 13 * The spell checking mechanism uses a tree (aka trie). Each node in the tree |
14 * has a list of bytes that can appear (siblings). For each byte there is a | 14 * has a list of bytes that can appear (siblings). For each byte there is a |
15 * pointer to the node with the byte that follows in the word (child). | 15 * pointer to the node with the byte that follows in the word (child). |
16 * A NUL byte is used where the word may end. | 16 * |
17 * A NUL byte is used where the word may end. The bytes are sorted, so that | |
18 * binary searching can be used and the NUL bytes are at the start. The | |
19 * number of possible bytes is stored before the list of bytes. | |
20 * | |
21 * The tree uses two arrays: "byts" stores the characters, "idxs" stores | |
22 * either the next index or flags. The tree starts at index 0. For example, | |
23 * to lookup "vi" this sequence is followed: | |
24 * i = 0 | |
25 * len = byts[i] | |
26 * n = where "v" appears in byts[i + 1] to byts[i + len] | |
27 * i = idxs[n] | |
28 * len = byts[i] | |
29 * n = where "i" appears in byts[i + 1] to byts[i + len] | |
30 * i = idxs[n] | |
31 * len = byts[i] | |
32 * find that byts[i + 1] is 0, idxs[i + 1] has flags for "vi". | |
17 * | 33 * |
18 * There are two trees: one with case-folded words and one with words in | 34 * There are two trees: one with case-folded words and one with words in |
19 * original case. The second one is only used for keep-case words and is | 35 * original case. The second one is only used for keep-case words and is |
20 * usually small. | 36 * usually small. |
21 * | 37 * |
28 * See ":help develop-spell". | 44 * See ":help develop-spell". |
29 */ | 45 */ |
30 | 46 |
31 /* | 47 /* |
32 * Use this to let the score depend on how much a suggestion sounds like the | 48 * Use this to let the score depend on how much a suggestion sounds like the |
33 * bad word. It's quite slow and doesn't make the sorting much better.... | 49 * bad word. It's quite slow and only occasionally makes the sorting better. |
34 * #define SOUNDFOLD_SCORE | 50 #define SOUNDFOLD_SCORE |
51 */ | |
52 | |
53 /* | |
54 * Use this to adjust the score after finding suggestions, based on the | |
55 * suggested word sounding like the bad word. This is much faster than doing | |
56 * it for every possible suggestion. | |
57 * Disadvantage: When "the" is typed as "hte" it sounds different and goes | |
58 * down in the list. | |
59 #define RESCORE(word_score, sound_score) ((2 * word_score + sound_score) / 3) | |
35 */ | 60 */ |
36 | 61 |
37 /* | 62 /* |
38 * Vim spell file format: <HEADER> <SUGGEST> <LWORDTREE> <KWORDTREE> | 63 * Vim spell file format: <HEADER> <SUGGEST> <LWORDTREE> <KWORDTREE> |
39 * | 64 * |
45 * <regionname> 2 bytes Region name: ca, au, etc. Lower case. | 70 * <regionname> 2 bytes Region name: ca, au, etc. Lower case. |
46 * First <regionname> is region 1. | 71 * First <regionname> is region 1. |
47 * | 72 * |
48 * <charflagslen> 1 byte Number of bytes in <charflags> (should be 128). | 73 * <charflagslen> 1 byte Number of bytes in <charflags> (should be 128). |
49 * <charflags> N bytes List of flags (first one is for character 128): | 74 * <charflags> N bytes List of flags (first one is for character 128): |
50 * 0x01 word character | 75 * 0x01 word character CF_WORD |
51 * 0x02 upper-case character | 76 * 0x02 upper-case character CF_UPPER |
52 * <fcharslen> 2 bytes Number of bytes in <fchars>. | 77 * <fcharslen> 2 bytes Number of bytes in <fchars>. |
53 * <fchars> N bytes Folded characters, first one is for character 128. | 78 * <fchars> N bytes Folded characters, first one is for character 128. |
54 * | 79 * |
55 * | 80 * |
56 * <SUGGEST> : <repcount> <rep> ... | 81 * <SUGGEST> : <repcount> <rep> ... |
143 | 168 |
144 #define MAXWLEN 250 /* Assume max. word len is this many bytes. | 169 #define MAXWLEN 250 /* Assume max. word len is this many bytes. |
145 Some places assume a word length fits in a | 170 Some places assume a word length fits in a |
146 byte, thus it can't be above 255. */ | 171 byte, thus it can't be above 255. */ |
147 | 172 |
148 /* Flags used for a word. */ | 173 /* Type used for indexes in the word tree needs to be at least 3 bytes. If int |
174 * is 8 bytes we could use something smaller, but what? */ | |
175 #if SIZEOF_INT > 2 | |
176 typedef int idx_T; | |
177 #else | |
178 typedef long idx_T; | |
179 #endif | |
180 | |
181 /* Flags used for a word. Only the lowest byte can be used, the region byte | |
182 * comes above it. */ | |
149 #define WF_REGION 0x01 /* region byte follows */ | 183 #define WF_REGION 0x01 /* region byte follows */ |
150 #define WF_ONECAP 0x02 /* word with one capital (or all capitals) */ | 184 #define WF_ONECAP 0x02 /* word with one capital (or all capitals) */ |
151 #define WF_ALLCAP 0x04 /* word must be all capitals */ | 185 #define WF_ALLCAP 0x04 /* word must be all capitals */ |
152 #define WF_RARE 0x08 /* rare word */ | 186 #define WF_RARE 0x08 /* rare word */ |
153 #define WF_BANNED 0x10 /* bad word */ | 187 #define WF_BANNED 0x10 /* bad word */ |
154 #define WF_KEEPCAP 0x80 /* keep-case word */ | 188 #define WF_KEEPCAP 0x80 /* keep-case word */ |
155 | 189 |
156 #define WF_CAPMASK (WF_ONECAP | WF_ALLCAP | WF_KEEPCAP) | 190 #define WF_CAPMASK (WF_ONECAP | WF_ALLCAP | WF_KEEPCAP) |
191 | |
192 #define WF_USED 0x10000 /* Word was found in text. Must be in separate | |
193 byte before region and flags. */ | |
157 | 194 |
158 #define BY_NOFLAGS 0 /* end of word without flags or region */ | 195 #define BY_NOFLAGS 0 /* end of word without flags or region */ |
159 #define BY_FLAGS 1 /* end of word, flag byte follows */ | 196 #define BY_FLAGS 1 /* end of word, flag byte follows */ |
160 #define BY_INDEX 2 /* child is shared, index follows */ | 197 #define BY_INDEX 2 /* child is shared, index follows */ |
161 #define BY_SPECIAL BY_INDEX /* highest special byte value */ | 198 #define BY_SPECIAL BY_INDEX /* highest special byte value */ |
190 slang_T *sl_next; /* next language */ | 227 slang_T *sl_next; /* next language */ |
191 char_u *sl_name; /* language name "en", "en.rare", "nl", etc. */ | 228 char_u *sl_name; /* language name "en", "en.rare", "nl", etc. */ |
192 char_u *sl_fname; /* name of .spl file */ | 229 char_u *sl_fname; /* name of .spl file */ |
193 int sl_add; /* TRUE if it's a .add file. */ | 230 int sl_add; /* TRUE if it's a .add file. */ |
194 char_u *sl_fbyts; /* case-folded word bytes */ | 231 char_u *sl_fbyts; /* case-folded word bytes */ |
195 int *sl_fidxs; /* case-folded word indexes */ | 232 idx_T *sl_fidxs; /* case-folded word indexes */ |
196 char_u *sl_kbyts; /* keep-case word bytes */ | 233 char_u *sl_kbyts; /* keep-case word bytes */ |
197 int *sl_kidxs; /* keep-case word indexes */ | 234 idx_T *sl_kidxs; /* keep-case word indexes */ |
198 char_u sl_regions[17]; /* table with up to 8 region names plus NUL */ | 235 char_u sl_regions[17]; /* table with up to 8 region names plus NUL */ |
199 | 236 |
200 garray_T sl_rep; /* list of fromto_T entries from REP lines */ | 237 garray_T sl_rep; /* list of fromto_T entries from REP lines */ |
201 short sl_rep_first[256]; /* indexes where byte first appears, -1 if | 238 short sl_rep_first[256]; /* indexes where byte first appears, -1 if |
202 there is none */ | 239 there is none */ |
265 typedef struct suggest_S | 302 typedef struct suggest_S |
266 { | 303 { |
267 char_u *st_word; /* suggested word, allocated string */ | 304 char_u *st_word; /* suggested word, allocated string */ |
268 int st_orglen; /* length of replaced text */ | 305 int st_orglen; /* length of replaced text */ |
269 int st_score; /* lower is better */ | 306 int st_score; /* lower is better */ |
307 #ifdef RESCORE | |
308 int st_had_bonus; /* bonus already included in score */ | |
309 #endif | |
270 } suggest_T; | 310 } suggest_T; |
271 | 311 |
272 #define SUG(sup, i) (((suggest_T *)(sup)->su_ga.ga_data)[i]) | 312 #define SUG(sup, i) (((suggest_T *)(sup)->su_ga.ga_data)[i]) |
273 | 313 |
274 /* Number of suggestions displayed. */ | 314 /* Number of suggestions displayed. */ |
275 #define SUG_PROMPT_COUNT ((int)Rows - 2) | 315 #define SUG_PROMPT_COUNT ((int)Rows - 2) |
276 | 316 |
277 /* Threshold for sorting and cleaning up suggestions. */ | 317 /* Number of suggestions kept when cleaning up. When rescore_suggestions() is |
278 #define SUG_CLEANUP_COUNT (SUG_PROMPT_COUNT + 50) | 318 * called the score may change, thus we need to keep more than what is |
319 * displayed. */ | |
320 #define SUG_CLEAN_COUNT (SUG_PROMPT_COUNT < 25 ? 25 : SUG_PROMPT_COUNT) | |
321 | |
322 /* Threshold for sorting and cleaning up suggestions. Don't want to keep lots | |
323 * of suggestions that are not going to be displayed. */ | |
324 #define SUG_MAX_COUNT (SUG_PROMPT_COUNT + 50) | |
279 | 325 |
280 /* score for various changes */ | 326 /* score for various changes */ |
281 #define SCORE_SPLIT 99 /* split bad word */ | 327 #define SCORE_SPLIT 99 /* split bad word */ |
282 #define SCORE_ICASE 52 /* slightly different case */ | 328 #define SCORE_ICASE 52 /* slightly different case */ |
283 #define SCORE_ALLCAP 120 /* need all-cap case */ | 329 #define SCORE_ALLCAP 120 /* need all-cap case */ |
284 #define SCORE_REGION 70 /* word is for different region */ | 330 #define SCORE_REGION 70 /* word is for different region */ |
285 #define SCORE_RARE 180 /* rare word */ | 331 #define SCORE_RARE 180 /* rare word */ |
332 #define SCORE_NOTUSED 11 /* word not found in text yet */ | |
286 | 333 |
287 /* score for edit distance */ | 334 /* score for edit distance */ |
288 #define SCORE_SWAP 90 /* swap two characters */ | 335 #define SCORE_SWAP 90 /* swap two characters */ |
289 #define SCORE_SWAP3 110 /* swap two characters in three */ | 336 #define SCORE_SWAP3 110 /* swap two characters in three */ |
290 #define SCORE_REP 87 /* REP replacement */ | 337 #define SCORE_REP 87 /* REP replacement */ |
291 #define SCORE_SUBST 93 /* substitute a character */ | 338 #define SCORE_SUBST 93 /* substitute a character */ |
292 #define SCORE_SIMILAR 33 /* substitute a similar character */ | 339 #define SCORE_SIMILAR 33 /* substitute a similar character */ |
293 #define SCORE_DEL 96 /* delete a character */ | 340 #define SCORE_DEL 94 /* delete a character */ |
294 #define SCORE_INS 94 /* insert a character */ | 341 #define SCORE_INS 96 /* insert a character */ |
295 | 342 |
296 #define SCORE_MAXINIT 350 /* Initial maximum score: higher == slower. | 343 #define SCORE_MAXINIT 350 /* Initial maximum score: higher == slower. |
297 * 350 allows for about three changes. */ | 344 * 350 allows for about three changes. */ |
298 #define SCORE_MAXMAX 999999 /* accept any score */ | 345 #define SCORE_MAXMAX 999999 /* accept any score */ |
299 | 346 |
327 typedef struct spelltab_S | 374 typedef struct spelltab_S |
328 { | 375 { |
329 char_u st_isw[256]; /* flags: is word char */ | 376 char_u st_isw[256]; /* flags: is word char */ |
330 char_u st_isu[256]; /* flags: is uppercase char */ | 377 char_u st_isu[256]; /* flags: is uppercase char */ |
331 char_u st_fold[256]; /* chars: folded case */ | 378 char_u st_fold[256]; /* chars: folded case */ |
379 char_u st_upper[256]; /* chars: upper case */ | |
332 } spelltab_T; | 380 } spelltab_T; |
333 | 381 |
334 static spelltab_T spelltab; | 382 static spelltab_T spelltab; |
335 static int did_set_spelltab; | 383 static int did_set_spelltab; |
336 | 384 |
337 #define SPELL_ISWORD 1 | 385 #define CF_WORD 0x01 |
338 #define SPELL_ISUPPER 2 | 386 #define CF_UPPER 0x02 |
339 | 387 |
340 static void clear_spell_chartab __ARGS((spelltab_T *sp)); | 388 static void clear_spell_chartab __ARGS((spelltab_T *sp)); |
341 static int set_spell_finish __ARGS((spelltab_T *new_st)); | 389 static int set_spell_finish __ARGS((spelltab_T *new_st)); |
342 | 390 |
343 /* | 391 /* |
362 int ts_score; /* score */ | 410 int ts_score; /* score */ |
363 int ts_curi; /* index in list of child nodes */ | 411 int ts_curi; /* index in list of child nodes */ |
364 int ts_fidx; /* index in fword[], case-folded bad word */ | 412 int ts_fidx; /* index in fword[], case-folded bad word */ |
365 int ts_fidxtry; /* ts_fidx at which bytes may be changed */ | 413 int ts_fidxtry; /* ts_fidx at which bytes may be changed */ |
366 int ts_twordlen; /* valid length of tword[] */ | 414 int ts_twordlen; /* valid length of tword[] */ |
367 int ts_arridx; /* index in tree array, start of node */ | 415 idx_T ts_arridx; /* index in tree array, start of node */ |
368 char_u ts_save_prewordlen; /* saved "prewordlen" */ | 416 char_u ts_save_prewordlen; /* saved "prewordlen" */ |
369 int ts_save_splitoff; /* su_splitoff saved here */ | 417 int ts_save_splitoff; /* su_splitoff saved here */ |
370 int ts_save_badflags; /* badflags saved here */ | 418 int ts_save_badflags; /* badflags saved here */ |
371 } trystate_T; | 419 } trystate_T; |
372 | 420 |
377 static int spell_valid_case __ARGS((int origflags, int treeflags)); | 425 static int spell_valid_case __ARGS((int origflags, int treeflags)); |
378 static void spell_load_lang __ARGS((char_u *lang)); | 426 static void spell_load_lang __ARGS((char_u *lang)); |
379 static char_u *spell_enc __ARGS((void)); | 427 static char_u *spell_enc __ARGS((void)); |
380 static void spell_load_cb __ARGS((char_u *fname, void *cookie)); | 428 static void spell_load_cb __ARGS((char_u *fname, void *cookie)); |
381 static slang_T *spell_load_file __ARGS((char_u *fname, char_u *lang, slang_T *old_lp, int silent)); | 429 static slang_T *spell_load_file __ARGS((char_u *fname, char_u *lang, slang_T *old_lp, int silent)); |
382 static int read_tree __ARGS((FILE *fd, char_u *byts, int *idxs, int maxidx, int startidx)); | 430 static idx_T read_tree __ARGS((FILE *fd, char_u *byts, idx_T *idxs, int maxidx, int startidx)); |
383 static int find_region __ARGS((char_u *rp, char_u *region)); | 431 static int find_region __ARGS((char_u *rp, char_u *region)); |
384 static int captype __ARGS((char_u *word, char_u *end)); | 432 static int captype __ARGS((char_u *word, char_u *end)); |
385 static void spell_reload_one __ARGS((char_u *fname, int added_word)); | 433 static void spell_reload_one __ARGS((char_u *fname, int added_word)); |
386 static int set_spell_charflags __ARGS((char_u *flags, int cnt, char_u *upp)); | 434 static int set_spell_charflags __ARGS((char_u *flags, int cnt, char_u *upp)); |
387 static int set_spell_chartab __ARGS((char_u *fol, char_u *low, char_u *upp)); | 435 static int set_spell_chartab __ARGS((char_u *fol, char_u *low, char_u *upp)); |
388 static void write_spell_chartab __ARGS((FILE *fd)); | 436 static void write_spell_chartab __ARGS((FILE *fd)); |
389 static int spell_isupper __ARGS((int c)); | |
390 static int spell_casefold __ARGS((char_u *p, int len, char_u *buf, int buflen)); | 437 static int spell_casefold __ARGS((char_u *p, int len, char_u *buf, int buflen)); |
391 static void onecap_copy __ARGS((char_u *word, int len, char_u *wcopy, int upper)); | 438 static void onecap_copy __ARGS((char_u *word, char_u *wcopy, int upper)); |
392 static void spell_try_change __ARGS((suginfo_T *su)); | 439 static void spell_try_change __ARGS((suginfo_T *su)); |
393 static int try_deeper __ARGS((suginfo_T *su, trystate_T *stack, int depth, int score_add)); | 440 static int try_deeper __ARGS((suginfo_T *su, trystate_T *stack, int depth, int score_add)); |
394 static void find_keepcap_word __ARGS((slang_T *slang, char_u *fword, char_u *kword)); | 441 static void find_keepcap_word __ARGS((slang_T *slang, char_u *fword, char_u *kword)); |
395 static void spell_try_soundalike __ARGS((suginfo_T *su)); | 442 static void spell_try_soundalike __ARGS((suginfo_T *su)); |
396 static void make_case_word __ARGS((char_u *fword, char_u *cword, int flags)); | 443 static void make_case_word __ARGS((char_u *fword, char_u *cword, int flags)); |
444 #if 0 | |
397 static int similar_chars __ARGS((slang_T *slang, int c1, int c2)); | 445 static int similar_chars __ARGS((slang_T *slang, int c1, int c2)); |
446 #endif | |
447 #ifdef RESCORE | |
448 static void add_suggestion __ARGS((suginfo_T *su, char_u *goodword, int use_score, int had_bonus)); | |
449 #else | |
398 static void add_suggestion __ARGS((suginfo_T *su, char_u *goodword, int use_score)); | 450 static void add_suggestion __ARGS((suginfo_T *su, char_u *goodword, int use_score)); |
451 #endif | |
399 static void add_banned __ARGS((suginfo_T *su, char_u *word)); | 452 static void add_banned __ARGS((suginfo_T *su, char_u *word)); |
400 static int was_banned __ARGS((suginfo_T *su, char_u *word)); | 453 static int was_banned __ARGS((suginfo_T *su, char_u *word)); |
401 static void free_banned __ARGS((suginfo_T *su)); | 454 static void free_banned __ARGS((suginfo_T *su)); |
402 static void cleanup_suggestions __ARGS((suginfo_T *su)); | 455 #ifdef RESCORE |
456 static void rescore_suggestions __ARGS((suginfo_T *su)); | |
457 #endif | |
458 static void cleanup_suggestions __ARGS((suginfo_T *su, int keep)); | |
403 static void spell_soundfold __ARGS((slang_T *slang, char_u *inword, char_u *res)); | 459 static void spell_soundfold __ARGS((slang_T *slang, char_u *inword, char_u *res)); |
460 #if defined(RESCORE) || defined(SOUNDFOLD_SCORE) | |
461 static int spell_sound_score __ARGS((slang_T *slang, char_u *goodword, char_u *badsound)); | |
462 #endif | |
404 static int spell_edit_score __ARGS((char_u *badword, char_u *goodword)); | 463 static int spell_edit_score __ARGS((char_u *badword, char_u *goodword)); |
464 | |
465 /* | |
466 * Use our own character-case definitions, because the current locale may | |
467 * differ from what the .spl file uses. | |
468 * These must not be called with negative number! | |
469 */ | |
470 #ifndef FEAT_MBYTE | |
471 /* Non-multi-byte implementation. */ | |
472 # define SPELL_TOFOLD(c) ((c) < 256 ? spelltab.st_fold[c] : (c)) | |
473 # define SPELL_TOUPPER(c) ((c) < 256 ? spelltab.st_upper[c] : (c)) | |
474 # define SPELL_ISUPPER(c) ((c) < 256 ? spelltab.st_isu[c] : FALSE) | |
475 #else | |
476 /* Multi-byte implementation. For Unicode we can call utf_*(), but don't do | |
477 * that for ASCII, because we don't want to use 'casemap' here. Otherwise use | |
478 * the "w" library function for characters above 255 if available. */ | |
479 # ifdef HAVE_TOWLOWER | |
480 # define SPELL_TOFOLD(c) (enc_utf8 && (c) >= 128 ? utf_fold(c) \ | |
481 : (c) < 256 ? spelltab.st_fold[c] : towlower(c)) | |
482 # else | |
483 # define SPELL_TOFOLD(c) (enc_utf8 && (c) >= 128 ? utf_fold(c) \ | |
484 : (c) < 256 ? spelltab.st_fold[c] : (c)) | |
485 # endif | |
486 | |
487 # ifdef HAVE_TOWUPPER | |
488 # define SPELL_TOUPPER(c) (enc_utf8 && (c) >= 128 ? utf_toupper(c) \ | |
489 : (c) < 256 ? spelltab.st_upper[c] : towupper(c)) | |
490 # else | |
491 # define SPELL_TOUPPER(c) (enc_utf8 && (c) >= 128 ? utf_toupper(c) \ | |
492 : (c) < 256 ? spelltab.st_upper[c] : (c)) | |
493 # endif | |
494 | |
495 # ifdef HAVE_ISWUPPER | |
496 # define SPELL_ISUPPER(c) (enc_utf8 && (c) >= 128 ? utf_isupper(c) \ | |
497 : (c) < 256 ? spelltab.st_isu[c] : iswupper(c)) | |
498 # else | |
499 # define SPELL_ISUPPER(c) (enc_utf8 && (c) >= 128 ? utf_isupper(c) \ | |
500 : (c) < 256 ? spelltab.st_isu[c] : (c)) | |
501 # endif | |
502 #endif | |
405 | 503 |
406 | 504 |
407 static char *e_format = N_("E759: Format error in spell file"); | 505 static char *e_format = N_("E759: Format error in spell file"); |
408 | 506 |
409 /* | 507 /* |
487 mi.mi_lp->lp_slang != NULL; ++mi.mi_lp) | 585 mi.mi_lp->lp_slang != NULL; ++mi.mi_lp) |
488 { | 586 { |
489 /* Check for a matching word in case-folded words. */ | 587 /* Check for a matching word in case-folded words. */ |
490 find_word(&mi, FALSE); | 588 find_word(&mi, FALSE); |
491 | 589 |
590 /* Check for a matching word in keep-case words. */ | |
492 find_word(&mi, TRUE); | 591 find_word(&mi, TRUE); |
493 } | 592 } |
494 | 593 |
495 if (mi.mi_result != SP_OK) | 594 if (mi.mi_result != SP_OK) |
496 { | 595 { |
526 static void | 625 static void |
527 find_word(mip, keepcap) | 626 find_word(mip, keepcap) |
528 matchinf_T *mip; | 627 matchinf_T *mip; |
529 int keepcap; | 628 int keepcap; |
530 { | 629 { |
531 int arridx = 0; | 630 idx_T arridx = 0; |
532 int endlen[MAXWLEN]; /* length at possible word endings */ | 631 int endlen[MAXWLEN]; /* length at possible word endings */ |
533 int endidx[MAXWLEN]; /* possible word endings */ | 632 idx_T endidx[MAXWLEN]; /* possible word endings */ |
534 int endidxcnt = 0; | 633 int endidxcnt = 0; |
535 int len; | 634 int len; |
536 int wlen = 0; | 635 int wlen = 0; |
537 int flen; | 636 int flen; |
538 int c; | 637 int c; |
539 char_u *ptr; | 638 char_u *ptr; |
540 unsigned lo, hi, m; | 639 idx_T lo, hi, m; |
541 #ifdef FEAT_MBYTE | 640 #ifdef FEAT_MBYTE |
542 char_u *s; | 641 char_u *s; |
543 #endif | 642 #endif |
544 char_u *p; | 643 char_u *p; |
545 int res = SP_BAD; | 644 int res = SP_BAD; |
546 int valid; | 645 int valid; |
547 slang_T *slang = mip->mi_lp->lp_slang; | 646 slang_T *slang = mip->mi_lp->lp_slang; |
548 unsigned flags; | 647 unsigned flags; |
549 char_u *byts; | 648 char_u *byts; |
550 int *idxs; | 649 idx_T *idxs; |
551 | 650 |
552 if (keepcap) | 651 if (keepcap) |
553 { | 652 { |
554 /* Check for word with matching case in keep-case tree. */ | 653 /* Check for word with matching case in keep-case tree. */ |
555 ptr = mip->mi_word; | 654 ptr = mip->mi_word; |
690 /* Check flags and region. Repeat this if there are more | 789 /* Check flags and region. Repeat this if there are more |
691 * flags/region alternatives until there is a match. */ | 790 * flags/region alternatives until there is a match. */ |
692 for (len = byts[arridx - 1]; len > 0 && byts[arridx] == 0; --len) | 791 for (len = byts[arridx - 1]; len > 0 && byts[arridx] == 0; --len) |
693 { | 792 { |
694 flags = idxs[arridx]; | 793 flags = idxs[arridx]; |
794 | |
795 /* Set a flag for words that were used. The region and case | |
796 * don't matter here, it's only used to rate the suggestions. | |
797 idxs[arridx] = flags | WF_USED; | |
798 | |
695 if (keepcap) | 799 if (keepcap) |
696 { | 800 { |
697 /* For "keepcap" tree the case is always right. */ | 801 /* For "keepcap" tree the case is always right. */ |
698 valid = TRUE; | 802 valid = TRUE; |
699 } | 803 } |
821 len = spell_check(curwin, p, &attr); | 925 len = spell_check(curwin, p, &attr); |
822 | 926 |
823 if (attr != 0) | 927 if (attr != 0) |
824 { | 928 { |
825 /* We found a bad word. Check the attribute. */ | 929 /* We found a bad word. Check the attribute. */ |
826 /* TODO: check for syntax @Spell cluster. */ | |
827 if (allwords || attr == highlight_attr[HLF_SPB]) | 930 if (allwords || attr == highlight_attr[HLF_SPB]) |
828 { | 931 { |
829 /* When searching forward only accept a bad word after | 932 /* When searching forward only accept a bad word after |
830 * the cursor. */ | 933 * the cursor. */ |
831 if (dir == BACKWARD | 934 if (dir == BACKWARD |
1071 slang_T *lp = NULL; | 1174 slang_T *lp = NULL; |
1072 garray_T *gap; | 1175 garray_T *gap; |
1073 fromto_T *ftp; | 1176 fromto_T *ftp; |
1074 int rr; | 1177 int rr; |
1075 short *first; | 1178 short *first; |
1179 idx_T idx; | |
1076 | 1180 |
1077 fd = mch_fopen((char *)fname, "r"); | 1181 fd = mch_fopen((char *)fname, "r"); |
1078 if (fd == NULL) | 1182 if (fd == NULL) |
1079 { | 1183 { |
1080 if (!silent) | 1184 if (!silent) |
1168 } | 1272 } |
1169 for (i = 0; i < ccnt; ++i) | 1273 for (i = 0; i < ccnt; ++i) |
1170 fol[i] = getc(fd); /* <fchars> */ | 1274 fol[i] = getc(fd); /* <fchars> */ |
1171 fol[i] = NUL; | 1275 fol[i] = NUL; |
1172 | 1276 |
1173 /* Set the word-char flags and fill spell_isupper() table. */ | 1277 /* Set the word-char flags and fill SPELL_ISUPPER() table. */ |
1174 i = set_spell_charflags(p, cnt, fol); | 1278 i = set_spell_charflags(p, cnt, fol); |
1175 vim_free(p); | 1279 vim_free(p); |
1176 vim_free(fol); | 1280 vim_free(fol); |
1177 if (i == FAIL) | 1281 if (i == FAIL) |
1178 goto formerr; | 1282 goto formerr; |
1291 /* Allocate the index array. */ | 1395 /* Allocate the index array. */ |
1292 p = lalloc_clear((long_u)(len * sizeof(int)), TRUE); | 1396 p = lalloc_clear((long_u)(len * sizeof(int)), TRUE); |
1293 if (p == NULL) | 1397 if (p == NULL) |
1294 goto endFAIL; | 1398 goto endFAIL; |
1295 if (round == 1) | 1399 if (round == 1) |
1296 lp->sl_fidxs = (int *)p; | 1400 lp->sl_fidxs = (idx_T *)p; |
1297 else | 1401 else |
1298 lp->sl_kidxs = (int *)p; | 1402 lp->sl_kidxs = (idx_T *)p; |
1299 | 1403 |
1300 | 1404 |
1301 /* Read the tree and store it in the array. */ | 1405 /* Read the tree and store it in the array. */ |
1302 i = read_tree(fd, | 1406 idx = read_tree(fd, |
1303 round == 1 ? lp->sl_fbyts : lp->sl_kbyts, | 1407 round == 1 ? lp->sl_fbyts : lp->sl_kbyts, |
1304 round == 1 ? lp->sl_fidxs : lp->sl_kidxs, | 1408 round == 1 ? lp->sl_fidxs : lp->sl_kidxs, |
1305 len, 0); | 1409 len, 0); |
1306 if (i == -1) | 1410 if (idx == -1) |
1307 goto truncerr; | 1411 goto truncerr; |
1308 if (i < 0) | 1412 if (idx < 0) |
1309 goto formerr; | 1413 goto formerr; |
1310 } | 1414 } |
1311 } | 1415 } |
1312 | 1416 |
1313 /* For a new file link it in the list of spell files. */ | 1417 /* For a new file link it in the list of spell files. */ |
1346 * | 1450 * |
1347 * Returns the index following the siblings. | 1451 * Returns the index following the siblings. |
1348 * Returns -1 if the file is shorter than expected. | 1452 * Returns -1 if the file is shorter than expected. |
1349 * Returns -2 if there is a format error. | 1453 * Returns -2 if there is a format error. |
1350 */ | 1454 */ |
1351 static int | 1455 static idx_T |
1352 read_tree(fd, byts, idxs, maxidx, startidx) | 1456 read_tree(fd, byts, idxs, maxidx, startidx) |
1353 FILE *fd; | 1457 FILE *fd; |
1354 char_u *byts; | 1458 char_u *byts; |
1355 int *idxs; | 1459 idx_T *idxs; |
1356 int maxidx; /* size of arrays */ | 1460 int maxidx; /* size of arrays */ |
1357 int startidx; /* current index in "byts" and "idxs" */ | 1461 idx_T startidx; /* current index in "byts" and "idxs" */ |
1358 { | 1462 { |
1359 int len; | 1463 int len; |
1360 int i; | 1464 int i; |
1361 int n; | 1465 int n; |
1362 int idx = startidx; | 1466 idx_T idx = startidx; |
1363 int c; | 1467 int c; |
1364 #define SHARED_MASK 0x8000000 | 1468 #define SHARED_MASK 0x8000000 |
1365 | 1469 |
1366 len = getc(fd); /* <siblingcount> */ | 1470 len = getc(fd); /* <siblingcount> */ |
1367 if (len <= 0) | 1471 if (len <= 0) |
1617 if (has_mbyte) | 1721 if (has_mbyte) |
1618 c = mb_ptr2char_adv(&p); | 1722 c = mb_ptr2char_adv(&p); |
1619 else | 1723 else |
1620 #endif | 1724 #endif |
1621 c = *p++; | 1725 c = *p++; |
1622 firstcap = allcap = spell_isupper(c); | 1726 firstcap = allcap = SPELL_ISUPPER(c); |
1623 | 1727 |
1624 /* | 1728 /* |
1625 * Need to check all letters to find a word with mixed upper/lower. | 1729 * Need to check all letters to find a word with mixed upper/lower. |
1626 * But a word with an upper char only at start is a ONECAP. | 1730 * But a word with an upper char only at start is a ONECAP. |
1627 */ | 1731 */ |
1631 #ifdef FEAT_MBYTE | 1735 #ifdef FEAT_MBYTE |
1632 c = mb_ptr2char(p); | 1736 c = mb_ptr2char(p); |
1633 #else | 1737 #else |
1634 c = *p; | 1738 c = *p; |
1635 #endif | 1739 #endif |
1636 if (!spell_isupper(c)) | 1740 if (!SPELL_ISUPPER(c)) |
1637 { | 1741 { |
1638 /* UUl -> KEEPCAP */ | 1742 /* UUl -> KEEPCAP */ |
1639 if (past_second && allcap) | 1743 if (past_second && allcap) |
1640 return WF_KEEPCAP; | 1744 return WF_KEEPCAP; |
1641 allcap = FALSE; | 1745 allcap = FALSE; |
1874 static char *e_affname = N_("Affix name too long in %s line %d: %s"); | 1978 static char *e_affname = N_("Affix name too long in %s line %d: %s"); |
1875 int do_rep; | 1979 int do_rep; |
1876 int do_sal; | 1980 int do_sal; |
1877 int do_map; | 1981 int do_map; |
1878 int found_map = FALSE; | 1982 int found_map = FALSE; |
1983 hashitem_T *hi; | |
1879 | 1984 |
1880 /* | 1985 /* |
1881 * Open the file. | 1986 * Open the file. |
1882 */ | 1987 */ |
1883 fd = mch_fopen((char *)fname, "r"); | 1988 fd = mch_fopen((char *)fname, "r"); |
2029 if (*items[0] == 'P') | 2134 if (*items[0] == 'P') |
2030 tp = &aff->af_pref; | 2135 tp = &aff->af_pref; |
2031 else | 2136 else |
2032 tp = &aff->af_suff; | 2137 tp = &aff->af_suff; |
2033 aff_todo = atoi((char *)items[3]); | 2138 aff_todo = atoi((char *)items[3]); |
2034 if (!HASHITEM_EMPTY(hash_find(tp, cur_aff->ah_key))) | 2139 hi = hash_find(tp, cur_aff->ah_key); |
2140 if (!HASHITEM_EMPTY(hi)) | |
2035 { | 2141 { |
2036 smsg((char_u *)_("Duplicate affix in %s line %d: %s"), | 2142 smsg((char_u *)_("Duplicate affix in %s line %d: %s"), |
2037 fname, lnum, items[1]); | 2143 fname, lnum, items[1]); |
2038 aff_todo = 0; | 2144 aff_todo = 0; |
2039 } | 2145 } |
2169 if (fol != NULL || low != NULL || upp != NULL) | 2275 if (fol != NULL || low != NULL || upp != NULL) |
2170 { | 2276 { |
2171 /* | 2277 /* |
2172 * Don't write a word table for an ASCII file, so that we don't check | 2278 * Don't write a word table for an ASCII file, so that we don't check |
2173 * for conflicts with a word table that matches 'encoding'. | 2279 * for conflicts with a word table that matches 'encoding'. |
2174 * Don't write one for utf-8 either, we use utf_isupper() and | 2280 * Don't write one for utf-8 either, we use utf_*() and |
2175 * mb_get_class(), the list of chars in the file will be incomplete. | 2281 * mb_get_class(), the list of chars in the file will be incomplete. |
2176 */ | 2282 */ |
2177 if (!spin->si_ascii | 2283 if (!spin->si_ascii |
2178 #ifdef FEAT_MBYTE | 2284 #ifdef FEAT_MBYTE |
2179 && !enc_utf8 | 2285 && !enc_utf8 |
2334 verbose_leave(); | 2440 verbose_leave(); |
2335 } | 2441 } |
2336 | 2442 |
2337 /* Read and ignore the first line: word count. */ | 2443 /* Read and ignore the first line: word count. */ |
2338 (void)vim_fgets(line, MAXLINELEN, fd); | 2444 (void)vim_fgets(line, MAXLINELEN, fd); |
2339 if (!isdigit(*skipwhite(line))) | 2445 if (!vim_isdigit(*skipwhite(line))) |
2340 EMSG2(_("E760: No word count in %s"), fname); | 2446 EMSG2(_("E760: No word count in %s"), fname); |
2341 | 2447 |
2342 /* | 2448 /* |
2343 * Read all the lines in the file one by one. | 2449 * Read all the lines in the file one by one. |
2344 * The words are converted to 'encoding' here, before being added to | 2450 * The words are converted to 'encoding' here, before being added to |
2526 if (ae->ae_chop != NULL) | 2632 if (ae->ae_chop != NULL) |
2527 { | 2633 { |
2528 /* Skip chop string. */ | 2634 /* Skip chop string. */ |
2529 #ifdef FEAT_MBYTE | 2635 #ifdef FEAT_MBYTE |
2530 if (has_mbyte) | 2636 if (has_mbyte) |
2637 { | |
2531 i = mb_charlen(ae->ae_chop); | 2638 i = mb_charlen(ae->ae_chop); |
2639 for ( ; i > 0; --i) | |
2640 mb_ptr_adv(p); | |
2641 } | |
2532 else | 2642 else |
2533 #endif | 2643 #endif |
2534 i = STRLEN(ae->ae_chop); | 2644 p += STRLEN(ae->ae_chop); |
2535 for ( ; i > 0; --i) | |
2536 mb_ptr_adv(p); | |
2537 } | 2645 } |
2538 STRCAT(newword, p); | 2646 STRCAT(newword, p); |
2539 } | 2647 } |
2540 else | 2648 else |
2541 { | 2649 { |
3752 */ | 3860 */ |
3753 static void | 3861 static void |
3754 clear_spell_chartab(sp) | 3862 clear_spell_chartab(sp) |
3755 spelltab_T *sp; | 3863 spelltab_T *sp; |
3756 { | 3864 { |
3757 int i; | 3865 int i; |
3758 | 3866 |
3759 /* Init everything to FALSE. */ | 3867 /* Init everything to FALSE. */ |
3760 vim_memset(sp->st_isw, FALSE, sizeof(sp->st_isw)); | 3868 vim_memset(sp->st_isw, FALSE, sizeof(sp->st_isw)); |
3761 vim_memset(sp->st_isu, FALSE, sizeof(sp->st_isu)); | 3869 vim_memset(sp->st_isu, FALSE, sizeof(sp->st_isu)); |
3762 for (i = 0; i < 256; ++i) | 3870 for (i = 0; i < 256; ++i) |
3871 { | |
3763 sp->st_fold[i] = i; | 3872 sp->st_fold[i] = i; |
3873 sp->st_upper[i] = i; | |
3874 } | |
3764 | 3875 |
3765 /* We include digits. A word shouldn't start with a digit, but handling | 3876 /* We include digits. A word shouldn't start with a digit, but handling |
3766 * that is done separately. */ | 3877 * that is done separately. */ |
3767 for (i = '0'; i <= '9'; ++i) | 3878 for (i = '0'; i <= '9'; ++i) |
3768 sp->st_isw[i] = TRUE; | 3879 sp->st_isw[i] = TRUE; |
3771 sp->st_isw[i] = TRUE; | 3882 sp->st_isw[i] = TRUE; |
3772 sp->st_isu[i] = TRUE; | 3883 sp->st_isu[i] = TRUE; |
3773 sp->st_fold[i] = i + 0x20; | 3884 sp->st_fold[i] = i + 0x20; |
3774 } | 3885 } |
3775 for (i = 'a'; i <= 'z'; ++i) | 3886 for (i = 'a'; i <= 'z'; ++i) |
3887 { | |
3776 sp->st_isw[i] = TRUE; | 3888 sp->st_isw[i] = TRUE; |
3889 sp->st_upper[i] = i - 0x20; | |
3890 } | |
3777 } | 3891 } |
3778 | 3892 |
3779 /* | 3893 /* |
3780 * Init the chartab used for spelling. Only depends on 'encoding'. | 3894 * Init the chartab used for spelling. Only depends on 'encoding'. |
3781 * Called once while starting up and when 'encoding' changes. | 3895 * Called once while starting up and when 'encoding' changes. |
3797 /* DBCS: assume double-wide characters are word characters. */ | 3911 /* DBCS: assume double-wide characters are word characters. */ |
3798 for (i = 128; i <= 255; ++i) | 3912 for (i = 128; i <= 255; ++i) |
3799 if (MB_BYTE2LEN(i) == 2) | 3913 if (MB_BYTE2LEN(i) == 2) |
3800 spelltab.st_isw[i] = TRUE; | 3914 spelltab.st_isw[i] = TRUE; |
3801 } | 3915 } |
3916 else if (enc_utf8) | |
3917 { | |
3918 for (i = 128; i < 256; ++i) | |
3919 { | |
3920 spelltab.st_isu[i] = utf_isupper(i); | |
3921 spelltab.st_isw[i] = spelltab.st_isu[i] || utf_islower(i); | |
3922 spelltab.st_fold[i] = utf_fold(i); | |
3923 spelltab.st_upper[i] = utf_toupper(i); | |
3924 } | |
3925 } | |
3802 else | 3926 else |
3803 #endif | 3927 #endif |
3804 { | 3928 { |
3805 /* Rough guess: use isalpha() and isupper() for characters above 128. */ | 3929 /* Rough guess: use locale-dependent library functions. */ |
3806 for (i = 128; i < 256; ++i) | 3930 for (i = 128; i < 256; ++i) |
3807 { | 3931 { |
3808 spelltab.st_isw[i] = MB_ISUPPER(i) || MB_ISLOWER(i); | |
3809 if (MB_ISUPPER(i)) | 3932 if (MB_ISUPPER(i)) |
3810 { | 3933 { |
3934 spelltab.st_isw[i] = TRUE; | |
3811 spelltab.st_isu[i] = TRUE; | 3935 spelltab.st_isu[i] = TRUE; |
3812 spelltab.st_fold[i] = MB_TOLOWER(i); | 3936 spelltab.st_fold[i] = MB_TOLOWER(i); |
3937 } | |
3938 else if (MB_ISLOWER(i)) | |
3939 { | |
3940 spelltab.st_isw[i] = TRUE; | |
3941 spelltab.st_upper[i] = MB_TOUPPER(i); | |
3813 } | 3942 } |
3814 } | 3943 } |
3815 } | 3944 } |
3816 } | 3945 } |
3817 | 3946 |
3870 } | 3999 } |
3871 new_st.st_fold[l] = f; | 4000 new_st.st_fold[l] = f; |
3872 } | 4001 } |
3873 | 4002 |
3874 /* if "UPP" and "FOL" are not the same the "UPP" char needs | 4003 /* if "UPP" and "FOL" are not the same the "UPP" char needs |
3875 * case-folding and it's upper case. */ | 4004 * case-folding, it's upper case and the "UPP" is the upper case of |
4005 * "FOL". */ |
3876 if (u < 256 && u != f) | 4006 if (u < 256 && u != f) |
3877 { | 4007 { |
3878 if (f >= 256) | 4008 if (f >= 256) |
3879 { | 4009 { |
3880 EMSG(_(e_affrange)); | 4010 EMSG(_(e_affrange)); |
3881 return FAIL; | 4011 return FAIL; |
3882 } | 4012 } |
3883 new_st.st_fold[u] = f; | 4013 new_st.st_fold[u] = f; |
3884 new_st.st_isu[u] = TRUE; | 4014 new_st.st_isu[u] = TRUE; |
4015 new_st.st_upper[f] = u; | |
3885 } | 4016 } |
3886 } | 4017 } |
3887 | 4018 |
3888 if (*pl != NUL || *pu != NUL) | 4019 if (*pl != NUL || *pu != NUL) |
3889 { | 4020 { |
3906 /* We build the new tables here first, so that we can compare with the | 4037 /* We build the new tables here first, so that we can compare with the |
3907 * previous one. */ | 4038 * previous one. */ |
3908 spelltab_T new_st; | 4039 spelltab_T new_st; |
3909 int i; | 4040 int i; |
3910 char_u *p = upp; | 4041 char_u *p = upp; |
4042 int c; | |
3911 | 4043 |
3912 clear_spell_chartab(&new_st); | 4044 clear_spell_chartab(&new_st); |
3913 | 4045 |
3914 for (i = 0; i < cnt; ++i) | 4046 for (i = 0; i < cnt; ++i) |
3915 { | 4047 { |
3916 new_st.st_isw[i + 128] = (flags[i] & SPELL_ISWORD) != 0; | 4048 new_st.st_isw[i + 128] = (flags[i] & CF_WORD) != 0; |
3917 new_st.st_isu[i + 128] = (flags[i] & SPELL_ISUPPER) != 0; | 4049 new_st.st_isu[i + 128] = (flags[i] & CF_UPPER) != 0; |
3918 | 4050 |
3919 if (*p == NUL) | 4051 if (*p == NUL) |
3920 return FAIL; | 4052 return FAIL; |
3921 #ifdef FEAT_MBYTE | 4053 #ifdef FEAT_MBYTE |
3922 new_st.st_fold[i + 128] = mb_ptr2char_adv(&p); | 4054 c = mb_ptr2char_adv(&p); |
3923 #else | 4055 #else |
3924 new_st.st_fold[i + 128] = *p++; | 4056 c = *p++; |
3925 #endif | 4057 #endif |
4058 new_st.st_fold[i + 128] = c; | |
4059 if (i + 128 != c && new_st.st_isu[i + 128] && c < 256) | |
4060 new_st.st_upper[c] = i + 128; | |
3926 } | 4061 } |
3927 | 4062 |
3928 return set_spell_finish(&new_st); | 4063 return set_spell_finish(&new_st); |
3929 } | 4064 } |
3930 | 4065 |
3939 /* check that it's the same table */ | 4074 /* check that it's the same table */ |
3940 for (i = 0; i < 256; ++i) | 4075 for (i = 0; i < 256; ++i) |
3941 { | 4076 { |
3942 if (spelltab.st_isw[i] != new_st->st_isw[i] | 4077 if (spelltab.st_isw[i] != new_st->st_isw[i] |
3943 || spelltab.st_isu[i] != new_st->st_isu[i] | 4078 || spelltab.st_isu[i] != new_st->st_isu[i] |
3944 || spelltab.st_fold[i] != new_st->st_fold[i]) | 4079 || spelltab.st_fold[i] != new_st->st_fold[i] |
4080 || spelltab.st_upper[i] != new_st->st_upper[i]) | |
3945 { | 4081 { |
3946 EMSG(_("E763: Word characters differ between spell files")); | 4082 EMSG(_("E763: Word characters differ between spell files")); |
3947 return FAIL; | 4083 return FAIL; |
3948 } | 4084 } |
3949 } | 4085 } |
3975 fputc(128, fd); /* <charflagslen> */ | 4111 fputc(128, fd); /* <charflagslen> */ |
3976 for (i = 128; i < 256; ++i) | 4112 for (i = 128; i < 256; ++i) |
3977 { | 4113 { |
3978 flags = 0; | 4114 flags = 0; |
3979 if (spelltab.st_isw[i]) | 4115 if (spelltab.st_isw[i]) |
3980 flags |= SPELL_ISWORD; | 4116 flags |= CF_WORD; |
3981 if (spelltab.st_isu[i]) | 4117 if (spelltab.st_isu[i]) |
3982 flags |= SPELL_ISUPPER; | 4118 flags |= CF_UPPER; |
3983 fputc(flags, fd); /* <charflags> */ | 4119 fputc(flags, fd); /* <charflags> */ |
3984 | 4120 |
3985 #ifdef FEAT_MBYTE | 4121 #ifdef FEAT_MBYTE |
3986 if (has_mbyte) | 4122 if (has_mbyte) |
3987 len += mb_char2bytes(spelltab.st_fold[i], charbuf + len); | 4123 len += mb_char2bytes(spelltab.st_fold[i], charbuf + len); |
3993 put_bytes(fd, (long_u)len, 2); /* <fcharlen> */ | 4129 put_bytes(fd, (long_u)len, 2); /* <fcharlen> */ |
3994 fwrite(charbuf, (size_t)len, (size_t)1, fd); /* <fchars> */ | 4130 fwrite(charbuf, (size_t)len, (size_t)1, fd); /* <fchars> */ |
3995 } | 4131 } |
3996 | 4132 |
3997 /* | 4133 /* |
3998 * Return TRUE if "c" is an upper-case character for spelling. | 4134 * Case-fold "str[len]" into "buf[buflen]". The result is NUL terminated. |
3999 */ | 4135 * Uses the character definitions from the .spl file. |
4000 static int | |
4001 spell_isupper(c) | |
4002 int c; | |
4003 { | |
4004 # ifdef FEAT_MBYTE | |
4005 if (enc_utf8) | |
4006 { | |
4007 /* For Unicode we can call utf_isupper(), but don't do that for ASCII, | |
4008 * because we don't want to use 'casemap' here. */ | |
4009 if (c >= 128) | |
4010 return utf_isupper(c); | |
4011 } | |
4012 else if (has_mbyte && c > 256) | |
4013 { | |
4014 /* For characters above 255 we don't have something specified. | |
4015 * Fall back to locale-dependent iswupper(). If not available | |
4016 * simply return FALSE. */ | |
4017 # ifdef HAVE_ISWUPPER | |
4018 return iswupper(c); | |
4019 # else | |
4020 return FALSE; | |
4021 # endif | |
4022 } | |
4023 # endif | |
4024 return spelltab.st_isu[c]; | |
4025 } | |
4026 | |
4027 /* | |
4028 * Case-fold "p[len]" into "buf[buflen]". Used for spell checking. | |
4029 * When using a multi-byte 'encoding' the length may change! | 4136 * When using a multi-byte 'encoding' the length may change! |
4030 * Returns FAIL when something wrong. | 4137 * Returns FAIL when something wrong. |
4031 */ | 4138 */ |
4032 static int | 4139 static int |
4033 spell_casefold(p, len, buf, buflen) | 4140 spell_casefold(str, len, buf, buflen) |
4034 char_u *p; | 4141 char_u *str; |
4035 int len; | 4142 int len; |
4036 char_u *buf; | 4143 char_u *buf; |
4037 int buflen; | 4144 int buflen; |
4038 { | 4145 { |
4039 int i; | 4146 int i; |
4045 } | 4152 } |
4046 | 4153 |
4047 #ifdef FEAT_MBYTE | 4154 #ifdef FEAT_MBYTE |
4048 if (has_mbyte) | 4155 if (has_mbyte) |
4049 { | 4156 { |
4157 int outi = 0; | |
4158 char_u *p; | |
4050 int c; | 4159 int c; |
4051 int outi = 0; | |
4052 | 4160 |
4053 /* Fold one character at a time. */ | 4161 /* Fold one character at a time. */ |
4054 for (i = 0; i < len; i += mb_ptr2len_check(p + i)) | 4162 for (p = str; p < str + len; ) |
4055 { | 4163 { |
4056 c = mb_ptr2char(p + i); | |
4057 if (enc_utf8) | |
4058 /* For Unicode case folding is always the same, no need to use | |
4059 * the table from the spell file. */ | |
4060 c = utf_fold(c); | |
4061 else if (c < 256) | |
4062 /* Use the table from the spell file. */ | |
4063 c = spelltab.st_fold[c]; | |
4064 # ifdef HAVE_TOWLOWER | |
4065 else | |
4066 /* We don't know what to do, fall back to towlower(), it | |
4067 * depends on the current locale. */ | |
4068 c = towlower(c); | |
4069 # endif | |
4070 if (outi + MB_MAXBYTES > buflen) | 4164 if (outi + MB_MAXBYTES > buflen) |
4071 { | 4165 { |
4072 buf[outi] = NUL; | 4166 buf[outi] = NUL; |
4073 return FAIL; | 4167 return FAIL; |
4074 } | 4168 } |
4075 outi += mb_char2bytes(c, buf + outi); | 4169 c = mb_ptr2char_adv(&p); |
4170 outi += mb_char2bytes(SPELL_TOFOLD(c), buf + outi); | |
4076 } | 4171 } |
4077 buf[outi] = NUL; | 4172 buf[outi] = NUL; |
4078 } | 4173 } |
4079 else | 4174 else |
4080 #endif | 4175 #endif |
4081 { | 4176 { |
4082 /* Be quick for non-multibyte encodings. */ | 4177 /* Be quick for non-multibyte encodings. */ |
4083 for (i = 0; i < len; ++i) | 4178 for (i = 0; i < len; ++i) |
4084 buf[i] = spelltab.st_fold[p[i]]; | 4179 buf[i] = spelltab.st_fold[str[i]]; |
4085 buf[i] = NUL; | 4180 buf[i] = NUL; |
4086 } | 4181 } |
4087 | 4182 |
4088 return OK; | 4183 return OK; |
4089 } | 4184 } |
4134 add_banned(&sug, sug.su_badword); | 4229 add_banned(&sug, sug.su_badword); |
4135 | 4230 |
4136 /* | 4231 /* |
4137 * 1. Try inserting/deleting/swapping/changing a letter, use REP entries | 4232 * 1. Try inserting/deleting/swapping/changing a letter, use REP entries |
4138 * from the .aff file and inserting a space (split the word). | 4233 * from the .aff file and inserting a space (split the word). |
4234 * | |
4235 * Set a maximum score to limit the combination of operations that is | |
4236 * tried. | |
4139 */ | 4237 */ |
4140 /* Set a maximum score to limit the combination of operations that is | |
4141 * tried. */ | |
4142 sug.su_maxscore = SCORE_MAXINIT; | 4238 sug.su_maxscore = SCORE_MAXINIT; |
4143 spell_try_change(&sug); | 4239 spell_try_change(&sug); |
4144 cleanup_suggestions(&sug); | |
4145 | 4240 |
4146 /* | 4241 /* |
4147 * 2. Try finding sound-a-like words. | 4242 * 2. Try finding sound-a-like words. |
4243 * | |
4244 * Only do this when we don't have a lot of suggestions yet, because it's | |
4245 * very slow and often doesn't find new suggestions. | |
4148 */ | 4246 */ |
4149 /* Allow a higher score if we don't have many suggestions yet. */ | 4247 if (sug.su_ga.ga_len < SUG_CLEAN_COUNT) |
4150 if (sug.su_maxscore == SCORE_MAXINIT) | 4248 { |
4249 /* Allow a higher score now. */ | |
4151 sug.su_maxscore = SCORE_MAXMAX; | 4250 sug.su_maxscore = SCORE_MAXMAX; |
4152 spell_try_soundalike(&sug); | 4251 spell_try_soundalike(&sug); |
4252 } | |
4153 | 4253 |
4154 /* When CTRL-C was hit while searching do show the results. */ | 4254 /* When CTRL-C was hit while searching do show the results. */ |
4255 ui_breakcheck(); | |
4155 if (got_int) | 4256 if (got_int) |
4156 { | 4257 { |
4157 (void)vgetc(); | 4258 (void)vgetc(); |
4158 got_int = FALSE; | 4259 got_int = FALSE; |
4159 } | 4260 } |
4160 | 4261 |
4161 if (sug.su_ga.ga_len == 0) | 4262 if (sug.su_ga.ga_len == 0) |
4162 MSG(_("Sorry, no suggestions")); | 4263 MSG(_("Sorry, no suggestions")); |
4163 else | 4264 else |
4164 { | 4265 { |
4165 /* Cleanup, sort the suggestions and truncate at SUG_PROMPT_COUNT. */ | 4266 #ifdef RESCORE |
4166 cleanup_suggestions(&sug); | 4267 /* Do slow but more accurate computation of the word score. */ |
4268 rescore_suggestions(&sug); | |
4269 #endif | |
4270 | |
4271 /* Sort the suggestions and truncate at SUG_PROMPT_COUNT. */ | |
4272 cleanup_suggestions(&sug, SUG_PROMPT_COUNT); | |
4167 | 4273 |
4168 /* List the suggestions. */ | 4274 /* List the suggestions. */ |
4169 msg_start(); | 4275 msg_start(); |
4170 vim_snprintf((char *)IObuff, IOSIZE, _("Change \"%.*s\" to:"), | 4276 vim_snprintf((char *)IObuff, IOSIZE, _("Change \"%.*s\" to:"), |
4171 sug.su_badlen, sug.su_badptr); | 4277 sug.su_badlen, sug.su_badptr); |
4182 STRCPY(wcopy, stp->st_word); | 4288 STRCPY(wcopy, stp->st_word); |
4183 if (sug.su_badlen > stp->st_orglen) | 4289 if (sug.su_badlen > stp->st_orglen) |
4184 vim_strncpy(wcopy + STRLEN(wcopy), | 4290 vim_strncpy(wcopy + STRLEN(wcopy), |
4185 sug.su_badptr + stp->st_orglen, | 4291 sug.su_badptr + stp->st_orglen, |
4186 sug.su_badlen - stp->st_orglen); | 4292 sug.su_badlen - stp->st_orglen); |
4187 /* TODO: remove score */ | 4293 if (p_verbose > 0) |
4188 vim_snprintf((char *)IObuff, IOSIZE, _("%2d \"%s\" (%d)"), | 4294 vim_snprintf((char *)IObuff, IOSIZE, _("%2d \"%s\" (%d)"), |
4189 i + 1, wcopy, stp->st_score); | 4295 i + 1, wcopy, stp->st_score); |
4296 else | |
4297 vim_snprintf((char *)IObuff, IOSIZE, _("%2d \"%s\""), | |
4298 i + 1, wcopy); | |
4190 msg_puts(IObuff); | 4299 msg_puts(IObuff); |
4191 lines_left = 3; /* avoid more prompt */ | 4300 lines_left = 3; /* avoid more prompt */ |
4192 msg_putchar('\n'); | 4301 msg_putchar('\n'); |
4193 } | 4302 } |
4194 | 4303 |
4222 /* Free the banned words. */ | 4331 /* Free the banned words. */ |
4223 free_banned(&sug); | 4332 free_banned(&sug); |
4224 } | 4333 } |
4225 | 4334 |
4226 /* | 4335 /* |
4227 * Make a copy of "word[len]", with the first letter upper or lower cased, | 4336 * Make a copy of "word", with the first letter upper or lower cased, to |
4228 * to "wcopy[MAXWLEN]". | 4337 * "wcopy[MAXWLEN]". "word" must not be empty. |
4338 * The result is NUL terminated. | |
4229 */ | 4339 */ |
4230 static void | 4340 static void |
4231 onecap_copy(word, len, wcopy, upper) | 4341 onecap_copy(word, wcopy, upper) |
4232 char_u *word; | 4342 char_u *word; |
4233 int len; | |
4234 char_u *wcopy; | 4343 char_u *wcopy; |
4235 int upper; /* TRUE: first letter made upper case */ | 4344 int upper; /* TRUE: first letter made upper case */ |
4236 { | 4345 { |
4237 char_u *p; | 4346 char_u *p; |
4238 int c; | 4347 int c; |
4244 c = mb_ptr2char_adv(&p); | 4353 c = mb_ptr2char_adv(&p); |
4245 else | 4354 else |
4246 #endif | 4355 #endif |
4247 c = *p++; | 4356 c = *p++; |
4248 if (upper) | 4357 if (upper) |
4249 c = MB_TOUPPER(c); | 4358 c = SPELL_TOUPPER(c); |
4250 else | 4359 else |
4251 c = MB_TOLOWER(c); | 4360 c = SPELL_TOFOLD(c); |
4252 #ifdef FEAT_MBYTE | 4361 #ifdef FEAT_MBYTE |
4253 if (has_mbyte) | 4362 if (has_mbyte) |
4254 l = mb_char2bytes(c, wcopy); | 4363 l = mb_char2bytes(c, wcopy); |
4255 else | 4364 else |
4256 #endif | 4365 #endif |
4257 { | 4366 { |
4258 l = 1; | 4367 l = 1; |
4259 wcopy[0] = c; | 4368 wcopy[0] = c; |
4260 } | 4369 } |
4261 vim_strncpy(wcopy + l, p, len - (p - word)); | 4370 vim_strncpy(wcopy + l, p, MAXWLEN - l); |
4262 } | 4371 } |
4263 | 4372 |
4264 /* | 4373 /* |
4265 * Make a copy of "word[len]" with all the letters upper cased into | 4374 * Make a copy of "word" with all the letters upper cased into |
4266 * "wcopy[MAXWLEN]". | 4375 * "wcopy[MAXWLEN]". The result is NUL terminated. |
4267 */ | 4376 */ |
4268 static void | 4377 static void |
4269 allcap_copy(word, wcopy) | 4378 allcap_copy(word, wcopy) |
4270 char_u *word; | 4379 char_u *word; |
4271 char_u *wcopy; | 4380 char_u *wcopy; |
4281 if (has_mbyte) | 4390 if (has_mbyte) |
4282 c = mb_ptr2char_adv(&s); | 4391 c = mb_ptr2char_adv(&s); |
4283 else | 4392 else |
4284 #endif | 4393 #endif |
4285 c = *s++; | 4394 c = *s++; |
4286 | 4395 c = SPELL_TOUPPER(c); |
4287 c = MB_TOUPPER(c); /* TODO: use spell toupper */ | |
4288 | 4396 |
4289 #ifdef FEAT_MBYTE | 4397 #ifdef FEAT_MBYTE |
4290 if (has_mbyte) | 4398 if (has_mbyte) |
4291 { | 4399 { |
4292 if (d - wcopy >= MAXWLEN - MB_MAXBYTES) | 4400 if (d - wcopy >= MAXWLEN - MB_MAXBYTES) |
4320 int splitoff = 0; /* index in tword after last split */ | 4428 int splitoff = 0; /* index in tword after last split */ |
4321 trystate_T *sp; | 4429 trystate_T *sp; |
4322 int newscore; | 4430 int newscore; |
4323 langp_T *lp; | 4431 langp_T *lp; |
4324 char_u *byts; | 4432 char_u *byts; |
4325 int *idxs; | 4433 idx_T *idxs; |
4326 int depth; | 4434 int depth; |
4327 int c; | 4435 int c; |
4328 int n; | 4436 int n; |
4329 int flags; | 4437 int flags; |
4330 int badflags; | 4438 int badflags; |
4331 garray_T *gap; | 4439 garray_T *gap; |
4332 int arridx; | 4440 idx_T arridx; |
4333 int len; | 4441 int len; |
4334 char_u *p; | 4442 char_u *p; |
4335 fromto_T *ftp; | 4443 fromto_T *ftp; |
4336 int fl, tl; | 4444 int fl, tl; |
4337 | 4445 |
4415 /* | 4523 /* |
4416 * End of word in tree. | 4524 * End of word in tree. |
4417 */ | 4525 */ |
4418 ++sp->ts_curi; /* eat one NUL byte */ | 4526 ++sp->ts_curi; /* eat one NUL byte */ |
4419 | 4527 |
4420 flags = idxs[arridx]; | 4528 flags = (int)idxs[arridx]; |
4421 | 4529 |
4422 /* | 4530 /* |
4423 * Form the word with proper case in preword. | 4531 * Form the word with proper case in preword. |
4424 * If there is a word from a previous split, append. | 4532 * If there is a word from a previous split, append. |
4425 */ | 4533 */ |
4449 && (((unsigned)flags >> 8) & lp->lp_region) == 0) | 4557 && (((unsigned)flags >> 8) & lp->lp_region) == 0) |
4450 newscore += SCORE_REGION; | 4558 newscore += SCORE_REGION; |
4451 if (flags & WF_RARE) | 4559 if (flags & WF_RARE) |
4452 newscore += SCORE_RARE; | 4560 newscore += SCORE_RARE; |
4453 | 4561 |
4562 /* Words that were not found in the text get a penalty. */ | |
4563 if ((flags & WF_USED) == 0) | |
4564 newscore += SCORE_NOTUSED; | |
4565 | |
4454 if (!spell_valid_case(badflags, | 4566 if (!spell_valid_case(badflags, |
4455 captype(preword + prewordlen, NULL))) | 4567 captype(preword + prewordlen, NULL))) |
4456 newscore += SCORE_ICASE; | 4568 newscore += SCORE_ICASE; |
4457 | 4569 |
4458 if (fword[sp->ts_fidx] == 0) | 4570 if (fword[sp->ts_fidx] == 0) |
4459 { | 4571 { |
4460 /* The badword also ends: add suggestions, */ | 4572 /* The badword also ends: add suggestions, */ |
4461 add_suggestion(su, preword, sp->ts_score + newscore); | 4573 add_suggestion(su, preword, sp->ts_score + newscore |
4574 #ifdef RESCORE | |
4575 , FALSE | |
4576 #endif | |
4577 ); | |
4462 } | 4578 } |
4463 else if (sp->ts_fidx >= sp->ts_fidxtry) | 4579 else if (sp->ts_fidx >= sp->ts_fidxtry) |
4464 { | 4580 { |
4465 /* The word in the tree ends but the badword | 4581 /* The word in the tree ends but the badword |
4466 * continues: try inserting a space and check that a valid | 4582 * continues: try inserting a space and check that a valid |
4474 | 4590 |
4475 /* Append a space to preword. */ | 4591 /* Append a space to preword. */ |
4476 STRCAT(preword, " "); | 4592 STRCAT(preword, " "); |
4477 prewordlen = STRLEN(preword); | 4593 prewordlen = STRLEN(preword); |
4478 splitoff = sp->ts_twordlen; | 4594 splitoff = sp->ts_twordlen; |
4479 /* TODO: when case-folding changed the number of bytes | 4595 #ifdef FEAT_MBYTE |
4480 * this doesn't work... */ | 4596 if (has_mbyte) |
4481 badflags = captype(su->su_badptr + sp->ts_fidx, | 4597 { |
4482 su->su_badptr + su->su_badlen); | 4598 int i = 0; |
4599 | |
4600 /* Case-folding may change the number of bytes: | |
4601 * Count nr of chars in fword[sp->ts_fidx] and | |
4602 * advance that many chars in su->su_badptr. */ | |
4603 for (p = fword; p < fword + sp->ts_fidx; | |
4604 mb_ptr_adv(p)) | |
4605 ++i; | |
4606 for (p = su->su_badptr; i > 0; mb_ptr_adv(p)) | |
4607 --i; | |
4608 } | |
4609 else | |
4610 #endif | |
4611 p = su->su_badptr + sp->ts_fidx; | |
4612 badflags = captype(p, su->su_badptr + su->su_badlen); | |
4483 | 4613 |
4484 sp->ts_state = STATE_SPLITUNDO; | 4614 sp->ts_state = STATE_SPLITUNDO; |
4485 ++depth; | 4615 ++depth; |
4486 /* Restart at top of the tree. */ | 4616 /* Restart at top of the tree. */ |
4487 stack[depth].ts_arridx = 0; | 4617 stack[depth].ts_arridx = 0; |
4533 /* Normal byte, go one level deeper. If it's not equal to | 4663 /* Normal byte, go one level deeper. If it's not equal to |
4534 * the byte in the bad word adjust the score. But don't | 4664 * the byte in the bad word adjust the score. But don't |
4535 * even try when the byte was already changed. */ | 4665 * even try when the byte was already changed. */ |
4536 if (c == fword[sp->ts_fidx]) | 4666 if (c == fword[sp->ts_fidx]) |
4537 newscore = 0; | 4667 newscore = 0; |
4538 /* TODO: multi-byte characters */ | 4668 |
4669 /* TODO: this is too slow and comparing bytes isn't right | |
4670 * for multi-byte characters. */ | |
4671 #if 0 | |
4539 else if (lp->lp_slang->sl_map != NULL | 4672 else if (lp->lp_slang->sl_map != NULL |
4540 && similar_chars(lp->lp_slang, | 4673 && similar_chars(lp->lp_slang, |
4541 c, fword[sp->ts_fidx])) | 4674 c, fword[sp->ts_fidx])) |
4542 newscore = SCORE_SIMILAR; | 4675 newscore = SCORE_SIMILAR; |
4676 #endif | |
4543 else | 4677 else |
4544 newscore = SCORE_SUBST; | 4678 newscore = SCORE_SUBST; |
4545 if ((newscore == 0 || sp->ts_fidx >= sp->ts_fidxtry) | 4679 if ((newscore == 0 || sp->ts_fidx >= sp->ts_fidxtry) |
4546 && try_deeper(su, stack, depth, newscore)) | 4680 && try_deeper(su, stack, depth, newscore)) |
4547 { | 4681 { |
4816 char_u *fword; | 4950 char_u *fword; |
4817 char_u *kword; | 4951 char_u *kword; |
4818 { | 4952 { |
4819 char_u uword[MAXWLEN]; /* "fword" in upper-case */ | 4953 char_u uword[MAXWLEN]; /* "fword" in upper-case */ |
4820 int depth; | 4954 int depth; |
4821 int tryidx; | 4955 idx_T tryidx; |
4822 | 4956 |
4823 /* The following arrays are used at each depth in the tree. */ | 4957 /* The following arrays are used at each depth in the tree. */ |
4824 int arridx[MAXWLEN]; | 4958 idx_T arridx[MAXWLEN]; |
4825 int round[MAXWLEN]; | 4959 int round[MAXWLEN]; |
4826 int fwordidx[MAXWLEN]; | 4960 int fwordidx[MAXWLEN]; |
4827 int uwordidx[MAXWLEN]; | 4961 int uwordidx[MAXWLEN]; |
4828 int kwordlen[MAXWLEN]; | 4962 int kwordlen[MAXWLEN]; |
4829 | 4963 |
4830 int flen, ulen; | 4964 int flen, ulen; |
4831 int l; | 4965 int l; |
4832 int len; | 4966 int len; |
4833 int c; | 4967 int c; |
4834 unsigned lo, hi, m; | 4968 idx_T lo, hi, m; |
4835 char_u *p; | 4969 char_u *p; |
4836 char_u *byts = slang->sl_kbyts; /* array with bytes of the words */ | 4970 char_u *byts = slang->sl_kbyts; /* array with bytes of the words */ |
4837 int *idxs = slang->sl_kidxs; /* array with indexes */ | 4971 idx_T *idxs = slang->sl_kidxs; /* array with indexes */ |
4838 | 4972 |
4839 if (byts == NULL) | 4973 if (byts == NULL) |
4840 { | 4974 { |
4841 /* array is empty: "cannot happen" */ | 4975 /* array is empty: "cannot happen" */ |
4842 *kword = NUL; | 4976 *kword = NUL; |
4974 { | 5108 { |
4975 char_u salword[MAXWLEN]; | 5109 char_u salword[MAXWLEN]; |
4976 char_u tword[MAXWLEN]; | 5110 char_u tword[MAXWLEN]; |
4977 char_u tfword[MAXWLEN]; | 5111 char_u tfword[MAXWLEN]; |
4978 char_u tsalword[MAXWLEN]; | 5112 char_u tsalword[MAXWLEN]; |
4979 int arridx[MAXWLEN]; | 5113 idx_T arridx[MAXWLEN]; |
4980 int curi[MAXWLEN]; | 5114 int curi[MAXWLEN]; |
4981 langp_T *lp; | 5115 langp_T *lp; |
4982 char_u *byts; | 5116 char_u *byts; |
4983 int *idxs; | 5117 idx_T *idxs; |
4984 int depth; | 5118 int depth; |
4985 int c; | 5119 int c; |
4986 int n; | 5120 idx_T n; |
4987 int round; | 5121 int round; |
4988 int flags; | 5122 int flags; |
5123 int score, sound_score; | |
5124 char_u *bp, *sp; | |
4989 | 5125 |
4990 for (lp = LANGP_ENTRY(curwin->w_buffer->b_langp, 0); | 5126 for (lp = LANGP_ENTRY(curwin->w_buffer->b_langp, 0); |
4991 lp->lp_slang != NULL; ++lp) | 5127 lp->lp_slang != NULL; ++lp) |
4992 { | 5128 { |
4993 if (lp->lp_slang->sl_sal.ga_len > 0) | 5129 if (lp->lp_slang->sl_sal.ga_len > 0) |
5028 ++curi[depth]; | 5164 ++curi[depth]; |
5029 c = byts[n]; | 5165 c = byts[n]; |
5030 if (c == 0) | 5166 if (c == 0) |
5031 { | 5167 { |
5032 /* End of word, deal with the word. */ | 5168 /* End of word, deal with the word. */ |
5033 flags = idxs[n]; | 5169 flags = (int)idxs[n]; |
5034 if (round == 2 || (flags & WF_KEEPCAP) == 0) | 5170 if (round == 2 || (flags & WF_KEEPCAP) == 0) |
5035 { | 5171 { |
5036 tword[depth] = NUL; | 5172 tword[depth] = NUL; |
5037 if (round == 1) | 5173 if (round == 1) |
5038 spell_soundfold(lp->lp_slang, | 5174 spell_soundfold(lp->lp_slang, |
5045 tfword, MAXWLEN); | 5181 tfword, MAXWLEN); |
5046 spell_soundfold(lp->lp_slang, | 5182 spell_soundfold(lp->lp_slang, |
5047 tfword, tsalword); | 5183 tfword, tsalword); |
5048 } | 5184 } |
5049 | 5185 |
5050 /* TODO: also compare with small changes | 5186 /* |
5051 * (insert char, swap char, etc.) */ | 5187 * Accept the word if the sound-folded words |
5052 if (STRCMP(salword, tsalword) == 0) | 5188 * are (almost) equal. |
5189 */ | |
5190 for (bp = salword, sp = tsalword; *bp == *sp; | |
5191 ++bp, ++sp) | |
5192 if (*bp == NUL) | |
5193 break; | |
5194 | |
5195 if (*bp == *sp) | |
5196 /* equal */ | |
5197 sound_score = 0; | |
5198 else if (*bp != NUL && bp[1] != NUL | |
5199 && *bp == sp[1] && bp[1] == *sp | |
5200 && STRCMP(bp + 2, sp + 2) == 0) | |
5201 /* swap two bytes */ | |
5202 sound_score = SCORE_SWAP; | |
5203 else if (STRCMP(bp + 1, sp) == 0) | |
5204 /* delete byte */ | |
5205 sound_score = SCORE_DEL; | |
5206 else if (STRCMP(bp, sp + 1) == 0) | |
5207 /* insert byte */ | |
5208 sound_score = SCORE_INS; | |
5209 else if (STRCMP(bp + 1, sp + 1) == 0) | |
5210 /* skip one byte */ | |
5211 sound_score = SCORE_SUBST; | |
5212 else | |
5213 /* not equal or similar */ | |
5214 sound_score = SCORE_MAXMAX; | |
5215 | |
5216 if (sound_score < SCORE_MAXMAX) | |
5053 { | 5217 { |
5218 char_u cword[MAXWLEN]; | |
5219 char_u *p; | |
5220 | |
5054 if (round == 1 && flags != 0) | 5221 if (round == 1 && flags != 0) |
5055 { | 5222 { |
5056 char_u cword[MAXWLEN]; | 5223 /* Need to fix case according to |
5057 | 5224 * "flags". */ |
5058 make_case_word(tword, cword, flags); | 5225 make_case_word(tword, cword, flags); |
5059 add_suggestion(su, cword, 0); | 5226 p = cword; |
5060 } | 5227 } |
5061 else | 5228 else |
5062 add_suggestion(su, tword, 0); | 5229 p = tword; |
5230 | |
5231 /* Compute the score. */ | |
5232 score = spell_edit_score(su->su_badword, p); | |
5233 #ifdef RESCORE | |
5234 /* give a bonus for the good word sounding | |
5235 * the same as the bad word */ | |
5236 add_suggestion(su, tword, | |
5237 RESCORE(score, sound_score), | |
5238 TRUE); | |
5239 #else | |
5240 add_suggestion(su, tword, | |
5241 score + sound_score); | |
5242 #endif | |
5063 } | 5243 } |
5064 } | 5244 } |
5065 | 5245 |
5066 /* Skip over other NUL bytes. */ | 5246 /* Skip over other NUL bytes. */ |
5067 while (byts[n + 1] == 0) | 5247 while (byts[n + 1] == 0) |
5076 tword[depth++] = c; | 5256 tword[depth++] = c; |
5077 arridx[depth] = idxs[n]; | 5257 arridx[depth] = idxs[n]; |
5078 curi[depth] = 1; | 5258 curi[depth] = 1; |
5079 } | 5259 } |
5080 } | 5260 } |
5261 | |
5262 line_breakcheck(); | |
5081 } | 5263 } |
5082 line_breakcheck(); | 5264 } |
5083 } | 5265 } |
5084 } | 5266 } |
5085 } | 5267 } |
5086 } | 5268 |
5087 | 5269 /* |
5088 /* | 5270 * Copy "fword" to "cword", fixing case according to "flags". |
5089 * Copy "fword" to "cword", fixing according to "flags". | |
5090 */ | 5271 */ |
5091 static void | 5272 static void |
5092 make_case_word(fword, cword, flags) | 5273 make_case_word(fword, cword, flags) |
5093 char_u *fword; | 5274 char_u *fword; |
5094 char_u *cword; | 5275 char_u *cword; |
5097 if (flags & WF_ALLCAP) | 5278 if (flags & WF_ALLCAP) |
5098 /* Make it all upper-case */ | 5279 /* Make it all upper-case */ |
5099 allcap_copy(fword, cword); | 5280 allcap_copy(fword, cword); |
5100 else if (flags & WF_ONECAP) | 5281 else if (flags & WF_ONECAP) |
5101 /* Make the first letter upper-case */ | 5282 /* Make the first letter upper-case */ |
5102 onecap_copy(fword, STRLEN(fword), cword, TRUE); | 5283 onecap_copy(fword, cword, TRUE); |
5103 else | 5284 else |
5104 /* Use goodword as-is. */ | 5285 /* Use goodword as-is. */ |
5105 STRCPY(cword, fword); | 5286 STRCPY(cword, fword); |
5106 } | 5287 } |
5107 | 5288 |
5289 #if 0 | |
5108 /* | 5290 /* |
5109 * Return TRUE if "c1" and "c2" are similar characters according to the MAP | 5291 * Return TRUE if "c1" and "c2" are similar characters according to the MAP |
5110 * lines in the .aff file. | 5292 * lines in the .aff file. |
5111 */ | 5293 */ |
5112 static int | 5294 static int |
5127 p2 = vim_strchr(slang->sl_map, c2); | 5309 p2 = vim_strchr(slang->sl_map, c2); |
5128 if (p2 == NULL) | 5310 if (p2 == NULL) |
5129 return FALSE; | 5311 return FALSE; |
5130 return vim_strchr(p1, '/') == vim_strchr(p2, '/'); | 5312 return vim_strchr(p1, '/') == vim_strchr(p2, '/'); |
5131 } | 5313 } |
5314 #endif | |
5132 | 5315 |
5133 /* | 5316 /* |
5134 * Add a suggestion to the list of suggestions. | 5317 * Add a suggestion to the list of suggestions. |
5135 * Do not add a duplicate suggestion or suggestions with a bad score. | 5318 * Do not add a duplicate suggestion or suggestions with a bad score. |
5136 * When "use_score" is not zero it's used, otherwise the score is computed | 5319 * The caller supplies "score"; it is no longer computed in this function |
5137 * with spell_edit_score(). | 5320 * with spell_edit_score(). |
5138 */ | 5321 */ |
5139 static void | 5322 static void |
5140 add_suggestion(su, goodword, use_score) | 5323 add_suggestion(su, goodword, score |
5324 #ifdef RESCORE | |
5325 , had_bonus | |
5326 #endif | |
5327 ) | |
5141 suginfo_T *su; | 5328 suginfo_T *su; |
5142 char_u *goodword; | 5329 char_u *goodword; |
5143 int use_score; | 5330 int score; |
5331 #ifdef RESCORE | |
5332 int had_bonus; /* set st_had_bonus */ | |
5333 #endif | |
5144 { | 5334 { |
5145 suggest_T *stp; | 5335 suggest_T *stp; |
5146 int score; | |
5147 int i; | 5336 int i; |
5148 #ifdef SOUNDFOLD_SCORE | 5337 #ifdef SOUNDFOLD_SCORE |
5149 char_u fword[MAXWLEN]; | 5338 char_u fword[MAXWLEN]; |
5150 char_u salword[MAXWLEN]; | 5339 char_u salword[MAXWLEN]; |
5151 #endif | 5340 #endif |
5152 | 5341 |
5153 /* Check that the word wasn't banned. */ | 5342 /* Check that the word wasn't banned. */ |
5154 if (was_banned(su, goodword)) | 5343 if (was_banned(su, goodword)) |
5155 return; | 5344 return; |
5156 | 5345 |
5157 /* Compute the score and add the suggestion if it's good enough. */ | |
5158 if (use_score != 0) | |
5159 score = use_score; | |
5160 else | |
5161 score = spell_edit_score(su->su_badword, goodword); | |
5162 | |
5163 if (score <= su->su_maxscore) | 5346 if (score <= su->su_maxscore) |
5164 { | 5347 { |
5165 #ifdef SOUNDFOLD_SCORE | 5348 #ifdef SOUNDFOLD_SCORE |
5166 /* Add to the score when the word sounds differently. | 5349 /* Add to the score when the word sounds differently. |
5167 * This is slow... */ | 5350 * This is slow... */ |
5168 if (su->su_slang->sl_sal.ga_len > 0) | 5351 if (su->su_slang->sl_sal.ga_len > 0) |
5169 { | 5352 score += spell_sound_score(su->su_slang, fword, su->su_salword); |
5170 (void)spell_casefold(goodword, STRLEN(goodword), fword, MAXWLEN); | |
5171 spell_soundfold(su->su_slang, fword, salword); | |
5172 score += spell_edit_score(su->su_salword, salword); | |
5173 } | |
5174 #endif | 5353 #endif |
5175 | 5354 |
5176 /* Check if the word is already there. */ | 5355 /* Check if the word is already there. */ |
5177 stp = &SUG(su, 0); | 5356 stp = &SUG(su, 0); |
5178 for (i = su->su_ga.ga_len - 1; i >= 0; --i) | 5357 for (i = su->su_ga.ga_len - 1; i >= 0; --i) |
5179 if (STRCMP(stp[i].st_word, goodword) == 0) | 5358 if (STRCMP(stp[i].st_word, goodword) == 0) |
5180 { | 5359 { |
5181 /* Found it. Remember the lowest score. */ | 5360 /* Found it. Remember the lowest score. */ |
5182 if (stp[i].st_score > score) | 5361 if (stp[i].st_score > score) |
5362 { | |
5183 stp[i].st_score = score; | 5363 stp[i].st_score = score; |
5364 #ifdef RESCORE | |
5365 stp[i].st_had_bonus = had_bonus; | |
5366 #endif | |
5367 } | |
5184 break; | 5368 break; |
5185 } | 5369 } |
5186 | 5370 |
5187 if (i < 0 && ga_grow(&su->su_ga, 1) == OK) | 5371 if (i < 0 && ga_grow(&su->su_ga, 1) == OK) |
5188 { | 5372 { |
5190 stp = &SUG(su, su->su_ga.ga_len); | 5374 stp = &SUG(su, su->su_ga.ga_len); |
5191 stp->st_word = vim_strsave(goodword); | 5375 stp->st_word = vim_strsave(goodword); |
5192 if (stp->st_word != NULL) | 5376 if (stp->st_word != NULL) |
5193 { | 5377 { |
5194 stp->st_score = score; | 5378 stp->st_score = score; |
5379 #ifdef RESCORE | |
5380 stp->st_had_bonus = had_bonus; | |
5381 #endif | |
5195 stp->st_orglen = su->su_badlen; | 5382 stp->st_orglen = su->su_badlen; |
5196 ++su->su_ga.ga_len; | 5383 ++su->su_ga.ga_len; |
5197 | 5384 |
5198 /* If we have too many suggestions now, sort the list and keep | 5385 /* If we have too many suggestions now, sort the list and keep |
5199 * the best suggestions. */ | 5386 * the best suggestions. */ |
5200 if (su->su_ga.ga_len > SUG_CLEANUP_COUNT) | 5387 if (su->su_ga.ga_len > SUG_MAX_COUNT) |
5201 cleanup_suggestions(su); | 5388 cleanup_suggestions(su, SUG_CLEAN_COUNT); |
5202 } | 5389 } |
5203 } | 5390 } |
5204 } | 5391 } |
5205 } | 5392 } |
5206 | 5393 |
5231 static int | 5418 static int |
5232 was_banned(su, word) | 5419 was_banned(su, word) |
5233 suginfo_T *su; | 5420 suginfo_T *su; |
5234 char_u *word; | 5421 char_u *word; |
5235 { | 5422 { |
5236 return !HASHITEM_EMPTY(hash_find(&su->su_banned, word)); | 5423 hashitem_T *hi = hash_find(&su->su_banned, word); |
5424 | |
5425 return !HASHITEM_EMPTY(hi); | |
5237 } | 5426 } |
5238 | 5427 |
5239 /* | 5428 /* |
5240 * Free the banned words in "su". | 5429 * Free the banned words in "su". |
5241 */ | 5430 */ |
5256 } | 5445 } |
5257 } | 5446 } |
5258 hash_clear(&su->su_banned); | 5447 hash_clear(&su->su_banned); |
5259 } | 5448 } |
5260 | 5449 |
5450 #ifdef RESCORE | |
5451 /* | |
5452 * Recompute the score if sound-folding is possible. This is slow, | |
5453 * thus only done for the final results. | |
5454 */ | |
5455 static void | |
5456 rescore_suggestions(su) | |
5457 suginfo_T *su; | |
5458 { | |
5459 langp_T *lp; | |
5460 suggest_T *stp; | |
5461 char_u sal_badword[MAXWLEN]; | |
5462 int score; | |
5463 int i; | |
5464 | |
5465 for (lp = LANGP_ENTRY(curwin->w_buffer->b_langp, 0); | |
5466 lp->lp_slang != NULL; ++lp) | |
5467 { | |
5468 if (lp->lp_slang->sl_sal.ga_len > 0) | |
5469 { | |
5470 /* soundfold the bad word */ | |
5471 spell_soundfold(lp->lp_slang, su->su_fbadword, sal_badword); | |
5472 | |
5473 for (i = 0; i < su->su_ga.ga_len; ++i) | |
5474 { | |
5475 stp = &SUG(su, i); | |
5476 if (!stp->st_had_bonus) | |
5477 { | |
5478 score = spell_sound_score(lp->lp_slang, stp->st_word, | |
5479 sal_badword); | |
5480 stp->st_score = RESCORE(stp->st_score, score); | |
5481 } | |
5482 } | |
5483 break; | |
5484 } | |
5485 } | |
5486 } | |
5487 #endif | |
5488 | |
5261 static int | 5489 static int |
5262 #ifdef __BORLANDC__ | 5490 #ifdef __BORLANDC__ |
5263 _RTLENTRYF | 5491 _RTLENTRYF |
5264 #endif | 5492 #endif |
5265 sug_compare __ARGS((const void *s1, const void *s2)); | 5493 sug_compare __ARGS((const void *s1, const void *s2)); |
5285 * Cleanup the suggestions: | 5513 * Cleanup the suggestions: |
5286 * - Sort on score. | 5514 * - Sort on score. |
5287 * - Remove words that won't be displayed. | 5515 * - Remove words that won't be displayed. |
5288 */ | 5516 */ |
5289 static void | 5517 static void |
5290 cleanup_suggestions(su) | 5518 cleanup_suggestions(su, keep) |
5291 suginfo_T *su; | 5519 suginfo_T *su; |
5520 int keep; /* nr of suggestions to keep */ | |
5292 { | 5521 { |
5293 suggest_T *stp = &SUG(su, 0); | 5522 suggest_T *stp = &SUG(su, 0); |
5294 int i; | 5523 int i; |
5295 | 5524 |
5296 /* Sort the list. */ | 5525 /* Sort the list. */ |
5297 qsort(su->su_ga.ga_data, (size_t)su->su_ga.ga_len, | 5526 qsort(su->su_ga.ga_data, (size_t)su->su_ga.ga_len, |
5298 sizeof(suggest_T), sug_compare); | 5527 sizeof(suggest_T), sug_compare); |
5299 | 5528 |
5300 /* Truncate the list to the number of suggestions that will be displayed. */ | 5529 /* Truncate the list to the number of suggestions that will be displayed. */ |
5301 if (su->su_ga.ga_len > SUG_PROMPT_COUNT) | 5530 if (su->su_ga.ga_len > keep) |
5302 { | 5531 { |
5303 for (i = SUG_PROMPT_COUNT; i < su->su_ga.ga_len; ++i) | 5532 for (i = keep; i < su->su_ga.ga_len; ++i) |
5304 vim_free(stp[i].st_word); | 5533 vim_free(stp[i].st_word); |
5305 su->su_ga.ga_len = SUG_PROMPT_COUNT; | 5534 su->su_ga.ga_len = keep; |
5306 su->su_maxscore = stp[SUG_PROMPT_COUNT - 1].st_score; | 5535 su->su_maxscore = stp[keep - 1].st_score; |
5307 } | 5536 } |
5308 } | 5537 } |
5309 | 5538 |
5310 /* | 5539 /* |
5311 * Turn "inword" into its sound-a-like equivalent in "res[MAXWLEN]". | 5540 * Turn "inword" into its sound-a-like equivalent in "res[MAXWLEN]". |
5318 { | 5547 { |
5319 fromto_T *ftp; | 5548 fromto_T *ftp; |
5320 char_u word[MAXWLEN]; | 5549 char_u word[MAXWLEN]; |
5321 #ifdef FEAT_MBYTE | 5550 #ifdef FEAT_MBYTE |
5322 int l; | 5551 int l; |
5552 int found_mbyte = FALSE; | |
5323 #endif | 5553 #endif |
5324 char_u *s; | 5554 char_u *s; |
5325 char_u *t; | 5555 char_u *t; |
5326 int i, j, z; | 5556 int i, j, z; |
5327 int n, k = 0; | 5557 int n, k = 0; |
5331 int c; | 5561 int c; |
5332 int pri; | 5562 int pri; |
5333 int p0 = -333; | 5563 int p0 = -333; |
5334 int c0; | 5564 int c0; |
5335 | 5565 |
5336 /* Remove accents, if wanted. | 5566 /* Remove accents, if wanted. We actually remove all non-word characters. |
5337 * We actually remove all non-word characters. */ | 5567 * But keep white space. */ |
5338 if (slang->sl_rem_accents) | 5568 if (slang->sl_rem_accents) |
5339 { | 5569 { |
5340 t = word; | 5570 t = word; |
5341 for (s = inword; *s != NUL; ) | 5571 for (s = inword; *s != NUL; ) |
5342 { | 5572 { |
5573 if (vim_iswhite(*s)) | |
5574 *t++ = *s++; | |
5343 #ifdef FEAT_MBYTE | 5575 #ifdef FEAT_MBYTE |
5344 if (has_mbyte) | 5576 else if (has_mbyte) |
5345 { | 5577 { |
5346 l = mb_ptr2len_check(s); | 5578 l = mb_ptr2len_check(s); |
5347 if (SPELL_ISWORDP(s)) | 5579 if (SPELL_ISWORDP(s)) |
5348 { | 5580 { |
5349 mch_memmove(t, s, l); | 5581 mch_memmove(t, s, l); |
5350 t += l; | 5582 t += l; |
5583 if (l > 1) | |
5584 found_mbyte = TRUE; | |
5351 } | 5585 } |
5352 s += l; | 5586 s += l; |
5353 } | 5587 } |
5588 #endif | |
5354 else | 5589 else |
5355 #endif | |
5356 { | 5590 { |
5357 if (SPELL_ISWORDP(s)) | 5591 if (SPELL_ISWORDP(s)) |
5358 *t++ = *s; | 5592 *t++ = *s; |
5359 ++s; | 5593 ++s; |
5360 } | 5594 } |
5361 } | 5595 } |
5362 *t = NUL; | 5596 *t = NUL; |
5363 } | 5597 } |
5364 else | 5598 else |
5599 { | |
5600 #ifdef FEAT_MBYTE | |
5601 if (has_mbyte) | |
5602 for (s = inword; *s != NUL; s += l) | |
5603 if ((l = mb_ptr2len_check(s)) > 1) | |
5604 { | |
5605 found_mbyte = TRUE; | |
5606 break; | |
5607 } | |
5608 #endif | |
5365 STRCPY(word, inword); | 5609 STRCPY(word, inword); |
5610 } | |
5611 | |
5612 #ifdef FEAT_MBYTE | |
5613 /* If there are multi-byte characters in the word return it as-is, because | |
5614 * the following won't work. */ | |
5615 if (found_mbyte) | |
5616 { | |
5617 STRCPY(res, word); | |
5618 return; | |
5619 } | |
5620 #endif | |
5366 | 5621 |
5367 ftp = (fromto_T *)slang->sl_sal.ga_data; | 5622 ftp = (fromto_T *)slang->sl_sal.ga_data; |
5368 | 5623 |
5369 /* | 5624 /* |
5370 * This comes from Aspell phonet.cpp. Converted from C++ to C. | 5625 * This comes from Aspell phonet.cpp. Converted from C++ to C. |
5626 * Changed to keep spaces. | |
5371 * TODO: support for multi-byte chars. | 5627 * TODO: support for multi-byte chars. |
5372 */ | 5628 */ |
5373 i = j = z = 0; | 5629 i = j = z = 0; |
5374 while ((c = word[i]) != NUL) | 5630 while ((c = word[i]) != NUL) |
5375 { | 5631 { |
5431 if (*s == '^' && *(s + 1) == '^') | 5687 if (*s == '^' && *(s + 1) == '^') |
5432 s++; | 5688 s++; |
5433 | 5689 |
5434 if (*s == NUL | 5690 if (*s == NUL |
5435 || (*s == '^' | 5691 || (*s == '^' |
5436 && (i == 0 || !SPELL_ISWORDP(word + i - 1)) | 5692 && (i == 0 || !(word[i - 1] == ' ' |
5693 || SPELL_ISWORDP(word + i - 1))) | |
5437 && (*(s + 1) != '$' | 5694 && (*(s + 1) != '$' |
5438 || (!SPELL_ISWORDP(word + i + k0)))) | 5695 || (!SPELL_ISWORDP(word + i + k0)))) |
5439 || (*s == '$' && i > 0 | 5696 || (*s == '$' && i > 0 |
5440 && SPELL_ISWORDP(word + i - 1) | 5697 && SPELL_ISWORDP(word + i - 1) |
5441 && (!SPELL_ISWORDP(word + i + k0)))) | 5698 && (!SPELL_ISWORDP(word + i + k0)))) |
5587 break; | 5844 break; |
5588 } | 5845 } |
5589 ++n; | 5846 ++n; |
5590 } | 5847 } |
5591 } | 5848 } |
5849 else if (vim_iswhite(c)) | |
5850 { | |
5851 c = ' '; | |
5852 k = 1; | |
5853 } | |
5592 | 5854 |
5593 if (z0 == 0) | 5855 if (z0 == 0) |
5594 { | 5856 { |
5595 if (k && !p0 && j < MAXWLEN && c != NUL | 5857 if (k && !p0 && j < MAXWLEN && c != NUL |
5596 && (!slang->sl_collapse || j == 0 || res[j - 1] != c)) | 5858 && (!slang->sl_collapse || j == 0 || res[j - 1] != c)) |
5607 } | 5869 } |
5608 | 5870 |
5609 res[j] = NUL; | 5871 res[j] = NUL; |
5610 } | 5872 } |
5611 | 5873 |
5874 #if defined(RESCORE) || defined(SOUNDFOLD_SCORE) | |
5875 /* | |
5876 * Return the score for how much words sound different. | |
5877 */ | |
5878 static int | |
5879 spell_sound_score(slang, goodword, badsound) | |
5880 slang_T *slang; | |
5881 char_u *goodword; /* good word */ | |
5882 char_u *badsound; /* sound-folded bad word */ | |
5883 { | |
5884 char_u fword[MAXWLEN]; | |
5885 char_u goodsound[MAXWLEN]; | |
5886 int score; | |
5887 | |
5888 /* Case-fold the word, needed for sound folding. */ | |
5889 (void)spell_casefold(goodword, STRLEN(goodword), fword, MAXWLEN); | |
5890 | |
5891 /* sound-fold the good word */ | |
5892 spell_soundfold(slang, fword, goodsound); | |
5893 | |
5894 /* compute the edit distance-score of the sounds */ | |
5895 score = spell_edit_score(badsound, goodsound); | |
5896 | |
5897 /* Correction: adding/inserting "*" at the start (word starts with vowel) | |
5898 * shouldn't be counted so much, vowels halfway the word aren't counted at | |
5899 * all. */ | |
5900 if (*badsound != *goodsound && (*badsound == '*' || *goodsound == '*')) | |
5901 score -= SCORE_DEL / 2; | |
5902 | |
5903 return score; | |
5904 } | |
5905 #endif | |
5906 | |
5612 /* | 5907 /* |
5613 * Compute the "edit distance" to turn "badword" into "goodword". The less | 5908 * Compute the "edit distance" to turn "badword" into "goodword". The less |
5614 * deletes/inserts/swaps are required the lower the score. | 5909 * deletes/inserts/swaps are required the lower the score. |
5910 * | |
5615 * The algorithm comes from Aspell editdist.cpp, edit_distance(). | 5911 * The algorithm comes from Aspell editdist.cpp, edit_distance(). |
5616 * TODO: make this work with multi-byte chars. | 5912 * It has been converted from C++ to C and modified to support multi-byte |
5913 * characters. | |
5617 */ | 5914 */ |
5618 static int | 5915 static int |
5619 spell_edit_score(badword, goodword) | 5916 spell_edit_score(badword, goodword) |
5620 char_u *badword; | 5917 char_u *badword; |
5621 char_u *goodword; | 5918 char_u *goodword; |
5623 int *cnt; | 5920 int *cnt; |
5624 int badlen, goodlen; | 5921 int badlen, goodlen; |
5625 int j, i; | 5922 int j, i; |
5626 int t; | 5923 int t; |
5627 int bc, gc; | 5924 int bc, gc; |
5925 int pbc, pgc; | |
5926 #ifdef FEAT_MBYTE | |
5927 char_u *p; | |
5928 int wbadword[MAXWLEN]; | |
5929 int wgoodword[MAXWLEN]; | |
5930 | |
5931 if (has_mbyte) | |
5932 { | |
5933 /* Get the characters from the multi-byte strings and put them in an | |
5934 * int array for easy access. */ | |
5935 for (p = badword, badlen = 0; *p != NUL; ) | |
5936 wbadword[badlen++] = mb_ptr2char_adv(&p); | |
5937 ++badlen; | |
5938 for (p = goodword, goodlen = 0; *p != NUL; ) | |
5939 wgoodword[goodlen++] = mb_ptr2char_adv(&p); | |
5940 ++goodlen; | |
5941 } | |
5942 else | |
5943 #endif | |
5944 { | |
5945 badlen = STRLEN(badword) + 1; | |
5946 goodlen = STRLEN(goodword) + 1; | |
5947 } | |
5628 | 5948 |
5629 /* We use "cnt" as an array: CNT(badword_idx, goodword_idx). */ | 5949 /* We use "cnt" as an array: CNT(badword_idx, goodword_idx). */ |
5630 #define CNT(a, b) cnt[(a) + (b) * (badlen + 1)] | 5950 #define CNT(a, b) cnt[(a) + (b) * (badlen + 1)] |
5631 badlen = STRLEN(badword) + 1; | |
5632 goodlen = STRLEN(goodword) + 1; | |
5633 cnt = (int *)lalloc((long_u)(sizeof(int) * (badlen + 1) * (goodlen + 1)), | 5951 cnt = (int *)lalloc((long_u)(sizeof(int) * (badlen + 1) * (goodlen + 1)), |
5634 TRUE); | 5952 TRUE); |
5635 if (cnt == 0) | 5953 if (cnt == NULL) |
5636 return 0; | 5954 return 0; /* out of memory */ |
5637 | 5955 |
5638 CNT(0, 0) = 0; | 5956 CNT(0, 0) = 0; |
5639 for (j = 1; j <= goodlen; ++j) | 5957 for (j = 1; j <= goodlen; ++j) |
5640 CNT(0, j) = CNT(0, j - 1) + SCORE_DEL; | 5958 CNT(0, j) = CNT(0, j - 1) + SCORE_DEL; |
5641 | 5959 |
5642 for (i = 1; i <= badlen; ++i) | 5960 for (i = 1; i <= badlen; ++i) |
5643 { | 5961 { |
5644 CNT(i, 0) = CNT(i - 1, 0) + SCORE_INS; | 5962 CNT(i, 0) = CNT(i - 1, 0) + SCORE_INS; |
5645 for (j = 1; j <= goodlen; ++j) | 5963 for (j = 1; j <= goodlen; ++j) |
5646 { | 5964 { |
5647 bc = badword[i - 1]; | 5965 #ifdef FEAT_MBYTE |
5648 gc = goodword[j - 1]; | 5966 if (has_mbyte) |
5967 { | |
5968 bc = wbadword[i - 1]; | |
5969 gc = wgoodword[j - 1]; | |
5970 } | |
5971 else | |
5972 #endif | |
5973 { | |
5974 bc = badword[i - 1]; | |
5975 gc = goodword[j - 1]; | |
5976 } | |
5649 if (bc == gc) | 5977 if (bc == gc) |
5650 CNT(i, j) = CNT(i - 1, j - 1); | 5978 CNT(i, j) = CNT(i - 1, j - 1); |
5651 else | 5979 else |
5652 { | 5980 { |
5653 /* Use a better score when there is only a case difference. */ | 5981 /* Use a better score when there is only a case difference. */ |
5654 if (spelltab.st_fold[bc] == spelltab.st_fold[gc]) | 5982 if (SPELL_TOFOLD(bc) == SPELL_TOFOLD(gc)) |
5655 CNT(i, j) = SCORE_ICASE + CNT(i - 1, j - 1); | 5983 CNT(i, j) = SCORE_ICASE + CNT(i - 1, j - 1); |
5656 else | 5984 else |
5657 CNT(i, j) = SCORE_SUBST + CNT(i - 1, j - 1); | 5985 CNT(i, j) = SCORE_SUBST + CNT(i - 1, j - 1); |
5658 | 5986 |
5659 if (i > 1 && j > 1 && bc == goodword[j - 2] | 5987 if (i > 1 && j > 1) |
5660 && badword[i - 2] == gc) | |
5661 { | 5988 { |
5662 t = SCORE_SWAP + CNT(i - 2, j - 2); | 5989 #ifdef FEAT_MBYTE |
5663 if (t < CNT(i, j)) | 5990 if (has_mbyte) |
5664 CNT(i, j) = t; | 5991 { |
5992 pbc = wbadword[i - 2]; | |
5993 pgc = wgoodword[j - 2]; | |
5994 } | |
5995 else | |
5996 #endif | |
5997 { | |
5998 pbc = badword[i - 2]; | |
5999 pgc = goodword[j - 2]; | |
6000 } | |
6001 if (bc == pgc && pbc == gc) | |
6002 { | |
6003 t = SCORE_SWAP + CNT(i - 2, j - 2); | |
6004 if (t < CNT(i, j)) | |
6005 CNT(i, j) = t; | |
6006 } | |
5665 } | 6007 } |
5666 t = SCORE_DEL + CNT(i - 1, j); | 6008 t = SCORE_DEL + CNT(i - 1, j); |
5667 if (t < CNT(i, j)) | 6009 if (t < CNT(i, j)) |
5668 CNT(i, j) = t; | 6010 CNT(i, j) = t; |
5669 t = SCORE_INS + CNT(i, j - 1); | 6011 t = SCORE_INS + CNT(i, j - 1); |