Mercurial > vim
comparison src/spell.c @ 492:81c06952fb1d
updated for version 7.0135
author | vimboss |
---|---|
date | Tue, 23 Aug 2005 21:00:13 +0000 |
parents | 4321aae7e769 |
children | 409dced94617 |
comparison
equal
deleted
inserted
replaced
491:21c3634c2113 | 492:81c06952fb1d |
---|---|
160 * <compminlen> 1 byte Minimal word length for compounding. | 160 * <compminlen> 1 byte Minimal word length for compounding. |
161 * <compsylmax> 1 byte Maximum nr of syllables in compound word. | 161 * <compsylmax> 1 byte Maximum nr of syllables in compound word. |
162 * <compflags> N bytes Flags from COMPOUNDFLAGS items, separated by | 162 * <compflags> N bytes Flags from COMPOUNDFLAGS items, separated by |
163 * slashes. | 163 * slashes. |
164 * | 164 * |
165 * sectionID == SN_NOBREAK: (empty, its presence is enough) | |
166 * | |
165 * sectionID == SN_SYLLABLE: <syllable> | 167 * sectionID == SN_SYLLABLE: <syllable> |
166 * <syllable> N bytes String from SYLLABLE item. | 168 * <syllable> N bytes String from SYLLABLE item. |
167 * | 169 * |
168 * <LWORDTREE>: <wordtree> | 170 * <LWORDTREE>: <wordtree> |
169 * | 171 * |
376 int sl_compsylmax; /* COMPOUNDSYLMAX (default: MAXWLEN) */ | 378 int sl_compsylmax; /* COMPOUNDSYLMAX (default: MAXWLEN) */ |
377 regprog_T *sl_compprog; /* COMPOUNDFLAGS turned into a regexp program | 379 regprog_T *sl_compprog; /* COMPOUNDFLAGS turned into a regexp program |
378 * (NULL when no compounding) */ | 380 * (NULL when no compounding) */ |
379 char_u *sl_compstartflags; /* flags for first compound word */ | 381 char_u *sl_compstartflags; /* flags for first compound word */ |
380 char_u *sl_compallflags; /* all flags for compound words */ | 382 char_u *sl_compallflags; /* all flags for compound words */ |
383 char_u sl_nobreak; /* When TRUE: no spaces between words */ | |
381 char_u *sl_syllable; /* SYLLABLE repeatable chars or NULL */ | 384 char_u *sl_syllable; /* SYLLABLE repeatable chars or NULL */ |
382 garray_T sl_syl_items; /* syllable items */ | 385 garray_T sl_syl_items; /* syllable items */ |
383 | 386 |
384 int sl_prefixcnt; /* number of items in "sl_prefprog" */ | 387 int sl_prefixcnt; /* number of items in "sl_prefprog" */ |
385 regprog_T **sl_prefprog; /* table with regprogs for prefixes */ | 388 regprog_T **sl_prefprog; /* table with regprogs for prefixes */ |
440 #define SN_SAL 5 /* SAL items section */ | 443 #define SN_SAL 5 /* SAL items section */ |
441 #define SN_SOFO 6 /* soundfolding section */ | 444 #define SN_SOFO 6 /* soundfolding section */ |
442 #define SN_MAP 7 /* MAP items section */ | 445 #define SN_MAP 7 /* MAP items section */ |
443 #define SN_COMPOUND 8 /* compound words section */ | 446 #define SN_COMPOUND 8 /* compound words section */ |
444 #define SN_SYLLABLE 9 /* syllable section */ | 447 #define SN_SYLLABLE 9 /* syllable section */ |
448 #define SN_NOBREAK 10 /* NOBREAK section */ | |
445 #define SN_END 255 /* end of sections */ | 449 #define SN_END 255 /* end of sections */ |
446 | 450 |
447 #define SNF_REQUIRED 1 /* <sectionflags>: required section */ | 451 #define SNF_REQUIRED 1 /* <sectionflags>: required section */ |
448 | 452 |
449 /* Result values. Lower number is accepted over higher one. */ | 453 /* Result values. Lower number is accepted over higher one. */ |
558 | 562 |
559 /* others */ | 563 /* others */ |
560 int mi_result; /* result so far: SP_BAD, SP_OK, etc. */ | 564 int mi_result; /* result so far: SP_BAD, SP_OK, etc. */ |
561 int mi_capflags; /* WF_ONECAP WF_ALLCAP WF_KEEPCAP */ | 565 int mi_capflags; /* WF_ONECAP WF_ALLCAP WF_KEEPCAP */ |
562 buf_T *mi_buf; /* buffer being checked */ | 566 buf_T *mi_buf; /* buffer being checked */ |
567 | |
568 /* for NOBREAK */ | |
569 int mi_result2; /* "mi_result" without following word */ | |
570 char_u *mi_end2; /* "mi_end" without following word */ | |
563 } matchinf_T; | 571 } matchinf_T; |
564 | 572 |
565 /* | 573 /* |
566 * The tables used for recognizing word characters according to spelling. | 574 * The tables used for recognizing word characters according to spelling. |
567 * These are only used for the first 256 characters of 'encoding'. | 575 * These are only used for the first 256 characters of 'encoding'. |
636 char_u ts_isdiff; /* DIFF_ values */ | 644 char_u ts_isdiff; /* DIFF_ values */ |
637 char_u ts_fcharstart; /* index in fword where badword char started */ | 645 char_u ts_fcharstart; /* index in fword where badword char started */ |
638 #endif | 646 #endif |
639 char_u ts_prewordlen; /* length of word in "preword[]" */ | 647 char_u ts_prewordlen; /* length of word in "preword[]" */ |
640 char_u ts_splitoff; /* index in "tword" after last split */ | 648 char_u ts_splitoff; /* index in "tword" after last split */ |
649 char_u ts_splitfidx; /* "ts_fidx" at word split */ | |
641 char_u ts_complen; /* nr of compound words used */ | 650 char_u ts_complen; /* nr of compound words used */ |
642 char_u ts_compsplit; /* index for "compflags" where word was split */ | 651 char_u ts_compsplit; /* index for "compflags" where word was split */ |
643 char_u ts_save_badflags; /* su_badflags saved here */ | 652 char_u ts_save_badflags; /* su_badflags saved here */ |
644 } trystate_T; | 653 } trystate_T; |
645 | 654 |
885 MAXWLEN + 1); | 894 MAXWLEN + 1); |
886 mi.mi_fwordlen = STRLEN(mi.mi_fword); | 895 mi.mi_fwordlen = STRLEN(mi.mi_fword); |
887 | 896 |
888 /* The word is bad unless we recognize it. */ | 897 /* The word is bad unless we recognize it. */ |
889 mi.mi_result = SP_BAD; | 898 mi.mi_result = SP_BAD; |
899 mi.mi_result2 = SP_BAD; | |
890 | 900 |
891 /* | 901 /* |
892 * Loop over the languages specified in 'spelllang'. | 902 * Loop over the languages specified in 'spelllang'. |
893 * We check them all, because a matching word may be longer than an | 903 * We check them all, because a matching word may be longer than an |
894 * already found matching word. | 904 * already found matching word. |
902 /* Check for a matching word in keep-case words. */ | 912 /* Check for a matching word in keep-case words. */ |
903 find_word(&mi, FIND_KEEPWORD); | 913 find_word(&mi, FIND_KEEPWORD); |
904 | 914 |
905 /* Check for matching prefixes. */ | 915 /* Check for matching prefixes. */ |
906 find_prefix(&mi, FIND_FOLDWORD); | 916 find_prefix(&mi, FIND_FOLDWORD); |
917 | |
918 /* For a NOBREAK language, may want to use a word without a following | |
919 * word as a backup. */ | |
920 if (mi.mi_lp->lp_slang->sl_nobreak && mi.mi_result == SP_BAD | |
921 && mi.mi_result2 != SP_BAD) | |
922 { | |
923 mi.mi_result = mi.mi_result2; | |
924 mi.mi_end = mi.mi_end2; | |
925 } | |
907 } | 926 } |
908 | 927 |
909 if (mi.mi_result != SP_OK) | 928 if (mi.mi_result != SP_OK) |
910 { | 929 { |
911 /* If we found a number skip over it. Allows for "42nd". Do flag | 930 /* If we found a number skip over it. Allows for "42nd". Do flag |
939 } | 958 } |
940 else if (mi.mi_end == ptr) | 959 else if (mi.mi_end == ptr) |
941 /* Always include at least one character. Required for when there | 960 /* Always include at least one character. Required for when there |
942 * is a mixup in "midword". */ | 961 * is a mixup in "midword". */ |
943 mb_ptr_adv(mi.mi_end); | 962 mb_ptr_adv(mi.mi_end); |
963 else if (mi.mi_result == SP_BAD | |
964 && LANGP_ENTRY(wp->w_buffer->b_langp, 0)->lp_slang->sl_nobreak) | |
965 { | |
966 char_u *p, *fp; | |
967 int save_result = mi.mi_result; | |
968 | |
969 /* First language in 'spelllang' is NOBREAK. Find first position | |
970 * at which any word would be valid. */ | |
971 mi.mi_lp = LANGP_ENTRY(wp->w_buffer->b_langp, 0); | |
972 p = mi.mi_word; | |
973 fp = mi.mi_fword; | |
974 for (;;) | |
975 { | |
976 mb_ptr_adv(p); | |
977 mb_ptr_adv(fp); | |
978 if (p >= mi.mi_end) | |
979 break; | |
980 mi.mi_compoff = fp - mi.mi_fword; | |
981 find_word(&mi, FIND_COMPOUND); | |
982 if (mi.mi_result != SP_BAD) | |
983 { | |
984 mi.mi_end = p; | |
985 break; | |
986 } | |
987 } | |
988 mi.mi_result = save_result; | |
989 } | |
944 | 990 |
945 if (mi.mi_result == SP_BAD || mi.mi_result == SP_BANNED) | 991 if (mi.mi_result == SP_BAD || mi.mi_result == SP_BANNED) |
946 *attrp = highlight_attr[HLF_SPB]; | 992 *attrp = highlight_attr[HLF_SPB]; |
947 else if (mi.mi_result == SP_RARE) | 993 else if (mi.mi_result == SP_RARE) |
948 *attrp = highlight_attr[HLF_SPR]; | 994 *attrp = highlight_attr[HLF_SPR]; |
993 unsigned flags; | 1039 unsigned flags; |
994 char_u *byts; | 1040 char_u *byts; |
995 idx_T *idxs; | 1041 idx_T *idxs; |
996 int word_ends; | 1042 int word_ends; |
997 int prefix_found; | 1043 int prefix_found; |
1044 int nobreak_result; | |
998 | 1045 |
999 if (mode == FIND_KEEPWORD || mode == FIND_KEEPCOMPOUND) | 1046 if (mode == FIND_KEEPWORD || mode == FIND_KEEPCOMPOUND) |
1000 { | 1047 { |
1001 /* Check for word with matching case in keep-case tree. */ | 1048 /* Check for word with matching case in keep-case tree. */ |
1002 ptr = mip->mi_word; | 1049 ptr = mip->mi_word; |
1135 if ((*mb_head_off)(ptr, ptr + wlen) > 0) | 1182 if ((*mb_head_off)(ptr, ptr + wlen) > 0) |
1136 continue; /* not at first byte of character */ | 1183 continue; /* not at first byte of character */ |
1137 #endif | 1184 #endif |
1138 if (spell_iswordp(ptr + wlen, mip->mi_buf)) | 1185 if (spell_iswordp(ptr + wlen, mip->mi_buf)) |
1139 { | 1186 { |
1140 if (slang->sl_compprog == NULL) | 1187 if (slang->sl_compprog == NULL && !slang->sl_nobreak) |
1141 continue; /* next char is a word character */ | 1188 continue; /* next char is a word character */ |
1142 word_ends = FALSE; | 1189 word_ends = FALSE; |
1143 } | 1190 } |
1144 else | 1191 else |
1145 word_ends = TRUE; | 1192 word_ends = TRUE; |
1208 if (c & WF_RAREPFX) | 1255 if (c & WF_RAREPFX) |
1209 flags |= WF_RARE; | 1256 flags |= WF_RARE; |
1210 prefix_found = TRUE; | 1257 prefix_found = TRUE; |
1211 } | 1258 } |
1212 | 1259 |
1213 if (mode == FIND_COMPOUND || mode == FIND_KEEPCOMPOUND | 1260 if (slang->sl_nobreak) |
1214 || !word_ends) | 1261 { |
1262 if ((mode == FIND_COMPOUND || mode == FIND_KEEPCOMPOUND) | |
1263 && (flags & WF_BANNED) == 0) | |
1264 { | |
1265 /* NOBREAK: found a valid following word. That's all we | |
1266 * need to know, so return. */ | |
1267 mip->mi_result = SP_OK; | |
1268 break; | |
1269 } | |
1270 } | |
1271 | |
1272 else if ((mode == FIND_COMPOUND || mode == FIND_KEEPCOMPOUND | |
1273 || !word_ends)) | |
1215 { | 1274 { |
1216 /* If there is no flag or the word is shorter than | 1275 /* If there is no flag or the word is shorter than |
1217 * COMPOUNDMIN reject it quickly. | 1276 * COMPOUNDMIN reject it quickly. |
1218 * Makes you wonder why someone puts a compound flag on a word | 1277 * Makes you wonder why someone puts a compound flag on a word |
1219 * that's too short... Myspell compatibility requires this | 1278 * that's too short... Myspell compatibility requires this |
1293 if (!can_compound(slang, fword, mip->mi_compflags)) | 1352 if (!can_compound(slang, fword, mip->mi_compflags)) |
1294 continue; | 1353 continue; |
1295 } | 1354 } |
1296 } | 1355 } |
1297 | 1356 |
1357 nobreak_result = SP_OK; | |
1358 | |
1298 if (!word_ends) | 1359 if (!word_ends) |
1299 { | 1360 { |
1300 /* Check that a valid word follows. If there is one, it will | 1361 int save_result = mip->mi_result; |
1301 * set "mi_result", thus we are always finished here. | 1362 char_u *save_end = mip->mi_end; |
1363 | |
1364 /* Check that a valid word follows. If there is one and we | |
1365 * are compounding, it will set "mi_result", thus we are | |
1366 * always finished here. For NOBREAK we only check that a | |
1367 * valid word follows. | |
1302 * Recursive! */ | 1368 * Recursive! */ |
1369 if (slang->sl_nobreak) | |
1370 mip->mi_result = SP_BAD; | |
1303 | 1371 |
1304 /* Find following word in case-folded tree. */ | 1372 /* Find following word in case-folded tree. */ |
1305 mip->mi_compoff = endlen[endidxcnt]; | 1373 mip->mi_compoff = endlen[endidxcnt]; |
1306 #ifdef FEAT_MBYTE | 1374 #ifdef FEAT_MBYTE |
1307 if (has_mbyte && mode == FIND_KEEPWORD) | 1375 if (has_mbyte && mode == FIND_KEEPWORD) |
1321 #endif | 1389 #endif |
1322 c = mip->mi_compoff; | 1390 c = mip->mi_compoff; |
1323 ++mip->mi_complen; | 1391 ++mip->mi_complen; |
1324 find_word(mip, FIND_COMPOUND); | 1392 find_word(mip, FIND_COMPOUND); |
1325 | 1393 |
1326 /* Find following word in keep-case tree. */ | 1394 /* When NOBREAK any word that matches is OK. Otherwise we |
1327 mip->mi_compoff = wlen; | 1395 * need to find the longest match, thus try with keep-case and |
1328 find_word(mip, FIND_KEEPCOMPOUND); | 1396 * prefix too. */ |
1329 | 1397 if (!slang->sl_nobreak || mip->mi_result == SP_BAD) |
1330 /* Check for following word with prefix. */ | 1398 { |
1331 mip->mi_compoff = c; | 1399 /* Find following word in keep-case tree. */ |
1332 find_prefix(mip, FIND_COMPOUND); | 1400 mip->mi_compoff = wlen; |
1401 find_word(mip, FIND_KEEPCOMPOUND); | |
1402 | |
1403 if (!slang->sl_nobreak || mip->mi_result == SP_BAD) | |
1404 { | |
1405 /* Check for following word with prefix. */ | |
1406 mip->mi_compoff = c; | |
1407 find_prefix(mip, FIND_COMPOUND); | |
1408 } | |
1409 } | |
1333 --mip->mi_complen; | 1410 --mip->mi_complen; |
1334 | 1411 |
1335 if (mip->mi_result == SP_OK) | 1412 if (slang->sl_nobreak) |
1336 break; | 1413 { |
1337 continue; | 1414 nobreak_result = mip->mi_result; |
1415 mip->mi_result = save_result; | |
1416 mip->mi_end = save_end; | |
1417 } | |
1418 else | |
1419 { | |
1420 if (mip->mi_result == SP_OK) | |
1421 break; | |
1422 continue; | |
1423 } | |
1338 } | 1424 } |
1339 | 1425 |
1340 if (flags & WF_BANNED) | 1426 if (flags & WF_BANNED) |
1341 res = SP_BANNED; | 1427 res = SP_BANNED; |
1342 else if (flags & WF_REGION) | 1428 else if (flags & WF_REGION) |
1350 else if (flags & WF_RARE) | 1436 else if (flags & WF_RARE) |
1351 res = SP_RARE; | 1437 res = SP_RARE; |
1352 else | 1438 else |
1353 res = SP_OK; | 1439 res = SP_OK; |
1354 | 1440 |
1355 /* Always use the longest match and the best result. */ | 1441 /* Always use the longest match and the best result. For NOBREAK |
1356 if (mip->mi_result > res) | 1442 * we separately keep the longest match without a following good |
1443 * word as a fall-back. */ | |
1444 if (nobreak_result == SP_BAD) | |
1445 { | |
1446 if (mip->mi_result2 > res) | |
1447 { | |
1448 mip->mi_result2 = res; | |
1449 mip->mi_end2 = mip->mi_word + wlen; | |
1450 } | |
1451 else if (mip->mi_result2 == res | |
1452 && mip->mi_end2 < mip->mi_word + wlen) | |
1453 mip->mi_end2 = mip->mi_word + wlen; | |
1454 } | |
1455 else if (mip->mi_result > res) | |
1357 { | 1456 { |
1358 mip->mi_result = res; | 1457 mip->mi_result = res; |
1359 mip->mi_end = mip->mi_word + wlen; | 1458 mip->mi_end = mip->mi_word + wlen; |
1360 } | 1459 } |
1361 else if (mip->mi_result == res && mip->mi_end < mip->mi_word + wlen) | 1460 else if (mip->mi_result == res && mip->mi_end < mip->mi_word + wlen) |
1362 mip->mi_end = mip->mi_word + wlen; | 1461 mip->mi_end = mip->mi_word + wlen; |
1363 | 1462 |
1364 if (res == SP_OK) | 1463 if (mip->mi_result == SP_OK) |
1365 break; | 1464 break; |
1366 } | 1465 } |
1367 | 1466 |
1368 if (res == SP_OK) | 1467 if (mip->mi_result == SP_OK) |
1369 break; | 1468 break; |
1370 } | 1469 } |
1371 } | 1470 } |
1372 | 1471 |
1373 /* | 1472 /* |
2220 vim_free(p); | 2319 vim_free(p); |
2221 break; | 2320 break; |
2222 | 2321 |
2223 case SN_COMPOUND: | 2322 case SN_COMPOUND: |
2224 res = read_compound(fd, lp, len); | 2323 res = read_compound(fd, lp, len); |
2324 break; | |
2325 | |
2326 case SN_NOBREAK: | |
2327 lp->sl_nobreak = TRUE; | |
2225 break; | 2328 break; |
2226 | 2329 |
2227 case SN_SYLLABLE: | 2330 case SN_SYLLABLE: |
2228 lp->sl_syllable = read_string(fd, len); /* <syllable> */ | 2331 lp->sl_syllable = read_string(fd, len); /* <syllable> */ |
2229 if (lp->sl_syllable == NULL) | 2332 if (lp->sl_syllable == NULL) |
3949 char_u *si_midword; /* MIDWORD chars, alloc'ed string or NULL */ | 4052 char_u *si_midword; /* MIDWORD chars, alloc'ed string or NULL */ |
3950 int si_compmax; /* max nr of words for compounding */ | 4053 int si_compmax; /* max nr of words for compounding */ |
3951 int si_compminlen; /* minimal length for compounding */ | 4054 int si_compminlen; /* minimal length for compounding */ |
3952 int si_compsylmax; /* max nr of syllables for compounding */ | 4055 int si_compsylmax; /* max nr of syllables for compounding */ |
3953 char_u *si_compflags; /* flags used for compounding */ | 4056 char_u *si_compflags; /* flags used for compounding */ |
4057 char_u si_nobreak; /* NOBREAK */ | |
3954 char_u *si_syllable; /* syllable string */ | 4058 char_u *si_syllable; /* syllable string */ |
3955 garray_T si_prefcond; /* table with conditions for postponed | 4059 garray_T si_prefcond; /* table with conditions for postponed |
3956 * prefixes, each stored as a string */ | 4060 * prefixes, each stored as a string */ |
3957 int si_newID; /* current value for ah_newID */ | 4061 int si_newID; /* current value for ah_newID */ |
3958 } spellinfo_T; | 4062 } spellinfo_T; |
4359 } | 4463 } |
4360 else if (STRCMP(items[0], "SYLLABLE") == 0 && itemcnt == 2 | 4464 else if (STRCMP(items[0], "SYLLABLE") == 0 && itemcnt == 2 |
4361 && aff->af_syllable == NULL) | 4465 && aff->af_syllable == NULL) |
4362 { | 4466 { |
4363 aff->af_syllable = getroom_save(spin, items[1]); | 4467 aff->af_syllable = getroom_save(spin, items[1]); |
4468 } | |
4469 else if (STRCMP(items[0], "NOBREAK") == 0 && itemcnt == 1) | |
4470 { | |
4471 spin->si_nobreak = TRUE; | |
4364 } | 4472 } |
4365 else if (STRCMP(items[0], "PFXPOSTPONE") == 0 && itemcnt == 1) | 4473 else if (STRCMP(items[0], "PFXPOSTPONE") == 0 && itemcnt == 1) |
4366 { | 4474 { |
4367 aff->af_pfxpostpone = TRUE; | 4475 aff->af_pfxpostpone = TRUE; |
4368 } | 4476 } |
6439 putc(spin->si_compsylmax, fd); /* <compsylmax> */ | 6547 putc(spin->si_compsylmax, fd); /* <compsylmax> */ |
6440 /* <compflags> */ | 6548 /* <compflags> */ |
6441 fwrite(spin->si_compflags, (size_t)l, (size_t)1, fd); | 6549 fwrite(spin->si_compflags, (size_t)l, (size_t)1, fd); |
6442 } | 6550 } |
6443 | 6551 |
6552 /* SN_NOBREAK: NOBREAK flag */ | |
6553 if (spin->si_nobreak) | |
6554 { | |
6555 putc(SN_NOBREAK, fd); /* <sectionID> */ | |
6556 putc(0, fd); /* <sectionflags> */ | |
6557 | |
6558 /* It's empty, the presence of the section flags the feature. */ | |
6559 put_bytes(fd, (long_u)0, 4); /* <sectionlen> */ | |
6560 } | |
6561 | |
6444 /* SN_SYLLABLE: syllable info. | 6562 /* SN_SYLLABLE: syllable info. |
6445 * We don't mark it required, when not supported syllables will not be | 6563 * We don't mark it required, when not supported syllables will not be |
6446 * counted. */ | 6564 * counted. */ |
6447 if (spin->si_syllable != NULL) | 6565 if (spin->si_syllable != NULL) |
6448 { | 6566 { |
6859 /* Free any conversion stuff. */ | 6977 /* Free any conversion stuff. */ |
6860 convert_setup(&spin.si_conv, NULL, NULL); | 6978 convert_setup(&spin.si_conv, NULL, NULL); |
6861 #endif | 6979 #endif |
6862 } | 6980 } |
6863 | 6981 |
6982 if (spin.si_compflags != NULL && spin.si_nobreak) | |
6983 MSG(_("Warning: both compounding and NOBREAK specified")); | |
6984 | |
6864 if (!error) | 6985 if (!error) |
6865 { | 6986 { |
6866 /* | 6987 /* |
6867 * Combine tails in the tree. | 6988 * Combine tails in the tree. |
6868 */ | 6989 */ |
8358 if (has_mbyte) | 8479 if (has_mbyte) |
8359 c = mb_cptr2char_adv(&s); | 8480 c = mb_cptr2char_adv(&s); |
8360 else | 8481 else |
8361 #endif | 8482 #endif |
8362 c = *s++; | 8483 c = *s++; |
8363 c = SPELL_TOUPPER(c); | 8484 |
8485 #ifdef FEAT_MBYTE | |
8486 /* We only change ß to SS when we are certain latin1 is used. It | |
8487 * would cause weird errors in other 8-bit encodings. */ | |
8488 if (enc_latin1like && c == 0xdf) | |
8489 { | |
8490 c = 'S'; | |
8491 if (d - wcopy >= MAXWLEN - 1) | |
8492 break; | |
8493 *d++ = c; | |
8494 } | |
8495 else | |
8496 #endif | |
8497 c = SPELL_TOUPPER(c); | |
8364 | 8498 |
8365 #ifdef FEAT_MBYTE | 8499 #ifdef FEAT_MBYTE |
8366 if (has_mbyte) | 8500 if (has_mbyte) |
8367 { | 8501 { |
8368 if (d - wcopy >= MAXWLEN - MB_MAXBYTES) | 8502 if (d - wcopy >= MAXWLEN - MB_MAXBYTES) |
8627 } | 8761 } |
8628 } | 8762 } |
8629 | 8763 |
8630 if (sp->ts_complen > sp->ts_compsplit) | 8764 if (sp->ts_complen > sp->ts_compsplit) |
8631 { | 8765 { |
8632 /* There was a compound word before this word. If this | 8766 if (slang->sl_nobreak) |
8633 * word does not support compounding then give up | 8767 { |
8634 * (splitting is tried for the word without compound | 8768 /* There was a word before this word. When there was |
8635 * flag). */ | 8769 * no change in this word (it was correct) add the |
8636 if (((unsigned)flags >> 24) == 0 | 8770 * first word as a suggestion. If this word was |
8637 || sp->ts_twordlen - sp->ts_splitoff | 8771 * corrected too, we need to check if a correct word |
8772 * follows. */ | |
8773 if (sp->ts_fidx - sp->ts_splitfidx | |
8774 == sp->ts_twordlen - sp->ts_splitoff | |
8775 && STRNCMP(fword + sp->ts_splitfidx, | |
8776 tword + sp->ts_splitoff, | |
8777 sp->ts_fidx - sp->ts_splitfidx) == 0) | |
8778 { | |
8779 preword[sp->ts_prewordlen] = NUL; | |
8780 add_suggestion(su, &su->su_ga, preword, | |
8781 sp->ts_splitfidx - repextra, | |
8782 sp->ts_score, 0, FALSE); | |
8783 break; | |
8784 } | |
8785 } | |
8786 else | |
8787 { | |
8788 /* There was a compound word before this word. If | |
8789 * this word does not support compounding then give up | |
8790 * (splitting is tried for the word without compound | |
8791 * flag). */ | |
8792 if (((unsigned)flags >> 24) == 0 | |
8793 || sp->ts_twordlen - sp->ts_splitoff | |
8638 < slang->sl_compminlen) | 8794 < slang->sl_compminlen) |
8639 break; | 8795 break; |
8640 compflags[sp->ts_complen] = ((unsigned)flags >> 24); | 8796 compflags[sp->ts_complen] = ((unsigned)flags >> 24); |
8641 compflags[sp->ts_complen + 1] = NUL; | 8797 compflags[sp->ts_complen + 1] = NUL; |
8642 vim_strncpy(preword + sp->ts_prewordlen, | 8798 vim_strncpy(preword + sp->ts_prewordlen, |
8643 tword + sp->ts_splitoff, | 8799 tword + sp->ts_splitoff, |
8644 sp->ts_twordlen - sp->ts_splitoff); | 8800 sp->ts_twordlen - sp->ts_splitoff); |
8645 p = preword; | 8801 p = preword; |
8646 while (*skiptowhite(p) != NUL) | 8802 while (*skiptowhite(p) != NUL) |
8647 p = skipwhite(skiptowhite(p)); | 8803 p = skipwhite(skiptowhite(p)); |
8648 if (fword_ends && !can_compound(slang, p, | 8804 if (fword_ends && !can_compound(slang, p, |
8649 compflags + sp->ts_compsplit)) | 8805 compflags + sp->ts_compsplit)) |
8650 break; | 8806 break; |
8651 | 8807 |
8652 /* Get pointer to last char of previous word. */ | 8808 /* Get pointer to last char of previous word. */ |
8653 p = preword + sp->ts_prewordlen; | 8809 p = preword + sp->ts_prewordlen; |
8654 mb_ptr_back(preword, p); | 8810 mb_ptr_back(preword, p); |
8811 } | |
8655 } | 8812 } |
8656 else | 8813 else |
8657 p = NULL; | 8814 p = NULL; |
8658 | 8815 |
8659 /* | 8816 /* |
8751 /* Get here in two situations: | 8908 /* Get here in two situations: |
8752 * 1. The word in the tree ends but the badword continues: | 8909 * 1. The word in the tree ends but the badword continues: |
8753 * If the word allows compounding try that. Otherwise | 8910 * If the word allows compounding try that. Otherwise |
8754 * try a split by inserting a space. For both check | 8911 * try a split by inserting a space. For both check |
8755 * that a valid word starts at fword[sp->ts_fidx]. | 8912 * that a valid word starts at fword[sp->ts_fidx]. |
8913 * For NOBREAK do like compounding to be able to check | |
8914 * if the next word is valid. | |
8756 * 2. The badword does end, but it was due to a change | 8915 * 2. The badword does end, but it was due to a change |
8757 * (e.g., a swap). No need to split, but do check that | 8916 * (e.g., a swap). No need to split, but do check that |
8758 * the following word is valid. | 8917 * the following word is valid. |
8759 */ | 8918 */ |
8760 try_compound = FALSE; | 8919 try_compound = FALSE; |
8773 try_compound = TRUE; | 8932 try_compound = TRUE; |
8774 compflags[sp->ts_complen] = ((unsigned)flags >> 24); | 8933 compflags[sp->ts_complen] = ((unsigned)flags >> 24); |
8775 compflags[sp->ts_complen + 1] = NUL; | 8934 compflags[sp->ts_complen + 1] = NUL; |
8776 } | 8935 } |
8777 | 8936 |
8937 /* For NOBREAK we never try splitting, it won't make any | |
8938 * word valid. */ | |
8939 if (slang->sl_nobreak) | |
8940 try_compound = TRUE; | |
8941 | |
8778 /* If we could add a compound word, and it's also possible | 8942 /* If we could add a compound word, and it's also possible |
8779 * to split at this point, do the split first and set | 8943 * to split at this point, do the split first and set |
8780 * TSF_DIDSPLIT to avoid doing it again. */ | 8944 * TSF_DIDSPLIT to avoid doing it again. */ |
8781 if (!fword_ends | 8945 else if (!fword_ends |
8782 && try_compound | 8946 && try_compound |
8783 && (sp->ts_flags & TSF_DIDSPLIT) == 0) | 8947 && (sp->ts_flags & TSF_DIDSPLIT) == 0) |
8784 { | 8948 { |
8785 try_compound = FALSE; | 8949 try_compound = FALSE; |
8786 sp->ts_flags |= TSF_DIDSPLIT; | 8950 sp->ts_flags |= TSF_DIDSPLIT; |
8816 /* Append a space to preword when splitting. */ | 8980 /* Append a space to preword when splitting. */ |
8817 if (!try_compound && !fword_ends) | 8981 if (!try_compound && !fword_ends) |
8818 STRCAT(preword, " "); | 8982 STRCAT(preword, " "); |
8819 sp->ts_prewordlen = STRLEN(preword); | 8983 sp->ts_prewordlen = STRLEN(preword); |
8820 sp->ts_splitoff = sp->ts_twordlen; | 8984 sp->ts_splitoff = sp->ts_twordlen; |
8985 sp->ts_splitfidx = sp->ts_fidx; | |
8821 | 8986 |
8822 /* If the badword has a non-word character at this | 8987 /* If the badword has a non-word character at this |
8823 * position skip it. That means replacing the | 8988 * position skip it. That means replacing the |
8824 * non-word character with a space. Always skip a | 8989 * non-word character with a space. Always skip a |
8825 * character when the word ends. */ | 8990 * character when the word ends. */ |