comparison src/spell.c @ 492:81c06952fb1d

updated for version 7.0135
author vimboss
date Tue, 23 Aug 2005 21:00:13 +0000
parents 4321aae7e769
children 409dced94617
comparison
equal deleted inserted replaced
491:21c3634c2113 492:81c06952fb1d
160 * <compminlen> 1 byte Minimal word length for compounding. 160 * <compminlen> 1 byte Minimal word length for compounding.
161 * <compsylmax> 1 byte Maximum nr of syllables in compound word. 161 * <compsylmax> 1 byte Maximum nr of syllables in compound word.
162 * <compflags> N bytes Flags from COMPOUNDFLAGS items, separated by 162 * <compflags> N bytes Flags from COMPOUNDFLAGS items, separated by
163 * slashes. 163 * slashes.
164 * 164 *
165 * sectionID == SN_NOBREAK: (empty, its presence is enough)
166 *
165 * sectionID == SN_SYLLABLE: <syllable> 167 * sectionID == SN_SYLLABLE: <syllable>
166 * <syllable> N bytes String from SYLLABLE item. 168 * <syllable> N bytes String from SYLLABLE item.
167 * 169 *
168 * <LWORDTREE>: <wordtree> 170 * <LWORDTREE>: <wordtree>
169 * 171 *
376 int sl_compsylmax; /* COMPOUNDSYLMAX (default: MAXWLEN) */ 378 int sl_compsylmax; /* COMPOUNDSYLMAX (default: MAXWLEN) */
377 regprog_T *sl_compprog; /* COMPOUNDFLAGS turned into a regexp program 379 regprog_T *sl_compprog; /* COMPOUNDFLAGS turned into a regexp program
378 * (NULL when no compounding) */ 380 * (NULL when no compounding) */
379 char_u *sl_compstartflags; /* flags for first compound word */ 381 char_u *sl_compstartflags; /* flags for first compound word */
380 char_u *sl_compallflags; /* all flags for compound words */ 382 char_u *sl_compallflags; /* all flags for compound words */
383 char_u sl_nobreak; /* When TRUE: no spaces between words */
381 char_u *sl_syllable; /* SYLLABLE repeatable chars or NULL */ 384 char_u *sl_syllable; /* SYLLABLE repeatable chars or NULL */
382 garray_T sl_syl_items; /* syllable items */ 385 garray_T sl_syl_items; /* syllable items */
383 386
384 int sl_prefixcnt; /* number of items in "sl_prefprog" */ 387 int sl_prefixcnt; /* number of items in "sl_prefprog" */
385 regprog_T **sl_prefprog; /* table with regprogs for prefixes */ 388 regprog_T **sl_prefprog; /* table with regprogs for prefixes */
440 #define SN_SAL 5 /* SAL items section */ 443 #define SN_SAL 5 /* SAL items section */
441 #define SN_SOFO 6 /* soundfolding section */ 444 #define SN_SOFO 6 /* soundfolding section */
442 #define SN_MAP 7 /* MAP items section */ 445 #define SN_MAP 7 /* MAP items section */
443 #define SN_COMPOUND 8 /* compound words section */ 446 #define SN_COMPOUND 8 /* compound words section */
444 #define SN_SYLLABLE 9 /* syllable section */ 447 #define SN_SYLLABLE 9 /* syllable section */
448 #define SN_NOBREAK 10 /* NOBREAK section */
445 #define SN_END 255 /* end of sections */ 449 #define SN_END 255 /* end of sections */
446 450
447 #define SNF_REQUIRED 1 /* <sectionflags>: required section */ 451 #define SNF_REQUIRED 1 /* <sectionflags>: required section */
448 452
449 /* Result values. Lower number is accepted over higher one. */ 453 /* Result values. Lower number is accepted over higher one. */
558 562
559 /* others */ 563 /* others */
560 int mi_result; /* result so far: SP_BAD, SP_OK, etc. */ 564 int mi_result; /* result so far: SP_BAD, SP_OK, etc. */
561 int mi_capflags; /* WF_ONECAP WF_ALLCAP WF_KEEPCAP */ 565 int mi_capflags; /* WF_ONECAP WF_ALLCAP WF_KEEPCAP */
562 buf_T *mi_buf; /* buffer being checked */ 566 buf_T *mi_buf; /* buffer being checked */
567
568 /* for NOBREAK */
569 int mi_result2; /* "mi_result" without following word */
570 char_u *mi_end2; /* "mi_end" without following word */
563 } matchinf_T; 571 } matchinf_T;
564 572
565 /* 573 /*
566 * The tables used for recognizing word characters according to spelling. 574 * The tables used for recognizing word characters according to spelling.
567 * These are only used for the first 256 characters of 'encoding'. 575 * These are only used for the first 256 characters of 'encoding'.
636 char_u ts_isdiff; /* DIFF_ values */ 644 char_u ts_isdiff; /* DIFF_ values */
637 char_u ts_fcharstart; /* index in fword where badword char started */ 645 char_u ts_fcharstart; /* index in fword where badword char started */
638 #endif 646 #endif
639 char_u ts_prewordlen; /* length of word in "preword[]" */ 647 char_u ts_prewordlen; /* length of word in "preword[]" */
640 char_u ts_splitoff; /* index in "tword" after last split */ 648 char_u ts_splitoff; /* index in "tword" after last split */
649 char_u ts_splitfidx; /* "ts_fidx" at word split */
641 char_u ts_complen; /* nr of compound words used */ 650 char_u ts_complen; /* nr of compound words used */
642 char_u ts_compsplit; /* index for "compflags" where word was split */ 651 char_u ts_compsplit; /* index for "compflags" where word was split */
643 char_u ts_save_badflags; /* su_badflags saved here */ 652 char_u ts_save_badflags; /* su_badflags saved here */
644 } trystate_T; 653 } trystate_T;
645 654
885 MAXWLEN + 1); 894 MAXWLEN + 1);
886 mi.mi_fwordlen = STRLEN(mi.mi_fword); 895 mi.mi_fwordlen = STRLEN(mi.mi_fword);
887 896
888 /* The word is bad unless we recognize it. */ 897 /* The word is bad unless we recognize it. */
889 mi.mi_result = SP_BAD; 898 mi.mi_result = SP_BAD;
899 mi.mi_result2 = SP_BAD;
890 900
891 /* 901 /*
892 * Loop over the languages specified in 'spelllang'. 902 * Loop over the languages specified in 'spelllang'.
893 * We check them all, because a matching word may be longer than an 903 * We check them all, because a matching word may be longer than an
894 * already found matching word. 904 * already found matching word.
902 /* Check for a matching word in keep-case words. */ 912 /* Check for a matching word in keep-case words. */
903 find_word(&mi, FIND_KEEPWORD); 913 find_word(&mi, FIND_KEEPWORD);
904 914
905 /* Check for matching prefixes. */ 915 /* Check for matching prefixes. */
906 find_prefix(&mi, FIND_FOLDWORD); 916 find_prefix(&mi, FIND_FOLDWORD);
917
918 /* For a NOBREAK language, may want to use a word without a following
919 * word as a backup. */
920 if (mi.mi_lp->lp_slang->sl_nobreak && mi.mi_result == SP_BAD
921 && mi.mi_result2 != SP_BAD)
922 {
923 mi.mi_result = mi.mi_result2;
924 mi.mi_end = mi.mi_end2;
925 }
907 } 926 }
908 927
909 if (mi.mi_result != SP_OK) 928 if (mi.mi_result != SP_OK)
910 { 929 {
911 /* If we found a number skip over it. Allows for "42nd". Do flag 930 /* If we found a number skip over it. Allows for "42nd". Do flag
939 } 958 }
940 else if (mi.mi_end == ptr) 959 else if (mi.mi_end == ptr)
941 /* Always include at least one character. Required for when there 960 /* Always include at least one character. Required for when there
942 * is a mixup in "midword". */ 961 * is a mixup in "midword". */
943 mb_ptr_adv(mi.mi_end); 962 mb_ptr_adv(mi.mi_end);
963 else if (mi.mi_result == SP_BAD
964 && LANGP_ENTRY(wp->w_buffer->b_langp, 0)->lp_slang->sl_nobreak)
965 {
966 char_u *p, *fp;
967 int save_result = mi.mi_result;
968
969 /* First language in 'spelllang' is NOBREAK. Find first position
970 * at which any word would be valid. */
971 mi.mi_lp = LANGP_ENTRY(wp->w_buffer->b_langp, 0);
972 p = mi.mi_word;
973 fp = mi.mi_fword;
974 for (;;)
975 {
976 mb_ptr_adv(p);
977 mb_ptr_adv(fp);
978 if (p >= mi.mi_end)
979 break;
980 mi.mi_compoff = fp - mi.mi_fword;
981 find_word(&mi, FIND_COMPOUND);
982 if (mi.mi_result != SP_BAD)
983 {
984 mi.mi_end = p;
985 break;
986 }
987 }
988 mi.mi_result = save_result;
989 }
944 990
945 if (mi.mi_result == SP_BAD || mi.mi_result == SP_BANNED) 991 if (mi.mi_result == SP_BAD || mi.mi_result == SP_BANNED)
946 *attrp = highlight_attr[HLF_SPB]; 992 *attrp = highlight_attr[HLF_SPB];
947 else if (mi.mi_result == SP_RARE) 993 else if (mi.mi_result == SP_RARE)
948 *attrp = highlight_attr[HLF_SPR]; 994 *attrp = highlight_attr[HLF_SPR];
993 unsigned flags; 1039 unsigned flags;
994 char_u *byts; 1040 char_u *byts;
995 idx_T *idxs; 1041 idx_T *idxs;
996 int word_ends; 1042 int word_ends;
997 int prefix_found; 1043 int prefix_found;
1044 int nobreak_result;
998 1045
999 if (mode == FIND_KEEPWORD || mode == FIND_KEEPCOMPOUND) 1046 if (mode == FIND_KEEPWORD || mode == FIND_KEEPCOMPOUND)
1000 { 1047 {
1001 /* Check for word with matching case in keep-case tree. */ 1048 /* Check for word with matching case in keep-case tree. */
1002 ptr = mip->mi_word; 1049 ptr = mip->mi_word;
1135 if ((*mb_head_off)(ptr, ptr + wlen) > 0) 1182 if ((*mb_head_off)(ptr, ptr + wlen) > 0)
1136 continue; /* not at first byte of character */ 1183 continue; /* not at first byte of character */
1137 #endif 1184 #endif
1138 if (spell_iswordp(ptr + wlen, mip->mi_buf)) 1185 if (spell_iswordp(ptr + wlen, mip->mi_buf))
1139 { 1186 {
1140 if (slang->sl_compprog == NULL) 1187 if (slang->sl_compprog == NULL && !slang->sl_nobreak)
1141 continue; /* next char is a word character */ 1188 continue; /* next char is a word character */
1142 word_ends = FALSE; 1189 word_ends = FALSE;
1143 } 1190 }
1144 else 1191 else
1145 word_ends = TRUE; 1192 word_ends = TRUE;
1208 if (c & WF_RAREPFX) 1255 if (c & WF_RAREPFX)
1209 flags |= WF_RARE; 1256 flags |= WF_RARE;
1210 prefix_found = TRUE; 1257 prefix_found = TRUE;
1211 } 1258 }
1212 1259
1213 if (mode == FIND_COMPOUND || mode == FIND_KEEPCOMPOUND 1260 if (slang->sl_nobreak)
1214 || !word_ends) 1261 {
1262 if ((mode == FIND_COMPOUND || mode == FIND_KEEPCOMPOUND)
1263 && (flags & WF_BANNED) == 0)
1264 {
1265 /* NOBREAK: found a valid following word. That's all we
1266 * need to know, so return. */
1267 mip->mi_result = SP_OK;
1268 break;
1269 }
1270 }
1271
1272 else if ((mode == FIND_COMPOUND || mode == FIND_KEEPCOMPOUND
1273 || !word_ends))
1215 { 1274 {
1216 /* If there is no flag or the word is shorter than 1275 /* If there is no flag or the word is shorter than
1217 * COMPOUNDMIN reject it quickly. 1276 * COMPOUNDMIN reject it quickly.
1218 * Makes you wonder why someone puts a compound flag on a word 1277 * Makes you wonder why someone puts a compound flag on a word
1219 * that's too short... Myspell compatibility requires this 1278 * that's too short... Myspell compatibility requires this
1293 if (!can_compound(slang, fword, mip->mi_compflags)) 1352 if (!can_compound(slang, fword, mip->mi_compflags))
1294 continue; 1353 continue;
1295 } 1354 }
1296 } 1355 }
1297 1356
1357 nobreak_result = SP_OK;
1358
1298 if (!word_ends) 1359 if (!word_ends)
1299 { 1360 {
1300 /* Check that a valid word follows. If there is one, it will 1361 int save_result = mip->mi_result;
1301 * set "mi_result", thus we are always finished here. 1362 char_u *save_end = mip->mi_end;
1363
1364 /* Check that a valid word follows. If there is one and we
1365 * are compounding, it will set "mi_result", thus we are
1366 * always finished here. For NOBREAK we only check that a
1367 * valid word follows.
1302 * Recursive! */ 1368 * Recursive! */
1369 if (slang->sl_nobreak)
1370 mip->mi_result = SP_BAD;
1303 1371
1304 /* Find following word in case-folded tree. */ 1372 /* Find following word in case-folded tree. */
1305 mip->mi_compoff = endlen[endidxcnt]; 1373 mip->mi_compoff = endlen[endidxcnt];
1306 #ifdef FEAT_MBYTE 1374 #ifdef FEAT_MBYTE
1307 if (has_mbyte && mode == FIND_KEEPWORD) 1375 if (has_mbyte && mode == FIND_KEEPWORD)
1321 #endif 1389 #endif
1322 c = mip->mi_compoff; 1390 c = mip->mi_compoff;
1323 ++mip->mi_complen; 1391 ++mip->mi_complen;
1324 find_word(mip, FIND_COMPOUND); 1392 find_word(mip, FIND_COMPOUND);
1325 1393
1326 /* Find following word in keep-case tree. */ 1394 /* When NOBREAK any word that matches is OK. Otherwise we
1327 mip->mi_compoff = wlen; 1395 * need to find the longest match, thus try with keep-case and
1328 find_word(mip, FIND_KEEPCOMPOUND); 1396 * prefix too. */
1329 1397 if (!slang->sl_nobreak || mip->mi_result == SP_BAD)
1330 /* Check for following word with prefix. */ 1398 {
1331 mip->mi_compoff = c; 1399 /* Find following word in keep-case tree. */
1332 find_prefix(mip, FIND_COMPOUND); 1400 mip->mi_compoff = wlen;
1401 find_word(mip, FIND_KEEPCOMPOUND);
1402
1403 if (!slang->sl_nobreak || mip->mi_result == SP_BAD)
1404 {
1405 /* Check for following word with prefix. */
1406 mip->mi_compoff = c;
1407 find_prefix(mip, FIND_COMPOUND);
1408 }
1409 }
1333 --mip->mi_complen; 1410 --mip->mi_complen;
1334 1411
1335 if (mip->mi_result == SP_OK) 1412 if (slang->sl_nobreak)
1336 break; 1413 {
1337 continue; 1414 nobreak_result = mip->mi_result;
1415 mip->mi_result = save_result;
1416 mip->mi_end = save_end;
1417 }
1418 else
1419 {
1420 if (mip->mi_result == SP_OK)
1421 break;
1422 continue;
1423 }
1338 } 1424 }
1339 1425
1340 if (flags & WF_BANNED) 1426 if (flags & WF_BANNED)
1341 res = SP_BANNED; 1427 res = SP_BANNED;
1342 else if (flags & WF_REGION) 1428 else if (flags & WF_REGION)
1350 else if (flags & WF_RARE) 1436 else if (flags & WF_RARE)
1351 res = SP_RARE; 1437 res = SP_RARE;
1352 else 1438 else
1353 res = SP_OK; 1439 res = SP_OK;
1354 1440
1355 /* Always use the longest match and the best result. */ 1441 /* Always use the longest match and the best result. For NOBREAK
1356 if (mip->mi_result > res) 1442 * we separately keep the longest match without a following good
1443 * word as a fall-back. */
1444 if (nobreak_result == SP_BAD)
1445 {
1446 if (mip->mi_result2 > res)
1447 {
1448 mip->mi_result2 = res;
1449 mip->mi_end2 = mip->mi_word + wlen;
1450 }
1451 else if (mip->mi_result2 == res
1452 && mip->mi_end2 < mip->mi_word + wlen)
1453 mip->mi_end2 = mip->mi_word + wlen;
1454 }
1455 else if (mip->mi_result > res)
1357 { 1456 {
1358 mip->mi_result = res; 1457 mip->mi_result = res;
1359 mip->mi_end = mip->mi_word + wlen; 1458 mip->mi_end = mip->mi_word + wlen;
1360 } 1459 }
1361 else if (mip->mi_result == res && mip->mi_end < mip->mi_word + wlen) 1460 else if (mip->mi_result == res && mip->mi_end < mip->mi_word + wlen)
1362 mip->mi_end = mip->mi_word + wlen; 1461 mip->mi_end = mip->mi_word + wlen;
1363 1462
1364 if (res == SP_OK) 1463 if (mip->mi_result == SP_OK)
1365 break; 1464 break;
1366 } 1465 }
1367 1466
1368 if (res == SP_OK) 1467 if (mip->mi_result == SP_OK)
1369 break; 1468 break;
1370 } 1469 }
1371 } 1470 }
1372 1471
1373 /* 1472 /*
2220 vim_free(p); 2319 vim_free(p);
2221 break; 2320 break;
2222 2321
2223 case SN_COMPOUND: 2322 case SN_COMPOUND:
2224 res = read_compound(fd, lp, len); 2323 res = read_compound(fd, lp, len);
2324 break;
2325
2326 case SN_NOBREAK:
2327 lp->sl_nobreak = TRUE;
2225 break; 2328 break;
2226 2329
2227 case SN_SYLLABLE: 2330 case SN_SYLLABLE:
2228 lp->sl_syllable = read_string(fd, len); /* <syllable> */ 2331 lp->sl_syllable = read_string(fd, len); /* <syllable> */
2229 if (lp->sl_syllable == NULL) 2332 if (lp->sl_syllable == NULL)
3949 char_u *si_midword; /* MIDWORD chars, alloc'ed string or NULL */ 4052 char_u *si_midword; /* MIDWORD chars, alloc'ed string or NULL */
3950 int si_compmax; /* max nr of words for compounding */ 4053 int si_compmax; /* max nr of words for compounding */
3951 int si_compminlen; /* minimal length for compounding */ 4054 int si_compminlen; /* minimal length for compounding */
3952 int si_compsylmax; /* max nr of syllables for compounding */ 4055 int si_compsylmax; /* max nr of syllables for compounding */
3953 char_u *si_compflags; /* flags used for compounding */ 4056 char_u *si_compflags; /* flags used for compounding */
4057 char_u si_nobreak; /* NOBREAK */
3954 char_u *si_syllable; /* syllable string */ 4058 char_u *si_syllable; /* syllable string */
3955 garray_T si_prefcond; /* table with conditions for postponed 4059 garray_T si_prefcond; /* table with conditions for postponed
3956 * prefixes, each stored as a string */ 4060 * prefixes, each stored as a string */
3957 int si_newID; /* current value for ah_newID */ 4061 int si_newID; /* current value for ah_newID */
3958 } spellinfo_T; 4062 } spellinfo_T;
4359 } 4463 }
4360 else if (STRCMP(items[0], "SYLLABLE") == 0 && itemcnt == 2 4464 else if (STRCMP(items[0], "SYLLABLE") == 0 && itemcnt == 2
4361 && aff->af_syllable == NULL) 4465 && aff->af_syllable == NULL)
4362 { 4466 {
4363 aff->af_syllable = getroom_save(spin, items[1]); 4467 aff->af_syllable = getroom_save(spin, items[1]);
4468 }
4469 else if (STRCMP(items[0], "NOBREAK") == 0 && itemcnt == 1)
4470 {
4471 spin->si_nobreak = TRUE;
4364 } 4472 }
4365 else if (STRCMP(items[0], "PFXPOSTPONE") == 0 && itemcnt == 1) 4473 else if (STRCMP(items[0], "PFXPOSTPONE") == 0 && itemcnt == 1)
4366 { 4474 {
4367 aff->af_pfxpostpone = TRUE; 4475 aff->af_pfxpostpone = TRUE;
4368 } 4476 }
6439 putc(spin->si_compsylmax, fd); /* <compsylmax> */ 6547 putc(spin->si_compsylmax, fd); /* <compsylmax> */
6440 /* <compflags> */ 6548 /* <compflags> */
6441 fwrite(spin->si_compflags, (size_t)l, (size_t)1, fd); 6549 fwrite(spin->si_compflags, (size_t)l, (size_t)1, fd);
6442 } 6550 }
6443 6551
6552 /* SN_NOBREAK: NOBREAK flag */
6553 if (spin->si_nobreak)
6554 {
6555 putc(SN_NOBREAK, fd); /* <sectionID> */
6556 putc(0, fd); /* <sectionflags> */
6557
6558 /* It's empty, the presence of the section flags the feature. */
6559 put_bytes(fd, (long_u)0, 4); /* <sectionlen> */
6560 }
6561
6444 /* SN_SYLLABLE: syllable info. 6562 /* SN_SYLLABLE: syllable info.
6445 * We don't mark it required, when not supported syllables will not be 6563 * We don't mark it required, when not supported syllables will not be
6446 * counted. */ 6564 * counted. */
6447 if (spin->si_syllable != NULL) 6565 if (spin->si_syllable != NULL)
6448 { 6566 {
6859 /* Free any conversion stuff. */ 6977 /* Free any conversion stuff. */
6860 convert_setup(&spin.si_conv, NULL, NULL); 6978 convert_setup(&spin.si_conv, NULL, NULL);
6861 #endif 6979 #endif
6862 } 6980 }
6863 6981
6982 if (spin.si_compflags != NULL && spin.si_nobreak)
6983 MSG(_("Warning: both compounding and NOBREAK specified"));
6984
6864 if (!error) 6985 if (!error)
6865 { 6986 {
6866 /* 6987 /*
6867 * Combine tails in the tree. 6988 * Combine tails in the tree.
6868 */ 6989 */
8358 if (has_mbyte) 8479 if (has_mbyte)
8359 c = mb_cptr2char_adv(&s); 8480 c = mb_cptr2char_adv(&s);
8360 else 8481 else
8361 #endif 8482 #endif
8362 c = *s++; 8483 c = *s++;
8363 c = SPELL_TOUPPER(c); 8484
8485 #ifdef FEAT_MBYTE
8486 /* We only change ß to SS when we are certain latin1 is used. It
8487 * would cause weird errors in other 8-bit encodings. */
8488 if (enc_latin1like && c == 0xdf)
8489 {
8490 c = 'S';
8491 if (d - wcopy >= MAXWLEN - 1)
8492 break;
8493 *d++ = c;
8494 }
8495 else
8496 #endif
8497 c = SPELL_TOUPPER(c);
8364 8498
8365 #ifdef FEAT_MBYTE 8499 #ifdef FEAT_MBYTE
8366 if (has_mbyte) 8500 if (has_mbyte)
8367 { 8501 {
8368 if (d - wcopy >= MAXWLEN - MB_MAXBYTES) 8502 if (d - wcopy >= MAXWLEN - MB_MAXBYTES)
8627 } 8761 }
8628 } 8762 }
8629 8763
8630 if (sp->ts_complen > sp->ts_compsplit) 8764 if (sp->ts_complen > sp->ts_compsplit)
8631 { 8765 {
8632 /* There was a compound word before this word. If this 8766 if (slang->sl_nobreak)
8633 * word does not support compounding then give up 8767 {
8634 * (splitting is tried for the word without compound 8768 /* There was a word before this word. When there was
8635 * flag). */ 8769 * no change in this word (it was correct) add the
8636 if (((unsigned)flags >> 24) == 0 8770 * first word as a suggestion. If this word was
8637 || sp->ts_twordlen - sp->ts_splitoff 8771 * corrected too, we need to check if a correct word
8772 * follows. */
8773 if (sp->ts_fidx - sp->ts_splitfidx
8774 == sp->ts_twordlen - sp->ts_splitoff
8775 && STRNCMP(fword + sp->ts_splitfidx,
8776 tword + sp->ts_splitoff,
8777 sp->ts_fidx - sp->ts_splitfidx) == 0)
8778 {
8779 preword[sp->ts_prewordlen] = NUL;
8780 add_suggestion(su, &su->su_ga, preword,
8781 sp->ts_splitfidx - repextra,
8782 sp->ts_score, 0, FALSE);
8783 break;
8784 }
8785 }
8786 else
8787 {
8788 /* There was a compound word before this word. If
8789 * this word does not support compounding then give up
8790 * (splitting is tried for the word without compound
8791 * flag). */
8792 if (((unsigned)flags >> 24) == 0
8793 || sp->ts_twordlen - sp->ts_splitoff
8638 < slang->sl_compminlen) 8794 < slang->sl_compminlen)
8639 break; 8795 break;
8640 compflags[sp->ts_complen] = ((unsigned)flags >> 24); 8796 compflags[sp->ts_complen] = ((unsigned)flags >> 24);
8641 compflags[sp->ts_complen + 1] = NUL; 8797 compflags[sp->ts_complen + 1] = NUL;
8642 vim_strncpy(preword + sp->ts_prewordlen, 8798 vim_strncpy(preword + sp->ts_prewordlen,
8643 tword + sp->ts_splitoff, 8799 tword + sp->ts_splitoff,
8644 sp->ts_twordlen - sp->ts_splitoff); 8800 sp->ts_twordlen - sp->ts_splitoff);
8645 p = preword; 8801 p = preword;
8646 while (*skiptowhite(p) != NUL) 8802 while (*skiptowhite(p) != NUL)
8647 p = skipwhite(skiptowhite(p)); 8803 p = skipwhite(skiptowhite(p));
8648 if (fword_ends && !can_compound(slang, p, 8804 if (fword_ends && !can_compound(slang, p,
8649 compflags + sp->ts_compsplit)) 8805 compflags + sp->ts_compsplit))
8650 break; 8806 break;
8651 8807
8652 /* Get pointer to last char of previous word. */ 8808 /* Get pointer to last char of previous word. */
8653 p = preword + sp->ts_prewordlen; 8809 p = preword + sp->ts_prewordlen;
8654 mb_ptr_back(preword, p); 8810 mb_ptr_back(preword, p);
8811 }
8655 } 8812 }
8656 else 8813 else
8657 p = NULL; 8814 p = NULL;
8658 8815
8659 /* 8816 /*
8751 /* Get here in two situations: 8908 /* Get here in two situations:
8752 * 1. The word in the tree ends but the badword continues: 8909 * 1. The word in the tree ends but the badword continues:
8753 * If the word allows compounding try that. Otherwise 8910 * If the word allows compounding try that. Otherwise
8754 * try a split by inserting a space. For both check 8911 * try a split by inserting a space. For both check
8755 * that a valid word starts at fword[sp->ts_fidx]. 8912 * that a valid word starts at fword[sp->ts_fidx].
8913 * For NOBREAK do like compounding to be able to check
8914 * if the next word is valid.
8756 * 2. The badword does end, but it was due to a change 8915 * 2. The badword does end, but it was due to a change
8757 * (e.g., a swap). No need to split, but do check that 8916 * (e.g., a swap). No need to split, but do check that
8758 * the following word is valid. 8917 * the following word is valid.
8759 */ 8918 */
8760 try_compound = FALSE; 8919 try_compound = FALSE;
8773 try_compound = TRUE; 8932 try_compound = TRUE;
8774 compflags[sp->ts_complen] = ((unsigned)flags >> 24); 8933 compflags[sp->ts_complen] = ((unsigned)flags >> 24);
8775 compflags[sp->ts_complen + 1] = NUL; 8934 compflags[sp->ts_complen + 1] = NUL;
8776 } 8935 }
8777 8936
8937 /* For NOBREAK we never try splitting, it won't make any
8938 * word valid. */
8939 if (slang->sl_nobreak)
8940 try_compound = TRUE;
8941
8778 /* If we could add a compound word, and it's also possible 8942 /* If we could add a compound word, and it's also possible
8779 * to split at this point, do the split first and set 8943 * to split at this point, do the split first and set
8780 * TSF_DIDSPLIT to avoid doing it again. */ 8944 * TSF_DIDSPLIT to avoid doing it again. */
8781 if (!fword_ends 8945 else if (!fword_ends
8782 && try_compound 8946 && try_compound
8783 && (sp->ts_flags & TSF_DIDSPLIT) == 0) 8947 && (sp->ts_flags & TSF_DIDSPLIT) == 0)
8784 { 8948 {
8785 try_compound = FALSE; 8949 try_compound = FALSE;
8786 sp->ts_flags |= TSF_DIDSPLIT; 8950 sp->ts_flags |= TSF_DIDSPLIT;
8816 /* Append a space to preword when splitting. */ 8980 /* Append a space to preword when splitting. */
8817 if (!try_compound && !fword_ends) 8981 if (!try_compound && !fword_ends)
8818 STRCAT(preword, " "); 8982 STRCAT(preword, " ");
8819 sp->ts_prewordlen = STRLEN(preword); 8983 sp->ts_prewordlen = STRLEN(preword);
8820 sp->ts_splitoff = sp->ts_twordlen; 8984 sp->ts_splitoff = sp->ts_twordlen;
8985 sp->ts_splitfidx = sp->ts_fidx;
8821 8986
8822 /* If the badword has a non-word character at this 8987 /* If the badword has a non-word character at this
8823 * position skip it. That means replacing the 8988 * position skip it. That means replacing the
8824 * non-word character with a space. Always skip a 8989 * non-word character with a space. Always skip a
8825 * character when the word ends. */ 8990 * character when the word ends. */