Mercurial > vim
comparison src/spell.c @ 255:c8742c8da9ab
updated for version 7.0070
author | vimboss |
---|---|
date | Sat, 23 Apr 2005 20:42:23 +0000 |
parents | f146656fb903 |
children | ed33f83b42d8 |
comparison
equal
deleted
inserted
replaced
254:c50c82c5e230 | 255:c8742c8da9ab |
---|---|
95 #define AFF_PREWORD 0x02 /* prefix includes word */ | 95 #define AFF_PREWORD 0x02 /* prefix includes word */ |
96 | 96 |
97 /* | 97 /* |
98 * Structure used to store words and other info for one language, loaded from | 98 * Structure used to store words and other info for one language, loaded from |
99 * a .spl file. | 99 * a .spl file. |
100 * The main access is through hashtable "sl_word", using the case-folded | |
101 * word as the key. This finds a linked list of fword_T. | |
100 */ | 102 */ |
101 typedef struct slang_S slang_T; | 103 typedef struct slang_S slang_T; |
102 struct slang_S | 104 struct slang_S |
103 { | 105 { |
104 slang_T *sl_next; /* next language */ | 106 slang_T *sl_next; /* next language */ |
147 #define HI2ADDWORD(hi) ((addword_T *)((hi)->hi_key - (dumaw.aw_word - (char_u *)&dumaw))) | 149 #define HI2ADDWORD(hi) ((addword_T *)((hi)->hi_key - (dumaw.aw_word - (char_u *)&dumaw))) |
148 | 150 |
149 /* | 151 /* |
150 * Structure to store a basic word. | 152 * Structure to store a basic word. |
151 * There are many of these, keep it small! | 153 * There are many of these, keep it small! |
154 * The list of prefix and suffix NRs is stored after "fw_word" to avoid the | |
155 * need for two extra pointers. | |
152 */ | 156 */ |
153 typedef struct fword_S fword_T; | 157 typedef struct fword_S fword_T; |
154 struct fword_S | 158 struct fword_S |
155 { | 159 { |
156 fword_T *fw_next; /* same basic word with different caps and/or | 160 fword_T *fw_next; /* same basic word with different caps and/or |
221 | 225 |
222 /* Translate ADD_ flags to BWF_ flags. | 226 /* Translate ADD_ flags to BWF_ flags. |
223 * (Needed to keep ADD_ flags in one byte.) */ | 227 * (Needed to keep ADD_ flags in one byte.) */ |
224 #define ADD2BWF(x) (((x) & 0x0f) | (((x) & 0xf0) << 4)) | 228 #define ADD2BWF(x) (((x) & 0x0f) | (((x) & 0xf0) << 4)) |
225 | 229 |
226 #define VIMSPELLMAGIC "VIMspell03" /* string at start of Vim spell file */ | 230 #define VIMSPELLMAGIC "VIMspell04" /* string at start of Vim spell file */ |
227 #define VIMSPELLMAGICL 10 | 231 #define VIMSPELLMAGICL 10 |
228 | 232 |
229 /* | 233 /* |
230 * Structure to store info for word matching. | 234 * Structure to store info for word matching. |
231 */ | 235 */ |
305 /* A word starting with a number is always OK. */ | 309 /* A word starting with a number is always OK. */ |
306 if (*ptr >= '0' && *ptr <= '9') | 310 if (*ptr >= '0' && *ptr <= '9') |
307 return (int)(mi.mi_end - ptr); | 311 return (int)(mi.mi_end - ptr); |
308 | 312 |
309 /* Make case-folded copy of the word. */ | 313 /* Make case-folded copy of the word. */ |
310 (void)str_foldcase(ptr, mi.mi_end - ptr, mi.mi_fword, MAXWLEN + 1); | 314 (void)spell_casefold(ptr, mi.mi_end - ptr, mi.mi_fword, MAXWLEN + 1); |
311 mi.mi_cword = mi.mi_fword; | 315 mi.mi_cword = mi.mi_fword; |
312 mi.mi_fendlen = STRLEN(mi.mi_fword); | 316 mi.mi_fendlen = STRLEN(mi.mi_fword); |
313 mi.mi_faddlen = 0; | 317 mi.mi_faddlen = 0; |
314 mi.mi_fend = mi.mi_end; | 318 mi.mi_fend = mi.mi_end; |
315 | 319 |
402 * Try finding a matching preword for "mip->mi_word". These are | 406 * Try finding a matching preword for "mip->mi_word". These are |
403 * prefixes that have a non-word character after a word character: | 407 * prefixes that have a non-word character after a word character: |
404 * "d'", "de-", "'s-", "l'de-". But not "'s". | 408 * "d'", "de-", "'s-", "l'de-". But not "'s". |
405 * Also need to do this when a matching word was already found, because we | 409 * Also need to do this when a matching word was already found, because we |
406 * might find a longer match this way (French: "qu" and "qu'a-t-elle"). | 410 * might find a longer match this way (French: "qu" and "qu'a-t-elle"). |
411 * The check above may have added characters to mi_fword, thus we need to | |
412 * truncate it after the basic word for the hash lookup. | |
407 */ | 413 */ |
408 cc = mip->mi_fword[mip->mi_fendlen]; | 414 cc = mip->mi_fword[mip->mi_fendlen]; |
409 mip->mi_fword[mip->mi_fendlen] = NUL; | 415 mip->mi_fword[mip->mi_fendlen] = NUL; |
410 hi = hash_lookup(&mip->mi_slang->sl_prewords, mip->mi_fword, fhash); | 416 hi = hash_lookup(&mip->mi_slang->sl_prewords, mip->mi_fword, fhash); |
411 mip->mi_fword[mip->mi_fendlen] = cc; | 417 mip->mi_fword[mip->mi_fendlen] = cc; |
770 if (has_mbyte) | 776 if (has_mbyte) |
771 l = (*mb_ptr2len_check)(mip->mi_fend); | 777 l = (*mb_ptr2len_check)(mip->mi_fend); |
772 else | 778 else |
773 #endif | 779 #endif |
774 l = 1; | 780 l = 1; |
775 (void)str_foldcase(mip->mi_fend, l, p + mip->mi_faddlen, | 781 (void)spell_casefold(mip->mi_fend, l, p + mip->mi_faddlen, |
776 MAXWLEN - mip->mi_fendlen - mip->mi_faddlen); | 782 MAXWLEN - mip->mi_fendlen - mip->mi_faddlen); |
777 mip->mi_fend += l; | 783 mip->mi_fend += l; |
778 mip->mi_faddlen += STRLEN(p + mip->mi_faddlen); | 784 mip->mi_faddlen += STRLEN(p + mip->mi_faddlen); |
779 } | 785 } |
780 } | 786 } |
990 * Try suffixes of different length, starting with an empty suffix (chop | 996 * Try suffixes of different length, starting with an empty suffix (chop |
991 * only, thus adds something). | 997 * only, thus adds something). |
992 * Stop checking if there are no suffixes with so many characters. | 998 * Stop checking if there are no suffixes with so many characters. |
993 */ | 999 */ |
994 sufp = endw; | 1000 sufp = endw; |
1001 *endw = NUL; /* truncate after possible suffix */ | |
1002 | |
995 for (charlen = 0; charlen <= mip->mi_slang->sl_sufftab.ga_len; ++charlen) | 1003 for (charlen = 0; charlen <= mip->mi_slang->sl_sufftab.ga_len; ++charlen) |
996 { | 1004 { |
997 /* Move the pointer to the possible suffix back one character, unless | 1005 /* Move the pointer to the possible suffix back one character, unless |
998 * doing the first round (empty suffix). */ | 1006 * doing the first round (empty suffix). */ |
999 if (charlen > 0) | 1007 if (charlen > 0) |
1010 /* Get pointer to hashtab for suffix of this many chars. */ | 1018 /* Get pointer to hashtab for suffix of this many chars. */ |
1011 ht = ((hashtab_T *)mip->mi_slang->sl_sufftab.ga_data) + charlen - 1; | 1019 ht = ((hashtab_T *)mip->mi_slang->sl_sufftab.ga_data) + charlen - 1; |
1012 if (ht->ht_used == 0) | 1020 if (ht->ht_used == 0) |
1013 continue; | 1021 continue; |
1014 | 1022 |
1015 *endw = NUL; /* truncate after possible suffix */ | |
1016 hi = hash_find(ht, sufp); | 1023 hi = hash_find(ht, sufp); |
1017 if (HASHITEM_EMPTY(hi)) | 1024 if (HASHITEM_EMPTY(hi)) |
1018 ai = NULL; | 1025 ai = NULL; |
1019 else | 1026 else |
1020 ai = HI2AI(hi); | 1027 ai = HI2AI(hi); |
1021 *endw = endw_c; | |
1022 } | 1028 } |
1023 | 1029 |
1024 if (ai != NULL) | 1030 if (ai != NULL) |
1025 { | 1031 { |
1026 /* Found a list of matching suffixes. Now check that there is one | 1032 /* Found a list of matching suffixes. Now check that there is one |
1027 * we can use. */ | 1033 * we can use. */ |
1028 tlen = sufp - mip->mi_cword; /* length of word without suffix */ | 1034 tlen = sufp - mip->mi_cword; /* length of word without suffix */ |
1029 mch_memmove(pword, mip->mi_cword, tlen); | 1035 mch_memmove(pword, mip->mi_cword, tlen); |
1036 *endw = endw_c; | |
1030 | 1037 |
1031 for ( ; ai != NULL; ai = ai->ai_next) | 1038 for ( ; ai != NULL; ai = ai->ai_next) |
1032 { | 1039 { |
1033 /* Found a matching suffix. Create the basic word by removing | 1040 /* Found a matching suffix. Create the basic word by removing |
1034 * the suffix and adding the chop string. */ | 1041 * the suffix and adding the chop string. */ |
1066 mip->mi_capflags = capflags_save; | 1073 mip->mi_capflags = capflags_save; |
1067 return TRUE; | 1074 return TRUE; |
1068 } | 1075 } |
1069 } | 1076 } |
1070 } | 1077 } |
1071 } | 1078 |
1072 } | 1079 *endw = NUL; /* truncate after possible suffix */ |
1073 | 1080 } |
1081 } | |
1082 | |
1083 *endw = endw_c; | |
1074 mip->mi_capflags = capflags_save; | 1084 mip->mi_capflags = capflags_save; |
1075 return FALSE; | 1085 return FALSE; |
1076 } | 1086 } |
1077 | 1087 |
1078 /* | 1088 /* |
1113 if (has_mbyte) | 1123 if (has_mbyte) |
1114 c = mb_ptr2char_adv(&p); | 1124 c = mb_ptr2char_adv(&p); |
1115 else | 1125 else |
1116 #endif | 1126 #endif |
1117 c = *p++; | 1127 c = *p++; |
1118 if (MB_ISUPPER(c)) | 1128 if (spell_isupper(c)) |
1119 { | 1129 { |
1120 if (capflags == 0 || (capflags & BWF_ONECAP)) | 1130 if (capflags == 0 || (capflags & BWF_ONECAP)) |
1121 { | 1131 { |
1122 capflags = BWF_KEEPCAP; /* lU or UlU */ | 1132 capflags = BWF_KEEPCAP; /* lU or UlU */ |
1123 break; | 1133 break; |
1458 int flags; | 1468 int flags; |
1459 affitem_T *ai, *ai2, **aip; | 1469 affitem_T *ai, *ai2, **aip; |
1460 int round; | 1470 int round; |
1461 char_u *save_sourcing_name = sourcing_name; | 1471 char_u *save_sourcing_name = sourcing_name; |
1462 linenr_T save_sourcing_lnum = sourcing_lnum; | 1472 linenr_T save_sourcing_lnum = sourcing_lnum; |
1463 int cnt; | 1473 int cnt, ccnt; |
1464 int choplen; | 1474 int choplen; |
1465 int addlen; | 1475 int addlen; |
1466 int leadlen; | 1476 int leadlen; |
1467 int wordcount; | 1477 int wordcount; |
1468 fword_T *fw, *fw2; | 1478 fword_T *fw, *fw2; |
1472 hash_T hash; | 1482 hash_T hash; |
1473 int adds; | 1483 int adds; |
1474 addword_T *aw, *naw; | 1484 addword_T *aw, *naw; |
1475 int flen; | 1485 int flen; |
1476 int xlen; | 1486 int xlen; |
1487 char_u *fol; | |
1477 | 1488 |
1478 fd = fopen((char *)fname, "r"); | 1489 fd = fopen((char *)fname, "r"); |
1479 if (fd == NULL) | 1490 if (fd == NULL) |
1480 { | 1491 { |
1481 EMSG2(_(e_notopen), fname); | 1492 EMSG2(_(e_notopen), fname); |
1482 goto errorend; | 1493 goto endFAIL; |
1483 } | 1494 } |
1484 | 1495 |
1485 /* Set sourcing_name, so that error messages mention the file name. */ | 1496 /* Set sourcing_name, so that error messages mention the file name. */ |
1486 sourcing_name = fname; | 1497 sourcing_name = fname; |
1487 sourcing_lnum = 0; | 1498 sourcing_lnum = 0; |
1488 | 1499 |
1489 /* <HEADER>: <fileID> <regioncnt> <regionname> ... */ | 1500 /* <HEADER>: <fileID> <regioncnt> <regionname> ... |
1501 * <charflagslen> <charflags> <fcharslen> <fchars> */ | |
1490 for (i = 0; i < VIMSPELLMAGICL; ++i) | 1502 for (i = 0; i < VIMSPELLMAGICL; ++i) |
1491 buf[i] = getc(fd); /* <fileID> */ | 1503 buf[i] = getc(fd); /* <fileID> */ |
1492 if (STRNCMP(buf, VIMSPELLMAGIC, VIMSPELLMAGICL) != 0) | 1504 if (STRNCMP(buf, VIMSPELLMAGIC, VIMSPELLMAGICL) != 0) |
1493 { | 1505 { |
1494 EMSG(_("E757: Wrong file ID in spell file")); | 1506 EMSG(_("E757: Wrong file ID in spell file")); |
1495 goto errorend; | 1507 goto endFAIL; |
1496 } | 1508 } |
1497 | 1509 |
1498 cnt = getc(fd); /* <regioncnt> */ | 1510 cnt = getc(fd); /* <regioncnt> */ |
1499 if (cnt == EOF) | 1511 if (cnt < 0) |
1500 { | 1512 { |
1501 truncerr: | 1513 truncerr: |
1502 EMSG(_("E758: Truncated spell file")); | 1514 EMSG(_("E758: Truncated spell file")); |
1503 goto errorend; | 1515 goto endFAIL; |
1504 } | 1516 } |
1505 if (cnt > 8) | 1517 if (cnt > 8) |
1506 { | 1518 { |
1507 formerr: | 1519 formerr: |
1508 EMSG(_("E759: Format error in spell file")); | 1520 EMSG(_("E759: Format error in spell file")); |
1509 goto errorend; | 1521 goto endFAIL; |
1510 } | 1522 } |
1511 for (i = 0; i < cnt; ++i) | 1523 for (i = 0; i < cnt; ++i) |
1512 { | 1524 { |
1513 lp->sl_regions[i * 2] = getc(fd); /* <regionname> */ | 1525 lp->sl_regions[i * 2] = getc(fd); /* <regionname> */ |
1514 lp->sl_regions[i * 2 + 1] = getc(fd); | 1526 lp->sl_regions[i * 2 + 1] = getc(fd); |
1515 } | 1527 } |
1516 lp->sl_regions[cnt * 2] = NUL; | 1528 lp->sl_regions[cnt * 2] = NUL; |
1517 | 1529 |
1518 /* round 1: <PREFIXLIST>: <affcount> <afftotcnt> <affix> ... | 1530 cnt = getc(fd); /* <charflagslen> */ |
1519 * round 2: <SUFFIXLIST>: <affcount> <afftotcnt> <affix> ... */ | 1531 if (cnt > 0) |
1532 { | |
1533 p = (char_u *)getroom(lp, &bl_used, cnt); | |
1534 if (p == NULL) | |
1535 goto endFAIL; | |
1536 for (i = 0; i < cnt; ++i) | |
1537 p[i] = getc(fd); /* <charflags> */ | |
1538 | |
1539 ccnt = (getc(fd) << 8) + getc(fd); /* <fcharslen> */ | |
1540 if (ccnt <= 0) | |
1541 goto formerr; | |
1542 fol = (char_u *)getroom(lp, &bl_used, ccnt + 1); | |
1543 if (fol == NULL) | |
1544 goto endFAIL; | |
1545 for (i = 0; i < ccnt; ++i) | |
1546 fol[i] = getc(fd); /* <fchars> */ | |
1547 fol[i] = NUL; | |
1548 | |
1549 /* Set the word-char flags and fill spell_isupper() table. */ | |
1550 if (set_spell_charflags(p, cnt, fol) == FAIL) | |
1551 goto formerr; | |
1552 } | |
1553 else | |
1554 { | |
1555 /* When <charflagslen> is zero then <fcharslen> must also be zero. */ | |
1556 cnt = (getc(fd) << 8) + getc(fd); | |
1557 if (cnt != 0) | |
1558 goto formerr; | |
1559 } | |
1560 | |
1561 /* round 1: <PREFIXLIST>: <affcount> <affix> ... | |
1562 * round 2: <SUFFIXLIST>: <affcount> <affix> ... */ | |
1520 for (round = 1; round <= 2; ++round) | 1563 for (round = 1; round <= 2; ++round) |
1521 { | 1564 { |
1522 affcount = (getc(fd) << 8) + getc(fd); /* <affcount> */ | 1565 affcount = (getc(fd) << 8) + getc(fd); /* <affcount> */ |
1523 if (affcount < 0) | 1566 if (affcount < 0) |
1524 goto truncerr; | 1567 goto truncerr; |
1534 gap = &lp->sl_sufftab; | 1577 gap = &lp->sl_sufftab; |
1535 aip = &lp->sl_suffzero; | 1578 aip = &lp->sl_suffzero; |
1536 lp->sl_suffcnt = affcount; | 1579 lp->sl_suffcnt = affcount; |
1537 suffm = affcount > 256 ? 2 : 1; | 1580 suffm = affcount > 256 ? 2 : 1; |
1538 } | 1581 } |
1539 | |
1540 i = (getc(fd) << 8) + getc(fd); /* <afftotcnt> */ | |
1541 /* afftotcnt is not used */ | |
1542 | 1582 |
1543 /* | 1583 /* |
1544 * For each affix NR there can be several affixes. | 1584 * For each affix NR there can be several affixes. |
1545 */ | 1585 */ |
1546 for (affnr = 0; affnr < affcount; ++affnr) | 1586 for (affnr = 0; affnr < affcount; ++affnr) |
1553 { | 1593 { |
1554 /* <affitem>: <affflags> <affchoplen> <affchop> | 1594 /* <affitem>: <affflags> <affchoplen> <affchop> |
1555 * <affaddlen> <affadd> */ | 1595 * <affaddlen> <affadd> */ |
1556 affflags = getc(fd); /* <affflags> */ | 1596 affflags = getc(fd); /* <affflags> */ |
1557 choplen = getc(fd); /* <affchoplen> */ | 1597 choplen = getc(fd); /* <affchoplen> */ |
1558 if (choplen == EOF) | 1598 if (choplen < 0) |
1559 goto truncerr; | 1599 goto truncerr; |
1560 if (choplen >= MAXWLEN) | 1600 if (choplen >= MAXWLEN) |
1561 goto formerr; | 1601 goto formerr; |
1562 for (i = 0; i < choplen; ++i) /* <affchop> */ | 1602 for (i = 0; i < choplen; ++i) /* <affchop> */ |
1563 buf[i] = getc(fd); | 1603 buf[i] = getc(fd); |
1564 buf[i] = NUL; | 1604 buf[i] = NUL; |
1565 addlen = getc(fd); /* <affaddlen> */ | 1605 addlen = getc(fd); /* <affaddlen> */ |
1566 if (addlen == EOF) | 1606 if (addlen < 0) |
1567 goto truncerr; | 1607 goto truncerr; |
1568 if (affflags & AFF_PREWORD) | 1608 if (affflags & AFF_PREWORD) |
1569 xlen = addlen + 2; /* space for lead and trail string */ | 1609 xlen = addlen + 2; /* space for lead and trail string */ |
1570 else | 1610 else |
1571 xlen = 0; | 1611 xlen = 0; |
1572 | 1612 |
1573 /* Get room to store the affitem_T, chop and add strings. */ | 1613 /* Get room to store the affitem_T, chop and add strings. */ |
1574 p = (char_u *)getroom(lp, &bl_used, | 1614 ai = (affitem_T *)getroom(lp, &bl_used, |
1575 sizeof(affitem_T) + addlen + choplen + 1 + xlen); | 1615 sizeof(affitem_T) + addlen + choplen + 1 + xlen); |
1576 if (p == NULL) | 1616 if (ai == NULL) |
1577 goto errorend; | 1617 goto endFAIL; |
1578 | 1618 |
1579 ai = (affitem_T *)p; | |
1580 ai->ai_nr = affnr; | 1619 ai->ai_nr = affnr; |
1581 ai->ai_flags = affflags; | 1620 ai->ai_flags = affflags; |
1582 ai->ai_choplen = choplen; | 1621 ai->ai_choplen = choplen; |
1583 ai->ai_addlen = addlen; | 1622 ai->ai_addlen = addlen; |
1584 | 1623 |
1594 if (affflags & AFF_PREWORD) | 1633 if (affflags & AFF_PREWORD) |
1595 { | 1634 { |
1596 int l, leadoff, trailoff; | 1635 int l, leadoff, trailoff; |
1597 | 1636 |
1598 /* | 1637 /* |
1599 * Separate lead and trail string, put word at ai_add, so | 1638 * A preword is a prefix that's recognized as a word: it |
1600 * that it can be used as hashtable key. | 1639 * contains a word character followed by a non-word |
1640 * character. | |
1641 * <affadd> is the whole prefix. Separate lead and trail | |
1642 * string, put the word itself at ai_add, so that it can | |
1643 * be used as hashtable key. | |
1601 */ | 1644 */ |
1602 /* lead string: up to first word char */ | 1645 /* lead string: up to first word char */ |
1603 while (*p != NUL && !spell_iswordc(p)) | 1646 while (*p != NUL && !spell_iswordc(p)) |
1604 mb_ptr_adv(p); | 1647 mb_ptr_adv(p); |
1605 ai->ai_leadlen = p - ai->ai_add; | 1648 ai->ai_leadlen = p - ai->ai_add; |
1621 ai->ai_add[l] = NUL; | 1664 ai->ai_add[l] = NUL; |
1622 hash = hash_hash(ai->ai_add); | 1665 hash = hash_hash(ai->ai_add); |
1623 hi = hash_lookup(&lp->sl_prewords, ai->ai_add, hash); | 1666 hi = hash_lookup(&lp->sl_prewords, ai->ai_add, hash); |
1624 if (HASHITEM_EMPTY(hi)) | 1667 if (HASHITEM_EMPTY(hi)) |
1625 { | 1668 { |
1626 /* First affix with this word, add to hashtable. */ | 1669 /* First preword with this word, add to hashtable. */ |
1627 hash_add_item(&lp->sl_prewords, hi, ai->ai_add, hash); | 1670 hash_add_item(&lp->sl_prewords, hi, ai->ai_add, hash); |
1628 ai->ai_next = NULL; | 1671 ai->ai_next = NULL; |
1629 } | 1672 } |
1630 else | 1673 else |
1631 { | 1674 { |
1632 /* There already is an affix with this word, link in | 1675 /* There already is a preword with this word, link in |
1633 * the list. */ | 1676 * the list. */ |
1634 ai2 = HI2AI(hi); | 1677 ai2 = HI2AI(hi); |
1635 ai->ai_next = ai2->ai_next; | 1678 ai->ai_next = ai2->ai_next; |
1636 ai2->ai_next = ai; | 1679 ai2->ai_next = ai; |
1637 } | 1680 } |
1658 { | 1701 { |
1659 if (gap->ga_len < addlen) | 1702 if (gap->ga_len < addlen) |
1660 { | 1703 { |
1661 /* Longer affix, need more hashtables. */ | 1704 /* Longer affix, need more hashtables. */ |
1662 if (ga_grow(gap, addlen - gap->ga_len) == FAIL) | 1705 if (ga_grow(gap, addlen - gap->ga_len) == FAIL) |
1663 goto errorend; | 1706 goto endFAIL; |
1664 | 1707 |
1665 /* Re-allocating ga_data means that an ht_array | 1708 /* Re-allocating ga_data means that an ht_array |
1666 * pointing to ht_smallarray becomes invalid. We | 1709 * pointing to ht_smallarray becomes invalid. We |
1667 * can recognize this: ht_mask is at its init | 1710 * can recognize this: ht_mask is at its init |
1668 * value. */ | 1711 * value. */ |
1731 * [<region>] | 1774 * [<region>] |
1732 * [<addcnt> <add> ...] | 1775 * [<addcnt> <add> ...] |
1733 */ | 1776 */ |
1734 /* Use <nr> bytes from the previous word. */ | 1777 /* Use <nr> bytes from the previous word. */ |
1735 wlen = getc(fd); /* <nr> */ | 1778 wlen = getc(fd); /* <nr> */ |
1736 if (wlen == EOF) | 1779 if (wlen < 0) |
1737 { | 1780 { |
1738 if (widx >= wordcount) /* normal way to end the file */ | 1781 if (widx >= wordcount) /* normal way to end the file */ |
1739 break; | 1782 break; |
1740 goto truncerr; | 1783 goto truncerr; |
1741 } | 1784 } |
1742 | 1785 |
1743 /* Read further word bytes until one below 0x20, that must be the | 1786 /* Read further word bytes until one below 0x20, that one must be the |
1744 * flags. Keep this fast! */ | 1787 * flags. Keep this fast! */ |
1745 for (;;) | 1788 for (;;) |
1746 { | 1789 { |
1747 if ((buf[wlen] = getc(fd)) < 0x20) /* <string> */ | 1790 if ((buf[wlen] = getc(fd)) < 0x20) /* <string> */ |
1748 break; | 1791 break; |
1758 | 1801 |
1759 if (flags & BWF_KEEPCAP) | 1802 if (flags & BWF_KEEPCAP) |
1760 { | 1803 { |
1761 /* Read <caselen> and <caseword> first, its length may differ from | 1804 /* Read <caselen> and <caseword> first, its length may differ from |
1762 * the case-folded word. Note: this should only happen after the | 1805 * the case-folded word. Note: this should only happen after the |
1763 * basic word! */ | 1806 * basic word without KEEPCAP! */ |
1764 wlen = getc(fd); | 1807 wlen = getc(fd); |
1765 if (wlen < 0) | 1808 if (wlen < 0) |
1766 goto truncerr; | 1809 goto truncerr; |
1810 if (wlen >= MAXWLEN) | |
1811 goto formerr; | |
1767 for (i = 0; i < wlen; ++i) | 1812 for (i = 0; i < wlen; ++i) |
1768 cbuf[i] = getc(fd); | 1813 cbuf[i] = getc(fd); |
1769 cbuf[i] = NUL; | 1814 cbuf[i] = NUL; |
1770 } | 1815 } |
1771 | 1816 |
1798 | 1843 |
1799 /* Find room to store the word in an fword_T. */ | 1844 /* Find room to store the word in an fword_T. */ |
1800 fw = (fword_T *)getroom(lp, &bl_used, (int)sizeof(fword_T) + wlen | 1845 fw = (fword_T *)getroom(lp, &bl_used, (int)sizeof(fword_T) + wlen |
1801 + (p - affixbuf)); | 1846 + (p - affixbuf)); |
1802 if (fw == NULL) | 1847 if (fw == NULL) |
1803 goto errorend; | 1848 goto endFAIL; |
1804 mch_memmove(fw->fw_word, (flags & BWF_KEEPCAP) ? cbuf : buf, wlen + 1); | 1849 mch_memmove(fw->fw_word, (flags & BWF_KEEPCAP) ? cbuf : buf, wlen + 1); |
1805 | 1850 |
1806 /* Put the affix NRs just after the word, if any. */ | 1851 /* Put the affix NRs just after the word, if any. */ |
1807 if (p > affixbuf) | 1852 if (p > affixbuf) |
1808 mch_memmove(fw->fw_word + wlen + 1, affixbuf, p - affixbuf); | 1853 mch_memmove(fw->fw_word + wlen + 1, affixbuf, p - affixbuf); |
1809 | 1854 |
1810 fw->fw_flags = flags; | 1855 fw->fw_flags = flags; |
1811 fw->fw_prefixcnt = prefixcnt; | 1856 fw->fw_prefixcnt = prefixcnt; |
1812 fw->fw_suffixcnt = suffixcnt; | 1857 fw->fw_suffixcnt = suffixcnt; |
1813 | 1858 |
1859 /* We store the word in the hashtable case-folded. For a KEEPCAP word | |
1860 * the entry must already exist, because fw_word can't be used as the | |
1861 * key, it differs from "buf"! */ | |
1814 hash = hash_hash(buf); | 1862 hash = hash_hash(buf); |
1815 hi = hash_lookup(&lp->sl_words, buf, hash); | 1863 hi = hash_lookup(&lp->sl_words, buf, hash); |
1816 if (HASHITEM_EMPTY(hi)) | 1864 if (HASHITEM_EMPTY(hi)) |
1817 { | 1865 { |
1818 if (hash_add_item(&lp->sl_words, hi, fw->fw_word, hash) == FAIL) | 1866 if (hash_add_item(&lp->sl_words, hi, fw->fw_word, hash) == FAIL) |
1819 goto errorend; | 1867 goto endFAIL; |
1820 fw->fw_next = NULL; | 1868 fw->fw_next = NULL; |
1821 } | 1869 } |
1822 else | 1870 else |
1823 { | 1871 { |
1824 /* Already have this basic word in the hashtable, this one will | 1872 /* Already have this basic word in the hashtable, this one will |
1825 * have different case flags and/or affixes. */ | 1873 * have different case flags and/or affixes. */ |
1826 fw2 = HI2FWORD(hi); | 1874 fw2 = HI2FWORD(hi); |
1827 fw->fw_next = fw2->fw_next; | 1875 fw->fw_next = fw2->fw_next; |
1828 fw2->fw_next = fw; | 1876 fw2->fw_next = fw; |
1829 --widx; /* don't count this one */ | 1877 --widx; /* don't count this one as a basic word */ |
1830 } | 1878 } |
1831 | 1879 |
1832 if (flags & BWF_REGION) | 1880 if (flags & BWF_REGION) |
1833 fw->fw_region = getc(fd); /* <region> */ | 1881 fw->fw_region = getc(fd); /* <region> */ |
1834 else | 1882 else |
1839 { | 1887 { |
1840 if (flags & BWF_ADDS_M) | 1888 if (flags & BWF_ADDS_M) |
1841 adds = (getc(fd) << 8) + getc(fd); /* <addcnt> */ | 1889 adds = (getc(fd) << 8) + getc(fd); /* <addcnt> */ |
1842 else | 1890 else |
1843 adds = getc(fd); /* <addcnt> */ | 1891 adds = getc(fd); /* <addcnt> */ |
1892 if (adds < 0) | |
1893 goto formerr; | |
1844 | 1894 |
1845 if (adds > 30) | 1895 if (adds > 30) |
1846 { | 1896 { |
1847 /* Use a hashtable to loopup the part until the next word end. | 1897 /* Use a hashtable to lookup the part until the next word end. |
1898 * Thus for "de-bur-die" "de" is the basic word, "-bur" is key | |
1899 * in the addition hashtable, "-bur<NUL>die" the whole | |
1900 * addition and "aw_saveb" is '-'. | |
1848 * This uses more memory and involves some overhead, thus only | 1901 * This uses more memory and involves some overhead, thus only |
1849 * do it when there are many additions (e.g., for French). */ | 1902 * do it when there are many additions (e.g., for French). */ |
1850 ht = (hashtab_T *)getroom(lp, &bl_used, sizeof(hashtab_T)); | 1903 ht = (hashtab_T *)getroom(lp, &bl_used, sizeof(hashtab_T)); |
1851 if (ht == NULL) | 1904 if (ht == NULL) |
1852 goto errorend; | 1905 goto endFAIL; |
1853 hash_init(ht); | 1906 hash_init(ht); |
1854 fw->fw_adds = (addword_T *)ht; | 1907 fw->fw_adds = (addword_T *)ht; |
1855 fw->fw_flags |= BWF_ADDHASH; | 1908 fw->fw_flags |= BWF_ADDHASH; |
1856 | 1909 |
1857 /* Preset the size of the hashtable. It's never unlocked. */ | 1910 /* Preset the size of the hashtable. It's never unlocked. */ |
1858 hash_lock_size(ht, adds + 1); | 1911 hash_lock_size(ht, adds + 1); |
1859 } | 1912 } |
1860 else | 1913 else |
1861 ht = NULL; | 1914 ht = NULL; |
1862 | 1915 |
1916 /* | |
1917 * Note: uses cbuf[] to copy bytes from previous addition. | |
1918 */ | |
1863 while (--adds >= 0) | 1919 while (--adds >= 0) |
1864 { | 1920 { |
1865 /* <add>: <addflags> <addlen> [<leadlen>] [<copylen>] | 1921 /* <add>: <addflags> <addlen> [<leadlen>] [<copylen>] |
1866 * [<addstring>] [<region>] */ | 1922 * [<addstring>] [<region>] */ |
1867 flags = getc(fd); /* <addflags> */ | 1923 flags = getc(fd); /* <addflags> */ |
1868 addlen = getc(fd); /* <addlen> */ | 1924 addlen = getc(fd); /* <addlen> */ |
1869 if (addlen == EOF) | 1925 if (addlen < 0) |
1870 goto truncerr; | 1926 goto truncerr; |
1871 if (addlen >= MAXWLEN) | 1927 if (addlen >= MAXWLEN) |
1872 goto formerr; | 1928 goto formerr; |
1873 | 1929 |
1874 if (flags & ADD_LEADLEN) | 1930 if (flags & ADD_LEADLEN) |
1931 { | |
1875 leadlen = getc(fd); /* <leadlen> */ | 1932 leadlen = getc(fd); /* <leadlen> */ |
1933 if (leadlen > addlen) | |
1934 goto formerr; | |
1935 } | |
1876 else | 1936 else |
1877 leadlen = 0; | 1937 leadlen = 0; |
1878 | 1938 |
1879 if (addlen > 0) | 1939 if (addlen > 0) |
1880 { | 1940 { |
1889 | 1949 |
1890 if (flags & ADD_KEEPCAP) | 1950 if (flags & ADD_KEEPCAP) |
1891 { | 1951 { |
1892 /* <addstring> is in original case, need to get | 1952 /* <addstring> is in original case, need to get |
1893 * case-folded word too. */ | 1953 * case-folded word too. */ |
1894 (void)str_foldcase(cbuf, addlen, fbuf, MAXWLEN); | 1954 (void)spell_casefold(cbuf, addlen, fbuf, MAXWLEN); |
1895 flen = addlen - leadlen + 1; | 1955 flen = addlen - leadlen + 1; |
1896 addlen = STRLEN(fbuf); | 1956 addlen = STRLEN(fbuf); |
1897 } | 1957 } |
1898 else | 1958 else |
1899 flen = 0; | 1959 flen = 0; |
1900 | 1960 |
1901 aw = (addword_T *)getroom(lp, &bl_used, | 1961 aw = (addword_T *)getroom(lp, &bl_used, |
1902 sizeof(addword_T) + addlen + flen); | 1962 sizeof(addword_T) + addlen + flen); |
1903 if (aw == NULL) | 1963 if (aw == NULL) |
1904 goto errorend; | 1964 goto endFAIL; |
1905 | 1965 |
1906 if (flags & ADD_KEEPCAP) | 1966 if (flags & ADD_KEEPCAP) |
1907 { | 1967 { |
1908 /* Put the addition in original case after the case-folded | 1968 /* Put the addition in original case after the case-folded |
1909 * string. */ | 1969 * string. */ |
1952 { | 2012 { |
1953 /* we use a dummy item as the list header */ | 2013 /* we use a dummy item as the list header */ |
1954 naw = (addword_T *)getroom(lp, &bl_used, | 2014 naw = (addword_T *)getroom(lp, &bl_used, |
1955 sizeof(addword_T) + STRLEN(NOWC_KEY)); | 2015 sizeof(addword_T) + STRLEN(NOWC_KEY)); |
1956 if (naw == NULL) | 2016 if (naw == NULL) |
1957 goto errorend; | 2017 goto endFAIL; |
1958 STRCPY(naw->aw_word, NOWC_KEY); | 2018 STRCPY(naw->aw_word, NOWC_KEY); |
1959 hash_add_item(ht, hi, naw->aw_word, hash); | 2019 hash_add_item(ht, hi, naw->aw_word, hash); |
1960 naw->aw_next = aw; | 2020 naw->aw_next = aw; |
1961 aw->aw_next = NULL; | 2021 aw->aw_next = NULL; |
1962 } | 2022 } |
1992 } | 2052 } |
1993 } | 2053 } |
1994 } | 2054 } |
1995 } | 2055 } |
1996 } | 2056 } |
1997 goto end_OK; | 2057 goto endOK; |
1998 | 2058 |
1999 errorend: | 2059 endFAIL: |
2000 lp->sl_error = TRUE; | 2060 lp->sl_error = TRUE; |
2001 end_OK: | 2061 |
2062 endOK: | |
2002 if (fd != NULL) | 2063 if (fd != NULL) |
2003 fclose(fd); | 2064 fclose(fd); |
2004 hash_unlock(&lp->sl_words); | 2065 hash_unlock(&lp->sl_words); |
2005 sourcing_name = save_sourcing_name; | 2066 sourcing_name = save_sourcing_name; |
2006 sourcing_lnum = save_sourcing_lnum; | 2067 sourcing_lnum = save_sourcing_lnum; |
2185 #ifdef FEAT_MBYTE | 2246 #ifdef FEAT_MBYTE |
2186 c = mb_ptr2char_adv(&p); | 2247 c = mb_ptr2char_adv(&p); |
2187 #else | 2248 #else |
2188 c = *p++; | 2249 c = *p++; |
2189 #endif | 2250 #endif |
2190 firstcap = allcap = MB_ISUPPER(c); | 2251 firstcap = allcap = spell_isupper(c); |
2191 | 2252 |
2192 /* | 2253 /* |
2193 * Need to check all letters to find a word with mixed upper/lower. | 2254 * Need to check all letters to find a word with mixed upper/lower. |
2194 * But a word with an upper char only at start is a ONECAP. | 2255 * But a word with an upper char only at start is a ONECAP. |
2195 */ | 2256 */ |
2199 #ifdef FEAT_MBYTE | 2260 #ifdef FEAT_MBYTE |
2200 c = mb_ptr2char(p); | 2261 c = mb_ptr2char(p); |
2201 #else | 2262 #else |
2202 c = *p; | 2263 c = *p; |
2203 #endif | 2264 #endif |
2204 if (!MB_ISUPPER(c)) | 2265 if (!spell_isupper(c)) |
2205 { | 2266 { |
2206 /* UUl -> KEEPCAP */ | 2267 /* UUl -> KEEPCAP */ |
2207 if (past_second && allcap) | 2268 if (past_second && allcap) |
2208 return BWF_KEEPCAP; | 2269 return BWF_KEEPCAP; |
2209 allcap = FALSE; | 2270 allcap = FALSE; |
2343 basicword_T *bw_cnext; /* next word with same caps */ | 2404 basicword_T *bw_cnext; /* next word with same caps */ |
2344 int bw_flags; /* BWF_ flags */ | 2405 int bw_flags; /* BWF_ flags */ |
2345 garray_T bw_prefix; /* table with prefix numbers */ | 2406 garray_T bw_prefix; /* table with prefix numbers */ |
2346 garray_T bw_suffix; /* table with suffix numbers */ | 2407 garray_T bw_suffix; /* table with suffix numbers */ |
2347 int bw_region; /* region bits */ | 2408 int bw_region; /* region bits */ |
2348 char_u *bw_caseword; /* keep-case word */ | 2409 char_u *bw_caseword; /* keep-case word or NULL */ |
2349 char_u *bw_leadstring; /* must come before bw_word */ | 2410 char_u *bw_leadstring; /* must come before bw_word or NULL */ |
2350 char_u *bw_addstring; /* must come after bw_word */ | 2411 char_u *bw_addstring; /* must come after bw_word or NULL */ |
2351 char_u bw_word[1]; /* actually longer: word case folded */ | 2412 char_u bw_word[1]; /* actually longer: word case folded */ |
2352 }; | 2413 }; |
2353 | 2414 |
2354 static basicword_T dumbw; | 2415 static basicword_T dumbw; |
2355 #define KEY2BW(p) ((basicword_T *)((p) - (dumbw.bw_word - (char_u *)&dumbw))) | 2416 #define KEY2BW(p) ((basicword_T *)((p) - (dumbw.bw_word - (char_u *)&dumbw))) |
2389 static int same_affentries __ARGS((affheader_T *ah1, affheader_T *ah2)); | 2450 static int same_affentries __ARGS((affheader_T *ah1, affheader_T *ah2)); |
2390 static void add_affhash __ARGS((hashtab_T *ht, char_u *key, int newnr)); | 2451 static void add_affhash __ARGS((hashtab_T *ht, char_u *key, int newnr)); |
2391 static void clear_affhash __ARGS((hashtab_T *ht)); | 2452 static void clear_affhash __ARGS((hashtab_T *ht)); |
2392 static void trans_affixes __ARGS((dicword_T *dw, basicword_T *bw, afffile_T *oldaff, hashtab_T *newwords)); | 2453 static void trans_affixes __ARGS((dicword_T *dw, basicword_T *bw, afffile_T *oldaff, hashtab_T *newwords)); |
2393 static int build_wordlist __ARGS((hashtab_T *newwords, hashtab_T *oldwords, afffile_T *oldaff, int regionmask)); | 2454 static int build_wordlist __ARGS((hashtab_T *newwords, hashtab_T *oldwords, afffile_T *oldaff, int regionmask)); |
2455 static basicword_T *get_basicword __ARGS((char_u *word, int asize)); | |
2394 static void combine_regions __ARGS((hashtab_T *newwords)); | 2456 static void combine_regions __ARGS((hashtab_T *newwords)); |
2395 static int same_affixes __ARGS((basicword_T *bw, basicword_T *nbw)); | 2457 static int same_affixes __ARGS((basicword_T *bw, basicword_T *nbw)); |
2396 static void expand_affixes __ARGS((hashtab_T *newwords, garray_T *prefgap, garray_T *suffgap)); | 2458 static int expand_affixes __ARGS((hashtab_T *newwords, garray_T *prefgap, garray_T *suffgap)); |
2397 static void expand_one_aff __ARGS((basicword_T *bw, garray_T *add_words, affentry_T *pae, affentry_T *sae)); | 2459 static int expand_one_aff __ARGS((basicword_T *bw, garray_T *add_words, affentry_T *pae, affentry_T *sae)); |
2398 static void add_to_wordlist __ARGS((hashtab_T *newwords, basicword_T *bw)); | 2460 static int add_to_wordlist __ARGS((hashtab_T *newwords, basicword_T *bw)); |
2399 static void put_bytes __ARGS((FILE *fd, long_u nr, int len)); | |
2400 static void write_affix __ARGS((FILE *fd, affheader_T *ah)); | 2461 static void write_affix __ARGS((FILE *fd, affheader_T *ah)); |
2401 static void write_affixlist __ARGS((FILE *fd, garray_T *aff, int bytes)); | 2462 static void write_affixlist __ARGS((FILE *fd, garray_T *aff, int bytes)); |
2402 static void write_vim_spell __ARGS((char_u *fname, garray_T *prefga, garray_T *suffga, hashtab_T *newwords, int regcount, char_u *regchars)); | 2463 static void write_vim_spell __ARGS((char_u *fname, garray_T *prefga, garray_T *suffga, hashtab_T *newwords, int regcount, char_u *regchars)); |
2403 static void write_bword __ARGS((winfo_T *wif, basicword_T *bw, int lowcap)); | 2464 static void write_bword __ARGS((winfo_T *wif, basicword_T *bw, int lowcap)); |
2404 static void free_wordtable __ARGS((hashtab_T *ht)); | 2465 static void free_wordtable __ARGS((hashtab_T *ht)); |
2426 char_u *p; | 2487 char_u *p; |
2427 int lnum = 0; | 2488 int lnum = 0; |
2428 affheader_T *cur_aff = NULL; | 2489 affheader_T *cur_aff = NULL; |
2429 int aff_todo = 0; | 2490 int aff_todo = 0; |
2430 hashtab_T *tp; | 2491 hashtab_T *tp; |
2492 char_u *low = NULL; | |
2493 char_u *fol = NULL; | |
2494 char_u *upp = NULL; | |
2431 | 2495 |
2432 fd = fopen((char *)fname, "r"); | 2496 fd = fopen((char *)fname, "r"); |
2433 if (fd == NULL) | 2497 if (fd == NULL) |
2434 { | 2498 { |
2435 EMSG2(_(e_notopen), fname); | 2499 EMSG2(_(e_notopen), fname); |
2447 ga_init2(&aff->af_rep, (int)sizeof(repentry_T), 20); | 2511 ga_init2(&aff->af_rep, (int)sizeof(repentry_T), 20); |
2448 | 2512 |
2449 /* | 2513 /* |
2450 * Read all the lines in the file one by one. | 2514 * Read all the lines in the file one by one. |
2451 */ | 2515 */ |
2452 while (!vim_fgets(rline, MAXLINELEN, fd)) | 2516 while (!vim_fgets(rline, MAXLINELEN, fd) && !got_int) |
2453 { | 2517 { |
2518 line_breakcheck(); | |
2454 ++lnum; | 2519 ++lnum; |
2455 | 2520 |
2456 /* Skip comment lines. */ | 2521 /* Skip comment lines. */ |
2457 if (*rline == '#') | 2522 if (*rline == '#') |
2458 continue; | 2523 continue; |
2460 /* Convert from "SET" to 'encoding' when needed. */ | 2525 /* Convert from "SET" to 'encoding' when needed. */ |
2461 vim_free(pc); | 2526 vim_free(pc); |
2462 if (conv->vc_type != CONV_NONE) | 2527 if (conv->vc_type != CONV_NONE) |
2463 { | 2528 { |
2464 pc = string_convert(conv, rline, NULL); | 2529 pc = string_convert(conv, rline, NULL); |
2530 if (pc == NULL) | |
2531 { | |
2532 smsg((char_u *)_("Conversion failure for word in %s line %d: %s"), | |
2533 fname, lnum, rline); | |
2534 continue; | |
2535 } | |
2465 line = pc; | 2536 line = pc; |
2466 } | 2537 } |
2467 else | 2538 else |
2468 { | 2539 { |
2469 pc = NULL; | 2540 pc = NULL; |
2585 { | 2656 { |
2586 aff_entry->ae_next = cur_aff->ah_first; | 2657 aff_entry->ae_next = cur_aff->ah_first; |
2587 cur_aff->ah_first = aff_entry; | 2658 cur_aff->ah_first = aff_entry; |
2588 } | 2659 } |
2589 } | 2660 } |
2661 else if (STRCMP(items[0], "FOL") == 0 && itemcnt == 2) | |
2662 { | |
2663 if (fol != NULL) | |
2664 smsg((char_u *)_("Duplicate FOL in %s line %d"), | |
2665 fname, lnum); | |
2666 else | |
2667 fol = vim_strsave(items[1]); | |
2668 } | |
2669 else if (STRCMP(items[0], "LOW") == 0 && itemcnt == 2) | |
2670 { | |
2671 if (low != NULL) | |
2672 smsg((char_u *)_("Duplicate LOW in %s line %d"), | |
2673 fname, lnum); | |
2674 else | |
2675 low = vim_strsave(items[1]); | |
2676 } | |
2677 else if (STRCMP(items[0], "UPP") == 0 && itemcnt == 2) | |
2678 { | |
2679 if (upp != NULL) | |
2680 smsg((char_u *)_("Duplicate UPP in %s line %d"), | |
2681 fname, lnum); | |
2682 else | |
2683 upp = vim_strsave(items[1]); | |
2684 } | |
2590 else if (STRCMP(items[0], "REP") == 0 && itemcnt == 2) | 2685 else if (STRCMP(items[0], "REP") == 0 && itemcnt == 2) |
2591 /* Ignore REP count */; | 2686 /* Ignore REP count */; |
2592 else if (STRCMP(items[0], "REP") == 0 && itemcnt == 3) | 2687 else if (STRCMP(items[0], "REP") == 0 && itemcnt == 3) |
2593 { | 2688 { |
2594 repentry_T *rp; | 2689 repentry_T *rp; |
2604 else if (p_verbose > 0) | 2699 else if (p_verbose > 0) |
2605 smsg((char_u *)_("Unrecognized item in %s line %d: %s"), | 2700 smsg((char_u *)_("Unrecognized item in %s line %d: %s"), |
2606 fname, lnum, items[0]); | 2701 fname, lnum, items[0]); |
2607 } | 2702 } |
2608 | 2703 |
2704 } | |
2705 | |
2706 if (fol != NULL || low != NULL || upp != NULL) | |
2707 { | |
2708 if (fol == NULL || low == NULL || upp == NULL) | |
2709 smsg((char_u *)_("Missing FOL/LOW/UPP line in %s"), fname); | |
2710 else | |
2711 set_spell_chartab(fol, low, upp); | |
2712 | |
2713 vim_free(fol); | |
2714 vim_free(low); | |
2715 vim_free(upp); | |
2609 } | 2716 } |
2610 | 2717 |
2611 vim_free(pc); | 2718 vim_free(pc); |
2612 fclose(fd); | 2719 fclose(fd); |
2613 return aff; | 2720 return aff; |
2718 /* | 2825 /* |
2719 * Read all the lines in the file one by one. | 2826 * Read all the lines in the file one by one. |
2720 * The words are converted to 'encoding' here, before being added to | 2827 * The words are converted to 'encoding' here, before being added to |
2721 * the hashtable. | 2828 * the hashtable. |
2722 */ | 2829 */ |
2723 while (!vim_fgets(line, MAXLINELEN, fd)) | 2830 while (!vim_fgets(line, MAXLINELEN, fd) && !got_int) |
2724 { | 2831 { |
2832 line_breakcheck(); | |
2725 ++lnum; | 2833 ++lnum; |
2726 | 2834 |
2727 /* Remove CR, LF and white space from end. */ | 2835 /* Remove CR, LF and white space from end. */ |
2728 l = STRLEN(line); | 2836 l = STRLEN(line); |
2729 while (l > 0 && line[l - 1] <= ' ') | 2837 while (l > 0 && line[l - 1] <= ' ') |
2743 | 2851 |
2744 /* Convert from "SET" to 'encoding' when needed. */ | 2852 /* Convert from "SET" to 'encoding' when needed. */ |
2745 if (conv->vc_type != CONV_NONE) | 2853 if (conv->vc_type != CONV_NONE) |
2746 { | 2854 { |
2747 pc = string_convert(conv, line, NULL); | 2855 pc = string_convert(conv, line, NULL); |
2856 if (pc == NULL) | |
2857 { | |
2858 smsg((char_u *)_("Conversion failure for word in %s line %d: %s"), | |
2859 fname, lnum, line); | |
2860 continue; | |
2861 } | |
2748 w = pc; | 2862 w = pc; |
2749 } | 2863 } |
2750 else | 2864 else |
2751 { | 2865 { |
2752 pc = NULL; | 2866 pc = NULL; |
2754 } | 2868 } |
2755 | 2869 |
2756 dw = (dicword_T *)alloc_clear((unsigned)sizeof(dicword_T) | 2870 dw = (dicword_T *)alloc_clear((unsigned)sizeof(dicword_T) |
2757 + STRLEN(w)); | 2871 + STRLEN(w)); |
2758 if (dw == NULL) | 2872 if (dw == NULL) |
2873 { | |
2874 vim_free(pc); | |
2759 break; | 2875 break; |
2876 } | |
2760 STRCPY(dw->dw_word, w); | 2877 STRCPY(dw->dw_word, w); |
2761 vim_free(pc); | 2878 vim_free(pc); |
2762 | 2879 |
2763 hash = hash_hash(dw->dw_word); | 2880 hash = hash_hash(dw->dw_word); |
2764 hi = hash_lookup(ht, dw->dw_word, hash); | 2881 hi = hash_lookup(ht, dw->dw_word, hash); |
3134 hashtab_T *newwords; /* table with words */ | 3251 hashtab_T *newwords; /* table with words */ |
3135 { | 3252 { |
3136 char_u key[2]; | 3253 char_u key[2]; |
3137 char_u *p; | 3254 char_u *p; |
3138 char_u *affnm; | 3255 char_u *affnm; |
3139 garray_T *gap; | 3256 garray_T *gap, *agap; |
3140 hashitem_T *aff_hi; | 3257 hashitem_T *aff_hi; |
3141 affheader_T *ah; | 3258 affheader_T *ah; |
3142 affentry_T *ae; | 3259 affentry_T *ae; |
3143 regmatch_T regmatch; | 3260 regmatch_T regmatch; |
3144 int i; | 3261 int i; |
3145 basicword_T *nbw; | 3262 basicword_T *nbw; |
3146 int alen; | 3263 int alen; |
3147 int wlen; | |
3148 garray_T suffixga; /* list of words with non-word suffixes */ | 3264 garray_T suffixga; /* list of words with non-word suffixes */ |
3149 garray_T prefixga; /* list of words with non-word prefixes */ | 3265 garray_T prefixga; /* list of words with non-word prefixes */ |
3150 char_u nword[MAXWLEN]; | 3266 char_u nword[MAXWLEN]; |
3151 int flags; | 3267 int flags; |
3152 int n; | 3268 int n; |
3177 /* Loop over all the affix entries for this affix name. */ | 3293 /* Loop over all the affix entries for this affix name. */ |
3178 ah = HI2AH(aff_hi); | 3294 ah = HI2AH(aff_hi); |
3179 for (ae = ah->ah_first; ae != NULL; ae = ae->ae_next) | 3295 for (ae = ah->ah_first; ae != NULL; ae = ae->ae_next) |
3180 { | 3296 { |
3181 /* Setup for regexp matching. Note that we don't ignore case. | 3297 /* Setup for regexp matching. Note that we don't ignore case. |
3182 * This is weird, because he rules in an .aff file don't care | 3298 * This is weird, because the rules in an .aff file don't care |
3183 * about case, but it's necessary for compatibility with Myspell. | 3299 * about case, but it's necessary for compatibility with Myspell. |
3184 */ | 3300 */ |
3185 regmatch.regprog = ae->ae_prog; | 3301 regmatch.regprog = ae->ae_prog; |
3186 regmatch.rm_ic = FALSE; | 3302 regmatch.rm_ic = FALSE; |
3187 if (ae->ae_prog == NULL | 3303 if (ae->ae_prog == NULL |
3188 || vim_regexec(®match, dw->dw_word, (colnr_T)0)) | 3304 || vim_regexec(®match, dw->dw_word, (colnr_T)0)) |
3189 { | 3305 { |
3190 if ((ae->ae_add_nw != NULL || ae->ae_add_pw != NULL) | 3306 if ((ae->ae_add_nw != NULL || ae->ae_add_pw != NULL) |
3191 && (gap != &bw->bw_suffix || bw->bw_addstring == NULL)) | 3307 && (gap != &bw->bw_suffix || bw->bw_addstring == NULL)) |
3192 { | 3308 { |
3193 /* Affix has a non-word character and isn't prepended to | 3309 /* |
3310 * Affix has a non-word character and isn't prepended to | |
3194 * leader or appended to addition. Need to use another | 3311 * leader or appended to addition. Need to use another |
3195 * word with an addition. It's a copy of the basicword_T | 3312 * word with a leadstring and/or addstring. |
3196 * "bw". */ | 3313 */ |
3197 if (gap == &bw->bw_suffix) | 3314 if (gap == &bw->bw_suffix || ae->ae_add_nw == NULL) |
3198 { | 3315 { |
3199 alen = ae->ae_add_nw - ae->ae_add; | 3316 /* Suffix or prefix with only non-word chars. |
3200 nbw = (basicword_T *)alloc((unsigned)( | 3317 * Build the new basic word in "nword": Remove chop |
3201 sizeof(basicword_T) + STRLEN(bw->bw_word) | 3318 * string and append/prepend addition. */ |
3202 + alen + 1)); | 3319 if (gap == &bw->bw_suffix) |
3203 if (nbw != NULL) | |
3204 { | 3320 { |
3205 *nbw = *bw; | 3321 /* suffix goes at the end of the word */ |
3206 ga_init2(&nbw->bw_prefix, sizeof(short_u), 1); | |
3207 ga_init2(&nbw->bw_suffix, sizeof(short_u), 1); | |
3208 | |
3209 /* Adding the suffix may change the caps. */ | |
3210 STRCPY(nword, dw->dw_word); | 3322 STRCPY(nword, dw->dw_word); |
3211 if (ae->ae_chop != NULL) | 3323 if (ae->ae_chop != NULL) |
3212 { | 3324 { |
3213 /* Remove chop string. */ | 3325 /* Remove chop string. */ |
3214 p = nword + STRLEN(nword); | 3326 p = nword + STRLEN(nword); |
3215 for (i = mb_charlen(ae->ae_chop); i > 0; --i) | 3327 for (i = mb_charlen(ae->ae_chop); i > 0; --i) |
3216 mb_ptr_back(nword, p); | 3328 mb_ptr_back(nword, p); |
3217 *p = NUL; | 3329 *p = NUL; |
3218 } | 3330 } |
3219 STRCAT(nword, ae->ae_add); | 3331 STRCAT(nword, ae->ae_add); |
3220 flags = captype(nword, nword + STRLEN(nword)); | 3332 agap = &suffixga; |
3221 if (flags & BWF_KEEPCAP) | |
3222 { | |
3223 /* "caseword" excludes the addition */ | |
3224 nword[STRLEN(dw->dw_word) + alen] = NUL; | |
3225 nbw->bw_caseword = vim_strsave(nword); | |
3226 } | |
3227 nbw->bw_flags &= ~(BWF_ONECAP | BWF_ALLCAP | |
3228 | BWF_KEEPCAP); | |
3229 nbw->bw_flags |= flags; | |
3230 | |
3231 if (bw->bw_leadstring != NULL) | |
3232 nbw->bw_leadstring = | |
3233 vim_strsave(bw->bw_leadstring); | |
3234 nbw->bw_addstring = vim_strsave(ae->ae_add_nw); | |
3235 | |
3236 STRCPY(nbw->bw_word, bw->bw_word); | |
3237 if (alen > 0 || ae->ae_chop != NULL) | |
3238 { | |
3239 /* Suffix starts with word character and/or | |
3240 * chop off something. Append it to the word. | |
3241 * Add new word entry. */ | |
3242 wlen = STRLEN(nbw->bw_word); | |
3243 if (ae->ae_chop != NULL) | |
3244 wlen -= STRLEN(ae->ae_chop); | |
3245 mch_memmove(nbw->bw_word + wlen, ae->ae_add, | |
3246 alen); | |
3247 nbw->bw_word[wlen + alen] = NUL; | |
3248 add_to_wordlist(newwords, nbw); | |
3249 } | |
3250 else | |
3251 /* Basic word is the same, link "nbw" after | |
3252 * "bw". */ | |
3253 bw->bw_next = nbw; | |
3254 | |
3255 /* Remember this word, we need to set bw_prefix | |
3256 * and bw_prefix later. */ | |
3257 if (ga_grow(&suffixga, 1) == OK) | |
3258 ((basicword_T **)suffixga.ga_data) | |
3259 [suffixga.ga_len++] = nbw; | |
3260 } | 3333 } |
3261 } | 3334 else |
3262 else if (ae->ae_add_nw == NULL) | |
3263 { | |
3264 /* Prefix that starts with non-word char(s) and may be | |
3265 * followed by word chars: Make a leadstring and | |
3266 * prepend word chars before the word. */ | |
3267 alen = STRLEN(ae->ae_add_pw); | |
3268 nbw = (basicword_T *)alloc((unsigned)( | |
3269 sizeof(basicword_T) + STRLEN(bw->bw_word) | |
3270 + alen + 1)); | |
3271 if (nbw != NULL) | |
3272 { | 3335 { |
3273 *nbw = *bw; | 3336 /* prefix goes before the word */ |
3274 ga_init2(&nbw->bw_prefix, sizeof(short_u), 1); | |
3275 ga_init2(&nbw->bw_suffix, sizeof(short_u), 1); | |
3276 | |
3277 /* Adding the prefix may change the caps. */ | |
3278 STRCPY(nword, ae->ae_add); | 3337 STRCPY(nword, ae->ae_add); |
3279 p = dw->dw_word; | 3338 p = dw->dw_word; |
3280 if (ae->ae_chop != NULL) | 3339 if (ae->ae_chop != NULL) |
3281 /* Skip chop string. */ | 3340 /* Skip chop string. */ |
3282 for (i = mb_charlen(ae->ae_chop); i > 0; --i) | 3341 for (i = mb_charlen(ae->ae_chop); i > 0; --i) |
3283 mb_ptr_adv( p); | 3342 mb_ptr_adv( p); |
3284 STRCAT(nword, p); | 3343 STRCAT(nword, p); |
3285 | 3344 agap = &prefixga; |
3286 flags = captype(nword, nword + STRLEN(nword)); | 3345 } |
3287 if (flags & BWF_KEEPCAP) | 3346 |
3288 /* "caseword" excludes the addition */ | 3347 /* Create a basicword_T from the word. */ |
3289 nbw->bw_caseword = vim_strsave(nword | 3348 nbw = get_basicword(nword, 1); |
3290 + (ae->ae_add_pw - ae->ae_add)); | 3349 if (nbw != NULL) |
3291 else | 3350 { |
3292 nbw->bw_caseword = NULL; | 3351 nbw->bw_region = bw->bw_region; |
3293 nbw->bw_flags &= ~(BWF_ONECAP | BWF_ALLCAP | 3352 nbw->bw_flags |= bw->bw_flags |
3294 | BWF_KEEPCAP); | 3353 & ~(BWF_ONECAP | BWF_ALLCAP | BWF_KEEPCAP); |
3295 nbw->bw_flags |= flags; | 3354 |
3296 | 3355 if (STRCMP(bw->bw_word, nbw->bw_word) != 0) |
3297 if (bw->bw_addstring != NULL) | 3356 /* Basic word differs, add new word entry. */ |
3298 nbw->bw_addstring = | 3357 (void)add_to_wordlist(newwords, nbw); |
3299 vim_strsave(bw->bw_addstring); | |
3300 else | |
3301 nbw->bw_addstring = NULL; | |
3302 nbw->bw_leadstring = vim_strnsave(ae->ae_add, | |
3303 ae->ae_add_pw - ae->ae_add); | |
3304 | |
3305 if (alen > 0 || ae->ae_chop != NULL) | |
3306 { | |
3307 /* Prefix ends in word character and/or chop | |
3308 * off something. Prepend it to the word. | |
3309 * Add new word entry. */ | |
3310 STRCPY(nbw->bw_word, ae->ae_add_pw); | |
3311 p = bw->bw_word; | |
3312 if (ae->ae_chop != NULL) | |
3313 p += STRLEN(ae->ae_chop); | |
3314 STRCAT(nbw->bw_word, p); | |
3315 add_to_wordlist(newwords, nbw); | |
3316 } | |
3317 else | 3358 else |
3318 { | 3359 { |
3319 /* Basic word is the same, link "nbw" after | 3360 /* Basic word is the same, link "nbw" after |
3320 * "bw". */ | 3361 * "bw". */ |
3321 STRCPY(nbw->bw_word, bw->bw_word); | 3362 nbw->bw_next = bw->bw_next; |
3322 bw->bw_next = nbw; | 3363 bw->bw_next = nbw; |
3323 } | 3364 } |
3324 | 3365 |
3325 /* Remember this word, we need to set bw_suffix | 3366 /* Remember this word, we need to set bw_prefix |
3326 * and bw_suffix later. */ | 3367 * or bw_suffix later. */ |
3327 if (ga_grow(&prefixga, 1) == OK) | 3368 if (ga_grow(agap, 1) == OK) |
3328 ((basicword_T **)prefixga.ga_data) | 3369 ((basicword_T **)agap->ga_data) |
3329 [prefixga.ga_len++] = nbw; | 3370 [agap->ga_len++] = nbw; |
3330 } | 3371 } |
3331 } | 3372 } |
3332 else | 3373 else |
3333 { | 3374 { |
3334 /* Prefix with both non-word and word characters: Turn | 3375 /* Prefix with both non-word and word characters: Turn |
3343 #ifdef FEAT_MBYTE | 3384 #ifdef FEAT_MBYTE |
3344 n = (*mb_ptr2len_check)(p); | 3385 n = (*mb_ptr2len_check)(p); |
3345 #else | 3386 #else |
3346 n = 1; | 3387 n = 1; |
3347 #endif | 3388 #endif |
3348 (void)str_foldcase(p, n, nword + alen, | 3389 (void)spell_casefold(p, n, nword + alen, |
3349 MAXWLEN - alen); | 3390 MAXWLEN - alen); |
3350 alen += STRLEN(nword + alen); | 3391 alen += STRLEN(nword + alen); |
3351 } | 3392 } |
3352 | 3393 |
3353 /* Allocate a new word entry. */ | 3394 /* Allocate a new word entry. */ |
3391 nbw->bw_leadstring = vim_strnsave(ae->ae_add, | 3432 nbw->bw_leadstring = vim_strnsave(ae->ae_add, |
3392 ae->ae_add_pw - ae->ae_add); | 3433 ae->ae_add_pw - ae->ae_add); |
3393 else | 3434 else |
3394 nbw->bw_leadstring = NULL; | 3435 nbw->bw_leadstring = NULL; |
3395 | 3436 |
3396 add_to_wordlist(newwords, nbw); | 3437 (void)add_to_wordlist(newwords, nbw); |
3397 | 3438 |
3398 /* Remember this word, we need to set bw_suffix | 3439 /* Remember this word, we need to set bw_suffix |
3399 * and bw_suffix later. */ | 3440 * and bw_suffix later. */ |
3400 if (ga_grow(&prefixga, 1) == OK) | 3441 if (ga_grow(&prefixga, 1) == OK) |
3401 ((basicword_T **)prefixga.ga_data) | 3442 ((basicword_T **)prefixga.ga_data) |
3480 { | 3521 { |
3481 int todo; | 3522 int todo; |
3482 hashitem_T *old_hi; | 3523 hashitem_T *old_hi; |
3483 dicword_T *dw; | 3524 dicword_T *dw; |
3484 basicword_T *bw; | 3525 basicword_T *bw; |
3485 char_u foldword[MAXLINELEN]; | |
3486 int leadlen; | |
3487 char_u leadstring[MAXLINELEN]; | |
3488 int addlen; | |
3489 char_u addstring[MAXLINELEN]; | |
3490 int dwlen; | |
3491 char_u *p; | |
3492 int clen; | |
3493 int flags; | |
3494 char_u *cp = NULL; | |
3495 int l; | |
3496 char_u message[MAXLINELEN + MAXWLEN]; | 3526 char_u message[MAXLINELEN + MAXWLEN]; |
3497 | 3527 |
3498 todo = oldwords->ht_used; | 3528 todo = oldwords->ht_used; |
3499 for (old_hi = oldwords->ht_array; todo > 0; ++old_hi) | 3529 for (old_hi = oldwords->ht_array; todo > 0; ++old_hi) |
3500 { | 3530 { |
3517 ui_breakcheck(); | 3547 ui_breakcheck(); |
3518 if (got_int) | 3548 if (got_int) |
3519 break; | 3549 break; |
3520 } | 3550 } |
3521 | 3551 |
3522 /* The basic words are always stored with folded case. */ | 3552 bw = get_basicword(dw->dw_word, 10); |
3523 dwlen = STRLEN(dw->dw_word); | |
3524 (void)str_foldcase(dw->dw_word, dwlen, foldword, MAXLINELEN); | |
3525 flags = captype(dw->dw_word, dw->dw_word + dwlen); | |
3526 | |
3527 /* Check for non-word characters before the word. */ | |
3528 clen = 0; | |
3529 leadlen = 0; | |
3530 if (!spell_iswordc(foldword)) | |
3531 { | |
3532 p = foldword; | |
3533 for (;;) | |
3534 { | |
3535 mb_ptr_adv(p); | |
3536 ++clen; | |
3537 if (*p == NUL) /* Only non-word chars (bad word!) */ | |
3538 { | |
3539 if (p_verbose > 0) | |
3540 smsg((char_u *)_("Warning: word without word characters: \"%s\""), | |
3541 foldword); | |
3542 break; | |
3543 } | |
3544 if (spell_iswordc(p)) | |
3545 { | |
3546 /* Move the leader to "leadstring" and remove it from | |
3547 * "foldword". */ | |
3548 leadlen = p - foldword; | |
3549 mch_memmove(leadstring, foldword, leadlen); | |
3550 leadstring[leadlen] = NUL; | |
3551 mch_memmove(foldword, p, STRLEN(p) + 1); | |
3552 break; | |
3553 } | |
3554 } | |
3555 } | |
3556 | |
3557 /* Check for non-word characters after word characters. */ | |
3558 addlen = 0; | |
3559 for (p = foldword; spell_iswordc(p); mb_ptr_adv(p)) | |
3560 { | |
3561 if (*p == NUL) | |
3562 break; | |
3563 ++clen; | |
3564 } | |
3565 if (*p != NUL) | |
3566 { | |
3567 /* Move the addition to "addstring" and truncate "foldword". */ | |
3568 if (flags & BWF_KEEPCAP) | |
3569 { | |
3570 /* Preserve caps, need to skip the right number of | |
3571 * characters in the original word (case folding may | |
3572 * change the byte count). */ | |
3573 l = 0; | |
3574 for (cp = dw->dw_word; l < clen; mb_ptr_adv(cp)) | |
3575 ++l; | |
3576 addlen = STRLEN(cp); | |
3577 mch_memmove(addstring, cp, addlen + 1); | |
3578 } | |
3579 else | |
3580 { | |
3581 addlen = STRLEN(p); | |
3582 mch_memmove(addstring, p, addlen + 1); | |
3583 } | |
3584 *p = NUL; | |
3585 } | |
3586 | |
3587 bw = (basicword_T *)alloc_clear((unsigned)sizeof(basicword_T) | |
3588 + STRLEN(foldword)); | |
3589 if (bw == NULL) | 3553 if (bw == NULL) |
3590 break; | 3554 break; |
3591 STRCPY(bw->bw_word, foldword); | |
3592 bw->bw_region = regionmask; | 3555 bw->bw_region = regionmask; |
3593 | 3556 |
3594 if (leadlen > 0) | 3557 (void)add_to_wordlist(newwords, bw); |
3595 bw->bw_leadstring = vim_strsave(leadstring); | |
3596 else | |
3597 bw->bw_leadstring = NULL; | |
3598 if (addlen > 0) | |
3599 bw->bw_addstring = vim_strsave(addstring); | |
3600 else | |
3601 bw->bw_addstring = NULL; | |
3602 | |
3603 add_to_wordlist(newwords, bw); | |
3604 | |
3605 if (flags & BWF_KEEPCAP) | |
3606 { | |
3607 if (addlen == 0) | |
3608 /* use the whole word */ | |
3609 bw->bw_caseword = vim_strsave(dw->dw_word + leadlen); | |
3610 else | |
3611 /* use only up to the addition */ | |
3612 bw->bw_caseword = vim_strnsave(dw->dw_word + leadlen, | |
3613 cp - dw->dw_word - leadlen); | |
3614 if (bw->bw_caseword == NULL) /* out of memory */ | |
3615 flags &= ~BWF_KEEPCAP; | |
3616 } | |
3617 bw->bw_flags = flags; | |
3618 | 3558 |
3619 /* Deal with any affix names on the old word, translate them | 3559 /* Deal with any affix names on the old word, translate them |
3620 * into affix numbers. */ | 3560 * into affix numbers. */ |
3621 ga_init2(&bw->bw_prefix, sizeof(short_u), 10); | |
3622 ga_init2(&bw->bw_suffix, sizeof(short_u), 10); | |
3623 if (dw->dw_affnm != NULL) | 3561 if (dw->dw_affnm != NULL) |
3624 trans_affixes(dw, bw, oldaff, newwords); | 3562 trans_affixes(dw, bw, oldaff, newwords); |
3625 } | 3563 } |
3626 } | 3564 } |
3627 if (todo > 0) | 3565 if (todo > 0) |
3628 return FAIL; | 3566 return FAIL; |
3629 return OK; | 3567 return OK; |
3568 } | |
3569 | |
3570 /* | |
3571 * Get a basicword_T from a word in original case. | |
3572 * Caller must set bw_region. | |
3573 * Returns NULL when something fails. | |
3574 */ | |
3575 static basicword_T * | |
3576 get_basicword(word, asize) | |
3577 char_u *word; | |
3578 int asize; /* growsize for affix garray */ | |
3579 { | |
3580 int dwlen; | |
3581 char_u foldword[MAXLINELEN]; | |
3582 int flags; | |
3583 int clen; | |
3584 int leadlen; | |
3585 char_u *p; | |
3586 char_u leadstring[MAXLINELEN]; | |
3587 int addlen; | |
3588 char_u addstring[MAXLINELEN]; | |
3589 char_u *cp = NULL; | |
3590 int l; | |
3591 basicword_T *bw; | |
3592 | |
3593 /* The basic words are always stored with folded case. */ | |
3594 dwlen = STRLEN(word); | |
3595 (void)spell_casefold(word, dwlen, foldword, MAXLINELEN); | |
3596 flags = captype(word, word + dwlen); | |
3597 | |
3598 /* Check for non-word characters before the word. */ | |
3599 clen = 0; | |
3600 leadlen = 0; | |
3601 if (!spell_iswordc(foldword)) | |
3602 { | |
3603 p = foldword; | |
3604 for (;;) | |
3605 { | |
3606 mb_ptr_adv(p); | |
3607 ++clen; | |
3608 if (*p == NUL) /* Only non-word chars (bad word!) */ | |
3609 { | |
3610 if (p_verbose > 0) | |
3611 smsg((char_u *)_("Warning: word without word characters: \"%s\""), | |
3612 foldword); | |
3613 break; | |
3614 } | |
3615 if (spell_iswordc(p)) | |
3616 { | |
3617 /* Move the leader to "leadstring" and remove it from | |
3618 * "foldword". */ | |
3619 leadlen = p - foldword; | |
3620 mch_memmove(leadstring, foldword, leadlen); | |
3621 leadstring[leadlen] = NUL; | |
3622 mch_memmove(foldword, p, STRLEN(p) + 1); | |
3623 break; | |
3624 } | |
3625 } | |
3626 } | |
3627 | |
3628 /* Check for non-word characters after word characters. */ | |
3629 addlen = 0; | |
3630 for (p = foldword; spell_iswordc(p); mb_ptr_adv(p)) | |
3631 { | |
3632 if (*p == NUL) | |
3633 break; | |
3634 ++clen; | |
3635 } | |
3636 if (*p != NUL) | |
3637 { | |
3638 /* Move the addition to "addstring" and truncate "foldword". */ | |
3639 if (flags & BWF_KEEPCAP) | |
3640 { | |
3641 /* Preserve caps, need to skip the right number of | |
3642 * characters in the original word (case folding may | |
3643 * change the byte count). */ | |
3644 l = 0; | |
3645 for (cp = word; l < clen; mb_ptr_adv(cp)) | |
3646 ++l; | |
3647 addlen = STRLEN(cp); | |
3648 mch_memmove(addstring, cp, addlen + 1); | |
3649 } | |
3650 else | |
3651 { | |
3652 addlen = STRLEN(p); | |
3653 mch_memmove(addstring, p, addlen + 1); | |
3654 } | |
3655 *p = NUL; | |
3656 } | |
3657 | |
3658 bw = (basicword_T *)alloc_clear((unsigned)sizeof(basicword_T) | |
3659 + STRLEN(foldword)); | |
3660 if (bw == NULL) | |
3661 return NULL; | |
3662 | |
3663 STRCPY(bw->bw_word, foldword); | |
3664 | |
3665 if (leadlen > 0) | |
3666 bw->bw_leadstring = vim_strsave(leadstring); | |
3667 else | |
3668 bw->bw_leadstring = NULL; | |
3669 if (addlen > 0) | |
3670 bw->bw_addstring = vim_strsave(addstring); | |
3671 else | |
3672 bw->bw_addstring = NULL; | |
3673 | |
3674 if (flags & BWF_KEEPCAP) | |
3675 { | |
3676 if (addlen == 0) | |
3677 /* use the whole word */ | |
3678 bw->bw_caseword = vim_strsave(word + leadlen); | |
3679 else | |
3680 /* use only up to the addition */ | |
3681 bw->bw_caseword = vim_strnsave(word + leadlen, | |
3682 cp - word - leadlen); | |
3683 } | |
3684 | |
3685 bw->bw_flags = flags; | |
3686 ga_init2(&bw->bw_prefix, sizeof(short_u), asize); | |
3687 ga_init2(&bw->bw_suffix, sizeof(short_u), asize); | |
3688 | |
3689 return bw; | |
3630 } | 3690 } |
3631 | 3691 |
3632 /* | 3692 /* |
3633 * Go through the list of words and combine the ones that are identical except | 3693 * Go through the list of words and combine the ones that are identical except |
3634 * for the region. | 3694 * for the region. |
3660 && (bw->bw_leadstring == NULL) | 3720 && (bw->bw_leadstring == NULL) |
3661 == (nbw->bw_leadstring == NULL) | 3721 == (nbw->bw_leadstring == NULL) |
3662 && (bw->bw_addstring == NULL) | 3722 && (bw->bw_addstring == NULL) |
3663 == (nbw->bw_addstring == NULL) | 3723 == (nbw->bw_addstring == NULL) |
3664 && ((bw->bw_flags & BWF_KEEPCAP) == 0 | 3724 && ((bw->bw_flags & BWF_KEEPCAP) == 0 |
3665 || (STRCMP(bw->bw_caseword, | 3725 || bw->bw_caseword == NULL |
3666 nbw->bw_caseword) == 0)) | 3726 || nbw->bw_caseword == NULL |
3727 || STRCMP(bw->bw_caseword, | |
3728 nbw->bw_caseword) == 0) | |
3667 && (bw->bw_leadstring == NULL | 3729 && (bw->bw_leadstring == NULL |
3668 || (STRCMP(bw->bw_leadstring, | 3730 || STRCMP(bw->bw_leadstring, |
3669 nbw->bw_leadstring) == 0)) | 3731 nbw->bw_leadstring) == 0) |
3670 && (bw->bw_addstring == NULL | 3732 && (bw->bw_addstring == NULL |
3671 || (STRCMP(bw->bw_addstring, | 3733 || STRCMP(bw->bw_addstring, |
3672 nbw->bw_addstring) == 0)) | 3734 nbw->bw_addstring) == 0) |
3673 && same_affixes(bw, nbw) | 3735 && same_affixes(bw, nbw) |
3674 ) | 3736 ) |
3675 { | 3737 { |
3676 /* Match, combine regions and delete "nbw". */ | 3738 /* Match, combine regions and delete "nbw". */ |
3677 pbw->bw_next = nbw->bw_next; | 3739 pbw->bw_next = nbw->bw_next; |
3714 * The result is that no affixes apply to the additions or leadstring of a | 3776 * The result is that no affixes apply to the additions or leadstring of a |
3715 * word. | 3777 * word. |
3716 * This is also needed when a word with an addition has a prefix and the word | 3778 * This is also needed when a word with an addition has a prefix and the word |
3717 * with prefix also exists. E.g., "blurp's/D" (D is prefix "de") and | 3779 * with prefix also exists. E.g., "blurp's/D" (D is prefix "de") and |
3718 * "deblurp". "deblurp" would match and no prefix would be tried. | 3780 * "deblurp". "deblurp" would match and no prefix would be tried. |
3719 */ | 3781 * |
3720 static void | 3782 * Returns FAIL when out of memory. |
3783 */ | |
3784 static int | |
3721 expand_affixes(newwords, prefgap, suffgap) | 3785 expand_affixes(newwords, prefgap, suffgap) |
3722 hashtab_T *newwords; | 3786 hashtab_T *newwords; |
3723 garray_T *prefgap; | 3787 garray_T *prefgap; |
3724 garray_T *suffgap; | 3788 garray_T *suffgap; |
3725 { | 3789 { |
3729 int pi, si; | 3793 int pi, si; |
3730 affentry_T *pae, *sae; | 3794 affentry_T *pae, *sae; |
3731 garray_T add_words; | 3795 garray_T add_words; |
3732 int n; | 3796 int n; |
3733 char_u message[MAXLINELEN + MAXWLEN]; | 3797 char_u message[MAXLINELEN + MAXWLEN]; |
3798 int retval = OK; | |
3734 | 3799 |
3735 ga_init2(&add_words, sizeof(basicword_T *), 10); | 3800 ga_init2(&add_words, sizeof(basicword_T *), 10); |
3736 | 3801 |
3737 todo = newwords->ht_used; | 3802 todo = newwords->ht_used; |
3738 for (hi = newwords->ht_array; todo > 0; ++hi) | 3803 for (hi = newwords->ht_array; todo > 0; ++hi) |
3804 */ | 3869 */ |
3805 do | 3870 do |
3806 { | 3871 { |
3807 /* Expand the word for this combination of | 3872 /* Expand the word for this combination of |
3808 * prefixes and affixes. */ | 3873 * prefixes and affixes. */ |
3809 expand_one_aff(bw, &add_words, pae, sae); | 3874 if (expand_one_aff(bw, &add_words, |
3875 pae, sae) == FAIL) | |
3876 { | |
3877 retval = FAIL; | |
3878 goto theend; | |
3879 } | |
3810 | 3880 |
3811 /* Advance to next suffix entry, if there | 3881 /* Advance to next suffix entry, if there |
3812 * is one. */ | 3882 * is one. */ |
3813 if (sae != NULL) | 3883 if (sae != NULL) |
3814 sae = sae->ae_next; | 3884 sae = sae->ae_next; |
3829 /* | 3899 /* |
3830 * Add the new words afterwards, can't change "newwords" while going over | 3900 * Add the new words afterwards, can't change "newwords" while going over |
3831 * all its items. | 3901 * all its items. |
3832 */ | 3902 */ |
3833 for (pi = 0; pi < add_words.ga_len; ++pi) | 3903 for (pi = 0; pi < add_words.ga_len; ++pi) |
3834 add_to_wordlist(newwords, ((basicword_T **)add_words.ga_data)[pi]); | 3904 { |
3835 | 3905 retval = add_to_wordlist(newwords, |
3906 ((basicword_T **)add_words.ga_data)[pi]); | |
3907 if (retval == FAIL) | |
3908 break; | |
3909 } | |
3910 | |
3911 theend: | |
3836 ga_clear(&add_words); | 3912 ga_clear(&add_words); |
3913 return retval; | |
3837 } | 3914 } |
3838 | 3915 |
3839 /* | 3916 /* |
3840 * Add one word to "add_words" for basic word "bw" with additions, adding | 3917 * Add one word to "add_words" for basic word "bw" with additions, adding |
3841 * prefix "pae" and suffix "sae". Either "pae" or "sae" can be NULL. | 3918 * prefix "pae" and suffix "sae". Either "pae" or "sae" can be NULL. |
3842 * Don't do this when not necessary: | 3919 * Don't do this when not necessary: |
3843 * - no leadstring and adding prefix doesn't result in existing word. | 3920 * - no leadstring and adding prefix doesn't result in existing word. |
3844 */ | 3921 * Returns FAIL when out of memory. |
3845 static void | 3922 */ |
3923 static int | |
3846 expand_one_aff(bw, add_words, pae, sae) | 3924 expand_one_aff(bw, add_words, pae, sae) |
3847 basicword_T *bw; | 3925 basicword_T *bw; |
3848 garray_T *add_words; | 3926 garray_T *add_words; |
3849 affentry_T *pae; | 3927 affentry_T *pae; |
3850 affentry_T *sae; | 3928 affentry_T *sae; |
3871 | 3949 |
3872 /* Copy the body of the word. */ | 3950 /* Copy the body of the word. */ |
3873 STRCPY(word + l, bw->bw_word + choplen); | 3951 STRCPY(word + l, bw->bw_word + choplen); |
3874 | 3952 |
3875 /* Do the same for bw_caseword, if it's there. */ | 3953 /* Do the same for bw_caseword, if it's there. */ |
3876 if (bw->bw_flags & BWF_KEEPCAP) | 3954 if ((bw->bw_flags & BWF_KEEPCAP) && bw->bw_caseword != NULL) |
3877 { | 3955 { |
3878 if (l > 0) | 3956 if (l > 0) |
3879 mch_memmove(caseword, pae->ae_add, l); | 3957 mch_memmove(caseword, pae->ae_add, l); |
3880 STRCPY(caseword + l, bw->bw_caseword + choplen); | 3958 STRCPY(caseword + l, bw->bw_caseword + choplen); |
3881 } | 3959 } |
3905 } | 3983 } |
3906 } | 3984 } |
3907 | 3985 |
3908 nbw = (basicword_T *)alloc_clear((unsigned) | 3986 nbw = (basicword_T *)alloc_clear((unsigned) |
3909 sizeof(basicword_T) + STRLEN(word)); | 3987 sizeof(basicword_T) + STRLEN(word)); |
3910 if (nbw != NULL) | 3988 if (nbw == NULL) |
3911 { | 3989 return FAIL; |
3912 /* Add the new word to the list of words to be added later. */ | 3990 |
3913 if (ga_grow(add_words, 1) == FAIL) | 3991 /* Add the new word to the list of words to be added later. */ |
3914 { | 3992 if (ga_grow(add_words, 1) == FAIL) |
3915 vim_free(nbw); | 3993 { |
3916 return; | 3994 vim_free(nbw); |
3917 } | 3995 return FAIL; |
3918 ((basicword_T **)add_words->ga_data)[add_words->ga_len++] = nbw; | 3996 } |
3919 | 3997 ((basicword_T **)add_words->ga_data)[add_words->ga_len++] = nbw; |
3920 /* Copy the (modified) basic word, flags and region. */ | 3998 |
3921 STRCPY(nbw->bw_word, word); | 3999 /* Copy the (modified) basic word, flags and region. */ |
3922 nbw->bw_flags = bw->bw_flags; | 4000 STRCPY(nbw->bw_word, word); |
3923 nbw->bw_region = bw->bw_region; | 4001 nbw->bw_flags = bw->bw_flags; |
3924 | 4002 nbw->bw_region = bw->bw_region; |
3925 /* Set the (modified) caseword. */ | 4003 |
3926 if (bw->bw_flags & BWF_KEEPCAP) | 4004 /* Set the (modified) caseword. */ |
3927 if ((nbw->bw_caseword = vim_strsave(caseword)) == NULL) | 4005 if (bw->bw_flags & BWF_KEEPCAP) |
3928 nbw->bw_flags &= ~BWF_KEEPCAP; | 4006 nbw->bw_caseword = vim_strsave(caseword); |
3929 | 4007 else |
3930 if (bw->bw_leadstring != NULL) | 4008 nbw->bw_caseword = NULL; |
3931 { | 4009 |
3932 if (pae != NULL) | 4010 if (bw->bw_leadstring != NULL) |
3933 { | 4011 { |
3934 /* Prepend prefix to leadstring. */ | 4012 if (pae != NULL) |
3935 ll = STRLEN(bw->bw_leadstring); | 4013 { |
3936 l = choplen = 0; | 4014 /* Prepend prefix to leadstring. */ |
3937 if (pae->ae_add != NULL) | 4015 ll = STRLEN(bw->bw_leadstring); |
3938 l = STRLEN(pae->ae_add); | 4016 l = choplen = 0; |
3939 if (pae->ae_chop != NULL) | 4017 if (pae->ae_add != NULL) |
3940 { | 4018 l = STRLEN(pae->ae_add); |
3941 choplen = STRLEN(pae->ae_chop); | 4019 if (pae->ae_chop != NULL) |
3942 if (choplen > ll) /* TODO: error? */ | 4020 { |
3943 choplen = ll; | 4021 choplen = STRLEN(pae->ae_chop); |
3944 } | 4022 if (choplen > ll) /* TODO: error? */ |
3945 nbw->bw_leadstring = alloc((unsigned)(ll + l - choplen + 1)); | 4023 choplen = ll; |
3946 if (nbw->bw_leadstring != NULL) | 4024 } |
3947 { | 4025 nbw->bw_leadstring = alloc((unsigned)(ll + l - choplen + 1)); |
3948 if (l > 0) | 4026 if (nbw->bw_leadstring != NULL) |
3949 mch_memmove(nbw->bw_leadstring, pae->ae_add, l); | 4027 { |
3950 STRCPY(nbw->bw_leadstring + l, bw->bw_leadstring + choplen); | 4028 if (l > 0) |
3951 } | 4029 mch_memmove(nbw->bw_leadstring, pae->ae_add, l); |
3952 } | 4030 STRCPY(nbw->bw_leadstring + l, bw->bw_leadstring + choplen); |
4031 } | |
4032 } | |
4033 else | |
4034 nbw->bw_leadstring = vim_strsave(bw->bw_leadstring); | |
4035 } | |
4036 else if (bw->bw_prefix.ga_len > 0) | |
4037 { | |
4038 /* There is no leadstring, copy the list of possible prefixes. */ | |
4039 ga_init2(&nbw->bw_prefix, sizeof(short_u), 1); | |
4040 if (ga_grow(&nbw->bw_prefix, bw->bw_prefix.ga_len) == OK) | |
4041 { | |
4042 mch_memmove(nbw->bw_prefix.ga_data, bw->bw_prefix.ga_data, | |
4043 bw->bw_prefix.ga_len * sizeof(short_u)); | |
4044 nbw->bw_prefix.ga_len = bw->bw_prefix.ga_len; | |
4045 } | |
4046 } | |
4047 | |
4048 if (bw->bw_addstring != NULL) | |
4049 { | |
4050 if (sae != NULL) | |
4051 { | |
4052 /* Append suffix to addstring. */ | |
4053 l = STRLEN(bw->bw_addstring); | |
4054 if (sae->ae_chop != NULL) | |
4055 { | |
4056 l -= STRLEN(sae->ae_chop); | |
4057 if (l < 0) /* TODO: error? */ | |
4058 l = 0; | |
4059 } | |
4060 if (sae->ae_add == NULL) | |
4061 ll = 0; | |
3953 else | 4062 else |
3954 nbw->bw_leadstring = vim_strsave(bw->bw_leadstring); | 4063 ll = STRLEN(sae->ae_add); |
3955 } | 4064 nbw->bw_addstring = alloc((unsigned)(ll + l - choplen + 1)); |
3956 else if (bw->bw_prefix.ga_len > 0) | 4065 if (nbw->bw_addstring != NULL) |
3957 { | 4066 { |
3958 /* There is no leadstring, copy the list of possible prefixes. */ | 4067 STRCPY(nbw->bw_addstring, bw->bw_addstring); |
3959 ga_init2(&nbw->bw_prefix, sizeof(short_u), 1); | |
3960 if (ga_grow(&nbw->bw_prefix, bw->bw_prefix.ga_len) == OK) | |
3961 { | |
3962 mch_memmove(nbw->bw_prefix.ga_data, bw->bw_prefix.ga_data, | |
3963 bw->bw_prefix.ga_len * sizeof(short_u)); | |
3964 nbw->bw_prefix.ga_len = bw->bw_prefix.ga_len; | |
3965 } | |
3966 } | |
3967 | |
3968 if (bw->bw_addstring != NULL) | |
3969 { | |
3970 if (sae != NULL) | |
3971 { | |
3972 /* Append suffix to addstring. */ | |
3973 l = STRLEN(bw->bw_addstring); | |
3974 if (sae->ae_chop != NULL) | |
3975 { | |
3976 l -= STRLEN(sae->ae_chop); | |
3977 if (l < 0) /* TODO: error? */ | |
3978 l = 0; | |
3979 } | |
3980 if (sae->ae_add == NULL) | 4068 if (sae->ae_add == NULL) |
3981 ll = 0; | 4069 nbw->bw_addstring[l] = NUL; |
3982 else | 4070 else |
3983 ll = STRLEN(sae->ae_add); | 4071 STRCPY(nbw->bw_addstring + l, sae->ae_add); |
3984 nbw->bw_addstring = alloc((unsigned)(ll + l - choplen + 1)); | 4072 } |
3985 if (nbw->bw_addstring != NULL) | 4073 } |
3986 { | 4074 else |
3987 STRCPY(nbw->bw_addstring, bw->bw_addstring); | 4075 nbw->bw_addstring = vim_strsave(bw->bw_addstring); |
3988 if (sae->ae_add == NULL) | 4076 } |
3989 nbw->bw_addstring[l] = NUL; | 4077 |
3990 else | 4078 return OK; |
3991 STRCPY(nbw->bw_addstring + l, sae->ae_add); | |
3992 } | |
3993 } | |
3994 else | |
3995 nbw->bw_addstring = vim_strsave(bw->bw_addstring); | |
3996 } | |
3997 } | |
3998 } | 4079 } |
3999 | 4080 |
4000 /* | 4081 /* |
4001 * Add basicword_T "*bw" to wordlist "newwords". | 4082 * Add basicword_T "*bw" to wordlist "newwords". |
4002 */ | 4083 */ |
4003 static void | 4084 static int |
4004 add_to_wordlist(newwords, bw) | 4085 add_to_wordlist(newwords, bw) |
4005 hashtab_T *newwords; | 4086 hashtab_T *newwords; |
4006 basicword_T *bw; | 4087 basicword_T *bw; |
4007 { | 4088 { |
4008 hashitem_T *hi; | 4089 hashitem_T *hi; |
4009 basicword_T *bw2; | 4090 basicword_T *bw2; |
4091 int retval = OK; | |
4010 | 4092 |
4011 hi = hash_find(newwords, bw->bw_word); | 4093 hi = hash_find(newwords, bw->bw_word); |
4012 if (HASHITEM_EMPTY(hi)) | 4094 if (HASHITEM_EMPTY(hi)) |
4013 { | 4095 { |
4014 /* New entry, add to hashlist. */ | 4096 /* New entry, add to hashlist. */ |
4015 hash_add(newwords, bw->bw_word); | 4097 retval = hash_add(newwords, bw->bw_word); |
4016 bw->bw_next = NULL; | 4098 bw->bw_next = NULL; |
4017 } | 4099 } |
4018 else | 4100 else |
4019 { | 4101 { |
4020 /* Existing entry, append to list of basic words. */ | 4102 /* Existing entry, append to list of basic words. */ |
4021 bw2 = HI2BW(hi); | 4103 bw2 = HI2BW(hi); |
4022 bw->bw_next = bw2->bw_next; | 4104 bw->bw_next = bw2->bw_next; |
4023 bw2->bw_next = bw; | 4105 bw2->bw_next = bw; |
4024 } | 4106 } |
4107 return retval; | |
4025 } | 4108 } |
4026 | 4109 |
4027 /* | 4110 /* |
4028 * Write a number to file "fd", MSB first, in "len" bytes. | 4111 * Write a number to file "fd", MSB first, in "len" bytes. |
4029 */ | 4112 */ |
4030 static void | 4113 void |
4031 put_bytes(fd, nr, len) | 4114 put_bytes(fd, nr, len) |
4032 FILE *fd; | 4115 FILE *fd; |
4033 long_u nr; | 4116 long_u nr; |
4034 int len; | 4117 int len; |
4035 { | 4118 { |
4103 /* | 4186 /* |
4104 * Vim spell file format: <HEADER> <PREFIXLIST> <SUFFIXLIST> | 4187 * Vim spell file format: <HEADER> <PREFIXLIST> <SUFFIXLIST> |
4105 * <SUGGEST> <WORDLIST> | 4188 * <SUGGEST> <WORDLIST> |
4106 * | 4189 * |
4107 * <HEADER>: <fileID> <regioncnt> <regionname> ... | 4190 * <HEADER>: <fileID> <regioncnt> <regionname> ... |
4191 * <charflagslen> <charflags> <fcharslen> <fchars> | |
4108 * | 4192 * |
4109 * <fileID> 10 bytes "VIMspell03" | 4193 * <fileID> 10 bytes "VIMspell04" |
4110 * <regioncnt> 1 byte number of regions following (8 supported) | 4194 * <regioncnt> 1 byte number of regions following (8 supported) |
4111 * <regionname> 2 bytes Region name: ca, au, etc. | 4195 * <regionname> 2 bytes Region name: ca, au, etc. |
4112 * First <regionname> is region 1. | 4196 * First <regionname> is region 1. |
4113 * | 4197 * |
4198 * <charflagslen> 1 byte Number of bytes in <charflags> (should be 128). | |
4199 * <charflags> N bytes List of flags (first one is for character 128): | |
4200 * 0x01 word character | |
4201 * 0x02 upper-case character | |
4202 * <fcharslen> 2 bytes Number of bytes in <fchars>. | |
4203 * <fchars> N bytes Folded characters, first one is for character 128. | |
4114 * | 4204 * |
4115 * <PREFIXLIST>: <affcount> <afftotcnt> <affix> ... | 4205 * |
4116 * <SUFFIXLIST>: <affcount> <afftotcnt> <affix> ... | 4206 * <PREFIXLIST>: <affcount> <affix> ... |
4207 * <SUFFIXLIST>: <affcount> <affix> ... | |
4117 * list of possible affixes: prefixes and suffixes. | 4208 * list of possible affixes: prefixes and suffixes. |
4118 * | 4209 * |
4119 * <affcount> 2 bytes Number of affixes (MSB comes first). | 4210 * <affcount> 2 bytes Number of affixes (MSB comes first). |
4120 * When more than 256 an affixNR is 2 bytes. | 4211 * When more than 256 an affixNR is 2 bytes. |
4121 * This is separate for prefixes and suffixes! | 4212 * This is separate for prefixes and suffixes! |
4122 * First affixNR is 0. | 4213 * First affixNR is 0. |
4123 * <afftotcnt> 2 bytes Total number of affix items (MSB comes first). | |
4124 * | 4214 * |
4125 * <affix>: <affitemcnt> <affitem> ... | 4215 * <affix>: <affitemcnt> <affitem> ... |
4126 * | 4216 * |
4127 * <affitemcnt> 2 bytes Number of affixes with this affixNR (MSB first). | 4217 * <affitemcnt> 2 bytes Number of affixes with this affixNR (MSB first). |
4128 * | 4218 * |
4226 char_u **wtab; | 4316 char_u **wtab; |
4227 int todo; | 4317 int todo; |
4228 int flags, aflags; | 4318 int flags, aflags; |
4229 basicword_T *bw, *bwf, *bw2 = NULL; | 4319 basicword_T *bw, *bwf, *bw2 = NULL; |
4230 int i; | 4320 int i; |
4231 int cnt; | |
4232 affentry_T *ae; | |
4233 int round; | 4321 int round; |
4234 garray_T bwga; | 4322 garray_T bwga; |
4235 | 4323 |
4236 vim_memset(&wif, 0, sizeof(winfo_T)); | 4324 vim_memset(&wif, 0, sizeof(winfo_T)); |
4237 | 4325 |
4240 { | 4328 { |
4241 EMSG2(_(e_notopen), fname); | 4329 EMSG2(_(e_notopen), fname); |
4242 return; | 4330 return; |
4243 } | 4331 } |
4244 | 4332 |
4245 fwrite(VIMSPELLMAGIC, VIMSPELLMAGICL, (size_t)1, wif.wif_fd); | 4333 /* <HEADER>: <fileID> <regioncnt> <regionname> ... |
4334 * <charflagslen> <charflags> <fcharslen> <fchars> */ | |
4335 fwrite(VIMSPELLMAGIC, VIMSPELLMAGICL, (size_t)1, wif.wif_fd); /* <fileID> */ | |
4246 | 4336 |
4247 /* write the region names if there is more than one */ | 4337 /* write the region names if there is more than one */ |
4248 if (regcount > 1) | 4338 if (regcount > 1) |
4249 { | 4339 { |
4250 putc(regcount, wif.wif_fd); | 4340 putc(regcount, wif.wif_fd); /* <regioncnt> <regionname> ... */ |
4251 fwrite(regchars, (size_t)(regcount * 2), (size_t)1, wif.wif_fd); | 4341 fwrite(regchars, (size_t)(regcount * 2), (size_t)1, wif.wif_fd); |
4252 wif.wif_regionmask = (1 << regcount) - 1; | 4342 wif.wif_regionmask = (1 << regcount) - 1; |
4253 } | 4343 } |
4254 else | 4344 else |
4255 { | 4345 { |
4256 putc(0, wif.wif_fd); | 4346 putc(0, wif.wif_fd); |
4257 wif.wif_regionmask = 0; | 4347 wif.wif_regionmask = 0; |
4258 } | 4348 } |
4259 | 4349 |
4260 /* Write the prefix and suffix lists. */ | 4350 /* Write the table with character flags and table for case folding. |
4351 * <charflagslen> <charflags> <fcharslen> <fchars> */ | |
4352 write_spell_chartab(wif.wif_fd); | |
4353 | |
4354 /* <PREFIXLIST>: <affcount> <affix> ... | |
4355 * <SUFFIXLIST>: <affcount> <affix> ... */ | |
4261 for (round = 1; round <= 2; ++round) | 4356 for (round = 1; round <= 2; ++round) |
4262 { | 4357 { |
4263 gap = round == 1 ? prefga : suffga; | 4358 gap = round == 1 ? prefga : suffga; |
4264 put_bytes(wif.wif_fd, (long_u)gap->ga_len, 2); /* <affcount> */ | 4359 put_bytes(wif.wif_fd, (long_u)gap->ga_len, 2); /* <affcount> */ |
4265 | |
4266 /* Count the total number of affix items. */ | |
4267 cnt = 0; | |
4268 for (i = 0; i < gap->ga_len; ++i) | |
4269 for (ae = ((affheader_T *)gap->ga_data + i)->ah_first; | |
4270 ae != NULL; ae = ae->ae_next) | |
4271 ++cnt; | |
4272 put_bytes(wif.wif_fd, (long_u)cnt, 2); /* <afftotcnt> */ | |
4273 | 4360 |
4274 for (i = 0; i < gap->ga_len; ++i) | 4361 for (i = 0; i < gap->ga_len; ++i) |
4275 write_affix(wif.wif_fd, (affheader_T *)gap->ga_data + i); | 4362 write_affix(wif.wif_fd, (affheader_T *)gap->ga_data + i); |
4276 } | 4363 } |
4277 | 4364 |
4278 /* Number of bytes used for affix NR depends on affix count. */ | 4365 /* Number of bytes used for affix NR depends on affix count. */ |
4279 wif.wif_prefm = (prefga->ga_len > 256) ? 2 : 1; | 4366 wif.wif_prefm = (prefga->ga_len > 256) ? 2 : 1; |
4280 wif.wif_suffm = (suffga->ga_len > 256) ? 2 : 1; | 4367 wif.wif_suffm = (suffga->ga_len > 256) ? 2 : 1; |
4281 | 4368 |
4282 /* Write the suggest info. TODO */ | 4369 /* <SUGGEST> : <suggestlen> <more> ... |
4283 put_bytes(wif.wif_fd, 0L, 4); | 4370 * TODO. Only write a zero length for now. */ |
4371 put_bytes(wif.wif_fd, 0L, 4); /* <suggestlen> */ | |
4284 | 4372 |
4285 /* | 4373 /* |
4286 * Write the word list. <wordcount> <worditem> ... | 4374 * <WORDLIST>: <wordcount> <worditem> ... |
4287 */ | 4375 */ |
4376 | |
4288 /* number of basic words in 4 bytes */ | 4377 /* number of basic words in 4 bytes */ |
4289 put_bytes(wif.wif_fd, newwords->ht_used, 4); /* <wordcount> */ | 4378 put_bytes(wif.wif_fd, newwords->ht_used, 4); /* <wordcount> */ |
4290 | 4379 |
4291 /* | 4380 /* |
4292 * Sort the word list, so that we can copy as many bytes as possible from | 4381 * Sort the word list, so that we can copy as many bytes as possible from |
4331 bw2 = ((basicword_T **)bwga.ga_data)[i]; | 4420 bw2 = ((basicword_T **)bwga.ga_data)[i]; |
4332 aflags = bw2->bw_flags & (BWF_ONECAP | BWF_KEEPCAP | 4421 aflags = bw2->bw_flags & (BWF_ONECAP | BWF_KEEPCAP |
4333 | BWF_ALLCAP); | 4422 | BWF_ALLCAP); |
4334 if (flags == aflags | 4423 if (flags == aflags |
4335 && ((flags & BWF_KEEPCAP) == 0 | 4424 && ((flags & BWF_KEEPCAP) == 0 |
4336 || (STRCMP(bw->bw_caseword, | 4425 || bw->bw_caseword == NULL |
4337 bw2->bw_caseword) == 0)) | 4426 || bw2->bw_caseword == NULL |
4427 || STRCMP(bw->bw_caseword, | |
4428 bw2->bw_caseword) == 0) | |
4338 && same_affixes(bw, bw2)) | 4429 && same_affixes(bw, bw2)) |
4339 break; | 4430 break; |
4340 } | 4431 } |
4341 if (i == bwga.ga_len) | 4432 if (i == bwga.ga_len) |
4342 { | 4433 { |
4383 write_bword(&wif, bw2, FALSE); | 4474 write_bword(&wif, bw2, FALSE); |
4384 } | 4475 } |
4385 } | 4476 } |
4386 | 4477 |
4387 ga_clear(&bwga); | 4478 ga_clear(&bwga); |
4479 vim_free(wtab); | |
4388 } | 4480 } |
4389 | 4481 |
4390 fclose(wif.wif_fd); | 4482 fclose(wif.wif_fd); |
4391 | 4483 |
4392 /* Print a few statistics. */ | 4484 /* Print a few statistics. */ |
4546 | 4638 |
4547 /* First dummy word doesn't need anything but flags. */ | 4639 /* First dummy word doesn't need anything but flags. */ |
4548 if (lowcap) | 4640 if (lowcap) |
4549 return; | 4641 return; |
4550 | 4642 |
4551 if (flags & BWF_KEEPCAP) | 4643 if ((flags & BWF_KEEPCAP) && bw->bw_caseword != NULL) |
4552 { | 4644 { |
4553 len = STRLEN(bw->bw_caseword); | 4645 len = STRLEN(bw->bw_caseword); |
4554 putc(len, fd); /* <caselen> */ | 4646 putc(len, fd); /* <caselen> */ |
4555 for (i = 0; i < len; ++i) | 4647 for (i = 0; i < len; ++i) |
4556 putc(bw->bw_caseword[i], fd); /* <caseword> */ | 4648 putc(bw->bw_caseword[i], fd); /* <caseword> */ |
4682 if (aflags & ADD_REGION) | 4774 if (aflags & ADD_REGION) |
4683 putc(bw->bw_region, fd); /* <region> */ | 4775 putc(bw->bw_region, fd); /* <region> */ |
4684 | 4776 |
4685 bw2 = bw; | 4777 bw2 = bw; |
4686 } | 4778 } |
4779 | |
4687 vim_free(wtab); | 4780 vim_free(wtab); |
4688 } | 4781 } |
4689 } | 4782 } |
4690 | 4783 |
4691 | 4784 |
4708 struct stat st; | 4801 struct stat st; |
4709 int round; | 4802 int round; |
4710 vimconv_T conv; | 4803 vimconv_T conv; |
4711 int ascii = FALSE; | 4804 int ascii = FALSE; |
4712 char_u *arg = eap->arg; | 4805 char_u *arg = eap->arg; |
4806 int error = FALSE; | |
4713 | 4807 |
4714 if (STRNCMP(arg, "-ascii", 6) == 0) | 4808 if (STRNCMP(arg, "-ascii", 6) == 0) |
4715 { | 4809 { |
4716 ascii = TRUE; | 4810 ascii = TRUE; |
4717 arg = skipwhite(arg + 6); | 4811 arg = skipwhite(arg + 6); |
4764 TOLOWER_ASC(fnames[i][len - 1]); | 4858 TOLOWER_ASC(fnames[i][len - 1]); |
4765 } | 4859 } |
4766 } | 4860 } |
4767 } | 4861 } |
4768 | 4862 |
4863 /* Clear the char type tables, don't want to use any of the currently | |
4864 * used spell properties. */ | |
4865 init_spell_chartab(); | |
4866 | |
4769 /* | 4867 /* |
4770 * Read all the .aff and .dic files. | 4868 * Read all the .aff and .dic files. |
4771 * Text is converted to 'encoding'. | 4869 * Text is converted to 'encoding'. |
4772 */ | 4870 */ |
4773 for (i = 1; i < fcount; ++i) | 4871 for (i = 1; i < fcount; ++i) |
4844 * inefficient searching. Turn the affixes into additions and/or | 4942 * inefficient searching. Turn the affixes into additions and/or |
4845 * the expanded word. | 4943 * the expanded word. |
4846 */ | 4944 */ |
4847 MSG(_("Processing words...")); | 4945 MSG(_("Processing words...")); |
4848 out_flush(); | 4946 out_flush(); |
4849 expand_affixes(&newwords, &prefga, &suffga); | 4947 error = expand_affixes(&newwords, &prefga, &suffga) == FAIL; |
4850 | 4948 |
4851 /* Write the info in the spell file. */ | 4949 if (!error) |
4852 smsg((char_u *)_("Writing spell file %s..."), wfname); | 4950 { |
4853 out_flush(); | 4951 /* Write the info in the spell file. */ |
4854 write_vim_spell(wfname, &prefga, &suffga, &newwords, | 4952 smsg((char_u *)_("Writing spell file %s..."), wfname); |
4953 out_flush(); | |
4954 write_vim_spell(wfname, &prefga, &suffga, &newwords, | |
4855 fcount - 1, region_name); | 4955 fcount - 1, region_name); |
4856 MSG(_("Done!")); | 4956 MSG(_("Done!")); |
4857 out_flush(); | 4957 out_flush(); |
4958 } | |
4858 | 4959 |
4859 /* Free the allocated stuff. */ | 4960 /* Free the allocated stuff. */ |
4860 free_wordtable(&newwords); | 4961 free_wordtable(&newwords); |
4861 for (round = 1; round <= 2; ++round) | 4962 for (round = 1; round <= 2; ++round) |
4862 { | 4963 { |