comparison src/spell.c @ 255:c8742c8da9ab

updated for version 7.0070
author vimboss
date Sat, 23 Apr 2005 20:42:23 +0000
parents f146656fb903
children ed33f83b42d8
comparison
equal deleted inserted replaced
254:c50c82c5e230 255:c8742c8da9ab
95 #define AFF_PREWORD 0x02 /* prefix includes word */ 95 #define AFF_PREWORD 0x02 /* prefix includes word */
96 96
97 /* 97 /*
98 * Structure used to store words and other info for one language, loaded from 98 * Structure used to store words and other info for one language, loaded from
99 * a .spl file. 99 * a .spl file.
100 * The main access is through hashtable "sl_word", using the case-folded
101 * word as the key. This finds a linked list of fword_T.
100 */ 102 */
101 typedef struct slang_S slang_T; 103 typedef struct slang_S slang_T;
102 struct slang_S 104 struct slang_S
103 { 105 {
104 slang_T *sl_next; /* next language */ 106 slang_T *sl_next; /* next language */
147 #define HI2ADDWORD(hi) ((addword_T *)((hi)->hi_key - (dumaw.aw_word - (char_u *)&dumaw))) 149 #define HI2ADDWORD(hi) ((addword_T *)((hi)->hi_key - (dumaw.aw_word - (char_u *)&dumaw)))
148 150
149 /* 151 /*
150 * Structure to store a basic word. 152 * Structure to store a basic word.
151 * There are many of these, keep it small! 153 * There are many of these, keep it small!
154 * The list of prefix and suffix NRs is stored after "fw_word" to avoid the
155 * need for two extra pointers.
152 */ 156 */
153 typedef struct fword_S fword_T; 157 typedef struct fword_S fword_T;
154 struct fword_S 158 struct fword_S
155 { 159 {
156 fword_T *fw_next; /* same basic word with different caps and/or 160 fword_T *fw_next; /* same basic word with different caps and/or
221 225
222 /* Translate ADD_ flags to BWF_ flags. 226 /* Translate ADD_ flags to BWF_ flags.
223 * (Needed to keep ADD_ flags in one byte.) */ 227 * (Needed to keep ADD_ flags in one byte.) */
224 #define ADD2BWF(x) (((x) & 0x0f) | (((x) & 0xf0) << 4)) 228 #define ADD2BWF(x) (((x) & 0x0f) | (((x) & 0xf0) << 4))
225 229
226 #define VIMSPELLMAGIC "VIMspell03" /* string at start of Vim spell file */ 230 #define VIMSPELLMAGIC "VIMspell04" /* string at start of Vim spell file */
227 #define VIMSPELLMAGICL 10 231 #define VIMSPELLMAGICL 10
228 232
229 /* 233 /*
230 * Structure to store info for word matching. 234 * Structure to store info for word matching.
231 */ 235 */
305 /* A word starting with a number is always OK. */ 309 /* A word starting with a number is always OK. */
306 if (*ptr >= '0' && *ptr <= '9') 310 if (*ptr >= '0' && *ptr <= '9')
307 return (int)(mi.mi_end - ptr); 311 return (int)(mi.mi_end - ptr);
308 312
309 /* Make case-folded copy of the word. */ 313 /* Make case-folded copy of the word. */
310 (void)str_foldcase(ptr, mi.mi_end - ptr, mi.mi_fword, MAXWLEN + 1); 314 (void)spell_casefold(ptr, mi.mi_end - ptr, mi.mi_fword, MAXWLEN + 1);
311 mi.mi_cword = mi.mi_fword; 315 mi.mi_cword = mi.mi_fword;
312 mi.mi_fendlen = STRLEN(mi.mi_fword); 316 mi.mi_fendlen = STRLEN(mi.mi_fword);
313 mi.mi_faddlen = 0; 317 mi.mi_faddlen = 0;
314 mi.mi_fend = mi.mi_end; 318 mi.mi_fend = mi.mi_end;
315 319
402 * Try finding a matching preword for "mip->mi_word". These are 406 * Try finding a matching preword for "mip->mi_word". These are
403 * prefixes that have a non-word character after a word character: 407 * prefixes that have a non-word character after a word character:
404 * "d'", "de-", "'s-", "l'de-". But not "'s". 408 * "d'", "de-", "'s-", "l'de-". But not "'s".
405 * Also need to do this when a matching word was already found, because we 409 * Also need to do this when a matching word was already found, because we
406 * might find a longer match this way (French: "qu" and "qu'a-t-elle"). 410 * might find a longer match this way (French: "qu" and "qu'a-t-elle").
411 * The check above may have added characters to mi_fword, thus we need to
412 * truncate it after the basic word for the hash lookup.
407 */ 413 */
408 cc = mip->mi_fword[mip->mi_fendlen]; 414 cc = mip->mi_fword[mip->mi_fendlen];
409 mip->mi_fword[mip->mi_fendlen] = NUL; 415 mip->mi_fword[mip->mi_fendlen] = NUL;
410 hi = hash_lookup(&mip->mi_slang->sl_prewords, mip->mi_fword, fhash); 416 hi = hash_lookup(&mip->mi_slang->sl_prewords, mip->mi_fword, fhash);
411 mip->mi_fword[mip->mi_fendlen] = cc; 417 mip->mi_fword[mip->mi_fendlen] = cc;
770 if (has_mbyte) 776 if (has_mbyte)
771 l = (*mb_ptr2len_check)(mip->mi_fend); 777 l = (*mb_ptr2len_check)(mip->mi_fend);
772 else 778 else
773 #endif 779 #endif
774 l = 1; 780 l = 1;
775 (void)str_foldcase(mip->mi_fend, l, p + mip->mi_faddlen, 781 (void)spell_casefold(mip->mi_fend, l, p + mip->mi_faddlen,
776 MAXWLEN - mip->mi_fendlen - mip->mi_faddlen); 782 MAXWLEN - mip->mi_fendlen - mip->mi_faddlen);
777 mip->mi_fend += l; 783 mip->mi_fend += l;
778 mip->mi_faddlen += STRLEN(p + mip->mi_faddlen); 784 mip->mi_faddlen += STRLEN(p + mip->mi_faddlen);
779 } 785 }
780 } 786 }
990 * Try suffixes of different length, starting with an empty suffix (chop 996 * Try suffixes of different length, starting with an empty suffix (chop
991 * only, thus adds something). 997 * only, thus adds something).
992 * Stop checking if there are no suffixes with so many characters. 998 * Stop checking if there are no suffixes with so many characters.
993 */ 999 */
994 sufp = endw; 1000 sufp = endw;
1001 *endw = NUL; /* truncate after possible suffix */
1002
995 for (charlen = 0; charlen <= mip->mi_slang->sl_sufftab.ga_len; ++charlen) 1003 for (charlen = 0; charlen <= mip->mi_slang->sl_sufftab.ga_len; ++charlen)
996 { 1004 {
997 /* Move the pointer to the possible suffix back one character, unless 1005 /* Move the pointer to the possible suffix back one character, unless
998 * doing the first round (empty suffix). */ 1006 * doing the first round (empty suffix). */
999 if (charlen > 0) 1007 if (charlen > 0)
1010 /* Get pointer to hashtab for suffix of this many chars. */ 1018 /* Get pointer to hashtab for suffix of this many chars. */
1011 ht = ((hashtab_T *)mip->mi_slang->sl_sufftab.ga_data) + charlen - 1; 1019 ht = ((hashtab_T *)mip->mi_slang->sl_sufftab.ga_data) + charlen - 1;
1012 if (ht->ht_used == 0) 1020 if (ht->ht_used == 0)
1013 continue; 1021 continue;
1014 1022
1015 *endw = NUL; /* truncate after possible suffix */
1016 hi = hash_find(ht, sufp); 1023 hi = hash_find(ht, sufp);
1017 if (HASHITEM_EMPTY(hi)) 1024 if (HASHITEM_EMPTY(hi))
1018 ai = NULL; 1025 ai = NULL;
1019 else 1026 else
1020 ai = HI2AI(hi); 1027 ai = HI2AI(hi);
1021 *endw = endw_c;
1022 } 1028 }
1023 1029
1024 if (ai != NULL) 1030 if (ai != NULL)
1025 { 1031 {
1026 /* Found a list of matching suffixes. Now check that there is one 1032 /* Found a list of matching suffixes. Now check that there is one
1027 * we can use. */ 1033 * we can use. */
1028 tlen = sufp - mip->mi_cword; /* length of word without suffix */ 1034 tlen = sufp - mip->mi_cword; /* length of word without suffix */
1029 mch_memmove(pword, mip->mi_cword, tlen); 1035 mch_memmove(pword, mip->mi_cword, tlen);
1036 *endw = endw_c;
1030 1037
1031 for ( ; ai != NULL; ai = ai->ai_next) 1038 for ( ; ai != NULL; ai = ai->ai_next)
1032 { 1039 {
1033 /* Found a matching suffix. Create the basic word by removing 1040 /* Found a matching suffix. Create the basic word by removing
1034 * the suffix and adding the chop string. */ 1041 * the suffix and adding the chop string. */
1066 mip->mi_capflags = capflags_save; 1073 mip->mi_capflags = capflags_save;
1067 return TRUE; 1074 return TRUE;
1068 } 1075 }
1069 } 1076 }
1070 } 1077 }
1071 } 1078
1072 } 1079 *endw = NUL; /* truncate after possible suffix */
1073 1080 }
1081 }
1082
1083 *endw = endw_c;
1074 mip->mi_capflags = capflags_save; 1084 mip->mi_capflags = capflags_save;
1075 return FALSE; 1085 return FALSE;
1076 } 1086 }
1077 1087
1078 /* 1088 /*
1113 if (has_mbyte) 1123 if (has_mbyte)
1114 c = mb_ptr2char_adv(&p); 1124 c = mb_ptr2char_adv(&p);
1115 else 1125 else
1116 #endif 1126 #endif
1117 c = *p++; 1127 c = *p++;
1118 if (MB_ISUPPER(c)) 1128 if (spell_isupper(c))
1119 { 1129 {
1120 if (capflags == 0 || (capflags & BWF_ONECAP)) 1130 if (capflags == 0 || (capflags & BWF_ONECAP))
1121 { 1131 {
1122 capflags = BWF_KEEPCAP; /* lU or UlU */ 1132 capflags = BWF_KEEPCAP; /* lU or UlU */
1123 break; 1133 break;
1458 int flags; 1468 int flags;
1459 affitem_T *ai, *ai2, **aip; 1469 affitem_T *ai, *ai2, **aip;
1460 int round; 1470 int round;
1461 char_u *save_sourcing_name = sourcing_name; 1471 char_u *save_sourcing_name = sourcing_name;
1462 linenr_T save_sourcing_lnum = sourcing_lnum; 1472 linenr_T save_sourcing_lnum = sourcing_lnum;
1463 int cnt; 1473 int cnt, ccnt;
1464 int choplen; 1474 int choplen;
1465 int addlen; 1475 int addlen;
1466 int leadlen; 1476 int leadlen;
1467 int wordcount; 1477 int wordcount;
1468 fword_T *fw, *fw2; 1478 fword_T *fw, *fw2;
1472 hash_T hash; 1482 hash_T hash;
1473 int adds; 1483 int adds;
1474 addword_T *aw, *naw; 1484 addword_T *aw, *naw;
1475 int flen; 1485 int flen;
1476 int xlen; 1486 int xlen;
1487 char_u *fol;
1477 1488
1478 fd = fopen((char *)fname, "r"); 1489 fd = fopen((char *)fname, "r");
1479 if (fd == NULL) 1490 if (fd == NULL)
1480 { 1491 {
1481 EMSG2(_(e_notopen), fname); 1492 EMSG2(_(e_notopen), fname);
1482 goto errorend; 1493 goto endFAIL;
1483 } 1494 }
1484 1495
1485 /* Set sourcing_name, so that error messages mention the file name. */ 1496 /* Set sourcing_name, so that error messages mention the file name. */
1486 sourcing_name = fname; 1497 sourcing_name = fname;
1487 sourcing_lnum = 0; 1498 sourcing_lnum = 0;
1488 1499
1489 /* <HEADER>: <fileID> <regioncnt> <regionname> ... */ 1500 /* <HEADER>: <fileID> <regioncnt> <regionname> ...
1501 * <charflagslen> <charflags> <fcharslen> <fchars> */
1490 for (i = 0; i < VIMSPELLMAGICL; ++i) 1502 for (i = 0; i < VIMSPELLMAGICL; ++i)
1491 buf[i] = getc(fd); /* <fileID> */ 1503 buf[i] = getc(fd); /* <fileID> */
1492 if (STRNCMP(buf, VIMSPELLMAGIC, VIMSPELLMAGICL) != 0) 1504 if (STRNCMP(buf, VIMSPELLMAGIC, VIMSPELLMAGICL) != 0)
1493 { 1505 {
1494 EMSG(_("E757: Wrong file ID in spell file")); 1506 EMSG(_("E757: Wrong file ID in spell file"));
1495 goto errorend; 1507 goto endFAIL;
1496 } 1508 }
1497 1509
1498 cnt = getc(fd); /* <regioncnt> */ 1510 cnt = getc(fd); /* <regioncnt> */
1499 if (cnt == EOF) 1511 if (cnt < 0)
1500 { 1512 {
1501 truncerr: 1513 truncerr:
1502 EMSG(_("E758: Truncated spell file")); 1514 EMSG(_("E758: Truncated spell file"));
1503 goto errorend; 1515 goto endFAIL;
1504 } 1516 }
1505 if (cnt > 8) 1517 if (cnt > 8)
1506 { 1518 {
1507 formerr: 1519 formerr:
1508 EMSG(_("E759: Format error in spell file")); 1520 EMSG(_("E759: Format error in spell file"));
1509 goto errorend; 1521 goto endFAIL;
1510 } 1522 }
1511 for (i = 0; i < cnt; ++i) 1523 for (i = 0; i < cnt; ++i)
1512 { 1524 {
1513 lp->sl_regions[i * 2] = getc(fd); /* <regionname> */ 1525 lp->sl_regions[i * 2] = getc(fd); /* <regionname> */
1514 lp->sl_regions[i * 2 + 1] = getc(fd); 1526 lp->sl_regions[i * 2 + 1] = getc(fd);
1515 } 1527 }
1516 lp->sl_regions[cnt * 2] = NUL; 1528 lp->sl_regions[cnt * 2] = NUL;
1517 1529
1518 /* round 1: <PREFIXLIST>: <affcount> <afftotcnt> <affix> ... 1530 cnt = getc(fd); /* <charflagslen> */
1519 * round 2: <SUFFIXLIST>: <affcount> <afftotcnt> <affix> ... */ 1531 if (cnt > 0)
1532 {
1533 p = (char_u *)getroom(lp, &bl_used, cnt);
1534 if (p == NULL)
1535 goto endFAIL;
1536 for (i = 0; i < cnt; ++i)
1537 p[i] = getc(fd); /* <charflags> */
1538
1539 ccnt = (getc(fd) << 8) + getc(fd); /* <fcharslen> */
1540 if (ccnt <= 0)
1541 goto formerr;
1542 fol = (char_u *)getroom(lp, &bl_used, ccnt + 1);
1543 if (fol == NULL)
1544 goto endFAIL;
1545 for (i = 0; i < ccnt; ++i)
1546 fol[i] = getc(fd); /* <fchars> */
1547 fol[i] = NUL;
1548
1549 /* Set the word-char flags and fill spell_isupper() table. */
1550 if (set_spell_charflags(p, cnt, fol) == FAIL)
1551 goto formerr;
1552 }
1553 else
1554 {
1555 /* When <charflagslen> is zero then <fcharslen> must also be zero. */
1556 cnt = (getc(fd) << 8) + getc(fd);
1557 if (cnt != 0)
1558 goto formerr;
1559 }
1560
1561 /* round 1: <PREFIXLIST>: <affcount> <affix> ...
1562 * round 2: <SUFFIXLIST>: <affcount> <affix> ... */
1520 for (round = 1; round <= 2; ++round) 1563 for (round = 1; round <= 2; ++round)
1521 { 1564 {
1522 affcount = (getc(fd) << 8) + getc(fd); /* <affcount> */ 1565 affcount = (getc(fd) << 8) + getc(fd); /* <affcount> */
1523 if (affcount < 0) 1566 if (affcount < 0)
1524 goto truncerr; 1567 goto truncerr;
1534 gap = &lp->sl_sufftab; 1577 gap = &lp->sl_sufftab;
1535 aip = &lp->sl_suffzero; 1578 aip = &lp->sl_suffzero;
1536 lp->sl_suffcnt = affcount; 1579 lp->sl_suffcnt = affcount;
1537 suffm = affcount > 256 ? 2 : 1; 1580 suffm = affcount > 256 ? 2 : 1;
1538 } 1581 }
1539
1540 i = (getc(fd) << 8) + getc(fd); /* <afftotcnt> */
1541 /* afftotcnt is not used */
1542 1582
1543 /* 1583 /*
1544 * For each affix NR there can be several affixes. 1584 * For each affix NR there can be several affixes.
1545 */ 1585 */
1546 for (affnr = 0; affnr < affcount; ++affnr) 1586 for (affnr = 0; affnr < affcount; ++affnr)
1553 { 1593 {
1554 /* <affitem>: <affflags> <affchoplen> <affchop> 1594 /* <affitem>: <affflags> <affchoplen> <affchop>
1555 * <affaddlen> <affadd> */ 1595 * <affaddlen> <affadd> */
1556 affflags = getc(fd); /* <affflags> */ 1596 affflags = getc(fd); /* <affflags> */
1557 choplen = getc(fd); /* <affchoplen> */ 1597 choplen = getc(fd); /* <affchoplen> */
1558 if (choplen == EOF) 1598 if (choplen < 0)
1559 goto truncerr; 1599 goto truncerr;
1560 if (choplen >= MAXWLEN) 1600 if (choplen >= MAXWLEN)
1561 goto formerr; 1601 goto formerr;
1562 for (i = 0; i < choplen; ++i) /* <affchop> */ 1602 for (i = 0; i < choplen; ++i) /* <affchop> */
1563 buf[i] = getc(fd); 1603 buf[i] = getc(fd);
1564 buf[i] = NUL; 1604 buf[i] = NUL;
1565 addlen = getc(fd); /* <affaddlen> */ 1605 addlen = getc(fd); /* <affaddlen> */
1566 if (addlen == EOF) 1606 if (addlen < 0)
1567 goto truncerr; 1607 goto truncerr;
1568 if (affflags & AFF_PREWORD) 1608 if (affflags & AFF_PREWORD)
1569 xlen = addlen + 2; /* space for lead and trail string */ 1609 xlen = addlen + 2; /* space for lead and trail string */
1570 else 1610 else
1571 xlen = 0; 1611 xlen = 0;
1572 1612
1573 /* Get room to store the affitem_T, chop and add strings. */ 1613 /* Get room to store the affitem_T, chop and add strings. */
1574 p = (char_u *)getroom(lp, &bl_used, 1614 ai = (affitem_T *)getroom(lp, &bl_used,
1575 sizeof(affitem_T) + addlen + choplen + 1 + xlen); 1615 sizeof(affitem_T) + addlen + choplen + 1 + xlen);
1576 if (p == NULL) 1616 if (ai == NULL)
1577 goto errorend; 1617 goto endFAIL;
1578 1618
1579 ai = (affitem_T *)p;
1580 ai->ai_nr = affnr; 1619 ai->ai_nr = affnr;
1581 ai->ai_flags = affflags; 1620 ai->ai_flags = affflags;
1582 ai->ai_choplen = choplen; 1621 ai->ai_choplen = choplen;
1583 ai->ai_addlen = addlen; 1622 ai->ai_addlen = addlen;
1584 1623
1594 if (affflags & AFF_PREWORD) 1633 if (affflags & AFF_PREWORD)
1595 { 1634 {
1596 int l, leadoff, trailoff; 1635 int l, leadoff, trailoff;
1597 1636
1598 /* 1637 /*
1599 * Separate lead and trail string, put word at ai_add, so 1638 * A preword is a prefix that's recognized as a word: it
1600 * that it can be used as hashtable key. 1639 * contains a word character followed by a non-word
1640 * character.
1641 * <affadd> is the whole prefix. Separate lead and trail
1642 * string, put the word itself at ai_add, so that it can
1643 * be used as hashtable key.
1601 */ 1644 */
1602 /* lead string: up to first word char */ 1645 /* lead string: up to first word char */
1603 while (*p != NUL && !spell_iswordc(p)) 1646 while (*p != NUL && !spell_iswordc(p))
1604 mb_ptr_adv(p); 1647 mb_ptr_adv(p);
1605 ai->ai_leadlen = p - ai->ai_add; 1648 ai->ai_leadlen = p - ai->ai_add;
1621 ai->ai_add[l] = NUL; 1664 ai->ai_add[l] = NUL;
1622 hash = hash_hash(ai->ai_add); 1665 hash = hash_hash(ai->ai_add);
1623 hi = hash_lookup(&lp->sl_prewords, ai->ai_add, hash); 1666 hi = hash_lookup(&lp->sl_prewords, ai->ai_add, hash);
1624 if (HASHITEM_EMPTY(hi)) 1667 if (HASHITEM_EMPTY(hi))
1625 { 1668 {
1626 /* First affix with this word, add to hashtable. */ 1669 /* First preword with this word, add to hashtable. */
1627 hash_add_item(&lp->sl_prewords, hi, ai->ai_add, hash); 1670 hash_add_item(&lp->sl_prewords, hi, ai->ai_add, hash);
1628 ai->ai_next = NULL; 1671 ai->ai_next = NULL;
1629 } 1672 }
1630 else 1673 else
1631 { 1674 {
1632 /* There already is an affix with this word, link in 1675 /* There already is a preword with this word, link in
1633 * the list. */ 1676 * the list. */
1634 ai2 = HI2AI(hi); 1677 ai2 = HI2AI(hi);
1635 ai->ai_next = ai2->ai_next; 1678 ai->ai_next = ai2->ai_next;
1636 ai2->ai_next = ai; 1679 ai2->ai_next = ai;
1637 } 1680 }
1658 { 1701 {
1659 if (gap->ga_len < addlen) 1702 if (gap->ga_len < addlen)
1660 { 1703 {
1661 /* Longer affix, need more hashtables. */ 1704 /* Longer affix, need more hashtables. */
1662 if (ga_grow(gap, addlen - gap->ga_len) == FAIL) 1705 if (ga_grow(gap, addlen - gap->ga_len) == FAIL)
1663 goto errorend; 1706 goto endFAIL;
1664 1707
1665 /* Re-allocating ga_data means that an ht_array 1708 /* Re-allocating ga_data means that an ht_array
1666 * pointing to ht_smallarray becomes invalid. We 1709 * pointing to ht_smallarray becomes invalid. We
1667 * can recognize this: ht_mask is at its init 1710 * can recognize this: ht_mask is at its init
1668 * value. */ 1711 * value. */
1731 * [<region>] 1774 * [<region>]
1732 * [<addcnt> <add> ...] 1775 * [<addcnt> <add> ...]
1733 */ 1776 */
1734 /* Use <nr> bytes from the previous word. */ 1777 /* Use <nr> bytes from the previous word. */
1735 wlen = getc(fd); /* <nr> */ 1778 wlen = getc(fd); /* <nr> */
1736 if (wlen == EOF) 1779 if (wlen < 0)
1737 { 1780 {
1738 if (widx >= wordcount) /* normal way to end the file */ 1781 if (widx >= wordcount) /* normal way to end the file */
1739 break; 1782 break;
1740 goto truncerr; 1783 goto truncerr;
1741 } 1784 }
1742 1785
1743 /* Read further word bytes until one below 0x20, that must be the 1786 /* Read further word bytes until one below 0x20, that one must be the
1744 * flags. Keep this fast! */ 1787 * flags. Keep this fast! */
1745 for (;;) 1788 for (;;)
1746 { 1789 {
1747 if ((buf[wlen] = getc(fd)) < 0x20) /* <string> */ 1790 if ((buf[wlen] = getc(fd)) < 0x20) /* <string> */
1748 break; 1791 break;
1758 1801
1759 if (flags & BWF_KEEPCAP) 1802 if (flags & BWF_KEEPCAP)
1760 { 1803 {
1761 /* Read <caselen> and <caseword> first, its length may differ from 1804 /* Read <caselen> and <caseword> first, its length may differ from
1762 * the case-folded word. Note: this should only happen after the 1805 * the case-folded word. Note: this should only happen after the
1763 * basic word! */ 1806 * basic word without KEEPCAP! */
1764 wlen = getc(fd); 1807 wlen = getc(fd);
1765 if (wlen < 0) 1808 if (wlen < 0)
1766 goto truncerr; 1809 goto truncerr;
1810 if (wlen >= MAXWLEN)
1811 goto formerr;
1767 for (i = 0; i < wlen; ++i) 1812 for (i = 0; i < wlen; ++i)
1768 cbuf[i] = getc(fd); 1813 cbuf[i] = getc(fd);
1769 cbuf[i] = NUL; 1814 cbuf[i] = NUL;
1770 } 1815 }
1771 1816
1798 1843
1799 /* Find room to store the word in an fword_T. */ 1844 /* Find room to store the word in an fword_T. */
1800 fw = (fword_T *)getroom(lp, &bl_used, (int)sizeof(fword_T) + wlen 1845 fw = (fword_T *)getroom(lp, &bl_used, (int)sizeof(fword_T) + wlen
1801 + (p - affixbuf)); 1846 + (p - affixbuf));
1802 if (fw == NULL) 1847 if (fw == NULL)
1803 goto errorend; 1848 goto endFAIL;
1804 mch_memmove(fw->fw_word, (flags & BWF_KEEPCAP) ? cbuf : buf, wlen + 1); 1849 mch_memmove(fw->fw_word, (flags & BWF_KEEPCAP) ? cbuf : buf, wlen + 1);
1805 1850
1806 /* Put the affix NRs just after the word, if any. */ 1851 /* Put the affix NRs just after the word, if any. */
1807 if (p > affixbuf) 1852 if (p > affixbuf)
1808 mch_memmove(fw->fw_word + wlen + 1, affixbuf, p - affixbuf); 1853 mch_memmove(fw->fw_word + wlen + 1, affixbuf, p - affixbuf);
1809 1854
1810 fw->fw_flags = flags; 1855 fw->fw_flags = flags;
1811 fw->fw_prefixcnt = prefixcnt; 1856 fw->fw_prefixcnt = prefixcnt;
1812 fw->fw_suffixcnt = suffixcnt; 1857 fw->fw_suffixcnt = suffixcnt;
1813 1858
1859 /* We store the word in the hashtable case-folded. For a KEEPCAP word
1860 * the entry must already exist, because fw_word can't be used as the
1861 * key, it differs from "buf"! */
1814 hash = hash_hash(buf); 1862 hash = hash_hash(buf);
1815 hi = hash_lookup(&lp->sl_words, buf, hash); 1863 hi = hash_lookup(&lp->sl_words, buf, hash);
1816 if (HASHITEM_EMPTY(hi)) 1864 if (HASHITEM_EMPTY(hi))
1817 { 1865 {
1818 if (hash_add_item(&lp->sl_words, hi, fw->fw_word, hash) == FAIL) 1866 if (hash_add_item(&lp->sl_words, hi, fw->fw_word, hash) == FAIL)
1819 goto errorend; 1867 goto endFAIL;
1820 fw->fw_next = NULL; 1868 fw->fw_next = NULL;
1821 } 1869 }
1822 else 1870 else
1823 { 1871 {
1824 /* Already have this basic word in the hashtable, this one will 1872 /* Already have this basic word in the hashtable, this one will
1825 * have different case flags and/or affixes. */ 1873 * have different case flags and/or affixes. */
1826 fw2 = HI2FWORD(hi); 1874 fw2 = HI2FWORD(hi);
1827 fw->fw_next = fw2->fw_next; 1875 fw->fw_next = fw2->fw_next;
1828 fw2->fw_next = fw; 1876 fw2->fw_next = fw;
1829 --widx; /* don't count this one */ 1877 --widx; /* don't count this one as a basic word */
1830 } 1878 }
1831 1879
1832 if (flags & BWF_REGION) 1880 if (flags & BWF_REGION)
1833 fw->fw_region = getc(fd); /* <region> */ 1881 fw->fw_region = getc(fd); /* <region> */
1834 else 1882 else
1839 { 1887 {
1840 if (flags & BWF_ADDS_M) 1888 if (flags & BWF_ADDS_M)
1841 adds = (getc(fd) << 8) + getc(fd); /* <addcnt> */ 1889 adds = (getc(fd) << 8) + getc(fd); /* <addcnt> */
1842 else 1890 else
1843 adds = getc(fd); /* <addcnt> */ 1891 adds = getc(fd); /* <addcnt> */
1892 if (adds < 0)
1893 goto formerr;
1844 1894
1845 if (adds > 30) 1895 if (adds > 30)
1846 { 1896 {
1847 /* Use a hashtable to loopup the part until the next word end. 1897 /* Use a hashtable to lookup the part until the next word end.
1898 * Thus for "de-bur-die" "de" is the basic word, "-bur" is key
1899 * in the addition hashtable, "-bur<NUL>die" the whole
1900 * addition and "aw_saveb" is '-'.
1848 * This uses more memory and involves some overhead, thus only 1901 * This uses more memory and involves some overhead, thus only
1849 * do it when there are many additions (e.g., for French). */ 1902 * do it when there are many additions (e.g., for French). */
1850 ht = (hashtab_T *)getroom(lp, &bl_used, sizeof(hashtab_T)); 1903 ht = (hashtab_T *)getroom(lp, &bl_used, sizeof(hashtab_T));
1851 if (ht == NULL) 1904 if (ht == NULL)
1852 goto errorend; 1905 goto endFAIL;
1853 hash_init(ht); 1906 hash_init(ht);
1854 fw->fw_adds = (addword_T *)ht; 1907 fw->fw_adds = (addword_T *)ht;
1855 fw->fw_flags |= BWF_ADDHASH; 1908 fw->fw_flags |= BWF_ADDHASH;
1856 1909
1857 /* Preset the size of the hashtable. It's never unlocked. */ 1910 /* Preset the size of the hashtable. It's never unlocked. */
1858 hash_lock_size(ht, adds + 1); 1911 hash_lock_size(ht, adds + 1);
1859 } 1912 }
1860 else 1913 else
1861 ht = NULL; 1914 ht = NULL;
1862 1915
1916 /*
1917 * Note: uses cbuf[] to copy bytes from previous addition.
1918 */
1863 while (--adds >= 0) 1919 while (--adds >= 0)
1864 { 1920 {
1865 /* <add>: <addflags> <addlen> [<leadlen>] [<copylen>] 1921 /* <add>: <addflags> <addlen> [<leadlen>] [<copylen>]
1866 * [<addstring>] [<region>] */ 1922 * [<addstring>] [<region>] */
1867 flags = getc(fd); /* <addflags> */ 1923 flags = getc(fd); /* <addflags> */
1868 addlen = getc(fd); /* <addlen> */ 1924 addlen = getc(fd); /* <addlen> */
1869 if (addlen == EOF) 1925 if (addlen < 0)
1870 goto truncerr; 1926 goto truncerr;
1871 if (addlen >= MAXWLEN) 1927 if (addlen >= MAXWLEN)
1872 goto formerr; 1928 goto formerr;
1873 1929
1874 if (flags & ADD_LEADLEN) 1930 if (flags & ADD_LEADLEN)
1931 {
1875 leadlen = getc(fd); /* <leadlen> */ 1932 leadlen = getc(fd); /* <leadlen> */
1933 if (leadlen > addlen)
1934 goto formerr;
1935 }
1876 else 1936 else
1877 leadlen = 0; 1937 leadlen = 0;
1878 1938
1879 if (addlen > 0) 1939 if (addlen > 0)
1880 { 1940 {
1889 1949
1890 if (flags & ADD_KEEPCAP) 1950 if (flags & ADD_KEEPCAP)
1891 { 1951 {
1892 /* <addstring> is in original case, need to get 1952 /* <addstring> is in original case, need to get
1893 * case-folded word too. */ 1953 * case-folded word too. */
1894 (void)str_foldcase(cbuf, addlen, fbuf, MAXWLEN); 1954 (void)spell_casefold(cbuf, addlen, fbuf, MAXWLEN);
1895 flen = addlen - leadlen + 1; 1955 flen = addlen - leadlen + 1;
1896 addlen = STRLEN(fbuf); 1956 addlen = STRLEN(fbuf);
1897 } 1957 }
1898 else 1958 else
1899 flen = 0; 1959 flen = 0;
1900 1960
1901 aw = (addword_T *)getroom(lp, &bl_used, 1961 aw = (addword_T *)getroom(lp, &bl_used,
1902 sizeof(addword_T) + addlen + flen); 1962 sizeof(addword_T) + addlen + flen);
1903 if (aw == NULL) 1963 if (aw == NULL)
1904 goto errorend; 1964 goto endFAIL;
1905 1965
1906 if (flags & ADD_KEEPCAP) 1966 if (flags & ADD_KEEPCAP)
1907 { 1967 {
1908 /* Put the addition in original case after the case-folded 1968 /* Put the addition in original case after the case-folded
1909 * string. */ 1969 * string. */
1952 { 2012 {
1953 /* we use a dummy item as the list header */ 2013 /* we use a dummy item as the list header */
1954 naw = (addword_T *)getroom(lp, &bl_used, 2014 naw = (addword_T *)getroom(lp, &bl_used,
1955 sizeof(addword_T) + STRLEN(NOWC_KEY)); 2015 sizeof(addword_T) + STRLEN(NOWC_KEY));
1956 if (naw == NULL) 2016 if (naw == NULL)
1957 goto errorend; 2017 goto endFAIL;
1958 STRCPY(naw->aw_word, NOWC_KEY); 2018 STRCPY(naw->aw_word, NOWC_KEY);
1959 hash_add_item(ht, hi, naw->aw_word, hash); 2019 hash_add_item(ht, hi, naw->aw_word, hash);
1960 naw->aw_next = aw; 2020 naw->aw_next = aw;
1961 aw->aw_next = NULL; 2021 aw->aw_next = NULL;
1962 } 2022 }
1992 } 2052 }
1993 } 2053 }
1994 } 2054 }
1995 } 2055 }
1996 } 2056 }
1997 goto end_OK; 2057 goto endOK;
1998 2058
1999 errorend: 2059 endFAIL:
2000 lp->sl_error = TRUE; 2060 lp->sl_error = TRUE;
2001 end_OK: 2061
2062 endOK:
2002 if (fd != NULL) 2063 if (fd != NULL)
2003 fclose(fd); 2064 fclose(fd);
2004 hash_unlock(&lp->sl_words); 2065 hash_unlock(&lp->sl_words);
2005 sourcing_name = save_sourcing_name; 2066 sourcing_name = save_sourcing_name;
2006 sourcing_lnum = save_sourcing_lnum; 2067 sourcing_lnum = save_sourcing_lnum;
2185 #ifdef FEAT_MBYTE 2246 #ifdef FEAT_MBYTE
2186 c = mb_ptr2char_adv(&p); 2247 c = mb_ptr2char_adv(&p);
2187 #else 2248 #else
2188 c = *p++; 2249 c = *p++;
2189 #endif 2250 #endif
2190 firstcap = allcap = MB_ISUPPER(c); 2251 firstcap = allcap = spell_isupper(c);
2191 2252
2192 /* 2253 /*
2193 * Need to check all letters to find a word with mixed upper/lower. 2254 * Need to check all letters to find a word with mixed upper/lower.
2194 * But a word with an upper char only at start is a ONECAP. 2255 * But a word with an upper char only at start is a ONECAP.
2195 */ 2256 */
2199 #ifdef FEAT_MBYTE 2260 #ifdef FEAT_MBYTE
2200 c = mb_ptr2char(p); 2261 c = mb_ptr2char(p);
2201 #else 2262 #else
2202 c = *p; 2263 c = *p;
2203 #endif 2264 #endif
2204 if (!MB_ISUPPER(c)) 2265 if (!spell_isupper(c))
2205 { 2266 {
2206 /* UUl -> KEEPCAP */ 2267 /* UUl -> KEEPCAP */
2207 if (past_second && allcap) 2268 if (past_second && allcap)
2208 return BWF_KEEPCAP; 2269 return BWF_KEEPCAP;
2209 allcap = FALSE; 2270 allcap = FALSE;
2343 basicword_T *bw_cnext; /* next word with same caps */ 2404 basicword_T *bw_cnext; /* next word with same caps */
2344 int bw_flags; /* BWF_ flags */ 2405 int bw_flags; /* BWF_ flags */
2345 garray_T bw_prefix; /* table with prefix numbers */ 2406 garray_T bw_prefix; /* table with prefix numbers */
2346 garray_T bw_suffix; /* table with suffix numbers */ 2407 garray_T bw_suffix; /* table with suffix numbers */
2347 int bw_region; /* region bits */ 2408 int bw_region; /* region bits */
2348 char_u *bw_caseword; /* keep-case word */ 2409 char_u *bw_caseword; /* keep-case word or NULL */
2349 char_u *bw_leadstring; /* must come before bw_word */ 2410 char_u *bw_leadstring; /* must come before bw_word or NULL */
2350 char_u *bw_addstring; /* must come after bw_word */ 2411 char_u *bw_addstring; /* must come after bw_word or NULL */
2351 char_u bw_word[1]; /* actually longer: word case folded */ 2412 char_u bw_word[1]; /* actually longer: word case folded */
2352 }; 2413 };
2353 2414
2354 static basicword_T dumbw; 2415 static basicword_T dumbw;
2355 #define KEY2BW(p) ((basicword_T *)((p) - (dumbw.bw_word - (char_u *)&dumbw))) 2416 #define KEY2BW(p) ((basicword_T *)((p) - (dumbw.bw_word - (char_u *)&dumbw)))
2389 static int same_affentries __ARGS((affheader_T *ah1, affheader_T *ah2)); 2450 static int same_affentries __ARGS((affheader_T *ah1, affheader_T *ah2));
2390 static void add_affhash __ARGS((hashtab_T *ht, char_u *key, int newnr)); 2451 static void add_affhash __ARGS((hashtab_T *ht, char_u *key, int newnr));
2391 static void clear_affhash __ARGS((hashtab_T *ht)); 2452 static void clear_affhash __ARGS((hashtab_T *ht));
2392 static void trans_affixes __ARGS((dicword_T *dw, basicword_T *bw, afffile_T *oldaff, hashtab_T *newwords)); 2453 static void trans_affixes __ARGS((dicword_T *dw, basicword_T *bw, afffile_T *oldaff, hashtab_T *newwords));
2393 static int build_wordlist __ARGS((hashtab_T *newwords, hashtab_T *oldwords, afffile_T *oldaff, int regionmask)); 2454 static int build_wordlist __ARGS((hashtab_T *newwords, hashtab_T *oldwords, afffile_T *oldaff, int regionmask));
2455 static basicword_T *get_basicword __ARGS((char_u *word, int asize));
2394 static void combine_regions __ARGS((hashtab_T *newwords)); 2456 static void combine_regions __ARGS((hashtab_T *newwords));
2395 static int same_affixes __ARGS((basicword_T *bw, basicword_T *nbw)); 2457 static int same_affixes __ARGS((basicword_T *bw, basicword_T *nbw));
2396 static void expand_affixes __ARGS((hashtab_T *newwords, garray_T *prefgap, garray_T *suffgap)); 2458 static int expand_affixes __ARGS((hashtab_T *newwords, garray_T *prefgap, garray_T *suffgap));
2397 static void expand_one_aff __ARGS((basicword_T *bw, garray_T *add_words, affentry_T *pae, affentry_T *sae)); 2459 static int expand_one_aff __ARGS((basicword_T *bw, garray_T *add_words, affentry_T *pae, affentry_T *sae));
2398 static void add_to_wordlist __ARGS((hashtab_T *newwords, basicword_T *bw)); 2460 static int add_to_wordlist __ARGS((hashtab_T *newwords, basicword_T *bw));
2399 static void put_bytes __ARGS((FILE *fd, long_u nr, int len));
2400 static void write_affix __ARGS((FILE *fd, affheader_T *ah)); 2461 static void write_affix __ARGS((FILE *fd, affheader_T *ah));
2401 static void write_affixlist __ARGS((FILE *fd, garray_T *aff, int bytes)); 2462 static void write_affixlist __ARGS((FILE *fd, garray_T *aff, int bytes));
2402 static void write_vim_spell __ARGS((char_u *fname, garray_T *prefga, garray_T *suffga, hashtab_T *newwords, int regcount, char_u *regchars)); 2463 static void write_vim_spell __ARGS((char_u *fname, garray_T *prefga, garray_T *suffga, hashtab_T *newwords, int regcount, char_u *regchars));
2403 static void write_bword __ARGS((winfo_T *wif, basicword_T *bw, int lowcap)); 2464 static void write_bword __ARGS((winfo_T *wif, basicword_T *bw, int lowcap));
2404 static void free_wordtable __ARGS((hashtab_T *ht)); 2465 static void free_wordtable __ARGS((hashtab_T *ht));
2426 char_u *p; 2487 char_u *p;
2427 int lnum = 0; 2488 int lnum = 0;
2428 affheader_T *cur_aff = NULL; 2489 affheader_T *cur_aff = NULL;
2429 int aff_todo = 0; 2490 int aff_todo = 0;
2430 hashtab_T *tp; 2491 hashtab_T *tp;
2492 char_u *low = NULL;
2493 char_u *fol = NULL;
2494 char_u *upp = NULL;
2431 2495
2432 fd = fopen((char *)fname, "r"); 2496 fd = fopen((char *)fname, "r");
2433 if (fd == NULL) 2497 if (fd == NULL)
2434 { 2498 {
2435 EMSG2(_(e_notopen), fname); 2499 EMSG2(_(e_notopen), fname);
2447 ga_init2(&aff->af_rep, (int)sizeof(repentry_T), 20); 2511 ga_init2(&aff->af_rep, (int)sizeof(repentry_T), 20);
2448 2512
2449 /* 2513 /*
2450 * Read all the lines in the file one by one. 2514 * Read all the lines in the file one by one.
2451 */ 2515 */
2452 while (!vim_fgets(rline, MAXLINELEN, fd)) 2516 while (!vim_fgets(rline, MAXLINELEN, fd) && !got_int)
2453 { 2517 {
2518 line_breakcheck();
2454 ++lnum; 2519 ++lnum;
2455 2520
2456 /* Skip comment lines. */ 2521 /* Skip comment lines. */
2457 if (*rline == '#') 2522 if (*rline == '#')
2458 continue; 2523 continue;
2460 /* Convert from "SET" to 'encoding' when needed. */ 2525 /* Convert from "SET" to 'encoding' when needed. */
2461 vim_free(pc); 2526 vim_free(pc);
2462 if (conv->vc_type != CONV_NONE) 2527 if (conv->vc_type != CONV_NONE)
2463 { 2528 {
2464 pc = string_convert(conv, rline, NULL); 2529 pc = string_convert(conv, rline, NULL);
2530 if (pc == NULL)
2531 {
2532 smsg((char_u *)_("Conversion failure for word in %s line %d: %s"),
2533 fname, lnum, rline);
2534 continue;
2535 }
2465 line = pc; 2536 line = pc;
2466 } 2537 }
2467 else 2538 else
2468 { 2539 {
2469 pc = NULL; 2540 pc = NULL;
2585 { 2656 {
2586 aff_entry->ae_next = cur_aff->ah_first; 2657 aff_entry->ae_next = cur_aff->ah_first;
2587 cur_aff->ah_first = aff_entry; 2658 cur_aff->ah_first = aff_entry;
2588 } 2659 }
2589 } 2660 }
2661 else if (STRCMP(items[0], "FOL") == 0 && itemcnt == 2)
2662 {
2663 if (fol != NULL)
2664 smsg((char_u *)_("Duplicate FOL in %s line %d"),
2665 fname, lnum);
2666 else
2667 fol = vim_strsave(items[1]);
2668 }
2669 else if (STRCMP(items[0], "LOW") == 0 && itemcnt == 2)
2670 {
2671 if (low != NULL)
2672 smsg((char_u *)_("Duplicate LOW in %s line %d"),
2673 fname, lnum);
2674 else
2675 low = vim_strsave(items[1]);
2676 }
2677 else if (STRCMP(items[0], "UPP") == 0 && itemcnt == 2)
2678 {
2679 if (upp != NULL)
2680 smsg((char_u *)_("Duplicate UPP in %s line %d"),
2681 fname, lnum);
2682 else
2683 upp = vim_strsave(items[1]);
2684 }
2590 else if (STRCMP(items[0], "REP") == 0 && itemcnt == 2) 2685 else if (STRCMP(items[0], "REP") == 0 && itemcnt == 2)
2591 /* Ignore REP count */; 2686 /* Ignore REP count */;
2592 else if (STRCMP(items[0], "REP") == 0 && itemcnt == 3) 2687 else if (STRCMP(items[0], "REP") == 0 && itemcnt == 3)
2593 { 2688 {
2594 repentry_T *rp; 2689 repentry_T *rp;
2604 else if (p_verbose > 0) 2699 else if (p_verbose > 0)
2605 smsg((char_u *)_("Unrecognized item in %s line %d: %s"), 2700 smsg((char_u *)_("Unrecognized item in %s line %d: %s"),
2606 fname, lnum, items[0]); 2701 fname, lnum, items[0]);
2607 } 2702 }
2608 2703
2704 }
2705
2706 if (fol != NULL || low != NULL || upp != NULL)
2707 {
2708 if (fol == NULL || low == NULL || upp == NULL)
2709 smsg((char_u *)_("Missing FOL/LOW/UPP line in %s"), fname);
2710 else
2711 set_spell_chartab(fol, low, upp);
2712
2713 vim_free(fol);
2714 vim_free(low);
2715 vim_free(upp);
2609 } 2716 }
2610 2717
2611 vim_free(pc); 2718 vim_free(pc);
2612 fclose(fd); 2719 fclose(fd);
2613 return aff; 2720 return aff;
2718 /* 2825 /*
2719 * Read all the lines in the file one by one. 2826 * Read all the lines in the file one by one.
2720 * The words are converted to 'encoding' here, before being added to 2827 * The words are converted to 'encoding' here, before being added to
2721 * the hashtable. 2828 * the hashtable.
2722 */ 2829 */
2723 while (!vim_fgets(line, MAXLINELEN, fd)) 2830 while (!vim_fgets(line, MAXLINELEN, fd) && !got_int)
2724 { 2831 {
2832 line_breakcheck();
2725 ++lnum; 2833 ++lnum;
2726 2834
2727 /* Remove CR, LF and white space from end. */ 2835 /* Remove CR, LF and white space from end. */
2728 l = STRLEN(line); 2836 l = STRLEN(line);
2729 while (l > 0 && line[l - 1] <= ' ') 2837 while (l > 0 && line[l - 1] <= ' ')
2743 2851
2744 /* Convert from "SET" to 'encoding' when needed. */ 2852 /* Convert from "SET" to 'encoding' when needed. */
2745 if (conv->vc_type != CONV_NONE) 2853 if (conv->vc_type != CONV_NONE)
2746 { 2854 {
2747 pc = string_convert(conv, line, NULL); 2855 pc = string_convert(conv, line, NULL);
2856 if (pc == NULL)
2857 {
2858 smsg((char_u *)_("Conversion failure for word in %s line %d: %s"),
2859 fname, lnum, line);
2860 continue;
2861 }
2748 w = pc; 2862 w = pc;
2749 } 2863 }
2750 else 2864 else
2751 { 2865 {
2752 pc = NULL; 2866 pc = NULL;
2754 } 2868 }
2755 2869
2756 dw = (dicword_T *)alloc_clear((unsigned)sizeof(dicword_T) 2870 dw = (dicword_T *)alloc_clear((unsigned)sizeof(dicword_T)
2757 + STRLEN(w)); 2871 + STRLEN(w));
2758 if (dw == NULL) 2872 if (dw == NULL)
2873 {
2874 vim_free(pc);
2759 break; 2875 break;
2876 }
2760 STRCPY(dw->dw_word, w); 2877 STRCPY(dw->dw_word, w);
2761 vim_free(pc); 2878 vim_free(pc);
2762 2879
2763 hash = hash_hash(dw->dw_word); 2880 hash = hash_hash(dw->dw_word);
2764 hi = hash_lookup(ht, dw->dw_word, hash); 2881 hi = hash_lookup(ht, dw->dw_word, hash);
3134 hashtab_T *newwords; /* table with words */ 3251 hashtab_T *newwords; /* table with words */
3135 { 3252 {
3136 char_u key[2]; 3253 char_u key[2];
3137 char_u *p; 3254 char_u *p;
3138 char_u *affnm; 3255 char_u *affnm;
3139 garray_T *gap; 3256 garray_T *gap, *agap;
3140 hashitem_T *aff_hi; 3257 hashitem_T *aff_hi;
3141 affheader_T *ah; 3258 affheader_T *ah;
3142 affentry_T *ae; 3259 affentry_T *ae;
3143 regmatch_T regmatch; 3260 regmatch_T regmatch;
3144 int i; 3261 int i;
3145 basicword_T *nbw; 3262 basicword_T *nbw;
3146 int alen; 3263 int alen;
3147 int wlen;
3148 garray_T suffixga; /* list of words with non-word suffixes */ 3264 garray_T suffixga; /* list of words with non-word suffixes */
3149 garray_T prefixga; /* list of words with non-word prefixes */ 3265 garray_T prefixga; /* list of words with non-word prefixes */
3150 char_u nword[MAXWLEN]; 3266 char_u nword[MAXWLEN];
3151 int flags; 3267 int flags;
3152 int n; 3268 int n;
3177 /* Loop over all the affix entries for this affix name. */ 3293 /* Loop over all the affix entries for this affix name. */
3178 ah = HI2AH(aff_hi); 3294 ah = HI2AH(aff_hi);
3179 for (ae = ah->ah_first; ae != NULL; ae = ae->ae_next) 3295 for (ae = ah->ah_first; ae != NULL; ae = ae->ae_next)
3180 { 3296 {
3181 /* Setup for regexp matching. Note that we don't ignore case. 3297 /* Setup for regexp matching. Note that we don't ignore case.
3182 * This is weird, because he rules in an .aff file don't care 3298 * This is weird, because the rules in an .aff file don't care
3183 * about case, but it's necessary for compatibility with Myspell. 3299 * about case, but it's necessary for compatibility with Myspell.
3184 */ 3300 */
3185 regmatch.regprog = ae->ae_prog; 3301 regmatch.regprog = ae->ae_prog;
3186 regmatch.rm_ic = FALSE; 3302 regmatch.rm_ic = FALSE;
3187 if (ae->ae_prog == NULL 3303 if (ae->ae_prog == NULL
3188 || vim_regexec(&regmatch, dw->dw_word, (colnr_T)0)) 3304 || vim_regexec(&regmatch, dw->dw_word, (colnr_T)0))
3189 { 3305 {
3190 if ((ae->ae_add_nw != NULL || ae->ae_add_pw != NULL) 3306 if ((ae->ae_add_nw != NULL || ae->ae_add_pw != NULL)
3191 && (gap != &bw->bw_suffix || bw->bw_addstring == NULL)) 3307 && (gap != &bw->bw_suffix || bw->bw_addstring == NULL))
3192 { 3308 {
3193 /* Affix has a non-word character and isn't prepended to 3309 /*
3310 * Affix has a non-word character and isn't prepended to
3194 * leader or appended to addition. Need to use another 3311 * leader or appended to addition. Need to use another
3195 * word with an addition. It's a copy of the basicword_T 3312 * word with a leadstring and/or addstring.
3196 * "bw". */ 3313 */
3197 if (gap == &bw->bw_suffix) 3314 if (gap == &bw->bw_suffix || ae->ae_add_nw == NULL)
3198 { 3315 {
3199 alen = ae->ae_add_nw - ae->ae_add; 3316 /* Suffix or prefix with only non-word chars.
3200 nbw = (basicword_T *)alloc((unsigned)( 3317 * Build the new basic word in "nword": Remove chop
3201 sizeof(basicword_T) + STRLEN(bw->bw_word) 3318 * string and append/prepend addition. */
3202 + alen + 1)); 3319 if (gap == &bw->bw_suffix)
3203 if (nbw != NULL)
3204 { 3320 {
3205 *nbw = *bw; 3321 /* suffix goes at the end of the word */
3206 ga_init2(&nbw->bw_prefix, sizeof(short_u), 1);
3207 ga_init2(&nbw->bw_suffix, sizeof(short_u), 1);
3208
3209 /* Adding the suffix may change the caps. */
3210 STRCPY(nword, dw->dw_word); 3322 STRCPY(nword, dw->dw_word);
3211 if (ae->ae_chop != NULL) 3323 if (ae->ae_chop != NULL)
3212 { 3324 {
3213 /* Remove chop string. */ 3325 /* Remove chop string. */
3214 p = nword + STRLEN(nword); 3326 p = nword + STRLEN(nword);
3215 for (i = mb_charlen(ae->ae_chop); i > 0; --i) 3327 for (i = mb_charlen(ae->ae_chop); i > 0; --i)
3216 mb_ptr_back(nword, p); 3328 mb_ptr_back(nword, p);
3217 *p = NUL; 3329 *p = NUL;
3218 } 3330 }
3219 STRCAT(nword, ae->ae_add); 3331 STRCAT(nword, ae->ae_add);
3220 flags = captype(nword, nword + STRLEN(nword)); 3332 agap = &suffixga;
3221 if (flags & BWF_KEEPCAP)
3222 {
3223 /* "caseword" excludes the addition */
3224 nword[STRLEN(dw->dw_word) + alen] = NUL;
3225 nbw->bw_caseword = vim_strsave(nword);
3226 }
3227 nbw->bw_flags &= ~(BWF_ONECAP | BWF_ALLCAP
3228 | BWF_KEEPCAP);
3229 nbw->bw_flags |= flags;
3230
3231 if (bw->bw_leadstring != NULL)
3232 nbw->bw_leadstring =
3233 vim_strsave(bw->bw_leadstring);
3234 nbw->bw_addstring = vim_strsave(ae->ae_add_nw);
3235
3236 STRCPY(nbw->bw_word, bw->bw_word);
3237 if (alen > 0 || ae->ae_chop != NULL)
3238 {
3239 /* Suffix starts with word character and/or
3240 * chop off something. Append it to the word.
3241 * Add new word entry. */
3242 wlen = STRLEN(nbw->bw_word);
3243 if (ae->ae_chop != NULL)
3244 wlen -= STRLEN(ae->ae_chop);
3245 mch_memmove(nbw->bw_word + wlen, ae->ae_add,
3246 alen);
3247 nbw->bw_word[wlen + alen] = NUL;
3248 add_to_wordlist(newwords, nbw);
3249 }
3250 else
3251 /* Basic word is the same, link "nbw" after
3252 * "bw". */
3253 bw->bw_next = nbw;
3254
3255 /* Remember this word, we need to set bw_prefix
3256 * and bw_prefix later. */
3257 if (ga_grow(&suffixga, 1) == OK)
3258 ((basicword_T **)suffixga.ga_data)
3259 [suffixga.ga_len++] = nbw;
3260 } 3333 }
3261 } 3334 else
3262 else if (ae->ae_add_nw == NULL)
3263 {
3264 /* Prefix that starts with non-word char(s) and may be
3265 * followed by word chars: Make a leadstring and
3266 * prepend word chars before the word. */
3267 alen = STRLEN(ae->ae_add_pw);
3268 nbw = (basicword_T *)alloc((unsigned)(
3269 sizeof(basicword_T) + STRLEN(bw->bw_word)
3270 + alen + 1));
3271 if (nbw != NULL)
3272 { 3335 {
3273 *nbw = *bw; 3336 /* prefix goes before the word */
3274 ga_init2(&nbw->bw_prefix, sizeof(short_u), 1);
3275 ga_init2(&nbw->bw_suffix, sizeof(short_u), 1);
3276
3277 /* Adding the prefix may change the caps. */
3278 STRCPY(nword, ae->ae_add); 3337 STRCPY(nword, ae->ae_add);
3279 p = dw->dw_word; 3338 p = dw->dw_word;
3280 if (ae->ae_chop != NULL) 3339 if (ae->ae_chop != NULL)
3281 /* Skip chop string. */ 3340 /* Skip chop string. */
3282 for (i = mb_charlen(ae->ae_chop); i > 0; --i) 3341 for (i = mb_charlen(ae->ae_chop); i > 0; --i)
3283 mb_ptr_adv( p); 3342 mb_ptr_adv( p);
3284 STRCAT(nword, p); 3343 STRCAT(nword, p);
3285 3344 agap = &prefixga;
3286 flags = captype(nword, nword + STRLEN(nword)); 3345 }
3287 if (flags & BWF_KEEPCAP) 3346
3288 /* "caseword" excludes the addition */ 3347 /* Create a basicword_T from the word. */
3289 nbw->bw_caseword = vim_strsave(nword 3348 nbw = get_basicword(nword, 1);
3290 + (ae->ae_add_pw - ae->ae_add)); 3349 if (nbw != NULL)
3291 else 3350 {
3292 nbw->bw_caseword = NULL; 3351 nbw->bw_region = bw->bw_region;
3293 nbw->bw_flags &= ~(BWF_ONECAP | BWF_ALLCAP 3352 nbw->bw_flags |= bw->bw_flags
3294 | BWF_KEEPCAP); 3353 & ~(BWF_ONECAP | BWF_ALLCAP | BWF_KEEPCAP);
3295 nbw->bw_flags |= flags; 3354
3296 3355 if (STRCMP(bw->bw_word, nbw->bw_word) != 0)
3297 if (bw->bw_addstring != NULL) 3356 /* Basic word differs, add new word entry. */
3298 nbw->bw_addstring = 3357 (void)add_to_wordlist(newwords, nbw);
3299 vim_strsave(bw->bw_addstring);
3300 else
3301 nbw->bw_addstring = NULL;
3302 nbw->bw_leadstring = vim_strnsave(ae->ae_add,
3303 ae->ae_add_pw - ae->ae_add);
3304
3305 if (alen > 0 || ae->ae_chop != NULL)
3306 {
3307 /* Prefix ends in word character and/or chop
3308 * off something. Prepend it to the word.
3309 * Add new word entry. */
3310 STRCPY(nbw->bw_word, ae->ae_add_pw);
3311 p = bw->bw_word;
3312 if (ae->ae_chop != NULL)
3313 p += STRLEN(ae->ae_chop);
3314 STRCAT(nbw->bw_word, p);
3315 add_to_wordlist(newwords, nbw);
3316 }
3317 else 3358 else
3318 { 3359 {
3319 /* Basic word is the same, link "nbw" after 3360 /* Basic word is the same, link "nbw" after
3320 * "bw". */ 3361 * "bw". */
3321 STRCPY(nbw->bw_word, bw->bw_word); 3362 nbw->bw_next = bw->bw_next;
3322 bw->bw_next = nbw; 3363 bw->bw_next = nbw;
3323 } 3364 }
3324 3365
3325 /* Remember this word, we need to set bw_suffix 3366 /* Remember this word, we need to set bw_prefix
3326 * and bw_suffix later. */ 3367 * or bw_suffix later. */
3327 if (ga_grow(&prefixga, 1) == OK) 3368 if (ga_grow(agap, 1) == OK)
3328 ((basicword_T **)prefixga.ga_data) 3369 ((basicword_T **)agap->ga_data)
3329 [prefixga.ga_len++] = nbw; 3370 [agap->ga_len++] = nbw;
3330 } 3371 }
3331 } 3372 }
3332 else 3373 else
3333 { 3374 {
3334 /* Prefix with both non-word and word characters: Turn 3375 /* Prefix with both non-word and word characters: Turn
3343 #ifdef FEAT_MBYTE 3384 #ifdef FEAT_MBYTE
3344 n = (*mb_ptr2len_check)(p); 3385 n = (*mb_ptr2len_check)(p);
3345 #else 3386 #else
3346 n = 1; 3387 n = 1;
3347 #endif 3388 #endif
3348 (void)str_foldcase(p, n, nword + alen, 3389 (void)spell_casefold(p, n, nword + alen,
3349 MAXWLEN - alen); 3390 MAXWLEN - alen);
3350 alen += STRLEN(nword + alen); 3391 alen += STRLEN(nword + alen);
3351 } 3392 }
3352 3393
3353 /* Allocate a new word entry. */ 3394 /* Allocate a new word entry. */
3391 nbw->bw_leadstring = vim_strnsave(ae->ae_add, 3432 nbw->bw_leadstring = vim_strnsave(ae->ae_add,
3392 ae->ae_add_pw - ae->ae_add); 3433 ae->ae_add_pw - ae->ae_add);
3393 else 3434 else
3394 nbw->bw_leadstring = NULL; 3435 nbw->bw_leadstring = NULL;
3395 3436
3396 add_to_wordlist(newwords, nbw); 3437 (void)add_to_wordlist(newwords, nbw);
3397 3438
3398 /* Remember this word, we need to set bw_suffix 3439 /* Remember this word, we need to set bw_suffix
3399 * later. */ 3440 * later. */
3400 if (ga_grow(&prefixga, 1) == OK) 3441 if (ga_grow(&prefixga, 1) == OK)
3401 ((basicword_T **)prefixga.ga_data) 3442 ((basicword_T **)prefixga.ga_data)
3480 { 3521 {
3481 int todo; 3522 int todo;
3482 hashitem_T *old_hi; 3523 hashitem_T *old_hi;
3483 dicword_T *dw; 3524 dicword_T *dw;
3484 basicword_T *bw; 3525 basicword_T *bw;
3485 char_u foldword[MAXLINELEN];
3486 int leadlen;
3487 char_u leadstring[MAXLINELEN];
3488 int addlen;
3489 char_u addstring[MAXLINELEN];
3490 int dwlen;
3491 char_u *p;
3492 int clen;
3493 int flags;
3494 char_u *cp = NULL;
3495 int l;
3496 char_u message[MAXLINELEN + MAXWLEN]; 3526 char_u message[MAXLINELEN + MAXWLEN];
3497 3527
3498 todo = oldwords->ht_used; 3528 todo = oldwords->ht_used;
3499 for (old_hi = oldwords->ht_array; todo > 0; ++old_hi) 3529 for (old_hi = oldwords->ht_array; todo > 0; ++old_hi)
3500 { 3530 {
3517 ui_breakcheck(); 3547 ui_breakcheck();
3518 if (got_int) 3548 if (got_int)
3519 break; 3549 break;
3520 } 3550 }
3521 3551
3522 /* The basic words are always stored with folded case. */ 3552 bw = get_basicword(dw->dw_word, 10);
3523 dwlen = STRLEN(dw->dw_word);
3524 (void)str_foldcase(dw->dw_word, dwlen, foldword, MAXLINELEN);
3525 flags = captype(dw->dw_word, dw->dw_word + dwlen);
3526
3527 /* Check for non-word characters before the word. */
3528 clen = 0;
3529 leadlen = 0;
3530 if (!spell_iswordc(foldword))
3531 {
3532 p = foldword;
3533 for (;;)
3534 {
3535 mb_ptr_adv(p);
3536 ++clen;
3537 if (*p == NUL) /* Only non-word chars (bad word!) */
3538 {
3539 if (p_verbose > 0)
3540 smsg((char_u *)_("Warning: word without word characters: \"%s\""),
3541 foldword);
3542 break;
3543 }
3544 if (spell_iswordc(p))
3545 {
3546 /* Move the leader to "leadstring" and remove it from
3547 * "foldword". */
3548 leadlen = p - foldword;
3549 mch_memmove(leadstring, foldword, leadlen);
3550 leadstring[leadlen] = NUL;
3551 mch_memmove(foldword, p, STRLEN(p) + 1);
3552 break;
3553 }
3554 }
3555 }
3556
3557 /* Check for non-word characters after word characters. */
3558 addlen = 0;
3559 for (p = foldword; spell_iswordc(p); mb_ptr_adv(p))
3560 {
3561 if (*p == NUL)
3562 break;
3563 ++clen;
3564 }
3565 if (*p != NUL)
3566 {
3567 /* Move the addition to "addstring" and truncate "foldword". */
3568 if (flags & BWF_KEEPCAP)
3569 {
3570 /* Preserve caps, need to skip the right number of
3571 * characters in the original word (case folding may
3572 * change the byte count). */
3573 l = 0;
3574 for (cp = dw->dw_word; l < clen; mb_ptr_adv(cp))
3575 ++l;
3576 addlen = STRLEN(cp);
3577 mch_memmove(addstring, cp, addlen + 1);
3578 }
3579 else
3580 {
3581 addlen = STRLEN(p);
3582 mch_memmove(addstring, p, addlen + 1);
3583 }
3584 *p = NUL;
3585 }
3586
3587 bw = (basicword_T *)alloc_clear((unsigned)sizeof(basicword_T)
3588 + STRLEN(foldword));
3589 if (bw == NULL) 3553 if (bw == NULL)
3590 break; 3554 break;
3591 STRCPY(bw->bw_word, foldword);
3592 bw->bw_region = regionmask; 3555 bw->bw_region = regionmask;
3593 3556
3594 if (leadlen > 0) 3557 (void)add_to_wordlist(newwords, bw);
3595 bw->bw_leadstring = vim_strsave(leadstring);
3596 else
3597 bw->bw_leadstring = NULL;
3598 if (addlen > 0)
3599 bw->bw_addstring = vim_strsave(addstring);
3600 else
3601 bw->bw_addstring = NULL;
3602
3603 add_to_wordlist(newwords, bw);
3604
3605 if (flags & BWF_KEEPCAP)
3606 {
3607 if (addlen == 0)
3608 /* use the whole word */
3609 bw->bw_caseword = vim_strsave(dw->dw_word + leadlen);
3610 else
3611 /* use only up to the addition */
3612 bw->bw_caseword = vim_strnsave(dw->dw_word + leadlen,
3613 cp - dw->dw_word - leadlen);
3614 if (bw->bw_caseword == NULL) /* out of memory */
3615 flags &= ~BWF_KEEPCAP;
3616 }
3617 bw->bw_flags = flags;
3618 3558
3619 /* Deal with any affix names on the old word, translate them 3559 /* Deal with any affix names on the old word, translate them
3620 * into affix numbers. */ 3560 * into affix numbers. */
3621 ga_init2(&bw->bw_prefix, sizeof(short_u), 10);
3622 ga_init2(&bw->bw_suffix, sizeof(short_u), 10);
3623 if (dw->dw_affnm != NULL) 3561 if (dw->dw_affnm != NULL)
3624 trans_affixes(dw, bw, oldaff, newwords); 3562 trans_affixes(dw, bw, oldaff, newwords);
3625 } 3563 }
3626 } 3564 }
3627 if (todo > 0) 3565 if (todo > 0)
3628 return FAIL; 3566 return FAIL;
3629 return OK; 3567 return OK;
3568 }
3569
3570 /*
3571 * Get a basicword_T from a word in original case.
3572 * Caller must set bw_region.
3573 * Returns NULL when something fails.
3574 */
3575 static basicword_T *
3576 get_basicword(word, asize)
3577 char_u *word;
3578 int asize; /* growsize for affix garray */
3579 {
3580 int dwlen;
3581 char_u foldword[MAXLINELEN];
3582 int flags;
3583 int clen;
3584 int leadlen;
3585 char_u *p;
3586 char_u leadstring[MAXLINELEN];
3587 int addlen;
3588 char_u addstring[MAXLINELEN];
3589 char_u *cp = NULL;
3590 int l;
3591 basicword_T *bw;
3592
3593 /* The basic words are always stored with folded case. */
3594 dwlen = STRLEN(word);
3595 (void)spell_casefold(word, dwlen, foldword, MAXLINELEN);
3596 flags = captype(word, word + dwlen);
3597
3598 /* Check for non-word characters before the word. */
3599 clen = 0;
3600 leadlen = 0;
3601 if (!spell_iswordc(foldword))
3602 {
3603 p = foldword;
3604 for (;;)
3605 {
3606 mb_ptr_adv(p);
3607 ++clen;
3608 if (*p == NUL) /* Only non-word chars (bad word!) */
3609 {
3610 if (p_verbose > 0)
3611 smsg((char_u *)_("Warning: word without word characters: \"%s\""),
3612 foldword);
3613 break;
3614 }
3615 if (spell_iswordc(p))
3616 {
3617 /* Move the leader to "leadstring" and remove it from
3618 * "foldword". */
3619 leadlen = p - foldword;
3620 mch_memmove(leadstring, foldword, leadlen);
3621 leadstring[leadlen] = NUL;
3622 mch_memmove(foldword, p, STRLEN(p) + 1);
3623 break;
3624 }
3625 }
3626 }
3627
3628 /* Check for non-word characters after word characters. */
3629 addlen = 0;
3630 for (p = foldword; spell_iswordc(p); mb_ptr_adv(p))
3631 {
3632 if (*p == NUL)
3633 break;
3634 ++clen;
3635 }
3636 if (*p != NUL)
3637 {
3638 /* Move the addition to "addstring" and truncate "foldword". */
3639 if (flags & BWF_KEEPCAP)
3640 {
3641 /* Preserve caps, need to skip the right number of
3642 * characters in the original word (case folding may
3643 * change the byte count). */
3644 l = 0;
3645 for (cp = word; l < clen; mb_ptr_adv(cp))
3646 ++l;
3647 addlen = STRLEN(cp);
3648 mch_memmove(addstring, cp, addlen + 1);
3649 }
3650 else
3651 {
3652 addlen = STRLEN(p);
3653 mch_memmove(addstring, p, addlen + 1);
3654 }
3655 *p = NUL;
3656 }
3657
3658 bw = (basicword_T *)alloc_clear((unsigned)sizeof(basicword_T)
3659 + STRLEN(foldword));
3660 if (bw == NULL)
3661 return NULL;
3662
3663 STRCPY(bw->bw_word, foldword);
3664
3665 if (leadlen > 0)
3666 bw->bw_leadstring = vim_strsave(leadstring);
3667 else
3668 bw->bw_leadstring = NULL;
3669 if (addlen > 0)
3670 bw->bw_addstring = vim_strsave(addstring);
3671 else
3672 bw->bw_addstring = NULL;
3673
3674 if (flags & BWF_KEEPCAP)
3675 {
3676 if (addlen == 0)
3677 /* use the whole word */
3678 bw->bw_caseword = vim_strsave(word + leadlen);
3679 else
3680 /* use only up to the addition */
3681 bw->bw_caseword = vim_strnsave(word + leadlen,
3682 cp - word - leadlen);
3683 }
3684
3685 bw->bw_flags = flags;
3686 ga_init2(&bw->bw_prefix, sizeof(short_u), asize);
3687 ga_init2(&bw->bw_suffix, sizeof(short_u), asize);
3688
3689 return bw;
3630 } 3690 }
3631 3691
3632 /* 3692 /*
3633 * Go through the list of words and combine the ones that are identical except 3693 * Go through the list of words and combine the ones that are identical except
3634 * for the region. 3694 * for the region.
3660 && (bw->bw_leadstring == NULL) 3720 && (bw->bw_leadstring == NULL)
3661 == (nbw->bw_leadstring == NULL) 3721 == (nbw->bw_leadstring == NULL)
3662 && (bw->bw_addstring == NULL) 3722 && (bw->bw_addstring == NULL)
3663 == (nbw->bw_addstring == NULL) 3723 == (nbw->bw_addstring == NULL)
3664 && ((bw->bw_flags & BWF_KEEPCAP) == 0 3724 && ((bw->bw_flags & BWF_KEEPCAP) == 0
3665 || (STRCMP(bw->bw_caseword, 3725 || bw->bw_caseword == NULL
3666 nbw->bw_caseword) == 0)) 3726 || nbw->bw_caseword == NULL
3727 || STRCMP(bw->bw_caseword,
3728 nbw->bw_caseword) == 0)
3667 && (bw->bw_leadstring == NULL 3729 && (bw->bw_leadstring == NULL
3668 || (STRCMP(bw->bw_leadstring, 3730 || STRCMP(bw->bw_leadstring,
3669 nbw->bw_leadstring) == 0)) 3731 nbw->bw_leadstring) == 0)
3670 && (bw->bw_addstring == NULL 3732 && (bw->bw_addstring == NULL
3671 || (STRCMP(bw->bw_addstring, 3733 || STRCMP(bw->bw_addstring,
3672 nbw->bw_addstring) == 0)) 3734 nbw->bw_addstring) == 0)
3673 && same_affixes(bw, nbw) 3735 && same_affixes(bw, nbw)
3674 ) 3736 )
3675 { 3737 {
3676 /* Match, combine regions and delete "nbw". */ 3738 /* Match, combine regions and delete "nbw". */
3677 pbw->bw_next = nbw->bw_next; 3739 pbw->bw_next = nbw->bw_next;
3714 * The result is that no affixes apply to the additions or leadstring of a 3776 * The result is that no affixes apply to the additions or leadstring of a
3715 * word. 3777 * word.
3716 * This is also needed when a word with an addition has a prefix and the word 3778 * This is also needed when a word with an addition has a prefix and the word
3717 * with prefix also exists. E.g., "blurp's/D" (D is prefix "de") and 3779 * with prefix also exists. E.g., "blurp's/D" (D is prefix "de") and
3718 * "deblurp". "deblurp" would match and no prefix would be tried. 3780 * "deblurp". "deblurp" would match and no prefix would be tried.
3719 */ 3781 *
3720 static void 3782 * Returns FAIL when out of memory.
3783 */
3784 static int
3721 expand_affixes(newwords, prefgap, suffgap) 3785 expand_affixes(newwords, prefgap, suffgap)
3722 hashtab_T *newwords; 3786 hashtab_T *newwords;
3723 garray_T *prefgap; 3787 garray_T *prefgap;
3724 garray_T *suffgap; 3788 garray_T *suffgap;
3725 { 3789 {
3729 int pi, si; 3793 int pi, si;
3730 affentry_T *pae, *sae; 3794 affentry_T *pae, *sae;
3731 garray_T add_words; 3795 garray_T add_words;
3732 int n; 3796 int n;
3733 char_u message[MAXLINELEN + MAXWLEN]; 3797 char_u message[MAXLINELEN + MAXWLEN];
3798 int retval = OK;
3734 3799
3735 ga_init2(&add_words, sizeof(basicword_T *), 10); 3800 ga_init2(&add_words, sizeof(basicword_T *), 10);
3736 3801
3737 todo = newwords->ht_used; 3802 todo = newwords->ht_used;
3738 for (hi = newwords->ht_array; todo > 0; ++hi) 3803 for (hi = newwords->ht_array; todo > 0; ++hi)
3804 */ 3869 */
3805 do 3870 do
3806 { 3871 {
3807 /* Expand the word for this combination of 3872 /* Expand the word for this combination of
3808 * prefixes and suffixes. */ 3873 * prefixes and suffixes. */
3809 expand_one_aff(bw, &add_words, pae, sae); 3874 if (expand_one_aff(bw, &add_words,
3875 pae, sae) == FAIL)
3876 {
3877 retval = FAIL;
3878 goto theend;
3879 }
3810 3880
3811 /* Advance to next suffix entry, if there 3881 /* Advance to next suffix entry, if there
3812 * is one. */ 3882 * is one. */
3813 if (sae != NULL) 3883 if (sae != NULL)
3814 sae = sae->ae_next; 3884 sae = sae->ae_next;
3829 /* 3899 /*
3830 * Add the new words afterwards, can't change "newwords" while going over 3900 * Add the new words afterwards, can't change "newwords" while going over
3831 * all its items. 3901 * all its items.
3832 */ 3902 */
3833 for (pi = 0; pi < add_words.ga_len; ++pi) 3903 for (pi = 0; pi < add_words.ga_len; ++pi)
3834 add_to_wordlist(newwords, ((basicword_T **)add_words.ga_data)[pi]); 3904 {
3835 3905 retval = add_to_wordlist(newwords,
3906 ((basicword_T **)add_words.ga_data)[pi]);
3907 if (retval == FAIL)
3908 break;
3909 }
3910
3911 theend:
3836 ga_clear(&add_words); 3912 ga_clear(&add_words);
3913 return retval;
3837 } 3914 }
3838 3915
3839 /* 3916 /*
3840 * Add one word to "add_words" for basic word "bw" with additions, adding 3917 * Add one word to "add_words" for basic word "bw" with additions, adding
3841 * prefix "pae" and suffix "sae". Either "pae" or "sae" can be NULL. 3918 * prefix "pae" and suffix "sae". Either "pae" or "sae" can be NULL.
3842 * Don't do this when not necessary: 3919 * Don't do this when not necessary:
3843 * - no leadstring and adding prefix doesn't result in existing word. 3920 * - no leadstring and adding prefix doesn't result in existing word.
3844 */ 3921 * Returns FAIL when out of memory.
3845 static void 3922 */
3923 static int
3846 expand_one_aff(bw, add_words, pae, sae) 3924 expand_one_aff(bw, add_words, pae, sae)
3847 basicword_T *bw; 3925 basicword_T *bw;
3848 garray_T *add_words; 3926 garray_T *add_words;
3849 affentry_T *pae; 3927 affentry_T *pae;
3850 affentry_T *sae; 3928 affentry_T *sae;
3871 3949
3872 /* Copy the body of the word. */ 3950 /* Copy the body of the word. */
3873 STRCPY(word + l, bw->bw_word + choplen); 3951 STRCPY(word + l, bw->bw_word + choplen);
3874 3952
3875 /* Do the same for bw_caseword, if it's there. */ 3953 /* Do the same for bw_caseword, if it's there. */
3876 if (bw->bw_flags & BWF_KEEPCAP) 3954 if ((bw->bw_flags & BWF_KEEPCAP) && bw->bw_caseword != NULL)
3877 { 3955 {
3878 if (l > 0) 3956 if (l > 0)
3879 mch_memmove(caseword, pae->ae_add, l); 3957 mch_memmove(caseword, pae->ae_add, l);
3880 STRCPY(caseword + l, bw->bw_caseword + choplen); 3958 STRCPY(caseword + l, bw->bw_caseword + choplen);
3881 } 3959 }
3905 } 3983 }
3906 } 3984 }
3907 3985
3908 nbw = (basicword_T *)alloc_clear((unsigned) 3986 nbw = (basicword_T *)alloc_clear((unsigned)
3909 sizeof(basicword_T) + STRLEN(word)); 3987 sizeof(basicword_T) + STRLEN(word));
3910 if (nbw != NULL) 3988 if (nbw == NULL)
3911 { 3989 return FAIL;
3912 /* Add the new word to the list of words to be added later. */ 3990
3913 if (ga_grow(add_words, 1) == FAIL) 3991 /* Add the new word to the list of words to be added later. */
3914 { 3992 if (ga_grow(add_words, 1) == FAIL)
3915 vim_free(nbw); 3993 {
3916 return; 3994 vim_free(nbw);
3917 } 3995 return FAIL;
3918 ((basicword_T **)add_words->ga_data)[add_words->ga_len++] = nbw; 3996 }
3919 3997 ((basicword_T **)add_words->ga_data)[add_words->ga_len++] = nbw;
3920 /* Copy the (modified) basic word, flags and region. */ 3998
3921 STRCPY(nbw->bw_word, word); 3999 /* Copy the (modified) basic word, flags and region. */
3922 nbw->bw_flags = bw->bw_flags; 4000 STRCPY(nbw->bw_word, word);
3923 nbw->bw_region = bw->bw_region; 4001 nbw->bw_flags = bw->bw_flags;
3924 4002 nbw->bw_region = bw->bw_region;
3925 /* Set the (modified) caseword. */ 4003
3926 if (bw->bw_flags & BWF_KEEPCAP) 4004 /* Set the (modified) caseword. */
3927 if ((nbw->bw_caseword = vim_strsave(caseword)) == NULL) 4005 if (bw->bw_flags & BWF_KEEPCAP)
3928 nbw->bw_flags &= ~BWF_KEEPCAP; 4006 nbw->bw_caseword = vim_strsave(caseword);
3929 4007 else
3930 if (bw->bw_leadstring != NULL) 4008 nbw->bw_caseword = NULL;
3931 { 4009
3932 if (pae != NULL) 4010 if (bw->bw_leadstring != NULL)
3933 { 4011 {
3934 /* Prepend prefix to leadstring. */ 4012 if (pae != NULL)
3935 ll = STRLEN(bw->bw_leadstring); 4013 {
3936 l = choplen = 0; 4014 /* Prepend prefix to leadstring. */
3937 if (pae->ae_add != NULL) 4015 ll = STRLEN(bw->bw_leadstring);
3938 l = STRLEN(pae->ae_add); 4016 l = choplen = 0;
3939 if (pae->ae_chop != NULL) 4017 if (pae->ae_add != NULL)
3940 { 4018 l = STRLEN(pae->ae_add);
3941 choplen = STRLEN(pae->ae_chop); 4019 if (pae->ae_chop != NULL)
3942 if (choplen > ll) /* TODO: error? */ 4020 {
3943 choplen = ll; 4021 choplen = STRLEN(pae->ae_chop);
3944 } 4022 if (choplen > ll) /* TODO: error? */
3945 nbw->bw_leadstring = alloc((unsigned)(ll + l - choplen + 1)); 4023 choplen = ll;
3946 if (nbw->bw_leadstring != NULL) 4024 }
3947 { 4025 nbw->bw_leadstring = alloc((unsigned)(ll + l - choplen + 1));
3948 if (l > 0) 4026 if (nbw->bw_leadstring != NULL)
3949 mch_memmove(nbw->bw_leadstring, pae->ae_add, l); 4027 {
3950 STRCPY(nbw->bw_leadstring + l, bw->bw_leadstring + choplen); 4028 if (l > 0)
3951 } 4029 mch_memmove(nbw->bw_leadstring, pae->ae_add, l);
3952 } 4030 STRCPY(nbw->bw_leadstring + l, bw->bw_leadstring + choplen);
4031 }
4032 }
4033 else
4034 nbw->bw_leadstring = vim_strsave(bw->bw_leadstring);
4035 }
4036 else if (bw->bw_prefix.ga_len > 0)
4037 {
4038 /* There is no leadstring, copy the list of possible prefixes. */
4039 ga_init2(&nbw->bw_prefix, sizeof(short_u), 1);
4040 if (ga_grow(&nbw->bw_prefix, bw->bw_prefix.ga_len) == OK)
4041 {
4042 mch_memmove(nbw->bw_prefix.ga_data, bw->bw_prefix.ga_data,
4043 bw->bw_prefix.ga_len * sizeof(short_u));
4044 nbw->bw_prefix.ga_len = bw->bw_prefix.ga_len;
4045 }
4046 }
4047
4048 if (bw->bw_addstring != NULL)
4049 {
4050 if (sae != NULL)
4051 {
4052 /* Append suffix to addstring. */
4053 l = STRLEN(bw->bw_addstring);
4054 if (sae->ae_chop != NULL)
4055 {
4056 l -= STRLEN(sae->ae_chop);
4057 if (l < 0) /* TODO: error? */
4058 l = 0;
4059 }
4060 if (sae->ae_add == NULL)
4061 ll = 0;
3953 else 4062 else
3954 nbw->bw_leadstring = vim_strsave(bw->bw_leadstring); 4063 ll = STRLEN(sae->ae_add);
3955 } 4064 nbw->bw_addstring = alloc((unsigned)(ll + l - choplen + 1));
3956 else if (bw->bw_prefix.ga_len > 0) 4065 if (nbw->bw_addstring != NULL)
3957 { 4066 {
3958 /* There is no leadstring, copy the list of possible prefixes. */ 4067 STRCPY(nbw->bw_addstring, bw->bw_addstring);
3959 ga_init2(&nbw->bw_prefix, sizeof(short_u), 1);
3960 if (ga_grow(&nbw->bw_prefix, bw->bw_prefix.ga_len) == OK)
3961 {
3962 mch_memmove(nbw->bw_prefix.ga_data, bw->bw_prefix.ga_data,
3963 bw->bw_prefix.ga_len * sizeof(short_u));
3964 nbw->bw_prefix.ga_len = bw->bw_prefix.ga_len;
3965 }
3966 }
3967
3968 if (bw->bw_addstring != NULL)
3969 {
3970 if (sae != NULL)
3971 {
3972 /* Append suffix to addstring. */
3973 l = STRLEN(bw->bw_addstring);
3974 if (sae->ae_chop != NULL)
3975 {
3976 l -= STRLEN(sae->ae_chop);
3977 if (l < 0) /* TODO: error? */
3978 l = 0;
3979 }
3980 if (sae->ae_add == NULL) 4068 if (sae->ae_add == NULL)
3981 ll = 0; 4069 nbw->bw_addstring[l] = NUL;
3982 else 4070 else
3983 ll = STRLEN(sae->ae_add); 4071 STRCPY(nbw->bw_addstring + l, sae->ae_add);
3984 nbw->bw_addstring = alloc((unsigned)(ll + l - choplen + 1)); 4072 }
3985 if (nbw->bw_addstring != NULL) 4073 }
3986 { 4074 else
3987 STRCPY(nbw->bw_addstring, bw->bw_addstring); 4075 nbw->bw_addstring = vim_strsave(bw->bw_addstring);
3988 if (sae->ae_add == NULL) 4076 }
3989 nbw->bw_addstring[l] = NUL; 4077
3990 else 4078 return OK;
3991 STRCPY(nbw->bw_addstring + l, sae->ae_add);
3992 }
3993 }
3994 else
3995 nbw->bw_addstring = vim_strsave(bw->bw_addstring);
3996 }
3997 }
3998 } 4079 }
3999 4080
4000 /* 4081 /*
4001 * Add basicword_T "*bw" to wordlist "newwords". 4082 * Add basicword_T "*bw" to wordlist "newwords". Returns the result of hash_add() when a new entry is created, OK when "*bw" is linked into an existing entry.
4002 */ 4083 */
4003 static void 4084 static int
4004 add_to_wordlist(newwords, bw) 4085 add_to_wordlist(newwords, bw)
4005 hashtab_T *newwords; 4086 hashtab_T *newwords;
4006 basicword_T *bw; 4087 basicword_T *bw;
4007 { 4088 {
4008 hashitem_T *hi; 4089 hashitem_T *hi;
4009 basicword_T *bw2; 4090 basicword_T *bw2;
4091 int retval = OK;
4010 4092
4011 hi = hash_find(newwords, bw->bw_word); 4093 hi = hash_find(newwords, bw->bw_word);
4012 if (HASHITEM_EMPTY(hi)) 4094 if (HASHITEM_EMPTY(hi))
4013 { 4095 {
4014 /* New entry, add to hashlist; "*bw" becomes the head of its list. */ 4096 /* New entry, add to hashlist; "*bw" becomes the head of its list. */
4015 hash_add(newwords, bw->bw_word); 4097 retval = hash_add(newwords, bw->bw_word);
4016 bw->bw_next = NULL; 4098 bw->bw_next = NULL;
4017 } 4099 }
4018 else 4100 else
4019 { 4101 {
4020 /* Existing entry, insert "*bw" right after the first basic word of its list. */ 4102 /* Existing entry, insert "*bw" right after the first basic word of its list. */
4021 bw2 = HI2BW(hi); 4103 bw2 = HI2BW(hi);
4022 bw->bw_next = bw2->bw_next; 4104 bw->bw_next = bw2->bw_next;
4023 bw2->bw_next = bw; 4105 bw2->bw_next = bw;
4024 } 4106 }
4107 return retval;
4025 } 4108 }
4026 4109
4027 /* 4110 /*
4028 * Write a number to file "fd", MSB first, in "len" bytes. 4111 * Write a number to file "fd", MSB first, in "len" bytes.
4029 */ 4112 */
4030 static void 4113 void
4031 put_bytes(fd, nr, len) 4114 put_bytes(fd, nr, len)
4032 FILE *fd; 4115 FILE *fd;
4033 long_u nr; 4116 long_u nr;
4034 int len; 4117 int len;
4035 { 4118 {
4103 /* 4186 /*
4104 * Vim spell file format: <HEADER> <PREFIXLIST> <SUFFIXLIST> 4187 * Vim spell file format: <HEADER> <PREFIXLIST> <SUFFIXLIST>
4105 * <SUGGEST> <WORDLIST> 4188 * <SUGGEST> <WORDLIST>
4106 * 4189 *
4107 * <HEADER>: <fileID> <regioncnt> <regionname> ... 4190 * <HEADER>: <fileID> <regioncnt> <regionname> ...
4191 * <charflagslen> <charflags> <fcharslen> <fchars>
4108 * 4192 *
4109 * <fileID> 10 bytes "VIMspell03" 4193 * <fileID> 10 bytes "VIMspell04"
4110 * <regioncnt> 1 byte number of regions following (8 supported) 4194 * <regioncnt> 1 byte number of regions following (8 supported)
4111 * <regionname> 2 bytes Region name: ca, au, etc. 4195 * <regionname> 2 bytes Region name: ca, au, etc.
4112 * First <regionname> is region 1. 4196 * First <regionname> is region 1.
4113 * 4197 *
4198 * <charflagslen> 1 byte Number of bytes in <charflags> (should be 128).
4199 * <charflags> N bytes List of flags (first one is for character 128):
4200 * 0x01 word character
4201 * 0x02 upper-case character
4202 * <fcharslen> 2 bytes Number of bytes in <fchars>.
4203 * <fchars> N bytes Folded characters, first one is for character 128.
4114 * 4204 *
4115 * <PREFIXLIST>: <affcount> <afftotcnt> <affix> ... 4205 *
4116 * <SUFFIXLIST>: <affcount> <afftotcnt> <affix> ... 4206 * <PREFIXLIST>: <affcount> <affix> ...
4207 * <SUFFIXLIST>: <affcount> <affix> ...
4117 * list of possible affixes: prefixes and suffixes. 4208 * list of possible affixes: prefixes and suffixes.
4118 * 4209 *
4119 * <affcount> 2 bytes Number of affixes (MSB comes first). 4210 * <affcount> 2 bytes Number of affixes (MSB comes first).
4120 * When there are more than 256 affixes an affixNR takes 2 bytes. 4211 * When there are more than 256 affixes an affixNR takes 2 bytes.
4121 * This is separate for prefixes and suffixes! 4212 * This is separate for prefixes and suffixes!
4122 * First affixNR is 0. 4213 * First affixNR is 0.
4123 * <afftotcnt> 2 bytes Total number of affix items (MSB comes first).
4124 * 4214 *
4125 * <affix>: <affitemcnt> <affitem> ... 4215 * <affix>: <affitemcnt> <affitem> ...
4126 * 4216 *
4127 * <affitemcnt> 2 bytes Number of affixes with this affixNR (MSB first). 4217 * <affitemcnt> 2 bytes Number of affixes with this affixNR (MSB first).
4128 * 4218 *
4226 char_u **wtab; 4316 char_u **wtab;
4227 int todo; 4317 int todo;
4228 int flags, aflags; 4318 int flags, aflags;
4229 basicword_T *bw, *bwf, *bw2 = NULL; 4319 basicword_T *bw, *bwf, *bw2 = NULL;
4230 int i; 4320 int i;
4231 int cnt;
4232 affentry_T *ae;
4233 int round; 4321 int round;
4234 garray_T bwga; 4322 garray_T bwga;
4235 4323
4236 vim_memset(&wif, 0, sizeof(winfo_T)); 4324 vim_memset(&wif, 0, sizeof(winfo_T));
4237 4325
4240 { 4328 {
4241 EMSG2(_(e_notopen), fname); 4329 EMSG2(_(e_notopen), fname);
4242 return; 4330 return;
4243 } 4331 }
4244 4332
4245 fwrite(VIMSPELLMAGIC, VIMSPELLMAGICL, (size_t)1, wif.wif_fd); 4333 /* <HEADER>: <fileID> <regioncnt> <regionname> ...
4334 * <charflagslen> <charflags> <fcharslen> <fchars> */
4335 fwrite(VIMSPELLMAGIC, VIMSPELLMAGICL, (size_t)1, wif.wif_fd); /* <fileID> */
4246 4336
4247 /* write the region names if there is more than one */ 4337 /* write the region names if there is more than one */
4248 if (regcount > 1) 4338 if (regcount > 1)
4249 { 4339 {
4250 putc(regcount, wif.wif_fd); 4340 putc(regcount, wif.wif_fd); /* <regioncnt> <regionname> ... */
4251 fwrite(regchars, (size_t)(regcount * 2), (size_t)1, wif.wif_fd); 4341 fwrite(regchars, (size_t)(regcount * 2), (size_t)1, wif.wif_fd);
4252 wif.wif_regionmask = (1 << regcount) - 1; 4342 wif.wif_regionmask = (1 << regcount) - 1;
4253 } 4343 }
4254 else 4344 else
4255 { 4345 {
4256 putc(0, wif.wif_fd); 4346 putc(0, wif.wif_fd);
4257 wif.wif_regionmask = 0; 4347 wif.wif_regionmask = 0;
4258 } 4348 }
4259 4349
4260 /* Write the prefix and suffix lists. */ 4350 /* Write the table with character flags and table for case folding.
4351 * <charflagslen> <charflags> <fcharslen> <fchars> */
4352 write_spell_chartab(wif.wif_fd);
4353
4354 /* <PREFIXLIST>: <affcount> <affix> ...
4355 * <SUFFIXLIST>: <affcount> <affix> ... */
4261 for (round = 1; round <= 2; ++round) 4356 for (round = 1; round <= 2; ++round)
4262 { 4357 {
4263 gap = round == 1 ? prefga : suffga; 4358 gap = round == 1 ? prefga : suffga;
4264 put_bytes(wif.wif_fd, (long_u)gap->ga_len, 2); /* <affcount> */ 4359 put_bytes(wif.wif_fd, (long_u)gap->ga_len, 2); /* <affcount> */
4265
4266 /* Count the total number of affix items. */
4267 cnt = 0;
4268 for (i = 0; i < gap->ga_len; ++i)
4269 for (ae = ((affheader_T *)gap->ga_data + i)->ah_first;
4270 ae != NULL; ae = ae->ae_next)
4271 ++cnt;
4272 put_bytes(wif.wif_fd, (long_u)cnt, 2); /* <afftotcnt> */
4273 4360
4274 for (i = 0; i < gap->ga_len; ++i) 4361 for (i = 0; i < gap->ga_len; ++i)
4275 write_affix(wif.wif_fd, (affheader_T *)gap->ga_data + i); 4362 write_affix(wif.wif_fd, (affheader_T *)gap->ga_data + i);
4276 } 4363 }
4277 4364
4278 /* Number of bytes used for affix NR depends on affix count. */ 4365 /* Number of bytes used for affix NR depends on affix count. */
4279 wif.wif_prefm = (prefga->ga_len > 256) ? 2 : 1; 4366 wif.wif_prefm = (prefga->ga_len > 256) ? 2 : 1;
4280 wif.wif_suffm = (suffga->ga_len > 256) ? 2 : 1; 4367 wif.wif_suffm = (suffga->ga_len > 256) ? 2 : 1;
4281 4368
4282 /* Write the suggest info. TODO */ 4369 /* <SUGGEST> : <suggestlen> <more> ...
4283 put_bytes(wif.wif_fd, 0L, 4); 4370 * TODO. Only write a zero length for now. */
4371 put_bytes(wif.wif_fd, 0L, 4); /* <suggestlen> */
4284 4372
4285 /* 4373 /*
4286 * Write the word list. <wordcount> <worditem> ... 4374 * <WORDLIST>: <wordcount> <worditem> ...
4287 */ 4375 */
4376
4288 /* number of basic words in 4 bytes */ 4377 /* number of basic words in 4 bytes */
4289 put_bytes(wif.wif_fd, newwords->ht_used, 4); /* <wordcount> */ 4378 put_bytes(wif.wif_fd, newwords->ht_used, 4); /* <wordcount> */
4290 4379
4291 /* 4380 /*
4292 * Sort the word list, so that we can copy as many bytes as possible from 4381 * Sort the word list, so that we can copy as many bytes as possible from
4331 bw2 = ((basicword_T **)bwga.ga_data)[i]; 4420 bw2 = ((basicword_T **)bwga.ga_data)[i];
4332 aflags = bw2->bw_flags & (BWF_ONECAP | BWF_KEEPCAP 4421 aflags = bw2->bw_flags & (BWF_ONECAP | BWF_KEEPCAP
4333 | BWF_ALLCAP); 4422 | BWF_ALLCAP);
4334 if (flags == aflags 4423 if (flags == aflags
4335 && ((flags & BWF_KEEPCAP) == 0 4424 && ((flags & BWF_KEEPCAP) == 0
4336 || (STRCMP(bw->bw_caseword, 4425 || bw->bw_caseword == NULL
4337 bw2->bw_caseword) == 0)) 4426 || bw2->bw_caseword == NULL
4427 || STRCMP(bw->bw_caseword,
4428 bw2->bw_caseword) == 0)
4338 && same_affixes(bw, bw2)) 4429 && same_affixes(bw, bw2))
4339 break; 4430 break;
4340 } 4431 }
4341 if (i == bwga.ga_len) 4432 if (i == bwga.ga_len)
4342 { 4433 {
4383 write_bword(&wif, bw2, FALSE); 4474 write_bword(&wif, bw2, FALSE);
4384 } 4475 }
4385 } 4476 }
4386 4477
4387 ga_clear(&bwga); 4478 ga_clear(&bwga);
4479 vim_free(wtab);
4388 } 4480 }
4389 4481
4390 fclose(wif.wif_fd); 4482 fclose(wif.wif_fd);
4391 4483
4392 /* Print a few statistics. */ 4484 /* Print a few statistics. */
4546 4638
4547 /* First dummy word doesn't need anything but flags. */ 4639 /* First dummy word doesn't need anything but flags. */
4548 if (lowcap) 4640 if (lowcap)
4549 return; 4641 return;
4550 4642
4551 if (flags & BWF_KEEPCAP) 4643 if ((flags & BWF_KEEPCAP) && bw->bw_caseword != NULL)
4552 { 4644 {
4553 len = STRLEN(bw->bw_caseword); 4645 len = STRLEN(bw->bw_caseword);
4554 putc(len, fd); /* <caselen> */ 4646 putc(len, fd); /* <caselen> */
4555 for (i = 0; i < len; ++i) 4647 for (i = 0; i < len; ++i)
4556 putc(bw->bw_caseword[i], fd); /* <caseword> */ 4648 putc(bw->bw_caseword[i], fd); /* <caseword> */
4682 if (aflags & ADD_REGION) 4774 if (aflags & ADD_REGION)
4683 putc(bw->bw_region, fd); /* <region> */ 4775 putc(bw->bw_region, fd); /* <region> */
4684 4776
4685 bw2 = bw; 4777 bw2 = bw;
4686 } 4778 }
4779
4687 vim_free(wtab); 4780 vim_free(wtab);
4688 } 4781 }
4689 } 4782 }
4690 4783
4691 4784
4708 struct stat st; 4801 struct stat st;
4709 int round; 4802 int round;
4710 vimconv_T conv; 4803 vimconv_T conv;
4711 int ascii = FALSE; 4804 int ascii = FALSE;
4712 char_u *arg = eap->arg; 4805 char_u *arg = eap->arg;
4806 int error = FALSE;
4713 4807
4714 if (STRNCMP(arg, "-ascii", 6) == 0) 4808 if (STRNCMP(arg, "-ascii", 6) == 0)
4715 { 4809 {
4716 ascii = TRUE; 4810 ascii = TRUE;
4717 arg = skipwhite(arg + 6); 4811 arg = skipwhite(arg + 6);
4764 TOLOWER_ASC(fnames[i][len - 1]); 4858 TOLOWER_ASC(fnames[i][len - 1]);
4765 } 4859 }
4766 } 4860 }
4767 } 4861 }
4768 4862
4863 /* Clear the char type tables, don't want to use any of the currently
4864 * used spell properties. */
4865 init_spell_chartab();
4866
4769 /* 4867 /*
4770 * Read all the .aff and .dic files. 4868 * Read all the .aff and .dic files.
4771 * Text is converted to 'encoding'. 4869 * Text is converted to 'encoding'.
4772 */ 4870 */
4773 for (i = 1; i < fcount; ++i) 4871 for (i = 1; i < fcount; ++i)
4844 * inefficient searching. Turn the affixes into additions and/or 4942 * inefficient searching. Turn the affixes into additions and/or
4845 * the expanded word. 4943 * the expanded word.
4846 */ 4944 */
4847 MSG(_("Processing words...")); 4945 MSG(_("Processing words..."));
4848 out_flush(); 4946 out_flush();
4849 expand_affixes(&newwords, &prefga, &suffga); 4947 error = expand_affixes(&newwords, &prefga, &suffga) == FAIL;
4850 4948
4851 /* Write the info in the spell file. */ 4949 if (!error)
4852 smsg((char_u *)_("Writing spell file %s..."), wfname); 4950 {
4853 out_flush(); 4951 /* Write the info in the spell file. */
4854 write_vim_spell(wfname, &prefga, &suffga, &newwords, 4952 smsg((char_u *)_("Writing spell file %s..."), wfname);
4953 out_flush();
4954 write_vim_spell(wfname, &prefga, &suffga, &newwords,
4855 fcount - 1, region_name); 4955 fcount - 1, region_name);
4856 MSG(_("Done!")); 4956 MSG(_("Done!"));
4857 out_flush(); 4957 out_flush();
4958 }
4858 4959
4859 /* Free the allocated stuff. */ 4960 /* Free the allocated stuff. */
4860 free_wordtable(&newwords); 4961 free_wordtable(&newwords);
4861 for (round = 1; round <= 2; ++round) 4962 for (round = 1; round <= 2; ++round)
4862 { 4963 {