comparison src/spell.c @ 9583:b0c7061d6439 v7.4.2069

commit https://github.com/vim/vim/commit/9ccfebddc3ff2a3c2853cf706fd4c26f639bf381 Author: Bram Moolenaar <Bram@vim.org> Date: Tue Jul 19 16:39:08 2016 +0200 patch 7.4.2069 Problem: spell.c is too big. Solution: Split it in spell file handling and spell checking.
author Christian Brabandt <cb@256bit.org>
date Tue, 19 Jul 2016 16:45:06 +0200
parents 695186e11daa
children fd9727ae3c49
comparison
equal deleted inserted replaced
9582:96737caf272d 9583:b0c7061d6439
7 * See README.txt for an overview of the Vim source code. 7 * See README.txt for an overview of the Vim source code.
8 */ 8 */
9 9
10 /* 10 /*
11 * spell.c: code for spell checking 11 * spell.c: code for spell checking
12 *
13 * See spellfile.c for the Vim spell file format.
12 * 14 *
13 * The spell checking mechanism uses a tree (aka trie). Each node in the tree 15 * The spell checking mechanism uses a tree (aka trie). Each node in the tree
14 * has a list of bytes that can appear (siblings). For each byte there is a 16 * has a list of bytes that can appear (siblings). For each byte there is a
15 * pointer to the node with the byte that follows in the word (child). 17 * pointer to the node with the byte that follows in the word (child).
16 * 18 *
51 * 53 *
52 * Why doesn't Vim use aspell/ispell/myspell/etc.? 54 * Why doesn't Vim use aspell/ispell/myspell/etc.?
53 * See ":help develop-spell". 55 * See ":help develop-spell".
54 */ 56 */
55 57
56 /* Use SPELL_PRINTTREE for debugging: dump the word tree after adding a word.
57 * Only use it for small word lists! */
58 #if 0
59 # define SPELL_PRINTTREE
60 #endif
61
62 /* Use SPELL_COMPRESS_ALLWAYS for debugging: compress the word tree after
63 * adding a word. Only use it for small word lists! */
64 #if 0
65 # define SPELL_COMPRESS_ALLWAYS
66 #endif
67
68 /* Use DEBUG_TRIEWALK to print the changes made in suggest_trie_walk() for a
69 * specific word. */
70 #if 0
71 # define DEBUG_TRIEWALK
72 #endif
73
74 /* 58 /*
75 * Use this to adjust the score after finding suggestions, based on the 59 * Use this to adjust the score after finding suggestions, based on the
76 * suggested word sounding like the bad word. This is much faster than doing 60 * suggested word sounding like the bad word. This is much faster than doing
77 * it for every possible suggestion. 61 * it for every possible suggestion.
78 * Disadvantage: When "the" is typed as "hte" it sounds quite different ("@" 62 * Disadvantage: When "the" is typed as "hte" it sounds quite different ("@"
85 * Do the opposite: based on a maximum end score and a known sound score, 69 * Do the opposite: based on a maximum end score and a known sound score,
86 * compute the maximum word score that can be used. 70 * compute the maximum word score that can be used.
87 */ 71 */
88 #define MAXSCORE(word_score, sound_score) ((4 * word_score - sound_score) / 3) 72 #define MAXSCORE(word_score, sound_score) ((4 * word_score - sound_score) / 3)
89 73
90 /* 74 #define IN_SPELL_C
91 * Vim spell file format: <HEADER>
92 * <SECTIONS>
93 * <LWORDTREE>
94 * <KWORDTREE>
95 * <PREFIXTREE>
96 *
97 * <HEADER>: <fileID> <versionnr>
98 *
99 * <fileID> 8 bytes "VIMspell"
100 * <versionnr> 1 byte VIMSPELLVERSION
101 *
102 *
103 * Sections make it possible to add information to the .spl file without
104 * making it incompatible with previous versions. There are two kinds of
105 * sections:
106 * 1. Not essential for correct spell checking. E.g. for making suggestions.
107 * These are skipped when not supported.
108 * 2. Optional information, but essential for spell checking when present.
109 * E.g. conditions for affixes. When this section is present but not
110 * supported an error message is given.
111 *
112 * <SECTIONS>: <section> ... <sectionend>
113 *
114 * <section>: <sectionID> <sectionflags> <sectionlen> (section contents)
115 *
116 * <sectionID> 1 byte number from 0 to 254 identifying the section
117 *
118 * <sectionflags> 1 byte SNF_REQUIRED: this section is required for correct
119 * spell checking
120 *
121 * <sectionlen> 4 bytes length of section contents, MSB first
122 *
123 * <sectionend> 1 byte SN_END
124 *
125 *
126 * sectionID == SN_INFO: <infotext>
127 * <infotext> N bytes free format text with spell file info (version,
128 * website, etc)
129 *
130 * sectionID == SN_REGION: <regionname> ...
131 * <regionname> 2 bytes Up to 8 region names: ca, au, etc. Lower case.
132 * First <regionname> is region 1.
133 *
134 * sectionID == SN_CHARFLAGS: <charflagslen> <charflags>
135 * <folcharslen> <folchars>
136 * <charflagslen> 1 byte Number of bytes in <charflags> (should be 128).
137 * <charflags> N bytes List of flags (first one is for character 128):
138 * 0x01 word character CF_WORD
139 * 0x02 upper-case character CF_UPPER
140 * <folcharslen> 2 bytes Number of bytes in <folchars>.
141 * <folchars> N bytes Folded characters, first one is for character 128.
142 *
143 * sectionID == SN_MIDWORD: <midword>
144 * <midword> N bytes Characters that are word characters only when used
145 * in the middle of a word.
146 *
147 * sectionID == SN_PREFCOND: <prefcondcnt> <prefcond> ...
148 * <prefcondcnt> 2 bytes Number of <prefcond> items following.
149 * <prefcond> : <condlen> <condstr>
150 * <condlen> 1 byte Length of <condstr>.
151 * <condstr> N bytes Condition for the prefix.
152 *
153 * sectionID == SN_REP: <repcount> <rep> ...
154 * <repcount> 2 bytes number of <rep> items, MSB first.
155 * <rep> : <repfromlen> <repfrom> <reptolen> <repto>
156 * <repfromlen> 1 byte length of <repfrom>
157 * <repfrom> N bytes "from" part of replacement
158 * <reptolen> 1 byte length of <repto>
159 * <repto> N bytes "to" part of replacement
160 *
161 * sectionID == SN_REPSAL: <repcount> <rep> ...
162 * just like SN_REP but for soundfolded words
163 *
164 * sectionID == SN_SAL: <salflags> <salcount> <sal> ...
165 * <salflags> 1 byte flags for soundsalike conversion:
166 * SAL_F0LLOWUP
167 * SAL_COLLAPSE
168 * SAL_REM_ACCENTS
169 * <salcount> 2 bytes number of <sal> items following
170 * <sal> : <salfromlen> <salfrom> <saltolen> <salto>
171 * <salfromlen> 1 byte length of <salfrom>
172 * <salfrom> N bytes "from" part of soundsalike
173 * <saltolen> 1 byte length of <salto>
174 * <salto> N bytes "to" part of soundsalike
175 *
176 * sectionID == SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto>
177 * <sofofromlen> 2 bytes length of <sofofrom>
178 * <sofofrom> N bytes "from" part of soundfold
179 * <sofotolen> 2 bytes length of <sofoto>
180 * <sofoto> N bytes "to" part of soundfold
181 *
182 * sectionID == SN_SUGFILE: <timestamp>
183 * <timestamp> 8 bytes time in seconds that must match with .sug file
184 *
185 * sectionID == SN_NOSPLITSUGS: nothing
186 *
187 * sectionID == SN_NOCOMPOUNDSUGS: nothing
188 *
189 * sectionID == SN_WORDS: <word> ...
190 * <word> N bytes NUL terminated common word
191 *
192 * sectionID == SN_MAP: <mapstr>
193 * <mapstr> N bytes String with sequences of similar characters,
194 * separated by slashes.
195 *
196 * sectionID == SN_COMPOUND: <compmax> <compminlen> <compsylmax> <compoptions>
197 * <comppatcount> <comppattern> ... <compflags>
198 * <compmax> 1 byte Maximum nr of words in compound word.
199 * <compminlen> 1 byte Minimal word length for compounding.
200 * <compsylmax> 1 byte Maximum nr of syllables in compound word.
201 * <compoptions> 2 bytes COMP_ flags.
202 * <comppatcount> 2 bytes number of <comppattern> following
203 * <compflags> N bytes Flags from COMPOUNDRULE items, separated by
204 * slashes.
205 *
206 * <comppattern>: <comppatlen> <comppattext>
207 * <comppatlen> 1 byte length of <comppattext>
208 * <comppattext> N bytes end or begin chars from CHECKCOMPOUNDPATTERN
209 *
210 * sectionID == SN_NOBREAK: (empty, its presence is what matters)
211 *
212 * sectionID == SN_SYLLABLE: <syllable>
213 * <syllable> N bytes String from SYLLABLE item.
214 *
215 * <LWORDTREE>: <wordtree>
216 *
217 * <KWORDTREE>: <wordtree>
218 *
219 * <PREFIXTREE>: <wordtree>
220 *
221 *
222 * <wordtree>: <nodecount> <nodedata> ...
223 *
224 * <nodecount> 4 bytes Number of nodes following. MSB first.
225 *
226 * <nodedata>: <siblingcount> <sibling> ...
227 *
228 * <siblingcount> 1 byte Number of siblings in this node. The siblings
229 * follow in sorted order.
230 *
231 * <sibling>: <byte> [ <nodeidx> <xbyte>
232 * | <flags> [<flags2>] [<region>] [<affixID>]
233 * | [<pflags>] <affixID> <prefcondnr> ]
234 *
235 * <byte> 1 byte Byte value of the sibling. Special cases:
236 * BY_NOFLAGS: End of word without flags and for all
237 * regions.
238 * For PREFIXTREE <affixID> and
239 * <prefcondnr> follow.
240 * BY_FLAGS: End of word, <flags> follow.
241 * For PREFIXTREE <pflags>, <affixID>
242 * and <prefcondnr> follow.
243 * BY_FLAGS2: End of word, <flags> and <flags2>
244 * follow. Not used in PREFIXTREE.
245 * BY_INDEX: Child of sibling is shared, <nodeidx>
246 * and <xbyte> follow.
247 *
248 * <nodeidx> 3 bytes Index of child for this sibling, MSB first.
249 *
250 * <xbyte> 1 byte byte value of the sibling.
251 *
252 * <flags> 1 byte bitmask of:
253 * WF_ALLCAP word must have only capitals
254 * WF_ONECAP first char of word must be capital
255 * WF_KEEPCAP keep-case word
256 * WF_FIXCAP keep-case word, all caps not allowed
257 * WF_RARE rare word
258 * WF_BANNED bad word
259 * WF_REGION <region> follows
260 * WF_AFX <affixID> follows
261 *
262 * <flags2> 1 byte Bitmask of:
263 * WF_HAS_AFF >> 8 word includes affix
264 * WF_NEEDCOMP >> 8 word only valid in compound
265 * WF_NOSUGGEST >> 8 word not used for suggestions
266 * WF_COMPROOT >> 8 word already a compound
267 * WF_NOCOMPBEF >> 8 no compounding before this word
268 * WF_NOCOMPAFT >> 8 no compounding after this word
269 *
270 * <pflags> 1 byte bitmask of:
271 * WFP_RARE rare prefix
272 * WFP_NC non-combining prefix
273 * WFP_UP letter after prefix made upper case
274 *
275 * <region> 1 byte Bitmask for regions in which word is valid. When
276 * omitted it's valid in all regions.
277 * Lowest bit is for region 1.
278 *
279 * <affixID> 1 byte ID of affix that can be used with this word. In
280 * PREFIXTREE used for the required prefix ID.
281 *
282 * <prefcondnr> 2 bytes Prefix condition number, index in <prefcond> list
283 * from HEADER.
284 *
285 * All text characters are in 'encoding', but stored as single bytes.
286 */
287
288 /*
289 * Vim .sug file format: <SUGHEADER>
290 * <SUGWORDTREE>
291 * <SUGTABLE>
292 *
293 * <SUGHEADER>: <fileID> <versionnr> <timestamp>
294 *
295 * <fileID> 6 bytes "VIMsug"
296 * <versionnr> 1 byte VIMSUGVERSION
297 * <timestamp> 8 bytes timestamp that must match with .spl file
298 *
299 *
300 * <SUGWORDTREE>: <wordtree> (see above, no flags or region used)
301 *
302 *
303 * <SUGTABLE>: <sugwcount> <sugline> ...
304 *
305 * <sugwcount> 4 bytes number of <sugline> following
306 *
307 * <sugline>: <sugnr> ... NUL
308 *
309 * <sugnr>: X bytes word number that results in this soundfolded word,
310 * stored as an offset to the previous number in as
311 * few bytes as possible, see offset2bytes().
312 */
313
314 #include "vim.h" 75 #include "vim.h"
315 76
316 #if defined(FEAT_SPELL) || defined(PROTO) 77 #if defined(FEAT_SPELL) || defined(PROTO)
317 78
318 #ifndef UNIX /* it's in os_unix.h for Unix */ 79 #ifndef UNIX /* it's in os_unix.h for Unix */
319 # include <time.h> /* for time_t */ 80 # include <time.h> /* for time_t */
320 #endif 81 #endif
321 82
322 #define MAXWLEN 254 /* Assume max. word len is this many bytes.
323 Some places assume a word length fits in a
324 byte, thus it can't be above 255.
325 Must be >= PFD_NOTSPECIAL. */
326
327 /* The type used for indexes in the word tree needs to be at least 4 bytes. If int
328 * is 8 bytes we could use something smaller, but what? */
329 #if VIM_SIZEOF_INT > 3
330 typedef int idx_T;
331 #else
332 typedef long idx_T;
333 #endif
334
335 #ifdef VMS
336 # define SPL_FNAME_TMPL "%s_%s.spl"
337 # define SPL_FNAME_ADD "_add."
338 # define SPL_FNAME_ASCII "_ascii."
339 #else
340 # define SPL_FNAME_TMPL "%s.%s.spl"
341 # define SPL_FNAME_ADD ".add."
342 # define SPL_FNAME_ASCII ".ascii."
343 #endif
344
345 /* Flags used for a word. Only the lowest byte can be used, the region byte
346 * comes above it. */
347 #define WF_REGION 0x01 /* region byte follows */
348 #define WF_ONECAP 0x02 /* word with one capital (or all capitals) */
349 #define WF_ALLCAP 0x04 /* word must be all capitals */
350 #define WF_RARE 0x08 /* rare word */
351 #define WF_BANNED 0x10 /* bad word */
352 #define WF_AFX 0x20 /* affix ID follows */
353 #define WF_FIXCAP 0x40 /* keep-case word, allcap not allowed */
354 #define WF_KEEPCAP 0x80 /* keep-case word */
355
356 /* for <flags2>, shifted up one byte to be used in wn_flags */
357 #define WF_HAS_AFF 0x0100 /* word includes affix */
358 #define WF_NEEDCOMP 0x0200 /* word only valid in compound */
359 #define WF_NOSUGGEST 0x0400 /* word not to be suggested */
360 #define WF_COMPROOT 0x0800 /* already compounded word, COMPOUNDROOT */
361 #define WF_NOCOMPBEF 0x1000 /* no compounding before this word */
362 #define WF_NOCOMPAFT 0x2000 /* no compounding after this word */
363
364 /* only used for su_badflags */ 83 /* only used for su_badflags */
365 #define WF_MIXCAP 0x20 /* mix of upper and lower case: macaRONI */ 84 #define WF_MIXCAP 0x20 /* mix of upper and lower case: macaRONI */
366 85
367 #define WF_CAPMASK (WF_ONECAP | WF_ALLCAP | WF_KEEPCAP | WF_FIXCAP) 86 #define WF_CAPMASK (WF_ONECAP | WF_ALLCAP | WF_KEEPCAP | WF_FIXCAP)
368 87
369 /* flags for <pflags> */
370 #define WFP_RARE 0x01 /* rare prefix */
371 #define WFP_NC 0x02 /* prefix is not combining */
372 #define WFP_UP 0x04 /* to-upper prefix */
373 #define WFP_COMPPERMIT 0x08 /* prefix with COMPOUNDPERMITFLAG */
374 #define WFP_COMPFORBID 0x10 /* prefix with COMPOUNDFORBIDFLAG */
375
376 /* Flags for postponed prefixes in "sl_pidxs". Must be above affixID (one
377 * byte) and prefcondnr (two bytes). */
378 #define WF_RAREPFX (WFP_RARE << 24) /* rare postponed prefix */
379 #define WF_PFX_NC (WFP_NC << 24) /* non-combining postponed prefix */
380 #define WF_PFX_UP (WFP_UP << 24) /* to-upper postponed prefix */
381 #define WF_PFX_COMPPERMIT (WFP_COMPPERMIT << 24) /* postponed prefix with
382 * COMPOUNDPERMITFLAG */
383 #define WF_PFX_COMPFORBID (WFP_COMPFORBID << 24) /* postponed prefix with
384 * COMPOUNDFORBIDFLAG */
385
386
387 /* flags for <compoptions> */
388 #define COMP_CHECKDUP 1 /* CHECKCOMPOUNDDUP */
389 #define COMP_CHECKREP 2 /* CHECKCOMPOUNDREP */
390 #define COMP_CHECKCASE 4 /* CHECKCOMPOUNDCASE */
391 #define COMP_CHECKTRIPLE 8 /* CHECKCOMPOUNDTRIPLE */
392
393 /* Special byte values for <byte>. Some are only used in the tree for
394 * postponed prefixes, some only in the other trees. This is a bit messy... */
395 #define BY_NOFLAGS 0 /* end of word without flags or region; for
396 * postponed prefix: no <pflags> */
397 #define BY_INDEX 1 /* child is shared, index follows */
398 #define BY_FLAGS 2 /* end of word, <flags> byte follows; for
399 * postponed prefix: <pflags> follows */
400 #define BY_FLAGS2 3 /* end of word, <flags> and <flags2> bytes
401 * follow; never used in prefix tree */
402 #define BY_SPECIAL BY_FLAGS2 /* highest special byte value */
403
404 /* Info from "REP", "REPSAL" and "SAL" entries in ".aff" file used in si_rep,
405 * si_repsal, sl_rep, and si_sal. Not for sl_sal!
406 * One replacement: from "ft_from" to "ft_to". */
407 typedef struct fromto_S
408 {
409 char_u *ft_from;
410 char_u *ft_to;
411 } fromto_T;
412
413 /* Info from "SAL" entries in ".aff" file used in sl_sal.
414 * The info is split for quick processing by spell_soundfold().
415 * Note that "sm_oneof" and "sm_rules" point into sm_lead. */
416 typedef struct salitem_S
417 {
418 char_u *sm_lead; /* leading letters */
419 int sm_leadlen; /* length of "sm_lead" */
420 char_u *sm_oneof; /* letters from () or NULL */
421 char_u *sm_rules; /* rules like ^, $, priority */
422 char_u *sm_to; /* replacement. */
423 #ifdef FEAT_MBYTE
424 int *sm_lead_w; /* wide character copy of "sm_lead" */
425 int *sm_oneof_w; /* wide character copy of "sm_oneof" */
426 int *sm_to_w; /* wide character copy of "sm_to" */
427 #endif
428 } salitem_T;
429
430 #ifdef FEAT_MBYTE
431 typedef int salfirst_T;
432 #else
433 typedef short salfirst_T;
434 #endif
435
436 /* Values for SP_*ERROR are negative, positive values are used by
437 * read_cnt_string(). */
438 #define SP_TRUNCERROR -1 /* spell file truncated error */
439 #define SP_FORMERROR -2 /* format error in spell file */
440 #define SP_OTHERERROR -3 /* other error while reading spell file */
441
442 /*
443 * Structure used to store words and other info for one language, loaded from
444 * a .spl file.
445 * The main access is through the tree in "sl_fbyts/sl_fidxs", storing the
446 * case-folded words. "sl_kbyts/sl_kidxs" is for keep-case words.
447 *
448 * The "byts" array stores the possible bytes in each tree node, preceded by
449 * the number of possible bytes, sorted on byte value:
450 * <len> <byte1> <byte2> ...
451 * The "idxs" array stores the index of the child node corresponding to the
452 * byte in "byts".
453 * Exception: when the byte is zero, the word may end here and "idxs" holds
454 * the flags, region mask and affixID for the word. There may be several
455 * zeros in sequence for alternative flag/region/affixID combinations.
456 */
457 typedef struct slang_S slang_T;
458 struct slang_S
459 {
460 slang_T *sl_next; /* next language */
461 char_u *sl_name; /* language name "en", "en.rare", "nl", etc. */
462 char_u *sl_fname; /* name of .spl file */
463 int sl_add; /* TRUE if it's a .add file. */
464
465 char_u *sl_fbyts; /* case-folded word bytes */
466 idx_T *sl_fidxs; /* case-folded word indexes */
467 char_u *sl_kbyts; /* keep-case word bytes */
468 idx_T *sl_kidxs; /* keep-case word indexes */
469 char_u *sl_pbyts; /* prefix tree word bytes */
470 idx_T *sl_pidxs; /* prefix tree word indexes */
471
472 char_u *sl_info; /* infotext string or NULL */
473
474 char_u sl_regions[17]; /* table with up to 8 region names plus NUL */
475
476 char_u *sl_midword; /* MIDWORD string or NULL */
477
478 hashtab_T sl_wordcount; /* hashtable with word count, wordcount_T */
479
480 int sl_compmax; /* COMPOUNDWORDMAX (default: MAXWLEN) */
481 int sl_compminlen; /* COMPOUNDMIN (default: 0) */
482 int sl_compsylmax; /* COMPOUNDSYLMAX (default: MAXWLEN) */
483 int sl_compoptions; /* COMP_* flags */
484 garray_T sl_comppat; /* CHECKCOMPOUNDPATTERN items */
485 regprog_T *sl_compprog; /* COMPOUNDRULE turned into a regexp program
486 * (NULL when no compounding) */
487 char_u *sl_comprules; /* all COMPOUNDRULE concatenated (or NULL) */
488 char_u *sl_compstartflags; /* flags for first compound word */
489 char_u *sl_compallflags; /* all flags for compound words */
490 char_u sl_nobreak; /* When TRUE: no spaces between words */
491 char_u *sl_syllable; /* SYLLABLE repeatable chars or NULL */
492 garray_T sl_syl_items; /* syllable items */
493
494 int sl_prefixcnt; /* number of items in "sl_prefprog" */
495 regprog_T **sl_prefprog; /* table with regprogs for prefixes */
496
497 garray_T sl_rep; /* list of fromto_T entries from REP lines */
498 short sl_rep_first[256]; /* indexes where byte first appears, -1 if
499 there is none */
500 garray_T sl_sal; /* list of salitem_T entries from SAL lines */
501 salfirst_T sl_sal_first[256]; /* indexes where byte first appears, -1 if
502 there is none */
503 int sl_followup; /* SAL followup */
504 int sl_collapse; /* SAL collapse_result */
505 int sl_rem_accents; /* SAL remove_accents */
506 int sl_sofo; /* SOFOFROM and SOFOTO instead of SAL items:
507 * "sl_sal_first" maps chars, when has_mbyte
508 * "sl_sal" is a list of wide char lists. */
509 garray_T sl_repsal; /* list of fromto_T entries from REPSAL lines */
510 short sl_repsal_first[256]; /* sl_rep_first for REPSAL lines */
511 int sl_nosplitsugs; /* don't suggest splitting a word */
512 int sl_nocompoundsugs; /* don't suggest compounding */
513
514 /* Info from the .sug file. Loaded on demand. */
515 time_t sl_sugtime; /* timestamp for .sug file */
516 char_u *sl_sbyts; /* soundfolded word bytes */
517 idx_T *sl_sidxs; /* soundfolded word indexes */
518 buf_T *sl_sugbuf; /* buffer with word number table */
519 int sl_sugloaded; /* TRUE when .sug file was loaded or failed to
520 load */
521
522 int sl_has_map; /* TRUE if there is a MAP line */
523 #ifdef FEAT_MBYTE
524 hashtab_T sl_map_hash; /* MAP for multi-byte chars */
525 int sl_map_array[256]; /* MAP for first 256 chars */
526 #else
527 char_u sl_map_array[256]; /* MAP for first 256 chars */
528 #endif
529 hashtab_T sl_sounddone; /* table with soundfolded words that have
530 been handled, see add_sound_suggest() */
531 };
532
533 /* First language that is loaded, start of the linked list of loaded
534 * languages. */
535 static slang_T *first_lang = NULL;
536
537 /* Flags used in .spl file for soundsalike flags. */
538 #define SAL_F0LLOWUP 1
539 #define SAL_COLLAPSE 2
540 #define SAL_REM_ACCENTS 4
541
542 /*
543 * Structure used in "b_langp", filled from 'spelllang'.
544 */
545 typedef struct langp_S
546 {
547 slang_T *lp_slang; /* info for this language */
548 slang_T *lp_sallang; /* language used for sound folding or NULL */
549 slang_T *lp_replang; /* language used for REP items or NULL */
550 int lp_region; /* bitmask for region or REGION_ALL */
551 } langp_T;
552
553 #define LANGP_ENTRY(ga, i) (((langp_T *)(ga).ga_data) + (i))
554
555 #define REGION_ALL 0xff /* word valid in all regions */ 88 #define REGION_ALL 0xff /* word valid in all regions */
556
557 #define VIMSPELLMAGIC "VIMspell" /* string at start of Vim spell file */
558 #define VIMSPELLMAGICL 8
559 #define VIMSPELLVERSION 50
560 89
561 #define VIMSUGMAGIC "VIMsug" /* string at start of Vim .sug file */ 90 #define VIMSUGMAGIC "VIMsug" /* string at start of Vim .sug file */
562 #define VIMSUGMAGICL 6 91 #define VIMSUGMAGICL 6
563 #define VIMSUGVERSION 1 92 #define VIMSUGVERSION 1
564
565 /* Section IDs. Only renumber them when VIMSPELLVERSION changes! */
566 #define SN_REGION 0 /* <regionname> section */
567 #define SN_CHARFLAGS 1 /* charflags section */
568 #define SN_MIDWORD 2 /* <midword> section */
569 #define SN_PREFCOND 3 /* <prefcond> section */
570 #define SN_REP 4 /* REP items section */
571 #define SN_SAL 5 /* SAL items section */
572 #define SN_SOFO 6 /* soundfolding section */
573 #define SN_MAP 7 /* MAP items section */
574 #define SN_COMPOUND 8 /* compound words section */
575 #define SN_SYLLABLE 9 /* syllable section */
576 #define SN_NOBREAK 10 /* NOBREAK section */
577 #define SN_SUGFILE 11 /* timestamp for .sug file */
578 #define SN_REPSAL 12 /* REPSAL items section */
579 #define SN_WORDS 13 /* common words */
580 #define SN_NOSPLITSUGS 14 /* don't split word for suggestions */
581 #define SN_INFO 15 /* info section */
582 #define SN_NOCOMPOUNDSUGS 16 /* don't compound for suggestions */
583 #define SN_END 255 /* end of sections */
584
585 #define SNF_REQUIRED 1 /* <sectionflags>: required section */
586 93
587 /* Result values. Lower number is accepted over higher one. */ 94 /* Result values. Lower number is accepted over higher one. */
588 #define SP_BANNED -1 95 #define SP_BANNED -1
589 #define SP_OK 0 96 #define SP_OK 0
590 #define SP_RARE 1 97 #define SP_RARE 1
591 #define SP_LOCAL 2 98 #define SP_LOCAL 2
592 #define SP_BAD 3 99 #define SP_BAD 3
593
594 /* file used for "zG" and "zW" */
595 static char_u *int_wordlist = NULL;
596 100
597 typedef struct wordcount_S 101 typedef struct wordcount_S
598 { 102 {
599 short_u wc_count; /* nr of times word was seen */ 103 short_u wc_count; /* nr of times word was seen */
600 char_u wc_word[1]; /* word, actually longer */ 104 char_u wc_word[1]; /* word, actually longer */
741 /* for NOBREAK */ 245 /* for NOBREAK */
742 int mi_result2; /* "mi_result" without following word */ 246 int mi_result2; /* "mi_result" without following word */
743 char_u *mi_end2; /* "mi_end" without following word */ 247 char_u *mi_end2; /* "mi_end" without following word */
744 } matchinf_T; 248 } matchinf_T;
745 249
746 /* 250
747 * The tables used for recognizing word characters according to spelling.
748 * These are only used for the first 256 characters of 'encoding'.
749 */
750 typedef struct spelltab_S
751 {
752 char_u st_isw[256]; /* flags: is word char */
753 char_u st_isu[256]; /* flags: is uppercase char */
754 char_u st_fold[256]; /* chars: folded case */
755 char_u st_upper[256]; /* chars: upper case */
756 } spelltab_T;
757
758 static spelltab_T spelltab;
759 static int did_set_spelltab;
760
761 #define CF_WORD 0x01
762 #define CF_UPPER 0x02
763
764 static void clear_spell_chartab(spelltab_T *sp);
765 static int set_spell_finish(spelltab_T *new_st);
766 static int spell_iswordp(char_u *p, win_T *wp); 251 static int spell_iswordp(char_u *p, win_T *wp);
767 static int spell_iswordp_nmw(char_u *p, win_T *wp);
768 #ifdef FEAT_MBYTE 252 #ifdef FEAT_MBYTE
769 static int spell_mb_isword_class(int cl, win_T *wp); 253 static int spell_mb_isword_class(int cl, win_T *wp);
770 static int spell_iswordp_w(int *p, win_T *wp); 254 static int spell_iswordp_w(int *p, win_T *wp);
771 #endif 255 #endif
772 static int write_spell_prefcond(FILE *fd, garray_T *gap);
773 256
774 /* 257 /*
775 * For finding suggestions: At each node in the tree these states are tried: 258 * For finding suggestions: At each node in the tree these states are tried:
776 */ 259 */
777 typedef enum 260 typedef enum
849 #define FIND_KEEPWORD 1 /* find keep-case word */ 332 #define FIND_KEEPWORD 1 /* find keep-case word */
850 #define FIND_PREFIX 2 /* find word after prefix */ 333 #define FIND_PREFIX 2 /* find word after prefix */
851 #define FIND_COMPOUND 3 /* find case-folded compound word */ 334 #define FIND_COMPOUND 3 /* find case-folded compound word */
852 #define FIND_KEEPCOMPOUND 4 /* find keep-case compound word */ 335 #define FIND_KEEPCOMPOUND 4 /* find keep-case compound word */
853 336
854 static slang_T *slang_alloc(char_u *lang);
855 static void slang_free(slang_T *lp);
856 static void slang_clear(slang_T *lp);
857 static void slang_clear_sug(slang_T *lp);
858 static void find_word(matchinf_T *mip, int mode); 337 static void find_word(matchinf_T *mip, int mode);
859 static int match_checkcompoundpattern(char_u *ptr, int wlen, garray_T *gap); 338 static int match_checkcompoundpattern(char_u *ptr, int wlen, garray_T *gap);
860 static int can_compound(slang_T *slang, char_u *word, char_u *flags); 339 static int can_compound(slang_T *slang, char_u *word, char_u *flags);
861 static int can_be_compound(trystate_T *sp, slang_T *slang, char_u *compflags, int flag); 340 static int can_be_compound(trystate_T *sp, slang_T *slang, char_u *compflags, int flag);
862 static int match_compoundrule(slang_T *slang, char_u *compflags); 341 static int match_compoundrule(slang_T *slang, char_u *compflags);
864 static void find_prefix(matchinf_T *mip, int mode); 343 static void find_prefix(matchinf_T *mip, int mode);
865 static int fold_more(matchinf_T *mip); 344 static int fold_more(matchinf_T *mip);
866 static int spell_valid_case(int wordflags, int treeflags); 345 static int spell_valid_case(int wordflags, int treeflags);
867 static int no_spell_checking(win_T *wp); 346 static int no_spell_checking(win_T *wp);
868 static void spell_load_lang(char_u *lang); 347 static void spell_load_lang(char_u *lang);
869 static char_u *spell_enc(void);
870 static void int_wordlist_spl(char_u *fname); 348 static void int_wordlist_spl(char_u *fname);
871 static void spell_load_cb(char_u *fname, void *cookie); 349 static void spell_load_cb(char_u *fname, void *cookie);
872 static slang_T *spell_load_file(char_u *fname, char_u *lang, slang_T *old_lp, int silent);
873 static char_u *read_cnt_string(FILE *fd, int cnt_bytes, int *lenp);
874 static int read_region_section(FILE *fd, slang_T *slang, int len);
875 static int read_charflags_section(FILE *fd);
876 static int read_prefcond_section(FILE *fd, slang_T *lp);
877 static int read_rep_section(FILE *fd, garray_T *gap, short *first);
878 static int read_sal_section(FILE *fd, slang_T *slang);
879 static int read_words_section(FILE *fd, slang_T *lp, int len);
880 static void count_common_word(slang_T *lp, char_u *word, int len, int count);
881 static int score_wordcount_adj(slang_T *slang, int score, char_u *word, int split); 350 static int score_wordcount_adj(slang_T *slang, int score, char_u *word, int split);
882 static int read_sofo_section(FILE *fd, slang_T *slang);
883 static int read_compound(FILE *fd, slang_T *slang, int len);
884 static int byte_in_str(char_u *str, int byte);
885 static int init_syl_tab(slang_T *slang);
886 static int count_syllables(slang_T *slang, char_u *word); 351 static int count_syllables(slang_T *slang, char_u *word);
887 static int set_sofo(slang_T *lp, char_u *from, char_u *to);
888 static void set_sal_first(slang_T *lp);
889 #ifdef FEAT_MBYTE
890 static int *mb_str2wide(char_u *s);
891 #endif
892 static int spell_read_tree(FILE *fd, char_u **bytsp, idx_T **idxsp, int prefixtree, int prefixcnt);
893 static idx_T read_tree_node(FILE *fd, char_u *byts, idx_T *idxs, int maxidx, idx_T startidx, int prefixtree, int maxprefcondnr);
894 static void clear_midword(win_T *buf); 352 static void clear_midword(win_T *buf);
895 static void use_midword(slang_T *lp, win_T *buf); 353 static void use_midword(slang_T *lp, win_T *buf);
896 static int find_region(char_u *rp, char_u *region); 354 static int find_region(char_u *rp, char_u *region);
897 static int captype(char_u *word, char_u *end);
898 static int badword_captype(char_u *word, char_u *end); 355 static int badword_captype(char_u *word, char_u *end);
899 static void spell_reload_one(char_u *fname, int added_word);
900 static void set_spell_charflags(char_u *flags, int cnt, char_u *upp);
901 static int set_spell_chartab(char_u *fol, char_u *low, char_u *upp);
902 static int spell_casefold(char_u *p, int len, char_u *buf, int buflen);
903 static int check_need_cap(linenr_T lnum, colnr_T col); 356 static int check_need_cap(linenr_T lnum, colnr_T col);
904 static void spell_find_suggest(char_u *badptr, int badlen, suginfo_T *su, int maxcount, int banbadword, int need_cap, int interactive); 357 static void spell_find_suggest(char_u *badptr, int badlen, suginfo_T *su, int maxcount, int banbadword, int need_cap, int interactive);
905 #ifdef FEAT_EVAL 358 #ifdef FEAT_EVAL
906 static void spell_suggest_expr(suginfo_T *su, char_u *expr); 359 static void spell_suggest_expr(suginfo_T *su, char_u *expr);
907 #endif 360 #endif
908 static void spell_suggest_file(suginfo_T *su, char_u *fname); 361 static void spell_suggest_file(suginfo_T *su, char_u *fname);
909 static void spell_suggest_intern(suginfo_T *su, int interactive); 362 static void spell_suggest_intern(suginfo_T *su, int interactive);
910 static void suggest_load_files(void);
911 static void tree_count_words(char_u *byts, idx_T *idxs);
912 static void spell_find_cleanup(suginfo_T *su); 363 static void spell_find_cleanup(suginfo_T *su);
913 static void onecap_copy(char_u *word, char_u *wcopy, int upper);
914 static void allcap_copy(char_u *word, char_u *wcopy); 364 static void allcap_copy(char_u *word, char_u *wcopy);
915 static void suggest_try_special(suginfo_T *su); 365 static void suggest_try_special(suginfo_T *su);
916 static void suggest_try_change(suginfo_T *su); 366 static void suggest_try_change(suginfo_T *su);
917 static void suggest_trie_walk(suginfo_T *su, langp_T *lp, char_u *fword, int soundfold); 367 static void suggest_trie_walk(suginfo_T *su, langp_T *lp, char_u *fword, int soundfold);
918 static void go_deeper(trystate_T *stack, int depth, int score_add); 368 static void go_deeper(trystate_T *stack, int depth, int score_add);
927 static void suggest_try_soundalike(suginfo_T *su); 377 static void suggest_try_soundalike(suginfo_T *su);
928 static void suggest_try_soundalike_finish(void); 378 static void suggest_try_soundalike_finish(void);
929 static void add_sound_suggest(suginfo_T *su, char_u *goodword, int score, langp_T *lp); 379 static void add_sound_suggest(suginfo_T *su, char_u *goodword, int score, langp_T *lp);
930 static int soundfold_find(slang_T *slang, char_u *word); 380 static int soundfold_find(slang_T *slang, char_u *word);
931 static void make_case_word(char_u *fword, char_u *cword, int flags); 381 static void make_case_word(char_u *fword, char_u *cword, int flags);
932 static void set_map_str(slang_T *lp, char_u *map);
933 static int similar_chars(slang_T *slang, int c1, int c2); 382 static int similar_chars(slang_T *slang, int c1, int c2);
934 static void add_suggestion(suginfo_T *su, garray_T *gap, char_u *goodword, int badlen, int score, int altscore, int had_bonus, slang_T *slang, int maxsf); 383 static void add_suggestion(suginfo_T *su, garray_T *gap, char_u *goodword, int badlen, int score, int altscore, int had_bonus, slang_T *slang, int maxsf);
935 static void check_suggestions(suginfo_T *su, garray_T *gap); 384 static void check_suggestions(suginfo_T *su, garray_T *gap);
936 static void add_banned(suginfo_T *su, char_u *word); 385 static void add_banned(suginfo_T *su, char_u *word);
937 static void rescore_suggestions(suginfo_T *su); 386 static void rescore_suggestions(suginfo_T *su);
938 static void rescore_one(suginfo_T *su, suggest_T *stp); 387 static void rescore_one(suginfo_T *su, suggest_T *stp);
939 static int cleanup_suggestions(garray_T *gap, int maxscore, int keep); 388 static int cleanup_suggestions(garray_T *gap, int maxscore, int keep);
940 static void spell_soundfold(slang_T *slang, char_u *inword, int folded, char_u *res);
941 static void spell_soundfold_sofo(slang_T *slang, char_u *inword, char_u *res); 389 static void spell_soundfold_sofo(slang_T *slang, char_u *inword, char_u *res);
942 static void spell_soundfold_sal(slang_T *slang, char_u *inword, char_u *res); 390 static void spell_soundfold_sal(slang_T *slang, char_u *inword, char_u *res);
943 #ifdef FEAT_MBYTE 391 #ifdef FEAT_MBYTE
944 static void spell_soundfold_wsal(slang_T *slang, char_u *inword, char_u *res); 392 static void spell_soundfold_wsal(slang_T *slang, char_u *inword, char_u *res);
945 #endif 393 #endif
949 #ifdef FEAT_MBYTE 397 #ifdef FEAT_MBYTE
950 static int spell_edit_score_limit_w(slang_T *slang, char_u *badword, char_u *goodword, int limit); 398 static int spell_edit_score_limit_w(slang_T *slang, char_u *badword, char_u *goodword, int limit);
951 #endif 399 #endif
952 static void dump_word(slang_T *slang, char_u *word, char_u *pat, int *dir, int round, int flags, linenr_T lnum); 400 static void dump_word(slang_T *slang, char_u *word, char_u *pat, int *dir, int round, int flags, linenr_T lnum);
953 static linenr_T dump_prefixes(slang_T *slang, char_u *word, char_u *pat, int *dir, int round, int flags, linenr_T startlnum); 401 static linenr_T dump_prefixes(slang_T *slang, char_u *word, char_u *pat, int *dir, int round, int flags, linenr_T startlnum);
954 static buf_T *open_spellbuf(void); 402
955 static void close_spellbuf(buf_T *buf);
956
957 /*
958 * Use our own character-case definitions, because the current locale may
959 * differ from what the .spl file uses.
960 * These must not be called with negative number!
 * Every variant below expands its argument "c" more than once, so do not
 * pass an expression with side effects (e.g. "*p++").
 * Characters below 256 are looked up in the "spelltab" tables, except that
 * with the multi-byte implementation and enc_utf8 set, characters from 128
 * upwards go through the utf_*() functions instead.
961 */
962 #ifndef FEAT_MBYTE
963 /* Non-multi-byte implementation. */
964 # define SPELL_TOFOLD(c) ((c) < 256 ? (int)spelltab.st_fold[c] : (c))
965 # define SPELL_TOUPPER(c) ((c) < 256 ? (int)spelltab.st_upper[c] : (c))
966 # define SPELL_ISUPPER(c) ((c) < 256 ? spelltab.st_isu[c] : FALSE)
967 #else
968 # if defined(HAVE_WCHAR_H)
969 # include <wchar.h> /* for towupper() and towlower() */
970 # endif
971 /* Multi-byte implementation. For Unicode we can call utf_*(), but don't do
972 * that for ASCII, because we don't want to use 'casemap' here. Otherwise use
973 * the "w" library function for characters above 255 if available. */
974 # ifdef HAVE_TOWLOWER
975 # define SPELL_TOFOLD(c) (enc_utf8 && (c) >= 128 ? utf_fold(c) \
976 : (c) < 256 ? (int)spelltab.st_fold[c] : (int)towlower(c))
977 # else
/* No towlower(): characters above 255 are returned unchanged. */
978 # define SPELL_TOFOLD(c) (enc_utf8 && (c) >= 128 ? utf_fold(c) \
979 : (c) < 256 ? (int)spelltab.st_fold[c] : (c))
980 # endif
981
982 # ifdef HAVE_TOWUPPER
983 # define SPELL_TOUPPER(c) (enc_utf8 && (c) >= 128 ? utf_toupper(c) \
984 : (c) < 256 ? (int)spelltab.st_upper[c] : (int)towupper(c))
985 # else
/* No towupper(): characters above 255 are returned unchanged. */
986 # define SPELL_TOUPPER(c) (enc_utf8 && (c) >= 128 ? utf_toupper(c) \
987 : (c) < 256 ? (int)spelltab.st_upper[c] : (c))
988 # endif
989
990 # ifdef HAVE_ISWUPPER
991 # define SPELL_ISUPPER(c) (enc_utf8 && (c) >= 128 ? utf_isupper(c) \
992 : (c) < 256 ? spelltab.st_isu[c] : iswupper(c))
993 # else
/* No iswupper(): characters above 255 are never reported as upper case. */
994 # define SPELL_ISUPPER(c) (enc_utf8 && (c) >= 128 ? utf_isupper(c) \
995 : (c) < 256 ? spelltab.st_isu[c] : (FALSE))
996 # endif
997 #endif
998
999
/* Messages used while reading spell and affix files.  The callers visible
 * in this file pass them through _() when displaying, e.g.
 * EMSG(_(e_format)); presumably N_() only marks the text for translation
 * (TODO confirm against the macro definition). */
1000 static char *e_format = N_("E759: Format error in spell file");
1001 static char *e_spell_trunc = N_("E758: Truncated spell file");
1002 static char *e_afftrailing = N_("Trailing text in %s line %d: %s");
1003 static char *e_affname = N_("Affix name too long in %s line %d: %s");
1004 static char *e_affform = N_("E761: Format error in affix file FOL, LOW or UPP");
1005 static char *e_affrange = N_("E762: Character in FOL, LOW or UPP is out of range");
1006 static char *msg_compressing = N_("Compressing word tree...");
1007 403
1008 /* Remember what "z?" replaced. */ 404 /* Remember what "z?" replaced. */
1009 static char_u *repl_from = NULL; 405 static char_u *repl_from = NULL;
1010 static char_u *repl_to = NULL; 406 static char_u *repl_to = NULL;
1011 407
2526 1922
2527 /* 1923 /*
2528 * Return the encoding used for spell checking: Use 'encoding', except that we 1924 * Return the encoding used for spell checking: Use 'encoding', except that we
2529 * use "latin1" for "latin9". And limit to 60 characters (just in case). 1925 * use "latin1" for "latin9". And limit to 60 characters (just in case).
2530 */ 1926 */
2531 static char_u * 1927 char_u *
2532 spell_enc(void) 1928 spell_enc(void)
2533 { 1929 {
2534 1930
2535 #ifdef FEAT_MBYTE 1931 #ifdef FEAT_MBYTE
2536 if (STRLEN(p_enc) < 60 && STRCMP(p_enc, "iso-8859-15") != 0) 1932 if (STRLEN(p_enc) < 60 && STRCMP(p_enc, "iso-8859-15") != 0)
2552 1948
2553 /* 1949 /*
2554 * Allocate a new slang_T for language "lang". "lang" can be NULL. 1950 * Allocate a new slang_T for language "lang". "lang" can be NULL.
2555 * Caller must fill "sl_next". 1951 * Caller must fill "sl_next".
2556 */ 1952 */
2557 static slang_T * 1953 slang_T *
2558 slang_alloc(char_u *lang) 1954 slang_alloc(char_u *lang)
2559 { 1955 {
2560 slang_T *lp; 1956 slang_T *lp;
2561 1957
2562 lp = (slang_T *)alloc_clear(sizeof(slang_T)); 1958 lp = (slang_T *)alloc_clear(sizeof(slang_T));
2575 } 1971 }
2576 1972
2577 /* 1973 /*
2578 * Free the contents of an slang_T and the structure itself. 1974 * Free the contents of an slang_T and the structure itself.
2579 */ 1975 */
2580 static void 1976 void
2581 slang_free(slang_T *lp) 1977 slang_free(slang_T *lp)
2582 { 1978 {
2583 vim_free(lp->sl_name); 1979 vim_free(lp->sl_name);
2584 vim_free(lp->sl_fname); 1980 vim_free(lp->sl_fname);
2585 slang_clear(lp); 1981 slang_clear(lp);
2587 } 1983 }
2588 1984
2589 /* 1985 /*
2590 * Clear an slang_T so that the file can be reloaded. 1986 * Clear an slang_T so that the file can be reloaded.
2591 */ 1987 */
2592 static void 1988 void
2593 slang_clear(slang_T *lp) 1989 slang_clear(slang_T *lp)
2594 { 1990 {
2595 garray_T *gap; 1991 garray_T *gap;
2596 fromto_T *ftp; 1992 fromto_T *ftp;
2597 salitem_T *smp; 1993 salitem_T *smp;
2693 } 2089 }
2694 2090
2695 /* 2091 /*
2696 * Clear the info from the .sug file in "lp". 2092 * Clear the info from the .sug file in "lp".
2697 */ 2093 */
2698 static void 2094 void
2699 slang_clear_sug(slang_T *lp) 2095 slang_clear_sug(slang_T *lp)
2700 { 2096 {
2701 vim_free(lp->sl_sbyts); 2097 vim_free(lp->sl_sbyts);
2702 lp->sl_sbyts = NULL; 2098 lp->sl_sbyts = NULL;
2703 vim_free(lp->sl_sidxs); 2099 vim_free(lp->sl_sidxs);
2730 2126
2731 slp->sl_slang = slang; 2127 slp->sl_slang = slang;
2732 } 2128 }
2733 } 2129 }
2734 2130
2735 /*
2736 * Load one spell file and store the info into a slang_T.
2737 *
2738 * This is invoked in three ways:
2739 * - From spell_load_cb() to load a spell file for the first time. "lang" is
2740 * the language name, "old_lp" is NULL. Will allocate an slang_T.
2741 * - To reload a spell file that was changed. "lang" is NULL and "old_lp"
2742 * points to the existing slang_T.
2743 * - Just after writing a .spl file; it's read back to produce the .sug file.
2744 * "old_lp" is NULL and "lang" is NULL. Will allocate an slang_T.
2745 *
2746 * Returns the slang_T the spell file was loaded into. NULL for error.
2747 */
2748 static slang_T *
2749 spell_load_file(
2750 char_u *fname,
2751 char_u *lang,
2752 slang_T *old_lp,
2753 int silent) /* no error if file doesn't exist */
2754 {
2755 FILE *fd;
2756 char_u buf[VIMSPELLMAGICL];
2757 char_u *p;
2758 int i;
2759 int n;
2760 int len;
2761 char_u *save_sourcing_name = sourcing_name;
2762 linenr_T save_sourcing_lnum = sourcing_lnum;
2763 slang_T *lp = NULL;
2764 int c = 0;
2765 int res;
2766
2767 fd = mch_fopen((char *)fname, "r");
2768 if (fd == NULL)
2769 {
2770 if (!silent)
2771 EMSG2(_(e_notopen), fname);
2772 else if (p_verbose > 2)
2773 {
2774 verbose_enter();
2775 smsg((char_u *)e_notopen, fname);
2776 verbose_leave();
2777 }
2778 goto endFAIL;
2779 }
2780 if (p_verbose > 2)
2781 {
2782 verbose_enter();
2783 smsg((char_u *)_("Reading spell file \"%s\""), fname);
2784 verbose_leave();
2785 }
2786
2787 if (old_lp == NULL)
2788 {
2789 lp = slang_alloc(lang);
2790 if (lp == NULL)
2791 goto endFAIL;
2792
2793 /* Remember the file name, used to reload the file when it's updated. */
2794 lp->sl_fname = vim_strsave(fname);
2795 if (lp->sl_fname == NULL)
2796 goto endFAIL;
2797
2798 /* Check for .add.spl (_add.spl for VMS). */
2799 lp->sl_add = strstr((char *)gettail(fname), SPL_FNAME_ADD) != NULL;
2800 }
2801 else
2802 lp = old_lp;
2803
2804 /* Set sourcing_name, so that error messages mention the file name. */
2805 sourcing_name = fname;
2806 sourcing_lnum = 0;
2807
2808 /*
2809 * <HEADER>: <fileID>
2810 */
2811 for (i = 0; i < VIMSPELLMAGICL; ++i)
2812 buf[i] = getc(fd); /* <fileID> */
2813 if (STRNCMP(buf, VIMSPELLMAGIC, VIMSPELLMAGICL) != 0)
2814 {
2815 EMSG(_("E757: This does not look like a spell file"));
2816 goto endFAIL;
2817 }
2818 c = getc(fd); /* <versionnr> */
2819 if (c < VIMSPELLVERSION)
2820 {
2821 EMSG(_("E771: Old spell file, needs to be updated"));
2822 goto endFAIL;
2823 }
2824 else if (c > VIMSPELLVERSION)
2825 {
2826 EMSG(_("E772: Spell file is for newer version of Vim"));
2827 goto endFAIL;
2828 }
2829
2830
2831 /*
2832 * <SECTIONS>: <section> ... <sectionend>
2833 * <section>: <sectionID> <sectionflags> <sectionlen> (section contents)
2834 */
2835 for (;;)
2836 {
2837 n = getc(fd); /* <sectionID> or <sectionend> */
2838 if (n == SN_END)
2839 break;
2840 c = getc(fd); /* <sectionflags> */
2841 len = get4c(fd); /* <sectionlen> */
2842 if (len < 0)
2843 goto truncerr;
2844
2845 res = 0;
2846 switch (n)
2847 {
2848 case SN_INFO:
2849 lp->sl_info = read_string(fd, len); /* <infotext> */
2850 if (lp->sl_info == NULL)
2851 goto endFAIL;
2852 break;
2853
2854 case SN_REGION:
2855 res = read_region_section(fd, lp, len);
2856 break;
2857
2858 case SN_CHARFLAGS:
2859 res = read_charflags_section(fd);
2860 break;
2861
2862 case SN_MIDWORD:
2863 lp->sl_midword = read_string(fd, len); /* <midword> */
2864 if (lp->sl_midword == NULL)
2865 goto endFAIL;
2866 break;
2867
2868 case SN_PREFCOND:
2869 res = read_prefcond_section(fd, lp);
2870 break;
2871
2872 case SN_REP:
2873 res = read_rep_section(fd, &lp->sl_rep, lp->sl_rep_first);
2874 break;
2875
2876 case SN_REPSAL:
2877 res = read_rep_section(fd, &lp->sl_repsal, lp->sl_repsal_first);
2878 break;
2879
2880 case SN_SAL:
2881 res = read_sal_section(fd, lp);
2882 break;
2883
2884 case SN_SOFO:
2885 res = read_sofo_section(fd, lp);
2886 break;
2887
2888 case SN_MAP:
2889 p = read_string(fd, len); /* <mapstr> */
2890 if (p == NULL)
2891 goto endFAIL;
2892 set_map_str(lp, p);
2893 vim_free(p);
2894 break;
2895
2896 case SN_WORDS:
2897 res = read_words_section(fd, lp, len);
2898 break;
2899
2900 case SN_SUGFILE:
2901 lp->sl_sugtime = get8ctime(fd); /* <timestamp> */
2902 break;
2903
2904 case SN_NOSPLITSUGS:
2905 lp->sl_nosplitsugs = TRUE;
2906 break;
2907
2908 case SN_NOCOMPOUNDSUGS:
2909 lp->sl_nocompoundsugs = TRUE;
2910 break;
2911
2912 case SN_COMPOUND:
2913 res = read_compound(fd, lp, len);
2914 break;
2915
2916 case SN_NOBREAK:
2917 lp->sl_nobreak = TRUE;
2918 break;
2919
2920 case SN_SYLLABLE:
2921 lp->sl_syllable = read_string(fd, len); /* <syllable> */
2922 if (lp->sl_syllable == NULL)
2923 goto endFAIL;
2924 if (init_syl_tab(lp) == FAIL)
2925 goto endFAIL;
2926 break;
2927
2928 default:
2929 /* Unsupported section. When it's required give an error
2930 * message. When it's not required skip the contents. */
2931 if (c & SNF_REQUIRED)
2932 {
2933 EMSG(_("E770: Unsupported section in spell file"));
2934 goto endFAIL;
2935 }
2936 while (--len >= 0)
2937 if (getc(fd) < 0)
2938 goto truncerr;
2939 break;
2940 }
2941 someerror:
2942 if (res == SP_FORMERROR)
2943 {
2944 EMSG(_(e_format));
2945 goto endFAIL;
2946 }
2947 if (res == SP_TRUNCERROR)
2948 {
2949 truncerr:
2950 EMSG(_(e_spell_trunc));
2951 goto endFAIL;
2952 }
2953 if (res == SP_OTHERERROR)
2954 goto endFAIL;
2955 }
2956
2957 /* <LWORDTREE> */
2958 res = spell_read_tree(fd, &lp->sl_fbyts, &lp->sl_fidxs, FALSE, 0);
2959 if (res != 0)
2960 goto someerror;
2961
2962 /* <KWORDTREE> */
2963 res = spell_read_tree(fd, &lp->sl_kbyts, &lp->sl_kidxs, FALSE, 0);
2964 if (res != 0)
2965 goto someerror;
2966
2967 /* <PREFIXTREE> */
2968 res = spell_read_tree(fd, &lp->sl_pbyts, &lp->sl_pidxs, TRUE,
2969 lp->sl_prefixcnt);
2970 if (res != 0)
2971 goto someerror;
2972
2973 /* For a new file link it in the list of spell files. */
2974 if (old_lp == NULL && lang != NULL)
2975 {
2976 lp->sl_next = first_lang;
2977 first_lang = lp;
2978 }
2979
2980 goto endOK;
2981
2982 endFAIL:
2983 if (lang != NULL)
2984 /* truncating the name signals the error to spell_load_lang() */
2985 *lang = NUL;
2986 if (lp != NULL && old_lp == NULL)
2987 slang_free(lp);
2988 lp = NULL;
2989
2990 endOK:
2991 if (fd != NULL)
2992 fclose(fd);
2993 sourcing_name = save_sourcing_name;
2994 sourcing_lnum = save_sourcing_lnum;
2995
2996 return lp;
2997 }
2998
2999 /*
3000 * Read a length field from "fd" in "cnt_bytes" bytes.
3001 * Allocate memory, read the string into it and add a NUL at the end.
3002 * Returns NULL when the count is zero.
3003 * Sets "*cntp" to SP_*ERROR when there is an error, length of the result
3004 * otherwise.
3005 */
3006 static char_u *
3007 read_cnt_string(FILE *fd, int cnt_bytes, int *cntp)
3008 {
3009 int cnt = 0;
3010 int i;
3011 char_u *str;
3012
3013 /* read the length bytes, MSB first */
3014 for (i = 0; i < cnt_bytes; ++i)
3015 cnt = (cnt << 8) + getc(fd);
3016 if (cnt < 0)
3017 {
3018 *cntp = SP_TRUNCERROR;
3019 return NULL;
3020 }
3021 *cntp = cnt;
3022 if (cnt == 0)
3023 return NULL; /* nothing to read, return NULL */
3024
3025 str = read_string(fd, cnt);
3026 if (str == NULL)
3027 *cntp = SP_OTHERERROR;
3028 return str;
3029 }
3030
3031 /*
3032 * Read SN_REGION: <regionname> ...
3033 * Return SP_*ERROR flags.
3034 */
3035 static int
3036 read_region_section(FILE *fd, slang_T *lp, int len)
3037 {
3038 int i;
3039
3040 if (len > 16)
3041 return SP_FORMERROR;
3042 for (i = 0; i < len; ++i)
3043 lp->sl_regions[i] = getc(fd); /* <regionname> */
3044 lp->sl_regions[len] = NUL;
3045 return 0;
3046 }
3047
3048 /*
3049 * Read SN_CHARFLAGS section: <charflagslen> <charflags>
3050 * <folcharslen> <folchars>
3051 * Return SP_*ERROR flags.
3052 */
3053 static int
3054 read_charflags_section(FILE *fd)
3055 {
3056 char_u *flags;
3057 char_u *fol;
3058 int flagslen, follen;
3059
3060 /* <charflagslen> <charflags> */
3061 flags = read_cnt_string(fd, 1, &flagslen);
3062 if (flagslen < 0)
3063 return flagslen;
3064
3065 /* <folcharslen> <folchars> */
3066 fol = read_cnt_string(fd, 2, &follen);
3067 if (follen < 0)
3068 {
3069 vim_free(flags);
3070 return follen;
3071 }
3072
3073 /* Set the word-char flags and fill SPELL_ISUPPER() table. */
3074 if (flags != NULL && fol != NULL)
3075 set_spell_charflags(flags, flagslen, fol);
3076
3077 vim_free(flags);
3078 vim_free(fol);
3079
3080 /* When <charflagslen> is zero then <fcharlen> must also be zero. */
3081 if ((flags == NULL) != (fol == NULL))
3082 return SP_FORMERROR;
3083 return 0;
3084 }
3085
3086 /*
3087 * Read SN_PREFCOND section.
3088 * Return SP_*ERROR flags.
3089 */
3090 static int
3091 read_prefcond_section(FILE *fd, slang_T *lp)
3092 {
3093 int cnt;
3094 int i;
3095 int n;
3096 char_u *p;
3097 char_u buf[MAXWLEN + 1];
3098
3099 /* <prefcondcnt> <prefcond> ... */
3100 cnt = get2c(fd); /* <prefcondcnt> */
3101 if (cnt <= 0)
3102 return SP_FORMERROR;
3103
3104 lp->sl_prefprog = (regprog_T **)alloc_clear(
3105 (unsigned)sizeof(regprog_T *) * cnt);
3106 if (lp->sl_prefprog == NULL)
3107 return SP_OTHERERROR;
3108 lp->sl_prefixcnt = cnt;
3109
3110 for (i = 0; i < cnt; ++i)
3111 {
3112 /* <prefcond> : <condlen> <condstr> */
3113 n = getc(fd); /* <condlen> */
3114 if (n < 0 || n >= MAXWLEN)
3115 return SP_FORMERROR;
3116
3117 /* When <condlen> is zero we have an empty condition. Otherwise
3118 * compile the regexp program used to check for the condition. */
3119 if (n > 0)
3120 {
3121 buf[0] = '^'; /* always match at one position only */
3122 p = buf + 1;
3123 while (n-- > 0)
3124 *p++ = getc(fd); /* <condstr> */
3125 *p = NUL;
3126 lp->sl_prefprog[i] = vim_regcomp(buf, RE_MAGIC + RE_STRING);
3127 }
3128 }
3129 return 0;
3130 }
3131
3132 /*
3133 * Read REP or REPSAL items section from "fd": <repcount> <rep> ...
3134 * Return SP_*ERROR flags.
3135 */
3136 static int
3137 read_rep_section(FILE *fd, garray_T *gap, short *first)
3138 {
3139 int cnt;
3140 fromto_T *ftp;
3141 int i;
3142
3143 cnt = get2c(fd); /* <repcount> */
3144 if (cnt < 0)
3145 return SP_TRUNCERROR;
3146
3147 if (ga_grow(gap, cnt) == FAIL)
3148 return SP_OTHERERROR;
3149
3150 /* <rep> : <repfromlen> <repfrom> <reptolen> <repto> */
3151 for (; gap->ga_len < cnt; ++gap->ga_len)
3152 {
3153 ftp = &((fromto_T *)gap->ga_data)[gap->ga_len];
3154 ftp->ft_from = read_cnt_string(fd, 1, &i);
3155 if (i < 0)
3156 return i;
3157 if (i == 0)
3158 return SP_FORMERROR;
3159 ftp->ft_to = read_cnt_string(fd, 1, &i);
3160 if (i <= 0)
3161 {
3162 vim_free(ftp->ft_from);
3163 if (i < 0)
3164 return i;
3165 return SP_FORMERROR;
3166 }
3167 }
3168
3169 /* Fill the first-index table. */
3170 for (i = 0; i < 256; ++i)
3171 first[i] = -1;
3172 for (i = 0; i < gap->ga_len; ++i)
3173 {
3174 ftp = &((fromto_T *)gap->ga_data)[i];
3175 if (first[*ftp->ft_from] == -1)
3176 first[*ftp->ft_from] = i;
3177 }
3178 return 0;
3179 }
3180
3181 /*
3182 * Read SN_SAL section: <salflags> <salcount> <sal> ...
3183 * Return SP_*ERROR flags.
3184 */
3185 static int
3186 read_sal_section(FILE *fd, slang_T *slang)
3187 {
3188 int i;
3189 int cnt;
3190 garray_T *gap;
3191 salitem_T *smp;
3192 int ccnt;
3193 char_u *p;
3194 int c = NUL;
3195
3196 slang->sl_sofo = FALSE;
3197
3198 i = getc(fd); /* <salflags> */
3199 if (i & SAL_F0LLOWUP)
3200 slang->sl_followup = TRUE;
3201 if (i & SAL_COLLAPSE)
3202 slang->sl_collapse = TRUE;
3203 if (i & SAL_REM_ACCENTS)
3204 slang->sl_rem_accents = TRUE;
3205
3206 cnt = get2c(fd); /* <salcount> */
3207 if (cnt < 0)
3208 return SP_TRUNCERROR;
3209
3210 gap = &slang->sl_sal;
3211 ga_init2(gap, sizeof(salitem_T), 10);
3212 if (ga_grow(gap, cnt + 1) == FAIL)
3213 return SP_OTHERERROR;
3214
3215 /* <sal> : <salfromlen> <salfrom> <saltolen> <salto> */
3216 for (; gap->ga_len < cnt; ++gap->ga_len)
3217 {
3218 smp = &((salitem_T *)gap->ga_data)[gap->ga_len];
3219 ccnt = getc(fd); /* <salfromlen> */
3220 if (ccnt < 0)
3221 return SP_TRUNCERROR;
3222 if ((p = alloc(ccnt + 2)) == NULL)
3223 return SP_OTHERERROR;
3224 smp->sm_lead = p;
3225
3226 /* Read up to the first special char into sm_lead. */
3227 for (i = 0; i < ccnt; ++i)
3228 {
3229 c = getc(fd); /* <salfrom> */
3230 if (vim_strchr((char_u *)"0123456789(-<^$", c) != NULL)
3231 break;
3232 *p++ = c;
3233 }
3234 smp->sm_leadlen = (int)(p - smp->sm_lead);
3235 *p++ = NUL;
3236
3237 /* Put (abc) chars in sm_oneof, if any. */
3238 if (c == '(')
3239 {
3240 smp->sm_oneof = p;
3241 for (++i; i < ccnt; ++i)
3242 {
3243 c = getc(fd); /* <salfrom> */
3244 if (c == ')')
3245 break;
3246 *p++ = c;
3247 }
3248 *p++ = NUL;
3249 if (++i < ccnt)
3250 c = getc(fd);
3251 }
3252 else
3253 smp->sm_oneof = NULL;
3254
3255 /* Any following chars go in sm_rules. */
3256 smp->sm_rules = p;
3257 if (i < ccnt)
3258 /* store the char we got while checking for end of sm_lead */
3259 *p++ = c;
3260 for (++i; i < ccnt; ++i)
3261 *p++ = getc(fd); /* <salfrom> */
3262 *p++ = NUL;
3263
3264 /* <saltolen> <salto> */
3265 smp->sm_to = read_cnt_string(fd, 1, &ccnt);
3266 if (ccnt < 0)
3267 {
3268 vim_free(smp->sm_lead);
3269 return ccnt;
3270 }
3271
3272 #ifdef FEAT_MBYTE
3273 if (has_mbyte)
3274 {
3275 /* convert the multi-byte strings to wide char strings */
3276 smp->sm_lead_w = mb_str2wide(smp->sm_lead);
3277 smp->sm_leadlen = mb_charlen(smp->sm_lead);
3278 if (smp->sm_oneof == NULL)
3279 smp->sm_oneof_w = NULL;
3280 else
3281 smp->sm_oneof_w = mb_str2wide(smp->sm_oneof);
3282 if (smp->sm_to == NULL)
3283 smp->sm_to_w = NULL;
3284 else
3285 smp->sm_to_w = mb_str2wide(smp->sm_to);
3286 if (smp->sm_lead_w == NULL
3287 || (smp->sm_oneof_w == NULL && smp->sm_oneof != NULL)
3288 || (smp->sm_to_w == NULL && smp->sm_to != NULL))
3289 {
3290 vim_free(smp->sm_lead);
3291 vim_free(smp->sm_to);
3292 vim_free(smp->sm_lead_w);
3293 vim_free(smp->sm_oneof_w);
3294 vim_free(smp->sm_to_w);
3295 return SP_OTHERERROR;
3296 }
3297 }
3298 #endif
3299 }
3300
3301 if (gap->ga_len > 0)
3302 {
3303 /* Add one extra entry to mark the end with an empty sm_lead. Avoids
3304 * that we need to check the index every time. */
3305 smp = &((salitem_T *)gap->ga_data)[gap->ga_len];
3306 if ((p = alloc(1)) == NULL)
3307 return SP_OTHERERROR;
3308 p[0] = NUL;
3309 smp->sm_lead = p;
3310 smp->sm_leadlen = 0;
3311 smp->sm_oneof = NULL;
3312 smp->sm_rules = p;
3313 smp->sm_to = NULL;
3314 #ifdef FEAT_MBYTE
3315 if (has_mbyte)
3316 {
3317 smp->sm_lead_w = mb_str2wide(smp->sm_lead);
3318 smp->sm_leadlen = 0;
3319 smp->sm_oneof_w = NULL;
3320 smp->sm_to_w = NULL;
3321 }
3322 #endif
3323 ++gap->ga_len;
3324 }
3325
3326 /* Fill the first-index table. */
3327 set_sal_first(slang);
3328
3329 return 0;
3330 }
3331
3332 /*
3333 * Read SN_WORDS: <word> ...
3334 * Return SP_*ERROR flags.
3335 */
3336 static int
3337 read_words_section(FILE *fd, slang_T *lp, int len)
3338 {
3339 int done = 0;
3340 int i;
3341 int c;
3342 char_u word[MAXWLEN];
3343
3344 while (done < len)
3345 {
3346 /* Read one word at a time. */
3347 for (i = 0; ; ++i)
3348 {
3349 c = getc(fd);
3350 if (c == EOF)
3351 return SP_TRUNCERROR;
3352 word[i] = c;
3353 if (word[i] == NUL)
3354 break;
3355 if (i == MAXWLEN - 1)
3356 return SP_FORMERROR;
3357 }
3358
3359 /* Init the count to 10. */
3360 count_common_word(lp, word, -1, 10);
3361 done += i + 1;
3362 }
3363 return 0;
3364 }
3365 2131
3366 /* 2132 /*
3367 * Add a word to the hashtable of common words. 2133 * Add a word to the hashtable of common words.
3368 * If it's already there then the counter is increased. 2134 * If it's already there then the counter is increased.
3369 */ 2135 */
3370 static void 2136 void
3371 count_common_word( 2137 count_common_word(
3372 slang_T *lp, 2138 slang_T *lp,
3373 char_u *word, 2139 char_u *word,
3374 int len, /* word length, -1 for upto NUL */ 2140 int len, /* word length, -1 for upto NUL */
3375 int count) /* 1 to count once, 10 to init */ 2141 int count) /* 1 to count once, 10 to init */
3441 return newscore; 2207 return newscore;
3442 } 2208 }
3443 return score; 2209 return score;
3444 } 2210 }
3445 2211
3446 /*
3447 * SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto>
3448 * Return SP_*ERROR flags.
3449 */
3450 static int
3451 read_sofo_section(FILE *fd, slang_T *slang)
3452 {
3453 int cnt;
3454 char_u *from, *to;
3455 int res;
3456
3457 slang->sl_sofo = TRUE;
3458
3459 /* <sofofromlen> <sofofrom> */
3460 from = read_cnt_string(fd, 2, &cnt);
3461 if (cnt < 0)
3462 return cnt;
3463
3464 /* <sofotolen> <sofoto> */
3465 to = read_cnt_string(fd, 2, &cnt);
3466 if (cnt < 0)
3467 {
3468 vim_free(from);
3469 return cnt;
3470 }
3471
3472 /* Store the info in slang->sl_sal and/or slang->sl_sal_first. */
3473 if (from != NULL && to != NULL)
3474 res = set_sofo(slang, from, to);
3475 else if (from != NULL || to != NULL)
3476 res = SP_FORMERROR; /* only one of two strings is an error */
3477 else
3478 res = 0;
3479
3480 vim_free(from);
3481 vim_free(to);
3482 return res;
3483 }
3484
3485 /*
3486 * Read the compound section from the .spl file:
3487 * <compmax> <compminlen> <compsylmax> <compoptions> <compflags>
3488 * Returns SP_*ERROR flags.
3489 */
3490 static int
3491 read_compound(FILE *fd, slang_T *slang, int len)
3492 {
3493 int todo = len;
3494 int c;
3495 int atstart;
3496 char_u *pat;
3497 char_u *pp;
3498 char_u *cp;
3499 char_u *ap;
3500 char_u *crp;
3501 int cnt;
3502 garray_T *gap;
3503
3504 if (todo < 2)
3505 return SP_FORMERROR; /* need at least two bytes */
3506
3507 --todo;
3508 c = getc(fd); /* <compmax> */
3509 if (c < 2)
3510 c = MAXWLEN;
3511 slang->sl_compmax = c;
3512
3513 --todo;
3514 c = getc(fd); /* <compminlen> */
3515 if (c < 1)
3516 c = 0;
3517 slang->sl_compminlen = c;
3518
3519 --todo;
3520 c = getc(fd); /* <compsylmax> */
3521 if (c < 1)
3522 c = MAXWLEN;
3523 slang->sl_compsylmax = c;
3524
3525 c = getc(fd); /* <compoptions> */
3526 if (c != 0)
3527 ungetc(c, fd); /* be backwards compatible with Vim 7.0b */
3528 else
3529 {
3530 --todo;
3531 c = getc(fd); /* only use the lower byte for now */
3532 --todo;
3533 slang->sl_compoptions = c;
3534
3535 gap = &slang->sl_comppat;
3536 c = get2c(fd); /* <comppatcount> */
3537 todo -= 2;
3538 ga_init2(gap, sizeof(char_u *), c);
3539 if (ga_grow(gap, c) == OK)
3540 while (--c >= 0)
3541 {
3542 ((char_u **)(gap->ga_data))[gap->ga_len++] =
3543 read_cnt_string(fd, 1, &cnt);
3544 /* <comppatlen> <comppattext> */
3545 if (cnt < 0)
3546 return cnt;
3547 todo -= cnt + 1;
3548 }
3549 }
3550 if (todo < 0)
3551 return SP_FORMERROR;
3552
3553 /* Turn the COMPOUNDRULE items into a regexp pattern:
3554 * "a[bc]/a*b+" -> "^\(a[bc]\|a*b\+\)$".
3555 * Inserting backslashes may double the length, "^\(\)$<Nul>" is 7 bytes.
3556 * Conversion to utf-8 may double the size. */
3557 c = todo * 2 + 7;
3558 #ifdef FEAT_MBYTE
3559 if (enc_utf8)
3560 c += todo * 2;
3561 #endif
3562 pat = alloc((unsigned)c);
3563 if (pat == NULL)
3564 return SP_OTHERERROR;
3565
3566 /* We also need a list of all flags that can appear at the start and one
3567 * for all flags. */
3568 cp = alloc(todo + 1);
3569 if (cp == NULL)
3570 {
3571 vim_free(pat);
3572 return SP_OTHERERROR;
3573 }
3574 slang->sl_compstartflags = cp;
3575 *cp = NUL;
3576
3577 ap = alloc(todo + 1);
3578 if (ap == NULL)
3579 {
3580 vim_free(pat);
3581 return SP_OTHERERROR;
3582 }
3583 slang->sl_compallflags = ap;
3584 *ap = NUL;
3585
3586 /* And a list of all patterns in their original form, for checking whether
3587 * compounding may work in match_compoundrule(). This is freed when we
3588 * encounter a wildcard, the check doesn't work then. */
3589 crp = alloc(todo + 1);
3590 slang->sl_comprules = crp;
3591
3592 pp = pat;
3593 *pp++ = '^';
3594 *pp++ = '\\';
3595 *pp++ = '(';
3596
3597 atstart = 1;
3598 while (todo-- > 0)
3599 {
3600 c = getc(fd); /* <compflags> */
3601 if (c == EOF)
3602 {
3603 vim_free(pat);
3604 return SP_TRUNCERROR;
3605 }
3606
3607 /* Add all flags to "sl_compallflags". */
3608 if (vim_strchr((char_u *)"?*+[]/", c) == NULL
3609 && !byte_in_str(slang->sl_compallflags, c))
3610 {
3611 *ap++ = c;
3612 *ap = NUL;
3613 }
3614
3615 if (atstart != 0)
3616 {
3617 /* At start of item: copy flags to "sl_compstartflags". For a
3618 * [abc] item set "atstart" to 2 and copy up to the ']'. */
3619 if (c == '[')
3620 atstart = 2;
3621 else if (c == ']')
3622 atstart = 0;
3623 else
3624 {
3625 if (!byte_in_str(slang->sl_compstartflags, c))
3626 {
3627 *cp++ = c;
3628 *cp = NUL;
3629 }
3630 if (atstart == 1)
3631 atstart = 0;
3632 }
3633 }
3634
3635 /* Copy flag to "sl_comprules", unless we run into a wildcard. */
3636 if (crp != NULL)
3637 {
3638 if (c == '?' || c == '+' || c == '*')
3639 {
3640 vim_free(slang->sl_comprules);
3641 slang->sl_comprules = NULL;
3642 crp = NULL;
3643 }
3644 else
3645 *crp++ = c;
3646 }
3647
3648 if (c == '/') /* slash separates two items */
3649 {
3650 *pp++ = '\\';
3651 *pp++ = '|';
3652 atstart = 1;
3653 }
3654 else /* normal char, "[abc]" and '*' are copied as-is */
3655 {
3656 if (c == '?' || c == '+' || c == '~')
3657 *pp++ = '\\'; /* "a?" becomes "a\?", "a+" becomes "a\+" */
3658 #ifdef FEAT_MBYTE
3659 if (enc_utf8)
3660 pp += mb_char2bytes(c, pp);
3661 else
3662 #endif
3663 *pp++ = c;
3664 }
3665 }
3666
3667 *pp++ = '\\';
3668 *pp++ = ')';
3669 *pp++ = '$';
3670 *pp = NUL;
3671
3672 if (crp != NULL)
3673 *crp = NUL;
3674
3675 slang->sl_compprog = vim_regcomp(pat, RE_MAGIC + RE_STRING + RE_STRICT);
3676 vim_free(pat);
3677 if (slang->sl_compprog == NULL)
3678 return SP_FORMERROR;
3679
3680 return 0;
3681 }
3682 2212
3683 /* 2213 /*
3684 * Return TRUE if byte "n" appears in "str". 2214 * Return TRUE if byte "n" appears in "str".
3685 * Like strchr() but independent of locale. 2215 * Like strchr() but independent of locale.
3686 */ 2216 */
3687 static int 2217 int
3688 byte_in_str(char_u *str, int n) 2218 byte_in_str(char_u *str, int n)
3689 { 2219 {
3690 char_u *p; 2220 char_u *p;
3691 2221
3692 for (p = str; *p != NUL; ++p) 2222 for (p = str; *p != NUL; ++p)
3704 2234
3705 /* 2235 /*
3706 * Truncate "slang->sl_syllable" at the first slash and put the following items 2236 * Truncate "slang->sl_syllable" at the first slash and put the following items
3707 * in "slang->sl_syl_items". 2237 * in "slang->sl_syl_items".
3708 */ 2238 */
3709 static int 2239 int
3710 init_syl_tab(slang_T *slang) 2240 init_syl_tab(slang_T *slang)
3711 { 2241 {
3712 char_u *p; 2242 char_u *p;
3713 char_u *s; 2243 char_u *s;
3714 int l; 2244 int l;
3800 skip = TRUE; /* don't count following syllable chars */ 2330 skip = TRUE; /* don't count following syllable chars */
3801 } 2331 }
3802 } 2332 }
3803 } 2333 }
3804 return cnt; 2334 return cnt;
3805 }
3806
3807 /*
3808 * Set the SOFOFROM and SOFOTO items in language "lp".
3809 * Returns SP_*ERROR flags when there is something wrong.
3810 */
3811 static int
3812 set_sofo(slang_T *lp, char_u *from, char_u *to)
3813 {
3814 int i;
3815
3816 #ifdef FEAT_MBYTE
3817 garray_T *gap;
3818 char_u *s;
3819 char_u *p;
3820 int c;
3821 int *inp;
3822
3823 if (has_mbyte)
3824 {
3825 /* Use "sl_sal" as an array with 256 pointers to a list of wide
3826 * characters. The index is the low byte of the character.
3827 * The list contains from-to pairs with a terminating NUL.
3828 * sl_sal_first[] is used for latin1 "from" characters. */
3829 gap = &lp->sl_sal;
3830 ga_init2(gap, sizeof(int *), 1);
3831 if (ga_grow(gap, 256) == FAIL)
3832 return SP_OTHERERROR;
3833 vim_memset(gap->ga_data, 0, sizeof(int *) * 256);
3834 gap->ga_len = 256;
3835
3836 /* First count the number of items for each list. Temporarily use
3837 * sl_sal_first[] for this. */
3838 for (p = from, s = to; *p != NUL && *s != NUL; )
3839 {
3840 c = mb_cptr2char_adv(&p);
3841 mb_cptr_adv(s);
3842 if (c >= 256)
3843 ++lp->sl_sal_first[c & 0xff];
3844 }
3845 if (*p != NUL || *s != NUL) /* lengths differ */
3846 return SP_FORMERROR;
3847
3848 /* Allocate the lists. */
3849 for (i = 0; i < 256; ++i)
3850 if (lp->sl_sal_first[i] > 0)
3851 {
3852 p = alloc(sizeof(int) * (lp->sl_sal_first[i] * 2 + 1));
3853 if (p == NULL)
3854 return SP_OTHERERROR;
3855 ((int **)gap->ga_data)[i] = (int *)p;
3856 *(int *)p = 0;
3857 }
3858
3859 /* Put the characters up to 255 in sl_sal_first[] the rest in a sl_sal
3860 * list. */
3861 vim_memset(lp->sl_sal_first, 0, sizeof(salfirst_T) * 256);
3862 for (p = from, s = to; *p != NUL && *s != NUL; )
3863 {
3864 c = mb_cptr2char_adv(&p);
3865 i = mb_cptr2char_adv(&s);
3866 if (c >= 256)
3867 {
3868 /* Append the from-to chars at the end of the list with
3869 * the low byte. */
3870 inp = ((int **)gap->ga_data)[c & 0xff];
3871 while (*inp != 0)
3872 ++inp;
3873 *inp++ = c; /* from char */
3874 *inp++ = i; /* to char */
3875 *inp++ = NUL; /* NUL at the end */
3876 }
3877 else
3878 /* mapping byte to char is done in sl_sal_first[] */
3879 lp->sl_sal_first[c] = i;
3880 }
3881 }
3882 else
3883 #endif
3884 {
3885 /* mapping bytes to bytes is done in sl_sal_first[] */
3886 if (STRLEN(from) != STRLEN(to))
3887 return SP_FORMERROR;
3888
3889 for (i = 0; to[i] != NUL; ++i)
3890 lp->sl_sal_first[from[i]] = to[i];
3891 lp->sl_sal.ga_len = 1; /* indicates we have soundfolding */
3892 }
3893
3894 return 0;
3895 }
3896
3897 /*
3898 * Fill the first-index table for "lp".
3899 */
3900 static void
3901 set_sal_first(slang_T *lp)
3902 {
3903 salfirst_T *sfirst;
3904 int i;
3905 salitem_T *smp;
3906 int c;
3907 garray_T *gap = &lp->sl_sal;
3908
3909 sfirst = lp->sl_sal_first;
3910 for (i = 0; i < 256; ++i)
3911 sfirst[i] = -1;
3912 smp = (salitem_T *)gap->ga_data;
3913 for (i = 0; i < gap->ga_len; ++i)
3914 {
3915 #ifdef FEAT_MBYTE
3916 if (has_mbyte)
3917 /* Use the lowest byte of the first character. For latin1 it's
3918 * the character, for other encodings it should differ for most
3919 * characters. */
3920 c = *smp[i].sm_lead_w & 0xff;
3921 else
3922 #endif
3923 c = *smp[i].sm_lead;
3924 if (sfirst[c] == -1)
3925 {
3926 sfirst[c] = i;
3927 #ifdef FEAT_MBYTE
3928 if (has_mbyte)
3929 {
3930 int n;
3931
3932 /* Make sure all entries with this byte are following each
3933 * other. Move the ones that are in the wrong position. Do
3934 * keep the same ordering! */
3935 while (i + 1 < gap->ga_len
3936 && (*smp[i + 1].sm_lead_w & 0xff) == c)
3937 /* Skip over entry with same index byte. */
3938 ++i;
3939
3940 for (n = 1; i + n < gap->ga_len; ++n)
3941 if ((*smp[i + n].sm_lead_w & 0xff) == c)
3942 {
3943 salitem_T tsal;
3944
3945 /* Move entry with same index byte after the entries
3946 * we already found. */
3947 ++i;
3948 --n;
3949 tsal = smp[i + n];
3950 mch_memmove(smp + i + 1, smp + i,
3951 sizeof(salitem_T) * n);
3952 smp[i] = tsal;
3953 }
3954 }
3955 #endif
3956 }
3957 }
3958 }
3959
#ifdef FEAT_MBYTE
/*
 * Convert the multi-byte string "s" into an array of wide characters, one
 * int per character, terminated by a NUL.
 * The result is in allocated memory; NULL is returned when out of memory.
 */
    static int *
mb_str2wide(char_u *s)
{
    int		*wide;
    int		n = 0;

    wide = (int *)alloc(sizeof(int) * (mb_charlen(s) + 1));
    if (wide == NULL)
	return NULL;

    while (*s != NUL)
	wide[n++] = mb_ptr2char_adv(&s);
    wide[n] = NUL;

    return wide;
}
#endif
3982
3983 /*
3984 * Read a tree from the .spl or .sug file.
3985 * Allocates the memory and stores pointers in "bytsp" and "idxsp".
3986 * This is skipped when the tree has zero length.
3987 * Returns zero when OK, SP_ value for an error.
3988 */
3989 static int
3990 spell_read_tree(
3991 FILE *fd,
3992 char_u **bytsp,
3993 idx_T **idxsp,
3994 int prefixtree, /* TRUE for the prefix tree */
3995 int prefixcnt) /* when "prefixtree" is TRUE: prefix count */
3996 {
3997 int len;
3998 int idx;
3999 char_u *bp;
4000 idx_T *ip;
4001
4002 /* The tree size was computed when writing the file, so that we can
4003 * allocate it as one long block. <nodecount> */
4004 len = get4c(fd);
4005 if (len < 0)
4006 return SP_TRUNCERROR;
4007 if (len > 0)
4008 {
4009 /* Allocate the byte array. */
4010 bp = lalloc((long_u)len, TRUE);
4011 if (bp == NULL)
4012 return SP_OTHERERROR;
4013 *bytsp = bp;
4014
4015 /* Allocate the index array. */
4016 ip = (idx_T *)lalloc_clear((long_u)(len * sizeof(int)), TRUE);
4017 if (ip == NULL)
4018 return SP_OTHERERROR;
4019 *idxsp = ip;
4020
4021 /* Recursively read the tree and store it in the array. */
4022 idx = read_tree_node(fd, bp, ip, len, 0, prefixtree, prefixcnt);
4023 if (idx < 0)
4024 return idx;
4025 }
4026 return 0;
4027 }
4028
4029 /*
4030 * Read one row of siblings from the spell file and store it in the byte array
4031 * "byts" and index array "idxs". Recursively read the children.
4032 *
4033 * NOTE: The code here must match put_node()!
4034 *
4035 * Returns the index (>= 0) following the siblings.
4036 * Returns SP_TRUNCERROR if the file is shorter than expected.
4037 * Returns SP_FORMERROR if there is a format error.
4038 */
4039 static idx_T
4040 read_tree_node(
4041 FILE *fd,
4042 char_u *byts,
4043 idx_T *idxs,
4044 int maxidx, /* size of arrays */
4045 idx_T startidx, /* current index in "byts" and "idxs" */
4046 int prefixtree, /* TRUE for reading PREFIXTREE */
4047 int maxprefcondnr) /* maximum for <prefcondnr> */
4048 {
4049 int len;
4050 int i;
4051 int n;
4052 idx_T idx = startidx;
4053 int c;
4054 int c2;
4055 #define SHARED_MASK 0x8000000
4056
4057 len = getc(fd); /* <siblingcount> */
4058 if (len <= 0)
4059 return SP_TRUNCERROR;
4060
4061 if (startidx + len >= maxidx)
4062 return SP_FORMERROR;
4063 byts[idx++] = len;
4064
4065 /* Read the byte values, flag/region bytes and shared indexes. */
4066 for (i = 1; i <= len; ++i)
4067 {
4068 c = getc(fd); /* <byte> */
4069 if (c < 0)
4070 return SP_TRUNCERROR;
4071 if (c <= BY_SPECIAL)
4072 {
4073 if (c == BY_NOFLAGS && !prefixtree)
4074 {
4075 /* No flags, all regions. */
4076 idxs[idx] = 0;
4077 c = 0;
4078 }
4079 else if (c != BY_INDEX)
4080 {
4081 if (prefixtree)
4082 {
4083 /* Read the optional pflags byte, the prefix ID and the
4084 * condition nr. In idxs[] store the prefix ID in the low
4085 * byte, the condition index shifted up 8 bits, the flags
4086 * shifted up 24 bits. */
4087 if (c == BY_FLAGS)
4088 c = getc(fd) << 24; /* <pflags> */
4089 else
4090 c = 0;
4091
4092 c |= getc(fd); /* <affixID> */
4093
4094 n = get2c(fd); /* <prefcondnr> */
4095 if (n >= maxprefcondnr)
4096 return SP_FORMERROR;
4097 c |= (n << 8);
4098 }
4099 else /* c must be BY_FLAGS or BY_FLAGS2 */
4100 {
4101 /* Read flags and optional region and prefix ID. In
4102 * idxs[] the flags go in the low two bytes, region above
4103 * that and prefix ID above the region. */
4104 c2 = c;
4105 c = getc(fd); /* <flags> */
4106 if (c2 == BY_FLAGS2)
4107 c = (getc(fd) << 8) + c; /* <flags2> */
4108 if (c & WF_REGION)
4109 c = (getc(fd) << 16) + c; /* <region> */
4110 if (c & WF_AFX)
4111 c = (getc(fd) << 24) + c; /* <affixID> */
4112 }
4113
4114 idxs[idx] = c;
4115 c = 0;
4116 }
4117 else /* c == BY_INDEX */
4118 {
4119 /* <nodeidx> */
4120 n = get3c(fd);
4121 if (n < 0 || n >= maxidx)
4122 return SP_FORMERROR;
4123 idxs[idx] = n + SHARED_MASK;
4124 c = getc(fd); /* <xbyte> */
4125 }
4126 }
4127 byts[idx++] = c;
4128 }
4129
4130 /* Recursively read the children for non-shared siblings.
4131 * Skip the end-of-word ones (zero byte value) and the shared ones (and
4132 * remove SHARED_MASK) */
4133 for (i = 1; i <= len; ++i)
4134 if (byts[startidx + i] != 0)
4135 {
4136 if (idxs[startidx + i] & SHARED_MASK)
4137 idxs[startidx + i] &= ~SHARED_MASK;
4138 else
4139 {
4140 idxs[startidx + i] = idx;
4141 idx = read_tree_node(fd, byts, idxs, maxidx, idx,
4142 prefixtree, maxprefcondnr);
4143 if (idx < 0)
4144 break;
4145 }
4146 }
4147
4148 return idx;
4149 } 2335 }
4150 2336
4151 /* 2337 /*
4152 * Parse 'spelllang' and set w_s->b_langp accordingly. 2338 * Parse 'spelllang' and set w_s->b_langp accordingly.
4153 * Returns NULL if it's OK, an error message otherwise. 2339 * Returns NULL if it's OK, an error message otherwise.
4560 * w word 0 2746 * w word 0
4561 * Word WF_ONECAP 2747 * Word WF_ONECAP
4562 * W WORD WF_ALLCAP 2748 * W WORD WF_ALLCAP
4563 * WoRd wOrd WF_KEEPCAP 2749 * WoRd wOrd WF_KEEPCAP
4564 */ 2750 */
4565 static int 2751 int
4566 captype( 2752 captype(
4567 char_u *word, 2753 char_u *word,
4568 char_u *end) /* When NULL use up to NUL byte. */ 2754 char_u *end) /* When NULL use up to NUL byte. */
4569 { 2755 {
4570 char_u *p; 2756 char_u *p;
4742 } 2928 }
4743 } 2929 }
4744 #endif 2930 #endif
4745 2931
4746 /* 2932 /*
4747 * Reload the spell file "fname" if it's loaded.
4748 */
4749 static void
4750 spell_reload_one(
4751 char_u *fname,
4752 int added_word) /* invoked through "zg" */
4753 {
4754 slang_T *slang;
4755 int didit = FALSE;
4756
4757 for (slang = first_lang; slang != NULL; slang = slang->sl_next)
4758 {
4759 if (fullpathcmp(fname, slang->sl_fname, FALSE) == FPC_SAME)
4760 {
4761 slang_clear(slang);
4762 if (spell_load_file(fname, NULL, slang, FALSE) == NULL)
4763 /* reloading failed, clear the language */
4764 slang_clear(slang);
4765 redraw_all_later(SOME_VALID);
4766 didit = TRUE;
4767 }
4768 }
4769
4770 /* When "zg" was used and the file wasn't loaded yet, should redo
4771 * 'spelllang' to load it now. */
4772 if (added_word && !didit)
4773 did_set_spelllang(curwin);
4774 }
4775
4776
4777 /*
4778 * Functions for ":mkspell".
4779 */
4780
#define MAXLINELEN  500		/* Maximum length in bytes of a line in a .aff
				   and .dic file. */
/*
 * Main structure to store the contents of a ".aff" file.
 * The unsigned "af_" fields hold the flag ID given for the item, as returned
 * by affitem2flag().  spell_read_aff() only stores an ID while the field is
 * still zero, thus the first occurrence of an item wins; the same goes for
 * "af_enc", which is only set while it is NULL.
 */
typedef struct afffile_S
{
    char_u	*af_enc;	/* "SET", normalized, alloc'ed string or NULL */
    int		af_flagtype;	/* AFT_CHAR, AFT_LONG, AFT_NUM or AFT_CAPLONG;
				   changed by the "FLAG" item */
    unsigned	af_rare;	/* RARE ID for rare word (also "RAR") */
    unsigned	af_keepcase;	/* KEEPCASE ID for keep-case word (also
				   "KEP") */
    unsigned	af_bad;		/* BAD ID for banned word (also
				   "FORBIDDENWORD") */
    unsigned	af_needaffix;	/* NEEDAFFIX ID */
    unsigned	af_circumfix;	/* CIRCUMFIX ID */
    unsigned	af_needcomp;	/* NEEDCOMPOUND ID (also "ONLYINCOMPOUND") */
    unsigned	af_comproot;	/* COMPOUNDROOT ID */
    unsigned	af_compforbid;	/* COMPOUNDFORBIDFLAG ID */
    unsigned	af_comppermit;	/* COMPOUNDPERMITFLAG ID */
    unsigned	af_nosuggest;	/* NOSUGGEST ID */
    int		af_pfxpostpone;	/* postpone prefixes without chop string and
				   without flags */
    int		af_ignoreextra;	/* IGNOREEXTRA present */
    hashtab_T	af_pref;	/* hashtable for prefixes, affheader_T */
    hashtab_T	af_suff;	/* hashtable for suffixes, affheader_T */
    hashtab_T	af_comp;	/* hashtable for compound flags, compitem_T */
} afffile_T;

/* Values for "af_flagtype".  AFT_CHAR is what spell_read_aff() expects to
 * find before a "FLAG" item is accepted. */
#define AFT_CHAR	0	/* flags are one character */
#define AFT_LONG	1	/* flags are two characters */
#define AFT_CAPLONG	2	/* flags are one or two characters */
#define AFT_NUM		3	/* flags are numbers, comma separated */
4812
typedef struct affentry_S affentry_T;
/* Affix entry from ".aff" file.  Used for prefixes and suffixes.
 * The entries for one affix form a list linked through "ae_next"; the list
 * starts at "ah_first" of the affix header. */
struct affentry_S
{
    affentry_T	*ae_next;	/* next affix with same name/number */
    char_u	*ae_chop;	/* text to chop off basic word (can be NULL) */
    char_u	*ae_add;	/* text to add to basic word (can be NULL) */
    char_u	*ae_flags;	/* flags on the affix (can be NULL) */
    char_u	*ae_cond;	/* condition (NULL for ".") */
    regprog_T	*ae_prog;	/* regexp program for ae_cond or NULL */
    char	ae_compforbid;	/* COMPOUNDFORBIDFLAG found */
    char	ae_comppermit;	/* COMPOUNDPERMITFLAG found */
};
4826
/* Size of the hashtable key for an affix or compound flag name, including
 * the terminating NUL. */
#ifdef FEAT_MBYTE
# define AH_KEY_LEN 17		/* 2 x 8 bytes + NUL */
#else
# define AH_KEY_LEN 7		/* 6 digits + NUL */
#endif

/* Affix header from ".aff" file.  Used for af_pref and af_suff.
 * "ah_key" must be the first member: HI2AH() casts the hashtable key pointer
 * to the structure. */
typedef struct affheader_S
{
    char_u	ah_key[AH_KEY_LEN]; /* key for hashtab == name of affix */
    unsigned	ah_flag;	/* affix name as number, uses "af_flagtype" */
    int		ah_newID;	/* prefix ID after renumbering; 0 if not used */
    int		ah_combine;	/* suffix may combine with prefix */
    int		ah_follows;	/* another affix block should be following */
    affentry_T	*ah_first;	/* first affix entry */
} affheader_T;

/* Get the affheader_T for hashtable item "hi". */
#define HI2AH(hi)   ((affheader_T *)(hi)->hi_key)
4845
/* Flag used in compound items.  Stored in the "af_comp" hashtable.
 * "ci_key" must be the first member: HI2CI() casts the hashtable key pointer
 * to the structure. */
typedef struct compitem_S
{
    char_u	ci_key[AH_KEY_LEN]; /* key for hashtab == name of compound */
    unsigned	ci_flag;	/* affix name as number, uses "af_flagtype" */
    int		ci_newID;	/* affix ID after renumbering. */
} compitem_T;

/* Get the compitem_T for hashtable item "hi". */
#define HI2CI(hi)   ((compitem_T *)(hi)->hi_key)
4855
/*
 * Structure that is used to store the items in the word tree.  This avoids
 * the need to keep track of each allocated thing, everything is freed all at
 * once after ":mkspell" is done.
 * Note: "sb_next" must be just before "sb_data" to make sure the alignment of
 * "sb_data" is correct for systems where pointers must be aligned on
 * pointer-size boundaries and sizeof(pointer) > sizeof(int) (e.g., Sparc).
 * Do not reorder the members for that reason.
 */
#define  SBLOCKSIZE 16000	/* size of sb_data */
typedef struct sblock_S sblock_T;
struct sblock_S
{
    int		sb_used;	/* nr of bytes already in use */
    sblock_T	*sb_next;	/* next block in list */
    char_u	sb_data[1];	/* data, actually longer; presumably room for
				   SBLOCKSIZE bytes is allocated -- confirm
				   in getroom() */
};
4872
4873 /*
4874 * A node in the tree.
4875 */
4876 typedef struct wordnode_S wordnode_T;
4877 struct wordnode_S
4878 {
4879 union /* shared to save space */
4880 {
4881 char_u hashkey[6]; /* the hash key, only used while compressing */
4882 int index; /* index in written nodes (valid after first
4883 round) */
4884 } wn_u1;
4885 union /* shared to save space */
4886 {
4887 wordnode_T *next; /* next node with same hash key */
4888 wordnode_T *wnode; /* parent node that will write this node */
4889 } wn_u2;
4890 wordnode_T *wn_child; /* child (next byte in word) */
4891 wordnode_T *wn_sibling; /* next sibling (alternate byte in word,
4892 always sorted) */
4893 int wn_refs; /* Nr. of references to this node. Only
4894 relevant for first node in a list of
4895 siblings, in following siblings it is
4896 always one. */
4897 char_u wn_byte; /* Byte for this node. NUL for word end */
4898
4899 /* Info for when "wn_byte" is NUL.
4900 * In PREFIXTREE "wn_region" is used for the prefcondnr.
4901 * In the soundfolded word tree "wn_flags" has the MSW of the wordnr and
4902 * "wn_region" the LSW of the wordnr. */
4903 char_u wn_affixID; /* supported/required prefix ID or 0 */
4904 short_u wn_flags; /* WF_ flags */
4905 short wn_region; /* region mask */
4906
4907 #ifdef SPELL_PRINTTREE
4908 int wn_nr; /* sequence nr for printing */
4909 #endif
4910 };
4911
4912 #define WN_MASK 0xffff /* mask relevant bits of "wn_flags" */
4913
4914 #define HI2WN(hi) (wordnode_T *)((hi)->hi_key)
4915
/*
 * Info used while reading the spell files.
 * One structure collects what is read from all the .aff and .dic files.
 * spell_read_aff() only takes the REP, REPSAL, SAL and MAP lines from a file
 * when the matching list ("si_rep", "si_repsal", "si_sal", "si_map") is
 * still empty, thus the first .aff file that has them wins.
 */
typedef struct spellinfo_S
{
    wordnode_T	*si_foldroot;	/* tree with case-folded words */
    long	si_foldwcount;	/* nr of words in si_foldroot */

    wordnode_T	*si_keeproot;	/* tree with keep-case words */
    long	si_keepwcount;	/* nr of words in si_keeproot */

    wordnode_T	*si_prefroot;	/* tree with postponed prefixes */

    long	si_sugtree;	/* creating the soundfolding trie */

    sblock_T	*si_blocks;	/* memory blocks used */
    long	si_blocks_cnt;	/* memory blocks allocated */
    int		si_did_emsg;	/* TRUE when ran out of memory */

    long	si_compress_cnt;    /* words to add before lowering
				       compression limit */
    wordnode_T	*si_first_free; /* List of nodes that have been freed during
				   compression, linked by "wn_child" field. */
    long	si_free_count;	/* number of nodes in si_first_free */
#ifdef SPELL_PRINTTREE
    int		si_wordnode_nr;	/* sequence nr for nodes */
#endif
    buf_T	*si_spellbuf;	/* buffer used to store soundfold word table */

    int		si_ascii;	/* handling only ASCII words */
    int		si_add;		/* addition file */
    int		si_clear_chartab;   /* when TRUE clear char tables */
    int		si_region;	/* region mask */
    vimconv_T	si_conv;	/* for conversion to 'encoding'; set up when
				   the "SET" item of the .aff file is read */
    int		si_memtot;	/* runtime memory used */
    int		si_verbose;	/* verbose messages */
    int		si_msg_count;	/* number of words added since last message */
    char_u	*si_info;	/* info text chars or NULL; the info items
				   are joined with a line break */
    int		si_region_count; /* number of regions supported (1 when there
				    are no regions) */
    char_u	si_region_name[17]; /* region names; used only if
				     * si_region_count > 1 */

    garray_T	si_rep;		/* list of fromto_T entries from REP lines */
    garray_T	si_repsal;	/* list of fromto_T entries from REPSAL lines */
    garray_T	si_sal;		/* list of fromto_T entries from SAL lines */
    char_u	*si_sofofr;	/* SOFOFROM text */
    char_u	*si_sofoto;	/* SOFOTO text */
    int		si_nosugfile;	/* NOSUGFILE item found */
    int		si_nosplitsugs;	/* NOSPLITSUGS item found */
    int		si_nocompoundsugs; /* NOCOMPOUNDSUGS item found */
    int		si_followup;	/* soundsalike: ? */
    int		si_collapse;	/* soundsalike: ? */
    hashtab_T	si_commonwords;	/* hashtable for common words */
    time_t	si_sugtime;	/* timestamp for .sug file */
    int		si_rem_accents;	/* soundsalike: remove accents */
    garray_T	si_map;		/* MAP info concatenated */
    char_u	*si_midword;	/* MIDWORD chars or NULL */
    int		si_compmax;	/* max nr of words for compounding */
    int		si_compminlen;	/* minimal length for compounding */
    int		si_compsylmax;	/* max nr of syllables for compounding */
    int		si_compoptions;	/* COMP_ flags */
    garray_T	si_comppat;	/* CHECKCOMPOUNDPATTERN items, each stored as
				   a string */
    char_u	*si_compflags;	/* flags used for compounding */
    char_u	si_nobreak;	/* NOBREAK */
    char_u	*si_syllable;	/* syllable string */
    garray_T	si_prefcond;	/* table with conditions for postponed
				 * prefixes, each stored as a string */
    int		si_newprefID;	/* current value for ah_newID */
    int		si_newcompID;	/* current value for compound ID */
} spellinfo_T;
4988
4989 static afffile_T *spell_read_aff(spellinfo_T *spin, char_u *fname);
4990 static int is_aff_rule(char_u **items, int itemcnt, char *rulename, int mincount);
4991 static void aff_process_flags(afffile_T *affile, affentry_T *entry);
4992 static int spell_info_item(char_u *s);
4993 static unsigned affitem2flag(int flagtype, char_u *item, char_u *fname, int lnum);
4994 static unsigned get_affitem(int flagtype, char_u **pp);
4995 static void process_compflags(spellinfo_T *spin, afffile_T *aff, char_u *compflags);
4996 static void check_renumber(spellinfo_T *spin);
4997 static int flag_in_afflist(int flagtype, char_u *afflist, unsigned flag);
4998 static void aff_check_number(int spinval, int affval, char *name);
4999 static void aff_check_string(char_u *spinval, char_u *affval, char *name);
5000 static int str_equal(char_u *s1, char_u *s2);
5001 static void add_fromto(spellinfo_T *spin, garray_T *gap, char_u *from, char_u *to);
5002 static int sal_to_bool(char_u *s);
5003 static void spell_free_aff(afffile_T *aff);
5004 static int spell_read_dic(spellinfo_T *spin, char_u *fname, afffile_T *affile);
5005 static int get_affix_flags(afffile_T *affile, char_u *afflist);
5006 static int get_pfxlist(afffile_T *affile, char_u *afflist, char_u *store_afflist);
5007 static void get_compflags(afffile_T *affile, char_u *afflist, char_u *store_afflist);
5008 static int store_aff_word(spellinfo_T *spin, char_u *word, char_u *afflist, afffile_T *affile, hashtab_T *ht, hashtab_T *xht, int condit, int flags, char_u *pfxlist, int pfxlen);
5009 static int spell_read_wordfile(spellinfo_T *spin, char_u *fname);
5010 static void *getroom(spellinfo_T *spin, size_t len, int align);
5011 static char_u *getroom_save(spellinfo_T *spin, char_u *s);
5012 static void free_blocks(sblock_T *bl);
5013 static wordnode_T *wordtree_alloc(spellinfo_T *spin);
5014 static int store_word(spellinfo_T *spin, char_u *word, int flags, int region, char_u *pfxlist, int need_affix);
5015 static int tree_add_word(spellinfo_T *spin, char_u *word, wordnode_T *tree, int flags, int region, int affixID);
5016 static wordnode_T *get_wordnode(spellinfo_T *spin);
5017 static int deref_wordnode(spellinfo_T *spin, wordnode_T *node);
5018 static void free_wordnode(spellinfo_T *spin, wordnode_T *n);
5019 static void wordtree_compress(spellinfo_T *spin, wordnode_T *root);
5020 static int node_compress(spellinfo_T *spin, wordnode_T *node, hashtab_T *ht, int *tot);
5021 static int node_equal(wordnode_T *n1, wordnode_T *n2);
5022 static int write_vim_spell(spellinfo_T *spin, char_u *fname);
5023 static void clear_node(wordnode_T *node);
5024 static int put_node(FILE *fd, wordnode_T *node, int idx, int regionmask, int prefixtree);
5025 static void spell_make_sugfile(spellinfo_T *spin, char_u *wfname);
5026 static int sug_filltree(spellinfo_T *spin, slang_T *slang);
5027 static int sug_maketable(spellinfo_T *spin);
5028 static int sug_filltable(spellinfo_T *spin, wordnode_T *node, int startwordnr, garray_T *gap);
5029 static int offset2bytes(int nr, char_u *buf);
5030 static int bytes2offset(char_u **pp);
5031 static void sug_write(spellinfo_T *spin, char_u *fname);
5032 static void mkspell(int fcount, char_u **fnames, int ascii, int over_write, int added_word);
5033 static void spell_message(spellinfo_T *spin, char_u *str);
5034 static void init_spellfile(void);
5035
/* In the postponed prefixes tree wn_flags is used to store the WFP_ flags,
 * but it must be negative to indicate the prefix tree to tree_add_word().
 * Use a negative number with the lower 8 bits zero.
 * The value is parenthesized so that it stays one operand wherever the
 * macro is expanded. */
#define PFX_FLAGS	(-256)

/* flags for "condit" argument of store_aff_word(), can be combined */
#define CONDIT_COMB	1	/* affix must combine */
#define CONDIT_CFIX	2	/* affix must have CIRCUMFIX flag */
#define CONDIT_SUF	4	/* add a suffix for matching flags */
#define CONDIT_AFF	8	/* word already has an affix */

/*
 * Tunable parameters for when the tree is compressed.  See 'mkspellmem'.
 */
static long compress_start = 30000;	/* memory / SBLOCKSIZE */
static long compress_inc = 100;		/* memory / SBLOCKSIZE */
static long compress_added = 500000;	/* word count */
5053
5054 #ifdef SPELL_PRINTTREE
/*
 * For debugging the tree code: print the current tree in a (more or less)
 * readable format, so that we can see what happens when adding a word and/or
 * compressing the tree.
 * Based on code from Olaf Seibert.
 */
#define PRINTLINESIZE	1000	/* size of each of the three line buffers */
#define PRINTWIDTH	6	/* nr of columns used for one tree level */

/*
 * Format "a1" and "a2" with "fmt" into line buffer "l", starting at the
 * column for tree level "depth" and limited to what is left of the buffer.
 * The arguments are parenthesized so that an expression can be passed for
 * "l" and "depth".
 */
#define PRINTSOME(l, depth, fmt, a1, a2) vim_snprintf((l) + (depth) * PRINTWIDTH, \
	    PRINTLINESIZE - PRINTWIDTH * (depth), (fmt), (a1), (a2))

/* The three text lines that together show one row of nodes. */
static char line1[PRINTLINESIZE];
static char line2[PRINTLINESIZE];
static char line3[PRINTLINESIZE];
5070
5071 static void
5072 spell_clear_flags(wordnode_T *node)
5073 {
5074 wordnode_T *np;
5075
5076 for (np = node; np != NULL; np = np->wn_sibling)
5077 {
5078 np->wn_u1.index = FALSE;
5079 spell_clear_flags(np->wn_child);
5080 }
5081 }
5082
5083 static void
5084 spell_print_node(wordnode_T *node, int depth)
5085 {
5086 if (node->wn_u1.index)
5087 {
5088 /* Done this node before, print the reference. */
5089 PRINTSOME(line1, depth, "(%d)", node->wn_nr, 0);
5090 PRINTSOME(line2, depth, " ", 0, 0);
5091 PRINTSOME(line3, depth, " ", 0, 0);
5092 msg((char_u *)line1);
5093 msg((char_u *)line2);
5094 msg((char_u *)line3);
5095 }
5096 else
5097 {
5098 node->wn_u1.index = TRUE;
5099
5100 if (node->wn_byte != NUL)
5101 {
5102 if (node->wn_child != NULL)
5103 PRINTSOME(line1, depth, " %c -> ", node->wn_byte, 0);
5104 else
5105 /* Cannot happen? */
5106 PRINTSOME(line1, depth, " %c ???", node->wn_byte, 0);
5107 }
5108 else
5109 PRINTSOME(line1, depth, " $ ", 0, 0);
5110
5111 PRINTSOME(line2, depth, "%d/%d ", node->wn_nr, node->wn_refs);
5112
5113 if (node->wn_sibling != NULL)
5114 PRINTSOME(line3, depth, " | ", 0, 0);
5115 else
5116 PRINTSOME(line3, depth, " ", 0, 0);
5117
5118 if (node->wn_byte == NUL)
5119 {
5120 msg((char_u *)line1);
5121 msg((char_u *)line2);
5122 msg((char_u *)line3);
5123 }
5124
5125 /* do the children */
5126 if (node->wn_byte != NUL && node->wn_child != NULL)
5127 spell_print_node(node->wn_child, depth + 1);
5128
5129 /* do the siblings */
5130 if (node->wn_sibling != NULL)
5131 {
5132 /* get rid of all parent details except | */
5133 STRCPY(line1, line3);
5134 STRCPY(line2, line3);
5135 spell_print_node(node->wn_sibling, depth);
5136 }
5137 }
5138 }
5139
5140 static void
5141 spell_print_tree(wordnode_T *root)
5142 {
5143 if (root != NULL)
5144 {
5145 /* Clear the "wn_u1.index" fields, used to remember what has been
5146 * done. */
5147 spell_clear_flags(root);
5148
5149 /* Recursively print the tree. */
5150 spell_print_node(root, 0);
5151 }
5152 }
5153 #endif /* SPELL_PRINTTREE */
5154
5155 /*
5156 * Read the affix file "fname".
5157 * Returns an afffile_T, NULL for complete failure.
5158 */
5159 static afffile_T *
5160 spell_read_aff(spellinfo_T *spin, char_u *fname)
5161 {
5162 FILE *fd;
5163 afffile_T *aff;
5164 char_u rline[MAXLINELEN];
5165 char_u *line;
5166 char_u *pc = NULL;
5167 #define MAXITEMCNT 30
5168 char_u *(items[MAXITEMCNT]);
5169 int itemcnt;
5170 char_u *p;
5171 int lnum = 0;
5172 affheader_T *cur_aff = NULL;
5173 int did_postpone_prefix = FALSE;
5174 int aff_todo = 0;
5175 hashtab_T *tp;
5176 char_u *low = NULL;
5177 char_u *fol = NULL;
5178 char_u *upp = NULL;
5179 int do_rep;
5180 int do_repsal;
5181 int do_sal;
5182 int do_mapline;
5183 int found_map = FALSE;
5184 hashitem_T *hi;
5185 int l;
5186 int compminlen = 0; /* COMPOUNDMIN value */
5187 int compsylmax = 0; /* COMPOUNDSYLMAX value */
5188 int compoptions = 0; /* COMP_ flags */
5189 int compmax = 0; /* COMPOUNDWORDMAX value */
5190 char_u *compflags = NULL; /* COMPOUNDFLAG and COMPOUNDRULE
5191 concatenated */
5192 char_u *midword = NULL; /* MIDWORD value */
5193 char_u *syllable = NULL; /* SYLLABLE value */
5194 char_u *sofofrom = NULL; /* SOFOFROM value */
5195 char_u *sofoto = NULL; /* SOFOTO value */
5196
5197 /*
5198 * Open the file.
5199 */
5200 fd = mch_fopen((char *)fname, "r");
5201 if (fd == NULL)
5202 {
5203 EMSG2(_(e_notopen), fname);
5204 return NULL;
5205 }
5206
5207 vim_snprintf((char *)IObuff, IOSIZE, _("Reading affix file %s ..."), fname);
5208 spell_message(spin, IObuff);
5209
5210 /* Only do REP lines when not done in another .aff file already. */
5211 do_rep = spin->si_rep.ga_len == 0;
5212
5213 /* Only do REPSAL lines when not done in another .aff file already. */
5214 do_repsal = spin->si_repsal.ga_len == 0;
5215
5216 /* Only do SAL lines when not done in another .aff file already. */
5217 do_sal = spin->si_sal.ga_len == 0;
5218
5219 /* Only do MAP lines when not done in another .aff file already. */
5220 do_mapline = spin->si_map.ga_len == 0;
5221
5222 /*
5223 * Allocate and init the afffile_T structure.
5224 */
5225 aff = (afffile_T *)getroom(spin, sizeof(afffile_T), TRUE);
5226 if (aff == NULL)
5227 {
5228 fclose(fd);
5229 return NULL;
5230 }
5231 hash_init(&aff->af_pref);
5232 hash_init(&aff->af_suff);
5233 hash_init(&aff->af_comp);
5234
5235 /*
5236 * Read all the lines in the file one by one.
5237 */
5238 while (!vim_fgets(rline, MAXLINELEN, fd) && !got_int)
5239 {
5240 line_breakcheck();
5241 ++lnum;
5242
5243 /* Skip comment lines. */
5244 if (*rline == '#')
5245 continue;
5246
5247 /* Convert from "SET" to 'encoding' when needed. */
5248 vim_free(pc);
5249 #ifdef FEAT_MBYTE
5250 if (spin->si_conv.vc_type != CONV_NONE)
5251 {
5252 pc = string_convert(&spin->si_conv, rline, NULL);
5253 if (pc == NULL)
5254 {
5255 smsg((char_u *)_("Conversion failure for word in %s line %d: %s"),
5256 fname, lnum, rline);
5257 continue;
5258 }
5259 line = pc;
5260 }
5261 else
5262 #endif
5263 {
5264 pc = NULL;
5265 line = rline;
5266 }
5267
5268 /* Split the line up in white separated items. Put a NUL after each
5269 * item. */
5270 itemcnt = 0;
5271 for (p = line; ; )
5272 {
5273 while (*p != NUL && *p <= ' ') /* skip white space and CR/NL */
5274 ++p;
5275 if (*p == NUL)
5276 break;
5277 if (itemcnt == MAXITEMCNT) /* too many items */
5278 break;
5279 items[itemcnt++] = p;
5280 /* A few items have arbitrary text argument, don't split them. */
5281 if (itemcnt == 2 && spell_info_item(items[0]))
5282 while (*p >= ' ' || *p == TAB) /* skip until CR/NL */
5283 ++p;
5284 else
5285 while (*p > ' ') /* skip until white space or CR/NL */
5286 ++p;
5287 if (*p == NUL)
5288 break;
5289 *p++ = NUL;
5290 }
5291
5292 /* Handle non-empty lines. */
5293 if (itemcnt > 0)
5294 {
5295 if (is_aff_rule(items, itemcnt, "SET", 2) && aff->af_enc == NULL)
5296 {
5297 #ifdef FEAT_MBYTE
5298 /* Setup for conversion from "ENC" to 'encoding'. */
5299 aff->af_enc = enc_canonize(items[1]);
5300 if (aff->af_enc != NULL && !spin->si_ascii
5301 && convert_setup(&spin->si_conv, aff->af_enc,
5302 p_enc) == FAIL)
5303 smsg((char_u *)_("Conversion in %s not supported: from %s to %s"),
5304 fname, aff->af_enc, p_enc);
5305 spin->si_conv.vc_fail = TRUE;
5306 #else
5307 smsg((char_u *)_("Conversion in %s not supported"), fname);
5308 #endif
5309 }
5310 else if (is_aff_rule(items, itemcnt, "FLAG", 2)
5311 && aff->af_flagtype == AFT_CHAR)
5312 {
5313 if (STRCMP(items[1], "long") == 0)
5314 aff->af_flagtype = AFT_LONG;
5315 else if (STRCMP(items[1], "num") == 0)
5316 aff->af_flagtype = AFT_NUM;
5317 else if (STRCMP(items[1], "caplong") == 0)
5318 aff->af_flagtype = AFT_CAPLONG;
5319 else
5320 smsg((char_u *)_("Invalid value for FLAG in %s line %d: %s"),
5321 fname, lnum, items[1]);
5322 if (aff->af_rare != 0
5323 || aff->af_keepcase != 0
5324 || aff->af_bad != 0
5325 || aff->af_needaffix != 0
5326 || aff->af_circumfix != 0
5327 || aff->af_needcomp != 0
5328 || aff->af_comproot != 0
5329 || aff->af_nosuggest != 0
5330 || compflags != NULL
5331 || aff->af_suff.ht_used > 0
5332 || aff->af_pref.ht_used > 0)
5333 smsg((char_u *)_("FLAG after using flags in %s line %d: %s"),
5334 fname, lnum, items[1]);
5335 }
5336 else if (spell_info_item(items[0]))
5337 {
5338 p = (char_u *)getroom(spin,
5339 (spin->si_info == NULL ? 0 : STRLEN(spin->si_info))
5340 + STRLEN(items[0])
5341 + STRLEN(items[1]) + 3, FALSE);
5342 if (p != NULL)
5343 {
5344 if (spin->si_info != NULL)
5345 {
5346 STRCPY(p, spin->si_info);
5347 STRCAT(p, "\n");
5348 }
5349 STRCAT(p, items[0]);
5350 STRCAT(p, " ");
5351 STRCAT(p, items[1]);
5352 spin->si_info = p;
5353 }
5354 }
5355 else if (is_aff_rule(items, itemcnt, "MIDWORD", 2)
5356 && midword == NULL)
5357 {
5358 midword = getroom_save(spin, items[1]);
5359 }
5360 else if (is_aff_rule(items, itemcnt, "TRY", 2))
5361 {
5362 /* ignored, we look in the tree for what chars may appear */
5363 }
5364 /* TODO: remove "RAR" later */
5365 else if ((is_aff_rule(items, itemcnt, "RAR", 2)
5366 || is_aff_rule(items, itemcnt, "RARE", 2))
5367 && aff->af_rare == 0)
5368 {
5369 aff->af_rare = affitem2flag(aff->af_flagtype, items[1],
5370 fname, lnum);
5371 }
5372 /* TODO: remove "KEP" later */
5373 else if ((is_aff_rule(items, itemcnt, "KEP", 2)
5374 || is_aff_rule(items, itemcnt, "KEEPCASE", 2))
5375 && aff->af_keepcase == 0)
5376 {
5377 aff->af_keepcase = affitem2flag(aff->af_flagtype, items[1],
5378 fname, lnum);
5379 }
5380 else if ((is_aff_rule(items, itemcnt, "BAD", 2)
5381 || is_aff_rule(items, itemcnt, "FORBIDDENWORD", 2))
5382 && aff->af_bad == 0)
5383 {
5384 aff->af_bad = affitem2flag(aff->af_flagtype, items[1],
5385 fname, lnum);
5386 }
5387 else if (is_aff_rule(items, itemcnt, "NEEDAFFIX", 2)
5388 && aff->af_needaffix == 0)
5389 {
5390 aff->af_needaffix = affitem2flag(aff->af_flagtype, items[1],
5391 fname, lnum);
5392 }
5393 else if (is_aff_rule(items, itemcnt, "CIRCUMFIX", 2)
5394 && aff->af_circumfix == 0)
5395 {
5396 aff->af_circumfix = affitem2flag(aff->af_flagtype, items[1],
5397 fname, lnum);
5398 }
5399 else if (is_aff_rule(items, itemcnt, "NOSUGGEST", 2)
5400 && aff->af_nosuggest == 0)
5401 {
5402 aff->af_nosuggest = affitem2flag(aff->af_flagtype, items[1],
5403 fname, lnum);
5404 }
5405 else if ((is_aff_rule(items, itemcnt, "NEEDCOMPOUND", 2)
5406 || is_aff_rule(items, itemcnt, "ONLYINCOMPOUND", 2))
5407 && aff->af_needcomp == 0)
5408 {
5409 aff->af_needcomp = affitem2flag(aff->af_flagtype, items[1],
5410 fname, lnum);
5411 }
5412 else if (is_aff_rule(items, itemcnt, "COMPOUNDROOT", 2)
5413 && aff->af_comproot == 0)
5414 {
5415 aff->af_comproot = affitem2flag(aff->af_flagtype, items[1],
5416 fname, lnum);
5417 }
5418 else if (is_aff_rule(items, itemcnt, "COMPOUNDFORBIDFLAG", 2)
5419 && aff->af_compforbid == 0)
5420 {
5421 aff->af_compforbid = affitem2flag(aff->af_flagtype, items[1],
5422 fname, lnum);
5423 if (aff->af_pref.ht_used > 0)
5424 smsg((char_u *)_("Defining COMPOUNDFORBIDFLAG after PFX item may give wrong results in %s line %d"),
5425 fname, lnum);
5426 }
5427 else if (is_aff_rule(items, itemcnt, "COMPOUNDPERMITFLAG", 2)
5428 && aff->af_comppermit == 0)
5429 {
5430 aff->af_comppermit = affitem2flag(aff->af_flagtype, items[1],
5431 fname, lnum);
5432 if (aff->af_pref.ht_used > 0)
5433 smsg((char_u *)_("Defining COMPOUNDPERMITFLAG after PFX item may give wrong results in %s line %d"),
5434 fname, lnum);
5435 }
5436 else if (is_aff_rule(items, itemcnt, "COMPOUNDFLAG", 2)
5437 && compflags == NULL)
5438 {
5439 /* Turn flag "c" into COMPOUNDRULE compatible string "c+",
5440 * "Na" into "Na+", "1234" into "1234+". */
5441 p = getroom(spin, STRLEN(items[1]) + 2, FALSE);
5442 if (p != NULL)
5443 {
5444 STRCPY(p, items[1]);
5445 STRCAT(p, "+");
5446 compflags = p;
5447 }
5448 }
5449 else if (is_aff_rule(items, itemcnt, "COMPOUNDRULES", 2))
5450 {
5451 /* We don't use the count, but do check that it's a number and
5452 * not COMPOUNDRULE mistyped. */
5453 if (atoi((char *)items[1]) == 0)
5454 smsg((char_u *)_("Wrong COMPOUNDRULES value in %s line %d: %s"),
5455 fname, lnum, items[1]);
5456 }
5457 else if (is_aff_rule(items, itemcnt, "COMPOUNDRULE", 2))
5458 {
5459 /* Don't use the first rule if it is a number. */
5460 if (compflags != NULL || *skipdigits(items[1]) != NUL)
5461 {
5462 /* Concatenate this string to previously defined ones,
5463 * using a slash to separate them. */
5464 l = (int)STRLEN(items[1]) + 1;
5465 if (compflags != NULL)
5466 l += (int)STRLEN(compflags) + 1;
5467 p = getroom(spin, l, FALSE);
5468 if (p != NULL)
5469 {
5470 if (compflags != NULL)
5471 {
5472 STRCPY(p, compflags);
5473 STRCAT(p, "/");
5474 }
5475 STRCAT(p, items[1]);
5476 compflags = p;
5477 }
5478 }
5479 }
5480 else if (is_aff_rule(items, itemcnt, "COMPOUNDWORDMAX", 2)
5481 && compmax == 0)
5482 {
5483 compmax = atoi((char *)items[1]);
5484 if (compmax == 0)
5485 smsg((char_u *)_("Wrong COMPOUNDWORDMAX value in %s line %d: %s"),
5486 fname, lnum, items[1]);
5487 }
5488 else if (is_aff_rule(items, itemcnt, "COMPOUNDMIN", 2)
5489 && compminlen == 0)
5490 {
5491 compminlen = atoi((char *)items[1]);
5492 if (compminlen == 0)
5493 smsg((char_u *)_("Wrong COMPOUNDMIN value in %s line %d: %s"),
5494 fname, lnum, items[1]);
5495 }
5496 else if (is_aff_rule(items, itemcnt, "COMPOUNDSYLMAX", 2)
5497 && compsylmax == 0)
5498 {
5499 compsylmax = atoi((char *)items[1]);
5500 if (compsylmax == 0)
5501 smsg((char_u *)_("Wrong COMPOUNDSYLMAX value in %s line %d: %s"),
5502 fname, lnum, items[1]);
5503 }
5504 else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDDUP", 1))
5505 {
5506 compoptions |= COMP_CHECKDUP;
5507 }
5508 else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDREP", 1))
5509 {
5510 compoptions |= COMP_CHECKREP;
5511 }
5512 else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDCASE", 1))
5513 {
5514 compoptions |= COMP_CHECKCASE;
5515 }
5516 else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDTRIPLE", 1))
5517 {
5518 compoptions |= COMP_CHECKTRIPLE;
5519 }
5520 else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDPATTERN", 2))
5521 {
5522 if (atoi((char *)items[1]) == 0)
5523 smsg((char_u *)_("Wrong CHECKCOMPOUNDPATTERN value in %s line %d: %s"),
5524 fname, lnum, items[1]);
5525 }
5526 else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDPATTERN", 3))
5527 {
5528 garray_T *gap = &spin->si_comppat;
5529 int i;
5530
5531 /* Only add the couple if it isn't already there. */
5532 for (i = 0; i < gap->ga_len - 1; i += 2)
5533 if (STRCMP(((char_u **)(gap->ga_data))[i], items[1]) == 0
5534 && STRCMP(((char_u **)(gap->ga_data))[i + 1],
5535 items[2]) == 0)
5536 break;
5537 if (i >= gap->ga_len && ga_grow(gap, 2) == OK)
5538 {
5539 ((char_u **)(gap->ga_data))[gap->ga_len++]
5540 = getroom_save(spin, items[1]);
5541 ((char_u **)(gap->ga_data))[gap->ga_len++]
5542 = getroom_save(spin, items[2]);
5543 }
5544 }
5545 else if (is_aff_rule(items, itemcnt, "SYLLABLE", 2)
5546 && syllable == NULL)
5547 {
5548 syllable = getroom_save(spin, items[1]);
5549 }
5550 else if (is_aff_rule(items, itemcnt, "NOBREAK", 1))
5551 {
5552 spin->si_nobreak = TRUE;
5553 }
5554 else if (is_aff_rule(items, itemcnt, "NOSPLITSUGS", 1))
5555 {
5556 spin->si_nosplitsugs = TRUE;
5557 }
5558 else if (is_aff_rule(items, itemcnt, "NOCOMPOUNDSUGS", 1))
5559 {
5560 spin->si_nocompoundsugs = TRUE;
5561 }
5562 else if (is_aff_rule(items, itemcnt, "NOSUGFILE", 1))
5563 {
5564 spin->si_nosugfile = TRUE;
5565 }
5566 else if (is_aff_rule(items, itemcnt, "PFXPOSTPONE", 1))
5567 {
5568 aff->af_pfxpostpone = TRUE;
5569 }
5570 else if (is_aff_rule(items, itemcnt, "IGNOREEXTRA", 1))
5571 {
5572 aff->af_ignoreextra = TRUE;
5573 }
5574 else if ((STRCMP(items[0], "PFX") == 0
5575 || STRCMP(items[0], "SFX") == 0)
5576 && aff_todo == 0
5577 && itemcnt >= 4)
5578 {
5579 int lasti = 4;
5580 char_u key[AH_KEY_LEN];
5581
5582 if (*items[0] == 'P')
5583 tp = &aff->af_pref;
5584 else
5585 tp = &aff->af_suff;
5586
5587 /* Myspell allows the same affix name to be used multiple
5588 * times. The affix files that do this have an undocumented
5589 * "S" flag on all but the last block, thus we check for that
5590 * and store it in ah_follows. */
5591 vim_strncpy(key, items[1], AH_KEY_LEN - 1);
5592 hi = hash_find(tp, key);
5593 if (!HASHITEM_EMPTY(hi))
5594 {
5595 cur_aff = HI2AH(hi);
5596 if (cur_aff->ah_combine != (*items[2] == 'Y'))
5597 smsg((char_u *)_("Different combining flag in continued affix block in %s line %d: %s"),
5598 fname, lnum, items[1]);
5599 if (!cur_aff->ah_follows)
5600 smsg((char_u *)_("Duplicate affix in %s line %d: %s"),
5601 fname, lnum, items[1]);
5602 }
5603 else
5604 {
5605 /* New affix letter. */
5606 cur_aff = (affheader_T *)getroom(spin,
5607 sizeof(affheader_T), TRUE);
5608 if (cur_aff == NULL)
5609 break;
5610 cur_aff->ah_flag = affitem2flag(aff->af_flagtype, items[1],
5611 fname, lnum);
5612 if (cur_aff->ah_flag == 0 || STRLEN(items[1]) >= AH_KEY_LEN)
5613 break;
5614 if (cur_aff->ah_flag == aff->af_bad
5615 || cur_aff->ah_flag == aff->af_rare
5616 || cur_aff->ah_flag == aff->af_keepcase
5617 || cur_aff->ah_flag == aff->af_needaffix
5618 || cur_aff->ah_flag == aff->af_circumfix
5619 || cur_aff->ah_flag == aff->af_nosuggest
5620 || cur_aff->ah_flag == aff->af_needcomp
5621 || cur_aff->ah_flag == aff->af_comproot)
5622 smsg((char_u *)_("Affix also used for BAD/RARE/KEEPCASE/NEEDAFFIX/NEEDCOMPOUND/NOSUGGEST in %s line %d: %s"),
5623 fname, lnum, items[1]);
5624 STRCPY(cur_aff->ah_key, items[1]);
5625 hash_add(tp, cur_aff->ah_key);
5626
5627 cur_aff->ah_combine = (*items[2] == 'Y');
5628 }
5629
5630 /* Check for the "S" flag, which apparently means that another
5631 * block with the same affix name is following. */
5632 if (itemcnt > lasti && STRCMP(items[lasti], "S") == 0)
5633 {
5634 ++lasti;
5635 cur_aff->ah_follows = TRUE;
5636 }
5637 else
5638 cur_aff->ah_follows = FALSE;
5639
5640 /* Myspell allows extra text after the item, but that might
5641 * mean mistakes go unnoticed. Require a comment-starter. */
5642 if (itemcnt > lasti && *items[lasti] != '#')
5643 smsg((char_u *)_(e_afftrailing), fname, lnum, items[lasti]);
5644
5645 if (STRCMP(items[2], "Y") != 0 && STRCMP(items[2], "N") != 0)
5646 smsg((char_u *)_("Expected Y or N in %s line %d: %s"),
5647 fname, lnum, items[2]);
5648
5649 if (*items[0] == 'P' && aff->af_pfxpostpone)
5650 {
5651 if (cur_aff->ah_newID == 0)
5652 {
5653 /* Use a new number in the .spl file later, to be able
5654 * to handle multiple .aff files. */
5655 check_renumber(spin);
5656 cur_aff->ah_newID = ++spin->si_newprefID;
5657
5658 /* We only really use ah_newID if the prefix is
5659 * postponed. We know that only after handling all
5660 * the items. */
5661 did_postpone_prefix = FALSE;
5662 }
5663 else
5664 /* Did use the ID in a previous block. */
5665 did_postpone_prefix = TRUE;
5666 }
5667
5668 aff_todo = atoi((char *)items[3]);
5669 }
5670 else if ((STRCMP(items[0], "PFX") == 0
5671 || STRCMP(items[0], "SFX") == 0)
5672 && aff_todo > 0
5673 && STRCMP(cur_aff->ah_key, items[1]) == 0
5674 && itemcnt >= 5)
5675 {
5676 affentry_T *aff_entry;
5677 int upper = FALSE;
5678 int lasti = 5;
5679
5680 /* Myspell allows extra text after the item, but that might
5681 * mean mistakes go unnoticed. Require a comment-starter,
5682 * unless IGNOREEXTRA is used. Hunspell uses a "-" item. */
5683 if (itemcnt > lasti
5684 && !aff->af_ignoreextra
5685 && *items[lasti] != '#'
5686 && (STRCMP(items[lasti], "-") != 0
5687 || itemcnt != lasti + 1))
5688 smsg((char_u *)_(e_afftrailing), fname, lnum, items[lasti]);
5689
5690 /* New item for an affix letter. */
5691 --aff_todo;
5692 aff_entry = (affentry_T *)getroom(spin,
5693 sizeof(affentry_T), TRUE);
5694 if (aff_entry == NULL)
5695 break;
5696
5697 if (STRCMP(items[2], "0") != 0)
5698 aff_entry->ae_chop = getroom_save(spin, items[2]);
5699 if (STRCMP(items[3], "0") != 0)
5700 {
5701 aff_entry->ae_add = getroom_save(spin, items[3]);
5702
5703 /* Recognize flags on the affix: abcd/XYZ */
5704 aff_entry->ae_flags = vim_strchr(aff_entry->ae_add, '/');
5705 if (aff_entry->ae_flags != NULL)
5706 {
5707 *aff_entry->ae_flags++ = NUL;
5708 aff_process_flags(aff, aff_entry);
5709 }
5710 }
5711
5712 /* Don't use an affix entry with non-ASCII characters when
5713 * "spin->si_ascii" is TRUE. */
5714 if (!spin->si_ascii || !(has_non_ascii(aff_entry->ae_chop)
5715 || has_non_ascii(aff_entry->ae_add)))
5716 {
5717 aff_entry->ae_next = cur_aff->ah_first;
5718 cur_aff->ah_first = aff_entry;
5719
5720 if (STRCMP(items[4], ".") != 0)
5721 {
5722 char_u buf[MAXLINELEN];
5723
5724 aff_entry->ae_cond = getroom_save(spin, items[4]);
5725 if (*items[0] == 'P')
5726 sprintf((char *)buf, "^%s", items[4]);
5727 else
5728 sprintf((char *)buf, "%s$", items[4]);
5729 aff_entry->ae_prog = vim_regcomp(buf,
5730 RE_MAGIC + RE_STRING + RE_STRICT);
5731 if (aff_entry->ae_prog == NULL)
5732 smsg((char_u *)_("Broken condition in %s line %d: %s"),
5733 fname, lnum, items[4]);
5734 }
5735
5736 /* For postponed prefixes we need an entry in si_prefcond
5737 * for the condition. Use an existing one if possible.
5738 * Can't be done for an affix with flags, ignoring
5739 * COMPOUNDFORBIDFLAG and COMPOUNDPERMITFLAG. */
5740 if (*items[0] == 'P' && aff->af_pfxpostpone
5741 && aff_entry->ae_flags == NULL)
5742 {
5743 /* When the chop string is one lower-case letter and
5744 * the add string ends in the upper-case letter we set
5745 * the "upper" flag, clear "ae_chop" and remove the
5746 * letters from "ae_add". The condition must either
5747 * be empty or start with the same letter. */
5748 if (aff_entry->ae_chop != NULL
5749 && aff_entry->ae_add != NULL
5750 #ifdef FEAT_MBYTE
5751 && aff_entry->ae_chop[(*mb_ptr2len)(
5752 aff_entry->ae_chop)] == NUL
5753 #else
5754 && aff_entry->ae_chop[1] == NUL
5755 #endif
5756 )
5757 {
5758 int c, c_up;
5759
5760 c = PTR2CHAR(aff_entry->ae_chop);
5761 c_up = SPELL_TOUPPER(c);
5762 if (c_up != c
5763 && (aff_entry->ae_cond == NULL
5764 || PTR2CHAR(aff_entry->ae_cond) == c))
5765 {
5766 p = aff_entry->ae_add
5767 + STRLEN(aff_entry->ae_add);
5768 mb_ptr_back(aff_entry->ae_add, p);
5769 if (PTR2CHAR(p) == c_up)
5770 {
5771 upper = TRUE;
5772 aff_entry->ae_chop = NULL;
5773 *p = NUL;
5774
5775 /* The condition is matched with the
5776 * actual word, thus must check for the
5777 * upper-case letter. */
5778 if (aff_entry->ae_cond != NULL)
5779 {
5780 char_u buf[MAXLINELEN];
5781 #ifdef FEAT_MBYTE
5782 if (has_mbyte)
5783 {
5784 onecap_copy(items[4], buf, TRUE);
5785 aff_entry->ae_cond = getroom_save(
5786 spin, buf);
5787 }
5788 else
5789 #endif
5790 *aff_entry->ae_cond = c_up;
5791 if (aff_entry->ae_cond != NULL)
5792 {
5793 sprintf((char *)buf, "^%s",
5794 aff_entry->ae_cond);
5795 vim_regfree(aff_entry->ae_prog);
5796 aff_entry->ae_prog = vim_regcomp(
5797 buf, RE_MAGIC + RE_STRING);
5798 }
5799 }
5800 }
5801 }
5802 }
5803
5804 if (aff_entry->ae_chop == NULL
5805 && aff_entry->ae_flags == NULL)
5806 {
5807 int idx;
5808 char_u **pp;
5809 int n;
5810
5811 /* Find a previously used condition. */
5812 for (idx = spin->si_prefcond.ga_len - 1; idx >= 0;
5813 --idx)
5814 {
5815 p = ((char_u **)spin->si_prefcond.ga_data)[idx];
5816 if (str_equal(p, aff_entry->ae_cond))
5817 break;
5818 }
5819 if (idx < 0 && ga_grow(&spin->si_prefcond, 1) == OK)
5820 {
5821 /* Not found, add a new condition. */
5822 idx = spin->si_prefcond.ga_len++;
5823 pp = ((char_u **)spin->si_prefcond.ga_data)
5824 + idx;
5825 if (aff_entry->ae_cond == NULL)
5826 *pp = NULL;
5827 else
5828 *pp = getroom_save(spin,
5829 aff_entry->ae_cond);
5830 }
5831
5832 /* Add the prefix to the prefix tree. */
5833 if (aff_entry->ae_add == NULL)
5834 p = (char_u *)"";
5835 else
5836 p = aff_entry->ae_add;
5837
5838 /* PFX_FLAGS is a negative number, so that
5839 * tree_add_word() knows this is the prefix tree. */
5840 n = PFX_FLAGS;
5841 if (!cur_aff->ah_combine)
5842 n |= WFP_NC;
5843 if (upper)
5844 n |= WFP_UP;
5845 if (aff_entry->ae_comppermit)
5846 n |= WFP_COMPPERMIT;
5847 if (aff_entry->ae_compforbid)
5848 n |= WFP_COMPFORBID;
5849 tree_add_word(spin, p, spin->si_prefroot, n,
5850 idx, cur_aff->ah_newID);
5851 did_postpone_prefix = TRUE;
5852 }
5853
5854 /* Didn't actually use ah_newID, backup si_newprefID. */
5855 if (aff_todo == 0 && !did_postpone_prefix)
5856 {
5857 --spin->si_newprefID;
5858 cur_aff->ah_newID = 0;
5859 }
5860 }
5861 }
5862 }
5863 else if (is_aff_rule(items, itemcnt, "FOL", 2) && fol == NULL)
5864 {
5865 fol = vim_strsave(items[1]);
5866 }
5867 else if (is_aff_rule(items, itemcnt, "LOW", 2) && low == NULL)
5868 {
5869 low = vim_strsave(items[1]);
5870 }
5871 else if (is_aff_rule(items, itemcnt, "UPP", 2) && upp == NULL)
5872 {
5873 upp = vim_strsave(items[1]);
5874 }
5875 else if (is_aff_rule(items, itemcnt, "REP", 2)
5876 || is_aff_rule(items, itemcnt, "REPSAL", 2))
5877 {
5878 /* Ignore REP/REPSAL count */;
5879 if (!isdigit(*items[1]))
5880 smsg((char_u *)_("Expected REP(SAL) count in %s line %d"),
5881 fname, lnum);
5882 }
5883 else if ((STRCMP(items[0], "REP") == 0
5884 || STRCMP(items[0], "REPSAL") == 0)
5885 && itemcnt >= 3)
5886 {
5887 /* REP/REPSAL item */
5888 /* Myspell ignores extra arguments, we require it starts with
5889 * # to detect mistakes. */
5890 if (itemcnt > 3 && items[3][0] != '#')
5891 smsg((char_u *)_(e_afftrailing), fname, lnum, items[3]);
5892 if (items[0][3] == 'S' ? do_repsal : do_rep)
5893 {
5894 /* Replace underscore with space (can't include a space
5895 * directly). */
5896 for (p = items[1]; *p != NUL; mb_ptr_adv(p))
5897 if (*p == '_')
5898 *p = ' ';
5899 for (p = items[2]; *p != NUL; mb_ptr_adv(p))
5900 if (*p == '_')
5901 *p = ' ';
5902 add_fromto(spin, items[0][3] == 'S'
5903 ? &spin->si_repsal
5904 : &spin->si_rep, items[1], items[2]);
5905 }
5906 }
5907 else if (is_aff_rule(items, itemcnt, "MAP", 2))
5908 {
5909 /* MAP item or count */
5910 if (!found_map)
5911 {
5912 /* First line contains the count. */
5913 found_map = TRUE;
5914 if (!isdigit(*items[1]))
5915 smsg((char_u *)_("Expected MAP count in %s line %d"),
5916 fname, lnum);
5917 }
5918 else if (do_mapline)
5919 {
5920 int c;
5921
5922 /* Check that every character appears only once. */
5923 for (p = items[1]; *p != NUL; )
5924 {
5925 #ifdef FEAT_MBYTE
5926 c = mb_ptr2char_adv(&p);
5927 #else
5928 c = *p++;
5929 #endif
5930 if ((spin->si_map.ga_len > 0
5931 && vim_strchr(spin->si_map.ga_data, c)
5932 != NULL)
5933 || vim_strchr(p, c) != NULL)
5934 smsg((char_u *)_("Duplicate character in MAP in %s line %d"),
5935 fname, lnum);
5936 }
5937
5938 /* We simply concatenate all the MAP strings, separated by
5939 * slashes. */
5940 ga_concat(&spin->si_map, items[1]);
5941 ga_append(&spin->si_map, '/');
5942 }
5943 }
5944 /* Accept "SAL from to" and "SAL from to #comment". */
5945 else if (is_aff_rule(items, itemcnt, "SAL", 3))
5946 {
5947 if (do_sal)
5948 {
5949 /* SAL item (sounds-a-like)
5950 * Either one of the known keys or a from-to pair. */
5951 if (STRCMP(items[1], "followup") == 0)
5952 spin->si_followup = sal_to_bool(items[2]);
5953 else if (STRCMP(items[1], "collapse_result") == 0)
5954 spin->si_collapse = sal_to_bool(items[2]);
5955 else if (STRCMP(items[1], "remove_accents") == 0)
5956 spin->si_rem_accents = sal_to_bool(items[2]);
5957 else
5958 /* when "to" is "_" it means empty */
5959 add_fromto(spin, &spin->si_sal, items[1],
5960 STRCMP(items[2], "_") == 0 ? (char_u *)""
5961 : items[2]);
5962 }
5963 }
5964 else if (is_aff_rule(items, itemcnt, "SOFOFROM", 2)
5965 && sofofrom == NULL)
5966 {
5967 sofofrom = getroom_save(spin, items[1]);
5968 }
5969 else if (is_aff_rule(items, itemcnt, "SOFOTO", 2)
5970 && sofoto == NULL)
5971 {
5972 sofoto = getroom_save(spin, items[1]);
5973 }
5974 else if (STRCMP(items[0], "COMMON") == 0)
5975 {
5976 int i;
5977
5978 for (i = 1; i < itemcnt; ++i)
5979 {
5980 if (HASHITEM_EMPTY(hash_find(&spin->si_commonwords,
5981 items[i])))
5982 {
5983 p = vim_strsave(items[i]);
5984 if (p == NULL)
5985 break;
5986 hash_add(&spin->si_commonwords, p);
5987 }
5988 }
5989 }
5990 else
5991 smsg((char_u *)_("Unrecognized or duplicate item in %s line %d: %s"),
5992 fname, lnum, items[0]);
5993 }
5994 }
5995
5996 if (fol != NULL || low != NULL || upp != NULL)
5997 {
5998 if (spin->si_clear_chartab)
5999 {
6000 /* Clear the char type tables, don't want to use any of the
6001 * currently used spell properties. */
6002 init_spell_chartab();
6003 spin->si_clear_chartab = FALSE;
6004 }
6005
6006 /*
6007 * Don't write a word table for an ASCII file, so that we don't check
6008 * for conflicts with a word table that matches 'encoding'.
6009 * Don't write one for utf-8 either, we use utf_*() and
6010 * mb_get_class(), the list of chars in the file will be incomplete.
6011 */
6012 if (!spin->si_ascii
6013 #ifdef FEAT_MBYTE
6014 && !enc_utf8
6015 #endif
6016 )
6017 {
6018 if (fol == NULL || low == NULL || upp == NULL)
6019 smsg((char_u *)_("Missing FOL/LOW/UPP line in %s"), fname);
6020 else
6021 (void)set_spell_chartab(fol, low, upp);
6022 }
6023
6024 vim_free(fol);
6025 vim_free(low);
6026 vim_free(upp);
6027 }
6028
6029 /* Use compound specifications of the .aff file for the spell info. */
6030 if (compmax != 0)
6031 {
6032 aff_check_number(spin->si_compmax, compmax, "COMPOUNDWORDMAX");
6033 spin->si_compmax = compmax;
6034 }
6035
6036 if (compminlen != 0)
6037 {
6038 aff_check_number(spin->si_compminlen, compminlen, "COMPOUNDMIN");
6039 spin->si_compminlen = compminlen;
6040 }
6041
6042 if (compsylmax != 0)
6043 {
6044 if (syllable == NULL)
6045 smsg((char_u *)_("COMPOUNDSYLMAX used without SYLLABLE"));
6046 aff_check_number(spin->si_compsylmax, compsylmax, "COMPOUNDSYLMAX");
6047 spin->si_compsylmax = compsylmax;
6048 }
6049
6050 if (compoptions != 0)
6051 {
6052 aff_check_number(spin->si_compoptions, compoptions, "COMPOUND options");
6053 spin->si_compoptions |= compoptions;
6054 }
6055
6056 if (compflags != NULL)
6057 process_compflags(spin, aff, compflags);
6058
6059 /* Check that we didn't use too many renumbered flags. */
6060 if (spin->si_newcompID < spin->si_newprefID)
6061 {
6062 if (spin->si_newcompID == 127 || spin->si_newcompID == 255)
6063 MSG(_("Too many postponed prefixes"));
6064 else if (spin->si_newprefID == 0 || spin->si_newprefID == 127)
6065 MSG(_("Too many compound flags"));
6066 else
6067 MSG(_("Too many postponed prefixes and/or compound flags"));
6068 }
6069
6070 if (syllable != NULL)
6071 {
6072 aff_check_string(spin->si_syllable, syllable, "SYLLABLE");
6073 spin->si_syllable = syllable;
6074 }
6075
6076 if (sofofrom != NULL || sofoto != NULL)
6077 {
6078 if (sofofrom == NULL || sofoto == NULL)
6079 smsg((char_u *)_("Missing SOFO%s line in %s"),
6080 sofofrom == NULL ? "FROM" : "TO", fname);
6081 else if (spin->si_sal.ga_len > 0)
6082 smsg((char_u *)_("Both SAL and SOFO lines in %s"), fname);
6083 else
6084 {
6085 aff_check_string(spin->si_sofofr, sofofrom, "SOFOFROM");
6086 aff_check_string(spin->si_sofoto, sofoto, "SOFOTO");
6087 spin->si_sofofr = sofofrom;
6088 spin->si_sofoto = sofoto;
6089 }
6090 }
6091
6092 if (midword != NULL)
6093 {
6094 aff_check_string(spin->si_midword, midword, "MIDWORD");
6095 spin->si_midword = midword;
6096 }
6097
6098 vim_free(pc);
6099 fclose(fd);
6100 return aff;
6101 }
6102
6103 /*
6104 * Return TRUE when items[0] equals "rulename", there are "mincount" items or
6105 * a comment is following after item "mincount".
6106 */
6107 static int
6108 is_aff_rule(
6109 char_u **items,
6110 int itemcnt,
6111 char *rulename,
6112 int mincount)
6113 {
6114 return (STRCMP(items[0], rulename) == 0
6115 && (itemcnt == mincount
6116 || (itemcnt > mincount && items[mincount][0] == '#')));
6117 }
6118
6119 /*
6120 * For affix "entry" move COMPOUNDFORBIDFLAG and COMPOUNDPERMITFLAG from
6121 * ae_flags to ae_comppermit and ae_compforbid.
6122 */
6123 static void
6124 aff_process_flags(afffile_T *affile, affentry_T *entry)
6125 {
6126 char_u *p;
6127 char_u *prevp;
6128 unsigned flag;
6129
6130 if (entry->ae_flags != NULL
6131 && (affile->af_compforbid != 0 || affile->af_comppermit != 0))
6132 {
6133 for (p = entry->ae_flags; *p != NUL; )
6134 {
6135 prevp = p;
6136 flag = get_affitem(affile->af_flagtype, &p);
6137 if (flag == affile->af_comppermit || flag == affile->af_compforbid)
6138 {
6139 STRMOVE(prevp, p);
6140 p = prevp;
6141 if (flag == affile->af_comppermit)
6142 entry->ae_comppermit = TRUE;
6143 else
6144 entry->ae_compforbid = TRUE;
6145 }
6146 if (affile->af_flagtype == AFT_NUM && *p == ',')
6147 ++p;
6148 }
6149 if (*entry->ae_flags == NUL)
6150 entry->ae_flags = NULL; /* nothing left */
6151 }
6152 }
6153
6154 /*
6155 * Return TRUE if "s" is the name of an info item in the affix file.
6156 */
6157 static int
6158 spell_info_item(char_u *s)
6159 {
6160 return STRCMP(s, "NAME") == 0
6161 || STRCMP(s, "HOME") == 0
6162 || STRCMP(s, "VERSION") == 0
6163 || STRCMP(s, "AUTHOR") == 0
6164 || STRCMP(s, "EMAIL") == 0
6165 || STRCMP(s, "COPYRIGHT") == 0;
6166 }
6167
6168 /*
6169 * Turn an affix flag name into a number, according to the FLAG type.
6170 * returns zero for failure.
6171 */
6172 static unsigned
6173 affitem2flag(
6174 int flagtype,
6175 char_u *item,
6176 char_u *fname,
6177 int lnum)
6178 {
6179 unsigned res;
6180 char_u *p = item;
6181
6182 res = get_affitem(flagtype, &p);
6183 if (res == 0)
6184 {
6185 if (flagtype == AFT_NUM)
6186 smsg((char_u *)_("Flag is not a number in %s line %d: %s"),
6187 fname, lnum, item);
6188 else
6189 smsg((char_u *)_("Illegal flag in %s line %d: %s"),
6190 fname, lnum, item);
6191 }
6192 if (*p != NUL)
6193 {
6194 smsg((char_u *)_(e_affname), fname, lnum, item);
6195 return 0;
6196 }
6197
6198 return res;
6199 }
6200
6201 /*
6202 * Get one affix name from "*pp" and advance the pointer.
6203 * Returns zero for an error, still advances the pointer then.
6204 */
6205 static unsigned
6206 get_affitem(int flagtype, char_u **pp)
6207 {
6208 int res;
6209
6210 if (flagtype == AFT_NUM)
6211 {
6212 if (!VIM_ISDIGIT(**pp))
6213 {
6214 ++*pp; /* always advance, avoid getting stuck */
6215 return 0;
6216 }
6217 res = getdigits(pp);
6218 }
6219 else
6220 {
6221 #ifdef FEAT_MBYTE
6222 res = mb_ptr2char_adv(pp);
6223 #else
6224 res = *(*pp)++;
6225 #endif
6226 if (flagtype == AFT_LONG || (flagtype == AFT_CAPLONG
6227 && res >= 'A' && res <= 'Z'))
6228 {
6229 if (**pp == NUL)
6230 return 0;
6231 #ifdef FEAT_MBYTE
6232 res = mb_ptr2char_adv(pp) + (res << 16);
6233 #else
6234 res = *(*pp)++ + (res << 16);
6235 #endif
6236 }
6237 }
6238 return res;
6239 }
6240
6241 /*
6242 * Process the "compflags" string used in an affix file and append it to
6243 * spin->si_compflags.
6244 * The processing involves changing the affix names to ID numbers, so that
6245 * they fit in one byte.
6246 */
6247 static void
6248 process_compflags(
6249 spellinfo_T *spin,
6250 afffile_T *aff,
6251 char_u *compflags)
6252 {
6253 char_u *p;
6254 char_u *prevp;
6255 unsigned flag;
6256 compitem_T *ci;
6257 int id;
6258 int len;
6259 char_u *tp;
6260 char_u key[AH_KEY_LEN];
6261 hashitem_T *hi;
6262
6263 /* Make room for the old and the new compflags, concatenated with a / in
6264 * between. Processing it makes it shorter, but we don't know by how
6265 * much, thus allocate the maximum. */
6266 len = (int)STRLEN(compflags) + 1;
6267 if (spin->si_compflags != NULL)
6268 len += (int)STRLEN(spin->si_compflags) + 1;
6269 p = getroom(spin, len, FALSE);
6270 if (p == NULL)
6271 return;
6272 if (spin->si_compflags != NULL)
6273 {
6274 STRCPY(p, spin->si_compflags);
6275 STRCAT(p, "/");
6276 }
6277 spin->si_compflags = p;
6278 tp = p + STRLEN(p);
6279
6280 for (p = compflags; *p != NUL; )
6281 {
6282 if (vim_strchr((char_u *)"/?*+[]", *p) != NULL)
6283 /* Copy non-flag characters directly. */
6284 *tp++ = *p++;
6285 else
6286 {
6287 /* First get the flag number, also checks validity. */
6288 prevp = p;
6289 flag = get_affitem(aff->af_flagtype, &p);
6290 if (flag != 0)
6291 {
6292 /* Find the flag in the hashtable. If it was used before, use
6293 * the existing ID. Otherwise add a new entry. */
6294 vim_strncpy(key, prevp, p - prevp);
6295 hi = hash_find(&aff->af_comp, key);
6296 if (!HASHITEM_EMPTY(hi))
6297 id = HI2CI(hi)->ci_newID;
6298 else
6299 {
6300 ci = (compitem_T *)getroom(spin, sizeof(compitem_T), TRUE);
6301 if (ci == NULL)
6302 break;
6303 STRCPY(ci->ci_key, key);
6304 ci->ci_flag = flag;
6305 /* Avoid using a flag ID that has a special meaning in a
6306 * regexp (also inside []). */
6307 do
6308 {
6309 check_renumber(spin);
6310 id = spin->si_newcompID--;
6311 } while (vim_strchr((char_u *)"/?*+[]\\-^", id) != NULL);
6312 ci->ci_newID = id;
6313 hash_add(&aff->af_comp, ci->ci_key);
6314 }
6315 *tp++ = id;
6316 }
6317 if (aff->af_flagtype == AFT_NUM && *p == ',')
6318 ++p;
6319 }
6320 }
6321
6322 *tp = NUL;
6323 }
6324
6325 /*
6326 * Check that the new IDs for postponed affixes and compounding don't overrun
6327 * each other. We have almost 255 available, but start at 0-127 to avoid
6328 * using two bytes for utf-8. When the 0-127 range is used up go to 128-255.
6329 * When that is used up an error message is given.
6330 */
6331 static void
6332 check_renumber(spellinfo_T *spin)
6333 {
6334 if (spin->si_newprefID == spin->si_newcompID && spin->si_newcompID < 128)
6335 {
6336 spin->si_newprefID = 127;
6337 spin->si_newcompID = 255;
6338 }
6339 }
6340
6341 /*
6342 * Return TRUE if flag "flag" appears in affix list "afflist".
6343 */
6344 static int
6345 flag_in_afflist(int flagtype, char_u *afflist, unsigned flag)
6346 {
6347 char_u *p;
6348 unsigned n;
6349
6350 switch (flagtype)
6351 {
6352 case AFT_CHAR:
6353 return vim_strchr(afflist, flag) != NULL;
6354
6355 case AFT_CAPLONG:
6356 case AFT_LONG:
6357 for (p = afflist; *p != NUL; )
6358 {
6359 #ifdef FEAT_MBYTE
6360 n = mb_ptr2char_adv(&p);
6361 #else
6362 n = *p++;
6363 #endif
6364 if ((flagtype == AFT_LONG || (n >= 'A' && n <= 'Z'))
6365 && *p != NUL)
6366 #ifdef FEAT_MBYTE
6367 n = mb_ptr2char_adv(&p) + (n << 16);
6368 #else
6369 n = *p++ + (n << 16);
6370 #endif
6371 if (n == flag)
6372 return TRUE;
6373 }
6374 break;
6375
6376 case AFT_NUM:
6377 for (p = afflist; *p != NUL; )
6378 {
6379 n = getdigits(&p);
6380 if (n == flag)
6381 return TRUE;
6382 if (*p != NUL) /* skip over comma */
6383 ++p;
6384 }
6385 break;
6386 }
6387 return FALSE;
6388 }
6389
6390 /*
6391 * Give a warning when "spinval" and "affval" numbers are set and not the same.
6392 */
6393 static void
6394 aff_check_number(int spinval, int affval, char *name)
6395 {
6396 if (spinval != 0 && spinval != affval)
6397 smsg((char_u *)_("%s value differs from what is used in another .aff file"), name);
6398 }
6399
6400 /*
6401 * Give a warning when "spinval" and "affval" strings are set and not the same.
6402 */
6403 static void
6404 aff_check_string(char_u *spinval, char_u *affval, char *name)
6405 {
6406 if (spinval != NULL && STRCMP(spinval, affval) != 0)
6407 smsg((char_u *)_("%s value differs from what is used in another .aff file"), name);
6408 }
6409
6410 /*
6411 * Return TRUE if strings "s1" and "s2" are equal. Also consider both being
6412 * NULL as equal.
6413 */
6414 static int
6415 str_equal(char_u *s1, char_u *s2)
6416 {
6417 if (s1 == NULL || s2 == NULL)
6418 return s1 == s2;
6419 return STRCMP(s1, s2) == 0;
6420 }
6421
6422 /*
6423 * Add a from-to item to "gap". Used for REP and SAL items.
6424 * They are stored case-folded.
6425 */
6426 static void
6427 add_fromto(
6428 spellinfo_T *spin,
6429 garray_T *gap,
6430 char_u *from,
6431 char_u *to)
6432 {
6433 fromto_T *ftp;
6434 char_u word[MAXWLEN];
6435
6436 if (ga_grow(gap, 1) == OK)
6437 {
6438 ftp = ((fromto_T *)gap->ga_data) + gap->ga_len;
6439 (void)spell_casefold(from, (int)STRLEN(from), word, MAXWLEN);
6440 ftp->ft_from = getroom_save(spin, word);
6441 (void)spell_casefold(to, (int)STRLEN(to), word, MAXWLEN);
6442 ftp->ft_to = getroom_save(spin, word);
6443 ++gap->ga_len;
6444 }
6445 }
6446
6447 /*
6448 * Convert a boolean argument in a SAL line to TRUE or FALSE;
6449 */
6450 static int
6451 sal_to_bool(char_u *s)
6452 {
6453 return STRCMP(s, "1") == 0 || STRCMP(s, "true") == 0;
6454 }
6455
6456 /*
6457 * Free the structure filled by spell_read_aff().
6458 */
6459 static void
6460 spell_free_aff(afffile_T *aff)
6461 {
6462 hashtab_T *ht;
6463 hashitem_T *hi;
6464 int todo;
6465 affheader_T *ah;
6466 affentry_T *ae;
6467
6468 vim_free(aff->af_enc);
6469
6470 /* All this trouble to free the "ae_prog" items... */
6471 for (ht = &aff->af_pref; ; ht = &aff->af_suff)
6472 {
6473 todo = (int)ht->ht_used;
6474 for (hi = ht->ht_array; todo > 0; ++hi)
6475 {
6476 if (!HASHITEM_EMPTY(hi))
6477 {
6478 --todo;
6479 ah = HI2AH(hi);
6480 for (ae = ah->ah_first; ae != NULL; ae = ae->ae_next)
6481 vim_regfree(ae->ae_prog);
6482 }
6483 }
6484 if (ht == &aff->af_suff)
6485 break;
6486 }
6487
6488 hash_clear(&aff->af_pref);
6489 hash_clear(&aff->af_suff);
6490 hash_clear(&aff->af_comp);
6491 }
6492
6493 /*
6494 * Read dictionary file "fname".
6495 * Returns OK or FAIL;
6496 */
6497 static int
6498 spell_read_dic(spellinfo_T *spin, char_u *fname, afffile_T *affile)
6499 {
6500 hashtab_T ht;
6501 char_u line[MAXLINELEN];
6502 char_u *p;
6503 char_u *afflist;
6504 char_u store_afflist[MAXWLEN];
6505 int pfxlen;
6506 int need_affix;
6507 char_u *dw;
6508 char_u *pc;
6509 char_u *w;
6510 int l;
6511 hash_T hash;
6512 hashitem_T *hi;
6513 FILE *fd;
6514 int lnum = 1;
6515 int non_ascii = 0;
6516 int retval = OK;
6517 char_u message[MAXLINELEN + MAXWLEN];
6518 int flags;
6519 int duplicate = 0;
6520
6521 /*
6522 * Open the file.
6523 */
6524 fd = mch_fopen((char *)fname, "r");
6525 if (fd == NULL)
6526 {
6527 EMSG2(_(e_notopen), fname);
6528 return FAIL;
6529 }
6530
6531 /* The hashtable is only used to detect duplicated words. */
6532 hash_init(&ht);
6533
6534 vim_snprintf((char *)IObuff, IOSIZE,
6535 _("Reading dictionary file %s ..."), fname);
6536 spell_message(spin, IObuff);
6537
6538 /* start with a message for the first line */
6539 spin->si_msg_count = 999999;
6540
6541 /* Read and ignore the first line: word count. */
6542 (void)vim_fgets(line, MAXLINELEN, fd);
6543 if (!vim_isdigit(*skipwhite(line)))
6544 EMSG2(_("E760: No word count in %s"), fname);
6545
6546 /*
6547 * Read all the lines in the file one by one.
6548 * The words are converted to 'encoding' here, before being added to
6549 * the hashtable.
6550 */
6551 while (!vim_fgets(line, MAXLINELEN, fd) && !got_int)
6552 {
6553 line_breakcheck();
6554 ++lnum;
6555 if (line[0] == '#' || line[0] == '/')
6556 continue; /* comment line */
6557
6558 /* Remove CR, LF and white space from the end. White space halfway
6559 * the word is kept to allow e.g., "et al.". */
6560 l = (int)STRLEN(line);
6561 while (l > 0 && line[l - 1] <= ' ')
6562 --l;
6563 if (l == 0)
6564 continue; /* empty line */
6565 line[l] = NUL;
6566
6567 #ifdef FEAT_MBYTE
6568 /* Convert from "SET" to 'encoding' when needed. */
6569 if (spin->si_conv.vc_type != CONV_NONE)
6570 {
6571 pc = string_convert(&spin->si_conv, line, NULL);
6572 if (pc == NULL)
6573 {
6574 smsg((char_u *)_("Conversion failure for word in %s line %d: %s"),
6575 fname, lnum, line);
6576 continue;
6577 }
6578 w = pc;
6579 }
6580 else
6581 #endif
6582 {
6583 pc = NULL;
6584 w = line;
6585 }
6586
6587 /* Truncate the word at the "/", set "afflist" to what follows.
6588 * Replace "\/" by "/" and "\\" by "\". */
6589 afflist = NULL;
6590 for (p = w; *p != NUL; mb_ptr_adv(p))
6591 {
6592 if (*p == '\\' && (p[1] == '\\' || p[1] == '/'))
6593 STRMOVE(p, p + 1);
6594 else if (*p == '/')
6595 {
6596 *p = NUL;
6597 afflist = p + 1;
6598 break;
6599 }
6600 }
6601
6602 /* Skip non-ASCII words when "spin->si_ascii" is TRUE. */
6603 if (spin->si_ascii && has_non_ascii(w))
6604 {
6605 ++non_ascii;
6606 vim_free(pc);
6607 continue;
6608 }
6609
6610 /* This takes time, print a message every 10000 words. */
6611 if (spin->si_verbose && spin->si_msg_count > 10000)
6612 {
6613 spin->si_msg_count = 0;
6614 vim_snprintf((char *)message, sizeof(message),
6615 _("line %6d, word %6d - %s"),
6616 lnum, spin->si_foldwcount + spin->si_keepwcount, w);
6617 msg_start();
6618 msg_puts_long_attr(message, 0);
6619 msg_clr_eos();
6620 msg_didout = FALSE;
6621 msg_col = 0;
6622 out_flush();
6623 }
6624
6625 /* Store the word in the hashtable to be able to find duplicates. */
6626 dw = (char_u *)getroom_save(spin, w);
6627 if (dw == NULL)
6628 {
6629 retval = FAIL;
6630 vim_free(pc);
6631 break;
6632 }
6633
6634 hash = hash_hash(dw);
6635 hi = hash_lookup(&ht, dw, hash);
6636 if (!HASHITEM_EMPTY(hi))
6637 {
6638 if (p_verbose > 0)
6639 smsg((char_u *)_("Duplicate word in %s line %d: %s"),
6640 fname, lnum, dw);
6641 else if (duplicate == 0)
6642 smsg((char_u *)_("First duplicate word in %s line %d: %s"),
6643 fname, lnum, dw);
6644 ++duplicate;
6645 }
6646 else
6647 hash_add_item(&ht, hi, dw, hash);
6648
6649 flags = 0;
6650 store_afflist[0] = NUL;
6651 pfxlen = 0;
6652 need_affix = FALSE;
6653 if (afflist != NULL)
6654 {
6655 /* Extract flags from the affix list. */
6656 flags |= get_affix_flags(affile, afflist);
6657
6658 if (affile->af_needaffix != 0 && flag_in_afflist(
6659 affile->af_flagtype, afflist, affile->af_needaffix))
6660 need_affix = TRUE;
6661
6662 if (affile->af_pfxpostpone)
6663 /* Need to store the list of prefix IDs with the word. */
6664 pfxlen = get_pfxlist(affile, afflist, store_afflist);
6665
6666 if (spin->si_compflags != NULL)
6667 /* Need to store the list of compound flags with the word.
6668 * Concatenate them to the list of prefix IDs. */
6669 get_compflags(affile, afflist, store_afflist + pfxlen);
6670 }
6671
6672 /* Add the word to the word tree(s). */
6673 if (store_word(spin, dw, flags, spin->si_region,
6674 store_afflist, need_affix) == FAIL)
6675 retval = FAIL;
6676
6677 if (afflist != NULL)
6678 {
6679 /* Find all matching suffixes and add the resulting words.
6680 * Additionally do matching prefixes that combine. */
6681 if (store_aff_word(spin, dw, afflist, affile,
6682 &affile->af_suff, &affile->af_pref,
6683 CONDIT_SUF, flags, store_afflist, pfxlen) == FAIL)
6684 retval = FAIL;
6685
6686 /* Find all matching prefixes and add the resulting words. */
6687 if (store_aff_word(spin, dw, afflist, affile,
6688 &affile->af_pref, NULL,
6689 CONDIT_SUF, flags, store_afflist, pfxlen) == FAIL)
6690 retval = FAIL;
6691 }
6692
6693 vim_free(pc);
6694 }
6695
6696 if (duplicate > 0)
6697 smsg((char_u *)_("%d duplicate word(s) in %s"), duplicate, fname);
6698 if (spin->si_ascii && non_ascii > 0)
6699 smsg((char_u *)_("Ignored %d word(s) with non-ASCII characters in %s"),
6700 non_ascii, fname);
6701 hash_clear(&ht);
6702
6703 fclose(fd);
6704 return retval;
6705 }
6706
6707 /*
6708 * Check for affix flags in "afflist" that are turned into word flags.
6709 * Return WF_ flags.
6710 */
6711 static int
6712 get_affix_flags(afffile_T *affile, char_u *afflist)
6713 {
6714 int flags = 0;
6715
6716 if (affile->af_keepcase != 0 && flag_in_afflist(
6717 affile->af_flagtype, afflist, affile->af_keepcase))
6718 flags |= WF_KEEPCAP | WF_FIXCAP;
6719 if (affile->af_rare != 0 && flag_in_afflist(
6720 affile->af_flagtype, afflist, affile->af_rare))
6721 flags |= WF_RARE;
6722 if (affile->af_bad != 0 && flag_in_afflist(
6723 affile->af_flagtype, afflist, affile->af_bad))
6724 flags |= WF_BANNED;
6725 if (affile->af_needcomp != 0 && flag_in_afflist(
6726 affile->af_flagtype, afflist, affile->af_needcomp))
6727 flags |= WF_NEEDCOMP;
6728 if (affile->af_comproot != 0 && flag_in_afflist(
6729 affile->af_flagtype, afflist, affile->af_comproot))
6730 flags |= WF_COMPROOT;
6731 if (affile->af_nosuggest != 0 && flag_in_afflist(
6732 affile->af_flagtype, afflist, affile->af_nosuggest))
6733 flags |= WF_NOSUGGEST;
6734 return flags;
6735 }
6736
6737 /*
6738 * Get the list of prefix IDs from the affix list "afflist".
6739 * Used for PFXPOSTPONE.
6740 * Put the resulting flags in "store_afflist[MAXWLEN]" with a terminating NUL
6741 * and return the number of affixes.
6742 */
6743 static int
6744 get_pfxlist(
6745 afffile_T *affile,
6746 char_u *afflist,
6747 char_u *store_afflist)
6748 {
6749 char_u *p;
6750 char_u *prevp;
6751 int cnt = 0;
6752 int id;
6753 char_u key[AH_KEY_LEN];
6754 hashitem_T *hi;
6755
6756 for (p = afflist; *p != NUL; )
6757 {
6758 prevp = p;
6759 if (get_affitem(affile->af_flagtype, &p) != 0)
6760 {
6761 /* A flag is a postponed prefix flag if it appears in "af_pref"
6762 * and it's ID is not zero. */
6763 vim_strncpy(key, prevp, p - prevp);
6764 hi = hash_find(&affile->af_pref, key);
6765 if (!HASHITEM_EMPTY(hi))
6766 {
6767 id = HI2AH(hi)->ah_newID;
6768 if (id != 0)
6769 store_afflist[cnt++] = id;
6770 }
6771 }
6772 if (affile->af_flagtype == AFT_NUM && *p == ',')
6773 ++p;
6774 }
6775
6776 store_afflist[cnt] = NUL;
6777 return cnt;
6778 }
6779
6780 /*
6781 * Get the list of compound IDs from the affix list "afflist" that are used
6782 * for compound words.
6783 * Puts the flags in "store_afflist[]".
6784 */
6785 static void
6786 get_compflags(
6787 afffile_T *affile,
6788 char_u *afflist,
6789 char_u *store_afflist)
6790 {
6791 char_u *p;
6792 char_u *prevp;
6793 int cnt = 0;
6794 char_u key[AH_KEY_LEN];
6795 hashitem_T *hi;
6796
6797 for (p = afflist; *p != NUL; )
6798 {
6799 prevp = p;
6800 if (get_affitem(affile->af_flagtype, &p) != 0)
6801 {
6802 /* A flag is a compound flag if it appears in "af_comp". */
6803 vim_strncpy(key, prevp, p - prevp);
6804 hi = hash_find(&affile->af_comp, key);
6805 if (!HASHITEM_EMPTY(hi))
6806 store_afflist[cnt++] = HI2CI(hi)->ci_newID;
6807 }
6808 if (affile->af_flagtype == AFT_NUM && *p == ',')
6809 ++p;
6810 }
6811
6812 store_afflist[cnt] = NUL;
6813 }
6814
6815 /*
6816 * Apply affixes to a word and store the resulting words.
6817 * "ht" is the hashtable with affentry_T that need to be applied, either
6818 * prefixes or suffixes.
6819 * "xht", when not NULL, is the prefix hashtable, to be used additionally on
6820 * the resulting words for combining affixes.
6821 *
6822 * Returns FAIL when out of memory.
6823 */
6824 static int
6825 store_aff_word(
6826 spellinfo_T *spin, /* spell info */
6827 char_u *word, /* basic word start */
6828 char_u *afflist, /* list of names of supported affixes */
6829 afffile_T *affile,
6830 hashtab_T *ht,
6831 hashtab_T *xht,
6832 int condit, /* CONDIT_SUF et al. */
6833 int flags, /* flags for the word */
6834 char_u *pfxlist, /* list of prefix IDs */
6835 int pfxlen) /* nr of flags in "pfxlist" for prefixes, rest
6836 * is compound flags */
6837 {
6838 int todo;
6839 hashitem_T *hi;
6840 affheader_T *ah;
6841 affentry_T *ae;
6842 char_u newword[MAXWLEN];
6843 int retval = OK;
6844 int i, j;
6845 char_u *p;
6846 int use_flags;
6847 char_u *use_pfxlist;
6848 int use_pfxlen;
6849 int need_affix;
6850 char_u store_afflist[MAXWLEN];
6851 char_u pfx_pfxlist[MAXWLEN];
6852 size_t wordlen = STRLEN(word);
6853 int use_condit;
6854
6855 todo = (int)ht->ht_used;
6856 for (hi = ht->ht_array; todo > 0 && retval == OK; ++hi)
6857 {
6858 if (!HASHITEM_EMPTY(hi))
6859 {
6860 --todo;
6861 ah = HI2AH(hi);
6862
6863 /* Check that the affix combines, if required, and that the word
6864 * supports this affix. */
6865 if (((condit & CONDIT_COMB) == 0 || ah->ah_combine)
6866 && flag_in_afflist(affile->af_flagtype, afflist,
6867 ah->ah_flag))
6868 {
6869 /* Loop over all affix entries with this name. */
6870 for (ae = ah->ah_first; ae != NULL; ae = ae->ae_next)
6871 {
6872 /* Check the condition. It's not logical to match case
6873 * here, but it is required for compatibility with
6874 * Myspell.
6875 * Another requirement from Myspell is that the chop
6876 * string is shorter than the word itself.
6877 * For prefixes, when "PFXPOSTPONE" was used, only do
6878 * prefixes with a chop string and/or flags.
6879 * When a previously added affix had CIRCUMFIX this one
6880 * must have it too, if it had not then this one must not
6881 * have one either. */
6882 if ((xht != NULL || !affile->af_pfxpostpone
6883 || ae->ae_chop != NULL
6884 || ae->ae_flags != NULL)
6885 && (ae->ae_chop == NULL
6886 || STRLEN(ae->ae_chop) < wordlen)
6887 && (ae->ae_prog == NULL
6888 || vim_regexec_prog(&ae->ae_prog, FALSE,
6889 word, (colnr_T)0))
6890 && (((condit & CONDIT_CFIX) == 0)
6891 == ((condit & CONDIT_AFF) == 0
6892 || ae->ae_flags == NULL
6893 || !flag_in_afflist(affile->af_flagtype,
6894 ae->ae_flags, affile->af_circumfix))))
6895 {
6896 /* Match. Remove the chop and add the affix. */
6897 if (xht == NULL)
6898 {
6899 /* prefix: chop/add at the start of the word */
6900 if (ae->ae_add == NULL)
6901 *newword = NUL;
6902 else
6903 vim_strncpy(newword, ae->ae_add, MAXWLEN - 1);
6904 p = word;
6905 if (ae->ae_chop != NULL)
6906 {
6907 /* Skip chop string. */
6908 #ifdef FEAT_MBYTE
6909 if (has_mbyte)
6910 {
6911 i = mb_charlen(ae->ae_chop);
6912 for ( ; i > 0; --i)
6913 mb_ptr_adv(p);
6914 }
6915 else
6916 #endif
6917 p += STRLEN(ae->ae_chop);
6918 }
6919 STRCAT(newword, p);
6920 }
6921 else
6922 {
6923 /* suffix: chop/add at the end of the word */
6924 vim_strncpy(newword, word, MAXWLEN - 1);
6925 if (ae->ae_chop != NULL)
6926 {
6927 /* Remove chop string. */
6928 p = newword + STRLEN(newword);
6929 i = (int)MB_CHARLEN(ae->ae_chop);
6930 for ( ; i > 0; --i)
6931 mb_ptr_back(newword, p);
6932 *p = NUL;
6933 }
6934 if (ae->ae_add != NULL)
6935 STRCAT(newword, ae->ae_add);
6936 }
6937
6938 use_flags = flags;
6939 use_pfxlist = pfxlist;
6940 use_pfxlen = pfxlen;
6941 need_affix = FALSE;
6942 use_condit = condit | CONDIT_COMB | CONDIT_AFF;
6943 if (ae->ae_flags != NULL)
6944 {
6945 /* Extract flags from the affix list. */
6946 use_flags |= get_affix_flags(affile, ae->ae_flags);
6947
6948 if (affile->af_needaffix != 0 && flag_in_afflist(
6949 affile->af_flagtype, ae->ae_flags,
6950 affile->af_needaffix))
6951 need_affix = TRUE;
6952
6953 /* When there is a CIRCUMFIX flag the other affix
6954 * must also have it and we don't add the word
6955 * with one affix. */
6956 if (affile->af_circumfix != 0 && flag_in_afflist(
6957 affile->af_flagtype, ae->ae_flags,
6958 affile->af_circumfix))
6959 {
6960 use_condit |= CONDIT_CFIX;
6961 if ((condit & CONDIT_CFIX) == 0)
6962 need_affix = TRUE;
6963 }
6964
6965 if (affile->af_pfxpostpone
6966 || spin->si_compflags != NULL)
6967 {
6968 if (affile->af_pfxpostpone)
6969 /* Get prefix IDS from the affix list. */
6970 use_pfxlen = get_pfxlist(affile,
6971 ae->ae_flags, store_afflist);
6972 else
6973 use_pfxlen = 0;
6974 use_pfxlist = store_afflist;
6975
6976 /* Combine the prefix IDs. Avoid adding the
6977 * same ID twice. */
6978 for (i = 0; i < pfxlen; ++i)
6979 {
6980 for (j = 0; j < use_pfxlen; ++j)
6981 if (pfxlist[i] == use_pfxlist[j])
6982 break;
6983 if (j == use_pfxlen)
6984 use_pfxlist[use_pfxlen++] = pfxlist[i];
6985 }
6986
6987 if (spin->si_compflags != NULL)
6988 /* Get compound IDS from the affix list. */
6989 get_compflags(affile, ae->ae_flags,
6990 use_pfxlist + use_pfxlen);
6991
6992 /* Combine the list of compound flags.
6993 * Concatenate them to the prefix IDs list.
6994 * Avoid adding the same ID twice. */
6995 for (i = pfxlen; pfxlist[i] != NUL; ++i)
6996 {
6997 for (j = use_pfxlen;
6998 use_pfxlist[j] != NUL; ++j)
6999 if (pfxlist[i] == use_pfxlist[j])
7000 break;
7001 if (use_pfxlist[j] == NUL)
7002 {
7003 use_pfxlist[j++] = pfxlist[i];
7004 use_pfxlist[j] = NUL;
7005 }
7006 }
7007 }
7008 }
7009
7010 /* Obey a "COMPOUNDFORBIDFLAG" of the affix: don't
7011 * use the compound flags. */
7012 if (use_pfxlist != NULL && ae->ae_compforbid)
7013 {
7014 vim_strncpy(pfx_pfxlist, use_pfxlist, use_pfxlen);
7015 use_pfxlist = pfx_pfxlist;
7016 }
7017
7018 /* When there are postponed prefixes... */
7019 if (spin->si_prefroot != NULL
7020 && spin->si_prefroot->wn_sibling != NULL)
7021 {
7022 /* ... add a flag to indicate an affix was used. */
7023 use_flags |= WF_HAS_AFF;
7024
7025 /* ... don't use a prefix list if combining
7026 * affixes is not allowed. But do use the
7027 * compound flags after them. */
7028 if (!ah->ah_combine && use_pfxlist != NULL)
7029 use_pfxlist += use_pfxlen;
7030 }
7031
7032 /* When compounding is supported and there is no
7033 * "COMPOUNDPERMITFLAG" then forbid compounding on the
7034 * side where the affix is applied. */
7035 if (spin->si_compflags != NULL && !ae->ae_comppermit)
7036 {
7037 if (xht != NULL)
7038 use_flags |= WF_NOCOMPAFT;
7039 else
7040 use_flags |= WF_NOCOMPBEF;
7041 }
7042
7043 /* Store the modified word. */
7044 if (store_word(spin, newword, use_flags,
7045 spin->si_region, use_pfxlist,
7046 need_affix) == FAIL)
7047 retval = FAIL;
7048
7049 /* When added a prefix or a first suffix and the affix
7050 * has flags may add a(nother) suffix. RECURSIVE! */
7051 if ((condit & CONDIT_SUF) && ae->ae_flags != NULL)
7052 if (store_aff_word(spin, newword, ae->ae_flags,
7053 affile, &affile->af_suff, xht,
7054 use_condit & (xht == NULL
7055 ? ~0 : ~CONDIT_SUF),
7056 use_flags, use_pfxlist, pfxlen) == FAIL)
7057 retval = FAIL;
7058
7059 /* When added a suffix and combining is allowed also
7060 * try adding a prefix additionally. Both for the
7061 * word flags and for the affix flags. RECURSIVE! */
7062 if (xht != NULL && ah->ah_combine)
7063 {
7064 if (store_aff_word(spin, newword,
7065 afflist, affile,
7066 xht, NULL, use_condit,
7067 use_flags, use_pfxlist,
7068 pfxlen) == FAIL
7069 || (ae->ae_flags != NULL
7070 && store_aff_word(spin, newword,
7071 ae->ae_flags, affile,
7072 xht, NULL, use_condit,
7073 use_flags, use_pfxlist,
7074 pfxlen) == FAIL))
7075 retval = FAIL;
7076 }
7077 }
7078 }
7079 }
7080 }
7081 }
7082
7083 return retval;
7084 }
7085
7086 /*
7087 * Read a file with a list of words.
7088 */
7089 static int
7090 spell_read_wordfile(spellinfo_T *spin, char_u *fname)
7091 {
7092 FILE *fd;
7093 long lnum = 0;
7094 char_u rline[MAXLINELEN];
7095 char_u *line;
7096 char_u *pc = NULL;
7097 char_u *p;
7098 int l;
7099 int retval = OK;
7100 int did_word = FALSE;
7101 int non_ascii = 0;
7102 int flags;
7103 int regionmask;
7104
7105 /*
7106 * Open the file.
7107 */
7108 fd = mch_fopen((char *)fname, "r");
7109 if (fd == NULL)
7110 {
7111 EMSG2(_(e_notopen), fname);
7112 return FAIL;
7113 }
7114
7115 vim_snprintf((char *)IObuff, IOSIZE, _("Reading word file %s ..."), fname);
7116 spell_message(spin, IObuff);
7117
7118 /*
7119 * Read all the lines in the file one by one.
7120 */
7121 while (!vim_fgets(rline, MAXLINELEN, fd) && !got_int)
7122 {
7123 line_breakcheck();
7124 ++lnum;
7125
7126 /* Skip comment lines. */
7127 if (*rline == '#')
7128 continue;
7129
7130 /* Remove CR, LF and white space from the end. */
7131 l = (int)STRLEN(rline);
7132 while (l > 0 && rline[l - 1] <= ' ')
7133 --l;
7134 if (l == 0)
7135 continue; /* empty or blank line */
7136 rline[l] = NUL;
7137
7138 /* Convert from "/encoding={encoding}" to 'encoding' when needed. */
7139 vim_free(pc);
7140 #ifdef FEAT_MBYTE
7141 if (spin->si_conv.vc_type != CONV_NONE)
7142 {
7143 pc = string_convert(&spin->si_conv, rline, NULL);
7144 if (pc == NULL)
7145 {
7146 smsg((char_u *)_("Conversion failure for word in %s line %d: %s"),
7147 fname, lnum, rline);
7148 continue;
7149 }
7150 line = pc;
7151 }
7152 else
7153 #endif
7154 {
7155 pc = NULL;
7156 line = rline;
7157 }
7158
7159 if (*line == '/')
7160 {
7161 ++line;
7162 if (STRNCMP(line, "encoding=", 9) == 0)
7163 {
7164 if (spin->si_conv.vc_type != CONV_NONE)
7165 smsg((char_u *)_("Duplicate /encoding= line ignored in %s line %d: %s"),
7166 fname, lnum, line - 1);
7167 else if (did_word)
7168 smsg((char_u *)_("/encoding= line after word ignored in %s line %d: %s"),
7169 fname, lnum, line - 1);
7170 else
7171 {
7172 #ifdef FEAT_MBYTE
7173 char_u *enc;
7174
7175 /* Setup for conversion to 'encoding'. */
7176 line += 9;
7177 enc = enc_canonize(line);
7178 if (enc != NULL && !spin->si_ascii
7179 && convert_setup(&spin->si_conv, enc,
7180 p_enc) == FAIL)
7181 smsg((char_u *)_("Conversion in %s not supported: from %s to %s"),
7182 fname, line, p_enc);
7183 vim_free(enc);
7184 spin->si_conv.vc_fail = TRUE;
7185 #else
7186 smsg((char_u *)_("Conversion in %s not supported"), fname);
7187 #endif
7188 }
7189 continue;
7190 }
7191
7192 if (STRNCMP(line, "regions=", 8) == 0)
7193 {
7194 if (spin->si_region_count > 1)
7195 smsg((char_u *)_("Duplicate /regions= line ignored in %s line %d: %s"),
7196 fname, lnum, line);
7197 else
7198 {
7199 line += 8;
7200 if (STRLEN(line) > 16)
7201 smsg((char_u *)_("Too many regions in %s line %d: %s"),
7202 fname, lnum, line);
7203 else
7204 {
7205 spin->si_region_count = (int)STRLEN(line) / 2;
7206 STRCPY(spin->si_region_name, line);
7207
7208 /* Adjust the mask for a word valid in all regions. */
7209 spin->si_region = (1 << spin->si_region_count) - 1;
7210 }
7211 }
7212 continue;
7213 }
7214
7215 smsg((char_u *)_("/ line ignored in %s line %d: %s"),
7216 fname, lnum, line - 1);
7217 continue;
7218 }
7219
7220 flags = 0;
7221 regionmask = spin->si_region;
7222
7223 /* Check for flags and region after a slash. */
7224 p = vim_strchr(line, '/');
7225 if (p != NULL)
7226 {
7227 *p++ = NUL;
7228 while (*p != NUL)
7229 {
7230 if (*p == '=') /* keep-case word */
7231 flags |= WF_KEEPCAP | WF_FIXCAP;
7232 else if (*p == '!') /* Bad, bad, wicked word. */
7233 flags |= WF_BANNED;
7234 else if (*p == '?') /* Rare word. */
7235 flags |= WF_RARE;
7236 else if (VIM_ISDIGIT(*p)) /* region number(s) */
7237 {
7238 if ((flags & WF_REGION) == 0) /* first one */
7239 regionmask = 0;
7240 flags |= WF_REGION;
7241
7242 l = *p - '0';
7243 if (l > spin->si_region_count)
7244 {
7245 smsg((char_u *)_("Invalid region nr in %s line %d: %s"),
7246 fname, lnum, p);
7247 break;
7248 }
7249 regionmask |= 1 << (l - 1);
7250 }
7251 else
7252 {
7253 smsg((char_u *)_("Unrecognized flags in %s line %d: %s"),
7254 fname, lnum, p);
7255 break;
7256 }
7257 ++p;
7258 }
7259 }
7260
7261 /* Skip non-ASCII words when "spin->si_ascii" is TRUE. */
7262 if (spin->si_ascii && has_non_ascii(line))
7263 {
7264 ++non_ascii;
7265 continue;
7266 }
7267
7268 /* Normal word: store it. */
7269 if (store_word(spin, line, flags, regionmask, NULL, FALSE) == FAIL)
7270 {
7271 retval = FAIL;
7272 break;
7273 }
7274 did_word = TRUE;
7275 }
7276
7277 vim_free(pc);
7278 fclose(fd);
7279
7280 if (spin->si_ascii && non_ascii > 0)
7281 {
7282 vim_snprintf((char *)IObuff, IOSIZE,
7283 _("Ignored %d words with non-ASCII characters"), non_ascii);
7284 spell_message(spin, IObuff);
7285 }
7286
7287 return retval;
7288 }
7289
7290 /*
7291 * Get part of an sblock_T, "len" bytes long.
7292 * This avoids calling free() for every little struct we use (and keeping
7293 * track of them).
7294 * The memory is cleared to all zeros.
7295 * Returns NULL when out of memory.
7296 */
7297 static void *
7298 getroom(
7299 spellinfo_T *spin,
7300 size_t len, /* length needed */
7301 int align) /* align for pointer */
7302 {
7303 char_u *p;
7304 sblock_T *bl = spin->si_blocks;
7305
7306 if (align && bl != NULL)
7307 /* Round size up for alignment. On some systems structures need to be
7308 * aligned to the size of a pointer (e.g., SPARC). */
7309 bl->sb_used = (bl->sb_used + sizeof(char *) - 1)
7310 & ~(sizeof(char *) - 1);
7311
7312 if (bl == NULL || bl->sb_used + len > SBLOCKSIZE)
7313 {
7314 if (len >= SBLOCKSIZE)
7315 bl = NULL;
7316 else
7317 /* Allocate a block of memory. It is not freed until much later. */
7318 bl = (sblock_T *)alloc_clear(
7319 (unsigned)(sizeof(sblock_T) + SBLOCKSIZE));
7320 if (bl == NULL)
7321 {
7322 if (!spin->si_did_emsg)
7323 {
7324 EMSG(_("E845: Insufficient memory, word list will be incomplete"));
7325 spin->si_did_emsg = TRUE;
7326 }
7327 return NULL;
7328 }
7329 bl->sb_next = spin->si_blocks;
7330 spin->si_blocks = bl;
7331 bl->sb_used = 0;
7332 ++spin->si_blocks_cnt;
7333 }
7334
7335 p = bl->sb_data + bl->sb_used;
7336 bl->sb_used += (int)len;
7337
7338 return p;
7339 }
7340
7341 /*
7342 * Make a copy of a string into memory allocated with getroom().
7343 * Returns NULL when out of memory.
7344 */
7345 static char_u *
7346 getroom_save(spellinfo_T *spin, char_u *s)
7347 {
7348 char_u *sc;
7349
7350 sc = (char_u *)getroom(spin, STRLEN(s) + 1, FALSE);
7351 if (sc != NULL)
7352 STRCPY(sc, s);
7353 return sc;
7354 }
7355
7356
7357 /*
7358 * Free the list of allocated sblock_T.
7359 */
7360 static void
7361 free_blocks(sblock_T *bl)
7362 {
7363 sblock_T *next;
7364
7365 while (bl != NULL)
7366 {
7367 next = bl->sb_next;
7368 vim_free(bl);
7369 bl = next;
7370 }
7371 }
7372
7373 /*
7374 * Allocate the root of a word tree.
7375 * Returns NULL when out of memory.
7376 */
7377 static wordnode_T *
7378 wordtree_alloc(spellinfo_T *spin)
7379 {
7380 return (wordnode_T *)getroom(spin, sizeof(wordnode_T), TRUE);
7381 }
7382
7383 /*
7384 * Store a word in the tree(s).
7385 * Always store it in the case-folded tree. For a keep-case word this is
7386 * useful when the word can also be used with all caps (no WF_FIXCAP flag) and
7387 * used to find suggestions.
7388 * For a keep-case word also store it in the keep-case tree.
7389 * When "pfxlist" is not NULL store the word for each postponed prefix ID and
7390 * compound flag.
7391 */
7392 static int
7393 store_word(
7394 spellinfo_T *spin,
7395 char_u *word,
7396 int flags, /* extra flags, WF_BANNED */
7397 int region, /* supported region(s) */
7398 char_u *pfxlist, /* list of prefix IDs or NULL */
7399 int need_affix) /* only store word with affix ID */
7400 {
7401 int len = (int)STRLEN(word);
7402 int ct = captype(word, word + len);
7403 char_u foldword[MAXWLEN];
7404 int res = OK;
7405 char_u *p;
7406
7407 (void)spell_casefold(word, len, foldword, MAXWLEN);
7408 for (p = pfxlist; res == OK; ++p)
7409 {
7410 if (!need_affix || (p != NULL && *p != NUL))
7411 res = tree_add_word(spin, foldword, spin->si_foldroot, ct | flags,
7412 region, p == NULL ? 0 : *p);
7413 if (p == NULL || *p == NUL)
7414 break;
7415 }
7416 ++spin->si_foldwcount;
7417
7418 if (res == OK && (ct == WF_KEEPCAP || (flags & WF_KEEPCAP)))
7419 {
7420 for (p = pfxlist; res == OK; ++p)
7421 {
7422 if (!need_affix || (p != NULL && *p != NUL))
7423 res = tree_add_word(spin, word, spin->si_keeproot, flags,
7424 region, p == NULL ? 0 : *p);
7425 if (p == NULL || *p == NUL)
7426 break;
7427 }
7428 ++spin->si_keepwcount;
7429 }
7430 return res;
7431 }
7432
7433 /*
7434 * Add word "word" to a word tree at "root".
7435 * When "flags" < 0 we are adding to the prefix tree where "flags" is used for
7436 * "rare" and "region" is the condition nr.
7437 * Returns FAIL when out of memory.
7438 */
7439 static int
7440 tree_add_word(
7441 spellinfo_T *spin,
7442 char_u *word,
7443 wordnode_T *root,
7444 int flags,
7445 int region,
7446 int affixID)
7447 {
7448 wordnode_T *node = root;
7449 wordnode_T *np;
7450 wordnode_T *copyp, **copyprev;
7451 wordnode_T **prev = NULL;
7452 int i;
7453
7454 /* Add each byte of the word to the tree, including the NUL at the end. */
7455 for (i = 0; ; ++i)
7456 {
7457 /* When there is more than one reference to this node we need to make
7458 * a copy, so that we can modify it. Copy the whole list of siblings
7459 * (we don't optimize for a partly shared list of siblings). */
7460 if (node != NULL && node->wn_refs > 1)
7461 {
7462 --node->wn_refs;
7463 copyprev = prev;
7464 for (copyp = node; copyp != NULL; copyp = copyp->wn_sibling)
7465 {
7466 /* Allocate a new node and copy the info. */
7467 np = get_wordnode(spin);
7468 if (np == NULL)
7469 return FAIL;
7470 np->wn_child = copyp->wn_child;
7471 if (np->wn_child != NULL)
7472 ++np->wn_child->wn_refs; /* child gets extra ref */
7473 np->wn_byte = copyp->wn_byte;
7474 if (np->wn_byte == NUL)
7475 {
7476 np->wn_flags = copyp->wn_flags;
7477 np->wn_region = copyp->wn_region;
7478 np->wn_affixID = copyp->wn_affixID;
7479 }
7480
7481 /* Link the new node in the list, there will be one ref. */
7482 np->wn_refs = 1;
7483 if (copyprev != NULL)
7484 *copyprev = np;
7485 copyprev = &np->wn_sibling;
7486
7487 /* Let "node" point to the head of the copied list. */
7488 if (copyp == node)
7489 node = np;
7490 }
7491 }
7492
7493 /* Look for the sibling that has the same character. They are sorted
7494 * on byte value, thus stop searching when a sibling is found with a
7495 * higher byte value. For zero bytes (end of word) the sorting is
7496 * done on flags and then on affixID. */
7497 while (node != NULL
7498 && (node->wn_byte < word[i]
7499 || (node->wn_byte == NUL
7500 && (flags < 0
7501 ? node->wn_affixID < (unsigned)affixID
7502 : (node->wn_flags < (unsigned)(flags & WN_MASK)
7503 || (node->wn_flags == (flags & WN_MASK)
7504 && (spin->si_sugtree
7505 ? (node->wn_region & 0xffff) < region
7506 : node->wn_affixID
7507 < (unsigned)affixID)))))))
7508 {
7509 prev = &node->wn_sibling;
7510 node = *prev;
7511 }
7512 if (node == NULL
7513 || node->wn_byte != word[i]
7514 || (word[i] == NUL
7515 && (flags < 0
7516 || spin->si_sugtree
7517 || node->wn_flags != (flags & WN_MASK)
7518 || node->wn_affixID != affixID)))
7519 {
7520 /* Allocate a new node. */
7521 np = get_wordnode(spin);
7522 if (np == NULL)
7523 return FAIL;
7524 np->wn_byte = word[i];
7525
7526 /* If "node" is NULL this is a new child or the end of the sibling
7527 * list: ref count is one. Otherwise use ref count of sibling and
7528 * make ref count of sibling one (matters when inserting in front
7529 * of the list of siblings). */
7530 if (node == NULL)
7531 np->wn_refs = 1;
7532 else
7533 {
7534 np->wn_refs = node->wn_refs;
7535 node->wn_refs = 1;
7536 }
7537 if (prev != NULL)
7538 *prev = np;
7539 np->wn_sibling = node;
7540 node = np;
7541 }
7542
7543 if (word[i] == NUL)
7544 {
7545 node->wn_flags = flags;
7546 node->wn_region |= region;
7547 node->wn_affixID = affixID;
7548 break;
7549 }
7550 prev = &node->wn_child;
7551 node = *prev;
7552 }
7553 #ifdef SPELL_PRINTTREE
7554 smsg((char_u *)"Added \"%s\"", word);
7555 spell_print_tree(root->wn_sibling);
7556 #endif
7557
7558 /* count nr of words added since last message */
7559 ++spin->si_msg_count;
7560
7561 if (spin->si_compress_cnt > 1)
7562 {
7563 if (--spin->si_compress_cnt == 1)
7564 /* Did enough words to lower the block count limit. */
7565 spin->si_blocks_cnt += compress_inc;
7566 }
7567
7568 /*
7569 * When we have allocated lots of memory we need to compress the word tree
7570 * to free up some room. But compression is slow, and we might actually
7571 * need that room, thus only compress in the following situations:
7572 * 1. When not compressed before (si_compress_cnt == 0): when using
7573 * "compress_start" blocks.
7574 * 2. When compressed before and used "compress_inc" blocks before
7575 * adding "compress_added" words (si_compress_cnt > 1).
7576 * 3. When compressed before, added "compress_added" words
7577 * (si_compress_cnt == 1) and the number of free nodes drops below the
7578 * maximum word length.
7579 */
7580 #ifndef SPELL_COMPRESS_ALLWAYS
7581 if (spin->si_compress_cnt == 1
7582 ? spin->si_free_count < MAXWLEN
7583 : spin->si_blocks_cnt >= compress_start)
7584 #endif
7585 {
7586 /* Decrement the block counter. The effect is that we compress again
7587 * when the freed up room has been used and another "compress_inc"
7588 * blocks have been allocated. Unless "compress_added" words have
7589 * been added, then the limit is put back again. */
7590 spin->si_blocks_cnt -= compress_inc;
7591 spin->si_compress_cnt = compress_added;
7592
7593 if (spin->si_verbose)
7594 {
7595 msg_start();
7596 msg_puts((char_u *)_(msg_compressing));
7597 msg_clr_eos();
7598 msg_didout = FALSE;
7599 msg_col = 0;
7600 out_flush();
7601 }
7602
7603 /* Compress both trees. Either they both have many nodes, which makes
7604 * compression useful, or one of them is small, which means
7605 * compression goes fast. But when filling the soundfold word tree
7606 * there is no keep-case tree. */
7607 wordtree_compress(spin, spin->si_foldroot);
7608 if (affixID >= 0)
7609 wordtree_compress(spin, spin->si_keeproot);
7610 }
7611
7612 return OK;
7613 }
7614
7615 /*
7616 * Check the 'mkspellmem' option. Return FAIL if it's wrong.
7617 * Sets "sps_flags".
7618 */
7619 int
7620 spell_check_msm(void)
7621 {
7622 char_u *p = p_msm;
7623 long start = 0;
7624 long incr = 0;
7625 long added = 0;
7626
7627 if (!VIM_ISDIGIT(*p))
7628 return FAIL;
7629 /* block count = (value * 1024) / SBLOCKSIZE (but avoid overflow)*/
7630 start = (getdigits(&p) * 10) / (SBLOCKSIZE / 102);
7631 if (*p != ',')
7632 return FAIL;
7633 ++p;
7634 if (!VIM_ISDIGIT(*p))
7635 return FAIL;
7636 incr = (getdigits(&p) * 102) / (SBLOCKSIZE / 10);
7637 if (*p != ',')
7638 return FAIL;
7639 ++p;
7640 if (!VIM_ISDIGIT(*p))
7641 return FAIL;
7642 added = getdigits(&p) * 1024;
7643 if (*p != NUL)
7644 return FAIL;
7645
7646 if (start == 0 || incr == 0 || added == 0 || incr > start)
7647 return FAIL;
7648
7649 compress_start = start;
7650 compress_inc = incr;
7651 compress_added = added;
7652 return OK;
7653 }
7654
7655
7656 /*
7657 * Get a wordnode_T, either from the list of previously freed nodes or
7658 * allocate a new one.
7659 * Returns NULL when out of memory.
7660 */
7661 static wordnode_T *
7662 get_wordnode(spellinfo_T *spin)
7663 {
7664 wordnode_T *n;
7665
7666 if (spin->si_first_free == NULL)
7667 n = (wordnode_T *)getroom(spin, sizeof(wordnode_T), TRUE);
7668 else
7669 {
7670 n = spin->si_first_free;
7671 spin->si_first_free = n->wn_child;
7672 vim_memset(n, 0, sizeof(wordnode_T));
7673 --spin->si_free_count;
7674 }
7675 #ifdef SPELL_PRINTTREE
7676 if (n != NULL)
7677 n->wn_nr = ++spin->si_wordnode_nr;
7678 #endif
7679 return n;
7680 }
7681
7682 /*
7683 * Decrement the reference count on a node (which is the head of a list of
7684 * siblings). If the reference count becomes zero free the node and its
7685 * siblings.
7686 * Returns the number of nodes actually freed.
7687 */
7688 static int
7689 deref_wordnode(spellinfo_T *spin, wordnode_T *node)
7690 {
7691 wordnode_T *np;
7692 int cnt = 0;
7693
7694 if (--node->wn_refs == 0)
7695 {
7696 for (np = node; np != NULL; np = np->wn_sibling)
7697 {
7698 if (np->wn_child != NULL)
7699 cnt += deref_wordnode(spin, np->wn_child);
7700 free_wordnode(spin, np);
7701 ++cnt;
7702 }
7703 ++cnt; /* length field */
7704 }
7705 return cnt;
7706 }
7707
7708 /*
7709 * Free a wordnode_T for re-use later.
7710 * Only the "wn_child" field becomes invalid.
7711 */
7712 static void
7713 free_wordnode(spellinfo_T *spin, wordnode_T *n)
7714 {
7715 n->wn_child = spin->si_first_free;
7716 spin->si_first_free = n;
7717 ++spin->si_free_count;
7718 }
7719
7720 /*
7721 * Compress a tree: find tails that are identical and can be shared.
7722 */
7723 static void
7724 wordtree_compress(spellinfo_T *spin, wordnode_T *root)
7725 {
7726 hashtab_T ht;
7727 int n;
7728 int tot = 0;
7729 int perc;
7730
7731 /* Skip the root itself, it's not actually used. The first sibling is the
7732 * start of the tree. */
7733 if (root->wn_sibling != NULL)
7734 {
7735 hash_init(&ht);
7736 n = node_compress(spin, root->wn_sibling, &ht, &tot);
7737
7738 #ifndef SPELL_PRINTTREE
7739 if (spin->si_verbose || p_verbose > 2)
7740 #endif
7741 {
7742 if (tot > 1000000)
7743 perc = (tot - n) / (tot / 100);
7744 else if (tot == 0)
7745 perc = 0;
7746 else
7747 perc = (tot - n) * 100 / tot;
7748 vim_snprintf((char *)IObuff, IOSIZE,
7749 _("Compressed %d of %d nodes; %d (%d%%) remaining"),
7750 n, tot, tot - n, perc);
7751 spell_message(spin, IObuff);
7752 }
7753 #ifdef SPELL_PRINTTREE
7754 spell_print_tree(root->wn_sibling);
7755 #endif
7756 hash_clear(&ht);
7757 }
7758 }
7759
7760 /*
7761 * Compress a node, its siblings and its children, depth first.
7762 * Returns the number of compressed nodes.
 *
 * "spin" is passed on to deref_wordnode(), which puts the nodes of a
 * replaced child list on the free list.
 * "node" is the first node of the sibling list to work on.
 * "ht" holds the sibling lists seen so far, keyed by "wn_u1.hashkey" of their
 * first node; lists with an equal key are chained through "wn_u2.next".
 * "tot" is incremented by the length of every sibling list visited plus one.
 *
 * The walk over the siblings stops early when "got_int" is set.
7763 */
7764 static int
7765 node_compress(
7766 spellinfo_T *spin,
7767 wordnode_T *node,
7768 hashtab_T *ht,
7769 int *tot) /* total count of nodes before compressing,
7770 incremented while going through the tree */
7771 {
7772 wordnode_T *np;
7773 wordnode_T *tp;
7774 wordnode_T *child;
7775 hash_T hash;
7776 hashitem_T *hi;
7777 int len = 0;
7778 unsigned nr, n;
7779 int compressed = 0;
7780
7781 /*
7782 * Go through the list of siblings. Compress each child and then try
7783 * finding an identical child to replace it.
7784 * Note that with "child" we mean not just the node that is pointed to,
7785 * but the whole list of siblings of which the child node is the first.
7786 */
7787 for (np = node; np != NULL && !got_int; np = np->wn_sibling)
7788 {
7789 ++len;
7790 if ((child = np->wn_child) != NULL)
7791 {
7792 /* Compress the child first. This fills hashkey. */
7793 compressed += node_compress(spin, child, ht, tot);
7794
7795 /* Try to find an identical child. */
7796 hash = hash_hash(child->wn_u1.hashkey);
7797 hi = hash_lookup(ht, child->wn_u1.hashkey, hash);
7798 if (!HASHITEM_EMPTY(hi))
7799 {
7800 /* There are children we encountered before with a hash value
7801 * identical to the current child. Now check if there is one
7802 * that is really identical. */
7803 for (tp = HI2WN(hi); tp != NULL; tp = tp->wn_u2.next)
7804 if (node_equal(child, tp))
7805 {
7806 /* Found one! Now use that child in place of the
7807 * current one. This means the current child and all
7808 * its siblings is unlinked from the tree. */
/* The reference count of the shared list goes up before the reference to
 * the duplicate is dropped. */
7809 ++tp->wn_refs;
7810 compressed += deref_wordnode(spin, child);
7811 np->wn_child = tp;
7812 break;
7813 }
/* "tp" is only NULL here when the loop above ran to the end of the chain
 * without finding an equal list. */
7814 if (tp == NULL)
7815 {
7816 /* No other child with this hash value equals the child of
7817 * the node, add it to the linked list after the first
7818 * item. */
7819 tp = HI2WN(hi);
7820 child->wn_u2.next = tp->wn_u2.next;
7821 tp->wn_u2.next = child;
7822 }
7823 }
7824 else
7825 /* No other child has this hash value, add it to the
7826 * hashtable. */
7827 hash_add_item(ht, hi, child->wn_u1.hashkey, hash);
7828 }
7829 }
7830 *tot += len + 1; /* add one for the node that stores the length */
7831
7832 /*
7833 * Make a hash key for the node and its siblings, so that we can quickly
7834 * find a lookalike node. This must be done after compressing the sibling
7835 * list, otherwise the hash key would become invalid by the compression.
 *
 * Layout of the key: hashkey[0] is the number of siblings counted above,
 * hashkey[1] to hashkey[4] are the four bytes of "nr", least significant
 * first, with a zero byte replaced by 1, and hashkey[5] is the terminating
 * NUL.
 * NOTE(review): when "got_int" is already set on entry the loop above does
 * not count any sibling and hashkey[0] becomes zero - confirm this is
 * harmless because the operation is being aborted anyway.
7836 */
7837 node->wn_u1.hashkey[0] = len;
7838 nr = 0;
7839 for (np = node; np != NULL; np = np->wn_sibling)
7840 {
7841 if (np->wn_byte == NUL)
7842 /* end node: use wn_flags, wn_region and wn_affixID */
7843 n = np->wn_flags + (np->wn_region << 8) + (np->wn_affixID << 16);
7844 else
7845 /* byte node: use the byte value and the child pointer */
/* The children were compressed above, thus equal child lists are
 * represented by the same pointer, which is also what node_equal()
 * compares. */
7846 n = (unsigned)(np->wn_byte + ((long_u)np->wn_child << 8));
7847 nr = nr * 101 + n;
7848 }
7849
7850 /* Avoid NUL bytes, it terminates the hash key. */
7851 n = nr & 0xff;
7852 node->wn_u1.hashkey[1] = n == 0 ? 1 : n;
7853 n = (nr >> 8) & 0xff;
7854 node->wn_u1.hashkey[2] = n == 0 ? 1 : n;
7855 n = (nr >> 16) & 0xff;
7856 node->wn_u1.hashkey[3] = n == 0 ? 1 : n;
7857 n = (nr >> 24) & 0xff;
7858 node->wn_u1.hashkey[4] = n == 0 ? 1 : n;
7859 node->wn_u1.hashkey[5] = NUL;
7860
7861 /* Check for CTRL-C pressed now and then. */
7862 fast_breakcheck();
7863
7864 return compressed;
7865 }
7866
7867 /*
7868 * Return TRUE when two nodes have identical siblings and children.
7869 */
7870 static int
7871 node_equal(wordnode_T *n1, wordnode_T *n2)
7872 {
7873 wordnode_T *p1;
7874 wordnode_T *p2;
7875
7876 for (p1 = n1, p2 = n2; p1 != NULL && p2 != NULL;
7877 p1 = p1->wn_sibling, p2 = p2->wn_sibling)
7878 if (p1->wn_byte != p2->wn_byte
7879 || (p1->wn_byte == NUL
7880 ? (p1->wn_flags != p2->wn_flags
7881 || p1->wn_region != p2->wn_region
7882 || p1->wn_affixID != p2->wn_affixID)
7883 : (p1->wn_child != p2->wn_child)))
7884 break;
7885
7886 return p1 == NULL && p2 == NULL;
7887 }
7888
7889 static int
7890 #ifdef __BORLANDC__
7891 _RTLENTRYF
7892 #endif
7893 rep_compare(const void *s1, const void *s2);
7894
7895 /*
7896 * Function given to qsort() to sort the REP items on "from" string.
7897 */
7898 static int
7899 #ifdef __BORLANDC__
7900 _RTLENTRYF
7901 #endif
7902 rep_compare(const void *s1, const void *s2)
7903 {
7904 fromto_T *p1 = (fromto_T *)s1;
7905 fromto_T *p2 = (fromto_T *)s2;
7906
7907 return STRCMP(p1->ft_from, p2->ft_from);
7908 }
7909
7910 /*
7911 * Write the Vim .spl file "fname".
7912 * Return FAIL or OK;
 *
 * The information is taken from "spin".  Written in this order: the header,
 * the sections for which "spin" has data, the section end marker and then
 * the three word trees (si_foldroot, si_keeproot and si_prefroot).
 *
 * Side effects on "spin": the si_rep and si_repsal items are sorted in
 * place, si_sugtime is set when the SN_SUGFILE section is written and
 * si_memtot is recomputed from the node counts.
 *
 * Gives an error message when the file cannot be opened or when writing
 * failed.
7913 */
7914 static int
7915 write_vim_spell(spellinfo_T *spin, char_u *fname)
7916 {
7917 FILE *fd;
7918 int regionmask;
7919 int round;
7920 wordnode_T *tree;
7921 int nodecount;
7922 int i;
7923 int l;
7924 garray_T *gap;
7925 fromto_T *ftp;
7926 char_u *p;
7927 int rr;
7928 int retval = OK;
7929 size_t fwv = 1; /* collect return value of fwrite() to avoid
7930 warnings from picky compiler */
7931
7932 fd = mch_fopen((char *)fname, "w");
7933 if (fd == NULL)
7934 {
7935 EMSG2(_(e_notopen), fname);
7936 return FAIL;
7937 }
7938
7939 /* <HEADER>: <fileID> <versionnr> */
7940 /* <fileID> */
/* Every fwrite() below writes one item, its result is and-ed into "fwv".
 * "fwv" thus only remains 1 when all of them succeeded, this is checked at
 * the end. */
7941 fwv &= fwrite(VIMSPELLMAGIC, VIMSPELLMAGICL, (size_t)1, fd);
7942 if (fwv != (size_t)1)
7943 /* Catch first write error, don't try writing more. */
7944 goto theend;
7945
7946 putc(VIMSPELLVERSION, fd); /* <versionnr> */
7947
7948 /*
7949 * <SECTIONS>: <section> ... <sectionend>
7950 */
7951
7952 /* SN_INFO: <infotext> */
7953 if (spin->si_info != NULL)
7954 {
7955 putc(SN_INFO, fd); /* <sectionID> */
7956 putc(0, fd); /* <sectionflags> */
7957
7958 i = (int)STRLEN(spin->si_info);
7959 put_bytes(fd, (long_u)i, 4); /* <sectionlen> */
/* NOTE(review): fwrite() returns zero when the size argument is zero, which
 * would clear "fwv" and make this function fail.  The REP/SAL loop below
 * guards against that with "l > 0", this call and the other unguarded
 * fwrite() calls in this function do not - confirm these strings can never
 * be empty. */
7960 fwv &= fwrite(spin->si_info, (size_t)i, (size_t)1, fd); /* <infotext> */
7961 }
7962
7963 /* SN_REGION: <regionname> ...
7964 * Write the region names only if there is more than one. */
7965 if (spin->si_region_count > 1)
7966 {
7967 putc(SN_REGION, fd); /* <sectionID> */
7968 putc(SNF_REQUIRED, fd); /* <sectionflags> */
7969 l = spin->si_region_count * 2;
7970 put_bytes(fd, (long_u)l, 4); /* <sectionlen> */
7971 fwv &= fwrite(spin->si_region_name, (size_t)l, (size_t)1, fd);
7972 /* <regionname> ... */
/* One bit for each region; passed to put_node() below to find out whether
 * a word applies to all regions. */
7973 regionmask = (1 << spin->si_region_count) - 1;
7974 }
7975 else
7976 regionmask = 0;
7977
7978 /* SN_CHARFLAGS: <charflagslen> <charflags> <folcharslen> <folchars>
7979 *
7980 * The table with character flags and the table for case folding.
7981 * This makes sure the same characters are recognized as word characters
7982 * when generating and when using a spell file.
7983 * Skip this for ASCII, the table may conflict with the one used for
7984 * 'encoding'.
7985 * Also skip this for an .add.spl file, the main spell file must contain
7986 * the table (avoids that it conflicts). File is shorter too.
7987 */
7988 if (!spin->si_ascii && !spin->si_add)
7989 {
7990 char_u folchars[128 * 8];
7991 int flags;
7992
7993 putc(SN_CHARFLAGS, fd); /* <sectionID> */
7994 putc(SNF_REQUIRED, fd); /* <sectionflags> */
7995
7996 /* Form the <folchars> string first, we need to know its length. */
7997 l = 0;
7998 for (i = 128; i < 256; ++i)
7999 {
8000 #ifdef FEAT_MBYTE
8001 if (has_mbyte)
8002 l += mb_char2bytes(spelltab.st_fold[i], folchars + l);
8003 else
8004 #endif
8005 folchars[l++] = spelltab.st_fold[i];
8006 }
/* Length: <charflagslen> (1) + <charflags> (128) + <folcharslen> (2) +
 * <folchars> (l). */
8007 put_bytes(fd, (long_u)(1 + 128 + 2 + l), 4); /* <sectionlen> */
8008
8009 fputc(128, fd); /* <charflagslen> */
8010 for (i = 128; i < 256; ++i)
8011 {
8012 flags = 0;
8013 if (spelltab.st_isw[i])
8014 flags |= CF_WORD;
8015 if (spelltab.st_isu[i])
8016 flags |= CF_UPPER;
8017 fputc(flags, fd); /* <charflags> */
8018 }
8019
8020 put_bytes(fd, (long_u)l, 2); /* <folcharslen> */
8021 fwv &= fwrite(folchars, (size_t)l, (size_t)1, fd); /* <folchars> */
8022 }
8023
8024 /* SN_MIDWORD: <midword> */
8025 if (spin->si_midword != NULL)
8026 {
8027 putc(SN_MIDWORD, fd); /* <sectionID> */
8028 putc(SNF_REQUIRED, fd); /* <sectionflags> */
8029
8030 i = (int)STRLEN(spin->si_midword);
8031 put_bytes(fd, (long_u)i, 4); /* <sectionlen> */
8032 fwv &= fwrite(spin->si_midword, (size_t)i, (size_t)1, fd);
8033 /* <midword> */
8034 }
8035
8036 /* SN_PREFCOND: <prefcondcnt> <prefcond> ... */
8037 if (spin->si_prefcond.ga_len > 0)
8038 {
8039 putc(SN_PREFCOND, fd); /* <sectionID> */
8040 putc(SNF_REQUIRED, fd); /* <sectionflags> */
8041
/* The first call, with a NULL file, is only used to obtain the length. */
8042 l = write_spell_prefcond(NULL, &spin->si_prefcond);
8043 put_bytes(fd, (long_u)l, 4); /* <sectionlen> */
8044
8045 write_spell_prefcond(fd, &spin->si_prefcond);
8046 }
8047
8048 /* SN_REP: <repcount> <rep> ...
8049 * SN_SAL: <salflags> <salcount> <sal> ...
8050 * SN_REPSAL: <repcount> <rep> ... */
8051
8052 /* round 1: SN_REP section
8053 * round 2: SN_SAL section (unless SN_SOFO is used)
8054 * round 3: SN_REPSAL section */
8055 for (round = 1; round <= 3; ++round)
8056 {
8057 if (round == 1)
8058 gap = &spin->si_rep;
8059 else if (round == 2)
8060 {
8061 /* Don't write SN_SAL when using a SN_SOFO section */
8062 if (spin->si_sofofr != NULL && spin->si_sofoto != NULL)
8063 continue;
8064 gap = &spin->si_sal;
8065 }
8066 else
8067 gap = &spin->si_repsal;
8068
8069 /* Don't write the section if there are no items. */
8070 if (gap->ga_len == 0)
8071 continue;
8072
8073 /* Sort the REP/REPSAL items. */
8074 if (round != 2)
8075 qsort(gap->ga_data, (size_t)gap->ga_len,
8076 sizeof(fromto_T), rep_compare);
8077
8078 i = round == 1 ? SN_REP : (round == 2 ? SN_SAL : SN_REPSAL);
8079 putc(i, fd); /* <sectionID> */
8080
8081 /* This is for making suggestions, section is not required. */
8082 putc(0, fd); /* <sectionflags> */
8083
8084 /* Compute the length of what follows. */
8085 l = 2; /* count <repcount> or <salcount> */
8086 for (i = 0; i < gap->ga_len; ++i)
8087 {
8088 ftp = &((fromto_T *)gap->ga_data)[i];
8089 l += 1 + (int)STRLEN(ftp->ft_from); /* count <*fromlen> and <*from> */
8090 l += 1 + (int)STRLEN(ftp->ft_to); /* count <*tolen> and <*to> */
8091 }
8092 if (round == 2)
8093 ++l; /* count <salflags> */
8094 put_bytes(fd, (long_u)l, 4); /* <sectionlen> */
8095
8096 if (round == 2)
8097 {
8098 i = 0;
8099 if (spin->si_followup)
8100 i |= SAL_F0LLOWUP;
8101 if (spin->si_collapse)
8102 i |= SAL_COLLAPSE;
8103 if (spin->si_rem_accents)
8104 i |= SAL_REM_ACCENTS;
8105 putc(i, fd); /* <salflags> */
8106 }
8107
8108 put_bytes(fd, (long_u)gap->ga_len, 2); /* <repcount> or <salcount> */
8109 for (i = 0; i < gap->ga_len; ++i)
8110 {
8111 /* <rep> : <repfromlen> <repfrom> <reptolen> <repto> */
8112 /* <sal> : <salfromlen> <salfrom> <saltolen> <salto> */
8113 ftp = &((fromto_T *)gap->ga_data)[i];
/* rr 1 writes the "from" string, rr 2 the "to" string, each preceded by a
 * length written with a single putc(). */
8114 for (rr = 1; rr <= 2; ++rr)
8115 {
8116 p = rr == 1 ? ftp->ft_from : ftp->ft_to;
8117 l = (int)STRLEN(p);
8118 putc(l, fd);
8119 if (l > 0)
8120 fwv &= fwrite(p, l, (size_t)1, fd);
8121 }
8122 }
8123
8124 }
8125
8126 /* SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto>
8127 * This is for making suggestions, section is not required. */
8128 if (spin->si_sofofr != NULL && spin->si_sofoto != NULL)
8129 {
8130 putc(SN_SOFO, fd); /* <sectionID> */
8131 putc(0, fd); /* <sectionflags> */
8132
8133 l = (int)STRLEN(spin->si_sofofr);
/* The extra 4 is for the two length fields of two bytes each. */
8134 put_bytes(fd, (long_u)(l + STRLEN(spin->si_sofoto) + 4), 4);
8135 /* <sectionlen> */
8136
8137 put_bytes(fd, (long_u)l, 2); /* <sofofromlen> */
8138 fwv &= fwrite(spin->si_sofofr, l, (size_t)1, fd); /* <sofofrom> */
8139
8140 l = (int)STRLEN(spin->si_sofoto);
8141 put_bytes(fd, (long_u)l, 2); /* <sofotolen> */
8142 fwv &= fwrite(spin->si_sofoto, l, (size_t)1, fd); /* <sofoto> */
8143 }
8144
8145 /* SN_WORDS: <word> ...
8146 * This is for making suggestions, section is not required. */
8147 if (spin->si_commonwords.ht_used > 0)
8148 {
8149 putc(SN_WORDS, fd); /* <sectionID> */
8150 putc(0, fd); /* <sectionflags> */
8151
8152 /* round 1: count the bytes
8153 * round 2: write the bytes */
8154 for (round = 1; round <= 2; ++round)
8155 {
8156 int todo;
8157 int len = 0;
8158 hashitem_T *hi;
8159
8160 todo = (int)spin->si_commonwords.ht_used;
8161 for (hi = spin->si_commonwords.ht_array; todo > 0; ++hi)
8162 if (!HASHITEM_EMPTY(hi))
8163 {
/* The length includes the terminating NUL, it is written as well. */
8164 l = (int)STRLEN(hi->hi_key) + 1;
8165 len += l;
8166 if (round == 2) /* <word> */
8167 fwv &= fwrite(hi->hi_key, (size_t)l, (size_t)1, fd);
8168 --todo;
8169 }
8170 if (round == 1)
8171 put_bytes(fd, (long_u)len, 4); /* <sectionlen> */
8172 }
8173 }
8174
8175 /* SN_MAP: <mapstr>
8176 * This is for making suggestions, section is not required. */
8177 if (spin->si_map.ga_len > 0)
8178 {
8179 putc(SN_MAP, fd); /* <sectionID> */
8180 putc(0, fd); /* <sectionflags> */
8181 l = spin->si_map.ga_len;
8182 put_bytes(fd, (long_u)l, 4); /* <sectionlen> */
8183 fwv &= fwrite(spin->si_map.ga_data, (size_t)l, (size_t)1, fd);
8184 /* <mapstr> */
8185 }
8186
8187 /* SN_SUGFILE: <timestamp>
8188 * This is used to notify that a .sug file may be available and at the
8189 * same time allows for checking that a .sug file that is found matches
8190 * with this .spl file. That's because the word numbers must be exactly
8191 * right. */
8192 if (!spin->si_nosugfile
8193 && (spin->si_sal.ga_len > 0
8194 || (spin->si_sofofr != NULL && spin->si_sofoto != NULL)))
8195 {
8196 putc(SN_SUGFILE, fd); /* <sectionID> */
8197 putc(0, fd); /* <sectionflags> */
8198 put_bytes(fd, (long_u)8, 4); /* <sectionlen> */
8199
8200 /* Set si_sugtime and write it to the file. */
8201 spin->si_sugtime = time(NULL);
8202 put_time(fd, spin->si_sugtime); /* <timestamp> */
8203 }
8204
8205 /* SN_NOSPLITSUGS: nothing
8206 * This is used to notify that no suggestions with word splits are to be
8207 * made. */
8208 if (spin->si_nosplitsugs)
8209 {
8210 putc(SN_NOSPLITSUGS, fd); /* <sectionID> */
8211 putc(0, fd); /* <sectionflags> */
8212 put_bytes(fd, (long_u)0, 4); /* <sectionlen> */
8213 }
8214
8215 /* SN_NOCOMPOUNDSUGS: nothing
8216 * This is used to notify that no suggestions with compounds are to be
8217 * made. */
8218 if (spin->si_nocompoundsugs)
8219 {
8220 putc(SN_NOCOMPOUNDSUGS, fd); /* <sectionID> */
8221 putc(0, fd); /* <sectionflags> */
8222 put_bytes(fd, (long_u)0, 4); /* <sectionlen> */
8223 }
8224
8225 /* SN_COMPOUND: compound info.
8226 * We don't mark it required, when not supported all compound words will
8227 * be bad words. */
8228 if (spin->si_compflags != NULL)
8229 {
8230 putc(SN_COMPOUND, fd); /* <sectionID> */
8231 putc(0, fd); /* <sectionflags> */
8232
8233 l = (int)STRLEN(spin->si_compflags);
8234 for (i = 0; i < spin->si_comppat.ga_len; ++i)
8235 l += (int)STRLEN(((char_u **)(spin->si_comppat.ga_data))[i]) + 1;
/* The extra 7 is for the five single bytes and the two bytes of
 * <comppatcount> written below. */
8236 put_bytes(fd, (long_u)(l + 7), 4); /* <sectionlen> */
8237
8238 putc(spin->si_compmax, fd); /* <compmax> */
8239 putc(spin->si_compminlen, fd); /* <compminlen> */
8240 putc(spin->si_compsylmax, fd); /* <compsylmax> */
8241 putc(0, fd); /* for Vim 7.0b compatibility */
8242 putc(spin->si_compoptions, fd); /* <compoptions> */
8243 put_bytes(fd, (long_u)spin->si_comppat.ga_len, 2);
8244 /* <comppatcount> */
8245 for (i = 0; i < spin->si_comppat.ga_len; ++i)
8246 {
8247 p = ((char_u **)(spin->si_comppat.ga_data))[i];
8248 putc((int)STRLEN(p), fd); /* <comppatlen> */
8249 fwv &= fwrite(p, (size_t)STRLEN(p), (size_t)1, fd);
8250 /* <comppattext> */
8251 }
8252 /* <compflags> */
8253 fwv &= fwrite(spin->si_compflags, (size_t)STRLEN(spin->si_compflags),
8254 (size_t)1, fd);
8255 }
8256
8257 /* SN_NOBREAK: NOBREAK flag */
8258 if (spin->si_nobreak)
8259 {
8260 putc(SN_NOBREAK, fd); /* <sectionID> */
8261 putc(0, fd); /* <sectionflags> */
8262
8263 /* It's empty, the presence of the section flags the feature. */
8264 put_bytes(fd, (long_u)0, 4); /* <sectionlen> */
8265 }
8266
8267 /* SN_SYLLABLE: syllable info.
8268 * We don't mark it required, when not supported syllables will not be
8269 * counted. */
8270 if (spin->si_syllable != NULL)
8271 {
8272 putc(SN_SYLLABLE, fd); /* <sectionID> */
8273 putc(0, fd); /* <sectionflags> */
8274
8275 l = (int)STRLEN(spin->si_syllable);
8276 put_bytes(fd, (long_u)l, 4); /* <sectionlen> */
8277 fwv &= fwrite(spin->si_syllable, (size_t)l, (size_t)1, fd);
8278 /* <syllable> */
8279 }
8280
8281 /* end of <SECTIONS> */
8282 putc(SN_END, fd); /* <sectionend> */
8283
8284
8285 /*
8286 * <LWORDTREE> <KWORDTREE> <PREFIXTREE>
8287 */
8288 spin->si_memtot = 0;
8289 for (round = 1; round <= 3; ++round)
8290 {
/* As in wordtree_compress(), the root node itself is skipped; the tree
 * starts at its first sibling. */
8291 if (round == 1)
8292 tree = spin->si_foldroot->wn_sibling;
8293 else if (round == 2)
8294 tree = spin->si_keeproot->wn_sibling;
8295 else
8296 tree = spin->si_prefroot->wn_sibling;
8297
8298 /* Clear the index and wnode fields in the tree. */
8299 clear_node(tree);
8300
8301 /* Count the number of nodes. Needed to be able to allocate the
8302 * memory when reading the nodes. Also fills in index for shared
8303 * nodes. */
8304 nodecount = put_node(NULL, tree, 0, regionmask, round == 3);
8305
8306 /* number of nodes in 4 bytes */
8307 put_bytes(fd, (long_u)nodecount, 4); /* <nodecount> */
8308 spin->si_memtot += nodecount + nodecount * sizeof(int);
8309
8310 /* Write the nodes. */
8311 (void)put_node(fd, tree, 0, regionmask, round == 3);
8312 }
8313
8314 /* Write another byte to check for errors (file system full). */
8315 if (putc(0, fd) == EOF)
8316 retval = FAIL;
8317 theend:
/* The file is always closed, also after an early write error.  A failing
 * fclose() or any failed fwrite() makes the result FAIL. */
8318 if (fclose(fd) == EOF)
8319 retval = FAIL;
8320
8321 if (fwv != (size_t)1)
8322 retval = FAIL;
8323 if (retval == FAIL)
8324 EMSG(_(e_write));
8325
8326 return retval;
8327 }
8328
8329 /*
8330 * Clear the index and wnode fields of "node", it siblings and its
8331 * children. This is needed because they are a union with other items to save
8332 * space.
8333 */
8334 static void
8335 clear_node(wordnode_T *node)
8336 {
8337 wordnode_T *np;
8338
8339 if (node != NULL)
8340 for (np = node; np != NULL; np = np->wn_sibling)
8341 {
8342 np->wn_u1.index = 0;
8343 np->wn_u2.wnode = NULL;
8344
8345 if (np->wn_byte != NUL)
8346 clear_node(np->wn_child);
8347 }
8348 }
8349
8350
8351 /*
8352 * Dump a word tree at node "node".
8353 *
8354 * This first writes the list of possible bytes (siblings). Then for each
8355 * byte recursively write the children.
8356 *
8357 * NOTE: The code here must match the code in read_tree_node(), since
8358 * assumptions are made about the indexes (so that we don't have to write them
8359 * in the file).
8360 *
8361 * Returns the number of nodes used.
 *
 * More precisely the return value is "idx" plus the number of entries used
 * by this sibling list and the child lists written below it; the caller
 * uses it as the index for what is written next.  With "idx" zero this is
 * the total number of nodes.
 * Returns zero for an empty tree ("node" is NULL) and also when writing a
 * byte failed, after giving an error message.
 *
 * "wn_u1.index" and "wn_u2.wnode" of the nodes are used for bookkeeping,
 * the caller is expected to have reset them with clear_node().
8362 */
8363 static int
8364 put_node(
8365 FILE *fd, /* NULL when only counting */
8366 wordnode_T *node,
8367 int idx,
8368 int regionmask,
8369 int prefixtree) /* TRUE for PREFIXTREE */
8370 {
8371 int newindex = idx;
8372 int siblingcount = 0;
8373 wordnode_T *np;
8374 int flags;
8375
8376 /* If "node" is zero the tree is empty. */
8377 if (node == NULL)
8378 return 0;
8379
8380 /* Store the index where this node is written. */
8381 node->wn_u1.index = idx;
8382
8383 /* Count the number of siblings. */
8384 for (np = node; np != NULL; np = np->wn_sibling)
8385 ++siblingcount;
8386
8387 /* Write the sibling count. */
8388 if (fd != NULL)
8389 putc(siblingcount, fd); /* <siblingcount> */
8390
8391 /* Write each sibling byte and optionally extra info. */
8392 for (np = node; np != NULL; np = np->wn_sibling)
8393 {
8394 if (np->wn_byte == 0)
8395 {
8396 if (fd != NULL)
8397 {
8398 /* For a NUL byte (end of word) write the flags etc. */
8399 if (prefixtree)
8400 {
8401 /* In PREFIXTREE write the required affixID and the
8402 * associated condition nr (stored in wn_region). The
8403 * byte value is misused to store the "rare" and "not
8404 * combining" flags */
8405 if (np->wn_flags == (short_u)PFX_FLAGS)
8406 putc(BY_NOFLAGS, fd); /* <byte> */
8407 else
8408 {
8409 putc(BY_FLAGS, fd); /* <byte> */
8410 putc(np->wn_flags, fd); /* <pflags> */
8411 }
8412 putc(np->wn_affixID, fd); /* <affixID> */
8413 put_bytes(fd, (long_u)np->wn_region, 2); /* <prefcondnr> */
8414 }
8415 else
8416 {
8417 /* For word trees we write the flag/region items. */
8418 flags = np->wn_flags;
/* The region is only written when region names are used (regionmask not
 * zero) and the word does not apply to all of them. */
8419 if (regionmask != 0 && np->wn_region != regionmask)
8420 flags |= WF_REGION;
8421 if (np->wn_affixID != 0)
8422 flags |= WF_AFX;
8423 if (flags == 0)
8424 {
8425 /* word without flags or region */
8426 putc(BY_NOFLAGS, fd); /* <byte> */
8427 }
8428 else
8429 {
/* Flags that do not fit in one byte are written as two bytes, low byte
 * first. */
8430 if (np->wn_flags >= 0x100)
8431 {
8432 putc(BY_FLAGS2, fd); /* <byte> */
8433 putc(flags, fd); /* <flags> */
8434 putc((unsigned)flags >> 8, fd); /* <flags2> */
8435 }
8436 else
8437 {
8438 putc(BY_FLAGS, fd); /* <byte> */
8439 putc(flags, fd); /* <flags> */
8440 }
8441 if (flags & WF_REGION)
8442 putc(np->wn_region, fd); /* <region> */
8443 if (flags & WF_AFX)
8444 putc(np->wn_affixID, fd); /* <affixID> */
8445 }
8446 }
8447 }
8448 }
8449 else
8450 {
/* A child that already has an index and was claimed through another
 * sibling list is shared: only a reference to it is written.  Otherwise
 * the child is claimed for this list by storing "node" in its
 * "wn_u2.wnode", the last loop below then writes it. */
8451 if (np->wn_child->wn_u1.index != 0
8452 && np->wn_child->wn_u2.wnode != node)
8453 {
8454 /* The child is written elsewhere, write the reference. */
8455 if (fd != NULL)
8456 {
8457 putc(BY_INDEX, fd); /* <byte> */
8458 /* <nodeidx> */
8459 put_bytes(fd, (long_u)np->wn_child->wn_u1.index, 3);
8460 }
8461 }
8462 else if (np->wn_child->wn_u2.wnode == NULL)
8463 /* We will write the child below and give it an index. */
8464 np->wn_child->wn_u2.wnode = node;
8465
/* The byte itself is written in both cases, after the reference if there
 * is one. */
8466 if (fd != NULL)
8467 if (putc(np->wn_byte, fd) == EOF) /* <byte> or <xbyte> */
8468 {
8469 EMSG(_(e_write));
8470 return 0;
8471 }
8472 }
8473 }
8474
8475 /* Space used in the array when reading: one for each sibling and one for
8476 * the count. */
8477 newindex += siblingcount + 1;
8478
8479 /* Recursively dump the children of each sibling. */
/* Only the children claimed by this sibling list are written here, shared
 * children that belong to another list are skipped. */
8480 for (np = node; np != NULL; np = np->wn_sibling)
8481 if (np->wn_byte != 0 && np->wn_child->wn_u2.wnode == node)
8482 newindex = put_node(fd, np->wn_child, newindex, regionmask,
8483 prefixtree);
8484
8485 return newindex;
8486 }
8487
8488
8489 /*
8490 * ":mkspell [-ascii] outfile infile ..."
8491 * ":mkspell [-ascii] addfile"
8492 */
8493 void
8494 ex_mkspell(exarg_T *eap)
8495 {
8496 int fcount;
8497 char_u **fnames;
8498 char_u *arg = eap->arg;
8499 int ascii = FALSE;
8500
8501 if (STRNCMP(arg, "-ascii", 6) == 0)
8502 {
8503 ascii = TRUE;
8504 arg = skipwhite(arg + 6);
8505 }
8506
8507 /* Expand all the remaining arguments (e.g., $VIMRUNTIME). */
8508 if (get_arglist_exp(arg, &fcount, &fnames, FALSE) == OK)
8509 {
8510 mkspell(fcount, fnames, ascii, eap->forceit, FALSE);
8511 FreeWild(fcount, fnames);
8512 }
8513 }
8514
8515 /*
8516 * Create the .sug file.
8517 * Uses the soundfold info in "spin".
8518 * Writes the file with the name "wfname", with ".spl" changed to ".sug".
8519 */
8520 static void
8521 spell_make_sugfile(spellinfo_T *spin, char_u *wfname)
8522 {
8523 char_u *fname = NULL;
8524 int len;
8525 slang_T *slang;
8526 int free_slang = FALSE;
8527
8528 /*
8529 * Read back the .spl file that was written. This fills the required
8530 * info for soundfolding. This also uses less memory than the
8531 * pointer-linked version of the trie. And it avoids having two versions
8532 * of the code for the soundfolding stuff.
8533 * It might have been done already by spell_reload_one().
8534 */
8535 for (slang = first_lang; slang != NULL; slang = slang->sl_next)
8536 if (fullpathcmp(wfname, slang->sl_fname, FALSE) == FPC_SAME)
8537 break;
8538 if (slang == NULL)
8539 {
8540 spell_message(spin, (char_u *)_("Reading back spell file..."));
8541 slang = spell_load_file(wfname, NULL, NULL, FALSE);
8542 if (slang == NULL)
8543 return;
8544 free_slang = TRUE;
8545 }
8546
8547 /*
8548 * Clear the info in "spin" that is used.
8549 */
8550 spin->si_blocks = NULL;
8551 spin->si_blocks_cnt = 0;
8552 spin->si_compress_cnt = 0; /* will stay at 0 all the time*/
8553 spin->si_free_count = 0;
8554 spin->si_first_free = NULL;
8555 spin->si_foldwcount = 0;
8556
8557 /*
8558 * Go through the trie of good words, soundfold each word and add it to
8559 * the soundfold trie.
8560 */
8561 spell_message(spin, (char_u *)_("Performing soundfolding..."));
8562 if (sug_filltree(spin, slang) == FAIL)
8563 goto theend;
8564
8565 /*
8566 * Create the table which links each soundfold word with a list of the
8567 * good words it may come from. Creates buffer "spin->si_spellbuf".
8568 * This also removes the wordnr from the NUL byte entries to make
8569 * compression possible.
8570 */
8571 if (sug_maketable(spin) == FAIL)
8572 goto theend;
8573
8574 smsg((char_u *)_("Number of words after soundfolding: %ld"),
8575 (long)spin->si_spellbuf->b_ml.ml_line_count);
8576
8577 /*
8578 * Compress the soundfold trie.
8579 */
8580 spell_message(spin, (char_u *)_(msg_compressing));
8581 wordtree_compress(spin, spin->si_foldroot);
8582
8583 /*
8584 * Write the .sug file.
8585 * Make the file name by changing ".spl" to ".sug".
8586 */
8587 fname = alloc(MAXPATHL);
8588 if (fname == NULL)
8589 goto theend;
8590 vim_strncpy(fname, wfname, MAXPATHL - 1);
8591 len = (int)STRLEN(fname);
8592 fname[len - 2] = 'u';
8593 fname[len - 1] = 'g';
8594 sug_write(spin, fname);
8595
8596 theend:
8597 vim_free(fname);
8598 if (free_slang)
8599 slang_free(slang);
8600 free_blocks(spin->si_blocks);
8601 close_spellbuf(spin->si_spellbuf);
8602 }
8603
8604 /*
8605 * Build the soundfold trie for language "slang".
 *
 * Walks the tree in "slang->sl_fbyts" / "slang->sl_fidxs" without recursion,
 * soundfolds every word found and adds the result to a new tree that is
 * stored in "spin->si_foldroot".  The number of the word is stored with it:
 * the upper part in the flags argument of tree_add_word(), the lower 16 bits
 * in the region argument.
 *
 * Side effects: sets "spin->si_sugtree", keeps resetting
 * "spin->si_blocks_cnt" and overwrites entries of "slang->sl_fidxs" with
 * word counts.
 *
 * Returns FAIL when allocating the root or adding a word fails, OK otherwise
 * (also when the walk was stopped early because "got_int" was set).
8606 */
8607 static int
8608 sug_filltree(spellinfo_T *spin, slang_T *slang)
8609 {
8610 char_u *byts;
8611 idx_T *idxs;
8612 int depth;
/* The arrays below are indexed by depth in the tree: "arridx" is the index
 * of the node in byts/idxs, "curi" the offset of the next byte to handle in
 * that node, "wordcount" the number of words found at or below that node.
 * "tword" holds the bytes on the path from the root. */
8613 idx_T arridx[MAXWLEN];
8614 int curi[MAXWLEN];
8615 char_u tword[MAXWLEN];
8616 char_u tsalword[MAXWLEN];
8617 int c;
8618 idx_T n;
8619 unsigned words_done = 0;
8620 int wordcount[MAXWLEN];
8621
8622 /* We use si_foldroot for the soundfolded trie. */
8623 spin->si_foldroot = wordtree_alloc(spin);
8624 if (spin->si_foldroot == NULL)
8625 return FAIL;
8626
8627 /* let tree_add_word() know we're adding to the soundfolded tree */
8628 spin->si_sugtree = TRUE;
8629
8630 /*
8631 * Go through the whole case-folded tree, soundfold each word and put it
8632 * in the trie.
8633 */
8634 byts = slang->sl_fbyts;
8635 idxs = slang->sl_fidxs;
8636
8637 arridx[0] = 0;
8638 curi[0] = 1;
8639 wordcount[0] = 0;
8640
8641 depth = 0;
8642 while (depth >= 0 && !got_int)
8643 {
/* byts[arridx[depth]] is the number of bytes in the current node, the
 * bytes themselves follow it (thus "curi" starts at 1). */
8644 if (curi[depth] > byts[arridx[depth]])
8645 {
8646 /* Done all bytes at this node, go up one level. */
/* The number of words below this node is stored in the idxs entry of the
 * node and added to the count of the parent. */
8647 idxs[arridx[depth]] = wordcount[depth];
8648 if (depth > 0)
8649 wordcount[depth - 1] += wordcount[depth];
8650
8651 --depth;
8652 line_breakcheck();
8653 }
8654 else
8655 {
8656
8657 /* Do one more byte at this node. */
8658 n = arridx[depth] + curi[depth];
8659 ++curi[depth];
8660
8661 c = byts[n];
8662 if (c == 0)
8663 {
8664 /* Sound-fold the word. */
8665 tword[depth] = NUL;
8666 spell_soundfold(slang, tword, TRUE, tsalword);
8667
8668 /* We use the "flags" field for the MSB of the wordnr,
8669 * "region" for the LSB of the wordnr. */
8670 if (tree_add_word(spin, tsalword, spin->si_foldroot,
8671 words_done >> 16, words_done & 0xffff,
8672 0) == FAIL)
8673 return FAIL;
8674
8675 ++words_done;
8676 ++wordcount[depth];
8677
8678 /* Reset the block count each time to avoid compression
8679 * kicking in. */
8680 spin->si_blocks_cnt = 0;
8681
8682 /* Skip over any other NUL bytes (same word with different
8683 * flags). */
8684 while (byts[n + 1] == 0)
8685 {
8686 ++n;
8687 ++curi[depth];
8688 }
8689 }
8690 else
8691 {
8692 /* Normal char, go one level deeper. */
/* idxs[n] is where the node for the following byte is located. */
8693 tword[depth++] = c;
8694 arridx[depth] = idxs[n];
8695 curi[depth] = 1;
8696 wordcount[depth] = 0;
8697 }
8698 }
8699 }
8700
8701 smsg((char_u *)_("Total number of words: %d"), words_done);
8702
8703 return OK;
8704 }
8705
8706 /*
8707 * Make the table that links each word in the soundfold trie to the words it
8708 * can be produced from.
8709 * This is not unlike lines in a file, thus use a memfile to be able to access
8710 * the table efficiently.
8711 * Returns FAIL when out of memory.
8712 */
8713 static int
8714 sug_maketable(spellinfo_T *spin)
8715 {
8716 garray_T ga;
8717 int res = OK;
8718
8719 /* Allocate a buffer, open a memline for it and create the swap file
8720 * (uses a temp file, not a .swp file). */
8721 spin->si_spellbuf = open_spellbuf();
8722 if (spin->si_spellbuf == NULL)
8723 return FAIL;
8724
8725 /* Use a buffer to store the line info, avoids allocating many small
8726 * pieces of memory. */
8727 ga_init2(&ga, 1, 100);
8728
8729 /* recursively go through the tree */
8730 if (sug_filltable(spin, spin->si_foldroot->wn_sibling, 0, &ga) == -1)
8731 res = FAIL;
8732
8733 ga_clear(&ga);
8734 return res;
8735 }
8736
8737 /*
8738 * Fill the table for one node and its children.
8739 * Returns the wordnr at the start of the node.
8740 * Returns -1 when out of memory.
8741 */
8742 static int
8743 sug_filltable(
8744 spellinfo_T *spin,
8745 wordnode_T *node,
8746 int startwordnr,
8747 garray_T *gap) /* place to store line of numbers */
8748 {
8749 wordnode_T *p, *np;
8750 int wordnr = startwordnr;
8751 int nr;
8752 int prev_nr;
8753
8754 for (p = node; p != NULL; p = p->wn_sibling)
8755 {
8756 if (p->wn_byte == NUL)
8757 {
8758 gap->ga_len = 0;
8759 prev_nr = 0;
8760 for (np = p; np != NULL && np->wn_byte == NUL; np = np->wn_sibling)
8761 {
8762 if (ga_grow(gap, 10) == FAIL)
8763 return -1;
8764
8765 nr = (np->wn_flags << 16) + (np->wn_region & 0xffff);
8766 /* Compute the offset from the previous nr and store the
8767 * offset in a way that it takes a minimum number of bytes.
8768 * It's a bit like utf-8, but without the need to mark
8769 * following bytes. */
8770 nr -= prev_nr;
8771 prev_nr += nr;
8772 gap->ga_len += offset2bytes(nr,
8773 (char_u *)gap->ga_data + gap->ga_len);
8774 }
8775
8776 /* add the NUL byte */
8777 ((char_u *)gap->ga_data)[gap->ga_len++] = NUL;
8778
8779 if (ml_append_buf(spin->si_spellbuf, (linenr_T)wordnr,
8780 gap->ga_data, gap->ga_len, TRUE) == FAIL)
8781 return -1;
8782 ++wordnr;
8783
8784 /* Remove extra NUL entries, we no longer need them. We don't
8785 * bother freeing the nodes, the won't be reused anyway. */
8786 while (p->wn_sibling != NULL && p->wn_sibling->wn_byte == NUL)
8787 p->wn_sibling = p->wn_sibling->wn_sibling;
8788
8789 /* Clear the flags on the remaining NUL node, so that compression
8790 * works a lot better. */
8791 p->wn_flags = 0;
8792 p->wn_region = 0;
8793 }
8794 else
8795 {
8796 wordnr = sug_filltable(spin, p->wn_child, wordnr, gap);
8797 if (wordnr == -1)
8798 return -1;
8799 }
8800 }
8801 return wordnr;
8802 }
8803
/*
 * Convert offset "nr" into a minimal number of bytes, stored in "buf".
 * Similar to utf_char2bytes(), but use 8 bits in followup bytes and avoid
 * NUL bytes.
 *
 * The number is split in four base 255 digits and one is added to each, so
 * that no byte is zero.  The first byte stored tells how many bytes are
 * used:
 *	0x01 - 0x7f	one byte
 *	0x80 + digit	two bytes
 *	0xc0 + digit	three bytes
 *	0xe0 + digit	four bytes
 * The other digits follow, most significant first.
 *
 * Returns the number of bytes stored in "buf", one to four.
 */
    static int
offset2bytes(int nr, char_u *buf)
{
    int		digit[4];	/* digit[0] is the least significant */
    int		rest = nr;
    int		lead;
    int		count;
    int		i;

    /* Split the number in parts of base 255.  We need to avoid NUL bytes. */
    for (i = 0; i < 3; ++i)
    {
	digit[i] = rest % 255 + 1;
	rest /= 255;
    }
    digit[3] = rest + 1;

    /* Use the smallest number of bytes in which the most significant digit
     * still fits in the first byte next to the marker bits. */
    if (digit[3] > 1 || digit[2] > 0x1f)
    {
	count = 4;
	lead = 0xe0;
    }
    else if (digit[2] > 1 || digit[1] > 0x3f)
    {
	count = 3;
	lead = 0xc0;
    }
    else if (digit[1] > 1 || digit[0] > 0x7f)
    {
	count = 2;
	lead = 0x80;
    }
    else
    {
	count = 1;
	lead = 0;
    }

    buf[0] = lead + digit[count - 1];
    for (i = 1; i < count; ++i)
	buf[i] = digit[count - 1 - i];
    return count;
}
8848
8849 /*
8850 * Opposite of offset2bytes(). 2933 * Opposite of offset2bytes().
8851 * "pp" points to the bytes and is advanced over it. 2934 * "pp" points to the bytes and is advanced over it.
8852 * Returns the offset. 2935 * Returns the offset.
8853 */ 2936 */
8854 static int 2937 static int
8884 2967
8885 *pp = p; 2968 *pp = p;
8886 return nr; 2969 return nr;
8887 } 2970 }
8888 2971
8889 /*
8890 * Write the .sug file in "fname".
8891 */
8892 static void
8893 sug_write(spellinfo_T *spin, char_u *fname)
8894 {
8895 FILE *fd;
8896 wordnode_T *tree;
8897 int nodecount;
8898 int wcount;
8899 char_u *line;
8900 linenr_T lnum;
8901 int len;
8902
8903 /* Create the file. Note that an existing file is silently overwritten! */
8904 fd = mch_fopen((char *)fname, "w");
8905 if (fd == NULL)
8906 {
8907 EMSG2(_(e_notopen), fname);
8908 return;
8909 }
8910
8911 vim_snprintf((char *)IObuff, IOSIZE,
8912 _("Writing suggestion file %s ..."), fname);
8913 spell_message(spin, IObuff);
8914
8915 /*
8916 * <SUGHEADER>: <fileID> <versionnr> <timestamp>
8917 */
8918 if (fwrite(VIMSUGMAGIC, VIMSUGMAGICL, (size_t)1, fd) != 1) /* <fileID> */
8919 {
8920 EMSG(_(e_write));
8921 goto theend;
8922 }
8923 putc(VIMSUGVERSION, fd); /* <versionnr> */
8924
8925 /* Write si_sugtime to the file. */
8926 put_time(fd, spin->si_sugtime); /* <timestamp> */
8927
8928 /*
8929 * <SUGWORDTREE>
8930 */
8931 spin->si_memtot = 0;
8932 tree = spin->si_foldroot->wn_sibling;
8933
8934 /* Clear the index and wnode fields in the tree. */
8935 clear_node(tree);
8936
8937 /* Count the number of nodes. Needed to be able to allocate the
8938 * memory when reading the nodes. Also fills in index for shared
8939 * nodes. */
8940 nodecount = put_node(NULL, tree, 0, 0, FALSE);
8941
8942 /* number of nodes in 4 bytes */
8943 put_bytes(fd, (long_u)nodecount, 4); /* <nodecount> */
8944 spin->si_memtot += nodecount + nodecount * sizeof(int);
8945
8946 /* Write the nodes. */
8947 (void)put_node(fd, tree, 0, 0, FALSE);
8948
8949 /*
8950 * <SUGTABLE>: <sugwcount> <sugline> ...
8951 */
8952 wcount = spin->si_spellbuf->b_ml.ml_line_count;
8953 put_bytes(fd, (long_u)wcount, 4); /* <sugwcount> */
8954
8955 for (lnum = 1; lnum <= (linenr_T)wcount; ++lnum)
8956 {
8957 /* <sugline>: <sugnr> ... NUL */
8958 line = ml_get_buf(spin->si_spellbuf, lnum, FALSE);
8959 len = (int)STRLEN(line) + 1;
8960 if (fwrite(line, (size_t)len, (size_t)1, fd) == 0)
8961 {
8962 EMSG(_(e_write));
8963 goto theend;
8964 }
8965 spin->si_memtot += len;
8966 }
8967
8968 /* Write another byte to check for errors. */
8969 if (putc(0, fd) == EOF)
8970 EMSG(_(e_write));
8971
8972 vim_snprintf((char *)IObuff, IOSIZE,
8973 _("Estimated runtime memory use: %d bytes"), spin->si_memtot);
8974 spell_message(spin, IObuff);
8975
8976 theend:
8977 /* close the file */
8978 fclose(fd);
8979 }
8980 2972
8981 /* 2973 /*
8982 * Open a spell buffer. This is a nameless buffer that is not in the buffer 2974 * Open a spell buffer. This is a nameless buffer that is not in the buffer
8983 * list and only contains text lines. Can use a swapfile to reduce memory 2975 * list and only contains text lines. Can use a swapfile to reduce memory
8984 * use. 2976 * use.
8985 * Most other fields are invalid! Esp. watch out for string options being 2977 * Most other fields are invalid! Esp. watch out for string options being
8986 * NULL and there is no undo info. 2978 * NULL and there is no undo info.
8987 * Returns NULL when out of memory. 2979 * Returns NULL when out of memory.
8988 */ 2980 */
8989 static buf_T * 2981 buf_T *
8990 open_spellbuf(void) 2982 open_spellbuf(void)
8991 { 2983 {
8992 buf_T *buf; 2984 buf_T *buf;
8993 2985
8994 buf = (buf_T *)alloc_clear(sizeof(buf_T)); 2986 buf = (buf_T *)alloc_clear(sizeof(buf_T));
9006 } 2998 }
9007 2999
9008 /* 3000 /*
9009 * Close the buffer used for spell info. 3001 * Close the buffer used for spell info.
9010 */ 3002 */
9011 static void 3003 void
9012 close_spellbuf(buf_T *buf) 3004 close_spellbuf(buf_T *buf)
9013 { 3005 {
9014 if (buf != NULL) 3006 if (buf != NULL)
9015 { 3007 {
9016 ml_close(buf, TRUE); 3008 ml_close(buf, TRUE);
9017 vim_free(buf); 3009 vim_free(buf);
9018 } 3010 }
9019 } 3011 }
9020 3012
9021
9022 /*
9023 * Create a Vim spell file from one or more word lists.
9024 * "fnames[0]" is the output file name.
9025 * "fnames[fcount - 1]" is the last input file name.
9026 * Exception: when "fnames[0]" ends in ".add" it's used as the input file name
9027 * and ".spl" is appended to make the output file name.
9028 */
9029 static void
9030 mkspell(
9031 int fcount,
9032 char_u **fnames,
9033 int ascii, /* -ascii argument given */
9034 int over_write, /* overwrite existing output file */
9035 int added_word) /* invoked through "zg" */
9036 {
9037 char_u *fname = NULL;
9038 char_u *wfname;
9039 char_u **innames;
9040 int incount;
9041 afffile_T *(afile[8]);
9042 int i;
9043 int len;
9044 stat_T st;
9045 int error = FALSE;
9046 spellinfo_T spin;
9047
9048 vim_memset(&spin, 0, sizeof(spin));
9049 spin.si_verbose = !added_word;
9050 spin.si_ascii = ascii;
9051 spin.si_followup = TRUE;
9052 spin.si_rem_accents = TRUE;
9053 ga_init2(&spin.si_rep, (int)sizeof(fromto_T), 20);
9054 ga_init2(&spin.si_repsal, (int)sizeof(fromto_T), 20);
9055 ga_init2(&spin.si_sal, (int)sizeof(fromto_T), 20);
9056 ga_init2(&spin.si_map, (int)sizeof(char_u), 100);
9057 ga_init2(&spin.si_comppat, (int)sizeof(char_u *), 20);
9058 ga_init2(&spin.si_prefcond, (int)sizeof(char_u *), 50);
9059 hash_init(&spin.si_commonwords);
9060 spin.si_newcompID = 127; /* start compound ID at first maximum */
9061
9062 /* default: fnames[0] is output file, following are input files */
9063 innames = &fnames[1];
9064 incount = fcount - 1;
9065
9066 wfname = alloc(MAXPATHL);
9067 if (wfname == NULL)
9068 return;
9069
9070 if (fcount >= 1)
9071 {
9072 len = (int)STRLEN(fnames[0]);
9073 if (fcount == 1 && len > 4 && STRCMP(fnames[0] + len - 4, ".add") == 0)
9074 {
9075 /* For ":mkspell path/en.latin1.add" output file is
9076 * "path/en.latin1.add.spl". */
9077 innames = &fnames[0];
9078 incount = 1;
9079 vim_snprintf((char *)wfname, MAXPATHL, "%s.spl", fnames[0]);
9080 }
9081 else if (fcount == 1)
9082 {
9083 /* For ":mkspell path/vim" output file is "path/vim.latin1.spl". */
9084 innames = &fnames[0];
9085 incount = 1;
9086 vim_snprintf((char *)wfname, MAXPATHL, SPL_FNAME_TMPL,
9087 fnames[0], spin.si_ascii ? (char_u *)"ascii" : spell_enc());
9088 }
9089 else if (len > 4 && STRCMP(fnames[0] + len - 4, ".spl") == 0)
9090 {
9091 /* Name ends in ".spl", use as the file name. */
9092 vim_strncpy(wfname, fnames[0], MAXPATHL - 1);
9093 }
9094 else
9095 /* Name should be language, make the file name from it. */
9096 vim_snprintf((char *)wfname, MAXPATHL, SPL_FNAME_TMPL,
9097 fnames[0], spin.si_ascii ? (char_u *)"ascii" : spell_enc());
9098
9099 /* Check for .ascii.spl. */
9100 if (strstr((char *)gettail(wfname), SPL_FNAME_ASCII) != NULL)
9101 spin.si_ascii = TRUE;
9102
9103 /* Check for .add.spl. */
9104 if (strstr((char *)gettail(wfname), SPL_FNAME_ADD) != NULL)
9105 spin.si_add = TRUE;
9106 }
9107
9108 if (incount <= 0)
9109 EMSG(_(e_invarg)); /* need at least output and input names */
9110 else if (vim_strchr(gettail(wfname), '_') != NULL)
9111 EMSG(_("E751: Output file name must not have region name"));
9112 else if (incount > 8)
9113 EMSG(_("E754: Only up to 8 regions supported"));
9114 else
9115 {
9116 /* Check for overwriting before doing things that may take a lot of
9117 * time. */
9118 if (!over_write && mch_stat((char *)wfname, &st) >= 0)
9119 {
9120 EMSG(_(e_exists));
9121 goto theend;
9122 }
9123 if (mch_isdir(wfname))
9124 {
9125 EMSG2(_(e_isadir2), wfname);
9126 goto theend;
9127 }
9128
9129 fname = alloc(MAXPATHL);
9130 if (fname == NULL)
9131 goto theend;
9132
9133 /*
9134 * Init the aff and dic pointers.
9135 * Get the region names if there are more than 2 arguments.
9136 */
9137 for (i = 0; i < incount; ++i)
9138 {
9139 afile[i] = NULL;
9140
9141 if (incount > 1)
9142 {
9143 len = (int)STRLEN(innames[i]);
9144 if (STRLEN(gettail(innames[i])) < 5
9145 || innames[i][len - 3] != '_')
9146 {
9147 EMSG2(_("E755: Invalid region in %s"), innames[i]);
9148 goto theend;
9149 }
9150 spin.si_region_name[i * 2] = TOLOWER_ASC(innames[i][len - 2]);
9151 spin.si_region_name[i * 2 + 1] =
9152 TOLOWER_ASC(innames[i][len - 1]);
9153 }
9154 }
9155 spin.si_region_count = incount;
9156
9157 spin.si_foldroot = wordtree_alloc(&spin);
9158 spin.si_keeproot = wordtree_alloc(&spin);
9159 spin.si_prefroot = wordtree_alloc(&spin);
9160 if (spin.si_foldroot == NULL
9161 || spin.si_keeproot == NULL
9162 || spin.si_prefroot == NULL)
9163 {
9164 free_blocks(spin.si_blocks);
9165 goto theend;
9166 }
9167
9168 /* When not producing a .add.spl file clear the character table when
9169 * we encounter one in the .aff file. This means we dump the current
9170 * one in the .spl file if the .aff file doesn't define one. That's
9171 * better than guessing the contents, the table will match a
9172 * previously loaded spell file. */
9173 if (!spin.si_add)
9174 spin.si_clear_chartab = TRUE;
9175
9176 /*
9177 * Read all the .aff and .dic files.
9178 * Text is converted to 'encoding'.
9179 * Words are stored in the case-folded and keep-case trees.
9180 */
9181 for (i = 0; i < incount && !error; ++i)
9182 {
9183 spin.si_conv.vc_type = CONV_NONE;
9184 spin.si_region = 1 << i;
9185
9186 vim_snprintf((char *)fname, MAXPATHL, "%s.aff", innames[i]);
9187 if (mch_stat((char *)fname, &st) >= 0)
9188 {
9189 /* Read the .aff file. Will init "spin->si_conv" based on the
9190 * "SET" line. */
9191 afile[i] = spell_read_aff(&spin, fname);
9192 if (afile[i] == NULL)
9193 error = TRUE;
9194 else
9195 {
9196 /* Read the .dic file and store the words in the trees. */
9197 vim_snprintf((char *)fname, MAXPATHL, "%s.dic",
9198 innames[i]);
9199 if (spell_read_dic(&spin, fname, afile[i]) == FAIL)
9200 error = TRUE;
9201 }
9202 }
9203 else
9204 {
9205 /* No .aff file, try reading the file as a word list. Store
9206 * the words in the trees. */
9207 if (spell_read_wordfile(&spin, innames[i]) == FAIL)
9208 error = TRUE;
9209 }
9210
9211 #ifdef FEAT_MBYTE
9212 /* Free any conversion stuff. */
9213 convert_setup(&spin.si_conv, NULL, NULL);
9214 #endif
9215 }
9216
9217 if (spin.si_compflags != NULL && spin.si_nobreak)
9218 MSG(_("Warning: both compounding and NOBREAK specified"));
9219
9220 if (!error && !got_int)
9221 {
9222 /*
9223 * Combine tails in the tree.
9224 */
9225 spell_message(&spin, (char_u *)_(msg_compressing));
9226 wordtree_compress(&spin, spin.si_foldroot);
9227 wordtree_compress(&spin, spin.si_keeproot);
9228 wordtree_compress(&spin, spin.si_prefroot);
9229 }
9230
9231 if (!error && !got_int)
9232 {
9233 /*
9234 * Write the info in the spell file.
9235 */
9236 vim_snprintf((char *)IObuff, IOSIZE,
9237 _("Writing spell file %s ..."), wfname);
9238 spell_message(&spin, IObuff);
9239
9240 error = write_vim_spell(&spin, wfname) == FAIL;
9241
9242 spell_message(&spin, (char_u *)_("Done!"));
9243 vim_snprintf((char *)IObuff, IOSIZE,
9244 _("Estimated runtime memory use: %d bytes"), spin.si_memtot);
9245 spell_message(&spin, IObuff);
9246
9247 /*
9248 * If the file is loaded need to reload it.
9249 */
9250 if (!error)
9251 spell_reload_one(wfname, added_word);
9252 }
9253
9254 /* Free the allocated memory. */
9255 ga_clear(&spin.si_rep);
9256 ga_clear(&spin.si_repsal);
9257 ga_clear(&spin.si_sal);
9258 ga_clear(&spin.si_map);
9259 ga_clear(&spin.si_comppat);
9260 ga_clear(&spin.si_prefcond);
9261 hash_clear_all(&spin.si_commonwords, 0);
9262
9263 /* Free the .aff file structures. */
9264 for (i = 0; i < incount; ++i)
9265 if (afile[i] != NULL)
9266 spell_free_aff(afile[i]);
9267
9268 /* Free all the bits and pieces at once. */
9269 free_blocks(spin.si_blocks);
9270
9271 /*
9272 * If there is soundfolding info and no NOSUGFILE item create the
9273 * .sug file with the soundfolded word trie.
9274 */
9275 if (spin.si_sugtime != 0 && !error && !got_int)
9276 spell_make_sugfile(&spin, wfname);
9277
9278 }
9279
9280 theend:
9281 vim_free(fname);
9282 vim_free(wfname);
9283 }
9284
9285 /*
9286 * Display a message for spell file processing when 'verbose' is set or using
9287 * ":mkspell". "str" can be IObuff.
9288 */
9289 static void
9290 spell_message(spellinfo_T *spin, char_u *str)
9291 {
9292 if (spin->si_verbose || p_verbose > 2)
9293 {
9294 if (!spin->si_verbose)
9295 verbose_enter();
9296 MSG(str);
9297 out_flush();
9298 if (!spin->si_verbose)
9299 verbose_leave();
9300 }
9301 }
9302
9303 /*
9304 * ":[count]spellgood {word}"
9305 * ":[count]spellwrong {word}"
9306 * ":[count]spellundo {word}"
9307 */
9308 void
9309 ex_spell(exarg_T *eap)
9310 {
9311 spell_add_word(eap->arg, (int)STRLEN(eap->arg), eap->cmdidx == CMD_spellwrong,
9312 eap->forceit ? 0 : (int)eap->line2,
9313 eap->cmdidx == CMD_spellundo);
9314 }
9315
9316 /*
9317 * Add "word[len]" to 'spellfile' as a good or bad word.
9318 */
9319 void
9320 spell_add_word(
9321 char_u *word,
9322 int len,
9323 int bad,
9324 int idx, /* "zG" and "zW": zero, otherwise index in
9325 'spellfile' */
9326 int undo) /* TRUE for "zug", "zuG", "zuw" and "zuW" */
9327 {
9328 FILE *fd = NULL;
9329 buf_T *buf = NULL;
9330 int new_spf = FALSE;
9331 char_u *fname;
9332 char_u *fnamebuf = NULL;
9333 char_u line[MAXWLEN * 2];
9334 long fpos, fpos_next = 0;
9335 int i;
9336 char_u *spf;
9337
9338 if (idx == 0) /* use internal wordlist */
9339 {
9340 if (int_wordlist == NULL)
9341 {
9342 int_wordlist = vim_tempname('s', FALSE);
9343 if (int_wordlist == NULL)
9344 return;
9345 }
9346 fname = int_wordlist;
9347 }
9348 else
9349 {
9350 /* If 'spellfile' isn't set figure out a good default value. */
9351 if (*curwin->w_s->b_p_spf == NUL)
9352 {
9353 init_spellfile();
9354 new_spf = TRUE;
9355 }
9356
9357 if (*curwin->w_s->b_p_spf == NUL)
9358 {
9359 EMSG2(_(e_notset), "spellfile");
9360 return;
9361 }
9362 fnamebuf = alloc(MAXPATHL);
9363 if (fnamebuf == NULL)
9364 return;
9365
9366 for (spf = curwin->w_s->b_p_spf, i = 1; *spf != NUL; ++i)
9367 {
9368 copy_option_part(&spf, fnamebuf, MAXPATHL, ",");
9369 if (i == idx)
9370 break;
9371 if (*spf == NUL)
9372 {
9373 EMSGN(_("E765: 'spellfile' does not have %ld entries"), idx);
9374 vim_free(fnamebuf);
9375 return;
9376 }
9377 }
9378
9379 /* Check that the user isn't editing the .add file somewhere. */
9380 buf = buflist_findname_exp(fnamebuf);
9381 if (buf != NULL && buf->b_ml.ml_mfp == NULL)
9382 buf = NULL;
9383 if (buf != NULL && bufIsChanged(buf))
9384 {
9385 EMSG(_(e_bufloaded));
9386 vim_free(fnamebuf);
9387 return;
9388 }
9389
9390 fname = fnamebuf;
9391 }
9392
9393 if (bad || undo)
9394 {
9395 /* When the word appears as good word we need to remove that one,
9396 * since its flags sort before the one with WF_BANNED. */
9397 fd = mch_fopen((char *)fname, "r");
9398 if (fd != NULL)
9399 {
9400 while (!vim_fgets(line, MAXWLEN * 2, fd))
9401 {
9402 fpos = fpos_next;
9403 fpos_next = ftell(fd);
9404 if (STRNCMP(word, line, len) == 0
9405 && (line[len] == '/' || line[len] < ' '))
9406 {
9407 /* Found duplicate word. Remove it by writing a '#' at
9408 * the start of the line. Mixing reading and writing
9409 * doesn't work for all systems, close the file first. */
9410 fclose(fd);
9411 fd = mch_fopen((char *)fname, "r+");
9412 if (fd == NULL)
9413 break;
9414 if (fseek(fd, fpos, SEEK_SET) == 0)
9415 {
9416 fputc('#', fd);
9417 if (undo)
9418 {
9419 home_replace(NULL, fname, NameBuff, MAXPATHL, TRUE);
9420 smsg((char_u *)_("Word '%.*s' removed from %s"),
9421 len, word, NameBuff);
9422 }
9423 }
9424 fseek(fd, fpos_next, SEEK_SET);
9425 }
9426 }
9427 if (fd != NULL)
9428 fclose(fd);
9429 }
9430 }
9431
9432 if (!undo)
9433 {
9434 fd = mch_fopen((char *)fname, "a");
9435 if (fd == NULL && new_spf)
9436 {
9437 char_u *p;
9438
9439 /* We just initialized the 'spellfile' option and can't open the
9440 * file. We may need to create the "spell" directory first. We
9441 * already checked the runtime directory is writable in
9442 * init_spellfile(). */
9443 if (!dir_of_file_exists(fname) && (p = gettail_sep(fname)) != fname)
9444 {
9445 int c = *p;
9446
9447 /* The directory doesn't exist. Try creating it and opening
9448 * the file again. */
9449 *p = NUL;
9450 vim_mkdir(fname, 0755);
9451 *p = c;
9452 fd = mch_fopen((char *)fname, "a");
9453 }
9454 }
9455
9456 if (fd == NULL)
9457 EMSG2(_(e_notopen), fname);
9458 else
9459 {
9460 if (bad)
9461 fprintf(fd, "%.*s/!\n", len, word);
9462 else
9463 fprintf(fd, "%.*s\n", len, word);
9464 fclose(fd);
9465
9466 home_replace(NULL, fname, NameBuff, MAXPATHL, TRUE);
9467 smsg((char_u *)_("Word '%.*s' added to %s"), len, word, NameBuff);
9468 }
9469 }
9470
9471 if (fd != NULL)
9472 {
9473 /* Update the .add.spl file. */
9474 mkspell(1, &fname, FALSE, TRUE, TRUE);
9475
9476 /* If the .add file is edited somewhere, reload it. */
9477 if (buf != NULL)
9478 buf_reload(buf, buf->b_orig_mode);
9479
9480 redraw_all_later(SOME_VALID);
9481 }
9482 vim_free(fnamebuf);
9483 }
9484
9485 /*
9486 * Initialize 'spellfile' for the current buffer.
9487 */
9488 static void
9489 init_spellfile(void)
9490 {
9491 char_u *buf;
9492 int l;
9493 char_u *fname;
9494 char_u *rtp;
9495 char_u *lend;
9496 int aspath = FALSE;
9497 char_u *lstart = curbuf->b_s.b_p_spl;
9498
9499 if (*curwin->w_s->b_p_spl != NUL && curwin->w_s->b_langp.ga_len > 0)
9500 {
9501 buf = alloc(MAXPATHL);
9502 if (buf == NULL)
9503 return;
9504
9505 /* Find the end of the language name. Exclude the region. If there
9506 * is a path separator remember the start of the tail. */
9507 for (lend = curwin->w_s->b_p_spl; *lend != NUL
9508 && vim_strchr((char_u *)",._", *lend) == NULL; ++lend)
9509 if (vim_ispathsep(*lend))
9510 {
9511 aspath = TRUE;
9512 lstart = lend + 1;
9513 }
9514
9515 /* Loop over all entries in 'runtimepath'. Use the first one where we
9516 * are allowed to write. */
9517 rtp = p_rtp;
9518 while (*rtp != NUL)
9519 {
9520 if (aspath)
9521 /* Use directory of an entry with path, e.g., for
9522 * "/dir/lg.utf-8.spl" use "/dir". */
9523 vim_strncpy(buf, curbuf->b_s.b_p_spl,
9524 lstart - curbuf->b_s.b_p_spl - 1);
9525 else
9526 /* Copy the path from 'runtimepath' to buf[]. */
9527 copy_option_part(&rtp, buf, MAXPATHL, ",");
9528 if (filewritable(buf) == 2)
9529 {
9530 /* Use the first language name from 'spelllang' and the
9531 * encoding used in the first loaded .spl file. */
9532 if (aspath)
9533 vim_strncpy(buf, curbuf->b_s.b_p_spl,
9534 lend - curbuf->b_s.b_p_spl);
9535 else
9536 {
9537 /* Create the "spell" directory if it doesn't exist yet. */
9538 l = (int)STRLEN(buf);
9539 vim_snprintf((char *)buf + l, MAXPATHL - l, "/spell");
9540 if (filewritable(buf) != 2)
9541 vim_mkdir(buf, 0755);
9542
9543 l = (int)STRLEN(buf);
9544 vim_snprintf((char *)buf + l, MAXPATHL - l,
9545 "/%.*s", (int)(lend - lstart), lstart);
9546 }
9547 l = (int)STRLEN(buf);
9548 fname = LANGP_ENTRY(curwin->w_s->b_langp, 0)
9549 ->lp_slang->sl_fname;
9550 vim_snprintf((char *)buf + l, MAXPATHL - l, ".%s.add",
9551 fname != NULL
9552 && strstr((char *)gettail(fname), ".ascii.") != NULL
9553 ? (char_u *)"ascii" : spell_enc());
9554 set_option_value((char_u *)"spellfile", 0L, buf, OPT_LOCAL);
9555 break;
9556 }
9557 aspath = FALSE;
9558 }
9559
9560 vim_free(buf);
9561 }
9562 }
9563
9564
9565 /* 3013 /*
9566 * Init the chartab used for spelling for ASCII. 3014 * Init the chartab used for spelling for ASCII.
9567 * EBCDIC is not supported! 3015 * EBCDIC is not supported!
9568 */ 3016 */
9569 static void 3017 void
9570 clear_spell_chartab(spelltab_T *sp) 3018 clear_spell_chartab(spelltab_T *sp)
9571 { 3019 {
9572 int i; 3020 int i;
9573 3021
9574 /* Init everything to FALSE. */ 3022 /* Init everything to FALSE. */
9654 } 3102 }
9655 } 3103 }
9656 } 3104 }
9657 } 3105 }
9658 3106
9659 /*
9660 * Set the spell character tables from strings in the affix file.
9661 */
9662 static int
9663 set_spell_chartab(char_u *fol, char_u *low, char_u *upp)
9664 {
9665 /* We build the new tables here first, so that we can compare with the
9666 * previous one. */
9667 spelltab_T new_st;
9668 char_u *pf = fol, *pl = low, *pu = upp;
9669 int f, l, u;
9670
9671 clear_spell_chartab(&new_st);
9672
9673 while (*pf != NUL)
9674 {
9675 if (*pl == NUL || *pu == NUL)
9676 {
9677 EMSG(_(e_affform));
9678 return FAIL;
9679 }
9680 #ifdef FEAT_MBYTE
9681 f = mb_ptr2char_adv(&pf);
9682 l = mb_ptr2char_adv(&pl);
9683 u = mb_ptr2char_adv(&pu);
9684 #else
9685 f = *pf++;
9686 l = *pl++;
9687 u = *pu++;
9688 #endif
9689 /* Every character that appears is a word character. */
9690 if (f < 256)
9691 new_st.st_isw[f] = TRUE;
9692 if (l < 256)
9693 new_st.st_isw[l] = TRUE;
9694 if (u < 256)
9695 new_st.st_isw[u] = TRUE;
9696
9697 /* if "LOW" and "FOL" are not the same the "LOW" char needs
9698 * case-folding */
9699 if (l < 256 && l != f)
9700 {
9701 if (f >= 256)
9702 {
9703 EMSG(_(e_affrange));
9704 return FAIL;
9705 }
9706 new_st.st_fold[l] = f;
9707 }
9708
9709 /* if "UPP" and "FOL" are not the same the "UPP" char needs
9710 * case-folding, it's upper case and the "UPP" is the upper case of
9711 * "FOL" . */
9712 if (u < 256 && u != f)
9713 {
9714 if (f >= 256)
9715 {
9716 EMSG(_(e_affrange));
9717 return FAIL;
9718 }
9719 new_st.st_fold[u] = f;
9720 new_st.st_isu[u] = TRUE;
9721 new_st.st_upper[f] = u;
9722 }
9723 }
9724
9725 if (*pl != NUL || *pu != NUL)
9726 {
9727 EMSG(_(e_affform));
9728 return FAIL;
9729 }
9730
9731 return set_spell_finish(&new_st);
9732 }
9733
9734 /*
9735 * Set the spell character tables from strings in the .spl file.
9736 */
9737 static void
9738 set_spell_charflags(
9739 char_u *flags,
9740 int cnt, /* length of "flags" */
9741 char_u *fol)
9742 {
9743 /* We build the new tables here first, so that we can compare with the
9744 * previous one. */
9745 spelltab_T new_st;
9746 int i;
9747 char_u *p = fol;
9748 int c;
9749
9750 clear_spell_chartab(&new_st);
9751
9752 for (i = 0; i < 128; ++i)
9753 {
9754 if (i < cnt)
9755 {
9756 new_st.st_isw[i + 128] = (flags[i] & CF_WORD) != 0;
9757 new_st.st_isu[i + 128] = (flags[i] & CF_UPPER) != 0;
9758 }
9759
9760 if (*p != NUL)
9761 {
9762 #ifdef FEAT_MBYTE
9763 c = mb_ptr2char_adv(&p);
9764 #else
9765 c = *p++;
9766 #endif
9767 new_st.st_fold[i + 128] = c;
9768 if (i + 128 != c && new_st.st_isu[i + 128] && c < 256)
9769 new_st.st_upper[c] = i + 128;
9770 }
9771 }
9772
9773 (void)set_spell_finish(&new_st);
9774 }
9775
9776 static int
9777 set_spell_finish(spelltab_T *new_st)
9778 {
9779 int i;
9780
9781 if (did_set_spelltab)
9782 {
9783 /* check that it's the same table */
9784 for (i = 0; i < 256; ++i)
9785 {
9786 if (spelltab.st_isw[i] != new_st->st_isw[i]
9787 || spelltab.st_isu[i] != new_st->st_isu[i]
9788 || spelltab.st_fold[i] != new_st->st_fold[i]
9789 || spelltab.st_upper[i] != new_st->st_upper[i])
9790 {
9791 EMSG(_("E763: Word characters differ between spell files"));
9792 return FAIL;
9793 }
9794 }
9795 }
9796 else
9797 {
9798 /* copy the new spelltab into the one being used */
9799 spelltab = *new_st;
9800 did_set_spelltab = TRUE;
9801 }
9802
9803 return OK;
9804 }
9805 3107
9806 /* 3108 /*
9807 * Return TRUE if "p" points to a word character. 3109 * Return TRUE if "p" points to a word character.
9808 * As a special case we see "midword" characters as word character when it is 3110 * As a special case we see "midword" characters as word character when it is
9809 * followed by a word character. This finds they'there but not 'they there'. 3111 * followed by a word character. This finds they'there but not 'they there'.
9850 3152
9851 /* 3153 /*
9852 * Return TRUE if "p" points to a word character. 3154 * Return TRUE if "p" points to a word character.
9853 * Unlike spell_iswordp() this doesn't check for "midword" characters. 3155 * Unlike spell_iswordp() this doesn't check for "midword" characters.
9854 */ 3156 */
9855 static int 3157 int
9856 spell_iswordp_nmw(char_u *p, win_T *wp) 3158 spell_iswordp_nmw(char_u *p, win_T *wp)
9857 { 3159 {
9858 #ifdef FEAT_MBYTE 3160 #ifdef FEAT_MBYTE
9859 int c; 3161 int c;
9860 3162
9913 return spelltab.st_isw[*s]; 3215 return spelltab.st_isw[*s];
9914 } 3216 }
9915 #endif 3217 #endif
9916 3218
9917 /* 3219 /*
9918 * Write the table with prefix conditions to the .spl file.
9919 * When "fd" is NULL only count the length of what is written.
9920 */
9921 static int
9922 write_spell_prefcond(FILE *fd, garray_T *gap)
9923 {
9924 int i;
9925 char_u *p;
9926 int len;
9927 int totlen;
9928 size_t x = 1; /* collect return value of fwrite() */
9929
9930 if (fd != NULL)
9931 put_bytes(fd, (long_u)gap->ga_len, 2); /* <prefcondcnt> */
9932
9933 totlen = 2 + gap->ga_len; /* length of <prefcondcnt> and <condlen> bytes */
9934
9935 for (i = 0; i < gap->ga_len; ++i)
9936 {
9937 /* <prefcond> : <condlen> <condstr> */
9938 p = ((char_u **)gap->ga_data)[i];
9939 if (p != NULL)
9940 {
9941 len = (int)STRLEN(p);
9942 if (fd != NULL)
9943 {
9944 fputc(len, fd);
9945 x &= fwrite(p, (size_t)len, (size_t)1, fd);
9946 }
9947 totlen += len;
9948 }
9949 else if (fd != NULL)
9950 fputc(0, fd);
9951 }
9952
9953 return totlen;
9954 }
9955
9956 /*
9957 * Case-fold "str[len]" into "buf[buflen]". The result is NUL terminated. 3220 * Case-fold "str[len]" into "buf[buflen]". The result is NUL terminated.
9958 * Uses the character definitions from the .spl file. 3221 * Uses the character definitions from the .spl file.
9959 * When using a multi-byte 'encoding' the length may change! 3222 * When using a multi-byte 'encoding' the length may change!
9960 * Returns FAIL when something wrong. 3223 * Returns FAIL when something wrong.
9961 */ 3224 */
9962 static int 3225 int
9963 spell_casefold( 3226 spell_casefold(
9964 char_u *str, 3227 char_u *str,
9965 int len, 3228 int len,
9966 char_u *buf, 3229 char_u *buf,
9967 int buflen) 3230 int buflen)
10813 (void)cleanup_suggestions(&su->su_ga, su->su_maxscore, su->su_maxcount); 4076 (void)cleanup_suggestions(&su->su_ga, su->su_maxscore, su->su_maxcount);
10814 } 4077 }
10815 } 4078 }
10816 4079
10817 /* 4080 /*
10818 * Load the .sug files for languages that have one and weren't loaded yet.
10819 */
10820 static void
10821 suggest_load_files(void)
10822 {
10823 langp_T *lp;
10824 int lpi;
10825 slang_T *slang;
10826 char_u *dotp;
10827 FILE *fd;
10828 char_u buf[MAXWLEN];
10829 int i;
10830 time_t timestamp;
10831 int wcount;
10832 int wordnr;
10833 garray_T ga;
10834 int c;
10835
10836 /* Do this for all languages that support sound folding. */
10837 for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi)
10838 {
10839 lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi);
10840 slang = lp->lp_slang;
10841 if (slang->sl_sugtime != 0 && !slang->sl_sugloaded)
10842 {
10843 /* Change ".spl" to ".sug" and open the file. When the file isn't
10844 * found silently skip it. Do set "sl_sugloaded" so that we
10845 * don't try again and again. */
10846 slang->sl_sugloaded = TRUE;
10847
10848 dotp = vim_strrchr(slang->sl_fname, '.');
10849 if (dotp == NULL || fnamecmp(dotp, ".spl") != 0)
10850 continue;
10851 STRCPY(dotp, ".sug");
10852 fd = mch_fopen((char *)slang->sl_fname, "r");
10853 if (fd == NULL)
10854 goto nextone;
10855
10856 /*
10857 * <SUGHEADER>: <fileID> <versionnr> <timestamp>
10858 */
10859 for (i = 0; i < VIMSUGMAGICL; ++i)
10860 buf[i] = getc(fd); /* <fileID> */
10861 if (STRNCMP(buf, VIMSUGMAGIC, VIMSUGMAGICL) != 0)
10862 {
10863 EMSG2(_("E778: This does not look like a .sug file: %s"),
10864 slang->sl_fname);
10865 goto nextone;
10866 }
10867 c = getc(fd); /* <versionnr> */
10868 if (c < VIMSUGVERSION)
10869 {
10870 EMSG2(_("E779: Old .sug file, needs to be updated: %s"),
10871 slang->sl_fname);
10872 goto nextone;
10873 }
10874 else if (c > VIMSUGVERSION)
10875 {
10876 EMSG2(_("E780: .sug file is for newer version of Vim: %s"),
10877 slang->sl_fname);
10878 goto nextone;
10879 }
10880
10881 /* Check the timestamp, it must be exactly the same as the one in
10882 * the .spl file. Otherwise the word numbers won't match. */
10883 timestamp = get8ctime(fd); /* <timestamp> */
10884 if (timestamp != slang->sl_sugtime)
10885 {
10886 EMSG2(_("E781: .sug file doesn't match .spl file: %s"),
10887 slang->sl_fname);
10888 goto nextone;
10889 }
10890
10891 /*
10892 * <SUGWORDTREE>: <wordtree>
10893 * Read the trie with the soundfolded words.
10894 */
10895 if (spell_read_tree(fd, &slang->sl_sbyts, &slang->sl_sidxs,
10896 FALSE, 0) != 0)
10897 {
10898 someerror:
10899 EMSG2(_("E782: error while reading .sug file: %s"),
10900 slang->sl_fname);
10901 slang_clear_sug(slang);
10902 goto nextone;
10903 }
10904
10905 /*
10906 * <SUGTABLE>: <sugwcount> <sugline> ...
10907 *
10908 * Read the table with word numbers. We use a file buffer for
10909 * this, because it's so much like a file with lines. Makes it
10910 * possible to swap the info and save on memory use.
10911 */
10912 slang->sl_sugbuf = open_spellbuf();
10913 if (slang->sl_sugbuf == NULL)
10914 goto someerror;
10915 /* <sugwcount> */
10916 wcount = get4c(fd);
10917 if (wcount < 0)
10918 goto someerror;
10919
10920 /* Read all the wordnr lists into the buffer, one NUL terminated
10921 * list per line. */
10922 ga_init2(&ga, 1, 100);
10923 for (wordnr = 0; wordnr < wcount; ++wordnr)
10924 {
10925 ga.ga_len = 0;
10926 for (;;)
10927 {
10928 c = getc(fd); /* <sugline> */
10929 if (c < 0 || ga_grow(&ga, 1) == FAIL)
10930 goto someerror;
10931 ((char_u *)ga.ga_data)[ga.ga_len++] = c;
10932 if (c == NUL)
10933 break;
10934 }
10935 if (ml_append_buf(slang->sl_sugbuf, (linenr_T)wordnr,
10936 ga.ga_data, ga.ga_len, TRUE) == FAIL)
10937 goto someerror;
10938 }
10939 ga_clear(&ga);
10940
10941 /*
10942 * Need to put word counts in the word tries, so that we can find
10943 * a word by its number.
10944 */
10945 tree_count_words(slang->sl_fbyts, slang->sl_fidxs);
10946 tree_count_words(slang->sl_sbyts, slang->sl_sidxs);
10947
10948 nextone:
10949 if (fd != NULL)
10950 fclose(fd);
10951 STRCPY(dotp, ".spl");
10952 }
10953 }
10954 }
10955
10956
10957 /*
10958 * Fill in the wordcount fields for a trie.
10959 * Returns the total number of words.
10960 */
10961 static void
10962 tree_count_words(char_u *byts, idx_T *idxs)
10963 {
10964 int depth;
10965 idx_T arridx[MAXWLEN];
10966 int curi[MAXWLEN];
10967 int c;
10968 idx_T n;
10969 int wordcount[MAXWLEN];
10970
10971 arridx[0] = 0;
10972 curi[0] = 1;
10973 wordcount[0] = 0;
10974 depth = 0;
10975 while (depth >= 0 && !got_int)
10976 {
10977 if (curi[depth] > byts[arridx[depth]])
10978 {
10979 /* Done all bytes at this node, go up one level. */
10980 idxs[arridx[depth]] = wordcount[depth];
10981 if (depth > 0)
10982 wordcount[depth - 1] += wordcount[depth];
10983
10984 --depth;
10985 fast_breakcheck();
10986 }
10987 else
10988 {
10989 /* Do one more byte at this node. */
10990 n = arridx[depth] + curi[depth];
10991 ++curi[depth];
10992
10993 c = byts[n];
10994 if (c == 0)
10995 {
10996 /* End of word, count it. */
10997 ++wordcount[depth];
10998
10999 /* Skip over any other NUL bytes (same word with different
11000 * flags). */
11001 while (byts[n + 1] == 0)
11002 {
11003 ++n;
11004 ++curi[depth];
11005 }
11006 }
11007 else
11008 {
11009 /* Normal char, go one level deeper to count the words. */
11010 ++depth;
11011 arridx[depth] = idxs[n];
11012 curi[depth] = 1;
11013 wordcount[depth] = 0;
11014 }
11015 }
11016 }
11017 }
11018
11019 /*
11020 * Free the info put in "*su" by spell_find_suggest(). 4081 * Free the info put in "*su" by spell_find_suggest().
11021 */ 4082 */
11022 static void 4083 static void
11023 spell_find_cleanup(suginfo_T *su) 4084 spell_find_cleanup(suginfo_T *su)
11024 { 4085 {
11039 /* 4100 /*
11040 * Make a copy of "word", with the first letter upper or lower cased, to 4101 * Make a copy of "word", with the first letter upper or lower cased, to
11041 * "wcopy[MAXWLEN]". "word" must not be empty. 4102 * "wcopy[MAXWLEN]". "word" must not be empty.
11042 * The result is NUL terminated. 4103 * The result is NUL terminated.
11043 */ 4104 */
11044 static void 4105 void
11045 onecap_copy( 4106 onecap_copy(
11046 char_u *word, 4107 char_u *word,
11047 char_u *wcopy, 4108 char_u *wcopy,
11048 int upper) /* TRUE: first letter made upper case */ 4109 int upper) /* TRUE: first letter made upper case */
11049 { 4110 {
13602 else 6663 else
13603 /* Use goodword as-is. */ 6664 /* Use goodword as-is. */
13604 STRCPY(cword, fword); 6665 STRCPY(cword, fword);
13605 } 6666 }
13606 6667
13607 /*
13608 * Use map string "map" for languages "lp".
13609 */
13610 static void
13611 set_map_str(slang_T *lp, char_u *map)
13612 {
13613 char_u *p;
13614 int headc = 0;
13615 int c;
13616 int i;
13617
13618 if (*map == NUL)
13619 {
13620 lp->sl_has_map = FALSE;
13621 return;
13622 }
13623 lp->sl_has_map = TRUE;
13624
13625 /* Init the array and hash tables empty. */
13626 for (i = 0; i < 256; ++i)
13627 lp->sl_map_array[i] = 0;
13628 #ifdef FEAT_MBYTE
13629 hash_init(&lp->sl_map_hash);
13630 #endif
13631
13632 /*
13633 * The similar characters are stored separated with slashes:
13634 * "aaa/bbb/ccc/". Fill sl_map_array[c] with the character before c and
13635 * before the same slash. For characters above 255 sl_map_hash is used.
13636 */
13637 for (p = map; *p != NUL; )
13638 {
13639 #ifdef FEAT_MBYTE
13640 c = mb_cptr2char_adv(&p);
13641 #else
13642 c = *p++;
13643 #endif
13644 if (c == '/')
13645 headc = 0;
13646 else
13647 {
13648 if (headc == 0)
13649 headc = c;
13650
13651 #ifdef FEAT_MBYTE
13652 /* Characters above 255 don't fit in sl_map_array[], put them in
13653 * the hash table. Each entry is the char, a NUL the headchar and
13654 * a NUL. */
13655 if (c >= 256)
13656 {
13657 int cl = mb_char2len(c);
13658 int headcl = mb_char2len(headc);
13659 char_u *b;
13660 hash_T hash;
13661 hashitem_T *hi;
13662
13663 b = alloc((unsigned)(cl + headcl + 2));
13664 if (b == NULL)
13665 return;
13666 mb_char2bytes(c, b);
13667 b[cl] = NUL;
13668 mb_char2bytes(headc, b + cl + 1);
13669 b[cl + 1 + headcl] = NUL;
13670 hash = hash_hash(b);
13671 hi = hash_lookup(&lp->sl_map_hash, b, hash);
13672 if (HASHITEM_EMPTY(hi))
13673 hash_add_item(&lp->sl_map_hash, hi, b, hash);
13674 else
13675 {
13676 /* This should have been checked when generating the .spl
13677 * file. */
13678 EMSG(_("E783: duplicate char in MAP entry"));
13679 vim_free(b);
13680 }
13681 }
13682 else
13683 #endif
13684 lp->sl_map_array[c] = headc;
13685 }
13686 }
13687 }
13688 6668
13689 /* 6669 /*
13690 * Return TRUE if "c1" and "c2" are similar characters according to the MAP 6670 * Return TRUE if "c1" and "c2" are similar characters according to the MAP
13691 * lines in the .aff file. 6671 * lines in the .aff file.
13692 */ 6672 */
14069 * 7049 *
14070 * We support two methods: 7050 * We support two methods:
14071 * 1. SOFOFROM/SOFOTO do a simple character mapping. 7051 * 1. SOFOFROM/SOFOTO do a simple character mapping.
14072 * 2. SAL items define a more advanced sound-folding (and much slower). 7052 * 2. SAL items define a more advanced sound-folding (and much slower).
14073 */ 7053 */
14074 static void 7054 void
14075 spell_soundfold( 7055 spell_soundfold(
14076 slang_T *slang, 7056 slang_T *slang,
14077 char_u *inword, 7057 char_u *inword,
14078 int folded, /* "inword" is already case-folded */ 7058 int folded, /* "inword" is already case-folded */
14079 char_u *res) 7059 char_u *res)