Mercurial > vim
comparison src/spellfile.c @ 9583:b0c7061d6439 v7.4.2069
commit https://github.com/vim/vim/commit/9ccfebddc3ff2a3c2853cf706fd4c26f639bf381
Author: Bram Moolenaar <Bram@vim.org>
Date: Tue Jul 19 16:39:08 2016 +0200
patch 7.4.2069
Problem: spell.c is too big.
Solution: Split it in spell file handling and spell checking.
author | Christian Brabandt <cb@256bit.org> |
---|---|
date | Tue, 19 Jul 2016 16:45:06 +0200 |
parents | |
children | 4aead6a9b7a9 |
comparison
equal
deleted
inserted
replaced
9582:96737caf272d | 9583:b0c7061d6439 |
---|---|
1 /* vi:set ts=8 sts=4 sw=4: | |
2 * | |
3 * VIM - Vi IMproved by Bram Moolenaar | |
4 * | |
5 * Do ":help uganda" in Vim to read copying and usage conditions. | |
6 * Do ":help credits" in Vim to see a list of people who contributed. | |
7 * See README.txt for an overview of the Vim source code. | |
8 */ | |
9 | |
10 /* | |
11 * spellfile.c: code for reading and writing spell files. | |
12 * | |
13 * See spell.c for information about spell checking. | |
14 */ | |
15 | |
16 /* | |
17 * Vim spell file format: <HEADER> | |
18 * <SECTIONS> | |
19 * <LWORDTREE> | |
20 * <KWORDTREE> | |
21 * <PREFIXTREE> | |
22 * | |
23 * <HEADER>: <fileID> <versionnr> | |
24 * | |
25 * <fileID> 8 bytes "VIMspell" | |
26 * <versionnr> 1 byte VIMSPELLVERSION | |
27 * | |
28 * | |
29 * Sections make it possible to add information to the .spl file without | |
30 * making it incompatible with previous versions. There are two kinds of | |
31 * sections: | |
32 * 1. Not essential for correct spell checking. E.g. for making suggestions. | |
33 * These are skipped when not supported. | |
34 * 2. Optional information, but essential for spell checking when present. | |
35 * E.g. conditions for affixes. When this section is present but not | |
36 * supported an error message is given. | |
37 * | |
38 * <SECTIONS>: <section> ... <sectionend> | |
39 * | |
40 * <section>: <sectionID> <sectionflags> <sectionlen> (section contents) | |
41 * | |
42 * <sectionID> 1 byte number from 0 to 254 identifying the section | |
43 * | |
44 * <sectionflags> 1 byte SNF_REQUIRED: this section is required for correct | |
45 * spell checking | |
46 * | |
47 * <sectionlen> 4 bytes length of section contents, MSB first | |
48 * | |
49 * <sectionend> 1 byte SN_END | |
50 * | |
51 * | |
52 * sectionID == SN_INFO: <infotext> | |
53 * <infotext> N bytes free format text with spell file info (version, | |
54 * website, etc) | |
55 * | |
56 * sectionID == SN_REGION: <regionname> ... | |
57 * <regionname> 2 bytes Up to 8 region names: ca, au, etc. Lower case. | |
58 * First <regionname> is region 1. | |
59 * | |
60 * sectionID == SN_CHARFLAGS: <charflagslen> <charflags> | |
61 * <folcharslen> <folchars> | |
62 * <charflagslen> 1 byte Number of bytes in <charflags> (should be 128). | |
63 * <charflags> N bytes List of flags (first one is for character 128): | |
64 * 0x01 word character CF_WORD | |
65 * 0x02 upper-case character CF_UPPER | |
66 * <folcharslen> 2 bytes Number of bytes in <folchars>. | |
67 * <folchars> N bytes Folded characters, first one is for character 128. | |
68 * | |
69 * sectionID == SN_MIDWORD: <midword> | |
70 * <midword> N bytes Characters that are word characters only when used | |
71 * in the middle of a word. | |
72 * | |
73 * sectionID == SN_PREFCOND: <prefcondcnt> <prefcond> ... | |
74 * <prefcondcnt> 2 bytes Number of <prefcond> items following. | |
75 * <prefcond> : <condlen> <condstr> | |
76 * <condlen> 1 byte Length of <condstr>. | |
77 * <condstr> N bytes Condition for the prefix. | |
78 * | |
79 * sectionID == SN_REP: <repcount> <rep> ... | |
80 * <repcount> 2 bytes number of <rep> items, MSB first. | |
81 * <rep> : <repfromlen> <repfrom> <reptolen> <repto> | |
82 * <repfromlen> 1 byte length of <repfrom> | |
83 * <repfrom> N bytes "from" part of replacement | |
84 * <reptolen> 1 byte length of <repto> | |
85 * <repto> N bytes "to" part of replacement | |
86 * | |
87 * sectionID == SN_REPSAL: <repcount> <rep> ... | |
88 * just like SN_REP but for soundfolded words | |
89 * | |
90 * sectionID == SN_SAL: <salflags> <salcount> <sal> ... | |
91 * <salflags> 1 byte flags for soundsalike conversion: | |
92 * SAL_F0LLOWUP | |
93 * SAL_COLLAPSE | |
94 * SAL_REM_ACCENTS | |
95 * <salcount> 2 bytes number of <sal> items following | |
96 * <sal> : <salfromlen> <salfrom> <saltolen> <salto> | |
97 * <salfromlen> 1 byte length of <salfrom> | |
98 * <salfrom> N bytes "from" part of soundsalike | |
99 * <saltolen> 1 byte length of <salto> | |
100 * <salto> N bytes "to" part of soundsalike | |
101 * | |
102 * sectionID == SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto> | |
103 * <sofofromlen> 2 bytes length of <sofofrom> | |
104 * <sofofrom> N bytes "from" part of soundfold | |
105 * <sofotolen> 2 bytes length of <sofoto> | |
106 * <sofoto> N bytes "to" part of soundfold | |
107 * | |
108 * sectionID == SN_SUGFILE: <timestamp> | |
109 * <timestamp> 8 bytes time in seconds that must match with .sug file | |
110 * | |
111 * sectionID == SN_NOSPLITSUGS: nothing | |
112 * | |
113 * sectionID == SN_NOCOMPOUNDSUGS: nothing | |
114 * | |
115 * sectionID == SN_WORDS: <word> ... | |
116 * <word> N bytes NUL terminated common word | |
117 * | |
118 * sectionID == SN_MAP: <mapstr> | |
119 * <mapstr> N bytes String with sequences of similar characters, | |
120 * separated by slashes. | |
121 * | |
122 * sectionID == SN_COMPOUND: <compmax> <compminlen> <compsylmax> <compoptions> | |
123 * <comppatcount> <comppattern> ... <compflags> | |
124 * <compmax> 1 byte Maximum nr of words in compound word. | |
125 * <compminlen> 1 byte Minimal word length for compounding. | |
126 * <compsylmax> 1 byte Maximum nr of syllables in compound word. | |
127 * <compoptions> 2 bytes COMP_ flags. | |
128 * <comppatcount> 2 bytes number of <comppattern> following | |
129 * <compflags> N bytes Flags from COMPOUNDRULE items, separated by | |
130 * slashes. | |
131 * | |
132 * <comppattern>: <comppatlen> <comppattext> | |
133 * <comppatlen> 1 byte length of <comppattext> | |
134 * <comppattext> N bytes end or begin chars from CHECKCOMPOUNDPATTERN | |
135 * | |
136 * sectionID == SN_NOBREAK: (empty, its presence is what matters) | |
137 * | |
138 * sectionID == SN_SYLLABLE: <syllable> | |
139 * <syllable> N bytes String from SYLLABLE item. | |
140 * | |
141 * <LWORDTREE>: <wordtree> | |
142 * | |
143 * <KWORDTREE>: <wordtree> | |
144 * | |
145 * <PREFIXTREE>: <wordtree> | |
146 * | |
147 * | |
148 * <wordtree>: <nodecount> <nodedata> ... | |
149 * | |
150 * <nodecount> 4 bytes Number of nodes following. MSB first. | |
151 * | |
152 * <nodedata>: <siblingcount> <sibling> ... | |
153 * | |
154 * <siblingcount> 1 byte Number of siblings in this node. The siblings | |
155 * follow in sorted order. | |
156 * | |
157 * <sibling>: <byte> [ <nodeidx> <xbyte> | |
158 * | <flags> [<flags2>] [<region>] [<affixID>] | |
159 * | [<pflags>] <affixID> <prefcondnr> ] | |
160 * | |
161 * <byte> 1 byte Byte value of the sibling. Special cases: | |
162 * BY_NOFLAGS: End of word without flags and for all | |
163 * regions. | |
164 * For PREFIXTREE <affixID> and | |
165 * <prefcondnr> follow. | |
166 * BY_FLAGS: End of word, <flags> follow. | |
167 * For PREFIXTREE <pflags>, <affixID> | |
168 * and <prefcondnr> follow. | |
169 * BY_FLAGS2: End of word, <flags> and <flags2> | |
170 * follow. Not used in PREFIXTREE. | |
171 * BY_INDEX: Child of sibling is shared, <nodeidx> | |
172 * and <xbyte> follow. | |
173 * | |
174 * <nodeidx> 3 bytes Index of child for this sibling, MSB first. | |
175 * | |
176 * <xbyte> 1 byte byte value of the sibling. | |
177 * | |
178 * <flags> 1 byte bitmask of: | |
179 * WF_ALLCAP word must have only capitals | |
180 * WF_ONECAP first char of word must be capital | |
181 * WF_KEEPCAP keep-case word | |
182 * WF_FIXCAP keep-case word, all caps not allowed | |
183 * WF_RARE rare word | |
184 * WF_BANNED bad word | |
185 * WF_REGION <region> follows | |
186 * WF_AFX <affixID> follows | |
187 * | |
188 * <flags2> 1 byte Bitmask of: | |
189 * WF_HAS_AFF >> 8 word includes affix | |
190 * WF_NEEDCOMP >> 8 word only valid in compound | |
191 * WF_NOSUGGEST >> 8 word not used for suggestions | |
192 * WF_COMPROOT >> 8 word already a compound | |
193 * WF_NOCOMPBEF >> 8 no compounding before this word | |
194 * WF_NOCOMPAFT >> 8 no compounding after this word | |
195 * | |
196 * <pflags> 1 byte bitmask of: | |
197 * WFP_RARE rare prefix | |
198 * WFP_NC non-combining prefix | |
199 * WFP_UP letter after prefix made upper case | |
200 * | |
201 * <region> 1 byte Bitmask for regions in which word is valid. When | |
202 * omitted it's valid in all regions. | |
203 * Lowest bit is for region 1. | |
204 * | |
205 * <affixID> 1 byte ID of affix that can be used with this word. In | |
206 * PREFIXTREE used for the required prefix ID. | |
207 * | |
208 * <prefcondnr> 2 bytes Prefix condition number, index in <prefcond> list | |
209 * from the SN_PREFCOND section. | |
210 * | |
211 * All text characters are in 'encoding', but stored as single bytes. | |
212 */ | |
213 | |
214 /* | |
215 * Vim .sug file format: <SUGHEADER> | |
216 * <SUGWORDTREE> | |
217 * <SUGTABLE> | |
218 * | |
219 * <SUGHEADER>: <fileID> <versionnr> <timestamp> | |
220 * | |
221 * <fileID> 6 bytes "VIMsug" | |
222 * <versionnr> 1 byte VIMSUGVERSION | |
223 * <timestamp> 8 bytes timestamp that must match with .spl file | |
224 * | |
225 * | |
226 * <SUGWORDTREE>: <wordtree> (see above, no flags or region used) | |
227 * | |
228 * | |
229 * <SUGTABLE>: <sugwcount> <sugline> ... | |
230 * | |
231 * <sugwcount> 4 bytes number of <sugline> following | |
232 * | |
233 * <sugline>: <sugnr> ... NUL | |
234 * | |
235 * <sugnr>: X bytes word number that results in this soundfolded word, | |
236 * stored as an offset to the previous number in as | |
237 * few bytes as possible, see offset2bytes() | |
238 */ | |
239 | |
240 #include "vim.h" | |
241 | |
242 #if defined(FEAT_SPELL) || defined(PROTO) | |
243 | |
244 #ifndef UNIX /* it's in os_unix.h for Unix */ | |
245 # include <time.h> /* for time_t */ | |
246 #endif | |
247 | |
248 #ifndef UNIX /* it's in os_unix.h for Unix */ | |
249 # include <time.h> /* for time_t */ | |
250 #endif | |
251 | |
252 /* Special byte values for <byte>. Some are only used in the tree for | |
253 * postponed prefixes, some only in the other trees. This is a bit messy... */ | |
254 #define BY_NOFLAGS 0 /* end of word without flags or region; for | |
255 * postponed prefix: no <pflags> */ | |
256 #define BY_INDEX 1 /* child is shared, index follows */ | |
257 #define BY_FLAGS 2 /* end of word, <flags> byte follows; for | |
258 * postponed prefix: <pflags> follows */ | |
259 #define BY_FLAGS2 3 /* end of word, <flags> and <flags2> bytes | |
260 * follow; never used in prefix tree */ | |
261 #define BY_SPECIAL BY_FLAGS2 /* highest special byte value */ | |
262 | |
263 /* Flags used in .spl file for soundsalike flags. */ | |
264 #define SAL_F0LLOWUP 1 | |
265 #define SAL_COLLAPSE 2 | |
266 #define SAL_REM_ACCENTS 4 | |
267 | |
268 #define VIMSPELLMAGIC "VIMspell" /* string at start of Vim spell file */ | |
269 #define VIMSPELLMAGICL 8 | |
270 #define VIMSPELLVERSION 50 | |
271 | |
272 /* Section IDs. Only renumber them when VIMSPELLVERSION changes! */ | |
273 #define SN_REGION 0 /* <regionname> section */ | |
274 #define SN_CHARFLAGS 1 /* charflags section */ | |
275 #define SN_MIDWORD 2 /* <midword> section */ | |
276 #define SN_PREFCOND 3 /* <prefcond> section */ | |
277 #define SN_REP 4 /* REP items section */ | |
278 #define SN_SAL 5 /* SAL items section */ | |
279 #define SN_SOFO 6 /* soundfolding section */ | |
280 #define SN_MAP 7 /* MAP items section */ | |
281 #define SN_COMPOUND 8 /* compound words section */ | |
282 #define SN_SYLLABLE 9 /* syllable section */ | |
283 #define SN_NOBREAK 10 /* NOBREAK section */ | |
284 #define SN_SUGFILE 11 /* timestamp for .sug file */ | |
285 #define SN_REPSAL 12 /* REPSAL items section */ | |
286 #define SN_WORDS 13 /* common words */ | |
287 #define SN_NOSPLITSUGS 14 /* don't split word for suggestions */ | |
288 #define SN_INFO 15 /* info section */ | |
289 #define SN_NOCOMPOUNDSUGS 16 /* don't compound for suggestions */ | |
290 #define SN_END 255 /* end of sections */ | |
291 | |
292 #define SNF_REQUIRED 1 /* <sectionflags>: required section */ | |
293 | |
294 #define CF_WORD 0x01 | |
295 #define CF_UPPER 0x02 | |
296 | |
297 static int set_spell_finish(spelltab_T *new_st); | |
298 static int write_spell_prefcond(FILE *fd, garray_T *gap); | |
299 static char_u *read_cnt_string(FILE *fd, int cnt_bytes, int *lenp); | |
300 static int read_region_section(FILE *fd, slang_T *slang, int len); | |
301 static int read_charflags_section(FILE *fd); | |
302 static int read_prefcond_section(FILE *fd, slang_T *lp); | |
303 static int read_rep_section(FILE *fd, garray_T *gap, short *first); | |
304 static int read_sal_section(FILE *fd, slang_T *slang); | |
305 static int read_words_section(FILE *fd, slang_T *lp, int len); | |
306 static int read_sofo_section(FILE *fd, slang_T *slang); | |
307 static int read_compound(FILE *fd, slang_T *slang, int len); | |
308 static int set_sofo(slang_T *lp, char_u *from, char_u *to); | |
309 static void set_sal_first(slang_T *lp); | |
310 #ifdef FEAT_MBYTE | |
311 static int *mb_str2wide(char_u *s); | |
312 #endif | |
313 static int spell_read_tree(FILE *fd, char_u **bytsp, idx_T **idxsp, int prefixtree, int prefixcnt); | |
314 static idx_T read_tree_node(FILE *fd, char_u *byts, idx_T *idxs, int maxidx, idx_T startidx, int prefixtree, int maxprefcondnr); | |
315 static void spell_reload_one(char_u *fname, int added_word); | |
316 static void set_spell_charflags(char_u *flags, int cnt, char_u *upp); | |
317 static int set_spell_chartab(char_u *fol, char_u *low, char_u *upp); | |
318 static void set_map_str(slang_T *lp, char_u *map); | |
319 | |
320 | |
321 static char *e_spell_trunc = N_("E758: Truncated spell file"); | |
322 static char *e_afftrailing = N_("Trailing text in %s line %d: %s"); | |
323 static char *e_affname = N_("Affix name too long in %s line %d: %s"); | |
324 static char *e_affform = N_("E761: Format error in affix file FOL, LOW or UPP"); | |
325 static char *e_affrange = N_("E762: Character in FOL, LOW or UPP is out of range"); | |
326 static char *msg_compressing = N_("Compressing word tree..."); | |
327 | |
328 /* | |
329 * Load one spell file and store the info into a slang_T. | |
330 * | |
331 * This is invoked in three ways: | |
332 * - From spell_load_cb() to load a spell file for the first time. "lang" is | |
333 * the language name, "old_lp" is NULL. Will allocate an slang_T. | |
334 * - To reload a spell file that was changed. "lang" is NULL and "old_lp" | |
335 * points to the existing slang_T. | |
336 * - Just after writing a .spl file; it's read back to produce the .sug file. | |
337 * "old_lp" is NULL and "lang" is NULL. Will allocate an slang_T. | |
338 * | |
339 * Returns the slang_T the spell file was loaded into. NULL for error. | |
340 */ | |
341 slang_T * | |
342 spell_load_file( | |
343 char_u *fname, | |
344 char_u *lang, | |
345 slang_T *old_lp, | |
346 int silent) /* no error if file doesn't exist */ | |
347 { | |
348 FILE *fd; | |
349 char_u buf[VIMSPELLMAGICL]; | |
350 char_u *p; | |
351 int i; | |
352 int n; | |
353 int len; | |
354 char_u *save_sourcing_name = sourcing_name; | |
355 linenr_T save_sourcing_lnum = sourcing_lnum; | |
356 slang_T *lp = NULL; | |
357 int c = 0; | |
358 int res; | |
359 | |
360 fd = mch_fopen((char *)fname, "r"); | |
361 if (fd == NULL) | |
362 { | |
363 if (!silent) | |
364 EMSG2(_(e_notopen), fname); | |
365 else if (p_verbose > 2) | |
366 { | |
367 verbose_enter(); | |
368 smsg((char_u *)e_notopen, fname); | |
369 verbose_leave(); | |
370 } | |
371 goto endFAIL; | |
372 } | |
373 if (p_verbose > 2) | |
374 { | |
375 verbose_enter(); | |
376 smsg((char_u *)_("Reading spell file \"%s\""), fname); | |
377 verbose_leave(); | |
378 } | |
379 | |
380 if (old_lp == NULL) | |
381 { | |
382 lp = slang_alloc(lang); | |
383 if (lp == NULL) | |
384 goto endFAIL; | |
385 | |
386 /* Remember the file name, used to reload the file when it's updated. */ | |
387 lp->sl_fname = vim_strsave(fname); | |
388 if (lp->sl_fname == NULL) | |
389 goto endFAIL; | |
390 | |
391 /* Check for .add.spl (_add.spl for VMS). */ | |
392 lp->sl_add = strstr((char *)gettail(fname), SPL_FNAME_ADD) != NULL; | |
393 } | |
394 else | |
395 lp = old_lp; | |
396 | |
397 /* Set sourcing_name, so that error messages mention the file name. */ | |
398 sourcing_name = fname; | |
399 sourcing_lnum = 0; | |
400 | |
401 /* | |
402 * <HEADER>: <fileID> | |
403 */ | |
404 for (i = 0; i < VIMSPELLMAGICL; ++i) | |
405 buf[i] = getc(fd); /* <fileID> */ | |
406 if (STRNCMP(buf, VIMSPELLMAGIC, VIMSPELLMAGICL) != 0) | |
407 { | |
408 EMSG(_("E757: This does not look like a spell file")); | |
409 goto endFAIL; | |
410 } | |
411 c = getc(fd); /* <versionnr> */ | |
412 if (c < VIMSPELLVERSION) | |
413 { | |
414 EMSG(_("E771: Old spell file, needs to be updated")); | |
415 goto endFAIL; | |
416 } | |
417 else if (c > VIMSPELLVERSION) | |
418 { | |
419 EMSG(_("E772: Spell file is for newer version of Vim")); | |
420 goto endFAIL; | |
421 } | |
422 | |
423 | |
424 /* | |
425 * <SECTIONS>: <section> ... <sectionend> | |
426 * <section>: <sectionID> <sectionflags> <sectionlen> (section contents) | |
427 */ | |
428 for (;;) | |
429 { | |
430 n = getc(fd); /* <sectionID> or <sectionend> */ | |
431 if (n == SN_END) | |
432 break; | |
433 c = getc(fd); /* <sectionflags> */ | |
434 len = get4c(fd); /* <sectionlen> */ | |
435 if (len < 0) | |
436 goto truncerr; | |
437 | |
438 res = 0; | |
439 switch (n) | |
440 { | |
441 case SN_INFO: | |
442 lp->sl_info = read_string(fd, len); /* <infotext> */ | |
443 if (lp->sl_info == NULL) | |
444 goto endFAIL; | |
445 break; | |
446 | |
447 case SN_REGION: | |
448 res = read_region_section(fd, lp, len); | |
449 break; | |
450 | |
451 case SN_CHARFLAGS: | |
452 res = read_charflags_section(fd); | |
453 break; | |
454 | |
455 case SN_MIDWORD: | |
456 lp->sl_midword = read_string(fd, len); /* <midword> */ | |
457 if (lp->sl_midword == NULL) | |
458 goto endFAIL; | |
459 break; | |
460 | |
461 case SN_PREFCOND: | |
462 res = read_prefcond_section(fd, lp); | |
463 break; | |
464 | |
465 case SN_REP: | |
466 res = read_rep_section(fd, &lp->sl_rep, lp->sl_rep_first); | |
467 break; | |
468 | |
469 case SN_REPSAL: | |
470 res = read_rep_section(fd, &lp->sl_repsal, lp->sl_repsal_first); | |
471 break; | |
472 | |
473 case SN_SAL: | |
474 res = read_sal_section(fd, lp); | |
475 break; | |
476 | |
477 case SN_SOFO: | |
478 res = read_sofo_section(fd, lp); | |
479 break; | |
480 | |
481 case SN_MAP: | |
482 p = read_string(fd, len); /* <mapstr> */ | |
483 if (p == NULL) | |
484 goto endFAIL; | |
485 set_map_str(lp, p); | |
486 vim_free(p); | |
487 break; | |
488 | |
489 case SN_WORDS: | |
490 res = read_words_section(fd, lp, len); | |
491 break; | |
492 | |
493 case SN_SUGFILE: | |
494 lp->sl_sugtime = get8ctime(fd); /* <timestamp> */ | |
495 break; | |
496 | |
497 case SN_NOSPLITSUGS: | |
498 lp->sl_nosplitsugs = TRUE; | |
499 break; | |
500 | |
501 case SN_NOCOMPOUNDSUGS: | |
502 lp->sl_nocompoundsugs = TRUE; | |
503 break; | |
504 | |
505 case SN_COMPOUND: | |
506 res = read_compound(fd, lp, len); | |
507 break; | |
508 | |
509 case SN_NOBREAK: | |
510 lp->sl_nobreak = TRUE; | |
511 break; | |
512 | |
513 case SN_SYLLABLE: | |
514 lp->sl_syllable = read_string(fd, len); /* <syllable> */ | |
515 if (lp->sl_syllable == NULL) | |
516 goto endFAIL; | |
517 if (init_syl_tab(lp) == FAIL) | |
518 goto endFAIL; | |
519 break; | |
520 | |
521 default: | |
522 /* Unsupported section. When it's required give an error | |
523 * message. When it's not required skip the contents. */ | |
524 if (c & SNF_REQUIRED) | |
525 { | |
526 EMSG(_("E770: Unsupported section in spell file")); | |
527 goto endFAIL; | |
528 } | |
529 while (--len >= 0) | |
530 if (getc(fd) < 0) | |
531 goto truncerr; | |
532 break; | |
533 } | |
534 someerror: | |
535 if (res == SP_FORMERROR) | |
536 { | |
537 EMSG(_(e_format)); | |
538 goto endFAIL; | |
539 } | |
540 if (res == SP_TRUNCERROR) | |
541 { | |
542 truncerr: | |
543 EMSG(_(e_spell_trunc)); | |
544 goto endFAIL; | |
545 } | |
546 if (res == SP_OTHERERROR) | |
547 goto endFAIL; | |
548 } | |
549 | |
550 /* <LWORDTREE> */ | |
551 res = spell_read_tree(fd, &lp->sl_fbyts, &lp->sl_fidxs, FALSE, 0); | |
552 if (res != 0) | |
553 goto someerror; | |
554 | |
555 /* <KWORDTREE> */ | |
556 res = spell_read_tree(fd, &lp->sl_kbyts, &lp->sl_kidxs, FALSE, 0); | |
557 if (res != 0) | |
558 goto someerror; | |
559 | |
560 /* <PREFIXTREE> */ | |
561 res = spell_read_tree(fd, &lp->sl_pbyts, &lp->sl_pidxs, TRUE, | |
562 lp->sl_prefixcnt); | |
563 if (res != 0) | |
564 goto someerror; | |
565 | |
566 /* For a new file link it in the list of spell files. */ | |
567 if (old_lp == NULL && lang != NULL) | |
568 { | |
569 lp->sl_next = first_lang; | |
570 first_lang = lp; | |
571 } | |
572 | |
573 goto endOK; | |
574 | |
575 endFAIL: | |
576 if (lang != NULL) | |
577 /* truncating the name signals the error to spell_load_lang() */ | |
578 *lang = NUL; | |
579 if (lp != NULL && old_lp == NULL) | |
580 slang_free(lp); | |
581 lp = NULL; | |
582 | |
583 endOK: | |
584 if (fd != NULL) | |
585 fclose(fd); | |
586 sourcing_name = save_sourcing_name; | |
587 sourcing_lnum = save_sourcing_lnum; | |
588 | |
589 return lp; | |
590 } | |
591 | |
592 /* | |
593 * Fill in the wordcount fields for a trie. | |
594 * Returns the total number of words. | |
595 */ | |
596 static void | |
597 tree_count_words(char_u *byts, idx_T *idxs) | |
598 { | |
599 int depth; | |
600 idx_T arridx[MAXWLEN]; | |
601 int curi[MAXWLEN]; | |
602 int c; | |
603 idx_T n; | |
604 int wordcount[MAXWLEN]; | |
605 | |
606 arridx[0] = 0; | |
607 curi[0] = 1; | |
608 wordcount[0] = 0; | |
609 depth = 0; | |
610 while (depth >= 0 && !got_int) | |
611 { | |
612 if (curi[depth] > byts[arridx[depth]]) | |
613 { | |
614 /* Done all bytes at this node, go up one level. */ | |
615 idxs[arridx[depth]] = wordcount[depth]; | |
616 if (depth > 0) | |
617 wordcount[depth - 1] += wordcount[depth]; | |
618 | |
619 --depth; | |
620 fast_breakcheck(); | |
621 } | |
622 else | |
623 { | |
624 /* Do one more byte at this node. */ | |
625 n = arridx[depth] + curi[depth]; | |
626 ++curi[depth]; | |
627 | |
628 c = byts[n]; | |
629 if (c == 0) | |
630 { | |
631 /* End of word, count it. */ | |
632 ++wordcount[depth]; | |
633 | |
634 /* Skip over any other NUL bytes (same word with different | |
635 * flags). */ | |
636 while (byts[n + 1] == 0) | |
637 { | |
638 ++n; | |
639 ++curi[depth]; | |
640 } | |
641 } | |
642 else | |
643 { | |
644 /* Normal char, go one level deeper to count the words. */ | |
645 ++depth; | |
646 arridx[depth] = idxs[n]; | |
647 curi[depth] = 1; | |
648 wordcount[depth] = 0; | |
649 } | |
650 } | |
651 } | |
652 } | |
653 | |
654 /* | |
655 * Load the .sug files for languages that have one and weren't loaded yet. | |
656 */ | |
657 void | |
658 suggest_load_files(void) | |
659 { | |
660 langp_T *lp; | |
661 int lpi; | |
662 slang_T *slang; | |
663 char_u *dotp; | |
664 FILE *fd; | |
665 char_u buf[MAXWLEN]; | |
666 int i; | |
667 time_t timestamp; | |
668 int wcount; | |
669 int wordnr; | |
670 garray_T ga; | |
671 int c; | |
672 | |
673 /* Do this for all languages that support sound folding. */ | |
674 for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi) | |
675 { | |
676 lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi); | |
677 slang = lp->lp_slang; | |
678 if (slang->sl_sugtime != 0 && !slang->sl_sugloaded) | |
679 { | |
680 /* Change ".spl" to ".sug" and open the file. When the file isn't | |
681 * found silently skip it. Do set "sl_sugloaded" so that we | |
682 * don't try again and again. */ | |
683 slang->sl_sugloaded = TRUE; | |
684 | |
685 dotp = vim_strrchr(slang->sl_fname, '.'); | |
686 if (dotp == NULL || fnamecmp(dotp, ".spl") != 0) | |
687 continue; | |
688 STRCPY(dotp, ".sug"); | |
689 fd = mch_fopen((char *)slang->sl_fname, "r"); | |
690 if (fd == NULL) | |
691 goto nextone; | |
692 | |
693 /* | |
694 * <SUGHEADER>: <fileID> <versionnr> <timestamp> | |
695 */ | |
696 for (i = 0; i < VIMSUGMAGICL; ++i) | |
697 buf[i] = getc(fd); /* <fileID> */ | |
698 if (STRNCMP(buf, VIMSUGMAGIC, VIMSUGMAGICL) != 0) | |
699 { | |
700 EMSG2(_("E778: This does not look like a .sug file: %s"), | |
701 slang->sl_fname); | |
702 goto nextone; | |
703 } | |
704 c = getc(fd); /* <versionnr> */ | |
705 if (c < VIMSUGVERSION) | |
706 { | |
707 EMSG2(_("E779: Old .sug file, needs to be updated: %s"), | |
708 slang->sl_fname); | |
709 goto nextone; | |
710 } | |
711 else if (c > VIMSUGVERSION) | |
712 { | |
713 EMSG2(_("E780: .sug file is for newer version of Vim: %s"), | |
714 slang->sl_fname); | |
715 goto nextone; | |
716 } | |
717 | |
718 /* Check the timestamp, it must be exactly the same as the one in | |
719 * the .spl file. Otherwise the word numbers won't match. */ | |
720 timestamp = get8ctime(fd); /* <timestamp> */ | |
721 if (timestamp != slang->sl_sugtime) | |
722 { | |
723 EMSG2(_("E781: .sug file doesn't match .spl file: %s"), | |
724 slang->sl_fname); | |
725 goto nextone; | |
726 } | |
727 | |
728 /* | |
729 * <SUGWORDTREE>: <wordtree> | |
730 * Read the trie with the soundfolded words. | |
731 */ | |
732 if (spell_read_tree(fd, &slang->sl_sbyts, &slang->sl_sidxs, | |
733 FALSE, 0) != 0) | |
734 { | |
735 someerror: | |
736 EMSG2(_("E782: error while reading .sug file: %s"), | |
737 slang->sl_fname); | |
738 slang_clear_sug(slang); | |
739 goto nextone; | |
740 } | |
741 | |
742 /* | |
743 * <SUGTABLE>: <sugwcount> <sugline> ... | |
744 * | |
745 * Read the table with word numbers. We use a file buffer for | |
746 * this, because it's so much like a file with lines. Makes it | |
747 * possible to swap the info and save on memory use. | |
748 */ | |
749 slang->sl_sugbuf = open_spellbuf(); | |
750 if (slang->sl_sugbuf == NULL) | |
751 goto someerror; | |
752 /* <sugwcount> */ | |
753 wcount = get4c(fd); | |
754 if (wcount < 0) | |
755 goto someerror; | |
756 | |
757 /* Read all the wordnr lists into the buffer, one NUL terminated | |
758 * list per line. */ | |
759 ga_init2(&ga, 1, 100); | |
760 for (wordnr = 0; wordnr < wcount; ++wordnr) | |
761 { | |
762 ga.ga_len = 0; | |
763 for (;;) | |
764 { | |
765 c = getc(fd); /* <sugline> */ | |
766 if (c < 0 || ga_grow(&ga, 1) == FAIL) | |
767 goto someerror; | |
768 ((char_u *)ga.ga_data)[ga.ga_len++] = c; | |
769 if (c == NUL) | |
770 break; | |
771 } | |
772 if (ml_append_buf(slang->sl_sugbuf, (linenr_T)wordnr, | |
773 ga.ga_data, ga.ga_len, TRUE) == FAIL) | |
774 goto someerror; | |
775 } | |
776 ga_clear(&ga); | |
777 | |
778 /* | |
779 * Need to put word counts in the word tries, so that we can find | |
780 * a word by its number. | |
781 */ | |
782 tree_count_words(slang->sl_fbyts, slang->sl_fidxs); | |
783 tree_count_words(slang->sl_sbyts, slang->sl_sidxs); | |
784 | |
785 nextone: | |
786 if (fd != NULL) | |
787 fclose(fd); | |
788 STRCPY(dotp, ".spl"); | |
789 } | |
790 } | |
791 } | |
792 | |
793 | |
794 /* | |
795 * Read a length field from "fd" in "cnt_bytes" bytes. | |
796 * Allocate memory, read the string into it and add a NUL at the end. | |
797 * Returns NULL when the count is zero. | |
798 * Sets "*cntp" to SP_*ERROR when there is an error, length of the result | |
799 * otherwise. | |
800 */ | |
801 static char_u * | |
802 read_cnt_string(FILE *fd, int cnt_bytes, int *cntp) | |
803 { | |
804 int cnt = 0; | |
805 int i; | |
806 char_u *str; | |
807 | |
808 /* read the length bytes, MSB first */ | |
809 for (i = 0; i < cnt_bytes; ++i) | |
810 cnt = (cnt << 8) + getc(fd); | |
811 if (cnt < 0) | |
812 { | |
813 *cntp = SP_TRUNCERROR; | |
814 return NULL; | |
815 } | |
816 *cntp = cnt; | |
817 if (cnt == 0) | |
818 return NULL; /* nothing to read, return NULL */ | |
819 | |
820 str = read_string(fd, cnt); | |
821 if (str == NULL) | |
822 *cntp = SP_OTHERERROR; | |
823 return str; | |
824 } | |
825 | |
826 /* | |
827 * Read SN_REGION: <regionname> ... | |
828 * Return SP_*ERROR flags. | |
829 */ | |
830 static int | |
831 read_region_section(FILE *fd, slang_T *lp, int len) | |
832 { | |
833 int i; | |
834 | |
835 if (len > 16) | |
836 return SP_FORMERROR; | |
837 for (i = 0; i < len; ++i) | |
838 lp->sl_regions[i] = getc(fd); /* <regionname> */ | |
839 lp->sl_regions[len] = NUL; | |
840 return 0; | |
841 } | |
842 | |
843 /* | |
844 * Read SN_CHARFLAGS section: <charflagslen> <charflags> | |
845 * <folcharslen> <folchars> | |
846 * Return SP_*ERROR flags. | |
847 */ | |
848 static int | |
849 read_charflags_section(FILE *fd) | |
850 { | |
851 char_u *flags; | |
852 char_u *fol; | |
853 int flagslen, follen; | |
854 | |
855 /* <charflagslen> <charflags> */ | |
856 flags = read_cnt_string(fd, 1, &flagslen); | |
857 if (flagslen < 0) | |
858 return flagslen; | |
859 | |
860 /* <folcharslen> <folchars> */ | |
861 fol = read_cnt_string(fd, 2, &follen); | |
862 if (follen < 0) | |
863 { | |
864 vim_free(flags); | |
865 return follen; | |
866 } | |
867 | |
868 /* Set the word-char flags and fill SPELL_ISUPPER() table. */ | |
869 if (flags != NULL && fol != NULL) | |
870 set_spell_charflags(flags, flagslen, fol); | |
871 | |
872 vim_free(flags); | |
873 vim_free(fol); | |
874 | |
875 /* When <charflagslen> is zero then <fcharlen> must also be zero. */ | |
876 if ((flags == NULL) != (fol == NULL)) | |
877 return SP_FORMERROR; | |
878 return 0; | |
879 } | |
880 | |
881 /* | |
882 * Read SN_PREFCOND section. | |
883 * Return SP_*ERROR flags. | |
884 */ | |
885 static int | |
886 read_prefcond_section(FILE *fd, slang_T *lp) | |
887 { | |
888 int cnt; | |
889 int i; | |
890 int n; | |
891 char_u *p; | |
892 char_u buf[MAXWLEN + 1]; | |
893 | |
894 /* <prefcondcnt> <prefcond> ... */ | |
895 cnt = get2c(fd); /* <prefcondcnt> */ | |
896 if (cnt <= 0) | |
897 return SP_FORMERROR; | |
898 | |
899 lp->sl_prefprog = (regprog_T **)alloc_clear( | |
900 (unsigned)sizeof(regprog_T *) * cnt); | |
901 if (lp->sl_prefprog == NULL) | |
902 return SP_OTHERERROR; | |
903 lp->sl_prefixcnt = cnt; | |
904 | |
905 for (i = 0; i < cnt; ++i) | |
906 { | |
907 /* <prefcond> : <condlen> <condstr> */ | |
908 n = getc(fd); /* <condlen> */ | |
909 if (n < 0 || n >= MAXWLEN) | |
910 return SP_FORMERROR; | |
911 | |
912 /* When <condlen> is zero we have an empty condition. Otherwise | |
913 * compile the regexp program used to check for the condition. */ | |
914 if (n > 0) | |
915 { | |
916 buf[0] = '^'; /* always match at one position only */ | |
917 p = buf + 1; | |
918 while (n-- > 0) | |
919 *p++ = getc(fd); /* <condstr> */ | |
920 *p = NUL; | |
921 lp->sl_prefprog[i] = vim_regcomp(buf, RE_MAGIC + RE_STRING); | |
922 } | |
923 } | |
924 return 0; | |
925 } | |
926 | |
927 /* | |
928 * Read REP or REPSAL items section from "fd": <repcount> <rep> ... | |
929 * Return SP_*ERROR flags. | |
930 */ | |
931 static int | |
932 read_rep_section(FILE *fd, garray_T *gap, short *first) | |
933 { | |
934 int cnt; | |
935 fromto_T *ftp; | |
936 int i; | |
937 | |
938 cnt = get2c(fd); /* <repcount> */ | |
939 if (cnt < 0) | |
940 return SP_TRUNCERROR; | |
941 | |
942 if (ga_grow(gap, cnt) == FAIL) | |
943 return SP_OTHERERROR; | |
944 | |
945 /* <rep> : <repfromlen> <repfrom> <reptolen> <repto> */ | |
946 for (; gap->ga_len < cnt; ++gap->ga_len) | |
947 { | |
948 ftp = &((fromto_T *)gap->ga_data)[gap->ga_len]; | |
949 ftp->ft_from = read_cnt_string(fd, 1, &i); | |
950 if (i < 0) | |
951 return i; | |
952 if (i == 0) | |
953 return SP_FORMERROR; | |
954 ftp->ft_to = read_cnt_string(fd, 1, &i); | |
955 if (i <= 0) | |
956 { | |
957 vim_free(ftp->ft_from); | |
958 if (i < 0) | |
959 return i; | |
960 return SP_FORMERROR; | |
961 } | |
962 } | |
963 | |
964 /* Fill the first-index table. */ | |
965 for (i = 0; i < 256; ++i) | |
966 first[i] = -1; | |
967 for (i = 0; i < gap->ga_len; ++i) | |
968 { | |
969 ftp = &((fromto_T *)gap->ga_data)[i]; | |
970 if (first[*ftp->ft_from] == -1) | |
971 first[*ftp->ft_from] = i; | |
972 } | |
973 return 0; | |
974 } | |
975 | |
976 /* | |
977 * Read SN_SAL section: <salflags> <salcount> <sal> ... | |
978 * Return SP_*ERROR flags. | |
979 */ | |
980 static int | |
981 read_sal_section(FILE *fd, slang_T *slang) | |
982 { | |
983 int i; | |
984 int cnt; | |
985 garray_T *gap; | |
986 salitem_T *smp; | |
987 int ccnt; | |
988 char_u *p; | |
989 int c = NUL; | |
990 | |
991 slang->sl_sofo = FALSE; | |
992 | |
993 i = getc(fd); /* <salflags> */ | |
994 if (i & SAL_F0LLOWUP) | |
995 slang->sl_followup = TRUE; | |
996 if (i & SAL_COLLAPSE) | |
997 slang->sl_collapse = TRUE; | |
998 if (i & SAL_REM_ACCENTS) | |
999 slang->sl_rem_accents = TRUE; | |
1000 | |
1001 cnt = get2c(fd); /* <salcount> */ | |
1002 if (cnt < 0) | |
1003 return SP_TRUNCERROR; | |
1004 | |
1005 gap = &slang->sl_sal; | |
1006 ga_init2(gap, sizeof(salitem_T), 10); | |
1007 if (ga_grow(gap, cnt + 1) == FAIL) | |
1008 return SP_OTHERERROR; | |
1009 | |
1010 /* <sal> : <salfromlen> <salfrom> <saltolen> <salto> */ | |
1011 for (; gap->ga_len < cnt; ++gap->ga_len) | |
1012 { | |
1013 smp = &((salitem_T *)gap->ga_data)[gap->ga_len]; | |
1014 ccnt = getc(fd); /* <salfromlen> */ | |
1015 if (ccnt < 0) | |
1016 return SP_TRUNCERROR; | |
1017 if ((p = alloc(ccnt + 2)) == NULL) | |
1018 return SP_OTHERERROR; | |
1019 smp->sm_lead = p; | |
1020 | |
1021 /* Read up to the first special char into sm_lead. */ | |
1022 for (i = 0; i < ccnt; ++i) | |
1023 { | |
1024 c = getc(fd); /* <salfrom> */ | |
1025 if (vim_strchr((char_u *)"0123456789(-<^$", c) != NULL) | |
1026 break; | |
1027 *p++ = c; | |
1028 } | |
1029 smp->sm_leadlen = (int)(p - smp->sm_lead); | |
1030 *p++ = NUL; | |
1031 | |
1032 /* Put (abc) chars in sm_oneof, if any. */ | |
1033 if (c == '(') | |
1034 { | |
1035 smp->sm_oneof = p; | |
1036 for (++i; i < ccnt; ++i) | |
1037 { | |
1038 c = getc(fd); /* <salfrom> */ | |
1039 if (c == ')') | |
1040 break; | |
1041 *p++ = c; | |
1042 } | |
1043 *p++ = NUL; | |
1044 if (++i < ccnt) | |
1045 c = getc(fd); | |
1046 } | |
1047 else | |
1048 smp->sm_oneof = NULL; | |
1049 | |
1050 /* Any following chars go in sm_rules. */ | |
1051 smp->sm_rules = p; | |
1052 if (i < ccnt) | |
1053 /* store the char we got while checking for end of sm_lead */ | |
1054 *p++ = c; | |
1055 for (++i; i < ccnt; ++i) | |
1056 *p++ = getc(fd); /* <salfrom> */ | |
1057 *p++ = NUL; | |
1058 | |
1059 /* <saltolen> <salto> */ | |
1060 smp->sm_to = read_cnt_string(fd, 1, &ccnt); | |
1061 if (ccnt < 0) | |
1062 { | |
1063 vim_free(smp->sm_lead); | |
1064 return ccnt; | |
1065 } | |
1066 | |
1067 #ifdef FEAT_MBYTE | |
1068 if (has_mbyte) | |
1069 { | |
1070 /* convert the multi-byte strings to wide char strings */ | |
1071 smp->sm_lead_w = mb_str2wide(smp->sm_lead); | |
1072 smp->sm_leadlen = mb_charlen(smp->sm_lead); | |
1073 if (smp->sm_oneof == NULL) | |
1074 smp->sm_oneof_w = NULL; | |
1075 else | |
1076 smp->sm_oneof_w = mb_str2wide(smp->sm_oneof); | |
1077 if (smp->sm_to == NULL) | |
1078 smp->sm_to_w = NULL; | |
1079 else | |
1080 smp->sm_to_w = mb_str2wide(smp->sm_to); | |
1081 if (smp->sm_lead_w == NULL | |
1082 || (smp->sm_oneof_w == NULL && smp->sm_oneof != NULL) | |
1083 || (smp->sm_to_w == NULL && smp->sm_to != NULL)) | |
1084 { | |
1085 vim_free(smp->sm_lead); | |
1086 vim_free(smp->sm_to); | |
1087 vim_free(smp->sm_lead_w); | |
1088 vim_free(smp->sm_oneof_w); | |
1089 vim_free(smp->sm_to_w); | |
1090 return SP_OTHERERROR; | |
1091 } | |
1092 } | |
1093 #endif | |
1094 } | |
1095 | |
1096 if (gap->ga_len > 0) | |
1097 { | |
1098 /* Add one extra entry to mark the end with an empty sm_lead. Avoids | |
1099 * that we need to check the index every time. */ | |
1100 smp = &((salitem_T *)gap->ga_data)[gap->ga_len]; | |
1101 if ((p = alloc(1)) == NULL) | |
1102 return SP_OTHERERROR; | |
1103 p[0] = NUL; | |
1104 smp->sm_lead = p; | |
1105 smp->sm_leadlen = 0; | |
1106 smp->sm_oneof = NULL; | |
1107 smp->sm_rules = p; | |
1108 smp->sm_to = NULL; | |
1109 #ifdef FEAT_MBYTE | |
1110 if (has_mbyte) | |
1111 { | |
1112 smp->sm_lead_w = mb_str2wide(smp->sm_lead); | |
1113 smp->sm_leadlen = 0; | |
1114 smp->sm_oneof_w = NULL; | |
1115 smp->sm_to_w = NULL; | |
1116 } | |
1117 #endif | |
1118 ++gap->ga_len; | |
1119 } | |
1120 | |
1121 /* Fill the first-index table. */ | |
1122 set_sal_first(slang); | |
1123 | |
1124 return 0; | |
1125 } | |
1126 | |
1127 /* | |
1128 * Read SN_WORDS: <word> ... | |
1129 * Return SP_*ERROR flags. | |
1130 */ | |
1131 static int | |
1132 read_words_section(FILE *fd, slang_T *lp, int len) | |
1133 { | |
1134 int done = 0; | |
1135 int i; | |
1136 int c; | |
1137 char_u word[MAXWLEN]; | |
1138 | |
1139 while (done < len) | |
1140 { | |
1141 /* Read one word at a time. */ | |
1142 for (i = 0; ; ++i) | |
1143 { | |
1144 c = getc(fd); | |
1145 if (c == EOF) | |
1146 return SP_TRUNCERROR; | |
1147 word[i] = c; | |
1148 if (word[i] == NUL) | |
1149 break; | |
1150 if (i == MAXWLEN - 1) | |
1151 return SP_FORMERROR; | |
1152 } | |
1153 | |
1154 /* Init the count to 10. */ | |
1155 count_common_word(lp, word, -1, 10); | |
1156 done += i + 1; | |
1157 } | |
1158 return 0; | |
1159 } | |
1160 | |
1161 /* | |
1162 * SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto> | |
1163 * Return SP_*ERROR flags. | |
1164 */ | |
1165 static int | |
1166 read_sofo_section(FILE *fd, slang_T *slang) | |
1167 { | |
1168 int cnt; | |
1169 char_u *from, *to; | |
1170 int res; | |
1171 | |
1172 slang->sl_sofo = TRUE; | |
1173 | |
1174 /* <sofofromlen> <sofofrom> */ | |
1175 from = read_cnt_string(fd, 2, &cnt); | |
1176 if (cnt < 0) | |
1177 return cnt; | |
1178 | |
1179 /* <sofotolen> <sofoto> */ | |
1180 to = read_cnt_string(fd, 2, &cnt); | |
1181 if (cnt < 0) | |
1182 { | |
1183 vim_free(from); | |
1184 return cnt; | |
1185 } | |
1186 | |
1187 /* Store the info in slang->sl_sal and/or slang->sl_sal_first. */ | |
1188 if (from != NULL && to != NULL) | |
1189 res = set_sofo(slang, from, to); | |
1190 else if (from != NULL || to != NULL) | |
1191 res = SP_FORMERROR; /* only one of two strings is an error */ | |
1192 else | |
1193 res = 0; | |
1194 | |
1195 vim_free(from); | |
1196 vim_free(to); | |
1197 return res; | |
1198 } | |
1199 | |
1200 /* | |
1201 * Read the compound section from the .spl file: | |
1202 * <compmax> <compminlen> <compsylmax> <compoptions> <compflags> | |
1203 * Returns SP_*ERROR flags. | |
1204 */ | |
1205 static int | |
1206 read_compound(FILE *fd, slang_T *slang, int len) | |
1207 { | |
1208 int todo = len; | |
1209 int c; | |
1210 int atstart; | |
1211 char_u *pat; | |
1212 char_u *pp; | |
1213 char_u *cp; | |
1214 char_u *ap; | |
1215 char_u *crp; | |
1216 int cnt; | |
1217 garray_T *gap; | |
1218 | |
1219 if (todo < 2) | |
1220 return SP_FORMERROR; /* need at least two bytes */ | |
1221 | |
1222 --todo; | |
1223 c = getc(fd); /* <compmax> */ | |
1224 if (c < 2) | |
1225 c = MAXWLEN; | |
1226 slang->sl_compmax = c; | |
1227 | |
1228 --todo; | |
1229 c = getc(fd); /* <compminlen> */ | |
1230 if (c < 1) | |
1231 c = 0; | |
1232 slang->sl_compminlen = c; | |
1233 | |
1234 --todo; | |
1235 c = getc(fd); /* <compsylmax> */ | |
1236 if (c < 1) | |
1237 c = MAXWLEN; | |
1238 slang->sl_compsylmax = c; | |
1239 | |
1240 c = getc(fd); /* <compoptions> */ | |
1241 if (c != 0) | |
1242 ungetc(c, fd); /* be backwards compatible with Vim 7.0b */ | |
1243 else | |
1244 { | |
1245 --todo; | |
1246 c = getc(fd); /* only use the lower byte for now */ | |
1247 --todo; | |
1248 slang->sl_compoptions = c; | |
1249 | |
1250 gap = &slang->sl_comppat; | |
1251 c = get2c(fd); /* <comppatcount> */ | |
1252 todo -= 2; | |
1253 ga_init2(gap, sizeof(char_u *), c); | |
1254 if (ga_grow(gap, c) == OK) | |
1255 while (--c >= 0) | |
1256 { | |
1257 ((char_u **)(gap->ga_data))[gap->ga_len++] = | |
1258 read_cnt_string(fd, 1, &cnt); | |
1259 /* <comppatlen> <comppattext> */ | |
1260 if (cnt < 0) | |
1261 return cnt; | |
1262 todo -= cnt + 1; | |
1263 } | |
1264 } | |
1265 if (todo < 0) | |
1266 return SP_FORMERROR; | |
1267 | |
1268 /* Turn the COMPOUNDRULE items into a regexp pattern: | |
1269 * "a[bc]/a*b+" -> "^\(a[bc]\|a*b\+\)$". | |
1270 * Inserting backslashes may double the length, "^\(\)$<Nul>" is 7 bytes. | |
1271 * Conversion to utf-8 may double the size. */ | |
1272 c = todo * 2 + 7; | |
1273 #ifdef FEAT_MBYTE | |
1274 if (enc_utf8) | |
1275 c += todo * 2; | |
1276 #endif | |
1277 pat = alloc((unsigned)c); | |
1278 if (pat == NULL) | |
1279 return SP_OTHERERROR; | |
1280 | |
1281 /* We also need a list of all flags that can appear at the start and one | |
1282 * for all flags. */ | |
1283 cp = alloc(todo + 1); | |
1284 if (cp == NULL) | |
1285 { | |
1286 vim_free(pat); | |
1287 return SP_OTHERERROR; | |
1288 } | |
1289 slang->sl_compstartflags = cp; | |
1290 *cp = NUL; | |
1291 | |
1292 ap = alloc(todo + 1); | |
1293 if (ap == NULL) | |
1294 { | |
1295 vim_free(pat); | |
1296 return SP_OTHERERROR; | |
1297 } | |
1298 slang->sl_compallflags = ap; | |
1299 *ap = NUL; | |
1300 | |
1301 /* And a list of all patterns in their original form, for checking whether | |
1302 * compounding may work in match_compoundrule(). This is freed when we | |
1303 * encounter a wildcard, the check doesn't work then. */ | |
1304 crp = alloc(todo + 1); | |
1305 slang->sl_comprules = crp; | |
1306 | |
1307 pp = pat; | |
1308 *pp++ = '^'; | |
1309 *pp++ = '\\'; | |
1310 *pp++ = '('; | |
1311 | |
1312 atstart = 1; | |
1313 while (todo-- > 0) | |
1314 { | |
1315 c = getc(fd); /* <compflags> */ | |
1316 if (c == EOF) | |
1317 { | |
1318 vim_free(pat); | |
1319 return SP_TRUNCERROR; | |
1320 } | |
1321 | |
1322 /* Add all flags to "sl_compallflags". */ | |
1323 if (vim_strchr((char_u *)"?*+[]/", c) == NULL | |
1324 && !byte_in_str(slang->sl_compallflags, c)) | |
1325 { | |
1326 *ap++ = c; | |
1327 *ap = NUL; | |
1328 } | |
1329 | |
1330 if (atstart != 0) | |
1331 { | |
1332 /* At start of item: copy flags to "sl_compstartflags". For a | |
1333 * [abc] item set "atstart" to 2 and copy up to the ']'. */ | |
1334 if (c == '[') | |
1335 atstart = 2; | |
1336 else if (c == ']') | |
1337 atstart = 0; | |
1338 else | |
1339 { | |
1340 if (!byte_in_str(slang->sl_compstartflags, c)) | |
1341 { | |
1342 *cp++ = c; | |
1343 *cp = NUL; | |
1344 } | |
1345 if (atstart == 1) | |
1346 atstart = 0; | |
1347 } | |
1348 } | |
1349 | |
1350 /* Copy flag to "sl_comprules", unless we run into a wildcard. */ | |
1351 if (crp != NULL) | |
1352 { | |
1353 if (c == '?' || c == '+' || c == '*') | |
1354 { | |
1355 vim_free(slang->sl_comprules); | |
1356 slang->sl_comprules = NULL; | |
1357 crp = NULL; | |
1358 } | |
1359 else | |
1360 *crp++ = c; | |
1361 } | |
1362 | |
1363 if (c == '/') /* slash separates two items */ | |
1364 { | |
1365 *pp++ = '\\'; | |
1366 *pp++ = '|'; | |
1367 atstart = 1; | |
1368 } | |
1369 else /* normal char, "[abc]" and '*' are copied as-is */ | |
1370 { | |
1371 if (c == '?' || c == '+' || c == '~') | |
1372 *pp++ = '\\'; /* "a?" becomes "a\?", "a+" becomes "a\+" */ | |
1373 #ifdef FEAT_MBYTE | |
1374 if (enc_utf8) | |
1375 pp += mb_char2bytes(c, pp); | |
1376 else | |
1377 #endif | |
1378 *pp++ = c; | |
1379 } | |
1380 } | |
1381 | |
1382 *pp++ = '\\'; | |
1383 *pp++ = ')'; | |
1384 *pp++ = '$'; | |
1385 *pp = NUL; | |
1386 | |
1387 if (crp != NULL) | |
1388 *crp = NUL; | |
1389 | |
1390 slang->sl_compprog = vim_regcomp(pat, RE_MAGIC + RE_STRING + RE_STRICT); | |
1391 vim_free(pat); | |
1392 if (slang->sl_compprog == NULL) | |
1393 return SP_FORMERROR; | |
1394 | |
1395 return 0; | |
1396 } | |
1397 | |
1398 /* | |
1399 * Set the SOFOFROM and SOFOTO items in language "lp". | |
1400 * Returns SP_*ERROR flags when there is something wrong. | |
1401 */ | |
1402 static int | |
1403 set_sofo(slang_T *lp, char_u *from, char_u *to) | |
1404 { | |
1405 int i; | |
1406 | |
1407 #ifdef FEAT_MBYTE | |
1408 garray_T *gap; | |
1409 char_u *s; | |
1410 char_u *p; | |
1411 int c; | |
1412 int *inp; | |
1413 | |
1414 if (has_mbyte) | |
1415 { | |
1416 /* Use "sl_sal" as an array with 256 pointers to a list of wide | |
1417 * characters. The index is the low byte of the character. | |
1418 * The list contains from-to pairs with a terminating NUL. | |
1419 * sl_sal_first[] is used for latin1 "from" characters. */ | |
1420 gap = &lp->sl_sal; | |
1421 ga_init2(gap, sizeof(int *), 1); | |
1422 if (ga_grow(gap, 256) == FAIL) | |
1423 return SP_OTHERERROR; | |
1424 vim_memset(gap->ga_data, 0, sizeof(int *) * 256); | |
1425 gap->ga_len = 256; | |
1426 | |
1427 /* First count the number of items for each list. Temporarily use | |
1428 * sl_sal_first[] for this. */ | |
1429 for (p = from, s = to; *p != NUL && *s != NUL; ) | |
1430 { | |
1431 c = mb_cptr2char_adv(&p); | |
1432 mb_cptr_adv(s); | |
1433 if (c >= 256) | |
1434 ++lp->sl_sal_first[c & 0xff]; | |
1435 } | |
1436 if (*p != NUL || *s != NUL) /* lengths differ */ | |
1437 return SP_FORMERROR; | |
1438 | |
1439 /* Allocate the lists. */ | |
1440 for (i = 0; i < 256; ++i) | |
1441 if (lp->sl_sal_first[i] > 0) | |
1442 { | |
1443 p = alloc(sizeof(int) * (lp->sl_sal_first[i] * 2 + 1)); | |
1444 if (p == NULL) | |
1445 return SP_OTHERERROR; | |
1446 ((int **)gap->ga_data)[i] = (int *)p; | |
1447 *(int *)p = 0; | |
1448 } | |
1449 | |
1450 /* Put the characters up to 255 in sl_sal_first[] the rest in a sl_sal | |
1451 * list. */ | |
1452 vim_memset(lp->sl_sal_first, 0, sizeof(salfirst_T) * 256); | |
1453 for (p = from, s = to; *p != NUL && *s != NUL; ) | |
1454 { | |
1455 c = mb_cptr2char_adv(&p); | |
1456 i = mb_cptr2char_adv(&s); | |
1457 if (c >= 256) | |
1458 { | |
1459 /* Append the from-to chars at the end of the list with | |
1460 * the low byte. */ | |
1461 inp = ((int **)gap->ga_data)[c & 0xff]; | |
1462 while (*inp != 0) | |
1463 ++inp; | |
1464 *inp++ = c; /* from char */ | |
1465 *inp++ = i; /* to char */ | |
1466 *inp++ = NUL; /* NUL at the end */ | |
1467 } | |
1468 else | |
1469 /* mapping byte to char is done in sl_sal_first[] */ | |
1470 lp->sl_sal_first[c] = i; | |
1471 } | |
1472 } | |
1473 else | |
1474 #endif | |
1475 { | |
1476 /* mapping bytes to bytes is done in sl_sal_first[] */ | |
1477 if (STRLEN(from) != STRLEN(to)) | |
1478 return SP_FORMERROR; | |
1479 | |
1480 for (i = 0; to[i] != NUL; ++i) | |
1481 lp->sl_sal_first[from[i]] = to[i]; | |
1482 lp->sl_sal.ga_len = 1; /* indicates we have soundfolding */ | |
1483 } | |
1484 | |
1485 return 0; | |
1486 } | |
1487 | |
1488 /* | |
1489 * Fill the first-index table for "lp". | |
1490 */ | |
1491 static void | |
1492 set_sal_first(slang_T *lp) | |
1493 { | |
1494 salfirst_T *sfirst; | |
1495 int i; | |
1496 salitem_T *smp; | |
1497 int c; | |
1498 garray_T *gap = &lp->sl_sal; | |
1499 | |
1500 sfirst = lp->sl_sal_first; | |
1501 for (i = 0; i < 256; ++i) | |
1502 sfirst[i] = -1; | |
1503 smp = (salitem_T *)gap->ga_data; | |
1504 for (i = 0; i < gap->ga_len; ++i) | |
1505 { | |
1506 #ifdef FEAT_MBYTE | |
1507 if (has_mbyte) | |
1508 /* Use the lowest byte of the first character. For latin1 it's | |
1509 * the character, for other encodings it should differ for most | |
1510 * characters. */ | |
1511 c = *smp[i].sm_lead_w & 0xff; | |
1512 else | |
1513 #endif | |
1514 c = *smp[i].sm_lead; | |
1515 if (sfirst[c] == -1) | |
1516 { | |
1517 sfirst[c] = i; | |
1518 #ifdef FEAT_MBYTE | |
1519 if (has_mbyte) | |
1520 { | |
1521 int n; | |
1522 | |
1523 /* Make sure all entries with this byte are following each | |
1524 * other. Move the ones that are in the wrong position. Do | |
1525 * keep the same ordering! */ | |
1526 while (i + 1 < gap->ga_len | |
1527 && (*smp[i + 1].sm_lead_w & 0xff) == c) | |
1528 /* Skip over entry with same index byte. */ | |
1529 ++i; | |
1530 | |
1531 for (n = 1; i + n < gap->ga_len; ++n) | |
1532 if ((*smp[i + n].sm_lead_w & 0xff) == c) | |
1533 { | |
1534 salitem_T tsal; | |
1535 | |
1536 /* Move entry with same index byte after the entries | |
1537 * we already found. */ | |
1538 ++i; | |
1539 --n; | |
1540 tsal = smp[i + n]; | |
1541 mch_memmove(smp + i + 1, smp + i, | |
1542 sizeof(salitem_T) * n); | |
1543 smp[i] = tsal; | |
1544 } | |
1545 } | |
1546 #endif | |
1547 } | |
1548 } | |
1549 } | |
1550 | |
#ifdef FEAT_MBYTE
/*
 * Convert the multi-byte string "s" into a NUL-terminated array of wide
 * characters.
 * The result is in allocated memory; NULL is returned when out of memory.
 */
static int *
mb_str2wide(char_u *s)
{
    int		*wide;
    int		*wp;
    char_u	*src = s;

    wide = (int *)alloc(sizeof(int) * (mb_charlen(s) + 1));
    if (wide == NULL)
	return NULL;

    wp = wide;
    while (*src != NUL)
	*wp++ = mb_ptr2char_adv(&src);
    *wp = NUL;
    return wide;
}
#endif
1573 | |
1574 /* | |
1575 * Read a tree from the .spl or .sug file. | |
1576 * Allocates the memory and stores pointers in "bytsp" and "idxsp". | |
1577 * This is skipped when the tree has zero length. | |
1578 * Returns zero when OK, SP_ value for an error. | |
1579 */ | |
1580 static int | |
1581 spell_read_tree( | |
1582 FILE *fd, | |
1583 char_u **bytsp, | |
1584 idx_T **idxsp, | |
1585 int prefixtree, /* TRUE for the prefix tree */ | |
1586 int prefixcnt) /* when "prefixtree" is TRUE: prefix count */ | |
1587 { | |
1588 int len; | |
1589 int idx; | |
1590 char_u *bp; | |
1591 idx_T *ip; | |
1592 | |
1593 /* The tree size was computed when writing the file, so that we can | |
1594 * allocate it as one long block. <nodecount> */ | |
1595 len = get4c(fd); | |
1596 if (len < 0) | |
1597 return SP_TRUNCERROR; | |
1598 if (len > 0) | |
1599 { | |
1600 /* Allocate the byte array. */ | |
1601 bp = lalloc((long_u)len, TRUE); | |
1602 if (bp == NULL) | |
1603 return SP_OTHERERROR; | |
1604 *bytsp = bp; | |
1605 | |
1606 /* Allocate the index array. */ | |
1607 ip = (idx_T *)lalloc_clear((long_u)(len * sizeof(int)), TRUE); | |
1608 if (ip == NULL) | |
1609 return SP_OTHERERROR; | |
1610 *idxsp = ip; | |
1611 | |
1612 /* Recursively read the tree and store it in the array. */ | |
1613 idx = read_tree_node(fd, bp, ip, len, 0, prefixtree, prefixcnt); | |
1614 if (idx < 0) | |
1615 return idx; | |
1616 } | |
1617 return 0; | |
1618 } | |
1619 | |
1620 /* | |
1621 * Read one row of siblings from the spell file and store it in the byte array | |
1622 * "byts" and index array "idxs". Recursively read the children. | |
1623 * | |
1624 * NOTE: The code here must match put_node()! | |
1625 * | |
1626 * Returns the index (>= 0) following the siblings. | |
1627 * Returns SP_TRUNCERROR if the file is shorter than expected. | |
1628 * Returns SP_FORMERROR if there is a format error. | |
1629 */ | |
1630 static idx_T | |
1631 read_tree_node( | |
1632 FILE *fd, | |
1633 char_u *byts, | |
1634 idx_T *idxs, | |
1635 int maxidx, /* size of arrays */ | |
1636 idx_T startidx, /* current index in "byts" and "idxs" */ | |
1637 int prefixtree, /* TRUE for reading PREFIXTREE */ | |
1638 int maxprefcondnr) /* maximum for <prefcondnr> */ | |
1639 { | |
1640 int len; | |
1641 int i; | |
1642 int n; | |
1643 idx_T idx = startidx; | |
1644 int c; | |
1645 int c2; | |
1646 #define SHARED_MASK 0x8000000 | |
1647 | |
1648 len = getc(fd); /* <siblingcount> */ | |
1649 if (len <= 0) | |
1650 return SP_TRUNCERROR; | |
1651 | |
1652 if (startidx + len >= maxidx) | |
1653 return SP_FORMERROR; | |
1654 byts[idx++] = len; | |
1655 | |
1656 /* Read the byte values, flag/region bytes and shared indexes. */ | |
1657 for (i = 1; i <= len; ++i) | |
1658 { | |
1659 c = getc(fd); /* <byte> */ | |
1660 if (c < 0) | |
1661 return SP_TRUNCERROR; | |
1662 if (c <= BY_SPECIAL) | |
1663 { | |
1664 if (c == BY_NOFLAGS && !prefixtree) | |
1665 { | |
1666 /* No flags, all regions. */ | |
1667 idxs[idx] = 0; | |
1668 c = 0; | |
1669 } | |
1670 else if (c != BY_INDEX) | |
1671 { | |
1672 if (prefixtree) | |
1673 { | |
1674 /* Read the optional pflags byte, the prefix ID and the | |
1675 * condition nr. In idxs[] store the prefix ID in the low | |
1676 * byte, the condition index shifted up 8 bits, the flags | |
1677 * shifted up 24 bits. */ | |
1678 if (c == BY_FLAGS) | |
1679 c = getc(fd) << 24; /* <pflags> */ | |
1680 else | |
1681 c = 0; | |
1682 | |
1683 c |= getc(fd); /* <affixID> */ | |
1684 | |
1685 n = get2c(fd); /* <prefcondnr> */ | |
1686 if (n >= maxprefcondnr) | |
1687 return SP_FORMERROR; | |
1688 c |= (n << 8); | |
1689 } | |
1690 else /* c must be BY_FLAGS or BY_FLAGS2 */ | |
1691 { | |
1692 /* Read flags and optional region and prefix ID. In | |
1693 * idxs[] the flags go in the low two bytes, region above | |
1694 * that and prefix ID above the region. */ | |
1695 c2 = c; | |
1696 c = getc(fd); /* <flags> */ | |
1697 if (c2 == BY_FLAGS2) | |
1698 c = (getc(fd) << 8) + c; /* <flags2> */ | |
1699 if (c & WF_REGION) | |
1700 c = (getc(fd) << 16) + c; /* <region> */ | |
1701 if (c & WF_AFX) | |
1702 c = (getc(fd) << 24) + c; /* <affixID> */ | |
1703 } | |
1704 | |
1705 idxs[idx] = c; | |
1706 c = 0; | |
1707 } | |
1708 else /* c == BY_INDEX */ | |
1709 { | |
1710 /* <nodeidx> */ | |
1711 n = get3c(fd); | |
1712 if (n < 0 || n >= maxidx) | |
1713 return SP_FORMERROR; | |
1714 idxs[idx] = n + SHARED_MASK; | |
1715 c = getc(fd); /* <xbyte> */ | |
1716 } | |
1717 } | |
1718 byts[idx++] = c; | |
1719 } | |
1720 | |
1721 /* Recursively read the children for non-shared siblings. | |
1722 * Skip the end-of-word ones (zero byte value) and the shared ones (and | |
1723 * remove SHARED_MASK) */ | |
1724 for (i = 1; i <= len; ++i) | |
1725 if (byts[startidx + i] != 0) | |
1726 { | |
1727 if (idxs[startidx + i] & SHARED_MASK) | |
1728 idxs[startidx + i] &= ~SHARED_MASK; | |
1729 else | |
1730 { | |
1731 idxs[startidx + i] = idx; | |
1732 idx = read_tree_node(fd, byts, idxs, maxidx, idx, | |
1733 prefixtree, maxprefcondnr); | |
1734 if (idx < 0) | |
1735 break; | |
1736 } | |
1737 } | |
1738 | |
1739 return idx; | |
1740 } | |
1741 | |
1742 /* | |
1743 * Reload the spell file "fname" if it's loaded. | |
1744 */ | |
1745 static void | |
1746 spell_reload_one( | |
1747 char_u *fname, | |
1748 int added_word) /* invoked through "zg" */ | |
1749 { | |
1750 slang_T *slang; | |
1751 int didit = FALSE; | |
1752 | |
1753 for (slang = first_lang; slang != NULL; slang = slang->sl_next) | |
1754 { | |
1755 if (fullpathcmp(fname, slang->sl_fname, FALSE) == FPC_SAME) | |
1756 { | |
1757 slang_clear(slang); | |
1758 if (spell_load_file(fname, NULL, slang, FALSE) == NULL) | |
1759 /* reloading failed, clear the language */ | |
1760 slang_clear(slang); | |
1761 redraw_all_later(SOME_VALID); | |
1762 didit = TRUE; | |
1763 } | |
1764 } | |
1765 | |
1766 /* When "zg" was used and the file wasn't loaded yet, should redo | |
1767 * 'spelllang' to load it now. */ | |
1768 if (added_word && !didit) | |
1769 did_set_spelllang(curwin); | |
1770 } | |
1771 | |
1772 | |
1773 /* | |
1774 * Functions for ":mkspell". | |
1775 */ | |
1776 | |
1777 #define MAXLINELEN 500 /* Maximum length in bytes of a line in a .aff | |
1778 and .dic file. */ | |
1779 /* | |
1780 * Main structure to store the contents of a ".aff" file. | |
1781 */ | |
1782 typedef struct afffile_S | |
1783 { | |
1784 char_u *af_enc; /* "SET", normalized, alloc'ed string or NULL */ | |
1785 int af_flagtype; /* AFT_CHAR, AFT_LONG, AFT_NUM or AFT_CAPLONG */ | |
1786 unsigned af_rare; /* RARE ID for rare word */ | |
1787 unsigned af_keepcase; /* KEEPCASE ID for keep-case word */ | |
1788 unsigned af_bad; /* BAD ID for banned word */ | |
1789 unsigned af_needaffix; /* NEEDAFFIX ID */ | |
1790 unsigned af_circumfix; /* CIRCUMFIX ID */ | |
1791 unsigned af_needcomp; /* NEEDCOMPOUND ID */ | |
1792 unsigned af_comproot; /* COMPOUNDROOT ID */ | |
1793 unsigned af_compforbid; /* COMPOUNDFORBIDFLAG ID */ | |
1794 unsigned af_comppermit; /* COMPOUNDPERMITFLAG ID */ | |
1795 unsigned af_nosuggest; /* NOSUGGEST ID */ | |
1796 int af_pfxpostpone; /* postpone prefixes without chop string and | |
1797 without flags */ | |
1798 int af_ignoreextra; /* IGNOREEXTRA present */ | |
1799 hashtab_T af_pref; /* hashtable for prefixes, affheader_T */ | |
1800 hashtab_T af_suff; /* hashtable for suffixes, affheader_T */ | |
1801 hashtab_T af_comp; /* hashtable for compound flags, compitem_T */ | |
1802 } afffile_T; | |
1803 | |
1804 #define AFT_CHAR 0 /* flags are one character */ | |
1805 #define AFT_LONG 1 /* flags are two characters */ | |
1806 #define AFT_CAPLONG 2 /* flags are one or two characters */ | |
1807 #define AFT_NUM 3 /* flags are numbers, comma separated */ | |
1808 | |
1809 typedef struct affentry_S affentry_T; | |
1810 /* Affix entry from ".aff" file. Used for prefixes and suffixes. */ | |
1811 struct affentry_S | |
1812 { | |
1813 affentry_T *ae_next; /* next affix with same name/number */ | |
1814 char_u *ae_chop; /* text to chop off basic word (can be NULL) */ | |
1815 char_u *ae_add; /* text to add to basic word (can be NULL) */ | |
1816 char_u *ae_flags; /* flags on the affix (can be NULL) */ | |
1817 char_u *ae_cond; /* condition (NULL for ".") */ | |
1818 regprog_T *ae_prog; /* regexp program for ae_cond or NULL */ | |
1819 char ae_compforbid; /* COMPOUNDFORBIDFLAG found */ | |
1820 char ae_comppermit; /* COMPOUNDPERMITFLAG found */ | |
1821 }; | |
1822 | |
1823 #ifdef FEAT_MBYTE | |
1824 # define AH_KEY_LEN 17 /* 2 x 8 bytes + NUL */ | |
1825 #else | |
1826 # define AH_KEY_LEN 7 /* 6 digits + NUL */ | |
1827 #endif | |
1828 | |
1829 /* Affix header from ".aff" file. Used for af_pref and af_suff. */ | |
1830 typedef struct affheader_S | |
1831 { | |
1832 char_u ah_key[AH_KEY_LEN]; /* key for hashtab == name of affix */ | |
1833 unsigned ah_flag; /* affix name as number, uses "af_flagtype" */ | |
1834 int ah_newID; /* prefix ID after renumbering; 0 if not used */ | |
1835 int ah_combine; /* suffix may combine with prefix */ | |
1836 int ah_follows; /* another affix block should be following */ | |
1837 affentry_T *ah_first; /* first affix entry */ | |
1838 } affheader_T; | |
1839 | |
1840 #define HI2AH(hi) ((affheader_T *)(hi)->hi_key) | |
1841 | |
1842 /* Flag used in compound items. */ | |
1843 typedef struct compitem_S | |
1844 { | |
1845 char_u ci_key[AH_KEY_LEN]; /* key for hashtab == name of compound */ | |
1846 unsigned ci_flag; /* affix name as number, uses "af_flagtype" */ | |
1847 int ci_newID; /* affix ID after renumbering. */ | |
1848 } compitem_T; | |
1849 | |
1850 #define HI2CI(hi) ((compitem_T *)(hi)->hi_key) | |
1851 | |
1852 /* | |
1853 * Structure that is used to store the items in the word tree. This avoids | |
1854 * the need to keep track of each allocated thing, everything is freed all at | |
1855 * once after ":mkspell" is done. | |
1856 * Note: "sb_next" must be just before "sb_data" to make sure the alignment of | |
1857 * "sb_data" is correct for systems where pointers must be aligned on | |
1858 * pointer-size boundaries and sizeof(pointer) > sizeof(int) (e.g., Sparc). | |
1859 */ | |
1860 #define SBLOCKSIZE 16000 /* size of sb_data */ | |
1861 typedef struct sblock_S sblock_T; | |
1862 struct sblock_S | |
1863 { | |
1864 int sb_used; /* nr of bytes already in use */ | |
1865 sblock_T *sb_next; /* next block in list */ | |
1866 char_u sb_data[1]; /* data, actually longer */ | |
1867 }; | |
1868 | |
1869 /* | |
1870 * A node in the tree. | |
1871 */ | |
1872 typedef struct wordnode_S wordnode_T; | |
1873 struct wordnode_S | |
1874 { | |
1875 union /* shared to save space */ | |
1876 { | |
1877 char_u hashkey[6]; /* the hash key, only used while compressing */ | |
1878 int index; /* index in written nodes (valid after first | |
1879 round) */ | |
1880 } wn_u1; | |
1881 union /* shared to save space */ | |
1882 { | |
1883 wordnode_T *next; /* next node with same hash key */ | |
1884 wordnode_T *wnode; /* parent node that will write this node */ | |
1885 } wn_u2; | |
1886 wordnode_T *wn_child; /* child (next byte in word) */ | |
1887 wordnode_T *wn_sibling; /* next sibling (alternate byte in word, | |
1888 always sorted) */ | |
1889 int wn_refs; /* Nr. of references to this node. Only | |
1890 relevant for first node in a list of | |
1891 siblings, in following siblings it is | |
1892 always one. */ | |
1893 char_u wn_byte; /* Byte for this node. NUL for word end */ | |
1894 | |
1895 /* Info for when "wn_byte" is NUL. | |
1896 * In PREFIXTREE "wn_region" is used for the prefcondnr. | |
1897 * In the soundfolded word tree "wn_flags" has the MSW of the wordnr and | |
1898 * "wn_region" the LSW of the wordnr. */ | |
1899 char_u wn_affixID; /* supported/required prefix ID or 0 */ | |
1900 short_u wn_flags; /* WF_ flags */ | |
1901 short wn_region; /* region mask */ | |
1902 | |
1903 #ifdef SPELL_PRINTTREE | |
1904 int wn_nr; /* sequence nr for printing */ | |
1905 #endif | |
1906 }; | |
1907 | |
1908 #define WN_MASK 0xffff /* mask relevant bits of "wn_flags" */ | |
1909 | |
1910 #define HI2WN(hi) (wordnode_T *)((hi)->hi_key) | |
1911 | |
1912 /* | |
1913 * Info used while reading the spell files. | |
1914 */ | |
1915 typedef struct spellinfo_S | |
1916 { | |
1917 wordnode_T *si_foldroot; /* tree with case-folded words */ | |
1918 long si_foldwcount; /* nr of words in si_foldroot */ | |
1919 | |
1920 wordnode_T *si_keeproot; /* tree with keep-case words */ | |
1921 long si_keepwcount; /* nr of words in si_keeproot */ | |
1922 | |
1923 wordnode_T *si_prefroot; /* tree with postponed prefixes */ | |
1924 | |
1925 long si_sugtree; /* creating the soundfolding trie */ | |
1926 | |
1927 sblock_T *si_blocks; /* memory blocks used */ | |
1928 long si_blocks_cnt; /* memory blocks allocated */ | |
1929 int si_did_emsg; /* TRUE when ran out of memory */ | |
1930 | |
1931 long si_compress_cnt; /* words to add before lowering | |
1932 compression limit */ | |
1933 wordnode_T *si_first_free; /* List of nodes that have been freed during | |
1934 compression, linked by "wn_child" field. */ | |
1935 long si_free_count; /* number of nodes in si_first_free */ | |
1936 #ifdef SPELL_PRINTTREE | |
1937 int si_wordnode_nr; /* sequence nr for nodes */ | |
1938 #endif | |
1939 buf_T *si_spellbuf; /* buffer used to store soundfold word table */ | |
1940 | |
1941 int si_ascii; /* handling only ASCII words */ | |
1942 int si_add; /* addition file */ | |
1943 int si_clear_chartab; /* when TRUE clear char tables */ | |
1944 int si_region; /* region mask */ | |
1945 vimconv_T si_conv; /* for conversion to 'encoding' */ | |
1946 int si_memtot; /* runtime memory used */ | |
1947 int si_verbose; /* verbose messages */ | |
1948 int si_msg_count; /* number of words added since last message */ | |
1949 char_u *si_info; /* info text chars or NULL */ | |
1950 int si_region_count; /* number of regions supported (1 when there | |
1951 are no regions) */ | |
1952 char_u si_region_name[17]; /* region names; used only if | |
1953 * si_region_count > 1) */ | |
1954 | |
1955 garray_T si_rep; /* list of fromto_T entries from REP lines */ | |
1956 garray_T si_repsal; /* list of fromto_T entries from REPSAL lines */ | |
1957 garray_T si_sal; /* list of fromto_T entries from SAL lines */ | |
1958 char_u *si_sofofr; /* SOFOFROM text */ | |
1959 char_u *si_sofoto; /* SOFOTO text */ | |
1960 int si_nosugfile; /* NOSUGFILE item found */ | |
1961 int si_nosplitsugs; /* NOSPLITSUGS item found */ | |
1962 int si_nocompoundsugs; /* NOCOMPOUNDSUGS item found */ | |
1963 int si_followup; /* soundsalike: ? */ | |
1964 int si_collapse; /* soundsalike: ? */ | |
1965 hashtab_T si_commonwords; /* hashtable for common words */ | |
1966 time_t si_sugtime; /* timestamp for .sug file */ | |
1967 int si_rem_accents; /* soundsalike: remove accents */ | |
1968 garray_T si_map; /* MAP info concatenated */ | |
1969 char_u *si_midword; /* MIDWORD chars or NULL */ | |
1970 int si_compmax; /* max nr of words for compounding */ | |
1971 int si_compminlen; /* minimal length for compounding */ | |
1972 int si_compsylmax; /* max nr of syllables for compounding */ | |
1973 int si_compoptions; /* COMP_ flags */ | |
1974 garray_T si_comppat; /* CHECKCOMPOUNDPATTERN items, each stored as | |
1975 a string */ | |
1976 char_u *si_compflags; /* flags used for compounding */ | |
1977 char_u si_nobreak; /* NOBREAK */ | |
1978 char_u *si_syllable; /* syllable string */ | |
1979 garray_T si_prefcond; /* table with conditions for postponed | |
1980 * prefixes, each stored as a string */ | |
1981 int si_newprefID; /* current value for ah_newID */ | |
1982 int si_newcompID; /* current value for compound ID */ | |
1983 } spellinfo_T; | |
1984 | |
1985 static afffile_T *spell_read_aff(spellinfo_T *spin, char_u *fname); | |
1986 static int is_aff_rule(char_u **items, int itemcnt, char *rulename, int mincount); | |
1987 static void aff_process_flags(afffile_T *affile, affentry_T *entry); | |
1988 static int spell_info_item(char_u *s); | |
1989 static unsigned affitem2flag(int flagtype, char_u *item, char_u *fname, int lnum); | |
1990 static unsigned get_affitem(int flagtype, char_u **pp); | |
1991 static void process_compflags(spellinfo_T *spin, afffile_T *aff, char_u *compflags); | |
1992 static void check_renumber(spellinfo_T *spin); | |
1993 static int flag_in_afflist(int flagtype, char_u *afflist, unsigned flag); | |
1994 static void aff_check_number(int spinval, int affval, char *name); | |
1995 static void aff_check_string(char_u *spinval, char_u *affval, char *name); | |
1996 static int str_equal(char_u *s1, char_u *s2); | |
1997 static void add_fromto(spellinfo_T *spin, garray_T *gap, char_u *from, char_u *to); | |
1998 static int sal_to_bool(char_u *s); | |
1999 static void spell_free_aff(afffile_T *aff); | |
2000 static int spell_read_dic(spellinfo_T *spin, char_u *fname, afffile_T *affile); | |
2001 static int get_affix_flags(afffile_T *affile, char_u *afflist); | |
2002 static int get_pfxlist(afffile_T *affile, char_u *afflist, char_u *store_afflist); | |
2003 static void get_compflags(afffile_T *affile, char_u *afflist, char_u *store_afflist); | |
2004 static int store_aff_word(spellinfo_T *spin, char_u *word, char_u *afflist, afffile_T *affile, hashtab_T *ht, hashtab_T *xht, int condit, int flags, char_u *pfxlist, int pfxlen); | |
2005 static int spell_read_wordfile(spellinfo_T *spin, char_u *fname); | |
2006 static void *getroom(spellinfo_T *spin, size_t len, int align); | |
2007 static char_u *getroom_save(spellinfo_T *spin, char_u *s); | |
2008 static void free_blocks(sblock_T *bl); | |
2009 static wordnode_T *wordtree_alloc(spellinfo_T *spin); | |
2010 static int store_word(spellinfo_T *spin, char_u *word, int flags, int region, char_u *pfxlist, int need_affix); | |
2011 static int tree_add_word(spellinfo_T *spin, char_u *word, wordnode_T *tree, int flags, int region, int affixID); | |
2012 static wordnode_T *get_wordnode(spellinfo_T *spin); | |
2013 static int deref_wordnode(spellinfo_T *spin, wordnode_T *node); | |
2014 static void free_wordnode(spellinfo_T *spin, wordnode_T *n); | |
2015 static void wordtree_compress(spellinfo_T *spin, wordnode_T *root); | |
2016 static int node_compress(spellinfo_T *spin, wordnode_T *node, hashtab_T *ht, int *tot); | |
2017 static int node_equal(wordnode_T *n1, wordnode_T *n2); | |
2018 static int write_vim_spell(spellinfo_T *spin, char_u *fname); | |
2019 static void clear_node(wordnode_T *node); | |
2020 static int put_node(FILE *fd, wordnode_T *node, int idx, int regionmask, int prefixtree); | |
2021 static void spell_make_sugfile(spellinfo_T *spin, char_u *wfname); | |
2022 static int sug_filltree(spellinfo_T *spin, slang_T *slang); | |
2023 static int sug_maketable(spellinfo_T *spin); | |
2024 static int sug_filltable(spellinfo_T *spin, wordnode_T *node, int startwordnr, garray_T *gap); | |
2025 static int offset2bytes(int nr, char_u *buf); | |
2026 static void sug_write(spellinfo_T *spin, char_u *fname); | |
2027 static void spell_message(spellinfo_T *spin, char_u *str); | |
2028 static void init_spellfile(void); | |
2029 | |
/* In the postponed prefixes tree wn_flags is used to store the WFP_ flags,
 * but it must be negative to indicate the prefix tree to tree_add_word().
 * Use a negative number with the lower 8 bits zero.
 * The value is parenthesized so that the macro always expands to a single
 * operand, whatever expression it is used in. */
#define PFX_FLAGS       (-256)

/* Flags for the "condit" argument of store_aff_word().  These are distinct
 * bits and can be OR'ed together. */
#define CONDIT_COMB     1       /* affix must combine */
#define CONDIT_CFIX     2       /* affix must have CIRCUMFIX flag */
#define CONDIT_SUF      4       /* add a suffix for matching flags */
#define CONDIT_AFF      8       /* word already has an affix */
2040 | |
2041 /* | |
2042 * Tunable parameters for when the tree is compressed. See 'mkspellmem'. | |
2043 */ | |
2044 static long compress_start = 30000; /* memory / SBLOCKSIZE */ | |
2045 static long compress_inc = 100; /* memory / SBLOCKSIZE */ | |
2046 static long compress_added = 500000; /* word count */ | |
2047 | |
2048 /* | |
2049 * Check the 'mkspellmem' option. Return FAIL if it's wrong. | |
2050 * Sets "sps_flags". | |
2051 */ | |
2052 int | |
2053 spell_check_msm(void) | |
2054 { | |
2055 char_u *p = p_msm; | |
2056 long start = 0; | |
2057 long incr = 0; | |
2058 long added = 0; | |
2059 | |
2060 if (!VIM_ISDIGIT(*p)) | |
2061 return FAIL; | |
2062 /* block count = (value * 1024) / SBLOCKSIZE (but avoid overflow)*/ | |
2063 start = (getdigits(&p) * 10) / (SBLOCKSIZE / 102); | |
2064 if (*p != ',') | |
2065 return FAIL; | |
2066 ++p; | |
2067 if (!VIM_ISDIGIT(*p)) | |
2068 return FAIL; | |
2069 incr = (getdigits(&p) * 102) / (SBLOCKSIZE / 10); | |
2070 if (*p != ',') | |
2071 return FAIL; | |
2072 ++p; | |
2073 if (!VIM_ISDIGIT(*p)) | |
2074 return FAIL; | |
2075 added = getdigits(&p) * 1024; | |
2076 if (*p != NUL) | |
2077 return FAIL; | |
2078 | |
2079 if (start == 0 || incr == 0 || added == 0 || incr > start) | |
2080 return FAIL; | |
2081 | |
2082 compress_start = start; | |
2083 compress_inc = incr; | |
2084 compress_added = added; | |
2085 return OK; | |
2086 } | |
2087 | |
2088 #ifdef SPELL_PRINTTREE | |
/*
 * For debugging the tree code: print the current tree in a (more or less)
 * readable format, so that we can see what happens when adding a word and/or
 * compressing the tree.
 * Based on code from Olaf Seibert.
 */
#define PRINTLINESIZE   1000    /* size of each of the three line buffers */
#define PRINTWIDTH      6       /* columns used for one level of depth */

/*
 * Format "fmt" with arguments "a1" and "a2" into line buffer "l", starting
 * at the column that belongs to tree level "depth".
 * When that column lies outside of the buffer (negative "depth", or
 * "depth" * PRINTWIDTH >= PRINTLINESIZE) nothing is written and the result
 * is zero; without this check the destination pointer would be past the end
 * of the buffer and the size would be negative.
 * Otherwise the result is whatever vim_snprintf() returns.
 * "depth" is evaluated more than once, it must not have side effects.
 */
#define PRINTSOME(l, depth, fmt, a1, a2) \
    ((depth) >= 0 && (depth) * PRINTWIDTH < PRINTLINESIZE \
        ? vim_snprintf((l) + (depth) * PRINTWIDTH, \
                (size_t)(PRINTLINESIZE - PRINTWIDTH * (depth)), \
                (fmt), (a1), (a2)) \
        : 0)

/* The three text lines that together display one row of tree nodes. */
static char line1[PRINTLINESIZE];
static char line2[PRINTLINESIZE];
static char line3[PRINTLINESIZE];
2104 | |
2105 static void | |
2106 spell_clear_flags(wordnode_T *node) | |
2107 { | |
2108 wordnode_T *np; | |
2109 | |
2110 for (np = node; np != NULL; np = np->wn_sibling) | |
2111 { | |
2112 np->wn_u1.index = FALSE; | |
2113 spell_clear_flags(np->wn_child); | |
2114 } | |
2115 } | |
2116 | |
2117 static void | |
2118 spell_print_node(wordnode_T *node, int depth) | |
2119 { | |
2120 if (node->wn_u1.index) | |
2121 { | |
2122 /* Done this node before, print the reference. */ | |
2123 PRINTSOME(line1, depth, "(%d)", node->wn_nr, 0); | |
2124 PRINTSOME(line2, depth, " ", 0, 0); | |
2125 PRINTSOME(line3, depth, " ", 0, 0); | |
2126 msg((char_u *)line1); | |
2127 msg((char_u *)line2); | |
2128 msg((char_u *)line3); | |
2129 } | |
2130 else | |
2131 { | |
2132 node->wn_u1.index = TRUE; | |
2133 | |
2134 if (node->wn_byte != NUL) | |
2135 { | |
2136 if (node->wn_child != NULL) | |
2137 PRINTSOME(line1, depth, " %c -> ", node->wn_byte, 0); | |
2138 else | |
2139 /* Cannot happen? */ | |
2140 PRINTSOME(line1, depth, " %c ???", node->wn_byte, 0); | |
2141 } | |
2142 else | |
2143 PRINTSOME(line1, depth, " $ ", 0, 0); | |
2144 | |
2145 PRINTSOME(line2, depth, "%d/%d ", node->wn_nr, node->wn_refs); | |
2146 | |
2147 if (node->wn_sibling != NULL) | |
2148 PRINTSOME(line3, depth, " | ", 0, 0); | |
2149 else | |
2150 PRINTSOME(line3, depth, " ", 0, 0); | |
2151 | |
2152 if (node->wn_byte == NUL) | |
2153 { | |
2154 msg((char_u *)line1); | |
2155 msg((char_u *)line2); | |
2156 msg((char_u *)line3); | |
2157 } | |
2158 | |
2159 /* do the children */ | |
2160 if (node->wn_byte != NUL && node->wn_child != NULL) | |
2161 spell_print_node(node->wn_child, depth + 1); | |
2162 | |
2163 /* do the siblings */ | |
2164 if (node->wn_sibling != NULL) | |
2165 { | |
2166 /* get rid of all parent details except | */ | |
2167 STRCPY(line1, line3); | |
2168 STRCPY(line2, line3); | |
2169 spell_print_node(node->wn_sibling, depth); | |
2170 } | |
2171 } | |
2172 } | |
2173 | |
2174 static void | |
2175 spell_print_tree(wordnode_T *root) | |
2176 { | |
2177 if (root != NULL) | |
2178 { | |
2179 /* Clear the "wn_u1.index" fields, used to remember what has been | |
2180 * done. */ | |
2181 spell_clear_flags(root); | |
2182 | |
2183 /* Recursively print the tree. */ | |
2184 spell_print_node(root, 0); | |
2185 } | |
2186 } | |
2187 #endif /* SPELL_PRINTTREE */ | |
2188 | |
2189 /* | |
2190 * Read the affix file "fname". | |
2191 * Returns an afffile_T, NULL for complete failure. | |
2192 */ | |
2193 static afffile_T * | |
2194 spell_read_aff(spellinfo_T *spin, char_u *fname) | |
2195 { | |
2196 FILE *fd; | |
2197 afffile_T *aff; | |
2198 char_u rline[MAXLINELEN]; | |
2199 char_u *line; | |
2200 char_u *pc = NULL; | |
2201 #define MAXITEMCNT 30 | |
2202 char_u *(items[MAXITEMCNT]); | |
2203 int itemcnt; | |
2204 char_u *p; | |
2205 int lnum = 0; | |
2206 affheader_T *cur_aff = NULL; | |
2207 int did_postpone_prefix = FALSE; | |
2208 int aff_todo = 0; | |
2209 hashtab_T *tp; | |
2210 char_u *low = NULL; | |
2211 char_u *fol = NULL; | |
2212 char_u *upp = NULL; | |
2213 int do_rep; | |
2214 int do_repsal; | |
2215 int do_sal; | |
2216 int do_mapline; | |
2217 int found_map = FALSE; | |
2218 hashitem_T *hi; | |
2219 int l; | |
2220 int compminlen = 0; /* COMPOUNDMIN value */ | |
2221 int compsylmax = 0; /* COMPOUNDSYLMAX value */ | |
2222 int compoptions = 0; /* COMP_ flags */ | |
2223 int compmax = 0; /* COMPOUNDWORDMAX value */ | |
2224 char_u *compflags = NULL; /* COMPOUNDFLAG and COMPOUNDRULE | |
2225 concatenated */ | |
2226 char_u *midword = NULL; /* MIDWORD value */ | |
2227 char_u *syllable = NULL; /* SYLLABLE value */ | |
2228 char_u *sofofrom = NULL; /* SOFOFROM value */ | |
2229 char_u *sofoto = NULL; /* SOFOTO value */ | |
2230 | |
2231 /* | |
2232 * Open the file. | |
2233 */ | |
2234 fd = mch_fopen((char *)fname, "r"); | |
2235 if (fd == NULL) | |
2236 { | |
2237 EMSG2(_(e_notopen), fname); | |
2238 return NULL; | |
2239 } | |
2240 | |
2241 vim_snprintf((char *)IObuff, IOSIZE, _("Reading affix file %s ..."), fname); | |
2242 spell_message(spin, IObuff); | |
2243 | |
2244 /* Only do REP lines when not done in another .aff file already. */ | |
2245 do_rep = spin->si_rep.ga_len == 0; | |
2246 | |
2247 /* Only do REPSAL lines when not done in another .aff file already. */ | |
2248 do_repsal = spin->si_repsal.ga_len == 0; | |
2249 | |
2250 /* Only do SAL lines when not done in another .aff file already. */ | |
2251 do_sal = spin->si_sal.ga_len == 0; | |
2252 | |
2253 /* Only do MAP lines when not done in another .aff file already. */ | |
2254 do_mapline = spin->si_map.ga_len == 0; | |
2255 | |
2256 /* | |
2257 * Allocate and init the afffile_T structure. | |
2258 */ | |
2259 aff = (afffile_T *)getroom(spin, sizeof(afffile_T), TRUE); | |
2260 if (aff == NULL) | |
2261 { | |
2262 fclose(fd); | |
2263 return NULL; | |
2264 } | |
2265 hash_init(&aff->af_pref); | |
2266 hash_init(&aff->af_suff); | |
2267 hash_init(&aff->af_comp); | |
2268 | |
2269 /* | |
2270 * Read all the lines in the file one by one. | |
2271 */ | |
2272 while (!vim_fgets(rline, MAXLINELEN, fd) && !got_int) | |
2273 { | |
2274 line_breakcheck(); | |
2275 ++lnum; | |
2276 | |
2277 /* Skip comment lines. */ | |
2278 if (*rline == '#') | |
2279 continue; | |
2280 | |
2281 /* Convert from "SET" to 'encoding' when needed. */ | |
2282 vim_free(pc); | |
2283 #ifdef FEAT_MBYTE | |
2284 if (spin->si_conv.vc_type != CONV_NONE) | |
2285 { | |
2286 pc = string_convert(&spin->si_conv, rline, NULL); | |
2287 if (pc == NULL) | |
2288 { | |
2289 smsg((char_u *)_("Conversion failure for word in %s line %d: %s"), | |
2290 fname, lnum, rline); | |
2291 continue; | |
2292 } | |
2293 line = pc; | |
2294 } | |
2295 else | |
2296 #endif | |
2297 { | |
2298 pc = NULL; | |
2299 line = rline; | |
2300 } | |
2301 | |
2302 /* Split the line up in white separated items. Put a NUL after each | |
2303 * item. */ | |
2304 itemcnt = 0; | |
2305 for (p = line; ; ) | |
2306 { | |
2307 while (*p != NUL && *p <= ' ') /* skip white space and CR/NL */ | |
2308 ++p; | |
2309 if (*p == NUL) | |
2310 break; | |
2311 if (itemcnt == MAXITEMCNT) /* too many items */ | |
2312 break; | |
2313 items[itemcnt++] = p; | |
2314 /* A few items have arbitrary text argument, don't split them. */ | |
2315 if (itemcnt == 2 && spell_info_item(items[0])) | |
2316 while (*p >= ' ' || *p == TAB) /* skip until CR/NL */ | |
2317 ++p; | |
2318 else | |
2319 while (*p > ' ') /* skip until white space or CR/NL */ | |
2320 ++p; | |
2321 if (*p == NUL) | |
2322 break; | |
2323 *p++ = NUL; | |
2324 } | |
2325 | |
2326 /* Handle non-empty lines. */ | |
2327 if (itemcnt > 0) | |
2328 { | |
2329 if (is_aff_rule(items, itemcnt, "SET", 2) && aff->af_enc == NULL) | |
2330 { | |
2331 #ifdef FEAT_MBYTE | |
2332 /* Setup for conversion from "ENC" to 'encoding'. */ | |
2333 aff->af_enc = enc_canonize(items[1]); | |
2334 if (aff->af_enc != NULL && !spin->si_ascii | |
2335 && convert_setup(&spin->si_conv, aff->af_enc, | |
2336 p_enc) == FAIL) | |
2337 smsg((char_u *)_("Conversion in %s not supported: from %s to %s"), | |
2338 fname, aff->af_enc, p_enc); | |
2339 spin->si_conv.vc_fail = TRUE; | |
2340 #else | |
2341 smsg((char_u *)_("Conversion in %s not supported"), fname); | |
2342 #endif | |
2343 } | |
2344 else if (is_aff_rule(items, itemcnt, "FLAG", 2) | |
2345 && aff->af_flagtype == AFT_CHAR) | |
2346 { | |
2347 if (STRCMP(items[1], "long") == 0) | |
2348 aff->af_flagtype = AFT_LONG; | |
2349 else if (STRCMP(items[1], "num") == 0) | |
2350 aff->af_flagtype = AFT_NUM; | |
2351 else if (STRCMP(items[1], "caplong") == 0) | |
2352 aff->af_flagtype = AFT_CAPLONG; | |
2353 else | |
2354 smsg((char_u *)_("Invalid value for FLAG in %s line %d: %s"), | |
2355 fname, lnum, items[1]); | |
2356 if (aff->af_rare != 0 | |
2357 || aff->af_keepcase != 0 | |
2358 || aff->af_bad != 0 | |
2359 || aff->af_needaffix != 0 | |
2360 || aff->af_circumfix != 0 | |
2361 || aff->af_needcomp != 0 | |
2362 || aff->af_comproot != 0 | |
2363 || aff->af_nosuggest != 0 | |
2364 || compflags != NULL | |
2365 || aff->af_suff.ht_used > 0 | |
2366 || aff->af_pref.ht_used > 0) | |
2367 smsg((char_u *)_("FLAG after using flags in %s line %d: %s"), | |
2368 fname, lnum, items[1]); | |
2369 } | |
2370 else if (spell_info_item(items[0])) | |
2371 { | |
2372 p = (char_u *)getroom(spin, | |
2373 (spin->si_info == NULL ? 0 : STRLEN(spin->si_info)) | |
2374 + STRLEN(items[0]) | |
2375 + STRLEN(items[1]) + 3, FALSE); | |
2376 if (p != NULL) | |
2377 { | |
2378 if (spin->si_info != NULL) | |
2379 { | |
2380 STRCPY(p, spin->si_info); | |
2381 STRCAT(p, "\n"); | |
2382 } | |
2383 STRCAT(p, items[0]); | |
2384 STRCAT(p, " "); | |
2385 STRCAT(p, items[1]); | |
2386 spin->si_info = p; | |
2387 } | |
2388 } | |
2389 else if (is_aff_rule(items, itemcnt, "MIDWORD", 2) | |
2390 && midword == NULL) | |
2391 { | |
2392 midword = getroom_save(spin, items[1]); | |
2393 } | |
2394 else if (is_aff_rule(items, itemcnt, "TRY", 2)) | |
2395 { | |
2396 /* ignored, we look in the tree for what chars may appear */ | |
2397 } | |
2398 /* TODO: remove "RAR" later */ | |
2399 else if ((is_aff_rule(items, itemcnt, "RAR", 2) | |
2400 || is_aff_rule(items, itemcnt, "RARE", 2)) | |
2401 && aff->af_rare == 0) | |
2402 { | |
2403 aff->af_rare = affitem2flag(aff->af_flagtype, items[1], | |
2404 fname, lnum); | |
2405 } | |
2406 /* TODO: remove "KEP" later */ | |
2407 else if ((is_aff_rule(items, itemcnt, "KEP", 2) | |
2408 || is_aff_rule(items, itemcnt, "KEEPCASE", 2)) | |
2409 && aff->af_keepcase == 0) | |
2410 { | |
2411 aff->af_keepcase = affitem2flag(aff->af_flagtype, items[1], | |
2412 fname, lnum); | |
2413 } | |
2414 else if ((is_aff_rule(items, itemcnt, "BAD", 2) | |
2415 || is_aff_rule(items, itemcnt, "FORBIDDENWORD", 2)) | |
2416 && aff->af_bad == 0) | |
2417 { | |
2418 aff->af_bad = affitem2flag(aff->af_flagtype, items[1], | |
2419 fname, lnum); | |
2420 } | |
2421 else if (is_aff_rule(items, itemcnt, "NEEDAFFIX", 2) | |
2422 && aff->af_needaffix == 0) | |
2423 { | |
2424 aff->af_needaffix = affitem2flag(aff->af_flagtype, items[1], | |
2425 fname, lnum); | |
2426 } | |
2427 else if (is_aff_rule(items, itemcnt, "CIRCUMFIX", 2) | |
2428 && aff->af_circumfix == 0) | |
2429 { | |
2430 aff->af_circumfix = affitem2flag(aff->af_flagtype, items[1], | |
2431 fname, lnum); | |
2432 } | |
2433 else if (is_aff_rule(items, itemcnt, "NOSUGGEST", 2) | |
2434 && aff->af_nosuggest == 0) | |
2435 { | |
2436 aff->af_nosuggest = affitem2flag(aff->af_flagtype, items[1], | |
2437 fname, lnum); | |
2438 } | |
2439 else if ((is_aff_rule(items, itemcnt, "NEEDCOMPOUND", 2) | |
2440 || is_aff_rule(items, itemcnt, "ONLYINCOMPOUND", 2)) | |
2441 && aff->af_needcomp == 0) | |
2442 { | |
2443 aff->af_needcomp = affitem2flag(aff->af_flagtype, items[1], | |
2444 fname, lnum); | |
2445 } | |
2446 else if (is_aff_rule(items, itemcnt, "COMPOUNDROOT", 2) | |
2447 && aff->af_comproot == 0) | |
2448 { | |
2449 aff->af_comproot = affitem2flag(aff->af_flagtype, items[1], | |
2450 fname, lnum); | |
2451 } | |
2452 else if (is_aff_rule(items, itemcnt, "COMPOUNDFORBIDFLAG", 2) | |
2453 && aff->af_compforbid == 0) | |
2454 { | |
2455 aff->af_compforbid = affitem2flag(aff->af_flagtype, items[1], | |
2456 fname, lnum); | |
2457 if (aff->af_pref.ht_used > 0) | |
2458 smsg((char_u *)_("Defining COMPOUNDFORBIDFLAG after PFX item may give wrong results in %s line %d"), | |
2459 fname, lnum); | |
2460 } | |
2461 else if (is_aff_rule(items, itemcnt, "COMPOUNDPERMITFLAG", 2) | |
2462 && aff->af_comppermit == 0) | |
2463 { | |
2464 aff->af_comppermit = affitem2flag(aff->af_flagtype, items[1], | |
2465 fname, lnum); | |
2466 if (aff->af_pref.ht_used > 0) | |
2467 smsg((char_u *)_("Defining COMPOUNDPERMITFLAG after PFX item may give wrong results in %s line %d"), | |
2468 fname, lnum); | |
2469 } | |
2470 else if (is_aff_rule(items, itemcnt, "COMPOUNDFLAG", 2) | |
2471 && compflags == NULL) | |
2472 { | |
2473 /* Turn flag "c" into COMPOUNDRULE compatible string "c+", | |
2474 * "Na" into "Na+", "1234" into "1234+". */ | |
2475 p = getroom(spin, STRLEN(items[1]) + 2, FALSE); | |
2476 if (p != NULL) | |
2477 { | |
2478 STRCPY(p, items[1]); | |
2479 STRCAT(p, "+"); | |
2480 compflags = p; | |
2481 } | |
2482 } | |
2483 else if (is_aff_rule(items, itemcnt, "COMPOUNDRULES", 2)) | |
2484 { | |
2485 /* We don't use the count, but do check that it's a number and | |
2486 * not COMPOUNDRULE mistyped. */ | |
2487 if (atoi((char *)items[1]) == 0) | |
2488 smsg((char_u *)_("Wrong COMPOUNDRULES value in %s line %d: %s"), | |
2489 fname, lnum, items[1]); | |
2490 } | |
2491 else if (is_aff_rule(items, itemcnt, "COMPOUNDRULE", 2)) | |
2492 { | |
2493 /* Don't use the first rule if it is a number. */ | |
2494 if (compflags != NULL || *skipdigits(items[1]) != NUL) | |
2495 { | |
2496 /* Concatenate this string to previously defined ones, | |
2497 * using a slash to separate them. */ | |
2498 l = (int)STRLEN(items[1]) + 1; | |
2499 if (compflags != NULL) | |
2500 l += (int)STRLEN(compflags) + 1; | |
2501 p = getroom(spin, l, FALSE); | |
2502 if (p != NULL) | |
2503 { | |
2504 if (compflags != NULL) | |
2505 { | |
2506 STRCPY(p, compflags); | |
2507 STRCAT(p, "/"); | |
2508 } | |
2509 STRCAT(p, items[1]); | |
2510 compflags = p; | |
2511 } | |
2512 } | |
2513 } | |
2514 else if (is_aff_rule(items, itemcnt, "COMPOUNDWORDMAX", 2) | |
2515 && compmax == 0) | |
2516 { | |
2517 compmax = atoi((char *)items[1]); | |
2518 if (compmax == 0) | |
2519 smsg((char_u *)_("Wrong COMPOUNDWORDMAX value in %s line %d: %s"), | |
2520 fname, lnum, items[1]); | |
2521 } | |
2522 else if (is_aff_rule(items, itemcnt, "COMPOUNDMIN", 2) | |
2523 && compminlen == 0) | |
2524 { | |
2525 compminlen = atoi((char *)items[1]); | |
2526 if (compminlen == 0) | |
2527 smsg((char_u *)_("Wrong COMPOUNDMIN value in %s line %d: %s"), | |
2528 fname, lnum, items[1]); | |
2529 } | |
2530 else if (is_aff_rule(items, itemcnt, "COMPOUNDSYLMAX", 2) | |
2531 && compsylmax == 0) | |
2532 { | |
2533 compsylmax = atoi((char *)items[1]); | |
2534 if (compsylmax == 0) | |
2535 smsg((char_u *)_("Wrong COMPOUNDSYLMAX value in %s line %d: %s"), | |
2536 fname, lnum, items[1]); | |
2537 } | |
2538 else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDDUP", 1)) | |
2539 { | |
2540 compoptions |= COMP_CHECKDUP; | |
2541 } | |
2542 else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDREP", 1)) | |
2543 { | |
2544 compoptions |= COMP_CHECKREP; | |
2545 } | |
2546 else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDCASE", 1)) | |
2547 { | |
2548 compoptions |= COMP_CHECKCASE; | |
2549 } | |
2550 else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDTRIPLE", 1)) | |
2551 { | |
2552 compoptions |= COMP_CHECKTRIPLE; | |
2553 } | |
2554 else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDPATTERN", 2)) | |
2555 { | |
2556 if (atoi((char *)items[1]) == 0) | |
2557 smsg((char_u *)_("Wrong CHECKCOMPOUNDPATTERN value in %s line %d: %s"), | |
2558 fname, lnum, items[1]); | |
2559 } | |
2560 else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDPATTERN", 3)) | |
2561 { | |
2562 garray_T *gap = &spin->si_comppat; | |
2563 int i; | |
2564 | |
2565 /* Only add the couple if it isn't already there. */ | |
2566 for (i = 0; i < gap->ga_len - 1; i += 2) | |
2567 if (STRCMP(((char_u **)(gap->ga_data))[i], items[1]) == 0 | |
2568 && STRCMP(((char_u **)(gap->ga_data))[i + 1], | |
2569 items[2]) == 0) | |
2570 break; | |
2571 if (i >= gap->ga_len && ga_grow(gap, 2) == OK) | |
2572 { | |
2573 ((char_u **)(gap->ga_data))[gap->ga_len++] | |
2574 = getroom_save(spin, items[1]); | |
2575 ((char_u **)(gap->ga_data))[gap->ga_len++] | |
2576 = getroom_save(spin, items[2]); | |
2577 } | |
2578 } | |
2579 else if (is_aff_rule(items, itemcnt, "SYLLABLE", 2) | |
2580 && syllable == NULL) | |
2581 { | |
2582 syllable = getroom_save(spin, items[1]); | |
2583 } | |
2584 else if (is_aff_rule(items, itemcnt, "NOBREAK", 1)) | |
2585 { | |
2586 spin->si_nobreak = TRUE; | |
2587 } | |
2588 else if (is_aff_rule(items, itemcnt, "NOSPLITSUGS", 1)) | |
2589 { | |
2590 spin->si_nosplitsugs = TRUE; | |
2591 } | |
2592 else if (is_aff_rule(items, itemcnt, "NOCOMPOUNDSUGS", 1)) | |
2593 { | |
2594 spin->si_nocompoundsugs = TRUE; | |
2595 } | |
2596 else if (is_aff_rule(items, itemcnt, "NOSUGFILE", 1)) | |
2597 { | |
2598 spin->si_nosugfile = TRUE; | |
2599 } | |
2600 else if (is_aff_rule(items, itemcnt, "PFXPOSTPONE", 1)) | |
2601 { | |
2602 aff->af_pfxpostpone = TRUE; | |
2603 } | |
2604 else if (is_aff_rule(items, itemcnt, "IGNOREEXTRA", 1)) | |
2605 { | |
2606 aff->af_ignoreextra = TRUE; | |
2607 } | |
2608 else if ((STRCMP(items[0], "PFX") == 0 | |
2609 || STRCMP(items[0], "SFX") == 0) | |
2610 && aff_todo == 0 | |
2611 && itemcnt >= 4) | |
2612 { | |
2613 int lasti = 4; | |
2614 char_u key[AH_KEY_LEN]; | |
2615 | |
2616 if (*items[0] == 'P') | |
2617 tp = &aff->af_pref; | |
2618 else | |
2619 tp = &aff->af_suff; | |
2620 | |
2621 /* Myspell allows the same affix name to be used multiple | |
2622 * times. The affix files that do this have an undocumented | |
2623 * "S" flag on all but the last block, thus we check for that | |
2624 * and store it in ah_follows. */ | |
2625 vim_strncpy(key, items[1], AH_KEY_LEN - 1); | |
2626 hi = hash_find(tp, key); | |
2627 if (!HASHITEM_EMPTY(hi)) | |
2628 { | |
2629 cur_aff = HI2AH(hi); | |
2630 if (cur_aff->ah_combine != (*items[2] == 'Y')) | |
2631 smsg((char_u *)_("Different combining flag in continued affix block in %s line %d: %s"), | |
2632 fname, lnum, items[1]); | |
2633 if (!cur_aff->ah_follows) | |
2634 smsg((char_u *)_("Duplicate affix in %s line %d: %s"), | |
2635 fname, lnum, items[1]); | |
2636 } | |
2637 else | |
2638 { | |
2639 /* New affix letter. */ | |
2640 cur_aff = (affheader_T *)getroom(spin, | |
2641 sizeof(affheader_T), TRUE); | |
2642 if (cur_aff == NULL) | |
2643 break; | |
2644 cur_aff->ah_flag = affitem2flag(aff->af_flagtype, items[1], | |
2645 fname, lnum); | |
2646 if (cur_aff->ah_flag == 0 || STRLEN(items[1]) >= AH_KEY_LEN) | |
2647 break; | |
2648 if (cur_aff->ah_flag == aff->af_bad | |
2649 || cur_aff->ah_flag == aff->af_rare | |
2650 || cur_aff->ah_flag == aff->af_keepcase | |
2651 || cur_aff->ah_flag == aff->af_needaffix | |
2652 || cur_aff->ah_flag == aff->af_circumfix | |
2653 || cur_aff->ah_flag == aff->af_nosuggest | |
2654 || cur_aff->ah_flag == aff->af_needcomp | |
2655 || cur_aff->ah_flag == aff->af_comproot) | |
2656 smsg((char_u *)_("Affix also used for BAD/RARE/KEEPCASE/NEEDAFFIX/NEEDCOMPOUND/NOSUGGEST in %s line %d: %s"), | |
2657 fname, lnum, items[1]); | |
2658 STRCPY(cur_aff->ah_key, items[1]); | |
2659 hash_add(tp, cur_aff->ah_key); | |
2660 | |
2661 cur_aff->ah_combine = (*items[2] == 'Y'); | |
2662 } | |
2663 | |
2664 /* Check for the "S" flag, which apparently means that another | |
2665 * block with the same affix name is following. */ | |
2666 if (itemcnt > lasti && STRCMP(items[lasti], "S") == 0) | |
2667 { | |
2668 ++lasti; | |
2669 cur_aff->ah_follows = TRUE; | |
2670 } | |
2671 else | |
2672 cur_aff->ah_follows = FALSE; | |
2673 | |
2674 /* Myspell allows extra text after the item, but that might | |
2675 * mean mistakes go unnoticed. Require a comment-starter. */ | |
2676 if (itemcnt > lasti && *items[lasti] != '#') | |
2677 smsg((char_u *)_(e_afftrailing), fname, lnum, items[lasti]); | |
2678 | |
2679 if (STRCMP(items[2], "Y") != 0 && STRCMP(items[2], "N") != 0) | |
2680 smsg((char_u *)_("Expected Y or N in %s line %d: %s"), | |
2681 fname, lnum, items[2]); | |
2682 | |
2683 if (*items[0] == 'P' && aff->af_pfxpostpone) | |
2684 { | |
2685 if (cur_aff->ah_newID == 0) | |
2686 { | |
2687 /* Use a new number in the .spl file later, to be able | |
2688 * to handle multiple .aff files. */ | |
2689 check_renumber(spin); | |
2690 cur_aff->ah_newID = ++spin->si_newprefID; | |
2691 | |
2692 /* We only really use ah_newID if the prefix is | |
2693 * postponed. We know that only after handling all | |
2694 * the items. */ | |
2695 did_postpone_prefix = FALSE; | |
2696 } | |
2697 else | |
2698 /* Did use the ID in a previous block. */ | |
2699 did_postpone_prefix = TRUE; | |
2700 } | |
2701 | |
2702 aff_todo = atoi((char *)items[3]); | |
2703 } | |
2704 else if ((STRCMP(items[0], "PFX") == 0 | |
2705 || STRCMP(items[0], "SFX") == 0) | |
2706 && aff_todo > 0 | |
2707 && STRCMP(cur_aff->ah_key, items[1]) == 0 | |
2708 && itemcnt >= 5) | |
2709 { | |
2710 affentry_T *aff_entry; | |
2711 int upper = FALSE; | |
2712 int lasti = 5; | |
2713 | |
2714 /* Myspell allows extra text after the item, but that might | |
2715 * mean mistakes go unnoticed. Require a comment-starter, | |
2716 * unless IGNOREEXTRA is used. Hunspell uses a "-" item. */ | |
2717 if (itemcnt > lasti | |
2718 && !aff->af_ignoreextra | |
2719 && *items[lasti] != '#' | |
2720 && (STRCMP(items[lasti], "-") != 0 | |
2721 || itemcnt != lasti + 1)) | |
2722 smsg((char_u *)_(e_afftrailing), fname, lnum, items[lasti]); | |
2723 | |
2724 /* New item for an affix letter. */ | |
2725 --aff_todo; | |
2726 aff_entry = (affentry_T *)getroom(spin, | |
2727 sizeof(affentry_T), TRUE); | |
2728 if (aff_entry == NULL) | |
2729 break; | |
2730 | |
2731 if (STRCMP(items[2], "0") != 0) | |
2732 aff_entry->ae_chop = getroom_save(spin, items[2]); | |
2733 if (STRCMP(items[3], "0") != 0) | |
2734 { | |
2735 aff_entry->ae_add = getroom_save(spin, items[3]); | |
2736 | |
2737 /* Recognize flags on the affix: abcd/XYZ */ | |
2738 aff_entry->ae_flags = vim_strchr(aff_entry->ae_add, '/'); | |
2739 if (aff_entry->ae_flags != NULL) | |
2740 { | |
2741 *aff_entry->ae_flags++ = NUL; | |
2742 aff_process_flags(aff, aff_entry); | |
2743 } | |
2744 } | |
2745 | |
2746 /* Don't use an affix entry with non-ASCII characters when | |
2747 * "spin->si_ascii" is TRUE. */ | |
2748 if (!spin->si_ascii || !(has_non_ascii(aff_entry->ae_chop) | |
2749 || has_non_ascii(aff_entry->ae_add))) | |
2750 { | |
2751 aff_entry->ae_next = cur_aff->ah_first; | |
2752 cur_aff->ah_first = aff_entry; | |
2753 | |
2754 if (STRCMP(items[4], ".") != 0) | |
2755 { | |
2756 char_u buf[MAXLINELEN]; | |
2757 | |
2758 aff_entry->ae_cond = getroom_save(spin, items[4]); | |
2759 if (*items[0] == 'P') | |
2760 sprintf((char *)buf, "^%s", items[4]); | |
2761 else | |
2762 sprintf((char *)buf, "%s$", items[4]); | |
2763 aff_entry->ae_prog = vim_regcomp(buf, | |
2764 RE_MAGIC + RE_STRING + RE_STRICT); | |
2765 if (aff_entry->ae_prog == NULL) | |
2766 smsg((char_u *)_("Broken condition in %s line %d: %s"), | |
2767 fname, lnum, items[4]); | |
2768 } | |
2769 | |
2770 /* For postponed prefixes we need an entry in si_prefcond | |
2771 * for the condition. Use an existing one if possible. | |
2772 * Can't be done for an affix with flags, ignoring | |
2773 * COMPOUNDFORBIDFLAG and COMPOUNDPERMITFLAG. */ | |
2774 if (*items[0] == 'P' && aff->af_pfxpostpone | |
2775 && aff_entry->ae_flags == NULL) | |
2776 { | |
2777 /* When the chop string is one lower-case letter and | |
2778 * the add string ends in the upper-case letter we set | |
2779 * the "upper" flag, clear "ae_chop" and remove the | |
2780 * letters from "ae_add". The condition must either | |
2781 * be empty or start with the same letter. */ | |
2782 if (aff_entry->ae_chop != NULL | |
2783 && aff_entry->ae_add != NULL | |
2784 #ifdef FEAT_MBYTE | |
2785 && aff_entry->ae_chop[(*mb_ptr2len)( | |
2786 aff_entry->ae_chop)] == NUL | |
2787 #else | |
2788 && aff_entry->ae_chop[1] == NUL | |
2789 #endif | |
2790 ) | |
2791 { | |
2792 int c, c_up; | |
2793 | |
2794 c = PTR2CHAR(aff_entry->ae_chop); | |
2795 c_up = SPELL_TOUPPER(c); | |
2796 if (c_up != c | |
2797 && (aff_entry->ae_cond == NULL | |
2798 || PTR2CHAR(aff_entry->ae_cond) == c)) | |
2799 { | |
2800 p = aff_entry->ae_add | |
2801 + STRLEN(aff_entry->ae_add); | |
2802 mb_ptr_back(aff_entry->ae_add, p); | |
2803 if (PTR2CHAR(p) == c_up) | |
2804 { | |
2805 upper = TRUE; | |
2806 aff_entry->ae_chop = NULL; | |
2807 *p = NUL; | |
2808 | |
2809 /* The condition is matched with the | |
2810 * actual word, thus must check for the | |
2811 * upper-case letter. */ | |
2812 if (aff_entry->ae_cond != NULL) | |
2813 { | |
2814 char_u buf[MAXLINELEN]; | |
2815 #ifdef FEAT_MBYTE | |
2816 if (has_mbyte) | |
2817 { | |
2818 onecap_copy(items[4], buf, TRUE); | |
2819 aff_entry->ae_cond = getroom_save( | |
2820 spin, buf); | |
2821 } | |
2822 else | |
2823 #endif | |
2824 *aff_entry->ae_cond = c_up; | |
2825 if (aff_entry->ae_cond != NULL) | |
2826 { | |
2827 sprintf((char *)buf, "^%s", | |
2828 aff_entry->ae_cond); | |
2829 vim_regfree(aff_entry->ae_prog); | |
2830 aff_entry->ae_prog = vim_regcomp( | |
2831 buf, RE_MAGIC + RE_STRING); | |
2832 } | |
2833 } | |
2834 } | |
2835 } | |
2836 } | |
2837 | |
2838 if (aff_entry->ae_chop == NULL | |
2839 && aff_entry->ae_flags == NULL) | |
2840 { | |
2841 int idx; | |
2842 char_u **pp; | |
2843 int n; | |
2844 | |
2845 /* Find a previously used condition. */ | |
2846 for (idx = spin->si_prefcond.ga_len - 1; idx >= 0; | |
2847 --idx) | |
2848 { | |
2849 p = ((char_u **)spin->si_prefcond.ga_data)[idx]; | |
2850 if (str_equal(p, aff_entry->ae_cond)) | |
2851 break; | |
2852 } | |
2853 if (idx < 0 && ga_grow(&spin->si_prefcond, 1) == OK) | |
2854 { | |
2855 /* Not found, add a new condition. */ | |
2856 idx = spin->si_prefcond.ga_len++; | |
2857 pp = ((char_u **)spin->si_prefcond.ga_data) | |
2858 + idx; | |
2859 if (aff_entry->ae_cond == NULL) | |
2860 *pp = NULL; | |
2861 else | |
2862 *pp = getroom_save(spin, | |
2863 aff_entry->ae_cond); | |
2864 } | |
2865 | |
2866 /* Add the prefix to the prefix tree. */ | |
2867 if (aff_entry->ae_add == NULL) | |
2868 p = (char_u *)""; | |
2869 else | |
2870 p = aff_entry->ae_add; | |
2871 | |
2872 /* PFX_FLAGS is a negative number, so that | |
2873 * tree_add_word() knows this is the prefix tree. */ | |
2874 n = PFX_FLAGS; | |
2875 if (!cur_aff->ah_combine) | |
2876 n |= WFP_NC; | |
2877 if (upper) | |
2878 n |= WFP_UP; | |
2879 if (aff_entry->ae_comppermit) | |
2880 n |= WFP_COMPPERMIT; | |
2881 if (aff_entry->ae_compforbid) | |
2882 n |= WFP_COMPFORBID; | |
2883 tree_add_word(spin, p, spin->si_prefroot, n, | |
2884 idx, cur_aff->ah_newID); | |
2885 did_postpone_prefix = TRUE; | |
2886 } | |
2887 | |
2888 /* Didn't actually use ah_newID, backup si_newprefID. */ | |
2889 if (aff_todo == 0 && !did_postpone_prefix) | |
2890 { | |
2891 --spin->si_newprefID; | |
2892 cur_aff->ah_newID = 0; | |
2893 } | |
2894 } | |
2895 } | |
2896 } | |
2897 else if (is_aff_rule(items, itemcnt, "FOL", 2) && fol == NULL) | |
2898 { | |
2899 fol = vim_strsave(items[1]); | |
2900 } | |
2901 else if (is_aff_rule(items, itemcnt, "LOW", 2) && low == NULL) | |
2902 { | |
2903 low = vim_strsave(items[1]); | |
2904 } | |
2905 else if (is_aff_rule(items, itemcnt, "UPP", 2) && upp == NULL) | |
2906 { | |
2907 upp = vim_strsave(items[1]); | |
2908 } | |
2909 else if (is_aff_rule(items, itemcnt, "REP", 2) | |
2910 || is_aff_rule(items, itemcnt, "REPSAL", 2)) | |
2911 { | |
2912 /* Ignore REP/REPSAL count */; | |
2913 if (!isdigit(*items[1])) | |
2914 smsg((char_u *)_("Expected REP(SAL) count in %s line %d"), | |
2915 fname, lnum); | |
2916 } | |
2917 else if ((STRCMP(items[0], "REP") == 0 | |
2918 || STRCMP(items[0], "REPSAL") == 0) | |
2919 && itemcnt >= 3) | |
2920 { | |
2921 /* REP/REPSAL item */ | |
2922 /* Myspell ignores extra arguments, we require it starts with | |
2923 * # to detect mistakes. */ | |
2924 if (itemcnt > 3 && items[3][0] != '#') | |
2925 smsg((char_u *)_(e_afftrailing), fname, lnum, items[3]); | |
2926 if (items[0][3] == 'S' ? do_repsal : do_rep) | |
2927 { | |
2928 /* Replace underscore with space (can't include a space | |
2929 * directly). */ | |
2930 for (p = items[1]; *p != NUL; mb_ptr_adv(p)) | |
2931 if (*p == '_') | |
2932 *p = ' '; | |
2933 for (p = items[2]; *p != NUL; mb_ptr_adv(p)) | |
2934 if (*p == '_') | |
2935 *p = ' '; | |
2936 add_fromto(spin, items[0][3] == 'S' | |
2937 ? &spin->si_repsal | |
2938 : &spin->si_rep, items[1], items[2]); | |
2939 } | |
2940 } | |
2941 else if (is_aff_rule(items, itemcnt, "MAP", 2)) | |
2942 { | |
2943 /* MAP item or count */ | |
2944 if (!found_map) | |
2945 { | |
2946 /* First line contains the count. */ | |
2947 found_map = TRUE; | |
2948 if (!isdigit(*items[1])) | |
2949 smsg((char_u *)_("Expected MAP count in %s line %d"), | |
2950 fname, lnum); | |
2951 } | |
2952 else if (do_mapline) | |
2953 { | |
2954 int c; | |
2955 | |
2956 /* Check that every character appears only once. */ | |
2957 for (p = items[1]; *p != NUL; ) | |
2958 { | |
2959 #ifdef FEAT_MBYTE | |
2960 c = mb_ptr2char_adv(&p); | |
2961 #else | |
2962 c = *p++; | |
2963 #endif | |
2964 if ((spin->si_map.ga_len > 0 | |
2965 && vim_strchr(spin->si_map.ga_data, c) | |
2966 != NULL) | |
2967 || vim_strchr(p, c) != NULL) | |
2968 smsg((char_u *)_("Duplicate character in MAP in %s line %d"), | |
2969 fname, lnum); | |
2970 } | |
2971 | |
2972 /* We simply concatenate all the MAP strings, separated by | |
2973 * slashes. */ | |
2974 ga_concat(&spin->si_map, items[1]); | |
2975 ga_append(&spin->si_map, '/'); | |
2976 } | |
2977 } | |
2978 /* Accept "SAL from to" and "SAL from to #comment". */ | |
2979 else if (is_aff_rule(items, itemcnt, "SAL", 3)) | |
2980 { | |
2981 if (do_sal) | |
2982 { | |
2983 /* SAL item (sounds-a-like) | |
2984 * Either one of the known keys or a from-to pair. */ | |
2985 if (STRCMP(items[1], "followup") == 0) | |
2986 spin->si_followup = sal_to_bool(items[2]); | |
2987 else if (STRCMP(items[1], "collapse_result") == 0) | |
2988 spin->si_collapse = sal_to_bool(items[2]); | |
2989 else if (STRCMP(items[1], "remove_accents") == 0) | |
2990 spin->si_rem_accents = sal_to_bool(items[2]); | |
2991 else | |
2992 /* when "to" is "_" it means empty */ | |
2993 add_fromto(spin, &spin->si_sal, items[1], | |
2994 STRCMP(items[2], "_") == 0 ? (char_u *)"" | |
2995 : items[2]); | |
2996 } | |
2997 } | |
2998 else if (is_aff_rule(items, itemcnt, "SOFOFROM", 2) | |
2999 && sofofrom == NULL) | |
3000 { | |
3001 sofofrom = getroom_save(spin, items[1]); | |
3002 } | |
3003 else if (is_aff_rule(items, itemcnt, "SOFOTO", 2) | |
3004 && sofoto == NULL) | |
3005 { | |
3006 sofoto = getroom_save(spin, items[1]); | |
3007 } | |
3008 else if (STRCMP(items[0], "COMMON") == 0) | |
3009 { | |
3010 int i; | |
3011 | |
3012 for (i = 1; i < itemcnt; ++i) | |
3013 { | |
3014 if (HASHITEM_EMPTY(hash_find(&spin->si_commonwords, | |
3015 items[i]))) | |
3016 { | |
3017 p = vim_strsave(items[i]); | |
3018 if (p == NULL) | |
3019 break; | |
3020 hash_add(&spin->si_commonwords, p); | |
3021 } | |
3022 } | |
3023 } | |
3024 else | |
3025 smsg((char_u *)_("Unrecognized or duplicate item in %s line %d: %s"), | |
3026 fname, lnum, items[0]); | |
3027 } | |
3028 } | |
3029 | |
3030 if (fol != NULL || low != NULL || upp != NULL) | |
3031 { | |
3032 if (spin->si_clear_chartab) | |
3033 { | |
3034 /* Clear the char type tables, don't want to use any of the | |
3035 * currently used spell properties. */ | |
3036 init_spell_chartab(); | |
3037 spin->si_clear_chartab = FALSE; | |
3038 } | |
3039 | |
3040 /* | |
3041 * Don't write a word table for an ASCII file, so that we don't check | |
3042 * for conflicts with a word table that matches 'encoding'. | |
3043 * Don't write one for utf-8 either, we use utf_*() and | |
3044 * mb_get_class(), the list of chars in the file will be incomplete. | |
3045 */ | |
3046 if (!spin->si_ascii | |
3047 #ifdef FEAT_MBYTE | |
3048 && !enc_utf8 | |
3049 #endif | |
3050 ) | |
3051 { | |
3052 if (fol == NULL || low == NULL || upp == NULL) | |
3053 smsg((char_u *)_("Missing FOL/LOW/UPP line in %s"), fname); | |
3054 else | |
3055 (void)set_spell_chartab(fol, low, upp); | |
3056 } | |
3057 | |
3058 vim_free(fol); | |
3059 vim_free(low); | |
3060 vim_free(upp); | |
3061 } | |
3062 | |
3063 /* Use compound specifications of the .aff file for the spell info. */ | |
3064 if (compmax != 0) | |
3065 { | |
3066 aff_check_number(spin->si_compmax, compmax, "COMPOUNDWORDMAX"); | |
3067 spin->si_compmax = compmax; | |
3068 } | |
3069 | |
3070 if (compminlen != 0) | |
3071 { | |
3072 aff_check_number(spin->si_compminlen, compminlen, "COMPOUNDMIN"); | |
3073 spin->si_compminlen = compminlen; | |
3074 } | |
3075 | |
3076 if (compsylmax != 0) | |
3077 { | |
3078 if (syllable == NULL) | |
3079 smsg((char_u *)_("COMPOUNDSYLMAX used without SYLLABLE")); | |
3080 aff_check_number(spin->si_compsylmax, compsylmax, "COMPOUNDSYLMAX"); | |
3081 spin->si_compsylmax = compsylmax; | |
3082 } | |
3083 | |
3084 if (compoptions != 0) | |
3085 { | |
3086 aff_check_number(spin->si_compoptions, compoptions, "COMPOUND options"); | |
3087 spin->si_compoptions |= compoptions; | |
3088 } | |
3089 | |
3090 if (compflags != NULL) | |
3091 process_compflags(spin, aff, compflags); | |
3092 | |
3093 /* Check that we didn't use too many renumbered flags. */ | |
3094 if (spin->si_newcompID < spin->si_newprefID) | |
3095 { | |
3096 if (spin->si_newcompID == 127 || spin->si_newcompID == 255) | |
3097 MSG(_("Too many postponed prefixes")); | |
3098 else if (spin->si_newprefID == 0 || spin->si_newprefID == 127) | |
3099 MSG(_("Too many compound flags")); | |
3100 else | |
3101 MSG(_("Too many postponed prefixes and/or compound flags")); | |
3102 } | |
3103 | |
3104 if (syllable != NULL) | |
3105 { | |
3106 aff_check_string(spin->si_syllable, syllable, "SYLLABLE"); | |
3107 spin->si_syllable = syllable; | |
3108 } | |
3109 | |
3110 if (sofofrom != NULL || sofoto != NULL) | |
3111 { | |
3112 if (sofofrom == NULL || sofoto == NULL) | |
3113 smsg((char_u *)_("Missing SOFO%s line in %s"), | |
3114 sofofrom == NULL ? "FROM" : "TO", fname); | |
3115 else if (spin->si_sal.ga_len > 0) | |
3116 smsg((char_u *)_("Both SAL and SOFO lines in %s"), fname); | |
3117 else | |
3118 { | |
3119 aff_check_string(spin->si_sofofr, sofofrom, "SOFOFROM"); | |
3120 aff_check_string(spin->si_sofoto, sofoto, "SOFOTO"); | |
3121 spin->si_sofofr = sofofrom; | |
3122 spin->si_sofoto = sofoto; | |
3123 } | |
3124 } | |
3125 | |
3126 if (midword != NULL) | |
3127 { | |
3128 aff_check_string(spin->si_midword, midword, "MIDWORD"); | |
3129 spin->si_midword = midword; | |
3130 } | |
3131 | |
3132 vim_free(pc); | |
3133 fclose(fd); | |
3134 return aff; | |
3135 } | |
3136 | |
3137 /* | |
3138 * Return TRUE when items[0] equals "rulename", there are "mincount" items or | |
3139 * a comment is following after item "mincount". | |
3140 */ | |
3141 static int | |
3142 is_aff_rule( | |
3143 char_u **items, | |
3144 int itemcnt, | |
3145 char *rulename, | |
3146 int mincount) | |
3147 { | |
3148 return (STRCMP(items[0], rulename) == 0 | |
3149 && (itemcnt == mincount | |
3150 || (itemcnt > mincount && items[mincount][0] == '#'))); | |
3151 } | |
3152 | |
3153 /* | |
3154 * For affix "entry" move COMPOUNDFORBIDFLAG and COMPOUNDPERMITFLAG from | |
3155 * ae_flags to ae_comppermit and ae_compforbid. | |
3156 */ | |
3157 static void | |
3158 aff_process_flags(afffile_T *affile, affentry_T *entry) | |
3159 { | |
3160 char_u *p; | |
3161 char_u *prevp; | |
3162 unsigned flag; | |
3163 | |
3164 if (entry->ae_flags != NULL | |
3165 && (affile->af_compforbid != 0 || affile->af_comppermit != 0)) | |
3166 { | |
3167 for (p = entry->ae_flags; *p != NUL; ) | |
3168 { | |
3169 prevp = p; | |
3170 flag = get_affitem(affile->af_flagtype, &p); | |
3171 if (flag == affile->af_comppermit || flag == affile->af_compforbid) | |
3172 { | |
3173 STRMOVE(prevp, p); | |
3174 p = prevp; | |
3175 if (flag == affile->af_comppermit) | |
3176 entry->ae_comppermit = TRUE; | |
3177 else | |
3178 entry->ae_compforbid = TRUE; | |
3179 } | |
3180 if (affile->af_flagtype == AFT_NUM && *p == ',') | |
3181 ++p; | |
3182 } | |
3183 if (*entry->ae_flags == NUL) | |
3184 entry->ae_flags = NULL; /* nothing left */ | |
3185 } | |
3186 } | |
3187 | |
3188 /* | |
3189 * Return TRUE if "s" is the name of an info item in the affix file. | |
3190 */ | |
3191 static int | |
3192 spell_info_item(char_u *s) | |
3193 { | |
3194 return STRCMP(s, "NAME") == 0 | |
3195 || STRCMP(s, "HOME") == 0 | |
3196 || STRCMP(s, "VERSION") == 0 | |
3197 || STRCMP(s, "AUTHOR") == 0 | |
3198 || STRCMP(s, "EMAIL") == 0 | |
3199 || STRCMP(s, "COPYRIGHT") == 0; | |
3200 } | |
3201 | |
3202 /* | |
3203 * Turn an affix flag name into a number, according to the FLAG type. | |
3204 * returns zero for failure. | |
3205 */ | |
3206 static unsigned | |
3207 affitem2flag( | |
3208 int flagtype, | |
3209 char_u *item, | |
3210 char_u *fname, | |
3211 int lnum) | |
3212 { | |
3213 unsigned res; | |
3214 char_u *p = item; | |
3215 | |
3216 res = get_affitem(flagtype, &p); | |
3217 if (res == 0) | |
3218 { | |
3219 if (flagtype == AFT_NUM) | |
3220 smsg((char_u *)_("Flag is not a number in %s line %d: %s"), | |
3221 fname, lnum, item); | |
3222 else | |
3223 smsg((char_u *)_("Illegal flag in %s line %d: %s"), | |
3224 fname, lnum, item); | |
3225 } | |
3226 if (*p != NUL) | |
3227 { | |
3228 smsg((char_u *)_(e_affname), fname, lnum, item); | |
3229 return 0; | |
3230 } | |
3231 | |
3232 return res; | |
3233 } | |
3234 | |
3235 /* | |
3236 * Get one affix name from "*pp" and advance the pointer. | |
3237 * Returns zero for an error, still advances the pointer then. | |
3238 */ | |
3239 static unsigned | |
3240 get_affitem(int flagtype, char_u **pp) | |
3241 { | |
3242 int res; | |
3243 | |
3244 if (flagtype == AFT_NUM) | |
3245 { | |
3246 if (!VIM_ISDIGIT(**pp)) | |
3247 { | |
3248 ++*pp; /* always advance, avoid getting stuck */ | |
3249 return 0; | |
3250 } | |
3251 res = getdigits(pp); | |
3252 } | |
3253 else | |
3254 { | |
3255 #ifdef FEAT_MBYTE | |
3256 res = mb_ptr2char_adv(pp); | |
3257 #else | |
3258 res = *(*pp)++; | |
3259 #endif | |
3260 if (flagtype == AFT_LONG || (flagtype == AFT_CAPLONG | |
3261 && res >= 'A' && res <= 'Z')) | |
3262 { | |
3263 if (**pp == NUL) | |
3264 return 0; | |
3265 #ifdef FEAT_MBYTE | |
3266 res = mb_ptr2char_adv(pp) + (res << 16); | |
3267 #else | |
3268 res = *(*pp)++ + (res << 16); | |
3269 #endif | |
3270 } | |
3271 } | |
3272 return res; | |
3273 } | |
3274 | |
3275 /* | |
3276 * Process the "compflags" string used in an affix file and append it to | |
3277 * spin->si_compflags. | |
3278 * The processing involves changing the affix names to ID numbers, so that | |
3279 * they fit in one byte. | |
3280 */ | |
3281 static void | |
3282 process_compflags( | |
3283 spellinfo_T *spin, | |
3284 afffile_T *aff, | |
3285 char_u *compflags) | |
3286 { | |
3287 char_u *p; | |
3288 char_u *prevp; | |
3289 unsigned flag; | |
3290 compitem_T *ci; | |
3291 int id; | |
3292 int len; | |
3293 char_u *tp; | |
3294 char_u key[AH_KEY_LEN]; | |
3295 hashitem_T *hi; | |
3296 | |
3297 /* Make room for the old and the new compflags, concatenated with a / in | |
3298 * between. Processing it makes it shorter, but we don't know by how | |
3299 * much, thus allocate the maximum. */ | |
3300 len = (int)STRLEN(compflags) + 1; | |
3301 if (spin->si_compflags != NULL) | |
3302 len += (int)STRLEN(spin->si_compflags) + 1; | |
3303 p = getroom(spin, len, FALSE); | |
3304 if (p == NULL) | |
3305 return; | |
3306 if (spin->si_compflags != NULL) | |
3307 { | |
3308 STRCPY(p, spin->si_compflags); | |
3309 STRCAT(p, "/"); | |
3310 } | |
3311 spin->si_compflags = p; | |
3312 tp = p + STRLEN(p); | |
3313 | |
3314 for (p = compflags; *p != NUL; ) | |
3315 { | |
3316 if (vim_strchr((char_u *)"/?*+[]", *p) != NULL) | |
3317 /* Copy non-flag characters directly. */ | |
3318 *tp++ = *p++; | |
3319 else | |
3320 { | |
3321 /* First get the flag number, also checks validity. */ | |
3322 prevp = p; | |
3323 flag = get_affitem(aff->af_flagtype, &p); | |
3324 if (flag != 0) | |
3325 { | |
3326 /* Find the flag in the hashtable. If it was used before, use | |
3327 * the existing ID. Otherwise add a new entry. */ | |
3328 vim_strncpy(key, prevp, p - prevp); | |
3329 hi = hash_find(&aff->af_comp, key); | |
3330 if (!HASHITEM_EMPTY(hi)) | |
3331 id = HI2CI(hi)->ci_newID; | |
3332 else | |
3333 { | |
3334 ci = (compitem_T *)getroom(spin, sizeof(compitem_T), TRUE); | |
3335 if (ci == NULL) | |
3336 break; | |
3337 STRCPY(ci->ci_key, key); | |
3338 ci->ci_flag = flag; | |
3339 /* Avoid using a flag ID that has a special meaning in a | |
3340 * regexp (also inside []). */ | |
3341 do | |
3342 { | |
3343 check_renumber(spin); | |
3344 id = spin->si_newcompID--; | |
3345 } while (vim_strchr((char_u *)"/?*+[]\\-^", id) != NULL); | |
3346 ci->ci_newID = id; | |
3347 hash_add(&aff->af_comp, ci->ci_key); | |
3348 } | |
3349 *tp++ = id; | |
3350 } | |
3351 if (aff->af_flagtype == AFT_NUM && *p == ',') | |
3352 ++p; | |
3353 } | |
3354 } | |
3355 | |
3356 *tp = NUL; | |
3357 } | |
3358 | |
3359 /* | |
3360 * Check that the new IDs for postponed affixes and compounding don't overrun | |
3361 * each other. We have almost 255 available, but start at 0-127 to avoid | |
3362 * using two bytes for utf-8. When the 0-127 range is used up go to 128-255. | |
3363 * When that is used up an error message is given. | |
3364 */ | |
3365 static void | |
3366 check_renumber(spellinfo_T *spin) | |
3367 { | |
3368 if (spin->si_newprefID == spin->si_newcompID && spin->si_newcompID < 128) | |
3369 { | |
3370 spin->si_newprefID = 127; | |
3371 spin->si_newcompID = 255; | |
3372 } | |
3373 } | |
3374 | |
3375 /* | |
3376 * Return TRUE if flag "flag" appears in affix list "afflist". | |
3377 */ | |
3378 static int | |
3379 flag_in_afflist(int flagtype, char_u *afflist, unsigned flag) | |
3380 { | |
3381 char_u *p; | |
3382 unsigned n; | |
3383 | |
3384 switch (flagtype) | |
3385 { | |
3386 case AFT_CHAR: | |
3387 return vim_strchr(afflist, flag) != NULL; | |
3388 | |
3389 case AFT_CAPLONG: | |
3390 case AFT_LONG: | |
3391 for (p = afflist; *p != NUL; ) | |
3392 { | |
3393 #ifdef FEAT_MBYTE | |
3394 n = mb_ptr2char_adv(&p); | |
3395 #else | |
3396 n = *p++; | |
3397 #endif | |
3398 if ((flagtype == AFT_LONG || (n >= 'A' && n <= 'Z')) | |
3399 && *p != NUL) | |
3400 #ifdef FEAT_MBYTE | |
3401 n = mb_ptr2char_adv(&p) + (n << 16); | |
3402 #else | |
3403 n = *p++ + (n << 16); | |
3404 #endif | |
3405 if (n == flag) | |
3406 return TRUE; | |
3407 } | |
3408 break; | |
3409 | |
3410 case AFT_NUM: | |
3411 for (p = afflist; *p != NUL; ) | |
3412 { | |
3413 n = getdigits(&p); | |
3414 if (n == flag) | |
3415 return TRUE; | |
3416 if (*p != NUL) /* skip over comma */ | |
3417 ++p; | |
3418 } | |
3419 break; | |
3420 } | |
3421 return FALSE; | |
3422 } | |
3423 | |
3424 /* | |
3425 * Give a warning when "spinval" and "affval" numbers are set and not the same. | |
3426 */ | |
3427 static void | |
3428 aff_check_number(int spinval, int affval, char *name) | |
3429 { | |
3430 if (spinval != 0 && spinval != affval) | |
3431 smsg((char_u *)_("%s value differs from what is used in another .aff file"), name); | |
3432 } | |
3433 | |
3434 /* | |
3435 * Give a warning when "spinval" and "affval" strings are set and not the same. | |
3436 */ | |
3437 static void | |
3438 aff_check_string(char_u *spinval, char_u *affval, char *name) | |
3439 { | |
3440 if (spinval != NULL && STRCMP(spinval, affval) != 0) | |
3441 smsg((char_u *)_("%s value differs from what is used in another .aff file"), name); | |
3442 } | |
3443 | |
3444 /* | |
3445 * Return TRUE if strings "s1" and "s2" are equal. Also consider both being | |
3446 * NULL as equal. | |
3447 */ | |
3448 static int | |
3449 str_equal(char_u *s1, char_u *s2) | |
3450 { | |
3451 if (s1 == NULL || s2 == NULL) | |
3452 return s1 == s2; | |
3453 return STRCMP(s1, s2) == 0; | |
3454 } | |
3455 | |
3456 /* | |
3457 * Add a from-to item to "gap". Used for REP and SAL items. | |
3458 * They are stored case-folded. | |
3459 */ | |
3460 static void | |
3461 add_fromto( | |
3462 spellinfo_T *spin, | |
3463 garray_T *gap, | |
3464 char_u *from, | |
3465 char_u *to) | |
3466 { | |
3467 fromto_T *ftp; | |
3468 char_u word[MAXWLEN]; | |
3469 | |
3470 if (ga_grow(gap, 1) == OK) | |
3471 { | |
3472 ftp = ((fromto_T *)gap->ga_data) + gap->ga_len; | |
3473 (void)spell_casefold(from, (int)STRLEN(from), word, MAXWLEN); | |
3474 ftp->ft_from = getroom_save(spin, word); | |
3475 (void)spell_casefold(to, (int)STRLEN(to), word, MAXWLEN); | |
3476 ftp->ft_to = getroom_save(spin, word); | |
3477 ++gap->ga_len; | |
3478 } | |
3479 } | |
3480 | |
3481 /* | |
3482 * Convert a boolean argument in a SAL line to TRUE or FALSE; | |
3483 */ | |
3484 static int | |
3485 sal_to_bool(char_u *s) | |
3486 { | |
3487 return STRCMP(s, "1") == 0 || STRCMP(s, "true") == 0; | |
3488 } | |
3489 | |
3490 /* | |
3491 * Free the structure filled by spell_read_aff(). | |
3492 */ | |
3493 static void | |
3494 spell_free_aff(afffile_T *aff) | |
3495 { | |
3496 hashtab_T *ht; | |
3497 hashitem_T *hi; | |
3498 int todo; | |
3499 affheader_T *ah; | |
3500 affentry_T *ae; | |
3501 | |
3502 vim_free(aff->af_enc); | |
3503 | |
3504 /* All this trouble to free the "ae_prog" items... */ | |
3505 for (ht = &aff->af_pref; ; ht = &aff->af_suff) | |
3506 { | |
3507 todo = (int)ht->ht_used; | |
3508 for (hi = ht->ht_array; todo > 0; ++hi) | |
3509 { | |
3510 if (!HASHITEM_EMPTY(hi)) | |
3511 { | |
3512 --todo; | |
3513 ah = HI2AH(hi); | |
3514 for (ae = ah->ah_first; ae != NULL; ae = ae->ae_next) | |
3515 vim_regfree(ae->ae_prog); | |
3516 } | |
3517 } | |
3518 if (ht == &aff->af_suff) | |
3519 break; | |
3520 } | |
3521 | |
3522 hash_clear(&aff->af_pref); | |
3523 hash_clear(&aff->af_suff); | |
3524 hash_clear(&aff->af_comp); | |
3525 } | |
3526 | |
3527 /* | |
3528 * Read dictionary file "fname". | |
3529 * Returns OK or FAIL; | |
3530 */ | |
3531 static int | |
3532 spell_read_dic(spellinfo_T *spin, char_u *fname, afffile_T *affile) | |
3533 { | |
3534 hashtab_T ht; | |
3535 char_u line[MAXLINELEN]; | |
3536 char_u *p; | |
3537 char_u *afflist; | |
3538 char_u store_afflist[MAXWLEN]; | |
3539 int pfxlen; | |
3540 int need_affix; | |
3541 char_u *dw; | |
3542 char_u *pc; | |
3543 char_u *w; | |
3544 int l; | |
3545 hash_T hash; | |
3546 hashitem_T *hi; | |
3547 FILE *fd; | |
3548 int lnum = 1; | |
3549 int non_ascii = 0; | |
3550 int retval = OK; | |
3551 char_u message[MAXLINELEN + MAXWLEN]; | |
3552 int flags; | |
3553 int duplicate = 0; | |
3554 | |
3555 /* | |
3556 * Open the file. | |
3557 */ | |
3558 fd = mch_fopen((char *)fname, "r"); | |
3559 if (fd == NULL) | |
3560 { | |
3561 EMSG2(_(e_notopen), fname); | |
3562 return FAIL; | |
3563 } | |
3564 | |
3565 /* The hashtable is only used to detect duplicated words. */ | |
3566 hash_init(&ht); | |
3567 | |
3568 vim_snprintf((char *)IObuff, IOSIZE, | |
3569 _("Reading dictionary file %s ..."), fname); | |
3570 spell_message(spin, IObuff); | |
3571 | |
3572 /* start with a message for the first line */ | |
3573 spin->si_msg_count = 999999; | |
3574 | |
3575 /* Read and ignore the first line: word count. */ | |
3576 (void)vim_fgets(line, MAXLINELEN, fd); | |
3577 if (!vim_isdigit(*skipwhite(line))) | |
3578 EMSG2(_("E760: No word count in %s"), fname); | |
3579 | |
3580 /* | |
3581 * Read all the lines in the file one by one. | |
3582 * The words are converted to 'encoding' here, before being added to | |
3583 * the hashtable. | |
3584 */ | |
3585 while (!vim_fgets(line, MAXLINELEN, fd) && !got_int) | |
3586 { | |
3587 line_breakcheck(); | |
3588 ++lnum; | |
3589 if (line[0] == '#' || line[0] == '/') | |
3590 continue; /* comment line */ | |
3591 | |
3592 /* Remove CR, LF and white space from the end. White space halfway | |
3593 * the word is kept to allow e.g., "et al.". */ | |
3594 l = (int)STRLEN(line); | |
3595 while (l > 0 && line[l - 1] <= ' ') | |
3596 --l; | |
3597 if (l == 0) | |
3598 continue; /* empty line */ | |
3599 line[l] = NUL; | |
3600 | |
3601 #ifdef FEAT_MBYTE | |
3602 /* Convert from "SET" to 'encoding' when needed. */ | |
3603 if (spin->si_conv.vc_type != CONV_NONE) | |
3604 { | |
3605 pc = string_convert(&spin->si_conv, line, NULL); | |
3606 if (pc == NULL) | |
3607 { | |
3608 smsg((char_u *)_("Conversion failure for word in %s line %d: %s"), | |
3609 fname, lnum, line); | |
3610 continue; | |
3611 } | |
3612 w = pc; | |
3613 } | |
3614 else | |
3615 #endif | |
3616 { | |
3617 pc = NULL; | |
3618 w = line; | |
3619 } | |
3620 | |
3621 /* Truncate the word at the "/", set "afflist" to what follows. | |
3622 * Replace "\/" by "/" and "\\" by "\". */ | |
3623 afflist = NULL; | |
3624 for (p = w; *p != NUL; mb_ptr_adv(p)) | |
3625 { | |
3626 if (*p == '\\' && (p[1] == '\\' || p[1] == '/')) | |
3627 STRMOVE(p, p + 1); | |
3628 else if (*p == '/') | |
3629 { | |
3630 *p = NUL; | |
3631 afflist = p + 1; | |
3632 break; | |
3633 } | |
3634 } | |
3635 | |
3636 /* Skip non-ASCII words when "spin->si_ascii" is TRUE. */ | |
3637 if (spin->si_ascii && has_non_ascii(w)) | |
3638 { | |
3639 ++non_ascii; | |
3640 vim_free(pc); | |
3641 continue; | |
3642 } | |
3643 | |
3644 /* This takes time, print a message every 10000 words. */ | |
3645 if (spin->si_verbose && spin->si_msg_count > 10000) | |
3646 { | |
3647 spin->si_msg_count = 0; | |
3648 vim_snprintf((char *)message, sizeof(message), | |
3649 _("line %6d, word %6d - %s"), | |
3650 lnum, spin->si_foldwcount + spin->si_keepwcount, w); | |
3651 msg_start(); | |
3652 msg_puts_long_attr(message, 0); | |
3653 msg_clr_eos(); | |
3654 msg_didout = FALSE; | |
3655 msg_col = 0; | |
3656 out_flush(); | |
3657 } | |
3658 | |
3659 /* Store the word in the hashtable to be able to find duplicates. */ | |
3660 dw = (char_u *)getroom_save(spin, w); | |
3661 if (dw == NULL) | |
3662 { | |
3663 retval = FAIL; | |
3664 vim_free(pc); | |
3665 break; | |
3666 } | |
3667 | |
3668 hash = hash_hash(dw); | |
3669 hi = hash_lookup(&ht, dw, hash); | |
3670 if (!HASHITEM_EMPTY(hi)) | |
3671 { | |
3672 if (p_verbose > 0) | |
3673 smsg((char_u *)_("Duplicate word in %s line %d: %s"), | |
3674 fname, lnum, dw); | |
3675 else if (duplicate == 0) | |
3676 smsg((char_u *)_("First duplicate word in %s line %d: %s"), | |
3677 fname, lnum, dw); | |
3678 ++duplicate; | |
3679 } | |
3680 else | |
3681 hash_add_item(&ht, hi, dw, hash); | |
3682 | |
3683 flags = 0; | |
3684 store_afflist[0] = NUL; | |
3685 pfxlen = 0; | |
3686 need_affix = FALSE; | |
3687 if (afflist != NULL) | |
3688 { | |
3689 /* Extract flags from the affix list. */ | |
3690 flags |= get_affix_flags(affile, afflist); | |
3691 | |
3692 if (affile->af_needaffix != 0 && flag_in_afflist( | |
3693 affile->af_flagtype, afflist, affile->af_needaffix)) | |
3694 need_affix = TRUE; | |
3695 | |
3696 if (affile->af_pfxpostpone) | |
3697 /* Need to store the list of prefix IDs with the word. */ | |
3698 pfxlen = get_pfxlist(affile, afflist, store_afflist); | |
3699 | |
3700 if (spin->si_compflags != NULL) | |
3701 /* Need to store the list of compound flags with the word. | |
3702 * Concatenate them to the list of prefix IDs. */ | |
3703 get_compflags(affile, afflist, store_afflist + pfxlen); | |
3704 } | |
3705 | |
3706 /* Add the word to the word tree(s). */ | |
3707 if (store_word(spin, dw, flags, spin->si_region, | |
3708 store_afflist, need_affix) == FAIL) | |
3709 retval = FAIL; | |
3710 | |
3711 if (afflist != NULL) | |
3712 { | |
3713 /* Find all matching suffixes and add the resulting words. | |
3714 * Additionally do matching prefixes that combine. */ | |
3715 if (store_aff_word(spin, dw, afflist, affile, | |
3716 &affile->af_suff, &affile->af_pref, | |
3717 CONDIT_SUF, flags, store_afflist, pfxlen) == FAIL) | |
3718 retval = FAIL; | |
3719 | |
3720 /* Find all matching prefixes and add the resulting words. */ | |
3721 if (store_aff_word(spin, dw, afflist, affile, | |
3722 &affile->af_pref, NULL, | |
3723 CONDIT_SUF, flags, store_afflist, pfxlen) == FAIL) | |
3724 retval = FAIL; | |
3725 } | |
3726 | |
3727 vim_free(pc); | |
3728 } | |
3729 | |
3730 if (duplicate > 0) | |
3731 smsg((char_u *)_("%d duplicate word(s) in %s"), duplicate, fname); | |
3732 if (spin->si_ascii && non_ascii > 0) | |
3733 smsg((char_u *)_("Ignored %d word(s) with non-ASCII characters in %s"), | |
3734 non_ascii, fname); | |
3735 hash_clear(&ht); | |
3736 | |
3737 fclose(fd); | |
3738 return retval; | |
3739 } | |
3740 | |
3741 /* | |
3742 * Check for affix flags in "afflist" that are turned into word flags. | |
3743 * Return WF_ flags. | |
3744 */ | |
3745 static int | |
3746 get_affix_flags(afffile_T *affile, char_u *afflist) | |
3747 { | |
3748 int flags = 0; | |
3749 | |
3750 if (affile->af_keepcase != 0 && flag_in_afflist( | |
3751 affile->af_flagtype, afflist, affile->af_keepcase)) | |
3752 flags |= WF_KEEPCAP | WF_FIXCAP; | |
3753 if (affile->af_rare != 0 && flag_in_afflist( | |
3754 affile->af_flagtype, afflist, affile->af_rare)) | |
3755 flags |= WF_RARE; | |
3756 if (affile->af_bad != 0 && flag_in_afflist( | |
3757 affile->af_flagtype, afflist, affile->af_bad)) | |
3758 flags |= WF_BANNED; | |
3759 if (affile->af_needcomp != 0 && flag_in_afflist( | |
3760 affile->af_flagtype, afflist, affile->af_needcomp)) | |
3761 flags |= WF_NEEDCOMP; | |
3762 if (affile->af_comproot != 0 && flag_in_afflist( | |
3763 affile->af_flagtype, afflist, affile->af_comproot)) | |
3764 flags |= WF_COMPROOT; | |
3765 if (affile->af_nosuggest != 0 && flag_in_afflist( | |
3766 affile->af_flagtype, afflist, affile->af_nosuggest)) | |
3767 flags |= WF_NOSUGGEST; | |
3768 return flags; | |
3769 } | |
3770 | |
3771 /* | |
3772 * Get the list of prefix IDs from the affix list "afflist". | |
3773 * Used for PFXPOSTPONE. | |
3774 * Put the resulting flags in "store_afflist[MAXWLEN]" with a terminating NUL | |
3775 * and return the number of affixes. | |
3776 */ | |
3777 static int | |
3778 get_pfxlist( | |
3779 afffile_T *affile, | |
3780 char_u *afflist, | |
3781 char_u *store_afflist) | |
3782 { | |
3783 char_u *p; | |
3784 char_u *prevp; | |
3785 int cnt = 0; | |
3786 int id; | |
3787 char_u key[AH_KEY_LEN]; | |
3788 hashitem_T *hi; | |
3789 | |
3790 for (p = afflist; *p != NUL; ) | |
3791 { | |
3792 prevp = p; | |
3793 if (get_affitem(affile->af_flagtype, &p) != 0) | |
3794 { | |
3795 /* A flag is a postponed prefix flag if it appears in "af_pref" | |
3796 * and it's ID is not zero. */ | |
3797 vim_strncpy(key, prevp, p - prevp); | |
3798 hi = hash_find(&affile->af_pref, key); | |
3799 if (!HASHITEM_EMPTY(hi)) | |
3800 { | |
3801 id = HI2AH(hi)->ah_newID; | |
3802 if (id != 0) | |
3803 store_afflist[cnt++] = id; | |
3804 } | |
3805 } | |
3806 if (affile->af_flagtype == AFT_NUM && *p == ',') | |
3807 ++p; | |
3808 } | |
3809 | |
3810 store_afflist[cnt] = NUL; | |
3811 return cnt; | |
3812 } | |
3813 | |
3814 /* | |
3815 * Get the list of compound IDs from the affix list "afflist" that are used | |
3816 * for compound words. | |
3817 * Puts the flags in "store_afflist[]". | |
3818 */ | |
3819 static void | |
3820 get_compflags( | |
3821 afffile_T *affile, | |
3822 char_u *afflist, | |
3823 char_u *store_afflist) | |
3824 { | |
3825 char_u *p; | |
3826 char_u *prevp; | |
3827 int cnt = 0; | |
3828 char_u key[AH_KEY_LEN]; | |
3829 hashitem_T *hi; | |
3830 | |
3831 for (p = afflist; *p != NUL; ) | |
3832 { | |
3833 prevp = p; | |
3834 if (get_affitem(affile->af_flagtype, &p) != 0) | |
3835 { | |
3836 /* A flag is a compound flag if it appears in "af_comp". */ | |
3837 vim_strncpy(key, prevp, p - prevp); | |
3838 hi = hash_find(&affile->af_comp, key); | |
3839 if (!HASHITEM_EMPTY(hi)) | |
3840 store_afflist[cnt++] = HI2CI(hi)->ci_newID; | |
3841 } | |
3842 if (affile->af_flagtype == AFT_NUM && *p == ',') | |
3843 ++p; | |
3844 } | |
3845 | |
3846 store_afflist[cnt] = NUL; | |
3847 } | |
3848 | |
3849 /* | |
3850 * Apply affixes to a word and store the resulting words. | |
3851 * "ht" is the hashtable with affentry_T that need to be applied, either | |
3852 * prefixes or suffixes. | |
3853 * "xht", when not NULL, is the prefix hashtable, to be used additionally on | |
3854 * the resulting words for combining affixes.  When "xht" is NULL the entries in "ht" are applied as prefixes (chop/add at the start of "word"), otherwise as suffixes (chop/add at the end of "word"). | |
3855 * Every resulting word is passed to store_word(); the function then calls itself recursively to add a further suffix and/or a combining prefix. | |
3856 * Returns FAIL when store_word() or a recursive call fails (out of memory), otherwise OK. | |
3857 */ | |
3858 static int | |
3859 store_aff_word( | |
3860 spellinfo_T *spin, /* spell info */ | |
3861 char_u *word, /* basic word start */ | |
3862 char_u *afflist, /* list of names of supported affixes */ | |
3863 afffile_T *affile, | |
3864 hashtab_T *ht, | |
3865 hashtab_T *xht, | |
3866 int condit, /* CONDIT_SUF et al. */ | |
3867 int flags, /* flags for the word */ | |
3868 char_u *pfxlist, /* list of prefix IDs */ | |
3869 int pfxlen) /* nr of flags in "pfxlist" for prefixes, rest | |
3870 * is compound flags */ | |
3871 { | |
3872 int todo; | |
3873 hashitem_T *hi; | |
3874 affheader_T *ah; | |
3875 affentry_T *ae; | |
3876 char_u newword[MAXWLEN]; | |
3877 int retval = OK; | |
3878 int i, j; | |
3879 char_u *p; | |
3880 int use_flags; | |
3881 char_u *use_pfxlist; | |
3882 int use_pfxlen; | |
3883 int need_affix; | |
3884 char_u store_afflist[MAXWLEN]; | |
3885 char_u pfx_pfxlist[MAXWLEN]; | |
3886 size_t wordlen = STRLEN(word); | |
3887 int use_condit; | |
3888 | |
3889 todo = (int)ht->ht_used; /* nr of non-empty items still to visit; the loop also stops at the first failure */ | |
3890 for (hi = ht->ht_array; todo > 0 && retval == OK; ++hi) | |
3891 { | |
3892 if (!HASHITEM_EMPTY(hi)) | |
3893 { | |
3894 --todo; | |
3895 ah = HI2AH(hi); | |
3896 | |
3897 /* Check that the affix combines, if required, and that the word | |
3898 * supports this affix. */ | |
3899 if (((condit & CONDIT_COMB) == 0 || ah->ah_combine) | |
3900 && flag_in_afflist(affile->af_flagtype, afflist, | |
3901 ah->ah_flag)) | |
3902 { | |
3903 /* Loop over all affix entries with this name. */ | |
3904 for (ae = ah->ah_first; ae != NULL; ae = ae->ae_next) | |
3905 { | |
3906 /* Check the condition. It's not logical to match case | |
3907 * here, but it is required for compatibility with | |
3908 * Myspell. | |
3909 * Another requirement from Myspell is that the chop | |
3910 * string is shorter than the word itself. | |
3911 * For prefixes, when "PFXPOSTPONE" was used, only do | |
3912 * prefixes with a chop string and/or flags. | |
3913 * When a previously added affix had CIRCUMFIX this one | |
3914 * must have it too, if it had not then this one must not | |
3915 * have one either. */ | |
3916 if ((xht != NULL || !affile->af_pfxpostpone | |
3917 || ae->ae_chop != NULL | |
3918 || ae->ae_flags != NULL) | |
3919 && (ae->ae_chop == NULL | |
3920 || STRLEN(ae->ae_chop) < wordlen) | |
3921 && (ae->ae_prog == NULL | |
3922 || vim_regexec_prog(&ae->ae_prog, FALSE, | |
3923 word, (colnr_T)0)) | |
3924 && (((condit & CONDIT_CFIX) == 0) | |
3925 == ((condit & CONDIT_AFF) == 0 | |
3926 || ae->ae_flags == NULL | |
3927 || !flag_in_afflist(affile->af_flagtype, | |
3928 ae->ae_flags, affile->af_circumfix)))) | |
3929 { | |
3930 /* Match. Remove the chop and add the affix. */ | |
3931 if (xht == NULL) | |
3932 { | |
3933 /* prefix: chop/add at the start of the word */ | |
3934 if (ae->ae_add == NULL) | |
3935 *newword = NUL; | |
3936 else | |
3937 vim_strncpy(newword, ae->ae_add, MAXWLEN - 1); | |
3938 p = word; | |
3939 if (ae->ae_chop != NULL) | |
3940 { | |
3941 /* Skip chop string. */ | |
3942 #ifdef FEAT_MBYTE | |
3943 if (has_mbyte) | |
3944 { | |
3945 i = mb_charlen(ae->ae_chop); | |
3946 for ( ; i > 0; --i) | |
3947 mb_ptr_adv(p); | |
3948 } | |
3949 else | |
3950 #endif | |
3951 p += STRLEN(ae->ae_chop); | |
3952 } | |
3953 STRCAT(newword, p); /* NOTE(review): unbounded append into newword[MAXWLEN] - confirm callers limit the length of affix + word */ | |
3954 } | |
3955 else | |
3956 { | |
3957 /* suffix: chop/add at the end of the word */ | |
3958 vim_strncpy(newword, word, MAXWLEN - 1); | |
3959 if (ae->ae_chop != NULL) | |
3960 { | |
3961 /* Remove chop string. */ | |
3962 p = newword + STRLEN(newword); | |
3963 i = (int)MB_CHARLEN(ae->ae_chop); | |
3964 for ( ; i > 0; --i) | |
3965 mb_ptr_back(newword, p); | |
3966 *p = NUL; | |
3967 } | |
3968 if (ae->ae_add != NULL) | |
3969 STRCAT(newword, ae->ae_add); /* NOTE(review): unbounded append into newword[MAXWLEN] - confirm callers limit the length of word + affix */ | |
3970 } | |
3971 | |
3972 use_flags = flags; | |
3973 use_pfxlist = pfxlist; | |
3974 use_pfxlen = pfxlen; | |
3975 need_affix = FALSE; | |
3976 use_condit = condit | CONDIT_COMB | CONDIT_AFF; /* conditions handed to the recursive calls below */ | |
3977 if (ae->ae_flags != NULL) | |
3978 { | |
3979 /* Extract flags from the affix list. */ | |
3980 use_flags |= get_affix_flags(affile, ae->ae_flags); | |
3981 | |
3982 if (affile->af_needaffix != 0 && flag_in_afflist( | |
3983 affile->af_flagtype, ae->ae_flags, | |
3984 affile->af_needaffix)) | |
3985 need_affix = TRUE; | |
3986 | |
3987 /* When there is a CIRCUMFIX flag the other affix | |
3988 * must also have it and we don't add the word | |
3989 * with one affix. */ | |
3990 if (affile->af_circumfix != 0 && flag_in_afflist( | |
3991 affile->af_flagtype, ae->ae_flags, | |
3992 affile->af_circumfix)) | |
3993 { | |
3994 use_condit |= CONDIT_CFIX; | |
3995 if ((condit & CONDIT_CFIX) == 0) | |
3996 need_affix = TRUE; | |
3997 } | |
3998 | |
3999 if (affile->af_pfxpostpone | |
4000 || spin->si_compflags != NULL) | |
4001 { | |
4002 if (affile->af_pfxpostpone) | |
4003 /* Get prefix IDS from the affix list. */ | |
4004 use_pfxlen = get_pfxlist(affile, | |
4005 ae->ae_flags, store_afflist); | |
4006 else | |
4007 use_pfxlen = 0; | |
4008 use_pfxlist = store_afflist; | |
4009 | |
4010 /* Combine the prefix IDs. Avoid adding the | |
4011 * same ID twice. */ | |
4012 for (i = 0; i < pfxlen; ++i) | |
4013 { | |
4014 for (j = 0; j < use_pfxlen; ++j) | |
4015 if (pfxlist[i] == use_pfxlist[j]) | |
4016 break; | |
4017 if (j == use_pfxlen) | |
4018 use_pfxlist[use_pfxlen++] = pfxlist[i]; | |
4019 } | |
4020 | |
4021 if (spin->si_compflags != NULL) | |
4022 /* Get compound IDS from the affix list. */ | |
4023 get_compflags(affile, ae->ae_flags, | |
4024 use_pfxlist + use_pfxlen); | |
4025 | |
4026 /* Combine the list of compound flags. | |
4027 * Concatenate them to the prefix IDs list. | |
4028 * Avoid adding the same ID twice. */ | |
4029 for (i = pfxlen; pfxlist[i] != NUL; ++i) | |
4030 { | |
4031 for (j = use_pfxlen; | |
4032 use_pfxlist[j] != NUL; ++j) | |
4033 if (pfxlist[i] == use_pfxlist[j]) | |
4034 break; | |
4035 if (use_pfxlist[j] == NUL) | |
4036 { | |
4037 use_pfxlist[j++] = pfxlist[i]; | |
4038 use_pfxlist[j] = NUL; | |
4039 } | |
4040 } | |
4041 } | |
4042 } | |
4043 | |
4044 /* Obey a "COMPOUNDFORBIDFLAG" of the affix: don't | |
4045 * use the compound flags. */ | |
4046 if (use_pfxlist != NULL && ae->ae_compforbid) | |
4047 { | |
4048 vim_strncpy(pfx_pfxlist, use_pfxlist, use_pfxlen); /* copy only the first use_pfxlen bytes (the prefix IDs), leaving out the compound flags that follow */ | |
4049 use_pfxlist = pfx_pfxlist; | |
4050 } | |
4051 | |
4052 /* When there are postponed prefixes... */ | |
4053 if (spin->si_prefroot != NULL | |
4054 && spin->si_prefroot->wn_sibling != NULL) | |
4055 { | |
4056 /* ... add a flag to indicate an affix was used. */ | |
4057 use_flags |= WF_HAS_AFF; | |
4058 | |
4059 /* ... don't use a prefix list if combining | |
4060 * affixes is not allowed. But do use the | |
4061 * compound flags after them. */ | |
4062 if (!ah->ah_combine && use_pfxlist != NULL) | |
4063 use_pfxlist += use_pfxlen; /* step over the prefix IDs so only the compound flags remain */ | |
4064 } | |
4065 | |
4066 /* When compounding is supported and there is no | |
4067 * "COMPOUNDPERMITFLAG" then forbid compounding on the | |
4068 * side where the affix is applied. */ | |
4069 if (spin->si_compflags != NULL && !ae->ae_comppermit) | |
4070 { | |
4071 if (xht != NULL) | |
4072 use_flags |= WF_NOCOMPAFT; | |
4073 else | |
4074 use_flags |= WF_NOCOMPBEF; | |
4075 } | |
4076 | |
4077 /* Store the modified word. */ | |
4078 if (store_word(spin, newword, use_flags, | |
4079 spin->si_region, use_pfxlist, | |
4080 need_affix) == FAIL) | |
4081 retval = FAIL; | |
4082 | |
4083 /* When added a prefix or a first suffix and the affix | |
4084 * has flags may add a(nother) suffix. RECURSIVE! */ | |
4085 if ((condit & CONDIT_SUF) && ae->ae_flags != NULL) | |
4086 if (store_aff_word(spin, newword, ae->ae_flags, | |
4087 affile, &affile->af_suff, xht, | |
4088 use_condit & (xht == NULL | |
4089 ? ~0 : ~CONDIT_SUF), /* after a suffix, clear CONDIT_SUF so the nested call adds no further suffix */ | |
4090 use_flags, use_pfxlist, pfxlen) == FAIL) | |
4091 retval = FAIL; | |
4092 | |
4093 /* When added a suffix and combining is allowed also | |
4094 * try adding a prefix additionally. Both for the | |
4095 * word flags and for the affix flags. RECURSIVE! */ | |
4096 if (xht != NULL && ah->ah_combine) | |
4097 { | |
4098 if (store_aff_word(spin, newword, | |
4099 afflist, affile, | |
4100 xht, NULL, use_condit, | |
4101 use_flags, use_pfxlist, | |
4102 pfxlen) == FAIL | |
4103 || (ae->ae_flags != NULL | |
4104 && store_aff_word(spin, newword, | |
4105 ae->ae_flags, affile, | |
4106 xht, NULL, use_condit, | |
4107 use_flags, use_pfxlist, | |
4108 pfxlen) == FAIL)) | |
4109 retval = FAIL; | |
4110 } | |
4111 } | |
4112 } | |
4113 } | |
4114 } | |
4115 } | |
4116 | |
4117 return retval; | |
4118 } | |
4119 | |
4120 /* | |
4121 * Read a file with a list of words. | |
4122 */ | |
4123 static int | |
4124 spell_read_wordfile(spellinfo_T *spin, char_u *fname) | |
4125 { | |
4126 FILE *fd; | |
4127 long lnum = 0; | |
4128 char_u rline[MAXLINELEN]; | |
4129 char_u *line; | |
4130 char_u *pc = NULL; | |
4131 char_u *p; | |
4132 int l; | |
4133 int retval = OK; | |
4134 int did_word = FALSE; | |
4135 int non_ascii = 0; | |
4136 int flags; | |
4137 int regionmask; | |
4138 | |
4139 /* | |
4140 * Open the file. | |
4141 */ | |
4142 fd = mch_fopen((char *)fname, "r"); | |
4143 if (fd == NULL) | |
4144 { | |
4145 EMSG2(_(e_notopen), fname); | |
4146 return FAIL; | |
4147 } | |
4148 | |
4149 vim_snprintf((char *)IObuff, IOSIZE, _("Reading word file %s ..."), fname); | |
4150 spell_message(spin, IObuff); | |
4151 | |
4152 /* | |
4153 * Read all the lines in the file one by one. | |
4154 */ | |
4155 while (!vim_fgets(rline, MAXLINELEN, fd) && !got_int) | |
4156 { | |
4157 line_breakcheck(); | |
4158 ++lnum; | |
4159 | |
4160 /* Skip comment lines. */ | |
4161 if (*rline == '#') | |
4162 continue; | |
4163 | |
4164 /* Remove CR, LF and white space from the end. */ | |
4165 l = (int)STRLEN(rline); | |
4166 while (l > 0 && rline[l - 1] <= ' ') | |
4167 --l; | |
4168 if (l == 0) | |
4169 continue; /* empty or blank line */ | |
4170 rline[l] = NUL; | |
4171 | |
4172 /* Convert from "/encoding={encoding}" to 'encoding' when needed. */ | |
4173 vim_free(pc); | |
4174 #ifdef FEAT_MBYTE | |
4175 if (spin->si_conv.vc_type != CONV_NONE) | |
4176 { | |
4177 pc = string_convert(&spin->si_conv, rline, NULL); | |
4178 if (pc == NULL) | |
4179 { | |
4180 smsg((char_u *)_("Conversion failure for word in %s line %d: %s"), | |
4181 fname, lnum, rline); | |
4182 continue; | |
4183 } | |
4184 line = pc; | |
4185 } | |
4186 else | |
4187 #endif | |
4188 { | |
4189 pc = NULL; | |
4190 line = rline; | |
4191 } | |
4192 | |
4193 if (*line == '/') | |
4194 { | |
4195 ++line; | |
4196 if (STRNCMP(line, "encoding=", 9) == 0) | |
4197 { | |
4198 if (spin->si_conv.vc_type != CONV_NONE) | |
4199 smsg((char_u *)_("Duplicate /encoding= line ignored in %s line %d: %s"), | |
4200 fname, lnum, line - 1); | |
4201 else if (did_word) | |
4202 smsg((char_u *)_("/encoding= line after word ignored in %s line %d: %s"), | |
4203 fname, lnum, line - 1); | |
4204 else | |
4205 { | |
4206 #ifdef FEAT_MBYTE | |
4207 char_u *enc; | |
4208 | |
4209 /* Setup for conversion to 'encoding'. */ | |
4210 line += 9; | |
4211 enc = enc_canonize(line); | |
4212 if (enc != NULL && !spin->si_ascii | |
4213 && convert_setup(&spin->si_conv, enc, | |
4214 p_enc) == FAIL) | |
4215 smsg((char_u *)_("Conversion in %s not supported: from %s to %s"), | |
4216 fname, line, p_enc); | |
4217 vim_free(enc); | |
4218 spin->si_conv.vc_fail = TRUE; | |
4219 #else | |
4220 smsg((char_u *)_("Conversion in %s not supported"), fname); | |
4221 #endif | |
4222 } | |
4223 continue; | |
4224 } | |
4225 | |
4226 if (STRNCMP(line, "regions=", 8) == 0) | |
4227 { | |
4228 if (spin->si_region_count > 1) | |
4229 smsg((char_u *)_("Duplicate /regions= line ignored in %s line %d: %s"), | |
4230 fname, lnum, line); | |
4231 else | |
4232 { | |
4233 line += 8; | |
4234 if (STRLEN(line) > 16) | |
4235 smsg((char_u *)_("Too many regions in %s line %d: %s"), | |
4236 fname, lnum, line); | |
4237 else | |
4238 { | |
4239 spin->si_region_count = (int)STRLEN(line) / 2; | |
4240 STRCPY(spin->si_region_name, line); | |
4241 | |
4242 /* Adjust the mask for a word valid in all regions. */ | |
4243 spin->si_region = (1 << spin->si_region_count) - 1; | |
4244 } | |
4245 } | |
4246 continue; | |
4247 } | |
4248 | |
4249 smsg((char_u *)_("/ line ignored in %s line %d: %s"), | |
4250 fname, lnum, line - 1); | |
4251 continue; | |
4252 } | |
4253 | |
4254 flags = 0; | |
4255 regionmask = spin->si_region; | |
4256 | |
4257 /* Check for flags and region after a slash. */ | |
4258 p = vim_strchr(line, '/'); | |
4259 if (p != NULL) | |
4260 { | |
4261 *p++ = NUL; | |
4262 while (*p != NUL) | |
4263 { | |
4264 if (*p == '=') /* keep-case word */ | |
4265 flags |= WF_KEEPCAP | WF_FIXCAP; | |
4266 else if (*p == '!') /* Bad, bad, wicked word. */ | |
4267 flags |= WF_BANNED; | |
4268 else if (*p == '?') /* Rare word. */ | |
4269 flags |= WF_RARE; | |
4270 else if (VIM_ISDIGIT(*p)) /* region number(s) */ | |
4271 { | |
4272 if ((flags & WF_REGION) == 0) /* first one */ | |
4273 regionmask = 0; | |
4274 flags |= WF_REGION; | |
4275 | |
4276 l = *p - '0'; | |
4277 if (l > spin->si_region_count) | |
4278 { | |
4279 smsg((char_u *)_("Invalid region nr in %s line %d: %s"), | |
4280 fname, lnum, p); | |
4281 break; | |
4282 } | |
4283 regionmask |= 1 << (l - 1); | |
4284 } | |
4285 else | |
4286 { | |
4287 smsg((char_u *)_("Unrecognized flags in %s line %d: %s"), | |
4288 fname, lnum, p); | |
4289 break; | |
4290 } | |
4291 ++p; | |
4292 } | |
4293 } | |
4294 | |
4295 /* Skip non-ASCII words when "spin->si_ascii" is TRUE. */ | |
4296 if (spin->si_ascii && has_non_ascii(line)) | |
4297 { | |
4298 ++non_ascii; | |
4299 continue; | |
4300 } | |
4301 | |
4302 /* Normal word: store it. */ | |
4303 if (store_word(spin, line, flags, regionmask, NULL, FALSE) == FAIL) | |
4304 { | |
4305 retval = FAIL; | |
4306 break; | |
4307 } | |
4308 did_word = TRUE; | |
4309 } | |
4310 | |
4311 vim_free(pc); | |
4312 fclose(fd); | |
4313 | |
4314 if (spin->si_ascii && non_ascii > 0) | |
4315 { | |
4316 vim_snprintf((char *)IObuff, IOSIZE, | |
4317 _("Ignored %d words with non-ASCII characters"), non_ascii); | |
4318 spell_message(spin, IObuff); | |
4319 } | |
4320 | |
4321 return retval; | |
4322 } | |
4323 | |
4324 /* | |
4325 * Get part of an sblock_T, "len" bytes long. | |
4326 * This avoids calling free() for every little struct we use (and keeping | |
4327 * track of them). | |
4328 * The memory is cleared to all zeros. | |
4329 * Returns NULL when out of memory. | |
4330 */ | |
4331 static void * | |
4332 getroom( | |
4333 spellinfo_T *spin, | |
4334 size_t len, /* length needed */ | |
4335 int align) /* align for pointer */ | |
4336 { | |
4337 char_u *p; | |
4338 sblock_T *bl = spin->si_blocks; | |
4339 | |
4340 if (align && bl != NULL) | |
4341 /* Round size up for alignment. On some systems structures need to be | |
4342 * aligned to the size of a pointer (e.g., SPARC). */ | |
4343 bl->sb_used = (bl->sb_used + sizeof(char *) - 1) | |
4344 & ~(sizeof(char *) - 1); | |
4345 | |
4346 if (bl == NULL || bl->sb_used + len > SBLOCKSIZE) | |
4347 { | |
4348 if (len >= SBLOCKSIZE) | |
4349 bl = NULL; | |
4350 else | |
4351 /* Allocate a block of memory. It is not freed until much later. */ | |
4352 bl = (sblock_T *)alloc_clear( | |
4353 (unsigned)(sizeof(sblock_T) + SBLOCKSIZE)); | |
4354 if (bl == NULL) | |
4355 { | |
4356 if (!spin->si_did_emsg) | |
4357 { | |
4358 EMSG(_("E845: Insufficient memory, word list will be incomplete")); | |
4359 spin->si_did_emsg = TRUE; | |
4360 } | |
4361 return NULL; | |
4362 } | |
4363 bl->sb_next = spin->si_blocks; | |
4364 spin->si_blocks = bl; | |
4365 bl->sb_used = 0; | |
4366 ++spin->si_blocks_cnt; | |
4367 } | |
4368 | |
4369 p = bl->sb_data + bl->sb_used; | |
4370 bl->sb_used += (int)len; | |
4371 | |
4372 return p; | |
4373 } | |
4374 | |
4375 /* | |
4376 * Make a copy of a string into memory allocated with getroom(). | |
4377 * Returns NULL when out of memory. | |
4378 */ | |
4379 static char_u * | |
4380 getroom_save(spellinfo_T *spin, char_u *s) | |
4381 { | |
4382 char_u *sc; | |
4383 | |
4384 sc = (char_u *)getroom(spin, STRLEN(s) + 1, FALSE); | |
4385 if (sc != NULL) | |
4386 STRCPY(sc, s); | |
4387 return sc; | |
4388 } | |
4389 | |
4390 | |
4391 /* | |
4392 * Free the list of allocated sblock_T. | |
4393 */ | |
4394 static void | |
4395 free_blocks(sblock_T *bl) | |
4396 { | |
4397 sblock_T *next; | |
4398 | |
4399 while (bl != NULL) | |
4400 { | |
4401 next = bl->sb_next; | |
4402 vim_free(bl); | |
4403 bl = next; | |
4404 } | |
4405 } | |
4406 | |
4407 /* | |
4408 * Allocate the root of a word tree. | |
4409 * Returns NULL when out of memory. | |
4410 */ | |
4411 static wordnode_T * | |
4412 wordtree_alloc(spellinfo_T *spin) | |
4413 { | |
4414 return (wordnode_T *)getroom(spin, sizeof(wordnode_T), TRUE); | |
4415 } | |
4416 | |
4417 /* | |
4418 * Store a word in the tree(s). | |
4419 * Always store it in the case-folded tree. For a keep-case word this is | |
4420 * useful when the word can also be used with all caps (no WF_FIXCAP flag) and | |
4421 * used to find suggestions. | |
4422 * For a keep-case word also store it in the keep-case tree. | |
4423 * When "pfxlist" is not NULL store the word for each postponed prefix ID and | |
4424 * compound flag. | |
4425 */ | |
4426 static int | |
4427 store_word( | |
4428 spellinfo_T *spin, | |
4429 char_u *word, | |
4430 int flags, /* extra flags, WF_BANNED */ | |
4431 int region, /* supported region(s) */ | |
4432 char_u *pfxlist, /* list of prefix IDs or NULL */ | |
4433 int need_affix) /* only store word with affix ID */ | |
4434 { | |
4435 int len = (int)STRLEN(word); | |
4436 int ct = captype(word, word + len); | |
4437 char_u foldword[MAXWLEN]; | |
4438 int res = OK; | |
4439 char_u *p; | |
4440 | |
4441 (void)spell_casefold(word, len, foldword, MAXWLEN); | |
4442 for (p = pfxlist; res == OK; ++p) | |
4443 { | |
4444 if (!need_affix || (p != NULL && *p != NUL)) | |
4445 res = tree_add_word(spin, foldword, spin->si_foldroot, ct | flags, | |
4446 region, p == NULL ? 0 : *p); | |
4447 if (p == NULL || *p == NUL) | |
4448 break; | |
4449 } | |
4450 ++spin->si_foldwcount; | |
4451 | |
4452 if (res == OK && (ct == WF_KEEPCAP || (flags & WF_KEEPCAP))) | |
4453 { | |
4454 for (p = pfxlist; res == OK; ++p) | |
4455 { | |
4456 if (!need_affix || (p != NULL && *p != NUL)) | |
4457 res = tree_add_word(spin, word, spin->si_keeproot, flags, | |
4458 region, p == NULL ? 0 : *p); | |
4459 if (p == NULL || *p == NUL) | |
4460 break; | |
4461 } | |
4462 ++spin->si_keepwcount; | |
4463 } | |
4464 return res; | |
4465 } | |
4466 | |
4467 /* | |
4468 * Add word "word" to a word tree at "root". | |
4469 * When "flags" < 0 we are adding to the prefix tree where "flags" is used for | |
4470 * "rare" and "region" is the condition nr.  "affixID" is stored in the end-of-word node. | |
4471 * Returns FAIL when out of memory (get_wordnode() returned NULL), otherwise OK.  Before returning OK the trees may be compressed, depending on the si_compress_cnt and si_blocks_cnt bookkeeping. | |
4472 */ | |
4473 static int | |
4474 tree_add_word( | |
4475 spellinfo_T *spin, | |
4476 char_u *word, | |
4477 wordnode_T *root, | |
4478 int flags, | |
4479 int region, | |
4480 int affixID) | |
4481 { | |
4482 wordnode_T *node = root; | |
4483 wordnode_T *np; | |
4484 wordnode_T *copyp, **copyprev; | |
4485 wordnode_T **prev = NULL; | |
4486 int i; | |
4487 | |
4488 /* Add each byte of the word to the tree, including the NUL at the end. */ | |
4489 for (i = 0; ; ++i) | |
4490 { | |
4491 /* When there is more than one reference to this node we need to make | |
4492 * a copy, so that we can modify it. Copy the whole list of siblings | |
4493 * (we don't optimize for a partly shared list of siblings). */ | |
4494 if (node != NULL && node->wn_refs > 1) | |
4495 { | |
4496 --node->wn_refs; /* this path drops its reference to the shared list and uses the copy made below */ | |
4497 copyprev = prev; | |
4498 for (copyp = node; copyp != NULL; copyp = copyp->wn_sibling) | |
4499 { | |
4500 /* Allocate a new node and copy the info. */ | |
4501 np = get_wordnode(spin); | |
4502 if (np == NULL) | |
4503 return FAIL; | |
4504 np->wn_child = copyp->wn_child; | |
4505 if (np->wn_child != NULL) | |
4506 ++np->wn_child->wn_refs; /* child gets extra ref */ | |
4507 np->wn_byte = copyp->wn_byte; | |
4508 if (np->wn_byte == NUL) | |
4509 { | |
4510 np->wn_flags = copyp->wn_flags; | |
4511 np->wn_region = copyp->wn_region; | |
4512 np->wn_affixID = copyp->wn_affixID; | |
4513 } | |
4514 | |
4515 /* Link the new node in the list, there will be one ref. */ | |
4516 np->wn_refs = 1; | |
4517 if (copyprev != NULL) | |
4518 *copyprev = np; | |
4519 copyprev = &np->wn_sibling; | |
4520 | |
4521 /* Let "node" point to the head of the copied list. */ | |
4522 if (copyp == node) | |
4523 node = np; | |
4524 } | |
4525 } | |
4526 | |
4527 /* Look for the sibling that has the same character. They are sorted | |
4528 * on byte value, thus stop searching when a sibling is found with a | |
4529 * higher byte value. For zero bytes (end of word) the sorting is | |
4530 * done on flags and then on affixID. */ | |
4531 while (node != NULL | |
4532 && (node->wn_byte < word[i] | |
4533 || (node->wn_byte == NUL | |
4534 && (flags < 0 | |
4535 ? node->wn_affixID < (unsigned)affixID | |
4536 : (node->wn_flags < (unsigned)(flags & WN_MASK) | |
4537 || (node->wn_flags == (flags & WN_MASK) | |
4538 && (spin->si_sugtree | |
4539 ? (node->wn_region & 0xffff) < region | |
4540 : node->wn_affixID | |
4541 < (unsigned)affixID))))))) | |
4542 { | |
4543 prev = &node->wn_sibling; | |
4544 node = *prev; | |
4545 } | |
4546 if (node == NULL | |
4547 || node->wn_byte != word[i] | |
4548 || (word[i] == NUL | |
4549 && (flags < 0 | |
4550 || spin->si_sugtree | |
4551 || node->wn_flags != (flags & WN_MASK) | |
4552 || node->wn_affixID != affixID))) | |
4553 { | |
4554 /* Allocate a new node. */ | |
4555 np = get_wordnode(spin); | |
4556 if (np == NULL) | |
4557 return FAIL; | |
4558 np->wn_byte = word[i]; | |
4559 | |
4560 /* If "node" is NULL this is a new child or the end of the sibling | |
4561 * list: ref count is one. Otherwise use ref count of sibling and | |
4562 * make ref count of sibling one (matters when inserting in front | |
4563 * of the list of siblings). */ | |
4564 if (node == NULL) | |
4565 np->wn_refs = 1; | |
4566 else | |
4567 { | |
4568 np->wn_refs = node->wn_refs; | |
4569 node->wn_refs = 1; | |
4570 } | |
4571 if (prev != NULL) | |
4572 *prev = np; | |
4573 np->wn_sibling = node; | |
4574 node = np; | |
4575 } | |
4576 | |
4577 if (word[i] == NUL) | |
4578 { | |
4579 node->wn_flags = flags; | |
4580 node->wn_region |= region; /* regions are OR-ed in, so an existing end node keeps the regions it already had */ | |
4581 node->wn_affixID = affixID; | |
4582 break; | |
4583 } | |
4584 prev = &node->wn_child; | |
4585 node = *prev; | |
4586 } | |
4587 #ifdef SPELL_PRINTTREE | |
4588 smsg((char_u *)"Added \"%s\"", word); | |
4589 spell_print_tree(root->wn_sibling); | |
4590 #endif | |
4591 | |
4592 /* count nr of words added since last message */ | |
4593 ++spin->si_msg_count; | |
4594 | |
4595 if (spin->si_compress_cnt > 1) | |
4596 { | |
4597 if (--spin->si_compress_cnt == 1) | |
4598 /* Did enough words to lower the block count limit. */ | |
4599 spin->si_blocks_cnt += compress_inc; | |
4600 } | |
4601 | |
4602 /* | |
4603 * When we have allocated lots of memory we need to compress the word tree | |
4604 * to free up some room. But compression is slow, and we might actually | |
4605 * need that room, thus only compress in the following situations: | |
4606 * 1. When not compressed before (si_compress_cnt == 0): when using | |
4607 * "compress_start" blocks. | |
4608 * 2. When compressed before and used "compress_inc" blocks before | |
4609 * adding "compress_added" words (si_compress_cnt > 1). | |
4610 * 3. When compressed before, added "compress_added" words | |
4611 * (si_compress_cnt == 1) and the number of free nodes drops below the | |
4612 * maximum word length. | |
4613 */ | |
4614 #ifndef SPELL_COMPRESS_ALLWAYS | |
4615 if (spin->si_compress_cnt == 1 | |
4616 ? spin->si_free_count < MAXWLEN | |
4617 : spin->si_blocks_cnt >= compress_start) | |
4618 #endif | |
4619 { | |
4620 /* Decrement the block counter. The effect is that we compress again | |
4621 * when the freed up room has been used and another "compress_inc" | |
4622 * blocks have been allocated. Unless "compress_added" words have | |
4623 * been added, then the limit is put back again. */ | |
4624 spin->si_blocks_cnt -= compress_inc; | |
4625 spin->si_compress_cnt = compress_added; | |
4626 | |
4627 if (spin->si_verbose) | |
4628 { | |
4629 msg_start(); | |
4630 msg_puts((char_u *)_(msg_compressing)); | |
4631 msg_clr_eos(); | |
4632 msg_didout = FALSE; | |
4633 msg_col = 0; | |
4634 out_flush(); | |
4635 } | |
4636 | |
4637 /* Compress both trees. Either they both have many nodes, which makes | |
4638 * compression useful, or one of them is small, which means | |
4639 * compression goes fast. But when filling the soundfold word tree | |
4640 * there is no keep-case tree. */ | |
4641 wordtree_compress(spin, spin->si_foldroot); | |
4642 if (affixID >= 0) /* NOTE(review): presumably a negative affixID marks the soundfold tree, which has no keep-case tree - confirm against the callers */ | |
4643 wordtree_compress(spin, spin->si_keeproot); | |
4644 } | |
4645 | |
4646 return OK; | |
4647 } | |
4648 | |
4649 /* | |
4650 * Get a wordnode_T, either from the list of previously freed nodes or | |
4651 * allocate a new one. | |
4652 * Returns NULL when out of memory. | |
4653 */ | |
4654 static wordnode_T * | |
4655 get_wordnode(spellinfo_T *spin) | |
4656 { | |
4657 wordnode_T *n; | |
4658 | |
4659 if (spin->si_first_free == NULL) | |
4660 n = (wordnode_T *)getroom(spin, sizeof(wordnode_T), TRUE); | |
4661 else | |
4662 { | |
4663 n = spin->si_first_free; | |
4664 spin->si_first_free = n->wn_child; | |
4665 vim_memset(n, 0, sizeof(wordnode_T)); | |
4666 --spin->si_free_count; | |
4667 } | |
4668 #ifdef SPELL_PRINTTREE | |
4669 if (n != NULL) | |
4670 n->wn_nr = ++spin->si_wordnode_nr; | |
4671 #endif | |
4672 return n; | |
4673 } | |
4674 | |
4675 /* | |
4676 * Decrement the reference count on a node (which is the head of a list of | |
4677 * siblings). If the reference count becomes zero free the node and its | |
4678 * siblings. | |
4679 * Returns the number of nodes actually freed. | |
4680 */ | |
4681 static int | |
4682 deref_wordnode(spellinfo_T *spin, wordnode_T *node) | |
4683 { | |
4684 wordnode_T *np; | |
4685 int cnt = 0; | |
4686 | |
4687 if (--node->wn_refs == 0) | |
4688 { | |
4689 for (np = node; np != NULL; np = np->wn_sibling) | |
4690 { | |
4691 if (np->wn_child != NULL) | |
4692 cnt += deref_wordnode(spin, np->wn_child); | |
4693 free_wordnode(spin, np); | |
4694 ++cnt; | |
4695 } | |
4696 ++cnt; /* length field */ | |
4697 } | |
4698 return cnt; | |
4699 } | |
4700 | |
4701 /* | |
4702 * Free a wordnode_T for re-use later. | |
4703 * Only the "wn_child" field becomes invalid. | |
4704 */ | |
4705 static void | |
4706 free_wordnode(spellinfo_T *spin, wordnode_T *n) | |
4707 { | |
4708 n->wn_child = spin->si_first_free; | |
4709 spin->si_first_free = n; | |
4710 ++spin->si_free_count; | |
4711 } | |
4712 | |
4713 /* | |
4714 * Compress a tree: find tails that are identical and can be shared. | |
4715 */ | |
4716 static void | |
4717 wordtree_compress(spellinfo_T *spin, wordnode_T *root) | |
4718 { | |
4719 hashtab_T ht; | |
4720 int n; | |
4721 int tot = 0; | |
4722 int perc; | |
4723 | |
4724 /* Skip the root itself, it's not actually used. The first sibling is the | |
4725 * start of the tree. */ | |
4726 if (root->wn_sibling != NULL) | |
4727 { | |
4728 hash_init(&ht); | |
4729 n = node_compress(spin, root->wn_sibling, &ht, &tot); | |
4730 | |
4731 #ifndef SPELL_PRINTTREE | |
4732 if (spin->si_verbose || p_verbose > 2) | |
4733 #endif | |
4734 { | |
4735 if (tot > 1000000) | |
4736 perc = (tot - n) / (tot / 100); | |
4737 else if (tot == 0) | |
4738 perc = 0; | |
4739 else | |
4740 perc = (tot - n) * 100 / tot; | |
4741 vim_snprintf((char *)IObuff, IOSIZE, | |
4742 _("Compressed %d of %d nodes; %d (%d%%) remaining"), | |
4743 n, tot, tot - n, perc); | |
4744 spell_message(spin, IObuff); | |
4745 } | |
4746 #ifdef SPELL_PRINTTREE | |
4747 spell_print_tree(root->wn_sibling); | |
4748 #endif | |
4749 hash_clear(&ht); | |
4750 } | |
4751 } | |
4752 | |
4753 /* | |
4754 * Compress a node, its siblings and its children, depth first. | |
4755 * Returns the number of compressed nodes, which is the sum of what deref_wordnode() reports for every child that was replaced by an identical one.  Also fills node->wn_u1.hashkey with a NUL-terminated key of five bytes for the sibling list and adds the length of that list plus one to "*tot". | |
4756 */ | |
4757 static int | |
4758 node_compress( | |
4759 spellinfo_T *spin, | |
4760 wordnode_T *node, | |
4761 hashtab_T *ht, | |
4762 int *tot) /* total count of nodes before compressing, | |
4763 incremented while going through the tree */ | |
4764 { | |
4765 wordnode_T *np; | |
4766 wordnode_T *tp; | |
4767 wordnode_T *child; | |
4768 hash_T hash; | |
4769 hashitem_T *hi; | |
4770 int len = 0; | |
4771 unsigned nr, n; | |
4772 int compressed = 0; | |
4773 | |
4774 /* | |
4775 * Go through the list of siblings. Compress each child and then try | |
4776 * finding an identical child to replace it. | |
4777 * Note that with "child" we mean not just the node that is pointed to, | |
4778 * but the whole list of siblings of which the child node is the first. | |
4779 */ | |
4780 for (np = node; np != NULL && !got_int; np = np->wn_sibling) | |
4781 { | |
4782 ++len; | |
4783 if ((child = np->wn_child) != NULL) | |
4784 { | |
4785 /* Compress the child first. This fills hashkey. */ | |
4786 compressed += node_compress(spin, child, ht, tot); | |
4787 | |
4788 /* Try to find an identical child. */ | |
4789 hash = hash_hash(child->wn_u1.hashkey); | |
4790 hi = hash_lookup(ht, child->wn_u1.hashkey, hash); | |
4791 if (!HASHITEM_EMPTY(hi)) | |
4792 { | |
4793 /* There are children we encountered before with a hash value | |
4794 * identical to the current child. Now check if there is one | |
4795 * that is really identical. */ | |
4796 for (tp = HI2WN(hi); tp != NULL; tp = tp->wn_u2.next) | |
4797 if (node_equal(child, tp)) | |
4798 { | |
4799 /* Found one! Now use that child in place of the | |
4800 * current one. This means the current child and all | |
4801 * its siblings is unlinked from the tree. */ | |
4802 ++tp->wn_refs; /* "np" is about to refer to "tp" instead of "child" */ | |
4803 compressed += deref_wordnode(spin, child); | |
4804 np->wn_child = tp; | |
4805 break; | |
4806 } | |
4807 if (tp == NULL) | |
4808 { | |
4809 /* No other child with this hash value equals the child of | |
4810 * the node, add it to the linked list after the first | |
4811 * item. */ | |
4812 tp = HI2WN(hi); | |
4813 child->wn_u2.next = tp->wn_u2.next; | |
4814 tp->wn_u2.next = child; | |
4815 } | |
4816 } | |
4817 else | |
4818 /* No other child has this hash value, add it to the | |
4819 * hashtable. */ | |
4820 hash_add_item(ht, hi, child->wn_u1.hashkey, hash); | |
4821 } | |
4822 } | |
4823 *tot += len + 1; /* add one for the node that stores the length */ | |
4824 | |
4825 /* | |
4826 * Make a hash key for the node and its siblings, so that we can quickly | |
4827 * find a lookalike node. This must be done after compressing the sibling | |
4828 * list, otherwise the hash key would become invalid by the compression. | |
4829 */ | |
4830 node->wn_u1.hashkey[0] = len; /* first key byte is the number of siblings visited; NOTE(review): if hashkey holds bytes, a count that is a multiple of 256 would store NUL here and end the key early - confirm this cannot happen */ | |
4831 nr = 0; | |
4832 for (np = node; np != NULL; np = np->wn_sibling) | |
4833 { | |
4834 if (np->wn_byte == NUL) | |
4835 /* end node: use wn_flags, wn_region and wn_affixID */ | |
4836 n = np->wn_flags + (np->wn_region << 8) + (np->wn_affixID << 16); | |
4837 else | |
4838 /* byte node: use the byte value and the child pointer */ | |
4839 n = (unsigned)(np->wn_byte + ((long_u)np->wn_child << 8)); | |
4840 nr = nr * 101 + n; /* fold every sibling into one unsigned value, which may wrap around */ | |
4841 } | |
4842 | |
4843 /* Avoid NUL bytes, it terminates the hash key. */ | |
4844 n = nr & 0xff; | |
4845 node->wn_u1.hashkey[1] = n == 0 ? 1 : n; | |
4846 n = (nr >> 8) & 0xff; | |
4847 node->wn_u1.hashkey[2] = n == 0 ? 1 : n; | |
4848 n = (nr >> 16) & 0xff; | |
4849 node->wn_u1.hashkey[3] = n == 0 ? 1 : n; | |
4850 n = (nr >> 24) & 0xff; | |
4851 node->wn_u1.hashkey[4] = n == 0 ? 1 : n; | |
4852 node->wn_u1.hashkey[5] = NUL; | |
4853 | |
4854 /* Check for CTRL-C pressed now and then. */ | |
4855 fast_breakcheck(); | |
4856 | |
4857 return compressed; | |
4858 } | |
4859 | |
4860 /* | |
4861 * Return TRUE when two nodes have identical siblings and children. | |
4862 */ | |
4863 static int | |
4864 node_equal(wordnode_T *n1, wordnode_T *n2) | |
4865 { | |
4866 wordnode_T *p1; | |
4867 wordnode_T *p2; | |
4868 | |
4869 for (p1 = n1, p2 = n2; p1 != NULL && p2 != NULL; | |
4870 p1 = p1->wn_sibling, p2 = p2->wn_sibling) | |
4871 if (p1->wn_byte != p2->wn_byte | |
4872 || (p1->wn_byte == NUL | |
4873 ? (p1->wn_flags != p2->wn_flags | |
4874 || p1->wn_region != p2->wn_region | |
4875 || p1->wn_affixID != p2->wn_affixID) | |
4876 : (p1->wn_child != p2->wn_child))) | |
4877 break; | |
4878 | |
4879 return p1 == NULL && p2 == NULL; | |
4880 } | |
4881 | |
4882 static int | |
4883 #ifdef __BORLANDC__ | |
4884 _RTLENTRYF | |
4885 #endif | |
4886 rep_compare(const void *s1, const void *s2); | |
4887 | |
4888 /* | |
4889 * Function given to qsort() to sort the REP items on "from" string. | |
4890 */ | |
4891 static int | |
4892 #ifdef __BORLANDC__ | |
4893 _RTLENTRYF | |
4894 #endif | |
4895 rep_compare(const void *s1, const void *s2) | |
4896 { | |
4897 fromto_T *p1 = (fromto_T *)s1; | |
4898 fromto_T *p2 = (fromto_T *)s2; | |
4899 | |
4900 return STRCMP(p1->ft_from, p2->ft_from); | |
4901 } | |
4902 | |
4903 /* | |
4904 * Write the Vim .spl file "fname". | |
4905 * Return FAIL or OK; | |
4906 */ | |
4907 static int | |
4908 write_vim_spell(spellinfo_T *spin, char_u *fname) | |
4909 { | |
4910 FILE *fd; | |
4911 int regionmask; | |
4912 int round; | |
4913 wordnode_T *tree; | |
4914 int nodecount; | |
4915 int i; | |
4916 int l; | |
4917 garray_T *gap; | |
4918 fromto_T *ftp; | |
4919 char_u *p; | |
4920 int rr; | |
4921 int retval = OK; | |
4922 size_t fwv = 1; /* collect return value of fwrite() to avoid | |
4923 warnings from picky compiler */ | |
4924 | |
4925 fd = mch_fopen((char *)fname, "w"); | |
4926 if (fd == NULL) | |
4927 { | |
4928 EMSG2(_(e_notopen), fname); | |
4929 return FAIL; | |
4930 } | |
4931 | |
4932 /* <HEADER>: <fileID> <versionnr> */ | |
4933 /* <fileID> */ | |
4934 fwv &= fwrite(VIMSPELLMAGIC, VIMSPELLMAGICL, (size_t)1, fd); | |
4935 if (fwv != (size_t)1) | |
4936 /* Catch first write error, don't try writing more. */ | |
4937 goto theend; | |
4938 | |
4939 putc(VIMSPELLVERSION, fd); /* <versionnr> */ | |
4940 | |
4941 /* | |
4942 * <SECTIONS>: <section> ... <sectionend> | |
4943 */ | |
4944 | |
4945 /* SN_INFO: <infotext> */ | |
4946 if (spin->si_info != NULL) | |
4947 { | |
4948 putc(SN_INFO, fd); /* <sectionID> */ | |
4949 putc(0, fd); /* <sectionflags> */ | |
4950 | |
4951 i = (int)STRLEN(spin->si_info); | |
4952 put_bytes(fd, (long_u)i, 4); /* <sectionlen> */ | |
4953 fwv &= fwrite(spin->si_info, (size_t)i, (size_t)1, fd); /* <infotext> */ | |
4954 } | |
4955 | |
4956 /* SN_REGION: <regionname> ... | |
4957 * Write the region names only if there is more than one. */ | |
4958 if (spin->si_region_count > 1) | |
4959 { | |
4960 putc(SN_REGION, fd); /* <sectionID> */ | |
4961 putc(SNF_REQUIRED, fd); /* <sectionflags> */ | |
4962 l = spin->si_region_count * 2; | |
4963 put_bytes(fd, (long_u)l, 4); /* <sectionlen> */ | |
4964 fwv &= fwrite(spin->si_region_name, (size_t)l, (size_t)1, fd); | |
4965 /* <regionname> ... */ | |
4966 regionmask = (1 << spin->si_region_count) - 1; | |
4967 } | |
4968 else | |
4969 regionmask = 0; | |
4970 | |
4971 /* SN_CHARFLAGS: <charflagslen> <charflags> <folcharslen> <folchars> | |
4972 * | |
4973 * The table with character flags and the table for case folding. | |
4974 * This makes sure the same characters are recognized as word characters | |
4975 * when generating an when using a spell file. | |
4976 * Skip this for ASCII, the table may conflict with the one used for | |
4977 * 'encoding'. | |
4978 * Also skip this for an .add.spl file, the main spell file must contain | |
4979 * the table (avoids that it conflicts). File is shorter too. | |
4980 */ | |
4981 if (!spin->si_ascii && !spin->si_add) | |
4982 { | |
4983 char_u folchars[128 * 8]; | |
4984 int flags; | |
4985 | |
4986 putc(SN_CHARFLAGS, fd); /* <sectionID> */ | |
4987 putc(SNF_REQUIRED, fd); /* <sectionflags> */ | |
4988 | |
4989 /* Form the <folchars> string first, we need to know its length. */ | |
4990 l = 0; | |
4991 for (i = 128; i < 256; ++i) | |
4992 { | |
4993 #ifdef FEAT_MBYTE | |
4994 if (has_mbyte) | |
4995 l += mb_char2bytes(spelltab.st_fold[i], folchars + l); | |
4996 else | |
4997 #endif | |
4998 folchars[l++] = spelltab.st_fold[i]; | |
4999 } | |
5000 put_bytes(fd, (long_u)(1 + 128 + 2 + l), 4); /* <sectionlen> */ | |
5001 | |
5002 fputc(128, fd); /* <charflagslen> */ | |
5003 for (i = 128; i < 256; ++i) | |
5004 { | |
5005 flags = 0; | |
5006 if (spelltab.st_isw[i]) | |
5007 flags |= CF_WORD; | |
5008 if (spelltab.st_isu[i]) | |
5009 flags |= CF_UPPER; | |
5010 fputc(flags, fd); /* <charflags> */ | |
5011 } | |
5012 | |
5013 put_bytes(fd, (long_u)l, 2); /* <folcharslen> */ | |
5014 fwv &= fwrite(folchars, (size_t)l, (size_t)1, fd); /* <folchars> */ | |
5015 } | |
5016 | |
5017 /* SN_MIDWORD: <midword> */ | |
5018 if (spin->si_midword != NULL) | |
5019 { | |
5020 putc(SN_MIDWORD, fd); /* <sectionID> */ | |
5021 putc(SNF_REQUIRED, fd); /* <sectionflags> */ | |
5022 | |
5023 i = (int)STRLEN(spin->si_midword); | |
5024 put_bytes(fd, (long_u)i, 4); /* <sectionlen> */ | |
5025 fwv &= fwrite(spin->si_midword, (size_t)i, (size_t)1, fd); | |
5026 /* <midword> */ | |
5027 } | |
5028 | |
5029 /* SN_PREFCOND: <prefcondcnt> <prefcond> ... */ | |
5030 if (spin->si_prefcond.ga_len > 0) | |
5031 { | |
5032 putc(SN_PREFCOND, fd); /* <sectionID> */ | |
5033 putc(SNF_REQUIRED, fd); /* <sectionflags> */ | |
5034 | |
5035 l = write_spell_prefcond(NULL, &spin->si_prefcond); | |
5036 put_bytes(fd, (long_u)l, 4); /* <sectionlen> */ | |
5037 | |
5038 write_spell_prefcond(fd, &spin->si_prefcond); | |
5039 } | |
5040 | |
5041 /* SN_REP: <repcount> <rep> ... | |
5042 * SN_SAL: <salflags> <salcount> <sal> ... | |
5043 * SN_REPSAL: <repcount> <rep> ... */ | |
5044 | |
5045 /* round 1: SN_REP section | |
5046 * round 2: SN_SAL section (unless SN_SOFO is used) | |
5047 * round 3: SN_REPSAL section */ | |
5048 for (round = 1; round <= 3; ++round) | |
5049 { | |
5050 if (round == 1) | |
5051 gap = &spin->si_rep; | |
5052 else if (round == 2) | |
5053 { | |
5054 /* Don't write SN_SAL when using a SN_SOFO section */ | |
5055 if (spin->si_sofofr != NULL && spin->si_sofoto != NULL) | |
5056 continue; | |
5057 gap = &spin->si_sal; | |
5058 } | |
5059 else | |
5060 gap = &spin->si_repsal; | |
5061 | |
5062 /* Don't write the section if there are no items. */ | |
5063 if (gap->ga_len == 0) | |
5064 continue; | |
5065 | |
5066 /* Sort the REP/REPSAL items. */ | |
5067 if (round != 2) | |
5068 qsort(gap->ga_data, (size_t)gap->ga_len, | |
5069 sizeof(fromto_T), rep_compare); | |
5070 | |
5071 i = round == 1 ? SN_REP : (round == 2 ? SN_SAL : SN_REPSAL); | |
5072 putc(i, fd); /* <sectionID> */ | |
5073 | |
5074 /* This is for making suggestions, section is not required. */ | |
5075 putc(0, fd); /* <sectionflags> */ | |
5076 | |
5077 /* Compute the length of what follows. */ | |
5078 l = 2; /* count <repcount> or <salcount> */ | |
5079 for (i = 0; i < gap->ga_len; ++i) | |
5080 { | |
5081 ftp = &((fromto_T *)gap->ga_data)[i]; | |
5082 l += 1 + (int)STRLEN(ftp->ft_from); /* count <*fromlen> and <*from> */ | |
5083 l += 1 + (int)STRLEN(ftp->ft_to); /* count <*tolen> and <*to> */ | |
5084 } | |
5085 if (round == 2) | |
5086 ++l; /* count <salflags> */ | |
5087 put_bytes(fd, (long_u)l, 4); /* <sectionlen> */ | |
5088 | |
5089 if (round == 2) | |
5090 { | |
5091 i = 0; | |
5092 if (spin->si_followup) | |
5093 i |= SAL_F0LLOWUP; | |
5094 if (spin->si_collapse) | |
5095 i |= SAL_COLLAPSE; | |
5096 if (spin->si_rem_accents) | |
5097 i |= SAL_REM_ACCENTS; | |
5098 putc(i, fd); /* <salflags> */ | |
5099 } | |
5100 | |
5101 put_bytes(fd, (long_u)gap->ga_len, 2); /* <repcount> or <salcount> */ | |
5102 for (i = 0; i < gap->ga_len; ++i) | |
5103 { | |
5104 /* <rep> : <repfromlen> <repfrom> <reptolen> <repto> */ | |
5105 /* <sal> : <salfromlen> <salfrom> <saltolen> <salto> */ | |
5106 ftp = &((fromto_T *)gap->ga_data)[i]; | |
5107 for (rr = 1; rr <= 2; ++rr) | |
5108 { | |
5109 p = rr == 1 ? ftp->ft_from : ftp->ft_to; | |
5110 l = (int)STRLEN(p); | |
5111 putc(l, fd); | |
5112 if (l > 0) | |
5113 fwv &= fwrite(p, l, (size_t)1, fd); | |
5114 } | |
5115 } | |
5116 | |
5117 } | |
5118 | |
5119 /* SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto> | |
5120 * This is for making suggestions, section is not required. */ | |
5121 if (spin->si_sofofr != NULL && spin->si_sofoto != NULL) | |
5122 { | |
5123 putc(SN_SOFO, fd); /* <sectionID> */ | |
5124 putc(0, fd); /* <sectionflags> */ | |
5125 | |
5126 l = (int)STRLEN(spin->si_sofofr); | |
5127 put_bytes(fd, (long_u)(l + STRLEN(spin->si_sofoto) + 4), 4); | |
5128 /* <sectionlen> */ | |
5129 | |
5130 put_bytes(fd, (long_u)l, 2); /* <sofofromlen> */ | |
5131 fwv &= fwrite(spin->si_sofofr, l, (size_t)1, fd); /* <sofofrom> */ | |
5132 | |
5133 l = (int)STRLEN(spin->si_sofoto); | |
5134 put_bytes(fd, (long_u)l, 2); /* <sofotolen> */ | |
5135 fwv &= fwrite(spin->si_sofoto, l, (size_t)1, fd); /* <sofoto> */ | |
5136 } | |
5137 | |
5138 /* SN_WORDS: <word> ... | |
5139 * This is for making suggestions, section is not required. */ | |
5140 if (spin->si_commonwords.ht_used > 0) | |
5141 { | |
5142 putc(SN_WORDS, fd); /* <sectionID> */ | |
5143 putc(0, fd); /* <sectionflags> */ | |
5144 | |
5145 /* round 1: count the bytes | |
5146 * round 2: write the bytes */ | |
5147 for (round = 1; round <= 2; ++round) | |
5148 { | |
5149 int todo; | |
5150 int len = 0; | |
5151 hashitem_T *hi; | |
5152 | |
5153 todo = (int)spin->si_commonwords.ht_used; | |
5154 for (hi = spin->si_commonwords.ht_array; todo > 0; ++hi) | |
5155 if (!HASHITEM_EMPTY(hi)) | |
5156 { | |
5157 l = (int)STRLEN(hi->hi_key) + 1; | |
5158 len += l; | |
5159 if (round == 2) /* <word> */ | |
5160 fwv &= fwrite(hi->hi_key, (size_t)l, (size_t)1, fd); | |
5161 --todo; | |
5162 } | |
5163 if (round == 1) | |
5164 put_bytes(fd, (long_u)len, 4); /* <sectionlen> */ | |
5165 } | |
5166 } | |
5167 | |
5168 /* SN_MAP: <mapstr> | |
5169 * This is for making suggestions, section is not required. */ | |
5170 if (spin->si_map.ga_len > 0) | |
5171 { | |
5172 putc(SN_MAP, fd); /* <sectionID> */ | |
5173 putc(0, fd); /* <sectionflags> */ | |
5174 l = spin->si_map.ga_len; | |
5175 put_bytes(fd, (long_u)l, 4); /* <sectionlen> */ | |
5176 fwv &= fwrite(spin->si_map.ga_data, (size_t)l, (size_t)1, fd); | |
5177 /* <mapstr> */ | |
5178 } | |
5179 | |
5180 /* SN_SUGFILE: <timestamp> | |
5181 * This is used to notify that a .sug file may be available and at the | |
5182 * same time allows for checking that a .sug file that is found matches | |
5183 * with this .spl file. That's because the word numbers must be exactly | |
5184 * right. */ | |
5185 if (!spin->si_nosugfile | |
5186 && (spin->si_sal.ga_len > 0 | |
5187 || (spin->si_sofofr != NULL && spin->si_sofoto != NULL))) | |
5188 { | |
5189 putc(SN_SUGFILE, fd); /* <sectionID> */ | |
5190 putc(0, fd); /* <sectionflags> */ | |
5191 put_bytes(fd, (long_u)8, 4); /* <sectionlen> */ | |
5192 | |
5193 /* Set si_sugtime and write it to the file. */ | |
5194 spin->si_sugtime = time(NULL); | |
5195 put_time(fd, spin->si_sugtime); /* <timestamp> */ | |
5196 } | |
5197 | |
5198 /* SN_NOSPLITSUGS: nothing | |
5199 * This is used to notify that no suggestions with word splits are to be | |
5200 * made. */ | |
5201 if (spin->si_nosplitsugs) | |
5202 { | |
5203 putc(SN_NOSPLITSUGS, fd); /* <sectionID> */ | |
5204 putc(0, fd); /* <sectionflags> */ | |
5205 put_bytes(fd, (long_u)0, 4); /* <sectionlen> */ | |
5206 } | |
5207 | |
5208 /* SN_NOCOMPUNDSUGS: nothing | |
5209 * This is used to notify that no suggestions with compounds are to be | |
5210 * made. */ | |
5211 if (spin->si_nocompoundsugs) | |
5212 { | |
5213 putc(SN_NOCOMPOUNDSUGS, fd); /* <sectionID> */ | |
5214 putc(0, fd); /* <sectionflags> */ | |
5215 put_bytes(fd, (long_u)0, 4); /* <sectionlen> */ | |
5216 } | |
5217 | |
5218 /* SN_COMPOUND: compound info. | |
5219 * We don't mark it required, when not supported all compound words will | |
5220 * be bad words. */ | |
5221 if (spin->si_compflags != NULL) | |
5222 { | |
5223 putc(SN_COMPOUND, fd); /* <sectionID> */ | |
5224 putc(0, fd); /* <sectionflags> */ | |
5225 | |
5226 l = (int)STRLEN(spin->si_compflags); | |
5227 for (i = 0; i < spin->si_comppat.ga_len; ++i) | |
5228 l += (int)STRLEN(((char_u **)(spin->si_comppat.ga_data))[i]) + 1; | |
5229 put_bytes(fd, (long_u)(l + 7), 4); /* <sectionlen> */ | |
5230 | |
5231 putc(spin->si_compmax, fd); /* <compmax> */ | |
5232 putc(spin->si_compminlen, fd); /* <compminlen> */ | |
5233 putc(spin->si_compsylmax, fd); /* <compsylmax> */ | |
5234 putc(0, fd); /* for Vim 7.0b compatibility */ | |
5235 putc(spin->si_compoptions, fd); /* <compoptions> */ | |
5236 put_bytes(fd, (long_u)spin->si_comppat.ga_len, 2); | |
5237 /* <comppatcount> */ | |
5238 for (i = 0; i < spin->si_comppat.ga_len; ++i) | |
5239 { | |
5240 p = ((char_u **)(spin->si_comppat.ga_data))[i]; | |
5241 putc((int)STRLEN(p), fd); /* <comppatlen> */ | |
5242 fwv &= fwrite(p, (size_t)STRLEN(p), (size_t)1, fd); | |
5243 /* <comppattext> */ | |
5244 } | |
5245 /* <compflags> */ | |
5246 fwv &= fwrite(spin->si_compflags, (size_t)STRLEN(spin->si_compflags), | |
5247 (size_t)1, fd); | |
5248 } | |
5249 | |
5250 /* SN_NOBREAK: NOBREAK flag */ | |
5251 if (spin->si_nobreak) | |
5252 { | |
5253 putc(SN_NOBREAK, fd); /* <sectionID> */ | |
5254 putc(0, fd); /* <sectionflags> */ | |
5255 | |
5256 /* It's empty, the presence of the section flags the feature. */ | |
5257 put_bytes(fd, (long_u)0, 4); /* <sectionlen> */ | |
5258 } | |
5259 | |
5260 /* SN_SYLLABLE: syllable info. | |
5261 * We don't mark it required, when not supported syllables will not be | |
5262 * counted. */ | |
5263 if (spin->si_syllable != NULL) | |
5264 { | |
5265 putc(SN_SYLLABLE, fd); /* <sectionID> */ | |
5266 putc(0, fd); /* <sectionflags> */ | |
5267 | |
5268 l = (int)STRLEN(spin->si_syllable); | |
5269 put_bytes(fd, (long_u)l, 4); /* <sectionlen> */ | |
5270 fwv &= fwrite(spin->si_syllable, (size_t)l, (size_t)1, fd); | |
5271 /* <syllable> */ | |
5272 } | |
5273 | |
5274 /* end of <SECTIONS> */ | |
5275 putc(SN_END, fd); /* <sectionend> */ | |
5276 | |
5277 | |
5278 /* | |
5279 * <LWORDTREE> <KWORDTREE> <PREFIXTREE> | |
5280 */ | |
5281 spin->si_memtot = 0; | |
5282 for (round = 1; round <= 3; ++round) | |
5283 { | |
5284 if (round == 1) | |
5285 tree = spin->si_foldroot->wn_sibling; | |
5286 else if (round == 2) | |
5287 tree = spin->si_keeproot->wn_sibling; | |
5288 else | |
5289 tree = spin->si_prefroot->wn_sibling; | |
5290 | |
5291 /* Clear the index and wnode fields in the tree. */ | |
5292 clear_node(tree); | |
5293 | |
5294 /* Count the number of nodes. Needed to be able to allocate the | |
5295 * memory when reading the nodes. Also fills in index for shared | |
5296 * nodes. */ | |
5297 nodecount = put_node(NULL, tree, 0, regionmask, round == 3); | |
5298 | |
5299 /* number of nodes in 4 bytes */ | |
5300 put_bytes(fd, (long_u)nodecount, 4); /* <nodecount> */ | |
5301 spin->si_memtot += nodecount + nodecount * sizeof(int); | |
5302 | |
5303 /* Write the nodes. */ | |
5304 (void)put_node(fd, tree, 0, regionmask, round == 3); | |
5305 } | |
5306 | |
5307 /* Write another byte to check for errors (file system full). */ | |
5308 if (putc(0, fd) == EOF) | |
5309 retval = FAIL; | |
5310 theend: | |
5311 if (fclose(fd) == EOF) | |
5312 retval = FAIL; | |
5313 | |
5314 if (fwv != (size_t)1) | |
5315 retval = FAIL; | |
5316 if (retval == FAIL) | |
5317 EMSG(_(e_write)); | |
5318 | |
5319 return retval; | |
5320 } | |
5321 | |
5322 /* | |
5323 * Clear the index and wnode fields of "node", it siblings and its | |
5324 * children. This is needed because they are a union with other items to save | |
5325 * space. | |
5326 */ | |
5327 static void | |
5328 clear_node(wordnode_T *node) | |
5329 { | |
5330 wordnode_T *np; | |
5331 | |
5332 if (node != NULL) | |
5333 for (np = node; np != NULL; np = np->wn_sibling) | |
5334 { | |
5335 np->wn_u1.index = 0; | |
5336 np->wn_u2.wnode = NULL; | |
5337 | |
5338 if (np->wn_byte != NUL) | |
5339 clear_node(np->wn_child); | |
5340 } | |
5341 } | |
5342 | |
5343 | |
5344 /* | |
5345 * Dump a word tree at node "node". | |
5346 * | |
5347 * This first writes the list of possible bytes (siblings). Then for each | |
5348 * byte recursively write the children. | |
5349 * | |
5350 * NOTE: The code here must match the code in read_tree_node(), since | |
5351 * assumptions are made about the indexes (so that we don't have to write them | |
5352 * in the file). | |
5353 * | |
5354 * Returns the number of nodes used. | |
5355 */ | |
5356 static int | |
5357 put_node( | |
5358 FILE *fd, /* NULL when only counting */ | |
5359 wordnode_T *node, | |
5360 int idx, | |
5361 int regionmask, | |
5362 int prefixtree) /* TRUE for PREFIXTREE */ | |
5363 { | |
5364 int newindex = idx; | |
5365 int siblingcount = 0; | |
5366 wordnode_T *np; | |
5367 int flags; | |
5368 | |
5369 /* If "node" is zero the tree is empty. */ | |
5370 if (node == NULL) | |
5371 return 0; | |
5372 | |
5373 /* Store the index where this node is written. */ | |
5374 node->wn_u1.index = idx; | |
5375 | |
5376 /* Count the number of siblings. */ | |
5377 for (np = node; np != NULL; np = np->wn_sibling) | |
5378 ++siblingcount; | |
5379 | |
5380 /* Write the sibling count. */ | |
5381 if (fd != NULL) | |
5382 putc(siblingcount, fd); /* <siblingcount> */ | |
5383 | |
5384 /* Write each sibling byte and optionally extra info. */ | |
5385 for (np = node; np != NULL; np = np->wn_sibling) | |
5386 { | |
5387 if (np->wn_byte == 0) | |
5388 { | |
5389 if (fd != NULL) | |
5390 { | |
5391 /* For a NUL byte (end of word) write the flags etc. */ | |
5392 if (prefixtree) | |
5393 { | |
5394 /* In PREFIXTREE write the required affixID and the | |
5395 * associated condition nr (stored in wn_region). The | |
5396 * byte value is misused to store the "rare" and "not | |
5397 * combining" flags */ | |
5398 if (np->wn_flags == (short_u)PFX_FLAGS) | |
5399 putc(BY_NOFLAGS, fd); /* <byte> */ | |
5400 else | |
5401 { | |
5402 putc(BY_FLAGS, fd); /* <byte> */ | |
5403 putc(np->wn_flags, fd); /* <pflags> */ | |
5404 } | |
5405 putc(np->wn_affixID, fd); /* <affixID> */ | |
5406 put_bytes(fd, (long_u)np->wn_region, 2); /* <prefcondnr> */ | |
5407 } | |
5408 else | |
5409 { | |
5410 /* For word trees we write the flag/region items. */ | |
5411 flags = np->wn_flags; | |
5412 if (regionmask != 0 && np->wn_region != regionmask) | |
5413 flags |= WF_REGION; | |
5414 if (np->wn_affixID != 0) | |
5415 flags |= WF_AFX; | |
5416 if (flags == 0) | |
5417 { | |
5418 /* word without flags or region */ | |
5419 putc(BY_NOFLAGS, fd); /* <byte> */ | |
5420 } | |
5421 else | |
5422 { | |
5423 if (np->wn_flags >= 0x100) | |
5424 { | |
5425 putc(BY_FLAGS2, fd); /* <byte> */ | |
5426 putc(flags, fd); /* <flags> */ | |
5427 putc((unsigned)flags >> 8, fd); /* <flags2> */ | |
5428 } | |
5429 else | |
5430 { | |
5431 putc(BY_FLAGS, fd); /* <byte> */ | |
5432 putc(flags, fd); /* <flags> */ | |
5433 } | |
5434 if (flags & WF_REGION) | |
5435 putc(np->wn_region, fd); /* <region> */ | |
5436 if (flags & WF_AFX) | |
5437 putc(np->wn_affixID, fd); /* <affixID> */ | |
5438 } | |
5439 } | |
5440 } | |
5441 } | |
5442 else | |
5443 { | |
5444 if (np->wn_child->wn_u1.index != 0 | |
5445 && np->wn_child->wn_u2.wnode != node) | |
5446 { | |
5447 /* The child is written elsewhere, write the reference. */ | |
5448 if (fd != NULL) | |
5449 { | |
5450 putc(BY_INDEX, fd); /* <byte> */ | |
5451 /* <nodeidx> */ | |
5452 put_bytes(fd, (long_u)np->wn_child->wn_u1.index, 3); | |
5453 } | |
5454 } | |
5455 else if (np->wn_child->wn_u2.wnode == NULL) | |
5456 /* We will write the child below and give it an index. */ | |
5457 np->wn_child->wn_u2.wnode = node; | |
5458 | |
5459 if (fd != NULL) | |
5460 if (putc(np->wn_byte, fd) == EOF) /* <byte> or <xbyte> */ | |
5461 { | |
5462 EMSG(_(e_write)); | |
5463 return 0; | |
5464 } | |
5465 } | |
5466 } | |
5467 | |
5468 /* Space used in the array when reading: one for each sibling and one for | |
5469 * the count. */ | |
5470 newindex += siblingcount + 1; | |
5471 | |
5472 /* Recursively dump the children of each sibling. */ | |
5473 for (np = node; np != NULL; np = np->wn_sibling) | |
5474 if (np->wn_byte != 0 && np->wn_child->wn_u2.wnode == node) | |
5475 newindex = put_node(fd, np->wn_child, newindex, regionmask, | |
5476 prefixtree); | |
5477 | |
5478 return newindex; | |
5479 } | |
5480 | |
5481 | |
5482 /* | |
5483 * ":mkspell [-ascii] outfile infile ..." | |
5484 * ":mkspell [-ascii] addfile" | |
5485 */ | |
5486 void | |
5487 ex_mkspell(exarg_T *eap) | |
5488 { | |
5489 int fcount; | |
5490 char_u **fnames; | |
5491 char_u *arg = eap->arg; | |
5492 int ascii = FALSE; | |
5493 | |
5494 if (STRNCMP(arg, "-ascii", 6) == 0) | |
5495 { | |
5496 ascii = TRUE; | |
5497 arg = skipwhite(arg + 6); | |
5498 } | |
5499 | |
5500 /* Expand all the remaining arguments (e.g., $VIMRUNTIME). */ | |
5501 if (get_arglist_exp(arg, &fcount, &fnames, FALSE) == OK) | |
5502 { | |
5503 mkspell(fcount, fnames, ascii, eap->forceit, FALSE); | |
5504 FreeWild(fcount, fnames); | |
5505 } | |
5506 } | |
5507 | |
5508 /* | |
5509 * Create the .sug file. | |
5510 * Uses the soundfold info in "spin". | |
5511 * Writes the file with the name "wfname", with ".spl" changed to ".sug". | |
5512 */ | |
5513 static void | |
5514 spell_make_sugfile(spellinfo_T *spin, char_u *wfname) | |
5515 { | |
5516 char_u *fname = NULL; | |
5517 int len; | |
5518 slang_T *slang; | |
5519 int free_slang = FALSE; | |
5520 | |
5521 /* | |
5522 * Read back the .spl file that was written. This fills the required | |
5523 * info for soundfolding. This also uses less memory than the | |
5524 * pointer-linked version of the trie. And it avoids having two versions | |
5525 * of the code for the soundfolding stuff. | |
5526 * It might have been done already by spell_reload_one(). | |
5527 */ | |
5528 for (slang = first_lang; slang != NULL; slang = slang->sl_next) | |
5529 if (fullpathcmp(wfname, slang->sl_fname, FALSE) == FPC_SAME) | |
5530 break; | |
5531 if (slang == NULL) | |
5532 { | |
5533 spell_message(spin, (char_u *)_("Reading back spell file...")); | |
5534 slang = spell_load_file(wfname, NULL, NULL, FALSE); | |
5535 if (slang == NULL) | |
5536 return; | |
5537 free_slang = TRUE; | |
5538 } | |
5539 | |
5540 /* | |
5541 * Clear the info in "spin" that is used. | |
5542 */ | |
5543 spin->si_blocks = NULL; | |
5544 spin->si_blocks_cnt = 0; | |
5545 spin->si_compress_cnt = 0; /* will stay at 0 all the time*/ | |
5546 spin->si_free_count = 0; | |
5547 spin->si_first_free = NULL; | |
5548 spin->si_foldwcount = 0; | |
5549 | |
5550 /* | |
5551 * Go through the trie of good words, soundfold each word and add it to | |
5552 * the soundfold trie. | |
5553 */ | |
5554 spell_message(spin, (char_u *)_("Performing soundfolding...")); | |
5555 if (sug_filltree(spin, slang) == FAIL) | |
5556 goto theend; | |
5557 | |
5558 /* | |
5559 * Create the table which links each soundfold word with a list of the | |
5560 * good words it may come from. Creates buffer "spin->si_spellbuf". | |
5561 * This also removes the wordnr from the NUL byte entries to make | |
5562 * compression possible. | |
5563 */ | |
5564 if (sug_maketable(spin) == FAIL) | |
5565 goto theend; | |
5566 | |
5567 smsg((char_u *)_("Number of words after soundfolding: %ld"), | |
5568 (long)spin->si_spellbuf->b_ml.ml_line_count); | |
5569 | |
5570 /* | |
5571 * Compress the soundfold trie. | |
5572 */ | |
5573 spell_message(spin, (char_u *)_(msg_compressing)); | |
5574 wordtree_compress(spin, spin->si_foldroot); | |
5575 | |
5576 /* | |
5577 * Write the .sug file. | |
5578 * Make the file name by changing ".spl" to ".sug". | |
5579 */ | |
5580 fname = alloc(MAXPATHL); | |
5581 if (fname == NULL) | |
5582 goto theend; | |
5583 vim_strncpy(fname, wfname, MAXPATHL - 1); | |
5584 len = (int)STRLEN(fname); | |
5585 fname[len - 2] = 'u'; | |
5586 fname[len - 1] = 'g'; | |
5587 sug_write(spin, fname); | |
5588 | |
5589 theend: | |
5590 vim_free(fname); | |
5591 if (free_slang) | |
5592 slang_free(slang); | |
5593 free_blocks(spin->si_blocks); | |
5594 close_spellbuf(spin->si_spellbuf); | |
5595 } | |
5596 | |
5597 /* | |
5598 * Build the soundfold trie for language "slang". | |
5599 */ | |
5600 static int | |
5601 sug_filltree(spellinfo_T *spin, slang_T *slang) | |
5602 { | |
5603 char_u *byts; | |
5604 idx_T *idxs; | |
5605 int depth; | |
5606 idx_T arridx[MAXWLEN]; | |
5607 int curi[MAXWLEN]; | |
5608 char_u tword[MAXWLEN]; | |
5609 char_u tsalword[MAXWLEN]; | |
5610 int c; | |
5611 idx_T n; | |
5612 unsigned words_done = 0; | |
5613 int wordcount[MAXWLEN]; | |
5614 | |
5615 /* We use si_foldroot for the soundfolded trie. */ | |
5616 spin->si_foldroot = wordtree_alloc(spin); | |
5617 if (spin->si_foldroot == NULL) | |
5618 return FAIL; | |
5619 | |
5620 /* let tree_add_word() know we're adding to the soundfolded tree */ | |
5621 spin->si_sugtree = TRUE; | |
5622 | |
5623 /* | |
5624 * Go through the whole case-folded tree, soundfold each word and put it | |
5625 * in the trie. | |
5626 */ | |
5627 byts = slang->sl_fbyts; | |
5628 idxs = slang->sl_fidxs; | |
5629 | |
5630 arridx[0] = 0; | |
5631 curi[0] = 1; | |
5632 wordcount[0] = 0; | |
5633 | |
5634 depth = 0; | |
5635 while (depth >= 0 && !got_int) | |
5636 { | |
5637 if (curi[depth] > byts[arridx[depth]]) | |
5638 { | |
5639 /* Done all bytes at this node, go up one level. */ | |
5640 idxs[arridx[depth]] = wordcount[depth]; | |
5641 if (depth > 0) | |
5642 wordcount[depth - 1] += wordcount[depth]; | |
5643 | |
5644 --depth; | |
5645 line_breakcheck(); | |
5646 } | |
5647 else | |
5648 { | |
5649 | |
5650 /* Do one more byte at this node. */ | |
5651 n = arridx[depth] + curi[depth]; | |
5652 ++curi[depth]; | |
5653 | |
5654 c = byts[n]; | |
5655 if (c == 0) | |
5656 { | |
5657 /* Sound-fold the word. */ | |
5658 tword[depth] = NUL; | |
5659 spell_soundfold(slang, tword, TRUE, tsalword); | |
5660 | |
5661 /* We use the "flags" field for the MSB of the wordnr, | |
5662 * "region" for the LSB of the wordnr. */ | |
5663 if (tree_add_word(spin, tsalword, spin->si_foldroot, | |
5664 words_done >> 16, words_done & 0xffff, | |
5665 0) == FAIL) | |
5666 return FAIL; | |
5667 | |
5668 ++words_done; | |
5669 ++wordcount[depth]; | |
5670 | |
5671 /* Reset the block count each time to avoid compression | |
5672 * kicking in. */ | |
5673 spin->si_blocks_cnt = 0; | |
5674 | |
5675 /* Skip over any other NUL bytes (same word with different | |
5676 * flags). */ | |
5677 while (byts[n + 1] == 0) | |
5678 { | |
5679 ++n; | |
5680 ++curi[depth]; | |
5681 } | |
5682 } | |
5683 else | |
5684 { | |
5685 /* Normal char, go one level deeper. */ | |
5686 tword[depth++] = c; | |
5687 arridx[depth] = idxs[n]; | |
5688 curi[depth] = 1; | |
5689 wordcount[depth] = 0; | |
5690 } | |
5691 } | |
5692 } | |
5693 | |
5694 smsg((char_u *)_("Total number of words: %d"), words_done); | |
5695 | |
5696 return OK; | |
5697 } | |
5698 | |
5699 /* | |
5700 * Make the table that links each word in the soundfold trie to the words it | |
5701 * can be produced from. | |
5702 * This is not unlike lines in a file, thus use a memfile to be able to access | |
5703 * the table efficiently. | |
5704 * Returns FAIL when out of memory. | |
5705 */ | |
5706 static int | |
5707 sug_maketable(spellinfo_T *spin) | |
5708 { | |
5709 garray_T ga; | |
5710 int res = OK; | |
5711 | |
5712 /* Allocate a buffer, open a memline for it and create the swap file | |
5713 * (uses a temp file, not a .swp file). */ | |
5714 spin->si_spellbuf = open_spellbuf(); | |
5715 if (spin->si_spellbuf == NULL) | |
5716 return FAIL; | |
5717 | |
5718 /* Use a buffer to store the line info, avoids allocating many small | |
5719 * pieces of memory. */ | |
5720 ga_init2(&ga, 1, 100); | |
5721 | |
5722 /* recursively go through the tree */ | |
5723 if (sug_filltable(spin, spin->si_foldroot->wn_sibling, 0, &ga) == -1) | |
5724 res = FAIL; | |
5725 | |
5726 ga_clear(&ga); | |
5727 return res; | |
5728 } | |
5729 | |
5730 /* | |
5731 * Fill the table for one node and its children. | |
5732 * Returns the wordnr at the start of the node. | |
5733 * Returns -1 when out of memory. | |
5734 */ | |
5735 static int | |
5736 sug_filltable( | |
5737 spellinfo_T *spin, | |
5738 wordnode_T *node, | |
5739 int startwordnr, | |
5740 garray_T *gap) /* place to store line of numbers */ | |
5741 { | |
5742 wordnode_T *p, *np; | |
5743 int wordnr = startwordnr; | |
5744 int nr; | |
5745 int prev_nr; | |
5746 | |
5747 for (p = node; p != NULL; p = p->wn_sibling) | |
5748 { | |
5749 if (p->wn_byte == NUL) | |
5750 { | |
5751 gap->ga_len = 0; | |
5752 prev_nr = 0; | |
5753 for (np = p; np != NULL && np->wn_byte == NUL; np = np->wn_sibling) | |
5754 { | |
5755 if (ga_grow(gap, 10) == FAIL) | |
5756 return -1; | |
5757 | |
5758 nr = (np->wn_flags << 16) + (np->wn_region & 0xffff); | |
5759 /* Compute the offset from the previous nr and store the | |
5760 * offset in a way that it takes a minimum number of bytes. | |
5761 * It's a bit like utf-8, but without the need to mark | |
5762 * following bytes. */ | |
5763 nr -= prev_nr; | |
5764 prev_nr += nr; | |
5765 gap->ga_len += offset2bytes(nr, | |
5766 (char_u *)gap->ga_data + gap->ga_len); | |
5767 } | |
5768 | |
5769 /* add the NUL byte */ | |
5770 ((char_u *)gap->ga_data)[gap->ga_len++] = NUL; | |
5771 | |
5772 if (ml_append_buf(spin->si_spellbuf, (linenr_T)wordnr, | |
5773 gap->ga_data, gap->ga_len, TRUE) == FAIL) | |
5774 return -1; | |
5775 ++wordnr; | |
5776 | |
5777 /* Remove extra NUL entries, we no longer need them. We don't | |
5778 * bother freeing the nodes, the won't be reused anyway. */ | |
5779 while (p->wn_sibling != NULL && p->wn_sibling->wn_byte == NUL) | |
5780 p->wn_sibling = p->wn_sibling->wn_sibling; | |
5781 | |
5782 /* Clear the flags on the remaining NUL node, so that compression | |
5783 * works a lot better. */ | |
5784 p->wn_flags = 0; | |
5785 p->wn_region = 0; | |
5786 } | |
5787 else | |
5788 { | |
5789 wordnr = sug_filltable(spin, p->wn_child, wordnr, gap); | |
5790 if (wordnr == -1) | |
5791 return -1; | |
5792 } | |
5793 } | |
5794 return wordnr; | |
5795 } | |
5796 | |
5797 /* | |
5798 * Convert an offset into a minimal number of bytes. | |
5799 * Similar to utf_char2byters, but use 8 bits in followup bytes and avoid NUL | |
5800 * bytes. | |
5801 */ | |
5802 static int | |
5803 offset2bytes(int nr, char_u *buf) | |
5804 { | |
5805 int rem; | |
5806 int b1, b2, b3, b4; | |
5807 | |
5808 /* Split the number in parts of base 255. We need to avoid NUL bytes. */ | |
5809 b1 = nr % 255 + 1; | |
5810 rem = nr / 255; | |
5811 b2 = rem % 255 + 1; | |
5812 rem = rem / 255; | |
5813 b3 = rem % 255 + 1; | |
5814 b4 = rem / 255 + 1; | |
5815 | |
5816 if (b4 > 1 || b3 > 0x1f) /* 4 bytes */ | |
5817 { | |
5818 buf[0] = 0xe0 + b4; | |
5819 buf[1] = b3; | |
5820 buf[2] = b2; | |
5821 buf[3] = b1; | |
5822 return 4; | |
5823 } | |
5824 if (b3 > 1 || b2 > 0x3f ) /* 3 bytes */ | |
5825 { | |
5826 buf[0] = 0xc0 + b3; | |
5827 buf[1] = b2; | |
5828 buf[2] = b1; | |
5829 return 3; | |
5830 } | |
5831 if (b2 > 1 || b1 > 0x7f ) /* 2 bytes */ | |
5832 { | |
5833 buf[0] = 0x80 + b2; | |
5834 buf[1] = b1; | |
5835 return 2; | |
5836 } | |
5837 /* 1 byte */ | |
5838 buf[0] = b1; | |
5839 return 1; | |
5840 } | |
5841 | |
5842 /* | |
5843 * Write the .sug file in "fname". | |
5844 */ | |
5845 static void | |
5846 sug_write(spellinfo_T *spin, char_u *fname) | |
5847 { | |
5848 FILE *fd; | |
5849 wordnode_T *tree; | |
5850 int nodecount; | |
5851 int wcount; | |
5852 char_u *line; | |
5853 linenr_T lnum; | |
5854 int len; | |
5855 | |
5856 /* Create the file. Note that an existing file is silently overwritten! */ | |
5857 fd = mch_fopen((char *)fname, "w"); | |
5858 if (fd == NULL) | |
5859 { | |
5860 EMSG2(_(e_notopen), fname); | |
5861 return; | |
5862 } | |
5863 | |
5864 vim_snprintf((char *)IObuff, IOSIZE, | |
5865 _("Writing suggestion file %s ..."), fname); | |
5866 spell_message(spin, IObuff); | |
5867 | |
5868 /* | |
5869 * <SUGHEADER>: <fileID> <versionnr> <timestamp> | |
5870 */ | |
5871 if (fwrite(VIMSUGMAGIC, VIMSUGMAGICL, (size_t)1, fd) != 1) /* <fileID> */ | |
5872 { | |
5873 EMSG(_(e_write)); | |
5874 goto theend; | |
5875 } | |
5876 putc(VIMSUGVERSION, fd); /* <versionnr> */ | |
5877 | |
5878 /* Write si_sugtime to the file. */ | |
5879 put_time(fd, spin->si_sugtime); /* <timestamp> */ | |
5880 | |
5881 /* | |
5882 * <SUGWORDTREE> | |
5883 */ | |
5884 spin->si_memtot = 0; | |
5885 tree = spin->si_foldroot->wn_sibling; | |
5886 | |
5887 /* Clear the index and wnode fields in the tree. */ | |
5888 clear_node(tree); | |
5889 | |
5890 /* Count the number of nodes. Needed to be able to allocate the | |
5891 * memory when reading the nodes. Also fills in index for shared | |
5892 * nodes. */ | |
5893 nodecount = put_node(NULL, tree, 0, 0, FALSE); | |
5894 | |
5895 /* number of nodes in 4 bytes */ | |
5896 put_bytes(fd, (long_u)nodecount, 4); /* <nodecount> */ | |
5897 spin->si_memtot += nodecount + nodecount * sizeof(int); | |
5898 | |
5899 /* Write the nodes. */ | |
5900 (void)put_node(fd, tree, 0, 0, FALSE); | |
5901 | |
5902 /* | |
5903 * <SUGTABLE>: <sugwcount> <sugline> ... | |
5904 */ | |
5905 wcount = spin->si_spellbuf->b_ml.ml_line_count; | |
5906 put_bytes(fd, (long_u)wcount, 4); /* <sugwcount> */ | |
5907 | |
5908 for (lnum = 1; lnum <= (linenr_T)wcount; ++lnum) | |
5909 { | |
5910 /* <sugline>: <sugnr> ... NUL */ | |
5911 line = ml_get_buf(spin->si_spellbuf, lnum, FALSE); | |
5912 len = (int)STRLEN(line) + 1; | |
5913 if (fwrite(line, (size_t)len, (size_t)1, fd) == 0) | |
5914 { | |
5915 EMSG(_(e_write)); | |
5916 goto theend; | |
5917 } | |
5918 spin->si_memtot += len; | |
5919 } | |
5920 | |
5921 /* Write another byte to check for errors. */ | |
5922 if (putc(0, fd) == EOF) | |
5923 EMSG(_(e_write)); | |
5924 | |
5925 vim_snprintf((char *)IObuff, IOSIZE, | |
5926 _("Estimated runtime memory use: %d bytes"), spin->si_memtot); | |
5927 spell_message(spin, IObuff); | |
5928 | |
5929 theend: | |
5930 /* close the file */ | |
5931 fclose(fd); | |
5932 } | |
5933 | |
5934 | |
5935 /* | |
5936 * Create a Vim spell file from one or more word lists. | |
5937 * "fnames[0]" is the output file name. | |
5938 * "fnames[fcount - 1]" is the last input file name. | |
5939 * Exception: when "fnames[0]" ends in ".add" it's used as the input file name | |
5940 * and ".spl" is appended to make the output file name. | |
5941 */ | |
5942 void | |
5943 mkspell( | |
5944 int fcount, | |
5945 char_u **fnames, | |
5946 int ascii, /* -ascii argument given */ | |
5947 int over_write, /* overwrite existing output file */ | |
5948 int added_word) /* invoked through "zg" */ | |
5949 { | |
5950 char_u *fname = NULL; | |
5951 char_u *wfname; | |
5952 char_u **innames; | |
5953 int incount; | |
5954 afffile_T *(afile[8]); | |
5955 int i; | |
5956 int len; | |
5957 stat_T st; | |
5958 int error = FALSE; | |
5959 spellinfo_T spin; | |
5960 | |
5961 vim_memset(&spin, 0, sizeof(spin)); | |
5962 spin.si_verbose = !added_word; | |
5963 spin.si_ascii = ascii; | |
5964 spin.si_followup = TRUE; | |
5965 spin.si_rem_accents = TRUE; | |
5966 ga_init2(&spin.si_rep, (int)sizeof(fromto_T), 20); | |
5967 ga_init2(&spin.si_repsal, (int)sizeof(fromto_T), 20); | |
5968 ga_init2(&spin.si_sal, (int)sizeof(fromto_T), 20); | |
5969 ga_init2(&spin.si_map, (int)sizeof(char_u), 100); | |
5970 ga_init2(&spin.si_comppat, (int)sizeof(char_u *), 20); | |
5971 ga_init2(&spin.si_prefcond, (int)sizeof(char_u *), 50); | |
5972 hash_init(&spin.si_commonwords); | |
5973 spin.si_newcompID = 127; /* start compound ID at first maximum */ | |
5974 | |
5975 /* default: fnames[0] is output file, following are input files */ | |
5976 innames = &fnames[1]; | |
5977 incount = fcount - 1; | |
5978 | |
5979 wfname = alloc(MAXPATHL); | |
5980 if (wfname == NULL) | |
5981 return; | |
5982 | |
5983 if (fcount >= 1) | |
5984 { | |
5985 len = (int)STRLEN(fnames[0]); | |
5986 if (fcount == 1 && len > 4 && STRCMP(fnames[0] + len - 4, ".add") == 0) | |
5987 { | |
5988 /* For ":mkspell path/en.latin1.add" output file is | |
5989 * "path/en.latin1.add.spl". */ | |
5990 innames = &fnames[0]; | |
5991 incount = 1; | |
5992 vim_snprintf((char *)wfname, MAXPATHL, "%s.spl", fnames[0]); | |
5993 } | |
5994 else if (fcount == 1) | |
5995 { | |
5996 /* For ":mkspell path/vim" output file is "path/vim.latin1.spl". */ | |
5997 innames = &fnames[0]; | |
5998 incount = 1; | |
5999 vim_snprintf((char *)wfname, MAXPATHL, SPL_FNAME_TMPL, | |
6000 fnames[0], spin.si_ascii ? (char_u *)"ascii" : spell_enc()); | |
6001 } | |
6002 else if (len > 4 && STRCMP(fnames[0] + len - 4, ".spl") == 0) | |
6003 { | |
6004 /* Name ends in ".spl", use as the file name. */ | |
6005 vim_strncpy(wfname, fnames[0], MAXPATHL - 1); | |
6006 } | |
6007 else | |
6008 /* Name should be language, make the file name from it. */ | |
6009 vim_snprintf((char *)wfname, MAXPATHL, SPL_FNAME_TMPL, | |
6010 fnames[0], spin.si_ascii ? (char_u *)"ascii" : spell_enc()); | |
6011 | |
6012 /* Check for .ascii.spl. */ | |
6013 if (strstr((char *)gettail(wfname), SPL_FNAME_ASCII) != NULL) | |
6014 spin.si_ascii = TRUE; | |
6015 | |
6016 /* Check for .add.spl. */ | |
6017 if (strstr((char *)gettail(wfname), SPL_FNAME_ADD) != NULL) | |
6018 spin.si_add = TRUE; | |
6019 } | |
6020 | |
6021 if (incount <= 0) | |
6022 EMSG(_(e_invarg)); /* need at least output and input names */ | |
6023 else if (vim_strchr(gettail(wfname), '_') != NULL) | |
6024 EMSG(_("E751: Output file name must not have region name")); | |
6025 else if (incount > 8) | |
6026 EMSG(_("E754: Only up to 8 regions supported")); | |
6027 else | |
6028 { | |
6029 /* Check for overwriting before doing things that may take a lot of | |
6030 * time. */ | |
6031 if (!over_write && mch_stat((char *)wfname, &st) >= 0) | |
6032 { | |
6033 EMSG(_(e_exists)); | |
6034 goto theend; | |
6035 } | |
6036 if (mch_isdir(wfname)) | |
6037 { | |
6038 EMSG2(_(e_isadir2), wfname); | |
6039 goto theend; | |
6040 } | |
6041 | |
6042 fname = alloc(MAXPATHL); | |
6043 if (fname == NULL) | |
6044 goto theend; | |
6045 | |
6046 /* | |
6047 * Init the aff and dic pointers. | |
6048 * Get the region names if there are more than 2 arguments. | |
6049 */ | |
6050 for (i = 0; i < incount; ++i) | |
6051 { | |
6052 afile[i] = NULL; | |
6053 | |
6054 if (incount > 1) | |
6055 { | |
6056 len = (int)STRLEN(innames[i]); | |
6057 if (STRLEN(gettail(innames[i])) < 5 | |
6058 || innames[i][len - 3] != '_') | |
6059 { | |
6060 EMSG2(_("E755: Invalid region in %s"), innames[i]); | |
6061 goto theend; | |
6062 } | |
6063 spin.si_region_name[i * 2] = TOLOWER_ASC(innames[i][len - 2]); | |
6064 spin.si_region_name[i * 2 + 1] = | |
6065 TOLOWER_ASC(innames[i][len - 1]); | |
6066 } | |
6067 } | |
6068 spin.si_region_count = incount; | |
6069 | |
6070 spin.si_foldroot = wordtree_alloc(&spin); | |
6071 spin.si_keeproot = wordtree_alloc(&spin); | |
6072 spin.si_prefroot = wordtree_alloc(&spin); | |
6073 if (spin.si_foldroot == NULL | |
6074 || spin.si_keeproot == NULL | |
6075 || spin.si_prefroot == NULL) | |
6076 { | |
6077 free_blocks(spin.si_blocks); | |
6078 goto theend; | |
6079 } | |
6080 | |
6081 /* When not producing a .add.spl file clear the character table when | |
6082 * we encounter one in the .aff file. This means we dump the current | |
6083 * one in the .spl file if the .aff file doesn't define one. That's | |
6084 * better than guessing the contents, the table will match a | |
6085 * previously loaded spell file. */ | |
6086 if (!spin.si_add) | |
6087 spin.si_clear_chartab = TRUE; | |
6088 | |
6089 /* | |
6090 * Read all the .aff and .dic files. | |
6091 * Text is converted to 'encoding'. | |
6092 * Words are stored in the case-folded and keep-case trees. | |
6093 */ | |
6094 for (i = 0; i < incount && !error; ++i) | |
6095 { | |
6096 spin.si_conv.vc_type = CONV_NONE; | |
6097 spin.si_region = 1 << i; | |
6098 | |
6099 vim_snprintf((char *)fname, MAXPATHL, "%s.aff", innames[i]); | |
6100 if (mch_stat((char *)fname, &st) >= 0) | |
6101 { | |
6102 /* Read the .aff file. Will init "spin->si_conv" based on the | |
6103 * "SET" line. */ | |
6104 afile[i] = spell_read_aff(&spin, fname); | |
6105 if (afile[i] == NULL) | |
6106 error = TRUE; | |
6107 else | |
6108 { | |
6109 /* Read the .dic file and store the words in the trees. */ | |
6110 vim_snprintf((char *)fname, MAXPATHL, "%s.dic", | |
6111 innames[i]); | |
6112 if (spell_read_dic(&spin, fname, afile[i]) == FAIL) | |
6113 error = TRUE; | |
6114 } | |
6115 } | |
6116 else | |
6117 { | |
6118 /* No .aff file, try reading the file as a word list. Store | |
6119 * the words in the trees. */ | |
6120 if (spell_read_wordfile(&spin, innames[i]) == FAIL) | |
6121 error = TRUE; | |
6122 } | |
6123 | |
6124 #ifdef FEAT_MBYTE | |
6125 /* Free any conversion stuff. */ | |
6126 convert_setup(&spin.si_conv, NULL, NULL); | |
6127 #endif | |
6128 } | |
6129 | |
6130 if (spin.si_compflags != NULL && spin.si_nobreak) | |
6131 MSG(_("Warning: both compounding and NOBREAK specified")); | |
6132 | |
6133 if (!error && !got_int) | |
6134 { | |
6135 /* | |
6136 * Combine tails in the tree. | |
6137 */ | |
6138 spell_message(&spin, (char_u *)_(msg_compressing)); | |
6139 wordtree_compress(&spin, spin.si_foldroot); | |
6140 wordtree_compress(&spin, spin.si_keeproot); | |
6141 wordtree_compress(&spin, spin.si_prefroot); | |
6142 } | |
6143 | |
6144 if (!error && !got_int) | |
6145 { | |
6146 /* | |
6147 * Write the info in the spell file. | |
6148 */ | |
6149 vim_snprintf((char *)IObuff, IOSIZE, | |
6150 _("Writing spell file %s ..."), wfname); | |
6151 spell_message(&spin, IObuff); | |
6152 | |
6153 error = write_vim_spell(&spin, wfname) == FAIL; | |
6154 | |
6155 spell_message(&spin, (char_u *)_("Done!")); | |
6156 vim_snprintf((char *)IObuff, IOSIZE, | |
6157 _("Estimated runtime memory use: %d bytes"), spin.si_memtot); | |
6158 spell_message(&spin, IObuff); | |
6159 | |
6160 /* | |
6161 * If the file is loaded need to reload it. | |
6162 */ | |
6163 if (!error) | |
6164 spell_reload_one(wfname, added_word); | |
6165 } | |
6166 | |
6167 /* Free the allocated memory. */ | |
6168 ga_clear(&spin.si_rep); | |
6169 ga_clear(&spin.si_repsal); | |
6170 ga_clear(&spin.si_sal); | |
6171 ga_clear(&spin.si_map); | |
6172 ga_clear(&spin.si_comppat); | |
6173 ga_clear(&spin.si_prefcond); | |
6174 hash_clear_all(&spin.si_commonwords, 0); | |
6175 | |
6176 /* Free the .aff file structures. */ | |
6177 for (i = 0; i < incount; ++i) | |
6178 if (afile[i] != NULL) | |
6179 spell_free_aff(afile[i]); | |
6180 | |
6181 /* Free all the bits and pieces at once. */ | |
6182 free_blocks(spin.si_blocks); | |
6183 | |
6184 /* | |
6185 * If there is soundfolding info and no NOSUGFILE item create the | |
6186 * .sug file with the soundfolded word trie. | |
6187 */ | |
6188 if (spin.si_sugtime != 0 && !error && !got_int) | |
6189 spell_make_sugfile(&spin, wfname); | |
6190 | |
6191 } | |
6192 | |
6193 theend: | |
6194 vim_free(fname); | |
6195 vim_free(wfname); | |
6196 } | |
6197 | |
6198 /* | |
6199 * Display a message for spell file processing when 'verbose' is set or using | |
6200 * ":mkspell". "str" can be IObuff. | |
6201 */ | |
6202 static void | |
6203 spell_message(spellinfo_T *spin, char_u *str) | |
6204 { | |
6205 if (spin->si_verbose || p_verbose > 2) | |
6206 { | |
6207 if (!spin->si_verbose) | |
6208 verbose_enter(); | |
6209 MSG(str); | |
6210 out_flush(); | |
6211 if (!spin->si_verbose) | |
6212 verbose_leave(); | |
6213 } | |
6214 } | |
6215 | |
6216 /* | |
6217 * ":[count]spellgood {word}" | |
6218 * ":[count]spellwrong {word}" | |
6219 * ":[count]spellundo {word}" | |
6220 */ | |
6221 void | |
6222 ex_spell(exarg_T *eap) | |
6223 { | |
6224 spell_add_word(eap->arg, (int)STRLEN(eap->arg), eap->cmdidx == CMD_spellwrong, | |
6225 eap->forceit ? 0 : (int)eap->line2, | |
6226 eap->cmdidx == CMD_spellundo); | |
6227 } | |
6228 | |
6229 /* | |
6230 * Add "word[len]" to 'spellfile' as a good or bad word. | |
6231 */ | |
6232 void | |
6233 spell_add_word( | |
6234 char_u *word, | |
6235 int len, | |
6236 int bad, | |
6237 int idx, /* "zG" and "zW": zero, otherwise index in | |
6238 'spellfile' */ | |
6239 int undo) /* TRUE for "zug", "zuG", "zuw" and "zuW" */ | |
6240 { | |
6241 FILE *fd = NULL; | |
6242 buf_T *buf = NULL; | |
6243 int new_spf = FALSE; | |
6244 char_u *fname; | |
6245 char_u *fnamebuf = NULL; | |
6246 char_u line[MAXWLEN * 2]; | |
6247 long fpos, fpos_next = 0; | |
6248 int i; | |
6249 char_u *spf; | |
6250 | |
6251 if (idx == 0) /* use internal wordlist */ | |
6252 { | |
6253 if (int_wordlist == NULL) | |
6254 { | |
6255 int_wordlist = vim_tempname('s', FALSE); | |
6256 if (int_wordlist == NULL) | |
6257 return; | |
6258 } | |
6259 fname = int_wordlist; | |
6260 } | |
6261 else | |
6262 { | |
6263 /* If 'spellfile' isn't set figure out a good default value. */ | |
6264 if (*curwin->w_s->b_p_spf == NUL) | |
6265 { | |
6266 init_spellfile(); | |
6267 new_spf = TRUE; | |
6268 } | |
6269 | |
6270 if (*curwin->w_s->b_p_spf == NUL) | |
6271 { | |
6272 EMSG2(_(e_notset), "spellfile"); | |
6273 return; | |
6274 } | |
6275 fnamebuf = alloc(MAXPATHL); | |
6276 if (fnamebuf == NULL) | |
6277 return; | |
6278 | |
6279 for (spf = curwin->w_s->b_p_spf, i = 1; *spf != NUL; ++i) | |
6280 { | |
6281 copy_option_part(&spf, fnamebuf, MAXPATHL, ","); | |
6282 if (i == idx) | |
6283 break; | |
6284 if (*spf == NUL) | |
6285 { | |
6286 EMSGN(_("E765: 'spellfile' does not have %ld entries"), idx); | |
6287 vim_free(fnamebuf); | |
6288 return; | |
6289 } | |
6290 } | |
6291 | |
6292 /* Check that the user isn't editing the .add file somewhere. */ | |
6293 buf = buflist_findname_exp(fnamebuf); | |
6294 if (buf != NULL && buf->b_ml.ml_mfp == NULL) | |
6295 buf = NULL; | |
6296 if (buf != NULL && bufIsChanged(buf)) | |
6297 { | |
6298 EMSG(_(e_bufloaded)); | |
6299 vim_free(fnamebuf); | |
6300 return; | |
6301 } | |
6302 | |
6303 fname = fnamebuf; | |
6304 } | |
6305 | |
6306 if (bad || undo) | |
6307 { | |
6308 /* When the word appears as good word we need to remove that one, | |
6309 * since its flags sort before the one with WF_BANNED. */ | |
6310 fd = mch_fopen((char *)fname, "r"); | |
6311 if (fd != NULL) | |
6312 { | |
6313 while (!vim_fgets(line, MAXWLEN * 2, fd)) | |
6314 { | |
6315 fpos = fpos_next; | |
6316 fpos_next = ftell(fd); | |
6317 if (STRNCMP(word, line, len) == 0 | |
6318 && (line[len] == '/' || line[len] < ' ')) | |
6319 { | |
6320 /* Found duplicate word. Remove it by writing a '#' at | |
6321 * the start of the line. Mixing reading and writing | |
6322 * doesn't work for all systems, close the file first. */ | |
6323 fclose(fd); | |
6324 fd = mch_fopen((char *)fname, "r+"); | |
6325 if (fd == NULL) | |
6326 break; | |
6327 if (fseek(fd, fpos, SEEK_SET) == 0) | |
6328 { | |
6329 fputc('#', fd); | |
6330 if (undo) | |
6331 { | |
6332 home_replace(NULL, fname, NameBuff, MAXPATHL, TRUE); | |
6333 smsg((char_u *)_("Word '%.*s' removed from %s"), | |
6334 len, word, NameBuff); | |
6335 } | |
6336 } | |
6337 fseek(fd, fpos_next, SEEK_SET); | |
6338 } | |
6339 } | |
6340 if (fd != NULL) | |
6341 fclose(fd); | |
6342 } | |
6343 } | |
6344 | |
6345 if (!undo) | |
6346 { | |
6347 fd = mch_fopen((char *)fname, "a"); | |
6348 if (fd == NULL && new_spf) | |
6349 { | |
6350 char_u *p; | |
6351 | |
6352 /* We just initialized the 'spellfile' option and can't open the | |
6353 * file. We may need to create the "spell" directory first. We | |
6354 * already checked the runtime directory is writable in | |
6355 * init_spellfile(). */ | |
6356 if (!dir_of_file_exists(fname) && (p = gettail_sep(fname)) != fname) | |
6357 { | |
6358 int c = *p; | |
6359 | |
6360 /* The directory doesn't exist. Try creating it and opening | |
6361 * the file again. */ | |
6362 *p = NUL; | |
6363 vim_mkdir(fname, 0755); | |
6364 *p = c; | |
6365 fd = mch_fopen((char *)fname, "a"); | |
6366 } | |
6367 } | |
6368 | |
6369 if (fd == NULL) | |
6370 EMSG2(_(e_notopen), fname); | |
6371 else | |
6372 { | |
6373 if (bad) | |
6374 fprintf(fd, "%.*s/!\n", len, word); | |
6375 else | |
6376 fprintf(fd, "%.*s\n", len, word); | |
6377 fclose(fd); | |
6378 | |
6379 home_replace(NULL, fname, NameBuff, MAXPATHL, TRUE); | |
6380 smsg((char_u *)_("Word '%.*s' added to %s"), len, word, NameBuff); | |
6381 } | |
6382 } | |
6383 | |
6384 if (fd != NULL) | |
6385 { | |
6386 /* Update the .add.spl file. */ | |
6387 mkspell(1, &fname, FALSE, TRUE, TRUE); | |
6388 | |
6389 /* If the .add file is edited somewhere, reload it. */ | |
6390 if (buf != NULL) | |
6391 buf_reload(buf, buf->b_orig_mode); | |
6392 | |
6393 redraw_all_later(SOME_VALID); | |
6394 } | |
6395 vim_free(fnamebuf); | |
6396 } | |
6397 | |
6398 /* | |
6399 * Initialize 'spellfile' for the current buffer. | |
6400 */ | |
6401 static void | |
6402 init_spellfile(void) | |
6403 { | |
6404 char_u *buf; | |
6405 int l; | |
6406 char_u *fname; | |
6407 char_u *rtp; | |
6408 char_u *lend; | |
6409 int aspath = FALSE; | |
6410 char_u *lstart = curbuf->b_s.b_p_spl; | |
6411 | |
6412 if (*curwin->w_s->b_p_spl != NUL && curwin->w_s->b_langp.ga_len > 0) | |
6413 { | |
6414 buf = alloc(MAXPATHL); | |
6415 if (buf == NULL) | |
6416 return; | |
6417 | |
6418 /* Find the end of the language name. Exclude the region. If there | |
6419 * is a path separator remember the start of the tail. */ | |
6420 for (lend = curwin->w_s->b_p_spl; *lend != NUL | |
6421 && vim_strchr((char_u *)",._", *lend) == NULL; ++lend) | |
6422 if (vim_ispathsep(*lend)) | |
6423 { | |
6424 aspath = TRUE; | |
6425 lstart = lend + 1; | |
6426 } | |
6427 | |
6428 /* Loop over all entries in 'runtimepath'. Use the first one where we | |
6429 * are allowed to write. */ | |
6430 rtp = p_rtp; | |
6431 while (*rtp != NUL) | |
6432 { | |
6433 if (aspath) | |
6434 /* Use directory of an entry with path, e.g., for | |
6435 * "/dir/lg.utf-8.spl" use "/dir". */ | |
6436 vim_strncpy(buf, curbuf->b_s.b_p_spl, | |
6437 lstart - curbuf->b_s.b_p_spl - 1); | |
6438 else | |
6439 /* Copy the path from 'runtimepath' to buf[]. */ | |
6440 copy_option_part(&rtp, buf, MAXPATHL, ","); | |
6441 if (filewritable(buf) == 2) | |
6442 { | |
6443 /* Use the first language name from 'spelllang' and the | |
6444 * encoding used in the first loaded .spl file. */ | |
6445 if (aspath) | |
6446 vim_strncpy(buf, curbuf->b_s.b_p_spl, | |
6447 lend - curbuf->b_s.b_p_spl); | |
6448 else | |
6449 { | |
6450 /* Create the "spell" directory if it doesn't exist yet. */ | |
6451 l = (int)STRLEN(buf); | |
6452 vim_snprintf((char *)buf + l, MAXPATHL - l, "/spell"); | |
6453 if (filewritable(buf) != 2) | |
6454 vim_mkdir(buf, 0755); | |
6455 | |
6456 l = (int)STRLEN(buf); | |
6457 vim_snprintf((char *)buf + l, MAXPATHL - l, | |
6458 "/%.*s", (int)(lend - lstart), lstart); | |
6459 } | |
6460 l = (int)STRLEN(buf); | |
6461 fname = LANGP_ENTRY(curwin->w_s->b_langp, 0) | |
6462 ->lp_slang->sl_fname; | |
6463 vim_snprintf((char *)buf + l, MAXPATHL - l, ".%s.add", | |
6464 fname != NULL | |
6465 && strstr((char *)gettail(fname), ".ascii.") != NULL | |
6466 ? (char_u *)"ascii" : spell_enc()); | |
6467 set_option_value((char_u *)"spellfile", 0L, buf, OPT_LOCAL); | |
6468 break; | |
6469 } | |
6470 aspath = FALSE; | |
6471 } | |
6472 | |
6473 vim_free(buf); | |
6474 } | |
6475 } | |
6476 | |
6477 | |
6478 | |
6479 /* | |
6480 * Set the spell character tables from strings in the affix file. | |
6481 */ | |
6482 static int | |
6483 set_spell_chartab(char_u *fol, char_u *low, char_u *upp) | |
6484 { | |
6485 /* We build the new tables here first, so that we can compare with the | |
6486 * previous one. */ | |
6487 spelltab_T new_st; | |
6488 char_u *pf = fol, *pl = low, *pu = upp; | |
6489 int f, l, u; | |
6490 | |
6491 clear_spell_chartab(&new_st); | |
6492 | |
6493 while (*pf != NUL) | |
6494 { | |
6495 if (*pl == NUL || *pu == NUL) | |
6496 { | |
6497 EMSG(_(e_affform)); | |
6498 return FAIL; | |
6499 } | |
6500 #ifdef FEAT_MBYTE | |
6501 f = mb_ptr2char_adv(&pf); | |
6502 l = mb_ptr2char_adv(&pl); | |
6503 u = mb_ptr2char_adv(&pu); | |
6504 #else | |
6505 f = *pf++; | |
6506 l = *pl++; | |
6507 u = *pu++; | |
6508 #endif | |
6509 /* Every character that appears is a word character. */ | |
6510 if (f < 256) | |
6511 new_st.st_isw[f] = TRUE; | |
6512 if (l < 256) | |
6513 new_st.st_isw[l] = TRUE; | |
6514 if (u < 256) | |
6515 new_st.st_isw[u] = TRUE; | |
6516 | |
6517 /* if "LOW" and "FOL" are not the same the "LOW" char needs | |
6518 * case-folding */ | |
6519 if (l < 256 && l != f) | |
6520 { | |
6521 if (f >= 256) | |
6522 { | |
6523 EMSG(_(e_affrange)); | |
6524 return FAIL; | |
6525 } | |
6526 new_st.st_fold[l] = f; | |
6527 } | |
6528 | |
6529 /* if "UPP" and "FOL" are not the same the "UPP" char needs | |
6530 * case-folding, it's upper case and the "UPP" is the upper case of | |
6531 * "FOL" . */ | |
6532 if (u < 256 && u != f) | |
6533 { | |
6534 if (f >= 256) | |
6535 { | |
6536 EMSG(_(e_affrange)); | |
6537 return FAIL; | |
6538 } | |
6539 new_st.st_fold[u] = f; | |
6540 new_st.st_isu[u] = TRUE; | |
6541 new_st.st_upper[f] = u; | |
6542 } | |
6543 } | |
6544 | |
6545 if (*pl != NUL || *pu != NUL) | |
6546 { | |
6547 EMSG(_(e_affform)); | |
6548 return FAIL; | |
6549 } | |
6550 | |
6551 return set_spell_finish(&new_st); | |
6552 } | |
6553 | |
6554 /* | |
6555 * Set the spell character tables from strings in the .spl file. | |
6556 */ | |
6557 static void | |
6558 set_spell_charflags( | |
6559 char_u *flags, | |
6560 int cnt, /* length of "flags" */ | |
6561 char_u *fol) | |
6562 { | |
6563 /* We build the new tables here first, so that we can compare with the | |
6564 * previous one. */ | |
6565 spelltab_T new_st; | |
6566 int i; | |
6567 char_u *p = fol; | |
6568 int c; | |
6569 | |
6570 clear_spell_chartab(&new_st); | |
6571 | |
6572 for (i = 0; i < 128; ++i) | |
6573 { | |
6574 if (i < cnt) | |
6575 { | |
6576 new_st.st_isw[i + 128] = (flags[i] & CF_WORD) != 0; | |
6577 new_st.st_isu[i + 128] = (flags[i] & CF_UPPER) != 0; | |
6578 } | |
6579 | |
6580 if (*p != NUL) | |
6581 { | |
6582 #ifdef FEAT_MBYTE | |
6583 c = mb_ptr2char_adv(&p); | |
6584 #else | |
6585 c = *p++; | |
6586 #endif | |
6587 new_st.st_fold[i + 128] = c; | |
6588 if (i + 128 != c && new_st.st_isu[i + 128] && c < 256) | |
6589 new_st.st_upper[c] = i + 128; | |
6590 } | |
6591 } | |
6592 | |
6593 (void)set_spell_finish(&new_st); | |
6594 } | |
6595 | |
6596 static int | |
6597 set_spell_finish(spelltab_T *new_st) | |
6598 { | |
6599 int i; | |
6600 | |
6601 if (did_set_spelltab) | |
6602 { | |
6603 /* check that it's the same table */ | |
6604 for (i = 0; i < 256; ++i) | |
6605 { | |
6606 if (spelltab.st_isw[i] != new_st->st_isw[i] | |
6607 || spelltab.st_isu[i] != new_st->st_isu[i] | |
6608 || spelltab.st_fold[i] != new_st->st_fold[i] | |
6609 || spelltab.st_upper[i] != new_st->st_upper[i]) | |
6610 { | |
6611 EMSG(_("E763: Word characters differ between spell files")); | |
6612 return FAIL; | |
6613 } | |
6614 } | |
6615 } | |
6616 else | |
6617 { | |
6618 /* copy the new spelltab into the one being used */ | |
6619 spelltab = *new_st; | |
6620 did_set_spelltab = TRUE; | |
6621 } | |
6622 | |
6623 return OK; | |
6624 } | |
6625 | |
6626 /* | |
6627 * Write the table with prefix conditions to the .spl file. | |
6628 * When "fd" is NULL only count the length of what is written. | |
6629 */ | |
6630 static int | |
6631 write_spell_prefcond(FILE *fd, garray_T *gap) | |
6632 { | |
6633 int i; | |
6634 char_u *p; | |
6635 int len; | |
6636 int totlen; | |
6637 size_t x = 1; /* collect return value of fwrite() */ | |
6638 | |
6639 if (fd != NULL) | |
6640 put_bytes(fd, (long_u)gap->ga_len, 2); /* <prefcondcnt> */ | |
6641 | |
6642 totlen = 2 + gap->ga_len; /* length of <prefcondcnt> and <condlen> bytes */ | |
6643 | |
6644 for (i = 0; i < gap->ga_len; ++i) | |
6645 { | |
6646 /* <prefcond> : <condlen> <condstr> */ | |
6647 p = ((char_u **)gap->ga_data)[i]; | |
6648 if (p != NULL) | |
6649 { | |
6650 len = (int)STRLEN(p); | |
6651 if (fd != NULL) | |
6652 { | |
6653 fputc(len, fd); | |
6654 x &= fwrite(p, (size_t)len, (size_t)1, fd); | |
6655 } | |
6656 totlen += len; | |
6657 } | |
6658 else if (fd != NULL) | |
6659 fputc(0, fd); | |
6660 } | |
6661 | |
6662 return totlen; | |
6663 } | |
6664 | |
6665 | |
6666 /* | |
6667 * Use map string "map" for languages "lp". | |
6668 */ | |
6669 static void | |
6670 set_map_str(slang_T *lp, char_u *map) | |
6671 { | |
6672 char_u *p; | |
6673 int headc = 0; | |
6674 int c; | |
6675 int i; | |
6676 | |
6677 if (*map == NUL) | |
6678 { | |
6679 lp->sl_has_map = FALSE; | |
6680 return; | |
6681 } | |
6682 lp->sl_has_map = TRUE; | |
6683 | |
6684 /* Init the array and hash tables empty. */ | |
6685 for (i = 0; i < 256; ++i) | |
6686 lp->sl_map_array[i] = 0; | |
6687 #ifdef FEAT_MBYTE | |
6688 hash_init(&lp->sl_map_hash); | |
6689 #endif | |
6690 | |
6691 /* | |
6692 * The similar characters are stored separated with slashes: | |
6693 * "aaa/bbb/ccc/". Fill sl_map_array[c] with the character before c and | |
6694 * before the same slash. For characters above 255 sl_map_hash is used. | |
6695 */ | |
6696 for (p = map; *p != NUL; ) | |
6697 { | |
6698 #ifdef FEAT_MBYTE | |
6699 c = mb_cptr2char_adv(&p); | |
6700 #else | |
6701 c = *p++; | |
6702 #endif | |
6703 if (c == '/') | |
6704 headc = 0; | |
6705 else | |
6706 { | |
6707 if (headc == 0) | |
6708 headc = c; | |
6709 | |
6710 #ifdef FEAT_MBYTE | |
6711 /* Characters above 255 don't fit in sl_map_array[], put them in | |
6712 * the hash table. Each entry is the char, a NUL the headchar and | |
6713 * a NUL. */ | |
6714 if (c >= 256) | |
6715 { | |
6716 int cl = mb_char2len(c); | |
6717 int headcl = mb_char2len(headc); | |
6718 char_u *b; | |
6719 hash_T hash; | |
6720 hashitem_T *hi; | |
6721 | |
6722 b = alloc((unsigned)(cl + headcl + 2)); | |
6723 if (b == NULL) | |
6724 return; | |
6725 mb_char2bytes(c, b); | |
6726 b[cl] = NUL; | |
6727 mb_char2bytes(headc, b + cl + 1); | |
6728 b[cl + 1 + headcl] = NUL; | |
6729 hash = hash_hash(b); | |
6730 hi = hash_lookup(&lp->sl_map_hash, b, hash); | |
6731 if (HASHITEM_EMPTY(hi)) | |
6732 hash_add_item(&lp->sl_map_hash, hi, b, hash); | |
6733 else | |
6734 { | |
6735 /* This should have been checked when generating the .spl | |
6736 * file. */ | |
6737 EMSG(_("E783: duplicate char in MAP entry")); | |
6738 vim_free(b); | |
6739 } | |
6740 } | |
6741 else | |
6742 #endif | |
6743 lp->sl_map_array[c] = headc; | |
6744 } | |
6745 } | |
6746 } | |
6747 | |
6748 | |
6749 #endif /* FEAT_SPELL */ |