Mercurial > vim
comparison src/spell.c @ 9583:b0c7061d6439 v7.4.2069
commit https://github.com/vim/vim/commit/9ccfebddc3ff2a3c2853cf706fd4c26f639bf381
Author: Bram Moolenaar <Bram@vim.org>
Date: Tue Jul 19 16:39:08 2016 +0200
patch 7.4.2069
Problem: spell.c is too big.
Solution: Split it in spell file handling and spell checking.
author | Christian Brabandt <cb@256bit.org> |
---|---|
date | Tue, 19 Jul 2016 16:45:06 +0200 |
parents | 695186e11daa |
children | fd9727ae3c49 |
comparison
equal
deleted
inserted
replaced
9582:96737caf272d | 9583:b0c7061d6439 |
---|---|
7 * See README.txt for an overview of the Vim source code. | 7 * See README.txt for an overview of the Vim source code. |
8 */ | 8 */ |
9 | 9 |
10 /* | 10 /* |
11 * spell.c: code for spell checking | 11 * spell.c: code for spell checking |
12 * | |
13 * See spellfile.c for the Vim spell file format. | |
12 * | 14 * |
13 * The spell checking mechanism uses a tree (aka trie). Each node in the tree | 15 * The spell checking mechanism uses a tree (aka trie). Each node in the tree |
14 * has a list of bytes that can appear (siblings). For each byte there is a | 16 * has a list of bytes that can appear (siblings). For each byte there is a |
15 * pointer to the node with the byte that follows in the word (child). | 17 * pointer to the node with the byte that follows in the word (child). |
16 * | 18 * |
51 * | 53 * |
52 * Why doesn't Vim use aspell/ispell/myspell/etc.? | 54 * Why doesn't Vim use aspell/ispell/myspell/etc.? |
53 * See ":help develop-spell". | 55 * See ":help develop-spell". |
54 */ | 56 */ |
55 | 57 |
56 /* Use SPELL_PRINTTREE for debugging: dump the word tree after adding a word. | |
57 * Only use it for small word lists! */ | |
58 #if 0 | |
59 # define SPELL_PRINTTREE | |
60 #endif | |
61 | |
62 /* Use SPELL_COMPRESS_ALLWAYS for debugging: compress the word tree after | |
63 * adding a word. Only use it for small word lists! */ | |
64 #if 0 | |
65 # define SPELL_COMPRESS_ALLWAYS | |
66 #endif | |
67 | |
68 /* Use DEBUG_TRIEWALK to print the changes made in suggest_trie_walk() for a | |
69 * specific word. */ | |
70 #if 0 | |
71 # define DEBUG_TRIEWALK | |
72 #endif | |
73 | |
74 /* | 58 /* |
75 * Use this to adjust the score after finding suggestions, based on the | 59 * Use this to adjust the score after finding suggestions, based on the |
76 * suggested word sounding like the bad word. This is much faster than doing | 60 * suggested word sounding like the bad word. This is much faster than doing |
77 * it for every possible suggestion. | 61 * it for every possible suggestion. |
78 * Disadvantage: When "the" is typed as "hte" it sounds quite different ("@" | 62 * Disadvantage: When "the" is typed as "hte" it sounds quite different ("@" |
85 * Do the opposite: based on a maximum end score and a known sound score, | 69 * Do the opposite: based on a maximum end score and a known sound score, |
86 * compute the maximum word score that can be used. | 70 * compute the maximum word score that can be used. |
87 */ | 71 */ |
88 #define MAXSCORE(word_score, sound_score) ((4 * word_score - sound_score) / 3) | 72 #define MAXSCORE(word_score, sound_score) ((4 * word_score - sound_score) / 3) |
89 | 73 |
90 /* | 74 #define IN_SPELL_C |
91 * Vim spell file format: <HEADER> | |
92 * <SECTIONS> | |
93 * <LWORDTREE> | |
94 * <KWORDTREE> | |
95 * <PREFIXTREE> | |
96 * | |
97 * <HEADER>: <fileID> <versionnr> | |
98 * | |
99 * <fileID> 8 bytes "VIMspell" | |
100 * <versionnr> 1 byte VIMSPELLVERSION | |
101 * | |
102 * | |
103 * Sections make it possible to add information to the .spl file without | |
104 * making it incompatible with previous versions. There are two kinds of | |
105 * sections: | |
106 * 1. Not essential for correct spell checking. E.g. for making suggestions. | |
107 * These are skipped when not supported. | |
108 * 2. Optional information, but essential for spell checking when present. | |
109 * E.g. conditions for affixes. When this section is present but not | |
110 * supported an error message is given. | |
111 * | |
112 * <SECTIONS>: <section> ... <sectionend> | |
113 * | |
114 * <section>: <sectionID> <sectionflags> <sectionlen> (section contents) | |
115 * | |
116 * <sectionID> 1 byte number from 0 to 254 identifying the section | |
117 * | |
118 * <sectionflags> 1 byte SNF_REQUIRED: this section is required for correct | |
119 * spell checking | |
120 * | |
121 * <sectionlen> 4 bytes length of section contents, MSB first | |
122 * | |
123 * <sectionend> 1 byte SN_END | |
124 * | |
125 * | |
126 * sectionID == SN_INFO: <infotext> | |
127 * <infotext> N bytes free format text with spell file info (version, | |
128 * website, etc) | |
129 * | |
130 * sectionID == SN_REGION: <regionname> ... | |
131 * <regionname> 2 bytes Up to 8 region names: ca, au, etc. Lower case. | |
132 * First <regionname> is region 1. | |
133 * | |
134 * sectionID == SN_CHARFLAGS: <charflagslen> <charflags> | |
135 * <folcharslen> <folchars> | |
136 * <charflagslen> 1 byte Number of bytes in <charflags> (should be 128). | |
137 * <charflags> N bytes List of flags (first one is for character 128): | |
138 * 0x01 word character CF_WORD | |
139 * 0x02 upper-case character CF_UPPER | |
140 * <folcharslen> 2 bytes Number of bytes in <folchars>. | |
141 * <folchars> N bytes Folded characters, first one is for character 128. | |
142 * | |
143 * sectionID == SN_MIDWORD: <midword> | |
144 * <midword> N bytes Characters that are word characters only when used | |
145 * in the middle of a word. | |
146 * | |
147 * sectionID == SN_PREFCOND: <prefcondcnt> <prefcond> ... | |
148 * <prefcondcnt> 2 bytes Number of <prefcond> items following. | |
149 * <prefcond> : <condlen> <condstr> | |
150 * <condlen> 1 byte Length of <condstr>. | |
151 * <condstr> N bytes Condition for the prefix. | |
152 * | |
153 * sectionID == SN_REP: <repcount> <rep> ... | |
154 * <repcount> 2 bytes number of <rep> items, MSB first. | |
155 * <rep> : <repfromlen> <repfrom> <reptolen> <repto> | |
156 * <repfromlen> 1 byte length of <repfrom> | |
157 * <repfrom> N bytes "from" part of replacement | |
158 * <reptolen> 1 byte length of <repto> | |
159 * <repto> N bytes "to" part of replacement | |
160 * | |
161 * sectionID == SN_REPSAL: <repcount> <rep> ... | |
162 * just like SN_REP but for soundfolded words | |
163 * | |
164 * sectionID == SN_SAL: <salflags> <salcount> <sal> ... | |
165 * <salflags> 1 byte flags for soundsalike conversion: | |
166 * SAL_F0LLOWUP | |
167 * SAL_COLLAPSE | |
168 * SAL_REM_ACCENTS | |
169 * <salcount> 2 bytes number of <sal> items following | |
170 * <sal> : <salfromlen> <salfrom> <saltolen> <salto> | |
171 * <salfromlen> 1 byte length of <salfrom> | |
172 * <salfrom> N bytes "from" part of soundsalike | |
173 * <saltolen> 1 byte length of <salto> | |
174 * <salto> N bytes "to" part of soundsalike | |
175 * | |
176 * sectionID == SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto> | |
177 * <sofofromlen> 2 bytes length of <sofofrom> | |
178 * <sofofrom> N bytes "from" part of soundfold | |
179 * <sofotolen> 2 bytes length of <sofoto> | |
180 * <sofoto> N bytes "to" part of soundfold | |
181 * | |
182 * sectionID == SN_SUGFILE: <timestamp> | |
183 * <timestamp> 8 bytes time in seconds that must match with .sug file | |
184 * | |
185 * sectionID == SN_NOSPLITSUGS: nothing | |
186 * | |
187 * sectionID == SN_NOCOMPOUNDSUGS: nothing | |
188 * | |
189 * sectionID == SN_WORDS: <word> ... | |
190 * <word> N bytes NUL terminated common word | |
191 * | |
192 * sectionID == SN_MAP: <mapstr> | |
193 * <mapstr> N bytes String with sequences of similar characters, | |
194 * separated by slashes. | |
195 * | |
196 * sectionID == SN_COMPOUND: <compmax> <compminlen> <compsylmax> <compoptions> | |
197 * <comppatcount> <comppattern> ... <compflags> | |
198 * <compmax> 1 byte Maximum nr of words in compound word. | |
199 * <compminlen> 1 byte Minimal word length for compounding. | |
200 * <compsylmax> 1 byte Maximum nr of syllables in compound word. | |
201 * <compoptions> 2 bytes COMP_ flags. | |
202 * <comppatcount> 2 bytes number of <comppattern> following | |
203 * <compflags> N bytes Flags from COMPOUNDRULE items, separated by | |
204 * slashes. | |
205 * | |
206 * <comppattern>: <comppatlen> <comppattext> | |
207 * <comppatlen> 1 byte length of <comppattext> | |
208 * <comppattext> N bytes end or begin chars from CHECKCOMPOUNDPATTERN | |
209 * | |
210 * sectionID == SN_NOBREAK: (empty, its presence is what matters) | |
211 * | |
212 * sectionID == SN_SYLLABLE: <syllable> | |
213 * <syllable> N bytes String from SYLLABLE item. | |
214 * | |
215 * <LWORDTREE>: <wordtree> | |
216 * | |
217 * <KWORDTREE>: <wordtree> | |
218 * | |
219 * <PREFIXTREE>: <wordtree> | |
220 * | |
221 * | |
222 * <wordtree>: <nodecount> <nodedata> ... | |
223 * | |
224 * <nodecount> 4 bytes Number of nodes following. MSB first. | |
225 * | |
226 * <nodedata>: <siblingcount> <sibling> ... | |
227 * | |
228 * <siblingcount> 1 byte Number of siblings in this node. The siblings | |
229 * follow in sorted order. | |
230 * | |
231 * <sibling>: <byte> [ <nodeidx> <xbyte> | |
232 * | <flags> [<flags2>] [<region>] [<affixID>] | |
233 * | [<pflags>] <affixID> <prefcondnr> ] | |
234 * | |
235 * <byte> 1 byte Byte value of the sibling. Special cases: | |
236 * BY_NOFLAGS: End of word without flags and for all | |
237 * regions. | |
238 * For PREFIXTREE <affixID> and | |
239 * <prefcondnr> follow. | |
240 * BY_FLAGS: End of word, <flags> follow. | |
241 * For PREFIXTREE <pflags>, <affixID> | |
242 * and <prefcondnr> follow. | |
243 * BY_FLAGS2: End of word, <flags> and <flags2> | |
244 * follow. Not used in PREFIXTREE. | |
245 * BY_INDEX: Child of sibling is shared, <nodeidx> | |
246 * and <xbyte> follow. | |
247 * | |
248 * <nodeidx> 3 bytes Index of child for this sibling, MSB first. | |
249 * | |
250 * <xbyte> 1 byte byte value of the sibling. | |
251 * | |
252 * <flags> 1 byte bitmask of: | |
253 * WF_ALLCAP word must have only capitals | |
254 * WF_ONECAP first char of word must be capital | |
255 * WF_KEEPCAP keep-case word | |
256 * WF_FIXCAP keep-case word, all caps not allowed | |
257 * WF_RARE rare word | |
258 * WF_BANNED bad word | |
259 * WF_REGION <region> follows | |
260 * WF_AFX <affixID> follows | |
261 * | |
262 * <flags2> 1 byte Bitmask of: | |
263 * WF_HAS_AFF >> 8 word includes affix | |
264 * WF_NEEDCOMP >> 8 word only valid in compound | |
265 * WF_NOSUGGEST >> 8 word not used for suggestions | |
266 * WF_COMPROOT >> 8 word already a compound | |
267 * WF_NOCOMPBEF >> 8 no compounding before this word | |
268 * WF_NOCOMPAFT >> 8 no compounding after this word | |
269 * | |
270 * <pflags> 1 byte bitmask of: | |
271 * WFP_RARE rare prefix | |
272 * WFP_NC non-combining prefix | |
273 * WFP_UP letter after prefix made upper case | |
274 * | |
275 * <region> 1 byte Bitmask for regions in which word is valid. When | |
276 * omitted it's valid in all regions. | |
277 * Lowest bit is for region 1. | |
278 * | |
279 * <affixID> 1 byte ID of affix that can be used with this word. In | |
280 * PREFIXTREE used for the required prefix ID. | |
281 * | |
282 * <prefcondnr> 2 bytes Prefix condition number, index in <prefcond> list | |
283 * from the SN_PREFCOND section. | |
284 * | |
285 * All text characters are in 'encoding', but stored as single bytes. | |
286 */ | |
287 | |
288 /* | |
289 * Vim .sug file format: <SUGHEADER> | |
290 * <SUGWORDTREE> | |
291 * <SUGTABLE> | |
292 * | |
293 * <SUGHEADER>: <fileID> <versionnr> <timestamp> | |
294 * | |
295 * <fileID> 6 bytes "VIMsug" | |
296 * <versionnr> 1 byte VIMSUGVERSION | |
297 * <timestamp> 8 bytes timestamp that must match with .spl file | |
298 * | |
299 * | |
300 * <SUGWORDTREE>: <wordtree> (see above, no flags or region used) | |
301 * | |
302 * | |
303 * <SUGTABLE>: <sugwcount> <sugline> ... | |
304 * | |
305 * <sugwcount> 4 bytes number of <sugline> following | |
306 * | |
307 * <sugline>: <sugnr> ... NUL | |
308 * | |
309 * <sugnr>: X bytes word number that results in this soundfolded word, | |
310 * stored as an offset to the previous number in as | |
311 * few bytes as possible (see offset2bytes()) | |
312 */ | |
313 | |
314 #include "vim.h" | 75 #include "vim.h" |
315 | 76 |
316 #if defined(FEAT_SPELL) || defined(PROTO) | 77 #if defined(FEAT_SPELL) || defined(PROTO) |
317 | 78 |
318 #ifndef UNIX /* it's in os_unix.h for Unix */ | 79 #ifndef UNIX /* it's in os_unix.h for Unix */ |
319 # include <time.h> /* for time_t */ | 80 # include <time.h> /* for time_t */ |
320 #endif | 81 #endif |
321 | 82 |
322 #define MAXWLEN 254 /* Assume max. word len is this many bytes. | |
323 Some places assume a word length fits in a | |
324 byte, thus it can't be above 255. | |
325 Must be >= PFD_NOTSPECIAL. */ | |
326 | |
327 /* Type used for indexes in the word tree needs to be at least 4 bytes. If int | |
328 * is 8 bytes we could use something smaller, but what? */ | |
329 #if VIM_SIZEOF_INT > 3 | |
330 typedef int idx_T; | |
331 #else | |
332 typedef long idx_T; | |
333 #endif | |
334 | |
335 #ifdef VMS | |
336 # define SPL_FNAME_TMPL "%s_%s.spl" | |
337 # define SPL_FNAME_ADD "_add." | |
338 # define SPL_FNAME_ASCII "_ascii." | |
339 #else | |
340 # define SPL_FNAME_TMPL "%s.%s.spl" | |
341 # define SPL_FNAME_ADD ".add." | |
342 # define SPL_FNAME_ASCII ".ascii." | |
343 #endif | |
344 | |
345 /* Flags used for a word. Only the lowest byte can be used, the region byte | |
346 * comes above it. */ | |
347 #define WF_REGION 0x01 /* region byte follows */ | |
348 #define WF_ONECAP 0x02 /* word with one capital (or all capitals) */ | |
349 #define WF_ALLCAP 0x04 /* word must be all capitals */ | |
350 #define WF_RARE 0x08 /* rare word */ | |
351 #define WF_BANNED 0x10 /* bad word */ | |
352 #define WF_AFX 0x20 /* affix ID follows */ | |
353 #define WF_FIXCAP 0x40 /* keep-case word, allcap not allowed */ | |
354 #define WF_KEEPCAP 0x80 /* keep-case word */ | |
355 | |
356 /* for <flags2>, shifted up one byte to be used in wn_flags */ | |
357 #define WF_HAS_AFF 0x0100 /* word includes affix */ | |
358 #define WF_NEEDCOMP 0x0200 /* word only valid in compound */ | |
359 #define WF_NOSUGGEST 0x0400 /* word not to be suggested */ | |
360 #define WF_COMPROOT 0x0800 /* already compounded word, COMPOUNDROOT */ | |
361 #define WF_NOCOMPBEF 0x1000 /* no compounding before this word */ | |
362 #define WF_NOCOMPAFT 0x2000 /* no compounding after this word */ | |
363 | |
364 /* only used for su_badflags */ | 83 /* only used for su_badflags */ |
365 #define WF_MIXCAP 0x20 /* mix of upper and lower case: macaRONI */ | 84 #define WF_MIXCAP 0x20 /* mix of upper and lower case: macaRONI */ |
366 | 85 |
367 #define WF_CAPMASK (WF_ONECAP | WF_ALLCAP | WF_KEEPCAP | WF_FIXCAP) | 86 #define WF_CAPMASK (WF_ONECAP | WF_ALLCAP | WF_KEEPCAP | WF_FIXCAP) |
368 | 87 |
369 /* flags for <pflags> */ | |
370 #define WFP_RARE 0x01 /* rare prefix */ | |
371 #define WFP_NC 0x02 /* prefix is not combining */ | |
372 #define WFP_UP 0x04 /* to-upper prefix */ | |
373 #define WFP_COMPPERMIT 0x08 /* prefix with COMPOUNDPERMITFLAG */ | |
374 #define WFP_COMPFORBID 0x10 /* prefix with COMPOUNDFORBIDFLAG */ | |
375 | |
376 /* Flags for postponed prefixes in "sl_pidxs". Must be above affixID (one | |
377 * byte) and prefcondnr (two bytes). */ | |
378 #define WF_RAREPFX (WFP_RARE << 24) /* rare postponed prefix */ | |
379 #define WF_PFX_NC (WFP_NC << 24) /* non-combining postponed prefix */ | |
380 #define WF_PFX_UP (WFP_UP << 24) /* to-upper postponed prefix */ | |
381 #define WF_PFX_COMPPERMIT (WFP_COMPPERMIT << 24) /* postponed prefix with | |
382 * COMPOUNDPERMITFLAG */ | |
383 #define WF_PFX_COMPFORBID (WFP_COMPFORBID << 24) /* postponed prefix with | |
384 * COMPOUNDFORBIDFLAG */ | |
385 | |
386 | |
387 /* flags for <compoptions> */ | |
388 #define COMP_CHECKDUP 1 /* CHECKCOMPOUNDDUP */ | |
389 #define COMP_CHECKREP 2 /* CHECKCOMPOUNDREP */ | |
390 #define COMP_CHECKCASE 4 /* CHECKCOMPOUNDCASE */ | |
391 #define COMP_CHECKTRIPLE 8 /* CHECKCOMPOUNDTRIPLE */ | |
392 | |
393 /* Special byte values for <byte>. Some are only used in the tree for | |
394 * postponed prefixes, some only in the other trees. This is a bit messy... */ | |
395 #define BY_NOFLAGS 0 /* end of word without flags or region; for | |
396 * postponed prefix: no <pflags> */ | |
397 #define BY_INDEX 1 /* child is shared, index follows */ | |
398 #define BY_FLAGS 2 /* end of word, <flags> byte follows; for | |
399 * postponed prefix: <pflags> follows */ | |
400 #define BY_FLAGS2 3 /* end of word, <flags> and <flags2> bytes | |
401 * follow; never used in prefix tree */ | |
402 #define BY_SPECIAL BY_FLAGS2 /* highest special byte value */ | |
403 | |
404 /* Info from "REP", "REPSAL" and "SAL" entries in ".aff" file used in si_rep, | |
405 * si_repsal, sl_rep, and si_sal. Not for sl_sal! | |
406 * One replacement: from "ft_from" to "ft_to". */ | |
407 typedef struct fromto_S | |
408 { | |
409 char_u *ft_from; | |
410 char_u *ft_to; | |
411 } fromto_T; | |
412 | |
413 /* Info from "SAL" entries in ".aff" file used in sl_sal. | |
414 * The info is split for quick processing by spell_soundfold(). | |
415 * Note that "sm_oneof" and "sm_rules" point into sm_lead. */ | |
416 typedef struct salitem_S | |
417 { | |
418 char_u *sm_lead; /* leading letters */ | |
419 int sm_leadlen; /* length of "sm_lead" */ | |
420 char_u *sm_oneof; /* letters from () or NULL */ | |
421 char_u *sm_rules; /* rules like ^, $, priority */ | |
422 char_u *sm_to; /* replacement. */ | |
423 #ifdef FEAT_MBYTE | |
424 int *sm_lead_w; /* wide character copy of "sm_lead" */ | |
425 int *sm_oneof_w; /* wide character copy of "sm_oneof" */ | |
426 int *sm_to_w; /* wide character copy of "sm_to" */ | |
427 #endif | |
428 } salitem_T; | |
429 | |
430 #ifdef FEAT_MBYTE | |
431 typedef int salfirst_T; | |
432 #else | |
433 typedef short salfirst_T; | |
434 #endif | |
435 | |
436 /* Values for SP_*ERROR are negative, positive values are used by | |
437 * read_cnt_string(). */ | |
438 #define SP_TRUNCERROR -1 /* spell file truncated error */ | |
439 #define SP_FORMERROR -2 /* format error in spell file */ | |
440 #define SP_OTHERERROR -3 /* other error while reading spell file */ | |
441 | |
442 /* | |
443 * Structure used to store words and other info for one language, loaded from | |
444 * a .spl file. | |
445 * The main access is through the tree in "sl_fbyts/sl_fidxs", storing the | |
446 * case-folded words. "sl_kbyts/sl_kidxs" is for keep-case words. | |
447 * | |
448 * The "byts" array stores the possible bytes in each tree node, preceded by | |
449 * the number of possible bytes, sorted on byte value: | |
450 * <len> <byte1> <byte2> ... | |
451 * The "idxs" array stores the index of the child node corresponding to the | |
452 * byte in "byts". | |
453 * Exception: when the byte is zero, the word may end here and "idxs" holds | |
454 * the flags, region mask and affixID for the word. There may be several | |
455 * zeros in sequence for alternative flag/region/affixID combinations. | |
456 */ | |
457 typedef struct slang_S slang_T; | |
458 struct slang_S | |
459 { | |
460 slang_T *sl_next; /* next language */ | |
461 char_u *sl_name; /* language name "en", "en.rare", "nl", etc. */ | |
462 char_u *sl_fname; /* name of .spl file */ | |
463 int sl_add; /* TRUE if it's a .add file. */ | |
464 | |
465 char_u *sl_fbyts; /* case-folded word bytes */ | |
466 idx_T *sl_fidxs; /* case-folded word indexes */ | |
467 char_u *sl_kbyts; /* keep-case word bytes */ | |
468 idx_T *sl_kidxs; /* keep-case word indexes */ | |
469 char_u *sl_pbyts; /* prefix tree word bytes */ | |
470 idx_T *sl_pidxs; /* prefix tree word indexes */ | |
471 | |
472 char_u *sl_info; /* infotext string or NULL */ | |
473 | |
474 char_u sl_regions[17]; /* table with up to 8 region names plus NUL */ | |
475 | |
476 char_u *sl_midword; /* MIDWORD string or NULL */ | |
477 | |
478 hashtab_T sl_wordcount; /* hashtable with word count, wordcount_T */ | |
479 | |
480 int sl_compmax; /* COMPOUNDWORDMAX (default: MAXWLEN) */ | |
481 int sl_compminlen; /* COMPOUNDMIN (default: 0) */ | |
482 int sl_compsylmax; /* COMPOUNDSYLMAX (default: MAXWLEN) */ | |
483 int sl_compoptions; /* COMP_* flags */ | |
484 garray_T sl_comppat; /* CHECKCOMPOUNDPATTERN items */ | |
485 regprog_T *sl_compprog; /* COMPOUNDRULE turned into a regexp program | |
486 * (NULL when no compounding) */ | |
487 char_u *sl_comprules; /* all COMPOUNDRULE concatenated (or NULL) */ | |
488 char_u *sl_compstartflags; /* flags for first compound word */ | |
489 char_u *sl_compallflags; /* all flags for compound words */ | |
490 char_u sl_nobreak; /* When TRUE: no spaces between words */ | |
491 char_u *sl_syllable; /* SYLLABLE repeatable chars or NULL */ | |
492 garray_T sl_syl_items; /* syllable items */ | |
493 | |
494 int sl_prefixcnt; /* number of items in "sl_prefprog" */ | |
495 regprog_T **sl_prefprog; /* table with regprogs for prefixes */ | |
496 | |
497 garray_T sl_rep; /* list of fromto_T entries from REP lines */ | |
498 short sl_rep_first[256]; /* indexes where byte first appears, -1 if | |
499 there is none */ | |
500 garray_T sl_sal; /* list of salitem_T entries from SAL lines */ | |
501 salfirst_T sl_sal_first[256]; /* indexes where byte first appears, -1 if | |
502 there is none */ | |
503 int sl_followup; /* SAL followup */ | |
504 int sl_collapse; /* SAL collapse_result */ | |
505 int sl_rem_accents; /* SAL remove_accents */ | |
506 int sl_sofo; /* SOFOFROM and SOFOTO instead of SAL items: | |
507 * "sl_sal_first" maps chars, when has_mbyte | |
508 * "sl_sal" is a list of wide char lists. */ | |
509 garray_T sl_repsal; /* list of fromto_T entries from REPSAL lines */ | |
510 short sl_repsal_first[256]; /* sl_rep_first for REPSAL lines */ | |
511 int sl_nosplitsugs; /* don't suggest splitting a word */ | |
512 int sl_nocompoundsugs; /* don't suggest compounding */ | |
513 | |
514 /* Info from the .sug file. Loaded on demand. */ | |
515 time_t sl_sugtime; /* timestamp for .sug file */ | |
516 char_u *sl_sbyts; /* soundfolded word bytes */ | |
517 idx_T *sl_sidxs; /* soundfolded word indexes */ | |
518 buf_T *sl_sugbuf; /* buffer with word number table */ | |
519 int sl_sugloaded; /* TRUE when .sug file was loaded or failed to | |
520 load */ | |
521 | |
522 int sl_has_map; /* TRUE if there is a MAP line */ | |
523 #ifdef FEAT_MBYTE | |
524 hashtab_T sl_map_hash; /* MAP for multi-byte chars */ | |
525 int sl_map_array[256]; /* MAP for first 256 chars */ | |
526 #else | |
527 char_u sl_map_array[256]; /* MAP for first 256 chars */ | |
528 #endif | |
529 hashtab_T sl_sounddone; /* table with soundfolded words that have | |
530 been handled, see add_sound_suggest() */ | |
531 }; | |
532 | |
533 /* First language that is loaded, start of the linked list of loaded | |
534 * languages. */ | |
535 static slang_T *first_lang = NULL; | |
536 | |
537 /* Flags used in .spl file for soundsalike flags. */ | |
538 #define SAL_F0LLOWUP 1 | |
539 #define SAL_COLLAPSE 2 | |
540 #define SAL_REM_ACCENTS 4 | |
541 | |
542 /* | |
543 * Structure used in "b_langp", filled from 'spelllang'. | |
544 */ | |
545 typedef struct langp_S | |
546 { | |
547 slang_T *lp_slang; /* info for this language */ | |
548 slang_T *lp_sallang; /* language used for sound folding or NULL */ | |
549 slang_T *lp_replang; /* language used for REP items or NULL */ | |
550 int lp_region; /* bitmask for region or REGION_ALL */ | |
551 } langp_T; | |
552 | |
553 #define LANGP_ENTRY(ga, i) (((langp_T *)(ga).ga_data) + (i)) | |
554 | |
555 #define REGION_ALL 0xff /* word valid in all regions */ | 88 #define REGION_ALL 0xff /* word valid in all regions */ |
556 | |
557 #define VIMSPELLMAGIC "VIMspell" /* string at start of Vim spell file */ | |
558 #define VIMSPELLMAGICL 8 | |
559 #define VIMSPELLVERSION 50 | |
560 | 89 |
561 #define VIMSUGMAGIC "VIMsug" /* string at start of Vim .sug file */ | 90 #define VIMSUGMAGIC "VIMsug" /* string at start of Vim .sug file */ |
562 #define VIMSUGMAGICL 6 | 91 #define VIMSUGMAGICL 6 |
563 #define VIMSUGVERSION 1 | 92 #define VIMSUGVERSION 1 |
564 | |
565 /* Section IDs. Only renumber them when VIMSPELLVERSION changes! */ | |
566 #define SN_REGION 0 /* <regionname> section */ | |
567 #define SN_CHARFLAGS 1 /* charflags section */ | |
568 #define SN_MIDWORD 2 /* <midword> section */ | |
569 #define SN_PREFCOND 3 /* <prefcond> section */ | |
570 #define SN_REP 4 /* REP items section */ | |
571 #define SN_SAL 5 /* SAL items section */ | |
572 #define SN_SOFO 6 /* soundfolding section */ | |
573 #define SN_MAP 7 /* MAP items section */ | |
574 #define SN_COMPOUND 8 /* compound words section */ | |
575 #define SN_SYLLABLE 9 /* syllable section */ | |
576 #define SN_NOBREAK 10 /* NOBREAK section */ | |
577 #define SN_SUGFILE 11 /* timestamp for .sug file */ | |
578 #define SN_REPSAL 12 /* REPSAL items section */ | |
579 #define SN_WORDS 13 /* common words */ | |
580 #define SN_NOSPLITSUGS 14 /* don't split word for suggestions */ | |
581 #define SN_INFO 15 /* info section */ | |
582 #define SN_NOCOMPOUNDSUGS 16 /* don't compound for suggestions */ | |
583 #define SN_END 255 /* end of sections */ | |
584 | |
585 #define SNF_REQUIRED 1 /* <sectionflags>: required section */ | |
586 | 93 |
587 /* Result values. Lower number is accepted over higher one. */ | 94 /* Result values. Lower number is accepted over higher one. */ |
588 #define SP_BANNED -1 | 95 #define SP_BANNED -1 |
589 #define SP_OK 0 | 96 #define SP_OK 0 |
590 #define SP_RARE 1 | 97 #define SP_RARE 1 |
591 #define SP_LOCAL 2 | 98 #define SP_LOCAL 2 |
592 #define SP_BAD 3 | 99 #define SP_BAD 3 |
593 | |
594 /* file used for "zG" and "zW" */ | |
595 static char_u *int_wordlist = NULL; | |
596 | 100 |
597 typedef struct wordcount_S | 101 typedef struct wordcount_S |
598 { | 102 { |
599 short_u wc_count; /* nr of times word was seen */ | 103 short_u wc_count; /* nr of times word was seen */ |
600 char_u wc_word[1]; /* word, actually longer */ | 104 char_u wc_word[1]; /* word, actually longer */ |
741 /* for NOBREAK */ | 245 /* for NOBREAK */ |
742 int mi_result2; /* "mi_result" without following word */ | 246 int mi_result2; /* "mi_result" without following word */ |
743 char_u *mi_end2; /* "mi_end" without following word */ | 247 char_u *mi_end2; /* "mi_end" without following word */ |
744 } matchinf_T; | 248 } matchinf_T; |
745 | 249 |
746 /* | 250 |
747 * The tables used for recognizing word characters according to spelling. | |
748 * These are only used for the first 256 characters of 'encoding'. | |
749 */ | |
750 typedef struct spelltab_S | |
751 { | |
752 char_u st_isw[256]; /* flags: is word char */ | |
753 char_u st_isu[256]; /* flags: is uppercase char */ | |
754 char_u st_fold[256]; /* chars: folded case */ | |
755 char_u st_upper[256]; /* chars: upper case */ | |
756 } spelltab_T; | |
757 | |
758 static spelltab_T spelltab; | |
759 static int did_set_spelltab; | |
760 | |
761 #define CF_WORD 0x01 | |
762 #define CF_UPPER 0x02 | |
763 | |
764 static void clear_spell_chartab(spelltab_T *sp); | |
765 static int set_spell_finish(spelltab_T *new_st); | |
766 static int spell_iswordp(char_u *p, win_T *wp); | 251 static int spell_iswordp(char_u *p, win_T *wp); |
767 static int spell_iswordp_nmw(char_u *p, win_T *wp); | |
768 #ifdef FEAT_MBYTE | 252 #ifdef FEAT_MBYTE |
769 static int spell_mb_isword_class(int cl, win_T *wp); | 253 static int spell_mb_isword_class(int cl, win_T *wp); |
770 static int spell_iswordp_w(int *p, win_T *wp); | 254 static int spell_iswordp_w(int *p, win_T *wp); |
771 #endif | 255 #endif |
772 static int write_spell_prefcond(FILE *fd, garray_T *gap); | |
773 | 256 |
774 /* | 257 /* |
775 * For finding suggestions: At each node in the tree these states are tried: | 258 * For finding suggestions: At each node in the tree these states are tried: |
776 */ | 259 */ |
777 typedef enum | 260 typedef enum |
849 #define FIND_KEEPWORD 1 /* find keep-case word */ | 332 #define FIND_KEEPWORD 1 /* find keep-case word */ |
850 #define FIND_PREFIX 2 /* find word after prefix */ | 333 #define FIND_PREFIX 2 /* find word after prefix */ |
851 #define FIND_COMPOUND 3 /* find case-folded compound word */ | 334 #define FIND_COMPOUND 3 /* find case-folded compound word */ |
852 #define FIND_KEEPCOMPOUND 4 /* find keep-case compound word */ | 335 #define FIND_KEEPCOMPOUND 4 /* find keep-case compound word */ |
853 | 336 |
854 static slang_T *slang_alloc(char_u *lang); | |
855 static void slang_free(slang_T *lp); | |
856 static void slang_clear(slang_T *lp); | |
857 static void slang_clear_sug(slang_T *lp); | |
858 static void find_word(matchinf_T *mip, int mode); | 337 static void find_word(matchinf_T *mip, int mode); |
859 static int match_checkcompoundpattern(char_u *ptr, int wlen, garray_T *gap); | 338 static int match_checkcompoundpattern(char_u *ptr, int wlen, garray_T *gap); |
860 static int can_compound(slang_T *slang, char_u *word, char_u *flags); | 339 static int can_compound(slang_T *slang, char_u *word, char_u *flags); |
861 static int can_be_compound(trystate_T *sp, slang_T *slang, char_u *compflags, int flag); | 340 static int can_be_compound(trystate_T *sp, slang_T *slang, char_u *compflags, int flag); |
862 static int match_compoundrule(slang_T *slang, char_u *compflags); | 341 static int match_compoundrule(slang_T *slang, char_u *compflags); |
864 static void find_prefix(matchinf_T *mip, int mode); | 343 static void find_prefix(matchinf_T *mip, int mode); |
865 static int fold_more(matchinf_T *mip); | 344 static int fold_more(matchinf_T *mip); |
866 static int spell_valid_case(int wordflags, int treeflags); | 345 static int spell_valid_case(int wordflags, int treeflags); |
867 static int no_spell_checking(win_T *wp); | 346 static int no_spell_checking(win_T *wp); |
868 static void spell_load_lang(char_u *lang); | 347 static void spell_load_lang(char_u *lang); |
869 static char_u *spell_enc(void); | |
870 static void int_wordlist_spl(char_u *fname); | 348 static void int_wordlist_spl(char_u *fname); |
871 static void spell_load_cb(char_u *fname, void *cookie); | 349 static void spell_load_cb(char_u *fname, void *cookie); |
872 static slang_T *spell_load_file(char_u *fname, char_u *lang, slang_T *old_lp, int silent); | |
873 static char_u *read_cnt_string(FILE *fd, int cnt_bytes, int *lenp); | |
874 static int read_region_section(FILE *fd, slang_T *slang, int len); | |
875 static int read_charflags_section(FILE *fd); | |
876 static int read_prefcond_section(FILE *fd, slang_T *lp); | |
877 static int read_rep_section(FILE *fd, garray_T *gap, short *first); | |
878 static int read_sal_section(FILE *fd, slang_T *slang); | |
879 static int read_words_section(FILE *fd, slang_T *lp, int len); | |
880 static void count_common_word(slang_T *lp, char_u *word, int len, int count); | |
881 static int score_wordcount_adj(slang_T *slang, int score, char_u *word, int split); | 350 static int score_wordcount_adj(slang_T *slang, int score, char_u *word, int split); |
882 static int read_sofo_section(FILE *fd, slang_T *slang); | |
883 static int read_compound(FILE *fd, slang_T *slang, int len); | |
884 static int byte_in_str(char_u *str, int byte); | |
885 static int init_syl_tab(slang_T *slang); | |
886 static int count_syllables(slang_T *slang, char_u *word); | 351 static int count_syllables(slang_T *slang, char_u *word); |
887 static int set_sofo(slang_T *lp, char_u *from, char_u *to); | |
888 static void set_sal_first(slang_T *lp); | |
889 #ifdef FEAT_MBYTE | |
890 static int *mb_str2wide(char_u *s); | |
891 #endif | |
892 static int spell_read_tree(FILE *fd, char_u **bytsp, idx_T **idxsp, int prefixtree, int prefixcnt); | |
893 static idx_T read_tree_node(FILE *fd, char_u *byts, idx_T *idxs, int maxidx, idx_T startidx, int prefixtree, int maxprefcondnr); | |
894 static void clear_midword(win_T *buf); | 352 static void clear_midword(win_T *buf); |
895 static void use_midword(slang_T *lp, win_T *buf); | 353 static void use_midword(slang_T *lp, win_T *buf); |
896 static int find_region(char_u *rp, char_u *region); | 354 static int find_region(char_u *rp, char_u *region); |
897 static int captype(char_u *word, char_u *end); | |
898 static int badword_captype(char_u *word, char_u *end); | 355 static int badword_captype(char_u *word, char_u *end); |
899 static void spell_reload_one(char_u *fname, int added_word); | |
900 static void set_spell_charflags(char_u *flags, int cnt, char_u *upp); | |
901 static int set_spell_chartab(char_u *fol, char_u *low, char_u *upp); | |
902 static int spell_casefold(char_u *p, int len, char_u *buf, int buflen); | |
903 static int check_need_cap(linenr_T lnum, colnr_T col); | 356 static int check_need_cap(linenr_T lnum, colnr_T col); |
904 static void spell_find_suggest(char_u *badptr, int badlen, suginfo_T *su, int maxcount, int banbadword, int need_cap, int interactive); | 357 static void spell_find_suggest(char_u *badptr, int badlen, suginfo_T *su, int maxcount, int banbadword, int need_cap, int interactive); |
905 #ifdef FEAT_EVAL | 358 #ifdef FEAT_EVAL |
906 static void spell_suggest_expr(suginfo_T *su, char_u *expr); | 359 static void spell_suggest_expr(suginfo_T *su, char_u *expr); |
907 #endif | 360 #endif |
908 static void spell_suggest_file(suginfo_T *su, char_u *fname); | 361 static void spell_suggest_file(suginfo_T *su, char_u *fname); |
909 static void spell_suggest_intern(suginfo_T *su, int interactive); | 362 static void spell_suggest_intern(suginfo_T *su, int interactive); |
910 static void suggest_load_files(void); | |
911 static void tree_count_words(char_u *byts, idx_T *idxs); | |
912 static void spell_find_cleanup(suginfo_T *su); | 363 static void spell_find_cleanup(suginfo_T *su); |
913 static void onecap_copy(char_u *word, char_u *wcopy, int upper); | |
914 static void allcap_copy(char_u *word, char_u *wcopy); | 364 static void allcap_copy(char_u *word, char_u *wcopy); |
915 static void suggest_try_special(suginfo_T *su); | 365 static void suggest_try_special(suginfo_T *su); |
916 static void suggest_try_change(suginfo_T *su); | 366 static void suggest_try_change(suginfo_T *su); |
917 static void suggest_trie_walk(suginfo_T *su, langp_T *lp, char_u *fword, int soundfold); | 367 static void suggest_trie_walk(suginfo_T *su, langp_T *lp, char_u *fword, int soundfold); |
918 static void go_deeper(trystate_T *stack, int depth, int score_add); | 368 static void go_deeper(trystate_T *stack, int depth, int score_add); |
927 static void suggest_try_soundalike(suginfo_T *su); | 377 static void suggest_try_soundalike(suginfo_T *su); |
928 static void suggest_try_soundalike_finish(void); | 378 static void suggest_try_soundalike_finish(void); |
929 static void add_sound_suggest(suginfo_T *su, char_u *goodword, int score, langp_T *lp); | 379 static void add_sound_suggest(suginfo_T *su, char_u *goodword, int score, langp_T *lp); |
930 static int soundfold_find(slang_T *slang, char_u *word); | 380 static int soundfold_find(slang_T *slang, char_u *word); |
931 static void make_case_word(char_u *fword, char_u *cword, int flags); | 381 static void make_case_word(char_u *fword, char_u *cword, int flags); |
932 static void set_map_str(slang_T *lp, char_u *map); | |
933 static int similar_chars(slang_T *slang, int c1, int c2); | 382 static int similar_chars(slang_T *slang, int c1, int c2); |
934 static void add_suggestion(suginfo_T *su, garray_T *gap, char_u *goodword, int badlen, int score, int altscore, int had_bonus, slang_T *slang, int maxsf); | 383 static void add_suggestion(suginfo_T *su, garray_T *gap, char_u *goodword, int badlen, int score, int altscore, int had_bonus, slang_T *slang, int maxsf); |
935 static void check_suggestions(suginfo_T *su, garray_T *gap); | 384 static void check_suggestions(suginfo_T *su, garray_T *gap); |
936 static void add_banned(suginfo_T *su, char_u *word); | 385 static void add_banned(suginfo_T *su, char_u *word); |
937 static void rescore_suggestions(suginfo_T *su); | 386 static void rescore_suggestions(suginfo_T *su); |
938 static void rescore_one(suginfo_T *su, suggest_T *stp); | 387 static void rescore_one(suginfo_T *su, suggest_T *stp); |
939 static int cleanup_suggestions(garray_T *gap, int maxscore, int keep); | 388 static int cleanup_suggestions(garray_T *gap, int maxscore, int keep); |
940 static void spell_soundfold(slang_T *slang, char_u *inword, int folded, char_u *res); | |
941 static void spell_soundfold_sofo(slang_T *slang, char_u *inword, char_u *res); | 389 static void spell_soundfold_sofo(slang_T *slang, char_u *inword, char_u *res); |
942 static void spell_soundfold_sal(slang_T *slang, char_u *inword, char_u *res); | 390 static void spell_soundfold_sal(slang_T *slang, char_u *inword, char_u *res); |
943 #ifdef FEAT_MBYTE | 391 #ifdef FEAT_MBYTE |
944 static void spell_soundfold_wsal(slang_T *slang, char_u *inword, char_u *res); | 392 static void spell_soundfold_wsal(slang_T *slang, char_u *inword, char_u *res); |
945 #endif | 393 #endif |
949 #ifdef FEAT_MBYTE | 397 #ifdef FEAT_MBYTE |
950 static int spell_edit_score_limit_w(slang_T *slang, char_u *badword, char_u *goodword, int limit); | 398 static int spell_edit_score_limit_w(slang_T *slang, char_u *badword, char_u *goodword, int limit); |
951 #endif | 399 #endif |
952 static void dump_word(slang_T *slang, char_u *word, char_u *pat, int *dir, int round, int flags, linenr_T lnum); | 400 static void dump_word(slang_T *slang, char_u *word, char_u *pat, int *dir, int round, int flags, linenr_T lnum); |
953 static linenr_T dump_prefixes(slang_T *slang, char_u *word, char_u *pat, int *dir, int round, int flags, linenr_T startlnum); | 401 static linenr_T dump_prefixes(slang_T *slang, char_u *word, char_u *pat, int *dir, int round, int flags, linenr_T startlnum); |
954 static buf_T *open_spellbuf(void); | 402 |
955 static void close_spellbuf(buf_T *buf); | |
956 | |
/*
 * Character-case handling for spell checking.  Our own definitions are used,
 * because the current locale may differ from what the .spl file uses.
 * These must not be called with negative number!
 */
#ifdef FEAT_MBYTE
# if defined(HAVE_WCHAR_H)
#  include <wchar.h>        /* for towupper() and towlower() */
# endif

/*
 * What to do with a character of 256 or above when the utf_*() function is
 * not used: call the "w" library function if it is available, otherwise
 * leave the character alone (or report "not upper case").
 */
# ifdef HAVE_TOWLOWER
#  define SPELL_WIDE_TOFOLD(c)  ((int)towlower(c))
# else
#  define SPELL_WIDE_TOFOLD(c)  (c)
# endif

# ifdef HAVE_TOWUPPER
#  define SPELL_WIDE_TOUPPER(c) ((int)towupper(c))
# else
#  define SPELL_WIDE_TOUPPER(c) (c)
# endif

# ifdef HAVE_ISWUPPER
#  define SPELL_WIDE_ISUPPER(c) iswupper(c)
# else
#  define SPELL_WIDE_ISUPPER(c) (FALSE)
# endif

/*
 * Multi-byte implementation.  For Unicode we can call utf_*(), but don't do
 * that for ASCII, because we don't want to use 'casemap' here.  Characters
 * below 256 are looked up in "spelltab".
 */
# define SPELL_TOFOLD(c) (enc_utf8 && (c) >= 128 ? utf_fold(c) \
            : (c) < 256 ? (int)spelltab.st_fold[c] : SPELL_WIDE_TOFOLD(c))

# define SPELL_TOUPPER(c) (enc_utf8 && (c) >= 128 ? utf_toupper(c) \
            : (c) < 256 ? (int)spelltab.st_upper[c] : SPELL_WIDE_TOUPPER(c))

# define SPELL_ISUPPER(c) (enc_utf8 && (c) >= 128 ? utf_isupper(c) \
            : (c) < 256 ? spelltab.st_isu[c] : SPELL_WIDE_ISUPPER(c))
#else
/* Non-multi-byte implementation: only the "spelltab" tables are used. */
# define SPELL_TOFOLD(c) ((c) < 256 ? (int)spelltab.st_fold[c] : (c))
# define SPELL_TOUPPER(c) ((c) < 256 ? (int)spelltab.st_upper[c] : (c))
# define SPELL_ISUPPER(c) ((c) < 256 ? spelltab.st_isu[c] : FALSE)
#endif
998 | |
999 | |
/* Messages given when a spell file cannot be read: E759 for contents that do
 * not follow the format, E758 when the file ends too early. */
static char *e_format = N_("E759: Format error in spell file");
static char *e_spell_trunc = N_("E758: Truncated spell file");
/* Messages about an affix file.  The first two take three arguments, matching
 * the "%s", "%d" and "%s" items in the text. */
static char *e_afftrailing = N_("Trailing text in %s line %d: %s");
static char *e_affname = N_("Affix name too long in %s line %d: %s");
static char *e_affform = N_("E761: Format error in affix file FOL, LOW or UPP");
static char *e_affrange = N_("E762: Character in FOL, LOW or UPP is out of range");
/* Progress message text. */
static char *msg_compressing = N_("Compressing word tree...");

/* Remember what "z?" replaced. */
static char_u *repl_from = NULL;
static char_u *repl_to = NULL;
1011 | 407 |
2526 | 1922 |
2527 /* | 1923 /* |
2528 * Return the encoding used for spell checking: Use 'encoding', except that we | 1924 * Return the encoding used for spell checking: Use 'encoding', except that we |
2529 * use "latin1" for "latin9". And limit to 60 characters (just in case). | 1925 * use "latin1" for "latin9". And limit to 60 characters (just in case). |
2530 */ | 1926 */ |
2531 static char_u * | 1927 char_u * |
2532 spell_enc(void) | 1928 spell_enc(void) |
2533 { | 1929 { |
2534 | 1930 |
2535 #ifdef FEAT_MBYTE | 1931 #ifdef FEAT_MBYTE |
2536 if (STRLEN(p_enc) < 60 && STRCMP(p_enc, "iso-8859-15") != 0) | 1932 if (STRLEN(p_enc) < 60 && STRCMP(p_enc, "iso-8859-15") != 0) |
2552 | 1948 |
2553 /* | 1949 /* |
2554 * Allocate a new slang_T for language "lang". "lang" can be NULL. | 1950 * Allocate a new slang_T for language "lang". "lang" can be NULL. |
2555 * Caller must fill "sl_next". | 1951 * Caller must fill "sl_next". |
2556 */ | 1952 */ |
2557 static slang_T * | 1953 slang_T * |
2558 slang_alloc(char_u *lang) | 1954 slang_alloc(char_u *lang) |
2559 { | 1955 { |
2560 slang_T *lp; | 1956 slang_T *lp; |
2561 | 1957 |
2562 lp = (slang_T *)alloc_clear(sizeof(slang_T)); | 1958 lp = (slang_T *)alloc_clear(sizeof(slang_T)); |
2575 } | 1971 } |
2576 | 1972 |
2577 /* | 1973 /* |
2578 * Free the contents of an slang_T and the structure itself. | 1974 * Free the contents of an slang_T and the structure itself. |
2579 */ | 1975 */ |
2580 static void | 1976 void |
2581 slang_free(slang_T *lp) | 1977 slang_free(slang_T *lp) |
2582 { | 1978 { |
2583 vim_free(lp->sl_name); | 1979 vim_free(lp->sl_name); |
2584 vim_free(lp->sl_fname); | 1980 vim_free(lp->sl_fname); |
2585 slang_clear(lp); | 1981 slang_clear(lp); |
2587 } | 1983 } |
2588 | 1984 |
2589 /* | 1985 /* |
2590 * Clear an slang_T so that the file can be reloaded. | 1986 * Clear an slang_T so that the file can be reloaded. |
2591 */ | 1987 */ |
2592 static void | 1988 void |
2593 slang_clear(slang_T *lp) | 1989 slang_clear(slang_T *lp) |
2594 { | 1990 { |
2595 garray_T *gap; | 1991 garray_T *gap; |
2596 fromto_T *ftp; | 1992 fromto_T *ftp; |
2597 salitem_T *smp; | 1993 salitem_T *smp; |
2693 } | 2089 } |
2694 | 2090 |
2695 /* | 2091 /* |
2696 * Clear the info from the .sug file in "lp". | 2092 * Clear the info from the .sug file in "lp". |
2697 */ | 2093 */ |
2698 static void | 2094 void |
2699 slang_clear_sug(slang_T *lp) | 2095 slang_clear_sug(slang_T *lp) |
2700 { | 2096 { |
2701 vim_free(lp->sl_sbyts); | 2097 vim_free(lp->sl_sbyts); |
2702 lp->sl_sbyts = NULL; | 2098 lp->sl_sbyts = NULL; |
2703 vim_free(lp->sl_sidxs); | 2099 vim_free(lp->sl_sidxs); |
2730 | 2126 |
2731 slp->sl_slang = slang; | 2127 slp->sl_slang = slang; |
2732 } | 2128 } |
2733 } | 2129 } |
2734 | 2130 |
2735 /* | |
2736 * Load one spell file and store the info into a slang_T. | |
2737 * | |
2738 * This is invoked in three ways: | |
2739 * - From spell_load_cb() to load a spell file for the first time. "lang" is | |
2740 * the language name, "old_lp" is NULL. Will allocate an slang_T. | |
2741 * - To reload a spell file that was changed. "lang" is NULL and "old_lp" | |
2742 * points to the existing slang_T. | |
2743 * - Just after writing a .spl file; it's read back to produce the .sug file. | |
2744 * "old_lp" is NULL and "lang" is NULL. Will allocate an slang_T. | |
2745 * | |
2746 * Returns the slang_T the spell file was loaded into. NULL for error. | |
2747 */ | |
2748 static slang_T * | |
2749 spell_load_file( | |
2750 char_u *fname, | |
2751 char_u *lang, | |
2752 slang_T *old_lp, | |
2753 int silent) /* no error if file doesn't exist */ | |
2754 { | |
2755 FILE *fd; | |
2756 char_u buf[VIMSPELLMAGICL]; | |
2757 char_u *p; | |
2758 int i; | |
2759 int n; | |
2760 int len; | |
2761 char_u *save_sourcing_name = sourcing_name; | |
2762 linenr_T save_sourcing_lnum = sourcing_lnum; | |
2763 slang_T *lp = NULL; | |
2764 int c = 0; | |
2765 int res; | |
2766 | |
2767 fd = mch_fopen((char *)fname, "r"); | |
2768 if (fd == NULL) | |
2769 { | |
2770 if (!silent) | |
2771 EMSG2(_(e_notopen), fname); | |
2772 else if (p_verbose > 2) | |
2773 { | |
2774 verbose_enter(); | |
2775 smsg((char_u *)e_notopen, fname); | |
2776 verbose_leave(); | |
2777 } | |
2778 goto endFAIL; | |
2779 } | |
2780 if (p_verbose > 2) | |
2781 { | |
2782 verbose_enter(); | |
2783 smsg((char_u *)_("Reading spell file \"%s\""), fname); | |
2784 verbose_leave(); | |
2785 } | |
2786 | |
2787 if (old_lp == NULL) | |
2788 { | |
2789 lp = slang_alloc(lang); | |
2790 if (lp == NULL) | |
2791 goto endFAIL; | |
2792 | |
2793 /* Remember the file name, used to reload the file when it's updated. */ | |
2794 lp->sl_fname = vim_strsave(fname); | |
2795 if (lp->sl_fname == NULL) | |
2796 goto endFAIL; | |
2797 | |
2798 /* Check for .add.spl (_add.spl for VMS). */ | |
2799 lp->sl_add = strstr((char *)gettail(fname), SPL_FNAME_ADD) != NULL; | |
2800 } | |
2801 else | |
2802 lp = old_lp; | |
2803 | |
2804 /* Set sourcing_name, so that error messages mention the file name. */ | |
2805 sourcing_name = fname; | |
2806 sourcing_lnum = 0; | |
2807 | |
2808 /* | |
2809 * <HEADER>: <fileID> | |
2810 */ | |
2811 for (i = 0; i < VIMSPELLMAGICL; ++i) | |
2812 buf[i] = getc(fd); /* <fileID> */ | |
2813 if (STRNCMP(buf, VIMSPELLMAGIC, VIMSPELLMAGICL) != 0) | |
2814 { | |
2815 EMSG(_("E757: This does not look like a spell file")); | |
2816 goto endFAIL; | |
2817 } | |
2818 c = getc(fd); /* <versionnr> */ | |
2819 if (c < VIMSPELLVERSION) | |
2820 { | |
2821 EMSG(_("E771: Old spell file, needs to be updated")); | |
2822 goto endFAIL; | |
2823 } | |
2824 else if (c > VIMSPELLVERSION) | |
2825 { | |
2826 EMSG(_("E772: Spell file is for newer version of Vim")); | |
2827 goto endFAIL; | |
2828 } | |
2829 | |
2830 | |
2831 /* | |
2832 * <SECTIONS>: <section> ... <sectionend> | |
2833 * <section>: <sectionID> <sectionflags> <sectionlen> (section contents) | |
2834 */ | |
2835 for (;;) | |
2836 { | |
2837 n = getc(fd); /* <sectionID> or <sectionend> */ | |
2838 if (n == SN_END) | |
2839 break; | |
2840 c = getc(fd); /* <sectionflags> */ | |
2841 len = get4c(fd); /* <sectionlen> */ | |
2842 if (len < 0) | |
2843 goto truncerr; | |
2844 | |
2845 res = 0; | |
2846 switch (n) | |
2847 { | |
2848 case SN_INFO: | |
2849 lp->sl_info = read_string(fd, len); /* <infotext> */ | |
2850 if (lp->sl_info == NULL) | |
2851 goto endFAIL; | |
2852 break; | |
2853 | |
2854 case SN_REGION: | |
2855 res = read_region_section(fd, lp, len); | |
2856 break; | |
2857 | |
2858 case SN_CHARFLAGS: | |
2859 res = read_charflags_section(fd); | |
2860 break; | |
2861 | |
2862 case SN_MIDWORD: | |
2863 lp->sl_midword = read_string(fd, len); /* <midword> */ | |
2864 if (lp->sl_midword == NULL) | |
2865 goto endFAIL; | |
2866 break; | |
2867 | |
2868 case SN_PREFCOND: | |
2869 res = read_prefcond_section(fd, lp); | |
2870 break; | |
2871 | |
2872 case SN_REP: | |
2873 res = read_rep_section(fd, &lp->sl_rep, lp->sl_rep_first); | |
2874 break; | |
2875 | |
2876 case SN_REPSAL: | |
2877 res = read_rep_section(fd, &lp->sl_repsal, lp->sl_repsal_first); | |
2878 break; | |
2879 | |
2880 case SN_SAL: | |
2881 res = read_sal_section(fd, lp); | |
2882 break; | |
2883 | |
2884 case SN_SOFO: | |
2885 res = read_sofo_section(fd, lp); | |
2886 break; | |
2887 | |
2888 case SN_MAP: | |
2889 p = read_string(fd, len); /* <mapstr> */ | |
2890 if (p == NULL) | |
2891 goto endFAIL; | |
2892 set_map_str(lp, p); | |
2893 vim_free(p); | |
2894 break; | |
2895 | |
2896 case SN_WORDS: | |
2897 res = read_words_section(fd, lp, len); | |
2898 break; | |
2899 | |
2900 case SN_SUGFILE: | |
2901 lp->sl_sugtime = get8ctime(fd); /* <timestamp> */ | |
2902 break; | |
2903 | |
2904 case SN_NOSPLITSUGS: | |
2905 lp->sl_nosplitsugs = TRUE; | |
2906 break; | |
2907 | |
2908 case SN_NOCOMPOUNDSUGS: | |
2909 lp->sl_nocompoundsugs = TRUE; | |
2910 break; | |
2911 | |
2912 case SN_COMPOUND: | |
2913 res = read_compound(fd, lp, len); | |
2914 break; | |
2915 | |
2916 case SN_NOBREAK: | |
2917 lp->sl_nobreak = TRUE; | |
2918 break; | |
2919 | |
2920 case SN_SYLLABLE: | |
2921 lp->sl_syllable = read_string(fd, len); /* <syllable> */ | |
2922 if (lp->sl_syllable == NULL) | |
2923 goto endFAIL; | |
2924 if (init_syl_tab(lp) == FAIL) | |
2925 goto endFAIL; | |
2926 break; | |
2927 | |
2928 default: | |
2929 /* Unsupported section. When it's required give an error | |
2930 * message. When it's not required skip the contents. */ | |
2931 if (c & SNF_REQUIRED) | |
2932 { | |
2933 EMSG(_("E770: Unsupported section in spell file")); | |
2934 goto endFAIL; | |
2935 } | |
2936 while (--len >= 0) | |
2937 if (getc(fd) < 0) | |
2938 goto truncerr; | |
2939 break; | |
2940 } | |
2941 someerror: | |
2942 if (res == SP_FORMERROR) | |
2943 { | |
2944 EMSG(_(e_format)); | |
2945 goto endFAIL; | |
2946 } | |
2947 if (res == SP_TRUNCERROR) | |
2948 { | |
2949 truncerr: | |
2950 EMSG(_(e_spell_trunc)); | |
2951 goto endFAIL; | |
2952 } | |
2953 if (res == SP_OTHERERROR) | |
2954 goto endFAIL; | |
2955 } | |
2956 | |
2957 /* <LWORDTREE> */ | |
2958 res = spell_read_tree(fd, &lp->sl_fbyts, &lp->sl_fidxs, FALSE, 0); | |
2959 if (res != 0) | |
2960 goto someerror; | |
2961 | |
2962 /* <KWORDTREE> */ | |
2963 res = spell_read_tree(fd, &lp->sl_kbyts, &lp->sl_kidxs, FALSE, 0); | |
2964 if (res != 0) | |
2965 goto someerror; | |
2966 | |
2967 /* <PREFIXTREE> */ | |
2968 res = spell_read_tree(fd, &lp->sl_pbyts, &lp->sl_pidxs, TRUE, | |
2969 lp->sl_prefixcnt); | |
2970 if (res != 0) | |
2971 goto someerror; | |
2972 | |
2973 /* For a new file link it in the list of spell files. */ | |
2974 if (old_lp == NULL && lang != NULL) | |
2975 { | |
2976 lp->sl_next = first_lang; | |
2977 first_lang = lp; | |
2978 } | |
2979 | |
2980 goto endOK; | |
2981 | |
2982 endFAIL: | |
2983 if (lang != NULL) | |
2984 /* truncating the name signals the error to spell_load_lang() */ | |
2985 *lang = NUL; | |
2986 if (lp != NULL && old_lp == NULL) | |
2987 slang_free(lp); | |
2988 lp = NULL; | |
2989 | |
2990 endOK: | |
2991 if (fd != NULL) | |
2992 fclose(fd); | |
2993 sourcing_name = save_sourcing_name; | |
2994 sourcing_lnum = save_sourcing_lnum; | |
2995 | |
2996 return lp; | |
2997 } | |
2998 | |
2999 /* | |
3000 * Read a length field from "fd" in "cnt_bytes" bytes. | |
3001 * Allocate memory, read the string into it and add a NUL at the end. | |
3002 * Returns NULL when the count is zero. | |
3003 * Sets "*cntp" to SP_*ERROR when there is an error, length of the result | |
3004 * otherwise. | |
3005 */ | |
3006 static char_u * | |
3007 read_cnt_string(FILE *fd, int cnt_bytes, int *cntp) | |
3008 { | |
3009 int cnt = 0; | |
3010 int i; | |
3011 char_u *str; | |
3012 | |
3013 /* read the length bytes, MSB first */ | |
3014 for (i = 0; i < cnt_bytes; ++i) | |
3015 cnt = (cnt << 8) + getc(fd); | |
3016 if (cnt < 0) | |
3017 { | |
3018 *cntp = SP_TRUNCERROR; | |
3019 return NULL; | |
3020 } | |
3021 *cntp = cnt; | |
3022 if (cnt == 0) | |
3023 return NULL; /* nothing to read, return NULL */ | |
3024 | |
3025 str = read_string(fd, cnt); | |
3026 if (str == NULL) | |
3027 *cntp = SP_OTHERERROR; | |
3028 return str; | |
3029 } | |
3030 | |
3031 /* | |
3032 * Read SN_REGION: <regionname> ... | |
3033 * Return SP_*ERROR flags. | |
3034 */ | |
3035 static int | |
3036 read_region_section(FILE *fd, slang_T *lp, int len) | |
3037 { | |
3038 int i; | |
3039 | |
3040 if (len > 16) | |
3041 return SP_FORMERROR; | |
3042 for (i = 0; i < len; ++i) | |
3043 lp->sl_regions[i] = getc(fd); /* <regionname> */ | |
3044 lp->sl_regions[len] = NUL; | |
3045 return 0; | |
3046 } | |
3047 | |
3048 /* | |
3049 * Read SN_CHARFLAGS section: <charflagslen> <charflags> | |
3050 * <folcharslen> <folchars> | |
3051 * Return SP_*ERROR flags. | |
3052 */ | |
3053 static int | |
3054 read_charflags_section(FILE *fd) | |
3055 { | |
3056 char_u *flags; | |
3057 char_u *fol; | |
3058 int flagslen, follen; | |
3059 | |
3060 /* <charflagslen> <charflags> */ | |
3061 flags = read_cnt_string(fd, 1, &flagslen); | |
3062 if (flagslen < 0) | |
3063 return flagslen; | |
3064 | |
3065 /* <folcharslen> <folchars> */ | |
3066 fol = read_cnt_string(fd, 2, &follen); | |
3067 if (follen < 0) | |
3068 { | |
3069 vim_free(flags); | |
3070 return follen; | |
3071 } | |
3072 | |
3073 /* Set the word-char flags and fill SPELL_ISUPPER() table. */ | |
3074 if (flags != NULL && fol != NULL) | |
3075 set_spell_charflags(flags, flagslen, fol); | |
3076 | |
3077 vim_free(flags); | |
3078 vim_free(fol); | |
3079 | |
3080 /* When <charflagslen> is zero then <fcharlen> must also be zero. */ | |
3081 if ((flags == NULL) != (fol == NULL)) | |
3082 return SP_FORMERROR; | |
3083 return 0; | |
3084 } | |
3085 | |
3086 /* | |
3087 * Read SN_PREFCOND section. | |
3088 * Return SP_*ERROR flags. | |
3089 */ | |
3090 static int | |
3091 read_prefcond_section(FILE *fd, slang_T *lp) | |
3092 { | |
3093 int cnt; | |
3094 int i; | |
3095 int n; | |
3096 char_u *p; | |
3097 char_u buf[MAXWLEN + 1]; | |
3098 | |
3099 /* <prefcondcnt> <prefcond> ... */ | |
3100 cnt = get2c(fd); /* <prefcondcnt> */ | |
3101 if (cnt <= 0) | |
3102 return SP_FORMERROR; | |
3103 | |
3104 lp->sl_prefprog = (regprog_T **)alloc_clear( | |
3105 (unsigned)sizeof(regprog_T *) * cnt); | |
3106 if (lp->sl_prefprog == NULL) | |
3107 return SP_OTHERERROR; | |
3108 lp->sl_prefixcnt = cnt; | |
3109 | |
3110 for (i = 0; i < cnt; ++i) | |
3111 { | |
3112 /* <prefcond> : <condlen> <condstr> */ | |
3113 n = getc(fd); /* <condlen> */ | |
3114 if (n < 0 || n >= MAXWLEN) | |
3115 return SP_FORMERROR; | |
3116 | |
3117 /* When <condlen> is zero we have an empty condition. Otherwise | |
3118 * compile the regexp program used to check for the condition. */ | |
3119 if (n > 0) | |
3120 { | |
3121 buf[0] = '^'; /* always match at one position only */ | |
3122 p = buf + 1; | |
3123 while (n-- > 0) | |
3124 *p++ = getc(fd); /* <condstr> */ | |
3125 *p = NUL; | |
3126 lp->sl_prefprog[i] = vim_regcomp(buf, RE_MAGIC + RE_STRING); | |
3127 } | |
3128 } | |
3129 return 0; | |
3130 } | |
3131 | |
3132 /* | |
3133 * Read REP or REPSAL items section from "fd": <repcount> <rep> ... | |
3134 * Return SP_*ERROR flags. | |
3135 */ | |
3136 static int | |
3137 read_rep_section(FILE *fd, garray_T *gap, short *first) | |
3138 { | |
3139 int cnt; | |
3140 fromto_T *ftp; | |
3141 int i; | |
3142 | |
3143 cnt = get2c(fd); /* <repcount> */ | |
3144 if (cnt < 0) | |
3145 return SP_TRUNCERROR; | |
3146 | |
3147 if (ga_grow(gap, cnt) == FAIL) | |
3148 return SP_OTHERERROR; | |
3149 | |
3150 /* <rep> : <repfromlen> <repfrom> <reptolen> <repto> */ | |
3151 for (; gap->ga_len < cnt; ++gap->ga_len) | |
3152 { | |
3153 ftp = &((fromto_T *)gap->ga_data)[gap->ga_len]; | |
3154 ftp->ft_from = read_cnt_string(fd, 1, &i); | |
3155 if (i < 0) | |
3156 return i; | |
3157 if (i == 0) | |
3158 return SP_FORMERROR; | |
3159 ftp->ft_to = read_cnt_string(fd, 1, &i); | |
3160 if (i <= 0) | |
3161 { | |
3162 vim_free(ftp->ft_from); | |
3163 if (i < 0) | |
3164 return i; | |
3165 return SP_FORMERROR; | |
3166 } | |
3167 } | |
3168 | |
3169 /* Fill the first-index table. */ | |
3170 for (i = 0; i < 256; ++i) | |
3171 first[i] = -1; | |
3172 for (i = 0; i < gap->ga_len; ++i) | |
3173 { | |
3174 ftp = &((fromto_T *)gap->ga_data)[i]; | |
3175 if (first[*ftp->ft_from] == -1) | |
3176 first[*ftp->ft_from] = i; | |
3177 } | |
3178 return 0; | |
3179 } | |
3180 | |
3181 /* | |
3182 * Read SN_SAL section: <salflags> <salcount> <sal> ... | |
3183 * Return SP_*ERROR flags. | |
3184 */ | |
3185 static int | |
3186 read_sal_section(FILE *fd, slang_T *slang) | |
3187 { | |
3188 int i; | |
3189 int cnt; | |
3190 garray_T *gap; | |
3191 salitem_T *smp; | |
3192 int ccnt; | |
3193 char_u *p; | |
3194 int c = NUL; | |
3195 | |
3196 slang->sl_sofo = FALSE; | |
3197 | |
3198 i = getc(fd); /* <salflags> */ | |
3199 if (i & SAL_F0LLOWUP) | |
3200 slang->sl_followup = TRUE; | |
3201 if (i & SAL_COLLAPSE) | |
3202 slang->sl_collapse = TRUE; | |
3203 if (i & SAL_REM_ACCENTS) | |
3204 slang->sl_rem_accents = TRUE; | |
3205 | |
3206 cnt = get2c(fd); /* <salcount> */ | |
3207 if (cnt < 0) | |
3208 return SP_TRUNCERROR; | |
3209 | |
3210 gap = &slang->sl_sal; | |
3211 ga_init2(gap, sizeof(salitem_T), 10); | |
3212 if (ga_grow(gap, cnt + 1) == FAIL) | |
3213 return SP_OTHERERROR; | |
3214 | |
3215 /* <sal> : <salfromlen> <salfrom> <saltolen> <salto> */ | |
3216 for (; gap->ga_len < cnt; ++gap->ga_len) | |
3217 { | |
3218 smp = &((salitem_T *)gap->ga_data)[gap->ga_len]; | |
3219 ccnt = getc(fd); /* <salfromlen> */ | |
3220 if (ccnt < 0) | |
3221 return SP_TRUNCERROR; | |
3222 if ((p = alloc(ccnt + 2)) == NULL) | |
3223 return SP_OTHERERROR; | |
3224 smp->sm_lead = p; | |
3225 | |
3226 /* Read up to the first special char into sm_lead. */ | |
3227 for (i = 0; i < ccnt; ++i) | |
3228 { | |
3229 c = getc(fd); /* <salfrom> */ | |
3230 if (vim_strchr((char_u *)"0123456789(-<^$", c) != NULL) | |
3231 break; | |
3232 *p++ = c; | |
3233 } | |
3234 smp->sm_leadlen = (int)(p - smp->sm_lead); | |
3235 *p++ = NUL; | |
3236 | |
3237 /* Put (abc) chars in sm_oneof, if any. */ | |
3238 if (c == '(') | |
3239 { | |
3240 smp->sm_oneof = p; | |
3241 for (++i; i < ccnt; ++i) | |
3242 { | |
3243 c = getc(fd); /* <salfrom> */ | |
3244 if (c == ')') | |
3245 break; | |
3246 *p++ = c; | |
3247 } | |
3248 *p++ = NUL; | |
3249 if (++i < ccnt) | |
3250 c = getc(fd); | |
3251 } | |
3252 else | |
3253 smp->sm_oneof = NULL; | |
3254 | |
3255 /* Any following chars go in sm_rules. */ | |
3256 smp->sm_rules = p; | |
3257 if (i < ccnt) | |
3258 /* store the char we got while checking for end of sm_lead */ | |
3259 *p++ = c; | |
3260 for (++i; i < ccnt; ++i) | |
3261 *p++ = getc(fd); /* <salfrom> */ | |
3262 *p++ = NUL; | |
3263 | |
3264 /* <saltolen> <salto> */ | |
3265 smp->sm_to = read_cnt_string(fd, 1, &ccnt); | |
3266 if (ccnt < 0) | |
3267 { | |
3268 vim_free(smp->sm_lead); | |
3269 return ccnt; | |
3270 } | |
3271 | |
3272 #ifdef FEAT_MBYTE | |
3273 if (has_mbyte) | |
3274 { | |
3275 /* convert the multi-byte strings to wide char strings */ | |
3276 smp->sm_lead_w = mb_str2wide(smp->sm_lead); | |
3277 smp->sm_leadlen = mb_charlen(smp->sm_lead); | |
3278 if (smp->sm_oneof == NULL) | |
3279 smp->sm_oneof_w = NULL; | |
3280 else | |
3281 smp->sm_oneof_w = mb_str2wide(smp->sm_oneof); | |
3282 if (smp->sm_to == NULL) | |
3283 smp->sm_to_w = NULL; | |
3284 else | |
3285 smp->sm_to_w = mb_str2wide(smp->sm_to); | |
3286 if (smp->sm_lead_w == NULL | |
3287 || (smp->sm_oneof_w == NULL && smp->sm_oneof != NULL) | |
3288 || (smp->sm_to_w == NULL && smp->sm_to != NULL)) | |
3289 { | |
3290 vim_free(smp->sm_lead); | |
3291 vim_free(smp->sm_to); | |
3292 vim_free(smp->sm_lead_w); | |
3293 vim_free(smp->sm_oneof_w); | |
3294 vim_free(smp->sm_to_w); | |
3295 return SP_OTHERERROR; | |
3296 } | |
3297 } | |
3298 #endif | |
3299 } | |
3300 | |
3301 if (gap->ga_len > 0) | |
3302 { | |
3303 /* Add one extra entry to mark the end with an empty sm_lead. Avoids | |
3304 * that we need to check the index every time. */ | |
3305 smp = &((salitem_T *)gap->ga_data)[gap->ga_len]; | |
3306 if ((p = alloc(1)) == NULL) | |
3307 return SP_OTHERERROR; | |
3308 p[0] = NUL; | |
3309 smp->sm_lead = p; | |
3310 smp->sm_leadlen = 0; | |
3311 smp->sm_oneof = NULL; | |
3312 smp->sm_rules = p; | |
3313 smp->sm_to = NULL; | |
3314 #ifdef FEAT_MBYTE | |
3315 if (has_mbyte) | |
3316 { | |
3317 smp->sm_lead_w = mb_str2wide(smp->sm_lead); | |
3318 smp->sm_leadlen = 0; | |
3319 smp->sm_oneof_w = NULL; | |
3320 smp->sm_to_w = NULL; | |
3321 } | |
3322 #endif | |
3323 ++gap->ga_len; | |
3324 } | |
3325 | |
3326 /* Fill the first-index table. */ | |
3327 set_sal_first(slang); | |
3328 | |
3329 return 0; | |
3330 } | |
3331 | |
3332 /* | |
3333 * Read SN_WORDS: <word> ... | |
3334 * Return SP_*ERROR flags. | |
3335 */ | |
3336 static int | |
3337 read_words_section(FILE *fd, slang_T *lp, int len) | |
3338 { | |
3339 int done = 0; | |
3340 int i; | |
3341 int c; | |
3342 char_u word[MAXWLEN]; | |
3343 | |
3344 while (done < len) | |
3345 { | |
3346 /* Read one word at a time. */ | |
3347 for (i = 0; ; ++i) | |
3348 { | |
3349 c = getc(fd); | |
3350 if (c == EOF) | |
3351 return SP_TRUNCERROR; | |
3352 word[i] = c; | |
3353 if (word[i] == NUL) | |
3354 break; | |
3355 if (i == MAXWLEN - 1) | |
3356 return SP_FORMERROR; | |
3357 } | |
3358 | |
3359 /* Init the count to 10. */ | |
3360 count_common_word(lp, word, -1, 10); | |
3361 done += i + 1; | |
3362 } | |
3363 return 0; | |
3364 } | |
3365 | 2131 |
3366 /* | 2132 /* |
3367 * Add a word to the hashtable of common words. | 2133 * Add a word to the hashtable of common words. |
3368 * If it's already there then the counter is increased. | 2134 * If it's already there then the counter is increased. |
3369 */ | 2135 */ |
3370 static void | 2136 void |
3371 count_common_word( | 2137 count_common_word( |
3372 slang_T *lp, | 2138 slang_T *lp, |
3373 char_u *word, | 2139 char_u *word, |
3374 int len, /* word length, -1 for upto NUL */ | 2140 int len, /* word length, -1 for upto NUL */ |
3375 int count) /* 1 to count once, 10 to init */ | 2141 int count) /* 1 to count once, 10 to init */ |
3441 return newscore; | 2207 return newscore; |
3442 } | 2208 } |
3443 return score; | 2209 return score; |
3444 } | 2210 } |
3445 | 2211 |
3446 /* | |
3447 * SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto> | |
3448 * Return SP_*ERROR flags. | |
3449 */ | |
3450 static int | |
3451 read_sofo_section(FILE *fd, slang_T *slang) | |
3452 { | |
3453 int cnt; | |
3454 char_u *from, *to; | |
3455 int res; | |
3456 | |
3457 slang->sl_sofo = TRUE; | |
3458 | |
3459 /* <sofofromlen> <sofofrom> */ | |
3460 from = read_cnt_string(fd, 2, &cnt); | |
3461 if (cnt < 0) | |
3462 return cnt; | |
3463 | |
3464 /* <sofotolen> <sofoto> */ | |
3465 to = read_cnt_string(fd, 2, &cnt); | |
3466 if (cnt < 0) | |
3467 { | |
3468 vim_free(from); | |
3469 return cnt; | |
3470 } | |
3471 | |
3472 /* Store the info in slang->sl_sal and/or slang->sl_sal_first. */ | |
3473 if (from != NULL && to != NULL) | |
3474 res = set_sofo(slang, from, to); | |
3475 else if (from != NULL || to != NULL) | |
3476 res = SP_FORMERROR; /* only one of two strings is an error */ | |
3477 else | |
3478 res = 0; | |
3479 | |
3480 vim_free(from); | |
3481 vim_free(to); | |
3482 return res; | |
3483 } | |
3484 | |
3485 /* | |
3486 * Read the compound section from the .spl file: | |
3487 * <compmax> <compminlen> <compsylmax> <compoptions> <compflags> | |
3488 * Returns SP_*ERROR flags. | |
3489 */ | |
3490 static int | |
3491 read_compound(FILE *fd, slang_T *slang, int len) | |
3492 { | |
3493 int todo = len; | |
3494 int c; | |
3495 int atstart; | |
3496 char_u *pat; | |
3497 char_u *pp; | |
3498 char_u *cp; | |
3499 char_u *ap; | |
3500 char_u *crp; | |
3501 int cnt; | |
3502 garray_T *gap; | |
3503 | |
3504 if (todo < 2) | |
3505 return SP_FORMERROR; /* need at least two bytes */ | |
3506 | |
3507 --todo; | |
3508 c = getc(fd); /* <compmax> */ | |
3509 if (c < 2) | |
3510 c = MAXWLEN; | |
3511 slang->sl_compmax = c; | |
3512 | |
3513 --todo; | |
3514 c = getc(fd); /* <compminlen> */ | |
3515 if (c < 1) | |
3516 c = 0; | |
3517 slang->sl_compminlen = c; | |
3518 | |
3519 --todo; | |
3520 c = getc(fd); /* <compsylmax> */ | |
3521 if (c < 1) | |
3522 c = MAXWLEN; | |
3523 slang->sl_compsylmax = c; | |
3524 | |
3525 c = getc(fd); /* <compoptions> */ | |
3526 if (c != 0) | |
3527 ungetc(c, fd); /* be backwards compatible with Vim 7.0b */ | |
3528 else | |
3529 { | |
3530 --todo; | |
3531 c = getc(fd); /* only use the lower byte for now */ | |
3532 --todo; | |
3533 slang->sl_compoptions = c; | |
3534 | |
3535 gap = &slang->sl_comppat; | |
3536 c = get2c(fd); /* <comppatcount> */ | |
3537 todo -= 2; | |
3538 ga_init2(gap, sizeof(char_u *), c); | |
3539 if (ga_grow(gap, c) == OK) | |
3540 while (--c >= 0) | |
3541 { | |
3542 ((char_u **)(gap->ga_data))[gap->ga_len++] = | |
3543 read_cnt_string(fd, 1, &cnt); | |
3544 /* <comppatlen> <comppattext> */ | |
3545 if (cnt < 0) | |
3546 return cnt; | |
3547 todo -= cnt + 1; | |
3548 } | |
3549 } | |
3550 if (todo < 0) | |
3551 return SP_FORMERROR; | |
3552 | |
3553 /* Turn the COMPOUNDRULE items into a regexp pattern: | |
3554 * "a[bc]/a*b+" -> "^\(a[bc]\|a*b\+\)$". | |
3555 * Inserting backslashes may double the length, "^\(\)$<Nul>" is 7 bytes. | |
3556 * Conversion to utf-8 may double the size. */ | |
3557 c = todo * 2 + 7; | |
3558 #ifdef FEAT_MBYTE | |
3559 if (enc_utf8) | |
3560 c += todo * 2; | |
3561 #endif | |
3562 pat = alloc((unsigned)c); | |
3563 if (pat == NULL) | |
3564 return SP_OTHERERROR; | |
3565 | |
3566 /* We also need a list of all flags that can appear at the start and one | |
3567 * for all flags. */ | |
3568 cp = alloc(todo + 1); | |
3569 if (cp == NULL) | |
3570 { | |
3571 vim_free(pat); | |
3572 return SP_OTHERERROR; | |
3573 } | |
3574 slang->sl_compstartflags = cp; | |
3575 *cp = NUL; | |
3576 | |
3577 ap = alloc(todo + 1); | |
3578 if (ap == NULL) | |
3579 { | |
3580 vim_free(pat); | |
3581 return SP_OTHERERROR; | |
3582 } | |
3583 slang->sl_compallflags = ap; | |
3584 *ap = NUL; | |
3585 | |
3586 /* And a list of all patterns in their original form, for checking whether | |
3587 * compounding may work in match_compoundrule(). This is freed when we | |
3588 * encounter a wildcard, the check doesn't work then. */ | |
3589 crp = alloc(todo + 1); | |
3590 slang->sl_comprules = crp; | |
3591 | |
3592 pp = pat; | |
3593 *pp++ = '^'; | |
3594 *pp++ = '\\'; | |
3595 *pp++ = '('; | |
3596 | |
3597 atstart = 1; | |
3598 while (todo-- > 0) | |
3599 { | |
3600 c = getc(fd); /* <compflags> */ | |
3601 if (c == EOF) | |
3602 { | |
3603 vim_free(pat); | |
3604 return SP_TRUNCERROR; | |
3605 } | |
3606 | |
3607 /* Add all flags to "sl_compallflags". */ | |
3608 if (vim_strchr((char_u *)"?*+[]/", c) == NULL | |
3609 && !byte_in_str(slang->sl_compallflags, c)) | |
3610 { | |
3611 *ap++ = c; | |
3612 *ap = NUL; | |
3613 } | |
3614 | |
3615 if (atstart != 0) | |
3616 { | |
3617 /* At start of item: copy flags to "sl_compstartflags". For a | |
3618 * [abc] item set "atstart" to 2 and copy up to the ']'. */ | |
3619 if (c == '[') | |
3620 atstart = 2; | |
3621 else if (c == ']') | |
3622 atstart = 0; | |
3623 else | |
3624 { | |
3625 if (!byte_in_str(slang->sl_compstartflags, c)) | |
3626 { | |
3627 *cp++ = c; | |
3628 *cp = NUL; | |
3629 } | |
3630 if (atstart == 1) | |
3631 atstart = 0; | |
3632 } | |
3633 } | |
3634 | |
3635 /* Copy flag to "sl_comprules", unless we run into a wildcard. */ | |
3636 if (crp != NULL) | |
3637 { | |
3638 if (c == '?' || c == '+' || c == '*') | |
3639 { | |
3640 vim_free(slang->sl_comprules); | |
3641 slang->sl_comprules = NULL; | |
3642 crp = NULL; | |
3643 } | |
3644 else | |
3645 *crp++ = c; | |
3646 } | |
3647 | |
3648 if (c == '/') /* slash separates two items */ | |
3649 { | |
3650 *pp++ = '\\'; | |
3651 *pp++ = '|'; | |
3652 atstart = 1; | |
3653 } | |
3654 else /* normal char, "[abc]" and '*' are copied as-is */ | |
3655 { | |
3656 if (c == '?' || c == '+' || c == '~') | |
3657 *pp++ = '\\'; /* "a?" becomes "a\?", "a+" becomes "a\+" */ | |
3658 #ifdef FEAT_MBYTE | |
3659 if (enc_utf8) | |
3660 pp += mb_char2bytes(c, pp); | |
3661 else | |
3662 #endif | |
3663 *pp++ = c; | |
3664 } | |
3665 } | |
3666 | |
3667 *pp++ = '\\'; | |
3668 *pp++ = ')'; | |
3669 *pp++ = '$'; | |
3670 *pp = NUL; | |
3671 | |
3672 if (crp != NULL) | |
3673 *crp = NUL; | |
3674 | |
3675 slang->sl_compprog = vim_regcomp(pat, RE_MAGIC + RE_STRING + RE_STRICT); | |
3676 vim_free(pat); | |
3677 if (slang->sl_compprog == NULL) | |
3678 return SP_FORMERROR; | |
3679 | |
3680 return 0; | |
3681 } | |
3682 | 2212 |
3683 /* | 2213 /* |
3684 * Return TRUE if byte "n" appears in "str". | 2214 * Return TRUE if byte "n" appears in "str". |
3685 * Like strchr() but independent of locale. | 2215 * Like strchr() but independent of locale. |
3686 */ | 2216 */ |
3687 static int | 2217 int |
3688 byte_in_str(char_u *str, int n) | 2218 byte_in_str(char_u *str, int n) |
3689 { | 2219 { |
3690 char_u *p; | 2220 char_u *p; |
3691 | 2221 |
3692 for (p = str; *p != NUL; ++p) | 2222 for (p = str; *p != NUL; ++p) |
3704 | 2234 |
3705 /* | 2235 /* |
3706 * Truncate "slang->sl_syllable" at the first slash and put the following items | 2236 * Truncate "slang->sl_syllable" at the first slash and put the following items |
3707 * in "slang->sl_syl_items". | 2237 * in "slang->sl_syl_items". |
3708 */ | 2238 */ |
3709 static int | 2239 int |
3710 init_syl_tab(slang_T *slang) | 2240 init_syl_tab(slang_T *slang) |
3711 { | 2241 { |
3712 char_u *p; | 2242 char_u *p; |
3713 char_u *s; | 2243 char_u *s; |
3714 int l; | 2244 int l; |
3800 skip = TRUE; /* don't count following syllable chars */ | 2330 skip = TRUE; /* don't count following syllable chars */ |
3801 } | 2331 } |
3802 } | 2332 } |
3803 } | 2333 } |
3804 return cnt; | 2334 return cnt; |
3805 } | |
3806 | |
3807 /* | |
3808 * Set the SOFOFROM and SOFOTO items in language "lp". | |
3809 * Returns SP_*ERROR flags when there is something wrong. | |
3810 */ | |
3811 static int | |
3812 set_sofo(slang_T *lp, char_u *from, char_u *to) | |
3813 { | |
3814 int i; | |
3815 | |
3816 #ifdef FEAT_MBYTE | |
3817 garray_T *gap; | |
3818 char_u *s; | |
3819 char_u *p; | |
3820 int c; | |
3821 int *inp; | |
3822 | |
3823 if (has_mbyte) | |
3824 { | |
3825 /* Use "sl_sal" as an array with 256 pointers to a list of wide | |
3826 * characters. The index is the low byte of the character. | |
3827 * The list contains from-to pairs with a terminating NUL. | |
3828 * sl_sal_first[] is used for latin1 "from" characters. */ | |
3829 gap = &lp->sl_sal; | |
3830 ga_init2(gap, sizeof(int *), 1); | |
3831 if (ga_grow(gap, 256) == FAIL) | |
3832 return SP_OTHERERROR; | |
3833 vim_memset(gap->ga_data, 0, sizeof(int *) * 256); | |
3834 gap->ga_len = 256; | |
3835 | |
3836 /* First count the number of items for each list. Temporarily use | |
3837 * sl_sal_first[] for this. */ | |
3838 for (p = from, s = to; *p != NUL && *s != NUL; ) | |
3839 { | |
3840 c = mb_cptr2char_adv(&p); | |
3841 mb_cptr_adv(s); | |
3842 if (c >= 256) | |
3843 ++lp->sl_sal_first[c & 0xff]; | |
3844 } | |
3845 if (*p != NUL || *s != NUL) /* lengths differ */ | |
3846 return SP_FORMERROR; | |
3847 | |
3848 /* Allocate the lists. */ | |
3849 for (i = 0; i < 256; ++i) | |
3850 if (lp->sl_sal_first[i] > 0) | |
3851 { | |
3852 p = alloc(sizeof(int) * (lp->sl_sal_first[i] * 2 + 1)); | |
3853 if (p == NULL) | |
3854 return SP_OTHERERROR; | |
3855 ((int **)gap->ga_data)[i] = (int *)p; | |
3856 *(int *)p = 0; | |
3857 } | |
3858 | |
3859 /* Put the characters up to 255 in sl_sal_first[] the rest in a sl_sal | |
3860 * list. */ | |
3861 vim_memset(lp->sl_sal_first, 0, sizeof(salfirst_T) * 256); | |
3862 for (p = from, s = to; *p != NUL && *s != NUL; ) | |
3863 { | |
3864 c = mb_cptr2char_adv(&p); | |
3865 i = mb_cptr2char_adv(&s); | |
3866 if (c >= 256) | |
3867 { | |
3868 /* Append the from-to chars at the end of the list with | |
3869 * the low byte. */ | |
3870 inp = ((int **)gap->ga_data)[c & 0xff]; | |
3871 while (*inp != 0) | |
3872 ++inp; | |
3873 *inp++ = c; /* from char */ | |
3874 *inp++ = i; /* to char */ | |
3875 *inp++ = NUL; /* NUL at the end */ | |
3876 } | |
3877 else | |
3878 /* mapping byte to char is done in sl_sal_first[] */ | |
3879 lp->sl_sal_first[c] = i; | |
3880 } | |
3881 } | |
3882 else | |
3883 #endif | |
3884 { | |
3885 /* mapping bytes to bytes is done in sl_sal_first[] */ | |
3886 if (STRLEN(from) != STRLEN(to)) | |
3887 return SP_FORMERROR; | |
3888 | |
3889 for (i = 0; to[i] != NUL; ++i) | |
3890 lp->sl_sal_first[from[i]] = to[i]; | |
3891 lp->sl_sal.ga_len = 1; /* indicates we have soundfolding */ | |
3892 } | |
3893 | |
3894 return 0; | |
3895 } | |
3896 | |
3897 /* | |
3898 * Fill the first-index table for "lp". | |
3899 */ | |
3900 static void | |
3901 set_sal_first(slang_T *lp) | |
3902 { | |
3903 salfirst_T *sfirst; | |
3904 int i; | |
3905 salitem_T *smp; | |
3906 int c; | |
3907 garray_T *gap = &lp->sl_sal; | |
3908 | |
3909 sfirst = lp->sl_sal_first; | |
3910 for (i = 0; i < 256; ++i) | |
3911 sfirst[i] = -1; | |
3912 smp = (salitem_T *)gap->ga_data; | |
3913 for (i = 0; i < gap->ga_len; ++i) | |
3914 { | |
3915 #ifdef FEAT_MBYTE | |
3916 if (has_mbyte) | |
3917 /* Use the lowest byte of the first character. For latin1 it's | |
3918 * the character, for other encodings it should differ for most | |
3919 * characters. */ | |
3920 c = *smp[i].sm_lead_w & 0xff; | |
3921 else | |
3922 #endif | |
3923 c = *smp[i].sm_lead; | |
3924 if (sfirst[c] == -1) | |
3925 { | |
3926 sfirst[c] = i; | |
3927 #ifdef FEAT_MBYTE | |
3928 if (has_mbyte) | |
3929 { | |
3930 int n; | |
3931 | |
3932 /* Make sure all entries with this byte are following each | |
3933 * other. Move the ones that are in the wrong position. Do | |
3934 * keep the same ordering! */ | |
3935 while (i + 1 < gap->ga_len | |
3936 && (*smp[i + 1].sm_lead_w & 0xff) == c) | |
3937 /* Skip over entry with same index byte. */ | |
3938 ++i; | |
3939 | |
3940 for (n = 1; i + n < gap->ga_len; ++n) | |
3941 if ((*smp[i + n].sm_lead_w & 0xff) == c) | |
3942 { | |
3943 salitem_T tsal; | |
3944 | |
3945 /* Move entry with same index byte after the entries | |
3946 * we already found. */ | |
3947 ++i; | |
3948 --n; | |
3949 tsal = smp[i + n]; | |
3950 mch_memmove(smp + i + 1, smp + i, | |
3951 sizeof(salitem_T) * n); | |
3952 smp[i] = tsal; | |
3953 } | |
3954 } | |
3955 #endif | |
3956 } | |
3957 } | |
3958 } | |
3959 | |
#ifdef FEAT_MBYTE
/*
 * Convert the multi-byte string "s" into an array of wide characters that
 * ends in a NUL.
 * The result is allocated memory; NULL is returned when out of memory.
 */
    static int *
mb_str2wide(char_u *s)
{
    int		*wide;
    char_u	*src = s;
    int		n = 0;

    wide = (int *)alloc(sizeof(int) * (mb_charlen(s) + 1));
    if (wide == NULL)
	return NULL;

    while (*src != NUL)
	wide[n++] = mb_ptr2char_adv(&src);
    wide[n] = NUL;

    return wide;
}
#endif
3982 | |
3983 /* | |
3984 * Read a tree from the .spl or .sug file. | |
3985 * Allocates the memory and stores pointers in "bytsp" and "idxsp". | |
3986 * This is skipped when the tree has zero length. | |
3987 * Returns zero when OK, SP_ value for an error. | |
3988 */ | |
3989 static int | |
3990 spell_read_tree( | |
3991 FILE *fd, | |
3992 char_u **bytsp, | |
3993 idx_T **idxsp, | |
3994 int prefixtree, /* TRUE for the prefix tree */ | |
3995 int prefixcnt) /* when "prefixtree" is TRUE: prefix count */ | |
3996 { | |
3997 int len; | |
3998 int idx; | |
3999 char_u *bp; | |
4000 idx_T *ip; | |
4001 | |
4002 /* The tree size was computed when writing the file, so that we can | |
4003 * allocate it as one long block. <nodecount> */ | |
4004 len = get4c(fd); | |
4005 if (len < 0) | |
4006 return SP_TRUNCERROR; | |
4007 if (len > 0) | |
4008 { | |
4009 /* Allocate the byte array. */ | |
4010 bp = lalloc((long_u)len, TRUE); | |
4011 if (bp == NULL) | |
4012 return SP_OTHERERROR; | |
4013 *bytsp = bp; | |
4014 | |
4015 /* Allocate the index array. */ | |
4016 ip = (idx_T *)lalloc_clear((long_u)(len * sizeof(int)), TRUE); | |
4017 if (ip == NULL) | |
4018 return SP_OTHERERROR; | |
4019 *idxsp = ip; | |
4020 | |
4021 /* Recursively read the tree and store it in the array. */ | |
4022 idx = read_tree_node(fd, bp, ip, len, 0, prefixtree, prefixcnt); | |
4023 if (idx < 0) | |
4024 return idx; | |
4025 } | |
4026 return 0; | |
4027 } | |
4028 | |
4029 /* | |
4030 * Read one row of siblings from the spell file and store it in the byte array | |
4031 * "byts" and index array "idxs". Recursively read the children. | |
4032 * | |
4033 * NOTE: The code here must match put_node()! | |
4034 * | |
4035 * Returns the index (>= 0) following the siblings. | |
4036 * Returns SP_TRUNCERROR if the file is shorter than expected. | |
4037 * Returns SP_FORMERROR if there is a format error. | |
4038 */ | |
4039 static idx_T | |
4040 read_tree_node( | |
4041 FILE *fd, | |
4042 char_u *byts, | |
4043 idx_T *idxs, | |
4044 int maxidx, /* size of arrays */ | |
4045 idx_T startidx, /* current index in "byts" and "idxs" */ | |
4046 int prefixtree, /* TRUE for reading PREFIXTREE */ | |
4047 int maxprefcondnr) /* maximum for <prefcondnr> */ | |
4048 { | |
4049 int len; | |
4050 int i; | |
4051 int n; | |
4052 idx_T idx = startidx; | |
4053 int c; | |
4054 int c2; | |
4055 #define SHARED_MASK 0x8000000 | |
4056 | |
4057 len = getc(fd); /* <siblingcount> */ | |
4058 if (len <= 0) | |
4059 return SP_TRUNCERROR; | |
4060 | |
4061 if (startidx + len >= maxidx) | |
4062 return SP_FORMERROR; | |
4063 byts[idx++] = len; | |
4064 | |
4065 /* Read the byte values, flag/region bytes and shared indexes. */ | |
4066 for (i = 1; i <= len; ++i) | |
4067 { | |
4068 c = getc(fd); /* <byte> */ | |
4069 if (c < 0) | |
4070 return SP_TRUNCERROR; | |
4071 if (c <= BY_SPECIAL) | |
4072 { | |
4073 if (c == BY_NOFLAGS && !prefixtree) | |
4074 { | |
4075 /* No flags, all regions. */ | |
4076 idxs[idx] = 0; | |
4077 c = 0; | |
4078 } | |
4079 else if (c != BY_INDEX) | |
4080 { | |
4081 if (prefixtree) | |
4082 { | |
4083 /* Read the optional pflags byte, the prefix ID and the | |
4084 * condition nr. In idxs[] store the prefix ID in the low | |
4085 * byte, the condition index shifted up 8 bits, the flags | |
4086 * shifted up 24 bits. */ | |
4087 if (c == BY_FLAGS) | |
4088 c = getc(fd) << 24; /* <pflags> */ | |
4089 else | |
4090 c = 0; | |
4091 | |
4092 c |= getc(fd); /* <affixID> */ | |
4093 | |
4094 n = get2c(fd); /* <prefcondnr> */ | |
4095 if (n >= maxprefcondnr) | |
4096 return SP_FORMERROR; | |
4097 c |= (n << 8); | |
4098 } | |
4099 else /* c must be BY_FLAGS or BY_FLAGS2 */ | |
4100 { | |
4101 /* Read flags and optional region and prefix ID. In | |
4102 * idxs[] the flags go in the low two bytes, region above | |
4103 * that and prefix ID above the region. */ | |
4104 c2 = c; | |
4105 c = getc(fd); /* <flags> */ | |
4106 if (c2 == BY_FLAGS2) | |
4107 c = (getc(fd) << 8) + c; /* <flags2> */ | |
4108 if (c & WF_REGION) | |
4109 c = (getc(fd) << 16) + c; /* <region> */ | |
4110 if (c & WF_AFX) | |
4111 c = (getc(fd) << 24) + c; /* <affixID> */ | |
4112 } | |
4113 | |
4114 idxs[idx] = c; | |
4115 c = 0; | |
4116 } | |
4117 else /* c == BY_INDEX */ | |
4118 { | |
4119 /* <nodeidx> */ | |
4120 n = get3c(fd); | |
4121 if (n < 0 || n >= maxidx) | |
4122 return SP_FORMERROR; | |
4123 idxs[idx] = n + SHARED_MASK; | |
4124 c = getc(fd); /* <xbyte> */ | |
4125 } | |
4126 } | |
4127 byts[idx++] = c; | |
4128 } | |
4129 | |
4130 /* Recursively read the children for non-shared siblings. | |
4131 * Skip the end-of-word ones (zero byte value) and the shared ones (and | |
4132 * remove SHARED_MASK) */ | |
4133 for (i = 1; i <= len; ++i) | |
4134 if (byts[startidx + i] != 0) | |
4135 { | |
4136 if (idxs[startidx + i] & SHARED_MASK) | |
4137 idxs[startidx + i] &= ~SHARED_MASK; | |
4138 else | |
4139 { | |
4140 idxs[startidx + i] = idx; | |
4141 idx = read_tree_node(fd, byts, idxs, maxidx, idx, | |
4142 prefixtree, maxprefcondnr); | |
4143 if (idx < 0) | |
4144 break; | |
4145 } | |
4146 } | |
4147 | |
4148 return idx; | |
4149 } | 2335 } |
4150 | 2336 |
4151 /* | 2337 /* |
4152 * Parse 'spelllang' and set w_s->b_langp accordingly. | 2338 * Parse 'spelllang' and set w_s->b_langp accordingly. |
4153 * Returns NULL if it's OK, an error message otherwise. | 2339 * Returns NULL if it's OK, an error message otherwise. |
4560 * w word 0 | 2746 * w word 0 |
4561 * Word WF_ONECAP | 2747 * Word WF_ONECAP |
4562 * W WORD WF_ALLCAP | 2748 * W WORD WF_ALLCAP |
4563 * WoRd wOrd WF_KEEPCAP | 2749 * WoRd wOrd WF_KEEPCAP |
4564 */ | 2750 */ |
4565 static int | 2751 int |
4566 captype( | 2752 captype( |
4567 char_u *word, | 2753 char_u *word, |
4568 char_u *end) /* When NULL use up to NUL byte. */ | 2754 char_u *end) /* When NULL use up to NUL byte. */ |
4569 { | 2755 { |
4570 char_u *p; | 2756 char_u *p; |
4742 } | 2928 } |
4743 } | 2929 } |
4744 #endif | 2930 #endif |
4745 | 2931 |
4746 /* | 2932 /* |
4747 * Reload the spell file "fname" if it's loaded. | |
4748 */ | |
4749 static void | |
4750 spell_reload_one( | |
4751 char_u *fname, | |
4752 int added_word) /* invoked through "zg" */ | |
4753 { | |
4754 slang_T *slang; | |
4755 int didit = FALSE; | |
4756 | |
4757 for (slang = first_lang; slang != NULL; slang = slang->sl_next) | |
4758 { | |
4759 if (fullpathcmp(fname, slang->sl_fname, FALSE) == FPC_SAME) | |
4760 { | |
4761 slang_clear(slang); | |
4762 if (spell_load_file(fname, NULL, slang, FALSE) == NULL) | |
4763 /* reloading failed, clear the language */ | |
4764 slang_clear(slang); | |
4765 redraw_all_later(SOME_VALID); | |
4766 didit = TRUE; | |
4767 } | |
4768 } | |
4769 | |
4770 /* When "zg" was used and the file wasn't loaded yet, should redo | |
4771 * 'spelllang' to load it now. */ | |
4772 if (added_word && !didit) | |
4773 did_set_spelllang(curwin); | |
4774 } | |
4775 | |
4776 | |
4777 /* | |
4778 * Functions for ":mkspell". | |
4779 */ | |
4780 | |
4781 #define MAXLINELEN 500 /* Maximum length in bytes of a line in a .aff | |
4782 and .dic file. */ | |
/*
 * Main structure to store the contents of a ".aff" file.
 * The "unsigned" members hold the ID of the flag named in their comment.
 */
typedef struct afffile_S
{
    char_u	*af_enc;	/* "SET", normalized, alloc'ed string or NULL */
    int		af_flagtype;	/* AFT_CHAR, AFT_LONG, AFT_NUM or AFT_CAPLONG */
    unsigned	af_rare;	/* RARE ID for rare word */
    unsigned	af_keepcase;	/* KEEPCASE ID for keep-case word */
    unsigned	af_bad;		/* BAD ID for banned word */
    unsigned	af_needaffix;	/* NEEDAFFIX ID */
    unsigned	af_circumfix;	/* CIRCUMFIX ID */
    unsigned	af_needcomp;	/* NEEDCOMPOUND ID */
    unsigned	af_comproot;	/* COMPOUNDROOT ID */
    unsigned	af_compforbid;	/* COMPOUNDFORBIDFLAG ID */
    unsigned	af_comppermit;	/* COMPOUNDPERMITFLAG ID */
    unsigned	af_nosuggest;	/* NOSUGGEST ID */
    int		af_pfxpostpone;	/* postpone prefixes without chop string and
				   without flags */
    int		af_ignoreextra;	/* IGNOREEXTRA present */
    hashtab_T	af_pref;	/* hashtable for prefixes, affheader_T */
    hashtab_T	af_suff;	/* hashtable for suffixes, affheader_T */
    hashtab_T	af_comp;	/* hashtable for compound flags, compitem_T */
} afffile_T;
4807 | |
4808 #define AFT_CHAR 0 /* flags are one character */ | |
4809 #define AFT_LONG 1 /* flags are two characters */ | |
4810 #define AFT_CAPLONG 2 /* flags are one or two characters */ | |
4811 #define AFT_NUM 3 /* flags are numbers, comma separated */ | |
4812 | |
typedef struct affentry_S affentry_T;
/* Affix entry from ".aff" file.  Used for prefixes and suffixes.
 * Entries with the same name/number form a list linked through "ae_next". */
struct affentry_S
{
    affentry_T	*ae_next;	/* next affix with same name/number */
    char_u	*ae_chop;	/* text to chop off basic word (can be NULL) */
    char_u	*ae_add;	/* text to add to basic word (can be NULL) */
    char_u	*ae_flags;	/* flags on the affix (can be NULL) */
    char_u	*ae_cond;	/* condition (NULL for ".") */
    regprog_T	*ae_prog;	/* regexp program for ae_cond or NULL */
    char	ae_compforbid;	/* COMPOUNDFORBIDFLAG found */
    char	ae_comppermit;	/* COMPOUNDPERMITFLAG found */
};
4826 | |
4827 #ifdef FEAT_MBYTE | |
4828 # define AH_KEY_LEN 17 /* 2 x 8 bytes + NUL */ | |
4829 #else | |
4830 # define AH_KEY_LEN 7 /* 6 digits + NUL */ | |
4831 #endif | |
4832 | |
/* Affix header from ".aff" file.  Used for af_pref and af_suff.
 * "ah_key" must remain the first member: HI2AH() casts the hashtable key
 * pointer to a pointer to this structure. */
typedef struct affheader_S
{
    char_u	ah_key[AH_KEY_LEN]; /* key for hashtab == name of affix */
    unsigned	ah_flag;	/* affix name as number, uses "af_flagtype" */
    int		ah_newID;	/* prefix ID after renumbering; 0 if not used */
    int		ah_combine;	/* suffix may combine with prefix */
    int		ah_follows;	/* another affix block should be following */
    affentry_T	*ah_first;	/* first affix entry */
} affheader_T;
4843 | |
4844 #define HI2AH(hi) ((affheader_T *)(hi)->hi_key) | |
4845 | |
/* Flag used in compound items.
 * "ci_key" must remain the first member: HI2CI() casts the hashtable key
 * pointer to a pointer to this structure. */
typedef struct compitem_S
{
    char_u	ci_key[AH_KEY_LEN]; /* key for hashtab == name of compound */
    unsigned	ci_flag;	/* affix name as number, uses "af_flagtype" */
    int		ci_newID;	/* affix ID after renumbering. */
} compitem_T;
4853 | |
4854 #define HI2CI(hi) ((compitem_T *)(hi)->hi_key) | |
4855 | |
/*
 * Structure that is used to store the items in the word tree.  This avoids
 * the need to keep track of each allocated thing, everything is freed all at
 * once after ":mkspell" is done.
 * Note: "sb_next" must be just before "sb_data" to make sure the alignment of
 * "sb_data" is correct for systems where pointers must be aligned on
 * pointer-size boundaries and sizeof(pointer) > sizeof(int) (e.g., Sparc).
 */
#define  SBLOCKSIZE 16000	/* size of sb_data */
typedef struct sblock_S sblock_T;
struct sblock_S
{
    int		sb_used;	/* nr of bytes already in use */
    sblock_T	*sb_next;	/* next block in list */
    char_u	sb_data[1];	/* data, actually longer: declared with one
				   element, see SBLOCKSIZE for the real size */
};
4872 | |
/*
 * A node in the tree.
 * A word is a chain of nodes linked through "wn_child", one node per byte.
 * Alternative bytes at the same position are linked through "wn_sibling".
 * A node with a NUL "wn_byte" marks the end of a word.
 */
typedef struct wordnode_S wordnode_T;
struct wordnode_S
{
    union   /* shared to save space */
    {
	char_u	hashkey[6];	/* the hash key, only used while compressing */
	int	index;		/* index in written nodes (valid after first
				   round) */
    } wn_u1;
    union   /* shared to save space */
    {
	wordnode_T *next;	/* next node with same hash key */
	wordnode_T *wnode;	/* parent node that will write this node */
    } wn_u2;
    wordnode_T	*wn_child;	/* child (next byte in word) */
    wordnode_T  *wn_sibling;	/* next sibling (alternate byte in word,
				   always sorted) */
    int		wn_refs;	/* Nr. of references to this node.  Only
				   relevant for first node in a list of
				   siblings, in following siblings it is
				   always one. */
    char_u	wn_byte;	/* Byte for this node. NUL for word end */

    /* Info for when "wn_byte" is NUL.
     * In PREFIXTREE "wn_region" is used for the prefcondnr.
     * In the soundfolded word tree "wn_flags" has the MSW of the wordnr and
     * "wn_region" the LSW of the wordnr. */
    char_u	wn_affixID;	/* supported/required prefix ID or 0 */
    short_u	wn_flags;	/* WF_ flags */
    short	wn_region;	/* region mask */

#ifdef SPELL_PRINTTREE
    int		wn_nr;		/* sequence nr for printing */
#endif
};
4911 | |
4912 #define WN_MASK 0xffff /* mask relevant bits of "wn_flags" */ | |
4913 | |
/* Get the wordnode_T for hashtable item "hi".  The whole expansion is in
 * parentheses so that "HI2WN(hi)->member" applies "->" to the result of the
 * cast and not to "hi_key". */
#define HI2WN(hi)    ((wordnode_T *)((hi)->hi_key))
4915 | |
/*
 * Info used while reading the spell files.
 * Holds the word trees that are being built, the memory blocks they are
 * stored in and the items collected from the .aff file.
 */
typedef struct spellinfo_S
{
    wordnode_T	*si_foldroot;	/* tree with case-folded words */
    long	si_foldwcount;	/* nr of words in si_foldroot */

    wordnode_T	*si_keeproot;	/* tree with keep-case words */
    long	si_keepwcount;	/* nr of words in si_keeproot */

    wordnode_T	*si_prefroot;	/* tree with postponed prefixes */

    long	si_sugtree;	/* creating the soundfolding trie */

    sblock_T	*si_blocks;	/* memory blocks used */
    long	si_blocks_cnt;	/* memory blocks allocated */
    int		si_did_emsg;	/* TRUE when ran out of memory */

    long	si_compress_cnt;    /* words to add before lowering
				       compression limit */
    wordnode_T	*si_first_free; /* List of nodes that have been freed during
				   compression, linked by "wn_child" field. */
    long	si_free_count;	/* number of nodes in si_first_free */
#ifdef SPELL_PRINTTREE
    int		si_wordnode_nr;	/* sequence nr for nodes */
#endif
    buf_T	*si_spellbuf;	/* buffer used to store soundfold word table */

    int		si_ascii;	/* handling only ASCII words */
    int		si_add;		/* addition file */
    int		si_clear_chartab;   /* when TRUE clear char tables */
    int		si_region;	/* region mask */
    vimconv_T	si_conv;	/* for conversion to 'encoding' */
    int		si_memtot;	/* runtime memory used */
    int		si_verbose;	/* verbose messages */
    int		si_msg_count;	/* number of words added since last message */
    char_u	*si_info;	/* info text chars or NULL */
    int		si_region_count; /* number of regions supported (1 when there
				    are no regions) */
    char_u	si_region_name[17]; /* region names (used only if
				     * si_region_count > 1) */

    garray_T	si_rep;		/* list of fromto_T entries from REP lines */
    garray_T	si_repsal;	/* list of fromto_T entries from REPSAL lines */
    garray_T	si_sal;		/* list of fromto_T entries from SAL lines */
    char_u	*si_sofofr;	/* SOFOFROM text */
    char_u	*si_sofoto;	/* SOFOTO text */
    int		si_nosugfile;	/* NOSUGFILE item found */
    int		si_nosplitsugs;	/* NOSPLITSUGS item found */
    int		si_nocompoundsugs; /* NOCOMPOUNDSUGS item found */
    int		si_followup;	/* soundsalike: ? */
    int		si_collapse;	/* soundsalike: ? */
    hashtab_T	si_commonwords;	/* hashtable for common words */
    time_t	si_sugtime;	/* timestamp for .sug file */
    int		si_rem_accents;	/* soundsalike: remove accents */
    garray_T	si_map;		/* MAP info concatenated */
    char_u	*si_midword;	/* MIDWORD chars or NULL  */
    int		si_compmax;	/* max nr of words for compounding */
    int		si_compminlen;	/* minimal length for compounding */
    int		si_compsylmax;	/* max nr of syllables for compounding */
    int		si_compoptions;	/* COMP_ flags */
    garray_T	si_comppat;	/* CHECKCOMPOUNDPATTERN items, each stored as
				   a string */
    char_u	*si_compflags;	/* flags used for compounding */
    char_u	si_nobreak;	/* NOBREAK */
    char_u	*si_syllable;	/* syllable string */
    garray_T	si_prefcond;	/* table with conditions for postponed
				 * prefixes, each stored as a string */
    int		si_newprefID;	/* current value for ah_newID */
    int		si_newcompID;	/* current value for compound ID */
} spellinfo_T;
4988 | |
4989 static afffile_T *spell_read_aff(spellinfo_T *spin, char_u *fname); | |
4990 static int is_aff_rule(char_u **items, int itemcnt, char *rulename, int mincount); | |
4991 static void aff_process_flags(afffile_T *affile, affentry_T *entry); | |
4992 static int spell_info_item(char_u *s); | |
4993 static unsigned affitem2flag(int flagtype, char_u *item, char_u *fname, int lnum); | |
4994 static unsigned get_affitem(int flagtype, char_u **pp); | |
4995 static void process_compflags(spellinfo_T *spin, afffile_T *aff, char_u *compflags); | |
4996 static void check_renumber(spellinfo_T *spin); | |
4997 static int flag_in_afflist(int flagtype, char_u *afflist, unsigned flag); | |
4998 static void aff_check_number(int spinval, int affval, char *name); | |
4999 static void aff_check_string(char_u *spinval, char_u *affval, char *name); | |
5000 static int str_equal(char_u *s1, char_u *s2); | |
5001 static void add_fromto(spellinfo_T *spin, garray_T *gap, char_u *from, char_u *to); | |
5002 static int sal_to_bool(char_u *s); | |
5003 static void spell_free_aff(afffile_T *aff); | |
5004 static int spell_read_dic(spellinfo_T *spin, char_u *fname, afffile_T *affile); | |
5005 static int get_affix_flags(afffile_T *affile, char_u *afflist); | |
5006 static int get_pfxlist(afffile_T *affile, char_u *afflist, char_u *store_afflist); | |
5007 static void get_compflags(afffile_T *affile, char_u *afflist, char_u *store_afflist); | |
5008 static int store_aff_word(spellinfo_T *spin, char_u *word, char_u *afflist, afffile_T *affile, hashtab_T *ht, hashtab_T *xht, int condit, int flags, char_u *pfxlist, int pfxlen); | |
5009 static int spell_read_wordfile(spellinfo_T *spin, char_u *fname); | |
5010 static void *getroom(spellinfo_T *spin, size_t len, int align); | |
5011 static char_u *getroom_save(spellinfo_T *spin, char_u *s); | |
5012 static void free_blocks(sblock_T *bl); | |
5013 static wordnode_T *wordtree_alloc(spellinfo_T *spin); | |
5014 static int store_word(spellinfo_T *spin, char_u *word, int flags, int region, char_u *pfxlist, int need_affix); | |
5015 static int tree_add_word(spellinfo_T *spin, char_u *word, wordnode_T *tree, int flags, int region, int affixID); | |
5016 static wordnode_T *get_wordnode(spellinfo_T *spin); | |
5017 static int deref_wordnode(spellinfo_T *spin, wordnode_T *node); | |
5018 static void free_wordnode(spellinfo_T *spin, wordnode_T *n); | |
5019 static void wordtree_compress(spellinfo_T *spin, wordnode_T *root); | |
5020 static int node_compress(spellinfo_T *spin, wordnode_T *node, hashtab_T *ht, int *tot); | |
5021 static int node_equal(wordnode_T *n1, wordnode_T *n2); | |
5022 static int write_vim_spell(spellinfo_T *spin, char_u *fname); | |
5023 static void clear_node(wordnode_T *node); | |
5024 static int put_node(FILE *fd, wordnode_T *node, int idx, int regionmask, int prefixtree); | |
5025 static void spell_make_sugfile(spellinfo_T *spin, char_u *wfname); | |
5026 static int sug_filltree(spellinfo_T *spin, slang_T *slang); | |
5027 static int sug_maketable(spellinfo_T *spin); | |
5028 static int sug_filltable(spellinfo_T *spin, wordnode_T *node, int startwordnr, garray_T *gap); | |
5029 static int offset2bytes(int nr, char_u *buf); | |
5030 static int bytes2offset(char_u **pp); | |
5031 static void sug_write(spellinfo_T *spin, char_u *fname); | |
5032 static void mkspell(int fcount, char_u **fnames, int ascii, int over_write, int added_word); | |
5033 static void spell_message(spellinfo_T *spin, char_u *str); | |
5034 static void init_spellfile(void); | |
5035 | |
5036 /* In the postponed prefixes tree wn_flags is used to store the WFP_ flags, | |
5037 * but it must be negative to indicate the prefix tree to tree_add_word(). | |
5038 * Use a negative number with the lower 8 bits zero. */ | |
5039 #define PFX_FLAGS -256 | |
5040 | |
5041 /* flags for "condit" argument of store_aff_word() */ | |
5042 #define CONDIT_COMB 1 /* affix must combine */ | |
5043 #define CONDIT_CFIX 2 /* affix must have CIRCUMFIX flag */ | |
5044 #define CONDIT_SUF 4 /* add a suffix for matching flags */ | |
5045 #define CONDIT_AFF 8 /* word already has an affix */ | |
5046 | |
5047 /* | |
5048 * Tunable parameters for when the tree is compressed. See 'mkspellmem'. | |
5049 */ | |
5050 static long compress_start = 30000; /* memory / SBLOCKSIZE */ | |
5051 static long compress_inc = 100; /* memory / SBLOCKSIZE */ | |
5052 static long compress_added = 500000; /* word count */ | |
5053 | |
5054 #ifdef SPELL_PRINTTREE | |
5055 /* | |
5056 * For debugging the tree code: print the current tree in a (more or less) | |
5057 * readable format, so that we can see what happens when adding a word and/or | |
5058 * compressing the tree. | |
5059 * Based on code from Olaf Seibert. | |
5060 */ | |
#define PRINTLINESIZE   1000    /* size of each output line buffer */
#define PRINTWIDTH      6       /* number of columns used per tree level */

/*
 * Format "fmt" with arguments "a1" and "a2" into line buffer "l", starting at
 * the column that belongs to tree level "depth".
 * The macro arguments are parenthesized, so that an expression such as
 * "depth + 1" results in the intended offset and remaining size.
 * When the start column would be at or past the end of the buffer nothing is
 * written and the result is zero, instead of writing out of bounds with a
 * negative size.
 * Note: "depth" is evaluated more than once, pass an expression without side
 * effects.
 */
#define PRINTSOME(l, depth, fmt, a1, a2) \
    ((depth) * PRINTWIDTH < PRINTLINESIZE \
        ? vim_snprintf((l) + (depth) * PRINTWIDTH, \
                    PRINTLINESIZE - PRINTWIDTH * (depth), fmt, (a1), (a2)) \
        : 0)

/* Buffers for the three text lines that are output together for a node. */
static char line1[PRINTLINESIZE];
static char line2[PRINTLINESIZE];
static char line3[PRINTLINESIZE];
5070 | |
5071 static void | |
5072 spell_clear_flags(wordnode_T *node) | |
5073 { | |
5074 wordnode_T *np; | |
5075 | |
5076 for (np = node; np != NULL; np = np->wn_sibling) | |
5077 { | |
5078 np->wn_u1.index = FALSE; | |
5079 spell_clear_flags(np->wn_child); | |
5080 } | |
5081 } | |
5082 | |
5083 static void | |
5084 spell_print_node(wordnode_T *node, int depth) | |
5085 { | |
5086 if (node->wn_u1.index) | |
5087 { | |
5088 /* Done this node before, print the reference. */ | |
5089 PRINTSOME(line1, depth, "(%d)", node->wn_nr, 0); | |
5090 PRINTSOME(line2, depth, " ", 0, 0); | |
5091 PRINTSOME(line3, depth, " ", 0, 0); | |
5092 msg((char_u *)line1); | |
5093 msg((char_u *)line2); | |
5094 msg((char_u *)line3); | |
5095 } | |
5096 else | |
5097 { | |
5098 node->wn_u1.index = TRUE; | |
5099 | |
5100 if (node->wn_byte != NUL) | |
5101 { | |
5102 if (node->wn_child != NULL) | |
5103 PRINTSOME(line1, depth, " %c -> ", node->wn_byte, 0); | |
5104 else | |
5105 /* Cannot happen? */ | |
5106 PRINTSOME(line1, depth, " %c ???", node->wn_byte, 0); | |
5107 } | |
5108 else | |
5109 PRINTSOME(line1, depth, " $ ", 0, 0); | |
5110 | |
5111 PRINTSOME(line2, depth, "%d/%d ", node->wn_nr, node->wn_refs); | |
5112 | |
5113 if (node->wn_sibling != NULL) | |
5114 PRINTSOME(line3, depth, " | ", 0, 0); | |
5115 else | |
5116 PRINTSOME(line3, depth, " ", 0, 0); | |
5117 | |
5118 if (node->wn_byte == NUL) | |
5119 { | |
5120 msg((char_u *)line1); | |
5121 msg((char_u *)line2); | |
5122 msg((char_u *)line3); | |
5123 } | |
5124 | |
5125 /* do the children */ | |
5126 if (node->wn_byte != NUL && node->wn_child != NULL) | |
5127 spell_print_node(node->wn_child, depth + 1); | |
5128 | |
5129 /* do the siblings */ | |
5130 if (node->wn_sibling != NULL) | |
5131 { | |
5132 /* get rid of all parent details except | */ | |
5133 STRCPY(line1, line3); | |
5134 STRCPY(line2, line3); | |
5135 spell_print_node(node->wn_sibling, depth); | |
5136 } | |
5137 } | |
5138 } | |
5139 | |
5140 static void | |
5141 spell_print_tree(wordnode_T *root) | |
5142 { | |
5143 if (root != NULL) | |
5144 { | |
5145 /* Clear the "wn_u1.index" fields, used to remember what has been | |
5146 * done. */ | |
5147 spell_clear_flags(root); | |
5148 | |
5149 /* Recursively print the tree. */ | |
5150 spell_print_node(root, 0); | |
5151 } | |
5152 } | |
5153 #endif /* SPELL_PRINTTREE */ | |
5154 | |
5155 /* | |
5156 * Read the affix file "fname". | |
5157 * Returns an afffile_T, NULL for complete failure. | |
5158 */ | |
5159 static afffile_T * | |
5160 spell_read_aff(spellinfo_T *spin, char_u *fname) | |
5161 { | |
5162 FILE *fd; | |
5163 afffile_T *aff; | |
5164 char_u rline[MAXLINELEN]; | |
5165 char_u *line; | |
5166 char_u *pc = NULL; | |
5167 #define MAXITEMCNT 30 | |
5168 char_u *(items[MAXITEMCNT]); | |
5169 int itemcnt; | |
5170 char_u *p; | |
5171 int lnum = 0; | |
5172 affheader_T *cur_aff = NULL; | |
5173 int did_postpone_prefix = FALSE; | |
5174 int aff_todo = 0; | |
5175 hashtab_T *tp; | |
5176 char_u *low = NULL; | |
5177 char_u *fol = NULL; | |
5178 char_u *upp = NULL; | |
5179 int do_rep; | |
5180 int do_repsal; | |
5181 int do_sal; | |
5182 int do_mapline; | |
5183 int found_map = FALSE; | |
5184 hashitem_T *hi; | |
5185 int l; | |
5186 int compminlen = 0; /* COMPOUNDMIN value */ | |
5187 int compsylmax = 0; /* COMPOUNDSYLMAX value */ | |
5188 int compoptions = 0; /* COMP_ flags */ | |
5189 int compmax = 0; /* COMPOUNDWORDMAX value */ | |
5190 char_u *compflags = NULL; /* COMPOUNDFLAG and COMPOUNDRULE | |
5191 concatenated */ | |
5192 char_u *midword = NULL; /* MIDWORD value */ | |
5193 char_u *syllable = NULL; /* SYLLABLE value */ | |
5194 char_u *sofofrom = NULL; /* SOFOFROM value */ | |
5195 char_u *sofoto = NULL; /* SOFOTO value */ | |
5196 | |
5197 /* | |
5198 * Open the file. | |
5199 */ | |
5200 fd = mch_fopen((char *)fname, "r"); | |
5201 if (fd == NULL) | |
5202 { | |
5203 EMSG2(_(e_notopen), fname); | |
5204 return NULL; | |
5205 } | |
5206 | |
5207 vim_snprintf((char *)IObuff, IOSIZE, _("Reading affix file %s ..."), fname); | |
5208 spell_message(spin, IObuff); | |
5209 | |
5210 /* Only do REP lines when not done in another .aff file already. */ | |
5211 do_rep = spin->si_rep.ga_len == 0; | |
5212 | |
5213 /* Only do REPSAL lines when not done in another .aff file already. */ | |
5214 do_repsal = spin->si_repsal.ga_len == 0; | |
5215 | |
5216 /* Only do SAL lines when not done in another .aff file already. */ | |
5217 do_sal = spin->si_sal.ga_len == 0; | |
5218 | |
5219 /* Only do MAP lines when not done in another .aff file already. */ | |
5220 do_mapline = spin->si_map.ga_len == 0; | |
5221 | |
5222 /* | |
5223 * Allocate and init the afffile_T structure. | |
5224 */ | |
5225 aff = (afffile_T *)getroom(spin, sizeof(afffile_T), TRUE); | |
5226 if (aff == NULL) | |
5227 { | |
5228 fclose(fd); | |
5229 return NULL; | |
5230 } | |
5231 hash_init(&aff->af_pref); | |
5232 hash_init(&aff->af_suff); | |
5233 hash_init(&aff->af_comp); | |
5234 | |
5235 /* | |
5236 * Read all the lines in the file one by one. | |
5237 */ | |
5238 while (!vim_fgets(rline, MAXLINELEN, fd) && !got_int) | |
5239 { | |
5240 line_breakcheck(); | |
5241 ++lnum; | |
5242 | |
5243 /* Skip comment lines. */ | |
5244 if (*rline == '#') | |
5245 continue; | |
5246 | |
5247 /* Convert from "SET" to 'encoding' when needed. */ | |
5248 vim_free(pc); | |
5249 #ifdef FEAT_MBYTE | |
5250 if (spin->si_conv.vc_type != CONV_NONE) | |
5251 { | |
5252 pc = string_convert(&spin->si_conv, rline, NULL); | |
5253 if (pc == NULL) | |
5254 { | |
5255 smsg((char_u *)_("Conversion failure for word in %s line %d: %s"), | |
5256 fname, lnum, rline); | |
5257 continue; | |
5258 } | |
5259 line = pc; | |
5260 } | |
5261 else | |
5262 #endif | |
5263 { | |
5264 pc = NULL; | |
5265 line = rline; | |
5266 } | |
5267 | |
5268 /* Split the line up in white separated items. Put a NUL after each | |
5269 * item. */ | |
5270 itemcnt = 0; | |
5271 for (p = line; ; ) | |
5272 { | |
5273 while (*p != NUL && *p <= ' ') /* skip white space and CR/NL */ | |
5274 ++p; | |
5275 if (*p == NUL) | |
5276 break; | |
5277 if (itemcnt == MAXITEMCNT) /* too many items */ | |
5278 break; | |
5279 items[itemcnt++] = p; | |
5280 /* A few items have arbitrary text argument, don't split them. */ | |
5281 if (itemcnt == 2 && spell_info_item(items[0])) | |
5282 while (*p >= ' ' || *p == TAB) /* skip until CR/NL */ | |
5283 ++p; | |
5284 else | |
5285 while (*p > ' ') /* skip until white space or CR/NL */ | |
5286 ++p; | |
5287 if (*p == NUL) | |
5288 break; | |
5289 *p++ = NUL; | |
5290 } | |
5291 | |
5292 /* Handle non-empty lines. */ | |
5293 if (itemcnt > 0) | |
5294 { | |
5295 if (is_aff_rule(items, itemcnt, "SET", 2) && aff->af_enc == NULL) | |
5296 { | |
5297 #ifdef FEAT_MBYTE | |
5298 /* Setup for conversion from "ENC" to 'encoding'. */ | |
5299 aff->af_enc = enc_canonize(items[1]); | |
5300 if (aff->af_enc != NULL && !spin->si_ascii | |
5301 && convert_setup(&spin->si_conv, aff->af_enc, | |
5302 p_enc) == FAIL) | |
5303 smsg((char_u *)_("Conversion in %s not supported: from %s to %s"), | |
5304 fname, aff->af_enc, p_enc); | |
5305 spin->si_conv.vc_fail = TRUE; | |
5306 #else | |
5307 smsg((char_u *)_("Conversion in %s not supported"), fname); | |
5308 #endif | |
5309 } | |
5310 else if (is_aff_rule(items, itemcnt, "FLAG", 2) | |
5311 && aff->af_flagtype == AFT_CHAR) | |
5312 { | |
5313 if (STRCMP(items[1], "long") == 0) | |
5314 aff->af_flagtype = AFT_LONG; | |
5315 else if (STRCMP(items[1], "num") == 0) | |
5316 aff->af_flagtype = AFT_NUM; | |
5317 else if (STRCMP(items[1], "caplong") == 0) | |
5318 aff->af_flagtype = AFT_CAPLONG; | |
5319 else | |
5320 smsg((char_u *)_("Invalid value for FLAG in %s line %d: %s"), | |
5321 fname, lnum, items[1]); | |
5322 if (aff->af_rare != 0 | |
5323 || aff->af_keepcase != 0 | |
5324 || aff->af_bad != 0 | |
5325 || aff->af_needaffix != 0 | |
5326 || aff->af_circumfix != 0 | |
5327 || aff->af_needcomp != 0 | |
5328 || aff->af_comproot != 0 | |
5329 || aff->af_nosuggest != 0 | |
5330 || compflags != NULL | |
5331 || aff->af_suff.ht_used > 0 | |
5332 || aff->af_pref.ht_used > 0) | |
5333 smsg((char_u *)_("FLAG after using flags in %s line %d: %s"), | |
5334 fname, lnum, items[1]); | |
5335 } | |
5336 else if (spell_info_item(items[0])) | |
5337 { | |
5338 p = (char_u *)getroom(spin, | |
5339 (spin->si_info == NULL ? 0 : STRLEN(spin->si_info)) | |
5340 + STRLEN(items[0]) | |
5341 + STRLEN(items[1]) + 3, FALSE); | |
5342 if (p != NULL) | |
5343 { | |
5344 if (spin->si_info != NULL) | |
5345 { | |
5346 STRCPY(p, spin->si_info); | |
5347 STRCAT(p, "\n"); | |
5348 } | |
5349 STRCAT(p, items[0]); | |
5350 STRCAT(p, " "); | |
5351 STRCAT(p, items[1]); | |
5352 spin->si_info = p; | |
5353 } | |
5354 } | |
5355 else if (is_aff_rule(items, itemcnt, "MIDWORD", 2) | |
5356 && midword == NULL) | |
5357 { | |
5358 midword = getroom_save(spin, items[1]); | |
5359 } | |
5360 else if (is_aff_rule(items, itemcnt, "TRY", 2)) | |
5361 { | |
5362 /* ignored, we look in the tree for what chars may appear */ | |
5363 } | |
5364 /* TODO: remove "RAR" later */ | |
5365 else if ((is_aff_rule(items, itemcnt, "RAR", 2) | |
5366 || is_aff_rule(items, itemcnt, "RARE", 2)) | |
5367 && aff->af_rare == 0) | |
5368 { | |
5369 aff->af_rare = affitem2flag(aff->af_flagtype, items[1], | |
5370 fname, lnum); | |
5371 } | |
5372 /* TODO: remove "KEP" later */ | |
5373 else if ((is_aff_rule(items, itemcnt, "KEP", 2) | |
5374 || is_aff_rule(items, itemcnt, "KEEPCASE", 2)) | |
5375 && aff->af_keepcase == 0) | |
5376 { | |
5377 aff->af_keepcase = affitem2flag(aff->af_flagtype, items[1], | |
5378 fname, lnum); | |
5379 } | |
5380 else if ((is_aff_rule(items, itemcnt, "BAD", 2) | |
5381 || is_aff_rule(items, itemcnt, "FORBIDDENWORD", 2)) | |
5382 && aff->af_bad == 0) | |
5383 { | |
5384 aff->af_bad = affitem2flag(aff->af_flagtype, items[1], | |
5385 fname, lnum); | |
5386 } | |
5387 else if (is_aff_rule(items, itemcnt, "NEEDAFFIX", 2) | |
5388 && aff->af_needaffix == 0) | |
5389 { | |
5390 aff->af_needaffix = affitem2flag(aff->af_flagtype, items[1], | |
5391 fname, lnum); | |
5392 } | |
5393 else if (is_aff_rule(items, itemcnt, "CIRCUMFIX", 2) | |
5394 && aff->af_circumfix == 0) | |
5395 { | |
5396 aff->af_circumfix = affitem2flag(aff->af_flagtype, items[1], | |
5397 fname, lnum); | |
5398 } | |
5399 else if (is_aff_rule(items, itemcnt, "NOSUGGEST", 2) | |
5400 && aff->af_nosuggest == 0) | |
5401 { | |
5402 aff->af_nosuggest = affitem2flag(aff->af_flagtype, items[1], | |
5403 fname, lnum); | |
5404 } | |
5405 else if ((is_aff_rule(items, itemcnt, "NEEDCOMPOUND", 2) | |
5406 || is_aff_rule(items, itemcnt, "ONLYINCOMPOUND", 2)) | |
5407 && aff->af_needcomp == 0) | |
5408 { | |
5409 aff->af_needcomp = affitem2flag(aff->af_flagtype, items[1], | |
5410 fname, lnum); | |
5411 } | |
5412 else if (is_aff_rule(items, itemcnt, "COMPOUNDROOT", 2) | |
5413 && aff->af_comproot == 0) | |
5414 { | |
5415 aff->af_comproot = affitem2flag(aff->af_flagtype, items[1], | |
5416 fname, lnum); | |
5417 } | |
5418 else if (is_aff_rule(items, itemcnt, "COMPOUNDFORBIDFLAG", 2) | |
5419 && aff->af_compforbid == 0) | |
5420 { | |
5421 aff->af_compforbid = affitem2flag(aff->af_flagtype, items[1], | |
5422 fname, lnum); | |
5423 if (aff->af_pref.ht_used > 0) | |
5424 smsg((char_u *)_("Defining COMPOUNDFORBIDFLAG after PFX item may give wrong results in %s line %d"), | |
5425 fname, lnum); | |
5426 } | |
5427 else if (is_aff_rule(items, itemcnt, "COMPOUNDPERMITFLAG", 2) | |
5428 && aff->af_comppermit == 0) | |
5429 { | |
5430 aff->af_comppermit = affitem2flag(aff->af_flagtype, items[1], | |
5431 fname, lnum); | |
5432 if (aff->af_pref.ht_used > 0) | |
5433 smsg((char_u *)_("Defining COMPOUNDPERMITFLAG after PFX item may give wrong results in %s line %d"), | |
5434 fname, lnum); | |
5435 } | |
5436 else if (is_aff_rule(items, itemcnt, "COMPOUNDFLAG", 2) | |
5437 && compflags == NULL) | |
5438 { | |
5439 /* Turn flag "c" into COMPOUNDRULE compatible string "c+", | |
5440 * "Na" into "Na+", "1234" into "1234+". */ | |
5441 p = getroom(spin, STRLEN(items[1]) + 2, FALSE); | |
5442 if (p != NULL) | |
5443 { | |
5444 STRCPY(p, items[1]); | |
5445 STRCAT(p, "+"); | |
5446 compflags = p; | |
5447 } | |
5448 } | |
5449 else if (is_aff_rule(items, itemcnt, "COMPOUNDRULES", 2)) | |
5450 { | |
5451 /* We don't use the count, but do check that it's a number and | |
5452 * not COMPOUNDRULE mistyped. */ | |
5453 if (atoi((char *)items[1]) == 0) | |
5454 smsg((char_u *)_("Wrong COMPOUNDRULES value in %s line %d: %s"), | |
5455 fname, lnum, items[1]); | |
5456 } | |
5457 else if (is_aff_rule(items, itemcnt, "COMPOUNDRULE", 2)) | |
5458 { | |
5459 /* Don't use the first rule if it is a number. */ | |
5460 if (compflags != NULL || *skipdigits(items[1]) != NUL) | |
5461 { | |
5462 /* Concatenate this string to previously defined ones, | |
5463 * using a slash to separate them. */ | |
5464 l = (int)STRLEN(items[1]) + 1; | |
5465 if (compflags != NULL) | |
5466 l += (int)STRLEN(compflags) + 1; | |
5467 p = getroom(spin, l, FALSE); | |
5468 if (p != NULL) | |
5469 { | |
5470 if (compflags != NULL) | |
5471 { | |
5472 STRCPY(p, compflags); | |
5473 STRCAT(p, "/"); | |
5474 } | |
5475 STRCAT(p, items[1]); | |
5476 compflags = p; | |
5477 } | |
5478 } | |
5479 } | |
5480 else if (is_aff_rule(items, itemcnt, "COMPOUNDWORDMAX", 2) | |
5481 && compmax == 0) | |
5482 { | |
5483 compmax = atoi((char *)items[1]); | |
5484 if (compmax == 0) | |
5485 smsg((char_u *)_("Wrong COMPOUNDWORDMAX value in %s line %d: %s"), | |
5486 fname, lnum, items[1]); | |
5487 } | |
5488 else if (is_aff_rule(items, itemcnt, "COMPOUNDMIN", 2) | |
5489 && compminlen == 0) | |
5490 { | |
5491 compminlen = atoi((char *)items[1]); | |
5492 if (compminlen == 0) | |
5493 smsg((char_u *)_("Wrong COMPOUNDMIN value in %s line %d: %s"), | |
5494 fname, lnum, items[1]); | |
5495 } | |
5496 else if (is_aff_rule(items, itemcnt, "COMPOUNDSYLMAX", 2) | |
5497 && compsylmax == 0) | |
5498 { | |
5499 compsylmax = atoi((char *)items[1]); | |
5500 if (compsylmax == 0) | |
5501 smsg((char_u *)_("Wrong COMPOUNDSYLMAX value in %s line %d: %s"), | |
5502 fname, lnum, items[1]); | |
5503 } | |
5504 else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDDUP", 1)) | |
5505 { | |
5506 compoptions |= COMP_CHECKDUP; | |
5507 } | |
5508 else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDREP", 1)) | |
5509 { | |
5510 compoptions |= COMP_CHECKREP; | |
5511 } | |
5512 else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDCASE", 1)) | |
5513 { | |
5514 compoptions |= COMP_CHECKCASE; | |
5515 } | |
5516 else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDTRIPLE", 1)) | |
5517 { | |
5518 compoptions |= COMP_CHECKTRIPLE; | |
5519 } | |
5520 else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDPATTERN", 2)) | |
5521 { | |
5522 if (atoi((char *)items[1]) == 0) | |
5523 smsg((char_u *)_("Wrong CHECKCOMPOUNDPATTERN value in %s line %d: %s"), | |
5524 fname, lnum, items[1]); | |
5525 } | |
5526 else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDPATTERN", 3)) | |
5527 { | |
5528 garray_T *gap = &spin->si_comppat; | |
5529 int i; | |
5530 | |
5531 /* Only add the couple if it isn't already there. */ | |
5532 for (i = 0; i < gap->ga_len - 1; i += 2) | |
5533 if (STRCMP(((char_u **)(gap->ga_data))[i], items[1]) == 0 | |
5534 && STRCMP(((char_u **)(gap->ga_data))[i + 1], | |
5535 items[2]) == 0) | |
5536 break; | |
5537 if (i >= gap->ga_len && ga_grow(gap, 2) == OK) | |
5538 { | |
5539 ((char_u **)(gap->ga_data))[gap->ga_len++] | |
5540 = getroom_save(spin, items[1]); | |
5541 ((char_u **)(gap->ga_data))[gap->ga_len++] | |
5542 = getroom_save(spin, items[2]); | |
5543 } | |
5544 } | |
5545 else if (is_aff_rule(items, itemcnt, "SYLLABLE", 2) | |
5546 && syllable == NULL) | |
5547 { | |
5548 syllable = getroom_save(spin, items[1]); | |
5549 } | |
5550 else if (is_aff_rule(items, itemcnt, "NOBREAK", 1)) | |
5551 { | |
5552 spin->si_nobreak = TRUE; | |
5553 } | |
5554 else if (is_aff_rule(items, itemcnt, "NOSPLITSUGS", 1)) | |
5555 { | |
5556 spin->si_nosplitsugs = TRUE; | |
5557 } | |
5558 else if (is_aff_rule(items, itemcnt, "NOCOMPOUNDSUGS", 1)) | |
5559 { | |
5560 spin->si_nocompoundsugs = TRUE; | |
5561 } | |
5562 else if (is_aff_rule(items, itemcnt, "NOSUGFILE", 1)) | |
5563 { | |
5564 spin->si_nosugfile = TRUE; | |
5565 } | |
5566 else if (is_aff_rule(items, itemcnt, "PFXPOSTPONE", 1)) | |
5567 { | |
5568 aff->af_pfxpostpone = TRUE; | |
5569 } | |
5570 else if (is_aff_rule(items, itemcnt, "IGNOREEXTRA", 1)) | |
5571 { | |
5572 aff->af_ignoreextra = TRUE; | |
5573 } | |
5574 else if ((STRCMP(items[0], "PFX") == 0 | |
5575 || STRCMP(items[0], "SFX") == 0) | |
5576 && aff_todo == 0 | |
5577 && itemcnt >= 4) | |
5578 { | |
5579 int lasti = 4; | |
5580 char_u key[AH_KEY_LEN]; | |
5581 | |
5582 if (*items[0] == 'P') | |
5583 tp = &aff->af_pref; | |
5584 else | |
5585 tp = &aff->af_suff; | |
5586 | |
5587 /* Myspell allows the same affix name to be used multiple | |
5588 * times. The affix files that do this have an undocumented | |
5589 * "S" flag on all but the last block, thus we check for that | |
5590 * and store it in ah_follows. */ | |
5591 vim_strncpy(key, items[1], AH_KEY_LEN - 1); | |
5592 hi = hash_find(tp, key); | |
5593 if (!HASHITEM_EMPTY(hi)) | |
5594 { | |
5595 cur_aff = HI2AH(hi); | |
5596 if (cur_aff->ah_combine != (*items[2] == 'Y')) | |
5597 smsg((char_u *)_("Different combining flag in continued affix block in %s line %d: %s"), | |
5598 fname, lnum, items[1]); | |
5599 if (!cur_aff->ah_follows) | |
5600 smsg((char_u *)_("Duplicate affix in %s line %d: %s"), | |
5601 fname, lnum, items[1]); | |
5602 } | |
5603 else | |
5604 { | |
5605 /* New affix letter. */ | |
5606 cur_aff = (affheader_T *)getroom(spin, | |
5607 sizeof(affheader_T), TRUE); | |
5608 if (cur_aff == NULL) | |
5609 break; | |
5610 cur_aff->ah_flag = affitem2flag(aff->af_flagtype, items[1], | |
5611 fname, lnum); | |
5612 if (cur_aff->ah_flag == 0 || STRLEN(items[1]) >= AH_KEY_LEN) | |
5613 break; | |
5614 if (cur_aff->ah_flag == aff->af_bad | |
5615 || cur_aff->ah_flag == aff->af_rare | |
5616 || cur_aff->ah_flag == aff->af_keepcase | |
5617 || cur_aff->ah_flag == aff->af_needaffix | |
5618 || cur_aff->ah_flag == aff->af_circumfix | |
5619 || cur_aff->ah_flag == aff->af_nosuggest | |
5620 || cur_aff->ah_flag == aff->af_needcomp | |
5621 || cur_aff->ah_flag == aff->af_comproot) | |
5622 smsg((char_u *)_("Affix also used for BAD/RARE/KEEPCASE/NEEDAFFIX/NEEDCOMPOUND/NOSUGGEST in %s line %d: %s"), | |
5623 fname, lnum, items[1]); | |
5624 STRCPY(cur_aff->ah_key, items[1]); | |
5625 hash_add(tp, cur_aff->ah_key); | |
5626 | |
5627 cur_aff->ah_combine = (*items[2] == 'Y'); | |
5628 } | |
5629 | |
5630 /* Check for the "S" flag, which apparently means that another | |
5631 * block with the same affix name is following. */ | |
5632 if (itemcnt > lasti && STRCMP(items[lasti], "S") == 0) | |
5633 { | |
5634 ++lasti; | |
5635 cur_aff->ah_follows = TRUE; | |
5636 } | |
5637 else | |
5638 cur_aff->ah_follows = FALSE; | |
5639 | |
5640 /* Myspell allows extra text after the item, but that might | |
5641 * mean mistakes go unnoticed. Require a comment-starter. */ | |
5642 if (itemcnt > lasti && *items[lasti] != '#') | |
5643 smsg((char_u *)_(e_afftrailing), fname, lnum, items[lasti]); | |
5644 | |
5645 if (STRCMP(items[2], "Y") != 0 && STRCMP(items[2], "N") != 0) | |
5646 smsg((char_u *)_("Expected Y or N in %s line %d: %s"), | |
5647 fname, lnum, items[2]); | |
5648 | |
5649 if (*items[0] == 'P' && aff->af_pfxpostpone) | |
5650 { | |
5651 if (cur_aff->ah_newID == 0) | |
5652 { | |
5653 /* Use a new number in the .spl file later, to be able | |
5654 * to handle multiple .aff files. */ | |
5655 check_renumber(spin); | |
5656 cur_aff->ah_newID = ++spin->si_newprefID; | |
5657 | |
5658 /* We only really use ah_newID if the prefix is | |
5659 * postponed. We know that only after handling all | |
5660 * the items. */ | |
5661 did_postpone_prefix = FALSE; | |
5662 } | |
5663 else | |
5664 /* Did use the ID in a previous block. */ | |
5665 did_postpone_prefix = TRUE; | |
5666 } | |
5667 | |
5668 aff_todo = atoi((char *)items[3]); | |
5669 } | |
5670 else if ((STRCMP(items[0], "PFX") == 0 | |
5671 || STRCMP(items[0], "SFX") == 0) | |
5672 && aff_todo > 0 | |
5673 && STRCMP(cur_aff->ah_key, items[1]) == 0 | |
5674 && itemcnt >= 5) | |
5675 { | |
5676 affentry_T *aff_entry; | |
5677 int upper = FALSE; | |
5678 int lasti = 5; | |
5679 | |
5680 /* Myspell allows extra text after the item, but that might | |
5681 * mean mistakes go unnoticed. Require a comment-starter, | |
5682 * unless IGNOREEXTRA is used. Hunspell uses a "-" item. */ | |
5683 if (itemcnt > lasti | |
5684 && !aff->af_ignoreextra | |
5685 && *items[lasti] != '#' | |
5686 && (STRCMP(items[lasti], "-") != 0 | |
5687 || itemcnt != lasti + 1)) | |
5688 smsg((char_u *)_(e_afftrailing), fname, lnum, items[lasti]); | |
5689 | |
5690 /* New item for an affix letter. */ | |
5691 --aff_todo; | |
5692 aff_entry = (affentry_T *)getroom(spin, | |
5693 sizeof(affentry_T), TRUE); | |
5694 if (aff_entry == NULL) | |
5695 break; | |
5696 | |
5697 if (STRCMP(items[2], "0") != 0) | |
5698 aff_entry->ae_chop = getroom_save(spin, items[2]); | |
5699 if (STRCMP(items[3], "0") != 0) | |
5700 { | |
5701 aff_entry->ae_add = getroom_save(spin, items[3]); | |
5702 | |
5703 /* Recognize flags on the affix: abcd/XYZ */ | |
5704 aff_entry->ae_flags = vim_strchr(aff_entry->ae_add, '/'); | |
5705 if (aff_entry->ae_flags != NULL) | |
5706 { | |
5707 *aff_entry->ae_flags++ = NUL; | |
5708 aff_process_flags(aff, aff_entry); | |
5709 } | |
5710 } | |
5711 | |
5712 /* Don't use an affix entry with non-ASCII characters when | |
5713 * "spin->si_ascii" is TRUE. */ | |
5714 if (!spin->si_ascii || !(has_non_ascii(aff_entry->ae_chop) | |
5715 || has_non_ascii(aff_entry->ae_add))) | |
5716 { | |
5717 aff_entry->ae_next = cur_aff->ah_first; | |
5718 cur_aff->ah_first = aff_entry; | |
5719 | |
5720 if (STRCMP(items[4], ".") != 0) | |
5721 { | |
5722 char_u buf[MAXLINELEN]; | |
5723 | |
5724 aff_entry->ae_cond = getroom_save(spin, items[4]); | |
5725 if (*items[0] == 'P') | |
5726 sprintf((char *)buf, "^%s", items[4]); | |
5727 else | |
5728 sprintf((char *)buf, "%s$", items[4]); | |
5729 aff_entry->ae_prog = vim_regcomp(buf, | |
5730 RE_MAGIC + RE_STRING + RE_STRICT); | |
5731 if (aff_entry->ae_prog == NULL) | |
5732 smsg((char_u *)_("Broken condition in %s line %d: %s"), | |
5733 fname, lnum, items[4]); | |
5734 } | |
5735 | |
5736 /* For postponed prefixes we need an entry in si_prefcond | |
5737 * for the condition. Use an existing one if possible. | |
5738 * Can't be done for an affix with flags, ignoring | |
5739 * COMPOUNDFORBIDFLAG and COMPOUNDPERMITFLAG. */ | |
5740 if (*items[0] == 'P' && aff->af_pfxpostpone | |
5741 && aff_entry->ae_flags == NULL) | |
5742 { | |
5743 /* When the chop string is one lower-case letter and | |
5744 * the add string ends in the upper-case letter we set | |
5745 * the "upper" flag, clear "ae_chop" and remove the | |
5746 * letters from "ae_add". The condition must either | |
5747 * be empty or start with the same letter. */ | |
5748 if (aff_entry->ae_chop != NULL | |
5749 && aff_entry->ae_add != NULL | |
5750 #ifdef FEAT_MBYTE | |
5751 && aff_entry->ae_chop[(*mb_ptr2len)( | |
5752 aff_entry->ae_chop)] == NUL | |
5753 #else | |
5754 && aff_entry->ae_chop[1] == NUL | |
5755 #endif | |
5756 ) | |
5757 { | |
5758 int c, c_up; | |
5759 | |
5760 c = PTR2CHAR(aff_entry->ae_chop); | |
5761 c_up = SPELL_TOUPPER(c); | |
5762 if (c_up != c | |
5763 && (aff_entry->ae_cond == NULL | |
5764 || PTR2CHAR(aff_entry->ae_cond) == c)) | |
5765 { | |
5766 p = aff_entry->ae_add | |
5767 + STRLEN(aff_entry->ae_add); | |
5768 mb_ptr_back(aff_entry->ae_add, p); | |
5769 if (PTR2CHAR(p) == c_up) | |
5770 { | |
5771 upper = TRUE; | |
5772 aff_entry->ae_chop = NULL; | |
5773 *p = NUL; | |
5774 | |
5775 /* The condition is matched with the | |
5776 * actual word, thus must check for the | |
5777 * upper-case letter. */ | |
5778 if (aff_entry->ae_cond != NULL) | |
5779 { | |
5780 char_u buf[MAXLINELEN]; | |
5781 #ifdef FEAT_MBYTE | |
5782 if (has_mbyte) | |
5783 { | |
5784 onecap_copy(items[4], buf, TRUE); | |
5785 aff_entry->ae_cond = getroom_save( | |
5786 spin, buf); | |
5787 } | |
5788 else | |
5789 #endif | |
5790 *aff_entry->ae_cond = c_up; | |
5791 if (aff_entry->ae_cond != NULL) | |
5792 { | |
5793 sprintf((char *)buf, "^%s", | |
5794 aff_entry->ae_cond); | |
5795 vim_regfree(aff_entry->ae_prog); | |
5796 aff_entry->ae_prog = vim_regcomp( | |
5797 buf, RE_MAGIC + RE_STRING); | |
5798 } | |
5799 } | |
5800 } | |
5801 } | |
5802 } | |
5803 | |
5804 if (aff_entry->ae_chop == NULL | |
5805 && aff_entry->ae_flags == NULL) | |
5806 { | |
5807 int idx; | |
5808 char_u **pp; | |
5809 int n; | |
5810 | |
5811 /* Find a previously used condition. */ | |
5812 for (idx = spin->si_prefcond.ga_len - 1; idx >= 0; | |
5813 --idx) | |
5814 { | |
5815 p = ((char_u **)spin->si_prefcond.ga_data)[idx]; | |
5816 if (str_equal(p, aff_entry->ae_cond)) | |
5817 break; | |
5818 } | |
5819 if (idx < 0 && ga_grow(&spin->si_prefcond, 1) == OK) | |
5820 { | |
5821 /* Not found, add a new condition. */ | |
5822 idx = spin->si_prefcond.ga_len++; | |
5823 pp = ((char_u **)spin->si_prefcond.ga_data) | |
5824 + idx; | |
5825 if (aff_entry->ae_cond == NULL) | |
5826 *pp = NULL; | |
5827 else | |
5828 *pp = getroom_save(spin, | |
5829 aff_entry->ae_cond); | |
5830 } | |
5831 | |
5832 /* Add the prefix to the prefix tree. */ | |
5833 if (aff_entry->ae_add == NULL) | |
5834 p = (char_u *)""; | |
5835 else | |
5836 p = aff_entry->ae_add; | |
5837 | |
5838 /* PFX_FLAGS is a negative number, so that | |
5839 * tree_add_word() knows this is the prefix tree. */ | |
5840 n = PFX_FLAGS; | |
5841 if (!cur_aff->ah_combine) | |
5842 n |= WFP_NC; | |
5843 if (upper) | |
5844 n |= WFP_UP; | |
5845 if (aff_entry->ae_comppermit) | |
5846 n |= WFP_COMPPERMIT; | |
5847 if (aff_entry->ae_compforbid) | |
5848 n |= WFP_COMPFORBID; | |
5849 tree_add_word(spin, p, spin->si_prefroot, n, | |
5850 idx, cur_aff->ah_newID); | |
5851 did_postpone_prefix = TRUE; | |
5852 } | |
5853 | |
5854 /* Didn't actually use ah_newID, backup si_newprefID. */ | |
5855 if (aff_todo == 0 && !did_postpone_prefix) | |
5856 { | |
5857 --spin->si_newprefID; | |
5858 cur_aff->ah_newID = 0; | |
5859 } | |
5860 } | |
5861 } | |
5862 } | |
5863 else if (is_aff_rule(items, itemcnt, "FOL", 2) && fol == NULL) | |
5864 { | |
5865 fol = vim_strsave(items[1]); | |
5866 } | |
5867 else if (is_aff_rule(items, itemcnt, "LOW", 2) && low == NULL) | |
5868 { | |
5869 low = vim_strsave(items[1]); | |
5870 } | |
5871 else if (is_aff_rule(items, itemcnt, "UPP", 2) && upp == NULL) | |
5872 { | |
5873 upp = vim_strsave(items[1]); | |
5874 } | |
5875 else if (is_aff_rule(items, itemcnt, "REP", 2) | |
5876 || is_aff_rule(items, itemcnt, "REPSAL", 2)) | |
5877 { | |
5878 /* Ignore REP/REPSAL count */; | |
5879 if (!isdigit(*items[1])) | |
5880 smsg((char_u *)_("Expected REP(SAL) count in %s line %d"), | |
5881 fname, lnum); | |
5882 } | |
5883 else if ((STRCMP(items[0], "REP") == 0 | |
5884 || STRCMP(items[0], "REPSAL") == 0) | |
5885 && itemcnt >= 3) | |
5886 { | |
5887 /* REP/REPSAL item */ | |
5888 /* Myspell ignores extra arguments, we require it starts with | |
5889 * # to detect mistakes. */ | |
5890 if (itemcnt > 3 && items[3][0] != '#') | |
5891 smsg((char_u *)_(e_afftrailing), fname, lnum, items[3]); | |
5892 if (items[0][3] == 'S' ? do_repsal : do_rep) | |
5893 { | |
5894 /* Replace underscore with space (can't include a space | |
5895 * directly). */ | |
5896 for (p = items[1]; *p != NUL; mb_ptr_adv(p)) | |
5897 if (*p == '_') | |
5898 *p = ' '; | |
5899 for (p = items[2]; *p != NUL; mb_ptr_adv(p)) | |
5900 if (*p == '_') | |
5901 *p = ' '; | |
5902 add_fromto(spin, items[0][3] == 'S' | |
5903 ? &spin->si_repsal | |
5904 : &spin->si_rep, items[1], items[2]); | |
5905 } | |
5906 } | |
5907 else if (is_aff_rule(items, itemcnt, "MAP", 2)) | |
5908 { | |
5909 /* MAP item or count */ | |
5910 if (!found_map) | |
5911 { | |
5912 /* First line contains the count. */ | |
5913 found_map = TRUE; | |
5914 if (!isdigit(*items[1])) | |
5915 smsg((char_u *)_("Expected MAP count in %s line %d"), | |
5916 fname, lnum); | |
5917 } | |
5918 else if (do_mapline) | |
5919 { | |
5920 int c; | |
5921 | |
5922 /* Check that every character appears only once. */ | |
5923 for (p = items[1]; *p != NUL; ) | |
5924 { | |
5925 #ifdef FEAT_MBYTE | |
5926 c = mb_ptr2char_adv(&p); | |
5927 #else | |
5928 c = *p++; | |
5929 #endif | |
5930 if ((spin->si_map.ga_len > 0 | |
5931 && vim_strchr(spin->si_map.ga_data, c) | |
5932 != NULL) | |
5933 || vim_strchr(p, c) != NULL) | |
5934 smsg((char_u *)_("Duplicate character in MAP in %s line %d"), | |
5935 fname, lnum); | |
5936 } | |
5937 | |
5938 /* We simply concatenate all the MAP strings, separated by | |
5939 * slashes. */ | |
5940 ga_concat(&spin->si_map, items[1]); | |
5941 ga_append(&spin->si_map, '/'); | |
5942 } | |
5943 } | |
5944 /* Accept "SAL from to" and "SAL from to #comment". */ | |
5945 else if (is_aff_rule(items, itemcnt, "SAL", 3)) | |
5946 { | |
5947 if (do_sal) | |
5948 { | |
5949 /* SAL item (sounds-a-like) | |
5950 * Either one of the known keys or a from-to pair. */ | |
5951 if (STRCMP(items[1], "followup") == 0) | |
5952 spin->si_followup = sal_to_bool(items[2]); | |
5953 else if (STRCMP(items[1], "collapse_result") == 0) | |
5954 spin->si_collapse = sal_to_bool(items[2]); | |
5955 else if (STRCMP(items[1], "remove_accents") == 0) | |
5956 spin->si_rem_accents = sal_to_bool(items[2]); | |
5957 else | |
5958 /* when "to" is "_" it means empty */ | |
5959 add_fromto(spin, &spin->si_sal, items[1], | |
5960 STRCMP(items[2], "_") == 0 ? (char_u *)"" | |
5961 : items[2]); | |
5962 } | |
5963 } | |
5964 else if (is_aff_rule(items, itemcnt, "SOFOFROM", 2) | |
5965 && sofofrom == NULL) | |
5966 { | |
5967 sofofrom = getroom_save(spin, items[1]); | |
5968 } | |
5969 else if (is_aff_rule(items, itemcnt, "SOFOTO", 2) | |
5970 && sofoto == NULL) | |
5971 { | |
5972 sofoto = getroom_save(spin, items[1]); | |
5973 } | |
5974 else if (STRCMP(items[0], "COMMON") == 0) | |
5975 { | |
5976 int i; | |
5977 | |
5978 for (i = 1; i < itemcnt; ++i) | |
5979 { | |
5980 if (HASHITEM_EMPTY(hash_find(&spin->si_commonwords, | |
5981 items[i]))) | |
5982 { | |
5983 p = vim_strsave(items[i]); | |
5984 if (p == NULL) | |
5985 break; | |
5986 hash_add(&spin->si_commonwords, p); | |
5987 } | |
5988 } | |
5989 } | |
5990 else | |
5991 smsg((char_u *)_("Unrecognized or duplicate item in %s line %d: %s"), | |
5992 fname, lnum, items[0]); | |
5993 } | |
5994 } | |
5995 | |
5996 if (fol != NULL || low != NULL || upp != NULL) | |
5997 { | |
5998 if (spin->si_clear_chartab) | |
5999 { | |
6000 /* Clear the char type tables, don't want to use any of the | |
6001 * currently used spell properties. */ | |
6002 init_spell_chartab(); | |
6003 spin->si_clear_chartab = FALSE; | |
6004 } | |
6005 | |
6006 /* | |
6007 * Don't write a word table for an ASCII file, so that we don't check | |
6008 * for conflicts with a word table that matches 'encoding'. | |
6009 * Don't write one for utf-8 either, we use utf_*() and | |
6010 * mb_get_class(), the list of chars in the file will be incomplete. | |
6011 */ | |
6012 if (!spin->si_ascii | |
6013 #ifdef FEAT_MBYTE | |
6014 && !enc_utf8 | |
6015 #endif | |
6016 ) | |
6017 { | |
6018 if (fol == NULL || low == NULL || upp == NULL) | |
6019 smsg((char_u *)_("Missing FOL/LOW/UPP line in %s"), fname); | |
6020 else | |
6021 (void)set_spell_chartab(fol, low, upp); | |
6022 } | |
6023 | |
6024 vim_free(fol); | |
6025 vim_free(low); | |
6026 vim_free(upp); | |
6027 } | |
6028 | |
6029 /* Use compound specifications of the .aff file for the spell info. */ | |
6030 if (compmax != 0) | |
6031 { | |
6032 aff_check_number(spin->si_compmax, compmax, "COMPOUNDWORDMAX"); | |
6033 spin->si_compmax = compmax; | |
6034 } | |
6035 | |
6036 if (compminlen != 0) | |
6037 { | |
6038 aff_check_number(spin->si_compminlen, compminlen, "COMPOUNDMIN"); | |
6039 spin->si_compminlen = compminlen; | |
6040 } | |
6041 | |
6042 if (compsylmax != 0) | |
6043 { | |
6044 if (syllable == NULL) | |
6045 smsg((char_u *)_("COMPOUNDSYLMAX used without SYLLABLE")); | |
6046 aff_check_number(spin->si_compsylmax, compsylmax, "COMPOUNDSYLMAX"); | |
6047 spin->si_compsylmax = compsylmax; | |
6048 } | |
6049 | |
6050 if (compoptions != 0) | |
6051 { | |
6052 aff_check_number(spin->si_compoptions, compoptions, "COMPOUND options"); | |
6053 spin->si_compoptions |= compoptions; | |
6054 } | |
6055 | |
6056 if (compflags != NULL) | |
6057 process_compflags(spin, aff, compflags); | |
6058 | |
6059 /* Check that we didn't use too many renumbered flags. */ | |
6060 if (spin->si_newcompID < spin->si_newprefID) | |
6061 { | |
6062 if (spin->si_newcompID == 127 || spin->si_newcompID == 255) | |
6063 MSG(_("Too many postponed prefixes")); | |
6064 else if (spin->si_newprefID == 0 || spin->si_newprefID == 127) | |
6065 MSG(_("Too many compound flags")); | |
6066 else | |
6067 MSG(_("Too many postponed prefixes and/or compound flags")); | |
6068 } | |
6069 | |
6070 if (syllable != NULL) | |
6071 { | |
6072 aff_check_string(spin->si_syllable, syllable, "SYLLABLE"); | |
6073 spin->si_syllable = syllable; | |
6074 } | |
6075 | |
6076 if (sofofrom != NULL || sofoto != NULL) | |
6077 { | |
6078 if (sofofrom == NULL || sofoto == NULL) | |
6079 smsg((char_u *)_("Missing SOFO%s line in %s"), | |
6080 sofofrom == NULL ? "FROM" : "TO", fname); | |
6081 else if (spin->si_sal.ga_len > 0) | |
6082 smsg((char_u *)_("Both SAL and SOFO lines in %s"), fname); | |
6083 else | |
6084 { | |
6085 aff_check_string(spin->si_sofofr, sofofrom, "SOFOFROM"); | |
6086 aff_check_string(spin->si_sofoto, sofoto, "SOFOTO"); | |
6087 spin->si_sofofr = sofofrom; | |
6088 spin->si_sofoto = sofoto; | |
6089 } | |
6090 } | |
6091 | |
6092 if (midword != NULL) | |
6093 { | |
6094 aff_check_string(spin->si_midword, midword, "MIDWORD"); | |
6095 spin->si_midword = midword; | |
6096 } | |
6097 | |
6098 vim_free(pc); | |
6099 fclose(fd); | |
6100 return aff; | |
6101 } | |
6102 | |
6103 /* | |
6104 * Return TRUE when items[0] equals "rulename", there are "mincount" items or | |
6105 * a comment is following after item "mincount". | |
6106 */ | |
6107 static int | |
6108 is_aff_rule( | |
6109 char_u **items, | |
6110 int itemcnt, | |
6111 char *rulename, | |
6112 int mincount) | |
6113 { | |
6114 return (STRCMP(items[0], rulename) == 0 | |
6115 && (itemcnt == mincount | |
6116 || (itemcnt > mincount && items[mincount][0] == '#'))); | |
6117 } | |
6118 | |
6119 /* | |
6120 * For affix "entry" move COMPOUNDFORBIDFLAG and COMPOUNDPERMITFLAG from | |
6121 * ae_flags to ae_comppermit and ae_compforbid. | |
6122 */ | |
6123 static void | |
6124 aff_process_flags(afffile_T *affile, affentry_T *entry) | |
6125 { | |
6126 char_u *p; | |
6127 char_u *prevp; | |
6128 unsigned flag; | |
6129 | |
6130 if (entry->ae_flags != NULL | |
6131 && (affile->af_compforbid != 0 || affile->af_comppermit != 0)) | |
6132 { | |
6133 for (p = entry->ae_flags; *p != NUL; ) | |
6134 { | |
6135 prevp = p; | |
6136 flag = get_affitem(affile->af_flagtype, &p); | |
6137 if (flag == affile->af_comppermit || flag == affile->af_compforbid) | |
6138 { | |
6139 STRMOVE(prevp, p); | |
6140 p = prevp; | |
6141 if (flag == affile->af_comppermit) | |
6142 entry->ae_comppermit = TRUE; | |
6143 else | |
6144 entry->ae_compforbid = TRUE; | |
6145 } | |
6146 if (affile->af_flagtype == AFT_NUM && *p == ',') | |
6147 ++p; | |
6148 } | |
6149 if (*entry->ae_flags == NUL) | |
6150 entry->ae_flags = NULL; /* nothing left */ | |
6151 } | |
6152 } | |
6153 | |
6154 /* | |
6155 * Return TRUE if "s" is the name of an info item in the affix file. | |
6156 */ | |
6157 static int | |
6158 spell_info_item(char_u *s) | |
6159 { | |
6160 return STRCMP(s, "NAME") == 0 | |
6161 || STRCMP(s, "HOME") == 0 | |
6162 || STRCMP(s, "VERSION") == 0 | |
6163 || STRCMP(s, "AUTHOR") == 0 | |
6164 || STRCMP(s, "EMAIL") == 0 | |
6165 || STRCMP(s, "COPYRIGHT") == 0; | |
6166 } | |
6167 | |
6168 /* | |
6169 * Turn an affix flag name into a number, according to the FLAG type. | |
6170 * returns zero for failure. | |
6171 */ | |
6172 static unsigned | |
6173 affitem2flag( | |
6174 int flagtype, | |
6175 char_u *item, | |
6176 char_u *fname, | |
6177 int lnum) | |
6178 { | |
6179 unsigned res; | |
6180 char_u *p = item; | |
6181 | |
6182 res = get_affitem(flagtype, &p); | |
6183 if (res == 0) | |
6184 { | |
6185 if (flagtype == AFT_NUM) | |
6186 smsg((char_u *)_("Flag is not a number in %s line %d: %s"), | |
6187 fname, lnum, item); | |
6188 else | |
6189 smsg((char_u *)_("Illegal flag in %s line %d: %s"), | |
6190 fname, lnum, item); | |
6191 } | |
6192 if (*p != NUL) | |
6193 { | |
6194 smsg((char_u *)_(e_affname), fname, lnum, item); | |
6195 return 0; | |
6196 } | |
6197 | |
6198 return res; | |
6199 } | |
6200 | |
6201 /* | |
6202 * Get one affix name from "*pp" and advance the pointer. | |
6203 * Returns zero for an error, still advances the pointer then. | |
6204 */ | |
6205 static unsigned | |
6206 get_affitem(int flagtype, char_u **pp) | |
6207 { | |
6208 int res; | |
6209 | |
6210 if (flagtype == AFT_NUM) | |
6211 { | |
6212 if (!VIM_ISDIGIT(**pp)) | |
6213 { | |
6214 ++*pp; /* always advance, avoid getting stuck */ | |
6215 return 0; | |
6216 } | |
6217 res = getdigits(pp); | |
6218 } | |
6219 else | |
6220 { | |
6221 #ifdef FEAT_MBYTE | |
6222 res = mb_ptr2char_adv(pp); | |
6223 #else | |
6224 res = *(*pp)++; | |
6225 #endif | |
6226 if (flagtype == AFT_LONG || (flagtype == AFT_CAPLONG | |
6227 && res >= 'A' && res <= 'Z')) | |
6228 { | |
6229 if (**pp == NUL) | |
6230 return 0; | |
6231 #ifdef FEAT_MBYTE | |
6232 res = mb_ptr2char_adv(pp) + (res << 16); | |
6233 #else | |
6234 res = *(*pp)++ + (res << 16); | |
6235 #endif | |
6236 } | |
6237 } | |
6238 return res; | |
6239 } | |
6240 | |
6241 /* | |
6242 * Process the "compflags" string used in an affix file and append it to | |
6243 * spin->si_compflags. | |
6244 * The processing involves changing the affix names to ID numbers, so that | |
6245 * they fit in one byte. | |
6246 */ | |
6247 static void | |
6248 process_compflags( | |
6249 spellinfo_T *spin, | |
6250 afffile_T *aff, | |
6251 char_u *compflags) | |
6252 { | |
6253 char_u *p; | |
6254 char_u *prevp; | |
6255 unsigned flag; | |
6256 compitem_T *ci; | |
6257 int id; | |
6258 int len; | |
6259 char_u *tp; | |
6260 char_u key[AH_KEY_LEN]; | |
6261 hashitem_T *hi; | |
6262 | |
6263 /* Make room for the old and the new compflags, concatenated with a / in | |
6264 * between. Processing it makes it shorter, but we don't know by how | |
6265 * much, thus allocate the maximum. */ | |
6266 len = (int)STRLEN(compflags) + 1; | |
6267 if (spin->si_compflags != NULL) | |
6268 len += (int)STRLEN(spin->si_compflags) + 1; | |
6269 p = getroom(spin, len, FALSE); | |
6270 if (p == NULL) | |
6271 return; | |
6272 if (spin->si_compflags != NULL) | |
6273 { | |
6274 STRCPY(p, spin->si_compflags); | |
6275 STRCAT(p, "/"); | |
6276 } | |
6277 spin->si_compflags = p; | |
6278 tp = p + STRLEN(p); | |
6279 | |
6280 for (p = compflags; *p != NUL; ) | |
6281 { | |
6282 if (vim_strchr((char_u *)"/?*+[]", *p) != NULL) | |
6283 /* Copy non-flag characters directly. */ | |
6284 *tp++ = *p++; | |
6285 else | |
6286 { | |
6287 /* First get the flag number, also checks validity. */ | |
6288 prevp = p; | |
6289 flag = get_affitem(aff->af_flagtype, &p); | |
6290 if (flag != 0) | |
6291 { | |
6292 /* Find the flag in the hashtable. If it was used before, use | |
6293 * the existing ID. Otherwise add a new entry. */ | |
6294 vim_strncpy(key, prevp, p - prevp); | |
6295 hi = hash_find(&aff->af_comp, key); | |
6296 if (!HASHITEM_EMPTY(hi)) | |
6297 id = HI2CI(hi)->ci_newID; | |
6298 else | |
6299 { | |
6300 ci = (compitem_T *)getroom(spin, sizeof(compitem_T), TRUE); | |
6301 if (ci == NULL) | |
6302 break; | |
6303 STRCPY(ci->ci_key, key); | |
6304 ci->ci_flag = flag; | |
6305 /* Avoid using a flag ID that has a special meaning in a | |
6306 * regexp (also inside []). */ | |
6307 do | |
6308 { | |
6309 check_renumber(spin); | |
6310 id = spin->si_newcompID--; | |
6311 } while (vim_strchr((char_u *)"/?*+[]\\-^", id) != NULL); | |
6312 ci->ci_newID = id; | |
6313 hash_add(&aff->af_comp, ci->ci_key); | |
6314 } | |
6315 *tp++ = id; | |
6316 } | |
6317 if (aff->af_flagtype == AFT_NUM && *p == ',') | |
6318 ++p; | |
6319 } | |
6320 } | |
6321 | |
6322 *tp = NUL; | |
6323 } | |
6324 | |
6325 /* | |
6326 * Check that the new IDs for postponed affixes and compounding don't overrun | |
6327 * each other. We have almost 255 available, but start at 0-127 to avoid | |
6328 * using two bytes for utf-8. When the 0-127 range is used up go to 128-255. | |
6329 * When that is used up an error message is given. | |
6330 */ | |
6331 static void | |
6332 check_renumber(spellinfo_T *spin) | |
6333 { | |
6334 if (spin->si_newprefID == spin->si_newcompID && spin->si_newcompID < 128) | |
6335 { | |
6336 spin->si_newprefID = 127; | |
6337 spin->si_newcompID = 255; | |
6338 } | |
6339 } | |
6340 | |
6341 /* | |
6342 * Return TRUE if flag "flag" appears in affix list "afflist". | |
6343 */ | |
6344 static int | |
6345 flag_in_afflist(int flagtype, char_u *afflist, unsigned flag) | |
6346 { | |
6347 char_u *p; | |
6348 unsigned n; | |
6349 | |
6350 switch (flagtype) | |
6351 { | |
6352 case AFT_CHAR: | |
6353 return vim_strchr(afflist, flag) != NULL; | |
6354 | |
6355 case AFT_CAPLONG: | |
6356 case AFT_LONG: | |
6357 for (p = afflist; *p != NUL; ) | |
6358 { | |
6359 #ifdef FEAT_MBYTE | |
6360 n = mb_ptr2char_adv(&p); | |
6361 #else | |
6362 n = *p++; | |
6363 #endif | |
6364 if ((flagtype == AFT_LONG || (n >= 'A' && n <= 'Z')) | |
6365 && *p != NUL) | |
6366 #ifdef FEAT_MBYTE | |
6367 n = mb_ptr2char_adv(&p) + (n << 16); | |
6368 #else | |
6369 n = *p++ + (n << 16); | |
6370 #endif | |
6371 if (n == flag) | |
6372 return TRUE; | |
6373 } | |
6374 break; | |
6375 | |
6376 case AFT_NUM: | |
6377 for (p = afflist; *p != NUL; ) | |
6378 { | |
6379 n = getdigits(&p); | |
6380 if (n == flag) | |
6381 return TRUE; | |
6382 if (*p != NUL) /* skip over comma */ | |
6383 ++p; | |
6384 } | |
6385 break; | |
6386 } | |
6387 return FALSE; | |
6388 } | |
6389 | |
6390 /* | |
6391 * Give a warning when "spinval" and "affval" numbers are set and not the same. | |
6392 */ | |
6393 static void | |
6394 aff_check_number(int spinval, int affval, char *name) | |
6395 { | |
6396 if (spinval != 0 && spinval != affval) | |
6397 smsg((char_u *)_("%s value differs from what is used in another .aff file"), name); | |
6398 } | |
6399 | |
6400 /* | |
6401 * Give a warning when "spinval" and "affval" strings are set and not the same. | |
6402 */ | |
6403 static void | |
6404 aff_check_string(char_u *spinval, char_u *affval, char *name) | |
6405 { | |
6406 if (spinval != NULL && STRCMP(spinval, affval) != 0) | |
6407 smsg((char_u *)_("%s value differs from what is used in another .aff file"), name); | |
6408 } | |
6409 | |
6410 /* | |
6411 * Return TRUE if strings "s1" and "s2" are equal. Also consider both being | |
6412 * NULL as equal. | |
6413 */ | |
6414 static int | |
6415 str_equal(char_u *s1, char_u *s2) | |
6416 { | |
6417 if (s1 == NULL || s2 == NULL) | |
6418 return s1 == s2; | |
6419 return STRCMP(s1, s2) == 0; | |
6420 } | |
6421 | |
6422 /* | |
6423 * Add a from-to item to "gap". Used for REP and SAL items. | |
6424 * They are stored case-folded. | |
6425 */ | |
6426 static void | |
6427 add_fromto( | |
6428 spellinfo_T *spin, | |
6429 garray_T *gap, | |
6430 char_u *from, | |
6431 char_u *to) | |
6432 { | |
6433 fromto_T *ftp; | |
6434 char_u word[MAXWLEN]; | |
6435 | |
6436 if (ga_grow(gap, 1) == OK) | |
6437 { | |
6438 ftp = ((fromto_T *)gap->ga_data) + gap->ga_len; | |
6439 (void)spell_casefold(from, (int)STRLEN(from), word, MAXWLEN); | |
6440 ftp->ft_from = getroom_save(spin, word); | |
6441 (void)spell_casefold(to, (int)STRLEN(to), word, MAXWLEN); | |
6442 ftp->ft_to = getroom_save(spin, word); | |
6443 ++gap->ga_len; | |
6444 } | |
6445 } | |
6446 | |
6447 /* | |
6448 * Convert a boolean argument in a SAL line to TRUE or FALSE; | |
6449 */ | |
6450 static int | |
6451 sal_to_bool(char_u *s) | |
6452 { | |
6453 return STRCMP(s, "1") == 0 || STRCMP(s, "true") == 0; | |
6454 } | |
6455 | |
6456 /* | |
6457 * Free the structure filled by spell_read_aff(). | |
6458 */ | |
6459 static void | |
6460 spell_free_aff(afffile_T *aff) | |
6461 { | |
6462 hashtab_T *ht; | |
6463 hashitem_T *hi; | |
6464 int todo; | |
6465 affheader_T *ah; | |
6466 affentry_T *ae; | |
6467 | |
6468 vim_free(aff->af_enc); | |
6469 | |
6470 /* All this trouble to free the "ae_prog" items... */ | |
6471 for (ht = &aff->af_pref; ; ht = &aff->af_suff) | |
6472 { | |
6473 todo = (int)ht->ht_used; | |
6474 for (hi = ht->ht_array; todo > 0; ++hi) | |
6475 { | |
6476 if (!HASHITEM_EMPTY(hi)) | |
6477 { | |
6478 --todo; | |
6479 ah = HI2AH(hi); | |
6480 for (ae = ah->ah_first; ae != NULL; ae = ae->ae_next) | |
6481 vim_regfree(ae->ae_prog); | |
6482 } | |
6483 } | |
6484 if (ht == &aff->af_suff) | |
6485 break; | |
6486 } | |
6487 | |
6488 hash_clear(&aff->af_pref); | |
6489 hash_clear(&aff->af_suff); | |
6490 hash_clear(&aff->af_comp); | |
6491 } | |
6492 | |
6493 /* | |
6494 * Read dictionary file "fname". | |
6495 * Returns OK or FAIL; | |
6496 */ | |
6497 static int | |
6498 spell_read_dic(spellinfo_T *spin, char_u *fname, afffile_T *affile) | |
6499 { | |
6500 hashtab_T ht; | |
6501 char_u line[MAXLINELEN]; | |
6502 char_u *p; | |
6503 char_u *afflist; | |
6504 char_u store_afflist[MAXWLEN]; | |
6505 int pfxlen; | |
6506 int need_affix; | |
6507 char_u *dw; | |
6508 char_u *pc; | |
6509 char_u *w; | |
6510 int l; | |
6511 hash_T hash; | |
6512 hashitem_T *hi; | |
6513 FILE *fd; | |
6514 int lnum = 1; | |
6515 int non_ascii = 0; | |
6516 int retval = OK; | |
6517 char_u message[MAXLINELEN + MAXWLEN]; | |
6518 int flags; | |
6519 int duplicate = 0; | |
6520 | |
6521 /* | |
6522 * Open the file. | |
6523 */ | |
6524 fd = mch_fopen((char *)fname, "r"); | |
6525 if (fd == NULL) | |
6526 { | |
6527 EMSG2(_(e_notopen), fname); | |
6528 return FAIL; | |
6529 } | |
6530 | |
6531 /* The hashtable is only used to detect duplicated words. */ | |
6532 hash_init(&ht); | |
6533 | |
6534 vim_snprintf((char *)IObuff, IOSIZE, | |
6535 _("Reading dictionary file %s ..."), fname); | |
6536 spell_message(spin, IObuff); | |
6537 | |
6538 /* start with a message for the first line */ | |
6539 spin->si_msg_count = 999999; | |
6540 | |
6541 /* Read and ignore the first line: word count. */ | |
6542 (void)vim_fgets(line, MAXLINELEN, fd); | |
6543 if (!vim_isdigit(*skipwhite(line))) | |
6544 EMSG2(_("E760: No word count in %s"), fname); | |
6545 | |
6546 /* | |
6547 * Read all the lines in the file one by one. | |
6548 * The words are converted to 'encoding' here, before being added to | |
6549 * the hashtable. | |
6550 */ | |
6551 while (!vim_fgets(line, MAXLINELEN, fd) && !got_int) | |
6552 { | |
6553 line_breakcheck(); | |
6554 ++lnum; | |
6555 if (line[0] == '#' || line[0] == '/') | |
6556 continue; /* comment line */ | |
6557 | |
6558 /* Remove CR, LF and white space from the end. White space halfway | |
6559 * the word is kept to allow e.g., "et al.". */ | |
6560 l = (int)STRLEN(line); | |
6561 while (l > 0 && line[l - 1] <= ' ') | |
6562 --l; | |
6563 if (l == 0) | |
6564 continue; /* empty line */ | |
6565 line[l] = NUL; | |
6566 | |
6567 #ifdef FEAT_MBYTE | |
6568 /* Convert from "SET" to 'encoding' when needed. */ | |
6569 if (spin->si_conv.vc_type != CONV_NONE) | |
6570 { | |
6571 pc = string_convert(&spin->si_conv, line, NULL); | |
6572 if (pc == NULL) | |
6573 { | |
6574 smsg((char_u *)_("Conversion failure for word in %s line %d: %s"), | |
6575 fname, lnum, line); | |
6576 continue; | |
6577 } | |
6578 w = pc; | |
6579 } | |
6580 else | |
6581 #endif | |
6582 { | |
6583 pc = NULL; | |
6584 w = line; | |
6585 } | |
6586 | |
6587 /* Truncate the word at the "/", set "afflist" to what follows. | |
6588 * Replace "\/" by "/" and "\\" by "\". */ | |
6589 afflist = NULL; | |
6590 for (p = w; *p != NUL; mb_ptr_adv(p)) | |
6591 { | |
6592 if (*p == '\\' && (p[1] == '\\' || p[1] == '/')) | |
6593 STRMOVE(p, p + 1); | |
6594 else if (*p == '/') | |
6595 { | |
6596 *p = NUL; | |
6597 afflist = p + 1; | |
6598 break; | |
6599 } | |
6600 } | |
6601 | |
6602 /* Skip non-ASCII words when "spin->si_ascii" is TRUE. */ | |
6603 if (spin->si_ascii && has_non_ascii(w)) | |
6604 { | |
6605 ++non_ascii; | |
6606 vim_free(pc); | |
6607 continue; | |
6608 } | |
6609 | |
6610 /* This takes time, print a message every 10000 words. */ | |
6611 if (spin->si_verbose && spin->si_msg_count > 10000) | |
6612 { | |
6613 spin->si_msg_count = 0; | |
6614 vim_snprintf((char *)message, sizeof(message), | |
6615 _("line %6d, word %6d - %s"), | |
6616 lnum, spin->si_foldwcount + spin->si_keepwcount, w); | |
6617 msg_start(); | |
6618 msg_puts_long_attr(message, 0); | |
6619 msg_clr_eos(); | |
6620 msg_didout = FALSE; | |
6621 msg_col = 0; | |
6622 out_flush(); | |
6623 } | |
6624 | |
6625 /* Store the word in the hashtable to be able to find duplicates. */ | |
6626 dw = (char_u *)getroom_save(spin, w); | |
6627 if (dw == NULL) | |
6628 { | |
6629 retval = FAIL; | |
6630 vim_free(pc); | |
6631 break; | |
6632 } | |
6633 | |
6634 hash = hash_hash(dw); | |
6635 hi = hash_lookup(&ht, dw, hash); | |
6636 if (!HASHITEM_EMPTY(hi)) | |
6637 { | |
6638 if (p_verbose > 0) | |
6639 smsg((char_u *)_("Duplicate word in %s line %d: %s"), | |
6640 fname, lnum, dw); | |
6641 else if (duplicate == 0) | |
6642 smsg((char_u *)_("First duplicate word in %s line %d: %s"), | |
6643 fname, lnum, dw); | |
6644 ++duplicate; | |
6645 } | |
6646 else | |
6647 hash_add_item(&ht, hi, dw, hash); | |
6648 | |
6649 flags = 0; | |
6650 store_afflist[0] = NUL; | |
6651 pfxlen = 0; | |
6652 need_affix = FALSE; | |
6653 if (afflist != NULL) | |
6654 { | |
6655 /* Extract flags from the affix list. */ | |
6656 flags |= get_affix_flags(affile, afflist); | |
6657 | |
6658 if (affile->af_needaffix != 0 && flag_in_afflist( | |
6659 affile->af_flagtype, afflist, affile->af_needaffix)) | |
6660 need_affix = TRUE; | |
6661 | |
6662 if (affile->af_pfxpostpone) | |
6663 /* Need to store the list of prefix IDs with the word. */ | |
6664 pfxlen = get_pfxlist(affile, afflist, store_afflist); | |
6665 | |
6666 if (spin->si_compflags != NULL) | |
6667 /* Need to store the list of compound flags with the word. | |
6668 * Concatenate them to the list of prefix IDs. */ | |
6669 get_compflags(affile, afflist, store_afflist + pfxlen); | |
6670 } | |
6671 | |
6672 /* Add the word to the word tree(s). */ | |
6673 if (store_word(spin, dw, flags, spin->si_region, | |
6674 store_afflist, need_affix) == FAIL) | |
6675 retval = FAIL; | |
6676 | |
6677 if (afflist != NULL) | |
6678 { | |
6679 /* Find all matching suffixes and add the resulting words. | |
6680 * Additionally do matching prefixes that combine. */ | |
6681 if (store_aff_word(spin, dw, afflist, affile, | |
6682 &affile->af_suff, &affile->af_pref, | |
6683 CONDIT_SUF, flags, store_afflist, pfxlen) == FAIL) | |
6684 retval = FAIL; | |
6685 | |
6686 /* Find all matching prefixes and add the resulting words. */ | |
6687 if (store_aff_word(spin, dw, afflist, affile, | |
6688 &affile->af_pref, NULL, | |
6689 CONDIT_SUF, flags, store_afflist, pfxlen) == FAIL) | |
6690 retval = FAIL; | |
6691 } | |
6692 | |
6693 vim_free(pc); | |
6694 } | |
6695 | |
6696 if (duplicate > 0) | |
6697 smsg((char_u *)_("%d duplicate word(s) in %s"), duplicate, fname); | |
6698 if (spin->si_ascii && non_ascii > 0) | |
6699 smsg((char_u *)_("Ignored %d word(s) with non-ASCII characters in %s"), | |
6700 non_ascii, fname); | |
6701 hash_clear(&ht); | |
6702 | |
6703 fclose(fd); | |
6704 return retval; | |
6705 } | |
6706 | |
6707 /* | |
6708 * Check for affix flags in "afflist" that are turned into word flags. | |
6709 * Return WF_ flags. | |
6710 */ | |
6711 static int | |
6712 get_affix_flags(afffile_T *affile, char_u *afflist) | |
6713 { | |
6714 int flags = 0; | |
6715 | |
6716 if (affile->af_keepcase != 0 && flag_in_afflist( | |
6717 affile->af_flagtype, afflist, affile->af_keepcase)) | |
6718 flags |= WF_KEEPCAP | WF_FIXCAP; | |
6719 if (affile->af_rare != 0 && flag_in_afflist( | |
6720 affile->af_flagtype, afflist, affile->af_rare)) | |
6721 flags |= WF_RARE; | |
6722 if (affile->af_bad != 0 && flag_in_afflist( | |
6723 affile->af_flagtype, afflist, affile->af_bad)) | |
6724 flags |= WF_BANNED; | |
6725 if (affile->af_needcomp != 0 && flag_in_afflist( | |
6726 affile->af_flagtype, afflist, affile->af_needcomp)) | |
6727 flags |= WF_NEEDCOMP; | |
6728 if (affile->af_comproot != 0 && flag_in_afflist( | |
6729 affile->af_flagtype, afflist, affile->af_comproot)) | |
6730 flags |= WF_COMPROOT; | |
6731 if (affile->af_nosuggest != 0 && flag_in_afflist( | |
6732 affile->af_flagtype, afflist, affile->af_nosuggest)) | |
6733 flags |= WF_NOSUGGEST; | |
6734 return flags; | |
6735 } | |
6736 | |
6737 /* | |
6738 * Get the list of prefix IDs from the affix list "afflist". | |
6739 * Used for PFXPOSTPONE. | |
6740 * Put the resulting flags in "store_afflist[MAXWLEN]" with a terminating NUL | |
6741 * and return the number of affixes. | |
6742 */ | |
6743 static int | |
6744 get_pfxlist( | |
6745 afffile_T *affile, | |
6746 char_u *afflist, | |
6747 char_u *store_afflist) | |
6748 { | |
6749 char_u *p; | |
6750 char_u *prevp; | |
6751 int cnt = 0; | |
6752 int id; | |
6753 char_u key[AH_KEY_LEN]; | |
6754 hashitem_T *hi; | |
6755 | |
6756 for (p = afflist; *p != NUL; ) | |
6757 { | |
6758 prevp = p; | |
6759 if (get_affitem(affile->af_flagtype, &p) != 0) | |
6760 { | |
6761 /* A flag is a postponed prefix flag if it appears in "af_pref" | |
6762 * and it's ID is not zero. */ | |
6763 vim_strncpy(key, prevp, p - prevp); | |
6764 hi = hash_find(&affile->af_pref, key); | |
6765 if (!HASHITEM_EMPTY(hi)) | |
6766 { | |
6767 id = HI2AH(hi)->ah_newID; | |
6768 if (id != 0) | |
6769 store_afflist[cnt++] = id; | |
6770 } | |
6771 } | |
6772 if (affile->af_flagtype == AFT_NUM && *p == ',') | |
6773 ++p; | |
6774 } | |
6775 | |
6776 store_afflist[cnt] = NUL; | |
6777 return cnt; | |
6778 } | |
6779 | |
6780 /* | |
6781 * Get the list of compound IDs from the affix list "afflist" that are used | |
6782 * for compound words. | |
6783 * Puts the flags in "store_afflist[]". | |
6784 */ | |
6785 static void | |
6786 get_compflags( | |
6787 afffile_T *affile, | |
6788 char_u *afflist, | |
6789 char_u *store_afflist) | |
6790 { | |
6791 char_u *p; | |
6792 char_u *prevp; | |
6793 int cnt = 0; | |
6794 char_u key[AH_KEY_LEN]; | |
6795 hashitem_T *hi; | |
6796 | |
6797 for (p = afflist; *p != NUL; ) | |
6798 { | |
6799 prevp = p; | |
6800 if (get_affitem(affile->af_flagtype, &p) != 0) | |
6801 { | |
6802 /* A flag is a compound flag if it appears in "af_comp". */ | |
6803 vim_strncpy(key, prevp, p - prevp); | |
6804 hi = hash_find(&affile->af_comp, key); | |
6805 if (!HASHITEM_EMPTY(hi)) | |
6806 store_afflist[cnt++] = HI2CI(hi)->ci_newID; | |
6807 } | |
6808 if (affile->af_flagtype == AFT_NUM && *p == ',') | |
6809 ++p; | |
6810 } | |
6811 | |
6812 store_afflist[cnt] = NUL; | |
6813 } | |
6814 | |
/*
 * Apply affixes to a word and store the resulting words.
 * "ht" is the hashtable with affentry_T that need to be applied, either
 * prefixes or suffixes.
 * "xht", when not NULL, is the prefix hashtable, to be used additionally on
 * the resulting words for combining affixes.
 * When "xht" is NULL "ht" is handled as the prefix table: chop/add happens at
 * the start of the word.  Otherwise chop/add happens at the end of the word.
 *
 * "condit" is a combination of CONDIT_ flags:
 * CONDIT_COMB: only use affixes that have "ah_combine" set.
 * CONDIT_AFF:  set for recursive calls, an affix was applied already.
 * CONDIT_CFIX: the previously applied affix had the CIRCUMFIX flag.
 * CONDIT_SUF:  a(nother) suffix may be added when the affix has flags.
 *
 * The function calls itself to apply further affixes to each new word.
 *
 * Returns FAIL when out of memory.
 */
static int
store_aff_word(
    spellinfo_T *spin,          /* spell info */
    char_u      *word,          /* basic word start */
    char_u      *afflist,       /* list of names of supported affixes */
    afffile_T   *affile,
    hashtab_T   *ht,
    hashtab_T   *xht,
    int         condit,         /* CONDIT_SUF et al. */
    int         flags,          /* flags for the word */
    char_u      *pfxlist,       /* list of prefix IDs */
    int         pfxlen)         /* nr of flags in "pfxlist" for prefixes, rest
                                 * is compound flags */
{
    int         todo;
    hashitem_T  *hi;
    affheader_T *ah;
    affentry_T  *ae;
    char_u      newword[MAXWLEN];
    int         retval = OK;
    int         i, j;
    char_u      *p;
    int         use_flags;
    char_u      *use_pfxlist;
    int         use_pfxlen;
    int         need_affix;
    char_u      store_afflist[MAXWLEN];
    char_u      pfx_pfxlist[MAXWLEN];
    size_t      wordlen = STRLEN(word);
    int         use_condit;

    /* Visit every used item in "ht"; stop early once storing failed. */
    todo = (int)ht->ht_used;
    for (hi = ht->ht_array; todo > 0 && retval == OK; ++hi)
    {
        if (!HASHITEM_EMPTY(hi))
        {
            --todo;
            ah = HI2AH(hi);

            /* Check that the affix combines, if required, and that the word
             * supports this affix. */
            if (((condit & CONDIT_COMB) == 0 || ah->ah_combine)
                    && flag_in_afflist(affile->af_flagtype, afflist,
                                                                 ah->ah_flag))
            {
                /* Loop over all affix entries with this name. */
                for (ae = ah->ah_first; ae != NULL; ae = ae->ae_next)
                {
                    /* Check the condition.  It's not logical to match case
                     * here, but it is required for compatibility with
                     * Myspell.
                     * Another requirement from Myspell is that the chop
                     * string is shorter than the word itself.
                     * For prefixes, when "PFXPOSTPONE" was used, only do
                     * prefixes with a chop string and/or flags.
                     * When a previously added affix had CIRCUMFIX this one
                     * must have it too, if it had not then this one must not
                     * have one either. */
                    if ((xht != NULL || !affile->af_pfxpostpone
                                || ae->ae_chop != NULL
                                || ae->ae_flags != NULL)
                            && (ae->ae_chop == NULL
                                || STRLEN(ae->ae_chop) < wordlen)
                            && (ae->ae_prog == NULL
                                || vim_regexec_prog(&ae->ae_prog, FALSE,
                                                            word, (colnr_T)0))
                            && (((condit & CONDIT_CFIX) == 0)
                                == ((condit & CONDIT_AFF) == 0
                                    || ae->ae_flags == NULL
                                    || !flag_in_afflist(affile->af_flagtype,
                                        ae->ae_flags, affile->af_circumfix))))
                    {
                        /* Match.  Remove the chop and add the affix. */
                        /* NOTE(review): the STRCAT() calls below append to
                         * "newword[MAXWLEN]" without a length check; looks
                         * like long words/affixes could overflow it --
                         * confirm. */
                        if (xht == NULL)
                        {
                            /* prefix: chop/add at the start of the word */
                            if (ae->ae_add == NULL)
                                *newword = NUL;
                            else
                                vim_strncpy(newword, ae->ae_add, MAXWLEN - 1);
                            p = word;
                            if (ae->ae_chop != NULL)
                            {
                                /* Skip chop string. */
#ifdef FEAT_MBYTE
                                if (has_mbyte)
                                {
                                    i = mb_charlen(ae->ae_chop);
                                    for ( ; i > 0; --i)
                                        mb_ptr_adv(p);
                                }
                                else
#endif
                                    p += STRLEN(ae->ae_chop);
                            }
                            STRCAT(newword, p);
                        }
                        else
                        {
                            /* suffix: chop/add at the end of the word */
                            vim_strncpy(newword, word, MAXWLEN - 1);
                            if (ae->ae_chop != NULL)
                            {
                                /* Remove chop string. */
                                p = newword + STRLEN(newword);
                                i = (int)MB_CHARLEN(ae->ae_chop);
                                for ( ; i > 0; --i)
                                    mb_ptr_back(newword, p);
                                *p = NUL;
                            }
                            if (ae->ae_add != NULL)
                                STRCAT(newword, ae->ae_add);
                        }

                        /* Start from the values of the basic word, the flags
                         * of the affix entry may change them below. */
                        use_flags = flags;
                        use_pfxlist = pfxlist;
                        use_pfxlen = pfxlen;
                        need_affix = FALSE;
                        use_condit = condit | CONDIT_COMB | CONDIT_AFF;
                        if (ae->ae_flags != NULL)
                        {
                            /* Extract flags from the affix list. */
                            use_flags |= get_affix_flags(affile, ae->ae_flags);

                            if (affile->af_needaffix != 0 && flag_in_afflist(
                                        affile->af_flagtype, ae->ae_flags,
                                                        affile->af_needaffix))
                                need_affix = TRUE;

                            /* When there is a CIRCUMFIX flag the other affix
                             * must also have it and we don't add the word
                             * with one affix. */
                            if (affile->af_circumfix != 0 && flag_in_afflist(
                                        affile->af_flagtype, ae->ae_flags,
                                                        affile->af_circumfix))
                            {
                                use_condit |= CONDIT_CFIX;
                                if ((condit & CONDIT_CFIX) == 0)
                                    need_affix = TRUE;
                            }

                            if (affile->af_pfxpostpone
                                                || spin->si_compflags != NULL)
                            {
                                if (affile->af_pfxpostpone)
                                    /* Get prefix IDS from the affix list. */
                                    use_pfxlen = get_pfxlist(affile,
                                                 ae->ae_flags, store_afflist);
                                else
                                    use_pfxlen = 0;
                                use_pfxlist = store_afflist;

                                /* Combine the prefix IDs. Avoid adding the
                                 * same ID twice. */
                                for (i = 0; i < pfxlen; ++i)
                                {
                                    for (j = 0; j < use_pfxlen; ++j)
                                        if (pfxlist[i] == use_pfxlist[j])
                                            break;
                                    if (j == use_pfxlen)
                                        use_pfxlist[use_pfxlen++] = pfxlist[i];
                                }

                                if (spin->si_compflags != NULL)
                                    /* Get compound IDS from the affix list. */
                                    get_compflags(affile, ae->ae_flags,
                                                  use_pfxlist + use_pfxlen);

                                /* Combine the list of compound flags.
                                 * Concatenate them to the prefix IDs list.
                                 * Avoid adding the same ID twice. */
                                for (i = pfxlen; pfxlist[i] != NUL; ++i)
                                {
                                    for (j = use_pfxlen;
                                                   use_pfxlist[j] != NUL; ++j)
                                        if (pfxlist[i] == use_pfxlist[j])
                                            break;
                                    if (use_pfxlist[j] == NUL)
                                    {
                                        use_pfxlist[j++] = pfxlist[i];
                                        use_pfxlist[j] = NUL;
                                    }
                                }
                            }
                        }

                        /* Obey a "COMPOUNDFORBIDFLAG" of the affix: don't
                         * use the compound flags. */
                        if (use_pfxlist != NULL && ae->ae_compforbid)
                        {
                            /* Copy only the prefix IDs, dropping the compound
                             * flags that follow them. */
                            vim_strncpy(pfx_pfxlist, use_pfxlist, use_pfxlen);
                            use_pfxlist = pfx_pfxlist;
                        }

                        /* When there are postponed prefixes... */
                        if (spin->si_prefroot != NULL
                                && spin->si_prefroot->wn_sibling != NULL)
                        {
                            /* ... add a flag to indicate an affix was used. */
                            use_flags |= WF_HAS_AFF;

                            /* ... don't use a prefix list if combining
                             * affixes is not allowed.  But do use the
                             * compound flags after them. */
                            if (!ah->ah_combine && use_pfxlist != NULL)
                                use_pfxlist += use_pfxlen;
                        }

                        /* When compounding is supported and there is no
                         * "COMPOUNDPERMITFLAG" then forbid compounding on the
                         * side where the affix is applied. */
                        if (spin->si_compflags != NULL && !ae->ae_comppermit)
                        {
                            if (xht != NULL)
                                use_flags |= WF_NOCOMPAFT;
                            else
                                use_flags |= WF_NOCOMPBEF;
                        }

                        /* Store the modified word. */
                        if (store_word(spin, newword, use_flags,
                                                 spin->si_region, use_pfxlist,
                                                          need_affix) == FAIL)
                            retval = FAIL;

                        /* When added a prefix or a first suffix and the affix
                         * has flags may add a(nother) suffix.  RECURSIVE! */
                        /* After a suffix CONDIT_SUF is removed, so that the
                         * nested call does not add yet another suffix. */
                        if ((condit & CONDIT_SUF) && ae->ae_flags != NULL)
                            if (store_aff_word(spin, newword, ae->ae_flags,
                                        affile, &affile->af_suff, xht,
                                           use_condit & (xht == NULL
                                                        ? ~0 :  ~CONDIT_SUF),
                                      use_flags, use_pfxlist, pfxlen) == FAIL)
                                retval = FAIL;

                        /* When added a suffix and combining is allowed also
                         * try adding a prefix additionally.  Both for the
                         * word flags and for the affix flags.  RECURSIVE! */
                        if (xht != NULL && ah->ah_combine)
                        {
                            if (store_aff_word(spin, newword,
                                        afflist, affile,
                                        xht, NULL, use_condit,
                                        use_flags, use_pfxlist,
                                        pfxlen) == FAIL
                                    || (ae->ae_flags != NULL
                                        && store_aff_word(spin, newword,
                                            ae->ae_flags, affile,
                                            xht, NULL, use_condit,
                                            use_flags, use_pfxlist,
                                            pfxlen) == FAIL))
                                retval = FAIL;
                        }
                    }
                }
            }
        }
    }

    return retval;
}
7085 | |
7086 /* | |
7087 * Read a file with a list of words. | |
7088 */ | |
7089 static int | |
7090 spell_read_wordfile(spellinfo_T *spin, char_u *fname) | |
7091 { | |
7092 FILE *fd; | |
7093 long lnum = 0; | |
7094 char_u rline[MAXLINELEN]; | |
7095 char_u *line; | |
7096 char_u *pc = NULL; | |
7097 char_u *p; | |
7098 int l; | |
7099 int retval = OK; | |
7100 int did_word = FALSE; | |
7101 int non_ascii = 0; | |
7102 int flags; | |
7103 int regionmask; | |
7104 | |
7105 /* | |
7106 * Open the file. | |
7107 */ | |
7108 fd = mch_fopen((char *)fname, "r"); | |
7109 if (fd == NULL) | |
7110 { | |
7111 EMSG2(_(e_notopen), fname); | |
7112 return FAIL; | |
7113 } | |
7114 | |
7115 vim_snprintf((char *)IObuff, IOSIZE, _("Reading word file %s ..."), fname); | |
7116 spell_message(spin, IObuff); | |
7117 | |
7118 /* | |
7119 * Read all the lines in the file one by one. | |
7120 */ | |
7121 while (!vim_fgets(rline, MAXLINELEN, fd) && !got_int) | |
7122 { | |
7123 line_breakcheck(); | |
7124 ++lnum; | |
7125 | |
7126 /* Skip comment lines. */ | |
7127 if (*rline == '#') | |
7128 continue; | |
7129 | |
7130 /* Remove CR, LF and white space from the end. */ | |
7131 l = (int)STRLEN(rline); | |
7132 while (l > 0 && rline[l - 1] <= ' ') | |
7133 --l; | |
7134 if (l == 0) | |
7135 continue; /* empty or blank line */ | |
7136 rline[l] = NUL; | |
7137 | |
7138 /* Convert from "/encoding={encoding}" to 'encoding' when needed. */ | |
7139 vim_free(pc); | |
7140 #ifdef FEAT_MBYTE | |
7141 if (spin->si_conv.vc_type != CONV_NONE) | |
7142 { | |
7143 pc = string_convert(&spin->si_conv, rline, NULL); | |
7144 if (pc == NULL) | |
7145 { | |
7146 smsg((char_u *)_("Conversion failure for word in %s line %d: %s"), | |
7147 fname, lnum, rline); | |
7148 continue; | |
7149 } | |
7150 line = pc; | |
7151 } | |
7152 else | |
7153 #endif | |
7154 { | |
7155 pc = NULL; | |
7156 line = rline; | |
7157 } | |
7158 | |
7159 if (*line == '/') | |
7160 { | |
7161 ++line; | |
7162 if (STRNCMP(line, "encoding=", 9) == 0) | |
7163 { | |
7164 if (spin->si_conv.vc_type != CONV_NONE) | |
7165 smsg((char_u *)_("Duplicate /encoding= line ignored in %s line %d: %s"), | |
7166 fname, lnum, line - 1); | |
7167 else if (did_word) | |
7168 smsg((char_u *)_("/encoding= line after word ignored in %s line %d: %s"), | |
7169 fname, lnum, line - 1); | |
7170 else | |
7171 { | |
7172 #ifdef FEAT_MBYTE | |
7173 char_u *enc; | |
7174 | |
7175 /* Setup for conversion to 'encoding'. */ | |
7176 line += 9; | |
7177 enc = enc_canonize(line); | |
7178 if (enc != NULL && !spin->si_ascii | |
7179 && convert_setup(&spin->si_conv, enc, | |
7180 p_enc) == FAIL) | |
7181 smsg((char_u *)_("Conversion in %s not supported: from %s to %s"), | |
7182 fname, line, p_enc); | |
7183 vim_free(enc); | |
7184 spin->si_conv.vc_fail = TRUE; | |
7185 #else | |
7186 smsg((char_u *)_("Conversion in %s not supported"), fname); | |
7187 #endif | |
7188 } | |
7189 continue; | |
7190 } | |
7191 | |
7192 if (STRNCMP(line, "regions=", 8) == 0) | |
7193 { | |
7194 if (spin->si_region_count > 1) | |
7195 smsg((char_u *)_("Duplicate /regions= line ignored in %s line %d: %s"), | |
7196 fname, lnum, line); | |
7197 else | |
7198 { | |
7199 line += 8; | |
7200 if (STRLEN(line) > 16) | |
7201 smsg((char_u *)_("Too many regions in %s line %d: %s"), | |
7202 fname, lnum, line); | |
7203 else | |
7204 { | |
7205 spin->si_region_count = (int)STRLEN(line) / 2; | |
7206 STRCPY(spin->si_region_name, line); | |
7207 | |
7208 /* Adjust the mask for a word valid in all regions. */ | |
7209 spin->si_region = (1 << spin->si_region_count) - 1; | |
7210 } | |
7211 } | |
7212 continue; | |
7213 } | |
7214 | |
7215 smsg((char_u *)_("/ line ignored in %s line %d: %s"), | |
7216 fname, lnum, line - 1); | |
7217 continue; | |
7218 } | |
7219 | |
7220 flags = 0; | |
7221 regionmask = spin->si_region; | |
7222 | |
7223 /* Check for flags and region after a slash. */ | |
7224 p = vim_strchr(line, '/'); | |
7225 if (p != NULL) | |
7226 { | |
7227 *p++ = NUL; | |
7228 while (*p != NUL) | |
7229 { | |
7230 if (*p == '=') /* keep-case word */ | |
7231 flags |= WF_KEEPCAP | WF_FIXCAP; | |
7232 else if (*p == '!') /* Bad, bad, wicked word. */ | |
7233 flags |= WF_BANNED; | |
7234 else if (*p == '?') /* Rare word. */ | |
7235 flags |= WF_RARE; | |
7236 else if (VIM_ISDIGIT(*p)) /* region number(s) */ | |
7237 { | |
7238 if ((flags & WF_REGION) == 0) /* first one */ | |
7239 regionmask = 0; | |
7240 flags |= WF_REGION; | |
7241 | |
7242 l = *p - '0'; | |
7243 if (l > spin->si_region_count) | |
7244 { | |
7245 smsg((char_u *)_("Invalid region nr in %s line %d: %s"), | |
7246 fname, lnum, p); | |
7247 break; | |
7248 } | |
7249 regionmask |= 1 << (l - 1); | |
7250 } | |
7251 else | |
7252 { | |
7253 smsg((char_u *)_("Unrecognized flags in %s line %d: %s"), | |
7254 fname, lnum, p); | |
7255 break; | |
7256 } | |
7257 ++p; | |
7258 } | |
7259 } | |
7260 | |
7261 /* Skip non-ASCII words when "spin->si_ascii" is TRUE. */ | |
7262 if (spin->si_ascii && has_non_ascii(line)) | |
7263 { | |
7264 ++non_ascii; | |
7265 continue; | |
7266 } | |
7267 | |
7268 /* Normal word: store it. */ | |
7269 if (store_word(spin, line, flags, regionmask, NULL, FALSE) == FAIL) | |
7270 { | |
7271 retval = FAIL; | |
7272 break; | |
7273 } | |
7274 did_word = TRUE; | |
7275 } | |
7276 | |
7277 vim_free(pc); | |
7278 fclose(fd); | |
7279 | |
7280 if (spin->si_ascii && non_ascii > 0) | |
7281 { | |
7282 vim_snprintf((char *)IObuff, IOSIZE, | |
7283 _("Ignored %d words with non-ASCII characters"), non_ascii); | |
7284 spell_message(spin, IObuff); | |
7285 } | |
7286 | |
7287 return retval; | |
7288 } | |
7289 | |
7290 /* | |
7291 * Get part of an sblock_T, "len" bytes long. | |
7292 * This avoids calling free() for every little struct we use (and keeping | |
7293 * track of them). | |
7294 * The memory is cleared to all zeros. | |
7295 * Returns NULL when out of memory. | |
7296 */ | |
7297 static void * | |
7298 getroom( | |
7299 spellinfo_T *spin, | |
7300 size_t len, /* length needed */ | |
7301 int align) /* align for pointer */ | |
7302 { | |
7303 char_u *p; | |
7304 sblock_T *bl = spin->si_blocks; | |
7305 | |
7306 if (align && bl != NULL) | |
7307 /* Round size up for alignment. On some systems structures need to be | |
7308 * aligned to the size of a pointer (e.g., SPARC). */ | |
7309 bl->sb_used = (bl->sb_used + sizeof(char *) - 1) | |
7310 & ~(sizeof(char *) - 1); | |
7311 | |
7312 if (bl == NULL || bl->sb_used + len > SBLOCKSIZE) | |
7313 { | |
7314 if (len >= SBLOCKSIZE) | |
7315 bl = NULL; | |
7316 else | |
7317 /* Allocate a block of memory. It is not freed until much later. */ | |
7318 bl = (sblock_T *)alloc_clear( | |
7319 (unsigned)(sizeof(sblock_T) + SBLOCKSIZE)); | |
7320 if (bl == NULL) | |
7321 { | |
7322 if (!spin->si_did_emsg) | |
7323 { | |
7324 EMSG(_("E845: Insufficient memory, word list will be incomplete")); | |
7325 spin->si_did_emsg = TRUE; | |
7326 } | |
7327 return NULL; | |
7328 } | |
7329 bl->sb_next = spin->si_blocks; | |
7330 spin->si_blocks = bl; | |
7331 bl->sb_used = 0; | |
7332 ++spin->si_blocks_cnt; | |
7333 } | |
7334 | |
7335 p = bl->sb_data + bl->sb_used; | |
7336 bl->sb_used += (int)len; | |
7337 | |
7338 return p; | |
7339 } | |
7340 | |
7341 /* | |
7342 * Make a copy of a string into memory allocated with getroom(). | |
7343 * Returns NULL when out of memory. | |
7344 */ | |
7345 static char_u * | |
7346 getroom_save(spellinfo_T *spin, char_u *s) | |
7347 { | |
7348 char_u *sc; | |
7349 | |
7350 sc = (char_u *)getroom(spin, STRLEN(s) + 1, FALSE); | |
7351 if (sc != NULL) | |
7352 STRCPY(sc, s); | |
7353 return sc; | |
7354 } | |
7355 | |
7356 | |
7357 /* | |
7358 * Free the list of allocated sblock_T. | |
7359 */ | |
7360 static void | |
7361 free_blocks(sblock_T *bl) | |
7362 { | |
7363 sblock_T *next; | |
7364 | |
7365 while (bl != NULL) | |
7366 { | |
7367 next = bl->sb_next; | |
7368 vim_free(bl); | |
7369 bl = next; | |
7370 } | |
7371 } | |
7372 | |
7373 /* | |
7374 * Allocate the root of a word tree. | |
7375 * Returns NULL when out of memory. | |
7376 */ | |
7377 static wordnode_T * | |
7378 wordtree_alloc(spellinfo_T *spin) | |
7379 { | |
7380 return (wordnode_T *)getroom(spin, sizeof(wordnode_T), TRUE); | |
7381 } | |
7382 | |
7383 /* | |
7384 * Store a word in the tree(s). | |
7385 * Always store it in the case-folded tree. For a keep-case word this is | |
7386 * useful when the word can also be used with all caps (no WF_FIXCAP flag) and | |
7387 * used to find suggestions. | |
7388 * For a keep-case word also store it in the keep-case tree. | |
7389 * When "pfxlist" is not NULL store the word for each postponed prefix ID and | |
7390 * compound flag. | |
7391 */ | |
7392 static int | |
7393 store_word( | |
7394 spellinfo_T *spin, | |
7395 char_u *word, | |
7396 int flags, /* extra flags, WF_BANNED */ | |
7397 int region, /* supported region(s) */ | |
7398 char_u *pfxlist, /* list of prefix IDs or NULL */ | |
7399 int need_affix) /* only store word with affix ID */ | |
7400 { | |
7401 int len = (int)STRLEN(word); | |
7402 int ct = captype(word, word + len); | |
7403 char_u foldword[MAXWLEN]; | |
7404 int res = OK; | |
7405 char_u *p; | |
7406 | |
7407 (void)spell_casefold(word, len, foldword, MAXWLEN); | |
7408 for (p = pfxlist; res == OK; ++p) | |
7409 { | |
7410 if (!need_affix || (p != NULL && *p != NUL)) | |
7411 res = tree_add_word(spin, foldword, spin->si_foldroot, ct | flags, | |
7412 region, p == NULL ? 0 : *p); | |
7413 if (p == NULL || *p == NUL) | |
7414 break; | |
7415 } | |
7416 ++spin->si_foldwcount; | |
7417 | |
7418 if (res == OK && (ct == WF_KEEPCAP || (flags & WF_KEEPCAP))) | |
7419 { | |
7420 for (p = pfxlist; res == OK; ++p) | |
7421 { | |
7422 if (!need_affix || (p != NULL && *p != NUL)) | |
7423 res = tree_add_word(spin, word, spin->si_keeproot, flags, | |
7424 region, p == NULL ? 0 : *p); | |
7425 if (p == NULL || *p == NUL) | |
7426 break; | |
7427 } | |
7428 ++spin->si_keepwcount; | |
7429 } | |
7430 return res; | |
7431 } | |
7432 | |
/*
 * Add word "word" to a word tree at "root".
 * When "flags" < 0 we are adding to the prefix tree where "flags" is used for
 * "rare" and "region" is the condition nr.
 *
 * "spin"	state of the spell file being built; supplies the node
 *		allocator and the counters that decide when to compress
 * "word"	NUL-terminated bytes to add; the NUL is added as a node too
 * "root"	node whose child/sibling chain is walked and extended
 * "flags"	stored in "wn_flags" of the end-of-word node
 * "region"	OR-ed into "wn_region" of the end-of-word node
 * "affixID"	stored in "wn_affixID" of the end-of-word node
 *
 * After adding the word the tree(s) may get compressed, see the comment
 * further down.
 * Returns FAIL when out of memory, OK otherwise.
 */
    static int
tree_add_word(
    spellinfo_T	*spin,
    char_u	*word,
    wordnode_T	*root,
    int		flags,
    int		region,
    int		affixID)
{
    wordnode_T	*node = root;
    wordnode_T	*np;
    wordnode_T	*copyp, **copyprev;
    wordnode_T	**prev = NULL;	/* address of the pointer that leads to
				   "node", used to link in a new node */
    int		i;

    /* Add each byte of the word to the tree, including the NUL at the end. */
    for (i = 0; ; ++i)
    {
	/* When there is more than one reference to this node we need to make
	 * a copy, so that we can modify it.  Copy the whole list of siblings
	 * (we don't optimize for a partly shared list of siblings). */
	if (node != NULL && node->wn_refs > 1)
	{
	    /* This path no longer uses the shared list. */
	    --node->wn_refs;
	    copyprev = prev;
	    for (copyp = node; copyp != NULL; copyp = copyp->wn_sibling)
	    {
		/* Allocate a new node and copy the info. */
		np = get_wordnode(spin);
		if (np == NULL)
		    return FAIL;
		np->wn_child = copyp->wn_child;
		if (np->wn_child != NULL)
		    ++np->wn_child->wn_refs;	/* child gets extra ref */
		np->wn_byte = copyp->wn_byte;
		if (np->wn_byte == NUL)
		{
		    /* Only an end-of-word node has these fields copied. */
		    np->wn_flags = copyp->wn_flags;
		    np->wn_region = copyp->wn_region;
		    np->wn_affixID = copyp->wn_affixID;
		}

		/* Link the new node in the list, there will be one ref. */
		np->wn_refs = 1;
		if (copyprev != NULL)
		    *copyprev = np;
		copyprev = &np->wn_sibling;

		/* Let "node" point to the head of the copied list. */
		if (copyp == node)
		    node = np;
	    }
	}

	/* Look for the sibling that has the same character.  They are sorted
	 * on byte value, thus stop searching when a sibling is found with a
	 * higher byte value.  For zero bytes (end of word) the sorting is
	 * done on flags and then on affixID. */
	while (node != NULL
		&& (node->wn_byte < word[i]
		    || (node->wn_byte == NUL
			&& (flags < 0
			    ? node->wn_affixID < (unsigned)affixID
			    : (node->wn_flags < (unsigned)(flags & WN_MASK)
				|| (node->wn_flags == (flags & WN_MASK)
				    && (spin->si_sugtree
					? (node->wn_region & 0xffff) < region
					: node->wn_affixID
						    < (unsigned)affixID)))))))
	{
	    prev = &node->wn_sibling;
	    node = *prev;
	}

	/* No usable existing node: insert a new one before "node".  For the
	 * end-of-word byte a new node is always made when adding to the
	 * prefix tree ("flags" < 0) or when "spin->si_sugtree" is set, and
	 * otherwise when the flags or the affix ID differ. */
	if (node == NULL
		|| node->wn_byte != word[i]
		|| (word[i] == NUL
		    && (flags < 0
			|| spin->si_sugtree
			|| node->wn_flags != (flags & WN_MASK)
			|| node->wn_affixID != affixID)))
	{
	    /* Allocate a new node. */
	    np = get_wordnode(spin);
	    if (np == NULL)
		return FAIL;
	    np->wn_byte = word[i];

	    /* If "node" is NULL this is a new child or the end of the sibling
	     * list: ref count is one.  Otherwise use ref count of sibling and
	     * make ref count of sibling one (matters when inserting in front
	     * of the list of siblings). */
	    if (node == NULL)
		np->wn_refs = 1;
	    else
	    {
		np->wn_refs = node->wn_refs;
		node->wn_refs = 1;
	    }
	    if (prev != NULL)
		*prev = np;
	    np->wn_sibling = node;
	    node = np;
	}

	if (word[i] == NUL)
	{
	    /* End of the word: store the word info and stop. */
	    node->wn_flags = flags;
	    node->wn_region |= region;
	    node->wn_affixID = affixID;
	    break;
	}

	/* Go down to the list of bytes that can follow this one. */
	prev = &node->wn_child;
	node = *prev;
    }
#ifdef SPELL_PRINTTREE
    smsg((char_u *)"Added \"%s\"", word);
    spell_print_tree(root->wn_sibling);
#endif

    /* count nr of words added since last message */
    ++spin->si_msg_count;

    if (spin->si_compress_cnt > 1)
    {
	if (--spin->si_compress_cnt == 1)
	    /* Did enough words to lower the block count limit. */
	    spin->si_blocks_cnt += compress_inc;
    }

    /*
     * When we have allocated lots of memory we need to compress the word tree
     * to free up some room.  But compression is slow, and we might actually
     * need that room, thus only compress in the following situations:
     * 1. When not compressed before (si_compress_cnt == 0): when using
     *    "compress_start" blocks.
     * 2. When compressed before and used "compress_inc" blocks before
     *    adding "compress_added" words (si_compress_cnt > 1).
     * 3. When compressed before, added "compress_added" words
     *    (si_compress_cnt == 1) and the number of free nodes drops below the
     *    maximum word length.
     */
#ifndef SPELL_COMPRESS_ALLWAYS
    if (spin->si_compress_cnt == 1
	    ? spin->si_free_count < MAXWLEN
	    : spin->si_blocks_cnt >= compress_start)
#endif
    {
	/* Decrement the block counter.  The effect is that we compress again
	 * when the freed up room has been used and another "compress_inc"
	 * blocks have been allocated.  Unless "compress_added" words have
	 * been added, then the limit is put back again. */
	spin->si_blocks_cnt -= compress_inc;
	spin->si_compress_cnt = compress_added;

	if (spin->si_verbose)
	{
	    /* Show the "compressing" message, arranged so that the next
	     * message starts in the first column again. */
	    msg_start();
	    msg_puts((char_u *)_(msg_compressing));
	    msg_clr_eos();
	    msg_didout = FALSE;
	    msg_col = 0;
	    out_flush();
	}

	/* Compress both trees.  Either they both have many nodes, which makes
	 * compression useful, or one of them is small, which means
	 * compression goes fast.  But when filling the soundfold word tree
	 * there is no keep-case tree. */
	wordtree_compress(spin, spin->si_foldroot);
	if (affixID >= 0)
	    wordtree_compress(spin, spin->si_keeproot);
    }

    return OK;
}
7614 | |
7615 /* | |
7616 * Check the 'mkspellmem' option. Return FAIL if it's wrong. | |
7617 * Sets "sps_flags". | |
7618 */ | |
7619 int | |
7620 spell_check_msm(void) | |
7621 { | |
7622 char_u *p = p_msm; | |
7623 long start = 0; | |
7624 long incr = 0; | |
7625 long added = 0; | |
7626 | |
7627 if (!VIM_ISDIGIT(*p)) | |
7628 return FAIL; | |
7629 /* block count = (value * 1024) / SBLOCKSIZE (but avoid overflow)*/ | |
7630 start = (getdigits(&p) * 10) / (SBLOCKSIZE / 102); | |
7631 if (*p != ',') | |
7632 return FAIL; | |
7633 ++p; | |
7634 if (!VIM_ISDIGIT(*p)) | |
7635 return FAIL; | |
7636 incr = (getdigits(&p) * 102) / (SBLOCKSIZE / 10); | |
7637 if (*p != ',') | |
7638 return FAIL; | |
7639 ++p; | |
7640 if (!VIM_ISDIGIT(*p)) | |
7641 return FAIL; | |
7642 added = getdigits(&p) * 1024; | |
7643 if (*p != NUL) | |
7644 return FAIL; | |
7645 | |
7646 if (start == 0 || incr == 0 || added == 0 || incr > start) | |
7647 return FAIL; | |
7648 | |
7649 compress_start = start; | |
7650 compress_inc = incr; | |
7651 compress_added = added; | |
7652 return OK; | |
7653 } | |
7654 | |
7655 | |
7656 /* | |
7657 * Get a wordnode_T, either from the list of previously freed nodes or | |
7658 * allocate a new one. | |
7659 * Returns NULL when out of memory. | |
7660 */ | |
7661 static wordnode_T * | |
7662 get_wordnode(spellinfo_T *spin) | |
7663 { | |
7664 wordnode_T *n; | |
7665 | |
7666 if (spin->si_first_free == NULL) | |
7667 n = (wordnode_T *)getroom(spin, sizeof(wordnode_T), TRUE); | |
7668 else | |
7669 { | |
7670 n = spin->si_first_free; | |
7671 spin->si_first_free = n->wn_child; | |
7672 vim_memset(n, 0, sizeof(wordnode_T)); | |
7673 --spin->si_free_count; | |
7674 } | |
7675 #ifdef SPELL_PRINTTREE | |
7676 if (n != NULL) | |
7677 n->wn_nr = ++spin->si_wordnode_nr; | |
7678 #endif | |
7679 return n; | |
7680 } | |
7681 | |
7682 /* | |
7683 * Decrement the reference count on a node (which is the head of a list of | |
7684 * siblings). If the reference count becomes zero free the node and its | |
7685 * siblings. | |
7686 * Returns the number of nodes actually freed. | |
7687 */ | |
7688 static int | |
7689 deref_wordnode(spellinfo_T *spin, wordnode_T *node) | |
7690 { | |
7691 wordnode_T *np; | |
7692 int cnt = 0; | |
7693 | |
7694 if (--node->wn_refs == 0) | |
7695 { | |
7696 for (np = node; np != NULL; np = np->wn_sibling) | |
7697 { | |
7698 if (np->wn_child != NULL) | |
7699 cnt += deref_wordnode(spin, np->wn_child); | |
7700 free_wordnode(spin, np); | |
7701 ++cnt; | |
7702 } | |
7703 ++cnt; /* length field */ | |
7704 } | |
7705 return cnt; | |
7706 } | |
7707 | |
7708 /* | |
7709 * Free a wordnode_T for re-use later. | |
7710 * Only the "wn_child" field becomes invalid. | |
7711 */ | |
7712 static void | |
7713 free_wordnode(spellinfo_T *spin, wordnode_T *n) | |
7714 { | |
7715 n->wn_child = spin->si_first_free; | |
7716 spin->si_first_free = n; | |
7717 ++spin->si_free_count; | |
7718 } | |
7719 | |
7720 /* | |
7721 * Compress a tree: find tails that are identical and can be shared. | |
7722 */ | |
7723 static void | |
7724 wordtree_compress(spellinfo_T *spin, wordnode_T *root) | |
7725 { | |
7726 hashtab_T ht; | |
7727 int n; | |
7728 int tot = 0; | |
7729 int perc; | |
7730 | |
7731 /* Skip the root itself, it's not actually used. The first sibling is the | |
7732 * start of the tree. */ | |
7733 if (root->wn_sibling != NULL) | |
7734 { | |
7735 hash_init(&ht); | |
7736 n = node_compress(spin, root->wn_sibling, &ht, &tot); | |
7737 | |
7738 #ifndef SPELL_PRINTTREE | |
7739 if (spin->si_verbose || p_verbose > 2) | |
7740 #endif | |
7741 { | |
7742 if (tot > 1000000) | |
7743 perc = (tot - n) / (tot / 100); | |
7744 else if (tot == 0) | |
7745 perc = 0; | |
7746 else | |
7747 perc = (tot - n) * 100 / tot; | |
7748 vim_snprintf((char *)IObuff, IOSIZE, | |
7749 _("Compressed %d of %d nodes; %d (%d%%) remaining"), | |
7750 n, tot, tot - n, perc); | |
7751 spell_message(spin, IObuff); | |
7752 } | |
7753 #ifdef SPELL_PRINTTREE | |
7754 spell_print_tree(root->wn_sibling); | |
7755 #endif | |
7756 hash_clear(&ht); | |
7757 } | |
7758 } | |
7759 | |
/*
 * Compress a node, its siblings and its children, depth first.
 *
 * "spin"   passed on to deref_wordnode() to free nodes that were replaced
 * "node"   head of the list of siblings to compress
 * "ht"	    hashtable with the sibling lists seen so far, keyed on the hash
 *	    key that is computed at the end of this function
 * "tot"    incremented with the number of siblings in each visited list,
 *	    plus one per list
 *
 * Returns the number of compressed nodes.
 */
    static int
node_compress(
    spellinfo_T	*spin,
    wordnode_T	*node,
    hashtab_T	*ht,
    int		*tot)	    /* total count of nodes before compressing,
			       incremented while going through the tree */
{
    wordnode_T	*np;
    wordnode_T	*tp;
    wordnode_T	*child;
    hash_T	hash;
    hashitem_T	*hi;
    int		len = 0;	/* number of siblings visited */
    unsigned	nr, n;
    int		compressed = 0;

    /*
     * Go through the list of siblings.  Compress each child and then try
     * finding an identical child to replace it.
     * Note that with "child" we mean not just the node that is pointed to,
     * but the whole list of siblings of which the child node is the first.
     */
    for (np = node; np != NULL && !got_int; np = np->wn_sibling)
    {
	++len;
	if ((child = np->wn_child) != NULL)
	{
	    /* Compress the child first.  This fills hashkey. */
	    compressed += node_compress(spin, child, ht, tot);

	    /* Try to find an identical child. */
	    hash = hash_hash(child->wn_u1.hashkey);
	    hi = hash_lookup(ht, child->wn_u1.hashkey, hash);
	    if (!HASHITEM_EMPTY(hi))
	    {
		/* There are children we encountered before with a hash value
		 * identical to the current child.  Now check if there is one
		 * that is really identical. */
		for (tp = HI2WN(hi); tp != NULL; tp = tp->wn_u2.next)
		    if (node_equal(child, tp))
		    {
			/* Found one!  Now use that child in place of the
			 * current one.  This means the current child and all
			 * its siblings is unlinked from the tree. */
			++tp->wn_refs;
			compressed += deref_wordnode(spin, child);
			np->wn_child = tp;
			break;
		    }
		if (tp == NULL)
		{
		    /* No other child with this hash value equals the child of
		     * the node, add it to the linked list after the first
		     * item. */
		    tp = HI2WN(hi);
		    child->wn_u2.next = tp->wn_u2.next;
		    tp->wn_u2.next = child;
		}
	    }
	    else
		/* No other child has this hash value, add it to the
		 * hashtable. */
		hash_add_item(ht, hi, child->wn_u1.hashkey, hash);
	}
    }
    *tot += len + 1;	/* add one for the node that stores the length */

    /*
     * Make a hash key for the node and its siblings, so that we can quickly
     * find a lookalike node.  This must be done after compressing the sibling
     * list, otherwise the hash key would become invalid by the compression.
     * Key layout: [0] the sibling count, [1]-[4] the four bytes of "nr",
     * lowest byte first, [5] the terminating NUL.
     * NOTE(review): "len" is stored in a single key element; presumably a
     * count that is a multiple of 256 ends up as NUL there - confirm against
     * the declaration of "hashkey".
     */
    node->wn_u1.hashkey[0] = len;
    nr = 0;
    for (np = node; np != NULL; np = np->wn_sibling)
    {
	if (np->wn_byte == NUL)
	    /* end node: use wn_flags, wn_region and wn_affixID */
	    n = np->wn_flags + (np->wn_region << 8) + (np->wn_affixID << 16);
	else
	    /* byte node: use the byte value and the child pointer */
	    n = (unsigned)(np->wn_byte + ((long_u)np->wn_child << 8));
	nr = nr * 101 + n;
    }

    /* Avoid NUL bytes, it terminates the hash key. */
    n = nr & 0xff;
    node->wn_u1.hashkey[1] = n == 0 ? 1 : n;
    n = (nr >> 8) & 0xff;
    node->wn_u1.hashkey[2] = n == 0 ? 1 : n;
    n = (nr >> 16) & 0xff;
    node->wn_u1.hashkey[3] = n == 0 ? 1 : n;
    n = (nr >> 24) & 0xff;
    node->wn_u1.hashkey[4] = n == 0 ? 1 : n;
    node->wn_u1.hashkey[5] = NUL;

    /* Check for CTRL-C pressed now and then. */
    fast_breakcheck();

    return compressed;
}
7866 | |
7867 /* | |
7868 * Return TRUE when two nodes have identical siblings and children. | |
7869 */ | |
7870 static int | |
7871 node_equal(wordnode_T *n1, wordnode_T *n2) | |
7872 { | |
7873 wordnode_T *p1; | |
7874 wordnode_T *p2; | |
7875 | |
7876 for (p1 = n1, p2 = n2; p1 != NULL && p2 != NULL; | |
7877 p1 = p1->wn_sibling, p2 = p2->wn_sibling) | |
7878 if (p1->wn_byte != p2->wn_byte | |
7879 || (p1->wn_byte == NUL | |
7880 ? (p1->wn_flags != p2->wn_flags | |
7881 || p1->wn_region != p2->wn_region | |
7882 || p1->wn_affixID != p2->wn_affixID) | |
7883 : (p1->wn_child != p2->wn_child))) | |
7884 break; | |
7885 | |
7886 return p1 == NULL && p2 == NULL; | |
7887 } | |
7888 | |
7889 static int | |
7890 #ifdef __BORLANDC__ | |
7891 _RTLENTRYF | |
7892 #endif | |
7893 rep_compare(const void *s1, const void *s2); | |
7894 | |
7895 /* | |
7896 * Function given to qsort() to sort the REP items on "from" string. | |
7897 */ | |
7898 static int | |
7899 #ifdef __BORLANDC__ | |
7900 _RTLENTRYF | |
7901 #endif | |
7902 rep_compare(const void *s1, const void *s2) | |
7903 { | |
7904 fromto_T *p1 = (fromto_T *)s1; | |
7905 fromto_T *p2 = (fromto_T *)s2; | |
7906 | |
7907 return STRCMP(p1->ft_from, p2->ft_from); | |
7908 } | |
7909 | |
7910 /* | |
7911 * Write the Vim .spl file "fname". | |
7912 * Return FAIL or OK; | |
7913 */ | |
7914 static int | |
7915 write_vim_spell(spellinfo_T *spin, char_u *fname) | |
7916 { | |
7917 FILE *fd; | |
7918 int regionmask; | |
7919 int round; | |
7920 wordnode_T *tree; | |
7921 int nodecount; | |
7922 int i; | |
7923 int l; | |
7924 garray_T *gap; | |
7925 fromto_T *ftp; | |
7926 char_u *p; | |
7927 int rr; | |
7928 int retval = OK; | |
7929 size_t fwv = 1; /* collect return value of fwrite() to avoid | |
7930 warnings from picky compiler */ | |
7931 | |
7932 fd = mch_fopen((char *)fname, "w"); | |
7933 if (fd == NULL) | |
7934 { | |
7935 EMSG2(_(e_notopen), fname); | |
7936 return FAIL; | |
7937 } | |
7938 | |
7939 /* <HEADER>: <fileID> <versionnr> */ | |
7940 /* <fileID> */ | |
7941 fwv &= fwrite(VIMSPELLMAGIC, VIMSPELLMAGICL, (size_t)1, fd); | |
7942 if (fwv != (size_t)1) | |
7943 /* Catch first write error, don't try writing more. */ | |
7944 goto theend; | |
7945 | |
7946 putc(VIMSPELLVERSION, fd); /* <versionnr> */ | |
7947 | |
7948 /* | |
7949 * <SECTIONS>: <section> ... <sectionend> | |
7950 */ | |
7951 | |
7952 /* SN_INFO: <infotext> */ | |
7953 if (spin->si_info != NULL) | |
7954 { | |
7955 putc(SN_INFO, fd); /* <sectionID> */ | |
7956 putc(0, fd); /* <sectionflags> */ | |
7957 | |
7958 i = (int)STRLEN(spin->si_info); | |
7959 put_bytes(fd, (long_u)i, 4); /* <sectionlen> */ | |
7960 fwv &= fwrite(spin->si_info, (size_t)i, (size_t)1, fd); /* <infotext> */ | |
7961 } | |
7962 | |
7963 /* SN_REGION: <regionname> ... | |
7964 * Write the region names only if there is more than one. */ | |
7965 if (spin->si_region_count > 1) | |
7966 { | |
7967 putc(SN_REGION, fd); /* <sectionID> */ | |
7968 putc(SNF_REQUIRED, fd); /* <sectionflags> */ | |
7969 l = spin->si_region_count * 2; | |
7970 put_bytes(fd, (long_u)l, 4); /* <sectionlen> */ | |
7971 fwv &= fwrite(spin->si_region_name, (size_t)l, (size_t)1, fd); | |
7972 /* <regionname> ... */ | |
7973 regionmask = (1 << spin->si_region_count) - 1; | |
7974 } | |
7975 else | |
7976 regionmask = 0; | |
7977 | |
7978 /* SN_CHARFLAGS: <charflagslen> <charflags> <folcharslen> <folchars> | |
7979 * | |
7980 * The table with character flags and the table for case folding. | |
7981 * This makes sure the same characters are recognized as word characters | |
7982 * when generating an when using a spell file. | |
7983 * Skip this for ASCII, the table may conflict with the one used for | |
7984 * 'encoding'. | |
7985 * Also skip this for an .add.spl file, the main spell file must contain | |
7986 * the table (avoids that it conflicts). File is shorter too. | |
7987 */ | |
7988 if (!spin->si_ascii && !spin->si_add) | |
7989 { | |
7990 char_u folchars[128 * 8]; | |
7991 int flags; | |
7992 | |
7993 putc(SN_CHARFLAGS, fd); /* <sectionID> */ | |
7994 putc(SNF_REQUIRED, fd); /* <sectionflags> */ | |
7995 | |
7996 /* Form the <folchars> string first, we need to know its length. */ | |
7997 l = 0; | |
7998 for (i = 128; i < 256; ++i) | |
7999 { | |
8000 #ifdef FEAT_MBYTE | |
8001 if (has_mbyte) | |
8002 l += mb_char2bytes(spelltab.st_fold[i], folchars + l); | |
8003 else | |
8004 #endif | |
8005 folchars[l++] = spelltab.st_fold[i]; | |
8006 } | |
8007 put_bytes(fd, (long_u)(1 + 128 + 2 + l), 4); /* <sectionlen> */ | |
8008 | |
8009 fputc(128, fd); /* <charflagslen> */ | |
8010 for (i = 128; i < 256; ++i) | |
8011 { | |
8012 flags = 0; | |
8013 if (spelltab.st_isw[i]) | |
8014 flags |= CF_WORD; | |
8015 if (spelltab.st_isu[i]) | |
8016 flags |= CF_UPPER; | |
8017 fputc(flags, fd); /* <charflags> */ | |
8018 } | |
8019 | |
8020 put_bytes(fd, (long_u)l, 2); /* <folcharslen> */ | |
8021 fwv &= fwrite(folchars, (size_t)l, (size_t)1, fd); /* <folchars> */ | |
8022 } | |
8023 | |
8024 /* SN_MIDWORD: <midword> */ | |
8025 if (spin->si_midword != NULL) | |
8026 { | |
8027 putc(SN_MIDWORD, fd); /* <sectionID> */ | |
8028 putc(SNF_REQUIRED, fd); /* <sectionflags> */ | |
8029 | |
8030 i = (int)STRLEN(spin->si_midword); | |
8031 put_bytes(fd, (long_u)i, 4); /* <sectionlen> */ | |
8032 fwv &= fwrite(spin->si_midword, (size_t)i, (size_t)1, fd); | |
8033 /* <midword> */ | |
8034 } | |
8035 | |
8036 /* SN_PREFCOND: <prefcondcnt> <prefcond> ... */ | |
8037 if (spin->si_prefcond.ga_len > 0) | |
8038 { | |
8039 putc(SN_PREFCOND, fd); /* <sectionID> */ | |
8040 putc(SNF_REQUIRED, fd); /* <sectionflags> */ | |
8041 | |
8042 l = write_spell_prefcond(NULL, &spin->si_prefcond); | |
8043 put_bytes(fd, (long_u)l, 4); /* <sectionlen> */ | |
8044 | |
8045 write_spell_prefcond(fd, &spin->si_prefcond); | |
8046 } | |
8047 | |
8048 /* SN_REP: <repcount> <rep> ... | |
8049 * SN_SAL: <salflags> <salcount> <sal> ... | |
8050 * SN_REPSAL: <repcount> <rep> ... */ | |
8051 | |
8052 /* round 1: SN_REP section | |
8053 * round 2: SN_SAL section (unless SN_SOFO is used) | |
8054 * round 3: SN_REPSAL section */ | |
8055 for (round = 1; round <= 3; ++round) | |
8056 { | |
8057 if (round == 1) | |
8058 gap = &spin->si_rep; | |
8059 else if (round == 2) | |
8060 { | |
8061 /* Don't write SN_SAL when using a SN_SOFO section */ | |
8062 if (spin->si_sofofr != NULL && spin->si_sofoto != NULL) | |
8063 continue; | |
8064 gap = &spin->si_sal; | |
8065 } | |
8066 else | |
8067 gap = &spin->si_repsal; | |
8068 | |
8069 /* Don't write the section if there are no items. */ | |
8070 if (gap->ga_len == 0) | |
8071 continue; | |
8072 | |
8073 /* Sort the REP/REPSAL items. */ | |
8074 if (round != 2) | |
8075 qsort(gap->ga_data, (size_t)gap->ga_len, | |
8076 sizeof(fromto_T), rep_compare); | |
8077 | |
8078 i = round == 1 ? SN_REP : (round == 2 ? SN_SAL : SN_REPSAL); | |
8079 putc(i, fd); /* <sectionID> */ | |
8080 | |
8081 /* This is for making suggestions, section is not required. */ | |
8082 putc(0, fd); /* <sectionflags> */ | |
8083 | |
8084 /* Compute the length of what follows. */ | |
8085 l = 2; /* count <repcount> or <salcount> */ | |
8086 for (i = 0; i < gap->ga_len; ++i) | |
8087 { | |
8088 ftp = &((fromto_T *)gap->ga_data)[i]; | |
8089 l += 1 + (int)STRLEN(ftp->ft_from); /* count <*fromlen> and <*from> */ | |
8090 l += 1 + (int)STRLEN(ftp->ft_to); /* count <*tolen> and <*to> */ | |
8091 } | |
8092 if (round == 2) | |
8093 ++l; /* count <salflags> */ | |
8094 put_bytes(fd, (long_u)l, 4); /* <sectionlen> */ | |
8095 | |
8096 if (round == 2) | |
8097 { | |
8098 i = 0; | |
8099 if (spin->si_followup) | |
8100 i |= SAL_F0LLOWUP; | |
8101 if (spin->si_collapse) | |
8102 i |= SAL_COLLAPSE; | |
8103 if (spin->si_rem_accents) | |
8104 i |= SAL_REM_ACCENTS; | |
8105 putc(i, fd); /* <salflags> */ | |
8106 } | |
8107 | |
8108 put_bytes(fd, (long_u)gap->ga_len, 2); /* <repcount> or <salcount> */ | |
8109 for (i = 0; i < gap->ga_len; ++i) | |
8110 { | |
8111 /* <rep> : <repfromlen> <repfrom> <reptolen> <repto> */ | |
8112 /* <sal> : <salfromlen> <salfrom> <saltolen> <salto> */ | |
8113 ftp = &((fromto_T *)gap->ga_data)[i]; | |
8114 for (rr = 1; rr <= 2; ++rr) | |
8115 { | |
8116 p = rr == 1 ? ftp->ft_from : ftp->ft_to; | |
8117 l = (int)STRLEN(p); | |
8118 putc(l, fd); | |
8119 if (l > 0) | |
8120 fwv &= fwrite(p, l, (size_t)1, fd); | |
8121 } | |
8122 } | |
8123 | |
8124 } | |
8125 | |
8126 /* SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto> | |
8127 * This is for making suggestions, section is not required. */ | |
8128 if (spin->si_sofofr != NULL && spin->si_sofoto != NULL) | |
8129 { | |
8130 putc(SN_SOFO, fd); /* <sectionID> */ | |
8131 putc(0, fd); /* <sectionflags> */ | |
8132 | |
8133 l = (int)STRLEN(spin->si_sofofr); | |
8134 put_bytes(fd, (long_u)(l + STRLEN(spin->si_sofoto) + 4), 4); | |
8135 /* <sectionlen> */ | |
8136 | |
8137 put_bytes(fd, (long_u)l, 2); /* <sofofromlen> */ | |
8138 fwv &= fwrite(spin->si_sofofr, l, (size_t)1, fd); /* <sofofrom> */ | |
8139 | |
8140 l = (int)STRLEN(spin->si_sofoto); | |
8141 put_bytes(fd, (long_u)l, 2); /* <sofotolen> */ | |
8142 fwv &= fwrite(spin->si_sofoto, l, (size_t)1, fd); /* <sofoto> */ | |
8143 } | |
8144 | |
8145 /* SN_WORDS: <word> ... | |
8146 * This is for making suggestions, section is not required. */ | |
8147 if (spin->si_commonwords.ht_used > 0) | |
8148 { | |
8149 putc(SN_WORDS, fd); /* <sectionID> */ | |
8150 putc(0, fd); /* <sectionflags> */ | |
8151 | |
8152 /* round 1: count the bytes | |
8153 * round 2: write the bytes */ | |
8154 for (round = 1; round <= 2; ++round) | |
8155 { | |
8156 int todo; | |
8157 int len = 0; | |
8158 hashitem_T *hi; | |
8159 | |
8160 todo = (int)spin->si_commonwords.ht_used; | |
8161 for (hi = spin->si_commonwords.ht_array; todo > 0; ++hi) | |
8162 if (!HASHITEM_EMPTY(hi)) | |
8163 { | |
8164 l = (int)STRLEN(hi->hi_key) + 1; | |
8165 len += l; | |
8166 if (round == 2) /* <word> */ | |
8167 fwv &= fwrite(hi->hi_key, (size_t)l, (size_t)1, fd); | |
8168 --todo; | |
8169 } | |
8170 if (round == 1) | |
8171 put_bytes(fd, (long_u)len, 4); /* <sectionlen> */ | |
8172 } | |
8173 } | |
8174 | |
8175 /* SN_MAP: <mapstr> | |
8176 * This is for making suggestions, section is not required. */ | |
8177 if (spin->si_map.ga_len > 0) | |
8178 { | |
8179 putc(SN_MAP, fd); /* <sectionID> */ | |
8180 putc(0, fd); /* <sectionflags> */ | |
8181 l = spin->si_map.ga_len; | |
8182 put_bytes(fd, (long_u)l, 4); /* <sectionlen> */ | |
8183 fwv &= fwrite(spin->si_map.ga_data, (size_t)l, (size_t)1, fd); | |
8184 /* <mapstr> */ | |
8185 } | |
8186 | |
8187 /* SN_SUGFILE: <timestamp> | |
8188 * This is used to notify that a .sug file may be available and at the | |
8189 * same time allows for checking that a .sug file that is found matches | |
8190 * with this .spl file. That's because the word numbers must be exactly | |
8191 * right. */ | |
8192 if (!spin->si_nosugfile | |
8193 && (spin->si_sal.ga_len > 0 | |
8194 || (spin->si_sofofr != NULL && spin->si_sofoto != NULL))) | |
8195 { | |
8196 putc(SN_SUGFILE, fd); /* <sectionID> */ | |
8197 putc(0, fd); /* <sectionflags> */ | |
8198 put_bytes(fd, (long_u)8, 4); /* <sectionlen> */ | |
8199 | |
8200 /* Set si_sugtime and write it to the file. */ | |
8201 spin->si_sugtime = time(NULL); | |
8202 put_time(fd, spin->si_sugtime); /* <timestamp> */ | |
8203 } | |
8204 | |
8205 /* SN_NOSPLITSUGS: nothing | |
8206 * This is used to notify that no suggestions with word splits are to be | |
8207 * made. */ | |
8208 if (spin->si_nosplitsugs) | |
8209 { | |
8210 putc(SN_NOSPLITSUGS, fd); /* <sectionID> */ | |
8211 putc(0, fd); /* <sectionflags> */ | |
8212 put_bytes(fd, (long_u)0, 4); /* <sectionlen> */ | |
8213 } | |
8214 | |
8215 /* SN_NOCOMPOUNDSUGS: nothing | |
8216 * This is used to notify that no suggestions with compounds are to be | |
8217 * made. */ | |
8218 if (spin->si_nocompoundsugs) | |
8219 { | |
8220 putc(SN_NOCOMPOUNDSUGS, fd); /* <sectionID> */ | |
8221 putc(0, fd); /* <sectionflags> */ | |
8222 put_bytes(fd, (long_u)0, 4); /* <sectionlen> */ | |
8223 } | |
8224 | |
8225 /* SN_COMPOUND: compound info. | |
8226 * We don't mark it required, when not supported all compound words will | |
8227 * be bad words. */ | |
8228 if (spin->si_compflags != NULL) | |
8229 { | |
8230 putc(SN_COMPOUND, fd); /* <sectionID> */ | |
8231 putc(0, fd); /* <sectionflags> */ | |
8232 | |
8233 l = (int)STRLEN(spin->si_compflags); | |
8234 for (i = 0; i < spin->si_comppat.ga_len; ++i) | |
8235 l += (int)STRLEN(((char_u **)(spin->si_comppat.ga_data))[i]) + 1; | |
8236 put_bytes(fd, (long_u)(l + 7), 4); /* <sectionlen> */ | |
8237 | |
8238 putc(spin->si_compmax, fd); /* <compmax> */ | |
8239 putc(spin->si_compminlen, fd); /* <compminlen> */ | |
8240 putc(spin->si_compsylmax, fd); /* <compsylmax> */ | |
8241 putc(0, fd); /* for Vim 7.0b compatibility */ | |
8242 putc(spin->si_compoptions, fd); /* <compoptions> */ | |
8243 put_bytes(fd, (long_u)spin->si_comppat.ga_len, 2); | |
8244 /* <comppatcount> */ | |
8245 for (i = 0; i < spin->si_comppat.ga_len; ++i) | |
8246 { | |
8247 p = ((char_u **)(spin->si_comppat.ga_data))[i]; | |
8248 putc((int)STRLEN(p), fd); /* <comppatlen> */ | |
8249 fwv &= fwrite(p, (size_t)STRLEN(p), (size_t)1, fd); | |
8250 /* <comppattext> */ | |
8251 } | |
8252 /* <compflags> */ | |
8253 fwv &= fwrite(spin->si_compflags, (size_t)STRLEN(spin->si_compflags), | |
8254 (size_t)1, fd); | |
8255 } | |
8256 | |
8257 /* SN_NOBREAK: NOBREAK flag */ | |
8258 if (spin->si_nobreak) | |
8259 { | |
8260 putc(SN_NOBREAK, fd); /* <sectionID> */ | |
8261 putc(0, fd); /* <sectionflags> */ | |
8262 | |
8263 /* It's empty, the presence of the section flags the feature. */ | |
8264 put_bytes(fd, (long_u)0, 4); /* <sectionlen> */ | |
8265 } | |
8266 | |
8267 /* SN_SYLLABLE: syllable info. | |
8268 * We don't mark it required, when not supported syllables will not be | |
8269 * counted. */ | |
8270 if (spin->si_syllable != NULL) | |
8271 { | |
8272 putc(SN_SYLLABLE, fd); /* <sectionID> */ | |
8273 putc(0, fd); /* <sectionflags> */ | |
8274 | |
8275 l = (int)STRLEN(spin->si_syllable); | |
8276 put_bytes(fd, (long_u)l, 4); /* <sectionlen> */ | |
8277 fwv &= fwrite(spin->si_syllable, (size_t)l, (size_t)1, fd); | |
8278 /* <syllable> */ | |
8279 } | |
8280 | |
8281 /* end of <SECTIONS> */ | |
8282 putc(SN_END, fd); /* <sectionend> */ | |
8283 | |
8284 | |
8285 /* | |
8286 * <LWORDTREE> <KWORDTREE> <PREFIXTREE> | |
8287 */ | |
8288 spin->si_memtot = 0; | |
8289 for (round = 1; round <= 3; ++round) | |
8290 { | |
8291 if (round == 1) | |
8292 tree = spin->si_foldroot->wn_sibling; | |
8293 else if (round == 2) | |
8294 tree = spin->si_keeproot->wn_sibling; | |
8295 else | |
8296 tree = spin->si_prefroot->wn_sibling; | |
8297 | |
8298 /* Clear the index and wnode fields in the tree. */ | |
8299 clear_node(tree); | |
8300 | |
8301 /* Count the number of nodes. Needed to be able to allocate the | |
8302 * memory when reading the nodes. Also fills in index for shared | |
8303 * nodes. */ | |
8304 nodecount = put_node(NULL, tree, 0, regionmask, round == 3); | |
8305 | |
8306 /* number of nodes in 4 bytes */ | |
8307 put_bytes(fd, (long_u)nodecount, 4); /* <nodecount> */ | |
8308 spin->si_memtot += nodecount + nodecount * sizeof(int); | |
8309 | |
8310 /* Write the nodes. */ | |
8311 (void)put_node(fd, tree, 0, regionmask, round == 3); | |
8312 } | |
8313 | |
8314 /* Write another byte to check for errors (file system full). */ | |
8315 if (putc(0, fd) == EOF) | |
8316 retval = FAIL; | |
8317 theend: | |
8318 if (fclose(fd) == EOF) | |
8319 retval = FAIL; | |
8320 | |
8321 if (fwv != (size_t)1) | |
8322 retval = FAIL; | |
8323 if (retval == FAIL) | |
8324 EMSG(_(e_write)); | |
8325 | |
8326 return retval; | |
8327 } | |
8328 | |
8329 /* | |
8330 * Clear the index and wnode fields of "node", it siblings and its | |
8331 * children. This is needed because they are a union with other items to save | |
8332 * space. | |
8333 */ | |
8334 static void | |
8335 clear_node(wordnode_T *node) | |
8336 { | |
8337 wordnode_T *np; | |
8338 | |
8339 if (node != NULL) | |
8340 for (np = node; np != NULL; np = np->wn_sibling) | |
8341 { | |
8342 np->wn_u1.index = 0; | |
8343 np->wn_u2.wnode = NULL; | |
8344 | |
8345 if (np->wn_byte != NUL) | |
8346 clear_node(np->wn_child); | |
8347 } | |
8348 } | |
8349 | |
8350 | |
8351 /* | |
8352 * Dump a word tree at node "node". | |
8353 * | |
8354 * This first writes the list of possible bytes (siblings). Then for each | |
8355 * byte recursively write the children. | |
8356 * | |
8357 * NOTE: The code here must match the code in read_tree_node(), since | |
8358 * assumptions are made about the indexes (so that we don't have to write them | |
8359 * in the file). | |
8360 * | |
8361 * Returns the number of nodes used. | |
8362 */ | |
8363 static int | |
8364 put_node( | |
8365 FILE *fd, /* NULL when only counting */ | |
8366 wordnode_T *node, | |
8367 int idx, | |
8368 int regionmask, | |
8369 int prefixtree) /* TRUE for PREFIXTREE */ | |
8370 { | |
8371 int newindex = idx; | |
8372 int siblingcount = 0; | |
8373 wordnode_T *np; | |
8374 int flags; | |
8375 | |
8376 /* If "node" is zero the tree is empty. */ | |
8377 if (node == NULL) | |
8378 return 0; | |
8379 | |
8380 /* Store the index where this node is written. */ | |
8381 node->wn_u1.index = idx; | |
8382 | |
8383 /* Count the number of siblings. */ | |
8384 for (np = node; np != NULL; np = np->wn_sibling) | |
8385 ++siblingcount; | |
8386 | |
8387 /* Write the sibling count. */ | |
8388 if (fd != NULL) | |
8389 putc(siblingcount, fd); /* <siblingcount> */ | |
8390 | |
8391 /* Write each sibling byte and optionally extra info. */ | |
8392 for (np = node; np != NULL; np = np->wn_sibling) | |
8393 { | |
8394 if (np->wn_byte == 0) | |
8395 { | |
8396 if (fd != NULL) | |
8397 { | |
8398 /* For a NUL byte (end of word) write the flags etc. */ | |
8399 if (prefixtree) | |
8400 { | |
8401 /* In PREFIXTREE write the required affixID and the | |
8402 * associated condition nr (stored in wn_region). The | |
8403 * byte value is misused to store the "rare" and "not | |
8404 * combining" flags */ | |
8405 if (np->wn_flags == (short_u)PFX_FLAGS) | |
8406 putc(BY_NOFLAGS, fd); /* <byte> */ | |
8407 else | |
8408 { | |
8409 putc(BY_FLAGS, fd); /* <byte> */ | |
8410 putc(np->wn_flags, fd); /* <pflags> */ | |
8411 } | |
8412 putc(np->wn_affixID, fd); /* <affixID> */ | |
8413 put_bytes(fd, (long_u)np->wn_region, 2); /* <prefcondnr> */ | |
8414 } | |
8415 else | |
8416 { | |
8417 /* For word trees we write the flag/region items. */ | |
8418 flags = np->wn_flags; | |
8419 if (regionmask != 0 && np->wn_region != regionmask) | |
8420 flags |= WF_REGION; | |
8421 if (np->wn_affixID != 0) | |
8422 flags |= WF_AFX; | |
8423 if (flags == 0) | |
8424 { | |
8425 /* word without flags or region */ | |
8426 putc(BY_NOFLAGS, fd); /* <byte> */ | |
8427 } | |
8428 else | |
8429 { | |
8430 if (np->wn_flags >= 0x100) | |
8431 { | |
8432 putc(BY_FLAGS2, fd); /* <byte> */ | |
8433 putc(flags, fd); /* <flags> */ | |
8434 putc((unsigned)flags >> 8, fd); /* <flags2> */ | |
8435 } | |
8436 else | |
8437 { | |
8438 putc(BY_FLAGS, fd); /* <byte> */ | |
8439 putc(flags, fd); /* <flags> */ | |
8440 } | |
8441 if (flags & WF_REGION) | |
8442 putc(np->wn_region, fd); /* <region> */ | |
8443 if (flags & WF_AFX) | |
8444 putc(np->wn_affixID, fd); /* <affixID> */ | |
8445 } | |
8446 } | |
8447 } | |
8448 } | |
8449 else | |
8450 { | |
8451 if (np->wn_child->wn_u1.index != 0 | |
8452 && np->wn_child->wn_u2.wnode != node) | |
8453 { | |
8454 /* The child is written elsewhere, write the reference. */ | |
8455 if (fd != NULL) | |
8456 { | |
8457 putc(BY_INDEX, fd); /* <byte> */ | |
8458 /* <nodeidx> */ | |
8459 put_bytes(fd, (long_u)np->wn_child->wn_u1.index, 3); | |
8460 } | |
8461 } | |
8462 else if (np->wn_child->wn_u2.wnode == NULL) | |
8463 /* We will write the child below and give it an index. */ | |
8464 np->wn_child->wn_u2.wnode = node; | |
8465 | |
8466 if (fd != NULL) | |
8467 if (putc(np->wn_byte, fd) == EOF) /* <byte> or <xbyte> */ | |
8468 { | |
8469 EMSG(_(e_write)); | |
8470 return 0; | |
8471 } | |
8472 } | |
8473 } | |
8474 | |
8475 /* Space used in the array when reading: one for each sibling and one for | |
8476 * the count. */ | |
8477 newindex += siblingcount + 1; | |
8478 | |
8479 /* Recursively dump the children of each sibling. */ | |
8480 for (np = node; np != NULL; np = np->wn_sibling) | |
8481 if (np->wn_byte != 0 && np->wn_child->wn_u2.wnode == node) | |
8482 newindex = put_node(fd, np->wn_child, newindex, regionmask, | |
8483 prefixtree); | |
8484 | |
8485 return newindex; | |
8486 } | |
8487 | |
8488 | |
8489 /* | |
8490 * ":mkspell [-ascii] outfile infile ..." | |
8491 * ":mkspell [-ascii] addfile" | |
8492 */ | |
8493 void | |
8494 ex_mkspell(exarg_T *eap) | |
8495 { | |
8496 int fcount; | |
8497 char_u **fnames; | |
8498 char_u *arg = eap->arg; | |
8499 int ascii = FALSE; | |
8500 | |
8501 if (STRNCMP(arg, "-ascii", 6) == 0) | |
8502 { | |
8503 ascii = TRUE; | |
8504 arg = skipwhite(arg + 6); | |
8505 } | |
8506 | |
8507 /* Expand all the remaining arguments (e.g., $VIMRUNTIME). */ | |
8508 if (get_arglist_exp(arg, &fcount, &fnames, FALSE) == OK) | |
8509 { | |
8510 mkspell(fcount, fnames, ascii, eap->forceit, FALSE); | |
8511 FreeWild(fcount, fnames); | |
8512 } | |
8513 } | |
8514 | |
8515 /* | |
8516 * Create the .sug file. | |
8517 * Uses the soundfold info in "spin". | |
8518 * Writes the file with the name "wfname", with ".spl" changed to ".sug". | |
8519 */ | |
8520 static void | |
8521 spell_make_sugfile(spellinfo_T *spin, char_u *wfname) | |
8522 { | |
8523 char_u *fname = NULL; | |
8524 int len; | |
8525 slang_T *slang; | |
8526 int free_slang = FALSE; | |
8527 | |
8528 /* | |
8529 * Read back the .spl file that was written. This fills the required | |
8530 * info for soundfolding. This also uses less memory than the | |
8531 * pointer-linked version of the trie. And it avoids having two versions | |
8532 * of the code for the soundfolding stuff. | |
8533 * It might have been done already by spell_reload_one(). | |
8534 */ | |
8535 for (slang = first_lang; slang != NULL; slang = slang->sl_next) | |
8536 if (fullpathcmp(wfname, slang->sl_fname, FALSE) == FPC_SAME) | |
8537 break; | |
8538 if (slang == NULL) | |
8539 { | |
8540 spell_message(spin, (char_u *)_("Reading back spell file...")); | |
8541 slang = spell_load_file(wfname, NULL, NULL, FALSE); | |
8542 if (slang == NULL) | |
8543 return; | |
8544 free_slang = TRUE; | |
8545 } | |
8546 | |
8547 /* | |
8548 * Clear the info in "spin" that is used. | |
8549 */ | |
8550 spin->si_blocks = NULL; | |
8551 spin->si_blocks_cnt = 0; | |
8552 spin->si_compress_cnt = 0; /* will stay at 0 all the time*/ | |
8553 spin->si_free_count = 0; | |
8554 spin->si_first_free = NULL; | |
8555 spin->si_foldwcount = 0; | |
8556 | |
8557 /* | |
8558 * Go through the trie of good words, soundfold each word and add it to | |
8559 * the soundfold trie. | |
8560 */ | |
8561 spell_message(spin, (char_u *)_("Performing soundfolding...")); | |
8562 if (sug_filltree(spin, slang) == FAIL) | |
8563 goto theend; | |
8564 | |
8565 /* | |
8566 * Create the table which links each soundfold word with a list of the | |
8567 * good words it may come from. Creates buffer "spin->si_spellbuf". | |
8568 * This also removes the wordnr from the NUL byte entries to make | |
8569 * compression possible. | |
8570 */ | |
8571 if (sug_maketable(spin) == FAIL) | |
8572 goto theend; | |
8573 | |
8574 smsg((char_u *)_("Number of words after soundfolding: %ld"), | |
8575 (long)spin->si_spellbuf->b_ml.ml_line_count); | |
8576 | |
8577 /* | |
8578 * Compress the soundfold trie. | |
8579 */ | |
8580 spell_message(spin, (char_u *)_(msg_compressing)); | |
8581 wordtree_compress(spin, spin->si_foldroot); | |
8582 | |
8583 /* | |
8584 * Write the .sug file. | |
8585 * Make the file name by changing ".spl" to ".sug". | |
8586 */ | |
8587 fname = alloc(MAXPATHL); | |
8588 if (fname == NULL) | |
8589 goto theend; | |
8590 vim_strncpy(fname, wfname, MAXPATHL - 1); | |
8591 len = (int)STRLEN(fname); | |
8592 fname[len - 2] = 'u'; | |
8593 fname[len - 1] = 'g'; | |
8594 sug_write(spin, fname); | |
8595 | |
8596 theend: | |
8597 vim_free(fname); | |
8598 if (free_slang) | |
8599 slang_free(slang); | |
8600 free_blocks(spin->si_blocks); | |
8601 close_spellbuf(spin->si_spellbuf); | |
8602 } | |
8603 | |
8604 /* | |
8605 * Build the soundfold trie for language "slang". | |
8606 */ | |
8607 static int | |
8608 sug_filltree(spellinfo_T *spin, slang_T *slang) | |
8609 { | |
8610 char_u *byts; | |
8611 idx_T *idxs; | |
8612 int depth; | |
8613 idx_T arridx[MAXWLEN]; | |
8614 int curi[MAXWLEN]; | |
8615 char_u tword[MAXWLEN]; | |
8616 char_u tsalword[MAXWLEN]; | |
8617 int c; | |
8618 idx_T n; | |
8619 unsigned words_done = 0; | |
8620 int wordcount[MAXWLEN]; | |
8621 | |
8622 /* We use si_foldroot for the soundfolded trie. */ | |
8623 spin->si_foldroot = wordtree_alloc(spin); | |
8624 if (spin->si_foldroot == NULL) | |
8625 return FAIL; | |
8626 | |
8627 /* let tree_add_word() know we're adding to the soundfolded tree */ | |
8628 spin->si_sugtree = TRUE; | |
8629 | |
8630 /* | |
8631 * Go through the whole case-folded tree, soundfold each word and put it | |
8632 * in the trie. | |
8633 */ | |
8634 byts = slang->sl_fbyts; | |
8635 idxs = slang->sl_fidxs; | |
8636 | |
8637 arridx[0] = 0; | |
8638 curi[0] = 1; | |
8639 wordcount[0] = 0; | |
8640 | |
8641 depth = 0; | |
8642 while (depth >= 0 && !got_int) | |
8643 { | |
8644 if (curi[depth] > byts[arridx[depth]]) | |
8645 { | |
8646 /* Done all bytes at this node, go up one level. */ | |
8647 idxs[arridx[depth]] = wordcount[depth]; | |
8648 if (depth > 0) | |
8649 wordcount[depth - 1] += wordcount[depth]; | |
8650 | |
8651 --depth; | |
8652 line_breakcheck(); | |
8653 } | |
8654 else | |
8655 { | |
8656 | |
8657 /* Do one more byte at this node. */ | |
8658 n = arridx[depth] + curi[depth]; | |
8659 ++curi[depth]; | |
8660 | |
8661 c = byts[n]; | |
8662 if (c == 0) | |
8663 { | |
8664 /* Sound-fold the word. */ | |
8665 tword[depth] = NUL; | |
8666 spell_soundfold(slang, tword, TRUE, tsalword); | |
8667 | |
8668 /* We use the "flags" field for the MSB of the wordnr, | |
8669 * "region" for the LSB of the wordnr. */ | |
8670 if (tree_add_word(spin, tsalword, spin->si_foldroot, | |
8671 words_done >> 16, words_done & 0xffff, | |
8672 0) == FAIL) | |
8673 return FAIL; | |
8674 | |
8675 ++words_done; | |
8676 ++wordcount[depth]; | |
8677 | |
8678 /* Reset the block count each time to avoid compression | |
8679 * kicking in. */ | |
8680 spin->si_blocks_cnt = 0; | |
8681 | |
8682 /* Skip over any other NUL bytes (same word with different | |
8683 * flags). */ | |
8684 while (byts[n + 1] == 0) | |
8685 { | |
8686 ++n; | |
8687 ++curi[depth]; | |
8688 } | |
8689 } | |
8690 else | |
8691 { | |
8692 /* Normal char, go one level deeper. */ | |
8693 tword[depth++] = c; | |
8694 arridx[depth] = idxs[n]; | |
8695 curi[depth] = 1; | |
8696 wordcount[depth] = 0; | |
8697 } | |
8698 } | |
8699 } | |
8700 | |
8701 smsg((char_u *)_("Total number of words: %d"), words_done); | |
8702 | |
8703 return OK; | |
8704 } | |
8705 | |
8706 /* | |
8707 * Make the table that links each word in the soundfold trie to the words it | |
8708 * can be produced from. | |
8709 * This is not unlike lines in a file, thus use a memfile to be able to access | |
8710 * the table efficiently. | |
8711 * Returns FAIL when out of memory. | |
8712 */ | |
8713 static int | |
8714 sug_maketable(spellinfo_T *spin) | |
8715 { | |
8716 garray_T ga; | |
8717 int res = OK; | |
8718 | |
8719 /* Allocate a buffer, open a memline for it and create the swap file | |
8720 * (uses a temp file, not a .swp file). */ | |
8721 spin->si_spellbuf = open_spellbuf(); | |
8722 if (spin->si_spellbuf == NULL) | |
8723 return FAIL; | |
8724 | |
8725 /* Use a buffer to store the line info, avoids allocating many small | |
8726 * pieces of memory. */ | |
8727 ga_init2(&ga, 1, 100); | |
8728 | |
8729 /* recursively go through the tree */ | |
8730 if (sug_filltable(spin, spin->si_foldroot->wn_sibling, 0, &ga) == -1) | |
8731 res = FAIL; | |
8732 | |
8733 ga_clear(&ga); | |
8734 return res; | |
8735 } | |
8736 | |
8737 /* | |
8738 * Fill the table for one node and its children. | |
8739 * Returns the wordnr at the start of the node. | |
8740 * Returns -1 when out of memory. | |
8741 */ | |
8742 static int | |
8743 sug_filltable( | |
8744 spellinfo_T *spin, | |
8745 wordnode_T *node, | |
8746 int startwordnr, | |
8747 garray_T *gap) /* place to store line of numbers */ | |
8748 { | |
8749 wordnode_T *p, *np; | |
8750 int wordnr = startwordnr; | |
8751 int nr; | |
8752 int prev_nr; | |
8753 | |
8754 for (p = node; p != NULL; p = p->wn_sibling) | |
8755 { | |
8756 if (p->wn_byte == NUL) | |
8757 { | |
8758 gap->ga_len = 0; | |
8759 prev_nr = 0; | |
8760 for (np = p; np != NULL && np->wn_byte == NUL; np = np->wn_sibling) | |
8761 { | |
8762 if (ga_grow(gap, 10) == FAIL) | |
8763 return -1; | |
8764 | |
8765 nr = (np->wn_flags << 16) + (np->wn_region & 0xffff); | |
8766 /* Compute the offset from the previous nr and store the | |
8767 * offset in a way that it takes a minimum number of bytes. | |
8768 * It's a bit like utf-8, but without the need to mark | |
8769 * following bytes. */ | |
8770 nr -= prev_nr; | |
8771 prev_nr += nr; | |
8772 gap->ga_len += offset2bytes(nr, | |
8773 (char_u *)gap->ga_data + gap->ga_len); | |
8774 } | |
8775 | |
8776 /* add the NUL byte */ | |
8777 ((char_u *)gap->ga_data)[gap->ga_len++] = NUL; | |
8778 | |
8779 if (ml_append_buf(spin->si_spellbuf, (linenr_T)wordnr, | |
8780 gap->ga_data, gap->ga_len, TRUE) == FAIL) | |
8781 return -1; | |
8782 ++wordnr; | |
8783 | |
8784 /* Remove extra NUL entries, we no longer need them. We don't | |
8785 * bother freeing the nodes, the won't be reused anyway. */ | |
8786 while (p->wn_sibling != NULL && p->wn_sibling->wn_byte == NUL) | |
8787 p->wn_sibling = p->wn_sibling->wn_sibling; | |
8788 | |
8789 /* Clear the flags on the remaining NUL node, so that compression | |
8790 * works a lot better. */ | |
8791 p->wn_flags = 0; | |
8792 p->wn_region = 0; | |
8793 } | |
8794 else | |
8795 { | |
8796 wordnr = sug_filltable(spin, p->wn_child, wordnr, gap); | |
8797 if (wordnr == -1) | |
8798 return -1; | |
8799 } | |
8800 } | |
8801 return wordnr; | |
8802 } | |
8803 | |
8804 /* | |
8805 * Convert an offset into a minimal number of bytes. | |
8806 * Similar to utf_char2byters, but use 8 bits in followup bytes and avoid NUL | |
8807 * bytes. | |
8808 */ | |
8809 static int | |
8810 offset2bytes(int nr, char_u *buf) | |
8811 { | |
8812 int rem; | |
8813 int b1, b2, b3, b4; | |
8814 | |
8815 /* Split the number in parts of base 255. We need to avoid NUL bytes. */ | |
8816 b1 = nr % 255 + 1; | |
8817 rem = nr / 255; | |
8818 b2 = rem % 255 + 1; | |
8819 rem = rem / 255; | |
8820 b3 = rem % 255 + 1; | |
8821 b4 = rem / 255 + 1; | |
8822 | |
8823 if (b4 > 1 || b3 > 0x1f) /* 4 bytes */ | |
8824 { | |
8825 buf[0] = 0xe0 + b4; | |
8826 buf[1] = b3; | |
8827 buf[2] = b2; | |
8828 buf[3] = b1; | |
8829 return 4; | |
8830 } | |
8831 if (b3 > 1 || b2 > 0x3f ) /* 3 bytes */ | |
8832 { | |
8833 buf[0] = 0xc0 + b3; | |
8834 buf[1] = b2; | |
8835 buf[2] = b1; | |
8836 return 3; | |
8837 } | |
8838 if (b2 > 1 || b1 > 0x7f ) /* 2 bytes */ | |
8839 { | |
8840 buf[0] = 0x80 + b2; | |
8841 buf[1] = b1; | |
8842 return 2; | |
8843 } | |
8844 /* 1 byte */ | |
8845 buf[0] = b1; | |
8846 return 1; | |
8847 } | |
8848 | |
8849 /* | |
8850 * Opposite of offset2bytes(). | 2933 * Opposite of offset2bytes(). |
8851 * "pp" points to the bytes and is advanced over it. | 2934 * "pp" points to the bytes and is advanced over it. |
8852 * Returns the offset. | 2935 * Returns the offset. |
8853 */ | 2936 */ |
8854 static int | 2937 static int |
8884 | 2967 |
8885 *pp = p; | 2968 *pp = p; |
8886 return nr; | 2969 return nr; |
8887 } | 2970 } |
8888 | 2971 |
8889 /* | |
8890 * Write the .sug file in "fname". | |
8891 */ | |
8892 static void | |
8893 sug_write(spellinfo_T *spin, char_u *fname) | |
8894 { | |
8895 FILE *fd; | |
8896 wordnode_T *tree; | |
8897 int nodecount; | |
8898 int wcount; | |
8899 char_u *line; | |
8900 linenr_T lnum; | |
8901 int len; | |
8902 | |
8903 /* Create the file. Note that an existing file is silently overwritten! */ | |
8904 fd = mch_fopen((char *)fname, "w"); | |
8905 if (fd == NULL) | |
8906 { | |
8907 EMSG2(_(e_notopen), fname); | |
8908 return; | |
8909 } | |
8910 | |
8911 vim_snprintf((char *)IObuff, IOSIZE, | |
8912 _("Writing suggestion file %s ..."), fname); | |
8913 spell_message(spin, IObuff); | |
8914 | |
8915 /* | |
8916 * <SUGHEADER>: <fileID> <versionnr> <timestamp> | |
8917 */ | |
8918 if (fwrite(VIMSUGMAGIC, VIMSUGMAGICL, (size_t)1, fd) != 1) /* <fileID> */ | |
8919 { | |
8920 EMSG(_(e_write)); | |
8921 goto theend; | |
8922 } | |
8923 putc(VIMSUGVERSION, fd); /* <versionnr> */ | |
8924 | |
8925 /* Write si_sugtime to the file. */ | |
8926 put_time(fd, spin->si_sugtime); /* <timestamp> */ | |
8927 | |
8928 /* | |
8929 * <SUGWORDTREE> | |
8930 */ | |
8931 spin->si_memtot = 0; | |
8932 tree = spin->si_foldroot->wn_sibling; | |
8933 | |
8934 /* Clear the index and wnode fields in the tree. */ | |
8935 clear_node(tree); | |
8936 | |
8937 /* Count the number of nodes. Needed to be able to allocate the | |
8938 * memory when reading the nodes. Also fills in index for shared | |
8939 * nodes. */ | |
8940 nodecount = put_node(NULL, tree, 0, 0, FALSE); | |
8941 | |
8942 /* number of nodes in 4 bytes */ | |
8943 put_bytes(fd, (long_u)nodecount, 4); /* <nodecount> */ | |
8944 spin->si_memtot += nodecount + nodecount * sizeof(int); | |
8945 | |
8946 /* Write the nodes. */ | |
8947 (void)put_node(fd, tree, 0, 0, FALSE); | |
8948 | |
8949 /* | |
8950 * <SUGTABLE>: <sugwcount> <sugline> ... | |
8951 */ | |
8952 wcount = spin->si_spellbuf->b_ml.ml_line_count; | |
8953 put_bytes(fd, (long_u)wcount, 4); /* <sugwcount> */ | |
8954 | |
8955 for (lnum = 1; lnum <= (linenr_T)wcount; ++lnum) | |
8956 { | |
8957 /* <sugline>: <sugnr> ... NUL */ | |
8958 line = ml_get_buf(spin->si_spellbuf, lnum, FALSE); | |
8959 len = (int)STRLEN(line) + 1; | |
8960 if (fwrite(line, (size_t)len, (size_t)1, fd) == 0) | |
8961 { | |
8962 EMSG(_(e_write)); | |
8963 goto theend; | |
8964 } | |
8965 spin->si_memtot += len; | |
8966 } | |
8967 | |
8968 /* Write another byte to check for errors. */ | |
8969 if (putc(0, fd) == EOF) | |
8970 EMSG(_(e_write)); | |
8971 | |
8972 vim_snprintf((char *)IObuff, IOSIZE, | |
8973 _("Estimated runtime memory use: %d bytes"), spin->si_memtot); | |
8974 spell_message(spin, IObuff); | |
8975 | |
8976 theend: | |
8977 /* close the file */ | |
8978 fclose(fd); | |
8979 } | |
8980 | 2972 |
8981 /* | 2973 /* |
8982 * Open a spell buffer. This is a nameless buffer that is not in the buffer | 2974 * Open a spell buffer. This is a nameless buffer that is not in the buffer |
8983 * list and only contains text lines. Can use a swapfile to reduce memory | 2975 * list and only contains text lines. Can use a swapfile to reduce memory |
8984 * use. | 2976 * use. |
8985 * Most other fields are invalid! Esp. watch out for string options being | 2977 * Most other fields are invalid! Esp. watch out for string options being |
8986 * NULL and there is no undo info. | 2978 * NULL and there is no undo info. |
8987 * Returns NULL when out of memory. | 2979 * Returns NULL when out of memory. |
8988 */ | 2980 */ |
8989 static buf_T * | 2981 buf_T * |
8990 open_spellbuf(void) | 2982 open_spellbuf(void) |
8991 { | 2983 { |
8992 buf_T *buf; | 2984 buf_T *buf; |
8993 | 2985 |
8994 buf = (buf_T *)alloc_clear(sizeof(buf_T)); | 2986 buf = (buf_T *)alloc_clear(sizeof(buf_T)); |
9006 } | 2998 } |
9007 | 2999 |
9008 /* | 3000 /* |
9009 * Close the buffer used for spell info. | 3001 * Close the buffer used for spell info. |
9010 */ | 3002 */ |
9011 static void | 3003 void |
9012 close_spellbuf(buf_T *buf) | 3004 close_spellbuf(buf_T *buf) |
9013 { | 3005 { |
9014 if (buf != NULL) | 3006 if (buf != NULL) |
9015 { | 3007 { |
9016 ml_close(buf, TRUE); | 3008 ml_close(buf, TRUE); |
9017 vim_free(buf); | 3009 vim_free(buf); |
9018 } | 3010 } |
9019 } | 3011 } |
9020 | 3012 |
9021 | |
9022 /* | |
9023 * Create a Vim spell file from one or more word lists. | |
9024 * "fnames[0]" is the output file name. | |
9025 * "fnames[fcount - 1]" is the last input file name. | |
9026 * Exception: when "fnames[0]" ends in ".add" it's used as the input file name | |
9027 * and ".spl" is appended to make the output file name. | |
9028 */ | |
9029 static void | |
9030 mkspell( | |
9031 int fcount, | |
9032 char_u **fnames, | |
9033 int ascii, /* -ascii argument given */ | |
9034 int over_write, /* overwrite existing output file */ | |
9035 int added_word) /* invoked through "zg" */ | |
9036 { | |
9037 char_u *fname = NULL; | |
9038 char_u *wfname; | |
9039 char_u **innames; | |
9040 int incount; | |
9041 afffile_T *(afile[8]); | |
9042 int i; | |
9043 int len; | |
9044 stat_T st; | |
9045 int error = FALSE; | |
9046 spellinfo_T spin; | |
9047 | |
9048 vim_memset(&spin, 0, sizeof(spin)); | |
9049 spin.si_verbose = !added_word; | |
9050 spin.si_ascii = ascii; | |
9051 spin.si_followup = TRUE; | |
9052 spin.si_rem_accents = TRUE; | |
9053 ga_init2(&spin.si_rep, (int)sizeof(fromto_T), 20); | |
9054 ga_init2(&spin.si_repsal, (int)sizeof(fromto_T), 20); | |
9055 ga_init2(&spin.si_sal, (int)sizeof(fromto_T), 20); | |
9056 ga_init2(&spin.si_map, (int)sizeof(char_u), 100); | |
9057 ga_init2(&spin.si_comppat, (int)sizeof(char_u *), 20); | |
9058 ga_init2(&spin.si_prefcond, (int)sizeof(char_u *), 50); | |
9059 hash_init(&spin.si_commonwords); | |
9060 spin.si_newcompID = 127; /* start compound ID at first maximum */ | |
9061 | |
9062 /* default: fnames[0] is output file, following are input files */ | |
9063 innames = &fnames[1]; | |
9064 incount = fcount - 1; | |
9065 | |
9066 wfname = alloc(MAXPATHL); | |
9067 if (wfname == NULL) | |
9068 return; | |
9069 | |
9070 if (fcount >= 1) | |
9071 { | |
9072 len = (int)STRLEN(fnames[0]); | |
9073 if (fcount == 1 && len > 4 && STRCMP(fnames[0] + len - 4, ".add") == 0) | |
9074 { | |
9075 /* For ":mkspell path/en.latin1.add" output file is | |
9076 * "path/en.latin1.add.spl". */ | |
9077 innames = &fnames[0]; | |
9078 incount = 1; | |
9079 vim_snprintf((char *)wfname, MAXPATHL, "%s.spl", fnames[0]); | |
9080 } | |
9081 else if (fcount == 1) | |
9082 { | |
9083 /* For ":mkspell path/vim" output file is "path/vim.latin1.spl". */ | |
9084 innames = &fnames[0]; | |
9085 incount = 1; | |
9086 vim_snprintf((char *)wfname, MAXPATHL, SPL_FNAME_TMPL, | |
9087 fnames[0], spin.si_ascii ? (char_u *)"ascii" : spell_enc()); | |
9088 } | |
9089 else if (len > 4 && STRCMP(fnames[0] + len - 4, ".spl") == 0) | |
9090 { | |
9091 /* Name ends in ".spl", use as the file name. */ | |
9092 vim_strncpy(wfname, fnames[0], MAXPATHL - 1); | |
9093 } | |
9094 else | |
9095 /* Name should be language, make the file name from it. */ | |
9096 vim_snprintf((char *)wfname, MAXPATHL, SPL_FNAME_TMPL, | |
9097 fnames[0], spin.si_ascii ? (char_u *)"ascii" : spell_enc()); | |
9098 | |
9099 /* Check for .ascii.spl. */ | |
9100 if (strstr((char *)gettail(wfname), SPL_FNAME_ASCII) != NULL) | |
9101 spin.si_ascii = TRUE; | |
9102 | |
9103 /* Check for .add.spl. */ | |
9104 if (strstr((char *)gettail(wfname), SPL_FNAME_ADD) != NULL) | |
9105 spin.si_add = TRUE; | |
9106 } | |
9107 | |
9108 if (incount <= 0) | |
9109 EMSG(_(e_invarg)); /* need at least output and input names */ | |
9110 else if (vim_strchr(gettail(wfname), '_') != NULL) | |
9111 EMSG(_("E751: Output file name must not have region name")); | |
9112 else if (incount > 8) | |
9113 EMSG(_("E754: Only up to 8 regions supported")); | |
9114 else | |
9115 { | |
9116 /* Check for overwriting before doing things that may take a lot of | |
9117 * time. */ | |
9118 if (!over_write && mch_stat((char *)wfname, &st) >= 0) | |
9119 { | |
9120 EMSG(_(e_exists)); | |
9121 goto theend; | |
9122 } | |
9123 if (mch_isdir(wfname)) | |
9124 { | |
9125 EMSG2(_(e_isadir2), wfname); | |
9126 goto theend; | |
9127 } | |
9128 | |
9129 fname = alloc(MAXPATHL); | |
9130 if (fname == NULL) | |
9131 goto theend; | |
9132 | |
9133 /* | |
9134 * Init the aff and dic pointers. | |
9135 * Get the region names if there are more than 2 arguments. | |
9136 */ | |
9137 for (i = 0; i < incount; ++i) | |
9138 { | |
9139 afile[i] = NULL; | |
9140 | |
9141 if (incount > 1) | |
9142 { | |
9143 len = (int)STRLEN(innames[i]); | |
9144 if (STRLEN(gettail(innames[i])) < 5 | |
9145 || innames[i][len - 3] != '_') | |
9146 { | |
9147 EMSG2(_("E755: Invalid region in %s"), innames[i]); | |
9148 goto theend; | |
9149 } | |
9150 spin.si_region_name[i * 2] = TOLOWER_ASC(innames[i][len - 2]); | |
9151 spin.si_region_name[i * 2 + 1] = | |
9152 TOLOWER_ASC(innames[i][len - 1]); | |
9153 } | |
9154 } | |
9155 spin.si_region_count = incount; | |
9156 | |
9157 spin.si_foldroot = wordtree_alloc(&spin); | |
9158 spin.si_keeproot = wordtree_alloc(&spin); | |
9159 spin.si_prefroot = wordtree_alloc(&spin); | |
9160 if (spin.si_foldroot == NULL | |
9161 || spin.si_keeproot == NULL | |
9162 || spin.si_prefroot == NULL) | |
9163 { | |
9164 free_blocks(spin.si_blocks); | |
9165 goto theend; | |
9166 } | |
9167 | |
9168 /* When not producing a .add.spl file clear the character table when | |
9169 * we encounter one in the .aff file. This means we dump the current | |
9170 * one in the .spl file if the .aff file doesn't define one. That's | |
9171 * better than guessing the contents, the table will match a | |
9172 * previously loaded spell file. */ | |
9173 if (!spin.si_add) | |
9174 spin.si_clear_chartab = TRUE; | |
9175 | |
9176 /* | |
9177 * Read all the .aff and .dic files. | |
9178 * Text is converted to 'encoding'. | |
9179 * Words are stored in the case-folded and keep-case trees. | |
9180 */ | |
9181 for (i = 0; i < incount && !error; ++i) | |
9182 { | |
9183 spin.si_conv.vc_type = CONV_NONE; | |
9184 spin.si_region = 1 << i; | |
9185 | |
9186 vim_snprintf((char *)fname, MAXPATHL, "%s.aff", innames[i]); | |
9187 if (mch_stat((char *)fname, &st) >= 0) | |
9188 { | |
9189 /* Read the .aff file. Will init "spin->si_conv" based on the | |
9190 * "SET" line. */ | |
9191 afile[i] = spell_read_aff(&spin, fname); | |
9192 if (afile[i] == NULL) | |
9193 error = TRUE; | |
9194 else | |
9195 { | |
9196 /* Read the .dic file and store the words in the trees. */ | |
9197 vim_snprintf((char *)fname, MAXPATHL, "%s.dic", | |
9198 innames[i]); | |
9199 if (spell_read_dic(&spin, fname, afile[i]) == FAIL) | |
9200 error = TRUE; | |
9201 } | |
9202 } | |
9203 else | |
9204 { | |
9205 /* No .aff file, try reading the file as a word list. Store | |
9206 * the words in the trees. */ | |
9207 if (spell_read_wordfile(&spin, innames[i]) == FAIL) | |
9208 error = TRUE; | |
9209 } | |
9210 | |
9211 #ifdef FEAT_MBYTE | |
9212 /* Free any conversion stuff. */ | |
9213 convert_setup(&spin.si_conv, NULL, NULL); | |
9214 #endif | |
9215 } | |
9216 | |
9217 if (spin.si_compflags != NULL && spin.si_nobreak) | |
9218 MSG(_("Warning: both compounding and NOBREAK specified")); | |
9219 | |
9220 if (!error && !got_int) | |
9221 { | |
9222 /* | |
9223 * Combine tails in the tree. | |
9224 */ | |
9225 spell_message(&spin, (char_u *)_(msg_compressing)); | |
9226 wordtree_compress(&spin, spin.si_foldroot); | |
9227 wordtree_compress(&spin, spin.si_keeproot); | |
9228 wordtree_compress(&spin, spin.si_prefroot); | |
9229 } | |
9230 | |
9231 if (!error && !got_int) | |
9232 { | |
9233 /* | |
9234 * Write the info in the spell file. | |
9235 */ | |
9236 vim_snprintf((char *)IObuff, IOSIZE, | |
9237 _("Writing spell file %s ..."), wfname); | |
9238 spell_message(&spin, IObuff); | |
9239 | |
9240 error = write_vim_spell(&spin, wfname) == FAIL; | |
9241 | |
9242 spell_message(&spin, (char_u *)_("Done!")); | |
9243 vim_snprintf((char *)IObuff, IOSIZE, | |
9244 _("Estimated runtime memory use: %d bytes"), spin.si_memtot); | |
9245 spell_message(&spin, IObuff); | |
9246 | |
9247 /* | |
9248 * If the file is loaded need to reload it. | |
9249 */ | |
9250 if (!error) | |
9251 spell_reload_one(wfname, added_word); | |
9252 } | |
9253 | |
9254 /* Free the allocated memory. */ | |
9255 ga_clear(&spin.si_rep); | |
9256 ga_clear(&spin.si_repsal); | |
9257 ga_clear(&spin.si_sal); | |
9258 ga_clear(&spin.si_map); | |
9259 ga_clear(&spin.si_comppat); | |
9260 ga_clear(&spin.si_prefcond); | |
9261 hash_clear_all(&spin.si_commonwords, 0); | |
9262 | |
9263 /* Free the .aff file structures. */ | |
9264 for (i = 0; i < incount; ++i) | |
9265 if (afile[i] != NULL) | |
9266 spell_free_aff(afile[i]); | |
9267 | |
9268 /* Free all the bits and pieces at once. */ | |
9269 free_blocks(spin.si_blocks); | |
9270 | |
9271 /* | |
9272 * If there is soundfolding info and no NOSUGFILE item create the | |
9273 * .sug file with the soundfolded word trie. | |
9274 */ | |
9275 if (spin.si_sugtime != 0 && !error && !got_int) | |
9276 spell_make_sugfile(&spin, wfname); | |
9277 | |
9278 } | |
9279 | |
9280 theend: | |
9281 vim_free(fname); | |
9282 vim_free(wfname); | |
9283 } | |
9284 | |
9285 /* | |
9286 * Display a message for spell file processing when 'verbose' is set or using | |
9287 * ":mkspell". "str" can be IObuff. | |
9288 */ | |
9289 static void | |
9290 spell_message(spellinfo_T *spin, char_u *str) | |
9291 { | |
9292 if (spin->si_verbose || p_verbose > 2) | |
9293 { | |
9294 if (!spin->si_verbose) | |
9295 verbose_enter(); | |
9296 MSG(str); | |
9297 out_flush(); | |
9298 if (!spin->si_verbose) | |
9299 verbose_leave(); | |
9300 } | |
9301 } | |
9302 | |
9303 /* | |
9304 * ":[count]spellgood {word}" | |
9305 * ":[count]spellwrong {word}" | |
9306 * ":[count]spellundo {word}" | |
9307 */ | |
9308 void | |
9309 ex_spell(exarg_T *eap) | |
9310 { | |
9311 spell_add_word(eap->arg, (int)STRLEN(eap->arg), eap->cmdidx == CMD_spellwrong, | |
9312 eap->forceit ? 0 : (int)eap->line2, | |
9313 eap->cmdidx == CMD_spellundo); | |
9314 } | |
9315 | |
9316 /* | |
9317 * Add "word[len]" to 'spellfile' as a good or bad word. | |
9318 */ | |
9319 void | |
9320 spell_add_word( | |
9321 char_u *word, | |
9322 int len, | |
9323 int bad, | |
9324 int idx, /* "zG" and "zW": zero, otherwise index in | |
9325 'spellfile' */ | |
9326 int undo) /* TRUE for "zug", "zuG", "zuw" and "zuW" */ | |
9327 { | |
9328 FILE *fd = NULL; | |
9329 buf_T *buf = NULL; | |
9330 int new_spf = FALSE; | |
9331 char_u *fname; | |
9332 char_u *fnamebuf = NULL; | |
9333 char_u line[MAXWLEN * 2]; | |
9334 long fpos, fpos_next = 0; | |
9335 int i; | |
9336 char_u *spf; | |
9337 | |
9338 if (idx == 0) /* use internal wordlist */ | |
9339 { | |
9340 if (int_wordlist == NULL) | |
9341 { | |
9342 int_wordlist = vim_tempname('s', FALSE); | |
9343 if (int_wordlist == NULL) | |
9344 return; | |
9345 } | |
9346 fname = int_wordlist; | |
9347 } | |
9348 else | |
9349 { | |
9350 /* If 'spellfile' isn't set figure out a good default value. */ | |
9351 if (*curwin->w_s->b_p_spf == NUL) | |
9352 { | |
9353 init_spellfile(); | |
9354 new_spf = TRUE; | |
9355 } | |
9356 | |
9357 if (*curwin->w_s->b_p_spf == NUL) | |
9358 { | |
9359 EMSG2(_(e_notset), "spellfile"); | |
9360 return; | |
9361 } | |
9362 fnamebuf = alloc(MAXPATHL); | |
9363 if (fnamebuf == NULL) | |
9364 return; | |
9365 | |
9366 for (spf = curwin->w_s->b_p_spf, i = 1; *spf != NUL; ++i) | |
9367 { | |
9368 copy_option_part(&spf, fnamebuf, MAXPATHL, ","); | |
9369 if (i == idx) | |
9370 break; | |
9371 if (*spf == NUL) | |
9372 { | |
9373 EMSGN(_("E765: 'spellfile' does not have %ld entries"), idx); | |
9374 vim_free(fnamebuf); | |
9375 return; | |
9376 } | |
9377 } | |
9378 | |
9379 /* Check that the user isn't editing the .add file somewhere. */ | |
9380 buf = buflist_findname_exp(fnamebuf); | |
9381 if (buf != NULL && buf->b_ml.ml_mfp == NULL) | |
9382 buf = NULL; | |
9383 if (buf != NULL && bufIsChanged(buf)) | |
9384 { | |
9385 EMSG(_(e_bufloaded)); | |
9386 vim_free(fnamebuf); | |
9387 return; | |
9388 } | |
9389 | |
9390 fname = fnamebuf; | |
9391 } | |
9392 | |
9393 if (bad || undo) | |
9394 { | |
9395 /* When the word appears as good word we need to remove that one, | |
9396 * since its flags sort before the one with WF_BANNED. */ | |
9397 fd = mch_fopen((char *)fname, "r"); | |
9398 if (fd != NULL) | |
9399 { | |
9400 while (!vim_fgets(line, MAXWLEN * 2, fd)) | |
9401 { | |
9402 fpos = fpos_next; | |
9403 fpos_next = ftell(fd); | |
9404 if (STRNCMP(word, line, len) == 0 | |
9405 && (line[len] == '/' || line[len] < ' ')) | |
9406 { | |
9407 /* Found duplicate word. Remove it by writing a '#' at | |
9408 * the start of the line. Mixing reading and writing | |
9409 * doesn't work for all systems, close the file first. */ | |
9410 fclose(fd); | |
9411 fd = mch_fopen((char *)fname, "r+"); | |
9412 if (fd == NULL) | |
9413 break; | |
9414 if (fseek(fd, fpos, SEEK_SET) == 0) | |
9415 { | |
9416 fputc('#', fd); | |
9417 if (undo) | |
9418 { | |
9419 home_replace(NULL, fname, NameBuff, MAXPATHL, TRUE); | |
9420 smsg((char_u *)_("Word '%.*s' removed from %s"), | |
9421 len, word, NameBuff); | |
9422 } | |
9423 } | |
9424 fseek(fd, fpos_next, SEEK_SET); | |
9425 } | |
9426 } | |
9427 if (fd != NULL) | |
9428 fclose(fd); | |
9429 } | |
9430 } | |
9431 | |
9432 if (!undo) | |
9433 { | |
9434 fd = mch_fopen((char *)fname, "a"); | |
9435 if (fd == NULL && new_spf) | |
9436 { | |
9437 char_u *p; | |
9438 | |
9439 /* We just initialized the 'spellfile' option and can't open the | |
9440 * file. We may need to create the "spell" directory first. We | |
9441 * already checked the runtime directory is writable in | |
9442 * init_spellfile(). */ | |
9443 if (!dir_of_file_exists(fname) && (p = gettail_sep(fname)) != fname) | |
9444 { | |
9445 int c = *p; | |
9446 | |
9447 /* The directory doesn't exist. Try creating it and opening | |
9448 * the file again. */ | |
9449 *p = NUL; | |
9450 vim_mkdir(fname, 0755); | |
9451 *p = c; | |
9452 fd = mch_fopen((char *)fname, "a"); | |
9453 } | |
9454 } | |
9455 | |
9456 if (fd == NULL) | |
9457 EMSG2(_(e_notopen), fname); | |
9458 else | |
9459 { | |
9460 if (bad) | |
9461 fprintf(fd, "%.*s/!\n", len, word); | |
9462 else | |
9463 fprintf(fd, "%.*s\n", len, word); | |
9464 fclose(fd); | |
9465 | |
9466 home_replace(NULL, fname, NameBuff, MAXPATHL, TRUE); | |
9467 smsg((char_u *)_("Word '%.*s' added to %s"), len, word, NameBuff); | |
9468 } | |
9469 } | |
9470 | |
9471 if (fd != NULL) | |
9472 { | |
9473 /* Update the .add.spl file. */ | |
9474 mkspell(1, &fname, FALSE, TRUE, TRUE); | |
9475 | |
9476 /* If the .add file is edited somewhere, reload it. */ | |
9477 if (buf != NULL) | |
9478 buf_reload(buf, buf->b_orig_mode); | |
9479 | |
9480 redraw_all_later(SOME_VALID); | |
9481 } | |
9482 vim_free(fnamebuf); | |
9483 } | |
9484 | |
9485 /* | |
9486 * Initialize 'spellfile' for the current buffer. | |
9487 */ | |
9488 static void | |
9489 init_spellfile(void) | |
9490 { | |
9491 char_u *buf; | |
9492 int l; | |
9493 char_u *fname; | |
9494 char_u *rtp; | |
9495 char_u *lend; | |
9496 int aspath = FALSE; | |
9497 char_u *lstart = curbuf->b_s.b_p_spl; | |
9498 | |
9499 if (*curwin->w_s->b_p_spl != NUL && curwin->w_s->b_langp.ga_len > 0) | |
9500 { | |
9501 buf = alloc(MAXPATHL); | |
9502 if (buf == NULL) | |
9503 return; | |
9504 | |
9505 /* Find the end of the language name. Exclude the region. If there | |
9506 * is a path separator remember the start of the tail. */ | |
9507 for (lend = curwin->w_s->b_p_spl; *lend != NUL | |
9508 && vim_strchr((char_u *)",._", *lend) == NULL; ++lend) | |
9509 if (vim_ispathsep(*lend)) | |
9510 { | |
9511 aspath = TRUE; | |
9512 lstart = lend + 1; | |
9513 } | |
9514 | |
9515 /* Loop over all entries in 'runtimepath'. Use the first one where we | |
9516 * are allowed to write. */ | |
9517 rtp = p_rtp; | |
9518 while (*rtp != NUL) | |
9519 { | |
9520 if (aspath) | |
9521 /* Use directory of an entry with path, e.g., for | |
9522 * "/dir/lg.utf-8.spl" use "/dir". */ | |
9523 vim_strncpy(buf, curbuf->b_s.b_p_spl, | |
9524 lstart - curbuf->b_s.b_p_spl - 1); | |
9525 else | |
9526 /* Copy the path from 'runtimepath' to buf[]. */ | |
9527 copy_option_part(&rtp, buf, MAXPATHL, ","); | |
9528 if (filewritable(buf) == 2) | |
9529 { | |
9530 /* Use the first language name from 'spelllang' and the | |
9531 * encoding used in the first loaded .spl file. */ | |
9532 if (aspath) | |
9533 vim_strncpy(buf, curbuf->b_s.b_p_spl, | |
9534 lend - curbuf->b_s.b_p_spl); | |
9535 else | |
9536 { | |
9537 /* Create the "spell" directory if it doesn't exist yet. */ | |
9538 l = (int)STRLEN(buf); | |
9539 vim_snprintf((char *)buf + l, MAXPATHL - l, "/spell"); | |
9540 if (filewritable(buf) != 2) | |
9541 vim_mkdir(buf, 0755); | |
9542 | |
9543 l = (int)STRLEN(buf); | |
9544 vim_snprintf((char *)buf + l, MAXPATHL - l, | |
9545 "/%.*s", (int)(lend - lstart), lstart); | |
9546 } | |
9547 l = (int)STRLEN(buf); | |
9548 fname = LANGP_ENTRY(curwin->w_s->b_langp, 0) | |
9549 ->lp_slang->sl_fname; | |
9550 vim_snprintf((char *)buf + l, MAXPATHL - l, ".%s.add", | |
9551 fname != NULL | |
9552 && strstr((char *)gettail(fname), ".ascii.") != NULL | |
9553 ? (char_u *)"ascii" : spell_enc()); | |
9554 set_option_value((char_u *)"spellfile", 0L, buf, OPT_LOCAL); | |
9555 break; | |
9556 } | |
9557 aspath = FALSE; | |
9558 } | |
9559 | |
9560 vim_free(buf); | |
9561 } | |
9562 } | |
9563 | |
9564 | |
9565 /* | 3013 /* |
9566 * Init the chartab used for spelling for ASCII. | 3014 * Init the chartab used for spelling for ASCII. |
9567 * EBCDIC is not supported! | 3015 * EBCDIC is not supported! |
9568 */ | 3016 */ |
9569 static void | 3017 void |
9570 clear_spell_chartab(spelltab_T *sp) | 3018 clear_spell_chartab(spelltab_T *sp) |
9571 { | 3019 { |
9572 int i; | 3020 int i; |
9573 | 3021 |
9574 /* Init everything to FALSE. */ | 3022 /* Init everything to FALSE. */ |
9654 } | 3102 } |
9655 } | 3103 } |
9656 } | 3104 } |
9657 } | 3105 } |
9658 | 3106 |
9659 /* | |
9660 * Set the spell character tables from strings in the affix file. | |
9661 */ | |
9662 static int | |
9663 set_spell_chartab(char_u *fol, char_u *low, char_u *upp) | |
9664 { | |
9665 /* We build the new tables here first, so that we can compare with the | |
9666 * previous one. */ | |
9667 spelltab_T new_st; | |
9668 char_u *pf = fol, *pl = low, *pu = upp; | |
9669 int f, l, u; | |
9670 | |
9671 clear_spell_chartab(&new_st); | |
9672 | |
9673 while (*pf != NUL) | |
9674 { | |
9675 if (*pl == NUL || *pu == NUL) | |
9676 { | |
9677 EMSG(_(e_affform)); | |
9678 return FAIL; | |
9679 } | |
9680 #ifdef FEAT_MBYTE | |
9681 f = mb_ptr2char_adv(&pf); | |
9682 l = mb_ptr2char_adv(&pl); | |
9683 u = mb_ptr2char_adv(&pu); | |
9684 #else | |
9685 f = *pf++; | |
9686 l = *pl++; | |
9687 u = *pu++; | |
9688 #endif | |
9689 /* Every character that appears is a word character. */ | |
9690 if (f < 256) | |
9691 new_st.st_isw[f] = TRUE; | |
9692 if (l < 256) | |
9693 new_st.st_isw[l] = TRUE; | |
9694 if (u < 256) | |
9695 new_st.st_isw[u] = TRUE; | |
9696 | |
9697 /* if "LOW" and "FOL" are not the same the "LOW" char needs | |
9698 * case-folding */ | |
9699 if (l < 256 && l != f) | |
9700 { | |
9701 if (f >= 256) | |
9702 { | |
9703 EMSG(_(e_affrange)); | |
9704 return FAIL; | |
9705 } | |
9706 new_st.st_fold[l] = f; | |
9707 } | |
9708 | |
9709 /* if "UPP" and "FOL" are not the same the "UPP" char needs | |
9710 * case-folding, it's upper case and the "UPP" is the upper case of | |
9711 * "FOL" . */ | |
9712 if (u < 256 && u != f) | |
9713 { | |
9714 if (f >= 256) | |
9715 { | |
9716 EMSG(_(e_affrange)); | |
9717 return FAIL; | |
9718 } | |
9719 new_st.st_fold[u] = f; | |
9720 new_st.st_isu[u] = TRUE; | |
9721 new_st.st_upper[f] = u; | |
9722 } | |
9723 } | |
9724 | |
9725 if (*pl != NUL || *pu != NUL) | |
9726 { | |
9727 EMSG(_(e_affform)); | |
9728 return FAIL; | |
9729 } | |
9730 | |
9731 return set_spell_finish(&new_st); | |
9732 } | |
9733 | |
9734 /* | |
9735 * Set the spell character tables from strings in the .spl file. | |
9736 */ | |
9737 static void | |
9738 set_spell_charflags( | |
9739 char_u *flags, | |
9740 int cnt, /* length of "flags" */ | |
9741 char_u *fol) | |
9742 { | |
9743 /* We build the new tables here first, so that we can compare with the | |
9744 * previous one. */ | |
9745 spelltab_T new_st; | |
9746 int i; | |
9747 char_u *p = fol; | |
9748 int c; | |
9749 | |
9750 clear_spell_chartab(&new_st); | |
9751 | |
9752 for (i = 0; i < 128; ++i) | |
9753 { | |
9754 if (i < cnt) | |
9755 { | |
9756 new_st.st_isw[i + 128] = (flags[i] & CF_WORD) != 0; | |
9757 new_st.st_isu[i + 128] = (flags[i] & CF_UPPER) != 0; | |
9758 } | |
9759 | |
9760 if (*p != NUL) | |
9761 { | |
9762 #ifdef FEAT_MBYTE | |
9763 c = mb_ptr2char_adv(&p); | |
9764 #else | |
9765 c = *p++; | |
9766 #endif | |
9767 new_st.st_fold[i + 128] = c; | |
9768 if (i + 128 != c && new_st.st_isu[i + 128] && c < 256) | |
9769 new_st.st_upper[c] = i + 128; | |
9770 } | |
9771 } | |
9772 | |
9773 (void)set_spell_finish(&new_st); | |
9774 } | |
9775 | |
9776 static int | |
9777 set_spell_finish(spelltab_T *new_st) | |
9778 { | |
9779 int i; | |
9780 | |
9781 if (did_set_spelltab) | |
9782 { | |
9783 /* check that it's the same table */ | |
9784 for (i = 0; i < 256; ++i) | |
9785 { | |
9786 if (spelltab.st_isw[i] != new_st->st_isw[i] | |
9787 || spelltab.st_isu[i] != new_st->st_isu[i] | |
9788 || spelltab.st_fold[i] != new_st->st_fold[i] | |
9789 || spelltab.st_upper[i] != new_st->st_upper[i]) | |
9790 { | |
9791 EMSG(_("E763: Word characters differ between spell files")); | |
9792 return FAIL; | |
9793 } | |
9794 } | |
9795 } | |
9796 else | |
9797 { | |
9798 /* copy the new spelltab into the one being used */ | |
9799 spelltab = *new_st; | |
9800 did_set_spelltab = TRUE; | |
9801 } | |
9802 | |
9803 return OK; | |
9804 } | |
9805 | 3107 |
9806 /* | 3108 /* |
9807 * Return TRUE if "p" points to a word character. | 3109 * Return TRUE if "p" points to a word character. |
9808 * As a special case we see "midword" characters as word character when it is | 3110 * As a special case we see "midword" characters as word character when it is |
9809 * followed by a word character. This finds they'there but not 'they there'. | 3111 * followed by a word character. This finds they'there but not 'they there'. |
9850 | 3152 |
9851 /* | 3153 /* |
9852 * Return TRUE if "p" points to a word character. | 3154 * Return TRUE if "p" points to a word character. |
9853 * Unlike spell_iswordp() this doesn't check for "midword" characters. | 3155 * Unlike spell_iswordp() this doesn't check for "midword" characters. |
9854 */ | 3156 */ |
9855 static int | 3157 int |
9856 spell_iswordp_nmw(char_u *p, win_T *wp) | 3158 spell_iswordp_nmw(char_u *p, win_T *wp) |
9857 { | 3159 { |
9858 #ifdef FEAT_MBYTE | 3160 #ifdef FEAT_MBYTE |
9859 int c; | 3161 int c; |
9860 | 3162 |
9913 return spelltab.st_isw[*s]; | 3215 return spelltab.st_isw[*s]; |
9914 } | 3216 } |
9915 #endif | 3217 #endif |
9916 | 3218 |
9917 /* | 3219 /* |
9918 * Write the table with prefix conditions to the .spl file. | |
9919 * When "fd" is NULL only count the length of what is written. | |
9920 */ | |
9921 static int | |
9922 write_spell_prefcond(FILE *fd, garray_T *gap) | |
9923 { | |
9924 int i; | |
9925 char_u *p; | |
9926 int len; | |
9927 int totlen; | |
9928 size_t x = 1; /* collect return value of fwrite() */ | |
9929 | |
9930 if (fd != NULL) | |
9931 put_bytes(fd, (long_u)gap->ga_len, 2); /* <prefcondcnt> */ | |
9932 | |
9933 totlen = 2 + gap->ga_len; /* length of <prefcondcnt> and <condlen> bytes */ | |
9934 | |
9935 for (i = 0; i < gap->ga_len; ++i) | |
9936 { | |
9937 /* <prefcond> : <condlen> <condstr> */ | |
9938 p = ((char_u **)gap->ga_data)[i]; | |
9939 if (p != NULL) | |
9940 { | |
9941 len = (int)STRLEN(p); | |
9942 if (fd != NULL) | |
9943 { | |
9944 fputc(len, fd); | |
9945 x &= fwrite(p, (size_t)len, (size_t)1, fd); | |
9946 } | |
9947 totlen += len; | |
9948 } | |
9949 else if (fd != NULL) | |
9950 fputc(0, fd); | |
9951 } | |
9952 | |
9953 return totlen; | |
9954 } | |
9955 | |
9956 /* | |
9957 * Case-fold "str[len]" into "buf[buflen]". The result is NUL terminated. | 3220 * Case-fold "str[len]" into "buf[buflen]". The result is NUL terminated. |
9958 * Uses the character definitions from the .spl file. | 3221 * Uses the character definitions from the .spl file. |
9959 * When using a multi-byte 'encoding' the length may change! | 3222 * When using a multi-byte 'encoding' the length may change! |
9960 * Returns FAIL when something wrong. | 3223 * Returns FAIL when something wrong. |
9961 */ | 3224 */ |
9962 static int | 3225 int |
9963 spell_casefold( | 3226 spell_casefold( |
9964 char_u *str, | 3227 char_u *str, |
9965 int len, | 3228 int len, |
9966 char_u *buf, | 3229 char_u *buf, |
9967 int buflen) | 3230 int buflen) |
10813 (void)cleanup_suggestions(&su->su_ga, su->su_maxscore, su->su_maxcount); | 4076 (void)cleanup_suggestions(&su->su_ga, su->su_maxscore, su->su_maxcount); |
10814 } | 4077 } |
10815 } | 4078 } |
10816 | 4079 |
10817 /* | 4080 /* |
10818 * Load the .sug files for languages that have one and weren't loaded yet. | |
10819 */ | |
10820 static void | |
10821 suggest_load_files(void) | |
10822 { | |
10823 langp_T *lp; | |
10824 int lpi; | |
10825 slang_T *slang; | |
10826 char_u *dotp; | |
10827 FILE *fd; | |
10828 char_u buf[MAXWLEN]; | |
10829 int i; | |
10830 time_t timestamp; | |
10831 int wcount; | |
10832 int wordnr; | |
10833 garray_T ga; | |
10834 int c; | |
10835 | |
10836 /* Do this for all languages that support sound folding. */ | |
10837 for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi) | |
10838 { | |
10839 lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi); | |
10840 slang = lp->lp_slang; | |
10841 if (slang->sl_sugtime != 0 && !slang->sl_sugloaded) | |
10842 { | |
10843 /* Change ".spl" to ".sug" and open the file. When the file isn't | |
10844 * found silently skip it. Do set "sl_sugloaded" so that we | |
10845 * don't try again and again. */ | |
10846 slang->sl_sugloaded = TRUE; | |
10847 | |
10848 dotp = vim_strrchr(slang->sl_fname, '.'); | |
10849 if (dotp == NULL || fnamecmp(dotp, ".spl") != 0) | |
10850 continue; | |
10851 STRCPY(dotp, ".sug"); | |
10852 fd = mch_fopen((char *)slang->sl_fname, "r"); | |
10853 if (fd == NULL) | |
10854 goto nextone; | |
10855 | |
10856 /* | |
10857 * <SUGHEADER>: <fileID> <versionnr> <timestamp> | |
10858 */ | |
10859 for (i = 0; i < VIMSUGMAGICL; ++i) | |
10860 buf[i] = getc(fd); /* <fileID> */ | |
10861 if (STRNCMP(buf, VIMSUGMAGIC, VIMSUGMAGICL) != 0) | |
10862 { | |
10863 EMSG2(_("E778: This does not look like a .sug file: %s"), | |
10864 slang->sl_fname); | |
10865 goto nextone; | |
10866 } | |
10867 c = getc(fd); /* <versionnr> */ | |
10868 if (c < VIMSUGVERSION) | |
10869 { | |
10870 EMSG2(_("E779: Old .sug file, needs to be updated: %s"), | |
10871 slang->sl_fname); | |
10872 goto nextone; | |
10873 } | |
10874 else if (c > VIMSUGVERSION) | |
10875 { | |
10876 EMSG2(_("E780: .sug file is for newer version of Vim: %s"), | |
10877 slang->sl_fname); | |
10878 goto nextone; | |
10879 } | |
10880 | |
10881 /* Check the timestamp, it must be exactly the same as the one in | |
10882 * the .spl file. Otherwise the word numbers won't match. */ | |
10883 timestamp = get8ctime(fd); /* <timestamp> */ | |
10884 if (timestamp != slang->sl_sugtime) | |
10885 { | |
10886 EMSG2(_("E781: .sug file doesn't match .spl file: %s"), | |
10887 slang->sl_fname); | |
10888 goto nextone; | |
10889 } | |
10890 | |
10891 /* | |
10892 * <SUGWORDTREE>: <wordtree> | |
10893 * Read the trie with the soundfolded words. | |
10894 */ | |
10895 if (spell_read_tree(fd, &slang->sl_sbyts, &slang->sl_sidxs, | |
10896 FALSE, 0) != 0) | |
10897 { | |
10898 someerror: | |
10899 EMSG2(_("E782: error while reading .sug file: %s"), | |
10900 slang->sl_fname); | |
10901 slang_clear_sug(slang); | |
10902 goto nextone; | |
10903 } | |
10904 | |
10905 /* | |
10906 * <SUGTABLE>: <sugwcount> <sugline> ... | |
10907 * | |
10908 * Read the table with word numbers. We use a file buffer for | |
10909 * this, because it's so much like a file with lines. Makes it | |
10910 * possible to swap the info and save on memory use. | |
10911 */ | |
10912 slang->sl_sugbuf = open_spellbuf(); | |
10913 if (slang->sl_sugbuf == NULL) | |
10914 goto someerror; | |
10915 /* <sugwcount> */ | |
10916 wcount = get4c(fd); | |
10917 if (wcount < 0) | |
10918 goto someerror; | |
10919 | |
10920 /* Read all the wordnr lists into the buffer, one NUL terminated | |
10921 * list per line. */ | |
10922 ga_init2(&ga, 1, 100); | |
10923 for (wordnr = 0; wordnr < wcount; ++wordnr) | |
10924 { | |
10925 ga.ga_len = 0; | |
10926 for (;;) | |
10927 { | |
10928 c = getc(fd); /* <sugline> */ | |
10929 if (c < 0 || ga_grow(&ga, 1) == FAIL) | |
10930 goto someerror; | |
10931 ((char_u *)ga.ga_data)[ga.ga_len++] = c; | |
10932 if (c == NUL) | |
10933 break; | |
10934 } | |
10935 if (ml_append_buf(slang->sl_sugbuf, (linenr_T)wordnr, | |
10936 ga.ga_data, ga.ga_len, TRUE) == FAIL) | |
10937 goto someerror; | |
10938 } | |
10939 ga_clear(&ga); | |
10940 | |
10941 /* | |
10942 * Need to put word counts in the word tries, so that we can find | |
10943 * a word by its number. | |
10944 */ | |
10945 tree_count_words(slang->sl_fbyts, slang->sl_fidxs); | |
10946 tree_count_words(slang->sl_sbyts, slang->sl_sidxs); | |
10947 | |
10948 nextone: | |
10949 if (fd != NULL) | |
10950 fclose(fd); | |
10951 STRCPY(dotp, ".spl"); | |
10952 } | |
10953 } | |
10954 } | |
10955 | |
10956 | |
10957 /* | |
10958 * Fill in the wordcount fields for a trie. | |
10959 * Returns the total number of words. | |
10960 */ | |
10961 static void | |
10962 tree_count_words(char_u *byts, idx_T *idxs) | |
10963 { | |
10964 int depth; | |
10965 idx_T arridx[MAXWLEN]; | |
10966 int curi[MAXWLEN]; | |
10967 int c; | |
10968 idx_T n; | |
10969 int wordcount[MAXWLEN]; | |
10970 | |
10971 arridx[0] = 0; | |
10972 curi[0] = 1; | |
10973 wordcount[0] = 0; | |
10974 depth = 0; | |
10975 while (depth >= 0 && !got_int) | |
10976 { | |
10977 if (curi[depth] > byts[arridx[depth]]) | |
10978 { | |
10979 /* Done all bytes at this node, go up one level. */ | |
10980 idxs[arridx[depth]] = wordcount[depth]; | |
10981 if (depth > 0) | |
10982 wordcount[depth - 1] += wordcount[depth]; | |
10983 | |
10984 --depth; | |
10985 fast_breakcheck(); | |
10986 } | |
10987 else | |
10988 { | |
10989 /* Do one more byte at this node. */ | |
10990 n = arridx[depth] + curi[depth]; | |
10991 ++curi[depth]; | |
10992 | |
10993 c = byts[n]; | |
10994 if (c == 0) | |
10995 { | |
10996 /* End of word, count it. */ | |
10997 ++wordcount[depth]; | |
10998 | |
10999 /* Skip over any other NUL bytes (same word with different | |
11000 * flags). */ | |
11001 while (byts[n + 1] == 0) | |
11002 { | |
11003 ++n; | |
11004 ++curi[depth]; | |
11005 } | |
11006 } | |
11007 else | |
11008 { | |
11009 /* Normal char, go one level deeper to count the words. */ | |
11010 ++depth; | |
11011 arridx[depth] = idxs[n]; | |
11012 curi[depth] = 1; | |
11013 wordcount[depth] = 0; | |
11014 } | |
11015 } | |
11016 } | |
11017 } | |
11018 | |
11019 /* | |
11020 * Free the info put in "*su" by spell_find_suggest(). | 4081 * Free the info put in "*su" by spell_find_suggest(). |
11021 */ | 4082 */ |
11022 static void | 4083 static void |
11023 spell_find_cleanup(suginfo_T *su) | 4084 spell_find_cleanup(suginfo_T *su) |
11024 { | 4085 { |
11039 /* | 4100 /* |
11040 * Make a copy of "word", with the first letter upper or lower cased, to | 4101 * Make a copy of "word", with the first letter upper or lower cased, to |
11041 * "wcopy[MAXWLEN]". "word" must not be empty. | 4102 * "wcopy[MAXWLEN]". "word" must not be empty. |
11042 * The result is NUL terminated. | 4103 * The result is NUL terminated. |
11043 */ | 4104 */ |
11044 static void | 4105 void |
11045 onecap_copy( | 4106 onecap_copy( |
11046 char_u *word, | 4107 char_u *word, |
11047 char_u *wcopy, | 4108 char_u *wcopy, |
11048 int upper) /* TRUE: first letter made upper case */ | 4109 int upper) /* TRUE: first letter made upper case */ |
11049 { | 4110 { |
13602 else | 6663 else |
13603 /* Use goodword as-is. */ | 6664 /* Use goodword as-is. */ |
13604 STRCPY(cword, fword); | 6665 STRCPY(cword, fword); |
13605 } | 6666 } |
13606 | 6667 |
13607 /* | |
13608 * Use map string "map" for languages "lp". | |
13609 */ | |
13610 static void | |
13611 set_map_str(slang_T *lp, char_u *map) | |
13612 { | |
13613 char_u *p; | |
13614 int headc = 0; | |
13615 int c; | |
13616 int i; | |
13617 | |
13618 if (*map == NUL) | |
13619 { | |
13620 lp->sl_has_map = FALSE; | |
13621 return; | |
13622 } | |
13623 lp->sl_has_map = TRUE; | |
13624 | |
13625 /* Init the array and hash tables empty. */ | |
13626 for (i = 0; i < 256; ++i) | |
13627 lp->sl_map_array[i] = 0; | |
13628 #ifdef FEAT_MBYTE | |
13629 hash_init(&lp->sl_map_hash); | |
13630 #endif | |
13631 | |
13632 /* | |
13633 * The similar characters are stored separated with slashes: | |
13634 * "aaa/bbb/ccc/". Fill sl_map_array[c] with the character before c and | |
13635 * before the same slash. For characters above 255 sl_map_hash is used. | |
13636 */ | |
13637 for (p = map; *p != NUL; ) | |
13638 { | |
13639 #ifdef FEAT_MBYTE | |
13640 c = mb_cptr2char_adv(&p); | |
13641 #else | |
13642 c = *p++; | |
13643 #endif | |
13644 if (c == '/') | |
13645 headc = 0; | |
13646 else | |
13647 { | |
13648 if (headc == 0) | |
13649 headc = c; | |
13650 | |
13651 #ifdef FEAT_MBYTE | |
13652 /* Characters above 255 don't fit in sl_map_array[], put them in | |
13653 * the hash table. Each entry is the char, a NUL the headchar and | |
13654 * a NUL. */ | |
13655 if (c >= 256) | |
13656 { | |
13657 int cl = mb_char2len(c); | |
13658 int headcl = mb_char2len(headc); | |
13659 char_u *b; | |
13660 hash_T hash; | |
13661 hashitem_T *hi; | |
13662 | |
13663 b = alloc((unsigned)(cl + headcl + 2)); | |
13664 if (b == NULL) | |
13665 return; | |
13666 mb_char2bytes(c, b); | |
13667 b[cl] = NUL; | |
13668 mb_char2bytes(headc, b + cl + 1); | |
13669 b[cl + 1 + headcl] = NUL; | |
13670 hash = hash_hash(b); | |
13671 hi = hash_lookup(&lp->sl_map_hash, b, hash); | |
13672 if (HASHITEM_EMPTY(hi)) | |
13673 hash_add_item(&lp->sl_map_hash, hi, b, hash); | |
13674 else | |
13675 { | |
13676 /* This should have been checked when generating the .spl | |
13677 * file. */ | |
13678 EMSG(_("E783: duplicate char in MAP entry")); | |
13679 vim_free(b); | |
13680 } | |
13681 } | |
13682 else | |
13683 #endif | |
13684 lp->sl_map_array[c] = headc; | |
13685 } | |
13686 } | |
13687 } | |
13688 | 6668 |
13689 /* | 6669 /* |
13690 * Return TRUE if "c1" and "c2" are similar characters according to the MAP | 6670 * Return TRUE if "c1" and "c2" are similar characters according to the MAP |
13691 * lines in the .aff file. | 6671 * lines in the .aff file. |
13692 */ | 6672 */ |
14069 * | 7049 * |
14070 * We support two methods: | 7050 * We support two methods: |
14071 * 1. SOFOFROM/SOFOTO do a simple character mapping. | 7051 * 1. SOFOFROM/SOFOTO do a simple character mapping. |
14072 * 2. SAL items define a more advanced sound-folding (and much slower). | 7052 * 2. SAL items define a more advanced sound-folding (and much slower). |
14073 */ | 7053 */ |
14074 static void | 7054 void |
14075 spell_soundfold( | 7055 spell_soundfold( |
14076 slang_T *slang, | 7056 slang_T *slang, |
14077 char_u *inword, | 7057 char_u *inword, |
14078 int folded, /* "inword" is already case-folded */ | 7058 int folded, /* "inword" is already case-folded */ |
14079 char_u *res) | 7059 char_u *res) |