223
|
1 /* vi:set ts=8 sts=4 sw=4:
|
|
2 *
|
|
3 * VIM - Vi IMproved by Bram Moolenaar
|
|
4 *
|
|
5 * Do ":help uganda" in Vim to read copying and usage conditions.
|
|
6 * Do ":help credits" in Vim to see a list of people who contributed.
|
|
7 * See README.txt for an overview of the Vim source code.
|
|
8 */
|
|
9
|
|
10 /*
|
|
11 * spell.c: code for spell checking
|
226
|
12 *
|
300
|
13 * The spell checking mechanism uses a tree (aka trie). Each node in the tree
|
|
14 * has a list of bytes that can appear (siblings). For each byte there is a
|
|
15 * pointer to the node with the byte that follows in the word (child).
|
324
|
16 *
|
|
17 * A NUL byte is used where the word may end. The bytes are sorted, so that
|
|
18 * binary searching can be used and the NUL bytes are at the start. The
|
|
19 * number of possible bytes is stored before the list of bytes.
|
|
20 *
|
|
21 * The tree uses two arrays: "byts" stores the characters, "idxs" stores
|
|
22 * either the next index or flags. The tree starts at index 0. For example,
|
|
23 * to lookup "vi" this sequence is followed:
|
|
24 * i = 0
|
|
25 * len = byts[i]
|
|
26 * n = where "v" appears in byts[i + 1] to byts[i + len]
|
|
27 * i = idxs[n]
|
|
28 * len = byts[i]
|
|
29 * n = where "i" appears in byts[i + 1] to byts[i + len]
|
|
30 * i = idxs[n]
|
|
31 * len = byts[i]
|
|
32 * find that byts[i + 1] is 0, idxs[i + 1] has flags for "vi".
|
300
|
33 *
|
339
|
34 * There are two word trees: one with case-folded words and one with words in
|
300
|
35 * original case. The second one is only used for keep-case words and is
|
|
36 * usually small.
|
|
37 *
|
339
|
38 * There is one additional tree for when prefixes are not applied when
|
|
39 * generating the .spl file. This tree stores all the possible prefixes, as
|
|
40 * if they were words. At each word (prefix) end the prefix nr is stored, the
|
|
41 * following word must support this prefix nr. And the condition nr is
|
|
42 * stored, used to lookup the condition that the word must match with.
|
|
43 *
|
300
|
44 * Thanks to Olaf Seibert for providing an example implementation of this tree
|
|
45 * and the compression mechanism.
|
243
|
46 *
|
|
47 * Matching involves checking the caps type: Onecap ALLCAP KeepCap.
|
|
48 *
|
236
|
49 * Why doesn't Vim use aspell/ispell/myspell/etc.?
|
|
50 * See ":help develop-spell".
|
|
51 */
|
|
52
|
300
|
/*
 * Use this to adjust the score after finding suggestions, based on the
 * suggested word sounding like the bad word.  This is much faster than doing
 * it for every possible suggestion.
 * Disadvantage: When "the" is typed as "hte" it sounds different and goes
 * down in the list.
 * Used when 'spellsuggest' is set to "best".
 *
 * The result is a weighted average: three parts "word_score" and one part
 * "sound_score", using integer division.  Both arguments are parenthesized
 * so that an expression such as "a + b" can be passed safely.
 */
#define RESCORE(word_score, sound_score) \
    ((3 * (word_score) + (sound_score)) / 4)
|
|
62
|
|
63 /*
|
|
64 * The double scoring mechanism is based on the principle that there are two
|
|
65 * kinds of spelling mistakes:
|
|
66 * 1. You know how to spell the word, but mistype something. This results in
|
|
67 * a small editing distance (character swapped/omitted/inserted) and
|
|
68 * possibly a word that sounds completely different.
|
|
69 * 2. You don't know how to spell the word and type something that sounds
|
|
70 * right. The edit distance can be big but the word is similar after
|
|
71 * sound-folding.
|
|
72 * Since scores for these two mistakes will be very different we use a list
|
|
73 * for each.
|
|
74 * The sound-folding is slow, only do double scoring when 'spellsuggest' is
|
|
75 * "double".
|
323
|
76 */
|
|
77
|
|
78 /*
|
339
|
79 * Vim spell file format: <HEADER>
|
|
80 * <SUGGEST>
|
|
81 * <LWORDTREE>
|
|
82 * <KWORDTREE>
|
|
83 * <PREFIXTREE>
|
300
|
84 *
|
339
|
85 * <HEADER>: <fileID>
|
|
86 * <regioncnt> <regionname> ...
|
|
87 * <charflagslen> <charflags>
|
|
88 * <fcharslen> <fchars>
|
366
|
89 * <midwordlen> <midword>
|
339
|
90 * <prefcondcnt> <prefcond> ...
|
300
|
91 *
|
366
|
92 * <fileID> 10 bytes "VIMspell08"
|
300
|
93 * <regioncnt> 1 byte number of regions following (8 supported)
|
307
|
94 * <regionname> 2 bytes Region name: ca, au, etc. Lower case.
|
300
|
95 * First <regionname> is region 1.
|
|
96 *
|
|
97 * <charflagslen> 1 byte Number of bytes in <charflags> (should be 128).
|
|
98 * <charflags> N bytes List of flags (first one is for character 128):
|
324
|
99 * 0x01 word character CF_WORD
|
|
100 * 0x02 upper-case character CF_UPPER
|
300
|
101 * <fcharslen> 2 bytes Number of bytes in <fchars>.
|
|
102 * <fchars> N bytes Folded characters, first one is for character 128.
|
|
103 *
|
366
|
104 * <midwordlen> 2 bytes Number of bytes in <midword>.
|
|
105 * <midword> N bytes Characters that are word characters only when used
|
|
106 * in the middle of a word.
|
|
107 *
|
339
|
108 * <prefcondcnt> 2 bytes Number of <prefcond> items following.
|
|
109 *
|
|
110 * <prefcond> : <condlen> <condstr>
|
|
111 *
|
|
112 * <condlen> 1 byte Length of <condstr>.
|
|
113 *
|
|
114 * <condstr> N bytes Condition for the prefix.
|
|
115 *
|
300
|
116 *
|
323
|
117 * <SUGGEST> : <repcount> <rep> ...
|
|
118 * <salflags> <salcount> <sal> ...
|
|
119 * <maplen> <mapstr>
|
|
120 *
|
|
121 * <repcount> 2 bytes number of <rep> items, MSB first.
|
|
122 *
|
|
123 * <rep> : <repfromlen> <repfrom> <reptolen> <repto>
|
|
124 *
|
|
125 * <repfromlen> 1 byte length of <repfrom>
|
|
126 *
|
|
127 * <repfrom> N bytes "from" part of replacement
|
|
128 *
|
|
129 * <reptolen> 1 byte length of <repto>
|
|
130 *
|
|
131 * <repto> N bytes "to" part of replacement
|
300
|
132 *
|
323
|
133 * <salflags> 1 byte flags for soundsalike conversion:
|
|
134 * SAL_F0LLOWUP
|
|
135 * SAL_COLLAPSE
|
|
136 * SAL_REM_ACCENTS
|
|
137 *
|
|
138 * <sal> : <salfromlen> <salfrom> <saltolen> <salto>
|
|
139 *
|
|
140 * <salfromlen> 1 byte length of <salfrom>
|
|
141 *
|
|
142 * <salfrom> N bytes "from" part of soundsalike
|
|
143 *
|
|
144 * <saltolen> 1 byte length of <salto>
|
|
145 *
|
|
146 * <salto> N bytes "to" part of soundsalike
|
|
147 *
|
|
148 * <maplen> 2 bytes length of <mapstr>, MSB first
|
|
149 *
|
|
150 * <mapstr> N bytes String with sequences of similar characters,
|
|
151 * separated by slashes.
|
300
|
152 *
|
|
153 *
|
|
154 * <LWORDTREE>: <wordtree>
|
|
155 *
|
339
|
156 * <KWORDTREE>: <wordtree>
|
|
157 *
|
|
158 * <PREFIXTREE>: <wordtree>
|
|
159 *
|
|
160 *
|
300
|
161 * <wordtree>: <nodecount> <nodedata> ...
|
|
162 *
|
|
163 * <nodecount> 4 bytes Number of nodes following. MSB first.
|
|
164 *
|
|
165 * <nodedata>: <siblingcount> <sibling> ...
|
|
166 *
|
|
167 * <siblingcount> 1 byte Number of siblings in this node. The siblings
|
|
168 * follow in sorted order.
|
|
169 *
|
339
|
170 * <sibling>: <byte> [ <nodeidx> <xbyte>
|
|
171 * | <flags> [<region>] [<prefixID>]
|
|
172 * | <prefixID> <prefcondnr> ]
|
300
|
173 *
|
|
174 * <byte> 1 byte Byte value of the sibling. Special cases:
|
|
175 * BY_NOFLAGS: End of word without flags and for all
|
|
176 * regions.
|
366
|
177 * For PREFIXTREE <prefixID> and
|
|
178 * <prefcondnr> follow.
|
|
179 * BY_FLAGS: End of word, <flags> follow.
|
|
180 * For PREFIXTREE <prefixID> and
|
|
181 * <prefcondnr> follow for rare prefix.
|
300
|
182 * BY_INDEX: Child of sibling is shared, <nodeidx>
|
|
183 * and <xbyte> follow.
|
|
184 *
|
|
185 * <nodeidx> 3 bytes Index of child for this sibling, MSB first.
|
|
186 *
|
|
187 * <xbyte> 1 byte byte value of the sibling.
|
|
188 *
|
|
189 * <flags> 1 byte bitmask of:
|
|
190 * WF_ALLCAP word must have only capitals
|
|
191 * WF_ONECAP first char of word must be capital
|
|
192 * WF_RARE rare word
|
|
193 * WF_REGION <region> follows
|
339
|
194 * WF_PFX <prefixID> follows
|
300
|
195 *
|
|
196 * <region> 1 byte Bitmask for regions in which word is valid. When
|
|
197 * omitted it's valid in all regions.
|
|
198 * Lowest bit is for region 1.
|
|
199 *
|
339
|
200 * <prefixID> 1 byte ID of prefix that can be used with this word. For
|
|
201 * PREFIXTREE used for the required prefix ID.
|
|
202 *
|
|
203 * <prefcondnr> 2 bytes Prefix condition number, index in <prefcond> list
|
|
204 * from HEADER.
|
300
|
205 *
|
|
206 * All text characters are in 'encoding', but stored as single bytes.
|
|
207 */
|
|
208
|
223
|
209 #if defined(MSDOS) || defined(WIN16) || defined(WIN32) || defined(_WIN64)
|
|
210 # include <io.h> /* for lseek(), must be before vim.h */
|
|
211 #endif
|
|
212
|
|
213 #include "vim.h"
|
|
214
|
|
215 #if defined(FEAT_SYN_HL) || defined(PROTO)
|
|
216
|
|
217 #ifdef HAVE_FCNTL_H
|
|
218 # include <fcntl.h>
|
|
219 #endif
|
|
220
|
323
|
/* Upper bound assumed for the length of a word, in bytes.  Some places
 * assume a word length fits in a byte, thus it can't be above 255. */
#define MAXWLEN 250

/* Type used for indexes in the word tree; it needs to be at least 3 bytes.
 * "int" is used when it is larger than two bytes, "long" otherwise.
 * If int is 8 bytes we could use something smaller, but what? */
#if SIZEOF_INT <= 2
typedef long idx_T;
#else
typedef int idx_T;
#endif
|
|
232
|
|
/* Word flags.  They must fit in the lowest byte, the region byte is stored
 * above it. */
#define WF_REGION   0x01    /* region byte follows */
#define WF_ONECAP   0x02    /* word with one capital (or all capitals) */
#define WF_ALLCAP   0x04    /* word must be all capitals */
#define WF_RARE     0x08    /* rare word */
#define WF_BANNED   0x10    /* bad word */
#define WF_PFX      0x20    /* prefix ID list follows */
#define WF_KEEPCAP  0x80    /* keep-case word */

/* Combination of the three caps-related flags above. */
#define WF_CAPMASK  (WF_KEEPCAP | WF_ALLCAP | WF_ONECAP)

/* Used in sl_pidxs: flag for a rare postponed prefix.  The value must be
 * above the prefixID (one byte) and the prefcondnr (two bytes). */
#define WF_RAREPFX  0x1000000

/* Special values of a sibling byte in the word tree. */
#define BY_NOFLAGS  0           /* end of word without flags or region */
#define BY_FLAGS    1           /* end of word, flag byte follows */
#define BY_INDEX    2           /* child is shared, index follows */
#define BY_SPECIAL  BY_INDEX    /* highest special byte value */
|
236
|
253
|
323
|
254 /* Info from "REP" and "SAL" entries in ".aff" file used in si_rep, sl_rep,
|
344
|
255 * and si_sal. Not for sl_sal!
|
323
|
256 * One replacement: from "ft_from" to "ft_to". */
|
|
257 typedef struct fromto_S
|
236
|
258 {
|
323
|
259 char_u *ft_from;
|
|
260 char_u *ft_to;
|
|
261 } fromto_T;
|
236
|
262
|
344
|
263 /* Info from "SAL" entries in ".aff" file used in sl_sal.
|
|
264 * The info is split for quick processing by spell_soundfold().
|
|
265 * Note that "sm_oneof" and "sm_rules" point into sm_lead. */
|
|
266 typedef struct salitem_S
|
|
267 {
|
|
268 char_u *sm_lead; /* leading letters */
|
|
269 int sm_leadlen; /* length of "sm_lead" */
|
|
270 char_u *sm_oneoff; /* letters from () or NULL */
|
|
271 char_u *sm_rules; /* rules like ^, $, priority */
|
|
272 char_u *sm_to; /* replacement. */
|
|
273 } salitem_T;
|
|
274
|
236
|
275 /*
|
243
|
276 * Structure used to store words and other info for one language, loaded from
|
|
277 * a .spl file.
|
300
|
278 * The main access is through the tree in "sl_fbyts/sl_fidxs", storing the
|
|
279 * case-folded words. "sl_kbyts/sl_kidxs" is for keep-case words.
|
|
280 *
|
|
281 * The "byts" array stores the possible bytes in each tree node, preceded by
|
|
282 * the number of possible bytes, sorted on byte value:
|
|
283 * <len> <byte1> <byte2> ...
|
|
284 * The "idxs" array stores the index of the child node corresponding to the
|
|
285 * byte in "byts".
|
|
286 * Exception: when the byte is zero, the word may end here and "idxs" holds
|
|
287 * the flags and region for the word. There may be several zeros in sequence
|
|
288 * for alternative flag/region combinations.
|
236
|
289 */
|
|
290 typedef struct slang_S slang_T;
|
|
291 struct slang_S
|
|
292 {
|
|
293 slang_T *sl_next; /* next language */
|
|
294 char_u *sl_name; /* language name "en", "en.rare", "nl", etc. */
|
310
|
295 char_u *sl_fname; /* name of .spl file */
|
323
|
296 int sl_add; /* TRUE if it's a .add file. */
|
339
|
297
|
300
|
298 char_u *sl_fbyts; /* case-folded word bytes */
|
324
|
299 idx_T *sl_fidxs; /* case-folded word indexes */
|
300
|
300 char_u *sl_kbyts; /* keep-case word bytes */
|
324
|
301 idx_T *sl_kidxs; /* keep-case word indexes */
|
339
|
302 char_u *sl_pbyts; /* prefix tree word bytes */
|
|
303 idx_T *sl_pidxs; /* prefix tree word indexes */
|
|
304
|
236
|
305 char_u sl_regions[17]; /* table with up to 8 region names plus NUL */
|
323
|
306
|
339
|
307 int sl_prefixcnt; /* number of items in "sl_prefprog" */
|
|
308 regprog_T **sl_prefprog; /* table with regprogs for prefixes */
|
|
309
|
323
|
310 garray_T sl_rep; /* list of fromto_T entries from REP lines */
|
|
311 short sl_rep_first[256]; /* indexes where byte first appears, -1 if
|
|
312 there is none */
|
344
|
313 garray_T sl_sal; /* list of salitem_T entries from SAL lines */
|
323
|
314 short sl_sal_first[256]; /* indexes where byte first appears, -1 if
|
|
315 there is none */
|
|
316 int sl_followup; /* SAL followup */
|
|
317 int sl_collapse; /* SAL collapse_result */
|
|
318 int sl_rem_accents; /* SAL remove_accents */
|
330
|
319 int sl_has_map; /* TRUE if there is a MAP line */
|
|
320 #ifdef FEAT_MBYTE
|
|
321 hashtab_T sl_map_hash; /* MAP for multi-byte chars */
|
|
322 int sl_map_array[256]; /* MAP for first 256 chars */
|
|
323 #else
|
|
324 char_u sl_map_array[256]; /* MAP for first 256 chars */
|
|
325 #endif
|
236
|
326 };
|
|
327
|
243
|
328 /* First language that is loaded, start of the linked list of loaded
|
|
329 * languages. */
|
236
|
330 static slang_T *first_lang = NULL;
|
|
331
|
323
|
/* Bits of the soundsalike flags byte stored in the .spl file.
 * Note: "SAL_F0LLOWUP" is spelled with a zero. */
#define SAL_F0LLOWUP        1
#define SAL_COLLAPSE        2
#define SAL_REM_ACCENTS     4
|
|
336
|
236
|
337 /*
|
|
338 * Structure used in "b_langp", filled from 'spelllang'.
|
|
339 */
|
|
340 typedef struct langp_S
|
|
341 {
|
|
342 slang_T *lp_slang; /* info for this language (NULL for last one) */
|
|
343 int lp_region; /* bitmask for region or REGION_ALL */
|
|
344 } langp_T;
|
|
345
|
|
346 #define LANGP_ENTRY(ga, i) (((langp_T *)(ga).ga_data) + (i))
|
|
347
|
307
|
#define REGION_ALL  0xff        /* word valid in all regions */

/* Result values.  A lower number is accepted over a higher one. */
#define SP_BANNED   (-1)
#define SP_OK       0
#define SP_RARE     1
#define SP_LOCAL    2
#define SP_BAD      3

/* String at the start of a Vim spell file, and its length in bytes. */
#define VIMSPELLMAGIC   "VIMspell08"
#define VIMSPELLMAGICL  10
|
|
359
|
|
360 /*
|
323
|
361 * Information used when looking for suggestions.
|
|
362 */
|
|
363 typedef struct suginfo_S
|
|
364 {
|
|
365 garray_T su_ga; /* suggestions, contains "suggest_T" */
|
344
|
366 int su_maxcount; /* max. number of suggestions displayed */
|
323
|
367 int su_maxscore; /* maximum score for adding to su_ga */
|
344
|
368 garray_T su_sga; /* like su_ga, sound-folded scoring */
|
323
|
369 char_u *su_badptr; /* start of bad word in line */
|
|
370 int su_badlen; /* length of detected bad word in line */
|
346
|
371 int su_badflags; /* caps flags for bad word */
|
323
|
372 char_u su_badword[MAXWLEN]; /* bad word truncated at su_badlen */
|
|
373 char_u su_fbadword[MAXWLEN]; /* su_badword case-folded */
|
|
374 hashtab_T su_banned; /* table with banned words */
|
|
375 } suginfo_T;
|
|
376
|
|
377 /* One word suggestion. Used in "si_ga". */
|
|
378 typedef struct suggest_S
|
|
379 {
|
|
380 char_u *st_word; /* suggested word, allocated string */
|
|
381 int st_orglen; /* length of replaced text */
|
|
382 int st_score; /* lower is better */
|
344
|
383 int st_altscore; /* used when st_score compares equal */
|
|
384 int st_salscore; /* st_score is for soundalike */
|
324
|
385 int st_had_bonus; /* bonus already included in score */
|
323
|
386 } suggest_T;
|
|
387
|
344
|
/* Item "i" of growarray "ga", which holds suggest_T items. */
#define SUG(ga, i) (((suggest_T *)(ga).ga_data)[i])

/* Number of suggestions kept when cleaning up: su_maxcount, but never less
 * than 50.  When rescore_suggestions() is called the score may change, thus
 * we need to keep more than what is displayed. */
#define SUG_CLEAN_COUNT(su) \
    ((su)->su_maxcount >= 50 ? (su)->su_maxcount : 50)

/* Threshold for sorting and cleaning up suggestions.  Don't want to keep
 * lots of suggestions that are not going to be displayed. */
#define SUG_MAX_COUNT(su) (50 + (su)->su_maxcount)
|
323
|
398
|
|
/* Score for various changes.  A lower score is better. */
#define SCORE_SPLIT     149     /* split bad word */
#define SCORE_ICASE     52      /* slightly different case */
#define SCORE_REGION    70      /* word is for different region */
#define SCORE_RARE      180     /* rare word */
#define SCORE_SWAP      90      /* swap two characters */
#define SCORE_SWAP3     110     /* swap two characters in three */
#define SCORE_REP       87      /* REP replacement */
#define SCORE_SUBST     93      /* substitute a character */
#define SCORE_SIMILAR   33      /* substitute a similar character */
#define SCORE_DEL       94      /* delete a character */
#define SCORE_DELDUP    64      /* delete a duplicated character */
#define SCORE_INS       96      /* insert a character */
#define SCORE_INSDUP    66      /* insert a duplicate character */
#define SCORE_NONWORD   103     /* change non-word to word char */

#define SCORE_MAXINIT   350     /* Initial maximum score: higher == slower.
                                 * 350 allows for about three changes. */

/* The expansion is parenthesized so that it stays one value when used inside
 * a larger expression, e.g. "x / SCORE_BIG". */
#define SCORE_BIG       (SCORE_INS * 3) /* big difference */
#define SCORE_MAXMAX    999999  /* accept any score */
|
|
420
|
|
421 /*
|
236
|
422 * Structure to store info for word matching.
|
|
423 */
|
|
424 typedef struct matchinf_S
|
|
425 {
|
|
426 langp_T *mi_lp; /* info for language and region */
|
243
|
427
|
|
428 /* pointers to original text to be checked */
|
236
|
429 char_u *mi_word; /* start of word being checked */
|
339
|
430 char_u *mi_end; /* end of matching word so far */
|
243
|
431 char_u *mi_fend; /* next char to be added to mi_fword */
|
300
|
432 char_u *mi_cend; /* char after what was used for
|
|
433 mi_capflags */
|
243
|
434
|
|
435 /* case-folded text */
|
|
436 char_u mi_fword[MAXWLEN + 1]; /* mi_word case-folded */
|
300
|
437 int mi_fwordlen; /* nr of valid bytes in mi_fword */
|
243
|
438
|
339
|
439 /* for when checking word after a prefix */
|
|
440 int mi_prefarridx; /* index in sl_pidxs with list of
|
|
441 prefixID/condition */
|
|
442 int mi_prefcnt; /* number of entries at mi_prefarridx */
|
|
443 int mi_prefixlen; /* byte length of prefix */
|
|
444
|
243
|
445 /* others */
|
236
|
446 int mi_result; /* result so far: SP_BAD, SP_OK, etc. */
|
300
|
447 int mi_capflags; /* WF_ONECAP WF_ALLCAP WF_KEEPCAP */
|
236
|
448 } matchinf_T;
|
|
449
|
307
|
450 /*
|
|
451 * The tables used for recognizing word characters according to spelling.
|
|
452 * These are only used for the first 256 characters of 'encoding'.
|
|
453 */
|
|
454 typedef struct spelltab_S
|
|
455 {
|
|
456 char_u st_isw[256]; /* flags: is word char */
|
|
457 char_u st_isu[256]; /* flags: is uppercase char */
|
|
458 char_u st_fold[256]; /* chars: folded case */
|
324
|
459 char_u st_upper[256]; /* chars: upper case */
|
307
|
460 } spelltab_T;
|
|
461
|
|
462 static spelltab_T spelltab;
|
|
463 static int did_set_spelltab;
|
366
|
464 static char_u spell_ismw[256]; /* flags: is midword char */
|
|
465 #ifdef FEAT_MBYTE
|
|
466 static char_u *spell_ismw_mb = NULL; /* multi-byte midword chars */
|
|
467 #endif
|
307
|
468
|
324
|
469 #define CF_WORD 0x01
|
|
470 #define CF_UPPER 0x02
|
307
|
471
|
|
472 static void clear_spell_chartab __ARGS((spelltab_T *sp));
|
|
473 static int set_spell_finish __ARGS((spelltab_T *new_st));
|
358
|
474 static int spell_iswordp __ARGS((char_u *p));
|
339
|
475 static void write_spell_prefcond __ARGS((FILE *fd, garray_T *gap));
|
307
|
476
|
|
/*
 * Non-zero when "p" points to a word character.  Like spell_iswordp() but
 * without the special handling of a single quote.
 * This is a macro because checking for a word character is done very often
 * and the function call overhead is to be avoided.
 * Single bytes are looked up in spelltab.st_isw[]; in a multi-byte build a
 * multi-byte character is a word character when mb_get_class() is 2 or more.
 */
#ifndef FEAT_MBYTE
# define SPELL_ISWORDP(p) (spelltab.st_isw[*(p)])
#else
# define SPELL_ISWORDP(p) ((!has_mbyte || MB_BYTE2LEN(*(p)) <= 1) \
                ? spelltab.st_isw[*(p)] : (mb_get_class(p) >= 2))
#endif
|
|
489
|
323
|
/*
 * The states that are tried, in this order, at each node in the tree when
 * looking for suggestions.
 */
typedef enum
{
    STATE_START = 0,    /* At start of node check for NUL bytes (goodword
                         * ends); if badword ends there is a match, otherwise
                         * try splitting word. */
    STATE_SPLITUNDO,    /* Undo splitting. */
    STATE_ENDNUL,       /* Past NUL bytes at start of the node. */
    STATE_PLAIN,        /* Use each byte of the node. */
    STATE_DEL,          /* Delete a byte from the bad word. */
    STATE_INS,          /* Insert a byte in the bad word. */
    STATE_SWAP,         /* Swap two bytes. */
    STATE_UNSWAP,       /* Undo swap two characters. */
    STATE_SWAP3,        /* Swap two characters over three. */
    STATE_UNSWAP3,      /* Undo Swap two characters over three. */
    STATE_UNROT3L,      /* Undo rotate three characters left */
    STATE_UNROT3R,      /* Undo rotate three characters right */
    STATE_REP_INI,      /* Prepare for using REP items. */
    STATE_REP,          /* Use matching REP items from the .aff file. */
    STATE_REP_UNDO,     /* Undo a REP item replacement. */
    STATE_FINAL         /* End of this node. */
} state_T;
|
|
514
|
|
515 /*
|
346
|
516 * Struct to keep the state at each level in suggest_try_change().
|
323
|
517 */
|
|
518 typedef struct trystate_S
|
|
519 {
|
330
|
520 state_T ts_state; /* state at this level, STATE_ */
|
323
|
521 int ts_score; /* score */
|
344
|
522 idx_T ts_arridx; /* index in tree array, start of node */
|
330
|
523 short ts_curi; /* index in list of child nodes */
|
|
524 char_u ts_fidx; /* index in fword[], case-folded bad word */
|
|
525 char_u ts_fidxtry; /* ts_fidx at which bytes may be changed */
|
|
526 char_u ts_twordlen; /* valid length of tword[] */
|
|
527 #ifdef FEAT_MBYTE
|
|
528 char_u ts_tcharlen; /* number of bytes in tword character */
|
|
529 char_u ts_tcharidx; /* current byte index in tword character */
|
|
530 char_u ts_isdiff; /* DIFF_ values */
|
|
531 char_u ts_fcharstart; /* index in fword where badword char started */
|
|
532 #endif
|
323
|
533 char_u ts_save_prewordlen; /* saved "prewordlen" */
|
330
|
534 char_u ts_save_splitoff; /* su_splitoff saved here */
|
346
|
535 char_u ts_save_badflags; /* su_badflags saved here */
|
323
|
536 } trystate_T;
|
|
537
|
330
|
/* Values that ts_isdiff can have. */
#define DIFF_NONE       0       /* no different byte (yet) */
#define DIFF_YES        1       /* different byte found */
#define DIFF_INSERT     2       /* inserting character */

/* Values for the "mode" argument of find_word(). */
#define FIND_FOLDWORD   0       /* find word case-folded */
#define FIND_KEEPWORD   1       /* find keep-case word */
#define FIND_PREFIX     2       /* find word after prefix */
|
|
547
|
236
|
548 static slang_T *slang_alloc __ARGS((char_u *lang));
|
|
549 static void slang_free __ARGS((slang_T *lp));
|
310
|
550 static void slang_clear __ARGS((slang_T *lp));
|
339
|
551 static void find_word __ARGS((matchinf_T *mip, int mode));
|
351
|
552 static int valid_word_prefix __ARGS((int totprefcnt, int arridx, int prefid, char_u *word, slang_T *slang));
|
339
|
553 static void find_prefix __ARGS((matchinf_T *mip));
|
|
554 static int fold_more __ARGS((matchinf_T *mip));
|
323
|
555 static int spell_valid_case __ARGS((int origflags, int treeflags));
|
351
|
556 static int no_spell_checking __ARGS((void));
|
307
|
557 static void spell_load_lang __ARGS((char_u *lang));
|
310
|
558 static char_u *spell_enc __ARGS((void));
|
|
559 static void spell_load_cb __ARGS((char_u *fname, void *cookie));
|
323
|
560 static slang_T *spell_load_file __ARGS((char_u *fname, char_u *lang, slang_T *old_lp, int silent));
|
339
|
561 static idx_T read_tree __ARGS((FILE *fd, char_u *byts, idx_T *idxs, int maxidx, int startidx, int prefixtree, int maxprefcondnr));
|
236
|
562 static int find_region __ARGS((char_u *rp, char_u *region));
|
|
563 static int captype __ARGS((char_u *word, char_u *end));
|
323
|
564 static void spell_reload_one __ARGS((char_u *fname, int added_word));
|
307
|
565 static int set_spell_charflags __ARGS((char_u *flags, int cnt, char_u *upp));
|
|
566 static int set_spell_chartab __ARGS((char_u *fol, char_u *low, char_u *upp));
|
|
567 static void write_spell_chartab __ARGS((FILE *fd));
|
|
568 static int spell_casefold __ARGS((char_u *p, int len, char_u *buf, int buflen));
|
358
|
569 static void spell_find_suggest __ARGS((char_u *badptr, suginfo_T *su, int maxcount, int banbadword));
|
344
|
570 static void spell_find_cleanup __ARGS((suginfo_T *su));
|
324
|
571 static void onecap_copy __ARGS((char_u *word, char_u *wcopy, int upper));
|
344
|
572 static void allcap_copy __ARGS((char_u *word, char_u *wcopy));
|
346
|
573 static void suggest_try_special __ARGS((suginfo_T *su));
|
|
574 static void suggest_try_change __ARGS((suginfo_T *su));
|
323
|
575 static int try_deeper __ARGS((suginfo_T *su, trystate_T *stack, int depth, int score_add));
|
|
576 static void find_keepcap_word __ARGS((slang_T *slang, char_u *fword, char_u *kword));
|
344
|
577 static void score_comp_sal __ARGS((suginfo_T *su));
|
|
578 static void score_combine __ARGS((suginfo_T *su));
|
351
|
579 static int stp_sal_score __ARGS((suggest_T *stp, suginfo_T *su, slang_T *slang, char_u *badsound));
|
346
|
580 static void suggest_try_soundalike __ARGS((suginfo_T *su));
|
323
|
581 static void make_case_word __ARGS((char_u *fword, char_u *cword, int flags));
|
330
|
582 static void set_map_str __ARGS((slang_T *lp, char_u *map));
|
323
|
583 static int similar_chars __ARGS((slang_T *slang, int c1, int c2));
|
351
|
584 static void add_suggestion __ARGS((suginfo_T *su, garray_T *gap, char_u *goodword, int badlen, int score, int altscore, int had_bonus));
|
323
|
585 static void add_banned __ARGS((suginfo_T *su, char_u *word));
|
|
586 static int was_banned __ARGS((suginfo_T *su, char_u *word));
|
|
587 static void free_banned __ARGS((suginfo_T *su));
|
324
|
588 static void rescore_suggestions __ARGS((suginfo_T *su));
|
344
|
589 static int cleanup_suggestions __ARGS((garray_T *gap, int maxscore, int keep));
|
323
|
590 static void spell_soundfold __ARGS((slang_T *slang, char_u *inword, char_u *res));
|
344
|
591 static int soundalike_score __ARGS((char_u *goodsound, char_u *badsound));
|
323
|
592 static int spell_edit_score __ARGS((char_u *badword, char_u *goodword));
|
351
|
593 static void dump_word __ARGS((char_u *word, int round, int flags, linenr_T lnum));
|
|
594 static linenr_T apply_prefixes __ARGS((slang_T *slang, char_u *word, int round, int flags, linenr_T startlnum));
|
323
|
595
|
324
|
/*
 * Use our own character-case definitions, because the current locale may
 * differ from what the .spl file uses.
 * These must not be called with negative number!
 *
 * SPELL_TOFOLD(c)  gives the case-folded version of character "c".
 * SPELL_TOUPPER(c) gives the upper-case version of character "c".
 * SPELL_ISUPPER(c) is non-zero when character "c" is upper-case.
 * Characters below 256 are looked up in "spelltab".
 */
#ifndef FEAT_MBYTE
/* Non-multi-byte implementation. */
# define SPELL_TOFOLD(c) ((c) < 256 ? spelltab.st_fold[c] : (c))
# define SPELL_TOUPPER(c) ((c) < 256 ? spelltab.st_upper[c] : (c))
# define SPELL_ISUPPER(c) ((c) < 256 ? spelltab.st_isu[c] : FALSE)
#else
/* Multi-byte implementation.  For Unicode we can call utf_*(), but don't do
 * that for ASCII, because we don't want to use 'casemap' here.  Otherwise use
 * the "w" library function for characters above 255 if available. */
# ifdef HAVE_TOWLOWER
#  define SPELL_TOFOLD(c) (enc_utf8 && (c) >= 128 ? utf_fold(c) \
            : (c) < 256 ? spelltab.st_fold[c] : towlower(c))
# else
#  define SPELL_TOFOLD(c) (enc_utf8 && (c) >= 128 ? utf_fold(c) \
            : (c) < 256 ? spelltab.st_fold[c] : (c))
# endif

# ifdef HAVE_TOWUPPER
#  define SPELL_TOUPPER(c) (enc_utf8 && (c) >= 128 ? utf_toupper(c) \
            : (c) < 256 ? spelltab.st_upper[c] : towupper(c))
# else
#  define SPELL_TOUPPER(c) (enc_utf8 && (c) >= 128 ? utf_toupper(c) \
            : (c) < 256 ? spelltab.st_upper[c] : (c))
# endif

# ifdef HAVE_ISWUPPER
#  define SPELL_ISUPPER(c) (enc_utf8 && (c) >= 128 ? utf_isupper(c) \
            : (c) < 256 ? spelltab.st_isu[c] : iswupper(c))
# else
/* Without iswupper() nothing is known about characters above 255: report
 * "not upper-case", like the non-multi-byte implementation does.  Using "c"
 * itself here would be non-zero, thus TRUE, for every such character. */
#  define SPELL_ISUPPER(c) (enc_utf8 && (c) >= 128 ? utf_isupper(c) \
            : (c) < 256 ? spelltab.st_isu[c] : (FALSE))
# endif
#endif
|
|
634
|
307
|
635
|
|
636 static char *e_format = N_("E759: Format error in spell file");
|
236
|
637
|
|
638 /*
|
|
639 * Main spell-checking function.
|
300
|
640 * "ptr" points to a character that could be the start of a word.
|
236
|
641 * "*attrp" is set to the attributes for a badly spelled word. For a non-word
|
|
642 * or when it's OK it remains unchanged.
|
|
643 * This must only be called when 'spelllang' is not empty.
|
323
|
644 *
|
|
645 * "sug" is normally NULL. When looking for suggestions it points to
|
|
646 * suginfo_T. It's passed as a void pointer to keep the struct local.
|
|
647 *
|
236
|
648 * Returns the length of the word in bytes, also when it's OK, so that the
|
|
649 * caller can skip over the word.
|
|
650 */
|
|
651 int
|
300
|
652 spell_check(wp, ptr, attrp)
|
236
|
653 win_T *wp; /* current window */
|
|
654 char_u *ptr;
|
|
655 int *attrp;
|
|
656 {
|
|
657 matchinf_T mi; /* Most things are put in "mi" so that it can
|
|
658 be passed to functions quickly. */
|
344
|
659 int nrlen = 0; /* found a number first */
|
236
|
660
|
307
|
661 /* A word never starts at a space or a control character. Return quickly
|
|
662 * then, skipping over the character. */
|
|
663 if (*ptr <= ' ')
|
|
664 return 1;
|
236
|
665
|
344
|
666 /* A number is always OK. Also skip hexadecimal numbers 0xFF99 and
|
346
|
667 * 0X99FF. But when a word character follows do check spelling to find
|
|
668 * "3GPP". */
|
300
|
669 if (*ptr >= '0' && *ptr <= '9')
|
|
670 {
|
316
|
671 if (*ptr == '0' && (ptr[1] == 'x' || ptr[1] == 'X'))
|
|
672 mi.mi_end = skiphex(ptr + 2);
|
300
|
673 else
|
|
674 {
|
344
|
675 mi.mi_end = skipdigits(ptr);
|
|
676 nrlen = mi.mi_end - ptr;
|
300
|
677 }
|
358
|
678 if (!spell_iswordp(mi.mi_end))
|
344
|
679 return (int)(mi.mi_end - ptr);
|
346
|
680
|
|
681 /* Try including the digits in the word. */
|
|
682 mi.mi_fend = ptr + nrlen;
|
|
683 }
|
|
684 else
|
|
685 mi.mi_fend = ptr;
|
|
686
|
|
687 /* Find the normal end of the word (until the next non-word character). */
|
344
|
688 mi.mi_word = ptr;
|
358
|
689 if (spell_iswordp(mi.mi_fend))
|
344
|
690 {
|
|
691 do
|
|
692 {
|
307
|
693 mb_ptr_adv(mi.mi_fend);
|
358
|
694 } while (*mi.mi_fend != NUL && spell_iswordp(mi.mi_fend));
|
344
|
695 }
|
|
696
|
|
697 /* We always use the characters up to the next non-word character,
|
|
698 * also for bad words. */
|
|
699 mi.mi_end = mi.mi_fend;
|
|
700
|
|
701 /* Check caps type later. */
|
|
702 mi.mi_capflags = 0;
|
|
703 mi.mi_cend = NULL;
|
|
704
|
|
705 /* Include one non-word character so that we can check for the
|
|
706 * word end. */
|
|
707 if (*mi.mi_fend != NUL)
|
|
708 mb_ptr_adv(mi.mi_fend);
|
|
709
|
|
710 (void)spell_casefold(ptr, (int)(mi.mi_fend - ptr), mi.mi_fword,
|
|
711 MAXWLEN + 1);
|
|
712 mi.mi_fwordlen = STRLEN(mi.mi_fword);
|
|
713
|
|
714 /* The word is bad unless we recognize it. */
|
|
715 mi.mi_result = SP_BAD;
|
|
716
|
|
717 /*
|
|
718 * Loop over the languages specified in 'spelllang'.
|
|
719 * We check them all, because a matching word may be longer than an
|
|
720 * already found matching word.
|
|
721 */
|
|
722 for (mi.mi_lp = LANGP_ENTRY(wp->w_buffer->b_langp, 0);
|
|
723 mi.mi_lp->lp_slang != NULL; ++mi.mi_lp)
|
|
724 {
|
|
725 /* Check for a matching word in case-folded words. */
|
|
726 find_word(&mi, FIND_FOLDWORD);
|
|
727
|
|
728 /* Check for a matching word in keep-case words. */
|
|
729 find_word(&mi, FIND_KEEPWORD);
|
|
730
|
|
731 /* Check for matching prefixes. */
|
|
732 find_prefix(&mi);
|
|
733 }
|
|
734
|
|
735 if (mi.mi_result != SP_OK)
|
|
736 {
|
346
|
737 /* If we found a number skip over it. Allows for "42nd". Do flag
|
|
738 * rare and local words, e.g., "3GPP". */
|
344
|
739 if (nrlen > 0)
|
346
|
740 {
|
|
741 if (mi.mi_result == SP_BAD || mi.mi_result == SP_BANNED)
|
|
742 return nrlen;
|
|
743 }
|
344
|
744
|
|
745 /* When we are at a non-word character there is no error, just
|
|
746 * skip over the character (try looking for a word after it). */
|
346
|
747 else if (!SPELL_ISWORDP(ptr))
|
243
|
748 {
|
344
|
749 #ifdef FEAT_MBYTE
|
|
750 if (has_mbyte)
|
|
751 return mb_ptr2len_check(ptr);
|
|
752 #endif
|
|
753 return 1;
|
300
|
754 }
|
243
|
755
|
344
|
756 if (mi.mi_result == SP_BAD || mi.mi_result == SP_BANNED)
|
|
757 *attrp = highlight_attr[HLF_SPB];
|
|
758 else if (mi.mi_result == SP_RARE)
|
|
759 *attrp = highlight_attr[HLF_SPR];
|
|
760 else
|
|
761 *attrp = highlight_attr[HLF_SPL];
|
243
|
762 }
|
|
763
|
300
|
764 return (int)(mi.mi_end - ptr);
|
236
|
765 }
|
|
766
|
|
767 /*
|
300
|
768 * Check if the word at "mip->mi_word" is in the tree.
|
339
|
769 * When "mode" is FIND_FOLDWORD check in fold-case word tree.
|
|
770 * When "mode" is FIND_KEEPWORD check in keep-case word tree.
|
|
771 * When "mode" is FIND_PREFIX check for word after prefix in fold-case word
|
|
772 * tree.
|
300
|
773 *
|
|
774 * For a match mip->mi_result is updated.
|
243
|
775 */
|
|
776 static void
|
339
|
777 find_word(mip, mode)
|
243
|
778 matchinf_T *mip;
|
339
|
779 int mode;
|
243
|
780 {
|
324
|
781 idx_T arridx = 0;
|
300
|
782 int endlen[MAXWLEN]; /* length at possible word endings */
|
324
|
783 idx_T endidx[MAXWLEN]; /* possible word endings */
|
300
|
784 int endidxcnt = 0;
|
|
785 int len;
|
|
786 int wlen = 0;
|
|
787 int flen;
|
|
788 int c;
|
|
789 char_u *ptr;
|
324
|
790 idx_T lo, hi, m;
|
243
|
791 #ifdef FEAT_MBYTE
|
300
|
792 char_u *s;
|
339
|
793 char_u *p;
|
307
|
794 #endif
|
|
795 int res = SP_BAD;
|
300
|
796 slang_T *slang = mip->mi_lp->lp_slang;
|
|
797 unsigned flags;
|
|
798 char_u *byts;
|
324
|
799 idx_T *idxs;
|
339
|
800 int prefid;
|
|
801
|
|
802 if (mode == FIND_KEEPWORD)
|
236
|
803 {
|
300
|
804 /* Check for word with matching case in keep-case tree. */
|
|
805 ptr = mip->mi_word;
|
|
806 flen = 9999; /* no case folding, always enough bytes */
|
|
807 byts = slang->sl_kbyts;
|
|
808 idxs = slang->sl_kidxs;
|
236
|
809 }
|
|
810 else
|
|
811 {
|
300
|
812 /* Check for case-folded in case-folded tree. */
|
|
813 ptr = mip->mi_fword;
|
|
814 flen = mip->mi_fwordlen; /* available case-folded bytes */
|
|
815 byts = slang->sl_fbyts;
|
|
816 idxs = slang->sl_fidxs;
|
339
|
817
|
|
818 if (mode == FIND_PREFIX)
|
|
819 {
|
|
820 /* Skip over the prefix. */
|
|
821 wlen = mip->mi_prefixlen;
|
|
822 flen -= mip->mi_prefixlen;
|
|
823 }
|
243
|
824 }
|
|
825
|
300
|
826 if (byts == NULL)
|
|
827 return; /* array is empty */
|
236
|
828
|
|
829 /*
|
307
|
830 * Repeat advancing in the tree until:
|
|
831 * - there is a byte that doesn't match,
|
|
832 * - we reach the end of the tree,
|
|
833 * - or we reach the end of the line.
|
236
|
834 */
|
300
|
835 for (;;)
|
236
|
836 {
|
346
|
837 if (flen <= 0 && *mip->mi_fend != NUL)
|
339
|
838 flen = fold_more(mip);
|
300
|
839
|
|
840 len = byts[arridx++];
|
|
841
|
|
842 /* If the first possible byte is a zero the word could end here.
|
|
843 * Remember this index, we first check for the longest word. */
|
|
844 if (byts[arridx] == 0)
|
|
845 {
|
307
|
846 if (endidxcnt == MAXWLEN)
|
|
847 {
|
|
848 /* Must be a corrupted spell file. */
|
|
849 EMSG(_(e_format));
|
|
850 return;
|
|
851 }
|
300
|
852 endlen[endidxcnt] = wlen;
|
|
853 endidx[endidxcnt++] = arridx++;
|
|
854 --len;
|
|
855
|
|
856 /* Skip over the zeros, there can be several flag/region
|
|
857 * combinations. */
|
|
858 while (len > 0 && byts[arridx] == 0)
|
|
859 {
|
|
860 ++arridx;
|
|
861 --len;
|
|
862 }
|
|
863 if (len == 0)
|
|
864 break; /* no children, word must end here */
|
|
865 }
|
|
866
|
|
867 /* Stop looking at end of the line. */
|
|
868 if (ptr[wlen] == NUL)
|
|
869 break;
|
|
870
|
|
871 /* Perform a binary search in the list of accepted bytes. */
|
|
872 c = ptr[wlen];
|
346
|
873 if (c == TAB) /* <Tab> is handled like <Space> */
|
|
874 c = ' ';
|
300
|
875 lo = arridx;
|
|
876 hi = arridx + len - 1;
|
|
877 while (lo < hi)
|
|
878 {
|
|
879 m = (lo + hi) / 2;
|
|
880 if (byts[m] > c)
|
|
881 hi = m - 1;
|
|
882 else if (byts[m] < c)
|
|
883 lo = m + 1;
|
|
884 else
|
|
885 {
|
|
886 lo = hi = m;
|
|
887 break;
|
236
|
888 }
|
|
889 }
|
300
|
890
|
|
891 /* Stop if there is no matching byte. */
|
|
892 if (hi < lo || byts[lo] != c)
|
|
893 break;
|
|
894
|
|
895 /* Continue at the child (if there is one). */
|
|
896 arridx = idxs[lo];
|
|
897 ++wlen;
|
|
898 --flen;
|
346
|
899
|
|
900 /* One space in the good word may stand for several spaces in the
|
|
901 * checked word. */
|
|
902 if (c == ' ')
|
|
903 {
|
|
904 for (;;)
|
|
905 {
|
|
906 if (flen <= 0 && *mip->mi_fend != NUL)
|
|
907 flen = fold_more(mip);
|
|
908 if (ptr[wlen] != ' ' && ptr[wlen] != TAB)
|
|
909 break;
|
|
910 ++wlen;
|
|
911 --flen;
|
|
912 }
|
|
913 }
|
236
|
914 }
|
|
915
|
300
|
916 /*
|
|
917 * Verify that one of the possible endings is valid. Try the longest
|
|
918 * first.
|
|
919 */
|
|
920 while (endidxcnt > 0)
|
|
921 {
|
|
922 --endidxcnt;
|
|
923 arridx = endidx[endidxcnt];
|
|
924 wlen = endlen[endidxcnt];
|
236
|
925
|
300
|
926 #ifdef FEAT_MBYTE
|
|
927 if ((*mb_head_off)(ptr, ptr + wlen) > 0)
|
|
928 continue; /* not at first byte of character */
|
|
929 #endif
|
358
|
930 if (spell_iswordp(ptr + wlen))
|
300
|
931 continue; /* next char is a word character */
|
|
932
|
|
933 #ifdef FEAT_MBYTE
|
339
|
934 if (mode != FIND_KEEPWORD && has_mbyte)
|
300
|
935 {
|
|
936 /* Compute byte length in original word, length may change
|
339
|
937 * when folding case. This can be slow, take a shortcut when the
|
|
938 * case-folded word is equal to the keep-case word. */
|
300
|
939 p = mip->mi_word;
|
339
|
940 if (STRNCMP(ptr, p, wlen) != 0)
|
|
941 {
|
|
942 for (s = ptr; s < ptr + wlen; mb_ptr_adv(s))
|
|
943 mb_ptr_adv(p);
|
|
944 wlen = p - mip->mi_word;
|
|
945 }
|
300
|
946 }
|
|
947 #endif
|
236
|
948
|
339
|
949 /* Check flags and region. For FIND_PREFIX check the condition and
|
|
950 * prefix ID.
|
|
951 * Repeat this if there are more flags/region alternatives until there
|
|
952 * is a match. */
|
|
953 res = SP_BAD;
|
|
954 for (len = byts[arridx - 1]; len > 0 && byts[arridx] == 0;
|
|
955 --len, ++arridx)
|
300
|
956 {
|
|
957 flags = idxs[arridx];
|
324
|
958
|
339
|
959 /* For the fold-case tree check that the case of the checked word
|
|
960 * matches with what the word in the tree requires.
|
|
961 * For keep-case tree the case is always right. For prefixes we
|
|
962 * don't bother to check. */
|
|
963 if (mode == FIND_FOLDWORD)
|
300
|
964 {
|
|
965 if (mip->mi_cend != mip->mi_word + wlen)
|
|
966 {
|
323
|
967 /* mi_capflags was set for a different word length, need
|
|
968 * to do it again. */
|
300
|
969 mip->mi_cend = mip->mi_word + wlen;
|
323
|
970 mip->mi_capflags = captype(mip->mi_word, mip->mi_cend);
|
300
|
971 }
|
|
972
|
346
|
973 if (mip->mi_capflags == WF_KEEPCAP
|
|
974 || !spell_valid_case(mip->mi_capflags, flags))
|
339
|
975 continue;
|
300
|
976 }
|
236
|
977
|
339
|
978 /* When mode is FIND_PREFIX the word must support the prefix:
|
|
979 * check the prefix ID and the condition. Do that for the list at
|
366
|
980 * mip->mi_prefarridx that find_prefix() filled. */
|
339
|
981 if (mode == FIND_PREFIX)
|
300
|
982 {
|
339
|
983 /* The prefix ID is stored two bytes above the flags. */
|
|
984 prefid = (unsigned)flags >> 16;
|
366
|
985 c = valid_word_prefix(mip->mi_prefcnt, mip->mi_prefarridx,
|
351
|
986 prefid, mip->mi_fword + mip->mi_prefixlen,
|
366
|
987 slang);
|
|
988 if (c == 0)
|
339
|
989 continue;
|
366
|
990
|
|
991 /* Use the WF_RARE flag for a rare prefix. */
|
|
992 if (c & WF_RAREPFX)
|
|
993 flags |= WF_RARE;
|
339
|
994 }
|
|
995
|
|
996 if (flags & WF_BANNED)
|
|
997 res = SP_BANNED;
|
|
998 else if (flags & WF_REGION)
|
|
999 {
|
|
1000 /* Check region. */
|
|
1001 if ((mip->mi_lp->lp_region & (flags >> 8)) != 0)
|
300
|
1002 res = SP_OK;
|
339
|
1003 else
|
|
1004 res = SP_LOCAL;
|
300
|
1005 }
|
339
|
1006 else if (flags & WF_RARE)
|
|
1007 res = SP_RARE;
|
307
|
1008 else
|
339
|
1009 res = SP_OK;
|
|
1010
|
|
1011 /* Always use the longest match and the best result. */
|
|
1012 if (mip->mi_result > res)
|
|
1013 {
|
|
1014 mip->mi_result = res;
|
|
1015 mip->mi_end = mip->mi_word + wlen;
|
|
1016 }
|
351
|
1017 else if (mip->mi_result == res && mip->mi_end < mip->mi_word + wlen)
|
339
|
1018 mip->mi_end = mip->mi_word + wlen;
|
|
1019
|
|
1020 if (res == SP_OK)
|
|
1021 break;
|
300
|
1022 }
|
|
1023
|
307
|
1024 if (res == SP_OK)
|
300
|
1025 break;
|
|
1026 }
|
236
|
1027 }
|
|
1028
|
323
|
1029 /*
|
366
|
1030 * Return non-zero if the prefix indicated by "mip->mi_prefarridx" matches
|
|
1031 * with the prefix ID "prefid" for the word "word".
|
|
1032 * The WF_RAREPFX flag is included in the return value for a rare prefix.
|
351
|
1033 */
|
|
1034 static int
|
|
1035 valid_word_prefix(totprefcnt, arridx, prefid, word, slang)
|
|
1036 int totprefcnt; /* nr of prefix IDs */
|
|
1037 int arridx; /* idx in sl_pidxs[] */
|
|
1038 int prefid;
|
|
1039 char_u *word;
|
|
1040 slang_T *slang;
|
|
1041 {
|
|
1042 int prefcnt;
|
|
1043 int pidx;
|
|
1044 regprog_T *rp;
|
|
1045 regmatch_T regmatch;
|
|
1046
|
|
1047 for (prefcnt = totprefcnt - 1; prefcnt >= 0; --prefcnt)
|
|
1048 {
|
|
1049 pidx = slang->sl_pidxs[arridx + prefcnt];
|
|
1050
|
|
1051 /* Check the prefix ID. */
|
|
1052 if (prefid != (pidx & 0xff))
|
|
1053 continue;
|
|
1054
|
|
1055 /* Check the condition, if there is one. The condition index is
|
366
|
1056 * stored in the two bytes above the prefix ID byte. */
|
|
1057 rp = slang->sl_prefprog[((unsigned)pidx >> 8) & 0xffff];
|
351
|
1058 if (rp != NULL)
|
|
1059 {
|
|
1060 regmatch.regprog = rp;
|
|
1061 regmatch.rm_ic = FALSE;
|
|
1062 if (!vim_regexec(®match, word, 0))
|
|
1063 continue;
|
|
1064 }
|
|
1065
|
366
|
1066 /* It's a match! Return the WF_RAREPFX flag. */
|
|
1067 return pidx;
|
|
1068 }
|
|
1069 return 0;
|
351
|
1070 }
|
|
1071
|
|
1072 /*
|
339
|
1073 * Check if the word at "mip->mi_word" has a matching prefix.
|
|
1074 * If it does, then check the following word.
|
|
1075 *
|
|
1076 * For a match mip->mi_result is updated.
|
|
1077 */
|
|
1078 static void
|
|
1079 find_prefix(mip)
|
|
1080 matchinf_T *mip;
|
|
1081 {
|
|
1082 idx_T arridx = 0;
|
|
1083 int len;
|
|
1084 int wlen = 0;
|
|
1085 int flen;
|
|
1086 int c;
|
|
1087 char_u *ptr;
|
|
1088 idx_T lo, hi, m;
|
|
1089 slang_T *slang = mip->mi_lp->lp_slang;
|
|
1090 char_u *byts;
|
|
1091 idx_T *idxs;
|
|
1092
|
|
1093 /* We use the case-folded word here, since prefixes are always
|
|
1094 * case-folded. */
|
|
1095 ptr = mip->mi_fword;
|
|
1096 flen = mip->mi_fwordlen; /* available case-folded bytes */
|
|
1097 byts = slang->sl_pbyts;
|
|
1098 idxs = slang->sl_pidxs;
|
|
1099
|
|
1100 if (byts == NULL)
|
|
1101 return; /* array is empty */
|
|
1102
|
|
1103 /*
|
|
1104 * Repeat advancing in the tree until:
|
|
1105 * - there is a byte that doesn't match,
|
|
1106 * - we reach the end of the tree,
|
|
1107 * - or we reach the end of the line.
|
|
1108 */
|
|
1109 for (;;)
|
|
1110 {
|
|
1111 if (flen == 0 && *mip->mi_fend != NUL)
|
|
1112 flen = fold_more(mip);
|
|
1113
|
|
1114 len = byts[arridx++];
|
|
1115
|
|
1116 /* If the first possible byte is a zero the prefix could end here.
|
|
1117 * Check if the following word matches and supports the prefix. */
|
|
1118 if (byts[arridx] == 0)
|
|
1119 {
|
|
1120 /* There can be several prefixes with different conditions. We
|
|
1121 * try them all, since we don't know which one will give the
|
|
1122 * longest match. The word is the same each time, pass the list
|
|
1123 * of possible prefixes to find_word(). */
|
|
1124 mip->mi_prefarridx = arridx;
|
|
1125 mip->mi_prefcnt = len;
|
|
1126 while (len > 0 && byts[arridx] == 0)
|
|
1127 {
|
|
1128 ++arridx;
|
|
1129 --len;
|
|
1130 }
|
|
1131 mip->mi_prefcnt -= len;
|
|
1132
|
|
1133 /* Find the word that comes after the prefix. */
|
|
1134 mip->mi_prefixlen = wlen;
|
|
1135 find_word(mip, FIND_PREFIX);
|
|
1136
|
|
1137
|
|
1138 if (len == 0)
|
|
1139 break; /* no children, word must end here */
|
|
1140 }
|
|
1141
|
|
1142 /* Stop looking at end of the line. */
|
|
1143 if (ptr[wlen] == NUL)
|
|
1144 break;
|
|
1145
|
|
1146 /* Perform a binary search in the list of accepted bytes. */
|
|
1147 c = ptr[wlen];
|
|
1148 lo = arridx;
|
|
1149 hi = arridx + len - 1;
|
|
1150 while (lo < hi)
|
|
1151 {
|
|
1152 m = (lo + hi) / 2;
|
|
1153 if (byts[m] > c)
|
|
1154 hi = m - 1;
|
|
1155 else if (byts[m] < c)
|
|
1156 lo = m + 1;
|
|
1157 else
|
|
1158 {
|
|
1159 lo = hi = m;
|
|
1160 break;
|
|
1161 }
|
|
1162 }
|
|
1163
|
|
1164 /* Stop if there is no matching byte. */
|
|
1165 if (hi < lo || byts[lo] != c)
|
|
1166 break;
|
|
1167
|
|
1168 /* Continue at the child (if there is one). */
|
|
1169 arridx = idxs[lo];
|
|
1170 ++wlen;
|
|
1171 --flen;
|
|
1172 }
|
|
1173 }
|
|
1174
|
|
1175 /*
|
|
1176 * Need to fold at least one more character. Do until next non-word character
|
|
1177 * for efficiency.
|
|
1178 * Return the length of the folded chars in bytes.
|
|
1179 */
|
|
1180 static int
|
|
1181 fold_more(mip)
|
|
1182 matchinf_T *mip;
|
|
1183 {
|
|
1184 int flen;
|
|
1185 char_u *p;
|
|
1186
|
|
1187 p = mip->mi_fend;
|
|
1188 do
|
|
1189 {
|
|
1190 mb_ptr_adv(mip->mi_fend);
|
358
|
1191 } while (*mip->mi_fend != NUL && spell_iswordp(mip->mi_fend));
|
339
|
1192
|
|
1193 /* Include the non-word character so that we can check for the
|
|
1194 * word end. */
|
|
1195 if (*mip->mi_fend != NUL)
|
|
1196 mb_ptr_adv(mip->mi_fend);
|
|
1197
|
|
1198 (void)spell_casefold(p, (int)(mip->mi_fend - p),
|
|
1199 mip->mi_fword + mip->mi_fwordlen,
|
|
1200 MAXWLEN - mip->mi_fwordlen);
|
|
1201 flen = STRLEN(mip->mi_fword + mip->mi_fwordlen);
|
|
1202 mip->mi_fwordlen += flen;
|
|
1203 return flen;
|
|
1204 }
|
|
1205
|
|
1206 /*
|
323
|
1207 * Check case flags for a word. Return TRUE if the word has the requested
|
|
1208 * case.
|
|
1209 */
|
|
1210 static int
|
|
1211 spell_valid_case(origflags, treeflags)
|
|
1212 int origflags; /* flags for the checked word. */
|
|
1213 int treeflags; /* flags for the word in the spell tree */
|
|
1214 {
|
|
1215 return (origflags == WF_ALLCAP
|
|
1216 || ((treeflags & (WF_ALLCAP | WF_KEEPCAP)) == 0
|
|
1217 && ((treeflags & WF_ONECAP) == 0 || origflags == WF_ONECAP)));
|
|
1218 }
|
|
1219
|
351
|
1220 /*
|
|
1221 * Return TRUE if spell checking is not enabled.
|
|
1222 */
|
|
1223 static int
|
|
1224 no_spell_checking()
|
|
1225 {
|
|
1226 if (!curwin->w_p_spell || *curbuf->b_p_spl == NUL)
|
|
1227 {
|
|
1228 EMSG(_("E756: Spell checking is not enabled"));
|
|
1229 return TRUE;
|
|
1230 }
|
|
1231 return FALSE;
|
|
1232 }
|
300
|
1233
|
236
|
1234 /*
|
|
1235 * Move to next spell error.
|
323
|
1236 * "curline" is TRUE for "z?": find word under/after cursor in the same line.
|
236
|
1237 * Return OK if found, FAIL otherwise.
|
|
1238 */
|
|
1239 int
|
323
|
1240 spell_move_to(dir, allwords, curline)
|
236
|
1241 int dir; /* FORWARD or BACKWARD */
|
|
1242 int allwords; /* TRUE for "[s" and "]s" */
|
323
|
1243 int curline;
|
236
|
1244 {
|
249
|
1245 linenr_T lnum;
|
|
1246 pos_T found_pos;
|
236
|
1247 char_u *line;
|
|
1248 char_u *p;
|
346
|
1249 char_u *endp;
|
|
1250 int attr;
|
236
|
1251 int len;
|
249
|
1252 int has_syntax = syntax_present(curbuf);
|
|
1253 int col;
|
|
1254 int can_spell;
|
346
|
1255 char_u *buf = NULL;
|
|
1256 int buflen = 0;
|
|
1257 int skip = 0;
|
236
|
1258
|
351
|
1259 if (no_spell_checking())
|
236
|
1260 return FAIL;
|
|
1261
|
249
|
1262 /*
|
|
1263 * Start looking for bad word at the start of the line, because we can't
|
346
|
1264 * start halfway a word, we don't know where the it starts or ends.
|
249
|
1265 *
|
|
1266 * When searching backwards, we continue in the line to find the last
|
|
1267 * bad word (in the cursor line: before the cursor).
|
346
|
1268 *
|
|
1269 * We concatenate the start of the next line, so that wrapped words work
|
|
1270 * (e.g. "et<line-break>cetera"). Doesn't work when searching backwards
|
|
1271 * though...
|
249
|
1272 */
|
|
1273 lnum = curwin->w_cursor.lnum;
|
|
1274 found_pos.lnum = 0;
|
236
|
1275
|
|
1276 while (!got_int)
|
|
1277 {
|
249
|
1278 line = ml_get(lnum);
|
346
|
1279
|
|
1280 len = STRLEN(line);
|
|
1281 if (buflen < len + MAXWLEN + 2)
|
|
1282 {
|
|
1283 vim_free(buf);
|
|
1284 buflen = len + MAXWLEN + 2;
|
|
1285 buf = alloc(buflen);
|
|
1286 if (buf == NULL)
|
|
1287 break;
|
|
1288 }
|
|
1289
|
|
1290 /* Copy the line into "buf" and append the start of the next line if
|
|
1291 * possible. */
|
|
1292 STRCPY(buf, line);
|
|
1293 if (lnum < curbuf->b_ml.ml_line_count)
|
|
1294 spell_cat_line(buf + STRLEN(buf), ml_get(lnum + 1), MAXWLEN);
|
|
1295
|
|
1296 p = buf + skip;
|
|
1297 endp = buf + len;
|
|
1298 while (p < endp)
|
236
|
1299 {
|
300
|
1300 /* When searching backward don't search after the cursor. */
|
|
1301 if (dir == BACKWARD
|
|
1302 && lnum == curwin->w_cursor.lnum
|
346
|
1303 && (colnr_T)(p - buf) >= curwin->w_cursor.col)
|
300
|
1304 break;
|
249
|
1305
|
300
|
1306 /* start of word */
|
346
|
1307 attr = 0;
|
300
|
1308 len = spell_check(curwin, p, &attr);
|
249
|
1309
|
300
|
1310 if (attr != 0)
|
|
1311 {
|
|
1312 /* We found a bad word. Check the attribute. */
|
|
1313 if (allwords || attr == highlight_attr[HLF_SPB])
|
236
|
1314 {
|
300
|
1315 /* When searching forward only accept a bad word after
|
|
1316 * the cursor. */
|
|
1317 if (dir == BACKWARD
|
|
1318 || lnum > curwin->w_cursor.lnum
|
|
1319 || (lnum == curwin->w_cursor.lnum
|
346
|
1320 && (colnr_T)(curline ? p - buf + len
|
|
1321 : p - buf)
|
300
|
1322 > curwin->w_cursor.col))
|
236
|
1323 {
|
300
|
1324 if (has_syntax)
|
249
|
1325 {
|
346
|
1326 col = p - buf;
|
300
|
1327 (void)syn_get_id(lnum, (colnr_T)col,
|
|
1328 FALSE, &can_spell);
|
|
1329 }
|
|
1330 else
|
|
1331 can_spell = TRUE;
|
249
|
1332
|
300
|
1333 if (can_spell)
|
|
1334 {
|
|
1335 found_pos.lnum = lnum;
|
346
|
1336 found_pos.col = p - buf;
|
249
|
1337 #ifdef FEAT_VIRTUALEDIT
|
300
|
1338 found_pos.coladd = 0;
|
249
|
1339 #endif
|
300
|
1340 if (dir == FORWARD)
|
|
1341 {
|
|
1342 /* No need to search further. */
|
|
1343 curwin->w_cursor = found_pos;
|
346
|
1344 vim_free(buf);
|
300
|
1345 return OK;
|
249
|
1346 }
|
|
1347 }
|
236
|
1348 }
|
|
1349 }
|
|
1350 }
|
|
1351
|
300
|
1352 /* advance to character after the word */
|
|
1353 p += len;
|
236
|
1354 }
|
|
1355
|
323
|
1356 if (curline)
|
346
|
1357 break; /* only check cursor line */
|
323
|
1358
|
236
|
1359 /* Advance to next line. */
|
249
|
1360 if (dir == BACKWARD)
|
|
1361 {
|
|
1362 if (found_pos.lnum != 0)
|
|
1363 {
|
|
1364 /* Use the last match in the line. */
|
|
1365 curwin->w_cursor = found_pos;
|
346
|
1366 vim_free(buf);
|
249
|
1367 return OK;
|
|
1368 }
|
|
1369 if (lnum == 1)
|
346
|
1370 break;
|
249
|
1371 --lnum;
|
|
1372 }
|
|
1373 else
|
|
1374 {
|
|
1375 if (lnum == curbuf->b_ml.ml_line_count)
|
346
|
1376 break;
|
249
|
1377 ++lnum;
|
346
|
1378
|
|
1379 /* Skip the characters at the start of the next line that were
|
|
1380 * included in a match crossing line boundaries. */
|
|
1381 if (attr == 0)
|
|
1382 skip = p - endp;
|
|
1383 else
|
|
1384 skip = 0;
|
249
|
1385 }
|
236
|
1386
|
|
1387 line_breakcheck();
|
|
1388 }
|
|
1389
|
346
|
1390 vim_free(buf);
|
|
1391 return FAIL;
|
|
1392 }
|
|
1393
|
|
1394 /*
|
|
1395 * For spell checking: concatenate the start of the following line "line" into
|
|
1396 * "buf", blanking-out special characters. Copy less then "maxlen" bytes.
|
|
1397 */
|
|
1398 void
|
|
1399 spell_cat_line(buf, line, maxlen)
|
|
1400 char_u *buf;
|
|
1401 char_u *line;
|
|
1402 int maxlen;
|
|
1403 {
|
|
1404 char_u *p;
|
|
1405 int n;
|
|
1406
|
|
1407 p = skipwhite(line);
|
|
1408 while (vim_strchr((char_u *)"*#/\"\t", *p) != NULL)
|
|
1409 p = skipwhite(p + 1);
|
|
1410
|
|
1411 if (*p != NUL)
|
|
1412 {
|
|
1413 *buf = ' ';
|
|
1414 vim_strncpy(buf + 1, line, maxlen - 1);
|
|
1415 n = p - line;
|
|
1416 if (n >= maxlen)
|
|
1417 n = maxlen - 1;
|
|
1418 vim_memset(buf + 1, ' ', n);
|
|
1419 }
|
236
|
1420 }
|
|
1421
|
|
1422 /*
|
307
|
1423 * Load word list(s) for "lang" from Vim spell file(s).
|
310
|
1424 * "lang" must be the language without the region: e.g., "en".
|
236
|
1425 */
|
307
|
1426 static void
|
236
|
1427 spell_load_lang(lang)
|
|
1428 char_u *lang;
|
|
1429 {
|
310
|
1430 char_u fname_enc[85];
|
236
|
1431 int r;
|
307
|
1432 char_u langcp[MAXWLEN + 1];
|
|
1433
|
310
|
1434 /* Copy the language name to pass it to spell_load_cb() as a cookie.
|
307
|
1435 * It's truncated when an error is detected. */
|
|
1436 STRCPY(langcp, lang);
|
|
1437
|
310
|
1438 /*
|
|
1439 * Find the first spell file for "lang" in 'runtimepath' and load it.
|
|
1440 */
|
|
1441 vim_snprintf((char *)fname_enc, sizeof(fname_enc) - 5,
|
|
1442 "spell/%s.%s.spl", lang, spell_enc());
|
|
1443 r = do_in_runtimepath(fname_enc, FALSE, spell_load_cb, &langcp);
|
307
|
1444
|
|
1445 if (r == FAIL && *langcp != NUL)
|
|
1446 {
|
|
1447 /* Try loading the ASCII version. */
|
310
|
1448 vim_snprintf((char *)fname_enc, sizeof(fname_enc) - 5,
|
272
|
1449 "spell/%s.ascii.spl", lang);
|
310
|
1450 r = do_in_runtimepath(fname_enc, FALSE, spell_load_cb, &langcp);
|
307
|
1451 }
|
|
1452
|
|
1453 if (r == FAIL)
|
|
1454 smsg((char_u *)_("Warning: Cannot find word list \"%s\""),
|
236
|
1455 fname_enc + 6);
|
310
|
1456 else if (*langcp != NUL)
|
|
1457 {
|
|
1458 /* Load all the additions. */
|
|
1459 STRCPY(fname_enc + STRLEN(fname_enc) - 3, "add.spl");
|
|
1460 do_in_runtimepath(fname_enc, TRUE, spell_load_cb, &langcp);
|
|
1461 }
|
|
1462 }
|
|
1463
|
|
1464 /*
|
|
1465 * Return the encoding used for spell checking: Use 'encoding', except that we
|
|
1466 * use "latin1" for "latin9". And limit to 60 characters (just in case).
|
|
1467 */
|
|
1468 static char_u *
|
|
1469 spell_enc()
|
|
1470 {
|
|
1471
|
|
1472 #ifdef FEAT_MBYTE
|
|
1473 if (STRLEN(p_enc) < 60 && STRCMP(p_enc, "iso-8859-15") != 0)
|
|
1474 return p_enc;
|
|
1475 #endif
|
|
1476 return (char_u *)"latin1";
|
236
|
1477 }
|
|
1478
|
|
1479 /*
|
|
1480 * Allocate a new slang_T.
|
|
1481 * Caller must fill "sl_next".
|
|
1482 */
|
|
1483 static slang_T *
|
|
1484 slang_alloc(lang)
|
|
1485 char_u *lang;
|
|
1486 {
|
|
1487 slang_T *lp;
|
|
1488
|
300
|
1489 lp = (slang_T *)alloc_clear(sizeof(slang_T));
|
236
|
1490 if (lp != NULL)
|
|
1491 {
|
|
1492 lp->sl_name = vim_strsave(lang);
|
323
|
1493 ga_init2(&lp->sl_rep, sizeof(fromto_T), 10);
|
344
|
1494 ga_init2(&lp->sl_sal, sizeof(salitem_T), 10);
|
236
|
1495 }
|
|
1496 return lp;
|
|
1497 }
|
|
1498
|
|
1499 /*
|
|
1500 * Free the contents of an slang_T and the structure itself.
|
|
1501 */
|
|
1502 static void
|
|
1503 slang_free(lp)
|
|
1504 slang_T *lp;
|
|
1505 {
|
|
1506 vim_free(lp->sl_name);
|
310
|
1507 vim_free(lp->sl_fname);
|
|
1508 slang_clear(lp);
|
|
1509 vim_free(lp);
|
|
1510 }
|
|
1511
|
|
1512 /*
|
|
1513 * Clear an slang_T so that the file can be reloaded.
|
|
1514 */
|
|
1515 static void
|
|
1516 slang_clear(lp)
|
|
1517 slang_T *lp;
|
|
1518 {
|
339
|
1519 garray_T *gap;
|
|
1520 fromto_T *ftp;
|
344
|
1521 salitem_T *smp;
|
339
|
1522 int i;
|
323
|
1523
|
300
|
1524 vim_free(lp->sl_fbyts);
|
310
|
1525 lp->sl_fbyts = NULL;
|
300
|
1526 vim_free(lp->sl_kbyts);
|
310
|
1527 lp->sl_kbyts = NULL;
|
339
|
1528 vim_free(lp->sl_pbyts);
|
|
1529 lp->sl_pbyts = NULL;
|
|
1530
|
300
|
1531 vim_free(lp->sl_fidxs);
|
310
|
1532 lp->sl_fidxs = NULL;
|
300
|
1533 vim_free(lp->sl_kidxs);
|
310
|
1534 lp->sl_kidxs = NULL;
|
339
|
1535 vim_free(lp->sl_pidxs);
|
|
1536 lp->sl_pidxs = NULL;
|
323
|
1537
|
344
|
1538 gap = &lp->sl_rep;
|
|
1539 while (gap->ga_len > 0)
|
323
|
1540 {
|
344
|
1541 ftp = &((fromto_T *)gap->ga_data)[--gap->ga_len];
|
|
1542 vim_free(ftp->ft_from);
|
|
1543 vim_free(ftp->ft_to);
|
323
|
1544 }
|
344
|
1545 ga_clear(gap);
|
|
1546
|
|
1547 gap = &lp->sl_sal;
|
|
1548 while (gap->ga_len > 0)
|
|
1549 {
|
|
1550 smp = &((salitem_T *)gap->ga_data)[--gap->ga_len];
|
|
1551 vim_free(smp->sm_lead);
|
|
1552 vim_free(smp->sm_to);
|
|
1553 }
|
|
1554 ga_clear(gap);
|
323
|
1555
|
339
|
1556 for (i = 0; i < lp->sl_prefixcnt; ++i)
|
|
1557 vim_free(lp->sl_prefprog[i]);
|
|
1558 vim_free(lp->sl_prefprog);
|
|
1559
|
330
|
1560 #ifdef FEAT_MBYTE
|
|
1561 {
|
|
1562 int todo = lp->sl_map_hash.ht_used;
|
|
1563 hashitem_T *hi;
|
|
1564
|
|
1565 for (hi = lp->sl_map_hash.ht_array; todo > 0; ++hi)
|
|
1566 if (!HASHITEM_EMPTY(hi))
|
|
1567 {
|
|
1568 --todo;
|
|
1569 vim_free(hi->hi_key);
|
|
1570 }
|
|
1571 }
|
|
1572 hash_clear(&lp->sl_map_hash);
|
|
1573 #endif
|
236
|
1574 }
|
|
1575
|
|
1576 /*
|
307
|
1577 * Load one spell file and store the info into a slang_T.
|
236
|
1578 * Invoked through do_in_runtimepath().
|
|
1579 */
|
|
1580 static void
|
310
|
1581 spell_load_cb(fname, cookie)
|
236
|
1582 char_u *fname;
|
307
|
1583 void *cookie; /* points to the language name */
|
236
|
1584 {
|
323
|
1585 (void)spell_load_file(fname, (char_u *)cookie, NULL, FALSE);
|
310
|
1586 }
|
|
1587
|
|
1588 /*
|
|
1589 * Load one spell file and store the info into a slang_T.
|
|
1590 *
|
|
1591 * This is invoked in two ways:
|
|
1592 * - From spell_load_cb() to load a spell file for the first time. "lang" is
|
|
1593 * the language name, "old_lp" is NULL. Will allocate an slang_T.
|
|
1594 * - To reload a spell file that was changed. "lang" is NULL and "old_lp"
|
|
1595 * points to the existing slang_T.
|
323
|
1596 * Returns the slang_T the spell file was loaded into. NULL for error.
|
310
|
1597 */
|
323
|
1598 static slang_T *
|
|
1599 spell_load_file(fname, lang, old_lp, silent)
|
310
|
1600 char_u *fname;
|
|
1601 char_u *lang;
|
|
1602 slang_T *old_lp;
|
323
|
1603 int silent; /* no error if file doesn't exist */
|
310
|
1604 {
|
236
|
1605 FILE *fd;
|
|
1606 char_u buf[MAXWLEN + 1];
|
|
1607 char_u *p;
|
339
|
1608 char_u *bp;
|
|
1609 idx_T *ip;
|
236
|
1610 int i;
|
339
|
1611 int n;
|
300
|
1612 int len;
|
236
|
1613 int round;
|
|
1614 char_u *save_sourcing_name = sourcing_name;
|
|
1615 linenr_T save_sourcing_lnum = sourcing_lnum;
|
255
|
1616 int cnt, ccnt;
|
|
1617 char_u *fol;
|
307
|
1618 slang_T *lp = NULL;
|
323
|
1619 garray_T *gap;
|
|
1620 fromto_T *ftp;
|
344
|
1621 salitem_T *smp;
|
323
|
1622 int rr;
|
|
1623 short *first;
|
324
|
1624 idx_T idx;
|
344
|
1625 int c = 0;
|
236
|
1626
|
310
|
1627 fd = mch_fopen((char *)fname, "r");
|
236
|
1628 if (fd == NULL)
|
|
1629 {
|
323
|
1630 if (!silent)
|
|
1631 EMSG2(_(e_notopen), fname);
|
|
1632 else if (p_verbose > 2)
|
|
1633 {
|
|
1634 verbose_enter();
|
|
1635 smsg((char_u *)e_notopen, fname);
|
|
1636 verbose_leave();
|
|
1637 }
|
255
|
1638 goto endFAIL;
|
236
|
1639 }
|
310
|
1640 if (p_verbose > 2)
|
|
1641 {
|
|
1642 verbose_enter();
|
|
1643 smsg((char_u *)_("Reading spell file \"%s\""), fname);
|
|
1644 verbose_leave();
|
|
1645 }
|
|
1646
|
|
1647 if (old_lp == NULL)
|
|
1648 {
|
|
1649 lp = slang_alloc(lang);
|
|
1650 if (lp == NULL)
|
|
1651 goto endFAIL;
|
|
1652
|
|
1653 /* Remember the file name, used to reload the file when it's updated. */
|
|
1654 lp->sl_fname = vim_strsave(fname);
|
|
1655 if (lp->sl_fname == NULL)
|
|
1656 goto endFAIL;
|
|
1657
|
|
1658 /* Check for .add.spl. */
|
|
1659 lp->sl_add = strstr((char *)gettail(fname), ".add.") != NULL;
|
|
1660 }
|
|
1661 else
|
|
1662 lp = old_lp;
|
307
|
1663
|
236
|
1664 /* Set sourcing_name, so that error messages mention the file name. */
|
|
1665 sourcing_name = fname;
|
|
1666 sourcing_lnum = 0;
|
|
1667
|
339
|
1668 /* <HEADER>: <fileID>
|
|
1669 * <regioncnt> <regionname> ...
|
|
1670 * <charflagslen> <charflags>
|
|
1671 * <fcharslen> <fchars>
|
366
|
1672 * <midwordlen> <midword>
|
339
|
1673 * <prefcondcnt> <prefcond> ...
|
|
1674 */
|
236
|
1675 for (i = 0; i < VIMSPELLMAGICL; ++i)
|
|
1676 buf[i] = getc(fd); /* <fileID> */
|
|
1677 if (STRNCMP(buf, VIMSPELLMAGIC, VIMSPELLMAGICL) != 0)
|
|
1678 {
|
|
1679 EMSG(_("E757: Wrong file ID in spell file"));
|
255
|
1680 goto endFAIL;
|
236
|
1681 }
|
|
1682
|
|
1683 cnt = getc(fd); /* <regioncnt> */
|
255
|
1684 if (cnt < 0)
|
236
|
1685 {
|
|
1686 truncerr:
|
|
1687 EMSG(_("E758: Truncated spell file"));
|
255
|
1688 goto endFAIL;
|
236
|
1689 }
|
|
1690 if (cnt > 8)
|
|
1691 {
|
|
1692 formerr:
|
307
|
1693 EMSG(_(e_format));
|
255
|
1694 goto endFAIL;
|
236
|
1695 }
|
|
1696 for (i = 0; i < cnt; ++i)
|
|
1697 {
|
|
1698 lp->sl_regions[i * 2] = getc(fd); /* <regionname> */
|
|
1699 lp->sl_regions[i * 2 + 1] = getc(fd);
|
|
1700 }
|
|
1701 lp->sl_regions[cnt * 2] = NUL;
|
|
1702
|
255
|
1703 cnt = getc(fd); /* <charflagslen> */
|
|
1704 if (cnt > 0)
|
|
1705 {
|
300
|
1706 p = alloc((unsigned)cnt);
|
255
|
1707 if (p == NULL)
|
|
1708 goto endFAIL;
|
|
1709 for (i = 0; i < cnt; ++i)
|
|
1710 p[i] = getc(fd); /* <charflags> */
|
|
1711
|
|
1712 ccnt = (getc(fd) << 8) + getc(fd); /* <fcharslen> */
|
|
1713 if (ccnt <= 0)
|
300
|
1714 {
|
|
1715 vim_free(p);
|
255
|
1716 goto formerr;
|
300
|
1717 }
|
|
1718 fol = alloc((unsigned)ccnt + 1);
|
255
|
1719 if (fol == NULL)
|
300
|
1720 {
|
|
1721 vim_free(p);
|
255
|
1722 goto endFAIL;
|
300
|
1723 }
|
255
|
1724 for (i = 0; i < ccnt; ++i)
|
|
1725 fol[i] = getc(fd); /* <fchars> */
|
|
1726 fol[i] = NUL;
|
|
1727
|
324
|
1728 /* Set the word-char flags and fill SPELL_ISUPPER() table. */
|
300
|
1729 i = set_spell_charflags(p, cnt, fol);
|
|
1730 vim_free(p);
|
|
1731 vim_free(fol);
|
351
|
1732 #if 0 /* tolerate the differences */
|
300
|
1733 if (i == FAIL)
|
255
|
1734 goto formerr;
|
351
|
1735 #endif
|
255
|
1736 }
|
|
1737 else
|
|
1738 {
|
|
1739 /* When <charflagslen> is zero then <fcharlen> must also be zero. */
|
|
1740 cnt = (getc(fd) << 8) + getc(fd);
|
|
1741 if (cnt != 0)
|
|
1742 goto formerr;
|
|
1743 }
|
|
1744
|
366
|
1745 /* <midwordlen> <midword> */
|
|
1746 cnt = (getc(fd) << 8) + getc(fd);
|
|
1747 if (cnt < 0)
|
|
1748 goto truncerr;
|
|
1749 if (cnt > 0)
|
|
1750 {
|
|
1751 for (i = 0; i < cnt; ++i)
|
|
1752 if (i < MAXWLEN) /* truncate at reasonable length */
|
|
1753 buf[i] = getc(fd);
|
|
1754 if (i < MAXWLEN)
|
|
1755 buf[i] = NUL;
|
|
1756 else
|
|
1757 buf[MAXWLEN] = NUL;
|
|
1758
|
|
1759 /* The midword characters add up to any midword characters from other
|
|
1760 * .spel files. */
|
|
1761 for (p = buf; *p != NUL; )
|
|
1762 #ifdef FEAT_MBYTE
|
|
1763 if (has_mbyte)
|
|
1764 {
|
|
1765 c = mb_ptr2char(p);
|
|
1766 i = mb_ptr2len_check(p);
|
|
1767 if (c < 256)
|
|
1768 spell_ismw[c] = TRUE;
|
|
1769 else if (spell_ismw_mb == NULL)
|
|
1770 /* First multi-byte char in "spell_ismw_mb". */
|
|
1771 spell_ismw_mb = vim_strnsave(p, i);
|
|
1772 else
|
|
1773 {
|
|
1774 /* Append multi-byte chars to "spell_ismw_mb". */
|
|
1775 n = STRLEN(spell_ismw_mb);
|
|
1776 bp = vim_strnsave(spell_ismw_mb, n + i);
|
|
1777 if (bp != NULL)
|
|
1778 {
|
|
1779 vim_free(spell_ismw_mb);
|
|
1780 spell_ismw_mb = bp;
|
|
1781 vim_strncpy(bp + n, p, i);
|
|
1782 }
|
|
1783 }
|
|
1784 p += i;
|
|
1785 }
|
|
1786 else
|
|
1787 #endif
|
|
1788 spell_ismw[*p++] = TRUE;
|
|
1789 }
|
|
1790
|
339
|
1791 /* <prefcondcnt> <prefcond> ... */
|
|
1792 cnt = (getc(fd) << 8) + getc(fd); /* <prefcondcnt> */
|
|
1793 if (cnt > 0)
|
|
1794 {
|
|
1795 lp->sl_prefprog = (regprog_T **)alloc_clear(
|
|
1796 (unsigned)sizeof(regprog_T *) * cnt);
|
|
1797 if (lp->sl_prefprog == NULL)
|
|
1798 goto endFAIL;
|
|
1799 lp->sl_prefixcnt = cnt;
|
|
1800
|
|
1801 for (i = 0; i < cnt; ++i)
|
|
1802 {
|
|
1803 /* <prefcond> : <condlen> <condstr> */
|
|
1804 n = getc(fd); /* <condlen> */
|
|
1805 if (n < 0)
|
|
1806 goto formerr;
|
|
1807 /* When <condlen> is zero we have an empty condition. Otherwise
|
|
1808 * compile the regexp program used to check for the condition. */
|
|
1809 if (n > 0)
|
|
1810 {
|
344
|
1811 buf[0] = '^'; /* always match at one position only */
|
|
1812 p = buf + 1;
|
339
|
1813 while (n-- > 0)
|
|
1814 *p++ = getc(fd); /* <condstr> */
|
|
1815 *p = NUL;
|
|
1816 lp->sl_prefprog[i] = vim_regcomp(buf, RE_MAGIC + RE_STRING);
|
|
1817 }
|
|
1818 }
|
|
1819 }
|
|
1820
|
|
1821
|
323
|
1822 /* <SUGGEST> : <repcount> <rep> ...
|
|
1823 * <salflags> <salcount> <sal> ...
|
|
1824 * <maplen> <mapstr> */
|
339
|
1825
|
344
|
1826 cnt = (getc(fd) << 8) + getc(fd); /* <repcount> */
|
|
1827 if (cnt < 0)
|
|
1828 goto formerr;
|
|
1829
|
|
1830 gap = &lp->sl_rep;
|
|
1831 if (ga_grow(gap, cnt) == FAIL)
|
|
1832 goto endFAIL;
|
|
1833
|
|
1834 /* <rep> : <repfromlen> <repfrom> <reptolen> <repto> */
|
|
1835 for (; gap->ga_len < cnt; ++gap->ga_len)
|
|
1836 {
|
|
1837 ftp = &((fromto_T *)gap->ga_data)[gap->ga_len];
|
|
1838 for (rr = 1; rr <= 2; ++rr)
|
|
1839 {
|
|
1840 ccnt = getc(fd);
|
|
1841 if (ccnt < 0)
|
|
1842 {
|
|
1843 if (rr == 2)
|
|
1844 vim_free(ftp->ft_from);
|
|
1845 goto formerr;
|
|
1846 }
|
|
1847 if ((p = alloc(ccnt + 1)) == NULL)
|
|
1848 {
|
|
1849 if (rr == 2)
|
|
1850 vim_free(ftp->ft_from);
|
|
1851 goto endFAIL;
|
|
1852 }
|
|
1853 for (i = 0; i < ccnt; ++i)
|
|
1854 p[i] = getc(fd); /* <repfrom> or <repto> */
|
|
1855 p[i] = NUL;
|
|
1856 if (rr == 1)
|
|
1857 ftp->ft_from = p;
|
|
1858 else
|
|
1859 ftp->ft_to = p;
|
|
1860 }
|
|
1861 }
|
|
1862
|
|
1863 /* Fill the first-index table. */
|
|
1864 first = lp->sl_rep_first;
|
|
1865 for (i = 0; i < 256; ++i)
|
|
1866 first[i] = -1;
|
|
1867 for (i = 0; i < gap->ga_len; ++i)
|
323
|
1868 {
|
344
|
1869 ftp = &((fromto_T *)gap->ga_data)[i];
|
|
1870 if (first[*ftp->ft_from] == -1)
|
|
1871 first[*ftp->ft_from] = i;
|
|
1872 }
|
|
1873
|
|
1874 i = getc(fd); /* <salflags> */
|
|
1875 if (i & SAL_F0LLOWUP)
|
|
1876 lp->sl_followup = TRUE;
|
|
1877 if (i & SAL_COLLAPSE)
|
|
1878 lp->sl_collapse = TRUE;
|
|
1879 if (i & SAL_REM_ACCENTS)
|
|
1880 lp->sl_rem_accents = TRUE;
|
|
1881
|
|
1882 cnt = (getc(fd) << 8) + getc(fd); /* <salcount> */
|
|
1883 if (cnt < 0)
|
|
1884 goto formerr;
|
|
1885
|
|
1886 gap = &lp->sl_sal;
|
|
1887 if (ga_grow(gap, cnt) == FAIL)
|
|
1888 goto endFAIL;
|
|
1889
|
|
1890 /* <sal> : <salfromlen> <salfrom> <saltolen> <salto> */
|
|
1891 for (; gap->ga_len < cnt; ++gap->ga_len)
|
|
1892 {
|
|
1893 smp = &((salitem_T *)gap->ga_data)[gap->ga_len];
|
|
1894 ccnt = getc(fd); /* <salfromlen> */
|
|
1895 if (ccnt < 0)
|
|
1896 goto formerr;
|
|
1897 if ((p = alloc(ccnt + 2)) == NULL)
|
|
1898 goto endFAIL;
|
|
1899 smp->sm_lead = p;
|
|
1900
|
|
1901 /* Read up to the first special char into sm_lead. */
|
|
1902 for (i = 0; i < ccnt; ++i)
|
323
|
1903 {
|
344
|
1904 c = getc(fd); /* <salfrom> */
|
|
1905 if (vim_strchr((char_u *)"0123456789(-<^$", c) != NULL)
|
|
1906 break;
|
|
1907 *p++ = c;
|
|
1908 }
|
|
1909 smp->sm_leadlen = p - smp->sm_lead;
|
|
1910 *p++ = NUL;
|
|
1911
|
|
1912 /* Put optional chars in sm_oneoff, if any. */
|
|
1913 if (c == '(')
|
|
1914 {
|
|
1915 smp->sm_oneoff = p;
|
|
1916 for (++i; i < ccnt; ++i)
|
|
1917 {
|
|
1918 c = getc(fd); /* <salfrom> */
|
|
1919 if (c == ')')
|
|
1920 break;
|
|
1921 *p++ = c;
|
|
1922 }
|
|
1923 *p++ = NUL;
|
|
1924 if (++i < ccnt)
|
|
1925 c = getc(fd);
|
323
|
1926 }
|
|
1927 else
|
344
|
1928 smp->sm_oneoff = NULL;
|
|
1929
|
|
1930 /* Any following chars go in sm_rules. */
|
|
1931 smp->sm_rules = p;
|
|
1932 if (i < ccnt)
|
|
1933 *p++ = c;
|
|
1934 for (++i; i < ccnt; ++i)
|
|
1935 *p++ = getc(fd); /* <salfrom> */
|
|
1936 *p++ = NUL;
|
|
1937
|
|
1938 ccnt = getc(fd); /* <saltolen> */
|
|
1939 if (ccnt < 0)
|
323
|
1940 {
|
344
|
1941 vim_free(smp->sm_lead);
|
|
1942 goto formerr;
|
323
|
1943 }
|
344
|
1944 if ((p = alloc(ccnt + 1)) == NULL)
|
323
|
1945 {
|
344
|
1946 vim_free(smp->sm_lead);
|
|
1947 goto endFAIL;
|
323
|
1948 }
|
344
|
1949 smp->sm_to = p;
|
|
1950
|
|
1951 for (i = 0; i < ccnt; ++i)
|
|
1952 *p++ = getc(fd); /* <salto> */
|
|
1953 *p++ = NUL;
|
|
1954 }
|
|
1955
|
|
1956 /* Fill the first-index table. */
|
|
1957 first = lp->sl_sal_first;
|
|
1958 for (i = 0; i < 256; ++i)
|
|
1959 first[i] = -1;
|
|
1960 for (i = 0; i < gap->ga_len; ++i)
|
|
1961 {
|
|
1962 smp = &((salitem_T *)gap->ga_data)[i];
|
|
1963 if (first[*smp->sm_lead] == -1)
|
|
1964 first[*smp->sm_lead] = i;
|
323
|
1965 }
|
|
1966
|
|
1967 cnt = (getc(fd) << 8) + getc(fd); /* <maplen> */
|
|
1968 if (cnt < 0)
|
|
1969 goto formerr;
|
|
1970 p = alloc(cnt + 1);
|
|
1971 if (p == NULL)
|
|
1972 goto endFAIL;
|
|
1973 for (i = 0; i < cnt; ++i)
|
|
1974 p[i] = getc(fd); /* <mapstr> */
|
|
1975 p[i] = NUL;
|
330
|
1976 set_map_str(lp, p);
|
|
1977 vim_free(p);
|
323
|
1978
|
236
|
1979
|
300
|
1980 /* round 1: <LWORDTREE>
|
339
|
1981 * round 2: <KWORDTREE>
|
|
1982 * round 3: <PREFIXTREE> */
|
|
1983 for (round = 1; round <= 3; ++round)
|
236
|
1984 {
|
300
|
1985 /* The tree size was computed when writing the file, so that we can
|
|
1986 * allocate it as one long block. <nodecount> */
|
|
1987 len = (getc(fd) << 24) + (getc(fd) << 16) + (getc(fd) << 8) + getc(fd);
|
|
1988 if (len < 0)
|
|
1989 goto truncerr;
|
|
1990 if (len > 0)
|
236
|
1991 {
|
300
|
1992 /* Allocate the byte array. */
|
339
|
1993 bp = lalloc((long_u)len, TRUE);
|
|
1994 if (bp == NULL)
|
300
|
1995 goto endFAIL;
|
|
1996 if (round == 1)
|
339
|
1997 lp->sl_fbyts = bp;
|
|
1998 else if (round == 2)
|
|
1999 lp->sl_kbyts = bp;
|
300
|
2000 else
|
339
|
2001 lp->sl_pbyts = bp;
|
236
|
2002
|
300
|
2003 /* Allocate the index array. */
|
339
|
2004 ip = (idx_T *)lalloc_clear((long_u)(len * sizeof(int)), TRUE);
|
|
2005 if (ip == NULL)
|
300
|
2006 goto endFAIL;
|
|
2007 if (round == 1)
|
339
|
2008 lp->sl_fidxs = ip;
|
|
2009 else if (round == 2)
|
|
2010 lp->sl_kidxs = ip;
|
300
|
2011 else
|
339
|
2012 lp->sl_pidxs = ip;
|
300
|
2013
|
|
2014 /* Read the tree and store it in the array. */
|
339
|
2015 idx = read_tree(fd, bp, ip, len, 0, round == 3, lp->sl_prefixcnt);
|
324
|
2016 if (idx == -1)
|
300
|
2017 goto truncerr;
|
324
|
2018 if (idx < 0)
|
236
|
2019 goto formerr;
|
|
2020 }
|
300
|
2021 }
|
243
|
2022
|
310
|
2023 /* For a new file link it in the list of spell files. */
|
|
2024 if (old_lp == NULL)
|
|
2025 {
|
|
2026 lp->sl_next = first_lang;
|
|
2027 first_lang = lp;
|
|
2028 }
|
307
|
2029
|
255
|
2030 goto endOK;
|
|
2031
|
|
2032 endFAIL:
|
310
|
2033 if (lang != NULL)
|
|
2034 /* truncating the name signals the error to spell_load_lang() */
|
|
2035 *lang = NUL;
|
|
2036 if (lp != NULL && old_lp == NULL)
|
323
|
2037 {
|
307
|
2038 slang_free(lp);
|
323
|
2039 lp = NULL;
|
|
2040 }
|
255
|
2041
|
|
2042 endOK:
|
236
|
2043 if (fd != NULL)
|
|
2044 fclose(fd);
|
|
2045 sourcing_name = save_sourcing_name;
|
|
2046 sourcing_lnum = save_sourcing_lnum;
|
323
|
2047
|
|
2048 return lp;
|
236
|
2049 }
|
|
2050
|
|
2051 /*
|
300
|
2052 * Read one row of siblings from the spell file and store it in the byte array
|
|
2053 * "byts" and index array "idxs". Recursively read the children.
|
|
2054 *
|
346
|
2055 * NOTE: The code here must match put_node().
|
300
|
2056 *
|
|
2057 * Returns the index follosing the siblings.
|
|
2058 * Returns -1 if the file is shorter than expected.
|
|
2059 * Returns -2 if there is a format error.
|
236
|
2060 */
|
324
|
2061 static idx_T
|
339
|
2062 read_tree(fd, byts, idxs, maxidx, startidx, prefixtree, maxprefcondnr)
|
300
|
2063 FILE *fd;
|
|
2064 char_u *byts;
|
324
|
2065 idx_T *idxs;
|
300
|
2066 int maxidx; /* size of arrays */
|
324
|
2067 idx_T startidx; /* current index in "byts" and "idxs" */
|
339
|
2068 int prefixtree; /* TRUE for reading PREFIXTREE */
|
|
2069 int maxprefcondnr; /* maximum for <prefcondnr> */
|
236
|
2070 {
|
300
|
2071 int len;
|
|
2072 int i;
|
|
2073 int n;
|
324
|
2074 idx_T idx = startidx;
|
300
|
2075 int c;
|
366
|
2076 int c2;
|
300
|
2077 #define SHARED_MASK 0x8000000
|
236
|
2078
|
300
|
2079 len = getc(fd); /* <siblingcount> */
|
|
2080 if (len <= 0)
|
|
2081 return -1;
|
|
2082
|
|
2083 if (startidx + len >= maxidx)
|
|
2084 return -2;
|
|
2085 byts[idx++] = len;
|
|
2086
|
|
2087 /* Read the byte values, flag/region bytes and shared indexes. */
|
|
2088 for (i = 1; i <= len; ++i)
|
236
|
2089 {
|
300
|
2090 c = getc(fd); /* <byte> */
|
|
2091 if (c < 0)
|
|
2092 return -1;
|
|
2093 if (c <= BY_SPECIAL)
|
|
2094 {
|
366
|
2095 if (c == BY_NOFLAGS && !prefixtree)
|
300
|
2096 {
|
|
2097 /* No flags, all regions. */
|
|
2098 idxs[idx] = 0;
|
|
2099 c = 0;
|
|
2100 }
|
366
|
2101 else if (c == BY_FLAGS || c == BY_NOFLAGS)
|
300
|
2102 {
|
339
|
2103 if (prefixtree)
|
|
2104 {
|
|
2105 /* Read the prefix ID and the condition nr. In idxs[]
|
|
2106 * store the prefix ID in the low byte, the condition
|
|
2107 * index shifted up 8 bits. */
|
366
|
2108 c2 = getc(fd); /* <prefixID> */
|
339
|
2109 n = (getc(fd) << 8) + getc(fd); /* <prefcondnr> */
|
|
2110 if (n >= maxprefcondnr)
|
|
2111 return -2;
|
366
|
2112 c2 += (n << 8);
|
|
2113 if (c == BY_NOFLAGS)
|
|
2114 c = c2;
|
|
2115 else
|
|
2116 c = c2 | WF_RAREPFX;
|
339
|
2117 }
|
|
2118 else
|
|
2119 {
|
|
2120 /* Read flags and optional region and prefix ID. In
|
|
2121 * idxs[] the flags go in the low byte, region above that
|
|
2122 * and prefix ID above the region. */
|
|
2123 c = getc(fd); /* <flags> */
|
|
2124 if (c & WF_REGION)
|
|
2125 c = (getc(fd) << 8) + c; /* <region> */
|
|
2126 if (c & WF_PFX)
|
|
2127 c = (getc(fd) << 16) + c; /* <prefixID> */
|
|
2128 }
|
|
2129
|
300
|
2130 idxs[idx] = c;
|
|
2131 c = 0;
|
|
2132 }
|
|
2133 else /* c == BY_INDEX */
|
|
2134 {
|
|
2135 /* <nodeidx> */
|
|
2136 n = (getc(fd) << 16) + (getc(fd) << 8) + getc(fd);
|
|
2137 if (n < 0 || n >= maxidx)
|
|
2138 return -2;
|
|
2139 idxs[idx] = n + SHARED_MASK;
|
|
2140 c = getc(fd); /* <xbyte> */
|
|
2141 }
|
|
2142 }
|
|
2143 byts[idx++] = c;
|
236
|
2144 }
|
|
2145
|
300
|
2146 /* Recursively read the children for non-shared siblings.
|
|
2147 * Skip the end-of-word ones (zero byte value) and the shared ones (and
|
|
2148 * remove SHARED_MASK) */
|
|
2149 for (i = 1; i <= len; ++i)
|
|
2150 if (byts[startidx + i] != 0)
|
|
2151 {
|
|
2152 if (idxs[startidx + i] & SHARED_MASK)
|
|
2153 idxs[startidx + i] &= ~SHARED_MASK;
|
|
2154 else
|
|
2155 {
|
|
2156 idxs[startidx + i] = idx;
|
339
|
2157 idx = read_tree(fd, byts, idxs, maxidx, idx,
|
|
2158 prefixtree, maxprefcondnr);
|
300
|
2159 if (idx < 0)
|
|
2160 break;
|
|
2161 }
|
|
2162 }
|
236
|
2163
|
300
|
2164 return idx;
|
236
|
2165 }
|
|
2166
|
|
2167 /*
|
|
2168 * Parse 'spelllang' and set buf->b_langp accordingly.
|
351
|
2169 * Returns NULL if it's OK, an error message otherwise.
|
236
|
2170 */
|
|
2171 char_u *
|
|
2172 did_set_spelllang(buf)
|
|
2173 buf_T *buf;
|
|
2174 {
|
|
2175 garray_T ga;
|
351
|
2176 char_u *splp;
|
236
|
2177 char_u *region;
|
355
|
2178 int filename;
|
236
|
2179 int region_mask;
|
|
2180 slang_T *lp;
|
|
2181 int c;
|
351
|
2182 char_u lang[MAXWLEN + 1];
|
323
|
2183 char_u spf_name[MAXPATHL];
|
351
|
2184 int load_spf;
|
|
2185 int len;
|
|
2186 char_u *p;
|
236
|
2187
|
|
2188 ga_init2(&ga, sizeof(langp_T), 2);
|
|
2189
|
351
|
2190 /* Make the name of the .spl file associated with 'spellfile'. */
|
323
|
2191 if (*buf->b_p_spf == NUL)
|
351
|
2192 load_spf = FALSE;
|
323
|
2193 else
|
351
|
2194 {
|
323
|
2195 vim_snprintf((char *)spf_name, sizeof(spf_name), "%s.spl",
|
|
2196 buf->b_p_spf);
|
351
|
2197 load_spf = TRUE;
|
|
2198 }
|
|
2199
|
|
2200 /* loop over comma separated language names. */
|
|
2201 for (splp = buf->b_p_spl; *splp != NUL; )
|
|
2202 {
|
|
2203 /* Get one language name. */
|
|
2204 copy_option_part(&splp, lang, MAXWLEN, ",");
|
|
2205
|
240
|
2206 region = NULL;
|
351
|
2207 len = STRLEN(lang);
|
355
|
2208
|
|
2209 /* If the name ends in ".spl" use it as the name of the spell file.
|
|
2210 * If there is a region name let "region" point to it and remove it
|
|
2211 * from the name. */
|
|
2212 if (len > 4 && fnamecmp(lang + len - 4, ".spl") == 0)
|
236
|
2213 {
|
355
|
2214 filename = TRUE;
|
|
2215
|
|
2216 /* Check if we loaded this language before. */
|
|
2217 for (lp = first_lang; lp != NULL; lp = lp->sl_next)
|
|
2218 if (fullpathcmp(lang, lp->sl_fname, FALSE) == FPC_SAME)
|
|
2219 break;
|
236
|
2220 }
|
355
|
2221 else
|
|
2222 {
|
|
2223 filename = FALSE;
|
|
2224 if (len > 3 && lang[len - 3] == '_')
|
|
2225 {
|
|
2226 region = lang + len - 2;
|
|
2227 len -= 3;
|
|
2228 lang[len] = NUL;
|
|
2229 }
|
|
2230
|
|
2231 /* Check if we loaded this language before. */
|
|
2232 for (lp = first_lang; lp != NULL; lp = lp->sl_next)
|
|
2233 if (STRICMP(lang, lp->sl_name) == 0)
|
|
2234 break;
|
|
2235 }
|
236
|
2236
|
351
|
2237 /* If not found try loading the language now. */
|
236
|
2238 if (lp == NULL)
|
355
|
2239 {
|
|
2240 if (filename)
|
|
2241 (void)spell_load_file(lang, lang, NULL, FALSE);
|
|
2242 else
|
|
2243 spell_load_lang(lang);
|
|
2244 }
|
236
|
2245
|
307
|
2246 /*
|
351
|
2247 * Loop over the languages, there can be several files for "lang".
|
307
|
2248 */
|
|
2249 for (lp = first_lang; lp != NULL; lp = lp->sl_next)
|
355
|
2250 if (filename ? fullpathcmp(lang, lp->sl_fname, FALSE) == FPC_SAME
|
|
2251 : STRICMP(lang, lp->sl_name) == 0)
|
236
|
2252 {
|
316
|
2253 region_mask = REGION_ALL;
|
355
|
2254 if (!filename && region != NULL)
|
236
|
2255 {
|
307
|
2256 /* find region in sl_regions */
|
|
2257 c = find_region(lp->sl_regions, region);
|
|
2258 if (c == REGION_ALL)
|
|
2259 {
|
316
|
2260 if (!lp->sl_add)
|
351
|
2261 smsg((char_u *)
|
|
2262 _("Warning: region %s not supported"),
|
|
2263 region);
|
307
|
2264 }
|
|
2265 else
|
|
2266 region_mask = 1 << c;
|
236
|
2267 }
|
307
|
2268
|
|
2269 if (ga_grow(&ga, 1) == FAIL)
|
|
2270 {
|
|
2271 ga_clear(&ga);
|
|
2272 return e_outofmem;
|
|
2273 }
|
|
2274 LANGP_ENTRY(ga, ga.ga_len)->lp_slang = lp;
|
|
2275 LANGP_ENTRY(ga, ga.ga_len)->lp_region = region_mask;
|
|
2276 ++ga.ga_len;
|
323
|
2277
|
351
|
2278 /* Check if this is the spell file related to 'spellfile'. */
|
|
2279 if (load_spf && fullpathcmp(spf_name, lp->sl_fname, FALSE)
|
|
2280 == FPC_SAME)
|
|
2281 load_spf = FALSE;
|
236
|
2282 }
|
|
2283 }
|
|
2284
|
323
|
2285 /*
|
|
2286 * Make sure the 'spellfile' file is loaded. It may be in 'runtimepath',
|
|
2287 * then it's probably loaded above already. Otherwise load it here.
|
|
2288 */
|
351
|
2289 if (load_spf)
|
|
2290 {
|
|
2291 /* Check if it was loaded already. */
|
323
|
2292 for (lp = first_lang; lp != NULL; lp = lp->sl_next)
|
|
2293 if (fullpathcmp(spf_name, lp->sl_fname, FALSE) == FPC_SAME)
|
|
2294 break;
|
|
2295 if (lp == NULL)
|
|
2296 {
|
351
|
2297 /* Not loaded, try loading it now. The language name includes the
|
|
2298 * region name, the region is ignored otherwise. */
|
|
2299 vim_strncpy(lang, gettail(buf->b_p_spf), MAXWLEN);
|
|
2300 p = vim_strchr(lang, '.');
|
|
2301 if (p != NULL)
|
|
2302 *p = NUL; /* truncate at ".encoding.add" */
|
|
2303 lp = spell_load_file(spf_name, lang, NULL, TRUE);
|
323
|
2304 }
|
|
2305 if (lp != NULL && ga_grow(&ga, 1) == OK)
|
|
2306 {
|
|
2307 LANGP_ENTRY(ga, ga.ga_len)->lp_slang = lp;
|
|
2308 LANGP_ENTRY(ga, ga.ga_len)->lp_region = REGION_ALL;
|
|
2309 ++ga.ga_len;
|
|
2310 }
|
|
2311 }
|
|
2312
|
236
|
2313 /* Add a NULL entry to mark the end of the list. */
|
|
2314 if (ga_grow(&ga, 1) == FAIL)
|
|
2315 {
|
|
2316 ga_clear(&ga);
|
|
2317 return e_outofmem;
|
|
2318 }
|
|
2319 LANGP_ENTRY(ga, ga.ga_len)->lp_slang = NULL;
|
|
2320 ++ga.ga_len;
|
|
2321
|
|
2322 /* Everything is fine, store the new b_langp value. */
|
|
2323 ga_clear(&buf->b_langp);
|
|
2324 buf->b_langp = ga;
|
|
2325
|
|
2326 return NULL;
|
|
2327 }
|
|
2328
|
|
2329 /*
|
|
2330 * Find the region "region[2]" in "rp" (points to "sl_regions").
|
|
2331 * Each region is simply stored as the two characters of it's name.
|
|
2332 * Returns the index if found, REGION_ALL if not found.
|
|
2333 */
|
|
2334 static int
|
|
2335 find_region(rp, region)
|
|
2336 char_u *rp;
|
|
2337 char_u *region;
|
|
2338 {
|
|
2339 int i;
|
|
2340
|
|
2341 for (i = 0; ; i += 2)
|
|
2342 {
|
|
2343 if (rp[i] == NUL)
|
|
2344 return REGION_ALL;
|
|
2345 if (rp[i] == region[0] && rp[i + 1] == region[1])
|
|
2346 break;
|
|
2347 }
|
|
2348 return i / 2;
|
|
2349 }
|
|
2350
|
|
2351 /*
|
323
|
2352 * Return case type of word:
|
236
|
2353 * w word 0
|
300
|
2354 * Word WF_ONECAP
|
|
2355 * W WORD WF_ALLCAP
|
|
2356 * WoRd wOrd WF_KEEPCAP
|
236
|
2357 */
|
|
2358 static int
|
|
2359 captype(word, end)
|
|
2360 char_u *word;
|
323
|
2361 char_u *end; /* When NULL use up to NUL byte. */
|
236
|
2362 {
|
|
2363 char_u *p;
|
|
2364 int c;
|
|
2365 int firstcap;
|
|
2366 int allcap;
|
|
2367 int past_second = FALSE; /* past second word char */
|
|
2368
|
|
2369 /* find first letter */
|
358
|
2370 for (p = word; !spell_iswordp(p); mb_ptr_adv(p))
|
323
|
2371 if (end == NULL ? *p == NUL : p >= end)
|
236
|
2372 return 0; /* only non-word characters, illegal word */
|
|
2373 #ifdef FEAT_MBYTE
|
310
|
2374 if (has_mbyte)
|
|
2375 c = mb_ptr2char_adv(&p);
|
|
2376 else
|
236
|
2377 #endif
|
310
|
2378 c = *p++;
|
324
|
2379 firstcap = allcap = SPELL_ISUPPER(c);
|
236
|
2380
|
|
2381 /*
|
|
2382 * Need to check all letters to find a word with mixed upper/lower.
|
|
2383 * But a word with an upper char only at start is a ONECAP.
|
|
2384 */
|
323
|
2385 for ( ; end == NULL ? *p != NUL : p < end; mb_ptr_adv(p))
|
358
|
2386 if (spell_iswordp(p))
|
236
|
2387 {
|
|
2388 #ifdef FEAT_MBYTE
|
|
2389 c = mb_ptr2char(p);
|
|
2390 #else
|
|
2391 c = *p;
|
|
2392 #endif
|
324
|
2393 if (!SPELL_ISUPPER(c))
|
236
|
2394 {
|
|
2395 /* UUl -> KEEPCAP */
|
|
2396 if (past_second && allcap)
|
300
|
2397 return WF_KEEPCAP;
|
236
|
2398 allcap = FALSE;
|
|
2399 }
|
|
2400 else if (!allcap)
|
|
2401 /* UlU -> KEEPCAP */
|
300
|
2402 return WF_KEEPCAP;
|
236
|
2403 past_second = TRUE;
|
|
2404 }
|
|
2405
|
|
2406 if (allcap)
|
300
|
2407 return WF_ALLCAP;
|
236
|
2408 if (firstcap)
|
300
|
2409 return WF_ONECAP;
|
236
|
2410 return 0;
|
|
2411 }
|
|
2412
|
355
|
# if defined(FEAT_MBYTE) || defined(EXITFREE) || defined(PROTO)
/*
 * Free all loaded languages: clear the language list of every buffer, free
 * each entry of the "first_lang" list and reset the spell character tables.
 */
    void
spell_free_all()
{
    slang_T     *slang;
    buf_T       *buf;

    /* No buffer may keep referring to a language that is about to be freed. */
    buf = firstbuf;
    while (buf != NULL)
    {
        ga_clear(&buf->b_langp);
        buf = buf->b_next;
    }

    /* Unlink each language from the list before freeing it. */
    for (slang = first_lang; slang != NULL; slang = first_lang)
    {
        first_lang = slang->sl_next;
        slang_free(slang);
    }

    init_spell_chartab();
}
# endif
|
|
2437
|
236
|
# if defined(FEAT_MBYTE) || defined(PROTO)
/*
 * Throw away all spelling tables and load them again.
 * Used after 'encoding' is set and when ":mkspell" was used.
 */
    void
spell_reload()
{
    buf_T       *buf;
    win_T       *wp;

    /* Start with a fresh table for spell_iswordp(). */
    init_spell_chartab();

    /* Release everything that was loaded. */
    spell_free_all();

    /* Handle 'spelllang' again for every buffer that needs it. */
    for (buf = firstbuf; buf != NULL; buf = buf->b_next)
    {
        /* Without 'spelllang' there is nothing to load. */
        if (*buf->b_p_spl == NUL)
            continue;

        /* Only load the wordlists when there is a window for this buffer in
         * which 'spell' is set. */
        FOR_ALL_WINDOWS(wp)
            if (wp->w_buffer == buf && wp->w_p_spell)
            {
                (void)did_set_spelllang(buf);
# ifdef FEAT_WINDOWS
                break;
# endif
            }
    }
}
# endif
|
|
2474
|
310
|
2475 /*
|
|
2476 * Reload the spell file "fname" if it's loaded.
|
|
2477 */
|
|
2478 static void
|
323
|
2479 spell_reload_one(fname, added_word)
|
310
|
2480 char_u *fname;
|
323
|
2481 int added_word; /* invoked through "zg" */
|
310
|
2482 {
|
|
2483 slang_T *lp;
|
323
|
2484 int didit = FALSE;
|
310
|
2485
|
|
2486 for (lp = first_lang; lp != NULL; lp = lp->sl_next)
|
|
2487 if (fullpathcmp(fname, lp->sl_fname, FALSE) == FPC_SAME)
|
|
2488 {
|
|
2489 slang_clear(lp);
|
323
|
2490 (void)spell_load_file(fname, NULL, lp, FALSE);
|
310
|
2491 redraw_all_later(NOT_VALID);
|
323
|
2492 didit = TRUE;
|
310
|
2493 }
|
323
|
2494
|
|
2495 /* When "zg" was used and the file wasn't loaded yet, should redo
|
|
2496 * 'spelllang' to get it loaded. */
|
|
2497 if (added_word && !didit)
|
|
2498 did_set_spelllang(curbuf);
|
310
|
2499 }
|
|
2500
|
|
2501
|
236
|
2502 /*
|
|
2503 * Functions for ":mkspell".
|
|
2504 */
|
|
2505
|
300
|
2506 #define MAXLINELEN 500 /* Maximum length in bytes of a line in a .aff
|
236
|
2507 and .dic file. */
|
|
2508 /*
|
|
2509 * Main structure to store the contents of a ".aff" file.
|
|
2510 */
|
|
2511 typedef struct afffile_S
|
|
2512 {
|
|
2513 char_u *af_enc; /* "SET", normalized, alloc'ed string or NULL */
|
310
|
2514 int af_rar; /* RAR ID for rare word */
|
|
2515 int af_kep; /* KEP ID for keep-case word */
|
346
|
2516 int af_bad; /* BAD ID for banned word */
|
339
|
2517 int af_pfxpostpone; /* postpone prefixes without chop string */
|
236
|
2518 hashtab_T af_pref; /* hashtable for prefixes, affheader_T */
|
|
2519 hashtab_T af_suff; /* hashtable for suffixes, affheader_T */
|
|
2520 } afffile_T;
|
|
2521
|
|
2522 typedef struct affentry_S affentry_T;
|
|
2523 /* Affix entry from ".aff" file. Used for prefixes and suffixes. */
|
|
2524 struct affentry_S
|
|
2525 {
|
|
2526 affentry_T *ae_next; /* next affix with same name/number */
|
|
2527 char_u *ae_chop; /* text to chop off basic word (can be NULL) */
|
|
2528 char_u *ae_add; /* text to add to basic word (can be NULL) */
|
|
2529 char_u *ae_cond; /* condition (NULL for ".") */
|
|
2530 regprog_T *ae_prog; /* regexp program for ae_cond or NULL */
|
366
|
2531 int ae_rare; /* rare affix */
|
300
|
2532 };
|
|
2533
|
|
2534 /* Affix header from ".aff" file. Used for af_pref and af_suff. */
|
|
2535 typedef struct affheader_S
|
|
2536 {
|
|
2537 char_u ah_key[2]; /* key for hashtable == name of affix entry */
|
339
|
2538 int ah_newID; /* prefix ID after renumbering */
|
300
|
2539 int ah_combine; /* suffix may combine with prefix */
|
|
2540 affentry_T *ah_first; /* first affix entry */
|
|
2541 } affheader_T;
|
|
2542
|
|
2543 #define HI2AH(hi) ((affheader_T *)(hi)->hi_key)
|
|
2544
|
|
2545 /*
|
|
2546 * Structure that is used to store the items in the word tree. This avoids
|
|
2547 * the need to keep track of each allocated thing, it's freed all at once
|
|
2548 * after ":mkspell" is done.
|
|
2549 */
|
|
2550 #define SBLOCKSIZE 16000 /* size of sb_data */
|
|
2551 typedef struct sblock_S sblock_T;
|
|
2552 struct sblock_S
|
|
2553 {
|
|
2554 sblock_T *sb_next; /* next block in list */
|
|
2555 int sb_used; /* nr of bytes already in use */
|
|
2556 char_u sb_data[1]; /* data, actually longer */
|
236
|
2557 };
|
|
2558
|
|
2559 /*
|
300
|
2560 * A node in the tree.
|
236
|
2561 */
|
300
|
2562 typedef struct wordnode_S wordnode_T;
|
|
2563 struct wordnode_S
|
236
|
2564 {
|
346
|
2565 union /* shared to save space */
|
|
2566 {
|
|
2567 char_u hashkey[6]; /* room for the hash key */
|
|
2568 int index; /* index in written nodes (valid after first
|
|
2569 round) */
|
|
2570 } wn_u1;
|
|
2571 union /* shared to save space */
|
|
2572 {
|
|
2573 wordnode_T *next; /* next node with same hash key */
|
|
2574 wordnode_T *wnode; /* parent node that will write this node */
|
|
2575 } wn_u2;
|
300
|
2576 wordnode_T *wn_child; /* child (next byte in word) */
|
|
2577 wordnode_T *wn_sibling; /* next sibling (alternate byte in word,
|
|
2578 always sorted) */
|
|
2579 char_u wn_byte; /* Byte for this node. NUL for word end */
|
|
2580 char_u wn_flags; /* when wn_byte is NUL: WF_ flags */
|
339
|
2581 short wn_region; /* when wn_byte is NUL: region mask; for
|
|
2582 PREFIXTREE it's the prefcondnr */
|
|
2583 char_u wn_prefixID; /* supported/required prefix ID or 0 */
|
236
|
2584 };
|
|
2585
|
300
|
/* Get the wordnode_T that the key of hashtable item "hi" points to.  The
 * whole expansion is in parentheses, so that HI2WN(hi)->member works. */
#define HI2WN(hi)    ((wordnode_T *)((hi)->hi_key))
|
236
|
2587
|
300
|
2588 /*
|
|
2589 * Info used while reading the spell files.
|
|
2590 */
|
|
2591 typedef struct spellinfo_S
|
249
|
2592 {
|
300
|
2593 wordnode_T *si_foldroot; /* tree with case-folded words */
|
334
|
2594 long si_foldwcount; /* nr of words in si_foldroot */
|
300
|
2595 wordnode_T *si_keeproot; /* tree with keep-case words */
|
334
|
2596 long si_keepwcount; /* nr of words in si_keeproot */
|
339
|
2597 wordnode_T *si_prefroot; /* tree with postponed prefixes */
|
300
|
2598 sblock_T *si_blocks; /* memory blocks used */
|
|
2599 int si_ascii; /* handling only ASCII words */
|
310
|
2600 int si_add; /* addition file */
|
351
|
2601 int si_clear_chartab; /* when TRUE clear char tables */
|
300
|
2602 int si_region; /* region mask */
|
|
2603 vimconv_T si_conv; /* for conversion to 'encoding' */
|
302
|
2604 int si_memtot; /* runtime memory used */
|
310
|
2605 int si_verbose; /* verbose messages */
|
316
|
2606 int si_region_count; /* number of regions supported (1 when there
|
|
2607 are no regions) */
|
|
2608 char_u si_region_name[16]; /* region names (if count > 1) */
|
323
|
2609
|
|
2610 garray_T si_rep; /* list of fromto_T entries from REP lines */
|
|
2611 garray_T si_sal; /* list of fromto_T entries from SAL lines */
|
|
2612 int si_followup; /* soundsalike: ? */
|
|
2613 int si_collapse; /* soundsalike: ? */
|
|
2614 int si_rem_accents; /* soundsalike: remove accents */
|
|
2615 garray_T si_map; /* MAP info concatenated */
|
366
|
2616 char_u *si_midword; /* MIDWORD chars, alloc'ed string or NULL */
|
339
|
2617 garray_T si_prefcond; /* table with conditions for postponed
|
|
2618 * prefixes, each stored as a string */
|
|
2619 int si_newID; /* current value for ah_newID */
|
300
|
2620 } spellinfo_T;
|
249
|
2621
|
300
|
2622 static afffile_T *spell_read_aff __ARGS((char_u *fname, spellinfo_T *spin));
|
339
|
2623 static int str_equal __ARGS((char_u *s1, char_u *s2));
|
323
|
2624 static void add_fromto __ARGS((spellinfo_T *spin, garray_T *gap, char_u *from, char_u *to));
|
|
2625 static int sal_to_bool __ARGS((char_u *s));
|
240
|
2626 static int has_non_ascii __ARGS((char_u *s));
|
300
|
2627 static void spell_free_aff __ARGS((afffile_T *aff));
|
|
2628 static int spell_read_dic __ARGS((char_u *fname, spellinfo_T *spin, afffile_T *affile));
|
339
|
2629 static char_u *get_pfxlist __ARGS((afffile_T *affile, char_u *afflist, sblock_T **blp));
|
|
2630 static int store_aff_word __ARGS((char_u *word, spellinfo_T *spin, char_u *afflist, afffile_T *affile, hashtab_T *ht, hashtab_T *xht, int comb, int flags, char_u *pfxlist));
|
300
|
2631 static int spell_read_wordfile __ARGS((char_u *fname, spellinfo_T *spin));
|
|
2632 static void *getroom __ARGS((sblock_T **blp, size_t len));
|
|
2633 static char_u *getroom_save __ARGS((sblock_T **blp, char_u *s));
|
|
2634 static void free_blocks __ARGS((sblock_T *bl));
|
|
2635 static wordnode_T *wordtree_alloc __ARGS((sblock_T **blp));
|
339
|
2636 static int store_word __ARGS((char_u *word, spellinfo_T *spin, int flags, int region, char_u *pfxlist));
|
|
2637 static int tree_add_word __ARGS((char_u *word, wordnode_T *tree, int flags, int region, int prefixID, sblock_T **blp));
|
310
|
2638 static void wordtree_compress __ARGS((wordnode_T *root, spellinfo_T *spin));
|
300
|
2639 static int node_compress __ARGS((wordnode_T *node, hashtab_T *ht, int *tot));
|
|
2640 static int node_equal __ARGS((wordnode_T *n1, wordnode_T *n2));
|
316
|
2641 static void write_vim_spell __ARGS((char_u *fname, spellinfo_T *spin));
|
346
|
2642 static void clear_node __ARGS((wordnode_T *node));
|
|
2643 static int put_node __ARGS((FILE *fd, wordnode_T *node, int index, int regionmask, int prefixtree));
|
323
|
2644 static void mkspell __ARGS((int fcount, char_u **fnames, int ascii, int overwrite, int added_word));
|
310
|
2645 static void init_spellfile __ARGS((void));
|
236
|
2646
|
|
2647 /*
|
323
|
2648 * Read the affix file "fname".
|
316
|
2649 * Returns an afffile_T, NULL for complete failure.
|
236
|
2650 */
|
|
2651 static afffile_T *
|
300
|
2652 spell_read_aff(fname, spin)
|
236
|
2653 char_u *fname;
|
300
|
2654 spellinfo_T *spin;
|
236
|
2655 {
|
|
2656 FILE *fd;
|
|
2657 afffile_T *aff;
|
|
2658 char_u rline[MAXLINELEN];
|
|
2659 char_u *line;
|
|
2660 char_u *pc = NULL;
|
334
|
2661 #define MAXITEMCNT 7
|
|
2662 char_u *(items[MAXITEMCNT]);
|
236
|
2663 int itemcnt;
|
|
2664 char_u *p;
|
|
2665 int lnum = 0;
|
|
2666 affheader_T *cur_aff = NULL;
|
|
2667 int aff_todo = 0;
|
|
2668 hashtab_T *tp;
|
255
|
2669 char_u *low = NULL;
|
|
2670 char_u *fol = NULL;
|
|
2671 char_u *upp = NULL;
|
307
|
2672 static char *e_affname = N_("Affix name too long in %s line %d: %s");
|
323
|
2673 int do_rep;
|
|
2674 int do_sal;
|
|
2675 int do_map;
|
366
|
2676 int do_midword;
|
323
|
2677 int found_map = FALSE;
|
324
|
2678 hashitem_T *hi;
|
236
|
2679
|
300
|
2680 /*
|
|
2681 * Open the file.
|
|
2682 */
|
310
|
2683 fd = mch_fopen((char *)fname, "r");
|
236
|
2684 if (fd == NULL)
|
|
2685 {
|
|
2686 EMSG2(_(e_notopen), fname);
|
|
2687 return NULL;
|
|
2688 }
|
|
2689
|
310
|
2690 if (spin->si_verbose || p_verbose > 2)
|
|
2691 {
|
|
2692 if (!spin->si_verbose)
|
|
2693 verbose_enter();
|
366
|
2694 smsg((char_u *)_("Reading affix file %s ..."), fname);
|
310
|
2695 out_flush();
|
|
2696 if (!spin->si_verbose)
|
|
2697 verbose_leave();
|
|
2698 }
|
236
|
2699
|
323
|
2700 /* Only do REP lines when not done in another .aff file already. */
|
|
2701 do_rep = spin->si_rep.ga_len == 0;
|
|
2702
|
|
2703 /* Only do SAL lines when not done in another .aff file already. */
|
|
2704 do_sal = spin->si_sal.ga_len == 0;
|
|
2705
|
|
2706 /* Only do MAP lines when not done in another .aff file already. */
|
|
2707 do_map = spin->si_map.ga_len == 0;
|
|
2708
|
366
|
2709 /* Only do MIDWORD line when not done in another .aff file already */
|
|
2710 do_midword = spin->si_midword == NULL;
|
|
2711
|
300
|
2712 /*
|
|
2713 * Allocate and init the afffile_T structure.
|
|
2714 */
|
|
2715 aff = (afffile_T *)getroom(&spin->si_blocks, sizeof(afffile_T));
|
236
|
2716 if (aff == NULL)
|
|
2717 return NULL;
|
|
2718 hash_init(&aff->af_pref);
|
|
2719 hash_init(&aff->af_suff);
|
|
2720
|
|
2721 /*
|
|
2722 * Read all the lines in the file one by one.
|
|
2723 */
|
255
|
2724 while (!vim_fgets(rline, MAXLINELEN, fd) && !got_int)
|
236
|
2725 {
|
255
|
2726 line_breakcheck();
|
236
|
2727 ++lnum;
|
|
2728
|
|
2729 /* Skip comment lines. */
|
|
2730 if (*rline == '#')
|
|
2731 continue;
|
|
2732
|
|
2733 /* Convert from "SET" to 'encoding' when needed. */
|
|
2734 vim_free(pc);
|
310
|
2735 #ifdef FEAT_MBYTE
|
300
|
2736 if (spin->si_conv.vc_type != CONV_NONE)
|
236
|
2737 {
|
300
|
2738 pc = string_convert(&spin->si_conv, rline, NULL);
|
255
|
2739 if (pc == NULL)
|
|
2740 {
|
|
2741 smsg((char_u *)_("Conversion failure for word in %s line %d: %s"),
|
|
2742 fname, lnum, rline);
|
|
2743 continue;
|
|
2744 }
|
236
|
2745 line = pc;
|
|
2746 }
|
|
2747 else
|
310
|
2748 #endif
|
236
|
2749 {
|
|
2750 pc = NULL;
|
|
2751 line = rline;
|
|
2752 }
|
|
2753
|
|
2754 /* Split the line up in white separated items. Put a NUL after each
|
|
2755 * item. */
|
|
2756 itemcnt = 0;
|
|
2757 for (p = line; ; )
|
|
2758 {
|
|
2759 while (*p != NUL && *p <= ' ') /* skip white space and CR/NL */
|
|
2760 ++p;
|
|
2761 if (*p == NUL)
|
|
2762 break;
|
334
|
2763 if (itemcnt == MAXITEMCNT) /* too many items */
|
300
|
2764 break;
|
236
|
2765 items[itemcnt++] = p;
|
300
|
2766 while (*p > ' ') /* skip until white space or CR/NL */
|
236
|
2767 ++p;
|
|
2768 if (*p == NUL)
|
|
2769 break;
|
|
2770 *p++ = NUL;
|
|
2771 }
|
|
2772
|
|
2773 /* Handle non-empty lines. */
|
|
2774 if (itemcnt > 0)
|
|
2775 {
|
|
2776 if (STRCMP(items[0], "SET") == 0 && itemcnt == 2
|
|
2777 && aff->af_enc == NULL)
|
|
2778 {
|
310
|
2779 #ifdef FEAT_MBYTE
|
300
|
2780 /* Setup for conversion from "ENC" to 'encoding'. */
|
|
2781 aff->af_enc = enc_canonize(items[1]);
|
|
2782 if (aff->af_enc != NULL && !spin->si_ascii
|
|
2783 && convert_setup(&spin->si_conv, aff->af_enc,
|
|
2784 p_enc) == FAIL)
|
|
2785 smsg((char_u *)_("Conversion in %s not supported: from %s to %s"),
|
|
2786 fname, aff->af_enc, p_enc);
|
310
|
2787 #else
|
|
2788 smsg((char_u *)_("Conversion in %s not supported"), fname);
|
|
2789 #endif
|
236
|
2790 }
|
366
|
2791 else if (STRCMP(items[0], "MIDWORD") == 0 && itemcnt == 2)
|
|
2792 {
|
|
2793 if (do_midword)
|
|
2794 spin->si_midword = vim_strsave(items[1]);
|
|
2795 }
|
302
|
2796 else if (STRCMP(items[0], "NOSPLITSUGS") == 0 && itemcnt == 1)
|
|
2797 {
|
323
|
2798 /* ignored, we always split */
|
302
|
2799 }
|
323
|
2800 else if (STRCMP(items[0], "TRY") == 0 && itemcnt == 2)
|
300
|
2801 {
|
323
|
2802 /* ignored, we look in the tree for what chars may appear */
|
300
|
2803 }
|
307
|
2804 else if (STRCMP(items[0], "RAR") == 0 && itemcnt == 2
|
|
2805 && aff->af_rar == 0)
|
|
2806 {
|
|
2807 aff->af_rar = items[1][0];
|
|
2808 if (items[1][1] != NUL)
|
|
2809 smsg((char_u *)_(e_affname), fname, lnum, items[1]);
|
|
2810 }
|
310
|
2811 else if (STRCMP(items[0], "KEP") == 0 && itemcnt == 2
|
|
2812 && aff->af_kep == 0)
|
307
|
2813 {
|
310
|
2814 aff->af_kep = items[1][0];
|
307
|
2815 if (items[1][1] != NUL)
|
|
2816 smsg((char_u *)_(e_affname), fname, lnum, items[1]);
|
|
2817 }
|
346
|
2818 else if (STRCMP(items[0], "BAD") == 0 && itemcnt == 2
|
|
2819 && aff->af_bad == 0)
|
|
2820 {
|
|
2821 aff->af_bad = items[1][0];
|
|
2822 if (items[1][1] != NUL)
|
|
2823 smsg((char_u *)_(e_affname), fname, lnum, items[1]);
|
|
2824 }
|
339
|
2825 else if (STRCMP(items[0], "PFXPOSTPONE") == 0 && itemcnt == 1)
|
|
2826 {
|
|
2827 aff->af_pfxpostpone = TRUE;
|
|
2828 }
|
236
|
2829 else if ((STRCMP(items[0], "PFX") == 0
|
|
2830 || STRCMP(items[0], "SFX") == 0)
|
|
2831 && aff_todo == 0
|
334
|
2832 && itemcnt >= 4)
|
236
|
2833 {
|
334
|
2834 /* Myspell allows extra text after the item, but that might
|
|
2835 * mean mistakes go unnoticed. Require a comment-starter. */
|
|
2836 if (itemcnt > 4 && *items[4] != '#')
|
|
2837 smsg((char_u *)_("Trailing text in %s line %d: %s"),
|
|
2838 fname, lnum, items[4]);
|
|
2839
|
236
|
2840 /* New affix letter. */
|
300
|
2841 cur_aff = (affheader_T *)getroom(&spin->si_blocks,
|
|
2842 sizeof(affheader_T));
|
236
|
2843 if (cur_aff == NULL)
|
|
2844 break;
|
339
|
2845 cur_aff->ah_key[0] = *items[1]; /* TODO: multi-byte? */
|
236
|
2846 cur_aff->ah_key[1] = NUL;
|
|
2847 if (items[1][1] != NUL)
|
307
|
2848 smsg((char_u *)_(e_affname), fname, lnum, items[1]);
|
236
|
2849 if (*items[2] == 'Y')
|
|
2850 cur_aff->ah_combine = TRUE;
|
300
|
2851 else if (*items[2] != 'N')
|
236
|
2852 smsg((char_u *)_("Expected Y or N in %s line %d: %s"),
|
|
2853 fname, lnum, items[2]);
|
339
|
2854
|
236
|
2855 if (*items[0] == 'P')
|
339
|
2856 {
|
236
|
2857 tp = &aff->af_pref;
|
339
|
2858 /* Use a new number in the .spl file later, to be able to
|
|
2859 * handle multiple .aff files. */
|
|
2860 if (aff->af_pfxpostpone)
|
344
|
2861 cur_aff->ah_newID = ++spin->si_newID;
|
339
|
2862 }
|
236
|
2863 else
|
|
2864 tp = &aff->af_suff;
|
300
|
2865 aff_todo = atoi((char *)items[3]);
|
324
|
2866 hi = hash_find(tp, cur_aff->ah_key);
|
|
2867 if (!HASHITEM_EMPTY(hi))
|
300
|
2868 {
|
236
|
2869 smsg((char_u *)_("Duplicate affix in %s line %d: %s"),
|
|
2870 fname, lnum, items[1]);
|
300
|
2871 aff_todo = 0;
|
|
2872 }
|
236
|
2873 else
|
|
2874 hash_add(tp, cur_aff->ah_key);
|
|
2875 }
|
|
2876 else if ((STRCMP(items[0], "PFX") == 0
|
|
2877 || STRCMP(items[0], "SFX") == 0)
|
|
2878 && aff_todo > 0
|
|
2879 && STRCMP(cur_aff->ah_key, items[1]) == 0
|
334
|
2880 && itemcnt >= 5)
|
236
|
2881 {
|
|
2882 affentry_T *aff_entry;
|
366
|
2883 int rare = FALSE;
|
|
2884 int lasti = 5;
|
|
2885
|
|
2886 /* Check for "rare" after the other info. */
|
|
2887 if (itemcnt > 5 && STRICMP(items[5], "rare") == 0)
|
|
2888 {
|
|
2889 rare = TRUE;
|
|
2890 lasti = 6;
|
|
2891 }
|
236
|
2892
|
334
|
2893 /* Myspell allows extra text after the item, but that might
|
|
2894 * mean mistakes go unnoticed. Require a comment-starter. */
|
366
|
2895 if (itemcnt > lasti && *items[lasti] != '#')
|
334
|
2896 smsg((char_u *)_("Trailing text in %s line %d: %s"),
|
366
|
2897 fname, lnum, items[lasti]);
|
334
|
2898
|
236
|
2899 /* New item for an affix letter. */
|
|
2900 --aff_todo;
|
300
|
2901 aff_entry = (affentry_T *)getroom(&spin->si_blocks,
|
|
2902 sizeof(affentry_T));
|
236
|
2903 if (aff_entry == NULL)
|
|
2904 break;
|
366
|
2905 aff_entry->ae_rare = rare;
|
240
|
2906
|
236
|
2907 if (STRCMP(items[2], "0") != 0)
|
300
|
2908 aff_entry->ae_chop = getroom_save(&spin->si_blocks,
|
|
2909 items[2]);
|
236
|
2910 if (STRCMP(items[3], "0") != 0)
|
300
|
2911 aff_entry->ae_add = getroom_save(&spin->si_blocks,
|
|
2912 items[3]);
|
236
|
2913
|
300
|
2914 /* Don't use an affix entry with non-ASCII characters when
|
|
2915 * "spin->si_ascii" is TRUE. */
|
|
2916 if (!spin->si_ascii || !(has_non_ascii(aff_entry->ae_chop)
|
240
|
2917 || has_non_ascii(aff_entry->ae_add)))
|
|
2918 {
|
|
2919 aff_entry->ae_next = cur_aff->ah_first;
|
|
2920 cur_aff->ah_first = aff_entry;
|
300
|
2921
|
|
2922 if (STRCMP(items[4], ".") != 0)
|
|
2923 {
|
|
2924 char_u buf[MAXLINELEN];
|
|
2925
|
|
2926 aff_entry->ae_cond = getroom_save(&spin->si_blocks,
|
|
2927 items[4]);
|
|
2928 if (*items[0] == 'P')
|
|
2929 sprintf((char *)buf, "^%s", items[4]);
|
|
2930 else
|
|
2931 sprintf((char *)buf, "%s$", items[4]);
|
|
2932 aff_entry->ae_prog = vim_regcomp(buf,
|
|
2933 RE_MAGIC + RE_STRING);
|
|
2934 }
|
339
|
2935
|
|
2936 /* For postponed prefixes we need an entry in si_prefcond
|
|
2937 * for the condition. Use an existing one if possible. */
|
|
2938 if (*items[0] == 'P' && aff->af_pfxpostpone
|
|
2939 && aff_entry->ae_chop == NULL)
|
|
2940 {
|
|
2941 int idx;
|
|
2942 char_u **pp;
|
|
2943
|
|
2944 for (idx = spin->si_prefcond.ga_len - 1; idx >= 0;
|
|
2945 --idx)
|
|
2946 {
|
|
2947 p = ((char_u **)spin->si_prefcond.ga_data)[idx];
|
|
2948 if (str_equal(p, aff_entry->ae_cond))
|
|
2949 break;
|
|
2950 }
|
|
2951 if (idx < 0 && ga_grow(&spin->si_prefcond, 1) == OK)
|
|
2952 {
|
|
2953 /* Not found, add a new condition. */
|
|
2954 idx = spin->si_prefcond.ga_len++;
|
|
2955 pp = ((char_u **)spin->si_prefcond.ga_data) + idx;
|
|
2956 if (aff_entry->ae_cond == NULL)
|
|
2957 *pp = NULL;
|
|
2958 else
|
|
2959 *pp = getroom_save(&spin->si_blocks,
|
|
2960 aff_entry->ae_cond);
|
|
2961 }
|
|
2962
|
|
2963 /* Add the prefix to the prefix tree. */
|
|
2964 if (aff_entry->ae_add == NULL)
|
|
2965 p = (char_u *)"";
|
|
2966 else
|
|
2967 p = aff_entry->ae_add;
|
366
|
2968 tree_add_word(p, spin->si_prefroot, rare ? -2 : -1,
|
|
2969 idx, cur_aff->ah_newID, &spin->si_blocks);
|
339
|
2970 }
|
240
|
2971 }
|
236
|
2972 }
|
255
|
2973 else if (STRCMP(items[0], "FOL") == 0 && itemcnt == 2)
|
|
2974 {
|
|
2975 if (fol != NULL)
|
|
2976 smsg((char_u *)_("Duplicate FOL in %s line %d"),
|
|
2977 fname, lnum);
|
|
2978 else
|
|
2979 fol = vim_strsave(items[1]);
|
|
2980 }
|
|
2981 else if (STRCMP(items[0], "LOW") == 0 && itemcnt == 2)
|
|
2982 {
|
|
2983 if (low != NULL)
|
|
2984 smsg((char_u *)_("Duplicate LOW in %s line %d"),
|
|
2985 fname, lnum);
|
|
2986 else
|
|
2987 low = vim_strsave(items[1]);
|
|
2988 }
|
|
2989 else if (STRCMP(items[0], "UPP") == 0 && itemcnt == 2)
|
|
2990 {
|
|
2991 if (upp != NULL)
|
|
2992 smsg((char_u *)_("Duplicate UPP in %s line %d"),
|
|
2993 fname, lnum);
|
|
2994 else
|
|
2995 upp = vim_strsave(items[1]);
|
|
2996 }
|
236
|
2997 else if (STRCMP(items[0], "REP") == 0 && itemcnt == 2)
|
323
|
2998 {
|
236
|
2999 /* Ignore REP count */;
|
323
|
3000 if (!isdigit(*items[1]))
|
|
3001 smsg((char_u *)_("Expected REP count in %s line %d"),
|
|
3002 fname, lnum);
|
|
3003 }
|
236
|
3004 else if (STRCMP(items[0], "REP") == 0 && itemcnt == 3)
|
|
3005 {
|
|
3006 /* REP item */
|
323
|
3007 if (do_rep)
|
|
3008 add_fromto(spin, &spin->si_rep, items[1], items[2]);
|
|
3009 }
|
|
3010 else if (STRCMP(items[0], "MAP") == 0 && itemcnt == 2)
|
|
3011 {
|
|
3012 /* MAP item or count */
|
|
3013 if (!found_map)
|
|
3014 {
|
|
3015 /* First line contains the count. */
|
|
3016 found_map = TRUE;
|
|
3017 if (!isdigit(*items[1]))
|
|
3018 smsg((char_u *)_("Expected MAP count in %s line %d"),
|
|
3019 fname, lnum);
|
|
3020 }
|
|
3021 else if (do_map)
|
|
3022 {
|
346
|
3023 int c;
|
|
3024
|
|
3025 /* Check that every character appears only once. */
|
|
3026 for (p = items[1]; *p != NUL; )
|
|
3027 {
|
|
3028 #ifdef FEAT_MBYTE
|
|
3029 c = mb_ptr2char_adv(&p);
|
|
3030 #else
|
|
3031 c = *p++;
|
|
3032 #endif
|
|
3033 if ((spin->si_map.ga_len > 0
|
|
3034 && vim_strchr(spin->si_map.ga_data, c)
|
|
3035 != NULL)
|
|
3036 || vim_strchr(p, c) != NULL)
|
|
3037 smsg((char_u *)_("Duplicate character in MAP in %s line %d"),
|
|
3038 fname, lnum);
|
|
3039 }
|
|
3040
|
323
|
3041 /* We simply concatenate all the MAP strings, separated by
|
|
3042 * slashes. */
|
|
3043 ga_concat(&spin->si_map, items[1]);
|
|
3044 ga_append(&spin->si_map, '/');
|
|
3045 }
|
|
3046 }
|
|
3047 else if (STRCMP(items[0], "SAL") == 0 && itemcnt == 3)
|
|
3048 {
|
|
3049 if (do_sal)
|
|
3050 {
|
|
3051 /* SAL item (sounds-a-like)
|
|
3052 * Either one of the known keys or a from-to pair. */
|
|
3053 if (STRCMP(items[1], "followup") == 0)
|
|
3054 spin->si_followup = sal_to_bool(items[2]);
|
|
3055 else if (STRCMP(items[1], "collapse_result") == 0)
|
|
3056 spin->si_collapse = sal_to_bool(items[2]);
|
|
3057 else if (STRCMP(items[1], "remove_accents") == 0)
|
|
3058 spin->si_rem_accents = sal_to_bool(items[2]);
|
|
3059 else
|
|
3060 /* when "to" is "_" it means empty */
|
|
3061 add_fromto(spin, &spin->si_sal, items[1],
|
|
3062 STRCMP(items[2], "_") == 0 ? (char_u *)""
|
|
3063 : items[2]);
|
|
3064 }
|
236
|
3065 }
|
300
|
3066 else
|
236
|
3067 smsg((char_u *)_("Unrecognized item in %s line %d: %s"),
|
|
3068 fname, lnum, items[0]);
|
|
3069 }
|
|
3070 }
|
|
3071
|
255
|
3072 if (fol != NULL || low != NULL || upp != NULL)
|
|
3073 {
|
351
|
3074 if (spin->si_clear_chartab)
|
|
3075 {
|
|
3076 /* Clear the char type tables, don't want to use any of the
|
|
3077 * currently used spell properties. */
|
|
3078 init_spell_chartab();
|
|
3079 spin->si_clear_chartab = FALSE;
|
|
3080 }
|
|
3081
|
316
|
3082 /*
|
|
3083 * Don't write a word table for an ASCII file, so that we don't check
|
|
3084 * for conflicts with a word table that matches 'encoding'.
|
324
|
3085 * Don't write one for utf-8 either, we use utf_*() and
|
316
|
3086 * mb_get_class(), the list of chars in the file will be incomplete.
|
|
3087 */
|
|
3088 if (!spin->si_ascii
|
|
3089 #ifdef FEAT_MBYTE
|
|
3090 && !enc_utf8
|
|
3091 #endif
|
|
3092 )
|
260
|
3093 {
|
|
3094 if (fol == NULL || low == NULL || upp == NULL)
|
|
3095 smsg((char_u *)_("Missing FOL/LOW/UPP line in %s"), fname);
|
|
3096 else
|
316
|
3097 (void)set_spell_chartab(fol, low, upp);
|
260
|
3098 }
|
255
|
3099
|
|
3100 vim_free(fol);
|
|
3101 vim_free(low);
|
|
3102 vim_free(upp);
|
|
3103 }
|
|
3104
|
236
|
3105 vim_free(pc);
|
|
3106 fclose(fd);
|
|
3107 return aff;
|
|
3108 }
|
|
3109
|
|
3110 /*
|
339
|
3111 * Return TRUE if strings "s1" and "s2" are equal. Also consider both being
|
|
3112 * NULL as equal.
|
|
3113 */
|
|
3114 static int
|
|
3115 str_equal(s1, s2)
|
|
3116 char_u *s1;
|
|
3117 char_u *s2;
|
|
3118 {
|
|
3119 if (s1 == NULL || s2 == NULL)
|
|
3120 return s1 == s2;
|
|
3121 return STRCMP(s1, s2) == 0;
|
|
3122 }
|
|
3123
|
|
3124 /*
|
323
|
3125 * Add a from-to item to "gap". Used for REP and SAL items.
|
|
3126 * They are stored case-folded.
|
|
3127 */
|
|
3128 static void
|
|
3129 add_fromto(spin, gap, from, to)
|
|
3130 spellinfo_T *spin;
|
|
3131 garray_T *gap;
|
|
3132 char_u *from;
|
|
3133 char_u *to;
|
|
3134 {
|
|
3135 fromto_T *ftp;
|
|
3136 char_u word[MAXWLEN];
|
|
3137
|
|
3138 if (ga_grow(gap, 1) == OK)
|
|
3139 {
|
|
3140 ftp = ((fromto_T *)gap->ga_data) + gap->ga_len;
|
|
3141 (void)spell_casefold(from, STRLEN(from), word, MAXWLEN);
|
|
3142 ftp->ft_from = getroom_save(&spin->si_blocks, word);
|
|
3143 (void)spell_casefold(to, STRLEN(to), word, MAXWLEN);
|
|
3144 ftp->ft_to = getroom_save(&spin->si_blocks, word);
|
|
3145 ++gap->ga_len;
|
|
3146 }
|
|
3147 }
|
|
3148
|
|
3149 /*
|
|
3150 * Convert a boolean argument in a SAL line to TRUE or FALSE;
|
|
3151 */
|
|
3152 static int
|
|
3153 sal_to_bool(s)
|
|
3154 char_u *s;
|
|
3155 {
|
|
3156 return STRCMP(s, "1") == 0 || STRCMP(s, "true") == 0;
|
|
3157 }
|
|
3158
|
|
3159 /*
|
240
|
3160 * Return TRUE if string "s" contains a non-ASCII character (128 or higher).
|
|
3161 * When "s" is NULL FALSE is returned.
|
|
3162 */
|
|
3163 static int
|
|
3164 has_non_ascii(s)
|
|
3165 char_u *s;
|
|
3166 {
|
|
3167 char_u *p;
|
|
3168
|
|
3169 if (s != NULL)
|
|
3170 for (p = s; *p != NUL; ++p)
|
|
3171 if (*p >= 128)
|
|
3172 return TRUE;
|
|
3173 return FALSE;
|
|
3174 }
|
|
3175
|
|
3176 /*
|
236
|
3177 * Free the structure filled by spell_read_aff().
|
|
3178 */
|
|
3179 static void
|
|
3180 spell_free_aff(aff)
|
|
3181 afffile_T *aff;
|
|
3182 {
|
|
3183 hashtab_T *ht;
|
|
3184 hashitem_T *hi;
|
|
3185 int todo;
|
|
3186 affheader_T *ah;
|
300
|
3187 affentry_T *ae;
|
236
|
3188
|
|
3189 vim_free(aff->af_enc);
|
|
3190
|
339
|
3191 /* All this trouble to free the "ae_prog" items... */
|
236
|
3192 for (ht = &aff->af_pref; ; ht = &aff->af_suff)
|
|
3193 {
|
|
3194 todo = ht->ht_used;
|
|
3195 for (hi = ht->ht_array; todo > 0; ++hi)
|
|
3196 {
|
|
3197 if (!HASHITEM_EMPTY(hi))
|
|
3198 {
|
|
3199 --todo;
|
|
3200 ah = HI2AH(hi);
|
300
|
3201 for (ae = ah->ah_first; ae != NULL; ae = ae->ae_next)
|
|
3202 vim_free(ae->ae_prog);
|
236
|
3203 }
|
|
3204 }
|
|
3205 if (ht == &aff->af_suff)
|
|
3206 break;
|
|
3207 }
|
300
|
3208
|
236
|
3209 hash_clear(&aff->af_pref);
|
|
3210 hash_clear(&aff->af_suff);
|
|
3211 }
|
|
3212
|
|
3213 /*
|
300
|
3214 * Read dictionary file "fname".
|
236
|
3215 * Returns OK or FAIL;
|
|
3216 */
|
|
3217 static int
|
300
|
3218 spell_read_dic(fname, spin, affile)
|
236
|
3219 char_u *fname;
|
300
|
3220 spellinfo_T *spin;
|
|
3221 afffile_T *affile;
|
236
|
3222 {
|
300
|
3223 hashtab_T ht;
|
236
|
3224 char_u line[MAXLINELEN];
|
300
|
3225 char_u *afflist;
|
339
|
3226 char_u *pfxlist;
|
300
|
3227 char_u *dw;
|
236
|
3228 char_u *pc;
|
|
3229 char_u *w;
|
|
3230 int l;
|
|
3231 hash_T hash;
|
|
3232 hashitem_T *hi;
|
|
3233 FILE *fd;
|
|
3234 int lnum = 1;
|
300
|
3235 int non_ascii = 0;
|
|
3236 int retval = OK;
|
|
3237 char_u message[MAXLINELEN + MAXWLEN];
|
307
|
3238 int flags;
|
236
|
3239
|
300
|
3240 /*
|
|
3241 * Open the file.
|
|
3242 */
|
310
|
3243 fd = mch_fopen((char *)fname, "r");
|
236
|
3244 if (fd == NULL)
|
|
3245 {
|
|
3246 EMSG2(_(e_notopen), fname);
|
|
3247 return FAIL;
|
|
3248 }
|
|
3249
|
300
|
3250 /* The hashtable is only used to detect duplicated words. */
|
|
3251 hash_init(&ht);
|
|
3252
|
334
|
3253 spin->si_foldwcount = 0;
|
|
3254 spin->si_keepwcount = 0;
|
|
3255
|
310
|
3256 if (spin->si_verbose || p_verbose > 2)
|
|
3257 {
|
|
3258 if (!spin->si_verbose)
|
|
3259 verbose_enter();
|
366
|
3260 smsg((char_u *)_("Reading dictionary file %s ..."), fname);
|
310
|
3261 out_flush();
|
|
3262 if (!spin->si_verbose)
|
|
3263 verbose_leave();
|
|
3264 }
|
236
|
3265
|
|
3266 /* Read and ignore the first line: word count. */
|
|
3267 (void)vim_fgets(line, MAXLINELEN, fd);
|
324
|
3268 if (!vim_isdigit(*skipwhite(line)))
|
236
|
3269 EMSG2(_("E760: No word count in %s"), fname);
|
|
3270
|
|
3271 /*
|
|
3272 * Read all the lines in the file one by one.
|
|
3273 * The words are converted to 'encoding' here, before being added to
|
|
3274 * the hashtable.
|
|
3275 */
|
255
|
3276 while (!vim_fgets(line, MAXLINELEN, fd) && !got_int)
|
236
|
3277 {
|
255
|
3278 line_breakcheck();
|
236
|
3279 ++lnum;
|
351
|
3280 if (line[0] == '#')
|
|
3281 continue; /* comment line */
|
236
|
3282
|
300
|
3283 /* Remove CR, LF and white space from the end. White space halfway
|
|
3284 * the word is kept to allow e.g., "et al.". */
|
236
|
3285 l = STRLEN(line);
|
|
3286 while (l > 0 && line[l - 1] <= ' ')
|
|
3287 --l;
|
|
3288 if (l == 0)
|
|
3289 continue; /* empty line */
|
|
3290 line[l] = NUL;
|
|
3291
|
|
3292 /* Find the optional affix names. */
|
300
|
3293 afflist = vim_strchr(line, '/');
|
|
3294 if (afflist != NULL)
|
|
3295 *afflist++ = NUL;
|
236
|
3296
|
300
|
3297 /* Skip non-ASCII words when "spin->si_ascii" is TRUE. */
|
|
3298 if (spin->si_ascii && has_non_ascii(line))
|
|
3299 {
|
|
3300 ++non_ascii;
|
240
|
3301 continue;
|
300
|
3302 }
|
240
|
3303
|
310
|
3304 #ifdef FEAT_MBYTE
|
236
|
3305 /* Convert from "SET" to 'encoding' when needed. */
|
300
|
3306 if (spin->si_conv.vc_type != CONV_NONE)
|
236
|
3307 {
|
300
|
3308 pc = string_convert(&spin->si_conv, line, NULL);
|
255
|
3309 if (pc == NULL)
|
|
3310 {
|
|
3311 smsg((char_u *)_("Conversion failure for word in %s line %d: %s"),
|
|
3312 fname, lnum, line);
|
|
3313 continue;
|
|
3314 }
|
236
|
3315 w = pc;
|
|
3316 }
|
|
3317 else
|
310
|
3318 #endif
|
236
|
3319 {
|
|
3320 pc = NULL;
|
|
3321 w = line;
|
|
3322 }
|
|
3323
|
339
|
3324 /* This takes time, print a message now and then. */
|
|
3325 if (spin->si_verbose && (lnum & 0x3ff) == 0)
|
|
3326 {
|
|
3327 vim_snprintf((char *)message, sizeof(message),
|
|
3328 _("line %6d, word %6d - %s"),
|
|
3329 lnum, spin->si_foldwcount + spin->si_keepwcount, w);
|
|
3330 msg_start();
|
|
3331 msg_puts_long_attr(message, 0);
|
|
3332 msg_clr_eos();
|
|
3333 msg_didout = FALSE;
|
|
3334 msg_col = 0;
|
|
3335 out_flush();
|
|
3336 }
|
|
3337
|
300
|
3338 /* Store the word in the hashtable to be able to find duplicates. */
|
|
3339 dw = (char_u *)getroom_save(&spin->si_blocks, w);
|
236
|
3340 if (dw == NULL)
|
300
|
3341 retval = FAIL;
|
|
3342 vim_free(pc);
|
|
3343 if (retval == FAIL)
|
236
|
3344 break;
|
|
3345
|
300
|
3346 hash = hash_hash(dw);
|
|
3347 hi = hash_lookup(&ht, dw, hash);
|
236
|
3348 if (!HASHITEM_EMPTY(hi))
|
|
3349 smsg((char_u *)_("Duplicate word in %s line %d: %s"),
|
339
|
3350 fname, lnum, w);
|
236
|
3351 else
|
300
|
3352 hash_add_item(&ht, hi, dw, hash);
|
|
3353
|
307
|
3354 flags = 0;
|
339
|
3355 pfxlist = NULL;
|
307
|
3356 if (afflist != NULL)
|
|
3357 {
|
|
3358 /* Check for affix name that stands for keep-case word and stands
|
|
3359 * for rare word (if defined). */
|
310
|
3360 if (affile->af_kep != NUL
|
|
3361 && vim_strchr(afflist, affile->af_kep) != NULL)
|
307
|
3362 flags |= WF_KEEPCAP;
|
|
3363 if (affile->af_rar != NUL
|
|
3364 && vim_strchr(afflist, affile->af_rar) != NULL)
|
|
3365 flags |= WF_RARE;
|
346
|
3366 if (affile->af_bad != NUL
|
|
3367 && vim_strchr(afflist, affile->af_bad) != NULL)
|
|
3368 flags |= WF_BANNED;
|
339
|
3369
|
|
3370 if (affile->af_pfxpostpone)
|
|
3371 /* Need to store the list of prefix IDs with the word. */
|
|
3372 pfxlist = get_pfxlist(affile, afflist, &spin->si_blocks);
|
307
|
3373 }
|
|
3374
|
300
|
3375 /* Add the word to the word tree(s). */
|
339
|
3376 if (store_word(dw, spin, flags, spin->si_region, pfxlist) == FAIL)
|
300
|
3377 retval = FAIL;
|
236
|
3378
|
300
|
3379 if (afflist != NULL)
|
|
3380 {
|
|
3381 /* Find all matching suffixes and add the resulting words.
|
|
3382 * Additionally do matching prefixes that combine. */
|
339
|
3383 if (store_aff_word(dw, spin, afflist, affile,
|
307
|
3384 &affile->af_suff, &affile->af_pref,
|
339
|
3385 FALSE, flags, pfxlist) == FAIL)
|
300
|
3386 retval = FAIL;
|
|
3387
|
|
3388 /* Find all matching prefixes and add the resulting words. */
|
339
|
3389 if (store_aff_word(dw, spin, afflist, affile,
|
|
3390 &affile->af_pref, NULL,
|
|
3391 FALSE, flags, pfxlist) == FAIL)
|
300
|
3392 retval = FAIL;
|
|
3393 }
|
236
|
3394 }
|
|
3395
|
300
|
3396 if (spin->si_ascii && non_ascii > 0)
|
|
3397 smsg((char_u *)_("Ignored %d words with non-ASCII characters"),
|
|
3398 non_ascii);
|
|
3399 hash_clear(&ht);
|
|
3400
|
236
|
3401 fclose(fd);
|
300
|
3402 return retval;
|
236
|
3403 }
|
|
3404
|
|
3405 /*
|
339
|
3406 * Get the list of prefix IDs from the affix list "afflist".
|
|
3407 * Used for PFXPOSTPONE.
|
|
3408 * Returns a string allocated with getroom(). NULL when there are no prefixes
|
|
3409 * or when out of memory.
|
|
3410 */
|
|
3411 static char_u *
|
|
3412 get_pfxlist(affile, afflist, blp)
|
|
3413 afffile_T *affile;
|
|
3414 char_u *afflist;
|
|
3415 sblock_T **blp;
|
|
3416 {
|
|
3417 char_u *p;
|
|
3418 int cnt;
|
|
3419 int round;
|
|
3420 char_u *res = NULL;
|
|
3421 char_u key[2];
|
|
3422 hashitem_T *hi;
|
|
3423
|
|
3424 key[1] = NUL;
|
|
3425
|
|
3426 /* round 1: count the number of prefix IDs.
|
|
3427 * round 2: move prefix IDs to "res" */
|
|
3428 for (round = 1; round <= 2; ++round)
|
|
3429 {
|
|
3430 cnt = 0;
|
|
3431 for (p = afflist; *p != NUL; ++p)
|
|
3432 {
|
|
3433 key[0] = *p;
|
|
3434 hi = hash_find(&affile->af_pref, key);
|
|
3435 if (!HASHITEM_EMPTY(hi))
|
|
3436 {
|
|
3437 /* This is a prefix ID, use the new number. */
|
|
3438 if (round == 2)
|
|
3439 res[cnt] = HI2AH(hi)->ah_newID;
|
|
3440 ++cnt;
|
|
3441 }
|
|
3442 }
|
|
3443 if (round == 1 && cnt > 0)
|
|
3444 res = getroom(blp, cnt + 1);
|
|
3445 if (res == NULL)
|
|
3446 break;
|
|
3447 }
|
|
3448
|
|
3449 if (res != NULL)
|
|
3450 res[cnt] = NUL;
|
|
3451 return res;
|
|
3452 }
|
|
3453
|
|
3454 /*
|
300
|
3455 * Apply affixes to a word and store the resulting words.
|
|
3456 * "ht" is the hashtable with affentry_T that need to be applied, either
|
|
3457 * prefixes or suffixes.
|
|
3458 * "xht", when not NULL, is the prefix hashtable, to be used additionally on
|
|
3459 * the resulting words for combining affixes.
|
|
3460 *
|
|
3461 * Returns FAIL when out of memory.
|
236
|
3462 */
|
300
|
3463 static int
|
339
|
3464 store_aff_word(word, spin, afflist, affile, ht, xht, comb, flags, pfxlist)
|
300
|
3465 char_u *word; /* basic word start */
|
|
3466 spellinfo_T *spin; /* spell info */
|
|
3467 char_u *afflist; /* list of names of supported affixes */
|
339
|
3468 afffile_T *affile;
|
300
|
3469 hashtab_T *ht;
|
|
3470 hashtab_T *xht;
|
|
3471 int comb; /* only use affixes that combine */
|
307
|
3472 int flags; /* flags for the word */
|
339
|
3473 char_u *pfxlist; /* list of prefix IDs */
|
236
|
3474 {
|
|
3475 int todo;
|
|
3476 hashitem_T *hi;
|
300
|
3477 affheader_T *ah;
|
|
3478 affentry_T *ae;
|
|
3479 regmatch_T regmatch;
|
|
3480 char_u newword[MAXWLEN];
|
|
3481 int retval = OK;
|
|
3482 int i;
|
|
3483 char_u *p;
|
366
|
3484 int use_flags;
|
236
|
3485
|
300
|
3486 todo = ht->ht_used;
|
|
3487 for (hi = ht->ht_array; todo > 0 && retval == OK; ++hi)
|
236
|
3488 {
|
|
3489 if (!HASHITEM_EMPTY(hi))
|
|
3490 {
|
|
3491 --todo;
|
300
|
3492 ah = HI2AH(hi);
|
236
|
3493
|
300
|
3494 /* Check that the affix combines, if required, and that the word
|
|
3495 * supports this affix. */
|
|
3496 if ((!comb || ah->ah_combine)
|
|
3497 && vim_strchr(afflist, *ah->ah_key) != NULL)
|
236
|
3498 {
|
300
|
3499 /* Loop over all affix entries with this name. */
|
|
3500 for (ae = ah->ah_first; ae != NULL; ae = ae->ae_next)
|
236
|
3501 {
|
300
|
3502 /* Check the condition. It's not logical to match case
|
|
3503 * here, but it is required for compatibility with
|
339
|
3504 * Myspell.
|
|
3505 * For prefixes, when "PFXPOSTPONE" was used, only do
|
|
3506 * prefixes with a chop string. */
|
300
|
3507 regmatch.regprog = ae->ae_prog;
|
|
3508 regmatch.rm_ic = FALSE;
|
339
|
3509 if ((xht != NULL || !affile->af_pfxpostpone
|
|
3510 || ae->ae_chop != NULL)
|
|
3511 && (ae->ae_prog == NULL
|
|
3512 || vim_regexec(®match, word, (colnr_T)0)))
|
300
|
3513 {
|
|
3514 /* Match. Remove the chop and add the affix. */
|
|
3515 if (xht == NULL)
|
240
|
3516 {
|
300
|
3517 /* prefix: chop/add at the start of the word */
|
|
3518 if (ae->ae_add == NULL)
|
|
3519 *newword = NUL;
|
|
3520 else
|
|
3521 STRCPY(newword, ae->ae_add);
|
|
3522 p = word;
|
|
3523 if (ae->ae_chop != NULL)
|
310
|
3524 {
|
300
|
3525 /* Skip chop string. */
|
310
|
3526 #ifdef FEAT_MBYTE
|
|
3527 if (has_mbyte)
|
324
|
3528 {
|
310
|
3529 i = mb_charlen(ae->ae_chop);
|
324
|
3530 for ( ; i > 0; --i)
|
|
3531 mb_ptr_adv(p);
|
|
3532 }
|
310
|
3533 else
|
|
3534 #endif
|
324
|
3535 p += STRLEN(ae->ae_chop);
|
310
|
3536 }
|
300
|
3537 STRCAT(newword, p);
|
|
3538 }
|
|
3539 else
|
|
3540 {
|
|
3541 /* suffix: chop/add at the end of the word */
|
|
3542 STRCPY(newword, word);
|
|
3543 if (ae->ae_chop != NULL)
|
|
3544 {
|
|
3545 /* Remove chop string. */
|
|
3546 p = newword + STRLEN(newword);
|
310
|
3547 #ifdef FEAT_MBYTE
|
|
3548 if (has_mbyte)
|
|
3549 i = mb_charlen(ae->ae_chop);
|
|
3550 else
|
|
3551 #endif
|
|
3552 i = STRLEN(ae->ae_chop);
|
|
3553 for ( ; i > 0; --i)
|
300
|
3554 mb_ptr_back(newword, p);
|
|
3555 *p = NUL;
|
|
3556 }
|
|
3557 if (ae->ae_add != NULL)
|
|
3558 STRCAT(newword, ae->ae_add);
|
240
|
3559 }
|
|
3560
|
366
|
3561 /* Obey the "rare" flag of the affix. */
|
|
3562 if (ae->ae_rare)
|
|
3563 use_flags = flags | WF_RARE;
|
|
3564 else
|
|
3565 use_flags = flags;
|
|
3566
|
300
|
3567 /* Store the modified word. */
|
366
|
3568 if (store_word(newword, spin, use_flags,
|
|
3569 spin->si_region, pfxlist) == FAIL)
|
300
|
3570 retval = FAIL;
|
236
|
3571
|
300
|
3572 /* When added a suffix and combining is allowed also
|
|
3573 * try adding prefixes additionally. */
|
|
3574 if (xht != NULL && ah->ah_combine)
|
339
|
3575 if (store_aff_word(newword, spin, afflist, affile,
|
366
|
3576 xht, NULL, TRUE, use_flags, pfxlist)
|
|
3577 == FAIL)
|
300
|
3578 retval = FAIL;
|
236
|
3579 }
|
|
3580 }
|
|
3581 }
|
|
3582 }
|
|
3583 }
|
|
3584
|
|
3585 return retval;
|
|
3586 }
|
|
3587
|
|
3588 /*
|
300
|
3589 * Read a file with a list of words.
|
236
|
3590 */
|
|
3591 static int
|
300
|
3592 spell_read_wordfile(fname, spin)
|
|
3593 char_u *fname;
|
|
3594 spellinfo_T *spin;
|
236
|
3595 {
|
300
|
3596 FILE *fd;
|
|
3597 long lnum = 0;
|
|
3598 char_u rline[MAXLINELEN];
|
|
3599 char_u *line;
|
|
3600 char_u *pc = NULL;
|
|
3601 int l;
|
|
3602 int retval = OK;
|
|
3603 int did_word = FALSE;
|
|
3604 int non_ascii = 0;
|
307
|
3605 int flags;
|
316
|
3606 int regionmask;
|
236
|
3607
|
300
|
3608 /*
|
|
3609 * Open the file.
|
|
3610 */
|
310
|
3611 fd = mch_fopen((char *)fname, "r");
|
300
|
3612 if (fd == NULL)
|
236
|
3613 {
|
300
|
3614 EMSG2(_(e_notopen), fname);
|
|
3615 return FAIL;
|
236
|
3616 }
|
|
3617
|
310
|
3618 if (spin->si_verbose || p_verbose > 2)
|
|
3619 {
|
|
3620 if (!spin->si_verbose)
|
|
3621 verbose_enter();
|
366
|
3622 smsg((char_u *)_("Reading word file %s ..."), fname);
|
310
|
3623 out_flush();
|
|
3624 if (!spin->si_verbose)
|
|
3625 verbose_leave();
|
|
3626 }
|
300
|
3627
|
|
3628 /*
|
|
3629 * Read all the lines in the file one by one.
|
|
3630 */
|
|
3631 while (!vim_fgets(rline, MAXLINELEN, fd) && !got_int)
|
|
3632 {
|
|
3633 line_breakcheck();
|
|
3634 ++lnum;
|
|
3635
|
|
3636 /* Skip comment lines. */
|
|
3637 if (*rline == '#')
|
|
3638 continue;
|
|
3639
|
|
3640 /* Remove CR, LF and white space from the end. */
|
|
3641 l = STRLEN(rline);
|
|
3642 while (l > 0 && rline[l - 1] <= ' ')
|
|
3643 --l;
|
|
3644 if (l == 0)
|
|
3645 continue; /* empty or blank line */
|
|
3646 rline[l] = NUL;
|
|
3647
|
|
3648 /* Convert from "=encoding={encoding}" to 'encoding' when needed. */
|
|
3649 vim_free(pc);
|
310
|
3650 #ifdef FEAT_MBYTE
|
300
|
3651 if (spin->si_conv.vc_type != CONV_NONE)
|
|
3652 {
|
|
3653 pc = string_convert(&spin->si_conv, rline, NULL);
|
|
3654 if (pc == NULL)
|
|
3655 {
|
|
3656 smsg((char_u *)_("Conversion failure for word in %s line %d: %s"),
|
|
3657 fname, lnum, rline);
|
|
3658 continue;
|
|
3659 }
|
|
3660 line = pc;
|
|
3661 }
|
|
3662 else
|
310
|
3663 #endif
|
300
|
3664 {
|
|
3665 pc = NULL;
|
|
3666 line = rline;
|
|
3667 }
|
|
3668
|
307
|
3669 flags = 0;
|
316
|
3670 regionmask = spin->si_region;
|
307
|
3671
|
|
3672 if (*line == '/')
|
300
|
3673 {
|
307
|
3674 ++line;
|
316
|
3675
|
307
|
3676 if (STRNCMP(line, "encoding=", 9) == 0)
|
300
|
3677 {
|
|
3678 if (spin->si_conv.vc_type != CONV_NONE)
|
316
|
3679 smsg((char_u *)_("Duplicate /encoding= line ignored in %s line %d: %s"),
|
|
3680 fname, lnum, line - 1);
|
300
|
3681 else if (did_word)
|
316
|
3682 smsg((char_u *)_("/encoding= line after word ignored in %s line %d: %s"),
|
|
3683 fname, lnum, line - 1);
|
300
|
3684 else
|
|
3685 {
|
310
|
3686 #ifdef FEAT_MBYTE
|
|
3687 char_u *enc;
|
|
3688
|
300
|
3689 /* Setup for conversion to 'encoding'. */
|
316
|
3690 line += 10;
|
|
3691 enc = enc_canonize(line);
|
300
|
3692 if (enc != NULL && !spin->si_ascii
|
|
3693 && convert_setup(&spin->si_conv, enc,
|
|
3694 p_enc) == FAIL)
|
|
3695 smsg((char_u *)_("Conversion in %s not supported: from %s to %s"),
|
316
|
3696 fname, line, p_enc);
|
300
|
3697 vim_free(enc);
|
310
|
3698 #else
|
|
3699 smsg((char_u *)_("Conversion in %s not supported"), fname);
|
|
3700 #endif
|
300
|
3701 }
|
307
|
3702 continue;
|
300
|
3703 }
|
307
|
3704
|
316
|
3705 if (STRNCMP(line, "regions=", 8) == 0)
|
|
3706 {
|
|
3707 if (spin->si_region_count > 1)
|
|
3708 smsg((char_u *)_("Duplicate /regions= line ignored in %s line %d: %s"),
|
|
3709 fname, lnum, line);
|
|
3710 else
|
|
3711 {
|
|
3712 line += 8;
|
|
3713 if (STRLEN(line) > 16)
|
|
3714 smsg((char_u *)_("Too many regions in %s line %d: %s"),
|
|
3715 fname, lnum, line);
|
|
3716 else
|
|
3717 {
|
|
3718 spin->si_region_count = STRLEN(line) / 2;
|
|
3719 STRCPY(spin->si_region_name, line);
|
|
3720 }
|
|
3721 }
|
|
3722 continue;
|
|
3723 }
|
|
3724
|
307
|
3725 if (*line == '=')
|
|
3726 {
|
|
3727 /* keep-case word */
|
|
3728 flags |= WF_KEEPCAP;
|
|
3729 ++line;
|
|
3730 }
|
|
3731
|
|
3732 if (*line == '!')
|
|
3733 {
|
|
3734 /* Bad, bad, wicked word. */
|
|
3735 flags |= WF_BANNED;
|
|
3736 ++line;
|
|
3737 }
|
|
3738 else if (*line == '?')
|
|
3739 {
|
|
3740 /* Rare word. */
|
|
3741 flags |= WF_RARE;
|
|
3742 ++line;
|
|
3743 }
|
|
3744
|
316
|
3745 if (VIM_ISDIGIT(*line))
|
|
3746 {
|
|
3747 /* region number(s) */
|
|
3748 regionmask = 0;
|
|
3749 while (VIM_ISDIGIT(*line))
|
|
3750 {
|
|
3751 l = *line - '0';
|
|
3752 if (l > spin->si_region_count)
|
|
3753 {
|
|
3754 smsg((char_u *)_("Invalid region nr in %s line %d: %s"),
|
|
3755 fname, lnum, line);
|
|
3756 break;
|
|
3757 }
|
|
3758 regionmask |= 1 << (l - 1);
|
|
3759 ++line;
|
|
3760 }
|
|
3761 flags |= WF_REGION;
|
|
3762 }
|
|
3763
|
307
|
3764 if (flags == 0)
|
|
3765 {
|
|
3766 smsg((char_u *)_("/ line ignored in %s line %d: %s"),
|
300
|
3767 fname, lnum, line);
|
307
|
3768 continue;
|
|
3769 }
|
300
|
3770 }
|
|
3771
|
|
3772 /* Skip non-ASCII words when "spin->si_ascii" is TRUE. */
|
|
3773 if (spin->si_ascii && has_non_ascii(line))
|
|
3774 {
|
|
3775 ++non_ascii;
|
|
3776 continue;
|
|
3777 }
|
|
3778
|
|
3779 /* Normal word: store it. */
|
339
|
3780 if (store_word(line, spin, flags, regionmask, NULL) == FAIL)
|
300
|
3781 {
|
|
3782 retval = FAIL;
|
|
3783 break;
|
|
3784 }
|
|
3785 did_word = TRUE;
|
|
3786 }
|
|
3787
|
|
3788 vim_free(pc);
|
|
3789 fclose(fd);
|
|
3790
|
310
|
3791 if (spin->si_ascii && non_ascii > 0 && (spin->si_verbose || p_verbose > 2))
|
|
3792 {
|
|
3793 if (p_verbose > 2)
|
|
3794 verbose_enter();
|
300
|
3795 smsg((char_u *)_("Ignored %d words with non-ASCII characters"),
|
|
3796 non_ascii);
|
310
|
3797 if (p_verbose > 2)
|
|
3798 verbose_leave();
|
|
3799 }
|
300
|
3800 return retval;
|
236
|
3801 }
|
|
3802
|
|
3803 /*
|
300
|
3804 * Get part of an sblock_T, "len" bytes long.
|
|
3805 * This avoids calling free() for every little struct we use.
|
|
3806 * The memory is cleared to all zeros.
|
|
3807 * Returns NULL when out of memory.
|
|
3808 */
|
|
3809 static void *
|
|
3810 getroom(blp, len)
|
|
3811 sblock_T **blp;
|
|
3812 size_t len; /* length needed */
|
|
3813 {
|
|
3814 char_u *p;
|
|
3815 sblock_T *bl = *blp;
|
|
3816
|
|
3817 if (bl == NULL || bl->sb_used + len > SBLOCKSIZE)
|
|
3818 {
|
|
3819 /* Allocate a block of memory. This is not freed until much later. */
|
|
3820 bl = (sblock_T *)alloc_clear((unsigned)(sizeof(sblock_T) + SBLOCKSIZE));
|
|
3821 if (bl == NULL)
|
|
3822 return NULL;
|
|
3823 bl->sb_next = *blp;
|
|
3824 *blp = bl;
|
|
3825 bl->sb_used = 0;
|
|
3826 }
|
|
3827
|
|
3828 p = bl->sb_data + bl->sb_used;
|
|
3829 bl->sb_used += len;
|
|
3830
|
|
3831 return p;
|
|
3832 }
|
|
3833
|
|
3834 /*
|
|
3835 * Make a copy of a string into memory allocated with getroom().
|
|
3836 */
|
|
3837 static char_u *
|
|
3838 getroom_save(blp, s)
|
|
3839 sblock_T **blp;
|
|
3840 char_u *s;
|
|
3841 {
|
|
3842 char_u *sc;
|
|
3843
|
|
3844 sc = (char_u *)getroom(blp, STRLEN(s) + 1);
|
|
3845 if (sc != NULL)
|
|
3846 STRCPY(sc, s);
|
|
3847 return sc;
|
|
3848 }
|
|
3849
|
|
3850
|
|
3851 /*
|
|
3852 * Free the list of allocated sblock_T.
|
236
|
3853 */
|
|
3854 static void
|
300
|
3855 free_blocks(bl)
|
|
3856 sblock_T *bl;
|
236
|
3857 {
|
300
|
3858 sblock_T *next;
|
236
|
3859
|
300
|
3860 while (bl != NULL)
|
236
|
3861 {
|
300
|
3862 next = bl->sb_next;
|
|
3863 vim_free(bl);
|
|
3864 bl = next;
|
236
|
3865 }
|
|
3866 }
|
|
3867
|
|
3868 /*
|
300
|
3869 * Allocate the root of a word tree.
|
236
|
3870 */
|
300
|
3871 static wordnode_T *
|
|
3872 wordtree_alloc(blp)
|
|
3873 sblock_T **blp;
|
236
|
3874 {
|
300
|
3875 return (wordnode_T *)getroom(blp, sizeof(wordnode_T));
|
236
|
3876 }
|
|
3877
|
|
3878 /*
|
300
|
3879 * Store a word in the tree(s).
|
307
|
3880 * Always store it in the case-folded tree. A keep-case word can also be used
|
|
3881 * with all caps.
|
300
|
3882 * For a keep-case word also store it in the keep-case tree.
|
339
|
3883 * When "pfxlist" is not NULL store the word for each prefix ID.
|
236
|
3884 */
|
|
3885 static int
|
339
|
3886 store_word(word, spin, flags, region, pfxlist)
|
300
|
3887 char_u *word;
|
|
3888 spellinfo_T *spin;
|
307
|
3889 int flags; /* extra flags, WF_BANNED */
|
316
|
3890 int region; /* supported region(s) */
|
339
|
3891 char_u *pfxlist; /* list of prefix IDs or NULL */
|
236
|
3892 {
|
300
|
3893 int len = STRLEN(word);
|
|
3894 int ct = captype(word, word + len);
|
|
3895 char_u foldword[MAXWLEN];
|
339
|
3896 int res = OK;
|
|
3897 char_u *p;
|
236
|
3898
|
323
|
3899 (void)spell_casefold(word, len, foldword, MAXWLEN);
|
339
|
3900 for (p = pfxlist; res == OK; ++p)
|
|
3901 {
|
|
3902 res = tree_add_word(foldword, spin->si_foldroot, ct | flags,
|
|
3903 region, p == NULL ? 0 : *p, &spin->si_blocks);
|
|
3904 if (p == NULL || *p == NUL)
|
|
3905 break;
|
|
3906 }
|
334
|
3907 ++spin->si_foldwcount;
|
307
|
3908
|
|
3909 if (res == OK && (ct == WF_KEEPCAP || flags & WF_KEEPCAP))
|
334
|
3910 {
|
339
|
3911 for (p = pfxlist; res == OK; ++p)
|
|
3912 {
|
|
3913 res = tree_add_word(word, spin->si_keeproot, flags,
|
|
3914 region, p == NULL ? 0 : *p, &spin->si_blocks);
|
|
3915 if (p == NULL || *p == NUL)
|
|
3916 break;
|
|
3917 }
|
334
|
3918 ++spin->si_keepwcount;
|
|
3919 }
|
300
|
3920 return res;
|
236
|
3921 }
|
|
3922
|
|
3923 /*
|
300
|
3924 * Add word "word" to a word tree at "root".
|
366
|
3925 * When "flags" < 0 we are adding to the prefix tree where flags is used for
|
|
3926 * "rare" and "region" is the condition nr.
|
255
|
3927 * Returns FAIL when out of memory.
|
236
|
3928 */
|
255
|
3929 static int
|
339
|
3930 tree_add_word(word, root, flags, region, prefixID, blp)
|
300
|
3931 char_u *word;
|
|
3932 wordnode_T *root;
|
|
3933 int flags;
|
|
3934 int region;
|
339
|
3935 int prefixID;
|
300
|
3936 sblock_T **blp;
|
236
|
3937 {
|
300
|
3938 wordnode_T *node = root;
|
|
3939 wordnode_T *np;
|
|
3940 wordnode_T **prev = NULL;
|
|
3941 int i;
|
255
|
3942
|
300
|
3943 /* Add each byte of the word to the tree, including the NUL at the end. */
|
|
3944 for (i = 0; ; ++i)
|
255
|
3945 {
|
300
|
3946 /* Look for the sibling that has the same character. They are sorted
|
|
3947 * on byte value, thus stop searching when a sibling is found with a
|
339
|
3948 * higher byte value. For zero bytes (end of word) the sorting is
|
|
3949 * done on flags and then on prefixID
|
300
|
3950 */
|
339
|
3951 while (node != NULL
|
|
3952 && (node->wn_byte < word[i]
|
|
3953 || (node->wn_byte == NUL
|
|
3954 && (flags < 0
|
|
3955 ? node->wn_prefixID < prefixID
|
|
3956 : node->wn_flags < (flags & 0xff)
|
|
3957 || (node->wn_flags == (flags & 0xff)
|
|
3958 && node->wn_prefixID < prefixID)))))
|
236
|
3959 {
|
300
|
3960 prev = &node->wn_sibling;
|
|
3961 node = *prev;
|
236
|
3962 }
|
339
|
3963 if (node == NULL
|
|
3964 || node->wn_byte != word[i]
|
|
3965 || (word[i] == NUL
|
|
3966 && (flags < 0
|
|
3967 || node->wn_flags != (flags & 0xff)
|
|
3968 || node->wn_prefixID != prefixID)))
|
255
|
3969 {
|
300
|
3970 /* Allocate a new node. */
|
|
3971 np = (wordnode_T *)getroom(blp, sizeof(wordnode_T));
|
|
3972 if (np == NULL)
|
|
3973 return FAIL;
|
|
3974 np->wn_byte = word[i];
|
|
3975 *prev = np;
|
|
3976 np->wn_sibling = node;
|
|
3977 node = np;
|
255
|
3978 }
|
300
|
3979
|
|
3980 if (word[i] == NUL)
|
|
3981 {
|
|
3982 node->wn_flags = flags;
|
|
3983 node->wn_region |= region;
|
339
|
3984 node->wn_prefixID = prefixID;
|
300
|
3985 break;
|
|
3986 }
|
|
3987 prev = &node->wn_child;
|
|
3988 node = *prev;
|
255
|
3989 }
|
|
3990
|
|
3991 return OK;
|
236
|
3992 }
|
|
3993
|
|
3994 /*
|
300
|
3995 * Compress a tree: find tails that are identical and can be shared.
|
|
3996 */
|
|
3997 static void
|
310
|
3998 wordtree_compress(root, spin)
|
300
|
3999 wordnode_T *root;
|
310
|
4000 spellinfo_T *spin;
|
300
|
4001 {
|
|
4002 hashtab_T ht;
|
|
4003 int n;
|
|
4004 int tot = 0;
|
|
4005
|
|
4006 if (root != NULL)
|
|
4007 {
|
|
4008 hash_init(&ht);
|
|
4009 n = node_compress(root, &ht, &tot);
|
310
|
4010 if (spin->si_verbose || p_verbose > 2)
|
|
4011 {
|
|
4012 if (!spin->si_verbose)
|
|
4013 verbose_enter();
|
|
4014 smsg((char_u *)_("Compressed %d of %d nodes; %d%% remaining"),
|
300
|
4015 n, tot, (tot - n) * 100 / tot);
|
310
|
4016 if (p_verbose > 2)
|
|
4017 verbose_leave();
|
|
4018 }
|
300
|
4019 hash_clear(&ht);
|
|
4020 }
|
|
4021 }
|
|
4022
|
|
4023 /*
|
|
4024 * Compress a node, its siblings and its children, depth first.
|
|
4025 * Returns the number of compressed nodes.
|
236
|
4026 */
|
255
|
4027 static int
|
300
|
4028 node_compress(node, ht, tot)
|
|
4029 wordnode_T *node;
|
|
4030 hashtab_T *ht;
|
|
4031 int *tot; /* total count of nodes before compressing,
|
|
4032 incremented while going through the tree */
|
236
|
4033 {
|
300
|
4034 wordnode_T *np;
|
|
4035 wordnode_T *tp;
|
|
4036 wordnode_T *child;
|
|
4037 hash_T hash;
|
236
|
4038 hashitem_T *hi;
|
300
|
4039 int len = 0;
|
|
4040 unsigned nr, n;
|
|
4041 int compressed = 0;
|
236
|
4042
|
300
|
4043 /*
|
|
4044 * Go through the list of siblings. Compress each child and then try
|
|
4045 * finding an identical child to replace it.
|
|
4046 * Note that with "child" we mean not just the node that is pointed to,
|
|
4047 * but the whole list of siblings, of which the node is the first.
|
|
4048 */
|
|
4049 for (np = node; np != NULL; np = np->wn_sibling)
|
236
|
4050 {
|
300
|
4051 ++len;
|
|
4052 if ((child = np->wn_child) != NULL)
|
|
4053 {
|
346
|
4054 /* Compress the child. This fills hashkey. */
|
300
|
4055 compressed += node_compress(child, ht, tot);
|
|
4056
|
|
4057 /* Try to find an identical child. */
|
346
|
4058 hash = hash_hash(child->wn_u1.hashkey);
|
|
4059 hi = hash_lookup(ht, child->wn_u1.hashkey, hash);
|
300
|
4060 tp = NULL;
|
|
4061 if (!HASHITEM_EMPTY(hi))
|
|
4062 {
|
|
4063 /* There are children with an identical hash value. Now check
|
|
4064 * if there is one that is really identical. */
|
346
|
4065 for (tp = HI2WN(hi); tp != NULL; tp = tp->wn_u2.next)
|
300
|
4066 if (node_equal(child, tp))
|
|
4067 {
|
|
4068 /* Found one! Now use that child in place of the
|
|
4069 * current one. This means the current child is
|
|
4070 * dropped from the tree. */
|
|
4071 np->wn_child = tp;
|
|
4072 ++compressed;
|
|
4073 break;
|
|
4074 }
|
|
4075 if (tp == NULL)
|
|
4076 {
|
|
4077 /* No other child with this hash value equals the child of
|
|
4078 * the node, add it to the linked list after the first
|
|
4079 * item. */
|
|
4080 tp = HI2WN(hi);
|
346
|
4081 child->wn_u2.next = tp->wn_u2.next;
|
|
4082 tp->wn_u2.next = child;
|
300
|
4083 }
|
|
4084 }
|
|
4085 else
|
|
4086 /* No other child has this hash value, add it to the
|
|
4087 * hashtable. */
|
346
|
4088 hash_add_item(ht, hi, child->wn_u1.hashkey, hash);
|
300
|
4089 }
|
236
|
4090 }
|
300
|
4091 *tot += len;
|
|
4092
|
|
4093 /*
|
|
4094 * Make a hash key for the node and its siblings, so that we can quickly
|
|
4095 * find a lookalike node. This must be done after compressing the sibling
|
|
4096 * list, otherwise the hash key would become invalid by the compression.
|
|
4097 */
|
346
|
4098 node->wn_u1.hashkey[0] = len;
|
300
|
4099 nr = 0;
|
|
4100 for (np = node; np != NULL; np = np->wn_sibling)
|
236
|
4101 {
|
300
|
4102 if (np->wn_byte == NUL)
|
339
|
4103 /* end node: use wn_flags, wn_region and wn_prefixID */
|
|
4104 n = np->wn_flags + (np->wn_region << 8) + (np->wn_prefixID << 16);
|
300
|
4105 else
|
|
4106 /* byte node: use the byte value and the child pointer */
|
|
4107 n = np->wn_byte + ((long_u)np->wn_child << 8);
|
|
4108 nr = nr * 101 + n;
|
236
|
4109 }
|
300
|
4110
|
|
4111 /* Avoid NUL bytes, it terminates the hash key. */
|
|
4112 n = nr & 0xff;
|
346
|
4113 node->wn_u1.hashkey[1] = n == 0 ? 1 : n;
|
300
|
4114 n = (nr >> 8) & 0xff;
|
346
|
4115 node->wn_u1.hashkey[2] = n == 0 ? 1 : n;
|
300
|
4116 n = (nr >> 16) & 0xff;
|
346
|
4117 node->wn_u1.hashkey[3] = n == 0 ? 1 : n;
|
300
|
4118 n = (nr >> 24) & 0xff;
|
346
|
4119 node->wn_u1.hashkey[4] = n == 0 ? 1 : n;
|
|
4120 node->wn_u1.hashkey[5] = NUL;
|
300
|
4121
|
|
4122 return compressed;
|
|
4123 }
|
|
4124
|
|
4125 /*
|
|
4126 * Return TRUE when two nodes have identical siblings and children.
|
|
4127 */
|
|
4128 static int
|
|
4129 node_equal(n1, n2)
|
|
4130 wordnode_T *n1;
|
|
4131 wordnode_T *n2;
|
|
4132 {
|
|
4133 wordnode_T *p1;
|
|
4134 wordnode_T *p2;
|
|
4135
|
|
4136 for (p1 = n1, p2 = n2; p1 != NULL && p2 != NULL;
|
|
4137 p1 = p1->wn_sibling, p2 = p2->wn_sibling)
|
|
4138 if (p1->wn_byte != p2->wn_byte
|
|
4139 || (p1->wn_byte == NUL
|
|
4140 ? (p1->wn_flags != p2->wn_flags
|
339
|
4141 || p1->wn_region != p2->wn_region
|
|
4142 || p1->wn_prefixID != p2->wn_prefixID)
|
300
|
4143 : (p1->wn_child != p2->wn_child)))
|
|
4144 break;
|
|
4145
|
|
4146 return p1 == NULL && p2 == NULL;
|
236
|
4147 }
|
|
4148
|
|
4149 /*
|
|
4150 * Write a number to file "fd", MSB first, in "len" bytes.
|
|
4151 */
|
255
|
4152 void
|
236
|
4153 put_bytes(fd, nr, len)
|
|
4154 FILE *fd;
|
|
4155 long_u nr;
|
|
4156 int len;
|
|
4157 {
|
|
4158 int i;
|
|
4159
|
|
4160 for (i = len - 1; i >= 0; --i)
|
|
4161 putc((int)(nr >> (i * 8)), fd);
|
|
4162 }
|
|
4163
|
323
|
4164 static int
|
|
4165 #ifdef __BORLANDC__
|
|
4166 _RTLENTRYF
|
|
4167 #endif
|
|
4168 rep_compare __ARGS((const void *s1, const void *s2));
|
|
4169
|
|
4170 /*
|
|
4171 * Function given to qsort() to sort the REP items on "from" string.
|
|
4172 */
|
|
4173 static int
|
|
4174 #ifdef __BORLANDC__
|
|
4175 _RTLENTRYF
|
|
4176 #endif
|
|
4177 rep_compare(s1, s2)
|
|
4178 const void *s1;
|
|
4179 const void *s2;
|
|
4180 {
|
|
4181 fromto_T *p1 = (fromto_T *)s1;
|
|
4182 fromto_T *p2 = (fromto_T *)s2;
|
|
4183
|
|
4184 return STRCMP(p1->ft_from, p2->ft_from);
|
|
4185 }
|
|
4186
|
236
|
4187 /*
|
|
4188 * Write the Vim spell file "fname".
|
|
4189 */
|
|
4190 static void
|
316
|
4191 write_vim_spell(fname, spin)
|
236
|
4192 char_u *fname;
|
300
|
4193 spellinfo_T *spin;
|
236
|
4194 {
|
300
|
4195 FILE *fd;
|
|
4196 int regionmask;
|
236
|
4197 int round;
|
300
|
4198 wordnode_T *tree;
|
|
4199 int nodecount;
|
323
|
4200 int i;
|
|
4201 int l;
|
|
4202 garray_T *gap;
|
|
4203 fromto_T *ftp;
|
|
4204 char_u *p;
|
|
4205 int rr;
|
236
|
4206
|
310
|
4207 fd = mch_fopen((char *)fname, "w");
|
300
|
4208 if (fd == NULL)
|
236
|
4209 {
|
|
4210 EMSG2(_(e_notopen), fname);
|
|
4211 return;
|
|
4212 }
|
|
4213
|
255
|
4214 /* <HEADER>: <fileID> <regioncnt> <regionname> ...
|
339
|
4215 * <charflagslen> <charflags>
|
|
4216 * <fcharslen> <fchars>
|
366
|
4217 * <midwordlen> <midword>
|
339
|
4218 * <prefcondcnt> <prefcond> ... */
|
300
|
4219
|
|
4220 /* <fileID> */
|
|
4221 if (fwrite(VIMSPELLMAGIC, VIMSPELLMAGICL, (size_t)1, fd) != 1)
|
|
4222 EMSG(_(e_write));
|
236
|
4223
|
|
4224 /* write the region names if there is more than one */
|
316
|
4225 if (spin->si_region_count > 1)
|
236
|
4226 {
|
316
|
4227 putc(spin->si_region_count, fd); /* <regioncnt> <regionname> ... */
|
|
4228 fwrite(spin->si_region_name, (size_t)(spin->si_region_count * 2),
|
|
4229 (size_t)1, fd);
|
|
4230 regionmask = (1 << spin->si_region_count) - 1;
|
236
|
4231 }
|
|
4232 else
|
|
4233 {
|
300
|
4234 putc(0, fd);
|
|
4235 regionmask = 0;
|
236
|
4236 }
|
|
4237
|
323
|
4238 /*
|
|
4239 * Write the table with character flags and table for case folding.
|
260
|
4240 * <charflagslen> <charflags> <fcharlen> <fchars>
|
|
4241 * Skip this for ASCII, the table may conflict with the one used for
|
323
|
4242 * 'encoding'.
|
|
4243 * Also skip this for an .add.spl file, the main spell file must contain
|
|
4244 * the table (avoids that it conflicts). File is shorter too.
|
|
4245 */
|
|
4246 if (spin->si_ascii || spin->si_add)
|
260
|
4247 {
|
300
|
4248 putc(0, fd);
|
|
4249 putc(0, fd);
|
|
4250 putc(0, fd);
|
260
|
4251 }
|
|
4252 else
|
300
|
4253 write_spell_chartab(fd);
|
255
|
4254
|
366
|
4255
|
|
4256 if (spin->si_midword == NULL)
|
|
4257 put_bytes(fd, 0L, 2); /* <midwordlen> */
|
|
4258 else
|
|
4259 {
|
|
4260 i = STRLEN(spin->si_midword);
|
|
4261 put_bytes(fd, (long_u)i, 2); /* <midwordlen> */
|
|
4262 fwrite(spin->si_midword, (size_t)i, (size_t)1, fd); /* <midword> */
|
|
4263 }
|
|
4264
|
|
4265
|
339
|
4266 /* Write the prefix conditions. */
|
|
4267 write_spell_prefcond(fd, &spin->si_prefcond);
|
|
4268
|
366
|
4269 /* <SUGGEST> : <repcount> <rep> ...
|
|
4270 * <salflags> <salcount> <sal> ...
|
|
4271 * <maplen> <mapstr> */
|
|
4272
|
323
|
4273 /* Sort the REP items. */
|
|
4274 qsort(spin->si_rep.ga_data, (size_t)spin->si_rep.ga_len,
|
|
4275 sizeof(fromto_T), rep_compare);
|
|
4276
|
|
4277 for (round = 1; round <= 2; ++round)
|
|
4278 {
|
|
4279 if (round == 1)
|
|
4280 gap = &spin->si_rep;
|
|
4281 else
|
|
4282 {
|
|
4283 gap = &spin->si_sal;
|
|
4284
|
|
4285 i = 0;
|
|
4286 if (spin->si_followup)
|
|
4287 i |= SAL_F0LLOWUP;
|
|
4288 if (spin->si_collapse)
|
|
4289 i |= SAL_COLLAPSE;
|
|
4290 if (spin->si_rem_accents)
|
|
4291 i |= SAL_REM_ACCENTS;
|
|
4292 putc(i, fd); /* <salflags> */
|
|
4293 }
|
|
4294
|
|
4295 put_bytes(fd, (long_u)gap->ga_len, 2); /* <repcount> or <salcount> */
|
|
4296 for (i = 0; i < gap->ga_len; ++i)
|
|
4297 {
|
|
4298 /* <rep> : <repfromlen> <repfrom> <reptolen> <repto> */
|
|
4299 /* <sal> : <salfromlen> <salfrom> <saltolen> <salto> */
|
|
4300 ftp = &((fromto_T *)gap->ga_data)[i];
|
|
4301 for (rr = 1; rr <= 2; ++rr)
|
|
4302 {
|
|
4303 p = rr == 1 ? ftp->ft_from : ftp->ft_to;
|
|
4304 l = STRLEN(p);
|
|
4305 putc(l, fd);
|
|
4306 fwrite(p, l, (size_t)1, fd);
|
|
4307 }
|
|
4308 }
|
|
4309 }
|
|
4310
|
|
4311 put_bytes(fd, (long_u)spin->si_map.ga_len, 2); /* <maplen> */
|
|
4312 if (spin->si_map.ga_len > 0) /* <mapstr> */
|
|
4313 fwrite(spin->si_map.ga_data, (size_t)spin->si_map.ga_len,
|
|
4314 (size_t)1, fd);
|
302
|
4315
|
236
|
4316 /*
|
339
|
4317 * <LWORDTREE> <KWORDTREE> <PREFIXTREE>
|
236
|
4318 */
|
323
|
4319 spin->si_memtot = 0;
|
339
|
4320 for (round = 1; round <= 3; ++round)
|
236
|
4321 {
|
339
|
4322 if (round == 1)
|
|
4323 tree = spin->si_foldroot;
|
|
4324 else if (round == 2)
|
|
4325 tree = spin->si_keeproot;
|
|
4326 else
|
|
4327 tree = spin->si_prefroot;
|
236
|
4328
|
346
|
4329 /* Clear the index and wnode fields in the tree. */
|
|
4330 clear_node(tree);
|
|
4331
|
300
|
4332 /* Count the number of nodes. Needed to be able to allocate the
|
346
|
4333 * memory when reading the nodes. Also fills in index for shared
|
300
|
4334 * nodes. */
|
346
|
4335 nodecount = put_node(NULL, tree, 0, regionmask, round == 3);
|
236
|
4336
|
300
|
4337 /* number of nodes in 4 bytes */
|
|
4338 put_bytes(fd, (long_u)nodecount, 4); /* <nodecount> */
|
302
|
4339 spin->si_memtot += nodecount + nodecount * sizeof(int);
|
236
|
4340
|
300
|
4341 /* Write the nodes. */
|
346
|
4342 (void)put_node(fd, tree, 0, regionmask, round == 3);
|
236
|
4343 }
|
|
4344
|
300
|
4345 fclose(fd);
|
236
|
4346 }
|
|
4347
|
|
4348 /*
|
346
|
4349 * Clear the index and wnode fields of "node", it siblings and its
|
|
4350 * children. This is needed because they are a union with other items to save
|
|
4351 * space.
|
|
4352 */
|
|
4353 static void
|
|
4354 clear_node(node)
|
|
4355 wordnode_T *node;
|
|
4356 {
|
|
4357 wordnode_T *np;
|
|
4358
|
|
4359 if (node != NULL)
|
|
4360 for (np = node; np != NULL; np = np->wn_sibling)
|
|
4361 {
|
|
4362 np->wn_u1.index = 0;
|
|
4363 np->wn_u2.wnode = NULL;
|
|
4364
|
|
4365 if (np->wn_byte != NUL)
|
|
4366 clear_node(np->wn_child);
|
|
4367 }
|
|
4368 }
|
|
4369
|
|
4370
|
|
4371 /*
|
300
|
4372 * Dump a word tree at node "node".
|
|
4373 *
|
|
4374 * This first writes the list of possible bytes (siblings). Then for each
|
|
4375 * byte recursively write the children.
|
|
4376 *
|
|
4377 * NOTE: The code here must match the code in read_tree(), since assumptions
|
|
4378 * are made about the indexes (so that we don't have to write them in the
|
|
4379 * file).
|
236
|
4380 *
|
300
|
4381 * Returns the number of nodes used.
|
236
|
4382 */
|
300
|
4383 static int
|
346
|
4384 put_node(fd, node, index, regionmask, prefixtree)
|
339
|
4385 FILE *fd; /* NULL when only counting */
|
300
|
4386 wordnode_T *node;
|
|
4387 int index;
|
|
4388 int regionmask;
|
339
|
4389 int prefixtree; /* TRUE for PREFIXTREE */
|
236
|
4390 {
|
300
|
4391 int newindex = index;
|
|
4392 int siblingcount = 0;
|
|
4393 wordnode_T *np;
|
236
|
4394 int flags;
|
300
|
4395
|
|
4396 /* If "node" is zero the tree is empty. */
|
|
4397 if (node == NULL)
|
|
4398 return 0;
|
|
4399
|
|
4400 /* Store the index where this node is written. */
|
346
|
4401 node->wn_u1.index = index;
|
236
|
4402
|
300
|
4403 /* Count the number of siblings. */
|
|
4404 for (np = node; np != NULL; np = np->wn_sibling)
|
|
4405 ++siblingcount;
|
236
|
4406
|
300
|
4407 /* Write the sibling count. */
|
|
4408 if (fd != NULL)
|
|
4409 putc(siblingcount, fd); /* <siblingcount> */
|
236
|
4410
|
300
|
4411 /* Write each sibling byte and optionally extra info. */
|
|
4412 for (np = node; np != NULL; np = np->wn_sibling)
|
236
|
4413 {
|
300
|
4414 if (np->wn_byte == 0)
|
|
4415 {
|
|
4416 if (fd != NULL)
|
|
4417 {
|
339
|
4418 /* For a NUL byte (end of word) write the flags etc. */
|
|
4419 if (prefixtree)
|
300
|
4420 {
|
339
|
4421 /* In PREFIXTREE write the required prefixID and the
|
|
4422 * associated condition nr (stored in wn_region). */
|
366
|
4423 if (np->wn_flags == (char_u)-2)
|
|
4424 putc(BY_FLAGS, fd); /* <byte> rare */
|
|
4425 else
|
|
4426 putc(BY_NOFLAGS, fd); /* <byte> */
|
339
|
4427 putc(np->wn_prefixID, fd); /* <prefixID> */
|
|
4428 put_bytes(fd, (long_u)np->wn_region, 2); /* <prefcondnr> */
|
300
|
4429 }
|
|
4430 else
|
|
4431 {
|
339
|
4432 /* For word trees we write the flag/region items. */
|
|
4433 flags = np->wn_flags;
|
|
4434 if (regionmask != 0 && np->wn_region != regionmask)
|
|
4435 flags |= WF_REGION;
|
|
4436 if (np->wn_prefixID != 0)
|
|
4437 flags |= WF_PFX;
|
|
4438 if (flags == 0)
|
|
4439 {
|
|
4440 /* word without flags or region */
|
|
4441 putc(BY_NOFLAGS, fd); /* <byte> */
|
|
4442 }
|
|
4443 else
|
|
4444 {
|
|
4445 putc(BY_FLAGS, fd); /* <byte> */
|
|
4446 putc(flags, fd); /* <flags> */
|
|
4447 if (flags & WF_REGION)
|
|
4448 putc(np->wn_region, fd); /* <region> */
|
|
4449 if (flags & WF_PFX)
|
|
4450 putc(np->wn_prefixID, fd); /* <prefixID> */
|
|
4451 }
|
300
|
4452 }
|
|
4453 }
|
|
4454 }
|
|
4455 else
|
|
4456 {
|
346
|
4457 if (np->wn_child->wn_u1.index != 0
|
|
4458 && np->wn_child->wn_u2.wnode != node)
|
300
|
4459 {
|
|
4460 /* The child is written elsewhere, write the reference. */
|
|
4461 if (fd != NULL)
|
|
4462 {
|
|
4463 putc(BY_INDEX, fd); /* <byte> */
|
|
4464 /* <nodeidx> */
|
346
|
4465 put_bytes(fd, (long_u)np->wn_child->wn_u1.index, 3);
|
300
|
4466 }
|
|
4467 }
|
346
|
4468 else if (np->wn_child->wn_u2.wnode == NULL)
|
300
|
4469 /* We will write the child below and give it an index. */
|
346
|
4470 np->wn_child->wn_u2.wnode = node;
|
236
|
4471
|
300
|
4472 if (fd != NULL)
|
|
4473 if (putc(np->wn_byte, fd) == EOF) /* <byte> or <xbyte> */
|
|
4474 {
|
|
4475 EMSG(_(e_write));
|
|
4476 return 0;
|
|
4477 }
|
|
4478 }
|
236
|
4479 }
|
|
4480
|
300
|
4481 /* Space used in the array when reading: one for each sibling and one for
|
|
4482 * the count. */
|
|
4483 newindex += siblingcount + 1;
|
249
|
4484
|
300
|
4485 /* Recursively dump the children of each sibling. */
|
|
4486 for (np = node; np != NULL; np = np->wn_sibling)
|
346
|
4487 if (np->wn_byte != 0 && np->wn_child->wn_u2.wnode == node)
|
|
4488 newindex = put_node(fd, np->wn_child, newindex, regionmask,
|
339
|
4489 prefixtree);
|
249
|
4490
|
300
|
4491 return newindex;
|
236
|
4492 }
|
|
4493
|
|
4494
|
|
4495 /*
|
310
|
4496 * ":mkspell [-ascii] outfile infile ..."
|
|
4497 * ":mkspell [-ascii] addfile"
|
236
|
4498 */
|
|
4499 void
|
|
4500 ex_mkspell(eap)
|
|
4501 exarg_T *eap;
|
|
4502 {
|
|
4503 int fcount;
|
|
4504 char_u **fnames;
|
310
|
4505 char_u *arg = eap->arg;
|
|
4506 int ascii = FALSE;
|
|
4507
|
|
4508 if (STRNCMP(arg, "-ascii", 6) == 0)
|
|
4509 {
|
|
4510 ascii = TRUE;
|
|
4511 arg = skipwhite(arg + 6);
|
|
4512 }
|
|
4513
|
|
4514 /* Expand all the remaining arguments (e.g., $VIMRUNTIME). */
|
|
4515 if (get_arglist_exp(arg, &fcount, &fnames) == OK)
|
|
4516 {
|
323
|
4517 mkspell(fcount, fnames, ascii, eap->forceit, FALSE);
|
310
|
4518 FreeWild(fcount, fnames);
|
|
4519 }
|
|
4520 }
|
|
4521
|
|
4522 /*
|
|
4523 * Create a Vim spell file from one or more word lists.
|
|
4524 * "fnames[0]" is the output file name.
|
|
4525 * "fnames[fcount - 1]" is the last input file name.
|
|
4526 * Exception: when "fnames[0]" ends in ".add" it's used as the input file name
|
|
4527 * and ".spl" is appended to make the output file name.
|
|
4528 */
|
|
4529 static void
|
323
|
4530 mkspell(fcount, fnames, ascii, overwrite, added_word)
|
310
|
4531 int fcount;
|
|
4532 char_u **fnames;
|
|
4533 int ascii; /* -ascii argument given */
|
|
4534 int overwrite; /* overwrite existing output file */
|
323
|
4535 int added_word; /* invoked through "zg" */
|
310
|
4536 {
|
236
|
4537 char_u fname[MAXPATHL];
|
|
4538 char_u wfname[MAXPATHL];
|
310
|
4539 char_u **innames;
|
|
4540 int incount;
|
236
|
4541 afffile_T *(afile[8]);
|
|
4542 int i;
|
|
4543 int len;
|
|
4544 struct stat st;
|
255
|
4545 int error = FALSE;
|
300
|
4546 spellinfo_T spin;
|
|
4547
|
|
4548 vim_memset(&spin, 0, sizeof(spin));
|
323
|
4549 spin.si_verbose = !added_word;
|
310
|
4550 spin.si_ascii = ascii;
|
323
|
4551 spin.si_followup = TRUE;
|
|
4552 spin.si_rem_accents = TRUE;
|
|
4553 ga_init2(&spin.si_rep, (int)sizeof(fromto_T), 20);
|
|
4554 ga_init2(&spin.si_sal, (int)sizeof(fromto_T), 20);
|
|
4555 ga_init2(&spin.si_map, (int)sizeof(char_u), 100);
|
339
|
4556 ga_init2(&spin.si_prefcond, (int)sizeof(char_u *), 50);
|
310
|
4557
|
|
4558 /* default: fnames[0] is output file, following are input files */
|
|
4559 innames = &fnames[1];
|
|
4560 incount = fcount - 1;
|
|
4561
|
|
4562 if (fcount >= 1)
|
240
|
4563 {
|
310
|
4564 len = STRLEN(fnames[0]);
|
|
4565 if (fcount == 1 && len > 4 && STRCMP(fnames[0] + len - 4, ".add") == 0)
|
|
4566 {
|
|
4567 /* For ":mkspell path/en.latin1.add" output file is
|
|
4568 * "path/en.latin1.add.spl". */
|
|
4569 innames = &fnames[0];
|
|
4570 incount = 1;
|
|
4571 vim_snprintf((char *)wfname, sizeof(wfname), "%s.spl", fnames[0]);
|
|
4572 }
|
366
|
4573 else if (fcount == 1)
|
|
4574 {
|
|
4575 /* For ":mkspell path/vim" output file is "path/vim.latin1.spl". */
|
|
4576 innames = &fnames[0];
|
|
4577 incount = 1;
|
|
4578 vim_snprintf((char *)wfname, sizeof(wfname), "%s.%s.spl", fnames[0],
|
|
4579 spin.si_ascii ? (char_u *)"ascii" : spell_enc());
|
|
4580 }
|
310
|
4581 else if (len > 4 && STRCMP(fnames[0] + len - 4, ".spl") == 0)
|
|
4582 {
|
|
4583 /* Name ends in ".spl", use as the file name. */
|
323
|
4584 vim_strncpy(wfname, fnames[0], sizeof(wfname) - 1);
|
310
|
4585 }
|
|
4586 else
|
|
4587 /* Name should be language, make the file name from it. */
|
|
4588 vim_snprintf((char *)wfname, sizeof(wfname), "%s.%s.spl", fnames[0],
|
|
4589 spin.si_ascii ? (char_u *)"ascii" : spell_enc());
|
|
4590
|
|
4591 /* Check for .ascii.spl. */
|
|
4592 if (strstr((char *)gettail(wfname), ".ascii.") != NULL)
|
|
4593 spin.si_ascii = TRUE;
|
|
4594
|
|
4595 /* Check for .add.spl. */
|
|
4596 if (strstr((char *)gettail(wfname), ".add.") != NULL)
|
|
4597 spin.si_add = TRUE;
|
240
|
4598 }
|
|
4599
|
310
|
4600 if (incount <= 0)
|
236
|
4601 EMSG(_(e_invarg)); /* need at least output and input names */
|
351
|
4602 else if (vim_strchr(gettail(wfname), '_') != NULL)
|
|
4603 EMSG(_("E751: Output file name must not have region name"));
|
310
|
4604 else if (incount > 8)
|
236
|
4605 EMSG(_("E754: Only up to 8 regions supported"));
|
|
4606 else
|
|
4607 {
|
|
4608 /* Check for overwriting before doing things that may take a lot of
|
|
4609 * time. */
|
310
|
4610 if (!overwrite && mch_stat((char *)wfname, &st) >= 0)
|
236
|
4611 {
|
|
4612 EMSG(_(e_exists));
|
310
|
4613 return;
|
236
|
4614 }
|
310
|
4615 if (mch_isdir(wfname))
|
236
|
4616 {
|
310
|
4617 EMSG2(_(e_isadir2), wfname);
|
|
4618 return;
|
236
|
4619 }
|
|
4620
|
|
4621 /*
|
|
4622 * Init the aff and dic pointers.
|
|
4623 * Get the region names if there are more than 2 arguments.
|
|
4624 */
|
310
|
4625 for (i = 0; i < incount; ++i)
|
236
|
4626 {
|
310
|
4627 afile[i] = NULL;
|
300
|
4628
|
316
|
4629 if (incount > 1)
|
236
|
4630 {
|
310
|
4631 len = STRLEN(innames[i]);
|
|
4632 if (STRLEN(gettail(innames[i])) < 5
|
|
4633 || innames[i][len - 3] != '_')
|
236
|
4634 {
|
310
|
4635 EMSG2(_("E755: Invalid region in %s"), innames[i]);
|
|
4636 return;
|
236
|
4637 }
|
316
|
4638 spin.si_region_name[i * 2] = TOLOWER_ASC(innames[i][len - 2]);
|
|
4639 spin.si_region_name[i * 2 + 1] =
|
|
4640 TOLOWER_ASC(innames[i][len - 1]);
|
236
|
4641 }
|
|
4642 }
|
316
|
4643 spin.si_region_count = incount;
|
236
|
4644
|
300
|
4645 spin.si_foldroot = wordtree_alloc(&spin.si_blocks);
|
|
4646 spin.si_keeproot = wordtree_alloc(&spin.si_blocks);
|
339
|
4647 spin.si_prefroot = wordtree_alloc(&spin.si_blocks);
|
|
4648 if (spin.si_foldroot == NULL
|
|
4649 || spin.si_keeproot == NULL
|
|
4650 || spin.si_prefroot == NULL)
|
300
|
4651 {
|
|
4652 error = TRUE;
|
310
|
4653 return;
|
300
|
4654 }
|
|
4655
|
351
|
4656 /* When not producing a .add.spl file clear the character table when
|
|
4657 * we encounter one in the .aff file. This means we dump the current
|
|
4658 * one in the .spl file if the .aff file doesn't define one. That's
|
|
4659 * better than guessing the contents, the table will match a
|
|
4660 * previously loaded spell file. */
|
|
4661 if (!spin.si_add)
|
|
4662 spin.si_clear_chartab = TRUE;
|
|
4663
|
236
|
4664 /*
|
|
4665 * Read all the .aff and .dic files.
|
|
4666 * Text is converted to 'encoding'.
|
300
|
4667 * Words are stored in the case-folded and keep-case trees.
|
236
|
4668 */
|
310
|
4669 for (i = 0; i < incount && !error; ++i)
|
236
|
4670 {
|
300
|
4671 spin.si_conv.vc_type = CONV_NONE;
|
310
|
4672 spin.si_region = 1 << i;
|
|
4673
|
|
4674 vim_snprintf((char *)fname, sizeof(fname), "%s.aff", innames[i]);
|
300
|
4675 if (mch_stat((char *)fname, &st) >= 0)
|
|
4676 {
|
|
4677 /* Read the .aff file. Will init "spin->si_conv" based on the
|
|
4678 * "SET" line. */
|
310
|
4679 afile[i] = spell_read_aff(fname, &spin);
|
|
4680 if (afile[i] == NULL)
|
300
|
4681 error = TRUE;
|
|
4682 else
|
|
4683 {
|
|
4684 /* Read the .dic file and store the words in the trees. */
|
|
4685 vim_snprintf((char *)fname, sizeof(fname), "%s.dic",
|
310
|
4686 innames[i]);
|
|
4687 if (spell_read_dic(fname, &spin, afile[i]) == FAIL)
|
300
|
4688 error = TRUE;
|
|
4689 }
|
|
4690 }
|
|
4691 else
|
|
4692 {
|
|
4693 /* No .aff file, try reading the file as a word list. Store
|
|
4694 * the words in the trees. */
|
310
|
4695 if (spell_read_wordfile(innames[i], &spin) == FAIL)
|
300
|
4696 error = TRUE;
|
|
4697 }
|
236
|
4698
|
310
|
4699 #ifdef FEAT_MBYTE
|
236
|
4700 /* Free any conversion stuff. */
|
300
|
4701 convert_setup(&spin.si_conv, NULL, NULL);
|
310
|
4702 #endif
|
236
|
4703 }
|
|
4704
|
300
|
4705 if (!error)
|
236
|
4706 {
|
|
4707 /*
|
300
|
4708 * Remove the dummy NUL from the start of the tree root.
|
236
|
4709 */
|
300
|
4710 spin.si_foldroot = spin.si_foldroot->wn_sibling;
|
|
4711 spin.si_keeproot = spin.si_keeproot->wn_sibling;
|
339
|
4712 spin.si_prefroot = spin.si_prefroot->wn_sibling;
|
236
|
4713
|
|
4714 /*
|
300
|
4715 * Combine tails in the tree.
|
236
|
4716 */
|
323
|
4717 if (!added_word || p_verbose > 2)
|
310
|
4718 {
|
323
|
4719 if (added_word)
|
310
|
4720 verbose_enter();
|
|
4721 MSG(_("Compressing word tree..."));
|
|
4722 out_flush();
|
323
|
4723 if (added_word)
|
310
|
4724 verbose_leave();
|
|
4725 }
|
|
4726 wordtree_compress(spin.si_foldroot, &spin);
|
|
4727 wordtree_compress(spin.si_keeproot, &spin);
|
339
|
4728 wordtree_compress(spin.si_prefroot, &spin);
|
236
|
4729 }
|
|
4730
|
300
|
4731 if (!error)
|
|
4732 {
|
|
4733 /*
|
|
4734 * Write the info in the spell file.
|
|
4735 */
|
323
|
4736 if (!added_word || p_verbose > 2)
|
310
|
4737 {
|
323
|
4738 if (added_word)
|
310
|
4739 verbose_enter();
|
366
|
4740 smsg((char_u *)_("Writing spell file %s ..."), wfname);
|
310
|
4741 out_flush();
|
323
|
4742 if (added_word)
|
310
|
4743 verbose_leave();
|
|
4744 }
|
|
4745
|
316
|
4746 write_vim_spell(wfname, &spin);
|
310
|
4747
|
323
|
4748 if (!added_word || p_verbose > 2)
|
310
|
4749 {
|
323
|
4750 if (added_word)
|
310
|
4751 verbose_enter();
|
|
4752 MSG(_("Done!"));
|
|
4753 smsg((char_u *)_("Estimated runtime memory use: %d bytes"),
|
302
|
4754 spin.si_memtot);
|
310
|
4755 out_flush();
|
323
|
4756 if (added_word)
|
310
|
4757 verbose_leave();
|
|
4758 }
|
|
4759
|
|
4760 /* If the file is loaded need to reload it. */
|
323
|
4761 spell_reload_one(wfname, added_word);
|
300
|
4762 }
|
|
4763
|
|
4764 /* Free the allocated memory. */
|
323
|
4765 ga_clear(&spin.si_rep);
|
|
4766 ga_clear(&spin.si_sal);
|
|
4767 ga_clear(&spin.si_map);
|
339
|
4768 ga_clear(&spin.si_prefcond);
|
366
|
4769 vim_free(spin.si_midword);
|
300
|
4770
|
|
4771 /* Free the .aff file structures. */
|
310
|
4772 for (i = 0; i < incount; ++i)
|
|
4773 if (afile[i] != NULL)
|
|
4774 spell_free_aff(afile[i]);
|
339
|
4775
|
|
4776 /* Free all the bits and pieces at once. */
|
|
4777 free_blocks(spin.si_blocks);
|
236
|
4778 }
|
310
|
4779 }
|
|
4780
|
|
4781
|
|
4782 /*
|
|
4783 * ":spellgood {word}"
|
|
4784 * ":spellwrong {word}"
|
|
4785 */
|
|
4786 void
|
|
4787 ex_spell(eap)
|
|
4788 exarg_T *eap;
|
|
4789 {
|
|
4790 spell_add_word(eap->arg, STRLEN(eap->arg), eap->cmdidx == CMD_spellwrong);
|
236
|
4791 }
|
|
4792
|
310
|
4793 /*
|
|
4794 * Add "word[len]" to 'spellfile' as a good or bad word.
|
|
4795 */
|
|
4796 void
|
|
4797 spell_add_word(word, len, bad)
|
|
4798 char_u *word;
|
|
4799 int len;
|
|
4800 int bad;
|
|
4801 {
|
|
4802 FILE *fd;
|
|
4803 buf_T *buf;
|
351
|
4804 int new_spf = FALSE;
|
|
4805 struct stat st;
|
|
4806
|
|
4807 /* If 'spellfile' isn't set figure out a good default value. */
|
310
|
4808 if (*curbuf->b_p_spf == NUL)
|
351
|
4809 {
|
310
|
4810 init_spellfile();
|
351
|
4811 new_spf = TRUE;
|
|
4812 }
|
|
4813
|
310
|
4814 if (*curbuf->b_p_spf == NUL)
|
323
|
4815 EMSG(_("E764: 'spellfile' is not set"));
|
310
|
4816 else
|
|
4817 {
|
|
4818 /* Check that the user isn't editing the .add file somewhere. */
|
|
4819 buf = buflist_findname_exp(curbuf->b_p_spf);
|
|
4820 if (buf != NULL && buf->b_ml.ml_mfp == NULL)
|
|
4821 buf = NULL;
|
|
4822 if (buf != NULL && bufIsChanged(buf))
|
|
4823 EMSG(_(e_bufloaded));
|
|
4824 else
|
|
4825 {
|
|
4826 fd = mch_fopen((char *)curbuf->b_p_spf, "a");
|
351
|
4827 if (fd == NULL && new_spf)
|
|
4828 {
|
|
4829 /* We just initialized the 'spellfile' option and can't open
|
|
4830 * the file. We may need to create the "spell" directory
|
|
4831 * first. We already checked the runtime directory is
|
|
4832 * writable in init_spellfile(). */
|
|
4833 STRCPY(NameBuff, curbuf->b_p_spf);
|
|
4834 *gettail_sep(NameBuff) = NUL;
|
|
4835 if (mch_stat((char *)NameBuff, &st) < 0)
|
|
4836 {
|
|
4837 /* The directory doesn't exist. Try creating it and
|
|
4838 * opening the file again. */
|
|
4839 vim_mkdir(NameBuff, 0755);
|
|
4840 fd = mch_fopen((char *)curbuf->b_p_spf, "a");
|
|
4841 }
|
|
4842 }
|
|
4843
|
310
|
4844 if (fd == NULL)
|
|
4845 EMSG2(_(e_notopen), curbuf->b_p_spf);
|
|
4846 else
|
|
4847 {
|
|
4848 if (bad)
|
|
4849 fprintf(fd, "/!%.*s\n", len, word);
|
|
4850 else
|
|
4851 fprintf(fd, "%.*s\n", len, word);
|
|
4852 fclose(fd);
|
|
4853
|
|
4854 /* Update the .add.spl file. */
|
323
|
4855 mkspell(1, &curbuf->b_p_spf, FALSE, TRUE, TRUE);
|
310
|
4856
|
|
4857 /* If the .add file is edited somewhere, reload it. */
|
|
4858 if (buf != NULL)
|
|
4859 buf_reload(buf);
|
323
|
4860
|
|
4861 redraw_all_later(NOT_VALID);
|
310
|
4862 }
|
|
4863 }
|
|
4864 }
|
|
4865 }
|
|
4866
|
|
4867 /*
|
|
4868 * Initialize 'spellfile' for the current buffer.
|
|
4869 */
|
|
4870 static void
|
|
4871 init_spellfile()
|
|
4872 {
|
|
4873 char_u buf[MAXPATHL];
|
|
4874 int l;
|
|
4875 slang_T *sl;
|
|
4876 char_u *rtp;
|
351
|
4877 char_u *lend;
|
310
|
4878
|
|
4879 if (*curbuf->b_p_spl != NUL && curbuf->b_langp.ga_len > 0)
|
|
4880 {
|
351
|
4881 /* Find the end of the language name. Exclude the region. */
|
|
4882 for (lend = curbuf->b_p_spl; *lend != NUL
|
|
4883 && vim_strchr((char_u *)",._", *lend) == NULL; ++lend)
|
|
4884 ;
|
|
4885
|
|
4886 /* Loop over all entries in 'runtimepath'. Use the first one where we
|
|
4887 * are allowed to write. */
|
310
|
4888 rtp = p_rtp;
|
|
4889 while (*rtp != NUL)
|
|
4890 {
|
|
4891 /* Copy the path from 'runtimepath' to buf[]. */
|
|
4892 copy_option_part(&rtp, buf, MAXPATHL, ",");
|
|
4893 if (filewritable(buf) == 2)
|
|
4894 {
|
316
|
4895 /* Use the first language name from 'spelllang' and the
|
|
4896 * encoding used in the first loaded .spl file. */
|
310
|
4897 sl = LANGP_ENTRY(curbuf->b_langp, 0)->lp_slang;
|
|
4898 l = STRLEN(buf);
|
|
4899 vim_snprintf((char *)buf + l, MAXPATHL - l,
|
316
|
4900 "/spell/%.*s.%s.add",
|
351
|
4901 (int)(lend - curbuf->b_p_spl), curbuf->b_p_spl,
|
310
|
4902 strstr((char *)gettail(sl->sl_fname), ".ascii.") != NULL
|
|
4903 ? (char_u *)"ascii" : spell_enc());
|
|
4904 set_option_value((char_u *)"spellfile", 0L, buf, OPT_LOCAL);
|
|
4905 break;
|
|
4906 }
|
|
4907 }
|
|
4908 }
|
|
4909 }
|
236
|
4910
|
300
|
4911
|
307
|
4912 /*
|
|
4913 * Init the chartab used for spelling for ASCII.
|
|
4914 * EBCDIC is not supported!
|
|
4915 */
|
|
4916 static void
|
|
4917 clear_spell_chartab(sp)
|
|
4918 spelltab_T *sp;
|
|
4919 {
|
324
|
4920 int i;
|
307
|
4921
|
|
4922 /* Init everything to FALSE. */
|
|
4923 vim_memset(sp->st_isw, FALSE, sizeof(sp->st_isw));
|
|
4924 vim_memset(sp->st_isu, FALSE, sizeof(sp->st_isu));
|
|
4925 for (i = 0; i < 256; ++i)
|
324
|
4926 {
|
307
|
4927 sp->st_fold[i] = i;
|
324
|
4928 sp->st_upper[i] = i;
|
|
4929 }
|
307
|
4930
|
|
4931 /* We include digits. A word shouldn't start with a digit, but handling
|
|
4932 * that is done separately. */
|
|
4933 for (i = '0'; i <= '9'; ++i)
|
|
4934 sp->st_isw[i] = TRUE;
|
|
4935 for (i = 'A'; i <= 'Z'; ++i)
|
|
4936 {
|
|
4937 sp->st_isw[i] = TRUE;
|
|
4938 sp->st_isu[i] = TRUE;
|
|
4939 sp->st_fold[i] = i + 0x20;
|
|
4940 }
|
|
4941 for (i = 'a'; i <= 'z'; ++i)
|
324
|
4942 {
|
307
|
4943 sp->st_isw[i] = TRUE;
|
324
|
4944 sp->st_upper[i] = i - 0x20;
|
|
4945 }
|
307
|
4946 }
|
|
4947
|
|
4948 /*
|
|
4949 * Init the chartab used for spelling. Only depends on 'encoding'.
|
|
4950 * Called once while starting up and when 'encoding' changes.
|
|
4951 * The default is to use isalpha(), but the spell file should define the word
|
|
4952 * characters to make it possible that 'encoding' differs from the current
|
|
4953 * locale.
|
|
4954 */
|
|
4955 void
|
|
4956 init_spell_chartab()
|
|
4957 {
|
|
4958 int i;
|
|
4959
|
|
4960 did_set_spelltab = FALSE;
|
|
4961 clear_spell_chartab(&spelltab);
|
366
|
4962 vim_memset(spell_ismw, FALSE, sizeof(spell_ismw));
|
307
|
4963 #ifdef FEAT_MBYTE
|
366
|
4964 vim_free(spell_ismw_mb);
|
|
4965 spell_ismw_mb = NULL;
|
|
4966
|
307
|
4967 if (enc_dbcs)
|
|
4968 {
|
|
4969 /* DBCS: assume double-wide characters are word characters. */
|
|
4970 for (i = 128; i <= 255; ++i)
|
|
4971 if (MB_BYTE2LEN(i) == 2)
|
|
4972 spelltab.st_isw[i] = TRUE;
|
|
4973 }
|
324
|
4974 else if (enc_utf8)
|
|
4975 {
|
|
4976 for (i = 128; i < 256; ++i)
|
|
4977 {
|
|
4978 spelltab.st_isu[i] = utf_isupper(i);
|
|
4979 spelltab.st_isw[i] = spelltab.st_isu[i] || utf_islower(i);
|
|
4980 spelltab.st_fold[i] = utf_fold(i);
|
|
4981 spelltab.st_upper[i] = utf_toupper(i);
|
|
4982 }
|
|
4983 }
|
307
|
4984 else
|
|
4985 #endif
|
|
4986 {
|
324
|
4987 /* Rough guess: use locale-dependent library functions. */
|
307
|
4988 for (i = 128; i < 256; ++i)
|
|
4989 {
|
|
4990 if (MB_ISUPPER(i))
|
|
4991 {
|
324
|
4992 spelltab.st_isw[i] = TRUE;
|
307
|
4993 spelltab.st_isu[i] = TRUE;
|
|
4994 spelltab.st_fold[i] = MB_TOLOWER(i);
|
|
4995 }
|
324
|
4996 else if (MB_ISLOWER(i))
|
|
4997 {
|
|
4998 spelltab.st_isw[i] = TRUE;
|
|
4999 spelltab.st_upper[i] = MB_TOUPPER(i);
|
|
5000 }
|
307
|
5001 }
|
|
5002 }
|
|
5003 }
|
|
5004
|
|
5005 static char *e_affform = N_("E761: Format error in affix file FOL, LOW or UPP");
|
|
5006 static char *e_affrange = N_("E762: Character in FOL, LOW or UPP is out of range");
|
|
5007
|
|
5008 /*
|
|
5009 * Set the spell character tables from strings in the affix file.
|
|
5010 */
|
|
5011 static int
|
|
5012 set_spell_chartab(fol, low, upp)
|
|
5013 char_u *fol;
|
|
5014 char_u *low;
|
|
5015 char_u *upp;
|
|
5016 {
|
|
5017 /* We build the new tables here first, so that we can compare with the
|
|
5018 * previous one. */
|
|
5019 spelltab_T new_st;
|
|
5020 char_u *pf = fol, *pl = low, *pu = upp;
|
|
5021 int f, l, u;
|
|
5022
|
|
5023 clear_spell_chartab(&new_st);
|
|
5024
|
|
5025 while (*pf != NUL)
|
|
5026 {
|
|
5027 if (*pl == NUL || *pu == NUL)
|
|
5028 {
|
|
5029 EMSG(_(e_affform));
|
|
5030 return FAIL;
|
|
5031 }
|
|
5032 #ifdef FEAT_MBYTE
|
|
5033 f = mb_ptr2char_adv(&pf);
|
|
5034 l = mb_ptr2char_adv(&pl);
|
|
5035 u = mb_ptr2char_adv(&pu);
|
|
5036 #else
|
|
5037 f = *pf++;
|
|
5038 l = *pl++;
|
|
5039 u = *pu++;
|
|
5040 #endif
|
|
5041 /* Every character that appears is a word character. */
|
|
5042 if (f < 256)
|
|
5043 new_st.st_isw[f] = TRUE;
|
|
5044 if (l < 256)
|
|
5045 new_st.st_isw[l] = TRUE;
|
|
5046 if (u < 256)
|
|
5047 new_st.st_isw[u] = TRUE;
|
|
5048
|
|
5049 /* if "LOW" and "FOL" are not the same the "LOW" char needs
|
|
5050 * case-folding */
|
|
5051 if (l < 256 && l != f)
|
|
5052 {
|
|
5053 if (f >= 256)
|
|
5054 {
|
|
5055 EMSG(_(e_affrange));
|
|
5056 return FAIL;
|
|
5057 }
|
|
5058 new_st.st_fold[l] = f;
|
|
5059 }
|
|
5060
|
|
5061 /* if "UPP" and "FOL" are not the same the "UPP" char needs
|
324
|
5062 * case-folding, it's upper case and the "UPP" is the upper case of
|
|
5063 * "FOL" . */
|
307
|
5064 if (u < 256 && u != f)
|
|
5065 {
|
|
5066 if (f >= 256)
|
|
5067 {
|
|
5068 EMSG(_(e_affrange));
|
|
5069 return FAIL;
|
|
5070 }
|
|
5071 new_st.st_fold[u] = f;
|
|
5072 new_st.st_isu[u] = TRUE;
|
324
|
5073 new_st.st_upper[f] = u;
|
307
|
5074 }
|
|
5075 }
|
|
5076
|
|
5077 if (*pl != NUL || *pu != NUL)
|
|
5078 {
|
|
5079 EMSG(_(e_affform));
|
|
5080 return FAIL;
|
|
5081 }
|
|
5082
|
|
5083 return set_spell_finish(&new_st);
|
|
5084 }
|
|
5085
|
|
5086 /*
|
|
5087 * Set the spell character tables from strings in the .spl file.
|
|
5088 */
|
|
5089 static int
|
|
5090 set_spell_charflags(flags, cnt, upp)
|
|
5091 char_u *flags;
|
|
5092 int cnt;
|
|
5093 char_u *upp;
|
|
5094 {
|
|
5095 /* We build the new tables here first, so that we can compare with the
|
|
5096 * previous one. */
|
|
5097 spelltab_T new_st;
|
|
5098 int i;
|
|
5099 char_u *p = upp;
|
324
|
5100 int c;
|
307
|
5101
|
|
5102 clear_spell_chartab(&new_st);
|
|
5103
|
|
5104 for (i = 0; i < cnt; ++i)
|
|
5105 {
|
324
|
5106 new_st.st_isw[i + 128] = (flags[i] & CF_WORD) != 0;
|
|
5107 new_st.st_isu[i + 128] = (flags[i] & CF_UPPER) != 0;
|
307
|
5108
|
|
5109 if (*p == NUL)
|
|
5110 return FAIL;
|
|
5111 #ifdef FEAT_MBYTE
|
324
|
5112 c = mb_ptr2char_adv(&p);
|
307
|
5113 #else
|
324
|
5114 c = *p++;
|
307
|
5115 #endif
|
324
|
5116 new_st.st_fold[i + 128] = c;
|
|
5117 if (i + 128 != c && new_st.st_isu[i + 128] && c < 256)
|
|
5118 new_st.st_upper[c] = i + 128;
|
307
|
5119 }
|
|
5120
|
|
5121 return set_spell_finish(&new_st);
|
|
5122 }
|
|
5123
|
|
5124 static int
|
|
5125 set_spell_finish(new_st)
|
|
5126 spelltab_T *new_st;
|
|
5127 {
|
|
5128 int i;
|
|
5129
|
|
5130 if (did_set_spelltab)
|
|
5131 {
|
|
5132 /* check that it's the same table */
|
|
5133 for (i = 0; i < 256; ++i)
|
|
5134 {
|
|
5135 if (spelltab.st_isw[i] != new_st->st_isw[i]
|
|
5136 || spelltab.st_isu[i] != new_st->st_isu[i]
|
324
|
5137 || spelltab.st_fold[i] != new_st->st_fold[i]
|
|
5138 || spelltab.st_upper[i] != new_st->st_upper[i])
|
307
|
5139 {
|
|
5140 EMSG(_("E763: Word characters differ between spell files"));
|
|
5141 return FAIL;
|
|
5142 }
|
|
5143 }
|
|
5144 }
|
|
5145 else
|
|
5146 {
|
|
5147 /* copy the new spelltab into the one being used */
|
|
5148 spelltab = *new_st;
|
|
5149 did_set_spelltab = TRUE;
|
|
5150 }
|
|
5151
|
|
5152 return OK;
|
|
5153 }
|
|
5154
|
|
5155 /*
|
358
|
5156 * Return TRUE if "p" points to a word character.
|
366
|
5157 * As a special case we see "midword" characters as word character when it is
|
358
|
5158 * followed by a word character. This finds they'there but not 'they there'.
|
366
|
5159 * Thus this only works properly when past the first character of the word.
|
358
|
5160 */
|
|
5161 static int
|
|
5162 spell_iswordp(p)
|
|
5163 char_u *p;
|
|
5164 {
|
366
|
5165 #ifdef FEAT_MBYTE
|
358
|
5166 char_u *s;
|
366
|
5167 int l;
|
|
5168 int c;
|
|
5169
|
|
5170 if (has_mbyte)
|
|
5171 {
|
|
5172 l = MB_BYTE2LEN(*p);
|
358
|
5173 s = p;
|
366
|
5174 if (l == 1)
|
|
5175 {
|
|
5176 /* be quick for ASCII */
|
|
5177 if (spell_ismw[*p])
|
|
5178 {
|
|
5179 s = p + 1; /* skip a mid-word character */
|
|
5180 l = MB_BYTE2LEN(*s);
|
|
5181 }
|
|
5182 }
|
|
5183 else
|
|
5184 {
|
|
5185 c = mb_ptr2char(p);
|
|
5186 if (c < 256 ? spell_ismw[c] : (spell_ismw_mb != NULL
|
|
5187 && vim_strchr(spell_ismw_mb, c) != NULL))
|
|
5188 {
|
|
5189 s = p + l;
|
|
5190 l = MB_BYTE2LEN(*s);
|
|
5191 }
|
|
5192 }
|
|
5193
|
|
5194 if (l > 1)
|
|
5195 return mb_get_class(s) >= 2;
|
|
5196 return spelltab.st_isw[*s];
|
|
5197 }
|
358
|
5198 #endif
|
366
|
5199
|
|
5200 return spelltab.st_isw[spell_ismw[*p] ? p[1] : p[0]];
|
358
|
5201 }
|
|
5202
|
|
5203 /*
|
339
|
5204 * Write the table with prefix conditions to the .spl file.
|
|
5205 */
|
|
5206 static void
|
|
5207 write_spell_prefcond(fd, gap)
|
|
5208 FILE *fd;
|
|
5209 garray_T *gap;
|
|
5210 {
|
|
5211 int i;
|
|
5212 char_u *p;
|
|
5213 int len;
|
|
5214
|
|
5215 put_bytes(fd, (long_u)gap->ga_len, 2); /* <prefcondcnt> */
|
|
5216
|
|
5217 for (i = 0; i < gap->ga_len; ++i)
|
|
5218 {
|
|
5219 /* <prefcond> : <condlen> <condstr> */
|
|
5220 p = ((char_u **)gap->ga_data)[i];
|
|
5221 if (p == NULL)
|
|
5222 fputc(0, fd);
|
|
5223 else
|
|
5224 {
|
|
5225 len = STRLEN(p);
|
|
5226 fputc(len, fd);
|
|
5227 fwrite(p, (size_t)len, (size_t)1, fd);
|
|
5228 }
|
|
5229 }
|
|
5230 }
|
|
5231
|
|
5232 /*
|
307
|
5233 * Write the current tables into the .spl file.
|
|
5234 * This makes sure the same characters are recognized as word characters when
|
|
5235 * generating an when using a spell file.
|
|
5236 */
|
|
5237 static void
|
|
5238 write_spell_chartab(fd)
|
|
5239 FILE *fd;
|
|
5240 {
|
|
5241 char_u charbuf[256 * 4];
|
|
5242 int len = 0;
|
|
5243 int flags;
|
|
5244 int i;
|
|
5245
|
|
5246 fputc(128, fd); /* <charflagslen> */
|
|
5247 for (i = 128; i < 256; ++i)
|
|
5248 {
|
|
5249 flags = 0;
|
|
5250 if (spelltab.st_isw[i])
|
324
|
5251 flags |= CF_WORD;
|
307
|
5252 if (spelltab.st_isu[i])
|
324
|
5253 flags |= CF_UPPER;
|
307
|
5254 fputc(flags, fd); /* <charflags> */
|
|
5255
|
310
|
5256 #ifdef FEAT_MBYTE
|
|
5257 if (has_mbyte)
|
|
5258 len += mb_char2bytes(spelltab.st_fold[i], charbuf + len);
|
|
5259 else
|
|
5260 #endif
|
|
5261 charbuf[len++] = spelltab.st_fold[i];
|
307
|
5262 }
|
|
5263
|
|
5264 put_bytes(fd, (long_u)len, 2); /* <fcharlen> */
|
|
5265 fwrite(charbuf, (size_t)len, (size_t)1, fd); /* <fchars> */
|
|
5266 }
|
|
5267
|
|
5268 /*
|
324
|
5269 * Case-fold "str[len]" into "buf[buflen]". The result is NUL terminated.
|
|
5270 * Uses the character definitions from the .spl file.
|
307
|
5271 * When using a multi-byte 'encoding' the length may change!
|
|
5272 * Returns FAIL when something wrong.
|
|
5273 */
|
|
5274 static int
|
324
|
5275 spell_casefold(str, len, buf, buflen)
|
|
5276 char_u *str;
|
307
|
5277 int len;
|
|
5278 char_u *buf;
|
|
5279 int buflen;
|
|
5280 {
|
|
5281 int i;
|
|
5282
|
|
5283 if (len >= buflen)
|
|
5284 {
|
|
5285 buf[0] = NUL;
|
|
5286 return FAIL; /* result will not fit */
|
|
5287 }
|
|
5288
|
|
5289 #ifdef FEAT_MBYTE
|
|
5290 if (has_mbyte)
|
|
5291 {
|
324
|
5292 int outi = 0;
|
|
5293 char_u *p;
|
307
|
5294 int c;
|
|
5295
|
|
5296 /* Fold one character at a time. */
|
324
|
5297 for (p = str; p < str + len; )
|
307
|
5298 {
|
|
5299 if (outi + MB_MAXBYTES > buflen)
|
|
5300 {
|
|
5301 buf[outi] = NUL;
|
|
5302 return FAIL;
|
|
5303 }
|
324
|
5304 c = mb_ptr2char_adv(&p);
|
|
5305 outi += mb_char2bytes(SPELL_TOFOLD(c), buf + outi);
|
307
|
5306 }
|
|
5307 buf[outi] = NUL;
|
|
5308 }
|
|
5309 else
|
|
5310 #endif
|
|
5311 {
|
|
5312 /* Be quick for non-multibyte encodings. */
|
|
5313 for (i = 0; i < len; ++i)
|
324
|
5314 buf[i] = spelltab.st_fold[str[i]];
|
307
|
5315 buf[i] = NUL;
|
|
5316 }
|
|
5317
|
|
5318 return OK;
|
|
5319 }
|
|
5320
|
323
|
5321 /*
|
|
5322 * "z?": Find badly spelled word under or after the cursor.
|
|
5323 * Give suggestions for the properly spelled word.
|
|
5324 */
|
|
5325 void
|
|
5326 spell_suggest()
|
|
5327 {
|
|
5328 char_u *line;
|
|
5329 pos_T prev_cursor = curwin->w_cursor;
|
|
5330 char_u wcopy[MAXWLEN + 2];
|
|
5331 char_u *p;
|
|
5332 int i;
|
|
5333 int c;
|
|
5334 suginfo_T sug;
|
|
5335 suggest_T *stp;
|
|
5336
|
344
|
5337 /* Find the start of the badly spelled word. */
|
346
|
5338 if (spell_move_to(FORWARD, TRUE, TRUE) == FAIL
|
|
5339 || curwin->w_cursor.col > prev_cursor.col)
|
|
5340 {
|
|
5341 if (!curwin->w_p_spell || *curbuf->b_p_spl == NUL)
|
|
5342 return;
|
|
5343
|
|
5344 /* No bad word or it starts after the cursor: use the word under the
|
|
5345 * cursor. */
|
|
5346 curwin->w_cursor = prev_cursor;
|
|
5347 line = ml_get_curline();
|
|
5348 p = line + curwin->w_cursor.col;
|
|
5349 /* Backup to before start of word. */
|
|
5350 while (p > line && SPELL_ISWORDP(p))
|
|
5351 mb_ptr_back(line, p);
|
|
5352 /* Forward to start of word. */
|
|
5353 while (!SPELL_ISWORDP(p))
|
|
5354 mb_ptr_adv(p);
|
|
5355
|
|
5356 if (!SPELL_ISWORDP(p)) /* No word found. */
|
|
5357 {
|
|
5358 beep_flush();
|
|
5359 return;
|
|
5360 }
|
|
5361 curwin->w_cursor.col = p - line;
|
323
|
5362 }
|
|
5363
|
344
|
5364 /* Get the word and its length. */
|
323
|
5365 line = ml_get_curline();
|
344
|
5366
|
|
5367 /* Get the list of suggestions */
|
358
|
5368 spell_find_suggest(line + curwin->w_cursor.col, &sug, (int)Rows - 2, TRUE);
|
323
|
5369
|
|
5370 if (sug.su_ga.ga_len == 0)
|
|
5371 MSG(_("Sorry, no suggestions"));
|
|
5372 else
|
|
5373 {
|
|
5374 /* List the suggestions. */
|
|
5375 msg_start();
|
|
5376 vim_snprintf((char *)IObuff, IOSIZE, _("Change \"%.*s\" to:"),
|
|
5377 sug.su_badlen, sug.su_badptr);
|
|
5378 msg_puts(IObuff);
|
|
5379 msg_clr_eos();
|
|
5380 msg_putchar('\n');
|
346
|
5381
|
323
|
5382 msg_scroll = TRUE;
|
|
5383 for (i = 0; i < sug.su_ga.ga_len; ++i)
|
|
5384 {
|
344
|
5385 stp = &SUG(sug.su_ga, i);
|
323
|
5386
|
|
5387 /* The suggested word may replace only part of the bad word, add
|
|
5388 * the not replaced part. */
|
|
5389 STRCPY(wcopy, stp->st_word);
|
|
5390 if (sug.su_badlen > stp->st_orglen)
|
|
5391 vim_strncpy(wcopy + STRLEN(wcopy),
|
|
5392 sug.su_badptr + stp->st_orglen,
|
|
5393 sug.su_badlen - stp->st_orglen);
|
346
|
5394 vim_snprintf((char *)IObuff, IOSIZE, _("%2d \"%s\""), i + 1, wcopy);
|
|
5395 msg_puts(IObuff);
|
|
5396
|
|
5397 /* The word may replace more than "su_badlen". */
|
|
5398 if (sug.su_badlen < stp->st_orglen)
|
|
5399 {
|
|
5400 vim_snprintf((char *)IObuff, IOSIZE, _(" < \"%.*s\""),
|
|
5401 stp->st_orglen, sug.su_badptr);
|
|
5402 msg_puts(IObuff);
|
|
5403 }
|
|
5404
|
324
|
5405 if (p_verbose > 0)
|
344
|
5406 {
|
346
|
5407 /* Add the score. */
|
351
|
5408 if (sps_flags & (SPS_DOUBLE | SPS_BEST))
|
346
|
5409 vim_snprintf((char *)IObuff, IOSIZE, _(" (%s%d - %d)"),
|
344
|
5410 stp->st_salscore ? "s " : "",
|
|
5411 stp->st_score, stp->st_altscore);
|
|
5412 else
|
346
|
5413 vim_snprintf((char *)IObuff, IOSIZE, _(" (%d)"),
|
|
5414 stp->st_score);
|
|
5415 msg_advance(30);
|
|
5416 msg_puts(IObuff);
|
344
|
5417 }
|
323
|
5418 lines_left = 3; /* avoid more prompt */
|
|
5419 msg_putchar('\n');
|
|
5420 }
|
|
5421
|
|
5422 /* Ask for choice. */
|
|
5423 i = prompt_for_number();
|
344
|
5424 if (i > 0 && i <= sug.su_ga.ga_len && u_save_cursor() == OK)
|
323
|
5425 {
|
|
5426 /* Replace the word. */
|
344
|
5427 stp = &SUG(sug.su_ga, i - 1);
|
323
|
5428 p = alloc(STRLEN(line) - stp->st_orglen + STRLEN(stp->st_word) + 1);
|
|
5429 if (p != NULL)
|
|
5430 {
|
|
5431 c = sug.su_badptr - line;
|
|
5432 mch_memmove(p, line, c);
|
|
5433 STRCPY(p + c, stp->st_word);
|
|
5434 STRCAT(p, sug.su_badptr + stp->st_orglen);
|
|
5435 ml_replace(curwin->w_cursor.lnum, p, FALSE);
|
|
5436 curwin->w_cursor.col = c;
|
|
5437 changed_bytes(curwin->w_cursor.lnum, c);
|
344
|
5438
|
|
5439 /* For redo we use a change-word command. */
|
|
5440 ResetRedobuff();
|
|
5441 AppendToRedobuff((char_u *)"ciw");
|
|
5442 AppendToRedobuff(stp->st_word);
|
|
5443 AppendCharToRedobuff(ESC);
|
323
|
5444 }
|
|
5445 }
|
|
5446 else
|
|
5447 curwin->w_cursor = prev_cursor;
|
|
5448 }
|
|
5449
|
344
|
5450 spell_find_cleanup(&sug);
|
|
5451 }
|
|
5452
|
|
5453 /*
|
|
5454 * Find spell suggestions for "word". Return them in the growarray "*gap" as
|
|
5455 * a list of allocated strings.
|
|
5456 */
|
|
5457 void
|
|
5458 spell_suggest_list(gap, word, maxcount)
|
|
5459 garray_T *gap;
|
|
5460 char_u *word;
|
|
5461 int maxcount; /* maximum nr of suggestions */
|
|
5462 {
|
|
5463 suginfo_T sug;
|
|
5464 int i;
|
|
5465 suggest_T *stp;
|
|
5466 char_u *wcopy;
|
|
5467
|
358
|
5468 spell_find_suggest(word, &sug, maxcount, FALSE);
|
344
|
5469
|
|
5470 /* Make room in "gap". */
|
|
5471 ga_init2(gap, sizeof(char_u *), sug.su_ga.ga_len + 1);
|
|
5472 if (ga_grow(gap, sug.su_ga.ga_len) == FAIL)
|
|
5473 return;
|
|
5474
|
323
|
5475 for (i = 0; i < sug.su_ga.ga_len; ++i)
|
344
|
5476 {
|
|
5477 stp = &SUG(sug.su_ga, i);
|
|
5478
|
|
5479 /* The suggested word may replace only part of "word", add the not
|
|
5480 * replaced part. */
|
|
5481 wcopy = alloc(STRLEN(stp->st_word)
|
|
5482 + STRLEN(sug.su_badptr + stp->st_orglen) + 1);
|
|
5483 if (wcopy == NULL)
|
|
5484 break;
|
|
5485 STRCPY(wcopy, stp->st_word);
|
|
5486 STRCAT(wcopy, sug.su_badptr + stp->st_orglen);
|
|
5487 ((char_u **)gap->ga_data)[gap->ga_len++] = wcopy;
|
|
5488 }
|
|
5489
|
|
5490 spell_find_cleanup(&sug);
|
|
5491 }
|
|
5492
|
|
5493 /*
|
|
5494 * Find spell suggestions for the word at the start of "badptr".
|
|
5495 * Return the suggestions in "su->su_ga".
|
|
5496 * The maximum number of suggestions is "maxcount".
|
|
5497 * Note: does use info for the current window.
|
|
5498 * This is based on the mechanisms of Aspell, but completely reimplemented.
|
|
5499 */
|
|
5500 static void
|
358
|
5501 spell_find_suggest(badptr, su, maxcount, banbadword)
|
344
|
5502 char_u *badptr;
|
|
5503 suginfo_T *su;
|
|
5504 int maxcount;
|
358
|
5505 int banbadword; /* don't include badword in suggestions */
|
344
|
5506 {
|
|
5507 int attr;
|
|
5508
|
|
5509 /*
|
|
5510 * Set the info in "*su".
|
|
5511 */
|
|
5512 vim_memset(su, 0, sizeof(suginfo_T));
|
|
5513 ga_init2(&su->su_ga, (int)sizeof(suggest_T), 10);
|
|
5514 ga_init2(&su->su_sga, (int)sizeof(suggest_T), 10);
|
355
|
5515 if (*badptr == NUL)
|
|
5516 return;
|
344
|
5517 hash_init(&su->su_banned);
|
|
5518
|
|
5519 su->su_badptr = badptr;
|
|
5520 su->su_badlen = spell_check(curwin, su->su_badptr, &attr);
|
|
5521 su->su_maxcount = maxcount;
|
|
5522
|
|
5523 if (su->su_badlen >= MAXWLEN)
|
|
5524 su->su_badlen = MAXWLEN - 1; /* just in case */
|
|
5525 vim_strncpy(su->su_badword, su->su_badptr, su->su_badlen);
|
|
5526 (void)spell_casefold(su->su_badptr, su->su_badlen,
|
|
5527 su->su_fbadword, MAXWLEN);
|
346
|
5528 /* get caps flags for bad word */
|
|
5529 su->su_badflags = captype(su->su_badptr, su->su_badptr + su->su_badlen);
|
344
|
5530
|
|
5531 /* Ban the bad word itself. It may appear in another region. */
|
358
|
5532 if (banbadword)
|
|
5533 add_banned(su, su->su_badword);
|
344
|
5534
|
|
5535 /*
|
346
|
5536 * 1. Try special cases, such as repeating a word: "the the" -> "the".
|
344
|
5537 *
|
|
5538 * Set a maximum score to limit the combination of operations that is
|
|
5539 * tried.
|
|
5540 */
|
|
5541 su->su_maxscore = SCORE_MAXINIT;
|
346
|
5542 suggest_try_special(su);
|
|
5543
|
|
5544 /*
|
|
5545 * 2. Try inserting/deleting/swapping/changing a letter, use REP entries
|
|
5546 * from the .aff file and inserting a space (split the word).
|
|
5547 */
|
|
5548 suggest_try_change(su);
|
344
|
5549
|
|
5550 /* For the resulting top-scorers compute the sound-a-like score. */
|
|
5551 if (sps_flags & SPS_DOUBLE)
|
|
5552 score_comp_sal(su);
|
|
5553
|
|
5554 /*
|
346
|
5555 * 3. Try finding sound-a-like words.
|
344
|
5556 *
|
|
5557 * Only do this when we don't have a lot of suggestions yet, because it's
|
|
5558 * very slow and often doesn't find new suggestions.
|
|
5559 */
|
|
5560 if ((sps_flags & SPS_DOUBLE)
|
|
5561 || (!(sps_flags & SPS_FAST)
|
|
5562 && su->su_ga.ga_len < SUG_CLEAN_COUNT(su)))
|
|
5563 {
|
|
5564 /* Allow a higher score now. */
|
|
5565 su->su_maxscore = SCORE_MAXMAX;
|
346
|
5566 suggest_try_soundalike(su);
|
344
|
5567 }
|
|
5568
|
|
5569 /* When CTRL-C was hit while searching do show the results. */
|
|
5570 ui_breakcheck();
|
|
5571 if (got_int)
|
|
5572 {
|
|
5573 (void)vgetc();
|
|
5574 got_int = FALSE;
|
|
5575 }
|
|
5576
|
|
5577 if (sps_flags & SPS_DOUBLE)
|
|
5578 {
|
|
5579 /* Combine the two list of suggestions. */
|
|
5580 score_combine(su);
|
|
5581 }
|
|
5582 else if (su->su_ga.ga_len != 0)
|
|
5583 {
|
|
5584 if (sps_flags & SPS_BEST)
|
|
5585 /* Adjust the word score for how it sounds like. */
|
|
5586 rescore_suggestions(su);
|
|
5587
|
|
5588 /* Sort the suggestions and truncate at "maxcount". */
|
|
5589 (void)cleanup_suggestions(&su->su_ga, su->su_maxscore, maxcount);
|
|
5590 }
|
|
5591 }
|
|
5592
|
|
5593 /*
|
|
5594 * Free the info put in "*su" by spell_find_suggest().
|
|
5595 */
|
|
5596 static void
|
|
5597 spell_find_cleanup(su)
|
|
5598 suginfo_T *su;
|
|
5599 {
|
|
5600 int i;
|
|
5601
|
|
5602 /* Free the suggestions. */
|
|
5603 for (i = 0; i < su->su_ga.ga_len; ++i)
|
|
5604 vim_free(SUG(su->su_ga, i).st_word);
|
|
5605 ga_clear(&su->su_ga);
|
|
5606 for (i = 0; i < su->su_sga.ga_len; ++i)
|
|
5607 vim_free(SUG(su->su_sga, i).st_word);
|
|
5608 ga_clear(&su->su_sga);
|
323
|
5609
|
|
5610 /* Free the banned words. */
|
344
|
5611 free_banned(su);
|
323
|
5612 }
|
|
5613
|
|
5614 /*
|
324
|
5615 * Make a copy of "word", with the first letter upper or lower cased, to
|
|
5616 * "wcopy[MAXWLEN]". "word" must not be empty.
|
|
5617 * The result is NUL terminated.
|
323
|
5618 */
|
|
5619 static void
|
324
|
5620 onecap_copy(word, wcopy, upper)
|
323
|
5621 char_u *word;
|
|
5622 char_u *wcopy;
|
|
5623 int upper; /* TRUE: first letter made upper case */
|
|
5624 {
|
|
5625 char_u *p;
|
|
5626 int c;
|
|
5627 int l;
|
|
5628
|
|
5629 p = word;
|
|
5630 #ifdef FEAT_MBYTE
|
|
5631 if (has_mbyte)
|
|
5632 c = mb_ptr2char_adv(&p);
|
|
5633 else
|
|
5634 #endif
|
|
5635 c = *p++;
|
|
5636 if (upper)
|
324
|
5637 c = SPELL_TOUPPER(c);
|
323
|
5638 else
|
324
|
5639 c = SPELL_TOFOLD(c);
|
323
|
5640 #ifdef FEAT_MBYTE
|
|
5641 if (has_mbyte)
|
|
5642 l = mb_char2bytes(c, wcopy);
|
|
5643 else
|
|
5644 #endif
|
|
5645 {
|
|
5646 l = 1;
|
|
5647 wcopy[0] = c;
|
|
5648 }
|
324
|
5649 vim_strncpy(wcopy + l, p, MAXWLEN - l);
|
323
|
5650 }
|
|
5651
|
|
5652 /*
|
324
|
5653 * Make a copy of "word" with all the letters upper cased into
|
|
5654 * "wcopy[MAXWLEN]". The result is NUL terminated.
|
323
|
5655 */
|
|
5656 static void
|
|
5657 allcap_copy(word, wcopy)
|
|
5658 char_u *word;
|
|
5659 char_u *wcopy;
|
|
5660 {
|
|
5661 char_u *s;
|
|
5662 char_u *d;
|
|
5663 int c;
|
|
5664
|
|
5665 d = wcopy;
|
|
5666 for (s = word; *s != NUL; )
|
|
5667 {
|
|
5668 #ifdef FEAT_MBYTE
|
|
5669 if (has_mbyte)
|
|
5670 c = mb_ptr2char_adv(&s);
|
|
5671 else
|
|
5672 #endif
|
|
5673 c = *s++;
|
324
|
5674 c = SPELL_TOUPPER(c);
|
323
|
5675
|
|
5676 #ifdef FEAT_MBYTE
|
|
5677 if (has_mbyte)
|
|
5678 {
|
|
5679 if (d - wcopy >= MAXWLEN - MB_MAXBYTES)
|
|
5680 break;
|
|
5681 d += mb_char2bytes(c, d);
|
|
5682 }
|
|
5683 else
|
|
5684 #endif
|
|
5685 {
|
|
5686 if (d - wcopy >= MAXWLEN - 1)
|
|
5687 break;
|
|
5688 *d++ = c;
|
|
5689 }
|
|
5690 }
|
|
5691 *d = NUL;
|
|
5692 }
|
|
5693
|
|
5694 /*
|
346
|
5695 * Try finding suggestions by recognizing specific situations.
|
|
5696 */
|
|
5697 static void
|
|
5698 suggest_try_special(su)
|
|
5699 suginfo_T *su;
|
|
5700 {
|
|
5701 char_u *p;
|
|
5702 int len;
|
|
5703 int c;
|
|
5704 char_u word[MAXWLEN];
|
|
5705
|
|
5706 /*
|
|
5707 * Recognize a word that is repeated: "the the".
|
|
5708 */
|
|
5709 p = skiptowhite(su->su_fbadword);
|
|
5710 len = p - su->su_fbadword;
|
|
5711 p = skipwhite(p);
|
|
5712 if (STRLEN(p) == len && STRNCMP(su->su_fbadword, p, len) == 0)
|
|
5713 {
|
|
5714 /* Include badflags: if the badword is onecap or allcap
|
|
5715 * use that for the goodword too: "The the" -> "The". */
|
|
5716 c = su->su_fbadword[len];
|
|
5717 su->su_fbadword[len] = NUL;
|
|
5718 make_case_word(su->su_fbadword, word, su->su_badflags);
|
|
5719 su->su_fbadword[len] = c;
|
351
|
5720 add_suggestion(su, &su->su_ga, word, su->su_badlen, SCORE_DEL, 0, TRUE);
|
346
|
5721 }
|
|
5722 }
|
|
5723
|
|
5724 /*
|
323
|
5725 * Try finding suggestions by adding/removing/swapping letters.
|
330
|
5726 *
|
|
5727 * This uses a state machine. At each node in the tree we try various
|
|
4728 * operations. When trying if an operation works "depth" is increased and the
|
|
5729 * stack[] is used to store info. This allows combinations, thus insert one
|
|
5730 * character, replace one and delete another. The number of changes is
|
|
5731 * limited by su->su_maxscore, checked in try_deeper().
|
323
|
5732 */
|
|
5733 static void
|
346
|
5734 suggest_try_change(su)
|
323
|
5735 suginfo_T *su;
|
|
5736 {
|
|
5737 char_u fword[MAXWLEN]; /* copy of the bad word, case-folded */
|
|
5738 char_u tword[MAXWLEN]; /* good word collected so far */
|
|
5739 trystate_T stack[MAXWLEN];
|
|
5740 char_u preword[MAXWLEN * 3]; /* word found with proper case (appended
|
|
5741 * to for word split) */
|
|
5742 char_u prewordlen = 0; /* length of word in "preword" */
|
|
5743 int splitoff = 0; /* index in tword after last split */
|
|
5744 trystate_T *sp;
|
|
5745 int newscore;
|
|
5746 langp_T *lp;
|
|
5747 char_u *byts;
|
324
|
5748 idx_T *idxs;
|
323
|
5749 int depth;
|
330
|
5750 int c, c2, c3;
|
|
5751 int n = 0;
|
323
|
5752 int flags;
|
|
5753 garray_T *gap;
|
324
|
5754 idx_T arridx;
|
323
|
5755 int len;
|
|
5756 char_u *p;
|
|
5757 fromto_T *ftp;
|
330
|
5758 int fl = 0, tl;
|
346
|
5759 int repextra = 0; /* extra bytes in fword[] from REP item */
|
323
|
5760
|
|
5761 /* We make a copy of the case-folded bad word, so that we can modify it
|
346
|
5762 * to find matches (esp. REP items). Append some more text, changing
|
|
5763 * chars after the bad word may help. */
|
323
|
5764 STRCPY(fword, su->su_fbadword);
|
346
|
5765 n = STRLEN(fword);
|
|
5766 p = su->su_badptr + su->su_badlen;
|
|
5767 (void)spell_casefold(p, STRLEN(p), fword + n, MAXWLEN - n);
|
323
|
5768
|
|
5769 for (lp = LANGP_ENTRY(curwin->w_buffer->b_langp, 0);
|
|
5770 lp->lp_slang != NULL; ++lp)
|
|
5771 {
|
|
5772 /*
|
|
5773 * Go through the whole case-fold tree, try changes at each node.
|
|
5774 * "tword[]" contains the word collected from nodes in the tree.
|
|
5775 * "fword[]" the word we are trying to match with (initially the bad
|
|
5776 * word).
|
|
5777 */
|
|
5778 byts = lp->lp_slang->sl_fbyts;
|
|
5779 idxs = lp->lp_slang->sl_fidxs;
|
|
5780
|
|
5781 depth = 0;
|
|
5782 stack[0].ts_state = STATE_START;
|
|
5783 stack[0].ts_score = 0;
|
|
5784 stack[0].ts_curi = 1;
|
|
5785 stack[0].ts_fidx = 0;
|
|
5786 stack[0].ts_fidxtry = 0;
|
|
5787 stack[0].ts_twordlen = 0;
|
|
5788 stack[0].ts_arridx = 0;
|
330
|
5789 #ifdef FEAT_MBYTE
|
|
5790 stack[0].ts_tcharlen = 0;
|
|
5791 #endif
|
|
5792
|
|
5793 /*
|
|
5794 * Loop to find all suggestions. At each round we either:
|
|
5795 * - For the current state try one operation, advance "ts_curi",
|
|
5796 * increase "depth".
|
|
5797 * - When a state is done go to the next, set "ts_state".
|
|
5798 * - When all states are tried decrease "depth".
|
|
5799 */
|
323
|
5800 while (depth >= 0 && !got_int)
|
|
5801 {
|
|
5802 sp = &stack[depth];
|
|
5803 switch (sp->ts_state)
|
|
5804 {
|
|
5805 case STATE_START:
|
|
5806 /*
|
|
5807 * Start of node: Deal with NUL bytes, which means
|
|
5808 * tword[] may end here.
|
|
5809 */
|
|
5810 arridx = sp->ts_arridx; /* current node in the tree */
|
|
5811 len = byts[arridx]; /* bytes in this node */
|
|
5812 arridx += sp->ts_curi; /* index of current byte */
|
|
5813
|
346
|
5814 if (sp->ts_curi > len || byts[arridx] != 0)
|
323
|
5815 {
|
|
5816 /* Past bytes in node and/or past NUL bytes. */
|
|
5817 sp->ts_state = STATE_ENDNUL;
|
|
5818 break;
|
|
5819 }
|
|
5820
|
|
5821 /*
|
|
5822 * End of word in tree.
|
|
5823 */
|
|
5824 ++sp->ts_curi; /* eat one NUL byte */
|
|
5825
|
324
|
5826 flags = (int)idxs[arridx];
|
323
|
5827
|
|
5828 /*
|
|
5829 * Form the word with proper case in preword.
|
|
5830 * If there is a word from a previous split, append.
|
|
5831 */
|
|
5832 tword[sp->ts_twordlen] = NUL;
|
|
5833 if (flags & WF_KEEPCAP)
|
|
5834 /* Must find the word in the keep-case tree. */
|
|
5835 find_keepcap_word(lp->lp_slang, tword + splitoff,
|
|
5836 preword + prewordlen);
|
|
5837 else
|
346
|
5838 {
|
323
|
5839 /* Include badflags: if the badword is onecap or allcap
|
346
|
5840 * use that for the goodword too. But if the badword is
|
|
5841 * allcap and it's only one char long use onecap. */
|
|
5842 c = su->su_badflags;
|
|
5843 if ((c & WF_ALLCAP)
|
|
5844 #ifdef FEAT_MBYTE
|
|
5845 && su->su_badlen == mb_ptr2len_check(su->su_badptr)
|
|
5846 #else
|
|
5847 && su->su_badlen == 1
|
|
5848 #endif
|
|
5849 )
|
|
5850 c = WF_ONECAP;
|
323
|
5851 make_case_word(tword + splitoff,
|
346
|
5852 preword + prewordlen, flags | c);
|
|
5853 }
|
323
|
5854
|
|
5855 /* Don't use a banned word. It may appear again as a good
|
|
5856 * word, thus remember it. */
|
|
5857 if (flags & WF_BANNED)
|
|
5858 {
|
|
5859 add_banned(su, preword + prewordlen);
|
|
5860 break;
|
|
5861 }
|
|
5862 if (was_banned(su, preword + prewordlen))
|
|
5863 break;
|
|
5864
|
|
5865 newscore = 0;
|
|
5866 if ((flags & WF_REGION)
|
|
5867 && (((unsigned)flags >> 8) & lp->lp_region) == 0)
|
|
5868 newscore += SCORE_REGION;
|
|
5869 if (flags & WF_RARE)
|
|
5870 newscore += SCORE_RARE;
|
|
5871
|
346
|
5872 if (!spell_valid_case(su->su_badflags,
|
323
|
5873 captype(preword + prewordlen, NULL)))
|
|
5874 newscore += SCORE_ICASE;
|
|
5875
|
346
|
5876 if ((fword[sp->ts_fidx] == NUL
|
358
|
5877 || !spell_iswordp(fword + sp->ts_fidx))
|
346
|
5878 && sp->ts_fidx >= sp->ts_fidxtry)
|
323
|
5879 {
|
366
|
5880 /* The badword also ends: add suggestions. Give a penalty
|
|
5881 * when changing non-word char to word char, e.g., "thes,"
|
|
5882 * -> "these". */
|
|
5883 p = fword + sp->ts_fidx;
|
|
5884 #ifdef FEAT_MBYTE
|
|
5885 if (has_mbyte)
|
|
5886 mb_ptr_back(fword, p);
|
|
5887 else
|
|
5888 #endif
|
|
5889 --p;
|
|
5890 if (!spell_iswordp(p))
|
|
5891 {
|
|
5892 p = preword + STRLEN(preword);
|
|
5893 #ifdef FEAT_MBYTE
|
|
5894 if (has_mbyte)
|
|
5895 mb_ptr_back(preword, p);
|
|
5896 else
|
|
5897 #endif
|
|
5898 --p;
|
|
5899 if (spell_iswordp(p))
|
|
5900 newscore += SCORE_NONWORD;
|
|
5901 }
|
|
5902
|
344
|
5903 add_suggestion(su, &su->su_ga, preword,
|
346
|
5904 sp->ts_fidx - repextra,
|
351
|
5905 sp->ts_score + newscore, 0, FALSE);
|
323
|
5906 }
|
330
|
5907 else if (sp->ts_fidx >= sp->ts_fidxtry
|
|
5908 #ifdef FEAT_MBYTE
|
|
5909 /* Don't split halfway a character. */
|
|
5910 && (!has_mbyte || sp->ts_tcharlen == 0)
|
|
5911 #endif
|
|
5912 )
|
323
|
5913 {
|
|
5914 /* The word in the tree ends but the badword
|
|
5915 * continues: try inserting a space and check that a valid
|
|
5916 * words starts at fword[sp->ts_fidx]. */
|
|
5917 if (try_deeper(su, stack, depth, newscore + SCORE_SPLIT))
|
|
5918 {
|
|
5919 /* Save things to be restored at STATE_SPLITUNDO. */
|
|
5920 sp->ts_save_prewordlen = prewordlen;
|
346
|
5921 sp->ts_save_badflags = su->su_badflags;
|
323
|
5922 sp->ts_save_splitoff = splitoff;
|
|
5923
|
|
5924 /* Append a space to preword. */
|
|
5925 STRCAT(preword, " ");
|
|
5926 prewordlen = STRLEN(preword);
|
|
5927 splitoff = sp->ts_twordlen;
|
324
|
5928 #ifdef FEAT_MBYTE
|
|
5929 if (has_mbyte)
|
|
5930 {
|
|
5931 int i = 0;
|
|
5932
|
|
5933 /* Case-folding may change the number of bytes:
|
|
5934 * Count nr of chars in fword[sp->ts_fidx] and
|
|
5935 * advance that many chars in su->su_badptr. */
|
|
5936 for (p = fword; p < fword + sp->ts_fidx;
|
|
5937 mb_ptr_adv(p))
|
|
5938 ++i;
|
|
5939 for (p = su->su_badptr; i > 0; mb_ptr_adv(p))
|
|
5940 --i;
|
|
5941 }
|
|
5942 else
|
|
5943 #endif
|
|
5944 p = su->su_badptr + sp->ts_fidx;
|
346
|
5945 su->su_badflags = captype(p, su->su_badptr
|
|
5946 + su->su_badlen);
|
323
|
5947
|
|
5948 sp->ts_state = STATE_SPLITUNDO;
|
|
5949 ++depth;
|
|
5950 /* Restart at top of the tree. */
|
|
5951 stack[depth].ts_arridx = 0;
|
|
5952 }
|
|
5953 }
|
|
5954 break;
|
|
5955
|
|
5956 case STATE_SPLITUNDO:
|
346
|
5957 /* Undo the changes done for word split. */
|
|
5958 su->su_badflags = sp->ts_save_badflags;
|
323
|
5959 splitoff = sp->ts_save_splitoff;
|
|
5960 prewordlen = sp->ts_save_prewordlen;
|
|
5961
|
|
5962 /* Continue looking for NUL bytes. */
|
|
5963 sp->ts_state = STATE_START;
|
|
5964 break;
|
|
5965
|
|
5966 case STATE_ENDNUL:
|
|
5967 /* Past the NUL bytes in the node. */
|
346
|
5968 if (fword[sp->ts_fidx] == NUL)
|
323
|
5969 {
|
|
5970 /* The badword ends, can't use the bytes in this node. */
|
|
5971 sp->ts_state = STATE_DEL;
|
|
5972 break;
|
|
5973 }
|
|
5974 sp->ts_state = STATE_PLAIN;
|
|
5975 /*FALLTHROUGH*/
|
|
5976
|
|
5977 case STATE_PLAIN:
|
|
5978 /*
|
|
5979 * Go over all possible bytes at this node, add each to
|
|
5980 * tword[] and use child node. "ts_curi" is the index.
|
|
5981 */
|
|
5982 arridx = sp->ts_arridx;
|
|
5983 if (sp->ts_curi > byts[arridx])
|
|
5984 {
|
|
5985 /* Done all bytes at this node, do next state. When still
|
|
5986 * at already changed bytes skip the other tricks. */
|
|
5987 if (sp->ts_fidx >= sp->ts_fidxtry)
|
|
5988 sp->ts_state = STATE_DEL;
|
|
5989 else
|
|
5990 sp->ts_state = STATE_FINAL;
|
|
5991 }
|
|
5992 else
|
|
5993 {
|
|
5994 arridx += sp->ts_curi++;
|
|
5995 c = byts[arridx];
|
|
5996
|
|
5997 /* Normal byte, go one level deeper. If it's not equal to
|
|
5998 * the byte in the bad word adjust the score. But don't
|
|
5999 * even try when the byte was already changed. */
|
330
|
6000 if (c == fword[sp->ts_fidx]
|
|
6001 #ifdef FEAT_MBYTE
|
|
6002 || (sp->ts_tcharlen > 0
|
|
6003 && sp->ts_isdiff != DIFF_NONE)
|
|
6004 #endif
|
|
6005 )
|
323
|
6006 newscore = 0;
|
|
6007 else
|
|
6008 newscore = SCORE_SUBST;
|
|
6009 if ((newscore == 0 || sp->ts_fidx >= sp->ts_fidxtry)
|
|
6010 && try_deeper(su, stack, depth, newscore))
|
|
6011 {
|
|
6012 ++depth;
|
330
|
6013 sp = &stack[depth];
|
|
6014 ++sp->ts_fidx;
|
|
6015 tword[sp->ts_twordlen++] = c;
|
|
6016 sp->ts_arridx = idxs[arridx];
|
|
6017 #ifdef FEAT_MBYTE
|
|
6018 if (newscore == SCORE_SUBST)
|
|
6019 sp->ts_isdiff = DIFF_YES;
|
|
6020 if (has_mbyte)
|
|
6021 {
|
|
6022 /* Multi-byte characters are a bit complicated to
|
|
6023 * handle: They differ when any of the bytes
|
|
6024 * differ and then their length may also differ. */
|
|
6025 if (sp->ts_tcharlen == 0)
|
|
6026 {
|
|
6027 /* First byte. */
|
|
6028 sp->ts_tcharidx = 0;
|
|
6029 sp->ts_tcharlen = MB_BYTE2LEN(c);
|
|
6030 sp->ts_fcharstart = sp->ts_fidx - 1;
|
|
6031 sp->ts_isdiff = (newscore != 0)
|
|
6032 ? DIFF_YES : DIFF_NONE;
|
|
6033 }
|
|
6034 else if (sp->ts_isdiff == DIFF_INSERT)
|
|
6035 /* When inserting trail bytes don't advance in
|
|
6036 * the bad word. */
|
|
6037 --sp->ts_fidx;
|
|
6038 if (++sp->ts_tcharidx == sp->ts_tcharlen)
|
|
6039 {
|
|
6040 /* Last byte of character. */
|
|
6041 if (sp->ts_isdiff == DIFF_YES)
|
|
6042 {
|
|
6043 /* Correct ts_fidx for the byte length of
|
|
6044 * the character (we didn't check that
|
|
6045 * before). */
|
|
6046 sp->ts_fidx = sp->ts_fcharstart
|
|
6047 + MB_BYTE2LEN(
|
|
6048 fword[sp->ts_fcharstart]);
|
|
6049
|
|
6050 /* For a similar character adjust score
|
|
6051 * from SCORE_SUBST to SCORE_SIMILAR. */
|
|
6052 if (lp->lp_slang->sl_has_map
|
|
6053 && similar_chars(lp->lp_slang,
|
|
6054 mb_ptr2char(tword
|
|
6055 + sp->ts_twordlen
|
|
6056 - sp->ts_tcharlen),
|
|
6057 mb_ptr2char(fword
|
|
6058 + sp->ts_fcharstart)))
|
|
6059 sp->ts_score -=
|
|
6060 SCORE_SUBST - SCORE_SIMILAR;
|
|
6061 }
|
358
|
6062 else if (sp->ts_isdiff == DIFF_INSERT
|
|
6063 && sp->ts_twordlen > sp->ts_tcharlen)
|
|
6064 {
|
|
6065 /* If the previous character was the same,
|
|
6066 * thus doubling a character, give a bonus
|
|
6067 * to the score. */
|
|
6068 p = tword + sp->ts_twordlen
|
|
6069 - sp->ts_tcharlen;
|
|
6070 c = mb_ptr2char(p);
|
|
6071 mb_ptr_back(tword, p);
|
|
6072 if (c == mb_ptr2char(p))
|
|
6073 sp->ts_score -= SCORE_INS
|
|
6074 - SCORE_INSDUP;
|
|
6075 }
|
330
|
6076
|
|
6077 /* Starting a new char, reset the length. */
|
|
6078 sp->ts_tcharlen = 0;
|
|
6079 }
|
|
6080 }
|
|
6081 else
|
|
6082 #endif
|
|
6083 {
|
|
6084 /* If we found a similar char adjust the score.
|
|
6085 * We do this after calling try_deeper() because
|
|
6086 * it's slow. */
|
|
6087 if (newscore != 0
|
|
6088 && lp->lp_slang->sl_has_map
|
|
6089 && similar_chars(lp->lp_slang,
|
|
6090 c, fword[sp->ts_fidx - 1]))
|
|
6091 sp->ts_score -= SCORE_SUBST - SCORE_SIMILAR;
|
|
6092 }
|
323
|
6093 }
|
|
6094 }
|
|
6095 break;
|
|
6096
|
|
6097 case STATE_DEL:
|
330
|
6098 #ifdef FEAT_MBYTE
|
|
6099 /* When past the first byte of a multi-byte char don't try
|
|
6100 * delete/insert/swap a character. */
|
|
6101 if (has_mbyte && sp->ts_tcharlen > 0)
|
|
6102 {
|
|
6103 sp->ts_state = STATE_FINAL;
|
|
6104 break;
|
|
6105 }
|
|
6106 #endif
|
|
6107 /*
|
|
6108 * Try skipping one character in the bad word (delete it).
|
|
6109 */
|
323
|
6110 sp->ts_state = STATE_INS;
|
|
6111 sp->ts_curi = 1;
|
|
6112 if (fword[sp->ts_fidx] != NUL
|
|
6113 && try_deeper(su, stack, depth, SCORE_DEL))
|
|
6114 {
|
|
6115 ++depth;
|
358
|
6116
|
|
6117 /* Advance over the character in fword[]. Give a bonus to
|
|
6118 * the score if the same character is following "nn" ->
|
|
6119 * "n". */
|
330
|
6120 #ifdef FEAT_MBYTE
|
|
6121 if (has_mbyte)
|
358
|
6122 {
|
|
6123 c = mb_ptr2char(fword + sp->ts_fidx);
|
330
|
6124 stack[depth].ts_fidx += MB_BYTE2LEN(fword[sp->ts_fidx]);
|
358
|
6125 if (c == mb_ptr2char(fword + stack[depth].ts_fidx))
|
|
6126 stack[depth].ts_score -= SCORE_DEL - SCORE_DELDUP;
|
|
6127 }
|
330
|
6128 else
|
|
6129 #endif
|
358
|
6130 {
|
330
|
6131 ++stack[depth].ts_fidx;
|
358
|
6132 if (fword[sp->ts_fidx] == fword[sp->ts_fidx + 1])
|
|
6133 stack[depth].ts_score -= SCORE_DEL - SCORE_DELDUP;
|
|
6134 }
|
323
|
6135 break;
|
|
6136 }
|
|
6137 /*FALLTHROUGH*/
|
|
6138
|
|
6139 case STATE_INS:
|
330
|
6140 /* Insert one byte. Do this for each possible byte at this
|
323
|
6141 * node. */
|
|
6142 n = sp->ts_arridx;
|
|
6143 if (sp->ts_curi > byts[n])
|
|
6144 {
|
|
6145 /* Done all bytes at this node, do next state. */
|
|
6146 sp->ts_state = STATE_SWAP;
|
|
6147 }
|
|
6148 else
|
|
6149 {
|
330
|
6150 /* Do one more byte at this node. Skip NUL bytes. */
|
323
|
6151 n += sp->ts_curi++;
|
|
6152 c = byts[n];
|
|
6153 if (c != 0 && try_deeper(su, stack, depth, SCORE_INS))
|
|
6154 {
|
|
6155 ++depth;
|
330
|
6156 sp = &stack[depth];
|
|
6157 tword[sp->ts_twordlen++] = c;
|
|
6158 sp->ts_arridx = idxs[n];
|
|
6159 #ifdef FEAT_MBYTE
|
|
6160 if (has_mbyte)
|
|
6161 {
|
|
6162 fl = MB_BYTE2LEN(c);
|
|
6163 if (fl > 1)
|
|
6164 {
|
|
6165 /* There are following bytes for the same
|
|
6166 * character. We must find all bytes before
|
|
6167 * trying delete/insert/swap/etc. */
|
|
6168 sp->ts_tcharlen = fl;
|
|
6169 sp->ts_tcharidx = 1;
|
|
6170 sp->ts_isdiff = DIFF_INSERT;
|
|
6171 }
|
|
6172 }
|
358
|
6173 else
|
|
6174 fl = 1;
|
|
6175 if (fl == 1)
|
330
|
6176 #endif
|
358
|
6177 {
|
|
6178 /* If the previous character was the same, thus
|
|
6179 * doubling a character, give a bonus to the
|
|
6180 * score. */
|
|
6181 if (sp->ts_twordlen >= 2
|
|
6182 && tword[sp->ts_twordlen - 2] == c)
|
|
6183 sp->ts_score -= SCORE_INS - SCORE_INSDUP;
|
|
6184 }
|
323
|
6185 }
|
|
6186 }
|
|
6187 break;
|
|
6188
|
|
6189 case STATE_SWAP:
|
330
|
6190 /*
|
|
6191 * Swap two bytes in the bad word: "12" -> "21".
|
|
6192 * We change "fword" here, it's changed back afterwards.
|
|
6193 */
|
|
6194 p = fword + sp->ts_fidx;
|
|
6195 c = *p;
|
|
6196 if (c == NUL)
|
|
6197 {
|
|
6198 /* End of word, can't swap or replace. */
|
|
6199 sp->ts_state = STATE_FINAL;
|
|
6200 break;
|
|
6201 }
|
|
6202 #ifdef FEAT_MBYTE
|
|
6203 if (has_mbyte)
|
|
6204 {
|
|
6205 n = mb_ptr2len_check(p);
|
|
6206 c = mb_ptr2char(p);
|
|
6207 c2 = mb_ptr2char(p + n);
|
|
6208 }
|
|
6209 else
|
|
6210 #endif
|
|
6211 c2 = p[1];
|
|
6212 if (c == c2)
|
323
|
6213 {
|
330
|
6214 /* Characters are identical, swap won't do anything. */
|
|
6215 sp->ts_state = STATE_SWAP3;
|
|
6216 break;
|
|
6217 }
|
|
6218 if (c2 != NUL && try_deeper(su, stack, depth, SCORE_SWAP))
|
|
6219 {
|
|
6220 sp->ts_state = STATE_UNSWAP;
|
323
|
6221 ++depth;
|
330
|
6222 #ifdef FEAT_MBYTE
|
|
6223 if (has_mbyte)
|
|
6224 {
|
|
6225 fl = mb_char2len(c2);
|
|
6226 mch_memmove(p, p + n, fl);
|
|
6227 mb_char2bytes(c, p + fl);
|
|
6228 stack[depth].ts_fidxtry = sp->ts_fidx + n + fl;
|
|
6229 }
|
|
6230 else
|
|
6231 #endif
|
|
6232 {
|
|
6233 p[0] = c2;
|
|
6234 p[1] = c;
|
|
6235 stack[depth].ts_fidxtry = sp->ts_fidx + 2;
|
|
6236 }
|
323
|
6237 }
|
|
6238 else
|
|
6239 /* If this swap doesn't work then SWAP3 won't either. */
|
|
6240 sp->ts_state = STATE_REP_INI;
|
|
6241 break;
|
|
6242
|
330
|
6243 case STATE_UNSWAP:
|
|
6244 /* Undo the STATE_SWAP swap: "21" -> "12". */
|
|
6245 p = fword + sp->ts_fidx;
|
|
6246 #ifdef FEAT_MBYTE
|
|
6247 if (has_mbyte)
|
|
6248 {
|
|
6249 n = MB_BYTE2LEN(*p);
|
|
6250 c = mb_ptr2char(p + n);
|
|
6251 mch_memmove(p + MB_BYTE2LEN(p[n]), p, n);
|
|
6252 mb_char2bytes(c, p);
|
|
6253 }
|
|
6254 else
|
|
6255 #endif
|
|
6256 {
|
|
6257 c = *p;
|
|
6258 *p = p[1];
|
|
6259 p[1] = c;
|
|
6260 }
|
|
6261 /*FALLTHROUGH*/
|
|
6262
|
|
6263 case STATE_SWAP3:
|
323
|
6264 /* Swap two bytes, skipping one: "123" -> "321". We change
|
330
|
6265 * "fword" here, it's changed back afterwards. */
|
|
6266 p = fword + sp->ts_fidx;
|
|
6267 #ifdef FEAT_MBYTE
|
|
6268 if (has_mbyte)
|
|
6269 {
|
|
6270 n = mb_ptr2len_check(p);
|
|
6271 c = mb_ptr2char(p);
|
|
6272 fl = mb_ptr2len_check(p + n);
|
|
6273 c2 = mb_ptr2char(p + n);
|
|
6274 c3 = mb_ptr2char(p + n + fl);
|
|
6275 }
|
|
6276 else
|
|
6277 #endif
|
323
|
6278 {
|
330
|
6279 c = *p;
|
|
6280 c2 = p[1];
|
|
6281 c3 = p[2];
|
|
6282 }
|
|
6283
|
|
6284 /* When characters are identical: "121" then SWAP3 result is
|
|
6285 * identical, ROT3L result is same as SWAP: "211", ROT3L
|
|
6286 * result is same as SWAP on next char: "112". Thus skip all
|
|
6287 * swapping. Also skip when c3 is NUL. */
|
|
6288 if (c == c3 || c3 == NUL)
|
|
6289 {
|
|
6290 sp->ts_state = STATE_REP_INI;
|
|
6291 break;
|
|
6292 }
|
|
6293 if (try_deeper(su, stack, depth, SCORE_SWAP3))
|
|
6294 {
|
|
6295 sp->ts_state = STATE_UNSWAP3;
|
323
|
6296 ++depth;
|
330
|
6297 #ifdef FEAT_MBYTE
|
|
6298 if (has_mbyte)
|
|
6299 {
|
|
6300 tl = mb_char2len(c3);
|
|
6301 mch_memmove(p, p + n + fl, tl);
|
|
6302 mb_char2bytes(c2, p + tl);
|
|
6303 mb_char2bytes(c, p + fl + tl);
|
|
6304 stack[depth].ts_fidxtry = sp->ts_fidx + n + fl + tl;
|
|
6305 }
|
|
6306 else
|
|
6307 #endif
|
|
6308 {
|
|
6309 p[0] = p[2];
|
|
6310 p[2] = c;
|
|
6311 stack[depth].ts_fidxtry = sp->ts_fidx + 3;
|
|
6312 }
|
323
|
6313 }
|
|
6314 else
|
|
6315 sp->ts_state = STATE_REP_INI;
|
|
6316 break;
|
|
6317
|
330
|
6318 case STATE_UNSWAP3:
|
|
6319 /* Undo STATE_SWAP3: "321" -> "123" */
|
|
6320 p = fword + sp->ts_fidx;
|
|
6321 #ifdef FEAT_MBYTE
|
|
6322 if (has_mbyte)
|
|
6323 {
|
|
6324 n = MB_BYTE2LEN(*p);
|
|
6325 c2 = mb_ptr2char(p + n);
|
|
6326 fl = MB_BYTE2LEN(p[n]);
|
|
6327 c = mb_ptr2char(p + n + fl);
|
|
6328 tl = MB_BYTE2LEN(p[n + fl]);
|
|
6329 mch_memmove(p + fl + tl, p, n);
|
|
6330 mb_char2bytes(c, p);
|
|
6331 mb_char2bytes(c2, p + tl);
|
|
6332 }
|
|
6333 else
|
|
6334 #endif
|
|
6335 {
|
|
6336 c = *p;
|
|
6337 *p = p[2];
|
|
6338 p[2] = c;
|
|
6339 }
|
346
|
6340
|
330
|
6341 /* Rotate three characters left: "123" -> "231". We change
|
|
6342 * "fword" here, it's changed back afterwards. */
|
323
|
6343 if (try_deeper(su, stack, depth, SCORE_SWAP3))
|
|
6344 {
|
330
|
6345 sp->ts_state = STATE_UNROT3L;
|
323
|
6346 ++depth;
|
330
|
6347 p = fword + sp->ts_fidx;
|
|
6348 #ifdef FEAT_MBYTE
|
|
6349 if (has_mbyte)
|
|
6350 {
|
|
6351 n = mb_ptr2len_check(p);
|
|
6352 c = mb_ptr2char(p);
|
|
6353 fl = mb_ptr2len_check(p + n);
|
|
6354 fl += mb_ptr2len_check(p + n + fl);
|
|
6355 mch_memmove(p, p + n, fl);
|
|
6356 mb_char2bytes(c, p + fl);
|
|
6357 stack[depth].ts_fidxtry = sp->ts_fidx + n + fl;
|
|
6358 }
|
|
6359 else
|
|
6360 #endif
|
|
6361 {
|
|
6362 c = *p;
|
|
6363 *p = p[1];
|
|
6364 p[1] = p[2];
|
|
6365 p[2] = c;
|
|
6366 stack[depth].ts_fidxtry = sp->ts_fidx + 3;
|
|
6367 }
|
323
|
6368 }
|
|
6369 else
|
|
6370 sp->ts_state = STATE_REP_INI;
|
|
6371 break;
|
|
6372
|
330
|
6373 case STATE_UNROT3L:
|
346
|
6374 /* Undo ROT3L: "231" -> "123" */
|
330
|
6375 p = fword + sp->ts_fidx;
|
|
6376 #ifdef FEAT_MBYTE
|
|
6377 if (has_mbyte)
|
|
6378 {
|
|
6379 n = MB_BYTE2LEN(*p);
|
|
6380 n += MB_BYTE2LEN(p[n]);
|
|
6381 c = mb_ptr2char(p + n);
|
|
6382 tl = MB_BYTE2LEN(p[n]);
|
|
6383 mch_memmove(p + tl, p, n);
|
|
6384 mb_char2bytes(c, p);
|
|
6385 }
|
|
6386 else
|
|
6387 #endif
|
|
6388 {
|
|
6389 c = p[2];
|
|
6390 p[2] = p[1];
|
|
6391 p[1] = *p;
|
|
6392 *p = c;
|
|
6393 }
|
346
|
6394
|
323
|
6395 /* Rotate three bytes right: "123" -> "312". We change
|
330
|
6396 * "fword" here, it's changed back afterwards. */
|
323
|
6397 if (try_deeper(su, stack, depth, SCORE_SWAP3))
|
|
6398 {
|
330
|
6399 sp->ts_state = STATE_UNROT3R;
|
323
|
6400 ++depth;
|
330
|
6401 p = fword + sp->ts_fidx;
|
|
6402 #ifdef FEAT_MBYTE
|
|
6403 if (has_mbyte)
|
|
6404 {
|
|
6405 n = mb_ptr2len_check(p);
|
|
6406 n += mb_ptr2len_check(p + n);
|
|
6407 c = mb_ptr2char(p + n);
|
|
6408 tl = mb_ptr2len_check(p + n);
|
|
6409 mch_memmove(p + tl, p, n);
|
|
6410 mb_char2bytes(c, p);
|
|
6411 stack[depth].ts_fidxtry = sp->ts_fidx + n + tl;
|
|
6412 }
|
|
6413 else
|
|
6414 #endif
|
|
6415 {
|
|
6416 c = p[2];
|
|
6417 p[2] = p[1];
|
|
6418 p[1] = *p;
|
|
6419 *p = c;
|
|
6420 stack[depth].ts_fidxtry = sp->ts_fidx + 3;
|
|
6421 }
|
323
|
6422 }
|
|
6423 else
|
|
6424 sp->ts_state = STATE_REP_INI;
|
|
6425 break;
|
|
6426
|
330
|
6427 case STATE_UNROT3R:
|
346
|
6428 /* Undo ROT3R: "312" -> "123" */
|
330
|
6429 p = fword + sp->ts_fidx;
|
|
6430 #ifdef FEAT_MBYTE
|
|
6431 if (has_mbyte)
|
|
6432 {
|
|
6433 c = mb_ptr2char(p);
|
|
6434 tl = MB_BYTE2LEN(*p);
|
|
6435 n = MB_BYTE2LEN(p[tl]);
|
|
6436 n += MB_BYTE2LEN(p[tl + n]);
|
|
6437 mch_memmove(p, p + tl, n);
|
|
6438 mb_char2bytes(c, p + n);
|
|
6439 }
|
|
6440 else
|
|
6441 #endif
|
|
6442 {
|
|
6443 c = *p;
|
|
6444 *p = p[1];
|
|
6445 p[1] = p[2];
|
|
6446 p[2] = c;
|
|
6447 }
|
323
|
6448 /*FALLTHROUGH*/
|
|
6449
|
|
6450 case STATE_REP_INI:
|
|
6451 /* Check if matching with REP items from the .aff file would
|
|
6452 * work. Quickly skip if there are no REP items or the score
|
|
6453 * is going to be too high anyway. */
|
|
6454 gap = &lp->lp_slang->sl_rep;
|
|
6455 if (gap->ga_len == 0
|
|
6456 || sp->ts_score + SCORE_REP >= su->su_maxscore)
|
|
6457 {
|
|
6458 sp->ts_state = STATE_FINAL;
|
|
6459 break;
|
|
6460 }
|
|
6461
|
|
6462 /* Use the first byte to quickly find the first entry that
|
330
|
6463 * may match. If the index is -1 there is none. */
|
323
|
6464 sp->ts_curi = lp->lp_slang->sl_rep_first[fword[sp->ts_fidx]];
|
|
6465 if (sp->ts_curi < 0)
|
|
6466 {
|
|
6467 sp->ts_state = STATE_FINAL;
|
|
6468 break;
|
|
6469 }
|
|
6470
|
|
6471 sp->ts_state = STATE_REP;
|
|
6472 /*FALLTHROUGH*/
|
|
6473
|
|
6474 case STATE_REP:
|
|
6475 /* Try matching with REP items from the .aff file. For each
|
330
|
6476 * match replace the characters and check if the resulting
|
|
6477 * word is valid. */
|
323
|
6478 p = fword + sp->ts_fidx;
|
|
6479
|
|
6480 gap = &lp->lp_slang->sl_rep;
|
|
6481 while (sp->ts_curi < gap->ga_len)
|
|
6482 {
|
|
6483 ftp = (fromto_T *)gap->ga_data + sp->ts_curi++;
|
|
6484 if (*ftp->ft_from != *p)
|
|
6485 {
|
|
6486 /* past possible matching entries */
|
|
6487 sp->ts_curi = gap->ga_len;
|
|
6488 break;
|
|
6489 }
|
|
6490 if (STRNCMP(ftp->ft_from, p, STRLEN(ftp->ft_from)) == 0
|
|
6491 && try_deeper(su, stack, depth, SCORE_REP))
|
|
6492 {
|
|
6493 /* Need to undo this afterwards. */
|
|
6494 sp->ts_state = STATE_REP_UNDO;
|
|
6495
|
|
6496 /* Change the "from" to the "to" string. */
|
|
6497 ++depth;
|
|
6498 fl = STRLEN(ftp->ft_from);
|
|
6499 tl = STRLEN(ftp->ft_to);
|
|
6500 if (fl != tl)
|
346
|
6501 {
|
323
|
6502 mch_memmove(p + tl, p + fl, STRLEN(p + fl) + 1);
|
346
|
6503 repextra += tl - fl;
|
|
6504 }
|
323
|
6505 mch_memmove(p, ftp->ft_to, tl);
|
|
6506 stack[depth].ts_fidxtry = sp->ts_fidx + tl;
|
330
|
6507 #ifdef FEAT_MBYTE
|
|
6508 stack[depth].ts_tcharlen = 0;
|
|
6509 #endif
|
323
|
6510 break;
|
|
6511 }
|
|
6512 }
|
|
6513
|
|
6514 if (sp->ts_curi >= gap->ga_len)
|
|
6515 /* No (more) matches. */
|
|
6516 sp->ts_state = STATE_FINAL;
|
|
6517
|
|
6518 break;
|
|
6519
|
|
6520 case STATE_REP_UNDO:
|
|
6521 /* Undo a REP replacement and continue with the next one. */
|
|
6522 ftp = (fromto_T *)lp->lp_slang->sl_rep.ga_data
|
|
6523 + sp->ts_curi - 1;
|
|
6524 fl = STRLEN(ftp->ft_from);
|
|
6525 tl = STRLEN(ftp->ft_to);
|
|
6526 p = fword + sp->ts_fidx;
|
|
6527 if (fl != tl)
|
346
|
6528 {
|
323
|
6529 mch_memmove(p + fl, p + tl, STRLEN(p + tl) + 1);
|
346
|
6530 repextra -= tl - fl;
|
|
6531 }
|
323
|
6532 mch_memmove(p, ftp->ft_from, fl);
|
|
6533 sp->ts_state = STATE_REP;
|
|
6534 break;
|
|
6535
|
|
6536 default:
|
|
6537 /* Did all possible states at this level, go up one level. */
|
|
6538 --depth;
|
344
|
6539
|
|
6540 /* Don't check for CTRL-C too often, it takes time. */
|
|
6541 line_breakcheck();
|
323
|
6542 }
|
|
6543 }
|
|
6544 }
|
|
6545 }
|
|
6546
|
|
6547 /*
|
|
6548 * Try going one level deeper in the tree.
|
|
6549 */
|
|
6550 static int
|
|
6551 try_deeper(su, stack, depth, score_add)
|
|
6552 suginfo_T *su;
|
|
6553 trystate_T *stack;
|
|
6554 int depth;
|
|
6555 int score_add;
|
|
6556 {
|
|
6557 int newscore;
|
|
6558
|
|
6559 /* Refuse to go deeper if the scrore is getting too big. */
|
|
6560 newscore = stack[depth].ts_score + score_add;
|
|
6561 if (newscore >= su->su_maxscore)
|
|
6562 return FALSE;
|
|
6563
|
330
|
6564 stack[depth + 1] = stack[depth];
|
323
|
6565 stack[depth + 1].ts_state = STATE_START;
|
|
6566 stack[depth + 1].ts_score = newscore;
|
|
6567 stack[depth + 1].ts_curi = 1; /* start just after length byte */
|
|
6568 return TRUE;
|
|
6569 }
|
|
6570
|
|
6571 /*
|
|
6572 * "fword" is a good word with case folded. Find the matching keep-case
|
|
6573 * words and put it in "kword".
|
|
6574 * Theoretically there could be several keep-case words that result in the
|
|
6575 * same case-folded word, but we only find one...
|
|
6576 */
|
|
6577 static void
|
|
6578 find_keepcap_word(slang, fword, kword)
|
|
6579 slang_T *slang;
|
|
6580 char_u *fword;
|
|
6581 char_u *kword;
|
|
6582 {
|
|
6583 char_u uword[MAXWLEN]; /* "fword" in upper-case */
|
|
6584 int depth;
|
324
|
6585 idx_T tryidx;
|
323
|
6586
|
|
6587 /* The following arrays are used at each depth in the tree. */
|
324
|
6588 idx_T arridx[MAXWLEN];
|
323
|
6589 int round[MAXWLEN];
|
|
6590 int fwordidx[MAXWLEN];
|
|
6591 int uwordidx[MAXWLEN];
|
|
6592 int kwordlen[MAXWLEN];
|
|
6593
|
|
6594 int flen, ulen;
|
|
6595 int l;
|
|
6596 int len;
|
|
6597 int c;
|
324
|
6598 idx_T lo, hi, m;
|
323
|
6599 char_u *p;
|
|
6600 char_u *byts = slang->sl_kbyts; /* array with bytes of the words */
|
324
|
6601 idx_T *idxs = slang->sl_kidxs; /* array with indexes */
|
323
|
6602
|
|
6603 if (byts == NULL)
|
|
6604 {
|
|
6605 /* array is empty: "cannot happen" */
|
|
6606 *kword = NUL;
|
|
6607 return;
|
|
6608 }
|
|
6609
|
|
6610 /* Make an all-cap version of "fword". */
|
|
6611 allcap_copy(fword, uword);
|
|
6612
|
|
6613 /*
|
|
6614 * Each character needs to be tried both case-folded and upper-case.
|
|
6615 * All this gets very complicated if we keep in mind that changing case
|
|
6616 * may change the byte length of a multi-byte character...
|
|
6617 */
|
|
6618 depth = 0;
|
|
6619 arridx[0] = 0;
|
|
6620 round[0] = 0;
|
|
6621 fwordidx[0] = 0;
|
|
6622 uwordidx[0] = 0;
|
|
6623 kwordlen[0] = 0;
|
|
6624 while (depth >= 0)
|
|
6625 {
|
|
6626 if (fword[fwordidx[depth]] == NUL)
|
|
6627 {
|
|
6628 /* We are at the end of "fword". If the tree allows a word to end
|
|
6629 * here we have found a match. */
|
|
6630 if (byts[arridx[depth] + 1] == 0)
|
|
6631 {
|
|
6632 kword[kwordlen[depth]] = NUL;
|
|
6633 return;
|
|
6634 }
|
|
6635
|
|
6636 /* kword is getting too long, continue one level up */
|
|
6637 --depth;
|
|
6638 }
|
|
6639 else if (++round[depth] > 2)
|
|
6640 {
|
|
6641 /* tried both fold-case and upper-case character, continue one
|
|
6642 * level up */
|
|
6643 --depth;
|
|
6644 }
|
|
6645 else
|
|
6646 {
|
|
6647 /*
|
|
6648 * round[depth] == 1: Try using the folded-case character.
|
|
6649 * round[depth] == 2: Try using the upper-case character.
|
|
6650 */
|
|
6651 #ifdef FEAT_MBYTE
|
|
6652 if (has_mbyte)
|
|
6653 {
|
|
6654 flen = mb_ptr2len_check(fword + fwordidx[depth]);
|
|
6655 ulen = mb_ptr2len_check(uword + uwordidx[depth]);
|
|
6656 }
|
|
6657 else
|
|
6658 #endif
|
|
6659 ulen = flen = 1;
|
|
6660 if (round[depth] == 1)
|
|
6661 {
|
|
6662 p = fword + fwordidx[depth];
|
|
6663 l = flen;
|
|
6664 }
|
|
6665 else
|
|
6666 {
|
|
6667 p = uword + uwordidx[depth];
|
|
6668 l = ulen;
|
|
6669 }
|
|
6670
|
|
6671 for (tryidx = arridx[depth]; l > 0; --l)
|
|
6672 {
|
|
6673 /* Perform a binary search in the list of accepted bytes. */
|
|
6674 len = byts[tryidx++];
|
|
6675 c = *p++;
|
|
6676 lo = tryidx;
|
|
6677 hi = tryidx + len - 1;
|
|
6678 while (lo < hi)
|
|
6679 {
|
|
6680 m = (lo + hi) / 2;
|
|
6681 if (byts[m] > c)
|
|
6682 hi = m - 1;
|
|
6683 else if (byts[m] < c)
|
|
6684 lo = m + 1;
|
|
6685 else
|
|
6686 {
|
|
6687 lo = hi = m;
|
|
6688 break;
|
|
6689 }
|
|
6690 }
|
|
6691
|
|
6692 /* Stop if there is no matching byte. */
|
|
6693 if (hi < lo || byts[lo] != c)
|
|
6694 break;
|
|
6695
|
|
6696 /* Continue at the child (if there is one). */
|
|
6697 tryidx = idxs[lo];
|
|
6698 }
|
|
6699
|
|
6700 if (l == 0)
|
|
6701 {
|
|
6702 /*
|
|
6703 * Found the matching char. Copy it to "kword" and go a
|
|
6704 * level deeper.
|
|
6705 */
|
|
6706 if (round[depth] == 1)
|
|
6707 {
|
|
6708 STRNCPY(kword + kwordlen[depth], fword + fwordidx[depth],
|
|
6709 flen);
|
|
6710 kwordlen[depth + 1] = kwordlen[depth] + flen;
|
|
6711 }
|
|
6712 else
|
|
6713 {
|
|
6714 STRNCPY(kword + kwordlen[depth], uword + uwordidx[depth],
|
|
6715 ulen);
|
|
6716 kwordlen[depth + 1] = kwordlen[depth] + ulen;
|
|
6717 }
|
|
6718 fwordidx[depth + 1] = fwordidx[depth] + flen;
|
|
6719 uwordidx[depth + 1] = uwordidx[depth] + ulen;
|
|
6720
|
|
6721 ++depth;
|
|
6722 arridx[depth] = tryidx;
|
|
6723 round[depth] = 0;
|
|
6724 }
|
|
6725 }
|
|
6726 }
|
|
6727
|
|
6728 /* Didn't find it: "cannot happen". */
|
|
6729 *kword = NUL;
|
|
6730 }
|
|
6731
|
|
6732 /*
|
344
|
6733 * Compute the sound-a-like score for suggestions in su->su_ga and add them to
|
|
6734 * su->su_sga.
|
|
6735 */
|
|
6736 static void
|
|
6737 score_comp_sal(su)
|
|
6738 suginfo_T *su;
|
|
6739 {
|
|
6740 langp_T *lp;
|
|
6741 char_u badsound[MAXWLEN];
|
|
6742 int i;
|
|
6743 suggest_T *stp;
|
|
6744 suggest_T *sstp;
|
|
6745 int score;
|
|
6746
|
|
6747 if (ga_grow(&su->su_sga, su->su_ga.ga_len) == FAIL)
|
|
6748 return;
|
|
6749
|
|
6750 /* Use the sound-folding of the first language that supports it. */
|
|
6751 for (lp = LANGP_ENTRY(curwin->w_buffer->b_langp, 0);
|
|
6752 lp->lp_slang != NULL; ++lp)
|
|
6753 if (lp->lp_slang->sl_sal.ga_len > 0)
|
|
6754 {
|
|
6755 /* soundfold the bad word */
|
|
6756 spell_soundfold(lp->lp_slang, su->su_fbadword, badsound);
|
|
6757
|
|
6758 for (i = 0; i < su->su_ga.ga_len; ++i)
|
|
6759 {
|
|
6760 stp = &SUG(su->su_ga, i);
|
|
6761
|
351
|
6762 /* Case-fold the suggested word, sound-fold it and compute the
|
|
6763 * sound-a-like score. */
|
|
6764 score = stp_sal_score(stp, su, lp->lp_slang, badsound);
|
344
|
6765 if (score < SCORE_MAXMAX)
|
|
6766 {
|
|
6767 /* Add the suggestion. */
|
|
6768 sstp = &SUG(su->su_sga, su->su_sga.ga_len);
|
|
6769 sstp->st_word = vim_strsave(stp->st_word);
|
|
6770 if (sstp->st_word != NULL)
|
|
6771 {
|
|
6772 sstp->st_score = score;
|
|
6773 sstp->st_altscore = 0;
|
|
6774 sstp->st_orglen = stp->st_orglen;
|
|
6775 ++su->su_sga.ga_len;
|
|
6776 }
|
|
6777 }
|
|
6778 }
|
|
6779 break;
|
|
6780 }
|
|
6781 }
|
|
6782
|
|
6783 /*
|
|
6784 * Combine the list of suggestions in su->su_ga and su->su_sga.
|
|
6785 * They are intwined.
|
|
6786 */
|
|
6787 static void
|
|
6788 score_combine(su)
|
|
6789 suginfo_T *su;
|
|
6790 {
|
|
6791 int i;
|
|
6792 int j;
|
|
6793 garray_T ga;
|
|
6794 garray_T *gap;
|
|
6795 langp_T *lp;
|
|
6796 suggest_T *stp;
|
|
6797 char_u *p;
|
|
6798 char_u badsound[MAXWLEN];
|
|
6799 int round;
|
|
6800
|
|
6801 /* Add the alternate score to su_ga. */
|
|
6802 for (lp = LANGP_ENTRY(curwin->w_buffer->b_langp, 0);
|
|
6803 lp->lp_slang != NULL; ++lp)
|
|
6804 {
|
|
6805 if (lp->lp_slang->sl_sal.ga_len > 0)
|
|
6806 {
|
|
6807 /* soundfold the bad word */
|
|
6808 spell_soundfold(lp->lp_slang, su->su_fbadword, badsound);
|
|
6809
|
|
6810 for (i = 0; i < su->su_ga.ga_len; ++i)
|
|
6811 {
|
|
6812 stp = &SUG(su->su_ga, i);
|
351
|
6813 stp->st_altscore = stp_sal_score(stp, su, lp->lp_slang,
|
|
6814 badsound);
|
344
|
6815 if (stp->st_altscore == SCORE_MAXMAX)
|
|
6816 stp->st_score = (stp->st_score * 3 + SCORE_BIG) / 4;
|
|
6817 else
|
|
6818 stp->st_score = (stp->st_score * 3
|
|
6819 + stp->st_altscore) / 4;
|
|
6820 stp->st_salscore = FALSE;
|
|
6821 }
|
|
6822 break;
|
|
6823 }
|
|
6824 }
|
|
6825
|
|
6826 /* Add the alternate score to su_sga. */
|
|
6827 for (i = 0; i < su->su_sga.ga_len; ++i)
|
|
6828 {
|
|
6829 stp = &SUG(su->su_sga, i);
|
|
6830 stp->st_altscore = spell_edit_score(su->su_badword, stp->st_word);
|
|
6831 if (stp->st_score == SCORE_MAXMAX)
|
|
6832 stp->st_score = (SCORE_BIG * 7 + stp->st_altscore) / 8;
|
|
6833 else
|
|
6834 stp->st_score = (stp->st_score * 7 + stp->st_altscore) / 8;
|
|
6835 stp->st_salscore = TRUE;
|
|
6836 }
|
|
6837
|
|
6838 /* Sort the suggestions and truncate at "maxcount" for both lists. */
|
|
6839 (void)cleanup_suggestions(&su->su_ga, su->su_maxscore, su->su_maxcount);
|
|
6840 (void)cleanup_suggestions(&su->su_sga, su->su_maxscore, su->su_maxcount);
|
|
6841
|
|
6842 ga_init2(&ga, (int)sizeof(suginfo_T), 1);
|
|
6843 if (ga_grow(&ga, su->su_ga.ga_len + su->su_sga.ga_len) == FAIL)
|
|
6844 return;
|
|
6845
|
|
6846 stp = &SUG(ga, 0);
|
|
6847 for (i = 0; i < su->su_ga.ga_len || i < su->su_sga.ga_len; ++i)
|
|
6848 {
|
|
6849 /* round 1: get a suggestion from su_ga
|
|
6850 * round 2: get a suggestion from su_sga */
|
|
6851 for (round = 1; round <= 2; ++round)
|
|
6852 {
|
|
6853 gap = round == 1 ? &su->su_ga : &su->su_sga;
|
|
6854 if (i < gap->ga_len)
|
|
6855 {
|
|
6856 /* Don't add a word if it's already there. */
|
|
6857 p = SUG(*gap, i).st_word;
|
|
6858 for (j = 0; j < ga.ga_len; ++j)
|
|
6859 if (STRCMP(stp[j].st_word, p) == 0)
|
|
6860 break;
|
|
6861 if (j == ga.ga_len)
|
|
6862 stp[ga.ga_len++] = SUG(*gap, i);
|
|
6863 else
|
|
6864 vim_free(p);
|
|
6865 }
|
|
6866 }
|
|
6867 }
|
|
6868
|
|
6869 ga_clear(&su->su_ga);
|
|
6870 ga_clear(&su->su_sga);
|
|
6871
|
|
6872 /* Truncate the list to the number of suggestions that will be displayed. */
|
|
6873 if (ga.ga_len > su->su_maxcount)
|
|
6874 {
|
|
6875 for (i = su->su_maxcount; i < ga.ga_len; ++i)
|
|
6876 vim_free(stp[i].st_word);
|
|
6877 ga.ga_len = su->su_maxcount;
|
|
6878 }
|
|
6879
|
|
6880 su->su_ga = ga;
|
|
6881 }
|
|
6882
|
|
6883 /*
|
351
|
6884 * For the goodword in "stp" compute the soundalike score compared to the
|
|
6885 * badword.
|
|
6886 */
|
|
6887 static int
|
|
6888 stp_sal_score(stp, su, slang, badsound)
|
|
6889 suggest_T *stp;
|
|
6890 suginfo_T *su;
|
|
6891 slang_T *slang;
|
|
6892 char_u *badsound; /* sound-folded badword */
|
|
6893 {
|
|
6894 char_u *p;
|
|
6895 char_u badsound2[MAXWLEN];
|
|
6896 char_u fword[MAXWLEN];
|
|
6897 char_u goodsound[MAXWLEN];
|
|
6898
|
|
6899 if (stp->st_orglen <= su->su_badlen)
|
|
6900 p = badsound;
|
|
6901 else
|
|
6902 {
|
|
6903 /* soundfold the bad word with more characters following */
|
|
6904 (void)spell_casefold(su->su_badptr, stp->st_orglen, fword, MAXWLEN);
|
|
6905
|
|
6906 /* When joining two words the sound often changes a lot. E.g., "t he"
|
|
6907 * sounds like "t h" while "the" sounds like "@". Avoid that by
|
|
6908 * removing the space. Don't do it when the good word also contains a
|
|
6909 * space. */
|
|
6910 if (vim_iswhite(su->su_badptr[su->su_badlen])
|
|
6911 && *skiptowhite(stp->st_word) == NUL)
|
|
6912 for (p = fword; *(p = skiptowhite(p)) != NUL; )
|
|
6913 mch_memmove(p, p + 1, STRLEN(p));
|
|
6914
|
|
6915 spell_soundfold(slang, fword, badsound2);
|
|
6916 p = badsound2;
|
|
6917 }
|
|
6918
|
|
6919 /* Case-fold the word, sound-fold the word and compute the score for the
|
|
6920 * difference. */
|
|
6921 (void)spell_casefold(stp->st_word, STRLEN(stp->st_word), fword, MAXWLEN);
|
|
6922 spell_soundfold(slang, fword, goodsound);
|
|
6923
|
|
6924 return soundalike_score(goodsound, p);
|
|
6925 }
|
|
6926
|
|
6927 /*
|
323
|
6928 * Find suggestions by comparing the word in a sound-a-like form.
|
|
6929 */
|
|
6930 static void
|
346
|
6931 suggest_try_soundalike(su)
|
323
|
6932 suginfo_T *su;
|
|
6933 {
|
|
6934 char_u salword[MAXWLEN];
|
|
6935 char_u tword[MAXWLEN];
|
|
6936 char_u tfword[MAXWLEN];
|
|
6937 char_u tsalword[MAXWLEN];
|
324
|
6938 idx_T arridx[MAXWLEN];
|
323
|
6939 int curi[MAXWLEN];
|
|
6940 langp_T *lp;
|
|
6941 char_u *byts;
|
324
|
6942 idx_T *idxs;
|
323
|
6943 int depth;
|
|
6944 int c;
|
324
|
6945 idx_T n;
|
323
|
6946 int round;
|
|
6947 int flags;
|
344
|
6948 int sound_score;
|
|
6949
|
|
6950 /* Do this for all languages that support sound folding. */
|
323
|
6951 for (lp = LANGP_ENTRY(curwin->w_buffer->b_langp, 0);
|
|
6952 lp->lp_slang != NULL; ++lp)
|
|
6953 {
|
|
6954 if (lp->lp_slang->sl_sal.ga_len > 0)
|
|
6955 {
|
|
6956 /* soundfold the bad word */
|
|
6957 spell_soundfold(lp->lp_slang, su->su_fbadword, salword);
|
|
6958
|
|
6959 /*
|
|
6960 * Go through the whole tree, soundfold each word and compare.
|
|
6961 * round 1: use the case-folded tree.
|
|
6962 * round 2: use the keep-case tree.
|
|
6963 */
|
|
6964 for (round = 1; round <= 2; ++round)
|
|
6965 {
|
|
6966 if (round == 1)
|
|
6967 {
|
|
6968 byts = lp->lp_slang->sl_fbyts;
|
|
6969 idxs = lp->lp_slang->sl_fidxs;
|
|
6970 }
|
|
6971 else
|
|
6972 {
|
|
6973 byts = lp->lp_slang->sl_kbyts;
|
|
6974 idxs = lp->lp_slang->sl_kidxs;
|
|
6975 }
|
|
6976
|
|
6977 depth = 0;
|
|
6978 arridx[0] = 0;
|
|
6979 curi[0] = 1;
|
|
6980 while (depth >= 0 && !got_int)
|
|
6981 {
|
|
6982 if (curi[depth] > byts[arridx[depth]])
|
351
|
6983 {
|
323
|
6984 /* Done all bytes at this node, go up one level. */
|
|
6985 --depth;
|
351
|
6986 line_breakcheck();
|
|
6987 }
|
323
|
6988 else
|
|
6989 {
|
|
6990 /* Do one more byte at this node. */
|
|
6991 n = arridx[depth] + curi[depth];
|
|
6992 ++curi[depth];
|
|
6993 c = byts[n];
|
|
6994 if (c == 0)
|
|
6995 {
|
|
6996 /* End of word, deal with the word. */
|
324
|
6997 flags = (int)idxs[n];
|
323
|
6998 if (round == 2 || (flags & WF_KEEPCAP) == 0)
|
|
6999 {
|
|
7000 tword[depth] = NUL;
|
|
7001 if (round == 1)
|
|
7002 spell_soundfold(lp->lp_slang,
|
|
7003 tword, tsalword);
|
|
7004 else
|
|
7005 {
|
|
7006 /* In keep-case tree need to case-fold the
|
|
7007 * word. */
|
|
7008 (void)spell_casefold(tword, depth,
|
|
7009 tfword, MAXWLEN);
|
|
7010 spell_soundfold(lp->lp_slang,
|
|
7011 tfword, tsalword);
|
|
7012 }
|
|
7013
|
344
|
7014 /* Compute the edit distance between the
|
|
7015 * sound-a-like words. */
|
|
7016 sound_score = soundalike_score(salword,
|
|
7017 tsalword);
|
324
|
7018 if (sound_score < SCORE_MAXMAX)
|
323
|
7019 {
|
324
|
7020 char_u cword[MAXWLEN];
|
|
7021 char_u *p;
|
344
|
7022 int score;
|
324
|
7023
|
351
|
7024 if (round == 1 && (flags & WF_CAPMASK) != 0)
|
323
|
7025 {
|
324
|
7026 /* Need to fix case according to
|
|
7027 * "flags". */
|
323
|
7028 make_case_word(tword, cword, flags);
|
324
|
7029 p = cword;
|
323
|
7030 }
|
|
7031 else
|
324
|
7032 p = tword;
|
|
7033
|
344
|
7034 if (sps_flags & SPS_DOUBLE)
|
|
7035 add_suggestion(su, &su->su_sga, p,
|
346
|
7036 su->su_badlen,
|
351
|
7037 sound_score, 0, FALSE);
|
344
|
7038 else
|
|
7039 {
|
|
7040 /* Compute the score. */
|
|
7041 score = spell_edit_score(
|
|
7042 su->su_badword, p);
|
|
7043 if (sps_flags & SPS_BEST)
|
|
7044 /* give a bonus for the good word
|
|
7045 * sounding the same as the bad
|
|
7046 * word */
|
|
7047 add_suggestion(su, &su->su_ga, p,
|
346
|
7048 su->su_badlen,
|
344
|
7049 RESCORE(score, sound_score),
|
351
|
7050 sound_score, TRUE);
|
344
|
7051 else
|
|
7052 add_suggestion(su, &su->su_ga, p,
|
346
|
7053 su->su_badlen,
|
351
|
7054 score + sound_score, 0, FALSE);
|
344
|
7055 }
|
323
|
7056 }
|
|
7057 }
|
|
7058
|
|
7059 /* Skip over other NUL bytes. */
|
|
7060 while (byts[n + 1] == 0)
|
|
7061 {
|
|
7062 ++n;
|
|
7063 ++curi[depth];
|
|
7064 }
|
|
7065 }
|
|
7066 else
|
|
7067 {
|
|
7068 /* Normal char, go one level deeper. */
|
|
7069 tword[depth++] = c;
|
|
7070 arridx[depth] = idxs[n];
|
|
7071 curi[depth] = 1;
|
|
7072 }
|
|
7073 }
|
|
7074 }
|
|
7075 }
|
|
7076 }
|
|
7077 }
|
|
7078 }
|
|
7079
|
|
7080 /*
|
324
|
7081 * Copy "fword" to "cword", fixing case according to "flags".
|
323
|
7082 */
|
|
7083 static void
|
|
7084 make_case_word(fword, cword, flags)
|
|
7085 char_u *fword;
|
|
7086 char_u *cword;
|
|
7087 int flags;
|
|
7088 {
|
|
7089 if (flags & WF_ALLCAP)
|
|
7090 /* Make it all upper-case */
|
|
7091 allcap_copy(fword, cword);
|
|
7092 else if (flags & WF_ONECAP)
|
|
7093 /* Make the first letter upper-case */
|
324
|
7094 onecap_copy(fword, cword, TRUE);
|
323
|
7095 else
|
|
7096 /* Use goodword as-is. */
|
|
7097 STRCPY(cword, fword);
|
|
7098 }
|
|
7099
|
330
|
7100 /*
|
|
7101 * Use map string "map" for languages "lp".
|
|
7102 */
|
|
7103 static void
|
|
7104 set_map_str(lp, map)
|
|
7105 slang_T *lp;
|
|
7106 char_u *map;
|
|
7107 {
|
|
7108 char_u *p;
|
|
7109 int headc = 0;
|
|
7110 int c;
|
|
7111 int i;
|
|
7112
|
|
7113 if (*map == NUL)
|
|
7114 {
|
|
7115 lp->sl_has_map = FALSE;
|
|
7116 return;
|
|
7117 }
|
|
7118 lp->sl_has_map = TRUE;
|
|
7119
|
|
7120 /* Init the array and hash table empty. */
|
|
7121 for (i = 0; i < 256; ++i)
|
|
7122 lp->sl_map_array[i] = 0;
|
|
7123 #ifdef FEAT_MBYTE
|
|
7124 hash_init(&lp->sl_map_hash);
|
|
7125 #endif
|
|
7126
|
|
7127 /*
|
|
7128 * The similar characters are stored separated with slashes:
|
|
7129 * "aaa/bbb/ccc/". Fill sl_map_array[c] with the character before c and
|
|
7130 * before the same slash. For characters above 255 sl_map_hash is used.
|
|
7131 */
|
|
7132 for (p = map; *p != NUL; )
|
|
7133 {
|
|
7134 #ifdef FEAT_MBYTE
|
|
7135 c = mb_ptr2char_adv(&p);
|
|
7136 #else
|
|
7137 c = *p++;
|
|
7138 #endif
|
|
7139 if (c == '/')
|
|
7140 headc = 0;
|
|
7141 else
|
|
7142 {
|
|
7143 if (headc == 0)
|
|
7144 headc = c;
|
|
7145
|
|
7146 #ifdef FEAT_MBYTE
|
|
7147 /* Characters above 255 don't fit in sl_map_array[], put them in
|
|
7148 * the hash table. Each entry is the char, a NUL the headchar and
|
|
7149 * a NUL. */
|
|
7150 if (c >= 256)
|
|
7151 {
|
|
7152 int cl = mb_char2len(c);
|
|
7153 int headcl = mb_char2len(headc);
|
|
7154 char_u *b;
|
|
7155 hash_T hash;
|
|
7156 hashitem_T *hi;
|
|
7157
|
|
7158 b = alloc((unsigned)(cl + headcl + 2));
|
|
7159 if (b == NULL)
|
|
7160 return;
|
|
7161 mb_char2bytes(c, b);
|
|
7162 b[cl] = NUL;
|
|
7163 mb_char2bytes(headc, b + cl + 1);
|
|
7164 b[cl + 1 + headcl] = NUL;
|
|
7165 hash = hash_hash(b);
|
|
7166 hi = hash_lookup(&lp->sl_map_hash, b, hash);
|
|
7167 if (HASHITEM_EMPTY(hi))
|
|
7168 hash_add_item(&lp->sl_map_hash, hi, b, hash);
|
|
7169 else
|
|
7170 {
|
|
7171 /* This should have been checked when generating the .spl
|
|
7172 * file. */
|
|
7173 EMSG(_("E999: duplicate char in MAP entry"));
|
|
7174 vim_free(b);
|
|
7175 }
|
|
7176 }
|
|
7177 else
|
|
7178 #endif
|
|
7179 lp->sl_map_array[c] = headc;
|
|
7180 }
|
|
7181 }
|
|
7182 }
|
|
7183
|
323
|
7184 /*
|
|
7185 * Return TRUE if "c1" and "c2" are similar characters according to the MAP
|
|
7186 * lines in the .aff file.
|
|
7187 */
|
|
7188 static int
|
|
7189 similar_chars(slang, c1, c2)
|
|
7190 slang_T *slang;
|
|
7191 int c1;
|
|
7192 int c2;
|
|
7193 {
|
330
|
7194 int m1, m2;
|
|
7195 #ifdef FEAT_MBYTE
|
|
7196 char_u buf[MB_MAXBYTES];
|
|
7197 hashitem_T *hi;
|
|
7198
|
|
7199 if (c1 >= 256)
|
|
7200 {
|
|
7201 buf[mb_char2bytes(c1, buf)] = 0;
|
|
7202 hi = hash_find(&slang->sl_map_hash, buf);
|
|
7203 if (HASHITEM_EMPTY(hi))
|
|
7204 m1 = 0;
|
|
7205 else
|
|
7206 m1 = mb_ptr2char(hi->hi_key + STRLEN(hi->hi_key) + 1);
|
|
7207 }
|
|
7208 else
|
|
7209 #endif
|
|
7210 m1 = slang->sl_map_array[c1];
|
|
7211 if (m1 == 0)
|
323
|
7212 return FALSE;
|
330
|
7213
|
|
7214
|
|
7215 #ifdef FEAT_MBYTE
|
|
7216 if (c2 >= 256)
|
|
7217 {
|
|
7218 buf[mb_char2bytes(c2, buf)] = 0;
|
|
7219 hi = hash_find(&slang->sl_map_hash, buf);
|
|
7220 if (HASHITEM_EMPTY(hi))
|
|
7221 m2 = 0;
|
|
7222 else
|
|
7223 m2 = mb_ptr2char(hi->hi_key + STRLEN(hi->hi_key) + 1);
|
|
7224 }
|
|
7225 else
|
|
7226 #endif
|
|
7227 m2 = slang->sl_map_array[c2];
|
|
7228
|
|
7229 return m1 == m2;
|
323
|
7230 }
|
|
7231
|
|
7232 /*
|
|
7233 * Add a suggestion to the list of suggestions.
|
|
7234 * Do not add a duplicate suggestion or suggestions with a bad score.
|
|
7235 * When "use_score" is not zero it's used, otherwise the score is computed
|
|
7236 * with spell_edit_score().
|
|
7237 */
|
|
7238 static void
|
351
|
7239 add_suggestion(su, gap, goodword, badlen, score, altscore, had_bonus)
|
323
|
7240 suginfo_T *su;
|
344
|
7241 garray_T *gap;
|
323
|
7242 char_u *goodword;
|
346
|
7243 int badlen; /* length of bad word used */
|
324
|
7244 int score;
|
351
|
7245 int altscore;
|
344
|
7246 int had_bonus; /* value for st_had_bonus */
|
323
|
7247 {
|
|
7248 suggest_T *stp;
|
|
7249 int i;
|
346
|
7250 char_u *p = NULL;
|
|
7251 int c = 0;
|
323
|
7252
|
|
7253 /* Check that the word wasn't banned. */
|
|
7254 if (was_banned(su, goodword))
|
|
7255 return;
|
|
7256
|
346
|
7257 /* If past "su_badlen" and the rest is identical stop at "su_badlen".
|
|
7258 * Remove the common part from "goodword". */
|
|
7259 i = badlen - su->su_badlen;
|
|
7260 if (i > 0)
|
|
7261 {
|
|
7262 /* This assumes there was no case folding or it didn't change the
|
|
7263 * length... */
|
|
7264 p = goodword + STRLEN(goodword) - i;
|
|
7265 if (p > goodword && STRNICMP(su->su_badptr + su->su_badlen, p, i) == 0)
|
|
7266 {
|
|
7267 badlen = su->su_badlen;
|
|
7268 c = *p;
|
|
7269 *p = NUL;
|
|
7270 }
|
|
7271 else
|
|
7272 p = NULL;
|
|
7273 }
|
|
7274
|
323
|
7275 if (score <= su->su_maxscore)
|
|
7276 {
|
366
|
7277 /* Check if the word is already there. Also check the length that is
|
|
7278 * being replaced "thes," -> "these" is a different suggestion from
|
|
7279 * "thes" -> "these". */
|
344
|
7280 stp = &SUG(*gap, 0);
|
|
7281 for (i = gap->ga_len - 1; i >= 0; --i)
|
366
|
7282 if (STRCMP(stp[i].st_word, goodword) == 0
|
|
7283 && stp[i].st_orglen == badlen)
|
323
|
7284 {
|
|
7285 /* Found it. Remember the lowest score. */
|
|
7286 if (stp[i].st_score > score)
|
324
|
7287 {
|
323
|
7288 stp[i].st_score = score;
|
324
|
7289 stp[i].st_had_bonus = had_bonus;
|
|
7290 }
|
323
|
7291 break;
|
|
7292 }
|
|
7293
|
344
|
7294 if (i < 0 && ga_grow(gap, 1) == OK)
|
323
|
7295 {
|
|
7296 /* Add a suggestion. */
|
344
|
7297 stp = &SUG(*gap, gap->ga_len);
|
323
|
7298 stp->st_word = vim_strsave(goodword);
|
|
7299 if (stp->st_word != NULL)
|
|
7300 {
|
|
7301 stp->st_score = score;
|
351
|
7302 stp->st_altscore = altscore;
|
324
|
7303 stp->st_had_bonus = had_bonus;
|
346
|
7304 stp->st_orglen = badlen;
|
344
|
7305 ++gap->ga_len;
|
323
|
7306
|
|
7307 /* If we have too many suggestions now, sort the list and keep
|
|
7308 * the best suggestions. */
|
344
|
7309 if (gap->ga_len > SUG_MAX_COUNT(su))
|
|
7310 su->su_maxscore = cleanup_suggestions(gap, su->su_maxscore,
|
|
7311 SUG_CLEAN_COUNT(su));
|
323
|
7312 }
|
|
7313 }
|
|
7314 }
|
346
|
7315
|
|
7316 if (p != NULL)
|
|
7317 *p = c; /* restore "goodword" */
|
323
|
7318 }
|
|
7319
|
|
7320 /*
|
|
7321 * Add a word to be banned.
|
|
7322 */
|
|
7323 static void
|
|
7324 add_banned(su, word)
|
|
7325 suginfo_T *su;
|
|
7326 char_u *word;
|
|
7327 {
|
|
7328 char_u *s = vim_strsave(word);
|
|
7329 hash_T hash;
|
|
7330 hashitem_T *hi;
|
|
7331
|
|
7332 if (s != NULL)
|
|
7333 {
|
|
7334 hash = hash_hash(s);
|
|
7335 hi = hash_lookup(&su->su_banned, s, hash);
|
|
7336 if (HASHITEM_EMPTY(hi))
|
|
7337 hash_add_item(&su->su_banned, hi, s, hash);
|
355
|
7338 else
|
|
7339 vim_free(s);
|
323
|
7340 }
|
|
7341 }
|
|
7342
|
|
7343 /*
|
|
7344 * Return TRUE if a word appears in the list of banned words.
|
|
7345 */
|
|
7346 static int
|
|
7347 was_banned(su, word)
|
|
7348 suginfo_T *su;
|
|
7349 char_u *word;
|
|
7350 {
|
324
|
7351 hashitem_T *hi = hash_find(&su->su_banned, word);
|
|
7352
|
|
7353 return !HASHITEM_EMPTY(hi);
|
323
|
7354 }
|
|
7355
|
|
7356 /*
|
|
7357 * Free the banned words in "su".
|
|
7358 */
|
|
7359 static void
|
|
7360 free_banned(su)
|
|
7361 suginfo_T *su;
|
|
7362 {
|
|
7363 int todo;
|
|
7364 hashitem_T *hi;
|
|
7365
|
|
7366 todo = su->su_banned.ht_used;
|
|
7367 for (hi = su->su_banned.ht_array; todo > 0; ++hi)
|
|
7368 {
|
|
7369 if (!HASHITEM_EMPTY(hi))
|
|
7370 {
|
|
7371 vim_free(hi->hi_key);
|
|
7372 --todo;
|
|
7373 }
|
|
7374 }
|
|
7375 hash_clear(&su->su_banned);
|
|
7376 }
|
|
7377
|
324
|
7378 /*
|
|
7379 * Recompute the score if sound-folding is possible. This is slow,
|
|
7380 * thus only done for the final results.
|
|
7381 */
|
|
7382 static void
|
|
7383 rescore_suggestions(su)
|
|
7384 suginfo_T *su;
|
|
7385 {
|
|
7386 langp_T *lp;
|
|
7387 suggest_T *stp;
|
|
7388 char_u sal_badword[MAXWLEN];
|
|
7389 int i;
|
|
7390
|
|
7391 for (lp = LANGP_ENTRY(curwin->w_buffer->b_langp, 0);
|
|
7392 lp->lp_slang != NULL; ++lp)
|
|
7393 {
|
|
7394 if (lp->lp_slang->sl_sal.ga_len > 0)
|
|
7395 {
|
|
7396 /* soundfold the bad word */
|
|
7397 spell_soundfold(lp->lp_slang, su->su_fbadword, sal_badword);
|
|
7398
|
|
7399 for (i = 0; i < su->su_ga.ga_len; ++i)
|
|
7400 {
|
344
|
7401 stp = &SUG(su->su_ga, i);
|
324
|
7402 if (!stp->st_had_bonus)
|
|
7403 {
|
351
|
7404 stp->st_altscore = stp_sal_score(stp, su,
|
|
7405 lp->lp_slang, sal_badword);
|
|
7406 if (stp->st_altscore == SCORE_MAXMAX)
|
|
7407 stp->st_altscore = SCORE_BIG;
|
|
7408 stp->st_score = RESCORE(stp->st_score, stp->st_altscore);
|
324
|
7409 }
|
|
7410 }
|
|
7411 break;
|
|
7412 }
|
|
7413 }
|
|
7414 }
|
|
7415
|
323
|
7416 static int
|
|
7417 #ifdef __BORLANDC__
|
|
7418 _RTLENTRYF
|
|
7419 #endif
|
|
7420 sug_compare __ARGS((const void *s1, const void *s2));
|
|
7421
|
|
7422 /*
|
|
7423 * Function given to qsort() to sort the suggestions on st_score.
|
|
7424 */
|
|
7425 static int
|
|
7426 #ifdef __BORLANDC__
|
|
7427 _RTLENTRYF
|
|
7428 #endif
|
|
7429 sug_compare(s1, s2)
|
|
7430 const void *s1;
|
|
7431 const void *s2;
|
|
7432 {
|
|
7433 suggest_T *p1 = (suggest_T *)s1;
|
|
7434 suggest_T *p2 = (suggest_T *)s2;
|
344
|
7435 int n = p1->st_score - p2->st_score;
|
|
7436
|
|
7437 if (n == 0)
|
|
7438 return p1->st_altscore - p2->st_altscore;
|
|
7439 return n;
|
323
|
7440 }
|
|
7441
|
|
7442 /*
|
|
7443 * Cleanup the suggestions:
|
|
7444 * - Sort on score.
|
|
7445 * - Remove words that won't be displayed.
|
344
|
7446 * Returns the maximum score in the list or "maxscore" unmodified.
|
323
|
7447 */
|
344
|
7448 static int
|
|
7449 cleanup_suggestions(gap, maxscore, keep)
|
|
7450 garray_T *gap;
|
|
7451 int maxscore;
|
324
|
7452 int keep; /* nr of suggestions to keep */
|
323
|
7453 {
|
344
|
7454 suggest_T *stp = &SUG(*gap, 0);
|
323
|
7455 int i;
|
|
7456
|
|
7457 /* Sort the list. */
|
344
|
7458 qsort(gap->ga_data, (size_t)gap->ga_len, sizeof(suggest_T), sug_compare);
|
323
|
7459
|
|
7460 /* Truncate the list to the number of suggestions that will be displayed. */
|
344
|
7461 if (gap->ga_len > keep)
|
323
|
7462 {
|
344
|
7463 for (i = keep; i < gap->ga_len; ++i)
|
323
|
7464 vim_free(stp[i].st_word);
|
344
|
7465 gap->ga_len = keep;
|
|
7466 return stp[keep - 1].st_score;
|
323
|
7467 }
|
344
|
7468 return maxscore;
|
323
|
7469 }
|
|
7470
|
|
7471 /*
|
|
7472 * Turn "inword" into its sound-a-like equivalent in "res[MAXWLEN]".
|
|
7473 */
|
|
7474 static void
|
|
7475 spell_soundfold(slang, inword, res)
|
|
7476 slang_T *slang;
|
|
7477 char_u *inword;
|
|
7478 char_u *res;
|
|
7479 {
|
344
|
7480 salitem_T *smp;
|
323
|
7481 char_u word[MAXWLEN];
|
|
7482 #ifdef FEAT_MBYTE
|
|
7483 int l;
|
324
|
7484 int found_mbyte = FALSE;
|
323
|
7485 #endif
|
|
7486 char_u *s;
|
|
7487 char_u *t;
|
344
|
7488 char_u *pf;
|
323
|
7489 int i, j, z;
|
344
|
7490 int reslen;
|
323
|
7491 int n, k = 0;
|
|
7492 int z0;
|
|
7493 int k0;
|
|
7494 int n0;
|
|
7495 int c;
|
|
7496 int pri;
|
|
7497 int p0 = -333;
|
|
7498 int c0;
|
|
7499
|
324
|
7500 /* Remove accents, if wanted. We actually remove all non-word characters.
|
|
7501 * But keep white space. */
|
323
|
7502 if (slang->sl_rem_accents)
|
|
7503 {
|
|
7504 t = word;
|
|
7505 for (s = inword; *s != NUL; )
|
|
7506 {
|
324
|
7507 if (vim_iswhite(*s))
|
344
|
7508 {
|
|
7509 *t++ = ' ';
|
|
7510 s = skipwhite(s);
|
|
7511 }
|
323
|
7512 #ifdef FEAT_MBYTE
|
324
|
7513 else if (has_mbyte)
|
323
|
7514 {
|
|
7515 l = mb_ptr2len_check(s);
|
358
|
7516 if (spell_iswordp(s))
|
323
|
7517 {
|
|
7518 mch_memmove(t, s, l);
|
|
7519 t += l;
|
324
|
7520 if (l > 1)
|
|
7521 found_mbyte = TRUE;
|
323
|
7522 }
|
|
7523 s += l;
|
|
7524 }
|
324
|
7525 #endif
|
323
|
7526 else
|
|
7527 {
|
358
|
7528 if (spell_iswordp(s))
|
323
|
7529 *t++ = *s;
|
|
7530 ++s;
|
|
7531 }
|
|
7532 }
|
|
7533 *t = NUL;
|
|
7534 }
|
|
7535 else
|
324
|
7536 {
|
|
7537 #ifdef FEAT_MBYTE
|
|
7538 if (has_mbyte)
|
|
7539 for (s = inword; *s != NUL; s += l)
|
|
7540 if ((l = mb_ptr2len_check(s)) > 1)
|
|
7541 {
|
|
7542 found_mbyte = TRUE;
|
|
7543 break;
|
|
7544 }
|
|
7545 #endif
|
323
|
7546 STRCPY(word, inword);
|
324
|
7547 }
|
|
7548
|
|
7549 #ifdef FEAT_MBYTE
|
|
7550 /* If there are multi-byte characters in the word return it as-is, because
|
|
7551 * the following won't work. */
|
|
7552 if (found_mbyte)
|
|
7553 {
|
|
7554 STRCPY(res, word);
|
|
7555 return;
|
|
7556 }
|
|
7557 #endif
|
323
|
7558
|
344
|
7559 smp = (salitem_T *)slang->sl_sal.ga_data;
|
323
|
7560
|
|
7561 /*
|
|
7562 * This comes from Aspell phonet.cpp. Converted from C++ to C.
|
324
|
7563 * Changed to keep spaces.
|
323
|
7564 * TODO: support for multi-byte chars.
|
|
7565 */
|
344
|
7566 i = reslen = z = 0;
|
323
|
7567 while ((c = word[i]) != NUL)
|
|
7568 {
|
344
|
7569 /* Start with the first rule that has the character in the word. */
|
323
|
7570 n = slang->sl_sal_first[c];
|
|
7571 z0 = 0;
|
|
7572
|
|
7573 if (n >= 0)
|
|
7574 {
|
|
7575 /* check all rules for the same letter */
|
344
|
7576 for (; (s = smp[n].sm_lead)[0] == c; ++n)
|
323
|
7577 {
|
344
|
7578 /* Quickly skip entries that don't match the word. Most
|
|
7579 * entries are less then three chars, optimize for that. */
|
|
7580 k = smp[n].sm_leadlen;
|
|
7581 if (k > 1)
|
323
|
7582 {
|
344
|
7583 if (word[i + 1] != s[1])
|
|
7584 continue;
|
|
7585 if (k > 2)
|
|
7586 {
|
|
7587 for (j = 2; j < k; ++j)
|
|
7588 if (word[i + j] != s[j])
|
|
7589 break;
|
|
7590 if (j < k)
|
|
7591 continue;
|
|
7592 }
|
323
|
7593 }
|
|
7594
|
344
|
7595 if ((pf = smp[n].sm_oneoff) != NULL)
|
323
|
7596 {
|
344
|
7597 /* Check for match with one of the chars in "sm_oneoff". */
|
|
7598 while (*pf != NUL && *pf != word[i + k])
|
|
7599 ++pf;
|
|
7600 if (*pf == NUL)
|
|
7601 continue;
|
|
7602 ++k;
|
323
|
7603 }
|
344
|
7604 s = smp[n].sm_rules;
|
|
7605 pri = 5; /* default priority */
|
323
|
7606
|
|
7607 p0 = *s;
|
|
7608 k0 = k;
|
|
7609 while (*s == '-' && k > 1)
|
|
7610 {
|
|
7611 k--;
|
|
7612 s++;
|
|
7613 }
|
|
7614 if (*s == '<')
|
|
7615 s++;
|
344
|
7616 if (VIM_ISDIGIT(*s))
|
323
|
7617 {
|
|
7618 /* determine priority */
|
|
7619 pri = *s - '0';
|
|
7620 s++;
|
|
7621 }
|
|
7622 if (*s == '^' && *(s + 1) == '^')
|
|
7623 s++;
|
|
7624
|
|
7625 if (*s == NUL
|
|
7626 || (*s == '^'
|
324
|
7627 && (i == 0 || !(word[i - 1] == ' '
|
358
|
7628 || spell_iswordp(word + i - 1)))
|
323
|
7629 && (*(s + 1) != '$'
|
358
|
7630 || (!spell_iswordp(word + i + k0))))
|
323
|
7631 || (*s == '$' && i > 0
|
358
|
7632 && spell_iswordp(word + i - 1)
|
|
7633 && (!spell_iswordp(word + i + k0))))
|
323
|
7634 {
|
|
7635 /* search for followup rules, if: */
|
|
7636 /* followup and k > 1 and NO '-' in searchstring */
|
|
7637 c0 = word[i + k - 1];
|
|
7638 n0 = slang->sl_sal_first[c0];
|
|
7639
|
|
7640 if (slang->sl_followup && k > 1 && n0 >= 0
|
344
|
7641 && p0 != '-' && word[i + k] != NUL)
|
323
|
7642 {
|
|
7643 /* test follow-up rule for "word[i + k]" */
|
344
|
7644 for ( ; (s = smp[n0].sm_lead)[0] == c0; ++n0)
|
323
|
7645 {
|
344
|
7646 /* Quickly skip entries that don't match the word.
|
|
7647 * */
|
|
7648 k0 = smp[n0].sm_leadlen;
|
|
7649 if (k0 > 1)
|
323
|
7650 {
|
344
|
7651 if (word[i + k] != s[1])
|
|
7652 continue;
|
|
7653 if (k0 > 2)
|
|
7654 {
|
|
7655 pf = word + i + k + 1;
|
|
7656 for (j = 2; j < k0; ++j)
|
|
7657 if (*pf++ != s[j])
|
|
7658 break;
|
|
7659 if (j < k0)
|
|
7660 continue;
|
|
7661 }
|
323
|
7662 }
|
344
|
7663 k0 += k - 1;
|
|
7664
|
|
7665 if ((pf = smp[n0].sm_oneoff) != NULL)
|
323
|
7666 {
|
344
|
7667 /* Check for match with one of the chars in
|
|
7668 * "sm_oneoff". */
|
|
7669 while (*pf != NUL && *pf != word[i + k0])
|
|
7670 ++pf;
|
|
7671 if (*pf == NUL)
|
|
7672 continue;
|
|
7673 ++k0;
|
323
|
7674 }
|
344
|
7675
|
|
7676 p0 = 5;
|
|
7677 s = smp[n0].sm_rules;
|
323
|
7678 while (*s == '-')
|
|
7679 {
|
344
|
7680 /* "k0" gets NOT reduced because
|
|
7681 * "if (k0 == k)" */
|
323
|
7682 s++;
|
|
7683 }
|
|
7684 if (*s == '<')
|
|
7685 s++;
|
344
|
7686 if (VIM_ISDIGIT(*s))
|
323
|
7687 {
|
|
7688 p0 = *s - '0';
|
|
7689 s++;
|
|
7690 }
|
|
7691
|
|
7692 if (*s == NUL
|
|
7693 /* *s == '^' cuts */
|
|
7694 || (*s == '$'
|
358
|
7695 && !spell_iswordp(word + i + k0)))
|
323
|
7696 {
|
|
7697 if (k0 == k)
|
|
7698 /* this is just a piece of the string */
|
|
7699 continue;
|
|
7700
|
|
7701 if (p0 < pri)
|
|
7702 /* priority too low */
|
|
7703 continue;
|
|
7704 /* rule fits; stop search */
|
|
7705 break;
|
|
7706 }
|
|
7707 }
|
|
7708
|
344
|
7709 if (p0 >= pri && smp[n0].sm_lead[0] == c0)
|
323
|
7710 continue;
|
|
7711 }
|
|
7712
|
|
7713 /* replace string */
|
344
|
7714 s = smp[n].sm_to;
|
|
7715 pf = smp[n].sm_rules;
|
|
7716 p0 = (vim_strchr(pf, '<') != NULL) ? 1 : 0;
|
323
|
7717 if (p0 == 1 && z == 0)
|
|
7718 {
|
|
7719 /* rule with '<' is used */
|
344
|
7720 if (reslen > 0 && *s != NUL && (res[reslen - 1] == c
|
|
7721 || res[reslen - 1] == *s))
|
|
7722 reslen--;
|
323
|
7723 z0 = 1;
|
|
7724 z = 1;
|
|
7725 k0 = 0;
|
|
7726 while (*s != NUL && word[i+k0] != NUL)
|
|
7727 {
|
|
7728 word[i + k0] = *s;
|
|
7729 k0++;
|
|
7730 s++;
|
|
7731 }
|
|
7732 if (k > k0)
|
|
7733 mch_memmove(word + i + k0, word + i + k,
|
|
7734 STRLEN(word + i + k) + 1);
|
|
7735
|
|
7736 /* new "actual letter" */
|
|
7737 c = word[i];
|
|
7738 }
|
|
7739 else
|
|
7740 {
|
|
7741 /* no '<' rule used */
|
|
7742 i += k - 1;
|
|
7743 z = 0;
|
344
|
7744 while (*s != NUL && s[1] != NUL && reslen < MAXWLEN)
|
323
|
7745 {
|
344
|
7746 if (reslen == 0 || res[reslen - 1] != *s)
|
323
|
7747 {
|
344
|
7748 res[reslen] = *s;
|
|
7749 reslen++;
|
323
|
7750 }
|
|
7751 s++;
|
|
7752 }
|
|
7753 /* new "actual letter" */
|
|
7754 c = *s;
|
344
|
7755 if (strstr((char *)pf, "^^") != NULL)
|
323
|
7756 {
|
|
7757 if (c != NUL)
|
|
7758 {
|
344
|
7759 res[reslen] = c;
|
|
7760 reslen++;
|
323
|
7761 }
|
|
7762 mch_memmove(word, word + i + 1,
|
|
7763 STRLEN(word + i + 1) + 1);
|
|
7764 i = 0;
|
|
7765 z0 = 1;
|
|
7766 }
|
|
7767 }
|
|
7768 break;
|
|
7769 }
|
|
7770 }
|
|
7771 }
|
324
|
7772 else if (vim_iswhite(c))
|
|
7773 {
|
|
7774 c = ' ';
|
|
7775 k = 1;
|
|
7776 }
|
323
|
7777
|
|
7778 if (z0 == 0)
|
|
7779 {
|
344
|
7780 if (k && !p0 && reslen < MAXWLEN && c != NUL
|
|
7781 && (!slang->sl_collapse || reslen == 0
|
|
7782 || res[reslen - 1] != c))
|
323
|
7783 {
|
|
7784 /* condense only double letters */
|
344
|
7785 res[reslen] = c;
|
|
7786 reslen++;
|
323
|
7787 }
|
|
7788
|
|
7789 i++;
|
|
7790 z = 0;
|
|
7791 k = 0;
|
|
7792 }
|
|
7793 }
|
|
7794
|
344
|
7795 res[reslen] = NUL;
|
323
|
7796 }
|
|
7797
|
324
|
7798 /*
|
344
|
7799 * Compute a score for two sound-a-like words.
|
|
7800 * This permits up to two inserts/deletes/swaps/etc. to keep things fast.
|
|
7801 * Instead of a generic loop we write out the code. That keeps it fast by
|
|
7802 * avoiding checks that will not be possible.
|
|
7803 */
|
|
7804 static int
|
351
|
7805 soundalike_score(goodstart, badstart)
|
|
7806 char_u *goodstart; /* sound-folded good word */
|
|
7807 char_u *badstart; /* sound-folded bad word */
|
344
|
7808 {
|
351
|
7809 char_u *goodsound = goodstart;
|
|
7810 char_u *badsound = badstart;
|
|
7811 int goodlen;
|
|
7812 int badlen;
|
344
|
7813 int n;
|
|
7814 char_u *pl, *ps;
|
|
7815 char_u *pl2, *ps2;
|
351
|
7816 int score = 0;
|
|
7817
|
|
7818 /* adding/inserting "*" at the start (word starts with vowel) shouldn't be
|
|
7819 * counted so much, vowels halfway the word aren't counted at all. */
|
|
7820 if ((*badsound == '*' || *goodsound == '*') && *badsound != *goodsound)
|
|
7821 {
|
|
7822 score = SCORE_DEL / 2;
|
|
7823 if (*badsound == '*')
|
|
7824 ++badsound;
|
|
7825 else
|
|
7826 ++goodsound;
|
|
7827 }
|
|
7828
|
|
7829 goodlen = STRLEN(goodsound);
|
|
7830 badlen = STRLEN(badsound);
|
344
|
7831
|
|
7832 /* Return quickly if the lenghts are too different to be fixed by two
|
|
7833 * changes. */
|
|
7834 n = goodlen - badlen;
|
|
7835 if (n < -2 || n > 2)
|
|
7836 return SCORE_MAXMAX;
|
|
7837
|
|
7838 if (n > 0)
|
|
7839 {
|
351
|
7840 pl = goodsound; /* goodsound is longest */
|
344
|
7841 ps = badsound;
|
|
7842 }
|
|
7843 else
|
|
7844 {
|
351
|
7845 pl = badsound; /* badsound is longest */
|
344
|
7846 ps = goodsound;
|
|
7847 }
|
|
7848
|
|
7849 /* Skip over the identical part. */
|
|
7850 while (*pl == *ps && *pl != NUL)
|
|
7851 {
|
|
7852 ++pl;
|
|
7853 ++ps;
|
|
7854 }
|
|
7855
|
|
7856 switch (n)
|
|
7857 {
|
|
7858 case -2:
|
|
7859 case 2:
|
|
7860 /*
|
|
7861 * Must delete two characters from "pl".
|
|
7862 */
|
|
7863 ++pl; /* first delete */
|
|
7864 while (*pl == *ps)
|
|
7865 {
|
|
7866 ++pl;
|
|
7867 ++ps;
|
|
7868 }
|
|
7869 /* strings must be equal after second delete */
|
|
7870 if (STRCMP(pl + 1, ps) == 0)
|
351
|
7871 return score + SCORE_DEL * 2;
|
344
|
7872
|
|
7873 /* Failed to compare. */
|
|
7874 break;
|
|
7875
|
|
7876 case -1:
|
|
7877 case 1:
|
|
7878 /*
|
|
7879 * Minimal one delete from "pl" required.
|
|
7880 */
|
|
7881
|
|
7882 /* 1: delete */
|
|
7883 pl2 = pl + 1;
|
|
7884 ps2 = ps;
|
|
7885 while (*pl2 == *ps2)
|
|
7886 {
|
|
7887 if (*pl2 == NUL) /* reached the end */
|
351
|
7888 return score + SCORE_DEL;
|
344
|
7889 ++pl2;
|
|
7890 ++ps2;
|
|
7891 }
|
|
7892
|
|
7893 /* 2: delete then swap, then rest must be equal */
|
|
7894 if (pl2[0] == ps2[1] && pl2[1] == ps2[0]
|
|
7895 && STRCMP(pl2 + 2, ps2 + 2) == 0)
|
351
|
7896 return score + SCORE_DEL + SCORE_SWAP;
|
344
|
7897
|
|
7898 /* 3: delete then substitute, then the rest must be equal */
|
|
7899 if (STRCMP(pl2 + 1, ps2 + 1) == 0)
|
351
|
7900 return score + SCORE_DEL + SCORE_SUBST;
|
344
|
7901
|
|
7902 /* 4: first swap then delete */
|
|
7903 if (pl[0] == ps[1] && pl[1] == ps[0])
|
|
7904 {
|
|
7905 pl2 = pl + 2; /* swap, skip two chars */
|
|
7906 ps2 = ps + 2;
|
|
7907 while (*pl2 == *ps2)
|
|
7908 {
|
|
7909 ++pl2;
|
|
7910 ++ps2;
|
|
7911 }
|
|
7912 /* delete a char and then strings must be equal */
|
|
7913 if (STRCMP(pl2 + 1, ps2) == 0)
|
351
|
7914 return score + SCORE_SWAP + SCORE_DEL;
|
344
|
7915 }
|
|
7916
|
|
7917 /* 5: first substitute then delete */
|
|
7918 pl2 = pl + 1; /* substitute, skip one char */
|
|
7919 ps2 = ps + 1;
|
|
7920 while (*pl2 == *ps2)
|
|
7921 {
|
|
7922 ++pl2;
|
|
7923 ++ps2;
|
|
7924 }
|
|
7925 /* delete a char and then strings must be equal */
|
|
7926 if (STRCMP(pl2 + 1, ps2) == 0)
|
351
|
7927 return score + SCORE_SUBST + SCORE_DEL;
|
344
|
7928
|
|
7929 /* Failed to compare. */
|
|
7930 break;
|
|
7931
|
|
7932 case 0:
|
|
7933 /*
|
|
7934 * Lenghts are equal, thus changes must result in same length: An
|
|
7935 * insert is only possible in combination with a delete.
|
|
7936 * 1: check if for identical strings
|
|
7937 */
|
|
7938 if (*pl == NUL)
|
351
|
7939 return score;
|
344
|
7940
|
|
7941 /* 2: swap */
|
|
7942 if (pl[0] == ps[1] && pl[1] == ps[0])
|
|
7943 {
|
|
7944 pl2 = pl + 2; /* swap, skip two chars */
|
|
7945 ps2 = ps + 2;
|
|
7946 while (*pl2 == *ps2)
|
|
7947 {
|
|
7948 if (*pl2 == NUL) /* reached the end */
|
351
|
7949 return score + SCORE_SWAP;
|
344
|
7950 ++pl2;
|
|
7951 ++ps2;
|
|
7952 }
|
|
7953 /* 3: swap and swap again */
|
|
7954 if (pl2[0] == ps2[1] && pl2[1] == ps2[0]
|
|
7955 && STRCMP(pl2 + 2, ps2 + 2) == 0)
|
351
|
7956 return score + SCORE_SWAP + SCORE_SWAP;
|
344
|
7957
|
|
7958 /* 4: swap and substitute */
|
|
7959 if (STRCMP(pl2 + 1, ps2 + 1) == 0)
|
351
|
7960 return score + SCORE_SWAP + SCORE_SUBST;
|
344
|
7961 }
|
|
7962
|
|
7963 /* 5: substitute */
|
|
7964 pl2 = pl + 1;
|
|
7965 ps2 = ps + 1;
|
|
7966 while (*pl2 == *ps2)
|
|
7967 {
|
|
7968 if (*pl2 == NUL) /* reached the end */
|
351
|
7969 return score + SCORE_SUBST;
|
344
|
7970 ++pl2;
|
|
7971 ++ps2;
|
|
7972 }
|
|
7973
|
|
7974 /* 6: substitute and swap */
|
|
7975 if (pl2[0] == ps2[1] && pl2[1] == ps2[0]
|
|
7976 && STRCMP(pl2 + 2, ps2 + 2) == 0)
|
351
|
7977 return score + SCORE_SUBST + SCORE_SWAP;
|
344
|
7978
|
|
7979 /* 7: substitute and substitute */
|
|
7980 if (STRCMP(pl2 + 1, ps2 + 1) == 0)
|
351
|
7981 return score + SCORE_SUBST + SCORE_SUBST;
|
344
|
7982
|
|
7983 /* 8: insert then delete */
|
|
7984 pl2 = pl;
|
|
7985 ps2 = ps + 1;
|
|
7986 while (*pl2 == *ps2)
|
|
7987 {
|
|
7988 ++pl2;
|
|
7989 ++ps2;
|
|
7990 }
|
|
7991 if (STRCMP(pl2 + 1, ps2) == 0)
|
351
|
7992 return score + SCORE_INS + SCORE_DEL;
|
344
|
7993
|
|
7994 /* 9: delete then insert */
|
|
7995 pl2 = pl + 1;
|
|
7996 ps2 = ps;
|
|
7997 while (*pl2 == *ps2)
|
|
7998 {
|
|
7999 ++pl2;
|
|
8000 ++ps2;
|
|
8001 }
|
|
8002 if (STRCMP(pl2, ps2 + 1) == 0)
|
351
|
8003 return score + SCORE_INS + SCORE_DEL;
|
344
|
8004
|
|
8005 /* Failed to compare. */
|
|
8006 break;
|
|
8007 }
|
|
8008
|
|
8009 return SCORE_MAXMAX;
|
|
8010 }
|
324
|
8011
|
323
|
8012 /*
|
|
8013 * Compute the "edit distance" to turn "badword" into "goodword". The less
|
344
|
8014 * deletes/inserts/substitutes/swaps are required the lower the score.
|
324
|
8015 *
|
323
|
8016 * The algorithm comes from Aspell editdist.cpp, edit_distance().
|
324
|
8017 * It has been converted from C++ to C and modified to support multi-byte
|
|
8018 * characters.
|
323
|
8019 */
|
|
8020 static int
|
|
8021 spell_edit_score(badword, goodword)
|
|
8022 char_u *badword;
|
|
8023 char_u *goodword;
|
|
8024 {
|
|
8025 int *cnt;
|
|
8026 int badlen, goodlen;
|
|
8027 int j, i;
|
|
8028 int t;
|
|
8029 int bc, gc;
|
324
|
8030 int pbc, pgc;
|
|
8031 #ifdef FEAT_MBYTE
|
|
8032 char_u *p;
|
|
8033 int wbadword[MAXWLEN];
|
|
8034 int wgoodword[MAXWLEN];
|
|
8035
|
|
8036 if (has_mbyte)
|
|
8037 {
|
|
8038 /* Get the characters from the multi-byte strings and put them in an
|
|
8039 * int array for easy access. */
|
|
8040 for (p = badword, badlen = 0; *p != NUL; )
|
|
8041 wbadword[badlen++] = mb_ptr2char_adv(&p);
|
|
8042 ++badlen;
|
|
8043 for (p = goodword, goodlen = 0; *p != NUL; )
|
|
8044 wgoodword[goodlen++] = mb_ptr2char_adv(&p);
|
|
8045 ++goodlen;
|
|
8046 }
|
|
8047 else
|
|
8048 #endif
|
|
8049 {
|
|
8050 badlen = STRLEN(badword) + 1;
|
|
8051 goodlen = STRLEN(goodword) + 1;
|
|
8052 }
|
323
|
8053
|
|
8054 /* We use "cnt" as an array: CNT(badword_idx, goodword_idx). */
|
|
8055 #define CNT(a, b) cnt[(a) + (b) * (badlen + 1)]
|
|
8056 cnt = (int *)lalloc((long_u)(sizeof(int) * (badlen + 1) * (goodlen + 1)),
|
|
8057 TRUE);
|
324
|
8058 if (cnt == NULL)
|
|
8059 return 0; /* out of memory */
|
323
|
8060
|
|
8061 CNT(0, 0) = 0;
|
|
8062 for (j = 1; j <= goodlen; ++j)
|
|
8063 CNT(0, j) = CNT(0, j - 1) + SCORE_DEL;
|
|
8064
|
|
8065 for (i = 1; i <= badlen; ++i)
|
|
8066 {
|
|
8067 CNT(i, 0) = CNT(i - 1, 0) + SCORE_INS;
|
|
8068 for (j = 1; j <= goodlen; ++j)
|
|
8069 {
|
324
|
8070 #ifdef FEAT_MBYTE
|
|
8071 if (has_mbyte)
|
|
8072 {
|
|
8073 bc = wbadword[i - 1];
|
|
8074 gc = wgoodword[j - 1];
|
|
8075 }
|
|
8076 else
|
|
8077 #endif
|
|
8078 {
|
|
8079 bc = badword[i - 1];
|
|
8080 gc = goodword[j - 1];
|
|
8081 }
|
323
|
8082 if (bc == gc)
|
|
8083 CNT(i, j) = CNT(i - 1, j - 1);
|
|
8084 else
|
|
8085 {
|
|
8086 /* Use a better score when there is only a case difference. */
|
324
|
8087 if (SPELL_TOFOLD(bc) == SPELL_TOFOLD(gc))
|
323
|
8088 CNT(i, j) = SCORE_ICASE + CNT(i - 1, j - 1);
|
|
8089 else
|
|
8090 CNT(i, j) = SCORE_SUBST + CNT(i - 1, j - 1);
|
|
8091
|
324
|
8092 if (i > 1 && j > 1)
|
323
|
8093 {
|
324
|
8094 #ifdef FEAT_MBYTE
|
|
8095 if (has_mbyte)
|
|
8096 {
|
|
8097 pbc = wbadword[i - 2];
|
|
8098 pgc = wgoodword[j - 2];
|
|
8099 }
|
|
8100 else
|
|
8101 #endif
|
|
8102 {
|
|
8103 pbc = badword[i - 2];
|
|
8104 pgc = goodword[j - 2];
|
|
8105 }
|
|
8106 if (bc == pgc && pbc == gc)
|
|
8107 {
|
|
8108 t = SCORE_SWAP + CNT(i - 2, j - 2);
|
|
8109 if (t < CNT(i, j))
|
|
8110 CNT(i, j) = t;
|
|
8111 }
|
323
|
8112 }
|
|
8113 t = SCORE_DEL + CNT(i - 1, j);
|
|
8114 if (t < CNT(i, j))
|
|
8115 CNT(i, j) = t;
|
|
8116 t = SCORE_INS + CNT(i, j - 1);
|
|
8117 if (t < CNT(i, j))
|
|
8118 CNT(i, j) = t;
|
|
8119 }
|
|
8120 }
|
|
8121 }
|
344
|
8122
|
|
8123 i = CNT(badlen - 1, goodlen - 1);
|
|
8124 vim_free(cnt);
|
|
8125 return i;
|
323
|
8126 }
|
307
|
8127
|
351
|
8128 /*
|
|
8129 * ":spelldump"
|
|
8130 */
|
|
8131 /*ARGSUSED*/
|
|
8132 void
|
|
8133 ex_spelldump(eap)
|
|
8134 exarg_T *eap;
|
|
8135 {
|
|
8136 buf_T *buf = curbuf;
|
|
8137 langp_T *lp;
|
|
8138 slang_T *slang;
|
|
8139 idx_T arridx[MAXWLEN];
|
|
8140 int curi[MAXWLEN];
|
|
8141 char_u word[MAXWLEN];
|
|
8142 int c;
|
|
8143 char_u *byts;
|
|
8144 idx_T *idxs;
|
|
8145 linenr_T lnum = 0;
|
|
8146 int round;
|
|
8147 int depth;
|
|
8148 int n;
|
|
8149 int flags;
|
|
8150
|
|
8151 if (no_spell_checking())
|
|
8152 return;
|
|
8153
|
|
8154 /* Create a new empty buffer by splitting the window. */
|
|
8155 do_cmdline_cmd((char_u *)"new");
|
|
8156 if (!bufempty() || !buf_valid(buf))
|
|
8157 return;
|
|
8158
|
|
8159 for (lp = LANGP_ENTRY(buf->b_langp, 0); lp->lp_slang != NULL; ++lp)
|
|
8160 {
|
|
8161 slang = lp->lp_slang;
|
|
8162
|
|
8163 vim_snprintf((char *)IObuff, IOSIZE, "# file: %s", slang->sl_fname);
|
|
8164 ml_append(lnum++, IObuff, (colnr_T)0, FALSE);
|
|
8165
|
|
8166 /* round 1: case-folded tree
|
|
8167 * round 2: keep-case tree */
|
|
8168 for (round = 1; round <= 2; ++round)
|
|
8169 {
|
|
8170 if (round == 1)
|
|
8171 {
|
|
8172 byts = slang->sl_fbyts;
|
|
8173 idxs = slang->sl_fidxs;
|
|
8174 }
|
|
8175 else
|
|
8176 {
|
|
8177 byts = slang->sl_kbyts;
|
|
8178 idxs = slang->sl_kidxs;
|
|
8179 }
|
|
8180 if (byts == NULL)
|
|
8181 continue; /* array is empty */
|
|
8182
|
|
8183 depth = 0;
|
|
8184 arridx[0] = 0;
|
|
8185 curi[0] = 1;
|
|
8186 while (depth >= 0 && !got_int)
|
|
8187 {
|
|
8188 if (curi[depth] > byts[arridx[depth]])
|
|
8189 {
|
|
8190 /* Done all bytes at this node, go up one level. */
|
|
8191 --depth;
|
|
8192 line_breakcheck();
|
|
8193 }
|
|
8194 else
|
|
8195 {
|
|
8196 /* Do one more byte at this node. */
|
|
8197 n = arridx[depth] + curi[depth];
|
|
8198 ++curi[depth];
|
|
8199 c = byts[n];
|
|
8200 if (c == 0)
|
|
8201 {
|
|
8202 /* End of word, deal with the word.
|
|
8203 * Don't use keep-case words in the fold-case tree,
|
|
8204 * they will appear in the keep-case tree.
|
|
8205 * Only use the word when the region matches. */
|
|
8206 flags = (int)idxs[n];
|
|
8207 if ((round == 2 || (flags & WF_KEEPCAP) == 0)
|
|
8208 && ((flags & WF_REGION) == 0
|
|
8209 || (((unsigned)flags >> 8)
|
|
8210 & lp->lp_region) != 0))
|
|
8211 {
|
|
8212 word[depth] = NUL;
|
355
|
8213
|
|
8214 /* Dump the basic word if there is no prefix or
|
|
8215 * when it's the first one. */
|
|
8216 c = (unsigned)flags >> 16;
|
|
8217 if (c == 0 || curi[depth] == 2)
|
|
8218 dump_word(word, round, flags, lnum++);
|
351
|
8219
|
|
8220 /* Apply the prefix, if there is one. */
|
355
|
8221 if (c != 0)
|
351
|
8222 lnum = apply_prefixes(slang, word, round,
|
|
8223 flags, lnum);
|
|
8224 }
|
|
8225 }
|
|
8226 else
|
|
8227 {
|
|
8228 /* Normal char, go one level deeper. */
|
|
8229 word[depth++] = c;
|
|
8230 arridx[depth] = idxs[n];
|
|
8231 curi[depth] = 1;
|
|
8232 }
|
|
8233 }
|
|
8234 }
|
|
8235 }
|
|
8236 }
|
|
8237
|
|
8238 /* Delete the empty line that we started with. */
|
|
8239 if (curbuf->b_ml.ml_line_count > 1)
|
|
8240 ml_delete(curbuf->b_ml.ml_line_count, FALSE);
|
|
8241
|
|
8242 redraw_later(NOT_VALID);
|
|
8243 }
|
|
8244
|
|
8245 /*
|
|
8246 * Dump one word: apply case modifications and append a line to the buffer.
|
|
8247 */
|
|
8248 static void
|
|
8249 dump_word(word, round, flags, lnum)
|
|
8250 char_u *word;
|
|
8251 int round;
|
|
8252 int flags;
|
|
8253 linenr_T lnum;
|
|
8254 {
|
|
8255 int keepcap = FALSE;
|
|
8256 char_u *p;
|
|
8257 char_u cword[MAXWLEN];
|
|
8258 char_u badword[MAXWLEN + 3];
|
|
8259
|
|
8260 if (round == 1 && (flags & WF_CAPMASK) != 0)
|
|
8261 {
|
|
8262 /* Need to fix case according to "flags". */
|
|
8263 make_case_word(word, cword, flags);
|
|
8264 p = cword;
|
|
8265 }
|
|
8266 else
|
|
8267 {
|
|
8268 p = word;
|
|
8269 if (round == 2 && (captype(word, NULL) & WF_KEEPCAP) == 0)
|
|
8270 keepcap = TRUE;
|
|
8271 }
|
|
8272
|
|
8273 /* Bad word is preceded by "/!" and some other
|
|
8274 * flags. */
|
|
8275 if ((flags & (WF_BANNED | WF_RARE)) || keepcap)
|
|
8276 {
|
|
8277 STRCPY(badword, "/");
|
|
8278 if (keepcap)
|
|
8279 STRCAT(badword, "=");
|
|
8280 if (flags & WF_BANNED)
|
|
8281 STRCAT(badword, "!");
|
|
8282 else if (flags & WF_RARE)
|
|
8283 STRCAT(badword, "?");
|
|
8284 STRCAT(badword, p);
|
|
8285 p = badword;
|
|
8286 }
|
|
8287
|
|
8288 ml_append(lnum, p, (colnr_T)0, FALSE);
|
|
8289 }
|
|
8290
|
|
8291 /*
|
|
8292 * Find matching prefixes for "word". Prepend each to "word" and append
|
|
8293 * a line to the buffer.
|
|
8294 * Return the updated line number.
|
|
8295 */
|
|
8296 static linenr_T
|
|
8297 apply_prefixes(slang, word, round, flags, startlnum)
|
|
8298 slang_T *slang;
|
|
8299 char_u *word; /* case-folded word */
|
|
8300 int round;
|
|
8301 int flags; /* flags with prefix ID */
|
|
8302 linenr_T startlnum;
|
|
8303 {
|
|
8304 idx_T arridx[MAXWLEN];
|
|
8305 int curi[MAXWLEN];
|
|
8306 char_u prefix[MAXWLEN];
|
|
8307 int c;
|
|
8308 char_u *byts;
|
|
8309 idx_T *idxs;
|
|
8310 linenr_T lnum = startlnum;
|
|
8311 int depth;
|
|
8312 int n;
|
|
8313 int len;
|
|
8314 int prefid = (unsigned)flags >> 16;
|
|
8315 int i;
|
|
8316
|
|
8317 byts = slang->sl_pbyts;
|
|
8318 idxs = slang->sl_pidxs;
|
|
8319 if (byts != NULL) /* array not is empty */
|
|
8320 {
|
|
8321 /*
|
|
8322 * Loop over all prefixes, building them byte-by-byte in prefix[].
|
|
8323 * When at the end of a prefix check that it supports "prefid".
|
|
8324 */
|
|
8325 depth = 0;
|
|
8326 arridx[0] = 0;
|
|
8327 curi[0] = 1;
|
|
8328 while (depth >= 0 && !got_int)
|
|
8329 {
|
|
8330 len = arridx[depth];
|
|
8331 if (curi[depth] > byts[len])
|
|
8332 {
|
|
8333 /* Done all bytes at this node, go up one level. */
|
|
8334 --depth;
|
|
8335 line_breakcheck();
|
|
8336 }
|
|
8337 else
|
|
8338 {
|
|
8339 /* Do one more byte at this node. */
|
|
8340 n = len + curi[depth];
|
|
8341 ++curi[depth];
|
|
8342 c = byts[n];
|
|
8343 if (c == 0)
|
|
8344 {
|
|
8345 /* End of prefix, find out how many IDs there are. */
|
|
8346 for (i = 1; i < len; ++i)
|
|
8347 if (byts[n + i] != 0)
|
|
8348 break;
|
|
8349 curi[depth] += i - 1;
|
|
8350
|
366
|
8351 i = valid_word_prefix(i, n, prefid, word, slang);
|
|
8352 if (i != 0)
|
351
|
8353 {
|
|
8354 vim_strncpy(prefix + depth, word, MAXWLEN - depth);
|
366
|
8355 dump_word(prefix, round,
|
|
8356 (i & WF_RAREPFX) ? (flags | WF_RARE)
|
|
8357 : flags, lnum++);
|
351
|
8358 }
|
|
8359 }
|
|
8360 else
|
|
8361 {
|
|
8362 /* Normal char, go one level deeper. */
|
|
8363 prefix[depth++] = c;
|
|
8364 arridx[depth] = idxs[n];
|
|
8365 curi[depth] = 1;
|
|
8366 }
|
|
8367 }
|
|
8368 }
|
|
8369 }
|
|
8370
|
|
8371 return lnum;
|
|
8372 }
|
|
8373
|
236
|
8374 #endif /* FEAT_SYN_HL */
|