223
|
1 /* vi:set ts=8 sts=4 sw=4:
|
|
2 *
|
|
3 * VIM - Vi IMproved by Bram Moolenaar
|
|
4 *
|
|
5 * Do ":help uganda" in Vim to read copying and usage conditions.
|
|
6 * Do ":help credits" in Vim to see a list of people who contributed.
|
|
7 * See README.txt for an overview of the Vim source code.
|
|
8 */
|
|
9
|
|
10 /*
|
|
11 * spell.c: code for spell checking
|
226
|
12 *
|
300
|
13 * The spell checking mechanism uses a tree (aka trie). Each node in the tree
|
|
14 * has a list of bytes that can appear (siblings). For each byte there is a
|
|
15 * pointer to the node with the byte that follows in the word (child).
|
324
|
16 *
|
|
17 * A NUL byte is used where the word may end. The bytes are sorted, so that
|
|
18 * binary searching can be used and the NUL bytes are at the start. The
|
|
19 * number of possible bytes is stored before the list of bytes.
|
|
20 *
|
|
21 * The tree uses two arrays: "byts" stores the characters, "idxs" stores
|
|
22 * either the next index or flags. The tree starts at index 0. For example,
|
|
23 * to lookup "vi" this sequence is followed:
|
|
24 * i = 0
|
|
25 * len = byts[i]
|
|
26 * n = where "v" appears in byts[i + 1] to byts[i + len]
|
|
27 * i = idxs[n]
|
|
28 * len = byts[i]
|
|
29 * n = where "i" appears in byts[i + 1] to byts[i + len]
|
|
30 * i = idxs[n]
|
|
31 * len = byts[i]
|
|
32 * find that byts[i + 1] is 0, idxs[i + 1] has flags for "vi".
|
300
|
33 *
|
|
34 * There are two trees: one with case-folded words and one with words in
|
|
35 * original case. The second one is only used for keep-case words and is
|
|
36 * usually small.
|
|
37 *
|
|
38 * Thanks to Olaf Seibert for providing an example implementation of this tree
|
|
39 * and the compression mechanism.
|
243
|
40 *
|
|
41 * Matching involves checking the caps type: Onecap ALLCAP KeepCap.
|
|
42 *
|
236
|
43 * Why doesn't Vim use aspell/ispell/myspell/etc.?
|
|
44 * See ":help develop-spell".
|
|
45 */
|
|
46
|
300
|
47 /*
|
323
|
48 * Use this to let the score depend on how much a suggestion sounds like the
|
324
|
49 * bad word. It's quite slow and only occasionally makes the sorting better.
|
|
50 #define SOUNDFOLD_SCORE
|
|
51 */
|
|
52
|
|
53 /*
|
|
54 * Use this to adjust the score after finding suggestions, based on the
|
|
55 * suggested word sounding like the bad word. This is much faster than doing
|
|
56 * it for every possible suggestion.
|
|
57 * Disadvantage: When "the" is typed as "hte" it sounds different and goes
|
|
58 * down in the list.
|
|
59 #define RESCORE(word_score, sound_score) ((2 * word_score + sound_score) / 3)
|
323
|
60 */
|
|
61
|
|
62 /*
|
300
|
63 * Vim spell file format: <HEADER> <SUGGEST> <LWORDTREE> <KWORDTREE>
|
|
64 *
|
|
65 * <HEADER>: <fileID> <regioncnt> <regionname> ...
|
|
66 * <charflagslen> <charflags> <fcharslen> <fchars>
|
|
67 *
|
323
|
68 * <fileID> 10 bytes "VIMspell06"
|
300
|
69 * <regioncnt> 1 byte number of regions following (8 supported)
|
307
|
70 * <regionname> 2 bytes Region name: ca, au, etc. Lower case.
|
300
|
71 * First <regionname> is region 1.
|
|
72 *
|
|
73 * <charflagslen> 1 byte Number of bytes in <charflags> (should be 128).
|
|
74 * <charflags> N bytes List of flags (first one is for character 128):
|
324
|
75 * 0x01 word character CF_WORD
|
|
76 * 0x02 upper-case character CF_UPPER
|
300
|
77 * <fcharslen> 2 bytes Number of bytes in <fchars>.
|
|
78 * <fchars> N bytes Folded characters, first one is for character 128.
|
|
79 *
|
|
80 *
|
323
|
81 * <SUGGEST> : <repcount> <rep> ...
|
|
82 * <salflags> <salcount> <sal> ...
|
|
83 * <maplen> <mapstr>
|
|
84 *
|
|
85 * <repcount> 2 bytes number of <rep> items, MSB first.
|
|
86 *
|
|
87 * <rep> : <repfromlen> <repfrom> <reptolen> <repto>
|
|
88 *
|
|
89 * <repfromlen> 1 byte length of <repfrom>
|
|
90 *
|
|
91 * <repfrom> N bytes "from" part of replacement
|
|
92 *
|
|
93 * <reptolen> 1 byte length of <repto>
|
|
94 *
|
|
95 * <repto> N bytes "to" part of replacement
|
300
|
96 *
|
323
|
97 * <salflags> 1 byte flags for soundsalike conversion:
|
|
98 * SAL_F0LLOWUP
|
|
99 * SAL_COLLAPSE
|
|
100 * SAL_REM_ACCENTS
|
|
101 *
|
|
102 * <sal> : <salfromlen> <salfrom> <saltolen> <salto>
|
|
103 *
|
|
104 * <salfromlen> 1 byte length of <salfrom>
|
|
105 *
|
|
106 * <salfrom> N bytes "from" part of soundsalike
|
|
107 *
|
|
108 * <saltolen> 1 byte length of <salto>
|
|
109 *
|
|
110 * <salto> N bytes "to" part of soundsalike
|
|
111 *
|
|
112 * <maplen> 2 bytes length of <mapstr>, MSB first
|
|
113 *
|
|
114 * <mapstr> N bytes String with sequences of similar characters,
|
|
115 * separated by slashes.
|
300
|
116 *
|
|
117 *
|
|
118 * <LWORDTREE>: <wordtree>
|
|
119 *
|
|
120 * <wordtree>: <nodecount> <nodedata> ...
|
|
121 *
|
|
122 * <nodecount> 4 bytes Number of nodes following. MSB first.
|
|
123 *
|
|
124 * <nodedata>: <siblingcount> <sibling> ...
|
|
125 *
|
|
126 * <siblingcount> 1 byte Number of siblings in this node. The siblings
|
|
127 * follow in sorted order.
|
|
128 *
|
|
129 * <sibling>: <byte> [<nodeidx> <xbyte> | <flags> [<region>]]
|
|
130 *
|
|
131 * <byte> 1 byte Byte value of the sibling. Special cases:
|
|
132 * BY_NOFLAGS: End of word without flags and for all
|
|
133 * regions.
|
|
134 * BY_FLAGS: End of word, <flags> follow.
|
|
135 * BY_INDEX: Child of sibling is shared, <nodeidx>
|
|
136 * and <xbyte> follow.
|
|
137 *
|
|
138 * <nodeidx> 3 bytes Index of child for this sibling, MSB first.
|
|
139 *
|
|
140 * <xbyte> 1 byte byte value of the sibling.
|
|
141 *
|
|
142 * <flags> 1 byte bitmask of:
|
|
143 * WF_ALLCAP word must have only capitals
|
|
144 * WF_ONECAP first char of word must be capital
|
|
145 * WF_RARE rare word
|
|
146 * WF_REGION <region> follows
|
|
147 *
|
|
148 * <region> 1 byte Bitmask for regions in which word is valid. When
|
|
149 * omitted it's valid in all regions.
|
|
150 * Lowest bit is for region 1.
|
|
151 *
|
|
152 * <KWORDTREE>: <wordtree>
|
|
153 *
|
|
154 * All text characters are in 'encoding', but stored as single bytes.
|
|
155 */
|
|
156
|
223
|
157 #if defined(MSDOS) || defined(WIN16) || defined(WIN32) || defined(_WIN64)
|
|
158 # include <io.h> /* for lseek(), must be before vim.h */
|
|
159 #endif
|
|
160
|
|
161 #include "vim.h"
|
|
162
|
|
163 #if defined(FEAT_SYN_HL) || defined(PROTO)
|
|
164
|
|
165 #ifdef HAVE_FCNTL_H
|
|
166 # include <fcntl.h>
|
|
167 #endif
|
|
168
|
323
|
#define MAXWLEN 250             /* Assume max. word len is this many bytes.
                                   Some places assume a word length fits in a
                                   byte, thus it can't be above 255. */

/* Type used for indexes in the word tree.  It needs to be at least 3 bytes,
 * because a <nodeidx> in the spell file is 3 bytes.  "int" is used when it is
 * wider than 2 bytes, otherwise "long".
 * If int is 8 bytes we could use something smaller, but what? */
#if SIZEOF_INT > 2
typedef int idx_T;
#else
typedef long idx_T;
#endif
|
|
180
|
|
/* Flags used for a word.  Only the lowest byte can be used, the region byte
 * comes above it (find_word() obtains the region mask with "flags >> 8"). */
#define WF_REGION   0x01        /* region byte follows */
#define WF_ONECAP   0x02        /* word with one capital (or all capitals) */
#define WF_ALLCAP   0x04        /* word must be all capitals */
#define WF_RARE     0x08        /* rare word */
#define WF_BANNED   0x10        /* bad word */
#define WF_KEEPCAP  0x80        /* keep-case word */

/* All the flags that describe the case of a word. */
#define WF_CAPMASK (WF_ONECAP | WF_ALLCAP | WF_KEEPCAP)

#define WF_USED 0x10000         /* Word was found in text.  Must be in separate
                                   byte before region and flags, i.e. above
                                   the flag byte and the region byte. */
|
|
194
|
300
|
/* Special values for <byte> in a <sibling> of the spell file. */
#define BY_NOFLAGS  0           /* end of word without flags or region */
#define BY_FLAGS    1           /* end of word, flag byte follows */
#define BY_INDEX    2           /* child is shared, index follows */
#define BY_SPECIAL  BY_INDEX    /* highest special byte value */
|
236
|
199
|
323
|
/* Info from "REP" and "SAL" entries in ".aff" file used in si_rep, sl_rep,
 * si_sal and sl_sal.
 * One replacement: from "ft_from" to "ft_to". */
typedef struct fromto_S
{
    char_u      *ft_from;       /* "from" part of the replacement */
    char_u      *ft_to;         /* "to" part of the replacement */
} fromto_T;
|
236
|
208
|
|
/*
 * Structure used to store words and other info for one language, loaded from
 * a .spl file.
 * The main access is through the tree in "sl_fbyts/sl_fidxs", storing the
 * case-folded words.  "sl_kbyts/sl_kidxs" is for keep-case words.
 *
 * The "byts" array stores the possible bytes in each tree node, preceded by
 * the number of possible bytes, sorted on byte value:
 *      <len> <byte1> <byte2> ...
 * The "idxs" array stores the index of the child node corresponding to the
 * byte in "byts".
 * Exception: when the byte is zero, the word may end here and "idxs" holds
 * the flags and region for the word.  There may be several zeros in sequence
 * for alternative flag/region combinations.
 */
typedef struct slang_S slang_T;
struct slang_S
{
    slang_T     *sl_next;       /* next language in the list that starts at
                                   "first_lang" */
    char_u      *sl_name;       /* language name "en", "en.rare", "nl", etc. */
    char_u      *sl_fname;      /* name of .spl file */
    int         sl_add;         /* TRUE if it's a .add file. */
    char_u      *sl_fbyts;      /* case-folded word bytes; find_word() treats
                                   NULL as an empty tree */
    idx_T       *sl_fidxs;      /* case-folded word indexes */
    char_u      *sl_kbyts;      /* keep-case word bytes; find_word() treats
                                   NULL as an empty tree */
    idx_T       *sl_kidxs;      /* keep-case word indexes */
    char_u      sl_regions[17]; /* table with up to 8 region names (two bytes
                                   each) plus NUL */

    garray_T    sl_rep;         /* list of fromto_T entries from REP lines */
    short       sl_rep_first[256];  /* indexes where byte first appears, -1 if
                                       there is none */
    garray_T    sl_sal;         /* list of fromto_T entries from SAL lines */
    short       sl_sal_first[256];  /* indexes where byte first appears, -1 if
                                       there is none */
    int         sl_followup;    /* SAL followup */
    int         sl_collapse;    /* SAL collapse_result */
    int         sl_rem_accents; /* SAL remove_accents */
    char_u      *sl_map;        /* string with similar chars from MAP lines */
};
|
|
248
|
243
|
/* First language that is loaded, start of the linked list of loaded
 * languages (linked through "sl_next").  NULL when nothing was loaded yet. */
static slang_T *first_lang = NULL;
|
|
252
|
323
|
/* Flags used in .spl file for soundsalike flags (the <salflags> byte).
 * NOTE: SAL_F0LLOWUP is spelled with the digit zero, not the letter "O". */
#define SAL_F0LLOWUP            1
#define SAL_COLLAPSE            2
#define SAL_REM_ACCENTS         4
|
|
257
|
236
|
/*
 * Structure used in "b_langp", filled from 'spelllang'.
 * The entries form an array that ends with an entry where "lp_slang" is NULL;
 * spell_check() loops over the entries until it finds that one.
 */
typedef struct langp_S
{
    slang_T     *lp_slang;      /* info for this language (NULL for last one) */
    int         lp_region;      /* bitmask for region or REGION_ALL */
} langp_T;
|
|
266
|
|
/* Pointer to entry "i" in growarray "ga" that holds langp_T items. */
#define LANGP_ENTRY(ga, i)      (((langp_T *)(ga).ga_data) + (i))

#define REGION_ALL 0xff         /* word valid in all regions */

/* Result values.  Lower number is accepted over higher one. */
#define SP_BANNED       -1      /* word has the WF_BANNED flag */
#define SP_OK           0       /* word is good */
#define SP_RARE         1       /* word has the WF_RARE flag */
#define SP_LOCAL        2       /* word is only valid in another region */
#define SP_BAD          3       /* word was not recognized */

#define VIMSPELLMAGIC "VIMspell06"  /* string at start of Vim spell file */
#define VIMSPELLMAGICL 10           /* number of bytes in VIMSPELLMAGIC */
|
|
280
|
|
/*
 * Information used when looking for suggestions.
 */
typedef struct suginfo_S
{
    garray_T    su_ga;              /* suggestions, contains "suggest_T" */
    int         su_maxscore;        /* maximum score for adding to su_ga */
    int         su_icase;           /* accept words with wrong case */
    int         su_icase_add;       /* add matches while ignoring case */
    char_u      *su_badptr;         /* start of bad word in line */
    int         su_badlen;          /* length of detected bad word in line */
    char_u      su_badword[MAXWLEN]; /* bad word truncated at su_badlen */
    char_u      su_fbadword[MAXWLEN]; /* su_badword case-folded */
    hashtab_T   su_banned;          /* table with banned words */
#ifdef SOUNDFOLD_SCORE
    /* Only present when the sound-folded score is computed for every
     * possible suggestion. */
    slang_T     *su_slang;          /* currently used slang_T */
    char_u      su_salword[MAXWLEN]; /* soundfolded badword */
#endif
} suginfo_T;
|
|
300
|
|
/* One word suggestion.  Used in "su_ga" of suginfo_T. */
typedef struct suggest_S
{
    char_u      *st_word;       /* suggested word, allocated string */
    int         st_orglen;      /* length of replaced text */
    int         st_score;       /* lower is better */
#ifdef RESCORE
    int         st_had_bonus;   /* bonus already included in score */
#endif
} suggest_T;
|
|
311
|
|
/* Suggestion number "i" in the "su_ga" growarray of suginfo_T pointer "sup". */
#define SUG(sup, i) (((suggest_T *)(sup)->su_ga.ga_data)[i])

/* Number of suggestions displayed. */
#define SUG_PROMPT_COUNT    ((int)Rows - 2)

/* Number of suggestions kept when cleaning up.  When rescore_suggestions() is
 * called the score may change, thus we need to keep more than what is
 * displayed: at least 25. */
#define SUG_CLEAN_COUNT     (SUG_PROMPT_COUNT < 25 ? 25 : SUG_PROMPT_COUNT)

/* Threshold for sorting and cleaning up suggestions.  Don't want to keep lots
 * of suggestions that are not going to be displayed. */
#define SUG_MAX_COUNT       (SUG_PROMPT_COUNT + 50)
|
323
|
325
|
|
/* score for various changes; a lower score is a better suggestion */
#define SCORE_SPLIT     99      /* split bad word */
#define SCORE_ICASE     52      /* slightly different case */
#define SCORE_ALLCAP    120     /* need all-cap case */
#define SCORE_REGION    70      /* word is for different region */
#define SCORE_RARE      180     /* rare word */
#define SCORE_NOTUSED   11      /* word not found in text yet */

/* score for edit distance */
#define SCORE_SWAP      90      /* swap two characters */
#define SCORE_SWAP3     110     /* swap two characters in three */
#define SCORE_REP       87      /* REP replacement */
#define SCORE_SUBST     93      /* substitute a character */
#define SCORE_SIMILAR   33      /* substitute a similar character */
#define SCORE_DEL       94      /* delete a character */
#define SCORE_INS       96      /* insert a character */

#define SCORE_MAXINIT   350     /* Initial maximum score: higher == slower.
                                 * 350 allows for about three changes. */
#define SCORE_MAXMAX    999999  /* accept any score */
|
|
346
|
|
/*
 * Structure to store info for word matching.
 * It is filled by spell_check() and passed to find_word(), which updates
 * "mi_result" and "mi_end" when it finds a match.
 */
typedef struct matchinf_S
{
    langp_T     *mi_lp;                 /* info for language and region */

    /* pointers to original text to be checked */
    char_u      *mi_word;               /* start of word being checked */
    char_u      *mi_end;                /* end of matching word */
    char_u      *mi_fend;               /* next char to be added to mi_fword */
    char_u      *mi_cend;               /* char after what was used for
                                           mi_capflags; NULL when mi_capflags
                                           was not computed yet */

    /* case-folded text */
    char_u      mi_fword[MAXWLEN + 1];  /* mi_word case-folded */
    int         mi_fwordlen;            /* nr of valid bytes in mi_fword */

    /* others */
    int         mi_result;              /* result so far: SP_BAD, SP_OK, etc. */
    int         mi_capflags;            /* WF_ONECAP WF_ALLCAP WF_KEEPCAP */
} matchinf_T;
|
|
369
|
307
|
/*
 * The tables used for recognizing word characters according to spelling.
 * These are only used for the first 256 characters of 'encoding'.
 * Each table is indexed by the byte value of the character.
 */
typedef struct spelltab_S
{
    char_u  st_isw[256];        /* flags: is word char */
    char_u  st_isu[256];        /* flags: is uppercase char */
    char_u  st_fold[256];       /* chars: folded case */
    char_u  st_upper[256];      /* chars: upper case */
} spelltab_T;

/* The tables used by the SPELL_ISWORDP(), SPELL_TOFOLD(), SPELL_TOUPPER() and
 * SPELL_ISUPPER() macros. */
static spelltab_T   spelltab;
/* NOTE(review): presumably set once "spelltab" has been filled from a spell
 * file; confirm where it is used. */
static int          did_set_spelltab;
|
|
384
|
324
|
/* Flags in the <charflags> bytes of the spell file header. */
#define CF_WORD         0x01    /* word character */
#define CF_UPPER        0x02    /* upper-case character */

static void clear_spell_chartab __ARGS((spelltab_T *sp));
static int set_spell_finish __ARGS((spelltab_T *new_st));
|
|
390
|
|
/*
 * Return TRUE if "p" points to a word character for spelling.
 * A multi-byte character is a word character when its class from
 * mb_get_class() is 2 or higher, for a single byte the "st_isw" table is used.
 * Checking for a word character is done very often, avoid the function call
 * overhead.
 * Note that "p" is evaluated more than once in the multi-byte version.
 */
#ifdef FEAT_MBYTE
# define SPELL_ISWORDP(p) ((has_mbyte && MB_BYTE2LEN(*(p)) > 1) \
                ? (mb_get_class(p) >= 2) : spelltab.st_isw[*(p)])
#else
# define SPELL_ISWORDP(p) (spelltab.st_isw[*(p)])
#endif
|
|
403
|
323
|
/*
 * Struct to keep the state at each level in spell_try_change().
 */
typedef struct trystate_S
{
    int         ts_state;       /* state at this level, STATE_ */
    int         ts_score;       /* score */
    int         ts_curi;        /* index in list of child nodes */
    int         ts_fidx;        /* index in fword[], case-folded bad word */
    int         ts_fidxtry;     /* ts_fidx at which bytes may be changed */
    int         ts_twordlen;    /* valid length of tword[] */
    idx_T       ts_arridx;      /* index in tree array, start of node */
    char_u      ts_save_prewordlen; /* saved "prewordlen"; stored in one
                                       byte, see MAXWLEN */
    int         ts_save_splitoff;   /* su_splitoff saved here
                                       NOTE(review): suginfo_T has no
                                       "su_splitoff" member, this name may be
                                       outdated */
    int         ts_save_badflags;   /* badflags saved here */
} trystate_T;
|
|
420
|
236
|
/*
 * Prototypes for the functions that are local to this file.
 */
static slang_T *slang_alloc __ARGS((char_u *lang));
static void slang_free __ARGS((slang_T *lp));
static void slang_clear __ARGS((slang_T *lp));
static void find_word __ARGS((matchinf_T *mip, int keepcap));
static int spell_valid_case __ARGS((int origflags, int treeflags));
static void spell_load_lang __ARGS((char_u *lang));
static char_u *spell_enc __ARGS((void));
static void spell_load_cb __ARGS((char_u *fname, void *cookie));
static slang_T *spell_load_file __ARGS((char_u *fname, char_u *lang, slang_T *old_lp, int silent));
static idx_T read_tree __ARGS((FILE *fd, char_u *byts, idx_T *idxs, int maxidx, int startidx));
static int find_region __ARGS((char_u *rp, char_u *region));
static int captype __ARGS((char_u *word, char_u *end));
static void spell_reload_one __ARGS((char_u *fname, int added_word));
static int set_spell_charflags __ARGS((char_u *flags, int cnt, char_u *upp));
static int set_spell_chartab __ARGS((char_u *fol, char_u *low, char_u *upp));
static void write_spell_chartab __ARGS((FILE *fd));
static int spell_casefold __ARGS((char_u *p, int len, char_u *buf, int buflen));
static void onecap_copy __ARGS((char_u *word, char_u *wcopy, int upper));
static void spell_try_change __ARGS((suginfo_T *su));
static int try_deeper __ARGS((suginfo_T *su, trystate_T *stack, int depth, int score_add));
static void find_keepcap_word __ARGS((slang_T *slang, char_u *fword, char_u *kword));
static void spell_try_soundalike __ARGS((suginfo_T *su));
static void make_case_word __ARGS((char_u *fword, char_u *cword, int flags));
/* similar_chars() is currently disabled. */
#if 0
static int similar_chars __ARGS((slang_T *slang, int c1, int c2));
#endif
/* add_suggestion() only has the "had_bonus" argument when RESCORE is
 * defined. */
#ifdef RESCORE
static void add_suggestion __ARGS((suginfo_T *su, char_u *goodword, int use_score, int had_bonus));
#else
static void add_suggestion __ARGS((suginfo_T *su, char_u *goodword, int use_score));
#endif
static void add_banned __ARGS((suginfo_T *su, char_u *word));
static int was_banned __ARGS((suginfo_T *su, char_u *word));
static void free_banned __ARGS((suginfo_T *su));
#ifdef RESCORE
static void rescore_suggestions __ARGS((suginfo_T *su));
#endif
static void cleanup_suggestions __ARGS((suginfo_T *su, int keep));
static void spell_soundfold __ARGS((slang_T *slang, char_u *inword, char_u *res));
#if defined(RESCORE) || defined(SOUNDFOLD_SCORE)
static int spell_sound_score __ARGS((slang_T *slang, char_u *goodword, char_u *badsound));
#endif
static int spell_edit_score __ARGS((char_u *badword, char_u *goodword));
|
|
464
|
324
|
/*
 * Use our own character-case definitions, because the current locale may
 * differ from what the .spl file uses.
 * These must not be called with a negative number!
 *
 * SPELL_TOFOLD(c)      case-folded version of character "c"
 * SPELL_TOUPPER(c)     upper-case version of character "c"
 * SPELL_ISUPPER(c)     non-zero when character "c" is upper-case
 *
 * Characters below 256 use the "spelltab" tables.  Note that "c" is evaluated
 * more than once.
 */
#ifndef FEAT_MBYTE
/* Non-multi-byte implementation. */
# define SPELL_TOFOLD(c) ((c) < 256 ? spelltab.st_fold[c] : (c))
# define SPELL_TOUPPER(c) ((c) < 256 ? spelltab.st_upper[c] : (c))
# define SPELL_ISUPPER(c) ((c) < 256 ? spelltab.st_isu[c] : FALSE)
#else
/* Multi-byte implementation.  For Unicode we can call utf_*(), but don't do
 * that for ASCII, because we don't want to use 'casemap' here.  Otherwise use
 * the "w" library function for characters above 255 if available. */
# ifdef HAVE_TOWLOWER
#  define SPELL_TOFOLD(c) (enc_utf8 && (c) >= 128 ? utf_fold(c) \
            : (c) < 256 ? spelltab.st_fold[c] : towlower(c))
# else
#  define SPELL_TOFOLD(c) (enc_utf8 && (c) >= 128 ? utf_fold(c) \
            : (c) < 256 ? spelltab.st_fold[c] : (c))
# endif

# ifdef HAVE_TOWUPPER
#  define SPELL_TOUPPER(c) (enc_utf8 && (c) >= 128 ? utf_toupper(c) \
            : (c) < 256 ? spelltab.st_upper[c] : towupper(c))
# else
#  define SPELL_TOUPPER(c) (enc_utf8 && (c) >= 128 ? utf_toupper(c) \
            : (c) < 256 ? spelltab.st_upper[c] : (c))
# endif

# ifdef HAVE_ISWUPPER
#  define SPELL_ISUPPER(c) (enc_utf8 && (c) >= 128 ? utf_isupper(c) \
            : (c) < 256 ? spelltab.st_isu[c] : iswupper(c))
# else
/* Without iswupper() we cannot tell the case of a character above 255.
 * Return FALSE, like the non-multi-byte version does.  Returning "c" itself
 * would report every such character as upper-case. */
#  define SPELL_ISUPPER(c) (enc_utf8 && (c) >= 128 ? utf_isupper(c) \
            : (c) < 256 ? spelltab.st_isu[c] : FALSE)
# endif
#endif
|
|
503
|
307
|
504
|
|
/* Error message for a spell file with unexpected contents; find_word() gives
 * it when the tree data looks corrupted. */
static char *e_format = N_("E759: Format error in spell file");
|
236
|
506
|
|
507 /*
|
|
508 * Main spell-checking function.
|
300
|
509 * "ptr" points to a character that could be the start of a word.
|
236
|
510 * "*attrp" is set to the attributes for a badly spelled word. For a non-word
|
|
511 * or when it's OK it remains unchanged.
|
|
512 * This must only be called when 'spelllang' is not empty.
|
323
|
513 *
|
|
514 * "sug" is normally NULL. When looking for suggestions it points to
|
|
515 * suginfo_T. It's passed as a void pointer to keep the struct local.
|
|
516 *
|
236
|
517 * Returns the length of the word in bytes, also when it's OK, so that the
|
|
518 * caller can skip over the word.
|
|
519 */
|
|
520 int
|
300
|
521 spell_check(wp, ptr, attrp)
|
236
|
522 win_T *wp; /* current window */
|
|
523 char_u *ptr;
|
|
524 int *attrp;
|
|
525 {
|
|
526 matchinf_T mi; /* Most things are put in "mi" so that it can
|
|
527 be passed to functions quickly. */
|
|
528
|
307
|
529 /* A word never starts at a space or a control character. Return quickly
|
|
530 * then, skipping over the character. */
|
|
531 if (*ptr <= ' ')
|
|
532 return 1;
|
236
|
533
|
300
|
534 /* A word starting with a number is always OK. Also skip hexadecimal
|
|
535 * numbers 0xFF99 and 0X99FF. */
|
|
536 if (*ptr >= '0' && *ptr <= '9')
|
|
537 {
|
316
|
538 if (*ptr == '0' && (ptr[1] == 'x' || ptr[1] == 'X'))
|
|
539 mi.mi_end = skiphex(ptr + 2);
|
300
|
540 else
|
|
541 mi.mi_end = skipdigits(ptr);
|
|
542 }
|
|
543 else
|
236
|
544 {
|
307
|
545 /* Find the end of the word. */
|
|
546 mi.mi_word = ptr;
|
300
|
547 mi.mi_fend = ptr;
|
323
|
548
|
307
|
549 if (SPELL_ISWORDP(mi.mi_fend))
|
300
|
550 {
|
|
551 /* Make case-folded copy of the characters until the next non-word
|
|
552 * character. */
|
|
553 do
|
|
554 {
|
|
555 mb_ptr_adv(mi.mi_fend);
|
307
|
556 } while (*mi.mi_fend != NUL && SPELL_ISWORDP(mi.mi_fend));
|
300
|
557 }
|
307
|
558
|
|
559 /* We always use the characters up to the next non-word character,
|
|
560 * also for bad words. */
|
|
561 mi.mi_end = mi.mi_fend;
|
323
|
562
|
|
563 /* Check caps type later. */
|
|
564 mi.mi_capflags = 0;
|
|
565 mi.mi_cend = NULL;
|
300
|
566
|
307
|
567 /* Include one non-word character so that we can check for the
|
|
568 * word end. */
|
|
569 if (*mi.mi_fend != NUL)
|
|
570 mb_ptr_adv(mi.mi_fend);
|
|
571
|
|
572 (void)spell_casefold(ptr, (int)(mi.mi_fend - ptr), mi.mi_fword,
|
|
573 MAXWLEN + 1);
|
|
574 mi.mi_fwordlen = STRLEN(mi.mi_fword);
|
|
575
|
300
|
576 /* The word is bad unless we recognize it. */
|
|
577 mi.mi_result = SP_BAD;
|
236
|
578
|
300
|
579 /*
|
|
580 * Loop over the languages specified in 'spelllang'.
|
|
581 * We check them all, because a matching word may be longer than an
|
|
582 * already found matching word.
|
|
583 */
|
|
584 for (mi.mi_lp = LANGP_ENTRY(wp->w_buffer->b_langp, 0);
|
|
585 mi.mi_lp->lp_slang != NULL; ++mi.mi_lp)
|
243
|
586 {
|
300
|
587 /* Check for a matching word in case-folded words. */
|
|
588 find_word(&mi, FALSE);
|
|
589
|
324
|
590 /* Check for a matching word in keep-case words. */
|
300
|
591 find_word(&mi, TRUE);
|
|
592 }
|
243
|
593
|
300
|
594 if (mi.mi_result != SP_OK)
|
|
595 {
|
|
596 /* When we are at a non-word character there is no error, just
|
|
597 * skip over the character (try looking for a word after it). */
|
307
|
598 if (!SPELL_ISWORDP(ptr))
|
243
|
599 {
|
300
|
600 #ifdef FEAT_MBYTE
|
|
601 if (has_mbyte)
|
|
602 return mb_ptr2len_check(ptr);
|
|
603 #endif
|
|
604 return 1;
|
243
|
605 }
|
|
606
|
307
|
607 if (mi.mi_result == SP_BAD || mi.mi_result == SP_BANNED)
|
300
|
608 *attrp = highlight_attr[HLF_SPB];
|
|
609 else if (mi.mi_result == SP_RARE)
|
|
610 *attrp = highlight_attr[HLF_SPR];
|
|
611 else
|
|
612 *attrp = highlight_attr[HLF_SPL];
|
243
|
613 }
|
|
614 }
|
|
615
|
300
|
616 return (int)(mi.mi_end - ptr);
|
236
|
617 }
|
|
618
|
|
/*
 * Check if the word at "mip->mi_word" is in the tree of the language
 * "mip->mi_lp".
 * When "keepcap" is TRUE check in keep-case word tree, using the original
 * text.  Otherwise check in the case-folded word tree, using "mip->mi_fword".
 *
 * For a match mip->mi_result is updated when the result is better than what
 * was found before, and mip->mi_end is set to the end of the longest match.
 * Side effects:
 * - More text may be case-folded, which changes mip->mi_fend, mip->mi_fword
 *   and mip->mi_fwordlen.
 * - mip->mi_cend and mip->mi_capflags are recomputed when needed.
 * - The WF_USED flag is set in the tree for every ending that is checked.
 */
    static void
find_word(mip, keepcap)
    matchinf_T  *mip;
    int         keepcap;
{
    idx_T       arridx = 0;         /* current index in byts[] and idxs[] */
    int         endlen[MAXWLEN];    /* length at possible word endings */
    idx_T       endidx[MAXWLEN];    /* possible word endings */
    int         endidxcnt = 0;      /* number of entries in endlen[] and
                                       endidx[] */
    int         len;
    int         wlen = 0;           /* number of bytes matched so far */
    int         flen;               /* number of not yet matched bytes that
                                       are available in ptr[] */
    int         c;
    char_u      *ptr;               /* text that is matched with the tree */
    idx_T       lo, hi, m;
#ifdef FEAT_MBYTE
    char_u      *s;
#endif
    char_u      *p;
    int         res = SP_BAD;
    int         valid;
    slang_T     *slang = mip->mi_lp->lp_slang;
    unsigned    flags;
    char_u      *byts;
    idx_T       *idxs;

    if (keepcap)
    {
        /* Check for word with matching case in keep-case tree. */
        ptr = mip->mi_word;
        flen = 9999;                /* no case folding, always enough bytes */
        byts = slang->sl_kbyts;
        idxs = slang->sl_kidxs;
    }
    else
    {
        /* Check for case-folded in case-folded tree. */
        ptr = mip->mi_fword;
        flen = mip->mi_fwordlen;    /* available case-folded bytes */
        byts = slang->sl_fbyts;
        idxs = slang->sl_fidxs;
    }

    if (byts == NULL)
        return;                 /* array is empty */

    /*
     * Repeat advancing in the tree until:
     * - there is a byte that doesn't match,
     * - we reach the end of the tree,
     * - or we reach the end of the line.
     */
    for (;;)
    {
        if (flen == 0 && *mip->mi_fend != NUL)
        {
            /* Need to fold at least one more character.  Do until next
             * non-word character for efficiency. */
            p = mip->mi_fend;
            do
            {
                mb_ptr_adv(mip->mi_fend);
            } while (*mip->mi_fend != NUL && SPELL_ISWORDP(mip->mi_fend));

            /* Include the non-word character so that we can check for the
             * word end. */
            if (*mip->mi_fend != NUL)
                mb_ptr_adv(mip->mi_fend);

            /* Append the folded text to what is already in mi_fword[]. */
            (void)spell_casefold(p, (int)(mip->mi_fend - p),
                             mip->mi_fword + mip->mi_fwordlen,
                             MAXWLEN - mip->mi_fwordlen);
            flen = STRLEN(mip->mi_fword + mip->mi_fwordlen);
            mip->mi_fwordlen += flen;
        }

        /* The node starts with the number of bytes that follow. */
        len = byts[arridx++];

        /* If the first possible byte is a zero the word could end here.
         * Remember this index, we first check for the longest word. */
        if (byts[arridx] == 0)
        {
            if (endidxcnt == MAXWLEN)
            {
                /* Must be a corrupted spell file. */
                EMSG(_(e_format));
                return;
            }
            endlen[endidxcnt] = wlen;
            endidx[endidxcnt++] = arridx++;
            --len;

            /* Skip over the zeros, there can be several flag/region
             * combinations. */
            while (len > 0 && byts[arridx] == 0)
            {
                ++arridx;
                --len;
            }
            if (len == 0)
                break;      /* no children, word must end here */
        }

        /* Stop looking at end of the line. */
        if (ptr[wlen] == NUL)
            break;

        /* Perform a binary search in the list of accepted bytes. */
        c = ptr[wlen];
        lo = arridx;
        hi = arridx + len - 1;
        while (lo < hi)
        {
            m = (lo + hi) / 2;
            if (byts[m] > c)
                hi = m - 1;
            else if (byts[m] < c)
                lo = m + 1;
            else
            {
                lo = hi = m;
                break;
            }
        }

        /* Stop if there is no matching byte. */
        if (hi < lo || byts[lo] != c)
            break;

        /* Continue at the child (if there is one). */
        arridx = idxs[lo];
        ++wlen;
        --flen;
    }

    /*
     * Verify that one of the possible endings is valid.  Try the longest
     * first.
     */
    while (endidxcnt > 0)
    {
        --endidxcnt;
        arridx = endidx[endidxcnt];
        wlen = endlen[endidxcnt];

#ifdef FEAT_MBYTE
        if ((*mb_head_off)(ptr, ptr + wlen) > 0)
            continue;       /* not at first byte of character */
#endif
        if (SPELL_ISWORDP(ptr + wlen))
            continue;       /* next char is a word character */

#ifdef FEAT_MBYTE
        if (!keepcap && has_mbyte)
        {
            /* Compute byte length in original word, length may change
             * when folding case.  Both texts are advanced one character at a
             * time. */
            p = mip->mi_word;
            for (s = ptr; s < ptr + wlen; mb_ptr_adv(s))
                mb_ptr_adv(p);
            wlen = p - mip->mi_word;
        }
#endif

        /* Check flags and region.  Repeat this if there are more
         * flags/region alternatives until there is a match.
         * byts[arridx - 1] is the byte count of the node, arridx is at the
         * first zero byte. */
        for (len = byts[arridx - 1]; len > 0 && byts[arridx] == 0; --len)
        {
            flags = idxs[arridx];

            /* Set a flag for words that were used.  The region and case
             * doesn't matter here, it's only used to rate the suggestions. */
            idxs[arridx] = flags | WF_USED;

            if (keepcap)
            {
                /* For "keepcap" tree the case is always right. */
                valid = TRUE;
            }
            else
            {
                /* Check that the word is in the required case. */
                if (mip->mi_cend != mip->mi_word + wlen)
                {
                    /* mi_capflags was set for a different word length, need
                     * to do it again. */
                    mip->mi_cend = mip->mi_word + wlen;
                    mip->mi_capflags = captype(mip->mi_word, mip->mi_cend);
                }

                valid = spell_valid_case(mip->mi_capflags, flags);
            }

            if (valid)
            {
                if (flags & WF_BANNED)
                    res = SP_BANNED;
                else if (flags & WF_REGION)
                {
                    /* Check region.  The region byte is stored above the
                     * flag byte. */
                    if ((mip->mi_lp->lp_region & (flags >> 8)) != 0)
                        res = SP_OK;
                    else
                        res = SP_LOCAL;
                }
                else if (flags & WF_RARE)
                    res = SP_RARE;
                else
                    res = SP_OK;

                /* Always use the longest match and the best result. */
                if (mip->mi_result > res)
                {
                    mip->mi_result = res;
                    mip->mi_end = mip->mi_word + wlen;
                }
                else if (mip->mi_result == res
                                        && mip->mi_end < mip->mi_word + wlen)
                    mip->mi_end = mip->mi_word + wlen;

                if (res == SP_OK)
                    break;
            }
            else
                res = SP_BAD;

            ++arridx;
        }

        /* A good word was found, no need to try the shorter endings. */
        if (res == SP_OK)
            break;
    }
}
|
|
858
|
323
|
859 /*
|
|
860 * Check case flags for a word. Return TRUE if the word has the requested
|
|
861 * case.
|
|
862 */
|
|
863 static int
|
|
864 spell_valid_case(origflags, treeflags)
|
|
865 int origflags; /* flags for the checked word. */
|
|
866 int treeflags; /* flags for the word in the spell tree */
|
|
867 {
|
|
868 return (origflags == WF_ALLCAP
|
|
869 || ((treeflags & (WF_ALLCAP | WF_KEEPCAP)) == 0
|
|
870 && ((treeflags & WF_ONECAP) == 0 || origflags == WF_ONECAP)));
|
|
871 }
|
|
872
|
300
|
873
|
236
|
874 /*
|
|
875 * Move to next spell error.
|
323
|
876 * "curline" is TRUE for "z?": find word under/after cursor in the same line.
|
236
|
877 * Return OK if found, FAIL otherwise.
|
|
878 */
|
|
879 int
|
323
|
880 spell_move_to(dir, allwords, curline)
|
236
|
881 int dir; /* FORWARD or BACKWARD */
|
|
882 int allwords; /* TRUE for "[s" and "]s" */
|
323
|
883 int curline;
|
236
|
884 {
|
249
|
885 linenr_T lnum;
|
|
886 pos_T found_pos;
|
236
|
887 char_u *line;
|
|
888 char_u *p;
|
|
889 int attr = 0;
|
|
890 int len;
|
249
|
891 int has_syntax = syntax_present(curbuf);
|
|
892 int col;
|
|
893 int can_spell;
|
236
|
894
|
310
|
895 if (!curwin->w_p_spell || *curbuf->b_p_spl == NUL)
|
236
|
896 {
|
|
897 EMSG(_("E756: Spell checking not enabled"));
|
|
898 return FAIL;
|
|
899 }
|
|
900
|
249
|
901 /*
|
|
902 * Start looking for bad word at the start of the line, because we can't
|
|
903 * start halfway a word, we don't know where it starts or ends.
|
|
904 *
|
|
905 * When searching backwards, we continue in the line to find the last
|
|
906 * bad word (in the cursor line: before the cursor).
|
|
907 */
|
|
908 lnum = curwin->w_cursor.lnum;
|
|
909 found_pos.lnum = 0;
|
236
|
910
|
|
911 while (!got_int)
|
|
912 {
|
249
|
913 line = ml_get(lnum);
|
|
914 p = line;
|
|
915
|
236
|
916 while (*p != NUL)
|
|
917 {
|
300
|
918 /* When searching backward don't search after the cursor. */
|
|
919 if (dir == BACKWARD
|
|
920 && lnum == curwin->w_cursor.lnum
|
|
921 && (colnr_T)(p - line) >= curwin->w_cursor.col)
|
|
922 break;
|
249
|
923
|
300
|
924 /* start of word */
|
|
925 len = spell_check(curwin, p, &attr);
|
249
|
926
|
300
|
927 if (attr != 0)
|
|
928 {
|
|
929 /* We found a bad word. Check the attribute. */
|
|
930 if (allwords || attr == highlight_attr[HLF_SPB])
|
236
|
931 {
|
300
|
932 /* When searching forward only accept a bad word after
|
|
933 * the cursor. */
|
|
934 if (dir == BACKWARD
|
|
935 || lnum > curwin->w_cursor.lnum
|
|
936 || (lnum == curwin->w_cursor.lnum
|
323
|
937 && (colnr_T)(curline ? p - line + len
|
|
938 : p - line)
|
300
|
939 > curwin->w_cursor.col))
|
236
|
940 {
|
300
|
941 if (has_syntax)
|
249
|
942 {
|
300
|
943 col = p - line;
|
|
944 (void)syn_get_id(lnum, (colnr_T)col,
|
|
945 FALSE, &can_spell);
|
249
|
946
|
300
|
947 /* have to get the line again, a multi-line
|
|
948 * regexp may make it invalid */
|
|
949 line = ml_get(lnum);
|
|
950 p = line + col;
|
|
951 }
|
|
952 else
|
|
953 can_spell = TRUE;
|
249
|
954
|
300
|
955 if (can_spell)
|
|
956 {
|
|
957 found_pos.lnum = lnum;
|
|
958 found_pos.col = p - line;
|
249
|
959 #ifdef FEAT_VIRTUALEDIT
|
300
|
960 found_pos.coladd = 0;
|
249
|
961 #endif
|
300
|
962 if (dir == FORWARD)
|
|
963 {
|
|
964 /* No need to search further. */
|
|
965 curwin->w_cursor = found_pos;
|
|
966 return OK;
|
249
|
967 }
|
|
968 }
|
236
|
969 }
|
|
970 }
|
300
|
971 attr = 0;
|
236
|
972 }
|
|
973
|
300
|
974 /* advance to character after the word */
|
|
975 p += len;
|
|
976 if (*p == NUL)
|
|
977 break;
|
236
|
978 }
|
|
979
|
323
|
980 if (curline)
|
|
981 return FAIL; /* only check cursor line */
|
|
982
|
236
|
983 /* Advance to next line. */
|
249
|
984 if (dir == BACKWARD)
|
|
985 {
|
|
986 if (found_pos.lnum != 0)
|
|
987 {
|
|
988 /* Use the last match in the line. */
|
|
989 curwin->w_cursor = found_pos;
|
|
990 return OK;
|
|
991 }
|
|
992 if (lnum == 1)
|
|
993 return FAIL;
|
|
994 --lnum;
|
|
995 }
|
|
996 else
|
|
997 {
|
|
998 if (lnum == curbuf->b_ml.ml_line_count)
|
|
999 return FAIL;
|
|
1000 ++lnum;
|
|
1001 }
|
236
|
1002
|
|
1003 line_breakcheck();
|
|
1004 }
|
|
1005
|
|
1006 return FAIL; /* interrupted */
|
|
1007 }
|
|
1008
|
|
1009 /*
|
307
|
1010 * Load word list(s) for "lang" from Vim spell file(s).
|
310
|
1011 * "lang" must be the language without the region: e.g., "en".
|
236
|
1012 */
|
307
|
1013 static void
|
236
|
1014 spell_load_lang(lang)
|
|
1015 char_u *lang;
|
|
1016 {
|
310
|
1017 char_u fname_enc[85];
|
236
|
1018 int r;
|
307
|
1019 char_u langcp[MAXWLEN + 1];
|
|
1020
|
310
|
1021 /* Copy the language name to pass it to spell_load_cb() as a cookie.
|
307
|
1022 * It's truncated when an error is detected. */
|
|
1023 STRCPY(langcp, lang);
|
|
1024
|
310
|
1025 /*
|
|
1026 * Find the first spell file for "lang" in 'runtimepath' and load it.
|
|
1027 */
|
|
1028 vim_snprintf((char *)fname_enc, sizeof(fname_enc) - 5,
|
|
1029 "spell/%s.%s.spl", lang, spell_enc());
|
|
1030 r = do_in_runtimepath(fname_enc, FALSE, spell_load_cb, &langcp);
|
307
|
1031
|
|
1032 if (r == FAIL && *langcp != NUL)
|
|
1033 {
|
|
1034 /* Try loading the ASCII version. */
|
310
|
1035 vim_snprintf((char *)fname_enc, sizeof(fname_enc) - 5,
|
272
|
1036 "spell/%s.ascii.spl", lang);
|
310
|
1037 r = do_in_runtimepath(fname_enc, FALSE, spell_load_cb, &langcp);
|
307
|
1038 }
|
|
1039
|
|
1040 if (r == FAIL)
|
|
1041 smsg((char_u *)_("Warning: Cannot find word list \"%s\""),
|
236
|
1042 fname_enc + 6);
|
310
|
1043 else if (*langcp != NUL)
|
|
1044 {
|
|
1045 /* Load all the additions. */
|
|
1046 STRCPY(fname_enc + STRLEN(fname_enc) - 3, "add.spl");
|
|
1047 do_in_runtimepath(fname_enc, TRUE, spell_load_cb, &langcp);
|
|
1048 }
|
|
1049 }
|
|
1050
|
|
1051 /*
|
|
1052 * Return the encoding used for spell checking: Use 'encoding', except that we
|
|
1053 * use "latin1" for "latin9". And limit to 60 characters (just in case).
|
|
1054 */
|
|
1055 static char_u *
|
|
1056 spell_enc()
|
|
1057 {
|
|
1058
|
|
1059 #ifdef FEAT_MBYTE
|
|
1060 if (STRLEN(p_enc) < 60 && STRCMP(p_enc, "iso-8859-15") != 0)
|
|
1061 return p_enc;
|
|
1062 #endif
|
|
1063 return (char_u *)"latin1";
|
236
|
1064 }
|
|
1065
|
|
1066 /*
|
|
1067 * Allocate a new slang_T.
|
|
1068 * Caller must fill "sl_next".
|
|
1069 */
|
|
1070 static slang_T *
|
|
1071 slang_alloc(lang)
|
|
1072 char_u *lang;
|
|
1073 {
|
|
1074 slang_T *lp;
|
|
1075
|
300
|
1076 lp = (slang_T *)alloc_clear(sizeof(slang_T));
|
236
|
1077 if (lp != NULL)
|
|
1078 {
|
|
1079 lp->sl_name = vim_strsave(lang);
|
323
|
1080 ga_init2(&lp->sl_rep, sizeof(fromto_T), 10);
|
|
1081 ga_init2(&lp->sl_sal, sizeof(fromto_T), 10);
|
236
|
1082 }
|
|
1083 return lp;
|
|
1084 }
|
|
1085
|
|
1086 /*
|
|
1087 * Free the contents of an slang_T and the structure itself.
|
|
1088 */
|
|
1089 static void
|
|
1090 slang_free(lp)
|
|
1091 slang_T *lp;
|
|
1092 {
|
|
1093 vim_free(lp->sl_name);
|
310
|
1094 vim_free(lp->sl_fname);
|
|
1095 slang_clear(lp);
|
|
1096 vim_free(lp);
|
|
1097 }
|
|
1098
|
|
1099 /*
|
|
1100 * Clear an slang_T so that the file can be reloaded.
|
|
1101 */
|
|
1102 static void
|
|
1103 slang_clear(lp)
|
|
1104 slang_T *lp;
|
|
1105 {
|
323
|
1106 garray_T *gap;
|
|
1107 fromto_T *ftp;
|
|
1108 int round;
|
|
1109
|
300
|
1110 vim_free(lp->sl_fbyts);
|
310
|
1111 lp->sl_fbyts = NULL;
|
300
|
1112 vim_free(lp->sl_kbyts);
|
310
|
1113 lp->sl_kbyts = NULL;
|
300
|
1114 vim_free(lp->sl_fidxs);
|
310
|
1115 lp->sl_fidxs = NULL;
|
300
|
1116 vim_free(lp->sl_kidxs);
|
310
|
1117 lp->sl_kidxs = NULL;
|
323
|
1118
|
|
1119 for (round = 1; round <= 2; ++round)
|
|
1120 {
|
|
1121 gap = round == 1 ? &lp->sl_rep : &lp->sl_sal;
|
|
1122 while (gap->ga_len > 0)
|
|
1123 {
|
|
1124 ftp = &((fromto_T *)gap->ga_data)[--gap->ga_len];
|
|
1125 vim_free(ftp->ft_from);
|
|
1126 vim_free(ftp->ft_to);
|
|
1127 }
|
|
1128 ga_clear(gap);
|
|
1129 }
|
|
1130
|
|
1131 vim_free(lp->sl_map);
|
|
1132 lp->sl_map = NULL;
|
236
|
1133 }
|
|
1134
|
|
1135 /*
|
307
|
1136 * Load one spell file and store the info into a slang_T.
|
236
|
1137 * Invoked through do_in_runtimepath().
|
|
1138 */
|
|
1139 static void
|
310
|
1140 spell_load_cb(fname, cookie)
|
236
|
1141 char_u *fname;
|
307
|
1142 void *cookie; /* points to the language name */
|
236
|
1143 {
|
323
|
1144 (void)spell_load_file(fname, (char_u *)cookie, NULL, FALSE);
|
310
|
1145 }
|
|
1146
|
|
1147 /*
|
|
1148 * Load one spell file and store the info into a slang_T.
|
|
1149 *
|
|
1150 * This is invoked in two ways:
|
|
1151 * - From spell_load_cb() to load a spell file for the first time. "lang" is
|
|
1152 * the language name, "old_lp" is NULL. Will allocate an slang_T.
|
|
1153 * - To reload a spell file that was changed. "lang" is NULL and "old_lp"
|
|
1154 * points to the existing slang_T.
|
323
|
1155 * Returns the slang_T the spell file was loaded into. NULL for error.
|
310
|
1156 */
|
323
|
1157 static slang_T *
|
|
1158 spell_load_file(fname, lang, old_lp, silent)
|
310
|
1159 char_u *fname;
|
|
1160 char_u *lang;
|
|
1161 slang_T *old_lp;
|
323
|
1162 int silent; /* no error if file doesn't exist */
|
310
|
1163 {
|
236
|
1164 FILE *fd;
|
|
1165 char_u buf[MAXWLEN + 1];
|
|
1166 char_u *p;
|
|
1167 int i;
|
300
|
1168 int len;
|
236
|
1169 int round;
|
|
1170 char_u *save_sourcing_name = sourcing_name;
|
|
1171 linenr_T save_sourcing_lnum = sourcing_lnum;
|
255
|
1172 int cnt, ccnt;
|
|
1173 char_u *fol;
|
307
|
1174 slang_T *lp = NULL;
|
323
|
1175 garray_T *gap;
|
|
1176 fromto_T *ftp;
|
|
1177 int rr;
|
|
1178 short *first;
|
324
|
1179 idx_T idx;
|
236
|
1180
|
310
|
1181 fd = mch_fopen((char *)fname, "r");
|
236
|
1182 if (fd == NULL)
|
|
1183 {
|
323
|
1184 if (!silent)
|
|
1185 EMSG2(_(e_notopen), fname);
|
|
1186 else if (p_verbose > 2)
|
|
1187 {
|
|
1188 verbose_enter();
|
|
1189 smsg((char_u *)e_notopen, fname);
|
|
1190 verbose_leave();
|
|
1191 }
|
255
|
1192 goto endFAIL;
|
236
|
1193 }
|
310
|
1194 if (p_verbose > 2)
|
|
1195 {
|
|
1196 verbose_enter();
|
|
1197 smsg((char_u *)_("Reading spell file \"%s\""), fname);
|
|
1198 verbose_leave();
|
|
1199 }
|
|
1200
|
|
1201 if (old_lp == NULL)
|
|
1202 {
|
|
1203 lp = slang_alloc(lang);
|
|
1204 if (lp == NULL)
|
|
1205 goto endFAIL;
|
|
1206
|
|
1207 /* Remember the file name, used to reload the file when it's updated. */
|
|
1208 lp->sl_fname = vim_strsave(fname);
|
|
1209 if (lp->sl_fname == NULL)
|
|
1210 goto endFAIL;
|
|
1211
|
|
1212 /* Check for .add.spl. */
|
|
1213 lp->sl_add = strstr((char *)gettail(fname), ".add.") != NULL;
|
|
1214 }
|
|
1215 else
|
|
1216 lp = old_lp;
|
307
|
1217
|
236
|
1218 /* Set sourcing_name, so that error messages mention the file name. */
|
|
1219 sourcing_name = fname;
|
|
1220 sourcing_lnum = 0;
|
|
1221
|
255
|
1222 /* <HEADER>: <fileID> <regioncnt> <regionname> ...
|
|
1223 * <charflagslen> <charflags> <fcharslen> <fchars> */
|
236
|
1224 for (i = 0; i < VIMSPELLMAGICL; ++i)
|
|
1225 buf[i] = getc(fd); /* <fileID> */
|
|
1226 if (STRNCMP(buf, VIMSPELLMAGIC, VIMSPELLMAGICL) != 0)
|
|
1227 {
|
|
1228 EMSG(_("E757: Wrong file ID in spell file"));
|
255
|
1229 goto endFAIL;
|
236
|
1230 }
|
|
1231
|
|
1232 cnt = getc(fd); /* <regioncnt> */
|
255
|
1233 if (cnt < 0)
|
236
|
1234 {
|
|
1235 truncerr:
|
|
1236 EMSG(_("E758: Truncated spell file"));
|
255
|
1237 goto endFAIL;
|
236
|
1238 }
|
|
1239 if (cnt > 8)
|
|
1240 {
|
|
1241 formerr:
|
307
|
1242 EMSG(_(e_format));
|
255
|
1243 goto endFAIL;
|
236
|
1244 }
|
|
1245 for (i = 0; i < cnt; ++i)
|
|
1246 {
|
|
1247 lp->sl_regions[i * 2] = getc(fd); /* <regionname> */
|
|
1248 lp->sl_regions[i * 2 + 1] = getc(fd);
|
|
1249 }
|
|
1250 lp->sl_regions[cnt * 2] = NUL;
|
|
1251
|
255
|
1252 cnt = getc(fd); /* <charflagslen> */
|
|
1253 if (cnt > 0)
|
|
1254 {
|
300
|
1255 p = alloc((unsigned)cnt);
|
255
|
1256 if (p == NULL)
|
|
1257 goto endFAIL;
|
|
1258 for (i = 0; i < cnt; ++i)
|
|
1259 p[i] = getc(fd); /* <charflags> */
|
|
1260
|
|
1261 ccnt = (getc(fd) << 8) + getc(fd); /* <fcharslen> */
|
|
1262 if (ccnt <= 0)
|
300
|
1263 {
|
|
1264 vim_free(p);
|
255
|
1265 goto formerr;
|
300
|
1266 }
|
|
1267 fol = alloc((unsigned)ccnt + 1);
|
255
|
1268 if (fol == NULL)
|
300
|
1269 {
|
|
1270 vim_free(p);
|
255
|
1271 goto endFAIL;
|
300
|
1272 }
|
255
|
1273 for (i = 0; i < ccnt; ++i)
|
|
1274 fol[i] = getc(fd); /* <fchars> */
|
|
1275 fol[i] = NUL;
|
|
1276
|
324
|
1277 /* Set the word-char flags and fill SPELL_ISUPPER() table. */
|
300
|
1278 i = set_spell_charflags(p, cnt, fol);
|
|
1279 vim_free(p);
|
|
1280 vim_free(fol);
|
|
1281 if (i == FAIL)
|
255
|
1282 goto formerr;
|
|
1283 }
|
|
1284 else
|
|
1285 {
|
|
1286 /* When <charflagslen> is zero then <fcharlen> must also be zero. */
|
|
1287 cnt = (getc(fd) << 8) + getc(fd);
|
|
1288 if (cnt != 0)
|
|
1289 goto formerr;
|
|
1290 }
|
|
1291
|
323
|
1292 /* <SUGGEST> : <repcount> <rep> ...
|
|
1293 * <salflags> <salcount> <sal> ...
|
|
1294 * <maplen> <mapstr> */
|
|
1295 for (round = 1; round <= 2; ++round)
|
|
1296 {
|
|
1297 if (round == 1)
|
|
1298 {
|
|
1299 gap = &lp->sl_rep;
|
|
1300 first = lp->sl_rep_first;
|
|
1301 }
|
|
1302 else
|
|
1303 {
|
|
1304 gap = &lp->sl_sal;
|
|
1305 first = lp->sl_sal_first;
|
|
1306
|
|
1307 i = getc(fd); /* <salflags> */
|
|
1308 if (i & SAL_F0LLOWUP)
|
|
1309 lp->sl_followup = TRUE;
|
|
1310 if (i & SAL_COLLAPSE)
|
|
1311 lp->sl_collapse = TRUE;
|
|
1312 if (i & SAL_REM_ACCENTS)
|
|
1313 lp->sl_rem_accents = TRUE;
|
|
1314 }
|
|
1315
|
|
1316 cnt = (getc(fd) << 8) + getc(fd); /* <repcount> or <salcount> */
|
|
1317 if (cnt < 0)
|
|
1318 goto formerr;
|
|
1319
|
|
1320 if (ga_grow(gap, cnt) == FAIL)
|
|
1321 goto endFAIL;
|
|
1322 for (; gap->ga_len < cnt; ++gap->ga_len)
|
|
1323 {
|
|
1324 /* <rep> : <repfromlen> <repfrom> <reptolen> <repto> */
|
|
1325 /* <sal> : <salfromlen> <salfrom> <saltolen> <salto> */
|
|
1326 ftp = &((fromto_T *)gap->ga_data)[gap->ga_len];
|
|
1327 for (rr = 1; rr <= 2; ++rr)
|
|
1328 {
|
|
1329 ccnt = getc(fd);
|
|
1330 if (ccnt < 0)
|
|
1331 {
|
|
1332 if (rr == 2)
|
|
1333 vim_free(ftp->ft_from);
|
|
1334 goto formerr;
|
|
1335 }
|
|
1336 if ((p = alloc(ccnt + 1)) == NULL)
|
|
1337 {
|
|
1338 if (rr == 2)
|
|
1339 vim_free(ftp->ft_from);
|
|
1340 goto endFAIL;
|
|
1341 }
|
|
1342 for (i = 0; i < ccnt; ++i)
|
|
1343 p[i] = getc(fd); /* <repfrom> or <salfrom> */
|
|
1344 p[i] = NUL;
|
|
1345 if (rr == 1)
|
|
1346 ftp->ft_from = p;
|
|
1347 else
|
|
1348 ftp->ft_to = p;
|
|
1349 }
|
|
1350 }
|
|
1351
|
|
1352 /* Fill the first-index table. */
|
|
1353 for (i = 0; i < 256; ++i)
|
|
1354 first[i] = -1;
|
|
1355 for (i = 0; i < gap->ga_len; ++i)
|
|
1356 {
|
|
1357 ftp = &((fromto_T *)gap->ga_data)[i];
|
|
1358 if (first[*ftp->ft_from] == -1)
|
|
1359 first[*ftp->ft_from] = i;
|
|
1360 }
|
|
1361 }
|
|
1362
|
|
1363 cnt = (getc(fd) << 8) + getc(fd); /* <maplen> */
|
|
1364 if (cnt < 0)
|
|
1365 goto formerr;
|
|
1366 p = alloc(cnt + 1);
|
|
1367 if (p == NULL)
|
|
1368 goto endFAIL;
|
|
1369 for (i = 0; i < cnt; ++i)
|
|
1370 p[i] = getc(fd); /* <mapstr> */
|
|
1371 p[i] = NUL;
|
|
1372 lp->sl_map = p;
|
|
1373
|
236
|
1374
|
300
|
1375 /* round 1: <LWORDTREE>
|
|
1376 * round 2: <KWORDTREE> */
|
|
1377 for (round = 1; round <= 2; ++round)
|
236
|
1378 {
|
300
|
1379 /* The tree size was computed when writing the file, so that we can
|
|
1380 * allocate it as one long block. <nodecount> */
|
|
1381 len = (getc(fd) << 24) + (getc(fd) << 16) + (getc(fd) << 8) + getc(fd);
|
|
1382 if (len < 0)
|
|
1383 goto truncerr;
|
|
1384 if (len > 0)
|
236
|
1385 {
|
300
|
1386 /* Allocate the byte array. */
|
|
1387 p = lalloc((long_u)len, TRUE);
|
|
1388 if (p == NULL)
|
|
1389 goto endFAIL;
|
|
1390 if (round == 1)
|
|
1391 lp->sl_fbyts = p;
|
|
1392 else
|
|
1393 lp->sl_kbyts = p;
|
236
|
1394
|
300
|
1395 /* Allocate the index array. */
|
|
1396 p = lalloc_clear((long_u)(len * sizeof(int)), TRUE);
|
|
1397 if (p == NULL)
|
|
1398 goto endFAIL;
|
|
1399 if (round == 1)
|
324
|
1400 lp->sl_fidxs = (idx_T *)p;
|
300
|
1401 else
|
324
|
1402 lp->sl_kidxs = (idx_T *)p;
|
300
|
1403
|
|
1404
|
|
1405 /* Read the tree and store it in the array. */
|
324
|
1406 idx = read_tree(fd,
|
300
|
1407 round == 1 ? lp->sl_fbyts : lp->sl_kbyts,
|
|
1408 round == 1 ? lp->sl_fidxs : lp->sl_kidxs,
|
|
1409 len, 0);
|
324
|
1410 if (idx == -1)
|
300
|
1411 goto truncerr;
|
324
|
1412 if (idx < 0)
|
236
|
1413 goto formerr;
|
|
1414 }
|
300
|
1415 }
|
243
|
1416
|
310
|
1417 /* For a new file link it in the list of spell files. */
|
|
1418 if (old_lp == NULL)
|
|
1419 {
|
|
1420 lp->sl_next = first_lang;
|
|
1421 first_lang = lp;
|
|
1422 }
|
307
|
1423
|
255
|
1424 goto endOK;
|
|
1425
|
|
1426 endFAIL:
|
310
|
1427 if (lang != NULL)
|
|
1428 /* truncating the name signals the error to spell_load_lang() */
|
|
1429 *lang = NUL;
|
|
1430 if (lp != NULL && old_lp == NULL)
|
323
|
1431 {
|
307
|
1432 slang_free(lp);
|
323
|
1433 lp = NULL;
|
|
1434 }
|
255
|
1435
|
|
1436 endOK:
|
236
|
1437 if (fd != NULL)
|
|
1438 fclose(fd);
|
|
1439 sourcing_name = save_sourcing_name;
|
|
1440 sourcing_lnum = save_sourcing_lnum;
|
323
|
1441
|
|
1442 return lp;
|
236
|
1443 }
|
|
1444
|
|
1445 /*
|
300
|
1446 * Read one row of siblings from the spell file and store it in the byte array
|
|
1447 * "byts" and index array "idxs". Recursively read the children.
|
|
1448 *
|
|
1449 * NOTE: The code here must match put_tree().
|
|
1450 *
|
|
1451 * Returns the index follosing the siblings.
|
|
1452 * Returns -1 if the file is shorter than expected.
|
|
1453 * Returns -2 if there is a format error.
|
236
|
1454 */
|
324
|
1455 static idx_T
|
300
|
1456 read_tree(fd, byts, idxs, maxidx, startidx)
|
|
1457 FILE *fd;
|
|
1458 char_u *byts;
|
324
|
1459 idx_T *idxs;
|
300
|
1460 int maxidx; /* size of arrays */
|
324
|
1461 idx_T startidx; /* current index in "byts" and "idxs" */
|
236
|
1462 {
|
300
|
1463 int len;
|
|
1464 int i;
|
|
1465 int n;
|
324
|
1466 idx_T idx = startidx;
|
300
|
1467 int c;
|
|
1468 #define SHARED_MASK 0x8000000
|
236
|
1469
|
300
|
1470 len = getc(fd); /* <siblingcount> */
|
|
1471 if (len <= 0)
|
|
1472 return -1;
|
|
1473
|
|
1474 if (startidx + len >= maxidx)
|
|
1475 return -2;
|
|
1476 byts[idx++] = len;
|
|
1477
|
|
1478 /* Read the byte values, flag/region bytes and shared indexes. */
|
|
1479 for (i = 1; i <= len; ++i)
|
236
|
1480 {
|
300
|
1481 c = getc(fd); /* <byte> */
|
|
1482 if (c < 0)
|
|
1483 return -1;
|
|
1484 if (c <= BY_SPECIAL)
|
|
1485 {
|
|
1486 if (c == BY_NOFLAGS)
|
|
1487 {
|
|
1488 /* No flags, all regions. */
|
|
1489 idxs[idx] = 0;
|
|
1490 c = 0;
|
|
1491 }
|
|
1492 else if (c == BY_FLAGS)
|
|
1493 {
|
|
1494 /* Read flags and option region. */
|
|
1495 c = getc(fd); /* <flags> */
|
|
1496 if (c & WF_REGION)
|
|
1497 c = (getc(fd) << 8) + c; /* <region> */
|
|
1498 idxs[idx] = c;
|
|
1499 c = 0;
|
|
1500 }
|
|
1501 else /* c == BY_INDEX */
|
|
1502 {
|
|
1503 /* <nodeidx> */
|
|
1504 n = (getc(fd) << 16) + (getc(fd) << 8) + getc(fd);
|
|
1505 if (n < 0 || n >= maxidx)
|
|
1506 return -2;
|
|
1507 idxs[idx] = n + SHARED_MASK;
|
|
1508 c = getc(fd); /* <xbyte> */
|
|
1509 }
|
|
1510 }
|
|
1511 byts[idx++] = c;
|
236
|
1512 }
|
|
1513
|
300
|
1514 /* Recursively read the children for non-shared siblings.
|
|
1515 * Skip the end-of-word ones (zero byte value) and the shared ones (and
|
|
1516 * remove SHARED_MASK) */
|
|
1517 for (i = 1; i <= len; ++i)
|
|
1518 if (byts[startidx + i] != 0)
|
|
1519 {
|
|
1520 if (idxs[startidx + i] & SHARED_MASK)
|
|
1521 idxs[startidx + i] &= ~SHARED_MASK;
|
|
1522 else
|
|
1523 {
|
|
1524 idxs[startidx + i] = idx;
|
|
1525 idx = read_tree(fd, byts, idxs, maxidx, idx);
|
|
1526 if (idx < 0)
|
|
1527 break;
|
|
1528 }
|
|
1529 }
|
236
|
1530
|
300
|
1531 return idx;
|
236
|
1532 }
|
|
1533
|
|
1534 /*
|
|
1535 * Parse 'spelllang' and set buf->b_langp accordingly.
|
|
1536 * Returns an error message or NULL.
|
|
1537 */
|
|
1538 char_u *
|
|
1539 did_set_spelllang(buf)
|
|
1540 buf_T *buf;
|
|
1541 {
|
|
1542 garray_T ga;
|
|
1543 char_u *lang;
|
|
1544 char_u *e;
|
|
1545 char_u *region;
|
|
1546 int region_mask;
|
|
1547 slang_T *lp;
|
|
1548 int c;
|
|
1549 char_u lbuf[MAXWLEN + 1];
|
323
|
1550 char_u spf_name[MAXPATHL];
|
|
1551 int did_spf = FALSE;
|
236
|
1552
|
|
1553 ga_init2(&ga, sizeof(langp_T), 2);
|
|
1554
|
323
|
1555 /* Get the name of the .spl file associated with 'spellfile'. */
|
|
1556 if (*buf->b_p_spf == NUL)
|
|
1557 did_spf = TRUE;
|
|
1558 else
|
|
1559 vim_snprintf((char *)spf_name, sizeof(spf_name), "%s.spl",
|
|
1560 buf->b_p_spf);
|
|
1561
|
236
|
1562 /* loop over comma separated languages. */
|
|
1563 for (lang = buf->b_p_spl; *lang != NUL; lang = e)
|
|
1564 {
|
|
1565 e = vim_strchr(lang, ',');
|
|
1566 if (e == NULL)
|
|
1567 e = lang + STRLEN(lang);
|
240
|
1568 region = NULL;
|
236
|
1569 if (e > lang + 2)
|
|
1570 {
|
|
1571 if (e - lang >= MAXWLEN)
|
|
1572 {
|
|
1573 ga_clear(&ga);
|
|
1574 return e_invarg;
|
|
1575 }
|
|
1576 if (lang[2] == '_')
|
|
1577 region = lang + 3;
|
|
1578 }
|
|
1579
|
307
|
1580 /* Check if we loaded this language before. */
|
236
|
1581 for (lp = first_lang; lp != NULL; lp = lp->sl_next)
|
|
1582 if (STRNICMP(lp->sl_name, lang, 2) == 0)
|
|
1583 break;
|
|
1584
|
|
1585 if (lp == NULL)
|
|
1586 {
|
|
1587 /* Not found, load the language. */
|
323
|
1588 vim_strncpy(lbuf, lang, e - lang);
|
236
|
1589 if (region != NULL)
|
|
1590 mch_memmove(lbuf + 2, lbuf + 5, e - lang - 4);
|
307
|
1591 spell_load_lang(lbuf);
|
236
|
1592 }
|
|
1593
|
307
|
1594 /*
|
|
1595 * Loop over the languages, there can be several files for each.
|
|
1596 */
|
|
1597 for (lp = first_lang; lp != NULL; lp = lp->sl_next)
|
|
1598 if (STRNICMP(lp->sl_name, lang, 2) == 0)
|
236
|
1599 {
|
316
|
1600 region_mask = REGION_ALL;
|
|
1601 if (region != NULL)
|
236
|
1602 {
|
307
|
1603 /* find region in sl_regions */
|
|
1604 c = find_region(lp->sl_regions, region);
|
|
1605 if (c == REGION_ALL)
|
|
1606 {
|
316
|
1607 if (!lp->sl_add)
|
|
1608 {
|
|
1609 c = *e;
|
|
1610 *e = NUL;
|
|
1611 smsg((char_u *)_("Warning: region %s not supported"),
|
307
|
1612 lang);
|
316
|
1613 *e = c;
|
|
1614 }
|
307
|
1615 }
|
|
1616 else
|
|
1617 region_mask = 1 << c;
|
236
|
1618 }
|
307
|
1619
|
|
1620 if (ga_grow(&ga, 1) == FAIL)
|
|
1621 {
|
|
1622 ga_clear(&ga);
|
|
1623 return e_outofmem;
|
|
1624 }
|
|
1625 LANGP_ENTRY(ga, ga.ga_len)->lp_slang = lp;
|
|
1626 LANGP_ENTRY(ga, ga.ga_len)->lp_region = region_mask;
|
|
1627 ++ga.ga_len;
|
323
|
1628
|
|
1629 /* Check if this is the 'spellfile' spell file. */
|
|
1630 if (fullpathcmp(spf_name, lp->sl_fname, FALSE) == FPC_SAME)
|
|
1631 did_spf = TRUE;
|
236
|
1632 }
|
|
1633
|
|
1634 if (*e == ',')
|
|
1635 ++e;
|
|
1636 }
|
|
1637
|
323
|
1638 /*
|
|
1639 * Make sure the 'spellfile' file is loaded. It may be in 'runtimepath',
|
|
1640 * then it's probably loaded above already. Otherwise load it here.
|
|
1641 */
|
|
1642 if (!did_spf)
|
|
1643 {
|
|
1644 for (lp = first_lang; lp != NULL; lp = lp->sl_next)
|
|
1645 if (fullpathcmp(spf_name, lp->sl_fname, FALSE) == FPC_SAME)
|
|
1646 break;
|
|
1647 if (lp == NULL)
|
|
1648 {
|
|
1649 vim_strncpy(lbuf, gettail(spf_name), 2);
|
|
1650 lp = spell_load_file(spf_name, lbuf, NULL, TRUE);
|
|
1651 }
|
|
1652 if (lp != NULL && ga_grow(&ga, 1) == OK)
|
|
1653 {
|
|
1654 LANGP_ENTRY(ga, ga.ga_len)->lp_slang = lp;
|
|
1655 LANGP_ENTRY(ga, ga.ga_len)->lp_region = REGION_ALL;
|
|
1656 ++ga.ga_len;
|
|
1657 }
|
|
1658 }
|
|
1659
|
236
|
1660 /* Add a NULL entry to mark the end of the list. */
|
|
1661 if (ga_grow(&ga, 1) == FAIL)
|
|
1662 {
|
|
1663 ga_clear(&ga);
|
|
1664 return e_outofmem;
|
|
1665 }
|
|
1666 LANGP_ENTRY(ga, ga.ga_len)->lp_slang = NULL;
|
|
1667 ++ga.ga_len;
|
|
1668
|
|
1669 /* Everything is fine, store the new b_langp value. */
|
|
1670 ga_clear(&buf->b_langp);
|
|
1671 buf->b_langp = ga;
|
|
1672
|
|
1673 return NULL;
|
|
1674 }
|
|
1675
|
|
1676 /*
|
|
1677 * Find the region "region[2]" in "rp" (points to "sl_regions").
|
|
1678 * Each region is simply stored as the two characters of it's name.
|
|
1679 * Returns the index if found, REGION_ALL if not found.
|
|
1680 */
|
|
1681 static int
|
|
1682 find_region(rp, region)
|
|
1683 char_u *rp;
|
|
1684 char_u *region;
|
|
1685 {
|
|
1686 int i;
|
|
1687
|
|
1688 for (i = 0; ; i += 2)
|
|
1689 {
|
|
1690 if (rp[i] == NUL)
|
|
1691 return REGION_ALL;
|
|
1692 if (rp[i] == region[0] && rp[i + 1] == region[1])
|
|
1693 break;
|
|
1694 }
|
|
1695 return i / 2;
|
|
1696 }
|
|
1697
|
|
1698 /*
|
323
|
1699 * Return case type of word:
|
236
|
1700 * w word 0
|
300
|
1701 * Word WF_ONECAP
|
|
1702 * W WORD WF_ALLCAP
|
|
1703 * WoRd wOrd WF_KEEPCAP
|
236
|
1704 */
|
|
1705 static int
|
|
1706 captype(word, end)
|
|
1707 char_u *word;
|
323
|
1708 char_u *end; /* When NULL use up to NUL byte. */
|
236
|
1709 {
|
|
1710 char_u *p;
|
|
1711 int c;
|
|
1712 int firstcap;
|
|
1713 int allcap;
|
|
1714 int past_second = FALSE; /* past second word char */
|
|
1715
|
|
1716 /* find first letter */
|
307
|
1717 for (p = word; !SPELL_ISWORDP(p); mb_ptr_adv(p))
|
323
|
1718 if (end == NULL ? *p == NUL : p >= end)
|
236
|
1719 return 0; /* only non-word characters, illegal word */
|
|
1720 #ifdef FEAT_MBYTE
|
310
|
1721 if (has_mbyte)
|
|
1722 c = mb_ptr2char_adv(&p);
|
|
1723 else
|
236
|
1724 #endif
|
310
|
1725 c = *p++;
|
324
|
1726 firstcap = allcap = SPELL_ISUPPER(c);
|
236
|
1727
|
|
1728 /*
|
|
1729 * Need to check all letters to find a word with mixed upper/lower.
|
|
1730 * But a word with an upper char only at start is a ONECAP.
|
|
1731 */
|
323
|
1732 for ( ; end == NULL ? *p != NUL : p < end; mb_ptr_adv(p))
|
307
|
1733 if (SPELL_ISWORDP(p))
|
236
|
1734 {
|
|
1735 #ifdef FEAT_MBYTE
|
|
1736 c = mb_ptr2char(p);
|
|
1737 #else
|
|
1738 c = *p;
|
|
1739 #endif
|
324
|
1740 if (!SPELL_ISUPPER(c))
|
236
|
1741 {
|
|
1742 /* UUl -> KEEPCAP */
|
|
1743 if (past_second && allcap)
|
300
|
1744 return WF_KEEPCAP;
|
236
|
1745 allcap = FALSE;
|
|
1746 }
|
|
1747 else if (!allcap)
|
|
1748 /* UlU -> KEEPCAP */
|
300
|
1749 return WF_KEEPCAP;
|
236
|
1750 past_second = TRUE;
|
|
1751 }
|
|
1752
|
|
1753 if (allcap)
|
300
|
1754 return WF_ALLCAP;
|
236
|
1755 if (firstcap)
|
300
|
1756 return WF_ONECAP;
|
236
|
1757 return 0;
|
|
1758 }
|
|
1759
|
|
# if defined(FEAT_MBYTE) || defined(PROTO)
/*
 * Throw away all loaded spell files and load again what is in use.
 * Used after 'encoding' is set and when ":mkspell" was used.
 */
    void
spell_reload()
{
    buf_T	*buf;
    slang_T	*lp;
    win_T	*wp;

    /* Initialize the table for SPELL_ISWORDP(). */
    init_spell_chartab();

    /* Free every loaded language, taking each one off the list first. */
    for (lp = first_lang; lp != NULL; lp = first_lang)
    {
	first_lang = lp->sl_next;
	slang_free(lp);
    }

    /* Go through all buffers and handle 'spelllang'. */
    for (buf = firstbuf; buf != NULL; buf = buf->b_next)
    {
	ga_clear(&buf->b_langp);

	/* Nothing to load when 'spelllang' is empty. */
	if (*buf->b_p_spl == NUL)
	    continue;

	/* Only load the wordlists when there is a window for this buffer
	 * in which 'spell' is set. */
	FOR_ALL_WINDOWS(wp)
	    if (wp->w_buffer == buf && wp->w_p_spell)
	    {
		(void)did_set_spelllang(buf);
# ifdef FEAT_WINDOWS
		break;
# endif
	    }
    }
}
# endif
|
|
1804
|
310
|
1805 /*
|
|
1806 * Reload the spell file "fname" if it's loaded.
|
|
1807 */
|
|
1808 static void
|
323
|
1809 spell_reload_one(fname, added_word)
|
310
|
1810 char_u *fname;
|
323
|
1811 int added_word; /* invoked through "zg" */
|
310
|
1812 {
|
|
1813 slang_T *lp;
|
323
|
1814 int didit = FALSE;
|
310
|
1815
|
|
1816 for (lp = first_lang; lp != NULL; lp = lp->sl_next)
|
|
1817 if (fullpathcmp(fname, lp->sl_fname, FALSE) == FPC_SAME)
|
|
1818 {
|
|
1819 slang_clear(lp);
|
323
|
1820 (void)spell_load_file(fname, NULL, lp, FALSE);
|
310
|
1821 redraw_all_later(NOT_VALID);
|
323
|
1822 didit = TRUE;
|
310
|
1823 }
|
323
|
1824
|
|
1825 /* When "zg" was used and the file wasn't loaded yet, should redo
|
|
1826 * 'spelllang' to get it loaded. */
|
|
1827 if (added_word && !didit)
|
|
1828 did_set_spelllang(curbuf);
|
310
|
1829 }
|
|
1830
|
|
1831
|
236
|
1832 /*
|
|
1833 * Functions for ":mkspell".
|
|
1834 */
|
|
1835
|
300
|
/* Maximum length in bytes of a line in a .aff and .dic file. */
#define MAXLINELEN  500
|
|
1838 /*
|
|
1839 * Main structure to store the contents of a ".aff" file.
|
|
1840 */
|
|
1841 typedef struct afffile_S
|
|
1842 {
|
|
1843 char_u *af_enc; /* "SET", normalized, alloc'ed string or NULL */
|
310
|
1844 int af_rar; /* RAR ID for rare word */
|
|
1845 int af_kep; /* KEP ID for keep-case word */
|
236
|
1846 hashtab_T af_pref; /* hashtable for prefixes, affheader_T */
|
|
1847 hashtab_T af_suff; /* hashtable for suffixes, affheader_T */
|
|
1848 } afffile_T;
|
|
1849
|
|
1850 typedef struct affentry_S affentry_T;
|
|
1851 /* Affix entry from ".aff" file. Used for prefixes and suffixes. */
|
|
1852 struct affentry_S
|
|
1853 {
|
|
1854 affentry_T *ae_next; /* next affix with same name/number */
|
|
1855 char_u *ae_chop; /* text to chop off basic word (can be NULL) */
|
|
1856 char_u *ae_add; /* text to add to basic word (can be NULL) */
|
|
1857 char_u *ae_cond; /* condition (NULL for ".") */
|
|
1858 regprog_T *ae_prog; /* regexp program for ae_cond or NULL */
|
300
|
1859 };
|
|
1860
|
|
1861 /* Affix header from ".aff" file. Used for af_pref and af_suff. */
|
|
1862 typedef struct affheader_S
|
|
1863 {
|
|
1864 char_u ah_key[2]; /* key for hashtable == name of affix entry */
|
|
1865 int ah_combine; /* suffix may combine with prefix */
|
|
1866 affentry_T *ah_first; /* first affix entry */
|
|
1867 } affheader_T;
|
|
1868
|
|
1869 #define HI2AH(hi) ((affheader_T *)(hi)->hi_key)
|
|
1870
|
|
1871 /*
|
|
1872 * Structure that is used to store the items in the word tree. This avoids
|
|
1873 * the need to keep track of each allocated thing, it's freed all at once
|
|
1874 * after ":mkspell" is done.
|
|
1875 */
|
|
1876 #define SBLOCKSIZE 16000 /* size of sb_data */
|
|
1877 typedef struct sblock_S sblock_T;
|
|
1878 struct sblock_S
|
|
1879 {
|
|
1880 sblock_T *sb_next; /* next block in list */
|
|
1881 int sb_used; /* nr of bytes already in use */
|
|
1882 char_u sb_data[1]; /* data, actually longer */
|
236
|
1883 };
|
|
1884
|
|
1885 /*
|
300
|
1886 * A node in the tree.
|
236
|
1887 */
|
300
|
1888 typedef struct wordnode_S wordnode_T;
|
|
1889 struct wordnode_S
|
236
|
1890 {
|
300
|
1891 char_u wn_hashkey[6]; /* room for the hash key */
|
|
1892 wordnode_T *wn_next; /* next node with same hash key */
|
|
1893 wordnode_T *wn_child; /* child (next byte in word) */
|
|
1894 wordnode_T *wn_sibling; /* next sibling (alternate byte in word,
|
|
1895 always sorted) */
|
|
1896 wordnode_T *wn_wnode; /* parent node that will write this node */
|
|
1897 int wn_index; /* index in written nodes (valid after first
|
|
1898 round) */
|
|
1899 char_u wn_byte; /* Byte for this node. NUL for word end */
|
|
1900 char_u wn_flags; /* when wn_byte is NUL: WF_ flags */
|
|
1901 char_u wn_region; /* when wn_byte is NUL: region mask */
|
236
|
1902 };
|
|
1903
|
300
|
/*
 * Turn hashtable item "hi" into a pointer to the wordnode_T its key is
 * stored in.  The whole expansion is parenthesised so that the result can be
 * used directly, e.g. "HI2WN(hi)->wn_next"; without the outer parentheses the
 * cast would be applied after the member access.
 */
#define HI2WN(hi)   ((wordnode_T *)((hi)->hi_key))
|
236
|
1905
|
300
|
1906 /*
|
|
1907 * Info used while reading the spell files.
|
|
1908 */
|
|
1909 typedef struct spellinfo_S
|
249
|
1910 {
|
300
|
1911 wordnode_T *si_foldroot; /* tree with case-folded words */
|
|
1912 wordnode_T *si_keeproot; /* tree with keep-case words */
|
|
1913 sblock_T *si_blocks; /* memory blocks used */
|
|
1914 int si_ascii; /* handling only ASCII words */
|
310
|
1915 int si_add; /* addition file */
|
300
|
1916 int si_region; /* region mask */
|
|
1917 vimconv_T si_conv; /* for conversion to 'encoding' */
|
302
|
1918 int si_memtot; /* runtime memory used */
|
310
|
1919 int si_verbose; /* verbose messages */
|
316
|
1920 int si_region_count; /* number of regions supported (1 when there
|
|
1921 are no regions) */
|
|
1922 char_u si_region_name[16]; /* region names (if count > 1) */
|
323
|
1923
|
|
1924 garray_T si_rep; /* list of fromto_T entries from REP lines */
|
|
1925 garray_T si_sal; /* list of fromto_T entries from SAL lines */
|
|
1926 int si_followup; /* soundsalike: ? */
|
|
1927 int si_collapse; /* soundsalike: ? */
|
|
1928 int si_rem_accents; /* soundsalike: remove accents */
|
|
1929 garray_T si_map; /* MAP info concatenated */
|
300
|
1930 } spellinfo_T;
|
249
|
1931
|
300
|
1932 static afffile_T *spell_read_aff __ARGS((char_u *fname, spellinfo_T *spin));
|
323
|
1933 static void add_fromto __ARGS((spellinfo_T *spin, garray_T *gap, char_u *from, char_u *to));
|
|
1934 static int sal_to_bool __ARGS((char_u *s));
|
240
|
1935 static int has_non_ascii __ARGS((char_u *s));
|
300
|
1936 static void spell_free_aff __ARGS((afffile_T *aff));
|
|
1937 static int spell_read_dic __ARGS((char_u *fname, spellinfo_T *spin, afffile_T *affile));
|
307
|
1938 static int store_aff_word __ARGS((char_u *word, spellinfo_T *spin, char_u *afflist, hashtab_T *ht, hashtab_T *xht, int comb, int flags));
|
300
|
1939 static int spell_read_wordfile __ARGS((char_u *fname, spellinfo_T *spin));
|
|
1940 static void *getroom __ARGS((sblock_T **blp, size_t len));
|
|
1941 static char_u *getroom_save __ARGS((sblock_T **blp, char_u *s));
|
|
1942 static void free_blocks __ARGS((sblock_T *bl));
|
|
1943 static wordnode_T *wordtree_alloc __ARGS((sblock_T **blp));
|
316
|
1944 static int store_word __ARGS((char_u *word, spellinfo_T *spin, int flags, int region));
|
300
|
1945 static int tree_add_word __ARGS((char_u *word, wordnode_T *tree, int flags, int region, sblock_T **blp));
|
310
|
1946 static void wordtree_compress __ARGS((wordnode_T *root, spellinfo_T *spin));
|
300
|
1947 static int node_compress __ARGS((wordnode_T *node, hashtab_T *ht, int *tot));
|
|
1948 static int node_equal __ARGS((wordnode_T *n1, wordnode_T *n2));
|
316
|
1949 static void write_vim_spell __ARGS((char_u *fname, spellinfo_T *spin));
|
300
|
1950 static int put_tree __ARGS((FILE *fd, wordnode_T *node, int index, int regionmask));
|
323
|
1951 static void mkspell __ARGS((int fcount, char_u **fnames, int ascii, int overwrite, int added_word));
|
310
|
1952 static void init_spellfile __ARGS((void));
|
236
|
1953
|
|
1954 /*
|
323
|
1955 * Read the affix file "fname".
|
316
|
1956 * Returns an afffile_T, NULL for complete failure.
|
236
|
1957 */
|
|
1958 static afffile_T *
|
300
|
1959 spell_read_aff(fname, spin)
|
236
|
1960 char_u *fname;
|
300
|
1961 spellinfo_T *spin;
|
236
|
1962 {
|
|
1963 FILE *fd;
|
|
1964 afffile_T *aff;
|
|
1965 char_u rline[MAXLINELEN];
|
|
1966 char_u *line;
|
|
1967 char_u *pc = NULL;
|
|
1968 char_u *(items[6]);
|
|
1969 int itemcnt;
|
|
1970 char_u *p;
|
|
1971 int lnum = 0;
|
|
1972 affheader_T *cur_aff = NULL;
|
|
1973 int aff_todo = 0;
|
|
1974 hashtab_T *tp;
|
255
|
1975 char_u *low = NULL;
|
|
1976 char_u *fol = NULL;
|
|
1977 char_u *upp = NULL;
|
307
|
1978 static char *e_affname = N_("Affix name too long in %s line %d: %s");
|
323
|
1979 int do_rep;
|
|
1980 int do_sal;
|
|
1981 int do_map;
|
|
1982 int found_map = FALSE;
|
324
|
1983 hashitem_T *hi;
|
236
|
1984
|
300
|
1985 /*
|
|
1986 * Open the file.
|
|
1987 */
|
310
|
1988 fd = mch_fopen((char *)fname, "r");
|
236
|
1989 if (fd == NULL)
|
|
1990 {
|
|
1991 EMSG2(_(e_notopen), fname);
|
|
1992 return NULL;
|
|
1993 }
|
|
1994
|
310
|
1995 if (spin->si_verbose || p_verbose > 2)
|
|
1996 {
|
|
1997 if (!spin->si_verbose)
|
|
1998 verbose_enter();
|
|
1999 smsg((char_u *)_("Reading affix file %s..."), fname);
|
|
2000 out_flush();
|
|
2001 if (!spin->si_verbose)
|
|
2002 verbose_leave();
|
|
2003 }
|
236
|
2004
|
323
|
2005 /* Only do REP lines when not done in another .aff file already. */
|
|
2006 do_rep = spin->si_rep.ga_len == 0;
|
|
2007
|
|
2008 /* Only do SAL lines when not done in another .aff file already. */
|
|
2009 do_sal = spin->si_sal.ga_len == 0;
|
|
2010
|
|
2011 /* Only do MAP lines when not done in another .aff file already. */
|
|
2012 do_map = spin->si_map.ga_len == 0;
|
|
2013
|
300
|
2014 /*
|
|
2015 * Allocate and init the afffile_T structure.
|
|
2016 */
|
|
2017 aff = (afffile_T *)getroom(&spin->si_blocks, sizeof(afffile_T));
|
236
|
2018 if (aff == NULL)
|
|
2019 return NULL;
|
|
2020 hash_init(&aff->af_pref);
|
|
2021 hash_init(&aff->af_suff);
|
|
2022
|
|
2023 /*
|
|
2024 * Read all the lines in the file one by one.
|
|
2025 */
|
255
|
2026 while (!vim_fgets(rline, MAXLINELEN, fd) && !got_int)
|
236
|
2027 {
|
255
|
2028 line_breakcheck();
|
236
|
2029 ++lnum;
|
|
2030
|
|
2031 /* Skip comment lines. */
|
|
2032 if (*rline == '#')
|
|
2033 continue;
|
|
2034
|
|
2035 /* Convert from "SET" to 'encoding' when needed. */
|
|
2036 vim_free(pc);
|
310
|
2037 #ifdef FEAT_MBYTE
|
300
|
2038 if (spin->si_conv.vc_type != CONV_NONE)
|
236
|
2039 {
|
300
|
2040 pc = string_convert(&spin->si_conv, rline, NULL);
|
255
|
2041 if (pc == NULL)
|
|
2042 {
|
|
2043 smsg((char_u *)_("Conversion failure for word in %s line %d: %s"),
|
|
2044 fname, lnum, rline);
|
|
2045 continue;
|
|
2046 }
|
236
|
2047 line = pc;
|
|
2048 }
|
|
2049 else
|
310
|
2050 #endif
|
236
|
2051 {
|
|
2052 pc = NULL;
|
|
2053 line = rline;
|
|
2054 }
|
|
2055
|
|
2056 /* Split the line up in white separated items. Put a NUL after each
|
|
2057 * item. */
|
|
2058 itemcnt = 0;
|
|
2059 for (p = line; ; )
|
|
2060 {
|
|
2061 while (*p != NUL && *p <= ' ') /* skip white space and CR/NL */
|
|
2062 ++p;
|
|
2063 if (*p == NUL)
|
|
2064 break;
|
300
|
2065 if (itemcnt == 6) /* too many items */
|
|
2066 break;
|
236
|
2067 items[itemcnt++] = p;
|
300
|
2068 while (*p > ' ') /* skip until white space or CR/NL */
|
236
|
2069 ++p;
|
|
2070 if (*p == NUL)
|
|
2071 break;
|
|
2072 *p++ = NUL;
|
|
2073 }
|
|
2074
|
|
2075 /* Handle non-empty lines. */
|
|
2076 if (itemcnt > 0)
|
|
2077 {
|
|
2078 if (STRCMP(items[0], "SET") == 0 && itemcnt == 2
|
|
2079 && aff->af_enc == NULL)
|
|
2080 {
|
310
|
2081 #ifdef FEAT_MBYTE
|
300
|
2082 /* Setup for conversion from "ENC" to 'encoding'. */
|
|
2083 aff->af_enc = enc_canonize(items[1]);
|
|
2084 if (aff->af_enc != NULL && !spin->si_ascii
|
|
2085 && convert_setup(&spin->si_conv, aff->af_enc,
|
|
2086 p_enc) == FAIL)
|
|
2087 smsg((char_u *)_("Conversion in %s not supported: from %s to %s"),
|
|
2088 fname, aff->af_enc, p_enc);
|
310
|
2089 #else
|
|
2090 smsg((char_u *)_("Conversion in %s not supported"), fname);
|
|
2091 #endif
|
236
|
2092 }
|
302
|
2093 else if (STRCMP(items[0], "NOSPLITSUGS") == 0 && itemcnt == 1)
|
|
2094 {
|
323
|
2095 /* ignored, we always split */
|
302
|
2096 }
|
323
|
2097 else if (STRCMP(items[0], "TRY") == 0 && itemcnt == 2)
|
300
|
2098 {
|
323
|
2099 /* ignored, we look in the tree for what chars may appear */
|
300
|
2100 }
|
307
|
2101 else if (STRCMP(items[0], "RAR") == 0 && itemcnt == 2
|
|
2102 && aff->af_rar == 0)
|
|
2103 {
|
|
2104 aff->af_rar = items[1][0];
|
|
2105 if (items[1][1] != NUL)
|
|
2106 smsg((char_u *)_(e_affname), fname, lnum, items[1]);
|
|
2107 }
|
310
|
2108 else if (STRCMP(items[0], "KEP") == 0 && itemcnt == 2
|
|
2109 && aff->af_kep == 0)
|
307
|
2110 {
|
310
|
2111 aff->af_kep = items[1][0];
|
307
|
2112 if (items[1][1] != NUL)
|
|
2113 smsg((char_u *)_(e_affname), fname, lnum, items[1]);
|
|
2114 }
|
236
|
2115 else if ((STRCMP(items[0], "PFX") == 0
|
|
2116 || STRCMP(items[0], "SFX") == 0)
|
|
2117 && aff_todo == 0
|
|
2118 && itemcnt == 4)
|
|
2119 {
|
|
2120 /* New affix letter. */
|
300
|
2121 cur_aff = (affheader_T *)getroom(&spin->si_blocks,
|
|
2122 sizeof(affheader_T));
|
236
|
2123 if (cur_aff == NULL)
|
|
2124 break;
|
|
2125 cur_aff->ah_key[0] = *items[1];
|
|
2126 cur_aff->ah_key[1] = NUL;
|
|
2127 if (items[1][1] != NUL)
|
307
|
2128 smsg((char_u *)_(e_affname), fname, lnum, items[1]);
|
236
|
2129 if (*items[2] == 'Y')
|
|
2130 cur_aff->ah_combine = TRUE;
|
300
|
2131 else if (*items[2] != 'N')
|
236
|
2132 smsg((char_u *)_("Expected Y or N in %s line %d: %s"),
|
|
2133 fname, lnum, items[2]);
|
|
2134 if (*items[0] == 'P')
|
|
2135 tp = &aff->af_pref;
|
|
2136 else
|
|
2137 tp = &aff->af_suff;
|
300
|
2138 aff_todo = atoi((char *)items[3]);
|
324
|
2139 hi = hash_find(tp, cur_aff->ah_key);
|
|
2140 if (!HASHITEM_EMPTY(hi))
|
300
|
2141 {
|
236
|
2142 smsg((char_u *)_("Duplicate affix in %s line %d: %s"),
|
|
2143 fname, lnum, items[1]);
|
300
|
2144 aff_todo = 0;
|
|
2145 }
|
236
|
2146 else
|
|
2147 hash_add(tp, cur_aff->ah_key);
|
|
2148 }
|
|
2149 else if ((STRCMP(items[0], "PFX") == 0
|
|
2150 || STRCMP(items[0], "SFX") == 0)
|
|
2151 && aff_todo > 0
|
|
2152 && STRCMP(cur_aff->ah_key, items[1]) == 0
|
|
2153 && itemcnt == 5)
|
|
2154 {
|
|
2155 affentry_T *aff_entry;
|
|
2156
|
|
2157 /* New item for an affix letter. */
|
|
2158 --aff_todo;
|
300
|
2159 aff_entry = (affentry_T *)getroom(&spin->si_blocks,
|
|
2160 sizeof(affentry_T));
|
236
|
2161 if (aff_entry == NULL)
|
|
2162 break;
|
240
|
2163
|
236
|
2164 if (STRCMP(items[2], "0") != 0)
|
300
|
2165 aff_entry->ae_chop = getroom_save(&spin->si_blocks,
|
|
2166 items[2]);
|
236
|
2167 if (STRCMP(items[3], "0") != 0)
|
300
|
2168 aff_entry->ae_add = getroom_save(&spin->si_blocks,
|
|
2169 items[3]);
|
236
|
2170
|
300
|
2171 /* Don't use an affix entry with non-ASCII characters when
|
|
2172 * "spin->si_ascii" is TRUE. */
|
|
2173 if (!spin->si_ascii || !(has_non_ascii(aff_entry->ae_chop)
|
240
|
2174 || has_non_ascii(aff_entry->ae_add)))
|
|
2175 {
|
|
2176 aff_entry->ae_next = cur_aff->ah_first;
|
|
2177 cur_aff->ah_first = aff_entry;
|
300
|
2178
|
|
2179 if (STRCMP(items[4], ".") != 0)
|
|
2180 {
|
|
2181 char_u buf[MAXLINELEN];
|
|
2182
|
|
2183 aff_entry->ae_cond = getroom_save(&spin->si_blocks,
|
|
2184 items[4]);
|
|
2185 if (*items[0] == 'P')
|
|
2186 sprintf((char *)buf, "^%s", items[4]);
|
|
2187 else
|
|
2188 sprintf((char *)buf, "%s$", items[4]);
|
|
2189 aff_entry->ae_prog = vim_regcomp(buf,
|
|
2190 RE_MAGIC + RE_STRING);
|
|
2191 }
|
240
|
2192 }
|
236
|
2193 }
|
255
|
2194 else if (STRCMP(items[0], "FOL") == 0 && itemcnt == 2)
|
|
2195 {
|
|
2196 if (fol != NULL)
|
|
2197 smsg((char_u *)_("Duplicate FOL in %s line %d"),
|
|
2198 fname, lnum);
|
|
2199 else
|
|
2200 fol = vim_strsave(items[1]);
|
|
2201 }
|
|
2202 else if (STRCMP(items[0], "LOW") == 0 && itemcnt == 2)
|
|
2203 {
|
|
2204 if (low != NULL)
|
|
2205 smsg((char_u *)_("Duplicate LOW in %s line %d"),
|
|
2206 fname, lnum);
|
|
2207 else
|
|
2208 low = vim_strsave(items[1]);
|
|
2209 }
|
|
2210 else if (STRCMP(items[0], "UPP") == 0 && itemcnt == 2)
|
|
2211 {
|
|
2212 if (upp != NULL)
|
|
2213 smsg((char_u *)_("Duplicate UPP in %s line %d"),
|
|
2214 fname, lnum);
|
|
2215 else
|
|
2216 upp = vim_strsave(items[1]);
|
|
2217 }
|
236
|
2218 else if (STRCMP(items[0], "REP") == 0 && itemcnt == 2)
|
323
|
2219 {
|
236
|
2220 /* Ignore REP count */;
|
323
|
2221 if (!isdigit(*items[1]))
|
|
2222 smsg((char_u *)_("Expected REP count in %s line %d"),
|
|
2223 fname, lnum);
|
|
2224 }
|
236
|
2225 else if (STRCMP(items[0], "REP") == 0 && itemcnt == 3)
|
|
2226 {
|
|
2227 /* REP item */
|
323
|
2228 if (do_rep)
|
|
2229 add_fromto(spin, &spin->si_rep, items[1], items[2]);
|
|
2230 }
|
|
2231 else if (STRCMP(items[0], "MAP") == 0 && itemcnt == 2)
|
|
2232 {
|
|
2233 /* MAP item or count */
|
|
2234 if (!found_map)
|
|
2235 {
|
|
2236 /* First line contains the count. */
|
|
2237 found_map = TRUE;
|
|
2238 if (!isdigit(*items[1]))
|
|
2239 smsg((char_u *)_("Expected MAP count in %s line %d"),
|
|
2240 fname, lnum);
|
|
2241 }
|
|
2242 else if (do_map)
|
|
2243 {
|
|
2244 /* We simply concatenate all the MAP strings, separated by
|
|
2245 * slashes. */
|
|
2246 ga_concat(&spin->si_map, items[1]);
|
|
2247 ga_append(&spin->si_map, '/');
|
|
2248 }
|
|
2249 }
|
|
2250 else if (STRCMP(items[0], "SAL") == 0 && itemcnt == 3)
|
|
2251 {
|
|
2252 if (do_sal)
|
|
2253 {
|
|
2254 /* SAL item (sounds-a-like)
|
|
2255 * Either one of the known keys or a from-to pair. */
|
|
2256 if (STRCMP(items[1], "followup") == 0)
|
|
2257 spin->si_followup = sal_to_bool(items[2]);
|
|
2258 else if (STRCMP(items[1], "collapse_result") == 0)
|
|
2259 spin->si_collapse = sal_to_bool(items[2]);
|
|
2260 else if (STRCMP(items[1], "remove_accents") == 0)
|
|
2261 spin->si_rem_accents = sal_to_bool(items[2]);
|
|
2262 else
|
|
2263 /* when "to" is "_" it means empty */
|
|
2264 add_fromto(spin, &spin->si_sal, items[1],
|
|
2265 STRCMP(items[2], "_") == 0 ? (char_u *)""
|
|
2266 : items[2]);
|
|
2267 }
|
236
|
2268 }
|
300
|
2269 else
|
236
|
2270 smsg((char_u *)_("Unrecognized item in %s line %d: %s"),
|
|
2271 fname, lnum, items[0]);
|
|
2272 }
|
|
2273 }
|
|
2274
|
255
|
2275 if (fol != NULL || low != NULL || upp != NULL)
|
|
2276 {
|
316
|
2277 /*
|
|
2278 * Don't write a word table for an ASCII file, so that we don't check
|
|
2279 * for conflicts with a word table that matches 'encoding'.
|
324
|
2280 * Don't write one for utf-8 either, we use utf_*() and
|
316
|
2281 * mb_get_class(), the list of chars in the file will be incomplete.
|
|
2282 */
|
|
2283 if (!spin->si_ascii
|
|
2284 #ifdef FEAT_MBYTE
|
|
2285 && !enc_utf8
|
|
2286 #endif
|
|
2287 )
|
260
|
2288 {
|
|
2289 if (fol == NULL || low == NULL || upp == NULL)
|
|
2290 smsg((char_u *)_("Missing FOL/LOW/UPP line in %s"), fname);
|
|
2291 else
|
316
|
2292 (void)set_spell_chartab(fol, low, upp);
|
260
|
2293 }
|
255
|
2294
|
|
2295 vim_free(fol);
|
|
2296 vim_free(low);
|
|
2297 vim_free(upp);
|
|
2298 }
|
|
2299
|
236
|
2300 vim_free(pc);
|
|
2301 fclose(fd);
|
|
2302 return aff;
|
|
2303 }
|
|
2304
|
|
2305 /*
|
323
|
2306 * Add a from-to item to "gap". Used for REP and SAL items.
|
|
2307 * They are stored case-folded.
|
|
2308 */
|
|
2309 static void
|
|
2310 add_fromto(spin, gap, from, to)
|
|
2311 spellinfo_T *spin;
|
|
2312 garray_T *gap;
|
|
2313 char_u *from;
|
|
2314 char_u *to;
|
|
2315 {
|
|
2316 fromto_T *ftp;
|
|
2317 char_u word[MAXWLEN];
|
|
2318
|
|
2319 if (ga_grow(gap, 1) == OK)
|
|
2320 {
|
|
2321 ftp = ((fromto_T *)gap->ga_data) + gap->ga_len;
|
|
2322 (void)spell_casefold(from, STRLEN(from), word, MAXWLEN);
|
|
2323 ftp->ft_from = getroom_save(&spin->si_blocks, word);
|
|
2324 (void)spell_casefold(to, STRLEN(to), word, MAXWLEN);
|
|
2325 ftp->ft_to = getroom_save(&spin->si_blocks, word);
|
|
2326 ++gap->ga_len;
|
|
2327 }
|
|
2328 }
|
|
2329
|
|
2330 /*
|
|
2331 * Convert a boolean argument in a SAL line to TRUE or FALSE;
|
|
2332 */
|
|
2333 static int
|
|
2334 sal_to_bool(s)
|
|
2335 char_u *s;
|
|
2336 {
|
|
2337 return STRCMP(s, "1") == 0 || STRCMP(s, "true") == 0;
|
|
2338 }
|
|
2339
|
|
2340 /*
|
240
|
2341 * Return TRUE if string "s" contains a non-ASCII character (128 or higher).
|
|
2342 * When "s" is NULL FALSE is returned.
|
|
2343 */
|
|
2344 static int
|
|
2345 has_non_ascii(s)
|
|
2346 char_u *s;
|
|
2347 {
|
|
2348 char_u *p;
|
|
2349
|
|
2350 if (s != NULL)
|
|
2351 for (p = s; *p != NUL; ++p)
|
|
2352 if (*p >= 128)
|
|
2353 return TRUE;
|
|
2354 return FALSE;
|
|
2355 }
|
|
2356
|
|
2357 /*
|
236
|
2358 * Free the structure filled by spell_read_aff().
|
|
2359 */
|
|
2360 static void
|
|
2361 spell_free_aff(aff)
|
|
2362 afffile_T *aff;
|
|
2363 {
|
|
2364 hashtab_T *ht;
|
|
2365 hashitem_T *hi;
|
|
2366 int todo;
|
|
2367 affheader_T *ah;
|
300
|
2368 affentry_T *ae;
|
236
|
2369
|
|
2370 vim_free(aff->af_enc);
|
|
2371
|
300
|
2372 /* All this trouble to foree the "ae_prog" items... */
|
236
|
2373 for (ht = &aff->af_pref; ; ht = &aff->af_suff)
|
|
2374 {
|
|
2375 todo = ht->ht_used;
|
|
2376 for (hi = ht->ht_array; todo > 0; ++hi)
|
|
2377 {
|
|
2378 if (!HASHITEM_EMPTY(hi))
|
|
2379 {
|
|
2380 --todo;
|
|
2381 ah = HI2AH(hi);
|
300
|
2382 for (ae = ah->ah_first; ae != NULL; ae = ae->ae_next)
|
|
2383 vim_free(ae->ae_prog);
|
236
|
2384 }
|
|
2385 }
|
|
2386 if (ht == &aff->af_suff)
|
|
2387 break;
|
|
2388 }
|
300
|
2389
|
236
|
2390 hash_clear(&aff->af_pref);
|
|
2391 hash_clear(&aff->af_suff);
|
|
2392 }
|
|
2393
|
|
2394 /*
|
300
|
2395 * Read dictionary file "fname".
|
236
|
2396 * Returns OK or FAIL;
|
|
2397 */
|
|
2398 static int
|
300
|
2399 spell_read_dic(fname, spin, affile)
|
236
|
2400 char_u *fname;
|
300
|
2401 spellinfo_T *spin;
|
|
2402 afffile_T *affile;
|
236
|
2403 {
|
300
|
2404 hashtab_T ht;
|
236
|
2405 char_u line[MAXLINELEN];
|
300
|
2406 char_u *afflist;
|
|
2407 char_u *dw;
|
236
|
2408 char_u *pc;
|
|
2409 char_u *w;
|
|
2410 int l;
|
|
2411 hash_T hash;
|
|
2412 hashitem_T *hi;
|
|
2413 FILE *fd;
|
|
2414 int lnum = 1;
|
300
|
2415 int non_ascii = 0;
|
|
2416 int retval = OK;
|
|
2417 char_u message[MAXLINELEN + MAXWLEN];
|
307
|
2418 int flags;
|
236
|
2419
|
300
|
2420 /*
|
|
2421 * Open the file.
|
|
2422 */
|
310
|
2423 fd = mch_fopen((char *)fname, "r");
|
236
|
2424 if (fd == NULL)
|
|
2425 {
|
|
2426 EMSG2(_(e_notopen), fname);
|
|
2427 return FAIL;
|
|
2428 }
|
|
2429
|
300
|
2430 /* The hashtable is only used to detect duplicated words. */
|
|
2431 hash_init(&ht);
|
|
2432
|
310
|
2433 if (spin->si_verbose || p_verbose > 2)
|
|
2434 {
|
|
2435 if (!spin->si_verbose)
|
|
2436 verbose_enter();
|
|
2437 smsg((char_u *)_("Reading dictionary file %s..."), fname);
|
|
2438 out_flush();
|
|
2439 if (!spin->si_verbose)
|
|
2440 verbose_leave();
|
|
2441 }
|
236
|
2442
|
|
2443 /* Read and ignore the first line: word count. */
|
|
2444 (void)vim_fgets(line, MAXLINELEN, fd);
|
324
|
2445 if (!vim_isdigit(*skipwhite(line)))
|
236
|
2446 EMSG2(_("E760: No word count in %s"), fname);
|
|
2447
|
|
2448 /*
|
|
2449 * Read all the lines in the file one by one.
|
|
2450 * The words are converted to 'encoding' here, before being added to
|
|
2451 * the hashtable.
|
|
2452 */
|
255
|
2453 while (!vim_fgets(line, MAXLINELEN, fd) && !got_int)
|
236
|
2454 {
|
255
|
2455 line_breakcheck();
|
236
|
2456 ++lnum;
|
|
2457
|
300
|
2458 /* Remove CR, LF and white space from the end. White space halfway
|
|
2459 * the word is kept to allow e.g., "et al.". */
|
236
|
2460 l = STRLEN(line);
|
|
2461 while (l > 0 && line[l - 1] <= ' ')
|
|
2462 --l;
|
|
2463 if (l == 0)
|
|
2464 continue; /* empty line */
|
|
2465 line[l] = NUL;
|
|
2466
|
300
|
2467 /* This takes time, print a message now and then. */
|
310
|
2468 if (spin->si_verbose && (lnum & 0x3ff) == 0)
|
300
|
2469 {
|
|
2470 vim_snprintf((char *)message, sizeof(message),
|
|
2471 _("line %6d - %s"), lnum, line);
|
|
2472 msg_start();
|
|
2473 msg_outtrans_attr(message, 0);
|
|
2474 msg_clr_eos();
|
|
2475 msg_didout = FALSE;
|
|
2476 msg_col = 0;
|
|
2477 out_flush();
|
|
2478 }
|
|
2479
|
236
|
2480 /* Find the optional affix names. */
|
300
|
2481 afflist = vim_strchr(line, '/');
|
|
2482 if (afflist != NULL)
|
|
2483 *afflist++ = NUL;
|
236
|
2484
|
300
|
2485 /* Skip non-ASCII words when "spin->si_ascii" is TRUE. */
|
|
2486 if (spin->si_ascii && has_non_ascii(line))
|
|
2487 {
|
|
2488 ++non_ascii;
|
240
|
2489 continue;
|
300
|
2490 }
|
240
|
2491
|
310
|
2492 #ifdef FEAT_MBYTE
|
236
|
2493 /* Convert from "SET" to 'encoding' when needed. */
|
300
|
2494 if (spin->si_conv.vc_type != CONV_NONE)
|
236
|
2495 {
|
300
|
2496 pc = string_convert(&spin->si_conv, line, NULL);
|
255
|
2497 if (pc == NULL)
|
|
2498 {
|
|
2499 smsg((char_u *)_("Conversion failure for word in %s line %d: %s"),
|
|
2500 fname, lnum, line);
|
|
2501 continue;
|
|
2502 }
|
236
|
2503 w = pc;
|
|
2504 }
|
|
2505 else
|
310
|
2506 #endif
|
236
|
2507 {
|
|
2508 pc = NULL;
|
|
2509 w = line;
|
|
2510 }
|
|
2511
|
300
|
2512 /* Store the word in the hashtable to be able to find duplicates. */
|
|
2513 dw = (char_u *)getroom_save(&spin->si_blocks, w);
|
236
|
2514 if (dw == NULL)
|
300
|
2515 retval = FAIL;
|
|
2516 vim_free(pc);
|
|
2517 if (retval == FAIL)
|
236
|
2518 break;
|
|
2519
|
300
|
2520 hash = hash_hash(dw);
|
|
2521 hi = hash_lookup(&ht, dw, hash);
|
236
|
2522 if (!HASHITEM_EMPTY(hi))
|
|
2523 smsg((char_u *)_("Duplicate word in %s line %d: %s"),
|
300
|
2524 fname, lnum, line);
|
236
|
2525 else
|
300
|
2526 hash_add_item(&ht, hi, dw, hash);
|
|
2527
|
307
|
2528 flags = 0;
|
|
2529 if (afflist != NULL)
|
|
2530 {
|
|
2531 /* Check for affix name that stands for keep-case word and stands
|
|
2532 * for rare word (if defined). */
|
310
|
2533 if (affile->af_kep != NUL
|
|
2534 && vim_strchr(afflist, affile->af_kep) != NULL)
|
307
|
2535 flags |= WF_KEEPCAP;
|
|
2536 if (affile->af_rar != NUL
|
|
2537 && vim_strchr(afflist, affile->af_rar) != NULL)
|
|
2538 flags |= WF_RARE;
|
|
2539 }
|
|
2540
|
300
|
2541 /* Add the word to the word tree(s). */
|
316
|
2542 if (store_word(dw, spin, flags, spin->si_region) == FAIL)
|
300
|
2543 retval = FAIL;
|
236
|
2544
|
300
|
2545 if (afflist != NULL)
|
|
2546 {
|
|
2547 /* Find all matching suffixes and add the resulting words.
|
|
2548 * Additionally do matching prefixes that combine. */
|
|
2549 if (store_aff_word(dw, spin, afflist,
|
307
|
2550 &affile->af_suff, &affile->af_pref,
|
|
2551 FALSE, flags) == FAIL)
|
300
|
2552 retval = FAIL;
|
|
2553
|
|
2554 /* Find all matching prefixes and add the resulting words. */
|
|
2555 if (store_aff_word(dw, spin, afflist,
|
307
|
2556 &affile->af_pref, NULL, FALSE, flags) == FAIL)
|
300
|
2557 retval = FAIL;
|
|
2558 }
|
236
|
2559 }
|
|
2560
|
300
|
2561 if (spin->si_ascii && non_ascii > 0)
|
|
2562 smsg((char_u *)_("Ignored %d words with non-ASCII characters"),
|
|
2563 non_ascii);
|
|
2564 hash_clear(&ht);
|
|
2565
|
236
|
2566 fclose(fd);
|
300
|
2567 return retval;
|
236
|
2568 }
|
|
2569
|
|
2570 /*
|
300
|
2571 * Apply affixes to a word and store the resulting words.
|
|
2572 * "ht" is the hashtable with affentry_T that need to be applied, either
|
|
2573 * prefixes or suffixes.
|
|
2574 * "xht", when not NULL, is the prefix hashtable, to be used additionally on
|
|
2575 * the resulting words for combining affixes.
|
|
2576 *
|
|
2577 * Returns FAIL when out of memory.
|
236
|
2578 */
|
300
|
2579 static int
|
307
|
2580 store_aff_word(word, spin, afflist, ht, xht, comb, flags)
|
300
|
2581 char_u *word; /* basic word start */
|
|
2582 spellinfo_T *spin; /* spell info */
|
|
2583 char_u *afflist; /* list of names of supported affixes */
|
|
2584 hashtab_T *ht;
|
|
2585 hashtab_T *xht;
|
|
2586 int comb; /* only use affixes that combine */
|
307
|
2587 int flags; /* flags for the word */
|
236
|
2588 {
|
|
2589 int todo;
|
|
2590 hashitem_T *hi;
|
300
|
2591 affheader_T *ah;
|
|
2592 affentry_T *ae;
|
|
2593 regmatch_T regmatch;
|
|
2594 char_u newword[MAXWLEN];
|
|
2595 int retval = OK;
|
|
2596 int i;
|
|
2597 char_u *p;
|
236
|
2598
|
300
|
2599 todo = ht->ht_used;
|
|
2600 for (hi = ht->ht_array; todo > 0 && retval == OK; ++hi)
|
236
|
2601 {
|
|
2602 if (!HASHITEM_EMPTY(hi))
|
|
2603 {
|
|
2604 --todo;
|
300
|
2605 ah = HI2AH(hi);
|
236
|
2606
|
300
|
2607 /* Check that the affix combines, if required, and that the word
|
|
2608 * supports this affix. */
|
|
2609 if ((!comb || ah->ah_combine)
|
|
2610 && vim_strchr(afflist, *ah->ah_key) != NULL)
|
236
|
2611 {
|
300
|
2612 /* Loop over all affix entries with this name. */
|
|
2613 for (ae = ah->ah_first; ae != NULL; ae = ae->ae_next)
|
236
|
2614 {
|
300
|
2615 /* Check the condition. It's not logical to match case
|
|
2616 * here, but it is required for compatibility with
|
|
2617 * Myspell. */
|
|
2618 regmatch.regprog = ae->ae_prog;
|
|
2619 regmatch.rm_ic = FALSE;
|
|
2620 if (ae->ae_prog == NULL
|
|
2621 || vim_regexec(®match, word, (colnr_T)0))
|
|
2622 {
|
|
2623 /* Match. Remove the chop and add the affix. */
|
|
2624 if (xht == NULL)
|
240
|
2625 {
|
300
|
2626 /* prefix: chop/add at the start of the word */
|
|
2627 if (ae->ae_add == NULL)
|
|
2628 *newword = NUL;
|
|
2629 else
|
|
2630 STRCPY(newword, ae->ae_add);
|
|
2631 p = word;
|
|
2632 if (ae->ae_chop != NULL)
|
310
|
2633 {
|
300
|
2634 /* Skip chop string. */
|
310
|
2635 #ifdef FEAT_MBYTE
|
|
2636 if (has_mbyte)
|
324
|
2637 {
|
310
|
2638 i = mb_charlen(ae->ae_chop);
|
324
|
2639 for ( ; i > 0; --i)
|
|
2640 mb_ptr_adv(p);
|
|
2641 }
|
310
|
2642 else
|
|
2643 #endif
|
324
|
2644 p += STRLEN(ae->ae_chop);
|
310
|
2645 }
|
300
|
2646 STRCAT(newword, p);
|
|
2647 }
|
|
2648 else
|
|
2649 {
|
|
2650 /* suffix: chop/add at the end of the word */
|
|
2651 STRCPY(newword, word);
|
|
2652 if (ae->ae_chop != NULL)
|
|
2653 {
|
|
2654 /* Remove chop string. */
|
|
2655 p = newword + STRLEN(newword);
|
310
|
2656 #ifdef FEAT_MBYTE
|
|
2657 if (has_mbyte)
|
|
2658 i = mb_charlen(ae->ae_chop);
|
|
2659 else
|
|
2660 #endif
|
|
2661 i = STRLEN(ae->ae_chop);
|
|
2662 for ( ; i > 0; --i)
|
300
|
2663 mb_ptr_back(newword, p);
|
|
2664 *p = NUL;
|
|
2665 }
|
|
2666 if (ae->ae_add != NULL)
|
|
2667 STRCAT(newword, ae->ae_add);
|
240
|
2668 }
|
|
2669
|
300
|
2670 /* Store the modified word. */
|
316
|
2671 if (store_word(newword, spin,
|
|
2672 flags, spin->si_region) == FAIL)
|
300
|
2673 retval = FAIL;
|
236
|
2674
|
300
|
2675 /* When added a suffix and combining is allowed also
|
|
2676 * try adding prefixes additionally. */
|
|
2677 if (xht != NULL && ah->ah_combine)
|
|
2678 if (store_aff_word(newword, spin, afflist,
|
307
|
2679 xht, NULL, TRUE, flags) == FAIL)
|
300
|
2680 retval = FAIL;
|
236
|
2681 }
|
|
2682 }
|
|
2683 }
|
|
2684 }
|
|
2685 }
|
|
2686
|
|
2687 return retval;
|
|
2688 }
|
|
2689
|
|
2690 /*
|
300
|
2691 * Read a file with a list of words.
|
236
|
2692 */
|
|
2693 static int
|
300
|
2694 spell_read_wordfile(fname, spin)
|
|
2695 char_u *fname;
|
|
2696 spellinfo_T *spin;
|
236
|
2697 {
|
300
|
2698 FILE *fd;
|
|
2699 long lnum = 0;
|
|
2700 char_u rline[MAXLINELEN];
|
|
2701 char_u *line;
|
|
2702 char_u *pc = NULL;
|
|
2703 int l;
|
|
2704 int retval = OK;
|
|
2705 int did_word = FALSE;
|
|
2706 int non_ascii = 0;
|
307
|
2707 int flags;
|
316
|
2708 int regionmask;
|
236
|
2709
|
300
|
2710 /*
|
|
2711 * Open the file.
|
|
2712 */
|
310
|
2713 fd = mch_fopen((char *)fname, "r");
|
300
|
2714 if (fd == NULL)
|
236
|
2715 {
|
300
|
2716 EMSG2(_(e_notopen), fname);
|
|
2717 return FAIL;
|
236
|
2718 }
|
|
2719
|
310
|
2720 if (spin->si_verbose || p_verbose > 2)
|
|
2721 {
|
|
2722 if (!spin->si_verbose)
|
|
2723 verbose_enter();
|
|
2724 smsg((char_u *)_("Reading word file %s..."), fname);
|
|
2725 out_flush();
|
|
2726 if (!spin->si_verbose)
|
|
2727 verbose_leave();
|
|
2728 }
|
300
|
2729
|
|
2730 /*
|
|
2731 * Read all the lines in the file one by one.
|
|
2732 */
|
|
2733 while (!vim_fgets(rline, MAXLINELEN, fd) && !got_int)
|
|
2734 {
|
|
2735 line_breakcheck();
|
|
2736 ++lnum;
|
|
2737
|
|
2738 /* Skip comment lines. */
|
|
2739 if (*rline == '#')
|
|
2740 continue;
|
|
2741
|
|
2742 /* Remove CR, LF and white space from the end. */
|
|
2743 l = STRLEN(rline);
|
|
2744 while (l > 0 && rline[l - 1] <= ' ')
|
|
2745 --l;
|
|
2746 if (l == 0)
|
|
2747 continue; /* empty or blank line */
|
|
2748 rline[l] = NUL;
|
|
2749
|
|
2750 /* Convert from "=encoding={encoding}" to 'encoding' when needed. */
|
|
2751 vim_free(pc);
|
310
|
2752 #ifdef FEAT_MBYTE
|
300
|
2753 if (spin->si_conv.vc_type != CONV_NONE)
|
|
2754 {
|
|
2755 pc = string_convert(&spin->si_conv, rline, NULL);
|
|
2756 if (pc == NULL)
|
|
2757 {
|
|
2758 smsg((char_u *)_("Conversion failure for word in %s line %d: %s"),
|
|
2759 fname, lnum, rline);
|
|
2760 continue;
|
|
2761 }
|
|
2762 line = pc;
|
|
2763 }
|
|
2764 else
|
310
|
2765 #endif
|
300
|
2766 {
|
|
2767 pc = NULL;
|
|
2768 line = rline;
|
|
2769 }
|
|
2770
|
307
|
2771 flags = 0;
|
316
|
2772 regionmask = spin->si_region;
|
307
|
2773
|
|
2774 if (*line == '/')
|
300
|
2775 {
|
307
|
2776 ++line;
|
316
|
2777
|
307
|
2778 if (STRNCMP(line, "encoding=", 9) == 0)
|
300
|
2779 {
|
|
2780 if (spin->si_conv.vc_type != CONV_NONE)
|
316
|
2781 smsg((char_u *)_("Duplicate /encoding= line ignored in %s line %d: %s"),
|
|
2782 fname, lnum, line - 1);
|
300
|
2783 else if (did_word)
|
316
|
2784 smsg((char_u *)_("/encoding= line after word ignored in %s line %d: %s"),
|
|
2785 fname, lnum, line - 1);
|
300
|
2786 else
|
|
2787 {
|
310
|
2788 #ifdef FEAT_MBYTE
|
|
2789 char_u *enc;
|
|
2790
|
300
|
2791 /* Setup for conversion to 'encoding'. */
|
316
|
2792 line += 10;
|
|
2793 enc = enc_canonize(line);
|
300
|
2794 if (enc != NULL && !spin->si_ascii
|
|
2795 && convert_setup(&spin->si_conv, enc,
|
|
2796 p_enc) == FAIL)
|
|
2797 smsg((char_u *)_("Conversion in %s not supported: from %s to %s"),
|
316
|
2798 fname, line, p_enc);
|
300
|
2799 vim_free(enc);
|
310
|
2800 #else
|
|
2801 smsg((char_u *)_("Conversion in %s not supported"), fname);
|
|
2802 #endif
|
300
|
2803 }
|
307
|
2804 continue;
|
300
|
2805 }
|
307
|
2806
|
316
|
2807 if (STRNCMP(line, "regions=", 8) == 0)
|
|
2808 {
|
|
2809 if (spin->si_region_count > 1)
|
|
2810 smsg((char_u *)_("Duplicate /regions= line ignored in %s line %d: %s"),
|
|
2811 fname, lnum, line);
|
|
2812 else
|
|
2813 {
|
|
2814 line += 8;
|
|
2815 if (STRLEN(line) > 16)
|
|
2816 smsg((char_u *)_("Too many regions in %s line %d: %s"),
|
|
2817 fname, lnum, line);
|
|
2818 else
|
|
2819 {
|
|
2820 spin->si_region_count = STRLEN(line) / 2;
|
|
2821 STRCPY(spin->si_region_name, line);
|
|
2822 }
|
|
2823 }
|
|
2824 continue;
|
|
2825 }
|
|
2826
|
307
|
2827 if (*line == '=')
|
|
2828 {
|
|
2829 /* keep-case word */
|
|
2830 flags |= WF_KEEPCAP;
|
|
2831 ++line;
|
|
2832 }
|
|
2833
|
|
2834 if (*line == '!')
|
|
2835 {
|
|
2836 /* Bad, bad, wicked word. */
|
|
2837 flags |= WF_BANNED;
|
|
2838 ++line;
|
|
2839 }
|
|
2840 else if (*line == '?')
|
|
2841 {
|
|
2842 /* Rare word. */
|
|
2843 flags |= WF_RARE;
|
|
2844 ++line;
|
|
2845 }
|
|
2846
|
316
|
2847 if (VIM_ISDIGIT(*line))
|
|
2848 {
|
|
2849 /* region number(s) */
|
|
2850 regionmask = 0;
|
|
2851 while (VIM_ISDIGIT(*line))
|
|
2852 {
|
|
2853 l = *line - '0';
|
|
2854 if (l > spin->si_region_count)
|
|
2855 {
|
|
2856 smsg((char_u *)_("Invalid region nr in %s line %d: %s"),
|
|
2857 fname, lnum, line);
|
|
2858 break;
|
|
2859 }
|
|
2860 regionmask |= 1 << (l - 1);
|
|
2861 ++line;
|
|
2862 }
|
|
2863 flags |= WF_REGION;
|
|
2864 }
|
|
2865
|
307
|
2866 if (flags == 0)
|
|
2867 {
|
|
2868 smsg((char_u *)_("/ line ignored in %s line %d: %s"),
|
300
|
2869 fname, lnum, line);
|
307
|
2870 continue;
|
|
2871 }
|
300
|
2872 }
|
|
2873
|
|
2874 /* Skip non-ASCII words when "spin->si_ascii" is TRUE. */
|
|
2875 if (spin->si_ascii && has_non_ascii(line))
|
|
2876 {
|
|
2877 ++non_ascii;
|
|
2878 continue;
|
|
2879 }
|
|
2880
|
|
2881 /* Normal word: store it. */
|
316
|
2882 if (store_word(line, spin, flags, regionmask) == FAIL)
|
300
|
2883 {
|
|
2884 retval = FAIL;
|
|
2885 break;
|
|
2886 }
|
|
2887 did_word = TRUE;
|
|
2888 }
|
|
2889
|
|
2890 vim_free(pc);
|
|
2891 fclose(fd);
|
|
2892
|
310
|
2893 if (spin->si_ascii && non_ascii > 0 && (spin->si_verbose || p_verbose > 2))
|
|
2894 {
|
|
2895 if (p_verbose > 2)
|
|
2896 verbose_enter();
|
300
|
2897 smsg((char_u *)_("Ignored %d words with non-ASCII characters"),
|
|
2898 non_ascii);
|
310
|
2899 if (p_verbose > 2)
|
|
2900 verbose_leave();
|
|
2901 }
|
300
|
2902 return retval;
|
236
|
2903 }
|
|
2904
|
|
2905 /*
|
300
|
2906 * Get part of an sblock_T, "len" bytes long.
|
|
2907 * This avoids calling free() for every little struct we use.
|
|
2908 * The memory is cleared to all zeros.
|
|
2909 * Returns NULL when out of memory.
|
|
2910 */
|
|
2911 static void *
|
|
2912 getroom(blp, len)
|
|
2913 sblock_T **blp;
|
|
2914 size_t len; /* length needed */
|
|
2915 {
|
|
2916 char_u *p;
|
|
2917 sblock_T *bl = *blp;
|
|
2918
|
|
2919 if (bl == NULL || bl->sb_used + len > SBLOCKSIZE)
|
|
2920 {
|
|
2921 /* Allocate a block of memory. This is not freed until much later. */
|
|
2922 bl = (sblock_T *)alloc_clear((unsigned)(sizeof(sblock_T) + SBLOCKSIZE));
|
|
2923 if (bl == NULL)
|
|
2924 return NULL;
|
|
2925 bl->sb_next = *blp;
|
|
2926 *blp = bl;
|
|
2927 bl->sb_used = 0;
|
|
2928 }
|
|
2929
|
|
2930 p = bl->sb_data + bl->sb_used;
|
|
2931 bl->sb_used += len;
|
|
2932
|
|
2933 return p;
|
|
2934 }
|
|
2935
|
|
2936 /*
|
|
2937 * Make a copy of a string into memory allocated with getroom().
|
|
2938 */
|
|
2939 static char_u *
|
|
2940 getroom_save(blp, s)
|
|
2941 sblock_T **blp;
|
|
2942 char_u *s;
|
|
2943 {
|
|
2944 char_u *sc;
|
|
2945
|
|
2946 sc = (char_u *)getroom(blp, STRLEN(s) + 1);
|
|
2947 if (sc != NULL)
|
|
2948 STRCPY(sc, s);
|
|
2949 return sc;
|
|
2950 }
|
|
2951
|
|
2952
|
|
2953 /*
|
|
2954 * Free the list of allocated sblock_T.
|
236
|
2955 */
|
|
2956 static void
|
300
|
2957 free_blocks(bl)
|
|
2958 sblock_T *bl;
|
236
|
2959 {
|
300
|
2960 sblock_T *next;
|
236
|
2961
|
300
|
2962 while (bl != NULL)
|
236
|
2963 {
|
300
|
2964 next = bl->sb_next;
|
|
2965 vim_free(bl);
|
|
2966 bl = next;
|
236
|
2967 }
|
|
2968 }
|
|
2969
|
|
2970 /*
|
300
|
2971 * Allocate the root of a word tree.
|
236
|
2972 */
|
300
|
2973 static wordnode_T *
|
|
2974 wordtree_alloc(blp)
|
|
2975 sblock_T **blp;
|
236
|
2976 {
|
300
|
2977 return (wordnode_T *)getroom(blp, sizeof(wordnode_T));
|
236
|
2978 }
|
|
2979
|
|
2980 /*
|
300
|
2981 * Store a word in the tree(s).
|
307
|
2982 * Always store it in the case-folded tree. A keep-case word can also be used
|
|
2983 * with all caps.
|
300
|
2984 * For a keep-case word also store it in the keep-case tree.
|
236
|
2985 */
|
|
2986 static int
|
316
|
2987 store_word(word, spin, flags, region)
|
300
|
2988 char_u *word;
|
|
2989 spellinfo_T *spin;
|
307
|
2990 int flags; /* extra flags, WF_BANNED */
|
316
|
2991 int region; /* supported region(s) */
|
236
|
2992 {
|
300
|
2993 int len = STRLEN(word);
|
|
2994 int ct = captype(word, word + len);
|
|
2995 char_u foldword[MAXWLEN];
|
|
2996 int res;
|
236
|
2997
|
323
|
2998 (void)spell_casefold(word, len, foldword, MAXWLEN);
|
|
2999 res = tree_add_word(foldword, spin->si_foldroot, ct | flags,
|
|
3000 region, &spin->si_blocks);
|
307
|
3001
|
|
3002 if (res == OK && (ct == WF_KEEPCAP || flags & WF_KEEPCAP))
|
|
3003 res = tree_add_word(word, spin->si_keeproot, flags,
|
316
|
3004 region, &spin->si_blocks);
|
300
|
3005 return res;
|
236
|
3006 }
|
|
3007
|
|
3008 /*
|
300
|
3009 * Add word "word" to a word tree at "root".
|
255
|
3010 * Returns FAIL when out of memory.
|
236
|
3011 */
|
255
|
3012 static int
|
300
|
3013 tree_add_word(word, root, flags, region, blp)
|
|
3014 char_u *word;
|
|
3015 wordnode_T *root;
|
|
3016 int flags;
|
|
3017 int region;
|
|
3018 sblock_T **blp;
|
236
|
3019 {
|
300
|
3020 wordnode_T *node = root;
|
|
3021 wordnode_T *np;
|
|
3022 wordnode_T **prev = NULL;
|
|
3023 int i;
|
255
|
3024
|
300
|
3025 /* Add each byte of the word to the tree, including the NUL at the end. */
|
|
3026 for (i = 0; ; ++i)
|
255
|
3027 {
|
300
|
3028 /* Look for the sibling that has the same character. They are sorted
|
|
3029 * on byte value, thus stop searching when a sibling is found with a
|
|
3030 * higher byte value. For zero bytes (end of word) check that the
|
|
3031 * flags are equal, there is a separate zero byte for each flag value.
|
|
3032 */
|
|
3033 while (node != NULL && (node->wn_byte < word[i]
|
307
|
3034 || (node->wn_byte == 0 && node->wn_flags != (flags & 0xff))))
|
236
|
3035 {
|
300
|
3036 prev = &node->wn_sibling;
|
|
3037 node = *prev;
|
236
|
3038 }
|
300
|
3039 if (node == NULL || node->wn_byte != word[i])
|
255
|
3040 {
|
300
|
3041 /* Allocate a new node. */
|
|
3042 np = (wordnode_T *)getroom(blp, sizeof(wordnode_T));
|
|
3043 if (np == NULL)
|
|
3044 return FAIL;
|
|
3045 np->wn_byte = word[i];
|
|
3046 *prev = np;
|
|
3047 np->wn_sibling = node;
|
|
3048 node = np;
|
255
|
3049 }
|
300
|
3050
|
|
3051 if (word[i] == NUL)
|
|
3052 {
|
|
3053 node->wn_flags = flags;
|
|
3054 node->wn_region |= region;
|
|
3055 break;
|
|
3056 }
|
|
3057 prev = &node->wn_child;
|
|
3058 node = *prev;
|
255
|
3059 }
|
|
3060
|
|
3061 return OK;
|
236
|
3062 }
|
|
3063
|
|
3064 /*
|
300
|
3065 * Compress a tree: find tails that are identical and can be shared.
|
|
3066 */
|
|
3067 static void
|
310
|
3068 wordtree_compress(root, spin)
|
300
|
3069 wordnode_T *root;
|
310
|
3070 spellinfo_T *spin;
|
300
|
3071 {
|
|
3072 hashtab_T ht;
|
|
3073 int n;
|
|
3074 int tot = 0;
|
|
3075
|
|
3076 if (root != NULL)
|
|
3077 {
|
|
3078 hash_init(&ht);
|
|
3079 n = node_compress(root, &ht, &tot);
|
310
|
3080 if (spin->si_verbose || p_verbose > 2)
|
|
3081 {
|
|
3082 if (!spin->si_verbose)
|
|
3083 verbose_enter();
|
|
3084 smsg((char_u *)_("Compressed %d of %d nodes; %d%% remaining"),
|
300
|
3085 n, tot, (tot - n) * 100 / tot);
|
310
|
3086 if (p_verbose > 2)
|
|
3087 verbose_leave();
|
|
3088 }
|
300
|
3089 hash_clear(&ht);
|
|
3090 }
|
|
3091 }
|
|
3092
|
|
3093 /*
|
|
3094 * Compress a node, its siblings and its children, depth first.
|
|
3095 * Returns the number of compressed nodes.
|
236
|
3096 */
|
255
|
3097 static int
|
300
|
3098 node_compress(node, ht, tot)
|
|
3099 wordnode_T *node;
|
|
3100 hashtab_T *ht;
|
|
3101 int *tot; /* total count of nodes before compressing,
|
|
3102 incremented while going through the tree */
|
236
|
3103 {
|
300
|
3104 wordnode_T *np;
|
|
3105 wordnode_T *tp;
|
|
3106 wordnode_T *child;
|
|
3107 hash_T hash;
|
236
|
3108 hashitem_T *hi;
|
300
|
3109 int len = 0;
|
|
3110 unsigned nr, n;
|
|
3111 int compressed = 0;
|
236
|
3112
|
300
|
3113 /*
|
|
3114 * Go through the list of siblings. Compress each child and then try
|
|
3115 * finding an identical child to replace it.
|
|
3116 * Note that with "child" we mean not just the node that is pointed to,
|
|
3117 * but the whole list of siblings, of which the node is the first.
|
|
3118 */
|
|
3119 for (np = node; np != NULL; np = np->wn_sibling)
|
236
|
3120 {
|
300
|
3121 ++len;
|
|
3122 if ((child = np->wn_child) != NULL)
|
|
3123 {
|
|
3124 /* Compress the child. This fills wn_hashkey. */
|
|
3125 compressed += node_compress(child, ht, tot);
|
|
3126
|
|
3127 /* Try to find an identical child. */
|
|
3128 hash = hash_hash(child->wn_hashkey);
|
|
3129 hi = hash_lookup(ht, child->wn_hashkey, hash);
|
|
3130 tp = NULL;
|
|
3131 if (!HASHITEM_EMPTY(hi))
|
|
3132 {
|
|
3133 /* There are children with an identical hash value. Now check
|
|
3134 * if there is one that is really identical. */
|
|
3135 for (tp = HI2WN(hi); tp != NULL; tp = tp->wn_next)
|
|
3136 if (node_equal(child, tp))
|
|
3137 {
|
|
3138 /* Found one! Now use that child in place of the
|
|
3139 * current one. This means the current child is
|
|
3140 * dropped from the tree. */
|
|
3141 np->wn_child = tp;
|
|
3142 ++compressed;
|
|
3143 break;
|
|
3144 }
|
|
3145 if (tp == NULL)
|
|
3146 {
|
|
3147 /* No other child with this hash value equals the child of
|
|
3148 * the node, add it to the linked list after the first
|
|
3149 * item. */
|
|
3150 tp = HI2WN(hi);
|
|
3151 child->wn_next = tp->wn_next;
|
|
3152 tp->wn_next = child;
|
|
3153 }
|
|
3154 }
|
|
3155 else
|
|
3156 /* No other child has this hash value, add it to the
|
|
3157 * hashtable. */
|
|
3158 hash_add_item(ht, hi, child->wn_hashkey, hash);
|
|
3159 }
|
236
|
3160 }
|
300
|
3161 *tot += len;
|
|
3162
|
|
3163 /*
|
|
3164 * Make a hash key for the node and its siblings, so that we can quickly
|
|
3165 * find a lookalike node. This must be done after compressing the sibling
|
|
3166 * list, otherwise the hash key would become invalid by the compression.
|
|
3167 */
|
|
3168 node->wn_hashkey[0] = len;
|
|
3169 nr = 0;
|
|
3170 for (np = node; np != NULL; np = np->wn_sibling)
|
236
|
3171 {
|
300
|
3172 if (np->wn_byte == NUL)
|
|
3173 /* end node: only use wn_flags and wn_region */
|
|
3174 n = np->wn_flags + (np->wn_region << 8);
|
|
3175 else
|
|
3176 /* byte node: use the byte value and the child pointer */
|
|
3177 n = np->wn_byte + ((long_u)np->wn_child << 8);
|
|
3178 nr = nr * 101 + n;
|
236
|
3179 }
|
300
|
3180
|
|
3181 /* Avoid NUL bytes, it terminates the hash key. */
|
|
3182 n = nr & 0xff;
|
|
3183 node->wn_hashkey[1] = n == 0 ? 1 : n;
|
|
3184 n = (nr >> 8) & 0xff;
|
|
3185 node->wn_hashkey[2] = n == 0 ? 1 : n;
|
|
3186 n = (nr >> 16) & 0xff;
|
|
3187 node->wn_hashkey[3] = n == 0 ? 1 : n;
|
|
3188 n = (nr >> 24) & 0xff;
|
|
3189 node->wn_hashkey[4] = n == 0 ? 1 : n;
|
|
3190 node->wn_hashkey[5] = NUL;
|
|
3191
|
|
3192 return compressed;
|
|
3193 }
|
|
3194
|
|
3195 /*
|
|
3196 * Return TRUE when two nodes have identical siblings and children.
|
|
3197 */
|
|
3198 static int
|
|
3199 node_equal(n1, n2)
|
|
3200 wordnode_T *n1;
|
|
3201 wordnode_T *n2;
|
|
3202 {
|
|
3203 wordnode_T *p1;
|
|
3204 wordnode_T *p2;
|
|
3205
|
|
3206 for (p1 = n1, p2 = n2; p1 != NULL && p2 != NULL;
|
|
3207 p1 = p1->wn_sibling, p2 = p2->wn_sibling)
|
|
3208 if (p1->wn_byte != p2->wn_byte
|
|
3209 || (p1->wn_byte == NUL
|
|
3210 ? (p1->wn_flags != p2->wn_flags
|
|
3211 || p1->wn_region != p2->wn_region)
|
|
3212 : (p1->wn_child != p2->wn_child)))
|
|
3213 break;
|
|
3214
|
|
3215 return p1 == NULL && p2 == NULL;
|
236
|
3216 }
|
|
3217
|
|
3218 /*
|
|
3219 * Write a number to file "fd", MSB first, in "len" bytes.
|
|
3220 */
|
255
|
3221 void
|
236
|
3222 put_bytes(fd, nr, len)
|
|
3223 FILE *fd;
|
|
3224 long_u nr;
|
|
3225 int len;
|
|
3226 {
|
|
3227 int i;
|
|
3228
|
|
3229 for (i = len - 1; i >= 0; --i)
|
|
3230 putc((int)(nr >> (i * 8)), fd);
|
|
3231 }
|
|
3232
|
323
|
3233 static int
|
|
3234 #ifdef __BORLANDC__
|
|
3235 _RTLENTRYF
|
|
3236 #endif
|
|
3237 rep_compare __ARGS((const void *s1, const void *s2));
|
|
3238
|
|
3239 /*
|
|
3240 * Function given to qsort() to sort the REP items on "from" string.
|
|
3241 */
|
|
3242 static int
|
|
3243 #ifdef __BORLANDC__
|
|
3244 _RTLENTRYF
|
|
3245 #endif
|
|
3246 rep_compare(s1, s2)
|
|
3247 const void *s1;
|
|
3248 const void *s2;
|
|
3249 {
|
|
3250 fromto_T *p1 = (fromto_T *)s1;
|
|
3251 fromto_T *p2 = (fromto_T *)s2;
|
|
3252
|
|
3253 return STRCMP(p1->ft_from, p2->ft_from);
|
|
3254 }
|
|
3255
|
236
|
3256 /*
|
|
3257 * Write the Vim spell file "fname".
|
|
3258 */
|
|
3259 static void
|
316
|
3260 write_vim_spell(fname, spin)
|
236
|
3261 char_u *fname;
|
300
|
3262 spellinfo_T *spin;
|
236
|
3263 {
|
300
|
3264 FILE *fd;
|
|
3265 int regionmask;
|
236
|
3266 int round;
|
300
|
3267 wordnode_T *tree;
|
|
3268 int nodecount;
|
323
|
3269 int i;
|
|
3270 int l;
|
|
3271 garray_T *gap;
|
|
3272 fromto_T *ftp;
|
|
3273 char_u *p;
|
|
3274 int rr;
|
236
|
3275
|
310
|
3276 fd = mch_fopen((char *)fname, "w");
|
300
|
3277 if (fd == NULL)
|
236
|
3278 {
|
|
3279 EMSG2(_(e_notopen), fname);
|
|
3280 return;
|
|
3281 }
|
|
3282
|
255
|
3283 /* <HEADER>: <fileID> <regioncnt> <regionname> ...
|
|
3284 * <charflagslen> <charflags> <fcharslen> <fchars> */
|
300
|
3285
|
|
3286 /* <fileID> */
|
|
3287 if (fwrite(VIMSPELLMAGIC, VIMSPELLMAGICL, (size_t)1, fd) != 1)
|
|
3288 EMSG(_(e_write));
|
236
|
3289
|
|
3290 /* write the region names if there is more than one */
|
316
|
3291 if (spin->si_region_count > 1)
|
236
|
3292 {
|
316
|
3293 putc(spin->si_region_count, fd); /* <regioncnt> <regionname> ... */
|
|
3294 fwrite(spin->si_region_name, (size_t)(spin->si_region_count * 2),
|
|
3295 (size_t)1, fd);
|
|
3296 regionmask = (1 << spin->si_region_count) - 1;
|
236
|
3297 }
|
|
3298 else
|
|
3299 {
|
300
|
3300 putc(0, fd);
|
|
3301 regionmask = 0;
|
236
|
3302 }
|
|
3303
|
323
|
3304 /*
|
|
3305 * Write the table with character flags and table for case folding.
|
260
|
3306 * <charflagslen> <charflags> <fcharlen> <fchars>
|
|
3307 * Skip this for ASCII, the table may conflict with the one used for
|
323
|
3308 * 'encoding'.
|
|
3309 * Also skip this for an .add.spl file, the main spell file must contain
|
|
3310 * the table (avoids that it conflicts). File is shorter too.
|
|
3311 */
|
|
3312 if (spin->si_ascii || spin->si_add)
|
260
|
3313 {
|
300
|
3314 putc(0, fd);
|
|
3315 putc(0, fd);
|
|
3316 putc(0, fd);
|
260
|
3317 }
|
|
3318 else
|
300
|
3319 write_spell_chartab(fd);
|
255
|
3320
|
323
|
3321 /* Sort the REP items. */
|
|
3322 qsort(spin->si_rep.ga_data, (size_t)spin->si_rep.ga_len,
|
|
3323 sizeof(fromto_T), rep_compare);
|
|
3324
|
|
3325 /* <SUGGEST> : <repcount> <rep> ...
|
|
3326 * <salflags> <salcount> <sal> ...
|
|
3327 * <maplen> <mapstr> */
|
|
3328 for (round = 1; round <= 2; ++round)
|
|
3329 {
|
|
3330 if (round == 1)
|
|
3331 gap = &spin->si_rep;
|
|
3332 else
|
|
3333 {
|
|
3334 gap = &spin->si_sal;
|
|
3335
|
|
3336 i = 0;
|
|
3337 if (spin->si_followup)
|
|
3338 i |= SAL_F0LLOWUP;
|
|
3339 if (spin->si_collapse)
|
|
3340 i |= SAL_COLLAPSE;
|
|
3341 if (spin->si_rem_accents)
|
|
3342 i |= SAL_REM_ACCENTS;
|
|
3343 putc(i, fd); /* <salflags> */
|
|
3344 }
|
|
3345
|
|
3346 put_bytes(fd, (long_u)gap->ga_len, 2); /* <repcount> or <salcount> */
|
|
3347 for (i = 0; i < gap->ga_len; ++i)
|
|
3348 {
|
|
3349 /* <rep> : <repfromlen> <repfrom> <reptolen> <repto> */
|
|
3350 /* <sal> : <salfromlen> <salfrom> <saltolen> <salto> */
|
|
3351 ftp = &((fromto_T *)gap->ga_data)[i];
|
|
3352 for (rr = 1; rr <= 2; ++rr)
|
|
3353 {
|
|
3354 p = rr == 1 ? ftp->ft_from : ftp->ft_to;
|
|
3355 l = STRLEN(p);
|
|
3356 putc(l, fd);
|
|
3357 fwrite(p, l, (size_t)1, fd);
|
|
3358 }
|
|
3359 }
|
|
3360 }
|
|
3361
|
|
3362 put_bytes(fd, (long_u)spin->si_map.ga_len, 2); /* <maplen> */
|
|
3363 if (spin->si_map.ga_len > 0) /* <mapstr> */
|
|
3364 fwrite(spin->si_map.ga_data, (size_t)spin->si_map.ga_len,
|
|
3365 (size_t)1, fd);
|
302
|
3366
|
236
|
3367 /*
|
300
|
3368 * <LWORDTREE> <KWORDTREE>
|
236
|
3369 */
|
323
|
3370 spin->si_memtot = 0;
|
300
|
3371 for (round = 1; round <= 2; ++round)
|
236
|
3372 {
|
300
|
3373 tree = (round == 1) ? spin->si_foldroot : spin->si_keeproot;
|
236
|
3374
|
300
|
3375 /* Count the number of nodes. Needed to be able to allocate the
|
|
3376 * memory when reading the nodes. Also fills in the index for shared
|
|
3377 * nodes. */
|
|
3378 nodecount = put_tree(NULL, tree, 0, regionmask);
|
236
|
3379
|
300
|
3380 /* number of nodes in 4 bytes */
|
|
3381 put_bytes(fd, (long_u)nodecount, 4); /* <nodecount> */
|
302
|
3382 spin->si_memtot += nodecount + nodecount * sizeof(int);
|
236
|
3383
|
300
|
3384 /* Write the nodes. */
|
|
3385 (void)put_tree(fd, tree, 0, regionmask);
|
236
|
3386 }
|
|
3387
|
300
|
3388 fclose(fd);
|
236
|
3389 }
|
|
3390
|
|
/*
 * Dump a word tree at node "node".
 *
 * This first writes the list of possible bytes (siblings).  Then for each
 * byte recursively write the children.
 *
 * NOTE: The code here must match the code in read_tree(), since assumptions
 * are made about the indexes (so that we don't have to write them in the
 * file).
 *
 * When "fd" is NULL nothing is written, but wn_index and wn_wnode are still
 * filled in, so that a second call with a file writes references to shared
 * children.
 *
 * Returns the index just after the nodes used by "node" and the children
 * written below it; when called with "index" zero this is the number of
 * nodes used.  Returns zero for an empty tree and after a write error.
 */
    static int
put_tree(fd, node, index, regionmask)
    FILE        *fd;            /* NULL when only counting */
    wordnode_T  *node;
    int         index;
    int         regionmask;
{
    int         newindex = index;
    int         siblingcount = 0;
    wordnode_T  *np;
    int         flags;

    /* If "node" is zero the tree is empty. */
    if (node == NULL)
        return 0;

    /* Store the index where this node is written. */
    node->wn_index = index;

    /* Count the number of siblings. */
    for (np = node; np != NULL; np = np->wn_sibling)
        ++siblingcount;

    /* Write the sibling count. */
    if (fd != NULL)
        putc(siblingcount, fd);                         /* <siblingcount> */

    /* Write each sibling byte and optionally extra info. */
    for (np = node; np != NULL; np = np->wn_sibling)
    {
        if (np->wn_byte == 0)
        {
            if (fd != NULL)
            {
                /* For a NUL byte (end of word) instead of the byte itself
                 * we write the flag/region items. */
                flags = np->wn_flags;
                /* The region mask is only stored when regions are in use
                 * and the word is not valid in all of them. */
                if (regionmask != 0 && np->wn_region != regionmask)
                    flags |= WF_REGION;
                if (flags == 0)
                {
                    /* word without flags or region */
                    putc(BY_NOFLAGS, fd);               /* <byte> */
                }
                else
                {
                    putc(BY_FLAGS, fd);                 /* <byte> */
                    putc(flags, fd);                    /* <flags> */
                    if (flags & WF_REGION)
                        putc(np->wn_region, fd);        /* <regionmask> */
                }
            }
        }
        else
        {
            /* "wn_wnode" of a child records the sibling list that claimed
             * it for writing.  A child with a non-zero index that was
             * claimed by another list is only referred to. */
            if (np->wn_child->wn_index != 0 && np->wn_child->wn_wnode != node)
            {
                /* The child is written elsewhere, write the reference. */
                if (fd != NULL)
                {
                    putc(BY_INDEX, fd);                 /* <byte> */
                                                        /* <nodeidx> */
                    put_bytes(fd, (long_u)np->wn_child->wn_index, 3);
                }
            }
            else if (np->wn_child->wn_wnode == NULL)
                /* We will write the child below and give it an index. */
                np->wn_child->wn_wnode = node;

            /* The byte itself is written in both cases; after BY_INDEX it
             * is the <xbyte>. */
            if (fd != NULL)
                if (putc(np->wn_byte, fd) == EOF) /* <byte> or <xbyte> */
                {
                    EMSG(_(e_write));
                    return 0;
                }
        }
    }

    /* Space used in the array when reading: one for each sibling and one for
     * the count. */
    newindex += siblingcount + 1;

    /* Recursively dump the children of each sibling.  Only the children
     * claimed by this sibling list are written here. */
    for (np = node; np != NULL; np = np->wn_sibling)
        if (np->wn_byte != 0 && np->wn_child->wn_wnode == node)
            newindex = put_tree(fd, np->wn_child, newindex, regionmask);

    return newindex;
}
|
|
3492
|
|
3493
|
|
3494 /*
|
310
|
3495 * ":mkspell [-ascii] outfile infile ..."
|
|
3496 * ":mkspell [-ascii] addfile"
|
236
|
3497 */
|
|
3498 void
|
|
3499 ex_mkspell(eap)
|
|
3500 exarg_T *eap;
|
|
3501 {
|
|
3502 int fcount;
|
|
3503 char_u **fnames;
|
310
|
3504 char_u *arg = eap->arg;
|
|
3505 int ascii = FALSE;
|
|
3506
|
|
3507 if (STRNCMP(arg, "-ascii", 6) == 0)
|
|
3508 {
|
|
3509 ascii = TRUE;
|
|
3510 arg = skipwhite(arg + 6);
|
|
3511 }
|
|
3512
|
|
3513 /* Expand all the remaining arguments (e.g., $VIMRUNTIME). */
|
|
3514 if (get_arglist_exp(arg, &fcount, &fnames) == OK)
|
|
3515 {
|
323
|
3516 mkspell(fcount, fnames, ascii, eap->forceit, FALSE);
|
310
|
3517 FreeWild(fcount, fnames);
|
|
3518 }
|
|
3519 }
|
|
3520
|
|
3521 /*
|
|
3522 * Create a Vim spell file from one or more word lists.
|
|
3523 * "fnames[0]" is the output file name.
|
|
3524 * "fnames[fcount - 1]" is the last input file name.
|
|
3525 * Exception: when "fnames[0]" ends in ".add" it's used as the input file name
|
|
3526 * and ".spl" is appended to make the output file name.
|
|
3527 */
|
|
3528 static void
|
323
|
3529 mkspell(fcount, fnames, ascii, overwrite, added_word)
|
310
|
3530 int fcount;
|
|
3531 char_u **fnames;
|
|
3532 int ascii; /* -ascii argument given */
|
|
3533 int overwrite; /* overwrite existing output file */
|
323
|
3534 int added_word; /* invoked through "zg" */
|
310
|
3535 {
|
236
|
3536 char_u fname[MAXPATHL];
|
|
3537 char_u wfname[MAXPATHL];
|
310
|
3538 char_u **innames;
|
|
3539 int incount;
|
236
|
3540 afffile_T *(afile[8]);
|
|
3541 int i;
|
|
3542 int len;
|
|
3543 struct stat st;
|
255
|
3544 int error = FALSE;
|
300
|
3545 spellinfo_T spin;
|
|
3546
|
|
3547 vim_memset(&spin, 0, sizeof(spin));
|
323
|
3548 spin.si_verbose = !added_word;
|
310
|
3549 spin.si_ascii = ascii;
|
323
|
3550 spin.si_followup = TRUE;
|
|
3551 spin.si_rem_accents = TRUE;
|
|
3552 ga_init2(&spin.si_rep, (int)sizeof(fromto_T), 20);
|
|
3553 ga_init2(&spin.si_sal, (int)sizeof(fromto_T), 20);
|
|
3554 ga_init2(&spin.si_map, (int)sizeof(char_u), 100);
|
310
|
3555
|
|
3556 /* default: fnames[0] is output file, following are input files */
|
|
3557 innames = &fnames[1];
|
|
3558 incount = fcount - 1;
|
|
3559
|
|
3560 if (fcount >= 1)
|
240
|
3561 {
|
310
|
3562 len = STRLEN(fnames[0]);
|
|
3563 if (fcount == 1 && len > 4 && STRCMP(fnames[0] + len - 4, ".add") == 0)
|
|
3564 {
|
|
3565 /* For ":mkspell path/en.latin1.add" output file is
|
|
3566 * "path/en.latin1.add.spl". */
|
|
3567 innames = &fnames[0];
|
|
3568 incount = 1;
|
|
3569 vim_snprintf((char *)wfname, sizeof(wfname), "%s.spl", fnames[0]);
|
|
3570 }
|
|
3571 else if (len > 4 && STRCMP(fnames[0] + len - 4, ".spl") == 0)
|
|
3572 {
|
|
3573 /* Name ends in ".spl", use as the file name. */
|
323
|
3574 vim_strncpy(wfname, fnames[0], sizeof(wfname) - 1);
|
310
|
3575 }
|
|
3576 else
|
|
3577 /* Name should be language, make the file name from it. */
|
|
3578 vim_snprintf((char *)wfname, sizeof(wfname), "%s.%s.spl", fnames[0],
|
|
3579 spin.si_ascii ? (char_u *)"ascii" : spell_enc());
|
|
3580
|
|
3581 /* Check for .ascii.spl. */
|
|
3582 if (strstr((char *)gettail(wfname), ".ascii.") != NULL)
|
|
3583 spin.si_ascii = TRUE;
|
|
3584
|
|
3585 /* Check for .add.spl. */
|
|
3586 if (strstr((char *)gettail(wfname), ".add.") != NULL)
|
|
3587 spin.si_add = TRUE;
|
240
|
3588 }
|
|
3589
|
310
|
3590 if (incount <= 0)
|
236
|
3591 EMSG(_(e_invarg)); /* need at least output and input names */
|
310
|
3592 else if (incount > 8)
|
236
|
3593 EMSG(_("E754: Only up to 8 regions supported"));
|
|
3594 else
|
|
3595 {
|
|
3596 /* Check for overwriting before doing things that may take a lot of
|
|
3597 * time. */
|
310
|
3598 if (!overwrite && mch_stat((char *)wfname, &st) >= 0)
|
236
|
3599 {
|
|
3600 EMSG(_(e_exists));
|
310
|
3601 return;
|
236
|
3602 }
|
310
|
3603 if (mch_isdir(wfname))
|
236
|
3604 {
|
310
|
3605 EMSG2(_(e_isadir2), wfname);
|
|
3606 return;
|
236
|
3607 }
|
|
3608
|
|
3609 /*
|
|
3610 * Init the aff and dic pointers.
|
|
3611 * Get the region names if there are more than 2 arguments.
|
|
3612 */
|
310
|
3613 for (i = 0; i < incount; ++i)
|
236
|
3614 {
|
310
|
3615 afile[i] = NULL;
|
300
|
3616
|
316
|
3617 if (incount > 1)
|
236
|
3618 {
|
310
|
3619 len = STRLEN(innames[i]);
|
|
3620 if (STRLEN(gettail(innames[i])) < 5
|
|
3621 || innames[i][len - 3] != '_')
|
236
|
3622 {
|
310
|
3623 EMSG2(_("E755: Invalid region in %s"), innames[i]);
|
|
3624 return;
|
236
|
3625 }
|
316
|
3626 spin.si_region_name[i * 2] = TOLOWER_ASC(innames[i][len - 2]);
|
|
3627 spin.si_region_name[i * 2 + 1] =
|
|
3628 TOLOWER_ASC(innames[i][len - 1]);
|
236
|
3629 }
|
|
3630 }
|
316
|
3631 spin.si_region_count = incount;
|
236
|
3632
|
310
|
3633 if (!spin.si_add)
|
|
3634 /* Clear the char type tables, don't want to use any of the
|
|
3635 * currently used spell properties. */
|
|
3636 init_spell_chartab();
|
255
|
3637
|
300
|
3638 spin.si_foldroot = wordtree_alloc(&spin.si_blocks);
|
|
3639 spin.si_keeproot = wordtree_alloc(&spin.si_blocks);
|
|
3640 if (spin.si_foldroot == NULL || spin.si_keeproot == NULL)
|
|
3641 {
|
|
3642 error = TRUE;
|
310
|
3643 return;
|
300
|
3644 }
|
|
3645
|
236
|
3646 /*
|
|
3647 * Read all the .aff and .dic files.
|
|
3648 * Text is converted to 'encoding'.
|
300
|
3649 * Words are stored in the case-folded and keep-case trees.
|
236
|
3650 */
|
310
|
3651 for (i = 0; i < incount && !error; ++i)
|
236
|
3652 {
|
300
|
3653 spin.si_conv.vc_type = CONV_NONE;
|
310
|
3654 spin.si_region = 1 << i;
|
|
3655
|
|
3656 vim_snprintf((char *)fname, sizeof(fname), "%s.aff", innames[i]);
|
300
|
3657 if (mch_stat((char *)fname, &st) >= 0)
|
|
3658 {
|
|
3659 /* Read the .aff file. Will init "spin->si_conv" based on the
|
|
3660 * "SET" line. */
|
310
|
3661 afile[i] = spell_read_aff(fname, &spin);
|
|
3662 if (afile[i] == NULL)
|
300
|
3663 error = TRUE;
|
|
3664 else
|
|
3665 {
|
|
3666 /* Read the .dic file and store the words in the trees. */
|
|
3667 vim_snprintf((char *)fname, sizeof(fname), "%s.dic",
|
310
|
3668 innames[i]);
|
|
3669 if (spell_read_dic(fname, &spin, afile[i]) == FAIL)
|
300
|
3670 error = TRUE;
|
|
3671 }
|
|
3672 }
|
|
3673 else
|
|
3674 {
|
|
3675 /* No .aff file, try reading the file as a word list. Store
|
|
3676 * the words in the trees. */
|
310
|
3677 if (spell_read_wordfile(innames[i], &spin) == FAIL)
|
300
|
3678 error = TRUE;
|
|
3679 }
|
236
|
3680
|
310
|
3681 #ifdef FEAT_MBYTE
|
236
|
3682 /* Free any conversion stuff. */
|
300
|
3683 convert_setup(&spin.si_conv, NULL, NULL);
|
310
|
3684 #endif
|
236
|
3685 }
|
|
3686
|
300
|
3687 if (!error)
|
236
|
3688 {
|
|
3689 /*
|
300
|
3690 * Remove the dummy NUL from the start of the tree root.
|
236
|
3691 */
|
300
|
3692 spin.si_foldroot = spin.si_foldroot->wn_sibling;
|
|
3693 spin.si_keeproot = spin.si_keeproot->wn_sibling;
|
236
|
3694
|
|
3695 /*
|
300
|
3696 * Combine tails in the tree.
|
236
|
3697 */
|
323
|
3698 if (!added_word || p_verbose > 2)
|
310
|
3699 {
|
323
|
3700 if (added_word)
|
310
|
3701 verbose_enter();
|
|
3702 MSG(_("Compressing word tree..."));
|
|
3703 out_flush();
|
323
|
3704 if (added_word)
|
310
|
3705 verbose_leave();
|
|
3706 }
|
|
3707 wordtree_compress(spin.si_foldroot, &spin);
|
|
3708 wordtree_compress(spin.si_keeproot, &spin);
|
236
|
3709 }
|
|
3710
|
300
|
3711 if (!error)
|
|
3712 {
|
|
3713 /*
|
|
3714 * Write the info in the spell file.
|
|
3715 */
|
323
|
3716 if (!added_word || p_verbose > 2)
|
310
|
3717 {
|
323
|
3718 if (added_word)
|
310
|
3719 verbose_enter();
|
|
3720 smsg((char_u *)_("Writing spell file %s..."), wfname);
|
|
3721 out_flush();
|
323
|
3722 if (added_word)
|
310
|
3723 verbose_leave();
|
|
3724 }
|
|
3725
|
316
|
3726 write_vim_spell(wfname, &spin);
|
310
|
3727
|
323
|
3728 if (!added_word || p_verbose > 2)
|
310
|
3729 {
|
323
|
3730 if (added_word)
|
310
|
3731 verbose_enter();
|
|
3732 MSG(_("Done!"));
|
|
3733 smsg((char_u *)_("Estimated runtime memory use: %d bytes"),
|
302
|
3734 spin.si_memtot);
|
310
|
3735 out_flush();
|
323
|
3736 if (added_word)
|
310
|
3737 verbose_leave();
|
|
3738 }
|
|
3739
|
|
3740 /* If the file is loaded need to reload it. */
|
323
|
3741 spell_reload_one(wfname, added_word);
|
300
|
3742 }
|
|
3743
|
|
3744 /* Free the allocated memory. */
|
|
3745 free_blocks(spin.si_blocks);
|
323
|
3746 ga_clear(&spin.si_rep);
|
|
3747 ga_clear(&spin.si_sal);
|
|
3748 ga_clear(&spin.si_map);
|
300
|
3749
|
|
3750 /* Free the .aff file structures. */
|
310
|
3751 for (i = 0; i < incount; ++i)
|
|
3752 if (afile[i] != NULL)
|
|
3753 spell_free_aff(afile[i]);
|
236
|
3754 }
|
310
|
3755 }
|
|
3756
|
|
3757
|
|
3758 /*
|
|
3759 * ":spellgood {word}"
|
|
3760 * ":spellwrong {word}"
|
|
3761 */
|
|
3762 void
|
|
3763 ex_spell(eap)
|
|
3764 exarg_T *eap;
|
|
3765 {
|
|
3766 spell_add_word(eap->arg, STRLEN(eap->arg), eap->cmdidx == CMD_spellwrong);
|
236
|
3767 }
|
|
3768
|
310
|
3769 /*
|
|
3770 * Add "word[len]" to 'spellfile' as a good or bad word.
|
|
3771 */
|
|
3772 void
|
|
3773 spell_add_word(word, len, bad)
|
|
3774 char_u *word;
|
|
3775 int len;
|
|
3776 int bad;
|
|
3777 {
|
|
3778 FILE *fd;
|
|
3779 buf_T *buf;
|
|
3780
|
|
3781 if (*curbuf->b_p_spf == NUL)
|
|
3782 init_spellfile();
|
|
3783 if (*curbuf->b_p_spf == NUL)
|
323
|
3784 EMSG(_("E764: 'spellfile' is not set"));
|
310
|
3785 else
|
|
3786 {
|
|
3787 /* Check that the user isn't editing the .add file somewhere. */
|
|
3788 buf = buflist_findname_exp(curbuf->b_p_spf);
|
|
3789 if (buf != NULL && buf->b_ml.ml_mfp == NULL)
|
|
3790 buf = NULL;
|
|
3791 if (buf != NULL && bufIsChanged(buf))
|
|
3792 EMSG(_(e_bufloaded));
|
|
3793 else
|
|
3794 {
|
|
3795 fd = mch_fopen((char *)curbuf->b_p_spf, "a");
|
|
3796 if (fd == NULL)
|
|
3797 EMSG2(_(e_notopen), curbuf->b_p_spf);
|
|
3798 else
|
|
3799 {
|
|
3800 if (bad)
|
|
3801 fprintf(fd, "/!%.*s\n", len, word);
|
|
3802 else
|
|
3803 fprintf(fd, "%.*s\n", len, word);
|
|
3804 fclose(fd);
|
|
3805
|
|
3806 /* Update the .add.spl file. */
|
323
|
3807 mkspell(1, &curbuf->b_p_spf, FALSE, TRUE, TRUE);
|
310
|
3808
|
|
3809 /* If the .add file is edited somewhere, reload it. */
|
|
3810 if (buf != NULL)
|
|
3811 buf_reload(buf);
|
323
|
3812
|
|
3813 redraw_all_later(NOT_VALID);
|
310
|
3814 }
|
|
3815 }
|
|
3816 }
|
|
3817 }
|
|
3818
|
|
3819 /*
|
|
3820 * Initialize 'spellfile' for the current buffer.
|
|
3821 */
|
|
3822 static void
|
|
3823 init_spellfile()
|
|
3824 {
|
|
3825 char_u buf[MAXPATHL];
|
|
3826 int l;
|
|
3827 slang_T *sl;
|
|
3828 char_u *rtp;
|
|
3829
|
|
3830 if (*curbuf->b_p_spl != NUL && curbuf->b_langp.ga_len > 0)
|
|
3831 {
|
|
3832 /* Loop over all entries in 'runtimepath'. */
|
|
3833 rtp = p_rtp;
|
|
3834 while (*rtp != NUL)
|
|
3835 {
|
|
3836 /* Copy the path from 'runtimepath' to buf[]. */
|
|
3837 copy_option_part(&rtp, buf, MAXPATHL, ",");
|
|
3838 if (filewritable(buf) == 2)
|
|
3839 {
|
316
|
3840 /* Use the first language name from 'spelllang' and the
|
|
3841 * encoding used in the first loaded .spl file. */
|
310
|
3842 sl = LANGP_ENTRY(curbuf->b_langp, 0)->lp_slang;
|
|
3843 l = STRLEN(buf);
|
|
3844 vim_snprintf((char *)buf + l, MAXPATHL - l,
|
316
|
3845 "/spell/%.*s.%s.add",
|
|
3846 2, curbuf->b_p_spl,
|
310
|
3847 strstr((char *)gettail(sl->sl_fname), ".ascii.") != NULL
|
|
3848 ? (char_u *)"ascii" : spell_enc());
|
|
3849 set_option_value((char_u *)"spellfile", 0L, buf, OPT_LOCAL);
|
|
3850 break;
|
|
3851 }
|
|
3852 }
|
|
3853 }
|
|
3854 }
|
236
|
3855
|
300
|
3856
|
307
|
3857 /*
|
|
3858 * Init the chartab used for spelling for ASCII.
|
|
3859 * EBCDIC is not supported!
|
|
3860 */
|
|
3861 static void
|
|
3862 clear_spell_chartab(sp)
|
|
3863 spelltab_T *sp;
|
|
3864 {
|
324
|
3865 int i;
|
307
|
3866
|
|
3867 /* Init everything to FALSE. */
|
|
3868 vim_memset(sp->st_isw, FALSE, sizeof(sp->st_isw));
|
|
3869 vim_memset(sp->st_isu, FALSE, sizeof(sp->st_isu));
|
|
3870 for (i = 0; i < 256; ++i)
|
324
|
3871 {
|
307
|
3872 sp->st_fold[i] = i;
|
324
|
3873 sp->st_upper[i] = i;
|
|
3874 }
|
307
|
3875
|
|
3876 /* We include digits. A word shouldn't start with a digit, but handling
|
|
3877 * that is done separately. */
|
|
3878 for (i = '0'; i <= '9'; ++i)
|
|
3879 sp->st_isw[i] = TRUE;
|
|
3880 for (i = 'A'; i <= 'Z'; ++i)
|
|
3881 {
|
|
3882 sp->st_isw[i] = TRUE;
|
|
3883 sp->st_isu[i] = TRUE;
|
|
3884 sp->st_fold[i] = i + 0x20;
|
|
3885 }
|
|
3886 for (i = 'a'; i <= 'z'; ++i)
|
324
|
3887 {
|
307
|
3888 sp->st_isw[i] = TRUE;
|
324
|
3889 sp->st_upper[i] = i - 0x20;
|
|
3890 }
|
307
|
3891 }
|
|
3892
|
|
3893 /*
|
|
3894 * Init the chartab used for spelling. Only depends on 'encoding'.
|
|
3895 * Called once while starting up and when 'encoding' changes.
|
|
3896 * The default is to use isalpha(), but the spell file should define the word
|
|
3897 * characters to make it possible that 'encoding' differs from the current
|
|
3898 * locale.
|
|
3899 */
|
|
3900 void
|
|
3901 init_spell_chartab()
|
|
3902 {
|
|
3903 int i;
|
|
3904
|
|
3905 did_set_spelltab = FALSE;
|
|
3906 clear_spell_chartab(&spelltab);
|
|
3907
|
|
3908 #ifdef FEAT_MBYTE
|
|
3909 if (enc_dbcs)
|
|
3910 {
|
|
3911 /* DBCS: assume double-wide characters are word characters. */
|
|
3912 for (i = 128; i <= 255; ++i)
|
|
3913 if (MB_BYTE2LEN(i) == 2)
|
|
3914 spelltab.st_isw[i] = TRUE;
|
|
3915 }
|
324
|
3916 else if (enc_utf8)
|
|
3917 {
|
|
3918 for (i = 128; i < 256; ++i)
|
|
3919 {
|
|
3920 spelltab.st_isu[i] = utf_isupper(i);
|
|
3921 spelltab.st_isw[i] = spelltab.st_isu[i] || utf_islower(i);
|
|
3922 spelltab.st_fold[i] = utf_fold(i);
|
|
3923 spelltab.st_upper[i] = utf_toupper(i);
|
|
3924 }
|
|
3925 }
|
307
|
3926 else
|
|
3927 #endif
|
|
3928 {
|
324
|
3929 /* Rough guess: use locale-dependent library functions. */
|
307
|
3930 for (i = 128; i < 256; ++i)
|
|
3931 {
|
|
3932 if (MB_ISUPPER(i))
|
|
3933 {
|
324
|
3934 spelltab.st_isw[i] = TRUE;
|
307
|
3935 spelltab.st_isu[i] = TRUE;
|
|
3936 spelltab.st_fold[i] = MB_TOLOWER(i);
|
|
3937 }
|
324
|
3938 else if (MB_ISLOWER(i))
|
|
3939 {
|
|
3940 spelltab.st_isw[i] = TRUE;
|
|
3941 spelltab.st_upper[i] = MB_TOUPPER(i);
|
|
3942 }
|
307
|
3943 }
|
|
3944 }
|
|
3945 }
|
|
3946
|
|
3947 static char *e_affform = N_("E761: Format error in affix file FOL, LOW or UPP");
|
|
3948 static char *e_affrange = N_("E762: Character in FOL, LOW or UPP is out of range");
|
|
3949
|
|
3950 /*
|
|
3951 * Set the spell character tables from strings in the affix file.
|
|
3952 */
|
|
3953 static int
|
|
3954 set_spell_chartab(fol, low, upp)
|
|
3955 char_u *fol;
|
|
3956 char_u *low;
|
|
3957 char_u *upp;
|
|
3958 {
|
|
3959 /* We build the new tables here first, so that we can compare with the
|
|
3960 * previous one. */
|
|
3961 spelltab_T new_st;
|
|
3962 char_u *pf = fol, *pl = low, *pu = upp;
|
|
3963 int f, l, u;
|
|
3964
|
|
3965 clear_spell_chartab(&new_st);
|
|
3966
|
|
3967 while (*pf != NUL)
|
|
3968 {
|
|
3969 if (*pl == NUL || *pu == NUL)
|
|
3970 {
|
|
3971 EMSG(_(e_affform));
|
|
3972 return FAIL;
|
|
3973 }
|
|
3974 #ifdef FEAT_MBYTE
|
|
3975 f = mb_ptr2char_adv(&pf);
|
|
3976 l = mb_ptr2char_adv(&pl);
|
|
3977 u = mb_ptr2char_adv(&pu);
|
|
3978 #else
|
|
3979 f = *pf++;
|
|
3980 l = *pl++;
|
|
3981 u = *pu++;
|
|
3982 #endif
|
|
3983 /* Every character that appears is a word character. */
|
|
3984 if (f < 256)
|
|
3985 new_st.st_isw[f] = TRUE;
|
|
3986 if (l < 256)
|
|
3987 new_st.st_isw[l] = TRUE;
|
|
3988 if (u < 256)
|
|
3989 new_st.st_isw[u] = TRUE;
|
|
3990
|
|
3991 /* if "LOW" and "FOL" are not the same the "LOW" char needs
|
|
3992 * case-folding */
|
|
3993 if (l < 256 && l != f)
|
|
3994 {
|
|
3995 if (f >= 256)
|
|
3996 {
|
|
3997 EMSG(_(e_affrange));
|
|
3998 return FAIL;
|
|
3999 }
|
|
4000 new_st.st_fold[l] = f;
|
|
4001 }
|
|
4002
|
|
4003 /* if "UPP" and "FOL" are not the same the "UPP" char needs
|
324
|
4004 * case-folding, it's upper case and the "UPP" is the upper case of
|
|
4005 * "FOL" . */
|
307
|
4006 if (u < 256 && u != f)
|
|
4007 {
|
|
4008 if (f >= 256)
|
|
4009 {
|
|
4010 EMSG(_(e_affrange));
|
|
4011 return FAIL;
|
|
4012 }
|
|
4013 new_st.st_fold[u] = f;
|
|
4014 new_st.st_isu[u] = TRUE;
|
324
|
4015 new_st.st_upper[f] = u;
|
307
|
4016 }
|
|
4017 }
|
|
4018
|
|
4019 if (*pl != NUL || *pu != NUL)
|
|
4020 {
|
|
4021 EMSG(_(e_affform));
|
|
4022 return FAIL;
|
|
4023 }
|
|
4024
|
|
4025 return set_spell_finish(&new_st);
|
|
4026 }
|
|
4027
|
|
4028 /*
|
|
4029 * Set the spell character tables from strings in the .spl file.
|
|
4030 */
|
|
4031 static int
|
|
4032 set_spell_charflags(flags, cnt, upp)
|
|
4033 char_u *flags;
|
|
4034 int cnt;
|
|
4035 char_u *upp;
|
|
4036 {
|
|
4037 /* We build the new tables here first, so that we can compare with the
|
|
4038 * previous one. */
|
|
4039 spelltab_T new_st;
|
|
4040 int i;
|
|
4041 char_u *p = upp;
|
324
|
4042 int c;
|
307
|
4043
|
|
4044 clear_spell_chartab(&new_st);
|
|
4045
|
|
4046 for (i = 0; i < cnt; ++i)
|
|
4047 {
|
324
|
4048 new_st.st_isw[i + 128] = (flags[i] & CF_WORD) != 0;
|
|
4049 new_st.st_isu[i + 128] = (flags[i] & CF_UPPER) != 0;
|
307
|
4050
|
|
4051 if (*p == NUL)
|
|
4052 return FAIL;
|
|
4053 #ifdef FEAT_MBYTE
|
324
|
4054 c = mb_ptr2char_adv(&p);
|
307
|
4055 #else
|
324
|
4056 c = *p++;
|
307
|
4057 #endif
|
324
|
4058 new_st.st_fold[i + 128] = c;
|
|
4059 if (i + 128 != c && new_st.st_isu[i + 128] && c < 256)
|
|
4060 new_st.st_upper[c] = i + 128;
|
307
|
4061 }
|
|
4062
|
|
4063 return set_spell_finish(&new_st);
|
|
4064 }
|
|
4065
|
|
4066 static int
|
|
4067 set_spell_finish(new_st)
|
|
4068 spelltab_T *new_st;
|
|
4069 {
|
|
4070 int i;
|
|
4071
|
|
4072 if (did_set_spelltab)
|
|
4073 {
|
|
4074 /* check that it's the same table */
|
|
4075 for (i = 0; i < 256; ++i)
|
|
4076 {
|
|
4077 if (spelltab.st_isw[i] != new_st->st_isw[i]
|
|
4078 || spelltab.st_isu[i] != new_st->st_isu[i]
|
324
|
4079 || spelltab.st_fold[i] != new_st->st_fold[i]
|
|
4080 || spelltab.st_upper[i] != new_st->st_upper[i])
|
307
|
4081 {
|
|
4082 EMSG(_("E763: Word characters differ between spell files"));
|
|
4083 return FAIL;
|
|
4084 }
|
|
4085 }
|
|
4086 }
|
|
4087 else
|
|
4088 {
|
|
4089 /* copy the new spelltab into the one being used */
|
|
4090 spelltab = *new_st;
|
|
4091 did_set_spelltab = TRUE;
|
|
4092 }
|
|
4093
|
|
4094 return OK;
|
|
4095 }
|
|
4096
|
|
4097 /*
|
|
4098 * Write the current tables into the .spl file.
|
|
4099 * This makes sure the same characters are recognized as word characters when
|
|
4100 * generating an when using a spell file.
|
|
4101 */
|
|
4102 static void
|
|
4103 write_spell_chartab(fd)
|
|
4104 FILE *fd;
|
|
4105 {
|
|
4106 char_u charbuf[256 * 4];
|
|
4107 int len = 0;
|
|
4108 int flags;
|
|
4109 int i;
|
|
4110
|
|
4111 fputc(128, fd); /* <charflagslen> */
|
|
4112 for (i = 128; i < 256; ++i)
|
|
4113 {
|
|
4114 flags = 0;
|
|
4115 if (spelltab.st_isw[i])
|
324
|
4116 flags |= CF_WORD;
|
307
|
4117 if (spelltab.st_isu[i])
|
324
|
4118 flags |= CF_UPPER;
|
307
|
4119 fputc(flags, fd); /* <charflags> */
|
|
4120
|
310
|
4121 #ifdef FEAT_MBYTE
|
|
4122 if (has_mbyte)
|
|
4123 len += mb_char2bytes(spelltab.st_fold[i], charbuf + len);
|
|
4124 else
|
|
4125 #endif
|
|
4126 charbuf[len++] = spelltab.st_fold[i];
|
307
|
4127 }
|
|
4128
|
|
4129 put_bytes(fd, (long_u)len, 2); /* <fcharlen> */
|
|
4130 fwrite(charbuf, (size_t)len, (size_t)1, fd); /* <fchars> */
|
|
4131 }
|
|
4132
|
|
4133 /*
|
324
|
4134 * Case-fold "str[len]" into "buf[buflen]". The result is NUL terminated.
|
|
4135 * Uses the character definitions from the .spl file.
|
307
|
4136 * When using a multi-byte 'encoding' the length may change!
|
|
4137 * Returns FAIL when something wrong.
|
|
4138 */
|
|
4139 static int
|
324
|
4140 spell_casefold(str, len, buf, buflen)
|
|
4141 char_u *str;
|
307
|
4142 int len;
|
|
4143 char_u *buf;
|
|
4144 int buflen;
|
|
4145 {
|
|
4146 int i;
|
|
4147
|
|
4148 if (len >= buflen)
|
|
4149 {
|
|
4150 buf[0] = NUL;
|
|
4151 return FAIL; /* result will not fit */
|
|
4152 }
|
|
4153
|
|
4154 #ifdef FEAT_MBYTE
|
|
4155 if (has_mbyte)
|
|
4156 {
|
324
|
4157 int outi = 0;
|
|
4158 char_u *p;
|
307
|
4159 int c;
|
|
4160
|
|
4161 /* Fold one character at a time. */
|
324
|
4162 for (p = str; p < str + len; )
|
307
|
4163 {
|
|
4164 if (outi + MB_MAXBYTES > buflen)
|
|
4165 {
|
|
4166 buf[outi] = NUL;
|
|
4167 return FAIL;
|
|
4168 }
|
324
|
4169 c = mb_ptr2char_adv(&p);
|
|
4170 outi += mb_char2bytes(SPELL_TOFOLD(c), buf + outi);
|
307
|
4171 }
|
|
4172 buf[outi] = NUL;
|
|
4173 }
|
|
4174 else
|
|
4175 #endif
|
|
4176 {
|
|
4177 /* Be quick for non-multibyte encodings. */
|
|
4178 for (i = 0; i < len; ++i)
|
324
|
4179 buf[i] = spelltab.st_fold[str[i]];
|
307
|
4180 buf[i] = NUL;
|
|
4181 }
|
|
4182
|
|
4183 return OK;
|
|
4184 }
|
|
4185
|
323
|
4186 /*
|
|
4187 * "z?": Find badly spelled word under or after the cursor.
|
|
4188 * Give suggestions for the properly spelled word.
|
|
4189 * This is based on the mechanisms of Aspell, but completely reimplemented.
|
|
4190 */
|
|
4191 void
|
|
4192 spell_suggest()
|
|
4193 {
|
|
4194 char_u *line;
|
|
4195 pos_T prev_cursor = curwin->w_cursor;
|
|
4196 int attr;
|
|
4197 char_u wcopy[MAXWLEN + 2];
|
|
4198 char_u *p;
|
|
4199 int i;
|
|
4200 int c;
|
|
4201 suginfo_T sug;
|
|
4202 suggest_T *stp;
|
|
4203
|
|
4204 /*
|
|
4205 * Find the start of the badly spelled word.
|
|
4206 */
|
|
4207 if (spell_move_to(FORWARD, TRUE, TRUE) == FAIL)
|
|
4208 {
|
|
4209 beep_flush();
|
|
4210 return;
|
|
4211 }
|
|
4212
|
|
4213 /*
|
|
4214 * Set the info in "sug".
|
|
4215 */
|
|
4216 vim_memset(&sug, 0, sizeof(sug));
|
|
4217 ga_init2(&sug.su_ga, (int)sizeof(suggest_T), 10);
|
|
4218 hash_init(&sug.su_banned);
|
|
4219 line = ml_get_curline();
|
|
4220 sug.su_badptr = line + curwin->w_cursor.col;
|
|
4221 sug.su_badlen = spell_check(curwin, sug.su_badptr, &attr);
|
|
4222 if (sug.su_badlen >= MAXWLEN)
|
|
4223 sug.su_badlen = MAXWLEN - 1; /* just in case */
|
|
4224 vim_strncpy(sug.su_badword, sug.su_badptr, sug.su_badlen);
|
|
4225 (void)spell_casefold(sug.su_badptr, sug.su_badlen,
|
|
4226 sug.su_fbadword, MAXWLEN);
|
|
4227
|
|
4228 /* Ban the bad word itself. It may appear in another region. */
|
|
4229 add_banned(&sug, sug.su_badword);
|
|
4230
|
|
4231 /*
|
|
4232 * 1. Try inserting/deleting/swapping/changing a letter, use REP entries
|
|
4233 * from the .aff file and inserting a space (split the word).
|
324
|
4234 *
|
|
4235 * Set a maximum score to limit the combination of operations that is
|
|
4236 * tried.
|
323
|
4237 */
|
|
4238 sug.su_maxscore = SCORE_MAXINIT;
|
|
4239 spell_try_change(&sug);
|
|
4240
|
|
4241 /*
|
|
4242 * 2. Try finding sound-a-like words.
|
324
|
4243 *
|
|
4244 * Only do this when we don't have a lot of suggestions yet, because it's
|
|
4245 * very slow and often doesn't find new suggestions.
|
323
|
4246 */
|
324
|
4247 if (sug.su_ga.ga_len < SUG_CLEAN_COUNT)
|
|
4248 {
|
|
4249 /* Allow a higher score now. */
|
323
|
4250 sug.su_maxscore = SCORE_MAXMAX;
|
324
|
4251 spell_try_soundalike(&sug);
|
|
4252 }
|
323
|
4253
|
|
4254 /* When CTRL-C was hit while searching do show the results. */
|
324
|
4255 ui_breakcheck();
|
323
|
4256 if (got_int)
|
|
4257 {
|
|
4258 (void)vgetc();
|
|
4259 got_int = FALSE;
|
|
4260 }
|
|
4261
|
|
4262 if (sug.su_ga.ga_len == 0)
|
|
4263 MSG(_("Sorry, no suggestions"));
|
|
4264 else
|
|
4265 {
|
324
|
4266 #ifdef RESCORE
|
|
4267 /* Do slow but more accurate computation of the word score. */
|
|
4268 rescore_suggestions(&sug);
|
|
4269 #endif
|
|
4270
|
|
4271 /* Sort the suggestions and truncate at SUG_PROMPT_COUNT. */
|
|
4272 cleanup_suggestions(&sug, SUG_PROMPT_COUNT);
|
323
|
4273
|
|
4274 /* List the suggestions. */
|
|
4275 msg_start();
|
|
4276 vim_snprintf((char *)IObuff, IOSIZE, _("Change \"%.*s\" to:"),
|
|
4277 sug.su_badlen, sug.su_badptr);
|
|
4278 msg_puts(IObuff);
|
|
4279 msg_clr_eos();
|
|
4280 msg_putchar('\n');
|
|
4281 msg_scroll = TRUE;
|
|
4282 for (i = 0; i < sug.su_ga.ga_len; ++i)
|
|
4283 {
|
|
4284 stp = &SUG(&sug, i);
|
|
4285
|
|
4286 /* The suggested word may replace only part of the bad word, add
|
|
4287 * the not replaced part. */
|
|
4288 STRCPY(wcopy, stp->st_word);
|
|
4289 if (sug.su_badlen > stp->st_orglen)
|
|
4290 vim_strncpy(wcopy + STRLEN(wcopy),
|
|
4291 sug.su_badptr + stp->st_orglen,
|
|
4292 sug.su_badlen - stp->st_orglen);
|
324
|
4293 if (p_verbose > 0)
|
|
4294 vim_snprintf((char *)IObuff, IOSIZE, _("%2d \"%s\" (%d)"),
|
323
|
4295 i + 1, wcopy, stp->st_score);
|
324
|
4296 else
|
|
4297 vim_snprintf((char *)IObuff, IOSIZE, _("%2d \"%s\""),
|
|
4298 i + 1, wcopy);
|
323
|
4299 msg_puts(IObuff);
|
|
4300 lines_left = 3; /* avoid more prompt */
|
|
4301 msg_putchar('\n');
|
|
4302 }
|
|
4303
|
|
4304 /* Ask for choice. */
|
|
4305 i = prompt_for_number();
|
|
4306 if (i > 0 && i <= sug.su_ga.ga_len && u_save_cursor())
|
|
4307 {
|
|
4308 /* Replace the word. */
|
|
4309 stp = &SUG(&sug, i - 1);
|
|
4310 p = alloc(STRLEN(line) - stp->st_orglen + STRLEN(stp->st_word) + 1);
|
|
4311 if (p != NULL)
|
|
4312 {
|
|
4313 c = sug.su_badptr - line;
|
|
4314 mch_memmove(p, line, c);
|
|
4315 STRCPY(p + c, stp->st_word);
|
|
4316 STRCAT(p, sug.su_badptr + stp->st_orglen);
|
|
4317 ml_replace(curwin->w_cursor.lnum, p, FALSE);
|
|
4318 curwin->w_cursor.col = c;
|
|
4319 changed_bytes(curwin->w_cursor.lnum, c);
|
|
4320 }
|
|
4321 }
|
|
4322 else
|
|
4323 curwin->w_cursor = prev_cursor;
|
|
4324 }
|
|
4325
|
|
4326 /* Free the suggestions. */
|
|
4327 for (i = 0; i < sug.su_ga.ga_len; ++i)
|
|
4328 vim_free(SUG(&sug, i).st_word);
|
|
4329 ga_clear(&sug.su_ga);
|
|
4330
|
|
4331 /* Free the banned words. */
|
|
4332 free_banned(&sug);
|
|
4333 }
|
|
4334
|
|
4335 /*
|
324
|
4336 * Make a copy of "word", with the first letter upper or lower cased, to
|
|
4337 * "wcopy[MAXWLEN]". "word" must not be empty.
|
|
4338 * The result is NUL terminated.
|
323
|
4339 */
|
|
4340 static void
|
324
|
4341 onecap_copy(word, wcopy, upper)
|
323
|
4342 char_u *word;
|
|
4343 char_u *wcopy;
|
|
4344 int upper; /* TRUE: first letter made upper case */
|
|
4345 {
|
|
4346 char_u *p;
|
|
4347 int c;
|
|
4348 int l;
|
|
4349
|
|
4350 p = word;
|
|
4351 #ifdef FEAT_MBYTE
|
|
4352 if (has_mbyte)
|
|
4353 c = mb_ptr2char_adv(&p);
|
|
4354 else
|
|
4355 #endif
|
|
4356 c = *p++;
|
|
4357 if (upper)
|
324
|
4358 c = SPELL_TOUPPER(c);
|
323
|
4359 else
|
324
|
4360 c = SPELL_TOFOLD(c);
|
323
|
4361 #ifdef FEAT_MBYTE
|
|
4362 if (has_mbyte)
|
|
4363 l = mb_char2bytes(c, wcopy);
|
|
4364 else
|
|
4365 #endif
|
|
4366 {
|
|
4367 l = 1;
|
|
4368 wcopy[0] = c;
|
|
4369 }
|
324
|
4370 vim_strncpy(wcopy + l, p, MAXWLEN - l);
|
323
|
4371 }
|
|
4372
|
|
4373 /*
|
324
|
4374 * Make a copy of "word" with all the letters upper cased into
|
|
4375 * "wcopy[MAXWLEN]". The result is NUL terminated.
|
323
|
4376 */
|
|
4377 static void
|
|
4378 allcap_copy(word, wcopy)
|
|
4379 char_u *word;
|
|
4380 char_u *wcopy;
|
|
4381 {
|
|
4382 char_u *s;
|
|
4383 char_u *d;
|
|
4384 int c;
|
|
4385
|
|
4386 d = wcopy;
|
|
4387 for (s = word; *s != NUL; )
|
|
4388 {
|
|
4389 #ifdef FEAT_MBYTE
|
|
4390 if (has_mbyte)
|
|
4391 c = mb_ptr2char_adv(&s);
|
|
4392 else
|
|
4393 #endif
|
|
4394 c = *s++;
|
324
|
4395 c = SPELL_TOUPPER(c);
|
323
|
4396
|
|
4397 #ifdef FEAT_MBYTE
|
|
4398 if (has_mbyte)
|
|
4399 {
|
|
4400 if (d - wcopy >= MAXWLEN - MB_MAXBYTES)
|
|
4401 break;
|
|
4402 d += mb_char2bytes(c, d);
|
|
4403 }
|
|
4404 else
|
|
4405 #endif
|
|
4406 {
|
|
4407 if (d - wcopy >= MAXWLEN - 1)
|
|
4408 break;
|
|
4409 *d++ = c;
|
|
4410 }
|
|
4411 }
|
|
4412 *d = NUL;
|
|
4413 }
|
|
4414
|
|
4415 /*
|
|
4416 * Try finding suggestions by adding/removing/swapping letters.
|
|
4417 */
|
|
4418 static void
|
|
4419 spell_try_change(su)
|
|
4420 suginfo_T *su;
|
|
4421 {
|
|
4422 char_u fword[MAXWLEN]; /* copy of the bad word, case-folded */
|
|
4423 char_u tword[MAXWLEN]; /* good word collected so far */
|
|
4424 trystate_T stack[MAXWLEN];
|
|
4425 char_u preword[MAXWLEN * 3]; /* word found with proper case (appended
|
|
4426 * to for word split) */
|
|
4427 char_u prewordlen = 0; /* length of word in "preword" */
|
|
4428 int splitoff = 0; /* index in tword after last split */
|
|
4429 trystate_T *sp;
|
|
4430 int newscore;
|
|
4431 langp_T *lp;
|
|
4432 char_u *byts;
|
324
|
4433 idx_T *idxs;
|
323
|
4434 int depth;
|
|
4435 int c;
|
|
4436 int n;
|
|
4437 int flags;
|
|
4438 int badflags;
|
|
4439 garray_T *gap;
|
324
|
4440 idx_T arridx;
|
323
|
4441 int len;
|
|
4442 char_u *p;
|
|
4443 fromto_T *ftp;
|
|
4444 int fl, tl;
|
|
4445
|
|
4446 /* get caps flags for bad word */
|
|
4447 badflags = captype(su->su_badptr, su->su_badptr + su->su_badlen);
|
|
4448
|
|
4449 /* We make a copy of the case-folded bad word, so that we can modify it
|
|
4450 * to find matches (esp. REP items). */
|
|
4451 STRCPY(fword, su->su_fbadword);
|
|
4452
|
|
4453 /*
|
|
4454 * At each node in the tree these states are tried:
|
|
4455 */
|
|
4456 #define STATE_START 0 /* At start of node, check if word may end or
|
|
4457 * split word. */
|
|
4458 #define STATE_SPLITUNDO 1 /* Undo word split. */
|
|
4459 #define STATE_ENDNUL 2 /* Past NUL bytes at start of the node. */
|
|
4460 #define STATE_PLAIN 3 /* Use each byte of the node. */
|
|
4461 #define STATE_DEL 4 /* Delete a byte from the bad word. */
|
|
4462 #define STATE_INS 5 /* Insert a byte in the bad word. */
|
|
4463 #define STATE_SWAP 6 /* Swap two bytes. */
|
|
4464 #define STATE_SWAP3A 7 /* Swap two bytes over three. */
|
|
4465 #define STATE_ROT3L 8 /* Rotate three bytes left */
|
|
4466 #define STATE_ROT3R 9 /* Rotate three bytes right */
|
|
4467 #define STATE_ROT_UNDO 10 /* undo rotating */
|
|
4468 #define STATE_REP_INI 11 /* Prepare for using REP items. */
|
|
4469 #define STATE_REP 12 /* Use matching REP items from the .aff file. */
|
|
4470 #define STATE_REP_UNDO 13 /* Undo a REP item replacement. */
|
|
4471 #define STATE_FINAL 99 /* End of this node. */
|
|
4472
|
|
4473
|
|
4474 for (lp = LANGP_ENTRY(curwin->w_buffer->b_langp, 0);
|
|
4475 lp->lp_slang != NULL; ++lp)
|
|
4476 {
|
|
4477 #ifdef SOUNDFOLD_SCORE
|
|
4478 su->su_slang = lp->lp_slang;
|
|
4479 if (lp->lp_slang->sl_sal.ga_len > 0)
|
|
4480 /* soundfold the bad word */
|
|
4481 spell_soundfold(lp->lp_slang, su->su_fbadword, su->su_salword);
|
|
4482 #endif
|
|
4483
|
|
4484 /*
|
|
4485 * Go through the whole case-fold tree, try changes at each node.
|
|
4486 * "tword[]" contains the word collected from nodes in the tree.
|
|
4487 * "fword[]" the word we are trying to match with (initially the bad
|
|
4488 * word).
|
|
4489 */
|
|
4490 byts = lp->lp_slang->sl_fbyts;
|
|
4491 idxs = lp->lp_slang->sl_fidxs;
|
|
4492
|
|
4493 depth = 0;
|
|
4494 stack[0].ts_state = STATE_START;
|
|
4495 stack[0].ts_score = 0;
|
|
4496 stack[0].ts_curi = 1;
|
|
4497 stack[0].ts_fidx = 0;
|
|
4498 stack[0].ts_fidxtry = 0;
|
|
4499 stack[0].ts_twordlen = 0;
|
|
4500 stack[0].ts_arridx = 0;
|
|
4501
|
|
4502 while (depth >= 0 && !got_int)
|
|
4503 {
|
|
4504 sp = &stack[depth];
|
|
4505 switch (sp->ts_state)
|
|
4506 {
|
|
4507 case STATE_START:
|
|
4508 /*
|
|
4509 * Start of node: Deal with NUL bytes, which means
|
|
4510 * tword[] may end here.
|
|
4511 */
|
|
4512 arridx = sp->ts_arridx; /* current node in the tree */
|
|
4513 len = byts[arridx]; /* bytes in this node */
|
|
4514 arridx += sp->ts_curi; /* index of current byte */
|
|
4515
|
|
4516 if (sp->ts_curi > len || (c = byts[arridx]) != 0)
|
|
4517 {
|
|
4518 /* Past bytes in node and/or past NUL bytes. */
|
|
4519 sp->ts_state = STATE_ENDNUL;
|
|
4520 break;
|
|
4521 }
|
|
4522
|
|
4523 /*
|
|
4524 * End of word in tree.
|
|
4525 */
|
|
4526 ++sp->ts_curi; /* eat one NUL byte */
|
|
4527
|
324
|
4528 flags = (int)idxs[arridx];
|
323
|
4529
|
|
4530 /*
|
|
4531 * Form the word with proper case in preword.
|
|
4532 * If there is a word from a previous split, append.
|
|
4533 */
|
|
4534 tword[sp->ts_twordlen] = NUL;
|
|
4535 if (flags & WF_KEEPCAP)
|
|
4536 /* Must find the word in the keep-case tree. */
|
|
4537 find_keepcap_word(lp->lp_slang, tword + splitoff,
|
|
4538 preword + prewordlen);
|
|
4539 else
|
|
4540 /* Include badflags: if the badword is onecap or allcap
|
|
4541 * use that for the goodword too. */
|
|
4542 make_case_word(tword + splitoff,
|
|
4543 preword + prewordlen, flags | badflags);
|
|
4544
|
|
4545 /* Don't use a banned word. It may appear again as a good
|
|
4546 * word, thus remember it. */
|
|
4547 if (flags & WF_BANNED)
|
|
4548 {
|
|
4549 add_banned(su, preword + prewordlen);
|
|
4550 break;
|
|
4551 }
|
|
4552 if (was_banned(su, preword + prewordlen))
|
|
4553 break;
|
|
4554
|
|
4555 newscore = 0;
|
|
4556 if ((flags & WF_REGION)
|
|
4557 && (((unsigned)flags >> 8) & lp->lp_region) == 0)
|
|
4558 newscore += SCORE_REGION;
|
|
4559 if (flags & WF_RARE)
|
|
4560 newscore += SCORE_RARE;
|
|
4561
|
324
|
4562 /* Words that were not found in the text get a penalty. */
|
|
4563 if ((flags & WF_USED) == 0)
|
|
4564 newscore += SCORE_NOTUSED;
|
|
4565
|
323
|
4566 if (!spell_valid_case(badflags,
|
|
4567 captype(preword + prewordlen, NULL)))
|
|
4568 newscore += SCORE_ICASE;
|
|
4569
|
|
4570 if (fword[sp->ts_fidx] == 0)
|
|
4571 {
|
|
4572 /* The badword also ends: add suggestions, */
|
324
|
4573 add_suggestion(su, preword, sp->ts_score + newscore
|
|
4574 #ifdef RESCORE
|
|
4575 , FALSE
|
|
4576 #endif
|
|
4577 );
|
323
|
4578 }
|
|
4579 else if (sp->ts_fidx >= sp->ts_fidxtry)
|
|
4580 {
|
|
4581 /* The word in the tree ends but the badword
|
|
4582 * continues: try inserting a space and check that a valid
|
|
4583 * words starts at fword[sp->ts_fidx]. */
|
|
4584 if (try_deeper(su, stack, depth, newscore + SCORE_SPLIT))
|
|
4585 {
|
|
4586 /* Save things to be restored at STATE_SPLITUNDO. */
|
|
4587 sp->ts_save_prewordlen = prewordlen;
|
|
4588 sp->ts_save_badflags = badflags;
|
|
4589 sp->ts_save_splitoff = splitoff;
|
|
4590
|
|
4591 /* Append a space to preword. */
|
|
4592 STRCAT(preword, " ");
|
|
4593 prewordlen = STRLEN(preword);
|
|
4594 splitoff = sp->ts_twordlen;
|
324
|
4595 #ifdef FEAT_MBYTE
|
|
4596 if (has_mbyte)
|
|
4597 {
|
|
4598 int i = 0;
|
|
4599
|
|
4600 /* Case-folding may change the number of bytes:
|
|
4601 * Count nr of chars in fword[sp->ts_fidx] and
|
|
4602 * advance that many chars in su->su_badptr. */
|
|
4603 for (p = fword; p < fword + sp->ts_fidx;
|
|
4604 mb_ptr_adv(p))
|
|
4605 ++i;
|
|
4606 for (p = su->su_badptr; i > 0; mb_ptr_adv(p))
|
|
4607 --i;
|
|
4608 }
|
|
4609 else
|
|
4610 #endif
|
|
4611 p = su->su_badptr + sp->ts_fidx;
|
|
4612 badflags = captype(p, su->su_badptr + su->su_badlen);
|
323
|
4613
|
|
4614 sp->ts_state = STATE_SPLITUNDO;
|
|
4615 ++depth;
|
|
4616 /* Restart at top of the tree. */
|
|
4617 stack[depth].ts_arridx = 0;
|
|
4618 }
|
|
4619 }
|
|
4620 break;
|
|
4621
|
|
4622 case STATE_SPLITUNDO:
|
|
4623 /* Fixup the changes done for word split. */
|
|
4624 badflags = sp->ts_save_badflags;
|
|
4625 splitoff = sp->ts_save_splitoff;
|
|
4626 prewordlen = sp->ts_save_prewordlen;
|
|
4627
|
|
4628 /* Continue looking for NUL bytes. */
|
|
4629 sp->ts_state = STATE_START;
|
|
4630 break;
|
|
4631
|
|
4632 case STATE_ENDNUL:
|
|
4633 /* Past the NUL bytes in the node. */
|
|
4634 if (fword[sp->ts_fidx] == 0)
|
|
4635 {
|
|
4636 /* The badword ends, can't use the bytes in this node. */
|
|
4637 sp->ts_state = STATE_DEL;
|
|
4638 break;
|
|
4639 }
|
|
4640 sp->ts_state = STATE_PLAIN;
|
|
4641 /*FALLTHROUGH*/
|
|
4642
|
|
4643 case STATE_PLAIN:
|
|
4644 /*
|
|
4645 * Go over all possible bytes at this node, add each to
|
|
4646 * tword[] and use child node. "ts_curi" is the index.
|
|
4647 */
|
|
4648 arridx = sp->ts_arridx;
|
|
4649 if (sp->ts_curi > byts[arridx])
|
|
4650 {
|
|
4651 /* Done all bytes at this node, do next state. When still
|
|
4652 * at already changed bytes skip the other tricks. */
|
|
4653 if (sp->ts_fidx >= sp->ts_fidxtry)
|
|
4654 sp->ts_state = STATE_DEL;
|
|
4655 else
|
|
4656 sp->ts_state = STATE_FINAL;
|
|
4657 }
|
|
4658 else
|
|
4659 {
|
|
4660 arridx += sp->ts_curi++;
|
|
4661 c = byts[arridx];
|
|
4662
|
|
4663 /* Normal byte, go one level deeper. If it's not equal to
|
|
4664 * the byte in the bad word adjust the score. But don't
|
|
4665 * even try when the byte was already changed. */
|
|
4666 if (c == fword[sp->ts_fidx])
|
|
4667 newscore = 0;
|
324
|
4668
|
|
4669 /* TODO: this is too slow and comparing bytes isn't right
|
|
4670 * for multi-byte characters. */
|
|
4671 #if 0
|
323
|
4672 else if (lp->lp_slang->sl_map != NULL
|
324
|
4673 && similar_chars(lp->lp_slang,
|
323
|
4674 c, fword[sp->ts_fidx]))
|
|
4675 newscore = SCORE_SIMILAR;
|
324
|
4676 #endif
|
323
|
4677 else
|
|
4678 newscore = SCORE_SUBST;
|
|
4679 if ((newscore == 0 || sp->ts_fidx >= sp->ts_fidxtry)
|
|
4680 && try_deeper(su, stack, depth, newscore))
|
|
4681 {
|
|
4682 ++depth;
|
|
4683 ++stack[depth].ts_fidx;
|
|
4684 tword[stack[depth].ts_twordlen++] = c;
|
|
4685 stack[depth].ts_arridx = idxs[arridx];
|
|
4686 }
|
|
4687 }
|
|
4688 break;
|
|
4689
|
|
4690 case STATE_DEL:
|
|
4691 /* Try skipping one byte in the bad word (delete it). */
|
|
4692 sp->ts_state = STATE_INS;
|
|
4693 sp->ts_curi = 1;
|
|
4694 if (fword[sp->ts_fidx] != NUL
|
|
4695 && try_deeper(su, stack, depth, SCORE_DEL))
|
|
4696 {
|
|
4697 ++depth;
|
|
4698 ++stack[depth].ts_fidx;
|
|
4699 break;
|
|
4700 }
|
|
4701 /*FALLTHROUGH*/
|
|
4702
|
|
4703 case STATE_INS:
|
|
4704 /* Insert one byte. Do this for each possible bytes at this
|
|
4705 * node. */
|
|
4706 n = sp->ts_arridx;
|
|
4707 if (sp->ts_curi > byts[n])
|
|
4708 {
|
|
4709 /* Done all bytes at this node, do next state. */
|
|
4710 sp->ts_state = STATE_SWAP;
|
|
4711 sp->ts_curi = 1;
|
|
4712 }
|
|
4713 else
|
|
4714 {
|
|
4715 /* Do one more byte at this node. */
|
|
4716 n += sp->ts_curi++;
|
|
4717 c = byts[n];
|
|
4718 if (c != 0 && try_deeper(su, stack, depth, SCORE_INS))
|
|
4719 {
|
|
4720 ++depth;
|
|
4721 tword[stack[depth].ts_twordlen++] = c;
|
|
4722 stack[depth].ts_arridx = idxs[n];
|
|
4723 }
|
|
4724 }
|
|
4725 break;
|
|
4726
|
|
4727 case STATE_SWAP:
|
|
4728 /* Swap two bytes: "12" -> "21". This means looking for the
|
|
4729 * following byte at the current node and the current byte at
|
|
4730 * its child node. We change "fword" here, it's changed back
|
|
4731 * afterwards. TODO: should swap characters instead of bytes.
|
|
4732 * */
|
|
4733 c = fword[sp->ts_fidx];
|
|
4734 if (c != NUL && fword[sp->ts_fidx + 1] != NUL
|
|
4735 && try_deeper(su, stack, depth, SCORE_SWAP))
|
|
4736 {
|
|
4737 sp->ts_state = STATE_SWAP3A;
|
|
4738 ++depth;
|
|
4739 fword[sp->ts_fidx] = fword[sp->ts_fidx + 1];
|
|
4740 fword[sp->ts_fidx + 1] = c;
|
|
4741 stack[depth].ts_fidxtry = sp->ts_fidx + 2;
|
|
4742 }
|
|
4743 else
|
|
4744 /* If this swap doesn't work then SWAP3 won't either. */
|
|
4745 sp->ts_state = STATE_REP_INI;
|
|
4746 break;
|
|
4747
|
|
4748 case STATE_SWAP3A:
|
|
4749 /* First undo the STATE_SWAP swap: "21" -> "12". */
|
|
4750 c = fword[sp->ts_fidx];
|
|
4751 fword[sp->ts_fidx] = fword[sp->ts_fidx + 1];
|
|
4752 fword[sp->ts_fidx + 1] = c;
|
|
4753
|
|
4754 /* Swap two bytes, skipping one: "123" -> "321". We change
|
|
4755 * "fword" here, it's changed back afterwards. TODO: should
|
|
4756 * swap characters instead of bytes. */
|
|
4757 c = fword[sp->ts_fidx];
|
|
4758 if (c != NUL && fword[sp->ts_fidx + 1] != NUL
|
|
4759 && fword[sp->ts_fidx + 2] != NUL
|
|
4760 && try_deeper(su, stack, depth, SCORE_SWAP3))
|
|
4761 {
|
|
4762 sp->ts_state = STATE_ROT3L;
|
|
4763 ++depth;
|
|
4764 fword[sp->ts_fidx] = fword[sp->ts_fidx + 2];
|
|
4765 fword[sp->ts_fidx + 2] = c;
|
|
4766 stack[depth].ts_fidxtry = sp->ts_fidx + 3;
|
|
4767 }
|
|
4768 else
|
|
4769 sp->ts_state = STATE_REP_INI;
|
|
4770 break;
|
|
4771
|
|
4772 case STATE_ROT3L:
|
|
4773 /* First undo STATE_SWAP3A: "321" -> "123" */
|
|
4774 c = fword[sp->ts_fidx];
|
|
4775 fword[sp->ts_fidx] = fword[sp->ts_fidx + 2];
|
|
4776 fword[sp->ts_fidx + 2] = c;
|
|
4777
|
|
4778 /* Rotate three bytes left: "123" -> "231". We change
|
|
4779 * "fword" here, it's changed back afterwards. TODO: should
|
|
4780 * swap characters instead of bytes. */
|
|
4781 if (try_deeper(su, stack, depth, SCORE_SWAP3))
|
|
4782 {
|
|
4783 sp->ts_state = STATE_ROT3R;
|
|
4784 ++depth;
|
|
4785 c = fword[sp->ts_fidx];
|
|
4786 fword[sp->ts_fidx] = fword[sp->ts_fidx + 1];
|
|
4787 fword[sp->ts_fidx + 1] = fword[sp->ts_fidx + 2];
|
|
4788 fword[sp->ts_fidx + 2] = c;
|
|
4789 stack[depth].ts_fidxtry = sp->ts_fidx + 3;
|
|
4790 }
|
|
4791 else
|
|
4792 sp->ts_state = STATE_REP_INI;
|
|
4793 break;
|
|
4794
|
|
4795 case STATE_ROT3R:
|
|
4796 /* First undo STATE_ROT3L: "231" -> "123" */
|
|
4797 c = fword[sp->ts_fidx + 2];
|
|
4798 fword[sp->ts_fidx + 2] = fword[sp->ts_fidx + 1];
|
|
4799 fword[sp->ts_fidx + 1] = fword[sp->ts_fidx];
|
|
4800 fword[sp->ts_fidx] = c;
|
|
4801
|
|
4802 /* Rotate three bytes right: "123" -> "312". We change
|
|
4803 * "fword" here, it's changed back afterwards. TODO: should
|
|
4804 * swap characters instead of bytes. */
|
|
4805 if (try_deeper(su, stack, depth, SCORE_SWAP3))
|
|
4806 {
|
|
4807 sp->ts_state = STATE_ROT_UNDO;
|
|
4808 ++depth;
|
|
4809 c = fword[sp->ts_fidx + 2];
|
|
4810 fword[sp->ts_fidx + 2] = fword[sp->ts_fidx + 1];
|
|
4811 fword[sp->ts_fidx + 1] = fword[sp->ts_fidx];
|
|
4812 fword[sp->ts_fidx] = c;
|
|
4813 stack[depth].ts_fidxtry = sp->ts_fidx + 3;
|
|
4814 }
|
|
4815 else
|
|
4816 sp->ts_state = STATE_REP_INI;
|
|
4817 break;
|
|
4818
|
|
4819 case STATE_ROT_UNDO:
|
|
4820 /* Undo STATE_ROT3R: "312" -> "123" */
|
|
4821 c = fword[sp->ts_fidx];
|
|
4822 fword[sp->ts_fidx] = fword[sp->ts_fidx + 1];
|
|
4823 fword[sp->ts_fidx + 1] = fword[sp->ts_fidx + 2];
|
|
4824 fword[sp->ts_fidx + 2] = c;
|
|
4825 /*FALLTHROUGH*/
|
|
4826
|
|
4827 case STATE_REP_INI:
|
|
4828 /* Check if matching with REP items from the .aff file would
|
|
4829 * work. Quickly skip if there are no REP items or the score
|
|
4830 * is going to be too high anyway. */
|
|
4831 gap = &lp->lp_slang->sl_rep;
|
|
4832 if (gap->ga_len == 0
|
|
4833 || sp->ts_score + SCORE_REP >= su->su_maxscore)
|
|
4834 {
|
|
4835 sp->ts_state = STATE_FINAL;
|
|
4836 break;
|
|
4837 }
|
|
4838
|
|
4839 /* Use the first byte to quickly find the first entry that
|
|
4840 * matches. If the index is -1 there is none. */
|
|
4841 sp->ts_curi = lp->lp_slang->sl_rep_first[fword[sp->ts_fidx]];
|
|
4842 if (sp->ts_curi < 0)
|
|
4843 {
|
|
4844 sp->ts_state = STATE_FINAL;
|
|
4845 break;
|
|
4846 }
|
|
4847
|
|
4848 sp->ts_state = STATE_REP;
|
|
4849 /*FALLTHROUGH*/
|
|
4850
|
|
4851 case STATE_REP:
|
|
4852 /* Try matching with REP items from the .aff file. For each
|
|
4853 * match replace the charactes and check if the resulting word
|
|
4854 * is valid. */
|
|
4855 p = fword + sp->ts_fidx;
|
|
4856
|
|
4857 gap = &lp->lp_slang->sl_rep;
|
|
4858 while (sp->ts_curi < gap->ga_len)
|
|
4859 {
|
|
4860 ftp = (fromto_T *)gap->ga_data + sp->ts_curi++;
|
|
4861 if (*ftp->ft_from != *p)
|
|
4862 {
|
|
4863 /* past possible matching entries */
|
|
4864 sp->ts_curi = gap->ga_len;
|
|
4865 break;
|
|
4866 }
|
|
4867 if (STRNCMP(ftp->ft_from, p, STRLEN(ftp->ft_from)) == 0
|
|
4868 && try_deeper(su, stack, depth, SCORE_REP))
|
|
4869 {
|
|
4870 /* Need to undo this afterwards. */
|
|
4871 sp->ts_state = STATE_REP_UNDO;
|
|
4872
|
|
4873 /* Change the "from" to the "to" string. */
|
|
4874 ++depth;
|
|
4875 fl = STRLEN(ftp->ft_from);
|
|
4876 tl = STRLEN(ftp->ft_to);
|
|
4877 if (fl != tl)
|
|
4878 mch_memmove(p + tl, p + fl, STRLEN(p + fl) + 1);
|
|
4879 mch_memmove(p, ftp->ft_to, tl);
|
|
4880 stack[depth].ts_fidxtry = sp->ts_fidx + tl;
|
|
4881 break;
|
|
4882 }
|
|
4883 }
|
|
4884
|
|
4885 if (sp->ts_curi >= gap->ga_len)
|
|
4886 /* No (more) matches. */
|
|
4887 sp->ts_state = STATE_FINAL;
|
|
4888
|
|
4889 break;
|
|
4890
|
|
4891 case STATE_REP_UNDO:
|
|
4892 /* Undo a REP replacement and continue with the next one. */
|
|
4893 ftp = (fromto_T *)lp->lp_slang->sl_rep.ga_data
|
|
4894 + sp->ts_curi - 1;
|
|
4895 fl = STRLEN(ftp->ft_from);
|
|
4896 tl = STRLEN(ftp->ft_to);
|
|
4897 p = fword + sp->ts_fidx;
|
|
4898 if (fl != tl)
|
|
4899 mch_memmove(p + fl, p + tl, STRLEN(p + tl) + 1);
|
|
4900 mch_memmove(p, ftp->ft_from, fl);
|
|
4901 sp->ts_state = STATE_REP;
|
|
4902 break;
|
|
4903
|
|
4904 default:
|
|
4905 /* Did all possible states at this level, go up one level. */
|
|
4906 --depth;
|
|
4907 }
|
|
4908
|
|
4909 line_breakcheck();
|
|
4910 }
|
|
4911 }
|
|
4912 }
|
|
4913
|
|
4914 /*
|
|
4915 * Try going one level deeper in the tree.
|
|
4916 */
|
|
4917 static int
|
|
4918 try_deeper(su, stack, depth, score_add)
|
|
4919 suginfo_T *su;
|
|
4920 trystate_T *stack;
|
|
4921 int depth;
|
|
4922 int score_add;
|
|
4923 {
|
|
4924 int newscore;
|
|
4925
|
|
4926 /* Refuse to go deeper if the scrore is getting too big. */
|
|
4927 newscore = stack[depth].ts_score + score_add;
|
|
4928 if (newscore >= su->su_maxscore)
|
|
4929 return FALSE;
|
|
4930
|
|
4931 stack[depth + 1].ts_state = STATE_START;
|
|
4932 stack[depth + 1].ts_score = newscore;
|
|
4933 stack[depth + 1].ts_curi = 1; /* start just after length byte */
|
|
4934 stack[depth + 1].ts_fidx = stack[depth].ts_fidx;
|
|
4935 stack[depth + 1].ts_fidxtry = stack[depth].ts_fidxtry;
|
|
4936 stack[depth + 1].ts_twordlen = stack[depth].ts_twordlen;
|
|
4937 stack[depth + 1].ts_arridx = stack[depth].ts_arridx;
|
|
4938 return TRUE;
|
|
4939 }
|
|
4940
|
|
4941 /*
|
|
4942 * "fword" is a good word with case folded. Find the matching keep-case
|
|
4943 * words and put it in "kword".
|
|
4944 * Theoretically there could be several keep-case words that result in the
|
|
4945 * same case-folded word, but we only find one...
|
|
4946 */
|
|
4947 static void
|
|
4948 find_keepcap_word(slang, fword, kword)
|
|
4949 slang_T *slang;
|
|
4950 char_u *fword;
|
|
4951 char_u *kword;
|
|
4952 {
|
|
4953 char_u uword[MAXWLEN]; /* "fword" in upper-case */
|
|
4954 int depth;
|
324
|
4955 idx_T tryidx;
|
323
|
4956
|
|
4957 /* The following arrays are used at each depth in the tree. */
|
324
|
4958 idx_T arridx[MAXWLEN];
|
323
|
4959 int round[MAXWLEN];
|
|
4960 int fwordidx[MAXWLEN];
|
|
4961 int uwordidx[MAXWLEN];
|
|
4962 int kwordlen[MAXWLEN];
|
|
4963
|
|
4964 int flen, ulen;
|
|
4965 int l;
|
|
4966 int len;
|
|
4967 int c;
|
324
|
4968 idx_T lo, hi, m;
|
323
|
4969 char_u *p;
|
|
4970 char_u *byts = slang->sl_kbyts; /* array with bytes of the words */
|
324
|
4971 idx_T *idxs = slang->sl_kidxs; /* array with indexes */
|
323
|
4972
|
|
4973 if (byts == NULL)
|
|
4974 {
|
|
4975 /* array is empty: "cannot happen" */
|
|
4976 *kword = NUL;
|
|
4977 return;
|
|
4978 }
|
|
4979
|
|
4980 /* Make an all-cap version of "fword". */
|
|
4981 allcap_copy(fword, uword);
|
|
4982
|
|
4983 /*
|
|
4984 * Each character needs to be tried both case-folded and upper-case.
|
|
4985 * All this gets very complicated if we keep in mind that changing case
|
|
4986 * may change the byte length of a multi-byte character...
|
|
4987 */
|
|
4988 depth = 0;
|
|
4989 arridx[0] = 0;
|
|
4990 round[0] = 0;
|
|
4991 fwordidx[0] = 0;
|
|
4992 uwordidx[0] = 0;
|
|
4993 kwordlen[0] = 0;
|
|
4994 while (depth >= 0)
|
|
4995 {
|
|
4996 if (fword[fwordidx[depth]] == NUL)
|
|
4997 {
|
|
4998 /* We are at the end of "fword". If the tree allows a word to end
|
|
4999 * here we have found a match. */
|
|
5000 if (byts[arridx[depth] + 1] == 0)
|
|
5001 {
|
|
5002 kword[kwordlen[depth]] = NUL;
|
|
5003 return;
|
|
5004 }
|
|
5005
|
|
5006 /* kword is getting too long, continue one level up */
|
|
5007 --depth;
|
|
5008 }
|
|
5009 else if (++round[depth] > 2)
|
|
5010 {
|
|
5011 /* tried both fold-case and upper-case character, continue one
|
|
5012 * level up */
|
|
5013 --depth;
|
|
5014 }
|
|
5015 else
|
|
5016 {
|
|
5017 /*
|
|
5018 * round[depth] == 1: Try using the folded-case character.
|
|
5019 * round[depth] == 2: Try using the upper-case character.
|
|
5020 */
|
|
5021 #ifdef FEAT_MBYTE
|
|
5022 if (has_mbyte)
|
|
5023 {
|
|
5024 flen = mb_ptr2len_check(fword + fwordidx[depth]);
|
|
5025 ulen = mb_ptr2len_check(uword + uwordidx[depth]);
|
|
5026 }
|
|
5027 else
|
|
5028 #endif
|
|
5029 ulen = flen = 1;
|
|
5030 if (round[depth] == 1)
|
|
5031 {
|
|
5032 p = fword + fwordidx[depth];
|
|
5033 l = flen;
|
|
5034 }
|
|
5035 else
|
|
5036 {
|
|
5037 p = uword + uwordidx[depth];
|
|
5038 l = ulen;
|
|
5039 }
|
|
5040
|
|
5041 for (tryidx = arridx[depth]; l > 0; --l)
|
|
5042 {
|
|
5043 /* Perform a binary search in the list of accepted bytes. */
|
|
5044 len = byts[tryidx++];
|
|
5045 c = *p++;
|
|
5046 lo = tryidx;
|
|
5047 hi = tryidx + len - 1;
|
|
5048 while (lo < hi)
|
|
5049 {
|
|
5050 m = (lo + hi) / 2;
|
|
5051 if (byts[m] > c)
|
|
5052 hi = m - 1;
|
|
5053 else if (byts[m] < c)
|
|
5054 lo = m + 1;
|
|
5055 else
|
|
5056 {
|
|
5057 lo = hi = m;
|
|
5058 break;
|
|
5059 }
|
|
5060 }
|
|
5061
|
|
5062 /* Stop if there is no matching byte. */
|
|
5063 if (hi < lo || byts[lo] != c)
|
|
5064 break;
|
|
5065
|
|
5066 /* Continue at the child (if there is one). */
|
|
5067 tryidx = idxs[lo];
|
|
5068 }
|
|
5069
|
|
5070 if (l == 0)
|
|
5071 {
|
|
5072 /*
|
|
5073 * Found the matching char. Copy it to "kword" and go a
|
|
5074 * level deeper.
|
|
5075 */
|
|
5076 if (round[depth] == 1)
|
|
5077 {
|
|
5078 STRNCPY(kword + kwordlen[depth], fword + fwordidx[depth],
|
|
5079 flen);
|
|
5080 kwordlen[depth + 1] = kwordlen[depth] + flen;
|
|
5081 }
|
|
5082 else
|
|
5083 {
|
|
5084 STRNCPY(kword + kwordlen[depth], uword + uwordidx[depth],
|
|
5085 ulen);
|
|
5086 kwordlen[depth + 1] = kwordlen[depth] + ulen;
|
|
5087 }
|
|
5088 fwordidx[depth + 1] = fwordidx[depth] + flen;
|
|
5089 uwordidx[depth + 1] = uwordidx[depth] + ulen;
|
|
5090
|
|
5091 ++depth;
|
|
5092 arridx[depth] = tryidx;
|
|
5093 round[depth] = 0;
|
|
5094 }
|
|
5095 }
|
|
5096 }
|
|
5097
|
|
5098 /* Didn't find it: "cannot happen". */
|
|
5099 *kword = NUL;
|
|
5100 }
|
|
5101
|
|
5102 /*
|
|
5103 * Find suggestions by comparing the word in a sound-a-like form.
|
|
5104 */
|
|
5105 static void
|
|
5106 spell_try_soundalike(su)
|
|
5107 suginfo_T *su;
|
|
5108 {
|
|
5109 char_u salword[MAXWLEN];
|
|
5110 char_u tword[MAXWLEN];
|
|
5111 char_u tfword[MAXWLEN];
|
|
5112 char_u tsalword[MAXWLEN];
|
324
|
5113 idx_T arridx[MAXWLEN];
|
323
|
5114 int curi[MAXWLEN];
|
|
5115 langp_T *lp;
|
|
5116 char_u *byts;
|
324
|
5117 idx_T *idxs;
|
323
|
5118 int depth;
|
|
5119 int c;
|
324
|
5120 idx_T n;
|
323
|
5121 int round;
|
|
5122 int flags;
|
324
|
5123 int score, sound_score;
|
|
5124 char_u *bp, *sp;
|
323
|
5125
|
|
5126 for (lp = LANGP_ENTRY(curwin->w_buffer->b_langp, 0);
|
|
5127 lp->lp_slang != NULL; ++lp)
|
|
5128 {
|
|
5129 if (lp->lp_slang->sl_sal.ga_len > 0)
|
|
5130 {
|
|
5131 /* soundfold the bad word */
|
|
5132 spell_soundfold(lp->lp_slang, su->su_fbadword, salword);
|
|
5133
|
|
5134 /*
|
|
5135 * Go through the whole tree, soundfold each word and compare.
|
|
5136 * round 1: use the case-folded tree.
|
|
5137 * round 2: use the keep-case tree.
|
|
5138 */
|
|
5139 for (round = 1; round <= 2; ++round)
|
|
5140 {
|
|
5141 if (round == 1)
|
|
5142 {
|
|
5143 byts = lp->lp_slang->sl_fbyts;
|
|
5144 idxs = lp->lp_slang->sl_fidxs;
|
|
5145 }
|
|
5146 else
|
|
5147 {
|
|
5148 byts = lp->lp_slang->sl_kbyts;
|
|
5149 idxs = lp->lp_slang->sl_kidxs;
|
|
5150 }
|
|
5151
|
|
5152 depth = 0;
|
|
5153 arridx[0] = 0;
|
|
5154 curi[0] = 1;
|
|
5155 while (depth >= 0 && !got_int)
|
|
5156 {
|
|
5157 if (curi[depth] > byts[arridx[depth]])
|
|
5158 /* Done all bytes at this node, go up one level. */
|
|
5159 --depth;
|
|
5160 else
|
|
5161 {
|
|
5162 /* Do one more byte at this node. */
|
|
5163 n = arridx[depth] + curi[depth];
|
|
5164 ++curi[depth];
|
|
5165 c = byts[n];
|
|
5166 if (c == 0)
|
|
5167 {
|
|
5168 /* End of word, deal with the word. */
|
324
|
5169 flags = (int)idxs[n];
|
323
|
5170 if (round == 2 || (flags & WF_KEEPCAP) == 0)
|
|
5171 {
|
|
5172 tword[depth] = NUL;
|
|
5173 if (round == 1)
|
|
5174 spell_soundfold(lp->lp_slang,
|
|
5175 tword, tsalword);
|
|
5176 else
|
|
5177 {
|
|
5178 /* In keep-case tree need to case-fold the
|
|
5179 * word. */
|
|
5180 (void)spell_casefold(tword, depth,
|
|
5181 tfword, MAXWLEN);
|
|
5182 spell_soundfold(lp->lp_slang,
|
|
5183 tfword, tsalword);
|
|
5184 }
|
|
5185
|
324
|
5186 /*
|
|
5187 * Accept the word if the sound-folded words
|
|
5188 * are (almost) equal.
|
|
5189 */
|
|
5190 for (bp = salword, sp = tsalword; *bp == *sp;
|
|
5191 ++bp, ++sp)
|
|
5192 if (*bp == NUL)
|
|
5193 break;
|
|
5194
|
|
5195 if (*bp == *sp)
|
|
5196 /* equal */
|
|
5197 sound_score = 0;
|
|
5198 else if (*bp != NUL && bp[1] != NUL
|
|
5199 && *bp == sp[1] && bp[1] == *sp
|
|
5200 && STRCMP(bp + 2, sp + 2) == 0)
|
|
5201 /* swap two bytes */
|
|
5202 sound_score = SCORE_SWAP;
|
|
5203 else if (STRCMP(bp + 1, sp) == 0)
|
|
5204 /* delete byte */
|
|
5205 sound_score = SCORE_DEL;
|
|
5206 else if (STRCMP(bp, sp + 1) == 0)
|
|
5207 /* insert byte */
|
|
5208 sound_score = SCORE_INS;
|
|
5209 else if (STRCMP(bp + 1, sp + 1) == 0)
|
|
5210 /* skip one byte */
|
|
5211 sound_score = SCORE_SUBST;
|
|
5212 else
|
|
5213 /* not equal or similar */
|
|
5214 sound_score = SCORE_MAXMAX;
|
|
5215
|
|
5216 if (sound_score < SCORE_MAXMAX)
|
323
|
5217 {
|
324
|
5218 char_u cword[MAXWLEN];
|
|
5219 char_u *p;
|
|
5220
|
323
|
5221 if (round == 1 && flags != 0)
|
|
5222 {
|
324
|
5223 /* Need to fix case according to
|
|
5224 * "flags". */
|
323
|
5225 make_case_word(tword, cword, flags);
|
324
|
5226 p = cword;
|
323
|
5227 }
|
|
5228 else
|
324
|
5229 p = tword;
|
|
5230
|
|
5231 /* Compute the score. */
|
|
5232 score = spell_edit_score(su->su_badword, p);
|
|
5233 #ifdef RESCORE
|
|
5234 /* give a bonus for the good word sounding
|
|
5235 * the same as the bad word */
|
|
5236 add_suggestion(su, tword,
|
|
5237 RESCORE(score, sound_score),
|
|
5238 TRUE);
|
|
5239 #else
|
|
5240 add_suggestion(su, tword,
|
|
5241 score + sound_score);
|
|
5242 #endif
|
323
|
5243 }
|
|
5244 }
|
|
5245
|
|
5246 /* Skip over other NUL bytes. */
|
|
5247 while (byts[n + 1] == 0)
|
|
5248 {
|
|
5249 ++n;
|
|
5250 ++curi[depth];
|
|
5251 }
|
|
5252 }
|
|
5253 else
|
|
5254 {
|
|
5255 /* Normal char, go one level deeper. */
|
|
5256 tword[depth++] = c;
|
|
5257 arridx[depth] = idxs[n];
|
|
5258 curi[depth] = 1;
|
|
5259 }
|
|
5260 }
|
324
|
5261
|
|
5262 line_breakcheck();
|
323
|
5263 }
|
|
5264 }
|
|
5265 }
|
|
5266 }
|
|
5267 }
|
|
5268
|
|
5269 /*
|
324
|
5270 * Copy "fword" to "cword", fixing case according to "flags".
|
323
|
5271 */
|
|
5272 static void
|
|
5273 make_case_word(fword, cword, flags)
|
|
5274 char_u *fword;
|
|
5275 char_u *cword;
|
|
5276 int flags;
|
|
5277 {
|
|
5278 if (flags & WF_ALLCAP)
|
|
5279 /* Make it all upper-case */
|
|
5280 allcap_copy(fword, cword);
|
|
5281 else if (flags & WF_ONECAP)
|
|
5282 /* Make the first letter upper-case */
|
324
|
5283 onecap_copy(fword, cword, TRUE);
|
323
|
5284 else
|
|
5285 /* Use goodword as-is. */
|
|
5286 STRCPY(cword, fword);
|
|
5287 }
|
|
5288
|
324
|
#if 0
/*
 * Return TRUE if "c1" and "c2" are similar characters according to the MAP
 * lines in the .aff file.
 *
 * The similar characters are stored separated with slashes:
 * "aaa/bbb/ccc/".  Both characters are looked up; when the first slash that
 * follows each of them is the same one they are in the same MAP entry.
 */
    static int
similar_chars(slang, c1, c2)
    slang_T	*slang;
    int		c1;
    int		c2;
{
    char_u  *first;
    char_u  *second;

    first = vim_strchr(slang->sl_map, c1);
    if (first != NULL)
    {
	second = vim_strchr(slang->sl_map, c2);
	if (second != NULL)
	    return vim_strchr(first, '/') == vim_strchr(second, '/');
    }
    return FALSE;
}
#endif
|
323
|
5315
|
|
5316 /*
|
|
5317 * Add a suggestion to the list of suggestions.
|
|
5318 * Do not add a duplicate suggestion or suggestions with a bad score.
|
|
5319 * When "use_score" is not zero it's used, otherwise the score is computed
|
|
5320 * with spell_edit_score().
|
|
5321 */
|
|
5322 static void
|
324
|
5323 add_suggestion(su, goodword, score
|
|
5324 #ifdef RESCORE
|
|
5325 , had_bonus
|
|
5326 #endif
|
|
5327 )
|
323
|
5328 suginfo_T *su;
|
|
5329 char_u *goodword;
|
324
|
5330 int score;
|
|
5331 #ifdef RESCORE
|
|
5332 int had_bonus; /* set st_had_bonus */
|
|
5333 #endif
|
323
|
5334 {
|
|
5335 suggest_T *stp;
|
|
5336 int i;
|
|
5337 #ifdef SOUNDFOLD_SCORE
|
|
5338 char_u fword[MAXWLEN];
|
|
5339 char_u salword[MAXWLEN];
|
|
5340 #endif
|
|
5341
|
|
5342 /* Check that the word wasn't banned. */
|
|
5343 if (was_banned(su, goodword))
|
|
5344 return;
|
|
5345
|
|
5346 if (score <= su->su_maxscore)
|
|
5347 {
|
|
5348 #ifdef SOUNDFOLD_SCORE
|
|
5349 /* Add to the score when the word sounds differently.
|
|
5350 * This is slow... */
|
|
5351 if (su->su_slang->sl_sal.ga_len > 0)
|
324
|
5352 score += spell_sound_score(su->su_slang, fword, su->su_salword);
|
323
|
5353 #endif
|
|
5354
|
|
5355 /* Check if the word is already there. */
|
|
5356 stp = &SUG(su, 0);
|
|
5357 for (i = su->su_ga.ga_len - 1; i >= 0; --i)
|
|
5358 if (STRCMP(stp[i].st_word, goodword) == 0)
|
|
5359 {
|
|
5360 /* Found it. Remember the lowest score. */
|
|
5361 if (stp[i].st_score > score)
|
324
|
5362 {
|
323
|
5363 stp[i].st_score = score;
|
324
|
5364 #ifdef RESCORE
|
|
5365 stp[i].st_had_bonus = had_bonus;
|
|
5366 #endif
|
|
5367 }
|
323
|
5368 break;
|
|
5369 }
|
|
5370
|
|
5371 if (i < 0 && ga_grow(&su->su_ga, 1) == OK)
|
|
5372 {
|
|
5373 /* Add a suggestion. */
|
|
5374 stp = &SUG(su, su->su_ga.ga_len);
|
|
5375 stp->st_word = vim_strsave(goodword);
|
|
5376 if (stp->st_word != NULL)
|
|
5377 {
|
|
5378 stp->st_score = score;
|
324
|
5379 #ifdef RESCORE
|
|
5380 stp->st_had_bonus = had_bonus;
|
|
5381 #endif
|
323
|
5382 stp->st_orglen = su->su_badlen;
|
|
5383 ++su->su_ga.ga_len;
|
|
5384
|
|
5385 /* If we have too many suggestions now, sort the list and keep
|
|
5386 * the best suggestions. */
|
324
|
5387 if (su->su_ga.ga_len > SUG_MAX_COUNT)
|
|
5388 cleanup_suggestions(su, SUG_CLEAN_COUNT);
|
323
|
5389 }
|
|
5390 }
|
|
5391 }
|
|
5392 }
|
|
5393
|
|
5394 /*
|
|
5395 * Add a word to be banned.
|
|
5396 */
|
|
5397 static void
|
|
5398 add_banned(su, word)
|
|
5399 suginfo_T *su;
|
|
5400 char_u *word;
|
|
5401 {
|
|
5402 char_u *s = vim_strsave(word);
|
|
5403 hash_T hash;
|
|
5404 hashitem_T *hi;
|
|
5405
|
|
5406 if (s != NULL)
|
|
5407 {
|
|
5408 hash = hash_hash(s);
|
|
5409 hi = hash_lookup(&su->su_banned, s, hash);
|
|
5410 if (HASHITEM_EMPTY(hi))
|
|
5411 hash_add_item(&su->su_banned, hi, s, hash);
|
|
5412 }
|
|
5413 }
|
|
5414
|
|
5415 /*
|
|
5416 * Return TRUE if a word appears in the list of banned words.
|
|
5417 */
|
|
5418 static int
|
|
5419 was_banned(su, word)
|
|
5420 suginfo_T *su;
|
|
5421 char_u *word;
|
|
5422 {
|
324
|
5423 hashitem_T *hi = hash_find(&su->su_banned, word);
|
|
5424
|
|
5425 return !HASHITEM_EMPTY(hi);
|
323
|
5426 }
|
|
5427
|
|
5428 /*
|
|
5429 * Free the banned words in "su".
|
|
5430 */
|
|
5431 static void
|
|
5432 free_banned(su)
|
|
5433 suginfo_T *su;
|
|
5434 {
|
|
5435 int todo;
|
|
5436 hashitem_T *hi;
|
|
5437
|
|
5438 todo = su->su_banned.ht_used;
|
|
5439 for (hi = su->su_banned.ht_array; todo > 0; ++hi)
|
|
5440 {
|
|
5441 if (!HASHITEM_EMPTY(hi))
|
|
5442 {
|
|
5443 vim_free(hi->hi_key);
|
|
5444 --todo;
|
|
5445 }
|
|
5446 }
|
|
5447 hash_clear(&su->su_banned);
|
|
5448 }
|
|
5449
|
324
|
#ifdef RESCORE
/*
 * Recompute the score if sound-folding is possible.  This is slow,
 * thus only done for the final results.
 * The first language of the current buffer that has sound-folding rules is
 * used.  Suggestions that already had the bonus are left alone.
 */
    static void
rescore_suggestions(su)
    suginfo_T	*su;
{
    langp_T	*lp;
    suggest_T	*stp;
    char_u	sal_badword[MAXWLEN];
    int		sound_score;
    int		idx;

    /* Find the first language that supports sound-folding. */
    for (lp = LANGP_ENTRY(curwin->w_buffer->b_langp, 0);
						   lp->lp_slang != NULL; ++lp)
	if (lp->lp_slang->sl_sal.ga_len > 0)
	    break;
    if (lp->lp_slang == NULL)
	return;		/* sound-folding is not possible */

    /* soundfold the bad word */
    spell_soundfold(lp->lp_slang, su->su_fbadword, sal_badword);

    for (idx = 0; idx < su->su_ga.ga_len; ++idx)
    {
	stp = &SUG(su, idx);
	if (stp->st_had_bonus)
	    continue;
	sound_score = spell_sound_score(lp->lp_slang, stp->st_word,
								 sal_badword);
	stp->st_score = RESCORE(stp->st_score, sound_score);
    }
}
#endif
|
|
5488
|
323
|
5489 static int
|
|
5490 #ifdef __BORLANDC__
|
|
5491 _RTLENTRYF
|
|
5492 #endif
|
|
5493 sug_compare __ARGS((const void *s1, const void *s2));
|
|
5494
|
|
5495 /*
|
|
5496 * Function given to qsort() to sort the suggestions on st_score.
|
|
5497 */
|
|
5498 static int
|
|
5499 #ifdef __BORLANDC__
|
|
5500 _RTLENTRYF
|
|
5501 #endif
|
|
5502 sug_compare(s1, s2)
|
|
5503 const void *s1;
|
|
5504 const void *s2;
|
|
5505 {
|
|
5506 suggest_T *p1 = (suggest_T *)s1;
|
|
5507 suggest_T *p2 = (suggest_T *)s2;
|
|
5508
|
|
5509 return p1->st_score - p2->st_score;
|
|
5510 }
|
|
5511
|
|
5512 /*
|
|
5513 * Cleanup the suggestions:
|
|
5514 * - Sort on score.
|
|
5515 * - Remove words that won't be displayed.
|
|
5516 */
|
|
5517 static void
|
324
|
5518 cleanup_suggestions(su, keep)
|
323
|
5519 suginfo_T *su;
|
324
|
5520 int keep; /* nr of suggestions to keep */
|
323
|
5521 {
|
|
5522 suggest_T *stp = &SUG(su, 0);
|
|
5523 int i;
|
|
5524
|
|
5525 /* Sort the list. */
|
|
5526 qsort(su->su_ga.ga_data, (size_t)su->su_ga.ga_len,
|
|
5527 sizeof(suggest_T), sug_compare);
|
|
5528
|
|
5529 /* Truncate the list to the number of suggestions that will be displayed. */
|
324
|
5530 if (su->su_ga.ga_len > keep)
|
323
|
5531 {
|
324
|
5532 for (i = keep; i < su->su_ga.ga_len; ++i)
|
323
|
5533 vim_free(stp[i].st_word);
|
324
|
5534 su->su_ga.ga_len = keep;
|
|
5535 su->su_maxscore = stp[keep - 1].st_score;
|
323
|
5536 }
|
|
5537 }
|
|
5538
|
|
5539 /*
|
|
5540 * Turn "inword" into its sound-a-like equivalent in "res[MAXWLEN]".
|
|
5541 */
|
|
5542 static void
|
|
5543 spell_soundfold(slang, inword, res)
|
|
5544 slang_T *slang;
|
|
5545 char_u *inword;
|
|
5546 char_u *res;
|
|
5547 {
|
|
5548 fromto_T *ftp;
|
|
5549 char_u word[MAXWLEN];
|
|
5550 #ifdef FEAT_MBYTE
|
|
5551 int l;
|
324
|
5552 int found_mbyte = FALSE;
|
323
|
5553 #endif
|
|
5554 char_u *s;
|
|
5555 char_u *t;
|
|
5556 int i, j, z;
|
|
5557 int n, k = 0;
|
|
5558 int z0;
|
|
5559 int k0;
|
|
5560 int n0;
|
|
5561 int c;
|
|
5562 int pri;
|
|
5563 int p0 = -333;
|
|
5564 int c0;
|
|
5565
|
324
|
5566 /* Remove accents, if wanted. We actually remove all non-word characters.
|
|
5567 * But keep white space. */
|
323
|
5568 if (slang->sl_rem_accents)
|
|
5569 {
|
|
5570 t = word;
|
|
5571 for (s = inword; *s != NUL; )
|
|
5572 {
|
324
|
5573 if (vim_iswhite(*s))
|
|
5574 *t++ = *s++;
|
323
|
5575 #ifdef FEAT_MBYTE
|
324
|
5576 else if (has_mbyte)
|
323
|
5577 {
|
|
5578 l = mb_ptr2len_check(s);
|
|
5579 if (SPELL_ISWORDP(s))
|
|
5580 {
|
|
5581 mch_memmove(t, s, l);
|
|
5582 t += l;
|
324
|
5583 if (l > 1)
|
|
5584 found_mbyte = TRUE;
|
323
|
5585 }
|
|
5586 s += l;
|
|
5587 }
|
324
|
5588 #endif
|
323
|
5589 else
|
|
5590 {
|
|
5591 if (SPELL_ISWORDP(s))
|
|
5592 *t++ = *s;
|
|
5593 ++s;
|
|
5594 }
|
|
5595 }
|
|
5596 *t = NUL;
|
|
5597 }
|
|
5598 else
|
324
|
5599 {
|
|
5600 #ifdef FEAT_MBYTE
|
|
5601 if (has_mbyte)
|
|
5602 for (s = inword; *s != NUL; s += l)
|
|
5603 if ((l = mb_ptr2len_check(s)) > 1)
|
|
5604 {
|
|
5605 found_mbyte = TRUE;
|
|
5606 break;
|
|
5607 }
|
|
5608 #endif
|
323
|
5609 STRCPY(word, inword);
|
324
|
5610 }
|
|
5611
|
|
5612 #ifdef FEAT_MBYTE
|
|
5613 /* If there are multi-byte characters in the word return it as-is, because
|
|
5614 * the following won't work. */
|
|
5615 if (found_mbyte)
|
|
5616 {
|
|
5617 STRCPY(res, word);
|
|
5618 return;
|
|
5619 }
|
|
5620 #endif
|
323
|
5621
|
|
5622 ftp = (fromto_T *)slang->sl_sal.ga_data;
|
|
5623
|
|
5624 /*
|
|
5625 * This comes from Aspell phonet.cpp. Converted from C++ to C.
|
324
|
5626 * Changed to keep spaces.
|
323
|
5627 * TODO: support for multi-byte chars.
|
|
5628 */
|
|
5629 i = j = z = 0;
|
|
5630 while ((c = word[i]) != NUL)
|
|
5631 {
|
|
5632 n = slang->sl_sal_first[c];
|
|
5633 z0 = 0;
|
|
5634
|
|
5635 if (n >= 0)
|
|
5636 {
|
|
5637 /* check all rules for the same letter */
|
|
5638 while (ftp[n].ft_from[0] == c)
|
|
5639 {
|
|
5640 /* check whole string */
|
|
5641 k = 1; /* number of found letters */
|
|
5642 pri = 5; /* default priority */
|
|
5643 s = ftp[n].ft_from;
|
|
5644 s++; /* important for (see below) "*(s-1)" */
|
|
5645
|
|
5646 /* Skip over normal letters that match with the word. */
|
|
5647 while (*s != NUL && word[i + k] == *s
|
|
5648 && !vim_isdigit(*s) && strchr("(-<^$", *s) == NULL)
|
|
5649 {
|
|
5650 k++;
|
|
5651 s++;
|
|
5652 }
|
|
5653
|
|
5654 if (*s == '(')
|
|
5655 {
|
|
5656 /* check alternate letters in "(..)" */
|
|
5657 for (t = s + 1; *t != ')' && *t != NUL; ++t)
|
|
5658 if (*t == word[i + k])
|
|
5659 {
|
|
5660 /* match */
|
|
5661 ++k;
|
|
5662 for (s = t + 1; *s != NUL; ++s)
|
|
5663 if (*s == ')')
|
|
5664 {
|
|
5665 ++s;
|
|
5666 break;
|
|
5667 }
|
|
5668 break;
|
|
5669 }
|
|
5670 }
|
|
5671
|
|
5672 p0 = *s;
|
|
5673 k0 = k;
|
|
5674 while (*s == '-' && k > 1)
|
|
5675 {
|
|
5676 k--;
|
|
5677 s++;
|
|
5678 }
|
|
5679 if (*s == '<')
|
|
5680 s++;
|
|
5681 if (vim_isdigit(*s))
|
|
5682 {
|
|
5683 /* determine priority */
|
|
5684 pri = *s - '0';
|
|
5685 s++;
|
|
5686 }
|
|
5687 if (*s == '^' && *(s + 1) == '^')
|
|
5688 s++;
|
|
5689
|
|
5690 if (*s == NUL
|
|
5691 || (*s == '^'
|
324
|
5692 && (i == 0 || !(word[i - 1] == ' '
|
|
5693 || SPELL_ISWORDP(word + i - 1)))
|
323
|
5694 && (*(s + 1) != '$'
|
|
5695 || (!SPELL_ISWORDP(word + i + k0))))
|
|
5696 || (*s == '$' && i > 0
|
|
5697 && SPELL_ISWORDP(word + i - 1)
|
|
5698 && (!SPELL_ISWORDP(word + i + k0))))
|
|
5699 {
|
|
5700 /* search for followup rules, if: */
|
|
5701 /* followup and k > 1 and NO '-' in searchstring */
|
|
5702 c0 = word[i + k - 1];
|
|
5703 n0 = slang->sl_sal_first[c0];
|
|
5704
|
|
5705 if (slang->sl_followup && k > 1 && n0 >= 0
|
|
5706 && p0 != '-' && word[i + k] != NUL)
|
|
5707 {
|
|
5708 /* test follow-up rule for "word[i + k]" */
|
|
5709 while (ftp[n0].ft_from[0] == c0)
|
|
5710 {
|
|
5711
|
|
5712 /* check whole string */
|
|
5713 k0 = k;
|
|
5714 p0 = 5;
|
|
5715 s = ftp[n0].ft_from;
|
|
5716 s++;
|
|
5717 while (*s != NUL && word[i+k0] == *s
|
|
5718 && !vim_isdigit(*s)
|
|
5719 && strchr("(-<^$",*s) == NULL)
|
|
5720 {
|
|
5721 k0++;
|
|
5722 s++;
|
|
5723 }
|
|
5724 if (*s == '(')
|
|
5725 {
|
|
5726 /* check alternate letters in "(..)" */
|
|
5727 for (t = s + 1; *t != ')' && *t != NUL; ++t)
|
|
5728 if (*t == word[i + k0])
|
|
5729 {
|
|
5730 /* match */
|
|
5731 ++k0;
|
|
5732 for (s = t + 1; *s != NUL; ++s)
|
|
5733 if (*s == ')')
|
|
5734 {
|
|
5735 ++s;
|
|
5736 break;
|
|
5737 }
|
|
5738 break;
|
|
5739 }
|
|
5740 }
|
|
5741 while (*s == '-')
|
|
5742 {
|
|
5743 /* "k0" gets NOT reduced */
|
|
5744 /* because "if (k0 == k)" */
|
|
5745 s++;
|
|
5746 }
|
|
5747 if (*s == '<')
|
|
5748 s++;
|
|
5749 if (vim_isdigit(*s))
|
|
5750 {
|
|
5751 p0 = *s - '0';
|
|
5752 s++;
|
|
5753 }
|
|
5754
|
|
5755 if (*s == NUL
|
|
5756 /* *s == '^' cuts */
|
|
5757 || (*s == '$'
|
|
5758 && !SPELL_ISWORDP(word + i + k0)))
|
|
5759 {
|
|
5760 if (k0 == k)
|
|
5761 {
|
|
5762 /* this is just a piece of the string */
|
|
5763 ++n0;
|
|
5764 continue;
|
|
5765 }
|
|
5766
|
|
5767 if (p0 < pri)
|
|
5768 {
|
|
5769 /* priority too low */
|
|
5770 ++n0;
|
|
5771 continue;
|
|
5772 }
|
|
5773 /* rule fits; stop search */
|
|
5774 break;
|
|
5775 }
|
|
5776 ++n0;
|
|
5777 }
|
|
5778
|
|
5779 if (p0 >= pri && ftp[n0].ft_from[0] == c0)
|
|
5780 {
|
|
5781 ++n;
|
|
5782 continue;
|
|
5783 }
|
|
5784 }
|
|
5785
|
|
5786 /* replace string */
|
|
5787 s = ftp[n].ft_to;
|
|
5788 p0 = (ftp[n].ft_from[0] != NUL
|
|
5789 && vim_strchr(ftp[n].ft_from + 1,
|
|
5790 '<') != NULL) ? 1 : 0;
|
|
5791 if (p0 == 1 && z == 0)
|
|
5792 {
|
|
5793 /* rule with '<' is used */
|
|
5794 if (j > 0 && *s != NUL
|
|
5795 && (res[j - 1] == c || res[j - 1] == *s))
|
|
5796 j--;
|
|
5797 z0 = 1;
|
|
5798 z = 1;
|
|
5799 k0 = 0;
|
|
5800 while (*s != NUL && word[i+k0] != NUL)
|
|
5801 {
|
|
5802 word[i + k0] = *s;
|
|
5803 k0++;
|
|
5804 s++;
|
|
5805 }
|
|
5806 if (k > k0)
|
|
5807 mch_memmove(word + i + k0, word + i + k,
|
|
5808 STRLEN(word + i + k) + 1);
|
|
5809
|
|
5810 /* new "actual letter" */
|
|
5811 c = word[i];
|
|
5812 }
|
|
5813 else
|
|
5814 {
|
|
5815 /* no '<' rule used */
|
|
5816 i += k - 1;
|
|
5817 z = 0;
|
|
5818 while (*s != NUL && s[1] != NUL && j < MAXWLEN)
|
|
5819 {
|
|
5820 if (j == 0 || res[j - 1] != *s)
|
|
5821 {
|
|
5822 res[j] = *s;
|
|
5823 j++;
|
|
5824 }
|
|
5825 s++;
|
|
5826 }
|
|
5827 /* new "actual letter" */
|
|
5828 c = *s;
|
|
5829 if (ftp[n].ft_from[0] != NUL
|
|
5830 && strstr((char *)ftp[n].ft_from + 1,
|
|
5831 "^^") != NULL)
|
|
5832 {
|
|
5833 if (c != NUL)
|
|
5834 {
|
|
5835 res[j] = c;
|
|
5836 j++;
|
|
5837 }
|
|
5838 mch_memmove(word, word + i + 1,
|
|
5839 STRLEN(word + i + 1) + 1);
|
|
5840 i = 0;
|
|
5841 z0 = 1;
|
|
5842 }
|
|
5843 }
|
|
5844 break;
|
|
5845 }
|
|
5846 ++n;
|
|
5847 }
|
|
5848 }
|
324
|
5849 else if (vim_iswhite(c))
|
|
5850 {
|
|
5851 c = ' ';
|
|
5852 k = 1;
|
|
5853 }
|
323
|
5854
|
|
5855 if (z0 == 0)
|
|
5856 {
|
|
5857 if (k && !p0 && j < MAXWLEN && c != NUL
|
|
5858 && (!slang->sl_collapse || j == 0 || res[j - 1] != c))
|
|
5859 {
|
|
5860 /* condense only double letters */
|
|
5861 res[j] = c;
|
|
5862 j++;
|
|
5863 }
|
|
5864
|
|
5865 i++;
|
|
5866 z = 0;
|
|
5867 k = 0;
|
|
5868 }
|
|
5869 }
|
|
5870
|
|
5871 res[j] = NUL;
|
|
5872 }
|
|
5873
|
324
|
#if defined(RESCORE) || defined(SOUNDFOLD_SCORE)
/*
 * Return the score for how much words sound different.
 * "goodword" is case-folded and sound-folded here; "badsound" must already
 * be sound-folded.  The result is the edit score between the two
 * sound-folded words, with a correction for a "*" at the start.
 */
    static int
spell_sound_score(slang, goodword, badsound)
    slang_T	*slang;
    char_u	*goodword;	/* good word */
    char_u	*badsound;	/* sound-folded bad word */
{
    char_u	folded[MAXWLEN];
    char_u	goodsound[MAXWLEN];
    int		result;
    int		badfirst;
    int		goodfirst;

    /* Sound folding needs a case-folded word. */
    (void)spell_casefold(goodword, STRLEN(goodword), folded, MAXWLEN);
    spell_soundfold(slang, folded, goodsound);

    /* The edit distance between the sounds is the base of the score. */
    result = spell_edit_score(badsound, goodsound);

    /* Correction: adding/inserting "*" at the start (word starts with vowel)
     * shouldn't be counted so much, vowels halfway the word aren't counted at
     * all. */
    badfirst = *badsound;
    goodfirst = *goodsound;
    if (badfirst != goodfirst && (badfirst == '*' || goodfirst == '*'))
	return result - SCORE_DEL / 2;

    return result;
}
#endif
|
|
5906
|
323
|
5907 /*
|
|
5908 * Compute the "edit distance" to turn "badword" into "goodword". The less
|
|
5909 * deletes/inserts/swaps are required the lower the score.
|
324
|
5910 *
|
323
|
5911 * The algorithm comes from Aspell editdist.cpp, edit_distance().
|
324
|
5912 * It has been converted from C++ to C and modified to support multi-byte
|
|
5913 * characters.
|
323
|
5914 */
|
|
5915 static int
|
|
5916 spell_edit_score(badword, goodword)
|
|
5917 char_u *badword;
|
|
5918 char_u *goodword;
|
|
5919 {
|
|
5920 int *cnt;
|
|
5921 int badlen, goodlen;
|
|
5922 int j, i;
|
|
5923 int t;
|
|
5924 int bc, gc;
|
324
|
5925 int pbc, pgc;
|
|
5926 #ifdef FEAT_MBYTE
|
|
5927 char_u *p;
|
|
5928 int wbadword[MAXWLEN];
|
|
5929 int wgoodword[MAXWLEN];
|
|
5930
|
|
5931 if (has_mbyte)
|
|
5932 {
|
|
5933 /* Get the characters from the multi-byte strings and put them in an
|
|
5934 * int array for easy access. */
|
|
5935 for (p = badword, badlen = 0; *p != NUL; )
|
|
5936 wbadword[badlen++] = mb_ptr2char_adv(&p);
|
|
5937 ++badlen;
|
|
5938 for (p = goodword, goodlen = 0; *p != NUL; )
|
|
5939 wgoodword[goodlen++] = mb_ptr2char_adv(&p);
|
|
5940 ++goodlen;
|
|
5941 }
|
|
5942 else
|
|
5943 #endif
|
|
5944 {
|
|
5945 badlen = STRLEN(badword) + 1;
|
|
5946 goodlen = STRLEN(goodword) + 1;
|
|
5947 }
|
323
|
5948
|
|
5949 /* We use "cnt" as an array: CNT(badword_idx, goodword_idx). */
|
|
5950 #define CNT(a, b) cnt[(a) + (b) * (badlen + 1)]
|
|
5951 cnt = (int *)lalloc((long_u)(sizeof(int) * (badlen + 1) * (goodlen + 1)),
|
|
5952 TRUE);
|
324
|
5953 if (cnt == NULL)
|
|
5954 return 0; /* out of memory */
|
323
|
5955
|
|
5956 CNT(0, 0) = 0;
|
|
5957 for (j = 1; j <= goodlen; ++j)
|
|
5958 CNT(0, j) = CNT(0, j - 1) + SCORE_DEL;
|
|
5959
|
|
5960 for (i = 1; i <= badlen; ++i)
|
|
5961 {
|
|
5962 CNT(i, 0) = CNT(i - 1, 0) + SCORE_INS;
|
|
5963 for (j = 1; j <= goodlen; ++j)
|
|
5964 {
|
324
|
5965 #ifdef FEAT_MBYTE
|
|
5966 if (has_mbyte)
|
|
5967 {
|
|
5968 bc = wbadword[i - 1];
|
|
5969 gc = wgoodword[j - 1];
|
|
5970 }
|
|
5971 else
|
|
5972 #endif
|
|
5973 {
|
|
5974 bc = badword[i - 1];
|
|
5975 gc = goodword[j - 1];
|
|
5976 }
|
323
|
5977 if (bc == gc)
|
|
5978 CNT(i, j) = CNT(i - 1, j - 1);
|
|
5979 else
|
|
5980 {
|
|
5981 /* Use a better score when there is only a case difference. */
|
324
|
5982 if (SPELL_TOFOLD(bc) == SPELL_TOFOLD(gc))
|
323
|
5983 CNT(i, j) = SCORE_ICASE + CNT(i - 1, j - 1);
|
|
5984 else
|
|
5985 CNT(i, j) = SCORE_SUBST + CNT(i - 1, j - 1);
|
|
5986
|
324
|
5987 if (i > 1 && j > 1)
|
323
|
5988 {
|
324
|
5989 #ifdef FEAT_MBYTE
|
|
5990 if (has_mbyte)
|
|
5991 {
|
|
5992 pbc = wbadword[i - 2];
|
|
5993 pgc = wgoodword[j - 2];
|
|
5994 }
|
|
5995 else
|
|
5996 #endif
|
|
5997 {
|
|
5998 pbc = badword[i - 2];
|
|
5999 pgc = goodword[j - 2];
|
|
6000 }
|
|
6001 if (bc == pgc && pbc == gc)
|
|
6002 {
|
|
6003 t = SCORE_SWAP + CNT(i - 2, j - 2);
|
|
6004 if (t < CNT(i, j))
|
|
6005 CNT(i, j) = t;
|
|
6006 }
|
323
|
6007 }
|
|
6008 t = SCORE_DEL + CNT(i - 1, j);
|
|
6009 if (t < CNT(i, j))
|
|
6010 CNT(i, j) = t;
|
|
6011 t = SCORE_INS + CNT(i, j - 1);
|
|
6012 if (t < CNT(i, j))
|
|
6013 CNT(i, j) = t;
|
|
6014 }
|
|
6015 }
|
|
6016 }
|
|
6017 return CNT(badlen - 1, goodlen - 1);
|
|
6018 }
|
307
|
6019
|
236
|
6020 #endif /* FEAT_SYN_HL */
|