223
|
1 /* vi:set ts=8 sts=4 sw=4:
|
|
2 *
|
|
3 * VIM - Vi IMproved by Bram Moolenaar
|
|
4 *
|
|
5 * Do ":help uganda" in Vim to read copying and usage conditions.
|
|
6 * Do ":help credits" in Vim to see a list of people who contributed.
|
|
7 * See README.txt for an overview of the Vim source code.
|
|
8 */
|
|
9
|
|
10 /*
|
|
11 * spell.c: code for spell checking
|
226
|
12 *
|
|
13 * Terminology:
|
|
14 * "dword" is a dictionary word, made out of letters and digits.
|
|
15 * "nword" is a word with a character that's not a letter or digit.
|
|
16 * "word" is either a "dword" or an "nword".
|
223
|
17 */
|
|
18
|
|
19 #if defined(MSDOS) || defined(WIN16) || defined(WIN32) || defined(_WIN64)
|
|
20 # include <io.h> /* for lseek(), must be before vim.h */
|
|
21 #endif
|
|
22
|
|
23 #include "vim.h"
|
|
24
|
|
25 #if defined(FEAT_SYN_HL) || defined(PROTO)
|
|
26
|
|
27 #ifdef HAVE_FCNTL_H
|
|
28 # include <fcntl.h>
|
|
29 #endif
|
|
30
|
226
|
31 #define MAXWLEN 100 /* assume max. word len is this many bytes */
|
|
32
|
223
|
33 /*
|
|
34 * Structure that is used to store the text from the language file. This
|
|
35 * avoids the need to allocate each individual word and copying it. It's
|
|
36 * allocated in big chunks for speed.
|
|
37 */
|
|
38 #define SBLOCKSIZE 4096 /* default size of sb_data */
|
|
39 typedef struct sblock_S sblock_T;
|
|
40 struct sblock_S
|
|
41 {
|
|
42 sblock_T *sb_next; /* next block in list */
|
|
43 char_u sb_data[1]; /* data, actually longer */
|
|
44 };
|
|
45
|
226
|
46 /* Structure to store words and additions. Used twice : once for case-folded
|
|
47 * and once for keep-case words. */
|
|
48 typedef struct winfo_S
|
|
49 {
|
|
50 hashtab_T wi_ht; /* hashtable with all words, both dword_T and
|
|
51 nword_T (check flags for DW_NWORD) */
|
|
52 garray_T wi_add; /* table with pointers to additions in a
|
|
53 dword_T */
|
|
54 int wi_addlen; /* longest addition length */
|
|
55 } winfo_T;
|
|
56
|
223
|
57 /*
|
|
58 * Structure used to store words and other info for one language.
|
|
59 */
|
|
60 typedef struct slang_S slang_T;
|
|
61 struct slang_S
|
|
62 {
|
|
63 slang_T *sl_next; /* next language */
|
|
64 char_u sl_name[2]; /* language name "en", "nl", etc. */
|
226
|
65 winfo_T sl_fwords; /* case-folded words and additions */
|
|
66 winfo_T sl_kwords; /* keep-case words and additions */
|
|
67 char_u sl_regions[17]; /* table with up to 8 region names plus NUL */
|
223
|
68 sblock_T *sl_block; /* list with allocated memory blocks */
|
|
69 };
|
|
70
|
|
71 static slang_T *first_lang = NULL;
|
|
72
|
226
|
73 /* Entry for dword in "sl_ht". Also used for part of an nword, starting with
|
|
74 * the first non-word character. And used for additions in wi_add. */
|
|
75 typedef struct dword_S
|
|
76 {
|
|
77 char_u dw_region; /* one bit per region where it's valid */
|
|
78 char_u dw_flags; /* WF_ flags */
|
|
79 char_u dw_word[1]; /* actually longer, NUL terminated */
|
|
80 } dword_T;
|
|
81
|
|
82 #define REGION_ALL 0xff
|
|
83
|
|
84 #define HI2DWORD(hi) (dword_T *)(hi->hi_key - 2)
|
|
85
|
|
86 /* Entry for a nword in "sl_ht". Note that the last three items must be
|
|
87 * identical to dword_T, so that they can be in the same hashtable. */
|
|
88 typedef struct nword_S
|
|
89 {
|
|
90 garray_T nw_ga; /* table with pointers to dword_T for part
|
|
91 starting with non-word character */
|
|
92 int nw_maxlen; /* longest nword length (after the dword) */
|
|
93 char_u nw_region; /* one bit per region where it's valid */
|
|
94 char_u nw_flags; /* WF_ flags */
|
|
95 char_u nw_word[1]; /* actually longer, NUL terminated */
|
|
96 } nword_T;
|
|
97
|
|
98 /* Get nword_T pointer from hashitem that uses nw_word */
|
|
99 static nword_T dumnw;
|
|
100 #define HI2NWORD(hi) ((nword_T *)((hi)->hi_key - (dumnw.nw_word - (char_u *)&dumnw)))
|
|
101
|
|
102 #define DW_CAP 0x01 /* word must start with capital */
|
|
103 #define DW_RARE 0x02 /* rare word */
|
|
104 #define DW_NWORD 0x04 /* this is an nword_T */
|
|
105 #define DW_DWORD 0x08 /* (also) use as dword without nword */
|
|
106
|
223
|
107 /*
|
|
108 * Structure used in "b_langp", filled from 'spelllang'.
|
|
109 */
|
|
110 typedef struct langp_S
|
|
111 {
|
|
112 slang_T *lp_slang; /* info for this language (NULL for last one) */
|
|
113 int lp_region; /* bitmask for region or REGION_ALL */
|
|
114 } langp_T;
|
|
115
|
|
116 #define LANGP_ENTRY(ga, i) (((langp_T *)(ga).ga_data) + (i))
|
226
|
117 #define DWORD_ENTRY(gap, i) *(((dword_T **)(gap)->ga_data) + i)
|
223
|
118
|
|
119 #define SP_OK 0
|
|
120 #define SP_BAD 1
|
|
121 #define SP_RARE 2
|
|
122 #define SP_LOCAL 3
|
|
123
|
226
|
124 static char *e_invchar2 = N_("E753: Invalid character in \"%s\"");
|
|
125
|
223
|
126 static slang_T *spell_load_lang __ARGS((char_u *lang));
|
|
127 static void spell_load_file __ARGS((char_u *fname));
|
|
128 static int find_region __ARGS((char_u *rp, char_u *region));
|
|
129
|
|
130 /*
|
|
131 * Main spell-checking function.
|
|
132 * "ptr" points to the start of a word.
|
|
133 * "*attrp" is set to the attributes for a badly spelled word. For a non-word
|
|
134 * or when it's OK it remains unchanged.
|
|
135 * This must only be called when 'spelllang' is not empty.
|
|
136 * Returns the length of the word in bytes, also when it's OK, so that the
|
|
137 * caller can skip over the word.
|
|
138 */
|
|
139 int
|
|
140 spell_check(wp, ptr, attrp)
|
|
141 win_T *wp; /* current window */
|
|
142 char_u *ptr;
|
|
143 int *attrp;
|
|
144 {
|
226
|
145 char_u *e; /* end of word */
|
|
146 char_u *ne; /* new end of word */
|
|
147 char_u *me; /* max. end of match */
|
223
|
148 langp_T *lp;
|
|
149 int result;
|
|
150 int len = 0;
|
|
151 hashitem_T *hi;
|
226
|
152 int round;
|
|
153 char_u kword[MAXWLEN + 1]; /* word copy */
|
|
154 char_u fword[MAXWLEN + 1]; /* word with case folded */
|
|
155 char_u match[MAXWLEN + 1]; /* fword with additional chars */
|
|
156 char_u kwordclen[MAXWLEN + 1]; /* len of orig chars after kword[] */
|
|
157 char_u fwordclen[MAXWLEN + 1]; /* len of chars after fword[] */
|
|
158 char_u *clen;
|
|
159 int cidx = 0; /* char index in xwordclen[] */
|
|
160 hash_T fhash; /* hash for fword */
|
|
161 hash_T khash; /* hash for kword */
|
|
162 int match_len = 0; /* length of match[] */
|
|
163 int fmatch_len = 0; /* length of nword match in chars */
|
223
|
164 garray_T *gap;
|
226
|
165 int l, t;
|
|
166 char_u *p, *tp;
|
223
|
167 int n;
|
226
|
168 dword_T *dw;
|
|
169 dword_T *tdw;
|
|
170 winfo_T *wi;
|
|
171 nword_T *nw;
|
|
172 int w_isupper;
|
223
|
173
|
|
174 /* Find the end of the word. We already know that *ptr is a word char. */
|
|
175 e = ptr;
|
|
176 do
|
|
177 {
|
|
178 mb_ptr_adv(e);
|
|
179 ++len;
|
226
|
180 } while (*e != NUL && spell_iswordc(e));
|
|
181
|
|
182 /* A word starting with a number is always OK. */
|
|
183 if (*ptr >= '0' && *ptr <= '9')
|
|
184 return (int)(e - ptr);
|
|
185
|
|
186 #ifdef FEAT_MBYTE
|
|
187 w_isupper = MB_ISUPPER(mb_ptr2char(ptr));
|
|
188 #else
|
|
189 w_isupper = MB_ISUPPER(*ptr);
|
|
190 #endif
|
|
191
|
|
192 /* Make a copy of the word so that it can be NUL terminated.
|
|
193 * Compute hash value. */
|
|
194 mch_memmove(kword, ptr, e - ptr);
|
|
195 kword[e - ptr] = NUL;
|
|
196 khash = hash_hash(kword);
|
|
197
|
|
198 /* Make case-folded copy of the Word. Compute its hash value. */
|
|
199 (void)str_foldcase(ptr, e - ptr, fword, MAXWLEN + 1);
|
|
200 fhash = hash_hash(fword);
|
|
201
|
|
202 /* Further case-folded characters to check for an nword match go in
|
|
203 * match[]. */
|
|
204 me = e;
|
|
205
|
|
206 /* "ne" is the end for the longest match */
|
|
207 ne = e;
|
223
|
208
|
|
209 /* The word is bad unless we find it in the dictionary. */
|
|
210 result = SP_BAD;
|
|
211
|
|
212 /*
|
|
213 * Loop over the languages specified in 'spelllang'.
|
226
|
214 * We check them all, because a matching nword may be longer than an
|
|
215 * already found dword or nword.
|
223
|
216 */
|
226
|
217 for (lp = LANGP_ENTRY(wp->w_buffer->b_langp, 0); lp->lp_slang != NULL; ++lp)
|
223
|
218 {
|
226
|
219 /*
|
|
220 * Check for a matching word in the hashtable.
|
|
221 * Check both the keep-case word and the fold-case word.
|
|
222 */
|
|
223 for (round = 0; round <= 1; ++round)
|
223
|
224 {
|
226
|
225 if (round == 0)
|
|
226 {
|
|
227 wi = &lp->lp_slang->sl_kwords;
|
|
228 hi = hash_lookup(&wi->wi_ht, kword, khash);
|
|
229 }
|
|
230 else
|
|
231 {
|
|
232 wi = &lp->lp_slang->sl_fwords;
|
|
233 hi = hash_lookup(&wi->wi_ht, fword, fhash);
|
|
234 }
|
223
|
235 if (!HASHITEM_EMPTY(hi))
|
|
236 {
|
226
|
237 /*
|
|
238 * If this is an nword entry, check for match with remainder.
|
|
239 */
|
|
240 dw = HI2DWORD(hi);
|
|
241 if (dw->dw_flags & DW_NWORD)
|
223
|
242 {
|
226
|
243 /* If the word is not defined as a dword we must find an
|
|
244 * nword. */
|
|
245 if ((dw->dw_flags & DW_DWORD) == 0)
|
|
246 dw = NULL;
|
|
247
|
|
248 /* Fold more characters when needed for the nword. Need
|
|
249 * to do one extra to check for a non-word character after
|
|
250 * the nword. Also keep the byte-size of each character,
|
|
251 * both before and after folding case. */
|
|
252 nw = HI2NWORD(hi);
|
|
253 while ((round == 0
|
|
254 ? me - e <= nw->nw_maxlen
|
|
255 : match_len <= nw->nw_maxlen)
|
|
256 && *me != NUL)
|
|
257 {
|
223
|
258 #ifdef FEAT_MBYTE
|
226
|
259 l = mb_ptr2len_check(me);
|
223
|
260 #else
|
226
|
261 l = 1;
|
223
|
262 #endif
|
226
|
263 (void)str_foldcase(me, l, match + match_len,
|
|
264 MAXWLEN - match_len + 1);
|
|
265 me += l;
|
|
266 kwordclen[cidx] = l;
|
|
267 fwordclen[cidx] = STRLEN(match + match_len);
|
|
268 match_len += fwordclen[cidx];
|
|
269 ++cidx;
|
|
270 }
|
|
271
|
|
272 if (round == 0)
|
|
273 {
|
|
274 clen = kwordclen;
|
|
275 tp = e;
|
|
276 }
|
|
277 else
|
|
278 {
|
|
279 clen = fwordclen;
|
|
280 tp = match;
|
|
281 }
|
|
282
|
|
283 /* Match with each item. The longest match wins:
|
|
284 * "you've" is longer than "you". */
|
|
285 gap = &nw->nw_ga;
|
|
286 for (t = 0; t < gap->ga_len; ++t)
|
223
|
287 {
|
226
|
288 /* Skip entries with wrong case for first char.
|
|
289 * Continue if it's a rare word without a captial. */
|
|
290 tdw = DWORD_ENTRY(gap, t);
|
|
291 if ((tdw->dw_flags & (DW_CAP | DW_RARE)) == DW_CAP
|
|
292 && !w_isupper)
|
|
293 continue;
|
|
294
|
|
295 p = tdw->dw_word;
|
|
296 l = 0;
|
|
297 for (n = 0; p[n] != 0; n += clen[l++])
|
|
298 if (vim_memcmp(p + n, tp + n, clen[l]) != 0)
|
|
299 break;
|
|
300
|
|
301 /* Use a match if it's longer than previous matches
|
|
302 * and the next character is not a word character. */
|
|
303 if (p[n] == 0 && l > fmatch_len && (tp[n] == 0
|
|
304 || !spell_iswordc(tp + n)))
|
|
305 {
|
|
306 dw = tdw;
|
|
307 fmatch_len = l;
|
|
308 if (round == 0)
|
|
309 ne = tp + n;
|
|
310 else
|
|
311 {
|
|
312 /* Need to use the length of the original
|
|
313 * chars, not the fold-case ones. */
|
|
314 ne = e;
|
|
315 for (l = 0; l < fmatch_len; ++l)
|
|
316 ne += kwordclen[l];
|
|
317 }
|
|
318 if ((lp->lp_region & tdw->dw_region) == 0)
|
|
319 result = SP_LOCAL;
|
|
320 else if ((tdw->dw_flags & DW_CAP) && !w_isupper)
|
|
321 result = SP_RARE;
|
|
322 else
|
|
323 result = SP_OK;
|
|
324 }
|
|
325 }
|
|
326
|
|
327 }
|
|
328
|
|
329 if (dw != NULL)
|
|
330 {
|
|
331 if (dw->dw_flags & DW_CAP)
|
|
332 {
|
|
333 /* Need to check first letter is uppercase. If it is,
|
|
334 * check region. If it isn't it may be a rare word.
|
|
335 * */
|
|
336 if (w_isupper)
|
|
337 {
|
|
338 if ((dw->dw_region & lp->lp_region) == 0)
|
|
339 result = SP_LOCAL;
|
|
340 else
|
|
341 result = SP_OK;
|
|
342 }
|
|
343 else if (dw->dw_flags & DW_RARE)
|
|
344 result = SP_RARE;
|
|
345 }
|
|
346 else
|
|
347 {
|
|
348 if ((dw->dw_region & lp->lp_region) == 0)
|
223
|
349 result = SP_LOCAL;
|
226
|
350 else if (dw->dw_flags & DW_RARE)
|
|
351 result = SP_RARE;
|
223
|
352 else
|
|
353 result = SP_OK;
|
|
354 }
|
|
355 }
|
|
356 }
|
|
357 }
|
|
358
|
226
|
359 /*
|
|
360 * Check for an addition.
|
|
361 * Only after a dword, not after an nword.
|
|
362 * Check both the keep-case word and the fold-case word.
|
|
363 */
|
|
364 if (fmatch_len == 0)
|
|
365 for (round = 0; round <= 1; ++round)
|
223
|
366 {
|
226
|
367 if (round == 0)
|
|
368 wi = &lp->lp_slang->sl_kwords;
|
|
369 else
|
|
370 wi = &lp->lp_slang->sl_fwords;
|
|
371 gap = &wi->wi_add;
|
|
372 if (gap->ga_len == 0) /* no additions, skip quickly */
|
|
373 continue;
|
|
374
|
|
375 /* Fold characters when needed for the addition. Need to do one
|
|
376 * extra to check for a word character after the addition. */
|
|
377 while ((round == 0
|
|
378 ? me - e <= wi->wi_addlen
|
|
379 : match_len <= wi->wi_addlen)
|
|
380 && *me != NUL)
|
223
|
381 {
|
226
|
382 #ifdef FEAT_MBYTE
|
|
383 l = mb_ptr2len_check(me);
|
|
384 #else
|
|
385 l = 1;
|
|
386 #endif
|
|
387 (void)str_foldcase(me, l, match + match_len,
|
|
388 MAXWLEN - match_len + 1);
|
|
389 me += l;
|
|
390 kwordclen[cidx] = l;
|
|
391 fwordclen[cidx] = STRLEN(match + match_len);
|
|
392 match_len += fwordclen[cidx];
|
|
393 ++cidx;
|
223
|
394 }
|
226
|
395
|
|
396 if (round == 0)
|
|
397 {
|
|
398 clen = kwordclen;
|
|
399 tp = e;
|
|
400 }
|
|
401 else
|
|
402 {
|
|
403 clen = fwordclen;
|
|
404 tp = match;
|
|
405 }
|
223
|
406
|
226
|
407 /* Addition lookup. Uses a linear search, there should be
|
|
408 * very few. If there is a match adjust "ne" to the end.
|
|
409 * This doesn't change whether a word was good or bad, only
|
|
410 * the length. */
|
|
411 for (t = 0; t < gap->ga_len; ++t)
|
|
412 {
|
|
413 tdw = DWORD_ENTRY(gap, t);
|
|
414 p = tdw->dw_word;
|
|
415 l = 0;
|
|
416 for (n = 0; p[n] != 0; n += clen[l++])
|
|
417 if (vim_memcmp(p + n, tp + n, clen[l]) != 0)
|
|
418 break;
|
|
419
|
|
420 /* Use a match if it's longer than previous matches
|
|
421 * and the next character is not a word character. */
|
|
422 if (p[n] == 0 && l > fmatch_len
|
|
423 && (tp[n] == 0 || !spell_iswordc(tp + n)))
|
|
424 {
|
|
425 fmatch_len = l;
|
|
426 if (round == 0)
|
|
427 ne = tp + n;
|
|
428 else
|
|
429 {
|
|
430 /* Need to use the length of the original
|
|
431 * chars, not the fold-case ones. */
|
|
432 ne = e;
|
|
433 for (l = 0; l < fmatch_len; ++l)
|
|
434 ne += kwordclen[l];
|
|
435 }
|
|
436 }
|
|
437 }
|
223
|
438 }
|
|
439 }
|
|
440
|
|
441 if (result != SP_OK)
|
|
442 {
|
|
443 if (result == SP_BAD)
|
|
444 *attrp = highlight_attr[HLF_SPB];
|
|
445 else if (result == SP_RARE)
|
|
446 *attrp = highlight_attr[HLF_SPR];
|
|
447 else
|
|
448 *attrp = highlight_attr[HLF_SPL];
|
|
449 }
|
|
450
|
226
|
451 return (int)(ne - ptr);
|
223
|
452 }
|
|
453
|
|
454 static slang_T *load_lp; /* passed from spell_load_lang() to
|
|
455 spell_load_file() */
|
|
456
|
|
457 /*
|
|
458 * Load language "lang[2]".
|
|
459 */
|
|
460 static slang_T *
|
|
461 spell_load_lang(lang)
|
|
462 char_u *lang;
|
|
463 {
|
|
464 slang_T *lp;
|
|
465 char_u fname_enc[80];
|
|
466 char_u fname_ascii[20];
|
|
467 char_u *p;
|
226
|
468 int r;
|
223
|
469
|
|
470 lp = (slang_T *)alloc(sizeof(slang_T));
|
|
471 if (lp != NULL)
|
|
472 {
|
|
473 lp->sl_name[0] = lang[0];
|
|
474 lp->sl_name[1] = lang[1];
|
226
|
475 hash_init(&lp->sl_fwords.wi_ht);
|
|
476 ga_init2(&lp->sl_fwords.wi_add, sizeof(dword_T *), 4);
|
|
477 lp->sl_fwords.wi_addlen = 0;
|
|
478 hash_init(&lp->sl_kwords.wi_ht);
|
|
479 ga_init2(&lp->sl_kwords.wi_add, sizeof(dword_T *), 4);
|
|
480 lp->sl_kwords.wi_addlen = 0;
|
223
|
481 lp->sl_regions[0] = NUL;
|
|
482 lp->sl_block = NULL;
|
|
483
|
|
484 /* Find all spell files for "lang" in 'runtimepath' and load them.
|
|
485 * Use 'encoding', except that we use "latin1" for "latin9". */
|
|
486 #ifdef FEAT_MBYTE
|
|
487 if (STRLEN(p_enc) < 60 && STRCMP(p_enc, "iso-8859-15") != 0)
|
|
488 p = p_enc;
|
|
489 else
|
|
490 #endif
|
|
491 p = (char_u *)"latin1";
|
|
492 load_lp = lp;
|
|
493 sprintf((char *)fname_enc, "spell/%c%c.%s.spl", lang[0], lang[1], p);
|
226
|
494 r = do_in_runtimepath(fname_enc, TRUE, spell_load_file);
|
|
495 if (r == FAIL)
|
223
|
496 {
|
|
497 /* Try again to find an ASCII spell file. */
|
|
498 sprintf((char *)fname_ascii, "spell/%c%c.spl", lang[0], lang[1]);
|
226
|
499 r = do_in_runtimepath(fname_ascii, TRUE, spell_load_file);
|
|
500 }
|
|
501
|
|
502 if (r == FAIL)
|
|
503 {
|
|
504 vim_free(lp);
|
|
505 lp = NULL;
|
|
506 smsg((char_u *)_("Warning: Cannot find dictionary \"%s\""),
|
223
|
507 fname_enc + 6);
|
|
508 }
|
|
509 else
|
|
510 {
|
|
511 lp->sl_next = first_lang;
|
|
512 first_lang = lp;
|
|
513 }
|
|
514 }
|
|
515
|
|
516 return lp;
|
|
517 }
|
|
518
|
|
519 /*
|
|
520 * Load one spell file into "load_lp".
|
|
521 * Invoked through do_in_runtimepath().
|
|
522 */
|
|
523 static void
|
|
524 spell_load_file(fname)
|
|
525 char_u *fname;
|
|
526 {
|
|
527 int fd;
|
|
528 size_t len;
|
229
|
529 int l;
|
226
|
530 char_u *p = NULL, *np;
|
|
531 sblock_T *bl = NULL;
|
|
532 int bl_used = 0;
|
223
|
533 size_t rest = 0;
|
226
|
534 char_u *rbuf; /* read buffer */
|
|
535 char_u *rbuf_end; /* past last valid char in "rbuf" */
|
223
|
536 hash_T hash;
|
|
537 hashitem_T *hi;
|
|
538 int c;
|
226
|
539 int cc;
|
223
|
540 int region = REGION_ALL;
|
226
|
541 int wlen;
|
|
542 winfo_T *wi;
|
233
|
543 dword_T *dw, *edw = NULL;
|
226
|
544 nword_T *nw = NULL;
|
|
545 int flags;
|
|
546 char_u *save_sourcing_name = sourcing_name;
|
|
547 linenr_T save_sourcing_lnum = sourcing_lnum;
|
|
548
|
|
549 rbuf = alloc((unsigned)(SBLOCKSIZE + MAXWLEN + 1));
|
|
550 if (rbuf == NULL)
|
|
551 return;
|
223
|
552
|
|
553 fd = mch_open((char *)fname, O_RDONLY | O_EXTRA, 0);
|
|
554 if (fd < 0)
|
|
555 {
|
|
556 EMSG2(_(e_notopen), fname);
|
226
|
557 goto theend;
|
223
|
558 }
|
|
559
|
226
|
560 sourcing_name = fname;
|
|
561 sourcing_lnum = 0;
|
|
562
|
223
|
563 /* Get the length of the whole file. */
|
|
564 len = lseek(fd, (off_t)0, SEEK_END);
|
|
565 lseek(fd, (off_t)0, SEEK_SET);
|
|
566
|
226
|
567 /*
|
|
568 * Read the file one block at a time.
|
223
|
569 * "rest" is the length of an incomplete line at the previous block.
|
226
|
570 * "p" points to the remainder.
|
|
571 */
|
223
|
572 while (len > 0)
|
|
573 {
|
226
|
574 /* Read a block from the file. Prepend the remainder of the previous
|
|
575 * block, if any. */
|
|
576 if (rest > 0)
|
|
577 {
|
|
578 if (rest > MAXWLEN) /* truncate long line (should be comment) */
|
|
579 rest = MAXWLEN;
|
|
580 mch_memmove(rbuf, p, rest);
|
|
581 --sourcing_lnum;
|
|
582 }
|
223
|
583 if (len > SBLOCKSIZE)
|
|
584 l = SBLOCKSIZE;
|
|
585 else
|
|
586 l = len;
|
|
587 len -= l;
|
226
|
588 if (read(fd, rbuf + rest, l) != l)
|
223
|
589 {
|
|
590 EMSG2(_(e_notread), fname);
|
|
591 break;
|
|
592 }
|
226
|
593 rbuf_end = rbuf + l + rest;
|
223
|
594 rest = 0;
|
|
595
|
|
596 /* Deal with each line that was read until we finish the block. */
|
226
|
597 for (p = rbuf; p < rbuf_end; p = np)
|
223
|
598 {
|
226
|
599 ++sourcing_lnum;
|
|
600
|
|
601 /* "np" points to the first char after the line (CR, NL or white
|
|
602 * space). */
|
|
603 for (np = p; np < rbuf_end && *np >= ' '; mb_ptr_adv(np))
|
|
604 ;
|
|
605 if (np >= rbuf_end)
|
223
|
606 {
|
226
|
607 /* Incomplete line or end of file. */
|
223
|
608 rest = np - p;
|
|
609 if (len == 0)
|
226
|
610 EMSG(_("E751: Truncated spell file"));
|
223
|
611 break;
|
|
612 }
|
|
613 *np = NUL; /* terminate the line with a NUL */
|
|
614
|
226
|
615 if (*p == '-')
|
223
|
616 {
|
226
|
617 /*
|
|
618 * Region marker: ---, -xx, -xx-yy, etc.
|
|
619 */
|
|
620 ++p;
|
|
621 if (*p == '-')
|
223
|
622 {
|
226
|
623 if (p[1] != '-' || p[2] != NUL)
|
|
624 {
|
|
625 EMSG2(_(e_invchar2), p - 1);
|
|
626 len = 0;
|
|
627 break;
|
|
628 }
|
|
629 region = REGION_ALL;
|
|
630 }
|
|
631 else
|
|
632 {
|
|
633 char_u *rp = load_lp->sl_regions;
|
|
634 int r;
|
223
|
635
|
226
|
636 /* Start of a region. The region may be repeated:
|
|
637 * "-ca-uk". Fill "region" with the bit mask for the
|
|
638 * ones we find. */
|
|
639 region = 0;
|
|
640 for (;;)
|
223
|
641 {
|
226
|
642 r = find_region(rp, p);
|
|
643 if (r == REGION_ALL)
|
|
644 {
|
|
645 /* new region, add it to sl_regions[] */
|
|
646 r = STRLEN(rp);
|
|
647 if (r >= 16)
|
|
648 {
|
|
649 EMSG2(_("E752: Too many regions: %s"), p);
|
|
650 len = 0;
|
223
|
651 break;
|
226
|
652 }
|
|
653 else
|
|
654 {
|
|
655 rp[r] = p[0];
|
|
656 rp[r + 1] = p[1];
|
|
657 rp[r + 2] = NUL;
|
|
658 r = 1 << (r / 2);
|
|
659 }
|
223
|
660 }
|
|
661 else
|
226
|
662 r = 1 << r;
|
|
663
|
|
664 region |= r;
|
|
665 if (p[2] != '-')
|
223
|
666 {
|
226
|
667 if (p[2] > ' ')
|
|
668 {
|
|
669 EMSG2(_(e_invchar2), p - 1);
|
|
670 len = 0;
|
|
671 }
|
|
672 break;
|
223
|
673 }
|
226
|
674 p += 3;
|
223
|
675 }
|
|
676 }
|
226
|
677 }
|
|
678 else if (*p != '#' && *p != NUL)
|
|
679 {
|
|
680 /*
|
|
681 * Not an empty line or comment.
|
|
682 */
|
|
683 if (*p == '!')
|
223
|
684 {
|
226
|
685 wi = &load_lp->sl_kwords; /* keep case */
|
223
|
686 ++p;
|
226
|
687 }
|
|
688 else
|
|
689 wi = &load_lp->sl_fwords; /* fold case */
|
|
690
|
|
691 flags = 0;
|
|
692 c = *p;
|
|
693 if (c == '>') /* rare word */
|
|
694 {
|
|
695 flags = DW_RARE;
|
|
696 ++p;
|
|
697 }
|
|
698 else if (*p == '+') /* addition */
|
|
699 ++p;
|
|
700
|
|
701 if (c != '+' && !spell_iswordc(p))
|
|
702 {
|
|
703 EMSG2(_(e_invchar2), p);
|
|
704 len = 0;
|
|
705 break;
|
|
706 }
|
223
|
707
|
226
|
708 /* Make sure there is room for the word. Folding case may
|
|
709 * double the size. */
|
|
710 wlen = np - p;
|
|
711 if (bl == NULL || bl_used + sizeof(dword_T) + wlen
|
|
712 #ifdef FEAT_MBYTE
|
|
713 * (has_mbyte ? 2 : 1)
|
|
714 #endif
|
|
715 >= SBLOCKSIZE)
|
|
716 {
|
|
717 /* Allocate a block of memory to store the dword_T in.
|
|
718 * This is not freed until spell_reload() is called. */
|
|
719 bl = (sblock_T *)alloc((unsigned)(sizeof(sblock_T)
|
|
720 + SBLOCKSIZE));
|
|
721 if (bl == NULL)
|
|
722 {
|
|
723 len = 0;
|
|
724 break;
|
|
725 }
|
|
726 bl->sb_next = load_lp->sl_block;
|
|
727 load_lp->sl_block = bl;
|
|
728 bl_used = 0;
|
|
729 }
|
|
730 dw = (dword_T *)(bl->sb_data + bl_used);
|
223
|
731
|
226
|
732 /* For fold-case words fold the case and check for start
|
|
733 * with uppercase letter. */
|
|
734 if (wi == &load_lp->sl_fwords)
|
|
735 {
|
|
736 #ifdef FEAT_MBYTE
|
|
737 if (MB_ISUPPER(mb_ptr2char(p)))
|
|
738 #else
|
|
739 if (MB_ISUPPER(*p))
|
|
740 #endif
|
|
741 flags |= DW_CAP;
|
|
742
|
|
743 /* Fold case. */
|
|
744 (void)str_foldcase(p, np - p, dw->dw_word, wlen
|
|
745 #ifdef FEAT_MBYTE
|
|
746 * (has_mbyte ? 2 : 1)
|
|
747 #endif
|
|
748 + 1);
|
|
749 #ifdef FEAT_MBYTE
|
|
750 /* case folding may change length of word */
|
|
751 wlen = STRLEN(dw->dw_word);
|
|
752 #endif
|
223
|
753 }
|
|
754 else
|
|
755 {
|
226
|
756 /* Keep case: copy the word as-is. */
|
|
757 mch_memmove(dw->dw_word, p, wlen + 1);
|
|
758 }
|
|
759
|
|
760 if (c == '+')
|
|
761 {
|
|
762 garray_T *gap = &wi->wi_add;
|
|
763
|
|
764 /* Addition. TODO: search for matching entry? */
|
|
765 if (wi->wi_addlen < wlen)
|
|
766 wi->wi_addlen = wlen;
|
|
767 if (ga_grow(gap, 1) == FAIL)
|
223
|
768 {
|
226
|
769 len = 0;
|
|
770 break;
|
223
|
771 }
|
226
|
772 *(((dword_T **)gap->ga_data) + gap->ga_len) = dw;
|
|
773 ++gap->ga_len;
|
|
774 dw->dw_region = region;
|
|
775 dw->dw_flags = flags;
|
|
776 bl_used += sizeof(dword_T) + wlen;
|
|
777 }
|
|
778 else
|
|
779 {
|
|
780 /*
|
|
781 * Check for a non-word character. If found it's
|
|
782 * going to be an nword.
|
|
783 * For an nword we split in two: the leading dword and
|
|
784 * the remainder. The dword goes in the hashtable
|
|
785 * with an nword_T, the remainder is put in the
|
|
786 * dword_T (starting with the first non-word
|
|
787 * character).
|
|
788 */
|
|
789 cc = NUL;
|
|
790 for (p = dw->dw_word; *p != NUL; mb_ptr_adv(p))
|
|
791 if (!spell_iswordc(p))
|
|
792 {
|
|
793 cc = *p;
|
|
794 *p = NUL;
|
|
795 break;
|
|
796 }
|
223
|
797
|
226
|
798 /* check if we already have this dword */
|
|
799 hash = hash_hash(dw->dw_word);
|
|
800 hi = hash_lookup(&wi->wi_ht, dw->dw_word, hash);
|
223
|
801 if (!HASHITEM_EMPTY(hi))
|
|
802 {
|
226
|
803 /* Existing entry. */
|
|
804 edw = HI2DWORD(hi);
|
|
805 if ((edw->dw_flags & (DW_CAP | DW_RARE))
|
|
806 == (dw->dw_flags & (DW_CAP | DW_RARE)))
|
223
|
807 {
|
|
808 if (p_verbose > 0)
|
|
809 smsg((char_u *)_("Warning: duplicate word \"%s\" in %s"),
|
226
|
810 dw->dw_word, fname);
|
|
811 }
|
|
812 }
|
|
813
|
|
814 if (cc != NUL) /* nword */
|
|
815 {
|
|
816 if (HASHITEM_EMPTY(hi)
|
|
817 || (edw->dw_flags & DW_NWORD) == 0)
|
|
818 {
|
|
819 sblock_T *sb;
|
|
820
|
|
821 /* Need to allocate a new nword_T. Put it in an
|
|
822 * sblock_T, so that we can free it later. */
|
|
823 sb = (sblock_T *)alloc(
|
|
824 (unsigned)(sizeof(sblock_T)
|
|
825 + sizeof(nword_T) + wlen));
|
|
826 if (sb == NULL)
|
|
827 {
|
|
828 len = 0;
|
|
829 break;
|
|
830 }
|
|
831 sb->sb_next = load_lp->sl_block;
|
|
832 load_lp->sl_block = sb;
|
|
833 nw = (nword_T *)sb->sb_data;
|
|
834
|
|
835 ga_init2(&nw->nw_ga, sizeof(dword_T *), 4);
|
|
836 nw->nw_maxlen = 0;
|
|
837 STRCPY(nw->nw_word, dw->dw_word);
|
|
838 if (!HASHITEM_EMPTY(hi))
|
|
839 {
|
|
840 /* Note: the nw_region and nw_flags is for
|
|
841 * the dword that matches with the start
|
|
842 * of this nword, not for the nword
|
|
843 * itself! */
|
|
844 nw->nw_region = edw->dw_region;
|
|
845 nw->nw_flags = edw->dw_flags | DW_NWORD;
|
|
846
|
|
847 /* Remove the dword item so that we can
|
|
848 * add it as an nword. */
|
|
849 hash_remove(&wi->wi_ht, hi);
|
|
850 hi = hash_lookup(&wi->wi_ht,
|
|
851 nw->nw_word, hash);
|
|
852 }
|
|
853 else
|
|
854 {
|
|
855 nw->nw_region = 0;
|
|
856 nw->nw_flags = DW_NWORD;
|
|
857 }
|
223
|
858 }
|
|
859 else
|
226
|
860 nw = HI2NWORD(hi);
|
|
861 }
|
|
862
|
|
863 if (HASHITEM_EMPTY(hi))
|
|
864 {
|
|
865 /* Add new dword or nword entry. */
|
|
866 hash_add_item(&wi->wi_ht, hi, cc == NUL
|
|
867 ? dw->dw_word : nw->nw_word, hash);
|
|
868 if (cc == NUL)
|
|
869 {
|
|
870 /* New dword: init the values and count the
|
|
871 * used space. */
|
|
872 dw->dw_flags = DW_DWORD | flags;
|
|
873 dw->dw_region = region;
|
|
874 bl_used += sizeof(dword_T) + wlen;
|
|
875 }
|
|
876 }
|
|
877 else if (cc == NUL)
|
|
878 {
|
|
879 /* existing dword: add the region and flags */
|
|
880 dw = edw;
|
|
881 dw->dw_region |= region;
|
|
882 dw->dw_flags |= DW_DWORD | flags;
|
223
|
883 }
|
226
|
884
|
|
885 if (cc != NUL)
|
|
886 {
|
|
887 /* Use the dword for the non-word character and
|
|
888 * following characters. */
|
|
889 dw->dw_region = region;
|
|
890 dw->dw_flags = flags;
|
|
891 STRCPY(dw->dw_word + 1, p + 1);
|
|
892 dw->dw_word[0] = cc;
|
|
893 l = wlen - (p - dw->dw_word);
|
|
894 bl_used += sizeof(dword_T) + l;
|
|
895 if (nw->nw_maxlen < l)
|
|
896 nw->nw_maxlen = l;
|
|
897
|
|
898 /* Add the dword to the growarray in the nword. */
|
|
899 if (ga_grow(&nw->nw_ga, 1) == FAIL)
|
|
900 {
|
|
901 len = 0;
|
|
902 break;
|
|
903 }
|
|
904 *((dword_T **)nw->nw_ga.ga_data + nw->nw_ga.ga_len)
|
|
905 = dw;
|
|
906 ++nw->nw_ga.ga_len;
|
|
907 }
|
223
|
908 }
|
|
909 }
|
|
910
|
226
|
911 /* Skip over CR and NL characters and trailing white space. */
|
|
912 while (np < rbuf_end && *np <= ' ')
|
223
|
913 ++np;
|
|
914 }
|
|
915 }
|
|
916
|
|
917 close(fd);
|
226
|
918 theend:
|
|
919 sourcing_name = save_sourcing_name;
|
|
920 sourcing_lnum = save_sourcing_lnum;
|
|
921 vim_free(rbuf);
|
223
|
922 }
|
|
923
|
|
924 /*
|
|
925 * Parse 'spelllang' and set buf->b_langp accordingly.
|
|
926 * Returns an error message or NULL.
|
|
927 */
|
|
928 char_u *
|
|
929 did_set_spelllang(buf)
|
|
930 buf_T *buf;
|
|
931 {
|
|
932 garray_T ga;
|
|
933 char_u *lang;
|
|
934 char_u *e;
|
|
935 char_u *region;
|
|
936 int region_mask;
|
|
937 slang_T *lp;
|
|
938 int c;
|
|
939
|
|
940 ga_init2(&ga, sizeof(langp_T), 2);
|
|
941
|
|
942 /* loop over comma separated languages. */
|
|
943 for (lang = buf->b_p_spl; *lang != NUL; lang = e)
|
|
944 {
|
|
945 e = vim_strchr(lang, ',');
|
|
946 if (e == NULL)
|
|
947 e = lang + STRLEN(lang);
|
|
948 if (e > lang + 2)
|
|
949 {
|
|
950 if (lang[2] != '_' || e - lang != 5)
|
|
951 {
|
|
952 ga_clear(&ga);
|
|
953 return e_invarg;
|
|
954 }
|
|
955 region = lang + 3;
|
|
956 }
|
|
957 else
|
|
958 region = NULL;
|
|
959
|
|
960 for (lp = first_lang; lp != NULL; lp = lp->sl_next)
|
|
961 if (STRNICMP(lp->sl_name, lang, 2) == 0)
|
|
962 break;
|
|
963
|
|
964 if (lp == NULL)
|
|
965 /* Not found, load the language. */
|
|
966 lp = spell_load_lang(lang);
|
|
967
|
|
968 if (lp != NULL)
|
|
969 {
|
|
970 if (region == NULL)
|
|
971 region_mask = REGION_ALL;
|
|
972 else
|
|
973 {
|
|
974 /* find region in sl_regions */
|
|
975 c = find_region(lp->sl_regions, region);
|
|
976 if (c == REGION_ALL)
|
|
977 {
|
|
978 c = lang[5];
|
|
979 lang[5] = NUL;
|
|
980 smsg((char_u *)_("Warning: region %s not supported"), lang);
|
|
981 lang[5] = c;
|
|
982 region_mask = REGION_ALL;
|
|
983 }
|
|
984 else
|
|
985 region_mask = 1 << c;
|
|
986 }
|
|
987
|
|
988 if (ga_grow(&ga, 1) == FAIL)
|
|
989 {
|
|
990 ga_clear(&ga);
|
|
991 return e_outofmem;
|
|
992 }
|
|
993 LANGP_ENTRY(ga, ga.ga_len)->lp_slang = lp;
|
|
994 LANGP_ENTRY(ga, ga.ga_len)->lp_region = region_mask;
|
|
995 ++ga.ga_len;
|
|
996 }
|
|
997
|
|
998 if (*e == ',')
|
|
999 ++e;
|
|
1000 }
|
|
1001
|
|
1002 /* Add a NULL entry to mark the end of the list. */
|
|
1003 if (ga_grow(&ga, 1) == FAIL)
|
|
1004 {
|
|
1005 ga_clear(&ga);
|
|
1006 return e_outofmem;
|
|
1007 }
|
|
1008 LANGP_ENTRY(ga, ga.ga_len)->lp_slang = NULL;
|
|
1009 ++ga.ga_len;
|
|
1010
|
|
1011 /* Everything is fine, store the new b_langp value. */
|
|
1012 ga_clear(&buf->b_langp);
|
|
1013 buf->b_langp = ga;
|
|
1014
|
|
1015 return NULL;
|
|
1016 }
|
|
1017
|
|
1018 /*
|
|
1019 * Find the region "region[2]" in "rp" (points to "sl_regions").
|
|
1020 * Each region is simply stored as the two characters of it's name.
|
|
1021 * Returns the index if found, REGION_ALL if not found.
|
|
1022 */
|
|
1023 static int
|
|
1024 find_region(rp, region)
|
|
1025 char_u *rp;
|
|
1026 char_u *region;
|
|
1027 {
|
|
1028 int i;
|
|
1029
|
|
1030 for (i = 0; ; i += 2)
|
|
1031 {
|
|
1032 if (rp[i] == NUL)
|
|
1033 return REGION_ALL;
|
|
1034 if (rp[i] == region[0] && rp[i + 1] == region[1])
|
|
1035 break;
|
|
1036 }
|
|
1037 return i / 2;
|
|
1038 }
|
|
1039
|
|
# if defined(FEAT_MBYTE) || defined(PROTO)
/*
 * Clear all spelling tables and reload them.
 * Used after 'encoding' is set.
 * Frees everything that was allocated for the loaded languages, then handles
 * 'spelllang' again for every buffer where it is set.
 */
    void
spell_reload()
{
    buf_T       *buf;
    slang_T     *lp;
    sblock_T    *sp;
    winfo_T     *wi;
    hashitem_T  *hi;
    dword_T     *dw;
    int         round;
    int         todo;

    /* Initialize the table for spell_iswordc(). */
    init_spell_chartab();

    /* Unload all allocated memory. */
    while (first_lang != NULL)
    {
        lp = first_lang;
        first_lang = lp->sl_next;

        /* Do the keep-case words first, then the fold-case words. */
        for (round = 0; round <= 1; ++round)
        {
            wi = (round == 0) ? &lp->sl_kwords : &lp->sl_fwords;

            /* The growarray in each nword was allocated separately, it is
             * not in a block.  Free it while the hashtable can still be used
             * to find the nwords. */
            todo = (int)wi->wi_ht.ht_used;
            for (hi = wi->wi_ht.ht_array; todo > 0; ++hi)
                if (!HASHITEM_EMPTY(hi))
                {
                    --todo;
                    dw = HI2DWORD(hi);
                    if (dw->dw_flags & DW_NWORD)
                        ga_clear(&HI2NWORD(hi)->nw_ga);
                }

            hash_clear(&wi->wi_ht);
            ga_clear(&wi->wi_add);
        }

        /* Free the blocks that hold the words. */
        while (lp->sl_block != NULL)
        {
            sp = lp->sl_block;
            lp->sl_block = sp->sb_next;
            vim_free(sp);
        }

        /* The language itself was allocated in spell_load_lang(). */
        vim_free(lp);
    }

    /* Go through all buffers and handle 'spelllang'. */
    for (buf = firstbuf; buf != NULL; buf = buf->b_next)
    {
        ga_clear(&buf->b_langp);
        if (*buf->b_p_spl != NUL)
            did_set_spelllang(buf);
    }
}
# endif
|
|
1082
|
|
1083 #endif /* FEAT_SYN_HL */
|