Mercurial > vim
comparison src/spell.h @ 9583:b0c7061d6439 v7.4.2069
commit https://github.com/vim/vim/commit/9ccfebddc3ff2a3c2853cf706fd4c26f639bf381
Author: Bram Moolenaar <Bram@vim.org>
Date: Tue Jul 19 16:39:08 2016 +0200
patch 7.4.2069
Problem: spell.c is too big.
Solution: Split it in spell file handling and spell checking.
author | Christian Brabandt <cb@256bit.org> |
---|---|
date | Tue, 19 Jul 2016 16:45:06 +0200 |
parents | |
children | 4aead6a9b7a9 |
comparison
equal
deleted
inserted
replaced
9582:96737caf272d | 9583:b0c7061d6439 |
---|---|
1 /* vi:set ts=8 sts=4 sw=4: | |
2 * | |
3 * VIM - Vi IMproved by Bram Moolenaar | |
4 * | |
5 * Do ":help uganda" in Vim to read copying and usage conditions. | |
6 * Do ":help credits" in Vim to see a list of people who contributed. | |
7 * See README.txt for an overview of the Vim source code. | |
8 */ | |
9 | |
10 /* | |
11 * spell.h: common code for spell checking, used by spell.c and spellfile.c. | |
12 */ | |
13 | |
14 /* Use SPELL_PRINTTREE for debugging: dump the word tree after adding a word. | |
15 * Only use it for small word lists! */ | |
16 #if 0 | |
17 # define SPELL_PRINTTREE | |
18 #endif | |
19 | |
20 /* Use SPELL_COMPRESS_ALLWAYS for debugging: compress the word tree after | |
21 * adding a word. Only use it for small word lists! */ | |
22 #if 0 | |
23 # define SPELL_COMPRESS_ALLWAYS | |
24 #endif | |
25 | |
26 /* Use DEBUG_TRIEWALK to print the changes made in suggest_trie_walk() for a | |
27 * specific word. */ | |
28 #if 0 | |
29 # define DEBUG_TRIEWALK | |
30 #endif | |
31 | |
32 #define MAXWLEN 254 /* Assume max. word len is this many bytes. | |
33 Some places assume a word length fits in a | |
34 byte, thus it can't be above 255. | |
35 Must be >= PFD_NOTSPECIAL. */ | |
36 | |
37 /* The type used for indexes in the word tree needs to be at least 4 bytes. | |
38 * If int is 8 bytes we could use something smaller, but what? */ | |
39 #if VIM_SIZEOF_INT > 3 | |
40 typedef int idx_T; | |
41 #else | |
42 typedef long idx_T; | |
43 #endif | |
44 | |
45 #ifdef FEAT_MBYTE | |
46 typedef int salfirst_T; /* type of the entries in "sl_sal_first" */ | |
47 #else | |
48 typedef short salfirst_T; /* type of the entries in "sl_sal_first" */ | |
49 #endif | |
50 | |
51 /* | |
52 * Structure used to store words and other info for one language, loaded from | |
53 * a .spl file. | |
54 * The main access is through the tree in "sl_fbyts/sl_fidxs", storing the | |
55 * case-folded words. "sl_kbyts/sl_kidxs" is for keep-case words. | |
56 * | |
57 * The "byts" array stores the possible bytes in each tree node, preceded by | |
58 * the number of possible bytes, sorted on byte value: | |
59 * <len> <byte1> <byte2> ... | |
60 * The "idxs" array stores the index of the child node corresponding to the | |
61 * byte in "byts". | |
62 * Exception: when the byte is zero, the word may end here and "idxs" holds | |
63 * the flags, region mask and affixID for the word. There may be several | |
64 * zeros in sequence for alternative flag/region/affixID combinations. | |
65 */ | |
66 typedef struct slang_S slang_T; | |
67 struct slang_S | |
68 { | |
69 slang_T *sl_next; /* next language */ | |
70 char_u *sl_name; /* language name "en", "en.rare", "nl", etc. */ | |
71 char_u *sl_fname; /* name of .spl file */ | |
72 int sl_add; /* TRUE if it's a .add file. */ | |
73 | |
74 char_u *sl_fbyts; /* case-folded word bytes */ | |
75 idx_T *sl_fidxs; /* case-folded word indexes */ | |
76 char_u *sl_kbyts; /* keep-case word bytes */ | |
77 idx_T *sl_kidxs; /* keep-case word indexes */ | |
78 char_u *sl_pbyts; /* prefix tree word bytes */ | |
79 idx_T *sl_pidxs; /* prefix tree word indexes */ | |
80 | |
81 char_u *sl_info; /* infotext string or NULL */ | |
82 | |
83 char_u sl_regions[17]; /* table with up to 8 region names plus NUL */ | |
84 | |
85 char_u *sl_midword; /* MIDWORD string or NULL */ | |
86 | |
87 hashtab_T sl_wordcount; /* hashtable with word count, wordcount_T */ | |
88 | |
89 int sl_compmax; /* COMPOUNDWORDMAX (default: MAXWLEN) */ | |
90 int sl_compminlen; /* COMPOUNDMIN (default: 0) */ | |
91 int sl_compsylmax; /* COMPOUNDSYLMAX (default: MAXWLEN) */ | |
92 int sl_compoptions; /* COMP_* flags */ | |
93 garray_T sl_comppat; /* CHECKCOMPOUNDPATTERN items */ | |
94 regprog_T *sl_compprog; /* COMPOUNDRULE turned into a regexp program | |
95 * (NULL when no compounding) */ | |
96 char_u *sl_comprules; /* all COMPOUNDRULE concatenated (or NULL) */ | |
97 char_u *sl_compstartflags; /* flags for first compound word */ | |
98 char_u *sl_compallflags; /* all flags for compound words */ | |
99 char_u sl_nobreak; /* When TRUE: no spaces between words */ | |
100 char_u *sl_syllable; /* SYLLABLE repeatable chars or NULL */ | |
101 garray_T sl_syl_items; /* syllable items */ | |
102 | |
103 int sl_prefixcnt; /* number of items in "sl_prefprog" */ | |
104 regprog_T **sl_prefprog; /* table with regprogs for prefixes */ | |
105 | |
106 garray_T sl_rep; /* list of fromto_T entries from REP lines */ | |
107 short sl_rep_first[256]; /* indexes where byte first appears, -1 if | |
108 there is none */ | |
109 garray_T sl_sal; /* list of salitem_T entries from SAL lines */ | |
110 salfirst_T sl_sal_first[256]; /* indexes where byte first appears, -1 if | |
111 there is none */ | |
112 int sl_followup; /* SAL followup */ | |
113 int sl_collapse; /* SAL collapse_result */ | |
114 int sl_rem_accents; /* SAL remove_accents */ | |
115 int sl_sofo; /* SOFOFROM and SOFOTO instead of SAL items: | |
116 * "sl_sal_first" maps chars, when has_mbyte | |
117 * "sl_sal" is a list of wide char lists. */ | |
118 garray_T sl_repsal; /* list of fromto_T entries from REPSAL lines */ | |
119 short sl_repsal_first[256]; /* sl_rep_first for REPSAL lines */ | |
120 int sl_nosplitsugs; /* don't suggest splitting a word */ | |
121 int sl_nocompoundsugs; /* don't suggest compounding */ | |
122 | |
123 /* Info from the .sug file. Loaded on demand. */ | |
124 time_t sl_sugtime; /* timestamp for .sug file */ | |
125 char_u *sl_sbyts; /* soundfolded word bytes */ | |
126 idx_T *sl_sidxs; /* soundfolded word indexes */ | |
127 buf_T *sl_sugbuf; /* buffer with word number table */ | |
128 int sl_sugloaded; /* TRUE when .sug file was loaded or failed to | |
129 load */ | |
130 | |
131 int sl_has_map; /* TRUE if there is a MAP line */ | |
132 #ifdef FEAT_MBYTE | |
133 hashtab_T sl_map_hash; /* MAP for multi-byte chars */ | |
134 int sl_map_array[256]; /* MAP for first 256 chars */ | |
135 #else | |
136 char_u sl_map_array[256]; /* MAP for first 256 chars */ | |
137 #endif | |
138 hashtab_T sl_sounddone; /* table with soundfolded words that have | |
139 been handled, see add_sound_suggest() */ | |
140 }; | |
141 | |
142 #ifdef VMS | |
143 # define SPL_FNAME_TMPL "%s_%s.spl" | |
144 # define SPL_FNAME_ADD "_add." | |
145 # define SPL_FNAME_ASCII "_ascii." | |
146 #else | |
147 # define SPL_FNAME_TMPL "%s.%s.spl" | |
148 # define SPL_FNAME_ADD ".add." | |
149 # define SPL_FNAME_ASCII ".ascii." | |
150 #endif | |
151 | |
152 /* Flags used for a word. Only the lowest byte can be used, the region byte | |
153 * comes above it. */ | |
154 #define WF_REGION 0x01 /* region byte follows */ | |
155 #define WF_ONECAP 0x02 /* word with one capital (or all capitals) */ | |
156 #define WF_ALLCAP 0x04 /* word must be all capitals */ | |
157 #define WF_RARE 0x08 /* rare word */ | |
158 #define WF_BANNED 0x10 /* bad word */ | |
159 #define WF_AFX 0x20 /* affix ID follows */ | |
160 #define WF_FIXCAP 0x40 /* keep-case word, allcap not allowed */ | |
161 #define WF_KEEPCAP 0x80 /* keep-case word */ | |
162 | |
163 /* for <flags2>, shifted up one byte to be used in wn_flags */ | |
164 #define WF_HAS_AFF 0x0100 /* word includes affix */ | |
165 #define WF_NEEDCOMP 0x0200 /* word only valid in compound */ | |
166 #define WF_NOSUGGEST 0x0400 /* word not to be suggested */ | |
167 #define WF_COMPROOT 0x0800 /* already compounded word, COMPOUNDROOT */ | |
168 #define WF_NOCOMPBEF 0x1000 /* no compounding before this word */ | |
169 #define WF_NOCOMPAFT 0x2000 /* no compounding after this word */ | |
170 | |
171 /* flags for <pflags> */ | |
172 #define WFP_RARE 0x01 /* rare prefix */ | |
173 #define WFP_NC 0x02 /* prefix is not combining */ | |
174 #define WFP_UP 0x04 /* to-upper prefix */ | |
175 #define WFP_COMPPERMIT 0x08 /* prefix with COMPOUNDPERMITFLAG */ | |
176 #define WFP_COMPFORBID 0x10 /* prefix with COMPOUNDFORBIDFLAG */ | |
177 | |
178 /* Flags for postponed prefixes in "sl_pidxs". Must be above affixID (one | |
179 * byte) and prefcondnr (two bytes). */ | |
180 #define WF_RAREPFX (WFP_RARE << 24) /* rare postponed prefix */ | |
181 #define WF_PFX_NC (WFP_NC << 24) /* non-combining postponed prefix */ | |
182 #define WF_PFX_UP (WFP_UP << 24) /* to-upper postponed prefix */ | |
183 #define WF_PFX_COMPPERMIT (WFP_COMPPERMIT << 24) /* postponed prefix with | |
184 * COMPOUNDPERMITFLAG */ | |
185 #define WF_PFX_COMPFORBID (WFP_COMPFORBID << 24) /* postponed prefix with | |
186 * COMPOUNDFORBIDFLAG */ | |
187 | |
188 /* flags for <compoptions> */ | |
189 #define COMP_CHECKDUP 1 /* CHECKCOMPOUNDDUP */ | |
190 #define COMP_CHECKREP 2 /* CHECKCOMPOUNDREP */ | |
191 #define COMP_CHECKCASE 4 /* CHECKCOMPOUNDCASE */ | |
192 #define COMP_CHECKTRIPLE 8 /* CHECKCOMPOUNDTRIPLE */ | |
193 | |
194 /* Info from "REP", "REPSAL" and "SAL" entries in ".aff" file used in si_rep, | |
195 * si_repsal, sl_rep, sl_repsal and si_sal. Not for sl_sal! | |
196 * One replacement: from "ft_from" to "ft_to". */ | |
197 typedef struct fromto_S | |
198 { | |
199 char_u *ft_from; | |
200 char_u *ft_to; | |
201 } fromto_T; | |
202 | |
203 /* Info from "SAL" entries in ".aff" file used in sl_sal. | |
204 * The info is split for quick processing by spell_soundfold(). | |
205 * Note that "sm_oneof" and "sm_rules" point into sm_lead. */ | |
206 typedef struct salitem_S | |
207 { | |
208 char_u *sm_lead; /* leading letters */ | |
209 int sm_leadlen; /* length of "sm_lead" */ | |
210 char_u *sm_oneof; /* letters from () or NULL */ | |
211 char_u *sm_rules; /* rules like ^, $, priority */ | |
212 char_u *sm_to; /* replacement. */ | |
213 #ifdef FEAT_MBYTE | |
214 int *sm_lead_w; /* wide character copy of "sm_lead" */ | |
215 int *sm_oneof_w; /* wide character copy of "sm_oneof" */ | |
216 int *sm_to_w; /* wide character copy of "sm_to" */ | |
217 #endif | |
218 } salitem_T; | |
219 | |
220 /* Values for SP_*ERROR are negative, positive values are used by | |
221 * read_cnt_string(). */ | |
222 #define SP_TRUNCERROR -1 /* spell file truncated error */ | |
223 #define SP_FORMERROR -2 /* format error in spell file */ | |
224 #define SP_OTHERERROR -3 /* other error while reading spell file */ | |
225 | |
226 /* | |
227 * Structure used in "b_langp", filled from 'spelllang'. | |
228 */ | |
229 typedef struct langp_S | |
230 { | |
231 slang_T *lp_slang; /* info for this language */ | |
232 slang_T *lp_sallang; /* language used for sound folding or NULL */ | |
233 slang_T *lp_replang; /* language used for REP items or NULL */ | |
234 int lp_region; /* bitmask for region or REGION_ALL */ | |
235 } langp_T; | |
236 | |
237 #define LANGP_ENTRY(ga, i) (((langp_T *)(ga).ga_data) + (i)) | |
238 | |
239 #define VIMSUGMAGIC "VIMsug" /* string at start of Vim .sug file */ | |
240 #define VIMSUGMAGICL 6 | |
241 #define VIMSUGVERSION 1 | |
242 | |
243 /* | |
244 * The tables used for recognizing word characters according to spelling. | |
245 * These are only used for the first 256 characters of 'encoding'. | |
246 */ | |
247 typedef struct spelltab_S | |
248 { | |
249 char_u st_isw[256]; /* flags: is word char */ | |
250 char_u st_isu[256]; /* flags: is uppercase char */ | |
251 char_u st_fold[256]; /* chars: folded case */ | |
252 char_u st_upper[256]; /* chars: upper case */ | |
253 } spelltab_T; | |
254 | |
255 /* | |
256 * Use our own character-case definitions, because the current locale may | |
257 * differ from what the .spl file uses. | |
258 * These must not be called with a negative number! The argument is evaluated more than once, so it must not have side effects. | |
259 */ | |
260 #ifndef FEAT_MBYTE | |
261 /* Non-multi-byte implementation. */ | |
262 # define SPELL_TOFOLD(c) ((c) < 256 ? (int)spelltab.st_fold[c] : (c)) | |
263 # define SPELL_TOUPPER(c) ((c) < 256 ? (int)spelltab.st_upper[c] : (c)) | |
264 # define SPELL_ISUPPER(c) ((c) < 256 ? spelltab.st_isu[c] : FALSE) | |
265 #else | |
266 # if defined(HAVE_WCHAR_H) | |
267 # include <wchar.h> /* for towupper() and towlower() */ | |
268 # endif | |
269 /* Multi-byte implementation. For Unicode we can call utf_*(), but don't do | |
270 * that for ASCII, because we don't want to use 'casemap' here. Otherwise use | |
271 * the "w" library function for characters above 255 if available. */ | |
272 # ifdef HAVE_TOWLOWER | |
273 # define SPELL_TOFOLD(c) (enc_utf8 && (c) >= 128 ? utf_fold(c) \ | |
274 : (c) < 256 ? (int)spelltab.st_fold[c] : (int)towlower(c)) | |
275 # else | |
276 # define SPELL_TOFOLD(c) (enc_utf8 && (c) >= 128 ? utf_fold(c) \ | |
277 : (c) < 256 ? (int)spelltab.st_fold[c] : (c)) | |
278 # endif | |
279 | |
280 # ifdef HAVE_TOWUPPER | |
281 # define SPELL_TOUPPER(c) (enc_utf8 && (c) >= 128 ? utf_toupper(c) \ | |
282 : (c) < 256 ? (int)spelltab.st_upper[c] : (int)towupper(c)) | |
283 # else | |
284 # define SPELL_TOUPPER(c) (enc_utf8 && (c) >= 128 ? utf_toupper(c) \ | |
285 : (c) < 256 ? (int)spelltab.st_upper[c] : (c)) | |
286 # endif | |
287 | |
288 # ifdef HAVE_ISWUPPER | |
289 # define SPELL_ISUPPER(c) (enc_utf8 && (c) >= 128 ? utf_isupper(c) \ | |
290 : (c) < 256 ? spelltab.st_isu[c] : iswupper(c)) | |
291 # else | |
292 # define SPELL_ISUPPER(c) (enc_utf8 && (c) >= 128 ? utf_isupper(c) \ | |
293 : (c) < 256 ? spelltab.st_isu[c] : (FALSE)) | |
294 # endif | |
295 #endif | |
296 | |
297 #ifdef FEAT_SPELL | |
298 /* The variables below are defined and initialized where IN_SPELL_C is | |
299 * defined, everywhere else they are only declared "extern". */ | |
300 # ifdef IN_SPELL_C | |
301 # define SPELL_EXTERN | |
302 # define SPELL_INIT(x) x | |
303 # else | |
304 # define SPELL_EXTERN extern | |
305 # define SPELL_INIT(x) | |
306 # endif | |
307 /* First language that is loaded, start of the linked list of loaded languages. */ | |
308 SPELL_EXTERN slang_T *first_lang SPELL_INIT(= NULL); | |
309 | |
310 /* file used for "zG" and "zW" */ | |
311 SPELL_EXTERN char_u *int_wordlist SPELL_INIT(= NULL); | |
312 | |
313 /* error message for a format error in a spell file */ | |
314 SPELL_EXTERN char e_format[] SPELL_INIT(= N_("E759: Format error in spell file")); | |
315 /* character tables used by SPELL_TOFOLD(), SPELL_TOUPPER() and SPELL_ISUPPER() */ | |
316 SPELL_EXTERN spelltab_T spelltab; | |
317 SPELL_EXTERN int did_set_spelltab; | |
318 | |
319 #endif |