Mercurial > vim
comparison src/spell.c @ 249:f146656fb903
updated for version 7.0069
author | vimboss |
---|---|
date | Wed, 20 Apr 2005 19:37:22 +0000 |
parents | 8ff168d3720a |
children | c8742c8da9ab |
comparison
equal
deleted
inserted
replaced
248:f2d46e4a859d | 249:f146656fb903 |
---|---|
201 #define BWF_ADDS 0x0100 /* there are additions */ | 201 #define BWF_ADDS 0x0100 /* there are additions */ |
202 #define BWF_PREFIX 0x0200 /* has prefix NR list */ | 202 #define BWF_PREFIX 0x0200 /* has prefix NR list */ |
203 #define BWF_ALLCAP 0x0400 /* all letters must be capital (not used | 203 #define BWF_ALLCAP 0x0400 /* all letters must be capital (not used |
204 for single-letter words) */ | 204 for single-letter words) */ |
205 #define BWF_KEEPCAP 0x0800 /* Keep case as-is */ | 205 #define BWF_KEEPCAP 0x0800 /* Keep case as-is */ |
206 #define BWF_ADDS_M 0x1000 /* there are more than 255 additions */ | |
206 | 207 |
207 #define BWF_ADDHASH 0x8000 /* Internal: use hashtab for additions */ | 208 #define BWF_ADDHASH 0x8000 /* Internal: use hashtab for additions */ |
208 | 209 |
209 #define NOWC_KEY (char_u *)"x" /* hashtab key used for additions without | 210 #define NOWC_KEY (char_u *)"x" /* hashtab key used for additions without |
210 any word character */ | 211 any word character */ |
211 | 212 |
212 /* flags used for addition in the spell file */ | 213 /* flags used for addition in the spell file */ |
213 #define ADD_REGION 0x02 /* region byte follows */ | 214 #define ADD_REGION 0x02 /* region byte follows */ |
214 #define ADD_ONECAP 0x04 /* first letter must be capital */ | 215 #define ADD_ONECAP 0x04 /* first letter must be capital */ |
216 #define ADD_LEADLEN 0x10 /* there is a leadlen byte */ | |
217 #define ADD_COPYLEN 0x20 /* there is a copylen byte */ | |
215 #define ADD_ALLCAP 0x40 /* all letters must be capital (not used | 218 #define ADD_ALLCAP 0x40 /* all letters must be capital (not used |
216 for single-letter words) */ | 219 for single-letter words) */ |
217 #define ADD_KEEPCAP 0x80 /* fixed case */ | 220 #define ADD_KEEPCAP 0x80 /* fixed case */ |
218 | 221 |
219 /* Translate ADD_ flags to BWF_ flags. | 222 /* Translate ADD_ flags to BWF_ flags. |
220 * (Needed to keep ADD_ flags in one byte.) */ | 223 * (Needed to keep ADD_ flags in one byte.) */ |
221 #define ADD2BWF(x) (((x) & 0x0f) | (((x) & 0xf0) << 4)) | 224 #define ADD2BWF(x) (((x) & 0x0f) | (((x) & 0xf0) << 4)) |
222 | 225 |
223 #define VIMSPELLMAGIC "VIMspell02" /* string at start of Vim spell file */ | 226 #define VIMSPELLMAGIC "VIMspell03" /* string at start of Vim spell file */ |
224 #define VIMSPELLMAGICL 10 | 227 #define VIMSPELLMAGICL 10 |
225 | 228 |
226 /* | 229 /* |
227 * Structure to store info for word matching. | 230 * Structure to store info for word matching. |
228 */ | 231 */ |
1162 int | 1165 int |
1163 spell_move_to(dir, allwords) | 1166 spell_move_to(dir, allwords) |
1164 int dir; /* FORWARD or BACKWARD */ | 1167 int dir; /* FORWARD or BACKWARD */ |
1165 int allwords; /* TRUE for "[s" and "]s" */ | 1168 int allwords; /* TRUE for "[s" and "]s" */ |
1166 { | 1169 { |
1167 pos_T pos; | 1170 linenr_T lnum; |
1171 pos_T found_pos; | |
1168 char_u *line; | 1172 char_u *line; |
1169 char_u *p; | 1173 char_u *p; |
1170 int wc; | 1174 int wc; |
1171 int nwc; | 1175 int nwc; |
1172 int attr = 0; | 1176 int attr = 0; |
1173 int len; | 1177 int len; |
1178 int has_syntax = syntax_present(curbuf); | |
1179 int col; | |
1180 int can_spell; | |
1174 | 1181 |
1175 if (!curwin->w_p_spell || *curwin->w_buffer->b_p_spl == NUL) | 1182 if (!curwin->w_p_spell || *curwin->w_buffer->b_p_spl == NUL) |
1176 { | 1183 { |
1177 EMSG(_("E756: Spell checking not enabled")); | 1184 EMSG(_("E756: Spell checking not enabled")); |
1178 return FAIL; | 1185 return FAIL; |
1179 } | 1186 } |
1180 | 1187 |
1181 /* TODO: moving backwards */ | 1188 /* |
1182 | 1189 * Start looking for bad word at the start of the line, because we can't |
1183 /* Start looking for bad word at the start of the line, because we can't | 1190 * start halfway a word, we don't know where it starts or ends. |
1184 * start halfway a word and know where it ends. */ | 1191 * |
1185 pos = curwin->w_cursor; | 1192 * When searching backwards, we continue in the line to find the last |
1186 pos.col = 0; | 1193 * bad word (in the cursor line: before the cursor). |
1187 wc = FALSE; | 1194 */ |
1195 lnum = curwin->w_cursor.lnum; | |
1196 found_pos.lnum = 0; | |
1188 | 1197 |
1189 while (!got_int) | 1198 while (!got_int) |
1190 { | 1199 { |
1191 line = ml_get(pos.lnum); | 1200 line = ml_get(lnum); |
1192 p = line + pos.col; | 1201 p = line; |
1202 wc = FALSE; | |
1203 | |
1193 while (*p != NUL) | 1204 while (*p != NUL) |
1194 { | 1205 { |
1195 nwc = spell_iswordc(p); | 1206 nwc = spell_iswordc(p); |
1196 if (!wc && nwc) | 1207 if (!wc && nwc) |
1197 { | 1208 { |
1209 /* When searching backward don't search after the cursor. */ | |
1210 if (dir == BACKWARD | |
1211 && lnum == curwin->w_cursor.lnum | |
1212 && (colnr_T)(p - line) >= curwin->w_cursor.col) | |
1213 break; | |
1214 | |
1198 /* start of word */ | 1215 /* start of word */ |
1199 /* TODO: check for bad word attr */ | |
1200 len = spell_check(curwin, line, p, &attr); | 1216 len = spell_check(curwin, line, p, &attr); |
1217 | |
1201 if (attr != 0) | 1218 if (attr != 0) |
1202 { | 1219 { |
1203 if (curwin->w_cursor.lnum < pos.lnum | 1220 /* We found a bad word. Check the attribute. */ |
1204 || (curwin->w_cursor.lnum == pos.lnum | 1221 /* TODO: check for syntax @Spell cluster. */ |
1205 && curwin->w_cursor.col < (colnr_T)(p - line))) | 1222 if (allwords || attr == highlight_attr[HLF_SPB]) |
1206 { | 1223 { |
1207 curwin->w_cursor.lnum = pos.lnum; | 1224 /* When searching forward only accept a bad word after |
1208 curwin->w_cursor.col = p - line; | 1225 * the cursor. */ |
1209 return OK; | 1226 if (dir == BACKWARD |
1227 || lnum > curwin->w_cursor.lnum | |
1228 || (lnum == curwin->w_cursor.lnum | |
1229 && (colnr_T)(p - line) | |
1230 > curwin->w_cursor.col)) | |
1231 { | |
1232 if (has_syntax) | |
1233 { | |
1234 col = p - line; | |
1235 (void)syn_get_id(lnum, (colnr_T)col, | |
1236 FALSE, &can_spell); | |
1237 | |
1238 /* have to get the line again, a multi-line | |
1239 * regexp may make it invalid */ | |
1240 line = ml_get(lnum); | |
1241 p = line + col; | |
1242 } | |
1243 else | |
1244 can_spell = TRUE; | |
1245 | |
1246 if (can_spell) | |
1247 { | |
1248 found_pos.lnum = lnum; | |
1249 found_pos.col = p - line; | |
1250 #ifdef FEAT_VIRTUALEDIT | |
1251 found_pos.coladd = 0; | |
1252 #endif | |
1253 if (dir == FORWARD) | |
1254 { | |
1255 /* No need to search further. */ | |
1256 curwin->w_cursor = found_pos; | |
1257 return OK; | |
1258 } | |
1259 } | |
1260 } | |
1210 } | 1261 } |
1211 attr = 0; /* bad word is before or at cursor */ | 1262 attr = 0; |
1212 } | 1263 } |
1213 p += len; | 1264 p += len; |
1214 if (*p == NUL) | 1265 if (*p == NUL) |
1215 break; | 1266 break; |
1216 nwc = FALSE; | 1267 nwc = FALSE; |
1220 mb_ptr_adv(p); | 1271 mb_ptr_adv(p); |
1221 wc = nwc; | 1272 wc = nwc; |
1222 } | 1273 } |
1223 | 1274 |
1224 /* Advance to next line. */ | 1275 /* Advance to next line. */ |
1225 if (pos.lnum == curbuf->b_ml.ml_line_count) | 1276 if (dir == BACKWARD) |
1226 return FAIL; | 1277 { |
1227 ++pos.lnum; | 1278 if (found_pos.lnum != 0) |
1228 pos.col = 0; | 1279 { |
1229 wc = FALSE; | 1280 /* Use the last match in the line. */ |
1281 curwin->w_cursor = found_pos; | |
1282 return OK; | |
1283 } | |
1284 if (lnum == 1) | |
1285 return FAIL; | |
1286 --lnum; | |
1287 } | |
1288 else | |
1289 { | |
1290 if (lnum == curbuf->b_ml.ml_line_count) | |
1291 return FAIL; | |
1292 ++lnum; | |
1293 } | |
1230 | 1294 |
1231 line_breakcheck(); | 1295 line_breakcheck(); |
1232 } | 1296 } |
1233 | 1297 |
1234 return FAIL; /* interrupted */ | 1298 return FAIL; /* interrupted */ |
1771 fw->fw_region = REGION_ALL; | 1835 fw->fw_region = REGION_ALL; |
1772 | 1836 |
1773 fw->fw_adds = NULL; | 1837 fw->fw_adds = NULL; |
1774 if (flags & BWF_ADDS) | 1838 if (flags & BWF_ADDS) |
1775 { | 1839 { |
1776 adds = (getc(fd) << 8) + getc(fd); /* <addcnt> */ | 1840 if (flags & BWF_ADDS_M) |
1841 adds = (getc(fd) << 8) + getc(fd); /* <addcnt> */ | |
1842 else | |
1843 adds = getc(fd); /* <addcnt> */ | |
1777 | 1844 |
1778 if (adds > 30) | 1845 if (adds > 30) |
1779 { | 1846 { |
1780 /* Use a hashtable to lookup the part until the next word end. | 1847 /* Use a hashtable to lookup the part until the next word end. |
1781 * This uses more memory and involves some overhead, thus only | 1848 * This uses more memory and involves some overhead, thus only |
1793 else | 1860 else |
1794 ht = NULL; | 1861 ht = NULL; |
1795 | 1862 |
1796 while (--adds >= 0) | 1863 while (--adds >= 0) |
1797 { | 1864 { |
1798 /* <add>: <addflags> <addlen> [<leadlen> <addstring>] | 1865 /* <add>: <addflags> <addlen> [<leadlen>] [<copylen>] |
1799 * [<region>] */ | 1866 * [<addstring>] [<region>] */ |
1800 flags = getc(fd); /* <addflags> */ | 1867 flags = getc(fd); /* <addflags> */ |
1801 addlen = getc(fd); /* <addlen> */ | 1868 addlen = getc(fd); /* <addlen> */ |
1802 if (addlen == EOF) | 1869 if (addlen == EOF) |
1803 goto truncerr; | 1870 goto truncerr; |
1804 if (addlen >= MAXWLEN) | 1871 if (addlen >= MAXWLEN) |
1805 goto formerr; | 1872 goto formerr; |
1806 | 1873 |
1874 if (flags & ADD_LEADLEN) | |
1875 leadlen = getc(fd); /* <leadlen> */ | |
1876 else | |
1877 leadlen = 0; | |
1878 | |
1807 if (addlen > 0) | 1879 if (addlen > 0) |
1808 { | 1880 { |
1809 leadlen = getc(fd); /* <leadlen> */ | 1881 if (flags & ADD_COPYLEN) |
1810 for (i = 0; i < addlen; ++i) /* <addstring> */ | 1882 i = getc(fd); /* <copylen> */ |
1883 else | |
1884 i = 0; | |
1885 for ( ; i < addlen; ++i) /* <addstring> */ | |
1811 cbuf[i] = getc(fd); | 1886 cbuf[i] = getc(fd); |
1812 cbuf[i] = NUL; | 1887 cbuf[i] = NUL; |
1813 } | 1888 } |
1814 else | |
1815 leadlen = 0; | |
1816 | 1889 |
1817 if (flags & ADD_KEEPCAP) | 1890 if (flags & ADD_KEEPCAP) |
1818 { | 1891 { |
1819 /* <addstring> is in original case, need to get | 1892 /* <addstring> is in original case, need to get |
1820 * case-folded word too. */ | 1893 * case-folded word too. */ |
2290 } affhash_T; | 2363 } affhash_T; |
2291 | 2364 |
2292 static affhash_T dumas; | 2365 static affhash_T dumas; |
2293 #define HI2AS(hi) ((affhash_T *)((hi)->hi_key - (dumas.as_word - (char_u *)&dumas))) | 2366 #define HI2AS(hi) ((affhash_T *)((hi)->hi_key - (dumas.as_word - (char_u *)&dumas))) |
2294 | 2367 |
2368 /* info for writing the spell file */ | |
2369 typedef struct winfo_S | |
2370 { | |
2371 FILE *wif_fd; | |
2372 basicword_T *wif_prevbw; /* last written basic word */ | |
2373 int wif_regionmask; /* regions supported */ | |
2374 int wif_prefm; /* 1 or 2 bytes used for prefix NR */ | |
2375 int wif_suffm; /* 1 or 2 bytes used for suffix NR */ | |
2376 long wif_wcount; /* written word count */ | |
2377 long wif_acount; /* written addition count */ | |
2378 long wif_addmax; /* max number of additions on one word */ | |
2379 char_u *wif_addmaxw; /* word with max additions */ | |
2380 } winfo_T; | |
2381 | |
2295 | 2382 |
2296 static afffile_T *spell_read_aff __ARGS((char_u *fname, vimconv_T *conv, int ascii)); | 2383 static afffile_T *spell_read_aff __ARGS((char_u *fname, vimconv_T *conv, int ascii)); |
2297 static void spell_free_aff __ARGS((afffile_T *aff)); | 2384 static void spell_free_aff __ARGS((afffile_T *aff)); |
2298 static int has_non_ascii __ARGS((char_u *s)); | 2385 static int has_non_ascii __ARGS((char_u *s)); |
2299 static int spell_read_dic __ARGS((hashtab_T *ht, char_u *fname, vimconv_T *conv, int ascii)); | 2386 static int spell_read_dic __ARGS((hashtab_T *ht, char_u *fname, vimconv_T *conv, int ascii)); |
2311 static void add_to_wordlist __ARGS((hashtab_T *newwords, basicword_T *bw)); | 2398 static void add_to_wordlist __ARGS((hashtab_T *newwords, basicword_T *bw)); |
2312 static void put_bytes __ARGS((FILE *fd, long_u nr, int len)); | 2399 static void put_bytes __ARGS((FILE *fd, long_u nr, int len)); |
2313 static void write_affix __ARGS((FILE *fd, affheader_T *ah)); | 2400 static void write_affix __ARGS((FILE *fd, affheader_T *ah)); |
2314 static void write_affixlist __ARGS((FILE *fd, garray_T *aff, int bytes)); | 2401 static void write_affixlist __ARGS((FILE *fd, garray_T *aff, int bytes)); |
2315 static void write_vim_spell __ARGS((char_u *fname, garray_T *prefga, garray_T *suffga, hashtab_T *newwords, int regcount, char_u *regchars)); | 2402 static void write_vim_spell __ARGS((char_u *fname, garray_T *prefga, garray_T *suffga, hashtab_T *newwords, int regcount, char_u *regchars)); |
2316 static void write_bword __ARGS((FILE *fd, basicword_T *bw, int lowcap, basicword_T **prevbw, int regionmask, int prefm, int suffm)); | 2403 static void write_bword __ARGS((winfo_T *wif, basicword_T *bw, int lowcap)); |
2317 static void free_wordtable __ARGS((hashtab_T *ht)); | 2404 static void free_wordtable __ARGS((hashtab_T *ht)); |
2318 static void free_basicword __ARGS((basicword_T *bw)); | 2405 static void free_basicword __ARGS((basicword_T *bw)); |
2319 static void free_affixentries __ARGS((affentry_T *first)); | 2406 static void free_affixentries __ARGS((affentry_T *first)); |
2320 static void free_affix_entry __ARGS((affentry_T *ap)); | 2407 static void free_affix_entry __ARGS((affentry_T *ap)); |
2321 | 2408 |
4017 * Vim spell file format: <HEADER> <PREFIXLIST> <SUFFIXLIST> | 4104 * Vim spell file format: <HEADER> <PREFIXLIST> <SUFFIXLIST> |
4018 * <SUGGEST> <WORDLIST> | 4105 * <SUGGEST> <WORDLIST> |
4019 * | 4106 * |
4020 * <HEADER>: <fileID> <regioncnt> <regionname> ... | 4107 * <HEADER>: <fileID> <regioncnt> <regionname> ... |
4021 * | 4108 * |
4022 * <fileID> 10 bytes "VIMspell02" | 4109 * <fileID> 10 bytes "VIMspell03" |
4023 * <regioncnt> 1 byte number of regions following (8 supported) | 4110 * <regioncnt> 1 byte number of regions following (8 supported) |
4024 * <regionname> 2 bytes Region name: ca, au, etc. | 4111 * <regionname> 2 bytes Region name: ca, au, etc. |
4025 * First <regionname> is region 1. | 4112 * First <regionname> is region 1. |
4026 * | 4113 * |
4027 * | 4114 * |
4083 * BWF_ADDS | 4170 * BWF_ADDS |
4084 * 0x02: has prefixes, <affixcnt> and <affixNR> follow | 4171 * 0x02: has prefixes, <affixcnt> and <affixNR> follow |
4085 * BWF_PREFIX | 4172 * BWF_PREFIX |
4086 * 0x04: all letters must be upper-case, BWF_ALLCAP | 4173 * 0x04: all letters must be upper-case, BWF_ALLCAP |
4087 * 0x08: case must match, BWF_KEEPCAP | 4174 * 0x08: case must match, BWF_KEEPCAP |
4175 * 0x10: has more than 255 additions, <addcnt> is two | |
4176 * bytes, BWF_ADDS_M | |
4088 * 0x10-0x80: unset | 4177 * 0x20-0x80: unset |
4089 * <caselen> 1 byte Length of <caseword>. | 4178 * <caselen> 1 byte Length of <caseword>. |
4090 * <caseword> N bytes Word with matching case. | 4179 * <caseword> N bytes Word with matching case. |
4091 * <affixcnt> 1 byte Number of affix NRs following. | 4180 * <affixcnt> 1 byte Number of affix NRs following. |
4092 * <affixNR> 1 or 2 byte Number of possible affix for this word. | 4181 * <affixNR> 1 or 2 byte Number of possible affix for this word. |
4093 * When using 2 bytes MSB comes first. | 4182 * When using 2 bytes MSB comes first. |
4094 * <region> 1 byte Bitmask for regions in which word is valid. When | 4183 * <region> 1 byte Bitmask for regions in which word is valid. When |
4095 * omitted it's valid in all regions. | 4184 * omitted it's valid in all regions. |
4096 * Lowest bit is for region 1. | 4185 * Lowest bit is for region 1. |
4097 * <addcnt> 2 bytes Number of <add> items following. | 4186 * <addcnt> 1 or 2 byte Number of <add> items following. |
4098 * | 4187 * |
4099 * <add>: <addflags> <addlen> [<leadlen> <addstring>] [<region>] | 4188 * <add>: <addflags> <addlen> [<leadlen>] [<copylen>] [<addstring>] [<region>] |
4100 * | 4189 * |
4101 * <addflags> 1 byte 0x01: unset | 4190 * <addflags> 1 byte 0x01: unset |
4102 * 0x02: has region byte, ADD_REGION | 4191 * 0x02: has region byte, ADD_REGION |
4103 * 0x04: first letter must be upper-case, ADD_ONECAP | 4192 * 0x04: first letter must be upper-case, ADD_ONECAP |
4104 * 0x08-0x20: unset | 4193 * 0x08: unset |
4194 * 0x10: has a <leadlen>, ADD_LEADLEN | |
4195 * 0x20: has a <copylen>, ADD_COPYLEN | |
4105 * 0x40: all letters must be upper-case, ADD_ALLCAP | 4196 * 0x40: all letters must be upper-case, ADD_ALLCAP |
4106 * 0x80: fixed case, <addstring> is the whole word | 4197 * 0x80: fixed case, <addstring> is the whole word |
4107 * with matching case, ADD_KEEPCAP. | 4198 * with matching case, ADD_KEEPCAP. |
4108 * <addlen> 1 byte Length of <addstring> in bytes. | 4199 * <addlen> 1 byte Length of <addstring> in bytes. |
4109 * <leadlen> 1 byte Number of bytes at start of <addstring> that must | 4200 * <leadlen> 1 byte Number of bytes at start of <addstring> that must |
4110 * come before the start of the basic word. | 4201 * come before the start of the basic word. |
4202 * <copylen> 1 byte Number of bytes copied from previous <addstring>. | |
4111 * <addstring> N bytes Word characters, before/in/after the word. | 4203 * <addstring> N bytes Word characters, before/in/after the word. |
4112 * | 4204 * |
4113 * All text characters are in 'encoding': <affchop>, <affadd>, <string>, | 4205 * All text characters are in 'encoding': <affchop>, <affadd>, <string>, |
4114 * <caseword> and <addstring>. | 4206 * <caseword> and <addstring>. |
4115 * All other fields are ASCII: <regionname> | 4207 * All other fields are ASCII: <regionname> |
4126 garray_T *suffga; /* suffixes, affheader_T entries */ | 4218 garray_T *suffga; /* suffixes, affheader_T entries */ |
4127 hashtab_T *newwords; /* basic words, basicword_T entries */ | 4219 hashtab_T *newwords; /* basic words, basicword_T entries */ |
4128 int regcount; /* number of regions */ | 4220 int regcount; /* number of regions */ |
4129 char_u *regchars; /* region names */ | 4221 char_u *regchars; /* region names */ |
4130 { | 4222 { |
4131 FILE *fd; | 4223 winfo_T wif; |
4132 garray_T *gap; | 4224 garray_T *gap; |
4133 hashitem_T *hi; | 4225 hashitem_T *hi; |
4134 char_u **wtab; | 4226 char_u **wtab; |
4135 int todo; | 4227 int todo; |
4136 int flags, aflags; | 4228 int flags, aflags; |
4137 basicword_T *bw, *bwf, *bw2 = NULL, *prevbw = NULL; | 4229 basicword_T *bw, *bwf, *bw2 = NULL; |
4138 int regionmask; /* mask for all relevant region bits */ | |
4139 int i; | 4230 int i; |
4140 int cnt; | 4231 int cnt; |
4141 affentry_T *ae; | 4232 affentry_T *ae; |
4142 int round; | 4233 int round; |
4143 int prefm, suffm; | |
4144 garray_T bwga; | 4234 garray_T bwga; |
4145 | 4235 |
4146 fd = fopen((char *)fname, "w"); | 4236 vim_memset(&wif, 0, sizeof(winfo_T)); |
4147 if (fd == NULL) | 4237 |
4238 wif.wif_fd = fopen((char *)fname, "w"); | |
4239 if (wif.wif_fd == NULL) | |
4148 { | 4240 { |
4149 EMSG2(_(e_notopen), fname); | 4241 EMSG2(_(e_notopen), fname); |
4150 return; | 4242 return; |
4151 } | 4243 } |
4152 | 4244 |
4153 fwrite(VIMSPELLMAGIC, VIMSPELLMAGICL, (size_t)1, fd); | 4245 fwrite(VIMSPELLMAGIC, VIMSPELLMAGICL, (size_t)1, wif.wif_fd); |
4154 | 4246 |
4155 /* write the region names if there is more than one */ | 4247 /* write the region names if there is more than one */ |
4156 if (regcount > 1) | 4248 if (regcount > 1) |
4157 { | 4249 { |
4158 putc(regcount, fd); | 4250 putc(regcount, wif.wif_fd); |
4159 fwrite(regchars, (size_t)(regcount * 2), (size_t)1, fd); | 4251 fwrite(regchars, (size_t)(regcount * 2), (size_t)1, wif.wif_fd); |
4160 regionmask = (1 << regcount) - 1; | 4252 wif.wif_regionmask = (1 << regcount) - 1; |
4161 } | 4253 } |
4162 else | 4254 else |
4163 { | 4255 { |
4164 putc(0, fd); | 4256 putc(0, wif.wif_fd); |
4165 regionmask = 0; | 4257 wif.wif_regionmask = 0; |
4166 } | 4258 } |
4167 | 4259 |
4168 /* Write the prefix and suffix lists. */ | 4260 /* Write the prefix and suffix lists. */ |
4169 for (round = 1; round <= 2; ++round) | 4261 for (round = 1; round <= 2; ++round) |
4170 { | 4262 { |
4171 gap = round == 1 ? prefga : suffga; | 4263 gap = round == 1 ? prefga : suffga; |
4172 put_bytes(fd, (long_u)gap->ga_len, 2); /* <affcount> */ | 4264 put_bytes(wif.wif_fd, (long_u)gap->ga_len, 2); /* <affcount> */ |
4173 | 4265 |
4174 /* Count the total number of affix items. */ | 4266 /* Count the total number of affix items. */ |
4175 cnt = 0; | 4267 cnt = 0; |
4176 for (i = 0; i < gap->ga_len; ++i) | 4268 for (i = 0; i < gap->ga_len; ++i) |
4177 for (ae = ((affheader_T *)gap->ga_data + i)->ah_first; | 4269 for (ae = ((affheader_T *)gap->ga_data + i)->ah_first; |
4178 ae != NULL; ae = ae->ae_next) | 4270 ae != NULL; ae = ae->ae_next) |
4179 ++cnt; | 4271 ++cnt; |
4180 put_bytes(fd, (long_u)cnt, 2); /* <afftotcnt> */ | 4272 put_bytes(wif.wif_fd, (long_u)cnt, 2); /* <afftotcnt> */ |
4181 | 4273 |
4182 for (i = 0; i < gap->ga_len; ++i) | 4274 for (i = 0; i < gap->ga_len; ++i) |
4183 write_affix(fd, (affheader_T *)gap->ga_data + i); | 4275 write_affix(wif.wif_fd, (affheader_T *)gap->ga_data + i); |
4184 } | 4276 } |
4185 | 4277 |
4186 /* Number of bytes used for affix NR depends on affix count. */ | 4278 /* Number of bytes used for affix NR depends on affix count. */ |
4187 prefm = (prefga->ga_len > 256) ? 2 : 1; | 4279 wif.wif_prefm = (prefga->ga_len > 256) ? 2 : 1; |
4188 suffm = (suffga->ga_len > 256) ? 2 : 1; | 4280 wif.wif_suffm = (suffga->ga_len > 256) ? 2 : 1; |
4189 | 4281 |
4190 /* Write the suggest info. TODO */ | 4282 /* Write the suggest info. TODO */ |
4191 put_bytes(fd, 0L, 4); | 4283 put_bytes(wif.wif_fd, 0L, 4); |
4192 | 4284 |
4193 /* | 4285 /* |
4194 * Write the word list. <wordcount> <worditem> ... | 4286 * Write the word list. <wordcount> <worditem> ... |
4195 */ | 4287 */ |
4196 /* number of basic words in 4 bytes */ | 4288 /* number of basic words in 4 bytes */ |
4197 put_bytes(fd, newwords->ht_used, 4); /* <wordcount> */ | 4289 put_bytes(wif.wif_fd, newwords->ht_used, 4); /* <wordcount> */ |
4198 | 4290 |
4199 /* | 4291 /* |
4200 * Sort the word list, so that we can reuse as many bytes as possible. | 4292 * Sort the word list, so that we can copy as many bytes as possible from |
4293 * the previous word. | |
4201 */ | 4294 */ |
4202 wtab = (char_u **)alloc((unsigned)(sizeof(char_u *) * newwords->ht_used)); | 4295 wtab = (char_u **)alloc((unsigned)(sizeof(char_u *) * newwords->ht_used)); |
4203 if (wtab != NULL) | 4296 if (wtab != NULL) |
4204 { | 4297 { |
4205 /* Make a table with pointers to each word. */ | 4298 /* Make a table with pointers to each word. */ |
4277 | 4370 |
4278 /* Write first basic word. If it's KEEPCAP then we need a word | 4371 /* Write first basic word. If it's KEEPCAP then we need a word |
4279 * without VALID flag first (makes it easier to read the list back | 4372 * without VALID flag first (makes it easier to read the list back |
4280 * in). */ | 4373 * in). */ |
4281 if (bw->bw_flags & BWF_KEEPCAP) | 4374 if (bw->bw_flags & BWF_KEEPCAP) |
4282 write_bword(fd, bw, TRUE, &prevbw, regionmask, prefm, suffm); | 4375 write_bword(&wif, bw, TRUE); |
4283 write_bword(fd, bw, FALSE, &prevbw, regionmask, prefm, suffm); | 4376 write_bword(&wif, bw, FALSE); |
4284 | 4377 |
4285 /* Write other basic words, with different caps. */ | 4378 /* Write other basic words, with different caps. */ |
4286 for (i = 0; i < bwga.ga_len; ++i) | 4379 for (i = 0; i < bwga.ga_len; ++i) |
4287 { | 4380 { |
4288 bw2 = ((basicword_T **)bwga.ga_data)[i]; | 4381 bw2 = ((basicword_T **)bwga.ga_data)[i]; |
4289 if (bw2 != bw) | 4382 if (bw2 != bw) |
4290 write_bword(fd, bw2, FALSE, &prevbw, regionmask, | 4383 write_bword(&wif, bw2, FALSE); |
4291 prefm, suffm); | |
4292 } | 4384 } |
4293 } | 4385 } |
4294 | 4386 |
4295 ga_clear(&bwga); | 4387 ga_clear(&bwga); |
4296 } | 4388 } |
4297 | 4389 |
4298 fclose(fd); | 4390 fclose(wif.wif_fd); |
4391 | |
4392 /* Print a few statistics. */ | |
4393 if (wif.wif_addmaxw == NULL) | |
4394 wif.wif_addmaxw = (char_u *)""; | |
4395 smsg((char_u *)_("Maximum number of adds on a word: %ld (%s)"), | |
4396 wif.wif_addmax, wif.wif_addmaxw); | |
4397 smsg((char_u *)_("Average number of adds on a word: %f"), | |
4398 (float)wif.wif_acount / (float)wif.wif_wcount); | |
4399 } | |
4400 | |
4401 /* | |
4402 * Compare two basic words for their <addstring>. | |
4403 */ | |
4404 static int | |
4405 #ifdef __BORLANDC__ | |
4406 _RTLENTRYF | |
4407 #endif | |
4408 bw_compare __ARGS((const void *s1, const void *s2)); | |
4409 | |
4410 static int | |
4411 #ifdef __BORLANDC__ | |
4412 _RTLENTRYF | |
4413 #endif | |
4414 bw_compare(s1, s2) | |
4415 const void *s1; | |
4416 const void *s2; | |
4417 { | |
4418 basicword_T *bw1 = *(basicword_T **)s1; | |
4419 basicword_T *bw2 = *(basicword_T **)s2; | |
4420 int i = 0; | |
4421 | |
4422 /* compare the leadstrings */ | |
4423 if (bw1->bw_leadstring == NULL) | |
4424 { | |
4425 if (bw2->bw_leadstring != NULL) | |
4426 return 1; | |
4427 } | |
4428 else if (bw2->bw_leadstring == NULL) | |
4429 return -1; | |
4430 else | |
4431 i = STRCMP(bw1->bw_leadstring, bw2->bw_leadstring); | |
4432 | |
4433 if (i == 0) | |
4434 { | |
4435 /* leadstrings are identical, compare the addstrings */ | |
4436 if (bw1->bw_addstring == NULL) | |
4437 { | |
4438 if (bw2->bw_addstring != NULL) | |
4439 return 1; | |
4440 } | |
4441 else if (bw2->bw_addstring == NULL) | |
4442 return -1; | |
4443 else | |
4444 i = STRCMP(bw1->bw_addstring, bw2->bw_addstring); | |
4445 } | |
4446 return i; | |
4299 } | 4447 } |
4300 | 4448 |
4301 /* | 4449 /* |
4302 * Write basic word, followed by any additions. | 4450 * Write basic word, followed by any additions. |
4303 * | 4451 * |
4307 * [<affixcnt> <affixNR> ...] (suffixes) | 4455 * [<affixcnt> <affixNR> ...] (suffixes) |
4308 * [<region>] | 4456 * [<region>] |
4309 * [<addcnt> <add> ...] | 4457 * [<addcnt> <add> ...] |
4310 */ | 4458 */ |
4311 static void | 4459 static void |
4312 write_bword(fd, bwf, lowcap, prevbw, regionmask, prefm, suffm) | 4460 write_bword(wif, bwf, lowcap) |
4313 FILE *fd; | 4461 winfo_T *wif; /* info for writing */ |
4314 basicword_T *bwf; | 4462 basicword_T *bwf; |
4315 int lowcap; /* write KEEPCAP word as not-valid */ | 4463 int lowcap; /* write KEEPCAP word as not-valid */ |
4316 basicword_T **prevbw; /* last written basic word */ | 4464 { |
4317 int regionmask; /* mask that includes all possible regions */ | 4465 FILE *fd = wif->wif_fd; |
4318 int prefm; | |
4319 int suffm; | |
4320 { | |
4321 int flags; | 4466 int flags; |
4322 int aflags; | 4467 int aflags; |
4323 int len; | 4468 int len; |
4324 int leadlen, addlen; | 4469 int leadlen, addlen; |
4470 int copylen; | |
4325 int clen; | 4471 int clen; |
4326 int adds = 0; | 4472 int adds = 0; |
4327 int i; | 4473 int i; |
4474 int idx; | |
4328 basicword_T *bw, *bw2; | 4475 basicword_T *bw, *bw2; |
4476 basicword_T **wtab; | |
4477 int count; | |
4478 int l; | |
4329 | 4479 |
4330 /* Check how many bytes can be copied from the previous word. */ | 4480 /* Check how many bytes can be copied from the previous word. */ |
4331 len = STRLEN(bwf->bw_word); | 4481 len = STRLEN(bwf->bw_word); |
4332 if (*prevbw == NULL) | 4482 if (wif->wif_prevbw == NULL) |
4333 clen = 0; | 4483 clen = 0; |
4334 else | 4484 else |
4335 for (clen = 0; clen < len | 4485 for (clen = 0; clen < len |
4336 && (*prevbw)->bw_word[clen] == bwf->bw_word[clen]; ++clen) | 4486 && wif->wif_prevbw->bw_word[clen] == bwf->bw_word[clen]; ++clen) |
4337 ; | 4487 ; |
4338 putc(clen, fd); /* <nr> */ | 4488 putc(clen, fd); /* <nr> */ |
4339 *prevbw = bwf; | 4489 wif->wif_prevbw = bwf; |
4340 /* <string> */ | 4490 /* <string> */ |
4341 if (len > clen) | 4491 if (len > clen) |
4342 fwrite(bwf->bw_word + clen, (size_t)(len - clen), (size_t)1, fd); | 4492 fwrite(bwf->bw_word + clen, (size_t)(len - clen), (size_t)1, fd); |
4343 | 4493 |
4344 /* Try to find a word without additions to use first. */ | 4494 /* Try to find a word without additions to use first. */ |
4358 { | 4508 { |
4359 flags |= BWF_VALID; | 4509 flags |= BWF_VALID; |
4360 | 4510 |
4361 /* Flags: add the region byte if the word isn't valid in all | 4511 /* Flags: add the region byte if the word isn't valid in all |
4362 * regions. */ | 4512 * regions. */ |
4363 if (regionmask != 0 && (bw->bw_region & regionmask) != regionmask) | 4513 if (wif->wif_regionmask != 0 && (bw->bw_region & wif->wif_regionmask) |
4514 != wif->wif_regionmask) | |
4364 flags |= BWF_REGION; | 4515 flags |= BWF_REGION; |
4365 } | 4516 } |
4366 /* Add the prefix/suffix list if there are prefixes/suffixes. */ | 4517 /* Add the prefix/suffix list if there are prefixes/suffixes. */ |
4367 if (bw->bw_leadstring == NULL && bw->bw_prefix.ga_len > 0) | 4518 if (bw->bw_leadstring == NULL && bw->bw_prefix.ga_len > 0) |
4368 flags |= BWF_PREFIX; | 4519 flags |= BWF_PREFIX; |
4369 if (bw->bw_addstring == NULL && bw->bw_suffix.ga_len > 0) | 4520 if (bw->bw_addstring == NULL && bw->bw_suffix.ga_len > 0) |
4370 flags |= BWF_SUFFIX; | 4521 flags |= BWF_SUFFIX; |
4371 | 4522 |
4372 /* Flags: may have additions. */ | 4523 /* Flags: may have additions. */ |
4373 if (adds > 0) | 4524 if (adds > 0) |
4525 { | |
4374 flags |= BWF_ADDS; | 4526 flags |= BWF_ADDS; |
4527 if (adds >= 256) | |
4528 flags |= BWF_ADDS_M; | |
4529 } | |
4375 | 4530 |
4376 /* The dummy word before a KEEPCAP word doesn't have any flags, they are | 4531 /* The dummy word before a KEEPCAP word doesn't have any flags, they are |
4377 * in the actual word that follows. */ | 4532 * in the actual word that follows. */ |
4378 if (lowcap) | 4533 if (lowcap) |
4379 flags = 0; | 4534 flags = 0; |
4401 putc(bw->bw_caseword[i], fd); /* <caseword> */ | 4556 putc(bw->bw_caseword[i], fd); /* <caseword> */ |
4402 } | 4557 } |
4403 | 4558 |
4404 /* write prefix and suffix lists: <affixcnt> <affixNR> ... */ | 4559 /* write prefix and suffix lists: <affixcnt> <affixNR> ... */ |
4405 if (flags & BWF_PREFIX) | 4560 if (flags & BWF_PREFIX) |
4406 write_affixlist(fd, &bw->bw_prefix, prefm); | 4561 write_affixlist(fd, &bw->bw_prefix, wif->wif_prefm); |
4407 if (flags & BWF_SUFFIX) | 4562 if (flags & BWF_SUFFIX) |
4408 write_affixlist(fd, &bw->bw_suffix, suffm); | 4563 write_affixlist(fd, &bw->bw_suffix, wif->wif_suffm); |
4409 | 4564 |
4410 if (flags & BWF_REGION) | 4565 if (flags & BWF_REGION) |
4411 putc(bw->bw_region, fd); /* <region> */ | 4566 putc(bw->bw_region, fd); /* <region> */ |
4567 | |
4568 ++wif->wif_wcount; | |
4412 | 4569 |
4413 /* | 4570 /* |
4414 * Additions. | 4571 * Additions. |
4415 */ | 4572 */ |
4416 if (adds > 0) | 4573 if (adds > 0) |
4417 { | 4574 { |
4418 put_bytes(fd, (long_u)adds, 2); /* <addcnt> */ | 4575 if (adds >= 256) |
4419 | 4576 put_bytes(fd, (long_u)adds, 2); /* 2 byte <addcnt> */ |
4577 else | |
4578 putc(adds, fd); /* 1 byte <addcnt> */ | |
4579 | |
4580 /* statistics */ | |
4581 wif->wif_acount += adds; | |
4582 if (wif->wif_addmax < adds) | |
4583 { | |
4584 wif->wif_addmax = adds; | |
4585 wif->wif_addmaxw = bw->bw_word; | |
4586 } | |
4587 | |
4588 /* | |
4589 * Sort the list of additions, so that we can copy as many bytes as | |
4590 * possible from the previous addstring. | |
4591 */ | |
4592 | |
4593 /* Make a table with pointers to each basic word that has additions. */ | |
4594 wtab = (basicword_T **)alloc((unsigned)(sizeof(basicword_T *) * adds)); | |
4595 if (wtab == NULL) | |
4596 return; | |
4597 count = 0; | |
4420 for (bw = bwf; bw != NULL; bw = bw->bw_cnext) | 4598 for (bw = bwf; bw != NULL; bw = bw->bw_cnext) |
4421 if (bw->bw_leadstring != NULL || bw->bw_addstring != NULL) | 4599 if (bw->bw_leadstring != NULL || bw->bw_addstring != NULL) |
4422 { | 4600 wtab[count++] = bw; |
4423 /* <add>: <addflags> <addlen> [<leadlen> <addstring>] | 4601 |
4424 * [<region>] */ | 4602 /* Sort. */ |
4425 aflags = 0; | 4603 qsort((void *)wtab, (size_t)count, sizeof(basicword_T *), bw_compare); |
4426 if (bw->bw_flags & BWF_ONECAP) | 4604 |
4427 aflags |= ADD_ONECAP; | 4605 /* Now write each basic word to the spell file. Copy bytes from the |
4428 if (bw->bw_flags & BWF_ALLCAP) | 4606 * previous leadstring/addstring if possible. */ |
4429 aflags |= ADD_ALLCAP; | 4607 bw2 = NULL; |
4430 if (bw->bw_flags & BWF_KEEPCAP) | 4608 for (idx = 0; idx < count; ++idx) |
4431 aflags |= ADD_KEEPCAP; | 4609 { |
4432 if (regionmask != 0 | 4610 bw = wtab[idx]; |
4433 && (bw->bw_region & regionmask) != regionmask) | 4611 |
4434 aflags |= ADD_REGION; | 4612 /* <add>: <addflags> <addlen> [<leadlen>] [<copylen>] |
4435 putc(aflags, fd); /* <addflags> */ | 4613 * [<addstring>] [<region>] */ |
4436 | 4614 copylen = 0; |
4437 if (bw->bw_leadstring == NULL) | 4615 if (bw->bw_leadstring == NULL) |
4438 leadlen = 0; | 4616 leadlen = 0; |
4617 else | |
4618 { | |
4619 leadlen = STRLEN(bw->bw_leadstring); | |
4620 if (bw2 != NULL && bw2->bw_leadstring != NULL) | |
4621 for ( ; copylen < leadlen; ++copylen) | |
4622 if (bw->bw_leadstring[copylen] | |
4623 != bw2->bw_leadstring[copylen]) | |
4624 break; | |
4625 } | |
4626 if (bw->bw_addstring == NULL) | |
4627 addlen = 0; | |
4628 else | |
4629 { | |
4630 addlen = STRLEN(bw->bw_addstring); | |
4631 if (bw2 != NULL && copylen == leadlen | |
4632 && bw2->bw_addstring != NULL) | |
4633 { | |
4634 for (i = 0; i < addlen; ++i) | |
4635 if (bw->bw_addstring[i] != bw2->bw_addstring[i]) | |
4636 break; | |
4637 copylen += i; | |
4638 } | |
4639 } | |
4640 | |
4641 aflags = 0; | |
4642 /* Only copy bytes when it's more than one, the length itself | |
4643 * takes an extra byte. */ | |
4644 if (copylen > 1) | |
4645 aflags |= ADD_COPYLEN; | |
4646 else | |
4647 copylen = 0; | |
4648 | |
4649 if (bw->bw_flags & BWF_ONECAP) | |
4650 aflags |= ADD_ONECAP; | |
4651 if (bw->bw_flags & BWF_ALLCAP) | |
4652 aflags |= ADD_ALLCAP; | |
4653 if (bw->bw_flags & BWF_KEEPCAP) | |
4654 aflags |= ADD_KEEPCAP; | |
4655 if (wif->wif_regionmask != 0 && (bw->bw_region | |
4656 & wif->wif_regionmask) != wif->wif_regionmask) | |
4657 aflags |= ADD_REGION; | |
4658 if (leadlen > 0) | |
4659 aflags |= ADD_LEADLEN; | |
4660 putc(aflags, fd); /* <addflags> */ | |
4661 | |
4662 putc(leadlen + addlen, fd); /* <addlen> */ | |
4663 if (aflags & ADD_LEADLEN) | |
4664 putc(leadlen, fd); /* <leadlen> */ | |
4665 if (aflags & ADD_COPYLEN) | |
4666 putc(copylen, fd); /* <copylen> */ | |
4667 | |
4668 /* <addstring> */ | |
4669 if (leadlen > copylen && bw->bw_leadstring != NULL) | |
4670 fwrite(bw->bw_leadstring + copylen, | |
4671 (size_t)(leadlen - copylen), (size_t)1, fd); | |
4672 if (leadlen + addlen > copylen && bw->bw_addstring != NULL) | |
4673 { | |
4674 if (copylen >= leadlen) | |
4675 l = copylen - leadlen; | |
4439 else | 4676 else |
4440 leadlen = STRLEN(bw->bw_leadstring); | 4677 l = 0; |
4441 if (bw->bw_addstring == NULL) | 4678 fwrite(bw->bw_addstring + l, |
4442 addlen = 0; | 4679 (size_t)(addlen - l), (size_t)1, fd); |
4443 else | 4680 } |
4444 addlen = STRLEN(bw->bw_addstring); | 4681 |
4445 putc(leadlen + addlen, fd); /* <addlen> */ | 4682 if (aflags & ADD_REGION) |
4446 putc(leadlen, fd); /* <leadlen> */ | 4683 putc(bw->bw_region, fd); /* <region> */ |
4447 /* <addstring> */ | 4684 |
4448 if (bw->bw_leadstring != NULL) | 4685 bw2 = bw; |
4449 fwrite(bw->bw_leadstring, (size_t)leadlen, (size_t)1, fd); | 4686 } |
4450 if (bw->bw_addstring != NULL) | 4687 vim_free(wtab); |
4451 fwrite(bw->bw_addstring, (size_t)addlen, (size_t)1, fd); | |
4452 | |
4453 if (aflags & ADD_REGION) | |
4454 putc(bw->bw_region, fd); /* <region> */ | |
4455 } | |
4456 } | 4688 } |
4457 } | 4689 } |
4458 | 4690 |
4459 | 4691 |
4460 /* | 4692 /* |