comparison src/spell.c @ 249:f146656fb903

updated for version 7.0069
author vimboss
date Wed, 20 Apr 2005 19:37:22 +0000
parents 8ff168d3720a
children c8742c8da9ab
comparison
equal deleted inserted replaced
248:f2d46e4a859d 249:f146656fb903
201 #define BWF_ADDS 0x0100 /* there are additions */ 201 #define BWF_ADDS 0x0100 /* there are additions */
202 #define BWF_PREFIX 0x0200 /* has prefix NR list */ 202 #define BWF_PREFIX 0x0200 /* has prefix NR list */
203 #define BWF_ALLCAP 0x0400 /* all letters must be capital (not used 203 #define BWF_ALLCAP 0x0400 /* all letters must be capital (not used
204 for single-letter words) */ 204 for single-letter words) */
205 #define BWF_KEEPCAP 0x0800 /* Keep case as-is */ 205 #define BWF_KEEPCAP 0x0800 /* Keep case as-is */
206 #define BWF_ADDS_M 0x1000 /* there are more than 255 additions */
206 207
207 #define BWF_ADDHASH 0x8000 /* Internal: use hashtab for additions */ 208 #define BWF_ADDHASH 0x8000 /* Internal: use hashtab for additions */
208 209
209 #define NOWC_KEY (char_u *)"x" /* hashtab key used for additions without 210 #define NOWC_KEY (char_u *)"x" /* hashtab key used for additions without
210 any word character */ 211 any word character */
211 212
212 /* flags used for addition in the spell file */ 213 /* flags used for addition in the spell file */
213 #define ADD_REGION 0x02 /* region byte follows */ 214 #define ADD_REGION 0x02 /* region byte follows */
214 #define ADD_ONECAP 0x04 /* first letter must be capital */ 215 #define ADD_ONECAP 0x04 /* first letter must be capital */
216 #define ADD_LEADLEN 0x10 /* there is a leadlen byte */
217 #define ADD_COPYLEN 0x20 /* there is a copylen byte */
215 #define ADD_ALLCAP 0x40 /* all letters must be capital (not used 218 #define ADD_ALLCAP 0x40 /* all letters must be capital (not used
216 for single-letter words) */ 219 for single-letter words) */
217 #define ADD_KEEPCAP 0x80 /* fixed case */ 220 #define ADD_KEEPCAP 0x80 /* fixed case */
218 221
219 /* Translate ADD_ flags to BWF_ flags. 222 /* Translate ADD_ flags to BWF_ flags.
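220 * (Needed to keep ADD_ flags in one byte.) */ 223 * (Needed to keep ADD_ flags in one byte.) NOTE(review): ADD_LEADLEN (0x10) and ADD_COPYLEN (0x20) translate to the values of BWF_ADDS and BWF_PREFIX; confirm that is harmless. */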
221 #define ADD2BWF(x) (((x) & 0x0f) | (((x) & 0xf0) << 4)) 224 #define ADD2BWF(x) (((x) & 0x0f) | (((x) & 0xf0) << 4))
222 225
223 #define VIMSPELLMAGIC "VIMspell02" /* string at start of Vim spell file */ 226 #define VIMSPELLMAGIC "VIMspell03" /* string at start of Vim spell file */
224 #define VIMSPELLMAGICL 10 227 #define VIMSPELLMAGICL 10
225 228
226 /* 229 /*
227 * Structure to store info for word matching. 230 * Structure to store info for word matching.
228 */ 231 */
1162 int 1165 int
1163 spell_move_to(dir, allwords) 1166 spell_move_to(dir, allwords)
1164 int dir; /* FORWARD or BACKWARD */ 1167 int dir; /* FORWARD or BACKWARD */
1165 int allwords; /* TRUE for "[s" and "]s" */ 1168 int allwords; /* TRUE for "[s" and "]s" */
1166 { 1169 {
1167 pos_T pos; 1170 linenr_T lnum;
1171 pos_T found_pos;
1168 char_u *line; 1172 char_u *line;
1169 char_u *p; 1173 char_u *p;
1170 int wc; 1174 int wc;
1171 int nwc; 1175 int nwc;
1172 int attr = 0; 1176 int attr = 0;
1173 int len; 1177 int len;
1178 int has_syntax = syntax_present(curbuf);
1179 int col;
1180 int can_spell;
1174 1181
1175 if (!curwin->w_p_spell || *curwin->w_buffer->b_p_spl == NUL) 1182 if (!curwin->w_p_spell || *curwin->w_buffer->b_p_spl == NUL)
1176 { 1183 {
1177 EMSG(_("E756: Spell checking not enabled")); 1184 EMSG(_("E756: Spell checking not enabled"));
1178 return FAIL; 1185 return FAIL;
1179 } 1186 }
1180 1187
1181 /* TODO: moving backwards */ 1188 /*
1182 1189 * Start looking for bad word at the start of the line, because we can't
1183 /* Start looking for bad word at the start of the line, because we can't 1190 * start halfway a word, we don't know where it starts or ends.
1184 * start halfway a word and know where it ends. */ 1191 *
1185 pos = curwin->w_cursor; 1192 * When searching backwards, we continue in the line to find the last
1186 pos.col = 0; 1193 * bad word (in the cursor line: before the cursor).
1187 wc = FALSE; 1194 */
1195 lnum = curwin->w_cursor.lnum;
1196 found_pos.lnum = 0;
1188 1197
1189 while (!got_int) 1198 while (!got_int)
1190 { 1199 {
1191 line = ml_get(pos.lnum); 1200 line = ml_get(lnum);
1192 p = line + pos.col; 1201 p = line;
1202 wc = FALSE;
1203
1193 while (*p != NUL) 1204 while (*p != NUL)
1194 { 1205 {
1195 nwc = spell_iswordc(p); 1206 nwc = spell_iswordc(p);
1196 if (!wc && nwc) 1207 if (!wc && nwc)
1197 { 1208 {
1209 /* When searching backward don't search after the cursor. */
1210 if (dir == BACKWARD
1211 && lnum == curwin->w_cursor.lnum
1212 && (colnr_T)(p - line) >= curwin->w_cursor.col)
1213 break;
1214
1198 /* start of word */ 1215 /* start of word */
1199 /* TODO: check for bad word attr */
1200 len = spell_check(curwin, line, p, &attr); 1216 len = spell_check(curwin, line, p, &attr);
1217
1201 if (attr != 0) 1218 if (attr != 0)
1202 { 1219 {
1203 if (curwin->w_cursor.lnum < pos.lnum 1220 /* We found a bad word. Check the attribute. */
1204 || (curwin->w_cursor.lnum == pos.lnum 1221 /* TODO: check for syntax @Spell cluster. */
1205 && curwin->w_cursor.col < (colnr_T)(p - line))) 1222 if (allwords || attr == highlight_attr[HLF_SPB])
1206 { 1223 {
1207 curwin->w_cursor.lnum = pos.lnum; 1224 /* When searching forward only accept a bad word after
1208 curwin->w_cursor.col = p - line; 1225 * the cursor. */
1209 return OK; 1226 if (dir == BACKWARD
1227 || lnum > curwin->w_cursor.lnum
1228 || (lnum == curwin->w_cursor.lnum
1229 && (colnr_T)(p - line)
1230 > curwin->w_cursor.col))
1231 {
1232 if (has_syntax)
1233 {
1234 col = p - line;
1235 (void)syn_get_id(lnum, (colnr_T)col,
1236 FALSE, &can_spell);
1237
1238 /* have to get the line again, a multi-line
1239 * regexp may make it invalid */
1240 line = ml_get(lnum);
1241 p = line + col;
1242 }
1243 else
1244 can_spell = TRUE;
1245
1246 if (can_spell)
1247 {
1248 found_pos.lnum = lnum;
1249 found_pos.col = p - line;
1250 #ifdef FEAT_VIRTUALEDIT
1251 found_pos.coladd = 0;
1252 #endif
1253 if (dir == FORWARD)
1254 {
1255 /* No need to search further. */
1256 curwin->w_cursor = found_pos;
1257 return OK;
1258 }
1259 }
1260 }
1210 } 1261 }
1211 attr = 0; /* bad word is before or at cursor */ 1262 attr = 0;
1212 } 1263 }
1213 p += len; 1264 p += len;
1214 if (*p == NUL) 1265 if (*p == NUL)
1215 break; 1266 break;
1216 nwc = FALSE; 1267 nwc = FALSE;
1220 mb_ptr_adv(p); 1271 mb_ptr_adv(p);
1221 wc = nwc; 1272 wc = nwc;
1222 } 1273 }
1223 1274
1224 /* Advance to next line. */ 1275 /* Advance to next line. */
1225 if (pos.lnum == curbuf->b_ml.ml_line_count) 1276 if (dir == BACKWARD)
1226 return FAIL; 1277 {
1227 ++pos.lnum; 1278 if (found_pos.lnum != 0)
1228 pos.col = 0; 1279 {
1229 wc = FALSE; 1280 /* Use the last match in the line. */
1281 curwin->w_cursor = found_pos;
1282 return OK;
1283 }
1284 if (lnum == 1)
1285 return FAIL;
1286 --lnum;
1287 }
1288 else
1289 {
1290 if (lnum == curbuf->b_ml.ml_line_count)
1291 return FAIL;
1292 ++lnum;
1293 }
1230 1294
1231 line_breakcheck(); 1295 line_breakcheck();
1232 } 1296 }
1233 1297
1234 return FAIL; /* interrupted */ 1298 return FAIL; /* interrupted */
1771 fw->fw_region = REGION_ALL; 1835 fw->fw_region = REGION_ALL;
1772 1836
1773 fw->fw_adds = NULL; 1837 fw->fw_adds = NULL;
1774 if (flags & BWF_ADDS) 1838 if (flags & BWF_ADDS)
1775 { 1839 {
1776 adds = (getc(fd) << 8) + getc(fd); /* <addcnt> */ 1840 if (flags & BWF_ADDS_M)
1841 adds = (getc(fd) << 8) + getc(fd); /* <addcnt> */
1842 else
1843 adds = getc(fd); /* <addcnt> */
1777 1844
1778 if (adds > 30) 1845 if (adds > 30)
1779 { 1846 {
1780 /* Use a hashtable to lookup the part until the next word end. 1847 /* Use a hashtable to lookup the part until the next word end.
1781 * This uses more memory and involves some overhead, thus only 1848 * This uses more memory and involves some overhead, thus only
1793 else 1860 else
1794 ht = NULL; 1861 ht = NULL;
1795 1862
1796 while (--adds >= 0) 1863 while (--adds >= 0)
1797 { 1864 {
1798 /* <add>: <addflags> <addlen> [<leadlen> <addstring>] 1865 /* <add>: <addflags> <addlen> [<leadlen>] [<copylen>]
1799 * [<region>] */ 1866 * [<addstring>] [<region>] */
1800 flags = getc(fd); /* <addflags> */ 1867 flags = getc(fd); /* <addflags> */
1801 addlen = getc(fd); /* <addlen> */ 1868 addlen = getc(fd); /* <addlen> */
1802 if (addlen == EOF) 1869 if (addlen == EOF)
1803 goto truncerr; 1870 goto truncerr;
1804 if (addlen >= MAXWLEN) 1871 if (addlen >= MAXWLEN)
1805 goto formerr; 1872 goto formerr;
1806 1873
1874 if (flags & ADD_LEADLEN)
1875 leadlen = getc(fd); /* <leadlen> */
1876 else
1877 leadlen = 0;
1878
1807 if (addlen > 0) 1879 if (addlen > 0)
1808 { 1880 {
1809 leadlen = getc(fd); /* <leadlen> */ 1881 if (flags & ADD_COPYLEN)
1810 for (i = 0; i < addlen; ++i) /* <addstring> */ 1882 i = getc(fd); /* <copylen> */
1883 else
1884 i = 0;
1885 for ( ; i < addlen; ++i) /* <addstring> */
1811 cbuf[i] = getc(fd); 1886 cbuf[i] = getc(fd);
1812 cbuf[i] = NUL; 1887 cbuf[i] = NUL;
1813 } 1888 }
1814 else
1815 leadlen = 0;
1816 1889
1817 if (flags & ADD_KEEPCAP) 1890 if (flags & ADD_KEEPCAP)
1818 { 1891 {
1819 /* <addstring> is in original case, need to get 1892 /* <addstring> is in original case, need to get
1820 * case-folded word too. */ 1893 * case-folded word too. */
2290 } affhash_T; 2363 } affhash_T;
2291 2364
2292 static affhash_T dumas; 2365 static affhash_T dumas;
2293 #define HI2AS(hi) ((affhash_T *)((hi)->hi_key - (dumas.as_word - (char_u *)&dumas))) 2366 #define HI2AS(hi) ((affhash_T *)((hi)->hi_key - (dumas.as_word - (char_u *)&dumas)))
2294 2367
2368 /* State shared by write_vim_spell() and write_bword() while writing the spell file; the count/max fields are only used for the statistics printed at the end. */
2369 typedef struct winfo_S
2370 {
2371 FILE *wif_fd; /* spell file being written */
2372 basicword_T *wif_prevbw; /* last written basic word, NULL before the first one */
2373 int wif_regionmask; /* mask with one bit per region, 0 when there is at most one region */
2374 int wif_prefm; /* 1 or 2 bytes used for prefix NR */
2375 int wif_suffm; /* 1 or 2 bytes used for suffix NR */
2376 long wif_wcount; /* written word count, incremented by write_bword() */
2377 long wif_acount; /* written addition count, summed over all words */
2378 long wif_addmax; /* max number of additions on one word */
2379 char_u *wif_addmaxw; /* word with max additions (points to bw_word, not a copy) */
2380 } winfo_T;
2381
2295 2382
2296 static afffile_T *spell_read_aff __ARGS((char_u *fname, vimconv_T *conv, int ascii)); 2383 static afffile_T *spell_read_aff __ARGS((char_u *fname, vimconv_T *conv, int ascii));
2297 static void spell_free_aff __ARGS((afffile_T *aff)); 2384 static void spell_free_aff __ARGS((afffile_T *aff));
2298 static int has_non_ascii __ARGS((char_u *s)); 2385 static int has_non_ascii __ARGS((char_u *s));
2299 static int spell_read_dic __ARGS((hashtab_T *ht, char_u *fname, vimconv_T *conv, int ascii)); 2386 static int spell_read_dic __ARGS((hashtab_T *ht, char_u *fname, vimconv_T *conv, int ascii));
2311 static void add_to_wordlist __ARGS((hashtab_T *newwords, basicword_T *bw)); 2398 static void add_to_wordlist __ARGS((hashtab_T *newwords, basicword_T *bw));
2312 static void put_bytes __ARGS((FILE *fd, long_u nr, int len)); 2399 static void put_bytes __ARGS((FILE *fd, long_u nr, int len));
2313 static void write_affix __ARGS((FILE *fd, affheader_T *ah)); 2400 static void write_affix __ARGS((FILE *fd, affheader_T *ah));
2314 static void write_affixlist __ARGS((FILE *fd, garray_T *aff, int bytes)); 2401 static void write_affixlist __ARGS((FILE *fd, garray_T *aff, int bytes));
2315 static void write_vim_spell __ARGS((char_u *fname, garray_T *prefga, garray_T *suffga, hashtab_T *newwords, int regcount, char_u *regchars)); 2402 static void write_vim_spell __ARGS((char_u *fname, garray_T *prefga, garray_T *suffga, hashtab_T *newwords, int regcount, char_u *regchars));
2316 static void write_bword __ARGS((FILE *fd, basicword_T *bw, int lowcap, basicword_T **prevbw, int regionmask, int prefm, int suffm)); 2403 static void write_bword __ARGS((winfo_T *wif, basicword_T *bw, int lowcap));
2317 static void free_wordtable __ARGS((hashtab_T *ht)); 2404 static void free_wordtable __ARGS((hashtab_T *ht));
2318 static void free_basicword __ARGS((basicword_T *bw)); 2405 static void free_basicword __ARGS((basicword_T *bw));
2319 static void free_affixentries __ARGS((affentry_T *first)); 2406 static void free_affixentries __ARGS((affentry_T *first));
2320 static void free_affix_entry __ARGS((affentry_T *ap)); 2407 static void free_affix_entry __ARGS((affentry_T *ap));
2321 2408
4017 * Vim spell file format: <HEADER> <PREFIXLIST> <SUFFIXLIST> 4104 * Vim spell file format: <HEADER> <PREFIXLIST> <SUFFIXLIST>
4018 * <SUGGEST> <WORDLIST> 4105 * <SUGGEST> <WORDLIST>
4019 * 4106 *
4020 * <HEADER>: <fileID> <regioncnt> <regionname> ... 4107 * <HEADER>: <fileID> <regioncnt> <regionname> ...
4021 * 4108 *
4022 * <fileID> 10 bytes "VIMspell02" 4109 * <fileID> 10 bytes "VIMspell03"
4023 * <regioncnt> 1 byte number of regions following (8 supported) 4110 * <regioncnt> 1 byte number of regions following (8 supported)
4024 * <regionname> 2 bytes Region name: ca, au, etc. 4111 * <regionname> 2 bytes Region name: ca, au, etc.
4025 * First <regionname> is region 1. 4112 * First <regionname> is region 1.
4026 * 4113 *
4027 * 4114 *
4083 * BWF_ADDS 4170 * BWF_ADDS
4084 * 0x02: has prefixes, <affixcnt> and <affixNR> follow 4171 * 0x02: has prefixes, <affixcnt> and <affixNR> follow
4085 * BWF_PREFIX 4172 * BWF_PREFIX
4086 * 0x04: all letters must be upper-case, BWF_ALLCAP 4173 * 0x04: all letters must be upper-case, BWF_ALLCAP
4087 * 0x08: case must match, BWF_KEEPCAP 4174 * 0x08: case must match, BWF_KEEPCAP
4175 * 0x10: has more than 255 additions, <addcnt> is two
4176 * bytes, BWF_ADDS_M
4088 * 0x10-0x80: unset 4177 * 0x20-0x80: unset
4089 * <caselen> 1 byte Length of <caseword>. 4178 * <caselen> 1 byte Length of <caseword>.
4090 * <caseword> N bytes Word with matching case. 4179 * <caseword> N bytes Word with matching case.
4091 * <affixcnt> 1 byte Number of affix NRs following. 4180 * <affixcnt> 1 byte Number of affix NRs following.
4092 * <affixNR> 1 or 2 byte Number of possible affix for this word. 4181 * <affixNR> 1 or 2 byte Number of possible affix for this word.
4093 * When using 2 bytes MSB comes first. 4182 * When using 2 bytes MSB comes first.
4094 * <region> 1 byte Bitmask for regions in which word is valid. When 4183 * <region> 1 byte Bitmask for regions in which word is valid. When
4095 * omitted it's valid in all regions. 4184 * omitted it's valid in all regions.
4096 * Lowest bit is for region 1. 4185 * Lowest bit is for region 1.
4097 * <addcnt> 2 bytes Number of <add> items following. 4186 * <addcnt> 1 or 2 byte Number of <add> items following.
4098 * 4187 *
4099 * <add>: <addflags> <addlen> [<leadlen> <addstring>] [<region>] 4188 * <add>: <addflags> <addlen> [<leadlen>] [<copylen>] [<addstring>] [<region>]
4100 * 4189 *
4101 * <addflags> 1 byte 0x01: unset 4190 * <addflags> 1 byte 0x01: unset
4102 * 0x02: has region byte, ADD_REGION 4191 * 0x02: has region byte, ADD_REGION
4103 * 0x04: first letter must be upper-case, ADD_ONECAP 4192 * 0x04: first letter must be upper-case, ADD_ONECAP
4104 * 0x08-0x20: unset 4193 * 0x08: unset
4194 * 0x10: has a <leadlen>, ADD_LEADLEN
4195 * 0x20: has a <copylen>, ADD_COPYLEN
4105 * 0x40: all letters must be upper-case, ADD_ALLCAP 4196 * 0x40: all letters must be upper-case, ADD_ALLCAP
4106 * 0x80: fixed case, <addstring> is the whole word 4197 * 0x80: fixed case, <addstring> is the whole word
4107 * with matching case, ADD_KEEPCAP. 4198 * with matching case, ADD_KEEPCAP.
4108 * <addlen> 1 byte Length of <addstring> in bytes. 4199 * <addlen> 1 byte Length of <addstring> in bytes.
4109 * <leadlen> 1 byte Number of bytes at start of <addstring> that must 4200 * <leadlen> 1 byte Number of bytes at start of <addstring> that must
4110 * come before the start of the basic word. 4201 * come before the start of the basic word.
4202 * <copylen> 1 byte Number of bytes copied from previous <addstring>.
4111 * <addstring> N bytes Word characters, before/in/after the word. 4203 * <addstring> N bytes Word characters, before/in/after the word.
4112 * 4204 *
4113 * All text characters are in 'encoding': <affchop>, <affadd>, <string>, 4205 * All text characters are in 'encoding': <affchop>, <affadd>, <string>,
4114 * <caseword> and <addstring>. 4206 * <caseword> and <addstring>.
4115 * All other fields are ASCII: <regionname> 4207 * All other fields are ASCII: <regionname>
4126 garray_T *suffga; /* suffixes, affheader_T entries */ 4218 garray_T *suffga; /* suffixes, affheader_T entries */
4127 hashtab_T *newwords; /* basic words, basicword_T entries */ 4219 hashtab_T *newwords; /* basic words, basicword_T entries */
4128 int regcount; /* number of regions */ 4220 int regcount; /* number of regions */
4129 char_u *regchars; /* region names */ 4221 char_u *regchars; /* region names */
4130 { 4222 {
4131 FILE *fd; 4223 winfo_T wif;
4132 garray_T *gap; 4224 garray_T *gap;
4133 hashitem_T *hi; 4225 hashitem_T *hi;
4134 char_u **wtab; 4226 char_u **wtab;
4135 int todo; 4227 int todo;
4136 int flags, aflags; 4228 int flags, aflags;
4137 basicword_T *bw, *bwf, *bw2 = NULL, *prevbw = NULL; 4229 basicword_T *bw, *bwf, *bw2 = NULL;
4138 int regionmask; /* mask for all relevant region bits */
4139 int i; 4230 int i;
4140 int cnt; 4231 int cnt;
4141 affentry_T *ae; 4232 affentry_T *ae;
4142 int round; 4233 int round;
4143 int prefm, suffm;
4144 garray_T bwga; 4234 garray_T bwga;
4145 4235
4146 fd = fopen((char *)fname, "w"); 4236 vim_memset(&wif, 0, sizeof(winfo_T));
4147 if (fd == NULL) 4237
4238 wif.wif_fd = fopen((char *)fname, "w");
4239 if (wif.wif_fd == NULL)
4148 { 4240 {
4149 EMSG2(_(e_notopen), fname); 4241 EMSG2(_(e_notopen), fname);
4150 return; 4242 return;
4151 } 4243 }
4152 4244
4153 fwrite(VIMSPELLMAGIC, VIMSPELLMAGICL, (size_t)1, fd); 4245 fwrite(VIMSPELLMAGIC, VIMSPELLMAGICL, (size_t)1, wif.wif_fd);
4154 4246
4155 /* write the region names if there is more than one */ 4247 /* write the region names if there is more than one */
4156 if (regcount > 1) 4248 if (regcount > 1)
4157 { 4249 {
4158 putc(regcount, fd); 4250 putc(regcount, wif.wif_fd);
4159 fwrite(regchars, (size_t)(regcount * 2), (size_t)1, fd); 4251 fwrite(regchars, (size_t)(regcount * 2), (size_t)1, wif.wif_fd);
4160 regionmask = (1 << regcount) - 1; 4252 wif.wif_regionmask = (1 << regcount) - 1;
4161 } 4253 }
4162 else 4254 else
4163 { 4255 {
4164 putc(0, fd); 4256 putc(0, wif.wif_fd);
4165 regionmask = 0; 4257 wif.wif_regionmask = 0;
4166 } 4258 }
4167 4259
4168 /* Write the prefix and suffix lists. */ 4260 /* Write the prefix and suffix lists. */
4169 for (round = 1; round <= 2; ++round) 4261 for (round = 1; round <= 2; ++round)
4170 { 4262 {
4171 gap = round == 1 ? prefga : suffga; 4263 gap = round == 1 ? prefga : suffga;
4172 put_bytes(fd, (long_u)gap->ga_len, 2); /* <affcount> */ 4264 put_bytes(wif.wif_fd, (long_u)gap->ga_len, 2); /* <affcount> */
4173 4265
4174 /* Count the total number of affix items. */ 4266 /* Count the total number of affix items. */
4175 cnt = 0; 4267 cnt = 0;
4176 for (i = 0; i < gap->ga_len; ++i) 4268 for (i = 0; i < gap->ga_len; ++i)
4177 for (ae = ((affheader_T *)gap->ga_data + i)->ah_first; 4269 for (ae = ((affheader_T *)gap->ga_data + i)->ah_first;
4178 ae != NULL; ae = ae->ae_next) 4270 ae != NULL; ae = ae->ae_next)
4179 ++cnt; 4271 ++cnt;
4180 put_bytes(fd, (long_u)cnt, 2); /* <afftotcnt> */ 4272 put_bytes(wif.wif_fd, (long_u)cnt, 2); /* <afftotcnt> */
4181 4273
4182 for (i = 0; i < gap->ga_len; ++i) 4274 for (i = 0; i < gap->ga_len; ++i)
4183 write_affix(fd, (affheader_T *)gap->ga_data + i); 4275 write_affix(wif.wif_fd, (affheader_T *)gap->ga_data + i);
4184 } 4276 }
4185 4277
4186 /* Number of bytes used for affix NR depends on affix count. */ 4278 /* Number of bytes used for affix NR depends on affix count. */
4187 prefm = (prefga->ga_len > 256) ? 2 : 1; 4279 wif.wif_prefm = (prefga->ga_len > 256) ? 2 : 1;
4188 suffm = (suffga->ga_len > 256) ? 2 : 1; 4280 wif.wif_suffm = (suffga->ga_len > 256) ? 2 : 1;
4189 4281
4190 /* Write the suggest info. TODO */ 4282 /* Write the suggest info. TODO */
4191 put_bytes(fd, 0L, 4); 4283 put_bytes(wif.wif_fd, 0L, 4);
4192 4284
4193 /* 4285 /*
4194 * Write the word list. <wordcount> <worditem> ... 4286 * Write the word list. <wordcount> <worditem> ...
4195 */ 4287 */
4196 /* number of basic words in 4 bytes */ 4288 /* number of basic words in 4 bytes */
4197 put_bytes(fd, newwords->ht_used, 4); /* <wordcount> */ 4289 put_bytes(wif.wif_fd, newwords->ht_used, 4); /* <wordcount> */
4198 4290
4199 /* 4291 /*
4200 * Sort the word list, so that we can reuse as many bytes as possible. 4292 * Sort the word list, so that we can copy as many bytes as possible from
4293 * the previous word.
4201 */ 4294 */
4202 wtab = (char_u **)alloc((unsigned)(sizeof(char_u *) * newwords->ht_used)); 4295 wtab = (char_u **)alloc((unsigned)(sizeof(char_u *) * newwords->ht_used));
4203 if (wtab != NULL) 4296 if (wtab != NULL)
4204 { 4297 {
4205 /* Make a table with pointers to each word. */ 4298 /* Make a table with pointers to each word. */
4277 4370
4278 /* Write first basic word. If it's KEEPCAP then we need a word 4371 /* Write first basic word. If it's KEEPCAP then we need a word
4279 * without VALID flag first (makes it easier to read the list back 4372 * without VALID flag first (makes it easier to read the list back
4280 * in). */ 4373 * in). */
4281 if (bw->bw_flags & BWF_KEEPCAP) 4374 if (bw->bw_flags & BWF_KEEPCAP)
4282 write_bword(fd, bw, TRUE, &prevbw, regionmask, prefm, suffm); 4375 write_bword(&wif, bw, TRUE);
4283 write_bword(fd, bw, FALSE, &prevbw, regionmask, prefm, suffm); 4376 write_bword(&wif, bw, FALSE);
4284 4377
4285 /* Write other basic words, with different caps. */ 4378 /* Write other basic words, with different caps. */
4286 for (i = 0; i < bwga.ga_len; ++i) 4379 for (i = 0; i < bwga.ga_len; ++i)
4287 { 4380 {
4288 bw2 = ((basicword_T **)bwga.ga_data)[i]; 4381 bw2 = ((basicword_T **)bwga.ga_data)[i];
4289 if (bw2 != bw) 4382 if (bw2 != bw)
4290 write_bword(fd, bw2, FALSE, &prevbw, regionmask, 4383 write_bword(&wif, bw2, FALSE);
4291 prefm, suffm);
4292 } 4384 }
4293 } 4385 }
4294 4386
4295 ga_clear(&bwga); 4387 ga_clear(&bwga);
4296 } 4388 }
4297 4389
4298 fclose(fd); 4390 fclose(wif.wif_fd);
4391
4392 /* Print a few statistics. */
4393 if (wif.wif_addmaxw == NULL)
4394 wif.wif_addmaxw = (char_u *)"";
4395 smsg((char_u *)_("Maximum number of adds on a word: %ld (%s)"),
4396 wif.wif_addmax, wif.wif_addmaxw);
4397 smsg((char_u *)_("Average number of adds on a word: %f"),
4398 (float)wif.wif_acount / (float)wif.wif_wcount);
4399 }
4400
4401 /*
4402 * qsort() callback: order two basic words by their leadstring and, when those are equal or both missing, by their addstring. A missing (NULL) string sorts after any non-NULL string. Returns a negative, zero or positive number.
4403 */
4404 static int
4405 #ifdef __BORLANDC__
4406 _RTLENTRYF
4407 #endif
4408 bw_compare __ARGS((const void *s1, const void *s2));
4409
4410 static int
4411 #ifdef __BORLANDC__
4412 _RTLENTRYF
4413 #endif
4414 bw_compare(s1, s2)
4415 const void *s1;
4416 const void *s2;
4417 {
4418 basicword_T *bw1 = *(basicword_T **)s1; /* the sorted table holds pointers to basic words */
4419 basicword_T *bw2 = *(basicword_T **)s2;
4420 int i = 0; /* stays zero when both leadstrings are NULL */
4421
4422 /* compare the leadstrings; a word without leadstring sorts last */
4423 if (bw1->bw_leadstring == NULL)
4424 {
4425 if (bw2->bw_leadstring != NULL)
4426 return 1;
4427 }
4428 else if (bw2->bw_leadstring == NULL)
4429 return -1;
4430 else
4431 i = STRCMP(bw1->bw_leadstring, bw2->bw_leadstring);
4432
4433 if (i == 0)
4434 {
4435 /* leadstrings are identical or both missing, compare the addstrings the same way */
4436 if (bw1->bw_addstring == NULL)
4437 {
4438 if (bw2->bw_addstring != NULL)
4439 return 1;
4440 }
4441 else if (bw2->bw_addstring == NULL)
4442 return -1;
4443 else
4444 i = STRCMP(bw1->bw_addstring, bw2->bw_addstring);
4445 }
4446 return i;
4299 } 4447 }
4300 4448
4301 /* 4449 /*
4302 * Write basic word, followed by any additions. 4450 * Write basic word, followed by any additions.
4303 * 4451 *
4307 * [<affixcnt> <affixNR> ...] (suffixes) 4455 * [<affixcnt> <affixNR> ...] (suffixes)
4308 * [<region>] 4456 * [<region>]
4309 * [<addcnt> <add> ...] 4457 * [<addcnt> <add> ...]
4310 */ 4458 */
4311 static void 4459 static void
4312 write_bword(fd, bwf, lowcap, prevbw, regionmask, prefm, suffm) 4460 write_bword(wif, bwf, lowcap)
4313 FILE *fd; 4461 winfo_T *wif; /* info for writing */
4314 basicword_T *bwf; 4462 basicword_T *bwf;
4315 int lowcap; /* write KEEPCAP word as not-valid */ 4463 int lowcap; /* write KEEPCAP word as not-valid */
4316 basicword_T **prevbw; /* last written basic word */ 4464 {
4317 int regionmask; /* mask that includes all possible regions */ 4465 FILE *fd = wif->wif_fd;
4318 int prefm;
4319 int suffm;
4320 {
4321 int flags; 4466 int flags;
4322 int aflags; 4467 int aflags;
4323 int len; 4468 int len;
4324 int leadlen, addlen; 4469 int leadlen, addlen;
4470 int copylen;
4325 int clen; 4471 int clen;
4326 int adds = 0; 4472 int adds = 0;
4327 int i; 4473 int i;
4474 int idx;
4328 basicword_T *bw, *bw2; 4475 basicword_T *bw, *bw2;
4476 basicword_T **wtab;
4477 int count;
4478 int l;
4329 4479
4330 /* Check how many bytes can be copied from the previous word. */ 4480 /* Check how many bytes can be copied from the previous word. */
4331 len = STRLEN(bwf->bw_word); 4481 len = STRLEN(bwf->bw_word);
4332 if (*prevbw == NULL) 4482 if (wif->wif_prevbw == NULL)
4333 clen = 0; 4483 clen = 0;
4334 else 4484 else
4335 for (clen = 0; clen < len 4485 for (clen = 0; clen < len
4336 && (*prevbw)->bw_word[clen] == bwf->bw_word[clen]; ++clen) 4486 && wif->wif_prevbw->bw_word[clen] == bwf->bw_word[clen]; ++clen)
4337 ; 4487 ;
4338 putc(clen, fd); /* <nr> */ 4488 putc(clen, fd); /* <nr> */
4339 *prevbw = bwf; 4489 wif->wif_prevbw = bwf;
4340 /* <string> */ 4490 /* <string> */
4341 if (len > clen) 4491 if (len > clen)
4342 fwrite(bwf->bw_word + clen, (size_t)(len - clen), (size_t)1, fd); 4492 fwrite(bwf->bw_word + clen, (size_t)(len - clen), (size_t)1, fd);
4343 4493
4344 /* Try to find a word without additions to use first. */ 4494 /* Try to find a word without additions to use first. */
4358 { 4508 {
4359 flags |= BWF_VALID; 4509 flags |= BWF_VALID;
4360 4510
4361 /* Flags: add the region byte if the word isn't valid in all 4511 /* Flags: add the region byte if the word isn't valid in all
4362 * regions. */ 4512 * regions. */
4363 if (regionmask != 0 && (bw->bw_region & regionmask) != regionmask) 4513 if (wif->wif_regionmask != 0 && (bw->bw_region & wif->wif_regionmask)
4514 != wif->wif_regionmask)
4364 flags |= BWF_REGION; 4515 flags |= BWF_REGION;
4365 } 4516 }
4366 /* Add the prefix/suffix list if there are prefixes/suffixes. */ 4517 /* Add the prefix/suffix list if there are prefixes/suffixes. */
4367 if (bw->bw_leadstring == NULL && bw->bw_prefix.ga_len > 0) 4518 if (bw->bw_leadstring == NULL && bw->bw_prefix.ga_len > 0)
4368 flags |= BWF_PREFIX; 4519 flags |= BWF_PREFIX;
4369 if (bw->bw_addstring == NULL && bw->bw_suffix.ga_len > 0) 4520 if (bw->bw_addstring == NULL && bw->bw_suffix.ga_len > 0)
4370 flags |= BWF_SUFFIX; 4521 flags |= BWF_SUFFIX;
4371 4522
4372 /* Flags: may have additions. */ 4523 /* Flags: may have additions. */
4373 if (adds > 0) 4524 if (adds > 0)
4525 {
4374 flags |= BWF_ADDS; 4526 flags |= BWF_ADDS;
4527 if (adds >= 256)
4528 flags |= BWF_ADDS_M;
4529 }
4375 4530
4376 /* The dummy word before a KEEPCAP word doesn't have any flags, they are 4531 /* The dummy word before a KEEPCAP word doesn't have any flags, they are
4377 * in the actual word that follows. */ 4532 * in the actual word that follows. */
4378 if (lowcap) 4533 if (lowcap)
4379 flags = 0; 4534 flags = 0;
4401 putc(bw->bw_caseword[i], fd); /* <caseword> */ 4556 putc(bw->bw_caseword[i], fd); /* <caseword> */
4402 } 4557 }
4403 4558
4404 /* write prefix and suffix lists: <affixcnt> <affixNR> ... */ 4559 /* write prefix and suffix lists: <affixcnt> <affixNR> ... */
4405 if (flags & BWF_PREFIX) 4560 if (flags & BWF_PREFIX)
4406 write_affixlist(fd, &bw->bw_prefix, prefm); 4561 write_affixlist(fd, &bw->bw_prefix, wif->wif_prefm);
4407 if (flags & BWF_SUFFIX) 4562 if (flags & BWF_SUFFIX)
4408 write_affixlist(fd, &bw->bw_suffix, suffm); 4563 write_affixlist(fd, &bw->bw_suffix, wif->wif_suffm);
4409 4564
4410 if (flags & BWF_REGION) 4565 if (flags & BWF_REGION)
4411 putc(bw->bw_region, fd); /* <region> */ 4566 putc(bw->bw_region, fd); /* <region> */
4567
4568 ++wif->wif_wcount;
4412 4569
4413 /* 4570 /*
4414 * Additions. 4571 * Additions.
4415 */ 4572 */
4416 if (adds > 0) 4573 if (adds > 0)
4417 { 4574 {
4418 put_bytes(fd, (long_u)adds, 2); /* <addcnt> */ 4575 if (adds >= 256)
4419 4576 put_bytes(fd, (long_u)adds, 2); /* 2 byte <addcnt> */
4577 else
4578 putc(adds, fd); /* 1 byte <addcnt> */
4579
4580 /* statistics */
4581 wif->wif_acount += adds;
4582 if (wif->wif_addmax < adds)
4583 {
4584 wif->wif_addmax = adds;
4585 wif->wif_addmaxw = bw->bw_word;
4586 }
4587
4588 /*
4589 * Sort the list of additions, so that we can copy as many bytes as
4590 * possible from the previous addstring.
4591 */
4592
4593 /* Make a table with pointers to each basic word that has additions. */
4594 wtab = (basicword_T **)alloc((unsigned)(sizeof(basicword_T *) * adds));
4595 if (wtab == NULL)
4596 return;
4597 count = 0;
4420 for (bw = bwf; bw != NULL; bw = bw->bw_cnext) 4598 for (bw = bwf; bw != NULL; bw = bw->bw_cnext)
4421 if (bw->bw_leadstring != NULL || bw->bw_addstring != NULL) 4599 if (bw->bw_leadstring != NULL || bw->bw_addstring != NULL)
4422 { 4600 wtab[count++] = bw;
4423 /* <add>: <addflags> <addlen> [<leadlen> <addstring>] 4601
4424 * [<region>] */ 4602 /* Sort. */
4425 aflags = 0; 4603 qsort((void *)wtab, (size_t)count, sizeof(basicword_T *), bw_compare);
4426 if (bw->bw_flags & BWF_ONECAP) 4604
4427 aflags |= ADD_ONECAP; 4605 /* Now write each basic word to the spell file. Copy bytes from the
4428 if (bw->bw_flags & BWF_ALLCAP) 4606 * previous leadstring/addstring if possible. */
4429 aflags |= ADD_ALLCAP; 4607 bw2 = NULL;
4430 if (bw->bw_flags & BWF_KEEPCAP) 4608 for (idx = 0; idx < count; ++idx)
4431 aflags |= ADD_KEEPCAP; 4609 {
4432 if (regionmask != 0 4610 bw = wtab[idx];
4433 && (bw->bw_region & regionmask) != regionmask) 4611
4434 aflags |= ADD_REGION; 4612 /* <add>: <addflags> <addlen> [<leadlen>] [<copylen>]
4435 putc(aflags, fd); /* <addflags> */ 4613 * [<addstring>] [<region>] */
4436 4614 copylen = 0;
4437 if (bw->bw_leadstring == NULL) 4615 if (bw->bw_leadstring == NULL)
4438 leadlen = 0; 4616 leadlen = 0;
4617 else
4618 {
4619 leadlen = STRLEN(bw->bw_leadstring);
4620 if (bw2 != NULL && bw2->bw_leadstring != NULL)
4621 for ( ; copylen < leadlen; ++copylen)
4622 if (bw->bw_leadstring[copylen]
4623 != bw2->bw_leadstring[copylen])
4624 break;
4625 }
4626 if (bw->bw_addstring == NULL)
4627 addlen = 0;
4628 else
4629 {
4630 addlen = STRLEN(bw->bw_addstring);
4631 if (bw2 != NULL && copylen == leadlen
4632 && bw2->bw_addstring != NULL)
4633 {
4634 for (i = 0; i < addlen; ++i)
4635 if (bw->bw_addstring[i] != bw2->bw_addstring[i])
4636 break;
4637 copylen += i;
4638 }
4639 }
4640
4641 aflags = 0;
4642 /* Only copy bytes when it's more than one, the length itself
4643 * takes an extra byte. */
4644 if (copylen > 1)
4645 aflags |= ADD_COPYLEN;
4646 else
4647 copylen = 0;
4648
4649 if (bw->bw_flags & BWF_ONECAP)
4650 aflags |= ADD_ONECAP;
4651 if (bw->bw_flags & BWF_ALLCAP)
4652 aflags |= ADD_ALLCAP;
4653 if (bw->bw_flags & BWF_KEEPCAP)
4654 aflags |= ADD_KEEPCAP;
4655 if (wif->wif_regionmask != 0 && (bw->bw_region
4656 & wif->wif_regionmask) != wif->wif_regionmask)
4657 aflags |= ADD_REGION;
4658 if (leadlen > 0)
4659 aflags |= ADD_LEADLEN;
4660 putc(aflags, fd); /* <addflags> */
4661
4662 putc(leadlen + addlen, fd); /* <addlen> */
4663 if (aflags & ADD_LEADLEN)
4664 putc(leadlen, fd); /* <leadlen> */
4665 if (aflags & ADD_COPYLEN)
4666 putc(copylen, fd); /* <copylen> */
4667
4668 /* <addstring> */
4669 if (leadlen > copylen && bw->bw_leadstring != NULL)
4670 fwrite(bw->bw_leadstring + copylen,
4671 (size_t)(leadlen - copylen), (size_t)1, fd);
4672 if (leadlen + addlen > copylen && bw->bw_addstring != NULL)
4673 {
4674 if (copylen >= leadlen)
4675 l = copylen - leadlen;
4439 else 4676 else
4440 leadlen = STRLEN(bw->bw_leadstring); 4677 l = 0;
4441 if (bw->bw_addstring == NULL) 4678 fwrite(bw->bw_addstring + l,
4442 addlen = 0; 4679 (size_t)(addlen - l), (size_t)1, fd);
4443 else 4680 }
4444 addlen = STRLEN(bw->bw_addstring); 4681
4445 putc(leadlen + addlen, fd); /* <addlen> */ 4682 if (aflags & ADD_REGION)
4446 putc(leadlen, fd); /* <leadlen> */ 4683 putc(bw->bw_region, fd); /* <region> */
4447 /* <addstring> */ 4684
4448 if (bw->bw_leadstring != NULL) 4685 bw2 = bw;
4449 fwrite(bw->bw_leadstring, (size_t)leadlen, (size_t)1, fd); 4686 }
4450 if (bw->bw_addstring != NULL) 4687 vim_free(wtab);
4451 fwrite(bw->bw_addstring, (size_t)addlen, (size_t)1, fd);
4452
4453 if (aflags & ADD_REGION)
4454 putc(bw->bw_region, fd); /* <region> */
4455 }
4456 } 4688 }
4457 } 4689 }
4458 4690
4459 4691
4460 /* 4692 /*