# HG changeset patch # User vimboss # Date 1111532052 0 # Node ID 4e7dca477feecc68ab6f3d8a9a173492f9e55225 # Parent b78857578493e43832327acd309d684eb02444ca updated for version 7.0063 diff --git a/runtime/doc/Makefile b/runtime/doc/Makefile --- a/runtime/doc/Makefile +++ b/runtime/doc/Makefile @@ -381,31 +381,31 @@ os_win32.txt: touch os_win32.txt vim-it.UTF-8.1: vim-it.1 - iconv -f latin1 -t utf-8 $< >$@ + iconv -f latin1 -t utf-8 $> >$@ evim-it.UTF-8.1: evim-it.1 - iconv -f latin1 -t utf-8 $< >$@ + iconv -f latin1 -t utf-8 $> >$@ vimdiff-it.UTF-8.1: vimdiff-it.1 - iconv -f latin1 -t utf-8 $< >$@ + iconv -f latin1 -t utf-8 $> >$@ vimtutor-it.UTF-8.1: vimtutor-it.1 - iconv -f latin1 -t utf-8 $< >$@ + iconv -f latin1 -t utf-8 $> >$@ xxd-it.UTF-8.1: xxd-it.1 - iconv -f latin1 -t utf-8 $< >$@ + iconv -f latin1 -t utf-8 $> >$@ vim-ru.UTF-8.1: vim-ru.1 - iconv -f KOI8-R -t utf-8 $< >$@ + iconv -f KOI8-R -t utf-8 $> >$@ evim-ru.UTF-8.1: evim-ru.1 - iconv -f KOI8-R -t utf-8 $< >$@ + iconv -f KOI8-R -t utf-8 $> >$@ vimdiff-ru.UTF-8.1: vimdiff-ru.1 - iconv -f KOI8-R -t utf-8 $< >$@ + iconv -f KOI8-R -t utf-8 $> >$@ vimtutor-ru.UTF-8.1: vimtutor-ru.1 - iconv -f KOI8-R -t utf-8 $< >$@ + iconv -f KOI8-R -t utf-8 $> >$@ xxd-ru.UTF-8.1: xxd-ru.1 - iconv -f KOI8-R -t utf-8 $< >$@ + iconv -f KOI8-R -t utf-8 $> >$@ diff --git a/runtime/doc/vim.1 b/runtime/doc/vim.1 --- a/runtime/doc/vim.1 +++ b/runtime/doc/vim.1 @@ -465,7 +465,7 @@ Type ":help" in to get started. Type ":help subject" to get help on a specific subject. For example: ":help ZZ" to get help for the "ZZ" command. -Use and CTRL-D to complete subjects (":help cmdline\-completion"). +Use and CTRL\-D to complete subjects (":help cmdline\-completion"). Tags are present to jump from one place to another (sort of hypertext links, see ":help"). All documentation files can be viewed in this way, for example diff --git a/src/Make_bc5.mak b/src/Make_bc5.mak --- a/src/Make_bc5.mak +++ b/src/Make_bc5.mak @@ -564,6 +564,7 @@ vimobj = \ $(OBJDIR)\regexp.obj \ $(OBJDIR)\screen.obj \ $(OBJDIR)\search.obj \ + $(OBJDIR)\spell.obj \ $(OBJDIR)\syntax.obj \ $(OBJDIR)\tag.obj \ $(OBJDIR)\term.obj \ diff --git a/src/Make_cyg.mak b/src/Make_cyg.mak --- a/src/Make_cyg.mak +++ b/src/Make_cyg.mak @@ -1,6 +1,6 @@ # # Makefile for VIM on Win32, using Cygnus gcc -# Last updated by Dan Sharp. Last Change: 2005 Jan 29 +# Last updated by Dan Sharp. Last Change: 2005 Mar 21 # # Also read INSTALLpc.txt! # @@ -424,6 +424,7 @@ OBJ = \ $(OUTDIR)/regexp.o \ $(OUTDIR)/screen.o \ $(OUTDIR)/search.o \ + $(OUTDIR)/spell.o \ $(OUTDIR)/syntax.o \ $(OUTDIR)/tag.o \ $(OUTDIR)/term.o \ diff --git a/src/normal.c b/src/normal.c --- a/src/normal.c +++ b/src/normal.c @@ -3874,6 +3874,7 @@ check_scrollbind(topline_diff, leftcol_d nv_ignore(cap) cmdarg_T *cap; { + cap->retval |= CA_COMMAND_BUSY; /* don't call edit() now */ } /* @@ -8675,6 +8676,7 @@ nv_cursorhold(cap) { apply_autocmds(EVENT_CURSORHOLD, NULL, NULL, FALSE, curbuf); did_cursorhold = TRUE; -} -#endif - + cap->retval |= CA_COMMAND_BUSY; /* don't call edit() now */ +} +#endif + diff --git a/src/proto/charset.pro b/src/proto/charset.pro --- a/src/proto/charset.pro +++ b/src/proto/charset.pro @@ -20,6 +20,8 @@ int vim_isIDc __ARGS((int c)); int vim_iswordc __ARGS((int c)); int vim_iswordp __ARGS((char_u *p)); int vim_iswordc_buf __ARGS((char_u *p, buf_T *buf)); +void init_spell_chartab __ARGS((void)); +int spell_iswordc __ARGS((char_u *p)); int vim_isfilec __ARGS((int c)); int vim_isprintc __ARGS((int c)); int vim_isprintc_strict __ARGS((int c)); diff --git a/src/spell.c b/src/spell.c --- a/src/spell.c +++ b/src/spell.c @@ -9,6 +9,11 @@ /* * spell.c: code for spell checking + * + * Terminology: + * "dword" is a dictionary word, made out of letters and digits. + * "nword" is a word with a character that's not a letter or digit. + * "word" is either a "dword" or an "nword". */ #if defined(MSDOS) || defined(WIN16) || defined(WIN32) || defined(_WIN64) @@ -23,6 +28,8 @@ # include #endif +#define MAXWLEN 100 /* assume max. word len is this many bytes */ + /* * Structure that is used to store the text from the language file. This * avoids the need to allocate each individual word and copying it. It's @@ -36,24 +43,67 @@ struct sblock_S char_u sb_data[1]; /* data, actually longer */ }; +/* Structure to store words and additions. Used twice : once for case-folded + * and once for keep-case words. */ +typedef struct winfo_S +{ + hashtab_T wi_ht; /* hashtable with all words, both dword_T and + nword_T (check flags for DW_NWORD) */ + garray_T wi_add; /* table with pointers to additions in a + dword_T */ + int wi_addlen; /* longest addition length */ +} winfo_T; + /* * Structure used to store words and other info for one language. */ typedef struct slang_S slang_T; - struct slang_S { slang_T *sl_next; /* next language */ char_u sl_name[2]; /* language name "en", "nl", etc. */ - hashtab_T sl_ht; /* hashtable with all words */ - garray_T sl_match; /* table with pointers to matches */ - garray_T sl_add; /* table with pointers to additions */ - char_u sl_regions[13]; /* table with up to 6 region names */ + winfo_T sl_fwords; /* case-folded words and additions */ + winfo_T sl_kwords; /* keep-case words and additions */ + char_u sl_regions[17]; /* table with up to 8 region names plus NUL */ sblock_T *sl_block; /* list with allocated memory blocks */ }; static slang_T *first_lang = NULL; +/* Entry for dword in "sl_ht". Also used for part of an nword, starting with + * the first non-word character. And used for additions in wi_add. */ +typedef struct dword_S +{ + char_u dw_region; /* one bit per region where it's valid */ + char_u dw_flags; /* WF_ flags */ + char_u dw_word[1]; /* actually longer, NUL terminated */ +} dword_T; + +#define REGION_ALL 0xff + +#define HI2DWORD(hi) (dword_T *)(hi->hi_key - 2) + +/* Entry for a nword in "sl_ht". Note that the last three items must be + * identical to dword_T, so that they can be in the same hashtable. */ +typedef struct nword_S +{ + garray_T nw_ga; /* table with pointers to dword_T for part + starting with non-word character */ + int nw_maxlen; /* longest nword length (after the dword) */ + char_u nw_region; /* one bit per region where it's valid */ + char_u nw_flags; /* WF_ flags */ + char_u nw_word[1]; /* actually longer, NUL terminated */ +} nword_T; + +/* Get nword_T pointer from hashitem that uses nw_word */ +static nword_T dumnw; +#define HI2NWORD(hi) ((nword_T *)((hi)->hi_key - (dumnw.nw_word - (char_u *)&dumnw))) + +#define DW_CAP 0x01 /* word must start with capital */ +#define DW_RARE 0x02 /* rare word */ +#define DW_NWORD 0x04 /* this is an nword_T */ +#define DW_DWORD 0x08 /* (also) use as dword without nword */ + /* * Structure used in "b_langp", filled from 'spelllang'. */ @@ -64,25 +114,15 @@ typedef struct langp_S } langp_T; #define LANGP_ENTRY(ga, i) (((langp_T *)(ga).ga_data) + (i)) -#define MATCH_ENTRY(gap, i) *(((char_u **)(gap)->ga_data) + i) - -/* - * The byte before a word in the hashtable indicates the type of word. - * Also used for the byte just before a match. - * The top two bits are used to indicate rare and case-sensitive words. - * The lower bits are used to indicate the region in which the word is valid. - * Words valid in all regions use REGION_ALL. - */ -#define REGION_MASK 0x3f -#define REGION_ALL 0x3f -#define CASE_MASK 0x40 -#define RARE_MASK 0x80 +#define DWORD_ENTRY(gap, i) *(((dword_T **)(gap)->ga_data) + i) #define SP_OK 0 #define SP_BAD 1 #define SP_RARE 2 #define SP_LOCAL 3 +static char *e_invchar2 = N_("E753: Invalid character in \"%s\""); + static slang_T *spell_load_lang __ARGS((char_u *lang)); static void spell_load_file __ARGS((char_u *fname)); static int find_region __ARGS((char_u *rp, char_u *region)); @@ -102,19 +142,34 @@ spell_check(wp, ptr, attrp) char_u *ptr; int *attrp; { - char_u *e; + char_u *e; /* end of word */ + char_u *ne; /* new end of word */ + char_u *me; /* max. end of match */ langp_T *lp; int result; int len = 0; - hash_T hash; hashitem_T *hi; - int c; -#define MAXWLEN 80 /* assume max. word len is 80 */ - char_u word[MAXWLEN + 1]; + int round; + char_u kword[MAXWLEN + 1]; /* word copy */ + char_u fword[MAXWLEN + 1]; /* word with case folded */ + char_u match[MAXWLEN + 1]; /* fword with additional chars */ + char_u kwordclen[MAXWLEN + 1]; /* len of orig chars after kword[] */ + char_u fwordclen[MAXWLEN + 1]; /* len of chars after fword[] */ + char_u *clen; + int cidx = 0; /* char index in xwordclen[] */ + hash_T fhash; /* hash for fword */ + hash_T khash; /* hash for kword */ + int match_len = 0; /* length of match[] */ + int fmatch_len = 0; /* length of nword match in chars */ garray_T *gap; - int l, h, t; - char_u *p; + int l, t; + char_u *p, *tp; int n; + dword_T *dw; + dword_T *tdw; + winfo_T *wi; + nword_T *nw; + int w_isupper; /* Find the end of the word. We already know that *ptr is a word char. */ e = ptr; @@ -122,119 +177,265 @@ spell_check(wp, ptr, attrp) { mb_ptr_adv(e); ++len; - } while (*e != NUL && vim_iswordc_buf(e, wp->w_buffer)); + } while (*e != NUL && spell_iswordc(e)); + + /* A word starting with a number is always OK. */ + if (*ptr >= '0' && *ptr <= '9') + return (int)(e - ptr); + +#ifdef FEAT_MBYTE + w_isupper = MB_ISUPPER(mb_ptr2char(ptr)); +#else + w_isupper = MB_ISUPPER(*ptr); +#endif + + /* Make a copy of the word so that it can be NUL terminated. + * Compute hash value. */ + mch_memmove(kword, ptr, e - ptr); + kword[e - ptr] = NUL; + khash = hash_hash(kword); + + /* Make case-folded copy of the Word. Compute its hash value. */ + (void)str_foldcase(ptr, e - ptr, fword, MAXWLEN + 1); + fhash = hash_hash(fword); + + /* Further case-folded characters to check for an nword match go in + * match[]. */ + me = e; + + /* "ne" is the end for the longest match */ + ne = e; /* The word is bad unless we find it in the dictionary. */ result = SP_BAD; - /* Words are always stored with folded case. */ - (void)str_foldcase(ptr, e - ptr, word, MAXWLEN + 1); - hash = hash_hash(word); - /* * Loop over the languages specified in 'spelllang'. - * We check them all, because a match may find a longer word. + * We check them all, because a matching nword may be longer than an + * already found dword or nword. */ - for (lp = LANGP_ENTRY(wp->w_buffer->b_langp, 0); lp->lp_slang != NULL; - ++lp) + for (lp = LANGP_ENTRY(wp->w_buffer->b_langp, 0); lp->lp_slang != NULL; ++lp) { - /* Check words when it wasn't recognized as a good word yet. */ - if (result != SP_OK) + /* + * Check for a matching word in the hashtable. + * Check both the keep-case word and the fold-case word. + */ + for (round = 0; round <= 1; ++round) { - /* Word lookup. Using a hash table is fast. */ - hi = hash_lookup(&lp->lp_slang->sl_ht, word, hash); + if (round == 0) + { + wi = &lp->lp_slang->sl_kwords; + hi = hash_lookup(&wi->wi_ht, kword, khash); + } + else + { + wi = &lp->lp_slang->sl_fwords; + hi = hash_lookup(&wi->wi_ht, fword, fhash); + } if (!HASHITEM_EMPTY(hi)) { - /* The character before the key indicates the type of word. */ - c = hi->hi_key[-1]; - if ((c & CASE_MASK) != 0) + /* + * If this is an nword entry, check for match with remainder. + */ + dw = HI2DWORD(hi); + if (dw->dw_flags & DW_NWORD) { - /* Need to check first letter is uppercase. If it is, - * check region. If it isn't it may be a rare word. */ - if ( + /* If the word is not defined as a dword we must find an + * nword. */ + if ((dw->dw_flags & DW_DWORD) == 0) + dw = NULL; + + /* Fold more characters when needed for the nword. Need + * to do one extra to check for a non-word character after + * the nword. Also keep the byte-size of each character, + * both before and after folding case. */ + nw = HI2NWORD(hi); + while ((round == 0 + ? me - e <= nw->nw_maxlen + : match_len <= nw->nw_maxlen) + && *me != NUL) + { #ifdef FEAT_MBYTE - MB_ISUPPER(mb_ptr2char(ptr)) + l = mb_ptr2len_check(me); #else - MB_ISUPPER(*ptr) + l = 1; #endif - ) + (void)str_foldcase(me, l, match + match_len, + MAXWLEN - match_len + 1); + me += l; + kwordclen[cidx] = l; + fwordclen[cidx] = STRLEN(match + match_len); + match_len += fwordclen[cidx]; + ++cidx; + } + + if (round == 0) + { + clen = kwordclen; + tp = e; + } + else + { + clen = fwordclen; + tp = match; + } + + /* Match with each item. The longest match wins: + * "you've" is longer than "you". */ + gap = &nw->nw_ga; + for (t = 0; t < gap->ga_len; ++t) { - if ((c & lp->lp_region) == 0) + /* Skip entries with wrong case for first char. + * Continue if it's a rare word without a captial. */ + tdw = DWORD_ENTRY(gap, t); + if ((tdw->dw_flags & (DW_CAP | DW_RARE)) == DW_CAP + && !w_isupper) + continue; + + p = tdw->dw_word; + l = 0; + for (n = 0; p[n] != 0; n += clen[l++]) + if (vim_memcmp(p + n, tp + n, clen[l]) != 0) + break; + + /* Use a match if it's longer than previous matches + * and the next character is not a word character. */ + if (p[n] == 0 && l > fmatch_len && (tp[n] == 0 + || !spell_iswordc(tp + n))) + { + dw = tdw; + fmatch_len = l; + if (round == 0) + ne = tp + n; + else + { + /* Need to use the length of the original + * chars, not the fold-case ones. */ + ne = e; + for (l = 0; l < fmatch_len; ++l) + ne += kwordclen[l]; + } + if ((lp->lp_region & tdw->dw_region) == 0) + result = SP_LOCAL; + else if ((tdw->dw_flags & DW_CAP) && !w_isupper) + result = SP_RARE; + else + result = SP_OK; + } + } + + } + + if (dw != NULL) + { + if (dw->dw_flags & DW_CAP) + { + /* Need to check first letter is uppercase. If it is, + * check region. If it isn't it may be a rare word. + * */ + if (w_isupper) + { + if ((dw->dw_region & lp->lp_region) == 0) + result = SP_LOCAL; + else + result = SP_OK; + } + else if (dw->dw_flags & DW_RARE) + result = SP_RARE; + } + else + { + if ((dw->dw_region & lp->lp_region) == 0) result = SP_LOCAL; + else if (dw->dw_flags & DW_RARE) + result = SP_RARE; else result = SP_OK; } - else if (c & RARE_MASK) - result = SP_RARE; - } - else - { - if ((c & lp->lp_region) == 0) - result = SP_LOCAL; - else if (c & RARE_MASK) - result = SP_RARE; - else - result = SP_OK; } } } - /* Match lookup. Uses a binary search. If there is a match adjust - * "e" to the end. This is also done when a word matched, because - * "you've" is longer than "you". */ - gap = &lp->lp_slang->sl_match; - l = 0; /* low index */ - h = gap->ga_len - 1; /* high index */ - /* keep searching, the match must be between "l" and "h" (inclusive) */ - while (h >= l) - { - t = (h + l) / 2; - p = MATCH_ENTRY(gap, t) + 1; - for (n = 0; p[n] != 0 && p[n] == ptr[n]; ++n) - ; - if (p[n] == 0) + /* + * Check for an addition. + * Only after a dword, not after an nword. + * Check both the keep-case word and the fold-case word. + */ + if (fmatch_len == 0) + for (round = 0; round <= 1; ++round) { - if ((ptr[n] == 0 || !vim_iswordc_buf(ptr + n, wp->w_buffer))) + if (round == 0) + wi = &lp->lp_slang->sl_kwords; + else + wi = &lp->lp_slang->sl_fwords; + gap = &wi->wi_add; + if (gap->ga_len == 0) /* no additions, skip quickly */ + continue; + + /* Fold characters when needed for the addition. Need to do one + * extra to check for a word character after the addition. */ + while ((round == 0 + ? me - e <= wi->wi_addlen + : match_len <= wi->wi_addlen) + && *me != NUL) { - /* match! */ - e = ptr + n; - if (result != SP_OK) - { - if ((lp->lp_region & p[-1]) == 0) - result = SP_LOCAL; - else - result = SP_OK; - } - break; +#ifdef FEAT_MBYTE + l = mb_ptr2len_check(me); +#else + l = 1; +#endif + (void)str_foldcase(me, l, match + match_len, + MAXWLEN - match_len + 1); + me += l; + kwordclen[cidx] = l; + fwordclen[cidx] = STRLEN(match + match_len); + match_len += fwordclen[cidx]; + ++cidx; } - /* match is too short, next item is new low index */ - l = t + 1; - } - else if (p[n] < ptr[n]) - /* match is before word, next item is new low index */ - l = t + 1; - else - /* match is after word, previous item is new high index */ - h = t - 1; - } + + if (round == 0) + { + clen = kwordclen; + tp = e; + } + else + { + clen = fwordclen; + tp = match; + } - /* Addition lookup. Uses a linear search, there should be very few. - * If there is a match adjust "e" to the end. This doesn't change - * whether a word was good or bad, only the length. */ - gap = &lp->lp_slang->sl_add; - for (t = 0; t < gap->ga_len; ++t) - { - p = MATCH_ENTRY(gap, t) + 1; - for (n = 0; p[n] != 0 && p[n] == e[n]; ++n) - ; - if (p[n] == 0 - && (e[n] == 0 || !vim_iswordc_buf(e + n, wp->w_buffer))) - { - /* match */ - e += n; - break; + /* Addition lookup. Uses a linear search, there should be + * very few. If there is a match adjust "ne" to the end. + * This doesn't change whether a word was good or bad, only + * the length. */ + for (t = 0; t < gap->ga_len; ++t) + { + tdw = DWORD_ENTRY(gap, t); + p = tdw->dw_word; + l = 0; + for (n = 0; p[n] != 0; n += clen[l++]) + if (vim_memcmp(p + n, tp + n, clen[l]) != 0) + break; + + /* Use a match if it's longer than previous matches + * and the next character is not a word character. */ + if (p[n] == 0 && l > fmatch_len + && (tp[n] == 0 || !spell_iswordc(tp + n))) + { + fmatch_len = l; + if (round == 0) + ne = tp + n; + else + { + /* Need to use the length of the original + * chars, not the fold-case ones. */ + ne = e; + for (l = 0; l < fmatch_len; ++l) + ne += kwordclen[l]; + } + } + } } - } } if (result != SP_OK) @@ -247,7 +448,7 @@ spell_check(wp, ptr, attrp) *attrp = highlight_attr[HLF_SPL]; } - return (int)(e - ptr); + return (int)(ne - ptr); } static slang_T *load_lp; /* passed from spell_load_lang() to @@ -264,15 +465,19 @@ spell_load_lang(lang) char_u fname_enc[80]; char_u fname_ascii[20]; char_u *p; + int r; lp = (slang_T *)alloc(sizeof(slang_T)); if (lp != NULL) { lp->sl_name[0] = lang[0]; lp->sl_name[1] = lang[1]; - hash_init(&lp->sl_ht); - ga_init2(&lp->sl_match, sizeof(char_u *), 20); - ga_init2(&lp->sl_add, sizeof(char_u *), 4); + hash_init(&lp->sl_fwords.wi_ht); + ga_init2(&lp->sl_fwords.wi_add, sizeof(dword_T *), 4); + lp->sl_fwords.wi_addlen = 0; + hash_init(&lp->sl_kwords.wi_ht); + ga_init2(&lp->sl_kwords.wi_add, sizeof(dword_T *), 4); + lp->sl_kwords.wi_addlen = 0; lp->sl_regions[0] = NUL; lp->sl_block = NULL; @@ -286,17 +491,20 @@ spell_load_lang(lang) p = (char_u *)"latin1"; load_lp = lp; sprintf((char *)fname_enc, "spell/%c%c.%s.spl", lang[0], lang[1], p); - if (do_in_runtimepath(fname_enc, TRUE, spell_load_file) == FAIL) + r = do_in_runtimepath(fname_enc, TRUE, spell_load_file); + if (r == FAIL) { /* Try again to find an ASCII spell file. */ sprintf((char *)fname_ascii, "spell/%c%c.spl", lang[0], lang[1]); - if (do_in_runtimepath(fname_ascii, TRUE, spell_load_file) == FAIL) - { - vim_free(lp); - lp = NULL; - smsg((char_u *)_("Warning: Cannot find dictionary \"%s\""), + r = do_in_runtimepath(fname_ascii, TRUE, spell_load_file); + } + + if (r == FAIL) + { + vim_free(lp); + lp = NULL; + smsg((char_u *)_("Warning: Cannot find dictionary \"%s\""), fname_enc + 6); - } } else { @@ -319,229 +527,398 @@ spell_load_file(fname) int fd; size_t len; size_t l; + char_u *p = NULL, *np; + sblock_T *bl = NULL; + int bl_used = 0; size_t rest = 0; - char_u *p = NULL, *np; - sblock_T *bl; + char_u *rbuf; /* read buffer */ + char_u *rbuf_end; /* past last valid char in "rbuf" */ hash_T hash; hashitem_T *hi; int c; + int cc; int region = REGION_ALL; - char_u word[MAXWLEN + 1]; - int n; + int wlen; + winfo_T *wi; + dword_T *dw, *edw; + nword_T *nw = NULL; + int flags; + char_u *save_sourcing_name = sourcing_name; + linenr_T save_sourcing_lnum = sourcing_lnum; + + rbuf = alloc((unsigned)(SBLOCKSIZE + MAXWLEN + 1)); + if (rbuf == NULL) + return; fd = mch_open((char *)fname, O_RDONLY | O_EXTRA, 0); if (fd < 0) { EMSG2(_(e_notopen), fname); - return; + goto theend; } + sourcing_name = fname; + sourcing_lnum = 0; + /* Get the length of the whole file. */ len = lseek(fd, (off_t)0, SEEK_END); lseek(fd, (off_t)0, SEEK_SET); - /* Loop, reading the file one block at a time. + /* + * Read the file one block at a time. * "rest" is the length of an incomplete line at the previous block. - * "p" points to the remainder. */ + * "p" points to the remainder. + */ while (len > 0) { - /* Allocate a block of memory to store the info in. This is not freed - * until spell_reload() is called. */ + /* Read a block from the file. Prepend the remainder of the previous + * block, if any. */ + if (rest > 0) + { + if (rest > MAXWLEN) /* truncate long line (should be comment) */ + rest = MAXWLEN; + mch_memmove(rbuf, p, rest); + --sourcing_lnum; + } if (len > SBLOCKSIZE) l = SBLOCKSIZE; else l = len; len -= l; - bl = (sblock_T *)alloc((unsigned)(sizeof(sblock_T) - 1 + l + rest)); - if (bl == NULL) - break; - bl->sb_next = load_lp->sl_block; - load_lp->sl_block = bl; - - /* Read a block from the file. Prepend the remainder of the previous - * block. */ - if (rest > 0) - mch_memmove(bl->sb_data, p, rest); - if (read(fd, bl->sb_data + rest, l) != l) + if (read(fd, rbuf + rest, l) != l) { EMSG2(_(e_notread), fname); break; } - l += rest; + rbuf_end = rbuf + l + rest; rest = 0; /* Deal with each line that was read until we finish the block. */ - for (p = bl->sb_data; l > 0; p = np) + for (p = rbuf; p < rbuf_end; p = np) { - /* "np" points to the char after the line (CR or NL). */ - for (np = p; l > 0 && *np >= ' '; ++np) - --l; - if (l == 0) + ++sourcing_lnum; + + /* "np" points to the first char after the line (CR, NL or white + * space). */ + for (np = p; np < rbuf_end && *np >= ' '; mb_ptr_adv(np)) + ; + if (np >= rbuf_end) { - /* Incomplete line (or end of file). */ + /* Incomplete line or end of file. */ rest = np - p; if (len == 0) - EMSG2(_("E751: Truncated spell file: %s"), fname); + EMSG(_("E751: Truncated spell file")); break; } *np = NUL; /* terminate the line with a NUL */ - /* Skip comment and empty lines. */ - c = *p; - if (c != '#' && np > p) + if (*p == '-') { - if (c == '=' || c == '+') + /* + * Region marker: ---, -xx, -xx-yy, etc. + */ + ++p; + if (*p == '-') { - garray_T *gap; + if (p[1] != '-' || p[2] != NUL) + { + EMSG2(_(e_invchar2), p - 1); + len = 0; + break; + } + region = REGION_ALL; + } + else + { + char_u *rp = load_lp->sl_regions; + int r; - /* Match or Add item. */ - if (c == '=') - gap = &load_lp->sl_match; - else - gap = &load_lp->sl_add; - - if (ga_grow(gap, 1) == OK) + /* Start of a region. The region may be repeated: + * "-ca-uk". Fill "region" with the bit mask for the + * ones we find. */ + region = 0; + for (;;) { - for (n = 0; n < gap->ga_len; ++n) - if ((c = STRCMP(p + 1, - MATCH_ENTRY(gap, n) + 1)) < 0) + r = find_region(rp, p); + if (r == REGION_ALL) + { + /* new region, add it to sl_regions[] */ + r = STRLEN(rp); + if (r >= 16) + { + EMSG2(_("E752: Too many regions: %s"), p); + len = 0; break; - if (c == 0) - { - if (p_verbose > 0) - smsg((char_u *)_("Warning: duplicate match \"%s\" in %s"), - p + 1, fname); + } + else + { + rp[r] = p[0]; + rp[r + 1] = p[1]; + rp[r + 2] = NUL; + r = 1 << (r / 2); + } } else + r = 1 << r; + + region |= r; + if (p[2] != '-') { - mch_memmove((char_u **)gap->ga_data + n + 1, - (char_u **)gap->ga_data + n, - (gap->ga_len - n) * sizeof(char_u *)); - *(((char_u **)gap->ga_data) + n) = p; - *p = region; - ++gap->ga_len; + if (p[2] > ' ') + { + EMSG2(_(e_invchar2), p - 1); + len = 0; + } + break; } + p += 3; } } - else if (c == '-') + } + else if (*p != '#' && *p != NUL) + { + /* + * Not an empty line or comment. + */ + if (*p == '!') { - /* region item */ + wi = &load_lp->sl_kwords; /* keep case */ ++p; - if (*p == '-') - /* end of a region */ - region = REGION_ALL; - else - { - char_u *rp = load_lp->sl_regions; - int r; + } + else + wi = &load_lp->sl_fwords; /* fold case */ + + flags = 0; + c = *p; + if (c == '>') /* rare word */ + { + flags = DW_RARE; + ++p; + } + else if (*p == '+') /* addition */ + ++p; + + if (c != '+' && !spell_iswordc(p)) + { + EMSG2(_(e_invchar2), p); + len = 0; + break; + } - /* The region may be repeated: "-ca-uk". Fill - * "region" with the bit mask for the ones we find. */ - region = 0; - for (;;) - { - /* start of a region */ - r = find_region(rp, p); - if (r == REGION_ALL) - { - /* new region, add it */ - r = STRLEN(rp); - if (r >= 12) - { - EMSG2(_("E752: Too many regions in %s"), - fname); - r = REGION_ALL; - } - else - { - rp[r] = p[0]; - rp[r + 1] = p[1]; - rp[r + 2] = NUL; - r = 1 << (r / 2); - } - } - else - r = 1 << r; + /* Make sure there is room for the word. Folding case may + * double the size. */ + wlen = np - p; + if (bl == NULL || bl_used + sizeof(dword_T) + wlen +#ifdef FEAT_MBYTE + * (has_mbyte ? 2 : 1) +#endif + >= SBLOCKSIZE) + { + /* Allocate a block of memory to store the dword_T in. + * This is not freed until spell_reload() is called. */ + bl = (sblock_T *)alloc((unsigned)(sizeof(sblock_T) + + SBLOCKSIZE)); + if (bl == NULL) + { + len = 0; + break; + } + bl->sb_next = load_lp->sl_block; + load_lp->sl_block = bl; + bl_used = 0; + } + dw = (dword_T *)(bl->sb_data + bl_used); - region |= r; - if (p[2] != '-') - { - if (p[2] != NUL) - EMSG2(_("E753: Invalid character in \"%s\""), - p - 1); - break; - } - p += 3; - } - } + /* For fold-case words fold the case and check for start + * with uppercase letter. */ + if (wi == &load_lp->sl_fwords) + { +#ifdef FEAT_MBYTE + if (MB_ISUPPER(mb_ptr2char(p))) +#else + if (MB_ISUPPER(*p)) +#endif + flags |= DW_CAP; + + /* Fold case. */ + (void)str_foldcase(p, np - p, dw->dw_word, wlen +#ifdef FEAT_MBYTE + * (has_mbyte ? 2 : 1) +#endif + + 1); +#ifdef FEAT_MBYTE + /* case folding may change length of word */ + wlen = STRLEN(dw->dw_word); +#endif } else { - /* add the word */ - if (c == '>') - c = region | RARE_MASK; - else + /* Keep case: copy the word as-is. */ + mch_memmove(dw->dw_word, p, wlen + 1); + } + + if (c == '+') + { + garray_T *gap = &wi->wi_add; + + /* Addition. TODO: search for matching entry? */ + if (wi->wi_addlen < wlen) + wi->wi_addlen = wlen; + if (ga_grow(gap, 1) == FAIL) { - if (c != ' ') - EMSG2(_("E753: Invalid character in \"%s\""), p); - c = region; + len = 0; + break; } -#ifdef FEAT_MBYTE - if (MB_ISUPPER(mb_ptr2char(p + 1))) -#else - if (MB_ISUPPER(p[1])) -#endif - c |= CASE_MASK; - *p++ = c; - (void)str_foldcase(p, np - p, word, MAXWLEN + 1); - n = STRLEN(word); - if (n > np - p) - { - sblock_T *s; + *(((dword_T **)gap->ga_data) + gap->ga_len) = dw; + ++gap->ga_len; + dw->dw_region = region; + dw->dw_flags = flags; + bl_used += sizeof(dword_T) + wlen; + } + else + { + /* + * Check for a non-word character. If found it's + * going to be an nword. + * For an nword we split in two: the leading dword and + * the remainder. The dword goes in the hashtable + * with an nword_T, the remainder is put in the + * dword_T (starting with the first non-word + * character). + */ + cc = NUL; + for (p = dw->dw_word; *p != NUL; mb_ptr_adv(p)) + if (!spell_iswordc(p)) + { + cc = *p; + *p = NUL; + break; + } - /* Folding case made word longer! We need to allocate - * memory for it. */ - s = (sblock_T *)alloc((unsigned)sizeof(sblock_T) - + n + 1); - if (s != NULL) - { - s->sb_next = load_lp->sl_block; - load_lp->sl_block = s; - s->sb_data[0] = p[-1]; - p = s->sb_data + 1; - } - } - mch_memmove(p, word, n + 1); - - hash = hash_hash(p); - hi = hash_lookup(&load_lp->sl_ht, p, hash); + /* check if we already have this dword */ + hash = hash_hash(dw->dw_word); + hi = hash_lookup(&wi->wi_ht, dw->dw_word, hash); if (!HASHITEM_EMPTY(hi)) { - c = hi->hi_key[-1]; - if ((c & (CASE_MASK | RARE_MASK)) - == (p[-1] & (CASE_MASK | RARE_MASK))) + /* Existing entry. */ + edw = HI2DWORD(hi); + if ((edw->dw_flags & (DW_CAP | DW_RARE)) + == (dw->dw_flags & (DW_CAP | DW_RARE))) { if (p_verbose > 0) smsg((char_u *)_("Warning: duplicate word \"%s\" in %s"), - p, fname); + dw->dw_word, fname); + } + } + + if (cc != NUL) /* nword */ + { + if (HASHITEM_EMPTY(hi) + || (edw->dw_flags & DW_NWORD) == 0) + { + sblock_T *sb; + + /* Need to allocate a new nword_T. Put it in an + * sblock_T, so that we can free it later. */ + sb = (sblock_T *)alloc( + (unsigned)(sizeof(sblock_T) + + sizeof(nword_T) + wlen)); + if (sb == NULL) + { + len = 0; + break; + } + sb->sb_next = load_lp->sl_block; + load_lp->sl_block = sb; + nw = (nword_T *)sb->sb_data; + + ga_init2(&nw->nw_ga, sizeof(dword_T *), 4); + nw->nw_maxlen = 0; + STRCPY(nw->nw_word, dw->dw_word); + if (!HASHITEM_EMPTY(hi)) + { + /* Note: the nw_region and nw_flags is for + * the dword that matches with the start + * of this nword, not for the nword + * itself! */ + nw->nw_region = edw->dw_region; + nw->nw_flags = edw->dw_flags | DW_NWORD; + + /* Remove the dword item so that we can + * add it as an nword. */ + hash_remove(&wi->wi_ht, hi); + hi = hash_lookup(&wi->wi_ht, + nw->nw_word, hash); + } + else + { + nw->nw_region = 0; + nw->nw_flags = DW_NWORD; + } } else - hi->hi_key[-1] |= (p[-1] & (CASE_MASK | RARE_MASK)); + nw = HI2NWORD(hi); + } + + if (HASHITEM_EMPTY(hi)) + { + /* Add new dword or nword entry. */ + hash_add_item(&wi->wi_ht, hi, cc == NUL + ? dw->dw_word : nw->nw_word, hash); + if (cc == NUL) + { + /* New dword: init the values and count the + * used space. */ + dw->dw_flags = DW_DWORD | flags; + dw->dw_region = region; + bl_used += sizeof(dword_T) + wlen; + } + } + else if (cc == NUL) + { + /* existing dword: add the region and flags */ + dw = edw; + dw->dw_region |= region; + dw->dw_flags |= DW_DWORD | flags; } - else - hash_add_item(&load_lp->sl_ht, hi, p, hash); + + if (cc != NUL) + { + /* Use the dword for the non-word character and + * following characters. */ + dw->dw_region = region; + dw->dw_flags = flags; + STRCPY(dw->dw_word + 1, p + 1); + dw->dw_word[0] = cc; + l = wlen - (p - dw->dw_word); + bl_used += sizeof(dword_T) + l; + if (nw->nw_maxlen < l) + nw->nw_maxlen = l; + + /* Add the dword to the growarray in the nword. */ + if (ga_grow(&nw->nw_ga, 1) == FAIL) + { + len = 0; + break; + } + *((dword_T **)nw->nw_ga.ga_data + nw->nw_ga.ga_len) + = dw; + ++nw->nw_ga.ga_len; + } } } - while (l > 0 && *np < ' ') - { + /* Skip over CR and NL characters and trailing white space. */ + while (np < rbuf_end && *np <= ' ') ++np; - --l; - } } } close(fd); +theend: + sourcing_name = save_sourcing_name; + sourcing_lnum = save_sourcing_lnum; + vim_free(rbuf); } /* @@ -672,15 +1049,19 @@ spell_reload() slang_T *lp; sblock_T *sp; + /* Initialize the table for spell_iswordc(). */ + init_spell_chartab(); + /* Unload all allocated memory. */ while (first_lang != NULL) { lp = first_lang; first_lang = lp->sl_next; - hash_clear(&lp->sl_ht); - ga_clear(&lp->sl_match); - ga_clear(&lp->sl_add); + hash_clear(&lp->sl_fwords.wi_ht); + ga_clear(&lp->sl_fwords.wi_add); + hash_clear(&lp->sl_kwords.wi_ht); + ga_clear(&lp->sl_kwords.wi_add); while (lp->sl_block != NULL) { sp = lp->sl_block;