# HG changeset patch # User vimboss # Date 1123660295 0 # Node ID bb99638a7b5fc834cbe7cdef9071be4513c7e7b8 # Parent 0a60be12e47e322c4dbae7b083321c60f599c0b8 updated for version 7.0126 diff --git a/src/spell.c b/src/spell.c --- a/src/spell.c +++ b/src/spell.c @@ -50,6 +50,16 @@ * See ":help develop-spell". */ +/* Use SPELL_PRINTTREE for debugging: dump the word tree after adding a word. + * Only use for small word lists! + * SPELL_COMPRESS_CNT is in how many words we compress the tree. */ +#if 0 +# define SPELL_PRINTTREE +# define SPELL_COMPRESS_CNT 1 +#else +# define SPELL_COMPRESS_CNT 1000000 +#endif + /* * Use this to adjust the score after finding suggestions, based on the * suggested word sounding like the bad word. This is much faster than doing @@ -719,6 +729,7 @@ static linenr_T apply_prefixes __ARGS((s static char *e_format = N_("E759: Format error in spell file"); static char *e_spell_trunc = N_("E758: Truncated spell file"); +static char *msg_compressing = N_("Compressing word tree..."); /* * Main spell-checking function. @@ -3106,7 +3117,7 @@ struct wordnode_S { union /* shared to save space */ { - char_u hashkey[6]; /* room for the hash key */ + char_u hashkey[6]; /* the hash key, only used while compressing */ int index; /* index in written nodes (valid after first round) */ } wn_u1; @@ -3118,11 +3129,16 @@ struct wordnode_S wordnode_T *wn_child; /* child (next byte in word) */ wordnode_T *wn_sibling; /* next sibling (alternate byte in word, always sorted) */ + int wn_refs; /* nr of references to this node */ char_u wn_byte; /* Byte for this node. NUL for word end */ - short_u wn_flags; /* when wn_byte is NUL: WF_ flags */ - short wn_region; /* when wn_byte is NUL: region mask; for + char_u wn_prefixID; /* when "wn_byte" is NUL: supported/required + prefix ID or 0 */ + short_u wn_flags; /* when "wn_byte" is NUL: WF_ flags */ + short wn_region; /* when "wn_byte" is NUL: region mask; for PREFIXTREE it's the prefcondnr */ - char_u wn_prefixID; /* supported/required prefix ID or 0 */ +#ifdef SPELL_PRINTTREE + int wn_nr; /* sequence nr for printing */ +#endif }; #define WN_MASK 0xffff /* mask relevant bits of "wn_flags" */ @@ -3180,7 +3196,9 @@ static char_u *getroom_save __ARGS((sblo static void free_blocks __ARGS((sblock_T *bl)); static wordnode_T *wordtree_alloc __ARGS((sblock_T **blp)); static int store_word __ARGS((char_u *word, spellinfo_T *spin, int flags, int region, char_u *pfxlist)); -static int tree_add_word __ARGS((char_u *word, wordnode_T *tree, int flags, int region, int prefixID, sblock_T **blp)); +static int tree_add_word __ARGS((char_u *word, wordnode_T *tree, int flags, int region, int prefixID, spellinfo_T *spin)); +static wordnode_T *get_wordnode __ARGS((sblock_T **blp)); +static void free_wordnode __ARGS((wordnode_T *n)); static void wordtree_compress __ARGS((wordnode_T *root, spellinfo_T *spin)); static int node_compress __ARGS((wordnode_T *node, hashtab_T *ht, int *tot)); static int node_equal __ARGS((wordnode_T *n1, wordnode_T *n2)); @@ -3195,6 +3213,109 @@ static void init_spellfile __ARGS((void) * Use a negative number with the lower 8 bits zero. */ #define PFX_FLAGS -256 +static int words_added = 0; /* number of words added to tree */ + +#ifdef SPELL_PRINTTREE +/* + * For debugging the tree code: print the current tree in a (more or less) + * readable format, so that we can see what happens when adding a word and/or + * compressing the tree. + * Based on code from Olaf Seibert. + */ +#define PRINTLINESIZE 1000 +#define PRINTWIDTH 6 + +#define PRINTSOME(l, depth, fmt, a1, a2) vim_snprintf(l + depth * PRINTWIDTH, \ + PRINTLINESIZE - PRINTWIDTH * depth, fmt, a1, a2) + +static char line1[PRINTLINESIZE]; +static char line2[PRINTLINESIZE]; +static char line3[PRINTLINESIZE]; + + static void +spell_clear_flags(wordnode_T *node) +{ + wordnode_T *np; + + for (np = node; np != NULL; np = np->wn_sibling) + { + np->wn_u1.index = FALSE; + spell_clear_flags(np->wn_child); + } +} + + static void +spell_print_node(wordnode_T *node, int depth) +{ + if (node->wn_u1.index) + { + /* Done this node before, print the reference. */ + PRINTSOME(line1, depth, "(%d)", node->wn_nr, 0); + PRINTSOME(line2, depth, " ", 0, 0); + PRINTSOME(line3, depth, " ", 0, 0); + msg(line1); + msg(line2); + msg(line3); + } + else + { + node->wn_u1.index = TRUE; + + if (node->wn_byte != NUL) + { + if (node->wn_child != NULL) + PRINTSOME(line1, depth, " %c -> ", node->wn_byte, 0); + else + /* Cannot happen? */ + PRINTSOME(line1, depth, " %c ???", node->wn_byte, 0); + } + else + PRINTSOME(line1, depth, " $ ", 0, 0); + + PRINTSOME(line2, depth, "%d/%d ", node->wn_nr, node->wn_refs); + + if (node->wn_sibling != NULL) + PRINTSOME(line3, depth, " | ", 0, 0); + else + PRINTSOME(line3, depth, " ", 0, 0); + + if (node->wn_byte == NUL) + { + msg(line1); + msg(line2); + msg(line3); + } + + /* do the children */ + if (node->wn_byte != NUL && node->wn_child != NULL) + spell_print_node(node->wn_child, depth + 1); + + /* do the siblings */ + if (node->wn_sibling != NULL) + { + /* get rid of all parent details except | */ + STRCPY(line1, line3); + STRCPY(line2, line3); + spell_print_node(node->wn_sibling, depth); + } + } +} + + static void +spell_print_tree(wordnode_T *root) +{ + if (root != NULL) + { + /* Clear the "wn_u1.index" fields, used to remember what has been + * done. */ + spell_clear_flags(root); + + /* Recursively print the tree. */ + spell_print_node(root, 0); + } +} +#endif /* SPELL_PRINTTREE */ + /* * Read the affix file "fname". * Returns an afffile_T, NULL for complete failure. @@ -3611,7 +3732,7 @@ spell_read_aff(fname, spin) if (upper) n |= WFP_UP; tree_add_word(p, spin->si_prefroot, n, - idx, cur_aff->ah_newID, &spin->si_blocks); + idx, cur_aff->ah_newID, spin); } } } @@ -4583,7 +4704,7 @@ store_word(word, spin, flags, region, pf for (p = pfxlist; res == OK; ++p) { res = tree_add_word(foldword, spin->si_foldroot, ct | flags, - region, p == NULL ? 0 : *p, &spin->si_blocks); + region, p == NULL ? 0 : *p, spin); if (p == NULL || *p == NUL) break; } @@ -4594,7 +4715,7 @@ store_word(word, spin, flags, region, pf for (p = pfxlist; res == OK; ++p) { res = tree_add_word(word, spin->si_keeproot, flags, - region, p == NULL ? 0 : *p, &spin->si_blocks); + region, p == NULL ? 0 : *p, spin); if (p == NULL || *p == NUL) break; } @@ -4610,27 +4731,63 @@ store_word(word, spin, flags, region, pf * Returns FAIL when out of memory. */ static int -tree_add_word(word, root, flags, region, prefixID, blp) +tree_add_word(word, root, flags, region, prefixID, spin) char_u *word; wordnode_T *root; int flags; int region; int prefixID; - sblock_T **blp; + spellinfo_T *spin; { wordnode_T *node = root; wordnode_T *np; + wordnode_T *copyp, **copyprev; wordnode_T **prev = NULL; int i; + sblock_T **blp = &spin->si_blocks; /* Add each byte of the word to the tree, including the NUL at the end. */ for (i = 0; ; ++i) { + /* When there is more than one reference to this node we need to make + * a copy, so that we can modify it. Copy the whole list of siblings + * (we don't optimize for a partly shared list of siblings). */ + if (node != NULL && node->wn_refs > 1) + { + --node->wn_refs; + copyprev = prev; + for (copyp = node; copyp != NULL; copyp = copyp->wn_sibling) + { + /* Allocate a new node and copy the info. */ + np = get_wordnode(blp); + if (np == NULL) + return FAIL; + np->wn_child = copyp->wn_child; + if (np->wn_child != NULL) + ++np->wn_child->wn_refs; /* child gets extra ref */ + np->wn_byte = copyp->wn_byte; + if (np->wn_byte == NUL) + { + np->wn_flags = copyp->wn_flags; + np->wn_region = copyp->wn_region; + np->wn_prefixID = copyp->wn_prefixID; + } + + /* Link the new node in the list, there will be one ref. */ + np->wn_refs = 1; + *copyprev = np; + copyprev = &np->wn_sibling; + + /* Let "node" point to the head of the copied list. */ + if (copyp == node) + node = np; + } + } + /* Look for the sibling that has the same character. They are sorted * on byte value, thus stop searching when a sibling is found with a * higher byte value. For zero bytes (end of word) the sorting is - * done on flags and then on prefixID - */ + * done on flags and then on prefixID. */ while (node != NULL && (node->wn_byte < word[i] || (node->wn_byte == NUL @@ -4651,10 +4808,22 @@ tree_add_word(word, root, flags, region, || node->wn_prefixID != prefixID))) { /* Allocate a new node. */ - np = (wordnode_T *)getroom(blp, sizeof(wordnode_T), TRUE); + np = get_wordnode(blp); if (np == NULL) return FAIL; np->wn_byte = word[i]; + + /* If "node" is NULL this is a new child or the end of the sibling + * list: ref count is one. Otherwise use ref count of sibling and + * make ref count of sibling one (matters when inserting in front + * of the list of siblings). */ + if (node == NULL) + np->wn_refs = 1; + else + { + np->wn_refs = node->wn_refs; + node->wn_refs = 1; + } *prev = np; np->wn_sibling = node; node = np; @@ -4670,10 +4839,73 @@ tree_add_word(word, root, flags, region, prev = &node->wn_child; node = *prev; } +#ifdef SPELL_PRINTTREE + smsg("Added \"%s\"", word); + spell_print_tree(root->wn_sibling); +#endif + + /* + * Every so many words compress the tree, so that we don't use too much + * memory. + */ + if (++words_added >= SPELL_COMPRESS_CNT) + { + words_added = 0; + + msg_start(); + msg_puts((char_u *)_(msg_compressing)); + msg_clr_eos(); + msg_didout = FALSE; + msg_col = 0; + out_flush(); + wordtree_compress(root->wn_sibling, spin); + } return OK; } +/* We keep a list of nodes that have been freed during compression. They are + * re-used when adding a new node. The "wn_child" fields links them. */ +static wordnode_T *first_free_node = NULL; +#ifdef SPELL_PRINTTREE +static int wordnode_nr = 0; +#endif + +/* + * Get a wordnode_T, either from the list of previously freed nodes or + * allocate a new one. + */ + static wordnode_T * +get_wordnode(blp) + sblock_T **blp; +{ + wordnode_T *n; + + if (first_free_node == NULL) + n = (wordnode_T *)getroom(blp, sizeof(wordnode_T), TRUE); + else + { + n = first_free_node; + first_free_node = n->wn_child; + vim_memset(n, 0, sizeof(wordnode_T)); + } +#ifdef SPELL_PRINTTREE + n->wn_nr = ++wordnode_nr; +#endif + return n; +} + +/* + * Free a wordnode_T for re-use later. + */ + static void +free_wordnode(n) + wordnode_T *n; +{ + n->wn_child = first_free_node; + first_free_node = n; +} + /* * Compress a tree: find tails that are identical and can be shared. */ @@ -4685,20 +4917,31 @@ wordtree_compress(root, spin) hashtab_T ht; int n; int tot = 0; + int perc; if (root != NULL) { hash_init(&ht); n = node_compress(root, &ht, &tot); + +#ifndef SPELL_PRINTTREE if (spin->si_verbose || p_verbose > 2) +#endif { if (!spin->si_verbose) verbose_enter(); + if (tot > 1000000) + perc = (tot - n) / (tot / 100); + else + perc = (tot - n) * 100 / tot; smsg((char_u *)_("Compressed %d of %d nodes; %d%% remaining"), - n, tot, (tot - n) * 100 / tot); + n, tot, perc); if (p_verbose > 2) verbose_leave(); } +#ifdef SPELL_PRINTTREE + spell_print_tree(root); +#endif hash_clear(&ht); } } @@ -4749,8 +4992,17 @@ node_compress(node, ht, tot) if (node_equal(child, tp)) { /* Found one! Now use that child in place of the - * current one. This means the current child is - * dropped from the tree. */ + * current one. This means the current child and all + * its siblings is unlinked from the tree. */ + ++tp->wn_refs; + --child->wn_refs; + if (child->wn_refs == 0) + for (; child != NULL; child = child->wn_sibling) + { + if (child->wn_child != NULL) + --child->wn_child->wn_refs; + free_wordnode(child); + } np->wn_child = tp; ++compressed; break; @@ -5023,11 +5275,11 @@ write_vim_spell(fname, spin) for (round = 1; round <= 3; ++round) { if (round == 1) - tree = spin->si_foldroot; + tree = spin->si_foldroot->wn_sibling; else if (round == 2) - tree = spin->si_keeproot; + tree = spin->si_keeproot->wn_sibling; else - tree = spin->si_prefroot; + tree = spin->si_prefroot->wn_sibling; /* Clear the index and wnode fields in the tree. */ clear_node(tree); @@ -5271,6 +5523,11 @@ mkspell(fcount, fnames, ascii, overwrite ga_init2(&spin.si_sal, (int)sizeof(fromto_T), 20); ga_init2(&spin.si_map, (int)sizeof(char_u), 100); ga_init2(&spin.si_prefcond, (int)sizeof(char_u *), 50); + first_free_node = NULL; +#ifdef SPELL_PRINTTREE + wordnode_nr = 0; +#endif + words_added = 0; /* default: fnames[0] is output file, following are input files */ innames = &fnames[1]; @@ -5366,7 +5623,7 @@ mkspell(fcount, fnames, ascii, overwrite || spin.si_keeproot == NULL || spin.si_prefroot == NULL) { - error = TRUE; + free_blocks(spin.si_blocks); return; } @@ -5422,27 +5679,20 @@ mkspell(fcount, fnames, ascii, overwrite if (!error) { /* - * Remove the dummy NUL from the start of the tree root. - */ - spin.si_foldroot = spin.si_foldroot->wn_sibling; - spin.si_keeproot = spin.si_keeproot->wn_sibling; - spin.si_prefroot = spin.si_prefroot->wn_sibling; - - /* * Combine tails in the tree. */ if (!added_word || p_verbose > 2) { if (added_word) verbose_enter(); - MSG(_("Compressing word tree...")); + MSG(_(msg_compressing)); out_flush(); if (added_word) verbose_leave(); } - wordtree_compress(spin.si_foldroot, &spin); - wordtree_compress(spin.si_keeproot, &spin); - wordtree_compress(spin.si_prefroot, &spin); + wordtree_compress(spin.si_foldroot->wn_sibling, &spin); + wordtree_compress(spin.si_keeproot->wn_sibling, &spin); + wordtree_compress(spin.si_prefroot->wn_sibling, &spin); } if (!error)