comparison src/spellfile.c @ 20683:3a9dcfe62691 v8.2.0895

patch 8.2.0895: :mkspell output does not mention the tree type
Commit: https://github.com/vim/vim/commit/408c23b0794540ee3c568a1569f21406c5ed3ab8
Author: Bram Moolenaar <Bram@vim.org>
Date: Wed Jun 3 22:15:45 2020 +0200

patch 8.2.0895: :mkspell output does not mention the tree type
Problem: :mkspell output does not mention the tree type.
Solution: Back out increasing the limits, it has no effect. Mention the tree being compressed. Only give a message once per second.
author Bram Moolenaar <Bram@vim.org>
date Wed, 03 Jun 2020 22:30:04 +0200
parents a9f2cd2933ef
children 969d389a2e36
comparison
equal deleted inserted replaced
20682:162d37f5c551 20683:3a9dcfe62691
1992 static char_u *getroom_save(spellinfo_T *spin, char_u *s); 1992 static char_u *getroom_save(spellinfo_T *spin, char_u *s);
1993 static int store_word(spellinfo_T *spin, char_u *word, int flags, int region, char_u *pfxlist, int need_affix); 1993 static int store_word(spellinfo_T *spin, char_u *word, int flags, int region, char_u *pfxlist, int need_affix);
1994 static int tree_add_word(spellinfo_T *spin, char_u *word, wordnode_T *tree, int flags, int region, int affixID); 1994 static int tree_add_word(spellinfo_T *spin, char_u *word, wordnode_T *tree, int flags, int region, int affixID);
1995 static wordnode_T *get_wordnode(spellinfo_T *spin); 1995 static wordnode_T *get_wordnode(spellinfo_T *spin);
1996 static void free_wordnode(spellinfo_T *spin, wordnode_T *n); 1996 static void free_wordnode(spellinfo_T *spin, wordnode_T *n);
1997 static void wordtree_compress(spellinfo_T *spin, wordnode_T *root); 1997 static void wordtree_compress(spellinfo_T *spin, wordnode_T *root, char *name);
1998 static long node_compress(spellinfo_T *spin, wordnode_T *node, hashtab_T *ht, long *tot); 1998 static long node_compress(spellinfo_T *spin, wordnode_T *node, hashtab_T *ht, long *tot);
1999 static int node_equal(wordnode_T *n1, wordnode_T *n2); 1999 static int node_equal(wordnode_T *n1, wordnode_T *n2);
2000 static void clear_node(wordnode_T *node); 2000 static void clear_node(wordnode_T *node);
2001 static int put_node(FILE *fd, wordnode_T *node, int idx, int regionmask, int prefixtree); 2001 static int put_node(FILE *fd, wordnode_T *node, int idx, int regionmask, int prefixtree);
2002 static int sug_filltree(spellinfo_T *spin, slang_T *slang); 2002 static int sug_filltree(spellinfo_T *spin, slang_T *slang);
2023 * 'mkspellmem' option. 2023 * 'mkspellmem' option.
2024 */ 2024 */
2025 static long compress_start = 30000; // memory / SBLOCKSIZE 2025 static long compress_start = 30000; // memory / SBLOCKSIZE
2026 static long compress_inc = 100; // memory / SBLOCKSIZE 2026 static long compress_inc = 100; // memory / SBLOCKSIZE
2027 static long compress_added = 500000; // word count 2027 static long compress_added = 500000; // word count
2028
2029 // Actually used values. These can change if compression doesn't result in
2030 // reducing the size.
2031 static long used_compress_inc;
2032 static long used_compress_added;
2033 2028
2034 /* 2029 /*
2035 * Check the 'mkspellmem' option. Return FAIL if it's wrong. 2030 * Check the 'mkspellmem' option. Return FAIL if it's wrong.
2036 * Sets "sps_flags". 2031 * Sets "sps_flags".
2037 */ 2032 */
3504 int non_ascii = 0; 3499 int non_ascii = 0;
3505 int retval = OK; 3500 int retval = OK;
3506 char_u message[MAXLINELEN + MAXWLEN]; 3501 char_u message[MAXLINELEN + MAXWLEN];
3507 int flags; 3502 int flags;
3508 int duplicate = 0; 3503 int duplicate = 0;
3504 time_T last_msg_time = 0;
3509 3505
3510 /* 3506 /*
3511 * Open the file. 3507 * Open the file.
3512 */ 3508 */
3513 fd = mch_fopen((char *)fname, "r"); 3509 fd = mch_fopen((char *)fname, "r");
3592 ++non_ascii; 3588 ++non_ascii;
3593 vim_free(pc); 3589 vim_free(pc);
3594 continue; 3590 continue;
3595 } 3591 }
3596 3592
3597 // This takes time, print a message every 10000 words. 3593 // This takes time, print a message every 10000 words, but not more
3594 // often than once per second.
3598 if (spin->si_verbose && spin->si_msg_count > 10000) 3595 if (spin->si_verbose && spin->si_msg_count > 10000)
3599 { 3596 {
3600 spin->si_msg_count = 0; 3597 spin->si_msg_count = 0;
3601 vim_snprintf((char *)message, sizeof(message), 3598 if (vim_time() > last_msg_time)
3602 _("line %6d, word %6ld - %s"), 3599 {
3603 lnum, spin->si_foldwcount + spin->si_keepwcount, w); 3600 last_msg_time = vim_time();
3604 msg_start(); 3601 vim_snprintf((char *)message, sizeof(message),
3605 msg_outtrans_long_attr(message, 0); 3602 _("line %6d, word %6ld - %s"),
3606 msg_clr_eos(); 3603 lnum, spin->si_foldwcount + spin->si_keepwcount, w);
3607 msg_didout = FALSE; 3604 msg_start();
3608 msg_col = 0; 3605 msg_outtrans_long_attr(message, 0);
3609 out_flush(); 3606 msg_clr_eos();
3607 msg_didout = FALSE;
3608 msg_col = 0;
3609 out_flush();
3610 }
3610 } 3611 }
3611 3612
3612 // Store the word in the hashtable to be able to find duplicates. 3613 // Store the word in the hashtable to be able to find duplicates.
3613 dw = (char_u *)getroom_save(spin, w); 3614 dw = (char_u *)getroom_save(spin, w);
3614 if (dw == NULL) 3615 if (dw == NULL)
4538 4539
4539 if (spin->si_compress_cnt > 1) 4540 if (spin->si_compress_cnt > 1)
4540 { 4541 {
4541 if (--spin->si_compress_cnt == 1) 4542 if (--spin->si_compress_cnt == 1)
4542 // Did enough words to lower the block count limit. 4543 // Did enough words to lower the block count limit.
4543 spin->si_blocks_cnt += used_compress_inc; 4544 spin->si_blocks_cnt += compress_inc;
4544 } 4545 }
4545 4546
4546 /* 4547 /*
4547 * When we have allocated lots of memory we need to compress the word tree 4548 * When we have allocated lots of memory we need to compress the word tree
4548 * to free up some room. But compression is slow, and we might actually 4549 * to free up some room. But compression is slow, and we might actually
4549 * need that room, thus only compress in the following situations: 4550 * need that room, thus only compress in the following situations:
4550 * 1. When not compressed before (si_compress_cnt == 0): when using 4551 * 1. When not compressed before (si_compress_cnt == 0): when using
4551 * "compress_start" blocks. 4552 * "compress_start" blocks.
4552 * 2. When compressed before and used "used_compress_inc" blocks before 4553 * 2. When compressed before and used "compress_inc" blocks before
4553 * adding "used_compress_added" words (si_compress_cnt > 1). 4554 * adding "compress_added" words (si_compress_cnt > 1).
4554 * 3. When compressed before, added "used_compress_added" words 4555 * 3. When compressed before, added "compress_added" words
4555 * (si_compress_cnt == 1) and the number of free nodes drops below the 4556 * (si_compress_cnt == 1) and the number of free nodes drops below the
4556 * maximum word length. 4557 * maximum word length.
4557 */ 4558 */
4558 #ifndef SPELL_COMPRESS_ALLWAYS 4559 #ifndef SPELL_COMPRESS_ALLWAYS
4559 if (spin->si_compress_cnt == 1 4560 if (spin->si_compress_cnt == 1
4560 ? spin->si_free_count < MAXWLEN 4561 ? spin->si_free_count < MAXWLEN
4561 : spin->si_blocks_cnt >= compress_start) 4562 : spin->si_blocks_cnt >= compress_start)
4562 #endif 4563 #endif
4563 { 4564 {
4564 // Decrement the block counter. The effect is that we compress again 4565 // Decrement the block counter. The effect is that we compress again
4565 // when the freed up room has been used and another "used_compress_inc" 4566 // when the freed up room has been used and another "compress_inc"
4566 // blocks have been allocated. Unless "used_compress_added" words have 4567 // blocks have been allocated. Unless "compress_added" words have
4567 // been added, then the limit is put back again. 4568 // been added, then the limit is put back again.
4568 spin->si_blocks_cnt -= used_compress_inc; 4569 spin->si_blocks_cnt -= compress_inc;
4569 spin->si_compress_cnt = used_compress_added; 4570 spin->si_compress_cnt = compress_added;
4570 4571
4571 if (spin->si_verbose) 4572 if (spin->si_verbose)
4572 { 4573 {
4573 msg_start(); 4574 msg_start();
4574 msg_puts(_(msg_compressing)); 4575 msg_puts(_(msg_compressing));
4580 4581
4581 // Compress both trees. Either they both have many nodes, which makes 4582 // Compress both trees. Either they both have many nodes, which makes
4582 // compression useful, or one of them is small, which means 4583 // compression useful, or one of them is small, which means
4583 // compression goes fast. But when filling the soundfold word tree 4584 // compression goes fast. But when filling the soundfold word tree
4584 // there is no keep-case tree. 4585 // there is no keep-case tree.
4585 wordtree_compress(spin, spin->si_foldroot); 4586 wordtree_compress(spin, spin->si_foldroot, "case-folded");
4586 if (affixID >= 0) 4587 if (affixID >= 0)
4587 wordtree_compress(spin, spin->si_keeproot); 4588 wordtree_compress(spin, spin->si_keeproot, "keep-case");
4588 } 4589 }
4589 4590
4590 return OK; 4591 return OK;
4591 } 4592 }
4592 4593
4656 4657
4657 /* 4658 /*
4658 * Compress a tree: find tails that are identical and can be shared. 4659 * Compress a tree: find tails that are identical and can be shared.
4659 */ 4660 */
4660 static void 4661 static void
4661 wordtree_compress(spellinfo_T *spin, wordnode_T *root) 4662 wordtree_compress(spellinfo_T *spin, wordnode_T *root, char *name)
4662 { 4663 {
4663 hashtab_T ht; 4664 hashtab_T ht;
4664 long n; 4665 long n;
4665 long tot = 0; 4666 long tot = 0;
4666 long perc; 4667 long perc;
4669 // start of the tree. 4670 // start of the tree.
4670 if (root->wn_sibling != NULL) 4671 if (root->wn_sibling != NULL)
4671 { 4672 {
4672 hash_init(&ht); 4673 hash_init(&ht);
4673 n = node_compress(spin, root->wn_sibling, &ht, &tot); 4674 n = node_compress(spin, root->wn_sibling, &ht, &tot);
4674
4675 if (tot == 0)
4676 {
4677 // Compression did not have effect. Increase the limits by 20% to
4678 // avoid wasting time on compression, memory will be used anyway.
4679 used_compress_inc += used_compress_inc / 5;
4680 used_compress_added += used_compress_added / 5;
4681 }
4682 4675
4683 #ifndef SPELL_PRINTTREE 4676 #ifndef SPELL_PRINTTREE
4684 if (spin->si_verbose || p_verbose > 2) 4677 if (spin->si_verbose || p_verbose > 2)
4685 #endif 4678 #endif
4686 { 4679 {
4689 else if (tot == 0) 4682 else if (tot == 0)
4690 perc = 0; 4683 perc = 0;
4691 else 4684 else
4692 perc = (tot - n) * 100 / tot; 4685 perc = (tot - n) * 100 / tot;
4693 vim_snprintf((char *)IObuff, IOSIZE, 4686 vim_snprintf((char *)IObuff, IOSIZE,
4694 _("Compressed %ld of %ld nodes; %ld (%ld%%) remaining"), 4687 _("Compressed %s: %ld of %ld nodes; %ld (%ld%%) remaining"),
4695 n, tot, tot - n, perc); 4688 name, n, tot, tot - n, perc);
4696 spell_message(spin, IObuff); 4689 spell_message(spin, IObuff);
4697 } 4690 }
4698 #ifdef SPELL_PRINTTREE 4691 #ifdef SPELL_PRINTTREE
4699 spell_print_tree(root->wn_sibling); 4692 spell_print_tree(root->wn_sibling);
4700 #endif 4693 #endif
4802 n = (nr >> 24) & 0xff; 4795 n = (nr >> 24) & 0xff;
4803 node->wn_u1.hashkey[4] = n == 0 ? 1 : n; 4796 node->wn_u1.hashkey[4] = n == 0 ? 1 : n;
4804 node->wn_u1.hashkey[5] = NUL; 4797 node->wn_u1.hashkey[5] = NUL;
4805 4798
4806 // Check for CTRL-C pressed now and then. 4799 // Check for CTRL-C pressed now and then.
4807 fast_breakcheck(); 4800 veryfast_breakcheck();
4808 4801
4809 return compressed; 4802 return compressed;
4810 } 4803 }
4811 4804
4812 /* 4805 /*
5511 5504
5512 /* 5505 /*
5513 * Compress the soundfold trie. 5506 * Compress the soundfold trie.
5514 */ 5507 */
5515 spell_message(spin, (char_u *)_(msg_compressing)); 5508 spell_message(spin, (char_u *)_(msg_compressing));
5516 wordtree_compress(spin, spin->si_foldroot); 5509 wordtree_compress(spin, spin->si_foldroot, "case-folded");
5517 5510
5518 /* 5511 /*
5519 * Write the .sug file. 5512 * Write the .sug file.
5520 * Make the file name by changing ".spl" to ".sug". 5513 * Make the file name by changing ".spl" to ".sug".
5521 */ 5514 */
5911 ga_init2(&spin.si_map, (int)sizeof(char_u), 100); 5904 ga_init2(&spin.si_map, (int)sizeof(char_u), 100);
5912 ga_init2(&spin.si_comppat, (int)sizeof(char_u *), 20); 5905 ga_init2(&spin.si_comppat, (int)sizeof(char_u *), 20);
5913 ga_init2(&spin.si_prefcond, (int)sizeof(char_u *), 50); 5906 ga_init2(&spin.si_prefcond, (int)sizeof(char_u *), 50);
5914 hash_init(&spin.si_commonwords); 5907 hash_init(&spin.si_commonwords);
5915 spin.si_newcompID = 127; // start compound ID at first maximum 5908 spin.si_newcompID = 127; // start compound ID at first maximum
5916 used_compress_inc = compress_inc;
5917 used_compress_added = compress_added;
5918 5909
5919 // default: fnames[0] is output file, following are input files 5910 // default: fnames[0] is output file, following are input files
5920 innames = &fnames[1]; 5911 innames = &fnames[1];
5921 incount = fcount - 1; 5912 incount = fcount - 1;
5922 5913
6076 { 6067 {
6077 /* 6068 /*
6078 * Combine tails in the tree. 6069 * Combine tails in the tree.
6079 */ 6070 */
6080 spell_message(&spin, (char_u *)_(msg_compressing)); 6071 spell_message(&spin, (char_u *)_(msg_compressing));
6081 wordtree_compress(&spin, spin.si_foldroot); 6072 wordtree_compress(&spin, spin.si_foldroot, "case-folded");
6082 wordtree_compress(&spin, spin.si_keeproot); 6073 wordtree_compress(&spin, spin.si_keeproot, "keep-case");
6083 wordtree_compress(&spin, spin.si_prefroot); 6074 wordtree_compress(&spin, spin.si_prefroot, "prefixes");
6084 } 6075 }
6085 6076
6086 if (!error && !got_int) 6077 if (!error && !got_int)
6087 { 6078 {
6088 /* 6079 /*
6673 lp->sl_map_array[c] = headc; 6664 lp->sl_map_array[c] = headc;
6674 } 6665 }
6675 } 6666 }
6676 } 6667 }
6677 6668
6678
6679 #endif // FEAT_SPELL 6669 #endif // FEAT_SPELL