Mercurial > vim
comparison src/spellfile.c @ 20683:3a9dcfe62691 v8.2.0895
patch 8.2.0895: :mkspell output does not mention the tree type
Commit: https://github.com/vim/vim/commit/408c23b0794540ee3c568a1569f21406c5ed3ab8
Author: Bram Moolenaar <Bram@vim.org>
Date: Wed Jun 3 22:15:45 2020 +0200
patch 8.2.0895: :mkspell output does not mention the tree type
Problem: :mkspell output does not mention the tree type.
Solution: Back out increasing the limits, it has no effect. Mention the
tree being compressed. Only give a message once per second.
author | Bram Moolenaar <Bram@vim.org> |
---|---|
date | Wed, 03 Jun 2020 22:30:04 +0200 |
parents | a9f2cd2933ef |
children | 969d389a2e36 |
comparison legend: equal, deleted, inserted, replaced
20682:162d37f5c551 | 20683:3a9dcfe62691 |
---|---|
1992 static char_u *getroom_save(spellinfo_T *spin, char_u *s); | 1992 static char_u *getroom_save(spellinfo_T *spin, char_u *s); |
1993 static int store_word(spellinfo_T *spin, char_u *word, int flags, int region, char_u *pfxlist, int need_affix); | 1993 static int store_word(spellinfo_T *spin, char_u *word, int flags, int region, char_u *pfxlist, int need_affix); |
1994 static int tree_add_word(spellinfo_T *spin, char_u *word, wordnode_T *tree, int flags, int region, int affixID); | 1994 static int tree_add_word(spellinfo_T *spin, char_u *word, wordnode_T *tree, int flags, int region, int affixID); |
1995 static wordnode_T *get_wordnode(spellinfo_T *spin); | 1995 static wordnode_T *get_wordnode(spellinfo_T *spin); |
1996 static void free_wordnode(spellinfo_T *spin, wordnode_T *n); | 1996 static void free_wordnode(spellinfo_T *spin, wordnode_T *n); |
1997 static void wordtree_compress(spellinfo_T *spin, wordnode_T *root); | 1997 static void wordtree_compress(spellinfo_T *spin, wordnode_T *root, char *name); |
1998 static long node_compress(spellinfo_T *spin, wordnode_T *node, hashtab_T *ht, long *tot); | 1998 static long node_compress(spellinfo_T *spin, wordnode_T *node, hashtab_T *ht, long *tot); |
1999 static int node_equal(wordnode_T *n1, wordnode_T *n2); | 1999 static int node_equal(wordnode_T *n1, wordnode_T *n2); |
2000 static void clear_node(wordnode_T *node); | 2000 static void clear_node(wordnode_T *node); |
2001 static int put_node(FILE *fd, wordnode_T *node, int idx, int regionmask, int prefixtree); | 2001 static int put_node(FILE *fd, wordnode_T *node, int idx, int regionmask, int prefixtree); |
2002 static int sug_filltree(spellinfo_T *spin, slang_T *slang); | 2002 static int sug_filltree(spellinfo_T *spin, slang_T *slang); |
2023 * 'mkspellmem' option. | 2023 * 'mkspellmem' option. |
2024 */ | 2024 */ |
2025 static long compress_start = 30000; // memory / SBLOCKSIZE | 2025 static long compress_start = 30000; // memory / SBLOCKSIZE |
2026 static long compress_inc = 100; // memory / SBLOCKSIZE | 2026 static long compress_inc = 100; // memory / SBLOCKSIZE |
2027 static long compress_added = 500000; // word count | 2027 static long compress_added = 500000; // word count |
2028 | |
2029 // Actually used values. These can change if compression doesn't result in | |
2030 // reducing the size. | |
2031 static long used_compress_inc; | |
2032 static long used_compress_added; | |
2033 | 2028 |
2034 /* | 2029 /* |
2035 * Check the 'mkspellmem' option. Return FAIL if it's wrong. | 2030 * Check the 'mkspellmem' option. Return FAIL if it's wrong. |
2036 * Sets "sps_flags". | 2031 * Sets "sps_flags". |
2037 */ | 2032 */ |
3504 int non_ascii = 0; | 3499 int non_ascii = 0; |
3505 int retval = OK; | 3500 int retval = OK; |
3506 char_u message[MAXLINELEN + MAXWLEN]; | 3501 char_u message[MAXLINELEN + MAXWLEN]; |
3507 int flags; | 3502 int flags; |
3508 int duplicate = 0; | 3503 int duplicate = 0; |
3504 time_T last_msg_time = 0; | |
3509 | 3505 |
3510 /* | 3506 /* |
3511 * Open the file. | 3507 * Open the file. |
3512 */ | 3508 */ |
3513 fd = mch_fopen((char *)fname, "r"); | 3509 fd = mch_fopen((char *)fname, "r"); |
3592 ++non_ascii; | 3588 ++non_ascii; |
3593 vim_free(pc); | 3589 vim_free(pc); |
3594 continue; | 3590 continue; |
3595 } | 3591 } |
3596 | 3592 |
3597 // This takes time, print a message every 10000 words. | 3593 // This takes time, print a message every 10000 words, but not more |
3594 // often than once per second. | |
3598 if (spin->si_verbose && spin->si_msg_count > 10000) | 3595 if (spin->si_verbose && spin->si_msg_count > 10000) |
3599 { | 3596 { |
3600 spin->si_msg_count = 0; | 3597 spin->si_msg_count = 0; |
3601 vim_snprintf((char *)message, sizeof(message), | 3598 if (vim_time() > last_msg_time) |
3602 _("line %6d, word %6ld - %s"), | 3599 { |
3603 lnum, spin->si_foldwcount + spin->si_keepwcount, w); | 3600 last_msg_time = vim_time(); |
3604 msg_start(); | 3601 vim_snprintf((char *)message, sizeof(message), |
3605 msg_outtrans_long_attr(message, 0); | 3602 _("line %6d, word %6ld - %s"), |
3606 msg_clr_eos(); | 3603 lnum, spin->si_foldwcount + spin->si_keepwcount, w); |
3607 msg_didout = FALSE; | 3604 msg_start(); |
3608 msg_col = 0; | 3605 msg_outtrans_long_attr(message, 0); |
3609 out_flush(); | 3606 msg_clr_eos(); |
3607 msg_didout = FALSE; | |
3608 msg_col = 0; | |
3609 out_flush(); | |
3610 } | |
3610 } | 3611 } |
3611 | 3612 |
3612 // Store the word in the hashtable to be able to find duplicates. | 3613 // Store the word in the hashtable to be able to find duplicates. |
3613 dw = (char_u *)getroom_save(spin, w); | 3614 dw = (char_u *)getroom_save(spin, w); |
3614 if (dw == NULL) | 3615 if (dw == NULL) |
4538 | 4539 |
4539 if (spin->si_compress_cnt > 1) | 4540 if (spin->si_compress_cnt > 1) |
4540 { | 4541 { |
4541 if (--spin->si_compress_cnt == 1) | 4542 if (--spin->si_compress_cnt == 1) |
4542 // Did enough words to lower the block count limit. | 4543 // Did enough words to lower the block count limit. |
4543 spin->si_blocks_cnt += used_compress_inc; | 4544 spin->si_blocks_cnt += compress_inc; |
4544 } | 4545 } |
4545 | 4546 |
4546 /* | 4547 /* |
4547 * When we have allocated lots of memory we need to compress the word tree | 4548 * When we have allocated lots of memory we need to compress the word tree |
4548 * to free up some room. But compression is slow, and we might actually | 4549 * to free up some room. But compression is slow, and we might actually |
4549 * need that room, thus only compress in the following situations: | 4550 * need that room, thus only compress in the following situations: |
4550 * 1. When not compressed before (si_compress_cnt == 0): when using | 4551 * 1. When not compressed before (si_compress_cnt == 0): when using |
4551 * "compress_start" blocks. | 4552 * "compress_start" blocks. |
4552 * 2. When compressed before and used "used_compress_inc" blocks before | 4553 * 2. When compressed before and used "compress_inc" blocks before |
4553 * adding "used_compress_added" words (si_compress_cnt > 1). | 4554 * adding "compress_added" words (si_compress_cnt > 1). |
4554 * 3. When compressed before, added "used_compress_added" words | 4555 * 3. When compressed before, added "compress_added" words |
4555 * (si_compress_cnt == 1) and the number of free nodes drops below the | 4556 * (si_compress_cnt == 1) and the number of free nodes drops below the |
4556 * maximum word length. | 4557 * maximum word length. |
4557 */ | 4558 */ |
4558 #ifndef SPELL_COMPRESS_ALLWAYS | 4559 #ifndef SPELL_COMPRESS_ALLWAYS |
4559 if (spin->si_compress_cnt == 1 | 4560 if (spin->si_compress_cnt == 1 |
4560 ? spin->si_free_count < MAXWLEN | 4561 ? spin->si_free_count < MAXWLEN |
4561 : spin->si_blocks_cnt >= compress_start) | 4562 : spin->si_blocks_cnt >= compress_start) |
4562 #endif | 4563 #endif |
4563 { | 4564 { |
4564 // Decrement the block counter. The effect is that we compress again | 4565 // Decrement the block counter. The effect is that we compress again |
4565 // when the freed up room has been used and another "used_compress_inc" | 4566 // when the freed up room has been used and another "compress_inc" |
4566 // blocks have been allocated. Unless "used_compress_added" words have | 4567 // blocks have been allocated. Unless "compress_added" words have |
4567 // been added, then the limit is put back again. | 4568 // been added, then the limit is put back again. |
4568 spin->si_blocks_cnt -= used_compress_inc; | 4569 spin->si_blocks_cnt -= compress_inc; |
4569 spin->si_compress_cnt = used_compress_added; | 4570 spin->si_compress_cnt = compress_added; |
4570 | 4571 |
4571 if (spin->si_verbose) | 4572 if (spin->si_verbose) |
4572 { | 4573 { |
4573 msg_start(); | 4574 msg_start(); |
4574 msg_puts(_(msg_compressing)); | 4575 msg_puts(_(msg_compressing)); |
4580 | 4581 |
4581 // Compress both trees. Either they both have many nodes, which makes | 4582 // Compress both trees. Either they both have many nodes, which makes |
4582 // compression useful, or one of them is small, which means | 4583 // compression useful, or one of them is small, which means |
4583 // compression goes fast. But when filling the soundfold word tree | 4584 // compression goes fast. But when filling the soundfold word tree |
4584 // there is no keep-case tree. | 4585 // there is no keep-case tree. |
4585 wordtree_compress(spin, spin->si_foldroot); | 4586 wordtree_compress(spin, spin->si_foldroot, "case-folded"); |
4586 if (affixID >= 0) | 4587 if (affixID >= 0) |
4587 wordtree_compress(spin, spin->si_keeproot); | 4588 wordtree_compress(spin, spin->si_keeproot, "keep-case"); |
4588 } | 4589 } |
4589 | 4590 |
4590 return OK; | 4591 return OK; |
4591 } | 4592 } |
4592 | 4593 |
4656 | 4657 |
4657 /* | 4658 /* |
4658 * Compress a tree: find tails that are identical and can be shared. | 4659 * Compress a tree: find tails that are identical and can be shared. |
4659 */ | 4660 */ |
4660 static void | 4661 static void |
4661 wordtree_compress(spellinfo_T *spin, wordnode_T *root) | 4662 wordtree_compress(spellinfo_T *spin, wordnode_T *root, char *name) |
4662 { | 4663 { |
4663 hashtab_T ht; | 4664 hashtab_T ht; |
4664 long n; | 4665 long n; |
4665 long tot = 0; | 4666 long tot = 0; |
4666 long perc; | 4667 long perc; |
4669 // start of the tree. | 4670 // start of the tree. |
4670 if (root->wn_sibling != NULL) | 4671 if (root->wn_sibling != NULL) |
4671 { | 4672 { |
4672 hash_init(&ht); | 4673 hash_init(&ht); |
4673 n = node_compress(spin, root->wn_sibling, &ht, &tot); | 4674 n = node_compress(spin, root->wn_sibling, &ht, &tot); |
4674 | |
4675 if (tot == 0) | |
4676 { | |
4677 // Compression did not have effect. Increase the limits by 20% to | |
4678 // avoid wasting time on compression, memory will be used anyway. | |
4679 used_compress_inc += used_compress_inc / 5; | |
4680 used_compress_added += used_compress_added / 5; | |
4681 } | |
4682 | 4675 |
4683 #ifndef SPELL_PRINTTREE | 4676 #ifndef SPELL_PRINTTREE |
4684 if (spin->si_verbose || p_verbose > 2) | 4677 if (spin->si_verbose || p_verbose > 2) |
4685 #endif | 4678 #endif |
4686 { | 4679 { |
4689 else if (tot == 0) | 4682 else if (tot == 0) |
4690 perc = 0; | 4683 perc = 0; |
4691 else | 4684 else |
4692 perc = (tot - n) * 100 / tot; | 4685 perc = (tot - n) * 100 / tot; |
4693 vim_snprintf((char *)IObuff, IOSIZE, | 4686 vim_snprintf((char *)IObuff, IOSIZE, |
4694 _("Compressed %ld of %ld nodes; %ld (%ld%%) remaining"), | 4687 _("Compressed %s: %ld of %ld nodes; %ld (%ld%%) remaining"), |
4695 n, tot, tot - n, perc); | 4688 name, n, tot, tot - n, perc); |
4696 spell_message(spin, IObuff); | 4689 spell_message(spin, IObuff); |
4697 } | 4690 } |
4698 #ifdef SPELL_PRINTTREE | 4691 #ifdef SPELL_PRINTTREE |
4699 spell_print_tree(root->wn_sibling); | 4692 spell_print_tree(root->wn_sibling); |
4700 #endif | 4693 #endif |
4802 n = (nr >> 24) & 0xff; | 4795 n = (nr >> 24) & 0xff; |
4803 node->wn_u1.hashkey[4] = n == 0 ? 1 : n; | 4796 node->wn_u1.hashkey[4] = n == 0 ? 1 : n; |
4804 node->wn_u1.hashkey[5] = NUL; | 4797 node->wn_u1.hashkey[5] = NUL; |
4805 | 4798 |
4806 // Check for CTRL-C pressed now and then. | 4799 // Check for CTRL-C pressed now and then. |
4807 fast_breakcheck(); | 4800 veryfast_breakcheck(); |
4808 | 4801 |
4809 return compressed; | 4802 return compressed; |
4810 } | 4803 } |
4811 | 4804 |
4812 /* | 4805 /* |
5511 | 5504 |
5512 /* | 5505 /* |
5513 * Compress the soundfold trie. | 5506 * Compress the soundfold trie. |
5514 */ | 5507 */ |
5515 spell_message(spin, (char_u *)_(msg_compressing)); | 5508 spell_message(spin, (char_u *)_(msg_compressing)); |
5516 wordtree_compress(spin, spin->si_foldroot); | 5509 wordtree_compress(spin, spin->si_foldroot, "case-folded"); |
5517 | 5510 |
5518 /* | 5511 /* |
5519 * Write the .sug file. | 5512 * Write the .sug file. |
5520 * Make the file name by changing ".spl" to ".sug". | 5513 * Make the file name by changing ".spl" to ".sug". |
5521 */ | 5514 */ |
5911 ga_init2(&spin.si_map, (int)sizeof(char_u), 100); | 5904 ga_init2(&spin.si_map, (int)sizeof(char_u), 100); |
5912 ga_init2(&spin.si_comppat, (int)sizeof(char_u *), 20); | 5905 ga_init2(&spin.si_comppat, (int)sizeof(char_u *), 20); |
5913 ga_init2(&spin.si_prefcond, (int)sizeof(char_u *), 50); | 5906 ga_init2(&spin.si_prefcond, (int)sizeof(char_u *), 50); |
5914 hash_init(&spin.si_commonwords); | 5907 hash_init(&spin.si_commonwords); |
5915 spin.si_newcompID = 127; // start compound ID at first maximum | 5908 spin.si_newcompID = 127; // start compound ID at first maximum |
5916 used_compress_inc = compress_inc; | |
5917 used_compress_added = compress_added; | |
5918 | 5909 |
5919 // default: fnames[0] is output file, following are input files | 5910 // default: fnames[0] is output file, following are input files |
5920 innames = &fnames[1]; | 5911 innames = &fnames[1]; |
5921 incount = fcount - 1; | 5912 incount = fcount - 1; |
5922 | 5913 |
6076 { | 6067 { |
6077 /* | 6068 /* |
6078 * Combine tails in the tree. | 6069 * Combine tails in the tree. |
6079 */ | 6070 */ |
6080 spell_message(&spin, (char_u *)_(msg_compressing)); | 6071 spell_message(&spin, (char_u *)_(msg_compressing)); |
6081 wordtree_compress(&spin, spin.si_foldroot); | 6072 wordtree_compress(&spin, spin.si_foldroot, "case-folded"); |
6082 wordtree_compress(&spin, spin.si_keeproot); | 6073 wordtree_compress(&spin, spin.si_keeproot, "keep-case"); |
6083 wordtree_compress(&spin, spin.si_prefroot); | 6074 wordtree_compress(&spin, spin.si_prefroot, "prefixes"); |
6084 } | 6075 } |
6085 | 6076 |
6086 if (!error && !got_int) | 6077 if (!error && !got_int) |
6087 { | 6078 { |
6088 /* | 6079 /* |
6673 lp->sl_map_array[c] = headc; | 6664 lp->sl_map_array[c] = headc; |
6674 } | 6665 } |
6675 } | 6666 } |
6676 } | 6667 } |
6677 | 6668 |
6678 | |
6679 #endif // FEAT_SPELL | 6669 #endif // FEAT_SPELL |