comparison src/spell.c @ 7526:08c1f73efcde v7.4.1064

commit https://github.com/vim/vim/commit/7b877b360532713dc21a0ff3d55a76ac02eaf573 Author: Bram Moolenaar <Bram@vim.org> Date: Sat Jan 9 13:51:34 2016 +0100 patch 7.4.1064 Problem: When a spell file has single letter compounding creating suggestions takes an awful long time. Solution: Add the NOCOMPOUNDSUGS flag.
author Christian Brabandt <cb@256bit.org>
date Sat, 09 Jan 2016 14:00:04 +0100
parents a64793340689
children 0b6c37dd858d
comparison
equal deleted inserted replaced
7525:319980d694f4 7526:08c1f73efcde
55 55
56 /* Use SPELL_PRINTTREE for debugging: dump the word tree after adding a word. 56 /* Use SPELL_PRINTTREE for debugging: dump the word tree after adding a word.
57 * Only use it for small word lists! */ 57 * Only use it for small word lists! */
58 #if 0 58 #if 0
59 # define SPELL_PRINTTREE 59 # define SPELL_PRINTTREE
60 #endif
61
62 /* Use SPELL_COMPRESS_ALLWAYS for debugging: compress the word tree after
63 * adding a word. Only use it for small word lists! */
64 #if 0
65 # define SPELL_COMPRESS_ALLWAYS
60 #endif 66 #endif
61 67
62 /* Use DEBUG_TRIEWALK to print the changes made in suggest_trie_walk() for a 68 /* Use DEBUG_TRIEWALK to print the changes made in suggest_trie_walk() for a
63 * specific word. */ 69 * specific word. */
64 #if 0 70 #if 0
175 * 181 *
176 * sectionID == SN_SUGFILE: <timestamp> 182 * sectionID == SN_SUGFILE: <timestamp>
177 * <timestamp> 8 bytes time in seconds that must match with .sug file 183 * <timestamp> 8 bytes time in seconds that must match with .sug file
178 * 184 *
179 * sectionID == SN_NOSPLITSUGS: nothing 185 * sectionID == SN_NOSPLITSUGS: nothing
186 *
187 * sectionID == SN_NOCOMPOUNDSUGS: nothing
180 * 188 *
181 * sectionID == SN_WORDS: <word> ... 189 * sectionID == SN_WORDS: <word> ...
182 * <word> N bytes NUL terminated common word 190 * <word> N bytes NUL terminated common word
183 * 191 *
184 * sectionID == SN_MAP: <mapstr> 192 * sectionID == SN_MAP: <mapstr>
499 * "sl_sal_first" maps chars, when has_mbyte 507 * "sl_sal_first" maps chars, when has_mbyte
500 * "sl_sal" is a list of wide char lists. */ 508 * "sl_sal" is a list of wide char lists. */
501 garray_T sl_repsal; /* list of fromto_T entries from REPSAL lines */ 509 garray_T sl_repsal; /* list of fromto_T entries from REPSAL lines */
502 short sl_repsal_first[256]; /* sl_rep_first for REPSAL lines */ 510 short sl_repsal_first[256]; /* sl_rep_first for REPSAL lines */
503 int sl_nosplitsugs; /* don't suggest splitting a word */ 511 int sl_nosplitsugs; /* don't suggest splitting a word */
512 int sl_nocompoundsugs; /* don't suggest compounding */
504 513
505 /* Info from the .sug file. Loaded on demand. */ 514 /* Info from the .sug file. Loaded on demand. */
506 time_t sl_sugtime; /* timestamp for .sug file */ 515 time_t sl_sugtime; /* timestamp for .sug file */
507 char_u *sl_sbyts; /* soundfolded word bytes */ 516 char_u *sl_sbyts; /* soundfolded word bytes */
508 idx_T *sl_sidxs; /* soundfolded word indexes */ 517 idx_T *sl_sidxs; /* soundfolded word indexes */
568 #define SN_SUGFILE 11 /* timestamp for .sug file */ 577 #define SN_SUGFILE 11 /* timestamp for .sug file */
569 #define SN_REPSAL 12 /* REPSAL items section */ 578 #define SN_REPSAL 12 /* REPSAL items section */
570 #define SN_WORDS 13 /* common words */ 579 #define SN_WORDS 13 /* common words */
571 #define SN_NOSPLITSUGS 14 /* don't split word for suggestions */ 580 #define SN_NOSPLITSUGS 14 /* don't split word for suggestions */
572 #define SN_INFO 15 /* info section */ 581 #define SN_INFO 15 /* info section */
582 #define SN_NOCOMPOUNDSUGS 16 /* don't compound for suggestions */
573 #define SN_END 255 /* end of sections */ 583 #define SN_END 255 /* end of sections */
574 584
575 #define SNF_REQUIRED 1 /* <sectionflags>: required section */ 585 #define SNF_REQUIRED 1 /* <sectionflags>: required section */
576 586
577 /* Result values. Lower number is accepted over higher one. */ 587 /* Result values. Lower number is accepted over higher one. */
2911 case SN_SUGFILE: 2921 case SN_SUGFILE:
2912 lp->sl_sugtime = get8ctime(fd); /* <timestamp> */ 2922 lp->sl_sugtime = get8ctime(fd); /* <timestamp> */
2913 break; 2923 break;
2914 2924
2915 case SN_NOSPLITSUGS: 2925 case SN_NOSPLITSUGS:
2916 lp->sl_nosplitsugs = TRUE; /* <timestamp> */ 2926 lp->sl_nosplitsugs = TRUE;
2927 break;
2928
2929 case SN_NOCOMPOUNDSUGS:
2930 lp->sl_nocompoundsugs = TRUE;
2917 break; 2931 break;
2918 2932
2919 case SN_COMPOUND: 2933 case SN_COMPOUND:
2920 res = read_compound(fd, lp, len); 2934 res = read_compound(fd, lp, len);
2921 break; 2935 break;
5003 garray_T si_sal; /* list of fromto_T entries from SAL lines */ 5017 garray_T si_sal; /* list of fromto_T entries from SAL lines */
5004 char_u *si_sofofr; /* SOFOFROM text */ 5018 char_u *si_sofofr; /* SOFOFROM text */
5005 char_u *si_sofoto; /* SOFOTO text */ 5019 char_u *si_sofoto; /* SOFOTO text */
5006 int si_nosugfile; /* NOSUGFILE item found */ 5020 int si_nosugfile; /* NOSUGFILE item found */
5007 int si_nosplitsugs; /* NOSPLITSUGS item found */ 5021 int si_nosplitsugs; /* NOSPLITSUGS item found */
5022 int si_nocompoundsugs; /* NOCOMPOUNDSUGS item found */
5008 int si_followup; /* soundsalike: ? */ 5023 int si_followup; /* soundsalike: ? */
5009 int si_collapse; /* soundsalike: ? */ 5024 int si_collapse; /* soundsalike: ? */
5010 hashtab_T si_commonwords; /* hashtable for common words */ 5025 hashtab_T si_commonwords; /* hashtable for common words */
5011 time_t si_sugtime; /* timestamp for .sug file */ 5026 time_t si_sugtime; /* timestamp for .sug file */
5012 int si_rem_accents; /* soundsalike: remove accents */ 5027 int si_rem_accents; /* soundsalike: remove accents */
5128 { 5143 {
5129 /* Done this node before, print the reference. */ 5144 /* Done this node before, print the reference. */
5130 PRINTSOME(line1, depth, "(%d)", node->wn_nr, 0); 5145 PRINTSOME(line1, depth, "(%d)", node->wn_nr, 0);
5131 PRINTSOME(line2, depth, " ", 0, 0); 5146 PRINTSOME(line2, depth, " ", 0, 0);
5132 PRINTSOME(line3, depth, " ", 0, 0); 5147 PRINTSOME(line3, depth, " ", 0, 0);
5133 msg(line1); 5148 msg((char_u *)line1);
5134 msg(line2); 5149 msg((char_u *)line2);
5135 msg(line3); 5150 msg((char_u *)line3);
5136 } 5151 }
5137 else 5152 else
5138 { 5153 {
5139 node->wn_u1.index = TRUE; 5154 node->wn_u1.index = TRUE;
5140 5155
5156 else 5171 else
5157 PRINTSOME(line3, depth, " ", 0, 0); 5172 PRINTSOME(line3, depth, " ", 0, 0);
5158 5173
5159 if (node->wn_byte == NUL) 5174 if (node->wn_byte == NUL)
5160 { 5175 {
5161 msg(line1); 5176 msg((char_u *)line1);
5162 msg(line2); 5177 msg((char_u *)line2);
5163 msg(line3); 5178 msg((char_u *)line3);
5164 } 5179 }
5165 5180
5166 /* do the children */ 5181 /* do the children */
5167 if (node->wn_byte != NUL && node->wn_child != NULL) 5182 if (node->wn_byte != NUL && node->wn_child != NULL)
5168 spell_print_node(node->wn_child, depth + 1); 5183 spell_print_node(node->wn_child, depth + 1);
5595 spin->si_nobreak = TRUE; 5610 spin->si_nobreak = TRUE;
5596 } 5611 }
5597 else if (is_aff_rule(items, itemcnt, "NOSPLITSUGS", 1)) 5612 else if (is_aff_rule(items, itemcnt, "NOSPLITSUGS", 1))
5598 { 5613 {
5599 spin->si_nosplitsugs = TRUE; 5614 spin->si_nosplitsugs = TRUE;
5615 }
5616 else if (is_aff_rule(items, itemcnt, "NOCOMPOUNDSUGS", 1))
5617 {
5618 spin->si_nocompoundsugs = TRUE;
5600 } 5619 }
5601 else if (is_aff_rule(items, itemcnt, "NOSUGFILE", 1)) 5620 else if (is_aff_rule(items, itemcnt, "NOSUGFILE", 1))
5602 { 5621 {
5603 spin->si_nosugfile = TRUE; 5622 spin->si_nosugfile = TRUE;
5604 } 5623 }
7619 } 7638 }
7620 prev = &node->wn_child; 7639 prev = &node->wn_child;
7621 node = *prev; 7640 node = *prev;
7622 } 7641 }
7623 #ifdef SPELL_PRINTTREE 7642 #ifdef SPELL_PRINTTREE
7624 smsg("Added \"%s\"", word); 7643 smsg((char_u *)"Added \"%s\"", word);
7625 spell_print_tree(root->wn_sibling); 7644 spell_print_tree(root->wn_sibling);
7626 #endif 7645 #endif
7627 7646
7628 /* count nr of words added since last message */ 7647 /* count nr of words added since last message */
7629 ++spin->si_msg_count; 7648 ++spin->si_msg_count;
7645 * adding "compress_added" words (si_compress_cnt > 1). 7664 * adding "compress_added" words (si_compress_cnt > 1).
7646 * 3. When compressed before, added "compress_added" words 7665 * 3. When compressed before, added "compress_added" words
7647 * (si_compress_cnt == 1) and the number of free nodes drops below the 7666 * (si_compress_cnt == 1) and the number of free nodes drops below the
7648 * maximum word length. 7667 * maximum word length.
7649 */ 7668 */
7650 #ifndef SPELL_PRINTTREE 7669 #ifndef SPELL_COMPRESS_ALLWAYS
7651 if (spin->si_compress_cnt == 1 7670 if (spin->si_compress_cnt == 1
7652 ? spin->si_free_count < MAXWLEN 7671 ? spin->si_free_count < MAXWLEN
7653 : spin->si_blocks_cnt >= compress_start) 7672 : spin->si_blocks_cnt >= compress_start)
7654 #endif 7673 #endif
7655 { 7674 {
8289 * This is used to notify that no suggestions with word splits are to be 8308 * This is used to notify that no suggestions with word splits are to be
8290 * made. */ 8309 * made. */
8291 if (spin->si_nosplitsugs) 8310 if (spin->si_nosplitsugs)
8292 { 8311 {
8293 putc(SN_NOSPLITSUGS, fd); /* <sectionID> */ 8312 putc(SN_NOSPLITSUGS, fd); /* <sectionID> */
8313 putc(0, fd); /* <sectionflags> */
8314 put_bytes(fd, (long_u)0, 4); /* <sectionlen> */
8315 }
8316
8317 /* SN_NOCOMPOUNDSUGS: nothing
8318 * This is used to notify that no suggestions with compounds are to be
8319 * made. */
8320 if (spin->si_nocompoundsugs)
8321 {
8322 putc(SN_NOCOMPOUNDSUGS, fd); /* <sectionID> */
8294 putc(0, fd); /* <sectionflags> */ 8323 putc(0, fd); /* <sectionflags> */
8295 put_bytes(fd, (long_u)0, 4); /* <sectionlen> */ 8324 put_bytes(fd, (long_u)0, 4); /* <sectionlen> */
8296 } 8325 }
8297 8326
8298 /* SN_COMPOUND: compound info. 8327 /* SN_COMPOUND: compound info.
11881 * 3. The badword and the word in the tree end. It may still 11910 * 3. The badword and the word in the tree end. It may still
11882 * be possible to compound another (short) word. 11911 * be possible to compound another (short) word.
11883 */ 11912 */
11884 try_compound = FALSE; 11913 try_compound = FALSE;
11885 if (!soundfold 11914 if (!soundfold
11915 && !slang->sl_nocompoundsugs
11886 && slang->sl_compprog != NULL 11916 && slang->sl_compprog != NULL
11887 && ((unsigned)flags >> 24) != 0 11917 && ((unsigned)flags >> 24) != 0
11888 && sp->ts_twordlen - sp->ts_splitoff 11918 && sp->ts_twordlen - sp->ts_splitoff
11889 >= slang->sl_compminlen 11919 >= slang->sl_compminlen
11890 #ifdef FEAT_MBYTE 11920 #ifdef FEAT_MBYTE
11905 compflags[sp->ts_complen + 1] = NUL; 11935 compflags[sp->ts_complen + 1] = NUL;
11906 } 11936 }
11907 11937
11908 /* For NOBREAK we never try splitting, it won't make any word 11938 /* For NOBREAK we never try splitting, it won't make any word
11909 * valid. */ 11939 * valid. */
11910 if (slang->sl_nobreak) 11940 if (slang->sl_nobreak && !slang->sl_nocompoundsugs)
11911 try_compound = TRUE; 11941 try_compound = TRUE;
11912 11942
11913 /* If we could add a compound word, and it's also possible to 11943 /* If we could add a compound word, and it's also possible to
11914 * split at this point, do the split first and set 11944 * split at this point, do the split first and set
11915 * TSF_DIDSPLIT to avoid doing it again. */ 11945 * TSF_DIDSPLIT to avoid doing it again. */