Mercurial > vim
comparison src/spell.c @ 7526:08c1f73efcde v7.4.1064
commit https://github.com/vim/vim/commit/7b877b360532713dc21a0ff3d55a76ac02eaf573
Author: Bram Moolenaar <Bram@vim.org>
Date: Sat Jan 9 13:51:34 2016 +0100
patch 7.4.1064
Problem: When a spell file has single letter compounding, creating
suggestions takes an awfully long time.
Solution: Add the NOCOMPOUNDSUGS flag.
author | Christian Brabandt <cb@256bit.org> |
---|---|
date | Sat, 09 Jan 2016 14:00:04 +0100 |
parents | a64793340689 |
children | 0b6c37dd858d |
comparison
equal
deleted
inserted
replaced
7525:319980d694f4 | 7526:08c1f73efcde |
---|---|
55 | 55 |
56 /* Use SPELL_PRINTTREE for debugging: dump the word tree after adding a word. | 56 /* Use SPELL_PRINTTREE for debugging: dump the word tree after adding a word. |
57 * Only use it for small word lists! */ | 57 * Only use it for small word lists! */ |
58 #if 0 | 58 #if 0 |
59 # define SPELL_PRINTTREE | 59 # define SPELL_PRINTTREE |
60 #endif | |
61 | |
62 /* Use SPELL_COMPRESS_ALLWAYS for debugging: compress the word tree after | |
63 * adding a word. Only use it for small word lists! */ | |
64 #if 0 | |
65 # define SPELL_COMPRESS_ALLWAYS | |
60 #endif | 66 #endif |
61 | 67 |
62 /* Use DEBUG_TRIEWALK to print the changes made in suggest_trie_walk() for a | 68 /* Use DEBUG_TRIEWALK to print the changes made in suggest_trie_walk() for a |
63 * specific word. */ | 69 * specific word. */ |
64 #if 0 | 70 #if 0 |
175 * | 181 * |
176 * sectionID == SN_SUGFILE: <timestamp> | 182 * sectionID == SN_SUGFILE: <timestamp> |
177 * <timestamp> 8 bytes time in seconds that must match with .sug file | 183 * <timestamp> 8 bytes time in seconds that must match with .sug file |
178 * | 184 * |
179 * sectionID == SN_NOSPLITSUGS: nothing | 185 * sectionID == SN_NOSPLITSUGS: nothing |
186 * | |
187 * sectionID == SN_NOCOMPOUNDSUGS: nothing | |
180 * | 188 * |
181 * sectionID == SN_WORDS: <word> ... | 189 * sectionID == SN_WORDS: <word> ... |
182 * <word> N bytes NUL terminated common word | 190 * <word> N bytes NUL terminated common word |
183 * | 191 * |
184 * sectionID == SN_MAP: <mapstr> | 192 * sectionID == SN_MAP: <mapstr> |
499 * "sl_sal_first" maps chars, when has_mbyte | 507 * "sl_sal_first" maps chars, when has_mbyte |
500 * "sl_sal" is a list of wide char lists. */ | 508 * "sl_sal" is a list of wide char lists. */ |
501 garray_T sl_repsal; /* list of fromto_T entries from REPSAL lines */ | 509 garray_T sl_repsal; /* list of fromto_T entries from REPSAL lines */ |
502 short sl_repsal_first[256]; /* sl_rep_first for REPSAL lines */ | 510 short sl_repsal_first[256]; /* sl_rep_first for REPSAL lines */ |
503 int sl_nosplitsugs; /* don't suggest splitting a word */ | 511 int sl_nosplitsugs; /* don't suggest splitting a word */ |
512 int sl_nocompoundsugs; /* don't suggest compounding */ | |
504 | 513 |
505 /* Info from the .sug file. Loaded on demand. */ | 514 /* Info from the .sug file. Loaded on demand. */ |
506 time_t sl_sugtime; /* timestamp for .sug file */ | 515 time_t sl_sugtime; /* timestamp for .sug file */ |
507 char_u *sl_sbyts; /* soundfolded word bytes */ | 516 char_u *sl_sbyts; /* soundfolded word bytes */ |
508 idx_T *sl_sidxs; /* soundfolded word indexes */ | 517 idx_T *sl_sidxs; /* soundfolded word indexes */ |
568 #define SN_SUGFILE 11 /* timestamp for .sug file */ | 577 #define SN_SUGFILE 11 /* timestamp for .sug file */ |
569 #define SN_REPSAL 12 /* REPSAL items section */ | 578 #define SN_REPSAL 12 /* REPSAL items section */ |
570 #define SN_WORDS 13 /* common words */ | 579 #define SN_WORDS 13 /* common words */ |
571 #define SN_NOSPLITSUGS 14 /* don't split word for suggestions */ | 580 #define SN_NOSPLITSUGS 14 /* don't split word for suggestions */ |
572 #define SN_INFO 15 /* info section */ | 581 #define SN_INFO 15 /* info section */ |
582 #define SN_NOCOMPOUNDSUGS 16 /* don't compound for suggestions */ | |
573 #define SN_END 255 /* end of sections */ | 583 #define SN_END 255 /* end of sections */ |
574 | 584 |
575 #define SNF_REQUIRED 1 /* <sectionflags>: required section */ | 585 #define SNF_REQUIRED 1 /* <sectionflags>: required section */ |
576 | 586 |
577 /* Result values. Lower number is accepted over higher one. */ | 587 /* Result values. Lower number is accepted over higher one. */ |
2911 case SN_SUGFILE: | 2921 case SN_SUGFILE: |
2912 lp->sl_sugtime = get8ctime(fd); /* <timestamp> */ | 2922 lp->sl_sugtime = get8ctime(fd); /* <timestamp> */ |
2913 break; | 2923 break; |
2914 | 2924 |
2915 case SN_NOSPLITSUGS: | 2925 case SN_NOSPLITSUGS: |
2916 lp->sl_nosplitsugs = TRUE; /* <timestamp> */ | 2926 lp->sl_nosplitsugs = TRUE; |
2927 break; | |
2928 | |
2929 case SN_NOCOMPOUNDSUGS: | |
2930 lp->sl_nocompoundsugs = TRUE; | |
2917 break; | 2931 break; |
2918 | 2932 |
2919 case SN_COMPOUND: | 2933 case SN_COMPOUND: |
2920 res = read_compound(fd, lp, len); | 2934 res = read_compound(fd, lp, len); |
2921 break; | 2935 break; |
5003 garray_T si_sal; /* list of fromto_T entries from SAL lines */ | 5017 garray_T si_sal; /* list of fromto_T entries from SAL lines */ |
5004 char_u *si_sofofr; /* SOFOFROM text */ | 5018 char_u *si_sofofr; /* SOFOFROM text */ |
5005 char_u *si_sofoto; /* SOFOTO text */ | 5019 char_u *si_sofoto; /* SOFOTO text */ |
5006 int si_nosugfile; /* NOSUGFILE item found */ | 5020 int si_nosugfile; /* NOSUGFILE item found */ |
5007 int si_nosplitsugs; /* NOSPLITSUGS item found */ | 5021 int si_nosplitsugs; /* NOSPLITSUGS item found */ |
5022 int si_nocompoundsugs; /* NOCOMPOUNDSUGS item found */ | |
5008 int si_followup; /* soundsalike: ? */ | 5023 int si_followup; /* soundsalike: ? */ |
5009 int si_collapse; /* soundsalike: ? */ | 5024 int si_collapse; /* soundsalike: ? */ |
5010 hashtab_T si_commonwords; /* hashtable for common words */ | 5025 hashtab_T si_commonwords; /* hashtable for common words */ |
5011 time_t si_sugtime; /* timestamp for .sug file */ | 5026 time_t si_sugtime; /* timestamp for .sug file */ |
5012 int si_rem_accents; /* soundsalike: remove accents */ | 5027 int si_rem_accents; /* soundsalike: remove accents */ |
5128 { | 5143 { |
5129 /* Done this node before, print the reference. */ | 5144 /* Done this node before, print the reference. */ |
5130 PRINTSOME(line1, depth, "(%d)", node->wn_nr, 0); | 5145 PRINTSOME(line1, depth, "(%d)", node->wn_nr, 0); |
5131 PRINTSOME(line2, depth, " ", 0, 0); | 5146 PRINTSOME(line2, depth, " ", 0, 0); |
5132 PRINTSOME(line3, depth, " ", 0, 0); | 5147 PRINTSOME(line3, depth, " ", 0, 0); |
5133 msg(line1); | 5148 msg((char_u *)line1); |
5134 msg(line2); | 5149 msg((char_u *)line2); |
5135 msg(line3); | 5150 msg((char_u *)line3); |
5136 } | 5151 } |
5137 else | 5152 else |
5138 { | 5153 { |
5139 node->wn_u1.index = TRUE; | 5154 node->wn_u1.index = TRUE; |
5140 | 5155 |
5156 else | 5171 else |
5157 PRINTSOME(line3, depth, " ", 0, 0); | 5172 PRINTSOME(line3, depth, " ", 0, 0); |
5158 | 5173 |
5159 if (node->wn_byte == NUL) | 5174 if (node->wn_byte == NUL) |
5160 { | 5175 { |
5161 msg(line1); | 5176 msg((char_u *)line1); |
5162 msg(line2); | 5177 msg((char_u *)line2); |
5163 msg(line3); | 5178 msg((char_u *)line3); |
5164 } | 5179 } |
5165 | 5180 |
5166 /* do the children */ | 5181 /* do the children */ |
5167 if (node->wn_byte != NUL && node->wn_child != NULL) | 5182 if (node->wn_byte != NUL && node->wn_child != NULL) |
5168 spell_print_node(node->wn_child, depth + 1); | 5183 spell_print_node(node->wn_child, depth + 1); |
5595 spin->si_nobreak = TRUE; | 5610 spin->si_nobreak = TRUE; |
5596 } | 5611 } |
5597 else if (is_aff_rule(items, itemcnt, "NOSPLITSUGS", 1)) | 5612 else if (is_aff_rule(items, itemcnt, "NOSPLITSUGS", 1)) |
5598 { | 5613 { |
5599 spin->si_nosplitsugs = TRUE; | 5614 spin->si_nosplitsugs = TRUE; |
5615 } | |
5616 else if (is_aff_rule(items, itemcnt, "NOCOMPOUNDSUGS", 1)) | |
5617 { | |
5618 spin->si_nocompoundsugs = TRUE; | |
5600 } | 5619 } |
5601 else if (is_aff_rule(items, itemcnt, "NOSUGFILE", 1)) | 5620 else if (is_aff_rule(items, itemcnt, "NOSUGFILE", 1)) |
5602 { | 5621 { |
5603 spin->si_nosugfile = TRUE; | 5622 spin->si_nosugfile = TRUE; |
5604 } | 5623 } |
7619 } | 7638 } |
7620 prev = &node->wn_child; | 7639 prev = &node->wn_child; |
7621 node = *prev; | 7640 node = *prev; |
7622 } | 7641 } |
7623 #ifdef SPELL_PRINTTREE | 7642 #ifdef SPELL_PRINTTREE |
7624 smsg("Added \"%s\"", word); | 7643 smsg((char_u *)"Added \"%s\"", word); |
7625 spell_print_tree(root->wn_sibling); | 7644 spell_print_tree(root->wn_sibling); |
7626 #endif | 7645 #endif |
7627 | 7646 |
7628 /* count nr of words added since last message */ | 7647 /* count nr of words added since last message */ |
7629 ++spin->si_msg_count; | 7648 ++spin->si_msg_count; |
7645 * adding "compress_added" words (si_compress_cnt > 1). | 7664 * adding "compress_added" words (si_compress_cnt > 1). |
7646 * 3. When compressed before, added "compress_added" words | 7665 * 3. When compressed before, added "compress_added" words |
7647 * (si_compress_cnt == 1) and the number of free nodes drops below the | 7666 * (si_compress_cnt == 1) and the number of free nodes drops below the |
7648 * maximum word length. | 7667 * maximum word length. |
7649 */ | 7668 */ |
7650 #ifndef SPELL_PRINTTREE | 7669 #ifndef SPELL_COMPRESS_ALLWAYS |
7651 if (spin->si_compress_cnt == 1 | 7670 if (spin->si_compress_cnt == 1 |
7652 ? spin->si_free_count < MAXWLEN | 7671 ? spin->si_free_count < MAXWLEN |
7653 : spin->si_blocks_cnt >= compress_start) | 7672 : spin->si_blocks_cnt >= compress_start) |
7654 #endif | 7673 #endif |
7655 { | 7674 { |
8289 * This is used to notify that no suggestions with word splits are to be | 8308 * This is used to notify that no suggestions with word splits are to be |
8290 * made. */ | 8309 * made. */ |
8291 if (spin->si_nosplitsugs) | 8310 if (spin->si_nosplitsugs) |
8292 { | 8311 { |
8293 putc(SN_NOSPLITSUGS, fd); /* <sectionID> */ | 8312 putc(SN_NOSPLITSUGS, fd); /* <sectionID> */ |
8313 putc(0, fd); /* <sectionflags> */ | |
8314 put_bytes(fd, (long_u)0, 4); /* <sectionlen> */ | |
8315 } | |
8316 | |
8317 /* SN_NOCOMPOUNDSUGS: nothing | |
8318 * This is used to notify that no suggestions with compounds are to be | |
8319 * made. */ | |
8320 if (spin->si_nocompoundsugs) | |
8321 { | |
8322 putc(SN_NOCOMPOUNDSUGS, fd); /* <sectionID> */ | |
8294 putc(0, fd); /* <sectionflags> */ | 8323 putc(0, fd); /* <sectionflags> */ |
8295 put_bytes(fd, (long_u)0, 4); /* <sectionlen> */ | 8324 put_bytes(fd, (long_u)0, 4); /* <sectionlen> */ |
8296 } | 8325 } |
8297 | 8326 |
8298 /* SN_COMPOUND: compound info. | 8327 /* SN_COMPOUND: compound info. |
11881 * 3. The badword and the word in the tree end. It may still | 11910 * 3. The badword and the word in the tree end. It may still |
11882 * be possible to compound another (short) word. | 11911 * be possible to compound another (short) word. |
11883 */ | 11912 */ |
11884 try_compound = FALSE; | 11913 try_compound = FALSE; |
11885 if (!soundfold | 11914 if (!soundfold |
11915 && !slang->sl_nocompoundsugs | |
11886 && slang->sl_compprog != NULL | 11916 && slang->sl_compprog != NULL |
11887 && ((unsigned)flags >> 24) != 0 | 11917 && ((unsigned)flags >> 24) != 0 |
11888 && sp->ts_twordlen - sp->ts_splitoff | 11918 && sp->ts_twordlen - sp->ts_splitoff |
11889 >= slang->sl_compminlen | 11919 >= slang->sl_compminlen |
11890 #ifdef FEAT_MBYTE | 11920 #ifdef FEAT_MBYTE |
11905 compflags[sp->ts_complen + 1] = NUL; | 11935 compflags[sp->ts_complen + 1] = NUL; |
11906 } | 11936 } |
11907 | 11937 |
11908 /* For NOBREAK we never try splitting, it won't make any word | 11938 /* For NOBREAK we never try splitting, it won't make any word |
11909 * valid. */ | 11939 * valid. */ |
11910 if (slang->sl_nobreak) | 11940 if (slang->sl_nobreak && !slang->sl_nocompoundsugs) |
11911 try_compound = TRUE; | 11941 try_compound = TRUE; |
11912 | 11942 |
11913 /* If we could add a compound word, and it's also possible to | 11943 /* If we could add a compound word, and it's also possible to |
11914 * split at this point, do the split first and set | 11944 * split at this point, do the split first and set |
11915 * TSF_DIDSPLIT to avoid doing it again. */ | 11945 * TSF_DIDSPLIT to avoid doing it again. */ |