comparison src/spell.c @ 490:4321aae7e769

updated for version 7.0134
author vimboss
date Mon, 22 Aug 2005 22:54:29 +0000
parents 13cf73878020
children 81c06952fb1d
comparison
equal deleted inserted replaced
489:b1e87b7f5db6 490:4321aae7e769
248 248
249 #define MAXWLEN 250 /* Assume max. word len is this many bytes. 249 #define MAXWLEN 250 /* Assume max. word len is this many bytes.
250 Some places assume a word length fits in a 250 Some places assume a word length fits in a
251 byte, thus it can't be above 255. */ 251 byte, thus it can't be above 255. */
252 252
253 /* Type used for indexes in the word tree need to be at least 3 bytes. If int 253 /* Type used for indexes in the word tree need to be at least 4 bytes. If int
254 * is 8 bytes we could use something smaller, but what? */ 254 * is 8 bytes we could use something smaller, but what? */
255 #if SIZEOF_INT > 2 255 #if SIZEOF_INT > 3
256 typedef int idx_T; 256 typedef int idx_T;
257 #else 257 #else
258 typedef long idx_T; 258 typedef long idx_T;
259 #endif 259 #endif
260 260
984 int c; 984 int c;
985 char_u *ptr; 985 char_u *ptr;
986 idx_T lo, hi, m; 986 idx_T lo, hi, m;
987 #ifdef FEAT_MBYTE 987 #ifdef FEAT_MBYTE
988 char_u *s; 988 char_u *s;
989 #endif
989 char_u *p; 990 char_u *p;
990 #endif
991 int res = SP_BAD; 991 int res = SP_BAD;
992 slang_T *slang = mip->mi_lp->lp_slang; 992 slang_T *slang = mip->mi_lp->lp_slang;
993 unsigned flags; 993 unsigned flags;
994 char_u *byts; 994 char_u *byts;
995 idx_T *idxs; 995 idx_T *idxs;
1216 /* If there is no flag or the word is shorter than 1216 /* If there is no flag or the word is shorter than
1217 * COMPOUNDMIN reject it quickly. 1217 * COMPOUNDMIN reject it quickly.
1218 * Makes you wonder why someone puts a compound flag on a word 1218 * Makes you wonder why someone puts a compound flag on a word
1219 * that's too short... Myspell compatibility requires this 1219 * that's too short... Myspell compatibility requires this
1220 * anyway. */ 1220 * anyway. */
1221 if (((unsigned)flags >> 24) == 0 || wlen < slang->sl_compminlen) 1221 if (((unsigned)flags >> 24) == 0
1222 || wlen - mip->mi_compoff < slang->sl_compminlen)
1222 continue; 1223 continue;
1223 1224
1224 /* Limit the number of compound words to COMPOUNDMAX. */ 1225 /* Limit the number of compound words to COMPOUNDMAX if no
1225 if (!word_ends && mip->mi_complen + 2 > slang->sl_compmax) 1226 * maximum for syllables is specified. */
1227 if (!word_ends && mip->mi_complen + 2 > slang->sl_compmax
1228 && slang->sl_compsylmax == MAXWLEN)
1226 continue; 1229 continue;
1227 1230
1228 /* Quickly check if compounding is possible with this flag. */ 1231 /* Quickly check if compounding is possible with this flag. */
1229 if (vim_strchr(mip->mi_complen == 0 1232 if (vim_strchr(mip->mi_complen == 0
1230 ? slang->sl_compstartflags 1233 ? slang->sl_compstartflags
1231 : slang->sl_compallflags, 1234 : slang->sl_compallflags,
1232 ((unsigned)flags >> 24)) == NULL) 1235 ((unsigned)flags >> 24)) == NULL)
1233 continue; 1236 continue;
1237
1238 if (mode == FIND_COMPOUND)
1239 {
1240 int capflags;
1241
1242 /* Need to check the caps type of the appended compound
1243 * word. */
1244 #ifdef FEAT_MBYTE
1245 if (has_mbyte && STRNCMP(ptr, mip->mi_word,
1246 mip->mi_compoff) != 0)
1247 {
1248 /* case folding may have changed the length */
1249 p = mip->mi_word;
1250 for (s = ptr; s < ptr + mip->mi_compoff; mb_ptr_adv(s))
1251 mb_ptr_adv(p);
1252 }
1253 else
1254 #endif
1255 p = mip->mi_word + mip->mi_compoff;
1256 capflags = captype(p, mip->mi_word + wlen);
1257 if (capflags == WF_KEEPCAP || (capflags == WF_ALLCAP
1258 && (flags & WF_FIXCAP) != 0))
1259 continue;
1260
1261 if (capflags != WF_ALLCAP)
1262 {
1263 /* When the character before the word is a word
1264 * character we do not accept a Onecap word. We do
1265 * accept a no-caps word, even when the dictionary
1266 * word specifies ONECAP. */
1267 mb_ptr_back(mip->mi_word, p);
1268 if (spell_iswordp_nmw(p)
1269 ? capflags == WF_ONECAP
1270 : (flags & WF_ONECAP) != 0
1271 && capflags != WF_ONECAP)
1272 continue;
1273 }
1274 }
1234 1275
1235 /* If the word ends the sequence of compound flags of the 1276 /* If the word ends the sequence of compound flags of the
1236 * words must match with one of the COMPOUNDFLAGS items and 1277 * words must match with one of the COMPOUNDFLAGS items and
1237 * the number of syllables must not be too large. */ 1278 * the number of syllables must not be too large. */
1238 mip->mi_compflags[mip->mi_complen] = ((unsigned)flags >> 24); 1279 mip->mi_compflags[mip->mi_complen] = ((unsigned)flags >> 24);
1346 regmatch.regprog = slang->sl_compprog; 1387 regmatch.regprog = slang->sl_compprog;
1347 regmatch.rm_ic = FALSE; 1388 regmatch.rm_ic = FALSE;
1348 if (!vim_regexec(&regmatch, flags, 0)) 1389 if (!vim_regexec(&regmatch, flags, 0))
1349 return FALSE; 1390 return FALSE;
1350 1391
1351 /* Count the number of syllables. This may be slow, do it last. */ 1392 /* Count the number of syllables. This may be slow, do it last. If there
1393 * are too many syllables AND the number of compound words is above
1394 * COMPOUNDMAX then compounding is not allowed. */
1352 if (slang->sl_compsylmax < MAXWLEN 1395 if (slang->sl_compsylmax < MAXWLEN
1353 && count_syllables(slang, word) > slang->sl_compsylmax) 1396 && count_syllables(slang, word) > slang->sl_compsylmax)
1354 return FALSE; 1397 return STRLEN(flags) < slang->sl_compmax;
1355 return TRUE; 1398 return TRUE;
1356 } 1399 }
1357 1400
1358 /* 1401 /*
1359 * Return non-zero if the prefix indicated by "arridx" matches with the prefix 1402 * Return non-zero if the prefix indicated by "arridx" matches with the prefix
8594 || sp->ts_twordlen - sp->ts_splitoff 8637 || sp->ts_twordlen - sp->ts_splitoff
8595 < slang->sl_compminlen) 8638 < slang->sl_compminlen)
8596 break; 8639 break;
8597 compflags[sp->ts_complen] = ((unsigned)flags >> 24); 8640 compflags[sp->ts_complen] = ((unsigned)flags >> 24);
8598 compflags[sp->ts_complen + 1] = NUL; 8641 compflags[sp->ts_complen + 1] = NUL;
8599 if (fword_ends && !can_compound(slang, 8642 vim_strncpy(preword + sp->ts_prewordlen,
8600 tword + sp->ts_splitoff, 8643 tword + sp->ts_splitoff,
8644 sp->ts_twordlen - sp->ts_splitoff);
8645 p = preword;
8646 while (*skiptowhite(p) != NUL)
8647 p = skipwhite(skiptowhite(p));
8648 if (fword_ends && !can_compound(slang, p,
8601 compflags + sp->ts_compsplit)) 8649 compflags + sp->ts_compsplit))
8602 break; 8650 break;
8651
8652 /* Get pointer to last char of previous word. */
8653 p = preword + sp->ts_prewordlen;
8654 mb_ptr_back(preword, p);
8603 } 8655 }
8656 else
8657 p = NULL;
8604 8658
8605 /* 8659 /*
8606 * Form the word with proper case in preword. 8660 * Form the word with proper case in preword.
8607 * If there is a word from a previous split, append. 8661 * If there is a word from a previous split, append.
8608 */ 8662 */
8622 #else 8676 #else
8623 && su->su_badlen == 1 8677 && su->su_badlen == 1
8624 #endif 8678 #endif
8625 ) 8679 )
8626 c = WF_ONECAP; 8680 c = WF_ONECAP;
8681 c |= flags;
8682
8683 /* When appending a compound word after a word character
8684 * don't use Onecap. */
8685 if (p != NULL && spell_iswordp_nmw(p))
8686 c &= ~WF_ONECAP;
8627 make_case_word(tword + sp->ts_splitoff, 8687 make_case_word(tword + sp->ts_splitoff,
8628 preword + sp->ts_prewordlen, flags | c); 8688 preword + sp->ts_prewordlen, c);
8629 } 8689 }
8630 8690
8631 /* Don't use a banned word. It may appear again as a good 8691 /* Don't use a banned word. It may appear again as a good
8632 * word, thus remember it. */ 8692 * word, thus remember it. */
8633 if (flags & WF_BANNED) 8693 if (flags & WF_BANNED)
8700 try_compound = FALSE; 8760 try_compound = FALSE;
8701 if (!fword_ends 8761 if (!fword_ends
8702 && ((unsigned)flags >> 24) != 0 8762 && ((unsigned)flags >> 24) != 0
8703 && sp->ts_twordlen - sp->ts_splitoff 8763 && sp->ts_twordlen - sp->ts_splitoff
8704 >= slang->sl_compminlen 8764 >= slang->sl_compminlen
8705 && sp->ts_complen + 1 - sp->ts_compsplit 8765 && (slang->sl_compsylmax < MAXWLEN
8706 < slang->sl_compmax 8766 || sp->ts_complen + 1 - sp->ts_compsplit
8767 < slang->sl_compmax)
8707 && (vim_strchr(sp->ts_complen == sp->ts_compsplit 8768 && (vim_strchr(sp->ts_complen == sp->ts_compsplit
8708 ? slang->sl_compstartflags 8769 ? slang->sl_compstartflags
8709 : slang->sl_compallflags, 8770 : slang->sl_compallflags,
8710 ((unsigned)flags >> 24)) != NULL)) 8771 ((unsigned)flags >> 24)) != NULL))
8711 { 8772 {
8731 8792
8732 if (!try_compound && !fword_ends) 8793 if (!try_compound && !fword_ends)
8733 { 8794 {
8734 /* If we're going to split need to check that the 8795 /* If we're going to split need to check that the
8735 * words so far are valid for compounding. */ 8796 * words so far are valid for compounding. */
8797 p = preword;
8798 while (*skiptowhite(p) != NUL)
8799 p = skipwhite(skiptowhite(p));
8736 if (sp->ts_complen > sp->ts_compsplit 8800 if (sp->ts_complen > sp->ts_compsplit
8737 && !can_compound(slang, 8801 && !can_compound(slang, p,
8738 tword + sp->ts_splitoff,
8739 compflags + sp->ts_compsplit)) 8802 compflags + sp->ts_compsplit))
8740 break; 8803 break;
8741 newscore += SCORE_SPLIT; 8804 newscore += SCORE_SPLIT;
8742 } 8805 }
8743 8806