Mercurial > vim
changeset 492:81c06952fb1d
updated for version 7.0135
author | vimboss |
---|---|
date | Tue, 23 Aug 2005 21:00:13 +0000 |
parents | 21c3634c2113 |
children | 06364aa0d597 |
files | runtime/spell/am/am_ET.diff runtime/spell/main.aap runtime/spell/nl/nl_NL.diff runtime/spell/yi/yi_tr.diff src/charset.c src/macros.h src/mbyte.c src/os_macosx.c src/po/Makefile src/proto/charset.pro src/regexp.c src/spell.c |
diffstat | 11 files changed, 348 insertions(+), 62 deletions(-) [+] |
line wrap: on
line diff
--- a/runtime/spell/main.aap +++ b/runtime/spell/main.aap @@ -4,7 +4,7 @@ # aap generate all the .spl files # aap diff create all the diff files -LANG = af am bg ca cs da de el en eo fr fo gl he hr it nl ny pl sk yi hu +LANG = af am bg ca cs da de el en eo fr fo gl he hr it nl ny pl sk th yi hu # "hu" is at the end, because it takes very long. #
--- a/runtime/spell/nl/nl_NL.diff +++ b/runtime/spell/nl/nl_NL.diff @@ -1,12 +1,12 @@ *** nl_NL.orig.aff Sun Jul 3 18:24:07 2005 ---- nl_NL.aff Tue Aug 16 22:39:54 2005 +--- nl_NL.aff Tue Aug 23 14:03:48 2005 *************** *** 3,6 **** --- 3,30 ---- -+ FOL -+ LOW -+ UPP ++ FOL ++ LOW ++ UPP + + SOFOFROM abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ + SOFOTO ebctefghejklnnepkrstevvkesebctefghejklnnepkrstevvkeseeeeeeeceeeeeeeedneeeeeeeeeeepseeeeeeeeceeeeeeeedneeeeeeeeeeep?
--- a/runtime/spell/yi/yi_tr.diff +++ b/runtime/spell/yi/yi_tr.diff @@ -6,7 +6,7 @@ + 84608 gruntelement dzhobendiks -*** /dev/null Fri Aug 19 23:01:14 2005 +*** /dev/null Tue Aug 23 22:51:11 2005 --- yi_tr.aff Tue Aug 16 10:48:01 2005 *************** *** 0 ****
--- a/src/charset.c +++ b/src/charset.c @@ -1531,6 +1531,122 @@ vim_isxdigit(c) || (c >= 'A' && c <= 'F'); } +#if defined(FEAT_MBYTE) || defined(PROTO) +/* + * Vim's own character class functions. These exist because many library + * islower()/toupper() etc. do not work properly: they crash when used with + * invalid values or can't handle latin1 when the locale is C. + * Speed is most important here. + */ +#define LATIN1LOWER 'l' +#define LATIN1UPPER 'U' + +/* !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]%_'abcdefghijklmnopqrstuvwxyz{|}~ */ +static char_u latin1flags[256] = " UUUUUUUUUUUUUUUUUUUUUUUUUU llllllllllllllllllllllllll UUUUUUUUUUUUUUUUUUUUUUU UUUUUUUllllllllllllllllllllllll llllllll"; +static char_u latin1upper[256] = " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`ABCDEFGHIJKLMNOPQRSTUVWXYZ{|}~"; +static char_u latin1lower[256] = " !\"#$%&'()*+,-./0123456789:;<=>?@abcdefghijklmnopqrstuvwxyz[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~"; + + int +vim_islower(c) + int c; +{ + if (c <= '@') + return FALSE; + if (c >= 0x80) + { + if (enc_utf8) + return utf_islower(c); + if (c >= 0x100) + { +#ifdef HAVE_ISWLOWER + if (has_mbyte) + return iswlower(c); +#endif + /* islower() can't handle these chars and may crash */ + return FALSE; + } + if (enc_latin1like) + return (latin1flags[c] & LATIN1LOWER) == LATIN1LOWER; + } + return islower(c); +} + + int +vim_isupper(c) + int c; +{ + if (c <= '@') + return FALSE; + if (c >= 0x80) + { + if (enc_utf8) + return utf_isupper(c); + if (c >= 0x100) + { +#ifdef HAVE_ISWUPPER + if (has_mbyte) + return iswupper(c); +#endif + /* islower() can't handle these chars and may crash */ + return FALSE; + } + if (enc_latin1like) + return (latin1flags[c] & LATIN1UPPER) == LATIN1UPPER; + } + return isupper(c); +} + + int +vim_toupper(c) + int c; +{ + if (c <= '@') + return c; + if (c >= 0x80) + { + if (enc_utf8) + return utf_toupper(c); + if (c >= 0x100) + { +#ifdef HAVE_TOWUPPER + if (has_mbyte) + return 
towupper(c); +#endif + /* toupper() can't handle these chars and may crash */ + return c; + } + if (enc_latin1like) + return latin1upper[c]; + } + return TOUPPER_LOC(c); +} + + int +vim_tolower(c) + int c; +{ + if (c <= '@') + return c; + if (c >= 0x80) + { + if (enc_utf8) + return utf_tolower(c); + if (c >= 0x100) + { +#ifdef HAVE_TOWLOWER + if (has_mbyte) + return towlower(c); +#endif + /* tolower() can't handle these chars and may crash */ + return c; + } + if (enc_latin1like) + return latin1lower[c]; + } + return TOLOWER_LOC(c); +} +#endif + /* * skiptowhite: skip over text until ' ' or '\t' or NUL. */
--- a/src/macros.h +++ b/src/macros.h @@ -81,13 +81,13 @@ /* * MB_ISLOWER() and MB_ISUPPER() are to be used on multi-byte characters. But - * don't use them for negative values. + * don't use them for negative values! */ #ifdef FEAT_MBYTE -# define MB_ISLOWER(c) (enc_utf8 && (c) >= 0x80 ? utf_islower(c) : (has_mbyte && c > 255 ? FALSE : islower(c))) -# define MB_ISUPPER(c) (enc_utf8 && (c) >= 0x80 ? utf_isupper(c) : (has_mbyte && c > 255 ? FALSE : isupper(c))) -# define MB_TOLOWER(c) (enc_utf8 && (c) >= 0x80 ? utf_tolower(c) : (has_mbyte && c > 255 ? c : TOLOWER_LOC(c))) -# define MB_TOUPPER(c) (enc_utf8 && (c) >= 0x80 ? utf_toupper(c) : (has_mbyte && c > 255 ? c : TOUPPER_LOC(c))) +# define MB_ISLOWER(c) vim_islower(c) +# define MB_ISUPPER(c) vim_isupper(c) +# define MB_TOLOWER(c) vim_tolower(c) +# define MB_TOUPPER(c) vim_toupper(c) #else # define MB_ISLOWER(c) islower(c) # define MB_ISUPPER(c) isupper(c)
--- a/src/mbyte.c +++ b/src/mbyte.c @@ -584,6 +584,10 @@ codepage_invalid: enc_latin9 = (STRCMP(p_enc, "iso-8859-15") == 0); #endif + /* Detect an encoding that uses latin1 characters. */ + enc_latin1like = (enc_utf8 || STRCMP(p_enc, "latin1") == 0 + || STRCMP(p_enc, "iso-8859-15") == 0); + /* * Set the function pointers. */
--- a/src/os_macosx.c +++ b/src/os_macosx.c @@ -85,6 +85,7 @@ get_textencoding(const char* encodename) {"iso-8859-8", kTextEncodingISOLatinHebrew}, {"iso-8859-9", kTextEncodingISOLatin5}, {"iso-8859-10", kTextEncodingISOLatin6}, + {"iso-8859-15", kTextEncodingISOLatin9}, /* Unicode encodings. */ /* TODO: Add other type of unicode */
--- a/src/po/Makefile +++ b/src/po/Makefile @@ -26,8 +26,7 @@ LANGUAGES = \ zh_CN \ zh_CN.UTF-8 \ zh_TW \ - zh_TW.UTF-8 \ -# end marker + zh_TW.UTF-8 MOFILES = \ af.mo \ @@ -51,8 +50,7 @@ MOFILES = \ zh_CN.UTF-8.mo \ zh_CN.mo \ zh_TW.UTF-8.mo \ - zh_TW.mo \ -# end marker + zh_TW.mo CONVERTED = \ cs.cp1250.mo \ @@ -61,8 +59,7 @@ CONVERTED = \ ru.cp1251.mo \ sk.cp1250.mo \ uk.cp1251.mo \ - zh_CN.cp936.mo \ -# end marker + zh_CN.cp936.mo CHECKFILES = \ af.ck \ @@ -93,8 +90,7 @@ CHECKFILES = \ ru.cp1251.ck \ sk.cp1250.ck \ uk.cp1251.ck \ - zh_CN.cp936.ck \ -# end marker + zh_CN.cp936.ck PACKAGE = vim SHELL = /bin/sh
--- a/src/proto/charset.pro +++ b/src/proto/charset.pro @@ -38,6 +38,10 @@ char_u *skiptodigit __ARGS((char_u *p)); char_u *skiptohex __ARGS((char_u *p)); int vim_isdigit __ARGS((int c)); int vim_isxdigit __ARGS((int c)); +int vim_islower __ARGS((int c)); +int vim_isupper __ARGS((int c)); +int vim_toupper __ARGS((int c)); +int vim_tolower __ARGS((int c)); char_u *skiptowhite __ARGS((char_u *p)); char_u *skiptowhite_esc __ARGS((char_u *p)); long getdigits __ARGS((char_u **pp));
--- a/src/regexp.c +++ b/src/regexp.c @@ -742,7 +742,7 @@ reg_equi_class(c) { #ifdef FEAT_MBYTE if (enc_utf8 || STRCMP(p_enc, "latin1") == 0 - || STRCMP(p_enc, "latin9") == 0) + || STRCMP(p_enc, "iso-8859-15") == 0) #endif { switch (c)
--- a/src/spell.c +++ b/src/spell.c @@ -162,6 +162,8 @@ * <compflags> N bytes Flags from COMPOUNDFLAGS items, separated by * slashes. * + * sectionID == SN_NOBREAK: (empty, its presence is enough) + * * sectionID == SN_SYLLABLE: <syllable> * <syllable> N bytes String from SYLLABLE item. * @@ -378,6 +380,7 @@ struct slang_S * (NULL when no compounding) */ char_u *sl_compstartflags; /* flags for first compound word */ char_u *sl_compallflags; /* all flags for compound words */ + char_u sl_nobreak; /* When TRUE: no spaces between words */ char_u *sl_syllable; /* SYLLABLE repeatable chars or NULL */ garray_T sl_syl_items; /* syllable items */ @@ -442,6 +445,7 @@ typedef struct langp_S #define SN_MAP 7 /* MAP items section */ #define SN_COMPOUND 8 /* compound words section */ #define SN_SYLLABLE 9 /* syllable section */ +#define SN_NOBREAK 10 /* NOBREAK section */ #define SN_END 255 /* end of sections */ #define SNF_REQUIRED 1 /* <sectionflags>: required section */ @@ -560,6 +564,10 @@ typedef struct matchinf_S int mi_result; /* result so far: SP_BAD, SP_OK, etc. */ int mi_capflags; /* WF_ONECAP WF_ALLCAP WF_KEEPCAP */ buf_T *mi_buf; /* buffer being checked */ + + /* for NOBREAK */ + int mi_result2; /* "mi_result" without following word */ + char_u *mi_end2; /* "mi_end" without following word */ } matchinf_T; /* @@ -638,6 +646,7 @@ typedef struct trystate_S #endif char_u ts_prewordlen; /* length of word in "preword[]" */ char_u ts_splitoff; /* index in "tword" after last split */ + char_u ts_splitfidx; /* "ts_fidx" at word split */ char_u ts_complen; /* nr of compound words used */ char_u ts_compsplit; /* index for "compflags" where word was spit */ char_u ts_save_badflags; /* su_badflags saved here */ @@ -887,6 +896,7 @@ spell_check(wp, ptr, attrp, capcol) /* The word is bad unless we recognize it. */ mi.mi_result = SP_BAD; + mi.mi_result2 = SP_BAD; /* * Loop over the languages specified in 'spelllang'. 
@@ -904,6 +914,15 @@ spell_check(wp, ptr, attrp, capcol) /* Check for matching prefixes. */ find_prefix(&mi, FIND_FOLDWORD); + + /* For a NOBREAK language, may want to use a word without a following + * word as a backup. */ + if (mi.mi_lp->lp_slang->sl_nobreak && mi.mi_result == SP_BAD + && mi.mi_result2 != SP_BAD) + { + mi.mi_result = mi.mi_result2; + mi.mi_end = mi.mi_end2; + } } if (mi.mi_result != SP_OK) @@ -941,6 +960,33 @@ spell_check(wp, ptr, attrp, capcol) /* Always include at least one character. Required for when there * is a mixup in "midword". */ mb_ptr_adv(mi.mi_end); + else if (mi.mi_result == SP_BAD + && LANGP_ENTRY(wp->w_buffer->b_langp, 0)->lp_slang->sl_nobreak) + { + char_u *p, *fp; + int save_result = mi.mi_result; + + /* First language in 'spelllang' is NOBREAK. Find first position + * at which any word would be valid. */ + mi.mi_lp = LANGP_ENTRY(wp->w_buffer->b_langp, 0); + p = mi.mi_word; + fp = mi.mi_fword; + for (;;) + { + mb_ptr_adv(p); + mb_ptr_adv(fp); + if (p >= mi.mi_end) + break; + mi.mi_compoff = fp - mi.mi_fword; + find_word(&mi, FIND_COMPOUND); + if (mi.mi_result != SP_BAD) + { + mi.mi_end = p; + break; + } + } + mi.mi_result = save_result; + } if (mi.mi_result == SP_BAD || mi.mi_result == SP_BANNED) *attrp = highlight_attr[HLF_SPB]; @@ -995,6 +1041,7 @@ find_word(mip, mode) idx_T *idxs; int word_ends; int prefix_found; + int nobreak_result; if (mode == FIND_KEEPWORD || mode == FIND_KEEPCOMPOUND) { @@ -1137,7 +1184,7 @@ find_word(mip, mode) #endif if (spell_iswordp(ptr + wlen, mip->mi_buf)) { - if (slang->sl_compprog == NULL) + if (slang->sl_compprog == NULL && !slang->sl_nobreak) continue; /* next char is a word character */ word_ends = FALSE; } @@ -1210,8 +1257,20 @@ find_word(mip, mode) prefix_found = TRUE; } - if (mode == FIND_COMPOUND || mode == FIND_KEEPCOMPOUND - || !word_ends) + if (slang->sl_nobreak) + { + if ((mode == FIND_COMPOUND || mode == FIND_KEEPCOMPOUND) + && (flags & WF_BANNED) == 0) + { + /* NOBREAK: found a valid 
following word. That's all we + * need to know, so return. */ + mip->mi_result = SP_OK; + break; + } + } + + else if ((mode == FIND_COMPOUND || mode == FIND_KEEPCOMPOUND + || !word_ends)) { /* If there is no flag or the word is shorter than * COMPOUNDMIN reject it quickly. @@ -1295,11 +1354,20 @@ find_word(mip, mode) } } + nobreak_result = SP_OK; + if (!word_ends) { - /* Check that a valid word follows. If there is one, it will - * set "mi_result", thus we are always finished here. + int save_result = mip->mi_result; + char_u *save_end = mip->mi_end; + + /* Check that a valid word follows. If there is one and we + * are compounding, it will set "mi_result", thus we are + * always finished here. For NOBREAK we only check that a + * valid word follows. * Recursive! */ + if (slang->sl_nobreak) + mip->mi_result = SP_BAD; /* Find following word in case-folded tree. */ mip->mi_compoff = endlen[endidxcnt]; @@ -1323,18 +1391,36 @@ find_word(mip, mode) ++mip->mi_complen; find_word(mip, FIND_COMPOUND); - /* Find following word in keep-case tree. */ - mip->mi_compoff = wlen; - find_word(mip, FIND_KEEPCOMPOUND); - - /* Check for following word with prefix. */ - mip->mi_compoff = c; - find_prefix(mip, FIND_COMPOUND); + /* When NOBREAK any word that matches is OK. Otherwise we + * need to find the longest match, thus try with keep-case and + * prefix too. */ + if (!slang->sl_nobreak || mip->mi_result == SP_BAD) + { + /* Find following word in keep-case tree. */ + mip->mi_compoff = wlen; + find_word(mip, FIND_KEEPCOMPOUND); + + if (!slang->sl_nobreak || mip->mi_result == SP_BAD) + { + /* Check for following word with prefix. 
*/ + mip->mi_compoff = c; + find_prefix(mip, FIND_COMPOUND); + } + } --mip->mi_complen; - if (mip->mi_result == SP_OK) - break; - continue; + if (slang->sl_nobreak) + { + nobreak_result = mip->mi_result; + mip->mi_result = save_result; + mip->mi_end = save_end; + } + else + { + if (mip->mi_result == SP_OK) + break; + continue; + } } if (flags & WF_BANNED) @@ -1352,8 +1438,21 @@ find_word(mip, mode) else res = SP_OK; - /* Always use the longest match and the best result. */ - if (mip->mi_result > res) + /* Always use the longest match and the best result. For NOBREAK + * we separately keep the longest match without a following good + * word as a fall-back. */ + if (nobreak_result == SP_BAD) + { + if (mip->mi_result2 > res) + { + mip->mi_result2 = res; + mip->mi_end2 = mip->mi_word + wlen; + } + else if (mip->mi_result2 == res + && mip->mi_end2 < mip->mi_word + wlen) + mip->mi_end2 = mip->mi_word + wlen; + } + else if (mip->mi_result > res) { mip->mi_result = res; mip->mi_end = mip->mi_word + wlen; @@ -1361,11 +1460,11 @@ find_word(mip, mode) else if (mip->mi_result == res && mip->mi_end < mip->mi_word + wlen) mip->mi_end = mip->mi_word + wlen; - if (res == SP_OK) + if (mip->mi_result == SP_OK) break; } - if (res == SP_OK) + if (mip->mi_result == SP_OK) break; } } @@ -2224,6 +2323,10 @@ spell_load_file(fname, lang, old_lp, sil res = read_compound(fd, lp, len); break; + case SN_NOBREAK: + lp->sl_nobreak = TRUE; + break; + case SN_SYLLABLE: lp->sl_syllable = read_string(fd, len); /* <syllable> */ if (lp->sl_syllable == NULL) @@ -3951,6 +4054,7 @@ typedef struct spellinfo_S int si_compminlen; /* minimal length for compounding */ int si_compsylmax; /* max nr of syllables for compounding */ char_u *si_compflags; /* flags used for compounding */ + char_u si_nobreak; /* NOBREAK */ char_u *si_syllable; /* syllable string */ garray_T si_prefcond; /* table with conditions for postponed * prefixes, each stored as a string */ @@ -4362,6 +4466,10 @@ spell_read_aff(spin, fname) { 
aff->af_syllable = getroom_save(spin, items[1]); } + else if (STRCMP(items[0], "NOBREAK") == 0 && itemcnt == 1) + { + spin->si_nobreak = TRUE; + } else if (STRCMP(items[0], "PFXPOSTPONE") == 0 && itemcnt == 1) { aff->af_pfxpostpone = TRUE; @@ -6441,6 +6549,16 @@ write_vim_spell(spin, fname) fwrite(spin->si_compflags, (size_t)l, (size_t)1, fd); } + /* SN_NOBREAK: NOBREAK flag */ + if (spin->si_nobreak) + { + putc(SN_NOBREAK, fd); /* <sectionID> */ + putc(0, fd); /* <sectionflags> */ + + /* It's empty, the presence of the section flags the feature. */ + put_bytes(fd, (long_u)0, 4); /* <sectionlen> */ + } + /* SN_SYLLABLE: syllable info. * We don't mark it required, when not supported syllables will not be * counted. */ @@ -6861,6 +6979,9 @@ mkspell(fcount, fnames, ascii, overwrite #endif } + if (spin.si_compflags != NULL && spin.si_nobreak) + MSG(_("Warning: both compounding and NOBREAK specified")); + if (!error) { /* @@ -8360,7 +8481,20 @@ allcap_copy(word, wcopy) else #endif c = *s++; - c = SPELL_TOUPPER(c); + +#ifdef FEAT_MBYTE + /* We only change to SS when we are certain latin1 is used. It + * would cause weird errors in other 8-bit encodings. */ + if (enc_latin1like && c == 0xdf) + { + c = 'S'; + if (d - wcopy >= MAXWLEN - 1) + break; + *d++ = c; + } + else +#endif + c = SPELL_TOUPPER(c); #ifdef FEAT_MBYTE if (has_mbyte) @@ -8629,29 +8763,52 @@ suggest_try_change(su) if (sp->ts_complen > sp->ts_compsplit) { - /* There was a compound word before this word. If this - * word does not support compounding then give up - * (splitting is tried for the word without compound - * flag). */ - if (((unsigned)flags >> 24) == 0 - || sp->ts_twordlen - sp->ts_splitoff + if (slang->sl_nobreak) + { + /* There was a word before this word. When there was + * no change in this word (it was correct) add the + * first word as a suggestion. If this word was + * corrected too, we need to check if a correct word + * follows. 
*/ + if (sp->ts_fidx - sp->ts_splitfidx + == sp->ts_twordlen - sp->ts_splitoff + && STRNCMP(fword + sp->ts_splitfidx, + tword + sp->ts_splitoff, + sp->ts_fidx - sp->ts_splitfidx) == 0) + { + preword[sp->ts_prewordlen] = NUL; + add_suggestion(su, &su->su_ga, preword, + sp->ts_splitfidx - repextra, + sp->ts_score, 0, FALSE); + break; + } + } + else + { + /* There was a compound word before this word. If + * this word does not support compounding then give up + * (splitting is tried for the word without compound + * flag). */ + if (((unsigned)flags >> 24) == 0 + || sp->ts_twordlen - sp->ts_splitoff < slang->sl_compminlen) - break; - compflags[sp->ts_complen] = ((unsigned)flags >> 24); - compflags[sp->ts_complen + 1] = NUL; - vim_strncpy(preword + sp->ts_prewordlen, - tword + sp->ts_splitoff, - sp->ts_twordlen - sp->ts_splitoff); - p = preword; - while (*skiptowhite(p) != NUL) - p = skipwhite(skiptowhite(p)); - if (fword_ends && !can_compound(slang, p, + break; + compflags[sp->ts_complen] = ((unsigned)flags >> 24); + compflags[sp->ts_complen + 1] = NUL; + vim_strncpy(preword + sp->ts_prewordlen, + tword + sp->ts_splitoff, + sp->ts_twordlen - sp->ts_splitoff); + p = preword; + while (*skiptowhite(p) != NUL) + p = skipwhite(skiptowhite(p)); + if (fword_ends && !can_compound(slang, p, compflags + sp->ts_compsplit)) - break; - - /* Get pointer to last char of previous word. */ - p = preword + sp->ts_prewordlen; - mb_ptr_back(preword, p); + break; + + /* Get pointer to last char of previous word. */ + p = preword + sp->ts_prewordlen; + mb_ptr_back(preword, p); + } } else p = NULL; @@ -8753,6 +8910,8 @@ suggest_try_change(su) * If the word allows compounding try that. Otherwise * try a split by inserting a space. For both check * that a valid words starts at fword[sp->ts_fidx]. + * For NOBREAK do like compounding to be able to check + * if the next word is valid. * 2. The badword does end, but it was due to a change * (e.g., a swap). 
No need to split, but do check that * the following word is valid. @@ -8775,10 +8934,15 @@ suggest_try_change(su) compflags[sp->ts_complen + 1] = NUL; } + /* For NOBREAK we never try splitting, it won't make any + * word valid. */ + if (slang->sl_nobreak) + try_compound = TRUE; + /* If we could add a compound word, and it's also possible * to split at this point, do the split first and set * TSF_DIDSPLIT to avoid doing it again. */ - if (!fword_ends + else if (!fword_ends && try_compound && (sp->ts_flags & TSF_DIDSPLIT) == 0) { @@ -8818,6 +8982,7 @@ suggest_try_change(su) STRCAT(preword, " "); sp->ts_prewordlen = STRLEN(preword); sp->ts_splitoff = sp->ts_twordlen; + sp->ts_splitfidx = sp->ts_fidx; /* If the badword has a non-word character at this * position skip it. That means replacing the