# HG changeset patch # User Bram Moolenaar # Date 1384227841 -3600 # Node ID 853a76c7fdedefbc1dfdc2a4896d37a3806500e2 # Parent 63314ae20838108bae2e1379084ead429c444ab7 updated for version 7.4.088 Problem: When spell checking is enabled Asian characters are always marked as error. Solution: When 'spelllang' contains "cjk" do not mark Asian characters as error. (Ken Takata) diff --git a/runtime/doc/options.txt b/runtime/doc/options.txt --- a/runtime/doc/options.txt +++ b/runtime/doc/options.txt @@ -6555,6 +6555,9 @@ A jump table for the options with a shor region by listing them: "en_us,en_ca" supports both US and Canadian English, but not words specific for Australia, New Zealand or Great Britain. + If the name "cjk" is included East Asian characters are excluded from + spell checking. This is useful when editing text that also has Asian + words. *E757* As a special case the name of a .spl file can be given as-is. The first "_xx" in the name is removed and used as the region name diff --git a/runtime/doc/spell.txt b/runtime/doc/spell.txt --- a/runtime/doc/spell.txt +++ b/runtime/doc/spell.txt @@ -269,6 +269,13 @@ In a table: latin1 yi transliterated Yiddish utf-8 yi-tr transliterated Yiddish + *spell-cjk* +Chinese, Japanese and other East Asian characters are normally marked as +errors, because spell checking of these characters is not supported. If +'spelllang' includes "cjk", these characters are not marked as errors. This +is useful when editing text with spell checking while some Asian words are +present. 
+ SPELL FILES *spell-load* diff --git a/src/mbyte.c b/src/mbyte.c --- a/src/mbyte.c +++ b/src/mbyte.c @@ -947,8 +947,8 @@ dbcs_class(lead, trail) { case 0x2121: /* ZENKAKU space */ return 0; - case 0x2122: /* KU-TEN (Japanese comma) */ - case 0x2123: /* TOU-TEN (Japanese period) */ + case 0x2122: /* TOU-TEN (Japanese comma) */ + case 0x2123: /* KU-TEN (Japanese period) */ case 0x2124: /* ZENKAKU comma */ case 0x2125: /* ZENKAKU period */ return 1; @@ -2477,9 +2477,9 @@ utf_class(c) /* sorted list of non-overlapping intervals */ static struct clinterval { - unsigned short first; - unsigned short last; - unsigned short class; + unsigned int first; + unsigned int last; + unsigned int class; } classes[] = { {0x037e, 0x037e, 1}, /* Greek question mark */ @@ -2544,6 +2544,10 @@ utf_class(c) {0xff1a, 0xff20, 1}, /* half/fullwidth ASCII */ {0xff3b, 0xff40, 1}, /* half/fullwidth ASCII */ {0xff5b, 0xff65, 1}, /* half/fullwidth ASCII */ + {0x20000, 0x2a6df, 0x4e00}, /* CJK Ideographs */ + {0x2a700, 0x2b73f, 0x4e00}, /* CJK Ideographs */ + {0x2b740, 0x2b81f, 0x4e00}, /* CJK Ideographs */ + {0x2f800, 0x2fa1f, 0x4e00}, /* CJK Ideographs */ }; int bot = 0; int top = sizeof(classes) / sizeof(struct clinterval) - 1; @@ -2563,9 +2567,9 @@ utf_class(c) while (top >= bot) { mid = (bot + top) / 2; - if (classes[mid].last < c) + if (classes[mid].last < (unsigned int)c) bot = mid + 1; - else if (classes[mid].first > c) + else if (classes[mid].first > (unsigned int)c) top = mid - 1; else return (int)classes[mid].class; diff --git a/src/option.c b/src/option.c --- a/src/option.c +++ b/src/option.c @@ -7122,6 +7122,11 @@ did_set_string_option(opt_idx, varp, new if (varp == &(curwin->w_s->b_p_spl)) { char_u fname[200]; + char_u *q = curwin->w_s->b_p_spl; + + /* Skip the first name if it is "cjk". */ + if (STRNCMP(q, "cjk,", 4) == 0) + q += 4; /* * Source the spell/LANG.vim in 'runtimepath'. 
@@ -7129,11 +7134,10 @@ did_set_string_option(opt_idx, varp, new * Use the first name in 'spelllang' up to '_region' or * '.encoding'. */ - for (p = curwin->w_s->b_p_spl; *p != NUL; ++p) + for (p = q; *p != NUL; ++p) if (vim_strchr((char_u *)"_.,", *p) != NULL) break; - vim_snprintf((char *)fname, 200, "spell/%.*s.vim", - (int)(p - curwin->w_s->b_p_spl), curwin->w_s->b_p_spl); + vim_snprintf((char *)fname, 200, "spell/%.*s.vim", (int)(p - q), q); source_runtime(fname, TRUE); } #endif diff --git a/src/spell.c b/src/spell.c --- a/src/spell.c +++ b/src/spell.c @@ -754,9 +754,9 @@ static int did_set_spelltab; static void clear_spell_chartab __ARGS((spelltab_T *sp)); static int set_spell_finish __ARGS((spelltab_T *new_st)); static int spell_iswordp __ARGS((char_u *p, win_T *wp)); -static int spell_iswordp_nmw __ARGS((char_u *p)); -#ifdef FEAT_MBYTE -static int spell_mb_isword_class __ARGS((int cl)); +static int spell_iswordp_nmw __ARGS((char_u *p, win_T *wp)); +#ifdef FEAT_MBYTE +static int spell_mb_isword_class __ARGS((int cl, win_T *wp)); static int spell_iswordp_w __ARGS((int *p, win_T *wp)); #endif static int write_spell_prefcond __ARGS((FILE *fd, garray_T *gap)); @@ -1149,7 +1149,7 @@ spell_check(wp, ptr, attrp, capcol, doco /* When we are at a non-word character there is no error, just * skip over the character (try looking for a word after it). */ - else if (!spell_iswordp_nmw(ptr)) + else if (!spell_iswordp_nmw(ptr, wp)) { if (capcol != NULL && wp->w_s->b_cap_prog != NULL) { @@ -1561,7 +1561,7 @@ find_word(mip, mode) * accept a no-caps word, even when the dictionary * word specifies ONECAP. */ mb_ptr_back(mip->mi_word, p); - if (spell_iswordp_nmw(p) + if (spell_iswordp_nmw(p, mip->mi_win) ? capflags == WF_ONECAP : (flags & WF_ONECAP) != 0 && capflags != WF_ONECAP) @@ -4234,7 +4234,9 @@ did_set_spelllang(wp) if (spl_copy == NULL) goto theend; - /* loop over comma separated language names. */ + wp->w_s->b_cjk = 0; + + /* Loop over comma separated language names. 
*/ for (splp = spl_copy; *splp != NUL; ) { /* Get one language name. */ @@ -4242,6 +4244,12 @@ did_set_spelllang(wp) region = NULL; len = (int)STRLEN(lang); + if (STRCMP(lang, "cjk") == 0) + { + wp->w_s->b_cjk = 1; + continue; + } + /* If the name ends in ".spl" use it as the name of the spell file. * If there is a region name let "region" point to it and remove it * from the name. */ @@ -4601,7 +4609,7 @@ captype(word, end) int past_second = FALSE; /* past second word char */ /* find first letter */ - for (p = word; !spell_iswordp_nmw(p); mb_ptr_adv(p)) + for (p = word; !spell_iswordp_nmw(p, curwin); mb_ptr_adv(p)) if (end == NULL ? *p == NUL : p >= end) return 0; /* only non-word characters, illegal word */ #ifdef FEAT_MBYTE @@ -4617,7 +4625,7 @@ captype(word, end) * But a word with an upper char only at start is a ONECAP. */ for ( ; end == NULL ? *p != NUL : p < end; mb_ptr_adv(p)) - if (spell_iswordp_nmw(p)) + if (spell_iswordp_nmw(p, curwin)) { c = PTR2CHAR(p); if (!SPELL_ISUPPER(c)) @@ -9907,7 +9915,7 @@ spell_iswordp(p, wp) c = mb_ptr2char(s); if (c > 255) - return spell_mb_isword_class(mb_get_class(s)); + return spell_mb_isword_class(mb_get_class(s), wp); return spelltab.st_isw[c]; } #endif @@ -9920,8 +9928,9 @@ spell_iswordp(p, wp) * Unlike spell_iswordp() this doesn't check for "midword" characters. */ static int -spell_iswordp_nmw(p) +spell_iswordp_nmw(p, wp) char_u *p; + win_T *wp; { #ifdef FEAT_MBYTE int c; @@ -9930,7 +9939,7 @@ spell_iswordp_nmw(p) { c = mb_ptr2char(p); if (c > 255) - return spell_mb_isword_class(mb_get_class(p)); + return spell_mb_isword_class(mb_get_class(p), wp); return spelltab.st_isw[c]; } #endif @@ -9942,11 +9951,16 @@ spell_iswordp_nmw(p) * Return TRUE if word class indicates a word character. * Only for characters above 255. * Unicode subscript and superscript are not considered word characters. - */ - static int -spell_mb_isword_class(cl) - int cl; -{ + * See also dbcs_class() and utf_class() in mbyte.c. 
+ */ + static int +spell_mb_isword_class(cl, wp) + int cl; + win_T *wp; +{ + if (wp->w_s->b_cjk) + /* East Asian characters are not considered word characters. */ + return cl == 2 || cl == 0x2800; return cl >= 2 && cl != 0x2070 && cl != 0x2080; } @@ -9971,9 +9985,10 @@ spell_iswordp_w(p, wp) if (*s > 255) { if (enc_utf8) - return spell_mb_isword_class(utf_class(*s)); + return spell_mb_isword_class(utf_class(*s), wp); if (enc_dbcs) - return dbcs_class((unsigned)*s >> 8, *s & 0xff) >= 2; + return spell_mb_isword_class( + dbcs_class((unsigned)*s >> 8, *s & 0xff), wp); return 0; } return spelltab.st_isw[*s]; @@ -10193,13 +10208,13 @@ spell_suggest(count) line = ml_get_curline(); p = line + curwin->w_cursor.col; /* Backup to before start of word. */ - while (p > line && spell_iswordp_nmw(p)) + while (p > line && spell_iswordp_nmw(p, curwin)) mb_ptr_back(line, p); /* Forward to start of word. */ - while (*p != NUL && !spell_iswordp_nmw(p)) + while (*p != NUL && !spell_iswordp_nmw(p, curwin)) mb_ptr_adv(p); - if (!spell_iswordp_nmw(p)) /* No word found. */ + if (!spell_iswordp_nmw(p, curwin)) /* No word found. */ { beep_flush(); return; @@ -10436,7 +10451,7 @@ check_need_cap(lnum, col) for (;;) { mb_ptr_back(line, p); - if (p == line || spell_iswordp_nmw(p)) + if (p == line || spell_iswordp_nmw(p, curwin)) break; if (vim_regexec(&regmatch, p, 0) && regmatch.endp[0] == line + endcol) @@ -11645,7 +11660,7 @@ suggest_trie_walk(su, lp, fword, soundfo /* When appending a compound word after a word character don't * use Onecap. */ - if (p != NULL && spell_iswordp_nmw(p)) + if (p != NULL && spell_iswordp_nmw(p, curwin)) c &= ~WF_ONECAP; make_case_word(tword + sp->ts_splitoff, preword + sp->ts_prewordlen, c); @@ -11895,7 +11910,8 @@ suggest_trie_walk(su, lp, fword, soundfo * character when the word ends. But only when the * good word can end. 
*/ if (((!try_compound && !spell_iswordp_nmw(fword - + sp->ts_fidx)) + + sp->ts_fidx, + curwin)) || fword_ends) && fword[sp->ts_fidx] != NUL && goodword_ends) @@ -14226,7 +14242,7 @@ spell_soundfold_sal(slang, inword, res) } else { - if (spell_iswordp_nmw(s)) + if (spell_iswordp_nmw(s, curwin)) *t++ = *s; ++s; } @@ -14521,7 +14537,7 @@ spell_soundfold_wsal(slang, inword, res) else { did_white = FALSE; - if (!spell_iswordp_nmw(t)) + if (!spell_iswordp_nmw(t, curwin)) continue; } } @@ -16045,7 +16061,7 @@ spell_word_start(startcol) for (p = line + startcol; p > line; ) { mb_ptr_back(line, p); - if (spell_iswordp_nmw(p)) + if (spell_iswordp_nmw(p, curwin)) break; } diff --git a/src/structs.h b/src/structs.h --- a/src/structs.h +++ b/src/structs.h @@ -1310,6 +1310,9 @@ typedef struct { regprog_T *b_cap_prog; /* program for 'spellcapcheck' */ char_u *b_p_spf; /* 'spellfile' */ char_u *b_p_spl; /* 'spelllang' */ +# ifdef FEAT_MBYTE + int b_cjk; /* all CJK letters as OK */ +# endif #endif #if !defined(FEAT_SYN_HL) && !defined(FEAT_SPELL) int dummy; diff --git a/src/version.c b/src/version.c --- a/src/version.c +++ b/src/version.c @@ -739,6 +739,8 @@ static char *(features[]) = static int included_patches[] = { /* Add new patch number below this line */ /**/ + 88, +/**/ 87, /**/ 86,