# HG changeset patch # User Bram Moolenaar # Date 1568407504 -7200 # Node ID 8b4f9be5db73467417c510a36c4c8eabbde75212 # Parent 168f1eca04a2012731fd7efc9eabbc6d9bba6748 patch 8.1.2027: MS-Windows: problem with ambiwidth characters Commit: https://github.com/vim/vim/commit/57da69816872d53038e8a7e8dd4dc39a31192f0d Author: Bram Moolenaar Date: Fri Sep 13 22:30:11 2019 +0200 patch 8.1.2027: MS-Windows: problem with ambiwidth characters Problem: MS-Windows: problem with ambiwidth characters. Solution: handle ambiguous width characters in ConPTY on Windows 10 (1903). (Nobuhiro Takasaki, closes #4411) diff --git a/src/Make_cyg_ming.mak b/src/Make_cyg_ming.mak --- a/src/Make_cyg_ming.mak +++ b/src/Make_cyg_ming.mak @@ -1192,7 +1192,8 @@ endif CCCTERM = $(CC) -c $(CFLAGS) -Ilibvterm/include -DINLINE="" \ -DVSNPRINTF=vim_vsnprintf \ -DIS_COMBINING_FUNCTION=utf_iscomposing_uint \ - -DWCWIDTH_FUNCTION=utf_uint2cells + -DWCWIDTH_FUNCTION=utf_uint2cells \ + -DGET_SPECIAL_PTY_TYPE_FUNCTION=get_special_pty_type $(OUTDIR)/%.o : libvterm/src/%.c $(TERM_DEPS) $(CCCTERM) $< -o $@ diff --git a/src/Make_mvc.mak b/src/Make_mvc.mak --- a/src/Make_mvc.mak +++ b/src/Make_mvc.mak @@ -1716,6 +1716,7 @@ CCCTERM = $(CC) $(CFLAGS) -Ilibvterm/inc -DVSNPRINTF=vim_vsnprintf \ -DIS_COMBINING_FUNCTION=utf_iscomposing_uint \ -DWCWIDTH_FUNCTION=utf_uint2cells \ + -DGET_SPECIAL_PTY_TYPE_FUNCTION=get_special_pty_type \ -D_CRT_SECURE_NO_WARNINGS # Create a default rule for libvterm. diff --git a/src/libvterm/src/parser.c b/src/libvterm/src/parser.c --- a/src/libvterm/src/parser.c +++ b/src/libvterm/src/parser.c @@ -127,6 +127,9 @@ size_t vterm_input_write(VTerm *vt, cons size_t pos = 0; const char *string_start = NULL; // init to avoid gcc warning + vt->in_backspace = 0; // Count down with BS key and activate when + // it reaches 1 + switch(vt->parser.state) { case NORMAL: case CSI_LEADER: @@ -172,6 +175,13 @@ size_t vterm_input_write(VTerm *vt, cons // fallthrough } else if(c < 0x20) { // other C0 + if(vterm_get_special_pty_type() == 2) { + if(c == 0x08) // BS + // Set the trick for BS output after a sequence, to delay backspace + // activation + if(pos + 2 < len && bytes[pos + 1] == 0x20 && bytes[pos + 2] == 0x08) + vt->in_backspace = 2; // Trigger when count down to 1 + } if(vt->parser.state >= STRING) more_string(vt, string_start, bytes + pos - string_start); do_control(vt, c); diff --git a/src/libvterm/src/state.c b/src/libvterm/src/state.c --- a/src/libvterm/src/state.c +++ b/src/libvterm/src/state.c @@ -336,6 +336,11 @@ static int on_text(const char bytes[], s for( ; i < glyph_ends; i++) { int this_width; + if(vterm_get_special_pty_type() == 2) { + state->vt->in_backspace -= (state->vt->in_backspace > 0) ? 1 : 0; + if(state->vt->in_backspace == 1) + codepoints[i] = 0; // codepoints under this condition must be 0 + } chars[i - glyph_starts] = codepoints[i]; this_width = vterm_unicode_width(codepoints[i]); #ifdef DEBUG @@ -425,6 +430,12 @@ static int on_control(unsigned char cont VTermPos oldpos = state->pos; + VTermScreenCell cell; + + // Preparing to see the leading byte + VTermPos leadpos = state->pos; + leadpos.col -= (leadpos.col >= 2 ? 2 : 0); + switch(control) { case 0x07: // BEL - ECMA-48 8.3.3 if(state->callbacks && state->callbacks->bell) @@ -434,6 +445,12 @@ static int on_control(unsigned char cont case 0x08: // BS - ECMA-48 8.3.5 if(state->pos.col > 0) state->pos.col--; + if(vterm_get_special_pty_type() == 2) { + // In 2 cell letters, go back 2 cells + vterm_screen_get_cell(state->vt->screen, leadpos, &cell); + if(vterm_unicode_width(cell.chars[0]) == 2) + state->pos.col--; + } break; case 0x09: // HT - ECMA-48 8.3.60 @@ -1019,6 +1036,26 @@ static int on_csi(const char *leader, co row = CSI_ARG_OR(args[0], 1); col = argcount < 2 || CSI_ARG_IS_MISSING(args[1]) ? 1 : CSI_ARG(args[1]); // zero-based + if(vterm_get_special_pty_type() == 2) { + // Fix a sequence that is not correct right now + if(state->pos.row == row - 1) { + int cnt, ptr = 0; + for(cnt = 0; cnt < col - 1; ++cnt) { + VTermPos p; + VTermScreenCell c0, c1; + p.row = row - 1; + p.col = ptr; + vterm_screen_get_cell(state->vt->screen, p, &c0); + p.col++; + vterm_screen_get_cell(state->vt->screen, p, &c1); + ptr += (c1.chars[0] == (uint32_t)-1) // double cell? + ? (vterm_unicode_is_ambiguous(c0.chars[0])) // is ambiguous? + ? vterm_unicode_width(0x00a1) : 1 // &ambiwidth + : 1; // not ambiguous + } + col = ptr + 1; + } + } state->pos.row = row-1; state->pos.col = col-1; if(state->mode.origin) { diff --git a/src/libvterm/src/termscreen.c b/src/libvterm/src/termscreen.c --- a/src/libvterm/src/termscreen.c +++ b/src/libvterm/src/termscreen.c @@ -770,11 +770,28 @@ int vterm_screen_get_cell(const VTermScr cell->fg = intcell->pen.fg; cell->bg = intcell->pen.bg; - if(pos.col < (screen->cols - 1) && - getcell(screen, pos.row, pos.col + 1)->chars[0] == (uint32_t)-1) - cell->width = 2; - else - cell->width = 1; + if(vterm_get_special_pty_type() == 2) { + /* Get correct cell width from cell information contained in line buffer */ + if(pos.col < (screen->cols - 1) && + getcell(screen, pos.row, pos.col + 1)->chars[0] == (uint32_t)-1) { + if(getcell(screen, pos.row, pos.col)->chars[0] == 0x20) { + getcell(screen, pos.row, pos.col)->chars[0] = 0; + cell->width = 2; + } else if(getcell(screen, pos.row, pos.col)->chars[0] == 0) { + getcell(screen, pos.row, pos.col + 1)->chars[0] = 0; + cell->width = 1; + } else { + cell->width = 2; + } + } else + cell->width = 1; + } else { + if(pos.col < (screen->cols - 1) && + getcell(screen, pos.row, pos.col + 1)->chars[0] == (uint32_t)-1) + cell->width = 2; + else + cell->width = 1; + } return 1; } diff --git a/src/libvterm/src/unicode.c b/src/libvterm/src/unicode.c --- a/src/libvterm/src/unicode.c +++ b/src/libvterm/src/unicode.c @@ -68,12 +68,13 @@ * Latest version: http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c */ -#if !defined(IS_COMBINING_FUNCTION) || !defined(WCWIDTH_FUNCTION) struct interval { int first; int last; }; +#if !defined(WCWIDTH_FUNCTION) || !defined(IS_COMBINING_FUNCTION) + // sorted list of non-overlapping intervals of non-spacing characters // generated by "uniset +cat=Me +cat=Mn +cat=Cf -00AD +1160-11FF +200B c" // Replaced by the combining table from Vim. @@ -359,6 +360,7 @@ static const struct interval combining[] {0X1E944, 0X1E94A}, {0XE0100, 0XE01EF} }; +#endif // auxiliary function for binary search in interval table static int bisearch(uint32_t ucs, const struct interval *table, int max) { @@ -379,8 +381,6 @@ static int bisearch(uint32_t ucs, const return 0; } -#endif - /* The following two functions define the column width of an ISO 10646 * character as follows: @@ -478,6 +478,7 @@ static int mk_wcswidth(const uint32_t *p */ static int mk_wcwidth_cjk(uint32_t ucs) { +#endif /* sorted list of non-overlapping intervals of East Asian Ambiguous * characters, generated by "uniset +WIDTH-A -cat=Me -cat=Mn -cat=Cf c" */ static const struct interval ambiguous[] = { @@ -534,6 +535,7 @@ static int mk_wcwidth_cjk(uint32_t ucs) { 0x273D, 0x273D }, { 0x2776, 0x277F }, { 0xE000, 0xF8FF }, { 0xFFFD, 0xFFFD }, { 0xF0000, 0xFFFFD }, { 0x100000, 0x10FFFD } }; +#if 0 // binary search in table of non-spacing characters if (bisearch(ucs, ambiguous, @@ -557,6 +559,12 @@ static int mk_wcswidth_cjk(const uint32_ } #endif +INTERNAL int vterm_unicode_is_ambiguous(uint32_t codepoint) +{ + return (bisearch(codepoint, ambiguous, + sizeof(ambiguous) / sizeof(struct interval) - 1)) ? 1 : 0; +} + #ifdef IS_COMBINING_FUNCTION // Use a provided is_combining() function. int IS_COMBINING_FUNCTION(uint32_t codepoint); @@ -569,6 +577,17 @@ vterm_is_combining(uint32_t codepoint) } #endif +#ifdef GET_SPECIAL_PTY_TYPE_FUNCTION +int GET_SPECIAL_PTY_TYPE_FUNCTION(void); +#else +# define GET_SPECIAL_PTY_TYPE_FUNCTION vterm_get_special_pty_type_placeholder + static int +vterm_get_special_pty_type_placeholder(void) +{ + return 0; +} +#endif + // ################################ // ### The rest added by Paul Evans @@ -581,3 +600,8 @@ INTERNAL int vterm_unicode_is_combining( { return IS_COMBINING_FUNCTION(codepoint); } + +INTERNAL int vterm_get_special_pty_type(void) +{ + return GET_SPECIAL_PTY_TYPE_FUNCTION(); +} diff --git a/src/libvterm/src/vterm_internal.h b/src/libvterm/src/vterm_internal.h --- a/src/libvterm/src/vterm_internal.h +++ b/src/libvterm/src/vterm_internal.h @@ -212,6 +212,8 @@ struct VTerm VTermState *state; VTermScreen *screen; + + int in_backspace; }; struct VTermEncoding { @@ -259,5 +261,7 @@ VTermEncoding *vterm_lookup_encoding(VTe int vterm_unicode_width(uint32_t codepoint); int vterm_unicode_is_combining(uint32_t codepoint); +int vterm_unicode_is_ambiguous(uint32_t codepoint); +int vterm_get_special_pty_type(void); #endif diff --git a/src/misc2.c b/src/misc2.c --- a/src/misc2.c +++ b/src/misc2.c @@ -4601,3 +4601,22 @@ build_argv_from_list(list_T *l, char *** } # endif #endif + +/* + * Change the behavior of vterm. + * 0: As usual. + * 1: Windows 10 version 1809 + * The bug causes unstable handling of ambiguous width character. + * 2: Windows 10 version 1903 + * Use the wrong result because each result is different. + * 3: Windows 10 insider preview (current latest logic) + */ + int +get_special_pty_type(void) +{ +#ifdef MSWIN + return get_conpty_type(); +#else + return 0; +#endif +} diff --git a/src/os_win32.c b/src/os_win32.c --- a/src/os_win32.c +++ b/src/os_win32.c @@ -186,6 +186,7 @@ static int win32_setattrs(char_u *name, static int win32_set_archive(char_u *name); static int conpty_working = 0; +static int conpty_type = 0; static int conpty_stable = 0; static void vtp_flag_init(); @@ -7249,9 +7250,25 @@ mch_setenv(char *var, char *value, int x /* * Support for pseudo-console (ConPTY) was added in windows 10 - * version 1809 (October 2018 update). However, that version is unstable. + * version 1809 (October 2018 update). */ #define CONPTY_FIRST_SUPPORT_BUILD MAKE_VER(10, 0, 17763) + +/* + * ConPTY differences between versions, need different logic. + * version 1903 (May 2019 update). + */ +#define CONPTY_1903_BUILD MAKE_VER(10, 0, 18362) + +/* + * Confirm until this version. Also the logic changes. + * insider preview. + */ +#define CONPTY_INSIDER_BUILD MAKE_VER(10, 0, 18898) + +/* + * Not stable now. + */ #define CONPTY_STABLE_BUILD MAKE_VER(10, 0, 32767) // T.B.D. static void @@ -7281,6 +7298,12 @@ vtp_flag_init(void) if (ver >= CONPTY_STABLE_BUILD) conpty_stable = 1; + if (ver <= CONPTY_INSIDER_BUILD) + conpty_type = 3; + if (ver <= CONPTY_1903_BUILD) + conpty_type = 2; + if (ver < CONPTY_FIRST_SUPPORT_BUILD) + conpty_type = 1; } #if !defined(FEAT_GUI_MSWIN) || defined(VIMDLL) || defined(PROTO) @@ -7503,6 +7526,12 @@ has_conpty_working(void) } int +get_conpty_type(void) +{ + return conpty_type; +} + + int is_conpty_stable(void) { return conpty_stable; diff --git a/src/proto/misc2.pro b/src/proto/misc2.pro --- a/src/proto/misc2.pro +++ b/src/proto/misc2.pro @@ -106,4 +106,5 @@ void parse_queued_messages(void); int mch_parse_cmd(char_u *cmd, int use_shcf, char ***argv, int *argc); int build_argv_from_string(char_u *cmd, char ***argv, int *argc); int build_argv_from_list(list_T *l, char ***argv, int *argc); +int get_special_pty_type(void); /* vim: set ft=c : */ diff --git a/src/proto/os_win32.pro b/src/proto/os_win32.pro --- a/src/proto/os_win32.pro +++ b/src/proto/os_win32.pro @@ -76,6 +76,7 @@ int use_vtp(void); int is_term_win32(void); int has_vtp_working(void); int has_conpty_working(void); +int get_conpty_type(void); int is_conpty_stable(void); void resize_console_buf(void); /* vim: set ft=c : */ diff --git a/src/version.c b/src/version.c --- a/src/version.c +++ b/src/version.c @@ -758,6 +758,8 @@ static char *(features[]) = static int included_patches[] = { /* Add new patch number below this line */ /**/ + 2027, +/**/ 2026, /**/ 2025,