changeset 18064:8b4f9be5db73 v8.1.2027

patch 8.1.2027: MS-Windows: problem with ambiwidth characters Commit: https://github.com/vim/vim/commit/57da69816872d53038e8a7e8dd4dc39a31192f0d Author: Bram Moolenaar <Bram@vim.org> Date: Fri Sep 13 22:30:11 2019 +0200 patch 8.1.2027: MS-Windows: problem with ambiwidth characters Problem: MS-Windows: problem with ambiwidth characters. Solution: handle ambiguous width characters in ConPTY on Windows 10 (1903). (Nobuhiro Takasaki, closes #4411)
author Bram Moolenaar <Bram@vim.org>
date Fri, 13 Sep 2019 22:45:04 +0200
parents 168f1eca04a2
children 2e037541c5dd
files src/Make_cyg_ming.mak src/Make_mvc.mak src/libvterm/src/parser.c src/libvterm/src/state.c src/libvterm/src/termscreen.c src/libvterm/src/unicode.c src/libvterm/src/vterm_internal.h src/misc2.c src/os_win32.c src/proto/misc2.pro src/proto/os_win32.pro src/version.c
diffstat 12 files changed, 156 insertions(+), 10 deletions(-) [+]
line wrap: on
line diff
--- a/src/Make_cyg_ming.mak
+++ b/src/Make_cyg_ming.mak
@@ -1192,7 +1192,8 @@ endif
 CCCTERM = $(CC) -c $(CFLAGS) -Ilibvterm/include -DINLINE="" \
 	  -DVSNPRINTF=vim_vsnprintf \
 	  -DIS_COMBINING_FUNCTION=utf_iscomposing_uint \
-	  -DWCWIDTH_FUNCTION=utf_uint2cells
+	  -DWCWIDTH_FUNCTION=utf_uint2cells \
+	  -DGET_SPECIAL_PTY_TYPE_FUNCTION=get_special_pty_type
 
 $(OUTDIR)/%.o : libvterm/src/%.c $(TERM_DEPS)
 	$(CCCTERM) $< -o $@
--- a/src/Make_mvc.mak
+++ b/src/Make_mvc.mak
@@ -1716,6 +1716,7 @@ CCCTERM = $(CC) $(CFLAGS) -Ilibvterm/inc
 	-DVSNPRINTF=vim_vsnprintf \
 	-DIS_COMBINING_FUNCTION=utf_iscomposing_uint \
 	-DWCWIDTH_FUNCTION=utf_uint2cells \
+	-DGET_SPECIAL_PTY_TYPE_FUNCTION=get_special_pty_type \
 	-D_CRT_SECURE_NO_WARNINGS
 
 # Create a default rule for libvterm.
--- a/src/libvterm/src/parser.c
+++ b/src/libvterm/src/parser.c
@@ -127,6 +127,9 @@ size_t vterm_input_write(VTerm *vt, cons
   size_t pos = 0;
   const char *string_start = NULL;  // init to avoid gcc warning
 
+  vt->in_backspace = 0;		    // Count down with BS key and activate when
+				    // it reaches 1
+
   switch(vt->parser.state) {
   case NORMAL:
   case CSI_LEADER:
@@ -172,6 +175,13 @@ size_t vterm_input_write(VTerm *vt, cons
       // fallthrough
     }
     else if(c < 0x20) { // other C0
+      if(vterm_get_special_pty_type() == 2) {
+        if(c == 0x08) // BS
+          // Set the trick for BS output after a sequence, to delay backspace
+          // activation
+          if(pos + 2 < len && bytes[pos + 1] == 0x20 && bytes[pos + 2] == 0x08)
+            vt->in_backspace = 2; // Trigger when count down to 1
+      }
       if(vt->parser.state >= STRING)
         more_string(vt, string_start, bytes + pos - string_start);
       do_control(vt, c);
--- a/src/libvterm/src/state.c
+++ b/src/libvterm/src/state.c
@@ -336,6 +336,11 @@ static int on_text(const char bytes[], s
 
     for( ; i < glyph_ends; i++) {
       int this_width;
+      if(vterm_get_special_pty_type() == 2) {
+        state->vt->in_backspace -= (state->vt->in_backspace > 0) ? 1 : 0;
+        if(state->vt->in_backspace == 1)
+          codepoints[i] = 0; // codepoints under this condition must be 0
+      }
       chars[i - glyph_starts] = codepoints[i];
       this_width = vterm_unicode_width(codepoints[i]);
 #ifdef DEBUG
@@ -425,6 +430,12 @@ static int on_control(unsigned char cont
 
   VTermPos oldpos = state->pos;
 
+  VTermScreenCell cell;
+
+  // Preparing to see the leading byte
+  VTermPos leadpos = state->pos;
+  leadpos.col -= (leadpos.col >= 2 ? 2 : 0);
+
   switch(control) {
   case 0x07: // BEL - ECMA-48 8.3.3
     if(state->callbacks && state->callbacks->bell)
@@ -434,6 +445,12 @@ static int on_control(unsigned char cont
   case 0x08: // BS - ECMA-48 8.3.5
     if(state->pos.col > 0)
       state->pos.col--;
+    if(vterm_get_special_pty_type() == 2) {
+      // In 2 cell letters, go back 2 cells
+      vterm_screen_get_cell(state->vt->screen, leadpos, &cell);
+      if(vterm_unicode_width(cell.chars[0]) == 2)
+        state->pos.col--;
+    }
     break;
 
   case 0x09: // HT - ECMA-48 8.3.60
@@ -1019,6 +1036,26 @@ static int on_csi(const char *leader, co
     row = CSI_ARG_OR(args[0], 1);
     col = argcount < 2 || CSI_ARG_IS_MISSING(args[1]) ? 1 : CSI_ARG(args[1]);
     // zero-based
+    if(vterm_get_special_pty_type() == 2) {
+      // Fix a sequence that is not correct right now
+      if(state->pos.row == row - 1) {
+        int cnt, ptr = 0;
+        for(cnt = 0; cnt < col - 1; ++cnt) {
+	  VTermPos p;
+	  VTermScreenCell c0, c1;
+	  p.row = row - 1;
+	  p.col = ptr;
+	  vterm_screen_get_cell(state->vt->screen, p, &c0);
+	  p.col++;
+	  vterm_screen_get_cell(state->vt->screen, p, &c1);
+	  ptr += (c1.chars[0] == (uint32_t)-1)		    // double cell?
+	     ? (vterm_unicode_is_ambiguous(c0.chars[0]))    // is ambiguous?
+	     ? vterm_unicode_width(0x00a1) : 1		    // &ambiwidth
+	     : 1;					    // not ambiguous
+        }
+        col = ptr + 1;
+      }
+    }
     state->pos.row = row-1;
     state->pos.col = col-1;
     if(state->mode.origin) {
--- a/src/libvterm/src/termscreen.c
+++ b/src/libvterm/src/termscreen.c
@@ -770,11 +770,28 @@ int vterm_screen_get_cell(const VTermScr
   cell->fg = intcell->pen.fg;
   cell->bg = intcell->pen.bg;
 
-  if(pos.col < (screen->cols - 1) &&
-     getcell(screen, pos.row, pos.col + 1)->chars[0] == (uint32_t)-1)
-    cell->width = 2;
-  else
-    cell->width = 1;
+  if(vterm_get_special_pty_type() == 2) {
+    /* Get correct cell width from cell information contained in line buffer */
+    if(pos.col < (screen->cols - 1) &&
+       getcell(screen, pos.row, pos.col + 1)->chars[0] == (uint32_t)-1) {
+      if(getcell(screen, pos.row, pos.col)->chars[0] == 0x20) {
+        getcell(screen, pos.row, pos.col)->chars[0] = 0;
+        cell->width = 2;
+      } else if(getcell(screen, pos.row, pos.col)->chars[0] == 0) {
+        getcell(screen, pos.row, pos.col + 1)->chars[0] = 0;
+        cell->width = 1;
+      } else {
+        cell->width = 2;
+      }
+    } else
+      cell->width = 1;
+  } else {
+    if(pos.col < (screen->cols - 1) &&
+       getcell(screen, pos.row, pos.col + 1)->chars[0] == (uint32_t)-1)
+      cell->width = 2;
+    else
+      cell->width = 1;
+  }
 
   return 1;
 }
--- a/src/libvterm/src/unicode.c
+++ b/src/libvterm/src/unicode.c
@@ -68,12 +68,13 @@
  * Latest version: http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
  */
 
-#if !defined(IS_COMBINING_FUNCTION) || !defined(WCWIDTH_FUNCTION)
 struct interval {
   int first;
   int last;
 };
 
+#if !defined(WCWIDTH_FUNCTION) || !defined(IS_COMBINING_FUNCTION)
+
 // sorted list of non-overlapping intervals of non-spacing characters
 // generated by "uniset +cat=Me +cat=Mn +cat=Cf -00AD +1160-11FF +200B c"
 // Replaced by the combining table from Vim.
@@ -359,6 +360,7 @@ static const struct interval combining[]
 	{0X1E944, 0X1E94A},
 	{0XE0100, 0XE01EF}
 };
+#endif
 
 // auxiliary function for binary search in interval table
 static int bisearch(uint32_t ucs, const struct interval *table, int max) {
@@ -379,8 +381,6 @@ static int bisearch(uint32_t ucs, const 
 
   return 0;
 }
-#endif
-
 
 /* The following two functions define the column width of an ISO 10646
  * character as follows:
@@ -478,6 +478,7 @@ static int mk_wcswidth(const uint32_t *p
  */
 static int mk_wcwidth_cjk(uint32_t ucs)
 {
+#endif
   /* sorted list of non-overlapping intervals of East Asian Ambiguous
    * characters, generated by "uniset +WIDTH-A -cat=Me -cat=Mn -cat=Cf c" */
   static const struct interval ambiguous[] = {
@@ -534,6 +535,7 @@ static int mk_wcwidth_cjk(uint32_t ucs)
     { 0x273D, 0x273D }, { 0x2776, 0x277F }, { 0xE000, 0xF8FF },
     { 0xFFFD, 0xFFFD }, { 0xF0000, 0xFFFFD }, { 0x100000, 0x10FFFD }
   };
+#if 0
 
   // binary search in table of non-spacing characters
   if (bisearch(ucs, ambiguous,
@@ -557,6 +559,12 @@ static int mk_wcswidth_cjk(const uint32_
 }
 #endif
 
+INTERNAL int vterm_unicode_is_ambiguous(uint32_t codepoint)
+{ // Report (1/0) whether codepoint is in the East Asian Ambiguous table.
+  const int last_index = (int)(sizeof(ambiguous) / sizeof(ambiguous[0])) - 1;
+  return bisearch(codepoint, ambiguous, last_index) != 0;
+}
+
 #ifdef IS_COMBINING_FUNCTION
 // Use a provided is_combining() function.
 int IS_COMBINING_FUNCTION(uint32_t codepoint);
@@ -569,6 +577,17 @@ vterm_is_combining(uint32_t codepoint)
 }
 #endif
 
+#ifdef GET_SPECIAL_PTY_TYPE_FUNCTION
+int GET_SPECIAL_PTY_TYPE_FUNCTION(void); // hook named at build time with -D
+#else
+# define GET_SPECIAL_PTY_TYPE_FUNCTION vterm_get_special_pty_type_placeholder
+	static int
+vterm_get_special_pty_type_placeholder(void) // fallback when no hook is set
+{
+  return 0; // 0: no special pty handling
+}
+#endif
+
 // ################################
 // ### The rest added by Paul Evans
 
@@ -581,3 +600,8 @@ INTERNAL int vterm_unicode_is_combining(
 {
   return IS_COMBINING_FUNCTION(codepoint);
 }
+
+INTERNAL int vterm_get_special_pty_type(void)
+{ // Forward to the configured hook, or to the placeholder that returns 0.
+  return GET_SPECIAL_PTY_TYPE_FUNCTION();
+}
--- a/src/libvterm/src/vterm_internal.h
+++ b/src/libvterm/src/vterm_internal.h
@@ -212,6 +212,8 @@ struct VTerm
 
   VTermState *state;
   VTermScreen *screen;
+
+  int in_backspace;
 };
 
 struct VTermEncoding {
@@ -259,5 +261,7 @@ VTermEncoding *vterm_lookup_encoding(VTe
 
 int vterm_unicode_width(uint32_t codepoint);
 int vterm_unicode_is_combining(uint32_t codepoint);
+int vterm_unicode_is_ambiguous(uint32_t codepoint);
+int vterm_get_special_pty_type(void);
 
 #endif
--- a/src/misc2.c
+++ b/src/misc2.c
@@ -4601,3 +4601,22 @@ build_argv_from_list(list_T *l, char ***
 }
 # endif
 #endif
+
+/*
+ * Return the pty type selecting version-specific vterm workarounds.
+ * 0: As usual (always returned when not built for MS-Windows).
+ * 1: Windows 10 version 1809; ambiguous width handling is unstable (bug).
+ *      NOTE(review): vtp_flag_init() sets 1 only for builds < 1809; confirm.
+ * 2: Windows 10 version 1903
+ *      Use the wrong result because each result is different.
+ * 3: Windows 10 insider preview (current latest logic)
+ */
+    int
+get_special_pty_type(void)
+{
+#ifdef MSWIN
+    return get_conpty_type();	// ConPTY type determined in os_win32.c
+#else
+    return 0;			// no ConPTY outside MS-Windows
+#endif
+}
--- a/src/os_win32.c
+++ b/src/os_win32.c
@@ -186,6 +186,7 @@ static int win32_setattrs(char_u *name, 
 static int win32_set_archive(char_u *name);
 
 static int conpty_working = 0;
+static int conpty_type = 0;
 static int conpty_stable = 0;
 static void vtp_flag_init();
 
@@ -7249,9 +7250,25 @@ mch_setenv(char *var, char *value, int x
 
 /*
  * Support for pseudo-console (ConPTY) was added in windows 10
- * version 1809 (October 2018 update).  However, that version is unstable.
+ * version 1809 (October 2018 update).
  */
 #define CONPTY_FIRST_SUPPORT_BUILD  MAKE_VER(10, 0, 17763)
+
+/*
+ * ConPTY behaves differently between versions, so different logic is needed.
+ * Version 1903 (May 2019 update).
+ */
+#define CONPTY_1903_BUILD	    MAKE_VER(10, 0, 18362)
+
+/*
+ * Behavior was confirmed up to this version.  The logic also changes here.
+ * Insider preview.
+ */
+#define CONPTY_INSIDER_BUILD	    MAKE_VER(10, 0, 18898)
+
+/*
+ * Not stable yet; the build number below is still to be determined.
+ */
 #define CONPTY_STABLE_BUILD	    MAKE_VER(10, 0, 32767)  // T.B.D.
 
     static void
@@ -7281,6 +7298,12 @@ vtp_flag_init(void)
     if (ver >= CONPTY_STABLE_BUILD)
 	conpty_stable = 1;
 
+    if (ver <= CONPTY_INSIDER_BUILD)
+	conpty_type = 3;
+    if (ver <= CONPTY_1903_BUILD)
+	conpty_type = 2;
+    if (ver < CONPTY_FIRST_SUPPORT_BUILD)
+	conpty_type = 1;
 }
 
 #if !defined(FEAT_GUI_MSWIN) || defined(VIMDLL) || defined(PROTO)
@@ -7503,6 +7526,12 @@ has_conpty_working(void)
 }
 
     int
+get_conpty_type(void)
+{
+    return conpty_type;	// assigned in vtp_flag_init() from the Windows build
+}
+
+    int
 is_conpty_stable(void)
 {
     return conpty_stable;
--- a/src/proto/misc2.pro
+++ b/src/proto/misc2.pro
@@ -106,4 +106,5 @@ void parse_queued_messages(void);
 int mch_parse_cmd(char_u *cmd, int use_shcf, char ***argv, int *argc);
 int build_argv_from_string(char_u *cmd, char ***argv, int *argc);
 int build_argv_from_list(list_T *l, char ***argv, int *argc);
+int get_special_pty_type(void);
 /* vim: set ft=c : */
--- a/src/proto/os_win32.pro
+++ b/src/proto/os_win32.pro
@@ -76,6 +76,7 @@ int use_vtp(void);
 int is_term_win32(void);
 int has_vtp_working(void);
 int has_conpty_working(void);
+int get_conpty_type(void);
 int is_conpty_stable(void);
 void resize_console_buf(void);
 /* vim: set ft=c : */
--- a/src/version.c
+++ b/src/version.c
@@ -758,6 +758,8 @@ static char *(features[]) =
 static int included_patches[] =
 {   /* Add new patch number below this line */
 /**/
+    2027,
+/**/
     2026,
 /**/
     2025,