changeset 13766:74294f0fbf28 v8.0.1755

patch 8.0.1755: MS-Windows: high unicode char received as two utf-16 words commit https://github.com/vim/vim/commit/f1f2f837bc85c93ba81176383cccd6734f0129fb Author: Bram Moolenaar <Bram@vim.org> Date: Tue Apr 24 16:04:57 2018 +0200 patch 8.0.1755: MS-Windows: high unicode char received as two utf-16 words Problem: MS-Windows GUI: high unicode char received as two utf-16 words. Solution: Keep the first word until the second word is received. (Chris Morgan, closes #2800)
author Christian Brabandt <cb@256bit.org>
date Tue, 24 Apr 2018 16:15:10 +0200
parents 939befd4886c
children 02617aff148e
files src/gui_w32.c src/version.c
diffstat 2 files changed, 33 insertions(+), 5 deletions(-) [+]
line wrap: on
line diff
--- a/src/gui_w32.c
+++ b/src/gui_w32.c
@@ -494,9 +494,11 @@ static int	s_getting_focus = FALSE;
 static int	s_x_pending;
 static int	s_y_pending;
 static UINT	s_kFlags_pending;
-static UINT	s_wait_timer = 0;   /* Timer for get char from user */
+static UINT	s_wait_timer = 0;	  // Timer for get char from user
 static int	s_timed_out = FALSE;
-static int	dead_key = 0;	/* 0: no dead key, 1: dead key pressed */
+static int	dead_key = 0;		  // 0: no dead key, 1: dead key pressed
+static UINT	surrogate_pending_ch = 0; // 0: no surrogate pending,
+					  // else a high surrogate
 
 #ifdef FEAT_BEVAL_GUI
 /* balloon-eval WM_NOTIFY_HANDLER */
@@ -708,6 +710,12 @@ gui_mch_start_blink(void)
  * Convert Unicode character "ch" to bytes in "string[slen]".
  * When "had_alt" is TRUE the ALT key was included in "ch".
  * Return the length.
+ * Because the Windows API uses UTF-16, we have to deal with surrogate
+ * pairs; this is where we choose to deal with them: if "ch" is a high
+ * surrogate, it will be stored, and the length returned will be zero; the next
+ * char_to_string call will then include the high surrogate, decoding the pair
+ * of UTF-16 code units to a single Unicode code point, presuming it is the
+ * matching low surrogate.
  */
     static int
 char_to_string(int ch, char_u *string, int slen, int had_alt)
@@ -718,8 +726,27 @@ char_to_string(int ch, char_u *string, i
     WCHAR	wstring[2];
     char_u	*ws = NULL;
 
-    wstring[0] = ch;
-    len = 1;
+    if (surrogate_pending_ch != 0)
+    {
+	/* We don't guarantee ch is a low surrogate to match the high surrogate
+	 * we already have; it should be, but if it isn't, tough luck. */
+	wstring[0] = surrogate_pending_ch;
+	wstring[1] = ch;
+	surrogate_pending_ch = 0;
+	len = 2;
+    }
+    else if (ch >= 0xD800 && ch <= 0xDBFF)	/* high surrogate */
+    {
+	/* We don't have the entire code point yet, only the first UTF-16 code
+	 * unit; so just remember it and use it in the next call. */
+	surrogate_pending_ch = ch;
+	return 0;
+    }
+    else
+    {
+	wstring[0] = ch;
+	len = 1;
+    }
 
     /* "ch" is a UTF-16 character.  Convert it to a string of bytes.  When
      * "enc_codepage" is non-zero use the standard Win32 function,
@@ -743,7 +770,6 @@ char_to_string(int ch, char_u *string, i
     }
     else
     {
-	len = 1;
 	ws = utf16_to_enc(wstring, &len);
 	if (ws == NULL)
 	    len = 0;
--- a/src/version.c
+++ b/src/version.c
@@ -762,6 +762,8 @@ static char *(features[]) =
 static int included_patches[] =
 {   /* Add new patch number below this line */
 /**/
+    1755,
+/**/
     1754,
 /**/
     1753,