Mercurial > vim

--- a/src/misc2.c
+++ b/src/misc2.c
@@ -1622,11 +1622,17 @@ strup_save(char_u *orig)
 		char_u	*s;

 		c = utf_ptr2char(p);
+		l = utf_ptr2len(p);
+		if (c == 0)
+		{
+		    /* overlong sequence, use only the first byte */
+		    c = *p;
+		    l = 1;
+		}
 		uc = utf_toupper(c);

 		/* Reallocate string when byte count changes.  This is rare,
 		 * thus it's OK to do another malloc()/free(). */
-		l = utf_ptr2len(p);
 		newl = utf_char2len(uc);
 		if (newl != l)
 		{
@@ -1685,11 +1691,17 @@ strlow_save(char_u *orig)
 		char_u	*s;

 		c = utf_ptr2char(p);
+		l = utf_ptr2len(p);
+		if (c == 0)
+		{
+		    /* overlong sequence, use only the first byte */
+		    c = *p;
+		    l = 1;
+		}
 		lc = utf_tolower(c);

 		/* Reallocate string when byte count changes.  This is rare,
 		 * thus it's OK to do another malloc()/free(). */
-		l = utf_ptr2len(p);
 		newl = utf_char2len(lc);
 		if (newl != l)
 		{
--- a/src/testdir/test_functions.vim
+++ b/src/testdir/test_functions.vim
@@ -268,6 +268,11 @@ func Test_tolower()
   " Ⱥ (U+023A) and Ⱦ (U+023E) are the *only* code points to increase
   " in length (2 to 3 bytes) when lowercased. So let's test them.
   call assert_equal("ⱥ ⱦ", tolower("Ⱥ Ⱦ"))
+
+  " This call to tolower with invalid utf8 sequence used to cause access to
+  " invalid memory.
+  call tolower("\xC0\x80\xC0")
+  call tolower("123\xC0\x80\xC0")
 endfunc

 func Test_toupper()
@@ -338,6 +343,11 @@ func Test_toupper()
   call assert_equal("ZŹŻŽƵẐẔ", toupper("ZŹŻŽƵẐẔ"))

   call assert_equal("Ⱥ Ⱦ", toupper("ⱥ ⱦ"))
+
+  " This call to toupper with invalid utf8 sequence used to cause access to
+  " invalid memory.
+  call toupper("\xC0\x80\xC0")
+  call toupper("123\xC0\x80\xC0")
 endfunc

 " Tests for the mode() function
--- a/src/version.c
+++ b/src/version.c
@@ -772,6 +772,8 @@ static char *(features[]) =
 static int included_patches[] =
 {   /* Add new patch number below this line */
 /**/
+    1421,
+/**/
     1420,
 /**/
     1419,