diff src/regexp.c @ 33284:0943c2a880cc v9.0.1908

patch 9.0.1908: undefined behaviour upper/lower function ptrs Commit: https://github.com/vim/vim/commit/d25021cf036c63d539f845a1ee05b03ea21d61ff Author: Yee Cheng Chin <ychin.git@gmail.com> Date: Mon Sep 18 19:51:56 2023 +0200 patch 9.0.1908: undefined behaviour upper/lower function ptrs Problem: undefined behaviour upper/lower function ptrs Solution: Fix UBSAN error in regexp and simplify upper/lowercase modifier code The implementation of \u / \U / \l / \L modifiers in the substitute command relies on remembering the state by setting function pointers on func_all/func_one in the code. The code signature of `fptr_T` is supposed to return void* (due to C function signatures not being able to return itself due to type recursion), and the definition of the functions (e.g. to_Upper) didn't follow this rule, and so the code tries to cast functions of different signatures, resulting in undefined behavior error under UBSAN in Clang 17. See #12745. We could just fix `do_Upper`/etc to just return void*, which would fix the problem. However, these functions actually do not need to return anything at all. It used to be the case that there was only one pointer "func" to store the pointer, which is why the function needs to either return itself or NULL to indicate whether it's a one time or ongoing modification. However, c2c355df6f094cdb9e599fd395a78c14486ec697 (7.3.873) already made that obsolete by introducing `func_one` and `func_all` to store one-time and ongoing operations separately, so these functions don't actually need to return anything anymore because it's implicit whether it's a one-time or ongoing operation. Simplify the code to reflect that. closes: #13117 Signed-off-by: Christian Brabandt <cb@256bit.org> Co-authored-by: Yee Cheng Chin <ychin.git@gmail.com>
author Christian Brabandt <cb@256bit.org>
date Mon, 18 Sep 2023 20:00:09 +0200
parents 318846db5166
children 90063f44c99a
line wrap: on
line diff
--- a/src/regexp.c
+++ b/src/regexp.c
@@ -1709,46 +1709,20 @@ cstrchr(char_u *s, int c)
 //		      regsub stuff			      //
 ////////////////////////////////////////////////////////////////
 
-/*
- * We should define ftpr as a pointer to a function returning a pointer to
- * a function returning a pointer to a function ...
- * This is impossible, so we declare a pointer to a function returning a
- * void pointer. This should work for all compilers.
- */
-typedef void (*(*fptr_T)(int *, int));
+typedef void (*fptr_T)(int *, int);
 
 static int vim_regsub_both(char_u *source, typval_T *expr, char_u *dest, int destlen, int flags);
 
-    static fptr_T
+    static void
 do_upper(int *d, int c)
 {
     *d = MB_TOUPPER(c);
-
-    return (fptr_T)NULL;
 }
 
-    static fptr_T
-do_Upper(int *d, int c)
-{
-    *d = MB_TOUPPER(c);
-
-    return (fptr_T)do_Upper;
-}
-
-    static fptr_T
+    static void
 do_lower(int *d, int c)
 {
     *d = MB_TOLOWER(c);
-
-    return (fptr_T)NULL;
-}
-
-    static fptr_T
-do_Lower(int *d, int c)
-{
-    *d = MB_TOLOWER(c);
-
-    return (fptr_T)do_Lower;
 }
 
 /*
@@ -2203,13 +2177,13 @@ vim_regsub_both(
 	    {
 		switch (*src++)
 		{
-		case 'u':   func_one = (fptr_T)do_upper;
+		case 'u':   func_one = do_upper;
 			    continue;
-		case 'U':   func_all = (fptr_T)do_Upper;
+		case 'U':   func_all = do_upper;
 			    continue;
-		case 'l':   func_one = (fptr_T)do_lower;
+		case 'l':   func_one = do_lower;
 			    continue;
-		case 'L':   func_all = (fptr_T)do_Lower;
+		case 'L':   func_all = do_lower;
 			    continue;
 		case 'e':
 		case 'E':   func_one = func_all = (fptr_T)NULL;
@@ -2276,11 +2250,12 @@ vim_regsub_both(
 
 	    // Write to buffer, if copy is set.
 	    if (func_one != (fptr_T)NULL)
-		// Turbo C complains without the typecast
-		func_one = (fptr_T)(func_one(&cc, c));
+	    {
+		func_one(&cc, c);
+		func_one = NULL;
+	    }
 	    else if (func_all != (fptr_T)NULL)
-		// Turbo C complains without the typecast
-		func_all = (fptr_T)(func_all(&cc, c));
+		func_all(&cc, c);
 	    else // just copy
 		cc = c;
 
@@ -2424,11 +2399,12 @@ vim_regsub_both(
 				c = *s;
 
 			    if (func_one != (fptr_T)NULL)
-				// Turbo C complains without the typecast
-				func_one = (fptr_T)(func_one(&cc, c));
+			    {
+				func_one(&cc, c);
+				func_one = NULL;
+			    }
 			    else if (func_all != (fptr_T)NULL)
-				// Turbo C complains without the typecast
-				func_all = (fptr_T)(func_all(&cc, c));
+				func_all(&cc, c);
 			    else // just copy
 				cc = c;