changeset 35150:42f061099b39 v9.1.0404

patch 9.1.0404: [security] xxd: buffer-overflow with specific flags Commit: https://github.com/vim/vim/commit/67797191e039196128c69ba1538ccaf2a4711323 Author: Lennard Hofmann <lennard.hofmann@web.de> Date: Fri May 10 14:17:26 2024 +0200 patch 9.1.0404: [security] xxd: buffer-overflow with specific flags Problem: [security] xxd: buffer-overflow with specific flags Solution: Correctly calculate the required buffer space (Lennard Hofmann) xxd writes each output line into a global buffer before printing. The maximum size of that buffer was not calculated correctly. This command was crashing in AddressSanitizer: $ xxd -Ralways -g1 -c256 -d -o 9223372036854775808 /etc/passwd This prints a line of 6680 bytes but the buffer only had room for 6549 bytes. If the output from "-b" was colored, the line could be even longer. closes: #14738 Co-authored-by: K.Takata <kentkt@csc.jp> Signed-off-by: Lennard Hofmann <lennard.hofmann@web.de> Signed-off-by: Christian Brabandt <cb@256bit.org>
author Christian Brabandt <cb@256bit.org>
date Fri, 10 May 2024 14:45:03 +0200
parents 7720516ffcea
children ad347159a42c
files runtime/doc/xxd.1 runtime/doc/xxd.man src/testdir/test_xxd.vim src/version.c src/xxd/xxd.c
diffstat 5 files changed, 59 insertions(+), 51 deletions(-) [+]
line wrap: on
line diff
--- a/runtime/doc/xxd.1
+++ b/runtime/doc/xxd.1
@@ -75,6 +75,9 @@ No maximum for \-ps. With \-ps, 0 result
 .IR \-C " | " \-capitalize
 Capitalize variable names in C include file style, when using \-i.
 .TP
+.I \-d
+show offset in decimal instead of hex.
+.TP
 .IR \-E " | " \-EBCDIC
 Change the character encoding in the righthand column from ASCII to EBCDIC.
 This does not change the hexadecimal representation. The option is
@@ -138,12 +141,12 @@ anywhere. Use the combination
 to read a bits dump instead of a hex dump.
 .TP
 .IR \-R " " when
-In output the hex-value and the value are both colored with the same color
+In the output the hex-value and the value are both colored with the same color
 depending on the hex-value. Mostly helping to differentiate printable and
 non-printable characters.
 .I \fIwhen\fP
 is
-.BR never ", " always ", or " auto .
+.BR never ", " always ", or " auto " (default: auto).
 When the 
 .BR $NO_COLOR
 environment variable is set, colorization will be disabled.
--- a/runtime/doc/xxd.man
+++ b/runtime/doc/xxd.man
@@ -49,6 +49,8 @@ OPTIONS
               Capitalize  variable  names  in C include file style, when using
               -i.
 
+       -d     show offset in decimal instead of hex.
+
        -E | -EBCDIC
               Change the character encoding in the righthand column from ASCII
               to EBCDIC.  This does not change the hexadecimal representation.
@@ -97,15 +99,15 @@ OPTIONS
               truncating it. Use the combination -r -p to read plain hexadeci‐
               mal dumps without line number information and without a particu‐
               lar column layout. Additional whitespace and line breaks are al‐
-              lowed anywhere.  Use the combination -r -b to read a  bits  dump
+              lowed  anywhere.  Use  the combination -r -b to read a bits dump
               instead of a hex dump.
 
        -R when
-              In  output the hex-value and the value are both colored with the
-              same color depending on the hex-value. Mostly helping to differ‐
-              entiate  printable and non-printable characters.  when is never,
-              always, or auto.  When the  $NO_COLOR  environment  variable  is
-              set, colorization will be disabled.
+              In the output the hex-value and the value are both colored  with
+              the  same  color  depending  on the hex-value. Mostly helping to
+              differentiate printable and non-printable characters.   when  is
+              never,  always, or auto (default: auto).  When the $NO_COLOR en‐
+              vironment variable is set, colorization will be disabled.
 
        -seek offset
               When used after -r: revert with <offset> added to file positions
@@ -113,9 +115,9 @@ OPTIONS
 
        -s [+][-]seek
               Start at <seek> bytes abs. (or rel.) infile offset.  + indicates
-              that  the  seek  is  relative to the current stdin file position
+              that the seek is relative to the  current  stdin  file  position
               (meaningless when not reading from stdin).  - indicates that the
-              seek  should  be  that many characters from the end of the input
+              seek should be that many characters from the end  of  the  input
               (or if combined with +: before the current stdin file position).
               Without -s option, xxd starts at the current file position.
 
@@ -125,20 +127,20 @@ OPTIONS
               Show version string.
 
 CAVEATS
-       xxd  -r  has  some built-in magic while evaluating line number informa‐
-       tion.  If the output file is seekable, then the  line  numbers  at  the
-       start  of each hex dump line may be out of order, lines may be missing,
-       or overlapping. In these cases xxd will lseek(2) to the next  position.
-       If  the  output file is not seekable, only gaps are allowed, which will
+       xxd -r has some built-in magic while evaluating  line  number  informa‐
+       tion.   If  the  output  file is seekable, then the line numbers at the
+       start of each hex dump line may be out of order, lines may be  missing,
+       or  overlapping. In these cases xxd will lseek(2) to the next position.
+       If the output file is not seekable, only gaps are allowed,  which  will
        be filled by null-bytes.
 
        xxd -r never generates parse errors. Garbage is silently skipped.
 
        When editing hex dumps, please note that xxd -r skips everything on the
        input line after reading enough columns of hexadecimal data (see option
-       -c). This also means that changes to the printable  ASCII  (or  EBCDIC)
+       -c).  This  also  means that changes to the printable ASCII (or EBCDIC)
        columns are always ignored. Reverting a plain (or PostScript) style hex
-       dump with xxd -r -p does not depend on the correct number  of  columns.
+       dump  with  xxd -r -p does not depend on the correct number of columns.
        Here, anything that looks like a pair of hex digits is interpreted.
 
        Note the difference between
@@ -146,28 +148,28 @@ CAVEATS
        and
        % xxd -i < file
 
-       xxd  -s +seek may be different from xxd -s seek, as lseek(2) is used to
+       xxd -s +seek may be different from xxd -s seek, as lseek(2) is used  to
        "rewind" input.  A '+' makes a difference if the input source is stdin,
-       and  if  stdin's  file  position is not at the start of the file by the
-       time xxd is started and given its input.  The  following  examples  may
+       and if stdin's file position is not at the start of  the  file  by  the
+       time  xxd  is  started and given its input.  The following examples may
        help to clarify (or further confuse!):
 
-       Rewind  stdin before reading; needed because the `cat' has already read
+       Rewind stdin before reading; needed because the `cat' has already  read
        to the end of stdin.
        % sh -c "cat > plain_copy; xxd -s 0 > hex_copy" < file
 
-       Hex dump from file position 0x480 (=1024+128) onwards.   The  `+'  sign
+       Hex  dump  from  file position 0x480 (=1024+128) onwards.  The `+' sign
        means "relative to the current position", thus the `128' adds to the 1k
        where dd left off.
-       % sh -c "dd of=plain_snippet bs=1k count=1; xxd -s +128 >  hex_snippet"
+       %  sh -c "dd of=plain_snippet bs=1k count=1; xxd -s +128 > hex_snippet"
        < file
 
        Hex dump from file position 0x100 (=1024-768) onwards.
        % sh -c "dd of=plain_snippet bs=1k count=1; xxd -s +-768 > hex_snippet"
        < file
 
-       However, this is a rare situation and the use of `+' is rarely  needed.
-       The  author  prefers  to  monitor  the  effect of xxd with strace(1) or
+       However,  this is a rare situation and the use of `+' is rarely needed.
+       The author prefers to monitor the  effect  of  xxd  with  strace(1)  or
        truss(1), whenever -s is used.
 
 EXAMPLES
@@ -211,7 +213,7 @@ EXAMPLES
        % xxd -s 0x36 -l 13 -c 13 xxd.1
        0000036: 3235 7468 204d 6179 2031 3939 36  25th May 1996
 
-       Create a 65537 byte file with all bytes 0x00, except for the  last  one
+       Create  a  65537 byte file with all bytes 0x00, except for the last one
        which is 'A' (hex 0x41).
        % echo "010000: 41" | xxd -r > file
 
@@ -222,11 +224,11 @@ EXAMPLES
        000fffc: 0000 0000 40                   ....A
 
        Create a 1 byte file containing a single 'A' character.  The number af‐
-       ter '-r -s' adds to the line numbers found in the file; in effect,  the
+       ter  '-r -s' adds to the line numbers found in the file; in effect, the
        leading bytes are suppressed.
        % echo "010000: 41" | xxd -r -s -0x10000 > file
 
-       Use  xxd  as a filter within an editor such as vim(1) to hex dump a re‐
+       Use xxd as a filter within an editor such as vim(1) to hex dump  a  re‐
        gion marked between `a' and `z'.
        :'a,'z!xxd
 
--- a/src/testdir/test_xxd.vim
+++ b/src/testdir/test_xxd.vim
@@ -411,6 +411,19 @@ func Test_xxd_max_cols()
   endfor
 endfunc
 
+
+" Try to trigger a buffer overflow (#14738)
+func Test_xxd_buffer_overflow()
+  CheckUnix
+  new
+  let input = repeat('A', 256)
+  call writefile(['-9223372036854775808: ' . repeat("\e[1;32m41\e[0m ", 256) . ' ' . repeat("\e[1;32mA\e[0m", 256)], 'Xxdexpected', 'D')
+  exe 'r! printf ' . input . '| ' . s:xxd_cmd . ' -Ralways -g1 -c256 -d -o 9223372036854775808 > Xxdout'
+  call assert_equalfile('Xxdexpected', 'Xxdout')
+  call delete('Xxdout')
+  bwipe!
+endfunc
+
 " -c0 selects the format specific default column value, as if no -c was given
 " except for -ps, where it disables extra newlines
 func Test_xxd_c0_is_def_cols()
--- a/src/version.c
+++ b/src/version.c
@@ -705,6 +705,8 @@ static char *(features[]) =
 static int included_patches[] =
 {   /* Add new patch number below this line */
 /**/
+    404,
+/**/
     403,
 /**/
     402,
--- a/src/xxd/xxd.c
+++ b/src/xxd/xxd.c
@@ -62,6 +62,7 @@
  * 17.01.2024  use size_t instead of usigned int for code-generation (-i), #13876
  * 25.01.2024  revert the previous patch (size_t instead of unsigned int)
  * 10.02.2024  fix buffer-overflow when writing color output to buffer, #14003
+ * 10.05.2024  fix another buffer-overflow when writing colored output to buffer, #14738
  *
  * (c) 1990-1998 by Juergen Weigert (jnweiger@gmail.com)
  *
@@ -142,7 +143,7 @@ extern void perror __P((char *));
 # endif
 #endif
 
-char version[] = "xxd 2024-02-10 by Juergen Weigert et al.";
+char version[] = "xxd 2024-05-10 by Juergen Weigert et al.";
 #ifdef WIN32
 char osver[] = " (Win32)";
 #else
@@ -205,29 +206,16 @@ char osver[] = "";
 /*
  * LLEN is the maximum length of a line; other than the visible characters
  * we need to consider also the escape color sequence prologue/epilogue ,
- * (11 bytes for each character). The most larger format is the default one:
- * addr + 1 word for each col/2 + 1 char for each col
- *
- *   addr        1st group       2nd group
- * +-------+ +-----------------+ +------+
- * 01234567: 1234 5678 9abc def0 12345678
- *
- * - addr: typically 012345678:    -> from 10 up to 18 bytes (including trailing
- *                                    space)
- * - 1st group: 1234 5678 9abc ... -> each byte may be colored, so add 11
- *                                    for each byte
- * - space                         -> 1 byte
- * - 2nd group: 12345678           -> each char may be colore so add 11
- *                                    for each byte
- * - new line                      -> 1 byte
- * - zero (end line)               -> 1 byte
+ * (11 bytes for each character).
  */
-#define LLEN (2*(int)sizeof(unsigned long) + 2 +     /* addr + ": " */ \
-             (11 * 2 + 4 + 1) * (COLS / 2) +         /* 1st group */   \
-	     1 +                                     /* space */       \
-	     (1 + 11) * COLS +                       /* 2nd group */   \
-	     1 +                                     /* new line */    \
-	     1)                                      /* zero */
+#define LLEN \
+    (39            /* addr: ⌈log10(ULONG_MAX)⌉ if "-d" flag given. We assume ULONG_MAX = 2**128 */ \
+    + 2            /* ": " */ \
+    + 13 * COLS    /* hex dump with colors */ \
+    + (COLS - 1)   /* whitespace between groups if "-g1" option given and "-c" maxed out */ \
+    + 2            /* whitespace */ \
+    + 12 * COLS    /* ASCII dump with colors */ \
+    + 2)           /* "\n\0" */
 
 char hexxa[] = "0123456789abcdef0123456789ABCDEF", *hexx = hexxa;