Mercurial > vim
diff src/strings.c @ 32775:2d5e8c46508b v9.0.1704
patch 9.0.1704: Cannot use positional arguments for printf()
Commit: https://github.com/vim/vim/commit/0c6181fec4c362eb9682d5af583341eb20cb1af5
Author: Christ van Willegen <cvwillegen@gmail.com>
Date: Sun Aug 13 18:03:14 2023 +0200
patch 9.0.1704: Cannot use positional arguments for printf()
Problem: Cannot use positional arguments for printf()
Solution: Support positional arguments in string formatting
closes: #12140
Signed-off-by: Christian Brabandt <cb@256bit.org>
Co-authored-by: Christ van Willegen <cvwillegen@gmail.com>
author | Christian Brabandt <cb@256bit.org> |
---|---|
date | Sun, 13 Aug 2023 18:15:04 +0200 |
parents | 695b50472e85 |
children | f48e27f20e6b |
line wrap: on
line diff
--- a/src/strings.c +++ b/src/strings.c @@ -2242,17 +2242,665 @@ vim_vsnprintf( return vim_vsnprintf_typval(str, str_m, fmt, ap, NULL); } +enum +{ + TYPE_UNKNOWN = -1, + TYPE_INT, + TYPE_LONGINT, + TYPE_LONGLONGINT, + TYPE_UNSIGNEDINT, + TYPE_UNSIGNEDLONGINT, + TYPE_UNSIGNEDLONGLONGINT, + TYPE_POINTER, + TYPE_PERCENT, + TYPE_CHAR, + TYPE_STRING, + TYPE_FLOAT +}; + +/* Types that can be used in a format string + */ + int +format_typeof( + const char *type, + int usetvs UNUSED) +{ + // allowed values: \0, h, l, L + char length_modifier = '\0'; + + // current conversion specifier character + char fmt_spec = '\0'; + + // parse 'h', 'l' and 'll' length modifiers + if (*type == 'h' || *type == 'l') + { + length_modifier = *type; + type++; + if (length_modifier == 'l' && *type == 'l') + { + // double l = __int64 / varnumber_T + length_modifier = 'L'; + type++; + } + } + fmt_spec = *type; + + // common synonyms: + switch (fmt_spec) + { + case 'i': fmt_spec = 'd'; break; + case '*': fmt_spec = 'd'; length_modifier = 'h'; break; + case 'D': fmt_spec = 'd'; length_modifier = 'l'; break; + case 'U': fmt_spec = 'u'; length_modifier = 'l'; break; + case 'O': fmt_spec = 'o'; length_modifier = 'l'; break; + default: break; + } + +# if defined(FEAT_EVAL) + if (usetvs) + { + switch (fmt_spec) + { + case 'd': case 'u': case 'o': case 'x': case 'X': + if (length_modifier == '\0') + length_modifier = 'L'; + } + } +# endif + + // get parameter value, do initial processing + switch (fmt_spec) + { + // '%' and 'c' behave similar to 's' regarding flags and field + // widths + case '%': + return TYPE_PERCENT; + + case 'c': + return TYPE_CHAR; + + case 's': + case 'S': + return TYPE_STRING; + + case 'd': case 'u': + case 'b': case 'B': + case 'o': + case 'x': case 'X': + case 'p': + { + // NOTE: the u, b, o, x, X and p conversion specifiers + // imply the value is unsigned; d implies a signed + // value + + // 0 if numeric argument is zero (or if pointer is + // NULL for 'p'), +1 if greater than zero (or nonzero + // for unsigned arguments), -1 if negative (unsigned + // argument is never negative) + + if (fmt_spec == 'p') + return TYPE_POINTER; + else if (fmt_spec == 'b' || fmt_spec == 'B') + return TYPE_UNSIGNEDINT; + else if (fmt_spec == 'd') + { + // signed + switch (length_modifier) + { + case '\0': + case 'h': + // char and short arguments are passed as int. + return TYPE_INT; + case 'l': + return TYPE_LONGINT; + case 'L': + return TYPE_LONGLONGINT; + } + } + else + { + // unsigned + switch (length_modifier) + { + case '\0': + case 'h': + return TYPE_UNSIGNEDINT; + case 'l': + return TYPE_UNSIGNEDLONGINT; + case 'L': + return TYPE_UNSIGNEDLONGLONGINT; + } + } + } + break; + + case 'f': + case 'F': + case 'e': + case 'E': + case 'g': + case 'G': + return TYPE_FLOAT; + } + + return TYPE_UNKNOWN; +} + + char * +format_typename( + const char *type) +{ + switch (format_typeof(type, FALSE)) + { + case TYPE_INT: + return _(typename_int); + + case TYPE_LONGINT: + return _(typename_longint); + + case TYPE_LONGLONGINT: + return _(typename_longlongint); + + case TYPE_UNSIGNEDINT: + return _(typename_unsignedint); + + case TYPE_UNSIGNEDLONGINT: + return _(typename_unsignedlongint); + + case TYPE_UNSIGNEDLONGLONGINT: + return _(typename_unsignedlonglongint); + + case TYPE_POINTER: + return _(typename_pointer); + + case TYPE_PERCENT: + return _(typename_percent); + + case TYPE_CHAR: + return _(typename_char); + + case TYPE_STRING: + return _(typename_string); + + case TYPE_FLOAT: + return _(typename_float); + } + + return _(typename_unknown); +} + + int +adjust_types( + const char ***ap_types, + int arg, + int *num_posarg, + const char *type) +{ + if (*ap_types == NULL || *num_posarg < arg) + { + int idx; + const char **new_types; + + if (*ap_types == NULL) + new_types = ALLOC_CLEAR_MULT(const char *, arg); + else + new_types = vim_realloc(*ap_types, arg * sizeof(const char *)); + + if (new_types == NULL) + return FAIL; + + for (idx = *num_posarg; idx < arg; ++idx) + new_types[idx] = NULL; + + *ap_types = new_types; + *num_posarg = arg; + } + + if ((*ap_types)[arg - 1] != NULL) + { + if ((*ap_types)[arg - 1][0] == '*' || type[0] == '*') + { + const char *pt = type; + if (pt[0] == '*') + pt = (*ap_types)[arg - 1]; + + if (pt[0] != '*') + { + switch (pt[0]) + { + case 'd': case 'i': break; + default: + semsg(_(e_positional_num_field_spec_reused_str_str), arg, format_typename((*ap_types)[arg - 1]), format_typename(type)); + return FAIL; + } + } + } + else + { + if (format_typeof(type, FALSE) != format_typeof((*ap_types)[arg - 1], FALSE)) + { + semsg(_( e_positional_arg_num_type_inconsistent_str_str), arg, format_typename(type), format_typename((*ap_types)[arg - 1])); + return FAIL; + } + } + } + + (*ap_types)[arg - 1] = type; + + return OK; +} + + int +parse_fmt_types( + const char ***ap_types, + int *num_posarg, + const char *fmt, + typval_T *tvs UNUSED + ) +{ + const char *p = fmt; + const char *arg = NULL; + + int any_pos = 0; + int any_arg = 0; + int arg_idx; + +#define CHECK_POS_ARG do { \ + if (any_pos && any_arg) \ + { \ + semsg(_( e_cannot_mix_positional_and_non_positional_str), fmt); \ + goto error; \ + } \ +} while (0); + + if (p == NULL) + return OK; + + while (*p != NUL) + { + if (*p != '%') + { + char *q = strchr(p + 1, '%'); + size_t n = (q == NULL) ? STRLEN(p) : (size_t)(q - p); + + p += n; + } + else + { + // allowed values: \0, h, l, L + char length_modifier = '\0'; + + // variable for positional arg + int pos_arg = -1; + const char *ptype = NULL; + + p++; // skip '%' + + // First check to see if we find a positional + // argument specifier + ptype = p; + + while (VIM_ISDIGIT(*ptype)) + ++ptype; + + if (*ptype == '$') + { + if (*p == '0') + { + // 0 flag at the wrong place + semsg(_( e_invalid_format_specifier_str), fmt); + goto error; + } + + // Positional argument + unsigned int uj = *p++ - '0'; + + while (VIM_ISDIGIT((int)(*p))) + uj = 10 * uj + (unsigned int)(*p++ - '0'); + pos_arg = uj; + + any_pos = 1; + CHECK_POS_ARG; + + ++p; + } + + // parse flags + while (*p == '0' || *p == '-' || *p == '+' || *p == ' ' + || *p == '#' || *p == '\'') + { + switch (*p) + { + case '0': break; + case '-': break; + case '+': break; + case ' ': // If both the ' ' and '+' flags appear, the ' ' + // flag should be ignored + break; + case '#': break; + case '\'': break; + } + p++; + } + // If the '0' and '-' flags both appear, the '0' flag should be + // ignored. + + // parse field width + if (*(arg = p) == '*') + { + p++; + + if (VIM_ISDIGIT((int)(*p))) + { + // Positional argument field width + unsigned int uj = *p++ - '0'; + + while (VIM_ISDIGIT((int)(*p))) + uj = 10 * uj + (unsigned int)(*p++ - '0'); + + if (*p != '$') + { + semsg(_( e_invalid_format_specifier_str), fmt); + goto error; + } + else + { + ++p; + any_pos = 1; + CHECK_POS_ARG; + + if (adjust_types(ap_types, uj, num_posarg, arg) == FAIL) + goto error; + } + } + else + { + any_arg = 1; + CHECK_POS_ARG; + } + } + else if (VIM_ISDIGIT((int)(*(arg = p)))) + { + // size_t could be wider than unsigned int; make sure we treat + // argument like common implementations do + unsigned int uj = *p++ - '0'; + + while (VIM_ISDIGIT((int)(*p))) + uj = 10 * uj + (unsigned int)(*p++ - '0'); + + if (*p == '$') + { + semsg(_( e_invalid_format_specifier_str), fmt); + goto error; + } + } + + // parse precision + if (*p == '.') + { + p++; + + if (*(arg = p) == '*') + { + p++; + + if (VIM_ISDIGIT((int)(*p))) + { + // Parse precision + unsigned int uj = *p++ - '0'; + + while (VIM_ISDIGIT((int)(*p))) + uj = 10 * uj + (unsigned int)(*p++ - '0'); + + if (*p == '$') + { + any_pos = 1; + CHECK_POS_ARG; + + ++p; + + if (adjust_types(ap_types, uj, num_posarg, arg) == FAIL) + goto error; + } + else + { + semsg(_( e_invalid_format_specifier_str), fmt); + goto error; + } + } + else + { + any_arg = 1; + CHECK_POS_ARG; + } + } + else if (VIM_ISDIGIT((int)(*(arg = p)))) + { + // size_t could be wider than unsigned int; make sure we + // treat argument like common implementations do + unsigned int uj = *p++ - '0'; + + while (VIM_ISDIGIT((int)(*p))) + uj = 10 * uj + (unsigned int)(*p++ - '0'); + + if (*p == '$') + { + semsg(_( e_invalid_format_specifier_str), fmt); + goto error; + } + } + } + + if (pos_arg != -1) + { + any_pos = 1; + CHECK_POS_ARG; + + ptype = p; + } + + // parse 'h', 'l' and 'll' length modifiers + if (*p == 'h' || *p == 'l') + { + length_modifier = *p; + p++; + if (length_modifier == 'l' && *p == 'l') + { + // double l = __int64 / varnumber_T + length_modifier = 'L'; + p++; + } + } + + switch (*p) + { + // Check for known format specifiers. % is special! + case 'i': + case '*': + case 'd': + case 'u': + case 'o': + case 'D': + case 'U': + case 'O': + case 'x': + case 'X': + case 'b': + case 'B': + case 'c': + case 's': + case 'S': + case 'p': + case 'f': + case 'F': + case 'e': + case 'E': + case 'g': + case 'G': + if (pos_arg != -1) + { + if (adjust_types(ap_types, pos_arg, num_posarg, ptype) == FAIL) + goto error; + } + else + { + any_arg = 1; + CHECK_POS_ARG; + } + break; + + default: + if (pos_arg != -1) + { + semsg(_( e_cannot_mix_positional_and_non_positional_str), fmt); + goto error; + } + } + + if (*p != NUL) + p++; // step over the just processed conversion specifier + } + } + + for (arg_idx = 0; arg_idx < *num_posarg; ++arg_idx) + { + if ((*ap_types)[arg_idx] == NULL) + { + semsg(_(e_fmt_arg_nr_unused_str), arg_idx + 1, fmt); + goto error; + } + +# if defined(FEAT_EVAL) + if (tvs != NULL && tvs[arg_idx].v_type == VAR_UNKNOWN) + { + semsg(_(e_positional_nr_out_of_bounds_str), arg_idx + 1, fmt); + goto error; + } +# endif + } + + return OK; + +error: + vim_free(*ap_types); + *ap_types = NULL; + *num_posarg = 0; + return FAIL; +} + + void +skip_to_arg( + const char **ap_types, + va_list ap_start, + va_list *ap, + int *arg_idx, + int *arg_cur) +{ + int arg_min = 0; + + if (*arg_cur + 1 == *arg_idx) + { + ++*arg_cur; + ++*arg_idx; + return; + } + + if (*arg_cur >= *arg_idx) + { + // Reset ap to ap_start and skip arg_idx - 1 types + va_end(*ap); + va_copy(*ap, ap_start); + } + else + { + // Skip over any we should skip + arg_min = *arg_cur; + } + + for (*arg_cur = arg_min; *arg_cur < *arg_idx - 1; ++*arg_cur) + { + const char *p = ap_types[*arg_cur]; + + int fmt_type = format_typeof(p, TRUE); + + // get parameter value, do initial processing + switch (fmt_type) + { + case TYPE_PERCENT: + case TYPE_UNKNOWN: + break; + + case TYPE_CHAR: + va_arg(*ap, int); + break; + + case TYPE_STRING: + va_arg(*ap, char *); + break; + + case TYPE_POINTER: + va_arg(*ap, void *); + break; + + case TYPE_INT: + va_arg(*ap, int); + break; + + case TYPE_LONGINT: + va_arg(*ap, long int); + break; + + case TYPE_LONGLONGINT: + va_arg(*ap, varnumber_T); + break; + + case TYPE_UNSIGNEDINT: + va_arg(*ap, unsigned int); + break; + + case TYPE_UNSIGNEDLONGINT: + va_arg(*ap, unsigned long int); + break; + + case TYPE_UNSIGNEDLONGLONGINT: + va_arg(*ap, uvarnumber_T); + break; + + case TYPE_FLOAT: + va_arg(*ap, double); + break; + } + } + + // Because we know that after we return from this call, + // a va_arg() call is made, we can pre-emptively + // increment the current argument index. + ++*arg_cur; + ++*arg_idx; + + return; +} + int vim_vsnprintf_typval( char *str, size_t str_m, const char *fmt, - va_list ap, + va_list ap_start, typval_T *tvs) { size_t str_l = 0; const char *p = fmt; + int arg_cur = 0; + int num_posarg = 0; int arg_idx = 1; + va_list ap; + const char **ap_types = NULL; + + if (parse_fmt_types(&ap_types, &num_posarg, fmt, tvs) == FAIL) + return 0; + + va_copy(ap, ap_start); if (p == NULL) p = ""; @@ -2316,9 +2964,32 @@ vim_vsnprintf_typval( // buffer for 's' and 'S' specs char_u *tofree = NULL; + // variables for positional arg + int pos_arg = -1; + const char *ptype; + p++; // skip '%' + // First check to see if we find a positional + // argument specifier + ptype = p; + + while (VIM_ISDIGIT(*ptype)) + ++ptype; + + if (*ptype == '$') + { + // Positional argument + unsigned int uj = *p++ - '0'; + + while (VIM_ISDIGIT((int)(*p))) + uj = 10 * uj + (unsigned int)(*p++ - '0'); + pos_arg = uj; + + ++p; + } + // parse flags while (*p == '0' || *p == '-' || *p == '+' || *p == ' ' || *p == '#' || *p == '\'') @@ -2346,11 +3017,26 @@ vim_vsnprintf_typval( int j; p++; + + if (VIM_ISDIGIT((int)(*p))) + { + // Positional argument field width + unsigned int uj = *p++ - '0'; + + while (VIM_ISDIGIT((int)(*p))) + uj = 10 * uj + (unsigned int)(*p++ - '0'); + arg_idx = uj; + + ++p; + } + j = # if defined(FEAT_EVAL) tvs != NULL ? tv_nr(tvs, &arg_idx) : # endif - va_arg(ap, int); + (skip_to_arg(ap_types, ap_start, &ap, &arg_idx, &arg_cur), + va_arg(ap, int)); + if (j >= 0) min_field_width = j; else @@ -2375,25 +3061,8 @@ vim_vsnprintf_typval( { p++; precision_specified = 1; - if (*p == '*') - { - int j; - - j = -# if defined(FEAT_EVAL) - tvs != NULL ? tv_nr(tvs, &arg_idx) : -# endif - va_arg(ap, int); - p++; - if (j >= 0) - precision = j; - else - { - precision_specified = 0; - precision = 0; - } - } - else if (VIM_ISDIGIT((int)(*p))) + + if (VIM_ISDIGIT((int)(*p))) { // size_t could be wider than unsigned int; make sure we // treat argument like common implementations do @@ -2403,6 +3072,39 @@ vim_vsnprintf_typval( uj = 10 * uj + (unsigned int)(*p++ - '0'); precision = uj; } + else if (*p == '*') + { + int j; + + p++; + + if (VIM_ISDIGIT((int)(*p))) + { + // positional argument + unsigned int uj = *p++ - '0'; + + while (VIM_ISDIGIT((int)(*p))) + uj = 10 * uj + (unsigned int)(*p++ - '0'); + arg_idx = uj; + + ++p; + } + + j = +# if defined(FEAT_EVAL) + tvs != NULL ? tv_nr(tvs, &arg_idx) : +# endif + (skip_to_arg(ap_types, ap_start, &ap, &arg_idx, &arg_cur), + va_arg(ap, int)); + + if (j >= 0) + precision = j; + else + { + precision_specified = 0; + precision = 0; + } + } } // parse 'h', 'l' and 'll' length modifiers @@ -2438,6 +3140,9 @@ vim_vsnprintf_typval( } # endif + if (pos_arg != -1) + arg_idx = pos_arg; + // get parameter value, do initial processing switch (fmt_spec) { @@ -2462,7 +3167,9 @@ vim_vsnprintf_typval( # if defined(FEAT_EVAL) tvs != NULL ? tv_nr(tvs, &arg_idx) : # endif - va_arg(ap, int); + (skip_to_arg(ap_types, ap_start, &ap, &arg_idx, &arg_cur), + va_arg(ap, int)); + // standard demands unsigned char uchar_arg = (unsigned char)j; str_arg = (char *)&uchar_arg; @@ -2475,7 +3182,9 @@ vim_vsnprintf_typval( # if defined(FEAT_EVAL) tvs != NULL ? tv_str(tvs, &arg_idx, &tofree) : # endif - va_arg(ap, char *); + (skip_to_arg(ap_types, ap_start, &ap, &arg_idx, &arg_cur), + va_arg(ap, char *)); + if (str_arg == NULL) { str_arg = "[NULL]"; @@ -2570,7 +3279,9 @@ vim_vsnprintf_typval( tvs != NULL ? (void *)tv_str(tvs, &arg_idx, NULL) : # endif - va_arg(ap, void *); + (skip_to_arg(ap_types, ap_start, &ap, &arg_idx, &arg_cur), + va_arg(ap, void *)); + if (ptr_arg != NULL) arg_sign = 1; } @@ -2581,7 +3292,9 @@ vim_vsnprintf_typval( tvs != NULL ? (uvarnumber_T)tv_nr(tvs, &arg_idx) : # endif - va_arg(ap, uvarnumber_T); + (skip_to_arg(ap_types, ap_start, &ap, &arg_idx, &arg_cur), + va_arg(ap, uvarnumber_T)); + if (bin_arg != 0) arg_sign = 1; } @@ -2597,7 +3310,9 @@ vim_vsnprintf_typval( # if defined(FEAT_EVAL) tvs != NULL ? tv_nr(tvs, &arg_idx) : # endif - va_arg(ap, int); + (skip_to_arg(ap_types, ap_start, &ap, &arg_idx, &arg_cur), + va_arg(ap, int)); + if (int_arg > 0) arg_sign = 1; else if (int_arg < 0) @@ -2608,7 +3323,9 @@ vim_vsnprintf_typval( # if defined(FEAT_EVAL) tvs != NULL ? tv_nr(tvs, &arg_idx) : # endif - va_arg(ap, long int); + (skip_to_arg(ap_types, ap_start, &ap, &arg_idx, &arg_cur), + va_arg(ap, long int)); + if (long_arg > 0) arg_sign = 1; else if (long_arg < 0) @@ -2619,7 +3336,9 @@ vim_vsnprintf_typval( # if defined(FEAT_EVAL) tvs != NULL ? tv_nr(tvs, &arg_idx) : # endif - va_arg(ap, varnumber_T); + (skip_to_arg(ap_types, ap_start, &ap, &arg_idx, &arg_cur), + va_arg(ap, varnumber_T)); + if (llong_arg > 0) arg_sign = 1; else if (llong_arg < 0) @@ -2639,7 +3358,9 @@ vim_vsnprintf_typval( tvs != NULL ? (unsigned) tv_nr(tvs, &arg_idx) : # endif - va_arg(ap, unsigned int); + (skip_to_arg(ap_types, ap_start, &ap, &arg_idx, &arg_cur), + va_arg(ap, unsigned int)); + if (uint_arg != 0) arg_sign = 1; break; @@ -2649,7 +3370,9 @@ vim_vsnprintf_typval( tvs != NULL ? (unsigned long) tv_nr(tvs, &arg_idx) : # endif - va_arg(ap, unsigned long int); + (skip_to_arg(ap_types, ap_start, &ap, &arg_idx, &arg_cur), + va_arg(ap, unsigned long int)); + if (ulong_arg != 0) arg_sign = 1; break; @@ -2659,7 +3382,9 @@ vim_vsnprintf_typval( tvs != NULL ? (uvarnumber_T) tv_nr(tvs, &arg_idx) : # endif - va_arg(ap, uvarnumber_T); + (skip_to_arg(ap_types, ap_start, &ap, &arg_idx, &arg_cur), + va_arg(ap, uvarnumber_T)); + if (ullong_arg != 0) arg_sign = 1; break; @@ -2859,7 +3584,9 @@ vim_vsnprintf_typval( # if defined(FEAT_EVAL) tvs != NULL ? tv_float(tvs, &arg_idx) : # endif - va_arg(ap, double); + (skip_to_arg(ap_types, ap_start, &ap, &arg_idx, &arg_cur), + va_arg(ap, double)); + abs_f = f < 0 ? -f : f; if (fmt_spec == 'g' || fmt_spec == 'G') @@ -3143,9 +3870,12 @@ vim_vsnprintf_typval( str[str_l <= str_m - 1 ? str_l : str_m - 1] = '\0'; } - if (tvs != NULL && tvs[arg_idx - 1].v_type != VAR_UNKNOWN) + if (tvs != NULL && tvs[num_posarg != 0 ? num_posarg : arg_idx - 1].v_type != VAR_UNKNOWN) emsg(_(e_too_many_arguments_to_printf)); + vim_free(ap_types); + va_end(ap); + // Return the number of characters formatted (excluding trailing nul // character), that is, the number of characters that would have been // written to the buffer if it were large enough.