# HG changeset patch # User Bram Moolenaar # Date 1625945404 -7200 # Node ID dc66d0284518dd5efea84ad65a695fd8a1f51a68 # Parent 362cabc968e832b31029250243e679c9e0546345 patch 8.2.3139: functions for string manipulation are spread out Commit: https://github.com/vim/vim/commit/a2438132a675be4dde3acbdf03ba1fdb2f09427c Author: Yegappan Lakshmanan Date: Sat Jul 10 21:29:18 2021 +0200 patch 8.2.3139: functions for string manipulation are spread out Problem: Functions for string manipulation are spread out. Solution: Move string related functions to a new source file. (Yegappan Lakshmanan, closes #8470) diff --git a/Filelist b/Filelist --- a/Filelist +++ b/Filelist @@ -134,6 +134,7 @@ SRC_ALL = \ src/spell.h \ src/spellfile.c \ src/spellsuggest.c \ + src/strings.c \ src/structs.h \ src/syntax.c \ src/tag.c \ @@ -296,6 +297,7 @@ SRC_ALL = \ src/proto/spell.pro \ src/proto/spellfile.pro \ src/proto/spellsuggest.pro \ + src/proto/strings.pro \ src/proto/syntax.pro \ src/proto/tag.pro \ src/proto/term.pro \ diff --git a/src/Make_ami.mak b/src/Make_ami.mak --- a/src/Make_ami.mak +++ b/src/Make_ami.mak @@ -162,6 +162,7 @@ SRC += \ spell.c \ spellfile.c \ spellsuggest.c \ + strings.c \ syntax.c \ tag.c \ term.c \ diff --git a/src/Make_cyg_ming.mak b/src/Make_cyg_ming.mak --- a/src/Make_cyg_ming.mak +++ b/src/Make_cyg_ming.mak @@ -809,6 +809,7 @@ OBJ = \ $(OUTDIR)/spell.o \ $(OUTDIR)/spellfile.o \ $(OUTDIR)/spellsuggest.o \ + $(OUTDIR)/strings.o \ $(OUTDIR)/syntax.o \ $(OUTDIR)/tag.o \ $(OUTDIR)/term.o \ diff --git a/src/Make_mvc.mak b/src/Make_mvc.mak --- a/src/Make_mvc.mak +++ b/src/Make_mvc.mak @@ -819,6 +819,7 @@ OBJ = \ $(OUTDIR)\spell.obj \ $(OUTDIR)\spellfile.obj \ $(OUTDIR)\spellsuggest.obj \ + $(OUTDIR)\strings.obj \ $(OUTDIR)\syntax.obj \ $(OUTDIR)\tag.obj \ $(OUTDIR)\term.obj \ @@ -1792,6 +1793,8 @@ lib$(MZSCHEME_MAIN_LIB)$(MZSCHEME_VER).l $(OUTDIR)/spellsuggest.obj: $(OUTDIR) spellsuggest.c $(INCL) +$(OUTDIR)/strings.obj: $(OUTDIR) strings.c $(INCL) + 
$(OUTDIR)/syntax.obj: $(OUTDIR) syntax.c $(INCL) $(OUTDIR)/tag.obj: $(OUTDIR) tag.c $(INCL) @@ -2012,6 +2015,7 @@ proto.h: \ proto/spell.pro \ proto/spellfile.pro \ proto/spellsuggest.pro \ + proto/strings.pro \ proto/syntax.pro \ proto/tag.pro \ proto/term.pro \ diff --git a/src/Make_vms.mms b/src/Make_vms.mms --- a/src/Make_vms.mms +++ b/src/Make_vms.mms @@ -393,6 +393,7 @@ SRC = \ spell.c \ spellfile.c \ spellsuggest.c \ + strings.c \ syntax.c \ tag.c \ term.c \ @@ -512,6 +513,7 @@ OBJ = \ spell.obj \ spellfile.obj \ spellsuggest.obj \ + strings.obj \ syntax.obj \ tag.obj \ term.obj \ @@ -1048,6 +1050,10 @@ spellsuggest.obj : spellsuggest.c vim.h ascii.h keymap.h term.h macros.h option.h structs.h \ regexp.h gui.h beval.h [.proto]gui_beval.pro alloc.h ex_cmds.h spell.h \ proto.h errors.h globals.h +strings.obj : strings.c vim.h [.auto]config.h feature.h os_unix.h \ + ascii.h keymap.h term.h macros.h option.h structs.h \ + regexp.h gui.h beval.h [.proto]gui_beval.pro alloc.h ex_cmds.h spell.h \ + proto.h errors.h globals.h syntax.obj : syntax.c vim.h [.auto]config.h feature.h os_unix.h \ ascii.h keymap.h term.h macros.h structs.h regexp.h \ gui.h beval.h [.proto]gui_beval.pro option.h ex_cmds.h proto.h \ diff --git a/src/Makefile b/src/Makefile --- a/src/Makefile +++ b/src/Makefile @@ -1677,6 +1677,7 @@ BASIC_SRC = \ spell.c \ spellfile.c \ spellsuggest.c \ + strings.c \ syntax.c \ tag.c \ term.c \ @@ -1828,6 +1829,7 @@ OBJ_COMMON = \ objects/spell.o \ objects/spellfile.o \ objects/spellsuggest.o \ + objects/strings.o \ objects/syntax.o \ objects/tag.o \ objects/term.o \ @@ -2011,6 +2013,7 @@ PRO_AUTO = \ spell.pro \ spellfile.pro \ spellsuggest.pro \ + strings.pro \ syntax.pro \ tag.pro \ term.pro \ @@ -3516,6 +3519,9 @@ objects/spellfile.o: spellfile.c objects/spellsuggest.o: spellsuggest.c $(CCC) -o $@ spellsuggest.c +objects/strings.o: strings.c + $(CCC) -o $@ strings.c + objects/syntax.o: syntax.c $(CCC) -o $@ syntax.c @@ -4049,6 +4055,10 @@ 
objects/spellsuggest.o: spellsuggest.c v os_unix.h auto/osdef.h ascii.h keymap.h term.h macros.h option.h beval.h \ proto/gui_beval.pro structs.h regexp.h gui.h alloc.h ex_cmds.h spell.h \ proto.h errors.h globals.h +objects/strings.o: strings.c vim.h protodef.h auto/config.h feature.h os_unix.h \ + auto/osdef.h ascii.h keymap.h term.h macros.h option.h beval.h \ + proto/gui_beval.pro structs.h regexp.h gui.h alloc.h ex_cmds.h spell.h \ + proto.h errors.h globals.h objects/syntax.o: syntax.c vim.h protodef.h auto/config.h feature.h os_unix.h \ auto/osdef.h ascii.h keymap.h term.h macros.h option.h beval.h \ proto/gui_beval.pro structs.h regexp.h gui.h alloc.h ex_cmds.h spell.h \ diff --git a/src/README.md b/src/README.md --- a/src/README.md +++ b/src/README.md @@ -80,6 +80,7 @@ sign.c | signs spell.c | spell checking core spellfile.c | spell file handling spellsuggest.c | spell correction suggestions +strings.c | string manipulation functions syntax.c | syntax and other highlighting tag.c | tags term.c | terminal handling, termcap codes diff --git a/src/eval.c b/src/eval.c --- a/src/eval.c +++ b/src/eval.c @@ -57,6 +57,7 @@ static int eval7_leader(typval_T *rettv, static int free_unref_items(int copyID); static char_u *make_expanded_name(char_u *in_start, char_u *expr_start, char_u *expr_end, char_u *in_end); +static char_u *eval_next_line(evalarg_T *evalarg); /* * Return "n1" divided by "n2", taking care of dividing by zero. @@ -2113,7 +2114,7 @@ getline_peek_skip_comments(evalarg_T *ev * FALSE. * "arg" must point somewhere inside a line, not at the start. */ - char_u * + static char_u * eval_next_non_blank(char_u *arg, evalarg_T *evalarg, int *getnext) { char_u *p = skipwhite(arg); @@ -2144,7 +2145,7 @@ eval_next_non_blank(char_u *arg, evalarg * To be called after eval_next_non_blank() sets "getnext" to TRUE. * Only called for Vim9 script. 
*/ - char_u * + static char_u * eval_next_line(evalarg_T *evalarg) { garray_T *gap = &evalarg->eval_ga; @@ -5172,50 +5173,6 @@ echo_string( } /* - * Return string "str" in ' quotes, doubling ' characters. - * If "str" is NULL an empty string is assumed. - * If "function" is TRUE make it function('string'). - */ - char_u * -string_quote(char_u *str, int function) -{ - unsigned len; - char_u *p, *r, *s; - - len = (function ? 13 : 3); - if (str != NULL) - { - len += (unsigned)STRLEN(str); - for (p = str; *p != NUL; MB_PTR_ADV(p)) - if (*p == '\'') - ++len; - } - s = r = alloc(len); - if (r != NULL) - { - if (function) - { - STRCPY(r, "function('"); - r += 10; - } - else - *r++ = '\''; - if (str != NULL) - for (p = str; *p != NUL; ) - { - if (*p == '\'') - *r++ = '\''; - MB_COPY_CHAR(p, r); - } - *r++ = '\''; - if (function) - *r++ = ')'; - *r++ = NUL; - } - return s; -} - -/* * Convert the specified byte index of line 'lnum' in buffer 'buf' to a * character index. Works only for loaded buffers. Returns -1 on failure. * The index of the first byte and the first character is zero. 
diff --git a/src/evalfunc.c b/src/evalfunc.c --- a/src/evalfunc.c +++ b/src/evalfunc.c @@ -29,14 +29,10 @@ static void f_balloon_split(typval_T *ar # endif #endif static void f_byte2line(typval_T *argvars, typval_T *rettv); -static void byteidx(typval_T *argvars, typval_T *rettv, int comp); -static void f_byteidx(typval_T *argvars, typval_T *rettv); -static void f_byteidxcomp(typval_T *argvars, typval_T *rettv); static void f_call(typval_T *argvars, typval_T *rettv); static void f_changenr(typval_T *argvars, typval_T *rettv); static void f_char2nr(typval_T *argvars, typval_T *rettv); static void f_charcol(typval_T *argvars, typval_T *rettv); -static void f_charidx(typval_T *argvars, typval_T *rettv); static void f_col(typval_T *argvars, typval_T *rettv); static void f_confirm(typval_T *argvars, typval_T *rettv); static void f_copy(typval_T *argvars, typval_T *rettv); @@ -83,7 +79,6 @@ static void f_hasmapto(typval_T *argvars static void f_hlID(typval_T *argvars, typval_T *rettv); static void f_hlexists(typval_T *argvars, typval_T *rettv); static void f_hostname(typval_T *argvars, typval_T *rettv); -static void f_iconv(typval_T *argvars, typval_T *rettv); static void f_index(typval_T *argvars, typval_T *rettv); static void f_input(typval_T *argvars, typval_T *rettv); static void f_inputdialog(typval_T *argvars, typval_T *rettv); @@ -173,19 +168,6 @@ static void f_spellbadword(typval_T *arg static void f_spellsuggest(typval_T *argvars, typval_T *rettv); static void f_split(typval_T *argvars, typval_T *rettv); static void f_srand(typval_T *argvars, typval_T *rettv); -static void f_str2list(typval_T *argvars, typval_T *rettv); -static void f_str2nr(typval_T *argvars, typval_T *rettv); -static void f_strcharlen(typval_T *argvars, typval_T *rettv); -static void f_strchars(typval_T *argvars, typval_T *rettv); -static void f_strgetchar(typval_T *argvars, typval_T *rettv); -static void f_stridx(typval_T *argvars, typval_T *rettv); -static void f_strlen(typval_T *argvars, 
typval_T *rettv); -static void f_strcharpart(typval_T *argvars, typval_T *rettv); -static void f_strpart(typval_T *argvars, typval_T *rettv); -static void f_strridx(typval_T *argvars, typval_T *rettv); -static void f_strtrans(typval_T *argvars, typval_T *rettv); -static void f_strdisplaywidth(typval_T *argvars, typval_T *rettv); -static void f_strwidth(typval_T *argvars, typval_T *rettv); static void f_submatch(typval_T *argvars, typval_T *rettv); static void f_substitute(typval_T *argvars, typval_T *rettv); static void f_swapinfo(typval_T *argvars, typval_T *rettv); @@ -198,10 +180,6 @@ static void f_synconcealed(typval_T *arg static void f_tabpagebuflist(typval_T *argvars, typval_T *rettv); static void f_taglist(typval_T *argvars, typval_T *rettv); static void f_tagfiles(typval_T *argvars, typval_T *rettv); -static void f_tolower(typval_T *argvars, typval_T *rettv); -static void f_toupper(typval_T *argvars, typval_T *rettv); -static void f_tr(typval_T *argvars, typval_T *rettv); -static void f_trim(typval_T *argvars, typval_T *rettv); static void f_type(typval_T *argvars, typval_T *rettv); static void f_virtcol(typval_T *argvars, typval_T *rettv); static void f_visualmode(typval_T *argvars, typval_T *rettv); @@ -2377,50 +2355,6 @@ f_byte2line(typval_T *argvars UNUSED, ty #endif } - static void -byteidx(typval_T *argvars, typval_T *rettv, int comp UNUSED) -{ - char_u *t; - char_u *str; - varnumber_T idx; - - str = tv_get_string_chk(&argvars[0]); - idx = tv_get_number_chk(&argvars[1], NULL); - rettv->vval.v_number = -1; - if (str == NULL || idx < 0) - return; - - t = str; - for ( ; idx > 0; idx--) - { - if (*t == NUL) // EOL reached - return; - if (enc_utf8 && comp) - t += utf_ptr2len(t); - else - t += (*mb_ptr2len)(t); - } - rettv->vval.v_number = (varnumber_T)(t - str); -} - -/* - * "byteidx()" function - */ - static void -f_byteidx(typval_T *argvars, typval_T *rettv) -{ - byteidx(argvars, rettv, FALSE); -} - -/* - * "byteidxcomp()" function - */ - static void 
-f_byteidxcomp(typval_T *argvars, typval_T *rettv) -{ - byteidx(argvars, rettv, TRUE); -} - /* * "call(func, arglist [, dict])" function */ @@ -2552,58 +2486,6 @@ f_charcol(typval_T *argvars, typval_T *r get_col(argvars, rettv, TRUE); } -/* - * "charidx()" function - */ - static void -f_charidx(typval_T *argvars, typval_T *rettv) -{ - char_u *str; - varnumber_T idx; - varnumber_T countcc = FALSE; - char_u *p; - int len; - int (*ptr2len)(char_u *); - - rettv->vval.v_number = -1; - - if (argvars[0].v_type != VAR_STRING || argvars[1].v_type != VAR_NUMBER - || (argvars[2].v_type != VAR_UNKNOWN - && argvars[2].v_type != VAR_NUMBER - && argvars[2].v_type != VAR_BOOL)) - { - emsg(_(e_invarg)); - return; - } - - str = tv_get_string_chk(&argvars[0]); - idx = tv_get_number_chk(&argvars[1], NULL); - if (str == NULL || idx < 0) - return; - - if (argvars[2].v_type != VAR_UNKNOWN) - countcc = tv_get_bool(&argvars[2]); - if (countcc < 0 || countcc > 1) - { - semsg(_(e_using_number_as_bool_nr), countcc); - return; - } - - if (enc_utf8 && countcc) - ptr2len = utf_ptr2len; - else - ptr2len = mb_ptr2len; - - for (p = str, len = 0; p <= str + idx; len++) - { - if (*p == NUL) - return; - p += ptr2len(p); - } - - rettv->vval.v_number = len > 0 ? len - 1 : 0; -} - win_T * get_optional_window(typval_T *argvars, int idx) { @@ -5781,37 +5663,6 @@ f_hostname(typval_T *argvars UNUSED, typ } /* - * iconv() function - */ - static void -f_iconv(typval_T *argvars UNUSED, typval_T *rettv) -{ - char_u buf1[NUMBUFLEN]; - char_u buf2[NUMBUFLEN]; - char_u *from, *to, *str; - vimconv_T vimconv; - - rettv->v_type = VAR_STRING; - rettv->vval.v_string = NULL; - - str = tv_get_string(&argvars[0]); - from = enc_canonize(enc_skip(tv_get_string_buf(&argvars[1], buf1))); - to = enc_canonize(enc_skip(tv_get_string_buf(&argvars[2], buf2))); - vimconv.vc_type = CONV_NONE; - convert_setup(&vimconv, from, to); - - // If the encodings are equal, no conversion needed. 
- if (vimconv.vc_type == CONV_NONE) - rettv->vval.v_string = vim_strsave(str); - else - rettv->vval.v_string = string_convert(&vimconv, str, NULL); - - convert_setup(&vimconv, NULL, NULL); - vim_free(from); - vim_free(to); -} - -/* * "index()" function */ static void @@ -8787,452 +8638,6 @@ theend: } /* - * "str2list()" function - */ - static void -f_str2list(typval_T *argvars, typval_T *rettv) -{ - char_u *p; - int utf8 = FALSE; - - if (rettv_list_alloc(rettv) == FAIL) - return; - - if (argvars[1].v_type != VAR_UNKNOWN) - utf8 = (int)tv_get_bool_chk(&argvars[1], NULL); - - p = tv_get_string(&argvars[0]); - - if (has_mbyte || utf8) - { - int (*ptr2len)(char_u *); - int (*ptr2char)(char_u *); - - if (utf8 || enc_utf8) - { - ptr2len = utf_ptr2len; - ptr2char = utf_ptr2char; - } - else - { - ptr2len = mb_ptr2len; - ptr2char = mb_ptr2char; - } - - for ( ; *p != NUL; p += (*ptr2len)(p)) - list_append_number(rettv->vval.v_list, (*ptr2char)(p)); - } - else - for ( ; *p != NUL; ++p) - list_append_number(rettv->vval.v_list, *p); -} - -/* - * "str2nr()" function - */ - static void -f_str2nr(typval_T *argvars, typval_T *rettv) -{ - int base = 10; - char_u *p; - varnumber_T n; - int what = 0; - int isneg; - - if (argvars[1].v_type != VAR_UNKNOWN) - { - base = (int)tv_get_number(&argvars[1]); - if (base != 2 && base != 8 && base != 10 && base != 16) - { - emsg(_(e_invarg)); - return; - } - if (argvars[2].v_type != VAR_UNKNOWN && tv_get_bool(&argvars[2])) - what |= STR2NR_QUOTE; - } - - p = skipwhite(tv_get_string_strict(&argvars[0])); - isneg = (*p == '-'); - if (*p == '+' || *p == '-') - p = skipwhite(p + 1); - switch (base) - { - case 2: what |= STR2NR_BIN + STR2NR_FORCE; break; - case 8: what |= STR2NR_OCT + STR2NR_OOCT + STR2NR_FORCE; break; - case 16: what |= STR2NR_HEX + STR2NR_FORCE; break; - } - vim_str2nr(p, NULL, NULL, what, &n, NULL, 0, FALSE); - // Text after the number is silently ignored. 
- if (isneg) - rettv->vval.v_number = -n; - else - rettv->vval.v_number = n; - -} - -/* - * "strgetchar()" function - */ - static void -f_strgetchar(typval_T *argvars, typval_T *rettv) -{ - char_u *str; - int len; - int error = FALSE; - int charidx; - int byteidx = 0; - - rettv->vval.v_number = -1; - str = tv_get_string_chk(&argvars[0]); - if (str == NULL) - return; - len = (int)STRLEN(str); - charidx = (int)tv_get_number_chk(&argvars[1], &error); - if (error) - return; - - while (charidx >= 0 && byteidx < len) - { - if (charidx == 0) - { - rettv->vval.v_number = mb_ptr2char(str + byteidx); - break; - } - --charidx; - byteidx += MB_CPTR2LEN(str + byteidx); - } -} - -/* - * "stridx()" function - */ - static void -f_stridx(typval_T *argvars, typval_T *rettv) -{ - char_u buf[NUMBUFLEN]; - char_u *needle; - char_u *haystack; - char_u *save_haystack; - char_u *pos; - int start_idx; - - needle = tv_get_string_chk(&argvars[1]); - save_haystack = haystack = tv_get_string_buf_chk(&argvars[0], buf); - rettv->vval.v_number = -1; - if (needle == NULL || haystack == NULL) - return; // type error; errmsg already given - - if (argvars[2].v_type != VAR_UNKNOWN) - { - int error = FALSE; - - start_idx = (int)tv_get_number_chk(&argvars[2], &error); - if (error || start_idx >= (int)STRLEN(haystack)) - return; - if (start_idx >= 0) - haystack += start_idx; - } - - pos = (char_u *)strstr((char *)haystack, (char *)needle); - if (pos != NULL) - rettv->vval.v_number = (varnumber_T)(pos - save_haystack); -} - -/* - * "string()" function - */ - void -f_string(typval_T *argvars, typval_T *rettv) -{ - char_u *tofree; - char_u numbuf[NUMBUFLEN]; - - rettv->v_type = VAR_STRING; - rettv->vval.v_string = tv2string(&argvars[0], &tofree, numbuf, - get_copyID()); - // Make a copy if we have a value but it's not in allocated memory. 
- if (rettv->vval.v_string != NULL && tofree == NULL) - rettv->vval.v_string = vim_strsave(rettv->vval.v_string); -} - -/* - * "strlen()" function - */ - static void -f_strlen(typval_T *argvars, typval_T *rettv) -{ - rettv->vval.v_number = (varnumber_T)(STRLEN( - tv_get_string(&argvars[0]))); -} - - static void -strchar_common(typval_T *argvars, typval_T *rettv, int skipcc) -{ - char_u *s = tv_get_string(&argvars[0]); - varnumber_T len = 0; - int (*func_mb_ptr2char_adv)(char_u **pp); - - func_mb_ptr2char_adv = skipcc ? mb_ptr2char_adv : mb_cptr2char_adv; - while (*s != NUL) - { - func_mb_ptr2char_adv(&s); - ++len; - } - rettv->vval.v_number = len; -} - -/* - * "strcharlen()" function - */ - static void -f_strcharlen(typval_T *argvars, typval_T *rettv) -{ - strchar_common(argvars, rettv, TRUE); -} - -/* - * "strchars()" function - */ - static void -f_strchars(typval_T *argvars, typval_T *rettv) -{ - varnumber_T skipcc = FALSE; - - if (argvars[1].v_type != VAR_UNKNOWN) - skipcc = tv_get_bool(&argvars[1]); - if (skipcc < 0 || skipcc > 1) - semsg(_(e_using_number_as_bool_nr), skipcc); - else - strchar_common(argvars, rettv, skipcc); -} - -/* - * "strdisplaywidth()" function - */ - static void -f_strdisplaywidth(typval_T *argvars, typval_T *rettv) -{ - char_u *s = tv_get_string(&argvars[0]); - int col = 0; - - if (argvars[1].v_type != VAR_UNKNOWN) - col = (int)tv_get_number(&argvars[1]); - - rettv->vval.v_number = (varnumber_T)(linetabsize_col(col, s) - col); -} - -/* - * "strwidth()" function - */ - static void -f_strwidth(typval_T *argvars, typval_T *rettv) -{ - char_u *s = tv_get_string_strict(&argvars[0]); - - rettv->vval.v_number = (varnumber_T)(mb_string2cells(s, -1)); -} - -/* - * "strcharpart()" function - */ - static void -f_strcharpart(typval_T *argvars, typval_T *rettv) -{ - char_u *p; - int nchar; - int nbyte = 0; - int charlen; - int skipcc = FALSE; - int len = 0; - int slen; - int error = FALSE; - - p = tv_get_string(&argvars[0]); - slen = (int)STRLEN(p); 
- - nchar = (int)tv_get_number_chk(&argvars[1], &error); - if (!error) - { - if (argvars[2].v_type != VAR_UNKNOWN - && argvars[3].v_type != VAR_UNKNOWN) - { - skipcc = tv_get_bool(&argvars[3]); - if (skipcc < 0 || skipcc > 1) - { - semsg(_(e_using_number_as_bool_nr), skipcc); - return; - } - } - - if (nchar > 0) - while (nchar > 0 && nbyte < slen) - { - if (skipcc) - nbyte += mb_ptr2len(p + nbyte); - else - nbyte += MB_CPTR2LEN(p + nbyte); - --nchar; - } - else - nbyte = nchar; - if (argvars[2].v_type != VAR_UNKNOWN) - { - charlen = (int)tv_get_number(&argvars[2]); - while (charlen > 0 && nbyte + len < slen) - { - int off = nbyte + len; - - if (off < 0) - len += 1; - else - { - if (skipcc) - len += mb_ptr2len(p + off); - else - len += MB_CPTR2LEN(p + off); - } - --charlen; - } - } - else - len = slen - nbyte; // default: all bytes that are available. - } - - /* - * Only return the overlap between the specified part and the actual - * string. - */ - if (nbyte < 0) - { - len += nbyte; - nbyte = 0; - } - else if (nbyte > slen) - nbyte = slen; - if (len < 0) - len = 0; - else if (nbyte + len > slen) - len = slen - nbyte; - - rettv->v_type = VAR_STRING; - rettv->vval.v_string = vim_strnsave(p + nbyte, len); -} - -/* - * "strpart()" function - */ - static void -f_strpart(typval_T *argvars, typval_T *rettv) -{ - char_u *p; - int n; - int len; - int slen; - int error = FALSE; - - p = tv_get_string(&argvars[0]); - slen = (int)STRLEN(p); - - n = (int)tv_get_number_chk(&argvars[1], &error); - if (error) - len = 0; - else if (argvars[2].v_type != VAR_UNKNOWN) - len = (int)tv_get_number(&argvars[2]); - else - len = slen - n; // default len: all bytes that are available. - - // Only return the overlap between the specified part and the actual - // string. 
- if (n < 0) - { - len += n; - n = 0; - } - else if (n > slen) - n = slen; - if (len < 0) - len = 0; - else if (n + len > slen) - len = slen - n; - - if (argvars[2].v_type != VAR_UNKNOWN && argvars[3].v_type != VAR_UNKNOWN) - { - int off; - - // length in characters - for (off = n; off < slen && len > 0; --len) - off += mb_ptr2len(p + off); - len = off - n; - } - - rettv->v_type = VAR_STRING; - rettv->vval.v_string = vim_strnsave(p + n, len); -} - -/* - * "strridx()" function - */ - static void -f_strridx(typval_T *argvars, typval_T *rettv) -{ - char_u buf[NUMBUFLEN]; - char_u *needle; - char_u *haystack; - char_u *rest; - char_u *lastmatch = NULL; - int haystack_len, end_idx; - - needle = tv_get_string_chk(&argvars[1]); - haystack = tv_get_string_buf_chk(&argvars[0], buf); - - rettv->vval.v_number = -1; - if (needle == NULL || haystack == NULL) - return; // type error; errmsg already given - - haystack_len = (int)STRLEN(haystack); - if (argvars[2].v_type != VAR_UNKNOWN) - { - // Third argument: upper limit for index - end_idx = (int)tv_get_number_chk(&argvars[2], NULL); - if (end_idx < 0) - return; // can never find a match - } - else - end_idx = haystack_len; - - if (*needle == NUL) - { - // Empty string matches past the end. 
- lastmatch = haystack + end_idx; - } - else - { - for (rest = haystack; *rest != '\0'; ++rest) - { - rest = (char_u *)strstr((char *)rest, (char *)needle); - if (rest == NULL || rest > haystack + end_idx) - break; - lastmatch = rest; - } - } - - if (lastmatch == NULL) - rettv->vval.v_number = -1; - else - rettv->vval.v_number = (varnumber_T)(lastmatch - haystack); -} - -/* - * "strtrans()" function - */ - static void -f_strtrans(typval_T *argvars, typval_T *rettv) -{ - rettv->v_type = VAR_STRING; - rettv->vval.v_string = transstr(tv_get_string(&argvars[0])); -} - -/* * "submatch()" function */ static void @@ -9628,236 +9033,6 @@ f_taglist(typval_T *argvars, typval_T *r } /* - * "tolower(string)" function - */ - static void -f_tolower(typval_T *argvars, typval_T *rettv) -{ - rettv->v_type = VAR_STRING; - rettv->vval.v_string = strlow_save(tv_get_string(&argvars[0])); -} - -/* - * "toupper(string)" function - */ - static void -f_toupper(typval_T *argvars, typval_T *rettv) -{ - rettv->v_type = VAR_STRING; - rettv->vval.v_string = strup_save(tv_get_string(&argvars[0])); -} - -/* - * "tr(string, fromstr, tostr)" function - */ - static void -f_tr(typval_T *argvars, typval_T *rettv) -{ - char_u *in_str; - char_u *fromstr; - char_u *tostr; - char_u *p; - int inlen; - int fromlen; - int tolen; - int idx; - char_u *cpstr; - int cplen; - int first = TRUE; - char_u buf[NUMBUFLEN]; - char_u buf2[NUMBUFLEN]; - garray_T ga; - - in_str = tv_get_string(&argvars[0]); - fromstr = tv_get_string_buf_chk(&argvars[1], buf); - tostr = tv_get_string_buf_chk(&argvars[2], buf2); - - // Default return value: empty string. 
- rettv->v_type = VAR_STRING; - rettv->vval.v_string = NULL; - if (fromstr == NULL || tostr == NULL) - return; // type error; errmsg already given - ga_init2(&ga, (int)sizeof(char), 80); - - if (!has_mbyte) - // not multi-byte: fromstr and tostr must be the same length - if (STRLEN(fromstr) != STRLEN(tostr)) - { -error: - semsg(_(e_invarg2), fromstr); - ga_clear(&ga); - return; - } - - // fromstr and tostr have to contain the same number of chars - while (*in_str != NUL) - { - if (has_mbyte) - { - inlen = (*mb_ptr2len)(in_str); - cpstr = in_str; - cplen = inlen; - idx = 0; - for (p = fromstr; *p != NUL; p += fromlen) - { - fromlen = (*mb_ptr2len)(p); - if (fromlen == inlen && STRNCMP(in_str, p, inlen) == 0) - { - for (p = tostr; *p != NUL; p += tolen) - { - tolen = (*mb_ptr2len)(p); - if (idx-- == 0) - { - cplen = tolen; - cpstr = p; - break; - } - } - if (*p == NUL) // tostr is shorter than fromstr - goto error; - break; - } - ++idx; - } - - if (first && cpstr == in_str) - { - // Check that fromstr and tostr have the same number of - // (multi-byte) characters. Done only once when a character - // of in_str doesn't appear in fromstr. - first = FALSE; - for (p = tostr; *p != NUL; p += tolen) - { - tolen = (*mb_ptr2len)(p); - --idx; - } - if (idx != 0) - goto error; - } - - (void)ga_grow(&ga, cplen); - mch_memmove((char *)ga.ga_data + ga.ga_len, cpstr, (size_t)cplen); - ga.ga_len += cplen; - - in_str += inlen; - } - else - { - // When not using multi-byte chars we can do it faster. 
- p = vim_strchr(fromstr, *in_str); - if (p != NULL) - ga_append(&ga, tostr[p - fromstr]); - else - ga_append(&ga, *in_str); - ++in_str; - } - } - - // add a terminating NUL - (void)ga_grow(&ga, 1); - ga_append(&ga, NUL); - - rettv->vval.v_string = ga.ga_data; -} - -/* - * "trim({expr})" function - */ - static void -f_trim(typval_T *argvars, typval_T *rettv) -{ - char_u buf1[NUMBUFLEN]; - char_u buf2[NUMBUFLEN]; - char_u *head = tv_get_string_buf_chk(&argvars[0], buf1); - char_u *mask = NULL; - char_u *tail; - char_u *prev; - char_u *p; - int c1; - int dir = 0; - - rettv->v_type = VAR_STRING; - rettv->vval.v_string = NULL; - if (head == NULL) - return; - - if (argvars[1].v_type != VAR_UNKNOWN && argvars[1].v_type != VAR_STRING) - { - semsg(_(e_invarg2), tv_get_string(&argvars[1])); - return; - } - - if (argvars[1].v_type == VAR_STRING) - { - mask = tv_get_string_buf_chk(&argvars[1], buf2); - - if (argvars[2].v_type != VAR_UNKNOWN) - { - int error = 0; - - // leading or trailing characters to trim - dir = (int)tv_get_number_chk(&argvars[2], &error); - if (error) - return; - if (dir < 0 || dir > 2) - { - semsg(_(e_invarg2), tv_get_string(&argvars[2])); - return; - } - } - } - - if (dir == 0 || dir == 1) - { - // Trim leading characters - while (*head != NUL) - { - c1 = PTR2CHAR(head); - if (mask == NULL) - { - if (c1 > ' ' && c1 != 0xa0) - break; - } - else - { - for (p = mask; *p != NUL; MB_PTR_ADV(p)) - if (c1 == PTR2CHAR(p)) - break; - if (*p == NUL) - break; - } - MB_PTR_ADV(head); - } - } - - tail = head + STRLEN(head); - if (dir == 0 || dir == 2) - { - // Trim trailing characters - for (; tail > head; tail = prev) - { - prev = tail; - MB_PTR_BACK(head, prev); - c1 = PTR2CHAR(prev); - if (mask == NULL) - { - if (c1 > ' ' && c1 != 0xa0) - break; - } - else - { - for (p = mask; *p != NUL; MB_PTR_ADV(p)) - if (c1 == PTR2CHAR(p)) - break; - if (*p == NUL) - break; - } - } - } - rettv->vval.v_string = vim_strnsave(head, tail - head); -} - -/* * "type(expr)" function 
*/ static void diff --git a/src/mbyte.c b/src/mbyte.c --- a/src/mbyte.c +++ b/src/mbyte.c @@ -4961,6 +4961,37 @@ f_getimstatus(typval_T *argvars UNUSED, rettv->vval.v_number = im_get_status(); # endif } + +/* + * iconv() function + */ + void +f_iconv(typval_T *argvars UNUSED, typval_T *rettv) +{ + char_u buf1[NUMBUFLEN]; + char_u buf2[NUMBUFLEN]; + char_u *from, *to, *str; + vimconv_T vimconv; + + rettv->v_type = VAR_STRING; + rettv->vval.v_string = NULL; + + str = tv_get_string(&argvars[0]); + from = enc_canonize(enc_skip(tv_get_string_buf(&argvars[1], buf1))); + to = enc_canonize(enc_skip(tv_get_string_buf(&argvars[2], buf2))); + vimconv.vc_type = CONV_NONE; + convert_setup(&vimconv, from, to); + + // If the encodings are equal, no conversion needed. + if (vimconv.vc_type == CONV_NONE) + rettv->vval.v_string = vim_strsave(str); + else + rettv->vval.v_string = string_convert(&vimconv, str, NULL); + + convert_setup(&vimconv, NULL, NULL); + vim_free(from); + vim_free(to); +} #endif /* diff --git a/src/misc1.c b/src/misc1.c --- a/src/misc1.c +++ b/src/misc1.c @@ -695,7 +695,8 @@ f_mode(typval_T *argvars, typval_T *rett if (finish_op) { buf[1] = 'o'; - // to be able to detect force-linewise/blockwise/characterwise operations + // to be able to detect force-linewise/blockwise/characterwise + // operations buf[2] = motion_force; } else if (restart_edit == 'I' || restart_edit == 'R' @@ -2099,29 +2100,6 @@ match_user(char_u *name) return result; } -/* - * Concatenate two strings and return the result in allocated memory. - * Returns NULL when out of memory. - */ - char_u * -concat_str(char_u *str1, char_u *str2) -{ - char_u *dest; - size_t l = str1 == NULL ? 0 : STRLEN(str1); - - dest = alloc(l + (str2 == NULL ? 
0 : STRLEN(str2)) + 1L); - if (dest != NULL) - { - if (str1 == NULL) - *dest = NUL; - else - STRCPY(dest, str1); - if (str2 != NULL) - STRCPY(dest + l, str2); - } - return dest; -} - static void prepare_to_exit(void) { diff --git a/src/misc2.c b/src/misc2.c --- a/src/misc2.c +++ b/src/misc2.c @@ -1268,42 +1268,6 @@ free_all_mem(void) #endif /* - * Copy "string" into newly allocated memory. - */ - char_u * -vim_strsave(char_u *string) -{ - char_u *p; - size_t len; - - len = STRLEN(string) + 1; - p = alloc(len); - if (p != NULL) - mch_memmove(p, string, len); - return p; -} - -/* - * Copy up to "len" bytes of "string" into newly allocated memory and - * terminate with a NUL. - * The allocated memory always has size "len + 1", also when "string" is - * shorter. - */ - char_u * -vim_strnsave(char_u *string, size_t len) -{ - char_u *p; - - p = alloc(len + 1); - if (p != NULL) - { - STRNCPY(p, string, len); - p[len] = NUL; - } - return p; -} - -/* * Copy "p[len]" into allocated memory, ignoring NUL characters. * Returns NULL when out of memory. */ @@ -1318,465 +1282,6 @@ vim_memsave(char_u *p, size_t len) } /* - * Same as vim_strsave(), but any characters found in esc_chars are preceded - * by a backslash. - */ - char_u * -vim_strsave_escaped(char_u *string, char_u *esc_chars) -{ - return vim_strsave_escaped_ext(string, esc_chars, '\\', FALSE); -} - -/* - * Same as vim_strsave_escaped(), but when "bsl" is TRUE also escape - * characters where rem_backslash() would remove the backslash. - * Escape the characters with "cc". - */ - char_u * -vim_strsave_escaped_ext( - char_u *string, - char_u *esc_chars, - int cc, - int bsl) -{ - char_u *p; - char_u *p2; - char_u *escaped_string; - unsigned length; - int l; - - /* - * First count the number of backslashes required. - * Then allocate the memory and insert them. 
- */ - length = 1; // count the trailing NUL - for (p = string; *p; p++) - { - if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1) - { - length += l; // count a multibyte char - p += l - 1; - continue; - } - if (vim_strchr(esc_chars, *p) != NULL || (bsl && rem_backslash(p))) - ++length; // count a backslash - ++length; // count an ordinary char - } - escaped_string = alloc(length); - if (escaped_string != NULL) - { - p2 = escaped_string; - for (p = string; *p; p++) - { - if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1) - { - mch_memmove(p2, p, (size_t)l); - p2 += l; - p += l - 1; // skip multibyte char - continue; - } - if (vim_strchr(esc_chars, *p) != NULL || (bsl && rem_backslash(p))) - *p2++ = cc; - *p2++ = *p; - } - *p2 = NUL; - } - return escaped_string; -} - -/* - * Return TRUE when 'shell' has "csh" in the tail. - */ - int -csh_like_shell(void) -{ - return (strstr((char *)gettail(p_sh), "csh") != NULL); -} - -/* - * Escape "string" for use as a shell argument with system(). - * This uses single quotes, except when we know we need to use double quotes - * (MS-DOS and MS-Windows not using PowerShell and without 'shellslash' set). - * PowerShell also uses a novel escaping for enclosed single quotes - double - * them up. - * Escape a newline, depending on the 'shell' option. - * When "do_special" is TRUE also replace "!", "%", "#" and things starting - * with "<" like "". - * When "do_newline" is FALSE do not escape newline unless it is csh shell. - * Returns the result in allocated memory, NULL if we have run out. - */ - char_u * -vim_strsave_shellescape(char_u *string, int do_special, int do_newline) -{ - unsigned length; - char_u *p; - char_u *d; - char_u *escaped_string; - int l; - int csh_like; - char_u *shname; - int powershell; -# ifdef MSWIN - int double_quotes; -# endif - - // Only csh and similar shells expand '!' within single quotes. For sh and - // the like we must not put a backslash before it, it will be taken - // literally. If do_special is set the '!' 
will be escaped twice. - // Csh also needs to have "\n" escaped twice when do_special is set. - csh_like = csh_like_shell(); - - // PowerShell uses it's own version for quoting single quotes - shname = gettail(p_sh); - powershell = strstr((char *)shname, "pwsh") != NULL; -# ifdef MSWIN - powershell = powershell || strstr((char *)shname, "powershell") != NULL; - // PowerShell only accepts single quotes so override shellslash. - double_quotes = !powershell && !p_ssl; -# endif - - // First count the number of extra bytes required. - length = (unsigned)STRLEN(string) + 3; // two quotes and a trailing NUL - for (p = string; *p != NUL; MB_PTR_ADV(p)) - { -# ifdef MSWIN - if (double_quotes) - { - if (*p == '"') - ++length; // " -> "" - } - else -# endif - if (*p == '\'') - { - if (powershell) - length +=2; // ' => '' - else - length += 3; // ' => '\'' - } - if ((*p == '\n' && (csh_like || do_newline)) - || (*p == '!' && (csh_like || do_special))) - { - ++length; // insert backslash - if (csh_like && do_special) - ++length; // insert backslash - } - if (do_special && find_cmdline_var(p, &l) >= 0) - { - ++length; // insert backslash - p += l - 1; - } - } - - // Allocate memory for the result and fill it. - escaped_string = alloc(length); - if (escaped_string != NULL) - { - d = escaped_string; - - // add opening quote -# ifdef MSWIN - if (double_quotes) - *d++ = '"'; - else -# endif - *d++ = '\''; - - for (p = string; *p != NUL; ) - { -# ifdef MSWIN - if (double_quotes) - { - if (*p == '"') - { - *d++ = '"'; - *d++ = '"'; - ++p; - continue; - } - } - else -# endif - if (*p == '\'') - { - if (powershell) - { - *d++ = '\''; - *d++ = '\''; - } - else - { - *d++ = '\''; - *d++ = '\\'; - *d++ = '\''; - *d++ = '\''; - } - ++p; - continue; - } - if ((*p == '\n' && (csh_like || do_newline)) - || (*p == '!' 
&& (csh_like || do_special))) - { - *d++ = '\\'; - if (csh_like && do_special) - *d++ = '\\'; - *d++ = *p++; - continue; - } - if (do_special && find_cmdline_var(p, &l) >= 0) - { - *d++ = '\\'; // insert backslash - while (--l >= 0) // copy the var - *d++ = *p++; - continue; - } - - MB_COPY_CHAR(p, d); - } - - // add terminating quote and finish with a NUL -# ifdef MSWIN - if (double_quotes) - *d++ = '"'; - else -# endif - *d++ = '\''; - *d = NUL; - } - - return escaped_string; -} - -/* - * Like vim_strsave(), but make all characters uppercase. - * This uses ASCII lower-to-upper case translation, language independent. - */ - char_u * -vim_strsave_up(char_u *string) -{ - char_u *p1; - - p1 = vim_strsave(string); - vim_strup(p1); - return p1; -} - -/* - * Like vim_strnsave(), but make all characters uppercase. - * This uses ASCII lower-to-upper case translation, language independent. - */ - char_u * -vim_strnsave_up(char_u *string, size_t len) -{ - char_u *p1; - - p1 = vim_strnsave(string, len); - vim_strup(p1); - return p1; -} - -/* - * ASCII lower-to-upper case translation, language independent. - */ - void -vim_strup( - char_u *p) -{ - char_u *p2; - int c; - - if (p != NULL) - { - p2 = p; - while ((c = *p2) != NUL) -#ifdef EBCDIC - *p2++ = isalpha(c) ? toupper(c) : c; -#else - *p2++ = (c < 'a' || c > 'z') ? c : (c - 0x20); -#endif - } -} - -#if defined(FEAT_EVAL) || defined(FEAT_SPELL) || defined(PROTO) -/* - * Make string "s" all upper-case and return it in allocated memory. - * Handles multi-byte characters as well as possible. - * Returns NULL when out of memory. 
- */ - char_u * -strup_save(char_u *orig) -{ - char_u *p; - char_u *res; - - res = p = vim_strsave(orig); - - if (res != NULL) - while (*p != NUL) - { - int l; - - if (enc_utf8) - { - int c, uc; - int newl; - char_u *s; - - c = utf_ptr2char(p); - l = utf_ptr2len(p); - if (c == 0) - { - // overlong sequence, use only the first byte - c = *p; - l = 1; - } - uc = utf_toupper(c); - - // Reallocate string when byte count changes. This is rare, - // thus it's OK to do another malloc()/free(). - newl = utf_char2len(uc); - if (newl != l) - { - s = alloc(STRLEN(res) + 1 + newl - l); - if (s == NULL) - { - vim_free(res); - return NULL; - } - mch_memmove(s, res, p - res); - STRCPY(s + (p - res) + newl, p + l); - p = s + (p - res); - vim_free(res); - res = s; - } - - utf_char2bytes(uc, p); - p += newl; - } - else if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1) - p += l; // skip multi-byte character - else - { - *p = TOUPPER_LOC(*p); // note that toupper() can be a macro - p++; - } - } - - return res; -} - -/* - * Make string "s" all lower-case and return it in allocated memory. - * Handles multi-byte characters as well as possible. - * Returns NULL when out of memory. - */ - char_u * -strlow_save(char_u *orig) -{ - char_u *p; - char_u *res; - - res = p = vim_strsave(orig); - - if (res != NULL) - while (*p != NUL) - { - int l; - - if (enc_utf8) - { - int c, lc; - int newl; - char_u *s; - - c = utf_ptr2char(p); - l = utf_ptr2len(p); - if (c == 0) - { - // overlong sequence, use only the first byte - c = *p; - l = 1; - } - lc = utf_tolower(c); - - // Reallocate string when byte count changes. This is rare, - // thus it's OK to do another malloc()/free(). 
- newl = utf_char2len(lc); - if (newl != l) - { - s = alloc(STRLEN(res) + 1 + newl - l); - if (s == NULL) - { - vim_free(res); - return NULL; - } - mch_memmove(s, res, p - res); - STRCPY(s + (p - res) + newl, p + l); - p = s + (p - res); - vim_free(res); - res = s; - } - - utf_char2bytes(lc, p); - p += newl; - } - else if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1) - p += l; // skip multi-byte character - else - { - *p = TOLOWER_LOC(*p); // note that tolower() can be a macro - p++; - } - } - - return res; -} -#endif - -/* - * delete spaces at the end of a string - */ - void -del_trailing_spaces(char_u *ptr) -{ - char_u *q; - - q = ptr + STRLEN(ptr); - while (--q > ptr && VIM_ISWHITE(q[0]) && q[-1] != '\\' && q[-1] != Ctrl_V) - *q = NUL; -} - -/* - * Like strncpy(), but always terminate the result with one NUL. - * "to" must be "len + 1" long! - */ - void -vim_strncpy(char_u *to, char_u *from, size_t len) -{ - STRNCPY(to, from, len); - to[len] = NUL; -} - -/* - * Like strcat(), but make sure the result fits in "tosize" bytes and is - * always NUL terminated. "from" and "to" may overlap. - */ - void -vim_strcat(char_u *to, char_u *from, size_t tosize) -{ - size_t tolen = STRLEN(to); - size_t fromlen = STRLEN(from); - - if (tolen + fromlen + 1 > tosize) - { - mch_memmove(to + tolen, from, tosize - tolen - 1); - to[tosize - 1] = NUL; - } - else - mch_memmove(to + tolen, from, fromlen + 1); -} - -/* * Isolate one part of a string option where parts are separated with * "sep_chars". * The part is copied into "buf[maxlen]". @@ -1848,180 +1353,6 @@ vim_memset(void *ptr, int c, size_t size } #endif -#if (!defined(HAVE_STRCASECMP) && !defined(HAVE_STRICMP)) || defined(PROTO) -/* - * Compare two strings, ignoring case, using current locale. - * Doesn't work for multi-byte characters. 
- * return 0 for match, < 0 for smaller, > 0 for bigger - */ - int -vim_stricmp(char *s1, char *s2) -{ - int i; - - for (;;) - { - i = (int)TOLOWER_LOC(*s1) - (int)TOLOWER_LOC(*s2); - if (i != 0) - return i; // this character different - if (*s1 == NUL) - break; // strings match until NUL - ++s1; - ++s2; - } - return 0; // strings match -} -#endif - -#if (!defined(HAVE_STRNCASECMP) && !defined(HAVE_STRNICMP)) || defined(PROTO) -/* - * Compare two strings, for length "len", ignoring case, using current locale. - * Doesn't work for multi-byte characters. - * return 0 for match, < 0 for smaller, > 0 for bigger - */ - int -vim_strnicmp(char *s1, char *s2, size_t len) -{ - int i; - - while (len > 0) - { - i = (int)TOLOWER_LOC(*s1) - (int)TOLOWER_LOC(*s2); - if (i != 0) - return i; // this character different - if (*s1 == NUL) - break; // strings match until NUL - ++s1; - ++s2; - --len; - } - return 0; // strings match -} -#endif - -/* - * Search for first occurrence of "c" in "string". - * Version of strchr() that handles unsigned char strings with characters from - * 128 to 255 correctly. It also doesn't return a pointer to the NUL at the - * end of the string. - */ - char_u * -vim_strchr(char_u *string, int c) -{ - char_u *p; - int b; - - p = string; - if (enc_utf8 && c >= 0x80) - { - while (*p != NUL) - { - int l = utfc_ptr2len(p); - - // Avoid matching an illegal byte here. 
- if (utf_ptr2char(p) == c && l > 1) - return p; - p += l; - } - return NULL; - } - if (enc_dbcs != 0 && c > 255) - { - int n2 = c & 0xff; - - c = ((unsigned)c >> 8) & 0xff; - while ((b = *p) != NUL) - { - if (b == c && p[1] == n2) - return p; - p += (*mb_ptr2len)(p); - } - return NULL; - } - if (has_mbyte) - { - while ((b = *p) != NUL) - { - if (b == c) - return p; - p += (*mb_ptr2len)(p); - } - return NULL; - } - while ((b = *p) != NUL) - { - if (b == c) - return p; - ++p; - } - return NULL; -} - -/* - * Version of strchr() that only works for bytes and handles unsigned char - * strings with characters above 128 correctly. It also doesn't return a - * pointer to the NUL at the end of the string. - */ - char_u * -vim_strbyte(char_u *string, int c) -{ - char_u *p = string; - - while (*p != NUL) - { - if (*p == c) - return p; - ++p; - } - return NULL; -} - -/* - * Search for last occurrence of "c" in "string". - * Version of strrchr() that handles unsigned char strings with characters from - * 128 to 255 correctly. It also doesn't return a pointer to the NUL at the - * end of the string. - * Return NULL if not found. - * Does not handle multi-byte char for "c"! - */ - char_u * -vim_strrchr(char_u *string, int c) -{ - char_u *retval = NULL; - char_u *p = string; - - while (*p) - { - if (*p == c) - retval = p; - MB_PTR_ADV(p); - } - return retval; -} - -/* - * Vim's version of strpbrk(), in case it's missing. - * Don't generate a prototype for this, causes problems when it's not used. - */ -#ifndef PROTO -# ifndef HAVE_STRPBRK -# ifdef vim_strpbrk -# undef vim_strpbrk -# endif - char_u * -vim_strpbrk(char_u *s, char_u *charset) -{ - while (*s) - { - if (vim_strchr(charset, *s) != NULL) - return s; - MB_PTR_ADV(s); - } - return NULL; -} -# endif -#endif - /* * Vim has its own isspace() function, because on some machines isspace() * can't handle characters above 128. @@ -3975,25 +3306,6 @@ qsort( #endif /* - * Sort an array of strings. 
- */ -static int sort_compare(const void *s1, const void *s2); - - static int -sort_compare(const void *s1, const void *s2) -{ - return STRCMP(*(char **)s1, *(char **)s2); -} - - void -sort_strings( - char_u **files, - int count) -{ - qsort((void *)files, (size_t)count, sizeof(char_u *), sort_compare); -} - -/* * The putenv() implementation below comes from the "screen" program. * Included with permission from Juergen Weigert. * See pty.c for the copyright notice. @@ -4304,24 +3616,6 @@ put_bytes(FILE *fd, long_u nr, int len) #endif -#if defined(FEAT_QUICKFIX) || defined(FEAT_SPELL) || defined(PROTO) -/* - * Return TRUE if string "s" contains a non-ASCII character (128 or higher). - * When "s" is NULL FALSE is returned. - */ - int -has_non_ascii(char_u *s) -{ - char_u *p; - - if (s != NULL) - for (p = s; *p != NUL; ++p) - if (*p >= 128) - return TRUE; - return FALSE; -} -#endif - #ifndef PROTO // proto is defined in vim.h # ifdef ELAPSED_TIMEVAL /* diff --git a/src/proto.h b/src/proto.h --- a/src/proto.h +++ b/src/proto.h @@ -211,6 +211,7 @@ void mbyte_im_set_active(int active_arg) # include "spell.pro" # include "spellfile.pro" # include "spellsuggest.pro" +# include "strings.pro" # include "syntax.pro" # include "tag.pro" # include "term.pro" diff --git a/src/proto/eval.pro b/src/proto/eval.pro --- a/src/proto/eval.pro +++ b/src/proto/eval.pro @@ -32,8 +32,6 @@ int next_for_item(void *fi_void, char_u void free_for_info(void *fi_void); void set_context_for_expression(expand_T *xp, char_u *arg, cmdidx_T cmdidx); int pattern_match(char_u *pat, char_u *text, int ic); -char_u *eval_next_non_blank(char_u *arg, evalarg_T *evalarg, int *getnext); -char_u *eval_next_line(evalarg_T *evalarg); char_u *skipwhite_and_linebreak(char_u *arg, evalarg_T *evalarg); void clear_evalarg(evalarg_T *evalarg, exarg_T *eap); int eval0(char_u *arg, typval_T *rettv, exarg_T *eap, evalarg_T *evalarg); @@ -55,7 +53,6 @@ int set_ref_in_list_items(list_T *l, int int set_ref_in_item(typval_T 
*tv, int copyID, ht_stack_T **ht_stack, list_stack_T **list_stack); char_u *echo_string_core(typval_T *tv, char_u **tofree, char_u *numbuf, int copyID, int echo_style, int restore_copyID, int composite_val); char_u *echo_string(typval_T *tv, char_u **tofree, char_u *numbuf, int copyID); -char_u *string_quote(char_u *str, int function); int buf_byteidx_to_charidx(buf_T *buf, int lnum, int byteidx); int buf_charidx_to_byteidx(buf_T *buf, int lnum, int charidx); pos_T *var2fpos(typval_T *varp, int dollar_lnum, int *fnum, int charcol); diff --git a/src/proto/evalfunc.pro b/src/proto/evalfunc.pro --- a/src/proto/evalfunc.pro +++ b/src/proto/evalfunc.pro @@ -23,5 +23,4 @@ int dynamic_feature(char_u *feature); void mzscheme_call_vim(char_u *name, typval_T *args, typval_T *rettv); void range_list_materialize(list_T *list); long do_searchpair(char_u *spat, char_u *mpat, char_u *epat, int dir, typval_T *skip, int flags, pos_T *match_pos, linenr_T lnum_stop, long time_limit); -void f_string(typval_T *argvars, typval_T *rettv); /* vim: set ft=c : */ diff --git a/src/proto/mbyte.pro b/src/proto/mbyte.pro --- a/src/proto/mbyte.pro +++ b/src/proto/mbyte.pro @@ -86,4 +86,5 @@ char_u *string_convert(vimconv_T *vcp, c char_u *string_convert_ext(vimconv_T *vcp, char_u *ptr, int *lenp, int *unconvlenp); void f_setcellwidths(typval_T *argvars, typval_T *rettv); void f_charclass(typval_T *argvars, typval_T *rettv); +void f_iconv(typval_T *argvars UNUSED, typval_T *rettv); /* vim: set ft=c : */ diff --git a/src/proto/misc1.pro b/src/proto/misc1.pro --- a/src/proto/misc1.pro +++ b/src/proto/misc1.pro @@ -36,7 +36,6 @@ void vim_setenv(char_u *name, char_u *va char_u *get_env_name(expand_T *xp, int idx); char_u *get_users(expand_T *xp, int idx); int match_user(char_u *name); -char_u *concat_str(char_u *str1, char_u *str2); void preserve_exit(void); void line_breakcheck(void); void fast_breakcheck(void); diff --git a/src/proto/misc2.pro b/src/proto/misc2.pro --- a/src/proto/misc2.pro +++ 
b/src/proto/misc2.pro @@ -31,28 +31,9 @@ void *lalloc_id(size_t size, int message void *mem_realloc(void *ptr, size_t size); void do_outofmem_msg(size_t size); void free_all_mem(void); -char_u *vim_strsave(char_u *string); -char_u *vim_strnsave(char_u *string, size_t len); char_u *vim_memsave(char_u *p, size_t len); -char_u *vim_strsave_escaped(char_u *string, char_u *esc_chars); -char_u *vim_strsave_escaped_ext(char_u *string, char_u *esc_chars, int cc, int bsl); -int csh_like_shell(void); -char_u *vim_strsave_shellescape(char_u *string, int do_special, int do_newline); -char_u *vim_strsave_up(char_u *string); -char_u *vim_strnsave_up(char_u *string, size_t len); -void vim_strup(char_u *p); -char_u *strup_save(char_u *orig); -char_u *strlow_save(char_u *orig); -void del_trailing_spaces(char_u *ptr); -void vim_strncpy(char_u *to, char_u *from, size_t len); -void vim_strcat(char_u *to, char_u *from, size_t tosize); int copy_option_part(char_u **option, char_u *buf, int maxlen, char *sep_chars); void vim_free(void *x); -int vim_stricmp(char *s1, char *s2); -int vim_strnicmp(char *s1, char *s2, size_t len); -char_u *vim_strchr(char_u *string, int c); -char_u *vim_strbyte(char_u *string, int c); -char_u *vim_strrchr(char_u *string, int c); int vim_isspace(int x); void ga_clear(garray_T *gap); void ga_clear_strings(garray_T *gap); @@ -93,14 +74,12 @@ int get_shape_idx(int mouse); void update_mouseshape(int shape_idx); int vim_chdir(char_u *new_dir); int get_user_name(char_u *buf, int len); -void sort_strings(char_u **files, int count); int filewritable(char_u *fname); int get2c(FILE *fd); int get3c(FILE *fd); int get4c(FILE *fd); char_u *read_string(FILE *fd, int cnt); int put_bytes(FILE *fd, long_u nr, int len); -int has_non_ascii(char_u *s); int mch_parse_cmd(char_u *cmd, int use_shcf, char ***argv, int *argc); int build_argv_from_string(char_u *cmd, char ***argv, int *argc); int build_argv_from_list(list_T *l, char ***argv, int *argc); diff --git 
a/src/proto/strings.pro b/src/proto/strings.pro new file mode 100644 --- /dev/null +++ b/src/proto/strings.pro @@ -0,0 +1,45 @@ +/* strings.c */ +char_u *vim_strsave(char_u *string); +char_u *vim_strnsave(char_u *string, size_t len); +char_u *vim_strsave_escaped(char_u *string, char_u *esc_chars); +char_u *vim_strsave_escaped_ext(char_u *string, char_u *esc_chars, int cc, int bsl); +int csh_like_shell(void); +char_u *vim_strsave_shellescape(char_u *string, int do_special, int do_newline); +char_u *vim_strsave_up(char_u *string); +char_u *vim_strnsave_up(char_u *string, size_t len); +void vim_strup(char_u *p); +char_u *strlow_save(char_u *orig); +void del_trailing_spaces(char_u *ptr); +void vim_strncpy(char_u *to, char_u *from, size_t len); +void vim_strcat(char_u *to, char_u *from, size_t tosize); +int vim_stricmp(char *s1, char *s2); +int vim_strnicmp(char *s1, char *s2, size_t len); +char_u *vim_strchr(char_u *string, int c); +char_u *vim_strbyte(char_u *string, int c); +char_u *vim_strrchr(char_u *string, int c); +void sort_strings(char_u **files, int count); +int has_non_ascii(char_u *s); +char_u *concat_str(char_u *str1, char_u *str2); +char_u *string_quote(char_u *str, int function); +void f_byteidx(typval_T *argvars, typval_T *rettv); +void f_byteidxcomp(typval_T *argvars, typval_T *rettv); +void f_charidx(typval_T *argvars, typval_T *rettv); +void f_str2list(typval_T *argvars, typval_T *rettv); +void f_str2nr(typval_T *argvars, typval_T *rettv); +void f_strgetchar(typval_T *argvars, typval_T *rettv); +void f_stridx(typval_T *argvars, typval_T *rettv); +void f_string(typval_T *argvars, typval_T *rettv); +void f_strlen(typval_T *argvars, typval_T *rettv); +void f_strcharlen(typval_T *argvars, typval_T *rettv); +void f_strchars(typval_T *argvars, typval_T *rettv); +void f_strdisplaywidth(typval_T *argvars, typval_T *rettv); +void f_strwidth(typval_T *argvars, typval_T *rettv); +void f_strcharpart(typval_T *argvars, typval_T *rettv); +void f_strpart(typval_T 
*argvars, typval_T *rettv); +void f_strridx(typval_T *argvars, typval_T *rettv); +void f_strtrans(typval_T *argvars, typval_T *rettv); +void f_tolower(typval_T *argvars, typval_T *rettv); +void f_toupper(typval_T *argvars, typval_T *rettv); +void f_tr(typval_T *argvars, typval_T *rettv); +void f_trim(typval_T *argvars, typval_T *rettv); +/* vim: set ft=c : */ diff --git a/src/strings.c b/src/strings.c new file mode 100644 --- /dev/null +++ b/src/strings.c @@ -0,0 +1,1563 @@ +/* vi:set ts=8 sts=4 sw=4 noet: + * + * VIM - Vi IMproved by Bram Moolenaar + * + * Do ":help uganda" in Vim to read copying and usage conditions. + * Do ":help credits" in Vim to see a list of people who contributed. + * See README.txt for an overview of the Vim source code. + */ + +/* + * strings.c: string manipulation functions + */ + +#include "vim.h" + +/* + * Copy "string" into newly allocated memory. + */ + char_u * +vim_strsave(char_u *string) +{ + char_u *p; + size_t len; + + len = STRLEN(string) + 1; + p = alloc(len); + if (p != NULL) + mch_memmove(p, string, len); + return p; +} + +/* + * Copy up to "len" bytes of "string" into newly allocated memory and + * terminate with a NUL. + * The allocated memory always has size "len + 1", also when "string" is + * shorter. + */ + char_u * +vim_strnsave(char_u *string, size_t len) +{ + char_u *p; + + p = alloc(len + 1); + if (p != NULL) + { + STRNCPY(p, string, len); + p[len] = NUL; + } + return p; +} + +/* + * Same as vim_strsave(), but any characters found in esc_chars are preceded + * by a backslash. + */ + char_u * +vim_strsave_escaped(char_u *string, char_u *esc_chars) +{ + return vim_strsave_escaped_ext(string, esc_chars, '\\', FALSE); +} + +/* + * Same as vim_strsave_escaped(), but when "bsl" is TRUE also escape + * characters where rem_backslash() would remove the backslash. + * Escape the characters with "cc". 
+ */ + char_u * +vim_strsave_escaped_ext( + char_u *string, + char_u *esc_chars, + int cc, + int bsl) +{ + char_u *p; + char_u *p2; + char_u *escaped_string; + unsigned length; + int l; + + /* + * First count the number of backslashes required. + * Then allocate the memory and insert them. + */ + length = 1; // count the trailing NUL + for (p = string; *p; p++) + { + if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1) + { + length += l; // count a multibyte char + p += l - 1; + continue; + } + if (vim_strchr(esc_chars, *p) != NULL || (bsl && rem_backslash(p))) + ++length; // count a backslash + ++length; // count an ordinary char + } + escaped_string = alloc(length); + if (escaped_string != NULL) + { + p2 = escaped_string; + for (p = string; *p; p++) + { + if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1) + { + mch_memmove(p2, p, (size_t)l); + p2 += l; + p += l - 1; // skip multibyte char + continue; + } + if (vim_strchr(esc_chars, *p) != NULL || (bsl && rem_backslash(p))) + *p2++ = cc; + *p2++ = *p; + } + *p2 = NUL; + } + return escaped_string; +} + +/* + * Return TRUE when 'shell' has "csh" in the tail. + */ + int +csh_like_shell(void) +{ + return (strstr((char *)gettail(p_sh), "csh") != NULL); +} + +/* + * Escape "string" for use as a shell argument with system(). + * This uses single quotes, except when we know we need to use double quotes + * (MS-DOS and MS-Windows not using PowerShell and without 'shellslash' set). + * PowerShell also uses a novel escaping for enclosed single quotes - double + * them up. + * Escape a newline, depending on the 'shell' option. + * When "do_special" is TRUE also replace "!", "%", "#" and things starting + * with "<" like "<cfile>". + * When "do_newline" is FALSE do not escape newline unless it is csh shell. + * Returns the result in allocated memory, NULL if we have run out.
+ */ + char_u * +vim_strsave_shellescape(char_u *string, int do_special, int do_newline) +{ + unsigned length; + char_u *p; + char_u *d; + char_u *escaped_string; + int l; + int csh_like; + char_u *shname; + int powershell; +# ifdef MSWIN + int double_quotes; +# endif + + // Only csh and similar shells expand '!' within single quotes. For sh and + // the like we must not put a backslash before it, it will be taken + // literally. If do_special is set the '!' will be escaped twice. + // Csh also needs to have "\n" escaped twice when do_special is set. + csh_like = csh_like_shell(); + + // PowerShell uses it's own version for quoting single quotes + shname = gettail(p_sh); + powershell = strstr((char *)shname, "pwsh") != NULL; +# ifdef MSWIN + powershell = powershell || strstr((char *)shname, "powershell") != NULL; + // PowerShell only accepts single quotes so override shellslash. + double_quotes = !powershell && !p_ssl; +# endif + + // First count the number of extra bytes required. + length = (unsigned)STRLEN(string) + 3; // two quotes and a trailing NUL + for (p = string; *p != NUL; MB_PTR_ADV(p)) + { +# ifdef MSWIN + if (double_quotes) + { + if (*p == '"') + ++length; // " -> "" + } + else +# endif + if (*p == '\'') + { + if (powershell) + length +=2; // ' => '' + else + length += 3; // ' => '\'' + } + if ((*p == '\n' && (csh_like || do_newline)) + || (*p == '!' && (csh_like || do_special))) + { + ++length; // insert backslash + if (csh_like && do_special) + ++length; // insert backslash + } + if (do_special && find_cmdline_var(p, &l) >= 0) + { + ++length; // insert backslash + p += l - 1; + } + } + + // Allocate memory for the result and fill it. 
+ escaped_string = alloc(length); + if (escaped_string != NULL) + { + d = escaped_string; + + // add opening quote +# ifdef MSWIN + if (double_quotes) + *d++ = '"'; + else +# endif + *d++ = '\''; + + for (p = string; *p != NUL; ) + { +# ifdef MSWIN + if (double_quotes) + { + if (*p == '"') + { + *d++ = '"'; + *d++ = '"'; + ++p; + continue; + } + } + else +# endif + if (*p == '\'') + { + if (powershell) + { + *d++ = '\''; + *d++ = '\''; + } + else + { + *d++ = '\''; + *d++ = '\\'; + *d++ = '\''; + *d++ = '\''; + } + ++p; + continue; + } + if ((*p == '\n' && (csh_like || do_newline)) + || (*p == '!' && (csh_like || do_special))) + { + *d++ = '\\'; + if (csh_like && do_special) + *d++ = '\\'; + *d++ = *p++; + continue; + } + if (do_special && find_cmdline_var(p, &l) >= 0) + { + *d++ = '\\'; // insert backslash + while (--l >= 0) // copy the var + *d++ = *p++; + continue; + } + + MB_COPY_CHAR(p, d); + } + + // add terminating quote and finish with a NUL +# ifdef MSWIN + if (double_quotes) + *d++ = '"'; + else +# endif + *d++ = '\''; + *d = NUL; + } + + return escaped_string; +} + +/* + * Like vim_strsave(), but make all characters uppercase. + * This uses ASCII lower-to-upper case translation, language independent. + */ + char_u * +vim_strsave_up(char_u *string) +{ + char_u *p1; + + p1 = vim_strsave(string); + vim_strup(p1); + return p1; +} + +/* + * Like vim_strnsave(), but make all characters uppercase. + * This uses ASCII lower-to-upper case translation, language independent. + */ + char_u * +vim_strnsave_up(char_u *string, size_t len) +{ + char_u *p1; + + p1 = vim_strnsave(string, len); + vim_strup(p1); + return p1; +} + +/* + * ASCII lower-to-upper case translation, language independent. + */ + void +vim_strup( + char_u *p) +{ + char_u *p2; + int c; + + if (p != NULL) + { + p2 = p; + while ((c = *p2) != NUL) +#ifdef EBCDIC + *p2++ = isalpha(c) ? toupper(c) : c; +#else + *p2++ = (c < 'a' || c > 'z') ? 
c : (c - 0x20); +#endif + } +} + +#if defined(FEAT_EVAL) || defined(FEAT_SPELL) || defined(PROTO) +/* + * Make string "s" all upper-case and return it in allocated memory. + * Handles multi-byte characters as well as possible. + * Returns NULL when out of memory. + */ + static char_u * +strup_save(char_u *orig) +{ + char_u *p; + char_u *res; + + res = p = vim_strsave(orig); + + if (res != NULL) + while (*p != NUL) + { + int l; + + if (enc_utf8) + { + int c, uc; + int newl; + char_u *s; + + c = utf_ptr2char(p); + l = utf_ptr2len(p); + if (c == 0) + { + // overlong sequence, use only the first byte + c = *p; + l = 1; + } + uc = utf_toupper(c); + + // Reallocate string when byte count changes. This is rare, + // thus it's OK to do another malloc()/free(). + newl = utf_char2len(uc); + if (newl != l) + { + s = alloc(STRLEN(res) + 1 + newl - l); + if (s == NULL) + { + vim_free(res); + return NULL; + } + mch_memmove(s, res, p - res); + STRCPY(s + (p - res) + newl, p + l); + p = s + (p - res); + vim_free(res); + res = s; + } + + utf_char2bytes(uc, p); + p += newl; + } + else if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1) + p += l; // skip multi-byte character + else + { + *p = TOUPPER_LOC(*p); // note that toupper() can be a macro + p++; + } + } + + return res; +} + +/* + * Make string "s" all lower-case and return it in allocated memory. + * Handles multi-byte characters as well as possible. + * Returns NULL when out of memory. + */ + char_u * +strlow_save(char_u *orig) +{ + char_u *p; + char_u *res; + + res = p = vim_strsave(orig); + + if (res != NULL) + while (*p != NUL) + { + int l; + + if (enc_utf8) + { + int c, lc; + int newl; + char_u *s; + + c = utf_ptr2char(p); + l = utf_ptr2len(p); + if (c == 0) + { + // overlong sequence, use only the first byte + c = *p; + l = 1; + } + lc = utf_tolower(c); + + // Reallocate string when byte count changes. This is rare, + // thus it's OK to do another malloc()/free(). 
+ newl = utf_char2len(lc); + if (newl != l) + { + s = alloc(STRLEN(res) + 1 + newl - l); + if (s == NULL) + { + vim_free(res); + return NULL; + } + mch_memmove(s, res, p - res); + STRCPY(s + (p - res) + newl, p + l); + p = s + (p - res); + vim_free(res); + res = s; + } + + utf_char2bytes(lc, p); + p += newl; + } + else if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1) + p += l; // skip multi-byte character + else + { + *p = TOLOWER_LOC(*p); // note that tolower() can be a macro + p++; + } + } + + return res; +} +#endif + +/* + * delete spaces at the end of a string + */ + void +del_trailing_spaces(char_u *ptr) +{ + char_u *q; + + q = ptr + STRLEN(ptr); + while (--q > ptr && VIM_ISWHITE(q[0]) && q[-1] != '\\' && q[-1] != Ctrl_V) + *q = NUL; +} + +/* + * Like strncpy(), but always terminate the result with one NUL. + * "to" must be "len + 1" long! + */ + void +vim_strncpy(char_u *to, char_u *from, size_t len) +{ + STRNCPY(to, from, len); + to[len] = NUL; +} + +/* + * Like strcat(), but make sure the result fits in "tosize" bytes and is + * always NUL terminated. "from" and "to" may overlap. + */ + void +vim_strcat(char_u *to, char_u *from, size_t tosize) +{ + size_t tolen = STRLEN(to); + size_t fromlen = STRLEN(from); + + if (tolen + fromlen + 1 > tosize) + { + mch_memmove(to + tolen, from, tosize - tolen - 1); + to[tosize - 1] = NUL; + } + else + mch_memmove(to + tolen, from, fromlen + 1); +} + +#if (!defined(HAVE_STRCASECMP) && !defined(HAVE_STRICMP)) || defined(PROTO) +/* + * Compare two strings, ignoring case, using current locale. + * Doesn't work for multi-byte characters. 
+ * return 0 for match, < 0 for smaller, > 0 for bigger + */ + int +vim_stricmp(char *s1, char *s2) +{ + int i; + + for (;;) + { + i = (int)TOLOWER_LOC(*s1) - (int)TOLOWER_LOC(*s2); + if (i != 0) + return i; // this character different + if (*s1 == NUL) + break; // strings match until NUL + ++s1; + ++s2; + } + return 0; // strings match +} +#endif + +#if (!defined(HAVE_STRNCASECMP) && !defined(HAVE_STRNICMP)) || defined(PROTO) +/* + * Compare two strings, for length "len", ignoring case, using current locale. + * Doesn't work for multi-byte characters. + * return 0 for match, < 0 for smaller, > 0 for bigger + */ + int +vim_strnicmp(char *s1, char *s2, size_t len) +{ + int i; + + while (len > 0) + { + i = (int)TOLOWER_LOC(*s1) - (int)TOLOWER_LOC(*s2); + if (i != 0) + return i; // this character different + if (*s1 == NUL) + break; // strings match until NUL + ++s1; + ++s2; + --len; + } + return 0; // strings match +} +#endif + +/* + * Search for first occurrence of "c" in "string". + * Version of strchr() that handles unsigned char strings with characters from + * 128 to 255 correctly. It also doesn't return a pointer to the NUL at the + * end of the string. + */ + char_u * +vim_strchr(char_u *string, int c) +{ + char_u *p; + int b; + + p = string; + if (enc_utf8 && c >= 0x80) + { + while (*p != NUL) + { + int l = utfc_ptr2len(p); + + // Avoid matching an illegal byte here. 
+ if (utf_ptr2char(p) == c && l > 1) + return p; + p += l; + } + return NULL; + } + if (enc_dbcs != 0 && c > 255) + { + int n2 = c & 0xff; + + c = ((unsigned)c >> 8) & 0xff; + while ((b = *p) != NUL) + { + if (b == c && p[1] == n2) + return p; + p += (*mb_ptr2len)(p); + } + return NULL; + } + if (has_mbyte) + { + while ((b = *p) != NUL) + { + if (b == c) + return p; + p += (*mb_ptr2len)(p); + } + return NULL; + } + while ((b = *p) != NUL) + { + if (b == c) + return p; + ++p; + } + return NULL; +} + +/* + * Version of strchr() that only works for bytes and handles unsigned char + * strings with characters above 128 correctly. It also doesn't return a + * pointer to the NUL at the end of the string. + */ + char_u * +vim_strbyte(char_u *string, int c) +{ + char_u *p = string; + + while (*p != NUL) + { + if (*p == c) + return p; + ++p; + } + return NULL; +} + +/* + * Search for last occurrence of "c" in "string". + * Version of strrchr() that handles unsigned char strings with characters from + * 128 to 255 correctly. It also doesn't return a pointer to the NUL at the + * end of the string. + * Return NULL if not found. + * Does not handle multi-byte char for "c"! + */ + char_u * +vim_strrchr(char_u *string, int c) +{ + char_u *retval = NULL; + char_u *p = string; + + while (*p) + { + if (*p == c) + retval = p; + MB_PTR_ADV(p); + } + return retval; +} + +/* + * Vim's version of strpbrk(), in case it's missing. + * Don't generate a prototype for this, causes problems when it's not used. + */ +#ifndef PROTO +# ifndef HAVE_STRPBRK +# ifdef vim_strpbrk +# undef vim_strpbrk +# endif + char_u * +vim_strpbrk(char_u *s, char_u *charset) +{ + while (*s) + { + if (vim_strchr(charset, *s) != NULL) + return s; + MB_PTR_ADV(s); + } + return NULL; +} +# endif +#endif + +/* + * Sort an array of strings. 
+ */
+static int sort_compare(const void *s1, const void *s2);
+
+/*
+ * qsort() callback: "s1" and "s2" point to string pointers, which are
+ * compared with STRCMP().
+ */
+    static int
+sort_compare(const void *s1, const void *s2)
+{
+    return STRCMP(*(char **)s1, *(char **)s2);
+}
+
+/*
+ * Sort the "count" string pointers in "files" in place, using qsort() with
+ * sort_compare().
+ */
+    void
+sort_strings(
+    char_u      **files,
+    int         count)
+{
+    qsort((void *)files, (size_t)count, sizeof(char_u *), sort_compare);
+}
+
+#if defined(FEAT_QUICKFIX) || defined(FEAT_SPELL) || defined(PROTO)
+/*
+ * Return TRUE if string "s" contains a non-ASCII character (128 or higher).
+ * When "s" is NULL FALSE is returned.
+ */
+    int
+has_non_ascii(char_u *s)
+{
+    char_u      *p;
+
+    if (s != NULL)
+        for (p = s; *p != NUL; ++p)
+            if (*p >= 128)
+                return TRUE;
+    return FALSE;
+}
+#endif
+
+/*
+ * Concatenate two strings and return the result in allocated memory.
+ * A NULL "str1" or "str2" is handled like an empty string.
+ * Returns NULL when out of memory.
+ */
+    char_u *
+concat_str(char_u *str1, char_u *str2)
+{
+    char_u      *dest;
+    size_t      l = str1 == NULL ? 0 : STRLEN(str1);
+
+    dest = alloc(l + (str2 == NULL ? 0 : STRLEN(str2)) + 1L);
+    if (dest != NULL)
+    {
+        if (str1 == NULL)
+            *dest = NUL;
+        else
+            STRCPY(dest, str1);
+        if (str2 != NULL)
+            STRCPY(dest + l, str2);
+    }
+    return dest;
+}
+
+#if defined(FEAT_EVAL) || defined(PROTO)
+
+/*
+ * Return string "str" in ' quotes, doubling ' characters.
+ * If "str" is NULL an empty string is assumed.
+ * If "function" is TRUE make it function('string').
+ * The result is in allocated memory, NULL when alloc() failed.
+ */
+    char_u *
+string_quote(char_u *str, int function)
+{
+    unsigned    len;
+    char_u      *p, *r, *s;
+
+    // Fixed part: "function('" (10 bytes) + "')" + NUL is 13 bytes, two
+    // quotes + NUL is 3 bytes.
+    len = (function ? 13 : 3);
+    if (str != NULL)
+    {
+        // Every quote in "str" is doubled and needs one extra byte.
+        len += (unsigned)STRLEN(str);
+        for (p = str; *p != NUL; MB_PTR_ADV(p))
+            if (*p == '\'')
+                ++len;
+    }
+    s = r = alloc(len);
+    if (r != NULL)
+    {
+        if (function)
+        {
+            STRCPY(r, "function('");
+            r += 10;
+        }
+        else
+            *r++ = '\'';
+        if (str != NULL)
+            for (p = str; *p != NUL; )
+            {
+                if (*p == '\'')
+                    *r++ = '\'';
+                MB_COPY_CHAR(p, r);
+            }
+        *r++ = '\'';
+        if (function)
+            *r++ = ')';
+        *r++ = NUL;
+    }
+    return s;
+}
+
+/*
+ * Common code for byteidx() and byteidxcomp(): set the result to the byte
+ * offset reached after skipping argvars[1] characters in string argvars[0].
+ * When "comp" is TRUE and the encoding is UTF-8 the step length comes from
+ * utf_ptr2len(), otherwise from mb_ptr2len().
+ * The result is -1 when the string cannot be obtained, the index is negative
+ * or the end of the string is reached before the index.
+ */
+    static void
+byteidx(typval_T *argvars, typval_T *rettv, int comp UNUSED)
+{
+    char_u      *t;
+    char_u      *str;
+    varnumber_T idx;
+
+    str = tv_get_string_chk(&argvars[0]);
+    idx = tv_get_number_chk(&argvars[1], NULL);
+    rettv->vval.v_number = -1;
+    if (str == NULL || idx < 0)
+        return;
+
+    t = str;
+    for ( ; idx > 0; idx--)
+    {
+        if (*t == NUL)          // EOL reached
+            return;
+        if (enc_utf8 && comp)
+            t += utf_ptr2len(t);
+        else
+            t += (*mb_ptr2len)(t);
+    }
+    rettv->vval.v_number = (varnumber_T)(t - str);
+}
+
+/*
+ * "byteidx()" function
+ */
+    void
+f_byteidx(typval_T *argvars, typval_T *rettv)
+{
+    byteidx(argvars, rettv, FALSE);
+}
+
+/*
+ * "byteidxcomp()" function
+ */
+    void
+f_byteidxcomp(typval_T *argvars, typval_T *rettv)
+{
+    byteidx(argvars, rettv, TRUE);
+}
+
+/*
+ * "charidx()" function
+ * Sets the result to the index of the character that contains byte index
+ * argvars[1] of string argvars[0], or -1 when the arguments are invalid or
+ * the byte index is beyond the end of the string.
+ */
+    void
+f_charidx(typval_T *argvars, typval_T *rettv)
+{
+    char_u      *str;
+    varnumber_T idx;
+    varnumber_T countcc = FALSE;
+    char_u      *p;
+    int         len;
+    int         (*ptr2len)(char_u *);
+
+    rettv->vval.v_number = -1;
+
+    if (argvars[0].v_type != VAR_STRING || argvars[1].v_type != VAR_NUMBER
+            || (argvars[2].v_type != VAR_UNKNOWN
+                && argvars[2].v_type != VAR_NUMBER
+                && argvars[2].v_type != VAR_BOOL))
+    {
+        emsg(_(e_invarg));
+        return;
+    }
+
+    str = tv_get_string_chk(&argvars[0]);
+    idx = tv_get_number_chk(&argvars[1], NULL);
+    if (str == NULL || idx < 0)
+        return;
+
+    if (argvars[2].v_type != VAR_UNKNOWN)
+        countcc = tv_get_bool(&argvars[2]);
+    if (countcc < 0 || countcc > 1)
+    {
+        semsg(_(e_using_number_as_bool_nr), countcc);
+        return;
+    }
+
+    if (enc_utf8 && countcc)
+        ptr2len = utf_ptr2len;
+    else
+        ptr2len = mb_ptr2len;
+
+    // Compare byte offsets instead of pointers: "idx" is not limited to the
+    // string length, thus "str + idx" could point far outside the string.
+    for (p = str, len = 0; (varnumber_T)(p - str) <= idx; len++)
+    {
+        if (*p == NUL)
+            return;
+        p += ptr2len(p);
+    }
+
+    rettv->vval.v_number = len > 0 ? len - 1 : 0;
+}
+
+/*
+ * "str2list()" function
+ * Appends the number of every character in string argvars[0] to a new list.
+ * When argvars[1] is given and true the string is decoded as UTF-8.
+ */
+    void
+f_str2list(typval_T *argvars, typval_T *rettv)
+{
+    char_u      *p;
+    int         utf8 = FALSE;
+
+    if (rettv_list_alloc(rettv) == FAIL)
+        return;
+
+    if (argvars[1].v_type != VAR_UNKNOWN)
+        utf8 = (int)tv_get_bool_chk(&argvars[1], NULL);
+
+    p = tv_get_string(&argvars[0]);
+
+    if (has_mbyte || utf8)
+    {
+        int (*ptr2len)(char_u *);
+        int (*ptr2char)(char_u *);
+
+        if (utf8 || enc_utf8)
+        {
+            ptr2len = utf_ptr2len;
+            ptr2char = utf_ptr2char;
+        }
+        else
+        {
+            ptr2len = mb_ptr2len;
+            ptr2char = mb_ptr2char;
+        }
+
+        for ( ; *p != NUL; p += (*ptr2len)(p))
+            list_append_number(rettv->vval.v_list, (*ptr2char)(p));
+    }
+    else
+        for ( ; *p != NUL; ++p)
+            list_append_number(rettv->vval.v_list, *p);
+}
+
+/*
+ * "str2nr()" function
+ * argvars[1] is the optional base, only 2, 8, 10 and 16 are accepted.
+ * argvars[2] is only looked at when the base is given; when true
+ * STR2NR_QUOTE is passed to vim_str2nr().
+ */
+    void
+f_str2nr(typval_T *argvars, typval_T *rettv)
+{
+    int         base = 10;
+    char_u      *p;
+    varnumber_T n;
+    int         what = 0;
+    int         isneg;
+
+    if (argvars[1].v_type != VAR_UNKNOWN)
+    {
+        base = (int)tv_get_number(&argvars[1]);
+        if (base != 2 && base != 8 && base != 10 && base != 16)
+        {
+            emsg(_(e_invarg));
+            return;
+        }
+        if (argvars[2].v_type != VAR_UNKNOWN && tv_get_bool(&argvars[2]))
+            what |= STR2NR_QUOTE;
+    }
+
+    // The sign is handled here, vim_str2nr() gets the text after it.
+    p = skipwhite(tv_get_string_strict(&argvars[0]));
+    isneg = (*p == '-');
+    if (*p == '+' || *p == '-')
+        p = skipwhite(p + 1);
+    switch (base)
+    {
+        case 2: what |= STR2NR_BIN + STR2NR_FORCE; break;
+        case 8: what |= STR2NR_OCT + STR2NR_OOCT + STR2NR_FORCE; break;
+        case 16: what |= STR2NR_HEX + STR2NR_FORCE; break;
+    }
+    vim_str2nr(p, NULL, NULL, what, &n, NULL, 0, FALSE);
+    // Text after the number is silently ignored.
+    if (isneg)
+        rettv->vval.v_number = -n;
+    else
+        rettv->vval.v_number = n;
+
+}
+
+/*
+ * "strgetchar()" function
+ * Sets the result to the character at character index argvars[1] of string
+ * argvars[0], or -1 when the index is negative or beyond the string.
+ */
+    void
+f_strgetchar(typval_T *argvars, typval_T *rettv)
+{
+    char_u      *str;
+    int         len;
+    int         error = FALSE;
+    int         charidx;
+    int         byteidx = 0;
+
+    rettv->vval.v_number = -1;
+    str = tv_get_string_chk(&argvars[0]);
+    if (str == NULL)
+        return;
+    len = (int)STRLEN(str);
+    charidx = (int)tv_get_number_chk(&argvars[1], &error);
+    if (error)
+        return;
+
+    while (charidx >= 0 && byteidx < len)
+    {
+        if (charidx == 0)
+        {
+            rettv->vval.v_number = mb_ptr2char(str + byteidx);
+            break;
+        }
+        --charidx;
+        byteidx += MB_CPTR2LEN(str + byteidx);
+    }
+}
+
+/*
+ * "stridx()" function
+ * Sets the result to the byte index of the first occurrence of argvars[1] in
+ * argvars[0], searching from the optional byte index argvars[2].  The result
+ * is -1 when there is no match.
+ */
+    void
+f_stridx(typval_T *argvars, typval_T *rettv)
+{
+    char_u      buf[NUMBUFLEN];
+    char_u      *needle;
+    char_u      *haystack;
+    char_u      *save_haystack;
+    char_u      *pos;
+    int         start_idx;
+
+    needle = tv_get_string_chk(&argvars[1]);
+    save_haystack = haystack = tv_get_string_buf_chk(&argvars[0], buf);
+    rettv->vval.v_number = -1;
+    if (needle == NULL || haystack == NULL)
+        return;         // type error; errmsg already given
+
+    if (argvars[2].v_type != VAR_UNKNOWN)
+    {
+        int         error = FALSE;
+
+        start_idx = (int)tv_get_number_chk(&argvars[2], &error);
+        if (error || start_idx >= (int)STRLEN(haystack))
+            return;
+        // A negative start index is ignored.
+        if (start_idx >= 0)
+            haystack += start_idx;
+    }
+
+    pos = (char_u *)strstr((char *)haystack, (char *)needle);
+    if (pos != NULL)
+        rettv->vval.v_number = (varnumber_T)(pos - save_haystack);
+}
+
+/*
+ * "string()" function
+ */
+    void
+f_string(typval_T *argvars, typval_T *rettv)
+{
+    char_u      *tofree;
+    char_u      numbuf[NUMBUFLEN];
+
+    rettv->v_type = VAR_STRING;
+    rettv->vval.v_string = tv2string(&argvars[0], &tofree, numbuf,
+                                                                get_copyID());
+    // Make a copy if we have a value but it's not in allocated memory.
+    if (rettv->vval.v_string != NULL && tofree == NULL)
+        rettv->vval.v_string = vim_strsave(rettv->vval.v_string);
+}
+
+/*
+ * "strlen()" function
+ */
+    void
+f_strlen(typval_T *argvars, typval_T *rettv)
+{
+    rettv->vval.v_number = (varnumber_T)(STRLEN(
+                                              tv_get_string(&argvars[0])));
+}
+
+/*
+ * Common code for strcharlen() and strchars(): count the characters in
+ * string argvars[0].  Advances with mb_ptr2char_adv() when "skipcc" is TRUE
+ * and with mb_cptr2char_adv() otherwise.
+ */
+    static void
+strchar_common(typval_T *argvars, typval_T *rettv, int skipcc)
+{
+    char_u              *s = tv_get_string(&argvars[0]);
+    varnumber_T         len = 0;
+    int                 (*func_mb_ptr2char_adv)(char_u **pp);
+
+    func_mb_ptr2char_adv = skipcc ? mb_ptr2char_adv : mb_cptr2char_adv;
+    while (*s != NUL)
+    {
+        func_mb_ptr2char_adv(&s);
+        ++len;
+    }
+    rettv->vval.v_number = len;
+}
+
+/*
+ * "strcharlen()" function
+ */
+    void
+f_strcharlen(typval_T *argvars, typval_T *rettv)
+{
+    strchar_common(argvars, rettv, TRUE);
+}
+
+/*
+ * "strchars()" function
+ * argvars[1] is the optional "skipcc" flag, it must be 0 or 1.
+ */
+    void
+f_strchars(typval_T *argvars, typval_T *rettv)
+{
+    varnumber_T         skipcc = FALSE;
+
+    if (argvars[1].v_type != VAR_UNKNOWN)
+        skipcc = tv_get_bool(&argvars[1]);
+    if (skipcc < 0 || skipcc > 1)
+        semsg(_(e_using_number_as_bool_nr), skipcc);
+    else
+        strchar_common(argvars, rettv, skipcc);
+}
+
+/*
+ * "strdisplaywidth()" function
+ * argvars[1] is the optional start column, zero when omitted.
+ */
+    void
+f_strdisplaywidth(typval_T *argvars, typval_T *rettv)
+{
+    char_u      *s = tv_get_string(&argvars[0]);
+    int         col = 0;
+
+    if (argvars[1].v_type != VAR_UNKNOWN)
+        col = (int)tv_get_number(&argvars[1]);
+
+    rettv->vval.v_number = (varnumber_T)(linetabsize_col(col, s) - col);
+}
+
+/*
+ * "strwidth()" function
+ */
+    void
+f_strwidth(typval_T *argvars, typval_T *rettv)
+{
+    char_u      *s = tv_get_string_strict(&argvars[0]);
+
+    rettv->vval.v_number = (varnumber_T)(mb_string2cells(s, -1));
+}
+
+/*
+ * "strcharpart()" function
+ * argvars[1] is the start in characters, argvars[2] the optional length in
+ * characters.  argvars[3] is the optional "skipcc" flag, it is only used
+ * when the length is also given.
+ */
+    void
+f_strcharpart(typval_T *argvars, typval_T *rettv)
+{
+    char_u      *p;
+    int         nchar;
+    int         nbyte = 0;
+    int         charlen;
+    int         skipcc = FALSE;
+    int         len = 0;
+    int         slen;
+    int         error = FALSE;
+
+    p = tv_get_string(&argvars[0]);
+    slen = (int)STRLEN(p);
+
+    nchar = (int)tv_get_number_chk(&argvars[1], &error);
+    if (!error)
+    {
+        if (argvars[2].v_type != VAR_UNKNOWN
+                                           && argvars[3].v_type != VAR_UNKNOWN)
+        {
+            skipcc = tv_get_bool(&argvars[3]);
+            if (skipcc < 0 || skipcc > 1)
+            {
+                semsg(_(e_using_number_as_bool_nr), skipcc);
+                return;
+            }
+        }
+
+        // Convert the start from a character count to a byte offset.  A
+        // start that is zero or negative is used as-is.
+        if (nchar > 0)
+            while (nchar > 0 && nbyte < slen)
+            {
+                if (skipcc)
+                    nbyte += mb_ptr2len(p + nbyte);
+                else
+                    nbyte += MB_CPTR2LEN(p + nbyte);
+                --nchar;
+            }
+        else
+            nbyte = nchar;
+        if (argvars[2].v_type != VAR_UNKNOWN)
+        {
+            // Convert the length from a character count to a byte count.
+            charlen = (int)tv_get_number(&argvars[2]);
+            while (charlen > 0 && nbyte + len < slen)
+            {
+                int off = nbyte + len;
+
+                // Positions before the start of the string count as one
+                // byte each.
+                if (off < 0)
+                    len += 1;
+                else
+                {
+                    if (skipcc)
+                        len += mb_ptr2len(p + off);
+                    else
+                        len += MB_CPTR2LEN(p + off);
+                }
+                --charlen;
+            }
+        }
+        else
+            len = slen - nbyte;    // default: all bytes that are available.
+    }
+
+    /*
+     * Only return the overlap between the specified part and the actual
+     * string.
+     */
+    if (nbyte < 0)
+    {
+        len += nbyte;
+        nbyte = 0;
+    }
+    else if (nbyte > slen)
+        nbyte = slen;
+    if (len < 0)
+        len = 0;
+    else if (nbyte + len > slen)
+        len = slen - nbyte;
+
+    rettv->v_type = VAR_STRING;
+    rettv->vval.v_string = vim_strnsave(p + nbyte, len);
+}
+
+/*
+ * "strpart()" function
+ * argvars[1] is the start in bytes, argvars[2] the optional length in bytes.
+ * When argvars[3] is also given the length is counted in characters.
+ */
+    void
+f_strpart(typval_T *argvars, typval_T *rettv)
+{
+    char_u      *p;
+    int         n;
+    int         len;
+    int         slen;
+    int         error = FALSE;
+
+    p = tv_get_string(&argvars[0]);
+    slen = (int)STRLEN(p);
+
+    n = (int)tv_get_number_chk(&argvars[1], &error);
+    if (error)
+        len = 0;
+    else if (argvars[2].v_type != VAR_UNKNOWN)
+        len = (int)tv_get_number(&argvars[2]);
+    else
+        len = slen - n;     // default len: all bytes that are available.
+
+    // Only return the overlap between the specified part and the actual
+    // string.
+    if (n < 0)
+    {
+        len += n;
+        n = 0;
+    }
+    else if (n > slen)
+        n = slen;
+    if (len < 0)
+        len = 0;
+    // Here 0 <= n <= slen, thus "slen - n" cannot overflow, while "n + len"
+    // can when a huge length was given.
+    else if (len > slen - n)
+        len = slen - n;
+
+    if (argvars[2].v_type != VAR_UNKNOWN && argvars[3].v_type != VAR_UNKNOWN)
+    {
+        int off;
+
+        // length in characters
+        for (off = n; off < slen && len > 0; --len)
+            off += mb_ptr2len(p + off);
+        len = off - n;
+    }
+
+    rettv->v_type = VAR_STRING;
+    rettv->vval.v_string = vim_strnsave(p + n, len);
+}
+
+/*
+ * "strridx()" function
+ * Sets the result to the byte index of the last occurrence of argvars[1] in
+ * argvars[0] that does not start after the optional byte index argvars[2].
+ * The result is -1 when there is no match.
+ */
+    void
+f_strridx(typval_T *argvars, typval_T *rettv)
+{
+    char_u      buf[NUMBUFLEN];
+    char_u      *needle;
+    char_u      *haystack;
+    char_u      *rest;
+    char_u      *lastmatch = NULL;
+    int         haystack_len, end_idx;
+
+    needle = tv_get_string_chk(&argvars[1]);
+    haystack = tv_get_string_buf_chk(&argvars[0], buf);
+
+    rettv->vval.v_number = -1;
+    if (needle == NULL || haystack == NULL)
+        return;         // type error; errmsg already given
+
+    haystack_len = (int)STRLEN(haystack);
+    if (argvars[2].v_type != VAR_UNKNOWN)
+    {
+        // Third argument: upper limit for index
+        end_idx = (int)tv_get_number_chk(&argvars[2], NULL);
+        if (end_idx < 0)
+            return;     // can never find a match
+    }
+    else
+        end_idx = haystack_len;
+
+    if (*needle == NUL)
+    {
+        // Empty string matches past the end.  "end_idx" may be larger than
+        // the string length, so report the index itself instead of forming
+        // a pointer beyond the string.
+        rettv->vval.v_number = (varnumber_T)end_idx;
+        return;
+    }
+
+    for (rest = haystack; *rest != '\0'; ++rest)
+    {
+        rest = (char_u *)strstr((char *)rest, (char *)needle);
+        // Compare offsets, "haystack + end_idx" may be outside the string.
+        if (rest == NULL || rest - haystack > end_idx)
+            break;
+        lastmatch = rest;
+    }
+
+    if (lastmatch == NULL)
+        rettv->vval.v_number = -1;
+    else
+        rettv->vval.v_number = (varnumber_T)(lastmatch - haystack);
+}
+
+/*
+ * "strtrans()" function
+ */
+    void
+f_strtrans(typval_T *argvars, typval_T *rettv)
+{
+    rettv->v_type = VAR_STRING;
+    rettv->vval.v_string = transstr(tv_get_string(&argvars[0]));
+}
+
+/*
+ * "tolower(string)" function
+ */
+    void
+f_tolower(typval_T *argvars, typval_T *rettv)
+{
+    rettv->v_type = VAR_STRING;
+    rettv->vval.v_string = strlow_save(tv_get_string(&argvars[0]));
+}
+
+/*
+ * "toupper(string)" function
+ */
+    void
+f_toupper(typval_T *argvars, typval_T *rettv)
+{
+    rettv->v_type = VAR_STRING;
+    rettv->vval.v_string = strup_save(tv_get_string(&argvars[0]));
+}
+
+/*
+ * "tr(string, fromstr, tostr)" function
+ * Builds a copy of "string" in which every character that occurs in
+ * "fromstr" is replaced by the character at the same position in "tostr".
+ * The result is a NULL string when an argument is invalid, "fromstr" and
+ * "tostr" do not match in length or memory could not be allocated.
+ */
+    void
+f_tr(typval_T *argvars, typval_T *rettv)
+{
+    char_u      *in_str;
+    char_u      *fromstr;
+    char_u      *tostr;
+    char_u      *p;
+    int         inlen;
+    int         fromlen;
+    int         tolen;
+    int         idx;
+    char_u      *cpstr;
+    int         cplen;
+    int         first = TRUE;
+    char_u      buf[NUMBUFLEN];
+    char_u      buf2[NUMBUFLEN];
+    garray_T    ga;
+
+    in_str = tv_get_string(&argvars[0]);
+    fromstr = tv_get_string_buf_chk(&argvars[1], buf);
+    tostr = tv_get_string_buf_chk(&argvars[2], buf2);
+
+    // Default return value: empty string.
+    rettv->v_type = VAR_STRING;
+    rettv->vval.v_string = NULL;
+    if (fromstr == NULL || tostr == NULL)
+        return;         // type error; errmsg already given
+    ga_init2(&ga, (int)sizeof(char), 80);
+
+    if (!has_mbyte)
+        // not multi-byte: fromstr and tostr must be the same length
+        if (STRLEN(fromstr) != STRLEN(tostr))
+        {
+error:
+            semsg(_(e_invarg2), fromstr);
+            ga_clear(&ga);
+            return;
+        }
+
+    // fromstr and tostr have to contain the same number of chars
+    while (*in_str != NUL)
+    {
+        if (has_mbyte)
+        {
+            // Unless a replacement is found the character is copied as-is.
+            inlen = (*mb_ptr2len)(in_str);
+            cpstr = in_str;
+            cplen = inlen;
+            idx = 0;
+            for (p = fromstr; *p != NUL; p += fromlen)
+            {
+                fromlen = (*mb_ptr2len)(p);
+                if (fromlen == inlen && STRNCMP(in_str, p, inlen) == 0)
+                {
+                    // Found at position "idx" in fromstr, use the character
+                    // at the same position in tostr.
+                    for (p = tostr; *p != NUL; p += tolen)
+                    {
+                        tolen = (*mb_ptr2len)(p);
+                        if (idx-- == 0)
+                        {
+                            cplen = tolen;
+                            cpstr = p;
+                            break;
+                        }
+                    }
+                    if (*p == NUL)      // tostr is shorter than fromstr
+                        goto error;
+                    break;
+                }
+                ++idx;
+            }
+
+            if (first && cpstr == in_str)
+            {
+                // Check that fromstr and tostr have the same number of
+                // (multi-byte) characters.  Done only once when a character
+                // of in_str doesn't appear in fromstr.
+                first = FALSE;
+                for (p = tostr; *p != NUL; p += tolen)
+                {
+                    tolen = (*mb_ptr2len)(p);
+                    --idx;
+                }
+                if (idx != 0)
+                    goto error;
+            }
+
+            // Do not copy when the array could not be enlarged, that would
+            // write outside of the allocated memory.
+            if (ga_grow(&ga, cplen) == FAIL)
+            {
+                ga_clear(&ga);
+                return;
+            }
+            mch_memmove((char *)ga.ga_data + ga.ga_len, cpstr, (size_t)cplen);
+            ga.ga_len += cplen;
+
+            in_str += inlen;
+        }
+        else
+        {
+            // When not using multi-byte chars we can do it faster.
+            p = vim_strchr(fromstr, *in_str);
+            if (p != NULL)
+                ga_append(&ga, tostr[p - fromstr]);
+            else
+                ga_append(&ga, *in_str);
+            ++in_str;
+        }
+    }
+
+    // add a terminating NUL; without room for it the result would not be
+    // a valid string, return the default value then.
+    if (ga_grow(&ga, 1) == FAIL)
+    {
+        ga_clear(&ga);
+        return;
+    }
+    ga_append(&ga, NUL);
+
+    rettv->vval.v_string = ga.ga_data;
+}
+
+/*
+ * "trim({expr})" function
+ * argvars[1] is the optional mask with the characters to remove; without it
+ * characters up to and including a space and 0xa0 are removed.
+ * argvars[2] is the optional direction: 0 trims both ends, 1 only the start
+ * and 2 only the end.  It is only used when the mask is given.
+ */
+    void
+f_trim(typval_T *argvars, typval_T *rettv)
+{
+    char_u      buf1[NUMBUFLEN];
+    char_u      buf2[NUMBUFLEN];
+    char_u      *head = tv_get_string_buf_chk(&argvars[0], buf1);
+    char_u      *mask = NULL;
+    char_u      *tail;
+    char_u      *prev;
+    char_u      *p;
+    int         c1;
+    int         dir = 0;
+
+    rettv->v_type = VAR_STRING;
+    rettv->vval.v_string = NULL;
+    if (head == NULL)
+        return;
+
+    if (argvars[1].v_type != VAR_UNKNOWN && argvars[1].v_type != VAR_STRING)
+    {
+        semsg(_(e_invarg2), tv_get_string(&argvars[1]));
+        return;
+    }
+
+    if (argvars[1].v_type == VAR_STRING)
+    {
+        mask = tv_get_string_buf_chk(&argvars[1], buf2);
+
+        if (argvars[2].v_type != VAR_UNKNOWN)
+        {
+            int error = 0;
+
+            // leading or trailing characters to trim
+            dir = (int)tv_get_number_chk(&argvars[2], &error);
+            if (error)
+                return;
+            if (dir < 0 || dir > 2)
+            {
+                semsg(_(e_invarg2), tv_get_string(&argvars[2]));
+                return;
+            }
+        }
+    }
+
+    if (dir == 0 || dir == 1)
+    {
+        // Trim leading characters
+        while (*head != NUL)
+        {
+            c1 = PTR2CHAR(head);
+            if (mask == NULL)
+            {
+                if (c1 > ' ' && c1 != 0xa0)
+                    break;
+            }
+            else
+            {
+                // Stop at the first character that is not in the mask.
+                for (p = mask; *p != NUL; MB_PTR_ADV(p))
+                    if (c1 == PTR2CHAR(p))
+                        break;
+                if (*p == NUL)
+                    break;
+            }
+            MB_PTR_ADV(head);
+        }
+    }
+
+    tail = head + STRLEN(head);
+    if (dir == 0 || dir == 2)
+    {
+        // Trim trailing characters
+        for (; tail > head; tail = prev)
+        {
+            // "prev" is moved back to the start of the character before
+            // "tail".
+            prev = tail;
+            MB_PTR_BACK(head, prev);
+            c1 = PTR2CHAR(prev);
+            if (mask == NULL)
+            {
+                if (c1 > ' ' && c1 != 0xa0)
+                    break;
+            }
+            else
+            {
+                // Stop at the first character that is not in the mask.
+                for (p = mask; *p != NUL; MB_PTR_ADV(p))
+                    if (c1 == PTR2CHAR(p))
+                        break;
+                if (*p == NUL)
+                    break;
+            }
+        }
+    }
+    rettv->vval.v_string = vim_strnsave(head, tail - head);
+}
+
+#endif
diff --git a/src/version.c
b/src/version.c --- a/src/version.c +++ b/src/version.c @@ -756,6 +756,8 @@ static char *(features[]) = static int included_patches[] = { /* Add new patch number below this line */ /**/ + 3139, +/**/ 3138, /**/ 3137,