# HG changeset patch # User Bram Moolenaar # Date 1646253003 -3600 # Node ID b65a50d2fa4f4ed5d5729d9b14d96edac93b3f63 # Parent 2550ae69df4f55e2fc7c48a8df618d9127e0a9de patch 8.2.4494: the find_tags() function is much too long Commit: https://github.com/vim/vim/commit/2f87a99b6e9b559d51e130769e7f8377db6749f8 Author: Yegappan Lakshmanan Date: Wed Mar 2 20:29:35 2022 +0000 patch 8.2.4494: the find_tags() function is much too long Problem: The find_tags() function is much too long. Solution: Refactor the function. (Yegappan Lakshmanan, closes https://github.com/vim/vim/issues/9869) diff --git a/src/quickfix.c b/src/quickfix.c --- a/src/quickfix.c +++ b/src/quickfix.c @@ -5013,6 +5013,8 @@ ex_make(exarg_T *eap) int res; char_u *au_name = NULL; int_u save_qfid; + char_u *errorformat = p_efm; + int newlist = TRUE; // Redirect ":grep" to ":vimgrep" if 'grepprg' is "internal". if (grep_internal(eap->cmdidx)) @@ -5059,11 +5061,13 @@ ex_make(exarg_T *eap) incr_quickfix_busy(); - res = qf_init(wp, fname, (eap->cmdidx != CMD_make - && eap->cmdidx != CMD_lmake) ? p_gefm : p_efm, - (eap->cmdidx != CMD_grepadd - && eap->cmdidx != CMD_lgrepadd), - qf_cmdtitle(*eap->cmdlinep), enc); + if (eap->cmdidx != CMD_make && eap->cmdidx != CMD_lmake) + errorformat = p_gefm; + if (eap->cmdidx == CMD_grepadd || eap->cmdidx == CMD_lgrepadd) + newlist = FALSE; + + res = qf_init(wp, fname, errorformat, newlist, qf_cmdtitle(*eap->cmdlinep), + enc); if (wp != NULL) { qi = GET_LOC_LIST(wp); diff --git a/src/tag.c b/src/tag.c --- a/src/tag.c +++ b/src/tag.c @@ -1577,13 +1577,1162 @@ find_tagfunc_tags( #endif /* + * State information used during a tag search + */ +typedef struct { + pat_T orgpat; // holds unconverted pattern info +#ifdef FEAT_MULTI_LANG + char_u *help_lang_find; // lang to be found + int is_txt; // flag of file extension +#endif + int did_open; // did open a tag file + int mincount; // MAXCOL: find all matches + // other: minimal number of matches +#ifdef FEAT_TAG_BINS + int linear; // do a linear search +#endif + char_u *lbuf; // line buffer + int lbuf_size; // length of lbuf +#ifdef FEAT_EMACS_TAGS + char_u *ebuf; // additional buffer for etag fname +#endif + int match_count; // number of matches found + garray_T ga_match[MT_COUNT]; // stores matches in sequence + hashtab_T ht_match[MT_COUNT]; // stores matches by key + int stop_searching; // stop when match found or error +} findtags_state_T; + +/* + * Initialize the state used by find_tags() + */ + static int +findtags_state_init(findtags_state_T *st, char_u *pat, int mincount) +{ + int mtt; + + st->orgpat.pat = pat; + st->orgpat.len = (int)STRLEN(pat); + st->orgpat.regmatch.regprog = NULL; +#ifdef FEAT_MULTI_LANG + st->help_lang_find = NULL; + st->is_txt = FALSE; +#endif + st->did_open = FALSE; + st->mincount = mincount; + st->lbuf_size = LSIZE; + st->lbuf = alloc(st->lbuf_size); +#ifdef FEAT_EMACS_TAGS + st->ebuf = alloc(LSIZE); +#endif + st->match_count = 0; + st->stop_searching = FALSE; + + for (mtt = 0; mtt < MT_COUNT; ++mtt) + { + ga_init2(&st->ga_match[mtt], sizeof(char_u *), 100); + hash_init(&st->ht_match[mtt]); + } + + // check for out of memory situation + if (st->lbuf == NULL +#ifdef FEAT_EMACS_TAGS + || st->ebuf == NULL +#endif + ) + return FAIL; + + return OK; +} + +/* + * Search for tags in the 'tag_fname' tags file. + * Information needed to search for the tags is in the 'st' state structure. + * The matching tags are returned in 'st'. + * Returns OK if successfully processed the file and FAIL on memory allocation + * failure. + */ + static int +find_tags_in_file( + char_u *tag_fname, + findtags_state_T *st, + int flags, + char_u *buf_ffname) +{ + FILE *fp; + tagptrs_T tagp; + int is_static; // current tag line is static + int is_current; // file name matches + int eof = FALSE; // found end-of-file + char_u *p; + char_u *s; + int i; +#ifdef FEAT_MULTI_LANG + int help_pri = 0; + char_u help_lang[3]; // lang of current tags file +#endif +#ifdef FEAT_TAG_BINS + int tag_file_sorted = NUL; // !_TAG_FILE_SORTED value + off_T filesize; + int tagcmp; + off_T offset; +#endif + enum + { + TS_START, // at start of file + TS_LINEAR // linear searching forward, till EOF +#ifdef FEAT_TAG_BINS + , TS_BINARY, // binary searching + TS_SKIP_BACK, // skipping backwards + TS_STEP_FORWARD // stepping forwards +#endif + } state; // Current search state +#ifdef FEAT_TAG_BINS + struct tag_search_info // Binary search file offsets + { + off_T low_offset; // offset for first char of first line that + // could match + off_T high_offset; // offset of char after last line that could + // match + off_T curr_offset; // Current file offset in search range + off_T curr_offset_used; // curr_offset used when skipping back + off_T match_offset; // Where the binary search found a tag + int low_char; // first char at low_offset + int high_char; // first char at high_offset + } search_info; +#endif + + int cmplen; + int match; // matches + int match_no_ic = 0;// matches with rm_ic == FALSE + int match_re; // match with regexp + int matchoff = 0; + +#ifdef FEAT_EMACS_TAGS + /* + * Stack for included emacs-tags file. + * It has a fixed size, to truncate cyclic includes. jw + */ +# define INCSTACK_SIZE 42 + struct + { + FILE *fp; + char_u *etag_fname; + } incstack[INCSTACK_SIZE]; + + int incstack_idx = 0; // index in incstack + int is_etag; // current file is emaces style +#endif + + char_u *mfp; + int mtt; + hash_T hash = 0; + +#ifdef FEAT_TAG_BINS + int sort_error = FALSE; // tags file not sorted + int sortic = FALSE; // tag file sorted in nocase + int noic = (flags & TAG_NOIC); +#endif + int line_error = FALSE; // syntax error + int has_re = (flags & TAG_REGEXP); // regexp used + int help_only = (flags & TAG_HELP); + int name_only = (flags & TAG_NAMES); +#ifdef FEAT_CSCOPE + int use_cscope = (flags & TAG_CSCOPE); +#endif + int get_it_again = FALSE; + vimconv_T vimconv; + + vimconv.vc_type = CONV_NONE; + +#ifdef FEAT_TAG_BINS + // This is only to avoid a compiler warning for using search_info + // uninitialised. + CLEAR_FIELD(search_info); +#endif + + /* + * A file that doesn't exist is silently ignored. Only when not a + * single file is found, an error message is given (further on). + */ +#ifdef FEAT_CSCOPE + if (use_cscope) + fp = NULL; // avoid GCC warning + else +#endif + { +#ifdef FEAT_MULTI_LANG + if (curbuf->b_help) + { + // Keep en if the file extension is .txt + if (st->is_txt) + STRCPY(help_lang, "en"); + else + { + // Prefer help tags according to 'helplang'. Put the + // two-letter language name in help_lang[]. + i = (int)STRLEN(tag_fname); + if (i > 3 && tag_fname[i - 3] == '-') + STRCPY(help_lang, tag_fname + i - 2); + else + STRCPY(help_lang, "en"); + } + // When searching for a specific language skip tags files + // for other languages. + if (st->help_lang_find != NULL + && STRICMP(help_lang, st->help_lang_find) != 0) + return OK; + + // For CTRL-] in a help file prefer a match with the same + // language. + if ((flags & TAG_KEEP_LANG) + && st->help_lang_find == NULL + && curbuf->b_fname != NULL + && (i = (int)STRLEN(curbuf->b_fname)) > 4 + && curbuf->b_fname[i - 1] == 'x' + && curbuf->b_fname[i - 4] == '.' + && STRNICMP(curbuf->b_fname + i - 3, help_lang, 2) == 0) + help_pri = 0; + else + { + help_pri = 1; + for (s = p_hlg; *s != NUL; ++s) + { + if (STRNICMP(s, help_lang, 2) == 0) + break; + ++help_pri; + if ((s = vim_strchr(s, ',')) == NULL) + break; + } + if (s == NULL || *s == NUL) + { + // Language not in 'helplang': use last, prefer English, + // unless found already. + ++help_pri; + if (STRICMP(help_lang, "en") != 0) + ++help_pri; + } + } + } +#endif + + if ((fp = mch_fopen((char *)tag_fname, "r")) == NULL) + return OK; + + if (p_verbose >= 5) + { + verbose_enter(); + smsg(_("Searching tags file %s"), tag_fname); + verbose_leave(); + } + } + st->did_open = TRUE; // remember that we found at least one file + + state = TS_START; // we're at the start of the file +#ifdef FEAT_EMACS_TAGS + is_etag = 0; // default is: not emacs style +#endif + + /* + * Read and parse the lines in the file one by one + */ + for (;;) + { +#ifdef FEAT_TAG_BINS + // check for CTRL-C typed, more often when jumping around + if (state == TS_BINARY || state == TS_SKIP_BACK) + line_breakcheck(); + else +#endif + fast_breakcheck(); + if ((flags & TAG_INS_COMP)) // Double brackets for gcc + ins_compl_check_keys(30, FALSE); + if (got_int || ins_compl_interrupted()) + { + st->stop_searching = TRUE; + break; + } + // When mincount is TAG_MANY, stop when enough matches have been + // found (for completion). + if (st->mincount == TAG_MANY && st->match_count >= TAG_MANY) + { + st->stop_searching = TRUE; + break; + } + if (get_it_again) + goto line_read_in; +#ifdef FEAT_TAG_BINS + /* + * For binary search: compute the next offset to use. + */ + if (state == TS_BINARY) + { + offset = search_info.low_offset + ((search_info.high_offset + - search_info.low_offset) / 2); + if (offset == search_info.curr_offset) + break; // End the binary search without a match. + else + search_info.curr_offset = offset; + } + + /* + * Skipping back (after a match during binary search). + */ + else if (state == TS_SKIP_BACK) + { + search_info.curr_offset -= st->lbuf_size * 2; + if (search_info.curr_offset < 0) + { + search_info.curr_offset = 0; + rewind(fp); + state = TS_STEP_FORWARD; + } + } + + /* + * When jumping around in the file, first read a line to find the + * start of the next line. + */ + if (state == TS_BINARY || state == TS_SKIP_BACK) + { + // Adjust the search file offset to the correct position + search_info.curr_offset_used = search_info.curr_offset; + vim_fseek(fp, search_info.curr_offset, SEEK_SET); + eof = vim_fgets(st->lbuf, st->lbuf_size, fp); + if (!eof && search_info.curr_offset != 0) + { + search_info.curr_offset = vim_ftell(fp); + if (search_info.curr_offset == search_info.high_offset) + { + // oops, gone a bit too far; try from low offset + vim_fseek(fp, search_info.low_offset, SEEK_SET); + search_info.curr_offset = search_info.low_offset; + } + eof = vim_fgets(st->lbuf, st->lbuf_size, fp); + } + // skip empty and blank lines + while (!eof && vim_isblankline(st->lbuf)) + { + search_info.curr_offset = vim_ftell(fp); + eof = vim_fgets(st->lbuf, st->lbuf_size, fp); + } + if (eof) + { + // Hit end of file. Skip backwards. + state = TS_SKIP_BACK; + search_info.match_offset = vim_ftell(fp); + search_info.curr_offset = search_info.curr_offset_used; + continue; + } + } + + /* + * Not jumping around in the file: Read the next line. + */ + else +#endif + { + // skip empty and blank lines + do + { +#ifdef FEAT_CSCOPE + if (use_cscope) + eof = cs_fgets(st->lbuf, st->lbuf_size); + else +#endif + { +#ifdef FEAT_TAG_BINS + search_info.curr_offset = vim_ftell(fp); +#endif + eof = vim_fgets(st->lbuf, st->lbuf_size, fp); + } + } while (!eof && vim_isblankline(st->lbuf)); + + if (eof) + { +#ifdef FEAT_EMACS_TAGS + if (incstack_idx) // this was an included file + { + --incstack_idx; + fclose(fp); // end of this file ... + fp = incstack[incstack_idx].fp; + STRCPY(tag_fname, incstack[incstack_idx].etag_fname); + vim_free(incstack[incstack_idx].etag_fname); + is_etag = 1; // (only etags can include) + continue; // ... continue with parent file + } + else +#endif + break; // end of file + } + } +line_read_in: + + if (vimconv.vc_type != CONV_NONE) + { + char_u *conv_line; + int len; + + // Convert every line. Converting the pattern from 'enc' to + // the tags file encoding doesn't work, because characters are + // not recognized. + conv_line = string_convert(&vimconv, st->lbuf, NULL); + if (conv_line != NULL) + { + // Copy or swap lbuf and conv_line. + len = (int)STRLEN(conv_line) + 1; + if (len > st->lbuf_size) + { + vim_free(st->lbuf); + st->lbuf = conv_line; + st->lbuf_size = len; + } + else + { + STRCPY(st->lbuf, conv_line); + vim_free(conv_line); + } + } + } + + +#ifdef FEAT_EMACS_TAGS + /* + * Emacs tags line with CTRL-L: New file name on next line. + * The file name is followed by a ','. + * Remember etag file name in ebuf. + */ + if (*st->lbuf == Ctrl_L +# ifdef FEAT_CSCOPE + && !use_cscope +# endif + ) + { + is_etag = 1; // in case at the start + state = TS_LINEAR; + if (!vim_fgets(st->ebuf, LSIZE, fp)) + { + for (p = st->ebuf; *p && *p != ','; p++) + ; + *p = NUL; + + /* + * atoi(p+1) is the number of bytes before the next ^L + * unless it is an include statement. + */ + if (STRNCMP(p + 1, "include", 7) == 0 + && incstack_idx < INCSTACK_SIZE) + { + // Save current "fp" and "tag_fname" in the stack. + if ((incstack[incstack_idx].etag_fname = + vim_strsave(tag_fname)) != NULL) + { + char_u *fullpath_ebuf; + + incstack[incstack_idx].fp = fp; + fp = NULL; + + // Figure out "tag_fname" and "fp" to use for + // included file. + fullpath_ebuf = expand_tag_fname(st->ebuf, + tag_fname, FALSE); + if (fullpath_ebuf != NULL) + { + fp = mch_fopen((char *)fullpath_ebuf, "r"); + if (fp != NULL) + { + if (STRLEN(fullpath_ebuf) > LSIZE) + semsg(_(e_tag_file_path_truncated_for_str), st->ebuf); + vim_strncpy(tag_fname, fullpath_ebuf, + MAXPATHL); + ++incstack_idx; + is_etag = 0; // we can include anything + } + vim_free(fullpath_ebuf); + } + if (fp == NULL) + { + // Can't open the included file, skip it and + // restore old value of "fp". + fp = incstack[incstack_idx].fp; + vim_free(incstack[incstack_idx].etag_fname); + } + } + } + } + continue; + } +#endif + + /* + * When still at the start of the file, check for Emacs tags file + * format, and for "not sorted" flag. + */ + if (state == TS_START) + { + // The header ends when the line sorts below "!_TAG_". When + // case is folded lower case letters sort before "_". + if (STRNCMP(st->lbuf, "!_TAG_", 6) <= 0 + || (st->lbuf[0] == '!' && ASCII_ISLOWER(st->lbuf[1]))) + { + if (STRNCMP(st->lbuf, "!_TAG_", 6) != 0) + // Non-header item before the header, e.g. "!" itself. + goto parse_line; + + /* + * Read header line. + */ +#ifdef FEAT_TAG_BINS + if (STRNCMP(st->lbuf, "!_TAG_FILE_SORTED\t", 18) == 0) + tag_file_sorted = st->lbuf[18]; +#endif + if (STRNCMP(st->lbuf, "!_TAG_FILE_ENCODING\t", 20) == 0) + { + // Prepare to convert every line from the specified + // encoding to 'encoding'. + for (p = st->lbuf + 20; *p > ' ' && *p < 127; ++p) + ; + *p = NUL; + convert_setup(&vimconv, st->lbuf + 20, p_enc); + } + + // Read the next line. Unrecognized flags are ignored. + continue; + } + + // Headers ends. + +#ifdef FEAT_TAG_BINS + /* + * When there is no tag head, or ignoring case, need to do a + * linear search. + * When no "!_TAG_" is found, default to binary search. If + * the tag file isn't sorted, the second loop will find it. + * When "!_TAG_FILE_SORTED" found: start binary search if + * flag set. + * For cscope, it's always linear. + */ +# ifdef FEAT_CSCOPE + if (st->linear || use_cscope) +# else + if (st->linear) +# endif + state = TS_LINEAR; + else if (tag_file_sorted == NUL) + state = TS_BINARY; + else if (tag_file_sorted == '1') + state = TS_BINARY; + else if (tag_file_sorted == '2') + { + state = TS_BINARY; + sortic = TRUE; + st->orgpat.regmatch.rm_ic = (p_ic || !noic); + } + else + state = TS_LINEAR; + + if (state == TS_BINARY && st->orgpat.regmatch.rm_ic && !sortic) + { + // Binary search won't work for ignoring case, use linear + // search. + st->linear = TRUE; + state = TS_LINEAR; + } +#else + state = TS_LINEAR; +#endif + +#ifdef FEAT_TAG_BINS + // When starting a binary search, get the size of the file and + // compute the first offset. + if (state == TS_BINARY) + { + if (vim_fseek(fp, 0L, SEEK_END) != 0) + // can't seek, don't use binary search + state = TS_LINEAR; + else + { + // Get the tag file size (don't use mch_fstat(), it's + // not portable). Don't use lseek(), it doesn't work + // properly on MacOS Catalina. + filesize = vim_ftell(fp); + vim_fseek(fp, 0L, SEEK_SET); + + // Calculate the first read offset in the file. Start + // the search in the middle of the file. + search_info.low_offset = 0; + search_info.low_char = 0; + search_info.high_offset = filesize; + search_info.curr_offset = 0; + search_info.high_char = 0xff; + } + continue; + } +#endif + } + +parse_line: + // When the line is too long the NUL will not be in the + // last-but-one byte (see vim_fgets()). + // Has been reported for Mozilla JS with extremely long names. + // In that case we need to increase lbuf_size. + if (st->lbuf[st->lbuf_size - 2] != NUL +#ifdef FEAT_CSCOPE + && !use_cscope +#endif + ) + { + st->lbuf_size *= 2; + vim_free(st->lbuf); + st->lbuf = alloc(st->lbuf_size); + if (st->lbuf == NULL) + return FAIL; + +#ifdef FEAT_TAG_BINS + if (state == TS_STEP_FORWARD) + // Seek to the same position to read the same line again + vim_fseek(fp, search_info.curr_offset, SEEK_SET); + // this will try the same thing again, make sure the offset is + // different + search_info.curr_offset = 0; +#endif + continue; + } + + /* + * Figure out where the different strings are in this line. + * For "normal" tags: Do a quick check if the tag matches. + * This speeds up tag searching a lot! + */ + if (st->orgpat.headlen +#ifdef FEAT_EMACS_TAGS + && !is_etag +#endif + ) + { + CLEAR_FIELD(tagp); + tagp.tagname = st->lbuf; + tagp.tagname_end = vim_strchr(st->lbuf, TAB); + if (tagp.tagname_end == NULL) + { + // Corrupted tag line. + line_error = TRUE; + break; + } + + /* + * Skip this line if the length of the tag is different and + * there is no regexp, or the tag is too short. + */ + cmplen = (int)(tagp.tagname_end - tagp.tagname); + if (p_tl != 0 && cmplen > p_tl) // adjust for 'taglength' + cmplen = p_tl; + if (has_re && st->orgpat.headlen < cmplen) + cmplen = st->orgpat.headlen; + else if (state == TS_LINEAR && st->orgpat.headlen != cmplen) + continue; + +#ifdef FEAT_TAG_BINS + if (state == TS_BINARY) + { + /* + * Simplistic check for unsorted tags file. + */ + i = (int)tagp.tagname[0]; + if (sortic) + i = (int)TOUPPER_ASC(tagp.tagname[0]); + if (i < search_info.low_char || i > search_info.high_char) + sort_error = TRUE; + + /* + * Compare the current tag with the searched tag. + */ + if (sortic) + tagcmp = tag_strnicmp(tagp.tagname, st->orgpat.head, + (size_t)cmplen); + else + tagcmp = STRNCMP(tagp.tagname, st->orgpat.head, cmplen); + + /* + * A match with a shorter tag means to search forward. + * A match with a longer tag means to search backward. + */ + if (tagcmp == 0) + { + if (cmplen < st->orgpat.headlen) + tagcmp = -1; + else if (cmplen > st->orgpat.headlen) + tagcmp = 1; + } + + if (tagcmp == 0) + { + // We've located the tag, now skip back and search + // forward until the first matching tag is found. + state = TS_SKIP_BACK; + search_info.match_offset = search_info.curr_offset; + continue; + } + if (tagcmp < 0) + { + search_info.curr_offset = vim_ftell(fp); + if (search_info.curr_offset < search_info.high_offset) + { + search_info.low_offset = search_info.curr_offset; + if (sortic) + search_info.low_char = + TOUPPER_ASC(tagp.tagname[0]); + else + search_info.low_char = tagp.tagname[0]; + continue; + } + } + if (tagcmp > 0 + && search_info.curr_offset != search_info.high_offset) + { + search_info.high_offset = search_info.curr_offset; + if (sortic) + search_info.high_char = + TOUPPER_ASC(tagp.tagname[0]); + else + search_info.high_char = tagp.tagname[0]; + continue; + } + + // No match yet and are at the end of the binary search. + break; + } + else if (state == TS_SKIP_BACK) + { + if (MB_STRNICMP(tagp.tagname, st->orgpat.head, cmplen) != 0) + state = TS_STEP_FORWARD; + else + // Have to skip back more. Restore the curr_offset + // used, otherwise we get stuck at a long line. + search_info.curr_offset = search_info.curr_offset_used; + continue; + } + else if (state == TS_STEP_FORWARD) + { + if (MB_STRNICMP(tagp.tagname, st->orgpat.head, cmplen) != 0) + { + if ((off_T)vim_ftell(fp) > search_info.match_offset) + break; // past last match + else + continue; // before first match + } + } + else +#endif + // skip this match if it can't match + if (MB_STRNICMP(tagp.tagname, st->orgpat.head, cmplen) != 0) + continue; + + /* + * Can be a matching tag, isolate the file name and command. + */ + tagp.fname = tagp.tagname_end + 1; + tagp.fname_end = vim_strchr(tagp.fname, TAB); + tagp.command = tagp.fname_end + 1; + if (tagp.fname_end == NULL) + i = FAIL; + else + i = OK; + } + else + i = parse_tag_line(st->lbuf, +#ifdef FEAT_EMACS_TAGS + is_etag, +#endif + &tagp); + if (i == FAIL) + { + line_error = TRUE; + break; + } + +#ifdef FEAT_EMACS_TAGS + if (is_etag) + tagp.fname = st->ebuf; +#endif + /* + * First try matching with the pattern literally (also when it is + * a regexp). + */ + cmplen = (int)(tagp.tagname_end - tagp.tagname); + if (p_tl != 0 && cmplen > p_tl) // adjust for 'taglength' + cmplen = p_tl; + // if tag length does not match, don't try comparing + if (st->orgpat.len != cmplen) + match = FALSE; + else + { + if (st->orgpat.regmatch.rm_ic) + { + match = (MB_STRNICMP(tagp.tagname, st->orgpat.pat, cmplen) == 0); + if (match) + match_no_ic = (STRNCMP(tagp.tagname, st->orgpat.pat, + cmplen) == 0); + } + else + match = (STRNCMP(tagp.tagname, st->orgpat.pat, cmplen) == 0); + } + + /* + * Has a regexp: Also find tags matching regexp. + */ + match_re = FALSE; + if (!match && st->orgpat.regmatch.regprog != NULL) + { + int cc; + + cc = *tagp.tagname_end; + *tagp.tagname_end = NUL; + match = vim_regexec(&st->orgpat.regmatch, tagp.tagname, (colnr_T)0); + if (match) + { + matchoff = (int)(st->orgpat.regmatch.startp[0] - tagp.tagname); + if (st->orgpat.regmatch.rm_ic) + { + st->orgpat.regmatch.rm_ic = FALSE; + match_no_ic = vim_regexec(&st->orgpat.regmatch, tagp.tagname, + (colnr_T)0); + st->orgpat.regmatch.rm_ic = TRUE; + } + } + *tagp.tagname_end = cc; + match_re = TRUE; + } + + /* + * If a match is found, add it to ht_match[] and ga_match[]. + */ + if (match) + { + int len = 0; + +#ifdef FEAT_CSCOPE + if (use_cscope) + { + // Don't change the ordering, always use the same table. + mtt = MT_GL_OTH; + } + else +#endif + { + // Decide in which array to store this match. + is_current = test_for_current( +#ifdef FEAT_EMACS_TAGS + is_etag, +#endif + tagp.fname, tagp.fname_end, tag_fname, + buf_ffname); +#ifdef FEAT_EMACS_TAGS + is_static = FALSE; + if (!is_etag) // emacs tags are never static +#endif + is_static = test_for_static(&tagp); + + // decide in which of the sixteen tables to store this + // match + if (is_static) + { + if (is_current) + mtt = MT_ST_CUR; + else + mtt = MT_ST_OTH; + } + else + { + if (is_current) + mtt = MT_GL_CUR; + else + mtt = MT_GL_OTH; + } + if (st->orgpat.regmatch.rm_ic && !match_no_ic) + mtt += MT_IC_OFF; + if (match_re) + mtt += MT_RE_OFF; + } + + /* + * Add the found match in ht_match[mtt] and ga_match[mtt]. + * Store the info we need later, which depends on the kind of + * tags we are dealing with. + */ + if (help_only) + { +#ifdef FEAT_MULTI_LANG +# define ML_EXTRA 3 +#else +# define ML_EXTRA 0 +#endif + /* + * Append the help-heuristic number after the tagname, for + * sorting it later. The heuristic is ignored for + * detecting duplicates. + * The format is {tagname}@{lang}NUL{heuristic}NUL + */ + *tagp.tagname_end = NUL; + len = (int)(tagp.tagname_end - tagp.tagname); + mfp = alloc(sizeof(char_u) + len + 10 + ML_EXTRA + 1); + if (mfp != NULL) + { + int heuristic; + + p = mfp; + STRCPY(p, tagp.tagname); +#ifdef FEAT_MULTI_LANG + p[len] = '@'; + STRCPY(p + len + 1, help_lang); +#endif + + heuristic = help_heuristic(tagp.tagname, + match_re ? matchoff : 0, !match_no_ic); +#ifdef FEAT_MULTI_LANG + heuristic += help_pri; +#endif + sprintf((char *)p + len + 1 + ML_EXTRA, "%06d", + heuristic); + } + *tagp.tagname_end = TAB; + } + else if (name_only) + { + if (get_it_again) + { + char_u *temp_end = tagp.command; + + if (*temp_end == '/') + while (*temp_end && *temp_end != '\r' + && *temp_end != '\n' + && *temp_end != '$') + temp_end++; + + if (tagp.command + 2 < temp_end) + { + len = (int)(temp_end - tagp.command - 2); + mfp = alloc(len + 2); + if (mfp != NULL) + vim_strncpy(mfp, tagp.command + 2, len); + } + else + mfp = NULL; + get_it_again = FALSE; + } + else + { + len = (int)(tagp.tagname_end - tagp.tagname); + mfp = alloc(sizeof(char_u) + len + 1); + if (mfp != NULL) + vim_strncpy(mfp, tagp.tagname, len); + + // if wanted, re-read line to get long form too + if (State & INSERT) + get_it_again = p_sft; + } + } + else + { + size_t tag_fname_len = STRLEN(tag_fname); +#ifdef FEAT_EMACS_TAGS + size_t ebuf_len = 0; +#endif + + // Save the tag in a buffer. + // Use 0x02 to separate fields (Can't use NUL because the + // hash key is terminated by NUL, or Ctrl_A because that is + // part of some Emacs tag files -- see parse_tag_line). + // Emacs tag: <0x02><0x02> + // other tag: <0x02><0x02> + // without Emacs tags: <0x02> + // Here is the "mtt" value plus 1 to avoid NUL. + len = (int)tag_fname_len + (int)STRLEN(st->lbuf) + 3; +#ifdef FEAT_EMACS_TAGS + if (is_etag) + { + ebuf_len = STRLEN(st->ebuf); + len += (int)ebuf_len + 1; + } + else + ++len; +#endif + mfp = alloc(sizeof(char_u) + len + 1); + if (mfp != NULL) + { + p = mfp; + p[0] = mtt + 1; + STRCPY(p + 1, tag_fname); +#ifdef BACKSLASH_IN_FILENAME + // Ignore differences in slashes, avoid adding + // both path/file and path\file. + slash_adjust(p + 1); +#endif + p[tag_fname_len + 1] = TAG_SEP; + s = p + 1 + tag_fname_len + 1; +#ifdef FEAT_EMACS_TAGS + if (is_etag) + { + STRCPY(s, st->ebuf); + s[ebuf_len] = TAG_SEP; + s += ebuf_len + 1; + } + else + *s++ = TAG_SEP; +#endif + STRCPY(s, st->lbuf); + } + } + + if (mfp != NULL) + { + hashitem_T *hi; + + /* + * Don't add identical matches. + * Add all cscope tags, because they are all listed. + * "mfp" is used as a hash key, there is a NUL byte to end + * the part that matters for comparing, more bytes may + * follow after it. E.g. help tags store the priority + * after the NUL. + */ +#ifdef FEAT_CSCOPE + if (use_cscope) + hash++; + else +#endif + hash = hash_hash(mfp); + hi = hash_lookup(&st->ht_match[mtt], mfp, hash); + if (HASHITEM_EMPTY(hi)) + { + if (hash_add_item(&st->ht_match[mtt], hi, mfp, hash) + == FAIL + || ga_grow(&st->ga_match[mtt], 1) != OK) + { + // Out of memory! Just forget about the rest. + st->stop_searching = TRUE; + break; + } + else + { + ((char_u **)(st->ga_match[mtt].ga_data)) + [st->ga_match[mtt].ga_len++] = mfp; + st->match_count++; + } + } + else + // duplicate tag, drop it + vim_free(mfp); + } + } +#ifdef FEAT_CSCOPE + if (use_cscope && eof) + break; +#endif + } // forever + + if (line_error) + { + semsg(_(e_format_error_in_tags_file_str), tag_fname); +#ifdef FEAT_CSCOPE + if (!use_cscope) +#endif + semsg(_("Before byte %ld"), (long)vim_ftell(fp)); + st->stop_searching = TRUE; + line_error = FALSE; + } + +#ifdef FEAT_CSCOPE + if (!use_cscope) +#endif + fclose(fp); +#ifdef FEAT_EMACS_TAGS + while (incstack_idx) + { + --incstack_idx; + fclose(incstack[incstack_idx].fp); + vim_free(incstack[incstack_idx].etag_fname); + } +#endif + if (vimconv.vc_type != CONV_NONE) + convert_setup(&vimconv, NULL, NULL); + +#ifdef FEAT_TAG_BINS + tag_file_sorted = NUL; + if (sort_error) + { + semsg(_(e_tags_file_not_sorted_str), tag_fname); + sort_error = FALSE; + } +#endif + + /* + * Stop searching if sufficient tags have been found. + */ + if (st->match_count >= st->mincount) + st->stop_searching = TRUE; + + return OK; +} + +/* + * Copy the tags found by find_tags() to 'matchesp'. + */ + static void +findtags_copy_matches( + findtags_state_T *st, + char_u ***matchesp, + int *num_matches, + int name_only) +{ + char_u **matches; + int mtt; + int i; + char_u *mfp; + char_u *p; + + if (st->match_count > 0) + matches = ALLOC_MULT(char_u *, st->match_count); + else + matches = NULL; + st->match_count = 0; + for (mtt = 0; mtt < MT_COUNT; ++mtt) + { + for (i = 0; i < st->ga_match[mtt].ga_len; ++i) + { + mfp = ((char_u **)(st->ga_match[mtt].ga_data))[i]; + if (matches == NULL) + vim_free(mfp); + else + { + if (!name_only) + { + // Change mtt back to zero-based. + *mfp = *mfp - 1; + + // change the TAG_SEP back to NUL + for (p = mfp + 1; *p != NUL; ++p) + if (*p == TAG_SEP) + *p = NUL; + } + matches[st->match_count++] = mfp; + } + } + + ga_clear(&st->ga_match[mtt]); + hash_clear(&st->ht_match[mtt]); + } + + *matchesp = matches; + *num_matches = st->match_count; +} + +/* * find_tags() - search for tags in tags files * * Return FAIL if search completely failed (*num_matches will be 0, *matchesp * will be NULL), OK otherwise. * - * There is a priority in which type of tag is recognized. - * + * Priority depending on which type of tag is recognized: * 6. A static or global tag with a full matching tag for the current file. * 5. A global tag with a full matching tag for another file. * 4. A static tag with a full matching tag for another file. @@ -1613,108 +2762,31 @@ find_tags( // other: minimal number of matches char_u *buf_ffname) // name of buffer for priority { - FILE *fp; - char_u *lbuf; // line buffer - int lbuf_size = LSIZE; // length of lbuf - char_u *tag_fname; // name of tag file + findtags_state_T st; + char_u *tag_fname; // name of tag file tagname_T tn; // info for get_tagfname() int first_file; // trying first tag file - tagptrs_T tagp; - int did_open = FALSE; // did open a tag file - int stop_searching = FALSE; // stop when match found or error int retval = FAIL; // return value - int is_static; // current tag line is static - int is_current; // file name matches - int eof = FALSE; // found end-of-file - char_u *p; - char_u *s; - int i; #ifdef FEAT_TAG_BINS - int tag_file_sorted = NUL; // !_TAG_FILE_SORTED value - struct tag_search_info // Binary search file offsets - { - off_T low_offset; // offset for first char of first line that - // could match - off_T high_offset; // offset of char after last line that could - // match - off_T curr_offset; // Current file offset in search range - off_T curr_offset_used; // curr_offset used when skipping back - off_T match_offset; // Where the binary search found a tag - int low_char; // first char at low_offset - int high_char; // first char at high_offset - } search_info; - off_T filesize; - int tagcmp; - off_T offset; int round; #endif - enum - { - TS_START, // at start of file - TS_LINEAR // linear searching forward, till EOF -#ifdef FEAT_TAG_BINS - , TS_BINARY, // binary searching - TS_SKIP_BACK, // skipping backwards - TS_STEP_FORWARD // stepping forwards -#endif - } state; // Current search state - - int cmplen; - int match; // matches - int match_no_ic = 0;// matches with rm_ic == FALSE - int match_re; // match with regexp - int matchoff = 0; + int save_emsg_off; -#ifdef FEAT_EMACS_TAGS - /* - * Stack for included emacs-tags file. - * It has a fixed size, to truncate cyclic includes. jw - */ -# define INCSTACK_SIZE 42 - struct - { - FILE *fp; - char_u *etag_fname; - } incstack[INCSTACK_SIZE]; - - int incstack_idx = 0; // index in incstack - char_u *ebuf; // additional buffer for etag fname - int is_etag; // current file is emaces style -#endif - - char_u *mfp; - garray_T ga_match[MT_COUNT]; // stores matches in sequence - hashtab_T ht_match[MT_COUNT]; // stores matches by key - hash_T hash = 0; - int match_count = 0; // number of matches found - char_u **matches; - int mtt; int help_save; #ifdef FEAT_MULTI_LANG - int help_pri = 0; - char_u *help_lang_find = NULL; // lang to be found - char_u help_lang[3]; // lang of current tags file + int i; char_u *saved_pat = NULL; // copy of pat[] - int is_txt = FALSE; // flag of file extension #endif - pat_T orgpat; // holds unconverted pattern info - vimconv_T vimconv; - #ifdef FEAT_TAG_BINS int findall = (mincount == MAXCOL || mincount == TAG_MANY); // find all matching tags - int sort_error = FALSE; // tags file not sorted - int linear; // do a linear search - int sortic = FALSE; // tag file sorted in nocase #endif - int line_error = FALSE; // syntax error int has_re = (flags & TAG_REGEXP); // regexp used int help_only = (flags & TAG_HELP); int name_only = (flags & TAG_NAMES); int noic = (flags & TAG_NOIC); - int get_it_again = FALSE; #ifdef FEAT_CSCOPE int use_cscope = (flags & TAG_CSCOPE); #endif @@ -1738,34 +2810,21 @@ find_tags( } help_save = curbuf->b_help; - orgpat.pat = pat; - orgpat.regmatch.regprog = NULL; - vimconv.vc_type = CONV_NONE; /* * Allocate memory for the buffers that are used */ - lbuf = alloc(lbuf_size); tag_fname = alloc(MAXPATHL + 1); -#ifdef FEAT_EMACS_TAGS - ebuf = alloc(LSIZE); -#endif - for (mtt = 0; mtt < MT_COUNT; ++mtt) - { - ga_init2(&ga_match[mtt], sizeof(char_u *), 100); - hash_init(&ht_match[mtt]); - } // check for out of memory situation - if (lbuf == NULL || tag_fname == NULL -#ifdef FEAT_EMACS_TAGS - || ebuf == NULL -#endif - ) + if (tag_fname == NULL) + goto findtag_end; + + if (findtags_state_init(&st, pat, mincount) == FAIL) goto findtag_end; #ifdef FEAT_CSCOPE - STRCPY(tag_fname, "from cscope"); // for error messages + STRCPY(tag_fname, "from cscope"); // for error messages #endif /* @@ -1782,47 +2841,40 @@ find_tags( } #endif - orgpat.len = (int)STRLEN(pat); #ifdef FEAT_MULTI_LANG if (curbuf->b_help) { // When "@ab" is specified use only the "ab" language, otherwise // search all languages. - if (orgpat.len > 3 && pat[orgpat.len - 3] == '@' - && ASCII_ISALPHA(pat[orgpat.len - 2]) - && ASCII_ISALPHA(pat[orgpat.len - 1])) + if (st.orgpat.len > 3 && pat[st.orgpat.len - 3] == '@' + && ASCII_ISALPHA(pat[st.orgpat.len - 2]) + && ASCII_ISALPHA(pat[st.orgpat.len - 1])) { - saved_pat = vim_strnsave(pat, orgpat.len - 3); + saved_pat = vim_strnsave(pat, st.orgpat.len - 3); if (saved_pat != NULL) { - help_lang_find = &pat[orgpat.len - 2]; - orgpat.pat = saved_pat; - orgpat.len -= 3; + st.help_lang_find = &pat[st.orgpat.len - 2]; + st.orgpat.pat = saved_pat; + st.orgpat.len -= 3; } } } #endif - if (p_tl != 0 && orgpat.len > p_tl) // adjust for 'taglength' - orgpat.len = p_tl; + if (p_tl != 0 && st.orgpat.len > p_tl) // adjust for 'taglength' + st.orgpat.len = p_tl; save_emsg_off = emsg_off; emsg_off = TRUE; // don't want error for invalid RE here - prepare_pats(&orgpat, has_re); + prepare_pats(&st.orgpat, has_re); emsg_off = save_emsg_off; - if (has_re && orgpat.regmatch.regprog == NULL) + if (has_re && st.orgpat.regmatch.regprog == NULL) goto findtag_end; -#ifdef FEAT_TAG_BINS - // This is only to avoid a compiler warning for using search_info - // uninitialised. - CLEAR_FIELD(search_info); -#endif - #ifdef FEAT_EVAL if (*curbuf->b_p_tfu != NUL && use_tfu && !tfu_in_use) { tfu_in_use = TRUE; - retval = find_tagfunc_tags(pat, &ga_match[0], &match_count, + retval = find_tagfunc_tags(pat, &st.ga_match[0], &st.match_count, flags, buf_ffname); tfu_in_use = FALSE; if (retval != NOTDONE) @@ -1843,20 +2895,20 @@ find_tags( #ifdef FEAT_MULTI_LANG // Set a flag if the file extension is .txt if ((flags & TAG_KEEP_LANG) - && help_lang_find == NULL + && st.help_lang_find == NULL && curbuf->b_fname != NULL && (i = (int)STRLEN(curbuf->b_fname)) > 4 && STRICMP(curbuf->b_fname + i - 4, ".txt") == 0) - is_txt = TRUE; + st.is_txt = TRUE; #endif #ifdef FEAT_TAG_BINS - orgpat.regmatch.rm_ic = ((p_ic || !noic) - && (findall || orgpat.headlen == 0 || !p_tbs)); + st.orgpat.regmatch.rm_ic = ((p_ic || !noic) + && (findall || st.orgpat.headlen == 0 || !p_tbs)); for (round = 1; round <= 2; ++round) { - linear = (orgpat.headlen == 0 || !p_tbs || round == 2); + st.linear = (st.orgpat.headlen == 0 || !p_tbs || round == 2); #else - orgpat.regmatch.rm_ic = (p_ic || !noic); + st.orgpat.regmatch.rm_ic = (p_ic || !noic); #endif /* @@ -1869,937 +2921,17 @@ find_tags( get_tagfname(&tn, first_file, tag_fname) == OK; first_file = FALSE) { - /* - * A file that doesn't exist is silently ignored. Only when not a - * single file is found, an error message is given (further on). - */ -#ifdef FEAT_CSCOPE - if (use_cscope) - fp = NULL; // avoid GCC warning - else -#endif - { -#ifdef FEAT_MULTI_LANG - if (curbuf->b_help) - { - // Keep en if the file extension is .txt - if (is_txt) - STRCPY(help_lang, "en"); - else - { - // Prefer help tags according to 'helplang'. Put the - // two-letter language name in help_lang[]. - i = (int)STRLEN(tag_fname); - if (i > 3 && tag_fname[i - 3] == '-') - STRCPY(help_lang, tag_fname + i - 2); - else - STRCPY(help_lang, "en"); - } - // When searching for a specific language skip tags files - // for other languages. - if (help_lang_find != NULL - && STRICMP(help_lang, help_lang_find) != 0) - continue; - - // For CTRL-] in a help file prefer a match with the same - // language. - if ((flags & TAG_KEEP_LANG) - && help_lang_find == NULL - && curbuf->b_fname != NULL - && (i = (int)STRLEN(curbuf->b_fname)) > 4 - && curbuf->b_fname[i - 1] == 'x' - && curbuf->b_fname[i - 4] == '.' - && STRNICMP(curbuf->b_fname + i - 3, help_lang, 2) == 0) - help_pri = 0; - else - { - help_pri = 1; - for (s = p_hlg; *s != NUL; ++s) - { - if (STRNICMP(s, help_lang, 2) == 0) - break; - ++help_pri; - if ((s = vim_strchr(s, ',')) == NULL) - break; - } - if (s == NULL || *s == NUL) - { - // Language not in 'helplang': use last, prefer English, - // unless found already. - ++help_pri; - if (STRICMP(help_lang, "en") != 0) - ++help_pri; - } - } - } -#endif - - if ((fp = mch_fopen((char *)tag_fname, "r")) == NULL) - continue; - - if (p_verbose >= 5) - { - verbose_enter(); - smsg(_("Searching tags file %s"), tag_fname); - verbose_leave(); - } - } - did_open = TRUE; // remember that we found at least one file - - state = TS_START; // we're at the start of the file -#ifdef FEAT_EMACS_TAGS - is_etag = 0; // default is: not emacs style -#endif - - /* - * Read and parse the lines in the file one by one - */ - for (;;) - { -#ifdef FEAT_TAG_BINS - // check for CTRL-C typed, more often when jumping around - if (state == TS_BINARY || state == TS_SKIP_BACK) - line_breakcheck(); - else -#endif - fast_breakcheck(); - if ((flags & TAG_INS_COMP)) // Double brackets for gcc - ins_compl_check_keys(30, FALSE); - if (got_int || ins_compl_interrupted()) - { - stop_searching = TRUE; - break; - } - // When mincount is TAG_MANY, stop when enough matches have been - // found (for completion). - if (mincount == TAG_MANY && match_count >= TAG_MANY) - { - stop_searching = TRUE; - retval = OK; - break; - } - if (get_it_again) - goto line_read_in; -#ifdef FEAT_TAG_BINS - /* - * For binary search: compute the next offset to use. - */ - if (state == TS_BINARY) - { - offset = search_info.low_offset + ((search_info.high_offset - - search_info.low_offset) / 2); - if (offset == search_info.curr_offset) - break; // End the binary search without a match. - else - search_info.curr_offset = offset; - } - - /* - * Skipping back (after a match during binary search). - */ - else if (state == TS_SKIP_BACK) - { - search_info.curr_offset -= lbuf_size * 2; - if (search_info.curr_offset < 0) - { - search_info.curr_offset = 0; - rewind(fp); - state = TS_STEP_FORWARD; - } - } - - /* - * When jumping around in the file, first read a line to find the - * start of the next line. - */ - if (state == TS_BINARY || state == TS_SKIP_BACK) - { - // Adjust the search file offset to the correct position - search_info.curr_offset_used = search_info.curr_offset; - vim_fseek(fp, search_info.curr_offset, SEEK_SET); - eof = vim_fgets(lbuf, lbuf_size, fp); - if (!eof && search_info.curr_offset != 0) - { - search_info.curr_offset = vim_ftell(fp); - if (search_info.curr_offset == search_info.high_offset) - { - // oops, gone a bit too far; try from low offset - vim_fseek(fp, search_info.low_offset, SEEK_SET); - search_info.curr_offset = search_info.low_offset; - } - eof = vim_fgets(lbuf, lbuf_size, fp); - } - // skip empty and blank lines - while (!eof && vim_isblankline(lbuf)) - { - search_info.curr_offset = vim_ftell(fp); - eof = vim_fgets(lbuf, lbuf_size, fp); - } - if (eof) - { - // Hit end of file. Skip backwards. - state = TS_SKIP_BACK; - search_info.match_offset = vim_ftell(fp); - search_info.curr_offset = search_info.curr_offset_used; - continue; - } - } - - /* - * Not jumping around in the file: Read the next line. - */ - else -#endif - { - // skip empty and blank lines - do - { + if (find_tags_in_file(tag_fname, &st, flags, buf_ffname) == FAIL) + goto findtag_end; + if (st.stop_searching #ifdef FEAT_CSCOPE - if (use_cscope) - eof = cs_fgets(lbuf, lbuf_size); - else -#endif - { - search_info.curr_offset = vim_ftell(fp); - eof = vim_fgets(lbuf, lbuf_size, fp); - } - } while (!eof && vim_isblankline(lbuf)); - - if (eof) - { -#ifdef FEAT_EMACS_TAGS - if (incstack_idx) // this was an included file - { - --incstack_idx; - fclose(fp); // end of this file ... - fp = incstack[incstack_idx].fp; - STRCPY(tag_fname, incstack[incstack_idx].etag_fname); - vim_free(incstack[incstack_idx].etag_fname); - is_etag = 1; // (only etags can include) - continue; // ... continue with parent file - } - else -#endif - break; // end of file - } - } -line_read_in: - - if (vimconv.vc_type != CONV_NONE) - { - char_u *conv_line; - int len; - - // Convert every line. Converting the pattern from 'enc' to - // the tags file encoding doesn't work, because characters are - // not recognized. - conv_line = string_convert(&vimconv, lbuf, NULL); - if (conv_line != NULL) - { - // Copy or swap lbuf and conv_line. - len = (int)STRLEN(conv_line) + 1; - if (len > lbuf_size) - { - vim_free(lbuf); - lbuf = conv_line; - lbuf_size = len; - } - else - { - STRCPY(lbuf, conv_line); - vim_free(conv_line); - } - } - } - - -#ifdef FEAT_EMACS_TAGS - /* - * Emacs tags line with CTRL-L: New file name on next line. - * The file name is followed by a ','. - * Remember etag file name in ebuf. - */ - if (*lbuf == Ctrl_L -# ifdef FEAT_CSCOPE - && !use_cscope -# endif - ) - { - is_etag = 1; // in case at the start - state = TS_LINEAR; - if (!vim_fgets(ebuf, LSIZE, fp)) - { - for (p = ebuf; *p && *p != ','; p++) - ; - *p = NUL; - - /* - * atoi(p+1) is the number of bytes before the next ^L - * unless it is an include statement. - */ - if (STRNCMP(p + 1, "include", 7) == 0 - && incstack_idx < INCSTACK_SIZE) - { - // Save current "fp" and "tag_fname" in the stack. - if ((incstack[incstack_idx].etag_fname = - vim_strsave(tag_fname)) != NULL) - { - char_u *fullpath_ebuf; - - incstack[incstack_idx].fp = fp; - fp = NULL; - - // Figure out "tag_fname" and "fp" to use for - // included file. - fullpath_ebuf = expand_tag_fname(ebuf, - tag_fname, FALSE); - if (fullpath_ebuf != NULL) - { - fp = mch_fopen((char *)fullpath_ebuf, "r"); - if (fp != NULL) - { - if (STRLEN(fullpath_ebuf) > LSIZE) - semsg(_(e_tag_file_path_truncated_for_str), ebuf); - vim_strncpy(tag_fname, fullpath_ebuf, - MAXPATHL); - ++incstack_idx; - is_etag = 0; // we can include anything - } - vim_free(fullpath_ebuf); - } - if (fp == NULL) - { - // Can't open the included file, skip it and - // restore old value of "fp". - fp = incstack[incstack_idx].fp; - vim_free(incstack[incstack_idx].etag_fname); - } - } - } - } - continue; - } -#endif - - /* - * When still at the start of the file, check for Emacs tags file - * format, and for "not sorted" flag. - */ - if (state == TS_START) - { - // The header ends when the line sorts below "!_TAG_". When - // case is folded lower case letters sort before "_". - if (STRNCMP(lbuf, "!_TAG_", 6) <= 0 - || (lbuf[0] == '!' && ASCII_ISLOWER(lbuf[1]))) - { - if (STRNCMP(lbuf, "!_TAG_", 6) != 0) - // Non-header item before the header, e.g. "!" itself. - goto parse_line; - - /* - * Read header line. - */ -#ifdef FEAT_TAG_BINS - if (STRNCMP(lbuf, "!_TAG_FILE_SORTED\t", 18) == 0) - tag_file_sorted = lbuf[18]; -#endif - if (STRNCMP(lbuf, "!_TAG_FILE_ENCODING\t", 20) == 0) - { - // Prepare to convert every line from the specified - // encoding to 'encoding'. - for (p = lbuf + 20; *p > ' ' && *p < 127; ++p) - ; - *p = NUL; - convert_setup(&vimconv, lbuf + 20, p_enc); - } - - // Read the next line. Unrecognized flags are ignored. - continue; - } - - // Headers ends. - -#ifdef FEAT_TAG_BINS - /* - * When there is no tag head, or ignoring case, need to do a - * linear search. - * When no "!_TAG_" is found, default to binary search. If - * the tag file isn't sorted, the second loop will find it. - * When "!_TAG_FILE_SORTED" found: start binary search if - * flag set. - * For cscope, it's always linear. - */ -# ifdef FEAT_CSCOPE - if (linear || use_cscope) -# else - if (linear) -# endif - state = TS_LINEAR; - else if (tag_file_sorted == NUL) - state = TS_BINARY; - else if (tag_file_sorted == '1') - state = TS_BINARY; - else if (tag_file_sorted == '2') - { - state = TS_BINARY; - sortic = TRUE; - orgpat.regmatch.rm_ic = (p_ic || !noic); - } - else - state = TS_LINEAR; - - if (state == TS_BINARY && orgpat.regmatch.rm_ic && !sortic) - { - // Binary search won't work for ignoring case, use linear - // search. - linear = TRUE; - state = TS_LINEAR; - } -#else - state = TS_LINEAR; -#endif - -#ifdef FEAT_TAG_BINS - // When starting a binary search, get the size of the file and - // compute the first offset. - if (state == TS_BINARY) - { - if (vim_fseek(fp, 0L, SEEK_END) != 0) - // can't seek, don't use binary search - state = TS_LINEAR; - else - { - // Get the tag file size (don't use mch_fstat(), it's - // not portable). Don't use lseek(), it doesn't work - // properly on MacOS Catalina. - filesize = vim_ftell(fp); - vim_fseek(fp, 0L, SEEK_SET); - - // Calculate the first read offset in the file. Start - // the search in the middle of the file. - search_info.low_offset = 0; - search_info.low_char = 0; - search_info.high_offset = filesize; - search_info.curr_offset = 0; - search_info.high_char = 0xff; - } - continue; - } -#endif - } - -parse_line: - // When the line is too long the NUL will not be in the - // last-but-one byte (see vim_fgets()). - // Has been reported for Mozilla JS with extremely long names. - // In that case we need to increase lbuf_size. - if (lbuf[lbuf_size - 2] != NUL -#ifdef FEAT_CSCOPE - && !use_cscope -#endif - ) - { - lbuf_size *= 2; - vim_free(lbuf); - lbuf = alloc(lbuf_size); - if (lbuf == NULL) - goto findtag_end; - - if (state == TS_STEP_FORWARD) - // Seek to the same position to read the same line again - vim_fseek(fp, search_info.curr_offset, SEEK_SET); -#ifdef FEAT_TAG_BINS - // this will try the same thing again, make sure the offset is - // different - search_info.curr_offset = 0; -#endif - continue; - } - - /* - * Figure out where the different strings are in this line. - * For "normal" tags: Do a quick check if the tag matches. - * This speeds up tag searching a lot! - */ - if (orgpat.headlen -#ifdef FEAT_EMACS_TAGS - && !is_etag + || use_cscope #endif - ) - { - CLEAR_FIELD(tagp); - tagp.tagname = lbuf; - tagp.tagname_end = vim_strchr(lbuf, TAB); - if (tagp.tagname_end == NULL) - { - // Corrupted tag line. - line_error = TRUE; - break; - } - - /* - * Skip this line if the length of the tag is different and - * there is no regexp, or the tag is too short. - */ - cmplen = (int)(tagp.tagname_end - tagp.tagname); - if (p_tl != 0 && cmplen > p_tl) // adjust for 'taglength' - cmplen = p_tl; - if (has_re && orgpat.headlen < cmplen) - cmplen = orgpat.headlen; - else if (state == TS_LINEAR && orgpat.headlen != cmplen) - continue; - -#ifdef FEAT_TAG_BINS - if (state == TS_BINARY) - { - /* - * Simplistic check for unsorted tags file. - */ - i = (int)tagp.tagname[0]; - if (sortic) - i = (int)TOUPPER_ASC(tagp.tagname[0]); - if (i < search_info.low_char || i > search_info.high_char) - sort_error = TRUE; - - /* - * Compare the current tag with the searched tag. - */ - if (sortic) - tagcmp = tag_strnicmp(tagp.tagname, orgpat.head, - (size_t)cmplen); - else - tagcmp = STRNCMP(tagp.tagname, orgpat.head, cmplen); - - /* - * A match with a shorter tag means to search forward. - * A match with a longer tag means to search backward. - */ - if (tagcmp == 0) - { - if (cmplen < orgpat.headlen) - tagcmp = -1; - else if (cmplen > orgpat.headlen) - tagcmp = 1; - } - - if (tagcmp == 0) - { - // We've located the tag, now skip back and search - // forward until the first matching tag is found. - state = TS_SKIP_BACK; - search_info.match_offset = search_info.curr_offset; - continue; - } - if (tagcmp < 0) - { - search_info.curr_offset = vim_ftell(fp); - if (search_info.curr_offset < search_info.high_offset) - { - search_info.low_offset = search_info.curr_offset; - if (sortic) - search_info.low_char = - TOUPPER_ASC(tagp.tagname[0]); - else - search_info.low_char = tagp.tagname[0]; - continue; - } - } - if (tagcmp > 0 - && search_info.curr_offset != search_info.high_offset) - { - search_info.high_offset = search_info.curr_offset; - if (sortic) - search_info.high_char = - TOUPPER_ASC(tagp.tagname[0]); - else - search_info.high_char = tagp.tagname[0]; - continue; - } - - // No match yet and are at the end of the binary search. - break; - } - else if (state == TS_SKIP_BACK) - { - if (MB_STRNICMP(tagp.tagname, orgpat.head, cmplen) != 0) - state = TS_STEP_FORWARD; - else - // Have to skip back more. Restore the curr_offset - // used, otherwise we get stuck at a long line. - search_info.curr_offset = search_info.curr_offset_used; - continue; - } - else if (state == TS_STEP_FORWARD) - { - if (MB_STRNICMP(tagp.tagname, orgpat.head, cmplen) != 0) - { - if ((off_T)vim_ftell(fp) > search_info.match_offset) - break; // past last match - else - continue; // before first match - } - } - else -#endif - // skip this match if it can't match - if (MB_STRNICMP(tagp.tagname, orgpat.head, cmplen) != 0) - continue; - - /* - * Can be a matching tag, isolate the file name and command. - */ - tagp.fname = tagp.tagname_end + 1; - tagp.fname_end = vim_strchr(tagp.fname, TAB); - tagp.command = tagp.fname_end + 1; - if (tagp.fname_end == NULL) - i = FAIL; - else - i = OK; - } - else - i = parse_tag_line(lbuf, -#ifdef FEAT_EMACS_TAGS - is_etag, -#endif - &tagp); - if (i == FAIL) - { - line_error = TRUE; - break; - } - -#ifdef FEAT_EMACS_TAGS - if (is_etag) - tagp.fname = ebuf; -#endif - /* - * First try matching with the pattern literally (also when it is - * a regexp). - */ - cmplen = (int)(tagp.tagname_end - tagp.tagname); - if (p_tl != 0 && cmplen > p_tl) // adjust for 'taglength' - cmplen = p_tl; - // if tag length does not match, don't try comparing - if (orgpat.len != cmplen) - match = FALSE; - else - { - if (orgpat.regmatch.rm_ic) - { - match = (MB_STRNICMP(tagp.tagname, orgpat.pat, cmplen) == 0); - if (match) - match_no_ic = (STRNCMP(tagp.tagname, orgpat.pat, - cmplen) == 0); - } - else - match = (STRNCMP(tagp.tagname, orgpat.pat, cmplen) == 0); - } - - /* - * Has a regexp: Also find tags matching regexp. - */ - match_re = FALSE; - if (!match && orgpat.regmatch.regprog != NULL) - { - int cc; - - cc = *tagp.tagname_end; - *tagp.tagname_end = NUL; - match = vim_regexec(&orgpat.regmatch, tagp.tagname, (colnr_T)0); - if (match) - { - matchoff = (int)(orgpat.regmatch.startp[0] - tagp.tagname); - if (orgpat.regmatch.rm_ic) - { - orgpat.regmatch.rm_ic = FALSE; - match_no_ic = vim_regexec(&orgpat.regmatch, tagp.tagname, - (colnr_T)0); - orgpat.regmatch.rm_ic = TRUE; - } - } - *tagp.tagname_end = cc; - match_re = TRUE; - } - - /* - * If a match is found, add it to ht_match[] and ga_match[]. - */ - if (match) - { - int len = 0; - -#ifdef FEAT_CSCOPE - if (use_cscope) - { - // Don't change the ordering, always use the same table. - mtt = MT_GL_OTH; - } - else -#endif - { - // Decide in which array to store this match. - is_current = test_for_current( -#ifdef FEAT_EMACS_TAGS - is_etag, -#endif - tagp.fname, tagp.fname_end, tag_fname, - buf_ffname); -#ifdef FEAT_EMACS_TAGS - is_static = FALSE; - if (!is_etag) // emacs tags are never static -#endif - is_static = test_for_static(&tagp); - - // decide in which of the sixteen tables to store this - // match - if (is_static) - { - if (is_current) - mtt = MT_ST_CUR; - else - mtt = MT_ST_OTH; - } - else - { - if (is_current) - mtt = MT_GL_CUR; - else - mtt = MT_GL_OTH; - } - if (orgpat.regmatch.rm_ic && !match_no_ic) - mtt += MT_IC_OFF; - if (match_re) - mtt += MT_RE_OFF; - } - - /* - * Add the found match in ht_match[mtt] and ga_match[mtt]. - * Store the info we need later, which depends on the kind of - * tags we are dealing with. - */ - if (help_only) - { -#ifdef FEAT_MULTI_LANG -# define ML_EXTRA 3 -#else -# define ML_EXTRA 0 -#endif - /* - * Append the help-heuristic number after the tagname, for - * sorting it later. The heuristic is ignored for - * detecting duplicates. - * The format is {tagname}@{lang}NUL{heuristic}NUL - */ - *tagp.tagname_end = NUL; - len = (int)(tagp.tagname_end - tagp.tagname); - mfp = alloc(sizeof(char_u) + len + 10 + ML_EXTRA + 1); - if (mfp != NULL) - { - int heuristic; - - p = mfp; - STRCPY(p, tagp.tagname); -#ifdef FEAT_MULTI_LANG - p[len] = '@'; - STRCPY(p + len + 1, help_lang); -#endif - - heuristic = help_heuristic(tagp.tagname, - match_re ? matchoff : 0, !match_no_ic); -#ifdef FEAT_MULTI_LANG - heuristic += help_pri; -#endif - sprintf((char *)p + len + 1 + ML_EXTRA, "%06d", - heuristic); - } - *tagp.tagname_end = TAB; - } - else if (name_only) - { - if (get_it_again) - { - char_u *temp_end = tagp.command; - - if (*temp_end == '/') - while (*temp_end && *temp_end != '\r' - && *temp_end != '\n' - && *temp_end != '$') - temp_end++; - - if (tagp.command + 2 < temp_end) - { - len = (int)(temp_end - tagp.command - 2); - mfp = alloc(len + 2); - if (mfp != NULL) - vim_strncpy(mfp, tagp.command + 2, len); - } - else - mfp = NULL; - get_it_again = FALSE; - } - else - { - len = (int)(tagp.tagname_end - tagp.tagname); - mfp = alloc(sizeof(char_u) + len + 1); - if (mfp != NULL) - vim_strncpy(mfp, tagp.tagname, len); - - // if wanted, re-read line to get long form too - if (State & INSERT) - get_it_again = p_sft; - } - } - else - { - size_t tag_fname_len = STRLEN(tag_fname); -#ifdef FEAT_EMACS_TAGS - size_t ebuf_len = 0; -#endif - - // Save the tag in a buffer. - // Use 0x02 to separate fields (Can't use NUL because the - // hash key is terminated by NUL, or Ctrl_A because that is - // part of some Emacs tag files -- see parse_tag_line). - // Emacs tag: <0x02><0x02> - // other tag: <0x02><0x02> - // without Emacs tags: <0x02> - // Here is the "mtt" value plus 1 to avoid NUL. - len = (int)tag_fname_len + (int)STRLEN(lbuf) + 3; -#ifdef FEAT_EMACS_TAGS - if (is_etag) - { - ebuf_len = STRLEN(ebuf); - len += (int)ebuf_len + 1; - } - else - ++len; -#endif - mfp = alloc(sizeof(char_u) + len + 1); - if (mfp != NULL) - { - p = mfp; - p[0] = mtt + 1; - STRCPY(p + 1, tag_fname); -#ifdef BACKSLASH_IN_FILENAME - // Ignore differences in slashes, avoid adding - // both path/file and path\file. - slash_adjust(p + 1); -#endif - p[tag_fname_len + 1] = TAG_SEP; - s = p + 1 + tag_fname_len + 1; -#ifdef FEAT_EMACS_TAGS - if (is_etag) - { - STRCPY(s, ebuf); - s[ebuf_len] = TAG_SEP; - s += ebuf_len + 1; - } - else - *s++ = TAG_SEP; -#endif - STRCPY(s, lbuf); - } - } - - if (mfp != NULL) - { - hashitem_T *hi; - - /* - * Don't add identical matches. - * Add all cscope tags, because they are all listed. - * "mfp" is used as a hash key, there is a NUL byte to end - * the part that matters for comparing, more bytes may - * follow after it. E.g. help tags store the priority - * after the NUL. - */ -#ifdef FEAT_CSCOPE - if (use_cscope) - hash++; - else -#endif - hash = hash_hash(mfp); - hi = hash_lookup(&ht_match[mtt], mfp, hash); - if (HASHITEM_EMPTY(hi)) - { - if (hash_add_item(&ht_match[mtt], hi, mfp, hash) - == FAIL - || ga_grow(&ga_match[mtt], 1) != OK) - { - // Out of memory! Just forget about the rest. - retval = OK; - stop_searching = TRUE; - break; - } - else - { - ((char_u **)(ga_match[mtt].ga_data)) - [ga_match[mtt].ga_len++] = mfp; - ++match_count; - } - } - else - // duplicate tag, drop it - vim_free(mfp); - } - } -#ifdef FEAT_CSCOPE - if (use_cscope && eof) - break; -#endif - } // forever - - if (line_error) - { - semsg(_(e_format_error_in_tags_file_str), tag_fname); -#ifdef FEAT_CSCOPE - if (!use_cscope) -#endif - semsg(_("Before byte %ld"), (long)vim_ftell(fp)); - stop_searching = TRUE; - line_error = FALSE; - } - -#ifdef FEAT_CSCOPE - if (!use_cscope) -#endif - fclose(fp); -#ifdef FEAT_EMACS_TAGS - while (incstack_idx) - { - --incstack_idx; - fclose(incstack[incstack_idx].fp); - vim_free(incstack[incstack_idx].etag_fname); - } -#endif - if (vimconv.vc_type != CONV_NONE) - convert_setup(&vimconv, NULL, NULL); - -#ifdef FEAT_TAG_BINS - tag_file_sorted = NUL; - if (sort_error) - { - semsg(_(e_tags_file_not_sorted_str), tag_fname); - sort_error = FALSE; - } -#endif - - /* - * Stop searching if sufficient tags have been found. - */ - if (match_count >= mincount) - { - retval = OK; - stop_searching = TRUE; - } - -#ifdef FEAT_CSCOPE - if (stop_searching || use_cscope) -#else - if (stop_searching) -#endif - break; - + ) + { + retval = OK; + break; + } } // end of for-each-file loop #ifdef FEAT_CSCOPE @@ -2808,31 +2940,34 @@ parse_line: tagname_free(&tn); #ifdef FEAT_TAG_BINS - // stop searching when already did a linear search, or when TAG_NOIC - // used, and 'ignorecase' not set or already did case-ignore search - if (stop_searching || linear || (!p_ic && noic) || orgpat.regmatch.rm_ic) - break; + // stop searching when already did a linear search, or when TAG_NOIC + // used, and 'ignorecase' not set or already did case-ignore search + if (st.stop_searching || st.linear || (!p_ic && noic) || + st.orgpat.regmatch.rm_ic) + break; # ifdef FEAT_CSCOPE - if (use_cscope) - break; + if (use_cscope) + break; # endif - orgpat.regmatch.rm_ic = TRUE; // try another time while ignoring case + + // try another time while ignoring case + st.orgpat.regmatch.rm_ic = TRUE; } #endif - if (!stop_searching) + if (!st.stop_searching) { - if (!did_open && verbose) // never opened any tags file + if (!st.did_open && verbose) // never opened any tags file emsg(_(e_no_tags_file)); retval = OK; // It's OK even when no tag found } findtag_end: - vim_free(lbuf); - vim_regfree(orgpat.regmatch.regprog); + vim_free(st.lbuf); + vim_regfree(st.orgpat.regmatch.regprog); vim_free(tag_fname); #ifdef FEAT_EMACS_TAGS - vim_free(ebuf); + vim_free(st.ebuf); #endif /* @@ -2840,42 +2975,9 @@ findtag_end: * matches. When retval == FAIL, free the matches. */ if (retval == FAIL) - match_count = 0; - - if (match_count > 0) - matches = ALLOC_MULT(char_u *, match_count); - else - matches = NULL; - match_count = 0; - for (mtt = 0; mtt < MT_COUNT; ++mtt) - { - for (i = 0; i < ga_match[mtt].ga_len; ++i) - { - mfp = ((char_u **)(ga_match[mtt].ga_data))[i]; - if (matches == NULL) - vim_free(mfp); - else - { - if (!name_only) - { - // Change mtt back to zero-based. - *mfp = *mfp - 1; - - // change the TAG_SEP back to NUL - for (p = mfp + 1; *p != NUL; ++p) - if (*p == TAG_SEP) - *p = NUL; - } - matches[match_count++] = mfp; - } - } - - ga_clear(&ga_match[mtt]); - hash_clear(&ht_match[mtt]); - } - - *matchesp = matches; - *num_matches = match_count; + st.match_count = 0; + + findtags_copy_matches(&st, matchesp, num_matches, name_only); curbuf->b_help = help_save; #ifdef FEAT_MULTI_LANG diff --git a/src/testdir/test_tagjump.vim b/src/testdir/test_tagjump.vim --- a/src/testdir/test_tagjump.vim +++ b/src/testdir/test_tagjump.vim @@ -1427,6 +1427,11 @@ func Test_tagfile_errors() endtry call assert_equal(v:true, caught_431) + " tag name and file name are not separated by a tab + call writefile(["!_TAG_FILE_ENCODING\tutf-8\t//", + \ "foo Xfile 1"], 'Xtags') + call assert_fails('tag foo', 'E431:') + call delete('Xtags') call delete('Xfile') set tags& diff --git a/src/version.c b/src/version.c --- a/src/version.c +++ b/src/version.c @@ -755,6 +755,8 @@ static char *(features[]) = static int included_patches[] = { /* Add new patch number below this line */ /**/ + 4494, +/**/ 4493, /**/ 4492,