# HG changeset patch # User Bram Moolenaar # Date 1647199803 -3600 # Node ID a1914b89f0b70ee6a4146678cee0724186203950 # Parent 876fd6788230ed22311ab3ba5b10799ae5c4dae4 patch 8.2.4562: linear tag search is not optimal Commit: https://github.com/vim/vim/commit/b29b96806f1472371fb3cc01d48394e00b95cfc8 Author: Yegappan Lakshmanan Date: Sun Mar 13 19:23:48 2022 +0000 patch 8.2.4562: linear tag search is not optimal Problem: Linear tag search is not optimal. Solution: Improve linear tag search performance. (Yegappan Lakshmanan, closes #9944) diff --git a/src/tag.c b/src/tag.c --- a/src/tag.c +++ b/src/tag.c @@ -1642,34 +1642,34 @@ find_tagfunc_tags( typedef struct { tagsearch_state_T state; // tag search state + int stop_searching; // stop when match found or error + pat_T *orgpat; // holds unconverted pattern info + char_u *lbuf; // line buffer + int lbuf_size; // length of lbuf char_u *tag_fname; // name of the tag file FILE *fp; // current tags file pointer - pat_T orgpat; // holds unconverted pattern info int flags; // flags used for tag search int tag_file_sorted; // !_TAG_FILE_SORTED value int get_searchpat; // used for 'showfulltag' int help_only; // only search for help tags + int did_open; // did open a tag file + int mincount; // MAXCOL: find all matches + // other: minimal number of matches + int linear; // do a linear search vimconv_T vimconv; +#ifdef FEAT_EMACS_TAGS + int is_etag; // current file is emacs style + char_u *ebuf; // additional buffer for etag fname +#endif #ifdef FEAT_MULTI_LANG char_u help_lang[3]; // lang of current tags file int help_pri; // help language priority char_u *help_lang_find; // lang to be found int is_txt; // flag of file extension #endif - int did_open; // did open a tag file - int mincount; // MAXCOL: find all matches - // other: minimal number of matches - int linear; // do a linear search - char_u *lbuf; // line buffer - int lbuf_size; // length of lbuf -#ifdef FEAT_EMACS_TAGS - int is_etag; // current file is emacs style - char_u *ebuf; // additional buffer for etag fname -#endif int match_count; // number of matches found garray_T ga_match[MT_COUNT]; // stores matches in sequence hashtab_T ht_match[MT_COUNT]; // stores matches by key - int stop_searching; // stop when match found or error } findtags_state_T; /* @@ -1687,9 +1687,10 @@ findtags_state_init( st->tag_fname = alloc(MAXPATHL + 1); st->fp = NULL; - st->orgpat.pat = pat; - st->orgpat.len = (int)STRLEN(pat); - st->orgpat.regmatch.regprog = NULL; + st->orgpat = ALLOC_ONE(pat_T); + st->orgpat->pat = pat; + st->orgpat->len = (int)STRLEN(pat); + st->orgpat->regmatch.regprog = NULL; st->flags = flags; st->tag_file_sorted = NUL; st->help_only = (flags & TAG_HELP); @@ -1736,7 +1737,8 @@ findtags_state_free(findtags_state_T *st { vim_free(st->tag_fname); vim_free(st->lbuf); - vim_regfree(st->orgpat.regmatch.regprog); + vim_regfree(st->orgpat->regmatch.regprog); + vim_free(st->orgpat); #ifdef FEAT_EMACS_TAGS vim_free(st->ebuf); #endif @@ -1940,6 +1942,7 @@ emacs_tags_file_eof(findtags_state_T *st st->fp = incstack[incstack_idx].fp; STRCPY(st->tag_fname, incstack[incstack_idx].etag_fname); vim_free(incstack[incstack_idx].etag_fname); + st->is_etag = TRUE; // (only etags can include) return TRUE; } @@ -2099,12 +2102,9 @@ findtags_get_next_line(findtags_state_T { #ifdef FEAT_EMACS_TAGS if (emacs_tags_file_eof(st) == TRUE) - { // an included tags file. Continue processing the parent // tags file. - st->is_etag = TRUE; // (only etags can include) return TAGS_READ_IGNORE; - } #endif return TAGS_READ_EOF; } @@ -2193,12 +2193,12 @@ findtags_start_state_handler( { st->state = TS_BINARY; *sortic = TRUE; - st->orgpat.regmatch.rm_ic = (p_ic || !noic); + st->orgpat->regmatch.rm_ic = (p_ic || !noic); } else st->state = TS_LINEAR; - if (st->state == TS_BINARY && st->orgpat.regmatch.rm_ic && !*sortic) + if (st->state == TS_BINARY && st->orgpat->regmatch.rm_ic && !*sortic) { // Binary search won't work for ignoring case, use linear // search. @@ -2241,14 +2241,21 @@ findtags_start_state_handler( * Returns FAIL if a format error is encountered. */ static int -findtags_parse_line(findtags_state_T *st, tagptrs_T *tagpp) +findtags_parse_line( + findtags_state_T *st, + tagptrs_T *tagpp, + findtags_match_args_T *margs, + tagsearch_info_T *sinfo_p) { int status; + int i; + int cmplen; + int tagcmp; // Figure out where the different strings are in this line. // For "normal" tags: Do a quick check if the tag matches. // This speeds up tag searching a lot! - if (st->orgpat.headlen + if (st->orgpat->headlen #ifdef FEAT_EMACS_TAGS && !st->is_etag #endif @@ -2258,89 +2265,18 @@ findtags_parse_line(findtags_state_T *st tagpp->tagname = st->lbuf; tagpp->tagname_end = vim_strchr(st->lbuf, TAB); if (tagpp->tagname_end == NULL) - { // Corrupted tag line. - return FAIL; - } - - // Can be a matching tag, isolate the file name and command. - tagpp->fname = tagpp->tagname_end + 1; - tagpp->fname_end = vim_strchr(tagpp->fname, TAB); - if (tagpp->fname_end == NULL) - status = FAIL; - else - { - tagpp->command = tagpp->fname_end + 1; - status = OK; - } - } - else - status = parse_tag_line(st->lbuf, -#ifdef FEAT_EMACS_TAGS - st->is_etag, -#endif - tagpp); - - if (status == FAIL) - return FAIL; - -#ifdef FEAT_EMACS_TAGS - if (st->is_etag) - tagpp->fname = st->ebuf; -#endif - - return OK; -} - -/* - * Initialize the structure used for tag matching. - */ - static void -findtags_matchargs_init(findtags_match_args_T *margs, int flags) -{ - margs->matchoff = 0; // match offset - margs->match_re = FALSE; // match with regexp - margs->match_no_ic = FALSE; // matches with case - margs->has_re = (flags & TAG_REGEXP); // regexp used - margs->sortic = FALSE; // tag file sorted in nocase - margs->sort_error = FALSE; // tags file not sorted -} - -/* - * Compares the tag name in 'tagpp->tagname' with a search pattern in - * 'st->orgpat.head'. - * Returns TAG_MATCH_SUCCESS if the tag matches, TAG_MATCH_FAIL if the tag - * doesn't match, TAG_MATCH_NEXT to look for the next matching tag (used in a - * binary search) and TAG_MATCH_STOP if all the tags are processed without a - * match. Uses the values in 'margs' for doing the comparison. - */ - static tagmatch_status_T -findtags_match_tag( - findtags_state_T *st, - tagptrs_T *tagpp, - findtags_match_args_T *margs, - tagsearch_info_T *sinfo_p) -{ - int match = FALSE; - int cmplen; - int i; - int tagcmp; - - // Skip this line if the length of the tag is different and - // there is no regexp, or the tag is too short. - if (st->orgpat.headlen -#ifdef FEAT_EMACS_TAGS - && !st->is_etag -#endif - ) - { + return TAG_MATCH_FAIL; + + // Skip this line if the length of the tag is different and + // there is no regexp, or the tag is too short. cmplen = (int)(tagpp->tagname_end - tagpp->tagname); if (p_tl != 0 && cmplen > p_tl) // adjust for 'taglength' cmplen = p_tl; - if (margs->has_re && st->orgpat.headlen < cmplen) - cmplen = st->orgpat.headlen; - else if (st->state == TS_LINEAR && st->orgpat.headlen != cmplen) - return TAG_MATCH_FAIL; + if ((st->flags & TAG_REGEXP) && st->orgpat->headlen < cmplen) + cmplen = st->orgpat->headlen; + else if (st->state == TS_LINEAR && st->orgpat->headlen != cmplen) + return TAG_MATCH_NEXT; if (st->state == TS_BINARY) { @@ -2353,18 +2289,18 @@ findtags_match_tag( // Compare the current tag with the searched tag. if (margs->sortic) - tagcmp = tag_strnicmp(tagpp->tagname, st->orgpat.head, + tagcmp = tag_strnicmp(tagpp->tagname, st->orgpat->head, (size_t)cmplen); else - tagcmp = STRNCMP(tagpp->tagname, st->orgpat.head, cmplen); + tagcmp = STRNCMP(tagpp->tagname, st->orgpat->head, cmplen); // A match with a shorter tag means to search forward. // A match with a longer tag means to search backward. if (tagcmp == 0) { - if (cmplen < st->orgpat.headlen) + if (cmplen < st->orgpat->headlen) tagcmp = -1; - else if (cmplen > st->orgpat.headlen) + else if (cmplen > st->orgpat->headlen) tagcmp = 1; } @@ -2404,7 +2340,7 @@ findtags_match_tag( } else if (st->state == TS_SKIP_BACK) { - if (MB_STRNICMP(tagpp->tagname, st->orgpat.head, cmplen) != 0) + if (MB_STRNICMP(tagpp->tagname, st->orgpat->head, cmplen) != 0) st->state = TS_STEP_FORWARD; else // Have to skip back more. Restore the curr_offset @@ -2414,7 +2350,7 @@ findtags_match_tag( } else if (st->state == TS_STEP_FORWARD) { - if (MB_STRNICMP(tagpp->tagname, st->orgpat.head, cmplen) != 0) + if (MB_STRNICMP(tagpp->tagname, st->orgpat->head, cmplen) != 0) { if ((off_T)vim_ftell(st->fp) > sinfo_p->match_offset) return TAG_MATCH_STOP; // past last match @@ -2424,9 +2360,68 @@ findtags_match_tag( } else // skip this match if it can't match - if (MB_STRNICMP(tagpp->tagname, st->orgpat.head, cmplen) != 0) - return TAG_MATCH_FAIL; + if (MB_STRNICMP(tagpp->tagname, st->orgpat->head, cmplen) != 0) + return TAG_MATCH_NEXT; + + // Can be a matching tag, isolate the file name and command. + tagpp->fname = tagpp->tagname_end + 1; + tagpp->fname_end = vim_strchr(tagpp->fname, TAB); + if (tagpp->fname_end == NULL) + status = FAIL; + else + { + tagpp->command = tagpp->fname_end + 1; + status = OK; + } } + else + status = parse_tag_line(st->lbuf, +#ifdef FEAT_EMACS_TAGS + st->is_etag, +#endif + tagpp); + + if (status == FAIL) + return TAG_MATCH_FAIL; + +#ifdef FEAT_EMACS_TAGS + if (st->is_etag) + tagpp->fname = st->ebuf; +#endif + + return TAG_MATCH_SUCCESS; +} + +/* + * Initialize the structure used for tag matching. + */ + static void +findtags_matchargs_init(findtags_match_args_T *margs, int flags) +{ + margs->matchoff = 0; // match offset + margs->match_re = FALSE; // match with regexp + margs->match_no_ic = FALSE; // matches with case + margs->has_re = (flags & TAG_REGEXP); // regexp used + margs->sortic = FALSE; // tag file sorted in nocase + margs->sort_error = FALSE; // tags file not sorted +} + +/* + * Compares the tag name in 'tagpp->tagname' with a search pattern in + * 'st->orgpat.head'. + * Returns TAG_MATCH_SUCCESS if the tag matches, TAG_MATCH_FAIL if the tag + * doesn't match, TAG_MATCH_NEXT to look for the next matching tag (used in a + * binary search) and TAG_MATCH_STOP if all the tags are processed without a + * match. Uses the values in 'margs' for doing the comparison. + */ + static tagmatch_status_T +findtags_match_tag( + findtags_state_T *st, + tagptrs_T *tagpp, + findtags_match_args_T *margs) +{ + int match = FALSE; + int cmplen; // First try matching with the pattern literally (also when it is // a regexp). @@ -2434,41 +2429,41 @@ findtags_match_tag( if (p_tl != 0 && cmplen > p_tl) // adjust for 'taglength' cmplen = p_tl; // if tag length does not match, don't try comparing - if (st->orgpat.len != cmplen) + if (st->orgpat->len != cmplen) match = FALSE; else { - if (st->orgpat.regmatch.rm_ic) + if (st->orgpat->regmatch.rm_ic) { match = - (MB_STRNICMP(tagpp->tagname, st->orgpat.pat, cmplen) == 0); + (MB_STRNICMP(tagpp->tagname, st->orgpat->pat, cmplen) == 0); if (match) margs->match_no_ic = - (STRNCMP(tagpp->tagname, st->orgpat.pat, cmplen) == 0); + (STRNCMP(tagpp->tagname, st->orgpat->pat, cmplen) == 0); } else - match = (STRNCMP(tagpp->tagname, st->orgpat.pat, cmplen) == 0); + match = (STRNCMP(tagpp->tagname, st->orgpat->pat, cmplen) == 0); } // Has a regexp: Also find tags matching regexp. margs->match_re = FALSE; - if (!match && st->orgpat.regmatch.regprog != NULL) + if (!match && st->orgpat->regmatch.regprog != NULL) { int cc; cc = *tagpp->tagname_end; *tagpp->tagname_end = NUL; - match = vim_regexec(&st->orgpat.regmatch, tagpp->tagname, (colnr_T)0); + match = vim_regexec(&st->orgpat->regmatch, tagpp->tagname, (colnr_T)0); if (match) { - margs->matchoff = (int)(st->orgpat.regmatch.startp[0] - + margs->matchoff = (int)(st->orgpat->regmatch.startp[0] - tagpp->tagname); - if (st->orgpat.regmatch.rm_ic) + if (st->orgpat->regmatch.rm_ic) { - st->orgpat.regmatch.rm_ic = FALSE; - margs->match_no_ic = vim_regexec(&st->orgpat.regmatch, + st->orgpat->regmatch.rm_ic = FALSE; + margs->match_no_ic = vim_regexec(&st->orgpat->regmatch, tagpp->tagname, (colnr_T)0); - st->orgpat.regmatch.rm_ic = TRUE; + st->orgpat->regmatch.rm_ic = TRUE; } } *tagpp->tagname_end = cc; @@ -2571,7 +2566,7 @@ findtags_add_match( else mtt = MT_GL_OTH; } - if (st->orgpat.regmatch.rm_ic && !margs->match_no_ic) + if (st->orgpat->regmatch.rm_ic && !margs->match_no_ic) mtt += MT_IC_OFF; if (margs->match_re) mtt += MT_RE_OFF; @@ -2857,7 +2852,12 @@ line_read_in: continue; } - if (findtags_parse_line(st, &tagp) == FAIL) + retval = findtags_parse_line(st, &tagp, margs, &search_info); + if (retval == TAG_MATCH_NEXT) + continue; + if (retval == TAG_MATCH_STOP) + break; + if (retval == TAG_MATCH_FAIL) { semsg(_(e_format_error_in_tags_file_str), st->tag_fname); #ifdef FEAT_CSCOPE @@ -2868,7 +2868,7 @@ line_read_in: return; } - retval = findtags_match_tag(st, &tagp, margs, &search_info); + retval = findtags_match_tag(st, &tagp, margs); if (retval == TAG_MATCH_NEXT) continue; if (retval == TAG_MATCH_STOP) @@ -2895,7 +2895,7 @@ findtags_in_file(findtags_state_T *st, c { findtags_match_args_T margs; #ifdef FEAT_CSCOPE - int use_cscope = FALSE; + int use_cscope = (st->flags & TAG_CSCOPE); #endif st->vimconv.vc_type = CONV_NONE; @@ -2906,7 +2906,6 @@ findtags_in_file(findtags_state_T *st, c // A file that doesn't exist is silently ignored. Only when not a // single file is found, an error message is given (further on). #ifdef FEAT_CSCOPE - use_cscope = (st->flags & TAG_CSCOPE); if (use_cscope) st->fp = NULL; // avoid GCC warning else @@ -3111,28 +3110,28 @@ find_tags( { // When "@ab" is specified use only the "ab" language, otherwise // search all languages. - if (st.orgpat.len > 3 && pat[st.orgpat.len - 3] == '@' - && ASCII_ISALPHA(pat[st.orgpat.len - 2]) - && ASCII_ISALPHA(pat[st.orgpat.len - 1])) + if (st.orgpat->len > 3 && pat[st.orgpat->len - 3] == '@' + && ASCII_ISALPHA(pat[st.orgpat->len - 2]) + && ASCII_ISALPHA(pat[st.orgpat->len - 1])) { - saved_pat = vim_strnsave(pat, st.orgpat.len - 3); + saved_pat = vim_strnsave(pat, st.orgpat->len - 3); if (saved_pat != NULL) { - st.help_lang_find = &pat[st.orgpat.len - 2]; - st.orgpat.pat = saved_pat; - st.orgpat.len -= 3; + st.help_lang_find = &pat[st.orgpat->len - 2]; + st.orgpat->pat = saved_pat; + st.orgpat->len -= 3; } } } #endif - if (p_tl != 0 && st.orgpat.len > p_tl) // adjust for 'taglength' - st.orgpat.len = p_tl; + if (p_tl != 0 && st.orgpat->len > p_tl) // adjust for 'taglength' + st.orgpat->len = p_tl; save_emsg_off = emsg_off; emsg_off = TRUE; // don't want error for invalid RE here - prepare_pats(&st.orgpat, has_re); + prepare_pats(st.orgpat, has_re); emsg_off = save_emsg_off; - if (has_re && st.orgpat.regmatch.regprog == NULL) + if (has_re && st.orgpat->regmatch.regprog == NULL) goto findtag_end; #ifdef FEAT_EVAL @@ -3164,11 +3163,11 @@ find_tags( * When the tag file is case-fold sorted, it is either one or the other. * Only ignore case when TAG_NOIC not used or 'ignorecase' set. */ - st.orgpat.regmatch.rm_ic = ((p_ic || !noic) - && (findall || st.orgpat.headlen == 0 || !p_tbs)); + st.orgpat->regmatch.rm_ic = ((p_ic || !noic) + && (findall || st.orgpat->headlen == 0 || !p_tbs)); for (round = 1; round <= 2; ++round) { - st.linear = (st.orgpat.headlen == 0 || !p_tbs || round == 2); + st.linear = (st.orgpat->headlen == 0 || !p_tbs || round == 2); /* * Try tag file names from tags option one by one. @@ -3200,7 +3199,7 @@ find_tags( // stop searching when already did a linear search, or when TAG_NOIC // used, and 'ignorecase' not set or already did case-ignore search if (st.stop_searching || st.linear || (!p_ic && noic) || - st.orgpat.regmatch.rm_ic) + st.orgpat->regmatch.rm_ic) break; # ifdef FEAT_CSCOPE if (use_cscope) @@ -3208,7 +3207,7 @@ find_tags( # endif // try another time while ignoring case - st.orgpat.regmatch.rm_ic = TRUE; + st.orgpat->regmatch.rm_ic = TRUE; } if (!st.stop_searching) diff --git a/src/version.c b/src/version.c --- a/src/version.c +++ b/src/version.c @@ -751,6 +751,8 @@ static char *(features[]) = static int included_patches[] = { /* Add new patch number below this line */ /**/ + 4562, +/**/ 4561, /**/ 4560,