Mercurial > vim
diff src/search.c @ 22689:f8bf2c122452 v8.2.1893
patch 8.2.1893: fuzzy matching does not support multiple words
Commit: https://github.com/vim/vim/commit/8ded5b647aa4b3338da721b343e0bce0f86655f6
Author: Bram Moolenaar <Bram@vim.org>
Date: Fri Oct 23 16:50:30 2020 +0200
patch 8.2.1893: fuzzy matching does not support multiple words
Problem: Fuzzy matching does not support multiple words.
Solution: Add support for matching white space separated words. (Yegappan
Lakshmanan, closes #7163)
author | Bram Moolenaar <Bram@vim.org> |
---|---|
date | Fri, 23 Oct 2020 17:00:04 +0200 |
parents | 0dd527d9c62d |
children | e82579016863 |
line wrap: on
line diff
--- a/src/search.c +++ b/src/search.c @@ -4203,16 +4203,16 @@ the_end: * Ported from the lib_fts library authored by Forrest Smith. * https://github.com/forrestthewoods/lib_fts/tree/master/code * - * Blog describing the algorithm: + * The following blog describes the fuzzy matching algorithm: * https://www.forrestthewoods.com/blog/reverse_engineering_sublime_texts_fuzzy_match/ * * Each matching string is assigned a score. The following factors are checked: - * Matched letter - * Unmatched letter - * Consecutively matched letters - * Proximity to start - * Letter following a separator (space, underscore) - * Uppercase letter following lowercase (aka CamelCase) + * - Matched letter + * - Unmatched letter + * - Consecutively matched letters + * - Proximity to start + * - Letter following a separator (space, underscore) + * - Uppercase letter following lowercase (aka CamelCase) * * Matched letters are good. Unmatched letters are bad. Matching near the start * is good. Matching the first letter in the middle of a phrase is good. @@ -4222,16 +4222,17 @@ the_end: * File paths are different from file names. File extensions may be ignorable. * Single words care about consecutive matches but not separators or camel * case. - * Score starts at 0 + * Score starts at 100 * Matched letter: +0 points * Unmatched letter: -1 point - * Consecutive match bonus: +5 points - * Separator bonus: +10 points - * Camel case bonus: +10 points - * Unmatched leading letter: -3 points (max: -9) + * Consecutive match bonus: +15 points + * First letter bonus: +15 points + * Separator bonus: +30 points + * Camel case bonus: +30 points + * Unmatched leading letter: -5 points (max: -15) * * There is some nuance to this. Scores don’t have an intrinsic meaning. The - * score range isn’t 0 to 100. It’s roughly [-50, 50]. Longer words have a + * score range isn’t 0 to 100. It’s roughly [50, 150]. Longer words have a * lower minimum score due to unmatched letter penalty. Longer search patterns * have a higher maximum score due to match bonuses. * @@ -4247,6 +4248,7 @@ the_end: */ typedef struct { + int idx; // used for stable sort listitem_T *item; int score; list_T *lmatchpos; @@ -4267,6 +4269,8 @@ typedef struct #define MAX_LEADING_LETTER_PENALTY -15 // penalty for every letter that doesn't match #define UNMATCHED_LETTER_PENALTY -1 +// penalty for gap in matching positions (-2 * k) +#define GAP_PENALTY -2 // Score for a string that doesn't fuzzy match the pattern #define SCORE_NONE -9999 @@ -4319,6 +4323,8 @@ fuzzy_match_compute_score( // Sequential if (currIdx == (prevIdx + 1)) score += SEQUENTIAL_BONUS; + else + score += GAP_PENALTY * (currIdx - prevIdx); } // Check for bonuses based on neighbor character value @@ -4334,7 +4340,7 @@ fuzzy_match_compute_score( while (sidx < currIdx) { neighbor = (*mb_ptr2char)(p); - (void)mb_ptr2char_adv(&p); + MB_PTR_ADV(p); sidx++; } curr = (*mb_ptr2char)(p); @@ -4362,6 +4368,10 @@ fuzzy_match_compute_score( return score; } +/* + * Perform a recursive search for fuzzy matching 'fuzpat' in 'str'. + * Return the number of matching characters. + */ static int fuzzy_match_recursive( char_u *fuzpat, @@ -4386,11 +4396,11 @@ fuzzy_match_recursive( // Count recursions ++*recursionCount; if (*recursionCount >= FUZZY_MATCH_RECURSION_LIMIT) - return FALSE; + return 0; // Detect end of strings if (*fuzpat == '\0' || *str == '\0') - return FALSE; + return 0; // Loop through fuzpat and str looking for a match first_match = TRUE; @@ -4411,7 +4421,7 @@ fuzzy_match_recursive( // Supplied matches buffer was too short if (nextMatch >= maxMatches) - return FALSE; + return 0; // "Copy-on-Write" srcMatches into matches if (first_match && srcMatches) @@ -4444,12 +4454,12 @@ fuzzy_match_recursive( // Advance matches[nextMatch++] = strIdx; if (has_mbyte) - (void)mb_ptr2char_adv(&fuzpat); + MB_PTR_ADV(fuzpat); else ++fuzpat; } if (has_mbyte) - (void)mb_ptr2char_adv(&str); + MB_PTR_ADV(str); else ++str; strIdx++; @@ -4469,12 +4479,12 @@ fuzzy_match_recursive( // Recursive score is better than "this" memcpy(matches, bestRecursiveMatches, maxMatches * sizeof(matches[0])); *outScore = bestRecursiveScore; - return TRUE; + return nextMatch; } else if (matched) - return TRUE; // "this" score is better than recursive - - return FALSE; // no match + return nextMatch; // "this" score is better than recursive + + return 0; // no match } /* @@ -4485,45 +4495,110 @@ fuzzy_match_recursive( * Scores values have no intrinsic meaning. Possible score range is not * normalized and varies with pattern. * Recursion is limited internally (default=10) to prevent degenerate cases - * (fuzpat="aaaaaa" str="aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"). + * (pat_arg="aaaaaa" str="aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"). * Uses char_u for match indices. Therefore patterns are limited to MAXMATCHES * characters. * - * Returns TRUE if 'fuzpat' matches 'str'. Also returns the match score in + * Returns TRUE if 'pat_arg' matches 'str'. Also returns the match score in * 'outScore' and the matching character positions in 'matches'. */ static int fuzzy_match( char_u *str, - char_u *fuzpat, + char_u *pat_arg, + int matchseq, int *outScore, matchidx_T *matches, int maxMatches) { int recursionCount = 0; int len = MB_CHARLEN(str); + char_u *save_pat; + char_u *pat; + char_u *p; + int complete = FALSE; + int score = 0; + int numMatches = 0; + int matchCount; *outScore = 0; - return fuzzy_match_recursive(fuzpat, str, 0, outScore, str, len, NULL, - matches, maxMatches, 0, &recursionCount); + save_pat = vim_strsave(pat_arg); + if (save_pat == NULL) + return FALSE; + pat = save_pat; + p = pat; + + // Try matching each word in 'pat_arg' in 'str' + while (TRUE) + { + if (matchseq) + complete = TRUE; + else + { + // Extract one word from the pattern (separated by space) + p = skipwhite(p); + if (*p == NUL) + break; + pat = p; + while (*p != NUL && !VIM_ISWHITE(PTR2CHAR(p))) + { + if (has_mbyte) + MB_PTR_ADV(p); + else + ++p; + } + if (*p == NUL) // processed all the words + complete = TRUE; + *p = NUL; + } + + score = 0; + recursionCount = 0; + matchCount = fuzzy_match_recursive(pat, str, 0, &score, str, len, NULL, + matches + numMatches, maxMatches - numMatches, + 0, &recursionCount); + if (matchCount == 0) + { + numMatches = 0; + break; + } + + // Accumulate the match score and the number of matches + *outScore += score; + numMatches += matchCount; + + if (complete) + break; + + // try matching the next word + ++p; + } + + vim_free(save_pat); + return numMatches != 0; } /* * Sort the fuzzy matches in the descending order of the match score. + * For items with same score, retain the order using the index (stable sort) */ static int -fuzzy_item_compare(const void *s1, const void *s2) +fuzzy_match_item_compare(const void *s1, const void *s2) { int v1 = ((fuzzyItem_T *)s1)->score; int v2 = ((fuzzyItem_T *)s2)->score; - - return v1 == v2 ? 0 : v1 > v2 ? -1 : 1; + int idx1 = ((fuzzyItem_T *)s1)->idx; + int idx2 = ((fuzzyItem_T *)s2)->idx; + + return v1 == v2 ? (idx1 - idx2) : v1 > v2 ? -1 : 1; } /* * Fuzzy search the string 'str' in a list of 'items' and return the matching * strings in 'fmatchlist'. + * If 'matchseq' is TRUE, then for multi-word search strings, match all the + * words in sequence. * If 'items' is a list of strings, then search for 'str' in the list. * If 'items' is a list of dicts, then either use 'key' to lookup the string * for each item or use 'item_cb' Funcref function to get the string. @@ -4531,9 +4606,10 @@ fuzzy_item_compare(const void *s1, const * matches for each item. */ static void -match_fuzzy( +fuzzy_match_in_list( list_T *items, char_u *str, + int matchseq, char_u *key, callback_T *item_cb, int retmatchpos, @@ -4561,6 +4637,7 @@ match_fuzzy( char_u *itemstr; typval_T rettv; + ptrs[i].idx = i; ptrs[i].item = li; ptrs[i].score = SCORE_NONE; itemstr = NULL; @@ -4593,25 +4670,34 @@ match_fuzzy( } if (itemstr != NULL - && fuzzy_match(itemstr, str, &score, matches, + && fuzzy_match(itemstr, str, matchseq, &score, matches, sizeof(matches) / sizeof(matches[0]))) { // Copy the list of matching positions in itemstr to a list, if // 'retmatchpos' is set. if (retmatchpos) { - int j; - int strsz; + int j = 0; + char_u *p; ptrs[i].lmatchpos = list_alloc(); if (ptrs[i].lmatchpos == NULL) goto done; - strsz = MB_CHARLEN(str); - for (j = 0; j < strsz; j++) + + p = str; + while (*p != NUL) { - if (list_append_number(ptrs[i].lmatchpos, - matches[j]) == FAIL) - goto done; + if (!VIM_ISWHITE(PTR2CHAR(p))) + { + if (list_append_number(ptrs[i].lmatchpos, + matches[j]) == FAIL) + goto done; + j++; + } + if (has_mbyte) + MB_PTR_ADV(p); + else + ++p; } } ptrs[i].score = score; @@ -4627,7 +4713,7 @@ match_fuzzy( // Sort the list by the descending order of the match score qsort((void *)ptrs, (size_t)len, sizeof(fuzzyItem_T), - fuzzy_item_compare); + fuzzy_match_item_compare); // For matchfuzzy(), return a list of matched strings. // ['str1', 'str2', 'str3'] @@ -4687,6 +4773,7 @@ do_fuzzymatch(typval_T *argvars, typval_ callback_T cb; char_u *key = NULL; int ret; + int matchseq = FALSE; CLEAR_POINTER(&cb); @@ -4737,6 +4824,8 @@ do_fuzzymatch(typval_T *argvars, typval_ return; } } + if ((di = dict_find(d, (char_u *)"matchseq", -1)) != NULL) + matchseq = TRUE; } // get the fuzzy matches @@ -4762,8 +4851,8 @@ do_fuzzymatch(typval_T *argvars, typval_ goto done; } - match_fuzzy(argvars[0].vval.v_list, tv_get_string(&argvars[1]), key, - &cb, retmatchpos, rettv->vval.v_list); + fuzzy_match_in_list(argvars[0].vval.v_list, tv_get_string(&argvars[1]), + matchseq, key, &cb, retmatchpos, rettv->vval.v_list); done: free_callback(&cb);