Mercurial > vim
changeset 22355:0491b9cafd44 v8.2.1726
patch 8.2.1726: fuzzy matching only works on strings
Commit: https://github.com/vim/vim/commit/4f73b8e9cc83f647b34002554a8bdf9abec0a82f
Author: Bram Moolenaar <Bram@vim.org>
Date: Tue Sep 22 20:33:50 2020 +0200
patch 8.2.1726: fuzzy matching only works on strings
Problem: Fuzzy matching only works on strings.
Solution: Support passing a dict. Add matchfuzzypos() to also get the match
positions. (Yegappan Lakshmanan, closes #6947)
author | Bram Moolenaar <Bram@vim.org> |
---|---|
date | Tue, 22 Sep 2020 20:45:04 +0200 |
parents | 19139cc50651 |
children | 09065c79110e |
files | runtime/doc/eval.txt runtime/doc/usr_41.txt src/evalfunc.c src/proto/search.pro src/search.c src/testdir/Make_all.mak src/testdir/test_functions.vim src/testdir/test_matchfuzzy.vim src/version.c |
diffstat | 9 files changed, 637 insertions(+), 175 deletions(-) [+] |
line wrap: on
line diff
--- a/runtime/doc/eval.txt +++ b/runtime/doc/eval.txt @@ -2641,7 +2641,10 @@ matcharg({nr}) List arguments of |:mat matchdelete({id} [, {win}]) Number delete match identified by {id} matchend({expr}, {pat} [, {start} [, {count}]]) Number position where {pat} ends in {expr} -matchfuzzy({list}, {str}) List fuzzy match {str} in {list} +matchfuzzy({list}, {str} [, {dict}]) + List fuzzy match {str} in {list} +matchfuzzypos({list}, {str} [, {dict}]) + List fuzzy match {str} in {list} matchlist({expr}, {pat} [, {start} [, {count}]]) List match and submatches of {pat} in {expr} matchstr({expr}, {pat} [, {start} [, {count}]]) @@ -7311,12 +7314,25 @@ matchend({expr}, {pat} [, {start} [, {co GetText()->matchend('word') -matchfuzzy({list}, {str}) *matchfuzzy()* - Returns a list with all the strings in {list} that fuzzy - match {str}. The strings in the returned list are sorted - based on the matching score. {str} is treated as a literal - string and regular expression matching is NOT supported. - The maximum supported {str} length is 256. +matchfuzzy({list}, {str} [, {dict}]) *matchfuzzy()* + If {list} is a list of strings, then returns a list with all + the strings in {list} that fuzzy match {str}. The strings in + the returned list are sorted based on the matching score. + + If {list} is a list of dictionaries, then the optional {dict} + argument supports the following items: + key key of the item which is fuzzy matched against + {str}. The value of this item should be a + string. + text_cb |Funcref| that will be called for every item + in {list} to get the text for fuzzy matching. + This should accept a dictionary item as the + argument and return the text for that item to + use for fuzzy matching. + + {str} is treated as a literal string and regular expression + matching is NOT supported. The maximum supported {str} length + is 256. If there are no matching strings or there is an error, then an empty list is returned. If length of {str} is greater than @@ -7327,11 +7343,36 @@ matchfuzzy({list}, {str}) *matchfuzzy( < results in ["clay"]. > :echo getbufinfo()->map({_, v -> v.name})->matchfuzzy("ndl") < results in a list of buffer names fuzzy matching "ndl". > + :echo getbufinfo()->matchfuzzy("ndl", {'key' : 'name'}) +< results in a list of buffer information dicts with buffer + names fuzzy matching "ndl". > + :echo getbufinfo()->matchfuzzy("spl", + \ {'text_cb' : {v -> v.name}}) +< results in a list of buffer information dicts with buffer + names fuzzy matching "spl". > :echo v:oldfiles->matchfuzzy("test") < results in a list of file names fuzzy matching "test". > :let l = readfile("buffer.c")->matchfuzzy("str") < results in a list of lines in "buffer.c" fuzzy matching "str". +matchfuzzypos({list}, {str} [, {dict}]) *matchfuzzypos()* + Same as |matchfuzzy()|, but returns the list of matched + strings and the list of character positions where characters + in {str} matches. + + If {str} matches multiple times in a string, then only the + positions for the best match is returned. + + If there are no matching strings or there is an error, then a + list with two empty list items is returned. + + Example: > + :echo matchfuzzypos(['testing'], 'tsg') +< results in [['testing'], [[0, 2, 6]]] > + :echo matchfuzzypos(['clay', 'lacy'], 'la') +< results in [['lacy', 'clay'], [[0, 1], [1, 2]]] > + :echo [{'text': 'hello', 'id' : 10}]->matchfuzzypos('ll', {'key' : 'text'}) +< results in [{'id': 10, 'text': 'hello'}] [[2, 3]] matchlist({expr}, {pat} [, {start} [, {count}]]) *matchlist()* Same as |match()|, but return a |List|. The first item in the
--- a/runtime/doc/usr_41.txt +++ b/runtime/doc/usr_41.txt @@ -604,6 +604,7 @@ String manipulation: *string-functio match() position where a pattern matches in a string matchend() position where a pattern match ends in a string matchfuzzy() fuzzy matches a string in a list of strings + matchfuzzypos() fuzzy matches a string in a list of strings matchstr() match of a pattern in a string matchstrpos() match and positions of a pattern in a string matchlist() like matchstr() and also return submatches
--- a/src/evalfunc.c +++ b/src/evalfunc.c @@ -753,7 +753,8 @@ static funcentry_T global_functions[] = {"matcharg", 1, 1, FEARG_1, ret_list_string, f_matcharg}, {"matchdelete", 1, 2, FEARG_1, ret_number, f_matchdelete}, {"matchend", 2, 4, FEARG_1, ret_number, f_matchend}, - {"matchfuzzy", 2, 2, FEARG_1, ret_list_string, f_matchfuzzy}, + {"matchfuzzy", 2, 3, FEARG_1, ret_list_string, f_matchfuzzy}, + {"matchfuzzypos", 2, 3, FEARG_1, ret_list_any, f_matchfuzzypos}, {"matchlist", 2, 4, FEARG_1, ret_list_string, f_matchlist}, {"matchstr", 2, 4, FEARG_1, ret_string, f_matchstr}, {"matchstrpos", 2, 4, FEARG_1, ret_list_any, f_matchstrpos},
--- a/src/proto/search.pro +++ b/src/proto/search.pro @@ -37,4 +37,5 @@ spat_T *get_spat(int idx); int get_spat_last_idx(void); void f_searchcount(typval_T *argvars, typval_T *rettv); void f_matchfuzzy(typval_T *argvars, typval_T *rettv); +void f_matchfuzzypos(typval_T *argvars, typval_T *rettv); /* vim: set ft=c : */
--- a/src/search.c +++ b/src/search.c @@ -4216,33 +4216,144 @@ the_end: */ typedef struct { - listitem_T *item; + listitem_T *item; + int score; + list_T *lmatchpos; +} fuzzyItem_T; + +// bonus for adjacent matches +#define SEQUENTIAL_BONUS 15 +// bonus if match occurs after a separator +#define SEPARATOR_BONUS 30 +// bonus if match is uppercase and prev is lower +#define CAMEL_BONUS 30 +// bonus if the first letter is matched +#define FIRST_LETTER_BONUS 15 +// penalty applied for every letter in str before the first match +#define LEADING_LETTER_PENALTY -5 +// maximum penalty for leading letters +#define MAX_LEADING_LETTER_PENALTY -15 +// penalty for every letter that doesn't match +#define UNMATCHED_LETTER_PENALTY -1 +// Score for a string that doesn't fuzzy match the pattern +#define SCORE_NONE -9999 + +#define FUZZY_MATCH_RECURSION_LIMIT 10 +// Maximum number of characters that can be fuzzy matched +#define MAXMATCHES 256 + +typedef int_u matchidx_T; + +/* + * Compute a score for a fuzzy matched string. The matching character locations + * are in 'matches'. + */ + static int +fuzzy_match_compute_score( + char_u *str, + int strSz, + matchidx_T *matches, + int numMatches) +{ int score; -} fuzzyItem_T; + int penalty; + int unmatched; + int i; + char_u *p = str; + matchidx_T sidx = 0; + + // Initialize score + score = 100; + + // Apply leading letter penalty + penalty = LEADING_LETTER_PENALTY * matches[0]; + if (penalty < MAX_LEADING_LETTER_PENALTY) + penalty = MAX_LEADING_LETTER_PENALTY; + score += penalty; + + // Apply unmatched penalty + unmatched = strSz - numMatches; + score += UNMATCHED_LETTER_PENALTY * unmatched; + + // Apply ordering bonuses + for (i = 0; i < numMatches; ++i) + { + matchidx_T currIdx = matches[i]; + + if (i > 0) + { + matchidx_T prevIdx = matches[i - 1]; + + // Sequential + if (currIdx == (prevIdx + 1)) + score += SEQUENTIAL_BONUS; + } + + // Check for bonuses based on neighbor character value + if (currIdx > 0) + { + // Camel case + int neighbor; + int curr; + int neighborSeparator; + + if (has_mbyte) + { + while (sidx < currIdx) + { + neighbor = (*mb_ptr2char)(p); + (void)mb_ptr2char_adv(&p); + sidx++; + } + curr = (*mb_ptr2char)(p); + } + else + { + neighbor = str[currIdx - 1]; + curr = str[currIdx]; + } + + if (vim_islower(neighbor) && vim_isupper(curr)) + score += CAMEL_BONUS; + + // Separator + neighborSeparator = neighbor == '_' || neighbor == ' '; + if (neighborSeparator) + score += SEPARATOR_BONUS; + } + else + { + // First letter + score += FIRST_LETTER_BONUS; + } + } + return score; +} static int fuzzy_match_recursive( - char_u *fuzpat, - char_u *str, - int *outScore, - char_u *strBegin, - char_u *srcMatches, - char_u *matches, - int maxMatches, - int nextMatch, - int *recursionCount, - int recursionLimit) + char_u *fuzpat, + char_u *str, + matchidx_T strIdx, + int *outScore, + char_u *strBegin, + int strLen, + matchidx_T *srcMatches, + matchidx_T *matches, + int maxMatches, + int nextMatch, + int *recursionCount) { // Recursion params int recursiveMatch = FALSE; - char_u bestRecursiveMatches[256]; + matchidx_T bestRecursiveMatches[MAXMATCHES]; int bestRecursiveScore = 0; int first_match; int matched; // Count recursions ++*recursionCount; - if (*recursionCount >= recursionLimit) + if (*recursionCount >= FUZZY_MATCH_RECURSION_LIMIT) return FALSE; // Detect end of strings @@ -4251,13 +4362,20 @@ fuzzy_match_recursive( // Loop through fuzpat and str looking for a match first_match = TRUE; - while (*fuzpat != '\0' && *str != '\0') + while (*fuzpat != NUL && *str != NUL) { + int c1; + int c2; + + c1 = PTR2CHAR(fuzpat); + c2 = PTR2CHAR(str); + // Found match - if (vim_tolower(*fuzpat) == vim_tolower(*str)) + if (vim_tolower(c1) == vim_tolower(c2)) { - char_u recursiveMatches[256]; + matchidx_T recursiveMatches[MAXMATCHES]; int recursiveScore = 0; + char_u *next_char; // Supplied matches buffer was too short if (nextMatch >= maxMatches) @@ -4266,116 +4384,58 @@ fuzzy_match_recursive( // "Copy-on-Write" srcMatches into matches if (first_match && srcMatches) { - memcpy(matches, srcMatches, nextMatch); + memcpy(matches, srcMatches, nextMatch * sizeof(srcMatches[0])); first_match = FALSE; } // Recursive call that "skips" this match - if (fuzzy_match_recursive(fuzpat, str + 1, &recursiveScore, - strBegin, matches, recursiveMatches, - sizeof(recursiveMatches), nextMatch, recursionCount, - recursionLimit)) + if (has_mbyte) + next_char = str + (*mb_ptr2len)(str); + else + next_char = str + 1; + if (fuzzy_match_recursive(fuzpat, next_char, strIdx + 1, + &recursiveScore, strBegin, strLen, matches, + recursiveMatches, + sizeof(recursiveMatches)/sizeof(recursiveMatches[0]), + nextMatch, recursionCount)) { // Pick best recursive score if (!recursiveMatch || recursiveScore > bestRecursiveScore) { - memcpy(bestRecursiveMatches, recursiveMatches, 256); + memcpy(bestRecursiveMatches, recursiveMatches, + MAXMATCHES * sizeof(recursiveMatches[0])); bestRecursiveScore = recursiveScore; } recursiveMatch = TRUE; } // Advance - matches[nextMatch++] = (char_u)(str - strBegin); - ++fuzpat; + matches[nextMatch++] = strIdx; + if (has_mbyte) + (void)mb_ptr2char_adv(&fuzpat); + else + ++fuzpat; } - ++str; + if (has_mbyte) + (void)mb_ptr2char_adv(&str); + else + ++str; + strIdx++; } // Determine if full fuzpat was matched - matched = *fuzpat == '\0' ? TRUE : FALSE; + matched = *fuzpat == NUL ? TRUE : FALSE; // Calculate score if (matched) - { - // bonus for adjacent matches - int sequential_bonus = 15; - // bonus if match occurs after a separator - int separator_bonus = 30; - // bonus if match is uppercase and prev is lower - int camel_bonus = 30; - // bonus if the first letter is matched - int first_letter_bonus = 15; - // penalty applied for every letter in str before the first match - int leading_letter_penalty = -5; - // maximum penalty for leading letters - int max_leading_letter_penalty = -15; - // penalty for every letter that doesn't matter - int unmatched_letter_penalty = -1; - int penalty; - int unmatched; - int i; - - // Iterate str to end - while (*str != '\0') - ++str; - - // Initialize score - *outScore = 100; - - // Apply leading letter penalty - penalty = leading_letter_penalty * matches[0]; - if (penalty < max_leading_letter_penalty) - penalty = max_leading_letter_penalty; - *outScore += penalty; - - // Apply unmatched penalty - unmatched = (int)(str - strBegin) - nextMatch; - *outScore += unmatched_letter_penalty * unmatched; - - // Apply ordering bonuses - for (i = 0; i < nextMatch; ++i) - { - char_u currIdx = matches[i]; - - if (i > 0) - { - char_u prevIdx = matches[i - 1]; - - // Sequential - if (currIdx == (prevIdx + 1)) - *outScore += sequential_bonus; - } - - // Check for bonuses based on neighbor character value - if (currIdx > 0) - { - // Camel case - char_u neighbor = strBegin[currIdx - 1]; - char_u curr = strBegin[currIdx]; - int neighborSeparator; - - if (islower(neighbor) && isupper(curr)) - *outScore += camel_bonus; - - // Separator - neighborSeparator = neighbor == '_' || neighbor == ' '; - if (neighborSeparator) - *outScore += separator_bonus; - } - else - { - // First letter - *outScore += first_letter_bonus; - } - } - } + *outScore = fuzzy_match_compute_score(strBegin, strLen, matches, + nextMatch); // Return best result if (recursiveMatch && (!matched || bestRecursiveScore > *outScore)) { // Recursive score is better than "this" - memcpy(matches, bestRecursiveMatches, maxMatches); + memcpy(matches, bestRecursiveMatches, maxMatches * sizeof(matches[0])); *outScore = bestRecursiveScore; return TRUE; } @@ -4394,22 +4454,27 @@ fuzzy_match_recursive( * normalized and varies with pattern. * Recursion is limited internally (default=10) to prevent degenerate cases * (fuzpat="aaaaaa" str="aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"). - * Uses char_u for match indices. Therefore patterns are limited to 256 + * Uses char_u for match indices. Therefore patterns are limited to MAXMATCHES * characters. * - * Returns TRUE if fuzpat is found AND calculates a score. + * Returns TRUE if 'fuzpat' matches 'str'. Also returns the match score in + * 'outScore' and the matching character positions in 'matches'. */ static int -fuzzy_match(char_u *str, char_u *fuzpat, int *outScore) +fuzzy_match( + char_u *str, + char_u *fuzpat, + int *outScore, + matchidx_T *matches, + int maxMatches) { - char_u matches[256]; int recursionCount = 0; - int recursionLimit = 10; + int len = MB_CHARLEN(str); *outScore = 0; - return fuzzy_match_recursive(fuzpat, str, outScore, str, NULL, matches, - sizeof(matches), 0, &recursionCount, recursionLimit); + return fuzzy_match_recursive(fuzpat, str, 0, outScore, str, len, NULL, + matches, maxMatches, 0, &recursionCount); } /* @@ -4425,84 +4490,269 @@ fuzzy_item_compare(const void *s1, const } /* - * Fuzzy search the string 'str' in 'strlist' and return the matching strings - * in 'fmatchlist'. + * Fuzzy search the string 'str' in a list of 'items' and return the matching + * strings in 'fmatchlist'. + * If 'items' is a list of strings, then search for 'str' in the list. + * If 'items' is a list of dicts, then either use 'key' to lookup the string + * for each item or use 'item_cb' Funcref function to get the string. + * If 'retmatchpos' is TRUE, then return a list of positions where 'str' + * matches for each item. */ static void -match_fuzzy(list_T *strlist, char_u *str, list_T *fmatchlist) +match_fuzzy( + list_T *items, + char_u *str, + char_u *key, + callback_T *item_cb, + int retmatchpos, + list_T *fmatchlist) { long len; fuzzyItem_T *ptrs; listitem_T *li; long i = 0; int found_match = FALSE; - - len = list_len(strlist); + matchidx_T matches[MAXMATCHES]; + + len = list_len(items); if (len == 0) return; - ptrs = ALLOC_MULT(fuzzyItem_T, len); + ptrs = ALLOC_CLEAR_MULT(fuzzyItem_T, len); if (ptrs == NULL) return; - // For all the string items in strlist, get the fuzzy matching score - FOR_ALL_LIST_ITEMS(strlist, li) + // For all the string items in items, get the fuzzy matching score + FOR_ALL_LIST_ITEMS(items, li) { - int score; + int score; + char_u *itemstr; + typval_T rettv; ptrs[i].item = li; - ptrs[i].score = -9999; - // ignore non-string items in the list - if (li->li_tv.v_type == VAR_STRING && li->li_tv.vval.v_string != NULL) - if (fuzzy_match(li->li_tv.vval.v_string, str, &score)) + ptrs[i].score = SCORE_NONE; + itemstr = NULL; + rettv.v_type = VAR_UNKNOWN; + if (li->li_tv.v_type == VAR_STRING) // list of strings + itemstr = li->li_tv.vval.v_string; + else if (li->li_tv.v_type == VAR_DICT && + (key != NULL || item_cb->cb_name != NULL)) + { + // For a dict, either use the specified key to lookup the string or + // use the specified callback function to get the string. + if (key != NULL) + itemstr = dict_get_string(li->li_tv.vval.v_dict, key, FALSE); + else { - ptrs[i].score = score; - found_match = TRUE; + typval_T argv[2]; + + // Invoke the supplied callback (if any) to get the dict item + li->li_tv.vval.v_dict->dv_refcount++; + argv[0].v_type = VAR_DICT; + argv[0].vval.v_dict = li->li_tv.vval.v_dict; + argv[1].v_type = VAR_UNKNOWN; + if (call_callback(item_cb, -1, &rettv, 1, argv) != FAIL) + { + if (rettv.v_type == VAR_STRING) + itemstr = rettv.vval.v_string; + } + dict_unref(li->li_tv.vval.v_dict); } + } + + if (itemstr != NULL + && fuzzy_match(itemstr, str, &score, matches, + sizeof(matches) / sizeof(matches[0]))) + { + // Copy the list of matching positions in itemstr to a list, if + // 'retmatchpos' is set. + if (retmatchpos) + { + int j; + int strsz; + + ptrs[i].lmatchpos = list_alloc(); + if (ptrs[i].lmatchpos == NULL) + goto done; + strsz = MB_CHARLEN(str); + for (j = 0; j < strsz; j++) + { + if (list_append_number(ptrs[i].lmatchpos, + matches[j]) == FAIL) + goto done; + } + } + ptrs[i].score = score; + found_match = TRUE; + } ++i; + clear_tv(&rettv); } if (found_match) { + list_T *l; + // Sort the list by the descending order of the match score qsort((void *)ptrs, (size_t)len, sizeof(fuzzyItem_T), fuzzy_item_compare); - // Copy the matching strings with 'score != -9999' to the return list + // For matchfuzzy(), return a list of matched strings. + // ['str1', 'str2', 'str3'] + // For matchfuzzypos(), return a list with two items. + // The first item is a list of matched strings. The second item + // is a list of lists where each list item is a list of matched + // character positions. + // [['str1', 'str2', 'str3'], [[1, 3], [1, 3], [1, 3]]] + if (retmatchpos) + { + li = list_find(fmatchlist, 0); + if (li == NULL || li->li_tv.vval.v_list == NULL) + goto done; + l = li->li_tv.vval.v_list; + } + else + l = fmatchlist; + + // Copy the matching strings with a valid score to the return list for (i = 0; i < len; i++) { - if (ptrs[i].score == -9999) + if (ptrs[i].score == SCORE_NONE) break; - list_append_string(fmatchlist, ptrs[i].item->li_tv.vval.v_string, - -1); + list_append_tv(l, &ptrs[i].item->li_tv); + } + + // next copy the list of matching positions + if (retmatchpos) + { + li = list_find(fmatchlist, -1); + if (li == NULL || li->li_tv.vval.v_list == NULL) + goto done; + l = li->li_tv.vval.v_list; + + for (i = 0; i < len; i++) + { + if (ptrs[i].score == SCORE_NONE) + break; + if (ptrs[i].lmatchpos != NULL && + list_append_list(l, ptrs[i].lmatchpos) == FAIL) + goto done; + } } } +done: vim_free(ptrs); } /* + * Do fuzzy matching. Returns the list of matched strings in 'rettv'. + * If 'retmatchpos' is TRUE, also returns the matching character positions. + */ + static void +do_fuzzymatch(typval_T *argvars, typval_T *rettv, int retmatchpos) +{ + callback_T cb; + char_u *key = NULL; + int ret; + + CLEAR_POINTER(&cb); + + // validate and get the arguments + if (argvars[0].v_type != VAR_LIST || argvars[0].vval.v_list == NULL) + { + semsg(_(e_listarg), retmatchpos ? "matchfuzzypos()" : "matchfuzzy()"); + return; + } + if (argvars[1].v_type != VAR_STRING + || argvars[1].vval.v_string == NULL) + { + semsg(_(e_invarg2), tv_get_string(&argvars[1])); + return; + } + + if (argvars[2].v_type != VAR_UNKNOWN) + { + dict_T *d; + dictitem_T *di; + + if (argvars[2].v_type != VAR_DICT || argvars[2].vval.v_dict == NULL) + { + emsg(_(e_dictreq)); + return; + } + + // To search a dict, either a callback function or a key can be + // specified. + d = argvars[2].vval.v_dict; + if ((di = dict_find(d, (char_u *)"key", -1)) != NULL) + { + if (di->di_tv.v_type != VAR_STRING + || di->di_tv.vval.v_string == NULL + || *di->di_tv.vval.v_string == NUL) + { + semsg(_(e_invarg2), tv_get_string(&di->di_tv)); + return; + } + key = tv_get_string(&di->di_tv); + } + else if ((di = dict_find(d, (char_u *)"text_cb", -1)) != NULL) + { + cb = get_callback(&di->di_tv); + if (cb.cb_name == NULL) + { + semsg(_(e_invargval), "text_cb"); + return; + } + } + } + + // get the fuzzy matches + ret = rettv_list_alloc(rettv); + if (ret != OK) + goto done; + if (retmatchpos) + { + list_T *l; + + // For matchfuzzypos(), a list with two items are returned. First item + // is a list of matching strings and the second item is a list of + // lists with matching positions within each string. + l = list_alloc(); + if (l == NULL) + goto done; + if (list_append_list(rettv->vval.v_list, l) == FAIL) + goto done; + l = list_alloc(); + if (l == NULL) + goto done; + if (list_append_list(rettv->vval.v_list, l) == FAIL) + goto done; + } + + match_fuzzy(argvars[0].vval.v_list, tv_get_string(&argvars[1]), key, + &cb, retmatchpos, rettv->vval.v_list); + +done: + free_callback(&cb); +} + +/* * "matchfuzzy()" function */ void f_matchfuzzy(typval_T *argvars, typval_T *rettv) { - if (argvars[0].v_type != VAR_LIST) - { - emsg(_(e_listreq)); - return; - } - if (argvars[0].vval.v_list == NULL) - return; - if (argvars[1].v_type != VAR_STRING - || argvars[1].vval.v_string == NULL) - { - semsg(_(e_invarg2), tv_get_string(&argvars[1])); - return; - } - if (rettv_list_alloc(rettv) == OK) - match_fuzzy(argvars[0].vval.v_list, tv_get_string(&argvars[1]), - rettv->vval.v_list); + do_fuzzymatch(argvars, rettv, FALSE); +} + +/* + * "matchfuzzypos()" function + */ + void +f_matchfuzzypos(typval_T *argvars, typval_T *rettv) +{ + do_fuzzymatch(argvars, rettv, TRUE); } #endif
--- a/src/testdir/Make_all.mak +++ b/src/testdir/Make_all.mak @@ -184,6 +184,7 @@ NEW_TESTS = \ test_match \ test_matchadd_conceal \ test_matchadd_conceal_utf8 \ + test_matchfuzzy \ test_memory_usage \ test_menu \ test_messages \ @@ -420,6 +421,7 @@ NEW_TESTS_RES = \ test_match.res \ test_matchadd_conceal.res \ test_matchadd_conceal_utf8.res \ + test_matchfuzzy.res \ test_memory_usage.res \ test_menu.res \ test_messages.res \
--- a/src/testdir/test_functions.vim +++ b/src/testdir/test_functions.vim @@ -2554,28 +2554,4 @@ func Test_browsedir() call assert_fails('call browsedir("open", [])', 'E730:') endfunc -" Test for matchfuzzy() -func Test_matchfuzzy() - call assert_fails('call matchfuzzy(10, "abc")', 'E714:') - call assert_fails('call matchfuzzy(["abc"], [])', 'E730:') - call assert_equal([], matchfuzzy([], 'abc')) - call assert_equal([], matchfuzzy(['abc'], '')) - call assert_equal(['abc'], matchfuzzy(['abc', 10], 'ac')) - call assert_equal([], matchfuzzy([10, 20], 'ac')) - call assert_equal(['abc'], matchfuzzy(['abc'], 'abc')) - call assert_equal(['crayon', 'camera'], matchfuzzy(['camera', 'crayon'], 'cra')) - call assert_equal(['aabbaa', 'aaabbbaaa', 'aaaabbbbaaaa', 'aba'], matchfuzzy(['aba', 'aabbaa', 'aaabbbaaa', 'aaaabbbbaaaa'], 'aa')) - call assert_equal(['one'], matchfuzzy(['one', 'two'], 'one')) - call assert_equal(['oneTwo', 'onetwo'], matchfuzzy(['onetwo', 'oneTwo'], 'oneTwo')) - call assert_equal(['one_two', 'onetwo'], matchfuzzy(['onetwo', 'one_two'], 'oneTwo')) - call assert_equal(['aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'], matchfuzzy(['aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'], 'aa')) - call assert_equal([], matchfuzzy([repeat('a', 300)], repeat('a', 257))) - - %bw! - eval ['somebuf', 'anotherone', 'needle', 'yetanotherone']->map({_, v -> bufadd(v) + bufload(v)}) - let l = getbufinfo()->map({_, v -> v.name})->matchfuzzy('ndl') - call assert_equal(1, len(l)) - call assert_match('needle', l[0]) -endfunc - " vim: shiftwidth=2 sts=2 expandtab
new file mode 100644 --- /dev/null +++ b/src/testdir/test_matchfuzzy.vim @@ -0,0 +1,188 @@ +" Tests for fuzzy matching + +source shared.vim +source check.vim + +" Test for matchfuzzy() +func Test_matchfuzzy() + call assert_fails('call matchfuzzy(10, "abc")', 'E686:') + call assert_fails('call matchfuzzy(["abc"], [])', 'E730:') + call assert_fails("let x = matchfuzzy(test_null_list(), 'foo')", 'E686:') + call assert_fails('call matchfuzzy(["abc"], test_null_string())', 'E475:') + call assert_equal([], matchfuzzy([], 'abc')) + call assert_equal([], matchfuzzy(['abc'], '')) + call assert_equal(['abc'], matchfuzzy(['abc', 10], 'ac')) + call assert_equal([], matchfuzzy([10, 20], 'ac')) + call assert_equal(['abc'], matchfuzzy(['abc'], 'abc')) + call assert_equal(['crayon', 'camera'], matchfuzzy(['camera', 'crayon'], 'cra')) + call assert_equal(['aabbaa', 'aaabbbaaa', 'aaaabbbbaaaa', 'aba'], matchfuzzy(['aba', 'aabbaa', 'aaabbbaaa', 'aaaabbbbaaaa'], 'aa')) + call assert_equal(['one'], matchfuzzy(['one', 'two'], 'one')) + call assert_equal(['oneTwo', 'onetwo'], matchfuzzy(['onetwo', 'oneTwo'], 'oneTwo')) + call assert_equal(['one_two', 'onetwo'], matchfuzzy(['onetwo', 'one_two'], 'oneTwo')) + call assert_equal(['aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'], matchfuzzy(['aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'], 'aa')) + call assert_equal(256, matchfuzzy([repeat('a', 256)], repeat('a', 256))[0]->len()) + call assert_equal([], matchfuzzy([repeat('a', 300)], repeat('a', 257))) + + " Tests for match preferences + " preference for camel case match + call assert_equal(['oneTwo', 'onetwo'], ['onetwo', 'oneTwo']->matchfuzzy('onetwo')) + " preference for match after a separator (_ or space) + call assert_equal(['one_two', 'one two', 'onetwo'], ['onetwo', 'one_two', 'one two']->matchfuzzy('onetwo')) + " preference for leading letter match + call assert_equal(['onetwo', 'xonetwo'], ['xonetwo', 'onetwo']->matchfuzzy('onetwo')) + " preference for sequential match + call assert_equal(['onetwo', 'oanbectdweo'], ['oanbectdweo', 'onetwo']->matchfuzzy('onetwo')) + " non-matching leading letter(s) penalty + call assert_equal(['xonetwo', 'xxonetwo'], ['xxonetwo', 'xonetwo']->matchfuzzy('onetwo')) + " total non-matching letter(s) penalty + call assert_equal(['one', 'onex', 'onexx'], ['onexx', 'one', 'onex']->matchfuzzy('one')) + + %bw! + eval ['somebuf', 'anotherone', 'needle', 'yetanotherone']->map({_, v -> bufadd(v) + bufload(v)}) + let l = getbufinfo()->map({_, v -> v.name})->matchfuzzy('ndl') + call assert_equal(1, len(l)) + call assert_match('needle', l[0]) + + let l = [{'id' : 5, 'val' : 'crayon'}, {'id' : 6, 'val' : 'camera'}] + call assert_equal([{'id' : 6, 'val' : 'camera'}], matchfuzzy(l, 'cam', {'text_cb' : {v -> v.val}})) + call assert_equal([{'id' : 6, 'val' : 'camera'}], matchfuzzy(l, 'cam', {'key' : 'val'})) + call assert_equal([], matchfuzzy(l, 'day', {'text_cb' : {v -> v.val}})) + call assert_equal([], matchfuzzy(l, 'day', {'key' : 'val'})) + call assert_fails("let x = matchfuzzy(l, 'cam', 'random')", 'E715:') + call assert_equal([], matchfuzzy(l, 'day', {'text_cb' : {v -> []}})) + call assert_equal([], matchfuzzy(l, 'day', {'text_cb' : {v -> 1}})) + call assert_fails("let x = matchfuzzy(l, 'day', {'text_cb' : {a, b -> 1}})", 'E119:') + call assert_equal([], matchfuzzy(l, 'cam')) + call assert_fails("let x = matchfuzzy(l, 'cam', {'text_cb' : []})", 'E921:') + call assert_fails("let x = matchfuzzy(l, 'foo', {'key' : []})", 'E730:') + call assert_fails("let x = matchfuzzy(l, 'cam', test_null_dict())", 'E715:') + call assert_fails("let x = matchfuzzy(l, 'foo', {'key' : test_null_string()})", 'E475:') + call assert_fails("let x = matchfuzzy(l, 'foo', {'text_cb' : test_null_function()})", 'E475:') + + let l = [{'id' : 5, 'name' : 'foo'}, {'id' : 6, 'name' : []}, {'id' : 7}] + call assert_fails("let x = matchfuzzy(l, 'foo', {'key' : 'name'})", 'E730:') + + " Test in latin1 encoding + let save_enc = &encoding + set encoding=latin1 + call assert_equal(['abc'], matchfuzzy(['abc'], 'abc')) + let &encoding = save_enc +endfunc + +" Test for the fuzzymatchpos() function +func Test_matchfuzzypos() + call assert_equal([['curl', 'world'], [[2,3], [2,3]]], matchfuzzypos(['world', 'curl'], 'rl')) + call assert_equal([['curl', 'world'], [[2,3], [2,3]]], matchfuzzypos(['world', 'one', 'curl'], 'rl')) + call assert_equal([['hello', 'hello world hello world'], + \ [[0, 1, 2, 3, 4], [0, 1, 2, 3, 4]]], + \ matchfuzzypos(['hello world hello world', 'hello', 'world'], 'hello')) + call assert_equal([['aaaaaaa'], [[0, 1, 2]]], matchfuzzypos(['aaaaaaa'], 'aaa')) + call assert_equal([[], []], matchfuzzypos(['world', 'curl'], 'ab')) + let x = matchfuzzypos([repeat('a', 256)], repeat('a', 256)) + call assert_equal(range(256), x[1][0]) + call assert_equal([[], []], matchfuzzypos([repeat('a', 300)], repeat('a', 257))) + call assert_equal([[], []], matchfuzzypos([], 'abc')) + + " match in a long string + call assert_equal([[repeat('x', 300) .. 'abc'], [[300, 301, 302]]], + \ matchfuzzypos([repeat('x', 300) .. 'abc'], 'abc')) + + " preference for camel case match + call assert_equal([['xabcxxaBc'], [[6, 7, 8]]], matchfuzzypos(['xabcxxaBc'], 'abc')) + " preference for match after a separator (_ or space) + call assert_equal([['xabx_ab'], [[5, 6]]], matchfuzzypos(['xabx_ab'], 'ab')) + " preference for leading letter match + call assert_equal([['abcxabc'], [[0, 1]]], matchfuzzypos(['abcxabc'], 'ab')) + " preference for sequential match + call assert_equal([['aobncedone'], [[7, 8, 9]]], matchfuzzypos(['aobncedone'], 'one')) + " best recursive match + call assert_equal([['xoone'], [[2, 3, 4]]], matchfuzzypos(['xoone'], 'one')) + + let l = [{'id' : 5, 'val' : 'crayon'}, {'id' : 6, 'val' : 'camera'}] + call assert_equal([[{'id' : 6, 'val' : 'camera'}], [[0, 1, 2]]], + \ matchfuzzypos(l, 'cam', {'text_cb' : {v -> v.val}})) + call assert_equal([[{'id' : 6, 'val' : 'camera'}], [[0, 1, 2]]], + \ matchfuzzypos(l, 'cam', {'key' : 'val'})) + call assert_equal([[], []], matchfuzzypos(l, 'day', {'text_cb' : {v -> v.val}})) + call assert_equal([[], []], matchfuzzypos(l, 'day', {'key' : 'val'})) + call assert_fails("let x = matchfuzzypos(l, 'cam', 'random')", 'E715:') + call assert_equal([[], []], matchfuzzypos(l, 'day', {'text_cb' : {v -> []}})) + call assert_equal([[], []], matchfuzzypos(l, 'day', {'text_cb' : {v -> 1}})) + call assert_fails("let x = matchfuzzypos(l, 'day', {'text_cb' : {a, b -> 1}})", 'E119:') + call assert_equal([[], []], matchfuzzypos(l, 'cam')) + call assert_fails("let x = matchfuzzypos(l, 'cam', {'text_cb' : []})", 'E921:') + call assert_fails("let x = matchfuzzypos(l, 'foo', {'key' : []})", 'E730:') + call assert_fails("let x = matchfuzzypos(l, 'cam', test_null_dict())", 'E715:') + call assert_fails("let x = matchfuzzypos(l, 'foo', {'key' : test_null_string()})", 'E475:') + call assert_fails("let x = matchfuzzypos(l, 'foo', {'text_cb' : test_null_function()})", 'E475:') + + let l = [{'id' : 5, 'name' : 'foo'}, {'id' : 6, 'name' : []}, {'id' : 7}] + call assert_fails("let x = matchfuzzypos(l, 'foo', {'key' : 'name'})", 'E730:') +endfunc + +func Test_matchfuzzy_mbyte() + CheckFeature multi_lang + call assert_equal(['ンヹㄇヺヴ'], matchfuzzy(['ンヹㄇヺヴ'], 'ヹヺ')) + " reverse the order of characters + call assert_equal([], matchfuzzy(['ンヹㄇヺヴ'], 'ヺヹ')) + call assert_equal(['αβΩxxx', 'xαxβxΩx'], + \ matchfuzzy(['αβΩxxx', 'xαxβxΩx'], 'αβΩ')) + call assert_equal(['ππbbππ', 'πππbbbπππ', 'ππππbbbbππππ', 'πbπ'], + \ matchfuzzy(['πbπ', 'ππbbππ', 'πππbbbπππ', 'ππππbbbbππππ'], 'ππ')) + + " preference for camel case match + call assert_equal(['oneĄwo', 'oneąwo'], + \ ['oneąwo', 'oneĄwo']->matchfuzzy('oneąwo')) + " preference for match after a separator (_ or space) + call assert_equal(['ⅠⅡa_bㄟㄠ', 'ⅠⅡa bㄟㄠ', 'ⅠⅡabㄟㄠ'], + \ ['ⅠⅡabㄟㄠ', 'ⅠⅡa_bㄟㄠ', 'ⅠⅡa bㄟㄠ']->matchfuzzy('ⅠⅡabㄟㄠ')) + " preference for leading letter match + call assert_equal(['ŗŝţũŵż', 'xŗŝţũŵż'], + \ ['xŗŝţũŵż', 'ŗŝţũŵż']->matchfuzzy('ŗŝţũŵż')) + " preference for sequential match + call assert_equal(['ㄞㄡㄤfffifl', 'ㄞaㄡbㄤcffdfiefl'], + \ ['ㄞaㄡbㄤcffdfiefl', 'ㄞㄡㄤfffifl']->matchfuzzy('ㄞㄡㄤfffifl')) + " non-matching leading letter(s) penalty + call assert_equal(['xㄞㄡㄤfffifl', 'xxㄞㄡㄤfffifl'], + \ ['xxㄞㄡㄤfffifl', 'xㄞㄡㄤfffifl']->matchfuzzy('ㄞㄡㄤfffifl')) + " total non-matching letter(s) penalty + call assert_equal(['ŗŝţ', 'ŗŝţx', 'ŗŝţxx'], + \ ['ŗŝţxx', 'ŗŝţ', 'ŗŝţx']->matchfuzzy('ŗŝţ')) +endfunc + +func Test_matchfuzzypos_mbyte() + CheckFeature multi_lang + call assert_equal([['こんにちは世界'], [[0, 1, 2, 3, 4]]], + \ matchfuzzypos(['こんにちは世界'], 'こんにちは')) + call assert_equal([['ンヹㄇヺヴ'], [[1, 3]]], matchfuzzypos(['ンヹㄇヺヴ'], 'ヹヺ')) + " reverse the order of characters + call assert_equal([[], []], matchfuzzypos(['ンヹㄇヺヴ'], 'ヺヹ')) + call assert_equal([['αβΩxxx', 'xαxβxΩx'], [[0, 1, 2], [1, 3, 5]]], + \ matchfuzzypos(['αβΩxxx', 'xαxβxΩx'], 'αβΩ')) + call assert_equal([['ππbbππ', 'πππbbbπππ', 'ππππbbbbππππ', 'πbπ'], + \ [[0, 1], [0, 1], [0, 1], [0, 2]]], + \ matchfuzzypos(['πbπ', 'ππbbππ', 'πππbbbπππ', 'ππππbbbbππππ'], 'ππ')) + call assert_equal([['ααααααα'], [[0, 1, 2]]], + \ matchfuzzypos(['ααααααα'], 'ααα')) + + call assert_equal([[], []], matchfuzzypos(['ンヹㄇ', 'ŗŝţ'], 'fffifl')) + let x = matchfuzzypos([repeat('Ψ', 256)], repeat('Ψ', 256)) + call assert_equal(range(256), x[1][0]) + call assert_equal([[], []], matchfuzzypos([repeat('✓', 300)], repeat('✓', 257))) + + " match in a long string + call assert_equal([[repeat('♪', 300) .. '✗✗✗'], [[300, 301, 302]]], + \ matchfuzzypos([repeat('♪', 300) .. '✗✗✗'], '✗✗✗')) + " preference for camel case match + call assert_equal([['xѳѵҁxxѳѴҁ'], [[6, 7, 8]]], matchfuzzypos(['xѳѵҁxxѳѴҁ'], 'ѳѵҁ')) + " preference for match after a separator (_ or space) + call assert_equal([['xちだx_ちだ'], [[5, 6]]], matchfuzzypos(['xちだx_ちだ'], 'ちだ')) + " preference for leading letter match + call assert_equal([['ѳѵҁxѳѵҁ'], [[0, 1]]], matchfuzzypos(['ѳѵҁxѳѵҁ'], 'ѳѵ')) + " preference for sequential match + call assert_equal([['aンbヹcㄇdンヹㄇ'], [[7, 8, 9]]], matchfuzzypos(['aンbヹcㄇdンヹㄇ'], 'ンヹㄇ')) + " best recursive match + call assert_equal([['xффйд'], [[2, 3, 4]]], matchfuzzypos(['xффйд'], 'фйд')) +endfunc + +" vim: shiftwidth=2 sts=2 expandtab