comparison src/search.c @ 24547:192058cad081 v8.2.2813

patch 8.2.2813: cannot grep using fuzzy matching Commit: https://github.com/vim/vim/commit/bb01a1ef3a093cdb36877ba73474719c531dc8cb Author: Yegappan Lakshmanan <yegappan@yahoo.com> Date: Mon Apr 26 21:17:52 2021 +0200 patch 8.2.2813: cannot grep using fuzzy matching Problem: Cannot grep using fuzzy matching. Solution: Add the "f" flag to :vimgrep. (Yegappan Lakshmanan, closes https://github.com/vim/vim/issues/8152)
author Bram Moolenaar <Bram@vim.org>
date Mon, 26 Apr 2021 21:30:04 +0200
parents 55f458d35292
children 7334bf933510
comparison
equal deleted inserted replaced
24546:eb6c05ae77f9 24547:192058cad081
4283 #define GAP_PENALTY -2 4283 #define GAP_PENALTY -2
4284 // Score for a string that doesn't fuzzy match the pattern 4284 // Score for a string that doesn't fuzzy match the pattern
4285 #define SCORE_NONE -9999 4285 #define SCORE_NONE -9999
4286 4286
4287 #define FUZZY_MATCH_RECURSION_LIMIT 10 4287 #define FUZZY_MATCH_RECURSION_LIMIT 10
4288 // Maximum number of characters that can be fuzzy matched
4289 #define MAXMATCHES 256
4290
4291 typedef int_u matchidx_T;
4292 4288
4293 /* 4289 /*
4294 * Compute a score for a fuzzy matched string. The matching character locations 4290 * Compute a score for a fuzzy matched string. The matching character locations
4295 * are in 'matches'. 4291 * are in 'matches'.
4296 */ 4292 */
4297 static int 4293 static int
4298 fuzzy_match_compute_score( 4294 fuzzy_match_compute_score(
4299 char_u *str, 4295 char_u *str,
4300 int strSz, 4296 int strSz,
4301 matchidx_T *matches, 4297 int_u *matches,
4302 int numMatches) 4298 int numMatches)
4303 { 4299 {
4304 int score; 4300 int score;
4305 int penalty; 4301 int penalty;
4306 int unmatched; 4302 int unmatched;
4307 int i; 4303 int i;
4308 char_u *p = str; 4304 char_u *p = str;
4309 matchidx_T sidx = 0; 4305 int_u sidx = 0;
4310 4306
4311 // Initialize score 4307 // Initialize score
4312 score = 100; 4308 score = 100;
4313 4309
4314 // Apply leading letter penalty 4310 // Apply leading letter penalty
4322 score += UNMATCHED_LETTER_PENALTY * unmatched; 4318 score += UNMATCHED_LETTER_PENALTY * unmatched;
4323 4319
4324 // Apply ordering bonuses 4320 // Apply ordering bonuses
4325 for (i = 0; i < numMatches; ++i) 4321 for (i = 0; i < numMatches; ++i)
4326 { 4322 {
4327 matchidx_T currIdx = matches[i]; 4323 int_u currIdx = matches[i];
4328 4324
4329 if (i > 0) 4325 if (i > 0)
4330 { 4326 {
4331 matchidx_T prevIdx = matches[i - 1]; 4327 int_u prevIdx = matches[i - 1];
4332 4328
4333 // Sequential 4329 // Sequential
4334 if (currIdx == (prevIdx + 1)) 4330 if (currIdx == (prevIdx + 1))
4335 score += SEQUENTIAL_BONUS; 4331 score += SEQUENTIAL_BONUS;
4336 else 4332 else
4384 */ 4380 */
4385 static int 4381 static int
4386 fuzzy_match_recursive( 4382 fuzzy_match_recursive(
4387 char_u *fuzpat, 4383 char_u *fuzpat,
4388 char_u *str, 4384 char_u *str,
4389 matchidx_T strIdx, 4385 int_u strIdx,
4390 int *outScore, 4386 int *outScore,
4391 char_u *strBegin, 4387 char_u *strBegin,
4392 int strLen, 4388 int strLen,
4393 matchidx_T *srcMatches, 4389 int_u *srcMatches,
4394 matchidx_T *matches, 4390 int_u *matches,
4395 int maxMatches, 4391 int maxMatches,
4396 int nextMatch, 4392 int nextMatch,
4397 int *recursionCount) 4393 int *recursionCount)
4398 { 4394 {
4399 // Recursion params 4395 // Recursion params
4400 int recursiveMatch = FALSE; 4396 int recursiveMatch = FALSE;
4401 matchidx_T bestRecursiveMatches[MAXMATCHES]; 4397 int_u bestRecursiveMatches[MAX_FUZZY_MATCHES];
4402 int bestRecursiveScore = 0; 4398 int bestRecursiveScore = 0;
4403 int first_match; 4399 int first_match;
4404 int matched; 4400 int matched;
4405 4401
4406 // Count recursions 4402 // Count recursions
4407 ++*recursionCount; 4403 ++*recursionCount;
4408 if (*recursionCount >= FUZZY_MATCH_RECURSION_LIMIT) 4404 if (*recursionCount >= FUZZY_MATCH_RECURSION_LIMIT)
4409 return 0; 4405 return 0;
4410 4406
4411 // Detect end of strings 4407 // Detect end of strings
4412 if (*fuzpat == '\0' || *str == '\0') 4408 if (*fuzpat == NUL || *str == NUL)
4413 return 0; 4409 return 0;
4414 4410
4415 // Loop through fuzpat and str looking for a match 4411 // Loop through fuzpat and str looking for a match
4416 first_match = TRUE; 4412 first_match = TRUE;
4417 while (*fuzpat != NUL && *str != NUL) 4413 while (*fuzpat != NUL && *str != NUL)
4423 c2 = PTR2CHAR(str); 4419 c2 = PTR2CHAR(str);
4424 4420
4425 // Found match 4421 // Found match
4426 if (vim_tolower(c1) == vim_tolower(c2)) 4422 if (vim_tolower(c1) == vim_tolower(c2))
4427 { 4423 {
4428 matchidx_T recursiveMatches[MAXMATCHES]; 4424 int_u recursiveMatches[MAX_FUZZY_MATCHES];
4429 int recursiveScore = 0; 4425 int recursiveScore = 0;
4430 char_u *next_char; 4426 char_u *next_char;
4431 4427
4432 // Supplied matches buffer was too short 4428 // Supplied matches buffer was too short
4433 if (nextMatch >= maxMatches) 4429 if (nextMatch >= maxMatches)
4453 { 4449 {
4454 // Pick best recursive score 4450 // Pick best recursive score
4455 if (!recursiveMatch || recursiveScore > bestRecursiveScore) 4451 if (!recursiveMatch || recursiveScore > bestRecursiveScore)
4456 { 4452 {
4457 memcpy(bestRecursiveMatches, recursiveMatches, 4453 memcpy(bestRecursiveMatches, recursiveMatches,
4458 MAXMATCHES * sizeof(recursiveMatches[0])); 4454 MAX_FUZZY_MATCHES * sizeof(recursiveMatches[0]));
4459 bestRecursiveScore = recursiveScore; 4455 bestRecursiveScore = recursiveScore;
4460 } 4456 }
4461 recursiveMatch = TRUE; 4457 recursiveMatch = TRUE;
4462 } 4458 }
4463 4459
4504 * match with highest score. 4500 * match with highest score.
4505 * Scores values have no intrinsic meaning. Possible score range is not 4501 * Scores values have no intrinsic meaning. Possible score range is not
4506 * normalized and varies with pattern. 4502 * normalized and varies with pattern.
4507 * Recursion is limited internally (default=10) to prevent degenerate cases 4503 * Recursion is limited internally (default=10) to prevent degenerate cases
4508 * (pat_arg="aaaaaa" str="aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"). 4504 * (pat_arg="aaaaaa" str="aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa").
4509 * Uses char_u for match indices. Therefore patterns are limited to MAXMATCHES 4505 * Uses char_u for match indices. Therefore patterns are limited to
4510 * characters. 4506 * MAX_FUZZY_MATCHES characters.
4511 * 4507 *
4512 * Returns TRUE if 'pat_arg' matches 'str'. Also returns the match score in 4508 * Returns TRUE if 'pat_arg' matches 'str'. Also returns the match score in
4513 * 'outScore' and the matching character positions in 'matches'. 4509 * 'outScore' and the matching character positions in 'matches'.
4514 */ 4510 */
4515 static int 4511 int
4516 fuzzy_match( 4512 fuzzy_match(
4517 char_u *str, 4513 char_u *str,
4518 char_u *pat_arg, 4514 char_u *pat_arg,
4519 int matchseq, 4515 int matchseq,
4520 int *outScore, 4516 int *outScore,
4521 matchidx_T *matches, 4517 int_u *matches,
4522 int maxMatches) 4518 int maxMatches)
4523 { 4519 {
4524 int recursionCount = 0; 4520 int recursionCount = 0;
4525 int len = MB_CHARLEN(str); 4521 int len = MB_CHARLEN(str);
4526 char_u *save_pat; 4522 char_u *save_pat;
4628 long len; 4624 long len;
4629 fuzzyItem_T *ptrs; 4625 fuzzyItem_T *ptrs;
4630 listitem_T *li; 4626 listitem_T *li;
4631 long i = 0; 4627 long i = 0;
4632 int found_match = FALSE; 4628 int found_match = FALSE;
4633 matchidx_T matches[MAXMATCHES]; 4629 int_u matches[MAX_FUZZY_MATCHES];
4634 4630
4635 len = list_len(items); 4631 len = list_len(items);
4636 if (len == 0) 4632 if (len == 0)
4637 return; 4633 return;
4638 4634
4845 { 4841 {
4846 semsg(_(e_invargval), "text_cb"); 4842 semsg(_(e_invargval), "text_cb");
4847 return; 4843 return;
4848 } 4844 }
4849 } 4845 }
4850 if ((di = dict_find(d, (char_u *)"matchseq", -1)) != NULL) 4846 if (dict_find(d, (char_u *)"matchseq", -1) != NULL)
4851 matchseq = TRUE; 4847 matchseq = TRUE;
4852 } 4848 }
4853 4849
4854 // get the fuzzy matches 4850 // get the fuzzy matches
4855 ret = rettv_list_alloc(rettv); 4851 ret = rettv_list_alloc(rettv);