Mercurial > vim
comparison src/search.c @ 24547:192058cad081 v8.2.2813
patch 8.2.2813: cannot grep using fuzzy matching
Commit: https://github.com/vim/vim/commit/bb01a1ef3a093cdb36877ba73474719c531dc8cb
Author: Yegappan Lakshmanan <yegappan@yahoo.com>
Date: Mon Apr 26 21:17:52 2021 +0200
patch 8.2.2813: cannot grep using fuzzy matching
Problem: Cannot grep using fuzzy matching.
Solution: Add the "f" flag to :vimgrep. (Yegappan Lakshmanan, closes https://github.com/vim/vim/issues/8152)
author | Bram Moolenaar <Bram@vim.org> |
---|---|
date | Mon, 26 Apr 2021 21:30:04 +0200 |
parents | 55f458d35292 |
children | 7334bf933510 |
comparison
equal
deleted
inserted
replaced
24546:eb6c05ae77f9 | 24547:192058cad081 |
---|---|
4283 #define GAP_PENALTY -2 | 4283 #define GAP_PENALTY -2 |
4284 // Score for a string that doesn't fuzzy match the pattern | 4284 // Score for a string that doesn't fuzzy match the pattern |
4285 #define SCORE_NONE -9999 | 4285 #define SCORE_NONE -9999 |
4286 | 4286 |
4287 #define FUZZY_MATCH_RECURSION_LIMIT 10 | 4287 #define FUZZY_MATCH_RECURSION_LIMIT 10 |
4288 // Maximum number of characters that can be fuzzy matched | |
4289 #define MAXMATCHES 256 | |
4290 | |
4291 typedef int_u matchidx_T; | |
4292 | 4288 |
4293 /* | 4289 /* |
4294 * Compute a score for a fuzzy matched string. The matching character locations | 4290 * Compute a score for a fuzzy matched string. The matching character locations |
4295 * are in 'matches'. | 4291 * are in 'matches'. |
4296 */ | 4292 */ |
4297 static int | 4293 static int |
4298 fuzzy_match_compute_score( | 4294 fuzzy_match_compute_score( |
4299 char_u *str, | 4295 char_u *str, |
4300 int strSz, | 4296 int strSz, |
4301 matchidx_T *matches, | 4297 int_u *matches, |
4302 int numMatches) | 4298 int numMatches) |
4303 { | 4299 { |
4304 int score; | 4300 int score; |
4305 int penalty; | 4301 int penalty; |
4306 int unmatched; | 4302 int unmatched; |
4307 int i; | 4303 int i; |
4308 char_u *p = str; | 4304 char_u *p = str; |
4309 matchidx_T sidx = 0; | 4305 int_u sidx = 0; |
4310 | 4306 |
4311 // Initialize score | 4307 // Initialize score |
4312 score = 100; | 4308 score = 100; |
4313 | 4309 |
4314 // Apply leading letter penalty | 4310 // Apply leading letter penalty |
4322 score += UNMATCHED_LETTER_PENALTY * unmatched; | 4318 score += UNMATCHED_LETTER_PENALTY * unmatched; |
4323 | 4319 |
4324 // Apply ordering bonuses | 4320 // Apply ordering bonuses |
4325 for (i = 0; i < numMatches; ++i) | 4321 for (i = 0; i < numMatches; ++i) |
4326 { | 4322 { |
4327 matchidx_T currIdx = matches[i]; | 4323 int_u currIdx = matches[i]; |
4328 | 4324 |
4329 if (i > 0) | 4325 if (i > 0) |
4330 { | 4326 { |
4331 matchidx_T prevIdx = matches[i - 1]; | 4327 int_u prevIdx = matches[i - 1]; |
4332 | 4328 |
4333 // Sequential | 4329 // Sequential |
4334 if (currIdx == (prevIdx + 1)) | 4330 if (currIdx == (prevIdx + 1)) |
4335 score += SEQUENTIAL_BONUS; | 4331 score += SEQUENTIAL_BONUS; |
4336 else | 4332 else |
4384 */ | 4380 */ |
4385 static int | 4381 static int |
4386 fuzzy_match_recursive( | 4382 fuzzy_match_recursive( |
4387 char_u *fuzpat, | 4383 char_u *fuzpat, |
4388 char_u *str, | 4384 char_u *str, |
4389 matchidx_T strIdx, | 4385 int_u strIdx, |
4390 int *outScore, | 4386 int *outScore, |
4391 char_u *strBegin, | 4387 char_u *strBegin, |
4392 int strLen, | 4388 int strLen, |
4393 matchidx_T *srcMatches, | 4389 int_u *srcMatches, |
4394 matchidx_T *matches, | 4390 int_u *matches, |
4395 int maxMatches, | 4391 int maxMatches, |
4396 int nextMatch, | 4392 int nextMatch, |
4397 int *recursionCount) | 4393 int *recursionCount) |
4398 { | 4394 { |
4399 // Recursion params | 4395 // Recursion params |
4400 int recursiveMatch = FALSE; | 4396 int recursiveMatch = FALSE; |
4401 matchidx_T bestRecursiveMatches[MAXMATCHES]; | 4397 int_u bestRecursiveMatches[MAX_FUZZY_MATCHES]; |
4402 int bestRecursiveScore = 0; | 4398 int bestRecursiveScore = 0; |
4403 int first_match; | 4399 int first_match; |
4404 int matched; | 4400 int matched; |
4405 | 4401 |
4406 // Count recursions | 4402 // Count recursions |
4407 ++*recursionCount; | 4403 ++*recursionCount; |
4408 if (*recursionCount >= FUZZY_MATCH_RECURSION_LIMIT) | 4404 if (*recursionCount >= FUZZY_MATCH_RECURSION_LIMIT) |
4409 return 0; | 4405 return 0; |
4410 | 4406 |
4411 // Detect end of strings | 4407 // Detect end of strings |
4412 if (*fuzpat == '\0' || *str == '\0') | 4408 if (*fuzpat == NUL || *str == NUL) |
4413 return 0; | 4409 return 0; |
4414 | 4410 |
4415 // Loop through fuzpat and str looking for a match | 4411 // Loop through fuzpat and str looking for a match |
4416 first_match = TRUE; | 4412 first_match = TRUE; |
4417 while (*fuzpat != NUL && *str != NUL) | 4413 while (*fuzpat != NUL && *str != NUL) |
4423 c2 = PTR2CHAR(str); | 4419 c2 = PTR2CHAR(str); |
4424 | 4420 |
4425 // Found match | 4421 // Found match |
4426 if (vim_tolower(c1) == vim_tolower(c2)) | 4422 if (vim_tolower(c1) == vim_tolower(c2)) |
4427 { | 4423 { |
4428 matchidx_T recursiveMatches[MAXMATCHES]; | 4424 int_u recursiveMatches[MAX_FUZZY_MATCHES]; |
4429 int recursiveScore = 0; | 4425 int recursiveScore = 0; |
4430 char_u *next_char; | 4426 char_u *next_char; |
4431 | 4427 |
4432 // Supplied matches buffer was too short | 4428 // Supplied matches buffer was too short |
4433 if (nextMatch >= maxMatches) | 4429 if (nextMatch >= maxMatches) |
4453 { | 4449 { |
4454 // Pick best recursive score | 4450 // Pick best recursive score |
4455 if (!recursiveMatch || recursiveScore > bestRecursiveScore) | 4451 if (!recursiveMatch || recursiveScore > bestRecursiveScore) |
4456 { | 4452 { |
4457 memcpy(bestRecursiveMatches, recursiveMatches, | 4453 memcpy(bestRecursiveMatches, recursiveMatches, |
4458 MAXMATCHES * sizeof(recursiveMatches[0])); | 4454 MAX_FUZZY_MATCHES * sizeof(recursiveMatches[0])); |
4459 bestRecursiveScore = recursiveScore; | 4455 bestRecursiveScore = recursiveScore; |
4460 } | 4456 } |
4461 recursiveMatch = TRUE; | 4457 recursiveMatch = TRUE; |
4462 } | 4458 } |
4463 | 4459 |
4504 * match with highest score. | 4500 * match with highest score. |
4505 * Scores values have no intrinsic meaning. Possible score range is not | 4501 * Scores values have no intrinsic meaning. Possible score range is not |
4506 * normalized and varies with pattern. | 4502 * normalized and varies with pattern. |
4507 * Recursion is limited internally (default=10) to prevent degenerate cases | 4503 * Recursion is limited internally (default=10) to prevent degenerate cases |
4508 * (pat_arg="aaaaaa" str="aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"). | 4504 * (pat_arg="aaaaaa" str="aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"). |
4509 * Uses char_u for match indices. Therefore patterns are limited to MAXMATCHES | 4505 * Uses char_u for match indices. Therefore patterns are limited to |
4510 * characters. | 4506 * MAX_FUZZY_MATCHES characters. |
4511 * | 4507 * |
4512 * Returns TRUE if 'pat_arg' matches 'str'. Also returns the match score in | 4508 * Returns TRUE if 'pat_arg' matches 'str'. Also returns the match score in |
4513 * 'outScore' and the matching character positions in 'matches'. | 4509 * 'outScore' and the matching character positions in 'matches'. |
4514 */ | 4510 */ |
4515 static int | 4511 int |
4516 fuzzy_match( | 4512 fuzzy_match( |
4517 char_u *str, | 4513 char_u *str, |
4518 char_u *pat_arg, | 4514 char_u *pat_arg, |
4519 int matchseq, | 4515 int matchseq, |
4520 int *outScore, | 4516 int *outScore, |
4521 matchidx_T *matches, | 4517 int_u *matches, |
4522 int maxMatches) | 4518 int maxMatches) |
4523 { | 4519 { |
4524 int recursionCount = 0; | 4520 int recursionCount = 0; |
4525 int len = MB_CHARLEN(str); | 4521 int len = MB_CHARLEN(str); |
4526 char_u *save_pat; | 4522 char_u *save_pat; |
4628 long len; | 4624 long len; |
4629 fuzzyItem_T *ptrs; | 4625 fuzzyItem_T *ptrs; |
4630 listitem_T *li; | 4626 listitem_T *li; |
4631 long i = 0; | 4627 long i = 0; |
4632 int found_match = FALSE; | 4628 int found_match = FALSE; |
4633 matchidx_T matches[MAXMATCHES]; | 4629 int_u matches[MAX_FUZZY_MATCHES]; |
4634 | 4630 |
4635 len = list_len(items); | 4631 len = list_len(items); |
4636 if (len == 0) | 4632 if (len == 0) |
4637 return; | 4633 return; |
4638 | 4634 |
4845 { | 4841 { |
4846 semsg(_(e_invargval), "text_cb"); | 4842 semsg(_(e_invargval), "text_cb"); |
4847 return; | 4843 return; |
4848 } | 4844 } |
4849 } | 4845 } |
4850 if ((di = dict_find(d, (char_u *)"matchseq", -1)) != NULL) | 4846 if (dict_find(d, (char_u *)"matchseq", -1) != NULL) |
4851 matchseq = TRUE; | 4847 matchseq = TRUE; |
4852 } | 4848 } |
4853 | 4849 |
4854 // get the fuzzy matches | 4850 // get the fuzzy matches |
4855 ret = rettv_list_alloc(rettv); | 4851 ret = rettv_list_alloc(rettv); |