Mercurial > vim
comparison src/search.c @ 22355:0491b9cafd44 v8.2.1726
patch 8.2.1726: fuzzy matching only works on strings
Commit: https://github.com/vim/vim/commit/4f73b8e9cc83f647b34002554a8bdf9abec0a82f
Author: Bram Moolenaar <Bram@vim.org>
Date: Tue Sep 22 20:33:50 2020 +0200
patch 8.2.1726: fuzzy matching only works on strings
Problem: Fuzzy matching only works on strings.
Solution: Support passing a dict. Add matchfuzzypos() to also get the match
positions. (Yegappan Lakshmanan, closes #6947)
author | Bram Moolenaar <Bram@vim.org> |
---|---|
date | Tue, 22 Sep 2020 20:45:04 +0200 |
parents | f22acf6472da |
children | f4d1fe8e04cf |
comparison
equal
deleted
inserted
replaced
22354:19139cc50651 | 22355:0491b9cafd44 |
---|---|
4214 * There is not an explicit bonus for an exact match. Unmatched letters receive | 4214 * There is not an explicit bonus for an exact match. Unmatched letters receive |
4215 * a penalty. So shorter strings and closer matches are worth more. | 4215 * a penalty. So shorter strings and closer matches are worth more. |
4216 */ | 4216 */ |
4217 typedef struct | 4217 typedef struct |
4218 { | 4218 { |
4219 listitem_T *item; | 4219 listitem_T *item; |
4220 int score; | 4220 int score; |
4221 list_T *lmatchpos; | |
4221 } fuzzyItem_T; | 4222 } fuzzyItem_T; |
4223 | |
4224 // bonus for adjacent matches | |
4225 #define SEQUENTIAL_BONUS 15 | |
4226 // bonus if match occurs after a separator | |
4227 #define SEPARATOR_BONUS 30 | |
4228 // bonus if match is uppercase and prev is lower | |
4229 #define CAMEL_BONUS 30 | |
4230 // bonus if the first letter is matched | |
4231 #define FIRST_LETTER_BONUS 15 | |
4232 // penalty applied for every letter in str before the first match | |
4233 #define LEADING_LETTER_PENALTY -5 | |
4234 // maximum penalty for leading letters | |
4235 #define MAX_LEADING_LETTER_PENALTY -15 | |
4236 // penalty for every letter that doesn't match | |
4237 #define UNMATCHED_LETTER_PENALTY -1 | |
4238 // Score for a string that doesn't fuzzy match the pattern | |
4239 #define SCORE_NONE -9999 | |
4240 | |
4241 #define FUZZY_MATCH_RECURSION_LIMIT 10 | |
4242 // Maximum number of characters that can be fuzzy matched | |
4243 #define MAXMATCHES 256 | |
4244 | |
4245 typedef int_u matchidx_T; | |
4246 | |
4247 /* | |
4248 * Compute a score for a fuzzy matched string. The matching character locations | |
4249 * are in 'matches'. | |
4250 */ | |
4251 static int | |
4252 fuzzy_match_compute_score( | |
4253 char_u *str, | |
4254 int strSz, | |
4255 matchidx_T *matches, | |
4256 int numMatches) | |
4257 { | |
4258 int score; | |
4259 int penalty; | |
4260 int unmatched; | |
4261 int i; | |
4262 char_u *p = str; | |
4263 matchidx_T sidx = 0; | |
4264 | |
4265 // Initialize score | |
4266 score = 100; | |
4267 | |
4268 // Apply leading letter penalty | |
4269 penalty = LEADING_LETTER_PENALTY * matches[0]; | |
4270 if (penalty < MAX_LEADING_LETTER_PENALTY) | |
4271 penalty = MAX_LEADING_LETTER_PENALTY; | |
4272 score += penalty; | |
4273 | |
4274 // Apply unmatched penalty | |
4275 unmatched = strSz - numMatches; | |
4276 score += UNMATCHED_LETTER_PENALTY * unmatched; | |
4277 | |
4278 // Apply ordering bonuses | |
4279 for (i = 0; i < numMatches; ++i) | |
4280 { | |
4281 matchidx_T currIdx = matches[i]; | |
4282 | |
4283 if (i > 0) | |
4284 { | |
4285 matchidx_T prevIdx = matches[i - 1]; | |
4286 | |
4287 // Sequential | |
4288 if (currIdx == (prevIdx + 1)) | |
4289 score += SEQUENTIAL_BONUS; | |
4290 } | |
4291 | |
4292 // Check for bonuses based on neighbor character value | |
4293 if (currIdx > 0) | |
4294 { | |
4295 // Camel case | |
4296 int neighbor; | |
4297 int curr; | |
4298 int neighborSeparator; | |
4299 | |
4300 if (has_mbyte) | |
4301 { | |
4302 while (sidx < currIdx) | |
4303 { | |
4304 neighbor = (*mb_ptr2char)(p); | |
4305 (void)mb_ptr2char_adv(&p); | |
4306 sidx++; | |
4307 } | |
4308 curr = (*mb_ptr2char)(p); | |
4309 } | |
4310 else | |
4311 { | |
4312 neighbor = str[currIdx - 1]; | |
4313 curr = str[currIdx]; | |
4314 } | |
4315 | |
4316 if (vim_islower(neighbor) && vim_isupper(curr)) | |
4317 score += CAMEL_BONUS; | |
4318 | |
4319 // Separator | |
4320 neighborSeparator = neighbor == '_' || neighbor == ' '; | |
4321 if (neighborSeparator) | |
4322 score += SEPARATOR_BONUS; | |
4323 } | |
4324 else | |
4325 { | |
4326 // First letter | |
4327 score += FIRST_LETTER_BONUS; | |
4328 } | |
4329 } | |
4330 return score; | |
4331 } | |
4222 | 4332 |
4223 static int | 4333 static int |
4224 fuzzy_match_recursive( | 4334 fuzzy_match_recursive( |
4225 char_u *fuzpat, | 4335 char_u *fuzpat, |
4226 char_u *str, | 4336 char_u *str, |
4227 int *outScore, | 4337 matchidx_T strIdx, |
4228 char_u *strBegin, | 4338 int *outScore, |
4229 char_u *srcMatches, | 4339 char_u *strBegin, |
4230 char_u *matches, | 4340 int strLen, |
4231 int maxMatches, | 4341 matchidx_T *srcMatches, |
4232 int nextMatch, | 4342 matchidx_T *matches, |
4233 int *recursionCount, | 4343 int maxMatches, |
4234 int recursionLimit) | 4344 int nextMatch, |
4345 int *recursionCount) | |
4235 { | 4346 { |
4236 // Recursion params | 4347 // Recursion params |
4237 int recursiveMatch = FALSE; | 4348 int recursiveMatch = FALSE; |
4238 char_u bestRecursiveMatches[256]; | 4349 matchidx_T bestRecursiveMatches[MAXMATCHES]; |
4239 int bestRecursiveScore = 0; | 4350 int bestRecursiveScore = 0; |
4240 int first_match; | 4351 int first_match; |
4241 int matched; | 4352 int matched; |
4242 | 4353 |
4243 // Count recursions | 4354 // Count recursions |
4244 ++*recursionCount; | 4355 ++*recursionCount; |
4245 if (*recursionCount >= recursionLimit) | 4356 if (*recursionCount >= FUZZY_MATCH_RECURSION_LIMIT) |
4246 return FALSE; | 4357 return FALSE; |
4247 | 4358 |
4248 // Detect end of strings | 4359 // Detect end of strings |
4249 if (*fuzpat == '\0' || *str == '\0') | 4360 if (*fuzpat == '\0' || *str == '\0') |
4250 return FALSE; | 4361 return FALSE; |
4251 | 4362 |
4252 // Loop through fuzpat and str looking for a match | 4363 // Loop through fuzpat and str looking for a match |
4253 first_match = TRUE; | 4364 first_match = TRUE; |
4254 while (*fuzpat != '\0' && *str != '\0') | 4365 while (*fuzpat != NUL && *str != NUL) |
4255 { | 4366 { |
4367 int c1; | |
4368 int c2; | |
4369 | |
4370 c1 = PTR2CHAR(fuzpat); | |
4371 c2 = PTR2CHAR(str); | |
4372 | |
4256 // Found match | 4373 // Found match |
4257 if (vim_tolower(*fuzpat) == vim_tolower(*str)) | 4374 if (vim_tolower(c1) == vim_tolower(c2)) |
4258 { | 4375 { |
4259 char_u recursiveMatches[256]; | 4376 matchidx_T recursiveMatches[MAXMATCHES]; |
4260 int recursiveScore = 0; | 4377 int recursiveScore = 0; |
4378 char_u *next_char; | |
4261 | 4379 |
4262 // Supplied matches buffer was too short | 4380 // Supplied matches buffer was too short |
4263 if (nextMatch >= maxMatches) | 4381 if (nextMatch >= maxMatches) |
4264 return FALSE; | 4382 return FALSE; |
4265 | 4383 |
4266 // "Copy-on-Write" srcMatches into matches | 4384 // "Copy-on-Write" srcMatches into matches |
4267 if (first_match && srcMatches) | 4385 if (first_match && srcMatches) |
4268 { | 4386 { |
4269 memcpy(matches, srcMatches, nextMatch); | 4387 memcpy(matches, srcMatches, nextMatch * sizeof(srcMatches[0])); |
4270 first_match = FALSE; | 4388 first_match = FALSE; |
4271 } | 4389 } |
4272 | 4390 |
4273 // Recursive call that "skips" this match | 4391 // Recursive call that "skips" this match |
4274 if (fuzzy_match_recursive(fuzpat, str + 1, &recursiveScore, | 4392 if (has_mbyte) |
4275 strBegin, matches, recursiveMatches, | 4393 next_char = str + (*mb_ptr2len)(str); |
4276 sizeof(recursiveMatches), nextMatch, recursionCount, | 4394 else |
4277 recursionLimit)) | 4395 next_char = str + 1; |
4396 if (fuzzy_match_recursive(fuzpat, next_char, strIdx + 1, | |
4397 &recursiveScore, strBegin, strLen, matches, | |
4398 recursiveMatches, | |
4399 sizeof(recursiveMatches)/sizeof(recursiveMatches[0]), | |
4400 nextMatch, recursionCount)) | |
4278 { | 4401 { |
4279 // Pick best recursive score | 4402 // Pick best recursive score |
4280 if (!recursiveMatch || recursiveScore > bestRecursiveScore) | 4403 if (!recursiveMatch || recursiveScore > bestRecursiveScore) |
4281 { | 4404 { |
4282 memcpy(bestRecursiveMatches, recursiveMatches, 256); | 4405 memcpy(bestRecursiveMatches, recursiveMatches, |
4406 MAXMATCHES * sizeof(recursiveMatches[0])); | |
4283 bestRecursiveScore = recursiveScore; | 4407 bestRecursiveScore = recursiveScore; |
4284 } | 4408 } |
4285 recursiveMatch = TRUE; | 4409 recursiveMatch = TRUE; |
4286 } | 4410 } |
4287 | 4411 |
4288 // Advance | 4412 // Advance |
4289 matches[nextMatch++] = (char_u)(str - strBegin); | 4413 matches[nextMatch++] = strIdx; |
4290 ++fuzpat; | 4414 if (has_mbyte) |
4291 } | 4415 (void)mb_ptr2char_adv(&fuzpat); |
4292 ++str; | 4416 else |
4417 ++fuzpat; | |
4418 } | |
4419 if (has_mbyte) | |
4420 (void)mb_ptr2char_adv(&str); | |
4421 else | |
4422 ++str; | |
4423 strIdx++; | |
4293 } | 4424 } |
4294 | 4425 |
4295 // Determine if full fuzpat was matched | 4426 // Determine if full fuzpat was matched |
4296 matched = *fuzpat == '\0' ? TRUE : FALSE; | 4427 matched = *fuzpat == NUL ? TRUE : FALSE; |
4297 | 4428 |
4298 // Calculate score | 4429 // Calculate score |
4299 if (matched) | 4430 if (matched) |
4300 { | 4431 *outScore = fuzzy_match_compute_score(strBegin, strLen, matches, |
4301 // bonus for adjacent matches | 4432 nextMatch); |
4302 int sequential_bonus = 15; | |
4303 // bonus if match occurs after a separator | |
4304 int separator_bonus = 30; | |
4305 // bonus if match is uppercase and prev is lower | |
4306 int camel_bonus = 30; | |
4307 // bonus if the first letter is matched | |
4308 int first_letter_bonus = 15; | |
4309 // penalty applied for every letter in str before the first match | |
4310 int leading_letter_penalty = -5; | |
4311 // maximum penalty for leading letters | |
4312 int max_leading_letter_penalty = -15; | |
4313 // penalty for every letter that doesn't matter | |
4314 int unmatched_letter_penalty = -1; | |
4315 int penalty; | |
4316 int unmatched; | |
4317 int i; | |
4318 | |
4319 // Iterate str to end | |
4320 while (*str != '\0') | |
4321 ++str; | |
4322 | |
4323 // Initialize score | |
4324 *outScore = 100; | |
4325 | |
4326 // Apply leading letter penalty | |
4327 penalty = leading_letter_penalty * matches[0]; | |
4328 if (penalty < max_leading_letter_penalty) | |
4329 penalty = max_leading_letter_penalty; | |
4330 *outScore += penalty; | |
4331 | |
4332 // Apply unmatched penalty | |
4333 unmatched = (int)(str - strBegin) - nextMatch; | |
4334 *outScore += unmatched_letter_penalty * unmatched; | |
4335 | |
4336 // Apply ordering bonuses | |
4337 for (i = 0; i < nextMatch; ++i) | |
4338 { | |
4339 char_u currIdx = matches[i]; | |
4340 | |
4341 if (i > 0) | |
4342 { | |
4343 char_u prevIdx = matches[i - 1]; | |
4344 | |
4345 // Sequential | |
4346 if (currIdx == (prevIdx + 1)) | |
4347 *outScore += sequential_bonus; | |
4348 } | |
4349 | |
4350 // Check for bonuses based on neighbor character value | |
4351 if (currIdx > 0) | |
4352 { | |
4353 // Camel case | |
4354 char_u neighbor = strBegin[currIdx - 1]; | |
4355 char_u curr = strBegin[currIdx]; | |
4356 int neighborSeparator; | |
4357 | |
4358 if (islower(neighbor) && isupper(curr)) | |
4359 *outScore += camel_bonus; | |
4360 | |
4361 // Separator | |
4362 neighborSeparator = neighbor == '_' || neighbor == ' '; | |
4363 if (neighborSeparator) | |
4364 *outScore += separator_bonus; | |
4365 } | |
4366 else | |
4367 { | |
4368 // First letter | |
4369 *outScore += first_letter_bonus; | |
4370 } | |
4371 } | |
4372 } | |
4373 | 4433 |
4374 // Return best result | 4434 // Return best result |
4375 if (recursiveMatch && (!matched || bestRecursiveScore > *outScore)) | 4435 if (recursiveMatch && (!matched || bestRecursiveScore > *outScore)) |
4376 { | 4436 { |
4377 // Recursive score is better than "this" | 4437 // Recursive score is better than "this" |
4378 memcpy(matches, bestRecursiveMatches, maxMatches); | 4438 memcpy(matches, bestRecursiveMatches, maxMatches * sizeof(matches[0])); |
4379 *outScore = bestRecursiveScore; | 4439 *outScore = bestRecursiveScore; |
4380 return TRUE; | 4440 return TRUE; |
4381 } | 4441 } |
4382 else if (matched) | 4442 else if (matched) |
4383 return TRUE; // "this" score is better than recursive | 4443 return TRUE; // "this" score is better than recursive |
4392 * match with highest score. | 4452 * match with highest score. |
4393 * Scores values have no intrinsic meaning. Possible score range is not | 4453 * Scores values have no intrinsic meaning. Possible score range is not |
4394 * normalized and varies with pattern. | 4454 * normalized and varies with pattern. |
4395 * Recursion is limited internally (default=10) to prevent degenerate cases | 4455 * Recursion is limited internally (default=10) to prevent degenerate cases |
4396 * (fuzpat="aaaaaa" str="aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"). | 4456 * (fuzpat="aaaaaa" str="aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"). |
4397 * Uses char_u for match indices. Therefore patterns are limited to 256 | 4457 * Uses char_u for match indices. Therefore patterns are limited to MAXMATCHES |
4398 * characters. | 4458 * characters. |
4399 * | 4459 * |
4400 * Returns TRUE if fuzpat is found AND calculates a score. | 4460 * Returns TRUE if 'fuzpat' matches 'str'. Also returns the match score in |
4461 * 'outScore' and the matching character positions in 'matches'. | |
4401 */ | 4462 */ |
4402 static int | 4463 static int |
4403 fuzzy_match(char_u *str, char_u *fuzpat, int *outScore) | 4464 fuzzy_match( |
4404 { | 4465 char_u *str, |
4405 char_u matches[256]; | 4466 char_u *fuzpat, |
4467 int *outScore, | |
4468 matchidx_T *matches, | |
4469 int maxMatches) | |
4470 { | |
4406 int recursionCount = 0; | 4471 int recursionCount = 0; |
4407 int recursionLimit = 10; | 4472 int len = MB_CHARLEN(str); |
4408 | 4473 |
4409 *outScore = 0; | 4474 *outScore = 0; |
4410 | 4475 |
4411 return fuzzy_match_recursive(fuzpat, str, outScore, str, NULL, matches, | 4476 return fuzzy_match_recursive(fuzpat, str, 0, outScore, str, len, NULL, |
4412 sizeof(matches), 0, &recursionCount, recursionLimit); | 4477 matches, maxMatches, 0, &recursionCount); |
4413 } | 4478 } |
4414 | 4479 |
4415 /* | 4480 /* |
4416 * Sort the fuzzy matches in the descending order of the match score. | 4481 * Sort the fuzzy matches in the descending order of the match score. |
4417 */ | 4482 */ |
4423 | 4488 |
4424 return v1 == v2 ? 0 : v1 > v2 ? -1 : 1; | 4489 return v1 == v2 ? 0 : v1 > v2 ? -1 : 1; |
4425 } | 4490 } |
4426 | 4491 |
4427 /* | 4492 /* |
4428 * Fuzzy search the string 'str' in 'strlist' and return the matching strings | 4493 * Fuzzy search the string 'str' in a list of 'items' and return the matching |
4429 * in 'fmatchlist'. | 4494 * strings in 'fmatchlist'. |
4495 * If 'items' is a list of strings, then search for 'str' in the list. | |
4496 * If 'items' is a list of dicts, then either use 'key' to lookup the string | |
4497 * for each item or use 'item_cb' Funcref function to get the string. | |
4498 * If 'retmatchpos' is TRUE, then return a list of positions where 'str' | |
4499 * matches for each item. | |
4430 */ | 4500 */ |
4431 static void | 4501 static void |
4432 match_fuzzy(list_T *strlist, char_u *str, list_T *fmatchlist) | 4502 match_fuzzy( |
4503 list_T *items, | |
4504 char_u *str, | |
4505 char_u *key, | |
4506 callback_T *item_cb, | |
4507 int retmatchpos, | |
4508 list_T *fmatchlist) | |
4433 { | 4509 { |
4434 long len; | 4510 long len; |
4435 fuzzyItem_T *ptrs; | 4511 fuzzyItem_T *ptrs; |
4436 listitem_T *li; | 4512 listitem_T *li; |
4437 long i = 0; | 4513 long i = 0; |
4438 int found_match = FALSE; | 4514 int found_match = FALSE; |
4439 | 4515 matchidx_T matches[MAXMATCHES]; |
4440 len = list_len(strlist); | 4516 |
4517 len = list_len(items); | |
4441 if (len == 0) | 4518 if (len == 0) |
4442 return; | 4519 return; |
4443 | 4520 |
4444 ptrs = ALLOC_MULT(fuzzyItem_T, len); | 4521 ptrs = ALLOC_CLEAR_MULT(fuzzyItem_T, len); |
4445 if (ptrs == NULL) | 4522 if (ptrs == NULL) |
4446 return; | 4523 return; |
4447 | 4524 |
4448 // For all the string items in strlist, get the fuzzy matching score | 4525 // For all the string items in items, get the fuzzy matching score |
4449 FOR_ALL_LIST_ITEMS(strlist, li) | 4526 FOR_ALL_LIST_ITEMS(items, li) |
4450 { | 4527 { |
4451 int score; | 4528 int score; |
4529 char_u *itemstr; | |
4530 typval_T rettv; | |
4452 | 4531 |
4453 ptrs[i].item = li; | 4532 ptrs[i].item = li; |
4454 ptrs[i].score = -9999; | 4533 ptrs[i].score = SCORE_NONE; |
4455 // ignore non-string items in the list | 4534 itemstr = NULL; |
4456 if (li->li_tv.v_type == VAR_STRING && li->li_tv.vval.v_string != NULL) | 4535 rettv.v_type = VAR_UNKNOWN; |
4457 if (fuzzy_match(li->li_tv.vval.v_string, str, &score)) | 4536 if (li->li_tv.v_type == VAR_STRING) // list of strings |
4458 { | 4537 itemstr = li->li_tv.vval.v_string; |
4459 ptrs[i].score = score; | 4538 else if (li->li_tv.v_type == VAR_DICT && |
4460 found_match = TRUE; | 4539 (key != NULL || item_cb->cb_name != NULL)) |
4461 } | 4540 { |
4541 // For a dict, either use the specified key to lookup the string or | |
4542 // use the specified callback function to get the string. | |
4543 if (key != NULL) | |
4544 itemstr = dict_get_string(li->li_tv.vval.v_dict, key, FALSE); | |
4545 else | |
4546 { | |
4547 typval_T argv[2]; | |
4548 | |
4549 // Invoke the supplied callback (if any) to get the dict item | |
4550 li->li_tv.vval.v_dict->dv_refcount++; | |
4551 argv[0].v_type = VAR_DICT; | |
4552 argv[0].vval.v_dict = li->li_tv.vval.v_dict; | |
4553 argv[1].v_type = VAR_UNKNOWN; | |
4554 if (call_callback(item_cb, -1, &rettv, 1, argv) != FAIL) | |
4555 { | |
4556 if (rettv.v_type == VAR_STRING) | |
4557 itemstr = rettv.vval.v_string; | |
4558 } | |
4559 dict_unref(li->li_tv.vval.v_dict); | |
4560 } | |
4561 } | |
4562 | |
4563 if (itemstr != NULL | |
4564 && fuzzy_match(itemstr, str, &score, matches, | |
4565 sizeof(matches) / sizeof(matches[0]))) | |
4566 { | |
4567 // Copy the list of matching positions in itemstr to a list, if | |
4568 // 'retmatchpos' is set. | |
4569 if (retmatchpos) | |
4570 { | |
4571 int j; | |
4572 int strsz; | |
4573 | |
4574 ptrs[i].lmatchpos = list_alloc(); | |
4575 if (ptrs[i].lmatchpos == NULL) | |
4576 goto done; | |
4577 strsz = MB_CHARLEN(str); | |
4578 for (j = 0; j < strsz; j++) | |
4579 { | |
4580 if (list_append_number(ptrs[i].lmatchpos, | |
4581 matches[j]) == FAIL) | |
4582 goto done; | |
4583 } | |
4584 } | |
4585 ptrs[i].score = score; | |
4586 found_match = TRUE; | |
4587 } | |
4462 ++i; | 4588 ++i; |
4589 clear_tv(&rettv); | |
4463 } | 4590 } |
4464 | 4591 |
4465 if (found_match) | 4592 if (found_match) |
4466 { | 4593 { |
4594 list_T *l; | |
4595 | |
4467 // Sort the list by the descending order of the match score | 4596 // Sort the list by the descending order of the match score |
4468 qsort((void *)ptrs, (size_t)len, sizeof(fuzzyItem_T), | 4597 qsort((void *)ptrs, (size_t)len, sizeof(fuzzyItem_T), |
4469 fuzzy_item_compare); | 4598 fuzzy_item_compare); |
4470 | 4599 |
4471 // Copy the matching strings with 'score != -9999' to the return list | 4600 // For matchfuzzy(), return a list of matched strings. |
4601 // ['str1', 'str2', 'str3'] | |
4602 // For matchfuzzypos(), return a list with two items. | |
4603 // The first item is a list of matched strings. The second item | |
4604 // is a list of lists where each list item is a list of matched | |
4605 // character positions. | |
4606 // [['str1', 'str2', 'str3'], [[1, 3], [1, 3], [1, 3]]] | |
4607 if (retmatchpos) | |
4608 { | |
4609 li = list_find(fmatchlist, 0); | |
4610 if (li == NULL || li->li_tv.vval.v_list == NULL) | |
4611 goto done; | |
4612 l = li->li_tv.vval.v_list; | |
4613 } | |
4614 else | |
4615 l = fmatchlist; | |
4616 | |
4617 // Copy the matching strings with a valid score to the return list | |
4472 for (i = 0; i < len; i++) | 4618 for (i = 0; i < len; i++) |
4473 { | 4619 { |
4474 if (ptrs[i].score == -9999) | 4620 if (ptrs[i].score == SCORE_NONE) |
4475 break; | 4621 break; |
4476 list_append_string(fmatchlist, ptrs[i].item->li_tv.vval.v_string, | 4622 list_append_tv(l, &ptrs[i].item->li_tv); |
4477 -1); | 4623 } |
4478 } | 4624 |
4479 } | 4625 // next copy the list of matching positions |
4480 | 4626 if (retmatchpos) |
4627 { | |
4628 li = list_find(fmatchlist, -1); | |
4629 if (li == NULL || li->li_tv.vval.v_list == NULL) | |
4630 goto done; | |
4631 l = li->li_tv.vval.v_list; | |
4632 | |
4633 for (i = 0; i < len; i++) | |
4634 { | |
4635 if (ptrs[i].score == SCORE_NONE) | |
4636 break; | |
4637 if (ptrs[i].lmatchpos != NULL && | |
4638 list_append_list(l, ptrs[i].lmatchpos) == FAIL) | |
4639 goto done; | |
4640 } | |
4641 } | |
4642 } | |
4643 | |
4644 done: | |
4481 vim_free(ptrs); | 4645 vim_free(ptrs); |
4646 } | |
4647 | |
4648 /* | |
4649 * Do fuzzy matching. Returns the list of matched strings in 'rettv'. | |
4650 * If 'retmatchpos' is TRUE, also returns the matching character positions. | |
4651 */ | |
4652 static void | |
4653 do_fuzzymatch(typval_T *argvars, typval_T *rettv, int retmatchpos) | |
4654 { | |
4655 callback_T cb; | |
4656 char_u *key = NULL; | |
4657 int ret; | |
4658 | |
4659 CLEAR_POINTER(&cb); | |
4660 | |
4661 // validate and get the arguments | |
4662 if (argvars[0].v_type != VAR_LIST || argvars[0].vval.v_list == NULL) | |
4663 { | |
4664 semsg(_(e_listarg), retmatchpos ? "matchfuzzypos()" : "matchfuzzy()"); | |
4665 return; | |
4666 } | |
4667 if (argvars[1].v_type != VAR_STRING | |
4668 || argvars[1].vval.v_string == NULL) | |
4669 { | |
4670 semsg(_(e_invarg2), tv_get_string(&argvars[1])); | |
4671 return; | |
4672 } | |
4673 | |
4674 if (argvars[2].v_type != VAR_UNKNOWN) | |
4675 { | |
4676 dict_T *d; | |
4677 dictitem_T *di; | |
4678 | |
4679 if (argvars[2].v_type != VAR_DICT || argvars[2].vval.v_dict == NULL) | |
4680 { | |
4681 emsg(_(e_dictreq)); | |
4682 return; | |
4683 } | |
4684 | |
4685 // To search a dict, either a callback function or a key can be | |
4686 // specified. | |
4687 d = argvars[2].vval.v_dict; | |
4688 if ((di = dict_find(d, (char_u *)"key", -1)) != NULL) | |
4689 { | |
4690 if (di->di_tv.v_type != VAR_STRING | |
4691 || di->di_tv.vval.v_string == NULL | |
4692 || *di->di_tv.vval.v_string == NUL) | |
4693 { | |
4694 semsg(_(e_invarg2), tv_get_string(&di->di_tv)); | |
4695 return; | |
4696 } | |
4697 key = tv_get_string(&di->di_tv); | |
4698 } | |
4699 else if ((di = dict_find(d, (char_u *)"text_cb", -1)) != NULL) | |
4700 { | |
4701 cb = get_callback(&di->di_tv); | |
4702 if (cb.cb_name == NULL) | |
4703 { | |
4704 semsg(_(e_invargval), "text_cb"); | |
4705 return; | |
4706 } | |
4707 } | |
4708 } | |
4709 | |
4710 // get the fuzzy matches | |
4711 ret = rettv_list_alloc(rettv); | |
4712 if (ret != OK) | |
4713 goto done; | |
4714 if (retmatchpos) | |
4715 { | |
4716 list_T *l; | |
4717 | |
4718 // For matchfuzzypos(), a list with two items are returned. First item | |
4719 // is a list of matching strings and the second item is a list of | |
4720 // lists with matching positions within each string. | |
4721 l = list_alloc(); | |
4722 if (l == NULL) | |
4723 goto done; | |
4724 if (list_append_list(rettv->vval.v_list, l) == FAIL) | |
4725 goto done; | |
4726 l = list_alloc(); | |
4727 if (l == NULL) | |
4728 goto done; | |
4729 if (list_append_list(rettv->vval.v_list, l) == FAIL) | |
4730 goto done; | |
4731 } | |
4732 | |
4733 match_fuzzy(argvars[0].vval.v_list, tv_get_string(&argvars[1]), key, | |
4734 &cb, retmatchpos, rettv->vval.v_list); | |
4735 | |
4736 done: | |
4737 free_callback(&cb); | |
4482 } | 4738 } |
4483 | 4739 |
4484 /* | 4740 /* |
4485 * "matchfuzzy()" function | 4741 * "matchfuzzy()" function |
4486 */ | 4742 */ |
4487 void | 4743 void |
4488 f_matchfuzzy(typval_T *argvars, typval_T *rettv) | 4744 f_matchfuzzy(typval_T *argvars, typval_T *rettv) |
4489 { | 4745 { |
4490 if (argvars[0].v_type != VAR_LIST) | 4746 do_fuzzymatch(argvars, rettv, FALSE); |
4491 { | 4747 } |
4492 emsg(_(e_listreq)); | 4748 |
4493 return; | 4749 /* |
4494 } | 4750 * "matchfuzzypos()" function |
4495 if (argvars[0].vval.v_list == NULL) | 4751 */ |
4496 return; | 4752 void |
4497 if (argvars[1].v_type != VAR_STRING | 4753 f_matchfuzzypos(typval_T *argvars, typval_T *rettv) |
4498 || argvars[1].vval.v_string == NULL) | 4754 { |
4499 { | 4755 do_fuzzymatch(argvars, rettv, TRUE); |
4500 semsg(_(e_invarg2), tv_get_string(&argvars[1])); | 4756 } |
4501 return; | 4757 |
4502 } | 4758 #endif |
4503 if (rettv_list_alloc(rettv) == OK) | |
4504 match_fuzzy(argvars[0].vval.v_list, tv_get_string(&argvars[1]), | |
4505 rettv->vval.v_list); | |
4506 } | |
4507 | |
4508 #endif |