changeset 24547:192058cad081 v8.2.2813

patch 8.2.2813: cannot grep using fuzzy matching

Commit: https://github.com/vim/vim/commit/bb01a1ef3a093cdb36877ba73474719c531dc8cb
Author: Yegappan Lakshmanan <yegappan@yahoo.com>
Date:   Mon Apr 26 21:17:52 2021 +0200

    patch 8.2.2813: cannot grep using fuzzy matching
    Problem:    Cannot grep using fuzzy matching.
    Solution:   Add the "f" flag to :vimgrep. (Yegappan Lakshmanan, closes https://github.com/vim/vim/issues/8152)
author Bram Moolenaar <Bram@vim.org>
date Mon, 26 Apr 2021 21:30:04 +0200
parents eb6c05ae77f9
children 0d3d3374240a
files runtime/doc/quickfix.txt src/ex_cmds.c src/proto/search.pro src/quickfix.c src/search.c src/testdir/test_quickfix.vim src/version.c src/vim.h
diffstat 8 files changed, 160 insertions(+), 62 deletions(-) [+]
line wrap: on
line diff
--- a/runtime/doc/quickfix.txt
+++ b/runtime/doc/quickfix.txt
@@ -1008,7 +1008,7 @@ commands can be combined to create a New
 5.1 using Vim's internal grep
 
 					*:vim* *:vimgrep* *E682* *E683*
-:vim[grep][!] /{pattern}/[g][j] {file} ...
+:vim[grep][!] /{pattern}/[g][j][f] {file} ...
 			Search for {pattern} in the files {file} ... and set
 			the error list to the matches.  Files matching
 			'wildignore' are ignored; files in 'suffixes' are
@@ -1059,20 +1059,20 @@ 5.1 using Vim's internal grep
 				:vimgrep Error *.c
 <
 							*:lv* *:lvimgrep*
-:lv[imgrep][!] /{pattern}/[g][j] {file} ...
+:lv[imgrep][!] /{pattern}/[g][j][f] {file} ...
 :lv[imgrep][!] {pattern} {file} ...
 			Same as ":vimgrep", except the location list for the
 			current window is used instead of the quickfix list.
 
 						*:vimgrepa* *:vimgrepadd*
-:vimgrepa[dd][!] /{pattern}/[g][j] {file} ...
+:vimgrepa[dd][!] /{pattern}/[g][j][f] {file} ...
 :vimgrepa[dd][!] {pattern} {file} ...
 			Just like ":vimgrep", but instead of making a new list
 			of errors the matches are appended to the current
 			list.
 
 						*:lvimgrepa* *:lvimgrepadd*
-:lvimgrepa[dd][!] /{pattern}/[g][j] {file} ...
+:lvimgrepa[dd][!] /{pattern}/[g][j][f] {file} ...
 :lvimgrepa[dd][!] {pattern} {file} ...
 			Same as ":vimgrepadd", except the location list for
 			the current window is used instead of the quickfix
--- a/src/ex_cmds.c
+++ b/src/ex_cmds.c
@@ -5288,14 +5288,16 @@ skip_vimgrep_pat(char_u *p, char_u **s, 
 	++p;
 
 	// Find the flags
-	while (*p == 'g' || *p == 'j')
+	while (*p == 'g' || *p == 'j' || *p == 'f')
 	{
 	    if (flags != NULL)
 	    {
 		if (*p == 'g')
 		    *flags |= VGR_GLOBAL;
+		else if (*p == 'j')
+		    *flags |= VGR_NOJUMP;
 		else
-		    *flags |= VGR_NOJUMP;
+		    *flags |= VGR_FUZZY;
 	    }
 	    ++p;
 	}
--- a/src/proto/search.pro
+++ b/src/proto/search.pro
@@ -36,6 +36,7 @@ void find_pattern_in_path(char_u *ptr, i
 spat_T *get_spat(int idx);
 int get_spat_last_idx(void);
 void f_searchcount(typval_T *argvars, typval_T *rettv);
+int fuzzy_match(char_u *str, char_u *pat_arg, int matchseq, int *outScore, int_u *matches, int maxMatches);
 void f_matchfuzzy(typval_T *argvars, typval_T *rettv);
 void f_matchfuzzypos(typval_T *argvars, typval_T *rettv);
 /* vim: set ft=c : */
--- a/src/quickfix.c
+++ b/src/quickfix.c
@@ -5912,6 +5912,7 @@ vgr_match_buflines(
 	qf_list_T   *qfl,
 	char_u	    *fname,
 	buf_T	    *buf,
+	char_u	    *spat,
 	regmmatch_T *regmatch,
 	long	    *tomatch,
 	int	    duplicate_name,
@@ -5920,45 +5921,91 @@ vgr_match_buflines(
     int		found_match = FALSE;
     long	lnum;
     colnr_T	col;
+    int		pat_len = STRLEN(spat);
 
     for (lnum = 1; lnum <= buf->b_ml.ml_line_count && *tomatch > 0; ++lnum)
     {
 	col = 0;
-	while (vim_regexec_multi(regmatch, curwin, buf, lnum,
-		    col, NULL, NULL) > 0)
+	if (!(flags & VGR_FUZZY))
 	{
-	    // Pass the buffer number so that it gets used even for a
-	    // dummy buffer, unless duplicate_name is set, then the
-	    // buffer will be wiped out below.
-	    if (qf_add_entry(qfl,
-			NULL,       // dir
-			fname,
-			NULL,
-			duplicate_name ? 0 : buf->b_fnum,
-			ml_get_buf(buf,
-			    regmatch->startpos[0].lnum + lnum, FALSE),
-			regmatch->startpos[0].lnum + lnum,
-			regmatch->startpos[0].col + 1,
-			FALSE,      // vis_col
-			NULL,	    // search pattern
-			0,	    // nr
-			0,	    // type
-			TRUE	    // valid
-			) == QF_FAIL)
+	    // Regular expression match
+	    while (vim_regexec_multi(regmatch, curwin, buf, lnum,
+			col, NULL, NULL) > 0)
 	    {
-		got_int = TRUE;
-		break;
+		// Pass the buffer number so that it gets used even for a
+		// dummy buffer, unless duplicate_name is set, then the
+		// buffer will be wiped out below.
+		if (qf_add_entry(qfl,
+			    NULL,	// dir
+			    fname,
+			    NULL,
+			    duplicate_name ? 0 : buf->b_fnum,
+			    ml_get_buf(buf,
+				regmatch->startpos[0].lnum + lnum, FALSE),
+			    regmatch->startpos[0].lnum + lnum,
+			    regmatch->startpos[0].col + 1,
+			    FALSE,	// vis_col
+			    NULL,	// search pattern
+			    0,		// nr
+			    0,		// type
+			    TRUE	// valid
+			    ) == QF_FAIL)
+		{
+		    got_int = TRUE;
+		    break;
+		}
+		found_match = TRUE;
+		if (--*tomatch == 0)
+		    break;
+		if ((flags & VGR_GLOBAL) == 0
+			|| regmatch->endpos[0].lnum > 0)
+		    break;
+		col = regmatch->endpos[0].col
+		    + (col == regmatch->endpos[0].col);
+		if (col > (colnr_T)STRLEN(ml_get_buf(buf, lnum, FALSE)))
+		    break;
 	    }
-	    found_match = TRUE;
-	    if (--*tomatch == 0)
-		break;
-	    if ((flags & VGR_GLOBAL) == 0
-		    || regmatch->endpos[0].lnum > 0)
-		break;
-	    col = regmatch->endpos[0].col
-		+ (col == regmatch->endpos[0].col);
-	    if (col > (colnr_T)STRLEN(ml_get_buf(buf, lnum, FALSE)))
-		break;
+	}
+	else
+	{
+	    char_u  *str = ml_get_buf(buf, lnum, FALSE);
+	    int	    score;
+	    int_u   matches[MAX_FUZZY_MATCHES];
+	    int_u   sz = sizeof(matches) / sizeof(matches[0]);
+
+	    // Fuzzy string match
+	    while (fuzzy_match(str + col, spat, FALSE, &score, matches, sz) > 0)
+	    {
+		// Pass the buffer number so that it gets used even for a
+		// dummy buffer, unless duplicate_name is set, then the
+		// buffer will be wiped out below.
+		if (qf_add_entry(qfl,
+			    NULL,	// dir
+			    fname,
+			    NULL,
+			    duplicate_name ? 0 : buf->b_fnum,
+			    str,
+			    lnum,
+			    matches[0] + col + 1,
+			    FALSE,	// vis_col
+			    NULL,	// search pattern
+			    0,		// nr
+			    0,		// type
+			    TRUE	// valid
+			    ) == QF_FAIL)
+		{
+		    got_int = TRUE;
+		    break;
+		}
+		found_match = TRUE;
+		if (--*tomatch == 0)
+		    break;
+		if ((flags & VGR_GLOBAL) == 0)
+		    break;
+		col = matches[pat_len - 1] + col + 1;
+		if (col > (colnr_T)STRLEN(str))
+		    break;
+	    }
 	}
 	line_breakcheck();
 	if (got_int)
@@ -6163,7 +6210,7 @@ vgr_process_files(
 	    // Try for a match in all lines of the buffer.
 	    // For ":1vimgrep" look for first match only.
 	    found_match = vgr_match_buflines(qf_get_curlist(qi),
-		    fname, buf, &cmd_args->regmatch,
+		    fname, buf, cmd_args->spat, &cmd_args->regmatch,
 		    &cmd_args->tomatch, duplicate_name, cmd_args->flags);
 
 	    if (using_dummy)
--- a/src/search.c
+++ b/src/search.c
@@ -4285,10 +4285,6 @@ typedef struct
 #define SCORE_NONE	-9999
 
 #define FUZZY_MATCH_RECURSION_LIMIT	10
-// Maximum number of characters that can be fuzzy matched
-#define MAXMATCHES			256
-
-typedef int_u		matchidx_T;
 
 /*
  * Compute a score for a fuzzy matched string. The matching character locations
@@ -4298,7 +4294,7 @@ typedef int_u		matchidx_T;
 fuzzy_match_compute_score(
 	char_u		*str,
 	int		strSz,
-	matchidx_T	*matches,
+	int_u		*matches,
 	int		numMatches)
 {
     int		score;
@@ -4306,7 +4302,7 @@ fuzzy_match_compute_score(
     int		unmatched;
     int		i;
     char_u	*p = str;
-    matchidx_T	sidx = 0;
+    int_u	sidx = 0;
 
     // Initialize score
     score = 100;
@@ -4324,11 +4320,11 @@ fuzzy_match_compute_score(
     // Apply ordering bonuses
     for (i = 0; i < numMatches; ++i)
     {
-	matchidx_T	currIdx = matches[i];
+	int_u	currIdx = matches[i];
 
 	if (i > 0)
 	{
-	    matchidx_T	prevIdx = matches[i - 1];
+	    int_u	prevIdx = matches[i - 1];
 
 	    // Sequential
 	    if (currIdx == (prevIdx + 1))
@@ -4386,19 +4382,19 @@ fuzzy_match_compute_score(
 fuzzy_match_recursive(
 	char_u		*fuzpat,
 	char_u		*str,
-	matchidx_T	strIdx,
+	int_u		strIdx,
 	int		*outScore,
 	char_u		*strBegin,
 	int		strLen,
-	matchidx_T	*srcMatches,
-	matchidx_T	*matches,
+	int_u		*srcMatches,
+	int_u		*matches,
 	int		maxMatches,
 	int		nextMatch,
 	int		*recursionCount)
 {
     // Recursion params
     int		recursiveMatch = FALSE;
-    matchidx_T	bestRecursiveMatches[MAXMATCHES];
+    int_u	bestRecursiveMatches[MAX_FUZZY_MATCHES];
     int		bestRecursiveScore = 0;
     int		first_match;
     int		matched;
@@ -4409,7 +4405,7 @@ fuzzy_match_recursive(
 	return 0;
 
     // Detect end of strings
-    if (*fuzpat == '\0' || *str == '\0')
+    if (*fuzpat == NUL || *str == NUL)
 	return 0;
 
     // Loop through fuzpat and str looking for a match
@@ -4425,7 +4421,7 @@ fuzzy_match_recursive(
 	// Found match
 	if (vim_tolower(c1) == vim_tolower(c2))
 	{
-	    matchidx_T	recursiveMatches[MAXMATCHES];
+	    int_u	recursiveMatches[MAX_FUZZY_MATCHES];
 	    int		recursiveScore = 0;
 	    char_u	*next_char;
 
@@ -4455,7 +4451,7 @@ fuzzy_match_recursive(
 		if (!recursiveMatch || recursiveScore > bestRecursiveScore)
 		{
 		    memcpy(bestRecursiveMatches, recursiveMatches,
-			    MAXMATCHES * sizeof(recursiveMatches[0]));
+			    MAX_FUZZY_MATCHES * sizeof(recursiveMatches[0]));
 		    bestRecursiveScore = recursiveScore;
 		}
 		recursiveMatch = TRUE;
@@ -4506,19 +4502,19 @@ fuzzy_match_recursive(
  * normalized and varies with pattern.
  * Recursion is limited internally (default=10) to prevent degenerate cases
  * (pat_arg="aaaaaa" str="aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa").
- * Uses char_u for match indices. Therefore patterns are limited to MAXMATCHES
- * characters.
+ * Uses int_u for match indices. Patterns are limited to
+ * MAX_FUZZY_MATCHES characters.
  *
  * Returns TRUE if 'pat_arg' matches 'str'. Also returns the match score in
  * 'outScore' and the matching character positions in 'matches'.
  */
-    static int
+    int
 fuzzy_match(
 	char_u		*str,
 	char_u		*pat_arg,
 	int		matchseq,
 	int		*outScore,
-	matchidx_T	*matches,
+	int_u		*matches,
 	int		maxMatches)
 {
     int		recursionCount = 0;
@@ -4630,7 +4626,7 @@ fuzzy_match_in_list(
     listitem_T	*li;
     long	i = 0;
     int		found_match = FALSE;
-    matchidx_T	matches[MAXMATCHES];
+    int_u	matches[MAX_FUZZY_MATCHES];
 
     len = list_len(items);
     if (len == 0)
@@ -4847,7 +4843,7 @@ do_fuzzymatch(typval_T *argvars, typval_
 		return;
 	    }
 	}
-	if ((di = dict_find(d, (char_u *)"matchseq", -1)) != NULL)
+	if (dict_find(d, (char_u *)"matchseq", -1) != NULL)
 	    matchseq = TRUE;
     }
 
--- a/src/testdir/test_quickfix.vim
+++ b/src/testdir/test_quickfix.vim
@@ -32,7 +32,7 @@ func s:setup_commands(cchar)
     command! -count -nargs=* -bang Xnfile <mods><count>cnfile<bang> <args>
     command! -nargs=* -bang Xpfile <mods>cpfile<bang> <args>
     command! -nargs=* Xexpr <mods>cexpr <args>
-    command! -count -nargs=* Xvimgrep <mods> <count>vimgrep <args>
+    command! -count=999 -nargs=* Xvimgrep <mods> <count>vimgrep <args>
     command! -nargs=* Xvimgrepadd <mods> vimgrepadd <args>
     command! -nargs=* Xgrep <mods> grep <args>
     command! -nargs=* Xgrepadd <mods> grepadd <args>
@@ -69,7 +69,7 @@ func s:setup_commands(cchar)
     command! -count -nargs=* -bang Xnfile <mods><count>lnfile<bang> <args>
     command! -nargs=* -bang Xpfile <mods>lpfile<bang> <args>
     command! -nargs=* Xexpr <mods>lexpr <args>
-    command! -count -nargs=* Xvimgrep <mods> <count>lvimgrep <args>
+    command! -count=999 -nargs=* Xvimgrep <mods> <count>lvimgrep <args>
     command! -nargs=* Xvimgrepadd <mods> lvimgrepadd <args>
     command! -nargs=* Xgrep <mods> lgrep <args>
     command! -nargs=* Xgrepadd <mods> lgrepadd <args>
@@ -5372,4 +5372,50 @@ func Test_vimgrep_noswapfile()
   set swapfile
 endfunc
 
+" Test for the :vimgrep 'f' flag (fuzzy match)
+func Xvimgrep_fuzzy_match(cchar)
+  call s:setup_commands(a:cchar)
+
+  Xvimgrep /three one/f Xfile*
+  let l = g:Xgetlist()
+  call assert_equal(2, len(l))
+  call assert_equal(['Xfile1', 1, 9, 'one two three'],
+        \ [bufname(l[0].bufnr), l[0].lnum, l[0].col, l[0].text])
+  call assert_equal(['Xfile2', 2, 1, 'three one two'],
+        \ [bufname(l[1].bufnr), l[1].lnum, l[1].col, l[1].text])
+
+  Xvimgrep /the/f Xfile*
+  let l = g:Xgetlist()
+  call assert_equal(3, len(l))
+  call assert_equal(['Xfile1', 1, 9, 'one two three'],
+        \ [bufname(l[0].bufnr), l[0].lnum, l[0].col, l[0].text])
+  call assert_equal(['Xfile2', 2, 1, 'three one two'],
+        \ [bufname(l[1].bufnr), l[1].lnum, l[1].col, l[1].text])
+  call assert_equal(['Xfile2', 4, 4, 'aaathreeaaa'],
+        \ [bufname(l[2].bufnr), l[2].lnum, l[2].col, l[2].text])
+
+  Xvimgrep /aaa/fg Xfile*
+  let l = g:Xgetlist()
+  call assert_equal(4, len(l))
+  call assert_equal(['Xfile1', 2, 1, 'aaaaaa'],
+        \ [bufname(l[0].bufnr), l[0].lnum, l[0].col, l[0].text])
+  call assert_equal(['Xfile1', 2, 4, 'aaaaaa'],
+        \ [bufname(l[1].bufnr), l[1].lnum, l[1].col, l[1].text])
+  call assert_equal(['Xfile2', 4, 1, 'aaathreeaaa'],
+        \ [bufname(l[2].bufnr), l[2].lnum, l[2].col, l[2].text])
+  call assert_equal(['Xfile2', 4, 9, 'aaathreeaaa'],
+        \ [bufname(l[3].bufnr), l[3].lnum, l[3].col, l[3].text])
+
+  call assert_fails('Xvimgrep /xyz/fg Xfile*', 'E480:')
+endfunc
+
+func Test_vimgrep_fuzzy_match()
+  call writefile(['one two three', 'aaaaaa'], 'Xfile1')
+  call writefile(['one', 'three one two', 'two', 'aaathreeaaa'], 'Xfile2')
+  call Xvimgrep_fuzzy_match('c')
+  call Xvimgrep_fuzzy_match('l')
+  call delete('Xfile1')
+  call delete('Xfile2')
+endfunc
+
 " vim: shiftwidth=2 sts=2 expandtab
--- a/src/version.c
+++ b/src/version.c
@@ -751,6 +751,8 @@ static char *(features[]) =
 static int included_patches[] =
 {   /* Add new patch number below this line */
 /**/
+    2813,
+/**/
     2812,
 /**/
     2811,
--- a/src/vim.h
+++ b/src/vim.h
@@ -2457,6 +2457,7 @@ typedef enum {
 // flags for skip_vimgrep_pat()
 #define VGR_GLOBAL	1
 #define VGR_NOJUMP	2
+#define VGR_FUZZY	4
 
 // behavior for bad character, "++bad=" argument
 #define BAD_REPLACE	'?'	// replace it with '?' (default)
@@ -2711,4 +2712,7 @@ long elapsed(DWORD start_tick);
 #define EVAL_VAR_NOAUTOLOAD	2   // do not use script autoloading
 #define EVAL_VAR_IMPORT		4   // may return special variable for import
 
+// Maximum number of characters that can be fuzzy matched
+#define MAX_FUZZY_MATCHES	256
+
 #endif // VIM__H