changeset 26786:b802798e4a26 v8.2.3921

patch 8.2.3921: the way xdiff is used is inefficient Commit: https://github.com/vim/vim/commit/d9da86e94ea8dbaa056270a666892945c40a6674 Author: Lewis Russell <lewis6991@gmail.com> Date: Tue Dec 28 13:54:41 2021 +0000 patch 8.2.3921: the way xdiff is used is inefficient Problem: The way xdiff is used is inefficient. Solution: Use hunk_func instead of the out_line callback. (Lewis Russell, closes #9344)
author Bram Moolenaar <Bram@vim.org>
date Tue, 28 Dec 2021 15:00:03 +0100
parents a9a760590e20
children cd8dfdcab93c
files src/diff.c src/version.c
diffstat 2 files changed, 148 insertions(+), 118 deletions(-) [+]
line wrap: on
line diff
--- a/src/diff.c
+++ b/src/diff.c
@@ -64,6 +64,14 @@ typedef struct {
     garray_T	dout_ga;      // used for internal diff
 } diffout_T;
 
+// used for recording hunks from xdiff
+typedef struct {
+    linenr_T lnum_orig;
+    long     count_orig;
+    linenr_T lnum_new;
+    long     count_new;
+} diffhunk_T;
+
 // two diff inputs and one result
 typedef struct {
     diffin_T	dio_orig;     // original file input
@@ -84,12 +92,12 @@ static int diff_cmp(char_u *s1, char_u *
 #ifdef FEAT_FOLDING
 static void diff_fold_update(diff_T *dp, int skip_idx);
 #endif
-static void diff_read(int idx_orig, int idx_new, diffout_T *fname);
+static void diff_read(int idx_orig, int idx_new, diffio_T *dio);
 static void diff_copy_entry(diff_T *dprev, diff_T *dp, int idx_orig, int idx_new);
 static diff_T *diff_alloc_new(tabpage_T *tp, diff_T *dprev, diff_T *dp);
-static int parse_diff_ed(char_u *line, linenr_T *lnum_orig, long *count_orig, linenr_T *lnum_new, long *count_new);
-static int parse_diff_unified(char_u *line, linenr_T *lnum_orig, long *count_orig, linenr_T *lnum_new, long *count_new);
-static int xdiff_out(void *priv, mmbuffer_t *mb, int nbuf);
+static int parse_diff_ed(char_u *line, diffhunk_T *hunk);
+static int parse_diff_unified(char_u *line, diffhunk_T *hunk);
+static int xdiff_out(long start_a, long count_a, long start_b, long count_b, void *priv);
 
 #define FOR_ALL_DIFFBLOCKS_IN_TAB(tp, dp) \
     for ((dp) = (tp)->tp_first_diff; (dp) != NULL; (dp) = (dp)->df_next)
@@ -880,7 +888,7 @@ diff_try_update(
 	    continue;
 
 	// Read the diff output and add each entry to the diff list.
-	diff_read(idx_orig, idx_new, &dio->dio_diff);
+	diff_read(idx_orig, idx_new, dio);
 
 	clear_diffin(&dio->dio_new);
 	clear_diffout(&dio->dio_diff);
@@ -1118,7 +1126,7 @@ diff_file_internal(diffio_T *diffio)
 
     emit_cfg.ctxlen = 0; // don't need any diff_context here
     emit_cb.priv = &diffio->dio_diff;
-    emit_cb.out_line = xdiff_out;
+    emit_cfg.hunk_func = xdiff_out;
     if (xdl_diff(&diffio->dio_orig.din_mmfile,
 		&diffio->dio_new.din_mmfile,
 		&param, &emit_cfg, &emit_cb) < 0)
@@ -1482,9 +1490,9 @@ diff_win_options(
     wp->w_p_crb = TRUE;
     if (!(diff_flags & DIFF_FOLLOWWRAP))
     {
-        if (!wp->w_p_diff)
+	if (!wp->w_p_diff)
 	    wp->w_p_wrap_save = wp->w_p_wrap;
-        wp->w_p_wrap = FALSE;
+	wp->w_p_wrap = FALSE;
     }
 # ifdef FEAT_FOLDING
     if (!wp->w_p_diff)
@@ -1549,7 +1557,7 @@ ex_diffoff(exarg_T *eap)
 		if (!(diff_flags & DIFF_FOLLOWWRAP))
 		{
 		    if (!wp->w_p_wrap)
-		        wp->w_p_wrap = wp->w_p_wrap_save;
+			wp->w_p_wrap = wp->w_p_wrap_save;
 		}
 #ifdef FEAT_FOLDING
 		free_string_option(wp->w_p_fdm);
@@ -1607,20 +1615,21 @@ ex_diffoff(exarg_T *eap)
 diff_read(
     int		idx_orig,	// idx of original file
     int		idx_new,	// idx of new file
-    diffout_T	*dout)		// diff output
+    diffio_T   *dio)		// diff output
 {
     FILE	*fd = NULL;
     int		line_idx = 0;
     diff_T	*dprev = NULL;
     diff_T	*dp = curtab->tp_first_diff;
     diff_T	*dn, *dpl;
+    diffout_T   *dout = &dio->dio_diff;
     char_u	linebuf[LBUFLEN];   // only need to hold the diff line
     char_u	*line;
     long	off;
     int		i;
-    linenr_T	lnum_orig, lnum_new;
-    long	count_orig, count_new;
     int		notset = TRUE;	    // block "*dp" not set yet
+    diffhunk_T	*hunk;
+
     enum {
 	DIFF_ED,
 	DIFF_UNIFIED,
@@ -1641,75 +1650,94 @@ diff_read(
 	}
     }
 
+    if (!dio->dio_internal)
+    {
+	hunk = ALLOC_ONE(diffhunk_T);
+	if (hunk == NULL)
+	{
+	    emsg(_("E98: Cannot read diff output"));
+	    return;
+	}
+    }
+
     for (;;)
     {
-	if (fd == NULL)
+	if (dio->dio_internal)
 	{
-	    if (line_idx >= dout->dout_ga.ga_len)
-		break;	    // did last line
-	    line = ((char_u **)dout->dout_ga.ga_data)[line_idx++];
+	    if (line_idx >= dout->dout_ga.ga_len) {
+		break;      // did last line
+	    }
+	    hunk = ((diffhunk_T **)dout->dout_ga.ga_data)[line_idx++];
 	}
 	else
 	{
-	    if (vim_fgets(linebuf, LBUFLEN, fd))
-		break;		// end of file
-	    line = linebuf;
-	}
-
-	if (diffstyle == DIFF_NONE)
-	{
-	    // Determine diff style.
-	    // ed like diff looks like this:
-	    // {first}[,{last}]c{first}[,{last}]
-	    // {first}a{first}[,{last}]
-	    // {first}[,{last}]d{first}
-	    //
-	    // unified diff looks like this:
-	    // --- file1       2018-03-20 13:23:35.783153140 +0100
-	    // +++ file2       2018-03-20 13:23:41.183156066 +0100
-	    // @@ -1,3 +1,5 @@
-	    if (isdigit(*line))
-		diffstyle = DIFF_ED;
-	    else if ((STRNCMP(line, "@@ ", 3) == 0))
-	       diffstyle = DIFF_UNIFIED;
-	    else if ((STRNCMP(line, "--- ", 4) == 0)
-		    && (vim_fgets(linebuf, LBUFLEN, fd) == 0)
-		    && (STRNCMP(line, "+++ ", 4) == 0)
-		    && (vim_fgets(linebuf, LBUFLEN, fd) == 0)
-		    && (STRNCMP(line, "@@ ", 3) == 0))
-		diffstyle = DIFF_UNIFIED;
+	    if (fd == NULL)
+	    {
+		if (line_idx >= dout->dout_ga.ga_len)
+		    break;	    // did last line
+		line = ((char_u **)dout->dout_ga.ga_data)[line_idx++];
+	    }
 	    else
-		// Format not recognized yet, skip over this line.  Cygwin diff
-		// may put a warning at the start of the file.
-		continue;
-	}
-
-	if (diffstyle == DIFF_ED)
-	{
-	    if (!isdigit(*line))
-		continue;	// not the start of a diff block
-	    if (parse_diff_ed(line, &lnum_orig, &count_orig,
-						&lnum_new, &count_new) == FAIL)
-		continue;
-	}
-	else if (diffstyle == DIFF_UNIFIED)
-	{
-	    if (STRNCMP(line, "@@ ", 3)  != 0)
-		continue;	// not the start of a diff block
-	    if (parse_diff_unified(line, &lnum_orig, &count_orig,
-						&lnum_new, &count_new) == FAIL)
-		continue;
-	}
-	else
-	{
-	    emsg(_("E959: Invalid diff format."));
-	    break;
+	    {
+		if (vim_fgets(linebuf, LBUFLEN, fd))
+		    break;		// end of file
+		line = linebuf;
+	    }
+
+	    if (diffstyle == DIFF_NONE)
+	    {
+		// Determine diff style.
+		// ed like diff looks like this:
+		// {first}[,{last}]c{first}[,{last}]
+		// {first}a{first}[,{last}]
+		// {first}[,{last}]d{first}
+		//
+		// unified diff looks like this:
+		// --- file1       2018-03-20 13:23:35.783153140 +0100
+		// +++ file2       2018-03-20 13:23:41.183156066 +0100
+		// @@ -1,3 +1,5 @@
+		if (isdigit(*line))
+		    diffstyle = DIFF_ED;
+		else if ((STRNCMP(line, "@@ ", 3) == 0))
+		    diffstyle = DIFF_UNIFIED;
+		else if ((STRNCMP(line, "--- ", 4) == 0)
+			&& (vim_fgets(linebuf, LBUFLEN, fd) == 0)
+			&& (STRNCMP(line, "+++ ", 4) == 0)
+			&& (vim_fgets(linebuf, LBUFLEN, fd) == 0)
+			&& (STRNCMP(line, "@@ ", 3) == 0))
+		    diffstyle = DIFF_UNIFIED;
+		else
+		    // Format not recognized yet, skip over this line.  Cygwin
+		    // diff may put a warning at the start of the file.
+		    continue;
+	    }
+
+	    if (diffstyle == DIFF_ED)
+	    {
+		if (!isdigit(*line))
+		    continue;	// not the start of a diff block
+		if (parse_diff_ed(line, hunk) == FAIL)
+		    continue;
+	    }
+	    else if (diffstyle == DIFF_UNIFIED)
+	    {
+		if (STRNCMP(line, "@@ ", 3)  != 0)
+		    continue;	// not the start of a diff block
+		if (parse_diff_unified(line, hunk) == FAIL)
+		    continue;
+	    }
+	    else
+	    {
+		emsg(_("E959: Invalid diff format."));
+		break;
+	    }
 	}
 
 	// Go over blocks before the change, for which orig and new are equal.
 	// Copy blocks from orig to new.
 	while (dp != NULL
-		&& lnum_orig > dp->df_lnum[idx_orig] + dp->df_count[idx_orig])
+		&& hunk->lnum_orig > dp->df_lnum[idx_orig]
+						      + dp->df_count[idx_orig])
 	{
 	    if (notset)
 		diff_copy_entry(dprev, dp, idx_orig, idx_new);
@@ -1719,41 +1747,43 @@ diff_read(
 	}
 
 	if (dp != NULL
-		&& lnum_orig <= dp->df_lnum[idx_orig] + dp->df_count[idx_orig]
-		&& lnum_orig + count_orig >= dp->df_lnum[idx_orig])
+		&& hunk->lnum_orig <= dp->df_lnum[idx_orig]
+						       + dp->df_count[idx_orig]
+		&& hunk->lnum_orig + hunk->count_orig >= dp->df_lnum[idx_orig])
 	{
 	    // New block overlaps with existing block(s).
 	    // First find last block that overlaps.
 	    for (dpl = dp; dpl->df_next != NULL; dpl = dpl->df_next)
-		if (lnum_orig + count_orig < dpl->df_next->df_lnum[idx_orig])
+		if (hunk->lnum_orig + hunk->count_orig
+					     < dpl->df_next->df_lnum[idx_orig])
 		    break;
 
 	    // If the newly found block starts before the old one, set the
 	    // start back a number of lines.
-	    off = dp->df_lnum[idx_orig] - lnum_orig;
+	    off = dp->df_lnum[idx_orig] - hunk->lnum_orig;
 	    if (off > 0)
 	    {
 		for (i = idx_orig; i < idx_new; ++i)
 		    if (curtab->tp_diffbuf[i] != NULL)
 			dp->df_lnum[i] -= off;
-		dp->df_lnum[idx_new] = lnum_new;
-		dp->df_count[idx_new] = count_new;
+		dp->df_lnum[idx_new] = hunk->lnum_new;
+		dp->df_count[idx_new] = hunk->count_new;
 	    }
 	    else if (notset)
 	    {
 		// new block inside existing one, adjust new block
-		dp->df_lnum[idx_new] = lnum_new + off;
-		dp->df_count[idx_new] = count_new - off;
+		dp->df_lnum[idx_new] = hunk->lnum_new + off;
+		dp->df_count[idx_new] = hunk->count_new - off;
 	    }
 	    else
 		// second overlap of new block with existing block
-		dp->df_count[idx_new] += count_new - count_orig
+		dp->df_count[idx_new] += hunk->count_new - hunk->count_orig
 		    + dpl->df_lnum[idx_orig] + dpl->df_count[idx_orig]
 		    - (dp->df_lnum[idx_orig] + dp->df_count[idx_orig]);
 
 	    // Adjust the size of the block to include all the lines to the
 	    // end of the existing block or the new diff, whatever ends last.
-	    off = (lnum_orig + count_orig)
+	    off = (hunk->lnum_orig + hunk->count_orig)
 			 - (dpl->df_lnum[idx_orig] + dpl->df_count[idx_orig]);
 	    if (off < 0)
 	    {
@@ -1785,10 +1815,10 @@ diff_read(
 	    if (dp == NULL)
 		goto done;
 
-	    dp->df_lnum[idx_orig] = lnum_orig;
-	    dp->df_count[idx_orig] = count_orig;
-	    dp->df_lnum[idx_new] = lnum_new;
-	    dp->df_count[idx_new] = count_new;
+	    dp->df_lnum[idx_orig] = hunk->lnum_orig;
+	    dp->df_count[idx_orig] = hunk->count_orig;
+	    dp->df_lnum[idx_new] = hunk->lnum_new;
+	    dp->df_count[idx_new] = hunk->count_new;
 
 	    // Set values for other buffers, these must be equal to the
 	    // original buffer, otherwise there would have been a change
@@ -1811,6 +1841,9 @@ diff_read(
     }
 
 done:
+    if (!dio->dio_internal)
+	vim_free(hunk);
+
     if (fd != NULL)
 	fclose(fd);
 }
@@ -3137,10 +3170,7 @@ diff_lnum_win(linenr_T lnum, win_T *wp)
     static int
 parse_diff_ed(
 	char_u	    *line,
-	linenr_T    *lnum_orig,
-	long	    *count_orig,
-	linenr_T    *lnum_new,
-	long	    *count_new)
+	diffhunk_T  *hunk)
 {
     char_u *p;
     long    f1, l1, f2, l2;
@@ -3175,23 +3205,23 @@ parse_diff_ed(
 
     if (difftype == 'a')
     {
-	*lnum_orig = f1 + 1;
-	*count_orig = 0;
+	hunk->lnum_orig = f1 + 1;
+	hunk->count_orig = 0;
     }
     else
     {
-	*lnum_orig = f1;
-	*count_orig = l1 - f1 + 1;
+	hunk->lnum_orig = f1;
+	hunk->count_orig = l1 - f1 + 1;
     }
     if (difftype == 'd')
     {
-	*lnum_new = f2 + 1;
-	*count_new = 0;
+	hunk->lnum_new = f2 + 1;
+	hunk->count_new = 0;
     }
     else
     {
-	*lnum_new = f2;
-	*count_new = l2 - f2 + 1;
+	hunk->lnum_new = f2;
+	hunk->count_new = l2 - f2 + 1;
     }
     return OK;
 }
@@ -3203,10 +3233,7 @@ parse_diff_ed(
     static int
 parse_diff_unified(
 	char_u	    *line,
-	linenr_T    *lnum_orig,
-	long	    *count_orig,
-	linenr_T    *lnum_new,
-	long	    *count_new)
+	diffhunk_T  *hunk)
 {
     char_u *p;
     long    oldline, oldcount, newline, newcount;
@@ -3245,10 +3272,10 @@ parse_diff_unified(
 	if (newline == 0)
 	    newline = 1;
 
-	*lnum_orig = oldline;
-	*count_orig = oldcount;
-	*lnum_new = newline;
-	*count_new = newcount;
+	hunk->lnum_orig = oldline;
+	hunk->count_orig = oldcount;
+	hunk->lnum_new = newline;
+	hunk->count_new = newcount;
 
 	return OK;
     }
@@ -3261,26 +3288,27 @@ parse_diff_unified(
  * Stores the diff output in a grow array.
  */
     static int
-xdiff_out(void *priv, mmbuffer_t *mb, int nbuf)
+xdiff_out(
+	long start_a,
+	long count_a,
+	long start_b,
+	long count_b,
+	void *priv)
 {
     diffout_T	*dout = (diffout_T *)priv;
-    char_u	*p;
-
-    // The header line always comes by itself, text lines in at least two
-    // parts.  We drop the text part.
-    if (nbuf > 1)
-	return 0;
-
-    // sanity check
-    if (STRNCMP(mb[0].ptr, "@@ ", 3)  != 0)
-	return 0;
+    diffhunk_T *p = ALLOC_ONE(diffhunk_T);
+
+    if (p == NULL)
+	return -1;
 
     if (ga_grow(&dout->dout_ga, 1) == FAIL)
 	return -1;
-    p = vim_strnsave((char_u *)mb[0].ptr, mb[0].size);
-    if (p == NULL)
-	return -1;
-    ((char_u **)dout->dout_ga.ga_data)[dout->dout_ga.ga_len++] = p;
+
+    p->lnum_orig  = start_a + 1;
+    p->count_orig = count_a;
+    p->lnum_new   = start_b + 1;
+    p->count_new  = count_b;
+    ((diffhunk_T **)dout->dout_ga.ga_data)[dout->dout_ga.ga_len++] = p;
     return 0;
 }
 
--- a/src/version.c
+++ b/src/version.c
@@ -750,6 +750,8 @@ static char *(features[]) =
 static int included_patches[] =
 {   /* Add new patch number below this line */
 /**/
+    3921,
+/**/
     3920,
 /**/
     3919,