# HG changeset patch # User Bram Moolenaar # Date 1640700003 -3600 # Node ID b802798e4a26207759755c5d56a68020b67f5ecb # Parent a9a760590e208fac57b64a46050a8cc108a88f6b patch 8.2.3921: the way xdiff is used is inefficient Commit: https://github.com/vim/vim/commit/d9da86e94ea8dbaa056270a666892945c40a6674 Author: Lewis Russell Date: Tue Dec 28 13:54:41 2021 +0000 patch 8.2.3921: the way xdiff is used is inefficient Problem: The way xdiff is used is inefficient. Solution: Use hunk_func instead of the out_line callback. (Lewis Russell, closes #9344) diff --git a/src/diff.c b/src/diff.c --- a/src/diff.c +++ b/src/diff.c @@ -64,6 +64,14 @@ typedef struct { garray_T dout_ga; // used for internal diff } diffout_T; +// used for recording hunks from xdiff +typedef struct { + linenr_T lnum_orig; + long count_orig; + linenr_T lnum_new; + long count_new; +} diffhunk_T; + // two diff inputs and one result typedef struct { diffin_T dio_orig; // original file input @@ -84,12 +92,12 @@ static int diff_cmp(char_u *s1, char_u * #ifdef FEAT_FOLDING static void diff_fold_update(diff_T *dp, int skip_idx); #endif -static void diff_read(int idx_orig, int idx_new, diffout_T *fname); +static void diff_read(int idx_orig, int idx_new, diffio_T *dio); static void diff_copy_entry(diff_T *dprev, diff_T *dp, int idx_orig, int idx_new); static diff_T *diff_alloc_new(tabpage_T *tp, diff_T *dprev, diff_T *dp); -static int parse_diff_ed(char_u *line, linenr_T *lnum_orig, long *count_orig, linenr_T *lnum_new, long *count_new); -static int parse_diff_unified(char_u *line, linenr_T *lnum_orig, long *count_orig, linenr_T *lnum_new, long *count_new); -static int xdiff_out(void *priv, mmbuffer_t *mb, int nbuf); +static int parse_diff_ed(char_u *line, diffhunk_T *hunk); +static int parse_diff_unified(char_u *line, diffhunk_T *hunk); +static int xdiff_out(long start_a, long count_a, long start_b, long count_b, void *priv); #define FOR_ALL_DIFFBLOCKS_IN_TAB(tp, dp) \ for ((dp) = (tp)->tp_first_diff; (dp) != NULL; (dp) = (dp)->df_next) @@ -880,7 +888,7 @@ diff_try_update( continue; // Read the diff output and add each entry to the diff list. - diff_read(idx_orig, idx_new, &dio->dio_diff); + diff_read(idx_orig, idx_new, dio); clear_diffin(&dio->dio_new); clear_diffout(&dio->dio_diff); @@ -1118,7 +1126,7 @@ diff_file_internal(diffio_T *diffio) emit_cfg.ctxlen = 0; // don't need any diff_context here emit_cb.priv = &diffio->dio_diff; - emit_cb.out_line = xdiff_out; + emit_cfg.hunk_func = xdiff_out; if (xdl_diff(&diffio->dio_orig.din_mmfile, &diffio->dio_new.din_mmfile, ¶m, &emit_cfg, &emit_cb) < 0) @@ -1482,9 +1490,9 @@ diff_win_options( wp->w_p_crb = TRUE; if (!(diff_flags & DIFF_FOLLOWWRAP)) { - if (!wp->w_p_diff) + if (!wp->w_p_diff) wp->w_p_wrap_save = wp->w_p_wrap; - wp->w_p_wrap = FALSE; + wp->w_p_wrap = FALSE; } # ifdef FEAT_FOLDING if (!wp->w_p_diff) @@ -1549,7 +1557,7 @@ ex_diffoff(exarg_T *eap) if (!(diff_flags & DIFF_FOLLOWWRAP)) { if (!wp->w_p_wrap) - wp->w_p_wrap = wp->w_p_wrap_save; + wp->w_p_wrap = wp->w_p_wrap_save; } #ifdef FEAT_FOLDING free_string_option(wp->w_p_fdm); @@ -1607,20 +1615,21 @@ ex_diffoff(exarg_T *eap) diff_read( int idx_orig, // idx of original file int idx_new, // idx of new file - diffout_T *dout) // diff output + diffio_T *dio) // diff output { FILE *fd = NULL; int line_idx = 0; diff_T *dprev = NULL; diff_T *dp = curtab->tp_first_diff; diff_T *dn, *dpl; + diffout_T *dout = &dio->dio_diff; char_u linebuf[LBUFLEN]; // only need to hold the diff line char_u *line; long off; int i; - linenr_T lnum_orig, lnum_new; - long count_orig, count_new; int notset = TRUE; // block "*dp" not set yet + diffhunk_T *hunk; + enum { DIFF_ED, DIFF_UNIFIED, @@ -1641,75 +1650,94 @@ diff_read( } } + if (!dio->dio_internal) + { + hunk = ALLOC_ONE(diffhunk_T); + if (hunk == NULL) + { + emsg(_("E98: Cannot read diff output")); + return; + } + } + for (;;) { - if (fd == NULL) + if (dio->dio_internal) { - if (line_idx >= dout->dout_ga.ga_len) - break; // did last line - line = ((char_u **)dout->dout_ga.ga_data)[line_idx++]; + if (line_idx >= dout->dout_ga.ga_len) { + break; // did last line + } + hunk = ((diffhunk_T **)dout->dout_ga.ga_data)[line_idx++]; } else { - if (vim_fgets(linebuf, LBUFLEN, fd)) - break; // end of file - line = linebuf; - } - - if (diffstyle == DIFF_NONE) - { - // Determine diff style. - // ed like diff looks like this: - // {first}[,{last}]c{first}[,{last}] - // {first}a{first}[,{last}] - // {first}[,{last}]d{first} - // - // unified diff looks like this: - // --- file1 2018-03-20 13:23:35.783153140 +0100 - // +++ file2 2018-03-20 13:23:41.183156066 +0100 - // @@ -1,3 +1,5 @@ - if (isdigit(*line)) - diffstyle = DIFF_ED; - else if ((STRNCMP(line, "@@ ", 3) == 0)) - diffstyle = DIFF_UNIFIED; - else if ((STRNCMP(line, "--- ", 4) == 0) - && (vim_fgets(linebuf, LBUFLEN, fd) == 0) - && (STRNCMP(line, "+++ ", 4) == 0) - && (vim_fgets(linebuf, LBUFLEN, fd) == 0) - && (STRNCMP(line, "@@ ", 3) == 0)) - diffstyle = DIFF_UNIFIED; + if (fd == NULL) + { + if (line_idx >= dout->dout_ga.ga_len) + break; // did last line + line = ((char_u **)dout->dout_ga.ga_data)[line_idx++]; + } else - // Format not recognized yet, skip over this line. Cygwin diff - // may put a warning at the start of the file. - continue; - } - - if (diffstyle == DIFF_ED) - { - if (!isdigit(*line)) - continue; // not the start of a diff block - if (parse_diff_ed(line, &lnum_orig, &count_orig, - &lnum_new, &count_new) == FAIL) - continue; - } - else if (diffstyle == DIFF_UNIFIED) - { - if (STRNCMP(line, "@@ ", 3) != 0) - continue; // not the start of a diff block - if (parse_diff_unified(line, &lnum_orig, &count_orig, - &lnum_new, &count_new) == FAIL) - continue; - } - else - { - emsg(_("E959: Invalid diff format.")); - break; + { + if (vim_fgets(linebuf, LBUFLEN, fd)) + break; // end of file + line = linebuf; + } + + if (diffstyle == DIFF_NONE) + { + // Determine diff style. + // ed like diff looks like this: + // {first}[,{last}]c{first}[,{last}] + // {first}a{first}[,{last}] + // {first}[,{last}]d{first} + // + // unified diff looks like this: + // --- file1 2018-03-20 13:23:35.783153140 +0100 + // +++ file2 2018-03-20 13:23:41.183156066 +0100 + // @@ -1,3 +1,5 @@ + if (isdigit(*line)) + diffstyle = DIFF_ED; + else if ((STRNCMP(line, "@@ ", 3) == 0)) + diffstyle = DIFF_UNIFIED; + else if ((STRNCMP(line, "--- ", 4) == 0) + && (vim_fgets(linebuf, LBUFLEN, fd) == 0) + && (STRNCMP(line, "+++ ", 4) == 0) + && (vim_fgets(linebuf, LBUFLEN, fd) == 0) + && (STRNCMP(line, "@@ ", 3) == 0)) + diffstyle = DIFF_UNIFIED; + else + // Format not recognized yet, skip over this line. Cygwin + // diff may put a warning at the start of the file. + continue; + } + + if (diffstyle == DIFF_ED) + { + if (!isdigit(*line)) + continue; // not the start of a diff block + if (parse_diff_ed(line, hunk) == FAIL) + continue; + } + else if (diffstyle == DIFF_UNIFIED) + { + if (STRNCMP(line, "@@ ", 3) != 0) + continue; // not the start of a diff block + if (parse_diff_unified(line, hunk) == FAIL) + continue; + } + else + { + emsg(_("E959: Invalid diff format.")); + break; + } } // Go over blocks before the change, for which orig and new are equal. // Copy blocks from orig to new. while (dp != NULL - && lnum_orig > dp->df_lnum[idx_orig] + dp->df_count[idx_orig]) + && hunk->lnum_orig > dp->df_lnum[idx_orig] + + dp->df_count[idx_orig]) { if (notset) diff_copy_entry(dprev, dp, idx_orig, idx_new); @@ -1719,41 +1747,43 @@ diff_read( } if (dp != NULL - && lnum_orig <= dp->df_lnum[idx_orig] + dp->df_count[idx_orig] - && lnum_orig + count_orig >= dp->df_lnum[idx_orig]) + && hunk->lnum_orig <= dp->df_lnum[idx_orig] + + dp->df_count[idx_orig] + && hunk->lnum_orig + hunk->count_orig >= dp->df_lnum[idx_orig]) { // New block overlaps with existing block(s). // First find last block that overlaps. for (dpl = dp; dpl->df_next != NULL; dpl = dpl->df_next) - if (lnum_orig + count_orig < dpl->df_next->df_lnum[idx_orig]) + if (hunk->lnum_orig + hunk->count_orig + < dpl->df_next->df_lnum[idx_orig]) break; // If the newly found block starts before the old one, set the // start back a number of lines. - off = dp->df_lnum[idx_orig] - lnum_orig; + off = dp->df_lnum[idx_orig] - hunk->lnum_orig; if (off > 0) { for (i = idx_orig; i < idx_new; ++i) if (curtab->tp_diffbuf[i] != NULL) dp->df_lnum[i] -= off; - dp->df_lnum[idx_new] = lnum_new; - dp->df_count[idx_new] = count_new; + dp->df_lnum[idx_new] = hunk->lnum_new; + dp->df_count[idx_new] = hunk->count_new; } else if (notset) { // new block inside existing one, adjust new block - dp->df_lnum[idx_new] = lnum_new + off; - dp->df_count[idx_new] = count_new - off; + dp->df_lnum[idx_new] = hunk->lnum_new + off; + dp->df_count[idx_new] = hunk->count_new - off; } else // second overlap of new block with existing block - dp->df_count[idx_new] += count_new - count_orig + dp->df_count[idx_new] += hunk->count_new - hunk->count_orig + dpl->df_lnum[idx_orig] + dpl->df_count[idx_orig] - (dp->df_lnum[idx_orig] + dp->df_count[idx_orig]); // Adjust the size of the block to include all the lines to the // end of the existing block or the new diff, whatever ends last. - off = (lnum_orig + count_orig) + off = (hunk->lnum_orig + hunk->count_orig) - (dpl->df_lnum[idx_orig] + dpl->df_count[idx_orig]); if (off < 0) { @@ -1785,10 +1815,10 @@ diff_read( if (dp == NULL) goto done; - dp->df_lnum[idx_orig] = lnum_orig; - dp->df_count[idx_orig] = count_orig; - dp->df_lnum[idx_new] = lnum_new; - dp->df_count[idx_new] = count_new; + dp->df_lnum[idx_orig] = hunk->lnum_orig; + dp->df_count[idx_orig] = hunk->count_orig; + dp->df_lnum[idx_new] = hunk->lnum_new; + dp->df_count[idx_new] = hunk->count_new; // Set values for other buffers, these must be equal to the // original buffer, otherwise there would have been a change @@ -1811,6 +1841,9 @@ diff_read( } done: + if (!dio->dio_internal) + vim_free(hunk); + if (fd != NULL) fclose(fd); } @@ -3137,10 +3170,7 @@ diff_lnum_win(linenr_T lnum, win_T *wp) static int parse_diff_ed( char_u *line, - linenr_T *lnum_orig, - long *count_orig, - linenr_T *lnum_new, - long *count_new) + diffhunk_T *hunk) { char_u *p; long f1, l1, f2, l2; @@ -3175,23 +3205,23 @@ parse_diff_ed( if (difftype == 'a') { - *lnum_orig = f1 + 1; - *count_orig = 0; + hunk->lnum_orig = f1 + 1; + hunk->count_orig = 0; } else { - *lnum_orig = f1; - *count_orig = l1 - f1 + 1; + hunk->lnum_orig = f1; + hunk->count_orig = l1 - f1 + 1; } if (difftype == 'd') { - *lnum_new = f2 + 1; - *count_new = 0; + hunk->lnum_new = f2 + 1; + hunk->count_new = 0; } else { - *lnum_new = f2; - *count_new = l2 - f2 + 1; + hunk->lnum_new = f2; + hunk->count_new = l2 - f2 + 1; } return OK; } @@ -3203,10 +3233,7 @@ parse_diff_ed( static int parse_diff_unified( char_u *line, - linenr_T *lnum_orig, - long *count_orig, - linenr_T *lnum_new, - long *count_new) + diffhunk_T *hunk) { char_u *p; long oldline, oldcount, newline, newcount; @@ -3245,10 +3272,10 @@ parse_diff_unified( if (newline == 0) newline = 1; - *lnum_orig = oldline; - *count_orig = oldcount; - *lnum_new = newline; - *count_new = newcount; + hunk->lnum_orig = oldline; + hunk->count_orig = oldcount; + hunk->lnum_new = newline; + hunk->count_new = newcount; return OK; } @@ -3261,26 +3288,27 @@ parse_diff_unified( * Stores the diff output in a grow array. */ static int -xdiff_out(void *priv, mmbuffer_t *mb, int nbuf) +xdiff_out( + long start_a, + long count_a, + long start_b, + long count_b, + void *priv) { diffout_T *dout = (diffout_T *)priv; - char_u *p; - - // The header line always comes by itself, text lines in at least two - // parts. We drop the text part. - if (nbuf > 1) - return 0; - - // sanity check - if (STRNCMP(mb[0].ptr, "@@ ", 3) != 0) - return 0; + diffhunk_T *p = ALLOC_ONE(diffhunk_T); + + if (p == NULL) + return -1; if (ga_grow(&dout->dout_ga, 1) == FAIL) return -1; - p = vim_strnsave((char_u *)mb[0].ptr, mb[0].size); - if (p == NULL) - return -1; - ((char_u **)dout->dout_ga.ga_data)[dout->dout_ga.ga_len++] = p; + + p->lnum_orig = start_a + 1; + p->count_orig = count_a; + p->lnum_new = start_b + 1; + p->count_new = count_b; + ((diffhunk_T **)dout->dout_ga.ga_data)[dout->dout_ga.ga_len++] = p; return 0; } diff --git a/src/version.c b/src/version.c --- a/src/version.c +++ b/src/version.c @@ -750,6 +750,8 @@ static char *(features[]) = static int included_patches[] = { /* Add new patch number below this line */ /**/ + 3921, +/**/ 3920, /**/ 3919,