patch 8.2.3921: the way xdiff is used is inefficient

Problem:    The way xdiff is used is inefficient.
Solution:   Use hunk_func instead of the out_line callback. (Lewis Russell,
            closes #9344)
This commit is contained in:
Lewis Russell
2021-12-28 13:54:41 +00:00
committed by Bram Moolenaar
parent 90c317f224
commit d9da86e94e
2 changed files with 146 additions and 116 deletions

View File

@ -64,6 +64,14 @@ typedef struct {
garray_T dout_ga; // used for internal diff garray_T dout_ga; // used for internal diff
} diffout_T; } diffout_T;
// used for recording hunks from xdiff
typedef struct {
linenr_T lnum_orig;
long count_orig;
linenr_T lnum_new;
long count_new;
} diffhunk_T;
// two diff inputs and one result // two diff inputs and one result
typedef struct { typedef struct {
diffin_T dio_orig; // original file input diffin_T dio_orig; // original file input
@ -84,12 +92,12 @@ static int diff_cmp(char_u *s1, char_u *s2);
#ifdef FEAT_FOLDING #ifdef FEAT_FOLDING
static void diff_fold_update(diff_T *dp, int skip_idx); static void diff_fold_update(diff_T *dp, int skip_idx);
#endif #endif
static void diff_read(int idx_orig, int idx_new, diffout_T *fname); static void diff_read(int idx_orig, int idx_new, diffio_T *dio);
static void diff_copy_entry(diff_T *dprev, diff_T *dp, int idx_orig, int idx_new); static void diff_copy_entry(diff_T *dprev, diff_T *dp, int idx_orig, int idx_new);
static diff_T *diff_alloc_new(tabpage_T *tp, diff_T *dprev, diff_T *dp); static diff_T *diff_alloc_new(tabpage_T *tp, diff_T *dprev, diff_T *dp);
static int parse_diff_ed(char_u *line, linenr_T *lnum_orig, long *count_orig, linenr_T *lnum_new, long *count_new); static int parse_diff_ed(char_u *line, diffhunk_T *hunk);
static int parse_diff_unified(char_u *line, linenr_T *lnum_orig, long *count_orig, linenr_T *lnum_new, long *count_new); static int parse_diff_unified(char_u *line, diffhunk_T *hunk);
static int xdiff_out(void *priv, mmbuffer_t *mb, int nbuf); static int xdiff_out(long start_a, long count_a, long start_b, long count_b, void *priv);
#define FOR_ALL_DIFFBLOCKS_IN_TAB(tp, dp) \ #define FOR_ALL_DIFFBLOCKS_IN_TAB(tp, dp) \
for ((dp) = (tp)->tp_first_diff; (dp) != NULL; (dp) = (dp)->df_next) for ((dp) = (tp)->tp_first_diff; (dp) != NULL; (dp) = (dp)->df_next)
@ -880,7 +888,7 @@ diff_try_update(
continue; continue;
// Read the diff output and add each entry to the diff list. // Read the diff output and add each entry to the diff list.
diff_read(idx_orig, idx_new, &dio->dio_diff); diff_read(idx_orig, idx_new, dio);
clear_diffin(&dio->dio_new); clear_diffin(&dio->dio_new);
clear_diffout(&dio->dio_diff); clear_diffout(&dio->dio_diff);
@ -1118,7 +1126,7 @@ diff_file_internal(diffio_T *diffio)
emit_cfg.ctxlen = 0; // don't need any diff_context here emit_cfg.ctxlen = 0; // don't need any diff_context here
emit_cb.priv = &diffio->dio_diff; emit_cb.priv = &diffio->dio_diff;
emit_cb.out_line = xdiff_out; emit_cfg.hunk_func = xdiff_out;
if (xdl_diff(&diffio->dio_orig.din_mmfile, if (xdl_diff(&diffio->dio_orig.din_mmfile,
&diffio->dio_new.din_mmfile, &diffio->dio_new.din_mmfile,
&param, &emit_cfg, &emit_cb) < 0) &param, &emit_cfg, &emit_cb) < 0)
@ -1482,9 +1490,9 @@ diff_win_options(
wp->w_p_crb = TRUE; wp->w_p_crb = TRUE;
if (!(diff_flags & DIFF_FOLLOWWRAP)) if (!(diff_flags & DIFF_FOLLOWWRAP))
{ {
if (!wp->w_p_diff) if (!wp->w_p_diff)
wp->w_p_wrap_save = wp->w_p_wrap; wp->w_p_wrap_save = wp->w_p_wrap;
wp->w_p_wrap = FALSE; wp->w_p_wrap = FALSE;
} }
# ifdef FEAT_FOLDING # ifdef FEAT_FOLDING
if (!wp->w_p_diff) if (!wp->w_p_diff)
@ -1549,7 +1557,7 @@ ex_diffoff(exarg_T *eap)
if (!(diff_flags & DIFF_FOLLOWWRAP)) if (!(diff_flags & DIFF_FOLLOWWRAP))
{ {
if (!wp->w_p_wrap) if (!wp->w_p_wrap)
wp->w_p_wrap = wp->w_p_wrap_save; wp->w_p_wrap = wp->w_p_wrap_save;
} }
#ifdef FEAT_FOLDING #ifdef FEAT_FOLDING
free_string_option(wp->w_p_fdm); free_string_option(wp->w_p_fdm);
@ -1607,20 +1615,21 @@ ex_diffoff(exarg_T *eap)
diff_read( diff_read(
int idx_orig, // idx of original file int idx_orig, // idx of original file
int idx_new, // idx of new file int idx_new, // idx of new file
diffout_T *dout) // diff output diffio_T *dio) // diff output
{ {
FILE *fd = NULL; FILE *fd = NULL;
int line_idx = 0; int line_idx = 0;
diff_T *dprev = NULL; diff_T *dprev = NULL;
diff_T *dp = curtab->tp_first_diff; diff_T *dp = curtab->tp_first_diff;
diff_T *dn, *dpl; diff_T *dn, *dpl;
diffout_T *dout = &dio->dio_diff;
char_u linebuf[LBUFLEN]; // only need to hold the diff line char_u linebuf[LBUFLEN]; // only need to hold the diff line
char_u *line; char_u *line;
long off; long off;
int i; int i;
linenr_T lnum_orig, lnum_new;
long count_orig, count_new;
int notset = TRUE; // block "*dp" not set yet int notset = TRUE; // block "*dp" not set yet
diffhunk_T *hunk;
enum { enum {
DIFF_ED, DIFF_ED,
DIFF_UNIFIED, DIFF_UNIFIED,
@ -1641,75 +1650,94 @@ diff_read(
} }
} }
if (!dio->dio_internal)
{
hunk = ALLOC_ONE(diffhunk_T);
if (hunk == NULL)
{
emsg(_("E98: Cannot read diff output"));
return;
}
}
for (;;) for (;;)
{ {
if (fd == NULL) if (dio->dio_internal)
{ {
if (line_idx >= dout->dout_ga.ga_len) if (line_idx >= dout->dout_ga.ga_len) {
break; // did last line break; // did last line
line = ((char_u **)dout->dout_ga.ga_data)[line_idx++]; }
hunk = ((diffhunk_T **)dout->dout_ga.ga_data)[line_idx++];
} }
else else
{ {
if (vim_fgets(linebuf, LBUFLEN, fd)) if (fd == NULL)
break; // end of file {
line = linebuf; if (line_idx >= dout->dout_ga.ga_len)
} break; // did last line
line = ((char_u **)dout->dout_ga.ga_data)[line_idx++];
if (diffstyle == DIFF_NONE) }
{
// Determine diff style.
// ed like diff looks like this:
// {first}[,{last}]c{first}[,{last}]
// {first}a{first}[,{last}]
// {first}[,{last}]d{first}
//
// unified diff looks like this:
// --- file1 2018-03-20 13:23:35.783153140 +0100
// +++ file2 2018-03-20 13:23:41.183156066 +0100
// @@ -1,3 +1,5 @@
if (isdigit(*line))
diffstyle = DIFF_ED;
else if ((STRNCMP(line, "@@ ", 3) == 0))
diffstyle = DIFF_UNIFIED;
else if ((STRNCMP(line, "--- ", 4) == 0)
&& (vim_fgets(linebuf, LBUFLEN, fd) == 0)
&& (STRNCMP(line, "+++ ", 4) == 0)
&& (vim_fgets(linebuf, LBUFLEN, fd) == 0)
&& (STRNCMP(line, "@@ ", 3) == 0))
diffstyle = DIFF_UNIFIED;
else else
// Format not recognized yet, skip over this line. Cygwin diff {
// may put a warning at the start of the file. if (vim_fgets(linebuf, LBUFLEN, fd))
continue; break; // end of file
} line = linebuf;
}
if (diffstyle == DIFF_ED) if (diffstyle == DIFF_NONE)
{ {
if (!isdigit(*line)) // Determine diff style.
continue; // not the start of a diff block // ed like diff looks like this:
if (parse_diff_ed(line, &lnum_orig, &count_orig, // {first}[,{last}]c{first}[,{last}]
&lnum_new, &count_new) == FAIL) // {first}a{first}[,{last}]
continue; // {first}[,{last}]d{first}
} //
else if (diffstyle == DIFF_UNIFIED) // unified diff looks like this:
{ // --- file1 2018-03-20 13:23:35.783153140 +0100
if (STRNCMP(line, "@@ ", 3) != 0) // +++ file2 2018-03-20 13:23:41.183156066 +0100
continue; // not the start of a diff block // @@ -1,3 +1,5 @@
if (parse_diff_unified(line, &lnum_orig, &count_orig, if (isdigit(*line))
&lnum_new, &count_new) == FAIL) diffstyle = DIFF_ED;
continue; else if ((STRNCMP(line, "@@ ", 3) == 0))
} diffstyle = DIFF_UNIFIED;
else else if ((STRNCMP(line, "--- ", 4) == 0)
{ && (vim_fgets(linebuf, LBUFLEN, fd) == 0)
emsg(_("E959: Invalid diff format.")); && (STRNCMP(line, "+++ ", 4) == 0)
break; && (vim_fgets(linebuf, LBUFLEN, fd) == 0)
&& (STRNCMP(line, "@@ ", 3) == 0))
diffstyle = DIFF_UNIFIED;
else
// Format not recognized yet, skip over this line. Cygwin
// diff may put a warning at the start of the file.
continue;
}
if (diffstyle == DIFF_ED)
{
if (!isdigit(*line))
continue; // not the start of a diff block
if (parse_diff_ed(line, hunk) == FAIL)
continue;
}
else if (diffstyle == DIFF_UNIFIED)
{
if (STRNCMP(line, "@@ ", 3) != 0)
continue; // not the start of a diff block
if (parse_diff_unified(line, hunk) == FAIL)
continue;
}
else
{
emsg(_("E959: Invalid diff format."));
break;
}
} }
// Go over blocks before the change, for which orig and new are equal. // Go over blocks before the change, for which orig and new are equal.
// Copy blocks from orig to new. // Copy blocks from orig to new.
while (dp != NULL while (dp != NULL
&& lnum_orig > dp->df_lnum[idx_orig] + dp->df_count[idx_orig]) && hunk->lnum_orig > dp->df_lnum[idx_orig]
+ dp->df_count[idx_orig])
{ {
if (notset) if (notset)
diff_copy_entry(dprev, dp, idx_orig, idx_new); diff_copy_entry(dprev, dp, idx_orig, idx_new);
@ -1719,41 +1747,43 @@ diff_read(
} }
if (dp != NULL if (dp != NULL
&& lnum_orig <= dp->df_lnum[idx_orig] + dp->df_count[idx_orig] && hunk->lnum_orig <= dp->df_lnum[idx_orig]
&& lnum_orig + count_orig >= dp->df_lnum[idx_orig]) + dp->df_count[idx_orig]
&& hunk->lnum_orig + hunk->count_orig >= dp->df_lnum[idx_orig])
{ {
// New block overlaps with existing block(s). // New block overlaps with existing block(s).
// First find last block that overlaps. // First find last block that overlaps.
for (dpl = dp; dpl->df_next != NULL; dpl = dpl->df_next) for (dpl = dp; dpl->df_next != NULL; dpl = dpl->df_next)
if (lnum_orig + count_orig < dpl->df_next->df_lnum[idx_orig]) if (hunk->lnum_orig + hunk->count_orig
< dpl->df_next->df_lnum[idx_orig])
break; break;
// If the newly found block starts before the old one, set the // If the newly found block starts before the old one, set the
// start back a number of lines. // start back a number of lines.
off = dp->df_lnum[idx_orig] - lnum_orig; off = dp->df_lnum[idx_orig] - hunk->lnum_orig;
if (off > 0) if (off > 0)
{ {
for (i = idx_orig; i < idx_new; ++i) for (i = idx_orig; i < idx_new; ++i)
if (curtab->tp_diffbuf[i] != NULL) if (curtab->tp_diffbuf[i] != NULL)
dp->df_lnum[i] -= off; dp->df_lnum[i] -= off;
dp->df_lnum[idx_new] = lnum_new; dp->df_lnum[idx_new] = hunk->lnum_new;
dp->df_count[idx_new] = count_new; dp->df_count[idx_new] = hunk->count_new;
} }
else if (notset) else if (notset)
{ {
// new block inside existing one, adjust new block // new block inside existing one, adjust new block
dp->df_lnum[idx_new] = lnum_new + off; dp->df_lnum[idx_new] = hunk->lnum_new + off;
dp->df_count[idx_new] = count_new - off; dp->df_count[idx_new] = hunk->count_new - off;
} }
else else
// second overlap of new block with existing block // second overlap of new block with existing block
dp->df_count[idx_new] += count_new - count_orig dp->df_count[idx_new] += hunk->count_new - hunk->count_orig
+ dpl->df_lnum[idx_orig] + dpl->df_count[idx_orig] + dpl->df_lnum[idx_orig] + dpl->df_count[idx_orig]
- (dp->df_lnum[idx_orig] + dp->df_count[idx_orig]); - (dp->df_lnum[idx_orig] + dp->df_count[idx_orig]);
// Adjust the size of the block to include all the lines to the // Adjust the size of the block to include all the lines to the
// end of the existing block or the new diff, whatever ends last. // end of the existing block or the new diff, whatever ends last.
off = (lnum_orig + count_orig) off = (hunk->lnum_orig + hunk->count_orig)
- (dpl->df_lnum[idx_orig] + dpl->df_count[idx_orig]); - (dpl->df_lnum[idx_orig] + dpl->df_count[idx_orig]);
if (off < 0) if (off < 0)
{ {
@ -1785,10 +1815,10 @@ diff_read(
if (dp == NULL) if (dp == NULL)
goto done; goto done;
dp->df_lnum[idx_orig] = lnum_orig; dp->df_lnum[idx_orig] = hunk->lnum_orig;
dp->df_count[idx_orig] = count_orig; dp->df_count[idx_orig] = hunk->count_orig;
dp->df_lnum[idx_new] = lnum_new; dp->df_lnum[idx_new] = hunk->lnum_new;
dp->df_count[idx_new] = count_new; dp->df_count[idx_new] = hunk->count_new;
// Set values for other buffers, these must be equal to the // Set values for other buffers, these must be equal to the
// original buffer, otherwise there would have been a change // original buffer, otherwise there would have been a change
@ -1811,6 +1841,9 @@ diff_read(
} }
done: done:
if (!dio->dio_internal)
vim_free(hunk);
if (fd != NULL) if (fd != NULL)
fclose(fd); fclose(fd);
} }
@ -3137,10 +3170,7 @@ diff_lnum_win(linenr_T lnum, win_T *wp)
static int static int
parse_diff_ed( parse_diff_ed(
char_u *line, char_u *line,
linenr_T *lnum_orig, diffhunk_T *hunk)
long *count_orig,
linenr_T *lnum_new,
long *count_new)
{ {
char_u *p; char_u *p;
long f1, l1, f2, l2; long f1, l1, f2, l2;
@ -3175,23 +3205,23 @@ parse_diff_ed(
if (difftype == 'a') if (difftype == 'a')
{ {
*lnum_orig = f1 + 1; hunk->lnum_orig = f1 + 1;
*count_orig = 0; hunk->count_orig = 0;
} }
else else
{ {
*lnum_orig = f1; hunk->lnum_orig = f1;
*count_orig = l1 - f1 + 1; hunk->count_orig = l1 - f1 + 1;
} }
if (difftype == 'd') if (difftype == 'd')
{ {
*lnum_new = f2 + 1; hunk->lnum_new = f2 + 1;
*count_new = 0; hunk->count_new = 0;
} }
else else
{ {
*lnum_new = f2; hunk->lnum_new = f2;
*count_new = l2 - f2 + 1; hunk->count_new = l2 - f2 + 1;
} }
return OK; return OK;
} }
@ -3203,10 +3233,7 @@ parse_diff_ed(
static int static int
parse_diff_unified( parse_diff_unified(
char_u *line, char_u *line,
linenr_T *lnum_orig, diffhunk_T *hunk)
long *count_orig,
linenr_T *lnum_new,
long *count_new)
{ {
char_u *p; char_u *p;
long oldline, oldcount, newline, newcount; long oldline, oldcount, newline, newcount;
@ -3245,10 +3272,10 @@ parse_diff_unified(
if (newline == 0) if (newline == 0)
newline = 1; newline = 1;
*lnum_orig = oldline; hunk->lnum_orig = oldline;
*count_orig = oldcount; hunk->count_orig = oldcount;
*lnum_new = newline; hunk->lnum_new = newline;
*count_new = newcount; hunk->count_new = newcount;
return OK; return OK;
} }
@ -3261,26 +3288,27 @@ parse_diff_unified(
* Stores the diff output in a grow array. * Stores the diff output in a grow array.
*/ */
static int static int
xdiff_out(void *priv, mmbuffer_t *mb, int nbuf) xdiff_out(
long start_a,
long count_a,
long start_b,
long count_b,
void *priv)
{ {
diffout_T *dout = (diffout_T *)priv; diffout_T *dout = (diffout_T *)priv;
char_u *p; diffhunk_T *p = ALLOC_ONE(diffhunk_T);
// The header line always comes by itself, text lines in at least two if (p == NULL)
// parts. We drop the text part. return -1;
if (nbuf > 1)
return 0;
// sanity check
if (STRNCMP(mb[0].ptr, "@@ ", 3) != 0)
return 0;
if (ga_grow(&dout->dout_ga, 1) == FAIL) if (ga_grow(&dout->dout_ga, 1) == FAIL)
return -1; return -1;
p = vim_strnsave((char_u *)mb[0].ptr, mb[0].size);
if (p == NULL) p->lnum_orig = start_a + 1;
return -1; p->count_orig = count_a;
((char_u **)dout->dout_ga.ga_data)[dout->dout_ga.ga_len++] = p; p->lnum_new = start_b + 1;
p->count_new = count_b;
((diffhunk_T **)dout->dout_ga.ga_data)[dout->dout_ga.ga_len++] = p;
return 0; return 0;
} }

View File

@ -749,6 +749,8 @@ static char *(features[]) =
static int included_patches[] = static int included_patches[] =
{ /* Add new patch number below this line */ { /* Add new patch number below this line */
/**/
3921,
/**/ /**/
3920, 3920,
/**/ /**/