Skip to content

Commit 231f810

Browse files
committed
Merge branch 'yc/histogram-hunk-shift-fix'
The final clean-up phase of the diff output could turn the result of histogram diff algorithm suboptimal, which has been corrected. * yc/histogram-hunk-shift-fix: xdiff: re-diff shifted change groups when using histogram algorithm
2 parents 7f13e5c + e417277 commit 231f810

3 files changed

Lines changed: 210 additions & 2 deletions

File tree

t/meson.build

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -509,6 +509,7 @@ integration_tests = [
509509
't4071-diff-minimal.sh',
510510
't4072-diff-max-depth.sh',
511511
't4073-diff-stat-name-width.sh',
512+
't4074-diff-shifted-matched-group.sh',
512513
't4100-apply-stat.sh',
513514
't4101-apply-nonl.sh',
514515
't4102-apply-rename.sh',
Lines changed: 164 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,164 @@
1+
#!/bin/sh
2+
3+
test_description='shifted diff groups re-diffing during histogram diff'
4+
5+
. ./test-lib.sh
6+
7+
test_expect_success 'shifted/merged diff group should re-diff to minimize patch' '
8+
test_write_lines A x A A A x A A A >file1 &&
9+
test_write_lines A x A Z A x A A A >file2 &&
10+
11+
file1_h=$(git rev-parse --short $(git hash-object file1)) &&
12+
file2_h=$(git rev-parse --short $(git hash-object file2)) &&
13+
14+
cat >expect <<-EOF &&
15+
diff --git a/file1 b/file2
16+
index $file1_h..$file2_h 100644
17+
--- a/file1
18+
+++ b/file2
19+
@@ -1,7 +1,7 @@
20+
A
21+
x
22+
A
23+
-A
24+
+Z
25+
A
26+
x
27+
A
28+
EOF
29+
30+
test_expect_code 1 git diff --no-index --histogram file1 file2 >output &&
31+
test_cmp expect output
32+
'
33+
34+
test_expect_success 'merged diff group with no shift' '
35+
test_write_lines A Z B x >file1 &&
36+
test_write_lines C D x Z E x >file2 &&
37+
38+
file1_h=$(git rev-parse --short $(git hash-object file1)) &&
39+
file2_h=$(git rev-parse --short $(git hash-object file2)) &&
40+
41+
cat >expect <<-EOF &&
42+
diff --git a/file1 b/file2
43+
index $file1_h..$file2_h 100644
44+
--- a/file1
45+
+++ b/file2
46+
@@ -1,4 +1,6 @@
47+
-A
48+
+C
49+
+D
50+
+x
51+
Z
52+
-B
53+
+E
54+
x
55+
EOF
56+
57+
test_expect_code 1 git diff --no-index --histogram file1 file2 >output &&
58+
test_cmp expect output
59+
'
60+
61+
test_expect_success 're-diff should preserve diff flags' '
62+
test_write_lines a b c a b c >file1 &&
63+
test_write_lines x " b" z a b c >file2 &&
64+
65+
file1_h=$(git rev-parse --short $(git hash-object file1)) &&
66+
file2_h=$(git rev-parse --short $(git hash-object file2)) &&
67+
68+
cat >expect <<-EOF &&
69+
diff --git a/file1 b/file2
70+
index $file1_h..$file2_h 100644
71+
--- a/file1
72+
+++ b/file2
73+
@@ -1,6 +1,6 @@
74+
-a
75+
-b
76+
-c
77+
+x
78+
+ b
79+
+z
80+
a
81+
b
82+
c
83+
EOF
84+
85+
test_expect_code 1 git diff --no-index --histogram file1 file2 >output &&
86+
test_cmp expect output &&
87+
88+
cat >expect_iwhite <<-EOF &&
89+
diff --git a/file1 b/file2
90+
index $file1_h..$file2_h 100644
91+
--- a/file1
92+
+++ b/file2
93+
@@ -1,6 +1,6 @@
94+
-a
95+
+x
96+
b
97+
-c
98+
+z
99+
a
100+
b
101+
c
102+
EOF
103+
104+
test_expect_code 1 git diff --no-index --histogram --ignore-all-space file1 file2 >output_iwhite &&
105+
test_cmp expect_iwhite output_iwhite
106+
'
107+
108+
test_expect_success 'shifting on either side should trigger re-diff properly' '
109+
test_write_lines a b c a b c a b c >file1 &&
110+
test_write_lines a b c a1 a2 a3 b c1 a b c >file2 &&
111+
112+
file1_h=$(git rev-parse --short $(git hash-object file1)) &&
113+
file2_h=$(git rev-parse --short $(git hash-object file2)) &&
114+
115+
cat >expect1 <<-EOF &&
116+
diff --git a/file1 b/file2
117+
index $file1_h..$file2_h 100644
118+
--- a/file1
119+
+++ b/file2
120+
@@ -1,9 +1,11 @@
121+
a
122+
b
123+
c
124+
-a
125+
+a1
126+
+a2
127+
+a3
128+
b
129+
-c
130+
+c1
131+
a
132+
b
133+
c
134+
EOF
135+
136+
test_expect_code 1 git diff --no-index --histogram file1 file2 >output1 &&
137+
test_cmp expect1 output1 &&
138+
139+
cat >expect2 <<-EOF &&
140+
diff --git a/file2 b/file1
141+
index $file2_h..$file1_h 100644
142+
--- a/file2
143+
+++ b/file1
144+
@@ -1,11 +1,9 @@
145+
a
146+
b
147+
c
148+
-a1
149+
-a2
150+
-a3
151+
+a
152+
b
153+
-c1
154+
+c
155+
a
156+
b
157+
c
158+
EOF
159+
160+
test_expect_code 1 git diff --no-index --histogram file2 file1 >output2 &&
161+
test_cmp expect2 output2
162+
'
163+
164+
test_done

xdiff/xdiffi.c

Lines changed: 45 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -792,6 +792,7 @@ static int group_slide_up(xdfile_t *xdf, struct xdlgroup *g)
792792
*/
793793
int xdl_change_compact(xdfile_t *xdf, xdfile_t *xdfo, long flags) {
794794
struct xdlgroup g, go;
795+
struct xdlgroup g_orig;
795796
long earliest_end, end_matching_other;
796797
long groupsize;
797798

@@ -805,10 +806,12 @@ int xdl_change_compact(xdfile_t *xdf, xdfile_t *xdfo, long flags) {
805806
if (g.end == g.start)
806807
goto next;
807808

809+
g_orig = g;
810+
808811
/*
809812
* Now shift the change up and then down as far as possible in
810813
* each direction. If it bumps into any other changes, merge
811-
* them.
814+
* them and restart the process.
812815
*/
813816
do {
814817
groupsize = g.end - g.start;
@@ -861,7 +864,8 @@ int xdl_change_compact(xdfile_t *xdf, xdfile_t *xdfo, long flags) {
861864
/*
862865
* Move the possibly merged group of changes back to
863866
* line up with the last group of changes from the
864-
* other file that it can align with.
867+
* other file that it can align with. This avoids breaking
868+
* a single change into a separate addition/deletion.
865869
*/
866870
while (go.end == go.start) {
867871
if (group_slide_up(xdf, &g))
@@ -914,6 +918,45 @@ int xdl_change_compact(xdfile_t *xdf, xdfile_t *xdfo, long flags) {
914918
}
915919
}
916920

921+
/*
922+
* If we merged change groups during shifting, the new
923+
* combined group could now have matching lines in both files,
924+
* even if the original separate groups did not. Re-diff the
925+
* new group to find these matching lines to mark them as
926+
* unchanged.
927+
*
928+
* Only do this if the corresponding group in the other file is
929+
* non-empty, as it's trivial otherwise.
930+
*
931+
* Only do this for histogram diff as its LCS algorithm allows
932+
* for this scenario. In contrast, patience diff finds LCS
933+
* of unique lines that groups cannot be shifted across.
934+
* Myer's diff (standalone or used as fall-back in patience
935+
* diff) already finds minimal edits so it is not possible for
936+
* shifted groups to result in a smaller diff. (Without
937+
* XDF_NEED_MINIMAL, Myer's isn't technically guaranteed to be
938+
* minimal, but it should be so most of the time)
939+
*/
940+
if (go.end != go.start &&
941+
XDF_DIFF_ALG(flags) == XDF_HISTOGRAM_DIFF &&
942+
(g.start != g_orig.start ||
943+
g.end != g_orig.end)) {
944+
xpparam_t xpp;
945+
xdfenv_t xe;
946+
947+
memset(&xpp, 0, sizeof(xpp));
948+
xpp.flags = flags & ~XDF_DIFF_ALGORITHM_MASK;
949+
950+
xe.xdf1 = *xdf;
951+
xe.xdf2 = *xdfo;
952+
953+
if (xdl_fall_back_diff(&xe, &xpp,
954+
g.start + 1, g.end - g.start,
955+
go.start + 1, go.end - go.start)) {
956+
return -1;
957+
}
958+
}
959+
917960
next:
918961
/* Move past the just-processed group: */
919962
if (group_next(xdf, &g))

0 commit comments

Comments
 (0)