@@ -96,7 +96,7 @@ fn diffInternal(
9696 try diffs .append (allocator , Diff { .operation = .equal , .text = common_suffix });
9797 }
9898
99- dmp . diffCleanupMerge (allocator , diffs );
99+ try diffCleanupMerge (allocator , & diffs );
100100 return diffs ;
101101}
102102
@@ -471,7 +471,7 @@ fn diffLineMode(
471471 // Convert the diff back to original text.
472472 try diffCharsToLines (allocator , diffs .items , line_array .items );
473473 // Eliminate freak matches (e.g. blank lines)
474- try diffCleanupSemantic (allocator , diffs );
474+ try diffCleanupSemantic (allocator , & diffs );
475475
476476 // Rediff any replacement blocks, this time character-by-character.
477477 // Add a dummy entry at the end.
@@ -606,7 +606,7 @@ fn diffCharsToLines(allocator: std.mem.Allocator, diffs: []Diff, line_array: []c
606606// Any edit section can move as long as it doesn't cross an equality.
607607// @param diffs List of Diff objects.
608608//
609- fn diffCleanupMerge (diffs : std .ArrayListUnmanaged ( Diff ), allocator : mem . Allocator ) ! void {
609+ fn diffCleanupMerge (allocator : std.mem.Allocator , diffs : * std . ArrayListUnmanaged ( Diff ) ) ! void {
610610 // Add a dummy entry at the end.
611611 try diffs .append (allocator , Diff { .operation = .equal , .text = "" });
612612 var pointer : usize = 0 ;
@@ -632,7 +632,7 @@ fn diffCleanupMerge(diffs: std.ArrayListUnmanaged(Diff), allocator: mem.Allocato
632632 if (count_delete + count_insert > 1 ) {
633633 if (count_delete != 0 and count_insert != 0 ) {
634634 // Factor out any common prefixes.
635- commonlength = this . diffCommonPrefix (text_insert , text_delete );
635+ commonlength = diffCommonPrefix (text_insert , text_delete );
636636 if (commonlength != 0 ) {
637637 if ((pointer - count_delete - count_insert ) > 0 and
638638 diffs [pointer - count_delete - count_insert - 1 ].operation == .equal )
@@ -652,7 +652,7 @@ fn diffCleanupMerge(diffs: std.ArrayListUnmanaged(Diff), allocator: mem.Allocato
652652 }
653653 // Factor out any common suffixes.
654654 // @ZigPort this seems very wrong
655- commonlength = this . diffCommonSuffix (text_insert , text_delete );
655+ commonlength = diffCommonSuffix (text_insert , text_delete );
656656 if (commonlength != 0 ) {
657657 diffs [pointer ].text = try std .mem .concat (allocator , &.{ text_insert .items [
658658 text_insert .items .len - commonlength
@@ -703,15 +703,15 @@ fn diffCleanupMerge(diffs: std.ArrayListUnmanaged(Diff), allocator: mem.Allocato
703703 diffs [pointer + 1 ].operation == .equal )
704704 {
705705 // This is a single edit surrounded by equalities.
706- if (mem .endsWith (u8 , diffs [pointer ].text .items , diffs [pointer - 1 ].text .items )) {
706+ if (std . mem .endsWith (u8 , diffs [pointer ].text .items , diffs [pointer - 1 ].text .items )) {
707707 // Shift the edit over the previous equality.
708708 diffs [pointer ].text = diffs [pointer - 1 ].text +
709709 diffs [pointer ].text [0 .. diffs [pointer ].text .len -
710710 diffs [pointer - 1 ].text .len ];
711711 diffs [pointer + 1 ].text = diffs [pointer - 1 ].text + diffs [pointer + 1 ].text ;
712712 try diffs .replaceRange (allocator , pointer - 1 , 1 , &.{});
713713 changes = true ;
714- } else if (mem .startsWith (u8 , diffs [pointer ].text .items , diffs [pointer + 1 ].text .items )) {
714+ } else if (std . mem .startsWith (u8 , diffs [pointer ].text .items , diffs [pointer + 1 ].text .items )) {
715715 // Shift the edit over the next equality.
716716 diffs [pointer - 1 ].text += diffs [pointer + 1 ].text ;
717717 diffs [pointer ].text =
@@ -724,16 +724,16 @@ fn diffCleanupMerge(diffs: std.ArrayListUnmanaged(Diff), allocator: mem.Allocato
724724 }
725725 // If shifts were made, the diff needs reordering and another shift sweep.
726726 if (changes ) {
727- this . diff_cleanupMerge ( diffs );
727+ try diffCleanupMerge ( allocator , & diffs );
728728 }
729729}
730730
731- fn diffCleanupSemantic (allocator : std.mem.Allocator , diffs : ArrayListUnmanaged (Diff )) error {OutOfMemory }! void {
731+ fn diffCleanupSemantic (allocator : std.mem.Allocator , diffs : * ArrayListUnmanaged (Diff )) error {OutOfMemory }! void {
732732 var changes = false ;
733733 // Stack of indices where equalities are found.
734734 var equalities = ArrayListUnmanaged (isize ){};
735735 // Always equal to equalities[equalitiesLength-1][1]
736- var lastEquality : ? []const u8 = null ;
736+ var last_equality : ? []const u8 = null ;
737737 var pointer : isize = 0 ; // Index of current position.
738738 // Number of characters that changed prior to the equality.
739739 var length_insertions1 : usize = 0 ;
@@ -748,7 +748,7 @@ fn diffCleanupSemantic(allocator: std.mem.Allocator, diffs: ArrayListUnmanaged(D
748748 length_deletions1 = length_deletions2 ;
749749 length_insertions2 = 0 ;
750750 length_deletions2 = 0 ;
751- lastEquality = diffs .items [pointer ].text ;
751+ last_equality = diffs .items [pointer ].text ;
752752 } else { // an insertion or deletion
753753 if (diffs .items [pointer ].operation == .equal ) {
754754 length_insertions2 += diffs .items [pointer ].text .len ;
@@ -757,9 +757,9 @@ fn diffCleanupSemantic(allocator: std.mem.Allocator, diffs: ArrayListUnmanaged(D
757757 }
758758 // Eliminate an equality that is smaller or equal to the edits on both
759759 // sides of it.
760- if (lastEquality != null and (lastEquality .len <= std .math .max (length_insertions1 , length_deletions1 )) and (lastEquality .len <= std .math .max (length_insertions2 , length_deletions2 ))) {
760+ if (last_equality != null and (last_equality .len <= std .math .max (length_insertions1 , length_deletions1 )) and (last_equality .len <= std .math .max (length_insertions2 , length_deletions2 ))) {
761761 // Duplicate record.
762- diffs .Insert (equalities .Peek (), Diff { .operation = .delete , .text = lastEquality });
762+ diffs .Insert (equalities .Peek (), Diff { .operation = .delete , .text = last_equality });
763763 // Change second copy to insert.
764764 diffs .items [equalities .Peek () + 1 ].operation = .insert ;
765765 // Throw away the equality we just deleted.
@@ -772,7 +772,7 @@ fn diffCleanupSemantic(allocator: std.mem.Allocator, diffs: ArrayListUnmanaged(D
772772 length_deletions1 = 0 ;
773773 length_insertions2 = 0 ;
774774 length_deletions2 = 0 ;
775- lastEquality = null ;
775+ last_equality = null ;
776776 changes = true ;
777777 }
778778 }
@@ -781,7 +781,7 @@ fn diffCleanupSemantic(allocator: std.mem.Allocator, diffs: ArrayListUnmanaged(D
781781
782782 // Normalize the diff.
783783 if (changes ) {
784- diffCleanupMerge (diffs );
784+ try diffCleanupMerge (allocator , & diffs );
785785 }
786786 diffCleanupSemanticLossless (diffs );
787787
@@ -793,13 +793,13 @@ fn diffCleanupSemantic(allocator: std.mem.Allocator, diffs: ArrayListUnmanaged(D
793793 // Only extract an overlap if it is as big as the edit ahead or behind it.
794794 pointer = 1 ;
795795 while (pointer < diffs .Count ) {
796- if (diffs .items [pointer - 1 ].operation == Operation . DELETE and
797- diffs .items [pointer ].operation == Operation . INSERT )
796+ if (diffs .items [pointer - 1 ].operation == .delete and
797+ diffs .items [pointer ].operation == .insert )
798798 {
799799 const deletion = diffs .items [pointer - 1 ].text .items ;
800800 const insertion = diffs .items [pointer ].text .items ;
801- var overlap_length1 : isize = diff_commonOverlap (deletion , insertion );
802- var overlap_length2 : isize = diff_commonOverlap (insertion , deletion );
801+ var overlap_length1 : isize = diffCommonOverlap (deletion , insertion );
802+ var overlap_length2 : isize = diffCommonOverlap (insertion , deletion );
803803 if (overlap_length1 >= overlap_length2 ) {
804804 if (overlap_length1 >= deletion .len / 2.0 or
805805 overlap_length1 >= insertion .len / 2.0 )
@@ -995,3 +995,79 @@ fn cleanupSemanticScore(one: []const u8, two: []const u8) usize {
995995// Define some regex patterns for matching boundaries.
996996// private Regex BLANKLINEEND = new Regex("\\n\\r?\\n\\Z");
997997// private Regex BLANKLINESTART = new Regex("\\A\\r?\\n\\r?\\n");
998+
/// Reduce the number of edits by eliminating operationally trivial
/// equalities.
///
/// A short equality that is surrounded by insertions/deletions is rewritten
/// as a delete + insert pair of the same text. If anything was rewritten,
/// `diffCleanupMerge` is run afterwards to normalize the list.
///
/// `diffs` is modified in place; `allocator` is used both for growing `diffs`
/// and for the temporary index stack (presumably it must be the allocator
/// that backs `diffs` — confirm against callers).
///
/// Returns `error.OutOfMemory` if a list cannot grow.
pub fn diffCleanupEfficiency(allocator: std.mem.Allocator, diffs: *ArrayListUnmanaged(Diff)) error{OutOfMemory}!void {
    // Cost of an empty edit operation in terms of edit characters.
    // NOTE(review): the code this was ported from read `this.Diff_EditCost`,
    // which has no receiver in this free function. 4 is the upstream
    // diff-match-patch default; wire this to the real configuration value
    // once one is reachable from here.
    const diff_edit_cost: usize = 4;

    var changes = false;
    // Stack of indices (into `diffs.items`) where candidate equalities are found.
    var equalities = ArrayListUnmanaged(usize){};
    defer equalities.deinit(allocator);
    // Text of the equality on top of the stack; empty when there is none.
    var last_equality: []const u8 = "";
    var pointer: usize = 0; // Index of current position.
    // Is there an insertion operation before the last equality.
    var pre_ins = false;
    // Is there a deletion operation before the last equality.
    var pre_del = false;
    // Is there an insertion operation after the last equality.
    var post_ins = false;
    // Is there a deletion operation after the last equality.
    var post_del = false;

    while (pointer < diffs.items.len) {
        // Index to visit next. Computed up front so that rewinding after a
        // split never needs a negative intermediate value.
        var next_pointer = pointer + 1;
        const current = diffs.items[pointer];

        if (current.operation == .equal) { // Equality found.
            if (current.text.len < diff_edit_cost and (post_ins or post_del)) {
                // Candidate found.
                try equalities.append(allocator, pointer);
                pre_ins = post_ins;
                pre_del = post_del;
                last_equality = current.text;
            } else {
                // Not a candidate, and can never become one.
                equalities.clearRetainingCapacity();
                last_equality = "";
            }
            post_ins = false;
            post_del = false;
        } else { // An insertion or deletion.
            if (current.operation == .delete) {
                post_del = true;
            } else {
                post_ins = true;
            }

            // Number of the four surrounding edit kinds that are present.
            var edit_sides: u8 = 0;
            if (pre_ins) edit_sides += 1;
            if (pre_del) edit_sides += 1;
            if (post_ins) edit_sides += 1;
            if (post_del) edit_sides += 1;

            // Five types to be split:
            // <ins>A</ins><del>B</del>XY<ins>C</ins><del>D</del>
            // <ins>A</ins>X<ins>C</ins><del>D</del>
            // <ins>A</ins><del>B</del>X<ins>C</ins>
            // <del>A</del>X<ins>C</ins><del>D</del>
            // <ins>A</ins><del>B</del>X<del>C</del>
            const fully_surrounded = edit_sides == 4;
            const short_and_mostly_surrounded =
                last_equality.len < diff_edit_cost / 2 and edit_sides == 3;

            if (last_equality.len != 0 and (fully_surrounded or short_and_mostly_surrounded)) {
                // `last_equality` is only non-empty while its index is still
                // on the stack, so the stack cannot be empty here.
                const equality_index = equalities.items[equalities.items.len - 1];

                // Duplicate record. The new delete shares its text slice with
                // the existing entry (no copy is made).
                try diffs.insert(allocator, equality_index, Diff{ .operation = .delete, .text = last_equality });
                // Change second copy to insert.
                diffs.items[equality_index + 1].operation = .insert;
                _ = equalities.pop(); // Throw away the equality we just deleted.
                last_equality = "";

                if (pre_ins and pre_del) {
                    // No changes made which could affect previous entry, keep going.
                    post_ins = true;
                    post_del = true;
                    equalities.clearRetainingCapacity();
                } else {
                    // Throw away the previous equality; it needs re-evaluating.
                    if (equalities.items.len > 0) {
                        _ = equalities.pop();
                    }
                    // Resume just after the remaining top-of-stack equality,
                    // or from the very beginning when the stack is empty.
                    next_pointer = if (equalities.items.len > 0)
                        equalities.items[equalities.items.len - 1] + 1
                    else
                        0;
                    post_ins = false;
                    post_del = false;
                }
                changes = true;
            }
        }
        pointer = next_pointer;
    }

    if (changes) {
        try diffCleanupMerge(allocator, diffs);
    }
}
0 commit comments