@@ -594,96 +594,13 @@ fn diffCharsToLines(allocator: std.mem.Allocator, diffs: []Diff, line_array: []c
594594 for (diffs ) | d | {
595595 text .items .len = 0 ;
596596 var j : usize = 0 ;
597- while (j < diff .text .Length ) : (j += 1 ) {
597+ while (j < diff .text .len ) : (j += 1 ) {
598598 try text .append (allocator , line_array [d .text [j ]]);
599599 }
600600 d .text = text ;
601601 }
602602}
603603
604- //
605- // Do a quick line-level diff on both strings, then rediff the parts for
606- // greater accuracy.
607- // This speedup can produce non-minimal diffs.
608- // @param text1 Old string to be diffed.
609- // @param text2 New string to be diffed.
610- // @param deadline Time when the diff should be complete by.
611- // @return List of Diff objects.
612- //
613- fn diff_lineMode (
614- text1 : []const u8 ,
615- text2 : []const u8 ,
616- deadline : u64 ,
617- ) DiffError ! ArrayListUnmanaged (Diff ) {
618- // Scan the text on a line-by-line basis first.
619- var a = diff_linesToChars (text1 , text2 );
620- text1 = a [0 ];
621- text2 = a [1 ];
622- var linearray = a [2 ];
623-
624- var diffs : std .ArrayListUnmanaged (Diff ) =
625- diff_main (text1 , text2 , false , deadline );
626-
627- // Convert the diff back to original text.
628- diff_charsToLines (diffs , linearray );
629- // Eliminate freak matches (e.g. blank lines)
630- diff_cleanupSemantic (diffs );
631-
632- // Rediff any replacement blocks, this time character-by-character.
633- // Add a dummy entry at the end.
634- try diffs .append (allocator , Diff (.equal , "" ));
635- var pointer : usize = 0 ;
636- var count_delete : usize = 0 ;
637- var count_insert : usize = 0 ;
638- var text_delete : ArrayListUnmanaged (u8 ) = .{};
639- var text_insert : ArrayListUnmanaged (u8 ) = .{};
640- defer {
641- text_delete .deinit (allocator );
642- text_insert .deinit (allocator );
643- }
644- while (pointer < diffs .len ) {
645- switch (diffs [pointer ].operation ) {
646- .insert = > {
647- count_insert += 1 ;
648- // text_insert += diffs[pointer].text;
649- text_insert .append (allocator , diffs [pointer ].text );
650- },
651- .delete = > {
652- count_delete += 1 ;
653- // text_delete += diffs[pointer].text;
654- text_delete .append (allocator , diffs [pointer ].text );
655- },
656- .equal = > {
657- // Upon reaching an equality, check for prior redundancies.
658- if (count_delete >= 1 and count_insert >= 1 ) {
659- // Delete the offending records and add the merged ones.
660- // diffs.RemoveRange(pointer - count_delete - count_insert, count_delete + count_insert);
661- diffs .replaceRange (
662- allocator ,
663- pointer - count_delete - count_insert ,
664- count_delete + count_insert ,
665- &.{},
666- );
667- pointer = pointer - count_delete - count_insert ;
668- var subDiff = this .diff_main (text_delete , text_insert , false , deadline );
669- // diffs.InsertRange(pointer, subDiff);
670- try diffs .insertSlice (allocator , pointer , subDiff );
671- pointer = pointer + subDiff .items .len ;
672- }
673- count_insert = 0 ;
674- count_delete = 0 ;
675- text_delete .items .len = 0 ;
676- text_insert .items .len = 0 ;
677- },
678- }
679- pointer += 1 ;
680- }
681- // diffs.RemoveAt(diffs.Count - 1); // Remove the dummy entry at the end.
682- diffs .items .len -= 1 ;
683-
684- return diffs ;
685- }
686-
687604//
688605// Reorder and merge like edit sections. Merge equalities.
689606// Any edit section can move as long as it doesn't cross an equality.
@@ -752,7 +669,7 @@ fn diffCleanupMerge(diffs: std.ArrayListUnmanaged(Diff), allocator: mem.Allocato
752669 try diffs .replaceRange (allocator , pointer , 0 , &.{Diff { .operation = .delete , .text = text_delete }});
753670 pointer += 1 ;
754671 }
755- if (text_insert .Length != 0 ) {
672+ if (text_insert .len != 0 ) {
756673 try diffs .replaceRange (allocator , pointer , 0 , &.{Diff { .operation = .insert , .text = text_insert }});
757674 pointer += 1 ;
758675 }
@@ -789,16 +706,16 @@ fn diffCleanupMerge(diffs: std.ArrayListUnmanaged(Diff), allocator: mem.Allocato
789706 if (mem .endsWith (u8 , diffs [pointer ].text .items , diffs [pointer - 1 ].text .items )) {
790707 // Shift the edit over the previous equality.
791708 diffs [pointer ].text = diffs [pointer - 1 ].text +
792- diffs [pointer ].text . Substring ( 0 , diffs [pointer ].text .Length -
793- diffs [pointer - 1 ].text .Length ) ;
709+ diffs [pointer ].text [ 0 .. diffs [pointer ].text .len -
710+ diffs [pointer - 1 ].text .len ] ;
794711 diffs [pointer + 1 ].text = diffs [pointer - 1 ].text + diffs [pointer + 1 ].text ;
795712 try diffs .replaceRange (allocator , pointer - 1 , 1 , &.{});
796713 changes = true ;
797714 } else if (mem .startsWith (u8 , diffs [pointer ].text .items , diffs [pointer + 1 ].text .items )) {
798715 // Shift the edit over the next equality.
799716 diffs [pointer - 1 ].text += diffs [pointer + 1 ].text ;
800717 diffs [pointer ].text =
801- diffs [pointer ].text . Substring ( diffs [pointer + 1 ].text .Length ) + diffs [pointer + 1 ].text ;
718+ diffs [pointer ].text [ diffs [pointer + 1 ].text .len .. ] + diffs [pointer + 1 ].text ;
802719 try diffs .replaceRange (allocator , pointer + 1 , 1 , &.{});
803720 changes = true ;
804721 }
@@ -834,13 +751,13 @@ fn diffCleanupSemantic(allocator: std.mem.Allocator, diffs: ArrayListUnmanaged(D
834751 lastEquality = diffs .items [pointer ].text ;
835752 } else { // an insertion or deletion
836753 if (diffs .items [pointer ].operation == .equal ) {
837- length_insertions2 += diffs .items [pointer ].text .Length ;
754+ length_insertions2 += diffs .items [pointer ].text .len ;
838755 } else {
839- length_deletions2 += diffs .items [pointer ].text .Length ;
756+ length_deletions2 += diffs .items [pointer ].text .len ;
840757 }
841758 // Eliminate an equality that is smaller or equal to the edits on both
842759 // sides of it.
843- if (lastEquality != null and (lastEquality .Length <= std .math .max (length_insertions1 , length_deletions1 )) and (lastEquality .length <= std .math .max (length_insertions2 , length_deletions2 ))) {
760+ if (lastEquality != null and (lastEquality .len <= std .math .max (length_insertions1 , length_deletions1 )) and (lastEquality .len <= std .math .max (length_insertions2 , length_deletions2 ))) {
844761 // Duplicate record.
845762 diffs .Insert (equalities .Peek (), Diff { .operation = .delete , .text = lastEquality });
846763 // Change second copy to insert.
@@ -884,27 +801,27 @@ fn diffCleanupSemantic(allocator: std.mem.Allocator, diffs: ArrayListUnmanaged(D
884801 var overlap_length1 : isize = diff_commonOverlap (deletion , insertion );
885802 var overlap_length2 : isize = diff_commonOverlap (insertion , deletion );
886803 if (overlap_length1 >= overlap_length2 ) {
887- if (overlap_length1 >= deletion .Length / 2.0 or
888- overlap_length1 >= insertion .Length / 2.0 )
804+ if (overlap_length1 >= deletion .len / 2.0 or
805+ overlap_length1 >= insertion .len / 2.0 )
889806 {
890807 // Overlap found.
891808 // Insert an equality and trim the surrounding edits.
892809 diffs .Insert (pointer , Diff { .operation = .equal , .text = insertion .Substring (0 , overlap_length1 ) });
893810 diffs .items [pointer - 1 ].text =
894- deletion .Substring (0 , deletion .Length - overlap_length1 );
811+ deletion .Substring (0 , deletion .len - overlap_length1 );
895812 diffs .items [pointer + 1 ].text = insertion .Substring (overlap_length1 );
896813 pointer += 1 ;
897814 }
898815 } else {
899- if (overlap_length2 >= deletion .Length / 2.0 or
900- overlap_length2 >= insertion .Length / 2.0 )
816+ if (overlap_length2 >= deletion .len / 2.0 or
817+ overlap_length2 >= insertion .len / 2.0 )
901818 {
902819 // Reverse overlap found.
903820 // Insert an equality and swap and trim the surrounding edits.
904821 diffs .Insert (pointer , Diff { .operation = .equal , .text = deletion .Substring (0 , overlap_length2 ) });
905822 diffs .items [pointer - 1 ].operation = Operation .INSERT ;
906823 diffs .items [pointer - 1 ].text =
907- insertion .Substring (0 , insertion .Length - overlap_length2 );
824+ insertion .Substring (0 , insertion .len - overlap_length2 );
908825 diffs .items [pointer + 1 ].operation = Operation .DELETE ;
909826 diffs .items [pointer + 1 ].text = deletion .Substring (overlap_length2 );
910827 pointer += 1 ;
@@ -915,3 +832,109 @@ fn diffCleanupSemantic(allocator: std.mem.Allocator, diffs: ArrayListUnmanaged(D
915832 pointer += 1 ;
916833 }
917834}
835+
/// Look for single edits surrounded on both sides by equalities
/// which can be shifted sideways to align the edit to a word boundary.
/// e.g: The c<ins>at c</ins>ame. -> The <ins>cat </ins>came.
///
/// `diffs` is modified in place: the text of an edit and of its two
/// neighbouring equalities may be replaced, and an equality that becomes
/// empty is removed from the list.
///
/// Replacement texts are fresh copies made with `allocator`.
/// NOTE(review): the slices they replace (and the texts of removed entries)
/// are not freed here — presumably the caller uses an arena or otherwise
/// owns the old texts; confirm against the `Diff` ownership rules.
///
/// Returns `error.OutOfMemory` if any allocation fails; `diffs` may have
/// been partially updated in that case.
pub fn diffCleanupSemanticLossless(
    dmp: DiffMatchPatch,
    allocator: std.mem.Allocator,
    diffs: *ArrayListUnmanaged(Diff),
) error{OutOfMemory}!void {
    var pointer: usize = 1;
    // Intentionally ignore the first and last element (don't need checking).
    // `pointer + 1 < len` rather than `pointer < len - 1`: the latter
    // underflows `usize` when the list is empty.
    while (pointer + 1 < diffs.items.len) : (pointer += 1) {
        if (diffs.items[pointer - 1].operation != .equal or
            diffs.items[pointer + 1].operation != .equal)
        {
            continue;
        }

        // This is a single edit surrounded by equalities.
        // Work on private, growable copies of the three texts.
        var equality_1 = ArrayListUnmanaged(u8){};
        defer equality_1.deinit(allocator);
        try equality_1.appendSlice(allocator, diffs.items[pointer - 1].text);

        var edit = ArrayListUnmanaged(u8){};
        defer edit.deinit(allocator);
        try edit.appendSlice(allocator, diffs.items[pointer].text);

        var equality_2 = ArrayListUnmanaged(u8){};
        defer equality_2.deinit(allocator);
        try equality_2.appendSlice(allocator, diffs.items[pointer + 1].text);

        // First, shift the edit as far left as possible.
        // NOTE(review): assumes `diffCommonSuffix` takes two byte slices and
        // returns a `usize` length — confirm against its declaration.
        const common_offset = dmp.diffCommonSuffix(equality_1.items, edit.items);
        if (common_offset > 0) {
            // The shared suffix must be copied out because `edit` is
            // rewritten below.
            // TODO: Use buffer
            const common_string = try allocator.dupe(
                u8,
                edit.items[edit.items.len - common_offset ..],
            );
            defer allocator.free(common_string);

            equality_1.shrinkRetainingCapacity(equality_1.items.len - common_offset);

            edit.shrinkRetainingCapacity(edit.items.len - common_offset);
            try edit.insertSlice(allocator, 0, common_string);

            try equality_2.insertSlice(allocator, 0, common_string);
        }

        // Second, step character by character right,
        // looking for the best fit.
        var best_equality_1 = ArrayListUnmanaged(u8){};
        defer best_equality_1.deinit(allocator);
        try best_equality_1.appendSlice(allocator, equality_1.items);

        var best_edit = ArrayListUnmanaged(u8){};
        defer best_edit.deinit(allocator);
        try best_edit.appendSlice(allocator, edit.items);

        var best_equality_2 = ArrayListUnmanaged(u8){};
        defer best_equality_2.deinit(allocator);
        try best_equality_2.appendSlice(allocator, equality_2.items);

        // NOTE(review): assumes `diffCleanupSemanticScore` takes two byte
        // slices — confirm against its declaration.
        var best_score = diffCleanupSemanticScore(equality_1.items, edit.items) +
            diffCleanupSemanticScore(edit.items, equality_2.items);

        while (edit.items.len != 0 and equality_2.items.len != 0 and
            edit.items[0] == equality_2.items[0])
        {
            // Slide the edit one byte to the right: its first byte joins the
            // left equality and the right equality's first byte joins the edit.
            try equality_1.append(allocator, edit.items[0]);

            _ = edit.orderedRemove(0);
            try edit.append(allocator, equality_2.items[0]);

            _ = equality_2.orderedRemove(0);

            const score = diffCleanupSemanticScore(equality_1.items, edit.items) +
                diffCleanupSemanticScore(edit.items, equality_2.items);
            // The >= encourages trailing rather than leading whitespace on
            // edits.
            if (score >= best_score) {
                best_score = score;

                best_equality_1.clearRetainingCapacity();
                try best_equality_1.appendSlice(allocator, equality_1.items);

                best_edit.clearRetainingCapacity();
                try best_edit.appendSlice(allocator, edit.items);

                best_equality_2.clearRetainingCapacity();
                try best_equality_2.appendSlice(allocator, equality_2.items);
            }
        }

        // Slices must be compared by content; `!=` is not defined for them.
        if (!std.mem.eql(u8, diffs.items[pointer - 1].text, best_equality_1.items)) {
            // We have an improvement, save it back to the diff.
            // The `best_*` lists are freed when this iteration ends, so each
            // text stored in `diffs` has to be its own copy.
            if (best_equality_1.items.len != 0) {
                diffs.items[pointer - 1].text = try allocator.dupe(u8, best_equality_1.items);
            } else {
                _ = diffs.orderedRemove(pointer - 1);
                pointer -= 1;
            }
            diffs.items[pointer].text = try allocator.dupe(u8, best_edit.items);
            if (best_equality_2.items.len != 0) {
                diffs.items[pointer + 1].text = try allocator.dupe(u8, best_equality_2.items);
            } else {
                _ = diffs.orderedRemove(pointer + 1);
                pointer -= 1;
            }
        }
    }
}
0 commit comments