Skip to content

Commit 0710936

Browse files
add cleanupSemanticScore (#3)
1 parent 9104ca6 commit 0710936

1 file changed

Lines changed: 57 additions & 0 deletions

File tree

DiffMatchPatch.zig

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -938,3 +938,60 @@ pub fn diffCleanupSemanticLossless(
938938
pointer += 1;
939939
}
940940
}
941+
942+
/// Score how well the boundary between `one` and `two` lines up with a
/// logical boundary in the text.
///
/// Only the last byte of `one` and the first byte of `two` are classified
/// (ASCII rules from `std.ascii`); the blank-line check additionally inspects
/// the line-ending sequence at the end of `one` / start of `two`.
///
/// Scores, best to worst:
///   6 - either side is empty (the boundary is an edge of the text)
///   5 - blank line
///   4 - line break
///   3 - end of sentence (non-whitespace punctuation followed by whitespace)
///   2 - whitespace
///   1 - non-alphanumeric
///   0 - none of the above
///
/// `one` is the text before the boundary, `two` the text after it.
fn cleanupSemanticScore(one: []const u8, two: []const u8) usize {
    if (one.len == 0 or two.len == 0) {
        // Edges are the best.
        return 6;
    }

    // Each port of this function behaves slightly differently due to
    // subtle differences in each language's definition of things like
    // 'whitespace'. Since this function's purpose is largely cosmetic,
    // the choice has been made to use each language's native features
    // rather than force total conformity.
    const char1 = one[one.len - 1];
    const char2 = two[0];
    const non_alphanumeric1 = !std.ascii.isAlphanumeric(char1);
    const non_alphanumeric2 = !std.ascii.isAlphanumeric(char2);
    const whitespace1 = non_alphanumeric1 and std.ascii.isWhitespace(char1);
    const whitespace2 = non_alphanumeric2 and std.ascii.isWhitespace(char2);
    // Any whitespace byte that is also a control byte counts as a line break,
    // so tab, vertical tab and form feed qualify here, not only CR and LF.
    const line_break1 = whitespace1 and std.ascii.isControl(char1);
    const line_break2 = whitespace2 and std.ascii.isControl(char2);

    // A blank line needs two consecutive line endings, not just one.
    // Equivalent of the regex `\n\r?\n\Z` applied to `one`.
    const blank_line1 = line_break1 and
        (std.mem.endsWith(u8, one, "\n\n") or
            std.mem.endsWith(u8, one, "\n\r\n"));
    // Equivalent of the regex `\A\r?\n\r?\n` applied to `two`.
    const blank_line2 = line_break2 and
        (std.mem.startsWith(u8, two, "\n\n") or
            std.mem.startsWith(u8, two, "\n\r\n") or
            std.mem.startsWith(u8, two, "\r\n\n") or
            std.mem.startsWith(u8, two, "\r\n\r\n"));

    if (blank_line1 or blank_line2) {
        // Five points for blank lines.
        return 5;
    } else if (line_break1 or line_break2) {
        // Four points for line breaks.
        return 4;
    } else if (non_alphanumeric1 and !whitespace1 and whitespace2) {
        // Three points for end of sentences.
        return 3;
    } else if (whitespace1 or whitespace2) {
        // Two points for whitespace.
        return 2;
    } else if (non_alphanumeric1 or non_alphanumeric2) {
        // One point for non-alphanumeric.
        return 1;
    }
    return 0;
}
994+
995+
// Define some regex patterns for matching boundaries.
996+
// private Regex BLANKLINEEND = new Regex("\\n\\r?\\n\\Z");
997+
// private Regex BLANKLINESTART = new Regex("\\A\\r?\\n\\r?\\n");

0 commit comments

Comments
 (0)