@@ -938,3 +938,60 @@ pub fn diffCleanupSemanticLossless(
938938 pointer += 1 ;
939939 }
940940}
941+
//
// Given two strings, compute a score representing how well the boundary
// between them (the end of `one` / the start of `two`) falls on a logical
// boundary. Higher is better:
//   6 - one of the strings is empty (the boundary is an edge)
//   5 - blank line on either side of the boundary
//   4 - line break on either side of the boundary
//   3 - end of sentence (non-alphanumeric, non-whitespace, then whitespace)
//   2 - whitespace on either side of the boundary
//   1 - non-alphanumeric character on either side of the boundary
//   0 - boundary falls between two alphanumeric characters
// @param one First string (text before the boundary).
// @param two Second string (text after the boundary).
// @return The score, in the range 0..6.
//
fn cleanupSemanticScore(one: []const u8, two: []const u8) usize {
    if (one.len == 0 or two.len == 0) {
        // Edges are the best.
        return 6;
    }

    // Each port of this function behaves slightly differently due to
    // subtle differences in each language's definition of things like
    // 'whitespace'. Since this function's purpose is largely cosmetic,
    // the choice has been made to use each language's native features
    // rather than force total conformity.
    const char1 = one[one.len - 1];
    const char2 = two[0];
    const nonAlphaNumeric1 = !std.ascii.isAlphanumeric(char1);
    const nonAlphaNumeric2 = !std.ascii.isAlphanumeric(char2);
    const whitespace1 = nonAlphaNumeric1 and std.ascii.isWhitespace(char1);
    const whitespace2 = nonAlphaNumeric2 and std.ascii.isWhitespace(char2);
    // NOTE(review): "whitespace that is also a control character" is used as
    // the line-break test, so a tab presumably counts as a line break here
    // as well as '\n' and '\r' - confirm this is intended.
    const lineBreak1 = whitespace1 and std.ascii.isControl(char1);
    const lineBreak2 = whitespace2 and std.ascii.isControl(char2);

    // Equivalent of the regex /\n\r?\n\Z/ applied to `one`: the text before
    // the boundary ends with an empty line.
    const blankLine1 = lineBreak1 and
        (mem.endsWith(u8, one, "\n\n") or mem.endsWith(u8, one, "\n\r\n"));
    // Equivalent of the regex /\A\r?\n\r?\n/ applied to `two`: the text after
    // the boundary starts with an empty line. Each `\r?` is expanded by hand.
    const blankLine2 = lineBreak2 and
        (mem.startsWith(u8, two, "\n\n") or
        mem.startsWith(u8, two, "\r\n\n") or
        mem.startsWith(u8, two, "\n\r\n") or
        mem.startsWith(u8, two, "\r\n\r\n"));

    if (blankLine1 or blankLine2) {
        // Five points for blank lines.
        return 5;
    } else if (lineBreak1 or lineBreak2) {
        // Four points for line breaks.
        return 4;
    } else if (nonAlphaNumeric1 and !whitespace1 and whitespace2) {
        // Three points for end of sentences.
        return 3;
    } else if (whitespace1 or whitespace2) {
        // Two points for whitespace.
        return 2;
    } else if (nonAlphaNumeric1 or nonAlphaNumeric2) {
        // One point for non-alphanumeric.
        return 1;
    }
    return 0;
}
994+
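// Reference regex patterns for matching blank-line boundaries, apparently
// carried over from the C# port. They are kept for documentation only: no
// regex is defined here, and cleanupSemanticScore checks these boundaries
// with mem.endsWith / mem.startsWith instead.
// private Regex BLANKLINEEND = new Regex("\\n\\r?\\n\\Z");
// private Regex BLANKLINESTART = new Regex("\\A\\r?\\n\\r?\\n");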
0 commit comments