# URL variables are deliberately plain (string) converters: the template builds
# this URL with url_for(..., test_id='_0_', ...) placeholders and substitutes the
# real ids client-side, which an ``int:`` converter would refuse to build.
@mod_test.route('/diff/<test_id>/<regression_test_id>/<output_id>/smart')
def smart_diff_view(test_id: int, regression_test_id: int, output_id: int):
    """
    Return a semantic (smart) diff classification for an output as JSON.

    Unlike the line diff, this reports *how* the output differs (timing shift,
    cosmetic padding/formatting/encoding, text change, missing/extra cues), so a
    person or an agent gets an actionable answer instead of a wall of lines.

    Responds with 404 when no result file matches the three ids. When the
    output files cannot be read, a classification of kind ``unavailable`` is
    returned instead of an error.

    :param test_id: id of the test
    :type test_id: int
    :param regression_test_id: id of the regression test
    :type regression_test_id: int
    :param output_id: id of the generated output
    :type output_id: int
    :return: JSON classification of the difference.
    :rtype: flask.Response
    """
    from run import config

    result = TestResultFile.query.filter(and_(
        TestResultFile.test_id == test_id,
        TestResultFile.regression_test_id == regression_test_id,
        TestResultFile.regression_test_output_id == output_id
    )).first()

    if result is None:
        abort(404)

    path = os.path.join(config.get('SAMPLE_REPOSITORY', ''), 'TestResults')
    try:
        classification = result.generate_smart_diff(path)
    except (OSError, UnicodeDecodeError):
        # Missing or undecodable result files are reported, not raised.
        classification = {'kind': 'unavailable',
                          'summary': 'Output files are not available or not readable.'}
    return jsonify(classification)
def generate_smart_diff(self, base_path: str) -> dict:
    """
    Classify *how* the actual output differs from the expected baseline.

    Returns a semantic classification (timing shift, cosmetic
    padding/formatting/encoding, text change, missing/extra cues) rather than
    a line diff. A falsy ``got`` is reported as ``identical`` without reading
    any file.

    :param base_path: The base path for the files location.
    :type base_path: str
    :return: A smart-diff classification with ``kind`` and ``summary`` keys.
    :rtype: dict
    """
    from mod_test.smartdiff.compare import smart_diff

    if self.got:
        # Both files share the extension of the regression test output.
        suffix = self.regression_test_output.correct_extension
        baseline_file = os.path.join(base_path, self.expected + suffix)
        produced_file = os.path.join(base_path, self.got + suffix)
        baseline = ''.join(self.read_lines(baseline_file))
        produced = ''.join(self.read_lines(produced_file))
        # An empty extension falls back to content-based format detection.
        format_hint = suffix.lstrip('.').lower() or None
        return smart_diff(baseline, produced, fmt=format_hint)

    return {'kind': 'identical',
            'summary': 'Output matches the expected baseline.'}
"""Semantic comparison of subtitle outputs: classify *how* two results differ."""

from typing import Dict, List, Optional

from mod_test.smartdiff.normalize import classify_text_pair, plain
from mod_test.smartdiff.parsing import parse_subtitles
from mod_test.smartdiff.srt import Cue

#: Cap on the number of per-cue change entries returned in a result.
_MAX_CHANGES = 25


def _result(kind: str, summary: str, n_exp: int, n_act: int,
            offset_ms: Optional[int] = None) -> Dict[str, object]:
    """
    Assemble the classification dict shared by every outcome.

    :param kind: The stable difference kind.
    :type kind: str
    :param summary: A human/agent-readable one-line explanation.
    :type summary: str
    :param n_exp: Number of expected cues.
    :type n_exp: int
    :param n_act: Number of actual cues.
    :type n_act: int
    :param offset_ms: Timing offset; the key is only emitted when not None.
    :type offset_ms: Optional[int]
    :return: The classification result.
    :rtype: Dict[str, object]
    """
    payload: Dict[str, object] = {
        'kind': kind,
        'summary': summary,
        'expected_cues': n_exp,
        'actual_cues': n_act,
    }
    if offset_ms is None:
        return payload
    payload['offset_ms'] = offset_ms
    return payload


def _content(cues: List[Cue]) -> str:
    """
    Flatten every cue's normalised text into one whitespace-collapsed string.

    Used to detect split/merged cues: the same words spread over a different
    number of cues produce the same flattened string.

    :param cues: The parsed cues.
    :type cues: List[Cue]
    :return: A single normalised token string spanning every cue.
    :rtype: str
    """
    per_cue = [' '.join(plain(cue.text).split()) for cue in cues]
    return ' '.join(per_cue)


def _monotonic(values: List[int]) -> bool:
    """
    Tell whether a sequence never changes direction.

    :param values: The sequence to test.
    :type values: List[int]
    :return: True if non-decreasing or non-increasing (also for 0/1 items).
    :rtype: bool
    """
    rising = True
    falling = True
    for previous, current in zip(values, values[1:]):
        if previous > current:
            rising = False
        if previous < current:
            falling = False
    return rising or falling


def _snippet(text: str, limit: int = 80) -> str:
    """
    Produce a single-line, length-capped excerpt of cue text.

    :param text: Raw cue text.
    :type text: str
    :param limit: Maximum characters to keep before the ellipsis.
    :type limit: int
    :return: The whitespace-collapsed text, truncated with '…' when too long.
    :rtype: str
    """
    flat = ' '.join(text.split())
    if len(flat) > limit:
        return flat[:limit] + '…'
    return flat


def smart_diff(expected: str, actual: str,
               fmt: Optional[str] = None) -> Dict[str, object]:
    """
    Compare expected vs actual subtitle output and classify the difference.

    Cues are aligned by position. The result ``kind`` is one of
    ``identical``, ``timing_shift`` (constant offset), ``timing_drift``
    (varying, monotonic offset), ``text_change``, ``formatting_change``,
    ``whitespace_change``, ``encoding_change``, ``split_cues``,
    ``merged_cues``, ``missing_cues``, ``extra_cues``, ``unsupported``
    (no cues parsed on either side and the raw text differs) or ``mixed``.

    :param expected: The expected/baseline subtitle content.
    :type expected: str
    :param actual: The actual/produced subtitle content.
    :type actual: str
    :param fmt: Explicit format ('srt' or 'vtt'); auto-detected when None.
    :type fmt: Optional[str]
    :return: A classification dict with keys ``kind``, ``summary``,
        ``expected_cues``, ``actual_cues``, ``offset_ms`` (for
        ``timing_shift``) and, when any cue differs, ``changes`` (capped at
        ``_MAX_CHANGES``, with ``changes_truncated`` set when capped).
    :rtype: Dict[str, object]
    """
    exp = parse_subtitles(expected, fmt)
    act = parse_subtitles(actual, fmt)
    n_exp, n_act = len(exp), len(act)
    if n_exp == 0 and n_act == 0:
        if expected == actual:
            return _result('identical', 'Outputs are identical.', 0, 0)
        return _result(
            'unsupported',
            'No subtitle cues to compare (unsupported format); see the raw diff.',
            0, 0)
    same_count = n_exp == n_act

    tally = {'text': 0, 'formatting': 0, 'whitespace': 0, 'encoding': 0}
    raw_matches = True
    timing_deltas: List[int] = []
    changes: List[Dict[str, object]] = []
    # NOTE(review): only start_ms is compared below; end_ms is never inspected,
    # so a change in cue end times alone is not reflected in the result.
    for position, (expected_cue, actual_cue) in enumerate(zip(exp, act), start=1):
        category = classify_text_pair(expected_cue.text, actual_cue.text)
        delta = actual_cue.start_ms - expected_cue.start_ms
        differs = category != 'match'
        if differs:
            raw_matches = False
        if category in tally:
            tally[category] += 1
        if category != 'text':
            # Timing is only tracked for cues whose text is (cosmetically) the
            # same, so a real text change does not also count as a timing move.
            timing_deltas.append(delta)
        if differs or delta != 0:
            entry: Dict[str, object] = {
                'cue': position,
                'kind': category if differs else 'timing',
            }
            if category == 'text':
                entry['expected'] = _snippet(expected_cue.text)
                entry['actual'] = _snippet(actual_cue.text)
            if delta != 0:
                entry['offset_ms'] = delta
            changes.append(entry)

    text_changes = tally['text']
    formatting_changes = tally['formatting']
    whitespace_changes = tally['whitespace']
    encoding_changes = tally['encoding']

    distinct_deltas = set(timing_deltas)
    no_timing_move = all(delta == 0 for delta in timing_deltas)
    uniform_shift = bool(timing_deltas) and len(distinct_deltas) == 1
    drifting = len(distinct_deltas) > 1 and _monotonic(timing_deltas)
    cosmetic_changes = formatting_changes + whitespace_changes + encoding_changes
    fully_aligned = text_changes == 0 and cosmetic_changes == 0

    def _finish(kind: str, summary: str,
                offset_ms: Optional[int] = None) -> Dict[str, object]:
        """Attach the (capped) per-cue change list to a classification result."""
        out = _result(kind, summary, n_exp, n_act, offset_ms)
        if changes:
            out['changes'] = changes[:_MAX_CHANGES]
            if len(changes) > _MAX_CHANGES:
                out['changes_truncated'] = True
        return out

    if same_count:
        if raw_matches and no_timing_move:
            return _finish('identical', 'Outputs are identical.')

        if fully_aligned and uniform_shift and timing_deltas[0] != 0:
            offset = timing_deltas[0]
            direction = 'late' if offset > 0 else 'early'
            return _finish(
                'timing_shift',
                f'All {n_exp} cues match but are {abs(offset)} ms {direction}.',
                offset_ms=offset)

        if fully_aligned and drifting:
            first, last = timing_deltas[0], timing_deltas[-1]
            return _finish(
                'timing_drift',
                f'Timing drifts from {first:+d} ms to {last:+d} ms across {n_exp} cues.')

        if no_timing_move:
            if text_changes > 0:
                return _finish(
                    'text_change',
                    f'{text_changes} of {n_exp} cues differ in text.')
            only_encoding = formatting_changes == 0 and whitespace_changes == 0
            if encoding_changes > 0 and only_encoding:
                return _finish(
                    'encoding_change',
                    f'{encoding_changes} of {n_exp} cues differ only in character '
                    f'encoding (non-ASCII/accented characters).')
            only_formatting = whitespace_changes == 0 and encoding_changes == 0
            if formatting_changes > 0 and only_formatting:
                return _finish(
                    'formatting_change',
                    f'{formatting_changes} of {n_exp} cues differ only in formatting '
                    f'(tags/entities), not text.')
            only_whitespace = formatting_changes == 0 and encoding_changes == 0
            if whitespace_changes > 0 and only_whitespace:
                return _finish(
                    'whitespace_change',
                    f'{whitespace_changes} of {n_exp} cues differ only in trailing '
                    f'whitespace/padding.')
    else:
        exp_content = _content(exp)
        if exp_content and exp_content == _content(act):
            if n_act > n_exp:
                return _finish(
                    'split_cues',
                    f'Same text, but cues were split: expected {n_exp}, got {n_act}.')
            return _finish(
                'merged_cues',
                f'Same text, but cues were merged: expected {n_exp}, got {n_act}.')
        if text_changes == 0:
            if n_act < n_exp:
                return _finish(
                    'missing_cues',
                    f'{n_exp - n_act} of {n_exp} cues are missing from the output.')
            return _finish(
                'extra_cues',
                f'Output has {n_act - n_exp} extra cues ({n_act} vs {n_exp} expected).')

    return _finish(
        'mixed',
        f'Mixed differences across {min(n_exp, n_act)} compared cues; '
        f'expected {n_exp}, got {n_act}.')
import re
import unicodedata

_TAG_RE = re.compile(r'<[^>]+>')

# Entity -> character table used by ``unescape``. '&amp;' is applied last so an
# escaped entity such as '&amp;lt;' decodes to the literal text '&lt;' and is
# not decoded a second time.
# NOTE(review): the apostrophe is accepted in both its named and numeric
# spelling; confirm against the entity list the upstream harness uses.
_ENTITIES = (
    ('&lt;', '<'), ('&gt;', '>'), ('&quot;', '"'),
    ('&apos;', "'"), ('&#39;', "'"),
    ('&deg;', '°'), ('&nbsp;', ' '), ('&amp;', '&'),
)


def strip_tags(text: str) -> str:
    """
    Remove HTML/styling tags such as ``<i>`` or ``<font color="...">``.

    :param text: Raw cue text.
    :type text: str
    :return: Text with every ``<...>`` run removed.
    :rtype: str
    """
    return _TAG_RE.sub('', text)


def unescape(text: str) -> str:
    """
    Decode the HTML entities listed in ``_ENTITIES``, in table order.

    :param text: Raw cue text.
    :type text: str
    :return: Text with entities decoded.
    :rtype: str
    """
    for entity, char in _ENTITIES:
        text = text.replace(entity, char)
    return text


def rstrip_lines(text: str) -> str:
    """
    Trim trailing whitespace from each line (CEA-608 padding is cosmetic).

    :param text: Raw cue text.
    :type text: str
    :return: Text with per-line trailing whitespace removed.
    :rtype: str
    """
    return '\n'.join(line.rstrip() for line in text.split('\n'))


def plain(text: str) -> str:
    """
    Fully normalise: unescape entities, strip tags, trim trailing whitespace.

    Entities are decoded before tags are stripped, so an escaped tag
    (``&lt;i&gt;``) is removed like a literal one.

    :param text: Raw cue text.
    :type text: str
    :return: The fully normalised text.
    :rtype: str
    """
    return rstrip_lines(strip_tags(unescape(text)))


def ascii_fold(text: str) -> str:
    """
    Fold text to ASCII by decomposing accents and dropping non-ASCII characters.

    'Voilà' and 'Voila' share the same ASCII skeleton, which lets the
    comparator tell a charset/encoding difference from a real word change.

    :param text: Raw cue text.
    :type text: str
    :return: The ASCII skeleton of the text.
    :rtype: str
    """
    decomposed = unicodedata.normalize('NFKD', text)
    return ''.join(ch for ch in decomposed if ord(ch) < 128)


def classify_text_pair(expected: str, actual: str) -> str:
    """
    Classify how two cue texts differ, ignoring progressively more cosmetics.

    :param expected: Expected cue text.
    :type expected: str
    :param actual: Actual cue text.
    :type actual: str
    :return: ``match`` (identical), ``whitespace`` (only trailing padding differs),
        ``formatting`` (only tags/entities differ), ``encoding`` (only non-ASCII
        characters differ), or ``text`` (a real change).
    :rtype: str
    """
    if expected == actual:
        return 'match'
    if rstrip_lines(expected) == rstrip_lines(actual):
        return 'whitespace'
    expected_plain = plain(expected)
    actual_plain = plain(actual)
    if expected_plain == actual_plain:
        return 'formatting'
    folded_expected = ascii_fold(expected_plain)
    non_ascii = any(ord(ch) > 127 for ch in expected_plain + actual_plain)
    # An empty skeleton would make two unrelated all-non-ASCII texts look
    # alike, so it never counts as an encoding-only difference.
    if non_ascii and folded_expected and folded_expected == ascii_fold(actual_plain):
        return 'encoding'
    return 'text'
import re
from dataclasses import dataclass
from typing import List, Optional

# Byte-order mark as decoded text; written as an escape so it survives editors
# and copy/paste that silently drop the invisible character.
_BOM = '\ufeff'

_TIMING_RE = re.compile(
    r'(\d{1,2}):(\d{2}):(\d{2})[,.](\d{1,3})\s*-->\s*'
    r'(\d{1,2}):(\d{2}):(\d{2})[,.](\d{1,3})'
)


@dataclass
class Cue:
    """
    A single subtitle cue: its timing window and text.

    :param index: The cue's sequence number as written in the file.
    :type index: int
    :param start_ms: Start time in milliseconds.
    :type start_ms: int
    :param end_ms: End time in milliseconds.
    :type end_ms: int
    :param text: The cue's text lines joined with newlines; surrounding blank
        lines are dropped, trailing spaces on text lines are kept.
    :type text: str
    """

    index: int
    start_ms: int
    end_ms: int
    text: str


def join_cue_text(lines: List[str]) -> str:
    """
    Join cue text lines, dropping surrounding blank lines but keeping trailing spaces.

    Trailing whitespace is preserved on purpose: CCExtractor pads CEA-608 captions,
    and the comparator (not the parser) decides whether that padding is cosmetic.

    :param lines: The text lines following a cue's timing line.
    :type lines: List[str]
    :return: The joined cue text.
    :rtype: str
    """
    start, end = 0, len(lines)
    while start < end and lines[start].strip() == '':
        start += 1
    while end > start and lines[end - 1].strip() == '':
        end -= 1
    return '\n'.join(lines[start:end])


def _to_ms(hours: str, minutes: str, seconds: str, millis: str) -> int:
    """
    Convert the parts of an SRT timestamp into total milliseconds.

    :param hours: Hours component.
    :type hours: str
    :param minutes: Minutes component.
    :type minutes: str
    :param seconds: Seconds component.
    :type seconds: str
    :param millis: Milliseconds component (taken at face value, not padded).
    :type millis: str
    :return: The timestamp in milliseconds.
    :rtype: int
    """
    return ((int(hours) * 60 + int(minutes)) * 60 + int(seconds)) * 1000 + int(millis)


def parse_srt(content: str) -> List[Cue]:
    """
    Parse SubRip subtitle text into a list of cues.

    Tolerant of a leading BOM, CRLF/CR line endings, and either ',' or '.' as the
    millisecond separator. Blocks without a valid timing line are skipped.

    :param content: Raw .srt file content.
    :type content: str
    :return: The parsed cues, in file order.
    :rtype: List[Cue]
    """
    # Strip the BOM first: str.strip() does not treat it as whitespace, so it
    # would otherwise stick to the first index line and defeat isdigit().
    content = content.lstrip(_BOM).replace('\r\n', '\n').replace('\r', '\n')
    cues: List[Cue] = []
    for block in re.split(r'\n[ \t]*\n', content.strip()):
        lines = block.split('\n')
        timing_idx: Optional[int] = None
        match = None
        for i, line in enumerate(lines):
            match = _TIMING_RE.search(line)
            if match is not None:
                timing_idx = i
                break
        if timing_idx is None or match is None:
            continue
        start_ms = _to_ms(match.group(1), match.group(2), match.group(3), match.group(4))
        end_ms = _to_ms(match.group(5), match.group(6), match.group(7), match.group(8))
        # Prefer the index written in the file; fall back to the position.
        index = len(cues) + 1
        if timing_idx > 0 and lines[timing_idx - 1].strip().isdigit():
            index = int(lines[timing_idx - 1].strip())
        text = join_cue_text(lines[timing_idx + 1:])
        cues.append(Cue(index=index, start_ms=start_ms, end_ms=end_ms, text=text))
    return cues


def parse_subtitles(content: str, fmt: Optional[str] = None) -> List[Cue]:
    """
    Parse subtitle content into cues, choosing a parser by hint or by content.

    Any hint other than 'vtt' (case-insensitive) is parsed as SubRip. Without a
    hint, content whose first non-blank text (after an optional BOM) starts
    with ``WEBVTT`` is parsed as WebVTT, everything else as SubRip.

    :param content: Raw subtitle file content.
    :type content: str
    :param fmt: Explicit format ('srt' or 'vtt'); auto-detected from content when None.
    :type fmt: Optional[str]
    :return: The parsed cues.
    :rtype: List[Cue]
    """
    chosen = (fmt or '').lower()
    if not chosen:
        head = content.lstrip(_BOM).lstrip().upper()
        chosen = 'vtt' if head.startswith('WEBVTT') else 'srt'
    if chosen == 'vtt':
        # Imported where it is needed: the WebVTT parser lives in a sibling module.
        from mod_test.smartdiff.vtt import parse_vtt
        return parse_vtt(content)
    return parse_srt(content)
import re
from typing import List, Optional

from mod_test.smartdiff.srt import Cue, join_cue_text

# Byte-order mark as decoded text; written as an escape so it cannot be lost
# by tools that drop the invisible character.
_BOM = '\ufeff'

_TIMING_RE = re.compile(
    r'(?:(\d{1,2}):)?(\d{2}):(\d{2})[.,](\d{3})\s*-->\s*'
    r'(?:(\d{1,2}):)?(\d{2}):(\d{2})[.,](\d{3})'
)

_METADATA_PREFIXES = ('WEBVTT', 'NOTE', 'STYLE', 'REGION')


def _to_ms(hours: Optional[str], minutes: str, seconds: str, millis: str) -> int:
    """
    Convert WebVTT timestamp parts into total milliseconds.

    :param hours: Hours component, or None when absent (MM:SS.mmm form).
    :type hours: Optional[str]
    :param minutes: Minutes component.
    :type minutes: str
    :param seconds: Seconds component.
    :type seconds: str
    :param millis: Milliseconds component.
    :type millis: str
    :return: The timestamp in milliseconds.
    :rtype: int
    """
    hrs = int(hours) if hours else 0
    return ((hrs * 60 + int(minutes)) * 60 + int(seconds)) * 1000 + int(millis)


def parse_vtt(content: str) -> List[Cue]:
    """
    Parse WebVTT subtitle text into a list of cues.

    Skips the ``WEBVTT`` header and ``NOTE``/``STYLE``/``REGION`` blocks, tolerates
    a leading BOM, an optional cue-identifier line, optional hours in timestamps,
    and trailing cue settings after the end timestamp. Cues are numbered by
    position; identifier lines are not used as the index.

    :param content: Raw .vtt file content.
    :type content: str
    :return: The parsed cues, in file order.
    :rtype: List[Cue]
    """
    # Strip the BOM first so the header block is recognised as 'WEBVTT'.
    content = content.lstrip(_BOM).replace('\r\n', '\n').replace('\r', '\n')
    cues: List[Cue] = []
    for block in re.split(r'\n[ \t]*\n', content.strip()):
        lines = block.split('\n')
        # A block is metadata when its first space-delimited word is a keyword.
        if lines[0].split(' ', 1)[0] in _METADATA_PREFIXES:
            continue
        match = None
        timing_idx: Optional[int] = None
        for i, line in enumerate(lines):
            match = _TIMING_RE.search(line)
            if match is not None:
                timing_idx = i
                break
        if timing_idx is None or match is None:
            continue
        start_ms = _to_ms(match.group(1), match.group(2), match.group(3), match.group(4))
        end_ms = _to_ms(match.group(5), match.group(6), match.group(7), match.group(8))
        text = join_cue_text(lines[timing_idx + 1:])
        cues.append(Cue(index=len(cues) + 1, start_ms=start_ms, end_ms=end_ms, text=text))
    return cues

Fail + Fail
Smart {%- endif %} {% elif file.got == "error" %} No output generated but there should be {% elif file.got is none or no_error.found or (test.result and test.result.exit_code != 0) -%} Pass {% else %} - Fail + Fail
Smart {%- endif %} {% if not loop.last %}
{% endif %} {% else %} @@ -298,6 +298,42 @@

There are no tests executed in this category.
popup.open(); }); }); + $('.smart_diff_link').on('click', function(){ + // Fetch the semantic (smart) diff classification and show a summary. + var url = '{{ url_for('test.smart_diff_view', test_id='_0_', regression_test_id='_1_', output_id='_2_') }}'; + url = url.replace('_0_', $(this).data('test')).replace('_1_', $(this).data('regression')).replace('_2_', $(this).data('output')); + + $.getJSON(url).done(function(resp){ + var id, reveal, popup; + + reveal = document.createElement('div'); + id = 'smart-diff-popup-'+(new Date()).getTime(); + reveal.setAttribute('id', id); + reveal.setAttribute('class', 'reveal'); + reveal.setAttribute('data-reveal', ''); + reveal.innerHTML = + '

Smart diff

' + + '

' + (resp.kind || 'unknown') + '

' + + '

' + (resp.summary || '') + '

'; + if (resp.changes && resp.changes.length) { + var items = resp.changes.map(function(c){ + var d = 'Cue ' + c.cue + ': ' + c.kind; + if (c.offset_ms !== undefined) { d += ' (' + (c.offset_ms > 0 ? '+' : '') + c.offset_ms + ' ms)'; } + if (c.expected !== undefined) { d += ' — expected “' + c.expected + '”, got “' + c.actual + '”'; } + return '
  • ' + $('
    ').text(d).html() + '
  • '; + }).join(''); + reveal.innerHTML += '
      ' + items + '
    '; + if (resp.changes_truncated) { reveal.innerHTML += '

    … more changes not shown.

    '; } + } + reveal.innerHTML += + ''; + document.body.appendChild(reveal); + popup = new Foundation.Reveal($('#'+id)); + popup.open(); + }); + }); }); {% endblock %} diff --git a/tests/test_smartdiff/__init__.py b/tests/test_smartdiff/__init__.py new file mode 100644 index 00000000..68bc9996 --- /dev/null +++ b/tests/test_smartdiff/__init__.py @@ -0,0 +1 @@ +"""Tests for the smart-diff subtitle comparison.""" diff --git a/tests/test_smartdiff/fixtures/cea608_real.srt b/tests/test_smartdiff/fixtures/cea608_real.srt new file mode 100644 index 00000000..d0bf07ab --- /dev/null +++ b/tests/test_smartdiff/fixtures/cea608_real.srt @@ -0,0 +1,9 @@ +1 +00:00:05,956 --> 00:00:07,955 +CCextractor Start crdit Testing + +2 +00:00:13,913 --> 00:00:15,080 +>> WHICH OF THESE STORIES WILL +YOU BE TALKING ABOUT TRO + diff --git a/tests/test_smartdiff/fixtures/dvb_spanish_real.srt b/tests/test_smartdiff/fixtures/dvb_spanish_real.srt new file mode 100644 index 00000000..76a633c4 --- /dev/null +++ b/tests/test_smartdiff/fixtures/dvb_spanish_real.srt @@ -0,0 +1,59 @@ +1 +00:00:00,480 --> 00:01:05,479 +Para continuar con este debate, + +2 +00:00:06,080 --> 00:01:11,079 +gusted cree que si los partidarios +de Errejon fuesen derrotados + +3 +00:00:09,880 --> 00:01:14,879 +su propuesta en) Vistalegre, + +4 +00:00:12,920 --> 00:01:17,919 +Podemos deberia cambiar de portavoz +parlamentario? + +5 +00:00:19,080 --> 00:01:24,079 +éPuede representar al partido + +6 +00:00:21,400 --> 00:01:26,399 +en el Congreso alguienque'se ha +quedado en minoria + +7 +00:00:24,200 --> 00:01:29,199 +dentro del partido? + +8 +00:00:30,640 --> 00:01:35,639 +-Deciden los organos del partido la +linea de accion politica + +9 +00:00:34,200 --> 00:01:39,199 +dentro del partido. + +10 +00:00:43,120 --> 00:01:48,119 +Debemos acatar las decisiones +colectivas. + +11 +00:00:48,600 --> 00:01:53,599 +Si inicio Errejon reconoce que'se +ven esas lineas, + +12 +00:00:51,760 --> 00:01:56,759 +debe seguir adelante. 
"""Tests for the semantic subtitle comparison / classifier."""

import unittest

from mod_test.smartdiff.compare import smart_diff


def _srt(cues):
    """
    Build SubRip text from (start_ms, end_ms, text) tuples.

    :param cues: Iterable of (start_ms, end_ms, text) tuples.
    :type cues: list
    :return: SubRip-formatted string.
    :rtype: str
    """
    def stamp(ms):
        h, ms = divmod(ms, 3600000)
        m, ms = divmod(ms, 60000)
        s, ms = divmod(ms, 1000)
        return f"{h:02d}:{m:02d}:{s:02d},{ms:03d}"

    blocks = []
    for i, (start, end, text) in enumerate(cues, start=1):
        blocks.append(f"{i}\n{stamp(start)} --> {stamp(end)}\n{text}\n")
    return "\n".join(blocks)


_BASE = [(1000, 4000, "Hello world"), (5000, 8000, "Second line")]
_BASE_CAPS = [(1000, 4000, "HELLO WORLD"), (5000, 8000, "SECOND LINE")]


class SmartDiffTests(unittest.TestCase):
    """Classifying the kind of difference between two outputs."""

    def test_identical(self):
        """Equal outputs classify as identical."""
        result = smart_diff(_srt(_BASE), _srt(_BASE))
        self.assertEqual(result["kind"], "identical")

    def test_timing_shift_reports_offset(self):
        """A constant timing offset is reported as timing_shift with offset_ms."""
        shifted = [(s + 500, e + 500, t) for s, e, t in _BASE]
        result = smart_diff(_srt(_BASE), _srt(shifted))
        self.assertEqual(result["kind"], "timing_shift")
        self.assertEqual(result["offset_ms"], 500)

    def test_text_change_only(self):
        """Same timing, different text classifies as text_change."""
        changed = [(1000, 4000, "Hello world"), (5000, 8000, "DIFFERENT")]
        result = smart_diff(_srt(_BASE), _srt(changed))
        self.assertEqual(result["kind"], "text_change")

    def test_missing_cues(self):
        """Fewer cues than expected classifies as missing_cues."""
        result = smart_diff(_srt(_BASE), _srt(_BASE[:1]))
        self.assertEqual(result["kind"], "missing_cues")
        self.assertEqual((result["expected_cues"], result["actual_cues"]), (2, 1))

    def test_extra_cues(self):
        """More cues than expected classifies as extra_cues."""
        more = _BASE + [(9000, 10000, "Third line")]
        result = smart_diff(_srt(_BASE), _srt(more))
        self.assertEqual(result["kind"], "extra_cues")

    def test_mixed_when_text_and_count_differ(self):
        """Both text changes and a count mismatch classify as mixed."""
        other = [(1000, 4000, "CHANGED"), (5000, 8000, "Second line"),
                 (9000, 10000, "Third")]
        result = smart_diff(_srt(_BASE), _srt(other))
        self.assertEqual(result["kind"], "mixed")

    def test_works_on_webvtt_via_autodetect(self):
        """smart_diff auto-detects WebVTT and still classifies a timing shift."""
        base = "WEBVTT\n\n00:00:01.000 --> 00:00:04.000\nHello\n"
        shifted = "WEBVTT\n\n00:00:01.250 --> 00:00:04.250\nHello\n"
        result = smart_diff(base, shifted)
        self.assertEqual(result["kind"], "timing_shift")
        self.assertEqual(result["offset_ms"], 250)

    def test_whitespace_padding_only(self):
        """Trailing CEA-608 padding differences are flagged as cosmetic, not text."""
        padded = [(1000, 4000, "HELLO WORLD   "), (5000, 8000, "SECOND LINE   ")]
        result = smart_diff(_srt(_BASE_CAPS), _srt(padded))
        self.assertEqual(result["kind"], "whitespace_change")

    def test_formatting_tags_only(self):
        """A styling-tags-only difference is flagged as formatting, not text."""
        # The tags are the whole point of this case: without them the input
        # would equal the baseline and classify as identical.
        styled = [(1000, 4000, "<i>Hello world</i>"),
                  (5000, 8000, "<b>Second line</b>")]
        result = smart_diff(_srt(_BASE), _srt(styled))
        self.assertEqual(result["kind"], "formatting_change")

    def test_timing_drift_growing_offset(self):
        """A growing (not constant) timing offset classifies as timing_drift."""
        base = [(1000, 2000, "A"), (5000, 6000, "B"), (9000, 10000, "C")]
        drifted = [(1000, 2000, "A"), (5040, 6040, "B"), (9080, 10080, "C")]
        result = smart_diff(_srt(base), _srt(drifted))
        self.assertEqual(result["kind"], "timing_drift")

    def test_split_cues_same_text_more_cues(self):
        """One cue rendered as two (same words) classifies as split_cues."""
        one = [(1000, 4000, "hello world")]
        two = [(1000, 2000, "hello"), (2000, 4000, "world")]
        result = smart_diff(_srt(one), _srt(two))
        self.assertEqual(result["kind"], "split_cues")

    def test_merged_cues_same_text_fewer_cues(self):
        """Two cues collapsed into one (same words) classifies as merged_cues."""
        two = [(1000, 2000, "hello"), (2000, 4000, "world")]
        one = [(1000, 4000, "hello world")]
        result = smart_diff(_srt(two), _srt(one))
        self.assertEqual(result["kind"], "merged_cues")

    def test_changes_list_text_detail(self):
        """A text change lists which cue changed, with expected/actual snippets."""
        changed = [(1000, 4000, "Hello world"), (5000, 8000, "DIFFERENT")]
        result = smart_diff(_srt(_BASE), _srt(changed))
        changes = result["changes"]
        self.assertEqual(len(changes), 1)
        self.assertEqual(changes[0]["cue"], 2)
        self.assertEqual(changes[0]["kind"], "text")
        self.assertEqual(changes[0]["actual"], "DIFFERENT")

    def test_changes_list_timing_offsets(self):
        """A timing shift lists a per-cue offset for each cue."""
        shifted = [(s + 500, e + 500, t) for s, e, t in _BASE]
        result = smart_diff(_srt(_BASE), _srt(shifted))
        self.assertTrue(all(c["offset_ms"] == 500 for c in result["changes"]))

    def test_identical_has_no_changes(self):
        """An identical result carries no changes list."""
        self.assertNotIn("changes", smart_diff(_srt(_BASE), _srt(_BASE)))

    def test_no_cues_and_differ_is_unsupported_not_identical(self):
        """Two different non-subtitle outputs (no cues) are not called identical."""
        result = smart_diff("plain transcript one", "plain transcript two")
        self.assertEqual(result["kind"], "unsupported")

    def test_no_cues_but_equal_is_identical(self):
        """Two equal cue-less outputs are still identical."""
        self.assertEqual(smart_diff("same text", "same text")["kind"], "identical")

    def test_text_change_with_shift_records_per_cue_offset(self):
        """A combined text change + timing shift keeps the per-cue offset in changes."""
        base = [(1000, 2000, "A"), (5000, 6000, "B")]
        other = [(1500, 2500, "X"), (5500, 6500, "Y")]
        result = smart_diff(_srt(base), _srt(other))
        self.assertEqual(result["kind"], "text_change")
        self.assertNotIn("aligned", result["summary"])
        self.assertTrue(all(c["offset_ms"] == 500 for c in result["changes"]))

    def test_encoding_change_non_ascii_only(self):
        """A charset difference (accents only, e.g. -latin1) is flagged as encoding."""
        accented = [(1000, 4000, "Voilà"), (5000, 8000, "naïve café")]
        folded = [(1000, 4000, "Voila"), (5000, 8000, "naive cafe")]
        result = smart_diff(_srt(accented), _srt(folded))
        self.assertEqual(result["kind"], "encoding_change")


if __name__ == "__main__":
    unittest.main()
+""" + +import os +import unittest + +from mod_test.smartdiff.compare import smart_diff +from mod_test.smartdiff.normalize import ascii_fold, strip_tags +from mod_test.smartdiff.srt import Cue, parse_srt + +_FIXTURES = os.path.join(os.path.dirname(__file__), 'fixtures') + + +def _load(name): + """ + Read a vendored fixture as UTF-8. + + :param name: Fixture file name. + :type name: str + :return: The file content. + :rtype: str + """ + with open(os.path.join(_FIXTURES, name), encoding='utf-8') as handle: + return handle.read() + + +def _emit(cues): + """ + Serialise cues back to SubRip text (for building timing-shifted variants). + + :param cues: The cues to serialise. + :type cues: list + :return: SubRip-formatted text. + :rtype: str + """ + def stamp(ms): + hours, ms = divmod(ms, 3600000) + minutes, ms = divmod(ms, 60000) + seconds, ms = divmod(ms, 1000) + return f"{hours:02d}:{minutes:02d}:{seconds:02d},{ms:03d}" + + return "\n".join(f"{i}\n{stamp(c.start_ms)} --> {stamp(c.end_ms)}\n{c.text}\n" + for i, c in enumerate(cues, 1)) + + +class Cea608RealTests(unittest.TestCase): + """Smart diff on a genuine CEA-608 broadcast caption sample.""" + + def test_parses_real_sample(self): + """The real sample parses into its two CEA-608 cues.""" + cues = parse_srt(_load('cea608_real.srt')) + self.assertEqual(len(cues), 2) + self.assertEqual(cues[0].start_ms, 5956) + + def test_identical_against_itself(self): + """The real sample compared with itself is identical.""" + raw = _load('cea608_real.srt') + self.assertEqual(smart_diff(raw, raw)['kind'], 'identical') + + def test_depadding_is_cosmetic(self): + """Stripping the CEA-608 trailing padding is flagged as cosmetic only.""" + raw = _load('cea608_real.srt') + depadded = '\n'.join(line.rstrip() for line in raw.split('\n')) + self.assertIn(smart_diff(raw, depadded)['kind'], + ('identical', 'whitespace_change')) + + +class DvbSpanishRealTests(unittest.TestCase): + """Smart diff on a real DVB Spanish output (font colour tags + 
accents).""" + + def test_parses_with_tags_and_accents(self): + """The fixture has 13 cues carrying both font tags and non-ASCII text.""" + cues = parse_srt(_load('dvb_spanish_real.srt')) + self.assertEqual(len(cues), 13) + self.assertTrue(any(' 127 for c in cues for ch in c.text)) + + def test_identical(self): + """The fixture compared with itself is identical.""" + raw = _load('dvb_spanish_real.srt') + self.assertEqual(smart_diff(raw, raw)['kind'], 'identical') + + def test_constant_timing_shift(self): + """Shifting every cue by +500 ms is detected with the exact offset.""" + cues = parse_srt(_load('dvb_spanish_real.srt')) + shifted = [Cue(c.index, c.start_ms + 500, c.end_ms + 500, c.text) for c in cues] + result = smart_diff(_emit(cues), _emit(shifted)) + self.assertEqual(result['kind'], 'timing_shift') + self.assertEqual(result['offset_ms'], 500) + + def test_font_tags_are_formatting_only(self): + """Removing the colour tags is classified as formatting, not text.""" + raw = _load('dvb_spanish_real.srt') + self.assertEqual(smart_diff(raw, strip_tags(raw))['kind'], 'formatting_change') + + def test_accent_folding_is_encoding(self): + """Folding the accented characters is classified as an encoding difference.""" + raw = _load('dvb_spanish_real.srt') + self.assertEqual(smart_diff(raw, ascii_fold(raw))['kind'], 'encoding_change') + + def test_dropped_cues_are_missing(self): + """Dropping the last three cues is reported as missing_cues.""" + cues = parse_srt(_load('dvb_spanish_real.srt')) + result = smart_diff(_emit(cues), _emit(cues[:-3])) + self.assertEqual(result['kind'], 'missing_cues') + + +class RobustnessTests(unittest.TestCase): + """Malformed or hostile input must classify cleanly, never crash.""" + + def test_parser_survives_garbage(self): + """The parser returns a list for empty, junk, and control-byte input.""" + for junk in ['', 'not a subtitle', '\x00\x01\x02', '1\nno timing line\n']: + self.assertIsInstance(parse_srt(junk), list) + + def 
test_smart_diff_on_empty_inputs(self): + """Two empty inputs are identical, not an error.""" + self.assertEqual(smart_diff('', '')['kind'], 'identical') + + def test_smart_diff_garbage_vs_real(self): + """Garbage against a real sample classifies without raising.""" + result = smart_diff('garbage with no cues', _load('dvb_spanish_real.srt')) + self.assertIn('kind', result) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_smartdiff/test_model_integration.py b/tests/test_smartdiff/test_model_integration.py new file mode 100644 index 00000000..0b8c715e --- /dev/null +++ b/tests/test_smartdiff/test_model_integration.py @@ -0,0 +1,63 @@ +"""Tests for TestResultFile.generate_smart_diff (the model glue) against real files. + +The method is exercised with a lightweight stand-in ``self`` so the test stays a +fast unit test (no database/ORM mapper configuration required). +""" + +import os +import tempfile +import unittest +from unittest import mock + +from mod_test.models import TestResultFile + +_CUE = "1\n00:00:01,000 --> 00:00:04,000\nHello world\n" + + +def _run(expected_text, got_text, ext='.srt', got='GOT'): + """ + Write two outputs to a temp dir and run generate_smart_diff over them. + + :param expected_text: Expected output content. + :type expected_text: str + :param got_text: Actual output content. + :type got_text: str + :param ext: Output file extension. + :type ext: str + :param got: The 'got' hash (set to None to simulate no produced output). + :type got: str + :return: The smart-diff classification. 
+ :rtype: dict + """ + base = tempfile.mkdtemp() + with open(os.path.join(base, 'EXP' + ext), 'w', encoding='utf-8') as handle: + handle.write(expected_text) + with open(os.path.join(base, 'GOT' + ext), 'w', encoding='utf-8') as handle: + handle.write(got_text) + stub = mock.Mock() + stub.expected = 'EXP' + stub.got = got + stub.regression_test_output.correct_extension = ext + stub.read_lines = TestResultFile.read_lines + return TestResultFile.generate_smart_diff(stub, base) + + +class GenerateSmartDiffTests(unittest.TestCase): + """The model method reads the on-disk outputs and classifies the difference.""" + + def test_identical(self): + """Equal on-disk outputs classify as identical.""" + self.assertEqual(_run(_CUE, _CUE)['kind'], 'identical') + + def test_timing_shift(self): + """A shifted output is classified as a timing shift.""" + shifted = "1\n00:00:01,500 --> 00:00:04,500\nHello world\n" + self.assertEqual(_run(_CUE, shifted)['kind'], 'timing_shift') + + def test_missing_got_is_identical(self): + """A null 'got' (no produced output) short-circuits to identical.""" + self.assertEqual(_run(_CUE, _CUE, got=None)['kind'], 'identical') + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_smartdiff/test_normalize.py b/tests/test_smartdiff/test_normalize.py new file mode 100644 index 00000000..2b766e98 --- /dev/null +++ b/tests/test_smartdiff/test_normalize.py @@ -0,0 +1,55 @@ +"""Tests for CCExtractor-style normalisation of cue text.""" + +import unittest + +from mod_test.smartdiff.normalize import (ascii_fold, classify_text_pair, + plain, strip_tags, unescape) + + +class NormalizeTests(unittest.TestCase): + """Tag stripping, entity unescaping, and cue-text classification.""" + + def test_strip_tags(self): + """HTML/styling tags are removed.""" + self.assertEqual(strip_tags('hi'), 'hi') + + def test_unescape_entities(self): + """Known HTML entities are decoded, including a nested &.""" + self.assertEqual(unescape('a <b> & 30°'), 'a & 30°') + 
+ def test_plain_combines_rules(self): + """plain() strips tags, unescapes, and rstrips padding together.""" + self.assertEqual(plain('hi & bye '), 'hi & bye') + + def test_classify_match(self): + """Identical text classifies as match.""" + self.assertEqual(classify_text_pair('hello', 'hello'), 'match') + + def test_classify_whitespace_only(self): + """Trailing CEA-608 padding differences classify as whitespace.""" + self.assertEqual(classify_text_pair('HELLO WORLD', 'HELLO WORLD '), 'whitespace') + + def test_classify_formatting_only(self): + """A tags-only difference classifies as formatting.""" + self.assertEqual(classify_text_pair('hello', 'hello'), 'formatting') + + def test_ascii_fold_decomposes_accents(self): + """ascii_fold strips accents and drops non-ASCII characters.""" + self.assertEqual(ascii_fold('Voilà café ♪'), 'Voila cafe ') + + def test_classify_encoding_only(self): + """A non-ASCII/accent-only difference classifies as encoding.""" + self.assertEqual(classify_text_pair('PRÉCIS', 'PRECIS'), 'encoding') + + def test_classify_real_text_change(self): + """A genuine text change classifies as text.""" + self.assertEqual(classify_text_pair('hello', 'goodbye'), 'text') + + def test_classify_non_latin_text_change_not_encoding(self): + """Two different non-Latin texts (empty ASCII skeleton) classify as text.""" + self.assertEqual(classify_text_pair('日本語', '中文'), 'text') + self.assertEqual(classify_text_pair('Привет', 'Спасибо'), 'text') + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_smartdiff/test_srt.py b/tests/test_smartdiff/test_srt.py new file mode 100644 index 00000000..55ed4c9c --- /dev/null +++ b/tests/test_smartdiff/test_srt.py @@ -0,0 +1,48 @@ +"""Tests for the SubRip (.srt) parser.""" + +import unittest + +from mod_test.smartdiff.srt import parse_srt + +_TWO_CUES = ( + "1\n" + "00:00:01,000 --> 00:00:04,000\n" + "Hello world\n" + "\n" + "2\n" + "00:00:05,500 --> 00:00:08,250\n" + "Second line\n" +) + + +class 
ParseSrtTests(unittest.TestCase): + """Parsing SubRip content into structured cues.""" + + def test_parses_index_timing_and_text(self): + """A two-cue file yields two cues with correct ms timing and text.""" + cues = parse_srt(_TWO_CUES) + self.assertEqual(len(cues), 2) + self.assertEqual((cues[0].index, cues[0].start_ms, cues[0].end_ms), (1, 1000, 4000)) + self.assertEqual(cues[0].text, "Hello world") + self.assertEqual((cues[1].start_ms, cues[1].end_ms), (5500, 8250)) + + def test_tolerates_crlf_and_bom(self): + """CRLF line endings and a leading BOM are handled.""" + cues = parse_srt("" + _TWO_CUES.replace("\n", "\r\n")) + self.assertEqual(len(cues), 2) + self.assertEqual(cues[1].text, "Second line") + + def test_skips_blocks_without_timing(self): + """A trailing junk block with no timing line is ignored.""" + cues = parse_srt(_TWO_CUES + "\nnot a cue\n") + self.assertEqual(len(cues), 2) + + def test_multiline_cue_text_preserved(self): + """Cue text spanning multiple lines is preserved with its newline.""" + content = "1\n00:00:01,000 --> 00:00:02,000\nline one\nline two\n" + cues = parse_srt(content) + self.assertEqual(cues[0].text, "line one\nline two") + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_smartdiff/test_vtt.py b/tests/test_smartdiff/test_vtt.py new file mode 100644 index 00000000..695aeb11 --- /dev/null +++ b/tests/test_smartdiff/test_vtt.py @@ -0,0 +1,44 @@ +"""Tests for the WebVTT (.vtt) parser.""" + +import unittest + +from mod_test.smartdiff.vtt import parse_vtt + +_VTT = ( + "WEBVTT\n" + "\n" + "NOTE this is a comment\n" + "\n" + "1\n" + "00:00:01.000 --> 00:00:04.000 align:start position:50%\n" + "Hello world\n" + "\n" + "00:05.500 --> 00:08.250\n" + "Second line\n" +) + + +class ParseVttTests(unittest.TestCase): + """Parsing WebVTT content into structured cues.""" + + def test_parses_cues_and_skips_metadata(self): + """The WEBVTT header and NOTE block are skipped; cues are parsed.""" + cues = parse_vtt(_VTT) + 
self.assertEqual(len(cues), 2) + self.assertEqual((cues[0].start_ms, cues[0].end_ms), (1000, 4000)) + self.assertEqual(cues[0].text, "Hello world") + + def test_ignores_trailing_cue_settings(self): + """Cue settings after the end timestamp do not leak into timing/text.""" + cues = parse_vtt(_VTT) + self.assertEqual(cues[0].end_ms, 4000) + self.assertEqual(cues[0].text, "Hello world") + + def test_handles_optional_hours(self): + """A MM:SS.mmm timestamp without an hours component is parsed correctly.""" + cues = parse_vtt(_VTT) + self.assertEqual((cues[1].start_ms, cues[1].end_ms), (5500, 8250)) + + +if __name__ == "__main__": + unittest.main()