From 430f8e4e9357072fc084d61fe8523e37b1e9c318 Mon Sep 17 00:00:00 2001
From: GAURAV KARMAKAR <gaurav.k@graeon.ai>
Date: Thu, 25 Jun 2026 02:19:33 +0530
Subject: [PATCH 01/10] feat(smartdiff): semantic subtitle diff core (SRT
 parser + classifier)

Add a Flask-decoupled smart-diff module that classifies *how* two subtitle
outputs differ instead of producing a raw line diff:
- srt.py: parse SubRip content into structured cues (BOM/CRLF tolerant).
- compare.py: align cues and classify as identical, timing_shift (with a
  consistent offset_ms), text_change, missing_cues, extra_cues, or mixed,
  with an agent-actionable one-line summary.

Includes unit tests for the parser and every classification branch.
---
 mod_test/smartdiff/__init__.py       |   7 ++
 mod_test/smartdiff/compare.py        | 113 +++++++++++++++++++++++++++
 mod_test/smartdiff/srt.py            |  82 +++++++++++++++++++
 tests/test_smartdiff/__init__.py     |   1 +
 tests/test_smartdiff/test_compare.py |  74 ++++++++++++++++++
 tests/test_smartdiff/test_srt.py     |  48 ++++++++++++
 6 files changed, 325 insertions(+)
 create mode 100644 mod_test/smartdiff/__init__.py
 create mode 100644 mod_test/smartdiff/compare.py
 create mode 100644 mod_test/smartdiff/srt.py
 create mode 100644 tests/test_smartdiff/__init__.py
 create mode 100644 tests/test_smartdiff/test_compare.py
 create mode 100644 tests/test_smartdiff/test_srt.py

diff --git a/mod_test/smartdiff/__init__.py b/mod_test/smartdiff/__init__.py
new file mode 100644
index 00000000..7143d27c
--- /dev/null
+++ b/mod_test/smartdiff/__init__.py
@@ -0,0 +1,7 @@
+"""Semantic ("smart") diff for subtitle regression outputs.
+
+Unlike a raw line diff, this package classifies *how* two outputs differ
+(timing shift, text change, missing/extra cues) so a person or an agent gets an
+actionable answer instead of a wall of changed lines. Pure/Flask-decoupled so it
+is fully unit-testable.
+"""
diff --git a/mod_test/smartdiff/compare.py b/mod_test/smartdiff/compare.py
new file mode 100644
index 00000000..90154d4f
--- /dev/null
+++ b/mod_test/smartdiff/compare.py
@@ -0,0 +1,113 @@
+"""Semantic comparison of subtitle outputs: classify *how* two results differ."""
+
+from typing import Dict, List, Optional
+
+from mod_test.smartdiff.srt import parse_srt
+
+
+def _norm(text: str) -> str:
+    """
+    Normalise cue text for comparison (collapse whitespace, case-fold).
+
+    :param text: Raw cue text.
+    :type text: str
+    :return: Normalised text.
+    :rtype: str
+    """
+    return ' '.join(text.split()).casefold()
+
+
+def _result(kind: str, summary: str, n_exp: int, n_act: int,
+            offset_ms: Optional[int] = None) -> Dict[str, object]:
+    """
+    Build a classification result dict.
+
+    :param kind: The stable difference kind.
+    :type kind: str
+    :param summary: A human/agent-readable one-line explanation.
+    :type summary: str
+    :param n_exp: Number of expected cues.
+    :type n_exp: int
+    :param n_act: Number of actual cues.
+    :type n_act: int
+    :param offset_ms: Consistent timing offset, when ``kind`` is ``timing_shift``.
+    :type offset_ms: Optional[int]
+    :return: The classification result.
+    :rtype: Dict[str, object]
+    """
+    out: Dict[str, object] = {
+        'kind': kind,
+        'summary': summary,
+        'expected_cues': n_exp,
+        'actual_cues': n_act,
+    }
+    if offset_ms is not None:
+        out['offset_ms'] = offset_ms
+    return out
+
+
+def smart_diff(expected: str, actual: str) -> Dict[str, object]:
+    """
+    Compare expected vs actual SubRip output and classify the difference.
+
+    Aligns cues by position and reports the *kind* of difference rather than a
+    raw line diff: ``identical``, ``timing_shift`` (with a consistent offset),
+    ``text_change``, ``missing_cues``, ``extra_cues``, or ``mixed``. The goal is
+    an actionable answer ("subtitles are +120 ms late") instead of a wall of
+    changed lines.
+
+    :param expected: The expected/baseline .srt content.
+    :type expected: str
+    :param actual: The actual/produced .srt content.
+    :type actual: str
+    :return: A classification dict with keys ``kind``, ``summary``,
+        ``expected_cues``, ``actual_cues`` and (for ``timing_shift``) ``offset_ms``.
+    :rtype: Dict[str, object]
+    """
+    exp = parse_srt(expected)
+    act = parse_srt(actual)
+    n_exp, n_act = len(exp), len(act)
+    count_mismatch = n_exp != n_act
+
+    text_changes = 0
+    timing_deltas: List[int] = []
+    for e, a in zip(exp, act):
+        if _norm(e.text) != _norm(a.text):
+            text_changes += 1
+        else:
+            timing_deltas.append(a.start_ms - e.start_ms)
+
+    if not count_mismatch and text_changes == 0 and all(d == 0 for d in timing_deltas):
+        return _result('identical', 'Outputs are identical.', n_exp, n_act)
+
+    uniform_shift = bool(timing_deltas) and len(set(timing_deltas)) == 1
+    if not count_mismatch and text_changes == 0 and uniform_shift and timing_deltas[0] != 0:
+        offset = timing_deltas[0]
+        direction = 'late' if offset > 0 else 'early'
+        return _result(
+            'timing_shift',
+            f'All {n_exp} cues match but are {abs(offset)} ms {direction}.',
+            n_exp, n_act, offset_ms=offset)
+
+    if count_mismatch and text_changes == 0:
+        if n_act < n_exp:
+            return _result(
+                'missing_cues',
+                f'{n_exp - n_act} of {n_exp} cues are missing from the output.',
+                n_exp, n_act)
+        return _result(
+            'extra_cues',
+            f'Output has {n_act - n_exp} extra cues ({n_act} vs {n_exp} expected).',
+            n_exp, n_act)
+
+    if not count_mismatch and text_changes > 0 and all(d == 0 for d in timing_deltas):
+        return _result(
+            'text_change',
+            f'{text_changes} of {n_exp} cues differ in text only (timing matches).',
+            n_exp, n_act)
+
+    return _result(
+        'mixed',
+        f'Mixed differences: {text_changes} text change(s) across '
+        f'{min(n_exp, n_act)} compared cues; expected {n_exp}, got {n_act}.',
+        n_exp, n_act)
diff --git a/mod_test/smartdiff/srt.py b/mod_test/smartdiff/srt.py
new file mode 100644
index 00000000..427b70cc
--- /dev/null
+++ b/mod_test/smartdiff/srt.py
@@ -0,0 +1,82 @@
+"""Parse SubRip (.srt) subtitle output into structured cues for comparison."""
+
+import re
+from dataclasses import dataclass
+from typing import List, Optional
+
+_TIMING_RE = re.compile(
+    r'(\d{1,2}):(\d{2}):(\d{2})[,.](\d{1,3})\s*-->\s*'
+    r'(\d{1,2}):(\d{2}):(\d{2})[,.](\d{1,3})'
+)
+
+
+@dataclass
+class Cue:
+    """
+    A single subtitle cue: its timing window and text.
+
+    :param index: The cue's sequence number as written in the file.
+    :type index: int
+    :param start_ms: Start time in milliseconds.
+    :type start_ms: int
+    :param end_ms: End time in milliseconds.
+    :type end_ms: int
+    :param text: The cue's text, newlines preserved and surrounding blank lines dropped.
+    :type text: str
+    """
+
+    index: int
+    start_ms: int
+    end_ms: int
+    text: str
+
+
+def _to_ms(hours: str, minutes: str, seconds: str, millis: str) -> int:
+    """
+    Convert the parts of an SRT timestamp into total milliseconds.
+
+    :param hours: Hours component.
+    :type hours: str
+    :param minutes: Minutes component.
+    :type minutes: str
+    :param seconds: Seconds component.
+    :type seconds: str
+    :param millis: Milliseconds component.
+    :type millis: str
+    :return: The timestamp in milliseconds.
+    :rtype: int
+    """
+    return ((int(hours) * 60 + int(minutes)) * 60 + int(seconds)) * 1000 + int(millis)
+
+
+def parse_srt(content: str) -> List[Cue]:
+    """
+    Parse SubRip subtitle text into a list of cues.
+
+    Tolerant of a leading BOM, CRLF/CR line endings, and either ',' or '.' as the
+    millisecond separator. Blocks without a valid timing line are skipped.
+
+    :param content: Raw .srt file content.
+    :type content: str
+    :return: The parsed cues, in file order.
+    :rtype: List[Cue]
+    """
+    content = content.lstrip('﻿').replace('\r\n', '\n').replace('\r', '\n')
+    cues: List[Cue] = []
+    for block in re.split(r'\n[ \t]*\n', content.strip()):
+        lines = block.split('\n')
+        timing_idx: Optional[int] = next(
+            (i for i, ln in enumerate(lines) if _TIMING_RE.search(ln)), None)
+        if timing_idx is None:
+            continue
+        match = _TIMING_RE.search(lines[timing_idx])
+        if match is None:  # pragma: no cover - guaranteed by the search above
+            continue
+        start_ms = _to_ms(match.group(1), match.group(2), match.group(3), match.group(4))
+        end_ms = _to_ms(match.group(5), match.group(6), match.group(7), match.group(8))
+        index = len(cues) + 1
+        if timing_idx > 0 and lines[timing_idx - 1].strip().isdigit():
+            index = int(lines[timing_idx - 1].strip())
+        text = '\n'.join(lines[timing_idx + 1:]).strip()
+        cues.append(Cue(index=index, start_ms=start_ms, end_ms=end_ms, text=text))
+    return cues
diff --git a/tests/test_smartdiff/__init__.py b/tests/test_smartdiff/__init__.py
new file mode 100644
index 00000000..68bc9996
--- /dev/null
+++ b/tests/test_smartdiff/__init__.py
@@ -0,0 +1 @@
+"""Tests for the smart-diff subtitle comparison."""
diff --git a/tests/test_smartdiff/test_compare.py b/tests/test_smartdiff/test_compare.py
new file mode 100644
index 00000000..9d60a82b
--- /dev/null
+++ b/tests/test_smartdiff/test_compare.py
@@ -0,0 +1,74 @@
+"""Tests for the semantic subtitle comparison / classifier."""
+
+import unittest
+
+from mod_test.smartdiff.compare import smart_diff
+
+
+def _srt(cues):
+    """
+    Build SubRip text from (start_ms, end_ms, text) tuples.
+
+    :param cues: Iterable of (start_ms, end_ms, text) tuples.
+    :type cues: list
+    :return: SubRip-formatted string.
+    :rtype: str
+    """
+    def stamp(ms):
+        h, ms = divmod(ms, 3600000)
+        m, ms = divmod(ms, 60000)
+        s, ms = divmod(ms, 1000)
+        return f"{h:02d}:{m:02d}:{s:02d},{ms:03d}"
+
+    blocks = []
+    for i, (start, end, text) in enumerate(cues, start=1):
+        blocks.append(f"{i}\n{stamp(start)} --> {stamp(end)}\n{text}\n")
+    return "\n".join(blocks)
+
+
+_BASE = [(1000, 4000, "Hello world"), (5000, 8000, "Second line")]
+
+
+class SmartDiffTests(unittest.TestCase):
+    """Classifying the kind of difference between two outputs."""
+
+    def test_identical(self):
+        """Equal outputs classify as identical."""
+        result = smart_diff(_srt(_BASE), _srt(_BASE))
+        self.assertEqual(result["kind"], "identical")
+
+    def test_timing_shift_reports_offset(self):
+        """A constant timing offset is reported as timing_shift with offset_ms."""
+        shifted = [(s + 500, e + 500, t) for s, e, t in _BASE]
+        result = smart_diff(_srt(_BASE), _srt(shifted))
+        self.assertEqual(result["kind"], "timing_shift")
+        self.assertEqual(result["offset_ms"], 500)
+
+    def test_text_change_only(self):
+        """Same timing, different text classifies as text_change."""
+        changed = [(1000, 4000, "Hello world"), (5000, 8000, "DIFFERENT")]
+        result = smart_diff(_srt(_BASE), _srt(changed))
+        self.assertEqual(result["kind"], "text_change")
+
+    def test_missing_cues(self):
+        """Fewer cues than expected classifies as missing_cues."""
+        result = smart_diff(_srt(_BASE), _srt(_BASE[:1]))
+        self.assertEqual(result["kind"], "missing_cues")
+        self.assertEqual((result["expected_cues"], result["actual_cues"]), (2, 1))
+
+    def test_extra_cues(self):
+        """More cues than expected classifies as extra_cues."""
+        more = _BASE + [(9000, 10000, "Third line")]
+        result = smart_diff(_srt(_BASE), _srt(more))
+        self.assertEqual(result["kind"], "extra_cues")
+
+    def test_mixed_when_text_and_count_differ(self):
+        """Both text changes and a count mismatch classify as mixed."""
+        other = [(1000, 4000, "CHANGED"), (5000, 8000, "Second line"),
+                 (9000, 10000, "Third")]
+        result = smart_diff(_srt(_BASE), _srt(other))
+        self.assertEqual(result["kind"], "mixed")
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/test_smartdiff/test_srt.py b/tests/test_smartdiff/test_srt.py
new file mode 100644
index 00000000..55ed4c9c
--- /dev/null
+++ b/tests/test_smartdiff/test_srt.py
@@ -0,0 +1,48 @@
+"""Tests for the SubRip (.srt) parser."""
+
+import unittest
+
+from mod_test.smartdiff.srt import parse_srt
+
+_TWO_CUES = (
+    "1\n"
+    "00:00:01,000 --> 00:00:04,000\n"
+    "Hello world\n"
+    "\n"
+    "2\n"
+    "00:00:05,500 --> 00:00:08,250\n"
+    "Second line\n"
+)
+
+
+class ParseSrtTests(unittest.TestCase):
+    """Parsing SubRip content into structured cues."""
+
+    def test_parses_index_timing_and_text(self):
+        """A two-cue file yields two cues with correct ms timing and text."""
+        cues = parse_srt(_TWO_CUES)
+        self.assertEqual(len(cues), 2)
+        self.assertEqual((cues[0].index, cues[0].start_ms, cues[0].end_ms), (1, 1000, 4000))
+        self.assertEqual(cues[0].text, "Hello world")
+        self.assertEqual((cues[1].start_ms, cues[1].end_ms), (5500, 8250))
+
+    def test_tolerates_crlf_and_bom(self):
+        """CRLF line endings and a leading BOM are handled."""
+        cues = parse_srt("﻿" + _TWO_CUES.replace("\n", "\r\n"))
+        self.assertEqual(len(cues), 2)
+        self.assertEqual(cues[1].text, "Second line")
+
+    def test_skips_blocks_without_timing(self):
+        """A trailing junk block with no timing line is ignored."""
+        cues = parse_srt(_TWO_CUES + "\nnot a cue\n")
+        self.assertEqual(len(cues), 2)
+
+    def test_multiline_cue_text_preserved(self):
+        """Cue text spanning multiple lines is preserved with its newline."""
+        content = "1\n00:00:01,000 --> 00:00:02,000\nline one\nline two\n"
+        cues = parse_srt(content)
+        self.assertEqual(cues[0].text, "line one\nline two")
+
+
+if __name__ == "__main__":
+    unittest.main()

From 4e8144b0477f36ef88fad882cc6294bb95288b1d Mon Sep 17 00:00:00 2001
From: GAURAV KARMAKAR <gaurav.k@graeon.ai>
Date: Thu, 25 Jun 2026 02:29:54 +0530
Subject: [PATCH 02/10] feat(smartdiff): add WebVTT support and a format
 dispatcher

- vtt.py: parse WebVTT into cues (skips WEBVTT/NOTE/STYLE/REGION blocks,
  handles optional hours and trailing cue settings).
- parsing.py: parse_subtitles() picks the parser by explicit hint or by
  auto-detecting the format from content.
- compare.smart_diff() now takes an optional fmt and works across SRT/VTT.

Adds parser tests for WebVTT and a cross-format auto-detect compare test.
---
 mod_test/smartdiff/compare.py        | 28 +++++++-----
 mod_test/smartdiff/parsing.py        | 26 +++++++++++
 mod_test/smartdiff/vtt.py            | 65 ++++++++++++++++++++++++++++
 tests/test_smartdiff/test_compare.py |  8 ++++
 tests/test_smartdiff/test_vtt.py     | 44 +++++++++++++++++++
 5 files changed, 159 insertions(+), 12 deletions(-)
 create mode 100644 mod_test/smartdiff/parsing.py
 create mode 100644 mod_test/smartdiff/vtt.py
 create mode 100644 tests/test_smartdiff/test_vtt.py

diff --git a/mod_test/smartdiff/compare.py b/mod_test/smartdiff/compare.py
index 90154d4f..a3f9aafb 100644
--- a/mod_test/smartdiff/compare.py
+++ b/mod_test/smartdiff/compare.py
@@ -2,7 +2,7 @@
 
 from typing import Dict, List, Optional
 
-from mod_test.smartdiff.srt import parse_srt
+from mod_test.smartdiff.parsing import parse_subtitles
 
 
 def _norm(text: str) -> str:
@@ -46,26 +46,30 @@ def _result(kind: str, summary: str, n_exp: int, n_act: int,
     return out
 
 
-def smart_diff(expected: str, actual: str) -> Dict[str, object]:
+def smart_diff(expected: str, actual: str,
+               fmt: Optional[str] = None) -> Dict[str, object]:
     """
-    Compare expected vs actual SubRip output and classify the difference.
+    Compare expected vs actual subtitle output and classify the difference.
 
-    Aligns cues by position and reports the *kind* of difference rather than a
-    raw line diff: ``identical``, ``timing_shift`` (with a consistent offset),
-    ``text_change``, ``missing_cues``, ``extra_cues``, or ``mixed``. The goal is
-    an actionable answer ("subtitles are +120 ms late") instead of a wall of
-    changed lines.
+    Supports SubRip (.srt) and WebVTT (.vtt); the format is auto-detected from
+    content unless ``fmt`` is given. Aligns cues by position and reports the
+    *kind* of difference rather than a raw line diff: ``identical``,
+    ``timing_shift`` (with a consistent offset), ``text_change``,
+    ``missing_cues``, ``extra_cues``, or ``mixed``. The goal is an actionable
+    answer ("subtitles are +120 ms late") instead of a wall of changed lines.
 
-    :param expected: The expected/baseline .srt content.
+    :param expected: The expected/baseline subtitle content.
     :type expected: str
-    :param actual: The actual/produced .srt content.
+    :param actual: The actual/produced subtitle content.
     :type actual: str
+    :param fmt: Explicit format ('srt' or 'vtt'); auto-detected when None.
+    :type fmt: Optional[str]
     :return: A classification dict with keys ``kind``, ``summary``,
         ``expected_cues``, ``actual_cues`` and (for ``timing_shift``) ``offset_ms``.
     :rtype: Dict[str, object]
     """
-    exp = parse_srt(expected)
-    act = parse_srt(actual)
+    exp = parse_subtitles(expected, fmt)
+    act = parse_subtitles(actual, fmt)
     n_exp, n_act = len(exp), len(act)
     count_mismatch = n_exp != n_act
 
diff --git a/mod_test/smartdiff/parsing.py b/mod_test/smartdiff/parsing.py
new file mode 100644
index 00000000..35bfe3d6
--- /dev/null
+++ b/mod_test/smartdiff/parsing.py
@@ -0,0 +1,26 @@
+"""Detect the subtitle format and dispatch to the right parser."""
+
+from typing import List, Optional
+
+from mod_test.smartdiff.srt import Cue, parse_srt
+from mod_test.smartdiff.vtt import parse_vtt
+
+
+def parse_subtitles(content: str, fmt: Optional[str] = None) -> List[Cue]:
+    """
+    Parse subtitle content into cues, choosing a parser by hint or by content.
+
+    :param content: Raw subtitle file content.
+    :type content: str
+    :param fmt: Explicit format ('srt' or 'vtt'); auto-detected from content when None.
+    :type fmt: Optional[str]
+    :return: The parsed cues.
+    :rtype: List[Cue]
+    """
+    chosen = (fmt or '').lower()
+    if not chosen:
+        head = content.lstrip('﻿').lstrip().upper()
+        chosen = 'vtt' if head.startswith('WEBVTT') else 'srt'
+    if chosen == 'vtt':
+        return parse_vtt(content)
+    return parse_srt(content)
diff --git a/mod_test/smartdiff/vtt.py b/mod_test/smartdiff/vtt.py
new file mode 100644
index 00000000..cd13af0f
--- /dev/null
+++ b/mod_test/smartdiff/vtt.py
@@ -0,0 +1,65 @@
+"""Parse WebVTT (.vtt) subtitle output into structured cues."""
+
+import re
+from typing import List, Optional
+
+from mod_test.smartdiff.srt import Cue
+
+_TIMING_RE = re.compile(
+    r'(?:(\d{1,2}):)?(\d{2}):(\d{2})[.,](\d{3})\s*-->\s*'
+    r'(?:(\d{1,2}):)?(\d{2}):(\d{2})[.,](\d{3})'
+)
+
+_METADATA_PREFIXES = ('WEBVTT', 'NOTE', 'STYLE', 'REGION')
+
+
+def _to_ms(hours: Optional[str], minutes: str, seconds: str, millis: str) -> int:
+    """
+    Convert WebVTT timestamp parts into total milliseconds.
+
+    :param hours: Hours component, or None when absent (MM:SS.mmm form).
+    :type hours: Optional[str]
+    :param minutes: Minutes component.
+    :type minutes: str
+    :param seconds: Seconds component.
+    :type seconds: str
+    :param millis: Milliseconds component.
+    :type millis: str
+    :return: The timestamp in milliseconds.
+    :rtype: int
+    """
+    hrs = int(hours) if hours else 0
+    return ((hrs * 60 + int(minutes)) * 60 + int(seconds)) * 1000 + int(millis)
+
+
+def parse_vtt(content: str) -> List[Cue]:
+    """
+    Parse WebVTT subtitle text into a list of cues.
+
+    Skips the ``WEBVTT`` header and ``NOTE``/``STYLE``/``REGION`` blocks, tolerates
+    an optional cue-identifier line, optional hours in timestamps, and trailing cue
+    settings after the end timestamp.
+
+    :param content: Raw .vtt file content.
+    :type content: str
+    :return: The parsed cues, in file order.
+    :rtype: List[Cue]
+    """
+    content = content.lstrip('﻿').replace('\r\n', '\n').replace('\r', '\n')
+    cues: List[Cue] = []
+    for block in re.split(r'\n[ \t]*\n', content.strip()):
+        lines = block.split('\n')
+        if lines[0].split(' ', 1)[0] in _METADATA_PREFIXES:
+            continue
+        timing_idx: Optional[int] = next(
+            (i for i, ln in enumerate(lines) if _TIMING_RE.search(ln)), None)
+        if timing_idx is None:
+            continue
+        match = _TIMING_RE.search(lines[timing_idx])
+        if match is None:  # pragma: no cover - guaranteed by the search above
+            continue
+        start_ms = _to_ms(match.group(1), match.group(2), match.group(3), match.group(4))
+        end_ms = _to_ms(match.group(5), match.group(6), match.group(7), match.group(8))
+        text = '\n'.join(lines[timing_idx + 1:]).strip()
+        cues.append(Cue(index=len(cues) + 1, start_ms=start_ms, end_ms=end_ms, text=text))
+    return cues
diff --git a/tests/test_smartdiff/test_compare.py b/tests/test_smartdiff/test_compare.py
index 9d60a82b..2e241d29 100644
--- a/tests/test_smartdiff/test_compare.py
+++ b/tests/test_smartdiff/test_compare.py
@@ -69,6 +69,14 @@ def test_mixed_when_text_and_count_differ(self):
         result = smart_diff(_srt(_BASE), _srt(other))
         self.assertEqual(result["kind"], "mixed")
 
+    def test_works_on_webvtt_via_autodetect(self):
+        """smart_diff auto-detects WebVTT and still classifies a timing shift."""
+        base = "WEBVTT\n\n00:00:01.000 --> 00:00:04.000\nHello\n"
+        shifted = "WEBVTT\n\n00:00:01.250 --> 00:00:04.250\nHello\n"
+        result = smart_diff(base, shifted)
+        self.assertEqual(result["kind"], "timing_shift")
+        self.assertEqual(result["offset_ms"], 250)
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/tests/test_smartdiff/test_vtt.py b/tests/test_smartdiff/test_vtt.py
new file mode 100644
index 00000000..695aeb11
--- /dev/null
+++ b/tests/test_smartdiff/test_vtt.py
@@ -0,0 +1,44 @@
+"""Tests for the WebVTT (.vtt) parser."""
+
+import unittest
+
+from mod_test.smartdiff.vtt import parse_vtt
+
+_VTT = (
+    "WEBVTT\n"
+    "\n"
+    "NOTE this is a comment\n"
+    "\n"
+    "1\n"
+    "00:00:01.000 --> 00:00:04.000 align:start position:50%\n"
+    "Hello world\n"
+    "\n"
+    "00:05.500 --> 00:08.250\n"
+    "Second line\n"
+)
+
+
+class ParseVttTests(unittest.TestCase):
+    """Parsing WebVTT content into structured cues."""
+
+    def test_parses_cues_and_skips_metadata(self):
+        """The WEBVTT header and NOTE block are skipped; cues are parsed."""
+        cues = parse_vtt(_VTT)
+        self.assertEqual(len(cues), 2)
+        self.assertEqual((cues[0].start_ms, cues[0].end_ms), (1000, 4000))
+        self.assertEqual(cues[0].text, "Hello world")
+
+    def test_ignores_trailing_cue_settings(self):
+        """Cue settings after the end timestamp do not leak into timing/text."""
+        cues = parse_vtt(_VTT)
+        self.assertEqual(cues[0].end_ms, 4000)
+        self.assertEqual(cues[0].text, "Hello world")
+
+    def test_handles_optional_hours(self):
+        """A MM:SS.mmm timestamp without an hours component is parsed correctly."""
+        cues = parse_vtt(_VTT)
+        self.assertEqual((cues[1].start_ms, cues[1].end_ms), (5500, 8250))
+
+
+if __name__ == "__main__":
+    unittest.main()

From cda6de8881826c3faf3d26f162934658e613f64f Mon Sep 17 00:00:00 2001
From: GAURAV KARMAKAR <gaurav.k@graeon.ai>
Date: Sat, 27 Jun 2026 01:07:49 +0530
Subject: [PATCH 03/10] feat(smartdiff): CCExtractor-grounded normalization +
 cosmetic-diff kinds

Mirror CCExtractor's own expected-output handling (tests/extract_expected.py):
strip HTML/styling tags, unescape entities, and trim per-line trailing
whitespace. This lets the comparator separate cosmetic differences from real
text changes, adding two classifications:
- formatting_change: cues differ only in tags/entities, not text.
- whitespace_change: cues differ only in CEA-608 trailing padding.

Parsers now preserve raw cue text (only surrounding blank lines are dropped)
so the comparator, not the parser, decides what is cosmetic. Verified against
a real CCExtractor CEA-608 sample.
---
 mod_test/smartdiff/compare.py          | 73 +++++++++++++--------
 mod_test/smartdiff/normalize.py        | 90 ++++++++++++++++++++++++++
 mod_test/smartdiff/srt.py              | 22 ++++++-
 mod_test/smartdiff/vtt.py              |  4 +-
 tests/test_smartdiff/test_compare.py   | 13 ++++
 tests/test_smartdiff/test_normalize.py | 42 ++++++++++++
 6 files changed, 213 insertions(+), 31 deletions(-)
 create mode 100644 mod_test/smartdiff/normalize.py
 create mode 100644 tests/test_smartdiff/test_normalize.py

diff --git a/mod_test/smartdiff/compare.py b/mod_test/smartdiff/compare.py
index a3f9aafb..2eea1d03 100644
--- a/mod_test/smartdiff/compare.py
+++ b/mod_test/smartdiff/compare.py
@@ -2,21 +2,10 @@
 
 from typing import Dict, List, Optional
 
+from mod_test.smartdiff.normalize import classify_text_pair
 from mod_test.smartdiff.parsing import parse_subtitles
 
 
-def _norm(text: str) -> str:
-    """
-    Normalise cue text for comparison (collapse whitespace, case-fold).
-
-    :param text: Raw cue text.
-    :type text: str
-    :return: Normalised text.
-    :rtype: str
-    """
-    return ' '.join(text.split()).casefold()
-
-
 def _result(kind: str, summary: str, n_exp: int, n_act: int,
             offset_ms: Optional[int] = None) -> Dict[str, object]:
     """
@@ -55,8 +44,9 @@ def smart_diff(expected: str, actual: str,
     content unless ``fmt`` is given. Aligns cues by position and reports the
     *kind* of difference rather than a raw line diff: ``identical``,
     ``timing_shift`` (with a consistent offset), ``text_change``,
-    ``missing_cues``, ``extra_cues``, or ``mixed``. The goal is an actionable
-    answer ("subtitles are +120 ms late") instead of a wall of changed lines.
+    ``formatting_change`` (tags/entities only), ``whitespace_change`` (CEA-608
+    padding only), ``missing_cues``, ``extra_cues``, or ``mixed``. The goal is an
+    actionable answer ("subtitles are +120 ms late") instead of a wall of lines.
 
     :param expected: The expected/baseline subtitle content.
     :type expected: str
@@ -74,18 +64,32 @@ def smart_diff(expected: str, actual: str,
     count_mismatch = n_exp != n_act
 
     text_changes = 0
+    formatting_changes = 0
+    whitespace_changes = 0
+    raw_matches = True
     timing_deltas: List[int] = []
-    for e, a in zip(exp, act):
-        if _norm(e.text) != _norm(a.text):
+    for expected_cue, actual_cue in zip(exp, act):
+        category = classify_text_pair(expected_cue.text, actual_cue.text)
+        if category != 'match':
+            raw_matches = False
+        if category == 'text':
             text_changes += 1
-        else:
-            timing_deltas.append(a.start_ms - e.start_ms)
+            continue
+        if category == 'formatting':
+            formatting_changes += 1
+        elif category == 'whitespace':
+            whitespace_changes += 1
+        timing_deltas.append(actual_cue.start_ms - expected_cue.start_ms)
+
+    no_timing_move = all(delta == 0 for delta in timing_deltas)
+    uniform_shift = bool(timing_deltas) and len(set(timing_deltas)) == 1
+    cosmetic_changes = formatting_changes + whitespace_changes
+    fully_aligned = text_changes == 0 and cosmetic_changes == 0
 
-    if not count_mismatch and text_changes == 0 and all(d == 0 for d in timing_deltas):
+    if not count_mismatch and raw_matches and no_timing_move:
         return _result('identical', 'Outputs are identical.', n_exp, n_act)
 
-    uniform_shift = bool(timing_deltas) and len(set(timing_deltas)) == 1
-    if not count_mismatch and text_changes == 0 and uniform_shift and timing_deltas[0] != 0:
+    if not count_mismatch and fully_aligned and uniform_shift and timing_deltas[0] != 0:
         offset = timing_deltas[0]
         direction = 'late' if offset > 0 else 'early'
         return _result(
@@ -104,14 +108,27 @@ def smart_diff(expected: str, actual: str,
             f'Output has {n_act - n_exp} extra cues ({n_act} vs {n_exp} expected).',
             n_exp, n_act)
 
-    if not count_mismatch and text_changes > 0 and all(d == 0 for d in timing_deltas):
-        return _result(
-            'text_change',
-            f'{text_changes} of {n_exp} cues differ in text only (timing matches).',
-            n_exp, n_act)
+    if not count_mismatch and no_timing_move:
+        if text_changes > 0:
+            return _result(
+                'text_change',
+                f'{text_changes} of {n_exp} cues differ in text (timing aligned).',
+                n_exp, n_act)
+        if formatting_changes > 0 and whitespace_changes == 0:
+            return _result(
+                'formatting_change',
+                f'{formatting_changes} of {n_exp} cues differ only in formatting '
+                f'(tags/entities), not text.',
+                n_exp, n_act)
+        if whitespace_changes > 0 and formatting_changes == 0:
+            return _result(
+                'whitespace_change',
+                f'{whitespace_changes} of {n_exp} cues differ only in trailing '
+                f'whitespace/padding.',
+                n_exp, n_act)
 
     return _result(
         'mixed',
-        f'Mixed differences: {text_changes} text change(s) across '
-        f'{min(n_exp, n_act)} compared cues; expected {n_exp}, got {n_act}.',
+        f'Mixed differences across {min(n_exp, n_act)} compared cues; '
+        f'expected {n_exp}, got {n_act}.',
         n_exp, n_act)
diff --git a/mod_test/smartdiff/normalize.py b/mod_test/smartdiff/normalize.py
new file mode 100644
index 00000000..3ecfb2e8
--- /dev/null
+++ b/mod_test/smartdiff/normalize.py
@@ -0,0 +1,90 @@
+"""Normalisation that mirrors CCExtractor's own expected-output handling.
+
+CCExtractor's test harness (``tests/extract_expected.py``) compares outputs
+after stripping HTML/styling tags, unescaping entities, and trimming trailing
+whitespace from each line (CEA-608 captions are space-padded to a fixed grid).
+Reusing the same rules lets the smart diff separate a *cosmetic* difference
+(padding or styling only) from a real text change.
+"""
+
+import re
+
+_TAG_RE = re.compile(r'<[^>]+>')
+
+# Same entities CCExtractor's extract_expected.py unescapes; '&amp;' is applied
+# last so an escaped entity like '&amp;lt;' is not double-decoded.
+_ENTITIES = (
+    ('&lt;', '<'), ('&gt;', '>'), ('&quot;', '"'), ('&#x27;', "'"),
+    ('&deg;', '°'), ('&nbsp;', ' '), ('&amp;', '&'),
+)
+
+
+def strip_tags(text: str) -> str:
+    """
+    Remove HTML/styling tags such as ``<font color=...>`` or ``<i>``.
+
+    :param text: Raw cue text.
+    :type text: str
+    :return: Text with tags removed.
+    :rtype: str
+    """
+    return _TAG_RE.sub('', text)
+
+
+def unescape(text: str) -> str:
+    """
+    Unescape the HTML entities CCExtractor emits.
+
+    :param text: Raw cue text.
+    :type text: str
+    :return: Text with entities decoded.
+    :rtype: str
+    """
+    for entity, char in _ENTITIES:
+        text = text.replace(entity, char)
+    return text
+
+
+def rstrip_lines(text: str) -> str:
+    """
+    Trim trailing whitespace from each line (CEA-608 padding is cosmetic).
+
+    :param text: Raw cue text.
+    :type text: str
+    :return: Text with per-line trailing whitespace removed.
+    :rtype: str
+    """
+    return '\n'.join(line.rstrip() for line in text.split('\n'))
+
+
+def plain(text: str) -> str:
+    """
+    Fully normalise: unescape entities, strip tags, trim trailing whitespace.
+
+    :param text: Raw cue text.
+    :type text: str
+    :return: The fully normalised text.
+    :rtype: str
+    """
+    return rstrip_lines(strip_tags(unescape(text)))
+
+
+def classify_text_pair(expected: str, actual: str) -> str:
+    """
+    Classify how two cue texts differ, ignoring progressively more cosmetics.
+
+    :param expected: Expected cue text.
+    :type expected: str
+    :param actual: Actual cue text.
+    :type actual: str
+    :return: ``match`` (identical), ``whitespace`` (only trailing padding differs),
+        ``formatting`` (equal after also ignoring tags/entities), or ``text`` (a real change).
+    :rtype: str
+    """
+    if expected == actual:
+        return 'match'
+    if rstrip_lines(expected) == rstrip_lines(actual):
+        return 'whitespace'
+    if plain(expected) == plain(actual):
+        return 'formatting'
+    return 'text'
diff --git a/mod_test/smartdiff/srt.py b/mod_test/smartdiff/srt.py
index 427b70cc..3f7388ed 100644
--- a/mod_test/smartdiff/srt.py
+++ b/mod_test/smartdiff/srt.py
@@ -31,6 +31,26 @@ class Cue:
     text: str
 
 
+def join_cue_text(lines: List[str]) -> str:
+    """
+    Join cue text lines, dropping surrounding blank lines but keeping trailing spaces.
+
+    Trailing whitespace is preserved on purpose: CCExtractor pads CEA-608 captions,
+    and the comparator (not the parser) decides whether that padding is cosmetic.
+
+    :param lines: The text lines following a cue's timing line.
+    :type lines: List[str]
+    :return: The joined cue text.
+    :rtype: str
+    """
+    start, end = 0, len(lines)
+    while start < end and lines[start].strip() == '':
+        start += 1
+    while end > start and lines[end - 1].strip() == '':
+        end -= 1
+    return '\n'.join(lines[start:end])
+
+
 def _to_ms(hours: str, minutes: str, seconds: str, millis: str) -> int:
     """
     Convert the parts of an SRT timestamp into total milliseconds.
@@ -77,6 +97,6 @@ def parse_srt(content: str) -> List[Cue]:
         index = len(cues) + 1
         if timing_idx > 0 and lines[timing_idx - 1].strip().isdigit():
             index = int(lines[timing_idx - 1].strip())
-        text = '\n'.join(lines[timing_idx + 1:]).strip()
+        text = join_cue_text(lines[timing_idx + 1:])
         cues.append(Cue(index=index, start_ms=start_ms, end_ms=end_ms, text=text))
     return cues
diff --git a/mod_test/smartdiff/vtt.py b/mod_test/smartdiff/vtt.py
index cd13af0f..dbb12e84 100644
--- a/mod_test/smartdiff/vtt.py
+++ b/mod_test/smartdiff/vtt.py
@@ -3,7 +3,7 @@
 import re
 from typing import List, Optional
 
-from mod_test.smartdiff.srt import Cue
+from mod_test.smartdiff.srt import Cue, join_cue_text
 
 _TIMING_RE = re.compile(
     r'(?:(\d{1,2}):)?(\d{2}):(\d{2})[.,](\d{3})\s*-->\s*'
@@ -60,6 +60,6 @@ def parse_vtt(content: str) -> List[Cue]:
             continue
         start_ms = _to_ms(match.group(1), match.group(2), match.group(3), match.group(4))
         end_ms = _to_ms(match.group(5), match.group(6), match.group(7), match.group(8))
-        text = '\n'.join(lines[timing_idx + 1:]).strip()
+        text = join_cue_text(lines[timing_idx + 1:])
         cues.append(Cue(index=len(cues) + 1, start_ms=start_ms, end_ms=end_ms, text=text))
     return cues
diff --git a/tests/test_smartdiff/test_compare.py b/tests/test_smartdiff/test_compare.py
index 2e241d29..dbbc034d 100644
--- a/tests/test_smartdiff/test_compare.py
+++ b/tests/test_smartdiff/test_compare.py
@@ -27,6 +27,7 @@ def stamp(ms):
 
 
 _BASE = [(1000, 4000, "Hello world"), (5000, 8000, "Second line")]
+_BASE_CAPS = [(1000, 4000, "HELLO WORLD"), (5000, 8000, "SECOND LINE")]
 
 
 class SmartDiffTests(unittest.TestCase):
@@ -77,6 +78,18 @@ def test_works_on_webvtt_via_autodetect(self):
         self.assertEqual(result["kind"], "timing_shift")
         self.assertEqual(result["offset_ms"], 250)
 
+    def test_whitespace_padding_only(self):
+        """Trailing CEA-608 padding differences are flagged as cosmetic, not text."""
+        padded = [(1000, 4000, "HELLO WORLD   "), (5000, 8000, "SECOND LINE  ")]
+        result = smart_diff(_srt(_BASE_CAPS), _srt(padded))
+        self.assertEqual(result["kind"], "whitespace_change")
+
+    def test_formatting_tags_only(self):
+        """A styling-tags-only difference is flagged as formatting, not text."""
+        styled = [(1000, 4000, "<i>Hello world</i>"), (5000, 8000, "Second line")]
+        result = smart_diff(_srt(_BASE), _srt(styled))
+        self.assertEqual(result["kind"], "formatting_change")
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/tests/test_smartdiff/test_normalize.py b/tests/test_smartdiff/test_normalize.py
new file mode 100644
index 00000000..6d7183d2
--- /dev/null
+++ b/tests/test_smartdiff/test_normalize.py
@@ -0,0 +1,42 @@
+"""Tests for CCExtractor-style normalisation of cue text."""
+
+import unittest
+
+from mod_test.smartdiff.normalize import (classify_text_pair, plain,
+                                          strip_tags, unescape)
+
+
+class NormalizeTests(unittest.TestCase):
+    """Tag stripping, entity unescaping, and cue-text classification."""
+
+    def test_strip_tags(self):
+        """HTML/styling tags are removed."""
+        self.assertEqual(strip_tags('<font color="#fff">hi</font>'), 'hi')
+
+    def test_unescape_entities(self):
+        """Known HTML entities are decoded, including a nested &amp;."""
+        self.assertEqual(unescape('a &lt;b&gt; &amp; 30&deg;'), 'a <b> & 30°')
+
+    def test_plain_combines_rules(self):
+        """plain() strips tags, unescapes, and rstrips padding together."""
+        self.assertEqual(plain('<i>hi &amp; bye</i>   '), 'hi & bye')
+
+    def test_classify_match(self):
+        """Identical text classifies as match."""
+        self.assertEqual(classify_text_pair('hello', 'hello'), 'match')
+
+    def test_classify_whitespace_only(self):
+        """Trailing CEA-608 padding differences classify as whitespace."""
+        self.assertEqual(classify_text_pair('HELLO WORLD', 'HELLO WORLD     '), 'whitespace')
+
+    def test_classify_formatting_only(self):
+        """A tags-only difference classifies as formatting."""
+        self.assertEqual(classify_text_pair('hello', '<i>hello</i>'), 'formatting')
+
+    def test_classify_real_text_change(self):
+        """A genuine text change classifies as text."""
+        self.assertEqual(classify_text_pair('hello', 'goodbye'), 'text')
+
+
+if __name__ == "__main__":
+    unittest.main()

From 8f59b35d22aeb50e085952466d0561bf161ff2d5 Mon Sep 17 00:00:00 2001
From: GAURAV KARMAKAR <gaurav.k@graeon.ai>
Date: Sat, 27 Jun 2026 01:21:02 +0530
Subject: [PATCH 04/10] feat(smartdiff): timing drift, split/merged cues, real
 golden fixtures

- timing_drift: detect a monotonically changing (non-constant) offset across
  cues, the signature of a progressive sync bug, distinct from a constant
  timing_shift.
- split_cues / merged_cues: when cue count changes but the text content is
  unchanged, report re-segmentation instead of missing/extra cues.
- Vendor a real CCExtractor CEA-608 sample (tests/.../fixtures/cea608_real.srt)
  and add golden-fixture tests so the diff is exercised on true output:
  identical, constant shift, and cosmetic de-padding.
---
 mod_test/smartdiff/compare.py                 | 74 +++++++++++++++----
 tests/test_smartdiff/fixtures/cea608_real.srt |  9 +++
 tests/test_smartdiff/test_compare.py          | 21 ++++++
 tests/test_smartdiff/test_fixtures.py         | 58 +++++++++++++++
 4 files changed, 149 insertions(+), 13 deletions(-)
 create mode 100644 tests/test_smartdiff/fixtures/cea608_real.srt
 create mode 100644 tests/test_smartdiff/test_fixtures.py

diff --git a/mod_test/smartdiff/compare.py b/mod_test/smartdiff/compare.py
index 2eea1d03..801c108f 100644
--- a/mod_test/smartdiff/compare.py
+++ b/mod_test/smartdiff/compare.py
@@ -2,8 +2,9 @@
 
 from typing import Dict, List, Optional
 
-from mod_test.smartdiff.normalize import classify_text_pair
+from mod_test.smartdiff.normalize import classify_text_pair, plain
 from mod_test.smartdiff.parsing import parse_subtitles
+from mod_test.smartdiff.srt import Cue
 
 
 def _result(kind: str, summary: str, n_exp: int, n_act: int,
@@ -35,6 +36,33 @@ def _result(kind: str, summary: str, n_exp: int, n_act: int,
     return out
 
 
+def _content(cues: List[Cue]) -> str:
+    """
+    Join all cues' normalised, whitespace-collapsed text — for split/merge detection.
+
+    :param cues: The parsed cues.
+    :type cues: List[Cue]
+    :return: A single normalised token string spanning every cue.
+    :rtype: str
+    """
+    return ' '.join(' '.join(plain(cue.text).split()) for cue in cues)
+
+
+def _monotonic(values: List[int]) -> bool:
+    """
+    Report whether a sequence is non-decreasing or non-increasing.
+
+    :param values: The sequence to test.
+    :type values: List[int]
+    :return: True if monotonic in either direction.
+    :rtype: bool
+    """
+    pairs = list(zip(values, values[1:]))
+    non_decreasing = all(a <= b for a, b in pairs)
+    non_increasing = all(a >= b for a, b in pairs)
+    return non_decreasing or non_increasing
+
+
 def smart_diff(expected: str, actual: str,
                fmt: Optional[str] = None) -> Dict[str, object]:
     """
@@ -43,10 +71,10 @@ def smart_diff(expected: str, actual: str,
     Supports SubRip (.srt) and WebVTT (.vtt); the format is auto-detected from
     content unless ``fmt`` is given. Aligns cues by position and reports the
     *kind* of difference rather than a raw line diff: ``identical``,
-    ``timing_shift`` (with a consistent offset), ``text_change``,
-    ``formatting_change`` (tags/entities only), ``whitespace_change`` (CEA-608
-    padding only), ``missing_cues``, ``extra_cues``, or ``mixed``. The goal is an
-    actionable answer ("subtitles are +120 ms late") instead of a wall of lines.
+    ``timing_shift`` (constant offset), ``timing_drift`` (growing offset),
+    ``text_change``, ``formatting_change`` (tags/entities only),
+    ``whitespace_change`` (CEA-608 padding only), ``split_cues``,
+    ``merged_cues``, ``missing_cues``, ``extra_cues``, or ``mixed``.
 
     :param expected: The expected/baseline subtitle content.
     :type expected: str
@@ -83,6 +111,8 @@ def smart_diff(expected: str, actual: str,
 
     no_timing_move = all(delta == 0 for delta in timing_deltas)
     uniform_shift = bool(timing_deltas) and len(set(timing_deltas)) == 1
+    varying_timing = len(set(timing_deltas)) > 1
+    drifting = varying_timing and _monotonic(timing_deltas)
     cosmetic_changes = formatting_changes + whitespace_changes
     fully_aligned = text_changes == 0 and cosmetic_changes == 0
 
@@ -97,17 +127,35 @@ def smart_diff(expected: str, actual: str,
             f'All {n_exp} cues match but are {abs(offset)} ms {direction}.',
             n_exp, n_act, offset_ms=offset)
 
-    if count_mismatch and text_changes == 0:
-        if n_act < n_exp:
-            return _result(
-                'missing_cues',
-                f'{n_exp - n_act} of {n_exp} cues are missing from the output.',
-                n_exp, n_act)
+    if not count_mismatch and fully_aligned and drifting:
+        first, last = timing_deltas[0], timing_deltas[-1]
         return _result(
-            'extra_cues',
-            f'Output has {n_act - n_exp} extra cues ({n_act} vs {n_exp} expected).',
+            'timing_drift',
+            f'Timing drifts from {first:+d} ms to {last:+d} ms across {n_exp} cues.',
             n_exp, n_act)
 
+    if count_mismatch:
+        if _content(exp) and _content(exp) == _content(act):
+            if n_act > n_exp:
+                return _result(
+                    'split_cues',
+                    f'Same text, but cues were split: expected {n_exp}, got {n_act}.',
+                    n_exp, n_act)
+            return _result(
+                'merged_cues',
+                f'Same text, but cues were merged: expected {n_exp}, got {n_act}.',
+                n_exp, n_act)
+        if text_changes == 0:
+            if n_act < n_exp:
+                return _result(
+                    'missing_cues',
+                    f'{n_exp - n_act} of {n_exp} cues are missing from the output.',
+                    n_exp, n_act)
+            return _result(
+                'extra_cues',
+                f'Output has {n_act - n_exp} extra cues ({n_act} vs {n_exp} expected).',
+                n_exp, n_act)
+
     if not count_mismatch and no_timing_move:
         if text_changes > 0:
             return _result(
diff --git a/tests/test_smartdiff/fixtures/cea608_real.srt b/tests/test_smartdiff/fixtures/cea608_real.srt
new file mode 100644
index 00000000..d0bf07ab
--- /dev/null
+++ b/tests/test_smartdiff/fixtures/cea608_real.srt
@@ -0,0 +1,9 @@
+﻿1
+00:00:05,956 --> 00:00:07,955
+CCextractor Start crdit Testing
+
+2
+00:00:13,913 --> 00:00:15,080
+>> WHICH OF THESE STORIES WILL  
+YOU BE TALKING ABOUT TRO        
+
diff --git a/tests/test_smartdiff/test_compare.py b/tests/test_smartdiff/test_compare.py
index dbbc034d..00de1746 100644
--- a/tests/test_smartdiff/test_compare.py
+++ b/tests/test_smartdiff/test_compare.py
@@ -90,6 +90,27 @@ def test_formatting_tags_only(self):
         result = smart_diff(_srt(_BASE), _srt(styled))
         self.assertEqual(result["kind"], "formatting_change")
 
+    def test_timing_drift_growing_offset(self):
+        """A growing (not constant) timing offset classifies as timing_drift."""
+        base = [(1000, 2000, "A"), (5000, 6000, "B"), (9000, 10000, "C")]
+        drifted = [(1000, 2000, "A"), (5040, 6040, "B"), (9080, 10080, "C")]
+        result = smart_diff(_srt(base), _srt(drifted))
+        self.assertEqual(result["kind"], "timing_drift")
+
+    def test_split_cues_same_text_more_cues(self):
+        """One cue rendered as two (same words) classifies as split_cues."""
+        one = [(1000, 4000, "hello world")]
+        two = [(1000, 2000, "hello"), (2000, 4000, "world")]
+        result = smart_diff(_srt(one), _srt(two))
+        self.assertEqual(result["kind"], "split_cues")
+
+    def test_merged_cues_same_text_fewer_cues(self):
+        """Two cues collapsed into one (same words) classifies as merged_cues."""
+        two = [(1000, 2000, "hello"), (2000, 4000, "world")]
+        one = [(1000, 4000, "hello world")]
+        result = smart_diff(_srt(two), _srt(one))
+        self.assertEqual(result["kind"], "merged_cues")
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/tests/test_smartdiff/test_fixtures.py b/tests/test_smartdiff/test_fixtures.py
new file mode 100644
index 00000000..cac9c479
--- /dev/null
+++ b/tests/test_smartdiff/test_fixtures.py
@@ -0,0 +1,58 @@
+"""Golden-fixture tests against a real CCExtractor CEA-608 sample output."""
+
+import os
+import unittest
+
+from mod_test.smartdiff.compare import smart_diff
+from mod_test.smartdiff.srt import parse_srt
+
+_FIXTURE = os.path.join(os.path.dirname(__file__), 'fixtures', 'cea608_real.srt')
+
+
+def _load():
+    """
+    Read the vendored real CCExtractor sample.
+
+    :return: The raw .srt content.
+    :rtype: str
+    """
+    with open(_FIXTURE, encoding='utf-8') as handle:
+        return handle.read()
+
+
+class RealSampleTests(unittest.TestCase):
+    """Exercise the smart diff on genuine CCExtractor output, not synthetic strings."""
+
+    def test_parses_real_sample(self):
+        """The real sample parses into its two CEA-608 cues."""
+        cues = parse_srt(_load())
+        self.assertEqual(len(cues), 2)
+        self.assertEqual(cues[0].start_ms, 5956)
+
+    def test_identical_against_itself(self):
+        """The real sample compared with itself is identical."""
+        raw = _load()
+        self.assertEqual(smart_diff(raw, raw)["kind"], "identical")
+
+    def test_constant_shift_on_real_sample(self):
+        """Shifting every timestamp by a constant is detected as timing_shift."""
+        raw = _load()
+        shifted = (raw
+                   .replace('00:00:05,956', '00:00:06,206')
+                   .replace('00:00:07,955', '00:00:08,205')
+                   .replace('00:00:13,913', '00:00:14,163')
+                   .replace('00:00:15,080', '00:00:15,330'))
+        result = smart_diff(raw, shifted)
+        self.assertEqual(result["kind"], "timing_shift")
+        self.assertEqual(result["offset_ms"], 250)
+
+    def test_depadding_is_cosmetic_on_real_sample(self):
+        """Stripping the CEA-608 trailing padding is flagged as cosmetic only."""
+        raw = _load()
+        depadded = '\n'.join(line.rstrip() for line in raw.split('\n'))
+        result = smart_diff(raw, depadded)
+        self.assertIn(result["kind"], ("identical", "whitespace_change"))
+
+
+if __name__ == "__main__":
+    unittest.main()

From 9c96da9738a177d079649b19d0d7e5da8ad5f05b Mon Sep 17 00:00:00 2001
From: GAURAV KARMAKAR <gaurav.k@graeon.ai>
Date: Mon, 29 Jun 2026 23:06:00 +0530
Subject: [PATCH 05/10] feat(smartdiff): detect encoding-only differences
 (non-ASCII/accents)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add ascii_fold() and an 'encoding' text category so the comparator can tell a
charset difference (e.g. CCExtractor's -latin1 output: 'Voilà' vs 'Voila')
from a real word change. Surfaced as a new 'encoding_change' classification.
---
 mod_test/smartdiff/compare.py          | 20 +++++++++++++-----
 mod_test/smartdiff/normalize.py        | 28 ++++++++++++++++++++++++--
 tests/test_smartdiff/test_compare.py   |  7 +++++++
 tests/test_smartdiff/test_normalize.py | 12 +++++++++--
 4 files changed, 58 insertions(+), 9 deletions(-)

diff --git a/mod_test/smartdiff/compare.py b/mod_test/smartdiff/compare.py
index 801c108f..db0b1366 100644
--- a/mod_test/smartdiff/compare.py
+++ b/mod_test/smartdiff/compare.py
@@ -73,8 +73,9 @@ def smart_diff(expected: str, actual: str,
     *kind* of difference rather than a raw line diff: ``identical``,
     ``timing_shift`` (constant offset), ``timing_drift`` (growing offset),
     ``text_change``, ``formatting_change`` (tags/entities only),
-    ``whitespace_change`` (CEA-608 padding only), ``split_cues``,
-    ``merged_cues``, ``missing_cues``, ``extra_cues``, or ``mixed``.
+    ``whitespace_change`` (CEA-608 padding only), ``encoding_change``
+    (non-ASCII/accented characters only), ``split_cues``, ``merged_cues``,
+    ``missing_cues``, ``extra_cues``, or ``mixed``.
 
     :param expected: The expected/baseline subtitle content.
     :type expected: str
@@ -94,6 +95,7 @@ def smart_diff(expected: str, actual: str,
     text_changes = 0
     formatting_changes = 0
     whitespace_changes = 0
+    encoding_changes = 0
     raw_matches = True
     timing_deltas: List[int] = []
     for expected_cue, actual_cue in zip(exp, act):
@@ -107,13 +109,15 @@ def smart_diff(expected: str, actual: str,
             formatting_changes += 1
         elif category == 'whitespace':
             whitespace_changes += 1
+        elif category == 'encoding':
+            encoding_changes += 1
         timing_deltas.append(actual_cue.start_ms - expected_cue.start_ms)
 
     no_timing_move = all(delta == 0 for delta in timing_deltas)
     uniform_shift = bool(timing_deltas) and len(set(timing_deltas)) == 1
     varying_timing = len(set(timing_deltas)) > 1
     drifting = varying_timing and _monotonic(timing_deltas)
-    cosmetic_changes = formatting_changes + whitespace_changes
+    cosmetic_changes = formatting_changes + whitespace_changes + encoding_changes
     fully_aligned = text_changes == 0 and cosmetic_changes == 0
 
     if not count_mismatch and raw_matches and no_timing_move:
@@ -162,13 +166,19 @@ def smart_diff(expected: str, actual: str,
                 'text_change',
                 f'{text_changes} of {n_exp} cues differ in text (timing aligned).',
                 n_exp, n_act)
-        if formatting_changes > 0 and whitespace_changes == 0:
+        if encoding_changes > 0 and formatting_changes == 0 and whitespace_changes == 0:
+            return _result(
+                'encoding_change',
+                f'{encoding_changes} of {n_exp} cues differ only in character '
+                f'encoding (non-ASCII/accented characters).',
+                n_exp, n_act)
+        if formatting_changes > 0 and whitespace_changes == 0 and encoding_changes == 0:
             return _result(
                 'formatting_change',
                 f'{formatting_changes} of {n_exp} cues differ only in formatting '
                 f'(tags/entities), not text.',
                 n_exp, n_act)
-        if whitespace_changes > 0 and formatting_changes == 0:
+        if whitespace_changes > 0 and formatting_changes == 0 and encoding_changes == 0:
             return _result(
                 'whitespace_change',
                 f'{whitespace_changes} of {n_exp} cues differ only in trailing '
diff --git a/mod_test/smartdiff/normalize.py b/mod_test/smartdiff/normalize.py
index 3ecfb2e8..6a7d1cdd 100644
--- a/mod_test/smartdiff/normalize.py
+++ b/mod_test/smartdiff/normalize.py
@@ -8,6 +8,7 @@
 """
 
 import re
+import unicodedata
 
 _TAG_RE = re.compile(r'<[^>]+>')
 
@@ -69,6 +70,23 @@ def plain(text: str) -> str:
     return rstrip_lines(strip_tags(unescape(text)))
 
 
+def ascii_fold(text: str) -> str:
+    """
+    Fold text to ASCII by decomposing accents and dropping non-ASCII characters.
+
+    Lets the comparator tell a charset/encoding difference (e.g. CCExtractor's
+    ``-latin1`` output) from a real word change: 'Voilà' and 'Voila' share an
+    ASCII skeleton, so only their non-ASCII characters differ.
+
+    :param text: Raw cue text.
+    :type text: str
+    :return: The ASCII skeleton of the text.
+    :rtype: str
+    """
+    decomposed = unicodedata.normalize('NFKD', text)
+    return ''.join(ch for ch in decomposed if ord(ch) < 128)
+
+
 def classify_text_pair(expected: str, actual: str) -> str:
     """
     Classify how two cue texts differ, ignoring progressively more cosmetics.
@@ -78,13 +96,19 @@ def classify_text_pair(expected: str, actual: str) -> str:
     :param actual: Actual cue text.
     :type actual: str
     :return: ``match`` (identical), ``whitespace`` (only trailing padding differs),
-        ``formatting`` (only tags/entities differ), or ``text`` (a real change).
+        ``formatting`` (only tags/entities differ), ``encoding`` (only non-ASCII
+        characters differ), or ``text`` (a real change).
     :rtype: str
     """
     if expected == actual:
         return 'match'
     if rstrip_lines(expected) == rstrip_lines(actual):
         return 'whitespace'
-    if plain(expected) == plain(actual):
+    expected_plain = plain(expected)
+    actual_plain = plain(actual)
+    if expected_plain == actual_plain:
         return 'formatting'
+    has_non_ascii = any(ord(ch) > 127 for ch in expected_plain + actual_plain)
+    if has_non_ascii and ascii_fold(expected_plain) == ascii_fold(actual_plain):
+        return 'encoding'
     return 'text'
diff --git a/tests/test_smartdiff/test_compare.py b/tests/test_smartdiff/test_compare.py
index 00de1746..ef3e4e27 100644
--- a/tests/test_smartdiff/test_compare.py
+++ b/tests/test_smartdiff/test_compare.py
@@ -111,6 +111,13 @@ def test_merged_cues_same_text_fewer_cues(self):
         result = smart_diff(_srt(two), _srt(one))
         self.assertEqual(result["kind"], "merged_cues")
 
+    def test_encoding_change_non_ascii_only(self):
+        """A charset difference (accents only, e.g. -latin1) is flagged as encoding."""
+        accented = [(1000, 4000, "Voilà"), (5000, 8000, "naïve café")]
+        folded = [(1000, 4000, "Voila"), (5000, 8000, "naive cafe")]
+        result = smart_diff(_srt(accented), _srt(folded))
+        self.assertEqual(result["kind"], "encoding_change")
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/tests/test_smartdiff/test_normalize.py b/tests/test_smartdiff/test_normalize.py
index 6d7183d2..9c107ccc 100644
--- a/tests/test_smartdiff/test_normalize.py
+++ b/tests/test_smartdiff/test_normalize.py
@@ -2,8 +2,8 @@
 
 import unittest
 
-from mod_test.smartdiff.normalize import (classify_text_pair, plain,
-                                          strip_tags, unescape)
+from mod_test.smartdiff.normalize import (ascii_fold, classify_text_pair,
+                                          plain, strip_tags, unescape)
 
 
 class NormalizeTests(unittest.TestCase):
@@ -33,6 +33,14 @@ def test_classify_formatting_only(self):
         """A tags-only difference classifies as formatting."""
         self.assertEqual(classify_text_pair('hello', '<i>hello</i>'), 'formatting')
 
+    def test_ascii_fold_decomposes_accents(self):
+        """ascii_fold strips accents and drops non-ASCII characters."""
+        self.assertEqual(ascii_fold('Voilà café ♪'), 'Voila cafe ')
+
+    def test_classify_encoding_only(self):
+        """A non-ASCII/accent-only difference classifies as encoding."""
+        self.assertEqual(classify_text_pair('PRÉCIS', 'PRECIS'), 'encoding')
+
     def test_classify_real_text_change(self):
         """A genuine text change classifies as text."""
         self.assertEqual(classify_text_pair('hello', 'goodbye'), 'text')

From 27ab78bf54d027c02f6b282b494ec2bbe7bdf67e Mon Sep 17 00:00:00 2001
From: GAURAV KARMAKAR <gaurav.k@graeon.ai>
Date: Tue, 30 Jun 2026 00:05:10 +0530
Subject: [PATCH 06/10] test(smartdiff): real DVB Spanish golden fixture +
 strict & robustness tests

- Vendor dvb_spanish_real.srt: a genuine CCExtractor DVB Spanish output with
  <font> colour tags and accented text. Security-scanned before vendoring
  (no paths/IPs/emails/URLs/secrets) and verified valid UTF-8.
- Strict fixture tests assert exact kinds and values on real output:
  identical, timing_shift (offset 500), formatting_change (font tags),
  encoding_change (accent folding), missing_cues.
- Robustness tests: malformed/empty/control-byte/garbage input must classify
  cleanly and never raise.

Note: the available "Chinese" DVB samples were failed OCR output (no real CJK
text, and invalid UTF-8), so they were deliberately not vendored.
---
 .../fixtures/dvb_spanish_real.srt             |  59 ++++++++
 tests/test_smartdiff/test_fixtures.py         | 133 ++++++++++++++----
 2 files changed, 163 insertions(+), 29 deletions(-)
 create mode 100644 tests/test_smartdiff/fixtures/dvb_spanish_real.srt

diff --git a/tests/test_smartdiff/fixtures/dvb_spanish_real.srt b/tests/test_smartdiff/fixtures/dvb_spanish_real.srt
new file mode 100644
index 00000000..76a633c4
--- /dev/null
+++ b/tests/test_smartdiff/fixtures/dvb_spanish_real.srt
@@ -0,0 +1,59 @@
+1
+00:00:00,480 --> 00:01:05,479
+<font color="#ffffff">Para continuar con este debate,</font>
+
+2
+00:00:06,080 --> 00:01:11,079
+<font color="#ffffff">gusted cree que si los partidarios</font>
+<font color="#ffffff">de Errejon fuesen derrotados</font>
+
+3
+00:00:09,880 --> 00:01:14,879
+<font color="#ffffff">su propuesta en) Vistalegre,</font>
+
+4
+00:00:12,920 --> 00:01:17,919
+<font color="#ffffff">Podemos deberia cambiar de portavoz</font>
+<font color="#ffffff">parlamentario?</font>
+
+5
+00:00:19,080 --> 00:01:24,079
+<font color="#ffffff">éPuede representar al partido</font>
+
+6
+00:00:21,400 --> 00:01:26,399
+<font color="#ffffff">en el Congreso alguienque'se ha</font>
+<font color="#ffffff">quedado en minoria</font>
+
+7
+00:00:24,200 --> 00:01:29,199
+<font color="#ffffff">dentro del partido?</font>
+
+8
+00:00:30,640 --> 00:01:35,639
+<font color="#ffffff">-Deciden los organos del partido la</font>
+<font color="#ffffff">linea de accion politica</font>
+
+9
+00:00:34,200 --> 00:01:39,199
+<font color="#ffffff">dentro del partido.</font>
+
+10
+00:00:43,120 --> 00:01:48,119
+<font color="#ffffff">Debemos acatar las decisiones</font>
+<font color="#ffffff">colectivas.</font>
+
+11
+00:00:48,600 --> 00:01:53,599
+<font color="#ffffff">Si inicio Errejon reconoce que'se</font>
+<font color="#ffffff">ven esas lineas,</font>
+
+12
+00:00:51,760 --> 00:01:56,759
+<font color="#ffffff">debe seguir adelante.</font>
+
+13
+00:00:53,240 --> 00:01:58,239
+<font color="#ffffff">Solo.es canalizarla voz della</font>
+<font color="#ffffff">decision politica del partido.</font>
+
diff --git a/tests/test_smartdiff/test_fixtures.py b/tests/test_smartdiff/test_fixtures.py
index cac9c479..997f08dc 100644
--- a/tests/test_smartdiff/test_fixtures.py
+++ b/tests/test_smartdiff/test_fixtures.py
@@ -1,57 +1,132 @@
-"""Golden-fixture tests against a real CCExtractor CEA-608 sample output."""
+"""Golden-fixture tests against real CCExtractor output, plus input robustness.
+
+The fixtures are genuine CCExtractor outputs (not synthetic strings):
+- ``cea608_real.srt``: a CEA-608 broadcast caption sample (trailing padding).
+- ``dvb_spanish_real.srt``: a DVB Spanish sample with ``<font>`` colour tags and
+  accented characters. Both were security-scanned before vendoring (no paths,
+  IPs, emails, URLs, or secrets) and are valid UTF-8.
+"""
 
 import os
 import unittest
 
 from mod_test.smartdiff.compare import smart_diff
-from mod_test.smartdiff.srt import parse_srt
+from mod_test.smartdiff.normalize import ascii_fold, strip_tags
+from mod_test.smartdiff.srt import Cue, parse_srt
 
-_FIXTURE = os.path.join(os.path.dirname(__file__), 'fixtures', 'cea608_real.srt')
+_FIXTURES = os.path.join(os.path.dirname(__file__), 'fixtures')
 
 
-def _load():
+def _load(name):
     """
-    Read the vendored real CCExtractor sample.
+    Read a vendored fixture as UTF-8.
 
-    :return: The raw .srt content.
+    :param name: Fixture file name.
+    :type name: str
+    :return: The file content.
     :rtype: str
     """
-    with open(_FIXTURE, encoding='utf-8') as handle:
+    with open(os.path.join(_FIXTURES, name), encoding='utf-8') as handle:
         return handle.read()
 
 
-class RealSampleTests(unittest.TestCase):
-    """Exercise the smart diff on genuine CCExtractor output, not synthetic strings."""
+def _emit(cues):
+    """
+    Serialise cues back to SubRip text (for building timing-shifted variants).
+
+    :param cues: The cues to serialise.
+    :type cues: list
+    :return: SubRip-formatted text.
+    :rtype: str
+    """
+    def stamp(ms):
+        hours, ms = divmod(ms, 3600000)
+        minutes, ms = divmod(ms, 60000)
+        seconds, ms = divmod(ms, 1000)
+        return f"{hours:02d}:{minutes:02d}:{seconds:02d},{ms:03d}"
+
+    return "\n".join(f"{i}\n{stamp(c.start_ms)} --> {stamp(c.end_ms)}\n{c.text}\n"
+                     for i, c in enumerate(cues, 1))
+
+
+class Cea608RealTests(unittest.TestCase):
+    """Smart diff on a genuine CEA-608 broadcast caption sample."""
 
     def test_parses_real_sample(self):
         """The real sample parses into its two CEA-608 cues."""
-        cues = parse_srt(_load())
+        cues = parse_srt(_load('cea608_real.srt'))
         self.assertEqual(len(cues), 2)
         self.assertEqual(cues[0].start_ms, 5956)
 
     def test_identical_against_itself(self):
         """The real sample compared with itself is identical."""
-        raw = _load()
-        self.assertEqual(smart_diff(raw, raw)["kind"], "identical")
-
-    def test_constant_shift_on_real_sample(self):
-        """Shifting every timestamp by a constant is detected as timing_shift."""
-        raw = _load()
-        shifted = (raw
-                   .replace('00:00:05,956', '00:00:06,206')
-                   .replace('00:00:07,955', '00:00:08,205')
-                   .replace('00:00:13,913', '00:00:14,163')
-                   .replace('00:00:15,080', '00:00:15,330'))
-        result = smart_diff(raw, shifted)
-        self.assertEqual(result["kind"], "timing_shift")
-        self.assertEqual(result["offset_ms"], 250)
-
-    def test_depadding_is_cosmetic_on_real_sample(self):
+        raw = _load('cea608_real.srt')
+        self.assertEqual(smart_diff(raw, raw)['kind'], 'identical')
+
+    def test_depadding_is_cosmetic(self):
         """Stripping the CEA-608 trailing padding is flagged as cosmetic only."""
-        raw = _load()
+        raw = _load('cea608_real.srt')
         depadded = '\n'.join(line.rstrip() for line in raw.split('\n'))
-        result = smart_diff(raw, depadded)
-        self.assertIn(result["kind"], ("identical", "whitespace_change"))
+        self.assertIn(smart_diff(raw, depadded)['kind'],
+                      ('identical', 'whitespace_change'))
+
+
+class DvbSpanishRealTests(unittest.TestCase):
+    """Smart diff on a real DVB Spanish output (font colour tags + accents)."""
+
+    def test_parses_with_tags_and_accents(self):
+        """The fixture has 13 cues carrying both font tags and non-ASCII text."""
+        cues = parse_srt(_load('dvb_spanish_real.srt'))
+        self.assertEqual(len(cues), 13)
+        self.assertTrue(any('<font' in c.text for c in cues))
+        self.assertTrue(any(ord(ch) > 127 for c in cues for ch in c.text))
+
+    def test_identical(self):
+        """The fixture compared with itself is identical."""
+        raw = _load('dvb_spanish_real.srt')
+        self.assertEqual(smart_diff(raw, raw)['kind'], 'identical')
+
+    def test_constant_timing_shift(self):
+        """Shifting every cue by +500 ms is detected with the exact offset."""
+        cues = parse_srt(_load('dvb_spanish_real.srt'))
+        shifted = [Cue(c.index, c.start_ms + 500, c.end_ms + 500, c.text) for c in cues]
+        result = smart_diff(_emit(cues), _emit(shifted))
+        self.assertEqual(result['kind'], 'timing_shift')
+        self.assertEqual(result['offset_ms'], 500)
+
+    def test_font_tags_are_formatting_only(self):
+        """Removing the <font> colour tags is classified as formatting, not text."""
+        raw = _load('dvb_spanish_real.srt')
+        self.assertEqual(smart_diff(raw, strip_tags(raw))['kind'], 'formatting_change')
+
+    def test_accent_folding_is_encoding(self):
+        """Folding the accented characters is classified as an encoding difference."""
+        raw = _load('dvb_spanish_real.srt')
+        self.assertEqual(smart_diff(raw, ascii_fold(raw))['kind'], 'encoding_change')
+
+    def test_dropped_cues_are_missing(self):
+        """Dropping the last three cues is reported as missing_cues."""
+        cues = parse_srt(_load('dvb_spanish_real.srt'))
+        result = smart_diff(_emit(cues), _emit(cues[:-3]))
+        self.assertEqual(result['kind'], 'missing_cues')
+
+
+class RobustnessTests(unittest.TestCase):
+    """Malformed or hostile input must classify cleanly, never crash."""
+
+    def test_parser_survives_garbage(self):
+        """The parser returns a list for empty, junk, and control-byte input."""
+        for junk in ['', 'not a subtitle', '\x00\x01\x02', '1\nno timing line\n']:
+            self.assertIsInstance(parse_srt(junk), list)
+
+    def test_smart_diff_on_empty_inputs(self):
+        """Two empty inputs are identical, not an error."""
+        self.assertEqual(smart_diff('', '')['kind'], 'identical')
+
+    def test_smart_diff_garbage_vs_real(self):
+        """Garbage against a real sample classifies without raising."""
+        result = smart_diff('garbage with no cues', _load('dvb_spanish_real.srt'))
+        self.assertIn('kind', result)
 
 
 if __name__ == "__main__":

From 57b0a253d6c8f889092a8b940511e99937e7caf1 Mon Sep 17 00:00:00 2001
From: GAURAV KARMAKAR <gaurav.k@graeon.ai>
Date: Tue, 30 Jun 2026 00:35:48 +0530
Subject: [PATCH 07/10] feat(smartdiff): expose smart diff via endpoint + a
 "Smart" option in the UI

- TestResultFile.generate_smart_diff(): reads the expected/actual output files
  (reusing the encoding-tolerant read_lines) and returns a semantic
  classification via smart_diff.
- New JSON endpoint GET /diff/<test>/<regression>/<output>/smart, reusable by
  the web UI, the CLI, and agents. Returns 'unavailable' gracefully if the
  output files are not on disk.
- Result page: a "Smart" link next to each "Fail" diff link opens a small
  popup with the difference kind + summary (additive, opt-in).

Includes a unit test of the model glue against real on-disk files.
---
 mod_test/controllers.py                       | 38 +++++++++++
 mod_test/models.py                            | 27 ++++++++
 templates/test/by_id.html                     | 30 ++++++++-
 .../test_smartdiff/test_model_integration.py  | 63 +++++++++++++++++++
 4 files changed, 156 insertions(+), 2 deletions(-)
 create mode 100644 tests/test_smartdiff/test_model_integration.py

diff --git a/mod_test/controllers.py b/mod_test/controllers.py
index 4c2477b8..19bff476 100644
--- a/mod_test/controllers.py
+++ b/mod_test/controllers.py
@@ -375,6 +375,44 @@ def generate_diff(test_id: int, regression_test_id: int, output_id: int, to_view
     abort(404)
 
 
+@mod_test.route('/diff/<test_id>/<regression_test_id>/<output_id>/smart')
+def smart_diff_view(test_id: int, regression_test_id: int, output_id: int):
+    """
+    Return a semantic (smart) diff classification for an output as JSON.
+
+    Unlike the line diff, this reports *how* the output differs (timing shift,
+    cosmetic padding/formatting/encoding, text change, missing/extra cues).
+    Aborts with 404 if no result file matches; OSError yields ``unavailable``.
+
+    :param test_id: id of the test
+    :type test_id: int
+    :param regression_test_id: id of the regression test
+    :type regression_test_id: int
+    :param output_id: id of the generated output
+    :type output_id: int
+    :return: JSON classification of the difference.
+    :rtype: flask.Response
+    """
+    from run import config
+
+    result = TestResultFile.query.filter(and_(
+        TestResultFile.test_id == test_id,
+        TestResultFile.regression_test_id == regression_test_id,
+        TestResultFile.regression_test_output_id == output_id
+    )).first()
+
+    if result is None:
+        abort(404)
+
+    path = os.path.join(config.get('SAMPLE_REPOSITORY', ''), 'TestResults')
+    try:
+        classification = result.generate_smart_diff(path)
+    except OSError:
+        classification = {'kind': 'unavailable',
+                          'summary': 'Output files are not available locally.'}
+    return jsonify(classification)
+
+
 @mod_test.route('/log-files/<test_id>')
 @login_required
 def download_build_log_file(test_id):
diff --git a/mod_test/models.py b/mod_test/models.py
index 1463a0f3..b6ad6385 100644
--- a/mod_test/models.py
+++ b/mod_test/models.py
@@ -455,3 +455,30 @@ def read_lines(file_name: str) -> List[str]:
             return open(file_name, encoding='utf8').readlines()
         except UnicodeDecodeError:
             return open(file_name, encoding='cp1252').readlines()
+
+    def generate_smart_diff(self, base_path: str) -> dict:
+        """
+        Produce a semantic classification of actual versus expected output.
+
+        A falsy ``got`` yields ``identical`` without reading any file; otherwise
+        both output files are read with ``read_lines`` and handed to
+        ``smart_diff`` along with the lower-cased extension as the format.
+
+        :param base_path: The base path for the files location.
+        :type base_path: str
+        :return: A smart-diff classification with ``kind`` and ``summary`` keys.
+        :rtype: dict
+        """
+        from mod_test.smartdiff.compare import smart_diff
+
+        if not self.got:
+            return {'kind': 'identical',
+                    'summary': 'Output matches the expected baseline.'}
+
+        suffix = self.regression_test_output.correct_extension
+        baseline_path = os.path.join(base_path, self.expected + suffix)
+        produced_path = os.path.join(base_path, self.got + suffix)
+        baseline = ''.join(self.read_lines(baseline_path))
+        produced = ''.join(self.read_lines(produced_path))
+        subtitle_format = suffix.lstrip('.').lower() or None
+        return smart_diff(baseline, produced, fmt=subtitle_format)
diff --git a/templates/test/by_id.html b/templates/test/by_id.html
index a54df860..1228d4ce 100644
--- a/templates/test/by_id.html
+++ b/templates/test/by_id.html
@@ -149,14 +149,14 @@ <h4 class="category-header {{ 'fail' if result.error else 'pass' }}" data-catego
                                                             {% if file.got is none or no_error.found-%}
                                                                 Fail
                                                             {% else %}
-                                                                <a href="#" class="diff_link" data-test="{{ file.test_id }}" data-regression="{{ file.regression_test_id }}" data-output="{{ file.regression_test_output_id }}">Fail</a>
+                                                                <a href="#" class="diff_link" data-test="{{ file.test_id }}" data-regression="{{ file.regression_test_id }}" data-output="{{ file.regression_test_output_id }}">Fail</a> · <a href="#" class="smart_diff_link" data-test="{{ file.test_id }}" data-regression="{{ file.regression_test_id }}" data-output="{{ file.regression_test_output_id }}">Smart</a>
                                                             {%- endif %}
                                                         {% elif file.got is none or no_error.found or test.result.exit_code != 0 -%}
                                                             Pass
                                                         {% elif file.got == "error" %}
                                                             No output generated but there should be
                                                         {% else %}
-                                                            <a href="#" class="diff_link" data-test="{{ file.test_id }}" data-regression="{{ file.regression_test_id }}" data-output="{{ file.regression_test_output_id }}">Fail</a>
+                                                            <a href="#" class="diff_link" data-test="{{ file.test_id }}" data-regression="{{ file.regression_test_id }}" data-output="{{ file.regression_test_output_id }}">Fail</a> · <a href="#" class="smart_diff_link" data-test="{{ file.test_id }}" data-regression="{{ file.regression_test_id }}" data-output="{{ file.regression_test_output_id }}">Smart</a>
                                                         {%- endif %}
                                                         {% if not loop.last %}<br />{% endif %}
                                                     {% else %}
@@ -298,6 +298,32 @@ <h6>There are no tests executed in this category.</h6>
                     popup.open();
                 });
             });
+            $('.smart_diff_link').on('click', function(){
+                // Show the smart diff kind + summary in a popup. NOTE(review): both are inserted as HTML unescaped — confirm the endpoint only returns server-generated text.
+                var url = '{{ url_for('test.smart_diff_view', test_id='_0_', regression_test_id='_1_', output_id='_2_') }}';
+                url = url.replace('_0_', $(this).data('test')).replace('_1_', $(this).data('regression')).replace('_2_', $(this).data('output'));
+
+                $.getJSON(url).done(function(resp){
+                    var id, reveal, popup;
+
+                    reveal = document.createElement('div');
+                    id = 'smart-diff-popup-'+(new Date()).getTime();
+                    reveal.setAttribute('id', id);
+                    reveal.setAttribute('class', 'reveal');
+                    reveal.setAttribute('data-reveal', '');
+                    reveal.innerHTML =
+                        '<h4>Smart diff</h4>' +
+                        '<p><span class="label">' + (resp.kind || 'unknown') + '</span></p>' +
+                        '<p>' + (resp.summary || '') + '</p>';
+                    reveal.innerHTML +=
+                        '<button class="close-button" data-close aria-label="Close" type="button">' +
+                        '   <span aria-hidden="true">&times;</span>' +
+                        '</button>';
+                    document.body.appendChild(reveal);
+                    popup = new Foundation.Reveal($('#'+id));
+                    popup.open();
+                });
+            });
         });
     </script>
 {% endblock %}
diff --git a/tests/test_smartdiff/test_model_integration.py b/tests/test_smartdiff/test_model_integration.py
new file mode 100644
index 00000000..0b8c715e
--- /dev/null
+++ b/tests/test_smartdiff/test_model_integration.py
@@ -0,0 +1,63 @@
+"""Tests for TestResultFile.generate_smart_diff (the model glue) against real files.
+
+The method is exercised with a lightweight stand-in ``self`` so the test stays a
+fast unit test (no database/ORM mapper configuration required).
+"""
+
+import os
+import tempfile
+import unittest
+from unittest import mock
+
+from mod_test.models import TestResultFile
+
+_CUE = "1\n00:00:01,000 --> 00:00:04,000\nHello world\n"
+
+
+def _run(expected_text, got_text, ext='.srt', got='GOT'):
+    """
+    Write two outputs to a self-cleaning temp dir and run generate_smart_diff.
+
+    :param expected_text: Expected output content.
+    :type expected_text: str
+    :param got_text: Actual output content.
+    :type got_text: str
+    :param ext: Output file extension.
+    :type ext: str
+    :param got: The 'got' hash (None means the output matched the baseline).
+    :type got: str
+    :return: The smart-diff classification.
+    :rtype: dict
+    """
+    with tempfile.TemporaryDirectory() as base:
+        with open(os.path.join(base, 'EXP' + ext), 'w', encoding='utf-8') as handle:
+            handle.write(expected_text)
+        with open(os.path.join(base, 'GOT' + ext), 'w', encoding='utf-8') as handle:
+            handle.write(got_text)
+        stub = mock.Mock()
+        stub.expected = 'EXP'
+        stub.got = got
+        stub.regression_test_output.correct_extension = ext
+        stub.read_lines = TestResultFile.read_lines
+        return TestResultFile.generate_smart_diff(stub, base)
+
+
+class GenerateSmartDiffTests(unittest.TestCase):
+    """The model method reads the on-disk outputs and classifies the difference."""
+
+    def test_identical(self):
+        """Equal on-disk outputs classify as identical."""
+        self.assertEqual(_run(_CUE, _CUE)['kind'], 'identical')
+
+    def test_timing_shift(self):
+        """A shifted output is classified as a timing shift."""
+        shifted = "1\n00:00:01,500 --> 00:00:04,500\nHello world\n"
+        self.assertEqual(_run(_CUE, shifted)['kind'], 'timing_shift')
+
+    def test_missing_got_is_identical(self):
+        """A null 'got' (output matched the baseline) short-circuits to identical."""
+        self.assertEqual(_run(_CUE, _CUE, got=None)['kind'], 'identical')
+
+
+if __name__ == "__main__":
+    unittest.main()

From 1b1e75e231f57121cb0ec559137a8439a36fd859 Mon Sep 17 00:00:00 2001
From: GAURAV KARMAKAR <gaurav.k@graeon.ai>
Date: Tue, 30 Jun 2026 01:22:41 +0530
Subject: [PATCH 08/10] feat(smartdiff): per-cue change detail (which cues,
 expected/actual, offset)

smart_diff now returns a capped 'changes' list alongside the verdict: each
changed cue with its kind, a per-cue timing offset, and (for text changes)
expected/actual snippets. This gives an agent the structured detail to act on
without scraping the raw HTML diff. The web "Smart" popup lists these changes
(HTML-escaped). Result shape stays backward compatible (additive).
---
 mod_test/smartdiff/compare.py        | 83 +++++++++++++++++++++-------
 templates/test/by_id.html            | 10 ++++
 tests/test_smartdiff/test_compare.py | 20 +++++++
 3 files changed, 92 insertions(+), 21 deletions(-)

diff --git a/mod_test/smartdiff/compare.py b/mod_test/smartdiff/compare.py
index db0b1366..4fc79f9f 100644
--- a/mod_test/smartdiff/compare.py
+++ b/mod_test/smartdiff/compare.py
@@ -6,6 +6,9 @@
 from mod_test.smartdiff.parsing import parse_subtitles
 from mod_test.smartdiff.srt import Cue
 
+#: Cap on the number of per-cue change entries returned in a result.
+_MAX_CHANGES = 25
+
 
 def _result(kind: str, summary: str, n_exp: int, n_act: int,
             offset_ms: Optional[int] = None) -> Dict[str, object]:
@@ -63,6 +66,21 @@ def _monotonic(values: List[int]) -> bool:
     return non_decreasing or non_increasing
 
 
+def _snippet(text: str, limit: int = 80) -> str:
+    """
+    Flatten cue text onto one line and cap its length for change details.
+
+    :param text: Raw cue text.
+    :type text: str
+    :param limit: Number of characters kept before an ellipsis is appended.
+    :type limit: int
+    :return: Single-line text, cut to ``limit`` characters plus '…' when longer.
+    :rtype: str
+    """
+    collapsed = ' '.join(text.split())
+    return collapsed[:limit] + '…' if len(collapsed) > limit else collapsed
+
+
 def smart_diff(expected: str, actual: str,
                fmt: Optional[str] = None) -> Dict[str, object]:
     """
@@ -98,20 +116,33 @@ def smart_diff(expected: str, actual: str,
     encoding_changes = 0
     raw_matches = True
     timing_deltas: List[int] = []
-    for expected_cue, actual_cue in zip(exp, act):
+    changes: List[Dict[str, object]] = []
+    for position, (expected_cue, actual_cue) in enumerate(zip(exp, act), start=1):
         category = classify_text_pair(expected_cue.text, actual_cue.text)
+        delta = actual_cue.start_ms - expected_cue.start_ms
         if category != 'match':
             raw_matches = False
         if category == 'text':
             text_changes += 1
-            continue
-        if category == 'formatting':
-            formatting_changes += 1
-        elif category == 'whitespace':
-            whitespace_changes += 1
-        elif category == 'encoding':
-            encoding_changes += 1
-        timing_deltas.append(actual_cue.start_ms - expected_cue.start_ms)
+        else:
+            if category == 'formatting':
+                formatting_changes += 1
+            elif category == 'whitespace':
+                whitespace_changes += 1
+            elif category == 'encoding':
+                encoding_changes += 1
+            timing_deltas.append(delta)
+        if category != 'match' or delta != 0:
+            entry: Dict[str, object] = {
+                'cue': position,
+                'kind': category if category != 'match' else 'timing',
+            }
+            if category == 'text':
+                entry['expected'] = _snippet(expected_cue.text)
+                entry['actual'] = _snippet(actual_cue.text)
+            if delta != 0:
+                entry['offset_ms'] = delta
+            changes.append(entry)
 
     no_timing_move = all(delta == 0 for delta in timing_deltas)
     uniform_shift = bool(timing_deltas) and len(set(timing_deltas)) == 1
@@ -120,20 +151,30 @@ def smart_diff(expected: str, actual: str,
     cosmetic_changes = formatting_changes + whitespace_changes + encoding_changes
     fully_aligned = text_changes == 0 and cosmetic_changes == 0
 
+    def _finish(kind: str, summary: str, exp_count: int, act_count: int,
+                offset_ms: Optional[int] = None) -> Dict[str, object]:
+        """Attach the (capped) per-cue change list to a classification result."""
+        out = _result(kind, summary, exp_count, act_count, offset_ms)
+        if changes:
+            out['changes'] = changes[:_MAX_CHANGES]
+            if len(changes) > _MAX_CHANGES:
+                out['changes_truncated'] = True
+        return out
+
     if not count_mismatch and raw_matches and no_timing_move:
-        return _result('identical', 'Outputs are identical.', n_exp, n_act)
+        return _finish('identical', 'Outputs are identical.', n_exp, n_act)
 
     if not count_mismatch and fully_aligned and uniform_shift and timing_deltas[0] != 0:
         offset = timing_deltas[0]
         direction = 'late' if offset > 0 else 'early'
-        return _result(
+        return _finish(
             'timing_shift',
             f'All {n_exp} cues match but are {abs(offset)} ms {direction}.',
             n_exp, n_act, offset_ms=offset)
 
     if not count_mismatch and fully_aligned and drifting:
         first, last = timing_deltas[0], timing_deltas[-1]
-        return _result(
+        return _finish(
             'timing_drift',
             f'Timing drifts from {first:+d} ms to {last:+d} ms across {n_exp} cues.',
             n_exp, n_act)
@@ -141,51 +182,51 @@ def smart_diff(expected: str, actual: str,
     if count_mismatch:
         if _content(exp) and _content(exp) == _content(act):
             if n_act > n_exp:
-                return _result(
+                return _finish(
                     'split_cues',
                     f'Same text, but cues were split: expected {n_exp}, got {n_act}.',
                     n_exp, n_act)
-            return _result(
+            return _finish(
                 'merged_cues',
                 f'Same text, but cues were merged: expected {n_exp}, got {n_act}.',
                 n_exp, n_act)
         if text_changes == 0:
             if n_act < n_exp:
-                return _result(
+                return _finish(
                     'missing_cues',
                     f'{n_exp - n_act} of {n_exp} cues are missing from the output.',
                     n_exp, n_act)
-            return _result(
+            return _finish(
                 'extra_cues',
                 f'Output has {n_act - n_exp} extra cues ({n_act} vs {n_exp} expected).',
                 n_exp, n_act)
 
     if not count_mismatch and no_timing_move:
         if text_changes > 0:
-            return _result(
+            return _finish(
                 'text_change',
                 f'{text_changes} of {n_exp} cues differ in text (timing aligned).',
                 n_exp, n_act)
         if encoding_changes > 0 and formatting_changes == 0 and whitespace_changes == 0:
-            return _result(
+            return _finish(
                 'encoding_change',
                 f'{encoding_changes} of {n_exp} cues differ only in character '
                 f'encoding (non-ASCII/accented characters).',
                 n_exp, n_act)
         if formatting_changes > 0 and whitespace_changes == 0 and encoding_changes == 0:
-            return _result(
+            return _finish(
                 'formatting_change',
                 f'{formatting_changes} of {n_exp} cues differ only in formatting '
                 f'(tags/entities), not text.',
                 n_exp, n_act)
         if whitespace_changes > 0 and formatting_changes == 0 and encoding_changes == 0:
-            return _result(
+            return _finish(
                 'whitespace_change',
                 f'{whitespace_changes} of {n_exp} cues differ only in trailing '
                 f'whitespace/padding.',
                 n_exp, n_act)
 
-    return _result(
+    return _finish(
         'mixed',
         f'Mixed differences across {min(n_exp, n_act)} compared cues; '
         f'expected {n_exp}, got {n_act}.',
diff --git a/templates/test/by_id.html b/templates/test/by_id.html
index 1228d4ce..c9376231 100644
--- a/templates/test/by_id.html
+++ b/templates/test/by_id.html
@@ -315,6 +315,16 @@ <h6>There are no tests executed in this category.</h6>
                         '<h4>Smart diff</h4>' +
                         '<p><span class="label">' + (resp.kind || 'unknown') + '</span></p>' +
                         '<p>' + (resp.summary || '') + '</p>';
+                    if (resp.changes && resp.changes.length) {
+                        var items = resp.changes.map(function(c){
+                            var d = 'Cue ' + c.cue + ': ' + c.kind;
+                            if (c.offset_ms !== undefined) { d += ' (' + (c.offset_ms > 0 ? '+' : '') + c.offset_ms + ' ms)'; }
+                            if (c.expected !== undefined) { d += ' — expected “' + c.expected + '”, got “' + c.actual + '”'; }
+                            return '<li>' + $('<div>').text(d).html() + '</li>';
+                        }).join('');
+                        reveal.innerHTML += '<ul class="smart-changes">' + items + '</ul>';
+                        if (resp.changes_truncated) { reveal.innerHTML += '<p><em>… more changes not shown.</em></p>'; }
+                    }
                     reveal.innerHTML +=
                         '<button class="close-button" data-close aria-label="Close" type="button">' +
                         '   <span aria-hidden="true">&times;</span>' +
diff --git a/tests/test_smartdiff/test_compare.py b/tests/test_smartdiff/test_compare.py
index ef3e4e27..b9a25fff 100644
--- a/tests/test_smartdiff/test_compare.py
+++ b/tests/test_smartdiff/test_compare.py
@@ -111,6 +111,26 @@ def test_merged_cues_same_text_fewer_cues(self):
         result = smart_diff(_srt(two), _srt(one))
         self.assertEqual(result["kind"], "merged_cues")
 
+    def test_changes_list_text_detail(self):
+        """A text change lists which cue changed, with expected/actual snippets."""
+        changed = [(1000, 4000, "Hello world"), (5000, 8000, "DIFFERENT")]
+        result = smart_diff(_srt(_BASE), _srt(changed))
+        changes = result["changes"]
+        self.assertEqual(len(changes), 1)
+        self.assertEqual(changes[0]["cue"], 2)
+        self.assertEqual(changes[0]["kind"], "text")
+        self.assertEqual(changes[0]["actual"], "DIFFERENT")
+
+    def test_changes_list_timing_offsets(self):
+        """A timing shift lists exactly one +500 ms offset entry per cue."""
+        shifted = [(s + 500, e + 500, t) for s, e, t in _BASE]
+        result = smart_diff(_srt(_BASE), _srt(shifted))
+        self.assertEqual([c["offset_ms"] for c in result["changes"]], [500] * len(_BASE))
+
+    def test_identical_has_no_changes(self):
+        """An identical result carries no changes list."""
+        self.assertNotIn("changes", smart_diff(_srt(_BASE), _srt(_BASE)))
+
     def test_encoding_change_non_ascii_only(self):
         """A charset difference (accents only, e.g. -latin1) is flagged as encoding."""
         accented = [(1000, 4000, "Voilà"), (5000, 8000, "naïve café")]

From 8c1f7319b6ed4c118b243ddd089528e3077d9664 Mon Sep 17 00:00:00 2001
From: GAURAV KARMAKAR <gaurav.k@graeon.ai>
Date: Wed, 1 Jul 2026 00:43:22 +0530
Subject: [PATCH 09/10] style(smartdiff): stack Fail / Smart vertically with a
 divider

In the result cell, place the Smart link below the Fail link separated by a
thin horizontal rule, instead of inline, so the two diff actions read clearly.
---
 templates/test/by_id.html | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/templates/test/by_id.html b/templates/test/by_id.html
index c9376231..49b8d9e7 100644
--- a/templates/test/by_id.html
+++ b/templates/test/by_id.html
@@ -149,14 +149,14 @@ <h4 class="category-header {{ 'fail' if result.error else 'pass' }}" data-catego
                                                             {% if file.got is none or no_error.found-%}
                                                                 Fail
                                                             {% else %}
-                                                                <a href="#" class="diff_link" data-test="{{ file.test_id }}" data-regression="{{ file.regression_test_id }}" data-output="{{ file.regression_test_output_id }}">Fail</a> · <a href="#" class="smart_diff_link" data-test="{{ file.test_id }}" data-regression="{{ file.regression_test_id }}" data-output="{{ file.regression_test_output_id }}">Smart</a>
+                                                                <a href="#" class="diff_link" data-test="{{ file.test_id }}" data-regression="{{ file.regression_test_id }}" data-output="{{ file.regression_test_output_id }}">Fail</a><hr style="margin:5px 0;border:0;border-top:1px solid #777;width:44px" /><a href="#" class="smart_diff_link" data-test="{{ file.test_id }}" data-regression="{{ file.regression_test_id }}" data-output="{{ file.regression_test_output_id }}">Smart</a>
                                                             {%- endif %}
                                                         {% elif file.got is none or no_error.found or test.result.exit_code != 0 -%}
                                                             Pass
                                                         {% elif file.got == "error" %}
                                                             No output generated but there should be
                                                         {% else %}
-                                                            <a href="#" class="diff_link" data-test="{{ file.test_id }}" data-regression="{{ file.regression_test_id }}" data-output="{{ file.regression_test_output_id }}">Fail</a> · <a href="#" class="smart_diff_link" data-test="{{ file.test_id }}" data-regression="{{ file.regression_test_id }}" data-output="{{ file.regression_test_output_id }}">Smart</a>
+                                                            <a href="#" class="diff_link" data-test="{{ file.test_id }}" data-regression="{{ file.regression_test_id }}" data-output="{{ file.regression_test_output_id }}">Fail</a><hr style="margin:5px 0;border:0;border-top:1px solid #777;width:44px" /><a href="#" class="smart_diff_link" data-test="{{ file.test_id }}" data-regression="{{ file.regression_test_id }}" data-output="{{ file.regression_test_output_id }}">Smart</a>
                                                         {%- endif %}
                                                         {% if not loop.last %}<br />{% endif %}
                                                     {% else %}

From 0c7e5e820774c56fd0bdfcbc27d389fcd0344266 Mon Sep 17 00:00:00 2001
From: GAURAV KARMAKAR <gaurav.k@graeon.ai>
Date: Wed, 1 Jul 2026 01:04:46 +0530
Subject: [PATCH 10/10] fix(smartdiff): address code-review findings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- compare: zero-cue outputs (non-srt/vtt formats) no longer report 'identical'
  when they differ — return a new 'unsupported' kind so a real failure isn't
  masked; equal cue-less outputs still report 'identical'.
- normalize: require a non-empty ASCII skeleton for 'encoding', so two
  different non-Latin texts (CJK/Cyrillic) classify as 'text', not 'encoding'.
- controllers: smart_diff_view also catches UnicodeDecodeError (outputs with
  bytes invalid in both utf-8 and cp1252) and returns 'unavailable' instead of
  a 500.
- compare: drop the misleading "(timing aligned)" from the text_change summary
  (per-cue offsets are still in `changes`); compute _content(exp) once.
---
 mod_test/controllers.py                |  4 ++--
 mod_test/smartdiff/compare.py          | 15 ++++++++++++---
 mod_test/smartdiff/normalize.py        |  5 +++--
 tests/test_smartdiff/test_compare.py   | 18 ++++++++++++++++++
 tests/test_smartdiff/test_normalize.py |  5 +++++
 5 files changed, 40 insertions(+), 7 deletions(-)

diff --git a/mod_test/controllers.py b/mod_test/controllers.py
index 19bff476..e6ab08d5 100644
--- a/mod_test/controllers.py
+++ b/mod_test/controllers.py
@@ -407,9 +407,9 @@ def smart_diff_view(test_id: int, regression_test_id: int, output_id: int):
     path = os.path.join(config.get('SAMPLE_REPOSITORY', ''), 'TestResults')
     try:
         classification = result.generate_smart_diff(path)
-    except OSError:
+    except (OSError, UnicodeDecodeError):
         classification = {'kind': 'unavailable',
-                          'summary': 'Output files are not available locally.'}
+                          'summary': 'Output files are not available or not readable.'}
     return jsonify(classification)
 
 
diff --git a/mod_test/smartdiff/compare.py b/mod_test/smartdiff/compare.py
index 4fc79f9f..91e3d2b0 100644
--- a/mod_test/smartdiff/compare.py
+++ b/mod_test/smartdiff/compare.py
@@ -93,7 +93,8 @@ def smart_diff(expected: str, actual: str,
     ``text_change``, ``formatting_change`` (tags/entities only),
     ``whitespace_change`` (CEA-608 padding only), ``encoding_change``
     (non-ASCII/accented characters only), ``split_cues``, ``merged_cues``,
-    ``missing_cues``, ``extra_cues``, or ``mixed``.
+    ``missing_cues``, ``extra_cues``, ``unsupported`` (no cues parsed), or
+    ``mixed``.
 
     :param expected: The expected/baseline subtitle content.
     :type expected: str
@@ -108,6 +109,13 @@ def smart_diff(expected: str, actual: str,
     exp = parse_subtitles(expected, fmt)
     act = parse_subtitles(actual, fmt)
     n_exp, n_act = len(exp), len(act)
+    if n_exp == 0 and n_act == 0:
+        if expected == actual:
+            return _result('identical', 'Outputs are identical.', 0, 0)
+        return _result(
+            'unsupported',
+            'No subtitle cues to compare (unsupported format); see the raw diff.',
+            0, 0)
     count_mismatch = n_exp != n_act
 
     text_changes = 0
@@ -180,7 +188,8 @@ def _finish(kind: str, summary: str, exp_count: int, act_count: int,
             n_exp, n_act)
 
     if count_mismatch:
-        if _content(exp) and _content(exp) == _content(act):
+        exp_content = _content(exp)
+        if exp_content and exp_content == _content(act):
             if n_act > n_exp:
                 return _finish(
                     'split_cues',
@@ -205,7 +214,7 @@ def _finish(kind: str, summary: str, exp_count: int, act_count: int,
         if text_changes > 0:
             return _finish(
                 'text_change',
-                f'{text_changes} of {n_exp} cues differ in text (timing aligned).',
+                f'{text_changes} of {n_exp} cues differ in text.',
                 n_exp, n_act)
         if encoding_changes > 0 and formatting_changes == 0 and whitespace_changes == 0:
             return _finish(
diff --git a/mod_test/smartdiff/normalize.py b/mod_test/smartdiff/normalize.py
index 6a7d1cdd..a460c0b7 100644
--- a/mod_test/smartdiff/normalize.py
+++ b/mod_test/smartdiff/normalize.py
@@ -108,7 +108,8 @@ def classify_text_pair(expected: str, actual: str) -> str:
     actual_plain = plain(actual)
     if expected_plain == actual_plain:
         return 'formatting'
-    has_non_ascii = any(ord(ch) > 127 for ch in expected_plain + actual_plain)
-    if has_non_ascii and ascii_fold(expected_plain) == ascii_fold(actual_plain):
+    folded_expected = ascii_fold(expected_plain)
+    non_ascii = any(ord(ch) > 127 for ch in expected_plain + actual_plain)
+    if non_ascii and folded_expected and folded_expected == ascii_fold(actual_plain):
         return 'encoding'
     return 'text'
diff --git a/tests/test_smartdiff/test_compare.py b/tests/test_smartdiff/test_compare.py
index b9a25fff..a26a9b22 100644
--- a/tests/test_smartdiff/test_compare.py
+++ b/tests/test_smartdiff/test_compare.py
@@ -131,6 +131,24 @@ def test_identical_has_no_changes(self):
         """An identical result carries no changes list."""
         self.assertNotIn("changes", smart_diff(_srt(_BASE), _srt(_BASE)))
 
+    def test_no_cues_and_differ_is_unsupported_not_identical(self):
+        """Two different non-subtitle outputs (no cues) are not called identical."""
+        result = smart_diff("plain transcript one", "plain transcript two")
+        self.assertEqual(result["kind"], "unsupported")
+
+    def test_no_cues_but_equal_is_identical(self):
+        """Two equal cue-less outputs are still identical."""
+        self.assertEqual(smart_diff("same text", "same text")["kind"], "identical")
+
+    def test_text_change_with_shift_records_per_cue_offset(self):
+        """A combined text change + timing shift keeps the per-cue offset in changes."""
+        base = [(1000, 2000, "A"), (5000, 6000, "B")]
+        other = [(1500, 2500, "X"), (5500, 6500, "Y")]
+        result = smart_diff(_srt(base), _srt(other))
+        self.assertEqual(result["kind"], "text_change")
+        self.assertNotIn("aligned", result["summary"])
+        self.assertTrue(all(c["offset_ms"] == 500 for c in result["changes"]))
+
     def test_encoding_change_non_ascii_only(self):
         """A charset difference (accents only, e.g. -latin1) is flagged as encoding."""
         accented = [(1000, 4000, "Voilà"), (5000, 8000, "naïve café")]
diff --git a/tests/test_smartdiff/test_normalize.py b/tests/test_smartdiff/test_normalize.py
index 9c107ccc..2b766e98 100644
--- a/tests/test_smartdiff/test_normalize.py
+++ b/tests/test_smartdiff/test_normalize.py
@@ -45,6 +45,11 @@ def test_classify_real_text_change(self):
         """A genuine text change classifies as text."""
         self.assertEqual(classify_text_pair('hello', 'goodbye'), 'text')
 
+    def test_classify_non_latin_text_change_not_encoding(self):
+        """Two different non-Latin texts (empty ASCII skeleton) classify as text."""
+        self.assertEqual(classify_text_pair('日本語', '中文'), 'text')
+        self.assertEqual(classify_text_pair('Привет', 'Спасибо'), 'text')
+
 
 if __name__ == "__main__":
     unittest.main()