Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 38 additions & 0 deletions mod_test/controllers.py
Original file line number Diff line number Diff line change
Expand Up @@ -375,6 +375,44 @@
abort(404)


@mod_test.route('/diff/<test_id>/<regression_test_id>/<output_id>/smart', methods=['GET'])
def smart_diff_view(test_id: int, regression_test_id: int, output_id: int):
    """
    Return a semantic (smart) diff classification for an output as JSON.

    Unlike the line diff, this reports *how* the output differs (timing shift,
    cosmetic padding/formatting/encoding, text change, missing/extra cues), so a
    person or an agent gets an actionable answer instead of a wall of lines.

    The route is read-only, so it is restricted to GET explicitly. Responds
    with 404 when no result file matches the three ids. When the output files
    cannot be opened or decoded, an ``unavailable`` classification is returned
    instead of an error response.

    :param test_id: id of the test
    :type test_id: int
    :param regression_test_id: id of the regression test
    :type regression_test_id: int
    :param output_id: id of the generated output
    :type output_id: int
    :return: JSON classification of the difference.
    :rtype: flask.Response
    """
    from run import config

    result = TestResultFile.query.filter(and_(
        TestResultFile.test_id == test_id,
        TestResultFile.regression_test_id == regression_test_id,
        TestResultFile.regression_test_output_id == output_id
    )).first()

    if result is None:
        abort(404)

    path = os.path.join(config.get('SAMPLE_REPOSITORY', ''), 'TestResults')
    try:
        classification = result.generate_smart_diff(path)
    except (OSError, UnicodeDecodeError):
        # Missing or undecodable result files are reported as data, not as a failure.
        classification = {'kind': 'unavailable',
                          'summary': 'Output files are not available or not readable.'}
    return jsonify(classification)


@mod_test.route('/log-files/<test_id>')
@login_required
def download_build_log_file(test_id):
Expand Down
27 changes: 27 additions & 0 deletions mod_test/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -455,3 +455,30 @@ def read_lines(file_name: str) -> List[str]:
return open(file_name, encoding='utf8').readlines()
except UnicodeDecodeError:
return open(file_name, encoding='cp1252').readlines()

def generate_smart_diff(self, base_path: str) -> dict:
    """
    Produce a semantic classification of actual vs expected output.

    Where the line diff lists changed lines, this names the kind of change
    (timing shift, cosmetic padding/formatting/encoding, text change,
    missing/extra cues) so that a person or an agent can act on it directly.

    :param base_path: The base path for the files location.
    :type base_path: str
    :return: A smart-diff classification with ``kind`` and ``summary`` keys.
    :rtype: dict
    """
    from mod_test.smartdiff.compare import smart_diff

    # A falsy ``got`` is reported as identical without reading any file.
    if not self.got:
        return {'kind': 'identical',
                'summary': 'Output matches the expected baseline.'}

    suffix = self.regression_test_output.correct_extension
    # Resolve both paths first, then read the baseline before the produced output.
    baseline_path, produced_path = [
        os.path.join(base_path, stem + suffix)
        for stem in (self.expected, self.got)
    ]
    baseline = ''.join(self.read_lines(baseline_path))
    produced = ''.join(self.read_lines(produced_path))
    # The extension without its dot, lowercased, is the format hint; an empty
    # extension becomes None.
    format_hint = suffix.lstrip('.').lower() or None
    return smart_diff(baseline, produced, fmt=format_hint)
7 changes: 7 additions & 0 deletions mod_test/smartdiff/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
"""Semantic ("smart") diff for subtitle regression outputs.

Unlike a raw line diff, this package classifies *how* two outputs differ
(timing shift, text change, missing/extra cues) so a person or an agent gets an
actionable answer instead of a wall of changed lines. Pure/Flask-decoupled so it
is fully unit-testable.
"""
242 changes: 242 additions & 0 deletions mod_test/smartdiff/compare.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,242 @@
"""Semantic comparison of subtitle outputs: classify *how* two results differ."""

from typing import Dict, List, Optional

from mod_test.smartdiff.normalize import classify_text_pair, plain
from mod_test.smartdiff.parsing import parse_subtitles
from mod_test.smartdiff.srt import Cue

#: Cap on the number of per-cue change entries returned in a result. When more
#: entries exist, the ``changes`` list is cut to this length and the result also
#: carries ``changes_truncated: True``.
_MAX_CHANGES = 25


def _result(kind: str, summary: str, n_exp: int, n_act: int,
offset_ms: Optional[int] = None) -> Dict[str, object]:
"""
Build a classification result dict.

:param kind: The stable difference kind.
:type kind: str
:param summary: A human/agent-readable one-line explanation.
:type summary: str
:param n_exp: Number of expected cues.
:type n_exp: int
:param n_act: Number of actual cues.
:type n_act: int
:param offset_ms: Consistent timing offset, when ``kind`` is ``timing_shift``.
:type offset_ms: Optional[int]
:return: The classification result.
:rtype: Dict[str, object]
"""
out: Dict[str, object] = {
'kind': kind,
'summary': summary,
'expected_cues': n_exp,
'actual_cues': n_act,
}
if offset_ms is not None:
out['offset_ms'] = offset_ms
return out


def _content(cues: List[Cue]) -> str:
    """
    Flatten every cue's text into one normalised string, ignoring cue boundaries.

    Each cue's text goes through ``plain`` and has its whitespace runs collapsed
    to single spaces; the per-cue strings are then joined with single spaces.
    Used for split/merge detection.

    :param cues: The parsed cues.
    :type cues: List[Cue]
    :return: A single normalised token string spanning every cue.
    :rtype: str
    """
    per_cue = []
    for cue in cues:
        tokens = plain(cue.text).split()
        per_cue.append(' '.join(tokens))
    return ' '.join(per_cue)


def _monotonic(values: List[int]) -> bool:
"""
Report whether a sequence is non-decreasing or non-increasing.

:param values: The sequence to test.
:type values: List[int]
:return: True if monotonic in either direction.
:rtype: bool
"""
pairs = list(zip(values, values[1:]))
non_decreasing = all(a <= b for a, b in pairs)
non_increasing = all(a >= b for a, b in pairs)
return non_decreasing or non_increasing


def _snippet(text: str, limit: int = 80) -> str:
"""
Collapse whitespace and truncate cue text for compact change details.

:param text: Raw cue text.
:type text: str
:param limit: Maximum characters to keep.
:type limit: int
:return: A single-line, length-capped snippet.
:rtype: str
"""
flat = ' '.join(text.split())
return flat if len(flat) <= limit else flat[:limit] + '…'


#: Cosmetic text categories mapped to (result kind, summary template).
_COSMETIC_RESULTS = {
    'encoding': (
        'encoding_change',
        '{count} of {total} cues differ only in character encoding '
        '(non-ASCII/accented characters).'),
    'formatting': (
        'formatting_change',
        '{count} of {total} cues differ only in formatting (tags/entities), not text.'),
    'whitespace': (
        'whitespace_change',
        '{count} of {total} cues differ only in trailing whitespace/padding.'),
}


class _PairTally:
    """Running tallies gathered while walking position-aligned cue pairs."""

    def __init__(self) -> None:
        self.text_changes = 0
        self.cosmetic: Dict[str, int] = {'formatting': 0, 'whitespace': 0, 'encoding': 0}
        # Stays True only while every pair is classified as 'match'.
        self.raw_matches = True
        self.timing_deltas: List[int] = []
        self.changes: List[Dict[str, object]] = []

    @property
    def cosmetic_changes(self) -> int:
        """Total number of pairs that differ only cosmetically."""
        return sum(self.cosmetic.values())

    def record(self, position: int, expected_cue: Cue, actual_cue: Cue) -> None:
        """
        Fold one expected/actual cue pair into the tallies.

        :param position: 1-based position of the pair.
        :type position: int
        :param expected_cue: The baseline cue.
        :type expected_cue: Cue
        :param actual_cue: The produced cue at the same position.
        :type actual_cue: Cue
        """
        category = classify_text_pair(expected_cue.text, actual_cue.text)
        delta = actual_cue.start_ms - expected_cue.start_ms
        if category != 'match':
            self.raw_matches = False
        if category == 'text':
            # A cue whose text changed contributes no timing sample.
            self.text_changes += 1
        else:
            if category in self.cosmetic:
                self.cosmetic[category] += 1
            self.timing_deltas.append(delta)
        if category == 'match' and delta == 0:
            return
        entry: Dict[str, object] = {
            'cue': position,
            'kind': category if category != 'match' else 'timing',
        }
        if category == 'text':
            entry['expected'] = _snippet(expected_cue.text)
            entry['actual'] = _snippet(actual_cue.text)
        if delta != 0:
            entry['offset_ms'] = delta
        self.changes.append(entry)


def _attach_changes(out: Dict[str, object],
                    changes: List[Dict[str, object]]) -> Dict[str, object]:
    """Attach the (capped) per-cue change list to a classification result."""
    if changes:
        out['changes'] = changes[:_MAX_CHANGES]
        if len(changes) > _MAX_CHANGES:
            out['changes_truncated'] = True
    return out


def _classify_timing(deltas: List[int], total: int) -> Optional[Dict[str, object]]:
    """Classify a constant non-zero shift or a monotonic drift; None otherwise."""
    distinct = set(deltas)
    if len(distinct) == 1 and deltas[0] != 0:
        offset = deltas[0]
        direction = 'late' if offset > 0 else 'early'
        return _result(
            'timing_shift',
            f'All {total} cues match but are {abs(offset)} ms {direction}.',
            total, total, offset_ms=offset)
    if len(distinct) > 1 and _monotonic(deltas):
        first, last = deltas[0], deltas[-1]
        return _result(
            'timing_drift',
            f'Timing drifts from {first:+d} ms to {last:+d} ms across {total} cues.',
            total, total)
    return None


def _classify_text_only(tally: _PairTally, total: int) -> Optional[Dict[str, object]]:
    """Classify text/cosmetic differences when no sampled cue moved in time."""
    if tally.text_changes > 0:
        return _result(
            'text_change',
            f'{tally.text_changes} of {total} cues differ in text.',
            total, total)
    present = [name for name, count in tally.cosmetic.items() if count > 0]
    if len(present) != 1:
        # No cosmetic change, or several kinds at once: no single label fits.
        return None
    kind, template = _COSMETIC_RESULTS[present[0]]
    summary = template.format(count=tally.cosmetic[present[0]], total=total)
    return _result(kind, summary, total, total)


def _classify_equal_counts(tally: _PairTally, total: int) -> Optional[Dict[str, object]]:
    """Classify the difference when both sides have the same number of cues."""
    no_timing_move = all(delta == 0 for delta in tally.timing_deltas)
    if tally.raw_matches and no_timing_move:
        return _result('identical', 'Outputs are identical.', total, total)
    if tally.text_changes == 0 and tally.cosmetic_changes == 0:
        timing = _classify_timing(tally.timing_deltas, total)
        if timing is not None:
            return timing
    if no_timing_move:
        return _classify_text_only(tally, total)
    return None


def _classify_count_mismatch(exp: List[Cue], act: List[Cue],
                             text_changes: int) -> Optional[Dict[str, object]]:
    """Classify the difference when the two sides have different cue counts."""
    n_exp, n_act = len(exp), len(act)
    exp_content = _content(exp)
    # Same overall text with a different cue count means cues were re-cut.
    if exp_content and exp_content == _content(act):
        if n_act > n_exp:
            return _result(
                'split_cues',
                f'Same text, but cues were split: expected {n_exp}, got {n_act}.',
                n_exp, n_act)
        return _result(
            'merged_cues',
            f'Same text, but cues were merged: expected {n_exp}, got {n_act}.',
            n_exp, n_act)
    if text_changes == 0:
        if n_act < n_exp:
            return _result(
                'missing_cues',
                f'{n_exp - n_act} of {n_exp} cues are missing from the output.',
                n_exp, n_act)
        return _result(
            'extra_cues',
            f'Output has {n_act - n_exp} extra cues ({n_act} vs {n_exp} expected).',
            n_exp, n_act)
    return None


def smart_diff(expected: str, actual: str,
               fmt: Optional[str] = None) -> Dict[str, object]:
    """
    Compare expected vs actual subtitle output and classify the difference.

    Supports SubRip (.srt) and WebVTT (.vtt); the format is auto-detected from
    content unless ``fmt`` is given. Aligns cues by position and reports the
    *kind* of difference rather than a raw line diff: ``identical``,
    ``timing_shift`` (constant offset), ``timing_drift`` (growing offset),
    ``text_change``, ``formatting_change`` (tags/entities only),
    ``whitespace_change`` (CEA-608 padding only), ``encoding_change``
    (non-ASCII/accented characters only), ``split_cues``, ``merged_cues``,
    ``missing_cues``, ``extra_cues``, ``unsupported`` (no cues parsed), or
    ``mixed``.

    Whenever at least one cue was parsed, the result also carries a ``changes``
    list of per-cue entries for every aligned pair that differs in text or
    start time. The list is capped at ``_MAX_CHANGES`` entries, and
    ``changes_truncated`` is set when entries were dropped.

    :param expected: The expected/baseline subtitle content.
    :type expected: str
    :param actual: The actual/produced subtitle content.
    :type actual: str
    :param fmt: Explicit format ('srt' or 'vtt'); auto-detected when None.
    :type fmt: Optional[str]
    :return: A classification dict with keys ``kind``, ``summary``,
        ``expected_cues``, ``actual_cues`` and (for ``timing_shift``) ``offset_ms``.
    :rtype: Dict[str, object]
    """
    exp = parse_subtitles(expected, fmt)
    act = parse_subtitles(actual, fmt)
    n_exp, n_act = len(exp), len(act)
    if n_exp == 0 and n_act == 0:
        # Nothing parsed on either side: only raw equality can be reported.
        if expected == actual:
            return _result('identical', 'Outputs are identical.', 0, 0)
        return _result(
            'unsupported',
            'No subtitle cues to compare (unsupported format); see the raw diff.',
            0, 0)

    tally = _PairTally()
    for position, (expected_cue, actual_cue) in enumerate(zip(exp, act), start=1):
        tally.record(position, expected_cue, actual_cue)

    if n_exp != n_act:
        verdict = _classify_count_mismatch(exp, act, tally.text_changes)
    else:
        verdict = _classify_equal_counts(tally, n_exp)
    if verdict is None:
        verdict = _result(
            'mixed',
            f'Mixed differences across {min(n_exp, n_act)} compared cues; '
            f'expected {n_exp}, got {n_act}.',
            n_exp, n_act)
    return _attach_changes(verdict, tally.changes)
Loading
Loading