Feat: Add verbose result comparison in tests

VaggelisD · VaggelisD · commit 38c53217a60d · 2025-06-05T18:46:09.000+03:00
diff --git a/sqlmesh/core/console.py b/sqlmesh/core/console.py
@@ -28,6 +28,7 @@
 from rich.tree import Tree
 from sqlglot import exp
 
+from sqlmesh.core.test.result import ModelTextTestResult
 from sqlmesh.core.environment import EnvironmentNamingInfo, EnvironmentSummary
 from sqlmesh.core.linter.rule import RuleViolation
 from sqlmesh.core.model import Model
@@ -498,6 +499,10 @@ def loading_start(self, message: t.Optional[str] = None) -> uuid.UUID:
     def loading_stop(self, id: uuid.UUID) -> None:
         """Stop loading for the given id."""
 
+    @abc.abstractmethod
+    def log_unit_test_results(self, result: ModelTextTestResult, test_duration: float) -> None:
+        """Print the unit test results.
+
+        Args:
+            result: The unit test result to report on.
+            test_duration: The duration of the tests.
+        """
+
 
 class NoopConsole(Console):
     def start_plan_evaluation(self, plan: EvaluatablePlan) -> None:
@@ -779,6 +784,9 @@ def start_destroy(self) -> bool:
     def stop_destroy(self, success: bool = True) -> None:
         pass
 
+    def log_unit_test_results(self, result: ModelTextTestResult, test_duration: float) -> None:
+        # Intentionally a no-op: this console does not print unit test results.
+        pass
+
 
 def make_progress_bar(
     message: str,
@@ -2494,6 +2502,51 @@ def show_linter_violations(
         else:
             self.log_warning(msg)
 
+    def log_unit_test_results(self, result: ModelTextTestResult, test_duration: float) -> None:
+        """Print every failure and error of a unit test run, followed by a summary.
+
+        Args:
+            result: The unit test result whose failures, errors and skips are reported.
+            test_duration: The duration of the tests, printed in seconds.
+        """
+        tests_run = result.testsRun
+        errors = result.errors
+        failures = result.original_failures
+        skipped = result.skipped
+
+        is_success = not (errors or failures)
+
+        infos = []
+        if failures:
+            infos.append(f"failures={len(failures)}")
+        if errors:
+            infos.append(f"errors={len(errors)}")
+        if skipped:
+            # unittest results keep skips as a list; report its size like the
+            # other counters instead of interpolating the list itself.
+            infos.append(f"skipped={len(skipped)}")
+
+        self._print("\n", end="")
+
+        for test_case, failure in failures:
+            self._print(unittest.TextTestResult.separator1)
+            self._print(f"FAIL: {test_case}")
+
+            if test_description := test_case.shortDescription():
+                self._print(test_description)
+            self._print(f"{unittest.TextTestResult.separator2}\n")
+
+            # NOTE(review): `failure` is indexed like an exc_info triple, so item 1 is
+            # presumably the exception instance - confirm against ModelTextTestResult.
+            if exception := failure[1]:
+                # Each arg is printed on its own so that non-string renderables
+                # placed in `args` are handed to the console as-is.
+                for arg in exception.args:
+                    self._print(arg)
+                    self._print("\n")
+
+        for test_case, error in errors:
+            self._print(unittest.TextTestResult.separator1)
+            self._print(f"ERROR: {test_case}")
+            self._print(error)
+
+        # Output final report
+        self._print(unittest.TextTestResult.separator2)
+        self._print(
+            f"Ran {tests_run} {'test' if tests_run == 1 else 'tests'} in {test_duration:.3f}s \n"
+        )
+        self._print(
+            f"{'OK' if is_success else 'FAILED'}{' (' + ', '.join(infos) + ')' if infos else ''}"
+        )
+
 
 def _cells_match(x: t.Any, y: t.Any) -> bool:
     """Helper function to compare two cells and returns true if they're equal, handling array objects."""
diff --git a/sqlmesh/core/test/definition.py b/sqlmesh/core/test/definition.py
@@ -10,6 +10,10 @@
 from pathlib import Path
 from unittest.mock import patch
 
+from rich.table import Table
+from rich.tree import Tree
+from rich.align import Align
+
 import numpy as np
 import pandas as pd
 from io import StringIO
@@ -27,6 +31,7 @@
 from sqlmesh.utils.date import date_dict, pandas_timestamp_to_pydatetime, to_datetime
 from sqlmesh.utils.errors import ConfigError, TestError
 from sqlmesh.utils.yaml import load as yaml_load
+from sqlmesh.utils import Verbosity
 
 if t.TYPE_CHECKING:
     from sqlglot.dialects.dialect import DialectType
@@ -61,6 +66,8 @@ def __init__(
         preserve_fixtures: bool = False,
         default_catalog: str | None = None,
         concurrency: bool = False,
+        verbosity: Verbosity = Verbosity.DEFAULT,
+        rich_output: bool = True,
     ) -> None:
         """ModelTest encapsulates a unit test for a model.
 
@@ -84,6 +91,8 @@ def __init__(
         self.default_catalog = default_catalog
         self.dialect = dialect
         self.concurrency = concurrency
+        self.verbosity = verbosity
+        self.rich_output = rich_output
 
         self._fixture_table_cache: t.Dict[str, exp.Table] = {}
         self._normalized_column_name_cache: t.Dict[str, str] = {}
@@ -278,6 +287,7 @@ def _to_hashable(x: t.Any) -> t.Any:
                 check_like=True,  # Ignore column order
             )
         except AssertionError as e:
+            args: t.List[t.Any] = []
             if expected.shape != actual.shape:
                 _raise_if_unexpected_columns(expected.columns, actual.columns)
 
@@ -291,10 +301,35 @@ def _to_hashable(x: t.Any) -> t.Any:
                 if not unexpected_rows.empty:
                     error_msg += f"\n\nUnexpected rows:\n\n{unexpected_rows}"
 
-                e.args = (error_msg,)
+                args.append(error_msg)
             else:
-                diff = expected.compare(actual).rename(columns={"self": "exp", "other": "act"})
-                e.args = (f"Data mismatch (exp: expected, act: actual)\n\n{diff}",)
+                diff = expected.compare(actual).rename(
+                    columns={"self": "Expected", "other": "Actual"}
+                )
+
+                if not self.rich_output:
+                    args.append(f"Data mismatch\n\n{diff}")
+                elif self.verbosity == Verbosity.DEFAULT:
+                    args.append(df_to_table("Data mismatch", diff))
+                else:
+                    from pandas import MultiIndex
+
+                    levels = t.cast(MultiIndex, diff.columns).levels[0]
+                    for col in levels:
+                        col_diff = diff[col]
+                        if not col_diff.empty:
+                            table = df_to_table(
+                                f"[bold red]Column '{col}' mismatch[/bold red]", col_diff
+                            )
+                            args.append(table)
+
+                    # Show summary statistics
+                    summary_tree = Tree("[bold][summary]Summary:[/summary]")
+                    summary_tree.add(f"Total differences: {len(diff)}\n")
+                    summary_tree.add(f"Different columns: {len(levels)}\n")
+                    args.append(summary_tree)
+
+            e.args = (*args,)
 
             raise e
 
@@ -316,6 +351,7 @@ def create_test(
         preserve_fixtures: bool = False,
         default_catalog: str | None = None,
         concurrency: bool = False,
+        verbosity: Verbosity = Verbosity.DEFAULT,
     ) -> t.Optional[ModelTest]:
         """Create a SqlModelTest or a PythonModelTest.
 
@@ -361,6 +397,7 @@ def create_test(
                 preserve_fixtures,
                 default_catalog,
                 concurrency,
+                verbosity,
             )
         except Exception as e:
             raise TestError(f"Failed to create test {test_name} ({path})\n{str(e)}")
@@ -676,6 +713,8 @@ def __init__(
         preserve_fixtures: bool = False,
         default_catalog: str | None = None,
         concurrency: bool = False,
+        verbosity: Verbosity = Verbosity.DEFAULT,
+        rich_output: bool = True,
     ) -> None:
         """PythonModelTest encapsulates a unit test for a Python model.
 
@@ -702,6 +741,8 @@ def __init__(
             preserve_fixtures,
             default_catalog,
             concurrency,
+            verbosity,
+            rich_output,
         )
 
         self.context = TestExecutionContext(
@@ -926,3 +967,41 @@ def _normalize_df_value(value: t.Any) -> t.Any:
             return {k: _normalize_df_value(v) for k, v in zip(value["key"], value["value"])}
         return {k: _normalize_df_value(v) for k, v in value.items()}
     return value
+
+
+def df_to_table(
+    header: str,
+    df: pd.DataFrame,
+    show_index: bool = True,
+    index_name: str = "Row",
+) -> Table:
+    """Convert a pandas.DataFrame obj into a rich.Table obj.
+
+    Args:
+        header: The table title, rendered in bold red.
+        df: A Pandas DataFrame to be converted to a rich Table.
+        show_index: Add a leading column holding each row's index label. Defaults to True.
+        index_name: The column name to give to the index column. Defaults to "Row";
+            a falsy value leaves the index column unnamed.
+
+    Returns:
+        Table: A new rich Table populated with the DataFrame values.
+    """
+    rich_table = Table(title=f"[bold red]{header}[/bold red]", show_lines=True, min_width=60)
+    if show_index:
+        index_name = str(index_name) if index_name else ""
+        rich_table.add_column(index_name)
+
+    for column in df.columns:
+        # MultiIndex labels arrive as tuples and are joined level by level; any
+        # other label (str, int, ...) is rendered through str().
+        if isinstance(column, tuple):
+            column_name = ": ".join(str(col) for col in column)
+        else:
+            column_name = str(column)
+
+        if "expected" in column_name.lower():
+            column_name = f"[green]{column_name}[/green]"
+        else:
+            column_name = f"[red]{column_name}[/red]"
+
+        rich_table.add_column(Align.center(column_name))
+
+    # Label rows with the DataFrame's own index rather than a running counter, so
+    # a frame that holds only a subset of rows still reports their original labels.
+    for index, value_list in zip(df.index, df.values.tolist()):
+        row = [str(index)] if show_index else []
+        row += [str(x) for x in value_list]
+        center = [Align.center(x) for x in row]
+        rich_table.add_row(*center)
+
+    return rich_table
diff --git a/sqlmesh/core/test/result.py b/sqlmesh/core/test/result.py
@@ -83,43 +83,6 @@ def log_test_report(self, test_duration: float) -> None:
         Args:
             test_duration: The duration of the tests.
         """
-        tests_run = self.testsRun
-        errors = self.errors
-        failures = self.failures
-        skipped = self.skipped
-
-        is_success = not (errors or failures)
-
-        infos = []
-        if failures:
-            infos.append(f"failures={len(failures)}")
-        if errors:
-            infos.append(f"errors={len(errors)}")
-        if skipped:
-            infos.append(f"skipped={skipped}")
-
-        stream = self.stream
-
-        stream.write("\n")
-
-        for test_case, failure in failures:
-            stream.writeln(unittest.TextTestResult.separator1)
-            stream.writeln(f"FAIL: {test_case}")
-            if test_description := test_case.shortDescription():
-                stream.writeln(test_description)
-            stream.writeln(unittest.TextTestResult.separator2)
-            stream.writeln(failure)
-
-        for test_case, error in errors:
-            stream.writeln(unittest.TextTestResult.separator1)
-            stream.writeln(f"ERROR: {test_case}")
-            stream.writeln(error)
-
-        # Output final report
-        stream.writeln(unittest.TextTestResult.separator2)
-        stream.writeln(
-            f"Ran {tests_run} {'tests' if tests_run > 1 else 'test'} in {test_duration:.3f}s \n"
-        )
-        stream.writeln(
-            f"{'OK' if is_success else 'FAILED'}{' (' + ', '.join(infos) + ')' if infos else ''}"
-        )
+        from sqlmesh.core.console import get_console
+
+        get_console().log_unit_test_results(self, test_duration)
diff --git a/sqlmesh/core/test/runner.py b/sqlmesh/core/test/runner.py
@@ -107,7 +107,7 @@ def run_tests(
     lock = threading.Lock()
 
     combined_results = ModelTextTestResult(
-        stream=unittest.runner._WritelnDecorator(stream or sys.stderr),  # type: ignore
+        stream=unittest.runner._WritelnDecorator(stream or sys.stdout),  # type: ignore
         verbosity=2 if verbosity >= Verbosity.VERBOSE else 1,
         descriptions=True,
     )
@@ -136,6 +136,7 @@ def _run_single_test(
             default_catalog=default_catalog,
             preserve_fixtures=preserve_fixtures,
             concurrency=num_workers > 1,
+            verbosity=verbosity,
         )
 
         if not test:
diff --git a/tests/core/test_test.py b/tests/core/test_test.py

Original file line number	Diff line number	Diff line change
`@@ -107,7 +107,7 @@ def run_tests(`
`107`	`107`	`lock = threading.Lock()`
`108`	`108`
`109`	`109`	`combined_results = ModelTextTestResult(`
`110`		`- stream=unittest.runner._WritelnDecorator(stream or sys.stderr), # type: ignore`
	`110`	`+ stream=unittest.runner._WritelnDecorator(stream or sys.stdout), # type: ignore`
`111`	`111`	`verbosity=2 if verbosity >= Verbosity.VERBOSE else 1,`
`112`	`112`	`descriptions=True,`
`113`	`113`	`)`
`@@ -136,6 +136,7 @@ def _run_single_test(`
`136`	`136`	`default_catalog=default_catalog,`
`137`	`137`	`preserve_fixtures=preserve_fixtures,`
`138`	`138`	`concurrency=num_workers > 1,`
	`139`	`+ verbosity=verbosity,`
`139`	`140`	`)`
`140`	`141`
`141`	`142`	`if not test:`