Split dataframe into equal chunks for default verbosity

VaggelisD · VaggelisD · commit 1d84ad59ed29 · 2025-06-16T18:56:18.000+03:00
diff --git a/sqlmesh/core/test/definition.py b/sqlmesh/core/test/definition.py
@@ -333,7 +333,9 @@ def _to_hashable(x: t.Any) -> t.Any:
 
                 diff.rename(columns={"exp": "Expected", "act": "Actual"}, inplace=True)
                 if self.verbosity == Verbosity.DEFAULT:
-                    args.append(df_to_table("Data mismatch", diff))
+                    args.extend(
+                        df_to_table("Data mismatch", df) for df in _split_df_by_column_pairs(diff)
+                    )
                 else:
                     from pandas import MultiIndex
 
@@ -995,3 +997,43 @@ def _normalize_df_value(value: t.Any) -> t.Any:
             return {k: _normalize_df_value(v) for k, v in zip(value["key"], value["value"])}
         return {k: _normalize_df_value(v) for k, v in value.items()}
     return value
+
+
+def _split_df_by_column_pairs(df: pd.DataFrame, pairs_per_chunk: int = 4) -> t.List[pd.DataFrame]:
+    """Split a dataframe column-wise into consecutive chunks of column pairs.
+
+    Args:
+        df: The dataframe to split; columns are sliced by position, in their existing order
+        pairs_per_chunk: Maximum number of column pairs per chunk (default: 4)
+
+    Returns:
+        [df] as-is if it has at most pairs_per_chunk * 2 columns, else even-width column slices
+    """
+    total_columns = len(df.columns)
+
+    # At most pairs_per_chunk * 2 columns already fits in one chunk: return df unsplit
+    if total_columns <= pairs_per_chunk * 2:
+        return [df]
+
+    # Ceiling division: the fewest chunks such that none exceeds pairs_per_chunk * 2 columns
+    num_chunks = (total_columns + (pairs_per_chunk * 2 - 1)) // (pairs_per_chunk * 2)
+
+    # Base width shared by every chunk: the integer share of the columns per chunk,
+    # forced down to an even number so that each chunk holds whole pairs of columns
+    columns_per_chunk = (total_columns // num_chunks) & ~1  # "& ~1" clears the lowest bit
+    remainder = total_columns - (columns_per_chunk * num_chunks)
+
+    chunks = []
+    start_idx = 0
+
+    # Slice consecutive column ranges; each chunk starts where the previous one ended
+    for i in range(num_chunks):
+        # The first remainder // 2 chunks take one extra pair to absorb the leftover columns
+        # NOTE(review): an odd total_columns leaves the last column out of every chunk
+        extra = 2 if i < remainder // 2 else 0
+        end_idx = start_idx + columns_per_chunk + extra
+        chunk = df.iloc[:, start_idx:end_idx]
+        chunks.append(chunk)
+        start_idx = end_idx
+
+    return chunks
diff --git a/tests/core/test_test.py b/tests/core/test_test.py
@@ -6,7 +6,7 @@
 from pathlib import Path
 import unittest
 from unittest.mock import call, patch
-from shutil import copyfile
+from shutil import copyfile, rmtree
 
 import pandas as pd  # noqa: TID253
 import pytest
@@ -2323,6 +2323,65 @@ def test_test_output(tmp_path: Path) -> None:
     assert "Ran 102 tests" in output
     assert "FAILED (failures=51)" in output
 
+    # Case 4: Test that wide tables are split into even chunks for default verbosity
+    rmtree(tmp_path / "tests")
+
+    wide_model_query = (
+        "SELECT 1 AS col_1, 2 AS col_2, 3 AS col_3, 4 AS col_4, 5 AS col_5, 6 AS col_6, 7 AS col_7"
+    )
+
+    context.upsert_model(
+        _create_model(
+            meta="MODEL(name test.test_wide_model)",
+            query=wide_model_query,
+            default_catalog=context.default_catalog,
+        )
+    )
+
+    tests_dir = tmp_path / "tests"
+    tests_dir.mkdir()
+
+    wide_test_file = tmp_path / "tests" / "test_wide_model.yaml"
+    wide_test_file_content = """
+    test_wide_model:
+      model: test.test_wide_model
+      outputs:
+        query:
+          rows:
+          - col_1: 6
+            col_2: 5
+            col_3: 4
+            col_4: 3
+            col_5: 2
+            col_6: 1
+            col_7: 0
+ 
+    """
+
+    wide_test_file.write_text(wide_test_file_content)
+
+    with capture_output() as captured_output:
+        context.test()
+
+    assert (
+        """Data mismatch                                  
+┏━━━━━┳━━━━━━━━┳━━━━━━━━┳━━━━━━━━┳━━━━━━━━┳━━━━━━━━┳━━━━━━━━┳━━━━━━━━━┳━━━━━━━━┓
+┃     ┃ col_1: ┃ col_1: ┃ col_2: ┃ col_2: ┃ col_3: ┃ col_3: ┃ col_4:  ┃ col_4: ┃
+┃ Row ┃ Expec… ┃ Actual ┃ Expec… ┃ Actual ┃ Expec… ┃ Actual ┃ Expect… ┃ Actual ┃
+┡━━━━━╇━━━━━━━━╇━━━━━━━━╇━━━━━━━━╇━━━━━━━━╇━━━━━━━━╇━━━━━━━━╇━━━━━━━━━╇━━━━━━━━┩
+│  0  │   6    │   1    │   5    │   2    │   4    │   3    │    3    │   4    │
+└─────┴────────┴────────┴────────┴────────┴────────┴────────┴─────────┴────────┘
+
+                                 Data mismatch                                  
+┏━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━┓
+┃     ┃ col_5:    ┃ col_5:    ┃ col_6:    ┃ col_6:    ┃ col_7:    ┃ col_7:     ┃
+┃ Row ┃ Expected  ┃ Actual    ┃ Expected  ┃ Actual    ┃ Expected  ┃ Actual     ┃
+┡━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━┩
+│  0  │     2     │     5     │     1     │     6     │     0     │     7      │
+└─────┴───────────┴───────────┴───────────┴───────────┴───────────┴────────────┘"""
+        in captured_output.stdout
+    )
+
 
 @use_terminal_console
 def test_test_output_with_invalid_model_name(tmp_path: Path) -> None: