Skip to content

Commit 1d84ad5

Browse files
committed
Split dataframe into equal chunks for default verbosity
1 parent 3da5904 commit 1d84ad5

2 files changed

Lines changed: 103 additions & 2 deletions

File tree

sqlmesh/core/test/definition.py

Lines changed: 43 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -333,7 +333,9 @@ def _to_hashable(x: t.Any) -> t.Any:
333333

334334
diff.rename(columns={"exp": "Expected", "act": "Actual"}, inplace=True)
335335
if self.verbosity == Verbosity.DEFAULT:
336-
args.append(df_to_table("Data mismatch", diff))
336+
args.extend(
337+
df_to_table("Data mismatch", df) for df in _split_df_by_column_pairs(diff)
338+
)
337339
else:
338340
from pandas import MultiIndex
339341

@@ -995,3 +997,43 @@ def _normalize_df_value(value: t.Any) -> t.Any:
995997
return {k: _normalize_df_value(v) for k, v in zip(value["key"], value["value"])}
996998
return {k: _normalize_df_value(v) for k, v in value.items()}
997999
return value
1000+
1001+
1002+
def _split_df_by_column_pairs(df: pd.DataFrame, pairs_per_chunk: int = 4) -> t.List[pd.DataFrame]:
    """Split a dataframe column-wise into similarly sized chunks of column pairs.

    Columns are assumed to be laid out as adjacent pairs (e.g. "Expected" / "Actual"),
    so chunk boundaries always fall on an even column offset and a pair is never split.

    Args:
        df: The dataframe to split.
        pairs_per_chunk: Maximum number of column pairs per chunk (default: 4).

    Returns:
        A list of dataframes that together contain every column of `df` exactly once,
        in the original order. If `df` has at most `pairs_per_chunk * 2` columns, the
        list contains `df` itself. Every chunk has an even number of columns, except
        that when `df` has an odd number of columns the last chunk also carries the
        trailing unpaired column.
    """
    total_columns = len(df.columns)
    max_columns_per_chunk = pairs_per_chunk * 2

    # Nothing to split when the dataframe already fits in a single chunk.
    if total_columns <= max_columns_per_chunk:
        return [df]

    # Ceiling division: the minimum number of chunks that respects the size limit.
    num_chunks = (total_columns + max_columns_per_chunk - 1) // max_columns_per_chunk

    # Base chunk width, rounded down to an even number so pairs stay together.
    columns_per_chunk = (total_columns // num_chunks) & ~1
    remainder = total_columns - columns_per_chunk * num_chunks

    # Leftover whole pairs are handed out one per chunk, starting from the first chunk.
    chunks_with_extra_pair = remainder // 2

    chunks = []
    start_idx = 0
    last_chunk = num_chunks - 1

    for i in range(num_chunks):
        if i == last_chunk:
            # The last chunk takes everything that is left. For an even column count this
            # is exactly `columns_per_chunk`; for an odd count it also includes the trailing
            # unpaired column, which would otherwise be silently dropped.
            end_idx = total_columns
        else:
            extra = 2 if i < chunks_with_extra_pair else 0
            end_idx = start_idx + columns_per_chunk + extra

        chunks.append(df.iloc[:, start_idx:end_idx])
        start_idx = end_idx

    return chunks

tests/core/test_test.py

Lines changed: 60 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
from pathlib import Path
77
import unittest
88
from unittest.mock import call, patch
9-
from shutil import copyfile
9+
from shutil import copyfile, rmtree
1010

1111
import pandas as pd # noqa: TID253
1212
import pytest
@@ -2323,6 +2323,65 @@ def test_test_output(tmp_path: Path) -> None:
23232323
assert "Ran 102 tests" in output
23242324
assert "FAILED (failures=51)" in output
23252325

2326+
# Case 4: Test that wide tables are split into even chunks for default verbosity
2327+
rmtree(tmp_path / "tests")
2328+
2329+
wide_model_query = (
2330+
"SELECT 1 AS col_1, 2 AS col_2, 3 AS col_3, 4 AS col_4, 5 AS col_5, 6 AS col_6, 7 AS col_7"
2331+
)
2332+
2333+
context.upsert_model(
2334+
_create_model(
2335+
meta="MODEL(name test.test_wide_model)",
2336+
query=wide_model_query,
2337+
default_catalog=context.default_catalog,
2338+
)
2339+
)
2340+
2341+
tests_dir = tmp_path / "tests"
2342+
tests_dir.mkdir()
2343+
2344+
wide_test_file = tmp_path / "tests" / "test_wide_model.yaml"
2345+
wide_test_file_content = """
2346+
test_wide_model:
2347+
model: test.test_wide_model
2348+
outputs:
2349+
query:
2350+
rows:
2351+
- col_1: 6
2352+
col_2: 5
2353+
col_3: 4
2354+
col_4: 3
2355+
col_5: 2
2356+
col_6: 1
2357+
col_7: 0
2358+
2359+
"""
2360+
2361+
wide_test_file.write_text(wide_test_file_content)
2362+
2363+
with capture_output() as captured_output:
2364+
context.test()
2365+
2366+
assert (
2367+
"""Data mismatch
2368+
┏━━━━━┳━━━━━━━━┳━━━━━━━━┳━━━━━━━━┳━━━━━━━━┳━━━━━━━━┳━━━━━━━━┳━━━━━━━━━┳━━━━━━━━┓
2369+
┃ ┃ col_1: ┃ col_1: ┃ col_2: ┃ col_2: ┃ col_3: ┃ col_3: ┃ col_4: ┃ col_4: ┃
2370+
┃ Row ┃ Expec… ┃ Actual ┃ Expec… ┃ Actual ┃ Expec… ┃ Actual ┃ Expect… ┃ Actual ┃
2371+
┡━━━━━╇━━━━━━━━╇━━━━━━━━╇━━━━━━━━╇━━━━━━━━╇━━━━━━━━╇━━━━━━━━╇━━━━━━━━━╇━━━━━━━━┩
2372+
│ 0 │ 6 │ 1 │ 5 │ 2 │ 4 │ 3 │ 3 │ 4 │
2373+
└─────┴────────┴────────┴────────┴────────┴────────┴────────┴─────────┴────────┘
2374+
2375+
Data mismatch
2376+
┏━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━┓
2377+
┃ ┃ col_5: ┃ col_5: ┃ col_6: ┃ col_6: ┃ col_7: ┃ col_7: ┃
2378+
┃ Row ┃ Expected ┃ Actual ┃ Expected ┃ Actual ┃ Expected ┃ Actual ┃
2379+
┡━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━┩
2380+
│ 0 │ 2 │ 5 │ 1 │ 6 │ 0 │ 7 │
2381+
└─────┴───────────┴───────────┴───────────┴───────────┴───────────┴────────────┘"""
2382+
in captured_output.stdout
2383+
)
2384+
23262385

23272386
@use_terminal_console
23282387
def test_test_output_with_invalid_model_name(tmp_path: Path) -> None:

0 commit comments

Comments
 (0)