|
| 1 | +import pytest |
| 2 | +from pathlib import Path |
| 3 | +from sqlmesh.cli.example_project import init_example_project |
| 4 | +from sqlmesh.core.config import Config, ModelDefaultsConfig |
| 5 | +from sqlmesh.core.context import Context |
| 6 | +from sqlmesh.utils.errors import ConfigError |
| 7 | + |
| 8 | + |
| 9 | +@pytest.fixture |
| 10 | +def sample_models(request): |
| 11 | + models = { |
| 12 | + "sql": { |
| 13 | + "contents": """ |
| 14 | +MODEL ( |
| 15 | + name test_schema.test_model, |
| 16 | + kind FULL, |
| 17 | +); |
| 18 | +
|
| 19 | +SELECT 1; |
| 20 | +""", |
| 21 | + "path": "models/sql_model.sql", |
| 22 | + }, |
| 23 | + "python": { |
| 24 | + "contents": """import typing as t |
| 25 | +import pandas as pd |
| 26 | +from sqlmesh import ExecutionContext, model |
| 27 | +
|
| 28 | +@model( |
| 29 | + "test_schema.test_model", |
| 30 | + kind="FULL", |
| 31 | + columns={ |
| 32 | + "id": "int", |
| 33 | + } |
| 34 | +) |
| 35 | +def execute( |
| 36 | + context: ExecutionContext, |
| 37 | + **kwargs: t.Any, |
| 38 | +) -> pd.DataFrame: |
| 39 | + return pd.DataFrame([ |
| 40 | + {"id": 1} |
| 41 | + ]) |
| 42 | +""", |
| 43 | + "path": "models/python_model.py", |
| 44 | + }, |
| 45 | + "external": { |
| 46 | + "contents": """ |
| 47 | +- name: test_schema.test_model |
| 48 | + columns: |
| 49 | + id: INT |
| 50 | +""", |
| 51 | + "path": "external_models/external_model.yaml", |
| 52 | + }, |
| 53 | + } |
| 54 | + requested_models = request.param.split("_") |
| 55 | + return [v for k, v in models.items() if k in requested_models] |
| 56 | + |
| 57 | + |
| 58 | +@pytest.mark.parametrize( |
| 59 | + "sample_models", |
| 60 | + ["sql_python", "python_external", "sql_external", "sql_python_external"], |
| 61 | + indirect=True, |
| 62 | +) |
| 63 | +def test_duplicate_model_names_different_kind(tmp_path: Path, sample_models): |
| 64 | + """Test different (SQL, Python and external) models with duplicate model names raises ValueError.""" |
| 65 | + model_1, *models = sample_models |
| 66 | + if len(models) == 2: |
| 67 | + model_2, model_3 = models |
| 68 | + else: |
| 69 | + model_2, model_3 = models[0], None |
| 70 | + |
| 71 | + init_example_project(tmp_path, dialect="duckdb") |
| 72 | + config = Config(model_defaults=ModelDefaultsConfig(dialect="duckdb")) |
| 73 | + |
| 74 | + path_1: Path = tmp_path / model_1["path"] |
| 75 | + path_2: Path = tmp_path / model_2["path"] |
| 76 | + |
| 77 | + path_1.parent.mkdir(parents=True, exist_ok=True) |
| 78 | + path_1.write_text(model_1["contents"]) |
| 79 | + path_2.parent.mkdir(parents=True, exist_ok=True) |
| 80 | + path_2.write_text(model_2["contents"]) |
| 81 | + |
| 82 | + if model_3: |
| 83 | + path_3: Path = tmp_path / model_3["path"] |
| 84 | + path_3.parent.mkdir(parents=True, exist_ok=True) |
| 85 | + path_3.write_text(model_3["contents"]) |
| 86 | + |
| 87 | + with pytest.raises( |
| 88 | + ValueError, match=r'Duplicate model name\(s\) found: "memory"."test_schema"."test_model".' |
| 89 | + ): |
| 90 | + Context(paths=tmp_path, config=config) |
| 91 | + |
| 92 | + |
| 93 | +@pytest.mark.parametrize("sample_models", ["sql", "external"], indirect=True) |
| 94 | +def test_duplicate_model_names_same_kind(tmp_path: Path, sample_models): |
| 95 | + """Test same (SQL and external) models with duplicate model names raises ValueError.""" |
| 96 | + |
| 97 | + def duplicate_model_path(fpath): |
| 98 | + return Path(fpath).parent / ("duplicate" + Path(fpath).suffix) |
| 99 | + |
| 100 | + model = sample_models[0] |
| 101 | + init_example_project(tmp_path, dialect="duckdb") |
| 102 | + config = Config(model_defaults=ModelDefaultsConfig(dialect="duckdb")) |
| 103 | + |
| 104 | + path_1: Path = tmp_path / model["path"] |
| 105 | + path_1.parent.mkdir(parents=True, exist_ok=True) |
| 106 | + path_1.write_text(model["contents"]) |
| 107 | + |
| 108 | + duplicate_fpath = tmp_path / duplicate_model_path(model["path"]) |
| 109 | + duplicate_fpath.write_text(model["contents"]) |
| 110 | + |
| 111 | + with pytest.raises( |
| 112 | + ValueError, |
| 113 | + match=r'Duplicate key \'"memory"."test_schema"."test_model"\' found in UniqueKeyDict<models>. Call dict.update\(\.\.\.\) if this is intentional.', |
| 114 | + ): |
| 115 | + Context(paths=tmp_path, config=config) |
| 116 | + |
| 117 | + |
| 118 | +@pytest.mark.isolated |
| 119 | +def test_duplicate_python_model_names_raise_error(tmp_path: Path) -> None: |
| 120 | + """Test python models with duplicate model names raises ConfigError if the functions are not identical.""" |
| 121 | + init_example_project(tmp_path, dialect="duckdb") |
| 122 | + config = Config(model_defaults=ModelDefaultsConfig(dialect="duckdb")) |
| 123 | + model_name = "test_schema.test_model" |
| 124 | + |
| 125 | + path_a = tmp_path / "models/test_schema/test_model_a.py" |
| 126 | + path_b = tmp_path / "models/test_schema/test_model_b.py" |
| 127 | + |
| 128 | + model_payload_a = f"""from sqlmesh import model |
| 129 | +@model( |
| 130 | + name="{model_name}", |
| 131 | + columns={{'"COL"': "int"}}, |
| 132 | +) |
| 133 | +def my_model(context, **kwargs): |
| 134 | + pass""" |
| 135 | + |
| 136 | + model_payload_b = f"""import typing as t |
| 137 | +import pandas as pd |
| 138 | +from sqlmesh import ExecutionContext, model |
| 139 | +
|
| 140 | +@model( |
| 141 | + name="{model_name}", |
| 142 | + kind="FULL", |
| 143 | + columns={{ |
| 144 | + "id": "int", |
| 145 | + }} |
| 146 | +) |
| 147 | +def execute( |
| 148 | + context: ExecutionContext, |
| 149 | + **kwargs: t.Any, |
| 150 | +) -> pd.DataFrame: |
| 151 | + return pd.DataFrame([ |
| 152 | + {{"id": 1}} |
| 153 | + ]) |
| 154 | +""" |
| 155 | + |
| 156 | + path_a.parent.mkdir(parents=True, exist_ok=True) |
| 157 | + path_a.write_text(model_payload_a) |
| 158 | + path_b.write_text(model_payload_b) |
| 159 | + |
| 160 | + with pytest.raises( |
| 161 | + ConfigError, |
| 162 | + match=r"Failed to load model definition at '.*'.\nDuplicate key 'test_schema.test_model' found in UniqueKeyDict<python_models>.", |
| 163 | + ): |
| 164 | + Context(paths=tmp_path, config=config) |
| 165 | + |
| 166 | + |
| 167 | +@pytest.mark.slow |
| 168 | +def test_duplicate_python_model_names_no_error(tmp_path: Path) -> None: |
| 169 | + """Test python models with duplicate model names raises no error if the functions are identical.""" |
| 170 | + init_example_project(tmp_path, dialect="duckdb") |
| 171 | + config = Config(model_defaults=ModelDefaultsConfig(dialect="duckdb")) |
| 172 | + model_name = "test_schema.test_model" |
| 173 | + |
| 174 | + path_a = tmp_path / "models/test_schema1/test_model_a.py" |
| 175 | + path_b = tmp_path / "models/test_schema2/test_model_b.py" |
| 176 | + |
| 177 | + model_payload_a = f"""from sqlmesh import model |
| 178 | +@model( |
| 179 | + name="{model_name}", |
| 180 | + columns={{'"COL"': "int"}}, |
| 181 | + description="model_payload_a", |
| 182 | +) |
| 183 | +def my_model(context, **kwargs): |
| 184 | + pass""" |
| 185 | + |
| 186 | + model_payload_b = f"""from sqlmesh import model |
| 187 | +@model( |
| 188 | + name="{model_name}", |
| 189 | + columns={{'"COL"': "int"}}, |
| 190 | + description="model_payload_b", |
| 191 | +) |
| 192 | +def my_model(context, **kwargs): |
| 193 | + pass""" |
| 194 | + |
| 195 | + path_a.parent.mkdir(parents=True, exist_ok=True) |
| 196 | + path_b.parent.mkdir(parents=True, exist_ok=True) |
| 197 | + path_a.write_text(model_payload_a) |
| 198 | + context = Context(paths=tmp_path, config=config) |
| 199 | + context.load() |
| 200 | + model = context.get_model(f"{model_name}") |
| 201 | + assert model.description == "model_payload_a" |
| 202 | + path_b.write_text(model_payload_b) |
| 203 | + context.load() # raise no error to duplicate key if the functions are identical (by registry class_method) |
| 204 | + model = context.get_model(f"{model_name}") |
| 205 | + assert ( |
| 206 | + model.description != "model_payload_b" |
| 207 | + ) # model will not be overwritten by model_payload_b |
| 208 | + assert model.description == "model_payload_a" |
0 commit comments