Skip to content

Commit a4a4123

Browse files
committed
Fix!: make metadata_only a transitive property in Python objects
1 parent 8db5700 commit a4a4123

2 files changed

Lines changed: 162 additions & 9 deletions

File tree

sqlmesh/utils/metaprogramming.py

Lines changed: 35 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -283,13 +283,25 @@ def build_env(
283283
# We don't rely on `env` to keep track of visited objects, because it's populated in post-order
284284
visited: t.Set[str] = set()
285285

286-
def walk(obj: t.Any, name: str) -> None:
286+
def walk(obj: t.Any, name: str, is_metadata: t.Optional[bool] = None) -> None:
287287
obj_module = inspect.getmodule(obj)
288288
if name in visited or (obj_module and obj_module.__name__ == "builtins"):
289289
return
290290

291291
visited.add(name)
292-
if name not in env:
292+
name_missing_from_env = name not in env
293+
294+
if name_missing_from_env or (
295+
not is_metadata and env[name] == obj and getattr(env[name], c.SQLMESH_METADATA, None)
296+
):
297+
if not name_missing_from_env:
298+
# The existing object in the env is "metadata only" but we're walking it again as a
299+
# non-"metadata only" dependency, so we update this flag to ensure all transitive
300+
# dependencies are also not marked as "metadata only"
301+
is_metadata = False
302+
if hasattr(obj, c.SQLMESH_METADATA):
303+
delattr(obj, c.SQLMESH_METADATA)
304+
293305
if hasattr(obj, c.SQLMESH_MACRO):
294306
# We only need to add the undecorated code of @macro() functions in env, which
295307
# is accessible through the `__wrapped__` attribute added by functools.wraps
@@ -308,6 +320,11 @@ def walk(obj: t.Any, name: str) -> None:
308320
or not hasattr(obj_module, "__file__")
309321
or not _is_relative_to(obj_module.__file__, path)
310322
):
323+
if is_metadata:
324+
setattr(obj, c.SQLMESH_METADATA, True)
325+
elif hasattr(obj, c.SQLMESH_METADATA):
326+
delattr(obj, c.SQLMESH_METADATA)
327+
311328
env[name] = obj
312329
return
313330
elif env[name] != obj:
@@ -318,10 +335,10 @@ def walk(obj: t.Any, name: str) -> None:
318335
if inspect.isclass(obj):
319336
for var in decorator_vars(obj):
320337
if obj_module and var in obj_module.__dict__:
321-
walk(obj_module.__dict__[var], var)
338+
walk(obj_module.__dict__[var], var, is_metadata)
322339

323340
for base in obj.__bases__:
324-
walk(base, base.__qualname__)
341+
walk(base, base.__qualname__, is_metadata)
325342

326343
for k, v in obj.__dict__.items():
327344
if k.startswith("__"):
@@ -335,19 +352,23 @@ def walk(obj: t.Any, name: str) -> None:
335352
# Walk the method if it's part of the object, else it's a global function and we just store it
336353
if v.__qualname__.startswith(obj.__qualname__):
337354
for k, v in func_globals(v).items():
338-
walk(v, k)
355+
walk(v, k, is_metadata)
339356
else:
340-
walk(v, v.__name__)
357+
walk(v, v.__name__, is_metadata)
341358
elif callable(obj):
342359
for k, v in func_globals(obj).items():
343-
walk(v, k)
360+
walk(v, k, is_metadata)
361+
362+
if is_metadata:
363+
setattr(obj, c.SQLMESH_METADATA, True)
344364

345365
# We store the object in the environment after its dependencies, because otherwise we
346366
# could crash at environment hydration time, since dicts are ordered and the top-level
347367
# objects would be loaded before their dependencies.
348368
env[name] = obj
349369

350-
walk(obj, name)
370+
# The "metadata only" annotation of the object is transitive
371+
walk(obj, name, getattr(obj, c.SQLMESH_METADATA, None))
351372

352373

353374
@dataclass
@@ -411,6 +432,8 @@ def serialize_env(env: t.Dict[str, t.Any], path: Path) -> t.Dict[str, Executable
411432
serialized = {}
412433

413434
for k, v in env.items():
435+
is_metadata = getattr(v, c.SQLMESH_METADATA, None)
436+
414437
if isinstance(v, LITERALS) or v is None:
415438
serialized[k] = Executable.value(v)
416439
elif inspect.ismodule(v):
@@ -423,6 +446,7 @@ def serialize_env(env: t.Dict[str, t.Any], path: Path) -> t.Dict[str, Executable
423446
serialized[k] = Executable(
424447
payload=f"import {name}{postfix}",
425448
kind=ExecutableKind.IMPORT,
449+
is_metadata=is_metadata,
426450
)
427451
elif callable(v):
428452
name = v.__name__
@@ -447,6 +471,7 @@ def serialize_env(env: t.Dict[str, t.Any], path: Path) -> t.Dict[str, Executable
447471
v = wrapped
448472
file_path = Path(inspect.getfile(wrapped))
449473
relative_obj_file_path = _is_relative_to(file_path, path)
474+
is_metadata = is_metadata or getattr(v, c.SQLMESH_METADATA, None)
450475
except TypeError:
451476
file_path = None
452477
relative_obj_file_path = False
@@ -459,12 +484,13 @@ def serialize_env(env: t.Dict[str, t.Any], path: Path) -> t.Dict[str, Executable
459484
# Do `as_posix` to serialize windows path back to POSIX
460485
path=t.cast(Path, file_path).relative_to(path.absolute()).as_posix(),
461486
alias=k if name != k else None,
462-
is_metadata=getattr(v, c.SQLMESH_METADATA, None),
487+
is_metadata=is_metadata,
463488
)
464489
else:
465490
serialized[k] = Executable(
466491
payload=f"from {v.__module__} import {name}",
467492
kind=ExecutableKind.IMPORT,
493+
is_metadata=is_metadata,
468494
)
469495
else:
470496
raise SQLMeshError(

tests/core/test_model.py

Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8665,3 +8665,130 @@ def test_data_hash_unchanged_when_column_type_uses_default_dialect():
86658665

86668666
# int == int64 in bigquery
86678667
assert model.data_hash == deserialized_model.data_hash
8668+
8669+
8670+
def test_transitive_dependency_of_metadata_only_object_is_metadata_only(tmp_path: Path) -> None:
8671+
init_example_project(tmp_path, dialect="duckdb", template=ProjectTemplate.EMPTY)
8672+
8673+
test_model = tmp_path / "models/test_model.sql"
8674+
test_model.parent.mkdir(parents=True, exist_ok=True)
8675+
test_model.write_text("MODEL (name test_model, kind FULL); @metadata_macro(); SELECT 1 AS c")
8676+
8677+
metadata_macro_code = """
8678+
from sqlglot import parse_one
8679+
from sqlmesh import macro
8680+
8681+
def get_parsed_query():
8682+
return parse_one("SELECT 1")
8683+
8684+
@macro(metadata_only=True)
8685+
def metadata_macro(evaluator):
8686+
if evaluator.runtime_stage == "evaluating":
8687+
evaluator.engine_adapter.execute({query})"""
8688+
8689+
metadata_macro = tmp_path / "macros/metadata_macro.py"
8690+
metadata_macro.parent.mkdir(parents=True, exist_ok=True)
8691+
metadata_macro.write_text(metadata_macro_code.format(query='"SELECT 1"'))
8692+
8693+
ctx = Context(
8694+
config=Config(model_defaults=ModelDefaultsConfig(dialect="duckdb")),
8695+
paths=tmp_path,
8696+
)
8697+
8698+
model = ctx.get_model("test_model")
8699+
empty_executable = Executable(payload="")
8700+
8701+
assert len(model.python_env) == 1
8702+
assert (model.python_env.get("metadata_macro") or empty_executable).is_metadata
8703+
8704+
ctx.plan(no_prompts=True, auto_apply=True)
8705+
8706+
# This should make `parse_one` a dependency and so it'll be added in the python env
8707+
metadata_macro.write_text(metadata_macro_code.format(query='parse_one("SELECT 1")'))
8708+
8709+
ctx.load()
8710+
model = ctx.get_model("test_model")
8711+
8712+
assert len(model.python_env) == 2
8713+
assert (model.python_env.get("metadata_macro") or empty_executable).is_metadata
8714+
assert (model.python_env.get("parse_one") or empty_executable).is_metadata
8715+
8716+
plan = ctx.plan(no_prompts=True, auto_apply=True)
8717+
ctx_diff = plan.context_diff
8718+
8719+
assert len(ctx_diff.modified_snapshots) == 1
8720+
8721+
new_snapshot, _ = ctx_diff.modified_snapshots['"test_model"']
8722+
assert new_snapshot.change_category == SnapshotChangeCategory.METADATA
8723+
8724+
# This should make `get_parsed_query` a dependency and so it'll be added in
8725+
# the python env, carrying `parse_one` with it as another transitive dependency
8726+
metadata_macro.write_text(metadata_macro_code.format(query="get_parsed_query()"))
8727+
8728+
ctx.load()
8729+
model = ctx.get_model("test_model")
8730+
8731+
assert len(model.python_env) == 3
8732+
assert (model.python_env.get("metadata_macro") or empty_executable).is_metadata
8733+
assert (model.python_env.get("get_parsed_query") or empty_executable).is_metadata
8734+
assert (model.python_env.get("parse_one") or empty_executable).is_metadata
8735+
8736+
plan = ctx.plan(no_prompts=True, auto_apply=True)
8737+
ctx_diff = plan.context_diff
8738+
8739+
assert len(ctx_diff.modified_snapshots) == 1
8740+
8741+
new_snapshot, _ = ctx_diff.modified_snapshots['"test_model"']
8742+
assert new_snapshot.change_category == SnapshotChangeCategory.METADATA
8743+
8744+
8745+
def test_non_metadata_object_takes_precedence_over_metadata_only_object(tmp_path: Path) -> None:
8746+
init_example_project(tmp_path, dialect="duckdb", template=ProjectTemplate.EMPTY)
8747+
8748+
test_model = tmp_path / "models/test_model.sql"
8749+
test_model.parent.mkdir(parents=True, exist_ok=True)
8750+
test_model.write_text("MODEL (name test_model, kind FULL); @m1(); @m2(); SELECT 1 AS c")
8751+
8752+
macro_code = """
8753+
from sqlglot import parse_one
8754+
from sqlmesh import macro
8755+
8756+
def common_dep():
8757+
pass
8758+
8759+
def m1_dep():
8760+
pass
8761+
8762+
@macro(metadata_only=True)
8763+
def m1(evaluator):
8764+
m1_dep()
8765+
common_dep()
8766+
if evaluator.runtime_stage == "evaluating":
8767+
evaluator.engine_adapter.execute(parse_one("SELECT 1"))
8768+
8769+
@macro()
8770+
def m2(evaluator):
8771+
common_dep()
8772+
if evaluator.runtime_stage == "evaluating":
8773+
evaluator.engine_adapter.execute(parse_one("SELECT 1"))"""
8774+
8775+
test_macros = tmp_path / "macros/test_macros.py"
8776+
test_macros.parent.mkdir(parents=True, exist_ok=True)
8777+
test_macros.write_text(macro_code)
8778+
8779+
ctx = Context(
8780+
config=Config(model_defaults=ModelDefaultsConfig(dialect="duckdb")),
8781+
paths=tmp_path,
8782+
)
8783+
model = ctx.get_model("test_model")
8784+
empty_executable = Executable(payload="")
8785+
8786+
# Both `m1` and `m2` refer to `parse_one`, so for `m1` it would be a transitive metadata-only
8787+
# object, but since the python env is a flat namespace and `parse_one` is also a dependency
8788+
# of `m2`, it needs to be treated as non-metadata.
8789+
assert len(model.python_env) == 5
8790+
assert (model.python_env.get("m1") or empty_executable).is_metadata
8791+
assert (model.python_env.get("m1_dep") or empty_executable).is_metadata
8792+
assert not (model.python_env.get("m2") or empty_executable).is_metadata
8793+
assert not (model.python_env.get("parse_one") or empty_executable).is_metadata
8794+
assert not (model.python_env.get("common_dep") or empty_executable).is_metadata

0 commit comments

Comments
 (0)