Skip to content

Commit 7c40682

Browse files
committed
Feat: Ensure audits run even if adding them is a metadata change
1 parent 3271ae1 commit 7c40682

4 files changed

Lines changed: 211 additions & 64 deletions

File tree

sqlmesh/core/model/definition.py

Lines changed: 29 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1069,6 +1069,34 @@ def _data_hash_values(self) -> t.List[str]:
10691069

10701070
return data # type: ignore
10711071

1072+
def _audit_metadata(self) -> t.List[str]:
    """Return a deterministic list of strings describing this model's audits.

    The result feeds into the metadata hash, so the ordering must be stable:
    audits are sorted by name before being serialized.

    NOTE(review): for built-in audits, argument order follows ``audit_args``
    insertion order — presumably stable for a given model definition; confirm
    this cannot vary between loads, since it would change the hash.
    """
    # Imported locally (as in the original metadata_hash code) to avoid an
    # import cycle with the audit module.
    from sqlmesh.core.audit.builtin import BUILT_IN_AUDITS

    metadata = []

    # Sort by audit name so the serialized form is order-independent.
    for audit_name, audit_args in sorted(self.audits, key=lambda a: a[0]):
        metadata.append(audit_name)
        if audit_name in BUILT_IN_AUDITS:
            # Built-in audits are identified by name + rendered arguments only.
            for arg_name, arg_value in audit_args.items():
                metadata.append(arg_name)
                metadata.append(gen(arg_value))
        else:
            # User-defined audit: hash the fully rendered query (falling back
            # to the raw query if rendering returns nothing) plus the flags
            # that affect execution semantics.
            audit = self.audit_definitions[audit_name]
            query = (
                self.render_audit_query(audit, **t.cast(t.Dict[str, t.Any], audit_args))
                or audit.query
            )
            metadata.extend(
                [
                    gen(query),
                    audit.dialect,
                    str(audit.skip),
                    str(audit.blocking),
                ]
            )

    return metadata
1099+
10721100
@property
10731101
def metadata_hash(self) -> str:
10741102
"""
@@ -1078,8 +1106,6 @@ def metadata_hash(self) -> str:
10781106
The metadata hash for the node.
10791107
"""
10801108
if self._metadata_hash is None:
1081-
from sqlmesh.core.audit.builtin import BUILT_IN_AUDITS
1082-
10831109
metadata = [
10841110
self.dialect,
10851111
self.owner,
@@ -1100,29 +1126,9 @@ def metadata_hash(self) -> str:
11001126
str(self.allow_partials),
11011127
gen(self.session_properties_) if self.session_properties_ else None,
11021128
*[gen(g) for g in self.grains],
1129+
*self._audit_metadata(),
11031130
]
11041131

1105-
for audit_name, audit_args in sorted(self.audits, key=lambda a: a[0]):
1106-
metadata.append(audit_name)
1107-
if audit_name in BUILT_IN_AUDITS:
1108-
for arg_name, arg_value in audit_args.items():
1109-
metadata.append(arg_name)
1110-
metadata.append(gen(arg_value))
1111-
else:
1112-
audit = self.audit_definitions[audit_name]
1113-
query = (
1114-
self.render_audit_query(audit, **t.cast(t.Dict[str, t.Any], audit_args))
1115-
or audit.query
1116-
)
1117-
metadata.extend(
1118-
[
1119-
gen(query),
1120-
audit.dialect,
1121-
str(audit.skip),
1122-
str(audit.blocking),
1123-
]
1124-
)
1125-
11261132
for key, value in (self.virtual_properties or {}).items():
11271133
metadata.append(key)
11281134
metadata.append(gen(value))

sqlmesh/core/plan/evaluator.py

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,13 +34,15 @@
3434
SnapshotInfoLike,
3535
SnapshotTableInfo,
3636
)
37+
from sqlmesh.core.snapshot.definition import SnapshotChangeCategory, parent_snapshots_by_name
3738
from sqlmesh.utils import CompletionStatus
3839
from sqlmesh.core.state_sync import StateSync
3940
from sqlmesh.core.state_sync.base import PromotionResult
4041
from sqlmesh.utils.concurrency import NodeExecutionFailedError
4142
from sqlmesh.utils.errors import PlanError
4243
from sqlmesh.utils.dag import DAG
4344
from sqlmesh.utils.date import now
45+
from sqlmesh.utils.hashing import hash_data
4446

4547
logger = logging.getLogger(__name__)
4648

@@ -115,6 +117,10 @@ def evaluate(
115117
after_promote_snapshots = all_names - before_promote_snapshots
116118
deployability_index_for_evaluation = DeployabilityIndex.all_deployable()
117119

120+
self._run_audits_for_metadata_snapshots(
121+
new_snapshots, plan, deployability_index_for_evaluation
122+
)
123+
118124
execute_environment_statements(
119125
adapter=self.snapshot_evaluator.adapter,
120126
environment_statements=plan.environment_statements or [],
@@ -541,6 +547,72 @@ def _restatement_intervals_across_all_environments(
541547

542548
return set(snapshots_to_restate.values())
543549

550+
def _run_audits_for_metadata_snapshots(
    self,
    new_snapshots: t.Dict[SnapshotId, Snapshot],
    plan: EvaluatablePlan,
    deployability_index: DeployabilityIndex,
) -> None:
    """Run audits for metadata-only snapshots whose audit definitions changed.

    A metadata change normally skips evaluation, so a newly added or modified
    audit would otherwise never run against already-materialized intervals.
    This detects such snapshots by comparing audit-metadata hashes against the
    previous version and audits them over the previous snapshot's intervals.

    Args:
        new_snapshots: New snapshots in the plan, keyed by snapshot id.
        plan: The plan being applied; supplies environment and execution time.
        deployability_index: Determines deployability during auditing.

    Raises:
        PlanError: If any of the executed audits fails (errors are logged
            to the console first, then a single aggregate error is raised).
    """
    to_be_audited_snapshots = []

    for snapshot in new_snapshots.values():
        # Only metadata-categorized model snapshots that have a previous
        # version are candidates — there must be prior intervals to audit.
        if (
            snapshot.change_category != SnapshotChangeCategory.METADATA
            or not snapshot.previous_version
            or not snapshot.is_model
        ):
            continue

        previous_snapshot_id = snapshot.previous_version.snapshot_id(snapshot.name)
        previous_snapshot = self.state_sync.get_snapshots([previous_snapshot_id])[
            previous_snapshot_id
        ]

        new_audits = snapshot.model._audit_metadata()

        # Compare the audit metadata hashes to detect whether there was a
        # change in the audits field between the two versions.
        previous_audit_hash = hash_data(previous_snapshot.model._audit_metadata())
        current_audit_hash = hash_data(new_audits)

        # Only audit when the audits actually changed AND the new version
        # still has audits (a pure removal leaves nothing to run).
        if previous_audit_hash != current_audit_hash and new_audits:
            to_be_audited_snapshots.append((snapshot, previous_snapshot))

    if not to_be_audited_snapshots:
        return

    # NOTE(review): reaches into the scheduler's private _audit_snapshot —
    # consider promoting it to a public method on Scheduler.
    scheduler = self.create_scheduler(new_snapshots.values())
    raise_plan_error = False
    for to_be_audited_snapshot, previous_snapshot in to_be_audited_snapshots:
        parent_snapshots = parent_snapshots_by_name(to_be_audited_snapshot, new_snapshots)

        # The previous snapshot is the snapshot before the metadata change
        # and contains the latest intervals that we should use for the new audit
        for interval in previous_snapshot.intervals:
            start, end = interval

            try:
                scheduler._audit_snapshot(
                    to_be_audited_snapshot,
                    environment_naming_info=plan.environment.naming_info,
                    snapshots=parent_snapshots,
                    start=start,
                    end=end,
                    execution_time=plan.execution_time,
                    deployability_index=deployability_index,
                )
            except Exception as e:
                # Simulate a node execution failure with the audit error passed as the
                # cause in order to reuse log_failed_models
                error = NodeExecutionFailedError(
                    (to_be_audited_snapshot.name, ((start, end), -1))
                )
                error.__cause__ = e
                self.console.log_failed_models([error])
                raise_plan_error = True

    if raise_plan_error:
        raise PlanError("Plan application failed.")
615+
544616

545617
def update_intervals_for_new_snapshots(
546618
snapshots: t.Collection[Snapshot], state_sync: StateSync

sqlmesh/core/scheduler.py

Lines changed: 69 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -149,8 +149,9 @@ def evaluate(
149149
execution_time: TimeLike,
150150
deployability_index: DeployabilityIndex,
151151
batch_index: int,
152+
environment_naming_info: EnvironmentNamingInfo,
152153
**kwargs: t.Any,
153-
) -> t.Tuple[t.List[AuditResult], t.List[AuditError]]:
154+
) -> t.List[AuditResult]:
154155
"""Evaluate a snapshot and add the processed interval to the state sync.
155156
156157
Args:
@@ -182,8 +183,9 @@ def evaluate(
182183
batch_index=batch_index,
183184
**kwargs,
184185
)
185-
audit_results = self.snapshot_evaluator.audit(
186+
audit_results = self._audit_snapshot(
186187
snapshot=snapshot,
188+
environment_naming_info=environment_naming_info,
187189
start=start,
188190
end=end,
189191
execution_time=execution_time,
@@ -193,32 +195,8 @@ def evaluate(
193195
**kwargs,
194196
)
195197

196-
audit_errors_to_raise: t.List[AuditError] = []
197-
audit_errors_to_warn: t.List[AuditError] = []
198-
for audit_result in (result for result in audit_results if result.count):
199-
error = AuditError(
200-
audit_name=audit_result.audit.name,
201-
audit_args=audit_result.audit_args,
202-
model=snapshot.model_or_none,
203-
count=t.cast(int, audit_result.count),
204-
query=t.cast(exp.Query, audit_result.query),
205-
adapter_dialect=self.snapshot_evaluator.adapter.dialect,
206-
)
207-
self.notification_target_manager.notify(NotificationEvent.AUDIT_FAILURE, error)
208-
if is_deployable and snapshot.node.owner:
209-
self.notification_target_manager.notify_user(
210-
NotificationEvent.AUDIT_FAILURE, snapshot.node.owner, error
211-
)
212-
if audit_result.blocking:
213-
audit_errors_to_raise.append(error)
214-
else:
215-
audit_errors_to_warn.append(error)
216-
217-
if audit_errors_to_raise:
218-
raise NodeAuditsErrors(audit_errors_to_raise)
219-
220198
self.state_sync.add_interval(snapshot, start, end, is_dev=not is_deployable)
221-
return audit_results, audit_errors_to_warn
199+
return audit_results
222200

223201
def run(
224202
self,
@@ -465,30 +443,19 @@ def evaluate_node(node: SchedulingUnit) -> None:
465443
evaluation_duration_ms: t.Optional[int] = None
466444

467445
audit_results: t.List[AuditResult] = []
468-
audit_errors_to_warn: t.List[AuditError] = []
469446
try:
470447
assert execution_time # mypy
471448
assert deployability_index # mypy
472-
audit_results, audit_errors_to_warn = self.evaluate(
449+
audit_results = self.evaluate(
473450
snapshot=snapshot,
451+
environment_naming_info=environment_naming_info,
474452
start=start,
475453
end=end,
476454
execution_time=execution_time,
477455
deployability_index=deployability_index,
478456
batch_index=batch_idx,
479457
)
480458

481-
for audit_error in audit_errors_to_warn:
482-
display_name = snapshot.display_name(
483-
environment_naming_info,
484-
self.default_catalog,
485-
self.snapshot_evaluator.adapter.dialect,
486-
)
487-
self.console.log_warning(
488-
f"\n{display_name}: {audit_error}.",
489-
f"{audit_error}. Audit query:\n{audit_error.query.sql(audit_error.adapter_dialect)}",
490-
)
491-
492459
evaluation_duration_ms = now_timestamp() - execution_start_ts
493460
finally:
494461
num_audits = len(audit_results)
@@ -583,6 +550,68 @@ def _dag(self, batches: SnapshotToIntervals) -> DAG[SchedulingUnit]:
583550
)
584551
return dag
585552

553+
def _audit_snapshot(
    self,
    snapshot: Snapshot,
    environment_naming_info: EnvironmentNamingInfo,
    deployability_index: DeployabilityIndex,
    snapshots: t.Dict[str, Snapshot],
    start: t.Optional[TimeLike] = None,
    end: t.Optional[TimeLike] = None,
    execution_time: t.Optional[TimeLike] = None,
    wap_id: t.Optional[str] = None,
    **kwargs: t.Any,
) -> t.List[AuditResult]:
    """Run a snapshot's audits and dispatch notifications for failures.

    Blocking audit failures are raised as a single ``NodeAuditsErrors``;
    non-blocking failures are logged to the console as warnings. Extracted
    from ``evaluate`` so it can also be invoked for metadata-only changes.

    Args:
        snapshot: The snapshot to audit.
        environment_naming_info: Used to render the snapshot's display name
            in warning messages.
        deployability_index: Determines whether the snapshot is deployable
            (owner notifications only fire for deployable snapshots).
        snapshots: Parent snapshots by name, passed to the evaluator.
        start / end / execution_time: Interval being audited.
        wap_id: Optional write-audit-publish id, forwarded to the evaluator.

    Returns:
        The list of audit results (including passing ones).

    Raises:
        NodeAuditsErrors: If any blocking audit reported failing rows.
    """
    is_deployable = deployability_index.is_deployable(snapshot)

    audit_results = self.snapshot_evaluator.audit(
        snapshot=snapshot,
        start=start,
        end=end,
        execution_time=execution_time,
        snapshots=snapshots,
        deployability_index=deployability_index,
        wap_id=wap_id,
        **kwargs,
    )

    audit_errors_to_raise: t.List[AuditError] = []
    audit_errors_to_warn: t.List[AuditError] = []
    # A non-zero count means the audit query returned failing rows.
    for audit_result in (result for result in audit_results if result.count):
        error = AuditError(
            audit_name=audit_result.audit.name,
            audit_args=audit_result.audit_args,
            model=snapshot.model_or_none,
            count=t.cast(int, audit_result.count),
            query=t.cast(exp.Query, audit_result.query),
            adapter_dialect=self.snapshot_evaluator.adapter.dialect,
        )
        # Always notify the configured targets; additionally notify the node
        # owner directly, but only for deployable snapshots.
        self.notification_target_manager.notify(NotificationEvent.AUDIT_FAILURE, error)
        if is_deployable and snapshot.node.owner:
            self.notification_target_manager.notify_user(
                NotificationEvent.AUDIT_FAILURE, snapshot.node.owner, error
            )
        if audit_result.blocking:
            audit_errors_to_raise.append(error)
        else:
            audit_errors_to_warn.append(error)

    if audit_errors_to_raise:
        raise NodeAuditsErrors(audit_errors_to_raise)

    # Non-blocking failures: surface as console warnings but do not raise.
    for audit_error in audit_errors_to_warn:
        display_name = snapshot.display_name(
            environment_naming_info,
            self.default_catalog,
            self.snapshot_evaluator.adapter.dialect,
        )
        self.console.log_warning(
            f"\n{display_name}: {audit_error}.",
            f"{audit_error}. Audit query:\n{audit_error.query.sql(audit_error.adapter_dialect)}",
        )

    return audit_results
614+
586615

587616
def compute_interval_params(
588617
snapshots: t.Collection[Snapshot],

tests/core/test_context.py

Lines changed: 41 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from datetime import date, timedelta
66
from tempfile import TemporaryDirectory
77
from unittest.mock import PropertyMock, call, patch
8+
from IPython.utils.capture import capture_output
89

910
import time_machine
1011
import pytest
@@ -1906,7 +1907,7 @@ def create_log_view(evaluator, view_name):
19061907
assert log_schema["my_schema"][0] == "db__dev"
19071908

19081909

1909-
def test_plan_audit_intervals(tmp_path: pathlib.Path, capsys, caplog):
1910+
def test_plan_audit_intervals(tmp_path: pathlib.Path, caplog):
19101911
ctx = Context(
19111912
paths=tmp_path, config=Config(model_defaults=ModelDefaultsConfig(dialect="duckdb"))
19121913
)
@@ -2048,3 +2049,42 @@ def test_audit():
20482049
context.plan(no_prompts=True, auto_apply=True)
20492050

20502051
assert context.audit(models=["dummy"], start="2020-01-01", end="2020-01-01") is True
2052+
2053+
2054+
@use_terminal_console
def test_audits_running_on_metadata_changes(tmp_path: pathlib.Path):
    """Adding an audit is a metadata-only change, yet the audit must still run.

    Each scenario creates a model without audits, applies a plan, then adds an
    audit (a metadata change) and verifies the next plan fails because the new
    audit is executed immediately against the existing intervals.
    """

    # Fixed typo: was `setup_senario`.
    def setup_scenario(model_before: str, model_after: str):
        models_dir = pathlib.Path("models")
        create_temp_file(tmp_path, pathlib.Path(models_dir, "test.sql"), model_before)

        # Create first snapshot
        context = Context(paths=tmp_path, config=Config())
        context.plan("prod", no_prompts=True, auto_apply=True)

        # Create second (metadata) snapshot
        create_temp_file(tmp_path, pathlib.Path(models_dir, "test.sql"), model_after)
        context.load()

        # The plan must fail because the newly added audit is executed as part
        # of applying the metadata change.
        with capture_output() as output:
            with pytest.raises(PlanError):
                context.plan("prod", no_prompts=True, auto_apply=True)

        assert 'Failed models\n\n "model"' in output.stdout

        return output

    # Ensure incorrect audits (bad data, incorrect definition, etc.) are evaluated immediately
    output = setup_scenario(
        "MODEL (name model); SELECT NULL AS col",
        "MODEL (name model, audits (not_null(columns=[col]))); SELECT NULL AS col",
    )
    assert "'not_null' audit error: 1 row failed" in output.stdout

    # An audit referencing a nonexistent column must surface the engine error.
    output = setup_scenario(
        "MODEL (name model); SELECT NULL AS col",
        "MODEL (name model, audits (not_null(columns=[this_col_does_not_exist]))); SELECT NULL AS col",
    )
    assert (
        'Binder Error: Referenced column "this_col_does_not_exist" not found in \nFROM clause!'
        in output.stdout
    )

0 commit comments

Comments
 (0)