|
35 | 35 | SnapshotTableInfo, |
36 | 36 | SnapshotCreationFailedError, |
37 | 37 | ) |
38 | | -from sqlmesh.core.snapshot.definition import SnapshotChangeCategory, parent_snapshots_by_name |
39 | 38 | from sqlmesh.utils import CompletionStatus |
40 | 39 | from sqlmesh.core.state_sync import StateSync |
41 | 40 | from sqlmesh.core.state_sync.base import PromotionResult |
42 | 41 | from sqlmesh.utils.concurrency import NodeExecutionFailedError |
43 | 42 | from sqlmesh.utils.errors import PlanError |
44 | 43 | from sqlmesh.utils.dag import DAG |
45 | 44 | from sqlmesh.utils.date import now |
46 | | -from sqlmesh.utils.hashing import hash_data |
47 | 45 |
|
48 | 46 | logger = logging.getLogger(__name__) |
49 | 47 |
|
@@ -118,9 +116,7 @@ def evaluate( |
118 | 116 | after_promote_snapshots = all_names - before_promote_snapshots |
119 | 117 | deployability_index_for_evaluation = DeployabilityIndex.all_deployable() |
120 | 118 |
|
121 | | - self._run_audits_for_metadata_snapshots( |
122 | | - new_snapshots, plan, deployability_index_for_evaluation |
123 | | - ) |
| 119 | + self._run_audits_for_metadata_snapshots(plan, snapshots, new_snapshots) |
124 | 120 |
|
125 | 121 | execute_environment_statements( |
126 | 122 | adapter=self.snapshot_evaluator.adapter, |
@@ -553,69 +549,56 @@ def _restatement_intervals_across_all_environments( |
553 | 549 |
|
def _run_audits_for_metadata_snapshots(
    self,
    plan: EvaluatablePlan,
    snapshots: t.Dict[SnapshotId, Snapshot],
    new_snapshots: t.Dict[SnapshotId, Snapshot],
) -> None:
    """Re-run audits for model snapshots whose metadata-only change altered their audits.

    A metadata-only change does not trigger re-evaluation, so if the change touched the
    `audits` field the new audits would otherwise never run against existing data. This
    detects such snapshots by comparing audit-metadata hashes against each snapshot's
    previous version and reuses the scheduler's restatement machinery to audit the
    already-materialized intervals.

    Args:
        plan: The plan being applied; supplies the environment, date range and
            execution time for the audit run.
        snapshots: All snapshots in the plan, used to build the scheduler.
        new_snapshots: Snapshots newly added by this plan; candidates for auditing.

    Raises:
        PlanError: If any of the re-run audits fail.
    """
    # Step 1: Keep only evaluatable model snapshots categorized as metadata-only
    # changes. Requiring previous_version here (rather than when bulk-loading below)
    # guarantees the per-snapshot lookup in Step 3 can never mispair a snapshot
    # with the wrong previous version.
    metadata_snapshots = [
        s
        for s in new_snapshots.values()
        if s.is_metadata and s.is_model and s.evaluatable and s.previous_version
    ]
    if not metadata_snapshots:
        return

    # Step 2: Bulk-load the previous snapshots from state, keyed by snapshot id so
    # the pairing below is independent of the result's iteration order.
    previous_snapshots_by_id = self.state_sync.get_snapshots(
        [s.previous_version.snapshot_id(s.name) for s in metadata_snapshots]
    )

    # Step 3: Compare the audit metadata hashes to determine if there was a change
    # in the audits field.
    to_be_audited_snapshots = {}
    for snapshot in metadata_snapshots:
        previous_snapshot = previous_snapshots_by_id[
            snapshot.previous_version.snapshot_id(snapshot.name)
        ]
        new_audits, new_audits_hash = snapshot.model.audit_metadata_hash()
        _, previous_audit_hash = previous_snapshot.model.audit_metadata_hash()

        # Only audit when the audits actually changed, are non-empty, and there are
        # materialized intervals to audit (min/max would raise on an empty list).
        if previous_audit_hash != new_audits_hash and new_audits and snapshot.intervals:
            snapshot_start = min(interval[0] for interval in snapshot.intervals)
            snapshot_end = max(interval[1] for interval in snapshot.intervals)
            to_be_audited_snapshots[snapshot.snapshot_id] = (snapshot_start, snapshot_end)

    if not to_be_audited_snapshots:
        return

    # Step 4: If there are any snapshots to be audited, we'll reuse the scheduler's
    # internals to audit them by utilizing the restatement logic.
    scheduler = self.create_scheduler(snapshots.values())
    completion_status = scheduler.audit(
        plan.environment,
        plan.start,
        plan.end,
        execution_time=plan.execution_time,
        restatements=to_be_audited_snapshots,
        end_bounded=plan.end_bounded,
        interval_end_per_model=plan.interval_end_per_model,
    )

    if completion_status.is_failure:
        raise PlanError("Plan application failed.")
619 | 602 |
|
620 | 603 |
|
621 | 604 | def update_intervals_for_new_snapshots( |
|
0 commit comments