@@ -147,7 +147,7 @@ def merged_missing_intervals(
147147 ignore_cron : bool = False ,
148148 end_bounded : bool = False ,
149149 selected_snapshots : t .Optional [t .Set [str ]] = None ,
150- ) -> SnapshotToIntervals :
150+ ) -> t . Tuple [ SnapshotToIntervals , t . List [ SnapshotId ]] :
151151 """Find the largest contiguous date interval parameters based only on what is missing.
152152
153153 For each node name, find all dependencies and look for a stored snapshot from the metastore. If a snapshot is found,
@@ -167,8 +167,11 @@ def merged_missing_intervals(
167167 end_bounded: If set to true, the returned intervals will be bounded by the target end date, disregarding lookback,
168168 allow_partials, and other attributes that could cause the intervals to exceed the target end date.
169169 selected_snapshots: A set of snapshot names to run. If not provided, all snapshots will be run.
170+
171+ Returns:
172+ A tuple of (a dict mapping each snapshot that needs to be run to its associated interval params, a list of the IDs of snapshots that are ready to run based on their naive cron schedule, ignoring plan/run context and other attributes).
170173 """
171- snapshots_to_intervals = merged_missing_intervals (
174+ snapshots_to_intervals , snapshots_naive_cron_ready = merged_missing_intervals (
172175 snapshots = self .snapshot_per_version .values (),
173176 start = start ,
174177 end = end ,
@@ -186,7 +189,7 @@ def merged_missing_intervals(
186189 snapshots_to_intervals = {
187190 s : i for s , i in snapshots_to_intervals .items () if s .name in selected_snapshots
188191 }
189- return snapshots_to_intervals
192+ return snapshots_to_intervals , snapshots_naive_cron_ready
190193
191194 def evaluate (
192195 self ,
@@ -755,7 +758,7 @@ def _run_or_audit(
755758 {s .name_version : s .next_auto_restatement_ts for s in self .snapshots .values ()}
756759 )
757760
758- merged_intervals = self .merged_missing_intervals (
761+ merged_intervals , snapshots_naive_cron_ready = self .merged_missing_intervals (
759762 start ,
760763 end ,
761764 execution_time ,
@@ -770,9 +773,7 @@ def _run_or_audit(
770773 if not merged_intervals :
771774 return CompletionStatus .NOTHING_TO_DO
772775
773- merged_intervals_snapshots = {
774- snapshot .snapshot_id : snapshot for snapshot in merged_intervals .keys ()
775- }
776+ merged_intervals_snapshots = {snapshot .snapshot_id for snapshot in merged_intervals }
776777 select_snapshot_triggers : t .Dict [SnapshotId , t .List [SnapshotId ]] = {}
777778 if selected_snapshots and selected_snapshots_auto_upstream :
778779 # actually selected snapshots are their own triggers
@@ -788,24 +789,25 @@ def _run_or_audit(
788789 ]
789790 }
790791
791- # trace upstream by reversing dag of all snapshots to evaluate
792- reversed_intervals_dag = snapshots_to_dag (merged_intervals_snapshots .values ()).reversed
793- for s_id in reversed_intervals_dag :
794- if s_id not in select_snapshot_triggers :
795- triggers = []
796- for parent_s_id in merged_intervals_snapshots [ s_id ]. parents :
797- triggers .extend (select_snapshot_triggers [ parent_s_id ] )
792+ # trace upstream by walking downstream on reversed dag
793+ reversed_dag = snapshots_to_dag (self . snapshots .values ()).reversed
794+ for s_id in reversed_dag :
795+ if s_id in merged_intervals_snapshots :
796+ triggers = select_snapshot_triggers . get ( s_id , [])
797+ for parent_s_id in reversed_dag . graph . get ( s_id , set ()) :
798+ triggers .extend (select_snapshot_triggers . get ( parent_s_id , []) )
798799 select_snapshot_triggers [s_id ] = list (dict .fromkeys (triggers ))
799800
800801 all_snapshot_triggers : t .Dict [SnapshotId , SnapshotEvaluationTriggers ] = {
801802 s_id : SnapshotEvaluationTriggers (
802- ignore_cron = ignore_cron ,
803+ ignore_cron_flag = ignore_cron ,
804+ cron_ready = s_id in snapshots_naive_cron_ready ,
803805 auto_restatement_triggers = auto_restatement_triggers .get (s_id , []),
804806 select_snapshot_triggers = select_snapshot_triggers .get (s_id , []),
805807 )
806808 for s_id in merged_intervals_snapshots
807- if ignore_cron or s_id in auto_restatement_triggers or s_id in select_snapshot_triggers
808809 }
810+
809811 errors , _ = self .run_merged_intervals (
810812 merged_intervals = merged_intervals ,
811813 deployability_index = deployability_index ,
@@ -967,7 +969,7 @@ def merged_missing_intervals(
967969 end_override_per_model : t .Optional [t .Dict [str , datetime ]] = None ,
968970 ignore_cron : bool = False ,
969971 end_bounded : bool = False ,
970- ) -> SnapshotToIntervals :
972+ ) -> t . Tuple [ SnapshotToIntervals , t . List [ SnapshotId ]] :
971973 """Find the largest contiguous date interval parameters based only on what is missing.
972974
973975 For each node name, find all dependencies and look for a stored snapshot from the metastore. If a snapshot is found,
@@ -1017,7 +1019,7 @@ def compute_interval_params(
10171019 end_override_per_model : t .Optional [t .Dict [str , datetime ]] = None ,
10181020 ignore_cron : bool = False ,
10191021 end_bounded : bool = False ,
1020- ) -> SnapshotToIntervals :
1022+ ) -> t . Tuple [ SnapshotToIntervals , t . List [ SnapshotId ]] :
10211023 """Find the largest contiguous date interval parameters based only on what is missing.
10221024
10231025 For each node name, find all dependencies and look for a stored snapshot from the metastore. If a snapshot is found,
@@ -1039,7 +1041,7 @@ def compute_interval_params(
10391041 allow_partials, and other attributes that could cause the intervals to exceed the target end date.
10401042
10411043 Returns:
1042- A dict containing all snapshots needing to be run with their associated interval params.
1044+ A tuple of (a dict mapping each snapshot that needs to be run to its associated interval params, a list of the IDs of snapshots that are ready to run based on their naive cron schedule, ignoring plan/run context and other attributes).
10431045 """
10441046 snapshot_merged_intervals = {}
10451047
@@ -1067,7 +1069,11 @@ def compute_interval_params(
10671069 contiguous_batch .append ((next_batch [0 ][0 ], next_batch [- 1 ][- 1 ]))
10681070 snapshot_merged_intervals [snapshot ] = contiguous_batch
10691071
1070- return snapshot_merged_intervals
1072+ snapshots_naive_cron_ready = [
1073+ snap .snapshot_id for snap in missing_intervals (snapshots , execution_time = execution_time )
1074+ ]
1075+
1076+ return snapshot_merged_intervals , snapshots_naive_cron_ready
10711077
10721078
10731079def interval_diff (
0 commit comments