From bc29dda7889ee4bbc3f1a29144f73a395f1972d0 Mon Sep 17 00:00:00 2001 From: Erin Drummond Date: Fri, 4 Jul 2025 04:32:14 +0000 Subject: [PATCH] Feat(cicd_bot): Enable min_intervals plan option --- docs/concepts/plans.md | 57 +++++++++++++++++++ docs/integrations/github.md | 1 + sqlmesh/integrations/github/cicd/config.py | 1 + .../integrations/github/cicd/controller.py | 1 + tests/integrations/github/cicd/test_config.py | 6 ++ .../github/cicd/test_github_controller.py | 13 +++++ 6 files changed, 79 insertions(+) diff --git a/docs/concepts/plans.md b/docs/concepts/plans.md index 7903fe249f..da3d3debb7 100644 --- a/docs/concepts/plans.md +++ b/docs/concepts/plans.md @@ -246,6 +246,63 @@ Models needing backfill (missing dates): Enter the backfill end date (eg. '1 month ago', '2020-01-01') or blank to backfill up until '2024-09-27 00:00:00': ``` +#### Minimum intervals + +When you run a plan with a fixed `--start` or `--end` date, you create a virtual data environment with a limited subset of data. However, if the time range specified is less than the size of an interval on one of your models, that model will be skipped by default. + +For example, if you have a model like so: + +```sql +MODEL( + name sqlmesh_example.monthly_model, + kind INCREMENTAL_BY_TIME_RANGE ( + time_column month + ), + cron '@monthly' +); + +SELECT SUM(a) AS sum_a, MONTH(day) AS month +FROM sqlmesh_example.upstream_model +WHERE day BETWEEN @start_ds AND @end_ds +``` + +make a change to it and run the following: + +```bash linenums="1" hl_lines="8" +$ sqlmesh plan dev --start '1 day ago' + +Models: +└── Added: + └── sqlmesh_example__dev.monthly_model +Apply - Virtual Update [y/n]: y + +SKIP: No model batches to execute +``` + +No data will be backfilled because `1 day ago` does not contain a complete month. 
However, you can use the `--min-intervals` option to override this behaviour like so: + +```bash linenums="1" hl_lines="11" +$ sqlmesh plan dev --start '1 day ago' --min-intervals 1 + +Models: +└── Added: + └── sqlmesh_example__dev.monthly_model +Apply - Virtual Update [y/n]: y + +[1/1] sqlmesh_example__dev.monthly_model [insert 2025-06-01 - 2025-06-30] 0.08s +Executing model batches ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 100.0% • 1/1 • 0:00:00 + +✔ Model batches executed +``` + +This will ensure that regardless of the plan `--start` date, all added or modified models will have at least `--min-intervals` intervals considered for backfill. + +!!! info + + If you are running plans manually you can just adjust the `--start` date to be wide enough to cover the models in question. + + The `--min-intervals` option is primarily intended for [automation scenarios](../integrations/github.md) where the plan is always run with a default relative start date and you always want (for example) "2 weeks worth of data" in the target environment. + ### Data preview for forward-only changes As mentioned earlier, the data output produced by [forward-only changes](#forward-only-change) in a development environment can only be used for preview and will not be reused in production. diff --git a/docs/integrations/github.md b/docs/integrations/github.md index 450e7af8f3..323aff0565 100644 --- a/docs/integrations/github.md +++ b/docs/integrations/github.md @@ -294,6 +294,7 @@ Below is an example of how to define the default config for the bot in either YA | `command_namespace` | The namespace to use for SQLMesh commands. For example if you provide `#SQLMesh` as a value then commands will be expected in the format of `#SQLMesh/`. Default: `None` meaning no namespace is used. | string | N | | `auto_categorize_changes` | Auto categorization behavior to use for the bot. If not provided then the project-wide categorization behavior is used. 
See [Auto-categorize model changes](https://sqlmesh.readthedocs.io/en/stable/guides/configuration/#auto-categorize-model-changes) for details. | dict | N | | `default_pr_start` | Default start when creating PR environment plans. If running in a mode where the bot automatically backfills models (based on `auto_categorize_changes` behavior) then this can be used to limit the amount of data backfilled. Defaults to `None` meaning the start date is set to the earliest model's start or to 1 day ago if [data previews](../concepts/plans.md#data-preview) need to be computed. | str | N | +| `pr_min_intervals` | Intended for use when `default_pr_start` is set to a relative time, e.g. `1 week ago`. This ensures that at least this many intervals across every model are included for backfill in the PR environment. Without this, models with an interval unit wider than `default_pr_start` (such as `@monthly` models when `default_pr_start` is set to `1 week ago`) will be excluded from backfill entirely. Default: `None` | int | N | | `skip_pr_backfill` | Indicates if the bot should skip backfilling models in the PR environment. Default: `True` | bool | N | | `pr_include_unmodified` | Indicates whether to include unmodified models in the PR environment. Default to the project's config value (which defaults to `False`) | bool | N | | `run_on_deploy_to_prod` | Indicates whether to run latest intervals when deploying to prod. If set to false, the deployment will backfill only the changed models up to the existing latest interval in production, ignoring any missing intervals beyond this point. 
Default: `False` | bool | N | diff --git a/sqlmesh/integrations/github/cicd/config.py b/sqlmesh/integrations/github/cicd/config.py index b273329380..33312c4ad7 100644 --- a/sqlmesh/integrations/github/cicd/config.py +++ b/sqlmesh/integrations/github/cicd/config.py @@ -28,6 +28,7 @@ class GithubCICDBotConfig(BaseConfig): pr_include_unmodified: t.Optional[bool] = None run_on_deploy_to_prod: bool = False pr_environment_name: t.Optional[str] = None + pr_min_intervals: t.Optional[int] = None prod_branch_names_: t.Optional[str] = Field(default=None, alias="prod_branch_name") @model_validator(mode="before") diff --git a/sqlmesh/integrations/github/cicd/controller.py b/sqlmesh/integrations/github/cicd/controller.py index 5ae2a763e7..51e23164a1 100644 --- a/sqlmesh/integrations/github/cicd/controller.py +++ b/sqlmesh/integrations/github/cicd/controller.py @@ -402,6 +402,7 @@ def pr_plan(self) -> Plan: skip_linter=True, categorizer_config=self.bot_config.auto_categorize_changes, start=self.bot_config.default_pr_start, + min_intervals=self.bot_config.pr_min_intervals, skip_backfill=self.bot_config.skip_pr_backfill, include_unmodified=self.bot_config.pr_include_unmodified, ) diff --git a/tests/integrations/github/cicd/test_config.py b/tests/integrations/github/cicd/test_config.py index d42a5bdb4f..c100a1fa98 100644 --- a/tests/integrations/github/cicd/test_config.py +++ b/tests/integrations/github/cicd/test_config.py @@ -41,6 +41,7 @@ def test_load_yaml_config_default(tmp_path): assert config.cicd_bot.pr_include_unmodified is None assert config.cicd_bot.pr_environment_name is None assert config.cicd_bot.prod_branch_names == ["main", "master"] + assert not config.cicd_bot.pr_min_intervals def test_load_yaml_config(tmp_path): @@ -64,6 +65,7 @@ def test_load_yaml_config(tmp_path): pr_include_unmodified: true pr_environment_name: "MyOverride" prod_branch_name: testing + pr_min_intervals: 1 model_defaults: dialect: duckdb """, @@ -88,6 +90,7 @@ def test_load_yaml_config(tmp_path): 
assert config.cicd_bot.pr_include_unmodified assert config.cicd_bot.pr_environment_name == "MyOverride" assert config.cicd_bot.prod_branch_names == ["testing"] + assert config.cicd_bot.pr_min_intervals == 1 def test_load_python_config_defaults(tmp_path): @@ -119,6 +122,7 @@ def test_load_python_config_defaults(tmp_path): assert config.cicd_bot.pr_include_unmodified is None assert config.cicd_bot.pr_environment_name is None assert config.cicd_bot.prod_branch_names == ["main", "master"] + assert not config.cicd_bot.pr_min_intervals def test_load_python_config(tmp_path): @@ -141,6 +145,7 @@ def test_load_python_config(tmp_path): seed=AutoCategorizationMode.FULL, ), default_pr_start="1 week ago", + pr_min_intervals=1, enable_deploy_command=True, skip_pr_backfill=False, pr_include_unmodified=True, @@ -172,6 +177,7 @@ def test_load_python_config(tmp_path): assert config.cicd_bot.pr_include_unmodified assert config.cicd_bot.pr_environment_name == "MyOverride" assert config.cicd_bot.prod_branch_names == ["testing"] + assert config.cicd_bot.pr_min_intervals == 1 def test_validation(tmp_path): diff --git a/tests/integrations/github/cicd/test_github_controller.py b/tests/integrations/github/cicd/test_github_controller.py index d7d4f5343c..bca7c7bd87 100644 --- a/tests/integrations/github/cicd/test_github_controller.py +++ b/tests/integrations/github/cicd/test_github_controller.py @@ -12,6 +12,7 @@ from sqlmesh.core.dialect import parse_one from sqlmesh.core.model import SqlModel from sqlmesh.core.user import User, UserRole +from sqlmesh.core.plan.definition import Plan from sqlmesh.integrations.github.cicd.config import GithubCICDBotConfig, MergeMethod from sqlmesh.integrations.github.cicd.controller import ( BotCommand, @@ -251,6 +252,18 @@ def test_pr_plan_auto_categorization(github_client, make_controller): assert controller._context._run_plan_tests.call_args == call(skip_tests=True) assert controller._pr_plan_builder._categorizer_config == custom_categorizer_config assert 
controller.pr_plan.start == default_start_absolute + assert not controller.pr_plan.start_override_per_model + + +def test_pr_plan_min_intervals(github_client, make_controller): + controller = make_controller( + "tests/fixtures/github/pull_request_synchronized.json", + github_client, + bot_config=GithubCICDBotConfig(default_pr_start="1 day ago", pr_min_intervals=1), + ) + assert controller.pr_plan.environment.name == "hello_world_2" + assert isinstance(controller.pr_plan, Plan) + assert controller.pr_plan.start_override_per_model def test_prod_plan(github_client, make_controller):