
Commit 1c8fc5c

Merge branch 'main' into add_fabric_warehouse
2 parents d9729a1 + 5aacaa8

32 files changed: 697 additions & 682 deletions

.circleci/continue_config.yml

Lines changed: 2 additions & 0 deletions
@@ -246,6 +246,7 @@ jobs:
             echo "export SNOWFLAKE_DATABASE='$TEST_DB_NAME'" >> "$BASH_ENV"
             echo "export DATABRICKS_CATALOG='$TEST_DB_NAME'" >> "$BASH_ENV"
             echo "export REDSHIFT_DATABASE='$TEST_DB_NAME'" >> "$BASH_ENV"
+            echo "export GCP_POSTGRES_DATABASE='$TEST_DB_NAME'" >> "$BASH_ENV"
       - run:
           name: Create test database
           command: ./.circleci/manage-test-db.sh << parameters.engine >> "$TEST_DB_NAME" up
@@ -305,6 +306,7 @@ workflows:
               - athena
               # todo: enable fabric when cicd catalog create/drop implemented in manage-test-db.sh
               #- fabric
+              - gcp-postgres
           filters:
             branches:
               only:
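The exported variable hands the per-run `$TEST_DB_NAME` to the GCP Postgres tests. As a hypothetical illustration (not code from this commit), a test helper could read it like so:

```python
# Hypothetical sketch: reading the database name the CI job exports.
# The helper name and error message are assumptions, not commit code.
import os

def gcp_postgres_database() -> str:
    # The CI job runs: echo "export GCP_POSTGRES_DATABASE='$TEST_DB_NAME'" >> "$BASH_ENV"
    db = os.environ.get("GCP_POSTGRES_DATABASE")
    if not db:
        raise RuntimeError("GCP_POSTGRES_DATABASE is not set; expected a CI run")
    return db
```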

.circleci/manage-test-db.sh

Lines changed: 26 additions & 0 deletions
@@ -109,6 +109,32 @@ clickhouse-cloud_init() {
   echo "Clickhouse Cloud instance $CLICKHOUSE_CLOUD_HOST is up and running"
 }
 
+# GCP Postgres
+gcp-postgres_init() {
+  # Download and start Cloud SQL Proxy
+  curl -fsSL -o cloud-sql-proxy https://storage.googleapis.com/cloud-sql-connectors/cloud-sql-proxy/v2.18.0/cloud-sql-proxy.linux.amd64
+  chmod +x cloud-sql-proxy
+  echo "$GCP_POSTGRES_KEYFILE_JSON" > /tmp/keyfile.json
+  ./cloud-sql-proxy --credentials-file /tmp/keyfile.json $GCP_POSTGRES_INSTANCE_CONNECTION_STRING &
+
+  # Wait for proxy to start
+  sleep 5
+}
+
+gcp-postgres_exec() {
+  PGPASSWORD=$GCP_POSTGRES_PASSWORD psql -h 127.0.0.1 -U $GCP_POSTGRES_USER -c "$1" postgres
+}
+
+gcp-postgres_up() {
+  gcp-postgres_exec "create database $1"
+}
+
+gcp-postgres_down() {
+  gcp-postgres_exec "drop database $1"
+}
+
+
+
 INIT_FUNC="${ENGINE}_init"
 UP_FUNC="${ENGINE}_up"
 DOWN_FUNC="${ENGINE}_down"
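The new functions plug into the script's name-based dispatch (`INIT_FUNC="${ENGINE}_init"` and friends), so adding an engine only requires defining matching `_init`/`_up`/`_down` handlers. A rough Python equivalent of that dispatch, for illustration only (the handler bodies are placeholders, not the real CI logic):

```python
# Hypothetical sketch of the shell script's "<engine>_<action>" dispatch.
import typing as t

def gcp_postgres_up(db_name: str) -> None:
    print(f"create database {db_name}")  # stands in for gcp-postgres_exec

def gcp_postgres_down(db_name: str) -> None:
    print(f"drop database {db_name}")

HANDLERS: t.Dict[str, t.Callable[[str], None]] = {
    "gcp-postgres_up": gcp_postgres_up,
    "gcp-postgres_down": gcp_postgres_down,
}

def dispatch(engine: str, action: str, db_name: str) -> None:
    # Resolve the handler by name, as UP_FUNC="${ENGINE}_up" does.
    handler = HANDLERS.get(f"{engine}_{action}")
    if handler is None:
        raise ValueError(f"no {action} handler for engine {engine!r}")
    handler(db_name)

dispatch("gcp-postgres", "up", "test_db_123")
```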

Makefile

Lines changed: 3 additions & 0 deletions
@@ -176,6 +176,9 @@ athena-test: guard-AWS_ACCESS_KEY_ID guard-AWS_SECRET_ACCESS_KEY guard-ATHENA_S3
 fabric-test: guard-FABRIC_HOST guard-FABRIC_CLIENT_ID guard-FABRIC_CLIENT_SECRET guard-FABRIC_DATABASE engine-fabric-install
 	pytest -n auto -m "fabric" --retries 3 --junitxml=test-results/junit-fabric.xml
 
+gcp-postgres-test: guard-GCP_POSTGRES_INSTANCE_CONNECTION_STRING guard-GCP_POSTGRES_USER guard-GCP_POSTGRES_PASSWORD guard-GCP_POSTGRES_KEYFILE_JSON engine-gcppostgres-install
+	pytest -n auto -m "gcp_postgres" --retries 3 --junitxml=test-results/junit-gcp-postgres.xml
+
 vscode_settings:
 	mkdir -p .vscode
 	cp -r ./tooling/vscode/*.json .vscode/
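The target selects tests by the `gcp_postgres` pytest marker. A hypothetical example of a test it would collect (the test body, and the marker registration in pytest config, are assumptions):

```python
# Hypothetical sketch of a test selected by `pytest -m "gcp_postgres"`.
# Only the marker name comes from the Makefile target above.
import os

import pytest

@pytest.mark.gcp_postgres
def test_ci_database_is_configured():
    # The CI job exports GCP_POSTGRES_DATABASE for this engine.
    assert os.environ.get("GCP_POSTGRES_DATABASE")
```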

docs/concepts/models/model_kinds.md

Lines changed: 8 additions & 40 deletions
@@ -935,13 +935,7 @@ SQLMesh achieves this by adding a `valid_from` and `valid_to` column to your mod
 
 Therefore, you can use these models to not only tell you what the latest value is for a given record but also what the values were anytime in the past. Note that maintaining this history does come at a cost of increased storage and compute and this may not be a good fit for sources that change frequently since the history could get very large.
 
-**Note**: SCD Type 2 models support [restatements](../plans.md#restatement-plans) with specific limitations:
-
-- **Full restatements**: The entire table will be recreated from scratch when no start date is specified
-- **Partial restatements**: You can specify a start date to restate data from a certain point onwards to the latest interval. The end date will always be set to the latest interval's end date, regardless of what end date you specify
-- **Partial sections**: Restatements of specific sections (discontinued ranges) of the table are not supported
-
-Data restatement is disabled for models of this kind by default (`disable_restatement true`). To enable restatements, set `disable_restatement false` in your model configuration.
+**Note**: Partial data [restatement](../plans.md#restatement-plans) is not supported for this model kind, which means that the entire table will be recreated from scratch if restated. This may lead to data loss, so data restatement is disabled for models of this kind by default.
 
 There are two ways to tracking changes: By Time (Recommended) or By Column.
 
@@ -1289,11 +1283,11 @@ This is the most accurate representation of the menu based on the source data pr
 
 ### Processing Source Table with Historical Data
 
-The most common case for SCD Type 2 is creating history for a table that it doesn't have it already. 
+The most common case for SCD Type 2 is creating history for a table that it doesn't have it already.
 In the example of the restaurant menu, the menu just tells you what is offered right now, but you want to know what was offered over time.
 In this case, the default setting of `None` for `batch_size` is the best option.
 
-Another use case though is processing a source table that already has history in it. 
+Another use case though is processing a source table that already has history in it.
 A common example of this is a "daily snapshot" table that is created by a source system that takes a snapshot of the data at the end of each day.
 If your source table has historical records, like a "daily snapshot" table, then set `batch_size` to `1` to process each interval (each day if a `@daily` cron) in sequential order.
 That way the historical records will be properly captured in the SCD Type 2 table.
@@ -1439,14 +1433,11 @@ GROUP BY
   id
 ```
 
-### SCD Type 2 Restatements
+### Reset SCD Type 2 Model (clearing history)
 
 SCD Type 2 models are designed by default to protect the data that has been captured because it is not possible to recreate the history once it has been lost.
 However, there are cases where you may want to clear the history and start fresh.
-
-#### Enabling Restatements
-
-To enable restatements for an SCD Type 2 model, set `disable_restatement` to `false` in the model definition:
+For this use case you will want to start by setting `disable_restatement` to `false` in the model definition.
 
 ```sql linenums="1" hl_lines="5"
 MODEL (
@@ -1458,39 +1449,16 @@ MODEL (
 );
 ```
 
-#### Full Restatements (Clearing All History)
-
-To clear all history and recreate the entire table from scratch:
+Plan/apply this change to production.
+Then you will want to [restate the model](../plans.md#restatement-plans).
 
 ```bash
 sqlmesh plan --restate-model db.menu_items
 ```
 
 !!! warning
 
-    This will remove **all** historical data on the model which in most situations cannot be recovered.
-
-#### Partial Restatements (From a Specific Date)
-
-You can restate data from a specific start date onwards. This will:
-- Delete all records with `valid_from >= start_date`
-- Reprocess the data from the start date to the latest interval
-
-```bash
-sqlmesh plan --restate-model db.menu_items --start "2023-01-15"
-```
-
-!!! note
-
-    If you specify an end date for SCD Type 2 restatements, it will be ignored and automatically set to the latest interval's end date.
-
-```bash
-# This end date will be ignored and set to the latest interval
-sqlmesh plan --restate-model db.menu_items --start "2023-01-15" --end "2023-01-20"
-```
-
-
-#### Re-enabling Protection
+    This will remove the historical data on the model which in most situations cannot be recovered.
 
 Once complete you will want to remove `disable_restatement` on the model definition which will set it back to `true` and prevent accidental data loss.

sqlmesh/core/config/connection.py

Lines changed: 0 additions & 6 deletions
@@ -1216,12 +1216,6 @@ def _validate_auth_method(cls, data: t.Any) -> t.Any:
         password = data.get("password")
         enable_iam_auth = data.get("enable_iam_auth")
 
-        if password and enable_iam_auth:
-            raise ConfigError(
-                "Invalid GCP Postgres connection configuration - both password and"
-                " enable_iam_auth set. Use password when connecting to a postgres"
-                " user and enable_iam_auth 'True' when connecting to an IAM user."
-            )
         if not password and not enable_iam_auth:
             raise ConfigError(
                 "GCP Postgres connection configuration requires either password set"

sqlmesh/core/engine_adapter/base.py

Lines changed: 2 additions & 45 deletions
@@ -1497,7 +1497,6 @@ def scd_type_2_by_time(
         table_description: t.Optional[str] = None,
         column_descriptions: t.Optional[t.Dict[str, str]] = None,
         truncate: bool = False,
-        is_restatement: bool = False,
         **kwargs: t.Any,
     ) -> None:
         self._scd_type_2(
@@ -1514,7 +1513,6 @@ def scd_type_2_by_time(
             table_description=table_description,
             column_descriptions=column_descriptions,
             truncate=truncate,
-            is_restatement=is_restatement,
             **kwargs,
         )
 
@@ -1533,7 +1531,6 @@ def scd_type_2_by_column(
         table_description: t.Optional[str] = None,
         column_descriptions: t.Optional[t.Dict[str, str]] = None,
         truncate: bool = False,
-        is_restatement: bool = False,
         **kwargs: t.Any,
     ) -> None:
         self._scd_type_2(
@@ -1550,7 +1547,6 @@ def scd_type_2_by_column(
             table_description=table_description,
             column_descriptions=column_descriptions,
             truncate=truncate,
-            is_restatement=is_restatement,
             **kwargs,
         )
 
@@ -1561,7 +1557,6 @@ def _scd_type_2(
         unique_key: t.Sequence[exp.Expression],
         valid_from_col: exp.Column,
         valid_to_col: exp.Column,
-        start: TimeLike,
         execution_time: t.Union[TimeLike, exp.Column],
         invalidate_hard_deletes: bool = True,
         updated_at_col: t.Optional[exp.Column] = None,
@@ -1572,7 +1567,6 @@ def _scd_type_2(
         table_description: t.Optional[str] = None,
         column_descriptions: t.Optional[t.Dict[str, str]] = None,
         truncate: bool = False,
-        is_restatement: bool = False,
         **kwargs: t.Any,
     ) -> None:
         def remove_managed_columns(
@@ -1757,17 +1751,9 @@ def remove_managed_columns(
         existing_rows_query = exp.select(*table_columns, exp.true().as_("_exists")).from_(
             target_table
         )
-
         if truncate:
             existing_rows_query = existing_rows_query.limit(0)
 
-        # Only set cleanup_ts if is_restatement is True and truncate is False (this to enable full restatement)
-        cleanup_ts = (
-            to_time_column(start, time_data_type, self.dialect, nullable=True)
-            if is_restatement and not truncate
-            else None
-        )
-
         with source_queries[0] as source_query:
             prefixed_columns_to_types = []
             for column in columns_to_types:
@@ -1804,41 +1790,12 @@ def remove_managed_columns(
             # Historical Records that Do Not Change
             .with_(
                 "static",
-                existing_rows_query.where(valid_to_col.is_(exp.Null()).not_())
-                if cleanup_ts is None
-                else existing_rows_query.where(
-                    exp.and_(
-                        valid_to_col.is_(exp.Null().not_()),
-                        valid_to_col < cleanup_ts,
-                    ),
-                ),
+                existing_rows_query.where(valid_to_col.is_(exp.Null()).not_()),
             )
             # Latest Records that can be updated
             .with_(
                 "latest",
-                existing_rows_query.where(valid_to_col.is_(exp.Null()))
-                if cleanup_ts is None
-                else exp.select(
-                    *(
-                        to_time_column(
-                            exp.null(), time_data_type, self.dialect, nullable=True
-                        ).as_(col)
-                        if col == valid_to_col.name
-                        else exp.column(col)
-                        for col in columns_to_types
-                    ),
-                    exp.true().as_("_exists"),
-                )
-                .from_(target_table)
-                .where(
-                    exp.and_(
-                        valid_from_col <= cleanup_ts,
-                        exp.or_(
-                            valid_to_col.is_(exp.null()),
-                            valid_to_col >= cleanup_ts,
-                        ),
-                    )
-                ),
+                existing_rows_query.where(valid_to_col.is_(exp.Null())),
             )
             # Deleted records which can be used to determine `valid_from` for undeleted source records
             .with_(
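With `start` and `is_restatement` gone, the `static` and `latest` CTEs reduce to a plain split on `valid_to`: closed-out rows are static history, open rows are the latest records. A minimal standalone sketch of that split using sqlglot (table and column names are illustrative, not the adapter code):

```python
# Minimal sketch of the simplified static/latest split on valid_to.
from sqlglot import exp

valid_to_col = exp.column("valid_to")
existing_rows_query = exp.select("id", "name", "valid_from", "valid_to").from_("target_table")

# Historical records that do not change: valid_to IS NOT NULL.
static = existing_rows_query.copy().where(valid_to_col.is_(exp.Null()).not_())

# Latest records that can be updated: valid_to IS NULL.
latest = existing_rows_query.copy().where(valid_to_col.is_(exp.Null()))

print(static.sql())
print(latest.sql())
```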

sqlmesh/core/engine_adapter/bigquery.py

Lines changed: 45 additions & 11 deletions
@@ -33,6 +33,7 @@
     from google.cloud import bigquery
     from google.cloud.bigquery import StandardSqlDataType
     from google.cloud.bigquery.client import Client as BigQueryClient
+    from google.cloud.bigquery.job import QueryJob
     from google.cloud.bigquery.job.base import _AsyncJob as BigQueryQueryResult
     from google.cloud.bigquery.table import Table as BigQueryTable
@@ -186,6 +187,31 @@ def query_factory() -> Query:
                 )
             ]
 
+    def close(self) -> t.Any:
+        # Cancel all pending query jobs across all threads
+        all_query_jobs = self._connection_pool.get_all_attributes("query_job")
+        for query_job in all_query_jobs:
+            if query_job:
+                try:
+                    if not self._db_call(query_job.done):
+                        self._db_call(query_job.cancel)
+                        logger.debug(
+                            "Cancelled BigQuery job: https://console.cloud.google.com/bigquery?project=%s&j=bq:%s:%s",
+                            query_job.project,
+                            query_job.location,
+                            query_job.job_id,
+                        )
+                except Exception as ex:
+                    logger.debug(
+                        "Failed to cancel BigQuery job: https://console.cloud.google.com/bigquery?project=%s&j=bq:%s:%s. %s",
+                        query_job.project,
+                        query_job.location,
+                        query_job.job_id,
+                        str(ex),
+                    )
+
+        return super().close()
+
     def _begin_session(self, properties: SessionProperties) -> None:
         from google.cloud.bigquery import QueryJobConfig
 
@@ -318,7 +344,10 @@ def create_mapping_schema(
         if len(table.parts) == 3 and "." in table.name:
             # The client's `get_table` method can't handle paths with >3 identifiers
             self.execute(exp.select("*").from_(table).limit(0))
-            query_results = self._query_job._query_results
+            query_job = self._query_job
+            assert query_job is not None
+
+            query_results = query_job._query_results
             columns = create_mapping_schema(query_results.schema)
         else:
             bq_table = self._get_table(table)
@@ -717,7 +746,9 @@ def _fetch_native_df(
         self, query: t.Union[exp.Expression, str], quote_identifiers: bool = False
     ) -> DF:
         self.execute(query, quote_identifiers=quote_identifiers)
-        return self._query_job.to_dataframe()
+        query_job = self._query_job
+        assert query_job is not None
+        return query_job.to_dataframe()
 
     def _create_column_comments(
         self,
@@ -1021,20 +1052,23 @@ def _execute(
             job_config=job_config,
             timeout=self._extra_config.get("job_creation_timeout_seconds"),
         )
+        query_job = self._query_job
+        assert query_job is not None
 
         logger.debug(
             "BigQuery job created: https://console.cloud.google.com/bigquery?project=%s&j=bq:%s:%s",
-            self._query_job.project,
-            self._query_job.location,
-            self._query_job.job_id,
+            query_job.project,
+            query_job.location,
+            query_job.job_id,
         )
 
         results = self._db_call(
-            self._query_job.result,
+            query_job.result,
             timeout=self._extra_config.get("job_execution_timeout_seconds"),  # type: ignore
         )
+
        self._query_data = iter(results) if results.total_rows else iter([])
-        query_results = self._query_job._query_results
+        query_results = query_job._query_results
         self.cursor._set_rowcount(query_results)
         self.cursor._set_description(query_results.schema)
@@ -1198,23 +1232,23 @@ def _query_data(self) -> t.Any:
 
     @_query_data.setter
     def _query_data(self, value: t.Any) -> None:
-        return self._connection_pool.set_attribute("query_data", value)
+        self._connection_pool.set_attribute("query_data", value)
 
     @property
-    def _query_job(self) -> t.Any:
+    def _query_job(self) -> t.Optional[QueryJob]:
         return self._connection_pool.get_attribute("query_job")
 
     @_query_job.setter
     def _query_job(self, value: t.Any) -> None:
-        return self._connection_pool.set_attribute("query_job", value)
+        self._connection_pool.set_attribute("query_job", value)
 
     @property
     def _session_id(self) -> t.Any:
         return self._connection_pool.get_attribute("session_id")
 
     @_session_id.setter
     def _session_id(self, value: t.Any) -> None:
-        return self._connection_pool.set_attribute("session_id", value)
+        self._connection_pool.set_attribute("session_id", value)
 
 
 class _ErrorCounter:
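The new `close()` relies on the connection pool tracking attributes per thread and exposing them all at once via `get_all_attributes`. A hypothetical sketch of that contract (this `ThreadLocalPool` is illustrative, not SQLMesh's implementation):

```python
# Hypothetical per-thread attribute store matching the calls used in the diff:
# set_attribute / get_attribute operate on the current thread, while
# get_all_attributes collects one value per thread, as close() needs when
# cancelling pending query jobs.
import threading
import typing as t

class ThreadLocalPool:
    def __init__(self) -> None:
        self._attributes: t.Dict[int, t.Dict[str, t.Any]] = {}
        self._lock = threading.Lock()

    def set_attribute(self, key: str, value: t.Any) -> None:
        with self._lock:
            self._attributes.setdefault(threading.get_ident(), {})[key] = value

    def get_attribute(self, key: str) -> t.Any:
        with self._lock:
            return self._attributes.get(threading.get_ident(), {}).get(key)

    def get_all_attributes(self, key: str) -> t.List[t.Any]:
        with self._lock:
            return [attrs.get(key) for attrs in self._attributes.values()]
```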
