Skip to content

Commit d6e81bd

Browse files
authored
Merge branch 'main' into main
2 parents b55368e + 9e2a5f9 commit d6e81bd

85 files changed

Lines changed: 2523 additions & 720 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.circleci/continue_config.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,9 @@ jobs:
9393
- run:
9494
name: Run linters and code style checks
9595
command: make py-style
96+
- run:
97+
name: Exercise the benchmarks
98+
command: make benchmark-ci
9699
- run:
97100
name: Run cicd tests
98101
command: make cicd-test

.prettierignore

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ vscode/extension/.vscode-test/
2323

2424
sqlmesh
2525
docs
26-
tests
26+
/tests/**
2727
examples
2828
posts
2929
.circleci

Makefile

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -181,3 +181,6 @@ vscode-generate-openapi:
181181
python3 web/server/openapi.py --output vscode/openapi.json
182182
pnpm run fmt
183183
cd vscode/react && pnpm run generate:api
184+
185+
benchmark-ci:
186+
python benchmarks/lsp_render_model_bench.py --debug-single-value
Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
#!/usr/bin/env python
2+
3+
import asyncio
4+
import pyperf
5+
import os
6+
import logging
7+
from pathlib import Path
8+
from lsprotocol import types
9+
10+
from sqlmesh.lsp.custom import RenderModelRequest, RENDER_MODEL_FEATURE
11+
from sqlmesh.lsp.uri import URI
12+
from pygls.client import JsonRPCClient
13+
14+
# Suppress debug logging during benchmark
15+
logging.getLogger().setLevel(logging.WARNING)
16+
17+
18+
class LSPClient(JsonRPCClient):
    """A custom LSP client for benchmarking.

    Wraps pygls' JSON-RPC client so the benchmark can talk to the SQLMesh
    LSP server over stdio. Window notifications from the server are
    swallowed so they don't pollute benchmark output.
    """

    def __init__(self):
        super().__init__()
        # Holds the last render_model response; not read in this script but
        # available for debugging — TODO confirm whether it can be removed.
        self.render_model_result = None
        # Set once the initialize/initialized handshake has completed.
        self.initialized = asyncio.Event()

        # Register handlers for notifications we expect from the server
        @self.feature(types.WINDOW_SHOW_MESSAGE)
        def handle_show_message(_):
            # Silently ignore show message notifications during benchmark
            pass

        @self.feature(types.WINDOW_LOG_MESSAGE)
        def handle_log_message(_):
            # Silently ignore log message notifications during benchmark
            pass

    async def initialize_server(self):
        """Send initialization request to server.

        Points the server at the bundled sushi example project (resolved
        relative to this file), completes the LSP `initialize`/`initialized`
        handshake, sets ``self.initialized``, and returns the server's
        initialize response.
        """
        # Get the sushi example directory
        sushi_dir = Path(__file__).parent.parent / "examples" / "sushi"

        response = await self.protocol.send_request_async(
            types.INITIALIZE,
            types.InitializeParams(
                process_id=os.getpid(),
                root_uri=URI.from_path(sushi_dir).value,
                capabilities=types.ClientCapabilities(),
                workspace_folders=[
                    types.WorkspaceFolder(
                        uri=URI.from_path(sushi_dir).value,
                        name="sushi"
                    )
                ]
            )
        )

        # Send initialized notification
        self.protocol.notify(types.INITIALIZED, types.InitializedParams())
        self.initialized.set()
        return response
61+
62+
63+
async def benchmark_render_model_async(client: LSPClient, model_path: Path):
    """Issue one render_model request for *model_path* and return the server's reply."""
    return await client.protocol.send_request_async(
        RENDER_MODEL_FEATURE,
        RenderModelRequest(textDocumentUri=URI.from_path(model_path).value),
    )
74+
75+
76+
def benchmark_render_model(loops):
    """Synchronous wrapper for the benchmark.

    pyperf time-function contract: receives the requested number of loop
    iterations and returns the wall-clock time spent on exactly those
    iterations. Server startup, initialization, warm-up, and shutdown all
    happen outside the timed section.
    """
    async def run():
        # Create client
        client = LSPClient()

        # Start the SQLMesh LSP server as a subprocess
        await client.start_io("python", "-m", "sqlmesh.lsp.main")

        # Initialize the server
        await client.initialize_server()

        # Get a model file to test with
        sushi_dir = Path(__file__).parent.parent / "examples" / "sushi"
        model_path = sushi_dir / "models" / "customers.sql"

        # Warm up
        await benchmark_render_model_async(client, model_path)

        # Run benchmark
        t0 = pyperf.perf_counter()
        for _ in range(loops):
            await benchmark_render_model_async(client, model_path)
        dt = pyperf.perf_counter() - t0

        # Clean up
        await client.stop()

        return dt

    return asyncio.run(run())
107+
108+
109+
def main():
    """Register the render-model benchmark with a pyperf runner and run it."""
    pyperf.Runner().bench_time_func("lsp_render_model", benchmark_render_model)


if __name__ == "__main__":
    main()
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
# Security Overview
2+
3+
4+
At Tobiko, we treat security as a first-class citizen because we know how valuable your data assets are. Our team follows and executes security best practices across each layer of our product.
5+
6+
## Tobiko Cloud Standard Deployment
7+
8+
Our standard Tobiko Cloud deployment consists of several components that are each responsible for different parts of the product.
9+
10+
Below is a diagram of the components along with their descriptions.
11+
12+
![tobiko_cloud_standard_deployment](./tcloud_standard_deployment.png){ width=80% height=60% style="display: block; margin: 0 auto" }
13+
14+
- **Scheduler**: Orchestrates schedule cadence and hosts state metadata (code versions, logs, cost)
15+
- **Executor**: Applies code changes and runs SQL queries (actual data processing in SQL Engine) and Python models in proper DAG order.
16+
- **Gateway**: Stores authentication credentials for SQL Engine. Secured through encryption.
17+
- **SQL Engine**: Processes and stores data based on the above instructions within the **customer’s** environment.
18+
19+
## Tobiko Cloud Hybrid Deployment
20+
21+
For some customers, our hybrid deployment option is a great fit. It provides a seamless experience with Tobiko Cloud but within your own VPC and infrastructure.
22+
23+
In a hybrid deployment, Tobiko Cloud does not execute tasks directly with the engine. Instead, it passes tasks to the executors hosted in your environment, which then execute the tasks with the engine.
24+
25+
Executors are Docker containers that connect to both Tobiko Cloud and your SQL engine. They pull work tasks from the Tobiko Cloud scheduler and execute them with your SQL engine. This is a pull-only mechanism authenticated through an OAuth Client ID/Secret. Whitelist IPs in your network to allow reaching Tobiko Cloud IPs from the executor: 34.28.17.91, 34.136.27.153, 34.136.131.20
26+
27+
Below is a diagram of the components along with their description.
28+
29+
![tobiko_cloud_hybrid_deployment](./tcloud_hybrid_deployment.png){ width=80% height=60% style="display: block; margin: 0 auto" }
30+
31+
- **Scheduler**: Orchestrates schedule cadence and hosts state metadata (code versions, logs, cost). **Never pushes** instructions to executor.
32+
- **Executor**: Applies code changes and runs SQL queries and Python models in proper DAG order (actual data processing in SQL Engine)
33+
- **Gateway**: Stores authentication credentials for SQL Engine. Secured through your secrets manager or Kubernetes Secrets.
34+
- **SQL Engine**: Processes and stores data based on the above instructions
35+
- **Executor -> Scheduler**: A pull-only mechanism for obtaining work tasks.
36+
- **Helm Chart**: For production environments, we provide a [Helm chart](../scheduler/hybrid_executors_helm.md) that includes robust configurability, secret management, and scaling options.
37+
- **Docker Compose**: For simpler environments or testing, we offer a [Docker Compose setup](../scheduler/hybrid_executors_docker_compose.md) to quickly deploy executors on any machine with Docker.
38+
39+
40+
41+
## Internal Code Practices
42+
43+
We enforce coding standards throughout Tobiko to write, maintain, and collaborate on code effectively. These practices ensure consistency, maintainability, reliability, and most importantly, trust.
44+
45+
A few key components of our internal code requirements:
46+
47+
- We use signed Git commits, required approvers, and signed Docker artifacts.
48+
- Each commit to a `main` branch must be approved by someone other than the author.
49+
- We sign commits and register the key with GitHub ([Github Docs](https://docs.github.com/en/authentication/managing-commit-signature-verification/signing-commits)).
50+
- Binaries are signed using cosign and OIDC for keyless ([Signing docs](https://docs.sigstore.dev/cosign/signing/overview/)).
51+
- Attestations are created to certify an image, enforced with GCP Binary Authorization ([Attestation docs](https://cloud.google.com/binary-authorization/docs/key-concepts#attestations)).
52+
- Encryption is a key feature of our security posture and is enforced at each stage of access. For example, the state database automatically encrypts all data. Credentials are also securely encrypted and stored.
53+
- We back up each state database nightly and before upgrades. These backups are stored for 14 days.
54+
55+
## Penetration Testing
56+
57+
At least once a year, Tobiko engages a third-party security firm to perform a penetration test. This test evaluates our systems by identifying and attempting to exploit known vulnerabilities, focusing on critical external and/or internal assets. A detailed report is available upon request.
58+
59+
60+
## Asset and Access Management
61+
62+
### How do we protect PGP keys?
63+
64+
If an employee loses their laptop, we don't need to get the old PGP key back because we can invalidate the key directly.
65+
66+
We use GitHub to sign code commits. At the time the code was committed, the PGP key was valid. When an employee loses their laptop, we will invalidate it, and they will regenerate a new key to use in future commits. The old commits are still valid because the PGP key was valid at the time the commit was made.
67+
68+
### How do we invalidate PGP keys if someone did steal it and could potentially use it?
69+
70+
We would revoke access for the GitHub user account associated with the compromised key and not give it access again until the old PGP key is deprecated and a new key issued.
71+
72+
### If someone steals a laptop, what's our continuity plan in protecting code?
73+
74+
- All employee devices are monitored for proper encryption and password policies.
75+
- Laptop protection is enforced through file encryption via Vanta.
76+
- Mandatory lock screen after a timeout.
77+
- We follow a formal IT asset disposal procedure to prevent key compromise through improper hardware disposal.
78+
- See above for PGP key protection.
79+
- Binaries are signed using Cosign and OIDC for keyless signing.
80+
56.5 KB
Loading
89.7 KB
Loading

docs/concepts/macros/macro_variables.md

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -130,11 +130,21 @@ SQLMesh provides additional predefined variables used to modify model behavior b
130130
* 'auditing' - The audit is being run.
131131
* 'testing' - The model query logic is being evaluated in the context of a unit test.
132132
* @gateway - A string value containing the name of the current [gateway](../../guides/connections.md).
133-
* @this_model - A string value containing the name of the physical table the model view selects from. Typically used to create [generic audits](../audits.md#generic-audits). In the case of [on_virtual_update statements](../models/sql_models.md#optional-on-virtual-update-statements) it contains the qualified view name instead.
134-
* Can be used in model definitions when SQLGlot cannot fully parse a statement and you need to reference the model's underlying physical table directly.
135-
* Can be passed as an argument to macros that access or interact with the underlying physical table.
133+
* @this_model - The physical table name that the model's view selects from. Typically used to create [generic audits](../audits.md#generic-audits). When used in [on_virtual_update statements](../models/sql_models.md#optional-on-virtual-update-statements), it contains the qualified view name instead.
136134
* @model_kind_name - A string value containing the name of the current model kind. Intended to be used in scenarios where you need to control the [physical properties in model defaults](../../reference/model_configuration.md#model-defaults).
137135

136+
!!! note "Embedding variables in strings"
137+
138+
Macro variable references sometimes use the curly brace syntax `@{variable}`, which serves a different purpose than the regular `@variable` syntax.
139+
140+
The curly brace syntax tells SQLMesh that the rendered string should be treated as an identifier, instead of simply replacing the macro variable value.
141+
142+
For example, if `variable` is defined as `@DEF(variable, foo.bar)`, then `@variable` produces `foo.bar`, while `@{variable}` produces `"foo.bar"`. This is because SQLMesh converts `foo.bar` into an identifier, using double quotes to correctly include the `.` character in the identifier name.
143+
144+
In practice, `@{variable}` is most commonly used to interpolate a value within an identifier, e.g., `@{variable}_suffix`, whereas `@variable` is used to do plain substitutions for string literals.
145+
146+
Learn more [above](#embedding-variables-in-strings).
147+
138148
#### Before all and after all variables
139149

140150
The following variables are also available in [`before_all` and `after_all` statements](../../guides/configuration.md#before_all-and-after_all-statements), as well as in macros invoked within them.

docs/concepts/macros/sqlmesh_macros.md

Lines changed: 65 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,59 @@ It uses the following five step approach to accomplish this:
3838

3939
5. Modify the semantic representation of the SQL query with the substituted variable values from (3) and functions from (4).
4040

41+
### Embedding variables in strings
42+
43+
SQLMesh always incorporates macro variable values into the semantic representation of a SQL query (step 5 above). To do that, it infers the role each macro variable value plays in the query.
44+
45+
For context, two commonly used types of string in SQL are:
46+
47+
- String literals, which represent text values and are surrounded by single quotes, such as `'the_string'`
48+
- Identifiers, which reference database objects like column, table, alias, and function names
49+
- They may be unquoted or quoted with double quotes, backticks, or brackets, depending on the SQL dialect
50+
51+
In a normal query, SQLMesh can easily determine which role a given string is playing. However, it is more difficult if a macro variable is embedded directly into a string - especially if the string is in the `MODEL` block (and not the query itself).
52+
53+
For example, consider a project that defines a [gateway variable](#gateway-variables) named `gateway_var`. The project includes a model that references `@gateway_var` as part of the schema in the model's `name`, which is a SQL *identifier*.
54+
55+
This is how we might try to write the model:
56+
57+
``` sql title="Incorrectly rendered to string literal"
58+
MODEL (
59+
name the_@gateway_var_schema.table
60+
);
61+
```
62+
63+
From SQLMesh's perspective, the model schema is the combination of three sub-strings: `the_`, the value of `@gateway_var`, and `_schema`.
64+
65+
SQLMesh will concatenate those strings, but it does not have the context to know that it is building a SQL identifier and will return a string literal.
66+
67+
To provide the context SQLMesh needs, you must add curly braces to the macro variable reference: `@{gateway_var}` instead of `@gateway_var`:
68+
69+
``` sql title="Correctly rendered to identifier"
70+
MODEL (
71+
name the_@{gateway_var}_schema.table
72+
);
73+
```
74+
75+
The curly braces let SQLMesh know that it should treat the string as a SQL identifier, which it will then quote based on the SQL dialect's quoting rules.
76+
77+
While the most common use of the curly brace syntax is embedding macro variables into strings, it can also be used to differentiate string literals and identifiers in SQL queries. For example, consider a macro variable `my_variable` whose value is `col`.
78+
79+
If we `SELECT` this value with regular macro syntax, it will render to a string literal:
80+
81+
``` sql
82+
SELECT @my_variable AS the_column; -- renders to SELECT 'col' AS the_column
83+
```
84+
85+
`'col'` is surrounded with single quotes, and the SQL engine will use that string as the column's data value.
86+
87+
If we use curly braces, SQLMesh will know that we want to use the rendered string as an identifier:
88+
89+
``` sql
90+
SELECT @{my_variable} AS the_column; -- renders to SELECT col AS the_column
91+
```
92+
93+
`col` is not surrounded with single quotes, and the SQL engine will determine that the query is referencing a column or other object named `col`.
4194

4295
## User-defined variables
4396

@@ -174,6 +227,8 @@ SELECT
174227
FROM @customer.some_source
175228
```
176229

230+
Note the use of both regular `@field_a` and curly brace syntax `@{field_b}` macro variable references in the model query. Learn more [above](#embedding-variables-in-strings)
231+
177232
Blueprint variables can be accessed using the syntax shown above, or through the `@BLUEPRINT_VAR()` macro function, which also supports specifying default values in case the variable is undefined (similar to `@VAR()`).
178233

179234
### Local variables
@@ -448,7 +503,13 @@ FROM table
448503

449504
This syntax works regardless of whether the array values are quoted or not.
450505

451-
NOTE: SQLMesh macros support placing macro values at the end of a column name simply using `column_@x`. However if you wish to substitute the variable anywhere else in the identifier, you need to use the more explicit substitution syntax `@{}`. This avoids ambiguity. These are valid uses: `@{x}_column` or `my_@{x}_column`.
506+
!!! note "Embedding macros in strings"
507+
508+
SQLMesh macros support placing macro values at the end of a column name using `column_@x`.
509+
510+
However, if you wish to substitute the variable anywhere else in the identifier, you need to use the more explicit curly brace syntax `@{}` to avoid ambiguity. For example, these are valid uses: `@{x}_column` or `my_@{x}_column`.
511+
512+
Learn more about embedding macros in strings [above](#embedding-variables-in-strings)
452513

453514
### @IF
454515

@@ -1087,7 +1148,9 @@ The `template` can contain the following placeholders that will be substituted:
10871148
- `@{schema_name}` - The name of the physical schema that SQLMesh is using for the model version table, eg `sqlmesh__landing`
10881149
- `@{table_name}` - The name of the physical table that SQLMesh is using for the model version, eg `landing__customers__2517971505`
10891150

1090-
It can be used in a `MODEL` block:
1151+
Note the use of the curly brace syntax `@{}` in the template placeholders - learn more [above](#embedding-variables-in-strings).
1152+
1153+
The `@resolve_template` macro can be used in a `MODEL` block:
10911154

10921155
```sql linenums="1" hl_lines="5"
10931156
MODEL (

docs/concepts/models/external_models.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,9 @@ FROM
7070
@{gateway}_db.external_table;
7171
```
7272

73-
This table will be named differently depending on which `--gateway` SQLMesh is run with. For example:
73+
This table will be named differently depending on which `--gateway` SQLMesh is run with (learn more about the curly brace `@{gateway}` syntax [here](../../concepts/macros/sqlmesh_macros.md#embedding-variables-in-strings)).
74+
75+
For example:
7476

7577
- `sqlmesh --gateway dev plan` - SQLMesh will try to query `dev_db.external_table`
7678
- `sqlmesh --gateway prod plan` - SQLMesh will try to query `prod_db.external_table`

0 commit comments

Comments
 (0)