Skip to content

Commit a149d13

Browse files
authored
Feat: Introduce runtime check for identifier limits per engine (#4432)
1 parent d5c3ca7 commit a149d13

5 files changed

Lines changed: 38 additions & 10 deletions

File tree

sqlmesh/core/engine_adapter/base.py

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,7 @@ class EngineAdapter:
106106
SUPPORTS_REPLACE_TABLE = True
107107
DEFAULT_CATALOG_TYPE = DIALECT
108108
QUOTE_IDENTIFIERS_IN_VIEWS = True
109+
MAX_IDENTIFIER_LENGTH: t.Optional[int] = None
109110

110111
def __init__(
111112
self,
@@ -2138,14 +2139,12 @@ def execute(
21382139
)
21392140
with self.transaction():
21402141
for e in ensure_list(expressions):
2141-
sql = t.cast(
2142-
str,
2143-
(
2144-
self._to_sql(e, quote=quote_identifiers, **to_sql_kwargs)
2145-
if isinstance(e, exp.Expression)
2146-
else e
2147-
),
2148-
)
2142+
if isinstance(e, exp.Expression):
2143+
self._check_identifier_length(e)
2144+
sql = self._to_sql(e, quote=quote_identifiers, **to_sql_kwargs)
2145+
else:
2146+
sql = t.cast(str, e)
2147+
21492148
self._log_sql(
21502149
sql,
21512150
expression=e if isinstance(e, exp.Expression) else None,
@@ -2516,6 +2515,18 @@ def ping(self) -> None:
25162515
def _select_columns(cls, columns: t.Iterable[str]) -> exp.Select:
25172516
return exp.select(*(exp.column(c, quoted=True) for c in columns))
25182517

2518+
def _check_identifier_length(self, expression: exp.Expression) -> None:
    """Reject DDL expressions containing identifiers longer than the engine allows.

    The check is a no-op when the adapter declares no limit
    (``MAX_IDENTIFIER_LENGTH`` is ``None``) or when ``expression`` is not a
    DDL expression.

    Args:
        expression: The expression about to be rendered to SQL and executed.

    Raises:
        SQLMeshError: On the first identifier found whose name is longer than
            ``MAX_IDENTIFIER_LENGTH``.
    """
    limit = self.MAX_IDENTIFIER_LENGTH
    if limit is None:
        return
    if not isinstance(expression, exp.DDL):
        return

    # Every identifier in the tree is inspected, not only the target object's name.
    # NOTE(review): the length is measured with len(), i.e. in characters --
    # confirm this matches the unit each engine uses for its limit.
    for node in expression.find_all(exp.Identifier):
        name = node.name
        name_length = len(name)
        if name_length <= limit:
            continue
        raise SQLMeshError(
            f"Identifier name '{name}' (length {name_length}) exceeds {self.dialect.capitalize()}'s max identifier limit of {self.MAX_IDENTIFIER_LENGTH} characters"
        )
2529+
25192530

25202531
class EngineAdapterWithIndexSupport(EngineAdapter):
25212532
SUPPORTS_INDEXES = True

sqlmesh/core/engine_adapter/mysql.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ class MySQLEngineAdapter(
3939
MAX_TABLE_COMMENT_LENGTH = 2048
4040
MAX_COLUMN_COMMENT_LENGTH = 1024
4141
SUPPORTS_REPLACE_TABLE = False
42+
MAX_IDENTIFIER_LENGTH = 64
4243
SCHEMA_DIFFER = SchemaDiffer(
4344
parameterized_type_defaults={
4445
exp.DataType.build("BIT", dialect=DIALECT).this: [(1,)],

sqlmesh/core/engine_adapter/postgres.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ class PostgresEngineAdapter(
3434
HAS_VIEW_BINDING = True
3535
CURRENT_CATALOG_EXPRESSION = exp.column("current_catalog")
3636
SUPPORTS_REPLACE_TABLE = False
37+
MAX_IDENTIFIER_LENGTH = 63
3738
SCHEMA_DIFFER = SchemaDiffer(
3839
parameterized_type_defaults={
3940
# DECIMAL without precision is "up to 131072 digits before the decimal point; up to 16383 digits after the decimal point"

sqlmesh/core/engine_adapter/risingwave.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ class RisingwaveEngineAdapter(PostgresEngineAdapter):
3030
COMMENT_CREATION_VIEW = CommentCreationView.UNSUPPORTED
3131
SUPPORTS_MATERIALIZED_VIEWS = True
3232
SUPPORTS_TRANSACTIONS = False
33+
MAX_IDENTIFIER_LENGTH = None
3334

3435
def _truncate_table(self, table_name: TableName) -> None:
3536
return self.execute(exp.Delete(this=exp.to_table(table_name)))

tests/core/engine_adapter/integration/test_integration.py

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
import os
55
import pathlib
6+
import re
67
import sys
78
import typing as t
89
import shutil
@@ -1571,7 +1572,6 @@ def test_init_project(ctx: TestContext, tmp_path_factory: pytest.TempPathFactory
15711572

15721573
# normalize object names for snowflake
15731574
if ctx.dialect == "snowflake":
1574-
import re
15751575

15761576
def _normalize_snowflake(name: str, prefix_regex: str = "(sqlmesh__)(.*)"):
15771577
match = re.search(prefix_regex, name)
@@ -1789,7 +1789,6 @@ def test_to_time_column(
17891789
# Clickhouse does not have natively timezone-aware types and does not accept timestrings
17901790
# with UTC offset "+XX:XX". Therefore, we remove the timezone offset and set a timezone-
17911791
# specific data type to validate what is returned.
1792-
import re
17931792

17941793
time_column = re.match(r"^(.*?)\+", time_column).group(1)
17951794
time_column_type = exp.DataType.build("TIMESTAMP('UTC')", dialect="clickhouse")
@@ -2652,3 +2651,18 @@ def execute(
26522651
{"id": 1, "name": "foo"} if ctx.dialect != "snowflake" else {"ID": 1, "NAME": "foo"}
26532652
)
26542653
assert df.iloc[0].to_dict() == expected_result
2654+
2655+
2656+
def test_identifier_length_limit(ctx: TestContext):
    """Creating a table whose name is one character over the limit must fail.

    Skipped for engine adapters that do not declare ``MAX_IDENTIFIER_LENGTH``.
    """
    adapter = ctx.engine_adapter
    limit = adapter.MAX_IDENTIFIER_LENGTH
    if limit is None:
        pytest.skip(f"Engine {adapter.dialect} does not have identifier length limits set.")

    # Shortest name that is still too long for this engine.
    long_table_name = "a" * (limit + 1)

    expected_message = f"Identifier name '{long_table_name}' (length {len(long_table_name)}) exceeds {adapter.dialect.capitalize()}'s max identifier limit of {adapter.MAX_IDENTIFIER_LENGTH} characters"
    columns_to_types = {"col": exp.DataType.build("int")}

    # The message holds quotes and parentheses, so escape it before using it as a regex.
    with pytest.raises(SQLMeshError, match=re.escape(expected_message)):
        adapter.create_table(long_table_name, columns_to_types)

0 commit comments

Comments
 (0)