From 78d9e32dc4ce29c981e5ac2d797fd42ad47160a7 Mon Sep 17 00:00:00 2001 From: shrugs Date: Thu, 4 Jun 2026 13:18:17 -0500 Subject: [PATCH 1/7] feat(unigraph): materialize searchable __canonical_name_prefix column Add a length-capped (64 code point) materialized `domains.__canonical_name_prefix` column to back left-anchored/substring search and NAME ordering, replacing the `left(canonical_name, 256)` expression index. Direct-SQL consumers can now `WHERE __canonical_name_prefix LIKE 'vit%' ORDER BY __canonical_name_prefix` without replicating the expression. - ensdb-sdk: column (explicit DB name to preserve the `__`), shared CANONICAL_NAME_PREFIX_LENGTH + truncateCanonicalNamePrefix, repointed composite btree + GIN trigram onto the prefix column; hash on canonical_name kept for eq/in. - ensindexer: maintain the column in all three canonicality write paths (ensureDomainInRegistry, cascadeCanonicality, cascadeLabelHeal). - ensapi: Omnigraph name.starts_with + NAME order/cursor target the prefix column; canonical_name still returned everywhere and used for exact matches. - docs: schema-reference column/index + prefix-search example. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- .../searchable-canonical-name-prefix.md | 7 +++ .../find-domains-resolver-helpers.ts | 47 ++++------------ .../lib/find-domains/find-domains-resolver.ts | 4 +- .../src/lib/ensv2/canonicality-db-helpers.ts | 34 +++++++++--- .../unigraph/examples/domain-by-name.mdx | 14 +++++ .../integrate/unigraph/schema-reference.mdx | 5 +- .../ensindexer-abstract/unigraph.schema.ts | 55 ++++++++++++++----- 7 files changed, 104 insertions(+), 62 deletions(-) create mode 100644 .changeset/searchable-canonical-name-prefix.md diff --git a/.changeset/searchable-canonical-name-prefix.md b/.changeset/searchable-canonical-name-prefix.md new file mode 100644 index 000000000..3b435370b --- /dev/null +++ b/.changeset/searchable-canonical-name-prefix.md @@ -0,0 +1,7 @@ +--- +"@ensnode/ensdb-sdk": patch +"ensindexer": patch +"ensapi": patch +--- + +Add a materialized `domains.__canonical_name_prefix` column — the first 64 code points of `canonical_name` — to back left-anchored / substring search and NAME ordering. Direct-SQL consumers can now `WHERE __canonical_name_prefix LIKE 'vit%' ORDER BY __canonical_name_prefix` instead of replicating the previous `left(canonical_name, 256)` expression index. `canonical_name` is unchanged and remains the column for exact (`=` / `IN`) matches and display; the Omnigraph `name.starts_with` filter now targets the prefix column while continuing to return `canonical_name`. Reindex required. 
diff --git a/apps/ensapi/src/omnigraph-api/lib/find-domains/find-domains-resolver-helpers.ts b/apps/ensapi/src/omnigraph-api/lib/find-domains/find-domains-resolver-helpers.ts index 513950f54..c5c273d70 100644 --- a/apps/ensapi/src/omnigraph-api/lib/find-domains/find-domains-resolver-helpers.ts +++ b/apps/ensapi/src/omnigraph-api/lib/find-domains/find-domains-resolver-helpers.ts @@ -1,46 +1,23 @@ import { asc, desc, type SQL, sql } from "drizzle-orm"; +import { truncateCanonicalNamePrefix } from "@ensnode/ensdb-sdk/ensindexer-abstract"; + import di from "@/di"; import type { DomainCursor } from "@/omnigraph-api/lib/find-domains/domain-cursor"; import type { DomainsOrderBy } from "@/omnigraph-api/schema/domain-inputs"; import type { OrderDirection } from "@/omnigraph-api/schema/order-direction"; /** - * Length cap (in characters) of the `canonical_name` prefix used by: - * 1. the `(registry_id, left(canonical_name, N), id)` composite btree on `domains`, - * 2. all NAME-ordered queries' ORDER BY expressions, and - * 3. the value stored in `DomainCursor.value` when ordering by NAME — pre-truncated at - * encode time via {@link truncateNameForCursor} so filter-time comparisons are simple - * tuple compares against the index expression with no per-row `left(...)` re-application. - * - * The btree per-tuple max is ~2712 bytes; with `registry_id` and `id` consuming ~240 bytes of - * that, ~2400 bytes remain for the prefix expression. 256 chars × max 4-byte UTF-8 codepoint = - * 1024 bytes, well under the limit and within the realm of reasonable name lengths (mainnet avg - * is ~126). Queries MUST sort by this same expression for the planner to use the index for - * ordered scan; raw `canonical_name` ORDER BY falls back to a full scan + sort. 
- * - * An alternative solution is to redefine InterpretedLabel to enforce a maximum byte length of 255 before - * being truncated into an Encoded LabelHash — this mirrors a name's resolvability (must be dns-encodable) - * and allows us to avoid storing spam names. Then we'd also have to produce a b-tree-indexed - * materializedCanonicalName field that's length-capped as well to fit the btree index. Then we could - * query against that column instead of the full InterpretedName. All of that would avoid this - * LEFT(...) expression index and the necessity for the query pattern to match the defined index - * (to avoid the full scan). - */ -export const CANONICAL_NAME_SORT_PREFIX = 256; - -/** - * Truncate a `canonicalName` to the cursor / index prefix length. Used when writing the cursor - * value for NAME orderings — callers slice once at encode time so the encoded cursor stays small - * (long names can hit thousands of characters) and `cursorFilter` can compare directly against - * the index expression without re-applying `left(...)` per row. + * Truncate a `canonicalName` to the materialized `__canonical_name_prefix` length when writing the + * `DomainCursor.value` of NAME orderings. Pre-truncating once at encode time keeps the encoded + * cursor small (long names hit thousands of characters) and lets `cursorFilter` compare directly + * against the `__canonical_name_prefix` column with no per-row `left(...)`. * - * Uses code-point iteration (`[...name]`) rather than `String.slice`, which counts UTF-16 code - * units and would split surrogate pairs. Postgres `left(text, N)` counts characters (code - * points), so this keeps the JS-side and DB-side prefixes byte-identical. + * Delegates to {@link truncateCanonicalNamePrefix} so the cursor prefix is byte-identical to the + * column the NAME index sorts on. */ export function truncateNameForCursor(name: string | null): string | null { - return name === null ? 
null : [...name].slice(0, CANONICAL_NAME_SORT_PREFIX).join(""); + return truncateCanonicalNamePrefix(name); } /** @@ -54,7 +31,7 @@ function getOrderColumn(orderBy: typeof DomainsOrderBy.$inferType): SQL { const { ensIndexerSchema } = di.context; switch (orderBy) { case "NAME": - return sql`left(${ensIndexerSchema.domain.canonicalName}, ${sql.raw(String(CANONICAL_NAME_SORT_PREFIX))})`; + return sql`${ensIndexerSchema.domain.__canonicalNamePrefix}`; case "DEPTH": return sql`${ensIndexerSchema.domain.canonicalDepth}`; case "REGISTRATION_TIMESTAMP": @@ -117,8 +94,8 @@ export function cursorFilter( const value = (() => { switch (cursor.by) { case "NAME": - // Already pre-truncated at encode time (see `truncateNameForCursor`), so this matches - // the index expression `left(canonical_name, CANONICAL_NAME_SORT_PREFIX)` directly. + // Already pre-truncated at encode time (see `truncateNameForCursor`), so this matches the + // `__canonical_name_prefix` column the NAME order sorts on directly. return sql`${cursor.value}::text`; case "DEPTH": return sql`${cursor.value}::int`; diff --git a/apps/ensapi/src/omnigraph-api/lib/find-domains/find-domains-resolver.ts b/apps/ensapi/src/omnigraph-api/lib/find-domains/find-domains-resolver.ts index 0fdb08f2f..d3b8651de 100644 --- a/apps/ensapi/src/omnigraph-api/lib/find-domains/find-domains-resolver.ts +++ b/apps/ensapi/src/omnigraph-api/lib/find-domains/find-domains-resolver.ts @@ -65,7 +65,9 @@ const VERSION_TO_DOMAIN_TYPE: Record< function nameCondition(filter: typeof DomainsNameFilter.$inferInput): SQL { const { ensIndexerSchema } = di.context; if (filter.starts_with) { - return ilike(ensIndexerSchema.domain.canonicalName, `${filter.starts_with}%`); + // prefix / substring search runs against the materialized, length-capped prefix column (backed + // by its GIN trigram index); exact `eq`/`in` below stay on the full `canonicalName`. 
+ return ilike(ensIndexerSchema.domain.__canonicalNamePrefix, `${filter.starts_with}%`); } if (filter.eq) { diff --git a/apps/ensindexer/src/lib/ensv2/canonicality-db-helpers.ts b/apps/ensindexer/src/lib/ensv2/canonicality-db-helpers.ts index e4ba0934c..c2e963abf 100644 --- a/apps/ensindexer/src/lib/ensv2/canonicality-db-helpers.ts +++ b/apps/ensindexer/src/lib/ensv2/canonicality-db-helpers.ts @@ -11,6 +11,10 @@ import type { RegistryId, } from "enssdk"; +import { + CANONICAL_NAME_PREFIX_LENGTH, + truncateCanonicalNamePrefix, +} from "@ensnode/ensdb-sdk/ensindexer-abstract"; import { isRootRegistryId } from "@ensnode/ensnode-sdk"; import { isBridgedResolver, isBridgedTargetRegistry } from "@ensnode/ensnode-sdk/internal"; @@ -151,6 +155,7 @@ export async function ensureDomainInRegistry( await context.ensDb.update(ensIndexerSchema.domain, { id: domainId }).set({ canonical: true, canonicalName, + __canonicalNamePrefix: truncateCanonicalNamePrefix(canonicalName), canonicalLabelHashPath, canonicalPath, canonicalDepth: canonicalLabelHashPath.length, @@ -359,8 +364,9 @@ async function reconcileRegistryCanonicality( /** * Propagate a Label heal to every canonical Domain whose `canonicalLabelHashPath` contains - * `labelHash`. Re-renders `canonical_name` by joining each path element to its current - * `label.interpreted` value. `canonicalLabelHashPath` is head-first (root → leaf), but + * `labelHash`. Re-renders `canonical_name` (and its materialized `__canonical_name_prefix`) by + * joining each path element to its current `label.interpreted` value, computing the name once in a + * CTE so the `string_agg` isn't run twice. `canonicalLabelHashPath` is head-first (root → leaf), but * `canonicalName` is the standard leaf-first ENS string (e.g. "vitalik.eth"), so the * WITH ORDINALITY rows are joined in DESC ordinal order. 
* @@ -376,14 +382,23 @@ export async function cascadeLabelHeal( labelHash: LabelHash, ): Promise<void> { await context.ensDb.sql.execute(sql` - UPDATE ${ensIndexerSchema.domain} AS d - SET canonical_name = ( - SELECT string_agg(l.interpreted, '.' ORDER BY p.ord DESC) - FROM unnest(d.canonical_label_hash_path) WITH ORDINALITY AS p(lh, ord) - JOIN ${ensIndexerSchema.label} l ON l.label_hash = p.lh - ) + WITH healed AS ( + SELECT + d.id, + ( + SELECT string_agg(l.interpreted, '.' ORDER BY p.ord DESC) + FROM unnest(d.canonical_label_hash_path) WITH ORDINALITY AS p(lh, ord) + JOIN ${ensIndexerSchema.label} l ON l.label_hash = p.lh + ) AS name + FROM ${ensIndexerSchema.domain} d WHERE d.canonical = true - AND d.canonical_label_hash_path @> ARRAY[${labelHash}]::text[]; + AND d.canonical_label_hash_path @> ARRAY[${labelHash}]::text[] + ) + UPDATE ${ensIndexerSchema.domain} AS d + SET canonical_name = h.name, + __canonical_name_prefix = left(h.name, ${CANONICAL_NAME_PREFIX_LENGTH}) + FROM healed h + WHERE d.id = h.id; `); } @@ -494,6 +509,7 @@ async function cascadeCanonicality( UPDATE ${ensIndexerSchema.domain} AS d SET canonical = ${nextCanonical}, canonical_name = CASE WHEN ${nextCanonical} THEN dt.new_name ELSE NULL END, + __canonical_name_prefix = CASE WHEN ${nextCanonical} THEN left(dt.new_name, ${CANONICAL_NAME_PREFIX_LENGTH}) ELSE NULL END, canonical_label_hash_path = CASE WHEN ${nextCanonical} THEN dt.new_path ELSE NULL END, canonical_path = CASE WHEN ${nextCanonical} THEN dt.new_path_ids ELSE NULL END, canonical_depth = CASE WHEN ${nextCanonical} THEN array_length(dt.new_path, 1) ELSE NULL END, diff --git a/docs/ensnode.io/src/content/docs/docs/integrate/unigraph/examples/domain-by-name.mdx b/docs/ensnode.io/src/content/docs/docs/integrate/unigraph/examples/domain-by-name.mdx index 5590b6083..ec076f0b1 100644 --- a/docs/ensnode.io/src/content/docs/docs/integrate/unigraph/examples/domain-by-name.mdx +++ 
b/docs/ensnode.io/src/content/docs/docs/integrate/unigraph/examples/domain-by-name.mdx @@ -59,6 +59,20 @@ Performing SQL queries on the ENS Unigraph requires that you have the `unigraph` Fetch a Domain by its canonical name. Because `canonical_name` is materialized across both ENSv1 and ENSv2, the same lookup works regardless of protocol version. See [Connect](/docs/integrate/unigraph/examples) for setup. +:::tip[Prefix search & typeahead] +For left-anchored / typeahead search, query the materialized `__canonical_name_prefix` column — the first 64 code points of `canonical_name`, backed by a GIN trigram index — instead of `canonical_name`: + +```sql +SELECT id, type, canonical_name, canonical_node, owner_id +FROM ensindexer_0.domains +WHERE __canonical_name_prefix LIKE 'vit%' +ORDER BY __canonical_name_prefix +LIMIT 10; +``` + +Use `canonical_name` only for exact matches (`canonical_name = 'vitalik.eth'`). +::: + :::note[Canonical fields] Canonical fields are populated on every Domain reachable from the canonical root, across both ENSv1 and ENSv2 — query them uniformly without branching by `type`. In SQL, these columns are `canonical_name`, `canonical_path`, `canonical_node`, and `canonical_depth`; in `ensdb-sdk`, the corresponding fields are `canonicalName`, `canonicalPath`, `canonicalNode`, and `canonicalDepth`. ::: diff --git a/docs/ensnode.io/src/content/docs/docs/integrate/unigraph/schema-reference.mdx b/docs/ensnode.io/src/content/docs/docs/integrate/unigraph/schema-reference.mdx index 06e7368fc..6e0fa72fc 100644 --- a/docs/ensnode.io/src/content/docs/docs/integrate/unigraph/schema-reference.mdx +++ b/docs/ensnode.io/src/content/docs/docs/integrate/unigraph/schema-reference.mdx @@ -223,13 +223,14 @@ Domain-Resolver relations are tracked via the Protocol Acceleration plugin, not | `owner_id` | `text` | yes | If `ENSv1Domain`, the materialized effective owner address. If `ENSv2Domain`, the on-chain owner address (the HCA account address if used). 
| | `root_registry_owner_id` | `text` | yes | ENSv1 only: the owner recorded in the root ENSv1 registry. `null` for ENSv2 domains. | | `canonical` | `boolean` | no | Whether this Domain is part of the canonical nametree. This encodes bi-directional agreement between `domains.subregistry_id` and `registries.canonical_domain_id`, so traversal of the canonical nametree filtered to domains/registries where `canonical=true` is safe and doesn't require edge-authenticating oneself (i.e. don't need to compare `domains.subregistry_id` and `registries.canonical_domain_id` in the query, can just `WHERE canonical = true`). Mirrors the parent Registry's flag. Default `false`. | -| `canonical_name` | `text` | yes | Materialized Canonical Name, `NULL` iff `canonical = false`. Maintained by `canonicality-db-helpers.ts`. Example: `"vitalik.eth"`. | +| `canonical_name` | `text` | yes | Materialized Canonical Name, `NULL` iff `canonical = false`. Maintained by `canonicality-db-helpers.ts`. Use for exact matches (`canonical_name = 'vitalik.eth'`) and display. Example: `"vitalik.eth"`. | +| `__canonical_name_prefix` | `text` | yes | Materialized prefix of `canonical_name` (first 64 code points), `NULL` iff `canonical = false`. Maintained by `canonicality-db-helpers.ts`. Use for left-anchored / substring search (`__canonical_name_prefix LIKE 'vit%'`) and NAME ordering without `canonical_name`'s full-length btree size hazard. The `__` prefix marks it an internal implementation detail — query `canonical_name` for exact matches and display. | | `canonical_label_hash_path` | `text[]` | yes | Materialized Canonical LabelHashPath, `NULL` iff `canonical = false`. Head-first (root → leaf), i.e. `[labelhash("eth"), labelhash("vitalik")]` for `"vitalik.eth"`. Maintained by `canonicality-db-helpers.ts`. | | `canonical_path` | `text[]` | yes | Materialized Canonical Domain Path, `NULL` iff `canonical = false`. Head-first (root → leaf), i.e. 
`[DomainId("eth"), DomainId("vitalik")]` for `"vitalik.eth"`. Maintained by `canonicality-db-helpers.ts`. | | `canonical_depth` | `integer` | yes | Materialized Canonical Depth, `NULL` iff `canonical = false`. The depth of this Domain in the Canonical Nametree, i.e. the number of Labels in its Canonical Name (e.g. `"eth"` depth 1, `"vitalik.eth"` depth 2). Maintained by `canonicality-db-helpers.ts`. | | `canonical_node` | `text` | yes | Materialized Canonical Node, `NULL` iff `canonical = false`. The computed Node (via `namehash`) of this Domain's Canonical Name. Maintained by `canonicality-db-helpers.ts`. | -**Indexes:** `type`, `subregistry_id` (partial: non-null only), `owner_id`, `label_hash`, `(registry_id, label_hash)` (composite; leading-column prefix also serves `WHERE registry_id = X` lookups, so no separate `registry_id` index is needed), `(registry_id, left(canonical_name, 256), id)` (composite expression index for registry-scoped `WHERE registry_id = X ORDER BY canonical_name LIMIT N` — the `Domain.subdomains` shape; the 256-char prefix bounds the index tuple under btree's per-tuple max, and NAME-ordered queries must sort by the same `left(...)` expression for the planner to use this index for ordered scan), `canonical_name` (hash, exact match — avoids the btree 8191-byte row-size hazard for spam names), `canonical_name` (GIN trigram for substring / similarity queries), `canonical_label_hash_path` (GIN containment for `cascadeLabelHeal`'s `canonical_label_hash_path @> ARRAY[lh]` lookup), `canonical_node` (hash, for resolver-record → canonical-domain joins), `canonical_depth` (btree, for `ORDER BY canonical_depth` — typeahead and depth-ordered browse). 
+**Indexes:** `type`, `subregistry_id` (partial: non-null only), `owner_id`, `label_hash`, `(registry_id, label_hash)` (composite; leading-column prefix also serves `WHERE registry_id = X` lookups, so no separate `registry_id` index is needed), `(registry_id, __canonical_name_prefix, id)` (composite for registry-scoped `WHERE registry_id = X ORDER BY __canonical_name_prefix LIMIT N` — the `Domain.subdomains` shape; ordering by the materialized, length-capped prefix column avoids replicating a `left(...)` expression and keeps the index tuple under btree's per-tuple max), `canonical_name` (hash, exact match — avoids the btree 8191-byte row-size hazard for spam names), `__canonical_name_prefix` (GIN trigram for left-anchored `LIKE 'vit%'` and substring search), `canonical_label_hash_path` (GIN containment for `cascadeLabelHeal`'s `canonical_label_hash_path @> ARRAY[lh]` lookup), `canonical_node` (hash, for resolver-record → canonical-domain joins), `canonical_depth` (btree, for `ORDER BY canonical_depth` — typeahead and depth-ordered browse). **Relations:** belongs to one `registries` record, belongs to one `registries` record (as subregistry), has one `accounts` record (owner), has one `accounts` record (rootRegistryOwner), has one `labels` record, has many `registrations` records. 
diff --git a/packages/ensdb-sdk/src/ensindexer-abstract/unigraph.schema.ts b/packages/ensdb-sdk/src/ensindexer-abstract/unigraph.schema.ts index 4a939c552..ea80333a4 100644 --- a/packages/ensdb-sdk/src/ensindexer-abstract/unigraph.schema.ts +++ b/packages/ensdb-sdk/src/ensindexer-abstract/unigraph.schema.ts @@ -5,6 +5,7 @@ import type { InterpretedName, LabelHash, LabelHashPath, + Name, Node, NormalizedAddress, PermissionsId, @@ -260,6 +261,23 @@ export const relations_registry = relations(registry, ({ one, many }) => ({ export const domainType = onchainEnum("DomainType", ["ENSv1Domain", "ENSv2Domain"]); +/** + * Length cap (in code points) of the materialized `domain.__canonicalNamePrefix`. Sized for + * typeahead and left-anchored search; longer (invariably spam) names truncate here and tie-break + * by `id` in NAME ordering. Kept small to bound the prefix indexes. + */ +export const CANONICAL_NAME_PREFIX_LENGTH = 64; + +/** + * Truncate a Canonical Name to {@link CANONICAL_NAME_PREFIX_LENGTH} for `domain.__canonicalNamePrefix`. + * Uses code-point iteration so the JS-side prefix is byte-identical to Postgres `left(text, N)` + * (which counts code points), keeping the materialized column consistent across the JS and raw-SQL + * write paths in `canonicality-db-helpers.ts`. + */ +export function truncateCanonicalNamePrefix(name: Name | null): Name | null { + return name === null ? null : [...name].slice(0, CANONICAL_NAME_PREFIX_LENGTH).join(""); +} + export const domain = onchainTable( "domains", (t) => ({ @@ -302,6 +320,19 @@ export const domain = onchainTable( */ canonicalName: t.text().$type(), + /** + * Materialized prefix of `canonicalName` (first {@link CANONICAL_NAME_PREFIX_LENGTH} code + * points), NULL iff `canonical = false`. Maintained by `canonicality-db-helpers.ts`. + * + * Powers left-anchored / substring search (`__canonical_name_prefix LIKE 'vit%'`) and NAME + * ordering without `canonical_name`'s full-length btree size hazard. 
The `__` prefix marks it + * an internal implementation detail (mirrors `Registry.__hasChildren`); query `canonical_name` + * for exact matches and display. + * + * @example "vitalik.eth" + */ + __canonicalNamePrefix: t.text("__canonical_name_prefix").$type(), + /** * Materialized Canonical LabelHashPath, NULL iff `canonical = false`. * Maintained by `canonicality-db-helpers.ts`. @@ -347,24 +378,18 @@ export const domain = onchainTable( // `WHERE registry_id = X` lookups via prefix scan. byRegistryAndLabelHash: index().on(t.registryId, t.labelHash), - // composite for `WHERE registry_id = X ORDER BY canonical_name LIMIT N` (Domain.subdomains - // and other find-domains queries when ordering by NAME). Uses `left(canonical_name, 256)` - // to bound the index tuple under btree's per-tuple max (~2712 bytes): 256 chars × max 4-byte - // UTF-8 = 1024 bytes, leaving ample room for the registry_id and id columns. Names beyond - // 256 chars (currently <0.0001% of mainnet) collide on the truncated prefix and tie-break by - // id; this is acceptable since such names are invariably spam. Callers MUST sort by the same - // expression for the planner to use this index for ordered scan. - byRegistryAndCanonicalNameLeft: index().on( - t.registryId, - sql`left(${t.canonicalName}, 256)`, - t.id, - ), + // composite for `WHERE registry_id = X ORDER BY __canonical_name_prefix LIMIT N` + // (Domain.subdomains and other find-domains queries when ordering by NAME). Orders by the + // materialized, length-capped prefix column so callers ORDER BY the plain column (no + // `left(...)` expression to replicate) and the index tuple stays under btree's per-tuple max. 
+ byRegistryAndCanonicalNamePrefix: index().on(t.registryId, t.__canonicalNamePrefix, t.id), // hash index avoids the btree 8191-byte row-size hazard for spam names byCanonicalNameExact: index().using("hash", t.canonicalName), - // GIN trigram index for substring / similarity queries (inline `gin_trgm_ops` via `sql` - // because passing it through `.op()` gets dropped by Ponder) - byCanonicalNameFuzzy: index().using("gin", sql`${t.canonicalName} gin_trgm_ops`), + // GIN trigram on the length-capped prefix for left-anchored (`LIKE 'vit%'`) and substring + // search (inline `gin_trgm_ops` via `sql` because passing it through `.op()` gets dropped by + // Ponder) + byCanonicalNamePrefixFuzzy: index().using("gin", sql`${t.__canonicalNamePrefix} gin_trgm_ops`), // GIN containment for `cascadeLabelHeal`'s `canonical_label_hash_path @> ARRAY[lh]` lookup byCanonicalLabelHashPath: index().using("gin", t.canonicalLabelHashPath), // hash index for resolver-record → canonical-domain joins From 7c3bb5fbf3748e8a523baa7c294f2bd4603bc798 Mon Sep 17 00:00:00 2001 From: shrugs Date: Thu, 4 Jun 2026 13:43:45 -0500 Subject: [PATCH 2/7] fix(unigraph): address review feedback (greploop 1) - drop truncateNameForCursor wrapper; use truncateCanonicalNamePrefix directly - remove now-redundant inline comments (starts_with, cursor NAME) - rename cascadeLabelHeal CTE healed -> canonical_name - early return in truncateCanonicalNamePrefix - docs: reword search-vs-display tip; convert all Indexes sections to lists; document current state only (drop previous-approach framing) - changeset: drop "Reindex required" Co-Authored-By: Claude Opus 4.8 (1M context) --- .../searchable-canonical-name-prefix.md | 2 +- .../find-domains-resolver-helpers.ts | 17 ------- .../lib/find-domains/find-domains-resolver.ts | 7 ++- .../src/lib/ensv2/canonicality-db-helpers.ts | 10 ++-- .../unigraph/examples/domain-by-name.mdx | 6 +-- .../integrate/unigraph/schema-reference.mdx | 50 +++++++++++++++---- 
.../ensindexer-abstract/unigraph.schema.ts | 11 ++-- 7 files changed, 59 insertions(+), 44 deletions(-) diff --git a/.changeset/searchable-canonical-name-prefix.md b/.changeset/searchable-canonical-name-prefix.md index 3b435370b..1f89f433d 100644 --- a/.changeset/searchable-canonical-name-prefix.md +++ b/.changeset/searchable-canonical-name-prefix.md @@ -4,4 +4,4 @@ "ensapi": patch --- -Add a materialized `domains.__canonical_name_prefix` column — the first 64 code points of `canonical_name` — to back left-anchored / substring search and NAME ordering. Direct-SQL consumers can now `WHERE __canonical_name_prefix LIKE 'vit%' ORDER BY __canonical_name_prefix` instead of replicating the previous `left(canonical_name, 256)` expression index. `canonical_name` is unchanged and remains the column for exact (`=` / `IN`) matches and display; the Omnigraph `name.starts_with` filter now targets the prefix column while continuing to return `canonical_name`. Reindex required. +Add a materialized `domains.__canonical_name_prefix` column — the first 64 code points of `canonical_name` — to back left-anchored / substring search and NAME ordering. Direct-SQL consumers can now `WHERE __canonical_name_prefix LIKE 'vit%' ORDER BY __canonical_name_prefix` instead of replicating the previous `left(canonical_name, 256)` expression index. `canonical_name` is unchanged and remains the column for exact (`=` / `IN`) matches and display; the Omnigraph `name.starts_with` filter now targets the prefix column while continuing to return `canonical_name`. 
diff --git a/apps/ensapi/src/omnigraph-api/lib/find-domains/find-domains-resolver-helpers.ts b/apps/ensapi/src/omnigraph-api/lib/find-domains/find-domains-resolver-helpers.ts index c5c273d70..38620c9cb 100644 --- a/apps/ensapi/src/omnigraph-api/lib/find-domains/find-domains-resolver-helpers.ts +++ b/apps/ensapi/src/omnigraph-api/lib/find-domains/find-domains-resolver-helpers.ts @@ -1,25 +1,10 @@ import { asc, desc, type SQL, sql } from "drizzle-orm"; -import { truncateCanonicalNamePrefix } from "@ensnode/ensdb-sdk/ensindexer-abstract"; - import di from "@/di"; import type { DomainCursor } from "@/omnigraph-api/lib/find-domains/domain-cursor"; import type { DomainsOrderBy } from "@/omnigraph-api/schema/domain-inputs"; import type { OrderDirection } from "@/omnigraph-api/schema/order-direction"; -/** - * Truncate a `canonicalName` to the materialized `__canonical_name_prefix` length when writing the - * `DomainCursor.value` of NAME orderings. Pre-truncating once at encode time keeps the encoded - * cursor small (long names hit thousands of characters) and lets `cursorFilter` compare directly - * against the `__canonical_name_prefix` column with no per-row `left(...)`. - * - * Delegates to {@link truncateCanonicalNamePrefix} so the cursor prefix is byte-identical to the - * column the NAME index sorts on. - */ -export function truncateNameForCursor(name: string | null): string | null { - return truncateCanonicalNamePrefix(name); -} - /** * The order column / expression for each `DomainsOrderBy` value. * @@ -94,8 +79,6 @@ export function cursorFilter( const value = (() => { switch (cursor.by) { case "NAME": - // Already pre-truncated at encode time (see `truncateNameForCursor`), so this matches the - // `__canonical_name_prefix` column the NAME order sorts on directly. 
return sql`${cursor.value}::text`; case "DEPTH": return sql`${cursor.value}::int`; diff --git a/apps/ensapi/src/omnigraph-api/lib/find-domains/find-domains-resolver.ts b/apps/ensapi/src/omnigraph-api/lib/find-domains/find-domains-resolver.ts index d3b8651de..644f0b9d2 100644 --- a/apps/ensapi/src/omnigraph-api/lib/find-domains/find-domains-resolver.ts +++ b/apps/ensapi/src/omnigraph-api/lib/find-domains/find-domains-resolver.ts @@ -3,6 +3,8 @@ import { type ResolveCursorConnectionArgs, resolveCursorConnection } from "@poth import { and, count, eq, ilike, inArray, type SQL, sql } from "drizzle-orm"; import type { NormalizedAddress, RegistryId } from "enssdk"; +import { truncateCanonicalNamePrefix } from "@ensnode/ensdb-sdk/ensindexer-abstract"; + import di from "@/di"; import { withActiveSpanAsync } from "@/lib/instrumentation/auto-span"; import { makeLogger } from "@/lib/logger"; @@ -11,7 +13,6 @@ import { DomainCursors } from "@/omnigraph-api/lib/find-domains/domain-cursor"; import { cursorFilter, orderFindDomains, - truncateNameForCursor, } from "@/omnigraph-api/lib/find-domains/find-domains-resolver-helpers"; import type { DomainOrderValue } from "@/omnigraph-api/lib/find-domains/types"; import { lazyConnection } from "@/omnigraph-api/lib/lazy-connection"; @@ -65,8 +66,6 @@ const VERSION_TO_DOMAIN_TYPE: Record< function nameCondition(filter: typeof DomainsNameFilter.$inferInput): SQL { const { ensIndexerSchema } = di.context; if (filter.starts_with) { - // prefix / substring search runs against the materialized, length-capped prefix column (backed - // by its GIN trigram index); exact `eq`/`in` below stay on the full `canonicalName`. 
return ilike(ensIndexerSchema.domain.__canonicalNamePrefix, `${filter.starts_with}%`); } @@ -257,7 +256,7 @@ export function resolveFindDomains( const __orderValue: DomainOrderValue = (() => { switch (orderBy) { case "NAME": - return truncateNameForCursor(domain.canonicalName); + return truncateCanonicalNamePrefix(domain.canonicalName); case "DEPTH": return domain.canonicalDepth; case "REGISTRATION_TIMESTAMP": diff --git a/apps/ensindexer/src/lib/ensv2/canonicality-db-helpers.ts b/apps/ensindexer/src/lib/ensv2/canonicality-db-helpers.ts index c2e963abf..de5d04e44 100644 --- a/apps/ensindexer/src/lib/ensv2/canonicality-db-helpers.ts +++ b/apps/ensindexer/src/lib/ensv2/canonicality-db-helpers.ts @@ -382,7 +382,7 @@ export async function cascadeLabelHeal( labelHash: LabelHash, ): Promise<void> { await context.ensDb.sql.execute(sql` - WITH healed AS ( + WITH canonical_name AS ( SELECT d.id, ( @@ -395,10 +395,10 @@ export async function cascadeLabelHeal( AND d.canonical_label_hash_path @> ARRAY[${labelHash}]::text[] ) UPDATE ${ensIndexerSchema.domain} AS d - SET canonical_name = h.name, - __canonical_name_prefix = left(h.name, ${CANONICAL_NAME_PREFIX_LENGTH}) - FROM healed h - WHERE d.id = h.id; + SET canonical_name = canonical_name.name, + __canonical_name_prefix = left(canonical_name.name, ${CANONICAL_NAME_PREFIX_LENGTH}) + FROM canonical_name + WHERE d.id = canonical_name.id; `); } diff --git a/docs/ensnode.io/src/content/docs/docs/integrate/unigraph/examples/domain-by-name.mdx b/docs/ensnode.io/src/content/docs/docs/integrate/unigraph/examples/domain-by-name.mdx index ec076f0b1..0880288b6 100644 --- a/docs/ensnode.io/src/content/docs/docs/integrate/unigraph/examples/domain-by-name.mdx +++ b/docs/ensnode.io/src/content/docs/docs/integrate/unigraph/examples/domain-by-name.mdx @@ -59,8 +59,8 @@ Performing SQL queries on the ENS Unigraph requires that you have the `unigraph` Fetch a Domain by its canonical name. 
Because `canonical_name` is materialized across both ENSv1 and ENSv2, the same lookup works regardless of protocol version. See [Connect](/docs/integrate/unigraph/examples) for setup. -:::tip[Prefix search & typeahead] -For left-anchored / typeahead search, query the materialized `__canonical_name_prefix` column — the first 64 code points of `canonical_name`, backed by a GIN trigram index — instead of `canonical_name`: +:::tip[Searching vs. displaying] +A `canonical_name` can be very long, but it's the full, correct name — always **select and display `canonical_name`**. When you need to **search** by prefix (`LIKE 'vit%'`), match against the materialized `__canonical_name_prefix` column (the first 64 code points of `canonical_name`, backed by a GIN trigram index) so the query stays index-backed: ```sql SELECT id, type, canonical_name, canonical_node, owner_id @@ -70,7 +70,7 @@ ORDER BY __canonical_name_prefix LIMIT 10; ``` -Use `canonical_name` only for exact matches (`canonical_name = 'vitalik.eth'`). +The `SELECT` still returns `canonical_name`; only the `LIKE` / `ORDER BY` use the prefix. Because the prefix column holds at most 64 code points, a search pattern longer than that can never match it — keep search terms within that length. For exact matches, use `canonical_name` directly (`canonical_name = 'vitalik.eth'`). ::: :::note[Canonical fields] diff --git a/docs/ensnode.io/src/content/docs/docs/integrate/unigraph/schema-reference.mdx b/docs/ensnode.io/src/content/docs/docs/integrate/unigraph/schema-reference.mdx index 6e0fa72fc..7e73ff1ba 100644 --- a/docs/ensnode.io/src/content/docs/docs/integrate/unigraph/schema-reference.mdx +++ b/docs/ensnode.io/src/content/docs/docs/integrate/unigraph/schema-reference.mdx @@ -130,7 +130,12 @@ the Event responsible for its existence. | `topics` | `text[]` | no | All log topics. | | `data` | `text` | no | Log data. | -**Indexes:** `selector`, `from`, `sender`, `timestamp`. 
+**Indexes:** + +- `selector` +- `from` +- `sender` +- `timestamp` ## domain_events @@ -199,7 +204,9 @@ For ENSv1, each domain that has children implicitly owns a "virtual" Registry (` | `canonical` | `boolean` | no | Whether this Registry is part of the canonical nametree. This encodes bi-directional agreement between `domains.subregistry_id` and `registries.canonical_domain_id`, so traversal of the canonical nametree filtered to domains/registries where `canonical=true` is safe and doesn't require edge-authenticating oneself (i.e. don't need to compare `domains.subregistry_id` and `registries.canonical_domain_id` in the query, can just `WHERE canonical = true`). Default `false`. | | `has_children` | `boolean` | no | Internal bookkeeping field. Synthetic monotonic sentinel flipped to `true` the first time a child Domain is registered under this Registry. Used to optimize canonicality cascades. Default `false`. | -**Indexes:** `(chain_id, address)` — non-unique, because multiple rows can share `(chain_id, address)` across virtual registries. +**Indexes:** + +- `(chain_id, address)` — non-unique, because multiple rows can share `(chain_id, address)` across virtual registries. **Relations:** has many `domains` (as parent registry), has many `domains` (as subregistry), has one `permissions` via `(chain_id, address)`. @@ -230,7 +237,19 @@ Domain-Resolver relations are tracked via the Protocol Acceleration plugin, not | `canonical_depth` | `integer` | yes | Materialized Canonical Depth, `NULL` iff `canonical = false`. The depth of this Domain in the Canonical Nametree, i.e. the number of Labels in its Canonical Name (e.g. `"eth"` depth 1, `"vitalik.eth"` depth 2). Maintained by `canonicality-db-helpers.ts`. | | `canonical_node` | `text` | yes | Materialized Canonical Node, `NULL` iff `canonical = false`. The computed Node (via `namehash`) of this Domain's Canonical Name. Maintained by `canonicality-db-helpers.ts`. 
| -**Indexes:** `type`, `subregistry_id` (partial: non-null only), `owner_id`, `label_hash`, `(registry_id, label_hash)` (composite; leading-column prefix also serves `WHERE registry_id = X` lookups, so no separate `registry_id` index is needed), `(registry_id, __canonical_name_prefix, id)` (composite for registry-scoped `WHERE registry_id = X ORDER BY __canonical_name_prefix LIMIT N` — the `Domain.subdomains` shape; ordering by the materialized, length-capped prefix column avoids replicating a `left(...)` expression and keeps the index tuple under btree's per-tuple max), `canonical_name` (hash, exact match — avoids the btree 8191-byte row-size hazard for spam names), `__canonical_name_prefix` (GIN trigram for left-anchored `LIKE 'vit%'` and substring search), `canonical_label_hash_path` (GIN containment for `cascadeLabelHeal`'s `canonical_label_hash_path @> ARRAY[lh]` lookup), `canonical_node` (hash, for resolver-record → canonical-domain joins), `canonical_depth` (btree, for `ORDER BY canonical_depth` — typeahead and depth-ordered browse). 
+**Indexes:** + +- `type` +- `subregistry_id` (partial: non-null only) +- `owner_id` +- `label_hash` +- `(registry_id, label_hash)` (composite; leading-column prefix also serves `WHERE registry_id = X` lookups, so no separate `registry_id` index is needed) +- `(registry_id, __canonical_name_prefix, id)` (composite for registry-scoped `WHERE registry_id = X ORDER BY __canonical_name_prefix LIMIT N` — the `Domain.subdomains` shape; the length-capped prefix keeps the index tuple under btree's per-tuple max) +- `canonical_name` (hash, exact match — avoids the btree 8191-byte row-size hazard for spam names) +- `__canonical_name_prefix` (GIN trigram for left-anchored `LIKE 'vit%'` and substring search) +- `canonical_label_hash_path` (GIN containment for `cascadeLabelHeal`'s `canonical_label_hash_path @> ARRAY[lh]` lookup) +- `canonical_node` (hash, for resolver-record → canonical-domain joins) +- `canonical_depth` (btree, for `ORDER BY canonical_depth` — typeahead and depth-ordered browse) **Relations:** belongs to one `registries` record, belongs to one `registries` record (as subregistry), has one `accounts` record (owner), has one `accounts` record (rootRegistryOwner), has one `labels` record, has many `registrations` records. @@ -243,7 +262,10 @@ Internal rainbow table mapping a `label_hash` to its interpreted label string. D | `label_hash` | `text` | no | `keccak256` of the label. Primary key. | | `interpreted` | `text` | no | The interpreted label string. | -**Indexes:** `interpreted` (hash index for exact match), `interpreted` (GIN trigram index for prefix/substring `LIKE`) +**Indexes:** + +- `interpreted` (hash index for exact match) +- `interpreted` (GIN trigram index for prefix/substring `LIKE`) **Relations:** has many `domains`. @@ -271,7 +293,9 @@ A registration is keyed by `id`. | `wrapped` | `boolean` | no | Whether the registration is currently wrapped by the NameWrapper. Default `false`. 
| | `event_id` | `text` | no | The event that created this registration record. | -**Indexes:** unique on `(domain_id, registration_index)`. +**Indexes:** + +- unique on `(domain_id, registration_index)` **Relations:** belongs to one `domains` record, has one `accounts` record (registrant), has one `accounts` record (unregistrant), has many `renewals`, has one `events` record. @@ -302,7 +326,9 @@ A renewal is keyed by `id` and belongs to a specific registration. | `premium` | `numeric(78)` | yes | Premium cost in wei above base. ENSv1 `RegistrarControllers` only. | | `event_id` | `text` | no | The event that created this renewal record. | -**Indexes:** unique on `(domain_id, registration_index, renewal_index)`. +**Indexes:** + +- unique on `(domain_id, registration_index, renewal_index)` **Relations:** belongs to one `registrations` record via `(domain_id, registration_index)`, has one `events` record via `(event_id)`. @@ -328,7 +354,9 @@ An ENSv2 permissions contract instance. | `chain_id` | `bigint` | no | Chain the permissions contract is deployed on. | | `address` | `text` | no | Address of the permissions contract. | -**Indexes:** unique on `(chain_id, address)`. +**Indexes:** + +- unique on `(chain_id, address)` **Relations:** has many `permissions_resources`, has many `permissions_users`. @@ -343,7 +371,9 @@ A resource managed by a `permissions` contract. | `address` | `text` | no | Address of the parent permissions contract. | | `resource` | `numeric(78)` | no | Resource identifier (a `uint256` token ID or similar). | -**Indexes:** unique on `(chain_id, address, resource)`. +**Indexes:** + +- unique on `(chain_id, address, resource)` **Relations:** belongs to one `permissions` via `(chain_id, address)`. @@ -360,6 +390,8 @@ A user's role bitmap for a specific resource within a `permissions` contract. | `user` | `text` | no | The user/grantee address this Permission is granted to (the HCA account address if used). 
| | `roles` | `numeric(78)` | no | Roles bitmap for this user on this resource. | -**Indexes:** unique on `(chain_id, address, resource, user)`. +**Indexes:** + +- unique on `(chain_id, address, resource, user)` **Relations:** has one `accounts` record (user), belongs to one `permissions` record via `(chain_id, address)`, belongs to one `permissions_resource` record via `(chain_id, address, resource)`. diff --git a/packages/ensdb-sdk/src/ensindexer-abstract/unigraph.schema.ts b/packages/ensdb-sdk/src/ensindexer-abstract/unigraph.schema.ts index ea80333a4..1d63eb313 100644 --- a/packages/ensdb-sdk/src/ensindexer-abstract/unigraph.schema.ts +++ b/packages/ensdb-sdk/src/ensindexer-abstract/unigraph.schema.ts @@ -275,7 +275,8 @@ export const CANONICAL_NAME_PREFIX_LENGTH = 64; * write paths in `canonicality-db-helpers.ts`. */ export function truncateCanonicalNamePrefix(name: Name | null): Name | null { - return name === null ? null : [...name].slice(0, CANONICAL_NAME_PREFIX_LENGTH).join(""); + if (name === null) return null; + return [...name].slice(0, CANONICAL_NAME_PREFIX_LENGTH).join(""); } export const domain = onchainTable( @@ -378,10 +379,10 @@ export const domain = onchainTable( // `WHERE registry_id = X` lookups via prefix scan. byRegistryAndLabelHash: index().on(t.registryId, t.labelHash), - // composite for `WHERE registry_id = X ORDER BY __canonical_name_prefix LIMIT N` - // (Domain.subdomains and other find-domains queries when ordering by NAME). Orders by the - // materialized, length-capped prefix column so callers ORDER BY the plain column (no - // `left(...)` expression to replicate) and the index tuple stays under btree's per-tuple max. + // composite for `WHERE registry_id = X ORDER BY __canonical_name_prefix LIMIT N` (Domain.subdomains + // and other find-domains queries when ordering by NAME). 
The length-capped prefix keeps the + // index tuple under btree's per-tuple max (~2712 bytes); 64 code points × max 4-byte UTF-8 = + // 256 bytes, leaving ample room for the registry_id and id columns. byRegistryAndCanonicalNamePrefix: index().on(t.registryId, t.__canonicalNamePrefix, t.id), // hash index avoids the btree 8191-byte row-size hazard for spam names From 69111a0a563700d3dac83db629d9ebdf2cf6c504 Mon Sep 17 00:00:00 2001 From: shrugs Date: Thu, 4 Jun 2026 13:55:23 -0500 Subject: [PATCH 3/7] fix(unigraph): address Greptile feedback (greploop 2) - NAME cursor reads stored domain.__canonicalNamePrefix directly (single source of truth) instead of recomputing from canonicalName; drop the now-unused truncateCanonicalNamePrefix import in the resolver - document the 64-code-point starts_with cap in the GraphQL field description; regenerate Omnigraph SDL Co-Authored-By: Claude Opus 4.8 (1M context) --- .../omnigraph-api/lib/find-domains/find-domains-resolver.ts | 4 +--- apps/ensapi/src/omnigraph-api/schema/domain-inputs.ts | 2 +- packages/enssdk/src/omnigraph/generated/schema.graphql | 2 +- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/apps/ensapi/src/omnigraph-api/lib/find-domains/find-domains-resolver.ts b/apps/ensapi/src/omnigraph-api/lib/find-domains/find-domains-resolver.ts index 644f0b9d2..31bf50abf 100644 --- a/apps/ensapi/src/omnigraph-api/lib/find-domains/find-domains-resolver.ts +++ b/apps/ensapi/src/omnigraph-api/lib/find-domains/find-domains-resolver.ts @@ -3,8 +3,6 @@ import { type ResolveCursorConnectionArgs, resolveCursorConnection } from "@poth import { and, count, eq, ilike, inArray, type SQL, sql } from "drizzle-orm"; import type { NormalizedAddress, RegistryId } from "enssdk"; -import { truncateCanonicalNamePrefix } from "@ensnode/ensdb-sdk/ensindexer-abstract"; - import di from "@/di"; import { withActiveSpanAsync } from "@/lib/instrumentation/auto-span"; import { makeLogger } from "@/lib/logger"; @@ -256,7 +254,7 @@ export 
function resolveFindDomains( const __orderValue: DomainOrderValue = (() => { switch (orderBy) { case "NAME": - return truncateCanonicalNamePrefix(domain.canonicalName); + return domain.__canonicalNamePrefix; case "DEPTH": return domain.canonicalDepth; case "REGISTRATION_TIMESTAMP": diff --git a/apps/ensapi/src/omnigraph-api/schema/domain-inputs.ts b/apps/ensapi/src/omnigraph-api/schema/domain-inputs.ts index 3334b2245..885b6b7cf 100644 --- a/apps/ensapi/src/omnigraph-api/schema/domain-inputs.ts +++ b/apps/ensapi/src/omnigraph-api/schema/domain-inputs.ts @@ -69,7 +69,7 @@ export const DomainsNameFilter = builder.inputType("DomainsNameFilter", { fields: (t) => ({ starts_with: t.string({ description: - "Prefix-match on Interpreted Name for typeahead. ex: 'vit', 'vitalik.et'. Case-insensitive (InterpretedName labels are normalized).", + "Prefix-match on Interpreted Name for typeahead. ex: 'vit', 'vitalik.et'. Case-insensitive (InterpretedName labels are normalized). Matched against the first 64 code points of the name; prefixes longer than 64 code points never match.", validate: { minLength: 1 }, }), eq: t.field({ diff --git a/packages/enssdk/src/omnigraph/generated/schema.graphql b/packages/enssdk/src/omnigraph/generated/schema.graphql index 81bc4ece1..7d907ed9a 100644 --- a/packages/enssdk/src/omnigraph/generated/schema.graphql +++ b/packages/enssdk/src/omnigraph/generated/schema.graphql @@ -490,7 +490,7 @@ input DomainsNameFilter @oneOf { in: [InterpretedName!] """ - Prefix-match on Interpreted Name for typeahead. ex: 'vit', 'vitalik.et'. Case-insensitive (InterpretedName labels are normalized). + Prefix-match on Interpreted Name for typeahead. ex: 'vit', 'vitalik.et'. Case-insensitive (InterpretedName labels are normalized). Matched against the first 64 code points of the name; prefixes longer than 64 code points never match. 
""" starts_with: String } From ccf4d84a886f6adafb7a45e46c41313aadb92777 Mon Sep 17 00:00:00 2001 From: shrugs Date: Thu, 4 Jun 2026 14:03:10 -0500 Subject: [PATCH 4/7] perf(unigraph): bound truncateCanonicalNamePrefix iteration (greploop 3) iterate code points and stop at the cap instead of spreading the whole string, avoiding an O(name length) allocation on the write/cursor hot path for thousands-of-code-point spam names. result is byte-identical. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../src/ensindexer-abstract/unigraph.schema.ts | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/packages/ensdb-sdk/src/ensindexer-abstract/unigraph.schema.ts b/packages/ensdb-sdk/src/ensindexer-abstract/unigraph.schema.ts index 1d63eb313..af5023204 100644 --- a/packages/ensdb-sdk/src/ensindexer-abstract/unigraph.schema.ts +++ b/packages/ensdb-sdk/src/ensindexer-abstract/unigraph.schema.ts @@ -276,7 +276,15 @@ export const CANONICAL_NAME_PREFIX_LENGTH = 64; */ export function truncateCanonicalNamePrefix(name: Name | null): Name | null { if (name === null) return null; - return [...name].slice(0, CANONICAL_NAME_PREFIX_LENGTH).join(""); + // iterate code points and stop at the cap rather than spreading the whole string, which can be + // thousands of code points for spam names on a hot path (indexer writes + cursor encoding) + let prefix = ""; + let count = 0; + for (const codePoint of name) { + prefix += codePoint; + if (++count >= CANONICAL_NAME_PREFIX_LENGTH) break; + } + return prefix; } export const domain = onchainTable( From a9dce3bc04e511e9ac5421a324d3d81022e3642b Mon Sep 17 00:00:00 2001 From: shrugs Date: Thu, 4 Jun 2026 14:10:58 -0500 Subject: [PATCH 5/7] docs(unigraph): clarify index coverage in prefix-search tip (greploop 4) the GIN trigram backs the LIKE filter; ORDER BY __canonical_name_prefix sorts the matched set unless scoped by registry_id (composite btree). 
Co-Authored-By: Claude Opus 4.8 (1M context) --- .../docs/docs/integrate/unigraph/examples/domain-by-name.mdx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/ensnode.io/src/content/docs/docs/integrate/unigraph/examples/domain-by-name.mdx b/docs/ensnode.io/src/content/docs/docs/integrate/unigraph/examples/domain-by-name.mdx index 0880288b6..9f6bd7448 100644 --- a/docs/ensnode.io/src/content/docs/docs/integrate/unigraph/examples/domain-by-name.mdx +++ b/docs/ensnode.io/src/content/docs/docs/integrate/unigraph/examples/domain-by-name.mdx @@ -60,7 +60,7 @@ Performing SQL queries on the ENS Unigraph requires that you have the `unigraph` Fetch a Domain by its canonical name. Because `canonical_name` is materialized across both ENSv1 and ENSv2, the same lookup works regardless of protocol version. See [Connect](/docs/integrate/unigraph/examples) for setup. :::tip[Searching vs. displaying] -A `canonical_name` can be very long, but it's the full, correct name — always **select and display `canonical_name`**. When you need to **search** by prefix (`LIKE 'vit%'`), match against the materialized `__canonical_name_prefix` column (the first 64 code points of `canonical_name`, backed by a GIN trigram index) so the query stays index-backed: +A `canonical_name` can be very long, but it's the full, correct name — always **select and display `canonical_name`**. When you need to **search** by prefix (`LIKE 'vit%'`), match against the materialized `__canonical_name_prefix` column (the first 64 code points of `canonical_name`, backed by a GIN trigram index) so the `LIKE` filter is index-backed: ```sql SELECT id, type, canonical_name, canonical_node, owner_id @@ -70,7 +70,7 @@ ORDER BY __canonical_name_prefix LIMIT 10; ``` -The `SELECT` still returns `canonical_name`; only the `LIKE` / `ORDER BY` use the prefix. For exact matches, use `canonical_name` directly (`canonical_name = 'vitalik.eth'`). 
+The `SELECT` still returns `canonical_name`; only the `LIKE` / `ORDER BY` use the prefix. The GIN trigram index backs the `LIKE` filter; the `ORDER BY` then sorts the matched set (cheap under a small `LIMIT`) — scope the query by `registry_id` to use the `(registry_id, __canonical_name_prefix, id)` btree for fully index-backed ordering. For exact matches, use `canonical_name` directly (`canonical_name = 'vitalik.eth'`). ::: :::note[Canonical fields] From dec504364ce4f7f95ed1ca9fe2bb7bb3399cd803 Mon Sep 17 00:00:00 2001 From: shrugs Date: Thu, 4 Jun 2026 15:05:56 -0500 Subject: [PATCH 6/7] refactor(unigraph): rename cascadeLabelHeal CTE to healed_names (greploop 5) avoids the CTE name shadowing the target canonical_name column in the UPDATE...SET, which several reviewers found ambiguous. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../src/lib/ensv2/canonicality-db-helpers.ts | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/apps/ensindexer/src/lib/ensv2/canonicality-db-helpers.ts b/apps/ensindexer/src/lib/ensv2/canonicality-db-helpers.ts index de5d04e44..76ee2098c 100644 --- a/apps/ensindexer/src/lib/ensv2/canonicality-db-helpers.ts +++ b/apps/ensindexer/src/lib/ensv2/canonicality-db-helpers.ts @@ -382,7 +382,7 @@ export async function cascadeLabelHeal( labelHash: LabelHash, ): Promise { await context.ensDb.sql.execute(sql` - WITH canonical_name AS ( + WITH healed_names AS ( SELECT d.id, ( @@ -395,10 +395,10 @@ export async function cascadeLabelHeal( AND d.canonical_label_hash_path @> ARRAY[${labelHash}]::text[] ) UPDATE ${ensIndexerSchema.domain} AS d - SET canonical_name = canonical_name.name, - __canonical_name_prefix = left(canonical_name.name, ${CANONICAL_NAME_PREFIX_LENGTH}) - FROM canonical_name - WHERE d.id = canonical_name.id; + SET canonical_name = healed_names.name, + __canonical_name_prefix = left(healed_names.name, ${CANONICAL_NAME_PREFIX_LENGTH}) + FROM healed_names + WHERE d.id = healed_names.id; `); } From 
a52694fd00dfd58d4404fafc175d7045c2fb5f4a Mon Sep 17 00:00:00 2001 From: shrugs Date: Thu, 4 Jun 2026 15:15:15 -0500 Subject: [PATCH 7/7] docs(unigraph): use ILIKE in prefix-search examples (greploop 6) align direct-SQL prefix-search docs with the case-insensitive Omnigraph starts_with semantics; the GIN trigram index serves ILIKE equally. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../docs/integrate/unigraph/examples/domain-by-name.mdx | 6 +++--- .../docs/docs/integrate/unigraph/schema-reference.mdx | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/ensnode.io/src/content/docs/docs/integrate/unigraph/examples/domain-by-name.mdx b/docs/ensnode.io/src/content/docs/docs/integrate/unigraph/examples/domain-by-name.mdx index 9f6bd7448..bc80cedd1 100644 --- a/docs/ensnode.io/src/content/docs/docs/integrate/unigraph/examples/domain-by-name.mdx +++ b/docs/ensnode.io/src/content/docs/docs/integrate/unigraph/examples/domain-by-name.mdx @@ -60,17 +60,17 @@ Performing SQL queries on the ENS Unigraph requires that you have the `unigraph` Fetch a Domain by its canonical name. Because `canonical_name` is materialized across both ENSv1 and ENSv2, the same lookup works regardless of protocol version. See [Connect](/docs/integrate/unigraph/examples) for setup. :::tip[Searching vs. displaying] -A `canonical_name` can be very long, but it's the full, correct name — always **select and display `canonical_name`**. When you need to **search** by prefix (`LIKE 'vit%'`), match against the materialized `__canonical_name_prefix` column (the first 64 code points of `canonical_name`, backed by a GIN trigram index) so the `LIKE` filter is index-backed: +A `canonical_name` can be very long, but it's the full, correct name — always **select and display `canonical_name`**. 
When you need to **search** by prefix (`ILIKE 'vit%'`, case-insensitive to match the Omnigraph `starts_with` filter), match against the materialized `__canonical_name_prefix` column (the first 64 code points of `canonical_name`, backed by a GIN trigram index) so the `ILIKE` filter is index-backed: ```sql SELECT id, type, canonical_name, canonical_node, owner_id FROM ensindexer_0.domains -WHERE __canonical_name_prefix LIKE 'vit%' +WHERE __canonical_name_prefix ILIKE 'vit%' ORDER BY __canonical_name_prefix LIMIT 10; ``` -The `SELECT` still returns `canonical_name`; only the `LIKE` / `ORDER BY` use the prefix. The GIN trigram index backs the `LIKE` filter; the `ORDER BY` then sorts the matched set (cheap under a small `LIMIT`) — scope the query by `registry_id` to use the `(registry_id, __canonical_name_prefix, id)` btree for fully index-backed ordering. For exact matches, use `canonical_name` directly (`canonical_name = 'vitalik.eth'`). +The `SELECT` still returns `canonical_name`; only the `ILIKE` / `ORDER BY` use the prefix. The GIN trigram index backs the `ILIKE` filter; the `ORDER BY` then sorts the matched set (a bounded top-N sort under a small `LIMIT`, though every matching row is still read, so keep the prefix selective) — scope the query by `registry_id` to use the `(registry_id, __canonical_name_prefix, id)` btree for fully index-backed ordering. For exact matches, use `canonical_name` directly (`canonical_name = 'vitalik.eth'`). ::: :::note[Canonical fields] diff --git a/docs/ensnode.io/src/content/docs/docs/integrate/unigraph/schema-reference.mdx b/docs/ensnode.io/src/content/docs/docs/integrate/unigraph/schema-reference.mdx index 7e73ff1ba..bd2f2e852 100644 --- a/docs/ensnode.io/src/content/docs/docs/integrate/unigraph/schema-reference.mdx +++ b/docs/ensnode.io/src/content/docs/docs/integrate/unigraph/schema-reference.mdx @@ -231,7 +231,7 @@ Domain-Resolver relations are tracked via the Protocol Acceleration plugin, not | `root_registry_owner_id` | `text` | yes | ENSv1 only: the owner recorded in the root ENSv1 registry.
`null` for ENSv2 domains. | | `canonical` | `boolean` | no | Whether this Domain is part of the canonical nametree. This encodes bi-directional agreement between `domains.subregistry_id` and `registries.canonical_domain_id`, so traversal of the canonical nametree filtered to domains/registries where `canonical=true` is safe and doesn't require edge-authenticating oneself (i.e. don't need to compare `domains.subregistry_id` and `registries.canonical_domain_id` in the query, can just `WHERE canonical = true`). Mirrors the parent Registry's flag. Default `false`. | | `canonical_name` | `text` | yes | Materialized Canonical Name, `NULL` iff `canonical = false`. Maintained by `canonicality-db-helpers.ts`. Use for exact matches (`canonical_name = 'vitalik.eth'`) and display. Example: `"vitalik.eth"`. | -| `__canonical_name_prefix` | `text` | yes | Materialized prefix of `canonical_name` (first 64 code points), `NULL` iff `canonical = false`. Maintained by `canonicality-db-helpers.ts`. Use for left-anchored / substring search (`__canonical_name_prefix LIKE 'vit%'`) and NAME ordering without `canonical_name`'s full-length btree size hazard. The `__` prefix marks it an internal implementation detail — query `canonical_name` for exact matches and display. | +| `__canonical_name_prefix` | `text` | yes | Materialized prefix of `canonical_name` (first 64 code points), `NULL` iff `canonical = false`. Maintained by `canonicality-db-helpers.ts`. Use for left-anchored / substring search (`__canonical_name_prefix ILIKE 'vit%'`, case-insensitive to match the Omnigraph `starts_with` filter) and NAME ordering without `canonical_name`'s full-length btree size hazard. The `__` prefix marks it as an internal implementation detail — query `canonical_name` for exact matches and display. | | `canonical_label_hash_path` | `text[]` | yes | Materialized Canonical LabelHashPath, `NULL` iff `canonical = false`. Head-first (root → leaf), i.e.
`[labelhash("eth"), labelhash("vitalik")]` for `"vitalik.eth"`. Maintained by `canonicality-db-helpers.ts`. | | `canonical_path` | `text[]` | yes | Materialized Canonical Domain Path, `NULL` iff `canonical = false`. Head-first (root → leaf), i.e. `[DomainId("eth"), DomainId("vitalik")]` for `"vitalik.eth"`. Maintained by `canonicality-db-helpers.ts`. | | `canonical_depth` | `integer` | yes | Materialized Canonical Depth, `NULL` iff `canonical = false`. The depth of this Domain in the Canonical Nametree, i.e. the number of Labels in its Canonical Name (e.g. `"eth"` depth 1, `"vitalik.eth"` depth 2). Maintained by `canonicality-db-helpers.ts`. | @@ -246,7 +246,7 @@ Domain-Resolver relations are tracked via the Protocol Acceleration plugin, not - `(registry_id, label_hash)` (composite; leading-column prefix also serves `WHERE registry_id = X` lookups, so no separate `registry_id` index is needed) - `(registry_id, __canonical_name_prefix, id)` (composite for registry-scoped `WHERE registry_id = X ORDER BY __canonical_name_prefix LIMIT N` — the `Domain.subdomains` shape; the length-capped prefix keeps the index tuple under btree's per-tuple max) - `canonical_name` (hash, exact match — avoids the btree 8191-byte row-size hazard for spam names) -- `__canonical_name_prefix` (GIN trigram for left-anchored `LIKE 'vit%'` and substring search) +- `__canonical_name_prefix` (GIN trigram for left-anchored `ILIKE 'vit%'` and substring search) - `canonical_label_hash_path` (GIN containment for `cascadeLabelHeal`'s `canonical_label_hash_path @> ARRAY[lh]` lookup) - `canonical_node` (hash, for resolver-record → canonical-domain joins) - `canonical_depth` (btree, for `ORDER BY canonical_depth` — typeahead and depth-ordered browse)