diff --git a/.changeset/searchable-canonical-name-prefix.md b/.changeset/searchable-canonical-name-prefix.md new file mode 100644 index 000000000..1f89f433d --- /dev/null +++ b/.changeset/searchable-canonical-name-prefix.md @@ -0,0 +1,7 @@ +--- +"@ensnode/ensdb-sdk": patch +"ensindexer": patch +"ensapi": patch +--- + +Add a materialized `domains.__canonical_name_prefix` column — the first 64 code points of `canonical_name` — to back left-anchored / substring search and NAME ordering. Direct-SQL consumers can now `WHERE __canonical_name_prefix LIKE 'vit%' ORDER BY __canonical_name_prefix` instead of replicating the previous `left(canonical_name, 256)` expression index. `canonical_name` is unchanged and remains the column for exact (`=` / `IN`) matches and display; the Omnigraph `name.starts_with` filter now targets the prefix column while continuing to return `canonical_name`. diff --git a/apps/ensapi/src/omnigraph-api/lib/find-domains/find-domains-resolver-helpers.ts b/apps/ensapi/src/omnigraph-api/lib/find-domains/find-domains-resolver-helpers.ts index 513950f54..38620c9cb 100644 --- a/apps/ensapi/src/omnigraph-api/lib/find-domains/find-domains-resolver-helpers.ts +++ b/apps/ensapi/src/omnigraph-api/lib/find-domains/find-domains-resolver-helpers.ts @@ -5,44 +5,6 @@ import type { DomainCursor } from "@/omnigraph-api/lib/find-domains/domain-curso import type { DomainsOrderBy } from "@/omnigraph-api/schema/domain-inputs"; import type { OrderDirection } from "@/omnigraph-api/schema/order-direction"; -/** - * Length cap (in characters) of the `canonical_name` prefix used by: - * 1. the `(registry_id, left(canonical_name, N), id)` composite btree on `domains`, - * 2. all NAME-ordered queries' ORDER BY expressions, and - * 3. 
the value stored in `DomainCursor.value` when ordering by NAME — pre-truncated at - * encode time via {@link truncateNameForCursor} so filter-time comparisons are simple - * tuple compares against the index expression with no per-row `left(...)` re-application. - * - * The btree per-tuple max is ~2712 bytes; with `registry_id` and `id` consuming ~240 bytes of - * that, ~2400 bytes remain for the prefix expression. 256 chars × max 4-byte UTF-8 codepoint = - * 1024 bytes, well under the limit and within the realm of reasonable name lengths (mainnet avg - * is ~126). Queries MUST sort by this same expression for the planner to use the index for - * ordered scan; raw `canonical_name` ORDER BY falls back to a full scan + sort. - * - * An alternative solution is to redefine InterpretedLabel to enforce a maximum byte length of 255 before - * being truncated into an Encoded LabelHash — this mirrors a name's resolvability (must be dns-encodable) - * and allows us to avoid storing spam names. Then we'd also have to produce a b-tree-indexed - * materializedCanonicalName field that's length-capped as well to fit the btree index. Then we could - * query against that column instead of the full InterpretedName. All of that would avoid this - * LEFT(...) expression index and the necessity for the query pattern to match the defined index - * (to avoid the full scan). - */ -export const CANONICAL_NAME_SORT_PREFIX = 256; - -/** - * Truncate a `canonicalName` to the cursor / index prefix length. Used when writing the cursor - * value for NAME orderings — callers slice once at encode time so the encoded cursor stays small - * (long names can hit thousands of characters) and `cursorFilter` can compare directly against - * the index expression without re-applying `left(...)` per row. - * - * Uses code-point iteration (`[...name]`) rather than `String.slice`, which counts UTF-16 code - * units and would split surrogate pairs. 
Postgres `left(text, N)` counts characters (code - * points), so this keeps the JS-side and DB-side prefixes byte-identical. - */ -export function truncateNameForCursor(name: string | null): string | null { - return name === null ? null : [...name].slice(0, CANONICAL_NAME_SORT_PREFIX).join(""); -} - /** * The order column / expression for each `DomainsOrderBy` value. * @@ -54,7 +16,7 @@ function getOrderColumn(orderBy: typeof DomainsOrderBy.$inferType): SQL { const { ensIndexerSchema } = di.context; switch (orderBy) { case "NAME": - return sql`left(${ensIndexerSchema.domain.canonicalName}, ${sql.raw(String(CANONICAL_NAME_SORT_PREFIX))})`; + return sql`${ensIndexerSchema.domain.__canonicalNamePrefix}`; case "DEPTH": return sql`${ensIndexerSchema.domain.canonicalDepth}`; case "REGISTRATION_TIMESTAMP": @@ -117,8 +79,6 @@ export function cursorFilter( const value = (() => { switch (cursor.by) { case "NAME": - // Already pre-truncated at encode time (see `truncateNameForCursor`), so this matches - // the index expression `left(canonical_name, CANONICAL_NAME_SORT_PREFIX)` directly. 
return sql`${cursor.value}::text`; case "DEPTH": return sql`${cursor.value}::int`; diff --git a/apps/ensapi/src/omnigraph-api/lib/find-domains/find-domains-resolver.ts b/apps/ensapi/src/omnigraph-api/lib/find-domains/find-domains-resolver.ts index 0fdb08f2f..31bf50abf 100644 --- a/apps/ensapi/src/omnigraph-api/lib/find-domains/find-domains-resolver.ts +++ b/apps/ensapi/src/omnigraph-api/lib/find-domains/find-domains-resolver.ts @@ -11,7 +11,6 @@ import { DomainCursors } from "@/omnigraph-api/lib/find-domains/domain-cursor"; import { cursorFilter, orderFindDomains, - truncateNameForCursor, } from "@/omnigraph-api/lib/find-domains/find-domains-resolver-helpers"; import type { DomainOrderValue } from "@/omnigraph-api/lib/find-domains/types"; import { lazyConnection } from "@/omnigraph-api/lib/lazy-connection"; @@ -65,7 +64,7 @@ const VERSION_TO_DOMAIN_TYPE: Record< function nameCondition(filter: typeof DomainsNameFilter.$inferInput): SQL { const { ensIndexerSchema } = di.context; if (filter.starts_with) { - return ilike(ensIndexerSchema.domain.canonicalName, `${filter.starts_with}%`); + return ilike(ensIndexerSchema.domain.__canonicalNamePrefix, `${filter.starts_with}%`); } if (filter.eq) { @@ -255,7 +254,7 @@ export function resolveFindDomains( const __orderValue: DomainOrderValue = (() => { switch (orderBy) { case "NAME": - return truncateNameForCursor(domain.canonicalName); + return domain.__canonicalNamePrefix; case "DEPTH": return domain.canonicalDepth; case "REGISTRATION_TIMESTAMP": diff --git a/apps/ensapi/src/omnigraph-api/schema/domain-inputs.ts b/apps/ensapi/src/omnigraph-api/schema/domain-inputs.ts index 3334b2245..885b6b7cf 100644 --- a/apps/ensapi/src/omnigraph-api/schema/domain-inputs.ts +++ b/apps/ensapi/src/omnigraph-api/schema/domain-inputs.ts @@ -69,7 +69,7 @@ export const DomainsNameFilter = builder.inputType("DomainsNameFilter", { fields: (t) => ({ starts_with: t.string({ description: - "Prefix-match on Interpreted Name for typeahead. 
ex: 'vit', 'vitalik.et'. Case-insensitive (InterpretedName labels are normalized).", + "Prefix-match on Interpreted Name for typeahead. ex: 'vit', 'vitalik.et'. Case-insensitive (InterpretedName labels are normalized). Matched against the first 64 code points of the name; prefixes longer than 64 code points never match.", validate: { minLength: 1 }, }), eq: t.field({ diff --git a/apps/ensindexer/src/lib/ensv2/canonicality-db-helpers.ts b/apps/ensindexer/src/lib/ensv2/canonicality-db-helpers.ts index e4ba0934c..76ee2098c 100644 --- a/apps/ensindexer/src/lib/ensv2/canonicality-db-helpers.ts +++ b/apps/ensindexer/src/lib/ensv2/canonicality-db-helpers.ts @@ -11,6 +11,10 @@ import type { RegistryId, } from "enssdk"; +import { + CANONICAL_NAME_PREFIX_LENGTH, + truncateCanonicalNamePrefix, +} from "@ensnode/ensdb-sdk/ensindexer-abstract"; import { isRootRegistryId } from "@ensnode/ensnode-sdk"; import { isBridgedResolver, isBridgedTargetRegistry } from "@ensnode/ensnode-sdk/internal"; @@ -151,6 +155,7 @@ export async function ensureDomainInRegistry( await context.ensDb.update(ensIndexerSchema.domain, { id: domainId }).set({ canonical: true, canonicalName, + __canonicalNamePrefix: truncateCanonicalNamePrefix(canonicalName), canonicalLabelHashPath, canonicalPath, canonicalDepth: canonicalLabelHashPath.length, @@ -359,8 +364,9 @@ async function reconcileRegistryCanonicality( /** * Propagate a Label heal to every canonical Domain whose `canonicalLabelHashPath` contains - * `labelHash`. Re-renders `canonical_name` by joining each path element to its current - * `label.interpreted` value. `canonicalLabelHashPath` is head-first (root → leaf), but + * `labelHash`. Re-renders `canonical_name` (and its materialized `__canonical_name_prefix`) by + * joining each path element to its current `label.interpreted` value, computing the name once in a + * CTE so the `string_agg` isn't run twice. 
`canonicalLabelHashPath` is head-first (root → leaf), but * `canonicalName` is the standard leaf-first ENS string (e.g. "vitalik.eth"), so the * WITH ORDINALITY rows are joined in DESC ordinal order. * @@ -376,14 +382,23 @@ export async function cascadeLabelHeal( labelHash: LabelHash, ): Promise { await context.ensDb.sql.execute(sql` - UPDATE ${ensIndexerSchema.domain} AS d - SET canonical_name = ( - SELECT string_agg(l.interpreted, '.' ORDER BY p.ord DESC) - FROM unnest(d.canonical_label_hash_path) WITH ORDINALITY AS p(lh, ord) - JOIN ${ensIndexerSchema.label} l ON l.label_hash = p.lh - ) + WITH healed_names AS ( + SELECT + d.id, + ( + SELECT string_agg(l.interpreted, '.' ORDER BY p.ord DESC) + FROM unnest(d.canonical_label_hash_path) WITH ORDINALITY AS p(lh, ord) + JOIN ${ensIndexerSchema.label} l ON l.label_hash = p.lh + ) AS name + FROM ${ensIndexerSchema.domain} d WHERE d.canonical = true - AND d.canonical_label_hash_path @> ARRAY[${labelHash}]::text[]; + AND d.canonical_label_hash_path @> ARRAY[${labelHash}]::text[] + ) + UPDATE ${ensIndexerSchema.domain} AS d + SET canonical_name = healed_names.name, + __canonical_name_prefix = left(healed_names.name, ${CANONICAL_NAME_PREFIX_LENGTH}) + FROM healed_names + WHERE d.id = healed_names.id; `); } @@ -494,6 +509,7 @@ async function cascadeCanonicality( UPDATE ${ensIndexerSchema.domain} AS d SET canonical = ${nextCanonical}, canonical_name = CASE WHEN ${nextCanonical} THEN dt.new_name ELSE NULL END, + __canonical_name_prefix = CASE WHEN ${nextCanonical} THEN left(dt.new_name, ${CANONICAL_NAME_PREFIX_LENGTH}) ELSE NULL END, canonical_label_hash_path = CASE WHEN ${nextCanonical} THEN dt.new_path ELSE NULL END, canonical_path = CASE WHEN ${nextCanonical} THEN dt.new_path_ids ELSE NULL END, canonical_depth = CASE WHEN ${nextCanonical} THEN array_length(dt.new_path, 1) ELSE NULL END, diff --git a/docs/ensnode.io/src/content/docs/docs/integrate/unigraph/examples/domain-by-name.mdx 
b/docs/ensnode.io/src/content/docs/docs/integrate/unigraph/examples/domain-by-name.mdx index 5590b6083..bc80cedd1 100644 --- a/docs/ensnode.io/src/content/docs/docs/integrate/unigraph/examples/domain-by-name.mdx +++ b/docs/ensnode.io/src/content/docs/docs/integrate/unigraph/examples/domain-by-name.mdx @@ -59,6 +59,20 @@ Performing SQL queries on the ENS Unigraph requires that you have the `unigraph` Fetch a Domain by its canonical name. Because `canonical_name` is materialized across both ENSv1 and ENSv2, the same lookup works regardless of protocol version. See [Connect](/docs/integrate/unigraph/examples) for setup. +:::tip[Searching vs. displaying] +A `canonical_name` can be very long, but it's the full, correct name — always **select and display `canonical_name`**. When you need to **search** by prefix (`ILIKE 'vit%'`, case-insensitive to match the Omnigraph `starts_with` filter), match against the materialized `__canonical_name_prefix` column (the first 64 code points of `canonical_name`, backed by a GIN trigram index) so the `ILIKE` filter is index-backed: + +```sql +SELECT id, type, canonical_name, canonical_node, owner_id +FROM ensindexer_0.domains +WHERE __canonical_name_prefix ILIKE 'vit%' +ORDER BY __canonical_name_prefix +LIMIT 10; +``` + +The `SELECT` still returns `canonical_name`; only the `ILIKE` / `ORDER BY` use the prefix. The GIN trigram index backs the `ILIKE` filter; the `ORDER BY` then sorts the matched set (cheap under a small `LIMIT`) — scope the query by `registry_id` to use the `(registry_id, __canonical_name_prefix, id)` btree for fully index-backed ordering. For exact matches, use `canonical_name` directly (`canonical_name = 'vitalik.eth'`). Because the column stores only the first 64 code points, a search prefix longer than 64 code points never matches any row — keep search terms within that length. +::: + :::note[Canonical fields] Canonical fields are populated on every Domain reachable from the canonical root, across both ENSv1 and ENSv2 — query them uniformly without branching by `type`.
In SQL, these columns are `canonical_name`, `canonical_path`, `canonical_node`, and `canonical_depth`; in `ensdb-sdk`, the corresponding fields are `canonicalName`, `canonicalPath`, `canonicalNode`, and `canonicalDepth`. ::: diff --git a/docs/ensnode.io/src/content/docs/docs/integrate/unigraph/schema-reference.mdx b/docs/ensnode.io/src/content/docs/docs/integrate/unigraph/schema-reference.mdx index 06e7368fc..bd2f2e852 100644 --- a/docs/ensnode.io/src/content/docs/docs/integrate/unigraph/schema-reference.mdx +++ b/docs/ensnode.io/src/content/docs/docs/integrate/unigraph/schema-reference.mdx @@ -130,7 +130,12 @@ the Event responsible for its existence. | `topics` | `text[]` | no | All log topics. | | `data` | `text` | no | Log data. | -**Indexes:** `selector`, `from`, `sender`, `timestamp`. +**Indexes:** + +- `selector` +- `from` +- `sender` +- `timestamp` ## domain_events @@ -199,7 +204,9 @@ For ENSv1, each domain that has children implicitly owns a "virtual" Registry (` | `canonical` | `boolean` | no | Whether this Registry is part of the canonical nametree. This encodes bi-directional agreement between `domains.subregistry_id` and `registries.canonical_domain_id`, so traversal of the canonical nametree filtered to domains/registries where `canonical=true` is safe and doesn't require edge-authenticating oneself (i.e. don't need to compare `domains.subregistry_id` and `registries.canonical_domain_id` in the query, can just `WHERE canonical = true`). Default `false`. | | `has_children` | `boolean` | no | Internal bookkeeping field. Synthetic monotonic sentinel flipped to `true` the first time a child Domain is registered under this Registry. Used to optimize canonicality cascades. Default `false`. | -**Indexes:** `(chain_id, address)` — non-unique, because multiple rows can share `(chain_id, address)` across virtual registries. +**Indexes:** + +- `(chain_id, address)` — non-unique, because multiple rows can share `(chain_id, address)` across virtual registries. 
**Relations:** has many `domains` (as parent registry), has many `domains` (as subregistry), has one `permissions` via `(chain_id, address)`. @@ -223,13 +230,26 @@ Domain-Resolver relations are tracked via the Protocol Acceleration plugin, not | `owner_id` | `text` | yes | If `ENSv1Domain`, the materialized effective owner address. If `ENSv2Domain`, the on-chain owner address (the HCA account address if used). | | `root_registry_owner_id` | `text` | yes | ENSv1 only: the owner recorded in the root ENSv1 registry. `null` for ENSv2 domains. | | `canonical` | `boolean` | no | Whether this Domain is part of the canonical nametree. This encodes bi-directional agreement between `domains.subregistry_id` and `registries.canonical_domain_id`, so traversal of the canonical nametree filtered to domains/registries where `canonical=true` is safe and doesn't require edge-authenticating oneself (i.e. don't need to compare `domains.subregistry_id` and `registries.canonical_domain_id` in the query, can just `WHERE canonical = true`). Mirrors the parent Registry's flag. Default `false`. | -| `canonical_name` | `text` | yes | Materialized Canonical Name, `NULL` iff `canonical = false`. Maintained by `canonicality-db-helpers.ts`. Example: `"vitalik.eth"`. | +| `canonical_name` | `text` | yes | Materialized Canonical Name, `NULL` iff `canonical = false`. Maintained by `canonicality-db-helpers.ts`. Use for exact matches (`canonical_name = 'vitalik.eth'`) and display. Example: `"vitalik.eth"`. | +| `__canonical_name_prefix` | `text` | yes | Materialized prefix of `canonical_name` (first 64 code points), `NULL` iff `canonical = false`. Maintained by `canonicality-db-helpers.ts`. Use for left-anchored / substring search (`__canonical_name_prefix ILIKE 'vit%'`, case-insensitive to match the Omnigraph `starts_with` filter) and NAME ordering without `canonical_name`'s full-length btree size hazard. 
The `__` prefix marks it an internal implementation detail — query `canonical_name` for exact matches and display. | | `canonical_label_hash_path` | `text[]` | yes | Materialized Canonical LabelHashPath, `NULL` iff `canonical = false`. Head-first (root → leaf), i.e. `[labelhash("eth"), labelhash("vitalik")]` for `"vitalik.eth"`. Maintained by `canonicality-db-helpers.ts`. | | `canonical_path` | `text[]` | yes | Materialized Canonical Domain Path, `NULL` iff `canonical = false`. Head-first (root → leaf), i.e. `[DomainId("eth"), DomainId("vitalik")]` for `"vitalik.eth"`. Maintained by `canonicality-db-helpers.ts`. | | `canonical_depth` | `integer` | yes | Materialized Canonical Depth, `NULL` iff `canonical = false`. The depth of this Domain in the Canonical Nametree, i.e. the number of Labels in its Canonical Name (e.g. `"eth"` depth 1, `"vitalik.eth"` depth 2). Maintained by `canonicality-db-helpers.ts`. | | `canonical_node` | `text` | yes | Materialized Canonical Node, `NULL` iff `canonical = false`. The computed Node (via `namehash`) of this Domain's Canonical Name. Maintained by `canonicality-db-helpers.ts`. 
| -**Indexes:** `type`, `subregistry_id` (partial: non-null only), `owner_id`, `label_hash`, `(registry_id, label_hash)` (composite; leading-column prefix also serves `WHERE registry_id = X` lookups, so no separate `registry_id` index is needed), `(registry_id, left(canonical_name, 256), id)` (composite expression index for registry-scoped `WHERE registry_id = X ORDER BY canonical_name LIMIT N` — the `Domain.subdomains` shape; the 256-char prefix bounds the index tuple under btree's per-tuple max, and NAME-ordered queries must sort by the same `left(...)` expression for the planner to use this index for ordered scan), `canonical_name` (hash, exact match — avoids the btree 8191-byte row-size hazard for spam names), `canonical_name` (GIN trigram for substring / similarity queries), `canonical_label_hash_path` (GIN containment for `cascadeLabelHeal`'s `canonical_label_hash_path @> ARRAY[lh]` lookup), `canonical_node` (hash, for resolver-record → canonical-domain joins), `canonical_depth` (btree, for `ORDER BY canonical_depth` — typeahead and depth-ordered browse). 
+**Indexes:** + +- `type` +- `subregistry_id` (partial: non-null only) +- `owner_id` +- `label_hash` +- `(registry_id, label_hash)` (composite; leading-column prefix also serves `WHERE registry_id = X` lookups, so no separate `registry_id` index is needed) +- `(registry_id, __canonical_name_prefix, id)` (composite for registry-scoped `WHERE registry_id = X ORDER BY __canonical_name_prefix LIMIT N` — the `Domain.subdomains` shape; the length-capped prefix keeps the index tuple under btree's per-tuple max) +- `canonical_name` (hash, exact match — avoids the btree 8191-byte row-size hazard for spam names) +- `__canonical_name_prefix` (GIN trigram for left-anchored `ILIKE 'vit%'` and substring search) +- `canonical_label_hash_path` (GIN containment for `cascadeLabelHeal`'s `canonical_label_hash_path @> ARRAY[lh]` lookup) +- `canonical_node` (hash, for resolver-record → canonical-domain joins) +- `canonical_depth` (btree, for `ORDER BY canonical_depth` — typeahead and depth-ordered browse) **Relations:** belongs to one `registries` record, belongs to one `registries` record (as subregistry), has one `accounts` record (owner), has one `accounts` record (rootRegistryOwner), has one `labels` record, has many `registrations` records. @@ -242,7 +262,10 @@ Internal rainbow table mapping a `label_hash` to its interpreted label string. D | `label_hash` | `text` | no | `keccak256` of the label. Primary key. | | `interpreted` | `text` | no | The interpreted label string. | -**Indexes:** `interpreted` (hash index for exact match), `interpreted` (GIN trigram index for prefix/substring `LIKE`) +**Indexes:** + +- `interpreted` (hash index for exact match) +- `interpreted` (GIN trigram index for prefix/substring `LIKE`) **Relations:** has many `domains`. @@ -270,7 +293,9 @@ A registration is keyed by `id`. | `wrapped` | `boolean` | no | Whether the registration is currently wrapped by the NameWrapper. Default `false`. 
| | `event_id` | `text` | no | The event that created this registration record. | -**Indexes:** unique on `(domain_id, registration_index)`. +**Indexes:** + +- unique on `(domain_id, registration_index)` **Relations:** belongs to one `domains` record, has one `accounts` record (registrant), has one `accounts` record (unregistrant), has many `renewals`, has one `events` record. @@ -301,7 +326,9 @@ A renewal is keyed by `id` and belongs to a specific registration. | `premium` | `numeric(78)` | yes | Premium cost in wei above base. ENSv1 `RegistrarControllers` only. | | `event_id` | `text` | no | The event that created this renewal record. | -**Indexes:** unique on `(domain_id, registration_index, renewal_index)`. +**Indexes:** + +- unique on `(domain_id, registration_index, renewal_index)` **Relations:** belongs to one `registrations` record via `(domain_id, registration_index)`, has one `events` record via `(event_id)`. @@ -327,7 +354,9 @@ An ENSv2 permissions contract instance. | `chain_id` | `bigint` | no | Chain the permissions contract is deployed on. | | `address` | `text` | no | Address of the permissions contract. | -**Indexes:** unique on `(chain_id, address)`. +**Indexes:** + +- unique on `(chain_id, address)` **Relations:** has many `permissions_resources`, has many `permissions_users`. @@ -342,7 +371,9 @@ A resource managed by a `permissions` contract. | `address` | `text` | no | Address of the parent permissions contract. | | `resource` | `numeric(78)` | no | Resource identifier (a `uint256` token ID or similar). | -**Indexes:** unique on `(chain_id, address, resource)`. +**Indexes:** + +- unique on `(chain_id, address, resource)` **Relations:** belongs to one `permissions` via `(chain_id, address)`. @@ -359,6 +390,8 @@ A user's role bitmap for a specific resource within a `permissions` contract. | `user` | `text` | no | The user/grantee address this Permission is granted to (the HCA account address if used). 
| | `roles` | `numeric(78)` | no | Roles bitmap for this user on this resource. | -**Indexes:** unique on `(chain_id, address, resource, user)`. +**Indexes:** + +- unique on `(chain_id, address, resource, user)` **Relations:** has one `accounts` record (user), belongs to one `permissions` record via `(chain_id, address)`, belongs to one `permissions_resource` record via `(chain_id, address, resource)`. diff --git a/packages/ensdb-sdk/src/ensindexer-abstract/unigraph.schema.ts b/packages/ensdb-sdk/src/ensindexer-abstract/unigraph.schema.ts index 4a939c552..af5023204 100644 --- a/packages/ensdb-sdk/src/ensindexer-abstract/unigraph.schema.ts +++ b/packages/ensdb-sdk/src/ensindexer-abstract/unigraph.schema.ts @@ -5,6 +5,7 @@ import type { InterpretedName, LabelHash, LabelHashPath, + Name, Node, NormalizedAddress, PermissionsId, @@ -260,6 +261,32 @@ export const relations_registry = relations(registry, ({ one, many }) => ({ export const domainType = onchainEnum("DomainType", ["ENSv1Domain", "ENSv2Domain"]); +/** + * Length cap (in code points) of the materialized `domain.__canonicalNamePrefix`. Sized for + * typeahead and left-anchored search; longer (invariably spam) names truncate here and tie-break + * by `id` in NAME ordering. Kept small to bound the prefix indexes. + */ +export const CANONICAL_NAME_PREFIX_LENGTH = 64; + +/** + * Truncate a Canonical Name to {@link CANONICAL_NAME_PREFIX_LENGTH} for `domain.__canonicalNamePrefix`. + * Uses code-point iteration so the JS-side prefix is byte-identical to Postgres `left(text, N)` + * (which counts code points), keeping the materialized column consistent across the JS and raw-SQL + * write paths in `canonicality-db-helpers.ts`. 
+ */ +export function truncateCanonicalNamePrefix(name: Name | null): Name | null { + if (name === null) return null; + // iterate code points and stop at the cap rather than spreading the whole string, which can be + // thousands of code points for spam names on a hot path (indexer writes + cursor encoding) + let prefix = ""; + let count = 0; + for (const codePoint of name) { + prefix += codePoint; + if (++count >= CANONICAL_NAME_PREFIX_LENGTH) break; + } + return prefix; +} + export const domain = onchainTable( "domains", (t) => ({ @@ -302,6 +329,19 @@ export const domain = onchainTable( */ canonicalName: t.text().$type(), + /** + * Materialized prefix of `canonicalName` (first {@link CANONICAL_NAME_PREFIX_LENGTH} code + * points), NULL iff `canonical = false`. Maintained by `canonicality-db-helpers.ts`. + * + * Powers left-anchored / substring search (`__canonical_name_prefix LIKE 'vit%'`) and NAME + * ordering without `canonical_name`'s full-length btree size hazard. The `__` prefix marks it + * an internal implementation detail (mirrors `Registry.__hasChildren`); query `canonical_name` + * for exact matches and display. + * + * @example "vitalik.eth" + */ + __canonicalNamePrefix: t.text("__canonical_name_prefix").$type(), + /** * Materialized Canonical LabelHashPath, NULL iff `canonical = false`. * Maintained by `canonicality-db-helpers.ts`. @@ -347,24 +387,18 @@ export const domain = onchainTable( // `WHERE registry_id = X` lookups via prefix scan. byRegistryAndLabelHash: index().on(t.registryId, t.labelHash), - // composite for `WHERE registry_id = X ORDER BY canonical_name LIMIT N` (Domain.subdomains - // and other find-domains queries when ordering by NAME). Uses `left(canonical_name, 256)` - // to bound the index tuple under btree's per-tuple max (~2712 bytes): 256 chars × max 4-byte - // UTF-8 = 1024 bytes, leaving ample room for the registry_id and id columns. 
Names beyond - // 256 chars (currently <0.0001% of mainnet) collide on the truncated prefix and tie-break by - // id; this is acceptable since such names are invariably spam. Callers MUST sort by the same - // expression for the planner to use this index for ordered scan. - byRegistryAndCanonicalNameLeft: index().on( - t.registryId, - sql`left(${t.canonicalName}, 256)`, - t.id, - ), + // composite for `WHERE registry_id = X ORDER BY __canonical_name_prefix LIMIT N` (Domain.subdomains + // and other find-domains queries when ordering by NAME). The length-capped prefix keeps the + // index tuple under btree's per-tuple max (~2712 bytes); 64 code points × max 4-byte UTF-8 = + // 256 bytes, leaving ample room for the registry_id and id columns. + byRegistryAndCanonicalNamePrefix: index().on(t.registryId, t.__canonicalNamePrefix, t.id), // hash index avoids the btree 8191-byte row-size hazard for spam names byCanonicalNameExact: index().using("hash", t.canonicalName), - // GIN trigram index for substring / similarity queries (inline `gin_trgm_ops` via `sql` - // because passing it through `.op()` gets dropped by Ponder) - byCanonicalNameFuzzy: index().using("gin", sql`${t.canonicalName} gin_trgm_ops`), + // GIN trigram on the length-capped prefix for left-anchored (`LIKE 'vit%'`) and substring + // search (inline `gin_trgm_ops` via `sql` because passing it through `.op()` gets dropped by + // Ponder) + byCanonicalNamePrefixFuzzy: index().using("gin", sql`${t.__canonicalNamePrefix} gin_trgm_ops`), // GIN containment for `cascadeLabelHeal`'s `canonical_label_hash_path @> ARRAY[lh]` lookup byCanonicalLabelHashPath: index().using("gin", t.canonicalLabelHashPath), // hash index for resolver-record → canonical-domain joins diff --git a/packages/enssdk/src/omnigraph/generated/schema.graphql b/packages/enssdk/src/omnigraph/generated/schema.graphql index 81bc4ece1..7d907ed9a 100644 --- a/packages/enssdk/src/omnigraph/generated/schema.graphql +++ 
b/packages/enssdk/src/omnigraph/generated/schema.graphql @@ -490,7 +490,7 @@ input DomainsNameFilter @oneOf { in: [InterpretedName!] """ - Prefix-match on Interpreted Name for typeahead. ex: 'vit', 'vitalik.et'. Case-insensitive (InterpretedName labels are normalized). + Prefix-match on Interpreted Name for typeahead. ex: 'vit', 'vitalik.et'. Case-insensitive (InterpretedName labels are normalized). Matched against the first 64 code points of the name; prefixes longer than 64 code points never match. """ starts_with: String }