Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion bindings/python/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,6 @@ arrow = { workspace = true, features = ["pyarrow"] }
datafusion = { workspace = true }
datafusion-ffi = { workspace = true }
paimon = { path = "../../crates/paimon", features = ["storage-all"] }
paimon-datafusion = { path = "../../crates/integrations/datafusion" }
paimon-datafusion = { path = "../../crates/integrations/datafusion", features = ["fulltext"] }
pyo3 = { version = "0.28", features = ["abi3-py310"] }
tokio = { workspace = true }
17 changes: 17 additions & 0 deletions bindings/python/tests/test_datafusion.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,3 +177,20 @@ def test_register_batch_invalid_catalog():
assert False, "Expected an error for unknown catalog"
except Exception as e:
assert "unknown_catalog" in str(e).lower() or "not a paimon" in str(e).lower() or "unknown" in str(e).lower()


def test_table_functions_registered_with_catalog():
    """Check that register_catalog makes the table functions callable from SQL.

    Each of vector_search / full_text_search is invoked with a single
    argument. A registered function rejects that call with its own
    argument-count validation error, whereas an unregistered name would
    instead fail with 'table function not found' — so seeing the validation
    message proves the function is registered.
    """
    table_functions = ("vector_search", "full_text_search")

    with tempfile.TemporaryDirectory() as warehouse:
        ctx = SQLContext()
        ctx.register_catalog("paimon", {"warehouse": warehouse})

        for fn in table_functions:
            query = f"SELECT * FROM {fn}('only_one_arg')"
            try:
                ctx.sql(query)
            except Exception as e:
                # The error must come from the function's own argument check.
                message = str(e)
                assert "requires 4 arguments" in message, message
            else:
                # No error at all means the bad call was silently accepted.
                assert False, f"expected {fn} to reject a single argument"
14 changes: 14 additions & 0 deletions crates/integrations/datafusion/src/sql_context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,7 @@ impl SQLContext {
self.dynamic_options.clone(),
)),
);
register_table_functions(&self.ctx, &catalog, default_db);
self.catalogs.insert(catalog_name.clone(), catalog);
if is_first {
self.set_current_catalog(catalog_name).await?;
Expand Down Expand Up @@ -2302,6 +2303,19 @@ fn ok_result(ctx: &SessionContext) -> DFResult<DataFrame> {
Ok(df)
}

/// Registers the built-in table-valued functions against `catalog` so they can
/// be used in SQL without any extra setup call. Called for every catalog
/// registered on the context; add new built-in table functions here.
fn register_table_functions(
ctx: &SessionContext,
catalog: &Arc<dyn Catalog>,
default_database: &str,
) {
crate::vector_search::register_vector_search(ctx, Arc::clone(catalog), default_database);
#[cfg(feature = "fulltext")]
crate::full_text_search::register_full_text_search(ctx, Arc::clone(catalog), default_database);
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down
10 changes: 9 additions & 1 deletion docs/src/sql.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ async fn example() -> Result<(), Box<dyn std::error::Error>> {
}
```

`SQLContext::new` creates a session context with the Paimon relation planner pre-registered. Use `register_catalog` to add one or more Paimon catalogs. It also manages session-scoped dynamic options internally for `SET`/`RESET` support.
`SQLContext::new` creates a session context with the Paimon relation planner pre-registered. Use `register_catalog` to add one or more Paimon catalogs; registering a catalog also registers the built-in table-valued functions against it: `vector_search`, plus `full_text_search` when the `fulltext` feature is enabled. `SQLContext` also manages session-scoped dynamic options internally for `SET`/`RESET` support.

## Data Types

Expand Down Expand Up @@ -445,6 +445,10 @@ Paimon supports approximate nearest neighbor (ANN) vector search via the Lumina

### Registration

When you use a `SQLContext`, `vector_search` is registered automatically for every catalog you register — no extra setup is needed.

With a raw DataFusion `SessionContext`, register it explicitly:

```rust
use paimon_datafusion::register_vector_search;

Expand Down Expand Up @@ -510,6 +514,10 @@ paimon-datafusion = { version = "0.1.0", features = ["fulltext"] }

### Registration

When you use a `SQLContext`, `full_text_search` is registered automatically for every catalog you register (when the `fulltext` feature is enabled) — no extra setup is needed.

With a raw DataFusion `SessionContext`, register it explicitly:

```rust
use paimon_datafusion::register_full_text_search;

Expand Down
Loading