Auto-backfill
With auto_backfill=True, LanceDB Enterprise recomputes the column for you whenever the
data or the UDF version changes — no explicit backfill() call needed (see
Backfilling).
Documentation Index
Fetch the complete documentation index at: /llms.txt
Use this file to discover all available pages before exploring further.
Connect to LanceDB Enterprise, define a UDF, and run a distributed backfill — from a notebook or a script.
import os
import geneva
import pyarrow as pa
# Connect to LanceDB Enterprise.
# host_override and api_key are taken from environment variables.
db = geneva.connect(
uri="db://my-db",
# Falls back to "http://localhost:10024" when LANCEDB_URI is unset.
host_override=os.getenv("LANCEDB_URI", "http://localhost:10024"),
# os.getenv returns None when LANCEDB_API_KEY is unset.
api_key=os.getenv("LANCEDB_API_KEY"),
)
# Open the table named "my_table"; later examples operate on `tbl`.
tbl = db.open_table("my_table")
# Define a User Defined Function (UDF) that counts the words in the text column
@geneva.udf(data_type=pa.int32())
def word_count(text: str) -> int:
    """Return the number of whitespace-separated words in ``text``."""
    return len(text.split())
# Register the UDF as a new virtual column.
# The dict maps the new column's name to the UDF that computes it.
tbl.add_columns({"word_count": word_count})
# Backfill the new column using distributed execution with incremental checkpointing
tbl.backfill("word_count")
With auto_backfill=True, LanceDB Enterprise recomputes the column for you whenever the
data or the UDF version changes — no explicit backfill() call needed (see
Backfilling).
# Change the column to use a new UDF version with auto-backfill enabled
@geneva.udf(data_type=pa.int32(), auto_backfill=True)
def word_count(text: str) -> int:
    """Return the number of whitespace-separated words in ``text``.

    Declared with ``auto_backfill=True`` so no explicit ``backfill()`` call
    is needed for this column.
    """
    return len(text.split())
# Point the existing "word_count" column at the redefined UDF.
tbl.alter_columns({"path": "word_count", "udf": word_count})
# Add new rows. word_count is computed automatically in the background.
tbl.add([{"text": "hello world"}])
# Materialized view: a query with UDF-computed columns, refreshed incrementally
# The select dict maps each output column name to a source column ("text")
# or to a UDF (word_count).
# NOTE(review): search(None) presumably means an unfiltered scan; confirm against the API docs.
query = tbl.search(None).select({"text": "text", "word_count": word_count})
view = db.create_materialized_view("my_view", query)
view.refresh()
# Chunker view: 1:N row expansion — split each row's text into one row per word
from typing import Iterator, NamedTuple
class Chunk(NamedTuple):
    """A single chunk of text together with its position."""

    # Position of the chunk within the text it was taken from.
    chunk_index: int
    # The chunk's text content.
    chunk_text: str
@geneva.chunker
def split_text(text: str) -> Iterator[Chunk]:
    """Yield one ``Chunk`` per whitespace-separated word of ``text``.

    ``chunk_index`` is the word's zero-based position in ``text`` and
    ``chunk_text`` is the word itself. An empty or all-whitespace ``text``
    yields nothing.
    """
    for i, word in enumerate(text.split()):
        yield Chunk(chunk_index=i, chunk_text=word)
# Create the "my_chunks" view by applying split_text to the "text" column of tbl.
chunks = db.create_udtf_view(
"my_chunks",
source=tbl.search(None).select(["text"]),
udtf=split_text,
)
# NOTE(review): refresh() presumably (re)computes the view's rows; confirm against the API docs.
chunks.refresh()
# NOTE(review): the two connect() calls below both rebind `db`; presumably they
# are alternative connection targets and only one is meant to be used.
# Cloud object storage (S3, GCS, Azure, or any S3-compatible object store)
db = geneva.connect("s3://my-bucket/my-database")
# Local filesystem
db = geneva.connect("/path/to/my-database")
Was this page helpful?