We support hybrid search, which combines semantic (vector) search and
full-text search through a reranking algorithm of your choice, giving you the
best of both worlds. LanceDB comes with built-in rerankers,
and you can implement your own custom reranker as well.
Explore the complete hybrid search example in our guided walkthroughs:
import os
import lancedb
import openai
from lancedb.embeddings import get_registry
from lancedb.pydantic import LanceModel, Vector
from lancedb.rerankers import RRFReranker
# Connect to LanceDB. The URI, API key, and region below are placeholders;
# replace them with the values for your own project.
db = lancedb.connect(
    uri="db://your-project-slug",
    api_key="your-api-key",
    region="us-east-1",
)

# Configuring the environment variable OPENAI_API_KEY
if "OPENAI_API_KEY" not in os.environ:
    # OR set the key here as a variable (replace the placeholder).
    # The environment variable is set as well as the module attribute because
    # client objects in openai>=1.0 resolve the key from the environment,
    # while older releases read the module-level `openai.api_key`.
    os.environ["OPENAI_API_KEY"] = "sk-..."
    openai.api_key = os.environ["OPENAI_API_KEY"]

# Look up the "openai" embedding function in the LanceDB embedding registry.
embeddings = get_registry().get("openai").create()
# Define schema for documents with embeddings
class Documents(LanceModel):
    # Raw document text, declared as the embedding function's source field.
    text: str = embeddings.SourceField()
    # Embedding vector; its dimensionality comes from embeddings.ndims().
    vector: Vector(embeddings.ndims()) = embeddings.VectorField()
# Create a table with the defined schema (mode="overwrite" is passed so the
# example can be re-run against the same table name).
table_name = "hybrid_search_example"
table = db.create_table(table_name, schema=Documents, mode="overwrite")

# Add sample data. Only the "text" field is supplied for each row.
data = [
    {"text": "rebel spaceships striking from a hidden base"},
    {"text": "have won their first victory against the evil Galactic Empire"},
    {"text": "during the battle rebel spies managed to steal secret plans"},
    {"text": "to the Empire's ultimate weapon the Death Star"},
]
table.add(data=data)

# Build the full-text search index on the "text" column.
table.create_fts_index("text")

# Wait for indexes to be ready. This uses the table's own wait_for_index
# method: no standalone `wait_for_index` helper is defined or imported in this
# example, so calling one would raise NameError.
table.wait_for_index(["text_idx"])
# Reranker used to combine the vector and full-text result lists.
reranker = RRFReranker()

# Build the hybrid query step by step: search, rerank, then cap the row count.
hybrid_query = table.search(
    "flower moon",
    query_type="hybrid",
    vector_column_name="vector",
    fts_columns="text",
)
reranked_query = hybrid_query.rerank(reranker)
limited_query = reranked_query.limit(10)

# Execute the query and materialize the hits as a pandas DataFrame.
results = limited_query.to_pandas()

print("Hybrid search results:", results, sep="\n")