We support hybrid search, which combines semantic and full-text search via a reranking algorithm of your choice to get the best of both worlds. LanceDB comes with built-in rerankers, and you can implement your own customized reranker as well.

Explore the complete hybrid search example in our guided walkthroughs:

import os

import lancedb
import openai
from lancedb.embeddings import get_registry
from lancedb.pydantic import LanceModel, Vector
from lancedb.rerankers import RRFReranker

# Connect to LanceDB (replace the placeholder values with your project's
# slug, API key, and region)
db = lancedb.connect(
    uri="db://your-project-slug",
    api_key="your-api-key",
    region="us-east-1",
)

# The OpenAI embedding function needs an API key. Prefer exporting
# OPENAI_API_KEY in your shell; the fallback below is for quick experiments.
if "OPENAI_API_KEY" not in os.environ:
    # Current OpenAI clients read the key from the environment when they are
    # constructed, so set the variable instead of only the module attribute.
    os.environ["OPENAI_API_KEY"] = "sk-..."
    # Kept for legacy (pre-1.0) openai releases that use the module-level key.
    openai.api_key = os.environ["OPENAI_API_KEY"]
embeddings = get_registry().get("openai").create()

# Define schema for documents with embeddings
class Documents(LanceModel):
    """Row schema for the example table: raw text plus its embedding vector."""

    # Declared with embeddings.SourceField(); presumably the column the
    # embedding function reads its input from - confirm against LanceDB docs.
    text: str = embeddings.SourceField()
    # Vector column whose dimension comes from embeddings.ndims(), declared
    # with embeddings.VectorField().
    vector: Vector(embeddings.ndims()) = embeddings.VectorField()

# Create a table with the defined schema; mode="overwrite" replaces any
# existing table with the same name so the example can be re-run.
table_name = "hybrid_search_example"
table = db.create_table(table_name, schema=Documents, mode="overwrite")

# Add sample data. Only "text" is supplied; the "vector" column is expected
# to be filled in by the embedding function attached to the schema.
data = [
    {"text": "rebel spaceships striking from a hidden base"},
    {"text": "have won their first victory against the evil Galactic Empire"},
    {"text": "during the battle rebel spies managed to steal secret plans"},
    {"text": "to the Empire's ultimate weapon the Death Star"},
]
table.add(data=data)

# Build the full-text search index on the "text" column
table.create_fts_index("text")

# Wait for the index to be ready before querying. Use the table's built-in
# wait_for_index method: the free function wait_for_index() is not defined
# or imported anywhere in this script and would raise NameError.
table.wait_for_index(["text_idx"])

# Reranker that fuses the vector and full-text result lists
reranker = RRFReranker()

# Build the hybrid query first: it searches the "vector" column semantically
# and the "text" column via full-text search.
hybrid_query = table.search(
    "flower moon",
    query_type="hybrid",
    vector_column_name="vector",
    fts_columns="text",
)

# Rerank the combined hits, keep at most 10, and materialize as a DataFrame
results = hybrid_query.rerank(reranker).limit(10).to_pandas()

print("Hybrid search results:", results, sep="\n")