We support hybrid search that combines semantic and full-text search via a
reranking algorithm of your choice, to get the best of both worlds. LanceDB
comes with built-in rerankers
and you can implement your own customized reranker as well.
Explore the complete hybrid search example in our guided walkthroughs:
import os

import lancedb
import openai
from lancedb.embeddings import get_registry
from lancedb.pydantic import LanceModel, Vector
from lancedb.rerankers import RRFReranker

# Connect to LanceDB. The URI, API key, and region below are placeholders;
# replace them with the values for your own project.
db = lancedb.connect(
    uri="db://your-project-slug",
    api_key="your-api-key",
    region="us-east-1",
)

# Configuring the environment variable OPENAI_API_KEY
if "OPENAI_API_KEY" not in os.environ:
    # OR set the key here as a variable
    openai.api_key = "sk-..."
embeddings = get_registry().get("openai").create()


# Define schema for documents with embeddings.
class Documents(LanceModel):
    # Raw text; declared as the source field of the embedding function.
    text: str = embeddings.SourceField()
    # Embedding vector field, sized by the embedding function's `ndims()`.
    vector: Vector(embeddings.ndims()) = embeddings.VectorField()


# Create a table with the defined schema ("overwrite" replaces any existing
# table of the same name).
table_name = "hybrid_search_example"
table = db.create_table(table_name, schema=Documents, mode="overwrite")

# Add sample data. Only "text" is supplied here; "vector" is left to the
# embedding function configured on the schema.
data = [
    {"text": "rebel spaceships striking from a hidden base"},
    {"text": "have won their first victory against the evil Galactic Empire"},
    {"text": "during the battle rebel spies managed to steal secret plans"},
    {"text": "to the Empire's ultimate weapon the Death Star"},
]
table.add(data=data)

# Build the full-text search index on the "text" column.
table.create_fts_index("text")

# Wait for indexes to be ready. The original snippet called an undefined
# `wait_for_index(table, "text_idx")` helper; the table's own method is used
# instead. NOTE(review): assumes the FTS index is named "text_idx" (as in the
# original call) and a LanceDB version that provides `Table.wait_for_index`.
table.wait_for_index(["text_idx"])

# Create a reranker for hybrid search.
reranker = RRFReranker()

# Perform hybrid search with reranking: the query runs against both the
# vector column and the FTS column, and the reranker merges the two result
# sets.
results = (
    table.search(
        "flower moon",
        query_type="hybrid",
        vector_column_name="vector",
        fts_columns="text",
    )
    .rerank(reranker)
    .limit(10)
    .to_pandas()
)

print("Hybrid search results:")
print(results)