For LanceDB Cloud users, the database URI (which starts with db://) and API key can both be
retrieved from the LanceDB Cloud UI. For step-by-step instructions,
refer to our onboarding tutorial.
For LanceDB Enterprise users, please contact our team
to obtain your database URI, API key and host_override URL.
import os

import lancedb
import numpy as np
import pyarrow as pa

# Connect to LanceDB Cloud/Enterprise.
# The three values below are placeholders; substitute your own.
uri = "db://your-database-uri"
api_key = "your-api-key"
region = "us-east-1"

# (Optional) For LanceDB Enterprise, set the host override to your enterprise
# endpoint. When the environment variable is unset this is None.
host_override = os.getenv("LANCEDB_HOST_OVERRIDE")

db = lancedb.connect(
    uri=uri,
    api_key=api_key,
    region=region,
    host_override=host_override,
)
from datasets import load_dataset

# Load a sample dataset from HuggingFace with pre-computed embeddings.
# Only the first 1000 rows of the "test" split are requested.
sample_dataset = load_dataset(
    "sunhaozhepy/ag_news_sbert_keywords_embeddings",
    split="test[:1000]",
)

print(f"Loaded {len(sample_dataset)} samples")
print(f"Sample features: {sample_dataset.features}")
print(f"Column names: {sample_dataset.column_names}")

# Preview the first sample.
first_sample = sample_dataset[0]
print(first_sample)

# Get the embedding dimension from the length of the first sample's vector.
vector_dim = len(first_sample["keywords_embeddings"])
print(f"Embedding dimension: {vector_dim}")
import pyarrow as pa

# Create a table with the dataset; mode="overwrite" is passed so the call
# does not depend on whether a table with this name already exists.
table_name = "lancedb-cloud-quickstart"
table = db.create_table(table_name, data=sample_dataset, mode="overwrite")

# Convert the vector column from a list to a fixed-size list of float32,
# sized to the embedding dimension computed from the dataset.
fixed_size_vector = pa.list_(pa.float32(), vector_dim)
table.alter_columns(
    {"path": "keywords_embeddings", "data_type": fixed_size_vector}
)

print(f"Table '{table_name}' created successfully")
from datetime import timedelta

# Create a vector index on the embedding column and wait (up to two minutes)
# for it to complete.
index_timeout = timedelta(seconds=120)
table.create_index(
    "cosine",
    vector_column_name="keywords_embeddings",
    wait_timeout=index_timeout,
)

# Report statistics for the index that was just requested.
index_stats = table.index_stats("keywords_embeddings_idx")
print(index_stats)
The create_index/createIndex operation executes asynchronously in LanceDB Cloud/Enterprise. To ensure the index is fully built,
you can use the wait_timeout parameter or call wait_for_index on the table.