> ## Documentation Index > Fetch the complete documentation index at: https://docs.lancedb.com/llms.txt > Use this file to discover all available pages before exploring further. # Ingesting Data > Learn about different methods to ingest data into tables in LanceDB, including from various data sources and empty tables. export const TablesTzValidator = "from datetime import datetime\nfrom zoneinfo import ZoneInfo\n\nfrom lancedb.pydantic import LanceModel\nfrom pydantic import Field, ValidationError, ValidationInfo, field_validator\n\ntzname = \"America/New_York\"\ntz = ZoneInfo(tzname)\n\nclass TestModel(LanceModel):\n dt_with_tz: datetime = Field(json_schema_extra={\"tz\": tzname})\n\n @field_validator(\"dt_with_tz\")\n @classmethod\n def tz_must_match(cls, dt: datetime) -> datetime:\n assert dt.tzinfo == tz\n return dt\n\nok = TestModel(dt_with_tz=datetime.now(tz))\n\ntry:\n TestModel(dt_with_tz=datetime.now(ZoneInfo(\"Asia/Shanghai\")))\n assert 0 == 1, \"this should raise ValidationError\"\nexcept ValidationError:\n print(\"A ValidationError was raised.\")\n pass\n"; export const TablesDocumentModel = "from pydantic import BaseModel\n\nclass Document(BaseModel):\n content: str\n source: str\n"; export const TablesBasicConnect = "import lancedb\n\nuri = \"data/sample-lancedb\"\ndb = lancedb.connect(uri)\n"; export const RsDropTable = "let drop_schema = Arc::new(Schema::new(vec![\n Field::new(\n \"vector\",\n DataType::FixedSizeList(Arc::new(Field::new(\"item\", DataType::Float32, true)), 2),\n false,\n ),\n Field::new(\"lat\", DataType::Float32, false),\n]));\nlet drop_batch = RecordBatch::try_new(\n drop_schema.clone(),\n vec![\n Arc::new(\n FixedSizeListArray::from_iter_primitive::<Float32Type, _, _>(\n vec![Some(vec![Some(1.1), Some(1.2)])],\n 2,\n ),\n ),\n Arc::new(Float32Array::from(vec![45.5])),\n ],\n)\n.unwrap();\nlet drop_reader: Box<dyn RecordBatchReader + Send> =\n Box::new(RecordBatchIterator::new(vec![Ok(drop_batch)].into_iter(), drop_schema.clone()));\ndb.create_table(\"my_table\", drop_reader)\n 
.mode(CreateTableMode::Overwrite)\n .execute()\n .await\n .unwrap();\n\ndb.drop_table(\"my_table\", &[]).await.unwrap();\n"; export const TsDropTable = "await db.createTable(\"my_table\", [{ vector: [1.1, 1.2], lat: 45.5 }], {\n mode: \"overwrite\",\n});\n\nawait db.dropTable(\"my_table\");\n"; export const DropTable = "db = tmp_db\n# Create a table first\ndata = [{\"vector\": [1.1, 1.2], \"lat\": 45.5}]\ndb.create_table(\"my_table\", data, mode=\"overwrite\")\n\n# Drop the table\ndb.drop_table(\"my_table\")\n"; export const CreateEmptyTablePydantic = "from lancedb.pydantic import LanceModel, Vector\n\nclass Item(LanceModel):\n vector: Vector(2)\n item: str\n price: float\n\ndb = tmp_db\ntbl = db.create_table(\n \"test_empty_table_new\", schema=Item.to_arrow_schema(), mode=\"overwrite\"\n)\n"; export const RsCreateEmptyTable = "let empty_schema = Arc::new(Schema::new(vec![\n Field::new(\n \"vector\",\n DataType::FixedSizeList(Arc::new(Field::new(\"item\", DataType::Float32, true)), 2),\n false,\n ),\n Field::new(\"item\", DataType::Utf8, false),\n Field::new(\"price\", DataType::Float32, false),\n]));\nlet empty_table = db\n .create_empty_table(\"test_empty_table\", empty_schema)\n .mode(CreateTableMode::Overwrite)\n .execute()\n .await\n .unwrap();\n"; export const TsCreateEmptyTable = "const emptySchema = new arrow.Schema([\n new arrow.Field(\n \"vector\",\n new arrow.FixedSizeList(\n 2,\n new arrow.Field(\"item\", new arrow.Float32(), true),\n ),\n ),\n new arrow.Field(\"item\", new arrow.Utf8()),\n new arrow.Field(\"price\", new arrow.Float32()),\n]);\nconst emptyTable = await db.createEmptyTable(\n \"test_empty_table\",\n emptySchema,\n {\n mode: \"overwrite\",\n },\n);\n"; export const CreateEmptyTable = "import pyarrow as pa\n\nschema = pa.schema(\n [\n pa.field(\"vector\", pa.list_(pa.float32(), 2)),\n pa.field(\"item\", pa.string()),\n pa.field(\"price\", pa.float32()),\n ]\n)\ndb = tmp_db\ntbl = db.create_table(\"test_empty_table\", schema=schema, 
mode=\"overwrite\")\n"; export const RsOpenExistingTable = "let open_schema = Arc::new(Schema::new(vec![\n Field::new(\n \"vector\",\n DataType::FixedSizeList(Arc::new(Field::new(\"item\", DataType::Float32, true)), 2),\n false,\n ),\n Field::new(\"lat\", DataType::Float32, false),\n Field::new(\"long\", DataType::Float32, false),\n]));\nlet open_batch = RecordBatch::try_new(\n open_schema.clone(),\n vec![\n Arc::new(\n FixedSizeListArray::from_iter_primitive::<Float32Type, _, _>(\n vec![Some(vec![Some(1.1), Some(1.2)])],\n 2,\n ),\n ),\n Arc::new(Float32Array::from(vec![45.5])),\n Arc::new(Float32Array::from(vec![-122.7])),\n ],\n)\n.unwrap();\nlet open_reader: Box<dyn RecordBatchReader + Send> =\n Box::new(RecordBatchIterator::new(vec![Ok(open_batch)].into_iter(), open_schema.clone()));\ndb.create_table(\"test_table\", open_reader)\n .mode(CreateTableMode::Overwrite)\n .execute()\n .await\n .unwrap();\n\nprintln!(\"{:?}\", db.table_names().execute().await.unwrap());\n\nlet opened_table = db.open_table(\"test_table\").execute().await.unwrap();\n"; export const TsOpenExistingTable = "const openTableData = [{ vector: [1.1, 1.2], lat: 45.5, long: -122.7 }];\nawait db.createTable(\"test_table_open\", openTableData, {\n mode: \"overwrite\",\n});\n\nconsole.log(await db.tableNames());\n\nconst openedTable = await db.openTable(\"test_table_open\");\n"; export const OpenExistingTable = "db = tmp_db\n# Create a table first\ndata = [{\"vector\": [1.1, 1.2], \"lat\": 45.5, \"long\": -122.7}]\ndb.create_table(\"test_table\", data, mode=\"overwrite\")\n\n# List table names\nprint(db.list_tables().tables)\n\n# Open existing table\ntbl = db.open_table(\"test_table\")\n"; export const RsCreateTableFromIterator = "let batch_schema = Arc::new(Schema::new(vec![\n Field::new(\n \"vector\",\n DataType::FixedSizeList(Arc::new(Field::new(\"item\", DataType::Float32, true)), 4),\n false,\n ),\n Field::new(\"item\", DataType::Utf8, false),\n Field::new(\"price\", DataType::Float32, false),\n]));\n\nlet batches = (0..5)\n .map(|i| {\n 
RecordBatch::try_new(\n batch_schema.clone(),\n vec![\n Arc::new(\n FixedSizeListArray::from_iter_primitive::<Float32Type, _, _>(\n vec![\n Some(vec![Some(3.1 + i as f32), Some(4.1), Some(5.1), Some(6.1)]),\n Some(vec![\n Some(5.9),\n Some(26.5 + i as f32),\n Some(4.7),\n Some(32.8),\n ]),\n ],\n 4,\n ),\n ),\n Arc::new(StringArray::from(vec![\n format!(\"item{}\", i * 2 + 1),\n format!(\"item{}\", i * 2 + 2),\n ])),\n Arc::new(Float32Array::from(vec![\n ((i * 2 + 1) * 10) as f32,\n ((i * 2 + 2) * 10) as f32,\n ])),\n ],\n )\n .unwrap()\n })\n .collect::<Vec<_>>();\n\nlet batch_reader: Box<dyn RecordBatchReader + Send> =\n Box::new(RecordBatchIterator::new(batches.into_iter().map(Ok), batch_schema.clone()));\nlet batch_table = db\n .create_table(\"batched_table\", batch_reader)\n .mode(CreateTableMode::Overwrite)\n .execute()\n .await\n .unwrap();\n"; export const TsCreateTableFromIterator = "const batchSchema = new arrow.Schema([\n new arrow.Field(\n \"vector\",\n new arrow.FixedSizeList(\n 4,\n new arrow.Field(\"item\", new arrow.Float32(), true),\n ),\n ),\n new arrow.Field(\"item\", new arrow.Utf8()),\n new arrow.Field(\"price\", new arrow.Float32()),\n]);\n\nconst tableForBatches = await db.createEmptyTable(\n \"batched_table\",\n batchSchema,\n {\n mode: \"overwrite\",\n },\n);\n\nconst rows = Array.from({ length: 10 }, (_, i) => ({\n vector: [i + 0.1, i + 0.2, i + 0.3, i + 0.4],\n item: `item-${i + 1}`,\n price: (i + 1) * 10,\n}));\n\nconst chunkSize = 2;\nfor (let i = 0; i < rows.length; i += chunkSize) {\n const batch = lancedb.makeArrowTable(rows.slice(i, i + chunkSize), {\n schema: batchSchema,\n });\n await tableForBatches.add(batch);\n}\n"; export const CreateTableFromIterator = "import pyarrow as pa\n\nschema = pa.schema(\n [\n pa.field(\"vector\", pa.list_(pa.float32(), 4)),\n pa.field(\"item\", pa.utf8()),\n pa.field(\"price\", pa.float32()),\n ]\n)\n\ndef make_batches():\n for i in range(5):\n yield pa.RecordBatch.from_arrays(\n [\n pa.array(\n [[3.1, 4.1, 5.1, 6.1], [5.9, 26.5, 4.7, 32.8]],\n 
pa.list_(pa.float32(), 4),\n ),\n pa.array([\"foo\", \"bar\"]),\n pa.array([10.0, 20.0]),\n ],\n [\"vector\", \"item\", \"price\"],\n )\n\ndb = tmp_db\ndb.create_table(\"batched_table\", make_batches(), schema=schema, mode=\"overwrite\")\n"; export const AddFromDataset = "import pyarrow.dataset as ds\n\ndataset = ds.dataset(data_path, format=\"parquet\")\ndb = tmp_db\ntable = db.create_table(\"my_table\", schema=dataset.schema, mode=\"overwrite\")\ntable.add(dataset)\n"; export const CreateTableNestedSchema = "from lancedb.pydantic import LanceModel, Vector\n\n# --8<-- [start:tables_document_model]\nfrom pydantic import BaseModel\n\nclass Document(BaseModel):\n content: str\n source: str\n\n# --8<-- [end:tables_document_model]\n\nclass NestedSchema(LanceModel):\n id: str\n vector: Vector(1536)\n document: Document\n\ndb = tmp_db\ntbl = db.create_table(\"nested_table\", schema=NestedSchema, mode=\"overwrite\")\n"; export const CreateTableFromPydantic = "from lancedb.pydantic import LanceModel, Vector\n\nclass Content(LanceModel):\n movie_id: int\n vector: Vector(128)\n genres: str\n title: str\n imdb_id: int\n\n @property\n def imdb_url(self) -> str:\n return f\"https://www.imdb.com/title/tt{self.imdb_id}\"\n\ndb = tmp_db\ntbl = db.create_table(\"movielens_small\", schema=Content, mode=\"overwrite\")\n"; export const RsCreateTableFromArrow = "let arrow_schema = Arc::new(Schema::new(vec![\n Field::new(\n \"vector\",\n DataType::FixedSizeList(Arc::new(Field::new(\"item\", DataType::Float32, true)), 16),\n false,\n ),\n Field::new(\"text\", DataType::Utf8, false),\n]));\n\nlet arrow_batch = RecordBatch::try_new(\n arrow_schema.clone(),\n vec![\n Arc::new(\n FixedSizeListArray::from_iter_primitive::<Float32Type, _, _>(\n vec![Some(vec![Some(0.1); 16]), Some(vec![Some(0.2); 16])],\n 16,\n ),\n ),\n Arc::new(StringArray::from(vec![\"foo\", \"bar\"])),\n ],\n)\n.unwrap();\nlet arrow_reader: Box<dyn RecordBatchReader + Send> =\n Box::new(RecordBatchIterator::new(vec![Ok(arrow_batch)].into_iter(), 
arrow_schema.clone()));\nlet arrow_table = db\n .create_table(\"arrow_table_example\", arrow_reader)\n .mode(CreateTableMode::Overwrite)\n .execute()\n .await\n .unwrap();\n"; export const TsCreateTableFromArrow = "const arrowSchema = new arrow.Schema([\n new arrow.Field(\n \"vector\",\n new arrow.FixedSizeList(\n 16,\n new arrow.Field(\"item\", new arrow.Float32(), true),\n ),\n ),\n new arrow.Field(\"text\", new arrow.Utf8()),\n]);\nconst arrowData = lancedb.makeArrowTable(\n [\n { vector: Array(16).fill(0.1), text: \"foo\" },\n { vector: Array(16).fill(0.2), text: \"bar\" },\n ],\n { schema: arrowSchema },\n);\nconst arrowTable = await db.createTable(\"f32_tbl\", arrowData, {\n mode: \"overwrite\",\n});\n"; export const CreateTableFromArrow = "import numpy as np\nimport pyarrow as pa\n\ndim = 16\ntotal = 2\nschema = pa.schema(\n [pa.field(\"vector\", pa.list_(pa.float16(), dim)), pa.field(\"text\", pa.string())]\n)\ndata = pa.Table.from_arrays(\n [\n pa.array(\n [np.random.randn(dim).astype(np.float16) for _ in range(total)],\n pa.list_(pa.float16(), dim),\n ),\n pa.array([\"foo\", \"bar\"]),\n ],\n [\"vector\", \"text\"],\n)\ndb = tmp_db\ntbl = db.create_table(\"f16_tbl\", data, schema=schema, mode=\"overwrite\")\n"; export const CreateTableFromPolars = "import polars as pl\n\ndata = pl.DataFrame(\n {\n \"vector\": [[3.1, 4.1], [5.9, 26.5]],\n \"item\": [\"foo\", \"bar\"],\n \"price\": [10.0, 20.0],\n }\n)\ndb = tmp_db\ntbl = db.create_table(\"my_table_pl\", data, mode=\"overwrite\")\n"; export const RsCreateTableCustomSchema = "let custom_schema = Arc::new(Schema::new(vec![\n Field::new(\n \"vector\",\n DataType::FixedSizeList(Arc::new(Field::new(\"item\", DataType::Float32, true)), 4),\n false,\n ),\n Field::new(\"lat\", DataType::Float32, false),\n Field::new(\"long\", DataType::Float32, false),\n]));\n\nlet custom_batch = RecordBatch::try_new(\n custom_schema.clone(),\n vec![\n Arc::new(\n FixedSizeListArray::from_iter_primitive::<Float32Type, _, _>(\n vec![\n 
Some(vec![Some(1.1), Some(1.2), Some(1.3), Some(1.4)]),\n Some(vec![Some(0.2), Some(1.8), Some(0.4), Some(3.6)]),\n ],\n 4,\n ),\n ),\n Arc::new(Float32Array::from(vec![45.5, 40.1])),\n Arc::new(Float32Array::from(vec![-122.7, -74.1])),\n ],\n)\n.unwrap();\nlet custom_reader: Box<dyn RecordBatchReader + Send> =\n Box::new(RecordBatchIterator::new(vec![Ok(custom_batch)].into_iter(), custom_schema.clone()));\nlet custom_table = db\n .create_table(\"my_table_custom_schema\", custom_reader)\n .mode(CreateTableMode::Overwrite)\n .execute()\n .await\n .unwrap();\n"; export const TsCreateTableCustomSchema = "const customSchema = new arrow.Schema([\n new arrow.Field(\n \"vector\",\n new arrow.FixedSizeList(\n 4,\n new arrow.Field(\"item\", new arrow.Float32(), true),\n ),\n ),\n new arrow.Field(\"lat\", new arrow.Float32()),\n new arrow.Field(\"long\", new arrow.Float32()),\n]);\n\nconst customSchemaData = lancedb.makeArrowTable(\n [\n { vector: [1.1, 1.2, 1.3, 1.4], lat: 45.5, long: -122.7 },\n { vector: [0.2, 1.8, 0.4, 3.6], lat: 40.1, long: -74.1 },\n ],\n { schema: customSchema },\n);\nconst customSchemaTable = await db.createTable(\n \"my_table_custom_schema\",\n customSchemaData,\n { mode: \"overwrite\" },\n);\n"; export const CreateTableCustomSchema = "import pyarrow as pa\n\ncustom_schema = pa.schema(\n [\n pa.field(\"vector\", pa.list_(pa.float32(), 4)),\n pa.field(\"lat\", pa.float32()),\n pa.field(\"long\", pa.float32()),\n ]\n)\n\ndata = [\n {\"vector\": [1.1, 1.2, 1.3, 1.4], \"lat\": 45.5, \"long\": -122.7},\n {\"vector\": [0.2, 1.8, 0.4, 3.6], \"lat\": 40.1, \"long\": -74.1},\n]\ndb = tmp_db\ntbl = db.create_table(\n \"my_table_custom_schema\", data, schema=custom_schema, mode=\"overwrite\"\n)\n"; export const CreateTableFromPandas = "import pandas as pd\n\ndata = pd.DataFrame(\n {\n \"vector\": [[1.1, 1.2, 1.3, 1.4], [0.2, 1.8, 0.4, 3.6]],\n \"lat\": [45.5, 40.1],\n \"long\": [-122.7, -74.1],\n }\n)\ndb = tmp_db\ndb.create_table(\"my_table_pandas\", data, 
mode=\"overwrite\")\ndb[\"my_table_pandas\"].head()\n"; export const RsCreateTableConflictHandling = "// Idempotent open: reuse the existing table if it exists.\n// The provided data is ignored; the schema is validated against the\n// existing table and a mismatch raises an error.\nlet _conflict_table = db\n .create_table(\"conflict_table\", exist_ok_reader)\n .mode(CreateTableMode::exist_ok(|req| req))\n .execute()\n .await\n .unwrap();\n\n// Overwrite: drop the existing table and create a new one with the\n// provided data. This permanently discards the old table's data.\nlet conflict_table = db\n .create_table(\"conflict_table\", overwrite_reader)\n .mode(CreateTableMode::Overwrite)\n .execute()\n .await\n .unwrap();\n"; export const TsCreateTableConflictHandling = "// Idempotent open: reuse the existing table if it exists.\n// The provided data is ignored; the schema is validated against the\n// existing table and a mismatch raises an error.\nlet conflictTable = await db.createTable(\"conflict_table\", data, {\n existOk: true,\n});\n\n// Overwrite: drop the existing table and create a new one with the\n// provided data. This permanently discards the old table's data.\nconflictTable = await db.createTable(\"conflict_table\", data, {\n mode: \"overwrite\",\n});\n"; export const CreateTableConflictHandling = "# Idempotent open: reuse the existing table if it exists.\n# The provided data is ignored; the schema is validated against the\n# existing table and a mismatch raises an error.\ntbl = db.create_table(\"conflict_table\", data, exist_ok=True)\n\n# Overwrite: drop the existing table and create a new one with the\n# provided data. 
This permanently discards the old table's data.\ntbl = db.create_table(\"conflict_table\", data, mode=\"overwrite\")\n"; export const RsCreateTableFromDicts = "struct Location {\n vector: [f32; 2],\n lat: f32,\n long: f32,\n}\n\nlet data = vec![\n Location {\n vector: [1.1, 1.2],\n lat: 45.5,\n long: -122.7,\n },\n Location {\n vector: [0.2, 1.8],\n lat: 40.1,\n long: -74.1,\n },\n];\n\nlet schema = Arc::new(Schema::new(vec![\n Field::new(\n \"vector\",\n DataType::FixedSizeList(Arc::new(Field::new(\"item\", DataType::Float32, true)), 2),\n false,\n ),\n Field::new(\"lat\", DataType::Float32, false),\n Field::new(\"long\", DataType::Float32, false),\n]));\n\nlet batch = RecordBatch::try_new(\n schema.clone(),\n vec![\n Arc::new(\n FixedSizeListArray::from_iter_primitive::<Float32Type, _, _>(\n data.iter()\n .map(|row| Some(row.vector.iter().copied().map(Some).collect::<Vec<_>>())),\n 2,\n ),\n ),\n Arc::new(Float32Array::from_iter_values(\n data.iter().map(|row| row.lat),\n )),\n Arc::new(Float32Array::from_iter_values(\n data.iter().map(|row| row.long),\n )),\n ],\n)\n.unwrap();\nlet reader: Box<dyn RecordBatchReader + Send> =\n Box::new(RecordBatchIterator::new(vec![Ok(batch)].into_iter(), schema.clone()));\nlet table = db\n .create_table(\"test_table\", reader)\n .mode(CreateTableMode::Overwrite)\n .execute()\n .await\n .unwrap();\n"; export const TsCreateTableFromDicts = "type Location = {\n vector: number[];\n lat: number;\n long: number;\n};\n\nconst data: Location[] = [\n { vector: [1.1, 1.2], lat: 45.5, long: -122.7 },\n { vector: [0.2, 1.8], lat: 40.1, long: -74.1 },\n];\nconst table = await db.createTable(\"test_table\", data, {\n mode: \"overwrite\",\n});\n"; export const CreateTableFromDicts = "data = [\n {\"vector\": [1.1, 1.2], \"lat\": 45.5, \"long\": -122.7},\n {\"vector\": [0.2, 1.8], \"lat\": 40.1, \"long\": -74.1},\n]\ndb = tmp_db\ndb.create_table(\"test_table\", data, mode=\"overwrite\")\ntbl = db[\"test_table\"]\ntbl.head()\n"; export const RsConnect = "async fn connect_example(uri: &str) {\n let db 
= connect(uri).execute().await.unwrap();\n let _ = db;\n}\n"; export const TsConnect = "import * as lancedb from \"@lancedb/lancedb\";\n\nasync function connectExample(uri: string) {\n const db = await lancedb.connect(uri);\n return db;\n}\n"; In LanceDB, tables store records with a defined schema that specifies column names and types. Across the SDKs, you can create tables from row-oriented data and Apache Arrow data structures. The Python SDK additionally supports: * PyArrow schemas for explicit schema control * `LanceModel` for Pydantic-based validation ## Create a table with data Initialize a LanceDB connection and create a table {TablesBasicConnect} {TsConnect} {RsConnect} Depending on the SDK, LanceDB can ingest arrays of records, Arrow tables or record batches, and Arrow batch iterators or readers. Let's take a look at some of the common patterns. ### From list of objects You can provide a list of objects to create a table. The Python and TypeScript SDKs support lists/arrays of dictionaries, while the Rust SDK supports lists of structs. In Python, pass a list or other batch-like object; a single bare `dict` or single `LanceModel` is rejected. {CreateTableFromDicts} {TsCreateTableFromDicts} {RsCreateTableFromDicts} ### Handle existing tables By default, `create_table` raises an error if a table with the same name already exists. You can change this behavior with two parameters that resolve the conflict in different ways: * **Idempotent open**: return the existing table without modifying it. Use when your code may run more than once (notebooks, retries, init scripts) and you want to reuse the table on subsequent runs. The provided data is ignored, but the schema is validated against the existing table and a mismatch raises an error. * **Overwrite**: drop the existing table and create a new one with the provided data. Use this for test fixtures or when you intentionally want to replace prior contents. This permanently discards the old table's data. 
{CreateTableConflictHandling} {TsCreateTableConflictHandling} {RsCreateTableConflictHandling} `exist_ok` / `existOk` does not append the provided data to an existing table. Use [`table.add()`](/tables/update) for that. If you need to ensure a table exists *and* contains specific rows, prefer the [empty-table-then-add pattern](#create-empty-table). ### From a custom schema You can define a custom Arrow schema for the table. This is useful when you want to have more control over the column types and metadata. {CreateTableCustomSchema} {TsCreateTableCustomSchema} {RsCreateTableCustomSchema} An explicit schema is also where you control nullability. If later writes omit a non-nullable column, or provide actual nulls for it, ingestion fails; nullable columns can be omitted or written with null values. Without an explicit schema, Python infers list-like vector values as fixed-size `float32` vector fields from the observed dimension. ### From an Arrow Table You can also create LanceDB tables directly from Arrow tables. Rust uses an Arrow `RecordBatchReader` for the same Arrow-native ingest flow. {CreateTableFromArrow} {TsCreateTableFromArrow} {RsCreateTableFromArrow} ### From a Pandas DataFrame Python Only {CreateTableFromPandas} Data is converted to Arrow before being written to disk. For maximum control over how data is saved, either provide the PyArrow schema to convert to or else provide a PyArrow Table directly. The **`vector`** column needs to be a [Vector](/integrations/data/pydantic#vector-field) (defined as [pyarrow.FixedSizeList](https://arrow.apache.org/docs/python/generated/pyarrow.list_.html)) type. ### From a Polars DataFrame Python Only LanceDB supports [Polars](https://pola.rs/), a modern, fast DataFrame library written in Rust. Just like in Pandas, the Polars integration is enabled by PyArrow under the hood. A deeper integration between LanceDB Tables and Polars DataFrames is on the way. 
{CreateTableFromPolars} ### From Pydantic Models Python Only When you create an empty table without data, you must specify the table schema. LanceDB supports creating tables by specifying a PyArrow schema or a specialized Pydantic model called `LanceModel`. For example, the following `Content` model specifies a table with 5 columns: `movie_id`, `vector`, `genres`, `title`, and `imdb_id`. When you create a table, you can pass the class as the value of the `schema` parameter to `create_table`. The `vector` column is a `Vector` type, which is a specialized Pydantic type that can be configured with the vector dimensions. It is also important to note that LanceDB only understands subclasses of `lancedb.pydantic.LanceModel` (which itself derives from `pydantic.BaseModel`). {CreateTableFromPydantic} #### Nested schemas Sometimes your data model may contain nested objects. For example, you may want to store the document string and the document source name as a nested `Document` object: {TablesDocumentModel} This can be used as the type of a LanceDB table column: {CreateTableNestedSchema} This creates a struct column called "document" that has two subfields called "content" and "source": ```bash theme={"theme":{"light":"vitesse-light","dark":"catppuccin-mocha"}} In [28]: tbl.schema Out[28]: id: string not null vector: fixed_size_list<item: float>[1536] not null child 0, item: float document: struct<content: string not null, source: string not null> not null child 0, content: string not null child 1, source: string not null ``` #### Validators Because `LanceModel` inherits from Pydantic's `BaseModel`, you can combine it with Pydantic's [field validators](https://docs.pydantic.dev/latest/concepts/validators). The example below shows how to add a validator that only accepts datetime values in the expected timezone for the `dt_with_tz` field. {TablesTzValidator} When you run this code, the second `TestModel(...)` call should raise a `ValidationError`. 
### Loading Large Datasets When ingesting large datasets, use `table.add()` on an existing table rather than passing all data to `create_table()`. The `add()` method auto-parallelizes large writes, while `create_table(name, data)` does not. For best performance with large datasets, create an empty table first and then call `table.add()`. This enables automatic write parallelism for materialized data sources. #### From files (Parquet, CSV, etc.) Python Only For file-based data, pass a `pyarrow.dataset.Dataset` to `table.add()`. This streams data from disk without loading the entire dataset into memory. {AddFromDataset} `pyarrow.dataset.Dataset` input is currently Python-only. TypeScript and Rust support for file-based dataset ingestion is tracked in [lancedb#3173](https://github.com/lancedb/lancedb/issues/3173). #### From iterators (custom batch generation) When you need custom batch logic — generating embeddings on the fly, transforming rows from an external source, etc. — use an iterator of `RecordBatch` objects. {CreateTableFromIterator} {TsCreateTableFromIterator} {RsCreateTableFromIterator} Use this pattern when: * Your source data already arrives in Arrow batches, readers, datasets, or streams. * Materializing the entire ingest as one giant in-memory list or array would be too expensive. * You want to control chunk size explicitly during ingestion. Python can also consume iterators of other supported types like Pandas DataFrames or Python lists. #### Write parallelism For materialized data (`pa.Table`, `pd.DataFrame`, `pyarrow.dataset.Dataset`), LanceDB automatically parallelizes large writes — no configuration needed. Auto-parallelism targets approximately 1M rows or 2GB per write partition. For streaming sources (iterators, `RecordBatchReader`), LanceDB cannot determine total size upfront. A `parallelism` parameter to control this manually is planned but not yet exposed in Python or TypeScript ([tracking issue](https://github.com/lancedb/lancedb/issues/3173)). 
#### Tracking ingestion progress TypeScript Only For long-running writes, pass a `progress` callback to `table.add()` to surface per-batch progress in your UI, logs, or metrics pipeline. The callback fires once per batch written and once more with `done: true` when the write completes. Each invocation receives a `WriteProgress` object: | Field | Description | | :--------------- | :--------------------------------------------------------------------------------------- | | `outputRows` | Rows written so far. | | `outputBytes` | Bytes written so far. | | `totalRows` | Expected total rows when the input source reports one. Always set on the final callback. | | `elapsedSeconds` | Wall-clock seconds since the write started. | | `activeTasks` | Parallel write tasks currently in flight. | | `totalTasks` | Total parallel write tasks (the write parallelism). | | `done` | `true` only for the final callback. | {TsAddProgress} A few things to know before you wire this up: * Back-pressures the writer: callback invocations are serialized and run inline with each batch, so a slow callback will slow the write rather than drop updates. Every batch update is delivered, and the final `done: true` callback always fires (even on error or cancellation). Keep the callback cheap — offload heavy work to a queue you drain elsewhere. * Errors swallowed: anything your callback throws is logged with `console.warn` and won't abort the write, so keep the callback side-effect-only and don't rely on it for control flow. * Row totals: `totalRows` is only populated when the input source can report it up front (for example, a materialized `arrow.Table`). For streaming sources it stays `undefined` until the final callback, where it falls back to the actual rows written. ## Create empty table You can create an empty table for scenarios where you want to add data to the table later. An example would be when you want to collect data from a stream/external file and then add it to a table in batches. 
An empty table can be initialized via an Arrow schema. {CreateEmptyTable} {TsCreateEmptyTable} {RsCreateEmptyTable} Alternatively, you can also use Pydantic to specify the schema for the empty table. Note that we do not subclass `pydantic.BaseModel` directly but instead import `LanceModel` from `lancedb.pydantic`; `LanceModel` is a subclass of `pydantic.BaseModel` that has been extended to support LanceDB-specific types like `Vector`. {CreateEmptyTablePydantic} Once the empty table has been created, you can append to it or modify its contents, as explained in the [updating and modifying tables](/tables/update) section. ## Open an existing table You can open an existing table by specifying the name of the table to the `open_table` / `openTable` method. If you forget the name of your table, you can always get a listing of all table names. {OpenExistingTable} {TsOpenExistingTable} {RsOpenExistingTable} ## Drop a table Use the `drop_table()` method on the database to remove a table. {DropTable} {TsDropTable} {RsDropTable} This permanently removes the table and is not recoverable, unlike deleting rows. By default, if the table does not exist an exception is raised. To suppress this, you can pass in `ignore_missing=True`.