> ## Documentation Index
> Fetch the complete documentation index at: https://docs.lancedb.com/llms.txt
> Use this file to discover all available pages before exploring further.

# Ingesting Data

> Learn about the different ways to ingest data into LanceDB tables, whether from various data sources or by starting with an empty table.

export const TablesTzValidator = "from datetime import datetime\nfrom zoneinfo import ZoneInfo\n\nfrom lancedb.pydantic import LanceModel\nfrom pydantic import Field, ValidationError, ValidationInfo, field_validator\n\ntzname = \"America/New_York\"\ntz = ZoneInfo(tzname)\n\nclass TestModel(LanceModel):\n    dt_with_tz: datetime = Field(json_schema_extra={\"tz\": tzname})\n\n    @field_validator(\"dt_with_tz\")\n    @classmethod\n    def tz_must_match(cls, dt: datetime) -> datetime:\n        assert dt.tzinfo == tz\n        return dt\n\nok = TestModel(dt_with_tz=datetime.now(tz))\n\ntry:\n    TestModel(dt_with_tz=datetime.now(ZoneInfo(\"Asia/Shanghai\")))\n    assert 0 == 1, \"this should raise ValidationError\"\nexcept ValidationError:\n    print(\"A ValidationError was raised.\")\n    pass\n";

export const TablesDocumentModel = "from pydantic import BaseModel\n\nclass Document(BaseModel):\n    content: str\n    source: str\n";

export const TablesBasicConnect = "import lancedb\n\nuri = \"data/sample-lancedb\"\ndb = lancedb.connect(uri)\n";

export const RsDropTable = "let drop_schema = Arc::new(Schema::new(vec![\n    Field::new(\n        \"vector\",\n        DataType::FixedSizeList(Arc::new(Field::new(\"item\", DataType::Float32, true)), 2),\n        false,\n    ),\n    Field::new(\"lat\", DataType::Float32, false),\n]));\nlet drop_batch = RecordBatch::try_new(\n    drop_schema.clone(),\n    vec![\n        Arc::new(\n            FixedSizeListArray::from_iter_primitive::<Float32Type, _, _>(\n                vec![Some(vec![Some(1.1), Some(1.2)])],\n                2,\n            ),\n        ),\n        Arc::new(Float32Array::from(vec![45.5])),\n    ],\n)\n.unwrap();\nlet drop_reader =\n    RecordBatchIterator::new(vec![Ok(drop_batch)].into_iter(), drop_schema.clone());\ndb.create_table(\"my_table\", drop_reader)\n    .mode(CreateTableMode::Overwrite)\n    .execute()\n    .await\n    .unwrap();\n\ndb.drop_table(\"my_table\", &[]).await.unwrap();\n";

export const TsDropTable = "await db.createTable(\"my_table\", [{ vector: [1.1, 1.2], lat: 45.5 }], {\n  mode: \"overwrite\",\n});\n\nawait db.dropTable(\"my_table\");\n";

export const DropTable = "db = tmp_db\n# Create a table first\ndata = [{\"vector\": [1.1, 1.2], \"lat\": 45.5}]\ndb.create_table(\"my_table\", data, mode=\"overwrite\")\n\n# Drop the table\ndb.drop_table(\"my_table\")\n";

export const CreateEmptyTablePydantic = "from lancedb.pydantic import LanceModel, Vector\n\nclass Item(LanceModel):\n    vector: Vector(2)\n    item: str\n    price: float\n\ndb = tmp_db\ntbl = db.create_table(\n    \"test_empty_table_new\", schema=Item.to_arrow_schema(), mode=\"overwrite\"\n)\n";

export const RsCreateEmptyTable = "let empty_schema = Arc::new(Schema::new(vec![\n    Field::new(\n        \"vector\",\n        DataType::FixedSizeList(Arc::new(Field::new(\"item\", DataType::Float32, true)), 2),\n        false,\n    ),\n    Field::new(\"item\", DataType::Utf8, false),\n    Field::new(\"price\", DataType::Float32, false),\n]));\nlet empty_table = db\n    .create_empty_table(\"test_empty_table\", empty_schema)\n    .mode(CreateTableMode::Overwrite)\n    .execute()\n    .await\n    .unwrap();\n";

export const TsCreateEmptyTable = "const emptySchema = new arrow.Schema([\n  new arrow.Field(\n    \"vector\",\n    new arrow.FixedSizeList(\n      2,\n      new arrow.Field(\"item\", new arrow.Float32(), true),\n    ),\n  ),\n  new arrow.Field(\"item\", new arrow.Utf8()),\n  new arrow.Field(\"price\", new arrow.Float32()),\n]);\nconst emptyTable = await db.createEmptyTable(\n  \"test_empty_table\",\n  emptySchema,\n  {\n    mode: \"overwrite\",\n  },\n);\n";

export const CreateEmptyTable = "import pyarrow as pa\n\nschema = pa.schema(\n    [\n        pa.field(\"vector\", pa.list_(pa.float32(), 2)),\n        pa.field(\"item\", pa.string()),\n        pa.field(\"price\", pa.float32()),\n    ]\n)\ndb = tmp_db\ntbl = db.create_table(\"test_empty_table\", schema=schema, mode=\"overwrite\")\n";

export const RsOpenExistingTable = "let open_schema = Arc::new(Schema::new(vec![\n    Field::new(\n        \"vector\",\n        DataType::FixedSizeList(Arc::new(Field::new(\"item\", DataType::Float32, true)), 2),\n        false,\n    ),\n    Field::new(\"lat\", DataType::Float32, false),\n    Field::new(\"long\", DataType::Float32, false),\n]));\nlet open_batch = RecordBatch::try_new(\n    open_schema.clone(),\n    vec![\n        Arc::new(\n            FixedSizeListArray::from_iter_primitive::<Float32Type, _, _>(\n                vec![Some(vec![Some(1.1), Some(1.2)])],\n                2,\n            ),\n        ),\n        Arc::new(Float32Array::from(vec![45.5])),\n        Arc::new(Float32Array::from(vec![-122.7])),\n    ],\n)\n.unwrap();\nlet open_reader =\n    RecordBatchIterator::new(vec![Ok(open_batch)].into_iter(), open_schema.clone());\ndb.create_table(\"test_table\", open_reader)\n    .mode(CreateTableMode::Overwrite)\n    .execute()\n    .await\n    .unwrap();\n\nprintln!(\"{:?}\", db.table_names().execute().await.unwrap());\n\nlet opened_table = db.open_table(\"test_table\").execute().await.unwrap();\n";

export const TsOpenExistingTable = "const openTableData = [{ vector: [1.1, 1.2], lat: 45.5, long: -122.7 }];\nawait db.createTable(\"test_table_open\", openTableData, {\n  mode: \"overwrite\",\n});\n\nconsole.log(await db.tableNames());\n\nconst openedTable = await db.openTable(\"test_table_open\");\n";

export const OpenExistingTable = "db = tmp_db\n# Create a table first\ndata = [{\"vector\": [1.1, 1.2], \"lat\": 45.5, \"long\": -122.7}]\ndb.create_table(\"test_table\", data, mode=\"overwrite\")\n\n# List table names\nprint(db.list_tables().tables)\n\n# Open existing table\ntbl = db.open_table(\"test_table\")\n";

export const RsCreateTableFromIterator = "let batch_schema = Arc::new(Schema::new(vec![\n    Field::new(\n        \"vector\",\n        DataType::FixedSizeList(Arc::new(Field::new(\"item\", DataType::Float32, true)), 4),\n        false,\n    ),\n    Field::new(\"item\", DataType::Utf8, false),\n    Field::new(\"price\", DataType::Float32, false),\n]));\n\nlet batches = (0..5)\n    .map(|i| {\n        RecordBatch::try_new(\n            batch_schema.clone(),\n            vec![\n                Arc::new(\n                    FixedSizeListArray::from_iter_primitive::<Float32Type, _, _>(\n                        vec![\n                            Some(vec![Some(3.1 + i as f32), Some(4.1), Some(5.1), Some(6.1)]),\n                            Some(vec![\n                                Some(5.9),\n                                Some(26.5 + i as f32),\n                                Some(4.7),\n                                Some(32.8),\n                            ]),\n                        ],\n                        4,\n                    ),\n                ),\n                Arc::new(StringArray::from(vec![\n                    format!(\"item{}\", i * 2 + 1),\n                    format!(\"item{}\", i * 2 + 2),\n                ])),\n                Arc::new(Float32Array::from(vec![\n                    ((i * 2 + 1) * 10) as f32,\n                    ((i * 2 + 2) * 10) as f32,\n                ])),\n            ],\n        )\n        .unwrap()\n    })\n    .collect::<Vec<_>>();\n\nlet batch_reader = RecordBatchIterator::new(batches.into_iter().map(Ok), batch_schema.clone());\nlet batch_table = db\n    .create_table(\"batched_table\", batch_reader)\n    .mode(CreateTableMode::Overwrite)\n    .execute()\n    .await\n    .unwrap();\n";

export const TsCreateTableFromIterator = "const batchSchema = new arrow.Schema([\n  new arrow.Field(\n    \"vector\",\n    new arrow.FixedSizeList(\n      4,\n      new arrow.Field(\"item\", new arrow.Float32(), true),\n    ),\n  ),\n  new arrow.Field(\"item\", new arrow.Utf8()),\n  new arrow.Field(\"price\", new arrow.Float32()),\n]);\n\nconst tableForBatches = await db.createEmptyTable(\n  \"batched_table\",\n  batchSchema,\n  {\n    mode: \"overwrite\",\n  },\n);\n\nconst rows = Array.from({ length: 10 }, (_, i) => ({\n  vector: [i + 0.1, i + 0.2, i + 0.3, i + 0.4],\n  item: `item-${i + 1}`,\n  price: (i + 1) * 10,\n}));\n\nconst chunkSize = 2;\nfor (let i = 0; i < rows.length; i += chunkSize) {\n  const batch = lancedb.makeArrowTable(rows.slice(i, i + chunkSize), {\n    schema: batchSchema,\n  });\n  await tableForBatches.add(batch);\n}\n";

export const CreateTableFromIterator = "import pyarrow as pa\n\nschema = pa.schema(\n    [\n        pa.field(\"vector\", pa.list_(pa.float32(), 4)),\n        pa.field(\"item\", pa.utf8()),\n        pa.field(\"price\", pa.float32()),\n    ]\n)\n\ndef make_batches():\n    for i in range(5):\n        yield pa.RecordBatch.from_arrays(\n            [\n                pa.array(\n                    [[3.1, 4.1, 5.1, 6.1], [5.9, 26.5, 4.7, 32.8]],\n                    pa.list_(pa.float32(), 4),\n                ),\n                pa.array([\"foo\", \"bar\"]),\n                pa.array([10.0, 20.0]),\n            ],\n            [\"vector\", \"item\", \"price\"],\n        )\n\ndb = tmp_db\ndb.create_table(\"batched_table\", make_batches(), schema=schema, mode=\"overwrite\")\n";

export const AddFromDataset = "import pyarrow.dataset as ds\n\ndataset = ds.dataset(data_path, format=\"parquet\")\ndb = tmp_db\ntable = db.create_table(\"my_table\", schema=dataset.schema, mode=\"overwrite\")\ntable.add(dataset)\n";

export const CreateTableNestedSchema = "from lancedb.pydantic import LanceModel, Vector\n\n# --8<-- [start:tables_document_model]\nfrom pydantic import BaseModel\n\nclass Document(BaseModel):\n    content: str\n    source: str\n\n# --8<-- [end:tables_document_model]\n\nclass NestedSchema(LanceModel):\n    id: str\n    vector: Vector(1536)\n    document: Document\n\ndb = tmp_db\ntbl = db.create_table(\"nested_table\", schema=NestedSchema, mode=\"overwrite\")\n";

export const CreateTableFromPydantic = "from lancedb.pydantic import LanceModel, Vector\n\nclass Content(LanceModel):\n    movie_id: int\n    vector: Vector(128)\n    genres: str\n    title: str\n    imdb_id: int\n\n    @property\n    def imdb_url(self) -> str:\n        return f\"https://www.imdb.com/title/tt{self.imdb_id}\"\n\ndb = tmp_db\ntbl = db.create_table(\"movielens_small\", schema=Content, mode=\"overwrite\")\n";

export const RsCreateTableFromArrow = "let arrow_schema = Arc::new(Schema::new(vec![\n    Field::new(\n        \"vector\",\n        DataType::FixedSizeList(Arc::new(Field::new(\"item\", DataType::Float32, true)), 16),\n        false,\n    ),\n    Field::new(\"text\", DataType::Utf8, false),\n]));\n\nlet arrow_batch = RecordBatch::try_new(\n    arrow_schema.clone(),\n    vec![\n        Arc::new(\n            FixedSizeListArray::from_iter_primitive::<Float32Type, _, _>(\n                vec![Some(vec![Some(0.1); 16]), Some(vec![Some(0.2); 16])],\n                16,\n            ),\n        ),\n        Arc::new(StringArray::from(vec![\"foo\", \"bar\"])),\n    ],\n)\n.unwrap();\nlet arrow_reader =\n    RecordBatchIterator::new(vec![Ok(arrow_batch)].into_iter(), arrow_schema.clone());\nlet arrow_table = db\n    .create_table(\"arrow_table_example\", arrow_reader)\n    .mode(CreateTableMode::Overwrite)\n    .execute()\n    .await\n    .unwrap();\n";

export const TsCreateTableFromArrow = "const arrowSchema = new arrow.Schema([\n  new arrow.Field(\n    \"vector\",\n    new arrow.FixedSizeList(\n      16,\n      new arrow.Field(\"item\", new arrow.Float32(), true),\n    ),\n  ),\n  new arrow.Field(\"text\", new arrow.Utf8()),\n]);\nconst arrowData = lancedb.makeArrowTable(\n  [\n    { vector: Array(16).fill(0.1), text: \"foo\" },\n    { vector: Array(16).fill(0.2), text: \"bar\" },\n  ],\n  { schema: arrowSchema },\n);\nconst arrowTable = await db.createTable(\"f32_tbl\", arrowData, {\n  mode: \"overwrite\",\n});\n";

export const CreateTableFromArrow = "import numpy as np\nimport pyarrow as pa\n\ndim = 16\ntotal = 2\nschema = pa.schema(\n    [pa.field(\"vector\", pa.list_(pa.float16(), dim)), pa.field(\"text\", pa.string())]\n)\ndata = pa.Table.from_arrays(\n    [\n        pa.array(\n            [np.random.randn(dim).astype(np.float16) for _ in range(total)],\n            pa.list_(pa.float16(), dim),\n        ),\n        pa.array([\"foo\", \"bar\"]),\n    ],\n    [\"vector\", \"text\"],\n)\ndb = tmp_db\ntbl = db.create_table(\"f16_tbl\", data, schema=schema, mode=\"overwrite\")\n";

export const CreateTableFromPolars = "import polars as pl\n\ndata = pl.DataFrame(\n    {\n        \"vector\": [[3.1, 4.1], [5.9, 26.5]],\n        \"item\": [\"foo\", \"bar\"],\n        \"price\": [10.0, 20.0],\n    }\n)\ndb = tmp_db\ntbl = db.create_table(\"my_table_pl\", data, mode=\"overwrite\")\n";

export const RsCreateTableCustomSchema = "let custom_schema = Arc::new(Schema::new(vec![\n    Field::new(\n        \"vector\",\n        DataType::FixedSizeList(Arc::new(Field::new(\"item\", DataType::Float32, true)), 4),\n        false,\n    ),\n    Field::new(\"lat\", DataType::Float32, false),\n    Field::new(\"long\", DataType::Float32, false),\n]));\n\nlet custom_batch = RecordBatch::try_new(\n    custom_schema.clone(),\n    vec![\n        Arc::new(\n            FixedSizeListArray::from_iter_primitive::<Float32Type, _, _>(\n                vec![\n                    Some(vec![Some(1.1), Some(1.2), Some(1.3), Some(1.4)]),\n                    Some(vec![Some(0.2), Some(1.8), Some(0.4), Some(3.6)]),\n                ],\n                4,\n            ),\n        ),\n        Arc::new(Float32Array::from(vec![45.5, 40.1])),\n        Arc::new(Float32Array::from(vec![-122.7, -74.1])),\n    ],\n)\n.unwrap();\nlet custom_reader =\n    RecordBatchIterator::new(vec![Ok(custom_batch)].into_iter(), custom_schema.clone());\nlet custom_table = db\n    .create_table(\"my_table_custom_schema\", custom_reader)\n    .mode(CreateTableMode::Overwrite)\n    .execute()\n    .await\n    .unwrap();\n";

export const TsCreateTableCustomSchema = "const customSchema = new arrow.Schema([\n  new arrow.Field(\n    \"vector\",\n    new arrow.FixedSizeList(\n      4,\n      new arrow.Field(\"item\", new arrow.Float32(), true),\n    ),\n  ),\n  new arrow.Field(\"lat\", new arrow.Float32()),\n  new arrow.Field(\"long\", new arrow.Float32()),\n]);\n\nconst customSchemaData = lancedb.makeArrowTable(\n  [\n    { vector: [1.1, 1.2, 1.3, 1.4], lat: 45.5, long: -122.7 },\n    { vector: [0.2, 1.8, 0.4, 3.6], lat: 40.1, long: -74.1 },\n  ],\n  { schema: customSchema },\n);\nconst customSchemaTable = await db.createTable(\n  \"my_table_custom_schema\",\n  customSchemaData,\n  { mode: \"overwrite\" },\n);\n";

export const CreateTableCustomSchema = "import pyarrow as pa\n\ncustom_schema = pa.schema(\n    [\n        pa.field(\"vector\", pa.list_(pa.float32(), 4)),\n        pa.field(\"lat\", pa.float32()),\n        pa.field(\"long\", pa.float32()),\n    ]\n)\n\ndata = [\n    {\"vector\": [1.1, 1.2, 1.3, 1.4], \"lat\": 45.5, \"long\": -122.7},\n    {\"vector\": [0.2, 1.8, 0.4, 3.6], \"lat\": 40.1, \"long\": -74.1},\n]\ndb = tmp_db\ntbl = db.create_table(\n    \"my_table_custom_schema\", data, schema=custom_schema, mode=\"overwrite\"\n)\n";

export const CreateTableFromPandas = "import pandas as pd\n\ndata = pd.DataFrame(\n    {\n        \"vector\": [[1.1, 1.2, 1.3, 1.4], [0.2, 1.8, 0.4, 3.6]],\n        \"lat\": [45.5, 40.1],\n        \"long\": [-122.7, -74.1],\n    }\n)\ndb = tmp_db\ndb.create_table(\"my_table_pandas\", data, mode=\"overwrite\")\ndb[\"my_table_pandas\"].head()\n";

export const RsCreateTableConflictHandling = "// Idempotent open: reuse the existing table if it exists.\n// The provided data is ignored; the schema is validated against the\n// existing table and a mismatch raises an error.\nlet _conflict_table = db\n    .create_table(\"conflict_table\", exist_ok_reader)\n    .mode(CreateTableMode::exist_ok(|req| req))\n    .execute()\n    .await\n    .unwrap();\n\n// Overwrite: drop the existing table and create a new one with the\n// provided data. This permanently discards the old table's data.\nlet conflict_table = db\n    .create_table(\"conflict_table\", overwrite_reader)\n    .mode(CreateTableMode::Overwrite)\n    .execute()\n    .await\n    .unwrap();\n";

export const TsCreateTableConflictHandling = "// Idempotent open: reuse the existing table if it exists.\n// The provided data is ignored; the schema is validated against the\n// existing table and a mismatch raises an error.\nlet conflictTable = await db.createTable(\"conflict_table\", data, {\n  existOk: true,\n});\n\n// Overwrite: drop the existing table and create a new one with the\n// provided data. This permanently discards the old table's data.\nconflictTable = await db.createTable(\"conflict_table\", data, {\n  mode: \"overwrite\",\n});\n";

export const CreateTableConflictHandling = "# Idempotent open: reuse the existing table if it exists.\n# The provided data is ignored; the schema is validated against the\n# existing table and a mismatch raises an error.\ntbl = db.create_table(\"conflict_table\", data, exist_ok=True)\n\n# Overwrite: drop the existing table and create a new one with the\n# provided data. This permanently discards the old table's data.\ntbl = db.create_table(\"conflict_table\", data, mode=\"overwrite\")\n";

export const RsCreateTableFromDicts = "struct Location {\n    vector: [f32; 2],\n    lat: f32,\n    long: f32,\n}\n\nlet data = vec![\n    Location {\n        vector: [1.1, 1.2],\n        lat: 45.5,\n        long: -122.7,\n    },\n    Location {\n        vector: [0.2, 1.8],\n        lat: 40.1,\n        long: -74.1,\n    },\n];\n\nlet schema = Arc::new(Schema::new(vec![\n    Field::new(\n        \"vector\",\n        DataType::FixedSizeList(Arc::new(Field::new(\"item\", DataType::Float32, true)), 2),\n        false,\n    ),\n    Field::new(\"lat\", DataType::Float32, false),\n    Field::new(\"long\", DataType::Float32, false),\n]));\n\nlet batch = RecordBatch::try_new(\n    schema.clone(),\n    vec![\n        Arc::new(\n            FixedSizeListArray::from_iter_primitive::<Float32Type, _, _>(\n                data.iter()\n                    .map(|row| Some(row.vector.iter().copied().map(Some).collect::<Vec<_>>())),\n                2,\n            ),\n        ),\n        Arc::new(Float32Array::from_iter_values(\n            data.iter().map(|row| row.lat),\n        )),\n        Arc::new(Float32Array::from_iter_values(\n            data.iter().map(|row| row.long),\n        )),\n    ],\n)\n.unwrap();\nlet reader = RecordBatchIterator::new(vec![Ok(batch)].into_iter(), schema.clone());\nlet table = db\n    .create_table(\"test_table\", reader)\n    .mode(CreateTableMode::Overwrite)\n    .execute()\n    .await\n    .unwrap();\n";

export const TsCreateTableFromDicts = "type Location = {\n  vector: number[];\n  lat: number;\n  long: number;\n};\n\nconst data: Location[] = [\n  { vector: [1.1, 1.2], lat: 45.5, long: -122.7 },\n  { vector: [0.2, 1.8], lat: 40.1, long: -74.1 },\n];\nconst table = await db.createTable(\"test_table\", data, {\n  mode: \"overwrite\",\n});\n";

export const CreateTableFromDicts = "data = [\n    {\"vector\": [1.1, 1.2], \"lat\": 45.5, \"long\": -122.7},\n    {\"vector\": [0.2, 1.8], \"lat\": 40.1, \"long\": -74.1},\n]\ndb = tmp_db\ndb.create_table(\"test_table\", data, mode=\"overwrite\")\ntbl = db[\"test_table\"]\ntbl.head()\n";

export const RsConnect = "async fn connect_example(uri: &str) {\n    let db = connect(uri).execute().await.unwrap();\n    let _ = db;\n}\n";

export const TsConnect = "import * as lancedb from \"@lancedb/lancedb\";\n\nasync function connectExample(uri: string) {\n  const db = await lancedb.connect(uri);\n  return db;\n}\n";

In LanceDB, tables store records with a defined schema that specifies column names and types. Across the SDKs, you can create tables from row-oriented data and Apache Arrow data structures. The Python SDK additionally supports:

* PyArrow schemas for explicit schema control
* `LanceModel` for Pydantic-based validation

## Create a table with data

Initialize a LanceDB connection, which you will then use to create a table:

<CodeGroup>
  <CodeBlock filename="Python" language="Python" icon="python">
    {TablesBasicConnect}
  </CodeBlock>

  <CodeBlock filename="TypeScript" language="TypeScript" icon="square-js">
    {TsConnect}
  </CodeBlock>

  <CodeBlock filename="Rust" language="Rust" icon="rust">
    {RsConnect}
  </CodeBlock>
</CodeGroup>

Depending on the SDK, LanceDB can ingest arrays of records, Arrow tables or record batches, and Arrow batch iterators or readers. Let's take a look at some of the common patterns.

### From list of objects

You can provide a list of objects to create a table. The Python and TypeScript SDKs
support lists/arrays of dictionaries, while the Rust SDK supports lists of structs.

<CodeGroup>
  <CodeBlock filename="Python" language="Python" icon="python">
    {CreateTableFromDicts}
  </CodeBlock>

  <CodeBlock filename="TypeScript" language="TypeScript" icon="square-js">
    {TsCreateTableFromDicts}
  </CodeBlock>

  <CodeBlock filename="Rust" language="Rust" icon="rust">
    {RsCreateTableFromDicts}
  </CodeBlock>
</CodeGroup>

### Handle existing tables

By default, `create_table` raises an error if a table with the same name already exists.
You can change this behavior with two parameters that resolve the conflict in different ways:

* **Idempotent open**: return the existing table without modifying it. Use when your
  code may run more than once (notebooks, retries, init scripts) and you want to reuse
  the table on subsequent runs. The provided data is ignored, but the schema is
  validated against the existing table and a mismatch raises an error.
* **Overwrite**: drop the existing table and create a new one with the provided data.
  Use this for test fixtures or when you intentionally want to replace prior contents.
  This permanently discards the old table's data.

<CodeGroup>
  <CodeBlock filename="Python" language="Python" icon="python">
    {CreateTableConflictHandling}
  </CodeBlock>

  <CodeBlock filename="TypeScript" language="TypeScript" icon="square-js">
    {TsCreateTableConflictHandling}
  </CodeBlock>

  <CodeBlock filename="Rust" language="Rust" icon="rust">
    {RsCreateTableConflictHandling}
  </CodeBlock>
</CodeGroup>

<Note>
  `exist_ok` / `existOk` does not append the provided data to an existing table. Use
  [`table.add()`](/tables/update) for that. If you need to ensure a table exists *and*
  contains specific rows, prefer the [empty-table-then-add pattern](#create-empty-table).
</Note>

### From a custom schema

You can define a custom Arrow schema for the table. This is useful when you want to have more control over the column types and metadata.

<CodeGroup>
  <CodeBlock filename="Python" language="Python" icon="python">
    {CreateTableCustomSchema}
  </CodeBlock>

  <CodeBlock filename="TypeScript" language="TypeScript" icon="square-js">
    {TsCreateTableCustomSchema}
  </CodeBlock>

  <CodeBlock filename="Rust" language="Rust" icon="rust">
    {RsCreateTableCustomSchema}
  </CodeBlock>
</CodeGroup>

### From an Arrow Table

You can also create LanceDB tables directly from Arrow tables.
Rust uses an Arrow `RecordBatchReader` for the same Arrow-native ingest flow.

<CodeGroup>
  <CodeBlock filename="Python" language="Python" icon="python">
    {CreateTableFromArrow}
  </CodeBlock>

  <CodeBlock filename="TypeScript" language="TypeScript" icon="square-js">
    {TsCreateTableFromArrow}
  </CodeBlock>

  <CodeBlock filename="Rust" language="Rust" icon="rust">
    {RsCreateTableFromArrow}
  </CodeBlock>
</CodeGroup>

### From a Pandas DataFrame

<Badge color="green">Python Only</Badge>

<CodeGroup>
  <CodeBlock filename="Python" language="Python" icon="python">
    {CreateTableFromPandas}
  </CodeBlock>
</CodeGroup>

<Note title="Note">
  Data is converted to Arrow before being written to disk. For maximum control over how data is saved, either provide the PyArrow schema to convert to or else provide a PyArrow Table directly.
</Note>

<Note title="Vector Column Type">
  The **`vector`** column needs to be a [Vector](/integrations/data/pydantic#vector-field) (defined as [pyarrow.FixedSizeList](https://arrow.apache.org/docs/python/generated/pyarrow.list_.html)) type.
</Note>

### From a Polars DataFrame

<Badge color="green">Python Only</Badge>

LanceDB supports [Polars](https://pola.rs/), a modern, fast DataFrame library
written in Rust. Just like in Pandas, the Polars integration is enabled by PyArrow
under the hood. A deeper integration between LanceDB Tables and Polars DataFrames
is on the way.

<CodeGroup>
  <CodeBlock filename="Python" language="Python" icon="python">
    {CreateTableFromPolars}
  </CodeBlock>
</CodeGroup>

### From Pydantic Models

<Badge color="green">Python Only</Badge>

When you create an empty table without data, you must specify the table schema.
LanceDB supports creating tables by specifying a PyArrow schema or a specialized
Pydantic model called `LanceModel`.

For example, the following `Content` model specifies a table with 5 columns:
`movie_id`, `vector`, `genres`, `title`, and `imdb_id`. When you create a table, you can
pass the class as the value of the `schema` parameter to `create_table`.
The `vector` column is a `Vector` type, which is a specialized Pydantic type that
can be configured with the vector dimensions. It is also important to note that
LanceDB only understands subclasses of `lancedb.pydantic.LanceModel`
(which itself derives from `pydantic.BaseModel`).

<CodeGroup>
  <CodeBlock filename="Python" language="Python" icon="python">
    {CreateTableFromPydantic}
  </CodeBlock>
</CodeGroup>

#### Nested schemas

Sometimes your data model may contain nested objects. For example, you may want to store the document string and the document source name as a nested Document object:

<CodeGroup>
  <CodeBlock filename="Python" language="Python" icon="python">
    {TablesDocumentModel}
  </CodeBlock>
</CodeGroup>

This can be used as the type of a LanceDB table column:

<CodeGroup>
  <CodeBlock filename="Python" language="Python" icon="python">
    {CreateTableNestedSchema}
  </CodeBlock>
</CodeGroup>

This creates a struct column called "document" that has two subfields
called "content" and "source":

```bash theme={"theme":{"light":"vitesse-light","dark":"catppuccin-mocha"}}
In [28]: tbl.schema
Out[28]:
id: string not null
vector: fixed_size_list<item: float>[1536] not null
    child 0, item: float
document: struct<content: string not null, source: string not null> not null
    child 0, content: string not null
    child 1, source: string not null
```

#### Validators

Because `LanceModel` inherits from Pydantic's `BaseModel`, you can combine it with Pydantic's
[field validators](https://docs.pydantic.dev/latest/concepts/validators). The example
below shows how to add a validator to ensure that only timezone-aware datetime objects in the
expected timezone are accepted for the `dt_with_tz` field.

<CodeGroup>
  <CodeBlock filename="Python" language="Python" icon="python">
    {TablesTzValidator}
  </CodeBlock>
</CodeGroup>

When you run this code, the second `TestModel(...)` call (which uses a different timezone) raises a `ValidationError`, which the example catches and reports.

### Loading Large Datasets

When ingesting large datasets, use `table.add()` on an existing table rather than
passing all data to `create_table()`. The `add()` method auto-parallelizes large
writes, while `create_table(name, data)` does not.

<Tip>
  For best performance with large datasets, create an empty table first and then call
  `table.add()`. This enables automatic write parallelism for materialized data sources.
</Tip>

#### From files (Parquet, CSV, etc.)

<Badge color="green">Python Only</Badge>

For file-based data, pass a `pyarrow.dataset.Dataset` to `table.add()`. This streams
data from disk without loading the entire dataset into memory.

<CodeGroup>
  <CodeBlock filename="Python" language="Python" icon="python">
    {AddFromDataset}
  </CodeBlock>
</CodeGroup>

<Note>
  `pyarrow.dataset.Dataset` input is currently Python-only. TypeScript and Rust support for
  file-based dataset ingestion is tracked in
  [lancedb#3173](https://github.com/lancedb/lancedb/issues/3173).
</Note>

#### From iterators (custom batch generation)

When you need custom batch logic — generating embeddings on the fly, transforming
rows from an external source, etc. — use an iterator of `RecordBatch` objects.

<CodeGroup>
  <CodeBlock filename="Python" language="Python" icon="python">
    {CreateTableFromIterator}
  </CodeBlock>

  <CodeBlock filename="TypeScript" language="TypeScript" icon="square-js">
    {TsCreateTableFromIterator}
  </CodeBlock>

  <CodeBlock filename="Rust" language="Rust" icon="rust">
    {RsCreateTableFromIterator}
  </CodeBlock>
</CodeGroup>

Use this pattern when:

* Your source data already arrives in Arrow batches, readers, datasets, or streams.
* Materializing the entire ingest as one giant in-memory list or array would be too expensive.
* You want to control chunk size explicitly during ingestion.

Python can also consume iterators of other supported types like Pandas DataFrames or Python lists.

#### Write parallelism

<Note title="Automatic parallelism">
  For materialized data (`pa.Table`, `pd.DataFrame`, `pyarrow.dataset.Dataset`), LanceDB
  automatically parallelizes large writes — no configuration needed. Auto-parallelism
  targets approximately 1M rows or 2GB per write partition.

  For streaming sources (iterators, `RecordBatchReader`), LanceDB cannot determine
  total size upfront. A `parallelism` parameter to control this manually is planned
  but not yet exposed in Python or TypeScript
  ([tracking issue](https://github.com/lancedb/lancedb/issues/3173)).
</Note>

## Create empty table

You can create an empty table for scenarios where you want to add data to the table later.
An example would be when you want to collect data from a stream/external file and then add it to a table in
batches.

An empty table can be initialized via an Arrow schema.

<CodeGroup>
  <CodeBlock filename="Python" language="Python" icon="python">
    {CreateEmptyTable}
  </CodeBlock>

  <CodeBlock filename="TypeScript" language="TypeScript" icon="square-js">
    {TsCreateEmptyTable}
  </CodeBlock>

  <CodeBlock filename="Rust" language="Rust" icon="rust">
    {RsCreateEmptyTable}
  </CodeBlock>
</CodeGroup>

Alternatively, you can also use Pydantic to specify the schema for the empty table. Note that we do not
subclass `pydantic.BaseModel` directly but instead use `LanceModel` from `lancedb.pydantic`, which is a
subclass of `pydantic.BaseModel` that has been extended to support LanceDB-specific types like `Vector`.

<CodeGroup>
  <CodeBlock filename="Python" language="Python" icon="python">
    {CreateEmptyTablePydantic}
  </CodeBlock>
</CodeGroup>

Once the empty table has been created, you can append to it or modify its contents,
as explained in the [updating and modifying tables](/tables/update) section.

## Open an existing table

You can open an existing table by specifying the name of the table to the `open_table` / `openTable` method.
If you forget the name of your table, you can always get a listing of all table names.

<CodeGroup>
  <CodeBlock filename="Python" language="Python" icon="python">
    {OpenExistingTable}
  </CodeBlock>

  <CodeBlock filename="TypeScript" language="TypeScript" icon="square-js">
    {TsOpenExistingTable}
  </CodeBlock>

  <CodeBlock filename="Rust" language="Rust" icon="rust">
    {RsOpenExistingTable}
  </CodeBlock>
</CodeGroup>

## Drop a table

Use the `drop_table()` / `dropTable()` method on the database connection to remove a table.

<CodeGroup>
  <CodeBlock filename="Python" language="Python" icon="python">
    {DropTable}
  </CodeBlock>

  <CodeBlock filename="TypeScript" language="TypeScript" icon="square-js">
    {TsDropTable}
  </CodeBlock>

  <CodeBlock filename="Rust" language="Rust" icon="rust">
    {RsDropTable}
  </CodeBlock>
</CodeGroup>

This permanently removes the table and is not recoverable, unlike deleting rows.
By default, if the table does not exist an exception is raised. To suppress this
in the Python SDK, you can pass in `ignore_missing=True`.
