From 1fabd1eca374ab2518e3cf2826aa3c2a6ed9873b Mon Sep 17 00:00:00 2001
From: d33bs <ekgto445@gmail.com>
Date: Fri, 19 Jun 2026 10:57:35 -0600
Subject: [PATCH 1/3] migrate to polars

---
 README.md                     |  47 ++++
 pyproject.toml                |  27 +-
 src/cytodataframe/__init__.py |  10 +
 src/cytodataframe/engine.py   | 216 ++++++++++++++++
 src/cytodataframe/frame.py    | 124 ++++++++-
 src/cytodataframe/lazy.py     | 271 ++++++++++++++++++++
 src/cytodataframe/schema.py   | 456 ++++++++++++++++++++++++++++++++++
 tests/test_engine.py          | 154 ++++++++++++
 tests/test_lazy.py            | 164 ++++++++++++
 tests/test_schema.py          | 199 +++++++++++++++
 uv.lock                       | 251 ++++++-------------
 11 files changed, 1739 insertions(+), 180 deletions(-)
 create mode 100644 src/cytodataframe/engine.py
 create mode 100644 src/cytodataframe/lazy.py
 create mode 100644 src/cytodataframe/schema.py
 create mode 100644 tests/test_engine.py
 create mode 100644 tests/test_lazy.py
 create mode 100644 tests/test_schema.py

diff --git a/README.md b/README.md
index 3c97656..62a1c50 100644
--- a/README.md
+++ b/README.md
@@ -23,6 +23,39 @@ With CytoDataFrame you can:
 - Highlight image objects using mask or outline files to understand their segmentation.
 - Adjust image displays on-the-fly using interactive slider widgets.
 - Automatically detect 3D image volumes and render interactive [trame](https://github.com/Kitware/trame) views in notebooks when 3D dependencies are installed (with graceful fallback otherwise).
+- Interoperate with the [Polars](https://pola.rs/) and [Apache Arrow](https://arrow.apache.org/) ecosystems while keeping the familiar Pandas-based experience.
+
+### Polars and Arrow interoperability
+
+CytoDataFrame uses Apache Arrow as its canonical schema/interchange contract and
+Polars as an execution engine, while Pandas remains the compatibility layer. You
+can move between representations and run lazy, scalable queries without leaving
+the CytoDataFrame API:
+
+```python
+import polars as pl
+from cytodataframe import CytoDataFrame
+
+# Construct from a file path, pandas, polars (DataFrame or LazyFrame), or a pyarrow Table.
+cdf = CytoDataFrame("profiles.parquet")
+
+# Convert out to any representation (Pandas stays a boundary layer).
+cdf.to_pandas()   # pandas.DataFrame
+cdf.to_polars()   # polars.DataFrame
+cdf.to_arrow()    # pyarrow.Table
+cdf.to_lazy()     # CytoLazyFrame (lazy, Polars-backed)
+
+# Inspect the inferred schema (metadata / feature / geometry roles).
+cdf.cyto_schema
+
+# Lazily scan large Parquet datasets with predicate/projection pushdown.
+result = (
+    CytoDataFrame.scan_parquet("profiles.parquet")
+    .filter(pl.col("Metadata_Well") == "A01")
+    .select_features()
+    .collect()  # -> CytoDataFrame
+)
+```
 
 For 3D notebook display behavior:
 
@@ -53,6 +86,20 @@ pip install cytodataframe
 pip install git+https://github.com/cytomining/CytoDataFrame.git
 ```
 
+The core install is intentionally lean. Heavier, feature-specific stacks are
+available as optional extras:
+
+```shell
+# interactive 3D volume rendering (trame / pyvista)
+pip install "cytodataframe[viz3d]"
+
+# OME-Arrow image read/write/embedding (to_ome_parquet, OME-Arrow columns)
+pip install "cytodataframe[ome]"
+
+# everything
+pip install "cytodataframe[all]"
+```
+
 ## Contributing, Development, and Testing
 
 Please see our [contributing](https://cytomining.github.io/CytoDataFrame/main/contributing) documentation for more details on contributions, development, and testing.
diff --git a/pyproject.toml b/pyproject.toml
index 25653bb..5186635 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -23,21 +23,29 @@ dependencies = [
   "imagecodecs>=2024.9.22,<2027",
   "imageio>=2.37,<3",
   "ipython>=8.12.3,<10",
-  "ipyvolume>=0.6.3,<0.7",
   "ipywidgets>=8.1.7,<9",
-  "matplotlib>=3.9.3,<4",
-  "nest-asyncio>=1.6,<2",
-  "ome-arrow>=0.0.3,<0.0.10",
   "opencv-python>=4.10.0.84,<5",
   "pandas>=2.2.2,<4",
+  "polars>=1,<2",
   "pyarrow>=16",
-  "pyvista>=0.46.4",
-  "pywavelets>1.4.1",
   "scikit-image>0.19.3",
+]
+# Optional feature stacks. Install with e.g. `pip install cytodataframe[viz3d,ome]`.
+# These are imported lazily, so the core package imports fine without them.
+optional-dependencies.ome = [
+  # OME-Arrow image read/write/embedding (to_ome_parquet, OME-Arrow columns).
+  "ome-arrow>=0.0.3,<0.0.10",
+]
+optional-dependencies.viz3d = [
+  # Interactive 3D volume rendering (trame/pyvista views in notebooks).
+  "pyvista>=0.46.4",
   "trame>=3.12",
   "trame-vtk>=2.10",
   "trame-vuetify>=3.1",
 ]
+optional-dependencies.all = [
+  "cytodataframe[ome,viz3d]",
+]
 
 [dependency-groups]
 dev = [
@@ -46,6 +54,7 @@ dev = [
   "coverage>=7.6,<8",
   "duckdb>=1.1.3,<2",
   "httpcore>=0.18,<1.1",
+  "hypothesis>=6,<7",
   "isort>=5.13.2,<9",
   "jupyterlab>=4.3,<5",
   "jupyterlab-code-formatter>=3.0.2,<4",
@@ -54,6 +63,12 @@ dev = [
   "pytest>=8.3.3,<10",
   "pytest-cov>=5,<8",
   "sqlalchemy>=1.3.6,<3",
+  # optional-feature stacks needed to exercise the full test suite
+  "ome-arrow>=0.0.3,<0.0.10",
+  "pyvista>=0.46.4",
+  "trame>=3.12",
+  "trame-vtk>=2.10",
+  "trame-vuetify>=3.1",
 ]
 docs = [
   "dunamai>=1.22,<2",
diff --git a/src/cytodataframe/__init__.py b/src/cytodataframe/__init__.py
index f1dc227..33c8379 100644
--- a/src/cytodataframe/__init__.py
+++ b/src/cytodataframe/__init__.py
@@ -2,7 +2,17 @@
 Initialization for cytodataframe package
 """
 
+from . import engine
 from .frame import CytoDataFrame
+from .lazy import CytoLazyFrame
+from .schema import CytoSchema
 
 # note: version placeholder is updated during builds
 __version__ = "0.0.0"
+
+__all__ = [
+    "CytoDataFrame",
+    "CytoLazyFrame",
+    "CytoSchema",
+    "engine",
+]
diff --git a/src/cytodataframe/engine.py b/src/cytodataframe/engine.py
new file mode 100644
index 0000000..3b20b80
--- /dev/null
+++ b/src/cytodataframe/engine.py
@@ -0,0 +1,216 @@
+"""
+Backend abstraction layer for CytoDataFrame.
+
+This module is the execution/interchange boundary described in the CytoDataFrame
+evolution plan. It treats Apache Arrow as the canonical schema and memory
+contract, Polars as the execution engine, and pandas as a compatibility layer.
+
+The functions here normalize the supported tabular inputs
+
+    * :class:`pandas.DataFrame` / :class:`pandas.Series`
+    * :class:`polars.DataFrame`
+    * :class:`polars.LazyFrame`
+    * :class:`pyarrow.Table`
+    * :class:`cytodataframe.frame.CytoDataFrame` (a ``pandas.DataFrame`` subclass)
+
+into the representation requested by the caller while preserving row counts,
+null semantics, column ordering, and schema.
+
+Design notes:
+    * Arrow is used as the bridge whenever a schema/serialization contract is
+      requested (``to_arrow``).
+    * Conversions intentionally avoid forcing existing *pandas* object columns
+      (which may hold numpy image arrays or OME-Arrow structs) through Arrow,
+      because Arrow cannot always round-trip arbitrary Python objects. Such
+      columns are only converted when the caller explicitly asks for an Arrow or
+      Polars representation.
+"""
+
+from __future__ import annotations
+
+import pathlib
+from typing import TYPE_CHECKING, Any, Union
+
+import pandas as pd
+
+if TYPE_CHECKING:  # pragma: no cover - typing only
+    import polars as pl
+    import pyarrow as pa
+
+# Public alias describing every tabular input CytoDataFrame's engine understands.
+TabularData = Union[
+    "pd.DataFrame",
+    "pd.Series",
+    "pl.DataFrame",
+    "pl.LazyFrame",
+    "pa.Table",
+]
+
+
+def _polars() -> Any:
+    """Import polars lazily so importing this module stays cheap."""
+    import polars as pl
+
+    return pl
+
+
+def _pyarrow() -> Any:
+    """Import pyarrow lazily so importing this module stays cheap."""
+    import pyarrow as pa
+
+    return pa
+
+
+def is_polars_dataframe(data: Any) -> bool:
+    """Return True when ``data`` is a :class:`polars.DataFrame`."""
+    try:
+        pl = _polars()
+    except ImportError:
+        return False
+    return isinstance(data, pl.DataFrame)
+
+
+def is_polars_lazyframe(data: Any) -> bool:
+    """Return True when ``data`` is a :class:`polars.LazyFrame`."""
+    try:
+        pl = _polars()
+    except ImportError:
+        return False
+    return isinstance(data, pl.LazyFrame)
+
+
+def is_arrow_table(data: Any) -> bool:
+    """Return True when ``data`` is a :class:`pyarrow.Table`."""
+    try:
+        pa = _pyarrow()
+    except ImportError:
+        return False
+    return isinstance(data, pa.Table)
+
+
+def is_supported(data: Any) -> bool:
+    """Return True when ``data`` is one of the supported tabular inputs."""
+    return (
+        isinstance(data, (pd.DataFrame, pd.Series))
+        or is_polars_dataframe(data)
+        or is_polars_lazyframe(data)
+        or is_arrow_table(data)
+    )
+
+
+def to_pandas(data: TabularData) -> pd.DataFrame:
+    """
+    Convert any supported tabular input to a :class:`pandas.DataFrame`.
+
+    pandas inputs (including ``CytoDataFrame``) are returned as-is so that object
+    columns holding images or OME-Arrow structs are never disturbed.
+    """
+    if isinstance(data, pd.DataFrame):
+        return data
+    if isinstance(data, pd.Series):
+        return data.to_frame()
+    if is_polars_lazyframe(data):
+        return data.collect().to_pandas()
+    if is_polars_dataframe(data):
+        return data.to_pandas()
+    if is_arrow_table(data):
+        return data.to_pandas()
+    raise TypeError(
+        f"Unsupported type for CytoDataFrame engine conversion: {type(data)!r}"
+    )
+
+
+def to_polars(data: TabularData) -> "pl.DataFrame":
+    """Convert any supported tabular input to an eager :class:`polars.DataFrame`."""
+    pl = _polars()
+    if isinstance(data, pl.DataFrame):
+        return data
+    if isinstance(data, pl.LazyFrame):
+        return data.collect()  # executes the lazy query plan
+    if is_arrow_table(data):
+        return pl.from_arrow(data)
+    if isinstance(data, pd.Series):
+        data = data.to_frame()
+    if isinstance(data, pd.DataFrame):
+        # Strip any pandas subclass (e.g. CytoDataFrame) first; ``from_pandas``
+        # then drops the index by default, as polars has no index concept.
+        try:
+            return pl.from_pandas(pd.DataFrame(data))
+        except Exception as exc:  # any conversion failure surfaces as TypeError
+            raise TypeError(
+                "Could not convert pandas data to polars. Columns holding "
+                "non-Arrow-compatible Python objects (e.g. numpy image arrays) "
+                "cannot be represented in polars/Arrow."
+            ) from exc
+    raise TypeError(
+        f"Unsupported type for CytoDataFrame engine conversion: {type(data)!r}"
+    )
+
+
+def to_lazyframe(data: TabularData) -> "pl.LazyFrame":
+    """Convert any supported tabular input to a :class:`polars.LazyFrame`."""
+    pl = _polars()
+    if isinstance(data, pl.LazyFrame):
+        return data
+    return to_polars(data).lazy()
+
+
+def to_arrow(data: TabularData, *, preserve_index: bool = False) -> "pa.Table":
+    """
+    Convert any supported tabular input to a :class:`pyarrow.Table`.
+
+    Arrow is the canonical schema/serialization contract. ``preserve_index``
+    is only used for pandas inputs; Arrow conversion errors become ``TypeError``.
+    """
+    pa = _pyarrow()
+    if is_arrow_table(data):
+        return data
+    if is_polars_lazyframe(data):
+        return data.collect().to_arrow()
+    if is_polars_dataframe(data):
+        return data.to_arrow()
+    if isinstance(data, pd.Series):
+        data = data.to_frame()
+    if isinstance(data, pd.DataFrame):
+        try:
+            return pa.Table.from_pandas(
+                pd.DataFrame(data), preserve_index=preserve_index
+            )
+        except (pa.ArrowInvalid, pa.ArrowTypeError, TypeError) as exc:
+            raise TypeError(
+                "Could not convert pandas data to an Arrow table. Columns "
+                "holding non-Arrow-compatible Python objects (e.g. numpy image "
+                "arrays) cannot be represented in Arrow."
+            ) from exc
+    raise TypeError(
+        f"Unsupported type for CytoDataFrame engine conversion: {type(data)!r}"
+    )
+
+
+def normalize_to_pandas(data: TabularData) -> pd.DataFrame:
+    """
+    Normalize a supported input to pandas for the compatibility facade.
+
+    This is the ingestion entry point used by ``CytoDataFrame.__init__`` to wrap
+    Polars/Arrow inputs while keeping pandas as the backing store.
+    """
+    return to_pandas(data)
+
+
+def scan_parquet(
+    source: Union[str, pathlib.Path], **kwargs: Any
+) -> "pl.LazyFrame":
+    """
+    Lazily scan a Parquet file/dataset into a :class:`polars.LazyFrame`.
+
+    This enables predicate/projection pushdown for large profiling datasets
+    without materializing them eagerly.
+    """
+    pl = _polars()
+    return pl.scan_parquet(source, **kwargs)
+
+
+def read_parquet(source: Union[str, pathlib.Path], **kwargs: Any) -> "pl.DataFrame":
+    """Eagerly read a Parquet file into a :class:`polars.DataFrame`."""
+    pl = _polars()
+    return pl.read_parquet(source, **kwargs)
diff --git a/src/cytodataframe/frame.py b/src/cytodataframe/frame.py
index 3cad707..790b44b 100644
--- a/src/cytodataframe/frame.py
+++ b/src/cytodataframe/frame.py
@@ -42,6 +42,7 @@
 )
 from skimage.util import img_as_ubyte
 
+from . import engine
 from .image import (
     add_image_scale_bar,
     adjust_with_adaptive_histogram_equalization,
@@ -49,6 +50,9 @@
     draw_outline_on_image_from_outline,
     get_pixel_bbox_from_offsets,
 )
+from .lazy import CytoLazyFrame, build_context
+from .lazy import scan_parquet as _lazy_scan_parquet
+from .schema import CytoSchema
 from .volume import (
     build_3d_html_from_path,
     build_3d_image_html_stub,
@@ -112,7 +116,7 @@ class CytoDataFrame(pd.DataFrame):
     # while avoiding oversized outputs in typical Jupyter viewports.
     _DEFAULT_TABLE_MAX_HEIGHT: ClassVar[str] = "700px"
 
-    def __init__(  # noqa: PLR0913
+    def __init__(  # noqa: PLR0913, C901, PLR0912
         self: CytoDataFrame_type,
         data: Union[CytoDataFrame_type, pd.DataFrame, str, pathlib.Path],
         data_context_dir: Optional[str] = None,
@@ -326,6 +330,19 @@ def __init__(  # noqa: PLR0913
 
             super().__init__(data)
 
+        # polars/arrow inputs are probed last so the common pandas/path cases
+        # never trigger a polars/pyarrow import.
+        elif engine.is_polars_lazyframe(data):
+            # Lazy polars input: the query plan is executed (collected) here and
+            # the result is converted into the pandas compatibility facade.
+            self._custom_attrs["data_source"] = "polars.LazyFrame"
+            super().__init__(engine.normalize_to_pandas(data))
+        elif engine.is_polars_dataframe(data):
+            self._custom_attrs["data_source"] = "polars.DataFrame"
+            super().__init__(engine.normalize_to_pandas(data))
+        elif engine.is_arrow_table(data):
+            self._custom_attrs["data_source"] = "pyarrow.Table"
+            super().__init__(engine.normalize_to_pandas(data))
         else:
             super().__init__(data)
 
@@ -1506,6 +1523,111 @@ def export(
         else:
             raise ValueError("Unsupported file format for export.")
 
+    # ------------------------------------------------------------------ #
+    # Backend / interchange conversions (Polars engine, Arrow contract)
+    # ------------------------------------------------------------------ #
+    def to_pandas(self: CytoDataFrame_type) -> pd.DataFrame:
+        """
+        Return the data as a plain :class:`pandas.DataFrame`.
+
+        The pandas layer is CytoDataFrame's compatibility boundary; this
+        returns a standard pandas DataFrame (not a CytoDataFrame) for use with
+        pandas-native tooling.
+        """
+        return pd.DataFrame(self)
+
+    def to_polars(self: CytoDataFrame_type) -> Any:
+        """
+        Return the tabular data as an eager :class:`polars.DataFrame`.
+
+        Note: polars has no row-index concept, so the pandas index is dropped.
+        Object columns holding non-Arrow values (e.g. numpy image arrays) cannot
+        be converted and will raise a ``TypeError``.
+        """
+        return engine.to_polars(pd.DataFrame(self))
+
+    def to_lazy(self: CytoDataFrame_type) -> CytoLazyFrame:
+        """
+        Return a lazy, Polars-backed :class:`CytoLazyFrame` view.
+
+        The returned object carries this frame's image/display context so that a
+        subsequent ``.collect()`` rebuilds an equivalently-configured
+        CytoDataFrame.
+        """
+        return CytoLazyFrame(
+            engine.to_lazyframe(pd.DataFrame(self)),
+            context=build_context(self._custom_attrs),
+        )
+
+    def to_arrow(self: CytoDataFrame_type, preserve_index: bool = False) -> Any:
+        """
+        Return the tabular data as a :class:`pyarrow.Table`.
+
+        Arrow is CytoDataFrame's canonical schema and interchange contract.
+        """
+        return engine.to_arrow(pd.DataFrame(self), preserve_index=preserve_index)
+
+    @property
+    def cyto_schema(self: CytoDataFrame_type) -> CytoSchema:
+        """
+        The inferred :class:`CytoSchema` describing this frame's columns.
+
+        Classifies columns into image/object keys, metadata, feature, and
+        geometry roles using the Arrow-native schema contract.
+        """
+        return CytoSchema.from_pandas(pd.DataFrame(self))
+
+    @classmethod
+    def from_file(
+        cls,
+        source: Union[str, pathlib.Path],
+        **kwargs: Any,
+    ) -> "CytoDataFrame":
+        """
+        Eagerly construct a CytoDataFrame from a file path.
+
+        A thin, explicit alias for ``CytoDataFrame(source, ...)`` matching the
+        domain-oriented API in the evolution plan.
+        """
+        return cls(source, **kwargs)
+
+    @classmethod
+    def scan_parquet(  # noqa: PLR0913
+        cls,
+        source: Union[str, pathlib.Path],
+        data_context_dir: Optional[str] = None,
+        data_mask_context_dir: Optional[str] = None,
+        data_outline_context_dir: Optional[str] = None,
+        segmentation_file_regex: Optional[Dict[str, str]] = None,
+        image_adjustment: Optional[Callable] = None,
+        display_options: Optional[Dict[str, Any]] = None,
+        **kwargs: Any,
+    ) -> CytoLazyFrame:
+        """
+        Lazily scan a Parquet source into a :class:`CytoLazyFrame`.
+
+        Enables predicate/projection pushdown for large profiling datasets::
+
+            (
+                CytoDataFrame.scan_parquet("profiles.parquet")
+                .filter(...)
+                .select_features()
+                .collect()
+            )
+
+        The image/display context provided here is carried through the lazy
+        pipeline and applied when the result is ``.collect()``-ed.
+        """
+        context = {
+            "data_context_dir": data_context_dir,
+            "data_mask_context_dir": data_mask_context_dir,
+            "data_outline_context_dir": data_outline_context_dir,
+            "segmentation_file_regex": segmentation_file_regex,
+            "image_adjustment": image_adjustment,
+            "display_options": display_options,
+        }
+        return _lazy_scan_parquet(source, context=context, **kwargs)
+
     def to_ome_parquet(  # noqa: PLR0915, PLR0912, C901
         self: CytoDataFrame_type,
         file_path: Union[str, pathlib.Path],
diff --git a/src/cytodataframe/lazy.py b/src/cytodataframe/lazy.py
new file mode 100644
index 0000000..25b40bb
--- /dev/null
+++ b/src/cytodataframe/lazy.py
@@ -0,0 +1,271 @@
+"""
+Lazy Polars query builder for CytoDataFrame.
+
+``CytoLazyFrame`` wraps a :class:`polars.LazyFrame` and carries the
+CytoDataFrame "context" (image directories, display options, ...) so that a
+lazy pipeline can be materialized back into a fully-configured
+:class:`~cytodataframe.frame.CytoDataFrame`.
+
+This is the surface that powers the lazy-execution example from the evolution
+plan::
+
+    (
+        CytoDataFrame.scan_parquet("profiles.parquet")
+        .filter(pl.col("Metadata_Well") == "A01")
+        .select_features()
+        .collect()
+    )
+
+It is intentionally a *separate* type from ``CytoDataFrame`` so that its
+polars-native ``filter``/``select`` semantics never collide with pandas' own
+``DataFrame.filter`` (which CytoDataFrame inherits and relies on internally).
+"""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Sequence
+
+from . import engine
+from .schema import CytoSchema
+
+if TYPE_CHECKING:  # pragma: no cover - typing only
+    import pandas as pd
+    import polars as pl
+    import pyarrow as pa
+
+    from .frame import CytoDataFrame
+
+
+# Constructor kwargs that carry image/display context and should survive a lazy
+# pipeline so ``collect()`` rebuilds an equivalently-configured CytoDataFrame.
+_CONTEXT_KEYS = (
+    "data_context_dir",
+    "data_mask_context_dir",
+    "data_outline_context_dir",
+    "segmentation_file_regex",
+    "image_adjustment",
+    "display_options",
+)
+
+# Number of column names shown in a CytoLazyFrame repr before truncating.
+_REPR_PREVIEW_COLS = 8
+
+
+class CytoLazyGroupBy:
+    """Thin wrapper around a polars lazy group-by that returns a CytoLazyFrame."""
+
+    def __init__(self, group_by: Any, context: Dict[str, Any]) -> None:
+        self._group_by = group_by
+        self._context = context
+
+    def agg(self, *aggs: Any, **named_aggs: Any) -> "CytoLazyFrame":
+        """Aggregate grouped data, returning a :class:`CytoLazyFrame`."""
+        return CytoLazyFrame(
+            self._group_by.agg(*aggs, **named_aggs), context=self._context
+        )
+
+
+class CytoLazyFrame:
+    """
+    A lazy, Polars-backed view over CytoDataFrame data.
+
+    The wrapped :class:`polars.LazyFrame` is the canonical execution engine;
+    operations build up a query plan and only execute on :meth:`collect`.
+    """
+
+    def __init__(
+        self,
+        data: Any,
+        *,
+        context: Optional[Dict[str, Any]] = None,
+    ) -> None:
+        self._lf: "pl.LazyFrame" = engine.to_lazyframe(data)
+        self._context: Dict[str, Any] = dict(context or {})
+
+    # ------------------------------------------------------------------ #
+    # Introspection
+    # ------------------------------------------------------------------ #
+    @property
+    def lazyframe(self) -> "pl.LazyFrame":
+        """The underlying :class:`polars.LazyFrame`."""
+        return self._lf
+
+    @property
+    def context(self) -> Dict[str, Any]:
+        """The CytoDataFrame context carried through the pipeline."""
+        return dict(self._context)
+
+    @property
+    def columns(self) -> List[str]:
+        """Column names of the (lazily) resolved schema."""
+        return list(self._lf.collect_schema().names())
+
+    @property
+    def cyto_schema(self) -> CytoSchema:
+        """Infer a :class:`CytoSchema` from the lazy schema (no data scan)."""
+        return CytoSchema.from_polars(self._lf)
+
+    def _wrap(self, lazyframe: "pl.LazyFrame") -> "CytoLazyFrame":
+        """Wrap a derived LazyFrame, preserving context."""
+        return CytoLazyFrame(lazyframe, context=self._context)
+
+    # ------------------------------------------------------------------ #
+    # Table operations (delegated to polars, return CytoLazyFrame)
+    # ------------------------------------------------------------------ #
+    def filter(self, *predicates: Any, **constraints: Any) -> "CytoLazyFrame":
+        """Filter rows. Mirrors :meth:`polars.LazyFrame.filter`."""
+        return self._wrap(self._lf.filter(*predicates, **constraints))
+
+    def select(self, *exprs: Any, **named_exprs: Any) -> "CytoLazyFrame":
+        """Select/transform columns. Mirrors :meth:`polars.LazyFrame.select`."""
+        return self._wrap(self._lf.select(*exprs, **named_exprs))
+
+    def with_columns(self, *exprs: Any, **named_exprs: Any) -> "CytoLazyFrame":
+        """Add/replace columns. Mirrors :meth:`polars.LazyFrame.with_columns`."""
+        return self._wrap(self._lf.with_columns(*exprs, **named_exprs))
+
+    def rename(self, mapping: Dict[str, str], **kwargs: Any) -> "CytoLazyFrame":
+        """Rename columns. Mirrors :meth:`polars.LazyFrame.rename`."""
+        return self._wrap(self._lf.rename(mapping, **kwargs))
+
+    def drop(self, *columns: Any, **kwargs: Any) -> "CytoLazyFrame":
+        """Drop columns. Mirrors :meth:`polars.LazyFrame.drop`."""
+        return self._wrap(self._lf.drop(*columns, **kwargs))
+
+    def sort(self, *by: Any, **kwargs: Any) -> "CytoLazyFrame":
+        """Sort rows. Mirrors :meth:`polars.LazyFrame.sort`."""
+        return self._wrap(self._lf.sort(*by, **kwargs))
+
+    def unique(self, *args: Any, **kwargs: Any) -> "CytoLazyFrame":
+        """Drop duplicate rows. Mirrors :meth:`polars.LazyFrame.unique`."""
+        return self._wrap(self._lf.unique(*args, **kwargs))
+
+    def head(self, n: int = 5) -> "CytoLazyFrame":
+        """Return the first ``n`` rows lazily."""
+        return self._wrap(self._lf.head(n))
+
+    def tail(self, n: int = 5) -> "CytoLazyFrame":
+        """Return the last ``n`` rows lazily."""
+        return self._wrap(self._lf.tail(n))
+
+    def limit(self, n: int = 5) -> "CytoLazyFrame":
+        """Limit to ``n`` rows lazily."""
+        return self._wrap(self._lf.limit(n))
+
+    def join(
+        self,
+        other: "CytoLazyFrame | pl.LazyFrame | pl.DataFrame | pd.DataFrame",
+        *args: Any,
+        **kwargs: Any,
+    ) -> "CytoLazyFrame":
+        """
+        Join against another frame. Mirrors :meth:`polars.LazyFrame.join`.
+
+        ``other`` may be a CytoLazyFrame, polars LazyFrame/DataFrame, or pandas
+        DataFrame; it is normalized to a LazyFrame first.
+        """
+        if isinstance(other, CytoLazyFrame):
+            other_lf = other._lf
+        else:
+            other_lf = engine.to_lazyframe(other)
+        return self._wrap(self._lf.join(other_lf, *args, **kwargs))
+
+    def group_by(self, *by: Any, **kwargs: Any) -> CytoLazyGroupBy:
+        """Group rows for aggregation. Mirrors :meth:`polars.LazyFrame.group_by`."""
+        return CytoLazyGroupBy(self._lf.group_by(*by, **kwargs), self._context)
+
+    def select_features(
+        self,
+        features: Optional[Iterable[str]] = None,
+        *,
+        keep_metadata: bool = True,
+    ) -> "CytoLazyFrame":
+        """
+        Select feature columns (optionally keeping metadata columns).
+
+        When ``features`` is omitted, the schema-inferred feature columns are
+        used; requested names absent from the frame are silently skipped. When
+        ``keep_metadata`` is True, the schema's ``metadata_columns`` are kept
+        too. The frame's original column order is preserved.
+        """
+        import polars as pl
+
+        schema = self.cyto_schema
+        # Iterating the frame's own columns keeps the original column order.
+        available = self.columns
+
+        if features is None:
+            feature_set = set(schema.feature_columns)
+        else:
+            feature_set = {str(f) for f in features}
+
+        keep = set(feature_set)
+        if keep_metadata:
+            keep.update(schema.metadata_columns)
+
+        selected = [col for col in available if col in keep]
+        return self._wrap(self._lf.select([pl.col(c) for c in selected]))
+
+    # ------------------------------------------------------------------ #
+    # Materialization
+    # ------------------------------------------------------------------ #
+    def collect_polars(self, **kwargs: Any) -> "pl.DataFrame":
+        """Execute the query plan, returning an eager :class:`polars.DataFrame`."""
+        return self._lf.collect(**kwargs)
+
+    def to_polars(self, **kwargs: Any) -> "pl.DataFrame":
+        """Alias for :meth:`collect_polars`."""
+        return self.collect_polars(**kwargs)
+
+    def to_arrow(self, **kwargs: Any) -> "pa.Table":
+        """Execute and return a :class:`pyarrow.Table`."""
+        return self.collect_polars(**kwargs).to_arrow()
+
+    def to_pandas(self, **kwargs: Any) -> "pd.DataFrame":
+        """Execute and return a :class:`pandas.DataFrame`."""
+        return self.collect_polars(**kwargs).to_pandas()
+
+    def collect(self, **kwargs: Any) -> "CytoDataFrame":
+        """
+        Execute the query plan and return a configured ``CytoDataFrame``.
+
+        The CytoDataFrame is rebuilt with the image/display context that was
+        carried through the lazy pipeline.
+        """
+        # Imported lazily to avoid a circular import at module load time.
+        from .frame import CytoDataFrame
+
+        pandas_df = self.collect_polars(**kwargs).to_pandas()
+        context = {k: v for k, v in self._context.items() if k in _CONTEXT_KEYS}
+        return CytoDataFrame(pandas_df, **context)
+
+    def __repr__(self) -> str:
+        try:
+            cols = self.columns
+            head = cols[:_REPR_PREVIEW_COLS]
+            preview = ", ".join(head) + (
+                " ..." if len(cols) > _REPR_PREVIEW_COLS else ""
+            )
+        except Exception:  # repr must never raise
+            preview = "<unresolved schema>"
+        return f"CytoLazyFrame(columns=[{preview}])"
+
+
+def build_context(custom_attrs: Dict[str, Any]) -> Dict[str, Any]:
+    """Extract the carry-through context from a CytoDataFrame ``_custom_attrs``."""
+    return {key: custom_attrs.get(key) for key in _CONTEXT_KEYS}
+
+
+def scan_parquet(
+    source: Any,
+    *,
+    context: Optional[Dict[str, Any]] = None,
+    **kwargs: Any,
+) -> CytoLazyFrame:
+    """Lazily scan a Parquet source into a :class:`CytoLazyFrame`."""
+    return CytoLazyFrame(engine.scan_parquet(source, **kwargs), context=context)
+
+
+def from_sequence_context(keys: Sequence[str], values: Sequence[Any]) -> Dict[str, Any]:
+    """Build a context dict from parallel key/value sequences (helper for tests)."""
+    return dict(zip(keys, values, strict=False))
diff --git a/src/cytodataframe/schema.py b/src/cytodataframe/schema.py
new file mode 100644
index 0000000..5ec5982
--- /dev/null
+++ b/src/cytodataframe/schema.py
@@ -0,0 +1,456 @@
+"""
+Formal schema system for CytoDataFrame.
+
+This module implements the Arrow-native schema contract described in the
+CytoDataFrame evolution plan (Phases 2 and 3). It provides:
+
+    * :class:`CytoSchema` - an explicit, inspectable classification of a
+      profiling table's columns into image / object keys, metadata, feature, and
+      geometry roles. The classification reduces reliance on ad-hoc naming
+      conventions scattered through the codebase and gives downstream operations
+      a single source of truth.
+    * Arrow-native struct helpers that fold the flattened CellProfiler-style
+      bounding-box / centroid columns into nested Arrow structs while keeping the
+      flattened compatibility columns available for existing consumers.
+
+Schema inference is deterministic and works from a pandas DataFrame, a polars
+DataFrame/LazyFrame, or a :class:`pyarrow.Schema`/:class:`pyarrow.Table`.
+"""
+
+from __future__ import annotations
+
+import re
+from dataclasses import dataclass, field
+from typing import TYPE_CHECKING, Any, List, Mapping, Optional, Sequence
+
+if TYPE_CHECKING:  # pragma: no cover - typing only
+    import pandas as pd
+    import polars as pl
+    import pyarrow as pa
+
+
+# --------------------------------------------------------------------------- #
+# Column-role detection patterns
+# --------------------------------------------------------------------------- #
+
+# Geometry columns hold spatial coordinates (bounding boxes / centroids).
+_GEOMETRY_PATTERN = re.compile(
+    r"(boundingbox"
+    r"|location_center"
+    r"|areashape_center"
+    r"|center_mass"
+    # ``\b`` also matches at end of input, so no separate ``$`` branch is needed.
+    r"|_center_[xyz]\b)",
+    flags=re.IGNORECASE,
+)
+
+# Image filename / path columns reference images rather than measurements.
+_IMAGE_FILENAME_PATTERN = re.compile(r"filename", flags=re.IGNORECASE)  # substring
+_IMAGE_PATHNAME_PATTERN = re.compile(r"pathname", flags=re.IGNORECASE)  # substring
+
+# Casefolded names of single-cell object identifier columns, in preference order.
+_OBJECT_KEY_PRIORITY = (
+    "metadata_objectnumber",
+    "metadata_object_number",
+    "objectnumber",
+    "object_number",
+)
+_OBJECT_KEY_SUFFIX = "number_object_number"  # matched against casefolded names
+
+# Identifier-style metadata columns, compared against the casefolded column name.
+_KNOWN_ID_COLUMNS = frozenset(
+    {
+        "imagenumber",
+        "objectnumber",
+        "object_number",
+        "tablenumber",
+        "table_number",
+        "plate",
+        "well",
+        "site",
+    }
+)
+
+
+def _is_image_column(name: str) -> bool:
+    """Tell whether ``name`` matches the image filename or pathname pattern."""
+    for pattern in (_IMAGE_FILENAME_PATTERN, _IMAGE_PATHNAME_PATTERN):
+        if pattern.search(name) is not None:
+            return True
+    return False
+
+
+def _is_geometry_column(name: str) -> bool:
+    """Tell whether ``name`` contains a bounding-box or centroid name fragment."""
+    return _GEOMETRY_PATTERN.search(name) is not None
+
+
+def _is_identifier_metadata(name: str) -> bool:
+    """Tell whether ``name`` is identifier-like: prefix, exact-name, or suffix hit."""
+    folded = name.casefold()
+    return (
+        folded.startswith("metadata")
+        or folded in _KNOWN_ID_COLUMNS
+        or folded.endswith(_OBJECT_KEY_SUFFIX)
+    )
+
+
+@dataclass
+class CytoSchema:
+    """
+    Role-based partition of the columns of a profiling table.
+
+    Attributes:
+        image_key:
+            First image filename/path column found, or ``None``.
+        object_key:
+            Column identifying individual objects (cells), or ``None``.
+        metadata_columns:
+            Identifier, annotation, image-reference and non-numeric columns.
+        feature_columns:
+            Numeric measurement columns (the modeling features).
+        geometry_columns:
+            Spatial coordinate columns (bounding boxes, centroids).
+        image_columns:
+            Every image filename/path column; ``image_key`` is the first one.
+    """
+
+    image_key: Optional[str] = None
+    object_key: Optional[str] = None
+    metadata_columns: List[str] = field(default_factory=list)
+    feature_columns: List[str] = field(default_factory=list)
+    geometry_columns: List[str] = field(default_factory=list)
+    image_columns: List[str] = field(default_factory=list)
+
+    # ------------------------------------------------------------------ #
+    # Construction / inference
+    # ------------------------------------------------------------------ #
+    @classmethod
+    def from_columns(
+        cls,
+        columns: Sequence[str],
+        numeric: Optional[Mapping[str, bool]] = None,
+    ) -> "CytoSchema":
+        """
+        Sort ``columns`` into image / geometry / metadata / feature roles.
+
+        Args:
+            columns:
+                Column names, in table order.
+            numeric:
+                Optional ``name -> is-numeric`` lookup. Columns missing from it
+                (or every column, when it is ``None``) count as numeric, which
+                leaves the decision to the name-based rules alone.
+
+        Rules are tried in order: image, geometry, identifier/non-numeric.
+        """
+        dtype_flags = {} if numeric is None else dict(numeric)
+
+        image_columns: List[str] = []
+        geometry: List[str] = []
+        metadata: List[str] = []
+        features: List[str] = []
+
+        for raw_name in columns:
+            label = str(raw_name)
+            looks_numeric = bool(dtype_flags.get(label, True))
+
+            if _is_image_column(label):
+                # Image references are also listed with the metadata.
+                image_columns.append(label)
+                metadata.append(label)
+            elif _is_geometry_column(label):
+                geometry.append(label)
+            elif _is_identifier_metadata(label) or not looks_numeric:
+                metadata.append(label)
+            else:
+                features.append(label)
+
+        first_image = image_columns[0] if image_columns else None
+        detected_object = cls._detect_object_key(columns)
+
+        return cls(
+            image_key=first_image,
+            object_key=detected_object,
+            metadata_columns=metadata,
+            feature_columns=features,
+            geometry_columns=geometry,
+            image_columns=image_columns,
+        )
+
+    @staticmethod
+    def _detect_object_key(columns: Sequence[str]) -> Optional[str]:
+        """Pick the object identifier: priority names first, then suffix match."""
+        names = [str(entry) for entry in columns]
+        by_folded = {name.casefold(): name for name in names}
+        exact = next((c for c in _OBJECT_KEY_PRIORITY if c in by_folded), None)
+        if exact is not None:
+            return by_folded[exact]
+        return next(
+            (n for n in names if n.casefold().endswith(_OBJECT_KEY_SUFFIX)), None
+        )
+
+    @classmethod
+    def from_pandas(cls, data: "pd.DataFrame") -> "CytoSchema":
+        """Build a schema from pandas dtypes; booleans do not count as numeric."""
+        import pandas as pd
+
+        dtype_api = pd.api.types
+        flags = {}
+        for label, dtype in data.dtypes.items():
+            is_number = dtype_api.is_numeric_dtype(dtype)
+            # Bool dtypes are deliberately left out of the numeric set.
+            flags[str(label)] = is_number and not dtype_api.is_bool_dtype(dtype)
+
+        return cls.from_columns(list(data.columns), numeric=flags)
+
+    @classmethod
+    def from_arrow(cls, schema: "pa.Schema") -> "CytoSchema":
+        """Build a schema from Arrow types (integer, floating, decimal = numeric)."""
+        import pyarrow as pa
+
+        # Any one of these predicates marks the Arrow type as numeric.
+        checks = (pa.types.is_integer, pa.types.is_floating, pa.types.is_decimal)
+
+        flags = {
+            entry.name: any(check(entry.type) for check in checks)
+            for entry in schema
+        }
+
+        return cls.from_columns(list(schema.names), numeric=flags)
+
+    @classmethod
+    def from_polars(cls, data: "pl.DataFrame | pl.LazyFrame") -> "CytoSchema":
+        """Build a schema from polars dtypes, via ``collect_schema`` when present."""
+        layout = data.collect_schema() if hasattr(data, "collect_schema") else None
+        # Fall back to ``.schema`` when ``collect_schema`` is missing or yields None.
+        layout = data.schema if layout is None else layout
+        flags = {column: kind.is_numeric() for column, kind in layout.items()}
+        return cls.from_columns(list(layout.keys()), numeric=flags)
+
+    @classmethod
+    def infer(cls, data: Any) -> "CytoSchema":
+        """
+        Build a schema from a pandas, polars, or Arrow object.
+
+        The runtime type of ``data`` selects the matching ``from_*`` constructor,
+        so no up-front conversion is required. Arrow is checked first, then
+        polars, then pandas.
+
+        Raises:
+            TypeError: ``data`` is none of the supported types.
+        """
+        import pandas as pd
+
+        try:
+            import pyarrow as pa
+
+            if isinstance(data, (pa.Table, pa.Schema)):
+                arrow_schema = data.schema if isinstance(data, pa.Table) else data
+                return cls.from_arrow(arrow_schema)
+        except ImportError:  # pragma: no cover - pyarrow is a hard dependency
+            pass
+
+        try:
+            import polars as pl
+
+            if isinstance(data, (pl.DataFrame, pl.LazyFrame)):
+                return cls.from_polars(data)
+        except ImportError:  # pragma: no cover - polars is a hard dependency
+            pass
+
+        if not isinstance(data, pd.DataFrame):
+            message = f"Cannot infer a CytoSchema from object of type {type(data)!r}."
+            raise TypeError(message)
+        return cls.from_pandas(data)
+
+    # ------------------------------------------------------------------ #
+    # Introspection / validation
+    # ------------------------------------------------------------------ #
+    @property
+    def columns(self) -> List[str]:
+        """
+        Metadata, geometry, then feature columns as one duplicate-free list.
+
+        A name that appears more than once keeps its first position.
+        """
+        buckets = (
+            self.metadata_columns,
+            self.geometry_columns,
+            self.feature_columns,
+        )
+        # dict keys are insertion-ordered and unique, hence an ordered dedupe.
+        unique = dict.fromkeys(name for bucket in buckets for name in bucket)
+        return list(unique)
+
+    def validate(self, strict: bool = False) -> List[str]:
+        """
+        Report internal inconsistencies of the schema.
+
+        Checked: features overlapping metadata, features overlapping geometry,
+        and an ``image_key`` that is absent from the metadata columns.
+
+        Returns:
+            Human-readable issue descriptions (empty when consistent).
+
+        Raises:
+            ValueError: ``strict`` is true and at least one issue was found.
+        """
+        problems: List[str] = []
+        features = set(self.feature_columns)
+        metadata = set(self.metadata_columns)
+
+        rivals = (("metadata", metadata), ("geometry", set(self.geometry_columns)))
+        for role, members in rivals:
+            shared = features & members
+            if shared:
+                problems.append(
+                    f"Columns classified as both feature and {role}: {sorted(shared)}"
+                )
+
+        if self.image_key is not None and self.image_key not in metadata:
+            problems.append(
+                f"image_key {self.image_key!r} is not present in metadata columns."
+            )
+
+        if problems and strict:
+            raise ValueError("Invalid CytoSchema: " + "; ".join(problems))
+        return problems
+
+    def require(self, *keys: str) -> "CytoSchema":
+        """
+        Ensure each named attribute is set, returning ``self`` for chaining.
+
+        Args:
+            keys:
+                Attribute names such as ``"image_key"`` or ``"object_key"``.
+
+        Raises:
+            ValueError: an attribute is ``None`` or does not exist.
+        """
+        absent = list(filter(lambda key: getattr(self, key, None) is None, keys))
+        if not absent:
+            return self
+        raise ValueError(f"CytoSchema is missing required key(s): {absent}")
+
+    def to_dict(self) -> dict:
+        """Snapshot the schema as a plain dict; list fields are shallow-copied."""
+        snapshot: dict = {
+            "image_key": self.image_key,
+            "object_key": self.object_key,
+        }
+        list_fields = ("metadata_columns", "feature_columns", "geometry_columns")
+        for attr in (*list_fields, "image_columns"):
+            snapshot[attr] = list(getattr(self, attr))
+        return snapshot
+
+
+# --------------------------------------------------------------------------- #
+# Arrow-native struct helpers (Phase 3)
+# --------------------------------------------------------------------------- #
+
+# Bounding-box column prefixes keyed by compartment (flattened CellProfiler naming).
+# Tried in dict order; the struct field order is (min_x, min_y, max_x, max_y).
+_BBOX_GROUPS = {
+    "cytoplasm": "Cytoplasm_AreaShape_BoundingBox",
+    "nuclei": "Nuclei_AreaShape_BoundingBox",
+    "cells": "Cells_AreaShape_BoundingBox",
+    "generic": "AreaShape_BoundingBox",
+}
+
+# Centroid column prefixes keyed by compartment (flattened naming), tried in order.
+_CENTROID_GROUPS = {
+    "nuclei": "Nuclei_Location_Center",
+    "nuclei_meta": "Metadata_Nuclei_Location_Center",
+    "cells": "Cells_Location_Center",
+    "cells_meta": "Metadata_Cells_Location_Center",
+    "cytoplasm": "Cytoplasm_Location_Center",
+    "cytoplasm_meta": "Metadata_Cytoplasm_Location_Center",
+}
+
+
+def _bbox_field_columns(prefix: str) -> dict:
+    """Map each bbox struct field (``min_x`` ... ``max_z``) to its flat column."""
+    words = {"min": "Minimum", "max": "Maximum"}
+    # Planar bounds first, depth bounds last - the key order of the result.
+    layout = ("min_x", "min_y", "max_x", "max_y", "min_z", "max_z")
+    columns = {}
+    for key in layout:
+        bound, axis = key.split("_")
+        columns[key] = f"{prefix}{words[bound]}_{axis.upper()}"
+    return columns
+
+
+def add_bbox_struct(
+    data: "pl.DataFrame",
+    struct_name: str = "bbox",
+    keep_flattened: bool = True,
+) -> "pl.DataFrame":
+    """
+    Fold flattened bounding-box columns into a nested Arrow struct.
+
+    The first compartment group (cytoplasm -> nuclei -> cells -> generic) that
+    has all four planar bounds is used; ``min_z``/``max_z`` are added only when
+    both are present.
+
+    Args:
+        data: Frame holding the flattened columns.
+        struct_name: Name of the struct column to add.
+        keep_flattened: When False, drop the columns folded into the struct.
+            Columns left out of the struct (e.g. an unpaired Z bound) are kept.
+
+    Returns:
+        ``data`` plus the struct column, or ``data`` unchanged without a match.
+    """
+    import polars as pl
+
+    available = set(data.columns)
+    for prefix in _BBOX_GROUPS.values():
+        cols = _bbox_field_columns(prefix)
+        folded = ["min_x", "min_y", "max_x", "max_y"]
+        if any(cols[key] not in available for key in folded):
+            continue
+        if cols["min_z"] in available and cols["max_z"] in available:
+            folded += ["min_z", "max_z"]
+        fields = [pl.col(cols[key]).alias(key) for key in folded]
+        result = data.with_columns(pl.struct(fields).alias(struct_name))
+        if not keep_flattened:
+            # Only folded columns go; an unpaired Z bound lives nowhere else.
+            result = result.drop([cols[key] for key in folded])
+        return result
+    return data
+
+
+def add_centroid_struct(
+    data: "pl.DataFrame",
+    struct_name: str = "centroid",
+    keep_flattened: bool = True,
+) -> "pl.DataFrame":
+    """
+    Pack flattened centroid columns into one struct column ``{x, y[, z]}``.
+
+    Groups in ``_CENTROID_GROUPS`` are tried in order and the first one with
+    both X and Y columns wins; Z is included when present.
+
+    The flat columns stay unless ``keep_flattened`` is False. Without a
+    matching group ``data`` is returned unchanged.
+    """
+    import polars as pl
+
+    present = set(data.columns)
+    for stem in _CENTROID_GROUPS.values():
+        axis_columns = {axis: f"{stem}_{axis.upper()}" for axis in ("x", "y", "z")}
+        if not (axis_columns["x"] in present and axis_columns["y"] in present):
+            continue
+        # Keep only the axes that exist; x and y are guaranteed at this point.
+        used = {
+            axis: column
+            for axis, column in axis_columns.items()
+            if column in present
+        }
+        members = [pl.col(column).alias(axis) for axis, column in used.items()]
+        folded = data.with_columns(pl.struct(members).alias(struct_name))
+        if keep_flattened:
+            return folded
+        return folded.drop(list(used.values()))
+    return data
diff --git a/tests/test_engine.py b/tests/test_engine.py
new file mode 100644
index 0000000..3ee82d6
--- /dev/null
+++ b/tests/test_engine.py
@@ -0,0 +1,154 @@
+"""
+Tests for the CytoDataFrame backend abstraction layer (engine.py).
+
+Covers Arrow round-trip / interchange guarantees described in the evolution
+plan: row counts, nulls, schema, and column ordering must be preserved across
+
+    cdf -> Arrow   -> cdf
+    cdf -> Parquet -> cdf
+    cdf -> pandas  -> cdf
+    cdf -> Polars  -> cdf
+"""
+
+import pathlib
+
+import numpy as np
+import pandas as pd
+import polars as pl
+import pyarrow as pa
+import pytest
+
+from cytodataframe import CytoDataFrame, engine
+
+
+@pytest.fixture(name="profiling_frame")
+def fixture_profiling_frame() -> pd.DataFrame:
+    """Four profiling-style rows mixing strings, ints, floats and missing values."""
+    # One missing string and one NaN float cover null handling.
+    columns = {
+        "Metadata_Well": ["A01", "A01", "B02", None],
+        "Metadata_ObjectNumber": [1, 2, 1, 2],
+        "Cells_AreaShape_Area": [10.0, np.nan, 30.0, 40.0],
+        "Nuclei_Location_Center_X": [5.0, 6.0, 7.0, 8.0],
+        "Nuclei_Intensity_MeanIntensity_DNA": [0.1, 0.2, 0.3, 0.4],
+    }
+    return pd.DataFrame(columns)
+
+
+def _assert_tabular_equivalent(left: pd.DataFrame, right: pd.DataFrame) -> None:
+    """Check equal length, column order, per-column null masks and non-null values."""
+    expected = left.reset_index(drop=True)
+    actual = right.reset_index(drop=True)
+    assert len(expected) == len(actual)
+    assert list(expected.columns) == list(actual.columns)
+    for name in expected.columns:
+        want, got = expected[name], actual[name]
+        want_missing, got_missing = want.isna(), got.isna()
+        same_mask = np.array_equal(want_missing.to_numpy(), got_missing.to_numpy())
+        assert same_mask, f"null mask differs for {name}"
+        kept_want, kept_got = want[~want_missing].tolist(), got[~got_missing].tolist()
+        assert kept_want == kept_got, f"values differ for {name}"
+
+
+# --------------------------------------------------------------------------- #
+# Conversions from every supported input type
+# --------------------------------------------------------------------------- #
+def test_engine_to_arrow_from_all_inputs(profiling_frame: pd.DataFrame):
+    """``engine.to_arrow`` accepts pandas, polars (eager/lazy) and Arrow inputs."""
+    inputs = [
+        profiling_frame,
+        pl.from_pandas(profiling_frame),
+        pl.from_pandas(profiling_frame).lazy(),
+        pa.Table.from_pandas(profiling_frame, preserve_index=False),
+    ]
+    for candidate in inputs:
+        converted = engine.to_arrow(candidate)
+        assert isinstance(converted, pa.Table)
+        assert converted.num_rows == len(profiling_frame)
+        assert converted.schema.names == list(profiling_frame.columns)
+
+
+def test_engine_to_polars_from_all_inputs(profiling_frame: pd.DataFrame):
+    """``engine.to_polars`` yields an eager frame for every supported input."""
+    for candidate in (
+        profiling_frame,
+        pl.from_pandas(profiling_frame),
+        pl.from_pandas(profiling_frame).lazy(),
+        pa.Table.from_pandas(profiling_frame, preserve_index=False),
+    ):
+        converted = engine.to_polars(candidate)
+        assert isinstance(converted, pl.DataFrame)
+        assert converted.height == len(profiling_frame)
+        assert converted.columns == list(profiling_frame.columns)
+
+
+def test_engine_to_lazyframe_passthrough_and_convert(profiling_frame: pd.DataFrame):
+    """A LazyFrame comes back as the same object; pandas input gets converted."""
+    already_lazy = pl.from_pandas(profiling_frame).lazy()
+    assert engine.to_lazyframe(already_lazy) is already_lazy
+
+    from_pandas = engine.to_lazyframe(profiling_frame)
+    assert isinstance(from_pandas, pl.LazyFrame)
+    assert from_pandas.collect().height == len(profiling_frame)
+
+
+def test_engine_to_pandas_returns_pandas_identity(profiling_frame: pd.DataFrame):
+    """pandas input is handed back as-is; polars input converts without loss."""
+    assert engine.to_pandas(profiling_frame) is profiling_frame
+    round_tripped = engine.to_pandas(pl.from_pandas(profiling_frame))
+    assert isinstance(round_tripped, pd.DataFrame)
+    _assert_tabular_equivalent(profiling_frame, round_tripped)
+
+
+def test_engine_rejects_unsupported_type():
+    """Unsupported inputs make both converters raise ``TypeError``."""
+    for convert, bad_input in ((engine.to_arrow, object()), (engine.to_polars, 42)):
+        with pytest.raises(TypeError):
+            convert(bad_input)
+
+
+# --------------------------------------------------------------------------- #
+# Round-trip interchange guarantees
+# --------------------------------------------------------------------------- #
+def test_roundtrip_arrow(profiling_frame: pd.DataFrame):
+    """cdf -> Arrow -> cdf keeps rows, columns, nulls and values."""
+    as_arrow = CytoDataFrame(profiling_frame).to_arrow()
+    rebuilt = CytoDataFrame(as_arrow)
+    assert isinstance(rebuilt, CytoDataFrame)
+    _assert_tabular_equivalent(profiling_frame, pd.DataFrame(rebuilt))
+
+
+def test_roundtrip_polars(profiling_frame: pd.DataFrame):
+    """cdf -> Polars -> cdf keeps rows, columns, nulls and values."""
+    rebuilt = CytoDataFrame(CytoDataFrame(profiling_frame).to_polars())
+    _assert_tabular_equivalent(profiling_frame, pd.DataFrame(rebuilt))
+
+
+def test_roundtrip_pandas(profiling_frame: pd.DataFrame):
+    """cdf -> pandas -> cdf keeps rows, columns, nulls and values."""
+    rebuilt = CytoDataFrame(CytoDataFrame(profiling_frame).to_pandas())
+    _assert_tabular_equivalent(profiling_frame, pd.DataFrame(rebuilt))
+
+
+def test_roundtrip_parquet(profiling_frame: pd.DataFrame, tmp_path: pathlib.Path):
+    """cdf -> Parquet file -> cdf keeps rows, columns, nulls and values."""
+    parquet_file = str(tmp_path / "profiles.parquet")
+    CytoDataFrame(profiling_frame).export(parquet_file)
+    reloaded = CytoDataFrame(parquet_file)
+    _assert_tabular_equivalent(profiling_frame, pd.DataFrame(reloaded))
+
+
+def test_roundtrip_preserves_schema(profiling_frame: pd.DataFrame):
+    """The inferred CytoSchema is identical before and after a polars round-trip."""
+    original = CytoDataFrame(profiling_frame)
+    expected_roles = original.cyto_schema.to_dict()
+    via_polars = CytoDataFrame(original.to_polars())
+    assert expected_roles == via_polars.cyto_schema.to_dict()
+
+
+def test_scan_parquet_helper(profiling_frame: pd.DataFrame, tmp_path: pathlib.Path):
+    parquet_file = tmp_path / "profiles.parquet"
+    profiling_frame.to_parquet(parquet_file)
+    scanned = engine.scan_parquet(str(parquet_file))
+    assert isinstance(scanned, pl.LazyFrame)
+    assert scanned.collect().height == len(profiling_frame)
diff --git a/tests/test_lazy.py b/tests/test_lazy.py
new file mode 100644
index 0000000..daa1aa9
--- /dev/null
+++ b/tests/test_lazy.py
@@ -0,0 +1,164 @@
+"""
+Tests for the CytoLazyFrame lazy query builder (lazy.py).
+
+Covers the lazy-execution surface from the evolution plan and differential
+validation that lazy Polars execution matches the equivalent pandas result.
+"""
+
+import pathlib
+
+import pandas as pd
+import polars as pl
+import pyarrow as pa
+import pytest
+
+from cytodataframe import CytoDataFrame, CytoLazyFrame
+
+
+@pytest.fixture(name="profiles")
+def fixture_profiles() -> pd.DataFrame:
+    """Five single-cell rows spread over wells A01, B02 and C03."""
+    columns = {
+        "Metadata_Well": ["A01", "A01", "B02", "B02", "C03"],
+        "Metadata_ObjectNumber": [1, 2, 1, 2, 1],
+        "Cells_AreaShape_Area": [10.0, 20.0, 30.0, 40.0, 50.0],
+        "Nuclei_Location_Center_X": [1.0, 2.0, 3.0, 4.0, 5.0],
+    }
+    return pd.DataFrame(columns)
+
+
+def test_to_lazy_returns_cytolazyframe(profiles: pd.DataFrame):
+    wrapped = CytoDataFrame(profiles).to_lazy()
+    assert isinstance(wrapped, CytoLazyFrame)
+    assert wrapped.columns == list(profiles.columns)
+
+
+def test_lazy_filter_matches_pandas(profiles: pd.DataFrame):
+    """A lazy polars filter selects the same rows as the pandas boolean mask."""
+    column, threshold = "Cells_AreaShape_Area", 30.0
+    # Same predicate, once through polars and once through pandas.
+    lazy_query = (
+        CytoDataFrame(profiles).to_lazy().filter(pl.col(column) >= threshold)
+    )
+    from_lazy = lazy_query.collect()
+    from_pandas = profiles[profiles[column] >= threshold]
+
+    assert isinstance(from_lazy, CytoDataFrame)
+    assert len(from_lazy) == len(from_pandas)
+    assert from_lazy[column].tolist() == from_pandas[column].tolist()
+
+
+def test_lazy_eager_equivalence(profiles: pd.DataFrame):
+    """Filtering through the lazy wrapper equals filtering the eager frame."""
+    frame = CytoDataFrame(profiles)
+    in_b02 = pl.col("Metadata_Well") == "B02"
+
+    via_lazy = frame.to_lazy().filter(in_b02).to_polars()
+    via_eager = frame.to_polars().filter(in_b02)
+    assert via_lazy.equals(via_eager)
+
+
+def test_lazy_select_features(profiles: pd.DataFrame):
+    """Default ``select_features`` drops geometry, keeps metadata and features."""
+    selected = CytoDataFrame(profiles).to_lazy().select_features().collect()
+    remaining = set(selected.columns)
+    assert "Nuclei_Location_Center_X" not in remaining
+    assert "Cells_AreaShape_Area" in remaining
+    assert "Metadata_Well" in remaining
+
+
+def test_lazy_select_features_explicit_no_metadata(profiles: pd.DataFrame):
+    """An explicit feature list with ``keep_metadata=False`` yields only that list."""
+    lazy_frame = CytoDataFrame(profiles).to_lazy()
+    narrowed = lazy_frame.select_features(
+        ["Cells_AreaShape_Area"], keep_metadata=False
+    )
+    collected = narrowed.collect()
+    assert list(collected.columns) == ["Cells_AreaShape_Area"]
+
+
+def test_lazy_group_by_agg(profiles: pd.DataFrame):
+    """Per-well sums from the lazy group-by agree with pandas ``groupby``."""
+    area_sum = pl.col("Cells_AreaShape_Area").sum().alias("total")
+    grouped = (
+        CytoDataFrame(profiles)
+        .to_lazy()
+        .group_by("Metadata_Well")
+        .agg(area_sum)
+        .collect()
+    )
+
+    # Index totals by well so the row order of the result does not matter.
+    wells = grouped["Metadata_Well"].tolist()
+    sums = grouped["total"].tolist()
+    totals = {well: total for well, total in zip(wells, sums, strict=False)}
+    expected = profiles.groupby("Metadata_Well")["Cells_AreaShape_Area"].sum()
+    assert totals["A01"] == expected["A01"]
+    assert totals["B02"] == expected["B02"]
+
+
+def test_lazy_join(profiles: pd.DataFrame):
+    """An inner join on the well adds ``treatment`` and drops unannotated wells."""
+    treatments = pl.DataFrame(
+        {"Metadata_Well": ["A01", "B02"], "treatment": ["drug", "ctrl"]}
+    )
+    lazy_frame = CytoDataFrame(profiles).to_lazy()
+    joined = lazy_frame.join(treatments, on="Metadata_Well", how="inner").collect()
+
+    assert "treatment" in joined.columns
+    # A01 and B02 contribute two rows each; C03 has no annotation.
+    assert len(joined) == 4
+
+
+def test_lazy_rename_and_drop(profiles: pd.DataFrame):
+    """``rename`` and ``drop`` chain lazily and show up in the collected frame."""
+    dropped = "Nuclei_Location_Center_X"
+    pipeline = CytoDataFrame(profiles).to_lazy()
+    pipeline = pipeline.rename({"Cells_AreaShape_Area": "area"})
+    pipeline = pipeline.drop(dropped)
+
+    columns = pipeline.collect().columns
+    assert "area" in columns
+    assert dropped not in columns
+
+
+def test_lazy_to_arrow_and_polars(profiles: pd.DataFrame):
+    wrapped = CytoDataFrame(profiles).to_lazy()
+    assert isinstance(wrapped.to_arrow(), pa.Table)
+    assert isinstance(wrapped.to_polars(), pl.DataFrame)
+    assert isinstance(wrapped.to_pandas(), pd.DataFrame)
+
+
+def test_lazy_context_carry_through(profiles: pd.DataFrame, tmp_path: pathlib.Path):
+    """Constructor context is still attached after ``to_lazy`` ... ``collect``."""
+    context = {"data_context_dir": str(tmp_path), "display_options": {"width": 123}}
+    source = CytoDataFrame(profiles, **context)
+
+    only_a01 = pl.col("Metadata_Well") == "A01"
+    collected = source.to_lazy().filter(only_a01).collect()
+
+    attrs = collected._custom_attrs
+    assert attrs["data_context_dir"] == str(tmp_path)
+    assert attrs["display_options"] == {"width": 123}
+
+
+def test_scan_parquet_pipeline(profiles: pd.DataFrame, tmp_path: pathlib.Path):
+    """Scan, filter, select and collect a Parquet file without losing context."""
+    parquet_file = tmp_path / "profiles.parquet"
+    profiles.to_parquet(parquet_file)
+    scanned = CytoDataFrame.scan_parquet(
+        str(parquet_file), data_context_dir=str(tmp_path)
+    )
+    in_a01 = scanned.filter(pl.col("Metadata_Well") == "A01")
+    collected = in_a01.select_features().collect()
+    assert isinstance(collected, CytoDataFrame)
+    assert len(collected) == 2
+    assert collected._custom_attrs["data_context_dir"] == str(tmp_path)
+
+
+def test_scan_parquet_returns_lazyframe(profiles: pd.DataFrame, tmp_path: pathlib.Path):
+    parquet_file = tmp_path / "profiles.parquet"
+    profiles.to_parquet(parquet_file)
+    lazy_frame = CytoDataFrame.scan_parquet(str(parquet_file))
+    assert isinstance(lazy_frame, CytoLazyFrame)
+    assert "CytoLazyFrame" in repr(lazy_frame)
diff --git a/tests/test_schema.py b/tests/test_schema.py
new file mode 100644
index 0000000..e4cced9
--- /dev/null
+++ b/tests/test_schema.py
@@ -0,0 +1,199 @@
+"""
+Tests for the CytoDataFrame formal schema system (schema.py).
+
+Covers deterministic schema inference (differential against the hand-written
+classification rules), property-based invariants via Hypothesis, and the
+Arrow-native bounding-box / centroid struct helpers.
+"""
+
+import pandas as pd
+import polars as pl
+import pyarrow as pa
+import pytest
+from hypothesis import given
+from hypothesis import strategies as st
+
+from cytodataframe import CytoDataFrame, CytoSchema
+from cytodataframe.schema import add_bbox_struct, add_centroid_struct
+
+
+@pytest.fixture(name="cellprofiler_frame")
+def fixture_cellprofiler_frame() -> pd.DataFrame:
+    """A frame mirroring CellProfiler-style single-cell output."""
+    return pd.DataFrame(
+        {
+            "Metadata_Well": ["A01", "B02"],
+            "Metadata_Site": [1, 2],
+            "ImageNumber": [1, 1],
+            "ObjectNumber": [1, 2],
+            "Image_FileName_DNA": ["a.tif", "b.tif"],
+            "Image_PathName_DNA": ["/imgs", "/imgs"],
+            "Cells_AreaShape_Area": [100.0, 200.0],
+            "Cells_Intensity_MeanIntensity_DNA": [0.5, 0.6],
+            "Nuclei_Location_Center_X": [5.0, 6.0],
+            "Nuclei_Location_Center_Y": [7.0, 8.0],
+            "Cells_AreaShape_BoundingBoxMinimum_X": [0, 1],
+        }
+    )
+
+
+# --------------------------------------------------------------------------- #
+# Deterministic / differential classification
+# --------------------------------------------------------------------------- #
+def test_schema_classification_buckets(cellprofiler_frame: pd.DataFrame):
+    schema = CytoSchema.from_pandas(cellprofiler_frame)
+
+    assert schema.image_key == "Image_FileName_DNA"
+    assert schema.object_key == "ObjectNumber"
+
+    # Features are numeric measurement columns only.
+    assert set(schema.feature_columns) == {
+        "Cells_AreaShape_Area",
+        "Cells_Intensity_MeanIntensity_DNA",
+    }
+    # Geometry columns are spatial coordinates.
+    assert set(schema.geometry_columns) == {
+        "Nuclei_Location_Center_X",
+        "Nuclei_Location_Center_Y",
+        "Cells_AreaShape_BoundingBoxMinimum_X",
+    }
+    # Metadata holds identifiers + image references.
+    assert "Metadata_Well" in schema.metadata_columns
+    assert "Image_FileName_DNA" in schema.metadata_columns
+    assert "ObjectNumber" in schema.metadata_columns
+
+
+def test_schema_inference_matches_across_backends(cellprofiler_frame: pd.DataFrame):
+    """pandas, polars, and Arrow inference agree."""
+    from_pandas = CytoSchema.from_pandas(cellprofiler_frame).to_dict()
+    from_polars = CytoSchema.from_polars(
+        pl.from_pandas(cellprofiler_frame)
+    ).to_dict()
+    from_arrow = CytoSchema.from_arrow(
+        pa.Table.from_pandas(cellprofiler_frame, preserve_index=False).schema
+    ).to_dict()
+    assert from_pandas == from_polars == from_arrow
+
+
+def test_schema_infer_dispatch(cellprofiler_frame: pd.DataFrame):
+    table = pa.Table.from_pandas(cellprofiler_frame, preserve_index=False)
+    assert CytoSchema.infer(table).to_dict() == CytoSchema.infer(
+        cellprofiler_frame
+    ).to_dict()
+    assert CytoSchema.infer(pl.from_pandas(cellprofiler_frame).lazy()).to_dict() == (
+        CytoSchema.infer(cellprofiler_frame).to_dict()
+    )
+
+
+def test_schema_validate_and_require(cellprofiler_frame: pd.DataFrame):
+    schema = CytoSchema.from_pandas(cellprofiler_frame)
+    assert schema.validate() == []
+    assert schema.require("image_key", "object_key") is schema
+
+    bare = CytoSchema.from_columns(["Cells_AreaShape_Area"])
+    with pytest.raises(ValueError, match="missing required key"):
+        bare.require("image_key")
+
+
+def test_schema_validate_detects_overlap():
+    bad = CytoSchema(
+        feature_columns=["x"],
+        metadata_columns=["x"],
+    )
+    issues = bad.validate()
+    assert any("feature and metadata" in issue for issue in issues)
+    with pytest.raises(ValueError):
+        bad.validate(strict=True)
+
+
+def test_cytodataframe_cyto_schema_property(cellprofiler_frame: pd.DataFrame):
+    cdf = CytoDataFrame(cellprofiler_frame)
+    assert cdf.cyto_schema.image_key == "Image_FileName_DNA"
+
+
+# --------------------------------------------------------------------------- #
+# Property-based invariants
+# --------------------------------------------------------------------------- #
+_NAME_VOCAB = [
+    "Metadata_Well",
+    "Metadata_Plate",
+    "ImageNumber",
+    "ObjectNumber",
+    "Image_FileName_DNA",
+    "Image_PathName_DNA",
+    "Cells_AreaShape_Area",
+    "Nuclei_Intensity_MeanIntensity",
+    "Cells_AreaShape_BoundingBox_Minimum_X",
+    "Nuclei_Location_Center_X",
+    "RandomFeature_1",
+    "AnnotationLabel",
+]
+
+
+@given(
+    columns=st.lists(
+        st.sampled_from(_NAME_VOCAB), min_size=1, max_size=12, unique=True
+    ),
+    numeric_seed=st.lists(st.booleans(), min_size=12, max_size=12),
+)
+def test_schema_partition_invariants(columns: list, numeric_seed: list):
+    numeric = {
+        name: numeric_seed[idx % len(numeric_seed)]
+        for idx, name in enumerate(columns)
+    }
+    schema = CytoSchema.from_columns(columns, numeric=numeric)
+
+    meta = set(schema.metadata_columns)
+    feat = set(schema.feature_columns)
+    geom = set(schema.geometry_columns)
+
+    # Every column is classified into exactly one of the three buckets.
+    assert meta | feat | geom == set(columns)
+    assert meta.isdisjoint(feat)
+    assert feat.isdisjoint(geom)
+    assert meta.isdisjoint(geom)
+
+    # A non-numeric column is never treated as a feature.
+    for name in columns:
+        if not numeric[name]:
+            assert name not in feat
+
+
+# --------------------------------------------------------------------------- #
+# Arrow-native struct helpers (Phase 3)
+# --------------------------------------------------------------------------- #
+def test_add_bbox_struct_keeps_flattened():
+    df = pl.DataFrame(
+        {
+            "Cells_AreaShape_BoundingBoxMinimum_X": [0, 1],
+            "Cells_AreaShape_BoundingBoxMinimum_Y": [0, 1],
+            "Cells_AreaShape_BoundingBoxMaximum_X": [10, 11],
+            "Cells_AreaShape_BoundingBoxMaximum_Y": [10, 11],
+        }
+    )
+    out = add_bbox_struct(df)
+    assert "bbox" in out.columns
+    # flattened compatibility columns remain available
+    assert "Cells_AreaShape_BoundingBoxMinimum_X" in out.columns
+    struct = out["bbox"][0]
+    assert struct["min_x"] == 0
+    assert struct["max_y"] == 10
+
+
+def test_add_centroid_struct_xy():
+    df = pl.DataFrame(
+        {
+            "Nuclei_Location_Center_X": [5.0, 6.0],
+            "Nuclei_Location_Center_Y": [7.0, 8.0],
+        }
+    )
+    out = add_centroid_struct(df)
+    assert "centroid" in out.columns
+    assert out["centroid"][0]["x"] == 5.0
+    assert out["centroid"][0]["y"] == 7.0
+
+
+def test_struct_helpers_noop_without_columns():
+    df = pl.DataFrame({"a": [1, 2]})
+    assert add_bbox_struct(df).columns == ["a"]
+    assert add_centroid_struct(df).columns == ["a"]
diff --git a/uv.lock b/uv.lock
index cd3704a..92ceace 100644
--- a/uv.lock
+++ b/uv.lock
@@ -425,22 +425,6 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/e6/51/aac7e419521d5519e13087a7198623655648c939822bd7f4bdc9ccbe07f9/botocore-1.42.42-py3-none-any.whl", hash = "sha256:1c9df5fc31e9073a9aa956271c4007d72f5d342cafca5f4154ea099bc6f83085", size = 14600186, upload-time = "2026-02-04T20:28:29.268Z" },
 ]
 
-[[package]]
-name = "bqplot"
-version = "0.12.45"
-source = { registry = "https://pypi.org/simple" }
-dependencies = [
-    { name = "ipywidgets" },
-    { name = "numpy" },
-    { name = "pandas" },
-    { name = "traitlets" },
-    { name = "traittypes" },
-]
-sdist = { url = "https://files.pythonhosted.org/packages/a3/e0/727335c5ff8cee68d21a8c79f5b8406011639a76ecd7a6462a60aa8b0608/bqplot-0.12.45.tar.gz", hash = "sha256:ede00e9fdf7d92e43cc2d1b9691c7da176b6216fdd187c8e92f19d7beaca5e2a", size = 1205882, upload-time = "2025-05-21T17:32:29.143Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/73/03/6b5370fc626e6f480c4a0b4cb25b3459d390745010618b21b4b573423a53/bqplot-0.12.45-py2.py3-none-any.whl", hash = "sha256:cf2e046adb401670902ab53a18d9f63540091279bc45c4ef281bfdadf6e7e92c", size = 1237450, upload-time = "2025-05-21T17:32:27.617Z" },
-]
-
 [[package]]
 name = "certifi"
 version = "2026.1.4"
@@ -779,17 +763,27 @@ dependencies = [
     { name = "imagecodecs" },
     { name = "imageio" },
     { name = "ipython" },
-    { name = "ipyvolume" },
     { name = "ipywidgets" },
-    { name = "matplotlib" },
-    { name = "nest-asyncio" },
-    { name = "ome-arrow" },
     { name = "opencv-python" },
     { name = "pandas" },
+    { name = "polars" },
     { name = "pyarrow" },
-    { name = "pyvista" },
-    { name = "pywavelets" },
     { name = "scikit-image" },
+]
+
+[package.optional-dependencies]
+all = [
+    { name = "ome-arrow" },
+    { name = "pyvista" },
+    { name = "trame" },
+    { name = "trame-vtk" },
+    { name = "trame-vuetify" },
+]
+ome = [
+    { name = "ome-arrow" },
+]
+viz3d = [
+    { name = "pyvista" },
     { name = "trame" },
     { name = "trame-vtk" },
     { name = "trame-vuetify" },
@@ -802,14 +796,20 @@ dev = [
     { name = "coverage" },
     { name = "duckdb" },
     { name = "httpcore" },
+    { name = "hypothesis" },
     { name = "isort" },
     { name = "jupyterlab" },
     { name = "jupyterlab-code-formatter" },
     { name = "jupytext" },
+    { name = "ome-arrow" },
     { name = "poethepoet" },
     { name = "pytest" },
     { name = "pytest-cov" },
+    { name = "pyvista" },
     { name = "sqlalchemy" },
+    { name = "trame" },
+    { name = "trame-vtk" },
+    { name = "trame-vuetify" },
 ]
 docs = [
     { name = "dunamai" },
@@ -822,24 +822,23 @@ docs = [
 
 [package.metadata]
 requires-dist = [
+    { name = "cytodataframe", extras = ["ome", "viz3d"], marker = "extra == 'all'" },
     { name = "imagecodecs", specifier = ">=2024.9.22,<2027" },
     { name = "imageio", specifier = ">=2.37,<3" },
     { name = "ipython", specifier = ">=8.12.3,<10" },
-    { name = "ipyvolume", specifier = ">=0.6.3,<0.7" },
     { name = "ipywidgets", specifier = ">=8.1.7,<9" },
-    { name = "matplotlib", specifier = ">=3.9.3,<4" },
-    { name = "nest-asyncio", specifier = ">=1.6,<2" },
-    { name = "ome-arrow", specifier = ">=0.0.3,<0.0.9" },
+    { name = "ome-arrow", marker = "extra == 'ome'", specifier = ">=0.0.3,<0.0.10" },
     { name = "opencv-python", specifier = ">=4.10.0.84,<5" },
     { name = "pandas", specifier = ">=2.2.2,<4" },
+    { name = "polars", specifier = ">=1,<2" },
     { name = "pyarrow", specifier = ">=16" },
-    { name = "pyvista", specifier = ">=0.46.4" },
-    { name = "pywavelets", specifier = ">1.4.1" },
+    { name = "pyvista", marker = "extra == 'viz3d'", specifier = ">=0.46.4" },
     { name = "scikit-image", specifier = ">0.19.3" },
-    { name = "trame", specifier = ">=3.12" },
-    { name = "trame-vtk", specifier = ">=2.10" },
-    { name = "trame-vuetify", specifier = ">=3.1" },
+    { name = "trame", marker = "extra == 'viz3d'", specifier = ">=3.12" },
+    { name = "trame-vtk", marker = "extra == 'viz3d'", specifier = ">=2.10" },
+    { name = "trame-vuetify", marker = "extra == 'viz3d'", specifier = ">=3.1" },
 ]
+provides-extras = ["ome", "viz3d", "all"]
 
 [package.metadata.requires-dev]
 dev = [
@@ -848,20 +847,26 @@ dev = [
     { name = "coverage", specifier = ">=7.6,<8" },
     { name = "duckdb", specifier = ">=1.1.3,<2" },
     { name = "httpcore", specifier = ">=0.18,<1.1" },
+    { name = "hypothesis", specifier = ">=6,<7" },
     { name = "isort", specifier = ">=5.13.2,<9" },
     { name = "jupyterlab", specifier = ">=4.3,<5" },
     { name = "jupyterlab-code-formatter", specifier = ">=3.0.2,<4" },
     { name = "jupytext", specifier = ">=1.16.4,<2" },
-    { name = "poethepoet", specifier = ">=0.37,<0.43" },
+    { name = "ome-arrow", specifier = ">=0.0.3,<0.0.10" },
+    { name = "poethepoet", specifier = ">=0.37,<0.47" },
     { name = "pytest", specifier = ">=8.3.3,<10" },
     { name = "pytest-cov", specifier = ">=5,<8" },
+    { name = "pyvista", specifier = ">=0.46.4" },
     { name = "sqlalchemy", specifier = ">=1.3.6,<3" },
+    { name = "trame", specifier = ">=3.12" },
+    { name = "trame-vtk", specifier = ">=2.10" },
+    { name = "trame-vuetify", specifier = ">=3.1" },
 ]
 docs = [
     { name = "dunamai", specifier = ">=1.22,<2" },
     { name = "myst-nb", specifier = ">=1.1.2,<2" },
     { name = "myst-parser", specifier = ">=3,<6" },
-    { name = "pydata-sphinx-theme", specifier = ">=0.16,<0.17" },
+    { name = "pydata-sphinx-theme", specifier = ">=0.16,<0.19" },
     { name = "sphinx", specifier = ">=9,<9.1" },
     { name = "sphinx-multiversion", git = "https://github.com/J-RN/sphinx-multiversion?rev=a77f0c862dace3a62c18fc866da60ef7dde3873d" },
 ]
@@ -1315,6 +1320,18 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad", size = 73517, upload-time = "2024-12-06T15:37:21.509Z" },
 ]
 
+[[package]]
+name = "hypothesis"
+version = "6.155.6"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "sortedcontainers" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/10/aa/9a91a4addf285702a98713da44b3581799539426436617bfb8914478c166/hypothesis-6.155.6.tar.gz", hash = "sha256:7569e1897690336c85d49d8391b49ec6ab83d951009515bfc29faebbac286cf5", size = 478038, upload-time = "2026-06-19T13:21:23.379Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/9e/a9/4c17e962c2e9cbc314bb579ed2e2b2da45d7b6b942aab6948d14d85abfea/hypothesis-6.155.6-py3-none-any.whl", hash = "sha256:a96d9a29f6bbc8ccac39dd84e140892da76765464929f401a4181b90c20c9ad1", size = 544521, upload-time = "2026-06-19T13:21:20.934Z" },
+]
+
 [[package]]
 name = "idna"
 version = "3.11"
@@ -1385,20 +1402,6 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/cb/b1/3846dd7f199d53cb17f49cba7e651e9ce294d8497c8c150530ed11865bb8/iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12", size = 7484, upload-time = "2025-10-18T21:55:41.639Z" },
 ]
 
-[[package]]
-name = "ipydatawidgets"
-version = "4.3.5"
-source = { registry = "https://pypi.org/simple" }
-dependencies = [
-    { name = "ipywidgets" },
-    { name = "numpy" },
-    { name = "traittypes" },
-]
-sdist = { url = "https://files.pythonhosted.org/packages/bc/88/332ba20bb0e0b8078f97bc1469f332be796b804c565b41163b93241e0657/ipydatawidgets-4.3.5.tar.gz", hash = "sha256:394f2489576587cfd755377a09a067f46cad22081965092021fd1abcbe7852a8", size = 799182, upload-time = "2023-06-14T11:16:06.587Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/f1/5b/e63c877c4c94382b66de5045e08ec8cd960e8a4d22f0d62a4dfb1f9e5ac6/ipydatawidgets-4.3.5-py2.py3-none-any.whl", hash = "sha256:d590cdb7c364f2f6ab346f20b9d2dd661d27a834ef7845bc9d7113118f05ec87", size = 271703, upload-time = "2023-06-14T11:16:03.955Z" },
-]
-
 [[package]]
 name = "ipykernel"
 version = "7.2.0"
@@ -1457,62 +1460,6 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/d9/33/1f075bf72b0b747cb3288d011319aaf64083cf2efef8354174e3ed4540e2/ipython_pygments_lexers-1.1.1-py3-none-any.whl", hash = "sha256:a9462224a505ade19a605f71f8fa63c2048833ce50abc86768a0d81d876dc81c", size = 8074, upload-time = "2025-01-17T11:24:33.271Z" },
 ]
 
-[[package]]
-name = "ipyvolume"
-version = "0.6.3"
-source = { registry = "https://pypi.org/simple" }
-dependencies = [
-    { name = "bqplot" },
-    { name = "ipyvue" },
-    { name = "ipyvuetify" },
-    { name = "ipywebrtc" },
-    { name = "ipywidgets" },
-    { name = "matplotlib" },
-    { name = "numpy" },
-    { name = "pillow" },
-    { name = "pythreejs" },
-    { name = "requests" },
-    { name = "traitlets" },
-    { name = "traittypes" },
-]
-sdist = { url = "https://files.pythonhosted.org/packages/bc/8c/560b41f231006d6b10749289aa33173268afc06cee92a77570d3fc4dff38/ipyvolume-0.6.3.tar.gz", hash = "sha256:823226f90a59ce08b1da2699a9ec505f34f65f01ce43accd80e7d3554082d035", size = 1596303, upload-time = "2023-06-02T14:33:08.671Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/88/ca/153406ca7ff41ea3ecf8c3b5c0db07364461e867fb197b1723bf0be2652d/ipyvolume-0.6.3-py3-none-any.whl", hash = "sha256:550761b5cc1a9fb0e8931056fd523b2f0074ddea46633a248f996168e5b0d7f6", size = 1612135, upload-time = "2023-06-02T14:33:05.246Z" },
-]
-
-[[package]]
-name = "ipyvue"
-version = "1.12.0"
-source = { registry = "https://pypi.org/simple" }
-dependencies = [
-    { name = "ipywidgets" },
-]
-sdist = { url = "https://files.pythonhosted.org/packages/d2/37/7b66ea86cde30f4983566cbfb8bb133eed4d2252a7f0b941057855e666e7/ipyvue-1.12.0.tar.gz", hash = "sha256:408b5e6a64e203fc679f447a071e3dbc178ab2906982f248adf722fc84773ffa", size = 1749270, upload-time = "2026-02-11T10:07:43.884Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/48/be/cb0bd788bda9624a2facd270a6b1eef1b606bdeacee3a6c1cf9e79704afc/ipyvue-1.12.0-py2.py3-none-any.whl", hash = "sha256:c7f555a71c28724ceda344af294bdc48407eace17222065cfb7b4cff80665362", size = 2673161, upload-time = "2026-02-11T10:07:41.91Z" },
-]
-
-[[package]]
-name = "ipyvuetify"
-version = "1.11.3"
-source = { registry = "https://pypi.org/simple" }
-dependencies = [
-    { name = "ipyvue" },
-]
-sdist = { url = "https://files.pythonhosted.org/packages/a4/07/31c9615532b6c190a3033460e4aa83a64ac532281758ff734e1bc42e3c00/ipyvuetify-1.11.3.tar.gz", hash = "sha256:3580afa76d9add4ae04ccb7fd57d4a0cf03a261705742e7137def3ebb65ac71d", size = 6170730, upload-time = "2025-07-02T11:25:12.691Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/47/4d/fd1a6a888f8abb6b8dc316cc78b5153e75eff7ae66a94cf30b144fadd09d/ipyvuetify-1.11.3-py2.py3-none-any.whl", hash = "sha256:fa83aaf9f4ce669172d532094d60bd7c40d3cb9c5d6bb2f4a14565da2b09a8d8", size = 6290266, upload-time = "2025-07-02T11:25:10.553Z" },
-]
-
-[[package]]
-name = "ipywebrtc"
-version = "0.6.0"
-source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/80/1f/7f603de52bb167eb37168c16dd5b0218cc3e336ef0538e178f0fbeff5e90/ipywebrtc-0.6.0.tar.gz", hash = "sha256:f8ac3cc02b3633b59f388aef67961cff57f90028fd303bb3886c63c3d631da13", size = 253863, upload-time = "2021-03-29T11:27:33.42Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/e9/11/4b83894a009ef522b5751881e21ffec55d56b0900c0b788e2906ec01c51d/ipywebrtc-0.6.0-py2.py3-none-any.whl", hash = "sha256:01a6c9d79ab937c280ce4635a149c7b681457e99ea779c00c7a6aa44ee6916f8", size = 260745, upload-time = "2021-03-29T11:27:31.379Z" },
-]
-
 [[package]]
 name = "ipywidgets"
 version = "8.1.8"
@@ -2839,6 +2786,34 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/5d/5e/0b83e0222ce5921b3f9081eeca8c6fb3e1cfd5ca0d06338adf93b28ce061/poethepoet-0.41.0-py3-none-any.whl", hash = "sha256:4bab9fd8271664c5d21407e8f12827daeb6aa484dc6cc7620f0c3b4e62b42ee4", size = 113590, upload-time = "2026-02-08T20:45:34.697Z" },
 ]
 
+[[package]]
+name = "polars"
+version = "1.41.2"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "polars-runtime-32" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/ff/f9/aeda46259b0669247a160315d2d51269de9504b9dd2f70acadbcb22f46b7/polars-1.41.2.tar.gz", hash = "sha256:256d6731162371b77f3f29a55eacb8c0fc740ddb1a293a01d2ef5b5393c5c708", size = 737996, upload-time = "2026-05-29T17:39:15.604Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/1f/22/28f62d24f7db56ac4343588f9362d49b7b4177e55ac47a466fe696b0099b/polars-1.41.2-py3-none-any.whl", hash = "sha256:23ce9a2910b6e3e8d4258770bf44aa17170958df7af6e85feedf4458a04d8d29", size = 833445, upload-time = "2026-05-29T17:37:05.576Z" },
+]
+
+[[package]]
+name = "polars-runtime-32"
+version = "1.41.2"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/f9/56/54e3ea0e9b64f327179049e4742241cc6b1d3e8fa414b05a057dd26df367/polars_runtime_32-1.41.2.tar.gz", hash = "sha256:7af09ec1ab053da2c9669e8d15f809a4083a29be05db57111688b8051062af56", size = 2989474, upload-time = "2026-05-29T17:39:17.257Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/d6/9b/fe72a3811c0357cdb06c67bdc7695fa1623ad47948fc523195f5ac31037f/polars_runtime_32-1.41.2-cp310-abi3-macosx_10_12_x86_64.whl", hash = "sha256:95a08346dac337357cdb825c8076df7d36da54c4caa59a5cb41d0a30691c5edd", size = 52265283, upload-time = "2026-05-29T17:37:09.407Z" },
+    { url = "https://files.pythonhosted.org/packages/0a/93/fab9da803fd80d9e83ef88c20932f637a10bc611b20415fc322eec84bc44/polars_runtime_32-1.41.2-cp310-abi3-macosx_11_0_arm64.whl", hash = "sha256:dedfaeec2c7f995298da7319dd9431d662e5dd1d0ec51b1459df4a0234ceff52", size = 46571222, upload-time = "2026-05-29T17:37:13.698Z" },
+    { url = "https://files.pythonhosted.org/packages/c8/2a/8843f34a8ac57acd058a39b87b03b580dd352a490e9dae0415e02033bdd4/polars_runtime_32-1.41.2-cp310-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:18eea22c5cc34e27f8a60950458ad81e6a9ea75e89363ca1367e14e7e7f781fc", size = 50409372, upload-time = "2026-05-29T17:37:17.875Z" },
+    { url = "https://files.pythonhosted.org/packages/6c/c6/92b352fe88cf51bd0a19fb99e1c0cbe46aa26c14dcf7995b89869cd932ae/polars_runtime_32-1.41.2-cp310-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2630540dfdfb0f36f9b04a07c7c2e3f50bf2ad384113263c1c812007ee9141e0", size = 56405484, upload-time = "2026-05-29T17:37:22.684Z" },
+    { url = "https://files.pythonhosted.org/packages/74/c4/bae3174c3b02f6b441d2e58594387abcd509f67a098f682a83b195f08966/polars_runtime_32-1.41.2-cp310-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:20e969e08f9b137e233c04cc04de73d9795f89eb77d34854e40a025965a43763", size = 50603512, upload-time = "2026-05-29T17:37:27.422Z" },
+    { url = "https://files.pythonhosted.org/packages/f4/ed/f2d26ae02d92c2689056838ed59e2a626326ad23c2831d58637d25f6c82a/polars_runtime_32-1.41.2-cp310-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:e7016a3deb641b64a31447abbbee0f34bd020a6a9ae34ee6b743837def15e2a4", size = 54328561, upload-time = "2026-05-29T17:37:32.587Z" },
+    { url = "https://files.pythonhosted.org/packages/9b/c4/9c3831cc885dc7769e59abf8f583821a5fb4403fd0e4eba0ccc6d47a3d4b/polars_runtime_32-1.41.2-cp310-abi3-win_amd64.whl", hash = "sha256:1e5e5377c315e0dcafdfb2a31adc546abbaeb3f9cb1864e6536523d2af473265", size = 51978643, upload-time = "2026-05-29T17:37:37.443Z" },
+    { url = "https://files.pythonhosted.org/packages/cd/c6/79e9f3f270270d7ed5575d92b7bfef49f01abd9275447161275b23b553a8/polars_runtime_32-1.41.2-cp310-abi3-win_arm64.whl", hash = "sha256:843d96f69d18eca53429c1198e58891db7f18111f83b9c419bb45ad9d73eaed5", size = 46006901, upload-time = "2026-05-29T17:37:42.522Z" },
+]
+
 [[package]]
 name = "pooch"
 version = "1.9.0"
@@ -3212,21 +3187,6 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/51/e5/fecf13f06e5e5f67e8837d777d1bc43fac0ed2b77a676804df5c34744727/python_json_logger-4.0.0-py3-none-any.whl", hash = "sha256:af09c9daf6a813aa4cc7180395f50f2a9e5fa056034c9953aec92e381c5ba1e2", size = 15548, upload-time = "2025-10-06T04:15:17.553Z" },
 ]
 
-[[package]]
-name = "pythreejs"
-version = "2.4.2"
-source = { registry = "https://pypi.org/simple" }
-dependencies = [
-    { name = "ipydatawidgets" },
-    { name = "ipywidgets" },
-    { name = "numpy" },
-    { name = "traitlets" },
-]
-sdist = { url = "https://files.pythonhosted.org/packages/0a/2e/0ec94286b8eb3fe1200700080e8adb2c8d871bb8db589858a49600d97a7d/pythreejs-2.4.2.tar.gz", hash = "sha256:a568bfdc4c3797c4c2339158928edc7dcf6fa4a267b08e3cec5121e2078b5bd6", size = 4731310, upload-time = "2023-02-20T00:23:30.081Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/d8/8b/e2bbeb42068f0c48899e8eddd34902afc0f7429d4d2a152d2dc2670dc661/pythreejs-2.4.2-py3-none-any.whl", hash = "sha256:8418807163ad91f4df53b58c4e991b26214852a1236f28f1afeaadf99d095818", size = 3363905, upload-time = "2023-02-20T00:23:27.283Z" },
-]
-
 [[package]]
 name = "pytokens"
 version = "0.4.1"
@@ -3279,49 +3239,6 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/95/c1/c8efb5c0696fa3f7f7c424234dc08fa1e0ecc2292c53500090d93c81a648/pyvista-0.47.0-py3-none-any.whl", hash = "sha256:35d9b003d3bfac709da5b76dd264919b6847c469be08283d3295833f6a7ea657", size = 2508448, upload-time = "2026-02-08T20:21:00.614Z" },
 ]
 
-[[package]]
-name = "pywavelets"
-version = "1.9.0"
-source = { registry = "https://pypi.org/simple" }
-dependencies = [
-    { name = "numpy" },
-]
-sdist = { url = "https://files.pythonhosted.org/packages/5a/75/50581633d199812205ea8cdd0f6d52f12a624886b74bf1486335b67f01ff/pywavelets-1.9.0.tar.gz", hash = "sha256:148d12203377772bea452a59211d98649c8ee4a05eff019a9021853a36babdc8", size = 3938340, upload-time = "2025-08-04T16:20:04.978Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/bd/8b/ca700d0c174c3a4eec1fbb603f04374d1fed84255c2a9f487cfaa749c865/pywavelets-1.9.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:54662cce4d56f0d6beaa6ebd34b2960f3aa4a43c83c9098a24729e9dc20a4be2", size = 4323640, upload-time = "2025-08-04T16:18:51.683Z" },
-    { url = "https://files.pythonhosted.org/packages/b5/f3/0fa57b6407ea9c4452b0bc182141256b9481b479ffbfc9d7fdb73afe193b/pywavelets-1.9.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:0d8ed4b4d1eab9347e8fe0c5b45008ce5a67225ce5b05766b8b1fa923a5f8b34", size = 4294938, upload-time = "2025-08-04T16:18:53.818Z" },
-    { url = "https://files.pythonhosted.org/packages/ea/95/a998313c8459a57e488ff2b18e24be9e836aedda3aa3a1673197deeaa59a/pywavelets-1.9.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:862be65481fdfecfd84c6b0ca132ba571c12697a082068921bca5b5e039f1371", size = 4472829, upload-time = "2025-08-04T16:18:55.508Z" },
-    { url = "https://files.pythonhosted.org/packages/d8/8c/f316a153f7f89d2753df8a7371d15d0faab87e709fe02715dbc297c79385/pywavelets-1.9.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d76b7fa8fc500b09201d689b4f15bf5887e30ffbe2e1f338eb8470590eb4521a", size = 4524936, upload-time = "2025-08-04T16:18:57.146Z" },
-    { url = "https://files.pythonhosted.org/packages/24/f7/89fdc1caef4b384a341a8e149253e23f36c1702bbb986a26123348624854/pywavelets-1.9.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:aa859d0b686a697c87a47e29319aebe44125f114a4f8c7e444832b921f52de5a", size = 4481475, upload-time = "2025-08-04T16:18:58.725Z" },
-    { url = "https://files.pythonhosted.org/packages/82/53/b733fbfb71853e4a5c430da56e325a763562d65241dd785f0fadb67aed6a/pywavelets-1.9.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:20e97b84a263003e2c7348bcf72beba96edda1a6169f072dc4e4d4ee3a6c7368", size = 4527994, upload-time = "2025-08-04T16:18:59.917Z" },
-    { url = "https://files.pythonhosted.org/packages/ed/15/5f6a6e9fdad8341e42642ed622a5f3033da4ea9d426cc3e574ae418b4726/pywavelets-1.9.0-cp311-cp311-win32.whl", hash = "sha256:f8330cdbfa506000e63e79525716df888998a76414c5cd6ecd9a7e371191fb05", size = 4136109, upload-time = "2025-08-04T16:19:01.511Z" },
-    { url = "https://files.pythonhosted.org/packages/fd/33/62dbb4aea86ec9d79b283127c42cc896f4d4ff265a9aeb1337a7836dd550/pywavelets-1.9.0-cp311-cp311-win_amd64.whl", hash = "sha256:ed10959a17df294ef55948dcc76367d59ec7b6aad67e38dd4e313d2fe3ad47b2", size = 4228321, upload-time = "2025-08-04T16:19:03.164Z" },
-    { url = "https://files.pythonhosted.org/packages/5c/37/3fda13fb2518fdd306528382d6b18c116ceafefff0a7dccd28f1034f4dd2/pywavelets-1.9.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:30baa0788317d3c938560c83fe4fc43817342d06e6c9662a440f73ba3fb25c9b", size = 4320835, upload-time = "2025-08-04T16:19:04.855Z" },
-    { url = "https://files.pythonhosted.org/packages/36/65/a5549325daafc3eae4b52de076798839eaf529a07218f8fb18cccefe76a1/pywavelets-1.9.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:df7436a728339696a7aa955c020ae65c85b0d9d2b5ff5b4cf4551f5d4c50f2c7", size = 4290469, upload-time = "2025-08-04T16:19:06.178Z" },
-    { url = "https://files.pythonhosted.org/packages/05/85/901bb756d37dfa56baa26ef4a3577aecfe9c55f50f51366fede322f8c91d/pywavelets-1.9.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:07b26526db2476974581274c43a9c2447c917418c6bd03c8d305ad2a5cd9fac3", size = 4437717, upload-time = "2025-08-04T16:19:07.514Z" },
-    { url = "https://files.pythonhosted.org/packages/0f/34/0f54dd9c288941294898877008bcb5c07012340cc9c5db9cff1bd185d449/pywavelets-1.9.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:573b650805d2f3c981a0e5ae95191c781a722022c37a0f6eba3fa7eae8e0ee17", size = 4483843, upload-time = "2025-08-04T16:19:08.857Z" },
-    { url = "https://files.pythonhosted.org/packages/48/1f/cff6bb4ea64ff508d8cac3fe113c0aa95310a7446d9efa6829027cc2afdf/pywavelets-1.9.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3747ec804492436de6e99a7b6130480e53406d047e87dc7095ab40078a515a23", size = 4442236, upload-time = "2025-08-04T16:19:11.061Z" },
-    { url = "https://files.pythonhosted.org/packages/ce/53/a3846eeefe0fb7ca63ae045f038457aa274989a15af793c1b824138caf98/pywavelets-1.9.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:5163665686219c3f43fd5bbfef2391e87146813961dad0f86c62d4aed561f547", size = 4488077, upload-time = "2025-08-04T16:19:12.333Z" },
-    { url = "https://files.pythonhosted.org/packages/f7/98/44852d2fe94455b72dece2db23562145179d63186a1c971125279a1c381f/pywavelets-1.9.0-cp312-cp312-win32.whl", hash = "sha256:80b8ab99f5326a3e724f71f23ba8b0a5b03e333fa79f66e965ea7bed21d42a2f", size = 4134094, upload-time = "2025-08-04T16:19:13.564Z" },
-    { url = "https://files.pythonhosted.org/packages/2c/a7/0d9ee3fe454d606e0f5c8e3aebf99d2ecddbfb681826a29397729538c8f1/pywavelets-1.9.0-cp312-cp312-win_amd64.whl", hash = "sha256:92bfb8a117b8c8d3b72f2757a85395346fcbf37f50598880879ae72bd8e1c4b9", size = 4213900, upload-time = "2025-08-04T16:19:14.939Z" },
-    { url = "https://files.pythonhosted.org/packages/db/a7/dec4e450675d62946ad975f5b4d924437df42d2fae46e91dfddda2de0f5a/pywavelets-1.9.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:74f8455c143818e4b026fc67b27fd82f38e522701b94b8a6d1aaf3a45fcc1a25", size = 4316201, upload-time = "2025-08-04T16:19:16.259Z" },
-    { url = "https://files.pythonhosted.org/packages/aa/0c/b54b86596c0df68027e48c09210e907e628435003e77048384a2dd6767e3/pywavelets-1.9.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:c50320fe0a4a23ddd8835b3dc9b53b09ee05c7cc6c56b81d0916f04fc1649070", size = 4286838, upload-time = "2025-08-04T16:19:17.92Z" },
-    { url = "https://files.pythonhosted.org/packages/5a/9c/333969c3baad8af2e7999e83addcb7bb1d1fd48e2d812fb27e2e89582cb1/pywavelets-1.9.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d6e059265223ed659e5214ab52a84883c88ddf3decbf08d7ec6abb8e4c5ed7be", size = 4430753, upload-time = "2025-08-04T16:19:19.529Z" },
-    { url = "https://files.pythonhosted.org/packages/e5/1b/a24c6ff03b026b826ad7b9267bd63cd34ce026795a0302f8a5403840b8e7/pywavelets-1.9.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ae10ed46c139c7ddb8b1249cfe0989f8ccb610d93f2899507b1b1573a0e424b5", size = 4491315, upload-time = "2025-08-04T16:19:20.717Z" },
-    { url = "https://files.pythonhosted.org/packages/d7/c7/e3fbb502fca3469e51ced4f1e1326364c338be91edc5db5a8ddd26b303fa/pywavelets-1.9.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:c8f8b1cc2df012401cb837ee6fa2f59607c7b4fe0ff409d9a4f6906daf40dc86", size = 4437654, upload-time = "2025-08-04T16:19:22.359Z" },
-    { url = "https://files.pythonhosted.org/packages/92/44/c9b25084048d9324881a19b88e0969a4141bcfdc1d218f1b4b680b7af1c1/pywavelets-1.9.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:db43969c7a8fbb17693ecfd14f21616edc3b29f0e47a49b32fa4127c01312a67", size = 4496435, upload-time = "2025-08-04T16:19:23.842Z" },
-    { url = "https://files.pythonhosted.org/packages/cd/b6/b27ec18c72b1dee3314e297af39c5f8136d43cc130dd93cb6c178ca820e5/pywavelets-1.9.0-cp313-cp313-win32.whl", hash = "sha256:9e7d60819d87dcd6c68a2d1bc1d37deb1f4d96607799ab6a25633ea484dcda41", size = 4132709, upload-time = "2025-08-04T16:19:25.415Z" },
-    { url = "https://files.pythonhosted.org/packages/0a/87/78ef3f9fb36cdb16ee82371d22c3a7c89eeb79ec8c9daef6222060da6c79/pywavelets-1.9.0-cp313-cp313-win_amd64.whl", hash = "sha256:0d70da9d7858c869e24dc254f16a61dc09d8a224cad85a10c393b2eccddeb126", size = 4213377, upload-time = "2025-08-04T16:19:26.875Z" },
-    { url = "https://files.pythonhosted.org/packages/8b/cd/ca0d9db0ff29e3843f6af60c2f5eb588794e05ca8eeb872a595867b1f3f5/pywavelets-1.9.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:4dc85f44c38d76a184a1aa2cb038f802c3740428c9bb877525f4be83a223b134", size = 4354336, upload-time = "2025-08-04T16:19:28.745Z" },
-    { url = "https://files.pythonhosted.org/packages/82/d6/70afefcc1139f37d02018a3b1dba3b8fc87601bb7707d9616b7f7a76e269/pywavelets-1.9.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:7acf6f950c6deaecd210fbff44421f234a8ca81eb6f4da945228e498361afa9d", size = 4335721, upload-time = "2025-08-04T16:19:30.371Z" },
-    { url = "https://files.pythonhosted.org/packages/cd/3a/713f731b9ed6df0c36269c8fb62be8bb28eb343b9e26b13d6abda37bce38/pywavelets-1.9.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:144d4fc15c98da56654d0dca2d391b812b8d04127b194a37ad4a497f8e887141", size = 4418702, upload-time = "2025-08-04T16:19:31.743Z" },
-    { url = "https://files.pythonhosted.org/packages/44/e8/f801eb4b5f7a316ba20054948c5d6b27b879c77fab2674942e779974bd86/pywavelets-1.9.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1aa3729585408a979d655736f74b995b511c86b9be1544f95d4a3142f8f4b8b5", size = 4470023, upload-time = "2025-08-04T16:19:32.963Z" },
-    { url = "https://files.pythonhosted.org/packages/e9/cc/44b002cb16f2a392f2082308dd470b3f033fa4925d3efa7c46f790ce895a/pywavelets-1.9.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:e0e24ad6b8eb399c49606dd1fcdcbf9749ad7f6d638be3fe6f59c1f3098821e2", size = 4426498, upload-time = "2025-08-04T16:19:34.151Z" },
-    { url = "https://files.pythonhosted.org/packages/91/fe/2b70276ede7878c5fe8356ca07574db5da63e222ce39a463e84bfad135e8/pywavelets-1.9.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:3830e6657236b53a3aae20c735cccead942bb97c54bbca9e7d07bae01645fe9c", size = 4477528, upload-time = "2025-08-04T16:19:35.932Z" },
-    { url = "https://files.pythonhosted.org/packages/e7/ed/d58b540c15e36508cfeded7b0d39493e811b0dce18d9d4e6787fb2e89685/pywavelets-1.9.0-cp313-cp313t-win32.whl", hash = "sha256:81bb65facfbd7b50dec50450516e72cdc51376ecfdd46f2e945bb89d39bfb783", size = 4186493, upload-time = "2025-08-04T16:19:37.198Z" },
-    { url = "https://files.pythonhosted.org/packages/84/b2/12a849650d618a86bbe4d8876c7e20a7afe59a8cad6f49c57eca9af26dfa/pywavelets-1.9.0-cp313-cp313t-win_amd64.whl", hash = "sha256:47d52cf35e2afded8cfe1133663f6f67106a3220b77645476ae660ad34922cb4", size = 4274821, upload-time = "2025-08-04T16:19:38.436Z" },
-]
-
 [[package]]
 name = "pywinpty"
 version = "3.0.3"
@@ -4087,18 +4004,6 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/00/c0/8f5d070730d7836adc9c9b6408dec68c6ced86b304a9b26a14df072a6e8c/traitlets-5.14.3-py3-none-any.whl", hash = "sha256:b74e89e397b1ed28cc831db7aea759ba6640cb3de13090ca145426688ff1ac4f", size = 85359, upload-time = "2024-04-19T11:11:46.763Z" },
 ]
 
-[[package]]
-name = "traittypes"
-version = "0.2.3"
-source = { registry = "https://pypi.org/simple" }
-dependencies = [
-    { name = "traitlets" },
-]
-sdist = { url = "https://files.pythonhosted.org/packages/d6/8d/37d686f52dfbccc47b857751531ffdec262b0f35158dd3b306030dafdb83/traittypes-0.2.3.tar.gz", hash = "sha256:212feed38d566d772648768b78d3347c148ef23915b91c02078188e631316c86", size = 16003, upload-time = "2025-10-22T11:06:09.952Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/8d/c0/fdf9d3ee103ce66a55f0532835ad5e154226c5222423c6636ba049dc42fc/traittypes-0.2.3-py2.py3-none-any.whl", hash = "sha256:49016082ce740d6556d9bb4672ee2d899cd14f9365f17cbb79d5d96b47096d4e", size = 8130, upload-time = "2025-10-22T11:06:08.824Z" },
-]
-
 [[package]]
 name = "trame"
 version = "3.12.0"

From 16e39778e977aa6d24ffff4dac3ad3b12c81201f Mon Sep 17 00:00:00 2001
From: "pre-commit-ci-lite[bot]"
 <117423508+pre-commit-ci-lite[bot]@users.noreply.github.com>
Date: Fri, 19 Jun 2026 17:02:28 +0000
Subject: [PATCH 2/3] [pre-commit.ci lite] apply automatic fixes

---
 .pre-commit-config.yaml     |  4 ++--
 pyproject.toml              | 18 +++++++++---------
 src/cytodataframe/engine.py |  4 +---
 src/cytodataframe/schema.py | 13 ++++---------
 tests/test_lazy.py          | 12 +++---------
 tests/test_schema.py        | 14 ++++++--------
 6 files changed, 25 insertions(+), 40 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index abcf4db..2542871 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -15,7 +15,7 @@ repos:
     -   id: check-yaml
     -   id: detect-private-key
 -   repo: https://github.com/tox-dev/pyproject-fmt
-    rev: "v2.23.0"
+    rev: "v2.25.0"
     hooks:
     -   id: pyproject-fmt
 -   repo: https://github.com/codespell-project/codespell
@@ -50,7 +50,7 @@ repos:
     hooks:
     -   id: actionlint
 -   repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: "v0.15.15"
+    rev: "v0.15.18"
     hooks:
     -   id: ruff-format
     -   id: ruff-check
diff --git a/pyproject.toml b/pyproject.toml
index 5186635..4a475ba 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -30,6 +30,9 @@ dependencies = [
   "pyarrow>=16",
   "scikit-image>0.19.3",
 ]
+optional-dependencies.all = [
+  "cytodataframe[ome,viz3d]",
+]
 # Optional feature stacks. Install with e.g. `pip install cytodataframe[viz3d,ome]`.
 # These are imported lazily, so the core package imports fine without them.
 optional-dependencies.ome = [
@@ -43,9 +46,6 @@ optional-dependencies.viz3d = [
   "trame-vtk>=2.10",
   "trame-vuetify>=3.1",
 ]
-optional-dependencies.all = [
-  "cytodataframe[ome,viz3d]",
-]
 
 [dependency-groups]
 dev = [
@@ -59,13 +59,13 @@ dev = [
   "jupyterlab>=4.3,<5",
   "jupyterlab-code-formatter>=3.0.2,<4",
   "jupytext>=1.16.4,<2",
+  # optional-feature stacks needed to exercise the full test suite: ome-arrow, pyvista, trame, trame-vtk, trame-vuetify
+  "ome-arrow>=0.0.3,<0.0.10",
   "poethepoet>=0.37,<0.47",
   "pytest>=8.3.3,<10",
   "pytest-cov>=5,<8",
-  "sqlalchemy>=1.3.6,<3",
-  # optional-feature stacks needed to exercise the full test suite
-  "ome-arrow>=0.0.3,<0.0.10",
   "pyvista>=0.46.4",
+  "sqlalchemy>=1.3.6,<3",
   "trame>=3.12",
   "trame-vtk>=2.10",
   "trame-vuetify>=3.1",
@@ -80,14 +80,14 @@ docs = [
 ]
 
 [tool.setuptools]
-package-dir = { "" = "src" }
 packages.find.where = [ "src" ]
+package-dir = { "" = "src" }
 
 [tool.setuptools_scm]
-root = "."
+version_file = "src/cytodataframe/_version.py"
 version_scheme = "no-guess-dev"
 local_scheme = "no-local-version"
-version_file = "src/cytodataframe/_version.py"
+root = "."
 
 [tool.uv]
 default-groups = [
diff --git a/src/cytodataframe/engine.py b/src/cytodataframe/engine.py
index 3b20b80..ecd1e92 100644
--- a/src/cytodataframe/engine.py
+++ b/src/cytodataframe/engine.py
@@ -197,9 +197,7 @@ def normalize_to_pandas(data: TabularData) -> pd.DataFrame:
     return to_pandas(data)
 
 
-def scan_parquet(
-    source: Union[str, pathlib.Path], **kwargs: Any
-) -> "pl.LazyFrame":
+def scan_parquet(source: Union[str, pathlib.Path], **kwargs: Any) -> "pl.LazyFrame":
     """
     Lazily scan a Parquet file/dataset into a :class:`polars.LazyFrame`.
 
diff --git a/src/cytodataframe/schema.py b/src/cytodataframe/schema.py
index 5ec5982..cc19a5f 100644
--- a/src/cytodataframe/schema.py
+++ b/src/cytodataframe/schema.py
@@ -75,8 +75,7 @@
 def _is_image_column(name: str) -> bool:
     """Return True when a column name references an image filename or path."""
     return bool(
-        _IMAGE_FILENAME_PATTERN.search(name)
-        or _IMAGE_PATHNAME_PATTERN.search(name)
+        _IMAGE_FILENAME_PATTERN.search(name) or _IMAGE_PATHNAME_PATTERN.search(name)
     )
 
 
@@ -300,13 +299,11 @@ def validate(self, strict: bool = False) -> List[str]:
         overlap_fg = feature_set & geometry_set
         if overlap_fm:
             issues.append(
-                f"Columns classified as both feature and metadata: "
-                f"{sorted(overlap_fm)}"
+                f"Columns classified as both feature and metadata: {sorted(overlap_fm)}"
             )
         if overlap_fg:
             issues.append(
-                f"Columns classified as both feature and geometry: "
-                f"{sorted(overlap_fg)}"
+                f"Columns classified as both feature and geometry: {sorted(overlap_fg)}"
             )
         if self.image_key is not None and self.image_key not in metadata_set:
             issues.append(
@@ -328,9 +325,7 @@ def require(self, *keys: str) -> "CytoSchema":
         """
         missing = [key for key in keys if getattr(self, key, None) is None]
         if missing:
-            raise ValueError(
-                f"CytoSchema is missing required key(s): {missing}"
-            )
+            raise ValueError(f"CytoSchema is missing required key(s): {missing}")
         return self
 
     def to_dict(self) -> dict:
diff --git a/tests/test_lazy.py b/tests/test_lazy.py
index daa1aa9..c5af565 100644
--- a/tests/test_lazy.py
+++ b/tests/test_lazy.py
@@ -35,9 +35,7 @@ def test_to_lazy_returns_cytolazyframe(profiles: pd.DataFrame):
 
 def test_lazy_filter_matches_pandas(profiles: pd.DataFrame):
     cdf = CytoDataFrame(profiles)
-    lazy_result = (
-        cdf.to_lazy().filter(pl.col("Cells_AreaShape_Area") >= 30.0).collect()
-    )
+    lazy_result = cdf.to_lazy().filter(pl.col("Cells_AreaShape_Area") >= 30.0).collect()
     pandas_result = profiles[profiles["Cells_AreaShape_Area"] >= 30.0]
 
     assert isinstance(lazy_result, CytoDataFrame)
@@ -51,9 +49,7 @@ def test_lazy_filter_matches_pandas(profiles: pd.DataFrame):
 def test_lazy_eager_equivalence(profiles: pd.DataFrame):
     """Lazy and eager polars execution produce identical results."""
     cdf = CytoDataFrame(profiles)
-    lazy_df = (
-        cdf.to_lazy().filter(pl.col("Metadata_Well") == "B02").to_polars()
-    )
+    lazy_df = cdf.to_lazy().filter(pl.col("Metadata_Well") == "B02").to_polars()
     eager_df = cdf.to_polars().filter(pl.col("Metadata_Well") == "B02")
     assert lazy_df.equals(eager_df)
 
@@ -102,9 +98,7 @@ def test_lazy_join(profiles: pd.DataFrame):
     annotations = pl.DataFrame(
         {"Metadata_Well": ["A01", "B02"], "treatment": ["drug", "ctrl"]}
     )
-    result = (
-        cdf.to_lazy().join(annotations, on="Metadata_Well", how="inner").collect()
-    )
+    result = cdf.to_lazy().join(annotations, on="Metadata_Well", how="inner").collect()
     assert "treatment" in result.columns
     # only A01 (2 rows) + B02 (2 rows) survive the inner join
     assert len(result) == 4
diff --git a/tests/test_schema.py b/tests/test_schema.py
index e4cced9..f3dbbc2 100644
--- a/tests/test_schema.py
+++ b/tests/test_schema.py
@@ -66,9 +66,7 @@ def test_schema_classification_buckets(cellprofiler_frame: pd.DataFrame):
 def test_schema_inference_matches_across_backends(cellprofiler_frame: pd.DataFrame):
     """pandas, polars, and Arrow inference agree."""
     from_pandas = CytoSchema.from_pandas(cellprofiler_frame).to_dict()
-    from_polars = CytoSchema.from_polars(
-        pl.from_pandas(cellprofiler_frame)
-    ).to_dict()
+    from_polars = CytoSchema.from_polars(pl.from_pandas(cellprofiler_frame)).to_dict()
     from_arrow = CytoSchema.from_arrow(
         pa.Table.from_pandas(cellprofiler_frame, preserve_index=False).schema
     ).to_dict()
@@ -77,9 +75,10 @@ def test_schema_inference_matches_across_backends(cellprofiler_frame: pd.DataFra
 
 def test_schema_infer_dispatch(cellprofiler_frame: pd.DataFrame):
     table = pa.Table.from_pandas(cellprofiler_frame, preserve_index=False)
-    assert CytoSchema.infer(table).to_dict() == CytoSchema.infer(
-        cellprofiler_frame
-    ).to_dict()
+    assert (
+        CytoSchema.infer(table).to_dict()
+        == CytoSchema.infer(cellprofiler_frame).to_dict()
+    )
     assert CytoSchema.infer(pl.from_pandas(cellprofiler_frame).lazy()).to_dict() == (
         CytoSchema.infer(cellprofiler_frame).to_dict()
     )
@@ -138,8 +137,7 @@ def test_cytodataframe_cyto_schema_property(cellprofiler_frame: pd.DataFrame):
 )
 def test_schema_partition_invariants(columns: list, numeric_seed: list):
     numeric = {
-        name: numeric_seed[idx % len(numeric_seed)]
-        for idx, name in enumerate(columns)
+        name: numeric_seed[idx % len(numeric_seed)] for idx, name in enumerate(columns)
     }
     schema = CytoSchema.from_columns(columns, numeric=numeric)
 

From f7a15d62d6888622bae140f74521037e5e385dfd Mon Sep 17 00:00:00 2001
From: d33bs <ekgto445@gmail.com>
Date: Fri, 19 Jun 2026 12:09:01 -0600
Subject: [PATCH 3/3] address coderabbit review

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 62a1c50..a4c82ea 100644
--- a/README.md
+++ b/README.md
@@ -25,7 +25,7 @@ With CytoDataFrame you can:
 - Automatically detect 3D image volumes and render interactive [trame](https://github.com/Kitware/trame) views in notebooks when 3D dependencies are installed (with graceful fallback otherwise).
 - Interoperate with the [Polars](https://pola.rs/) and [Apache Arrow](https://arrow.apache.org/) ecosystems while keeping the familiar Pandas-based experience.
 
-### Polars and Arrow interoperability
+## Polars and Arrow interoperability
 
 CytoDataFrame uses Apache Arrow as its canonical schema/interchange contract and
 Polars as an execution engine, while Pandas remains the compatibility layer. You