diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 89448a3..06b87a7 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -15,7 +15,7 @@ repos: - id: check-yaml - id: detect-private-key - repo: https://github.com/tox-dev/pyproject-fmt - rev: "v2.18.1" + rev: "v2.19.0" hooks: - id: pyproject-fmt - repo: https://github.com/codespell-project/codespell @@ -50,7 +50,7 @@ repos: hooks: - id: actionlint - repo: https://github.com/astral-sh/ruff-pre-commit - rev: "v0.15.5" + rev: "v0.15.6" hooks: - id: ruff-format - id: ruff-check diff --git a/README.md b/README.md index dff1373..3c97656 100644 --- a/README.md +++ b/README.md @@ -30,6 +30,12 @@ For 3D notebook display behavior: - Disable automatic trame switching with `display_options={"auto_trame_for_3d": False}`. - Force trame layout regardless of auto-detection with `display_options={"view": "trame"}`. +For row display in notebook/widget tables: + +- CytoDataFrame respects pandas display settings (`display.max_rows`, `display.min_rows`). +- When the table is larger than `display.max_rows`, the widget table inserts a midpoint ellipsis row (`…`) to indicate omitted rows. +- You can control truncation behavior by changing pandas display options before rendering. + 📓 ___Want to see CytoDataFrame in action?___ Check out our [example notebook](docs/src/examples/cytodataframe_at_a_glance.ipynb) for a quick tour of its key features. > ✨ CytoDataFrame development began within **[coSMicQC](https://github.com/cytomining/coSMicQC)** - a single-cell profile quality control package. diff --git a/docs/src/examples/cytodataframe_at_a_glance.ipynb b/docs/src/examples/cytodataframe_at_a_glance.ipynb index ffaf8c0..7b07009 100644 --- a/docs/src/examples/cytodataframe_at_a_glance.ipynb +++ b/docs/src/examples/cytodataframe_at_a_glance.ipynb @@ -49,19 +49,31 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 818 ms, sys: 481 ms, total: 1.3 s\n", - "Wall time: 614 ms\n" + "CPU times: user 919 ms, sys: 636 ms, total: 1.56 s\n", + "Wall time: 554 ms\n" ] }, + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "909d4d7bf43c4542a06cc68f61236ff1", + "model_id": "b4a17a473aca47ffaedfde71d4647b65", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "VBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', style=SliderStyle…" + "VBox(children=(HBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', la…" ] }, "metadata": {}, @@ -76,6 +88,7 @@ " \n", " Metadata_ImageNumber\n", " Cells_Number_Object_Number\n", + " Nuclei_Texture_Variance_RNA_5_03_256\n", " Image_FileName_OrigAGP\n", " Image_FileName_OrigDNA\n", " Image_FileName_OrigRNA\n", @@ -86,6 +99,7 @@ " 0\n", " 1\n", " 1\n", + " 106.035972\n", " \n", " \n", " \n", @@ -94,6 +108,7 @@ " 1\n", " 1\n", " 2\n", + " 33.590487\n", " \n", " \n", " \n", @@ -102,6 +117,7 @@ " 2\n", " 1\n", " 3\n", + " 53.527363\n", " \n", " \n", " \n", @@ -135,6 +151,7 @@ " [\n", " \"Metadata_ImageNumber\",\n", " \"Cells_Number_Object_Number\",\n", + " \"Nuclei_Texture_Variance_RNA_5_03_256\",\n", " \"Image_FileName_OrigAGP\",\n", " \"Image_FileName_OrigDNA\",\n", " \"Image_FileName_OrigRNA\",\n", @@ -155,19 +172,31 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 824 ms, sys: 485 ms, total: 1.31 s\n", - "Wall time: 509 ms\n" + "CPU times: user 891 ms, sys: 588 ms, total: 1.48 s\n", + "Wall time: 516 ms\n" ] }, + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "5724204c102e474f870b0d5e41bfd9be", + "model_id": "a0484aa10f164b59a4e96e0571d0dab4", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "VBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', style=SliderStyle…" + "VBox(children=(HBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', la…" ] }, "metadata": {}, @@ -260,19 +289,31 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 795 ms, sys: 548 ms, total: 1.34 s\n", - "Wall time: 447 ms\n" + "CPU times: user 868 ms, sys: 633 ms, total: 1.5 s\n", + "Wall time: 466 ms\n" ] }, + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "8fce594f507a4fddb4e25b6abcf92b27", + "model_id": "aa29092edf7e45899f5223330f7bcf4d", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "VBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', style=SliderStyle…" + "VBox(children=(HBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', la…" ] }, "metadata": {}, @@ -366,19 +407,31 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 929 ms, sys: 548 ms, total: 1.48 s\n", - "Wall time: 599 ms\n" + "CPU times: user 930 ms, sys: 646 ms, total: 1.58 s\n", + "Wall time: 536 ms\n" ] }, + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "537bcb8beb6b440997e05c65ad2c6712", + "model_id": "98737647c2334289b67261b60bad7774", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "VBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', style=SliderStyle…" + "VBox(children=(HBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', la…" ] }, "metadata": {}, @@ -481,19 +534,31 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 845 ms, sys: 523 ms, total: 1.37 s\n", - "Wall time: 514 ms\n" + "CPU times: user 913 ms, sys: 661 ms, total: 1.57 s\n", + "Wall time: 502 ms\n" ] }, + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "22f9247a52ca475a8d037d6227740144", + "model_id": "57cc8f5611a1442ca8b54cee04f85ede", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "VBox(children=(IntSlider(value=10, continuous_update=False, description='Image adjustment:', style=SliderStyle…" + "VBox(children=(HBox(children=(IntSlider(value=10, continuous_update=False, description='Image adjustment:', la…" ] }, "metadata": {}, @@ -585,19 +650,31 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 810 ms, sys: 573 ms, total: 1.38 s\n", - "Wall time: 466 ms\n" + "CPU times: user 769 ms, sys: 481 ms, total: 1.25 s\n", + "Wall time: 468 ms\n" ] }, + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "5ac50878f3f445c1b5c4f1479ea5aa8d", + "model_id": "bd646e0f7bcc46348a62532baafdac12", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "VBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', style=SliderStyle…" + "VBox(children=(HBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', la…" ] }, "metadata": {}, @@ -691,19 +768,31 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 836 ms, sys: 533 ms, total: 1.37 s\n", - "Wall time: 515 ms\n" + "CPU times: user 951 ms, sys: 652 ms, total: 1.6 s\n", + "Wall time: 543 ms\n" ] }, + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "a036410bfb374539ad0c69f52481d0e3", + "model_id": "51dcf433475b4b6c96851aad825def8f", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "VBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', style=SliderStyle…" + "VBox(children=(HBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', la…" ] }, "metadata": {}, @@ -796,19 +885,31 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 838 ms, sys: 489 ms, total: 1.33 s\n", - "Wall time: 520 ms\n" + "CPU times: user 935 ms, sys: 661 ms, total: 1.6 s\n", + "Wall time: 531 ms\n" ] }, + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "9bf2fe6deec0442a9ac320ca7c35a505", + "model_id": "e5ecbebda5784d219b52915e78ec9286", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "VBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', style=SliderStyle…" + "VBox(children=(HBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', la…" ] }, "metadata": {}, @@ -918,19 +1019,31 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 504 ms, sys: 88.6 ms, total: 592 ms\n", - "Wall time: 604 ms\n" + "CPU times: user 508 ms, sys: 94.3 ms, total: 603 ms\n", + "Wall time: 629 ms\n" ] }, + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "1a11d9d982e04fdd80720ec29ee86720", + "model_id": "d97cd74ad34441a5b7e09b7334a05496", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "VBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', style=SliderStyle…" + "VBox(children=(HBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', la…" ] }, "metadata": {}, @@ -1050,19 +1163,31 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 848 ms, sys: 515 ms, total: 1.36 s\n", - "Wall time: 514 ms\n" + "CPU times: user 754 ms, sys: 305 ms, total: 1.06 s\n", + "Wall time: 529 ms\n" ] }, + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "b91a36f356a245dda81ecf3edd717507", + "model_id": "d4e108120e0949fc9fdf4831aa91cf70", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "VBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', style=SliderStyle…" + "VBox(children=(HBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', la…" ] }, "metadata": {}, @@ -1179,19 +1304,31 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 302 ms, sys: 157 ms, total: 459 ms\n", - "Wall time: 211 ms\n" + "CPU times: user 296 ms, sys: 130 ms, total: 425 ms\n", + "Wall time: 214 ms\n" ] }, + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "18fd9eca9a0a4505b329af0c761f702f", + "model_id": "6592e9bfe80043bc8759a66d9cc9ace6", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "VBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', style=SliderStyle…" + "VBox(children=(HBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', la…" ] }, "metadata": {}, @@ -1282,19 +1419,31 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 239 ms, sys: 163 ms, total: 402 ms\n", - "Wall time: 145 ms\n" + "CPU times: user 265 ms, sys: 180 ms, total: 445 ms\n", + "Wall time: 161 ms\n" ] }, + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "4a31350cf3aa4729b8fd52cd92dda632", + "model_id": "5daa5677bb0b4668a2bec808e8818621", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "VBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', style=SliderStyle…" + "VBox(children=(HBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', la…" ] }, "metadata": {}, @@ -1387,19 +1536,31 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 304 ms, sys: 152 ms, total: 455 ms\n", - "Wall time: 215 ms\n" + "CPU times: user 245 ms, sys: 152 ms, total: 396 ms\n", + "Wall time: 149 ms\n" ] }, + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "c832249094414fc48196a28f9304035f", + "model_id": "e89cffe27d954fb18cdb86a76dc9eefa", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "VBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', style=SliderStyle…" + "VBox(children=(HBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', la…" ] }, "metadata": {}, @@ -1502,19 +1663,31 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 813 ms, sys: 98.8 ms, total: 912 ms\n", - "Wall time: 879 ms\n" + "CPU times: user 848 ms, sys: 119 ms, total: 967 ms\n", + "Wall time: 933 ms\n" ] }, + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "523f6bdea17647d38159d54e1e0bca69", + "model_id": "a2a3606cec214cf481faab9c094f25c4", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "VBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', style=SliderStyle…" + "VBox(children=(HBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', la…" ] }, "metadata": {}, @@ -1634,19 +1807,31 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 115 ms, sys: 70 ms, total: 185 ms\n", - "Wall time: 72.5 ms\n" + "CPU times: user 86.6 ms, sys: 21.9 ms, total: 108 ms\n", + "Wall time: 90 ms\n" ] }, + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "d63c937dbfc648c9aaa0282b81892596", + "model_id": "d47761e9fb2b487b834b2d326cb62fd3", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "VBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', style=SliderStyle…" + "VBox(children=(HBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', la…" ] }, "metadata": {}, @@ -1731,6 +1916,330 @@ { "cell_type": "code", "execution_count": 17, + "id": "c68c644b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 116 ms, sys: 61.3 ms, total: 177 ms\n", + "Wall time: 74.9 ms\n" + ] + }, + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "5516c496b742465fb5cdb752cbc94592", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "VBox(children=(HBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', la…" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Static snapshot (for non-interactive view)\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Metadata_ImageNumberNuclei_Number_Object_NumberNuclei_Texture_Variance_DAPI_3_03_256Image_FileName_A647Image_FileName_DAPIImage_FileName_GOLD
0112.484139slide1_A1_M10_CH1_Z09_illumcorrect.tiffslide1_A1_M10_CH2_Z09_illumcorrect.tiff
11212.026326slide1_A1_M10_CH1_Z09_illumcorrect.tiffslide1_A1_M10_CH2_Z09_illumcorrect.tiff
21351.418746slide1_A1_M10_CH1_Z09_illumcorrect.tiffslide1_A1_M10_CH2_Z09_illumcorrect.tiff
31447.049561slide1_A1_M10_CH1_Z09_illumcorrect.tiffslide1_A1_M10_CH2_Z09_illumcorrect.tiff
415117.135912slide1_A1_M10_CH1_Z09_illumcorrect.tiffslide1_A1_M10_CH2_Z09_illumcorrect.tiff
51625.371580slide1_A1_M10_CH1_Z09_illumcorrect.tiffslide1_A1_M10_CH2_Z09_illumcorrect.tiff
61723.930735slide1_A1_M10_CH1_Z09_illumcorrect.tiffslide1_A1_M10_CH2_Z09_illumcorrect.tiff
7182.973642slide1_A1_M10_CH1_Z09_illumcorrect.tiffslide1_A1_M10_CH2_Z09_illumcorrect.tiff
8198.355843slide1_A1_M10_CH1_Z09_illumcorrect.tiffslide1_A1_M10_CH2_Z09_illumcorrect.tiff
9110150.652194slide1_A1_M10_CH1_Z09_illumcorrect.tiffslide1_A1_M10_CH2_Z09_illumcorrect.tiff
101117.919292slide1_A1_M10_CH1_Z09_illumcorrect.tiffslide1_A1_M10_CH2_Z09_illumcorrect.tiff
111120.432249slide1_A1_M10_CH1_Z09_illumcorrect.tiffslide1_A1_M10_CH2_Z09_illumcorrect.tiff
1211318.161879slide1_A1_M10_CH1_Z09_illumcorrect.tiffslide1_A1_M10_CH2_Z09_illumcorrect.tiff
1311432.575908slide1_A1_M10_CH1_Z09_illumcorrect.tiffslide1_A1_M10_CH2_Z09_illumcorrect.tiff
1411529.200237slide1_A1_M10_CH1_Z09_illumcorrect.tiffslide1_A1_M10_CH2_Z09_illumcorrect.tiff
151169.793458slide1_A1_M10_CH1_Z09_illumcorrect.tiffslide1_A1_M10_CH2_Z09_illumcorrect.tiff
161178.513971slide1_A1_M10_CH1_Z09_illumcorrect.tiffslide1_A1_M10_CH2_Z09_illumcorrect.tiff
1711831.487882slide1_A1_M10_CH1_Z09_illumcorrect.tiffslide1_A1_M10_CH2_Z09_illumcorrect.tiff
181194.329104slide1_A1_M10_CH1_Z09_illumcorrect.tiffslide1_A1_M10_CH2_Z09_illumcorrect.tiff
1912032.853237slide1_A1_M10_CH1_Z09_illumcorrect.tiffslide1_A1_M10_CH2_Z09_illumcorrect.tiff
201217.200573slide1_A1_M10_CH1_Z09_illumcorrect.tiffslide1_A1_M10_CH2_Z09_illumcorrect.tiff
211223.978256slide1_A1_M10_CH1_Z09_illumcorrect.tiffslide1_A1_M10_CH2_Z09_illumcorrect.tiff
2212332.280016slide1_A1_M10_CH1_Z09_illumcorrect.tiffslide1_A1_M10_CH2_Z09_illumcorrect.tiff
2312426.525734slide1_A1_M10_CH1_Z09_illumcorrect.tiffslide1_A1_M10_CH2_Z09_illumcorrect.tiff
2412551.948095slide1_A1_M10_CH1_Z09_illumcorrect.tiffslide1_A1_M10_CH2_Z09_illumcorrect.tiff
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%time\n", + "# view nuclear speckles data with images and overlaid outlines from masks\n", + "# and also apply a filter to only show rows where the value for\n", + "# \"Nuclei_Texture_Variance_DAPI_3_03_256\".\n", + "CytoDataFrame(\n", + " data=f\"{nuclear_speckles_path}/test_slide1_converted.parquet\",\n", + " data_context_dir=f\"{nuclear_speckles_path}/images/plate1\",\n", + " data_mask_context_dir=f\"{nuclear_speckles_path}/masks/plate1\",\n", + " display_options={\n", + " \"filter_columns\": [\"Nuclei_Texture_Variance_DAPI_3_03_256\"],\n", + " },\n", + ")[\n", + " [\n", + " \"Metadata_ImageNumber\",\n", + " \"Nuclei_Number_Object_Number\",\n", + " \"Nuclei_Texture_Variance_DAPI_3_03_256\",\n", + " \"Image_FileName_A647\",\n", + " \"Image_FileName_DAPI\",\n", + " \"Image_FileName_GOLD\",\n", + " ]\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": 18, "id": "4c9af999-c9a2-4408-aa16-9437d08013ae", "metadata": {}, "outputs": [ @@ -1738,19 +2247,31 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 316 ms, sys: 205 ms, total: 521 ms\n", - "Wall time: 184 ms\n" + "CPU times: user 395 ms, sys: 224 ms, total: 619 ms\n", + "Wall time: 249 ms\n" ] }, + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "ae8351ea01f64659a6c1e2454f08e401", + "model_id": "ae2fd523eaf04609866badd5e3de2d12", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "VBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', style=SliderStyle…" + "VBox(children=(HBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', la…" ] }, "metadata": {}, @@ -1819,7 +2340,7 @@ "data": { "text/plain": [] }, - "execution_count": 17, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } @@ -1848,7 +2369,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 19, "id": "e8ebb16d-ee5f-4a34-b599-aef245b57705", "metadata": {}, "outputs": [ @@ -1856,15 +2377,15 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 0 ns, sys: 0 ns, total: 0 ns\n", - "Wall time: 2.15 μs\n" + "CPU times: user 1e+03 ns, sys: 0 ns, total: 1e+03 ns\n", + "Wall time: 3.1 μs\n" ] }, { "data": { "text/plain": [] }, - "execution_count": 18, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ -1878,7 +2399,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 20, "id": "0892633a-fdd2-448a-a96a-54dad4b5caf8", "metadata": {}, "outputs": [ @@ -1886,19 +2407,31 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 861 ms, sys: 213 ms, total: 1.07 s\n", - "Wall time: 950 ms\n" + "CPU times: user 820 ms, sys: 192 ms, total: 1.01 s\n", + "Wall time: 890 ms\n" ] }, + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "5f360d46c54649a1a6a8371696720c70", + "model_id": "e19fb7d40b74464dabceb6ef487c7fb2", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "VBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', style=SliderStyle…" + "VBox(children=(HBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', la…" ] }, "metadata": {}, @@ -2003,7 +2536,7 @@ "data": { "text/plain": [] }, - "execution_count": 19, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -2020,7 +2553,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 21, "id": "881e0542", "metadata": { "lines_to_next_cell": 0 @@ -2030,14 +2563,14 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 6.69 ms, sys: 1.72 ms, total: 8.41 ms\n", - "Wall time: 14.1 ms\n" + "CPU times: user 5.68 ms, sys: 1.28 ms, total: 6.96 ms\n", + "Wall time: 8.21 ms\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "5668ddbf35d54867b361b9c207878f15", + "model_id": "183410200d274f21845a1cf5f2782a09", "version_major": 2, "version_minor": 0 }, @@ -2093,7 +2626,7 @@ "data": { "text/plain": [] }, - "execution_count": 20, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } @@ -2117,22 +2650,24 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 22, "id": "18ed220b", - "metadata": {}, + "metadata": { + "lines_to_next_cell": 0 + }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 4.92 ms, sys: 639 μs, total: 5.56 ms\n", - "Wall time: 5.38 ms\n" + "CPU times: user 5.63 ms, sys: 1.85 ms, total: 7.48 ms\n", + "Wall time: 6.12 ms\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "c3ad03a61372443582dacc58d7ab80f1", + "model_id": "f33dadf3b55e42adb775706372ca2412", "version_major": 2, "version_minor": 0 }, @@ -2188,14 +2723,13 @@ "data": { "text/plain": [] }, - "execution_count": 21, + "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "%%time\n", - "\n", "# read 3d images with segmentation masks and show the\n", "# segmentation masks are also 3D.\n", "cdf = CytoDataFrame(\n", diff --git a/docs/src/examples/cytodataframe_at_a_glance.py b/docs/src/examples/cytodataframe_at_a_glance.py index 9050052..16d53cb 100644 --- a/docs/src/examples/cytodataframe_at_a_glance.py +++ b/docs/src/examples/cytodataframe_at_a_glance.py @@ -45,6 +45,7 @@ [ "Metadata_ImageNumber", "Cells_Number_Object_Number", + "Nuclei_Texture_Variance_RNA_5_03_256", "Image_FileName_OrigAGP", "Image_FileName_OrigDNA", "Image_FileName_OrigRNA", @@ -305,6 +306,28 @@ ] ][:3] +# %%time +# view nuclear speckles data with images and overlaid outlines from masks +# and also apply a filter to only show rows where the value for +# "Nuclei_Texture_Variance_DAPI_3_03_256". +CytoDataFrame( + data=f"{nuclear_speckles_path}/test_slide1_converted.parquet", + data_context_dir=f"{nuclear_speckles_path}/images/plate1", + data_mask_context_dir=f"{nuclear_speckles_path}/masks/plate1", + display_options={ + "filter_columns": ["Nuclei_Texture_Variance_DAPI_3_03_256"], + }, +)[ + [ + "Metadata_ImageNumber", + "Nuclei_Number_Object_Number", + "Nuclei_Texture_Variance_DAPI_3_03_256", + "Image_FileName_A647", + "Image_FileName_DAPI", + "Image_FileName_GOLD", + ] +] + # %%time # view ALSF pediatric cancer atlas plate BR00143976 with images cdf = CytoDataFrame( @@ -356,7 +379,6 @@ cdf[["ImageNumber", "ObjectNumber", "FileName_Nuclei"]][:3] # + # %%time - # read 3d images with segmentation masks and show the # segmentation masks are also 3D. cdf = CytoDataFrame( diff --git a/pyproject.toml b/pyproject.toml index 5c165fa..54d132f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -110,6 +110,7 @@ lint.select = [ ] # Ignore `E402` and `F401` (unused imports) in all `__init__.py` files lint.per-file-ignores."__init__.py" = [ "E402", "F401" ] +lint.per-file-ignores."docs/src/examples/cytodataframe_at_a_glance.py" = [ "E402" ] lint.per-file-ignores."src/cytodataframe/*.py" = [ "ANN401", "PLC0415" ] lint.per-file-ignores."src/cytodataframe/image.py" = [ "PLR2004" ] # ignore typing rules for tests diff --git a/src/cytodataframe/frame.py b/src/cytodataframe/frame.py index dc4c1ed..937d0f1 100644 --- a/src/cytodataframe/frame.py +++ b/src/cytodataframe/frame.py @@ -12,7 +12,7 @@ import tempfile import uuid import warnings -from collections import OrderedDict +from collections import Counter, OrderedDict from io import BytesIO, StringIO from typing import ( Any, @@ -63,6 +63,25 @@ MIN_RGB_SPATIAL_DIM = 8 MAX_RGB_ASPECT_RATIO = 4.0 MIN_POSITION_COMPONENTS = 2 +FILTER_SLIDER_TOTAL_WIDTH_PX = 430 +FILTER_SLIDER_LABEL_WIDTH_PX = 170 +FILTER_SLIDER_READOUT_WIDTH_PX = 96 +MAX_FILTER_SLIDER_STOPS = 500 +# Fine-grained track-bound alignment for the background distribution plot. +# Positive values shift inward; negative values shift outward. +FILTER_SLIDER_TRACK_LEFT_ADJUST_PX = 13 +FILTER_SLIDER_TRACK_RIGHT_INSET_PX = 13 +FILTER_PLOT_KDE_MIN_SAMPLES = 60 +FILTER_PLOT_KDE_MAX_SAMPLES = 180 +FILTER_PLOT_KDE_MIN_BANDWIDTH = 0.1 +FILTER_PLOT_KDE_BANDWIDTH_SCALE = 0.3 +FILTER_PLOT_Y_SCALE_DEFAULT = "asinh" +FILTER_PLOT_Y_MIN_PERCENTILE_DEFAULT = 10.0 +FILTER_PLOT_Y_MAX_PERCENTILE_DEFAULT = 80.0 +FILTER_PLOT_Y_MAX_PERCENTILE_UPPER = 100.0 +FILTER_PLOT_Y_GAMMA_DEFAULT = 0.6 +FILTER_PLOT_Y_TAIL_LOG_SCALE_DEFAULT = 0.35 +FILTER_SLIDER_CSS_CLASS = "cdf-filter-range-slider" # provide backwards compatibility for Self type in earlier Python versions. # see: https://peps.python.org/pep-0484/#annotating-instance-and-class-methods @@ -230,8 +249,14 @@ def __init__( # noqa: PLR0913 # add widget control meta "_widget_state": { "scale": initial_brightness, + "filter_column": None, + "filter_range": None, + "filter_columns": [], + "filter_ranges": {}, "shown": False, # whether VBox has been displayed "observing": False, # whether slider observer is attached + "filter_observing": {}, # per-column observer attachment flags + "filter_readout_css_injected": False, }, "_snapshot_cache": {}, "_volume_cache": {}, @@ -251,6 +276,7 @@ def __init__( # noqa: PLR0913 overflow="visible", ) ), + "_filter_range_sliders": {}, } if self._custom_attrs["data_context_dir"] is not None: @@ -361,6 +387,9 @@ def __getitem__(self: CytoDataFrame_type, key: Union[int, str]) -> Any: # add widget control meta cdf._custom_attrs["_widget_state"] = self._custom_attrs["_widget_state"] cdf._custom_attrs["_scale_slider"] = self._custom_attrs["_scale_slider"] + cdf._custom_attrs["_filter_range_sliders"] = self._custom_attrs[ + "_filter_range_sliders" + ] cdf._custom_attrs["_output"] = self._custom_attrs["_output"] return cdf @@ -417,6 +446,9 @@ def _return_cytodataframe( # add widget control meta cdf._custom_attrs["_widget_state"] = self._custom_attrs["_widget_state"] cdf._custom_attrs["_scale_slider"] = self._custom_attrs["_scale_slider"] + cdf._custom_attrs["_filter_range_sliders"] = self._custom_attrs[ + "_filter_range_sliders" + ] cdf._custom_attrs["_output"] = self._custom_attrs["_output"] return cdf @@ -516,11 +548,765 @@ def _on_slider_change(self: CytoDataFrame_type, change: Dict[str, Any]) -> None: """ self._custom_attrs["_widget_state"]["scale"] = change["new"] - self._custom_attrs["_output"].clear_output(wait=True) + self._show_output_loading_indicator() # redraw output after adjustments to scale state self._render_output() + def _on_filter_slider_change( + self: CytoDataFrame_type, change: Dict[str, Any] + ) -> None: + """Update widget filter state when the selection range changes.""" + slider_owner = change.get("owner") + filter_col = ( + getattr(slider_owner, "_cyto_filter_column", None) + if slider_owner is not None + else None + ) + selection = change.get("new") + if ( + not isinstance(selection, tuple) + or len(selection) != MIN_POSITION_COMPONENTS + ): + return + try: + lower = float(selection[0]) + upper = float(selection[1]) + except (TypeError, ValueError): + return + + normalized_range = ( + min(lower, upper), + max(lower, upper), + ) + state = self._custom_attrs["_widget_state"] + if filter_col is not None: + state.setdefault("filter_ranges", {})[str(filter_col)] = normalized_range + # preserve legacy single-filter fields for backward compatibility + if state.get("filter_column") is None: + state["filter_column"] = filter_col + if str(state.get("filter_column")) == str(filter_col): + state["filter_range"] = normalized_range + else: + state["filter_range"] = normalized_range + if state.get("filter_column") is not None: + state.setdefault("filter_ranges", {})[str(state["filter_column"])] = ( + normalized_range + ) + self._show_output_loading_indicator() + self._render_output() + + def _show_output_loading_indicator( + self: CytoDataFrame_type, + message: str = "Updating table...", + ) -> None: + """Render a lightweight loading indicator in the output area.""" + self._custom_attrs["_output"].clear_output(wait=True) + with self._custom_attrs["_output"]: + display( + HTML( + "" + "
" + "" + f"{message}" + "
" + ) + ) + + def _get_filter_slider_columns(self: CytoDataFrame_type) -> List[Any]: + """Return configured filter columns, preserving user-specified order.""" + display_options = self._custom_attrs.get("display_options", {}) or {} + configured_many = display_options.get("filter_columns") + configured_single = display_options.get("filter_column") + if isinstance(configured_many, (list, tuple)) and len(configured_many) == 0: + configured_many = None + configured: List[Any] = [] + if isinstance(configured_many, (list, tuple)) and len(configured_many) > 0: + configured.extend(configured_many) + elif configured_many is not None: + configured.append(configured_many) + elif configured_single is not None: + configured.append(configured_single) + + if not configured: + return [] + + selected_columns: List[Any] = [] + seen: set[str] = set() + for requested in configured: + requested_str = str(requested) + matched = next( + (col for col in self.columns if str(col) == requested_str), + None, + ) + if matched is None: + continue + key = str(matched) + if key in seen: + continue + seen.add(key) + selected_columns.append(matched) + return selected_columns + + def _ensure_filter_range_slider( # noqa: C901, PLR0915 + self: CytoDataFrame_type, filter_col: Optional[Any] = None + ) -> Optional[Any]: + """Build or refresh one range slider for row filtering.""" + if filter_col is None: + columns = self._get_filter_slider_columns() + filter_col = columns[0] if columns else None + state = self._custom_attrs["_widget_state"] + if filter_col is None: + self._custom_attrs["_filter_range_sliders"] = {} + state["filter_columns"] = [] + state["filter_column"] = None + state["filter_range"] = None + state["filter_ranges"] = {} + state["filter_observing"] = {} + return None + + slider_key = str(filter_col) + state["filter_column"] = state.get("filter_column") or filter_col + state.setdefault("filter_ranges", {}) + state.setdefault("filter_observing", {}) + + numeric_values = pd.to_numeric(self[filter_col], errors="coerce").dropna() + if numeric_values.empty: + self._custom_attrs.setdefault("_filter_range_sliders", {}).pop( + slider_key, None + ) + state["filter_ranges"].pop(slider_key, None) + state["filter_observing"].pop(slider_key, None) + if str(state.get("filter_column")) == slider_key: + state["filter_range"] = None + return None + + unique_values = sorted(float(value) for value in pd.unique(numeric_values)) + if not unique_values: + self._custom_attrs.setdefault("_filter_range_sliders", {}).pop( + slider_key, None + ) + state["filter_ranges"].pop(slider_key, None) + state["filter_observing"].pop(slider_key, None) + if str(state.get("filter_column")) == slider_key: + state["filter_range"] = None + return None + + slider_values = unique_values + if len(unique_values) > MAX_FILTER_SLIDER_STOPS: + slider_values = np.linspace( + unique_values[0], + unique_values[-1], + num=MAX_FILTER_SLIDER_STOPS, + dtype=np.float64, + ).tolist() + options = [ + (self._format_filter_slider_label(value), value) for value in slider_values + ] + default_lower = slider_values[0] + default_upper = slider_values[-1] + selected_range = state["filter_ranges"].get(slider_key) + if ( + not isinstance(selected_range, tuple) + or len(selected_range) != MIN_POSITION_COMPONENTS + ): + selected_range = (default_lower, default_upper) + lower = min(float(selected_range[0]), float(selected_range[1])) + upper = max(float(selected_range[0]), float(selected_range[1])) + lower = max(default_lower, min(lower, default_upper)) + upper = max(lower, min(upper, default_upper)) + slider_domain = np.asarray(slider_values, dtype=np.float64) + + def _nearest_slider_index(target: float) -> int: + idx = int(np.searchsorted(slider_domain, target, side="left")) + if idx <= 0: + return 0 + if idx >= slider_domain.size: + return int(slider_domain.size - 1) + left_idx = idx - 1 + right_idx = idx + if abs(float(slider_domain[left_idx]) - target) <= abs( + float(slider_domain[right_idx]) - target + ): + return int(left_idx) + return int(right_idx) + + lower_idx = _nearest_slider_index(lower) + upper_idx = _nearest_slider_index(upper) + upper_idx = max(upper_idx, lower_idx) + lower = float(slider_domain[lower_idx]) + upper = float(slider_domain[upper_idx]) + normalized_range = (lower, upper) + state["filter_ranges"][slider_key] = normalized_range + if str(state.get("filter_column")) == slider_key: + state["filter_range"] = normalized_range + + cached_sliders = self._custom_attrs.setdefault("_filter_range_sliders", {}) + existing_slider = cached_sliders.get(slider_key) + if isinstance(existing_slider, widgets.SelectionRangeSlider): + existing_slider.options = options + existing_slider.value = (lower, upper) + existing_slider.description = f"{filter_col}:" + existing_slider.continuous_update = False + existing_slider.style = { + "description_width": f"{FILTER_SLIDER_LABEL_WIDTH_PX}px" + } + existing_slider.layout = widgets.Layout( + width=f"{FILTER_SLIDER_TOTAL_WIDTH_PX}px" + ) + existing_slider.add_class(FILTER_SLIDER_CSS_CLASS) + existing_slider._cyto_filter_column = filter_col # type: ignore[attr-defined] + return existing_slider + + slider = widgets.SelectionRangeSlider( + options=options, + value=(lower, upper), + description=f"{filter_col}:", + continuous_update=False, + style={"description_width": f"{FILTER_SLIDER_LABEL_WIDTH_PX}px"}, + layout=widgets.Layout(width=f"{FILTER_SLIDER_TOTAL_WIDTH_PX}px"), + ) + slider.add_class(FILTER_SLIDER_CSS_CLASS) + slider._cyto_filter_column = filter_col # type: ignore[attr-defined] + cached_sliders[slider_key] = slider + return slider + + @staticmethod + def _format_filter_slider_label(value: float) -> str: + """Format displayed slider labels with two decimals for float values.""" + value = float(value) + if value.is_integer(): + return f"{int(value)}" + return f"{value:.2f}" + + @staticmethod + def _slider_relative_position(value: float, slider_domain: np.ndarray) -> float: + """Map a numeric value to SelectionRangeSlider's normalized track position.""" + domain_size = int(slider_domain.size) + if domain_size == 0: + return 0.0 + if domain_size == 1: + return 0.5 + + position: float + if value <= slider_domain[0]: + position = 0.0 + elif value >= slider_domain[-1]: + position = 1.0 + else: + right_idx = int(np.searchsorted(slider_domain, value, side="right")) + left_idx = max(0, right_idx - 1) + if right_idx >= domain_size: + position = 1.0 + else: + left_val = float(slider_domain[left_idx]) + right_val = float(slider_domain[right_idx]) + if right_val == left_val: + position = float(left_idx) / float(domain_size - 1) + else: + frac = (value - left_val) / (right_val - left_val) + position = (float(left_idx) + float(frac)) / float(domain_size - 1) + return position + + @staticmethod + def _build_filter_distribution_html( # noqa: C901, PLR0912, PLR0913, PLR0915 + values: pd.Series, + selected_range: Tuple[float, float], + threshold_x: Optional[float] = None, + slider_values: Optional[Sequence[float]] = None, + y_scale: str = FILTER_PLOT_Y_SCALE_DEFAULT, + y_min_percentile: float = FILTER_PLOT_Y_MIN_PERCENTILE_DEFAULT, + y_max_percentile: float = FILTER_PLOT_Y_MAX_PERCENTILE_DEFAULT, + y_gamma: float = FILTER_PLOT_Y_GAMMA_DEFAULT, + y_tail_log_scale: float = FILTER_PLOT_Y_TAIL_LOG_SCALE_DEFAULT, + size_px: Tuple[int, int] = (FILTER_SLIDER_TOTAL_WIDTH_PX, 96), + track_padding_px: Tuple[int, int] = ( + FILTER_SLIDER_LABEL_WIDTH_PX, + FILTER_SLIDER_READOUT_WIDTH_PX, + ), + ) -> str: + """Build an inline SVG area/line plot for filter-value counts.""" + width, height = size_px + numeric_values = pd.to_numeric(values, errors="coerce").dropna() + if numeric_values.empty: + return "" + + values_array = numeric_values.to_numpy(dtype=np.float64, copy=False) + if slider_values is None: + slider_domain = np.sort(np.unique(values_array)) + if slider_domain.size > MAX_FILTER_SLIDER_STOPS: + sample_idx = np.linspace( + 0, + int(slider_domain.size) - 1, + num=MAX_FILTER_SLIDER_STOPS, + dtype=int, + ) + slider_domain = slider_domain[sample_idx] + slider_domain = np.unique(slider_domain) + else: + slider_domain = np.asarray(slider_values, dtype=np.float64) + slider_domain = np.sort(slider_domain[np.isfinite(slider_domain)]) + slider_domain = np.unique(slider_domain) + if slider_domain.size == 0: + slider_domain = np.sort(np.unique(values_array)) + x_min = float(slider_domain[0]) + x_max = float(slider_domain[-1]) + if x_max == x_min: + # Keep constant-value distributions centered in the track rather than + # collapsing to the left edge. + pad = max(abs(x_min) * 0.05, 1e-6) + x_min = x_min - pad + x_max = x_max + pad + + # Build a smooth, KDE-like density in slider-option space (bounded by the + # slider domain) so runtime remains stable for near-unique columns. + option_count = int(slider_domain.size) + if option_count == 1: + option_counts = np.array([int(values_array.size)], dtype=np.int64) + else: + domain_midpoints = (slider_domain[:-1] + slider_domain[1:]) / 2.0 + binned_indices = np.searchsorted( + domain_midpoints, + values_array, + side="right", + ) + option_counts = np.bincount(binned_indices, minlength=option_count) + option_positions = np.arange(option_count, dtype=np.float64) + if option_count <= 1: + kde_x = np.array([0.5], dtype=np.float64) + kde_y = np.array([float(option_counts.sum())], dtype=np.float64) + else: + kde_sample_count = int( + min( + FILTER_PLOT_KDE_MAX_SAMPLES, + max(FILTER_PLOT_KDE_MIN_SAMPLES, option_count * 2), + ) + ) + kde_x = np.linspace(0.0, float(option_count - 1), num=kde_sample_count) + weights = option_counts.astype(np.float64, copy=False) + weight_sum = float(weights.sum()) + weighted_mean = float( + np.sum(option_positions * weights) / max(weight_sum, 1) + ) + weighted_var = float( + np.sum(weights * (option_positions - weighted_mean) ** 2) + / max(weight_sum, 1) + ) + weighted_std = float(max(0.0, np.sqrt(weighted_var))) + n_eff = float((weight_sum**2) / max(float(np.sum(weights**2)), 1.0)) + silverman_bw = 1.06 * weighted_std * (max(n_eff, 1.0) ** (-0.2)) + bandwidth = max( + FILTER_PLOT_KDE_MIN_BANDWIDTH, + ( + silverman_bw * FILTER_PLOT_KDE_BANDWIDTH_SCALE + if np.isfinite(silverman_bw) and silverman_bw > 0 + else 0.0 + ), + ) + # Numerically stable KDE-like smoothing in option-index space: + # smooth discrete option counts with a Gaussian kernel, then sample + # onto the denser x-grid via interpolation. + radius = int(max(1, np.ceil(3.0 * bandwidth))) + kernel_x = np.arange(-radius, radius + 1, dtype=np.float64) + kernel = np.exp(-0.5 * ((kernel_x / bandwidth) ** 2)) + kernel_sum = float(np.sum(kernel)) + if kernel_sum > 0: + kernel = kernel / kernel_sum + smoothed_full = np.convolve(weights, kernel, mode="full") + start = int((kernel.size - 1) // 2) + smoothed_weights = smoothed_full[start : start + option_count] + kde_y = np.interp(kde_x, option_positions, smoothed_weights) + kde_y = np.nan_to_num(kde_y, nan=0.0, posinf=0.0, neginf=0.0) + y_scale_normalized = str(y_scale).strip().lower() + if y_scale_normalized == "asinh": + plot_y = np.arcsinh(np.maximum(kde_y, 0.0)) + elif y_scale_normalized == "log": + plot_y = np.log1p(np.maximum(kde_y, 0.0)) + elif y_scale_normalized == "sqrt": + plot_y = np.sqrt(np.maximum(kde_y, 0.0)) + else: + plot_y = np.maximum(kde_y, 0.0) + + min_pct = float(y_min_percentile) + max_pct = float(y_max_percentile) + if ( + 0.0 <= min_pct < FILTER_PLOT_Y_MAX_PERCENTILE_UPPER + and 0.0 < max_pct < FILTER_PLOT_Y_MAX_PERCENTILE_UPPER + and min_pct < max_pct + ): + y_floor = float(np.percentile(plot_y, min_pct)) + y_cap = float(np.percentile(plot_y, max_pct)) + if y_cap > y_floor: + shifted = np.maximum(plot_y - y_floor, 0.0) + cap_shifted = y_cap - y_floor + above_cap = shifted > cap_shifted + if np.any(above_cap): + tail_scale = max( + cap_shifted * float(y_tail_log_scale), + 1e-9, + ) + shifted[above_cap] = cap_shifted + ( + np.log1p((shifted[above_cap] - cap_shifted) / tail_scale) + * tail_scale + ) + plot_y = shifted + else: + plot_y = np.maximum(plot_y - y_floor, 0.0) + + gamma = float(y_gamma) + if gamma > 0 and gamma != 1.0: + plot_y = np.power(np.maximum(plot_y, 0.0), gamma) + + y_max = float(np.max(plot_y, initial=1.0)) + y_max = float(max(1e-9, y_max)) + + lower, upper = selected_range + lower = max(x_min, min(float(lower), x_max)) + upper = max(lower, min(float(upper), x_max)) + + track_left_px, track_right_px = track_padding_px + plot_left = float(max(8, track_left_px + FILTER_SLIDER_TRACK_LEFT_ADJUST_PX)) + plot_right = float( + max( + plot_left + 1, + width - track_right_px - FILTER_SLIDER_TRACK_RIGHT_INSET_PX, + ) + ) + plot_top = 6.0 + # Keep slider widget position fixed; shift only plotted data upward by + # using extra bottom padding inside the background SVG. + plot_bottom = 22.0 + plot_w = max(1.0, plot_right - plot_left) + plot_h = max(1.0, plot_bottom - plot_top) + + def _sx(value: float) -> float: + return plot_left + ( + CytoDataFrame._slider_relative_position( + value=value, slider_domain=slider_domain + ) + * plot_w + ) + + def _sy(value: float) -> float: + return plot_bottom - (value / y_max * plot_h) + + def _sx_from_option_index(index: float) -> float: + if option_count <= 1: + return plot_left + (0.5 * plot_w) + value = float( + np.interp( + float(index), + option_positions, + slider_domain, + ) + ) + return _sx(value) + + highlight_x = _sx(lower) + highlight_w = max(1.0, _sx(upper) - highlight_x) + line_points = " ".join( + f"{_sx_from_option_index(float(option_index)):.2f},{_sy(float(count)):.2f}" + for option_index, count in zip(kde_x, plot_y, strict=False) + ) + area_points = ( + f"{_sx_from_option_index(float(kde_x[0])):.2f}," + f"{plot_bottom:.2f} " + f"{line_points} " + f"{_sx_from_option_index(float(kde_x[-1])):.2f}," + f"{plot_bottom:.2f}" + ) + threshold_line_html = "" + if threshold_x is not None: + try: + threshold_val = float(threshold_x) + except (TypeError, ValueError): + threshold_val = None + if threshold_val is not None and x_min <= threshold_val <= x_max: + threshold_px = _sx(threshold_val) + threshold_line_html = ( + "" + ) + + return ( + f"
" + f"" + "" + f"" + "" + "" + f"{threshold_line_html}" + "
" + ) + + def _get_raw_filter_plot_threshold( + self: CytoDataFrame_type, + filter_col: Any, + ) -> Tuple[bool, Optional[Any]]: + """Return whether threshold was configured and its raw value.""" + display_options = self._custom_attrs.get("display_options", {}) or {} + threshold_explicitly_configured = False + raw_threshold: Optional[Any] = None + threshold_map = display_options.get("filter_plot_thresholds") + if isinstance(threshold_map, dict): + filter_col_str = str(filter_col) + normalized_filter_col = filter_col_str.strip().casefold() + for threshold_key, threshold_value in threshold_map.items(): + threshold_key_str = str(threshold_key) + if threshold_key_str == filter_col_str: + raw_threshold = threshold_value + threshold_explicitly_configured = True + break + if threshold_key_str.strip().casefold() == normalized_filter_col: + raw_threshold = threshold_value + threshold_explicitly_configured = True + break + elif threshold_map is not None: + logger.warning( + ( + "Ignoring display option 'filter_plot_thresholds' because " + "it is not a mapping." + ) + ) + + if not threshold_explicitly_configured: + single_threshold = display_options.get("filter_plot_threshold") + if single_threshold is not None: + configured_columns = self._get_filter_slider_columns() + if len(configured_columns) <= 1: + raw_threshold = single_threshold + threshold_explicitly_configured = True + return threshold_explicitly_configured, raw_threshold + + def _resolve_filter_plot_threshold( + self: CytoDataFrame_type, + filter_col: Any, + values: pd.Series, + ) -> Optional[float]: + """Resolve an optional threshold marker for a filter-column distribution.""" + threshold_explicitly_configured, raw_threshold = ( + self._get_raw_filter_plot_threshold(filter_col=filter_col) + ) + if not threshold_explicitly_configured: + return None + + try: + threshold = float(raw_threshold) + except (TypeError, ValueError): + logger.warning( + ( + "Ignoring filter plot threshold for column '%s': " + "value %r is not numeric." + ), + filter_col, + raw_threshold, + ) + return None + + numeric_values = pd.to_numeric(values, errors="coerce").dropna() + if numeric_values.empty: + return None + data_min = float(numeric_values.min()) + data_max = float(numeric_values.max()) + if threshold < data_min: + logger.warning( + ( + "Clamping filter plot threshold for column '%s' from %s to %s " + "because it is outside data range [%s, %s]." + ), + filter_col, + threshold, + data_min, + data_min, + data_max, + ) + return data_min + if threshold > data_max: + logger.warning( + ( + "Clamping filter plot threshold for column '%s' from %s to %s " + "because it is outside data range [%s, %s]." + ), + filter_col, + threshold, + data_max, + data_min, + data_max, + ) + return data_max + return threshold + + def _build_filter_slider_control_for_column( + self: CytoDataFrame_type, filter_col: Any + ) -> Tuple[Optional[Any], Optional[Any]]: + """Return one filter slider and its display control widget.""" + slider = self._ensure_filter_range_slider(filter_col=filter_col) + if slider is None: + return None, None + selected_range = ( + self._custom_attrs["_widget_state"] + .get("filter_ranges", {}) + .get(str(filter_col)) + ) + if ( + not isinstance(selected_range, tuple) + or len(selected_range) != MIN_POSITION_COMPONENTS + or filter_col not in self.columns + ): + return slider, slider + threshold = self._resolve_filter_plot_threshold( + filter_col=filter_col, values=self[filter_col] + ) + + distribution_html = self._build_filter_distribution_html( + values=self[filter_col], + selected_range=(float(selected_range[0]), float(selected_range[1])), + threshold_x=threshold, + slider_values=[float(option[1]) for option in slider.options], + y_scale=str( + (self._custom_attrs.get("display_options", {}) or {}).get( + "filter_plot_y_scale", FILTER_PLOT_Y_SCALE_DEFAULT + ) + ), + y_min_percentile=float( + (self._custom_attrs.get("display_options", {}) or {}).get( + "filter_plot_y_min_percentile", + FILTER_PLOT_Y_MIN_PERCENTILE_DEFAULT, + ) + ), + y_max_percentile=float( + (self._custom_attrs.get("display_options", {}) or {}).get( + "filter_plot_y_percentile", + FILTER_PLOT_Y_MAX_PERCENTILE_DEFAULT, + ) + ), + y_gamma=float( + (self._custom_attrs.get("display_options", {}) or {}).get( + "filter_plot_y_gamma", + FILTER_PLOT_Y_GAMMA_DEFAULT, + ) + ), + y_tail_log_scale=float( + (self._custom_attrs.get("display_options", {}) or {}).get( + "filter_plot_y_tail_log_scale", + FILTER_PLOT_Y_TAIL_LOG_SCALE_DEFAULT, + ) + ), + size_px=(FILTER_SLIDER_TOTAL_WIDTH_PX, 52), + track_padding_px=( + FILTER_SLIDER_LABEL_WIDTH_PX, + FILTER_SLIDER_READOUT_WIDTH_PX, + ), + ) + if not distribution_html: + return slider, slider + + plot_widget = widgets.HTML( + value=distribution_html, + layout=widgets.Layout( + width=f"{FILTER_SLIDER_TOTAL_WIDTH_PX}px", + height="52px", + ), + ) + slider.layout = widgets.Layout( + width=f"{FILTER_SLIDER_TOTAL_WIDTH_PX}px", + margin="-44px 0 0 0", + ) + return slider, widgets.VBox( + [plot_widget, slider], + layout=widgets.Layout( + width=f"{FILTER_SLIDER_TOTAL_WIDTH_PX}px", + height="52px", + align_items="center", + overflow="hidden", + ), + ) + + def _build_filter_slider_controls( + self: CytoDataFrame_type, + ) -> Tuple[List[Any], List[Any]]: + """Return slider widgets and filter controls for all configured columns.""" + columns = self._get_filter_slider_columns() + state = self._custom_attrs["_widget_state"] + state["filter_columns"] = columns + if columns and state.get("filter_column") is None: + state["filter_column"] = columns[0] + if not columns: + state["filter_ranges"] = {} + state["filter_observing"] = {} + self._custom_attrs["_filter_range_sliders"] = {} + return [], [] + + sliders: List[Any] = [] + controls: List[Any] = [] + for filter_col in columns: + slider, control = self._build_filter_slider_control_for_column(filter_col) + if slider is None: + continue + sliders.append(slider) + controls.append(control if control is not None else slider) + return sliders, controls + + def _filter_display_indices_by_widget_range( + self: CytoDataFrame_type, + data: pd.DataFrame, + display_indices: List[Any], + ) -> List[Any]: + """Filter row labels by all configured slider ranges.""" + state = self._custom_attrs["_widget_state"] + filter_columns = state.get("filter_columns") or [] + filter_ranges = state.get("filter_ranges") or {} + if not filter_columns and state.get("filter_column") is not None: + filter_columns = [state.get("filter_column")] + if isinstance(state.get("filter_range"), tuple): + filter_ranges = { + str(state.get("filter_column")): state.get("filter_range") + } + if not filter_columns: + return display_indices + + active_indices = display_indices + for filter_col in filter_columns: + if filter_col not in data.columns: + continue + filter_range = filter_ranges.get(str(filter_col)) + if ( + not isinstance(filter_range, tuple) + or len(filter_range) != MIN_POSITION_COMPONENTS + ): + continue + try: + lower = float(filter_range[0]) + upper = float(filter_range[1]) + except (TypeError, ValueError): + continue + numeric_values = pd.to_numeric(data[filter_col], errors="coerce") + in_range = numeric_values[ + (numeric_values >= lower) & (numeric_values <= upper) + ] + allowed_counts = Counter(in_range.index.tolist()) + filtered_indices: List[Any] = [] + for row_label in active_indices: + if allowed_counts[row_label] > 0: + filtered_indices.append(row_label) + allowed_counts[row_label] -= 1 + active_indices = filtered_indices + + return active_indices + def get_bounding_box_from_data( self: CytoDataFrame_type, ) -> Optional[CytoDataFrame_type]: @@ -3267,6 +4053,10 @@ def show_widget_table( # noqa: C901, PLR0912, PLR0915 Use ``table_height`` (or ``table_max_height``) to override the default notebook table height. + + Row rendering follows pandas display limits. If the DataFrame is larger + than ``display.max_rows``, the widget table inserts a midpoint ellipsis + marker row (``…``) to indicate omitted rows. """ if backend is None: @@ -3661,6 +4451,42 @@ def _generate_jupyter_dataframe_html( # noqa: C901, PLR0912, PLR0915 # gather indices which will be displayed based on pandas configuration display_indices = CytoDataFrame(data).get_displayed_rows() + active_filter_columns = ( + self._custom_attrs["_widget_state"].get("filter_columns") or [] + ) + active_filter_ranges = self._custom_attrs["_widget_state"].get( + "filter_ranges", {} + ) + if ( + not active_filter_columns + and self._custom_attrs["_widget_state"].get("filter_column") is not None + ): + active_filter_columns = [ + self._custom_attrs["_widget_state"].get("filter_column") + ] + if isinstance( + self._custom_attrs["_widget_state"].get("filter_range"), tuple + ): + active_filter_ranges = { + str(active_filter_columns[0]): self._custom_attrs[ + "_widget_state" + ].get("filter_range") + } + if active_filter_columns and any( + isinstance(active_filter_ranges.get(str(col)), tuple) + for col in active_filter_columns + ): + full_filtered_indices = self._filter_display_indices_by_widget_range( + data=data, + display_indices=data.index.tolist(), + ) + data = data.loc[full_filtered_indices] + display_indices = CytoDataFrame(data).get_displayed_rows() + else: + display_indices = self._filter_display_indices_by_widget_range( + data=data, + display_indices=display_indices, + ) # gather bounding box columns for use below if self._custom_attrs["data_bounding_box"] is not None: @@ -3828,8 +4654,10 @@ def _render_output(self: CytoDataFrame_type) -> None: f"max-height:{table_height};'>" f"{html_content}" ) - - with self._custom_attrs["_output"]: + output_widget = self._custom_attrs["_output"] + if hasattr(output_widget, "clear_output"): + output_widget.clear_output(wait=True) + with output_widget: display(HTML(scroll_wrapped_html)) if "cyto-3d-image" in html_content and "data-volume" in html_content: display( @@ -4106,6 +4934,154 @@ def _render_cell( logger.debug("Failed to build trame snapshot HTML: %s", exc) return html_content + def _try_render_trame_widget_table( # noqa: PLR0911 + self: CytoDataFrame_type, debug: bool, display_options: dict[str, Any] + ) -> bool: + """Try rendering the trame widget table and return ``True`` on success.""" + if debug: + return False + configured_filter_columns = display_options.get("filter_columns") + configured_filter_column = display_options.get("filter_column") + if isinstance(configured_filter_columns, (list, tuple)): + if len(configured_filter_columns) > 0: + return False + elif configured_filter_columns: + return False + if configured_filter_column: + return False + force_trame = display_options.get("view") == "trame" + auto_trame_for_3d = display_options.get("auto_trame_for_3d", True) + columns_3d = self._find_3d_columns_for_display() if auto_trame_for_3d else [] + if not (force_trame or columns_3d): + return False + if force_trame and not columns_3d: + columns_3d = list( + dict.fromkeys( + [ + *(self.find_image_columns() or []), + *self.find_ome_arrow_columns(self), + ] + ) + ) + if not columns_3d: + return False + try: + widget_table = self.show_widget_table( + column=columns_3d[0], + columns_3d=columns_3d, + backend=None, + ) + display(widget_table) + html_content = self._generate_trame_snapshot_html() + details_html = ( + '
' + "Static snapshot (for non-interactive view)" + f"{html_content}
" + ) + display(HTML(details_html)) + return True + except Exception as exc: + logger.debug( + "Trame widget table render failed, falling back to HTML: %s", + exc, + ) + return False + + def _render_notebook_widget_output( + self: CytoDataFrame_type, display_options: dict[str, Any] + ) -> None: + """Render ipywidgets controls and the notebook HTML table output.""" + if not self._custom_attrs["_widget_state"].get( + "filter_readout_css_injected", False + ): + display( + HTML( + "" + ) + ) + self._custom_attrs["_widget_state"]["filter_readout_css_injected"] = True + + filter_sliders, filter_controls = self._build_filter_slider_controls() + filter_control: Optional[Any] = None + if len(filter_controls) == 1: + filter_control = widgets.VBox( + filter_controls, + layout=widgets.Layout( + width=f"{FILTER_SLIDER_TOTAL_WIDTH_PX}px", + align_items="stretch", + ), + ) + elif len(filter_controls) >= MIN_POSITION_COMPONENTS: + accordion_content = widgets.VBox( + filter_controls, + layout=widgets.Layout( + width=f"{FILTER_SLIDER_TOTAL_WIDTH_PX}px", + align_items="stretch", + ), + ) + accordion = widgets.Accordion(children=[accordion_content]) + with contextlib.suppress(Exception): + accordion.set_title(0, "Filters") + accordion.selected_index = None + filter_control = accordion + controls: List[Any] = [self._custom_attrs["_scale_slider"]] + self._custom_attrs["_scale_slider"].layout = widgets.Layout(margin="10px 0 0 0") + if filter_control is not None: + controls.append(filter_control) + controls_row = widgets.HBox(controls) + + if not self._custom_attrs["_widget_state"]["shown"]: + display( + widgets.VBox( + [ + controls_row, + self._custom_attrs["_output"], + ] + ) + ) + self._show_output_loading_indicator(message="Loading table...") + if bool(display_options.get("show_static_snapshot_details", True)): + snapshot_html = self._generate_jupyter_dataframe_html() + details_html = ( + '
' + "Static snapshot (for non-interactive view)" + f"{snapshot_html}
" + ) + display(HTML(details_html)) + self._custom_attrs["_widget_state"]["shown"] = True + + if not self._custom_attrs["_widget_state"]["observing"]: + self._custom_attrs["_scale_slider"].observe( + self._on_slider_change, names="value" + ) + self._custom_attrs["_widget_state"]["observing"] = True + filter_observing = self._custom_attrs["_widget_state"].setdefault( + "filter_observing", {} + ) + for filter_slider in filter_sliders: + filter_col = getattr(filter_slider, "_cyto_filter_column", None) + key = str(filter_col) if filter_col is not None else "" + if key and not filter_observing.get(key): + filter_slider.observe(self._on_filter_slider_change, names="value") + filter_observing[key] = True + + self._render_output() + def _repr_html_(self: CytoDataFrame_type, debug: bool = False) -> str: """ Returns HTML representation of the underlying pandas DataFrame @@ -4120,82 +5096,17 @@ def _repr_html_(self: CytoDataFrame_type, debug: bool = False) -> str: Returns: str: The data in a pandas DataFrame. """ - display_options = self._custom_attrs.get("display_options", {}) or {} - force_trame = display_options.get("view") == "trame" - auto_trame_for_3d = display_options.get("auto_trame_for_3d", True) - columns_3d = self._find_3d_columns_for_display() if auto_trame_for_3d else [] - if (force_trame or columns_3d) and not debug: - if force_trame and not columns_3d: - columns_3d = list( - dict.fromkeys( - [ - *(self.find_image_columns() or []), - *self.find_ome_arrow_columns(self), - ] - ) - ) - if columns_3d: - try: - widget_table = self.show_widget_table( - column=columns_3d[0], - columns_3d=columns_3d, - backend=None, - ) - display(widget_table) - html_content = self._generate_trame_snapshot_html() - details_html = ( - '
' - "Static snapshot (for non-interactive view)" - f"{html_content}
" - ) - display(HTML(details_html)) - return None - except Exception as exc: - logger.debug( - "Trame widget table render failed, falling back to HTML: %s", - exc, - ) - - # if we're in a notebook process as though in a jupyter environment + if self._try_render_trame_widget_table( + debug=debug, display_options=display_options + ): + return None if get_option("display.notebook_repr_html") and not debug: - if not self._custom_attrs["_widget_state"]["shown"]: - display( - widgets.VBox( - [ - self._custom_attrs["_scale_slider"], - self._custom_attrs["_output"], - ] - ) - ) - if bool(display_options.get("show_static_snapshot_details", True)): - snapshot_html = self._generate_jupyter_dataframe_html() - details_html = ( - '
' - "Static snapshot (for non-interactive view)" - f"{snapshot_html}
" - ) - display(HTML(details_html)) - self._custom_attrs["_widget_state"]["shown"] = True - - # Attach the slider observer exactly once - if not self._custom_attrs["_widget_state"]["observing"]: - self._custom_attrs["_scale_slider"].observe( - self._on_slider_change, names="value" - ) - self._custom_attrs["_widget_state"]["observing"] = True - - # render fresh HTML for this cell - self._render_output() - - # allow for debug mode to be set which returns the HTML - # without widgets. - - elif debug: - return self._generate_jupyter_dataframe_html() - - else: + self._render_notebook_widget_output(display_options=display_options) return None + if debug: + return self._generate_jupyter_dataframe_html() + return None def __repr__(self: CytoDataFrame_type, debug: bool = False) -> str: """ diff --git a/tests/test_frame.py b/tests/test_frame.py index c5afaba..d729715 100644 --- a/tests/test_frame.py +++ b/tests/test_frame.py @@ -4,8 +4,10 @@ import logging import pathlib +import re import sys import types +import warnings from collections import OrderedDict from contextlib import nullcontext from importlib.machinery import ModuleSpec @@ -19,7 +21,13 @@ from _pytest.monkeypatch import MonkeyPatch from pyarrow import parquet -from cytodataframe.frame import CytoDataFrame +from cytodataframe.frame import ( + FILTER_SLIDER_LABEL_WIDTH_PX, + FILTER_SLIDER_READOUT_WIDTH_PX, + FILTER_SLIDER_TOTAL_WIDTH_PX, + MAX_FILTER_SLIDER_STOPS, + CytoDataFrame, +) from tests.utils import ( cytodataframe_image_display_contains_pixels, ) @@ -842,11 +850,16 @@ def test_slider_updates_state(monkeypatch: MonkeyPatch): # Track render calls using monkeypatch or a flag render_called = {} + loading_called = {} def mock_render_output() -> None: render_called["called"] = True + def mock_show_loading() -> None: + loading_called["called"] = True + monkeypatch.setattr(cdf, "_render_output", mock_render_output) + monkeypatch.setattr(cdf, "_show_output_loading_indicator", mock_show_loading) # Call the method manually cdf._on_slider_change(change) @@ -856,6 +869,387 @@ def mock_render_output() -> None: # Check if the render method was triggered assert render_called.get("called", False) + assert loading_called.get("called", False) + + +def test_filter_slider_updates_state(monkeypatch: MonkeyPatch): + """Test that the filter slider updates internal state and triggers render.""" + cdf = CytoDataFrame( + pd.DataFrame({"Image_FileName_DNA": ["example.tif"], "AreaShape_Area": [2.0]}), + display_options={"filter_column": "AreaShape_Area"}, + ) + cdf._custom_attrs["_widget_state"]["filter_column"] = "AreaShape_Area" + render_called = {} + loading_called = {} + + def mock_render_output() -> None: + render_called["called"] = True + + def mock_show_loading() -> None: + loading_called["called"] = True + + monkeypatch.setattr(cdf, "_render_output", mock_render_output) + monkeypatch.setattr(cdf, "_show_output_loading_indicator", mock_show_loading) + cdf._on_filter_slider_change({"new": (1.5, 2.5)}) + + assert cdf._custom_attrs["_widget_state"]["filter_range"] == (1.5, 2.5) + assert render_called.get("called", False) + assert loading_called.get("called", False) + + +def test_filter_display_indices_by_widget_range() -> None: + cdf = CytoDataFrame(pd.DataFrame({"FilterScore": [1.0, 2.0, 3.0]})) + cdf._custom_attrs["_widget_state"]["filter_column"] = "FilterScore" + cdf._custom_attrs["_widget_state"]["filter_range"] = (1.5, 2.5) + + filtered = cdf._filter_display_indices_by_widget_range( + data=cdf, display_indices=[0, 1, 2] + ) + + assert filtered == [1] + + +def test_filter_display_indices_by_widget_range_multiple_columns() -> None: + cdf = CytoDataFrame( + pd.DataFrame( + { + "FilterScoreA": [1.0, 2.0, 3.0, 4.0], + "FilterScoreB": [10.0, 20.0, 30.0, 40.0], + } + ) + ) + cdf._custom_attrs["_widget_state"]["filter_columns"] = [ + "FilterScoreA", + "FilterScoreB", + ] + cdf._custom_attrs["_widget_state"]["filter_ranges"] = { + "FilterScoreA": (1.5, 3.5), + "FilterScoreB": (15.0, 35.0), + } + + filtered = cdf._filter_display_indices_by_widget_range( + data=cdf, display_indices=[0, 1, 2, 3] + ) + + assert filtered == [1, 2] + + +def test_filter_display_indices_by_widget_range_preserves_duplicate_labels() -> None: + cdf = CytoDataFrame(pd.DataFrame({"FilterScore": [1.0, 2.0, 3.0]}, index=[0, 0, 1])) + cdf._custom_attrs["_widget_state"]["filter_column"] = "FilterScore" + cdf._custom_attrs["_widget_state"]["filter_range"] = (0.5, 2.5) + + filtered = cdf._filter_display_indices_by_widget_range( + data=cdf, display_indices=[0, 0, 0, 1] + ) + + assert filtered == [0, 0] + + +def test_filter_slider_rounds_labels_but_preserves_values() -> None: + cdf = CytoDataFrame( + pd.DataFrame({"FilterScore": [0.0123, 0.456, 9.87]}), + display_options={"filter_column": "FilterScore"}, + ) + + slider = cdf._ensure_filter_range_slider() + + assert isinstance(slider, widgets.SelectionRangeSlider) + assert "cdf-filter-range-slider" in getattr(slider, "_dom_classes", ()) + options = list(slider.options) + assert options == [("0.01", 0.0123), ("0.46", 0.456), ("9.87", 9.87)] + + +def test_get_filter_slider_columns_falls_back_to_single_when_many_is_empty() -> None: + cdf = CytoDataFrame( + pd.DataFrame({"FilterScore": [1.0, 2.0]}), + display_options={"filter_columns": [], "filter_column": "FilterScore"}, + ) + + columns = cdf._get_filter_slider_columns() + + assert columns == ["FilterScore"] + + +def test_filter_slider_caps_option_count_for_near_unique_values() -> None: + values = np.arange(MAX_FILTER_SLIDER_STOPS + 200, dtype=np.float64) + cdf = CytoDataFrame( + pd.DataFrame({"FilterScore": values}), + display_options={"filter_column": "FilterScore"}, + ) + + slider = cdf._ensure_filter_range_slider() + + assert isinstance(slider, widgets.SelectionRangeSlider) + options = list(slider.options) + assert len(options) == MAX_FILTER_SLIDER_STOPS + assert options[0][1] == float(values.min()) + assert options[-1][1] == float(values.max()) + option_vals = np.array([float(option[1]) for option in options], dtype=np.float64) + deltas = np.diff(option_vals) + assert np.all(deltas > 0) + assert np.allclose(deltas, deltas[0], rtol=1e-6, atol=1e-12) + assert slider.value == (float(values.min()), float(values.max())) + + +def test_filter_slider_reuses_cached_widget_instance() -> None: + cdf = CytoDataFrame( + pd.DataFrame({"FilterScore": [1.0, 2.0, 3.0]}), + display_options={"filter_column": "FilterScore"}, + ) + + first_slider = cdf._ensure_filter_range_slider(filter_col="FilterScore") + assert isinstance(first_slider, widgets.SelectionRangeSlider) + first_options = list(first_slider.options) + assert first_options[-1][1] == 3.0 + + cdf.loc[:, "FilterScore"] = [1.0, 2.0, 4.0] + second_slider = cdf._ensure_filter_range_slider(filter_col="FilterScore") + + assert isinstance(second_slider, widgets.SelectionRangeSlider) + assert second_slider is first_slider + second_options = list(second_slider.options) + assert second_options[-1][1] == 4.0 + + +def test_filter_distribution_constant_values_stays_centered() -> None: + html = CytoDataFrame._build_filter_distribution_html( + values=pd.Series([0.47, 0.47, 0.47, 0.47]), + selected_range=(0.47, 0.47), + size_px=(FILTER_SLIDER_TOTAL_WIDTH_PX, 52), + track_padding_px=( + FILTER_SLIDER_LABEL_WIDTH_PX, + FILTER_SLIDER_READOUT_WIDTH_PX, + ), + ) + + match = re.search(r"]*points='([^']+)'", html) + assert match is not None + points = [ + (float(part.split(",")[0]), float(part.split(",")[1])) + for part in match.group(1).split() + ] + peak_x = min(points, key=lambda point: point[1])[0] + track_left = float(FILTER_SLIDER_LABEL_WIDTH_PX) + track_right = float(FILTER_SLIDER_TOTAL_WIDTH_PX - FILTER_SLIDER_READOUT_WIDTH_PX) + track_mid = (track_left + track_right) / 2.0 + assert abs(peak_x - track_mid) < 30.0 + + +def test_filter_slider_control_renders_threshold_line() -> None: + cdf = CytoDataFrame( + pd.DataFrame({"FilterScore": [1.0, 2.0, 3.0]}), + display_options={ + "filter_column": "FilterScore", + "filter_plot_threshold": 2.0, + }, + ) + cdf._custom_attrs["_widget_state"]["filter_column"] = "FilterScore" + cdf._custom_attrs["_widget_state"]["filter_ranges"] = {"FilterScore": (1.0, 3.0)} + + _slider, control = cdf._build_filter_slider_control_for_column("FilterScore") + + assert isinstance(control, widgets.VBox) + assert isinstance(control.children[0], widgets.HTML) + assert "stroke='#dc2626'" in control.children[0].value + assert "y1='6.00'" in control.children[0].value + assert "y2='22.00'" in control.children[0].value + + +def test_filter_slider_control_warns_and_clamps_out_of_range_threshold( + caplog: pytest.LogCaptureFixture, +) -> None: + cdf = CytoDataFrame( + pd.DataFrame({"FilterScore": [1.0, 2.0, 3.0]}), + display_options={ + "filter_column": "FilterScore", + "filter_plot_threshold": 9.0, + }, + ) + cdf._custom_attrs["_widget_state"]["filter_column"] = "FilterScore" + cdf._custom_attrs["_widget_state"]["filter_ranges"] = {"FilterScore": (1.0, 3.0)} + + with caplog.at_level(logging.WARNING): + _slider, control = cdf._build_filter_slider_control_for_column("FilterScore") + + assert isinstance(control, widgets.VBox) + assert isinstance(control.children[0], widgets.HTML) + assert "stroke='#dc2626'" in control.children[0].value + assert "outside data range" in caplog.text + + +def test_filter_slider_control_ignores_non_numeric_threshold( + caplog: pytest.LogCaptureFixture, +) -> None: + cdf = CytoDataFrame( + pd.DataFrame({"FilterScore": [1.0, 2.0, 3.0]}), + display_options={ + "filter_column": "FilterScore", + "filter_plot_threshold": "not-a-number", + }, + ) + cdf._custom_attrs["_widget_state"]["filter_column"] = "FilterScore" + cdf._custom_attrs["_widget_state"]["filter_ranges"] = {"FilterScore": (1.0, 3.0)} + + with caplog.at_level(logging.WARNING): + _slider, control = cdf._build_filter_slider_control_for_column("FilterScore") + + assert isinstance(control, widgets.VBox) + assert isinstance(control.children[0], widgets.HTML) + assert "stroke='#dc2626'" not in control.children[0].value + assert "is not numeric" in caplog.text + + +def test_filter_slider_threshold_key_match_is_case_and_whitespace_tolerant() -> None: + cdf = CytoDataFrame( + pd.DataFrame({"FilterScore": [1.0, 2.0, 3.0]}), + display_options={ + "filter_column": "FilterScore", + "filter_plot_thresholds": {" filterscore ": 2.0}, + }, + ) + cdf._custom_attrs["_widget_state"]["filter_column"] = "FilterScore" + cdf._custom_attrs["_widget_state"]["filter_ranges"] = {"FilterScore": (1.0, 3.0)} + + _slider, control = cdf._build_filter_slider_control_for_column("FilterScore") + + assert isinstance(control, widgets.VBox) + assert isinstance(control.children[0], widgets.HTML) + assert "stroke='#dc2626'" in control.children[0].value + + +def test_filter_slider_threshold_aligns_with_selection_slider_domain() -> None: + cdf = CytoDataFrame( + pd.DataFrame({"FilterScore": [0.0, 1.0, 100.0]}), + display_options={ + "filter_column": "FilterScore", + "filter_plot_threshold": 1.0, + }, + ) + cdf._custom_attrs["_widget_state"]["filter_column"] = "FilterScore" + cdf._custom_attrs["_widget_state"]["filter_ranges"] = {"FilterScore": (0.0, 100.0)} + + _slider, control = cdf._build_filter_slider_control_for_column("FilterScore") + assert isinstance(control, widgets.VBox) + assert isinstance(control.children[0], widgets.HTML) + html = control.children[0].value + x_match = re.search(r"x1='([0-9.]+)' y1='[0-9.]+'", html) + assert x_match is not None + x_val = float(x_match.group(1)) + + track_left = float(FILTER_SLIDER_LABEL_WIDTH_PX) + track_right = float(FILTER_SLIDER_TOTAL_WIDTH_PX - FILTER_SLIDER_READOUT_WIDTH_PX) + track_mid = (track_left + track_right) / 2.0 + assert abs(x_val - track_mid) < 8.0 + + +def test_filter_distribution_is_not_flat_for_clustered_values() -> None: + html = CytoDataFrame._build_filter_distribution_html( + values=pd.Series([0.0] * 60 + [0.1] * 30 + [2.0] * 10), + selected_range=(0.0, 100.0), + size_px=(FILTER_SLIDER_TOTAL_WIDTH_PX, 52), + track_padding_px=( + FILTER_SLIDER_LABEL_WIDTH_PX, + FILTER_SLIDER_READOUT_WIDTH_PX, + ), + ) + match = re.search(r"]*points='([^']+)'", html) + assert match is not None + ys = [float(part.split(",")[1]) for part in match.group(1).split()] + assert max(ys) - min(ys) > 2.0 + + +def test_filter_distribution_avoids_runtime_warnings_for_large_ranges() -> None: + values = pd.Series( + np.concatenate( + [ + np.full(2000, 0.0), + np.full(1500, 1.0), + np.linspace(2.0, 5000.0, 2000), + ] + ) + ) + with warnings.catch_warnings(record=True) as caught: + warnings.simplefilter("always") + html = CytoDataFrame._build_filter_distribution_html( + values=values, + selected_range=(0.0, 5000.0), + size_px=(FILTER_SLIDER_TOTAL_WIDTH_PX, 52), + track_padding_px=( + FILTER_SLIDER_LABEL_WIDTH_PX, + FILTER_SLIDER_READOUT_WIDTH_PX, + ), + ) + + runtime_warnings = [ + warning for warning in caught if issubclass(warning.category, RuntimeWarning) + ] + assert html + assert not runtime_warnings + + +def test_generate_html_removes_rows_outside_filter_range( + monkeypatch: pytest.MonkeyPatch, +) -> None: + cdf = CytoDataFrame( + pd.DataFrame( + { + "Label": ["keep-row", "drop-row"], + "FilterScore": [2.0, 9.0], + } + ), + display_options={"filter_column": "FilterScore"}, + ) + cdf._custom_attrs["_widget_state"]["filter_column"] = "FilterScore" + cdf._custom_attrs["_widget_state"]["filter_range"] = (1.5, 2.5) + + options = { + "display.notebook_repr_html": True, + "display.max_rows": 10, + "display.min_rows": 10, + "display.max_columns": 10, + "display.show_dimensions": False, + } + monkeypatch.setattr("cytodataframe.frame.get_option", lambda name: options[name]) + + html = cdf._generate_jupyter_dataframe_html() + + assert "keep-row" in html + assert "drop-row" not in html + + +def test_generate_html_filters_full_frame_before_display_window( + monkeypatch: pytest.MonkeyPatch, +) -> None: + labels = [f"row-{idx}" for idx in range(20)] + scores = [0.0] * 20 + labels[10] = "middle-keep" + scores[10] = 5.0 + + cdf = CytoDataFrame( + pd.DataFrame({"Label": labels, "FilterScore": scores}), + display_options={"filter_column": "FilterScore"}, + ) + cdf._custom_attrs["_widget_state"]["filter_column"] = "FilterScore" + cdf._custom_attrs["_widget_state"]["filter_range"] = (4.9, 5.1) + cdf._custom_attrs["_widget_state"]["filter_columns"] = ["FilterScore"] + cdf._custom_attrs["_widget_state"]["filter_ranges"] = {"FilterScore": (4.9, 5.1)} + + options = { + "display.notebook_repr_html": True, + "display.max_rows": 8, + "display.min_rows": 4, + "display.max_columns": 10, + "display.show_dimensions": False, + } + monkeypatch.setattr("cytodataframe.frame.get_option", lambda name: options[name]) + monkeypatch.setattr("pandas.get_option", lambda name: options[name]) + + html = cdf._generate_jupyter_dataframe_html() + + assert "middle-keep" in html + assert "row-0" not in html + assert "row-19" not in html def test_get_3d_volume_from_cell_loads_3d_tiff(tmp_path: pathlib.Path) -> None: @@ -1139,6 +1533,46 @@ def fake_show_widget_table(column: str, **kwargs: object) -> str: assert displayed +def test_repr_html_trame_with_filter_columns_uses_notebook_widget_path( + monkeypatch: pytest.MonkeyPatch, +) -> None: + cdf = CytoDataFrame( + pd.DataFrame( + { + "Image_FileName_DNA": ["volume.tiff"], + "FilterScoreA": [1.0], + "FilterScoreB": [2.0], + } + ), + display_options={ + "view": "trame", + "filter_columns": ["FilterScoreA", "FilterScoreB"], + }, + ) + calls = {"show_widget_table": 0, "render_notebook": 0} + + monkeypatch.setattr( + cdf, "_find_3d_columns_for_display", lambda: ["Image_FileName_DNA"] + ) + + def fake_show_widget_table(**_kwargs: object) -> str: + calls["show_widget_table"] += 1 + return "widget_table" + + def fake_render_notebook_widget_output(**_kwargs: object) -> None: + calls["render_notebook"] += 1 + + monkeypatch.setattr(cdf, "show_widget_table", fake_show_widget_table) + monkeypatch.setattr( + cdf, "_render_notebook_widget_output", fake_render_notebook_widget_output + ) + monkeypatch.setattr("cytodataframe.frame.get_option", lambda _name: True) + + assert cdf._repr_html_() is None + assert calls["show_widget_table"] == 0 + assert calls["render_notebook"] == 1 + + def test_repr_html_2d_displays_static_snapshot_details( monkeypatch: pytest.MonkeyPatch, ) -> None: @@ -1164,6 +1598,78 @@ def capture_display(value: object) -> None: assert any("cyto-static-snapshot" in block for block in html_blocks) +def test_repr_html_2d_places_filter_slider_next_to_image_adjustment( + monkeypatch: pytest.MonkeyPatch, +) -> None: + cdf = CytoDataFrame( + pd.DataFrame({"FilterScore": [1.0, 2.0, 3.0]}), + display_options={"filter_column": "FilterScore"}, + ) + displayed: list[object] = [] + + monkeypatch.setattr(cdf, "_find_3d_columns_for_display", lambda: []) + monkeypatch.setattr(cdf, "_render_output", lambda: None) + monkeypatch.setattr(cdf, "_generate_jupyter_dataframe_html", lambda: "") + monkeypatch.setattr("cytodataframe.frame.get_option", lambda _name: True) + + def capture_display(value: object) -> None: + displayed.append(value) + + monkeypatch.setattr("cytodataframe.frame.display", capture_display) + + assert cdf._repr_html_() is None + + container = next(widget for widget in displayed if isinstance(widget, widgets.VBox)) + controls_row = container.children[0] + assert isinstance(controls_row, widgets.HBox) + assert len(controls_row.children) == 2 + filter_wrapper = controls_row.children[1] + assert isinstance(filter_wrapper, widgets.VBox) + assert len(filter_wrapper.children) == 1 + filter_control = filter_wrapper.children[0] + assert isinstance(filter_control, widgets.VBox) + assert isinstance(filter_control.children[0], widgets.HTML) + assert " None: + cdf = CytoDataFrame( + pd.DataFrame( + { + "FilterScoreA": [1.0, 2.0, 3.0], + "FilterScoreB": [10.0, 20.0, 30.0], + } + ), + display_options={"filter_columns": ["FilterScoreA", "FilterScoreB"]}, + ) + displayed: list[object] = [] + + monkeypatch.setattr(cdf, "_find_3d_columns_for_display", lambda: []) + monkeypatch.setattr(cdf, "_render_output", lambda: None) + monkeypatch.setattr(cdf, "_generate_jupyter_dataframe_html", lambda: "
") + monkeypatch.setattr("cytodataframe.frame.get_option", lambda _name: True) + + def capture_display(value: object) -> None: + displayed.append(value) + + monkeypatch.setattr("cytodataframe.frame.display", capture_display) + + assert cdf._repr_html_() is None + + container = next(widget for widget in displayed if isinstance(widget, widgets.VBox)) + controls_row = container.children[0] + assert isinstance(controls_row, widgets.HBox) + assert len(controls_row.children) == 2 + accordion = controls_row.children[1] + assert isinstance(accordion, widgets.Accordion) + assert len(accordion.children) == 1 + assert isinstance(accordion.children[0], widgets.VBox) + assert len(accordion.children[0].children) == 2 + + def test_is_notebook_or_lab_detects_zmq_shell(monkeypatch: pytest.MonkeyPatch) -> None: zmq_shell = type("ZMQInteractiveShell", (), {})() monkeypatch.setattr("cytodataframe.frame.get_ipython", lambda: zmq_shell) @@ -1768,6 +2274,35 @@ def capture_display(value: object) -> None: assert len(displayed) == 3 +def test_render_output_clears_output_before_display( + monkeypatch: pytest.MonkeyPatch, +) -> None: + cdf = CytoDataFrame(pd.DataFrame({"A": [1]})) + + class DummyOutput: + def __init__(self) -> None: + self.clear_calls: list[bool] = [] + + def clear_output(self, wait: bool = False) -> None: + self.clear_calls.append(wait) + + def __enter__(self) -> "DummyOutput": + return self + + def __exit__(self, exc_type, exc, tb) -> bool: # noqa: ANN001 + return False + + dummy_output = DummyOutput() + cdf._custom_attrs["_output"] = dummy_output + monkeypatch.setattr(cdf, "_generate_jupyter_dataframe_html", lambda: "
") + monkeypatch.setattr("cytodataframe.frame.get_option", lambda _name: True) + monkeypatch.setattr("cytodataframe.frame.display", lambda _value: None) + + cdf._render_output() + + assert dummy_output.clear_calls == [True] + + def test_generate_trame_snapshot_html_paths(monkeypatch: pytest.MonkeyPatch): cdf = CytoDataFrame(pd.DataFrame({"Image_FileName_DNA": ["dna.tiff"]}, index=[0])) monkeypatch.setattr(cdf, "_generate_jupyter_dataframe_html", lambda: "
")