From 8d1f827c1855d581eb676450ba5793a39f69106c Mon Sep 17 00:00:00 2001 From: d33bs Date: Fri, 13 Mar 2026 07:13:22 -0600 Subject: [PATCH 01/18] initial work towards feature scroll filters --- src/cytodataframe/frame.py | 441 +++++++++++++++++++++++++++++++------ tests/test_frame.py | 93 ++++++++ 2 files changed, 461 insertions(+), 73 deletions(-) diff --git a/src/cytodataframe/frame.py b/src/cytodataframe/frame.py index dc4c1ed..fe4198b 100644 --- a/src/cytodataframe/frame.py +++ b/src/cytodataframe/frame.py @@ -63,6 +63,9 @@ MIN_RGB_SPATIAL_DIM = 8 MAX_RGB_ASPECT_RATIO = 4.0 MIN_POSITION_COMPONENTS = 2 +FILTER_SLIDER_TOTAL_WIDTH_PX = 460 +FILTER_SLIDER_LABEL_WIDTH_PX = 140 +FILTER_SLIDER_READOUT_WIDTH_PX = 92 # provide backwards compatibility for Self type in earlier Python versions. # see: https://peps.python.org/pep-0484/#annotating-instance-and-class-methods @@ -230,8 +233,11 @@ def __init__( # noqa: PLR0913 # add widget control meta "_widget_state": { "scale": initial_brightness, + "filter_column": None, + "filter_range": None, "shown": False, # whether VBox has been displayed "observing": False, # whether slider observer is attached + "filter_observing": False, # whether filter observer is attached }, "_snapshot_cache": {}, "_volume_cache": {}, @@ -251,6 +257,7 @@ def __init__( # noqa: PLR0913 overflow="visible", ) ), + "_filter_range_slider": None, } if self._custom_attrs["data_context_dir"] is not None: @@ -361,6 +368,9 @@ def __getitem__(self: CytoDataFrame_type, key: Union[int, str]) -> Any: # add widget control meta cdf._custom_attrs["_widget_state"] = self._custom_attrs["_widget_state"] cdf._custom_attrs["_scale_slider"] = self._custom_attrs["_scale_slider"] + cdf._custom_attrs["_filter_range_slider"] = self._custom_attrs[ + "_filter_range_slider" + ] cdf._custom_attrs["_output"] = self._custom_attrs["_output"] return cdf @@ -417,6 +427,9 @@ def _return_cytodataframe( # add widget control meta cdf._custom_attrs["_widget_state"] = self._custom_attrs["_widget_state"] cdf._custom_attrs["_scale_slider"] = self._custom_attrs["_scale_slider"] + cdf._custom_attrs["_filter_range_slider"] = self._custom_attrs[ + "_filter_range_slider" + ] cdf._custom_attrs["_output"] = self._custom_attrs["_output"] return cdf @@ -521,6 +534,255 @@ def _on_slider_change(self: CytoDataFrame_type, change: Dict[str, Any]) -> None: # redraw output after adjustments to scale state self._render_output() + def _on_filter_slider_change( + self: CytoDataFrame_type, change: Dict[str, Any] + ) -> None: + """Update widget filter state when the selection range changes.""" + selection = change.get("new") + if ( + not isinstance(selection, tuple) + or len(selection) != MIN_POSITION_COMPONENTS + ): + return + try: + lower = float(selection[0]) + upper = float(selection[1]) + except (TypeError, ValueError): + return + + self._custom_attrs["_widget_state"]["filter_range"] = ( + min(lower, upper), + max(lower, upper), + ) + self._custom_attrs["_output"].clear_output(wait=True) + self._render_output() + + def _get_filter_slider_column(self: CytoDataFrame_type) -> Optional[Any]: + """Return the configured filter column label when available.""" + display_options = self._custom_attrs.get("display_options", {}) or {} + configured = display_options.get("filter_column") + if configured is None: + return None + configured_text = str(configured) + return next((col for col in self.columns if str(col) == configured_text), None) + + def _ensure_filter_range_slider(self: CytoDataFrame_type) -> Optional[Any]: + """Build or refresh the range slider for row filtering.""" + filter_col = self._get_filter_slider_column() + state = self._custom_attrs["_widget_state"] + if filter_col is None: + self._custom_attrs["_filter_range_slider"] = None + state["filter_column"] = None + state["filter_range"] = None + state["filter_observing"] = False + return None + + if state.get("filter_column") != filter_col: + state["filter_column"] = filter_col + state["filter_range"] = None + state["filter_observing"] = False + + numeric_values = pd.to_numeric(self[filter_col], errors="coerce").dropna() + if numeric_values.empty: + self._custom_attrs["_filter_range_slider"] = None + state["filter_range"] = None + state["filter_observing"] = False + return None + + unique_values = sorted(float(value) for value in pd.unique(numeric_values)) + if not unique_values: + self._custom_attrs["_filter_range_slider"] = None + state["filter_range"] = None + state["filter_observing"] = False + return None + + options = [ + ( + f"{int(value)}" if float(value).is_integer() else f"{value:g}", + value, + ) + for value in unique_values + ] + default_lower = unique_values[0] + default_upper = unique_values[-1] + selected_range = state.get("filter_range") + if ( + not isinstance(selected_range, tuple) + or len(selected_range) != MIN_POSITION_COMPONENTS + ): + selected_range = (default_lower, default_upper) + lower = min(float(selected_range[0]), float(selected_range[1])) + upper = max(float(selected_range[0]), float(selected_range[1])) + lower = max(default_lower, min(lower, default_upper)) + upper = max(lower, min(upper, default_upper)) + state["filter_range"] = (lower, upper) + + slider = widgets.SelectionRangeSlider( + options=options, + value=(lower, upper), + description=f"{filter_col}:", + continuous_update=False, + style={"description_width": f"{FILTER_SLIDER_LABEL_WIDTH_PX}px"}, + layout=widgets.Layout(width=f"{FILTER_SLIDER_TOTAL_WIDTH_PX}px"), + ) + self._custom_attrs["_filter_range_slider"] = slider + return slider + + @staticmethod + def _build_filter_distribution_html( + values: pd.Series, + selected_range: Tuple[float, float], + width: int = FILTER_SLIDER_TOTAL_WIDTH_PX, + height: int = 96, + track_padding_px: Tuple[int, int] = ( + FILTER_SLIDER_LABEL_WIDTH_PX, + FILTER_SLIDER_READOUT_WIDTH_PX, + ), + ) -> str: + """Build an inline SVG area/line plot for filter-value counts.""" + numeric_values = pd.to_numeric(values, errors="coerce").dropna() + if numeric_values.empty: + return "" + + values_array = numeric_values.to_numpy(dtype=np.float64, copy=False) + x_min = float(np.min(values_array)) + original_x_max = float(np.max(values_array)) + x_max = original_x_max + if x_max == x_min: + x_max = x_min + 1.0 + # Use bins to avoid visually flat one-count-per-unique-value traces. + bin_count = int(min(40, max(10, np.sqrt(values_array.size)))) + hist_counts, bin_edges = np.histogram(values_array, bins=bin_count) + y_max = float(max(1, int(hist_counts.max(initial=0)))) + + lower, upper = selected_range + lower = max(x_min, min(float(lower), x_max)) + upper = max(lower, min(float(upper), x_max)) + + track_left_px, track_right_px = track_padding_px + plot_left = float(max(8, track_left_px)) + plot_right = float(max(plot_left + 1, width - track_right_px)) + plot_top = 2.0 + # Align plot baseline with the slider track zone in the overlapped control. + plot_bottom = 26.0 + plot_w = max(1.0, plot_right - plot_left) + plot_h = max(1.0, plot_bottom - plot_top) + + def _sx(value: float) -> float: + return plot_left + ((value - x_min) / (x_max - x_min) * plot_w) + + def _sy(value: float) -> float: + return plot_bottom - (value / y_max * plot_h) + + highlight_x = _sx(lower) + highlight_w = max(1.0, _sx(upper) - highlight_x) + centers = (bin_edges[:-1] + bin_edges[1:]) / 2.0 + line_points = " ".join( + f"{_sx(float(center)):.2f},{_sy(float(count)):.2f}" + for center, count in zip(centers, hist_counts, strict=False) + ) + area_points = ( + f"{_sx(float(centers[0])):.2f},{plot_bottom:.2f} " + f"{line_points} " + f"{_sx(float(centers[-1])):.2f},{plot_bottom:.2f}" + ) + + return ( + f"
" + f"" + "" + f"" + f"" + f"" + "" + "" + "
" + ) + + def _build_filter_slider_control( + self: CytoDataFrame_type, + ) -> Tuple[Optional[Any], Optional[Any]]: + """Return the filter slider and its display control widget.""" + slider = self._ensure_filter_range_slider() + if slider is None: + return None, None + filter_col = self._custom_attrs["_widget_state"].get("filter_column") + selected_range = self._custom_attrs["_widget_state"].get("filter_range") + if ( + filter_col is None + or not isinstance(selected_range, tuple) + or len(selected_range) != MIN_POSITION_COMPONENTS + or filter_col not in self.columns + ): + return slider, slider + + distribution_html = self._build_filter_distribution_html( + values=self[filter_col], + selected_range=(float(selected_range[0]), float(selected_range[1])), + height=52, + width=FILTER_SLIDER_TOTAL_WIDTH_PX, + track_padding_px=( + FILTER_SLIDER_LABEL_WIDTH_PX, + FILTER_SLIDER_READOUT_WIDTH_PX, + ), + ) + if not distribution_html: + return slider, slider + + plot_widget = widgets.HTML( + value=distribution_html, + layout=widgets.Layout( + width=f"{FILTER_SLIDER_TOTAL_WIDTH_PX}px", + height="52px", + ), + ) + slider.layout = widgets.Layout( + width=f"{FILTER_SLIDER_TOTAL_WIDTH_PX}px", + margin="-44px 0 0 0", + ) + return slider, widgets.VBox( + [plot_widget, slider], + layout=widgets.Layout( + width=f"{FILTER_SLIDER_TOTAL_WIDTH_PX}px", + height="52px", + align_items="center", + overflow="hidden", + ), + ) + + def _filter_display_indices_by_widget_range( + self: CytoDataFrame_type, + data: pd.DataFrame, + display_indices: List[Any], + ) -> List[Any]: + """Filter row labels by configured slider range.""" + filter_col = self._custom_attrs["_widget_state"].get("filter_column") + filter_range = self._custom_attrs["_widget_state"].get("filter_range") + if ( + filter_col is None + or filter_col not in data.columns + or not isinstance(filter_range, tuple) + or len(filter_range) != MIN_POSITION_COMPONENTS + ): + return display_indices + + try: + lower = float(filter_range[0]) + upper = float(filter_range[1]) + except (TypeError, ValueError): + return display_indices + + numeric_values = pd.to_numeric(data[filter_col], errors="coerce") + in_range = numeric_values[(numeric_values >= lower) & (numeric_values <= upper)] + allowed = set(in_range.index.tolist()) + return [row_label for row_label in display_indices if row_label in allowed] + def get_bounding_box_from_data( self: CytoDataFrame_type, ) -> Optional[CytoDataFrame_type]: @@ -3661,6 +3923,17 @@ def _generate_jupyter_dataframe_html( # noqa: C901, PLR0912, PLR0915 # gather indices which will be displayed based on pandas configuration display_indices = CytoDataFrame(data).get_displayed_rows() + display_indices = self._filter_display_indices_by_widget_range( + data=data, + display_indices=display_indices, + ) + if self._custom_attrs["_widget_state"].get( + "filter_column" + ) is not None and isinstance( + self._custom_attrs["_widget_state"].get("filter_range"), tuple + ): + data = data.loc[display_indices] + display_indices = data.index.tolist() # gather bounding box columns for use below if self._custom_attrs["data_bounding_box"] is not None: @@ -4106,6 +4379,93 @@ def _render_cell( logger.debug("Failed to build trame snapshot HTML: %s", exc) return html_content + def _try_render_trame_widget_table( + self: CytoDataFrame_type, debug: bool, display_options: dict[str, Any] + ) -> bool: + """Try rendering the trame widget table and return ``True`` on success.""" + if debug: + return False + force_trame = display_options.get("view") == "trame" + auto_trame_for_3d = display_options.get("auto_trame_for_3d", True) + columns_3d = self._find_3d_columns_for_display() if auto_trame_for_3d else [] + if not (force_trame or columns_3d): + return False + if force_trame and not columns_3d: + columns_3d = list( + dict.fromkeys( + [ + *(self.find_image_columns() or []), + *self.find_ome_arrow_columns(self), + ] + ) + ) + if not columns_3d: + return False + try: + widget_table = self.show_widget_table( + column=columns_3d[0], + columns_3d=columns_3d, + backend=None, + ) + display(widget_table) + html_content = self._generate_trame_snapshot_html() + details_html = ( + '
' + "Static snapshot (for non-interactive view)" + f"{html_content}
" + ) + display(HTML(details_html)) + return True + except Exception as exc: + logger.debug( + "Trame widget table render failed, falling back to HTML: %s", + exc, + ) + return False + + def _render_notebook_widget_output( + self: CytoDataFrame_type, display_options: dict[str, Any] + ) -> None: + """Render ipywidgets controls and the notebook HTML table output.""" + filter_slider, filter_control = self._build_filter_slider_control() + controls: List[Any] = [self._custom_attrs["_scale_slider"]] + if filter_control is not None: + controls.append(filter_control) + controls_row = widgets.HBox(controls) + + if not self._custom_attrs["_widget_state"]["shown"]: + display( + widgets.VBox( + [ + controls_row, + self._custom_attrs["_output"], + ] + ) + ) + if bool(display_options.get("show_static_snapshot_details", True)): + snapshot_html = self._generate_jupyter_dataframe_html() + details_html = ( + '
' + "Static snapshot (for non-interactive view)" + f"{snapshot_html}
" + ) + display(HTML(details_html)) + self._custom_attrs["_widget_state"]["shown"] = True + + if not self._custom_attrs["_widget_state"]["observing"]: + self._custom_attrs["_scale_slider"].observe( + self._on_slider_change, names="value" + ) + self._custom_attrs["_widget_state"]["observing"] = True + if ( + filter_slider is not None + and not self._custom_attrs["_widget_state"]["filter_observing"] + ): + filter_slider.observe(self._on_filter_slider_change, names="value") + self._custom_attrs["_widget_state"]["filter_observing"] = True + + self._render_output() + def _repr_html_(self: CytoDataFrame_type, debug: bool = False) -> str: """ Returns HTML representation of the underlying pandas DataFrame @@ -4120,82 +4480,17 @@ def _repr_html_(self: CytoDataFrame_type, debug: bool = False) -> str: Returns: str: The data in a pandas DataFrame. """ - display_options = self._custom_attrs.get("display_options", {}) or {} - force_trame = display_options.get("view") == "trame" - auto_trame_for_3d = display_options.get("auto_trame_for_3d", True) - columns_3d = self._find_3d_columns_for_display() if auto_trame_for_3d else [] - if (force_trame or columns_3d) and not debug: - if force_trame and not columns_3d: - columns_3d = list( - dict.fromkeys( - [ - *(self.find_image_columns() or []), - *self.find_ome_arrow_columns(self), - ] - ) - ) - if columns_3d: - try: - widget_table = self.show_widget_table( - column=columns_3d[0], - columns_3d=columns_3d, - backend=None, - ) - display(widget_table) - html_content = self._generate_trame_snapshot_html() - details_html = ( - '
' - "Static snapshot (for non-interactive view)" - f"{html_content}
" - ) - display(HTML(details_html)) - return None - except Exception as exc: - logger.debug( - "Trame widget table render failed, falling back to HTML: %s", - exc, - ) - - # if we're in a notebook process as though in a jupyter environment + if self._try_render_trame_widget_table( + debug=debug, display_options=display_options + ): + return None if get_option("display.notebook_repr_html") and not debug: - if not self._custom_attrs["_widget_state"]["shown"]: - display( - widgets.VBox( - [ - self._custom_attrs["_scale_slider"], - self._custom_attrs["_output"], - ] - ) - ) - if bool(display_options.get("show_static_snapshot_details", True)): - snapshot_html = self._generate_jupyter_dataframe_html() - details_html = ( - '
' - "Static snapshot (for non-interactive view)" - f"{snapshot_html}
" - ) - display(HTML(details_html)) - self._custom_attrs["_widget_state"]["shown"] = True - - # Attach the slider observer exactly once - if not self._custom_attrs["_widget_state"]["observing"]: - self._custom_attrs["_scale_slider"].observe( - self._on_slider_change, names="value" - ) - self._custom_attrs["_widget_state"]["observing"] = True - - # render fresh HTML for this cell - self._render_output() - - # allow for debug mode to be set which returns the HTML - # without widgets. - - elif debug: - return self._generate_jupyter_dataframe_html() - - else: + self._render_notebook_widget_output(display_options=display_options) return None + if debug: + return self._generate_jupyter_dataframe_html() + return None def __repr__(self: CytoDataFrame_type, debug: bool = False) -> str: """ diff --git a/tests/test_frame.py b/tests/test_frame.py index c5afaba..ee1fef9 100644 --- a/tests/test_frame.py +++ b/tests/test_frame.py @@ -858,6 +858,67 @@ def mock_render_output() -> None: assert render_called.get("called", False) +def test_filter_slider_updates_state(monkeypatch: MonkeyPatch): + """Test that the filter slider updates internal state and triggers render.""" + cdf = CytoDataFrame( + pd.DataFrame({"Image_FileName_DNA": ["example.tif"], "AreaShape_Area": [2.0]}), + display_options={"filter_column": "AreaShape_Area"}, + ) + cdf._custom_attrs["_widget_state"]["filter_column"] = "AreaShape_Area" + render_called = {} + + def mock_render_output() -> None: + render_called["called"] = True + + monkeypatch.setattr(cdf, "_render_output", mock_render_output) + cdf._on_filter_slider_change({"new": (1.5, 2.5)}) + + assert cdf._custom_attrs["_widget_state"]["filter_range"] == (1.5, 2.5) + assert render_called.get("called", False) + + +def test_filter_display_indices_by_widget_range() -> None: + cdf = CytoDataFrame(pd.DataFrame({"FilterScore": [1.0, 2.0, 3.0]})) + cdf._custom_attrs["_widget_state"]["filter_column"] = "FilterScore" + cdf._custom_attrs["_widget_state"]["filter_range"] = (1.5, 2.5) + + filtered = cdf._filter_display_indices_by_widget_range( + data=cdf, display_indices=[0, 1, 2] + ) + + assert filtered == [1] + + +def test_generate_html_removes_rows_outside_filter_range( + monkeypatch: pytest.MonkeyPatch, +) -> None: + cdf = CytoDataFrame( + pd.DataFrame( + { + "Label": ["keep-row", "drop-row"], + "FilterScore": [2.0, 9.0], + } + ), + display_options={"filter_column": "FilterScore"}, + ) + cdf._custom_attrs["_widget_state"]["filter_column"] = "FilterScore" + cdf._custom_attrs["_widget_state"]["filter_range"] = (1.5, 2.5) + + options = { + "display.notebook_repr_html": True, + "display.max_rows": 10, + "display.min_rows": 10, + "display.max_columns": 10, + "display.show_dimensions": False, + } + monkeypatch.setattr("cytodataframe.frame.get_option", lambda name: options[name]) + + html = cdf._generate_jupyter_dataframe_html() + + assert "keep-row" in html + assert "drop-row" not in html + + def test_get_3d_volume_from_cell_loads_3d_tiff(tmp_path: pathlib.Path) -> None: volume = np.arange(4 * 5 * 6, dtype=np.uint8).reshape(4, 5, 6) image_path = tmp_path / "volume.tiff" @@ -1164,6 +1225,38 @@ def capture_display(value: object) -> None: assert any("cyto-static-snapshot" in block for block in html_blocks) +def test_repr_html_2d_places_filter_slider_next_to_image_adjustment( + monkeypatch: pytest.MonkeyPatch, +) -> None: + cdf = CytoDataFrame( + pd.DataFrame({"FilterScore": [1.0, 2.0, 3.0]}), + display_options={"filter_column": "FilterScore"}, + ) + displayed: list[object] = [] + + monkeypatch.setattr(cdf, "_find_3d_columns_for_display", lambda: []) + monkeypatch.setattr(cdf, "_render_output", lambda: None) + monkeypatch.setattr(cdf, "_generate_jupyter_dataframe_html", lambda: "") + monkeypatch.setattr("cytodataframe.frame.get_option", lambda _name: True) + + def capture_display(value: object) -> None: + displayed.append(value) + + monkeypatch.setattr("cytodataframe.frame.display", capture_display) + + assert cdf._repr_html_() is None + + container = next(widget for widget in displayed if isinstance(widget, widgets.VBox)) + controls_row = container.children[0] + assert isinstance(controls_row, widgets.HBox) + assert len(controls_row.children) == 2 + filter_control = controls_row.children[1] + assert isinstance(filter_control, widgets.VBox) + assert isinstance(filter_control.children[0], widgets.HTML) + assert " None: zmq_shell = type("ZMQInteractiveShell", (), {})() monkeypatch.setattr("cytodataframe.frame.get_ipython", lambda: zmq_shell) From c552eeece75da88e0c478f89155b09e7f3945a20 Mon Sep 17 00:00:00 2001 From: d33bs Date: Sat, 14 Mar 2026 12:52:35 -0600 Subject: [PATCH 02/18] scroll bars for filters --- src/cytodataframe/frame.py | 312 +++++++++++++++++++++++++++---------- tests/test_frame.py | 75 +++++++++ 2 files changed, 305 insertions(+), 82 deletions(-) diff --git a/src/cytodataframe/frame.py b/src/cytodataframe/frame.py index fe4198b..8104ca7 100644 --- a/src/cytodataframe/frame.py +++ b/src/cytodataframe/frame.py @@ -65,7 +65,7 @@ MIN_POSITION_COMPONENTS = 2 FILTER_SLIDER_TOTAL_WIDTH_PX = 460 FILTER_SLIDER_LABEL_WIDTH_PX = 140 -FILTER_SLIDER_READOUT_WIDTH_PX = 92 +FILTER_SLIDER_READOUT_WIDTH_PX = 72 # provide backwards compatibility for Self type in earlier Python versions. # see: https://peps.python.org/pep-0484/#annotating-instance-and-class-methods @@ -235,9 +235,11 @@ def __init__( # noqa: PLR0913 "scale": initial_brightness, "filter_column": None, "filter_range": None, + "filter_columns": [], + "filter_ranges": {}, "shown": False, # whether VBox has been displayed "observing": False, # whether slider observer is attached - "filter_observing": False, # whether filter observer is attached + "filter_observing": {}, # per-column observer attachment flags }, "_snapshot_cache": {}, "_volume_cache": {}, @@ -257,7 +259,7 @@ def __init__( # noqa: PLR0913 overflow="visible", ) ), - "_filter_range_slider": None, + "_filter_range_sliders": {}, } if self._custom_attrs["data_context_dir"] is not None: @@ -368,8 +370,8 @@ def __getitem__(self: CytoDataFrame_type, key: Union[int, str]) -> Any: # add widget control meta cdf._custom_attrs["_widget_state"] = self._custom_attrs["_widget_state"] cdf._custom_attrs["_scale_slider"] = self._custom_attrs["_scale_slider"] - cdf._custom_attrs["_filter_range_slider"] = self._custom_attrs[ - "_filter_range_slider" + cdf._custom_attrs["_filter_range_sliders"] = self._custom_attrs[ + "_filter_range_sliders" ] cdf._custom_attrs["_output"] = self._custom_attrs["_output"] @@ -427,8 +429,8 @@ def _return_cytodataframe( # add widget control meta cdf._custom_attrs["_widget_state"] = self._custom_attrs["_widget_state"] cdf._custom_attrs["_scale_slider"] = self._custom_attrs["_scale_slider"] - cdf._custom_attrs["_filter_range_slider"] = self._custom_attrs[ - "_filter_range_slider" + cdf._custom_attrs["_filter_range_sliders"] = self._custom_attrs[ + "_filter_range_sliders" ] cdf._custom_attrs["_output"] = self._custom_attrs["_output"] @@ -538,6 +540,12 @@ def _on_filter_slider_change( self: CytoDataFrame_type, change: Dict[str, Any] ) -> None: """Update widget filter state when the selection range changes.""" + slider_owner = change.get("owner") + filter_col = ( + getattr(slider_owner, "_cyto_filter_column", None) + if slider_owner is not None + else None + ) selection = change.get("new") if ( not isinstance(selection, tuple) @@ -550,62 +558,110 @@ def _on_filter_slider_change( except (TypeError, ValueError): return - self._custom_attrs["_widget_state"]["filter_range"] = ( + normalized_range = ( min(lower, upper), max(lower, upper), ) + state = self._custom_attrs["_widget_state"] + if filter_col is not None: + state.setdefault("filter_ranges", {})[str(filter_col)] = normalized_range + # preserve legacy single-filter fields for backward compatibility + if state.get("filter_column") is None: + state["filter_column"] = filter_col + if str(state.get("filter_column")) == str(filter_col): + state["filter_range"] = normalized_range + else: + state["filter_range"] = normalized_range + if state.get("filter_column") is not None: + state.setdefault("filter_ranges", {})[str(state["filter_column"])] = ( + normalized_range + ) self._custom_attrs["_output"].clear_output(wait=True) self._render_output() - def _get_filter_slider_column(self: CytoDataFrame_type) -> Optional[Any]: - """Return the configured filter column label when available.""" + def _get_filter_slider_columns(self: CytoDataFrame_type) -> List[Any]: + """Return configured filter columns, preserving user-specified order.""" display_options = self._custom_attrs.get("display_options", {}) or {} - configured = display_options.get("filter_column") - if configured is None: - return None - configured_text = str(configured) - return next((col for col in self.columns if str(col) == configured_text), None) + configured_many = display_options.get("filter_columns") + configured_single = display_options.get("filter_column") + configured: List[Any] = [] + if isinstance(configured_many, (list, tuple)): + configured.extend(configured_many) + elif configured_many is not None: + configured.append(configured_many) + elif configured_single is not None: + configured.append(configured_single) + + if not configured: + return [] - def _ensure_filter_range_slider(self: CytoDataFrame_type) -> Optional[Any]: - """Build or refresh the range slider for row filtering.""" - filter_col = self._get_filter_slider_column() + selected_columns: List[Any] = [] + seen: set[str] = set() + for requested in configured: + requested_str = str(requested) + matched = next( + (col for col in self.columns if str(col) == requested_str), + None, + ) + if matched is None: + continue + key = str(matched) + if key in seen: + continue + seen.add(key) + selected_columns.append(matched) + return selected_columns + + def _ensure_filter_range_slider( + self: CytoDataFrame_type, filter_col: Optional[Any] = None + ) -> Optional[Any]: + """Build or refresh one range slider for row filtering.""" + if filter_col is None: + columns = self._get_filter_slider_columns() + filter_col = columns[0] if columns else None state = self._custom_attrs["_widget_state"] if filter_col is None: - self._custom_attrs["_filter_range_slider"] = None + self._custom_attrs["_filter_range_sliders"] = {} + state["filter_columns"] = [] state["filter_column"] = None state["filter_range"] = None - state["filter_observing"] = False + state["filter_ranges"] = {} + state["filter_observing"] = {} return None - if state.get("filter_column") != filter_col: - state["filter_column"] = filter_col - state["filter_range"] = None - state["filter_observing"] = False + slider_key = str(filter_col) + state["filter_column"] = state.get("filter_column") or filter_col + state.setdefault("filter_ranges", {}) + state.setdefault("filter_observing", {}) numeric_values = pd.to_numeric(self[filter_col], errors="coerce").dropna() if numeric_values.empty: - self._custom_attrs["_filter_range_slider"] = None - state["filter_range"] = None - state["filter_observing"] = False + self._custom_attrs.setdefault("_filter_range_sliders", {}).pop( + slider_key, None + ) + state["filter_ranges"].pop(slider_key, None) + state["filter_observing"].pop(slider_key, None) + if str(state.get("filter_column")) == slider_key: + state["filter_range"] = None return None unique_values = sorted(float(value) for value in pd.unique(numeric_values)) if not unique_values: - self._custom_attrs["_filter_range_slider"] = None - state["filter_range"] = None - state["filter_observing"] = False + self._custom_attrs.setdefault("_filter_range_sliders", {}).pop( + slider_key, None + ) + state["filter_ranges"].pop(slider_key, None) + state["filter_observing"].pop(slider_key, None) + if str(state.get("filter_column")) == slider_key: + state["filter_range"] = None return None options = [ - ( - f"{int(value)}" if float(value).is_integer() else f"{value:g}", - value, - ) - for value in unique_values + (self._format_filter_slider_label(value), value) for value in unique_values ] default_lower = unique_values[0] default_upper = unique_values[-1] - selected_range = state.get("filter_range") + selected_range = state["filter_ranges"].get(slider_key) if ( not isinstance(selected_range, tuple) or len(selected_range) != MIN_POSITION_COMPONENTS @@ -615,7 +671,10 @@ def _ensure_filter_range_slider(self: CytoDataFrame_type) -> Optional[Any]: upper = max(float(selected_range[0]), float(selected_range[1])) lower = max(default_lower, min(lower, default_upper)) upper = max(lower, min(upper, default_upper)) - state["filter_range"] = (lower, upper) + normalized_range = (lower, upper) + state["filter_ranges"][slider_key] = normalized_range + if str(state.get("filter_column")) == slider_key: + state["filter_range"] = normalized_range slider = widgets.SelectionRangeSlider( options=options, @@ -625,9 +684,18 @@ def _ensure_filter_range_slider(self: CytoDataFrame_type) -> Optional[Any]: style={"description_width": f"{FILTER_SLIDER_LABEL_WIDTH_PX}px"}, layout=widgets.Layout(width=f"{FILTER_SLIDER_TOTAL_WIDTH_PX}px"), ) - self._custom_attrs["_filter_range_slider"] = slider + slider._cyto_filter_column = filter_col # type: ignore[attr-defined] + self._custom_attrs.setdefault("_filter_range_sliders", {})[slider_key] = slider return slider + @staticmethod + def _format_filter_slider_label(value: float) -> str: + """Format displayed slider labels with two decimals for float values.""" + value = float(value) + if value.is_integer(): + return f"{int(value)}" + return f"{value:.2f}" + @staticmethod def _build_filter_distribution_html( values: pd.Series, @@ -662,9 +730,10 @@ def _build_filter_distribution_html( track_left_px, track_right_px = track_padding_px plot_left = float(max(8, track_left_px)) plot_right = float(max(plot_left + 1, width - track_right_px)) - plot_top = 2.0 - # Align plot baseline with the slider track zone in the overlapped control. - plot_bottom = 26.0 + plot_top = 6.0 + # Keep slider widget position fixed; shift only plotted data upward by + # using extra bottom padding inside the background SVG. + plot_bottom = 22.0 plot_w = max(1.0, plot_right - plot_left) plot_h = max(1.0, plot_bottom - plot_top) @@ -694,10 +763,6 @@ def _sy(value: float) -> float: "" f"" - f"" - f"" "" " float: "" ) - def _build_filter_slider_control( - self: CytoDataFrame_type, + def _build_filter_slider_control_for_column( + self: CytoDataFrame_type, filter_col: Any ) -> Tuple[Optional[Any], Optional[Any]]: - """Return the filter slider and its display control widget.""" - slider = self._ensure_filter_range_slider() + """Return one filter slider and its display control widget.""" + slider = self._ensure_filter_range_slider(filter_col=filter_col) if slider is None: return None, None - filter_col = self._custom_attrs["_widget_state"].get("filter_column") - selected_range = self._custom_attrs["_widget_state"].get("filter_range") + selected_range = ( + self._custom_attrs["_widget_state"] + .get("filter_ranges", {}) + .get(str(filter_col)) + ) if ( - filter_col is None - or not isinstance(selected_range, tuple) + not isinstance(selected_range, tuple) or len(selected_range) != MIN_POSITION_COMPONENTS or filter_col not in self.columns ): @@ -756,32 +823,74 @@ def _build_filter_slider_control( ), ) + def _build_filter_slider_controls( + self: CytoDataFrame_type, + ) -> Tuple[List[Any], List[Any]]: + """Return slider widgets and filter controls for all configured columns.""" + columns = self._get_filter_slider_columns() + state = self._custom_attrs["_widget_state"] + state["filter_columns"] = columns + if columns and state.get("filter_column") is None: + state["filter_column"] = columns[0] + if not columns: + state["filter_ranges"] = {} + state["filter_observing"] = {} + self._custom_attrs["_filter_range_sliders"] = {} + return [], [] + + sliders: List[Any] = [] + controls: List[Any] = [] + for filter_col in columns: + slider, control = self._build_filter_slider_control_for_column(filter_col) + if slider is None: + continue + sliders.append(slider) + controls.append(control if control is not None else slider) + return sliders, controls + def _filter_display_indices_by_widget_range( self: CytoDataFrame_type, data: pd.DataFrame, display_indices: List[Any], ) -> List[Any]: - """Filter row labels by configured slider range.""" - filter_col = self._custom_attrs["_widget_state"].get("filter_column") - filter_range = self._custom_attrs["_widget_state"].get("filter_range") - if ( - filter_col is None - or filter_col not in data.columns - or not isinstance(filter_range, tuple) - or len(filter_range) != MIN_POSITION_COMPONENTS - ): + """Filter row labels by all configured slider ranges.""" + state = self._custom_attrs["_widget_state"] + filter_columns = state.get("filter_columns") or [] + filter_ranges = state.get("filter_ranges") or {} + if not filter_columns and state.get("filter_column") is not None: + filter_columns = [state.get("filter_column")] + if isinstance(state.get("filter_range"), tuple): + filter_ranges = { + str(state.get("filter_column")): state.get("filter_range") + } + if not filter_columns: return display_indices - try: - lower = float(filter_range[0]) - upper = float(filter_range[1]) - except (TypeError, ValueError): - return display_indices + active_indices = display_indices + for filter_col in filter_columns: + if filter_col not in data.columns: + continue + filter_range = filter_ranges.get(str(filter_col)) + if ( + not isinstance(filter_range, tuple) + or len(filter_range) != MIN_POSITION_COMPONENTS + ): + continue + try: + lower = float(filter_range[0]) + upper = float(filter_range[1]) + except (TypeError, ValueError): + continue + numeric_values = pd.to_numeric(data[filter_col], errors="coerce") + in_range = numeric_values[ + (numeric_values >= lower) & (numeric_values <= upper) + ] + allowed = set(in_range.index.tolist()) + active_indices = [ + row_label for row_label in active_indices if row_label in allowed + ] - numeric_values = pd.to_numeric(data[filter_col], errors="coerce") - in_range = numeric_values[(numeric_values >= lower) & (numeric_values <= upper)] - allowed = set(in_range.index.tolist()) - return [row_label for row_label in display_indices if row_label in allowed] + return active_indices def get_bounding_box_from_data( self: CytoDataFrame_type, @@ -3927,10 +4036,30 @@ def _generate_jupyter_dataframe_html( # noqa: C901, PLR0912, PLR0915 data=data, display_indices=display_indices, ) - if self._custom_attrs["_widget_state"].get( - "filter_column" - ) is not None and isinstance( - self._custom_attrs["_widget_state"].get("filter_range"), tuple + active_filter_columns = ( + self._custom_attrs["_widget_state"].get("filter_columns") or [] + ) + active_filter_ranges = self._custom_attrs["_widget_state"].get( + "filter_ranges", {} + ) + if ( + not active_filter_columns + and self._custom_attrs["_widget_state"].get("filter_column") is not None + ): + active_filter_columns = [ + self._custom_attrs["_widget_state"].get("filter_column") + ] + if isinstance( + self._custom_attrs["_widget_state"].get("filter_range"), tuple + ): + active_filter_ranges = { + str(active_filter_columns[0]): self._custom_attrs[ + "_widget_state" + ].get("filter_range") + } + if active_filter_columns and any( + isinstance(active_filter_ranges.get(str(col)), tuple) + for col in active_filter_columns ): data = data.loc[display_indices] display_indices = data.index.tolist() @@ -4427,7 +4556,23 @@ def _render_notebook_widget_output( self: CytoDataFrame_type, display_options: dict[str, Any] ) -> None: """Render ipywidgets controls and the notebook HTML table output.""" - filter_slider, filter_control = self._build_filter_slider_control() + filter_sliders, filter_controls = self._build_filter_slider_controls() + filter_control: Optional[Any] = None + if len(filter_controls) == 1: + filter_control = filter_controls[0] + elif len(filter_controls) >= MIN_POSITION_COMPONENTS: + accordion_content = widgets.VBox( + filter_controls, + layout=widgets.Layout( + width=f"{FILTER_SLIDER_TOTAL_WIDTH_PX}px", + align_items="stretch", + ), + ) + accordion = widgets.Accordion(children=[accordion_content]) + with contextlib.suppress(Exception): + accordion.set_title(0, "Filters") + accordion.selected_index = None + filter_control = accordion controls: List[Any] = [self._custom_attrs["_scale_slider"]] if filter_control is not None: controls.append(filter_control) @@ -4457,12 +4602,15 @@ def _render_notebook_widget_output( self._on_slider_change, names="value" ) self._custom_attrs["_widget_state"]["observing"] = True - if ( - filter_slider is not None - and not self._custom_attrs["_widget_state"]["filter_observing"] - ): - filter_slider.observe(self._on_filter_slider_change, names="value") - self._custom_attrs["_widget_state"]["filter_observing"] = True + filter_observing = self._custom_attrs["_widget_state"].setdefault( + "filter_observing", {} + ) + for filter_slider in filter_sliders: + filter_col = getattr(filter_slider, "_cyto_filter_column", None) + key = str(filter_col) if filter_col is not None else "" + if key and not filter_observing.get(key): + filter_slider.observe(self._on_filter_slider_change, names="value") + filter_observing[key] = True self._render_output() diff --git a/tests/test_frame.py b/tests/test_frame.py index ee1fef9..84f8bc7 100644 --- a/tests/test_frame.py +++ b/tests/test_frame.py @@ -889,6 +889,44 @@ def test_filter_display_indices_by_widget_range() -> None: assert filtered == [1] +def test_filter_display_indices_by_widget_range_multiple_columns() -> None: + cdf = CytoDataFrame( + pd.DataFrame( + { + "FilterScoreA": [1.0, 2.0, 3.0, 4.0], + "FilterScoreB": [10.0, 20.0, 30.0, 40.0], + } + ) + ) + cdf._custom_attrs["_widget_state"]["filter_columns"] = [ + "FilterScoreA", + "FilterScoreB", + ] + cdf._custom_attrs["_widget_state"]["filter_ranges"] = { + "FilterScoreA": (1.5, 3.5), + "FilterScoreB": (15.0, 35.0), + } + + filtered = cdf._filter_display_indices_by_widget_range( + data=cdf, display_indices=[0, 1, 2, 3] + ) + + assert filtered == [1, 2] + + +def test_filter_slider_rounds_labels_but_preserves_values() -> None: + cdf = CytoDataFrame( + pd.DataFrame({"FilterScore": [0.0123, 0.456, 9.87]}), + display_options={"filter_column": "FilterScore"}, + ) + + slider = cdf._ensure_filter_range_slider() + + assert isinstance(slider, widgets.SelectionRangeSlider) + options = list(slider.options) + assert options == [("0.01", 0.0123), ("0.46", 0.456), ("9.87", 9.87)] + + def test_generate_html_removes_rows_outside_filter_range( monkeypatch: pytest.MonkeyPatch, ) -> None: @@ -1257,6 +1295,43 @@ def capture_display(value: object) -> None: assert isinstance(filter_control.children[1], widgets.SelectionRangeSlider) +def test_repr_html_2d_uses_accordion_for_multiple_filter_columns( + monkeypatch: pytest.MonkeyPatch, +) -> None: + cdf = CytoDataFrame( + pd.DataFrame( + { + "FilterScoreA": [1.0, 2.0, 3.0], + "FilterScoreB": [10.0, 20.0, 30.0], + } + ), + display_options={"filter_columns": ["FilterScoreA", "FilterScoreB"]}, + ) + displayed: list[object] = [] + + monkeypatch.setattr(cdf, "_find_3d_columns_for_display", lambda: []) + monkeypatch.setattr(cdf, "_render_output", lambda: None) + monkeypatch.setattr(cdf, "_generate_jupyter_dataframe_html", lambda: "
") + monkeypatch.setattr("cytodataframe.frame.get_option", lambda _name: True) + + def capture_display(value: object) -> None: + displayed.append(value) + + monkeypatch.setattr("cytodataframe.frame.display", capture_display) + + assert cdf._repr_html_() is None + + container = next(widget for widget in displayed if isinstance(widget, widgets.VBox)) + controls_row = container.children[0] + assert isinstance(controls_row, widgets.HBox) + assert len(controls_row.children) == 2 + accordion = controls_row.children[1] + assert isinstance(accordion, widgets.Accordion) + assert len(accordion.children) == 1 + assert isinstance(accordion.children[0], widgets.VBox) + assert len(accordion.children[0].children) == 2 + + def test_is_notebook_or_lab_detects_zmq_shell(monkeypatch: pytest.MonkeyPatch) -> None: zmq_shell = type("ZMQInteractiveShell", (), {})() monkeypatch.setattr("cytodataframe.frame.get_ipython", lambda: zmq_shell) From ed5e466092aba3eae89047b2617143323417a999 Mon Sep 17 00:00:00 2001 From: d33bs Date: Sun, 15 Mar 2026 11:53:52 -0600 Subject: [PATCH 03/18] relative spacing for scroll plot --- src/cytodataframe/frame.py | 10 ++++++++-- tests/test_frame.py | 33 ++++++++++++++++++++++++++++++++- 2 files changed, 40 insertions(+), 3 deletions(-) diff --git a/src/cytodataframe/frame.py b/src/cytodataframe/frame.py index 8104ca7..8823b0f 100644 --- a/src/cytodataframe/frame.py +++ b/src/cytodataframe/frame.py @@ -717,10 +717,16 @@ def _build_filter_distribution_html( original_x_max = float(np.max(values_array)) x_max = original_x_max if x_max == x_min: - x_max = x_min + 1.0 + # Keep constant-value distributions centered in the track rather than + # collapsing to the left edge. + pad = max(abs(x_min) * 0.05, 1e-6) + x_min = x_min - pad + x_max = x_max + pad # Use bins to avoid visually flat one-count-per-unique-value traces. bin_count = int(min(40, max(10, np.sqrt(values_array.size)))) - hist_counts, bin_edges = np.histogram(values_array, bins=bin_count) + hist_counts, bin_edges = np.histogram( + values_array, bins=bin_count, range=(x_min, x_max) + ) y_max = float(max(1, int(hist_counts.max(initial=0)))) lower, upper = selected_range diff --git a/tests/test_frame.py b/tests/test_frame.py index 84f8bc7..7402994 100644 --- a/tests/test_frame.py +++ b/tests/test_frame.py @@ -4,6 +4,7 @@ import logging import pathlib +import re import sys import types from collections import OrderedDict @@ -19,7 +20,12 @@ from _pytest.monkeypatch import MonkeyPatch from pyarrow import parquet -from cytodataframe.frame import CytoDataFrame +from cytodataframe.frame import ( + FILTER_SLIDER_LABEL_WIDTH_PX, + FILTER_SLIDER_READOUT_WIDTH_PX, + FILTER_SLIDER_TOTAL_WIDTH_PX, + CytoDataFrame, +) from tests.utils import ( cytodataframe_image_display_contains_pixels, ) @@ -927,6 +933,31 @@ def test_filter_slider_rounds_labels_but_preserves_values() -> None: assert options == [("0.01", 0.0123), ("0.46", 0.456), ("9.87", 9.87)] +def test_filter_distribution_constant_values_stays_centered() -> None: + html = CytoDataFrame._build_filter_distribution_html( + values=pd.Series([0.47, 0.47, 0.47, 0.47]), + selected_range=(0.47, 0.47), + width=FILTER_SLIDER_TOTAL_WIDTH_PX, + height=52, + track_padding_px=( + FILTER_SLIDER_LABEL_WIDTH_PX, + FILTER_SLIDER_READOUT_WIDTH_PX, + ), + ) + + match = re.search(r"]*points='([^']+)'", html) + assert match is not None + points = [ + (float(part.split(",")[0]), float(part.split(",")[1])) + for part in match.group(1).split() + ] + peak_x = min(points, key=lambda point: point[1])[0] + track_left = float(FILTER_SLIDER_LABEL_WIDTH_PX) + track_right = float(FILTER_SLIDER_TOTAL_WIDTH_PX - FILTER_SLIDER_READOUT_WIDTH_PX) + track_mid = (track_left + track_right) / 2.0 + assert abs(peak_x - track_mid) < 30.0 + + def test_generate_html_removes_rows_outside_filter_range( monkeypatch: pytest.MonkeyPatch, ) -> None: From 7e7f83f90ddf9fdaf9f5bee8910ed6fd5dfbd172 Mon Sep 17 00:00:00 2001 From: d33bs Date: Sun, 15 Mar 2026 13:21:07 -0600 Subject: [PATCH 04/18] scroll filter plot threshold indicators --- src/cytodataframe/frame.py | 217 ++++++++++++++++++++++++++++++++++--- tests/test_frame.py | 125 ++++++++++++++++++++- 2 files changed, 323 insertions(+), 19 deletions(-) diff --git a/src/cytodataframe/frame.py b/src/cytodataframe/frame.py index 8823b0f..98961e5 100644 --- a/src/cytodataframe/frame.py +++ b/src/cytodataframe/frame.py @@ -66,6 +66,10 @@ FILTER_SLIDER_TOTAL_WIDTH_PX = 460 FILTER_SLIDER_LABEL_WIDTH_PX = 140 FILTER_SLIDER_READOUT_WIDTH_PX = 72 +# Fine-grained track-bound alignment for the background distribution plot. +# Positive values shift inward; negative values shift outward. +FILTER_SLIDER_TRACK_LEFT_ADJUST_PX = 13 +FILTER_SLIDER_TRACK_RIGHT_INSET_PX = 23 # provide backwards compatibility for Self type in earlier Python versions. # see: https://peps.python.org/pep-0484/#annotating-instance-and-class-methods @@ -696,18 +700,48 @@ def _format_filter_slider_label(value: float) -> str: return f"{int(value)}" return f"{value:.2f}" + @staticmethod + def _slider_relative_position(value: float, slider_domain: np.ndarray) -> float: + """Map a numeric value to SelectionRangeSlider's normalized track position.""" + domain_size = int(slider_domain.size) + if domain_size == 0: + return 0.0 + if domain_size == 1: + return 0.5 + + position: float + if value <= slider_domain[0]: + position = 0.0 + elif value >= slider_domain[-1]: + position = 1.0 + else: + right_idx = int(np.searchsorted(slider_domain, value, side="right")) + left_idx = max(0, right_idx - 1) + if right_idx >= domain_size: + position = 1.0 + else: + left_val = float(slider_domain[left_idx]) + right_val = float(slider_domain[right_idx]) + if right_val == left_val: + position = float(left_idx) / float(domain_size - 1) + else: + frac = (value - left_val) / (right_val - left_val) + position = (float(left_idx) + float(frac)) / float(domain_size - 1) + return position + @staticmethod def _build_filter_distribution_html( values: pd.Series, selected_range: Tuple[float, float], - width: int = FILTER_SLIDER_TOTAL_WIDTH_PX, - height: int = 96, + threshold_x: Optional[float] = None, + size_px: Tuple[int, int] = (FILTER_SLIDER_TOTAL_WIDTH_PX, 96), track_padding_px: Tuple[int, int] = ( FILTER_SLIDER_LABEL_WIDTH_PX, FILTER_SLIDER_READOUT_WIDTH_PX, ), ) -> str: """Build an inline SVG area/line plot for filter-value counts.""" + width, height = size_px numeric_values = pd.to_numeric(values, errors="coerce").dropna() if numeric_values.empty: return "" @@ -716,26 +750,49 @@ def _build_filter_distribution_html( x_min = float(np.min(values_array)) original_x_max = float(np.max(values_array)) x_max = original_x_max + slider_domain = np.sort(np.unique(values_array)) if x_max == x_min: # Keep constant-value distributions centered in the track rather than # collapsing to the left edge. pad = max(abs(x_min) * 0.05, 1e-6) x_min = x_min - pad x_max = x_max + pad - # Use bins to avoid visually flat one-count-per-unique-value traces. - bin_count = int(min(40, max(10, np.sqrt(values_array.size)))) - hist_counts, bin_edges = np.histogram( - values_array, bins=bin_count, range=(x_min, x_max) + # Build y-shape in slider-option space so x positions align exactly with + # the discrete scroll points. Re-bin adjacent options so traces stay + # informative when columns contain many unique values. + unique_vals, inverse_idx = np.unique(values_array, return_inverse=True) + option_counts = np.bincount(inverse_idx, minlength=unique_vals.size) + option_count = int(unique_vals.size) + plot_bin_count = int( + min(option_count, 60, max(12, np.sqrt(max(1, option_count)))) + ) + plot_bin_edges = np.linspace(0, option_count, num=plot_bin_count + 1, dtype=int) + binned_counts = np.array( + [ + int(option_counts[plot_bin_edges[i] : plot_bin_edges[i + 1]].sum()) + for i in range(plot_bin_count) + ], + dtype=np.float64, ) - y_max = float(max(1, int(hist_counts.max(initial=0)))) + binned_option_centers = ( + plot_bin_edges[:-1].astype(np.float64) + + plot_bin_edges[1:].astype(np.float64) + - 1.0 + ) / 2.0 + y_max = float(max(1.0, float(np.max(binned_counts, initial=1.0)))) lower, upper = selected_range lower = max(x_min, min(float(lower), x_max)) upper = max(lower, min(float(upper), x_max)) track_left_px, track_right_px = track_padding_px - plot_left = float(max(8, track_left_px)) - plot_right = float(max(plot_left + 1, width - track_right_px)) + plot_left = float(max(8, track_left_px + FILTER_SLIDER_TRACK_LEFT_ADJUST_PX)) + plot_right = float( + max( + plot_left + 1, + width - track_right_px - FILTER_SLIDER_TRACK_RIGHT_INSET_PX, + ) + ) plot_top = 6.0 # Keep slider widget position fixed; shift only plotted data upward by # using extra bottom padding inside the background SVG. @@ -744,23 +801,50 @@ def _build_filter_distribution_html( plot_h = max(1.0, plot_bottom - plot_top) def _sx(value: float) -> float: - return plot_left + ((value - x_min) / (x_max - x_min) * plot_w) + return plot_left + ( + CytoDataFrame._slider_relative_position( + value=value, slider_domain=slider_domain + ) + * plot_w + ) def _sy(value: float) -> float: return plot_bottom - (value / y_max * plot_h) + def _sx_from_option_index(index: float) -> float: + if unique_vals.size <= 1: + return plot_left + (0.5 * plot_w) + return plot_left + ((float(index) / float(unique_vals.size - 1)) * plot_w) + highlight_x = _sx(lower) highlight_w = max(1.0, _sx(upper) - highlight_x) - centers = (bin_edges[:-1] + bin_edges[1:]) / 2.0 line_points = " ".join( - f"{_sx(float(center)):.2f},{_sy(float(count)):.2f}" - for center, count in zip(centers, hist_counts, strict=False) + f"{_sx_from_option_index(float(option_center)):.2f},{_sy(float(count)):.2f}" + for option_center, count in zip( + binned_option_centers, binned_counts, strict=False + ) ) area_points = ( - f"{_sx(float(centers[0])):.2f},{plot_bottom:.2f} " + f"{_sx_from_option_index(float(binned_option_centers[0])):.2f}," + f"{plot_bottom:.2f} " f"{line_points} " - f"{_sx(float(centers[-1])):.2f},{plot_bottom:.2f}" + f"{_sx_from_option_index(float(binned_option_centers[-1])):.2f}," + f"{plot_bottom:.2f}" ) + threshold_line_html = "" + if threshold_x is not None: + try: + threshold_val = float(threshold_x) + except (TypeError, ValueError): + threshold_val = None + if threshold_val is not None and x_min <= threshold_val <= x_max: + threshold_px = _sx(threshold_val) + threshold_line_html = ( + "" + ) return ( f"
" @@ -773,9 +857,105 @@ def _sy(value: float) -> float: f"points='{area_points}' fill='#93c5fd' opacity='0.55'/>" "" + f"{threshold_line_html}" "
" ) + def _get_raw_filter_plot_threshold( + self: CytoDataFrame_type, + filter_col: Any, + ) -> Tuple[bool, Optional[Any]]: + """Return whether threshold was configured and its raw value.""" + display_options = self._custom_attrs.get("display_options", {}) or {} + threshold_explicitly_configured = False + raw_threshold: Optional[Any] = None + threshold_map = display_options.get("filter_plot_thresholds") + if isinstance(threshold_map, dict): + filter_col_str = str(filter_col) + normalized_filter_col = filter_col_str.strip().casefold() + for threshold_key, threshold_value in threshold_map.items(): + threshold_key_str = str(threshold_key) + if threshold_key_str == filter_col_str: + raw_threshold = threshold_value + threshold_explicitly_configured = True + break + if threshold_key_str.strip().casefold() == normalized_filter_col: + raw_threshold = threshold_value + threshold_explicitly_configured = True + break + elif threshold_map is not None: + logger.warning( + ( + "Ignoring display option 'filter_plot_thresholds' because " + "it is not a mapping." + ) + ) + + if not threshold_explicitly_configured: + single_threshold = display_options.get("filter_plot_threshold") + if single_threshold is not None: + configured_columns = self._get_filter_slider_columns() + if len(configured_columns) <= 1: + raw_threshold = single_threshold + threshold_explicitly_configured = True + return threshold_explicitly_configured, raw_threshold + + def _resolve_filter_plot_threshold( + self: CytoDataFrame_type, + filter_col: Any, + values: pd.Series, + ) -> Optional[float]: + """Resolve an optional threshold marker for a filter-column distribution.""" + threshold_explicitly_configured, raw_threshold = ( + self._get_raw_filter_plot_threshold(filter_col=filter_col) + ) + if not threshold_explicitly_configured: + return None + + try: + threshold = float(raw_threshold) + except (TypeError, ValueError): + logger.warning( + ( + "Ignoring filter plot threshold for column '%s': " + "value %r is not numeric." + ), + filter_col, + raw_threshold, + ) + return None + + numeric_values = pd.to_numeric(values, errors="coerce").dropna() + if numeric_values.empty: + return None + data_min = float(numeric_values.min()) + data_max = float(numeric_values.max()) + if threshold < data_min: + logger.warning( + ( + "Ignoring filter plot threshold for column '%s': %s is outside " + "data range [%s, %s]." + ), + filter_col, + threshold, + data_min, + data_max, + ) + return data_min + if threshold > data_max: + logger.warning( + ( + "Ignoring filter plot threshold for column '%s': %s is outside " + "data range [%s, %s]." + ), + filter_col, + threshold, + data_min, + data_max, + ) + return data_max + return threshold + def _build_filter_slider_control_for_column( self: CytoDataFrame_type, filter_col: Any ) -> Tuple[Optional[Any], Optional[Any]]: @@ -794,12 +974,15 @@ def _build_filter_slider_control_for_column( or filter_col not in self.columns ): return slider, slider + threshold = self._resolve_filter_plot_threshold( + filter_col=filter_col, values=self[filter_col] + ) distribution_html = self._build_filter_distribution_html( values=self[filter_col], selected_range=(float(selected_range[0]), float(selected_range[1])), - height=52, - width=FILTER_SLIDER_TOTAL_WIDTH_PX, + threshold_x=threshold, + size_px=(FILTER_SLIDER_TOTAL_WIDTH_PX, 52), track_padding_px=( FILTER_SLIDER_LABEL_WIDTH_PX, FILTER_SLIDER_READOUT_WIDTH_PX, diff --git a/tests/test_frame.py b/tests/test_frame.py index 7402994..0203436 100644 --- a/tests/test_frame.py +++ b/tests/test_frame.py @@ -937,8 +937,7 @@ def test_filter_distribution_constant_values_stays_centered() -> None: html = CytoDataFrame._build_filter_distribution_html( values=pd.Series([0.47, 0.47, 0.47, 0.47]), selected_range=(0.47, 0.47), - width=FILTER_SLIDER_TOTAL_WIDTH_PX, - height=52, + size_px=(FILTER_SLIDER_TOTAL_WIDTH_PX, 52), track_padding_px=( FILTER_SLIDER_LABEL_WIDTH_PX, FILTER_SLIDER_READOUT_WIDTH_PX, @@ -958,6 +957,128 @@ def test_filter_distribution_constant_values_stays_centered() -> None: assert abs(peak_x - track_mid) < 30.0 +def test_filter_slider_control_renders_threshold_line() -> None: + cdf = CytoDataFrame( + pd.DataFrame({"FilterScore": [1.0, 2.0, 3.0]}), + display_options={ + "filter_column": "FilterScore", + "filter_plot_threshold": 2.0, + }, + ) + cdf._custom_attrs["_widget_state"]["filter_column"] = "FilterScore" + cdf._custom_attrs["_widget_state"]["filter_ranges"] = {"FilterScore": (1.0, 3.0)} + + _slider, control = cdf._build_filter_slider_control_for_column("FilterScore") + + assert isinstance(control, widgets.VBox) + assert isinstance(control.children[0], widgets.HTML) + assert "stroke='#dc2626'" in control.children[0].value + assert "y1='0'" in control.children[0].value + + +def test_filter_slider_control_ignores_out_of_range_threshold( + caplog: pytest.LogCaptureFixture, +) -> None: + cdf = CytoDataFrame( + pd.DataFrame({"FilterScore": [1.0, 2.0, 3.0]}), + display_options={ + "filter_column": "FilterScore", + "filter_plot_threshold": 9.0, + }, + ) + cdf._custom_attrs["_widget_state"]["filter_column"] = "FilterScore" + cdf._custom_attrs["_widget_state"]["filter_ranges"] = {"FilterScore": (1.0, 3.0)} + + with caplog.at_level(logging.WARNING): + _slider, control = cdf._build_filter_slider_control_for_column("FilterScore") + + assert isinstance(control, widgets.VBox) + assert isinstance(control.children[0], widgets.HTML) + assert "stroke='#dc2626'" in control.children[0].value + assert "outside data range" in caplog.text + + +def test_filter_slider_control_ignores_non_numeric_threshold( + caplog: pytest.LogCaptureFixture, +) -> None: + cdf = CytoDataFrame( + pd.DataFrame({"FilterScore": [1.0, 2.0, 3.0]}), + display_options={ + "filter_column": "FilterScore", + "filter_plot_threshold": "not-a-number", + }, + ) + cdf._custom_attrs["_widget_state"]["filter_column"] = "FilterScore" + cdf._custom_attrs["_widget_state"]["filter_ranges"] = {"FilterScore": (1.0, 3.0)} + + with caplog.at_level(logging.WARNING): + _slider, control = cdf._build_filter_slider_control_for_column("FilterScore") + + assert isinstance(control, widgets.VBox) + assert isinstance(control.children[0], widgets.HTML) + assert "stroke='#dc2626'" not in control.children[0].value + assert "is not numeric" in caplog.text + + +def test_filter_slider_threshold_key_match_is_case_and_whitespace_tolerant() -> None: + cdf = CytoDataFrame( + pd.DataFrame({"FilterScore": [1.0, 2.0, 3.0]}), + display_options={ + "filter_column": "FilterScore", + "filter_plot_thresholds": {" filterscore ": 2.0}, + }, + ) + cdf._custom_attrs["_widget_state"]["filter_column"] = "FilterScore" + cdf._custom_attrs["_widget_state"]["filter_ranges"] = {"FilterScore": (1.0, 3.0)} + + _slider, control = cdf._build_filter_slider_control_for_column("FilterScore") + + assert isinstance(control, widgets.VBox) + assert isinstance(control.children[0], widgets.HTML) + assert "stroke='#dc2626'" in control.children[0].value + + +def test_filter_slider_threshold_aligns_with_selection_slider_domain() -> None: + cdf = CytoDataFrame( + pd.DataFrame({"FilterScore": [0.0, 1.0, 100.0]}), + display_options={ + "filter_column": "FilterScore", + "filter_plot_threshold": 1.0, + }, + ) + cdf._custom_attrs["_widget_state"]["filter_column"] = "FilterScore" + cdf._custom_attrs["_widget_state"]["filter_ranges"] = {"FilterScore": (0.0, 100.0)} + + _slider, control = cdf._build_filter_slider_control_for_column("FilterScore") + assert isinstance(control, widgets.VBox) + assert isinstance(control.children[0], widgets.HTML) + html = control.children[0].value + x_match = re.search(r"x1='([0-9.]+)' y1='0'", html) + assert x_match is not None + x_val = float(x_match.group(1)) + + track_left = float(FILTER_SLIDER_LABEL_WIDTH_PX) + track_right = float(FILTER_SLIDER_TOTAL_WIDTH_PX - FILTER_SLIDER_READOUT_WIDTH_PX) + track_mid = (track_left + track_right) / 2.0 + assert abs(x_val - track_mid) < 8.0 + + +def test_filter_distribution_is_not_flat_for_clustered_values() -> None: + html = CytoDataFrame._build_filter_distribution_html( + values=pd.Series([0.0] * 60 + [0.1] * 30 + [2.0] * 10), + selected_range=(0.0, 100.0), + size_px=(FILTER_SLIDER_TOTAL_WIDTH_PX, 52), + track_padding_px=( + FILTER_SLIDER_LABEL_WIDTH_PX, + FILTER_SLIDER_READOUT_WIDTH_PX, + ), + ) + match = re.search(r"]*points='([^']+)'", html) + assert match is not None + ys = [float(part.split(",")[1]) for part in match.group(1).split()] + assert max(ys) - min(ys) > 2.0 + + def test_generate_html_removes_rows_outside_filter_range( monkeypatch: pytest.MonkeyPatch, ) -> None: From a8f92be2747f5e26420e51a26e7ef39d520a57a6 Mon Sep 17 00:00:00 2001 From: d33bs Date: Sun, 15 Mar 2026 13:25:09 -0600 Subject: [PATCH 05/18] binning and such --- src/cytodataframe/frame.py | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/src/cytodataframe/frame.py b/src/cytodataframe/frame.py index 98961e5..36aeac3 100644 --- a/src/cytodataframe/frame.py +++ b/src/cytodataframe/frame.py @@ -70,6 +70,10 @@ # Positive values shift inward; negative values shift outward. FILTER_SLIDER_TRACK_LEFT_ADJUST_PX = 13 FILTER_SLIDER_TRACK_RIGHT_INSET_PX = 23 +FILTER_PLOT_SMOOTH_MIN_BINS = 8 +FILTER_PLOT_SMOOTH_MAX_WINDOW = 9 +FILTER_PLOT_SMOOTH_BIN_DIVISOR = 10 +FILTER_PLOT_SMOOTH_MIN_WINDOW = 3 # provide backwards compatibility for Self type in earlier Python versions. # see: https://peps.python.org/pep-0484/#annotating-instance-and-class-methods @@ -774,12 +778,27 @@ def _build_filter_distribution_html( ], dtype=np.float64, ) + smoothed_counts = binned_counts + if plot_bin_count >= FILTER_PLOT_SMOOTH_MIN_BINS: + smooth_window = int( + min( + FILTER_PLOT_SMOOTH_MAX_WINDOW, + max( + FILTER_PLOT_SMOOTH_MIN_WINDOW, + plot_bin_count // FILTER_PLOT_SMOOTH_BIN_DIVISOR, + ), + ) + ) + if smooth_window % 2 == 0: + smooth_window += 1 + kernel = np.ones(smooth_window, dtype=np.float64) / float(smooth_window) + smoothed_counts = np.convolve(binned_counts, kernel, mode="same") binned_option_centers = ( plot_bin_edges[:-1].astype(np.float64) + plot_bin_edges[1:].astype(np.float64) - 1.0 ) / 2.0 - y_max = float(max(1.0, float(np.max(binned_counts, initial=1.0)))) + y_max = float(max(1.0, float(np.max(smoothed_counts, initial=1.0)))) lower, upper = selected_range lower = max(x_min, min(float(lower), x_max)) @@ -821,7 +840,7 @@ def _sx_from_option_index(index: float) -> float: line_points = " ".join( f"{_sx_from_option_index(float(option_center)):.2f},{_sy(float(count)):.2f}" for option_center, count in zip( - binned_option_centers, binned_counts, strict=False + binned_option_centers, smoothed_counts, strict=False ) ) area_points = ( From 51ea9cc157b6ad95d053a3b03b5d6ede438bcb17 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci-lite[bot]" <117423508+pre-commit-ci-lite[bot]@users.noreply.github.com> Date: Sun, 15 Mar 2026 19:30:45 +0000 Subject: [PATCH 06/18] [pre-commit.ci lite] apply automatic fixes --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 6959c1f..b6d40c4 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -50,7 +50,7 @@ repos: hooks: - id: actionlint - repo: https://github.com/astral-sh/ruff-pre-commit - rev: "v0.15.5" + rev: "v0.15.6" hooks: - id: ruff-format - id: ruff-check From 8a79ee73a33602908b2efe4099355374dcd116d9 Mon Sep 17 00:00:00 2001 From: d33bs Date: Sun, 15 Mar 2026 21:53:33 -0600 Subject: [PATCH 07/18] fine tuning and docs --- README.md | 6 + .../examples/cytodataframe_at_a_glance.ipynb | 711 +++++++++++++++--- .../src/examples/cytodataframe_at_a_glance.py | 27 +- src/cytodataframe/frame.py | 145 ++-- tests/test_frame.py | 41 +- 5 files changed, 789 insertions(+), 141 deletions(-) diff --git a/README.md b/README.md index dff1373..3c97656 100644 --- a/README.md +++ b/README.md @@ -30,6 +30,12 @@ For 3D notebook display behavior: - Disable automatic trame switching with `display_options={"auto_trame_for_3d": False}`. - Force trame layout regardless of auto-detection with `display_options={"view": "trame"}`. +For row display in notebook/widget tables: + +- CytoDataFrame respects pandas display settings (`display.max_rows`, `display.min_rows`). +- When the table is larger than `display.max_rows`, the widget table inserts a midpoint ellipsis row (`…`) to indicate omitted rows. +- You can control truncation behavior by changing pandas display options before rendering. + 📓 ___Want to see CytoDataFrame in action?___ Check out our [example notebook](docs/src/examples/cytodataframe_at_a_glance.ipynb) for a quick tour of its key features. > ✨ CytoDataFrame development began within **[coSMicQC](https://github.com/cytomining/coSMicQC)** - a single-cell profile quality control package. diff --git a/docs/src/examples/cytodataframe_at_a_glance.ipynb b/docs/src/examples/cytodataframe_at_a_glance.ipynb index ffaf8c0..4b5644e 100644 --- a/docs/src/examples/cytodataframe_at_a_glance.ipynb +++ b/docs/src/examples/cytodataframe_at_a_glance.ipynb @@ -49,19 +49,31 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 818 ms, sys: 481 ms, total: 1.3 s\n", - "Wall time: 614 ms\n" + "CPU times: user 1.21 s, sys: 769 ms, total: 1.98 s\n", + "Wall time: 833 ms\n" ] }, + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "909d4d7bf43c4542a06cc68f61236ff1", + "model_id": "8c47be4ce0e04398889e471bc0411116", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "VBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', style=SliderStyle…" + "VBox(children=(HBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', st…" ] }, "metadata": {}, @@ -76,6 +88,7 @@ "
\n", " \n", " \n", + " \n", " \n", " \n", " \n", @@ -86,6 +99,7 @@ " \n", " \n", " \n", + " \n", " \n", " \n", " \n", @@ -94,6 +108,7 @@ " \n", " \n", " \n", + " \n", " \n", " \n", " \n", @@ -102,6 +117,7 @@ " \n", " \n", " \n", + " \n", " \n", " \n", " \n", @@ -131,10 +147,11 @@ "frame = CytoDataFrame(\n", " data=f\"{jump_data_path}/BR00117006_shrunken.parquet\",\n", " data_context_dir=f\"{jump_data_path}/images/orig\",\n", - ")[\n", + ")[ \n", " [\n", " \"Metadata_ImageNumber\",\n", " \"Cells_Number_Object_Number\",\n", + " \"Nuclei_Texture_Variance_RNA_5_03_256\",\n", " \"Image_FileName_OrigAGP\",\n", " \"Image_FileName_OrigDNA\",\n", " \"Image_FileName_OrigRNA\",\n", @@ -155,19 +172,31 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 824 ms, sys: 485 ms, total: 1.31 s\n", - "Wall time: 509 ms\n" + "CPU times: user 1.22 s, sys: 785 ms, total: 2 s\n", + "Wall time: 722 ms\n" ] }, + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "5724204c102e474f870b0d5e41bfd9be", + "model_id": "d0f4bf88205a443ba9c11997721eaa36", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "VBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', style=SliderStyle…" + "VBox(children=(HBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', st…" ] }, "metadata": {}, @@ -260,19 +289,31 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 795 ms, sys: 548 ms, total: 1.34 s\n", - "Wall time: 447 ms\n" + "CPU times: user 1.15 s, sys: 791 ms, total: 1.94 s\n", + "Wall time: 653 ms\n" ] }, + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "8fce594f507a4fddb4e25b6abcf92b27", + "model_id": "3f25b56d7aae4807a3f18eb09b08d542", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "VBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', style=SliderStyle…" + "VBox(children=(HBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', st…" ] }, "metadata": {}, @@ -366,19 +407,31 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 929 ms, sys: 548 ms, total: 1.48 s\n", - "Wall time: 599 ms\n" + "CPU times: user 1.29 s, sys: 786 ms, total: 2.08 s\n", + "Wall time: 813 ms\n" ] }, + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "537bcb8beb6b440997e05c65ad2c6712", + "model_id": "4aa80396237b4f35ac1b508247a98384", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "VBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', style=SliderStyle…" + "VBox(children=(HBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', st…" ] }, "metadata": {}, @@ -481,19 +534,31 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 845 ms, sys: 523 ms, total: 1.37 s\n", - "Wall time: 514 ms\n" + "CPU times: user 1.15 s, sys: 822 ms, total: 1.98 s\n", + "Wall time: 655 ms\n" ] }, + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "22f9247a52ca475a8d037d6227740144", + "model_id": "7cdec8feae504c218017f59f711dc664", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "VBox(children=(IntSlider(value=10, continuous_update=False, description='Image adjustment:', style=SliderStyle…" + "VBox(children=(HBox(children=(IntSlider(value=10, continuous_update=False, description='Image adjustment:', st…" ] }, "metadata": {}, @@ -585,19 +650,31 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 810 ms, sys: 573 ms, total: 1.38 s\n", - "Wall time: 466 ms\n" + "CPU times: user 1.25 s, sys: 828 ms, total: 2.08 s\n", + "Wall time: 750 ms\n" ] }, + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "5ac50878f3f445c1b5c4f1479ea5aa8d", + "model_id": "bd4f49db8e004688a0afbdb0a26c4a36", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "VBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', style=SliderStyle…" + "VBox(children=(HBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', st…" ] }, "metadata": {}, @@ -691,19 +768,31 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 836 ms, sys: 533 ms, total: 1.37 s\n", - "Wall time: 515 ms\n" + "CPU times: user 1.24 s, sys: 818 ms, total: 2.06 s\n", + "Wall time: 738 ms\n" ] }, + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "a036410bfb374539ad0c69f52481d0e3", + "model_id": "4c4785df9ada458c959d5aa4832c2235", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "VBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', style=SliderStyle…" + "VBox(children=(HBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', st…" ] }, "metadata": {}, @@ -796,19 +885,31 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 838 ms, sys: 489 ms, total: 1.33 s\n", - "Wall time: 520 ms\n" + "CPU times: user 1.22 s, sys: 795 ms, total: 2.02 s\n", + "Wall time: 739 ms\n" ] }, + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "9bf2fe6deec0442a9ac320ca7c35a505", + "model_id": "e9877562d3ce41b2992468ea4fe41ec4", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "VBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', style=SliderStyle…" + "VBox(children=(HBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', st…" ] }, "metadata": {}, @@ -918,19 +1019,31 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 504 ms, sys: 88.6 ms, total: 592 ms\n", - "Wall time: 604 ms\n" + "CPU times: user 743 ms, sys: 111 ms, total: 854 ms\n", + "Wall time: 867 ms\n" ] }, + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "1a11d9d982e04fdd80720ec29ee86720", + "model_id": "45df8cdabd724032839d7b6cea92bcb3", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "VBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', style=SliderStyle…" + "VBox(children=(HBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', st…" ] }, "metadata": {}, @@ -1050,19 +1163,31 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 848 ms, sys: 515 ms, total: 1.36 s\n", - "Wall time: 514 ms\n" + "CPU times: user 1.23 s, sys: 809 ms, total: 2.04 s\n", + "Wall time: 749 ms\n" ] }, + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "b91a36f356a245dda81ecf3edd717507", + "model_id": "d0dd79cd8f2c40daa1c630fae2879304", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "VBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', style=SliderStyle…" + "VBox(children=(HBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', st…" ] }, "metadata": {}, @@ -1179,19 +1304,31 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 302 ms, sys: 157 ms, total: 459 ms\n", - "Wall time: 211 ms\n" + "CPU times: user 426 ms, sys: 224 ms, total: 649 ms\n", + "Wall time: 292 ms\n" ] }, + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "18fd9eca9a0a4505b329af0c761f702f", + "model_id": "0aa318c5e31a41abae2c2e518143978c", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "VBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', style=SliderStyle…" + "VBox(children=(HBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', st…" ] }, "metadata": {}, @@ -1282,19 +1419,31 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 239 ms, sys: 163 ms, total: 402 ms\n", - "Wall time: 145 ms\n" + "CPU times: user 356 ms, sys: 244 ms, total: 600 ms\n", + "Wall time: 221 ms\n" ] }, + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "4a31350cf3aa4729b8fd52cd92dda632", + "model_id": "0a3db2d87f0449d0ae13a145f909f4dd", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "VBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', style=SliderStyle…" + "VBox(children=(HBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', st…" ] }, "metadata": {}, @@ -1387,19 +1536,31 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 304 ms, sys: 152 ms, total: 455 ms\n", - "Wall time: 215 ms\n" + "CPU times: user 357 ms, sys: 233 ms, total: 590 ms\n", + "Wall time: 217 ms\n" ] }, + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "c832249094414fc48196a28f9304035f", + "model_id": "d0c0823a00ce4690a49064cd57bfa3ce", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "VBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', style=SliderStyle…" + "VBox(children=(HBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', st…" ] }, "metadata": {}, @@ -1502,19 +1663,31 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 813 ms, sys: 98.8 ms, total: 912 ms\n", - "Wall time: 879 ms\n" + "CPU times: user 1.25 s, sys: 140 ms, total: 1.39 s\n", + "Wall time: 1.35 s\n" ] }, + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "523f6bdea17647d38159d54e1e0bca69", + "model_id": "3f10d5f6c4074113843636a5f05e1b89", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "VBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', style=SliderStyle…" + "VBox(children=(HBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', st…" ] }, "metadata": {}, @@ -1634,19 +1807,31 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 115 ms, sys: 70 ms, total: 185 ms\n", - "Wall time: 72.5 ms\n" + "CPU times: user 167 ms, sys: 102 ms, total: 269 ms\n", + "Wall time: 109 ms\n" ] }, + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "d63c937dbfc648c9aaa0282b81892596", + "model_id": "a120c48c2cc241e9bfd9a454323c12f7", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "VBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', style=SliderStyle…" + "VBox(children=(HBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', st…" ] }, "metadata": {}, @@ -1731,6 +1916,328 @@ { "cell_type": "code", "execution_count": 17, + "id": "c68c644b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 160 ms, sys: 90.4 ms, total: 251 ms\n", + "Wall time: 101 ms\n" + ] + }, + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "740e21c8cbc24751a896b7933b2e9cab", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "VBox(children=(HBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', st…" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Static snapshot (for non-interactive view)
Metadata_ImageNumberCells_Number_Object_NumberNuclei_Texture_Variance_RNA_5_03_256Image_FileName_OrigAGPImage_FileName_OrigDNAImage_FileName_OrigRNA011106.03597211233.59048721353.527363
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Metadata_ImageNumberNuclei_Number_Object_NumberNuclei_Texture_Variance_DAPI_3_03_256Image_FileName_A647Image_FileName_DAPIImage_FileName_GOLD
0112.484139slide1_A1_M10_CH1_Z09_illumcorrect.tiffslide1_A1_M10_CH2_Z09_illumcorrect.tiff
11212.026326slide1_A1_M10_CH1_Z09_illumcorrect.tiffslide1_A1_M10_CH2_Z09_illumcorrect.tiff
21351.418746slide1_A1_M10_CH1_Z09_illumcorrect.tiffslide1_A1_M10_CH2_Z09_illumcorrect.tiff
31447.049561slide1_A1_M10_CH1_Z09_illumcorrect.tiffslide1_A1_M10_CH2_Z09_illumcorrect.tiff
415117.135912slide1_A1_M10_CH1_Z09_illumcorrect.tiffslide1_A1_M10_CH2_Z09_illumcorrect.tiff
51625.371580slide1_A1_M10_CH1_Z09_illumcorrect.tiffslide1_A1_M10_CH2_Z09_illumcorrect.tiff
61723.930735slide1_A1_M10_CH1_Z09_illumcorrect.tiffslide1_A1_M10_CH2_Z09_illumcorrect.tiff
7182.973642slide1_A1_M10_CH1_Z09_illumcorrect.tiffslide1_A1_M10_CH2_Z09_illumcorrect.tiff
8198.355843slide1_A1_M10_CH1_Z09_illumcorrect.tiffslide1_A1_M10_CH2_Z09_illumcorrect.tiff
9110150.652194slide1_A1_M10_CH1_Z09_illumcorrect.tiffslide1_A1_M10_CH2_Z09_illumcorrect.tiff
101117.919292slide1_A1_M10_CH1_Z09_illumcorrect.tiffslide1_A1_M10_CH2_Z09_illumcorrect.tiff
111120.432249slide1_A1_M10_CH1_Z09_illumcorrect.tiffslide1_A1_M10_CH2_Z09_illumcorrect.tiff
1211318.161879slide1_A1_M10_CH1_Z09_illumcorrect.tiffslide1_A1_M10_CH2_Z09_illumcorrect.tiff
1311432.575908slide1_A1_M10_CH1_Z09_illumcorrect.tiffslide1_A1_M10_CH2_Z09_illumcorrect.tiff
1411529.200237slide1_A1_M10_CH1_Z09_illumcorrect.tiffslide1_A1_M10_CH2_Z09_illumcorrect.tiff
151169.793458slide1_A1_M10_CH1_Z09_illumcorrect.tiffslide1_A1_M10_CH2_Z09_illumcorrect.tiff
161178.513971slide1_A1_M10_CH1_Z09_illumcorrect.tiffslide1_A1_M10_CH2_Z09_illumcorrect.tiff
1711831.487882slide1_A1_M10_CH1_Z09_illumcorrect.tiffslide1_A1_M10_CH2_Z09_illumcorrect.tiff
181194.329104slide1_A1_M10_CH1_Z09_illumcorrect.tiffslide1_A1_M10_CH2_Z09_illumcorrect.tiff
1912032.853237slide1_A1_M10_CH1_Z09_illumcorrect.tiffslide1_A1_M10_CH2_Z09_illumcorrect.tiff
201217.200573slide1_A1_M10_CH1_Z09_illumcorrect.tiffslide1_A1_M10_CH2_Z09_illumcorrect.tiff
211223.978256slide1_A1_M10_CH1_Z09_illumcorrect.tiffslide1_A1_M10_CH2_Z09_illumcorrect.tiff
2212332.280016slide1_A1_M10_CH1_Z09_illumcorrect.tiffslide1_A1_M10_CH2_Z09_illumcorrect.tiff
2312426.525734slide1_A1_M10_CH1_Z09_illumcorrect.tiffslide1_A1_M10_CH2_Z09_illumcorrect.tiff
2412551.948095slide1_A1_M10_CH1_Z09_illumcorrect.tiffslide1_A1_M10_CH2_Z09_illumcorrect.tiff
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%time\n", + "# view nuclear speckles data with images and overlaid outlines from masks\n", + "# and also apply a filter to only show rows where the value for\n", + "# \"Nuclei_Texture_Variance_DAPI_3_03_256\".\n", + "CytoDataFrame(\n", + " data=f\"{nuclear_speckles_path}/test_slide1_converted.parquet\",\n", + " data_context_dir=f\"{nuclear_speckles_path}/images/plate1\",\n", + " data_mask_context_dir=f\"{nuclear_speckles_path}/masks/plate1\",\n", + " display_options={\"filter_columns\": [\"Nuclei_Texture_Variance_DAPI_3_03_256\"],}\n", + ")[\n", + " [\n", + " \"Metadata_ImageNumber\",\n", + " \"Nuclei_Number_Object_Number\",\n", + " \"Nuclei_Texture_Variance_DAPI_3_03_256\",\n", + " \"Image_FileName_A647\",\n", + " \"Image_FileName_DAPI\",\n", + " \"Image_FileName_GOLD\",\n", + " ]\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": 18, "id": "4c9af999-c9a2-4408-aa16-9437d08013ae", "metadata": {}, "outputs": [ @@ -1738,19 +2245,31 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 316 ms, sys: 205 ms, total: 521 ms\n", - "Wall time: 184 ms\n" + "CPU times: user 475 ms, sys: 287 ms, total: 762 ms\n", + "Wall time: 279 ms\n" ] }, + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "ae8351ea01f64659a6c1e2454f08e401", + "model_id": "d0cc9f5544a149a89e07e2e8285e82db", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "VBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', style=SliderStyle…" + "VBox(children=(HBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', st…" ] }, "metadata": {}, @@ -1819,7 +2338,7 @@ "data": { "text/plain": [] }, - "execution_count": 17, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } @@ -1848,7 +2367,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 19, "id": "e8ebb16d-ee5f-4a34-b599-aef245b57705", "metadata": {}, "outputs": [ @@ -1856,15 +2375,15 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 0 ns, sys: 0 ns, total: 0 ns\n", - "Wall time: 2.15 μs\n" + "CPU times: user 2 μs, sys: 1 μs, total: 3 μs\n", + "Wall time: 4.05 μs\n" ] }, { "data": { "text/plain": [] }, - "execution_count": 18, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ -1878,7 +2397,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 20, "id": "0892633a-fdd2-448a-a96a-54dad4b5caf8", "metadata": {}, "outputs": [ @@ -1886,19 +2405,31 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 861 ms, sys: 213 ms, total: 1.07 s\n", - "Wall time: 950 ms\n" + "CPU times: user 1.2 s, sys: 281 ms, total: 1.48 s\n", + "Wall time: 1.31 s\n" ] }, + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "5f360d46c54649a1a6a8371696720c70", + "model_id": "bcaa1e9a00924c60a3386173986b0cc5", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "VBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', style=SliderStyle…" + "VBox(children=(HBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', st…" ] }, "metadata": {}, @@ -2003,7 +2534,7 @@ "data": { "text/plain": [] }, - "execution_count": 19, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -2020,7 +2551,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 21, "id": "881e0542", "metadata": { "lines_to_next_cell": 0 @@ -2030,14 +2561,14 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 6.69 ms, sys: 1.72 ms, total: 8.41 ms\n", - "Wall time: 14.1 ms\n" + "CPU times: user 8.35 ms, sys: 2.37 ms, total: 10.7 ms\n", + "Wall time: 13.6 ms\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "5668ddbf35d54867b361b9c207878f15", + "model_id": "f5af6447f4a1405ea3a8b60476650c8d", "version_major": 2, "version_minor": 0 }, @@ -2093,7 +2624,7 @@ "data": { "text/plain": [] }, - "execution_count": 20, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } @@ -2117,22 +2648,24 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 22, "id": "18ed220b", - "metadata": {}, + "metadata": { + "lines_to_next_cell": 0 + }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 4.92 ms, sys: 639 μs, total: 5.56 ms\n", - "Wall time: 5.38 ms\n" + "CPU times: user 8.34 ms, sys: 2.12 ms, total: 10.5 ms\n", + "Wall time: 10.4 ms\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "c3ad03a61372443582dacc58d7ab80f1", + "model_id": "443bb9f1f00d48768142dc574f0bdd5e", "version_major": 2, "version_minor": 0 }, @@ -2188,13 +2721,17 @@ "data": { "text/plain": [] }, - "execution_count": 21, + "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "%%time\n", + "import pathlib\n", + "from cytodataframe.frame import CytoDataFrame\n", + "\n", + "cp_3d_path = \"../../../tests/data/CP_tutorial_3D_noise_nuclei_segmentation\"\n", "\n", "# read 3d images with segmentation masks and show the\n", "# segmentation masks are also 3D.\n", diff --git a/docs/src/examples/cytodataframe_at_a_glance.py b/docs/src/examples/cytodataframe_at_a_glance.py index 9050052..056101e 100644 --- a/docs/src/examples/cytodataframe_at_a_glance.py +++ b/docs/src/examples/cytodataframe_at_a_glance.py @@ -41,10 +41,11 @@ frame = CytoDataFrame( data=f"{jump_data_path}/BR00117006_shrunken.parquet", data_context_dir=f"{jump_data_path}/images/orig", -)[ +)[ [ "Metadata_ImageNumber", "Cells_Number_Object_Number", + "Nuclei_Texture_Variance_RNA_5_03_256", "Image_FileName_OrigAGP", "Image_FileName_OrigDNA", "Image_FileName_OrigRNA", @@ -305,6 +306,26 @@ ] ][:3] +# %%time +# view nuclear speckles data with images and overlaid outlines from masks +# and also apply a filter to only show rows where the value for +# "Nuclei_Texture_Variance_DAPI_3_03_256". +CytoDataFrame( + data=f"{nuclear_speckles_path}/test_slide1_converted.parquet", + data_context_dir=f"{nuclear_speckles_path}/images/plate1", + data_mask_context_dir=f"{nuclear_speckles_path}/masks/plate1", + display_options={"filter_columns": ["Nuclei_Texture_Variance_DAPI_3_03_256"],} +)[ + [ + "Metadata_ImageNumber", + "Nuclei_Number_Object_Number", + "Nuclei_Texture_Variance_DAPI_3_03_256", + "Image_FileName_A647", + "Image_FileName_DAPI", + "Image_FileName_GOLD", + ] +] + # %%time # view ALSF pediatric cancer atlas plate BR00143976 with images cdf = CytoDataFrame( @@ -356,6 +377,10 @@ cdf[["ImageNumber", "ObjectNumber", "FileName_Nuclei"]][:3] # + # %%time +import pathlib +from cytodataframe.frame import CytoDataFrame + +cp_3d_path = "../../../tests/data/CP_tutorial_3D_noise_nuclei_segmentation" # read 3d images with segmentation masks and show the # segmentation masks are also 3D. diff --git a/src/cytodataframe/frame.py b/src/cytodataframe/frame.py index 36aeac3..2fd639d 100644 --- a/src/cytodataframe/frame.py +++ b/src/cytodataframe/frame.py @@ -63,17 +63,17 @@ MIN_RGB_SPATIAL_DIM = 8 MAX_RGB_ASPECT_RATIO = 4.0 MIN_POSITION_COMPONENTS = 2 -FILTER_SLIDER_TOTAL_WIDTH_PX = 460 -FILTER_SLIDER_LABEL_WIDTH_PX = 140 -FILTER_SLIDER_READOUT_WIDTH_PX = 72 +FILTER_SLIDER_TOTAL_WIDTH_PX = 430 +FILTER_SLIDER_LABEL_WIDTH_PX = 170 +FILTER_SLIDER_READOUT_WIDTH_PX = 96 # Fine-grained track-bound alignment for the background distribution plot. # Positive values shift inward; negative values shift outward. FILTER_SLIDER_TRACK_LEFT_ADJUST_PX = 13 -FILTER_SLIDER_TRACK_RIGHT_INSET_PX = 23 -FILTER_PLOT_SMOOTH_MIN_BINS = 8 -FILTER_PLOT_SMOOTH_MAX_WINDOW = 9 -FILTER_PLOT_SMOOTH_BIN_DIVISOR = 10 -FILTER_PLOT_SMOOTH_MIN_WINDOW = 3 +FILTER_SLIDER_TRACK_RIGHT_INSET_PX = 13 +FILTER_PLOT_KDE_MIN_SAMPLES = 60 +FILTER_PLOT_KDE_MAX_SAMPLES = 180 +FILTER_PLOT_KDE_MIN_BANDWIDTH = 0.7 +FILTER_SLIDER_CSS_CLASS = "cdf-filter-range-slider" # provide backwards compatibility for Self type in earlier Python versions. # see: https://peps.python.org/pep-0484/#annotating-instance-and-class-methods @@ -248,6 +248,7 @@ def __init__( # noqa: PLR0913 "shown": False, # whether VBox has been displayed "observing": False, # whether slider observer is attached "filter_observing": {}, # per-column observer attachment flags + "filter_readout_css_injected": False, }, "_snapshot_cache": {}, "_volume_cache": {}, @@ -692,6 +693,7 @@ def _ensure_filter_range_slider( style={"description_width": f"{FILTER_SLIDER_LABEL_WIDTH_PX}px"}, layout=widgets.Layout(width=f"{FILTER_SLIDER_TOTAL_WIDTH_PX}px"), ) + slider.add_class(FILTER_SLIDER_CSS_CLASS) slider._cyto_filter_column = filter_col # type: ignore[attr-defined] self._custom_attrs.setdefault("_filter_range_sliders", {})[slider_key] = slider return slider @@ -734,7 +736,7 @@ def _slider_relative_position(value: float, slider_domain: np.ndarray) -> float: return position @staticmethod - def _build_filter_distribution_html( + def _build_filter_distribution_html( # noqa: C901, PLR0915 values: pd.Series, selected_range: Tuple[float, float], threshold_x: Optional[float] = None, @@ -761,44 +763,54 @@ def _build_filter_distribution_html( pad = max(abs(x_min) * 0.05, 1e-6) x_min = x_min - pad x_max = x_max + pad - # Build y-shape in slider-option space so x positions align exactly with - # the discrete scroll points. Re-bin adjacent options so traces stay - # informative when columns contain many unique values. + # Build a smooth, KDE-like density in slider-option space so the curve + # aligns to scroll points without fixed value bins. unique_vals, inverse_idx = np.unique(values_array, return_inverse=True) option_counts = np.bincount(inverse_idx, minlength=unique_vals.size) option_count = int(unique_vals.size) - plot_bin_count = int( - min(option_count, 60, max(12, np.sqrt(max(1, option_count)))) - ) - plot_bin_edges = np.linspace(0, option_count, num=plot_bin_count + 1, dtype=int) - binned_counts = np.array( - [ - int(option_counts[plot_bin_edges[i] : plot_bin_edges[i + 1]].sum()) - for i in range(plot_bin_count) - ], - dtype=np.float64, - ) - smoothed_counts = binned_counts - if plot_bin_count >= FILTER_PLOT_SMOOTH_MIN_BINS: - smooth_window = int( + option_positions = np.arange(option_count, dtype=np.float64) + if option_count <= 1: + kde_x = np.array([0.5], dtype=np.float64) + kde_y = np.array([float(option_counts.sum())], dtype=np.float64) + else: + kde_sample_count = int( min( - FILTER_PLOT_SMOOTH_MAX_WINDOW, - max( - FILTER_PLOT_SMOOTH_MIN_WINDOW, - plot_bin_count // FILTER_PLOT_SMOOTH_BIN_DIVISOR, - ), + FILTER_PLOT_KDE_MAX_SAMPLES, + max(FILTER_PLOT_KDE_MIN_SAMPLES, option_count * 2), ) ) - if smooth_window % 2 == 0: - smooth_window += 1 - kernel = np.ones(smooth_window, dtype=np.float64) / float(smooth_window) - smoothed_counts = np.convolve(binned_counts, kernel, mode="same") - binned_option_centers = ( - plot_bin_edges[:-1].astype(np.float64) - + plot_bin_edges[1:].astype(np.float64) - - 1.0 - ) / 2.0 - y_max = float(max(1.0, float(np.max(smoothed_counts, initial=1.0)))) + kde_x = np.linspace(0.0, float(option_count - 1), num=kde_sample_count) + weights = option_counts.astype(np.float64, copy=False) + weight_sum = float(weights.sum()) + weighted_mean = float( + np.sum(option_positions * weights) / max(weight_sum, 1) + ) + weighted_var = float( + np.sum(weights * (option_positions - weighted_mean) ** 2) + / max(weight_sum, 1) + ) + weighted_std = float(max(0.0, np.sqrt(weighted_var))) + n_eff = float((weight_sum**2) / max(float(np.sum(weights**2)), 1.0)) + silverman_bw = 1.06 * weighted_std * (max(n_eff, 1.0) ** (-0.2)) + bandwidth = max( + FILTER_PLOT_KDE_MIN_BANDWIDTH, + silverman_bw if np.isfinite(silverman_bw) and silverman_bw > 0 else 0.0, + ) + # Numerically stable KDE-like smoothing in option-index space: + # smooth discrete option counts with a Gaussian kernel, then sample + # onto the denser x-grid via interpolation. + radius = int(max(1, np.ceil(3.0 * bandwidth))) + kernel_x = np.arange(-radius, radius + 1, dtype=np.float64) + kernel = np.exp(-0.5 * ((kernel_x / bandwidth) ** 2)) + kernel_sum = float(np.sum(kernel)) + if kernel_sum > 0: + kernel = kernel / kernel_sum + smoothed_full = np.convolve(weights, kernel, mode="full") + start = int((kernel.size - 1) // 2) + smoothed_weights = smoothed_full[start : start + option_count] + kde_y = np.interp(kde_x, option_positions, smoothed_weights) + kde_y = np.nan_to_num(kde_y, nan=0.0, posinf=0.0, neginf=0.0) + y_max = float(max(1.0, float(np.max(kde_y, initial=1.0)))) lower, upper = selected_range lower = max(x_min, min(float(lower), x_max)) @@ -838,16 +850,14 @@ def _sx_from_option_index(index: float) -> float: highlight_x = _sx(lower) highlight_w = max(1.0, _sx(upper) - highlight_x) line_points = " ".join( - f"{_sx_from_option_index(float(option_center)):.2f},{_sy(float(count)):.2f}" - for option_center, count in zip( - binned_option_centers, smoothed_counts, strict=False - ) + f"{_sx_from_option_index(float(option_index)):.2f},{_sy(float(count)):.2f}" + for option_index, count in zip(kde_x, kde_y, strict=False) ) area_points = ( - f"{_sx_from_option_index(float(binned_option_centers[0])):.2f}," + f"{_sx_from_option_index(float(kde_x[0])):.2f}," f"{plot_bottom:.2f} " f"{line_points} " - f"{_sx_from_option_index(float(binned_option_centers[-1])):.2f}," + f"{_sx_from_option_index(float(kde_x[-1])):.2f}," f"{plot_bottom:.2f}" ) threshold_line_html = "" @@ -860,8 +870,8 @@ def _sx_from_option_index(index: float) -> float: threshold_px = _sx(threshold_val) threshold_line_html = ( "" ) @@ -3846,6 +3856,10 @@ def show_widget_table( # noqa: C901, PLR0912, PLR0915 Use ``table_height`` (or ``table_max_height``) to override the default notebook table height. + + Row rendering follows pandas display limits. If the DataFrame is larger + than ``display.max_rows``, the widget table inserts a midpoint ellipsis + marker row (``…``) to indicate omitted rows. """ if backend is None: @@ -4764,10 +4778,41 @@ def _render_notebook_widget_output( self: CytoDataFrame_type, display_options: dict[str, Any] ) -> None: """Render ipywidgets controls and the notebook HTML table output.""" + if not self._custom_attrs["_widget_state"].get( + "filter_readout_css_injected", False + ): + display( + HTML( + "" + ) + ) + self._custom_attrs["_widget_state"]["filter_readout_css_injected"] = True + filter_sliders, filter_controls = self._build_filter_slider_controls() filter_control: Optional[Any] = None if len(filter_controls) == 1: - filter_control = filter_controls[0] + filter_control = widgets.VBox( + filter_controls, + layout=widgets.Layout( + width=f"{FILTER_SLIDER_TOTAL_WIDTH_PX}px", + align_items="stretch", + ), + ) elif len(filter_controls) >= MIN_POSITION_COMPONENTS: accordion_content = widgets.VBox( filter_controls, diff --git a/tests/test_frame.py b/tests/test_frame.py index 0203436..dc63537 100644 --- a/tests/test_frame.py +++ b/tests/test_frame.py @@ -7,6 +7,7 @@ import re import sys import types +import warnings from collections import OrderedDict from contextlib import nullcontext from importlib.machinery import ModuleSpec @@ -929,6 +930,7 @@ def test_filter_slider_rounds_labels_but_preserves_values() -> None: slider = cdf._ensure_filter_range_slider() assert isinstance(slider, widgets.SelectionRangeSlider) + assert "cdf-filter-range-slider" in getattr(slider, "_dom_classes", ()) options = list(slider.options) assert options == [("0.01", 0.0123), ("0.46", 0.456), ("9.87", 9.87)] @@ -973,7 +975,8 @@ def test_filter_slider_control_renders_threshold_line() -> None: assert isinstance(control, widgets.VBox) assert isinstance(control.children[0], widgets.HTML) assert "stroke='#dc2626'" in control.children[0].value - assert "y1='0'" in control.children[0].value + assert "y1='6.00'" in control.children[0].value + assert "y2='22.00'" in control.children[0].value def test_filter_slider_control_ignores_out_of_range_threshold( @@ -1053,7 +1056,7 @@ def test_filter_slider_threshold_aligns_with_selection_slider_domain() -> None: assert isinstance(control, widgets.VBox) assert isinstance(control.children[0], widgets.HTML) html = control.children[0].value - x_match = re.search(r"x1='([0-9.]+)' y1='0'", html) + x_match = re.search(r"x1='([0-9.]+)' y1='[0-9.]+'", html) assert x_match is not None x_val = float(x_match.group(1)) @@ -1079,6 +1082,35 @@ def test_filter_distribution_is_not_flat_for_clustered_values() -> None: assert max(ys) - min(ys) > 2.0 +def test_filter_distribution_avoids_runtime_warnings_for_large_ranges() -> None: + values = pd.Series( + np.concatenate( + [ + np.full(2000, 0.0), + np.full(1500, 1.0), + np.linspace(2.0, 5000.0, 2000), + ] + ) + ) + with warnings.catch_warnings(record=True) as caught: + warnings.simplefilter("always") + html = CytoDataFrame._build_filter_distribution_html( + values=values, + selected_range=(0.0, 5000.0), + size_px=(FILTER_SLIDER_TOTAL_WIDTH_PX, 52), + track_padding_px=( + FILTER_SLIDER_LABEL_WIDTH_PX, + FILTER_SLIDER_READOUT_WIDTH_PX, + ), + ) + + runtime_warnings = [ + warning for warning in caught if issubclass(warning.category, RuntimeWarning) + ] + assert html + assert not runtime_warnings + + def test_generate_html_removes_rows_outside_filter_range( monkeypatch: pytest.MonkeyPatch, ) -> None: @@ -1440,7 +1472,10 @@ def capture_display(value: object) -> None: controls_row = container.children[0] assert isinstance(controls_row, widgets.HBox) assert len(controls_row.children) == 2 - filter_control = controls_row.children[1] + filter_wrapper = controls_row.children[1] + assert isinstance(filter_wrapper, widgets.VBox) + assert len(filter_wrapper.children) == 1 + filter_control = filter_wrapper.children[0] assert isinstance(filter_control, widgets.VBox) assert isinstance(filter_control.children[0], widgets.HTML) assert " Date: Sun, 15 Mar 2026 21:56:26 -0600 Subject: [PATCH 08/18] linting --- .pre-commit-config.yaml | 2 +- docs/src/examples/cytodataframe_at_a_glance.ipynb | 7 +++++-- docs/src/examples/cytodataframe_at_a_glance.py | 7 +++++-- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index b6d40c4..96d9806 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -15,7 +15,7 @@ repos: - id: check-yaml - id: detect-private-key - repo: https://github.com/tox-dev/pyproject-fmt - rev: "v2.16.2" + rev: "v2.18.1" hooks: - id: pyproject-fmt - repo: https://github.com/codespell-project/codespell diff --git a/docs/src/examples/cytodataframe_at_a_glance.ipynb b/docs/src/examples/cytodataframe_at_a_glance.ipynb index 4b5644e..7f555fe 100644 --- a/docs/src/examples/cytodataframe_at_a_glance.ipynb +++ b/docs/src/examples/cytodataframe_at_a_glance.ipynb @@ -147,7 +147,7 @@ "frame = CytoDataFrame(\n", " data=f\"{jump_data_path}/BR00117006_shrunken.parquet\",\n", " data_context_dir=f\"{jump_data_path}/images/orig\",\n", - ")[ \n", + ")[\n", " [\n", " \"Metadata_ImageNumber\",\n", " \"Cells_Number_Object_Number\",\n", @@ -2222,7 +2222,9 @@ " data=f\"{nuclear_speckles_path}/test_slide1_converted.parquet\",\n", " data_context_dir=f\"{nuclear_speckles_path}/images/plate1\",\n", " data_mask_context_dir=f\"{nuclear_speckles_path}/masks/plate1\",\n", - " display_options={\"filter_columns\": [\"Nuclei_Texture_Variance_DAPI_3_03_256\"],}\n", + " display_options={\n", + " \"filter_columns\": [\"Nuclei_Texture_Variance_DAPI_3_03_256\"],\n", + " },\n", ")[\n", " [\n", " \"Metadata_ImageNumber\",\n", @@ -2729,6 +2731,7 @@ "source": [ "%%time\n", "import pathlib\n", + "\n", "from cytodataframe.frame import CytoDataFrame\n", "\n", "cp_3d_path = \"../../../tests/data/CP_tutorial_3D_noise_nuclei_segmentation\"\n", diff --git a/docs/src/examples/cytodataframe_at_a_glance.py b/docs/src/examples/cytodataframe_at_a_glance.py index 056101e..bae24a1 100644 --- a/docs/src/examples/cytodataframe_at_a_glance.py +++ b/docs/src/examples/cytodataframe_at_a_glance.py @@ -41,7 +41,7 @@ frame = CytoDataFrame( data=f"{jump_data_path}/BR00117006_shrunken.parquet", data_context_dir=f"{jump_data_path}/images/orig", -)[ +)[ [ "Metadata_ImageNumber", "Cells_Number_Object_Number", @@ -314,7 +314,9 @@ data=f"{nuclear_speckles_path}/test_slide1_converted.parquet", data_context_dir=f"{nuclear_speckles_path}/images/plate1", data_mask_context_dir=f"{nuclear_speckles_path}/masks/plate1", - display_options={"filter_columns": ["Nuclei_Texture_Variance_DAPI_3_03_256"],} + display_options={ + "filter_columns": ["Nuclei_Texture_Variance_DAPI_3_03_256"], + }, )[ [ "Metadata_ImageNumber", @@ -378,6 +380,7 @@ # + # %%time import pathlib + from cytodataframe.frame import CytoDataFrame cp_3d_path = "../../../tests/data/CP_tutorial_3D_noise_nuclei_segmentation" From 95dfd0220323c055d883b9b8adf0945ffeed0f67 Mon Sep 17 00:00:00 2001 From: d33bs Date: Mon, 16 Mar 2026 10:29:10 -0600 Subject: [PATCH 09/18] address coderabbit review suggestions --- .../examples/cytodataframe_at_a_glance.ipynb | 160 +++++++++--------- pyproject.toml | 1 - src/cytodataframe/frame.py | 78 +++++++-- tests/test_frame.py | 135 ++++++++++++++- 4 files changed, 277 insertions(+), 97 deletions(-) diff --git a/docs/src/examples/cytodataframe_at_a_glance.ipynb b/docs/src/examples/cytodataframe_at_a_glance.ipynb index 7f555fe..4bc585c 100644 --- a/docs/src/examples/cytodataframe_at_a_glance.ipynb +++ b/docs/src/examples/cytodataframe_at_a_glance.ipynb @@ -49,8 +49,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 1.21 s, sys: 769 ms, total: 1.98 s\n", - "Wall time: 833 ms\n" + "CPU times: user 926 ms, sys: 654 ms, total: 1.58 s\n", + "Wall time: 533 ms\n" ] }, { @@ -68,12 +68,12 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "8c47be4ce0e04398889e471bc0411116", + "model_id": "1552e7d06d3e4d12ba9d3aaf76261f9b", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "VBox(children=(HBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', st…" + "VBox(children=(HBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', la…" ] }, "metadata": {}, @@ -172,8 +172,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 1.22 s, sys: 785 ms, total: 2 s\n", - "Wall time: 722 ms\n" + "CPU times: user 883 ms, sys: 567 ms, total: 1.45 s\n", + "Wall time: 520 ms\n" ] }, { @@ -191,12 +191,12 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "d0f4bf88205a443ba9c11997721eaa36", + "model_id": "4f374e1ea2bb4c5899012f07da19426b", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "VBox(children=(HBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', st…" + "VBox(children=(HBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', la…" ] }, "metadata": {}, @@ -289,8 +289,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 1.15 s, sys: 791 ms, total: 1.94 s\n", - "Wall time: 653 ms\n" + "CPU times: user 849 ms, sys: 586 ms, total: 1.43 s\n", + "Wall time: 466 ms\n" ] }, { @@ -308,12 +308,12 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "3f25b56d7aae4807a3f18eb09b08d542", + "model_id": "9f9cd0404a764b269abbab0038525a56", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "VBox(children=(HBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', st…" + "VBox(children=(HBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', la…" ] }, "metadata": {}, @@ -407,8 +407,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 1.29 s, sys: 786 ms, total: 2.08 s\n", - "Wall time: 813 ms\n" + "CPU times: user 931 ms, sys: 643 ms, total: 1.57 s\n", + "Wall time: 534 ms\n" ] }, { @@ -426,12 +426,12 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "4aa80396237b4f35ac1b508247a98384", + "model_id": "0b4007d79acb4557acff8d8b828c102f", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "VBox(children=(HBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', st…" + "VBox(children=(HBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', la…" ] }, "metadata": {}, @@ -534,8 +534,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 1.15 s, sys: 822 ms, total: 1.98 s\n", - "Wall time: 655 ms\n" + "CPU times: user 891 ms, sys: 597 ms, total: 1.49 s\n", + "Wall time: 522 ms\n" ] }, { @@ -553,12 +553,12 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "7cdec8feae504c218017f59f711dc664", + "model_id": "5ff287fd71f04ff0b085b92e201705a7", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "VBox(children=(HBox(children=(IntSlider(value=10, continuous_update=False, description='Image adjustment:', st…" + "VBox(children=(HBox(children=(IntSlider(value=10, continuous_update=False, description='Image adjustment:', la…" ] }, "metadata": {}, @@ -650,8 +650,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 1.25 s, sys: 828 ms, total: 2.08 s\n", - "Wall time: 750 ms\n" + "CPU times: user 839 ms, sys: 589 ms, total: 1.43 s\n", + "Wall time: 480 ms\n" ] }, { @@ -669,12 +669,12 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "bd4f49db8e004688a0afbdb0a26c4a36", + "model_id": "08d49aa28f6243e2adfbdef73f705a31", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "VBox(children=(HBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', st…" + "VBox(children=(HBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', la…" ] }, "metadata": {}, @@ -768,8 +768,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 1.24 s, sys: 818 ms, total: 2.06 s\n", - "Wall time: 738 ms\n" + "CPU times: user 915 ms, sys: 601 ms, total: 1.52 s\n", + "Wall time: 535 ms\n" ] }, { @@ -787,12 +787,12 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "4c4785df9ada458c959d5aa4832c2235", + "model_id": "87fba411f8e141538268ea3cd13aa213", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "VBox(children=(HBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', st…" + "VBox(children=(HBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', la…" ] }, "metadata": {}, @@ -885,8 +885,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 1.22 s, sys: 795 ms, total: 2.02 s\n", - "Wall time: 739 ms\n" + "CPU times: user 939 ms, sys: 625 ms, total: 1.56 s\n", + "Wall time: 566 ms\n" ] }, { @@ -904,12 +904,12 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "e9877562d3ce41b2992468ea4fe41ec4", + "model_id": "93d3ec5f1cde47e5a76ad1d70ae46462", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "VBox(children=(HBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', st…" + "VBox(children=(HBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', la…" ] }, "metadata": {}, @@ -1019,8 +1019,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 743 ms, sys: 111 ms, total: 854 ms\n", - "Wall time: 867 ms\n" + "CPU times: user 507 ms, sys: 88.2 ms, total: 595 ms\n", + "Wall time: 597 ms\n" ] }, { @@ -1038,12 +1038,12 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "45df8cdabd724032839d7b6cea92bcb3", + "model_id": "16a163f3b2be4c1bb9b60ef2bff27cc6", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "VBox(children=(HBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', st…" + "VBox(children=(HBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', la…" ] }, "metadata": {}, @@ -1163,8 +1163,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 1.23 s, sys: 809 ms, total: 2.04 s\n", - "Wall time: 749 ms\n" + "CPU times: user 926 ms, sys: 635 ms, total: 1.56 s\n", + "Wall time: 527 ms\n" ] }, { @@ -1182,12 +1182,12 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "d0dd79cd8f2c40daa1c630fae2879304", + "model_id": "023c01be857244b4b8c98bbd26ec5b77", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "VBox(children=(HBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', st…" + "VBox(children=(HBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', la…" ] }, "metadata": {}, @@ -1304,8 +1304,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 426 ms, sys: 224 ms, total: 649 ms\n", - "Wall time: 292 ms\n" + "CPU times: user 313 ms, sys: 162 ms, total: 475 ms\n", + "Wall time: 220 ms\n" ] }, { @@ -1323,12 +1323,12 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "0aa318c5e31a41abae2c2e518143978c", + "model_id": "8438389f9c2d40c2b640574ec7a18681", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "VBox(children=(HBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', st…" + "VBox(children=(HBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', la…" ] }, "metadata": {}, @@ -1419,8 +1419,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 356 ms, sys: 244 ms, total: 600 ms\n", - "Wall time: 221 ms\n" + "CPU times: user 248 ms, sys: 156 ms, total: 404 ms\n", + "Wall time: 147 ms\n" ] }, { @@ -1438,12 +1438,12 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "0a3db2d87f0449d0ae13a145f909f4dd", + "model_id": "f8dcbe0a18d647f19f64e6d35885c7b9", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "VBox(children=(HBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', st…" + "VBox(children=(HBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', la…" ] }, "metadata": {}, @@ -1536,8 +1536,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 357 ms, sys: 233 ms, total: 590 ms\n", - "Wall time: 217 ms\n" + "CPU times: user 237 ms, sys: 134 ms, total: 372 ms\n", + "Wall time: 146 ms\n" ] }, { @@ -1555,12 +1555,12 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "d0c0823a00ce4690a49064cd57bfa3ce", + "model_id": "374735b960b34759972232ca70fc868a", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "VBox(children=(HBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', st…" + "VBox(children=(HBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', la…" ] }, "metadata": {}, @@ -1663,8 +1663,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 1.25 s, sys: 140 ms, total: 1.39 s\n", - "Wall time: 1.35 s\n" + "CPU times: user 835 ms, sys: 111 ms, total: 946 ms\n", + "Wall time: 920 ms\n" ] }, { @@ -1682,12 +1682,12 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "3f10d5f6c4074113843636a5f05e1b89", + "model_id": "cbc0e6242da24d40a360356e6bcec1ab", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "VBox(children=(HBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', st…" + "VBox(children=(HBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', la…" ] }, "metadata": {}, @@ -1807,8 +1807,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 167 ms, sys: 102 ms, total: 269 ms\n", - "Wall time: 109 ms\n" + "CPU times: user 121 ms, sys: 67.2 ms, total: 188 ms\n", + "Wall time: 85.1 ms\n" ] }, { @@ -1826,12 +1826,12 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "a120c48c2cc241e9bfd9a454323c12f7", + "model_id": "a8478658cf2e4c8d902d8a936ecee71b", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "VBox(children=(HBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', st…" + "VBox(children=(HBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', la…" ] }, "metadata": {}, @@ -1923,8 +1923,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 160 ms, sys: 90.4 ms, total: 251 ms\n", - "Wall time: 101 ms\n" + "CPU times: user 115 ms, sys: 60.2 ms, total: 175 ms\n", + "Wall time: 76.5 ms\n" ] }, { @@ -1942,12 +1942,12 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "740e21c8cbc24751a896b7933b2e9cab", + "model_id": "dc80db96d20348d7a1a7acd20e9c2ce7", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "VBox(children=(HBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', st…" + "VBox(children=(HBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', la…" ] }, "metadata": {}, @@ -2247,8 +2247,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 475 ms, sys: 287 ms, total: 762 ms\n", - "Wall time: 279 ms\n" + "CPU times: user 331 ms, sys: 204 ms, total: 536 ms\n", + "Wall time: 195 ms\n" ] }, { @@ -2266,12 +2266,12 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "d0cc9f5544a149a89e07e2e8285e82db", + "model_id": "a2805f58ec94484b82f9fb8ce72e769b", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "VBox(children=(HBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', st…" + "VBox(children=(HBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', la…" ] }, "metadata": {}, @@ -2377,8 +2377,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 2 μs, sys: 1 μs, total: 3 μs\n", - "Wall time: 4.05 μs\n" + "CPU times: user 1e+03 ns, sys: 1 μs, total: 2 μs\n", + "Wall time: 3.1 μs\n" ] }, { @@ -2407,8 +2407,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 1.2 s, sys: 281 ms, total: 1.48 s\n", - "Wall time: 1.31 s\n" + "CPU times: user 788 ms, sys: 173 ms, total: 961 ms\n", + "Wall time: 839 ms\n" ] }, { @@ -2426,12 +2426,12 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "bcaa1e9a00924c60a3386173986b0cc5", + "model_id": "1190893591ac42d7a2b4477eed89dc6f", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "VBox(children=(HBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', st…" + "VBox(children=(HBox(children=(IntSlider(value=50, continuous_update=False, description='Image adjustment:', la…" ] }, "metadata": {}, @@ -2563,14 +2563,14 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 8.35 ms, sys: 2.37 ms, total: 10.7 ms\n", - "Wall time: 13.6 ms\n" + "CPU times: user 6.41 ms, sys: 1.4 ms, total: 7.81 ms\n", + "Wall time: 11.2 ms\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "f5af6447f4a1405ea3a8b60476650c8d", + "model_id": "06218a18222045eebde314ec292c31c5", "version_major": 2, "version_minor": 0 }, @@ -2660,14 +2660,14 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 8.34 ms, sys: 2.12 ms, total: 10.5 ms\n", - "Wall time: 10.4 ms\n" + "CPU times: user 4.91 ms, sys: 1.33 ms, total: 6.24 ms\n", + "Wall time: 5.05 ms\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "443bb9f1f00d48768142dc574f0bdd5e", + "model_id": "69bac242de1c4eb88699666b49d909a3", "version_major": 2, "version_minor": 0 }, diff --git a/pyproject.toml b/pyproject.toml index 78bf848..5c165fa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -130,7 +130,6 @@ run.omit = [ formats = "ipynb,py:light" # specify where version replacement is performed - [tool.bandit] exclude_dirs = [ "tests" ] diff --git a/src/cytodataframe/frame.py b/src/cytodataframe/frame.py index 2fd639d..2e7f078 100644 --- a/src/cytodataframe/frame.py +++ b/src/cytodataframe/frame.py @@ -66,6 +66,7 @@ FILTER_SLIDER_TOTAL_WIDTH_PX = 430 FILTER_SLIDER_LABEL_WIDTH_PX = 170 FILTER_SLIDER_READOUT_WIDTH_PX = 96 +MAX_FILTER_SLIDER_STOPS = 500 # Fine-grained track-bound alignment for the background distribution plot. # Positive values shift inward; negative values shift outward. FILTER_SLIDER_TRACK_LEFT_ADJUST_PX = 13 @@ -540,7 +541,7 @@ def _on_slider_change(self: CytoDataFrame_type, change: Dict[str, Any]) -> None: """ self._custom_attrs["_widget_state"]["scale"] = change["new"] - self._custom_attrs["_output"].clear_output(wait=True) + self._show_output_loading_indicator() # redraw output after adjustments to scale state self._render_output() @@ -585,9 +586,33 @@ def _on_filter_slider_change( state.setdefault("filter_ranges", {})[str(state["filter_column"])] = ( normalized_range ) - self._custom_attrs["_output"].clear_output(wait=True) + self._show_output_loading_indicator() self._render_output() + def _show_output_loading_indicator( + self: CytoDataFrame_type, + message: str = "Updating table...", + ) -> None: + """Render a lightweight loading indicator in the output area.""" + self._custom_attrs["_output"].clear_output(wait=True) + with self._custom_attrs["_output"]: + display( + HTML( + "" + "
" + "" + f"{message}" + "
" + ) + ) + def _get_filter_slider_columns(self: CytoDataFrame_type) -> List[Any]: """Return configured filter columns, preserving user-specified order.""" display_options = self._custom_attrs.get("display_options", {}) or {} @@ -621,7 +646,7 @@ def _get_filter_slider_columns(self: CytoDataFrame_type) -> List[Any]: selected_columns.append(matched) return selected_columns - def _ensure_filter_range_slider( + def _ensure_filter_range_slider( # noqa: PLR0915 self: CytoDataFrame_type, filter_col: Optional[Any] = None ) -> Optional[Any]: """Build or refresh one range slider for row filtering.""" @@ -665,11 +690,19 @@ def _ensure_filter_range_slider( state["filter_range"] = None return None + slider_values = unique_values + if len(unique_values) > MAX_FILTER_SLIDER_STOPS: + sample_idx = np.linspace( + 0, len(unique_values) - 1, num=MAX_FILTER_SLIDER_STOPS, dtype=int + ) + slider_values = [unique_values[idx] for idx in sample_idx] + # Guard against accidental duplicate picks if index rounding occurs. + slider_values = list(dict.fromkeys(slider_values)) options = [ - (self._format_filter_slider_label(value), value) for value in unique_values + (self._format_filter_slider_label(value), value) for value in slider_values ] - default_lower = unique_values[0] - default_upper = unique_values[-1] + default_lower = slider_values[0] + default_upper = slider_values[-1] selected_range = state["filter_ranges"].get(slider_key) if ( not isinstance(selected_range, tuple) @@ -4254,10 +4287,6 @@ def _generate_jupyter_dataframe_html( # noqa: C901, PLR0912, PLR0915 # gather indices which will be displayed based on pandas configuration display_indices = CytoDataFrame(data).get_displayed_rows() - display_indices = self._filter_display_indices_by_widget_range( - data=data, - display_indices=display_indices, - ) active_filter_columns = ( self._custom_attrs["_widget_state"].get("filter_columns") or [] ) @@ -4283,8 +4312,17 @@ def _generate_jupyter_dataframe_html( # noqa: C901, PLR0912, PLR0915 isinstance(active_filter_ranges.get(str(col)), tuple) for col in active_filter_columns ): - data = data.loc[display_indices] - display_indices = data.index.tolist() + full_filtered_indices = self._filter_display_indices_by_widget_range( + data=data, + display_indices=data.index.tolist(), + ) + data = data.loc[full_filtered_indices] + display_indices = CytoDataFrame(data).get_displayed_rows() + else: + display_indices = self._filter_display_indices_by_widget_range( + data=data, + display_indices=display_indices, + ) # gather bounding box columns for use below if self._custom_attrs["data_bounding_box"] is not None: @@ -4452,8 +4490,10 @@ def _render_output(self: CytoDataFrame_type) -> None: f"max-height:{table_height};'>" f"{html_content}" ) - - with self._custom_attrs["_output"]: + output_widget = self._custom_attrs["_output"] + if hasattr(output_widget, "clear_output"): + output_widget.clear_output(wait=True) + with output_widget: display(HTML(scroll_wrapped_html)) if "cyto-3d-image" in html_content and "data-volume" in html_content: display( @@ -4730,12 +4770,18 @@ def _render_cell( logger.debug("Failed to build trame snapshot HTML: %s", exc) return html_content - def _try_render_trame_widget_table( + def _try_render_trame_widget_table( # noqa: PLR0911 self: CytoDataFrame_type, debug: bool, display_options: dict[str, Any] ) -> bool: """Try rendering the trame widget table and return ``True`` on success.""" if debug: return False + configured_filter_columns = display_options.get("filter_columns") + if isinstance(configured_filter_columns, (list, tuple)): + if len(configured_filter_columns) > 0: + return False + elif configured_filter_columns: + return False force_trame = display_options.get("view") == "trame" auto_trame_for_3d = display_options.get("auto_trame_for_3d", True) columns_3d = self._find_3d_columns_for_display() if auto_trame_for_3d else [] @@ -4827,6 +4873,7 @@ def _render_notebook_widget_output( accordion.selected_index = None filter_control = accordion controls: List[Any] = [self._custom_attrs["_scale_slider"]] + self._custom_attrs["_scale_slider"].layout = widgets.Layout(margin="10px 0 0 0") if filter_control is not None: controls.append(filter_control) controls_row = widgets.HBox(controls) @@ -4840,6 +4887,7 @@ def _render_notebook_widget_output( ] ) ) + self._show_output_loading_indicator(message="Loading table...") if bool(display_options.get("show_static_snapshot_details", True)): snapshot_html = self._generate_jupyter_dataframe_html() details_html = ( diff --git a/tests/test_frame.py b/tests/test_frame.py index dc63537..b923b26 100644 --- a/tests/test_frame.py +++ b/tests/test_frame.py @@ -25,6 +25,7 @@ FILTER_SLIDER_LABEL_WIDTH_PX, FILTER_SLIDER_READOUT_WIDTH_PX, FILTER_SLIDER_TOTAL_WIDTH_PX, + MAX_FILTER_SLIDER_STOPS, CytoDataFrame, ) from tests.utils import ( @@ -849,11 +850,16 @@ def test_slider_updates_state(monkeypatch: MonkeyPatch): # Track render calls using monkeypatch or a flag render_called = {} + loading_called = {} def mock_render_output() -> None: render_called["called"] = True + def mock_show_loading() -> None: + loading_called["called"] = True + monkeypatch.setattr(cdf, "_render_output", mock_render_output) + monkeypatch.setattr(cdf, "_show_output_loading_indicator", mock_show_loading) # Call the method manually cdf._on_slider_change(change) @@ -863,6 +869,7 @@ def mock_render_output() -> None: # Check if the render method was triggered assert render_called.get("called", False) + assert loading_called.get("called", False) def test_filter_slider_updates_state(monkeypatch: MonkeyPatch): @@ -873,15 +880,21 @@ def test_filter_slider_updates_state(monkeypatch: MonkeyPatch): ) cdf._custom_attrs["_widget_state"]["filter_column"] = "AreaShape_Area" render_called = {} + loading_called = {} def mock_render_output() -> None: render_called["called"] = True + def mock_show_loading() -> None: + loading_called["called"] = True + monkeypatch.setattr(cdf, "_render_output", mock_render_output) + monkeypatch.setattr(cdf, "_show_output_loading_indicator", mock_show_loading) cdf._on_filter_slider_change({"new": (1.5, 2.5)}) assert cdf._custom_attrs["_widget_state"]["filter_range"] == (1.5, 2.5) assert render_called.get("called", False) + assert loading_called.get("called", False) def test_filter_display_indices_by_widget_range() -> None: @@ -935,6 +948,23 @@ def test_filter_slider_rounds_labels_but_preserves_values() -> None: assert options == [("0.01", 0.0123), ("0.46", 0.456), ("9.87", 9.87)] +def test_filter_slider_caps_option_count_for_near_unique_values() -> None: + values = np.arange(MAX_FILTER_SLIDER_STOPS + 200, dtype=np.float64) + cdf = CytoDataFrame( + pd.DataFrame({"FilterScore": values}), + display_options={"filter_column": "FilterScore"}, + ) + + slider = cdf._ensure_filter_range_slider() + + assert isinstance(slider, widgets.SelectionRangeSlider) + options = list(slider.options) + assert len(options) == MAX_FILTER_SLIDER_STOPS + assert options[0][1] == float(values.min()) + assert options[-1][1] == float(values.max()) + assert slider.value == (float(values.min()), float(values.max())) + + def test_filter_distribution_constant_values_stays_centered() -> None: html = CytoDataFrame._build_filter_distribution_html( values=pd.Series([0.47, 0.47, 0.47, 0.47]), @@ -979,7 +1009,7 @@ def test_filter_slider_control_renders_threshold_line() -> None: assert "y2='22.00'" in control.children[0].value -def test_filter_slider_control_ignores_out_of_range_threshold( +def test_filter_slider_control_warns_and_clamps_out_of_range_threshold( caplog: pytest.LogCaptureFixture, ) -> None: cdf = CytoDataFrame( @@ -1141,6 +1171,40 @@ def test_generate_html_removes_rows_outside_filter_range( assert "drop-row" not in html +def test_generate_html_filters_full_frame_before_display_window( + monkeypatch: pytest.MonkeyPatch, +) -> None: + labels = [f"row-{idx}" for idx in range(20)] + scores = [0.0] * 20 + labels[10] = "middle-keep" + scores[10] = 5.0 + + cdf = CytoDataFrame( + pd.DataFrame({"Label": labels, "FilterScore": scores}), + display_options={"filter_column": "FilterScore"}, + ) + cdf._custom_attrs["_widget_state"]["filter_column"] = "FilterScore" + cdf._custom_attrs["_widget_state"]["filter_range"] = (4.9, 5.1) + cdf._custom_attrs["_widget_state"]["filter_columns"] = ["FilterScore"] + cdf._custom_attrs["_widget_state"]["filter_ranges"] = {"FilterScore": (4.9, 5.1)} + + options = { + "display.notebook_repr_html": True, + "display.max_rows": 8, + "display.min_rows": 4, + "display.max_columns": 10, + "display.show_dimensions": False, + } + monkeypatch.setattr("cytodataframe.frame.get_option", lambda name: options[name]) + monkeypatch.setattr("pandas.get_option", lambda name: options[name]) + + html = cdf._generate_jupyter_dataframe_html() + + assert "middle-keep" in html + assert "row-0" not in html + assert "row-19" not in html + + def test_get_3d_volume_from_cell_loads_3d_tiff(tmp_path: pathlib.Path) -> None: volume = np.arange(4 * 5 * 6, dtype=np.uint8).reshape(4, 5, 6) image_path = tmp_path / "volume.tiff" @@ -1422,6 +1486,46 @@ def fake_show_widget_table(column: str, **kwargs: object) -> str: assert displayed +def test_repr_html_trame_with_filter_columns_uses_notebook_widget_path( + monkeypatch: pytest.MonkeyPatch, +) -> None: + cdf = CytoDataFrame( + pd.DataFrame( + { + "Image_FileName_DNA": ["volume.tiff"], + "FilterScoreA": [1.0], + "FilterScoreB": [2.0], + } + ), + display_options={ + "view": "trame", + "filter_columns": ["FilterScoreA", "FilterScoreB"], + }, + ) + calls = {"show_widget_table": 0, "render_notebook": 0} + + monkeypatch.setattr( + cdf, "_find_3d_columns_for_display", lambda: ["Image_FileName_DNA"] + ) + + def fake_show_widget_table(**_kwargs: object) -> str: + calls["show_widget_table"] += 1 + return "widget_table" + + def fake_render_notebook_widget_output(**_kwargs: object) -> None: + calls["render_notebook"] += 1 + + monkeypatch.setattr(cdf, "show_widget_table", fake_show_widget_table) + monkeypatch.setattr( + cdf, "_render_notebook_widget_output", fake_render_notebook_widget_output + ) + monkeypatch.setattr("cytodataframe.frame.get_option", lambda _name: True) + + assert cdf._repr_html_() is None + assert calls["show_widget_table"] == 0 + assert calls["render_notebook"] == 1 + + def test_repr_html_2d_displays_static_snapshot_details( monkeypatch: pytest.MonkeyPatch, ) -> None: @@ -2123,6 +2227,35 @@ def capture_display(value: object) -> None: assert len(displayed) == 3 +def test_render_output_clears_output_before_display( + monkeypatch: pytest.MonkeyPatch, +) -> None: + cdf = CytoDataFrame(pd.DataFrame({"A": [1]})) + + class DummyOutput: + def __init__(self) -> None: + self.clear_calls: list[bool] = [] + + def clear_output(self, wait: bool = False) -> None: + self.clear_calls.append(wait) + + def __enter__(self) -> "DummyOutput": + return self + + def __exit__(self, exc_type, exc, tb) -> bool: # noqa: ANN001 + return False + + dummy_output = DummyOutput() + cdf._custom_attrs["_output"] = dummy_output + monkeypatch.setattr(cdf, "_generate_jupyter_dataframe_html", lambda: "") + monkeypatch.setattr("cytodataframe.frame.get_option", lambda _name: True) + monkeypatch.setattr("cytodataframe.frame.display", lambda _value: None) + + cdf._render_output() + + assert dummy_output.clear_calls == [True] + + def test_generate_trame_snapshot_html_paths(monkeypatch: pytest.MonkeyPatch): cdf = CytoDataFrame(pd.DataFrame({"Image_FileName_DNA": ["dna.tiff"]}, index=[0])) monkeypatch.setattr(cdf, "_generate_jupyter_dataframe_html", lambda: "
") From b5731b94d14cbf114f2a0a69c545da7f4ff51f52 Mon Sep 17 00:00:00 2001 From: d33bs Date: Mon, 16 Mar 2026 10:38:36 -0600 Subject: [PATCH 10/18] ignore notebook file imports --- docs/src/examples/cytodataframe_at_a_glance.ipynb | 8 +------- docs/src/examples/cytodataframe_at_a_glance.py | 6 ------ pyproject.toml | 1 + 3 files changed, 2 insertions(+), 13 deletions(-) diff --git a/docs/src/examples/cytodataframe_at_a_glance.ipynb b/docs/src/examples/cytodataframe_at_a_glance.ipynb index 4bc585c..6d45908 100644 --- a/docs/src/examples/cytodataframe_at_a_glance.ipynb +++ b/docs/src/examples/cytodataframe_at_a_glance.ipynb @@ -2650,7 +2650,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": null, "id": "18ed220b", "metadata": { "lines_to_next_cell": 0 @@ -2730,12 +2730,6 @@ ], "source": [ "%%time\n", - "import pathlib\n", - "\n", - "from cytodataframe.frame import CytoDataFrame\n", - "\n", - "cp_3d_path = \"../../../tests/data/CP_tutorial_3D_noise_nuclei_segmentation\"\n", - "\n", "# read 3d images with segmentation masks and show the\n", "# segmentation masks are also 3D.\n", "cdf = CytoDataFrame(\n", diff --git a/docs/src/examples/cytodataframe_at_a_glance.py b/docs/src/examples/cytodataframe_at_a_glance.py index bae24a1..16d53cb 100644 --- a/docs/src/examples/cytodataframe_at_a_glance.py +++ b/docs/src/examples/cytodataframe_at_a_glance.py @@ -379,12 +379,6 @@ cdf[["ImageNumber", "ObjectNumber", "FileName_Nuclei"]][:3] # + # %%time -import pathlib - -from cytodataframe.frame import CytoDataFrame - -cp_3d_path = "../../../tests/data/CP_tutorial_3D_noise_nuclei_segmentation" - # read 3d images with segmentation masks and show the # segmentation masks are also 3D. cdf = CytoDataFrame( diff --git a/pyproject.toml b/pyproject.toml index 5c165fa..a5f099e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -112,6 +112,7 @@ lint.select = [ lint.per-file-ignores."__init__.py" = [ "E402", "F401" ] lint.per-file-ignores."src/cytodataframe/*.py" = [ "ANN401", "PLC0415" ] lint.per-file-ignores."src/cytodataframe/image.py" = [ "PLR2004" ] +lint.per-file-ignores."docs/src/examples/cytodataframe_at_a_glance.py" = [ "E402" ] # ignore typing rules for tests lint.per-file-ignores."tests/*" = [ "ANN201", "PLR0913", "PLR2004", "SIM105" ] From f91c1cf9acc96c91e1b6b59c188c3aceba815ffc Mon Sep 17 00:00:00 2001 From: "pre-commit-ci-lite[bot]" <117423508+pre-commit-ci-lite[bot]@users.noreply.github.com> Date: Mon, 16 Mar 2026 16:42:54 +0000 Subject: [PATCH 11/18] [pre-commit.ci lite] apply automatic fixes --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index a5f099e..54d132f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -110,9 +110,9 @@ lint.select = [ ] # Ignore `E402` and `F401` (unused imports) in all `__init__.py` files lint.per-file-ignores."__init__.py" = [ "E402", "F401" ] +lint.per-file-ignores."docs/src/examples/cytodataframe_at_a_glance.py" = [ "E402" ] lint.per-file-ignores."src/cytodataframe/*.py" = [ "ANN401", "PLC0415" ] lint.per-file-ignores."src/cytodataframe/image.py" = [ "PLR2004" ] -lint.per-file-ignores."docs/src/examples/cytodataframe_at_a_glance.py" = [ "E402" ] # ignore typing rules for tests lint.per-file-ignores."tests/*" = [ "ANN201", "PLR0913", "PLR2004", "SIM105" ] From a3304e00e431d1d4264f1af687e384e67bc9f9d5 Mon Sep 17 00:00:00 2001 From: d33bs Date: Mon, 16 Mar 2026 15:09:25 -0600 Subject: [PATCH 12/18] addressing copilot review suggestions --- .pre-commit-config.yaml | 2 +- .../examples/cytodataframe_at_a_glance.ipynb | 124 +++++++++--------- pyproject.toml | 2 +- src/cytodataframe/frame.py | 105 ++++++++++++--- tests/test_frame.py | 20 +++ 5 files changed, 171 insertions(+), 82 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 96d9806..06b87a7 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -15,7 +15,7 @@ repos: - id: check-yaml - id: detect-private-key - repo: https://github.com/tox-dev/pyproject-fmt - rev: "v2.18.1" + rev: "v2.19.0" hooks: - id: pyproject-fmt - repo: https://github.com/codespell-project/codespell diff --git a/docs/src/examples/cytodataframe_at_a_glance.ipynb b/docs/src/examples/cytodataframe_at_a_glance.ipynb index 6d45908..771dd33 100644 --- a/docs/src/examples/cytodataframe_at_a_glance.ipynb +++ b/docs/src/examples/cytodataframe_at_a_glance.ipynb @@ -49,8 +49,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 926 ms, sys: 654 ms, total: 1.58 s\n", - "Wall time: 533 ms\n" + "CPU times: user 894 ms, sys: 627 ms, total: 1.52 s\n", + "Wall time: 630 ms\n" ] }, { @@ -68,7 +68,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "1552e7d06d3e4d12ba9d3aaf76261f9b", + "model_id": "38505870868146fcaa12e5bfa6e62a49", "version_major": 2, "version_minor": 0 }, @@ -172,8 +172,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 883 ms, sys: 567 ms, total: 1.45 s\n", - "Wall time: 520 ms\n" + "CPU times: user 934 ms, sys: 653 ms, total: 1.59 s\n", + "Wall time: 527 ms\n" ] }, { @@ -191,7 +191,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "4f374e1ea2bb4c5899012f07da19426b", + "model_id": "42ed914f31b244d2bc2c0ff6d1159f06", "version_major": 2, "version_minor": 0 }, @@ -289,8 +289,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 849 ms, sys: 586 ms, total: 1.43 s\n", - "Wall time: 466 ms\n" + "CPU times: user 884 ms, sys: 657 ms, total: 1.54 s\n", + "Wall time: 462 ms\n" ] }, { @@ -308,7 +308,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "9f9cd0404a764b269abbab0038525a56", + "model_id": "1245983198f449ecb0e4bada8e376c5e", "version_major": 2, "version_minor": 0 }, @@ -407,8 +407,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 931 ms, sys: 643 ms, total: 1.57 s\n", - "Wall time: 534 ms\n" + "CPU times: user 990 ms, sys: 657 ms, total: 1.65 s\n", + "Wall time: 591 ms\n" ] }, { @@ -426,7 +426,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "0b4007d79acb4557acff8d8b828c102f", + "model_id": "d38aebaa28b94ba8bb5aca2247cca166", "version_major": 2, "version_minor": 0 }, @@ -534,8 +534,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 891 ms, sys: 597 ms, total: 1.49 s\n", - "Wall time: 522 ms\n" + "CPU times: user 917 ms, sys: 632 ms, total: 1.55 s\n", + "Wall time: 524 ms\n" ] }, { @@ -553,7 +553,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "5ff287fd71f04ff0b085b92e201705a7", + "model_id": "351eb8c5ccd2464d860f9af188a22b27", "version_major": 2, "version_minor": 0 }, @@ -650,8 +650,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 839 ms, sys: 589 ms, total: 1.43 s\n", - "Wall time: 480 ms\n" + "CPU times: user 854 ms, sys: 634 ms, total: 1.49 s\n", + "Wall time: 466 ms\n" ] }, { @@ -669,7 +669,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "08d49aa28f6243e2adfbdef73f705a31", + "model_id": "36f2ba192a384c06a33aa36c0b99bb76", "version_major": 2, "version_minor": 0 }, @@ -768,8 +768,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 915 ms, sys: 601 ms, total: 1.52 s\n", - "Wall time: 535 ms\n" + "CPU times: user 949 ms, sys: 662 ms, total: 1.61 s\n", + "Wall time: 538 ms\n" ] }, { @@ -787,7 +787,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "87fba411f8e141538268ea3cd13aa213", + "model_id": "39fcd38d4e734262bd312782c93b67cc", "version_major": 2, "version_minor": 0 }, @@ -885,8 +885,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 939 ms, sys: 625 ms, total: 1.56 s\n", - "Wall time: 566 ms\n" + "CPU times: user 843 ms, sys: 501 ms, total: 1.34 s\n", + "Wall time: 531 ms\n" ] }, { @@ -904,7 +904,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "93d3ec5f1cde47e5a76ad1d70ae46462", + "model_id": "acb2a1051635423fb0afaa7b5b4f88a7", "version_major": 2, "version_minor": 0 }, @@ -1019,8 +1019,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 507 ms, sys: 88.2 ms, total: 595 ms\n", - "Wall time: 597 ms\n" + "CPU times: user 521 ms, sys: 101 ms, total: 622 ms\n", + "Wall time: 643 ms\n" ] }, { @@ -1038,7 +1038,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "16a163f3b2be4c1bb9b60ef2bff27cc6", + "model_id": "f2745639dacc46419d01efb6fbbd8f68", "version_major": 2, "version_minor": 0 }, @@ -1163,8 +1163,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 926 ms, sys: 635 ms, total: 1.56 s\n", - "Wall time: 527 ms\n" + "CPU times: user 927 ms, sys: 643 ms, total: 1.57 s\n", + "Wall time: 534 ms\n" ] }, { @@ -1182,7 +1182,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "023c01be857244b4b8c98bbd26ec5b77", + "model_id": "dfbeede84e4449879606f2120e1665bf", "version_major": 2, "version_minor": 0 }, @@ -1304,8 +1304,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 313 ms, sys: 162 ms, total: 475 ms\n", - "Wall time: 220 ms\n" + "CPU times: user 253 ms, sys: 161 ms, total: 415 ms\n", + "Wall time: 152 ms\n" ] }, { @@ -1323,7 +1323,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "8438389f9c2d40c2b640574ec7a18681", + "model_id": "3a09ae895aca44fc809f0d5b33ab933c", "version_major": 2, "version_minor": 0 }, @@ -1419,8 +1419,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 248 ms, sys: 156 ms, total: 404 ms\n", - "Wall time: 147 ms\n" + "CPU times: user 268 ms, sys: 191 ms, total: 460 ms\n", + "Wall time: 158 ms\n" ] }, { @@ -1438,7 +1438,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "f8dcbe0a18d647f19f64e6d35885c7b9", + "model_id": "589bc1cfc01f405fa38a7fe7092540a6", "version_major": 2, "version_minor": 0 }, @@ -1536,8 +1536,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 237 ms, sys: 134 ms, total: 372 ms\n", - "Wall time: 146 ms\n" + "CPU times: user 313 ms, sys: 166 ms, total: 478 ms\n", + "Wall time: 210 ms\n" ] }, { @@ -1555,7 +1555,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "374735b960b34759972232ca70fc868a", + "model_id": "998614e49db64ca493c5e99312cac90d", "version_major": 2, "version_minor": 0 }, @@ -1663,8 +1663,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 835 ms, sys: 111 ms, total: 946 ms\n", - "Wall time: 920 ms\n" + "CPU times: user 838 ms, sys: 121 ms, total: 958 ms\n", + "Wall time: 926 ms\n" ] }, { @@ -1682,7 +1682,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "cbc0e6242da24d40a360356e6bcec1ab", + "model_id": "9c478066ab4d4c0aaf5f64188beebf93", "version_major": 2, "version_minor": 0 }, @@ -1807,8 +1807,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 121 ms, sys: 67.2 ms, total: 188 ms\n", - "Wall time: 85.1 ms\n" + "CPU times: user 124 ms, sys: 76.7 ms, total: 201 ms\n", + "Wall time: 82.4 ms\n" ] }, { @@ -1826,7 +1826,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "a8478658cf2e4c8d902d8a936ecee71b", + "model_id": "a463c66453ab48e1bce806761a99af24", "version_major": 2, "version_minor": 0 }, @@ -1923,8 +1923,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 115 ms, sys: 60.2 ms, total: 175 ms\n", - "Wall time: 76.5 ms\n" + "CPU times: user 176 ms, sys: 71.9 ms, total: 247 ms\n", + "Wall time: 127 ms\n" ] }, { @@ -1942,7 +1942,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "dc80db96d20348d7a1a7acd20e9c2ce7", + "model_id": "f9d86dae1df44ffa81cd863576beef78", "version_major": 2, "version_minor": 0 }, @@ -2247,8 +2247,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 331 ms, sys: 204 ms, total: 536 ms\n", - "Wall time: 195 ms\n" + "CPU times: user 352 ms, sys: 225 ms, total: 577 ms\n", + "Wall time: 205 ms\n" ] }, { @@ -2266,7 +2266,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "a2805f58ec94484b82f9fb8ce72e769b", + "model_id": "d51b047733644869b2bd990ca75b36a4", "version_major": 2, "version_minor": 0 }, @@ -2378,7 +2378,7 @@ "output_type": "stream", "text": [ "CPU times: user 1e+03 ns, sys: 1 μs, total: 2 μs\n", - "Wall time: 3.1 μs\n" + "Wall time: 4.05 μs\n" ] }, { @@ -2407,8 +2407,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 788 ms, sys: 173 ms, total: 961 ms\n", - "Wall time: 839 ms\n" + "CPU times: user 780 ms, sys: 175 ms, total: 955 ms\n", + "Wall time: 829 ms\n" ] }, { @@ -2426,7 +2426,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "1190893591ac42d7a2b4477eed89dc6f", + "model_id": "17f5856caae0429498fde367b4400a09", "version_major": 2, "version_minor": 0 }, @@ -2563,14 +2563,14 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 6.41 ms, sys: 1.4 ms, total: 7.81 ms\n", - "Wall time: 11.2 ms\n" + "CPU times: user 6.84 ms, sys: 2.52 ms, total: 9.36 ms\n", + "Wall time: 14.3 ms\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "06218a18222045eebde314ec292c31c5", + "model_id": "2ffcd41a3fcf491eb8d2c154fdf159db", "version_major": 2, "version_minor": 0 }, @@ -2650,7 +2650,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 22, "id": "18ed220b", "metadata": { "lines_to_next_cell": 0 @@ -2660,14 +2660,14 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 4.91 ms, sys: 1.33 ms, total: 6.24 ms\n", - "Wall time: 5.05 ms\n" + "CPU times: user 4.88 ms, sys: 233 μs, total: 5.11 ms\n", + "Wall time: 5.15 ms\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "69bac242de1c4eb88699666b49d909a3", + "model_id": "ad39440b032e41c5b1fa5bf265a80c81", "version_major": 2, "version_minor": 0 }, diff --git a/pyproject.toml b/pyproject.toml index a5f099e..54d132f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -110,9 +110,9 @@ lint.select = [ ] # Ignore `E402` and `F401` (unused imports) in all `__init__.py` files lint.per-file-ignores."__init__.py" = [ "E402", "F401" ] +lint.per-file-ignores."docs/src/examples/cytodataframe_at_a_glance.py" = [ "E402" ] lint.per-file-ignores."src/cytodataframe/*.py" = [ "ANN401", "PLC0415" ] lint.per-file-ignores."src/cytodataframe/image.py" = [ "PLR2004" ] -lint.per-file-ignores."docs/src/examples/cytodataframe_at_a_glance.py" = [ "E402" ] # ignore typing rules for tests lint.per-file-ignores."tests/*" = [ "ANN201", "PLR0913", "PLR2004", "SIM105" ] diff --git a/src/cytodataframe/frame.py b/src/cytodataframe/frame.py index 2e7f078..a0e795c 100644 --- a/src/cytodataframe/frame.py +++ b/src/cytodataframe/frame.py @@ -646,7 +646,7 @@ def _get_filter_slider_columns(self: CytoDataFrame_type) -> List[Any]: selected_columns.append(matched) return selected_columns - def _ensure_filter_range_slider( # noqa: PLR0915 + def _ensure_filter_range_slider( # noqa: C901, PLR0915 self: CytoDataFrame_type, filter_col: Optional[Any] = None ) -> Optional[Any]: """Build or refresh one range slider for row filtering.""" @@ -713,11 +713,49 @@ def _ensure_filter_range_slider( # noqa: PLR0915 upper = max(float(selected_range[0]), float(selected_range[1])) lower = max(default_lower, min(lower, default_upper)) upper = max(lower, min(upper, default_upper)) + slider_domain = np.asarray(slider_values, dtype=np.float64) + + def _nearest_slider_index(target: float) -> int: + idx = int(np.searchsorted(slider_domain, target, side="left")) + if idx <= 0: + return 0 + if idx >= slider_domain.size: + return int(slider_domain.size - 1) + left_idx = idx - 1 + right_idx = idx + if abs(float(slider_domain[left_idx]) - target) <= abs( + float(slider_domain[right_idx]) - target + ): + return int(left_idx) + return int(right_idx) + + lower_idx = _nearest_slider_index(lower) + upper_idx = _nearest_slider_index(upper) + upper_idx = max(upper_idx, lower_idx) + lower = float(slider_domain[lower_idx]) + upper = float(slider_domain[upper_idx]) normalized_range = (lower, upper) state["filter_ranges"][slider_key] = normalized_range if str(state.get("filter_column")) == slider_key: state["filter_range"] = normalized_range + cached_sliders = self._custom_attrs.setdefault("_filter_range_sliders", {}) + existing_slider = cached_sliders.get(slider_key) + if isinstance(existing_slider, widgets.SelectionRangeSlider): + existing_slider.options = options + existing_slider.value = (lower, upper) + existing_slider.description = f"{filter_col}:" + existing_slider.continuous_update = False + existing_slider.style = { + "description_width": f"{FILTER_SLIDER_LABEL_WIDTH_PX}px" + } + existing_slider.layout = widgets.Layout( + width=f"{FILTER_SLIDER_TOTAL_WIDTH_PX}px" + ) + existing_slider.add_class(FILTER_SLIDER_CSS_CLASS) + existing_slider._cyto_filter_column = filter_col # type: ignore[attr-defined] + return existing_slider + slider = widgets.SelectionRangeSlider( options=options, value=(lower, upper), @@ -728,7 +766,7 @@ def _ensure_filter_range_slider( # noqa: PLR0915 ) slider.add_class(FILTER_SLIDER_CSS_CLASS) slider._cyto_filter_column = filter_col # type: ignore[attr-defined] - self._custom_attrs.setdefault("_filter_range_sliders", {})[slider_key] = slider + cached_sliders[slider_key] = slider return slider @staticmethod @@ -769,10 +807,11 @@ def _slider_relative_position(value: float, slider_domain: np.ndarray) -> float: return position @staticmethod - def _build_filter_distribution_html( # noqa: C901, PLR0915 + def _build_filter_distribution_html( # noqa: C901, PLR0912, PLR0913, PLR0915 values: pd.Series, selected_range: Tuple[float, float], threshold_x: Optional[float] = None, + slider_values: Optional[Sequence[float]] = None, size_px: Tuple[int, int] = (FILTER_SLIDER_TOTAL_WIDTH_PX, 96), track_padding_px: Tuple[int, int] = ( FILTER_SLIDER_LABEL_WIDTH_PX, @@ -786,21 +825,45 @@ def _build_filter_distribution_html( # noqa: C901, PLR0915 return "" values_array = numeric_values.to_numpy(dtype=np.float64, copy=False) - x_min = float(np.min(values_array)) - original_x_max = float(np.max(values_array)) - x_max = original_x_max - slider_domain = np.sort(np.unique(values_array)) + if slider_values is None: + slider_domain = np.sort(np.unique(values_array)) + if slider_domain.size > MAX_FILTER_SLIDER_STOPS: + sample_idx = np.linspace( + 0, + int(slider_domain.size) - 1, + num=MAX_FILTER_SLIDER_STOPS, + dtype=int, + ) + slider_domain = slider_domain[sample_idx] + slider_domain = np.unique(slider_domain) + else: + slider_domain = np.asarray(slider_values, dtype=np.float64) + slider_domain = np.sort(slider_domain[np.isfinite(slider_domain)]) + slider_domain = np.unique(slider_domain) + if slider_domain.size == 0: + slider_domain = np.sort(np.unique(values_array)) + x_min = float(slider_domain[0]) + x_max = float(slider_domain[-1]) if x_max == x_min: # Keep constant-value distributions centered in the track rather than # collapsing to the left edge. pad = max(abs(x_min) * 0.05, 1e-6) x_min = x_min - pad x_max = x_max + pad - # Build a smooth, KDE-like density in slider-option space so the curve - # aligns to scroll points without fixed value bins. - unique_vals, inverse_idx = np.unique(values_array, return_inverse=True) - option_counts = np.bincount(inverse_idx, minlength=unique_vals.size) - option_count = int(unique_vals.size) + + # Build a smooth, KDE-like density in slider-option space (bounded by the + # slider domain) so runtime remains stable for near-unique columns. + option_count = int(slider_domain.size) + if option_count == 1: + option_counts = np.array([int(values_array.size)], dtype=np.int64) + else: + domain_midpoints = (slider_domain[:-1] + slider_domain[1:]) / 2.0 + binned_indices = np.searchsorted( + domain_midpoints, + values_array, + side="right", + ) + option_counts = np.bincount(binned_indices, minlength=option_count) option_positions = np.arange(option_count, dtype=np.float64) if option_count <= 1: kde_x = np.array([0.5], dtype=np.float64) @@ -876,9 +939,9 @@ def _sy(value: float) -> float: return plot_bottom - (value / y_max * plot_h) def _sx_from_option_index(index: float) -> float: - if unique_vals.size <= 1: + if option_count <= 1: return plot_left + (0.5 * plot_w) - return plot_left + ((float(index) / float(unique_vals.size - 1)) * plot_w) + return plot_left + ((float(index) / float(option_count - 1)) * plot_w) highlight_x = _sx(lower) highlight_w = max(1.0, _sx(upper) - highlight_x) @@ -995,23 +1058,25 @@ def _resolve_filter_plot_threshold( if threshold < data_min: logger.warning( ( - "Ignoring filter plot threshold for column '%s': %s is outside " - "data range [%s, %s]." + "Clamping filter plot threshold for column '%s' from %s to %s " + "because it is outside data range [%s, %s]." ), filter_col, threshold, data_min, + data_min, data_max, ) return data_min if threshold > data_max: logger.warning( ( - "Ignoring filter plot threshold for column '%s': %s is outside " - "data range [%s, %s]." + "Clamping filter plot threshold for column '%s' from %s to %s " + "because it is outside data range [%s, %s]." ), filter_col, threshold, + data_max, data_min, data_max, ) @@ -1044,6 +1109,7 @@ def _build_filter_slider_control_for_column( values=self[filter_col], selected_range=(float(selected_range[0]), float(selected_range[1])), threshold_x=threshold, + slider_values=[float(option[1]) for option in slider.options], size_px=(FILTER_SLIDER_TOTAL_WIDTH_PX, 52), track_padding_px=( FILTER_SLIDER_LABEL_WIDTH_PX, @@ -4777,11 +4843,14 @@ def _try_render_trame_widget_table( # noqa: PLR0911 if debug: return False configured_filter_columns = display_options.get("filter_columns") + configured_filter_column = display_options.get("filter_column") if isinstance(configured_filter_columns, (list, tuple)): if len(configured_filter_columns) > 0: return False elif configured_filter_columns: return False + if configured_filter_column: + return False force_trame = display_options.get("view") == "trame" auto_trame_for_3d = display_options.get("auto_trame_for_3d", True) columns_3d = self._find_3d_columns_for_display() if auto_trame_for_3d else [] diff --git a/tests/test_frame.py b/tests/test_frame.py index b923b26..b6a38f4 100644 --- a/tests/test_frame.py +++ b/tests/test_frame.py @@ -965,6 +965,26 @@ def test_filter_slider_caps_option_count_for_near_unique_values() -> None: assert slider.value == (float(values.min()), float(values.max())) +def test_filter_slider_reuses_cached_widget_instance() -> None: + cdf = CytoDataFrame( + pd.DataFrame({"FilterScore": [1.0, 2.0, 3.0]}), + display_options={"filter_column": "FilterScore"}, + ) + + first_slider = cdf._ensure_filter_range_slider(filter_col="FilterScore") + assert isinstance(first_slider, widgets.SelectionRangeSlider) + first_options = list(first_slider.options) + assert first_options[-1][1] == 3.0 + + cdf.loc[:, "FilterScore"] = [1.0, 2.0, 4.0] + second_slider = cdf._ensure_filter_range_slider(filter_col="FilterScore") + + assert isinstance(second_slider, widgets.SelectionRangeSlider) + assert second_slider is first_slider + second_options = list(second_slider.options) + assert second_options[-1][1] == 4.0 + + def test_filter_distribution_constant_values_stays_centered() -> None: html = CytoDataFrame._build_filter_distribution_html( values=pd.Series([0.47, 0.47, 0.47, 0.47]), From 8be46320eb5f433cd448b1231891c2d1e811c2cf Mon Sep 17 00:00:00 2001 From: d33bs Date: Tue, 17 Mar 2026 08:51:50 -0600 Subject: [PATCH 13/18] tighter kde plotting for y-axis visuals --- src/cytodataframe/frame.py | 80 +++++++++++++++++++++++++++++++++++--- 1 file changed, 75 insertions(+), 5 deletions(-) diff --git a/src/cytodataframe/frame.py b/src/cytodataframe/frame.py index a0e795c..9336cec 100644 --- a/src/cytodataframe/frame.py +++ b/src/cytodataframe/frame.py @@ -73,7 +73,13 @@ FILTER_SLIDER_TRACK_RIGHT_INSET_PX = 13 FILTER_PLOT_KDE_MIN_SAMPLES = 60 FILTER_PLOT_KDE_MAX_SAMPLES = 180 -FILTER_PLOT_KDE_MIN_BANDWIDTH = 0.7 +FILTER_PLOT_KDE_MIN_BANDWIDTH = 0.25 +FILTER_PLOT_KDE_BANDWIDTH_SCALE = 0.4 +FILTER_PLOT_Y_SCALE_DEFAULT = "asinh" +FILTER_PLOT_Y_MIN_PERCENTILE_DEFAULT = 10.0 +FILTER_PLOT_Y_MAX_PERCENTILE_DEFAULT = 80.0 +FILTER_PLOT_Y_MAX_PERCENTILE_UPPER = 100.0 +FILTER_PLOT_Y_GAMMA_DEFAULT = 0.6 FILTER_SLIDER_CSS_CLASS = "cdf-filter-range-slider" # provide backwards compatibility for Self type in earlier Python versions. @@ -812,6 +818,10 @@ def _build_filter_distribution_html( # noqa: C901, PLR0912, PLR0913, PLR0915 selected_range: Tuple[float, float], threshold_x: Optional[float] = None, slider_values: Optional[Sequence[float]] = None, + y_scale: str = FILTER_PLOT_Y_SCALE_DEFAULT, + y_min_percentile: float = FILTER_PLOT_Y_MIN_PERCENTILE_DEFAULT, + y_max_percentile: float = FILTER_PLOT_Y_MAX_PERCENTILE_DEFAULT, + y_gamma: float = FILTER_PLOT_Y_GAMMA_DEFAULT, size_px: Tuple[int, int] = (FILTER_SLIDER_TOTAL_WIDTH_PX, 96), track_padding_px: Tuple[int, int] = ( FILTER_SLIDER_LABEL_WIDTH_PX, @@ -890,7 +900,11 @@ def _build_filter_distribution_html( # noqa: C901, PLR0912, PLR0913, PLR0915 silverman_bw = 1.06 * weighted_std * (max(n_eff, 1.0) ** (-0.2)) bandwidth = max( FILTER_PLOT_KDE_MIN_BANDWIDTH, - silverman_bw if np.isfinite(silverman_bw) and silverman_bw > 0 else 0.0, + ( + silverman_bw * FILTER_PLOT_KDE_BANDWIDTH_SCALE + if np.isfinite(silverman_bw) and silverman_bw > 0 + else 0.0 + ), ) # Numerically stable KDE-like smoothing in option-index space: # smooth discrete option counts with a Gaussian kernel, then sample @@ -906,7 +920,39 @@ def _build_filter_distribution_html( # noqa: C901, PLR0912, PLR0913, PLR0915 smoothed_weights = smoothed_full[start : start + option_count] kde_y = np.interp(kde_x, option_positions, smoothed_weights) kde_y = np.nan_to_num(kde_y, nan=0.0, posinf=0.0, neginf=0.0) - y_max = float(max(1.0, float(np.max(kde_y, initial=1.0)))) + y_scale_normalized = str(y_scale).strip().lower() + if y_scale_normalized == "asinh": + plot_y = np.arcsinh(np.maximum(kde_y, 0.0)) + elif y_scale_normalized == "log": + plot_y = np.log1p(np.maximum(kde_y, 0.0)) + elif y_scale_normalized == "sqrt": + plot_y = np.sqrt(np.maximum(kde_y, 0.0)) + else: + plot_y = np.maximum(kde_y, 0.0) + + min_pct = float(y_min_percentile) + max_pct = float(y_max_percentile) + if ( + 0.0 <= min_pct < FILTER_PLOT_Y_MAX_PERCENTILE_UPPER + and 0.0 < max_pct < FILTER_PLOT_Y_MAX_PERCENTILE_UPPER + and min_pct < max_pct + ): + y_floor = float(np.percentile(plot_y, min_pct)) + y_cap = float(np.percentile(plot_y, max_pct)) + if y_cap > y_floor: + plot_y = np.clip(plot_y, y_floor, y_cap) - y_floor + else: + plot_y = np.maximum(plot_y - y_floor, 0.0) + + gamma = float(y_gamma) + if gamma > 0 and gamma != 1.0: + plot_y = np.power(np.maximum(plot_y, 0.0), gamma) + + if 0.0 < max_pct < FILTER_PLOT_Y_MAX_PERCENTILE_UPPER: + y_max = float(np.percentile(plot_y, max_pct)) + else: + y_max = float(np.max(plot_y, initial=1.0)) + y_max = float(max(1e-9, y_max)) lower, upper = selected_range lower = max(x_min, min(float(lower), x_max)) @@ -946,8 +992,9 @@ def _sx_from_option_index(index: float) -> float: highlight_x = _sx(lower) highlight_w = max(1.0, _sx(upper) - highlight_x) line_points = " ".join( - f"{_sx_from_option_index(float(option_index)):.2f},{_sy(float(count)):.2f}" - for option_index, count in zip(kde_x, kde_y, strict=False) + f"{_sx_from_option_index(float(option_index)):.2f}," + f"{_sy(float(count)):.2f}" + for option_index, count in zip(kde_x, plot_y, strict=False) ) area_points = ( f"{_sx_from_option_index(float(kde_x[0])):.2f}," @@ -1110,6 +1157,29 @@ def _build_filter_slider_control_for_column( selected_range=(float(selected_range[0]), float(selected_range[1])), threshold_x=threshold, slider_values=[float(option[1]) for option in slider.options], + y_scale=str( + (self._custom_attrs.get("display_options", {}) or {}).get( + "filter_plot_y_scale", FILTER_PLOT_Y_SCALE_DEFAULT + ) + ), + y_min_percentile=float( + (self._custom_attrs.get("display_options", {}) or {}).get( + "filter_plot_y_min_percentile", + FILTER_PLOT_Y_MIN_PERCENTILE_DEFAULT, + ) + ), + y_max_percentile=float( + (self._custom_attrs.get("display_options", {}) or {}).get( + "filter_plot_y_percentile", + FILTER_PLOT_Y_MAX_PERCENTILE_DEFAULT, + ) + ), + y_gamma=float( + (self._custom_attrs.get("display_options", {}) or {}).get( + "filter_plot_y_gamma", + FILTER_PLOT_Y_GAMMA_DEFAULT, + ) + ), size_px=(FILTER_SLIDER_TOTAL_WIDTH_PX, 52), track_padding_px=( FILTER_SLIDER_LABEL_WIDTH_PX, From 2a61bedda8d0060cf4900158e729d26a4f3ce50e Mon Sep 17 00:00:00 2001 From: "pre-commit-ci-lite[bot]" <117423508+pre-commit-ci-lite[bot]@users.noreply.github.com> Date: Tue, 17 Mar 2026 14:56:41 +0000 Subject: [PATCH 14/18] [pre-commit.ci lite] apply automatic fixes --- src/cytodataframe/frame.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/cytodataframe/frame.py b/src/cytodataframe/frame.py index 9336cec..ea4207f 100644 --- a/src/cytodataframe/frame.py +++ b/src/cytodataframe/frame.py @@ -992,8 +992,7 @@ def _sx_from_option_index(index: float) -> float: highlight_x = _sx(lower) highlight_w = max(1.0, _sx(upper) - highlight_x) line_points = " ".join( - f"{_sx_from_option_index(float(option_index)):.2f}," - f"{_sy(float(count)):.2f}" + f"{_sx_from_option_index(float(option_index)):.2f},{_sy(float(count)):.2f}" for option_index, count in zip(kde_x, plot_y, strict=False) ) area_points = ( From 928f4ef5aeeb6b4d84782e94cd77bdbc6b5044aa Mon Sep 17 00:00:00 2001 From: d33bs Date: Tue, 17 Mar 2026 08:59:27 -0600 Subject: [PATCH 15/18] address coderabbit comment --- src/cytodataframe/frame.py | 13 ++++++++----- tests/test_frame.py | 14 ++++++++++++++ 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/src/cytodataframe/frame.py b/src/cytodataframe/frame.py index 9336cec..e6c9081 100644 --- a/src/cytodataframe/frame.py +++ b/src/cytodataframe/frame.py @@ -12,7 +12,7 @@ import tempfile import uuid import warnings -from collections import OrderedDict +from collections import Counter, OrderedDict from io import BytesIO, StringIO from typing import ( Any, @@ -1272,10 +1272,13 @@ def _filter_display_indices_by_widget_range( in_range = numeric_values[ (numeric_values >= lower) & (numeric_values <= upper) ] - allowed = set(in_range.index.tolist()) - active_indices = [ - row_label for row_label in active_indices if row_label in allowed - ] + allowed_counts = Counter(in_range.index.tolist()) + filtered_indices: List[Any] = [] + for row_label in active_indices: + if allowed_counts[row_label] > 0: + filtered_indices.append(row_label) + allowed_counts[row_label] -= 1 + active_indices = filtered_indices return active_indices diff --git a/tests/test_frame.py b/tests/test_frame.py index b6a38f4..bd630f1 100644 --- a/tests/test_frame.py +++ b/tests/test_frame.py @@ -934,6 +934,20 @@ def test_filter_display_indices_by_widget_range_multiple_columns() -> None: assert filtered == [1, 2] +def test_filter_display_indices_by_widget_range_preserves_duplicate_labels() -> None: + cdf = CytoDataFrame( + pd.DataFrame({"FilterScore": [1.0, 2.0, 3.0]}, index=[0, 0, 1]) + ) + cdf._custom_attrs["_widget_state"]["filter_column"] = "FilterScore" + cdf._custom_attrs["_widget_state"]["filter_range"] = (0.5, 2.5) + + filtered = cdf._filter_display_indices_by_widget_range( + data=cdf, display_indices=[0, 0, 0, 1] + ) + + assert filtered == [0, 0] + + def test_filter_slider_rounds_labels_but_preserves_values() -> None: cdf = CytoDataFrame( pd.DataFrame({"FilterScore": [0.0123, 0.456, 9.87]}), From 4251f1f5b8068d04d6f06238874a0f1a702797c4 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci-lite[bot]" <117423508+pre-commit-ci-lite[bot]@users.noreply.github.com> Date: Tue, 17 Mar 2026 15:04:46 +0000 Subject: [PATCH 16/18] [pre-commit.ci lite] apply automatic fixes --- tests/test_frame.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/test_frame.py b/tests/test_frame.py index bd630f1..6913daa 100644 --- a/tests/test_frame.py +++ b/tests/test_frame.py @@ -935,9 +935,7 @@ def test_filter_display_indices_by_widget_range_multiple_columns() -> None: def test_filter_display_indices_by_widget_range_preserves_duplicate_labels() -> None: - cdf = CytoDataFrame( - pd.DataFrame({"FilterScore": [1.0, 2.0, 3.0]}, index=[0, 0, 1]) - ) + cdf = CytoDataFrame(pd.DataFrame({"FilterScore": [1.0, 2.0, 3.0]}, index=[0, 0, 1])) cdf._custom_attrs["_widget_state"]["filter_column"] = "FilterScore" cdf._custom_attrs["_widget_state"]["filter_range"] = (0.5, 2.5) From 6f6e604d7cf3f92779b806edede548b6675de096 Mon Sep 17 00:00:00 2001 From: d33bs Date: Tue, 17 Mar 2026 09:09:28 -0600 Subject: [PATCH 17/18] address coderabbit comments --- src/cytodataframe/frame.py | 4 +++- tests/test_frame.py | 11 +++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/src/cytodataframe/frame.py b/src/cytodataframe/frame.py index 07eae89..959b97c 100644 --- a/src/cytodataframe/frame.py +++ b/src/cytodataframe/frame.py @@ -624,8 +624,10 @@ def _get_filter_slider_columns(self: CytoDataFrame_type) -> List[Any]: display_options = self._custom_attrs.get("display_options", {}) or {} configured_many = display_options.get("filter_columns") configured_single = display_options.get("filter_column") + if isinstance(configured_many, (list, tuple)) and len(configured_many) == 0: + configured_many = None configured: List[Any] = [] - if isinstance(configured_many, (list, tuple)): + if isinstance(configured_many, (list, tuple)) and len(configured_many) > 0: configured.extend(configured_many) elif configured_many is not None: configured.append(configured_many) diff --git a/tests/test_frame.py b/tests/test_frame.py index bd630f1..4405850 100644 --- a/tests/test_frame.py +++ b/tests/test_frame.py @@ -962,6 +962,17 @@ def test_filter_slider_rounds_labels_but_preserves_values() -> None: assert options == [("0.01", 0.0123), ("0.46", 0.456), ("9.87", 9.87)] +def test_get_filter_slider_columns_falls_back_to_single_when_many_is_empty() -> None: + cdf = CytoDataFrame( + pd.DataFrame({"FilterScore": [1.0, 2.0]}), + display_options={"filter_columns": [], "filter_column": "FilterScore"}, + ) + + columns = cdf._get_filter_slider_columns() + + assert columns == ["FilterScore"] + + def test_filter_slider_caps_option_count_for_near_unique_values() -> None: values = np.arange(MAX_FILTER_SLIDER_STOPS + 200, dtype=np.float64) cdf = CytoDataFrame( From 2c7f2e67ff55ff534fb02495d868352b16273ec9 Mon Sep 17 00:00:00 2001 From: d33bs Date: Tue, 17 Mar 2026 13:30:56 -0600 Subject: [PATCH 18/18] updates from jennas review Co-Authored-By: Jenna Tomkinson <107513215+jenna-tomkinson@users.noreply.github.com> --- .../examples/cytodataframe_at_a_glance.ipynb | 122 +++++++++--------- src/cytodataframe/frame.py | 52 ++++++-- tests/test_frame.py | 4 + 3 files changed, 103 insertions(+), 75 deletions(-) diff --git a/docs/src/examples/cytodataframe_at_a_glance.ipynb b/docs/src/examples/cytodataframe_at_a_glance.ipynb index 771dd33..7b07009 100644 --- a/docs/src/examples/cytodataframe_at_a_glance.ipynb +++ b/docs/src/examples/cytodataframe_at_a_glance.ipynb @@ -49,8 +49,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 894 ms, sys: 627 ms, total: 1.52 s\n", - "Wall time: 630 ms\n" + "CPU times: user 919 ms, sys: 636 ms, total: 1.56 s\n", + "Wall time: 554 ms\n" ] }, { @@ -68,7 +68,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "38505870868146fcaa12e5bfa6e62a49", + "model_id": "b4a17a473aca47ffaedfde71d4647b65", "version_major": 2, "version_minor": 0 }, @@ -172,8 +172,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 934 ms, sys: 653 ms, total: 1.59 s\n", - "Wall time: 527 ms\n" + "CPU times: user 891 ms, sys: 588 ms, total: 1.48 s\n", + "Wall time: 516 ms\n" ] }, { @@ -191,7 +191,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "42ed914f31b244d2bc2c0ff6d1159f06", + "model_id": "a0484aa10f164b59a4e96e0571d0dab4", "version_major": 2, "version_minor": 0 }, @@ -289,8 +289,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 884 ms, sys: 657 ms, total: 1.54 s\n", - "Wall time: 462 ms\n" + "CPU times: user 868 ms, sys: 633 ms, total: 1.5 s\n", + "Wall time: 466 ms\n" ] }, { @@ -308,7 +308,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "1245983198f449ecb0e4bada8e376c5e", + "model_id": "aa29092edf7e45899f5223330f7bcf4d", "version_major": 2, "version_minor": 0 }, @@ -407,8 +407,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 990 ms, sys: 657 ms, total: 1.65 s\n", - "Wall time: 591 ms\n" + "CPU times: user 930 ms, sys: 646 ms, total: 1.58 s\n", + "Wall time: 536 ms\n" ] }, { @@ -426,7 +426,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "d38aebaa28b94ba8bb5aca2247cca166", + "model_id": "98737647c2334289b67261b60bad7774", "version_major": 2, "version_minor": 0 }, @@ -534,8 +534,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 917 ms, sys: 632 ms, total: 1.55 s\n", - "Wall time: 524 ms\n" + "CPU times: user 913 ms, sys: 661 ms, total: 1.57 s\n", + "Wall time: 502 ms\n" ] }, { @@ -553,7 +553,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "351eb8c5ccd2464d860f9af188a22b27", + "model_id": "57cc8f5611a1442ca8b54cee04f85ede", "version_major": 2, "version_minor": 0 }, @@ -650,8 +650,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 854 ms, sys: 634 ms, total: 1.49 s\n", - "Wall time: 466 ms\n" + "CPU times: user 769 ms, sys: 481 ms, total: 1.25 s\n", + "Wall time: 468 ms\n" ] }, { @@ -669,7 +669,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "36f2ba192a384c06a33aa36c0b99bb76", + "model_id": "bd646e0f7bcc46348a62532baafdac12", "version_major": 2, "version_minor": 0 }, @@ -768,8 +768,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 949 ms, sys: 662 ms, total: 1.61 s\n", - "Wall time: 538 ms\n" + "CPU times: user 951 ms, sys: 652 ms, total: 1.6 s\n", + "Wall time: 543 ms\n" ] }, { @@ -787,7 +787,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "39fcd38d4e734262bd312782c93b67cc", + "model_id": "51dcf433475b4b6c96851aad825def8f", "version_major": 2, "version_minor": 0 }, @@ -885,7 +885,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 843 ms, sys: 501 ms, total: 1.34 s\n", + "CPU times: user 935 ms, sys: 661 ms, total: 1.6 s\n", "Wall time: 531 ms\n" ] }, @@ -904,7 +904,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "acb2a1051635423fb0afaa7b5b4f88a7", + "model_id": "e5ecbebda5784d219b52915e78ec9286", "version_major": 2, "version_minor": 0 }, @@ -1019,8 +1019,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 521 ms, sys: 101 ms, total: 622 ms\n", - "Wall time: 643 ms\n" + "CPU times: user 508 ms, sys: 94.3 ms, total: 603 ms\n", + "Wall time: 629 ms\n" ] }, { @@ -1038,7 +1038,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "f2745639dacc46419d01efb6fbbd8f68", + "model_id": "d97cd74ad34441a5b7e09b7334a05496", "version_major": 2, "version_minor": 0 }, @@ -1163,8 +1163,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 927 ms, sys: 643 ms, total: 1.57 s\n", - "Wall time: 534 ms\n" + "CPU times: user 754 ms, sys: 305 ms, total: 1.06 s\n", + "Wall time: 529 ms\n" ] }, { @@ -1182,7 +1182,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "dfbeede84e4449879606f2120e1665bf", + "model_id": "d4e108120e0949fc9fdf4831aa91cf70", "version_major": 2, "version_minor": 0 }, @@ -1304,8 +1304,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 253 ms, sys: 161 ms, total: 415 ms\n", - "Wall time: 152 ms\n" + "CPU times: user 296 ms, sys: 130 ms, total: 425 ms\n", + "Wall time: 214 ms\n" ] }, { @@ -1323,7 +1323,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "3a09ae895aca44fc809f0d5b33ab933c", + "model_id": "6592e9bfe80043bc8759a66d9cc9ace6", "version_major": 2, "version_minor": 0 }, @@ -1419,8 +1419,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 268 ms, sys: 191 ms, total: 460 ms\n", - "Wall time: 158 ms\n" + "CPU times: user 265 ms, sys: 180 ms, total: 445 ms\n", + "Wall time: 161 ms\n" ] }, { @@ -1438,7 +1438,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "589bc1cfc01f405fa38a7fe7092540a6", + "model_id": "5daa5677bb0b4668a2bec808e8818621", "version_major": 2, "version_minor": 0 }, @@ -1536,8 +1536,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 313 ms, sys: 166 ms, total: 478 ms\n", - "Wall time: 210 ms\n" + "CPU times: user 245 ms, sys: 152 ms, total: 396 ms\n", + "Wall time: 149 ms\n" ] }, { @@ -1555,7 +1555,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "998614e49db64ca493c5e99312cac90d", + "model_id": "e89cffe27d954fb18cdb86a76dc9eefa", "version_major": 2, "version_minor": 0 }, @@ -1663,8 +1663,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 838 ms, sys: 121 ms, total: 958 ms\n", - "Wall time: 926 ms\n" + "CPU times: user 848 ms, sys: 119 ms, total: 967 ms\n", + "Wall time: 933 ms\n" ] }, { @@ -1682,7 +1682,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "9c478066ab4d4c0aaf5f64188beebf93", + "model_id": "a2a3606cec214cf481faab9c094f25c4", "version_major": 2, "version_minor": 0 }, @@ -1807,8 +1807,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 124 ms, sys: 76.7 ms, total: 201 ms\n", - "Wall time: 82.4 ms\n" + "CPU times: user 86.6 ms, sys: 21.9 ms, total: 108 ms\n", + "Wall time: 90 ms\n" ] }, { @@ -1826,7 +1826,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "a463c66453ab48e1bce806761a99af24", + "model_id": "d47761e9fb2b487b834b2d326cb62fd3", "version_major": 2, "version_minor": 0 }, @@ -1923,8 +1923,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 176 ms, sys: 71.9 ms, total: 247 ms\n", - "Wall time: 127 ms\n" + "CPU times: user 116 ms, sys: 61.3 ms, total: 177 ms\n", + "Wall time: 74.9 ms\n" ] }, { @@ -1942,7 +1942,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "f9d86dae1df44ffa81cd863576beef78", + "model_id": "5516c496b742465fb5cdb752cbc94592", "version_major": 2, "version_minor": 0 }, @@ -2247,8 +2247,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 352 ms, sys: 225 ms, total: 577 ms\n", - "Wall time: 205 ms\n" + "CPU times: user 395 ms, sys: 224 ms, total: 619 ms\n", + "Wall time: 249 ms\n" ] }, { @@ -2266,7 +2266,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "d51b047733644869b2bd990ca75b36a4", + "model_id": "ae2fd523eaf04609866badd5e3de2d12", "version_major": 2, "version_minor": 0 }, @@ -2377,8 +2377,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 1e+03 ns, sys: 1 μs, total: 2 μs\n", - "Wall time: 4.05 μs\n" + "CPU times: user 1e+03 ns, sys: 0 ns, total: 1e+03 ns\n", + "Wall time: 3.1 μs\n" ] }, { @@ -2407,8 +2407,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 780 ms, sys: 175 ms, total: 955 ms\n", - "Wall time: 829 ms\n" + "CPU times: user 820 ms, sys: 192 ms, total: 1.01 s\n", + "Wall time: 890 ms\n" ] }, { @@ -2426,7 +2426,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "17f5856caae0429498fde367b4400a09", + "model_id": "e19fb7d40b74464dabceb6ef487c7fb2", "version_major": 2, "version_minor": 0 }, @@ -2563,14 +2563,14 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 6.84 ms, sys: 2.52 ms, total: 9.36 ms\n", - "Wall time: 14.3 ms\n" + "CPU times: user 5.68 ms, sys: 1.28 ms, total: 6.96 ms\n", + "Wall time: 8.21 ms\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "2ffcd41a3fcf491eb8d2c154fdf159db", + "model_id": "183410200d274f21845a1cf5f2782a09", "version_major": 2, "version_minor": 0 }, @@ -2660,14 +2660,14 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 4.88 ms, sys: 233 μs, total: 5.11 ms\n", - "Wall time: 5.15 ms\n" + "CPU times: user 5.63 ms, sys: 1.85 ms, total: 7.48 ms\n", + "Wall time: 6.12 ms\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "ad39440b032e41c5b1fa5bf265a80c81", + "model_id": "f33dadf3b55e42adb775706372ca2412", "version_major": 2, "version_minor": 0 }, diff --git a/src/cytodataframe/frame.py b/src/cytodataframe/frame.py index 959b97c..937d0f1 100644 --- a/src/cytodataframe/frame.py +++ b/src/cytodataframe/frame.py @@ -73,13 +73,14 @@ FILTER_SLIDER_TRACK_RIGHT_INSET_PX = 13 FILTER_PLOT_KDE_MIN_SAMPLES = 60 FILTER_PLOT_KDE_MAX_SAMPLES = 180 -FILTER_PLOT_KDE_MIN_BANDWIDTH = 0.25 -FILTER_PLOT_KDE_BANDWIDTH_SCALE = 0.4 +FILTER_PLOT_KDE_MIN_BANDWIDTH = 0.1 +FILTER_PLOT_KDE_BANDWIDTH_SCALE = 0.3 FILTER_PLOT_Y_SCALE_DEFAULT = "asinh" FILTER_PLOT_Y_MIN_PERCENTILE_DEFAULT = 10.0 FILTER_PLOT_Y_MAX_PERCENTILE_DEFAULT = 80.0 FILTER_PLOT_Y_MAX_PERCENTILE_UPPER = 100.0 FILTER_PLOT_Y_GAMMA_DEFAULT = 0.6 +FILTER_PLOT_Y_TAIL_LOG_SCALE_DEFAULT = 0.35 FILTER_SLIDER_CSS_CLASS = "cdf-filter-range-slider" # provide backwards compatibility for Self type in earlier Python versions. @@ -700,12 +701,12 @@ def _ensure_filter_range_slider( # noqa: C901, PLR0915 slider_values = unique_values if len(unique_values) > MAX_FILTER_SLIDER_STOPS: - sample_idx = np.linspace( - 0, len(unique_values) - 1, num=MAX_FILTER_SLIDER_STOPS, dtype=int - ) - slider_values = [unique_values[idx] for idx in sample_idx] - # Guard against accidental duplicate picks if index rounding occurs. - slider_values = list(dict.fromkeys(slider_values)) + slider_values = np.linspace( + unique_values[0], + unique_values[-1], + num=MAX_FILTER_SLIDER_STOPS, + dtype=np.float64, + ).tolist() options = [ (self._format_filter_slider_label(value), value) for value in slider_values ] @@ -824,6 +825,7 @@ def _build_filter_distribution_html( # noqa: C901, PLR0912, PLR0913, PLR0915 y_min_percentile: float = FILTER_PLOT_Y_MIN_PERCENTILE_DEFAULT, y_max_percentile: float = FILTER_PLOT_Y_MAX_PERCENTILE_DEFAULT, y_gamma: float = FILTER_PLOT_Y_GAMMA_DEFAULT, + y_tail_log_scale: float = FILTER_PLOT_Y_TAIL_LOG_SCALE_DEFAULT, size_px: Tuple[int, int] = (FILTER_SLIDER_TOTAL_WIDTH_PX, 96), track_padding_px: Tuple[int, int] = ( FILTER_SLIDER_LABEL_WIDTH_PX, @@ -942,7 +944,19 @@ def _build_filter_distribution_html( # noqa: C901, PLR0912, PLR0913, PLR0915 y_floor = float(np.percentile(plot_y, min_pct)) y_cap = float(np.percentile(plot_y, max_pct)) if y_cap > y_floor: - plot_y = np.clip(plot_y, y_floor, y_cap) - y_floor + shifted = np.maximum(plot_y - y_floor, 0.0) + cap_shifted = y_cap - y_floor + above_cap = shifted > cap_shifted + if np.any(above_cap): + tail_scale = max( + cap_shifted * float(y_tail_log_scale), + 1e-9, + ) + shifted[above_cap] = cap_shifted + ( + np.log1p((shifted[above_cap] - cap_shifted) / tail_scale) + * tail_scale + ) + plot_y = shifted else: plot_y = np.maximum(plot_y - y_floor, 0.0) @@ -950,10 +964,7 @@ def _build_filter_distribution_html( # noqa: C901, PLR0912, PLR0913, PLR0915 if gamma > 0 and gamma != 1.0: plot_y = np.power(np.maximum(plot_y, 0.0), gamma) - if 0.0 < max_pct < FILTER_PLOT_Y_MAX_PERCENTILE_UPPER: - y_max = float(np.percentile(plot_y, max_pct)) - else: - y_max = float(np.max(plot_y, initial=1.0)) + y_max = float(np.max(plot_y, initial=1.0)) y_max = float(max(1e-9, y_max)) lower, upper = selected_range @@ -989,7 +1000,14 @@ def _sy(value: float) -> float: def _sx_from_option_index(index: float) -> float: if option_count <= 1: return plot_left + (0.5 * plot_w) - return plot_left + ((float(index) / float(option_count - 1)) * plot_w) + value = float( + np.interp( + float(index), + option_positions, + slider_domain, + ) + ) + return _sx(value) highlight_x = _sx(lower) highlight_w = max(1.0, _sx(upper) - highlight_x) @@ -1181,6 +1199,12 @@ def _build_filter_slider_control_for_column( FILTER_PLOT_Y_GAMMA_DEFAULT, ) ), + y_tail_log_scale=float( + (self._custom_attrs.get("display_options", {}) or {}).get( + "filter_plot_y_tail_log_scale", + FILTER_PLOT_Y_TAIL_LOG_SCALE_DEFAULT, + ) + ), size_px=(FILTER_SLIDER_TOTAL_WIDTH_PX, 52), track_padding_px=( FILTER_SLIDER_LABEL_WIDTH_PX, diff --git a/tests/test_frame.py b/tests/test_frame.py index ff95a4a..d729715 100644 --- a/tests/test_frame.py +++ b/tests/test_frame.py @@ -985,6 +985,10 @@ def test_filter_slider_caps_option_count_for_near_unique_values() -> None: assert len(options) == MAX_FILTER_SLIDER_STOPS assert options[0][1] == float(values.min()) assert options[-1][1] == float(values.max()) + option_vals = np.array([float(option[1]) for option in options], dtype=np.float64) + deltas = np.diff(option_vals) + assert np.all(deltas > 0) + assert np.allclose(deltas, deltas[0], rtol=1e-6, atol=1e-12) assert slider.value == (float(values.min()), float(values.max()))