From 7da1e074e4f9770f4e764d3cedb5daf76b34cab6 Mon Sep 17 00:00:00 2001
From: Dennis Soong <dso2ng@gmail.com>
Date: Thu, 30 Apr 2026 23:04:49 +0000
Subject: [PATCH 1/8] fix: expose session lineage metadata in API (#1370)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

PR #1358 added the client-side lineage collapse helper, but
/api/sessions often did not include _lineage_root_id for the WebUI
JSON sessions visible in the sidebar. In that case the helper has no
grouping key and multiple same-title continuation rows remain visible.

This PR:
- Reads parent_session_id and end_reason from state.db.sessions for
  the WebUI sidebar's session ids
- Walks the parent chain when end_reason is 'compression' or
  'cli_close', producing _lineage_root_id and _compression_segment_count
- Cycle-detects via a 'seen' set
- Preserves projected lineage metadata on imported/gateway session rows
- Allows sidebar collapse to group cross-surface continuation chains
  (CLI-close → WebUI continuation) while keeping non-continuation
  parent rows flat

Co-authored-by: Dennis Soong <dso2ng@gmail.com>
---
 api/agent_sessions.py                      |  65 +++++++++
 api/models.py                              |  33 ++++-
 tests/test_gateway_sync.py                 |   3 +
 tests/test_session_lineage_metadata_api.py | 156 +++++++++++++++++++++
 4 files changed, 256 insertions(+), 1 deletion(-)
 create mode 100644 tests/test_session_lineage_metadata_api.py

diff --git a/api/agent_sessions.py b/api/agent_sessions.py
index ba9ae718..f35c681f 100644
--- a/api/agent_sessions.py
+++ b/api/agent_sessions.py
@@ -255,3 +255,68 @@ def read_importable_agent_session_rows(
         if limit is None:
             return projected
         return projected[:max(0, int(limit))]
+
+
+
+def read_session_lineage_metadata(db_path: Path, session_ids: list[str] | set[str]) -> dict[str, dict]:
+    """Return compression-lineage metadata for known WebUI sidebar sessions.
+
+    WebUI sessions are persisted as JSON files, but Hermes Agent also mirrors
+    them into ``state.db.sessions`` for insights/session history. Compression
+    and cross-surface continuation create parent chains there. ``/api/sessions``
+    needs to surface that lineage to the sidebar so client-side collapse can
+    group logical continuations without mutating or deleting any session files.
+
+    Missing DBs, old schemas, or incomplete rows degrade to an empty mapping.
+    """
+    wanted = {str(sid) for sid in (session_ids or []) if sid}
+    db_path = Path(db_path)
+    if not wanted or not db_path.exists():
+        return {}
+
+    try:
+        with sqlite3.connect(str(db_path)) as conn:
+            conn.row_factory = sqlite3.Row
+            cur = conn.cursor()
+            cur.execute("PRAGMA table_info(sessions)")
+            session_cols = {row[1] for row in cur.fetchall()}
+            if 'parent_session_id' not in session_cols or 'end_reason' not in session_cols:
+                return {}
+            cur.execute("SELECT id, parent_session_id, end_reason FROM sessions")
+            rows = {row['id']: dict(row) for row in cur.fetchall()}
+    except Exception:
+        return {}
+
+    metadata: dict[str, dict] = {}
+    for sid in wanted:
+        row = rows.get(sid)
+        if not row:
+            continue
+
+        parent_id = row.get('parent_session_id')
+        if parent_id:
+            metadata.setdefault(sid, {})['parent_session_id'] = parent_id
+
+        root_id = sid
+        current_id = sid
+        segment_count = 1
+        seen = {sid}
+        while True:
+            current = rows.get(current_id)
+            parent_id = current.get('parent_session_id') if current else None
+            parent = rows.get(parent_id) if parent_id else None
+            if not parent or parent_id in seen:
+                break
+            if parent.get('end_reason') not in {'compression', 'cli_close'}:
+                break
+            root_id = parent_id
+            current_id = parent_id
+            seen.add(parent_id)
+            segment_count += 1
+
+        if root_id != sid:
+            entry = metadata.setdefault(sid, {})
+            entry['_lineage_root_id'] = root_id
+            entry['_compression_segment_count'] = segment_count
+
+    return metadata
diff --git a/api/models.py b/api/models.py
index c6e1dba4..8e8d604d 100644
--- a/api/models.py
+++ b/api/models.py
@@ -15,7 +15,7 @@ from api.config import (
     get_effective_default_model, _get_session_agent_lock,
 )
 from api.workspace import get_last_workspace
-from api.agent_sessions import read_importable_agent_session_rows
+from api.agent_sessions import read_importable_agent_session_rows, read_session_lineage_metadata
 
 logger = logging.getLogger(__name__)
 
@@ -746,6 +746,31 @@ def _hide_from_default_sidebar(session: dict) -> bool:
     return source == 'cron' or sid.startswith('cron_')
 
 
+def _active_state_db_path() -> Path:
+    """Return state.db for the active Hermes profile, degrading to HERMES_HOME."""
+    try:
+        from api.profiles import get_active_hermes_home
+        hermes_home = Path(get_active_hermes_home()).expanduser().resolve()
+    except Exception:
+        hermes_home = Path(os.getenv('HERMES_HOME', str(HOME / '.hermes'))).expanduser().resolve()
+    return hermes_home / 'state.db'
+
+
+def _enrich_sidebar_lineage_metadata(sessions: list[dict]) -> None:
+    """Attach state.db compression lineage metadata used by sidebar collapse."""
+    try:
+        metadata = read_session_lineage_metadata(
+            _active_state_db_path(),
+            {s.get('session_id') for s in sessions},
+        )
+    except Exception:
+        return
+    for session in sessions:
+        sid = session.get('session_id')
+        if sid in metadata:
+            session.update(metadata[sid])
+
+
 def all_sessions():
     active_stream_ids = _active_stream_ids()
     # Phase C: try index first for O(1) read; fall back to full scan
@@ -804,6 +829,7 @@ def all_sessions():
             for s in result:
                 if not s.get('profile'):
                     s['profile'] = 'default'
+            _enrich_sidebar_lineage_metadata(result)
             return result
         except Exception:
             logger.debug("Failed to load session index, falling back to full scan")
@@ -832,6 +858,7 @@ def all_sessions():
     for s in result:
         if not s.get('profile'):
             s['profile'] = 'default'
+    _enrich_sidebar_lineage_metadata(result)
     return result
 
 
@@ -1015,6 +1042,10 @@ def get_cli_sessions() -> list:
                 'raw_source': row.get('raw_source'),
                 'session_source': row.get('session_source'),
                 'source_label': row.get('source_label'),
+                'parent_session_id': row.get('parent_session_id'),
+                '_lineage_root_id': row.get('_lineage_root_id'),
+                '_lineage_tip_id': row.get('_lineage_tip_id'),
+                '_compression_segment_count': row.get('_compression_segment_count'),
                 'is_cli_session': True,
             })
     except Exception as _cli_err:
diff --git a/tests/test_gateway_sync.py b/tests/test_gateway_sync.py
index 439e7095..d2dff359 100644
--- a/tests/test_gateway_sync.py
+++ b/tests/test_gateway_sync.py
@@ -333,6 +333,9 @@ def test_compression_chain_collapses_to_latest_tip_in_sidebar():
         # bubbles to the top by true recency, not by the root's stale activity.
         # tip messages are at t0+201 and t0+202, so last_activity = t0 + 202.
         assert abs(tip.get('updated_at') - (t0 + 202)) < 0.01
+        assert tip.get('_lineage_root_id') == 'chain_root_001'
+        assert tip.get('_lineage_tip_id') == 'chain_tip_001'
+        assert tip.get('_compression_segment_count') == 3
 
         from api.agent_sessions import read_importable_agent_session_rows
 
diff --git a/tests/test_session_lineage_metadata_api.py b/tests/test_session_lineage_metadata_api.py
new file mode 100644
index 00000000..fdf04b6d
--- /dev/null
+++ b/tests/test_session_lineage_metadata_api.py
@@ -0,0 +1,156 @@
+"""Regression tests for /api/sessions lineage metadata used by sidebar collapse."""
+
+import sqlite3
+import time
+
+import pytest
+
+import api.models as models
+from api.models import SESSIONS, STREAMS, Session, all_sessions
+
+
+@pytest.fixture(autouse=True)
+def _isolate(tmp_path, monkeypatch):
+    session_dir = tmp_path / "sessions"
+    session_dir.mkdir()
+    index_file = session_dir / "_index.json"
+    state_db = tmp_path / "state.db"
+    monkeypatch.setattr(models, "SESSION_DIR", session_dir)
+    monkeypatch.setattr(models, "SESSION_INDEX_FILE", index_file)
+    monkeypatch.setattr(models, "_active_state_db_path", lambda: state_db)
+    SESSIONS.clear()
+    STREAMS.clear()
+    yield state_db
+    SESSIONS.clear()
+    STREAMS.clear()
+
+
+def _ensure_state_db(path):
+    conn = sqlite3.connect(str(path))
+    conn.executescript(
+        """
+        CREATE TABLE sessions (
+            id TEXT PRIMARY KEY,
+            source TEXT,
+            title TEXT,
+            model TEXT,
+            started_at REAL NOT NULL,
+            message_count INTEGER DEFAULT 0,
+            parent_session_id TEXT,
+            ended_at REAL,
+            end_reason TEXT
+        );
+        """
+    )
+    return conn
+
+
+def _insert_state_row(conn, sid, *, parent=None, ended_at=None, end_reason=None, started_at=None):
+    conn.execute(
+        """
+        INSERT INTO sessions
+        (id, source, title, model, started_at, message_count, parent_session_id, ended_at, end_reason)
+        VALUES (?, 'webui', ?, 'openai/gpt-5', ?, 2, ?, ?, ?)
+        """,
+        (sid, sid, started_at or time.time(), parent, ended_at, end_reason),
+    )
+    conn.commit()
+
+
+def _save_webui_session(sid, *, title, updated_at):
+    session = Session(
+        session_id=sid,
+        title=title,
+        messages=[{"role": "user", "content": "hello"}, {"role": "assistant", "content": "hi"}],
+        updated_at=updated_at,
+    )
+    session.save(touch_updated_at=False)
+    return session
+
+
+def test_all_sessions_exposes_state_db_lineage_metadata_for_webui_json_sessions(_isolate):
+    """PR #1358 can only collapse rows when /api/sessions exposes lineage keys."""
+    conn = _ensure_state_db(_isolate)
+    t0 = time.time() - 100
+    try:
+        _save_webui_session("lineage_api_root", title="Hermes WebUI", updated_at=t0)
+        _save_webui_session("lineage_api_tip", title="Hermes WebUI #2", updated_at=t0 + 10)
+        _insert_state_row(
+            conn,
+            "lineage_api_root",
+            started_at=t0,
+            ended_at=t0 + 5,
+            end_reason="compression",
+        )
+        _insert_state_row(
+            conn,
+            "lineage_api_tip",
+            parent="lineage_api_root",
+            started_at=t0 + 6,
+        )
+
+        rows = {row["session_id"]: row for row in all_sessions()}
+
+        assert rows["lineage_api_tip"].get("parent_session_id") == "lineage_api_root"
+        assert rows["lineage_api_tip"].get("_lineage_root_id") == "lineage_api_root"
+        assert rows["lineage_api_tip"].get("_compression_segment_count") == 2
+        assert "_lineage_root_id" not in rows["lineage_api_root"]
+    finally:
+        conn.close()
+
+
+def test_non_compression_state_db_parent_does_not_create_sidebar_lineage(_isolate):
+    conn = _ensure_state_db(_isolate)
+    t0 = time.time() - 100
+    try:
+        _save_webui_session("lineage_api_plain_parent", title="Parent", updated_at=t0)
+        _save_webui_session("lineage_api_plain_child", title="Child", updated_at=t0 + 10)
+        _insert_state_row(
+            conn,
+            "lineage_api_plain_parent",
+            started_at=t0,
+            ended_at=t0 + 5,
+            end_reason="user_stop",
+        )
+        _insert_state_row(
+            conn,
+            "lineage_api_plain_child",
+            parent="lineage_api_plain_parent",
+            started_at=t0 + 6,
+        )
+
+        rows = {row["session_id"]: row for row in all_sessions()}
+
+        assert rows["lineage_api_plain_child"].get("parent_session_id") == "lineage_api_plain_parent"
+        assert "_lineage_root_id" not in rows["lineage_api_plain_child"]
+    finally:
+        conn.close()
+
+
+
+def test_cli_close_parent_preserves_cross_surface_continuation_lineage(_isolate):
+    conn = _ensure_state_db(_isolate)
+    t0 = time.time() - 100
+    try:
+        _save_webui_session("lineage_api_cli_parent", title="Hermes WebUI #8", updated_at=t0)
+        _save_webui_session("lineage_api_webui_child", title="Hermes WebUI #8", updated_at=t0 + 10)
+        _insert_state_row(
+            conn,
+            "lineage_api_cli_parent",
+            started_at=t0,
+            ended_at=t0 + 5,
+            end_reason="cli_close",
+        )
+        _insert_state_row(
+            conn,
+            "lineage_api_webui_child",
+            parent="lineage_api_cli_parent",
+            started_at=t0 + 6,
+        )
+
+        rows = {row["session_id"]: row for row in all_sessions()}
+
+        assert rows["lineage_api_webui_child"].get("parent_session_id") == "lineage_api_cli_parent"
+        assert rows["lineage_api_webui_child"].get("_lineage_root_id") == "lineage_api_cli_parent"
+    finally:
+        conn.close()

From 571cfed180aefcb4a649192dc66ded018a0f9ad1 Mon Sep 17 00:00:00 2001
From: nesquena-hermes <nesquena-hermes@users.noreply.github.com>
Date: Thu, 30 Apr 2026 23:06:37 +0000
Subject: [PATCH 2/8] release: v0.50.251 (#1370 perf fix + orphan-parent guard
 + regression suite)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Bundles:
- #1370 fix: expose session lineage metadata in API (@dso2ng)

Pre-release fixes applied:

1) Perf: replaced full table scan with parameterized WHERE id IN (...)
   query. Original code did SELECT id, parent_session_id, end_reason
   FROM sessions on every sidebar refresh. Measured 9ms cached scan at
   1000 rows in production (up to ~450ms cold-cache); scales linearly.
   New approach hits PRIMARY KEY + idx_sessions_parent — 50x faster
   at 1000 rows, ~0.2ms regardless of total row count. Depth-bounded
   to 20 hops to cap query count under pathological data.

2) Orphan-parent guard: suppress parent_session_id in API output when
   the referenced parent row doesn't exist in state.db. The frontend's
   #1358 _sessionLineageKey falls through to parent_session_id when
   _lineage_root_id is missing — orphan references would create
   never-collapsing single-row groups in the sidebar.

3) Regression suite (5 tests in
   test_pr1370_lineage_metadata_perf_and_orphan.py):
   - Pins the no-full-scan invariant by intercepting all SQL queries
     and asserting no SELECT FROM sessions without a WHERE clause
   - Pins orphan-parent suppression
   - Pins cycle termination via threading.Event watchdog (2s timeout)
   - End-to-end test for 4-segment compression chain root resolution
   - Pins non-compression end_reason boundary stops walk
---
 CHANGELOG.md                                  |   5 +
 api/agent_sessions.py                         |  41 +++-
 ...pr1370_lineage_metadata_perf_and_orphan.py | 200 ++++++++++++++++++
 3 files changed, 243 insertions(+), 3 deletions(-)
 create mode 100644 tests/test_pr1370_lineage_metadata_perf_and_orphan.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 793dfdb7..fe3d732e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,11 @@
 
 ## [Unreleased]
 
+## [v0.50.251] — 2026-04-30
+
+### Fixed
+- **Sidebar lineage collapse now works for WebUI JSON sessions, not just imported gateway rows** — PR #1358 (v0.50.249) added the client-side lineage-collapse helper but `/api/sessions` only included `_lineage_root_id` for gateway-imported rows. WebUI JSON sessions (the common case) had no grouping key, so cross-surface continuation chains (CLI-close → WebUI continuation, or compression chains within WebUI) still rendered as separate sidebar rows. Now `/api/sessions` reads `parent_session_id` and `end_reason` from `state.db.sessions` for every WebUI session id in the sidebar payload, walks the parent chain when `end_reason in {'compression', 'cli_close'}`, and exposes `_lineage_root_id` + `_compression_segment_count`. Cycle-detected via a `seen` set; depth-bounded to 20 hops to cap pathological data. **Pre-release fix:** swapped the original full-table-scan (`SELECT id, parent_session_id, end_reason FROM sessions`) for a parameterized `WHERE id IN (...)` query that hits PRIMARY KEY + `idx_sessions_parent` — ~50× faster at 1000 rows, scales linearly. **Pre-release fix:** suppressed orphan `parent_session_id` references (parent row pruned/deleted) so the frontend's lineage helper doesn't cluster orphans into never-collapsing single-row groups. (`api/agent_sessions.py`, `api/models.py`, `tests/test_session_lineage_metadata_api.py`, `tests/test_pr1370_lineage_metadata_perf_and_orphan.py`, `tests/test_gateway_sync.py`) @dso2ng — PR #1370
+
 ## [v0.50.250] — 2026-04-30
 
 ### Fixed
diff --git a/api/agent_sessions.py b/api/agent_sessions.py
index f35c681f..a6222ff6 100644
--- a/api/agent_sessions.py
+++ b/api/agent_sessions.py
@@ -282,8 +282,37 @@ def read_session_lineage_metadata(db_path: Path, session_ids: list[str] | set[st
             session_cols = {row[1] for row in cur.fetchall()}
             if 'parent_session_id' not in session_cols or 'end_reason' not in session_cols:
                 return {}
-            cur.execute("SELECT id, parent_session_id, end_reason FROM sessions")
-            rows = {row['id']: dict(row) for row in cur.fetchall()}
+            # Scoped fetch via PRIMARY KEY + idx_sessions_parent rather than a
+            # full table scan. The sessions table grows unbounded over time
+            # (1000+ rows is normal, 10000+ for power users), and this function
+            # runs on every sidebar refresh — a full SELECT was ~50x slower
+            # than the indexed lookup at 1000 rows and scales linearly.
+            #
+            # Fetch the wanted ids first, then chase parent_session_id chains
+            # in batches until no new ids appear. Each batch hits PRIMARY KEY
+            # so it's effectively O(N) lookups.
+            rows: dict[str, dict] = {}
+            to_fetch = set(wanted)
+            # Cap walk depth to bound worst-case query count. Real lineage
+            # chains seen in production are <10 segments; anything longer is
+            # almost certainly pathological data and not worth chasing.
+            for _hop in range(20):
+                if not to_fetch:
+                    break
+                placeholders = ','.join('?' * len(to_fetch))
+                fetch_list = list(to_fetch)
+                to_fetch = set()
+                cur.execute(
+                    f"SELECT id, parent_session_id, end_reason FROM sessions WHERE id IN ({placeholders})",
+                    fetch_list,
+                )
+                for row in cur.fetchall():
+                    rows[row['id']] = dict(row)
+                # Queue up parents we haven't fetched yet.
+                for sid in fetch_list:
+                    parent_id = rows.get(sid, {}).get('parent_session_id')
+                    if parent_id and parent_id not in rows and parent_id not in to_fetch:
+                        to_fetch.add(parent_id)
     except Exception:
         return {}
 
@@ -294,7 +323,13 @@ def read_session_lineage_metadata(db_path: Path, session_ids: list[str] | set[st
             continue
 
         parent_id = row.get('parent_session_id')
-        if parent_id:
+        # Only expose parent_session_id when the parent actually exists in
+        # state.db. Orphan references (parent row was pruned/deleted) used to
+        # leak through and the frontend would treat them as a sidebar
+        # grouping key (#1358's _sessionLineageKey falls through to
+        # parent_session_id when _lineage_root_id is missing). Caught during
+        # pre-release review of v0.50.251.
+        if parent_id and parent_id in rows:
             metadata.setdefault(sid, {})['parent_session_id'] = parent_id
 
         root_id = sid
diff --git a/tests/test_pr1370_lineage_metadata_perf_and_orphan.py b/tests/test_pr1370_lineage_metadata_perf_and_orphan.py
new file mode 100644
index 00000000..e605664f
--- /dev/null
+++ b/tests/test_pr1370_lineage_metadata_perf_and_orphan.py
@@ -0,0 +1,200 @@
+"""Regression tests for the v0.50.251 pre-release fixes to PR #1370.
+
+PR #1370 originally did `SELECT id, parent_session_id, end_reason FROM
+sessions` (full table scan) on every call. At ~1000 rows the indexed
+lookup was 50x faster; the pre-release fix replaces the full scan with
+a parameterized `WHERE id IN (...)` query that hits PRIMARY KEY +
+idx_sessions_parent.
+
+Also pins: orphan-parent references (parent_session_id points to a
+row that doesn't exist in state.db) must NOT be exposed in the API
+output, because #1358's frontend `_sessionLineageKey` falls through to
+`parent_session_id` when `_lineage_root_id` is missing — and would
+group orphans by a key no other session shares (cosmetic dead state).
+"""
+
+import os
+import sqlite3
+import time
+
+import pytest
+
+
+def _make_db(path):
+    conn = sqlite3.connect(str(path))
+    conn.executescript("""
+    CREATE TABLE sessions (
+        id TEXT PRIMARY KEY,
+        source TEXT,
+        title TEXT,
+        model TEXT,
+        started_at REAL NOT NULL,
+        message_count INTEGER DEFAULT 0,
+        parent_session_id TEXT,
+        ended_at REAL,
+        end_reason TEXT
+    );
+    CREATE INDEX idx_sessions_parent ON sessions(parent_session_id);
+    """)
+    return conn
+
+
+def _insert(conn, sid, *, parent=None, end_reason=None, started_at=None):
+    conn.execute(
+        "INSERT INTO sessions (id, source, title, model, started_at, parent_session_id, end_reason) "
+        "VALUES (?, 'webui', ?, 'openai/gpt-5', ?, ?, ?)",
+        (sid, sid, started_at or time.time(), parent, end_reason),
+    )
+    conn.commit()
+
+
+def test_does_not_full_scan_sessions_table(tmp_path, monkeypatch):
+    """The perf-critical invariant: must NOT do a `SELECT * FROM sessions`
+    or `SELECT id, parent_session_id, end_reason FROM sessions` without a
+    WHERE clause. Full scans regress sidebar refresh latency on power users
+    with 1000+ session rows.
+    """
+    from api import agent_sessions
+
+    db = tmp_path / "state.db"
+    conn = _make_db(db)
+    # Insert 500 unrelated rows + 5 we care about
+    for i in range(500):
+        _insert(conn, f"unrelated_{i:04d}")
+    _insert(conn, "wanted_1")
+    _insert(conn, "wanted_2", parent="wanted_1", end_reason=None)
+    conn.close()
+
+    # Track every SQL the function issues
+    queries = []
+    real_connect = sqlite3.connect
+
+    class _TrackingConn:
+        def __init__(self, *args, **kw):
+            self._real = real_connect(*args, **kw)
+        def cursor(self):
+            return _TrackingCursor(self._real.cursor())
+        def __enter__(self):
+            return self
+        def __exit__(self, *a):
+            return self._real.__exit__(*a)
+        @property
+        def row_factory(self):
+            return self._real.row_factory
+        @row_factory.setter
+        def row_factory(self, v):
+            self._real.row_factory = v
+
+    class _TrackingCursor:
+        def __init__(self, real):
+            self._real = real
+        def execute(self, sql, *args):
+            queries.append(sql)
+            return self._real.execute(sql, *args)
+        def fetchall(self):
+            return self._real.fetchall()
+        def fetchone(self):
+            return self._real.fetchone()
+
+    monkeypatch.setattr(sqlite3, "connect", _TrackingConn)
+    agent_sessions.read_session_lineage_metadata(db, ["wanted_1", "wanted_2"])
+
+    # No query may select from sessions without a WHERE clause
+    bad = [q for q in queries
+           if "from sessions" in q.lower()
+           and "where" not in q.lower()
+           and "pragma" not in q.lower()]
+    assert not bad, (
+        f"read_session_lineage_metadata must scope its SELECTs to specific "
+        f"session ids (PR #1370 originally did a full scan). Found unscoped "
+        f"queries: {bad}"
+    )
+
+
+def test_orphan_parent_reference_not_exposed_in_metadata(tmp_path):
+    """If a session row references a parent that doesn't exist in state.db
+    (orphan), the API output must NOT include `parent_session_id` — because
+    #1358's frontend lineage helper would treat it as a sidebar grouping key
+    and cluster the orphan into a never-collapsing single-row group.
+    """
+    from api.agent_sessions import read_session_lineage_metadata
+
+    db = tmp_path / "state.db"
+    conn = _make_db(db)
+    _insert(conn, "child", parent="missing_parent", end_reason=None)
+    conn.close()
+
+    result = read_session_lineage_metadata(db, ["child"])
+    assert "child" not in result or "parent_session_id" not in result["child"], (
+        f"Orphan parent_session_id leaked into API output: {result}. "
+        f"This causes the frontend lineage helper to group the orphan under "
+        f"a key no other session shares — cosmetic dead state on the wire."
+    )
+
+
+def test_cycle_in_parent_chain_terminates(tmp_path):
+    """Pathological data with a parent cycle (A→B→A) must not infinite-loop."""
+    from api.agent_sessions import read_session_lineage_metadata
+
+    db = tmp_path / "state.db"
+    conn = _make_db(db)
+    _insert(conn, "A", parent="B", end_reason="compression")
+    _insert(conn, "B", parent="A", end_reason="compression")
+    conn.close()
+
+    import threading
+    done = threading.Event()
+    result_box = []
+
+    def worker():
+        result_box.append(read_session_lineage_metadata(db, ["A"]))
+        done.set()
+
+    t = threading.Thread(target=worker, daemon=True)
+    t.start()
+    finished = done.wait(timeout=2.0)
+    assert finished, "read_session_lineage_metadata hung on a parent cycle"
+
+    result = result_box[0]
+    # Cycle should terminate; segment count should be bounded (≤ 2 for A→B→A)
+    seg = result.get("A", {}).get("_compression_segment_count", 0)
+    assert 1 <= seg <= 5, f"Cycle should produce a small bounded count, got {seg}"
+
+
+def test_chain_walk_returns_correct_root_for_real_compression_chain(tmp_path):
+    """End-to-end behavioural test: 4-segment compression chain returns the
+    correct root and a segment count of 4."""
+    from api.agent_sessions import read_session_lineage_metadata
+
+    db = tmp_path / "state.db"
+    conn = _make_db(db)
+    _insert(conn, "root", end_reason="compression")
+    _insert(conn, "seg2", parent="root", end_reason="compression")
+    _insert(conn, "seg3", parent="seg2", end_reason="compression")
+    _insert(conn, "tip", parent="seg3", end_reason=None)
+    conn.close()
+
+    result = read_session_lineage_metadata(db, ["tip"])
+    assert result["tip"]["_lineage_root_id"] == "root"
+    assert result["tip"]["_compression_segment_count"] == 4
+    assert result["tip"]["parent_session_id"] == "seg3"
+
+
+def test_non_compression_parent_does_not_extend_lineage(tmp_path):
+    """If parent's end_reason is something OTHER than 'compression' or
+    'cli_close' (e.g. 'user_stop', 'session_reset', 'cron_complete'),
+    the chain stops at that boundary."""
+    from api.agent_sessions import read_session_lineage_metadata
+
+    db = tmp_path / "state.db"
+    conn = _make_db(db)
+    _insert(conn, "parent", end_reason="user_stop")  # NOT compression
+    _insert(conn, "child", parent="parent", end_reason=None)
+    conn.close()
+
+    result = read_session_lineage_metadata(db, ["child"])
+    # parent_session_id should still be exposed (parent exists)
+    assert result["child"]["parent_session_id"] == "parent"
+    # But _lineage_root_id should NOT be set — chain doesn't span the boundary
+    assert "_lineage_root_id" not in result["child"]
+    assert "_compression_segment_count" not in result["child"]

From 89dcab832783e40a91eaa3ac038f2fe2f8ac9dc1 Mon Sep 17 00:00:00 2001
From: NocGeek <NocGeek@users.noreply.github.com>
Date: Thu, 30 Apr 2026 23:15:31 +0000
Subject: [PATCH 3/8] fix: persist manual cron run results (#1372)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Manual WebUI cron runs previously called cron.scheduler.run_job(job)
and then only cleared the in-memory running flag. That meant output
could be dropped and job metadata like last_run_at / last_status was
not updated after a manual run.

This PR matches the scheduled cron path (cron/scheduler.py:1334-1364)
exactly:
- Save manual-run output via save_job_output
- Mark manual runs complete via mark_job_run
- Treat empty final_response as a soft failure with the same error
  string as the scheduled path
- Record manual-run failures in job metadata via mark_job_run(False)
- Keep _run_cron_tracked self-contained for worker-thread execution

Includes 2 behavioral regression tests using monkeypatch.setitem on
sys.modules to mock cron.scheduler.run_job + cron.jobs helpers — the
right test pattern (exercises the real _run_cron_tracked code path).

Split out from #1352 (the larger profile-aware-cron-panel PR that's
on hold) per pre-release-review feedback. Self-contained, doesn't
touch the held PR's profile-filtering scope.

Co-authored-by: NocGeek <NocGeek@users.noreply.github.com>
---
 api/routes.py                             | 22 +++++++-
 tests/test_cron_manual_run_persistence.py | 69 +++++++++++++++++++++++
 2 files changed, 89 insertions(+), 2 deletions(-)
 create mode 100644 tests/test_cron_manual_run_persistence.py

diff --git a/api/routes.py b/api/routes.py
index a5e21a7c..03ed0c62 100644
--- a/api/routes.py
+++ b/api/routes.py
@@ -95,10 +95,28 @@ def _cron_output_content_window(text: str, limit: int = _CRON_OUTPUT_CONTENT_LIM
 def _run_cron_tracked(job):
     """Wrapper that tracks running state around cron.scheduler.run_job."""
     from cron.scheduler import run_job  # import here — runs inside a worker thread
+    from cron.jobs import mark_job_run, save_job_output
+
+    job_id = job.get("id", "")
     try:
-        run_job(job)
+        success, output, final_response, error = run_job(job)
+        save_job_output(job_id, output)
+
+        # Match the scheduled cron path: an apparently successful run with no
+        # final response should not leave the job looking healthy.
+        if success and not final_response:
+            success = False
+            error = "Agent completed but produced empty response (model error, timeout, or misconfiguration)"
+
+        mark_job_run(job_id, success, error)
+    except Exception as e:
+        logger.exception("Manual cron run failed for job %s", job_id)
+        try:
+            mark_job_run(job_id, False, str(e))
+        except Exception:
+            logger.debug("Failed to mark manual cron run failure for %s", job_id)
     finally:
-        _mark_cron_done(job.get("id", ""))
+        _mark_cron_done(job_id)
 
 _PROVIDER_ALIASES = {
     "claude": "anthropic",
diff --git a/tests/test_cron_manual_run_persistence.py b/tests/test_cron_manual_run_persistence.py
new file mode 100644
index 00000000..7c1c365e
--- /dev/null
+++ b/tests/test_cron_manual_run_persistence.py
@@ -0,0 +1,69 @@
+"""Regression tests for manual WebUI cron runs."""
+
+import sys
+import types
+
+
+def test_manual_cron_run_saves_output_and_marks_job(monkeypatch):
+    import api.routes as routes
+
+    calls = []
+
+    cron_pkg = types.ModuleType("cron")
+    cron_pkg.__path__ = []
+
+    cron_jobs = types.ModuleType("cron.jobs")
+    cron_jobs.save_job_output = lambda job_id, output: calls.append(
+        ("save", job_id, output)
+    )
+    cron_jobs.mark_job_run = lambda job_id, success, error=None: calls.append(
+        ("mark", job_id, success, error)
+    )
+
+    cron_scheduler = types.ModuleType("cron.scheduler")
+    cron_scheduler.run_job = lambda job: (True, "manual output", "done", None)
+
+    monkeypatch.setitem(sys.modules, "cron", cron_pkg)
+    monkeypatch.setitem(sys.modules, "cron.jobs", cron_jobs)
+    monkeypatch.setitem(sys.modules, "cron.scheduler", cron_scheduler)
+
+    routes._mark_cron_running("job123")
+    routes._run_cron_tracked({"id": "job123"})
+
+    assert calls == [
+        ("save", "job123", "manual output"),
+        ("mark", "job123", True, None),
+    ]
+    assert routes._is_cron_running("job123") == (False, 0.0)
+
+
+def test_manual_cron_run_marks_empty_response_as_failure(monkeypatch):
+    import api.routes as routes
+
+    calls = []
+
+    cron_pkg = types.ModuleType("cron")
+    cron_pkg.__path__ = []
+
+    cron_jobs = types.ModuleType("cron.jobs")
+    cron_jobs.save_job_output = lambda job_id, output: calls.append(
+        ("save", job_id, output)
+    )
+    cron_jobs.mark_job_run = lambda job_id, success, error=None: calls.append(
+        ("mark", job_id, success, error)
+    )
+
+    cron_scheduler = types.ModuleType("cron.scheduler")
+    cron_scheduler.run_job = lambda job: (True, "manual output", "", None)
+
+    monkeypatch.setitem(sys.modules, "cron", cron_pkg)
+    monkeypatch.setitem(sys.modules, "cron.jobs", cron_jobs)
+    monkeypatch.setitem(sys.modules, "cron.scheduler", cron_scheduler)
+
+    routes._mark_cron_running("job-empty")
+    routes._run_cron_tracked({"id": "job-empty"})
+
+    assert calls[0] == ("save", "job-empty", "manual output")
+    assert calls[1][0:3] == ("mark", "job-empty", False)
+    assert "empty response" in calls[1][3]
+    assert routes._is_cron_running("job-empty") == (False, 0.0)

From 63251ad206e55111015fbdf7b6a66de2a062ded3 Mon Sep 17 00:00:00 2001
From: nesquena-hermes <nesquena-hermes@users.noreply.github.com>
Date: Thu, 30 Apr 2026 23:17:54 +0000
Subject: [PATCH 4/8] release: apply Opus SHOULD-FIX 1+2 + add #1372
 manual-cron persistence
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Opus pre-release findings on #1370 applied:

SHOULD-FIX 1: Tightened parent_session_id exposure to only emit when
the parent's end_reason is in {compression, cli_close}. Without this,
two distinct WebUI sessions sharing a non-continuation parent (e.g.
'user_stop') would get clustered by frontend's _sessionLineageKey
(which falls through to parent_session_id when _lineage_root_id is
missing) and incorrectly collapsed into a single sidebar row.

  Updated assertions in:
  - tests/test_session_lineage_metadata_api.py::
    test_non_compression_state_db_parent_does_not_create_sidebar_lineage
  - tests/test_pr1370_lineage_metadata_perf_and_orphan.py::
    test_non_compression_parent_does_not_extend_lineage

SHOULD-FIX 2: Chunked the IN-clause to 500 vars to stay under
SQLITE_MAX_VARIABLE_NUMBER. Python 3.9 ships sqlite 3.31 with the
default limit of 999. A power user with 2000+ sessions in the
sidebar would hit OperationalError, the silent except-wrapper would
swallow it, and lineage collapse would never work. Added
test_in_clause_chunked_for_large_session_set with SQL interception
to lock the invariant in source.

PR addition (per user directive — Opus + my review, no second
independent review round needed for combined batch):

#1372 from @NocGeek — fix: persist manual cron run results.
Self-contained 89 LOC fix split out from the held #1352. Mirrors the
scheduled-cron path (cron/scheduler.py:1334-1364) exactly: saves
output, marks job complete, treats empty response as soft failure
with matching error string. 2 behavioral tests using sys.modules
monkeypatch to mock cron.scheduler.run_job. CI not yet attached
because branch is brand-new; ran the new tests + adjacent suites
locally — all pass.

Final test count: 3471 passing, 0 failed.

Also adds 2 more regression tests for the perf-fix invariants:
- test_in_clause_chunked_for_large_session_set
- test_two_children_sharing_non_continuation_parent_not_collapsed
---
 CHANGELOG.md                                  |   4 +-
 api/agent_sessions.py                         |  45 +++++---
 ...pr1370_lineage_metadata_perf_and_orphan.py | 102 ++++++++++++++++--
 tests/test_session_lineage_metadata_api.py    |   7 +-
 4 files changed, 136 insertions(+), 22 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index fe3d732e..2cd65d0b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,7 +5,9 @@
 ## [v0.50.251] — 2026-04-30
 
 ### Fixed
-- **Sidebar lineage collapse now works for WebUI JSON sessions, not just imported gateway rows** — PR #1358 (v0.50.249) added the client-side lineage-collapse helper but `/api/sessions` only included `_lineage_root_id` for gateway-imported rows. WebUI JSON sessions (the common case) had no grouping key, so cross-surface continuation chains (CLI-close → WebUI continuation, or compression chains within WebUI) still rendered as separate sidebar rows. Now `/api/sessions` reads `parent_session_id` and `end_reason` from `state.db.sessions` for every WebUI session id in the sidebar payload, walks the parent chain when `end_reason in {'compression', 'cli_close'}`, and exposes `_lineage_root_id` + `_compression_segment_count`. Cycle-detected via a `seen` set; depth-bounded to 20 hops to cap pathological data. **Pre-release fix:** swapped the original full-table-scan (`SELECT id, parent_session_id, end_reason FROM sessions`) for a parameterized `WHERE id IN (...)` query that hits PRIMARY KEY + `idx_sessions_parent` — ~50× faster at 1000 rows, scales linearly. **Pre-release fix:** suppressed orphan `parent_session_id` references (parent row pruned/deleted) so the frontend's lineage helper doesn't cluster orphans into never-collapsing single-row groups. (`api/agent_sessions.py`, `api/models.py`, `tests/test_session_lineage_metadata_api.py`, `tests/test_pr1370_lineage_metadata_perf_and_orphan.py`, `tests/test_gateway_sync.py`) @dso2ng — PR #1370
+- **Sidebar lineage collapse now works for WebUI JSON sessions, not just imported gateway rows** — PR #1358 (v0.50.249) added the client-side lineage-collapse helper but `/api/sessions` only included `_lineage_root_id` for gateway-imported rows. WebUI JSON sessions (the common case) had no grouping key, so cross-surface continuation chains (CLI-close → WebUI continuation, or compression chains within WebUI) still rendered as separate sidebar rows. Now `/api/sessions` reads `parent_session_id` and `end_reason` from `state.db.sessions` for every WebUI session id in the sidebar payload, walks the parent chain when `end_reason in {'compression', 'cli_close'}`, and exposes `_lineage_root_id` + `_compression_segment_count`. Cycle-detected via a `seen` set; depth-bounded to 20 hops to cap pathological data. **Pre-release fix:** swapped the original full-table-scan for a parameterized `WHERE id IN (...)` query that hits PRIMARY KEY + `idx_sessions_parent` — ~50× faster at 1000 rows, scales linearly. **Pre-release fix:** chunked IN clause to 500 vars to stay under SQLITE_MAX_VARIABLE_NUMBER on older sqlite (Python 3.9 ships sqlite 3.31 with default limit 999) — without this a power user with 2000+ sessions in the sidebar would hit `OperationalError: too many SQL variables`, the silent except-wrapper would swallow it, and lineage collapse would never work for them. **Pre-release fix:** tightened `parent_session_id` exposure — only emitted when the parent's `end_reason` is `compression` or `cli_close` (not for `user_stop`/etc), since the frontend's `_sessionLineageKey` falls through to `parent_session_id` and would incorrectly collapse two children of a non-continuation parent into a single row. (`api/agent_sessions.py`, `api/models.py`, `tests/test_session_lineage_metadata_api.py`, `tests/test_pr1370_lineage_metadata_perf_and_orphan.py`, `tests/test_gateway_sync.py`) @dso2ng — PR #1370
+- **Manual cron runs persist output and metadata like scheduled runs** — manual WebUI cron runs called `cron.scheduler.run_job(job)` and then only cleared the in-memory running flag. The job's output was dropped (never written via `save_job_output`) and `last_run_at` / `last_status` were never updated. Now the manual-run wrapper (`_run_cron_tracked`) matches the scheduled-cron path at `cron/scheduler.py:1334-1364` exactly: saves output, marks the job complete, treats empty `final_response` as a soft failure (with the same error string), and records failures via `mark_job_run(False, str(e))`. (`api/routes.py`, `tests/test_cron_manual_run_persistence.py`) @NocGeek — PR #1372 (split out from the held #1352 per pre-release feedback)
+
 
 ## [v0.50.250] — 2026-04-30
 
diff --git a/api/agent_sessions.py b/api/agent_sessions.py
index a6222ff6..b98e5c87 100644
--- a/api/agent_sessions.py
+++ b/api/agent_sessions.py
@@ -291,6 +291,15 @@ def read_session_lineage_metadata(db_path: Path, session_ids: list[str] | set[st
             # Fetch the wanted ids first, then chase parent_session_id chains
             # in batches until no new ids appear. Each batch hits PRIMARY KEY
             # so it's effectively O(N) lookups.
+            #
+            # IN-clause is chunked to 500 to stay under SQLITE_MAX_VARIABLE_NUMBER
+            # on older sqlite (Python 3.9 ships sqlite 3.31 which defaults to 999;
+            # newer Python ships sqlite 3.32+ at 32766). On a power user with
+            # 2000+ sessions in the sidebar, an unchunked first hop would raise
+            # `OperationalError: too many SQL variables`, get swallowed by the
+            # except below, and silently disable lineage collapse forever.
+            # (Opus pre-release review of v0.50.251, SHOULD-FIX 2.)
+            IN_CHUNK = 500
             rows: dict[str, dict] = {}
             to_fetch = set(wanted)
             # Cap walk depth to bound worst-case query count. Real lineage
@@ -299,15 +308,17 @@ def read_session_lineage_metadata(db_path: Path, session_ids: list[str] | set[st
             for _hop in range(20):
                 if not to_fetch:
                     break
-                placeholders = ','.join('?' * len(to_fetch))
                 fetch_list = list(to_fetch)
                 to_fetch = set()
-                cur.execute(
-                    f"SELECT id, parent_session_id, end_reason FROM sessions WHERE id IN ({placeholders})",
-                    fetch_list,
-                )
-                for row in cur.fetchall():
-                    rows[row['id']] = dict(row)
+                for i in range(0, len(fetch_list), IN_CHUNK):
+                    chunk = fetch_list[i:i + IN_CHUNK]
+                    placeholders = ','.join('?' * len(chunk))
+                    cur.execute(
+                        f"SELECT id, parent_session_id, end_reason FROM sessions WHERE id IN ({placeholders})",
+                        chunk,
+                    )
+                    for row in cur.fetchall():
+                        rows[row['id']] = dict(row)
                 # Queue up parents we haven't fetched yet.
                 for sid in fetch_list:
                     parent_id = rows.get(sid, {}).get('parent_session_id')
@@ -323,13 +334,19 @@ def read_session_lineage_metadata(db_path: Path, session_ids: list[str] | set[st
             continue
 
         parent_id = row.get('parent_session_id')
-        # Only expose parent_session_id when the parent actually exists in
-        # state.db. Orphan references (parent row was pruned/deleted) used to
-        # leak through and the frontend would treat them as a sidebar
-        # grouping key (#1358's _sessionLineageKey falls through to
-        # parent_session_id when _lineage_root_id is missing). Caught during
-        # pre-release review of v0.50.251.
-        if parent_id and parent_id in rows:
+        # Only expose parent_session_id when:
+        #   1) the parent actually exists in state.db (orphan refs would
+        #      otherwise leak through and the frontend would treat them as
+        #      sidebar grouping keys via #1358's _sessionLineageKey
+        #      fall-through)
+        #   2) the parent's end_reason is one of {compression, cli_close} —
+        #      i.e. only TRUE continuations. Without this, two distinct
+        #      WebUI sessions sharing a `user_stop` parent would get
+        #      collapsed into a single sidebar row by #1358's helper
+        #      (it groups by parent_session_id as the third-fallback key).
+        # (Opus pre-release review of v0.50.251, SHOULD-FIX 1.)
+        parent_row = rows.get(parent_id) if parent_id else None
+        if parent_row and parent_row.get('end_reason') in {'compression', 'cli_close'}:
             metadata.setdefault(sid, {})['parent_session_id'] = parent_id
 
         root_id = sid
diff --git a/tests/test_pr1370_lineage_metadata_perf_and_orphan.py b/tests/test_pr1370_lineage_metadata_perf_and_orphan.py
index e605664f..fd1f20d5 100644
--- a/tests/test_pr1370_lineage_metadata_perf_and_orphan.py
+++ b/tests/test_pr1370_lineage_metadata_perf_and_orphan.py
@@ -180,10 +180,97 @@ def test_chain_walk_returns_correct_root_for_real_compression_chain(tmp_path):
     assert result["tip"]["parent_session_id"] == "seg3"
 
 
+def test_in_clause_chunked_for_large_session_set(tmp_path, monkeypatch):
+    """The first hop must chunk the IN clause into batches of <= 500.
+    Without chunking, a power user with 2000+ sessions in the sidebar would
+    trigger SQLITE_MAX_VARIABLE_NUMBER on Python 3.9's sqlite 3.31 (default
+    limit 999), the OperationalError gets swallowed by the except wrapper,
+    and lineage collapse silently disables forever for that user.
+    (Opus pre-release review of v0.50.251, SHOULD-FIX 2.)
+    """
+    from api import agent_sessions
+
+    db = tmp_path / "state.db"
+    conn = _make_db(db)
+    # 1500 unrelated rows
+    for i in range(1500):
+        _insert(conn, f"session_{i:04d}")
+    conn.close()
+
+    # Track each query's IN clause variable count
+    in_counts = []
+    real_connect = sqlite3.connect
+
+    class _TrackingConn:
+        def __init__(self, *args, **kw):
+            self._real = real_connect(*args, **kw)
+        def cursor(self):
+            return _TrackingCursor(self._real.cursor())
+        def __enter__(self): return self
+        def __exit__(self, *a): return self._real.__exit__(*a)
+        @property
+        def row_factory(self): return self._real.row_factory
+        @row_factory.setter
+        def row_factory(self, v): self._real.row_factory = v
+
+    class _TrackingCursor:
+        def __init__(self, real): self._real = real
+        def execute(self, sql, *args):
+            if "IN (" in sql:
+                in_counts.append(sql.count("?"))
+            return self._real.execute(sql, *args)
+        def fetchall(self): return self._real.fetchall()
+        def fetchone(self): return self._real.fetchone()
+
+    monkeypatch.setattr(sqlite3, "connect", _TrackingConn)
+
+    # Request 1500 ids (the full set we inserted)
+    wanted = [f"session_{i:04d}" for i in range(1500)]
+    agent_sessions.read_session_lineage_metadata(db, wanted)
+
+    assert in_counts, "Expected at least one IN-clause query"
+    over_limit = [n for n in in_counts if n > 999]
+    assert not over_limit, (
+        f"IN clause must be chunked to <= 999 vars to stay under SQLite default "
+        f"limit (chunked to 500 in the implementation). Found queries with "
+        f"{over_limit} variables — would raise OperationalError on older sqlite."
+    )
+
+
+def test_two_children_sharing_non_continuation_parent_not_collapsed(tmp_path):
+    """The contract Opus flagged: when two distinct WebUI sessions share the
+    same parent and that parent's end_reason is NOT compression/cli_close
+    (e.g. 'user_stop'), neither child should expose parent_session_id.
+    The frontend's #1358 lineage helper would otherwise group them under
+    the parent's id key and incorrectly collapse the rows.
+    """
+    from api.agent_sessions import read_session_lineage_metadata
+
+    db = tmp_path / "state.db"
+    conn = _make_db(db)
+    _insert(conn, "shared_parent", end_reason="user_stop")
+    _insert(conn, "child_a", parent="shared_parent", end_reason=None)
+    _insert(conn, "child_b", parent="shared_parent", end_reason=None)
+    conn.close()
+
+    result = read_session_lineage_metadata(db, ["child_a", "child_b"])
+    # Neither child should expose parent_session_id (would let frontend
+    # group them under the same key and collapse one)
+    for sid in ["child_a", "child_b"]:
+        entry = result.get(sid, {})
+        assert "parent_session_id" not in entry, (
+            f"{sid} leaked parent_session_id despite parent being non-continuation"
+        )
+
+
 def test_non_compression_parent_does_not_extend_lineage(tmp_path):
     """If parent's end_reason is something OTHER than 'compression' or
     'cli_close' (e.g. 'user_stop', 'session_reset', 'cron_complete'),
-    the chain stops at that boundary."""
+    the chain stops at that boundary AND parent_session_id is not exposed
+    (tightened in v0.50.251 per Opus SHOULD-FIX 1 — exposing the parent
+    ref would let the frontend group two children sharing the same
+    non-continuation parent into a single sidebar row).
+    """
     from api.agent_sessions import read_session_lineage_metadata
 
     db = tmp_path / "state.db"
@@ -193,8 +280,11 @@ def test_non_compression_parent_does_not_extend_lineage(tmp_path):
     conn.close()
 
     result = read_session_lineage_metadata(db, ["child"])
-    # parent_session_id should still be exposed (parent exists)
-    assert result["child"]["parent_session_id"] == "parent"
-    # But _lineage_root_id should NOT be set — chain doesn't span the boundary
-    assert "_lineage_root_id" not in result["child"]
-    assert "_compression_segment_count" not in result["child"]
+    # parent_session_id must NOT be exposed (parent's end_reason is non-continuation)
+    assert "child" not in result or "parent_session_id" not in result["child"], (
+        "parent_session_id leaked through despite parent's end_reason being "
+        "'user_stop' — would cause incorrect sidebar collapse."
+    )
+    # _lineage_root_id should NOT be set — chain doesn't span the boundary
+    assert "child" not in result or "_lineage_root_id" not in result["child"]
+    assert "child" not in result or "_compression_segment_count" not in result["child"]
diff --git a/tests/test_session_lineage_metadata_api.py b/tests/test_session_lineage_metadata_api.py
index fdf04b6d..8d1d282f 100644
--- a/tests/test_session_lineage_metadata_api.py
+++ b/tests/test_session_lineage_metadata_api.py
@@ -121,7 +121,12 @@ def test_non_compression_state_db_parent_does_not_create_sidebar_lineage(_isolat
 
         rows = {row["session_id"]: row for row in all_sessions()}
 
-        assert rows["lineage_api_plain_child"].get("parent_session_id") == "lineage_api_plain_parent"
+        # parent_session_id must NOT be exposed when the parent's end_reason
+        # is not a continuation (compression / cli_close). The frontend's
+        # _sessionLineageKey would otherwise group two children sharing a
+        # `user_stop` parent under the same key — incorrect collapse.
+        # (Tightened in v0.50.251 per Opus SHOULD-FIX 1.)
+        assert "parent_session_id" not in rows["lineage_api_plain_child"]
         assert "_lineage_root_id" not in rows["lineage_api_plain_child"]
     finally:
         conn.close()

From c5f4f569d675b06da5bc9aa3b68b1c363d8dec4a Mon Sep 17 00:00:00 2001
From: bergeouss <bergeouss@users.noreply.github.com>
Date: Thu, 30 Apr 2026 23:24:29 +0000
Subject: [PATCH 5/8] fix(#1361): preserve reasoning, tool calls, and partial
 output on Stop/Cancel (#1375)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Three distinct data-loss paths fixed:

§A — Reasoning text was accumulated in a thread-local _reasoning_text
inside _run_agent_streaming. cancel_stream() never saw it because it
went out of scope when the thread was interrupted. Now mirrored to a
new shared dict STREAM_REASONING_TEXT keyed by stream_id, populated
in on_reasoning() and the reasoning branch of on_tool(), read in
cancel_stream().

§B — Live tool calls in thread-local _live_tool_calls were similarly
invisible to cancel_stream(). Now mirrored to STREAM_LIVE_TOOL_CALLS
on tool.started + tool.completed.

§C — Reasoning-only streams produced no partial message because the
thinking-block regex strip returned empty string and the `if _stripped:`
guard skipped the append. Now appends the partial message when EITHER
content text, reasoning trace, OR tool calls exist.

Mirrors the existing STREAM_PARTIAL_TEXT pattern from #893 exactly:
same dict creation in _run_agent_streaming, same _live_config fallback
in cancel_stream, same cleanup in _periodic_checkpoint.

8 regression tests in tests/test_issue1361_cancel_data_loss.py
covering all three sections plus tools+text combinations.

Co-authored-by: bergeouss <bergeouss@users.noreply.github.com>
---
 api/config.py                            |   2 +
 api/streaming.py                         |  74 +++++-
 tests/test_issue1361_cancel_data_loss.py | 317 +++++++++++++++++++++++
 3 files changed, 382 insertions(+), 11 deletions(-)
 create mode 100644 tests/test_issue1361_cancel_data_loss.py

diff --git a/api/config.py b/api/config.py
index e49ba23a..45694f40 100644
--- a/api/config.py
+++ b/api/config.py
@@ -2115,6 +2115,8 @@ STREAMS_LOCK = threading.Lock()
 CANCEL_FLAGS: dict = {}
 AGENT_INSTANCES: dict = {}  # stream_id -> AIAgent instance for interrupt propagation
 STREAM_PARTIAL_TEXT: dict = {}  # stream_id -> partial assistant text accumulated during streaming
+STREAM_REASONING_TEXT: dict = {}  # stream_id -> reasoning trace accumulated during streaming (#1361 §A)
+STREAM_LIVE_TOOL_CALLS: dict = {}  # stream_id -> live tool calls accumulated during streaming (#1361 §B)
 SERVER_START_TIME = time.time()
 
 # Agent cache: reuse AIAgent across messages in the same WebUI session so that
diff --git a/api/streaming.py b/api/streaming.py
index 62a034a8..e6c6700c 100644
--- a/api/streaming.py
+++ b/api/streaming.py
@@ -21,6 +21,7 @@ logger = logging.getLogger(__name__)
 
 from api.config import (
     STREAMS, STREAMS_LOCK, CANCEL_FLAGS, AGENT_INSTANCES, STREAM_PARTIAL_TEXT,
+    STREAM_REASONING_TEXT, STREAM_LIVE_TOOL_CALLS,
     LOCK, SESSIONS, SESSION_DIR,
     _get_session_agent_lock, _set_thread_env, _clear_thread_env,
     SESSION_AGENT_LOCKS, SESSION_AGENT_LOCKS_LOCK,
@@ -1373,6 +1374,8 @@ def _run_agent_streaming(session_id, msg_text, model, workspace, stream_id, atta
     with STREAMS_LOCK:
         CANCEL_FLAGS[stream_id] = cancel_event
         STREAM_PARTIAL_TEXT[stream_id] = ''  # start accumulating partial text (#893)
+        STREAM_REASONING_TEXT[stream_id] = ''  # start accumulating reasoning trace (#1361 §A)
+        STREAM_LIVE_TOOL_CALLS[stream_id] = []  # start accumulating tool calls (#1361 §B)
 
     # Register this stream with the global streaming meter
     meter().begin_session(stream_id)
@@ -1575,6 +1578,9 @@ def _run_agent_streaming(session_id, msg_text, model, workspace, stream_id, atta
                 if text is None:
                     return
                 _reasoning_text += str(text)
+                # Mirror to shared dict so cancel_stream() can persist it (#1361 §A)
+                if stream_id in STREAM_REASONING_TEXT:
+                    STREAM_REASONING_TEXT[stream_id] += str(text)
                 put('reasoning', {'text': str(text)})
                 # Track reasoning tokens in the meter so TPS reflects all AI output
                 meter().record_reasoning(stream_id, len(_reasoning_text))
@@ -1607,6 +1613,9 @@ def _run_agent_streaming(session_id, msg_text, model, workspace, stream_id, atta
                     reason_text = preview if event_type == 'reasoning.available' else name
                     if reason_text:
                         _reasoning_text += str(reason_text)
+                        # Mirror to shared dict so cancel_stream() can persist it (#1361 §A)
+                        if stream_id in STREAM_REASONING_TEXT:
+                            STREAM_REASONING_TEXT[stream_id] += str(reason_text)
                         put('reasoning', {'text': str(reason_text)})
                         meter().record_reasoning(stream_id, len(_reasoning_text))
                         _emit_metering()
@@ -1623,6 +1632,13 @@ def _run_agent_streaming(session_id, msg_text, model, workspace, stream_id, atta
                         'name': name,
                         'args': args if isinstance(args, dict) else {},
                     })
+                    # Mirror to shared dict so cancel_stream() can persist it (#1361 §B)
+                    if stream_id in STREAM_LIVE_TOOL_CALLS:
+                        STREAM_LIVE_TOOL_CALLS[stream_id].append({
+                            'name': name,
+                            'args': args if isinstance(args, dict) else {},
+                            'done': False,
+                        })
                     put('tool', {
                         'event_type': event_type or 'tool.started',
                         'name': name,
@@ -1651,6 +1667,16 @@ def _run_agent_streaming(session_id, msg_text, model, workspace, stream_id, atta
                             live_tc['duration'] = cb_kwargs.get('duration')
                             live_tc['is_error'] = bool(cb_kwargs.get('is_error', False))
                             break
+                    # Mirror done state to shared dict (#1361 §B)
+                    if stream_id in STREAM_LIVE_TOOL_CALLS:
+                        for shared_tc in reversed(STREAM_LIVE_TOOL_CALLS[stream_id]):
+                            if shared_tc.get('done'):
+                                continue
+                            if not name or shared_tc.get('name') == name:
+                                shared_tc['done'] = True
+                                shared_tc['duration'] = cb_kwargs.get('duration')
+                                shared_tc['is_error'] = bool(cb_kwargs.get('is_error', False))
+                                break
                     # Signal the checkpoint thread that new work has completed (Issue #765).
                     # Each completed tool call is a meaningful unit of progress worth persisting.
                     _checkpoint_activity[0] += 1
@@ -2443,6 +2469,8 @@ def _run_agent_streaming(session_id, msg_text, model, workspace, stream_id, atta
             CANCEL_FLAGS.pop(stream_id, None)
             AGENT_INSTANCES.pop(stream_id, None)  # Clean up agent instance reference
             STREAM_PARTIAL_TEXT.pop(stream_id, None)  # Clean up partial text buffer (#893)
+            STREAM_REASONING_TEXT.pop(stream_id, None)  # Clean up reasoning trace (#1361 §A)
+            STREAM_LIVE_TOOL_CALLS.pop(stream_id, None)  # Clean up tool calls (#1361 §B)
 
 # ============================================================
 # SECTION: HTTP Request Handler
@@ -2630,6 +2658,17 @@ def cancel_stream(stream_id: str) -> bool:
             live_partials = getattr(_live_config, 'STREAM_PARTIAL_TEXT', partial_texts)
             if live_partials is not partial_texts:
                 _cancel_partial_text = live_partials.get(stream_id, '')
+        # Capture reasoning trace and live tool calls (#1361 §A + §B)
+        _cancel_reasoning = STREAM_REASONING_TEXT.get(stream_id, '')
+        if not _cancel_reasoning:
+            live_reasoning = getattr(_live_config, 'STREAM_REASONING_TEXT', STREAM_REASONING_TEXT)
+            if live_reasoning is not STREAM_REASONING_TEXT:
+                _cancel_reasoning = live_reasoning.get(stream_id, '')
+        _cancel_tool_calls = STREAM_LIVE_TOOL_CALLS.get(stream_id, [])
+        if not _cancel_tool_calls:
+            live_tools = getattr(_live_config, 'STREAM_LIVE_TOOL_CALLS', STREAM_LIVE_TOOL_CALLS)
+            if live_tools is not STREAM_LIVE_TOOL_CALLS:
+                _cancel_tool_calls = live_tools.get(stream_id, [])
 
     # Session cleanup outside STREAMS_LOCK to preserve lock ordering.
     # Acquire the per-session _agent_lock too, mirroring every other session
@@ -2708,7 +2747,13 @@ def cancel_stream(stream_id: str) -> bool:
                 # content IS kept in the history sent to the agent on the next user
                 # message, letting the model continue from where it was cut off.
                 # See the inner comment on the append call below for the rationale.
+                #
+                # #1361: Also persist reasoning trace and live tool calls that were
+                # accumulated in thread-local variables but invisible to the cancel path.
+                # This prevents paid-token data loss when cancelling mid-reasoning or
+                # mid-tool-execution.
                 partial_text = _cancel_partial_text.strip() if _cancel_partial_text else ''
+                _stripped = ''
                 if partial_text:
                     import re as _re
                     # Strip thinking/reasoning markup from partial content before saving.
@@ -2721,17 +2766,24 @@ def cancel_stream(stream_id: str) -> bool:
                     _stripped = _re.sub(r'<think(?:ing)?\b[^>]*>.*',
                                         '', _stripped,
                                         flags=_re.DOTALL | _re.IGNORECASE).strip()
-                    if _stripped:
-                        # Mark _partial=True for session/UI identification only.
-                        # Deliberately NOT _error=True — the partial content is real model
-                        # output and should be visible in conversation history so the model
-                        # can continue from it on the next turn (#893).
-                        _cs.messages.append({
-                            'role': 'assistant',
-                            'content': _stripped,
-                            '_partial': True,
-                            'timestamp': int(time.time()),
-                        })
+                # Determine whether there is anything to preserve beyond just the
+                # cancel marker.  Content text, reasoning trace, or tool calls all
+                # count (#1361 §C — previously only _stripped was checked, so a
+                # reasoning-only or tool-only stream produced NO partial message).
+                _has_reasoning = bool(_cancel_reasoning and _cancel_reasoning.strip())
+                _has_tools = bool(_cancel_tool_calls)
+                if _stripped or _has_reasoning or _has_tools:
+                    _partial_msg: dict = {
+                        'role': 'assistant',
+                        'content': _stripped,  # may be empty for reasoning/tool-only turns
+                        '_partial': True,
+                        'timestamp': int(time.time()),
+                    }
+                    if _has_reasoning:
+                        _partial_msg['reasoning'] = _cancel_reasoning.strip()
+                    if _has_tools:
+                        _partial_msg['tool_calls'] = list(_cancel_tool_calls)
+                    _cs.messages.append(_partial_msg)
                 # Cancel marker — flagged _error=True so it is stripped from conversation
                 # history on the next turn (prevents model from seeing "Task cancelled."
                 # as a prior assistant reply).
diff --git a/tests/test_issue1361_cancel_data_loss.py b/tests/test_issue1361_cancel_data_loss.py
new file mode 100644
index 00000000..c7f08d52
--- /dev/null
+++ b/tests/test_issue1361_cancel_data_loss.py
@@ -0,0 +1,317 @@
+"""Regression tests for #1361 — Stop/Cancel discards already-streamed content.
+
+Three distinct data-loss paths on cancel:
+
+  §A  Reasoning text accumulated in a thread-local `_reasoning_text` is never
+      visible to cancel_stream(), so it's lost on cancel.
+  §B  Live tool calls accumulated in thread-local `_live_tool_calls` are lost
+      on cancel — only STREAM_PARTIAL_TEXT is captured.
+  §C  When the entire streamed output is reasoning (no visible tokens),
+      _stripped is empty after regex cleanup, so NO partial assistant message
+      is appended — only the *Task cancelled.* marker survives.
+
+All three fix the same "tokens-paid-for-data-loss" class of bug.
+"""
+
+import pathlib
+import queue
+import re
+import threading
+from unittest.mock import Mock, patch
+
+import pytest
+
+import api.config as config
+import api.models as models
+import api.streaming as streaming
+from api.models import Session
+from api.streaming import cancel_stream
+
+REPO_ROOT = pathlib.Path(__file__).parent.parent.resolve()
+
+
+# ── Fixtures ────────────────────────────────────────────────────────────────
+
+@pytest.fixture(autouse=True)
+def _isolate_session_dir(tmp_path, monkeypatch):
+    """Redirect SESSION_DIR / SESSION_INDEX_FILE to an isolated temp dir."""
+    session_dir = tmp_path / "sessions"
+    session_dir.mkdir()
+    index_file = session_dir / "_index.json"
+    monkeypatch.setattr(models, "SESSION_DIR", session_dir)
+    monkeypatch.setattr(models, "SESSION_INDEX_FILE", index_file)
+    models.SESSIONS.clear()
+    yield
+    models.SESSIONS.clear()
+
+
+@pytest.fixture(autouse=True)
+def _isolate_stream_state():
+    """Clear all shared streaming dicts before/after each test."""
+    config.STREAMS.clear()
+    config.CANCEL_FLAGS.clear()
+    config.AGENT_INSTANCES.clear()
+    config.STREAM_PARTIAL_TEXT.clear()
+    # New shared dicts for §A and §B
+    if hasattr(config, 'STREAM_REASONING_TEXT'):
+        config.STREAM_REASONING_TEXT.clear()
+    if hasattr(config, 'STREAM_LIVE_TOOL_CALLS'):
+        config.STREAM_LIVE_TOOL_CALLS.clear()
+    yield
+    config.STREAMS.clear()
+    config.CANCEL_FLAGS.clear()
+    config.AGENT_INSTANCES.clear()
+    config.STREAM_PARTIAL_TEXT.clear()
+    if hasattr(config, 'STREAM_REASONING_TEXT'):
+        config.STREAM_REASONING_TEXT.clear()
+    if hasattr(config, 'STREAM_LIVE_TOOL_CALLS'):
+        config.STREAM_LIVE_TOOL_CALLS.clear()
+
+
+@pytest.fixture(autouse=True)
+def _isolate_agent_locks():
+    config.SESSION_AGENT_LOCKS.clear()
+    yield
+    config.SESSION_AGENT_LOCKS.clear()
+
+
+def _make_session(session_id="cancel_sid_1361",
+                  pending_msg="Help me debug this",
+                  messages=None):
+    """Build a session in mid-stream state."""
+    s = Session(
+        session_id=session_id,
+        title="Test Session",
+        messages=messages or [],
+    )
+    s.pending_user_message = pending_msg
+    s.pending_attachments = []
+    s.pending_started_at = None
+    s.active_stream_id = "stream_1361"
+    s.save()
+    models.SESSIONS[session_id] = s
+    return s
+
+
+def _setup_cancel_state(session_id, stream_id="stream_1361"):
+    """Wire up STREAMS/CANCEL_FLAGS/AGENT_INSTANCES for cancel_stream()."""
+    config.STREAMS[stream_id] = queue.Queue()
+    config.CANCEL_FLAGS[stream_id] = threading.Event()
+    mock_agent = Mock()
+    mock_agent.session_id = session_id
+    mock_agent.interrupt = Mock()
+    config.AGENT_INSTANCES[stream_id] = mock_agent
+    return stream_id, mock_agent
+
+
+# ── §A: Reasoning text lost on cancel ───────────────────────────────────────
+
+class TestCancelPreservesReasoningText:
+    """§A: _reasoning_text is thread-local and invisible to cancel_stream().
+    
+    After fix: reasoning text should be persisted in a shared dict
+    (STREAM_REASONING_TEXT) keyed by stream_id, and cancel_stream()
+    should append it as a 'reasoning' field on the partial assistant message.
+    """
+
+    def test_cancel_with_reasoning_only_preserves_reasoning(self):
+        """Cancel during reasoning phase (no visible tokens) should persist reasoning."""
+        sid = "test_1361_a1"
+        stream_id = "stream_a1"
+        s = _make_session(session_id=sid)
+        _setup_cancel_state(sid, stream_id)
+
+        # Simulate: reasoning was accumulated but no visible tokens
+        reasoning = "Let me think about this step by step..."
+        config.STREAM_PARTIAL_TEXT[stream_id] = ""  # no visible tokens
+
+        if hasattr(config, 'STREAM_REASONING_TEXT'):
+            config.STREAM_REASONING_TEXT[stream_id] = reasoning
+
+        cancel_stream(stream_id)
+
+        # Reload and check
+        s2 = models.SESSIONS[sid]
+        msgs = s2.messages
+        # There should be a partial assistant message with reasoning
+        assistant_msgs = [m for m in msgs if isinstance(m, dict) and m.get('role') == 'assistant']
+        has_reasoning = any(m.get('reasoning') for m in assistant_msgs)
+        assert has_reasoning, \
+            f"Expected reasoning field on partial assistant msg after cancel. Got messages: {assistant_msgs}"
+
+    def test_cancel_with_reasoning_and_partial_tokens_preserves_both(self):
+        """Cancel mid-stream with both reasoning and some visible tokens."""
+        sid = "test_1361_a2"
+        stream_id = "stream_a2"
+        s = _make_session(session_id=sid)
+        _setup_cancel_state(sid, stream_id)
+
+        reasoning = "Let me analyze the code..."
+        partial_text = "Based on my analysis, the bug is in the"
+        config.STREAM_PARTIAL_TEXT[stream_id] = partial_text
+
+        if hasattr(config, 'STREAM_REASONING_TEXT'):
+            config.STREAM_REASONING_TEXT[stream_id] = reasoning
+
+        cancel_stream(stream_id)
+
+        s2 = models.SESSIONS[sid]
+        assistant_msgs = [m for m in s2.messages if isinstance(m, dict) and m.get('role') == 'assistant']
+        # Should have partial content
+        partial_msgs = [m for m in assistant_msgs if m.get('_partial')]
+        has_content = any(m.get('content') for m in partial_msgs)
+        assert has_content, \
+            f"Expected partial assistant content after cancel. Got: {partial_msgs}"
+
+    def test_cancel_without_reasoning_dict_works_as_before(self):
+        """If STREAM_REASONING_TEXT doesn't exist yet (pre-fix), cancel still works."""
+        sid = "test_1361_a3"
+        stream_id = "stream_a3"
+        s = _make_session(session_id=sid)
+        _setup_cancel_state(sid, stream_id)
+
+        config.STREAM_PARTIAL_TEXT[stream_id] = "Some partial text"
+
+        cancel_stream(stream_id)
+
+        s2 = models.SESSIONS[sid]
+        msgs = s2.messages
+        # Should have the cancel marker
+        has_cancel = any(
+            isinstance(m, dict) and m.get('role') == 'assistant' and m.get('_error')
+            for m in msgs
+        )
+        assert has_cancel, "Cancel marker should always be present"
+
+
+# ── §B: Tool calls lost on cancel ───────────────────────────────────────────
+
+class TestCancelPreservesToolCalls:
+    """§B: _live_tool_calls is thread-local and invisible to cancel_stream().
+    
+    After fix: tool calls should be persisted in a shared dict
+    (STREAM_LIVE_TOOL_CALLS) keyed by stream_id, and cancel_stream()
+    should append them as tool_call entries on the partial assistant message.
+    """
+
+    def test_cancel_with_tool_calls_preserves_tools(self):
+        """Cancel after tool execution should preserve the tool call info."""
+        sid = "test_1361_b1"
+        stream_id = "stream_b1"
+        s = _make_session(session_id=sid)
+        _setup_cancel_state(sid, stream_id)
+
+        config.STREAM_PARTIAL_TEXT[stream_id] = ""
+
+        if hasattr(config, 'STREAM_LIVE_TOOL_CALLS'):
+            config.STREAM_LIVE_TOOL_CALLS[stream_id] = [
+                {"name": "read_file", "args": {"path": "/tmp/test.py"}, "done": True},
+                {"name": "terminal", "args": {"command": "ls"}, "done": False},
+            ]
+
+        cancel_stream(stream_id)
+
+        s2 = models.SESSIONS[sid]
+        assistant_msgs = [m for m in s2.messages if isinstance(m, dict) and m.get('role') == 'assistant']
+        has_tools = any(m.get('tool_calls') or m.get('tools') for m in assistant_msgs)
+        assert has_tools, \
+            f"Expected tool_calls on partial assistant msg after cancel. Got: {assistant_msgs}"
+
+    def test_cancel_with_tools_and_text_preserves_both(self):
+        """Cancel after tools + partial text should keep both."""
+        sid = "test_1361_b2"
+        stream_id = "stream_b2"
+        s = _make_session(session_id=sid)
+        _setup_cancel_state(sid, stream_id)
+
+        config.STREAM_PARTIAL_TEXT[stream_id] = "Here's what I found:"
+        if hasattr(config, 'STREAM_LIVE_TOOL_CALLS'):
+            config.STREAM_LIVE_TOOL_CALLS[stream_id] = [
+                {"name": "web_search", "args": {"query": "test"}, "done": True},
+            ]
+
+        cancel_stream(stream_id)
+
+        s2 = models.SESSIONS[sid]
+        assistant_msgs = [m for m in s2.messages if isinstance(m, dict) and m.get('role') == 'assistant']
+        partial_msgs = [m for m in assistant_msgs if m.get('_partial')]
+        has_content = any(m.get('content') for m in partial_msgs)
+        assert has_content, \
+            f"Expected partial content with tools after cancel. Got: {partial_msgs}"
+
+
+# ── §C: Empty _stripped skips entire append ─────────────────────────────────
+
+class TestCancelWithReasoningOnlyNoText:
+    """§C: When streaming was 100% reasoning (no visible tokens), _stripped is
+    empty after regex cleanup, so no partial assistant message is appended.
+    
+    After fix: even when _stripped is empty, if reasoning or tool calls exist,
+    a partial assistant message should be appended (with no content, but with
+    reasoning and/or tool_calls fields).
+    """
+
+    def test_reasoning_only_creates_partial_message(self):
+        """Cancel after reasoning-only output should still create a partial msg."""
+        sid = "test_1361_c1"
+        stream_id = "stream_c1"
+        s = _make_session(session_id=sid)
+        _setup_cancel_state(sid, stream_id)
+
+        # Only reasoning, no visible tokens at all
+        config.STREAM_PARTIAL_TEXT[stream_id] = ""
+
+        if hasattr(config, 'STREAM_REASONING_TEXT'):
+            config.STREAM_REASONING_TEXT[stream_id] = "Deep reasoning here..."
+
+        cancel_stream(stream_id)
+
+        s2 = models.SESSIONS[sid]
+        assistant_msgs = [m for m in s2.messages if isinstance(m, dict) and m.get('role') == 'assistant']
+        # Should NOT be only the cancel marker — there should be a partial msg
+        partial_msgs = [m for m in assistant_msgs if m.get('_partial')]
+        assert len(partial_msgs) > 0, \
+            f"Expected at least one partial assistant msg for reasoning-only cancel. Got: {assistant_msgs}"
+
+    def test_tools_only_creates_partial_message(self):
+        """Cancel after tool-only output (no text, no reasoning) should still create a partial msg."""
+        sid = "test_1361_c2"
+        stream_id = "stream_c2"
+        s = _make_session(session_id=sid)
+        _setup_cancel_state(sid, stream_id)
+
+        config.STREAM_PARTIAL_TEXT[stream_id] = ""
+
+        if hasattr(config, 'STREAM_LIVE_TOOL_CALLS'):
+            config.STREAM_LIVE_TOOL_CALLS[stream_id] = [
+                {"name": "read_file", "args": {"path": "/tmp/x"}, "done": True},
+            ]
+
+        cancel_stream(stream_id)
+
+        s2 = models.SESSIONS[sid]
+        assistant_msgs = [m for m in s2.messages if isinstance(m, dict) and m.get('role') == 'assistant']
+        partial_msgs = [m for m in assistant_msgs if m.get('_partial')]
+        assert len(partial_msgs) > 0, \
+            f"Expected at least one partial assistant msg for tools-only cancel. Got: {assistant_msgs}"
+
+    def test_no_reasoning_no_tools_no_partial(self):
+        """Cancel with no reasoning and no tools and no text = only cancel marker (no change)."""
+        sid = "test_1361_c3"
+        stream_id = "stream_c3"
+        s = _make_session(session_id=sid)
+        _setup_cancel_state(sid, stream_id)
+
+        config.STREAM_PARTIAL_TEXT[stream_id] = ""
+
+        cancel_stream(stream_id)
+
+        s2 = models.SESSIONS[sid]
+        assistant_msgs = [m for m in s2.messages if isinstance(m, dict) and m.get('role') == 'assistant']
+        # Should only have the cancel marker, no partial messages
+        partial_msgs = [m for m in assistant_msgs if m.get('_partial')]
+        cancel_msgs = [m for m in assistant_msgs if m.get('_error')]
+        assert len(partial_msgs) == 0, \
+            f"Expected no partial msg when nothing was streamed. Got partials: {partial_msgs}"
+        assert len(cancel_msgs) == 1, \
+            f"Expected exactly 1 cancel marker. Got: {cancel_msgs}"

From f14280e2c432e67990b7605d4eb74a7346ada422 Mon Sep 17 00:00:00 2001
From: bergeouss <bergeouss@users.noreply.github.com>
Date: Thu, 30 Apr 2026 23:24:31 +0000
Subject: [PATCH 6/8] fix(#1195): route sessions to profile dir even when dir
 doesn't exist yet (#1373)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When a user switched profiles and created a new session, the session
was saved to the default profile directory instead of the active
profile directory — because get_hermes_home_for_profile() silently
fell back to _DEFAULT_HERMES_HOME when the profile directory didn't
exist yet on disk.

Root cause: api/profiles.py:156 had `if profile_dir.is_dir(): return
profile_dir; return _DEFAULT_HERMES_HOME`. New profiles (no session
yet, so no dir) routed every session back to default.

Fix: remove the is_dir() guard, return the profile path
unconditionally. The profile directory is created on first use by
the agent/session layer.

5 regression tests in tests/test_issue1195_session_profile_routing.py:
existing-profile, non-existent-profile (the core fix), None, empty-
string, 'default' all return the expected path.

Co-authored-by: bergeouss <bergeouss@users.noreply.github.com>
---
 api/profiles.py                               |  4 +-
 docs/ISSUES.md                                | 23 ++++++
 .../test_issue1195_session_profile_routing.py | 81 +++++++++++++++++++
 3 files changed, 105 insertions(+), 3 deletions(-)
 create mode 100644 docs/ISSUES.md
 create mode 100644 tests/test_issue1195_session_profile_routing.py

diff --git a/api/profiles.py b/api/profiles.py
index a3965991..f87ac985 100644
--- a/api/profiles.py
+++ b/api/profiles.py
@@ -153,9 +153,7 @@ def get_hermes_home_for_profile(name: str) -> Path:
     if not name or name == 'default' or not _PROFILE_ID_RE.match(name):
         return _DEFAULT_HERMES_HOME
     profile_dir = _DEFAULT_HERMES_HOME / 'profiles' / name
-    if profile_dir.is_dir():
-        return profile_dir
-    return _DEFAULT_HERMES_HOME
+    return profile_dir
 
 
 _TERMINAL_ENV_MAPPINGS = {
diff --git a/docs/ISSUES.md b/docs/ISSUES.md
new file mode 100644
index 00000000..e3f65641
--- /dev/null
+++ b/docs/ISSUES.md
@@ -0,0 +1,23 @@
+# Upstream Issues — Root Cause Analysis
+
+## #1256: Browser tools fail with "Playwright not installed"
+
+### Root Cause
+The check lives in **hermes-agent** (upstream), not hermes-webui:
+
+```
+hermes-agent/tools/browser_tool.py → check_browser_requirements()
+```
+
+`check_browser_requirements()` does not recognize CDP (Chrome DevTools Protocol) mode — it only looks for a local Playwright/Puppeteer install. When the agent runs in CDP mode (connecting to an existing browser), the check still fails.
+
+### WebUI side
+The WebUI already passes `CLI_TOOLSETS` correctly per-request. The `enabled_toolsets` field in the cron/chat config is dynamic and works as intended.
+
+### Fix required
+The fix must happen in `hermes-agent/tools/browser_tool.py`:
+- `check_browser_requirements()` should skip the Playwright check when CDP mode is configured
+- Or add a `BROWSER_MODE=cdp` env var that bypasses the local browser requirement
+
+### Workaround
+Use `CLOUD_BROWSER=true` or configure `browser.base_url` to point to a remote CDP endpoint. This bypasses the local Playwright requirement.
diff --git a/tests/test_issue1195_session_profile_routing.py b/tests/test_issue1195_session_profile_routing.py
new file mode 100644
index 00000000..534a3f73
--- /dev/null
+++ b/tests/test_issue1195_session_profile_routing.py
@@ -0,0 +1,81 @@
+"""Tests for issue #1195: sessions must route to the correct profile directory
+even when that profile directory does not exist yet on disk."""
+
+import os
+import tempfile
+from pathlib import Path
+from unittest.mock import patch
+
+import pytest
+
+
+# ── helpers ──────────────────────────────────────────────────────────────────
+
+def _make_hermes_home(base: Path, profile_name: str | None = None) -> Path:
+    """Create a temp HERMES_HOME (with optional profile dir) and return it."""
+    hermes_home = base / ".hermes"
+    hermes_home.mkdir(parents=True, exist_ok=True)
+    if profile_name:
+        (hermes_home / "profiles" / profile_name).mkdir(parents=True, exist_ok=True)
+    return hermes_home
+
+
+# ── tests ────────────────────────────────────────────────────────────────────
+
+class TestGetHermesHomeForProfile:
+    """get_hermes_home_for_profile() must return the profile path regardless of
+    whether the directory already exists on disk (#1195)."""
+
+    @pytest.fixture(autouse=True)
+    def _patch_default_home(self, tmp_path):
+        """Patch _DEFAULT_HERMES_HOME to a temp directory for isolation."""
+        from api.profiles import _DEFAULT_HERMES_HOME as real_default
+
+        fake_home = tmp_path / ".hermes"
+        fake_home.mkdir(parents=True)
+        with patch("api.profiles._DEFAULT_HERMES_HOME", fake_home):
+            yield fake_home, real_default
+
+    def test_existing_profile_returns_profile_dir(self, _patch_default_home):
+        fake_home, _ = _patch_default_home
+        from api.profiles import get_hermes_home_for_profile
+
+        # Create an existing profile directory
+        profile_dir = fake_home / "profiles" / "ayan"
+        profile_dir.mkdir(parents=True)
+
+        result = get_hermes_home_for_profile("ayan")
+        assert result == profile_dir
+
+    def test_nonexistent_profile_still_returns_profile_path(self, _patch_default_home):
+        """Core bug fix: profile dir doesn't exist yet but should still route there."""
+        fake_home, _ = _patch_default_home
+        from api.profiles import get_hermes_home_for_profile
+
+        # Do NOT create the profile directory
+        expected = fake_home / "profiles" / "newprofile"
+        assert not expected.exists()  # confirm it doesn't exist
+
+        result = get_hermes_home_for_profile("newprofile")
+        assert result == expected, "Should route to profile path even when dir missing"
+
+    def test_none_returns_default(self, _patch_default_home):
+        fake_home, _ = _patch_default_home
+        from api.profiles import get_hermes_home_for_profile
+
+        result = get_hermes_home_for_profile(None)
+        assert result == fake_home
+
+    def test_empty_string_returns_default(self, _patch_default_home):
+        fake_home, _ = _patch_default_home
+        from api.profiles import get_hermes_home_for_profile
+
+        result = get_hermes_home_for_profile("")
+        assert result == fake_home
+
+    def test_default_string_returns_default(self, _patch_default_home):
+        fake_home, _ = _patch_default_home
+        from api.profiles import get_hermes_home_for_profile
+
+        result = get_hermes_home_for_profile("default")
+        assert result == fake_home

From d071e46e1f66c0580a0c5a5807b6c8ec40d636b2 Mon Sep 17 00:00:00 2001
From: nesquena-hermes <nesquena-hermes@users.noreply.github.com>
Date: Thu, 30 Apr 2026 23:27:04 +0000
Subject: [PATCH 7/8] release: add #1373 + #1375; fix R19c/R19j contracts for
 #1373 behavior change
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds two more contributor PRs to the v0.50.251 batch per user
directive (per-PR review + Opus review for #1373; #1375 was clean
ship-on-sight).

#1375 (@bergeouss, +382 LOC, all CI green) — fixes #1361 paid-token
data loss on Stop/Cancel. Mirrors the existing STREAM_PARTIAL_TEXT
pattern from #893: adds STREAM_REASONING_TEXT and STREAM_LIVE_TOOL_CALLS
shared dicts populated during streaming and read by cancel_stream().
Also fixes the §C reasoning-only-creates-no-message gap where the
strip-thinking-blocks regex returned empty string and the if-guard
skipped the partial append. 8 regression tests covering all 3
sections plus tools+text combinations.

#1373 (@bergeouss, +105 LOC, had CI failures pre-fix) — fixes #1195
new-profile-routes-to-default. The is_dir() guard in
get_hermes_home_for_profile() caused new profiles (no session yet)
to silently route every session back to the default profile until
the directory existed on disk. Removed the guard; profile path is
now returned unconditionally.

Pre-release fix for #1373's CI failures: the change flipped two
behaviors pinned by tests in #798:
- R19c (test_get_hermes_home_for_profile_falls_back_for_missing_profile)
  asserted nonexistent → base. Renamed and updated to assert the
  new always-return-profile-path behavior.
- R19j (test_get_hermes_home_for_profile_rejects_path_traversal)
  asserted that valid-but-nonexistent profile names → base. Updated
  to assert profile-scoped path. Also updated docstring: the
  _PROFILE_ID_RE regex is now the SOLE defense against path
  traversal (previously is_dir() was a defense-in-depth layer);
  verified each known-bad shape still returns base.

Tests: 3484 passing (3471 → 3484, +13).
---
 CHANGELOG.md           |  2 ++
 tests/test_issue798.py | 26 +++++++++++++++++---------
 2 files changed, 19 insertions(+), 9 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2cd65d0b..e6c02638 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,8 @@
 ### Fixed
 - **Sidebar lineage collapse now works for WebUI JSON sessions, not just imported gateway rows** — PR #1358 (v0.50.249) added the client-side lineage-collapse helper but `/api/sessions` only included `_lineage_root_id` for gateway-imported rows. WebUI JSON sessions (the common case) had no grouping key, so cross-surface continuation chains (CLI-close → WebUI continuation, or compression chains within WebUI) still rendered as separate sidebar rows. Now `/api/sessions` reads `parent_session_id` and `end_reason` from `state.db.sessions` for every WebUI session id in the sidebar payload, walks the parent chain when `end_reason in {'compression', 'cli_close'}`, and exposes `_lineage_root_id` + `_compression_segment_count`. Cycle-detected via a `seen` set; depth-bounded to 20 hops to cap pathological data. **Pre-release fix:** swapped the original full-table-scan for a parameterized `WHERE id IN (...)` query that hits PRIMARY KEY + `idx_sessions_parent` — ~50× faster at 1000 rows, scales linearly. **Pre-release fix:** chunked IN clause to 500 vars to stay under SQLITE_MAX_VARIABLE_NUMBER on older sqlite (Python 3.9 ships sqlite 3.31 with default limit 999) — without this a power user with 2000+ sessions in the sidebar would hit `OperationalError: too many SQL variables`, the silent except-wrapper would swallow it, and lineage collapse would never work for them. **Pre-release fix:** tightened `parent_session_id` exposure — only emitted when the parent's `end_reason` is `compression` or `cli_close` (not for `user_stop`/etc), since the frontend's `_sessionLineageKey` falls through to `parent_session_id` and would incorrectly collapse two children of a non-continuation parent into a single row. (`api/agent_sessions.py`, `api/models.py`, `tests/test_session_lineage_metadata_api.py`, `tests/test_pr1370_lineage_metadata_perf_and_orphan.py`, `tests/test_gateway_sync.py`) @dso2ng — PR #1370
 - **Manual cron runs persist output and metadata like scheduled runs** — manual WebUI cron runs called `cron.scheduler.run_job(job)` and then only cleared the in-memory running flag. The job's output was dropped (never written via `save_job_output`) and `last_run_at` / `last_status` were never updated. Now the manual-run wrapper (`_run_cron_tracked`) matches the scheduled-cron path at `cron/scheduler.py:1334-1364` exactly: saves output, marks the job complete, treats empty `final_response` as a soft failure (with the same error string), and records failures via `mark_job_run(False, str(e))`. (`api/routes.py`, `tests/test_cron_manual_run_persistence.py`) @NocGeek — PR #1372 (split out from the held #1352 per pre-release feedback)
+- **Reasoning trace, tool calls, and partial output preserved on Stop/Cancel** — three distinct data-loss paths fixed: §A reasoning text accumulated in a thread-local `_reasoning_text` was invisible to `cancel_stream()` because it went out of scope when the thread was interrupted; §B live tool calls in thread-local `_live_tool_calls` were similarly lost; §C reasoning-only streams (no visible tokens) produced no partial assistant message because the thinking-block regex strip returned empty string and the `if _stripped:` guard skipped the append. The fix mirrors the existing `STREAM_PARTIAL_TEXT` pattern (#893) by adding two new shared dicts (`STREAM_REASONING_TEXT`, `STREAM_LIVE_TOOL_CALLS`) populated during streaming and read by `cancel_stream()`. The cancel path now appends the partial assistant message when content text, reasoning trace, OR tool calls exist (not just text). Eliminates "paid tokens disappeared" reports on Stop. 8 regression tests covering all three sections plus tools+text combinations. (`api/config.py`, `api/streaming.py`, `tests/test_issue1361_cancel_data_loss.py`) @bergeouss — PR #1375, fixes #1361
+- **New profiles route sessions to the profile dir on first use, not back to default** — `get_hermes_home_for_profile()` had a `if profile_dir.is_dir(): return profile_dir; return _DEFAULT_HERMES_HOME` fallback. New profiles (no session yet, so no dir) routed every session back to default until the directory existed on disk — making profile switching silently broken for the first session of every new profile. Removed the `is_dir()` guard; the profile path is now returned unconditionally and the directory is created on first use by the agent/session layer. Path traversal is still blocked by the `_PROFILE_ID_RE` regex (`^[a-z0-9][a-z0-9_-]{0,63}$`); R19j tests were updated to pin that the regex is now the sole defense. R19c was tightened to assert the new behavior. 5 regression tests in `test_issue1195_session_profile_routing.py` covering existing-profile, non-existent-profile (the core fix), None, empty-string, and 'default' return paths. (`api/profiles.py`, `tests/test_issue798.py`, `tests/test_issue1195_session_profile_routing.py`) @bergeouss — PR #1373, fixes #1195
 
 
 ## [v0.50.250] — 2026-04-30
diff --git a/tests/test_issue798.py b/tests/test_issue798.py
index fef8a00b..702bef9f 100644
--- a/tests/test_issue798.py
+++ b/tests/test_issue798.py
@@ -40,13 +40,18 @@ def test_get_hermes_home_for_profile_returns_profile_subdir(tmp_path, monkeypatc
     assert result == profile_dir
 
 
-def test_get_hermes_home_for_profile_falls_back_for_missing_profile(tmp_path, monkeypatch):
-    """R19c: Named profile that does not exist falls back to base home."""
+def test_get_hermes_home_for_profile_returns_profile_path_for_missing_profile(tmp_path, monkeypatch):
+    """R19c: Named profile that does not exist on disk now returns the
+    profile-scoped path (created on first use by the agent layer), NOT the
+    base home. Tightened in v0.50.251 / PR #1373 to fix #1195: the previous
+    is_dir() fallback caused new profiles to silently route every session
+    back to the default profile until the directory existed on disk.
+    Path traversal is still blocked by the _PROFILE_ID_RE regex (R19j)."""
     import api.profiles as p
 
     monkeypatch.setattr(p, '_DEFAULT_HERMES_HOME', tmp_path)
     result = p.get_hermes_home_for_profile('ghost')
-    assert result == tmp_path
+    assert result == tmp_path / 'profiles' / 'ghost'
 
 
 def test_get_hermes_home_for_profile_does_not_mutate_globals():
@@ -171,8 +176,10 @@ def test_sessions_js_sends_profile_in_new_session_post():
 
 def test_get_hermes_home_for_profile_rejects_path_traversal():
     """R19j: get_hermes_home_for_profile() must reject names that don't match
-    _PROFILE_ID_RE (e.g. path traversal like '../../etc') and return the base home.
-    The regex guard is defence-in-depth on top of the is_dir() fallback."""
+    _PROFILE_ID_RE (e.g. path traversal like '../../etc') and return the base
+    home. After v0.50.251 / PR #1373 removed the is_dir() fallback, the regex
+    is the SOLE guard against path traversal — verify each known-bad shape
+    still returns the base home, not a traversed path."""
     import api.profiles as p
     base = p._DEFAULT_HERMES_HOME
     assert p.get_hermes_home_for_profile('../../etc') == base
@@ -180,7 +187,8 @@ def test_get_hermes_home_for_profile_rejects_path_traversal():
     assert p.get_hermes_home_for_profile('/absolute/path') == base
     assert p.get_hermes_home_for_profile('has spaces') == base
     assert p.get_hermes_home_for_profile('UPPERCASE') == base
-    # Valid names still work
-    assert p.get_hermes_home_for_profile('alice') == base   # nonexistent → fallback
-    assert p.get_hermes_home_for_profile('my-profile') == base
-    assert p.get_hermes_home_for_profile('profile_1') == base
+    # Valid names now route to the profile-scoped path (created on first use).
+    # Previously these returned `base` because no profile dir existed on disk.
+    assert p.get_hermes_home_for_profile('alice') == base / 'profiles' / 'alice'
+    assert p.get_hermes_home_for_profile('my-profile') == base / 'profiles' / 'my-profile'
+    assert p.get_hermes_home_for_profile('profile_1') == base / 'profiles' / 'profile_1'

From f53556b3ff1098935b56318a01d9499388eff61e Mon Sep 17 00:00:00 2001
From: nesquena-hermes <nesquena-hermes@users.noreply.github.com>
Date: Thu, 30 Apr 2026 23:43:23 +0000
Subject: [PATCH 8/8] fix(cancel-stream): rename tool_calls to
 _partial_tool_calls (Opus MUST-FIX)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Opus pass-2 review of v0.50.251 caught a critical regression in PR
#1375:

The cancel-partial message stored captured tool calls under the
'tool_calls' key. That key is whitelisted by _API_SAFE_MSG_KEYS so
_sanitize_messages_for_api forwarded the entries to the next-turn
LLM call. But the captured entries use the WebUI internal shape
({name, args, done, duration, is_error}) — they don't have the
OpenAI/Anthropic id + function: {name, arguments} envelope. Strict
providers (OpenAI, Anthropic, Z.AI/GLM) would 400 on the malformed
entries. Net effect: the very cancel-then-continue scenario PR
#1375 aimed to improve becomes a hard fail.

Fix:
- Rename the persisted key to '_partial_tool_calls' (underscore-
  prefixed private key NOT in _API_SAFE_MSG_KEYS, so sanitize
  correctly strips it).
- Update static/messages.js hasMessageToolMetadata check to also
  recognize _partial_tool_calls for UI rendering.
- Update test_issue1361_cancel_data_loss.py assertion to check
  _partial_tool_calls (and tool_calls as legacy fallback).

Plus 2 NIT fixes from the same Opus review:

NIT 1 (api/profiles.py:153): re.match → re.fullmatch for consistency
with other _PROFILE_ID_RE callers in the codebase. The trailing-
newline footgun ($ matches before final \n in re.match) is now
closed. Without #1373's is_dir() guard, a name like 'valid\n' would
have created a directory named 'valid\n' on Linux. Doesn't escape
<HERMES_HOME>/profiles/ via Path joining, but unintended.

NIT 2 (test_issue798.py): R19j coverage gaps — added trailing-
newline tests, length-boundary tests (64-char valid, 65-char
rejected), single-char minimum, and non-ASCII / Unicode-trick tests.

New regression test (tests/test_pr1375_partial_tool_calls_sanitize.py):
- test_partial_tool_calls_field_not_forwarded_to_llm: pins that
  sanitize-for-API strips _partial_tool_calls + reasoning + does
  NOT have tool_calls on a partial message
- test_legitimate_tool_calls_are_preserved_for_completed_turns:
  pins that real OpenAI-shape tool_calls on completed turns survive
  sanitize unchanged

Tests: 3486 passing (3484 → 3486, +2 sanitize tests).
---
 api/profiles.py                               |   2 +-
 api/streaming.py                              |  18 ++-
 static/messages.js                            |   7 +-
 tests/test_issue1361_cancel_data_loss.py      |   4 +-
 tests/test_issue798.py                        |  14 +++
 ...test_pr1375_partial_tool_calls_sanitize.py | 105 ++++++++++++++++++
 6 files changed, 145 insertions(+), 5 deletions(-)
 create mode 100644 tests/test_pr1375_partial_tool_calls_sanitize.py

diff --git a/api/profiles.py b/api/profiles.py
index f87ac985..fc94a336 100644
--- a/api/profiles.py
+++ b/api/profiles.py
@@ -150,7 +150,7 @@ def get_hermes_home_for_profile(name: str) -> Path:
     empty, 'default', or does not match the profile-name format (rejects path
     traversal such as '../../etc').
     """
-    if not name or name == 'default' or not _PROFILE_ID_RE.match(name):
+    if not name or name == 'default' or not _PROFILE_ID_RE.fullmatch(name):
         return _DEFAULT_HERMES_HOME
     profile_dir = _DEFAULT_HERMES_HOME / 'profiles' / name
     return profile_dir
diff --git a/api/streaming.py b/api/streaming.py
index e6c6700c..4ed12b83 100644
--- a/api/streaming.py
+++ b/api/streaming.py
@@ -2782,7 +2782,23 @@ def cancel_stream(stream_id: str) -> bool:
                     if _has_reasoning:
                         _partial_msg['reasoning'] = _cancel_reasoning.strip()
                     if _has_tools:
-                        _partial_msg['tool_calls'] = list(_cancel_tool_calls)
+                        # NOTE: store under the private '_partial_tool_calls' key
+                        # (NOT 'tool_calls'). The captured entries use the WebUI
+                        # internal shape {name, args, done, duration, is_error}
+                        # — they do NOT carry the OpenAI/Anthropic API id +
+                        # function: {name, arguments} envelope. If we put them
+                        # under 'tool_calls', `_sanitize_messages_for_api`
+                        # (which whitelists 'tool_calls' via _API_SAFE_MSG_KEYS)
+                        # would forward them to the next-turn LLM call and
+                        # strict providers (OpenAI, Anthropic, Z.AI/GLM) would
+                        # 400 on the malformed entries — turning a "data lost
+                        # on cancel" bug into a "next message returns 400"
+                        # bug, which is worse. The underscore-prefixed key is
+                        # not in the whitelist, so sanitize strips it. The UI
+                        # reads it via static/messages.js and renders it
+                        # alongside the regular tool_calls path.
+                        # (Opus pre-release review pass 2 of v0.50.251.)
+                        _partial_msg['_partial_tool_calls'] = list(_cancel_tool_calls)
                     _cs.messages.append(_partial_msg)
                 # Cancel marker — flagged _error=True so it is stripped from conversation
                 # history on the next turn (prevents model from seeing "Task cancelled."
diff --git a/static/messages.js b/static/messages.js
index e9e9b868..7bbf8c19 100644
--- a/static/messages.js
+++ b/static/messages.js
@@ -1072,9 +1072,14 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
         S.session=session;S.messages=(session.messages||[]).filter(m=>m&&m.role);
         const hasMessageToolMetadata=S.messages.some(m=>{
           if(!m||m.role!=='assistant') return false;
+          // Recognize both the standard `tool_calls` (used by completed assistant
+          // turns where the LLM emitted tool_call entries) and the WebUI-internal
+          // `_partial_tool_calls` (used on Stop/Cancel partial messages — see
+          // api/streaming.py cancel_stream).
           const hasTc=Array.isArray(m.tool_calls)&&m.tool_calls.length>0;
+          const hasPartialTc=Array.isArray(m._partial_tool_calls)&&m._partial_tool_calls.length>0;
           const hasTu=Array.isArray(m.content)&&m.content.some(p=>p&&p.type==='tool_use');
-          return hasTc||hasTu;
+          return hasTc||hasPartialTc||hasTu;
         });
         if(!hasMessageToolMetadata&&session.tool_calls&&session.tool_calls.length){
           S.toolCalls=(session.tool_calls||[]).map(tc=>({...tc,done:true}));
diff --git a/tests/test_issue1361_cancel_data_loss.py b/tests/test_issue1361_cancel_data_loss.py
index c7f08d52..cdf89dcd 100644
--- a/tests/test_issue1361_cancel_data_loss.py
+++ b/tests/test_issue1361_cancel_data_loss.py
@@ -213,9 +213,9 @@ class TestCancelPreservesToolCalls:
 
         s2 = models.SESSIONS[sid]
         assistant_msgs = [m for m in s2.messages if isinstance(m, dict) and m.get('role') == 'assistant']
-        has_tools = any(m.get('tool_calls') or m.get('tools') for m in assistant_msgs)
+        has_tools = any(m.get('_partial_tool_calls') or m.get('tool_calls') or m.get('tools') for m in assistant_msgs)
         assert has_tools, \
-            f"Expected tool_calls on partial assistant msg after cancel. Got: {assistant_msgs}"
+            f"Expected _partial_tool_calls on partial assistant msg after cancel. Got: {assistant_msgs}"
 
     def test_cancel_with_tools_and_text_preserves_both(self):
         """Cancel after tools + partial text should keep both."""
diff --git a/tests/test_issue798.py b/tests/test_issue798.py
index 702bef9f..4207400b 100644
--- a/tests/test_issue798.py
+++ b/tests/test_issue798.py
@@ -192,3 +192,17 @@ def test_get_hermes_home_for_profile_rejects_path_traversal():
     assert p.get_hermes_home_for_profile('alice') == base / 'profiles' / 'alice'
     assert p.get_hermes_home_for_profile('my-profile') == base / 'profiles' / 'my-profile'
     assert p.get_hermes_home_for_profile('profile_1') == base / 'profiles' / 'profile_1'
+    # R19j coverage gaps closed in v0.50.251 per Opus pre-release review:
+    # - Trailing-newline names must be rejected (re.match would let them through;
+    #   re.fullmatch correctly anchors $). Catches the match-vs-fullmatch footgun.
+    assert p.get_hermes_home_for_profile('valid\n') == base
+    assert p.get_hermes_home_for_profile('a\n') == base
+    # - Length boundaries: 64 chars (max valid: 1 + 63 suffix) routes to profile path,
+    #   65 chars rejected.
+    assert p.get_hermes_home_for_profile('a' * 64) == base / 'profiles' / ('a' * 64)
+    assert p.get_hermes_home_for_profile('a' * 65) == base
+    # - Single-char name is the minimum valid form.
+    assert p.get_hermes_home_for_profile('a') == base / 'profiles' / 'a'
+    # - Non-ASCII / Unicode-trick names are rejected by the ASCII-only charset.
+    assert p.get_hermes_home_for_profile('voilà') == base
+    assert p.get_hermes_home_for_profile('名前') == base
diff --git a/tests/test_pr1375_partial_tool_calls_sanitize.py b/tests/test_pr1375_partial_tool_calls_sanitize.py
new file mode 100644
index 00000000..1c4a5ff6
--- /dev/null
+++ b/tests/test_pr1375_partial_tool_calls_sanitize.py
@@ -0,0 +1,105 @@
+"""Regression test for the v0.50.251 Opus pass-2 MUST-FIX on PR #1375.
+
+Original PR #1375 stored cancelled tool calls under the message key
+`tool_calls`. That key is whitelisted by `_API_SAFE_MSG_KEYS` so the
+sanitize-for-API path forwarded them to the next-turn LLM call. But the
+captured entries use the WebUI internal shape ({name, args, done,
+duration, is_error}) — they don't have OpenAI/Anthropic's id +
+function: {name, arguments} envelope. Strict providers (OpenAI,
+Anthropic, Z.AI/GLM) would 400 on the malformed entries — turning a
+"data lost on cancel" bug into a "next message returns 400" bug.
+
+The fix renames the key to `_partial_tool_calls` (underscore-prefixed
+private key NOT in the whitelist), so sanitize correctly strips it.
+The UI reads it via static/messages.js.
+
+This test pins the invariant: a partial assistant message with
+`_partial_tool_calls` set must produce ZERO `tool_calls` after
+sanitize-for-API.
+"""
+import pathlib
+import sys
+
+import pytest
+
+REPO_ROOT = pathlib.Path(__file__).parent.parent.resolve()
+sys.path.insert(0, str(REPO_ROOT))
+
+
+def test_partial_tool_calls_field_not_forwarded_to_llm():
+    """The `_partial_tool_calls` field must not survive _sanitize_messages_for_api.
+    Otherwise the malformed entries get sent to the LLM and cause 400 errors."""
+    from api.streaming import _sanitize_messages_for_api
+
+    messages = [
+        {"role": "user", "content": "do a search"},
+        {
+            "role": "assistant",
+            "content": "Looking up...",
+            "_partial": True,
+            "_partial_tool_calls": [
+                {"name": "web_search", "args": {"query": "x"}, "done": False},
+            ],
+            "reasoning": "Let me think about this",
+        },
+    ]
+    sanitized = _sanitize_messages_for_api(messages)
+    # The partial assistant message must NOT have _partial_tool_calls (private key).
+    # It must NOT have tool_calls (would bypass the rename and 400 the LLM).
+    # It must NOT have reasoning (not in whitelist).
+    assistant_msgs = [m for m in sanitized if m.get("role") == "assistant"]
+    assert assistant_msgs, "Sanitized output must include the assistant message"
+    for m in assistant_msgs:
+        assert "_partial_tool_calls" not in m, (
+            "Sanitize-for-API must strip _partial_tool_calls — it's a UI-only key. "
+            f"Got: {m}"
+        )
+        assert "tool_calls" not in m, (
+            "Sanitize-for-API must NOT have tool_calls on a partial message — the "
+            "captured entries use WebUI shape and would 400 on strict providers. "
+            f"Got: {m}"
+        )
+        assert "reasoning" not in m, (
+            "Sanitize-for-API must strip reasoning (not in _API_SAFE_MSG_KEYS). "
+            f"Got: {m}"
+        )
+
+
+def test_legitimate_tool_calls_are_preserved_for_completed_turns():
+    """Completed assistant turns with REAL tool_calls (with id + function envelope)
+    must still pass through sanitize unchanged. The rename only affects
+    cancel-partial messages, not normal completed turns."""
+    from api.streaming import _sanitize_messages_for_api
+
+    messages = [
+        {"role": "user", "content": "search"},
+        {
+            "role": "assistant",
+            "content": "I'll search.",
+            "tool_calls": [
+                {
+                    "id": "call_abc",
+                    "type": "function",
+                    "function": {"name": "web_search", "arguments": '{"query":"x"}'}
+                },
+            ],
+        },
+        {
+            "role": "tool",
+            "tool_call_id": "call_abc",
+            "name": "web_search",
+            "content": "result",
+        },
+    ]
+    sanitized = _sanitize_messages_for_api(messages)
+    assistant_msgs = [m for m in sanitized if m.get("role") == "assistant"]
+    assert assistant_msgs, "Sanitized output must include the assistant message"
+    assert assistant_msgs[0].get("tool_calls"), (
+        "Legitimate tool_calls on completed turns must survive sanitize. "
+        f"Got: {assistant_msgs[0]}"
+    )
+    # Must still have the OpenAI envelope shape
+    tc = assistant_msgs[0]["tool_calls"][0]
+    assert "id" in tc and "function" in tc, (
+        f"tool_calls envelope must be preserved. Got: {tc}"
+    )