From 7da1e074e4f9770f4e764d3cedb5daf76b34cab6 Mon Sep 17 00:00:00 2001 From: Dennis Soong Date: Thu, 30 Apr 2026 23:04:49 +0000 Subject: [PATCH 1/8] fix: expose session lineage metadata in API (#1370) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PR #1358 added the client-side lineage collapse helper, but /api/sessions often did not include _lineage_root_id for the WebUI JSON sessions visible in the sidebar. In that case the helper has no grouping key and multiple same-title continuation rows remain visible. This PR: - Reads parent_session_id and end_reason from state.db.sessions for the WebUI sidebar's session ids - Walks the parent chain when end_reason is 'compression' or 'cli_close', producing _lineage_root_id and _compression_segment_count - Cycle-detects via a 'seen' set - Preserves projected lineage metadata on imported/gateway session rows - Allows sidebar collapse to group cross-surface continuation chains (CLI-close → WebUI continuation) while keeping non-continuation parent rows flat Co-authored-by: Dennis Soong --- api/agent_sessions.py | 65 +++++++++ api/models.py | 33 ++++- tests/test_gateway_sync.py | 3 + tests/test_session_lineage_metadata_api.py | 156 +++++++++++++++++++++ 4 files changed, 256 insertions(+), 1 deletion(-) create mode 100644 tests/test_session_lineage_metadata_api.py diff --git a/api/agent_sessions.py b/api/agent_sessions.py index ba9ae718..f35c681f 100644 --- a/api/agent_sessions.py +++ b/api/agent_sessions.py @@ -255,3 +255,68 @@ def read_importable_agent_session_rows( if limit is None: return projected return projected[:max(0, int(limit))] + + + +def read_session_lineage_metadata(db_path: Path, session_ids: list[str] | set[str]) -> dict[str, dict]: + """Return compression-lineage metadata for known WebUI sidebar sessions. + + WebUI sessions are persisted as JSON files, but Hermes Agent also mirrors + them into ``state.db.sessions`` for insights/session history. Compression + and cross-surface continuation create parent chains there. ``/api/sessions`` + needs to surface that lineage to the sidebar so client-side collapse can + group logical continuations without mutating or deleting any session files. + + Missing DBs, old schemas, or incomplete rows degrade to an empty mapping. + """ + wanted = {str(sid) for sid in (session_ids or []) if sid} + db_path = Path(db_path) + if not wanted or not db_path.exists(): + return {} + + try: + with sqlite3.connect(str(db_path)) as conn: + conn.row_factory = sqlite3.Row + cur = conn.cursor() + cur.execute("PRAGMA table_info(sessions)") + session_cols = {row[1] for row in cur.fetchall()} + if 'parent_session_id' not in session_cols or 'end_reason' not in session_cols: + return {} + cur.execute("SELECT id, parent_session_id, end_reason FROM sessions") + rows = {row['id']: dict(row) for row in cur.fetchall()} + except Exception: + return {} + + metadata: dict[str, dict] = {} + for sid in wanted: + row = rows.get(sid) + if not row: + continue + + parent_id = row.get('parent_session_id') + if parent_id: + metadata.setdefault(sid, {})['parent_session_id'] = parent_id + + root_id = sid + current_id = sid + segment_count = 1 + seen = {sid} + while True: + current = rows.get(current_id) + parent_id = current.get('parent_session_id') if current else None + parent = rows.get(parent_id) if parent_id else None + if not parent or parent_id in seen: + break + if parent.get('end_reason') not in {'compression', 'cli_close'}: + break + root_id = parent_id + current_id = parent_id + seen.add(parent_id) + segment_count += 1 + + if root_id != sid: + entry = metadata.setdefault(sid, {}) + entry['_lineage_root_id'] = root_id + entry['_compression_segment_count'] = segment_count + + return metadata diff --git a/api/models.py b/api/models.py index c6e1dba4..8e8d604d 100644 --- a/api/models.py +++ b/api/models.py @@ -15,7 +15,7 @@ from api.config import ( get_effective_default_model, _get_session_agent_lock, ) from api.workspace import get_last_workspace -from api.agent_sessions import read_importable_agent_session_rows +from api.agent_sessions import read_importable_agent_session_rows, read_session_lineage_metadata logger = logging.getLogger(__name__) @@ -746,6 +746,31 @@ def _hide_from_default_sidebar(session: dict) -> bool: return source == 'cron' or sid.startswith('cron_') +def _active_state_db_path() -> Path: + """Return state.db for the active Hermes profile, degrading to HERMES_HOME.""" + try: + from api.profiles import get_active_hermes_home + hermes_home = Path(get_active_hermes_home()).expanduser().resolve() + except Exception: + hermes_home = Path(os.getenv('HERMES_HOME', str(HOME / '.hermes'))).expanduser().resolve() + return hermes_home / 'state.db' + + +def _enrich_sidebar_lineage_metadata(sessions: list[dict]) -> None: + """Attach state.db compression lineage metadata used by sidebar collapse.""" + try: + metadata = read_session_lineage_metadata( + _active_state_db_path(), + {s.get('session_id') for s in sessions}, + ) + except Exception: + return + for session in sessions: + sid = session.get('session_id') + if sid in metadata: + session.update(metadata[sid]) + + def all_sessions(): active_stream_ids = _active_stream_ids() # Phase C: try index first for O(1) read; fall back to full scan @@ -804,6 +829,7 @@ def all_sessions(): for s in result: if not s.get('profile'): s['profile'] = 'default' + _enrich_sidebar_lineage_metadata(result) return result except Exception: logger.debug("Failed to load session index, falling back to full scan") @@ -832,6 +858,7 @@ def all_sessions(): for s in result: if not s.get('profile'): s['profile'] = 'default' + _enrich_sidebar_lineage_metadata(result) return result @@ -1015,6 +1042,10 @@ def get_cli_sessions() -> list: 'raw_source': row.get('raw_source'), 'session_source': row.get('session_source'), 'source_label': row.get('source_label'), + 'parent_session_id': row.get('parent_session_id'), + '_lineage_root_id': row.get('_lineage_root_id'), + '_lineage_tip_id': row.get('_lineage_tip_id'), + '_compression_segment_count': row.get('_compression_segment_count'), 'is_cli_session': True, }) except Exception as _cli_err: diff --git a/tests/test_gateway_sync.py b/tests/test_gateway_sync.py index 439e7095..d2dff359 100644 --- a/tests/test_gateway_sync.py +++ b/tests/test_gateway_sync.py @@ -333,6 +333,9 @@ def test_compression_chain_collapses_to_latest_tip_in_sidebar(): # bubbles to the top by true recency, not by the root's stale activity. # tip messages are at t0+201 and t0+202, so last_activity = t0 + 202. assert abs(tip.get('updated_at') - (t0 + 202)) < 0.01 + assert tip.get('_lineage_root_id') == 'chain_root_001' + assert tip.get('_lineage_tip_id') == 'chain_tip_001' + assert tip.get('_compression_segment_count') == 3 from api.agent_sessions import read_importable_agent_session_rows diff --git a/tests/test_session_lineage_metadata_api.py b/tests/test_session_lineage_metadata_api.py new file mode 100644 index 00000000..fdf04b6d --- /dev/null +++ b/tests/test_session_lineage_metadata_api.py @@ -0,0 +1,156 @@ +"""Regression tests for /api/sessions lineage metadata used by sidebar collapse.""" + +import sqlite3 +import time + +import pytest + +import api.models as models +from api.models import SESSIONS, STREAMS, Session, all_sessions + + +@pytest.fixture(autouse=True) +def _isolate(tmp_path, monkeypatch): + session_dir = tmp_path / "sessions" + session_dir.mkdir() + index_file = session_dir / "_index.json" + state_db = tmp_path / "state.db" + monkeypatch.setattr(models, "SESSION_DIR", session_dir) + monkeypatch.setattr(models, "SESSION_INDEX_FILE", index_file) + monkeypatch.setattr(models, "_active_state_db_path", lambda: state_db) + SESSIONS.clear() + STREAMS.clear() + yield state_db + SESSIONS.clear() + STREAMS.clear() + + +def _ensure_state_db(path): + conn = sqlite3.connect(str(path)) + conn.executescript( + """ + CREATE TABLE sessions ( + id TEXT PRIMARY KEY, + source TEXT, + title TEXT, + model TEXT, + started_at REAL NOT NULL, + message_count INTEGER DEFAULT 0, + parent_session_id TEXT, + ended_at REAL, + end_reason TEXT + ); + """ + ) + return conn + + +def _insert_state_row(conn, sid, *, parent=None, ended_at=None, end_reason=None, started_at=None): + conn.execute( + """ + INSERT INTO sessions + (id, source, title, model, started_at, message_count, parent_session_id, ended_at, end_reason) + VALUES (?, 'webui', ?, 'openai/gpt-5', ?, 2, ?, ?, ?) + """, + (sid, sid, started_at or time.time(), parent, ended_at, end_reason), + ) + conn.commit() + + +def _save_webui_session(sid, *, title, updated_at): + session = Session( + session_id=sid, + title=title, + messages=[{"role": "user", "content": "hello"}, {"role": "assistant", "content": "hi"}], + updated_at=updated_at, + ) + session.save(touch_updated_at=False) + return session + + +def test_all_sessions_exposes_state_db_lineage_metadata_for_webui_json_sessions(_isolate): + """PR #1358 can only collapse rows when /api/sessions exposes lineage keys.""" + conn = _ensure_state_db(_isolate) + t0 = time.time() - 100 + try: + _save_webui_session("lineage_api_root", title="Hermes WebUI", updated_at=t0) + _save_webui_session("lineage_api_tip", title="Hermes WebUI #2", updated_at=t0 + 10) + _insert_state_row( + conn, + "lineage_api_root", + started_at=t0, + ended_at=t0 + 5, + end_reason="compression", + ) + _insert_state_row( + conn, + "lineage_api_tip", + parent="lineage_api_root", + started_at=t0 + 6, + ) + + rows = {row["session_id"]: row for row in all_sessions()} + + assert rows["lineage_api_tip"].get("parent_session_id") == "lineage_api_root" + assert rows["lineage_api_tip"].get("_lineage_root_id") == "lineage_api_root" + assert rows["lineage_api_tip"].get("_compression_segment_count") == 2 + assert "_lineage_root_id" not in rows["lineage_api_root"] + finally: + conn.close() + + +def test_non_compression_state_db_parent_does_not_create_sidebar_lineage(_isolate): + conn = _ensure_state_db(_isolate) + t0 = time.time() - 100 + try: + _save_webui_session("lineage_api_plain_parent", title="Parent", updated_at=t0) + _save_webui_session("lineage_api_plain_child", title="Child", updated_at=t0 + 10) + _insert_state_row( + conn, + "lineage_api_plain_parent", + started_at=t0, + ended_at=t0 + 5, + end_reason="user_stop", + ) + _insert_state_row( + conn, + "lineage_api_plain_child", + parent="lineage_api_plain_parent", + started_at=t0 + 6, + ) + + rows = {row["session_id"]: row for row in all_sessions()} + + assert rows["lineage_api_plain_child"].get("parent_session_id") == "lineage_api_plain_parent" + assert "_lineage_root_id" not in rows["lineage_api_plain_child"] + finally: + conn.close() + + + +def test_cli_close_parent_preserves_cross_surface_continuation_lineage(_isolate): + conn = _ensure_state_db(_isolate) + t0 = time.time() - 100 + try: + _save_webui_session("lineage_api_cli_parent", title="Hermes WebUI #8", updated_at=t0) + _save_webui_session("lineage_api_webui_child", title="Hermes WebUI #8", updated_at=t0 + 10) + _insert_state_row( + conn, + "lineage_api_cli_parent", + started_at=t0, + ended_at=t0 + 5, + end_reason="cli_close", + ) + _insert_state_row( + conn, + "lineage_api_webui_child", + parent="lineage_api_cli_parent", + started_at=t0 + 6, + ) + + rows = {row["session_id"]: row for row in all_sessions()} + + assert rows["lineage_api_webui_child"].get("parent_session_id") == "lineage_api_cli_parent" + assert rows["lineage_api_webui_child"].get("_lineage_root_id") == "lineage_api_cli_parent" + finally: + conn.close() From 571cfed180aefcb4a649192dc66ded018a0f9ad1 Mon Sep 17 00:00:00 2001 From: nesquena-hermes Date: Thu, 30 Apr 2026 23:06:37 +0000 Subject: [PATCH 2/8] release: v0.50.251 (#1370 perf fix + orphan-parent guard + regression suite) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bundles: - #1370 fix: expose session lineage metadata in API (@dso2ng) Pre-release fixes applied: 1) Perf: replaced full table scan with parameterized WHERE id IN (...) query. Original code did SELECT id, parent_session_id, end_reason FROM sessions on every sidebar refresh. Measured 9ms cached scan at 1000 rows in production (up to ~450ms cold-cache); scales linearly. New approach hits PRIMARY KEY + idx_sessions_parent — 50x faster at 1000 rows, ~0.2ms regardless of total row count. Depth-bounded to 20 hops to cap query count under pathological data. 2) Orphan-parent guard: suppress parent_session_id in API output when the referenced parent row doesn't exist in state.db. The frontend's #1358 _sessionLineageKey falls through to parent_session_id when _lineage_root_id is missing — orphan references would create never-collapsing single-row groups in the sidebar. 3) Regression suite (5 tests in test_pr1370_lineage_metadata_perf_and_orphan.py): - Pins the no-full-scan invariant by intercepting all SQL queries and asserting no SELECT FROM sessions without a WHERE clause - Pins orphan-parent suppression - Pins cycle termination via threading.Event watchdog (2s timeout) - End-to-end test for 4-segment compression chain root resolution - Pins non-compression end_reason boundary stops walk --- CHANGELOG.md | 5 + api/agent_sessions.py | 41 +++- ...pr1370_lineage_metadata_perf_and_orphan.py | 200 ++++++++++++++++++ 3 files changed, 243 insertions(+), 3 deletions(-) create mode 100644 tests/test_pr1370_lineage_metadata_perf_and_orphan.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 793dfdb7..fe3d732e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,11 @@ ## [Unreleased] +## [v0.50.251] — 2026-04-30 + +### Fixed +- **Sidebar lineage collapse now works for WebUI JSON sessions, not just imported gateway rows** — PR #1358 (v0.50.249) added the client-side lineage-collapse helper but `/api/sessions` only included `_lineage_root_id` for gateway-imported rows. WebUI JSON sessions (the common case) had no grouping key, so cross-surface continuation chains (CLI-close → WebUI continuation, or compression chains within WebUI) still rendered as separate sidebar rows. Now `/api/sessions` reads `parent_session_id` and `end_reason` from `state.db.sessions` for every WebUI session id in the sidebar payload, walks the parent chain when `end_reason in {'compression', 'cli_close'}`, and exposes `_lineage_root_id` + `_compression_segment_count`. Cycle-detected via a `seen` set; depth-bounded to 20 hops to cap pathological data. **Pre-release fix:** swapped the original full-table-scan (`SELECT id, parent_session_id, end_reason FROM sessions`) for a parameterized `WHERE id IN (...)` query that hits PRIMARY KEY + `idx_sessions_parent` — ~50× faster at 1000 rows, scales linearly. **Pre-release fix:** suppressed orphan `parent_session_id` references (parent row pruned/deleted) so the frontend's lineage helper doesn't cluster orphans into never-collapsing single-row groups. (`api/agent_sessions.py`, `api/models.py`, `tests/test_session_lineage_metadata_api.py`, `tests/test_pr1370_lineage_metadata_perf_and_orphan.py`, `tests/test_gateway_sync.py`) @dso2ng — PR #1370 + ## [v0.50.250] — 2026-04-30 ### Fixed diff --git a/api/agent_sessions.py b/api/agent_sessions.py index f35c681f..a6222ff6 100644 --- a/api/agent_sessions.py +++ b/api/agent_sessions.py @@ -282,8 +282,37 @@ def read_session_lineage_metadata(db_path: Path, session_ids: list[str] | set[st session_cols = {row[1] for row in cur.fetchall()} if 'parent_session_id' not in session_cols or 'end_reason' not in session_cols: return {} - cur.execute("SELECT id, parent_session_id, end_reason FROM sessions") - rows = {row['id']: dict(row) for row in cur.fetchall()} + # Scoped fetch via PRIMARY KEY + idx_sessions_parent rather than a + # full table scan. The sessions table grows unbounded over time + # (1000+ rows is normal, 10000+ for power users), and this function + # runs on every sidebar refresh — a full SELECT was ~50x slower + # than the indexed lookup at 1000 rows and scales linearly. + # + # Fetch the wanted ids first, then chase parent_session_id chains + # in batches until no new ids appear. Each batch hits PRIMARY KEY + # so it's effectively O(N) lookups. + rows: dict[str, dict] = {} + to_fetch = set(wanted) + # Cap walk depth to bound worst-case query count. Real lineage + # chains seen in production are <10 segments; anything longer is + # almost certainly pathological data and not worth chasing. + for _hop in range(20): + if not to_fetch: + break + placeholders = ','.join('?' * len(to_fetch)) + fetch_list = list(to_fetch) + to_fetch = set() + cur.execute( + f"SELECT id, parent_session_id, end_reason FROM sessions WHERE id IN ({placeholders})", + fetch_list, + ) + for row in cur.fetchall(): + rows[row['id']] = dict(row) + # Queue up parents we haven't fetched yet. + for sid in fetch_list: + parent_id = rows.get(sid, {}).get('parent_session_id') + if parent_id and parent_id not in rows and parent_id not in to_fetch: + to_fetch.add(parent_id) except Exception: return {} @@ -294,7 +323,13 @@ def read_session_lineage_metadata(db_path: Path, session_ids: list[str] | set[st continue parent_id = row.get('parent_session_id') - if parent_id: + # Only expose parent_session_id when the parent actually exists in + # state.db. Orphan references (parent row was pruned/deleted) used to + # leak through and the frontend would treat them as a sidebar + # grouping key (#1358's _sessionLineageKey falls through to + # parent_session_id when _lineage_root_id is missing). Caught during + # pre-release review of v0.50.251. + if parent_id and parent_id in rows: metadata.setdefault(sid, {})['parent_session_id'] = parent_id root_id = sid diff --git a/tests/test_pr1370_lineage_metadata_perf_and_orphan.py b/tests/test_pr1370_lineage_metadata_perf_and_orphan.py new file mode 100644 index 00000000..e605664f --- /dev/null +++ b/tests/test_pr1370_lineage_metadata_perf_and_orphan.py @@ -0,0 +1,200 @@ +"""Regression tests for the v0.50.251 pre-release fixes to PR #1370. + +PR #1370 originally did `SELECT id, parent_session_id, end_reason FROM +sessions` (full table scan) on every call. At ~1000 rows the indexed +lookup was 50x faster; the pre-release fix replaces the full scan with +a parameterized `WHERE id IN (...)` query that hits PRIMARY KEY + +idx_sessions_parent. + +Also pins: orphan-parent references (parent_session_id points to a +row that doesn't exist in state.db) must NOT be exposed in the API +output, because #1358's frontend `_sessionLineageKey` falls through to +`parent_session_id` when `_lineage_root_id` is missing — and would +group orphans by a key no other session shares (cosmetic dead state). +""" + +import os +import sqlite3 +import time + +import pytest + + +def _make_db(path): + conn = sqlite3.connect(str(path)) + conn.executescript(""" + CREATE TABLE sessions ( + id TEXT PRIMARY KEY, + source TEXT, + title TEXT, + model TEXT, + started_at REAL NOT NULL, + message_count INTEGER DEFAULT 0, + parent_session_id TEXT, + ended_at REAL, + end_reason TEXT + ); + CREATE INDEX idx_sessions_parent ON sessions(parent_session_id); + """) + return conn + + +def _insert(conn, sid, *, parent=None, end_reason=None, started_at=None): + conn.execute( + "INSERT INTO sessions (id, source, title, model, started_at, parent_session_id, end_reason) " + "VALUES (?, 'webui', ?, 'openai/gpt-5', ?, ?, ?)", + (sid, sid, started_at or time.time(), parent, end_reason), + ) + conn.commit() + + +def test_does_not_full_scan_sessions_table(tmp_path, monkeypatch): + """The perf-critical invariant: must NOT do a `SELECT * FROM sessions` + or `SELECT id, parent_session_id, end_reason FROM sessions` without a + WHERE clause. Full scans regress sidebar refresh latency on power users + with 1000+ session rows. + """ + from api import agent_sessions + + db = tmp_path / "state.db" + conn = _make_db(db) + # Insert 500 unrelated rows + 5 we care about + for i in range(500): + _insert(conn, f"unrelated_{i:04d}") + _insert(conn, "wanted_1") + _insert(conn, "wanted_2", parent="wanted_1", end_reason=None) + conn.close() + + # Track every SQL the function issues + queries = [] + real_connect = sqlite3.connect + + class _TrackingConn: + def __init__(self, *args, **kw): + self._real = real_connect(*args, **kw) + def cursor(self): + return _TrackingCursor(self._real.cursor()) + def __enter__(self): + return self + def __exit__(self, *a): + return self._real.__exit__(*a) + @property + def row_factory(self): + return self._real.row_factory + @row_factory.setter + def row_factory(self, v): + self._real.row_factory = v + + class _TrackingCursor: + def __init__(self, real): + self._real = real + def execute(self, sql, *args): + queries.append(sql) + return self._real.execute(sql, *args) + def fetchall(self): + return self._real.fetchall() + def fetchone(self): + return self._real.fetchone() + + monkeypatch.setattr(sqlite3, "connect", _TrackingConn) + agent_sessions.read_session_lineage_metadata(db, ["wanted_1", "wanted_2"]) + + # No query may select from sessions without a WHERE clause + bad = [q for q in queries + if "from sessions" in q.lower() + and "where" not in q.lower() + and "pragma" not in q.lower()] + assert not bad, ( + f"read_session_lineage_metadata must scope its SELECTs to specific " + f"session ids (PR #1370 originally did a full scan). Found unscoped " + f"queries: {bad}" + ) + + +def test_orphan_parent_reference_not_exposed_in_metadata(tmp_path): + """If a session row references a parent that doesn't exist in state.db + (orphan), the API output must NOT include `parent_session_id` — because + #1358's frontend lineage helper would treat it as a sidebar grouping key + and cluster the orphan into a never-collapsing single-row group. + """ + from api.agent_sessions import read_session_lineage_metadata + + db = tmp_path / "state.db" + conn = _make_db(db) + _insert(conn, "child", parent="missing_parent", end_reason=None) + conn.close() + + result = read_session_lineage_metadata(db, ["child"]) + assert "child" not in result or "parent_session_id" not in result["child"], ( + f"Orphan parent_session_id leaked into API output: {result}. " + f"This causes the frontend lineage helper to group the orphan under " + f"a key no other session shares — cosmetic dead state on the wire." + ) + + +def test_cycle_in_parent_chain_terminates(tmp_path): + """Pathological data with a parent cycle (A→B→A) must not infinite-loop.""" + from api.agent_sessions import read_session_lineage_metadata + + db = tmp_path / "state.db" + conn = _make_db(db) + _insert(conn, "A", parent="B", end_reason="compression") + _insert(conn, "B", parent="A", end_reason="compression") + conn.close() + + import threading + done = threading.Event() + result_box = [] + + def worker(): + result_box.append(read_session_lineage_metadata(db, ["A"])) + done.set() + + t = threading.Thread(target=worker, daemon=True) + t.start() + finished = done.wait(timeout=2.0) + assert finished, "read_session_lineage_metadata hung on a parent cycle" + + result = result_box[0] + # Cycle should terminate; segment count should be bounded (≤ 2 for A→B→A) + seg = result.get("A", {}).get("_compression_segment_count", 0) + assert 1 <= seg <= 5, f"Cycle should produce a small bounded count, got {seg}" + + +def test_chain_walk_returns_correct_root_for_real_compression_chain(tmp_path): + """End-to-end behavioural test: 4-segment compression chain returns the + correct root and a segment count of 4.""" + from api.agent_sessions import read_session_lineage_metadata + + db = tmp_path / "state.db" + conn = _make_db(db) + _insert(conn, "root", end_reason="compression") + _insert(conn, "seg2", parent="root", end_reason="compression") + _insert(conn, "seg3", parent="seg2", end_reason="compression") + _insert(conn, "tip", parent="seg3", end_reason=None) + conn.close() + + result = read_session_lineage_metadata(db, ["tip"]) + assert result["tip"]["_lineage_root_id"] == "root" + assert result["tip"]["_compression_segment_count"] == 4 + assert result["tip"]["parent_session_id"] == "seg3" + + +def test_non_compression_parent_does_not_extend_lineage(tmp_path): + """If parent's end_reason is something OTHER than 'compression' or + 'cli_close' (e.g. 'user_stop', 'session_reset', 'cron_complete'), + the chain stops at that boundary.""" + from api.agent_sessions import read_session_lineage_metadata + + db = tmp_path / "state.db" + conn = _make_db(db) + _insert(conn, "parent", end_reason="user_stop") # NOT compression + _insert(conn, "child", parent="parent", end_reason=None) + conn.close() + + result = read_session_lineage_metadata(db, ["child"]) + # parent_session_id should still be exposed (parent exists) + assert result["child"]["parent_session_id"] == "parent" + # But _lineage_root_id should NOT be set — chain doesn't span the boundary + assert "_lineage_root_id" not in result["child"] + assert "_compression_segment_count" not in result["child"] From 89dcab832783e40a91eaa3ac038f2fe2f8ac9dc1 Mon Sep 17 00:00:00 2001 From: NocGeek Date: Thu, 30 Apr 2026 23:15:31 +0000 Subject: [PATCH 3/8] fix: persist manual cron run results (#1372) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Manual WebUI cron runs previously called cron.scheduler.run_job(job) and then only cleared the in-memory running flag. That meant output could be dropped and job metadata like last_run_at / last_status was not updated after a manual run. This PR matches the scheduled cron path (cron/scheduler.py:1334-1364) exactly: - Save manual-run output via save_job_output - Mark manual runs complete via mark_job_run - Treat empty final_response as a soft failure with the same error string as the scheduled path - Record manual-run failures in job metadata via mark_job_run(False) - Keep _run_cron_tracked self-contained for worker-thread execution Includes 2 behavioral regression tests using monkeypatch.setitem on sys.modules to mock cron.scheduler.run_job + cron.jobs helpers — the right test pattern (exercises the real _run_cron_tracked code path). Split out from #1352 (the larger profile-aware-cron-panel PR that's on hold) per pre-release-review feedback. Self-contained, doesn't touch the held PR's profile-filtering scope. Co-authored-by: NocGeek --- api/routes.py | 22 +++++++- tests/test_cron_manual_run_persistence.py | 69 +++++++++++++++++++++++ 2 files changed, 89 insertions(+), 2 deletions(-) create mode 100644 tests/test_cron_manual_run_persistence.py diff --git a/api/routes.py b/api/routes.py index a5e21a7c..03ed0c62 100644 --- a/api/routes.py +++ b/api/routes.py @@ -95,10 +95,28 @@ def _cron_output_content_window(text: str, limit: int = _CRON_OUTPUT_CONTENT_LIM def _run_cron_tracked(job): """Wrapper that tracks running state around cron.scheduler.run_job.""" from cron.scheduler import run_job # import here — runs inside a worker thread + from cron.jobs import mark_job_run, save_job_output + + job_id = job.get("id", "") try: - run_job(job) + success, output, final_response, error = run_job(job) + save_job_output(job_id, output) + + # Match the scheduled cron path: an apparently successful run with no + # final response should not leave the job looking healthy. + if success and not final_response: + success = False + error = "Agent completed but produced empty response (model error, timeout, or misconfiguration)" + + mark_job_run(job_id, success, error) + except Exception as e: + logger.exception("Manual cron run failed for job %s", job_id) + try: + mark_job_run(job_id, False, str(e)) + except Exception: + logger.debug("Failed to mark manual cron run failure for %s", job_id) finally: - _mark_cron_done(job.get("id", "")) + _mark_cron_done(job_id) _PROVIDER_ALIASES = { "claude": "anthropic", diff --git a/tests/test_cron_manual_run_persistence.py b/tests/test_cron_manual_run_persistence.py new file mode 100644 index 00000000..7c1c365e --- /dev/null +++ b/tests/test_cron_manual_run_persistence.py @@ -0,0 +1,69 @@ +"""Regression tests for manual WebUI cron runs.""" + +import sys +import types + + +def test_manual_cron_run_saves_output_and_marks_job(monkeypatch): + import api.routes as routes + + calls = [] + + cron_pkg = types.ModuleType("cron") + cron_pkg.__path__ = [] + + cron_jobs = types.ModuleType("cron.jobs") + cron_jobs.save_job_output = lambda job_id, output: calls.append( + ("save", job_id, output) + ) + cron_jobs.mark_job_run = lambda job_id, success, error=None: calls.append( + ("mark", job_id, success, error) + ) + + cron_scheduler = types.ModuleType("cron.scheduler") + cron_scheduler.run_job = lambda job: (True, "manual output", "done", None) + + monkeypatch.setitem(sys.modules, "cron", cron_pkg) + monkeypatch.setitem(sys.modules, "cron.jobs", cron_jobs) + monkeypatch.setitem(sys.modules, "cron.scheduler", cron_scheduler) + + routes._mark_cron_running("job123") + routes._run_cron_tracked({"id": "job123"}) + + assert calls == [ + ("save", "job123", "manual output"), + ("mark", "job123", True, None), + ] + assert routes._is_cron_running("job123") == (False, 0.0) + + +def test_manual_cron_run_marks_empty_response_as_failure(monkeypatch): + import api.routes as routes + + calls = [] + + cron_pkg = types.ModuleType("cron") + cron_pkg.__path__ = [] + + cron_jobs = types.ModuleType("cron.jobs") + cron_jobs.save_job_output = lambda job_id, output: calls.append( + ("save", job_id, output) + ) + cron_jobs.mark_job_run = lambda job_id, success, error=None: calls.append( + ("mark", job_id, success, error) + ) + + cron_scheduler = types.ModuleType("cron.scheduler") + cron_scheduler.run_job = lambda job: (True, "manual output", "", None) + + monkeypatch.setitem(sys.modules, "cron", cron_pkg) + monkeypatch.setitem(sys.modules, "cron.jobs", cron_jobs) + monkeypatch.setitem(sys.modules, "cron.scheduler", cron_scheduler) + + routes._mark_cron_running("job-empty") + routes._run_cron_tracked({"id": "job-empty"}) + + assert calls[0] == ("save", "job-empty", "manual output") + assert calls[1][0:3] == ("mark", "job-empty", False) + assert "empty response" in calls[1][3] + assert routes._is_cron_running("job-empty") == (False, 0.0) From 63251ad206e55111015fbdf7b6a66de2a062ded3 Mon Sep 17 00:00:00 2001 From: nesquena-hermes Date: Thu, 30 Apr 2026 23:17:54 +0000 Subject: [PATCH 4/8] release: apply Opus SHOULD-FIX 1+2 + add #1372 manual-cron persistence MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Opus pre-release findings on #1370 applied: SHOULD-FIX 1: Tightened parent_session_id exposure to only emit when the parent's end_reason is in {compression, cli_close}. Without this, two distinct WebUI sessions sharing a non-continuation parent (e.g. 'user_stop') would get clustered by frontend's _sessionLineageKey (which falls through to parent_session_id when _lineage_root_id is missing) and incorrectly collapsed into a single sidebar row. Updated assertions in: - tests/test_session_lineage_metadata_api.py:: test_non_compression_state_db_parent_does_not_create_sidebar_lineage - tests/test_pr1370_lineage_metadata_perf_and_orphan.py:: test_non_compression_parent_does_not_extend_lineage SHOULD-FIX 2: Chunked the IN-clause to 500 vars to stay under SQLITE_MAX_VARIABLE_NUMBER. Python 3.9 ships sqlite 3.31 with the default limit of 999. A power user with 2000+ sessions in the sidebar would hit OperationalError, the silent except-wrapper would swallow it, and lineage collapse would never work. Added test_in_clause_chunked_for_large_session_set with SQL interception to lock the invariant in source. PR addition (per user directive — Opus + my review, no second independent review round needed for combined batch): #1372 from @NocGeek — fix: persist manual cron run results. Self-contained 89 LOC fix split out from the held #1352. Mirrors the scheduled-cron path (cron/scheduler.py:1334-1364) exactly: saves output, marks job complete, treats empty response as soft failure with matching error string. 2 behavioral tests using sys.modules monkeypatch to mock cron.scheduler.run_job. CI not yet attached because branch is brand-new; ran the new tests + adjacent suites locally — all pass. Final test count: 3471 passing, 0 failed. Also adds 2 more regression tests for the perf-fix invariants: - test_in_clause_chunked_for_large_session_set - test_two_children_sharing_non_continuation_parent_not_collapsed --- CHANGELOG.md | 4 +- api/agent_sessions.py | 45 +++++--- ...pr1370_lineage_metadata_perf_and_orphan.py | 102 ++++++++++++++++-- tests/test_session_lineage_metadata_api.py | 7 +- 4 files changed, 136 insertions(+), 22 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fe3d732e..2cd65d0b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,7 +5,9 @@ ## [v0.50.251] — 2026-04-30 ### Fixed -- **Sidebar lineage collapse now works for WebUI JSON sessions, not just imported gateway rows** — PR #1358 (v0.50.249) added the client-side lineage-collapse helper but `/api/sessions` only included `_lineage_root_id` for gateway-imported rows. WebUI JSON sessions (the common case) had no grouping key, so cross-surface continuation chains (CLI-close → WebUI continuation, or compression chains within WebUI) still rendered as separate sidebar rows. Now `/api/sessions` reads `parent_session_id` and `end_reason` from `state.db.sessions` for every WebUI session id in the sidebar payload, walks the parent chain when `end_reason in {'compression', 'cli_close'}`, and exposes `_lineage_root_id` + `_compression_segment_count`. Cycle-detected via a `seen` set; depth-bounded to 20 hops to cap pathological data. **Pre-release fix:** swapped the original full-table-scan (`SELECT id, parent_session_id, end_reason FROM sessions`) for a parameterized `WHERE id IN (...)` query that hits PRIMARY KEY + `idx_sessions_parent` — ~50× faster at 1000 rows, scales linearly. **Pre-release fix:** suppressed orphan `parent_session_id` references (parent row pruned/deleted) so the frontend's lineage helper doesn't cluster orphans into never-collapsing single-row groups. (`api/agent_sessions.py`, `api/models.py`, `tests/test_session_lineage_metadata_api.py`, `tests/test_pr1370_lineage_metadata_perf_and_orphan.py`, `tests/test_gateway_sync.py`) @dso2ng — PR #1370 +- **Sidebar lineage collapse now works for WebUI JSON sessions, not just imported gateway rows** — PR #1358 (v0.50.249) added the client-side lineage-collapse helper but `/api/sessions` only included `_lineage_root_id` for gateway-imported rows. WebUI JSON sessions (the common case) had no grouping key, so cross-surface continuation chains (CLI-close → WebUI continuation, or compression chains within WebUI) still rendered as separate sidebar rows. Now `/api/sessions` reads `parent_session_id` and `end_reason` from `state.db.sessions` for every WebUI session id in the sidebar payload, walks the parent chain when `end_reason in {'compression', 'cli_close'}`, and exposes `_lineage_root_id` + `_compression_segment_count`. Cycle-detected via a `seen` set; depth-bounded to 20 hops to cap pathological data. **Pre-release fix:** swapped the original full-table-scan for a parameterized `WHERE id IN (...)` query that hits PRIMARY KEY + `idx_sessions_parent` — ~50× faster at 1000 rows, scales linearly. **Pre-release fix:** chunked IN clause to 500 vars to stay under SQLITE_MAX_VARIABLE_NUMBER on older sqlite (Python 3.9 ships sqlite 3.31 with default limit 999) — without this a power user with 2000+ sessions in the sidebar would hit `OperationalError: too many SQL variables`, the silent except-wrapper would swallow it, and lineage collapse would never work for them. **Pre-release fix:** tightened `parent_session_id` exposure — only emitted when the parent's `end_reason` is `compression` or `cli_close` (not for `user_stop`/etc), since the frontend's `_sessionLineageKey` falls through to `parent_session_id` and would incorrectly collapse two children of a non-continuation parent into a single row. (`api/agent_sessions.py`, `api/models.py`, `tests/test_session_lineage_metadata_api.py`, `tests/test_pr1370_lineage_metadata_perf_and_orphan.py`, `tests/test_gateway_sync.py`) @dso2ng — PR #1370 +- **Manual cron runs persist output and metadata like scheduled runs** — manual WebUI cron runs called `cron.scheduler.run_job(job)` and then only cleared the in-memory running flag. The job's output was dropped (never written via `save_job_output`) and `last_run_at` / `last_status` were never updated. Now the manual-run wrapper (`_run_cron_tracked`) matches the scheduled-cron path at `cron/scheduler.py:1334-1364` exactly: saves output, marks the job complete, treats empty `final_response` as a soft failure (with the same error string), and records failures via `mark_job_run(False, str(e))`. (`api/routes.py`, `tests/test_cron_manual_run_persistence.py`) @NocGeek — PR #1372 (split out from the held #1352 per pre-release feedback) + ## [v0.50.250] — 2026-04-30 diff --git a/api/agent_sessions.py b/api/agent_sessions.py index a6222ff6..b98e5c87 100644 --- a/api/agent_sessions.py +++ b/api/agent_sessions.py @@ -291,6 +291,15 @@ def read_session_lineage_metadata(db_path: Path, session_ids: list[str] | set[st # Fetch the wanted ids first, then chase parent_session_id chains # in batches until no new ids appear. Each batch hits PRIMARY KEY # so it's effectively O(N) lookups. + # + # IN-clause is chunked to 500 to stay under SQLITE_MAX_VARIABLE_NUMBER + # on older sqlite (Python 3.9 ships sqlite 3.31 which defaults to 999; + # newer Python ships sqlite 3.32+ at 32766). On a power user with + # 2000+ sessions in the sidebar, an unchunked first hop would raise + # `OperationalError: too many SQL variables`, get swallowed by the + # except below, and silently disable lineage collapse forever. + # (Opus pre-release review of v0.50.251, SHOULD-FIX 2.) + IN_CHUNK = 500 rows: dict[str, dict] = {} to_fetch = set(wanted) # Cap walk depth to bound worst-case query count. Real lineage @@ -299,15 +308,17 @@ def read_session_lineage_metadata(db_path: Path, session_ids: list[str] | set[st for _hop in range(20): if not to_fetch: break - placeholders = ','.join('?' * len(to_fetch)) fetch_list = list(to_fetch) to_fetch = set() - cur.execute( - f"SELECT id, parent_session_id, end_reason FROM sessions WHERE id IN ({placeholders})", - fetch_list, - ) - for row in cur.fetchall(): - rows[row['id']] = dict(row) + for i in range(0, len(fetch_list), IN_CHUNK): + chunk = fetch_list[i:i + IN_CHUNK] + placeholders = ','.join('?' * len(chunk)) + cur.execute( + f"SELECT id, parent_session_id, end_reason FROM sessions WHERE id IN ({placeholders})", + chunk, + ) + for row in cur.fetchall(): + rows[row['id']] = dict(row) # Queue up parents we haven't fetched yet. for sid in fetch_list: parent_id = rows.get(sid, {}).get('parent_session_id') @@ -323,13 +334,19 @@ def read_session_lineage_metadata(db_path: Path, session_ids: list[str] | set[st continue parent_id = row.get('parent_session_id') - # Only expose parent_session_id when the parent actually exists in - # state.db. Orphan references (parent row was pruned/deleted) used to - # leak through and the frontend would treat them as a sidebar - # grouping key (#1358's _sessionLineageKey falls through to - # parent_session_id when _lineage_root_id is missing). Caught during - # pre-release review of v0.50.251. - if parent_id and parent_id in rows: + # Only expose parent_session_id when: + # 1) the parent actually exists in state.db (orphan refs would + # otherwise leak through and the frontend would treat them as + # sidebar grouping keys via #1358's _sessionLineageKey + # fall-through) + # 2) the parent's end_reason is one of {compression, cli_close} — + # i.e. only TRUE continuations. Without this, two distinct + # WebUI sessions sharing a `user_stop` parent would get + # collapsed into a single sidebar row by #1358's helper + # (it groups by parent_session_id as the third-fallback key). + # (Opus pre-release review of v0.50.251, SHOULD-FIX 1.) + parent_row = rows.get(parent_id) if parent_id else None + if parent_row and parent_row.get('end_reason') in {'compression', 'cli_close'}: metadata.setdefault(sid, {})['parent_session_id'] = parent_id root_id = sid diff --git a/tests/test_pr1370_lineage_metadata_perf_and_orphan.py b/tests/test_pr1370_lineage_metadata_perf_and_orphan.py index e605664f..fd1f20d5 100644 --- a/tests/test_pr1370_lineage_metadata_perf_and_orphan.py +++ b/tests/test_pr1370_lineage_metadata_perf_and_orphan.py @@ -180,10 +180,97 @@ def test_chain_walk_returns_correct_root_for_real_compression_chain(tmp_path): assert result["tip"]["parent_session_id"] == "seg3" +def test_in_clause_chunked_for_large_session_set(tmp_path, monkeypatch): + """The first hop must chunk the IN clause into batches of <= 500. + Without chunking, a power user with 2000+ sessions in the sidebar would + trigger SQLITE_MAX_VARIABLE_NUMBER on Python 3.9's sqlite 3.31 (default + limit 999), the OperationalError gets swallowed by the except wrapper, + and lineage collapse silently disables forever for that user. + (Opus pre-release review of v0.50.251, SHOULD-FIX 2.) + """ + from api import agent_sessions + + db = tmp_path / "state.db" + conn = _make_db(db) + # 1500 unrelated rows + for i in range(1500): + _insert(conn, f"session_{i:04d}") + conn.close() + + # Track each query's IN clause variable count + in_counts = [] + real_connect = sqlite3.connect + + class _TrackingConn: + def __init__(self, *args, **kw): + self._real = real_connect(*args, **kw) + def cursor(self): + return _TrackingCursor(self._real.cursor()) + def __enter__(self): return self + def __exit__(self, *a): return self._real.__exit__(*a) + @property + def row_factory(self): return self._real.row_factory + @row_factory.setter + def row_factory(self, v): self._real.row_factory = v + + class _TrackingCursor: + def __init__(self, real): self._real = real + def execute(self, sql, *args): + if "IN (" in sql: + in_counts.append(sql.count("?")) + return self._real.execute(sql, *args) + def fetchall(self): return self._real.fetchall() + def fetchone(self): return self._real.fetchone() + + monkeypatch.setattr(sqlite3, "connect", _TrackingConn) + + # Request 1500 ids (the full set we inserted) + wanted = [f"session_{i:04d}" for i in range(1500)] + agent_sessions.read_session_lineage_metadata(db, wanted) + + assert in_counts, "Expected at least one IN-clause query" + over_limit = [n for n in in_counts if n > 999] + assert not over_limit, ( + f"IN clause must be chunked to <= 999 vars to stay under SQLite default " + f"limit (chunked to 500 in the implementation). Found queries with " + f"{over_limit} variables — would raise OperationalError on older sqlite." + ) + + +def test_two_children_sharing_non_continuation_parent_not_collapsed(tmp_path): + """The contract Opus flagged: when two distinct WebUI sessions share the + same parent and that parent's end_reason is NOT compression/cli_close + (e.g. 'user_stop'), neither child should expose parent_session_id. + The frontend's #1358 lineage helper would otherwise group them under + the parent's id key and incorrectly collapse the rows. + """ + from api.agent_sessions import read_session_lineage_metadata + + db = tmp_path / "state.db" + conn = _make_db(db) + _insert(conn, "shared_parent", end_reason="user_stop") + _insert(conn, "child_a", parent="shared_parent", end_reason=None) + _insert(conn, "child_b", parent="shared_parent", end_reason=None) + conn.close() + + result = read_session_lineage_metadata(db, ["child_a", "child_b"]) + # Neither child should expose parent_session_id (would let frontend + # group them under the same key and collapse one) + for sid in ["child_a", "child_b"]: + entry = result.get(sid, {}) + assert "parent_session_id" not in entry, ( + f"{sid} leaked parent_session_id despite parent being non-continuation" + ) + + def test_non_compression_parent_does_not_extend_lineage(tmp_path): """If parent's end_reason is something OTHER than 'compression' or 'cli_close' (e.g. 'user_stop', 'session_reset', 'cron_complete'), - the chain stops at that boundary.""" + the chain stops at that boundary AND parent_session_id is not exposed + (tightened in v0.50.251 per Opus SHOULD-FIX 1 — exposing the parent + ref would let the frontend group two children sharing the same + non-continuation parent into a single sidebar row). + """ from api.agent_sessions import read_session_lineage_metadata db = tmp_path / "state.db" @@ -193,8 +280,11 @@ def test_non_compression_parent_does_not_extend_lineage(tmp_path): conn.close() result = read_session_lineage_metadata(db, ["child"]) - # parent_session_id should still be exposed (parent exists) - assert result["child"]["parent_session_id"] == "parent" - # But _lineage_root_id should NOT be set — chain doesn't span the boundary - assert "_lineage_root_id" not in result["child"] - assert "_compression_segment_count" not in result["child"] + # parent_session_id must NOT be exposed (parent's end_reason is non-continuation) + assert "child" not in result or "parent_session_id" not in result["child"], ( + "parent_session_id leaked through despite parent's end_reason being " + "'user_stop' — would cause incorrect sidebar collapse." + ) + # _lineage_root_id should NOT be set — chain doesn't span the boundary + assert "child" not in result or "_lineage_root_id" not in result["child"] + assert "child" not in result or "_compression_segment_count" not in result["child"] diff --git a/tests/test_session_lineage_metadata_api.py b/tests/test_session_lineage_metadata_api.py index fdf04b6d..8d1d282f 100644 --- a/tests/test_session_lineage_metadata_api.py +++ b/tests/test_session_lineage_metadata_api.py @@ -121,7 +121,12 @@ def test_non_compression_state_db_parent_does_not_create_sidebar_lineage(_isolat rows = {row["session_id"]: row for row in all_sessions()} - assert rows["lineage_api_plain_child"].get("parent_session_id") == "lineage_api_plain_parent" + # parent_session_id must NOT be exposed when the parent's end_reason + # is not a continuation (compression / cli_close). The frontend's + # _sessionLineageKey would otherwise group two children sharing a + # `user_stop` parent under the same key — incorrect collapse. + # (Tightened in v0.50.251 per Opus SHOULD-FIX 1.) + assert "parent_session_id" not in rows["lineage_api_plain_child"] assert "_lineage_root_id" not in rows["lineage_api_plain_child"] finally: conn.close() From c5f4f569d675b06da5bc9aa3b68b1c363d8dec4a Mon Sep 17 00:00:00 2001 From: bergeouss Date: Thu, 30 Apr 2026 23:24:29 +0000 Subject: [PATCH 5/8] fix(#1361): preserve reasoning, tool calls, and partial output on Stop/Cancel (#1375) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three distinct data-loss paths fixed: §A — Reasoning text was accumulated in a thread-local _reasoning_text inside _run_agent_streaming. cancel_stream() never saw it because it went out of scope when the thread was interrupted. Now mirrored to a new shared dict STREAM_REASONING_TEXT keyed by stream_id, populated in on_reasoning() and the reasoning branch of on_tool(), read in cancel_stream(). §B — Live tool calls in thread-local _live_tool_calls were similarly invisible to cancel_stream(). Now mirrored to STREAM_LIVE_TOOL_CALLS on tool.started + tool.completed. §C — Reasoning-only streams produced no partial message because the thinking-block regex strip returned empty string and the `if _stripped:` guard skipped the append. Now appends the partial message when EITHER content text, reasoning trace, OR tool calls exist. Mirrors the existing STREAM_PARTIAL_TEXT pattern from #893 exactly: same dict creation in _run_agent_streaming, same _live_config fallback in cancel_stream, same cleanup in _periodic_checkpoint. 8 regression tests in tests/test_issue1361_cancel_data_loss.py covering all three sections plus tools+text combinations. Co-authored-by: bergeouss --- api/config.py | 2 + api/streaming.py | 74 +++++- tests/test_issue1361_cancel_data_loss.py | 317 +++++++++++++++++++++++ 3 files changed, 382 insertions(+), 11 deletions(-) create mode 100644 tests/test_issue1361_cancel_data_loss.py diff --git a/api/config.py b/api/config.py index e49ba23a..45694f40 100644 --- a/api/config.py +++ b/api/config.py @@ -2115,6 +2115,8 @@ STREAMS_LOCK = threading.Lock() CANCEL_FLAGS: dict = {} AGENT_INSTANCES: dict = {} # stream_id -> AIAgent instance for interrupt propagation STREAM_PARTIAL_TEXT: dict = {} # stream_id -> partial assistant text accumulated during streaming +STREAM_REASONING_TEXT: dict = {} # stream_id -> reasoning trace accumulated during streaming (#1361 §A) +STREAM_LIVE_TOOL_CALLS: dict = {} # stream_id -> live tool calls accumulated during streaming (#1361 §B) SERVER_START_TIME = time.time() # Agent cache: reuse AIAgent across messages in the same WebUI session so that diff --git a/api/streaming.py b/api/streaming.py index 62a034a8..e6c6700c 100644 --- a/api/streaming.py +++ b/api/streaming.py @@ -21,6 +21,7 @@ logger = logging.getLogger(__name__) from api.config import ( STREAMS, STREAMS_LOCK, CANCEL_FLAGS, AGENT_INSTANCES, STREAM_PARTIAL_TEXT, + STREAM_REASONING_TEXT, STREAM_LIVE_TOOL_CALLS, LOCK, SESSIONS, SESSION_DIR, _get_session_agent_lock, _set_thread_env, _clear_thread_env, SESSION_AGENT_LOCKS, SESSION_AGENT_LOCKS_LOCK, @@ -1373,6 +1374,8 @@ def _run_agent_streaming(session_id, msg_text, model, workspace, stream_id, atta with STREAMS_LOCK: CANCEL_FLAGS[stream_id] = cancel_event STREAM_PARTIAL_TEXT[stream_id] = '' # start accumulating partial text (#893) + STREAM_REASONING_TEXT[stream_id] = '' # start accumulating reasoning trace (#1361 §A) + STREAM_LIVE_TOOL_CALLS[stream_id] = [] # start accumulating tool calls (#1361 §B) # Register this stream with the global streaming meter meter().begin_session(stream_id) @@ -1575,6 +1578,9 @@ def _run_agent_streaming(session_id, msg_text, model, workspace, stream_id, atta if text is None: return _reasoning_text += str(text) + # Mirror to shared dict so cancel_stream() can persist it (#1361 §A) + if stream_id in STREAM_REASONING_TEXT: + STREAM_REASONING_TEXT[stream_id] += str(text) put('reasoning', {'text': str(text)}) # Track reasoning tokens in the meter so TPS reflects all AI output meter().record_reasoning(stream_id, len(_reasoning_text)) @@ -1607,6 +1613,9 @@ def _run_agent_streaming(session_id, msg_text, model, workspace, stream_id, atta reason_text = preview if event_type == 'reasoning.available' else name if reason_text: _reasoning_text += str(reason_text) + # Mirror to shared dict so cancel_stream() can persist it (#1361 §A) + if stream_id in STREAM_REASONING_TEXT: + STREAM_REASONING_TEXT[stream_id] += str(reason_text) put('reasoning', {'text': str(reason_text)}) meter().record_reasoning(stream_id, len(_reasoning_text)) _emit_metering() @@ -1623,6 +1632,13 @@ def _run_agent_streaming(session_id, msg_text, model, workspace, stream_id, atta 'name': name, 'args': args if isinstance(args, dict) else {}, }) + # Mirror to shared dict so cancel_stream() can persist it (#1361 §B) + if stream_id in STREAM_LIVE_TOOL_CALLS: + STREAM_LIVE_TOOL_CALLS[stream_id].append({ + 'name': name, + 'args': args if isinstance(args, dict) else {}, + 'done': False, + }) put('tool', { 'event_type': event_type or 'tool.started', 'name': name, @@ -1651,6 +1667,16 @@ def _run_agent_streaming(session_id, msg_text, model, workspace, stream_id, atta live_tc['duration'] = cb_kwargs.get('duration') live_tc['is_error'] = bool(cb_kwargs.get('is_error', False)) break + # Mirror done state to shared dict (#1361 §B) + if stream_id in STREAM_LIVE_TOOL_CALLS: + for shared_tc in reversed(STREAM_LIVE_TOOL_CALLS[stream_id]): + if shared_tc.get('done'): + continue + if not name or shared_tc.get('name') == name: + shared_tc['done'] = True + shared_tc['duration'] = cb_kwargs.get('duration') + shared_tc['is_error'] = bool(cb_kwargs.get('is_error', False)) + break # Signal the checkpoint thread that new work has completed (Issue #765). # Each completed tool call is a meaningful unit of progress worth persisting. _checkpoint_activity[0] += 1 @@ -2443,6 +2469,8 @@ def _run_agent_streaming(session_id, msg_text, model, workspace, stream_id, atta CANCEL_FLAGS.pop(stream_id, None) AGENT_INSTANCES.pop(stream_id, None) # Clean up agent instance reference STREAM_PARTIAL_TEXT.pop(stream_id, None) # Clean up partial text buffer (#893) + STREAM_REASONING_TEXT.pop(stream_id, None) # Clean up reasoning trace (#1361 §A) + STREAM_LIVE_TOOL_CALLS.pop(stream_id, None) # Clean up tool calls (#1361 §B) # ============================================================ # SECTION: HTTP Request Handler @@ -2630,6 +2658,17 @@ def cancel_stream(stream_id: str) -> bool: live_partials = getattr(_live_config, 'STREAM_PARTIAL_TEXT', partial_texts) if live_partials is not partial_texts: _cancel_partial_text = live_partials.get(stream_id, '') + # Capture reasoning trace and live tool calls (#1361 §A + §B) + _cancel_reasoning = STREAM_REASONING_TEXT.get(stream_id, '') + if not _cancel_reasoning: + live_reasoning = getattr(_live_config, 'STREAM_REASONING_TEXT', STREAM_REASONING_TEXT) + if live_reasoning is not STREAM_REASONING_TEXT: + _cancel_reasoning = live_reasoning.get(stream_id, '') + _cancel_tool_calls = STREAM_LIVE_TOOL_CALLS.get(stream_id, []) + if not _cancel_tool_calls: + live_tools = getattr(_live_config, 'STREAM_LIVE_TOOL_CALLS', STREAM_LIVE_TOOL_CALLS) + if live_tools is not STREAM_LIVE_TOOL_CALLS: + _cancel_tool_calls = live_tools.get(stream_id, []) # Session cleanup outside STREAMS_LOCK to preserve lock ordering. # Acquire the per-session _agent_lock too, mirroring every other session @@ -2708,7 +2747,13 @@ def cancel_stream(stream_id: str) -> bool: # content IS kept in the history sent to the agent on the next user # message, letting the model continue from where it was cut off. # See the inner comment on the append call below for the rationale. + # + # #1361: Also persist reasoning trace and live tool calls that were + # accumulated in thread-local variables but invisible to the cancel path. + # This prevents paid-token data loss when cancelling mid-reasoning or + # mid-tool-execution. partial_text = _cancel_partial_text.strip() if _cancel_partial_text else '' + _stripped = '' if partial_text: import re as _re # Strip thinking/reasoning markup from partial content before saving. @@ -2721,17 +2766,24 @@ def cancel_stream(stream_id: str) -> bool: _stripped = _re.sub(r']*>.*', '', _stripped, flags=_re.DOTALL | _re.IGNORECASE).strip() - if _stripped: - # Mark _partial=True for session/UI identification only. - # Deliberately NOT _error=True — the partial content is real model - # output and should be visible in conversation history so the model - # can continue from it on the next turn (#893). - _cs.messages.append({ - 'role': 'assistant', - 'content': _stripped, - '_partial': True, - 'timestamp': int(time.time()), - }) + # Determine whether there is anything to preserve beyond just the + # cancel marker. Content text, reasoning trace, or tool calls all + # count (#1361 §C — previously only _stripped was checked, so a + # reasoning-only or tool-only stream produced NO partial message). + _has_reasoning = bool(_cancel_reasoning and _cancel_reasoning.strip()) + _has_tools = bool(_cancel_tool_calls) + if _stripped or _has_reasoning or _has_tools: + _partial_msg: dict = { + 'role': 'assistant', + 'content': _stripped, # may be empty for reasoning/tool-only turns + '_partial': True, + 'timestamp': int(time.time()), + } + if _has_reasoning: + _partial_msg['reasoning'] = _cancel_reasoning.strip() + if _has_tools: + _partial_msg['tool_calls'] = list(_cancel_tool_calls) + _cs.messages.append(_partial_msg) # Cancel marker — flagged _error=True so it is stripped from conversation # history on the next turn (prevents model from seeing "Task cancelled." # as a prior assistant reply). diff --git a/tests/test_issue1361_cancel_data_loss.py b/tests/test_issue1361_cancel_data_loss.py new file mode 100644 index 00000000..c7f08d52 --- /dev/null +++ b/tests/test_issue1361_cancel_data_loss.py @@ -0,0 +1,317 @@ +"""Regression tests for #1361 — Stop/Cancel discards already-streamed content. + +Three distinct data-loss paths on cancel: + + §A Reasoning text accumulated in a thread-local `_reasoning_text` is never + visible to cancel_stream(), so it's lost on cancel. + §B Live tool calls accumulated in thread-local `_live_tool_calls` are lost + on cancel — only STREAM_PARTIAL_TEXT is captured. + §C When the entire streamed output is reasoning (no visible tokens), + _stripped is empty after regex cleanup, so NO partial assistant message + is appended — only the *Task cancelled.* marker survives. + +All three fix the same "tokens-paid-for-data-loss" class of bug. +""" + +import pathlib +import queue +import re +import threading +from unittest.mock import Mock, patch + +import pytest + +import api.config as config +import api.models as models +import api.streaming as streaming +from api.models import Session +from api.streaming import cancel_stream + +REPO_ROOT = pathlib.Path(__file__).parent.parent.resolve() + + +# ── Fixtures ──────────────────────────────────────────────────────────────── + +@pytest.fixture(autouse=True) +def _isolate_session_dir(tmp_path, monkeypatch): + """Redirect SESSION_DIR / SESSION_INDEX_FILE to an isolated temp dir.""" + session_dir = tmp_path / "sessions" + session_dir.mkdir() + index_file = session_dir / "_index.json" + monkeypatch.setattr(models, "SESSION_DIR", session_dir) + monkeypatch.setattr(models, "SESSION_INDEX_FILE", index_file) + models.SESSIONS.clear() + yield + models.SESSIONS.clear() + + +@pytest.fixture(autouse=True) +def _isolate_stream_state(): + """Clear all shared streaming dicts before/after each test.""" + config.STREAMS.clear() + config.CANCEL_FLAGS.clear() + config.AGENT_INSTANCES.clear() + config.STREAM_PARTIAL_TEXT.clear() + # New shared dicts for §A and §B + if hasattr(config, 'STREAM_REASONING_TEXT'): + config.STREAM_REASONING_TEXT.clear() + if hasattr(config, 'STREAM_LIVE_TOOL_CALLS'): + config.STREAM_LIVE_TOOL_CALLS.clear() + yield + config.STREAMS.clear() + config.CANCEL_FLAGS.clear() + config.AGENT_INSTANCES.clear() + config.STREAM_PARTIAL_TEXT.clear() + if hasattr(config, 'STREAM_REASONING_TEXT'): + config.STREAM_REASONING_TEXT.clear() + if hasattr(config, 'STREAM_LIVE_TOOL_CALLS'): + config.STREAM_LIVE_TOOL_CALLS.clear() + + +@pytest.fixture(autouse=True) +def _isolate_agent_locks(): + config.SESSION_AGENT_LOCKS.clear() + yield + config.SESSION_AGENT_LOCKS.clear() + + +def _make_session(session_id="cancel_sid_1361", + pending_msg="Help me debug this", + messages=None): + """Build a session in mid-stream state.""" + s = Session( + session_id=session_id, + title="Test Session", + messages=messages or [], + ) + s.pending_user_message = pending_msg + s.pending_attachments = [] + s.pending_started_at = None + s.active_stream_id = "stream_1361" + s.save() + models.SESSIONS[session_id] = s + return s + + +def _setup_cancel_state(session_id, stream_id="stream_1361"): + """Wire up STREAMS/CANCEL_FLAGS/AGENT_INSTANCES for cancel_stream().""" + config.STREAMS[stream_id] = queue.Queue() + config.CANCEL_FLAGS[stream_id] = threading.Event() + mock_agent = Mock() + mock_agent.session_id = session_id + mock_agent.interrupt = Mock() + config.AGENT_INSTANCES[stream_id] = mock_agent + return stream_id, mock_agent + + +# ── §A: Reasoning text lost on cancel ─────────────────────────────────────── + +class TestCancelPreservesReasoningText: + """§A: _reasoning_text is thread-local and invisible to cancel_stream(). + + After fix: reasoning text should be persisted in a shared dict + (STREAM_REASONING_TEXT) keyed by stream_id, and cancel_stream() + should append it as a 'reasoning' field on the partial assistant message. + """ + + def test_cancel_with_reasoning_only_preserves_reasoning(self): + """Cancel during reasoning phase (no visible tokens) should persist reasoning.""" + sid = "test_1361_a1" + stream_id = "stream_a1" + s = _make_session(session_id=sid) + _setup_cancel_state(sid, stream_id) + + # Simulate: reasoning was accumulated but no visible tokens + reasoning = "Let me think about this step by step..." + config.STREAM_PARTIAL_TEXT[stream_id] = "" # no visible tokens + + if hasattr(config, 'STREAM_REASONING_TEXT'): + config.STREAM_REASONING_TEXT[stream_id] = reasoning + + cancel_stream(stream_id) + + # Reload and check + s2 = models.SESSIONS[sid] + msgs = s2.messages + # There should be a partial assistant message with reasoning + assistant_msgs = [m for m in msgs if isinstance(m, dict) and m.get('role') == 'assistant'] + has_reasoning = any(m.get('reasoning') for m in assistant_msgs) + assert has_reasoning, \ + f"Expected reasoning field on partial assistant msg after cancel. Got messages: {assistant_msgs}" + + def test_cancel_with_reasoning_and_partial_tokens_preserves_both(self): + """Cancel mid-stream with both reasoning and some visible tokens.""" + sid = "test_1361_a2" + stream_id = "stream_a2" + s = _make_session(session_id=sid) + _setup_cancel_state(sid, stream_id) + + reasoning = "Let me analyze the code..." + partial_text = "Based on my analysis, the bug is in the" + config.STREAM_PARTIAL_TEXT[stream_id] = partial_text + + if hasattr(config, 'STREAM_REASONING_TEXT'): + config.STREAM_REASONING_TEXT[stream_id] = reasoning + + cancel_stream(stream_id) + + s2 = models.SESSIONS[sid] + assistant_msgs = [m for m in s2.messages if isinstance(m, dict) and m.get('role') == 'assistant'] + # Should have partial content + partial_msgs = [m for m in assistant_msgs if m.get('_partial')] + has_content = any(m.get('content') for m in partial_msgs) + assert has_content, \ + f"Expected partial assistant content after cancel. Got: {partial_msgs}" + + def test_cancel_without_reasoning_dict_works_as_before(self): + """If STREAM_REASONING_TEXT doesn't exist yet (pre-fix), cancel still works.""" + sid = "test_1361_a3" + stream_id = "stream_a3" + s = _make_session(session_id=sid) + _setup_cancel_state(sid, stream_id) + + config.STREAM_PARTIAL_TEXT[stream_id] = "Some partial text" + + cancel_stream(stream_id) + + s2 = models.SESSIONS[sid] + msgs = s2.messages + # Should have the cancel marker + has_cancel = any( + isinstance(m, dict) and m.get('role') == 'assistant' and m.get('_error') + for m in msgs + ) + assert has_cancel, "Cancel marker should always be present" + + +# ── §B: Tool calls lost on cancel ─────────────────────────────────────────── + +class TestCancelPreservesToolCalls: + """§B: _live_tool_calls is thread-local and invisible to cancel_stream(). + + After fix: tool calls should be persisted in a shared dict + (STREAM_LIVE_TOOL_CALLS) keyed by stream_id, and cancel_stream() + should append them as tool_call entries on the partial assistant message. + """ + + def test_cancel_with_tool_calls_preserves_tools(self): + """Cancel after tool execution should preserve the tool call info.""" + sid = "test_1361_b1" + stream_id = "stream_b1" + s = _make_session(session_id=sid) + _setup_cancel_state(sid, stream_id) + + config.STREAM_PARTIAL_TEXT[stream_id] = "" + + if hasattr(config, 'STREAM_LIVE_TOOL_CALLS'): + config.STREAM_LIVE_TOOL_CALLS[stream_id] = [ + {"name": "read_file", "args": {"path": "/tmp/test.py"}, "done": True}, + {"name": "terminal", "args": {"command": "ls"}, "done": False}, + ] + + cancel_stream(stream_id) + + s2 = models.SESSIONS[sid] + assistant_msgs = [m for m in s2.messages if isinstance(m, dict) and m.get('role') == 'assistant'] + has_tools = any(m.get('tool_calls') or m.get('tools') for m in assistant_msgs) + assert has_tools, \ + f"Expected tool_calls on partial assistant msg after cancel. Got: {assistant_msgs}" + + def test_cancel_with_tools_and_text_preserves_both(self): + """Cancel after tools + partial text should keep both.""" + sid = "test_1361_b2" + stream_id = "stream_b2" + s = _make_session(session_id=sid) + _setup_cancel_state(sid, stream_id) + + config.STREAM_PARTIAL_TEXT[stream_id] = "Here's what I found:" + if hasattr(config, 'STREAM_LIVE_TOOL_CALLS'): + config.STREAM_LIVE_TOOL_CALLS[stream_id] = [ + {"name": "web_search", "args": {"query": "test"}, "done": True}, + ] + + cancel_stream(stream_id) + + s2 = models.SESSIONS[sid] + assistant_msgs = [m for m in s2.messages if isinstance(m, dict) and m.get('role') == 'assistant'] + partial_msgs = [m for m in assistant_msgs if m.get('_partial')] + has_content = any(m.get('content') for m in partial_msgs) + assert has_content, \ + f"Expected partial content with tools after cancel. Got: {partial_msgs}" + + +# ── §C: Empty _stripped skips entire append ───────────────────────────────── + +class TestCancelWithReasoningOnlyNoText: + """§C: When streaming was 100% reasoning (no visible tokens), _stripped is + empty after regex cleanup, so no partial assistant message is appended. + + After fix: even when _stripped is empty, if reasoning or tool calls exist, + a partial assistant message should be appended (with no content, but with + reasoning and/or tool_calls fields). + """ + + def test_reasoning_only_creates_partial_message(self): + """Cancel after reasoning-only output should still create a partial msg.""" + sid = "test_1361_c1" + stream_id = "stream_c1" + s = _make_session(session_id=sid) + _setup_cancel_state(sid, stream_id) + + # Only reasoning, no visible tokens at all + config.STREAM_PARTIAL_TEXT[stream_id] = "" + + if hasattr(config, 'STREAM_REASONING_TEXT'): + config.STREAM_REASONING_TEXT[stream_id] = "Deep reasoning here..." + + cancel_stream(stream_id) + + s2 = models.SESSIONS[sid] + assistant_msgs = [m for m in s2.messages if isinstance(m, dict) and m.get('role') == 'assistant'] + # Should NOT be only the cancel marker — there should be a partial msg + partial_msgs = [m for m in assistant_msgs if m.get('_partial')] + assert len(partial_msgs) > 0, \ + f"Expected at least one partial assistant msg for reasoning-only cancel. Got: {assistant_msgs}" + + def test_tools_only_creates_partial_message(self): + """Cancel after tool-only output (no text, no reasoning) should still create a partial msg.""" + sid = "test_1361_c2" + stream_id = "stream_c2" + s = _make_session(session_id=sid) + _setup_cancel_state(sid, stream_id) + + config.STREAM_PARTIAL_TEXT[stream_id] = "" + + if hasattr(config, 'STREAM_LIVE_TOOL_CALLS'): + config.STREAM_LIVE_TOOL_CALLS[stream_id] = [ + {"name": "read_file", "args": {"path": "/tmp/x"}, "done": True}, + ] + + cancel_stream(stream_id) + + s2 = models.SESSIONS[sid] + assistant_msgs = [m for m in s2.messages if isinstance(m, dict) and m.get('role') == 'assistant'] + partial_msgs = [m for m in assistant_msgs if m.get('_partial')] + assert len(partial_msgs) > 0, \ + f"Expected at least one partial assistant msg for tools-only cancel. Got: {assistant_msgs}" + + def test_no_reasoning_no_tools_no_partial(self): + """Cancel with no reasoning and no tools and no text = only cancel marker (no change).""" + sid = "test_1361_c3" + stream_id = "stream_c3" + s = _make_session(session_id=sid) + _setup_cancel_state(sid, stream_id) + + config.STREAM_PARTIAL_TEXT[stream_id] = "" + + cancel_stream(stream_id) + + s2 = models.SESSIONS[sid] + assistant_msgs = [m for m in s2.messages if isinstance(m, dict) and m.get('role') == 'assistant'] + # Should only have the cancel marker, no partial messages + partial_msgs = [m for m in assistant_msgs if m.get('_partial')] + cancel_msgs = [m for m in assistant_msgs if m.get('_error')] + assert len(partial_msgs) == 0, \ + f"Expected no partial msg when nothing was streamed. Got partials: {partial_msgs}" + assert len(cancel_msgs) == 1, \ + f"Expected exactly 1 cancel marker. Got: {cancel_msgs}" From f14280e2c432e67990b7605d4eb74a7346ada422 Mon Sep 17 00:00:00 2001 From: bergeouss Date: Thu, 30 Apr 2026 23:24:31 +0000 Subject: [PATCH 6/8] fix(#1195): route sessions to profile dir even when dir doesn't exist yet (#1373) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a user switched profiles and created a new session, the session was saved to the default profile directory instead of the active profile directory — because get_hermes_home_for_profile() silently fell back to _DEFAULT_HERMES_HOME when the profile directory didn't exist yet on disk. Root cause: api/profiles.py:156 had `if profile_dir.is_dir(): return profile_dir; return _DEFAULT_HERMES_HOME`. New profiles (no session yet, so no dir) routed every session back to default. Fix: remove the is_dir() guard, return the profile path unconditionally. The profile directory is created on first use by the agent/session layer. 5 regression tests in tests/test_issue1195_session_profile_routing.py: existing-profile, non-existent-profile (the core fix), None, empty- string, 'default' all return the expected path. Co-authored-by: bergeouss --- api/profiles.py | 4 +- docs/ISSUES.md | 23 ++++++ .../test_issue1195_session_profile_routing.py | 81 +++++++++++++++++++ 3 files changed, 105 insertions(+), 3 deletions(-) create mode 100644 docs/ISSUES.md create mode 100644 tests/test_issue1195_session_profile_routing.py diff --git a/api/profiles.py b/api/profiles.py index a3965991..f87ac985 100644 --- a/api/profiles.py +++ b/api/profiles.py @@ -153,9 +153,7 @@ def get_hermes_home_for_profile(name: str) -> Path: if not name or name == 'default' or not _PROFILE_ID_RE.match(name): return _DEFAULT_HERMES_HOME profile_dir = _DEFAULT_HERMES_HOME / 'profiles' / name - if profile_dir.is_dir(): - return profile_dir - return _DEFAULT_HERMES_HOME + return profile_dir _TERMINAL_ENV_MAPPINGS = { diff --git a/docs/ISSUES.md b/docs/ISSUES.md new file mode 100644 index 00000000..e3f65641 --- /dev/null +++ b/docs/ISSUES.md @@ -0,0 +1,23 @@ +# Upstream Issues — Root Cause Analysis + +## #1256: Browser tools fail with "Playwright not installed" + +### Root Cause +The check lives in **hermes-agent** (upstream), not hermes-webui: + +``` +hermes-agent/tools/browser_tool.py → check_browser_requirements() +``` + +`check_browser_requirements()` does not recognize CDP (Chrome DevTools Protocol) mode — it only looks for a local Playwright/Puppeteer install. When the agent runs in CDP mode (connecting to an existing browser), the check still fails. + +### WebUI side +The WebUI already passes `CLI_TOOLSETS` correctly per-request. The `enabled_toolsets` field in the cron/chat config is dynamic and works as intended. + +### Fix required +The fix must happen in `hermes-agent/tools/browser_tool.py`: +- `check_browser_requirements()` should skip the Playwright check when CDP mode is configured +- Or add a `BROWSER_MODE=cdp` env var that bypasses the local browser requirement + +### Workaround +Use `CLOUD_BROWSER=true` or configure `browser.base_url` to point to a remote CDP endpoint. This bypasses the local Playwright requirement. diff --git a/tests/test_issue1195_session_profile_routing.py b/tests/test_issue1195_session_profile_routing.py new file mode 100644 index 00000000..534a3f73 --- /dev/null +++ b/tests/test_issue1195_session_profile_routing.py @@ -0,0 +1,81 @@ +"""Tests for issue #1195: sessions must route to the correct profile directory +even when that profile directory does not exist yet on disk.""" + +import os +import tempfile +from pathlib import Path +from unittest.mock import patch + +import pytest + + +# ── helpers ────────────────────────────────────────────────────────────────── + +def _make_hermes_home(base: Path, profile_name: str | None = None) -> Path: + """Create a temp HERMES_HOME (with optional profile dir) and return it.""" + hermes_home = base / ".hermes" + hermes_home.mkdir(parents=True, exist_ok=True) + if profile_name: + (hermes_home / "profiles" / profile_name).mkdir(parents=True, exist_ok=True) + return hermes_home + + +# ── tests ──────────────────────────────────────────────────────────────────── + +class TestGetHermesHomeForProfile: + """get_hermes_home_for_profile() must return the profile path regardless of + whether the directory already exists on disk (#1195).""" + + @pytest.fixture(autouse=True) + def _patch_default_home(self, tmp_path): + """Patch _DEFAULT_HERMES_HOME to a temp directory for isolation.""" + from api.profiles import _DEFAULT_HERMES_HOME as real_default + + fake_home = tmp_path / ".hermes" + fake_home.mkdir(parents=True) + with patch("api.profiles._DEFAULT_HERMES_HOME", fake_home): + yield fake_home, real_default + + def test_existing_profile_returns_profile_dir(self, _patch_default_home): + fake_home, _ = _patch_default_home + from api.profiles import get_hermes_home_for_profile + + # Create an existing profile directory + profile_dir = fake_home / "profiles" / "ayan" + profile_dir.mkdir(parents=True) + + result = get_hermes_home_for_profile("ayan") + assert result == profile_dir + + def test_nonexistent_profile_still_returns_profile_path(self, _patch_default_home): + """Core bug fix: profile dir doesn't exist yet but should still route there.""" + fake_home, _ = _patch_default_home + from api.profiles import get_hermes_home_for_profile + + # Do NOT create the profile directory + expected = fake_home / "profiles" / "newprofile" + assert not expected.exists() # confirm it doesn't exist + + result = get_hermes_home_for_profile("newprofile") + assert result == expected, "Should route to profile path even when dir missing" + + def test_none_returns_default(self, _patch_default_home): + fake_home, _ = _patch_default_home + from api.profiles import get_hermes_home_for_profile + + result = get_hermes_home_for_profile(None) + assert result == fake_home + + def test_empty_string_returns_default(self, _patch_default_home): + fake_home, _ = _patch_default_home + from api.profiles import get_hermes_home_for_profile + + result = get_hermes_home_for_profile("") + assert result == fake_home + + def test_default_string_returns_default(self, _patch_default_home): + fake_home, _ = _patch_default_home + from api.profiles import get_hermes_home_for_profile + + result = get_hermes_home_for_profile("default") + assert result == fake_home From d071e46e1f66c0580a0c5a5807b6c8ec40d636b2 Mon Sep 17 00:00:00 2001 From: nesquena-hermes Date: Thu, 30 Apr 2026 23:27:04 +0000 Subject: [PATCH 7/8] release: add #1373 + #1375; fix R19c/R19j contracts for #1373 behavior change MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds two more contributor PRs to the v0.50.251 batch per user directive (per-PR review + Opus review for #1373; #1375 was clean ship-on-sight). #1375 (@bergeouss, +382 LOC, all CI green) — fixes #1361 paid-token data loss on Stop/Cancel. Mirrors the existing STREAM_PARTIAL_TEXT pattern from #893: adds STREAM_REASONING_TEXT and STREAM_LIVE_TOOL_CALLS shared dicts populated during streaming and read by cancel_stream(). Also fixes the §C reasoning-only-creates-no-message gap where the strip-thinking-blocks regex returned empty string and the if-guard skipped the partial append. 8 regression tests covering all 3 sections plus tools+text combinations. #1373 (@bergeouss, +105 LOC, had CI failures pre-fix) — fixes #1195 new-profile-routes-to-default. The is_dir() guard in get_hermes_home_for_profile() caused new profiles (no session yet) to silently route every session back to the default profile until the directory existed on disk. Removed the guard; profile path is now returned unconditionally. Pre-release fix for #1373's CI failures: the change flipped two behaviors pinned by tests in #798: - R19c (test_get_hermes_home_for_profile_falls_back_for_missing_profile) asserted nonexistent → base. Renamed and updated to assert the new always-return-profile-path behavior. - R19j (test_get_hermes_home_for_profile_rejects_path_traversal) asserted that valid-but-nonexistent profile names → base. Updated to assert profile-scoped path. Also updated docstring: the _PROFILE_ID_RE regex is now the SOLE defense against path traversal (previously is_dir() was a defense-in-depth layer); verified each known-bad shape still returns base. Tests: 3484 passing (3471 → 3484, +13). --- CHANGELOG.md | 2 ++ tests/test_issue798.py | 26 +++++++++++++++++--------- 2 files changed, 19 insertions(+), 9 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2cd65d0b..e6c02638 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,8 @@ ### Fixed - **Sidebar lineage collapse now works for WebUI JSON sessions, not just imported gateway rows** — PR #1358 (v0.50.249) added the client-side lineage-collapse helper but `/api/sessions` only included `_lineage_root_id` for gateway-imported rows. WebUI JSON sessions (the common case) had no grouping key, so cross-surface continuation chains (CLI-close → WebUI continuation, or compression chains within WebUI) still rendered as separate sidebar rows. Now `/api/sessions` reads `parent_session_id` and `end_reason` from `state.db.sessions` for every WebUI session id in the sidebar payload, walks the parent chain when `end_reason in {'compression', 'cli_close'}`, and exposes `_lineage_root_id` + `_compression_segment_count`. Cycle-detected via a `seen` set; depth-bounded to 20 hops to cap pathological data. **Pre-release fix:** swapped the original full-table-scan for a parameterized `WHERE id IN (...)` query that hits PRIMARY KEY + `idx_sessions_parent` — ~50× faster at 1000 rows, scales linearly. **Pre-release fix:** chunked IN clause to 500 vars to stay under SQLITE_MAX_VARIABLE_NUMBER on older sqlite (Python 3.9 ships sqlite 3.31 with default limit 999) — without this a power user with 2000+ sessions in the sidebar would hit `OperationalError: too many SQL variables`, the silent except-wrapper would swallow it, and lineage collapse would never work for them. **Pre-release fix:** tightened `parent_session_id` exposure — only emitted when the parent's `end_reason` is `compression` or `cli_close` (not for `user_stop`/etc), since the frontend's `_sessionLineageKey` falls through to `parent_session_id` and would incorrectly collapse two children of a non-continuation parent into a single row. (`api/agent_sessions.py`, `api/models.py`, `tests/test_session_lineage_metadata_api.py`, `tests/test_pr1370_lineage_metadata_perf_and_orphan.py`, `tests/test_gateway_sync.py`) @dso2ng — PR #1370 - **Manual cron runs persist output and metadata like scheduled runs** — manual WebUI cron runs called `cron.scheduler.run_job(job)` and then only cleared the in-memory running flag. The job's output was dropped (never written via `save_job_output`) and `last_run_at` / `last_status` were never updated. Now the manual-run wrapper (`_run_cron_tracked`) matches the scheduled-cron path at `cron/scheduler.py:1334-1364` exactly: saves output, marks the job complete, treats empty `final_response` as a soft failure (with the same error string), and records failures via `mark_job_run(False, str(e))`. (`api/routes.py`, `tests/test_cron_manual_run_persistence.py`) @NocGeek — PR #1372 (split out from the held #1352 per pre-release feedback) +- **Reasoning trace, tool calls, and partial output preserved on Stop/Cancel** — three distinct data-loss paths fixed: §A reasoning text accumulated in a thread-local `_reasoning_text` was invisible to `cancel_stream()` because it went out of scope when the thread was interrupted; §B live tool calls in thread-local `_live_tool_calls` were similarly lost; §C reasoning-only streams (no visible tokens) produced no partial assistant message because the thinking-block regex strip returned empty string and the `if _stripped:` guard skipped the append. The fix mirrors the existing `STREAM_PARTIAL_TEXT` pattern (#893) by adding two new shared dicts (`STREAM_REASONING_TEXT`, `STREAM_LIVE_TOOL_CALLS`) populated during streaming and read by `cancel_stream()`. The cancel path now appends the partial assistant message when content text, reasoning trace, OR tool calls exist (not just text). Eliminates "paid tokens disappeared" reports on Stop. 8 regression tests covering all three sections plus tools+text combinations. (`api/config.py`, `api/streaming.py`, `tests/test_issue1361_cancel_data_loss.py`) @bergeouss — PR #1375, fixes #1361 +- **New profiles route sessions to the profile dir on first use, not back to default** — `get_hermes_home_for_profile()` had a `if profile_dir.is_dir(): return profile_dir; return _DEFAULT_HERMES_HOME` fallback. New profiles (no session yet, so no dir) routed every session back to default until the directory existed on disk — making profile switching silently broken for the first session of every new profile. Removed the `is_dir()` guard; the profile path is now returned unconditionally and the directory is created on first use by the agent/session layer. Path traversal is still blocked by the `_PROFILE_ID_RE` regex (`^[a-z0-9][a-z0-9_-]{0,63}$`); R19j tests were updated to pin that the regex is now the sole defense. R19c was tightened to assert the new behavior. 5 regression tests in `test_issue1195_session_profile_routing.py` covering existing-profile, non-existent-profile (the core fix), None, empty-string, and 'default' return paths. (`api/profiles.py`, `tests/test_issue798.py`, `tests/test_issue1195_session_profile_routing.py`) @bergeouss — PR #1373, fixes #1195 ## [v0.50.250] — 2026-04-30 diff --git a/tests/test_issue798.py b/tests/test_issue798.py index fef8a00b..702bef9f 100644 --- a/tests/test_issue798.py +++ b/tests/test_issue798.py @@ -40,13 +40,18 @@ def test_get_hermes_home_for_profile_returns_profile_subdir(tmp_path, monkeypatc assert result == profile_dir -def test_get_hermes_home_for_profile_falls_back_for_missing_profile(tmp_path, monkeypatch): - """R19c: Named profile that does not exist falls back to base home.""" +def test_get_hermes_home_for_profile_returns_profile_path_for_missing_profile(tmp_path, monkeypatch): + """R19c: Named profile that does not exist on disk now returns the + profile-scoped path (created on first use by the agent layer), NOT the + base home. Tightened in v0.50.251 / PR #1373 to fix #1195: the previous + is_dir() fallback caused new profiles to silently route every session + back to the default profile until the directory existed on disk. + Path traversal is still blocked by the _PROFILE_ID_RE regex (R19j).""" import api.profiles as p monkeypatch.setattr(p, '_DEFAULT_HERMES_HOME', tmp_path) result = p.get_hermes_home_for_profile('ghost') - assert result == tmp_path + assert result == tmp_path / 'profiles' / 'ghost' def test_get_hermes_home_for_profile_does_not_mutate_globals(): @@ -171,8 +176,10 @@ def test_sessions_js_sends_profile_in_new_session_post(): def test_get_hermes_home_for_profile_rejects_path_traversal(): """R19j: get_hermes_home_for_profile() must reject names that don't match - _PROFILE_ID_RE (e.g. path traversal like '../../etc') and return the base home. - The regex guard is defence-in-depth on top of the is_dir() fallback.""" + _PROFILE_ID_RE (e.g. path traversal like '../../etc') and return the base + home. After v0.50.251 / PR #1373 removed the is_dir() fallback, the regex + is the SOLE guard against path traversal — verify each known-bad shape + still returns the base home, not a traversed path.""" import api.profiles as p base = p._DEFAULT_HERMES_HOME assert p.get_hermes_home_for_profile('../../etc') == base @@ -180,7 +187,8 @@ def test_get_hermes_home_for_profile_rejects_path_traversal(): assert p.get_hermes_home_for_profile('/absolute/path') == base assert p.get_hermes_home_for_profile('has spaces') == base assert p.get_hermes_home_for_profile('UPPERCASE') == base - # Valid names still work - assert p.get_hermes_home_for_profile('alice') == base # nonexistent → fallback - assert p.get_hermes_home_for_profile('my-profile') == base - assert p.get_hermes_home_for_profile('profile_1') == base + # Valid names now route to the profile-scoped path (created on first use). + # Previously these returned `base` because no profile dir existed on disk. + assert p.get_hermes_home_for_profile('alice') == base / 'profiles' / 'alice' + assert p.get_hermes_home_for_profile('my-profile') == base / 'profiles' / 'my-profile' + assert p.get_hermes_home_for_profile('profile_1') == base / 'profiles' / 'profile_1' From f53556b3ff1098935b56318a01d9499388eff61e Mon Sep 17 00:00:00 2001 From: nesquena-hermes Date: Thu, 30 Apr 2026 23:43:23 +0000 Subject: [PATCH 8/8] fix(cancel-stream): rename tool_calls to _partial_tool_calls (Opus MUST-FIX) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Opus pass-2 review of v0.50.251 caught a critical regression in PR #1375: The cancel-partial message stored captured tool calls under the 'tool_calls' key. That key is whitelisted by _API_SAFE_MSG_KEYS so _sanitize_messages_for_api forwarded the entries to the next-turn LLM call. But the captured entries use the WebUI internal shape ({name, args, done, duration, is_error}) — they don't have the OpenAI/Anthropic id + function: {name, arguments} envelope. Strict providers (OpenAI, Anthropic, Z.AI/GLM) would 400 on the malformed entries. Net effect: the very cancel-then-continue scenario PR #1375 aimed to improve becomes a hard fail. Fix: - Rename the persisted key to '_partial_tool_calls' (underscore- prefixed private key NOT in _API_SAFE_MSG_KEYS, so sanitize correctly strips it). - Update static/messages.js hasMessageToolMetadata check to also recognize _partial_tool_calls for UI rendering. - Update test_issue1361_cancel_data_loss.py assertion to check _partial_tool_calls (and tool_calls as legacy fallback). Plus 2 NIT fixes from the same Opus review: NIT 1 (api/profiles.py:153): re.match → re.fullmatch for consistency with other _PROFILE_ID_RE callers in the codebase. The trailing- newline footgun ($ matches before final \n in re.match) is now closed. Without #1373's is_dir() guard, a name like 'valid\n' would have created a directory named 'valid\n' on Linux. Doesn't escape /profiles/ via Path joining, but unintended. NIT 2 (test_issue798.py): R19j coverage gaps — added trailing- newline tests, length-boundary tests (64-char valid, 65-char rejected), single-char minimum, and non-ASCII / Unicode-trick tests. New regression test (tests/test_pr1375_partial_tool_calls_sanitize.py): - test_partial_tool_calls_field_not_forwarded_to_llm: pins that sanitize-for-API strips _partial_tool_calls + reasoning + does NOT have tool_calls on a partial message - test_legitimate_tool_calls_are_preserved_for_completed_turns: pins that real OpenAI-shape tool_calls on completed turns survive sanitize unchanged Tests: 3486 passing (3484 → 3486, +2 sanitize tests). --- api/profiles.py | 2 +- api/streaming.py | 18 ++- static/messages.js | 7 +- tests/test_issue1361_cancel_data_loss.py | 4 +- tests/test_issue798.py | 14 +++ ...test_pr1375_partial_tool_calls_sanitize.py | 105 ++++++++++++++++++ 6 files changed, 145 insertions(+), 5 deletions(-) create mode 100644 tests/test_pr1375_partial_tool_calls_sanitize.py diff --git a/api/profiles.py b/api/profiles.py index f87ac985..fc94a336 100644 --- a/api/profiles.py +++ b/api/profiles.py @@ -150,7 +150,7 @@ def get_hermes_home_for_profile(name: str) -> Path: empty, 'default', or does not match the profile-name format (rejects path traversal such as '../../etc'). """ - if not name or name == 'default' or not _PROFILE_ID_RE.match(name): + if not name or name == 'default' or not _PROFILE_ID_RE.fullmatch(name): return _DEFAULT_HERMES_HOME profile_dir = _DEFAULT_HERMES_HOME / 'profiles' / name return profile_dir diff --git a/api/streaming.py b/api/streaming.py index e6c6700c..4ed12b83 100644 --- a/api/streaming.py +++ b/api/streaming.py @@ -2782,7 +2782,23 @@ def cancel_stream(stream_id: str) -> bool: if _has_reasoning: _partial_msg['reasoning'] = _cancel_reasoning.strip() if _has_tools: - _partial_msg['tool_calls'] = list(_cancel_tool_calls) + # NOTE: store under the private '_partial_tool_calls' key + # (NOT 'tool_calls'). The captured entries use the WebUI + # internal shape {name, args, done, duration, is_error} + # — they do NOT carry the OpenAI/Anthropic API id + + # function: {name, arguments} envelope. If we put them + # under 'tool_calls', `_sanitize_messages_for_api` + # (which whitelists 'tool_calls' via _API_SAFE_MSG_KEYS) + # would forward them to the next-turn LLM call and + # strict providers (OpenAI, Anthropic, Z.AI/GLM) would + # 400 on the malformed entries — turning a "data lost + # on cancel" bug into a "next message returns 400" + # bug, which is worse. The underscore-prefixed key is + # not in the whitelist, so sanitize strips it. The UI + # reads it via static/messages.js and renders it + # alongside the regular tool_calls path. + # (Opus pre-release review pass 2 of v0.50.251.) + _partial_msg['_partial_tool_calls'] = list(_cancel_tool_calls) _cs.messages.append(_partial_msg) # Cancel marker — flagged _error=True so it is stripped from conversation # history on the next turn (prevents model from seeing "Task cancelled." diff --git a/static/messages.js b/static/messages.js index e9e9b868..7bbf8c19 100644 --- a/static/messages.js +++ b/static/messages.js @@ -1072,9 +1072,14 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){ S.session=session;S.messages=(session.messages||[]).filter(m=>m&&m.role); const hasMessageToolMetadata=S.messages.some(m=>{ if(!m||m.role!=='assistant') return false; + // Recognize both the standard `tool_calls` (used by completed assistant + // turns where the LLM emitted tool_call entries) and the WebUI-internal + // `_partial_tool_calls` (used on Stop/Cancel partial messages — see + // api/streaming.py cancel_stream). const hasTc=Array.isArray(m.tool_calls)&&m.tool_calls.length>0; + const hasPartialTc=Array.isArray(m._partial_tool_calls)&&m._partial_tool_calls.length>0; const hasTu=Array.isArray(m.content)&&m.content.some(p=>p&&p.type==='tool_use'); - return hasTc||hasTu; + return hasTc||hasPartialTc||hasTu; }); if(!hasMessageToolMetadata&&session.tool_calls&&session.tool_calls.length){ S.toolCalls=(session.tool_calls||[]).map(tc=>({...tc,done:true})); diff --git a/tests/test_issue1361_cancel_data_loss.py b/tests/test_issue1361_cancel_data_loss.py index c7f08d52..cdf89dcd 100644 --- a/tests/test_issue1361_cancel_data_loss.py +++ b/tests/test_issue1361_cancel_data_loss.py @@ -213,9 +213,9 @@ class TestCancelPreservesToolCalls: s2 = models.SESSIONS[sid] assistant_msgs = [m for m in s2.messages if isinstance(m, dict) and m.get('role') == 'assistant'] - has_tools = any(m.get('tool_calls') or m.get('tools') for m in assistant_msgs) + has_tools = any(m.get('_partial_tool_calls') or m.get('tool_calls') or m.get('tools') for m in assistant_msgs) assert has_tools, \ - f"Expected tool_calls on partial assistant msg after cancel. Got: {assistant_msgs}" + f"Expected _partial_tool_calls on partial assistant msg after cancel. Got: {assistant_msgs}" def test_cancel_with_tools_and_text_preserves_both(self): """Cancel after tools + partial text should keep both.""" diff --git a/tests/test_issue798.py b/tests/test_issue798.py index 702bef9f..4207400b 100644 --- a/tests/test_issue798.py +++ b/tests/test_issue798.py @@ -192,3 +192,17 @@ def test_get_hermes_home_for_profile_rejects_path_traversal(): assert p.get_hermes_home_for_profile('alice') == base / 'profiles' / 'alice' assert p.get_hermes_home_for_profile('my-profile') == base / 'profiles' / 'my-profile' assert p.get_hermes_home_for_profile('profile_1') == base / 'profiles' / 'profile_1' + # R19j coverage gaps closed in v0.50.251 per Opus pre-release review: + # - Trailing-newline names must be rejected (re.match would let them through; + # re.fullmatch correctly anchors $). Catches the match-vs-fullmatch footgun. + assert p.get_hermes_home_for_profile('valid\n') == base + assert p.get_hermes_home_for_profile('a\n') == base + # - Length boundaries: 64 chars (max valid: 1 + 63 suffix) routes to profile path, + # 65 chars rejected. + assert p.get_hermes_home_for_profile('a' * 64) == base / 'profiles' / ('a' * 64) + assert p.get_hermes_home_for_profile('a' * 65) == base + # - Single-char name is the minimum valid form. + assert p.get_hermes_home_for_profile('a') == base / 'profiles' / 'a' + # - Non-ASCII / Unicode-trick names are rejected by the ASCII-only charset. + assert p.get_hermes_home_for_profile('voilà') == base + assert p.get_hermes_home_for_profile('名前') == base diff --git a/tests/test_pr1375_partial_tool_calls_sanitize.py b/tests/test_pr1375_partial_tool_calls_sanitize.py new file mode 100644 index 00000000..1c4a5ff6 --- /dev/null +++ b/tests/test_pr1375_partial_tool_calls_sanitize.py @@ -0,0 +1,105 @@ +"""Regression test for the v0.50.251 Opus pass-2 MUST-FIX on PR #1375. + +Original PR #1375 stored cancelled tool calls under the message key +`tool_calls`. That key is whitelisted by `_API_SAFE_MSG_KEYS` so the +sanitize-for-API path forwarded them to the next-turn LLM call. But the +captured entries use the WebUI internal shape ({name, args, done, +duration, is_error}) — they don't have OpenAI/Anthropic's id + +function: {name, arguments} envelope. Strict providers (OpenAI, +Anthropic, Z.AI/GLM) would 400 on the malformed entries — turning a +"data lost on cancel" bug into a "next message returns 400" bug. + +The fix renames the key to `_partial_tool_calls` (underscore-prefixed +private key NOT in the whitelist), so sanitize correctly strips it. +The UI reads it via static/messages.js. + +This test pins the invariant: a partial assistant message with +`_partial_tool_calls` set must produce ZERO `tool_calls` after +sanitize-for-API. +""" +import pathlib +import sys + +import pytest + +REPO_ROOT = pathlib.Path(__file__).parent.parent.resolve() +sys.path.insert(0, str(REPO_ROOT)) + + +def test_partial_tool_calls_field_not_forwarded_to_llm(): + """The `_partial_tool_calls` field must not survive _sanitize_messages_for_api. + Otherwise the malformed entries get sent to the LLM and cause 400 errors.""" + from api.streaming import _sanitize_messages_for_api + + messages = [ + {"role": "user", "content": "do a search"}, + { + "role": "assistant", + "content": "Looking up...", + "_partial": True, + "_partial_tool_calls": [ + {"name": "web_search", "args": {"query": "x"}, "done": False}, + ], + "reasoning": "Let me think about this", + }, + ] + sanitized = _sanitize_messages_for_api(messages) + # The partial assistant message must NOT have _partial_tool_calls (private key). + # It must NOT have tool_calls (would bypass the rename and 400 the LLM). + # It must NOT have reasoning (not in whitelist). + assistant_msgs = [m for m in sanitized if m.get("role") == "assistant"] + assert assistant_msgs, "Sanitized output must include the assistant message" + for m in assistant_msgs: + assert "_partial_tool_calls" not in m, ( + "Sanitize-for-API must strip _partial_tool_calls — it's a UI-only key. " + f"Got: {m}" + ) + assert "tool_calls" not in m, ( + "Sanitize-for-API must NOT have tool_calls on a partial message — the " + "captured entries use WebUI shape and would 400 on strict providers. " + f"Got: {m}" + ) + assert "reasoning" not in m, ( + "Sanitize-for-API must strip reasoning (not in _API_SAFE_MSG_KEYS). " + f"Got: {m}" + ) + + +def test_legitimate_tool_calls_are_preserved_for_completed_turns(): + """Completed assistant turns with REAL tool_calls (with id + function envelope) + must still pass through sanitize unchanged. The rename only affects + cancel-partial messages, not normal completed turns.""" + from api.streaming import _sanitize_messages_for_api + + messages = [ + {"role": "user", "content": "search"}, + { + "role": "assistant", + "content": "I'll search.", + "tool_calls": [ + { + "id": "call_abc", + "type": "function", + "function": {"name": "web_search", "arguments": '{"query":"x"}'} + }, + ], + }, + { + "role": "tool", + "tool_call_id": "call_abc", + "name": "web_search", + "content": "result", + }, + ] + sanitized = _sanitize_messages_for_api(messages) + assistant_msgs = [m for m in sanitized if m.get("role") == "assistant"] + assert assistant_msgs, "Sanitized output must include the assistant message" + assert assistant_msgs[0].get("tool_calls"), ( + "Legitimate tool_calls on completed turns must survive sanitize. " + f"Got: {assistant_msgs[0]}" + ) + # Must still have the OpenAI envelope shape + tc = assistant_msgs[0]["tool_calls"][0] + assert "id" in tc and "function" in tc, ( + f"tool_calls envelope must be preserved. Got: {tc}" + )