From 8aed650b4ca20473d350977edbb2b5419d5f61de Mon Sep 17 00:00:00 2001
From: Jellypowered
Date: Sun, 10 May 2026 11:10:54 -0500
Subject: [PATCH] Stitch continued session transcripts in WebUI

---
Reviewer notes (this section is not part of the commit message):

  * This copy was rebuilt from a whitespace-collapsed paste. Leading
    whitespace on context and removed lines is a best-effort
    reconstruction; run `git apply --check` first and retry with
    `--ignore-whitespace` if the context does not match.
  * The From: header carries no e-mail address in this copy; `git am` may
    reject it until the address is restored.
  * The api/routes.py hunk was amended by hand: timestamps that are missing
    or non-numeric no longer raise during the merge, ties keep their source
    order (stable sort on the timestamp only) instead of being reshuffled
    by role/content, and the dedupe key compares timestamps numerically so
    10 and 10.0 count as the same message. The post-image blob id on that
    file's index line is therefore stale.

 .gitignore                                    |  2 +
 api/models.py                                 | 56 +++++++++++++++--
 api/routes.py                                 | 32 ++++++++-
 tests/test_session_lineage_full_transcript.py | 61 +++++++++++++++++++
 4 files changed, 144 insertions(+), 7 deletions(-)
 create mode 100644 tests/test_session_lineage_full_transcript.py

diff --git a/.gitignore b/.gitignore
index b4ee8a54..0edd66af 100644
--- a/.gitignore
+++ b/.gitignore
@@ -50,3 +50,5 @@ docs/*
 graphify-out/
 .graphify_cached.json
 .graphify_uncached.txt
+
+.venv/
diff --git a/api/models.py b/api/models.py
index 1aac37a5..767c1e5e 100644
--- a/api/models.py
+++ b/api/models.py
@@ -1662,7 +1662,9 @@ def get_cli_session_messages(sid) -> list:
 
     Preserve tool-call/result and reasoning metadata from the agent state.db so
     CLI-origin transcripts render with the same tool cards as WebUI-native
-    sessions. Returns empty list on any error.
+    sessions. When the requested session is the tip of a compression/CLI-close
+    continuation chain, return the stitched full transcript across all segments
+    in chronological order. Returns empty list on any error.
     """
     import os
     if str(sid or '').startswith(f'{CLAUDE_CODE_SOURCE}_'):
@@ -1701,12 +1703,56 @@ def get_cli_session_messages(sid) -> list:
                 'codex_message_items',
             ]
             selected = ['role', 'content', 'timestamp'] + [c for c in optional if c in available]
+
+            cur.execute("PRAGMA table_info(sessions)")
+            session_cols = {str(row['name']) for row in cur.fetchall()}
+            session_chain = [str(sid)]
+            if {'parent_session_id', 'end_reason', 'started_at', 'source'}.issubset(session_cols):
+                cur.execute(
+                    """
+                    SELECT id, source, started_at, parent_session_id, ended_at, end_reason
+                    FROM sessions
+                    WHERE id = ?
+                    """,
+                    (sid,),
+                )
+                rows_by_id = {}
+                row = cur.fetchone()
+                if row:
+                    rows_by_id[str(row['id'])] = dict(row)
+                    current_id = str(row['id'])
+                    seen = {current_id}
+                    for _ in range(20):
+                        current = rows_by_id.get(current_id)
+                        parent_id = current.get('parent_session_id') if current else None
+                        if not parent_id or parent_id in seen:
+                            break
+                        cur.execute(
+                            """
+                            SELECT id, source, started_at, parent_session_id, ended_at, end_reason
+                            FROM sessions
+                            WHERE id = ?
+                            """,
+                            (parent_id,),
+                        )
+                        parent_row = cur.fetchone()
+                        if not parent_row:
+                            break
+                        parent_dict = dict(parent_row)
+                        rows_by_id[str(parent_row['id'])] = parent_dict
+                        if not _is_continuation_session(parent_dict, current):
+                            break
+                        session_chain.insert(0, str(parent_row['id']))
+                        current_id = str(parent_row['id'])
+                        seen.add(current_id)
+
+            placeholders = ', '.join('?' for _ in session_chain)
             cur.execute(f"""
-                SELECT {', '.join(selected)}
+                SELECT {', '.join(selected)}, session_id
                 FROM messages
-                WHERE session_id = ?
-                ORDER BY timestamp ASC
-            """, (sid,))
+                WHERE session_id IN ({placeholders})
+                ORDER BY timestamp ASC, id ASC
+            """, session_chain)
             msgs = []
             for row in cur.fetchall():
                 msg = {
diff --git a/api/routes.py b/api/routes.py
index cdf9e12a..72863491 100644
--- a/api/routes.py
+++ b/api/routes.py
@@ -3028,8 +3028,36 @@ def handle_get(handler, parsed) -> bool:
                     # longer visible conversation than the single state.db
                     # segment for this messaging session id. Prefer the longer
                     # sidecar so repaired WebUI history is not hidden behind the
-                    # canonical per-segment transcript.
-                    _all_msgs = sidecar_messages if len(sidecar_messages) > len(cli_messages) else cli_messages
+                    # canonical per-segment transcript. When both sources carry
+                    # different slices of the same stitched conversation, merge
+                    # them chronologically and dedupe exact repeats.
+                    if sidecar_messages and sidecar_messages != cli_messages:
+                        def _merge_ts(m):
+                            # Missing or non-numeric timestamps sort first instead of raising.
+                            try:
+                                return float(m.get("timestamp") or 0)
+                            except (TypeError, ValueError):
+                                return 0.0
+                        merged_messages = []
+                        seen_message_keys = set()
+                        # sorted() is stable, so keying on the timestamp alone keeps each
+                        # source's own order for ties instead of reshuffling by role/content.
+                        for msg in sorted(list(cli_messages) + list(sidecar_messages), key=_merge_ts):
+                            key = (
+                                str(msg.get("role") or ""),
+                                str(msg.get("content") or ""),
+                                # Numeric form so 10 and 10.0 dedupe; raw text otherwise.
+                                _merge_ts(msg) or str(msg.get("timestamp") or ""),
+                                str(msg.get("tool_call_id") or ""),
+                                str(msg.get("tool_name") or msg.get("name") or ""),
+                            )
+                            if key in seen_message_keys:
+                                continue
+                            seen_message_keys.add(key)
+                            merged_messages.append(msg)
+                        _all_msgs = merged_messages
+                    else:
+                        _all_msgs = sidecar_messages if len(sidecar_messages) > len(cli_messages) else cli_messages
                 else:
                     _all_msgs = s.messages
             else:
diff --git a/tests/test_session_lineage_full_transcript.py b/tests/test_session_lineage_full_transcript.py
new file mode 100644
index 00000000..7efc6d18
--- /dev/null
+++ b/tests/test_session_lineage_full_transcript.py
@@ -0,0 +1,61 @@
+"""Regression coverage for stitched full-transcript loading across session segments."""
+
+from __future__ import annotations
+
+import api.routes as routes
+
+
+
+def test_session_endpoint_merges_sidecar_and_lineage_messages_for_cli_sessions(monkeypatch):
+    class DummySession:
+        def __init__(self):
+            self.messages = [{"role": "assistant", "content": "sidecar tail", "timestamp": 10.0}]
+            self.tool_calls = []
+            self.active_stream_id = None
+            self.pending_user_message = None
+            self.pending_attachments = []
+            self.pending_started_at = None
+            self.context_length = 0
+            self.threshold_tokens = 0
+            self.last_prompt_tokens = 0
+            self.model = "openai/gpt-5"
+            self.session_id = "tip"
+
+        def compact(self):
+            return {"session_id": "tip", "title": "Tip", "model": "openai/gpt-5"}
+
+    captured = {}
+
+    monkeypatch.setattr(routes, "get_session", lambda sid, metadata_only=False: DummySession())
+    monkeypatch.setattr(routes, "_clear_stale_stream_state", lambda s: None)
+    monkeypatch.setattr(routes, "_lookup_cli_session_metadata", lambda sid: {"session_source": "messaging"})
+    monkeypatch.setattr(routes, "_is_messaging_session_record", lambda s: True)
+    monkeypatch.setattr(
+        routes,
+        "get_cli_session_messages",
+        lambda sid: [
+            {"role": "user", "content": "root user", "timestamp": 1.0},
+            {"role": "assistant", "content": "tip assistant", "timestamp": 2.0},
+        ],
+    )
+    monkeypatch.setattr(routes, "_resolve_effective_session_model_for_display", lambda s: getattr(s, "model", None))
+    monkeypatch.setattr(routes, "_resolve_effective_session_model_provider_for_display", lambda s: None)
+    monkeypatch.setattr(routes, "_merge_cli_sidebar_metadata", lambda raw, meta: raw)
+    monkeypatch.setattr(routes, "redact_session_data", lambda raw: raw)
+    monkeypatch.setattr(routes, "j", lambda handler, payload, status=200: captured.setdefault("payload", payload))
+
+    class Handler:
+        pass
+
+    class Parsed:
+        path = "/api/session"
+        query = "session_id=tip"
+
+    routes.handle_get(Handler(), Parsed())
+
+    session = captured["payload"]["session"]
+    assert [m["content"] for m in session["messages"]] == [
+        "root user",
+        "tip assistant",
+        "sidecar tail",
+    ]