Stage 331: PR #2015 — fix(sessions): stitch continued session transcripts by @Jellypowered

This commit is contained in:
nesquena-hermes
2026-05-10 17:09:21 +00:00
4 changed files with 139 additions and 7 deletions
+2
View File
@@ -50,3 +50,5 @@ docs/*
graphify-out/
.graphify_cached.json
.graphify_uncached.txt
.venv/
+51 -5
View File
@@ -1666,7 +1666,9 @@ def get_cli_session_messages(sid) -> list:
Preserve tool-call/result and reasoning metadata from the agent state.db so
CLI-origin transcripts render with the same tool cards as WebUI-native
sessions. Returns empty list on any error.
sessions. When the requested session is the tip of a compression/CLI-close
continuation chain, return the stitched full transcript across all segments
in chronological order. Returns empty list on any error.
"""
import os
if str(sid or '').startswith(f'{CLAUDE_CODE_SOURCE}_'):
@@ -1705,12 +1707,56 @@ def get_cli_session_messages(sid) -> list:
'codex_message_items',
]
selected = ['role', 'content', 'timestamp'] + [c for c in optional if c in available]
cur.execute("PRAGMA table_info(sessions)")
session_cols = {str(row['name']) for row in cur.fetchall()}
session_chain = [str(sid)]
if {'parent_session_id', 'end_reason', 'started_at', 'source'}.issubset(session_cols):
cur.execute(
"""
SELECT id, source, started_at, parent_session_id, ended_at, end_reason
FROM sessions
WHERE id = ?
""",
(sid,),
)
rows_by_id = {}
row = cur.fetchone()
if row:
rows_by_id[str(row['id'])] = dict(row)
current_id = str(row['id'])
seen = {current_id}
for _ in range(20):
current = rows_by_id.get(current_id)
parent_id = current.get('parent_session_id') if current else None
if not parent_id or parent_id in seen:
break
cur.execute(
"""
SELECT id, source, started_at, parent_session_id, ended_at, end_reason
FROM sessions
WHERE id = ?
""",
(parent_id,),
)
parent_row = cur.fetchone()
if not parent_row:
break
parent_dict = dict(parent_row)
rows_by_id[str(parent_row['id'])] = parent_dict
if not _is_continuation_session(parent_dict, current):
break
session_chain.insert(0, str(parent_row['id']))
current_id = str(parent_row['id'])
seen.add(current_id)
placeholders = ', '.join('?' for _ in session_chain)
cur.execute(f"""
SELECT {', '.join(selected)}
SELECT {', '.join(selected)}, session_id
FROM messages
WHERE session_id = ?
ORDER BY timestamp ASC
""", (sid,))
WHERE session_id IN ({placeholders})
ORDER BY timestamp ASC, id ASC
""", session_chain)
msgs = []
for row in cur.fetchall():
msg = {
+25 -2
View File
@@ -3029,8 +3029,31 @@ def handle_get(handler, parsed) -> bool:
# longer visible conversation than the single state.db
# segment for this messaging session id. Prefer the longer
# sidecar so repaired WebUI history is not hidden behind the
# canonical per-segment transcript.
_all_msgs = sidecar_messages if len(sidecar_messages) > len(cli_messages) else cli_messages
# canonical per-segment transcript. When both sources carry
# different slices of the same stitched conversation, merge
# them chronologically and dedupe exact repeats.
if sidecar_messages and sidecar_messages != cli_messages:
merged_messages = []
seen_message_keys = set()
for msg in sorted(list(cli_messages) + list(sidecar_messages), key=lambda m: (
float(m.get("timestamp") or 0),
str(m.get("role") or ""),
str(m.get("content") or ""),
)):
key = (
str(msg.get("role") or ""),
str(msg.get("content") or ""),
str(msg.get("timestamp") or ""),
str(msg.get("tool_call_id") or ""),
str(msg.get("tool_name") or msg.get("name") or ""),
)
if key in seen_message_keys:
continue
seen_message_keys.add(key)
merged_messages.append(msg)
_all_msgs = merged_messages
else:
_all_msgs = sidecar_messages if len(sidecar_messages) > len(cli_messages) else cli_messages
else:
_all_msgs = s.messages
else:
@@ -0,0 +1,61 @@
"""Regression coverage for stitched full-transcript loading across session segments."""
from __future__ import annotations
import api.routes as routes
def test_session_endpoint_merges_sidecar_and_lineage_messages_for_cli_sessions(monkeypatch):
    """Check that ``/api/session`` returns CLI and sidecar messages merged in order.

    ``get_cli_session_messages`` is stubbed to return two messages
    (timestamps 1.0 and 2.0) while the stub session object carries one
    message of its own (timestamp 10.0).  The payload handed to the JSON
    responder must list all three contents, oldest first.
    """

    class DummySession:
        """Minimal stand-in for the object returned by ``routes.get_session``."""

        def __init__(self):
            self.session_id = "tip"
            self.model = "openai/gpt-5"
            # The single message that only the session object knows about.
            self.messages = [{"role": "assistant", "content": "sidecar tail", "timestamp": 10.0}]
            self.tool_calls = []
            self.active_stream_id = None
            self.pending_user_message = None
            self.pending_attachments = []
            self.pending_started_at = None
            self.context_length = 0
            self.threshold_tokens = 0
            self.last_prompt_tokens = 0

        def compact(self):
            return {"session_id": "tip", "title": "Tip", "model": "openai/gpt-5"}

    class Handler:
        pass

    class Parsed:
        path = "/api/session"
        query = "session_id=tip"

    # Receives whatever payload the route passes to the JSON responder.
    response_box = {}

    def _fake_get_session(sid, metadata_only=False):
        return DummySession()

    def _fake_clear_stale_stream_state(s):
        return None

    def _fake_lookup_cli_session_metadata(sid):
        return {"session_source": "messaging"}

    def _fake_is_messaging_session_record(s):
        return True

    def _fake_get_cli_session_messages(sid):
        # Built fresh on every call so the route can never mutate shared state.
        return [
            {"role": "user", "content": "root user", "timestamp": 1.0},
            {"role": "assistant", "content": "tip assistant", "timestamp": 2.0},
        ]

    def _fake_model_for_display(s):
        return getattr(s, "model", None)

    def _fake_model_provider_for_display(s):
        return None

    def _passthrough_merge(raw, meta):
        return raw

    def _passthrough_redact(raw):
        return raw

    def _record_json(handler, payload, status=200):
        # Only the first payload is retained; later calls get it back unchanged.
        return response_box.setdefault("payload", payload)

    replacements = {
        "get_session": _fake_get_session,
        "_clear_stale_stream_state": _fake_clear_stale_stream_state,
        "_lookup_cli_session_metadata": _fake_lookup_cli_session_metadata,
        "_is_messaging_session_record": _fake_is_messaging_session_record,
        "get_cli_session_messages": _fake_get_cli_session_messages,
        "_resolve_effective_session_model_for_display": _fake_model_for_display,
        "_resolve_effective_session_model_provider_for_display": _fake_model_provider_for_display,
        "_merge_cli_sidebar_metadata": _passthrough_merge,
        "redact_session_data": _passthrough_redact,
        "j": _record_json,
    }
    for attr_name, stub in replacements.items():
        monkeypatch.setattr(routes, attr_name, stub)

    routes.handle_get(Handler(), Parsed())

    returned_session = response_box["payload"]["session"]
    contents = [entry["content"] for entry in returned_session["messages"]]
    assert contents == ["root user", "tip assistant", "sidecar tail"]