From 6bce34c27e42c88b6312a640455609618163f202 Mon Sep 17 00:00:00 2001
From: Manfred
Date: Sun, 3 May 2026 11:46:42 +0200
Subject: [PATCH] fix: clear stale WebUI stream state

Clear persisted active_stream_id and pending runtime fields when the
server no longer has the referenced live stream. Also drop browser-side
INFLIGHT state when the server reports a session idle and bump the
service-worker cache so the frontend fix is delivered.

Adds regression coverage for backend stale-stream cleanup, frontend
inflight invalidation, and cache busting.
---
Review notes (free-form area after the three-dash line; not part of the
commit message):

* This patch was recovered from a whitespace-collapsed copy. Hunk contents
  and line counts agree with the hunk headers and the diffstat, but the
  indentation of context lines and the placement of blank context lines
  are a best-effort reconstruction. Verify against the target tree, or
  apply with `git apply --ignore-whitespace`.
* _clear_stale_stream_state() discards every exception raised by
  session.save() (`except Exception: pass`) and still returns True, so a
  failed persist is indistinguishable from a successful one. Consider
  logging the failure.
* Two call sites pass a session obtained with metadata_only=True (or
  metadata_only=(not load_messages)) to the helper, which then calls
  session.save(). NOTE(review): confirm that saving a metadata-only
  session cannot overwrite persisted data that was not loaded.
* The new tests assert on source text only; they do not execute the
  helper or the frontend code.

 api/routes.py                      | 32 +++++++++++++++++++++-
 static/sessions.js                 |  9 ++++++
 static/sw.js                       |  2 +-
 tests/test_stale_stream_cleanup.py | 44 ++++++++++++++++++++++++++++++
 4 files changed, 85 insertions(+), 2 deletions(-)
 create mode 100644 tests/test_stale_stream_cleanup.py

diff --git a/api/routes.py b/api/routes.py
index 8c4bc834..f128d6f3 100644
--- a/api/routes.py
+++ b/api/routes.py
@@ -233,6 +233,34 @@ from api.helpers import (
     _redact_text,
 )
 
+
+def _clear_stale_stream_state(session) -> bool:
+    """Clear persisted streaming flags when the in-memory stream no longer exists.
+
+    A server restart or worker crash can leave active_stream_id/pending_* in the
+    session JSON while STREAMS is empty. The frontend then keeps reconnecting to
+    a dead stream and shows a permanent running/thinking state.
+    """
+    stream_id = getattr(session, "active_stream_id", None)
+    if not stream_id:
+        return False
+    with STREAMS_LOCK:
+        stream_alive = stream_id in STREAMS
+    if stream_alive:
+        return False
+    session.active_stream_id = None
+    if hasattr(session, "pending_user_message"):
+        session.pending_user_message = None
+    if hasattr(session, "pending_attachments"):
+        session.pending_attachments = []
+    if hasattr(session, "pending_started_at"):
+        session.pending_started_at = None
+    try:
+        session.save()
+    except Exception:
+        pass
+    return True
+
 # ── CSRF: validate Origin/Referer on POST ────────────────────────────────────
 import re as _re
 
@@ -1309,6 +1337,7 @@ def handle_get(handler, parsed) -> bool:
         try:
             _t1 = _time.monotonic()
             s = get_session(sid, metadata_only=(not load_messages))
+            _clear_stale_stream_state(s)
             _t2 = _time.monotonic()
             effective_model = (
                 _resolve_effective_session_model_for_display(s)
@@ -1435,6 +1464,7 @@ def handle_get(handler, parsed) -> bool:
             return bad(handler, "Missing session_id")
         try:
             from api.session_ops import session_status
+            _clear_stale_stream_state(get_session(sid, metadata_only=True))
             return j(handler, session_status(sid))
         except KeyError:
             return bad(handler, "Session not found", 404)
@@ -4265,7 +4295,7 @@ def _handle_chat_start(handler, body):
                 status=409,
             )
         # Stale stream id from a previous run; clear and continue.
-        s.active_stream_id = None
+        _clear_stale_stream_state(s)
     stream_id = uuid.uuid4().hex
     with _get_session_agent_lock(s.session_id):
         s.workspace = workspace
diff --git a/static/sessions.js b/static/sessions.js
index 318d88ca..56906ae4 100644
--- a/static/sessions.js
+++ b/static/sessions.js
@@ -387,6 +387,15 @@ async function loadSession(sid){
   _setActiveSessionUrl(S.session.session_id);
 
   const activeStreamId=S.session.active_stream_id||null;
+  // If the server says the session is idle, discard any browser-side inflight
+  // cache left behind by a crashed/restarted stream. Otherwise the UI can keep
+  // showing a permanent thinking/running state even though active_streams=0.
+  if(!activeStreamId&&INFLIGHT[sid]){
+    delete INFLIGHT[sid];
+    if(typeof clearInflightState==='function') clearInflightState(sid);
+    S.activeStreamId=null;
+    S.busy=false;
+  }
 
   // Phase 2a: If session is streaming, restore from INFLIGHT cache before
   // loading full messages (INFLIGHT state is self-contained and sufficient).
diff --git a/static/sw.js b/static/sw.js
index 9e43db66..d8e867b4 100644
--- a/static/sw.js
+++ b/static/sw.js
@@ -7,7 +7,7 @@
 
 // Cache version is injected by the server at request time (routes.py /sw.js handler).
 // Bumps automatically whenever the git commit changes — no manual edits needed.
-const CACHE_NAME = 'hermes-shell-__CACHE_VERSION__';
+const CACHE_NAME = 'hermes-shell-__CACHE_VERSION__-stale-stream-cleanup1';
 
 // Static assets that form the app shell.
 //
diff --git a/tests/test_stale_stream_cleanup.py b/tests/test_stale_stream_cleanup.py
new file mode 100644
index 00000000..c6070feb
--- /dev/null
+++ b/tests/test_stale_stream_cleanup.py
@@ -0,0 +1,44 @@
+from pathlib import Path
+
+REPO = Path(__file__).resolve().parents[1]
+ROUTES_SRC = (REPO / "api" / "routes.py").read_text(encoding="utf-8")
+SESSIONS_SRC = (REPO / "static" / "sessions.js").read_text(encoding="utf-8")
+SW_SRC = (REPO / "static" / "sw.js").read_text(encoding="utf-8")
+
+
+def test_stale_stream_cleanup_helper_exists():
+    assert "def _clear_stale_stream_state(session)" in ROUTES_SRC
+    assert "stream_id in STREAMS" in ROUTES_SRC
+    assert "session.active_stream_id = None" in ROUTES_SRC
+    assert "session.pending_user_message = None" in ROUTES_SRC
+    assert "session.pending_attachments = []" in ROUTES_SRC
+    assert "session.pending_started_at = None" in ROUTES_SRC
+    assert "session.save()" in ROUTES_SRC
+
+
+def test_session_load_clears_stale_stream_before_response():
+    load_pos = ROUTES_SRC.index("s = get_session(sid, metadata_only=(not load_messages))")
+    cleanup_pos = ROUTES_SRC.index("_clear_stale_stream_state(s)", load_pos)
+    response_pos = ROUTES_SRC.index('"active_stream_id": getattr(s, "active_stream_id", None)', cleanup_pos)
+    assert load_pos < cleanup_pos < response_pos
+
+
+def test_chat_start_clears_stale_pending_state_not_only_active_id():
+    stale_comment_pos = ROUTES_SRC.index("# Stale stream id from a previous run; clear and continue.")
+    cleanup_pos = ROUTES_SRC.index("_clear_stale_stream_state(s)", stale_comment_pos)
+    stream_id_pos = ROUTES_SRC.index("stream_id = uuid.uuid4().hex", cleanup_pos)
+    assert stale_comment_pos < cleanup_pos < stream_id_pos
+
+
+def test_frontend_drops_inflight_cache_when_server_session_is_idle():
+    marker = "If the server says the session is idle, discard any browser-side inflight"
+    marker_pos = SESSIONS_SRC.index(marker)
+    window = SESSIONS_SRC[marker_pos:marker_pos + 500]
+    assert "if(!activeStreamId&&INFLIGHT[sid])" in window
+    assert "delete INFLIGHT[sid]" in window
+    assert "clearInflightState" in window
+    assert "S.busy=false" in window
+
+
+def test_service_worker_cache_bumped_for_frontend_fix_delivery():
+    assert "stale-stream-cleanup1" in SW_SRC