diff --git a/api/models.py b/api/models.py index f448f7a3..e0e52004 100644 --- a/api/models.py +++ b/api/models.py @@ -679,6 +679,20 @@ def _get_profile_home(profile) -> Path: return Path(os.environ.get('HERMES_HOME') or '~/.hermes').expanduser() +def _interrupted_recovery_marker() -> dict: + return { + 'role': 'assistant', + 'content': ( + '**Response interrupted.**\n\n' + 'The WebUI process restarted before this turn finished. ' + 'The user message above was preserved, but no agent output was recovered.' + ), + 'timestamp': int(time.time()), + '_error': True, + 'type': 'interrupted', + } + + def _apply_core_sync_or_error_marker( session, core_path, @@ -745,12 +759,7 @@ def _apply_core_sync_or_error_marker( session.pending_user_message = None session.pending_attachments = [] session.pending_started_at = None - session.messages.append({ - 'role': 'assistant', - 'content': '**Previous turn did not complete.**', - 'timestamp': int(time.time()), - '_error': True, - }) + session.messages.append(_interrupted_recovery_marker()) session.save() logger.info( "Session %s: recovered pending user turn (messages non-empty), added error marker", @@ -794,12 +803,7 @@ def _apply_core_sync_or_error_marker( session.pending_user_message = None session.pending_attachments = [] session.pending_started_at = None - session.messages.append({ - 'role': 'assistant', - 'content': '**Previous turn did not complete.**', - 'timestamp': int(time.time()), - '_error': True, - }) + session.messages.append(_interrupted_recovery_marker()) session.save() logger.info("Session %s: no core transcript found, added error marker", sid) return True @@ -811,7 +815,7 @@ def _apply_core_sync_or_error_marker( # pending_user_message and STREAMS.pop(stream_id). Without this guard, any # fast turn (e.g. command approval) that exits the thread before the on-disk # pending clear has flushed gets misdiagnosed as a crashed turn, producing a -# spurious "Previous turn did not complete." marker. 
+# spurious "Response interrupted." marker. # # 30s covers the worst-case post-loop persistence window: LLM finishing a tool # batch + lock contention with the checkpoint thread + a multi-MB session.save. diff --git a/tests/test_session_sidecar_repair.py b/tests/test_session_sidecar_repair.py index 4d575125..10a599ba 100644 --- a/tests/test_session_sidecar_repair.py +++ b/tests/test_session_sidecar_repair.py @@ -231,7 +231,7 @@ class TestRepairStalePendingNoDeadlock: class TestDraftRecovery: """When no core transcript exists, the pending user message is restored as a recovered user turn (_recovered=True) and the error marker says - 'Previous turn did not complete.' — NOT 'preserved as a draft'.""" + 'Response interrupted.' with a restart explanation — NOT 'preserved as a draft'.""" def test_pending_message_recovered_as_user_turn(self, hermes_home, monkeypatch): """When core transcript is missing, the pending_user_message is appended @@ -310,7 +310,10 @@ class TestDraftRecovery: assert "preserved as a draft" not in content, ( f"Error marker should not say 'preserved as a draft', got: {content}" ) - assert "Previous turn did not complete" in content + assert "Response interrupted" in content + assert "WebUI process restarted" in content + assert "user message above was preserved" in content + assert error_msgs[0].get("type") == "interrupted" def test_pending_attachments_recovered(self, hermes_home, monkeypatch): """Attachments on the pending message are carried over to the recovered turn.""" @@ -604,7 +607,9 @@ class TestNonEmptyMessagesPendingCleared: # Exactly one error marker error_msgs = [m for m in s.messages if m.get("_error")] assert len(error_msgs) == 1 - assert "Previous turn did not complete" in error_msgs[0]["content"] + assert "Response interrupted" in error_msgs[0]["content"] + assert "WebUI process restarted" in error_msgs[0]["content"] + assert error_msgs[0].get("type") == "interrupted" # Pending fields fully cleared assert s.pending_user_message is None