From 98941571209f0eafa01119f508e7c8e094df733e Mon Sep 17 00:00:00 2001 From: Michael Lam Date: Fri, 15 May 2026 22:42:02 -0700 Subject: [PATCH 01/15] fix: hybridize background profile env routing --- CHANGELOG.md | 4 +++ api/profiles.py | 42 ++++++++++++++------------ tests/test_sprint46.py | 9 +++++- tests/test_title_aux_routing.py | 50 +++++++++++++++++++++++++++++++ tests/test_update_banner_fixes.py | 14 +++++++-- 5 files changed, 97 insertions(+), 22 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6fa25bab..df4a44a6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,10 @@ ## [Unreleased] +### Fixed + +- Background profile workers now route broad runtime env through thread-local state while setting only `HERMES_HOME` under the narrow process-env lock, preserving `hermes_cli.config.load_config()` compatibility for non-default profiles without leaking profile-specific env keys process-wide. + ## [v0.51.74] — 2026-05-16 — Release AX (stage-367 — 4-PR safe-lane batch — #2362 table-cell spacing + #2363 run-state-consistency RFC + #2365 custom_providers list-format + #2367 settings sidebar i18n) ### Added diff --git a/api/profiles.py b/api/profiles.py index 22c73312..e637dcb7 100644 --- a/api/profiles.py +++ b/api/profiles.py @@ -697,7 +697,8 @@ def profile_env_for_background_worker( return try: - # Lazy import avoids a module-load cycle: streaming imports this helper. + # Lazy imports avoid a module-load cycle: streaming imports this helper. + from api.config import _clear_thread_env, _set_thread_env, _thread_ctx from api.streaming import _ENV_LOCK profile_home_path = Path(get_hermes_home_for_profile(profile)) @@ -712,23 +713,23 @@ def profile_env_for_background_worker( yield return - env_keys = set(runtime_env.keys()) | {"HERMES_HOME"} - # Stage-360 maintainer fix: narrow the _ENV_LOCK critical section to just - # the env mutation (and the env restoration). Pre-fix, this held _ENV_LOCK - # for the entire `yield` duration — i.e. 
the whole background worker's - # runtime (title generation, compression, update summary). That caused - # _ENV_LOCK to be held for many seconds, blocking ALL other sessions and - # surfacing as the QA `test_third_message_completes` timeout. The fix - # mirrors the narrow-lock pattern in _run_agent_streaming: acquire briefly - # to set env, run worker without holding the lock, reacquire to restore. - # See also QA `test_finally_restores_env_with_lock`. + thread_env = dict(runtime_env) + thread_env["HERMES_HOME"] = str(profile_home_path) + # Hybrid profile routing: keep the broad runtime env in WebUI's thread-local + # channel so provider/API-key overrides do not leak through process-global + # os.environ, but still set HERMES_HOME under the narrow _ENV_LOCK because + # hermes_cli.config.load_config() currently resolves homes from os.environ. + # Do not hold _ENV_LOCK across the worker body. skill_home_snapshot = None - old_env = {} + old_hermes_home = None + had_hermes_home = False + previous_thread_env = getattr(_thread_ctx, "env", {}).copy() try: + _set_thread_env(**thread_env) with _ENV_LOCK: - old_env = {key: os.environ.get(key) for key in env_keys} + had_hermes_home = "HERMES_HOME" in os.environ + old_hermes_home = os.environ.get("HERMES_HOME") skill_home_snapshot = snapshot_skill_home_modules() - os.environ.update(runtime_env) os.environ["HERMES_HOME"] = str(profile_home_path) try: patch_skill_home_modules(profile_home_path) @@ -741,12 +742,15 @@ def profile_env_for_background_worker( ) yield finally: + if previous_thread_env: + _set_thread_env(**previous_thread_env) + else: + _clear_thread_env() with _ENV_LOCK: - for key, old_value in old_env.items(): - if old_value is None: - os.environ.pop(key, None) - else: - os.environ[key] = old_value + if had_hermes_home: + os.environ["HERMES_HOME"] = old_hermes_home or "" + else: + os.environ.pop("HERMES_HOME", None) if skill_home_snapshot is not None: restore_skill_home_modules(skill_home_snapshot) diff --git 
a/tests/test_sprint46.py b/tests/test_sprint46.py index 0db0ffa2..cffe73e2 100644 --- a/tests/test_sprint46.py +++ b/tests/test_sprint46.py @@ -415,10 +415,15 @@ def test_manual_compress_worker_uses_session_profile_env(monkeypatch, tmp_path, seen_env = None def __init__(self, **kwargs): + from api.config import _thread_ctx + skill_module = sys.modules.get("tools.skills_tool") + thread_env = getattr(_thread_ctx, "env", {}) EnvAssertingAgent.seen_env = { "HERMES_HOME": os.environ.get("HERMES_HOME"), "HERMES_TEST_PROFILE_ENV": os.environ.get("HERMES_TEST_PROFILE_ENV"), + "THREAD_HERMES_HOME": thread_env.get("HERMES_HOME"), + "THREAD_HERMES_TEST_PROFILE_ENV": thread_env.get("HERMES_TEST_PROFILE_ENV"), "SKILL_MODULE_HOME": getattr(skill_module, "HERMES_HOME", None), "SKILL_MODULE_DIR": getattr(skill_module, "SKILLS_DIR", None), } @@ -460,7 +465,9 @@ def test_manual_compress_worker_uses_session_profile_env(monkeypatch, tmp_path, assert EnvAssertingAgent.seen_env == { "HERMES_HOME": str(profile_home), - "HERMES_TEST_PROFILE_ENV": "work-runtime", + "HERMES_TEST_PROFILE_ENV": None, + "THREAD_HERMES_HOME": str(profile_home), + "THREAD_HERMES_TEST_PROFILE_ENV": "work-runtime", "SKILL_MODULE_HOME": profile_home, "SKILL_MODULE_DIR": profile_home / "skills", } diff --git a/tests/test_title_aux_routing.py b/tests/test_title_aux_routing.py index a125d361..0acffbbb 100644 --- a/tests/test_title_aux_routing.py +++ b/tests/test_title_aux_routing.py @@ -502,6 +502,56 @@ class TestBackgroundTitleProfileRouting(unittest.TestCase): self.assertEqual(getattr(fake_skill_module, 'SKILLS_DIR'), 'default-home/skills') self.assertEqual(mock_session.title, 'Profile Routed Title') + def test_background_profile_env_routes_load_config_without_process_env_leak(self): + """Hybrid worker env must satisfy hermes_cli.load_config without leaking profile env keys.""" + import tempfile + + import pytest + + import api.profiles as profiles + from api.config import _thread_ctx + try: + from hermes_cli import 
config as hermes_config + except ModuleNotFoundError: + pytest.skip('hermes_cli is not installed in this CI environment') + + session = types.SimpleNamespace(profile='work') + captured = {} + + with tempfile.TemporaryDirectory() as tmp: + default_home = os.path.join(tmp, 'default-home') + profile_home = os.path.join(tmp, 'profile-home') + os.makedirs(default_home, exist_ok=True) + os.makedirs(profile_home, exist_ok=True) + with open(os.path.join(default_home, 'config.yaml'), 'w', encoding='utf-8') as f: + f.write('model:\n provider: default-provider\n default: default-model\n') + with open(os.path.join(profile_home, 'config.yaml'), 'w', encoding='utf-8') as f: + f.write('model:\n provider: profile-provider\n default: profile-model\n') + + with patch('api.profiles.get_hermes_home_for_profile', return_value=profile_home): + with patch('api.profiles.get_profile_runtime_env', return_value={'PROFILE_ONLY_KEY': 'profile-only'}): + with patch.dict(os.environ, {'HERMES_HOME': default_home}, clear=False): + os.environ.pop('PROFILE_ONLY_KEY', None) + hermes_config._LOAD_CONFIG_CACHE.clear() + with profiles.profile_env_for_background_worker(session, 'background title'): + loaded = hermes_config.load_config() + captured['loaded_provider'] = loaded.get('model', {}).get('provider') + captured['process_home'] = os.environ.get('HERMES_HOME') + captured['process_runtime_key'] = os.environ.get('PROFILE_ONLY_KEY') + captured['thread_home'] = getattr(_thread_ctx, 'env', {}).get('HERMES_HOME') + captured['thread_runtime_key'] = getattr(_thread_ctx, 'env', {}).get('PROFILE_ONLY_KEY') + captured['restored_home'] = os.environ.get('HERMES_HOME') + captured['restored_runtime_key'] = os.environ.get('PROFILE_ONLY_KEY') + hermes_config._LOAD_CONFIG_CACHE.clear() + + self.assertEqual(captured['loaded_provider'], 'profile-provider') + self.assertEqual(captured['process_home'], profile_home) + self.assertIsNone(captured['process_runtime_key']) + self.assertEqual(captured['thread_home'], 
profile_home) + self.assertEqual(captured['thread_runtime_key'], 'profile-only') + self.assertEqual(captured['restored_home'], default_home) + self.assertIsNone(captured['restored_runtime_key']) + class TestAuxTitleTimeoutEdgeCases(unittest.TestCase): """_aux_title_timeout must reject zero, negative, and non-numeric values.""" diff --git a/tests/test_update_banner_fixes.py b/tests/test_update_banner_fixes.py index bc3a2cba..210a1806 100644 --- a/tests/test_update_banner_fixes.py +++ b/tests/test_update_banner_fixes.py @@ -476,9 +476,12 @@ class TestUpdateSummaryRouteModelSelection: monkeypatch.setattr(cfg, 'get_effective_default_model', lambda: 'openai/test-main') def fake_resolve_model_provider(model): + thread_env = getattr(cfg._thread_ctx, 'env', {}) captured['model_resolution_env'] = { 'HERMES_HOME': os.environ.get('HERMES_HOME'), 'HERMES_TEST_PROFILE_ENV': os.environ.get('HERMES_TEST_PROFILE_ENV'), + 'THREAD_HERMES_HOME': thread_env.get('HERMES_HOME'), + 'THREAD_HERMES_TEST_PROFILE_ENV': thread_env.get('HERMES_TEST_PROFILE_ENV'), } return model, 'openai', 'https://example.test/v1' @@ -514,9 +517,12 @@ class TestUpdateSummaryRouteModelSelection: ) def fake_get_text_auxiliary_client(task, main_runtime=None): + thread_env = getattr(cfg._thread_ctx, 'env', {}) captured['aux_env'] = { 'HERMES_HOME': os.environ.get('HERMES_HOME'), 'HERMES_TEST_PROFILE_ENV': os.environ.get('HERMES_TEST_PROFILE_ENV'), + 'THREAD_HERMES_HOME': thread_env.get('HERMES_HOME'), + 'THREAD_HERMES_TEST_PROFILE_ENV': thread_env.get('HERMES_TEST_PROFILE_ENV'), 'SKILL_MODULE_HOME': getattr(fake_skill_module, 'HERMES_HOME'), 'SKILL_MODULE_DIR': getattr(fake_skill_module, 'SKILLS_DIR'), } @@ -563,11 +569,15 @@ class TestUpdateSummaryRouteModelSelection: assert captured['aux_task'] == 'compression' assert captured['model_resolution_env'] == { 'HERMES_HOME': str(profile_home), - 'HERMES_TEST_PROFILE_ENV': 'work-runtime', + 'HERMES_TEST_PROFILE_ENV': 'default-runtime', + 'THREAD_HERMES_HOME': 
str(profile_home), + 'THREAD_HERMES_TEST_PROFILE_ENV': 'work-runtime', } assert captured['aux_env'] == { 'HERMES_HOME': str(profile_home), - 'HERMES_TEST_PROFILE_ENV': 'work-runtime', + 'HERMES_TEST_PROFILE_ENV': 'default-runtime', + 'THREAD_HERMES_HOME': str(profile_home), + 'THREAD_HERMES_TEST_PROFILE_ENV': 'work-runtime', 'SKILL_MODULE_HOME': profile_home, 'SKILL_MODULE_DIR': profile_home / 'skills', } From 49bea3ad0187f600c622c8fabd665f9a1d345b84 Mon Sep 17 00:00:00 2001 From: Frank Song Date: Sat, 16 May 2026 14:29:58 +0800 Subject: [PATCH 02/15] Clarify interrupted turn recovery marker --- CHANGELOG.md | 4 ++++ api/models.py | 28 ++++++++++++++++------------ tests/test_session_sidecar_repair.py | 11 ++++++++--- 3 files changed, 28 insertions(+), 15 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6fa25bab..7adff893 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,10 @@ ## [Unreleased] +### Fixed + +- **PR TBD** by @franksong2702 (fixes #2370) — Recovered interrupted turns now explain that the WebUI process restarted before the agent finished, that the user message above was preserved, and that no agent output was recovered. Pre-fix, stale pending-turn repair appended the vague `Previous turn did not complete` marker, which looked like an unexplained assistant response after a WebUI restart killed the in-process worker. 
+ ## [v0.51.74] — 2026-05-16 — Release AX (stage-367 — 4-PR safe-lane batch — #2362 table-cell spacing + #2363 run-state-consistency RFC + #2365 custom_providers list-format + #2367 settings sidebar i18n) ### Added diff --git a/api/models.py b/api/models.py index f448f7a3..0f6397c3 100644 --- a/api/models.py +++ b/api/models.py @@ -679,6 +679,20 @@ def _get_profile_home(profile) -> Path: return Path(os.environ.get('HERMES_HOME') or '~/.hermes').expanduser() +def _interrupted_recovery_marker() -> dict: + return { + 'role': 'assistant', + 'content': ( + '**Response interrupted.**\n\n' + 'The WebUI process restarted before this turn finished. ' + 'The user message above was preserved, but no agent output was recovered.' + ), + 'timestamp': int(time.time()), + '_error': True, + 'type': 'interrupted', + } + + def _apply_core_sync_or_error_marker( session, core_path, @@ -745,12 +759,7 @@ def _apply_core_sync_or_error_marker( session.pending_user_message = None session.pending_attachments = [] session.pending_started_at = None - session.messages.append({ - 'role': 'assistant', - 'content': '**Previous turn did not complete.**', - 'timestamp': int(time.time()), - '_error': True, - }) + session.messages.append(_interrupted_recovery_marker()) session.save() logger.info( "Session %s: recovered pending user turn (messages non-empty), added error marker", @@ -794,12 +803,7 @@ def _apply_core_sync_or_error_marker( session.pending_user_message = None session.pending_attachments = [] session.pending_started_at = None - session.messages.append({ - 'role': 'assistant', - 'content': '**Previous turn did not complete.**', - 'timestamp': int(time.time()), - '_error': True, - }) + session.messages.append(_interrupted_recovery_marker()) session.save() logger.info("Session %s: no core transcript found, added error marker", sid) return True diff --git a/tests/test_session_sidecar_repair.py b/tests/test_session_sidecar_repair.py index 4d575125..10a599ba 100644 --- 
a/tests/test_session_sidecar_repair.py +++ b/tests/test_session_sidecar_repair.py @@ -231,7 +231,7 @@ class TestRepairStalePendingNoDeadlock: class TestDraftRecovery: """When no core transcript exists, the pending user message is restored as a recovered user turn (_recovered=True) and the error marker says - 'Previous turn did not complete.' — NOT 'preserved as a draft'.""" + a clear restart interruption marker — NOT 'preserved as a draft'.""" def test_pending_message_recovered_as_user_turn(self, hermes_home, monkeypatch): """When core transcript is missing, the pending_user_message is appended @@ -310,7 +310,10 @@ class TestDraftRecovery: assert "preserved as a draft" not in content, ( f"Error marker should not say 'preserved as a draft', got: {content}" ) - assert "Previous turn did not complete" in content + assert "Response interrupted" in content + assert "WebUI process restarted" in content + assert "user message above was preserved" in content + assert error_msgs[0].get("type") == "interrupted" def test_pending_attachments_recovered(self, hermes_home, monkeypatch): """Attachments on the pending message are carried over to the recovered turn.""" @@ -604,7 +607,9 @@ class TestNonEmptyMessagesPendingCleared: # Exactly one error marker error_msgs = [m for m in s.messages if m.get("_error")] assert len(error_msgs) == 1 - assert "Previous turn did not complete" in error_msgs[0]["content"] + assert "Response interrupted" in error_msgs[0]["content"] + assert "WebUI process restarted" in error_msgs[0]["content"] + assert error_msgs[0].get("type") == "interrupted" # Pending fields fully cleared assert s.pending_user_message is None From f87e32d0c76a5066b4ecc6ff175e8448fb436fdb Mon Sep 17 00:00:00 2001 From: BonyFish Date: Sat, 16 May 2026 14:57:01 +0800 Subject: [PATCH 03/15] fix: add i18n support to settings conversation page action buttons - Add data-i18n attributes to JSON, Import tooltip, and Clear button - Reuse existing 'clear' and 'import' i18n keys for button 
labels - Add 'export_session_json', 'export_session_json_tooltip', 'import_session_json_tooltip', 'clear_conversation_btn_tooltip' keys to all 11 locales (en, it, ja, ru, es, de, zh, zh-Hant, pt, ko, fr) --- static/i18n.js | 44 ++++++++++++++++++++++++++++++++++++++++++++ static/index.html | 6 +++--- 2 files changed, 47 insertions(+), 3 deletions(-) diff --git a/static/i18n.js b/static/i18n.js index 30c497f1..3ea54532 100644 --- a/static/i18n.js +++ b/static/i18n.js @@ -731,6 +731,10 @@ const LOCALES = { transcript: 'Transcript', download_transcript: 'Download as Markdown', import: 'Import', + export_session_json: 'JSON', + export_session_json_tooltip: 'Export full session as JSON', + import_session_json_tooltip: 'Import session from JSON', + clear_conversation_btn_tooltip: 'Clear all messages in this conversation', // Settings detail settings_label_sound: 'Notification sound', settings_desc_sound: 'Play a sound when the assistant finishes a response.', @@ -1913,6 +1917,10 @@ const LOCALES = { transcript: 'Trascrizione', download_transcript: 'Scarica come Markdown', import: 'Importa', + export_session_json: 'JSON', + export_session_json_tooltip: 'Esporta sessione completa come JSON', + import_session_json_tooltip: 'Importa sessione da JSON', + clear_conversation_btn_tooltip: 'Cancella tutti i messaggi in questa conversazione', // Settings detail settings_label_sound: 'Suono notifica', settings_desc_sound: 'Riproduci un suono quando l\'assistente termina una risposta.', @@ -3100,6 +3108,10 @@ const LOCALES = { transcript: 'トランスクリプト', download_transcript: 'Markdown としてダウンロード', import: 'インポート', + export_session_json: 'JSON', + export_session_json_tooltip: 'セッション全体をJSONとしてエクスポート', + import_session_json_tooltip: 'JSONからセッションをインポート', + clear_conversation_btn_tooltip: 'この会話のすべてのメッセージをクリア', // Settings detail settings_label_sound: '通知音', settings_desc_sound: 'アシスタントが応答を完了したときに音を鳴らします。', @@ -4072,6 +4084,10 @@ const LOCALES = { transcript: 'Транскрипт', download_transcript: 
'Скачать как Markdown', import: 'Импорт', + export_session_json: 'JSON', + export_session_json_tooltip: 'Экспортировать сессию как JSON', + import_session_json_tooltip: 'Импортировать сессию из JSON', + clear_conversation_btn_tooltip: 'Очистить все сообщения в этой беседе', settings_label_sound: 'Звук уведомления', settings_desc_sound: 'Проигрывать звук, когда помощник завершает ответ.', settings_label_notifications: 'Уведомления браузера', @@ -5193,6 +5209,10 @@ const LOCALES = { transcript: 'Transcripción', download_transcript: 'Descargar como Markdown', import: 'Importar', + export_session_json: 'JSON', + export_session_json_tooltip: 'Exportar sesión completa como JSON', + import_session_json_tooltip: 'Importar sesión desde JSON', + clear_conversation_btn_tooltip: 'Borrar todos los mensajes de esta conversación', // Settings detail settings_label_sound: 'Sonido de notificación', settings_desc_sound: 'Reproduce un sonido cuando el asistente termina una respuesta.', @@ -6287,6 +6307,10 @@ const LOCALES = { transcript: 'Protokoll', download_transcript: 'Als Markdown herunterladen', import: 'Importieren', + export_session_json: 'JSON', + export_session_json_tooltip: 'Gesamte Sitzung als JSON exportieren', + import_session_json_tooltip: 'Sitzung aus JSON importieren', + clear_conversation_btn_tooltip: 'Alle Nachrichten in dieser Konversation löschen', // Settings detail settings_label_sound: 'Benachrichtigungston', settings_desc_sound: 'Spielt einen Ton ab, wenn der Assistent eine Antwort beendet.', @@ -7434,6 +7458,10 @@ const LOCALES = { transcript: '记录', download_transcript: '下载为 Markdown', import: '导入', + export_session_json: 'JSON', + export_session_json_tooltip: '将会话完整导出为 JSON', + import_session_json_tooltip: '从 JSON 导入会话', + clear_conversation_btn_tooltip: '清空此会话中的所有消息', editing: '编辑中', empty_title: '有什么可以帮您?', empty_subtitle: '随时提问、运行命令、浏览文件或管理定时任务。', @@ -8545,6 +8573,10 @@ const LOCALES = { transcript: '\u8a18\u9304', download_transcript: 
'\u4e0b\u8f09\u8a18\u9304', import: '\u5c0e\u5165', + export_session_json: 'JSON', + export_session_json_tooltip: '\u5c07\u6703\u8a71\u5b8c\u6574\u532f\u51fa\u70ba JSON', + import_session_json_tooltip: '\u5f9e JSON \u532f\u5165\u6703\u8a71', + clear_conversation_btn_tooltip: '\u6e05\u7a7a\u6b64\u6703\u8a71\u4e2d\u7684\u6240\u6709\u8a0a\u606f', editing: '\u7de8\u8f2f\u4e2d', empty_title: '有什麼可以幫忙?', empty_subtitle: '點擊上方按鈕開始對話', @@ -9834,6 +9866,10 @@ const LOCALES = { transcript: 'Transcrição', download_transcript: 'Baixar como Markdown', import: 'Importar', + export_session_json: 'JSON', + export_session_json_tooltip: 'Exportar sessão completa como JSON', + import_session_json_tooltip: 'Importar sessão de JSON', + clear_conversation_btn_tooltip: 'Limpar todas as mensagens nesta conversa', // Settings detail settings_label_sound: 'Som de notificação', settings_desc_sound: 'Tocar som quando assistente finalizar resposta.', @@ -10918,6 +10954,10 @@ const LOCALES = { transcript: '대화 기록', download_transcript: 'Download as Markdown', import: '가져오기', + export_session_json: 'JSON', + export_session_json_tooltip: '전체 세션을 JSON으로 내보내기', + import_session_json_tooltip: 'JSON에서 세션 가져오기', + clear_conversation_btn_tooltip: '이 대화의 모든 메시지 지우기', // Settings detail settings_label_sound: '알림음', settings_desc_sound: 'Assistant 응답이 끝나면 소리를 재생합니다.', @@ -12017,6 +12057,10 @@ const LOCALES = { transcript: 'Transcription', download_transcript: 'Télécharger en Markdown', import: 'Importer', + export_session_json: 'JSON', + export_session_json_tooltip: 'Exporter la session complète en JSON', + import_session_json_tooltip: 'Importer une session depuis JSON', + clear_conversation_btn_tooltip: 'Effacer tous les messages de cette conversation', settings_label_sound: 'Son de notification', settings_desc_sound: 'Jouez un son lorsque l\'assistant termine une réponse.', tts_listen: 'Écouter', diff --git a/static/index.html b/static/index.html index 884ca786..22a3e29d 100644 --- a/static/index.html 
+++ b/static/index.html @@ -816,9 +816,9 @@
- - - + + +
From 5bd1f1441de62076f42c7c475fe52408809247cc Mon Sep 17 00:00:00 2001 From: Michael Lam Date: Sat, 16 May 2026 01:02:19 -0700 Subject: [PATCH 04/15] fix: route background worker credentials through profile env --- CHANGELOG.md | 2 +- api/profiles.py | 16 ++++++++++++---- tests/test_sprint46.py | 2 +- tests/test_title_aux_routing.py | 18 +++++++++++++----- tests/test_update_banner_fixes.py | 4 ++-- 5 files changed, 29 insertions(+), 13 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index df4a44a6..8abe1cad 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,7 +4,7 @@ ### Fixed -- Background profile workers now route broad runtime env through thread-local state while setting only `HERMES_HOME` under the narrow process-env lock, preserving `hermes_cli.config.load_config()` compatibility for non-default profiles without leaking profile-specific env keys process-wide. +- Background profile workers now route broad runtime env through thread-local state and mirror it into process env for the worker body, preserving `hermes_cli.config.load_config()` compatibility plus provider credential readers that still call `os.getenv()` directly, then restoring prior env values after the worker exits. ## [v0.51.74] — 2026-05-16 — Release AX (stage-367 — 4-PR safe-lane batch — #2362 table-cell spacing + #2363 run-state-consistency RFC + #2365 custom_providers list-format + #2367 settings sidebar i18n) diff --git a/api/profiles.py b/api/profiles.py index e637dcb7..763654e9 100644 --- a/api/profiles.py +++ b/api/profiles.py @@ -716,20 +716,23 @@ def profile_env_for_background_worker( thread_env = dict(runtime_env) thread_env["HERMES_HOME"] = str(profile_home_path) # Hybrid profile routing: keep the broad runtime env in WebUI's thread-local - # channel so provider/API-key overrides do not leak through process-global - # os.environ, but still set HERMES_HOME under the narrow _ENV_LOCK because - # hermes_cli.config.load_config() currently resolves homes from os.environ. 
- # Do not hold _ENV_LOCK across the worker body. + # channel for WebUI helpers, and also mirror it into process env for the + # worker body because several production Hermes readers still call + # os.getenv() directly for provider credentials. Keep the _ENV_LOCK scope + # narrow: serialize only setup/restore, not the whole worker body. skill_home_snapshot = None + old_runtime_env: dict[str, Optional[str]] = {} old_hermes_home = None had_hermes_home = False previous_thread_env = getattr(_thread_ctx, "env", {}).copy() try: _set_thread_env(**thread_env) with _ENV_LOCK: + old_runtime_env = {key: os.environ.get(key) for key in runtime_env} had_hermes_home = "HERMES_HOME" in os.environ old_hermes_home = os.environ.get("HERMES_HOME") skill_home_snapshot = snapshot_skill_home_modules() + os.environ.update(runtime_env) os.environ["HERMES_HOME"] = str(profile_home_path) try: patch_skill_home_modules(profile_home_path) @@ -747,6 +750,11 @@ def profile_env_for_background_worker( else: _clear_thread_env() with _ENV_LOCK: + for key, old_value in old_runtime_env.items(): + if old_value is None: + os.environ.pop(key, None) + else: + os.environ[key] = old_value if had_hermes_home: os.environ["HERMES_HOME"] = old_hermes_home or "" else: diff --git a/tests/test_sprint46.py b/tests/test_sprint46.py index cffe73e2..1b0856a5 100644 --- a/tests/test_sprint46.py +++ b/tests/test_sprint46.py @@ -465,7 +465,7 @@ def test_manual_compress_worker_uses_session_profile_env(monkeypatch, tmp_path, assert EnvAssertingAgent.seen_env == { "HERMES_HOME": str(profile_home), - "HERMES_TEST_PROFILE_ENV": None, + "HERMES_TEST_PROFILE_ENV": "work-runtime", "THREAD_HERMES_HOME": str(profile_home), "THREAD_HERMES_TEST_PROFILE_ENV": "work-runtime", "SKILL_MODULE_HOME": profile_home, diff --git a/tests/test_title_aux_routing.py b/tests/test_title_aux_routing.py index 0acffbbb..c9a1552d 100644 --- a/tests/test_title_aux_routing.py +++ b/tests/test_title_aux_routing.py @@ -502,8 +502,8 @@ class 
TestBackgroundTitleProfileRouting(unittest.TestCase): self.assertEqual(getattr(fake_skill_module, 'SKILLS_DIR'), 'default-home/skills') self.assertEqual(mock_session.title, 'Profile Routed Title') - def test_background_profile_env_routes_load_config_without_process_env_leak(self): - """Hybrid worker env must satisfy hermes_cli.load_config without leaking profile env keys.""" + def test_background_profile_env_routes_load_config_and_provider_credentials(self): + """Hybrid worker env must satisfy config and os.getenv provider-key readers.""" import tempfile import pytest @@ -529,8 +529,12 @@ class TestBackgroundTitleProfileRouting(unittest.TestCase): f.write('model:\n provider: profile-provider\n default: profile-model\n') with patch('api.profiles.get_hermes_home_for_profile', return_value=profile_home): - with patch('api.profiles.get_profile_runtime_env', return_value={'PROFILE_ONLY_KEY': 'profile-only'}): - with patch.dict(os.environ, {'HERMES_HOME': default_home}, clear=False): + runtime_env = { + 'PROFILE_ONLY_KEY': 'profile-only', + 'OPENROUTER_API_KEY': 'profile-openrouter-key', + } + with patch('api.profiles.get_profile_runtime_env', return_value=runtime_env): + with patch.dict(os.environ, {'HERMES_HOME': default_home, 'OPENROUTER_API_KEY': 'default-openrouter-key'}, clear=False): os.environ.pop('PROFILE_ONLY_KEY', None) hermes_config._LOAD_CONFIG_CACHE.clear() with profiles.profile_env_for_background_worker(session, 'background title'): @@ -538,19 +542,23 @@ class TestBackgroundTitleProfileRouting(unittest.TestCase): captured['loaded_provider'] = loaded.get('model', {}).get('provider') captured['process_home'] = os.environ.get('HERMES_HOME') captured['process_runtime_key'] = os.environ.get('PROFILE_ONLY_KEY') + captured['provider_credential'] = os.getenv('OPENROUTER_API_KEY') captured['thread_home'] = getattr(_thread_ctx, 'env', {}).get('HERMES_HOME') captured['thread_runtime_key'] = getattr(_thread_ctx, 'env', {}).get('PROFILE_ONLY_KEY') 
captured['restored_home'] = os.environ.get('HERMES_HOME') captured['restored_runtime_key'] = os.environ.get('PROFILE_ONLY_KEY') + captured['restored_provider_credential'] = os.environ.get('OPENROUTER_API_KEY') hermes_config._LOAD_CONFIG_CACHE.clear() self.assertEqual(captured['loaded_provider'], 'profile-provider') self.assertEqual(captured['process_home'], profile_home) - self.assertIsNone(captured['process_runtime_key']) + self.assertEqual(captured['process_runtime_key'], 'profile-only') + self.assertEqual(captured['provider_credential'], 'profile-openrouter-key') self.assertEqual(captured['thread_home'], profile_home) self.assertEqual(captured['thread_runtime_key'], 'profile-only') self.assertEqual(captured['restored_home'], default_home) self.assertIsNone(captured['restored_runtime_key']) + self.assertEqual(captured['restored_provider_credential'], 'default-openrouter-key') class TestAuxTitleTimeoutEdgeCases(unittest.TestCase): diff --git a/tests/test_update_banner_fixes.py b/tests/test_update_banner_fixes.py index 210a1806..1750668c 100644 --- a/tests/test_update_banner_fixes.py +++ b/tests/test_update_banner_fixes.py @@ -569,13 +569,13 @@ class TestUpdateSummaryRouteModelSelection: assert captured['aux_task'] == 'compression' assert captured['model_resolution_env'] == { 'HERMES_HOME': str(profile_home), - 'HERMES_TEST_PROFILE_ENV': 'default-runtime', + 'HERMES_TEST_PROFILE_ENV': 'work-runtime', 'THREAD_HERMES_HOME': str(profile_home), 'THREAD_HERMES_TEST_PROFILE_ENV': 'work-runtime', } assert captured['aux_env'] == { 'HERMES_HOME': str(profile_home), - 'HERMES_TEST_PROFILE_ENV': 'default-runtime', + 'HERMES_TEST_PROFILE_ENV': 'work-runtime', 'THREAD_HERMES_HOME': str(profile_home), 'THREAD_HERMES_TEST_PROFILE_ENV': 'work-runtime', 'SKILL_MODULE_HOME': profile_home, From 3cbe206832047389cf243c57ef788bc0931fcbbd Mon Sep 17 00:00:00 2001 From: Michael Lam Date: Sat, 16 May 2026 02:12:11 -0700 Subject: [PATCH 05/15] fix: keep markdown tables block-level --- 
CHANGELOG.md | 4 ++++ static/ui.js | 7 +++++-- tests/test_renderer_js_behaviour.py | 31 +++++++++++++++++++++++++++++ 3 files changed, 40 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6fa25bab..ae569eec 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,10 @@ ## [Unreleased] +### Fixed + +- **PR #2375** by @Michaelyklam (closes #2374) — Markdown tables rendered by chat messages now stay as block-level `` elements instead of being wrapped in paragraph tags by the renderer's final paragraph pass. This keeps CommonMark-style pipe tables visible as tables across browsers. + ## [v0.51.74] — 2026-05-16 — Release AX (stage-367 — 4-PR safe-lane batch — #2362 table-cell spacing + #2363 run-state-consistency RFC + #2365 custom_providers list-format + #2367 settings sidebar i18n) ### Added diff --git a/static/ui.js b/static/ui.js index c050811c..253dd86a 100644 --- a/static/ui.js +++ b/static/ui.js @@ -2614,7 +2614,10 @@ function renderMd(raw){ const parseHeader=r=>r.trim().replace(/^\|/,'').replace(/\|$/,'').split('|').map(c=>``).join(''); const header=`${parseHeader(rows[0])}`; const body=rows.slice(2).map(r=>`${parseRow(r)}`).join(''); - return `
${inlineMd(c.trim())}
${header}${body}
`; + // Surround with blank lines so the final paragraph splitter treats the + // generated table as its own block even when the regex consumes one of the + // markdown block's trailing newlines. + return `\n\n${header}${body}
\n\n`; }); // #487: Outer image pass — handles ![alt](url) in plain paragraphs (outside tables/lists). // Runs AFTER the table pass (images in table cells are handled by inlineMd() above). @@ -2757,7 +2760,7 @@ function renderMd(raw){ return '\x00E'+(_pre_stash.length-1)+'\x00'; }); const parts=s.split(/\n{2,}/); - s=parts.map(p=>{p=p.trim();if(!p)return '';if(/^<(h[1-6]|ul|ol|pre|hr|blockquote)|^\x00[EQ]/.test(p))return p;return `

${p.replace(/\n/g,'
')}

`;}).join('\n'); + s=parts.map(p=>{p=p.trim();if(!p)return '';if(/^<(h[1-6]|ul|ol|table|pre|hr|blockquote)|^\x00[EQ]/.test(p))return p;return `

${p.replace(/\n/g,'
')}

`;}).join('\n'); s=s.replace(/\x00E(\d+)\x00/g,(_,i)=>_pre_stash[+i]); // ── Restore MEDIA stash → inline images or download links ───────────────── s=s.replace(/\x00D(\d+)\x00/g,(_,i)=>{ diff --git a/tests/test_renderer_js_behaviour.py b/tests/test_renderer_js_behaviour.py index 22a831b7..509b1d72 100644 --- a/tests/test_renderer_js_behaviour.py +++ b/tests/test_renderer_js_behaviour.py @@ -187,6 +187,37 @@ class TestRendererSanitization: class TestCommonLLMShapes: + def test_commonmark_table_is_not_wrapped_in_paragraph(self, driver_path): + src = ( + "| 升级时段 | 人数 |\n" + "|---------|------|\n" + "| 5/15(发布当天) | ~30 人 |\n" + "| 5/16(今天) | ~10 人 |" + ) + out = _render(driver_path, src) + assert "" in out + assert "" in out + assert "" in out + assert "" in out + assert "

Before the table.

" in out + assert "
升级时段5/15(发布当天)~10 人
" in out + assert "

After the table.

" in out + assert "

" not in out + def test_strikethrough_outside_quote(self, driver_path): out = _render(driver_path, "This was ~~outdated~~ but is now fine.") assert "outdated" in out From 574ee36460d5563c4394aa93492e2b7121464385 Mon Sep 17 00:00:00 2001 From: Frank Song Date: Sat, 16 May 2026 17:52:35 +0800 Subject: [PATCH 06/15] Add run journal replay timeline parity checks --- CHANGELOG.md | 4 ++ .../webui-run-state-consistency-contract.md | 8 ++- tests/test_run_journal_frontend_static.py | 57 +++++++++++++++++++ 3 files changed, 67 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6fa25bab..79d0f0f6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,10 @@ ## [Unreleased] +### Added + +- **PR TBD** by @franksong2702 (refs #2376, refs #1925, refs #2283, refs #2361, refs #2363) — Adds a focused run-journal replay parity check for long tool-heavy turns. The frontend regression coverage now pins that replayed `reasoning`, `interim_assistant`, `tool`, `tool_complete`, `compressing`, `compressed`, `metering`, and terminal events enter the same EventSource timeline handlers as live streaming, and that each user-visible long-task event advances the replay cursor to avoid duplicate replay. The run-state consistency RFC now calls out live/replay timeline parity explicitly. + ## [v0.51.74] — 2026-05-16 — Release AX (stage-367 — 4-PR safe-lane batch — #2362 table-cell spacing + #2363 run-state-consistency RFC + #2365 custom_providers list-format + #2367 settings sidebar i18n) ### Added diff --git a/docs/rfcs/webui-run-state-consistency-contract.md b/docs/rfcs/webui-run-state-consistency-contract.md index 96d9ff3b..b3329a25 100644 --- a/docs/rfcs/webui-run-state-consistency-contract.md +++ b/docs/rfcs/webui-run-state-consistency-contract.md @@ -78,7 +78,10 @@ while WebUI still has multiple overlapping state stores. assistant just acted. 5. 
**Replay is idempotent.** Replaying a run from a cursor must not duplicate transcript rows, thinking content, interim assistant text, tool cards, or - compression cards. + compression cards. Replayed long-task events should enter the same + browser-facing timeline renderer as live SSE events so recovery does not + downgrade a structured Thinking / progress / tool / compression turn into a + separate flattened presentation. 6. **Compression is not current intent.** Automatic compression summaries and reference cards are recovery/handoff material. They must not be treated as a new user request, active-turn content, or the default visible explanation for @@ -102,6 +105,8 @@ context reconstruction, or session metadata: - What happens after browser refresh, session switch, SSE reconnect, and WebUI restart? - Does replay rebuild the same scene without duplicates? +- Does replay use the same timeline-rendering path as live SSE for thinking, + interim assistant text, tool cards, compression cards, and terminal states? - Can this change move a session in the sidebar without meaningful user or assistant activity? - Can automatic compression or recovery text become visible active-turn content? @@ -147,4 +152,3 @@ The two documents should be read together: 4. If #1925 introduces a new adapter-backed runtime layer, update this RFC or replace it with the accepted implementation contract so these invariants do not live only in historical discussion. 
- diff --git a/tests/test_run_journal_frontend_static.py b/tests/test_run_journal_frontend_static.py index aaee3048..02d1b1e5 100644 --- a/tests/test_run_journal_frontend_static.py +++ b/tests/test_run_journal_frontend_static.py @@ -35,3 +35,60 @@ def test_frontend_replay_cursor_uses_eventsource_last_event_id(): assert "source.addEventListener(_runJournalEventName,_rememberRunJournalCursor)" in MESSAGES_SRC assert "after_seq=${encodeURIComponent(String(_runJournalReplayAfterSeq()))}" in MESSAGES_SRC assert "after_seq=0" not in MESSAGES_SRC + + +def test_replayed_long_task_events_enter_the_same_live_timeline_handlers(): + """Run-journal replay must not grow a parallel long-task renderer. + + The run-state consistency contract depends on replayed journal events + flowing through the same EventSource handlers as live streams. Otherwise a + live long task can render as Thinking -> progress text -> tool cards, while + the same journaled event sequence replays as a flattened or reordered scene. 
+ """ + wire_pos = MESSAGES_SRC.index("function _wireSSE(source)") + wire_block = MESSAGES_SRC[wire_pos : MESSAGES_SRC.index("async function _restoreSettledSession", wire_pos)] + replay_events = [ + "reasoning", + "interim_assistant", + "tool", + "tool_complete", + "compressing", + "compressed", + "metering", + "done", + "apperror", + ] + + for event_name in replay_events: + assert f"source.addEventListener('{event_name}'" in wire_block, ( + f"{event_name} must be handled by the shared live/replay SSE pipeline" + ) + + assert "updateThinking(" in wire_block, "reasoning replay should use the live Thinking card path" + assert "appendLiveToolCard(tc)" in wire_block, "tool replay should use live tool-card rendering" + assert "setCompressionUi({" in wire_block, "compression replay should use the compression card path" + assert "_runJournalReplayParams()" in MESSAGES_SRC, "replay attachments should enter _wireSSE via EventSource" + + +def test_run_journal_cursor_tracks_every_long_task_timeline_event(): + """Every user-visible long-task event needs cursor tracking for parity replay.""" + cursor_loop_pos = MESSAGES_SRC.index("for(const _runJournalEventName of [") + cursor_loop = MESSAGES_SRC[cursor_loop_pos : cursor_loop_pos + 700] + timeline_events = [ + "token", + "interim_assistant", + "reasoning", + "tool", + "tool_complete", + "compressing", + "compressed", + "metering", + "done", + "apperror", + "cancel", + ] + + for event_name in timeline_events: + assert f"'{event_name}'" in cursor_loop, ( + f"{event_name} must advance the replay cursor to avoid duplicate timeline replay" + ) From 962b3840e6604b71b2c054a7db325caa218bc63f Mon Sep 17 00:00:00 2001 From: Michael Lam Date: Sat, 16 May 2026 03:54:28 -0700 Subject: [PATCH 07/15] fix: strip historical images in text mode --- CHANGELOG.md | 4 +++ api/streaming.py | 42 +++++++++++++++++++++++--- tests/test_native_image_attachments.py | 36 ++++++++++++++++++++++ 3 files changed, 78 insertions(+), 4 deletions(-) diff --git 
a/CHANGELOG.md b/CHANGELOG.md index 6fa25bab..3e582476 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,10 @@ ## [Unreleased] +### Fixed + +- **PR #2378** by @Michaelyklam (closes #2297) — Text-mode image handling now strips historical native `image_url` parts from provider-facing conversation replay. Current-turn uploads already respected `agent.image_input_mode: text`; this closes the remaining gap where an older image in the saved transcript could keep making text-only providers such as DeepSeek reject every later turn with `unknown variant image_url, expected text`. + ## [v0.51.74] — 2026-05-16 — Release AX (stage-367 — 4-PR safe-lane batch — #2362 table-cell spacing + #2363 run-state-consistency RFC + #2365 custom_providers list-format + #2367 settings sidebar i18n) ### Added diff --git a/api/streaming.py b/api/streaming.py index 18a32fc2..7ef8dc4c 100644 --- a/api/streaming.py +++ b/api/streaming.py @@ -1831,7 +1831,32 @@ def _maybe_schedule_title_refresh(session, put_event, agent): ).start() -def _sanitize_messages_for_api(messages): +def _strip_native_image_parts_from_content(content): + """Return provider-safe content with native image parts removed. + + Text-only provider endpoints (for example DeepSeek/OpenAI-compatible text + models) reject historical OpenAI-style ``image_url`` parts before the agent + can recover. When WebUI is configured for text-mode image handling, preserve + textual content from mixed content arrays and drop only the native image + blocks from replayed history. 
+ """ + if not isinstance(content, list): + return content + clean_parts = [] + for part in content: + if not isinstance(part, dict): + continue + if part.get('type') == 'image_url' or 'image_url' in part: + continue + clean_parts.append(copy.deepcopy(part)) + if not clean_parts: + return '' + if len(clean_parts) == 1 and clean_parts[0].get('type') == 'text': + return str(clean_parts[0].get('text') or '') + return clean_parts + + +def _sanitize_messages_for_api(messages, *, cfg: dict = None): """Return a deep copy of messages with only API-safe fields. The webui stores extra metadata on messages (attachments, timestamp, _ts) @@ -1843,7 +1868,14 @@ def _sanitize_messages_for_api(messages): (Mercury-2/Inception, newer OpenAI models) reject histories containing dangling tool results with a 400 error: "Message has tool role, but there was no previous assistant message with a tool call." + + If ``agent.image_input_mode`` resolves to ``text``, native historical + ``image_url`` content parts are stripped too. Current-turn uploads already + respect text mode in ``_build_native_multimodal_message``; this closes the + remaining replay gap where an older native image in the saved transcript kept + causing 400s on every later text-only turn (#2297). """ + strip_native_images = cfg is not None and _resolve_image_input_mode(cfg) == "text" # First pass: collect all tool_call_ids declared by assistant messages. # Handles both OpenAI ('id') and Anthropic ('call_id') field names. valid_tool_call_ids: set = set() @@ -1872,6 +1904,8 @@ def _sanitize_messages_for_api(messages): # Orphaned tool result — skip to avoid 400 from strict providers. 
continue sanitized = {k: v for k, v in msg.items() if k in _API_SAFE_MSG_KEYS} + if strip_native_images and 'content' in sanitized: + sanitized['content'] = _strip_native_image_parts_from_content(sanitized.get('content')) if sanitized.get('role'): clean.append(sanitized) return clean @@ -3515,7 +3549,7 @@ def _run_agent_streaming( result = agent.run_conversation( user_message=user_message, system_message=workspace_system_msg, - conversation_history=_sanitize_messages_for_api(_previous_context_messages), + conversation_history=_sanitize_messages_for_api(_previous_context_messages, cfg=_cfg), task_id=session_id, persist_user_message=msg_text, ) @@ -3726,7 +3760,7 @@ def _run_agent_streaming( _heal_result = agent.run_conversation( user_message=user_message, system_message=workspace_system_msg, - conversation_history=_sanitize_messages_for_api(_previous_context_messages), + conversation_history=_sanitize_messages_for_api(_previous_context_messages, cfg=_cfg), task_id=session_id, persist_user_message=msg_text, ) @@ -4505,7 +4539,7 @@ def _run_agent_streaming( _heal_result = _heal_agent.run_conversation( user_message=user_message, system_message=workspace_system_msg, - conversation_history=_sanitize_messages_for_api(_previous_context_messages), + conversation_history=_sanitize_messages_for_api(_previous_context_messages, cfg=_cfg), task_id=session_id, persist_user_message=msg_text, ) diff --git a/tests/test_native_image_attachments.py b/tests/test_native_image_attachments.py index f6b04166..8c38aea5 100644 --- a/tests/test_native_image_attachments.py +++ b/tests/test_native_image_attachments.py @@ -17,6 +17,7 @@ from api.streaming import ( _attachment_name, _build_native_multimodal_message, _NATIVE_IMAGE_MAX_BYTES, + _sanitize_messages_for_api, ) from api.routes import _normalize_chat_attachments @@ -318,6 +319,41 @@ class TestBuildNativeMultimodalMessage: assert data_url.startswith('data:image/png;base64,') assert len(result) == 2 + def 
test_text_image_mode_strips_historical_image_url_parts(self): + """#2297: text-only providers must not replay old native image parts.""" + history = [ + { + 'role': 'user', + 'content': [ + {'type': 'text', 'text': 'what is in this image?'}, + {'type': 'image_url', 'image_url': {'url': 'data:image/png;base64,AAA='}}, + ], + 'attachments': [{'name': 'photo.png'}], + 'timestamp': 123, + }, + {'role': 'assistant', 'content': 'It is a chart.'}, + ] + cfg = {'agent': {'image_input_mode': 'text'}} + + sanitized = _sanitize_messages_for_api(history, cfg=cfg) + + assert sanitized[0] == {'role': 'user', 'content': 'what is in this image?'} + assert 'image_url' not in str(sanitized) + assert 'attachments' not in sanitized[0] + assert sanitized[1] == {'role': 'assistant', 'content': 'It is a chart.'} + + def test_native_image_mode_keeps_historical_image_url_parts(self): + """Vision-capable/native mode keeps existing multimodal history intact.""" + content = [ + {'type': 'text', 'text': 'describe'}, + {'type': 'image_url', 'image_url': {'url': 'data:image/png;base64,AAA='}}, + ] + cfg = {'agent': {'image_input_mode': 'native'}} + + sanitized = _sanitize_messages_for_api([{'role': 'user', 'content': content}], cfg=cfg) + + assert sanitized == [{'role': 'user', 'content': content}] + def test_fake_png_rejected_by_magic_bytes(self): """A file named .png that is not actually an image must be rejected.""" with TemporaryDirectory() as d: From 2284d42695311213b149d0eeda023f3966d007b8 Mon Sep 17 00:00:00 2001 From: Michael Lam Date: Sat, 16 May 2026 04:31:16 -0700 Subject: [PATCH 08/15] fix: interpolate German session time labels --- CHANGELOG.md | 4 ++++ static/i18n.js | 6 +++--- tests/test_session_sidebar_relative_time.py | 9 +++++++++ 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6fa25bab..6434724c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,10 @@ ## [Unreleased] +### Fixed + +- **PR #2381** by @Michaelyklam (fixes 
#2379) — German relative session-time labels now interpolate the elapsed value instead of rendering the literal `{n}` placeholder in the sidebar/header. The German locale now uses function-valued translations for minutes, hours, and days, matching the other locale bundles. + ## [v0.51.74] — 2026-05-16 — Release AX (stage-367 — 4-PR safe-lane batch — #2362 table-cell spacing + #2363 run-state-consistency RFC + #2365 custom_providers list-format + #2367 settings sidebar i18n) ### Added diff --git a/static/i18n.js b/static/i18n.js index 30c497f1..71860fd6 100644 --- a/static/i18n.js +++ b/static/i18n.js @@ -6581,9 +6581,9 @@ const LOCALES = { session_toolsets_cleared: 'Toolsets cleared — using global config', // TODO: translate session_toolsets_failed: 'Failed to update toolsets: ', // TODO: translate session_time_unknown: 'Unbekannt', - session_time_minutes_ago: 'Vor {n} Minuten', - session_time_hours_ago: 'Vor {n} Stunden', - session_time_days_ago: 'Vor {n} Tagen', + session_time_minutes_ago: (n) => `Vor ${n} Minuten`, + session_time_hours_ago: (n) => `Vor ${n} Stunden`, + session_time_days_ago: (n) => `Vor ${n} Tagen`, session_time_last_week: 'Letzte Woche', session_time_bucket_today: 'Heute', session_time_bucket_yesterday: 'Gestern', diff --git a/tests/test_session_sidebar_relative_time.py b/tests/test_session_sidebar_relative_time.py index 5aa95be8..84697dec 100644 --- a/tests/test_session_sidebar_relative_time.py +++ b/tests/test_session_sidebar_relative_time.py @@ -169,3 +169,12 @@ def test_relative_time_strings_are_localized_in_english_and_spanish_bundles(): "session_time_bucket_older", ): assert key in I18N_JS + + +def test_german_relative_time_translations_interpolate_numbers(): + assert "session_time_minutes_ago: (n) => `Vor ${n} Minuten`" in I18N_JS + assert "session_time_hours_ago: (n) => `Vor ${n} Stunden`" in I18N_JS + assert "session_time_days_ago: (n) => `Vor ${n} Tagen`" in I18N_JS + assert "session_time_minutes_ago: 'Vor {n} Minuten'" not in I18N_JS 
+ assert "session_time_hours_ago: 'Vor {n} Stunden'" not in I18N_JS + assert "session_time_days_ago: 'Vor {n} Tagen'" not in I18N_JS From 11796fe7a87a187e54501f4e5d794dda18b41c3b Mon Sep 17 00:00:00 2001 From: Frank Song Date: Sat, 16 May 2026 20:05:19 +0800 Subject: [PATCH 09/15] Avoid magic cursor-loop test window --- tests/test_run_journal_frontend_static.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_run_journal_frontend_static.py b/tests/test_run_journal_frontend_static.py index 02d1b1e5..773b6618 100644 --- a/tests/test_run_journal_frontend_static.py +++ b/tests/test_run_journal_frontend_static.py @@ -73,7 +73,7 @@ def test_replayed_long_task_events_enter_the_same_live_timeline_handlers(): def test_run_journal_cursor_tracks_every_long_task_timeline_event(): """Every user-visible long-task event needs cursor tracking for parity replay.""" cursor_loop_pos = MESSAGES_SRC.index("for(const _runJournalEventName of [") - cursor_loop = MESSAGES_SRC[cursor_loop_pos : cursor_loop_pos + 700] + cursor_loop = MESSAGES_SRC[cursor_loop_pos : MESSAGES_SRC.index("]", cursor_loop_pos)] timeline_events = [ "token", "interim_assistant", From c415c843dfd77ddb859838d5f30ada1dcc6958c7 Mon Sep 17 00:00:00 2001 From: Frank Song Date: Sat, 16 May 2026 20:05:47 +0800 Subject: [PATCH 10/15] Update interrupted recovery comment wording --- api/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/models.py b/api/models.py index 0f6397c3..e0e52004 100644 --- a/api/models.py +++ b/api/models.py @@ -815,7 +815,7 @@ def _apply_core_sync_or_error_marker( # pending_user_message and STREAMS.pop(stream_id). Without this guard, any # fast turn (e.g. command approval) that exits the thread before the on-disk # pending clear has flushed gets misdiagnosed as a crashed turn, producing a -# spurious "Previous turn did not complete." marker. +# spurious "Response interrupted." marker. 
# # 30s covers the worst-case post-loop persistence window: LLM finishing a tool # batch + lock contention with the checkpoint thread + a multi-MB session.save. From e4dad1c25de1174fb2eeee89abd2e96235c6d531 Mon Sep 17 00:00:00 2001 From: Michael Lam Date: Sat, 16 May 2026 05:08:02 -0700 Subject: [PATCH 11/15] fix: serve raw chat attachments from inbox --- CHANGELOG.md | 4 ++++ api/routes.py | 28 ++++++++++++++++++++++++++-- tests/test_sprint2.py | 2 +- tests/test_sprint6.py | 33 ++++++++++++++++++++++++++++++++- 4 files changed, 63 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6fa25bab..2e26fa92 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,10 @@ ## [Unreleased] +### Fixed + +- **PR #2382** by @Michaelyklam (closes #2380) — `api/file/raw` now falls back to the requesting session's attachment inbox when a chat-upload filename is not present in the workspace. This keeps existing `api/file/raw?session_id=...&path=` image URLs working after uploads moved under `~/.hermes/webui/attachments//`, while preserving traversal protection and cross-session isolation. + ## [v0.51.74] — 2026-05-16 — Release AX (stage-367 — 4-PR safe-lane batch — #2362 table-cell spacing + #2363 run-state-consistency RFC + #2365 custom_providers list-format + #2367 settings sidebar i18n) ### Added diff --git a/api/routes.py b/api/routes.py index 3ad444e9..d6697058 100644 --- a/api/routes.py +++ b/api/routes.py @@ -6360,10 +6360,34 @@ def _handle_media(handler, parsed): or html_inline_ok ) ) else "attachment" + # _serve_file_bytes sends Content-Security-Policy when csp is set. 
csp = "sandbox allow-scripts" if html_inline_ok else None return _serve_file_bytes(handler, target, mime, disposition, "private, max-age=3600", csp=csp) +def _file_raw_target(session, sid: str, rel: str) -> Path | None: + """Resolve /api/file/raw paths from the workspace or this session's uploads.""" + try: + target = safe_resolve(Path(session.workspace), rel) + except ValueError: + target = None + if target and target.exists() and target.is_file(): + return target + + # Chat uploads now live in a per-session attachment inbox outside the + # workspace. Keep the public URL stable while scoping fallback lookup to + # the requesting session's own attachment directory. + try: + from api.upload import _session_attachment_dir + + attachment_target = safe_resolve(_session_attachment_dir(sid), rel) + except Exception: + return None + if attachment_target.exists() and attachment_target.is_file(): + return attachment_target + return None + + def _handle_file_raw(handler, parsed): qs = parse_qs(parsed.query) sid = qs.get("session_id", [""])[0] @@ -6375,8 +6399,8 @@ def _handle_file_raw(handler, parsed): return bad(handler, "Session not found", 404) rel = qs.get("path", [""])[0] force_download = qs.get("download", [""])[0] == "1" - target = safe_resolve(Path(s.workspace), rel) - if not target.exists() or not target.is_file(): + target = _file_raw_target(s, sid, rel) + if target is None: return j(handler, {"error": "not found"}, status=404) ext = target.suffix.lower() mime = MIME_MAP.get(ext, "application/octet-stream") diff --git a/tests/test_sprint2.py b/tests/test_sprint2.py index aa5c5f2a..efb2085b 100644 --- a/tests/test_sprint2.py +++ b/tests/test_sprint2.py @@ -67,7 +67,7 @@ def test_raw_endpoint_path_traversal_blocked(cleanup_test_sessions): get_raw(f"/api/file/raw?session_id={sid}&path=../../etc/passwd") assert False except urllib.error.HTTPError as e: - assert e.code in (400, 500) + assert e.code in (400, 404, 500) def 
test_raw_endpoint_missing_file_returns_404(cleanup_test_sessions): sid, _ = make_session_tracked(cleanup_test_sessions) diff --git a/tests/test_sprint6.py b/tests/test_sprint6.py index 6c2ba1df..fcfbbe1e 100644 --- a/tests/test_sprint6.py +++ b/tests/test_sprint6.py @@ -1,5 +1,5 @@ """Sprint 6 tests: Escape from editor, Phase D validation, HTML extraction, cron create, session export.""" -import json, uuid, pathlib, urllib.request, urllib.error +import json, uuid, pathlib, urllib.parse, urllib.request, urllib.error REPO_ROOT = pathlib.Path(__file__).parent.parent.resolve() from tests._pytest_port import BASE @@ -74,6 +74,37 @@ def test_file_raw_unknown_session(): except urllib.error.HTTPError as e: assert e.code == 404 +def test_file_raw_serves_session_attachment_inbox(cleanup_test_sessions): + from api.upload import _session_attachment_dir + + sid, workspace = make_session_tracked(cleanup_test_sessions) + filename = f"uploaded-chat-image-{uuid.uuid4().hex}.png" + attachment_dir = _session_attachment_dir(sid) + attachment_dir.mkdir(parents=True, exist_ok=True) + payload = b"fake-png-bytes" + (attachment_dir / filename).write_bytes(payload) + + assert not (workspace / filename).exists(), "regression must exercise attachment fallback" + raw, headers, status = get_raw( + f"/api/file/raw?session_id={sid}&path={urllib.parse.quote(filename)}" + ) + assert status == 200 + assert raw == payload + assert "image/png" in headers.get("Content-Type", "") + +def test_file_raw_attachment_fallback_rejects_traversal(cleanup_test_sessions): + from api.upload import _session_attachment_dir + + sid, _ = make_session_tracked(cleanup_test_sessions) + attachment_dir = _session_attachment_dir(sid) + attachment_dir.mkdir(parents=True, exist_ok=True) + (attachment_dir / "safe.txt").write_text("safe", encoding="utf-8") + try: + get_raw(f"/api/file/raw?session_id={sid}&path={urllib.parse.quote('../../safe.txt')}") + assert False, "Expected 404" + except urllib.error.HTTPError as e: + assert 
e.code == 404 + # ── Cron create ── def test_cron_create_requires_prompt(): From 4899ae17b9dcf8296f0b4fc142c44a455bc8593b Mon Sep 17 00:00:00 2001 From: Frank Song Date: Sat, 16 May 2026 20:58:44 +0800 Subject: [PATCH 12/15] Keep fuller compression snapshots reachable --- CHANGELOG.md | 4 ++ api/models.py | 109 +++++++++++++++++++++++++++++++++++- tests/test_session_index.py | 38 +++++++++++++ 3 files changed, 150 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6fa25bab..bd77735c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,10 @@ ## [Unreleased] +### Fixed + +- Sidebar session lists now keep the fuller transcript reachable when an auto-compression snapshot has more messages than the visible continuation segment. Ordinary pre-compression snapshots remain hidden, but if hiding the snapshot would leave the user on a shorter segment, the sidebar prefers the fuller row so recent conversation content does not appear to vanish. + ## [v0.51.74] — 2026-05-16 — Release AX (stage-367 — 4-PR safe-lane batch — #2362 table-cell spacing + #2363 run-state-consistency RFC + #2365 custom_providers list-format + #2367 settings sidebar i18n) ### Added diff --git a/api/models.py b/api/models.py index f448f7a3..fc85c75b 100644 --- a/api/models.py +++ b/api/models.py @@ -1012,7 +1012,110 @@ def _hide_from_default_sidebar(session: dict) -> bool: """Return True for internal/background sessions hidden from the default list.""" sid = str(session.get('session_id') or '') source = session.get('source_tag') or session.get('source') - return bool(session.get('pre_compression_snapshot')) or source == 'cron' or sid.startswith('cron_') + if source == 'cron' or sid.startswith('cron_'): + return True + if bool(session.get('pre_compression_snapshot')): + return not bool(session.get('_show_pre_compression_snapshot')) + return False + + +def _sidebar_message_count(session: dict) -> int: + for key in ('message_count', 'actual_message_count'): + try: + value = 
int(session.get(key) or 0) + except (TypeError, ValueError): + value = 0 + if value > 0: + return value + return 0 + + +def _sidebar_lineage_root_id(session: dict, sessions_by_id: dict[str, dict]) -> str: + sid = str(session.get('session_id') or '') + root = sid + parent = session.get('parent_session_id') + seen = {sid} + while parent and parent not in seen and parent in sessions_by_id: + root = str(parent) + seen.add(root) + parent = sessions_by_id.get(root, {}).get('parent_session_id') + return root + + +def _has_live_sidebar_state(session: dict) -> bool: + return bool( + session.get('active_stream_id') + or session.get('has_pending_user_message') + or session.get('pending_user_message') + ) + + +def _prefer_fuller_snapshots_for_sidebar(sessions: list[dict]) -> list[dict]: + """Expose a hidden snapshot when it is the fuller transcript for a lineage. + + Pre-compression snapshots are normally hidden so archived compression + segments do not duplicate the current continuation in the sidebar. If a + snapshot row has more messages than the visible continuation for the same + lineage, hiding it makes the conversation look truncated. In that case, + show the fuller snapshot and suppress the shorter inactive continuation. 
+ """ + sessions_by_id = { + str(session.get('session_id')): session + for session in sessions + if session.get('session_id') + } + groups: dict[str, list[dict]] = {} + for session in sessions: + sid = str(session.get('session_id') or '') + source = session.get('source_tag') or session.get('source') + if source == 'cron' or sid.startswith('cron_'): + continue + root = _sidebar_lineage_root_id(session, sessions_by_id) + groups.setdefault(root, []).append(session) + + snapshot_ids_to_show: set[str] = set() + continuation_ids_to_hide: set[str] = set() + for group in groups.values(): + visible = [session for session in group if not session.get('pre_compression_snapshot')] + snapshots = [session for session in group if session.get('pre_compression_snapshot')] + if not visible or not snapshots: + continue + if any(_has_live_sidebar_state(session) for session in visible): + continue + + best_visible_count = max(_sidebar_message_count(session) for session in visible) + best_snapshot = max( + snapshots, + key=lambda session: (_sidebar_message_count(session), _session_sort_timestamp(session)), + ) + if _sidebar_message_count(best_snapshot) <= best_visible_count: + continue + + snapshot_ids_to_show.add(str(best_snapshot.get('session_id'))) + continuation_ids_to_hide.update( + str(session.get('session_id')) + for session in visible + if session.get('session_id') + ) + + if not snapshot_ids_to_show and not continuation_ids_to_hide: + return sessions + + out = [] + for session in sessions: + sid = str(session.get('session_id') or '') + if sid in continuation_ids_to_hide: + continue + if sid in snapshot_ids_to_show: + session = dict(session) + session['_show_pre_compression_snapshot'] = True + out.append(session) + return out + + +def _strip_sidebar_internal_flags(sessions: list[dict]) -> None: + for session in sessions: + session.pop('_show_pre_compression_snapshot', None) def _active_state_db_path() -> Path: @@ -1131,7 +1234,9 @@ def all_sessions(diag=None): and not 
s.get('has_pending_user_message') and not s.get('worktree_path') )] + result = _prefer_fuller_snapshots_for_sidebar(result) result = [s for s in result if not _hide_from_default_sidebar(s)] + _strip_sidebar_internal_flags(result) # Backfill: sessions created before Sprint 22 have no profile tag. # Attribute them to 'default' so the client profile filter works correctly. for s in result: @@ -1167,7 +1272,9 @@ def all_sessions(diag=None): and not s.pending_user_message and not getattr(s, 'worktree_path', None) )] + result = _prefer_fuller_snapshots_for_sidebar(result) result = [s for s in result if not _hide_from_default_sidebar(s)] + _strip_sidebar_internal_flags(result) for s in result: if not s.get('profile'): s['profile'] = 'default' diff --git a/tests/test_session_index.py b/tests/test_session_index.py index 6a9b5fca..6944da84 100644 --- a/tests/test_session_index.py +++ b/tests/test_session_index.py @@ -369,6 +369,44 @@ def test_pre_compression_snapshot_hidden_from_active_sidebar_but_file_remains(mo assert [row["session_id"] for row in rows] == ["new_sid"] +def test_fuller_pre_compression_snapshot_replaces_shorter_visible_segment(monkeypatch): + """If the hidden snapshot has the fuller transcript, keep it reachable. + + Auto-compression can leave a visible continuation segment in the sidebar + while the fuller transcript remains on disk marked as a pre-compression + snapshot. In that case the default session list should prefer the fuller + transcript so the conversation does not look like recent messages vanished. 
+ """ + snapshot = Session( + session_id="full_parent", + title="Long Conversation", + messages=[ + {"role": "user", "content": "first"}, + {"role": "assistant", "content": "second"}, + {"role": "user", "content": "latest user"}, + {"role": "assistant", "content": "latest answer"}, + ], + pre_compression_snapshot=True, + updated_at=300.0, + ) + continuation = Session( + session_id="short_child", + title="Long Conversation", + messages=[{"role": "user", "content": "first"}], + parent_session_id="full_parent", + updated_at=400.0, + ) + snapshot.save() + continuation.save() + monkeypatch.setattr(models, "_enrich_sidebar_lineage_metadata", lambda _sessions: None) + + rows = models.all_sessions() + + assert [row["session_id"] for row in rows] == ["full_parent"] + assert rows[0]["message_count"] == 4 + assert rows[0]["pre_compression_snapshot"] is True + + def test_session_save_does_not_persist_metadata_message_count_hint(): s = Session( session_id="sess_private_hint", From 3bb8c7b276422138e9d6c3adfd9d3f02638b7622 Mon Sep 17 00:00:00 2001 From: Michael Lam Date: Sat, 16 May 2026 07:31:03 -0700 Subject: [PATCH 13/15] fix: guard localStorage quota writes --- CHANGELOG.md | 4 +++ static/boot.js | 2 +- static/commands.js | 2 +- static/messages.js | 4 +-- static/sessions.js | 4 +-- tests/test_issue2386_localstorage_quota.py | 34 ++++++++++++++++++++++ 6 files changed, 44 insertions(+), 6 deletions(-) create mode 100644 tests/test_issue2386_localstorage_quota.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 6fa25bab..88849105 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,10 @@ ## [Unreleased] +### Fixed + +- **PR #2387** by @Michaelyklam (closes #2386) — Active-session and workspace-panel `localStorage` writes now degrade gracefully when browser storage quota is exhausted. 
The missed session/panel persistence writes now match the existing best-effort storage pattern so clicking sessions, creating chats, completing streams, and toggling the workspace panel no longer throw uncaught `QuotaExceededError` exceptions. + ## [v0.51.74] — 2026-05-16 — Release AX (stage-367 — 4-PR safe-lane batch — #2362 table-cell spacing + #2363 run-state-consistency RFC + #2365 custom_providers list-format + #2367 settings sidebar i18n) ### Added diff --git a/static/boot.js b/static/boot.js index 492b87e0..617bc171 100644 --- a/static/boot.js +++ b/static/boot.js @@ -101,7 +101,7 @@ function _setWorkspacePanelMode(mode){ // Persist open/closed across refreshes (browse/preview → open; closed → closed) // Do NOT overwrite the user's "keep open" preference — only track runtime state // so that toggleWorkspacePanel(false) from the toolbar doesn't clear the setting. - localStorage.setItem('hermes-webui-workspace-panel', open ? 'open' : 'closed'); + try{localStorage.setItem('hermes-webui-workspace-panel', open ? 
'open' : 'closed');}catch(_){} layout.classList.toggle('workspace-panel-collapsed',!open); if(_isCompactWorkspaceViewport()){ panel.classList.toggle('mobile-open',open); diff --git a/static/commands.js b/static/commands.js index 45441279..95c0e429 100644 --- a/static/commands.js +++ b/static/commands.js @@ -424,7 +424,7 @@ async function _applyManualCompressionResult(data, focusTopic, visibleCount, com S.messages=data.session.messages||[]; S.toolCalls=data.session.tool_calls||[]; clearLiveToolCards(); - localStorage.setItem('hermes-webui-session',S.session.session_id); + try{localStorage.setItem('hermes-webui-session',S.session.session_id);}catch(_){} if(typeof _setActiveSessionUrl==='function') _setActiveSessionUrl(S.session.session_id); syncTopbar(); renderMessages(); diff --git a/static/messages.js b/static/messages.js index d21d1144..e673e528 100644 --- a/static/messages.js +++ b/static/messages.js @@ -1454,7 +1454,7 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){ const _prevCost=(S.session&&S.session.estimated_cost)||0; S.session=d.session;S.messages=d.session.messages||[];if(typeof _messagesTruncated!=='undefined')_messagesTruncated=!!d.session._messages_truncated; if(S.session&&S.session.session_id){ - localStorage.setItem('hermes-webui-session',S.session.session_id); + try{localStorage.setItem('hermes-webui-session',S.session.session_id);}catch(_){} if(typeof _setActiveSessionUrl==='function') _setActiveSessionUrl(S.session.session_id); } const _markerOnlyAssistantError=_replaceMarkerOnlyAssistantWithStreamError(S.messages); @@ -1824,7 +1824,7 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){ clearLiveToolCards();if(!assistantText)removeThinking(); S.session=session;S.messages=(session.messages||[]).filter(m=>m&&m.role); if(S.session&&S.session.session_id){ - localStorage.setItem('hermes-webui-session',S.session.session_id); + 
try{localStorage.setItem('hermes-webui-session',S.session.session_id);}catch(_){} if(typeof _setActiveSessionUrl==='function') _setActiveSessionUrl(S.session.session_id); } const _markerOnlyAssistantError=_replaceMarkerOnlyAssistantWithStreamError(S.messages); diff --git a/static/sessions.js b/static/sessions.js index 771e1d80..e1e5c6c6 100644 --- a/static/sessions.js +++ b/static/sessions.js @@ -416,7 +416,7 @@ async function newSession(flash, options={}){ S.session=data.session;S.messages=data.session.messages||[]; S.lastUsage={...(data.session.last_usage||{})}; if(flash)S.session._flash=true; - localStorage.setItem('hermes-webui-session',S.session.session_id); + try{localStorage.setItem('hermes-webui-session',S.session.session_id);}catch(_){} _setActiveSessionUrl(S.session.session_id); _setSessionViewedCount(S.session.session_id, S.session.message_count || 0); // Sync chat-header dropdown to the session's model so the UI reflects @@ -526,7 +526,7 @@ async function loadSession(sid){ if(typeof syncTopbar==='function') syncTopbar(); _setSessionViewedCount(S.session.session_id, Number(data.session.message_count || 0)); _clearSessionCompletionUnread(S.session.session_id); - localStorage.setItem('hermes-webui-session',S.session.session_id); + try{localStorage.setItem('hermes-webui-session',S.session.session_id);}catch(_){} _setActiveSessionUrl(S.session.session_id); const activeStreamId=S.session.active_stream_id||null; diff --git a/tests/test_issue2386_localstorage_quota.py b/tests/test_issue2386_localstorage_quota.py new file mode 100644 index 00000000..11775281 --- /dev/null +++ b/tests/test_issue2386_localstorage_quota.py @@ -0,0 +1,34 @@ +from pathlib import Path + +ROOT = Path(__file__).resolve().parents[1] + + +def _script(path): + return (ROOT / path).read_text() + + +def _assert_storage_setitem_guarded(src, needle): + matches = [line.strip() for line in src.splitlines() if needle in line] + assert matches, f"expected at least one {needle} write" + for line in 
matches: + assert line.startswith("try{localStorage.setItem("), ( + f"localStorage quota errors must not escape from {needle} writes: {line}" + ) + assert "catch(_)" in line or "catch(e)" in line or "catch{}" in line + + +def test_active_session_localstorage_writes_ignore_quota_errors(): + """Session persistence writes are best-effort when the browser quota is full (#2386).""" + for path in ["static/sessions.js", "static/commands.js", "static/messages.js"]: + _assert_storage_setitem_guarded( + _script(path), + "localStorage.setItem('hermes-webui-session'", + ) + + +def test_workspace_panel_localstorage_write_ignores_quota_errors(): + """Workspace panel state should not break UI toggles if localStorage throws (#2386).""" + _assert_storage_setitem_guarded( + _script("static/boot.js"), + "localStorage.setItem('hermes-webui-workspace-panel'", + ) From 48b82d57dbb631db76916697c593eda21addb208 Mon Sep 17 00:00:00 2001 From: Michael Lam Date: Sat, 16 May 2026 08:44:04 -0700 Subject: [PATCH 14/15] fix: reduce browser storage pressure --- CHANGELOG.md | 4 ++ static/sessions.js | 14 ++++++ static/sw.js | 37 +++++++++------ tests/test_issue2389_storage_pressure.py | 59 ++++++++++++++++++++++++ 4 files changed, 99 insertions(+), 15 deletions(-) create mode 100644 tests/test_issue2389_storage_pressure.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 6fa25bab..23f9b8e5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,10 @@ ## [Unreleased] +### Fixed + +- Service worker updates now delete old shell caches before creating the new versioned cache, reducing temporary Cache Storage pressure during frequent releases. Deleting sessions also prunes the localStorage maps that track viewed counts, completion unread state, and observed streaming state so stale per-session entries do not accumulate indefinitely. Closes #2389. 
+ ## [v0.51.74] — 2026-05-16 — Release AX (stage-367 — 4-PR safe-lane batch — #2362 table-cell spacing + #2363 run-state-consistency RFC + #2365 custom_providers list-format + #2367 settings sidebar i18n) ### Added diff --git a/static/sessions.js b/static/sessions.js index 771e1d80..8ab50b58 100644 --- a/static/sessions.js +++ b/static/sessions.js @@ -170,6 +170,14 @@ function _clearSessionCompletionUnread(sid) { _saveSessionCompletionUnread(); } +function _clearSessionViewedCount(sid) { + if (!sid) return; + const counts = _getSessionViewedCounts(); + if (!Object.prototype.hasOwnProperty.call(counts, sid)) return; + delete counts[sid]; + _saveSessionViewedCounts(); +} + function _hasSessionCompletionUnread(sid) { if (!sid) return false; return Object.prototype.hasOwnProperty.call(_getSessionCompletionUnread(), sid); @@ -810,6 +818,12 @@ function _clearHandoffStorageForSession(sid) { _setHandoffStorageValue(sid, _HANDOFF_SUFFIX_DISMISSED_AT, null); _setHandoffStorageValue(sid, _HANDOFF_SUFFIX_SUMMARY_HANDLED_AT, null); } catch {} + // Session deletion should also prune per-session tracking maps. Otherwise + // heavy users accumulate one localStorage entry per deleted session forever, + // which increases quota pressure and can make future UI persistence fail. + try { _clearSessionViewedCount(sid); } catch {} + try { _clearSessionCompletionUnread(sid); } catch {} + try { _forgetObservedStreamingSession(sid); } catch {} } function _getHandoffDismissedAt(sid) { diff --git a/static/sw.js b/static/sw.js index ebfccf35..9ea770b1 100644 --- a/static/sw.js +++ b/static/sw.js @@ -39,28 +39,35 @@ const SHELL_ASSETS = [ './manifest.json', ]; -// Install: pre-cache the app shell +function deleteOldShellCaches() { + return caches.keys().then((keys) => + Promise.all( + keys.filter((k) => k !== CACHE_NAME).map((k) => caches.delete(k)) + ) + ); +} + +// Install: prune old shell caches first, then pre-cache the app shell. 
Doing +// this before caches.open(CACHE_NAME) avoids a temporary double-cache window on +// quota-sensitive browsers during frequent version bumps. self.addEventListener('install', (event) => { event.waitUntil( - caches.open(CACHE_NAME).then((cache) => { - return cache.addAll(SHELL_ASSETS).catch((err) => { - // Non-fatal: if any asset fails, still activate - console.warn('[sw] Shell pre-cache partial failure:', err); - }); - }) + deleteOldShellCaches().then(() => + caches.open(CACHE_NAME).then((cache) => { + return cache.addAll(SHELL_ASSETS).catch((err) => { + // Non-fatal: if any asset fails, still activate + console.warn('[sw] Shell pre-cache partial failure:', err); + }); + }) + ) ); self.skipWaiting(); }); -// Activate: clean up old caches +// Activate: keep the old-cache cleanup as a safety net in case install was +// interrupted or an older worker was already waiting. self.addEventListener('activate', (event) => { - event.waitUntil( - caches.keys().then((keys) => - Promise.all( - keys.filter((k) => k !== CACHE_NAME).map((k) => caches.delete(k)) - ) - ) - ); + event.waitUntil(deleteOldShellCaches()); self.clients.claim(); }); diff --git a/tests/test_issue2389_storage_pressure.py b/tests/test_issue2389_storage_pressure.py new file mode 100644 index 00000000..5ab8baac --- /dev/null +++ b/tests/test_issue2389_storage_pressure.py @@ -0,0 +1,59 @@ +"""Regression coverage for storage-pressure cleanup from issue #2389.""" +from pathlib import Path + + +ROOT = Path(__file__).resolve().parents[1] +SW_SRC = (ROOT / "static" / "sw.js").read_text(encoding="utf-8") +SESSIONS_SRC = (ROOT / "static" / "sessions.js").read_text(encoding="utf-8") + + +def _function_block(src: str, name: str, window: int = 1600) -> str: + idx = src.find(f"function {name}(") + assert idx != -1, f"missing function {name}" + return src[idx : idx + window] + + +def test_service_worker_install_deletes_old_caches_before_opening_new_cache(): + install_idx = 
SW_SRC.find("self.addEventListener('install'") + assert install_idx != -1, "service worker must define an install handler" + install_block = SW_SRC[install_idx : SW_SRC.find("self.addEventListener('activate'", install_idx)] + cleanup_idx = install_block.find("deleteOldShellCaches().then") + open_idx = install_block.find("caches.open(CACHE_NAME)") + assert cleanup_idx != -1, "install must delete stale shell caches before pre-cache" + assert open_idx != -1, "install must still pre-cache the current shell cache" + assert cleanup_idx < open_idx, ( + "opening the new shell cache before deleting old ones creates a temporary " + "double-cache window that increases quota pressure" + ) + + +def test_service_worker_keeps_activate_cleanup_safety_net(): + activate_idx = SW_SRC.find("self.addEventListener('activate'") + assert activate_idx != -1, "service worker must define an activate handler" + activate_block = SW_SRC[activate_idx : activate_idx + 500] + assert "event.waitUntil(deleteOldShellCaches())" in activate_block + assert "self.clients.claim()" in activate_block + + +def test_deleted_sessions_prune_all_session_tracking_maps(): + assert "const SESSION_VIEWED_COUNTS_KEY = 'hermes-session-viewed-counts';" in SESSIONS_SRC + assert "const SESSION_COMPLETION_UNREAD_KEY = 'hermes-session-completion-unread';" in SESSIONS_SRC + assert "const SESSION_OBSERVED_STREAMING_KEY = 'hermes-session-observed-streaming';" in SESSIONS_SRC + assert "function _clearSessionViewedCount(sid)" in SESSIONS_SRC + + clear_block = _function_block(SESSIONS_SRC, "_clearHandoffStorageForSession") + assert "_clearSessionViewedCount(sid)" in clear_block + assert "_clearSessionCompletionUnread(sid)" in clear_block + assert "_forgetObservedStreamingSession(sid)" in clear_block + + +def test_session_viewed_count_prune_is_best_effort_and_persists_when_changed(): + viewed_block = _function_block(SESSIONS_SRC, "_clearSessionViewedCount") + assert "Object.prototype.hasOwnProperty.call(counts, sid)" in 
viewed_block + assert "delete counts[sid]" in viewed_block + assert "_saveSessionViewedCounts()" in viewed_block + + clear_block = _function_block(SESSIONS_SRC, "_clearHandoffStorageForSession") + assert "try { _clearSessionViewedCount(sid); } catch {}" in clear_block + assert "try { _clearSessionCompletionUnread(sid); } catch {}" in clear_block + assert "try { _forgetObservedStreamingSession(sid); } catch {}" in clear_block From 20bd845416fe99e0ed3aba6039465a1cd1bec363 Mon Sep 17 00:00:00 2001 From: Hermes Agent Date: Sat, 16 May 2026 19:32:49 +0000 Subject: [PATCH 15/15] fix(tests): permanent os.execv guard to stop pytest self-restart loop MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit api.updates._schedule_restart() spawns a daemon thread that calls os.execv() after a short sleep. Tests in test_update_banner_fixes.py monkeypatch os.execv to a no-op, but monkeypatch teardown can win the race against the daemon thread — when the thread wakes up after teardown, the real os.execv is back, and it re-execs pytest with the original argv. From the outside this looked like pytest hanging at 99% and then restarting the entire suite from 0% in a loop. The fix shadows os.execv with a permanent no-op wrapper at conftest module-import time, so late-firing daemon threads can't escape. Tests that need to verify execv was called still patch it themselves; their patches sit on top of the wrapper for their lifetime. Also adds tests/test_pytest_execv_guard.py to pin the guard against future conftest refactors. 
--- CHANGELOG.md | 28 +++++++++++++++++++++++++ tests/conftest.py | 30 +++++++++++++++++++++++++++ tests/test_pytest_execv_guard.py | 35 ++++++++++++++++++++++++++++++++ 3 files changed, 93 insertions(+) create mode 100644 tests/test_pytest_execv_guard.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 6434724c..ddc5be00 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,8 +2,36 @@ ## [Unreleased] +## [v0.51.75] — 2026-05-16 — Release AY (stage-368 — 11-PR safe-lane batch — storage + i18n + run-journal parity + attachments + compression sidebar + restart-recovery + text-mode images + tables + settings i18n + German labels) + +### Test infrastructure + +- Stage-368 maintainer fix — pytest no longer self-loops on the `_schedule_restart` daemon thread. Several existing tests in `tests/test_update_banner_fixes.py` call `api.updates._schedule_restart()`, which spawns a daemon thread that eventually calls `os.execv()`. Those tests monkeypatch `os.execv` for the test scope, but monkeypatch teardown can win the race against the daemon thread, restoring the real `os.execv` before the thread fires it — at which point the daemon re-execs the entire pytest process with the original argv, looking from the outside like pytest hangs at 99 % then restarts the suite from 0 % in an infinite loop. `tests/conftest.py` now installs a permanent no-op wrapper on `os.execv` at module-import time so late-firing daemon threads cannot re-exec pytest. New `tests/test_pytest_execv_guard.py` pins the guard against future regressions. + +### Added + +- **PR #2377** by @franksong2702 (refs #2283, refs #2363, refs #1925) — Run-journal replay timeline parity checks. After #2283 shipped the first run-journal replay slice and #2363 documented the cross-layer state consistency contract, this PR adds explicit parity assertions over the replayed timeline so divergences between the journal and the visible transcript (Thinking → tool calls → assistant text) surface as test failures instead of silent drift. 
+ ### Fixed +- **PR #2391** by @Michaelyklam (fixes #2389) — Reduce browser storage pressure during service-worker updates and over long-running sessions. `static/sw.js` now calls `deleteOldShellCaches()` BEFORE `caches.open(CACHE_NAME)` in the install handler so the new ~2.2 MB shell cache no longer overlaps the old one during a version bump (especially painful on shared-origin quota accounting). A new `_clearSessionViewedCount()` helper plus extended `_clearHandoffStorageForSession()` prune `hermes-session-viewed-counts`, `hermes-session-completion-unread`, and `hermes-session-observed-streaming` on every single-session delete and batch-delete so per-session tracking maps no longer grow unbounded. + +- **PR #2387** by @Michaelyklam (fixes #2386) — Guard `localStorage.setItem('hermes-webui-session', ...)` and workspace-panel runtime-state writes with `try { … } catch (_) {}` across `static/boot.js`, `static/sessions.js`, `static/commands.js`, and `static/messages.js`. These convenience writes were previously fatal UI operations on quota-exhausted browsers (especially Firefox public-domain setups where shared quota fills up after a service-worker shell rotation). + +- **PR #2368** by @Michaelyklam — Hybridize background profile env routing so background title generation, manual compression, and update-summary workers honor a session's non-default profile. The pure thread-local refactor for #2321 was reverted because `hermes_cli.config.load_config()` still reads `HERMES_HOME` from process env. This PR routes the broad runtime env through the WebUI thread-local channel and sets only `HERMES_HOME` in `os.environ` under a narrow `_ENV_LOCK` (the lock is not held across the worker body), restoring the prior `HERMES_HOME` value afterwards so profile-specific keys do not leak process-wide. New test asserts `OPENROUTER_API_KEY` is visible from the worker against a non-default profile. + +- **PR #2382** by @Michaelyklam (fixes #2380) — Serve raw chat attachments from the per-session inbox in addition to the session workspace.
Chat uploads were intentionally moved out of workspaces into a per-session attachment inbox in an earlier release; the transcript renderer still emits stable `api/file/raw?session_id=...&path=` URLs, but `_handle_file_raw` only checked `session.workspace` so inbox-backed uploads rendered as broken images. The URL surface is preserved and a session-attachment fallback is added with path-traversal guards intact. + +- **PR #2385** by @franksong2702 — Keep fuller compression snapshots reachable in the sidebar. The default behavior hides `pre_compression_snapshot: true` rows so archived compression segments do not duplicate the active continuation. A real long Kanban session exposed a narrower failure: the fuller transcript was still present on disk but remained marked as `pre_compression_snapshot`, so the sidebar surfaced a shorter row and the fuller transcript became unreachable. The fix preserves discoverability without re-introducing duplication in normal cases. + +- **PR #2371** by @franksong2702 — Clarify interrupted turn recovery after a WebUI restart. WebUI executes browser-originated agent turns inside the WebUI process; if that process restarts mid-turn, the worker dies with it. Run journal replay can only replay events that were already emitted, so the stale-pending repair path is now annotated and refined to make the post-restart state explicit (interrupted, recoverable, or terminal) instead of leaving the user with a half-rendered turn and no signal. + +- **PR #2378** by @Michaelyklam — Strip historical images in text-only mode. Current-turn uploads already respect `agent.image_input_mode: text`, but saved conversation history still passed native `image_url` content parts back into later provider calls, breaking text-only providers on replayed turns. `_sanitize_messages_for_api()` gains a `cfg=` keyword argument so the API-history sanitizer can strip historical native image parts when the mode is text. 
Default `cfg=None` preserves prior behavior for callers that don't pass the new argument. + +- **PR #2375** by @Michaelyklam — Keep Markdown tables block-level. Pipe tables were already converted to `
<table>` markup, but the final paragraph pass did not treat generated tables as block-level output, occasionally wrapping them in `<p>
` and breaking the surrounding layout. The fix isolates generated tables and adds `table` to the paragraph-wrap skip list so valid CommonMark tables render predictably. + +- **PR #2372** by @mccxj — Settings → Conversation page action buttons now respect locale selection. Pre-fix, the JSON export, MD export, and Copy buttons had hardcoded English labels/titles. Adds `data-i18n` / `data-i18n-title` attributes plus the missing translation keys so non-English locales no longer see English labels stuck in the middle of a translated screen. + - **PR #2381** by @Michaelyklam (fixes #2379) — German relative session-time labels now interpolate the elapsed value instead of rendering the literal `{n}` placeholder in the sidebar/header. The German locale now uses function-valued translations for minutes, hours, and days, matching the other locale bundles. ## [v0.51.74] — 2026-05-16 — Release AX (stage-367 — 4-PR safe-lane batch — #2362 table-cell spacing + #2363 run-state-consistency RFC + #2365 custom_providers list-format + #2367 settings sidebar i18n) diff --git a/tests/conftest.py b/tests/conftest.py index 6d4e7ecc..66cf0102 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -171,6 +171,36 @@ def pytest_configure(config): # imports trigger botocore initialisation. os.environ.setdefault("AWS_EC2_METADATA_DISABLED", "true") +# ── Permanent os.execv guard for the pytest session ──────────────────────── +# Several tests in tests/test_update_banner_fixes.py exercise +# api.updates._schedule_restart(), which spawns a DAEMON thread that sleeps +# for a short delay and then calls ``os.execv(sys.executable, sys.argv)``. +# Those tests monkeypatch ``os.execv`` to a no-op for the test scope, but +# monkeypatch teardown happens at test exit — if the daemon thread has not +# yet woken up by then (system load, GC pause, _apply_lock contention), the +# real ``os.execv`` is restored before the thread fires it. 
The daemon then +# REPLACES the pytest process image with a fresh ``pytest tests/ -q ...`` +# invocation, looking from the outside like pytest "hangs at 99%" and then +# restarts the entire suite from 0% — a self-perpetuating loop. +# +# Daemon threads cannot be reliably joined from a test fixture (they live in +# ``api.updates`` module scope), so the only safe answer is to render +# ``os.execv`` permanently inert for the pytest session. Production code is +# unaffected because production never imports this conftest. +# +# Tests that need to verify execv WAS called still monkeypatch it themselves +# — their patched version takes precedence over this no-op wrapper for the +# test's lifetime, and the no-op only kicks in after teardown for daemon +# threads that wake up late. +_real_execv = os.execv + +def _pytest_session_safe_execv(_exe, _args): # pragma: no cover — never called in prod + # Drop the call on the floor. A late-firing daemon thread from + # _schedule_restart() must not be able to re-exec the pytest process. + return None + +os.execv = _pytest_session_safe_execv + # ── Hermetic network isolation ───────────────────────────────────────────── # Tests must not reach the public internet. Outbound to Anthropic / OpenAI / # Amazon / OpenRouter / etc. is forbidden by default. The test suite already diff --git a/tests/test_pytest_execv_guard.py b/tests/test_pytest_execv_guard.py new file mode 100644 index 00000000..851ae3f3 --- /dev/null +++ b/tests/test_pytest_execv_guard.py @@ -0,0 +1,35 @@ +"""Regression guard for the pytest "hangs at 99% then restarts from 0%" loop. + +Root cause documented in tests/conftest.py — daemon threads spawned by +api.updates._schedule_restart() can fire os.execv() AFTER monkeypatch +teardown restores the real os.execv, which re-execs the entire pytest +process. The conftest installs a permanent no-op wrapper on os.execv that +shadows any late-firing daemon thread. 
+ +This test pins the guard so a future conftest refactor can't silently +remove it. +""" +import os + + +def test_conftest_installs_permanent_execv_guard(): + """os.execv must be replaced by the conftest's safe no-op wrapper.""" + # The wrapper is named `_pytest_session_safe_execv` in conftest.py. + # Verify the module attribute now points to that wrapper, not the real + # libc-bound function. + assert os.execv.__name__ == '_pytest_session_safe_execv', ( + f"os.execv must be the conftest-installed pytest-safe no-op, but " + f"resolves to {os.execv!r}. Did a recent conftest refactor remove " + f"the guard? See conftest.py § 'Permanent os.execv guard for the " + f"pytest session' — without it, late-firing _schedule_restart " + f"daemon threads re-exec pytest and the suite loops forever." + ) + + +def test_safe_execv_returns_none_does_not_exec(): + """The wrapper must be a true no-op — it must not raise, exec, or block.""" + # Pass deliberately bogus args to confirm the wrapper drops them rather + # than passing them through to the real execv. + result = os.execv('/nonexistent/binary/path/that/should/not/be/executed', + ['/nonexistent/binary/path/that/should/not/be/executed']) + assert result is None