From b57e80f70680936b4400acbef632e836ff090b59 Mon Sep 17 00:00:00 2001 From: Frank Song Date: Sun, 3 May 2026 14:54:34 +0800 Subject: [PATCH 01/11] fix: YAML code blocks collapse newlines due to Prism token white-space (#1463) Prism's YAML grammar wraps tokens in elements where white-space defaults to normal, collapsing \n characters into spaces. The DOM textContent is correct (confirmed by reporter's probe), so the bug is purely CSS. Force white-space:pre on .token elements inside language-yaml code blocks for both .msg-body and .preview-md contexts. --- static/style.css | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/static/style.css b/static/style.css index fa89fdb7..1dfcadb5 100644 --- a/static/style.css +++ b/static/style.css @@ -735,6 +735,8 @@ .msg-body pre code{background:none;padding:0;border-radius:0;color:var(--pre-text);font-size:13px;line-height:1.6;} /* Keep original theme background — prevent prism-tomorrow from overriding --code-bg */ .msg-body pre[class*="language-"],.msg-body pre code[class*="language-"]{background:var(--code-bg) !important;} + /* Fix #1463: Prism YAML grammar collapses newlines inside token spans — force pre */ + .msg-body pre code.language-yaml .token{white-space:pre !important;} .pre-header{font-size:10px;font-weight:600;text-transform:uppercase;letter-spacing:.06em;color:var(--muted);padding:8px 16px 8px;background:var(--input-bg);border-radius:10px 10px 0 0;border:1px solid var(--border);border-bottom:1px solid var(--border);display:flex;align-items:center;gap:6px;} .pre-header::before{content:'';width:8px;height:8px;border-radius:50%;background:var(--muted);opacity:.4;} .pre-header+pre{border-radius:0 0 10px 10px;border-top:none;margin-top:0;} @@ -1128,6 +1130,8 @@ .preview-md pre code{background:none;padding:0;color:var(--pre-text);font-size:11.5px;line-height:1.55;} /* Keep original theme background — prevent prism-tomorrow from overriding --code-bg */ .preview-md pre[class*="language-"],.preview-md pre 
code[class*="language-"]{background:var(--code-bg) !important;} + /* Fix #1463: Prism YAML grammar collapses newlines inside token spans — force pre */ + .preview-md pre code.language-yaml .token{white-space:pre !important;} .preview-md blockquote{border-left:3px solid var(--blue);padding-left:12px;color:var(--muted);font-style:italic;margin:8px 0;} .preview-md blockquote p{margin:0;} .preview-md strong{color:var(--strong);font-weight:600;}.preview-md em{color:var(--em);} From 8f3dbe185dfeacdff586bc7a56e998aedf6d45d2 Mon Sep 17 00:00:00 2001 From: Frank Song Date: Sun, 3 May 2026 14:59:37 +0800 Subject: [PATCH 02/11] =?UTF-8?q?fix:=20consolidate=20=5F=5FCACHE=5FVERSIO?= =?UTF-8?q?N=5F=5F=20=E2=86=92=20=5F=5FWEBUI=5FVERSION=5F=5F=20(#1509)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit __CACHE_VERSION__ (sw.js) and __WEBUI_VERSION__ (index.html) are functionally identical — both resolve to quote(WEBUI_VERSION, safe='') at request time. Two names exist for historical reasons (different files added at different times). Rename __CACHE_VERSION__ → __WEBUI_VERSION__ in: - static/sw.js (CACHE_NAME + VQ constant + comment) - api/routes.py (substitution string) - tests/test_pwa_manifest_sw.py (all assertions) Single canonical name. No behavior change — same ?v=vX.Y.Z query strings on the same URLs. 
--- api/routes.py | 2 +- static/sw.js | 6 +++--- tests/test_pwa_manifest_sw.py | 18 +++++++++--------- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/api/routes.py b/api/routes.py index 8c4bc834..863680c5 100644 --- a/api/routes.py +++ b/api/routes.py @@ -1188,7 +1188,7 @@ def handle_get(handler, parsed) -> bool: from api.updates import WEBUI_VERSION version_token = quote(WEBUI_VERSION, safe="") text = sw_path.read_text(encoding="utf-8").replace( - "__CACHE_VERSION__", version_token + "__WEBUI_VERSION__", version_token ) data = text.encode("utf-8") handler.send_response(200) diff --git a/static/sw.js b/static/sw.js index 9e43db66..cb8e2071 100644 --- a/static/sw.js +++ b/static/sw.js @@ -7,18 +7,18 @@ // Cache version is injected by the server at request time (routes.py /sw.js handler). // Bumps automatically whenever the git commit changes — no manual edits needed. -const CACHE_NAME = 'hermes-shell-__CACHE_VERSION__'; +const CACHE_NAME = 'hermes-shell-__WEBUI_VERSION__'; // Static assets that form the app shell. // -// Versioned assets (CSS + JS) include `?v=__CACHE_VERSION__` to match the +// Versioned assets (CSS + JS) include `?v=__WEBUI_VERSION__` to match the // query string the page sends — see index.html. Without the version query // here, every cache lookup against `?v=...` URLs would miss and fall through // to network, defeating the pre-cache. // // Unversioned assets (`./`, manifest.json, favicons) are referenced from // index.html without a cache-bust query, so they stay unversioned here too. 
-const VQ = '?v=__CACHE_VERSION__'; +const VQ = '?v=__WEBUI_VERSION__'; const SHELL_ASSETS = [ './', './static/style.css' + VQ, diff --git a/tests/test_pwa_manifest_sw.py b/tests/test_pwa_manifest_sw.py index 40220dec..c5e46982 100644 --- a/tests/test_pwa_manifest_sw.py +++ b/tests/test_pwa_manifest_sw.py @@ -2,7 +2,7 @@ Covers: - manifest.json is valid JSON with required PWA fields -- sw.js has the `__CACHE_VERSION__` placeholder the server replaces at request time +- sw.js has the `__WEBUI_VERSION__` placeholder the server replaces at request time - sw.js offline-fallback uses a resolved promise (not `caches.match() || fallback` which is broken — Promise objects are always truthy in `||` checks, so the fallback Response would never be used) @@ -52,8 +52,8 @@ class TestManifest: class TestServiceWorker: def test_sw_has_cache_version_placeholder(self): src = SW.read_text(encoding="utf-8") - assert "__CACHE_VERSION__" in src, ( - "sw.js must contain __CACHE_VERSION__ placeholder for the server " + assert "__WEBUI_VERSION__" in src, ( + "sw.js must contain __WEBUI_VERSION__ placeholder for the server " "handler at /sw.js to replace with WEBUI_VERSION at request time" ) @@ -117,8 +117,8 @@ class TestPWARoutes: idx = src.find('"/sw.js"') assert idx != -1, "routes.py must handle /sw.js" block = src[idx:idx + 1000] - assert "__CACHE_VERSION__" in block, ( - "sw.js route must replace __CACHE_VERSION__ with the current WEBUI_VERSION" + assert "__WEBUI_VERSION__" in block, ( + "sw.js route must replace __WEBUI_VERSION__ with the current WEBUI_VERSION" ) assert "WEBUI_VERSION" in block, ( "sw.js route must import and use WEBUI_VERSION for cache busting" @@ -185,7 +185,7 @@ class TestIndexHtmlIntegration: def test_sw_shell_assets_match_versioned_asset_urls(self): """The service worker's SHELL_ASSETS pre-cache list must use the same - `?v=__CACHE_VERSION__` suffix on JS+CSS that index.html sends, so that + `?v=__WEBUI_VERSION__` suffix on JS+CSS that index.html sends, so that 
the pre-cached entries actually serve when the page requests them. Without this, every `cache.match()` for a versioned asset URL (e.g. @@ -208,13 +208,13 @@ class TestIndexHtmlIntegration: "terminal.js", "onboarding.js", ): - # Either inline `?v=__CACHE_VERSION__` or via the VQ constant + # Either inline `?v=__WEBUI_VERSION__` or via the VQ constant # produces a URL string the cache lookup can match. - has_inline = f"{asset}?v=__CACHE_VERSION__" in src + has_inline = f"{asset}?v=__WEBUI_VERSION__" in src has_concat = f"{asset}' + VQ" in src or f"{asset}\" + VQ" in src assert has_inline or has_concat, ( f"sw.js SHELL_ASSETS entry for {asset} must carry " - "?v=__CACHE_VERSION__ to match the URL the page requests" + "?v=__WEBUI_VERSION__ to match the URL the page requests" ) def test_index_route_url_encodes_asset_version(self): From f32989d5bb0afd962bbbdb2a9ce231dda89fc831 Mon Sep 17 00:00:00 2001 From: Frank Song Date: Sun, 3 May 2026 15:03:17 +0800 Subject: [PATCH 03/11] fix: voice-mode pref toggle-off now stops the recognizer (#1491) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a user disables 'Hands-free voice mode' in Settings while voice mode is active, the button hides but the SpeechRecognition keeps running — the user can't stop it because the button is invisible. Fix: _applyVoiceModePref() now checks if voice mode is active and calls _deactivate() when the pref is toggled off. Move _voiceModeActive declaration above the function to avoid TDZ. Also removes a duplicate window._applyVoiceModePref assignment. 
--- static/boot.js | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/static/boot.js b/static/boot.js index 4c9f1ce7..a54f75a5 100644 --- a/static/boot.js +++ b/static/boot.js @@ -470,14 +470,17 @@ window._micPendingSend=window._micPendingSend||false; try{ return localStorage.getItem('hermes-voice-mode-button')==='true'; } catch(_){ return false; } } + let _voiceModeActive=false; + function _applyVoiceModePref(){ - modeBtn.style.display = _voiceModePrefEnabled() ? '' : 'none'; + const enabled = _voiceModePrefEnabled(); + modeBtn.style.display = enabled ? '' : 'none'; + if(!enabled && _voiceModeActive) _deactivate(); } _applyVoiceModePref(); // Expose so the settings pane can re-apply immediately on toggle. window._applyVoiceModePref = _applyVoiceModePref; - let _voiceModeActive=false; let _voiceModeState='idle'; // idle | listening | thinking | speaking let _recognition=null; let _silenceTimer=null; From ac3d3368754d563297f869454fd906610e2bab09 Mon Sep 17 00:00:00 2001 From: Frank Song Date: Sun, 3 May 2026 15:05:40 +0800 Subject: [PATCH 04/11] fix: onboarding API-key input loses focus when probe completes (#1503) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The onboarding wizard's API-key input calls _scheduleOnboardingProbe() on every keystroke (oninput). When the 400ms-debounced probe completes, _setOnboardingProbeState() calls _renderOnboardingBody() which rebuilds the entire form — destroying and recreating the element. The user's focus and cursor position are lost. On fast connections (localhost) the probe completes between keystrokes so the bug window is narrow. On slow networks (VPN, corporate proxy, cold-start vLLM) the re-render routinely lands mid-typing. Fix: remove _scheduleOnboardingProbe() from the api-key input's oninput handler. 
The probe still fires on: - baseUrl input change (oninput + debounce, unchanged) - api-key field blur (onblur, added) - 'Test connection' button click (unchanged) - nextOnboardingStep() before Continue (unchanged) The baseUrl input retains the oninput probe because the UX trade-off is acceptable there (text input preserves visible content on re-render). --- static/onboarding.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/static/onboarding.js b/static/onboarding.js index e6086176..05582576 100644 --- a/static/onboarding.js +++ b/static/onboarding.js @@ -197,7 +197,7 @@ function _renderOnboardingApiKeyField(){ const labelKey=keyOptional?'onboarding_api_key_label_optional':'onboarding_api_key_label'; const placeholderKey=keyOptional?'onboarding_api_key_placeholder_optional':'onboarding_api_key_placeholder'; const helpHtml=keyOptional?`

${esc(t('onboarding_api_key_help_keyless')||'')}

`:''; - return `${helpHtml}`; + return `${helpHtml}`; } function _getOnboardingSelectedModel(){ From dc7b142bb5add4816961c0a780adee0e55099c1f Mon Sep 17 00:00:00 2001 From: Frank Song Date: Sun, 3 May 2026 15:31:15 +0800 Subject: [PATCH 05/11] =?UTF-8?q?fix:=20use=20correct=20Unicode=20codepoin?= =?UTF-8?q?t=20for=20branch=20indicator=20(=E2=91=82=20not=20=E2=92=82)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit \u2482 (PARENTHESIZED NUMBER FIFTEEN, displayed as ⒂) → \u2442 (OCR FORK, displayed as ⑂) Fixes #1522 --- static/sessions.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/static/sessions.js b/static/sessions.js index 318d88ca..c2efe376 100644 --- a/static/sessions.js +++ b/static/sessions.js @@ -1654,7 +1654,7 @@ function renderSessionListFromCache(){ if(s.parent_session_id){ const branchInd=document.createElement('span'); branchInd.className='session-branch-indicator'; - branchInd.textContent='\u2482'; // ⑂ + branchInd.textContent='\u2442'; // ⑂ branchInd.title=(typeof t==='function'?t('forked_from'):'Forked from')+' '+s.parent_session_id; branchInd.style.cursor='pointer'; branchInd.onclick=(e)=>{ From 57eb2fbf567bf50c7c49be6bc8b7d40037ff2d83 Mon Sep 17 00:00:00 2001 From: Frank Song Date: Sun, 3 May 2026 15:36:05 +0800 Subject: [PATCH 06/11] fix: update test assertion to match corrected Unicode codepoint (\u2442) --- tests/test_465_session_branching.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_465_session_branching.py b/tests/test_465_session_branching.py index 058a0862..8be147fc 100644 --- a/tests/test_465_session_branching.py +++ b/tests/test_465_session_branching.py @@ -225,7 +225,7 @@ def test_sidebar_parent_indicator(): "sessions.js should check parent_session_id" assert 'session-branch-indicator' in src, \ "Should have session-branch-indicator class" - assert '\\u2482' in src, \ + assert '\\u2442' in src, \ "Should use ⑂ character for parent indicator" 
From 6bce34c27e42c88b6312a640455609618163f202 Mon Sep 17 00:00:00 2001 From: Manfred Date: Sun, 3 May 2026 11:46:42 +0200 Subject: [PATCH 07/11] fix: clear stale WebUI stream state Clear persisted active_stream_id and pending runtime fields when the server no longer has the referenced live stream. Also drop browser-side INFLIGHT state when the server reports a session idle and bump the service-worker cache so the frontend fix is delivered. Adds regression coverage for backend stale-stream cleanup, frontend inflight invalidation, and cache busting. --- api/routes.py | 32 +++++++++++++++++++++- static/sessions.js | 9 ++++++ static/sw.js | 2 +- tests/test_stale_stream_cleanup.py | 44 ++++++++++++++++++++++++++++++ 4 files changed, 85 insertions(+), 2 deletions(-) create mode 100644 tests/test_stale_stream_cleanup.py diff --git a/api/routes.py b/api/routes.py index 8c4bc834..f128d6f3 100644 --- a/api/routes.py +++ b/api/routes.py @@ -233,6 +233,34 @@ from api.helpers import ( _redact_text, ) + +def _clear_stale_stream_state(session) -> bool: + """Clear persisted streaming flags when the in-memory stream no longer exists. + + A server restart or worker crash can leave active_stream_id/pending_* in the + session JSON while STREAMS is empty. The frontend then keeps reconnecting to + a dead stream and shows a permanent running/thinking state. 
+ """ + stream_id = getattr(session, "active_stream_id", None) + if not stream_id: + return False + with STREAMS_LOCK: + stream_alive = stream_id in STREAMS + if stream_alive: + return False + session.active_stream_id = None + if hasattr(session, "pending_user_message"): + session.pending_user_message = None + if hasattr(session, "pending_attachments"): + session.pending_attachments = [] + if hasattr(session, "pending_started_at"): + session.pending_started_at = None + try: + session.save() + except Exception: + pass + return True + # ── CSRF: validate Origin/Referer on POST ──────────────────────────────────── import re as _re @@ -1309,6 +1337,7 @@ def handle_get(handler, parsed) -> bool: try: _t1 = _time.monotonic() s = get_session(sid, metadata_only=(not load_messages)) + _clear_stale_stream_state(s) _t2 = _time.monotonic() effective_model = ( _resolve_effective_session_model_for_display(s) @@ -1435,6 +1464,7 @@ def handle_get(handler, parsed) -> bool: return bad(handler, "Missing session_id") try: from api.session_ops import session_status + _clear_stale_stream_state(get_session(sid, metadata_only=True)) return j(handler, session_status(sid)) except KeyError: return bad(handler, "Session not found", 404) @@ -4265,7 +4295,7 @@ def _handle_chat_start(handler, body): status=409, ) # Stale stream id from a previous run; clear and continue. - s.active_stream_id = None + _clear_stale_stream_state(s) stream_id = uuid.uuid4().hex with _get_session_agent_lock(s.session_id): s.workspace = workspace diff --git a/static/sessions.js b/static/sessions.js index 318d88ca..56906ae4 100644 --- a/static/sessions.js +++ b/static/sessions.js @@ -387,6 +387,15 @@ async function loadSession(sid){ _setActiveSessionUrl(S.session.session_id); const activeStreamId=S.session.active_stream_id||null; + // If the server says the session is idle, discard any browser-side inflight + // cache left behind by a crashed/restarted stream. 
Otherwise the UI can keep + // showing a permanent thinking/running state even though active_streams=0. + if(!activeStreamId&&INFLIGHT[sid]){ + delete INFLIGHT[sid]; + if(typeof clearInflightState==='function') clearInflightState(sid); + S.activeStreamId=null; + S.busy=false; + } // Phase 2a: If session is streaming, restore from INFLIGHT cache before // loading full messages (INFLIGHT state is self-contained and sufficient). diff --git a/static/sw.js b/static/sw.js index 9e43db66..d8e867b4 100644 --- a/static/sw.js +++ b/static/sw.js @@ -7,7 +7,7 @@ // Cache version is injected by the server at request time (routes.py /sw.js handler). // Bumps automatically whenever the git commit changes — no manual edits needed. -const CACHE_NAME = 'hermes-shell-__CACHE_VERSION__'; +const CACHE_NAME = 'hermes-shell-__CACHE_VERSION__-stale-stream-cleanup1'; // Static assets that form the app shell. // diff --git a/tests/test_stale_stream_cleanup.py b/tests/test_stale_stream_cleanup.py new file mode 100644 index 00000000..c6070feb --- /dev/null +++ b/tests/test_stale_stream_cleanup.py @@ -0,0 +1,44 @@ +from pathlib import Path + +REPO = Path(__file__).resolve().parents[1] +ROUTES_SRC = (REPO / "api" / "routes.py").read_text(encoding="utf-8") +SESSIONS_SRC = (REPO / "static" / "sessions.js").read_text(encoding="utf-8") +SW_SRC = (REPO / "static" / "sw.js").read_text(encoding="utf-8") + + +def test_stale_stream_cleanup_helper_exists(): + assert "def _clear_stale_stream_state(session)" in ROUTES_SRC + assert "stream_id in STREAMS" in ROUTES_SRC + assert "session.active_stream_id = None" in ROUTES_SRC + assert "session.pending_user_message = None" in ROUTES_SRC + assert "session.pending_attachments = []" in ROUTES_SRC + assert "session.pending_started_at = None" in ROUTES_SRC + assert "session.save()" in ROUTES_SRC + + +def test_session_load_clears_stale_stream_before_response(): + load_pos = ROUTES_SRC.index("s = get_session(sid, metadata_only=(not load_messages))") + cleanup_pos = 
ROUTES_SRC.index("_clear_stale_stream_state(s)", load_pos) + response_pos = ROUTES_SRC.index('"active_stream_id": getattr(s, "active_stream_id", None)', cleanup_pos) + assert load_pos < cleanup_pos < response_pos + + +def test_chat_start_clears_stale_pending_state_not_only_active_id(): + stale_comment_pos = ROUTES_SRC.index("# Stale stream id from a previous run; clear and continue.") + cleanup_pos = ROUTES_SRC.index("_clear_stale_stream_state(s)", stale_comment_pos) + stream_id_pos = ROUTES_SRC.index("stream_id = uuid.uuid4().hex", cleanup_pos) + assert stale_comment_pos < cleanup_pos < stream_id_pos + + +def test_frontend_drops_inflight_cache_when_server_session_is_idle(): + marker = "If the server says the session is idle, discard any browser-side inflight" + marker_pos = SESSIONS_SRC.index(marker) + window = SESSIONS_SRC[marker_pos:marker_pos + 500] + assert "if(!activeStreamId&&INFLIGHT[sid])" in window + assert "delete INFLIGHT[sid]" in window + assert "clearInflightState" in window + assert "S.busy=false" in window + + +def test_service_worker_cache_bumped_for_frontend_fix_delivery(): + assert "stale-stream-cleanup1" in SW_SRC From dbb0879956f8b59b741e80a79eab3710ea569db1 Mon Sep 17 00:00:00 2001 From: Manfred Date: Sun, 3 May 2026 11:46:42 +0200 Subject: [PATCH 08/11] fix: pass WebUI max_tokens to agents Read configured max_tokens from config.yaml, pass it into WebUI-created AIAgent instances when supported, and include it in the agent cache signature. Also classify OpenRouter quota phrasing such as more credits, can only afford, and fewer max_tokens. Adds regression coverage for max_tokens propagation, cache signature isolation, and quota error classification. 
--- api/streaming.py | 29 ++++++++++++++++++ tests/test_streaming_max_tokens_quota.py | 39 ++++++++++++++++++++++++ 2 files changed, 68 insertions(+) create mode 100644 tests/test_streaming_max_tokens_quota.py diff --git a/api/streaming.py b/api/streaming.py index 25b29db4..bb208a41 100644 --- a/api/streaming.py +++ b/api/streaming.py @@ -1792,6 +1792,25 @@ def _run_agent_streaming( import inspect as _inspect _agent_params = set(_inspect.signature(_AIAgent.__init__).parameters) + # CLI-parity max output cap: read config.yaml's max_tokens and pass + # it to AIAgent when supported. Without this WebUI-created agents use + # provider-native output ceilings (e.g. Claude via OpenRouter can + # request 64k), which may turn an otherwise usable fallback into a + # 402 "more credits / fewer max_tokens" failure. + _max_tokens_cfg = None + try: + _raw_max_tokens = _cfg.get('max_tokens') + if _raw_max_tokens is None: + _agent_cfg_for_tokens = _cfg.get('agent', {}) + if isinstance(_agent_cfg_for_tokens, dict): + _raw_max_tokens = _agent_cfg_for_tokens.get('max_tokens') + if _raw_max_tokens is not None: + _parsed_max_tokens = int(_raw_max_tokens) + if _parsed_max_tokens > 0: + _max_tokens_cfg = _parsed_max_tokens + except Exception: + _max_tokens_cfg = None + # CLI-parity reasoning effort: read agent.reasoning_effort from the # active profile's config.yaml (the same key the CLI writes via # `/reasoning `) and hand the parsed dict to AIAgent. When @@ -1830,6 +1849,8 @@ def _run_agent_streaming( # but guard defensively to avoid TypeError on an older agent build. 
if 'reasoning_config' in _agent_params and _reasoning_config is not None: _agent_kwargs['reasoning_config'] = _reasoning_config + if 'max_tokens' in _agent_params and _max_tokens_cfg is not None: + _agent_kwargs['max_tokens'] = _max_tokens_cfg # Params added in newer hermes-agent — skip if not supported if 'api_mode' in _agent_params: _agent_kwargs['api_mode'] = _rt.get('api_mode') @@ -1861,6 +1882,8 @@ def _run_agent_streaming( _hashlib.sha256((resolved_api_key or '').encode()).hexdigest()[:16], resolved_base_url or '', resolved_provider or '', + _max_tokens_cfg or '', + _fallback_resolved or {}, sorted(_toolsets) if _toolsets else [], ], sort_keys=True) _agent_sig = _hashlib.sha256(_sig_blob.encode()).hexdigest()[:16] @@ -2098,6 +2121,9 @@ def _run_agent_streaming( 'insufficient credit' in _err_lower or 'credit balance' in _err_lower or 'credits exhausted' in _err_lower + or 'more credits' in _err_lower + or 'can only afford' in _err_lower + or 'fewer max_tokens' in _err_lower or 'quota_exceeded' in _err_lower or 'quota exceeded' in _err_lower or 'exceeded your current quota' in _err_lower @@ -2433,6 +2459,9 @@ def _run_agent_streaming( 'insufficient credit' in _exc_lower or 'credit balance' in _exc_lower or 'credits exhausted' in _exc_lower + or 'more credits' in _exc_lower + or 'can only afford' in _exc_lower + or 'fewer max_tokens' in _exc_lower or 'quota_exceeded' in _exc_lower or 'quota exceeded' in _exc_lower or 'exceeded your current quota' in _exc_lower diff --git a/tests/test_streaming_max_tokens_quota.py b/tests/test_streaming_max_tokens_quota.py new file mode 100644 index 00000000..2e37734d --- /dev/null +++ b/tests/test_streaming_max_tokens_quota.py @@ -0,0 +1,39 @@ +"""Regression coverage for WebUI streaming provider failure handling. 
+ +The incident this guards against: WebUI-created AIAgent instances did not pass +config.yaml's max_tokens, so a fallback Claude model via OpenRouter requested its +native 64k output ceiling and failed with HTTP 402 "more credits / fewer +max_tokens". The stream then looked like a stuck Thinking card instead of a +clear quota error. +""" +from pathlib import Path + + +STREAMING = Path(__file__).resolve().parents[1] / "api" / "streaming.py" + + +def _src() -> str: + return STREAMING.read_text(encoding="utf-8") + + +def test_streaming_passes_configured_max_tokens_to_agent(): + src = _src() + assert "_raw_max_tokens = _cfg.get('max_tokens')" in src + assert "_agent_cfg_for_tokens.get('max_tokens')" in src + assert "_agent_kwargs['max_tokens'] = _max_tokens_cfg" in src + + +def test_streaming_agent_cache_signature_includes_max_tokens_and_fallback(): + src = _src() + assert "_max_tokens_cfg or ''" in src + assert "_fallback_resolved or {}" in src + + +def test_openrouter_more_credits_error_is_classified_as_quota(): + src = _src() + assert "'more credits' in _err_lower" in src + assert "'can only afford' in _err_lower" in src + assert "'fewer max_tokens' in _err_lower" in src + assert "'more credits' in _exc_lower" in src + assert "'can only afford' in _exc_lower" in src + assert "'fewer max_tokens' in _exc_lower" in src From 9c0a16fdd6369b6c24c5d0c30047f97751b91ec6 Mon Sep 17 00:00:00 2001 From: Manfred Date: Sun, 3 May 2026 15:41:56 +0200 Subject: [PATCH 09/11] fix: recover WebUI-origin state.db sessions --- api/agent_sessions.py | 6 +++--- tests/test_gateway_sync.py | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+), 3 deletions(-) diff --git a/api/agent_sessions.py b/api/agent_sessions.py index 7b024f8b..fb35ffbd 100644 --- a/api/agent_sessions.py +++ b/api/agent_sessions.py @@ -214,9 +214,9 @@ def read_importable_agent_session_rows( db_path: Path, limit: int = 200, log=None, - exclude_sources: tuple[str, ...] 
| None = ("cron",), + exclude_sources: tuple[str, ...] | None = ("cron", "webui"), ) -> list[dict]: - """Return non-WebUI agent sessions projected as importable conversations. + """Return agent sessions projected as importable conversations. Hermes Agent can create rows in ``state.db.sessions`` before a session has any messages, and long conversations can be split into compression-linked @@ -256,7 +256,7 @@ def read_importable_agent_session_rows( ended_expr = _optional_col('ended_at', session_cols) end_reason_expr = _optional_col('end_reason', session_cols) - where_clauses = ["s.source IS NOT NULL", "s.source != 'webui'"] + where_clauses = ["s.source IS NOT NULL"] params: list[str] = [] if exclude_sources: excluded = tuple(str(source) for source in exclude_sources if source) diff --git a/tests/test_gateway_sync.py b/tests/test_gateway_sync.py index d2dff359..e9eef69a 100644 --- a/tests/test_gateway_sync.py +++ b/tests/test_gateway_sync.py @@ -208,6 +208,41 @@ def test_gateway_sessions_appear_when_enabled(): post('/api/settings', {'show_cli_sessions': False}) +def test_webui_state_db_session_without_sidecar_appears_when_agent_sessions_enabled(): + """Regression: WebUI-origin rows in state.db can recover missing JSON sidecars.""" + conn = _ensure_state_db() + sid = 'webui_state_only_001' + try: + _insert_agent_session_row( + conn, + session_id=sid, + source='webui', + title='Recovered WebUI Session', + model='openai/gpt-5', + messages=2, + ) + + post('/api/settings', {'show_cli_sessions': True}) + + data, status = get('/api/sessions') + assert status == 200 + sessions = data.get('sessions', []) + recovered = [s for s in sessions if s.get('session_id') == sid] + assert len(recovered) == 1, ( + "WebUI-origin sessions that exist in state.db but have no JSON sidecar " + "should be surfaced through the agent-session bridge for recovery." 
+ ) + assert recovered[0].get('source_tag') == 'webui' + assert recovered[0].get('is_cli_session') is True + finally: + try: + _remove_test_sessions(conn, sid) + conn.close() + except Exception: + pass + post('/api/settings', {'show_cli_sessions': False}) + + def test_gateway_sessions_without_messages_are_hidden_from_sidebar(): """Regression: empty agent session rows must not appear as broken sidebar entries.""" conn = _ensure_state_db() From 2856ee66378fb0990ff48cb73d66db1cee3d8b7a Mon Sep 17 00:00:00 2001 From: Hermes Bot Date: Sun, 3 May 2026 16:21:42 +0000 Subject: [PATCH 10/11] =?UTF-8?q?fix(stage-279):=20absorb=20Opus=20MUST-FI?= =?UTF-8?q?X=20=E2=80=94=20sw.js=20conflict-marker=20resolution?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Opus advisor flagged that the conflict-marker resolution from PR #1525's merge had not actually landed — static/sw.js still contained the literal <<<<<<< HEAD / ======= / >>>>>>> pr-1525 markers, which made the file fail to parse as JavaScript even though the substring-based source-string tests still passed (the __WEBUI_VERSION__ token was present, just inside the conflict block). Concrete impact pre-fix when shipped: - Service worker install handler would throw on script load - SW would never reach activated state - Old SW (from v0.50.278) would keep controlling the page indefinitely - Frontend cache-bust pathway silently broken - The INFLIGHT[sid] clear in static/sessions.js (the frontend half of PR #1525's stale-stream cleanup) would never deliver to existing browsers because the new SW would never activate Fix: - Resolve sw.js conflict to keep CACHE_NAME = 'hermes-shell-__WEBUI_VERSION__' (the post-#1517 rename, with the manual -stale-stream-cleanup1 suffix dropped as redundant — natural version-token bump invalidates old caches). 
- Add tests/test_pwa_manifest_sw.py::test_sw_js_has_no_merge_conflict_markers regression guard that scans for <<<<<<<, =======, >>>>>>> in sw.js source. - Update tests/test_stale_stream_cleanup.py::test_service_worker_cache_ bumped_for_frontend_fix_delivery to assert the canonical version-token CACHE_NAME pattern instead of the (now-removed) -stale-stream-cleanup1 manual suffix. 3945 → 3946 tests passing (+1 from the new conflict-marker guard). This issue would have shipped a broken service worker if Opus hadn't caught it. The new test_sw_js_has_no_merge_conflict_markers test would have flagged it earlier in the pipeline. Caught-by: Opus advisor pass on stage-279 brief Co-authored-by: ai-ag2026 --- static/sw.js | 4 ---- tests/test_pwa_manifest_sw.py | 19 +++++++++++++++++++ tests/test_stale_stream_cleanup.py | 23 ++++++++++++++++++++++- 3 files changed, 41 insertions(+), 5 deletions(-) diff --git a/static/sw.js b/static/sw.js index e4e7ac38..cb8e2071 100644 --- a/static/sw.js +++ b/static/sw.js @@ -7,11 +7,7 @@ // Cache version is injected by the server at request time (routes.py /sw.js handler). // Bumps automatically whenever the git commit changes — no manual edits needed. -<<<<<<< HEAD const CACHE_NAME = 'hermes-shell-__WEBUI_VERSION__'; -======= -const CACHE_NAME = 'hermes-shell-__CACHE_VERSION__-stale-stream-cleanup1'; ->>>>>>> pr-1525 // Static assets that form the app shell. 
// diff --git a/tests/test_pwa_manifest_sw.py b/tests/test_pwa_manifest_sw.py index c5e46982..730d4a9a 100644 --- a/tests/test_pwa_manifest_sw.py +++ b/tests/test_pwa_manifest_sw.py @@ -57,6 +57,25 @@ class TestServiceWorker: "handler at /sw.js to replace with WEBUI_VERSION at request time" ) + def test_sw_js_has_no_merge_conflict_markers(self): + """Regression guard for v0.50.279 stage build: a leftover git conflict + marker in static/sw.js made the file fail to parse as JavaScript even + though the substring-based source-string tests still passed (the + ``__WEBUI_VERSION__`` token was present, just inside the conflict block). + + A broken sw.js means the install handler throws on script load → SW + never reaches activated state → old SW keeps controlling the page → + every "old SW deletes other caches" guarantee is forfeited and frontend + cache-bust pathways silently break. Caught by Opus advisor pre-merge, + ship blocked. This test would have caught it too. + """ + src = SW.read_text(encoding="utf-8") + for marker in ("<<<<<<<", "=======\n", ">>>>>>>"): + assert marker not in src, ( + f"static/sw.js contains conflict marker {marker!r}; " + "the merge resolution did not actually land. Reject ship." 
+ ) + def test_sw_bypasses_api_and_stream(self): src = SW.read_text(encoding="utf-8") assert "/api/" in src, "SW must bypass /api/* (no cached auth/session responses)" diff --git a/tests/test_stale_stream_cleanup.py b/tests/test_stale_stream_cleanup.py index c6070feb..fe117d01 100644 --- a/tests/test_stale_stream_cleanup.py +++ b/tests/test_stale_stream_cleanup.py @@ -41,4 +41,25 @@ def test_frontend_drops_inflight_cache_when_server_session_is_idle(): def test_service_worker_cache_bumped_for_frontend_fix_delivery(): - assert "stale-stream-cleanup1" in SW_SRC + """The SW CACHE_NAME must be keyed on the WEBUI_VERSION placeholder so + every release naturally invalidates the previous shell cache and delivers + the frontend half of the stale-stream cleanup fix to existing browsers. + + Originally pinned a manual `-stale-stream-cleanup1` suffix on + `CACHE_NAME` (PR #1525 author shipped that to force-bump existing + SWs). During the v0.50.279 stage build that suffix collided with the + independent #1517 placeholder rename (`__CACHE_VERSION__` → + `__WEBUI_VERSION__`), so the maintainer dropped the manual suffix in + favor of the canonical version-token path. The natural bump still + invalidates the old cache via `keys.filter((k) => k !== CACHE_NAME)` + in the activate handler — same delivery guarantee, less churn. + """ + # CACHE_NAME must include the WEBUI_VERSION placeholder so each release + # produces a different cache name. The activate handler then deletes any + # cache whose key != current CACHE_NAME, so the old shell is reaped on + # every upgrade and the new sessions.js (with the INFLIGHT[sid] clear) + # ships to existing browsers. + assert "CACHE_NAME = 'hermes-shell-__WEBUI_VERSION__'" in SW_SRC, ( + "SW CACHE_NAME must include __WEBUI_VERSION__ so each release " + "invalidates the previous cache and delivers frontend changes." 
+ ) From 11cc493806ab2b4e4fb150e4696e2d3adb7c7724 Mon Sep 17 00:00:00 2001 From: Hermes Bot Date: Sun, 3 May 2026 16:23:30 +0000 Subject: [PATCH 11/11] release: stamp v0.50.279 \u2014 8-PR batch (sweep) + Opus MUST-FIX absorbed CHANGELOG, ROADMAP, TESTING bumped (3936 \u2192 3946). 8 constituent PRs: - #1523 (@franksong2702) branch indicator codepoint fix - #1519 (@franksong2702) onboarding API-key focus loss fix - #1518 (@franksong2702) voice-mode toggle-off recognizer stop - #1516 (@franksong2702) YAML newline CSS rules - #1517 (@franksong2702) __CACHE_VERSION__ \u2192 __WEBUI_VERSION__ rename - #1532 (@ai-ag2026) state.db WebUI session recovery - #1525 (@ai-ag2026) stale stream state proactive cleanup - #1526 (@ai-ag2026) max_tokens forwarding + OpenRouter quota classifier Opus MUST-FIX absorbed: sw.js conflict-marker cleanup + regression guard. Opus SHOULD-FIX deferred to follow-up #1533 (race in _clear_stale_stream_state). 2 closed as duplicates: #1528 (identical to #1517), #1529 (superseded by #1516). 1 maintainer-review label: #1531 (Asunfly stowaway change in force-push). 5 stay on hold: #1418 #1464 #1404 #1353 #1311. --- CHANGELOG.md | 29 +++++++++++++++++++++++++++++ ROADMAP.md | 2 +- TESTING.md | 4 ++-- 3 files changed, 32 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e852b72b..ae291eb7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,34 @@ # Hermes Web UI -- Changelog +## [v0.50.279] — 2026-05-03 + +### Fixed (8-PR batch from full PR sweep — closes #1463, #1491, #1503, #1509, #1522) + +- **Branch indicator codepoint corrected** (#1523, @franksong2702; closes #1522) — the fork-indicator glyph in the sidebar was rendering `⒂ PARENTHESIZED DIGIT FIFTEEN` (`\u2482`) instead of the intended `⑂ OCR FORK` (`\u2442`). Forked sessions appeared with a mysterious "(15)" prefix that looked like a message count or unread badge — users would click expecting something related to "15" and find nothing. 
The actual fork indicator was invisible. One-character fix in `static/sessions.js:1657` plus the matching test assertion update. + +- **Onboarding API-key field stops losing focus during probe** (#1519, @franksong2702; closes #1503) — the wizard's API-key input had `oninput="_scheduleOnboardingProbe()"` firing a 400ms-debounced probe on every keystroke. When the probe completed, `_renderOnboardingBody()` rebuilt the entire form DOM, destroying the `<input>` element the user was typing into. On localhost the probe completes in ~5-50ms so the bug window was narrow; on slow networks (VPN, corporate proxy, cold-start vLLM) the re-render routinely landed between keystrokes. Especially painful on the password field where users paste long secrets. **Fix:** removed `_scheduleOnboardingProbe()` from the api-key input's `oninput` handler in `static/onboarding.js:200`; added `onblur="_runOnboardingProbe()"` so the probe still fires when the user tabs away. The probe also still fires via the "Test connection" button and `nextOnboardingStep()` before Continue — no flow breakage. + +- **Voice-mode pref toggle-off now stops the recognizer** (#1518, @franksong2702; closes #1491) — if a user enabled the hands-free voice mode (PR #1489, v0.50.271), started a conversation, then opened Settings → Preferences and disabled the pref, the button disappeared but the SpeechRecognition kept running. The user had no way to stop it short of reloading the page — and it was consuming microphone access + battery the whole time. **Fix:** `_applyVoiceModePref()` in `static/boot.js` now reads the pref into a local `enabled` variable and calls `_deactivate()` (the standard cleanup path that stops recognition, clears timers, restores TTS, resets UI state) when `!enabled && _voiceModeActive`. Plus a TDZ-safety hoist: `let _voiceModeActive = false` moved above `_applyVoiceModePref()` (was previously declared after the function — Temporal Dead Zone risk if the function were ever called before init).
+ +- **YAML code blocks render with newlines** (#1516, @franksong2702; closes #1463) — Prism's YAML grammar wraps tokens in `<span>` elements where `white-space` defaults to `normal`, collapsing `\n` characters into spaces even when the underlying `textContent` preserved them. Plain code blocks and `language-bash` rendered correctly; only `language-yaml` was affected. YAML is one of the most common LLM output formats (config files, docker-compose, CI pipelines, Kubernetes manifests) — flattened YAML in chat is unreadable. **Fix:** two CSS rules in `static/style.css` forcing `white-space: pre !important` on `.msg-body pre code.language-yaml .token` and `.preview-md pre code.language-yaml .token`. Scoped tightly to YAML — no impact on other languages. Verified via the reporter's two diagnostic probes (`textContent` had `\n`, only `language-yaml` was affected) that the renderer pipeline was correct and the fix needed to be at the CSS layer. + +- **Service-worker placeholder consolidation** (#1517, @franksong2702; closes #1509) — `__CACHE_VERSION__` (in `static/sw.js`) and `__WEBUI_VERSION__` (in `static/index.html`) were functionally identical: both substituted at request time via `quote(WEBUI_VERSION, safe="")`. Two names existed for historical reasons (different files added at different releases). Naming hygiene flagged by both the independent reviewer and the Opus advisor during the v0.50.276 release review. **Fix:** rename `__CACHE_VERSION__` → `__WEBUI_VERSION__` across `static/sw.js`, `api/routes.py`, `tests/test_pwa_manifest_sw.py`. Pure rename, no behavior change — same `?v=vX.Y.Z` query strings on the same URLs at the wire.
+ +- **WebUI-origin state.db sessions recoverable when JSON sidecar missing** (#1532, @ai-ag2026; refs #1471) — when a WebUI-origin session existed in `state.db.sessions` / `state.db.messages` but the matching `~/.hermes/webui/sessions/.json` sidecar was missing (possible after disk-write failures, partial restore, or interrupted writes), the session was invisible to `/api/sessions` even though the canonical SQLite messages were intact. Root cause: `read_importable_agent_session_rows()` had a hard-coded `s.source != 'webui'` predicate that re-applied the filter even when callers opted out via `exclude_sources=None`. Slice 1 of the #1471 session-recovery class. **Fix:** `api/agent_sessions.py` makes the default exclusion explicit (`("cron", "webui")`) and removes the hard-coded predicate so `exclude_sources=None` actually includes WebUI-origin rows. New regression test `test_webui_state_db_session_without_sidecar_appears_when_agent_sessions_enabled`. + +- **Stale runtime stream state cleared proactively** (#1525, @ai-ag2026; refs #1471) — session JSON could retain `active_stream_id` plus paired pending fields (`pending_user_message`, `pending_attachments`, `pending_started_at`) after a stream failure, provider exception, or server restart. `/health` would correctly report `active_streams: 0`, but `/sessions/` would still claim `agent_running` (pure truthiness on `s.active_stream_id`) and the frontend's `INFLIGHT[sid]` would keep the UI busy on a dead stream. Slice 2 of the #1471 session-recovery class, distinct from #1532's "session in DB but no sidecar" path. **Fix:** new `_clear_stale_stream_state()` helper in `api/streaming.py` runs proactively at the read boundary (`/sessions/` GET) and before new turns start. Verifies the stream is actually missing from `STREAMS` (the in-memory registry) before clearing — never expires live streams by age. Frontend half: `static/sessions.js` clears `INFLIGHT[sid]` when the server reports no `active_stream_id`. 
**Maintainer merge-conflict resolution:** kept the rename-side `CACHE_NAME = 'hermes-shell-__WEBUI_VERSION__'` (post-#1517 rename) over the PR's manual `-stale-stream-cleanup1` suffix. The renamed placeholder still auto-bumps with each release through `quote(WEBUI_VERSION, safe="")`, so the manual suffix was redundant — natural version bump (v0.50.278 → v0.50.279) already invalidates the old cache via `caches.delete(k)` for `k !== CACHE_NAME` in the SW activate handler. 5 new regression tests in `test_stale_stream_cleanup.py`. + +- **WebUI max_tokens forwarded to agent + OpenRouter quota classifier** (#1526, @ai-ag2026; refs #1524) — WebUI agent initialization didn't pass the configured `max_tokens` to `AIAgent`, so provider-native output ceilings could be requested. On OpenRouter this could fail with quota-style HTTP 402 messages like `more credits`, `can only afford`, `fewer max_tokens`. Pre-fix, those phrases weren't classified as quota failures and didn't trigger the fallback chain — users saw raw 402 errors instead of automatic fallback to a less-expensive model. **Fix:** `api/streaming.py` reads configured `max_tokens` from top-level + `agent.max_tokens` fallback, parses positive integers, includes both `max_tokens` and the fallback state in the `SESSION_AGENT_CACHE` signature (so config changes don't reuse a stale cached agent), and passes `max_tokens` to `AIAgent` only when the constructor supports it (uses `inspect.signature(AIAgent.__init__)` rather than a try/except that would swallow real `TypeError`s). Quota classifier additions for the three OpenRouter phrases route to the same fallback chain as existing quota markers. New regression tests in `test_streaming_max_tokens_quota.py`. + +### Notes + +- 3936 → **3946** tests passing (+9 from constituent PRs + 1 conflict-marker regression guard added in-release per Opus MUST-FIX). 
+- Pre-release Opus advisor pass: **caught a MUST-FIX (sw.js merge-conflict markers still in tree despite earlier `git add`/`commit`)** that would have shipped a broken service worker. Resolution applied in stage and a `test_sw_js_has_no_merge_conflict_markers` regression guard added so this can't happen silently again. One SHOULD-FIX (race in `_clear_stale_stream_state` between registry-check and session-mutate) explicitly deferred to follow-up #1533 per Opus's "fine to defer given the narrow window" advice — bounded effect (orphaned stream requires retry, no data corruption). +- One merge conflict resolved during stage build (#1525 vs #1517 cache-name placeholder collision); resolution drops PR #1525's manual `-stale-stream-cleanup1` suffix in favor of the canonical `__WEBUI_VERSION__` token (natural release-bump preserves the cache-invalidation guarantee). +- 2 PRs closed as duplicates during sweep: #1528 (identical to #1517) and #1529 (superseded by #1516, `.preview-md` coverage missing). +- 5 PRs stay on hold: #1418 (hard prereq hermes-agent#18534 not yet merged), #1464 (blocker — `noResults` ternary inverted, awaiting JKJameson fix), #1404 (UX — aronprins width feedback unresolved), #1353 (already `ready-for-review` tagged, durability path needs independent review), #1311 (draft + CONFLICTING). +- 1 PR routed to maintainer-review: #1531 (Asunfly stowaway change in force-push to title aux generation that wasn't in PR description; awaiting scope decision). + ## [v0.50.278] — 2026-05-03 ### Added (1 PR — splices best of #1497 + #1513) diff --git a/ROADMAP.md b/ROADMAP.md index 6784147b..1253a049 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -3,7 +3,7 @@ > Goal: Full 1:1 parity with the Hermes CLI experience via a clean dark web UI. > Everything you can do from the CLI terminal, you can do from this UI. 
> -> Last updated: v0.50.278 (May 03, 2026) — 3936 tests collected +> Last updated: v0.50.279 (May 03, 2026) — 3946 tests collected > Tests: `pytest tests/ --collect-only -q` > Source: / diff --git a/TESTING.md b/TESTING.md index a8a15379..73e1ba80 100644 --- a/TESTING.md +++ b/TESTING.md @@ -1835,8 +1835,8 @@ Bridged CLI sessions: --- -*Last updated: v0.50.278, May 03, 2026* -*Total automated tests collected: 3936* +*Last updated: v0.50.279, May 03, 2026* +*Total automated tests collected: 3946* *Regression gate: tests/test_regressions.py* *Run: pytest tests/ -v --timeout=60* *Source: /*