Stage 384: PR #2505

# Conflicts:
#	CHANGELOG.md
This commit is contained in:
nesquena-hermes
2026-05-18 22:44:02 +00:00
3 changed files with 74 additions and 7 deletions
+2
View File
@@ -7,6 +7,8 @@
- **PR #2536** (closes #2514, refs #2535) — Stop reasoning-only Thinking entries from being replayed into provider-facing history as blank assistant turns, preventing long WebUI sessions from accumulating duplicated stale Thinking blocks and inflated Activity/tool metadata on later turns. Settled compact Activity rerenders now also clear previously inserted Thinking rows before rebuilding the visible transcript.
- **PR #2520** by @OneFat3 (refs #2247) — Route archive extraction (`/api/upload/extract`) through the per-session attachment inbox (`_session_attachment_dir`) instead of hardcoded `Path(s.workspace)`, matching the single-file upload path. Extracted archives now land at `<attachment_root>/<session_id>/<archive_stem>/` so session deletion cleanup covers them and per-session isolation is preserved when `HERMES_WEBUI_ATTACHMENT_DIR` is configured.
- **PR #2505** — Surface provider fallback and rate-limit lifecycle notices as auto-clearing fallback warnings in the streaming composer status, matching the frontend warning contract.
## [v0.51.90] — 2026-05-18 — Release BN (stage-383 — 10-PR full sweep batch — empty-gateway messaging history fix + previous-messaging-sessions setting + Kanban board switcher layout + UI/UX demo theme controls + Slice 3c queue/goal RFC gate + keyless custom endpoints + custom-provider remote model catalog parity + auto-compression elapsed timer + new-conversation cold-start guard + Kanban drag-drop detail open fix)
### Fixed
+32 -6
View File
@@ -85,6 +85,22 @@ def _resolve_custom_provider_runtime_overrides(
return resolved_provider, resolved_api_key, resolved_base_url
def _is_fallback_lifecycle_message(kind: str, message: str) -> bool:
"""Return True if an agent lifecycle status should surface as a fallback warning."""
k = str(kind or '').strip().lower()
m = str(message or '').strip().lower()
return (
k == 'lifecycle'
and (
'rate limited' in m
or 'switching to fallback' in m
or 'falling back' in m
or 'fallback activated' in m
or 'trying fallback' in m
)
)
def _prewarm_skill_tool_modules():
"""Import tools.skills_tool and tools.skill_manager_tool outside any lock.
@@ -3066,7 +3082,12 @@ def _run_agent_streaming(
logger.debug("Failed to put event to queue")
def _agent_status_callback(kind, message):
"""Bridge Agent lifecycle compression status into WebUI SSE."""
"""Bridge Agent lifecycle status into WebUI SSE.
Passes compression events as 'compressing' events and rate-limit/fallback
events as 'warning' events so the frontend can surface them to the user.
All other lifecycle messages are dropped silently.
"""
_message = str(message or '').strip()
_kind = str(kind or '').strip().lower()
if not _message:
@@ -3081,12 +3102,17 @@ def _run_agent_streaming(
or 'context too large' in _lower
)
)
if not _is_compression_start:
if _is_compression_start:
put('compressing', {
'session_id': session_id,
'message': 'Auto-compressing context to continue...',
})
return
put('compressing', {
'session_id': session_id,
'message': 'Auto-compressing context to continue...',
})
# Pass through rate-limit and fallback messages so the frontend can
# show them as warnings via the existing messages.js 'warning' listener.
_is_fallback_notice = _is_fallback_lifecycle_message(_kind, _message)
if _is_fallback_notice:
put('warning', {'type': 'fallback', 'message': _message})
# Initialised here (before any code that may raise) so the outer `finally`
# block can safely check `if _checkpoint_stop is not None` even when an
+40 -1
View File
@@ -1,5 +1,7 @@
from pathlib import Path
from api.streaming import _is_fallback_lifecycle_message
ROOT = Path(__file__).resolve().parents[1]
@@ -38,7 +40,7 @@ def test_auto_compression_running_sse_uses_active_session_running_card():
assert "message:d.message||'Auto-compressing context...'" in block
def test_auto_compression_running_sse_is_emitted_from_agent_lifecycle_status():
def test_agent_status_callback_emits_compressing_and_warning_events():
src = _read("api/streaming.py")
start = src.find("def _agent_status_callback")
assert start != -1, "agent status callback bridge not found"
@@ -46,6 +48,7 @@ def test_auto_compression_running_sse_is_emitted_from_agent_lifecycle_status():
assert end != -1, "status callback block end marker not found"
block = src[start:end]
# compressing events for compression lifecycle notices
assert "put('compressing'" in block
assert "'session_id': session_id" in block
assert "'message': 'Auto-compressing context to continue...'" in block
@@ -53,11 +56,47 @@ def test_auto_compression_running_sse_is_emitted_from_agent_lifecycle_status():
assert "'compressing'" in block
assert "'compacting context'" in block
assert "'context too large'" in block
# warning events with type:fallback for rate-limit/fallback lifecycle notices
assert "put('warning'" in block
assert "'type': 'fallback'" in block
assert "'rate limited'" in src
assert "'switching to fallback'" in src
assert "'falling back'" in src
assert "'fallback activated'" in src
assert "'trying fallback'" in src
# Verify callback is wired to agent
assert "'status_callback' in _agent_params" in src
assert "_agent_kwargs['status_callback'] = _agent_status_callback" in src
assert "agent.status_callback = _agent_kwargs.get('status_callback')" in src
def test_agent_status_callback_wiring():
    """Check that api/streaming.py defines the status callback and registers it in the agent kwargs."""
    streaming_source = _read("api/streaming.py")
    expected_fragments = (
        "_agent_status_callback",
        "_agent_kwargs['status_callback'] = _agent_status_callback",
    )
    for fragment in expected_fragments:
        assert fragment in streaming_source
def test_fallback_lifecycle_message_predicate_matches_agent_emitters():
    """Exercise the fallback predicate with sample (kind, message) pairs.

    Lifecycle rate-limit/fallback notices must be accepted; the same text
    under a non-lifecycle kind, and an unrelated lifecycle notice, must not.
    """
    surfaced_cases = (
        ("lifecycle", "Rate limited — switching to fallback provider..."),
        ("lifecycle", "Non-retryable error (HTTP 500) — trying fallback..."),
    )
    ignored_cases = (
        ("tool", "Rate limited — switching to fallback provider..."),
        ("lifecycle", "Auto-compressing context to continue..."),
    )
    for status_kind, status_text in surfaced_cases:
        assert _is_fallback_lifecycle_message(status_kind, status_text)
    for status_kind, status_text in ignored_cases:
        assert not _is_fallback_lifecycle_message(status_kind, status_text)
def test_auto_compression_completion_transition_is_preserved_after_running_listener():
src = _read("static/messages.js")
compressing_idx = src.find("source.addEventListener('compressing'")