mirror of
https://github.com/nesquena/hermes-webui.git
synced 2026-05-24 18:50:15 +00:00
feat: add agent heartbeat alert
This commit is contained in:
@@ -0,0 +1,132 @@
|
||||
"""Hermes agent/gateway heartbeat payload helpers (#716).
|
||||
|
||||
The WebUI process is not always paired with a long-running Hermes gateway. Some
|
||||
setups use WebUI only, while self-hosted messaging deployments run a separate
|
||||
Hermes gateway daemon that records runtime metadata in the Hermes Agent home.
|
||||
This module turns those existing safe runtime signals into a small UI-facing
|
||||
heartbeat without shelling out or adding psutil as a hard dependency.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import importlib
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any
|
||||
|
||||
|
||||
def _checked_at() -> str:
    """Return the current UTC time as a timezone-aware ISO-8601 string."""
    now_utc = datetime.now(tz=timezone.utc)
    return now_utc.isoformat()
|
||||
|
||||
|
||||
def _gateway_status_module():
    """Import and return the ``gateway.status`` module on demand.

    The import happens at call time rather than module import time so tests
    and WebUI-only installs stay isolated from the gateway package.
    """
    module_name = "gateway.status"
    return importlib.import_module(module_name)
|
||||
|
||||
|
||||
def _runtime_detail_subset(runtime_status: dict[str, Any] | None) -> dict[str, Any]:
    """Return only non-sensitive runtime fields for the browser.

    gateway.status records argv/PID metadata so the CLI can validate process
    identity. The WebUI alert only needs health semantics, never raw command
    lines, paths, environment, or tokens.

    Args:
        runtime_status: The gateway runtime status mapping. Any non-dict value
            (including ``None``) is treated as "no status available".

    Returns:
        A new dict holding at most ``gateway_state``, ``updated_at``,
        ``active_agents``, ``platform_count`` and ``platform_states``. Fields
        whose source value is missing or malformed are omitted, except that a
        missing/falsy ``active_agents`` is reported as ``0``.
    """
    if not isinstance(runtime_status, dict):
        return {}

    details: dict[str, Any] = {}

    # Plain string fields are copied only when they are non-empty strings.
    for key in ("gateway_state", "updated_at"):
        value = runtime_status.get(key)
        if isinstance(value, str) and value:
            details[key] = value

    try:
        details["active_agents"] = max(0, int(runtime_status.get("active_agents") or 0))
    except (TypeError, ValueError, OverflowError):
        # Deliberate best effort: a malformed count (e.g. "abc", NaN, or an
        # infinite float, which makes int() raise OverflowError) must not
        # break the health endpoint, so the field is simply left out.
        pass

    platforms = runtime_status.get("platforms")
    if isinstance(platforms, dict):
        details["platform_count"] = len(platforms)
        # Expose only an aggregate "state -> count" histogram, never the
        # per-platform payloads themselves.
        states: dict[str, int] = {}
        for payload in platforms.values():
            if not isinstance(payload, dict):
                continue
            state = payload.get("state")
            if isinstance(state, str) and state:
                states[state] = states.get(state, 0) + 1
        if states:
            details["platform_states"] = states

    return details
|
||||
|
||||
|
||||
def build_agent_health_payload() -> dict[str, Any]:
    """Return `{alive, checked_at, details}` for the Hermes gateway/agent.

    `alive` is intentionally tri-state:
    * True: a gateway runtime signal says the process is alive.
    * False: gateway metadata exists, but no live gateway process owns it.
    * None: no gateway metadata/status is available, so this WebUI setup is
      probably not configured with a separate gateway process.

    Returns:
        A dict with ``alive`` (``True``/``False``/``None``), ``checked_at``
        (ISO-8601 UTC timestamp) and ``details`` (a ``state`` string, an
        optional ``reason``, plus the safe runtime subset when available).
        This function never raises for gateway-side failures; they are
        folded into the returned state.
    """
    checked_at = _checked_at()
    try:
        gateway_status = _gateway_status_module()
    except Exception as exc:
        # Only the exception class name is exposed; messages may carry paths.
        return {
            "alive": None,
            "checked_at": checked_at,
            "details": {
                "state": "unknown",
                "reason": "gateway_status_unavailable",
                "error": type(exc).__name__,
            },
        }

    try:
        runtime_status = gateway_status.read_runtime_status()
    except Exception:
        runtime_status = None

    # The outer try also covers the compatibility fallback: an exception
    # raised inside an ``except`` handler is not caught by sibling handlers of
    # the same ``try``, so without this nesting a failing fallback call would
    # escape and break the health endpoint.
    try:
        try:
            running_pid = gateway_status.get_running_pid(cleanup_stale=False)
        except TypeError:
            # Older agent versions may not expose cleanup_stale. Keep compatibility.
            running_pid = gateway_status.get_running_pid()
    except Exception:
        running_pid = None

    safe_details = _runtime_detail_subset(runtime_status)
    if running_pid is not None:
        return {
            "alive": True,
            "checked_at": checked_at,
            "details": {
                "state": "alive",
                **safe_details,
            },
        }

    if isinstance(runtime_status, dict):
        return {
            "alive": False,
            "checked_at": checked_at,
            "details": {
                "state": "down",
                "reason": "gateway_not_running",
                **safe_details,
            },
        }

    return {
        "alive": None,
        "checked_at": checked_at,
        "details": {
            "state": "unknown",
            "reason": "gateway_not_configured",
        },
    }
|
||||
@@ -479,6 +479,7 @@ from api.helpers import (
|
||||
redact_session_data,
|
||||
_redact_text,
|
||||
)
|
||||
from api.agent_health import build_agent_health_payload
|
||||
|
||||
|
||||
def _clear_stale_stream_state(session) -> bool:
|
||||
@@ -2487,6 +2488,9 @@ def handle_get(handler, parsed) -> bool:
|
||||
if parsed.path == "/health":
|
||||
return _handle_health(handler, parsed)
|
||||
|
||||
if parsed.path == "/api/health/agent":
|
||||
return j(handler, build_agent_health_payload())
|
||||
|
||||
if parsed.path == "/api/models":
|
||||
return j(handler, get_available_models())
|
||||
|
||||
|
||||
Binary file not shown.
|
After Width: | Height: | Size: 146 KiB |
@@ -342,6 +342,13 @@
|
||||
<button class="reconnect-btn" onclick="refreshSession()"><svg width="13" height="13" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" aria-hidden="true" style="vertical-align:-1px"><polyline points="23 4 23 10 17 10"/><polyline points="1 20 1 14 7 14"/><path d="M3.51 9a9 9 0 0 1 14.85-3.36L23 10M1 14l4.64 4.36A9 9 0 0 0 20.49 15"/></svg> Reload</button>
|
||||
</div>
|
||||
</div>
|
||||
<div class="agent-health-banner" id="agentHealthBanner" role="alert" aria-live="assertive" hidden>
|
||||
<div class="agent-health-copy">
|
||||
<strong id="agentHealthTitle">Hermes agent is not responding</strong>
|
||||
<span id="agentHealthDetails">The gateway heartbeat failed. Messages may not be delivered until it comes back.</span>
|
||||
</div>
|
||||
<button class="agent-health-dismiss" id="agentHealthDismiss" type="button" onclick="dismissAgentHealthAlert()" aria-label="Dismiss Hermes agent heartbeat alert">Dismiss</button>
|
||||
</div>
|
||||
<div class="composer-wrap" id="composerWrap">
|
||||
<div class="composer-flyout">
|
||||
<!-- Queue flyout: slides up from behind composer, same pattern as approval-card -->
|
||||
|
||||
@@ -519,6 +519,13 @@
|
||||
.reconnect-banner.visible{display:flex;}
|
||||
.reconnect-btn{padding:6px 12px;border-radius:8px;font-size:12px;font-weight:600;background:var(--accent-bg-strong);border:1px solid var(--accent-bg-strong);color:var(--accent-text);cursor:pointer;}
|
||||
.reconnect-btn:hover{background:var(--accent-bg-strong);}
|
||||
.agent-health-banner{position:sticky;bottom:0;z-index:4;display:none;align-items:center;justify-content:space-between;gap:12px;margin:10px auto 0;max-width:var(--msg-max);width:calc(100% - 40px);padding:12px 16px;border:1px solid color-mix(in srgb,var(--error) 55%,var(--surface));border-radius:12px;background:color-mix(in srgb,var(--error) 14%,var(--surface));color:var(--text);box-shadow:0 10px 32px rgba(0,0,0,.16);}
|
||||
.agent-health-banner.visible{display:flex;}
|
||||
.agent-health-copy{display:flex;flex-direction:column;gap:3px;min-width:0;font-size:13px;line-height:1.35;}
|
||||
.agent-health-copy strong{color:var(--error);font-size:13px;}
|
||||
.agent-health-copy span{color:var(--muted);}
|
||||
.agent-health-dismiss{flex-shrink:0;padding:6px 12px;border-radius:8px;border:1px solid color-mix(in srgb,var(--error) 45%,var(--surface));background:color-mix(in srgb,var(--error) 10%,var(--surface));color:var(--error);font-size:12px;font-weight:600;cursor:pointer;}
|
||||
.agent-health-dismiss:hover{background:color-mix(in srgb,var(--error) 18%,var(--surface));}
|
||||
/* ── Update banner ── */
|
||||
.update-banner{display:none;background:var(--surface);border:1px solid var(--accent);border-radius:10px;padding:10px 16px;margin:10px auto;max-width:780px;font-size:13px;color:var(--accent-text);align-items:center;justify-content:space-between;gap:12px;}
|
||||
.update-banner.visible{display:flex;}
|
||||
|
||||
@@ -3021,6 +3021,82 @@ function dismissReconnect() {
|
||||
$('reconnectBanner').classList.remove('visible');
|
||||
clearInflight();
|
||||
}
|
||||
|
||||
// ── Hermes agent/gateway heartbeat alert (#716) ──
|
||||
// Delay between heartbeat polls while the monitor is running, in milliseconds.
const AGENT_HEALTH_INTERVAL_MS=30000;
// localStorage key holding '1' while the user has dismissed the alert.
const AGENT_HEALTH_DISMISSED_KEY='agent-health-dismissed';
// Handle returned by setInterval, or null while polling is stopped.
let _agentHealthTimer = null;
// Last state recorded by pollAgentHealth: 'alive', 'down' or 'unknown'.
let _agentHealthLastState = 'unknown';
|
||||
// Returns true when the dismissed flag is stored as '1'.
// Any error while reading localStorage is treated as "not dismissed".
function _agentHealthDismissed(){
  let stored;
  try{
    stored=localStorage.getItem(AGENT_HEALTH_DISMISSED_KEY);
  }catch(_){
    return false;
  }
  return stored==='1';
}
|
||||
// Stores the dismissed flag when `value` is truthy, removes it otherwise.
function _setAgentHealthDismissed(value){
  try{
    if(!value){
      localStorage.removeItem(AGENT_HEALTH_DISMISSED_KEY);
      return;
    }
    localStorage.setItem(AGENT_HEALTH_DISMISSED_KEY,'1');
  }catch(_){
    // Best effort: a storage failure must never break the alert flow.
  }
}
|
||||
// Hides the heartbeat banner; does nothing when the element is absent.
function _hideAgentHealthAlert(){
  const el=$('agentHealthBanner');
  if(!el) return;
  el.classList.remove('visible');
  el.hidden=true;
}
|
||||
// Fills in and reveals the heartbeat banner unless the user dismissed it.
// `payload` is the /api/health/agent response; its details.gateway_state,
// when present, is appended to the message.
function _showAgentHealthAlert(payload){
  if(_agentHealthDismissed()) return;

  const bannerEl=$('agentHealthBanner');
  const titleEl=$('agentHealthTitle');
  const detailsEl=$('agentHealthDetails');
  if(!bannerEl) return;

  if(titleEl){
    titleEl.textContent='Hermes agent is not responding';
  }

  const gatewayState=payload&&payload.details&&payload.details.gateway_state;
  let stateNote='';
  if(gatewayState){
    stateNote=` State: ${gatewayState}.`;
  }
  if(detailsEl){
    detailsEl.textContent=`Gateway heartbeat failed.${stateNote} Messages may not be delivered until it comes back.`;
  }

  bannerEl.hidden=false;
  bannerEl.classList.add('visible');
}
|
||||
// Click handler for the banner's Dismiss button: persist the dismissal
// first, then hide the banner.
function dismissAgentHealthAlert(){
  _setAgentHealthDismissed(true);
  _hideAgentHealthAlert();
}
|
||||
// Fetches /api/health/agent once and syncs the banner with the tri-state
// `alive` field. Skipped entirely while the page is not visible.
async function pollAgentHealth(){
  if(document.visibilityState !== 'visible') return;
  try{
    const payload=await api('/api/health/agent');
    if(payload.alive === true){
      // Recovery: clear any earlier dismissal so the next outage alerts again.
      _agentHealthLastState='alive';
      _setAgentHealthDismissed(false);
      _hideAgentHealthAlert();
    }else if(payload.alive === false){
      _agentHealthLastState='down';
      _showAgentHealthAlert(payload);
    }else if(payload.alive == null){
      // null/undefined: no gateway status available, so stay silent.
      _agentHealthLastState='unknown';
      _hideAgentHealthAlert();
    }
  }catch(_){
    // A failed request says nothing about the gateway itself; hide the alert.
    _agentHealthLastState='unknown';
    _hideAgentHealthAlert();
  }
}
|
||||
// Starts polling (one immediate poll plus a repeating interval) when the
// page is visible and no interval is already running.
function startAgentHealthMonitor(){
  const pageHidden=document.visibilityState !== 'visible';
  if(pageHidden || _agentHealthTimer) return;
  void pollAgentHealth();
  _agentHealthTimer=setInterval(pollAgentHealth, AGENT_HEALTH_INTERVAL_MS);
}
|
||||
// Cancels the polling interval, if one is active, and clears its handle.
function stopAgentHealthMonitor(){
  if(!_agentHealthTimer) return;
  clearInterval(_agentHealthTimer);
  _agentHealthTimer=null;
}
|
||||
// visibilitychange handler: poll only while the page is visible.
function _syncAgentHealthMonitorVisibility(){
  if(document.visibilityState !== 'visible'){
    stopAgentHealthMonitor();
    return;
  }
  startAgentHealthMonitor();
}
|
||||
// Pause/resume polling as the page's visibility changes.
document.addEventListener('visibilitychange',_syncAgentHealthMonitorVisibility);
// Start immediately when the document is already parsed; otherwise wait
// for DOMContentLoaded.
if(document.readyState !== 'loading'){
  startAgentHealthMonitor();
}else{
  document.addEventListener('DOMContentLoaded',startAgentHealthMonitor);
}
|
||||
async function refreshSession() {
|
||||
// When the banner is in post-update restart mode, the "Reload" button
|
||||
// should do a full page reload — a session refresh would just 502 while
|
||||
|
||||
@@ -0,0 +1,156 @@
|
||||
"""Regression coverage for #716 Hermes agent/gateway heartbeat monitor."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import pathlib
|
||||
|
||||
|
||||
# Repository root: two directory levels above this test file.
REPO_ROOT = pathlib.Path(__file__).parent.parent
# Source texts inspected by the static (string-presence) regression tests.
UI_JS = REPO_ROOT.joinpath("static", "ui.js").read_text(encoding="utf-8")
INDEX_HTML = REPO_ROOT.joinpath("static", "index.html").read_text(encoding="utf-8")
STYLE_CSS = REPO_ROOT.joinpath("static", "style.css").read_text(encoding="utf-8")
ROUTES_PY = REPO_ROOT.joinpath("api", "routes.py").read_text(encoding="utf-8")
|
||||
|
||||
|
||||
class _FakeGatewayStatus:
    """Stand-in for the ``gateway.status`` module with canned answers."""

    def __init__(self, runtime_status, running_pid):
        self._status = runtime_status
        self._pid = running_pid

    def read_runtime_status(self):
        """Return the runtime status supplied at construction time."""
        return self._status

    def get_running_pid(self, cleanup_stale=False):
        """Return the canned PID; callers must not request stale cleanup."""
        assert cleanup_stale is False
        return self._pid
|
||||
|
||||
|
||||
def _runtime_status(**overrides):
    """Build a sample gateway runtime status dict, with keyword overrides.

    The defaults mix the safe health fields with raw process fields so tests
    can verify that the latter never reach the browser payload.
    """
    defaults = {
        "gateway_state": "running",
        "updated_at": "2026-05-04T12:00:00+00:00",
        "active_agents": 2,
        "platforms": {
            "discord": {"state": "connected"},
            "telegram": {"state": "starting"},
        },
        # Sensitive/raw process fields that must never reach the browser.
        "pid": 12345,
        "argv": ["hermes", "gateway", "--token", "secret-token"],
        "command": "hermes gateway --token secret-token",
        "executable": "/home/user/.hermes/hermes-agent/venv/bin/python",
        "env": {"API_KEY": "secret"},
    }
    return {**defaults, **overrides}
|
||||
|
||||
|
||||
def test_agent_health_payload_alive_uses_safe_runtime_details(monkeypatch):
    """A running PID yields alive=True with only the safe detail subset."""
    from api import agent_health

    fake_gateway = _FakeGatewayStatus(_runtime_status(), running_pid=12345)
    monkeypatch.setattr(agent_health, "_gateway_status_module", lambda: fake_gateway)

    payload = agent_health.build_agent_health_payload()

    expected_details = {
        "state": "alive",
        "gateway_state": "running",
        "updated_at": "2026-05-04T12:00:00+00:00",
        "active_agents": 2,
        "platform_count": 2,
        "platform_states": {"connected": 1, "starting": 1},
    }
    assert payload["alive"] is True
    assert payload["checked_at"]
    assert payload["details"] == expected_details

    # None of the raw process fields from the fixture may leak through.
    rendered = repr(payload)
    for leaked in ("secret-token", "API_KEY", "argv", "command", "executable"):
        assert leaked not in rendered
    assert "pid" not in payload["details"]
|
||||
|
||||
|
||||
def test_agent_health_payload_down_when_gateway_metadata_exists_but_no_process(monkeypatch):
    """Runtime metadata without a live PID is reported as alive=False/down."""
    from api import agent_health

    fake_gateway = _FakeGatewayStatus(_runtime_status(gateway_state="stale"), running_pid=None)
    monkeypatch.setattr(agent_health, "_gateway_status_module", lambda: fake_gateway)

    payload = agent_health.build_agent_health_payload()
    details = payload["details"]

    assert payload["alive"] is False
    assert details["state"] == "down"
    assert details["reason"] == "gateway_not_running"
    assert details["gateway_state"] == "stale"
|
||||
|
||||
|
||||
def test_agent_health_payload_unknown_when_gateway_is_not_configured(monkeypatch):
    """No runtime status and no PID is reported as alive=None/unknown."""
    from api import agent_health

    fake_gateway = _FakeGatewayStatus(runtime_status=None, running_pid=None)
    monkeypatch.setattr(agent_health, "_gateway_status_module", lambda: fake_gateway)

    payload = agent_health.build_agent_health_payload()

    expected_details = {"state": "unknown", "reason": "gateway_not_configured"}
    assert payload["alive"] is None
    assert payload["details"] == expected_details
|
||||
|
||||
|
||||
def test_agent_health_route_is_registered_with_tri_state_payload_shape():
    """routes.py wires the endpoint and the helper emits the payload keys."""
    for route_snippet in ('parsed.path == "/api/health/agent"', "build_agent_health_payload()"):
        assert route_snippet in ROUTES_PY

    src = (REPO_ROOT / "api" / "agent_health.py").read_text(encoding="utf-8")
    for payload_key in ('"alive"', '"checked_at"', '"details"'):
        assert payload_key in src
|
||||
|
||||
|
||||
def test_agent_health_banner_markup_and_styles_exist():
    """index.html carries the banner markup and style.css its rules."""
    markup_snippets = (
        'id="agentHealthBanner"',
        'role="alert"',
        'aria-live="assertive"',
        'onclick="dismissAgentHealthAlert()"',
    )
    for snippet in markup_snippets:
        assert snippet in INDEX_HTML

    css_selectors = (
        ".agent-health-banner",
        ".agent-health-banner.visible",
        ".agent-health-dismiss",
    )
    for selector in css_selectors:
        assert selector in STYLE_CSS
|
||||
|
||||
|
||||
def test_agent_health_frontend_polls_only_visible_and_distinguishes_states():
    """ui.js polls on a 30s interval, only when visible, per tri-state branch."""
    required_snippets = (
        "const AGENT_HEALTH_INTERVAL_MS=30000",
        "api('/api/health/agent')",
        "document.visibilityState !== 'visible'",
        "document.addEventListener('visibilitychange',_syncAgentHealthMonitorVisibility)",
        "if(payload.alive === true)",
        "if(payload.alive === false)",
        "if(payload.alive == null)",
        "_showAgentHealthAlert(payload)",
        "_hideAgentHealthAlert()",
    )
    for snippet in required_snippets:
        assert snippet in UI_JS
|
||||
|
||||
|
||||
def test_agent_health_dismiss_persists_until_recovery():
    """ui.js stores the dismissal in localStorage and clears it again."""
    required_snippets = (
        "const AGENT_HEALTH_DISMISSED_KEY='agent-health-dismissed'",
        "localStorage.setItem(AGENT_HEALTH_DISMISSED_KEY,'1')",
        "localStorage.removeItem(AGENT_HEALTH_DISMISSED_KEY)",
        "function dismissAgentHealthAlert()",
        "if(_agentHealthDismissed()) return;",
        "_setAgentHealthDismissed(false)",
    )
    for snippet in required_snippets:
        assert snippet in UI_JS
|
||||
|
||||
|
||||
def test_agent_health_backend_does_not_use_shell_or_expose_raw_process_fields():
    """agent_health.py avoids subprocess/psutil and raw process detail keys."""
    src = (REPO_ROOT / "api" / "agent_health.py").read_text(encoding="utf-8")

    for banned_import in ("import subprocess", "import psutil"):
        assert banned_import not in src

    # Check both quoting styles of a direct ``details[...]`` assignment.
    for private_field in ("argv", "command", "executable", "env"):
        double_quoted = f'details["{private_field}"]'
        single_quoted = f"details['{private_field}']"
        assert double_quoted not in src
        assert single_quoted not in src
|
||||
Reference in New Issue
Block a user