diff --git a/.env.example b/.env.example index 19dff7dc..768eca50 100644 --- a/.env.example +++ b/.env.example @@ -15,7 +15,7 @@ # Port to listen on (default: 8787) # HERMES_WEBUI_PORT=8787 -# Where to store sessions, workspaces, and other state (default: ~/.hermes/webui-mvp) +# Where to store sessions, workspaces, and other state (default: ~/.hermes/webui) # HERMES_WEBUI_STATE_DIR=~/.hermes/webui # Default workspace directory shown on first launch diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index b16b1d57..c7649620 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -24,7 +24,14 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install pyyaml>=6.0 pytest pytest-timeout + pip install pyyaml>=6.0 pytest pytest-timeout pytest-asyncio + # Install the `mcp` package so tests/test_mcp_server.py runs in CI. + # The package is an optional runtime dep of mcp_server.py — users + # who run the MCP integration install it themselves; CI installs + # it so test coverage exists. If mcp install fails (Python 3.13 + # wheel not yet available, etc.), tests/test_mcp_server.py uses + # importorskip and the matrix stays green. + pip install mcp || echo "mcp install failed — test_mcp_server.py will importorskip" - name: Run tests run: pytest tests/ -v --timeout=60 diff --git a/.gitignore b/.gitignore index 3846911e..529563ba 100644 --- a/.gitignore +++ b/.gitignore @@ -40,8 +40,11 @@ Thumbs.db docs/* !docs/ui-ux/ !docs/ui-ux/** +!docs/rfcs/ +!docs/rfcs/** !docs/docker.md !docs/supervisor.md +!docs/troubleshooting.md # Local-only PR review harness: rendering drivers, sample bank, fixtures. # Used by Claude during deep reviews; never shared in the repo. 
@@ -49,3 +52,5 @@ docs/* graphify-out/ .graphify_cached.json .graphify_uncached.txt + +.venv/ diff --git a/CHANGELOG.md b/CHANGELOG.md index e852b72b..6c6f75e8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,2077 @@ # Hermes Web UI -- Changelog +## [Unreleased] + +## [v0.51.44] — 2026-05-11 — Release T (5-PR contributor batch — security + worktree sessions + LM Studio + onboarding docs + transcript dedup, plus comprehensive test-suite network isolation) + +### Added + +- **PR #2052** by @franksong2702 — `docs/onboarding.md` (181 lines) covering install path choices, safe wizard re-runs with isolated `HERMES_HOME` / `HERMES_WEBUI_STATE_DIR`, provider groups, Docker/local-server Base URL rules (the most common Discord support question — `localhost` inside a container is not the host running LM Studio or Ollama), workspace setup, password step, files written by the wizard, and issue-reporting diagnostics. README pointer added from the quick-start section and Docs list. Stale `~/.hermes/webui-mvp` → `~/.hermes/webui` correction in `.env.example` and the README env-var table (the running app uses `~/.hermes/webui` per `api/config.py:42`). + +- **PR #2053** by @franksong2702 — Worktree-backed session creation. `POST /api/session/new` accepts a `worktree: true` flag that calls the agent's `_setup_worktree()` helper to create an isolated git worktree at `<repo>/.worktrees/hermes-XXXX`, persists `worktree_path` / `worktree_branch` / `worktree_repo_root` / `worktree_created_at` on the WebUI `Session`, surfaces a "New conversation in worktree" action in the workspace menu, and shows a subtle sidebar worktree indicator. Empty worktree sessions stay visible in the sidebar (the empty-session filter at `api/models.py:1067/1107` exempts sessions with a `worktree_path`). 
Note: the underlying Hermes Agent helper may add `.worktrees/` to the repository `.gitignore` the first time a worktree is created for that repo — operators will see a small uncommitted edit to `.gitignore` after their first worktree session. Cleanup lifecycle (auto-remove on session delete/archive) is deliberately deferred to a follow-up PR — needs explicit safeguards for active streams, terminals, dirty files, and unpushed commits. Closes #1955. + +- **PR #1970** by @dobby-d-elf — First-class LM Studio provider support with live model discovery. A dedicated `elif pid == "lmstudio":` branch in `get_available_models()` calls `hermes_cli.provider_model_ids("lmstudio")` first, falling back to a direct GET `/models` request when env vars (`LM_API_KEY` + `LM_BASE_URL`) haven't been injected yet — this fixes the race where the provider's `.env` isn't loaded into `os.environ` before the picker runs. Detection in `detected_providers` now also fires on `LM_API_KEY` + `LM_BASE_URL` env vars and on `cfg["providers"]["lmstudio"]` config entries. The new `_get_provider_base_url()` helper plus the change to `resolve_model_provider()` from `return bare_model, provider_hint, None` to `return bare_model, provider_hint, _get_provider_base_url(provider_hint)` lets users with `providers.<id>.base_url` in `config.yaml` flow that URL through model resolution consistently (pre-fix they had to also set it under `cfg["model"]`). The "Configured" badge code from the initial PR submission was dropped per maintainer review — see PR #1970 thread for the UX discussion. + +### Fixed + +- **PR #2048** by @Hinotoi-agent — `[security]` Session import validates `workspace` field against `resolve_trusted_workspace()`. Pre-fix, a crafted JSON import with `"workspace": "/"` was persisted as the `Session.workspace`, after which `/api/file?session_id=<id>&path=etc/hosts` resolved against `/` and served host files. 
The patch routes the imported value through the same resolver every other workspace-bearing endpoint already uses (`/api/session/new`, `/api/branch`, `/api/fork`, `/api/clone`), returning 400 on `ValueError` (blocked system root) or `TypeError` (non-path workspace value like `{"not": "a path"}`). Severity is highest on `0.0.0.0`-bound / reverse-proxied / LAN-exposed deployments with password auth where `PR:L` applies — there the bug turned "authenticated session creation" into "authenticated read of any process-readable file." Default loopback-only deployments without auth were lower risk because anyone on loopback can usually read `/etc/hosts` directly. Includes 105 LOC of regression coverage in `tests/test_session_import_workspace_validation.py` and a belt-and-braces invariant test against the resolver itself. + +- **PR #2055** by @franksong2702 — Duplicate assistant transcript merge. `_merge_display_messages_after_agent_result()` at `api/streaming.py:1754` now skips adjacent duplicate assistant messages by merge identity (`role + content + tool_call_id + json.dumps(tool_calls, sort_keys=True)`). Some provider/result replay paths produced two copies of the same assistant bubble in the current delta, which then got persisted into `s.messages` and sent back to the browser in the `done` SSE payload, producing duplicate assistant chat bubbles. The guard is intentionally adjacent-only so two separate turns that happen to produce identical assistant text remain visible — confirmed via the new negative-path test. Closes #2051. + +### Fixed (maintainer review on stage-337) + +- **PR #1970 lmstudio regression** — the new lmstudio branch in `get_available_models()` only looked at `cfg["providers"]["lmstudio"]["base_url"]`, missing the historical config shape where users put `base_url` under `cfg["model"]` when `model.provider == lmstudio`. Three pre-existing tests in `tests/test_issue1527_lmstudio_base_url_classification.py` broke on stage-337 because of this gap. 
The fix enhances `_get_provider_base_url()` to fall back to `cfg["model"]["base_url"]` when `cfg["model"]["provider"]` matches the requested provider id, then routes the lmstudio branch through the helper. Belt-and-suspenders negative-case test asserts `model.base_url` does NOT leak to non-active providers (so a user with `model.provider: anthropic` + `model.base_url: <anthropic-proxy-url>` + `providers.openai` without base_url still gets None for openai, not the anthropic proxy URL). 6 new regression tests in `tests/test_pr1970_lmstudio_base_url_fallback.py`. + +- **PR #2053 × PR #2041 state.db worktree recovery silent data loss** — Opus advisor caught this during stage review. PR #2041 (v0.51.42) added state.db sidecar reconciliation that rebuilds a missing `.json` from the canonical state.db row. PR #2053 added worktree-backed sessions with new metadata fields. `_state_db_row_to_sidecar()` was hard-coding `'workspace': ''` and not propagating `worktree_path` / `worktree_branch` / `worktree_repo_root` / `worktree_created_at` / `message_count` from the row to the rebuilt sidecar. Result: a worktree-backed session that lost its JSON sidecar and got rebuilt from state.db disappeared from the sidebar (the empty-session filter at `api/models.py:1067` exempts sessions with `worktree_path`, but the rebuilt sidecar had none) and downstream tools (terminal panels, file pickers using `s.workspace`) operated on an empty string. Fix: extend the `_read_state_db_missing_sidecar_rows()` SELECT to include the missing columns (each gated by `_sql_optional_col()` for older state.db schemas) and propagate them in `_state_db_row_to_sidecar()`. Three new regression tests in `tests/test_state_db_worktree_recovery.py` lock the round-trip, the non-worktree no-spurious-propagation case, and the empty-worktree-session-must-stay-visible invariant. 
+ +### Test infrastructure + +- **Hermetic network isolation across the whole test suite.** Before this release, an accidentally-leaking outbound TLS handshake from the test_server fixture (Anthropic IPv6, Amazon, OpenRouter, observed via `ss -tnp` during stage-337 debugging) was adding 60+s of wall-time to pytest runs and creating a class of flaky failures. Two new layers now enforce no-outbound by default: + + 1. **Pytest process** (tests/conftest.py module-level monkey-patch on `socket.create_connection` + `socket.socket.connect`). Allowed destinations: loopback (`127.0.0.0/8`, `::1`), RFC1918 (`10/8`, `172.16/12`, `192.168/16`), link-local (`169.254/16`), RFC5737 TEST-NET-3 (`203.0.113/24`), RFC2606 reserved TLDs (`.invalid`, `.test`, `.example`, `.local`, `localhost`). Everything else raises `OSError("hermes test network isolation")`. Tests that legitimately need real outbound opt back in via the new `allow_outbound_network` fixture (zero current callers). + + 2. **test_server subprocess** (server.py). `HERMES_WEBUI_TEST_NETWORK_BLOCK=1` env var (set by tests/conftest.py on every spawn) activates an identical guard at the top of server.py at import time, before any api/* module loads. The env var is unset in production, so the guard is a no-op outside the test harness. Without this, the pytest-side block didn't cover the spawned subprocess. + +- **`test_dns_resolution_failure` refactored** to mock `socket.getaddrinfo` raising `gaierror` instead of relying on real DNS for a `*.invalid` hostname. Hermetic now, and matches the mock-based pattern every other test in the same file uses. + +- **`tests/test_conftest_network_isolation.py`** with 9 adversarial tests proving (a) outbound to the exact Anthropic IPv6 + Amazon IPv4 + Google DNS destinations we observed leaking is now blocked, (b) loopback / RFC1918 / link-local / reserved-TLD destinations pass through, (c) the `allow_outbound_network` opt-in fixture works. 
+ +### Tests + +5,166 → **5,192 collected** (+26 net new across the 4 new regression test files). All passing on Python 3.11/3.12/3.13. Full suite wall-time: 161s → **95s** (the previously-leaking outbound TLS handshakes were the long tail). + +### Contributors + +@Hinotoi-agent (×1, first contribution) · @franksong2702 (×3) · @dobby-d-elf (×1, first contribution) · @nesquena (3 maintainer review fixes) + +### Notes + +- The state.db × worktree recovery interaction (PR #2053 × PR #2041) is the second case in two releases where Opus advisor caught a real cross-PR data-loss bug that neither PR's individual test suite would have surfaced (the first was the v0.51.43 CSS breakpoint asymmetry). The pattern is worth its weight — cross-PR adversarial review with grep-grounded prompts catches what unit tests miss when the failure mode lives at the seam between two features. + +- LM Studio support is now first-class. Live model discovery + base URL discovery from either `providers.<id>.base_url` OR `cfg["model"]["base_url"]` (when `model.provider` matches) means users with either config shape get a populated model picker without manual `config.yaml` edits. + +## [v0.51.43] — 2026-05-11 — Release S (fused community PR — desktop sidebar collapse) + +### Added + +- **PR #2054** by @jasonjcwu and @spektro33 (fused, co-authored) — Desktop users can now collapse the session-list sidebar by clicking the already-active rail icon, or with Cmd/Ctrl+B. State persists across reloads via localStorage and survives bfcache restores. Two discoverability paths, **no new visible UI affordance** — default appearance is identical to master, only users who actively try to toggle ever see a difference. Cross-panel rail clicks behave exactly as before (no collapse, just panel switch). Mobile (<641px) is unaffected. 
The behaviour is gated behind one new `opts.fromRailClick` flag on `switchPanel()` so every programmatic call-site (commands, deeplinks, internal state changes) preserves master semantics exactly. Inline ` -``` - -This runs synchronously before the stylesheet parses. Zero flicker. - -**3. Theme loading in `static/boot.js`** - -In the existing `api('/api/settings')` call, read and apply the theme: - -```js -const s = await api('/api/settings'); -window._sendKey = s.send_key || 'enter'; -window._showTokenUsage = !!s.show_token_usage; -window._showCliSessions = !!s.show_cli_sessions; -// Theme: apply server preference, update localStorage for flicker prevention -const theme = s.theme || 'dark'; -document.documentElement.dataset.theme = theme; -localStorage.setItem('hermes-theme', theme); -``` - -**4. Theme setting in `api/config.py`** - -```python -_SETTINGS_DEFAULTS = { - ... - 'theme': 'dark', # active UI theme name - ... -} -_SETTINGS_ALLOWED_KEYS = set(_SETTINGS_DEFAULTS.keys()) - {'password_hash'} -``` - -No enum constraint on `theme` -- allows user-defined theme names to work -without server changes. - ---- - -### Track B: Theme picker UI - -**Settings panel addition (`static/index.html` + `static/panels.js`)** - -A ` - - - - - - - -``` - -In `loadSettingsPanel()`: -```js -const themeSel = $('settingsTheme'); -if(themeSel) themeSel.value = settings.theme || 'dark'; -``` - -In `saveSettings()`: -```js -body.theme = $('settingsTheme').value; -``` - -**Live preview on select change (no save required):** -```js -$('settingsTheme').addEventListener('change', e => { - document.documentElement.dataset.theme = e.target.value; - localStorage.setItem('hermes-theme', e.target.value); -}); -``` - -This gives instant visual feedback as the user clicks through options. -The full settings save then persists it server-side. 
- -**`/theme` slash command (`static/commands.js`)** - -```js -async function cmdTheme(arg) { - const themes = ['dark','light','solarized','monokai','nord']; - if(!arg || !themes.includes(arg)) { - showToast('Usage: /theme dark|light|solarized|monokai|nord'); - return; - } - document.documentElement.dataset.theme = arg; - localStorage.setItem('hermes-theme', arg); - try { await api('/api/settings', {method:'POST', body: JSON.stringify({theme: arg})}); } catch(e) {} - showToast('Theme: ' + arg); -} -``` - ---- - -### Track C: Tests - -New test cases in `tests/test_sprint26.py`: - -1. `GET /api/settings` returns `theme: 'dark'` by default -2. `POST /api/settings` with `{theme: 'light'}` persists and round-trips -3. `POST /api/settings` with `{theme: 'nord'}` accepts any string (no enum gate) -4. Theme value survives server restart (reads from `settings.json`) -5. `/theme` command fires without error for each named theme -6. `loadSettingsPanel()` populates the select with the current theme value -7. Settings save includes theme in the POST body -8. `data-theme` attribute is set on `` before first paint (inline script) - -**Estimated new tests:** 8. Target total after sprint: ~443. - ---- - -### What's out of scope - -- **Custom color editors** (hex pickers for each variable): saves that for v2. - The five shipped themes cover the main use cases. A custom theme can always - be added by dropping a CSS block with no code changes. -- **Per-session themes**: single global preference is the right call for v1. -- **System `prefers-color-scheme` sync**: nice-to-have, low priority. The - flicker-prevention script could be extended to read the media query if no - explicit preference is set. -- **Prism.js theme switching**: the code-block syntax highlighting comes from - a CDN stylesheet. Swapping it requires a `` swap and SRI re-check. - Defer to a future sprint; the default Prism Tomorrow theme works on all - current dark themes and is acceptable on light. 
- ---- - -**Estimated tests:** 8 new. Target total: ~443. -**Hermes CLI parity impact:** None -**Claude parity impact:** Medium (Claude.ai has light/dark/system sync) -**User-facing value:** High -- first thing many users ask for - ---- - -*Last updated: April 12, 2026* -*Current version: v0.49.1 | 700 tests* -*Next sprint: Sprint 24 (Web Polish + Bug Fix Pass)* -*Horizon sprint: Sprint 25 (macOS Desktop Application)* -*Docs sweep policy: update markdown proactively during PR reviews and after significant releases* +The maintainer's planning notes for each sprint live in the workspace repo (private), not in this file. This file is the public-facing planning shape. diff --git a/TESTING.md b/TESTING.md index a8a15379..ee35af45 100644 --- a/TESTING.md +++ b/TESTING.md @@ -1835,8 +1835,8 @@ Bridged CLI sessions: --- -*Last updated: v0.50.278, May 03, 2026* -*Total automated tests collected: 3936* +*Last updated: v0.51.31, May 9, 2026* +*Total automated tests collected: 4977* *Regression gate: tests/test_regressions.py* *Run: pytest tests/ -v --timeout=60* *Source: /* diff --git a/api/agent_health.py b/api/agent_health.py new file mode 100644 index 00000000..ea3bc572 --- /dev/null +++ b/api/agent_health.py @@ -0,0 +1,330 @@ +"""Hermes agent/gateway heartbeat payload helpers (#716, #1879). + +The WebUI process is not always paired with a long-running Hermes gateway. Some +setups use WebUI only, while self-hosted messaging deployments run a separate +Hermes gateway daemon that records runtime metadata in the Hermes Agent home. +This module turns those existing safe runtime signals into a small UI-facing +heartbeat without shelling out or adding psutil as a hard dependency. + +Cross-container note (#1879): ``gateway.status.get_running_pid()`` uses +``fcntl.flock`` and ``os.kill(pid, 0)``, both of which require the caller to +share a PID namespace with the gateway process. 
In multi-container deployments +where the WebUI runs separately from ``hermes-agent`` and only a Hermes data +volume is shared, those checks always return ``None`` and the dashboard +incorrectly shows "Gateway not running". To stay accurate without forcing a +``pid: "service:hermes-agent"`` compose workaround, we accept a recent +``updated_at`` timestamp on ``gateway_state.json`` (combined with +``gateway_state == "running"``) as an equivalent live-process signal — the +gateway already writes that file on every tick. +""" + +from __future__ import annotations + +import importlib +import json +from datetime import datetime, timezone +from pathlib import Path +from typing import Any + +_GATEWAY_PID_FILE = "gateway.pid" +_GATEWAY_RUNTIME_STATUS_FILE = "gateway_state.json" + + +# Two cron ticks (~60s each). Chosen to avoid false negatives during brief +# gateway restarts while still surfacing a true outage within a couple of +# minutes. Override is intentionally not exposed: keep the check deterministic +# and identical across deployments so support diagnostics are reproducible. +GATEWAY_FRESHNESS_THRESHOLD_S: float = 120.0 + + +def _checked_at() -> str: + return datetime.now(timezone.utc).isoformat() + + +def _runtime_status_is_fresh( + runtime_status: dict[str, Any] | None, + *, + now: datetime | None = None, + threshold_s: float = GATEWAY_FRESHNESS_THRESHOLD_S, +) -> bool: + """Return ``True`` when ``gateway_state.json`` looks freshly written. + + "Fresh" means the gateway self-reported ``running`` and the ``updated_at`` + ISO-8601 timestamp is no older than ``threshold_s`` seconds. This is the + cross-container liveness signal used when ``get_running_pid()`` returns + ``None`` purely because of PID-namespace isolation (#1879). + + Any unparseable input is treated as "not fresh" — a stale or missing + timestamp must never report alive. 
+ """ + if not isinstance(runtime_status, dict): + return False + if runtime_status.get("gateway_state") != "running": + return False + + raw_updated_at = runtime_status.get("updated_at") + if not isinstance(raw_updated_at, str) or not raw_updated_at: + return False + + # ``datetime.fromisoformat`` accepts the exact format gateway/status.py + # writes (``datetime.now(timezone.utc).isoformat()``). We deliberately + # don't pull in dateutil — keeping this stdlib-only matches the rest of + # this module. + try: + updated_at = datetime.fromisoformat(raw_updated_at) + except (TypeError, ValueError): + return False + + if updated_at.tzinfo is None: + # A naive timestamp could mean anything across containers / hosts. + # Refuse to interpret it rather than assume UTC. + return False + + reference = now if now is not None else datetime.now(timezone.utc) + age_s = (reference - updated_at).total_seconds() + if age_s < 0: + # Clock skew between containers can produce small negatives. A future + # timestamp is still a "fresh" signal — the gateway clearly wrote it + # very recently — so accept it. A wildly-future timestamp (> threshold + # in the future) is rejected to avoid trusting a broken clock. + return -age_s <= threshold_s + return age_s <= threshold_s + + +def _runtime_status_is_stale_stopped( + runtime_status: dict[str, Any] | None, + *, + now: datetime | None = None, + threshold_s: float = GATEWAY_FRESHNESS_THRESHOLD_S, +) -> bool: + """Return ``True`` for an old clean-stop root gateway state. + + A user may run only profile-scoped gateways while a root + ``gateway_state.json`` from an older, intentionally stopped gateway remains + on disk (#1944). Treat that stale stopped file like "no root gateway + configured" so the heartbeat banner does not keep warning about a service + the user is not running. Fresh stopped state still reports down. 
+ """ + if not isinstance(runtime_status, dict): + return False + if runtime_status.get("gateway_state") != "stopped": + return False + + raw_updated_at = runtime_status.get("updated_at") + if not isinstance(raw_updated_at, str) or not raw_updated_at: + return False + + try: + updated_at = datetime.fromisoformat(raw_updated_at) + except (TypeError, ValueError): + return False + if updated_at.tzinfo is None: + return False + + reference = now if now is not None else datetime.now(timezone.utc) + age_s = (reference - updated_at).total_seconds() + return age_s > threshold_s + + +def _gateway_status_module(): + """Load gateway.status lazily so tests and WebUI-only installs stay isolated.""" + return importlib.import_module("gateway.status") + + +def _gateway_root_pid_path() -> Path | None: + """Return the root Hermes gateway PID path. + + Gateway runtime files are root-level singletons. A profile-scoped WebUI + process may have HERMES_HOME=/profiles/, but gateway.pid, + gateway.lock, and gateway_state.json still live under . 
+ """ + try: + from hermes_constants import get_default_hermes_root + return get_default_hermes_root() / _GATEWAY_PID_FILE + except Exception: + return None + + +def _read_runtime_status_path(path: Path) -> dict[str, Any] | None: + try: + payload = json.loads(path.read_text(encoding="utf-8")) + except (OSError, UnicodeDecodeError, json.JSONDecodeError): + return None + if isinstance(payload, dict): + return payload + return None + + +def _read_gateway_runtime_status(gateway_status: Any, pid_path: Path | None) -> dict[str, Any] | None: + read_runtime_status = gateway_status.read_runtime_status + if pid_path is not None: + try: + return read_runtime_status(pid_path=pid_path) + except TypeError: + try: + return read_runtime_status(pid_path) + except TypeError: + if getattr(gateway_status, "__name__", "") == "gateway.status" or hasattr( + gateway_status, + "_read_json_file", + ): + runtime_status_file = str( + getattr(gateway_status, "_RUNTIME_STATUS_FILE", _GATEWAY_RUNTIME_STATUS_FILE) + ) + runtime_status = _read_runtime_status_path(pid_path.with_name(runtime_status_file)) + if runtime_status is not None: + return runtime_status + return read_runtime_status() + + +def _gateway_running_pid(gateway_status: Any, pid_path: Path | None) -> int | None: + get_running_pid = gateway_status.get_running_pid + if pid_path is not None: + try: + return get_running_pid(pid_path=pid_path, cleanup_stale=False) + except TypeError: + try: + return get_running_pid(pid_path, cleanup_stale=False) + except TypeError: + pass + try: + return get_running_pid(cleanup_stale=False) + except TypeError: + # Older agent versions may not expose cleanup_stale. Keep compatibility. + return get_running_pid() + + +def _runtime_detail_subset(runtime_status: dict[str, Any] | None) -> dict[str, Any]: + """Return only non-sensitive runtime fields for the browser. + + gateway.status records argv/PID metadata so the CLI can validate process + identity. 
The WebUI alert only needs health semantics, never raw command + lines, paths, environment, or tokens. + """ + if not isinstance(runtime_status, dict): + return {} + + details: dict[str, Any] = {} + gateway_state = runtime_status.get("gateway_state") + if isinstance(gateway_state, str) and gateway_state: + details["gateway_state"] = gateway_state + + updated_at = runtime_status.get("updated_at") + if isinstance(updated_at, str) and updated_at: + details["updated_at"] = updated_at + + try: + details["active_agents"] = max(0, int(runtime_status.get("active_agents") or 0)) + except (TypeError, ValueError): + pass + + platforms = runtime_status.get("platforms") + if isinstance(platforms, dict): + details["platform_count"] = len(platforms) + states: dict[str, int] = {} + for payload in platforms.values(): + if not isinstance(payload, dict): + continue + state = payload.get("state") + if isinstance(state, str) and state: + states[state] = states.get(state, 0) + 1 + if states: + details["platform_states"] = states + + return details + + +def build_agent_health_payload() -> dict[str, Any]: + """Return `{alive, checked_at, details}` for the Hermes gateway/agent. + + `alive` is intentionally tri-state: + * True: a gateway runtime signal says the process is alive. + * False: gateway metadata exists, but no live gateway process owns it. + * None: no gateway metadata/status is available, so this WebUI setup is + probably not configured with a separate gateway process. 
+ """ + checked_at = _checked_at() + try: + gateway_status = _gateway_status_module() + except Exception as exc: + return { + "alive": None, + "checked_at": checked_at, + "details": { + "state": "unknown", + "reason": "gateway_status_unavailable", + "error": type(exc).__name__, + }, + } + + gateway_pid_path = _gateway_root_pid_path() + + runtime_status = None + try: + runtime_status = _read_gateway_runtime_status(gateway_status, gateway_pid_path) + except Exception: + runtime_status = None + + try: + running_pid = _gateway_running_pid(gateway_status, gateway_pid_path) + except Exception: + running_pid = None + + safe_details = _runtime_detail_subset(runtime_status) + if running_pid is not None: + return { + "alive": True, + "checked_at": checked_at, + "details": { + "state": "alive", + **safe_details, + }, + } + + # Cross-container fallback (#1879): when ``get_running_pid()`` cannot see + # the gateway because we're in a different PID namespace, a recent + # ``updated_at`` on ``gateway_state.json`` is a reliable equivalent signal + # since the gateway writes it on every tick. We only trust this fallback + # when the gateway also self-reports ``gateway_state == "running"`` so + # crash-without-cleanup scenarios still surface as "down". 
+ if _runtime_status_is_fresh(runtime_status): + return { + "alive": True, + "checked_at": checked_at, + "details": { + "state": "alive", + "reason": "cross_container_freshness", + **safe_details, + }, + } + + if _runtime_status_is_stale_stopped(runtime_status): + return { + "alive": None, + "checked_at": checked_at, + "details": { + "state": "unknown", + "reason": "gateway_stale_stopped_state", + **safe_details, + }, + } + + if isinstance(runtime_status, dict): + return { + "alive": False, + "checked_at": checked_at, + "details": { + "state": "down", + "reason": "gateway_not_running", + **safe_details, + }, + } + + return { + "alive": None, + "checked_at": checked_at, + "details": { + "state": "unknown", + "reason": "gateway_not_configured", + }, + } diff --git a/api/agent_sessions.py b/api/agent_sessions.py index 7b024f8b..dce28853 100644 --- a/api/agent_sessions.py +++ b/api/agent_sessions.py @@ -14,6 +14,9 @@ MESSAGING_SOURCES = { 'weixin', } +CLI_MIN_UNTITLED_MESSAGE_COUNT = 6 +CLI_MIN_UNTITLED_USER_MESSAGE_COUNT = 2 + SOURCE_LABELS = { 'api_server': 'API', 'cli': 'CLI', @@ -71,6 +74,115 @@ def _optional_col(name: str, columns: set[str], fallback: str = "NULL") -> str: return f"s.{name}" if name in columns else f"{fallback} AS {name}" +def _safe_lower(value) -> str: + return str(value or "").strip().lower() + + +def _normalize_source_name(value: object) -> str: + source = _safe_lower(value) + if not source: + return "" + if source.endswith(" session"): + source = source[:-len(" session")].strip() + return source + + +def _looks_like_default_cli_title(row: dict) -> bool: + """Return True when a CLI row looks like framework-generated metadata.""" + title = _safe_lower(row.get("title")) + if not title or title == "untitled": + return True + if title in {"cli", "cli session"}: + return True + + source_candidates = { + _normalize_source_name(row.get("source")), + _normalize_source_name(row.get("session_source")), + _normalize_source_name(row.get("source_tag")), + 
_normalize_source_name(row.get("raw_source")), + _normalize_source_name(row.get("source_label")), + } + source_candidates.discard("") + source_candidates.add("cli") + return any(title == f"{candidate} session" for candidate in source_candidates) + + +def _as_positive_int(value) -> int: + try: + return max(0, int(float(value))) + except (TypeError, ValueError): + return 0 + + +def _count_user_turns(row: dict) -> int: + user_turns = row.get("actual_user_message_count") + if user_turns is None: + user_turns = row.get("user_message_count") + if user_turns is None: + messages = row.get("messages") or [] + if isinstance(messages, list): + return sum( + 1 + for msg in messages + if _safe_lower(msg.get("role") if isinstance(msg, dict) else msg) == "user" + ) + return 0 + return _as_positive_int(user_turns) + + +def _has_cli_lineage(row: dict) -> bool: + segment_count = _as_positive_int(row.get("_compression_segment_count")) + return segment_count > 1 or bool(row.get("_lineage_root_id")) + + +def is_cli_session_row(row: dict) -> bool: + """Return True for rows that should be treated as CLI-imported sessions.""" + if not isinstance(row, dict): + return False + source = _safe_lower(row.get("session_source")) + if source == "messaging": + return False + if source == "cli": + return True + source_tag = _safe_lower(row.get("source_tag")) + raw_source = _safe_lower(row.get("raw_source")) + source_name = _safe_lower(row.get("source")) + source_label = _safe_lower(row.get("source_label")) + if source_tag == "cli" or raw_source == "cli" or source_name == "cli" or source_label == "cli": + return True + + # Legacy imported CLI rows may only be marked as CLI in sidebar metadata. + # Keep this conservative to avoid treating messaging sessions as CLI. 
+ return bool( + row.get("is_cli_session") + and source not in MESSAGING_SOURCES + and source_tag not in MESSAGING_SOURCES + and raw_source not in MESSAGING_SOURCES + and source_name not in MESSAGING_SOURCES + and _looks_like_default_cli_title(row) + ) + + +def is_cli_session_row_visible(row: dict) -> bool: + """Return whether a CLI-related row should remain visible in the sidebar.""" + if not isinstance(row, dict): + return False + if not is_cli_session_row(row): + return True + + message_count = _as_positive_int(row.get("actual_message_count") or row.get("message_count")) + if message_count <= 0: + return False + + if _has_cli_lineage(row): + return True + + if not _looks_like_default_cli_title(row): + return True + + return _count_user_turns(row) >= CLI_MIN_UNTITLED_USER_MESSAGE_COUNT + + def _is_continuation_session(parent: dict | None, child: dict | None) -> bool: """Return True when ``child`` is the next segment of the same conversation. @@ -79,9 +191,18 @@ def _is_continuation_session(parent: dict | None, child: dict | None) -> bool: should continue the same visible conversation rather than becoming a separate child-session row. Plain parent/child links that started before the parent's ended boundary remain child sessions. + + Do not collapse lineage across raw sources. A WebUI session that continues + from a Telegram/CLI/etc. parent must remain visible as its own surface-owned + conversation; otherwise the tip inherits the root's title/source metadata and + can disappear under messaging/sidebar policies. 
""" if not parent or not child: return False + parent_source = str(parent.get('source') or '').strip().lower() + child_source = str(child.get('source') or '').strip().lower() + if parent_source and child_source and parent_source != child_source: + return False if parent.get('end_reason') not in {'compression', 'cli_close'}: return False ended_at = parent.get('ended_at') @@ -133,10 +254,13 @@ def _project_agent_session_rows(rows: list[dict]) -> list[dict]: if not parent_id: continue children_by_parent.setdefault(parent_id, []).append(row) - if _is_continuation_session(rows_by_id.get(parent_id), row): + parent = rows_by_id.get(parent_id) + if _is_continuation_session(parent, row): continuation_child_ids.add(row['id']) else: row['relationship_type'] = 'child_session' + row['parent_title'] = parent.get('title') if parent else None + row['parent_source'] = parent.get('source') if parent else None parent_root = _continuation_root_id(rows_by_id, parent_id) if parent_root: row['_parent_lineage_root_id'] = parent_root @@ -189,7 +313,7 @@ def _project_agent_session_rows(rows: list[dict]) -> list[dict]: # touched standalone sessions — exactly the inverse of what a user # expects from "Show agent sessions" sorted by activity. for key in ( - 'id', 'model', 'message_count', 'actual_message_count', + 'id', 'model', 'message_count', 'actual_message_count', 'actual_user_message_count', 'ended_at', 'end_reason', 'last_activity', ): if key in tip: @@ -214,9 +338,9 @@ def read_importable_agent_session_rows( db_path: Path, limit: int = 200, log=None, - exclude_sources: tuple[str, ...] | None = ("cron",), + exclude_sources: tuple[str, ...] | None = ("cron", "webui"), ) -> list[dict]: - """Return non-WebUI agent sessions projected as importable conversations. + """Return agent sessions projected as importable conversations. 
Hermes Agent can create rows in ``state.db.sessions`` before a session has any messages, and long conversations can be split into compression-linked @@ -243,6 +367,8 @@ def read_importable_agent_session_rows( # source column we cannot safely distinguish WebUI rows from agent rows. cur.execute("PRAGMA table_info(sessions)") session_cols = {row[1] for row in cur.fetchall()} + cur.execute("PRAGMA table_info(messages)") + message_cols = {row[1] for row in cur.fetchall()} if 'source' not in session_cols: log.warning( "agent session listing skipped: state.db at %s has no 'source' column " @@ -255,8 +381,21 @@ def read_importable_agent_session_rows( parent_expr = _optional_col('parent_session_id', session_cols) ended_expr = _optional_col('ended_at', session_cols) end_reason_expr = _optional_col('end_reason', session_cols) + user_id_expr = _optional_col('user_id', session_cols) + chat_id_expr = _optional_col('chat_id', session_cols) + chat_type_expr = _optional_col('chat_type', session_cols) + thread_id_expr = _optional_col('thread_id', session_cols) + session_key_expr = _optional_col('session_key', session_cols) + origin_chat_id_expr = _optional_col('origin_chat_id', session_cols) + origin_user_id_expr = _optional_col('origin_user_id', session_cols) + platform_expr = _optional_col('platform', session_cols) + user_message_count_expr = ( + "COUNT(CASE WHEN LOWER(m.role) = 'user' THEN 1 END)" + if 'role' in message_cols + else "COUNT(m.id)" + ) - where_clauses = ["s.source IS NOT NULL", "s.source != 'webui'"] + where_clauses = ["s.source IS NOT NULL"] params: list[str] = [] if exclude_sources: excluded = tuple(str(source) for source in exclude_sources if source) @@ -269,10 +408,19 @@ def read_importable_agent_session_rows( f""" SELECT s.id, s.title, s.model, s.message_count, s.started_at, s.source, + {user_id_expr}, + {chat_id_expr}, + {chat_type_expr}, + {thread_id_expr}, + {session_key_expr}, + {origin_chat_id_expr}, + {origin_user_id_expr}, + {platform_expr}, 
{parent_expr}, {ended_expr}, {end_reason_expr}, COUNT(m.id) AS actual_message_count, + {user_message_count_expr} AS actual_user_message_count, MAX(m.timestamp) AS last_activity FROM sessions s LEFT JOIN messages m ON m.session_id = s.id @@ -284,12 +432,170 @@ def read_importable_agent_session_rows( ) projected = _project_agent_session_rows([dict(row) for row in cur.fetchall()]) projected = [_with_normalized_source(row) for row in projected] + projected = [row for row in projected if is_cli_session_row_visible(row)] if limit is None: return projected return projected[:max(0, int(limit))] +def _lineage_report_row(row: dict, role: str) -> dict: + updated_at = row.get('ended_at') if row.get('ended_at') is not None else row.get('started_at') + return { + 'session_id': row.get('id'), + 'role': role, + 'title': row.get('title'), + 'source': row.get('source'), + 'started_at': row.get('started_at'), + 'updated_at': updated_at, + 'end_reason': row.get('end_reason'), + 'active': row.get('ended_at') is None, + 'archived': False, + } + + +def _empty_lineage_report(session_id: str, *, found: bool = False) -> dict: + return { + 'mutation': False, + 'found': found, + 'session_id': session_id, + 'lineage_key': session_id, + 'tip_session_id': session_id, + 'total_segments': 0, + 'materialized_segments': 0, + 'segments': [], + 'children': [], + 'manual_review': False, + } + + +def read_session_lineage_report(db_path: Path, session_id: str | None, max_hops: int = 20) -> dict: + """Return a bounded, read-only lifecycle report for a session lineage. + + This helper intentionally reports only facts that can be derived from + ``state.db.sessions`` without mutating WebUI JSON, archiving rows, or + deleting historical segments. It mirrors the sidebar continuation rules so + a future UI/PR can explain which rows are hidden compression/cli-close + segments and which child-session branches remain distinct. 
+ """ + sid = str(session_id or '').strip() + if not sid: + return _empty_lineage_report('') + db_path = Path(db_path) + if not db_path.exists(): + return _empty_lineage_report(sid) + + try: + with closing(sqlite3.connect(str(db_path))) as conn: + conn.row_factory = sqlite3.Row + cur = conn.cursor() + cur.execute("PRAGMA table_info(sessions)") + session_cols = {row[1] for row in cur.fetchall()} + required = {'id', 'parent_session_id', 'end_reason'} + if not required.issubset(session_cols): + return _empty_lineage_report(sid) + + source_expr = _optional_col('source', session_cols) + title_expr = _optional_col('title', session_cols) + started_expr = _optional_col('started_at', session_cols, '0') + ended_expr = _optional_col('ended_at', session_cols) + end_reason_expr = _optional_col('end_reason', session_cols) + parent_expr = _optional_col('parent_session_id', session_cols) + + def fetch_one(row_id: str | None) -> dict | None: + if not row_id: + return None + cur.execute( + f""" + SELECT s.id, + {source_expr}, + {title_expr}, + {started_expr}, + {parent_expr}, + {ended_expr}, + {end_reason_expr} + FROM sessions s + WHERE s.id = ? 
+ """, + (row_id,), + ) + row = cur.fetchone() + return dict(row) if row else None + + target = fetch_one(sid) + if not target: + return _empty_lineage_report(sid) + + segments = [target] + current = target + seen = {sid} + manual_review = False + for _hop in range(max(0, int(max_hops))): + parent_id = current.get('parent_session_id') + parent = fetch_one(parent_id) + if not parent or parent_id in seen: + manual_review = bool(parent_id and parent_id in seen) + break + if not _is_continuation_session(parent, current): + break + segments.append(parent) + seen.add(parent_id) + current = parent + else: + manual_review = True + + segment_ids = {row['id'] for row in segments} + child_rows: list[dict] = [] + for parent in segments: + cur.execute( + f""" + SELECT s.id, + {source_expr}, + {title_expr}, + {started_expr}, + {parent_expr}, + {ended_expr}, + {end_reason_expr} + FROM sessions s + WHERE s.parent_session_id = ? + ORDER BY s.started_at DESC + """, + (parent['id'],), + ) + for child_row in cur.fetchall(): + child = dict(child_row) + if child['id'] in segment_ids: + continue + if _is_continuation_session(parent, child): + # A continuation outside the selected path means the + # lineage is branched or the caller selected an older + # segment. Report manual review rather than proposing + # destructive cleanup candidates. 
+ manual_review = True + continue + child_rows.append(child) + except Exception: + return _empty_lineage_report(sid) + + root_id = segments[-1]['id'] if segments else sid + tip_id = segments[0]['id'] if segments else sid + return { + 'mutation': False, + 'found': True, + 'session_id': sid, + 'lineage_key': root_id, + 'tip_session_id': tip_id, + 'total_segments': len(segments), + 'materialized_segments': len(segments), + 'segments': [ + _lineage_report_row(row, 'tip' if idx == 0 else 'hidden_segment') + for idx, row in enumerate(segments) + ], + 'children': [_lineage_report_row(row, 'child_session') for row in child_rows], + 'manual_review': manual_review, + } + + def read_session_lineage_metadata(db_path: Path, session_ids: list[str] | set[str]) -> dict[str, dict]: """Return compression-lineage metadata for known WebUI sidebar sessions. @@ -378,6 +684,10 @@ def read_session_lineage_metadata(db_path: Path, session_ids: list[str] | set[st entry['relationship_type'] = 'child_session' entry['parent_title'] = parent_row.get('title') entry['parent_source'] = parent_row.get('source') + parent_source = str(parent_row.get('source') or '').strip().lower() + child_source = str(row.get('source') or '').strip().lower() + if parent_source and child_source and parent_source != child_source: + entry['_cross_surface_child_session'] = True parent_root = _continuation_root_id(rows, parent_id) if parent_root: entry['_parent_lineage_root_id'] = parent_root diff --git a/api/auth.py b/api/auth.py index 480f3659..73303f01 100644 --- a/api/auth.py +++ b/api/auth.py @@ -17,16 +17,41 @@ from api.config import STATE_DIR, load_settings logger = logging.getLogger(__name__) + +# Default session TTL — 30 days. Kept as a module-level constant for backwards +# compatibility with downstream code and regression tests that import it. +# At runtime, prefer ``_resolve_session_ttl()`` which honours the env var and +# settings.json overrides; this constant is the floor / fallback. 
+SESSION_TTL = 86400 * 30 # 30 days + + +def _resolve_session_ttl() -> int: + """Resolve session TTL from env > settings > default. + + Priority mirrors get_password_hash(): HERMES_WEBUI_SESSION_TTL env var + first, then settings.json, falling back to ``SESSION_TTL`` (30 days). + Clamped to [60s, 1 year] to prevent runaway cookies or self-lockout. + """ + env_v = os.getenv('HERMES_WEBUI_SESSION_TTL', '').strip() + if env_v.isdigit(): + val = int(env_v) + if 60 <= val <= 86400 * 365: + return val + s = load_settings() + v = s.get('session_ttl_seconds') + if isinstance(v, int) and 60 <= v <= 86400 * 365: + return v + return SESSION_TTL + + # ── Public paths (no auth required) ───────────────────────────────────────── PUBLIC_PATHS = frozenset({ - '/login', '/health', '/favicon.ico', + '/login', '/health', '/favicon.ico', '/sw.js', '/api/auth/login', '/api/auth/status', '/manifest.json', '/manifest.webmanifest', - '/sw.js', }) COOKIE_NAME = 'hermes_session' -SESSION_TTL = 86400 * 30 # 30 days _SESSIONS_FILE = STATE_DIR / '.sessions.json' @@ -78,24 +103,79 @@ def _save_sessions(sessions: dict[str, float]) -> None: _sessions = _load_sessions() # ── Login rate limiter ────────────────────────────────────────────────────── -_login_attempts = {} # ip -> [timestamp, ...] 
+_LOGIN_ATTEMPTS_FILE = STATE_DIR / '.login_attempts.json' _LOGIN_MAX_ATTEMPTS = 5 _LOGIN_WINDOW = 60 # seconds + +def _load_login_attempts() -> dict[str, list[float]]: + """Load persisted login attempts from STATE_DIR, pruning expired entries.""" + try: + if _LOGIN_ATTEMPTS_FILE.exists(): + data = json.loads(_LOGIN_ATTEMPTS_FILE.read_text(encoding='utf-8')) + if not isinstance(data, dict): + raise ValueError('malformed login-attempts file — expected dict') + now = time.time() + attempts: dict[str, list[float]] = {} + for ip, raw_times in data.items(): + if not isinstance(ip, str) or not isinstance(raw_times, list): + continue + fresh = [ + float(t) + for t in raw_times + if isinstance(t, (int, float)) and now - float(t) < _LOGIN_WINDOW + ] + if fresh: + attempts[ip] = fresh + return attempts + except Exception as e: + logger.debug("Failed to load login attempts file, starting fresh: %s", e) + return {} + + +def _save_login_attempts(attempts: dict[str, list[float]]) -> None: + """Atomically persist login attempts to STATE_DIR/.login_attempts.json (0600).""" + try: + _LOGIN_ATTEMPTS_FILE.parent.mkdir(parents=True, exist_ok=True) + fd, tmp = tempfile.mkstemp(dir=_LOGIN_ATTEMPTS_FILE.parent, suffix='.login_attempts.tmp') + try: + with os.fdopen(fd, 'w', encoding='utf-8') as f: + json.dump(attempts, f) + os.chmod(tmp, 0o600) + os.replace(tmp, _LOGIN_ATTEMPTS_FILE) + except Exception: + try: + os.unlink(tmp) + except OSError: + pass + raise + except Exception as e: + logger.debug("Failed to persist login attempts: %s", e) + + +_login_attempts = _load_login_attempts() # ip -> [timestamp, ...] 
+ + def _check_login_rate(ip: str) -> bool: """Return True if the IP is allowed to attempt login.""" now = time.time() attempts = _login_attempts.get(ip, []) # Prune old attempts attempts = [t for t in attempts if now - t < _LOGIN_WINDOW] - _login_attempts[ip] = attempts + if attempts: + _login_attempts[ip] = attempts + else: + _login_attempts.pop(ip, None) + _save_login_attempts(_login_attempts) return len(attempts) < _LOGIN_MAX_ATTEMPTS + def _record_login_attempt(ip: str) -> None: now = time.time() attempts = _login_attempts.get(ip, []) attempts.append(now) _login_attempts[ip] = attempts + _save_login_attempts(_login_attempts) def _signing_key(): @@ -156,7 +236,7 @@ def verify_password(plain) -> bool: def create_session() -> str: """Create a new auth session. Returns signed cookie value.""" token = secrets.token_hex(32) - _sessions[token] = time.time() + SESSION_TTL + _sessions[token] = time.time() + _resolve_session_ttl() _save_sessions(_sessions) sig = hmac.new(_signing_key(), token.encode(), hashlib.sha256).hexdigest()[:32] return f"{token}.{sig}" @@ -257,7 +337,7 @@ def check_auth(handler, parsed) -> bool: # safe='/' keeps path separators readable; everything else (including # `?`, `&`, `=`) gets percent-encoded. 
_next = _urlparse.quote(_path_with_query, safe='/') - handler.send_header('Location', '/login?next=' + _next) + handler.send_header('Location', 'login?next=' + _next) handler.end_headers() return False @@ -269,7 +349,7 @@ def set_auth_cookie(handler, cookie_value) -> None: cookie[COOKIE_NAME]['httponly'] = True cookie[COOKIE_NAME]['samesite'] = 'Lax' cookie[COOKIE_NAME]['path'] = '/' - cookie[COOKIE_NAME]['max-age'] = str(SESSION_TTL) + cookie[COOKIE_NAME]['max-age'] = str(_resolve_session_ttl()) # Set Secure flag when connection is HTTPS if getattr(handler.request, 'getpeercert', None) is not None or handler.headers.get('X-Forwarded-Proto', '') == 'https': cookie[COOKIE_NAME]['secure'] = True diff --git a/api/config.py b/api/config.py index f71bc812..0c241ce5 100644 --- a/api/config.py +++ b/api/config.py @@ -14,6 +14,7 @@ import copy import json import logging import os +import queue import sys import threading import time @@ -183,6 +184,45 @@ else: _cfg_cache = {} _cfg_lock = threading.Lock() _cfg_mtime: float = 0.0 # last known mtime of config.yaml; 0 = never loaded +_cfg_path: Path | None = None # active config.yaml path for the disk-loaded cache +_cfg_fingerprint: str | None = None # serialized snapshot from the last disk load + + +def _fingerprint_config(data: dict) -> str: + """Return a stable fingerprint for config dictionaries. + + A few tests and legacy call sites still mutate ``cfg`` directly for + in-memory overrides. Path-aware reloads should not immediately discard + those overrides just because the active profile path differs from the last + disk load, but an unchanged disk-loaded cache must still reload on profile + switches. + """ + try: + return json.dumps(data, sort_keys=True, separators=(",", ":"), default=str) + except Exception: + return repr(data) + + +def _cfg_has_in_memory_overrides() -> bool: + """True when cfg was changed after the last successful reload_config(). + + Detects two override shapes: + 1. 
``_cfg_cache`` was mutated in place (fingerprint differs). + 2. ``cfg`` (the module attribute) was rebound to a different dict — + e.g. ``monkeypatch.setattr(config, "cfg", {...})`` in tests. The + alias-with-the-cache pattern at module load means this is a common + test-isolation override, and silently reloading from disk over it + (the v0.51.7 path-aware reload regression) breaks any test that + relies on the override. + """ + if _cfg_fingerprint is not None and _fingerprint_config(_cfg_cache) != _cfg_fingerprint: + return True + # Module attribute rebound away from _cfg_cache by a test or runtime caller. + try: + return cfg is not _cfg_cache + except NameError: + # cfg not yet defined (during initial reload_config() at import time). + return False def _get_config_path() -> Path: @@ -198,22 +238,66 @@ def _get_config_path() -> Path: return HOME / ".hermes" / "config.yaml" +_WEBUI_SESSION_SAVE_MODES = {"deferred", "eager"} +_DEFAULT_WEBUI_SESSION_SAVE_MODE = "deferred" + + def get_config() -> dict: """Return the cached config dict, loading from disk if needed.""" - if not _cfg_cache: + config_path = _get_config_path() + try: + current_mtime = config_path.stat().st_mtime + except OSError: + current_mtime = 0.0 + cache_stale = current_mtime != _cfg_mtime or _cfg_path != config_path + if not _cfg_cache or (cache_stale and not _cfg_has_in_memory_overrides()): reload_config() + # When a test (or runtime caller) has rebound ``cfg`` to a different dict + # via monkeypatch.setattr(config, "cfg", ...), return that override rather + # than the underlying _cfg_cache. Without this branch, get_config() would + # silently bypass the override even though _cfg_has_in_memory_overrides() + # correctly suppressed the reload. + try: + if cfg is not _cfg_cache: + return cfg + except NameError: + pass return _cfg_cache +def get_webui_session_save_mode(config_data: dict | None = None) -> str: + """Return the validated first-turn session persistence mode. 
+ + ``deferred`` preserves the current first-turn sidecar behaviour: persist + pending_user_message/runtime fields before streaming, then merge the turn + after the agent finishes. ``eager`` additionally checkpoints the current + user turn into ``messages`` before launching the agent thread. Unknown + values fail closed to ``deferred`` so a typo never reintroduces eager disk + writes unexpectedly. + """ + active_cfg = config_data if isinstance(config_data, dict) else cfg + webui_cfg = active_cfg.get("webui", {}) if isinstance(active_cfg, dict) else {} + if not isinstance(webui_cfg, dict): + return _DEFAULT_WEBUI_SESSION_SAVE_MODE + mode = webui_cfg.get("session_save_mode", _DEFAULT_WEBUI_SESSION_SAVE_MODE) + if isinstance(mode, str): + normalized = mode.strip().lower() + if normalized in _WEBUI_SESSION_SAVE_MODES: + return normalized + return _DEFAULT_WEBUI_SESSION_SAVE_MODE + + def reload_config() -> None: """Reload config.yaml from the active profile's directory.""" - global _cfg_mtime + global _cfg_mtime, _cfg_path, _cfg_fingerprint with _cfg_lock: _cfg_cache.clear() config_path = _get_config_path() # Remember the old mtime so we can tell whether config actually changed # vs. first-ever load (mtime == 0.0, e.g. server start or profile switch). _old_cfg_mtime = _cfg_mtime + _cfg_path = config_path + _cfg_mtime = 0.0 try: import yaml as _yaml @@ -227,6 +311,7 @@ def reload_config() -> None: _cfg_mtime = 0.0 except Exception: logger.debug("Failed to load yaml config from %s", config_path) + _cfg_fingerprint = _fingerprint_config(_cfg_cache) # Bust the models cache so the next request sees fresh config values. # Only delete the disk cache when config has actually changed -- not on # first-ever load (when _old_cfg_mtime == 0.0, i.e. 
server start or @@ -536,6 +621,14 @@ _FALLBACK_MODELS = [ {"provider": "Z.AI", "id": "zai/glm-4.7", "label": "GLM-4.7"}, {"provider": "Z.AI", "id": "zai/glm-4.5", "label": "GLM-4.5"}, {"provider": "Z.AI", "id": "zai/glm-4.5-flash", "label": "GLM-4.5 Flash"}, + # OpenRouter free-tier models — must appear in fallback list so they + # are visible even when the tool-support filter in hermes_cli strips + # them out of the live catalog (see #1426). + {"provider": "OpenRouter", "id": "openrouter/elephant-alpha", "label": "Elephant Alpha (free)"}, + {"provider": "OpenRouter", "id": "openrouter/owl-alpha", "label": "Owl Alpha (free)"}, + {"provider": "OpenRouter", "id": "tencent/hy3-preview:free", "label": "Hy3 Preview (free)"}, + {"provider": "OpenRouter", "id": "nvidia/nemotron-3-super-120b-a12b:free", "label": "Nemotron 3 Super (free)"}, + {"provider": "OpenRouter", "id": "arcee-ai/trinity-large-preview:free", "label": "Trinity Large Preview (free)"}, ] # Provider display names for known Hermes provider IDs @@ -564,6 +657,7 @@ _PROVIDER_DISPLAY = { "qwen": "Qwen", "x-ai": "xAI", "nvidia": "NVIDIA NIM", + "xiaomi": "Xiaomi", } # Provider alias → canonical slug. Users configure providers using the @@ -614,6 +708,8 @@ _PROVIDER_ALIASES = { "nvidia-nim": "nvidia", "build-nvidia": "nvidia", "nemotron": "nvidia", + "mimo": "xiaomi", + "xiaomi-mimo": "xiaomi", # Legacy alias — earlier WebUI builds wrote ``provider: local`` for unknown # loopback endpoints, but ``local`` is not registered in # ``hermes_cli.auth.PROVIDER_REGISTRY``. 
Routing it through ``custom`` @@ -645,6 +741,167 @@ def _resolve_provider_alias(name: str) -> str: return _PROVIDER_ALIASES.get(raw, name) +def _custom_provider_slug_from_name(name: object) -> str: + raw = str(name or "").strip().lower() + if not raw: + return "" + if raw.startswith("custom:"): + return raw + return "custom:" + raw.replace(" ", "-") + + +def _custom_provider_entries(config_obj: dict | None = None) -> list[dict]: + source = config_obj if isinstance(config_obj, dict) else cfg + entries = source.get("custom_providers", []) + if not isinstance(entries, list): + return [] + return [entry for entry in entries if isinstance(entry, dict)] + + +def _named_custom_provider_slugs(config_obj: dict | None = None) -> set[str]: + return { + slug + for slug in ( + _custom_provider_slug_from_name(entry.get("name")) + for entry in _custom_provider_entries(config_obj) + ) + if slug + } + + +def _named_custom_provider_slug_for_provider( + provider: object, + config_obj: dict | None = None, +) -> str: + raw = str(provider or "").strip().lower() + if not raw: + return "" + raw_suffix = raw.removeprefix("custom:") + for entry in _custom_provider_entries(config_obj): + entry_name = str(entry.get("name") or "").strip().lower() + slug = _custom_provider_slug_from_name(entry_name) + if not entry_name or not slug: + continue + if raw in {entry_name, slug} or raw_suffix == slug.removeprefix("custom:"): + return slug + return "" + + +def _resolve_configured_provider_id( + provider: object, + config_obj: dict | None = None, + *, + base_url: object = None, + resolve_alias: bool = True, +) -> str: + """Normalize a configured provider id. + + When ``resolve_alias`` is True (default, used for active-provider / + badge surfaces), falls through to ``_resolve_provider_alias`` after the + named-custom check. 
When False (used by ``resolve_model_provider``), + preserves the raw provider value so downstream local-server detection + (`_LOCAL_SERVER_PROVIDERS` membership in #1625) sees the original name + like ``ollama`` / ``lm-studio`` rather than alias-collapsed ``custom`` / + ``lmstudio``. The base-url-to-named-slug fallback still runs in both + modes when applicable. + + See in-stage absorption note on stage-313 for the #1625 regression that + motivated the ``resolve_alias`` flag. + """ + named_slug = _named_custom_provider_slug_for_provider(provider, config_obj) + if named_slug: + return named_slug + + if not resolve_alias: + raw = str(provider or "").strip().lower() + if base_url and raw == "custom": + by_base_url = _named_custom_provider_slug_for_base_url(base_url, config_obj) + if by_base_url: + return by_base_url + return str(provider or "") + + resolved = _resolve_provider_alias(provider) + if ( + base_url + and str(resolved or "").strip().lower() == "custom" + ): + by_base_url = _named_custom_provider_slug_for_base_url(base_url, config_obj) + if by_base_url: + return by_base_url + + return resolved + + +def _canonicalise_provider_id(name: object) -> str: + """Normalise a provider id slug into a stable lowercase-hyphenated form. + + Folds underscores to hyphens and lowercases the result, so a user with + ``providers.opencode_go.api_key`` in ``config.yaml`` and + ``model.provider: opencode-go`` sees ONE provider group, not two + (#1568). Then attempts alias resolution but only if the alias target + is itself a known canonical id in ``_PROVIDER_DISPLAY`` — this avoids + converting ``x-ai`` (canonical in WebUI's data structures) to ``xai`` + (the hermes_cli alias target which the WebUI doesn't index by). 
+ + Examples:: + + opencode-go -> opencode-go (canonical, no change) + opencode_go -> opencode-go (underscore folded) + OpenCode-Go -> opencode-go (case folded) + OPENCODE_GO -> opencode-go (both folded) + z_ai -> zai (alias-resolved — zai is canonical) + x-ai -> x-ai (preserved — x-ai is canonical) + + Empty input passes through as the empty string. Unknown ids preserve + their normalised form. + """ + if not name: + return "" + raw = str(name).strip().lower().replace("_", "-") + if not raw: + return "" + # Already a canonical id known to _PROVIDER_DISPLAY/_PROVIDER_MODELS: + # keep as-is to avoid round-tripping through aliases (e.g. x-ai → xai). + if raw in _PROVIDER_DISPLAY or raw in _PROVIDER_MODELS: + return raw + # Try alias resolution. Only accept the result if it's itself a + # canonical id in _PROVIDER_DISPLAY — that prevents aliases pointing + # at non-canonical strings (legacy, hermes_cli-specific) from leaking + # in. Falls back to the normalised input otherwise. + resolved = _resolve_provider_alias(raw) + if resolved and resolved.lower() in _PROVIDER_DISPLAY: + return resolved.lower() + return raw + + +def _normalize_base_url_for_match(value: object) -> str: + url = str(value or "").strip().rstrip("/") + if not url: + return "" + parsed_url = urlparse(url if "://" in url else f"http://{url}") + scheme = (parsed_url.scheme or "http").lower() + netloc = (parsed_url.netloc or parsed_url.path).lower().rstrip("/") + path = parsed_url.path.rstrip("/") + if not parsed_url.netloc: + path = "" + return f"{scheme}://{netloc}{path}" + + +def _named_custom_provider_slug_for_base_url( + base_url: object, + config_obj: dict | None = None, +) -> str: + target = _normalize_base_url_for_match(base_url) + if not target: + return "" + for entry in _custom_provider_entries(config_obj): + entry_base_url = _normalize_base_url_for_match(entry.get("base_url")) + if entry_base_url != target: + continue + return _custom_provider_slug_from_name(entry.get("name")) or "custom" + 
return "" + + # Well-known models per provider (used to populate dropdown for direct API providers) _PROVIDER_MODELS = { "anthropic": [ @@ -812,6 +1069,14 @@ _PROVIDER_MODELS = { {"id": "nvidia/llama-3.3-nemotron-super-49b-v1.5", "label": "Llama 3.3 Nemotron Super 49B"}, {"id": "qwen/qwen3-next-80b-a3b-instruct", "label": "Qwen3 Next 80B"}, ], + # Xiaomi MiMo — direct API via api.xiaomimimo.com + "xiaomi": [ + {"id": "mimo-v2.5-pro", "label": "MiMo V2.5 Pro"}, + {"id": "mimo-v2.5", "label": "MiMo V2.5"}, + {"id": "mimo-v2-pro", "label": "MiMo V2 Pro"}, + {"id": "mimo-v2-omni", "label": "MiMo V2 Omni"}, + {"id": "mimo-v2-flash", "label": "MiMo V2 Flash"}, + ], # xAI — prefix used in OpenRouter model IDs (x-ai/grok-4-20) "x-ai": [ {"id": "grok-4.20", "label": "Grok 4.20"}, @@ -860,6 +1125,153 @@ def _format_ollama_label(mid: str) -> str: return label +def _format_nous_label(mid: str) -> str: + """Turn a Nous Portal model id into a readable display label. + + Nous IDs are ``/[:]`` (e.g. ``anthropic/claude-opus-4.7``); + drop the vendor namespace, prettify the model name with the same token + rules as :func:`_format_ollama_label` (short acronyms uppercase, size + suffixes uppercase, capitalize the rest), then append ``" (via Nous)"`` + so the entry is visually distinct from same-named models in other + provider groups (e.g. direct Anthropic). 
+ + Examples (matches the helper's actual output — labels are produced by + :func:`_format_ollama_label`'s token rules, so 3-letter tokens like + ``GPT`` and ``PRO`` render uppercase):: + + anthropic/claude-opus-4.7 -> Claude Opus 4.7 (via Nous) + openai/gpt-5.4-mini -> GPT 5.4 Mini (via Nous) + google/gemini-3.1-pro-preview -> Gemini 3.1 PRO Preview (via Nous) + moonshotai/kimi-k2.6 -> Kimi K2.6 (via Nous) + qwen/qwen3.5-plus-02-15 -> Qwen3.5 Plus 02 15 (via Nous) + nvidia/nemotron-3-super-120b-a12b -> Nemotron 3 Super 120B A12b (via Nous) + minimax/minimax-m2.5:free -> MiniMax M2.5 (Free) (via Nous) + """ + name_part = mid.split("/", 1)[-1] if "/" in mid else mid + # MiniMax-CN ids come back lowercase on the live wire (`minimax-m2.5`) but + # the curated label convention is mixed-case "MiniMax M2.5" — match that. + if name_part.lower().startswith("minimax"): + name_part = "MiniMax" + name_part[len("minimax"):] + base = _format_ollama_label(name_part) + return f"{base} (via Nous)" + + +# Soft cap on how many Nous Portal models surface in the picker dropdown. +# Above this count, _build_nous_featured_set() trims the visible list to +# ~_NOUS_FEATURED_TARGET entries; the full catalog is still returned to the +# client under ``extra_models`` so /model autocomplete covers everything. +# Caps reflect human scannability — a 25-row dropdown is the practical UX +# ceiling, and per-vendor sampling at 15 keeps the flagship shape visible +# without one vendor dominating. +_NOUS_FEATURED_THRESHOLD = 25 +_NOUS_FEATURED_TARGET = 15 + +# Vendor-prefix priority order for featured selection. Lower index = picked +# earlier when sampling the live catalog. Reflects which vendors users have +# historically reached for first via Nous Portal (driven by the curated +# static list maintained in _PROVIDER_MODELS["nous"] and Discord feedback). 
+_NOUS_VENDOR_PRIORITY = ( + "anthropic", "openai", "google", "moonshotai", "z-ai", + "minimax", "qwen", "x-ai", "deepseek", "stepfun", + "xiaomi", "tencent", "nvidia", "arcee-ai", +) + + +def _build_nous_featured_set( + live_ids: list[str], + *, + selected_model_id: str | None = None, + target: int = _NOUS_FEATURED_TARGET, +) -> tuple[list[str], list[str]]: + """Trim a Nous Portal catalog into a (featured, extras) split. + + ``featured`` is what the picker dropdown renders. ``extras`` is everything + else — kept available so the slash-command `/model` autocomplete and the + ``_dynamicModelLabels`` map cover the full catalog. + + Selection rules (in order, deterministic): + + 1. Always include the user's currently-selected model if it's in the + catalog (preserves selection stickiness — no orphan IDs in the + dropdown after a refresh). + 2. Always include every entry from the curated static + ``_PROVIDER_MODELS["nous"]`` list whose id maps onto a live id — + those four are explicitly maintained as flagship picks. + 3. Top up to ``target`` by walking ``_NOUS_VENDOR_PRIORITY`` round-robin + (one model per vendor each pass) so no vendor monopolises the slot + budget. Within a vendor, the original ``live_ids`` order is preserved + — that's the order Nous Portal returned, which approximates recency. + + Returns ``(featured_ids, extras_ids)`` — both lists are subsets of + ``live_ids`` with disjoint membership and union equal to ``live_ids``. + + For catalogs ≤ ``_NOUS_FEATURED_THRESHOLD`` entries the function is a + no-op: ``featured == live_ids``, ``extras == []``. + """ + if not live_ids: + return [], [] + if len(live_ids) <= _NOUS_FEATURED_THRESHOLD: + return list(live_ids), [] + + chosen: list[str] = [] # preserves insertion order + chosen_set: set[str] = set() + + def _add(mid: str) -> None: + if mid and mid not in chosen_set: + chosen.append(mid) + chosen_set.add(mid) + + # Rule 1: sticky selection. 
Strip "@nous:" prefix if present so we can + # match against the live id space (which is bare "vendor/model"). + if selected_model_id: + sel = selected_model_id + if sel.startswith("@nous:"): + sel = sel[len("@nous:"):] + if sel in live_ids: + _add(sel) + + # Rule 2: curated flagships. Extract the bare ids from the static list + # entries (which are stored as "@nous:vendor/model"). + for static in _PROVIDER_MODELS.get("nous", []): + sid = static.get("id", "") + if sid.startswith("@nous:"): + sid = sid[len("@nous:"):] + if sid in live_ids: + _add(sid) + + # Rule 3: vendor-priority round-robin top-up. + by_vendor: dict[str, list[str]] = {} + for mid in live_ids: + if mid in chosen_set: + continue + vendor = mid.split("/", 1)[0] if "/" in mid else "" + by_vendor.setdefault(vendor, []).append(mid) + + # Walk vendors in priority order, then any leftover vendors alphabetically. + priority = list(_NOUS_VENDOR_PRIORITY) + leftover = sorted(v for v in by_vendor if v not in set(priority)) + vendor_order = priority + leftover + + # Round-robin: one model per vendor per pass until we hit the target or + # exhaust every bucket. + while len(chosen) < target: + added_this_pass = 0 + for vendor in vendor_order: + if len(chosen) >= target: + break + bucket = by_vendor.get(vendor) + if not bucket: + continue + _add(bucket.pop(0)) + added_this_pass += 1 + if added_this_pass == 0: + break # all buckets empty + + # Anything not chosen becomes extras (full-catalog completion surface). + extras = [m for m in live_ids if m not in chosen_set] + return chosen, extras + + def _apply_provider_prefix( raw_models: list[dict], provider_id: str, @@ -949,6 +1361,151 @@ def _deduplicate_model_ids(groups: list[dict]) -> None: model["label"] = f"{original_id} ({provider_name})" +# ── Local-server provider preservation (#1625) ───────────────────────────── +# +# LM Studio, Ollama, llama.cpp, vLLM, TabbyAPI etc. are inference servers, +# not OpenAI-compatible proxies. 
They register models under their FULL path +# as the registry key (the HuggingFace-style "namespace/model" id, e.g. +# "qwen/qwen3.6-27b"). Stripping the namespace prefix would cause a registry +# miss and the server loads a brand-new instance with default settings, +# silently ignoring the user's tuned context length / parallel slots. +# +# This is distinct from OpenAI-compatible proxies (LiteLLM, OpenRouter relays) +# where stripping "openai/gpt-5.4" → "gpt-5.4" is the correct behavior. +# +# Detection has two layers: +# 1. Static set of known local-server provider names (canonical + common +# custom-provider naming). +# 2. Loopback / private-host base_url heuristic: an OpenAI-compatible URL +# pointing at 127.0.0.1, localhost, or a private IP block is almost +# certainly a local model server, regardless of the provider name. +# Reuses the same private-IP detection logic used elsewhere in +# api/config.py for SSRF host trust. +_LOCAL_SERVER_PROVIDERS = { + "lmstudio", # canonical (in hermes_cli.models.CANONICAL_PROVIDERS) + "lm-studio", # alias used in some custom_providers configs (#1625 Opus NIT) + "ollama", # via custom_providers, common pattern + "llamacpp", # via custom_providers + "llama-cpp", # alias + "vllm", # via custom_providers + "tabby", # via custom_providers (TabbyAPI) + "tabbyapi", # alias + "koboldcpp", # local llama.cpp UI fork + "textgen", # text-generation-webui (oobabooga) OpenAI-compat extension + "localai", # LocalAI project (#1625 Opus NIT) +} + + +def _is_local_server_provider(provider_id: str) -> bool: + """True when provider_id names a local model server. + + Named custom providers resolve to ``custom:``. Treat those as local + when the bare slug is one of the known local-server provider names too. 
+ """ + provider = str(provider_id or "").strip().lower() + if provider in _LOCAL_SERVER_PROVIDERS: + return True + if provider.startswith("custom:"): + return provider.removeprefix("custom:") in _LOCAL_SERVER_PROVIDERS + return False + + +def _base_url_points_at_local_server(base_url: str) -> bool: + """True if base_url's host is a loopback or private IP (likely local server). + + Reuses ipaddress.is_loopback / is_private / is_link_local — the same + heuristic used in the `api/config.py` SSRF/credential-routing code. + Errors (DNS failure, malformed URL) return False so callers fall back to + the static-provider-name check. + """ + if not base_url: + return False + try: + from urllib.parse import urlparse + import ipaddress + host = (urlparse(base_url).hostname or "").lower() + if not host: + return False + # Plain-text "localhost" doesn't ipaddress-parse but is unambiguous. + if host in ("localhost", "ip6-localhost", "ip6-loopback"): + return True + try: + addr = ipaddress.ip_address(host) + except ValueError: + # Not an IP literal — could be a hostname like "ollama.internal". + # Don't try DNS resolution here (slow + ambient): only IP literals + # and the `localhost` alias get the no-strip treatment via this path. + return False + return addr.is_loopback or addr.is_private or addr.is_link_local + except Exception: + return False + + +def _custom_slug_rest_looks_like_host_port(rest: str) -> bool: + """True when ``custom:`` is an endpoint-style slug ``host:port``. + + WebUI sometimes derives ``custom:10.8.71.41:8080`` from ``base_url`` authority. + The #1776 peel must not treat that middle colon as part of an eaten model + segment — otherwise ``@custom:10.8.71.41:8080:Qwen3`` wrongly becomes model + ``8080:Qwen3``. 
+ """ + rest = str(rest or "").strip() + if ":" not in rest: + return False + host, port_s = rest.rsplit(":", 1) + if not host or ":" in host: + return False + if not port_s.isdigit(): + return False + try: + port_n = int(port_s) + except ValueError: + return False + if not (1 <= port_n <= 65535): + return False + try: + import ipaddress + + ipaddress.ip_address(host) + return True + except ValueError: + pass + hl = host.lower() + if hl == "localhost": + return True + # Typical DNS hostname used as proxy slug (contains at least one label dot). + if "." in host: + return True + return False + + +def _get_provider_base_url(provider_id): + """Look up the configured base_url for a provider (e.g. lmstudio). + + Checks two locations, in order: + 1. ``cfg["providers"][]["base_url"]`` — the explicit + per-provider override. + 2. ``cfg["model"]["base_url"]`` — falls back here when + ``cfg["model"]["provider"] == provider_id``. This is the historical + shape (the model block carries both the active provider AND the + base URL for that provider in a single record). + + Returns the URL stripped of trailing ``/`` if configured, otherwise None. + """ + prov_cfg = cfg.get("providers", {}).get(provider_id, {}) or {} + explicit = (prov_cfg.get("base_url") or "").strip().rstrip("/") + if explicit: + return explicit + model_cfg = cfg.get("model", {}) or {} + if isinstance(model_cfg, dict): + model_provider = str(model_cfg.get("provider") or "").strip().lower() + if model_provider == str(provider_id).strip().lower(): + model_base = (model_cfg.get("base_url") or "").strip().rstrip("/") + if model_base: + return model_base + return None + + def resolve_model_provider(model_id: str) -> tuple: """Resolve model name, provider, and base_url for AIAgent. 
@@ -974,8 +1531,13 @@ def resolve_model_provider(model_id: str) -> tuple: config_base_url = None model_cfg = cfg.get("model", {}) if isinstance(model_cfg, dict): - config_provider = model_cfg.get("provider") config_base_url = model_cfg.get("base_url") + config_provider = _resolve_configured_provider_id( + model_cfg.get("provider"), + cfg, + base_url=config_base_url, + resolve_alias=False, + ) # Heal legacy ``provider: local`` entries (written by WebUI < v0.50.252) # at read time. ``local`` is not a registered provider, so passing it @@ -993,16 +1555,44 @@ def resolve_model_provider(model_id: str) -> tuple: # Custom providers declared in config.yaml should win over slash-based # OpenRouter heuristics. Their model IDs commonly contain '/' too. - custom_providers = cfg.get("custom_providers", []) - if isinstance(custom_providers, list): + # However, when the active provider is an explicit non-custom provider and + # the requested model_id is the configured default model, that active + # provider takes precedence over overlapping custom_providers[] entries. + # Otherwise WebUI routes to custom: instead of the intended endpoint + # and can surface a 401 from the wrong provider (#1922). + # For all other cases, preserve custom_providers[] routing for explicitly + # selected custom provider models. 
+ _is_explicit_non_custom_provider = ( + config_provider is not None + and config_provider != 'custom' + and not config_provider.startswith('custom:') + ) + _default_model = model_cfg.get('default') if isinstance(model_cfg, dict) else None + _skip_custom_providers = ( + _is_explicit_non_custom_provider + and _default_model is not None + and model_id == _default_model + ) + custom_providers = cfg.get('custom_providers', []) + if isinstance(custom_providers, list) and not _skip_custom_providers: for entry in custom_providers: if not isinstance(entry, dict): continue - entry_model = (entry.get("model") or "").strip() - entry_name = (entry.get("name") or "").strip() - entry_base_url = (entry.get("base_url") or "").strip() - if entry_model and entry_name and model_id == entry_model: - provider_hint = "custom:" + entry_name.lower().replace(" ", "-") + entry_model = (entry.get('model') or '').strip() + entry_name = (entry.get('name') or '').strip() + entry_base_url = (entry.get('base_url') or '').strip() + entry_model_ids = set() + if entry_model: + entry_model_ids.add(entry_model) + entry_models = entry.get('models') + if isinstance(entry_models, dict): + entry_model_ids.update( + key.strip() + for key in entry_models.keys() + if isinstance(key, str) and key.strip() + ) + if entry_name and model_id in entry_model_ids: + provider_hint = 'custom:' + entry_name.lower().replace(' ', '-') return model_id, provider_hint, entry_base_url or None # @provider:model format — explicit provider hint from the dropdown. @@ -1010,9 +1600,35 @@ def resolve_model_provider(model_id: str) -> tuple: # resolve credentials in streaming.py). # Use rsplit to handle provider_ids that contain ':' (e.g. custom:my-key). # With rsplit, "@custom:my-key:model" → provider="custom:my-key", model="model". + # BUT: model IDs that end in :free / :beta / :thinking collide with the + # rsplit grammar (e.g. 
"@openrouter:tencent/hy3-preview:free" would split + # into provider="openrouter:tencent/hy3-preview", model="free"). Guard + # against that by falling back to split(":") when the rsplit result is not + # a recognised provider (#1744). + # + # Edge case (#1776): for custom providers with the same suffix + # ("@custom:my-key:some-model:free"), rsplit yields + # provider_hint="custom:my-key:some-model", bare_model="free", and the + # custom-prefix guard below skips the split-fallback. Detect the + # over-split structurally — custom hints normally carry one slug segment + # after ``custom:``. If ``provider_hint`` has extra ``:`` tokens because the + # model ID contained tags like ``:free``, peel one segment back (#1776). + # + # Exception: ``custom::`` is a single logical slug derived + # from OpenAI ``base_url`` authority and contains no eaten model segments. if model_id.startswith("@") and ":" in model_id: - provider_hint, bare_model = model_id[1:].rsplit(":", 1) - return bare_model, provider_hint, None + inner = model_id[1:] + provider_hint, bare_model = inner.rsplit(":", 1) + if provider_hint.startswith("custom:") and provider_hint.count(":") >= 2: + _slug_rest = provider_hint[len("custom:"):] + if not _custom_slug_rest_looks_like_host_port(_slug_rest): + provider_hint, extra = provider_hint.rsplit(":", 1) + bare_model = f"{extra}:{bare_model}" + elif (provider_hint not in _PROVIDER_MODELS + and provider_hint not in _PROVIDER_DISPLAY + and not provider_hint.startswith("custom:")): + provider_hint, bare_model = inner.split(":", 1) + return bare_model, provider_hint, _get_provider_base_url(provider_hint) if "/" in model_id: prefix, bare = model_id.split("/", 1) @@ -1052,6 +1668,15 @@ def resolve_model_provider(model_id: str) -> tuple: # just because the model name contains a slash (e.g. google/gemma-4-26b-a4b). # The user has explicitly pointed at a base_url, so trust their routing config. 
if config_base_url: + # Local model servers (LM Studio, Ollama, llama.cpp, vLLM, TabbyAPI) + # register models under their full HuggingFace-style id. Stripping the + # prefix breaks the lookup and causes a fresh instance to load with + # default settings, ignoring user-tuned context length / parallel slots. + # See #1625. Detect either by canonical provider name OR by base_url + # pointing at a loopback/private host. + if (_is_local_server_provider(config_provider) + or _base_url_points_at_local_server(config_base_url)): + return model_id, config_provider, config_base_url # Only strip the provider prefix when it's a known provider namespace # (e.g. "openai/gpt-5.4" → "gpt-5.4" for a custom OpenAI-compatible proxy). # Unknown prefixes (e.g. "zai-org/GLM-5.1" on DeepInfra) are intrinsic to @@ -1070,6 +1695,102 @@ def resolve_model_provider(model_id: str) -> tuple: return model_id, config_provider, config_base_url +def resolve_custom_provider_connection(provider_id: str) -> tuple[str | None, str | None]: + """Return (api_key, base_url) for a named ``custom:*`` provider. + + Supports ``custom_providers[].api_key`` as either a literal key or + ``${ENV_VAR}``, and ``custom_providers[].key_env`` as an env-var hint. + Returns ``(None, None)`` when no named custom provider matches. + """ + pid = str(provider_id or "").strip().lower() + if not pid.startswith("custom:"): + return None, None + + def _slugify(value: str) -> str: + s = str(value or "").strip().lower().replace("_", "-").replace(" ", "-") + while "--" in s: + s = s.replace("--", "-") + return s.strip("-") + + slug = _slugify(pid.split(":", 1)[1].strip()) + if not slug: + return None, None + + # Read the live config snapshot to avoid stale module-level cache edge + # cases after profile switches or runtime config edits. 
+ cfg_data = get_config() + + def _resolve_key(raw_api_key, raw_key_env) -> str | None: + api_key = None + if raw_api_key is not None: + key_text = str(raw_api_key).strip() + if key_text.startswith("${") and key_text.endswith("}") and len(key_text) > 3: + api_key = os.getenv(key_text[2:-1], "").strip() or None + elif key_text: + api_key = key_text + if not api_key: + key_env = str(raw_key_env or "").strip() + if key_env: + api_key = os.getenv(key_env, "").strip() or None + return api_key + + custom_providers = cfg_data.get("custom_providers", []) + if not isinstance(custom_providers, list): + custom_providers = [] + + for entry in custom_providers: + if not isinstance(entry, dict): + continue + name = str(entry.get("name") or "").strip() + if not name: + continue + entry_slug = _slugify(name) + if entry_slug != slug: + continue + + base_url = str(entry.get("base_url") or "").strip() or None + api_key = _resolve_key(entry.get("api_key"), entry.get("key_env")) + return api_key, base_url + + # If exactly one custom provider is configured, use it as a pragmatic + # fallback for mismatched slugs (e.g. punctuation differences). + if len(custom_providers) == 1 and isinstance(custom_providers[0], dict): + entry = custom_providers[0] + return ( + _resolve_key(entry.get("api_key"), entry.get("key_env")), + str(entry.get("base_url") or "").strip() or None, + ) + + # Fallbacks for setups that don't use custom_providers names directly. 
+ providers_cfg = cfg_data.get("providers", {}) + provider_specific = providers_cfg.get(pid, {}) if isinstance(providers_cfg, dict) else {} + provider_custom = providers_cfg.get("custom", {}) if isinstance(providers_cfg, dict) else {} + + model_cfg = cfg_data.get("model", {}) + model_provider = str(model_cfg.get("provider") or "").strip().lower() if isinstance(model_cfg, dict) else "" + + fallback_base = None + for candidate in (provider_specific, provider_custom, model_cfg): + if isinstance(candidate, dict): + _base = str(candidate.get("base_url") or "").strip() + if _base: + fallback_base = _base + break + + fallback_key = None + if isinstance(provider_specific, dict): + fallback_key = _resolve_key(provider_specific.get("api_key"), provider_specific.get("key_env")) + if not fallback_key and isinstance(provider_custom, dict): + fallback_key = _resolve_key(provider_custom.get("api_key"), provider_custom.get("key_env")) + if not fallback_key and isinstance(model_cfg, dict) and model_provider in {"custom", pid, slug}: + fallback_key = _resolve_key(model_cfg.get("api_key"), model_cfg.get("key_env")) + + if fallback_key or fallback_base: + return fallback_key, fallback_base or None + + return None, None + + def model_with_provider_context(model_id: str, model_provider: str | None = None) -> str: """Return the model string to pass to ``resolve_model_provider()``. 
@@ -1286,6 +2007,7 @@ def set_hermes_default_model(model_id: str) -> dict: # ── TTL cache for get_available_models() ───────────────────────────────────── _available_models_cache: dict | None = None _available_models_cache_ts: float = 0.0 +_available_models_cache_source_fingerprint: dict | None = None _AVAILABLE_MODELS_CACHE_TTL: float = 86400.0 # 24 hours _available_models_cache_lock = threading.RLock() # must be RLock: cold path refactoring moved slow work inside this lock, requiring re-entry _cache_build_cv = threading.Condition(_available_models_cache_lock) # shares underlying RLock so notify_all() is safe inside with _available_models_cache_lock @@ -1308,9 +2030,82 @@ _provider_models_invalidated_ts: dict[str, float] = {} # provider_id -> timesta # HERMES_WEBUI_STATE_DIR / port) has its own file and test runs never # pollute the production server's cache. Also works on macOS and Windows # where /dev/shm does not exist. +def _current_webui_version() -> str | None: + """Lazy resolver for the WebUI version, used to stamp the disk cache (#1633). + + `api.updates` imports `api.config` at module-load time, so we cannot + `from api.updates import WEBUI_VERSION` at the top of this module without a + circular import. Instead we resolve lazily on each cache load/save. + + Returns the runtime version string (e.g. ``v0.50.293``) when api.updates + has been imported, or None if it isn't loaded yet (boot-time corner case + before the server has finished initializing). A None return is treated as + "do not stamp / do not validate" by the cache layer so cache reads/writes + that happen during early init still work — the next call after init will + stamp normally. + """ + try: + # Read attribute via dotted lookup so we don't add an import-time edge. 
+ import sys as _sys + mod = _sys.modules.get('api.updates') + if mod is None: + return None + v = getattr(mod, 'WEBUI_VERSION', None) + return str(v) if v else None + except Exception: + return None + + +# Disk-cache schema version (#1633). +# +# Bumped any time the disk cache shape changes in a backward-incompatible way +# (e.g. new required field, renamed key). Independent of the WebUI version +# stamp — _webui_version forces a rebuild on every release; _schema_version +# guarantees that even if a future release accidentally reuses the same +# WebUI version string (or a debug build doesn't have a version), a structural +# change still invalidates the cache. +_MODELS_CACHE_SCHEMA_VERSION = 3 + + _models_cache_path = STATE_DIR / "models_cache.json" +def _get_auth_store_path() -> Path: + """Return the auth.json path for the active Hermes profile.""" + try: + from api.profiles import get_active_hermes_home as _gah + + return _gah() / "auth.json" + except ImportError: + return HOME / ".hermes" / "auth.json" + + +def _models_cache_file_fingerprint(path: Path) -> dict: + """Return non-secret identity metadata for a cache dependency file. + + The /api/models response depends on config.yaml (model/provider defaults) + and auth.json (active_provider + credential_pool). The cache only needs + cheap invalidation signals here, not file contents; never include secrets. 
+ """ + fingerprint = {"path": str(Path(path).expanduser())} + try: + st = Path(path).stat() + except OSError: + fingerprint["missing"] = True + return fingerprint + fingerprint["mtime_ns"] = st.st_mtime_ns + fingerprint["size"] = st.st_size + return fingerprint + + +def _models_cache_source_fingerprint() -> dict: + """Return the current config/auth-store fingerprint for /api/models cache.""" + return { + "config_yaml": _models_cache_file_fingerprint(_get_config_path()), + "auth_json": _models_cache_file_fingerprint(_get_auth_store_path()), + } + + def _delete_models_cache_on_disk() -> None: try: os.unlink(str(_models_cache_path)) @@ -1319,7 +2114,15 @@ def _delete_models_cache_on_disk() -> None: def _is_valid_models_cache(cache: object) -> bool: - """Return True when a disk cache payload has the full /api/models shape.""" + """Return True when a cache payload has the full /api/models shape. + + SHAPE-only check: validates structural correctness of an in-memory or + on-disk cache. Use _is_loadable_disk_cache() for the strictness needed + when reading from disk (it adds version-stamp invalidation per #1633). + + Kept loose so in-memory cache writes (which never touch disk and so don't + need version stamping) can use this validator unchanged. + """ if not isinstance(cache, dict): return False if not {"active_provider", "default_model", "configured_model_badges", "groups"}.issubset(cache): @@ -1333,8 +2136,68 @@ def _is_valid_models_cache(cache: object) -> bool: ) +def _is_loadable_disk_cache(cache: object) -> bool: + """Return True when an on-disk cache is safe to use after a process boot. + + Adds two checks on top of _is_valid_models_cache (#1633): + 1. ``_schema_version`` matches `_MODELS_CACHE_SCHEMA_VERSION`. A bumped + schema version unconditionally invalidates older cache files. + 2. ``_webui_version`` matches the current runtime version. 
Forces a + rebuild after every release so users see picker-shape fixes + immediately, instead of waiting up to 24 hours for the TTL to expire. + If the runtime version cannot be resolved (early-init edge case), + skip this check rather than wedge the boot. + + Note: ``_webui_version`` is a string equality check, not a semver compare — + two debug builds with the same `WEBUI_VERSION` string but different actual + code wouldn't invalidate via this axis. ``_schema_version`` is the + independent invalidation axis for breaking changes that lack a tag bump; + bump it whenever the cache shape changes incompatibly. + """ + if not _is_valid_models_cache(cache): + return False + if not isinstance(cache, dict): # appease type-narrowing — already guarded above + return False + cached_schema = cache.get("_schema_version") + if cached_schema != _MODELS_CACHE_SCHEMA_VERSION: + # DEBUG telemetry per stage-294 absorption: makes "why did my cache + # rebuild" investigations one log-grep away. + logger.debug( + "models cache rejected: schema=%r vs runtime=%r", + cached_schema, _MODELS_CACHE_SCHEMA_VERSION, + ) + return False + runtime_version = _current_webui_version() + if runtime_version is not None: + cached_version = cache.get("_webui_version") + if not isinstance(cached_version, str) or cached_version != runtime_version: + logger.debug( + "models cache rejected: webui_version=%r vs runtime=%r", + cached_version, runtime_version, + ) + return False + cached_sources = cache.get("_source_fingerprint") + runtime_sources = _models_cache_source_fingerprint() + if cached_sources != runtime_sources: + logger.debug( + "models cache rejected: source_fingerprint=%r vs runtime=%r", + cached_sources, + runtime_sources, + ) + return False + return True + + def _load_models_cache_from_disk() -> dict | None: - """Load /api/models cache from disk if it exists and has current metadata.""" + """Load /api/models cache from disk if it exists and has current metadata. 
+ + Adds the per-release version check from #1633: a cache stamped with a + different WebUI version is treated as missing, forcing a fresh rebuild + that picks up any picker-shape fixes shipped in the new release. The + returned dict is the SHAPE-only cache (without the `_webui_version` / + `_schema_version` stamps) so callers don't have to know about the + on-disk metadata fields. + """ try: import json as _j @@ -1342,28 +2205,53 @@ def _load_models_cache_from_disk() -> dict | None: return None with open(_models_cache_path, encoding="utf-8") as f: cache = _j.load(f) - return cache if _is_valid_models_cache(cache) else None + if not _is_loadable_disk_cache(cache): + return None + # Strip the disk-only metadata before returning, so the in-memory + # cache shape stays exactly what the rest of the code expects. + return { + "active_provider": cache["active_provider"], + "default_model": cache["default_model"], + "configured_model_badges": cache["configured_model_badges"], + "groups": cache["groups"], + } except Exception: return None def _save_models_cache_to_disk(cache: dict) -> None: - """Save cache to disk so it survives server restarts.""" + """Save cache to disk so it survives server restarts. + + Stamps the payload with `_webui_version` and `_schema_version` (#1633) so + a subsequent process running a different WebUI version, or a future + release that bumps the schema, will treat the file as invalid and + rebuild from live provider data on its first /api/models call. + + The version stamp is omitted (not the literal None — the field is just + skipped) when the runtime version cannot be resolved at the moment of + save, which would happen only in a very early boot path before + api.updates is loaded. _is_loadable_disk_cache treats a missing field as + a mismatch (since runtime_version is non-None on every subsequent call), + so this is safe — at worst we write one cache file that gets rejected + once on the next boot. 
+ """ try: if not _is_valid_models_cache(cache): return + payload = { + "_schema_version": _MODELS_CACHE_SCHEMA_VERSION, + "_source_fingerprint": _models_cache_source_fingerprint(), + "active_provider": cache["active_provider"], + "default_model": cache["default_model"], + "configured_model_badges": cache["configured_model_badges"], + "groups": cache["groups"], + } + runtime_version = _current_webui_version() + if runtime_version is not None: + payload["_webui_version"] = runtime_version tmp = str(_models_cache_path) + f".{os.getpid()}.tmp" with open(tmp, "w", encoding="utf-8") as f: - json.dump( - { - "active_provider": cache["active_provider"], - "default_model": cache["default_model"], - "configured_model_badges": cache["configured_model_badges"], - "groups": cache["groups"], - }, - f, - indent=2, - ) + json.dump(payload, f, indent=2) os.rename(tmp, str(_models_cache_path)) except Exception: pass # Non-fatal -- cache will rebuild on next call @@ -1371,15 +2259,27 @@ def _save_models_cache_to_disk(cache: dict) -> None: def _get_fresh_memory_models_cache(now: float) -> dict | None: """Return a valid fresh in-memory /api/models cache, or clear stale shapes.""" - global _available_models_cache, _available_models_cache_ts + global _available_models_cache, _available_models_cache_ts, _available_models_cache_source_fingerprint if _available_models_cache is None: return None if (now - _available_models_cache_ts) >= _AVAILABLE_MODELS_CACHE_TTL: return None + current_sources = _models_cache_source_fingerprint() + if _available_models_cache_source_fingerprint != current_sources: + logger.debug( + "models memory cache rejected: source_fingerprint=%r vs runtime=%r", + _available_models_cache_source_fingerprint, + current_sources, + ) + _available_models_cache = None + _available_models_cache_ts = 0.0 + _available_models_cache_source_fingerprint = None + return None if _is_valid_models_cache(_available_models_cache): return copy.deepcopy(_available_models_cache) 
_available_models_cache = None _available_models_cache_ts = 0.0 + _available_models_cache_source_fingerprint = None return None @@ -1397,10 +2297,11 @@ def invalidate_models_cache(): result from the disk cache because the disk hit is checked before the memory cache rebuild runs. """ - global _cache_build_in_progress, _available_models_cache, _available_models_cache_ts, _cache_build_cv + global _cache_build_in_progress, _available_models_cache, _available_models_cache_ts, _available_models_cache_source_fingerprint, _cache_build_cv with _available_models_cache_lock: _available_models_cache = None _available_models_cache_ts = 0.0 + _available_models_cache_source_fingerprint = None _cache_build_in_progress = False _cache_build_cv.notify_all() # Clear the credential pool cache too. The cache key is provider_id @@ -1413,6 +2314,18 @@ def invalidate_models_cache(): _delete_models_cache_on_disk() +def invalidate_credential_pool_cache(provider_id: str): + """Invalidate the credential pool cache for a specific provider. + + Used by the streaming layer's credential self-heal logic (#1401) to + force a fresh credential pool load after re-reading auth.json. + """ + global _CREDENTIAL_POOL_CACHE + with _available_models_cache_lock: + _CREDENTIAL_POOL_CACHE.pop(provider_id, None) + _CREDENTIAL_POOL_CACHE.pop(_resolve_provider_alias(provider_id), None) + + def invalidate_provider_models_cache(provider_id: str): """Invalidate cached models for a single provider. @@ -1425,10 +2338,11 @@ def invalidate_provider_models_cache(provider_id: str): Args: provider_id: canonical provider id (e.g. 
'openai', 'anthropic', 'custom:my-key') """ - global _available_models_cache, _available_models_cache_ts, _CREDENTIAL_POOL_CACHE + global _available_models_cache, _available_models_cache_ts, _available_models_cache_source_fingerprint, _CREDENTIAL_POOL_CACHE with _available_models_cache_lock: _available_models_cache = None _available_models_cache_ts = 0.0 + _available_models_cache_source_fingerprint = None _provider_models_invalidated_ts[provider_id] = time.time() # Also evict the credential pool so the next cold path re-loads it. # Must evict both the original key and its canonical form (load_pool @@ -1471,6 +2385,106 @@ def _get_label_for_model(model_id: str, existing_groups: list) -> str: ) +def _read_live_provider_model_ids(provider_id: str) -> list[str]: + """Return live model IDs from Hermes CLI for a provider, or [] on failure. + + WebUI's static ``_PROVIDER_MODELS`` table is only a fallback. The agent CLI + owns the provider registry and catalog-discovery logic, so ordinary picker + groups should ask ``hermes_cli.models.provider_model_ids()`` first (#1240). + Provider aliases are tried as a secondary lookup because WebUI keeps a few + display-facing IDs (for example ``google`` / ``x-ai``) that Hermes CLI may + normalize internally. 
+ """ + pid = str(provider_id or "").strip() + if not pid: + return [] + try: + from hermes_cli.models import provider_model_ids as _provider_model_ids + except Exception: + return [] + + candidates = [pid] + try: + alias = _resolve_provider_alias(pid) + except Exception: + alias = "" + if alias and alias not in candidates: + candidates.append(alias) + + seen: set[str] = set() + for candidate in candidates: + try: + live_ids = _provider_model_ids(candidate) or [] + except Exception: + logger.debug("Failed to load %s models from hermes_cli", candidate) + continue + result: list[str] = [] + for mid in live_ids: + mid_s = str(mid or "").strip() + if mid_s and mid_s not in seen: + seen.add(mid_s) + result.append(mid_s) + if result: + return result + return [] + + +def _models_from_live_provider_ids(provider_id: str, live_ids: list[str]) -> list[dict]: + """Convert Hermes CLI model ids into WebUI picker model entries.""" + formatter = _format_ollama_label if provider_id in ("ollama", "ollama-cloud") else None + models: list[dict] = [] + seen: set[str] = set() + for mid in live_ids: + mid_s = str(mid or "").strip() + if not mid_s or mid_s in seen: + continue + seen.add(mid_s) + label = formatter(mid_s) if formatter else _get_label_for_model(mid_s, []) + models.append({"id": mid_s, "label": label}) + return models + + +def _read_visible_codex_cache_model_ids() -> list[str]: + """Return visible model slugs from Codex's local models_cache.json. + + The agent's provider_model_ids('openai-codex') intentionally filters IDs + with ``supported_in_api: false``. Codex CLI still lists some of those models + in its picker (notably ``gpt-5.3-codex-spark`` from #1680), so the WebUI + merges this visible local catalog to stay in sync with Codex itself. 
+ """ + codex_home = Path(os.getenv("CODEX_HOME", "").strip() or (HOME / ".codex")).expanduser() + cache_path = codex_home / "models_cache.json" + try: + payload = json.loads(cache_path.read_text(encoding="utf-8")) + except Exception: + return [] + + entries = payload.get("models") if isinstance(payload, dict) else None + if not isinstance(entries, list): + return [] + + sortable: list[tuple[int, str]] = [] + for item in entries: + if not isinstance(item, dict): + continue + slug = item.get("slug") + if not isinstance(slug, str) or not slug.strip(): + continue + visibility = item.get("visibility", "") + if isinstance(visibility, str) and visibility.strip().lower() in ("hide", "hidden"): + continue + priority = item.get("priority") + rank = int(priority) if isinstance(priority, (int, float)) else 10_000 + sortable.append((rank, slug.strip())) + + sortable.sort(key=lambda item: (item[0], item[1])) + ordered: list[str] = [] + for _, slug in sortable: + if slug not in ordered: + ordered.append(slug) + return ordered + + def get_available_models() -> dict: """ Return available models grouped by provider. @@ -1487,14 +2501,19 @@ def get_available_models() -> dict: 'groups': [{'provider': str, 'models': [{'id': str, 'label': str}]}] } """ - global _cache_build_in_progress, _available_models_cache, _available_models_cache_ts, _cache_build_cv + global _cache_build_in_progress, _available_models_cache, _available_models_cache_ts, _available_models_cache_source_fingerprint, _cache_build_cv # Config mtime check — must come before any config reads. 
# (Test #585 verifies _current_mtime appears before active_provider = None) try: - _current_mtime = Path(_get_config_path()).stat().st_mtime + _current_path = _get_config_path() + _current_mtime = _current_path.stat().st_mtime except OSError: + _current_path = _get_config_path() _current_mtime = 0.0 - if _current_mtime != _cfg_mtime: + if ( + (_current_mtime != _cfg_mtime or _current_path != _cfg_path) + and not _cfg_has_in_memory_overrides() + ): reload_config() # ── COLD PATH helper ───────────────────────────────────────────────────── # Extracted so it runs inside _available_models_cache_lock (RLock) to @@ -1616,25 +2635,31 @@ def get_available_models() -> dict: if cfg_default: default_model = cfg_default - # Normalize active_provider to its canonical key + # Normalize active_provider to its canonical key. Named custom + # providers are first-class provider ids in WebUI routing; accept the + # user-facing name from config.yaml (``provider: ollama-local``) and + # route it through the same ``custom:`` slug the picker emits. if active_provider: - active_provider = _resolve_provider_alias(active_provider) + active_provider = _resolve_configured_provider_id( + active_provider, + cfg, + base_url=cfg_base_url, + ) # 2. 
Read auth store (active_provider fallback + credential_pool inspection) auth_store = {} - try: - from api.profiles import get_active_hermes_home as _gah - - auth_store_path = _gah() / "auth.json" - except ImportError: - auth_store_path = HOME / ".hermes" / "auth.json" + auth_store_path = _get_auth_store_path() if auth_store_path.exists(): try: import json as _j auth_store = _j.loads(auth_store_path.read_text(encoding="utf-8")) if not active_provider: - active_provider = _resolve_provider_alias(auth_store.get("active_provider")) + active_provider = _resolve_configured_provider_id( + auth_store.get("active_provider"), + cfg, + base_url=cfg_base_url, + ) except Exception: logger.debug("Failed to load auth store from %s", auth_store_path) @@ -1716,6 +2741,22 @@ def get_available_models() -> dict: logger.debug("Failed to get key source for provider %s", _p.get("id", "unknown")) detected_providers.add(_p["id"]) _hermes_auth_used = True + + # Belt-and-braces: list_available_providers() is the primary signal + # for OAuth providers, but its `authenticated` field can disagree + # with `get_auth_status().logged_in` on some hermes_cli versions + # (the two fields are computed via different code paths). When the + # disagreement happens for Nous Portal, the Settings → Providers + # card renders the live catalog (because api/providers.py iterates + # all OAuth providers regardless of authentication state) but the + # picker dropdown comes up empty — a confusing asymmetry reported + # in #1567. Add Nous explicitly when get_auth_status agrees so the + # picker stays in sync with the providers card. 
+ try: + if _gas("nous").get("logged_in"): + detected_providers.add("nous") + except Exception: + logger.debug("Failed to check Nous Portal auth status") except Exception: logger.debug("Failed to detect auth providers from hermes") @@ -1746,6 +2787,7 @@ def get_available_models() -> dict: "GLM_API_KEY", "KIMI_API_KEY", "DEEPSEEK_API_KEY", + "XIAOMI_API_KEY", "OPENCODE_ZEN_API_KEY", "OPENCODE_GO_API_KEY", "MINIMAX_API_KEY", @@ -1781,6 +2823,8 @@ def get_available_models() -> dict: detected_providers.add("minimax-cn") if all_env.get("DEEPSEEK_API_KEY"): detected_providers.add("deepseek") + if all_env.get("XIAOMI_API_KEY"): + detected_providers.add("xiaomi") if all_env.get("XAI_API_KEY"): detected_providers.add("x-ai") if all_env.get("MISTRAL_API_KEY"): @@ -1789,18 +2833,76 @@ def get_available_models() -> dict: detected_providers.add("opencode-zen") if all_env.get("OPENCODE_GO_API_KEY"): detected_providers.add("opencode-go") + # LM Studio: detect via LM_API_KEY + LM_BASE_URL in ~/.hermes/.env + if all_env.get("LM_API_KEY") and all_env.get("LM_BASE_URL"): + detected_providers.add("lmstudio") # Also detect providers explicitly listed in config.yaml providers section. # A user may configure a provider key via config.yaml providers..api_key # without setting the corresponding env var. (#604) + # + # Canonicalise the id slug here so a user with ``providers.opencode_go`` + # (underscore variant) doesn't see TWO provider groups in the picker — + # one for the canonical ``opencode-go`` from active_provider detection + # and a phantom ``Opencode_Go`` group for the config-key form (#1568). + # The same applies to mixed-case ids like ``OpenCode-Go`` and to + # legitimate aliases like ``z-ai`` → ``zai``. 
_cfg_providers = cfg.get("providers", {}) if isinstance(_cfg_providers, dict): for _pid_key in _cfg_providers: - if _pid_key in _PROVIDER_MODELS or _pid_key in cfg.get("providers", {}): - detected_providers.add(_pid_key) + _canonical = _canonicalise_provider_id(_pid_key) + if not _canonical: + continue + if _canonical in _PROVIDER_MODELS or _canonical in _cfg_providers or _pid_key in _cfg_providers: + detected_providers.add(_canonical) + + def _configured_provider_for_base_url(base_url: object) -> str: + target = _normalize_base_url_for_match(base_url) + if not target: + return "" + + if isinstance(model_cfg, dict): + model_base_url = _normalize_base_url_for_match(model_cfg.get("base_url")) + if model_base_url == target: + provider_hint = _resolve_configured_provider_id( + model_cfg.get("provider"), + cfg, + base_url=base_url, + ) + if provider_hint: + return str(provider_hint).strip().lower() + + providers_cfg = cfg.get("providers", {}) + if isinstance(providers_cfg, dict): + for provider_key, provider_cfg in providers_cfg.items(): + if not isinstance(provider_cfg, dict): + continue + provider_base_url = _normalize_base_url_for_match( + provider_cfg.get("base_url") + ) + if provider_base_url == target: + provider_hint = _resolve_provider_alias(provider_key) + if provider_hint: + return str(provider_hint).strip().lower() + + custom_providers_cfg = cfg.get("custom_providers", []) + if isinstance(custom_providers_cfg, list): + for entry in custom_providers_cfg: + if not isinstance(entry, dict): + continue + entry_base_url = _normalize_base_url_for_match(entry.get("base_url")) + if entry_base_url != target: + continue + entry_name = str(entry.get("name") or "").strip() + if entry_name: + return "custom:" + entry_name.lower().replace(" ", "-") + return "custom" + + return "" # 4. 
Fetch models from custom endpoint if base_url is configured auto_detected_models = [] + auto_detected_models_by_provider: dict[str, list[dict]] = {} if cfg_base_url: try: import ipaddress @@ -1812,11 +2914,13 @@ def get_available_models() -> dict: else: endpoint_url = base_url.rstrip("/") + "/v1/models" - provider = "custom" + configured_provider = _configured_provider_for_base_url(base_url) + provider = configured_provider or "custom" + provider_from_config = bool(configured_provider) parsed = urlparse(base_url if "://" in base_url else f"http://{base_url}") host = (parsed.netloc or parsed.path).lower() - if parsed.hostname: + if parsed.hostname and not provider_from_config: try: addr = ipaddress.ip_address(parsed.hostname) if addr.is_private or addr.is_loopback or addr.is_link_local: @@ -1939,20 +3043,25 @@ def get_available_models() -> dict: model_name = model.get("name", "") or model.get("model", "") or model_id if model_id and model_name: label = _format_ollama_label(model_id) if provider in ("ollama", "ollama-cloud") else model_name - auto_detected_models.append({"id": model_id, "label": label}) - detected_providers.add(provider.lower()) + auto_model = {"id": model_id, "label": label} + auto_detected_models.append(auto_model) + provider_key = provider.lower() + auto_detected_models_by_provider.setdefault(provider_key, []).append(auto_model) + detected_providers.add(provider_key) except Exception: logger.debug("Custom endpoint unreachable or misconfigured for provider: %s", provider) _custom_providers_cfg = cfg.get("custom_providers", []) _named_custom_groups: dict = {} if isinstance(_custom_providers_cfg, list): - _seen_custom_ids = {m["id"] for m in auto_detected_models} + _seen_custom_ids = set() for _cp in _custom_providers_cfg: if not isinstance(_cp, dict): continue _cp_name = (_cp.get("name") or "").strip() - _slug = ("custom:" + _cp_name.lower().replace(" ", "-")) if _cp_name else None + _slug = _custom_provider_slug_from_name(_cp_name) if _cp_name else 
None + if _slug and _slug not in _named_custom_groups: + _named_custom_groups[_slug] = (_cp_name, []) # Collect model IDs: singular "model" field first, then "models" dict keys _cp_model_ids: list[str] = [] @@ -1966,12 +3075,11 @@ def get_available_models() -> dict: _cp_model_ids.append(_m_id.strip()) for _cp_model in _cp_model_ids: - if _cp_model and _cp_model not in _seen_custom_ids: + _dedup_key = f"{_slug}:{_cp_model}" if _slug else _cp_model + if _cp_model and _dedup_key not in _seen_custom_ids: _cp_label = _get_label_for_model(_cp_model, []) - _seen_custom_ids.add(_cp_model) + _seen_custom_ids.add(_dedup_key) if _slug: - if _slug not in _named_custom_groups: - _named_custom_groups[_slug] = (_cp_name, []) detected_providers.add(_slug) _cp_option_id = _cp_model if active_provider != _slug and not _cp_option_id.startswith("@"): @@ -1997,6 +3105,14 @@ def get_available_models() -> dict: if not _has_unnamed: detected_providers.discard("custom") + _named_custom_slugs = _named_custom_provider_slugs(cfg) + _base_matched_named_slug = _named_custom_provider_slug_for_base_url(cfg_base_url, cfg) + if _base_matched_named_slug and _named_custom_slugs: + for _pid in list(detected_providers): + _pid_norm = str(_pid or "").strip().lower() + if _pid_norm.startswith("custom:") and _pid_norm not in _named_custom_slugs: + detected_providers.discard(_pid) + # Filter providers if providers.only_configured is set providers_cfg = cfg.get("providers", {}) only_show_configured = providers_cfg.get("only_configured", False) if isinstance(providers_cfg, dict) else False @@ -2006,28 +3122,149 @@ def get_available_models() -> dict: configured_providers.add(active_provider) cfg_providers = cfg.get("providers", {}) if isinstance(cfg_providers, dict): - configured_providers.update(cfg_providers.keys()) + # Canonicalise here too — same rationale as #1568 detection + # path. 
Without this, only_show_configured mode could + # exclude detected ``opencode-go`` because configured_providers + # only has the underscore-variant key from config.yaml. + configured_providers.update( + _canonicalise_provider_id(k) or k for k in cfg_providers.keys() + ) # Only show providers that are both detected and configured detected_providers = detected_providers.intersection(configured_providers) + # Post-collection dedup: re-canonicalise every entry so any path that + # added a non-canonical id (mixed-case from auth-store, raw config-key, + # legacy alias) gets folded onto the canonical key. Belt-and-braces for + # #1568 — protects against future regressions in any of the ~25 + # `detected_providers.add(...)` callsites without auditing each one. + # The fold is idempotent for already-canonical ids, so safe to run + # unconditionally. + if detected_providers: + _canonicalised_detected = set() + for _pid in detected_providers: + _c = _canonicalise_provider_id(_pid) or _pid + _canonicalised_detected.add(_c) + detected_providers = _canonicalised_detected + # 5. Build model groups if detected_providers: for pid in sorted(detected_providers): - if pid.startswith("custom:") and pid in _named_custom_groups: - _nc_display, _nc_models = _named_custom_groups[pid] - if _nc_models: - groups.append({"provider": _nc_display, "provider_id": pid, "models": _nc_models}) + # Custom-provider PIDs are populated above via the + # _named_custom_groups branch (or skipped intentionally). + # They MUST NOT fall through to the auto_detected_models + # fallback below, otherwise the active provider's models + # get copied into a phantom Custom group with mismatched + # provider prefixes (#1881). 
+ if pid.startswith("custom:"): + if pid in _named_custom_groups: + _nc_display, _nc_models = _named_custom_groups[pid] + # If all named-group models were deduped (already auto-detected + # from base_url /v1/models), fall back to auto-detected models + # instead of silently dropping the group (issue #1619). + # + # Per Opus advisor on stage-295: the load-bearing fix for the + # reporter's symptom is the api/routes.py:/api/models/live + # broadening to handle custom:* slugs. This block is defensive + # belt-and-braces — under current _named_custom_groups + # population logic (atomic add+append inside the same dedup + # guard at line ~2640), an empty list shouldn't reach here. + # Kept for future-proofing in case the population logic + # changes (e.g. supporting model-less custom_providers entries). + if not _nc_models: + _nc_models = auto_detected_models_by_provider.get(pid, []) + if _nc_models: + groups.append({"provider": _nc_display, "provider_id": pid, "models": _nc_models}) continue provider_name = _PROVIDER_DISPLAY.get(pid, pid.title()) if pid == "openrouter": + # OpenRouter has two model surfaces: + # (1) curated tool-supporting catalog via hermes_cli.models.fetch_openrouter_models() + # — the canonical agent-ready list, applies a tool-support filter + # (Kilo-Org/kilocode#9068) that hides image/completion-only models + # (2) free-tier `:free` variants — newly-added models OpenRouter ships + # experimentally that may not yet advertise `tools` in supported_parameters + # (see #1426). These get filtered out of (1) but users want them visible. + # + # Strategy: take the live curated list as the base, then augment with a + # separate live-fetch of OpenRouter's /v1/models filtered to free-tier-only. + # Free-tier entries get a "(free)" label suffix so the picker is honest about + # what the user is selecting. Falls back to the static _FALLBACK_MODELS list + # when both live fetches fail (offline, transient API error, test env). 
+ raw_models = [] + seen_ids = set() + try: + from hermes_cli.models import ( + fetch_openrouter_models as _fetch_or_models, + ) + live_curated = _fetch_or_models() or [] + for mid, _desc in live_curated: + if mid and mid not in seen_ids: + seen_ids.add(mid) + raw_models.append({"id": mid, "label": mid}) + except Exception: + logger.warning("Failed to load OpenRouter curated catalog from hermes_cli") + + # Free-tier live fetch — bypasses the tool-support filter so models + # OpenRouter has flagged free but hasn't yet annotated with tools=[] + # (or that have tools=[] but the user explicitly wants to try) appear. + try: + import urllib.request as _urlreq + _req = _urlreq.Request( + "https://openrouter.ai/api/v1/models", + headers={"Accept": "application/json"}, + ) + with _urlreq.urlopen(_req, timeout=8.0) as _resp: + _payload = json.loads(_resp.read().decode()) + _free_count = 0 + _free_cap = 30 # don't drown the picker — top 30 free tier + for _item in _payload.get("data", []) or []: + if not isinstance(_item, dict): + continue + _mid = str(_item.get("id") or "").strip() + if not _mid or _mid in seen_ids: + continue + _pricing = _item.get("pricing") or {} + try: + _is_free = ( + float(_pricing.get("prompt", "0") or "0") == 0 + and float(_pricing.get("completion", "0") or "0") == 0 + ) + except (TypeError, ValueError): + _is_free = False + # Also include explicit `:free` suffix variants + _is_free = _is_free or _mid.endswith(":free") + if not _is_free: + continue + _name = ( + str(_item.get("name") or "").strip() or _mid + ) + # Strip provider prefix from name for display, append (free) + _label = _name.split("/")[-1] if "/" in _name else _name + if "(free)" not in _label.lower(): + _label = f"{_label} (free)" + seen_ids.add(_mid) + raw_models.append({"id": _mid, "label": _label}) + _free_count += 1 + if _free_count >= _free_cap: + break + except Exception: + logger.debug("OpenRouter free-tier live fetch unavailable; using fallback") + + if not raw_models: + # Both 
live fetches failed — fall back to the curated static list. + # Deepcopy so dedup/prefix mutation downstream does not bleed + # into the module-level catalog. + raw_models = [ + {"id": m["id"], "label": m["label"]} + for m in _FALLBACK_MODELS + if m.get("provider") == "OpenRouter" + ] + groups.append( { "provider": "OpenRouter", "provider_id": "openrouter", - "models": [ - {"id": m["id"], "label": m["label"]} - for m in _FALLBACK_MODELS - ], + "models": raw_models, } ) elif pid == "ollama-cloud": @@ -2051,16 +3288,224 @@ def get_available_models() -> dict: "models": models, } ) - elif pid in _PROVIDER_MODELS or pid in cfg.get("providers", {}): - raw_models = copy.deepcopy(_PROVIDER_MODELS.get(pid, [])) + elif pid == "openai-codex": + # Codex account catalogs drift faster than WebUI releases + # (for example gpt-5.3-codex-spark in #1680). Ask the + # agent's Codex resolver first so /api/models inherits the + # live Codex API / local ~/.codex cache / static fallback + # chain instead of freezing the picker to WebUI's curated + # _PROVIDER_MODELS snapshot. 
+ raw_models = [] + codex_ids = [] + try: + from hermes_cli.models import provider_model_ids as _provider_model_ids + codex_ids = [mid for mid in (_provider_model_ids("openai-codex") or []) if mid] + except Exception: + logger.warning("Failed to load OpenAI Codex models from hermes_cli") + + for mid in _read_visible_codex_cache_model_ids(): + if mid not in codex_ids: + codex_ids.append(mid) + + raw_models = [ + {"id": mid, "label": _get_label_for_model(mid, [])} + for mid in codex_ids + ] + + if not raw_models: + raw_models = copy.deepcopy(_PROVIDER_MODELS.get("openai-codex", [])) + + if raw_models: + models = _apply_provider_prefix(raw_models, pid, active_provider) + groups.append( + { + "provider": provider_name, + "provider_id": pid, + "models": models, + } + ) + elif pid == "nous": + # Nous Portal exposes a curated catalog (~30 models on most + # accounts, up to several hundred for enterprise tiers) via + # inference-api.nousresearch.com. Like ollama-cloud, we + # live-fetch through hermes_cli.models.provider_model_ids() + # rather than relying on the static four-entry list, which + # chronically drifts out of date (#1538). + # + # When the catalog exceeds _NOUS_FEATURED_THRESHOLD (~25) + # the picker dropdown gets a curated subset to stay + # scannable — the full list is still returned under + # "extra_models" for the slash-command autocomplete and + # the dynamic-label map (#1567). The optgroup label is + # decorated with the truncation count so users know more + # exists. 
+ raw_models = [] + extra_models: list[dict] = [] + truncated_label_suffix = "" + live_fetch_failed = False + try: + from hermes_cli.models import provider_model_ids as _provider_model_ids + + live_ids = _provider_model_ids("nous") or [] + except Exception: + logger.warning("Failed to load Nous Portal models from hermes_cli") + live_ids = [] + live_fetch_failed = True + + if live_ids: + # Sticky-selection signal: prefer the explicitly-active + # model from cfg["model"]["model"] (what the user is + # currently using) over cfg["model"]["default"] (the + # configured default suggestion). Falls back to the + # latter so first-load before any selection still works. + _model_cfg = cfg.get("model", {}) + _selected = ( + (isinstance(_model_cfg, dict) and _model_cfg.get("model")) + or default_model + or None + ) + featured_ids, extras_ids = _build_nous_featured_set( + live_ids, + selected_model_id=_selected, + ) + # Prefix every live id with "@nous:" so routing matches + # the explicit-provider-hint branch of resolve_model_provider + # (same convention as the curated static list — see + # tests/test_nous_portal_routing.py for the invariant). + raw_models = [ + {"id": f"@nous:{mid}", "label": _format_nous_label(mid)} + for mid in featured_ids + ] + extra_models = [ + {"id": f"@nous:{mid}", "label": _format_nous_label(mid)} + for mid in extras_ids + ] + if extras_ids: + # Show "(15 of 397)" so the user understands the picker + # is showing a featured subset, not a broken short list. + truncated_label_suffix = ( + f" ({len(featured_ids)} of {len(live_ids)})" + ) + elif not live_fetch_failed: + # Live-fetch returned an empty list AND did not raise — + # the user is gated as authenticated by detection above + # but the catalog endpoint replied with no models. + # Showing the static 4-entry curated list here would + # contradict the providers card (which always shows + # the live catalog) — exactly the asymmetry #1567 + # reports. 
Omit the Nous group entirely; the providers + # card already tells the truth, and a transient empty + # response will self-heal on the next cache rebuild. + logger.warning( + "Nous Portal authenticated but live-fetch returned empty — " + "omitting from picker (will retry on next cache rebuild)" + ) + else: + # hermes_cli unavailable / raised — fall back to the + # curated 4-entry static list so the picker is never + # empty in this degraded state. This matches pre-#1538 + # behaviour for environments without hermes_cli (test + # envs, package mismatches, isolated WebUI builds). + raw_models = copy.deepcopy(_PROVIDER_MODELS.get("nous", [])) + + if raw_models: + models = _apply_provider_prefix(raw_models, pid, active_provider) + # Apply the same prefix transform to extras so /model + # autocomplete sees consistent IDs across the two lists. + extras = _apply_provider_prefix(extra_models, pid, active_provider) if extra_models else [] + group_entry = { + "provider": provider_name + truncated_label_suffix, + "provider_id": pid, + "models": models, + } + if extras: + group_entry["extra_models"] = extras + groups.append(group_entry) + elif pid == "lmstudio": + # LM Studio is a local server — fetch live loaded models via + # the OpenAI-compatible /v1/models endpoint (#WebUI). + # + # Two-tier lookup, each in its own try so a failure in one + # does not abort the other (the bug pattern that broke + # tests/test_issue1527_lmstudio_base_url_classification on + # CI environments where hermes_cli isn't importable — + # ImportError in the cli tier was hijacking the whole + # branch and silently skipping the urlopen fallback). 
+ raw_models = [] + lm_ids: list[str] = [] + try: + from hermes_cli.models import provider_model_ids as _provider_model_ids + lm_ids = _provider_model_ids("lmstudio") or [] + except Exception: + logger.debug("hermes_cli LM Studio lookup unavailable; using urlopen fallback") + + if lm_ids: + raw_models = [{"id": mid, "label": mid} for mid in lm_ids] + else: + # Fallback: fetch /models directly from the configured + # base URL. Looks for the URL in either + # `cfg["providers"]["lmstudio"]["base_url"]` or + # `cfg["model"]["base_url"]` (via _get_provider_base_url), + # so the historical model-block config shape still works. + lm_cfg = cfg.get("providers", {}).get("lmstudio", {}) or {} + lm_base_url = _get_provider_base_url("lmstudio") or "" + lm_api_key = str(lm_cfg.get("api_key") or "").strip() if isinstance(lm_cfg, dict) else "" + if lm_base_url: + headers = {"User-Agent": "OpenAI/Python 1.0"} + if lm_api_key: + headers["Authorization"] = f"Bearer {lm_api_key}" + endpoint = (lm_base_url + "/models").rstrip("/") + try: + import urllib.request as _urlreq + req = _urlreq.Request(endpoint, method="GET", headers=headers) + with _urlreq.urlopen(req, timeout=5) as resp: + lm_data = json.loads(resp.read().decode()) + for m in (lm_data.get("data") or []): + if isinstance(m, dict): + mid = str(m.get("id") or "").strip() + if mid and {"id": mid, "label": mid} not in raw_models: + raw_models.append({"id": mid, "label": mid}) + except Exception: + logger.debug("LM Studio /models fetch failed at %s", endpoint) + + if raw_models: + models = _apply_provider_prefix(raw_models, pid, active_provider) + groups.append( + { + "provider": provider_name, + "provider_id": pid, + "models": models, + } + ) + elif pid in _PROVIDER_MODELS or pid in cfg.get("providers", {}): provider_cfg = cfg.get("providers", {}).get(pid, {}) + raw_models = [] + + # User-configured model allowlists are explicit local + # source-of-truth and should still beat auto-discovery. 
+ # Otherwise, ask Hermes CLI first so WebUI tracks the same + # live catalog as the agent/CLI picker; WebUI's static + # _PROVIDER_MODELS table is now a fallback only (#1240). if isinstance(provider_cfg, dict) and "models" in provider_cfg: cfg_models = provider_cfg["models"] if isinstance(cfg_models, dict): raw_models = [{"id": k, "label": k} for k in cfg_models.keys()] elif isinstance(cfg_models, list): raw_models = [{"id": k, "label": k} for k in cfg_models] + + if not raw_models: + raw_models = _models_from_live_provider_ids( + pid, + _read_live_provider_model_ids(pid), + ) + + if not raw_models: + raw_models = copy.deepcopy(_PROVIDER_MODELS.get(pid, [])) + + detected_models = auto_detected_models_by_provider.get(pid, []) + if detected_models and not raw_models: + raw_models = copy.deepcopy(detected_models) models = _apply_provider_prefix(raw_models, pid, active_provider) groups.append( { @@ -2070,7 +3515,24 @@ def get_available_models() -> dict: } ) else: - if auto_detected_models: + detected_models = auto_detected_models_by_provider.get(pid) + if detected_models: + models_for_group = copy.deepcopy(detected_models) + elif auto_detected_models: + # Don't fall back to the global auto_detected_models + # list for the bare "custom" PID when the active + # provider is something concrete (e.g. ai-gateway, + # openrouter). Those auto-detected entries already + # belong to the active provider's group — copying + # them into a Custom group too produces phantom + # duplicates with mismatched prefixes (#1881). 
+ if pid == "custom" and active_provider and active_provider != "custom": + models_for_group = [] + else: + models_for_group = copy.deepcopy(auto_detected_models) + else: + models_for_group = [] + if models_for_group: # Per-group deep copy so subsequent mutation by # _deduplicate_model_ids() (which prefixes ids with # @provider_id:) does not bleed into other groups @@ -2084,7 +3546,7 @@ def get_available_models() -> dict: { "provider": provider_name, "provider_id": pid, - "models": copy.deepcopy(auto_detected_models), + "models": models_for_group, } ) else: @@ -2095,34 +3557,69 @@ def get_available_models() -> dict: ) if default_model: - all_ids_norm = {_norm_model_id(m["id"]) for g in groups for m in g.get("models", [])} - if _norm_model_id(default_model) not in all_ids_norm: - label = _get_label_for_model(default_model, groups) - target_display = ( - _PROVIDER_DISPLAY.get(active_provider, active_provider or "").lower() - if active_provider - else "" + # Guard against provider-id values mistakenly stored in + # ``model.default``. The injection logic below puts ANY string + # into the picker as a fake option, so a stray provider id + # surfaces as a self-referential phantom model labelled e.g. + # ``Opencode GO`` — a 15th entry under the OpenCode Go group + # (#1568). The user's misconfig is real, but the picker is + # the wrong surface to surface it; we'd rather skip injection + # and emit a warning so the underlying config issue is logged. + _looks_like_provider_id = ( + str(default_model).strip().lower().replace("_", "-") in _PROVIDER_DISPLAY + or _canonicalise_provider_id(default_model) in _PROVIDER_DISPLAY + ) + if _looks_like_provider_id: + logger.warning( + "Suspicious model.default value %r — looks like a provider id, " + "not a model id. Skipping picker injection. 
Check `model.default` " + "in config.yaml.", + default_model, ) - injected = False - for g in groups: - if target_display and g.get("provider", "").lower() == target_display: - g["models"].insert(0, {"id": default_model, "label": label}) - injected = True - break - if not injected and groups: - groups.append( - { - "provider": "Default", - "provider_id": active_provider or "default", - "models": [{"id": default_model, "label": label}], - } + else: + all_ids_norm = {_norm_model_id(m["id"]) for g in groups for m in g.get("models", [])} + if _norm_model_id(default_model) not in all_ids_norm: + label = _get_label_for_model(default_model, groups) + target_display = ( + _PROVIDER_DISPLAY.get(active_provider, active_provider or "").lower() + if active_provider + else "" ) + injected = False + for g in groups: + if target_display and g.get("provider", "").lower() == target_display: + g["models"].insert(0, {"id": default_model, "label": label}) + injected = True + break + if not injected and groups: + groups.append( + { + "provider": "Default", + "provider_id": active_provider or "default", + "models": [{"id": default_model, "label": label}], + } + ) # Post-process: ensure model IDs are globally unique across groups. # When multiple providers expose the same bare model ID, prefix # collisions with @provider_id: so the frontend can distinguish them. _deduplicate_model_ids(groups) + # Defense-in-depth: drop any optgroup that ended up with zero models + # — those are pure UI noise. A zero-model group typically means a + # detection path added an id that has no static catalog AND the + # live-fetch returned empty (#1568 — the user's + # ``providers.opencode_go`` config-key path produced an empty + # ``Opencode_Go`` group at the end of the picker before this fix). + # Custom providers from ``custom_providers`` config are exempt — + # they may legitimately render with zero entries when the user + # hasn't filled in models yet but wants the card visible. 
+ groups = [ + g for g in groups + if g.get("models") + or (g.get("provider_id") or "").startswith("custom:") + ] + return { "active_provider": active_provider, "default_model": default_model, @@ -2168,6 +3665,7 @@ def get_available_models() -> dict: reload_config() _available_models_cache = None _available_models_cache_ts = 0.0 + _available_models_cache_source_fingerprint = None disk_groups = None # Serve from memory cache if fresh @@ -2180,6 +3678,7 @@ def get_available_models() -> dict: if disk_groups is not None: _available_models_cache = disk_groups _available_models_cache_ts = now + _available_models_cache_source_fingerprint = _models_cache_source_fingerprint() _save_models_cache_to_disk(disk_groups) return copy.deepcopy(disk_groups) @@ -2197,6 +3696,7 @@ def get_available_models() -> dict: with _cache_build_cv: _available_models_cache = result _available_models_cache_ts = time.monotonic() + _available_models_cache_source_fingerprint = _models_cache_source_fingerprint() _cache_build_in_progress = False _cache_build_cv.notify_all() _save_models_cache_to_disk(result) @@ -2210,6 +3710,57 @@ _INDEX_HTML_PATH = REPO_ROOT / "static" / "index.html" LOCK = threading.Lock() SESSIONS_MAX = 100 CHAT_LOCK = threading.Lock() + + +class StreamChannel: + """Broadcast SSE events to every connected browser tab for a stream. + + While no tab is connected, events are buffered so the first/reconnected + subscriber still receives the stream tail that arrived during the gap. + Once one or more subscribers are attached, new events are broadcast to all + of them instead of being consumed destructively by a single queue reader. 
+ """ + + def __init__(self): + self._lock = threading.Lock() + self._subscribers: list[queue.Queue] = [] + self._offline_buffer: list[tuple[str, object]] = [] + + def subscribe(self) -> queue.Queue: + q: queue.Queue = queue.Queue() + with self._lock: + # Replay buffered events to the new subscriber INSIDE the lock so a + # concurrent put_nowait() can't broadcast a newer event before we + # finish replaying the older buffered tail. queue.Queue.put_nowait + # is non-blocking on an unbounded queue, so holding the lock here + # is safe. Per Opus advisor on stage-292. + for item in self._offline_buffer: + q.put_nowait(item) + self._subscribers.append(q) + return q + + def unsubscribe(self, q: queue.Queue) -> None: + with self._lock: + try: + self._subscribers.remove(q) + except ValueError: + pass + + def put_nowait(self, item: tuple[str, object]) -> None: + with self._lock: + subscribers = list(self._subscribers) + if not subscribers: + self._offline_buffer.append(item) + return + self._offline_buffer.clear() + for q in subscribers: + q.put_nowait(item) + + +def create_stream_channel() -> StreamChannel: + return StreamChannel() + + STREAMS: dict = {} STREAMS_LOCK = threading.Lock() CANCEL_FLAGS: dict = {} @@ -2217,8 +3768,50 @@ AGENT_INSTANCES: dict = {} # stream_id -> AIAgent instance for interrupt propag STREAM_PARTIAL_TEXT: dict = {} # stream_id -> partial assistant text accumulated during streaming STREAM_REASONING_TEXT: dict = {} # stream_id -> reasoning trace accumulated during streaming (#1361 §A) STREAM_LIVE_TOOL_CALLS: dict = {} # stream_id -> live tool calls accumulated during streaming (#1361 §B) +STREAM_GOAL_RELATED: dict = {} # stream_id -> bool: only evaluate goal for goal-related turns (#1932) +PENDING_GOAL_CONTINUATION: set = set() # session_ids awaiting a goal continuation turn (#1932) + +# Active agent-run registry. 
This intentionally tracks worker lifecycle rather +# than SSE lifecycle: cancel/reconnect may remove STREAMS while the worker is +# still unwinding, blocked in a provider call, or waiting for delegated work. +ACTIVE_RUNS: dict = {} +ACTIVE_RUNS_LOCK = threading.Lock() +LAST_RUN_FINISHED_AT: float | None = None SERVER_START_TIME = time.time() + +def register_active_run(stream_id: str, **metadata) -> None: + """Mark a WebUI agent worker as alive until its outer finally exits.""" + if not stream_id: + return + now = time.time() + entry = dict(metadata or {}) + entry.setdefault("stream_id", stream_id) + entry.setdefault("started_at", now) + entry.setdefault("phase", "running") + with ACTIVE_RUNS_LOCK: + ACTIVE_RUNS[stream_id] = entry + + +def update_active_run(stream_id: str, **metadata) -> None: + """Update active-run metadata without creating a new run implicitly.""" + if not stream_id: + return + with ACTIVE_RUNS_LOCK: + entry = ACTIVE_RUNS.get(stream_id) + if entry is not None: + entry.update(metadata) + + +def unregister_active_run(stream_id: str) -> None: + """Remove a worker from the active-run registry and record idle start.""" + if not stream_id: + return + global LAST_RUN_FINISHED_AT + with ACTIVE_RUNS_LOCK: + ACTIVE_RUNS.pop(stream_id, None) + LAST_RUN_FINISHED_AT = time.time() + # Agent cache: reuse AIAgent across messages in the same WebUI session so that # _user_turn_count survives between turns. This mirrors the gateway's # _agent_cache pattern and is required for injectionFrequency: "first-turn". 
@@ -2286,12 +3879,15 @@ _SETTINGS_DEFAULTS = { "onboarding_completed": False, "send_key": "enter", # 'enter' or 'ctrl+enter' "show_token_usage": False, # show input/output token badge below assistant messages + "show_tps": False, # show tokens-per-second chip in assistant message headers "show_cli_sessions": False, # merge CLI sessions from state.db into the sidebar "sync_to_insights": False, # mirror WebUI token usage to state.db for /insights "check_for_updates": True, # check if webui/agent repos are behind upstream "theme": "dark", # light | dark | system "skin": "default", # accent color skin: default | ares | mono | slate | poseidon | sisyphus | charizard "font_size": "default", # small | default | large + "session_jump_buttons": False, # show Start/End transcript jump pills + "session_endless_scroll": False, # auto-load older transcript pages while scrolling upward "language": "en", # UI locale code; must match a key in static/i18n.js LOCALES "bot_name": os.getenv( "HERMES_WEBUI_BOT_NAME", "Hermes" @@ -2411,6 +4007,7 @@ _SETTINGS_ENUM_VALUES = { _SETTINGS_BOOL_KEYS = { "onboarding_completed", "show_token_usage", + "show_tps", "show_cli_sessions", "sync_to_insights", "check_for_updates", @@ -2419,6 +4016,8 @@ _SETTINGS_BOOL_KEYS = { "show_thinking", "simplified_tool_calling", "api_redact_enabled", + "session_jump_buttons", + "session_endless_scroll", } # Language codes are validated as short alphanumeric BCP-47-like tags (e.g. 'en', 'zh', 'fr') _SETTINGS_LANG_RE = __import__("re").compile(r"^[a-zA-Z]{2,10}(-[a-zA-Z0-9]{2,8})?$") diff --git a/api/dashboard_probe.py b/api/dashboard_probe.py new file mode 100644 index 00000000..cc15ef91 --- /dev/null +++ b/api/dashboard_probe.py @@ -0,0 +1,211 @@ +"""Safe server-side probe for the official Hermes Agent dashboard. + +The official `hermes dashboard` binds to 127.0.0.1:9119 by default and exposes +GET /api/status as a public, read-only identity/status endpoint. 
Keep all +probing server-side to avoid browser CORS/mixed-content failures, and only allow +loopback targets so a user-controlled setting cannot become an SSRF primitive. +""" + +from __future__ import annotations + +import json +import logging +import os +import urllib.request +from urllib.parse import urlparse + +logger = logging.getLogger(__name__) + +DEFAULT_DASHBOARD_PORT = 9119 +DEFAULT_DASHBOARD_TIMEOUT = 0.5 +DEFAULT_DASHBOARD_TARGETS = (("127.0.0.1", DEFAULT_DASHBOARD_PORT), ("localhost", DEFAULT_DASHBOARD_PORT)) +_DASHBOARD_ENABLED_VALUES = {"auto", "always", "never"} +_LOOPBACK_HOSTS = {"127.0.0.1", "localhost", "::1"} + + +def _base_url(host: str, port: int, scheme: str = "http") -> str: + display_host = f"[{host}]" if ":" in host and not host.startswith("[") else host + return f"{scheme}://{display_host}:{port}" + + +def normalize_dashboard_url(raw_url: str | None) -> tuple[str, int, str, str] | None: + """Return (host, port, scheme, base_url) for a safe loopback dashboard URL. + + Overrides intentionally accept only scheme + loopback host + explicit port. + Paths, query strings, fragments, and credentials are rejected: the probe + appends the official `/api/status` fingerprint itself and must not become an + arbitrary local URL fetcher. 
+ """ + raw = str(raw_url or "").strip() + if not raw: + return None + parsed = urlparse(raw) + if parsed.scheme not in {"http", "https"}: + raise ValueError("invalid dashboard URL scheme") + if parsed.username or parsed.password: + raise ValueError("invalid dashboard URL credentials") + host = parsed.hostname or "" + normalized_host = host.strip().lower() + if normalized_host not in _LOOPBACK_HOSTS: + raise ValueError("invalid dashboard URL host") + try: + port = parsed.port + except ValueError as exc: + raise ValueError("invalid dashboard URL port") from exc + if not isinstance(port, int) or not (1 <= port <= 65535): + raise ValueError("invalid dashboard URL port") + path = parsed.path or "" + if path not in ("", "/") or parsed.params or parsed.query or parsed.fragment: + raise ValueError("invalid dashboard URL path") + base = _base_url(normalized_host, port, parsed.scheme) + return normalized_host, port, parsed.scheme, base + + +def _looks_like_official_dashboard(payload: object) -> bool: + if not isinstance(payload, dict): + return False + version = payload.get("version") + if not isinstance(version, str) or not version.strip(): + return False + # Verified against current Hermes Agent `hermes_cli.web_server.get_status()`: + # /api/status returns version plus these Hermes-specific fields. Requiring at + # least one avoids treating any generic {version: ...} local service as the + # official dashboard. 
+ return any(key in payload for key in ("release_date", "hermes_home", "config_path", "gateway_running")) + + +def probe_official_dashboard( + host: str, + port: int, + timeout: float = DEFAULT_DASHBOARD_TIMEOUT, + scheme: str = "http", +) -> dict: + """Best-effort check that `hermes dashboard` is running on host:port.""" + try: + normalized_host = str(host or "").strip().lower() + if normalized_host not in _LOOPBACK_HOSTS: + raise ValueError("dashboard probe host must be loopback") + port = int(port) + if not (1 <= port <= 65535): + raise ValueError("dashboard probe port out of range") + if scheme not in {"http", "https"}: + raise ValueError("dashboard probe scheme must be http or https") + base = _base_url(normalized_host, port, scheme) + request = urllib.request.Request( + f"{base}/api/status", + headers={"Accept": "application/json", "User-Agent": "hermes-webui-dashboard-probe"}, + ) + with urllib.request.urlopen(request, timeout=timeout) as response: + if getattr(response, "status", None) != 200: + return {"running": False} + payload = json.loads(response.read().decode("utf-8")) + if not _looks_like_official_dashboard(payload): + return {"running": False} + result = {"running": True, "host": normalized_host, "port": port, "url": base} + version = payload.get("version") + if isinstance(version, str) and version.strip(): + result["version"] = version.strip() + return result + except Exception: + logger.debug("official Hermes dashboard probe failed", exc_info=True) + return {"running": False} + + +def _dashboard_config(config_data: dict | None = None) -> dict: + if config_data is None: + try: + from api.config import get_config + + config_data = get_config() + except Exception: + config_data = {} + webui_cfg = config_data.get("webui", {}) if isinstance(config_data, dict) else {} + dashboard_cfg = webui_cfg.get("dashboard", {}) if isinstance(webui_cfg, dict) else {} + return dashboard_cfg if isinstance(dashboard_cfg, dict) else {} + + +def 
get_dashboard_config(config_data: dict | None = None) -> dict: + """Return normalized profile config for the Settings → System controls.""" + dashboard_cfg = _dashboard_config(config_data) + enabled = str(dashboard_cfg.get("enabled", "auto") or "auto").strip().lower() + if enabled not in _DASHBOARD_ENABLED_VALUES: + enabled = "auto" + raw_url = str(dashboard_cfg.get("url") or "").strip() + if raw_url: + # Normalize before echoing so the UI never displays unsafe/stale values. + _host, _port, _scheme, raw_url = normalize_dashboard_url(raw_url) + return {"enabled": enabled, "url": raw_url} + + +def save_dashboard_config(payload: dict) -> dict: + """Persist dashboard link settings under webui.dashboard in config.yaml.""" + enabled = str((payload or {}).get("enabled", "auto") or "auto").strip().lower() + if enabled not in _DASHBOARD_ENABLED_VALUES: + raise ValueError("invalid dashboard enabled mode") + raw_url = str((payload or {}).get("url", "") or "").strip() + normalized_url = "" + if raw_url: + _host, _port, _scheme, normalized_url = normalize_dashboard_url(raw_url) + + from api import config as webui_config + + config_path = webui_config._get_config_path() + config_data = webui_config._load_yaml_config_file(config_path) + webui_section = config_data.get("webui") + if not isinstance(webui_section, dict): + webui_section = {} + config_data["webui"] = webui_section + dashboard_section = webui_section.get("dashboard") + if not isinstance(dashboard_section, dict): + dashboard_section = {} + webui_section["dashboard"] = dashboard_section + dashboard_section["enabled"] = enabled + if normalized_url: + dashboard_section["url"] = normalized_url + else: + dashboard_section.pop("url", None) + webui_config._save_yaml_config_file(config_path, config_data) + webui_config.reload_config() + return {"enabled": enabled, "url": normalized_url} + + +def _webui_bind_host_allows_auto_probe() -> bool: + raw_host = str(os.environ.get("HERMES_WEBUI_HOST") or "127.0.0.1").strip().lower() + 
host = raw_host.replace("[", "").replace("]", "") + return host in _LOOPBACK_HOSTS + + +def get_dashboard_status(config_data: dict | None = None) -> dict: + """Return the safe status payload consumed by GET /api/dashboard/status.""" + dashboard_cfg = _dashboard_config(config_data) + enabled = str(dashboard_cfg.get("enabled", "auto") or "auto").strip().lower() + if enabled not in _DASHBOARD_ENABLED_VALUES: + enabled = "auto" + if enabled == "never": + return {"running": False, "enabled": "never"} + + raw_url = dashboard_cfg.get("url") or dashboard_cfg.get("target") or "" + try: + override = normalize_dashboard_url(raw_url) + except ValueError: + return {"running": False, "enabled": enabled, "error": "invalid dashboard url"} + + targets: list[tuple[str, int, str, str]] + if override: + targets = [override] + else: + targets = [(host, port, "http", _base_url(host, port)) for host, port in DEFAULT_DASHBOARD_TARGETS] + + if enabled == "always": + host, port, scheme, base = targets[0] + return {"running": True, "enabled": enabled, "host": host, "port": port, "url": base} + + if not _webui_bind_host_allows_auto_probe(): + return {"running": False, "enabled": enabled} + + for host, port, scheme, _base in targets: + result = probe_official_dashboard(host, port, timeout=DEFAULT_DASHBOARD_TIMEOUT, scheme=scheme) + if result.get("running"): + result["enabled"] = enabled + return result + return {"running": False, "enabled": enabled} diff --git a/api/goals.py b/api/goals.py new file mode 100644 index 00000000..3e4e23ea --- /dev/null +++ b/api/goals.py @@ -0,0 +1,608 @@ +"""WebUI bridge for Hermes persistent session goals.""" + +from __future__ import annotations + +import copy +import logging +import re +import time +from pathlib import Path +from typing import Any, Dict, Optional + +logger = logging.getLogger(__name__) + +try: # Exposed as a module attribute so tests can monkeypatch it directly. 
+ from hermes_cli.goals import ( # type: ignore + CONTINUATION_PROMPT_TEMPLATE, + DEFAULT_MAX_TURNS, + GoalManager as _NativeGoalManager, + GoalState, + judge_goal, + ) +except Exception: # pragma: no cover - depends on installed hermes-agent + CONTINUATION_PROMPT_TEMPLATE = "" # type: ignore + DEFAULT_MAX_TURNS = 20 # type: ignore + _NativeGoalManager = None # type: ignore + GoalState = None # type: ignore + judge_goal = None # type: ignore + +GoalManager = _NativeGoalManager # type: ignore + +_DB_CACHE: dict[str, Any] = {} + + +def _default_max_turns() -> int: + """Return the configured /goal turn budget, defaulting to Hermes' 20 turns.""" + try: + from api import config as _config + + cfg = getattr(_config, "cfg", {}) or {} + goals_cfg = cfg.get("goals", {}) if isinstance(cfg, dict) else {} + if not isinstance(goals_cfg, dict): + return int(DEFAULT_MAX_TURNS or 20) + return max(1, int(goals_cfg.get("max_turns", DEFAULT_MAX_TURNS or 20) or 20)) + except Exception: + return int(DEFAULT_MAX_TURNS or 20) + + +def _meta_key(session_id: str) -> str: + return f"goal:{session_id}" + + +def _profile_db(profile_home: str | Path): + """Return a SessionDB pinned to *profile_home*, without reading HERMES_HOME. + + The upstream Hermes GoalManager persists through hermes_cli.goals.load_goal(), + which resolves SessionDB from process-global HERMES_HOME. WebUI sessions are + profile-scoped and can run concurrently, so the WebUI bridge uses an explicit + state.db path whenever the caller provides the session's profile home. 
+ """ + home = Path(profile_home).expanduser().resolve() + key = str(home) + cached = _DB_CACHE.get(key) + if cached is not None: + return cached + try: + from hermes_state import SessionDB # type: ignore + + db = SessionDB(db_path=home / "state.db") + except Exception as exc: # pragma: no cover - import/env dependent + logger.debug("GoalManager profile DB unavailable for %s: %s", home, exc) + return None + _DB_CACHE[key] = db + return db + + +class _ProfileGoalManager: + """Small WebUI-local GoalManager adapter with explicit profile persistence.""" + + def __init__(self, session_id: str, *, profile_home: str | Path, default_max_turns: int = 20): + if GoalState is None: + raise RuntimeError("Hermes goal state unavailable") + self.session_id = session_id + self.profile_home = Path(profile_home).expanduser().resolve() + self.default_max_turns = int(default_max_turns or DEFAULT_MAX_TURNS or 20) + self._state = self._load() + + @property + def state(self): + return self._state + + def _load(self): + db = _profile_db(self.profile_home) + if db is None or not self.session_id: + return None + try: + raw = db.get_meta(_meta_key(self.session_id)) + except Exception as exc: + logger.debug("GoalManager profile get_meta failed: %s", exc) + return None + if not raw: + return None + try: + return GoalState.from_json(raw) # type: ignore[union-attr] + except Exception as exc: + logger.warning("GoalManager profile state parse failed for %s: %s", self.session_id, exc) + return None + + def _save(self, state) -> None: + db = _profile_db(self.profile_home) + if db is None or not self.session_id or state is None: + return + try: + db.set_meta(_meta_key(self.session_id), state.to_json()) + except Exception as exc: + logger.debug("GoalManager profile set_meta failed: %s", exc) + + def is_active(self) -> bool: + return self._state is not None and self._state.status == "active" + + def has_goal(self) -> bool: + return self._state is not None and self._state.status in ("active", "paused") + 
+ def status_line(self) -> str: + s = self._state + if s is None or s.status in ("cleared",): + return "No active goal. Set one with /goal ." + turns = f"{s.turns_used}/{s.max_turns} turns" + if s.status == "active": + return f"⊙ Goal (active, {turns}): {s.goal}" + if s.status == "paused": + extra = f" — {s.paused_reason}" if s.paused_reason else "" + return f"⏸ Goal (paused, {turns}{extra}): {s.goal}" + if s.status == "done": + return f"✓ Goal done ({turns}): {s.goal}" + return f"Goal ({s.status}, {turns}): {s.goal}" + + def set(self, goal: str, *, max_turns: Optional[int] = None): + goal = (goal or "").strip() + if not goal: + raise ValueError("goal text is empty") + state = GoalState( # type: ignore[operator] + goal=goal, + status="active", + turns_used=0, + max_turns=int(max_turns) if max_turns else self.default_max_turns, + created_at=time.time(), + last_turn_at=0.0, + ) + self._state = state + self._save(state) + return state + + def pause(self, reason: str = "user-paused"): + if not self._state: + return None + self._state.status = "paused" + self._state.paused_reason = reason + self._save(self._state) + return self._state + + def resume(self, *, reset_budget: bool = True): + if not self._state: + return None + self._state.status = "active" + self._state.paused_reason = None + if reset_budget: + self._state.turns_used = 0 + self._save(self._state) + return self._state + + def clear(self) -> None: + if self._state is None: + return + self._state.status = "cleared" + self._save(self._state) + self._state = None + + def evaluate_after_turn(self, last_response: str, *, user_initiated: bool = True) -> Dict[str, Any]: + state = self._state + if state is None or state.status != "active": + return { + "status": state.status if state else None, + "should_continue": False, + "continuation_prompt": None, + "verdict": "inactive", + "reason": "no active goal", + "message": "", + } + + state.turns_used += 1 + state.last_turn_at = time.time() + + if judge_goal is None: + 
verdict, reason = "continue", "goal judge unavailable" + else: + verdict, reason = judge_goal(state.goal, str(last_response or "")) + state.last_verdict = verdict + state.last_reason = reason + + if verdict == "done": + state.status = "done" + self._save(state) + return { + "status": "done", + "should_continue": False, + "continuation_prompt": None, + "verdict": "done", + "reason": reason, + "message": f"✓ Goal achieved: {reason}", + } + + if state.turns_used >= state.max_turns: + state.status = "paused" + state.paused_reason = f"turn budget exhausted ({state.turns_used}/{state.max_turns})" + self._save(state) + return { + "status": "paused", + "should_continue": False, + "continuation_prompt": None, + "verdict": "continue", + "reason": reason, + "message": ( + f"⏸ Goal paused — {state.turns_used}/{state.max_turns} turns used. " + "Use /goal resume to keep going, or /goal clear to stop." + ), + } + + self._save(state) + return { + "status": "active", + "should_continue": True, + "continuation_prompt": self.next_continuation_prompt(), + "verdict": "continue", + "reason": reason, + "message": f"↻ Continuing toward goal ({state.turns_used}/{state.max_turns}): {reason}", + } + + def next_continuation_prompt(self) -> Optional[str]: + if not self._state or self._state.status != "active": + return None + return CONTINUATION_PROMPT_TEMPLATE.format(goal=self._state.goal) + + +def _manager(session_id: str, *, profile_home: str | Path | None = None): + if GoalManager is None: + return None + if profile_home and GoalManager is _NativeGoalManager and GoalState is not None: + try: + return _ProfileGoalManager( + session_id=session_id, + profile_home=profile_home, + default_max_turns=_default_max_turns(), + ) + except Exception as exc: + logger.debug("Profile-scoped GoalManager unavailable: %s", exc) + return None + return GoalManager(session_id=session_id, default_max_turns=_default_max_turns()) + + +def _state_payload(state: Any) -> Optional[Dict[str, Any]]: + if state is None: 
+ return None + return { + "goal": getattr(state, "goal", "") or "", + "status": getattr(state, "status", "") or "", + "turns_used": int(getattr(state, "turns_used", 0) or 0), + "max_turns": int(getattr(state, "max_turns", 0) or 0), + "last_verdict": getattr(state, "last_verdict", None), + "last_reason": getattr(state, "last_reason", None), + "paused_reason": getattr(state, "paused_reason", None), + } + + +def _payload( + *, + ok: bool = True, + action: str, + message: str, + state: Any = None, + error: str | None = None, + kickoff_prompt: str | None = None, + decision: Dict[str, Any] | None = None, + message_key: str | None = None, + message_args: list[Any] | None = None, +) -> Dict[str, Any]: + body: Dict[str, Any] = { + "ok": bool(ok), + "action": action, + "message": message, + "goal": _state_payload(state), + } + if error: + body["error"] = error + if kickoff_prompt: + body["kickoff_prompt"] = kickoff_prompt + if decision is not None: + body["decision"] = decision + if message_key: + body["message_key"] = message_key + if message_args is not None: + body["message_args"] = [a for a in message_args if a is not None] + return body + + +def _goal_status_payload(state: Any, *, default_message: str | None = None) -> Dict[str, Any]: + """Build localized-status style payload fields from a goal state.""" + if default_message is None: + default_message = "No active goal. Set one with /goal ." 
+ if state is None: + return {"message": default_message, "message_key": "goal_status_none"} + status = str(getattr(state, "status", "") or "").strip() + if status in ("cleared",): + return {"message": default_message, "message_key": "goal_status_none"} + turns_used = int(getattr(state, "turns_used", 0) or 0) + max_turns = int(getattr(state, "max_turns", 0) or 0) + goal = str(getattr(state, "goal", "") or "") + if status == "active": + return { + "message": f"⊙ Goal (active, {turns_used}/{max_turns} turns): {goal}", + "message_key": "goal_status_active", + "message_args": [turns_used, max_turns, goal], + } + if status == "paused": + reason = str(getattr(state, "paused_reason", "") or "") + return { + "message": f"⏸ Goal (paused, {turns_used}/{max_turns}{' — ' + reason if reason else ''}): {goal}", + "message_key": "goal_status_paused", + "message_args": [turns_used, max_turns, reason, goal], + } + if status == "done": + return { + "message": f"✓ Goal done ({turns_used}/{max_turns}): {goal}", + "message_key": "goal_status_done", + "message_args": [turns_used, max_turns, goal], + } + return { + "message": f"Goal ({status}, {turns_used}/{max_turns}): {goal}", + "message_args": [status, turns_used, max_turns, goal], + } + + +def _extract_goal_turns_from_message(message: str) -> tuple[int, int]: + """Best-effort extraction for continuation messages like '(1/20)'.""" + if not message: + return 0, 0 + match = re.search(r"\((\d+)\s*/\s*(\d+)\)", message) + if not match: + return 0, 0 + try: + return int(match.group(1)), int(match.group(2)) + except Exception: + return 0, 0 + + +def _goal_decision_payload( + decision: Dict[str, Any], + state: Any, +) -> Dict[str, Any]: + """Attach goal message i18n key/args to an evaluation decision.""" + if not isinstance(decision, dict): + return decision + status = str(decision.get("status") or "").strip() + reason = str(decision.get("reason") or "").strip() + turns_used = int(getattr(state, "turns_used", 0) or 0) + max_turns = 
int(getattr(state, "max_turns", 0) or 0) + if (turns_used, max_turns) == (0, 0): + turns_used, max_turns = _extract_goal_turns_from_message(str(decision.get("message") or "")) + + if status == "done": + return { + **decision, + "message_key": "goal_achieved", + "message_args": [reason], + } + if status == "paused": + return { + **decision, + "message_key": "goal_paused_budget_exhausted", + "message_args": [turns_used, max_turns], + } + if decision.get("should_continue"): + return { + **decision, + "message_key": "goal_continuing", + "message_args": [turns_used, max_turns, reason], + } + return decision + + +def goal_state_snapshot(session_id: str, *, profile_home: str | Path | None = None) -> Any: + """Return a deep copy of current goal state for rollback before kickoff.""" + mgr = _manager(str(session_id or ""), profile_home=profile_home) + if mgr is None: + return None + return copy.deepcopy(getattr(mgr, "state", None)) + + +def restore_goal_state(session_id: str, snapshot: Any, *, profile_home: str | Path | None = None) -> None: + """Restore a prior goal state after kickoff stream creation fails.""" + mgr = _manager(str(session_id or ""), profile_home=profile_home) + if mgr is None: + return + if snapshot is None: + try: + mgr.clear() + except Exception: + pass + return + if isinstance(mgr, _ProfileGoalManager): + mgr._state = snapshot + mgr._save(snapshot) + return + try: + from hermes_cli.goals import save_goal # type: ignore + + save_goal(str(session_id or ""), snapshot) + except Exception as exc: # pragma: no cover - native fallback only + logger.debug("Goal state restore failed for %s: %s", session_id, exc) + + +def goal_command_payload( + session_id: str, + args: str = "", + *, + stream_running: bool = False, + profile_home: str | Path | None = None, +) -> Dict[str, Any]: + """Return the WebUI response payload for a /goal command. 
+ + Mirrors the gateway command semantics: + - /goal or /goal status shows status + - /goal pause pauses + - /goal resume resumes without auto-starting a turn + - /goal clear|stop|done clears + - /goal sets a new active goal and returns kickoff_prompt so the + caller can start the first normal user-role turn immediately. + """ + sid = str(session_id or "").strip() + if not sid: + return _payload(ok=False, action="error", error="missing_session", message="session_id required") + + mgr = _manager(sid, profile_home=profile_home) + if mgr is None: + return _payload(ok=False, action="error", error="unavailable", message="Goals unavailable on this session.") + + text = str(args or "").strip() + lower = text.lower() + + if not text or lower == "status": + state = getattr(mgr, "state", None) + status_payload = _goal_status_payload(state) + return _payload(action="status", state=state, **status_payload) + + if lower == "pause": + state = mgr.pause(reason="user-paused") + if state is None: + return _payload( + ok=False, + action="pause", + error="no_goal", + message="No goal set.", + message_key="goal_no_goal", + ) + return _payload( + action="pause", + message=f"⏸ Goal paused: {state.goal}", + message_key="goal_paused", + message_args=[str(state.goal)], + state=state, + ) + + if lower == "resume": + state = mgr.resume() + if state is None: + return _payload( + ok=False, + action="resume", + error="no_goal", + message="No goal to resume.", + message_key="goal_no_goal", + ) + return _payload( + action="resume", + message=( + f"▶ Goal resumed: {state.goal}\n" + "Send a new message, or type continue, to kick it off." + ), + message_key="goal_resumed", + message_args=[str(state.goal)], + state=state, + ) + + if lower in ("clear", "stop", "done"): + had = bool(mgr.has_goal()) + mgr.clear() + return _payload( + action="clear", + message="Goal cleared." 
if had else "No active goal.", + message_key="goal_cleared" if had else "goal_no_goal", + state=getattr(mgr, "state", None), + ) + + if stream_running: + return _payload( + ok=False, + action="set", + error="agent_running", + message=( + "Agent is running — use /goal status / pause / clear mid-run, " + "or /stop before setting a new goal." + ), + ) + + try: + state = mgr.set(text) + except ValueError as exc: + return _payload(ok=False, action="set", error="invalid_goal", message=f"Invalid goal: {exc}") + + return _payload( + action="set", + message=( + f"⊙ Goal set ({state.max_turns}-turn budget): {state.goal}\n" + "I'll keep working until the goal is done, you pause/clear it, or the budget is exhausted.\n" + "Controls: /goal status · /goal pause · /goal resume · /goal clear" + ), + message_key="goal_set", + message_args=[state.max_turns, state.goal], + state=state, + kickoff_prompt=state.goal, + ) + + +def has_active_goal( + session_id: str, + *, + profile_home: str | Path | None = None, +) -> bool: + """Return True when the session has an active standing goal to evaluate.""" + sid = str(session_id or "").strip() + if not sid: + return False + mgr = _manager(sid, profile_home=profile_home) + if mgr is None: + return False + try: + return bool(mgr.is_active()) + except Exception as exc: + logger.debug("goal active-state check failed for session=%s: %s", sid, exc) + return False + + +def evaluate_goal_after_turn( + session_id: str, + last_response: str, + *, + user_initiated: bool = True, + profile_home: str | Path | None = None, +) -> Dict[str, Any]: + """Evaluate a completed turn against the standing goal, if any.""" + sid = str(session_id or "").strip() + if not sid: + return { + "status": None, + "should_continue": False, + "continuation_prompt": None, + "verdict": "inactive", + "reason": "missing session_id", + "message": "", + } + mgr = _manager(sid, profile_home=profile_home) + if mgr is None: + return { + "status": None, + "should_continue": False, + 
"continuation_prompt": None, + "verdict": "inactive", + "reason": "goals unavailable", + "message": "", + } + try: + if not mgr.is_active(): + return { + "status": getattr(getattr(mgr, "state", None), "status", None), + "should_continue": False, + "continuation_prompt": None, + "verdict": "inactive", + "reason": "no active goal", + "message": "", + } + decision = mgr.evaluate_after_turn(str(last_response or ""), user_initiated=user_initiated) + except Exception as exc: + logger.debug("goal evaluation failed for session=%s: %s", sid, exc) + return { + "status": None, + "should_continue": False, + "continuation_prompt": None, + "verdict": "error", + "reason": f"goal evaluation failed: {type(exc).__name__}", + "message": "", + } + if not isinstance(decision, dict): + decision = {} + decision.setdefault("should_continue", False) + decision.setdefault("continuation_prompt", None) + decision.setdefault("message", "") + decision = dict(decision) + decision = _goal_decision_payload(decision, getattr(mgr, "state", None)) + return decision diff --git a/api/helpers.py b/api/helpers.py index f6c8b584..7cf010c7 100644 --- a/api/helpers.py +++ b/api/helpers.py @@ -2,6 +2,7 @@ Hermes Web UI -- HTTP helper functions. 
""" import json as _json +import os import re as _re from pathlib import Path from api.config import IMAGE_EXTS, MD_EXTS @@ -45,7 +46,7 @@ def _security_headers(handler): "default-src 'self' https://*.cloudflareaccess.com; " "script-src 'self' 'unsafe-inline' https://cdn.jsdelivr.net https://static.cloudflareinsights.com; " "style-src 'self' 'unsafe-inline' https://cdn.jsdelivr.net https://fonts.googleapis.com; " - "img-src 'self' data: https: blob:; font-src 'self' data: https://cdn.jsdelivr.net https://fonts.gstatic.com; connect-src 'self'; " + "img-src 'self' data: https: blob:; font-src 'self' data: https://cdn.jsdelivr.net https://fonts.gstatic.com; connect-src 'self' https://cdn.jsdelivr.net; " "manifest-src 'self' https://*.cloudflareaccess.com; " "base-uri 'self'; form-action 'self'" ) @@ -252,8 +253,13 @@ def read_body(handler) -> dict: PROFILE_COOKIE_NAME = 'hermes_profile' +def get_profile_cookie_name() -> str: + """Return the cookie name used to persist the active WebUI profile.""" + return os.getenv('WEBUI_PROFILE_COOKIE_NAME', PROFILE_COOKIE_NAME) + + def get_profile_cookie(handler) -> str | None: - """Extract the hermes_profile cookie value from the request, or None.""" + """Extract the active-profile cookie value from the request, or None.""" cookie_header = handler.headers.get('Cookie', '') if not cookie_header: return None @@ -263,7 +269,8 @@ def get_profile_cookie(handler) -> str | None: cookie.load(cookie_header) except _hc.CookieError: return None - morsel = cookie.get(PROFILE_COOKIE_NAME) + cookie_name = get_profile_cookie_name() + morsel = cookie.get(cookie_name) if morsel and morsel.value: # Validate against profile-name pattern before trusting from api.profiles import _PROFILE_ID_RE @@ -274,7 +281,7 @@ def get_profile_cookie(handler) -> str | None: def build_profile_cookie(name: str) -> str: - """Build a Set-Cookie header value for the hermes_profile cookie. + """Build a Set-Cookie header value for the active-profile cookie. 
Always persist the selected profile in the cookie, including 'default'. Clearing the cookie causes the backend to fall back to process-global @@ -287,8 +294,9 @@ def build_profile_cookie(name: str) -> str: """ import http.cookies as _hc cookie = _hc.SimpleCookie() - cookie[PROFILE_COOKIE_NAME] = name - cookie[PROFILE_COOKIE_NAME]['path'] = '/' - cookie[PROFILE_COOKIE_NAME]['httponly'] = True - cookie[PROFILE_COOKIE_NAME]['samesite'] = 'Lax' - return cookie[PROFILE_COOKIE_NAME].OutputString() + cookie_name = get_profile_cookie_name() + cookie[cookie_name] = name + cookie[cookie_name]['path'] = '/' + cookie[cookie_name]['httponly'] = True + cookie[cookie_name]['samesite'] = 'Lax' + return cookie[cookie_name].OutputString() diff --git a/api/kanban_bridge.py b/api/kanban_bridge.py new file mode 100644 index 00000000..63bef9cd --- /dev/null +++ b/api/kanban_bridge.py @@ -0,0 +1,1255 @@ +"""Hermes Kanban bridge for the WebUI. + +This module exposes a full CRUD API under ``/api/kanban/*`` while keeping +Hermes Agent's ``hermes_cli.kanban_db`` as the only source of truth. + +Supported operations: +- Task CRUD (create, read, patch, bulk update, archive) +- Multi-board management (list, create, archive, switch) +- Task dependency links (create, delete) +- SSE live event stream for real-time updates +- Comments and worker dispatch integration +""" + +from __future__ import annotations + +import json +import time +from dataclasses import asdict, is_dataclass +from urllib.parse import parse_qs, unquote + +from api.helpers import bad, j + +BOARD_COLUMNS = ["triage", "todo", "ready", "running", "blocked", "done"] +_TASK_PREFIX = "/api/kanban/tasks/" + + +def _kb(): + from hermes_cli import kanban_db as kb + + return kb + + +def _resolve_board(parsed): + """Validate and normalise a ?board= query param. + + Returns the normalised slug, or ``None`` when the caller omitted the + param. 
Raises ValueError on a malformed slug so the bridge surfaces a + clean 400 instead of a 500 from deeper in the library. + """ + raw = (parse_qs(parsed.query or "").get("board") or [None])[0] + return _normalise_board_or_raise(raw) + + +def _resolve_board_from_body(body): + """Same contract as :func:`_resolve_board` but reads ``board`` from a + parsed JSON body (POST / PATCH / DELETE handlers receive a dict, not + a parsed URL). Returns ``None`` when the body did not specify a board. + """ + if not isinstance(body, dict): + return None + raw = body.get("board") + if raw is None or (isinstance(raw, str) and raw.strip() == ""): + return None + return _normalise_board_or_raise(raw) + + +def _normalise_board_or_raise(raw): + """Shared normalisation + existence check for board slugs.""" + if raw is None or (isinstance(raw, str) and raw.strip() == ""): + return None + kb = _kb() + try: + normed = kb._normalize_board_slug(raw) + except (ValueError, AttributeError) as exc: + raise ValueError(f"invalid board slug: {raw!r}") from exc + if not normed: + return None + # Allow the default board even if it has not been materialised yet + # (kb.init_db will create it lazily). For non-default boards, require + # the directory exists or _conn would fail with a confusing OperationalError. 
+ try: + default_slug = getattr(kb, "DEFAULT_BOARD", "default") + except Exception: + default_slug = "default" + if normed != default_slug and not kb.board_exists(normed): + raise LookupError(f"board {normed!r} does not exist") + return normed + + +def _conn(board=None): + kb = _kb() + kb.init_db(board=board) + return kb.connect(board=board) + + +def _obj_dict(value): + if value is None: + return None + if is_dataclass(value): + return asdict(value) + if isinstance(value, dict): + return dict(value) + return dict(getattr(value, "__dict__", {})) + + +def _task_dict(task): + data = _obj_dict(task) + if not data: + return data + try: + age = _kb().task_age(task) + except Exception: + age = None + data["age_seconds"] = age + data["age"] = age + data.setdefault("progress", None) + return data + + +def _latest_event_id(conn) -> int: + try: + row = conn.execute("SELECT COALESCE(MAX(id), 0) AS latest FROM task_events").fetchone() + return int(row["latest"] or 0) + except Exception: + return 0 + + +def _bool_query(parsed, name: str, default: bool = False) -> bool: + raw = (parse_qs(parsed.query or "").get(name) or [None])[0] + if raw is None: + return default + return str(raw).strip().lower() in {"1", "true", "yes", "on"} + + +def _str_query(parsed, name: str): + raw = (parse_qs(parsed.query or "").get(name) or [None])[0] + return str(raw).strip() or None if raw is not None else None + + +def _int_query(parsed, name: str, default=None, *, minimum=None, maximum=None): + raw = _str_query(parsed, name) + if raw is None: + return default + try: + value = int(raw) + except (TypeError, ValueError): + return default + if minimum is not None: + value = max(minimum, value) + if maximum is not None: + value = min(maximum, value) + return value + + +def _task_link_counts(conn, tasks): + counts = {task.id: {"parents": 0, "children": 0} for task in tasks} + try: + rows = conn.execute("SELECT parent_id, child_id FROM task_links").fetchall() + except Exception: + return counts + for row 
in rows: + counts.setdefault(row["parent_id"], {"parents": 0, "children": 0})["children"] += 1 + counts.setdefault(row["child_id"], {"parents": 0, "children": 0})["parents"] += 1 + return counts + + +def _comment_counts(conn): + try: + rows = conn.execute( + "SELECT task_id, COUNT(*) AS n FROM task_comments GROUP BY task_id" + ).fetchall() + except Exception: + return {} + return {row["task_id"]: int(row["n"] or 0) for row in rows} + + +def _board_payload(parsed): + board = _resolve_board(parsed) + kb = _kb() + tenant = _str_query(parsed, "tenant") + assignee = _str_query(parsed, "assignee") + include_archived = _bool_query(parsed, "include_archived", False) + only_mine = _bool_query(parsed, "only_mine", False) + since = _int_query(parsed, "since", None, minimum=0) + profile = None + if only_mine and not assignee: + try: + from api.profiles import get_active_profile_name + + profile = get_active_profile_name() or "default" + except Exception: + profile = "default" + assignee = profile + + with _conn(board=board) as conn: + latest_event_id = _latest_event_id(conn) + if since is not None and since >= latest_event_id: + return {"changed": False, "latest_event_id": latest_event_id, "read_only": False} + + tasks = kb.list_tasks( + conn, + tenant=tenant, + assignee=assignee, + include_archived=include_archived, + ) + link_counts = _task_link_counts(conn, tasks) + comment_counts = _comment_counts(conn) + + def row(task): + data = _task_dict(task) + data["link_counts"] = link_counts.get(task.id, {"parents": 0, "children": 0}) + data["comment_count"] = comment_counts.get(task.id, 0) + return data + + columns = [ + {"name": name, "tasks": [row(task) for task in tasks if task.status == name]} + for name in BOARD_COLUMNS + ] + if include_archived: + columns.append({ + "name": "archived", + "tasks": [row(task) for task in tasks if task.status == "archived"], + }) + return { + "columns": columns, + "tenants": sorted({task.tenant for task in tasks if getattr(task, "tenant", 
None)}), + "assignees": sorted({task.assignee for task in tasks if getattr(task, "assignee", None)}), + "latest_event_id": latest_event_id, + "changed": True, + "read_only": False, + "filters": { + "tenant": tenant, + "assignee": assignee, + "include_archived": include_archived, + "only_mine": only_mine, + "profile": profile, + }, + } + + + +def _validate_status(status: str) -> str: + value = str(status or "").strip().lower() + allowed = set(BOARD_COLUMNS) | {"archived"} + if value not in allowed: + raise ValueError(f"invalid status: {value}") + return value + + +def _set_status_direct(conn, task_id: str, new_status: str) -> bool: + """Direct status write for drag-drop moves not covered by structured verbs. + + Used for ``todo <-> ready`` and ``running -> ready`` transitions. The + structured verbs (``complete_task``, ``block_task``, ``unblock_task``, + ``archive_task``, ``claim_task``) own their own state changes; this helper + handles the remainder while preserving the dispatcher's contract: + + - When transitioning OFF ``running`` to anything other than the terminal + verbs, claim_lock / claim_expires / worker_pid are nulled so the + dispatcher doesn't see a phantom-running task. The active run (if any) + is closed with ``outcome='reclaimed'`` so attempt history isn't + orphaned. + - When transitioning INTO ``running``, claim fields are preserved (this + function is NOT used for entering 'running' — that goes through + ``kb.claim_task()`` and the bridge rejects raw 'running' status writes + with HTTP 400). + + Mirrors the agent dashboard plugin's ``_set_status_direct`` + (plugins/kanban/dashboard/plugin_api.py) so first-party clients see + identical behaviour from either surface. 
+ """ + kb = _kb() + with kb.write_txn(conn): + prev = conn.execute( + "SELECT status, current_run_id FROM tasks WHERE id = ?", + (task_id,), + ).fetchone() + if prev is None: + return False + was_running = prev["status"] == "running" + cur = conn.execute( + "UPDATE tasks SET status = ?, " + " claim_lock = CASE WHEN ? = 'running' THEN claim_lock ELSE NULL END, " + " claim_expires = CASE WHEN ? = 'running' THEN claim_expires ELSE NULL END, " + " worker_pid = CASE WHEN ? = 'running' THEN worker_pid ELSE NULL END " + "WHERE id = ?", + (new_status, new_status, new_status, new_status, task_id), + ) + if cur.rowcount != 1: + return False + run_id = None + if was_running and new_status != "running" and prev["current_run_id"]: + try: + run_id = kb._end_run( + conn, task_id, + outcome="reclaimed", status="reclaimed", + summary=f"status changed to {new_status} (webui/direct)", + ) + except Exception: + # _end_run is best-effort here; the status flip itself is + # what matters for sidebar rendering. 
+ run_id = None + conn.execute( + "INSERT INTO task_events (task_id, run_id, kind, payload, created_at) " + "VALUES (?, ?, 'status', ?, ?)", + (task_id, run_id, json.dumps({"status": new_status, "source": "webui"}), int(time.time())), + ) + if new_status in ("done", "ready") and hasattr(kb, "recompute_ready"): + try: + kb.recompute_ready(conn) + except Exception: + pass + return True + + +def _create_task_payload(body: dict, *, board=None): + title = str(body.get("title") or "").strip() + if not title: + raise ValueError("title is required") + try: + priority = int(body.get("priority") or 0) + except (TypeError, ValueError): + raise ValueError("priority must be an integer") + kb = _kb() + requested_status = body.get("status") + with _conn(board=board) as conn: + task_id = kb.create_task( + conn, + title=title, + body=body.get("body") or None, + assignee=body.get("assignee") or None, + created_by=body.get("created_by") or "webui", + tenant=body.get("tenant") or None, + priority=priority, + parents=body.get("parents") or (), + triage=bool(body.get("triage") or False), + workspace_kind=body.get("workspace_kind") or "scratch", + workspace_path=body.get("workspace_path") or None, + idempotency_key=body.get("idempotency_key") or None, + max_runtime_seconds=body.get("max_runtime_seconds") or None, + skills=body.get("skills") or None, + ) + if requested_status: + _patch_task(conn, task_id, {"status": requested_status}) + return {"task": _task_dict(kb.get_task(conn, task_id)), "read_only": False} + + +def _patch_task(conn, task_id: str, body: dict): + kb = _kb() + task = kb.get_task(conn, task_id) + if not task: + raise LookupError("task not found") + + updates = {} + if "title" in body: + title = str(body.get("title") or "").strip() + if not title: + raise ValueError("title is required") + updates["title"] = title + if "body" in body: + updates["body"] = body.get("body") or None + if "tenant" in body: + updates["tenant"] = body.get("tenant") or None + if "priority" in 
body: + try: + updates["priority"] = int(body.get("priority") or 0) + except (TypeError, ValueError): + raise ValueError("priority must be an integer") + + for field, value in updates.items(): + if hasattr(task, field): + try: + setattr(task, field, value) + except Exception: + pass + if updates: + assignments = ", ".join(f"{field} = ?" for field in updates) + conn.execute(f"UPDATE tasks SET {assignments} WHERE id = ?", [*updates.values(), task_id]) + if hasattr(kb, "_append_event"): + kb._append_event(conn, task_id, "updated", {"fields": list(updates), "source": "webui"}) + + if "assignee" in body: + if not kb.assign_task(conn, task_id, body.get("assignee") or None): + raise LookupError("task not found") + + if "status" not in body or body.get("status") in (None, ""): + return + status = _validate_status(body.get("status")) + if status == "done": + if not kb.complete_task(conn, task_id, result=body.get("result"), summary=body.get("summary")): + raise LookupError("task not found") + elif status == "blocked": + if not kb.block_task(conn, task_id, reason=body.get("block_reason") or body.get("reason")): + raise LookupError("task not found") + elif status == "archived": + if not kb.archive_task(conn, task_id): + raise LookupError("task not found") + elif status == "running": + # The 'running' state is owned by the kanban dispatcher / claim + # protocol — entering it via raw UPDATE bypasses claim_lock, + # claim_expires, started_at, and worker_pid, which leaves the task + # in a state the dispatcher treats as "phantom claimed" and may + # reclaim or hide. Match the agent dashboard plugin's contract + # (plugins/kanban/dashboard/plugin_api.py update_task) by rejecting + # this transition with HTTP 400. Workers enter 'running' via + # kb.claim_task(); UI users should use the dispatcher nudge. 
+ raise ValueError( + "Cannot set status to 'running' directly; use the dispatcher/claim path" + ) + elif status == "ready": + # If the task is currently 'blocked', use the structured unblock + # verb so the unblocked event fires. Otherwise it's a legitimate + # drag-drop or click move (e.g. todo → ready, running → ready when + # the user yanks a stuck worker back to the queue) and we use the + # claim-aware direct status write. + current = kb.get_task(conn, task_id) + if not current: + raise LookupError("task not found") + if current.status == "blocked": + if not kb.unblock_task(conn, task_id): + raise LookupError("task not found") + else: + if not _set_status_direct(conn, task_id, "ready"): + raise LookupError("task not found") + elif status in ("triage", "todo"): + # Direct status write for drag-drop moves between non-running, + # non-terminal columns. Uses the claim-aware helper that nulls out + # claim_lock / claim_expires / worker_pid when leaving 'running' + # and ends any active run with outcome='reclaimed'. + if not _set_status_direct(conn, task_id, status): + raise LookupError("task not found") + else: + # _validate_status guarantees we never reach here, but be defensive. 
+ raise ValueError(f"unknown status: {status}") + + +def _patch_task_payload(task_id: str, body: dict, *, board=None): + task_id = str(task_id or "").strip() + if not task_id: + raise ValueError("task_id is required") + kb = _kb() + with _conn(board=board) as conn: + _patch_task(conn, task_id, body) + return {"task": _task_dict(kb.get_task(conn, task_id)), "read_only": False} + + +def _comment_payload(task_id: str, body: dict, *, board=None): + task_id = str(task_id or "").strip() + comment_body = str(body.get("body") or "").strip() + if not task_id: + raise ValueError("task_id is required") + if not comment_body: + raise ValueError("body is required") + kb = _kb() + with _conn(board=board) as conn: + if not kb.get_task(conn, task_id): + raise LookupError("task not found") + comment_id = kb.add_comment(conn, task_id, body.get("author") or "webui", comment_body) + return {"ok": True, "comment_id": comment_id, "read_only": False} + + +def _link_tasks_payload(body: dict, *, unlink: bool = False, board=None): + parent_id = str(body.get("parent_id") or "").strip() + child_id = str(body.get("child_id") or "").strip() + if not parent_id or not child_id: + raise ValueError("parent_id and child_id are required") + kb = _kb() + with _conn(board=board) as conn: + if not kb.get_task(conn, parent_id): + raise LookupError("parent task not found") + if not kb.get_task(conn, child_id): + raise LookupError("child task not found") + if unlink: + changed = kb.unlink_tasks(conn, parent_id, child_id) + return {"ok": True, "changed": bool(changed), "parent_id": parent_id, "child_id": child_id, "read_only": False} + kb.link_tasks(conn, parent_id, child_id) + return {"ok": True, "parent_id": parent_id, "child_id": child_id, "read_only": False} + +def _links_for(conn, task_id: str) -> dict: + kb = _kb() + return { + "parents": kb.parent_ids(conn, task_id), + "children": kb.child_ids(conn, task_id), + } + + +def _task_detail_payload(task_id: str, *, board=None): + kb = _kb() + with 
_conn(board=board) as conn: + task = kb.get_task(conn, task_id) + if not task: + return None + return { + "task": _task_dict(task), + "comments": [_obj_dict(c) for c in kb.list_comments(conn, task_id)], + "events": [_obj_dict(e) for e in kb.list_events(conn, task_id)], + "links": _links_for(conn, task_id), + "runs": [_obj_dict(r) for r in kb.list_runs(conn, task_id)], + "read_only": False, + } + + +def _events_payload(parsed): + board = _resolve_board(parsed) + since = _int_query(parsed, "since", 0, minimum=0) + limit = _int_query(parsed, "limit", 200, minimum=1, maximum=200) + with _conn(board=board) as conn: + rows = conn.execute( + "SELECT id, task_id, run_id, kind, payload, created_at " + "FROM task_events WHERE id > ? ORDER BY id ASC LIMIT ?", + (since, limit), + ).fetchall() + events = [] + cursor = since + for row in rows: + try: + payload = json.loads(row["payload"]) if row["payload"] else None + except Exception: + payload = None + events.append({ + "id": row["id"], + "task_id": row["task_id"], + "run_id": row["run_id"], + "kind": row["kind"], + "payload": payload, + "created_at": row["created_at"], + }) + cursor = int(row["id"]) + latest = _latest_event_id(conn) + if not events: + cursor = latest if since >= latest else since + return {"events": events, "cursor": cursor, "latest_event_id": cursor, "read_only": False} + + +def _config_payload(*, board=None): + kb = _kb() + try: + with _conn(board=board) as conn: + try: + assignees = list(kb.known_assignees(conn)) + except Exception: + assignees = [] + except Exception: + assignees = [] + try: + from hermes_cli.config import load_config + + cfg = load_config() or {} + except Exception: + cfg = {} + k_cfg = ((cfg.get("dashboard") or {}).get("kanban") or {}) + return { + "columns": BOARD_COLUMNS, + "assignees": assignees, + "default_tenant": k_cfg.get("default_tenant") or "", + "lane_by_profile": bool(k_cfg.get("lane_by_profile", True)), + "include_archived_by_default": 
bool(k_cfg.get("include_archived_by_default", False)), + "render_markdown": bool(k_cfg.get("render_markdown", True)), + "read_only": False, + } + + +def _stats_payload(*, board=None): + kb = _kb() + with _conn(board=board) as conn: + if hasattr(kb, "board_stats"): + return kb.board_stats(conn) + rows = conn.execute( + "SELECT status, assignee, COUNT(*) AS n FROM tasks WHERE status != 'archived' GROUP BY status, assignee" + ).fetchall() + by_status = {} + by_assignee = {} + for row in rows: + n = int(row["n"] or 0) + by_status[row["status"]] = by_status.get(row["status"], 0) + n + assignee = row["assignee"] or "unassigned" + by_assignee[assignee] = by_assignee.get(assignee, 0) + n + return {"by_status": by_status, "by_assignee": by_assignee} + + +def _assignees_payload(*, board=None): + kb = _kb() + with _conn(board=board) as conn: + try: + assignees = list(kb.known_assignees(conn)) + except Exception: + rows = conn.execute( + "SELECT DISTINCT assignee FROM tasks WHERE assignee IS NOT NULL AND assignee != '' ORDER BY assignee" + ).fetchall() + assignees = [row["assignee"] for row in rows] + return {"assignees": assignees} + + +def _task_log_payload(parsed, task_id: str): + board = _resolve_board(parsed) + kb = _kb() + tail = _int_query(parsed, "tail", None, minimum=1, maximum=2_000_000) + with _conn(board=board) as conn: + if not kb.get_task(conn, task_id): + return None + if not hasattr(kb, "read_worker_log"): + return {"task_id": task_id, "path": "", "exists": False, "size_bytes": 0, "content": "", "truncated": False} + content = kb.read_worker_log(task_id, tail_bytes=tail) + log_path = kb.worker_log_path(task_id) if hasattr(kb, "worker_log_path") else None + try: + size = log_path.stat().st_size if log_path and log_path.exists() else 0 + except OSError: + size = 0 + return { + "task_id": task_id, + "path": str(log_path or ""), + "exists": content is not None, + "size_bytes": size, + "content": content or "", + "truncated": bool(tail and size > tail), + } + + +def 
_bulk_tasks_payload(body: dict, *, board=None): + ids = [str(i).strip() for i in (body.get("ids") or []) if str(i).strip()] + if not ids: + raise ValueError("ids is required") + results = [] + kb = _kb() + with _conn(board=board) as conn: + for task_id in ids: + entry = {"id": task_id, "ok": True} + try: + if not kb.get_task(conn, task_id): + entry.update(ok=False, error="not found") + results.append(entry) + continue + if body.get("archive"): + if not kb.archive_task(conn, task_id): + entry.update(ok=False, error="archive refused") + elif body.get("status") is not None: + _patch_task(conn, task_id, {"status": body.get("status")}) + if body.get("assignee") is not None: + if not kb.assign_task(conn, task_id, body.get("assignee") or None): + entry.update(ok=False, error="assign refused") + if body.get("priority") is not None: + try: + priority = int(body.get("priority")) + except (TypeError, ValueError): + entry.update(ok=False, error="priority must be an integer") + else: + conn.execute("UPDATE tasks SET priority = ? 
WHERE id = ?", (priority, task_id)) + if hasattr(kb, "_append_event"): + kb._append_event(conn, task_id, "reprioritized", {"priority": priority, "source": "webui"}) + except Exception as exc: + entry.update(ok=False, error=str(exc)) + results.append(entry) + return {"results": results, "read_only": False} + + +def _dispatch_payload(parsed): + board = _resolve_board(parsed) + kb = _kb() + dry_run = _bool_query(parsed, "dry_run", False) + max_spawn = _int_query(parsed, "max", 8, minimum=1, maximum=100) + if not hasattr(kb, "dispatch_once"): + raise ValueError("dispatcher is unavailable") + with _conn(board=board) as conn: + result = kb.dispatch_once(conn, dry_run=dry_run, max_spawn=max_spawn) + if isinstance(result, dict): + return result + try: + return asdict(result) + except TypeError: + return {"result": str(result)} + + +def _task_action_payload(task_id: str, body: dict, action: str, *, board=None): + kb = _kb() + task_id = str(task_id or "").strip() + if not task_id: + raise ValueError("task_id is required") + with _conn(board=board) as conn: + if not kb.get_task(conn, task_id): + raise LookupError("task not found") + if action == "block": + ok = kb.block_task(conn, task_id, reason=body.get("reason") or body.get("block_reason")) + elif action == "unblock": + if hasattr(kb, "unblock_task"): + ok = kb.unblock_task(conn, task_id) + else: + _patch_task(conn, task_id, {"status": "ready"}) + ok = True + else: + raise ValueError(f"invalid action: {action}") + if not ok: + raise RuntimeError(f"{action} refused") + return {"task": _task_dict(kb.get_task(conn, task_id)), "read_only": False} + + +# --------------------------------------------------------------------------- +# Multi-board management +# --------------------------------------------------------------------------- +# These endpoints operate on the on-disk board collection itself rather than +# on the tasks of a single board. 
They mirror the agent dashboard plugin's +# /boards surface (plugins/kanban/dashboard/plugin_api.py) so that the +# CLI / gateway / dashboard / WebUI all share the same active-board pointer. + +def _board_meta_dict(meta): + """Coerce the library's board metadata dict into a JSON-serialisable + form. ``list_boards`` returns dicts with Path values for ``directory``; + json.dumps would refuse those without help.""" + if not isinstance(meta, dict): + return meta + out = dict(meta) + for key in ("directory", "db_path", "path"): + if key in out and out[key] is not None: + out[key] = str(out[key]) + return out + + +def _board_counts_for_slug(slug): + """Per-status task counts for a board, used to populate the board + switcher with a live "12 tasks" badge. Mirrors the agent dashboard's + ``_board_counts`` helper. Returns an empty dict for boards whose + sqlite file has not been materialized yet (freshly-created boards + with no tasks).""" + kb = _kb() + if not kb.board_exists(slug): + return {} + try: + conn = kb.connect(board=slug) + except Exception: + return {} + try: + rows = conn.execute( + "SELECT status, COUNT(*) AS n FROM tasks " + "WHERE status != 'archived' GROUP BY status" + ).fetchall() + return {row["status"]: int(row["n"] or 0) for row in rows} + except Exception: + return {} + finally: + try: + conn.close() + except Exception: + pass + + +def _list_boards_payload(parsed): + """GET /api/kanban/boards — return all boards on disk + active slug. + + Each entry includes per-status counts and an ``is_current`` flag so the + UI can render the switcher in a single round-trip. 
+ """ + kb = _kb() + include_archived = _bool_query(parsed, "include_archived", False) + boards = kb.list_boards(include_archived=include_archived) + try: + current = kb.get_current_board() + except Exception: + current = "default" + visible_slugs = {(_board_meta_dict(meta).get("slug")) for meta in boards} + default_slug = getattr(kb, "DEFAULT_BOARD", "default") + if current not in visible_slugs: + # The on-disk active-board pointer can outlive an archived/deleted board + # when another CLI/WebUI process removes it. Surface a valid current + # board instead of letting the frontend pin every subsequent request to + # a ghost slug and fail with an opaque 404. + try: + kb.clear_current_board() + except Exception: + pass + current = default_slug + out = [] + for raw_meta in boards: + meta = _board_meta_dict(raw_meta) + slug = meta.get("slug") + if slug is None: + continue + meta["is_current"] = (slug == current) + meta["counts"] = _board_counts_for_slug(slug) + meta["total"] = sum(meta["counts"].values()) if meta["counts"] else 0 + out.append(meta) + return {"boards": out, "current": current, "read_only": False} + + +def _create_board_payload(body): + """POST /api/kanban/boards — create a new board. + + Body fields: ``slug`` (required), ``name``, ``description``, ``icon``, + ``color``, ``switch`` (bool — set as active after creation, default false). + Idempotent on slug — repeating returns the existing board metadata. 
+ """ + kb = _kb() + if not isinstance(body, dict): + raise ValueError("body must be a JSON object") + slug = str(body.get("slug") or "").strip() + if not slug: + raise ValueError("slug is required") + try: + meta = kb.create_board( + slug, + name=body.get("name") or None, + description=body.get("description") or None, + icon=body.get("icon") or None, + color=body.get("color") or None, + ) + except (ValueError, AttributeError) as exc: + raise ValueError(str(exc)) from exc + if body.get("switch"): + try: + kb.set_current_board(meta["slug"]) + except (ValueError, AttributeError) as exc: + raise ValueError(str(exc)) from exc + try: + current = kb.get_current_board() + except Exception: + current = "default" + return {"board": _board_meta_dict(meta), "current": current, "read_only": False} + + +def _update_board_payload(slug, body): + """PATCH /api/kanban/boards/ — update a board's display metadata. + + The slug itself is immutable (changing it would mean moving the on-disk + directory and re-pointing every saved active-board cookie). Only + ``name``, ``description``, ``icon``, ``color``, and ``archived`` are + mutable here; the slug travels in the URL path. 
+ """ + kb = _kb() + if not isinstance(body, dict): + raise ValueError("body must be a JSON object") + try: + normed = kb._normalize_board_slug(slug) + except (ValueError, AttributeError) as exc: + raise ValueError(f"invalid board slug: {slug!r}") from exc + if not normed or not kb.board_exists(normed): + raise LookupError(f"board {slug!r} does not exist") + archived = body.get("archived") + if isinstance(archived, str): + archived = archived.strip().lower() in {"1", "true", "yes", "on"} + meta = kb.write_board_metadata( + normed, + name=body.get("name"), + description=body.get("description"), + icon=body.get("icon"), + color=body.get("color"), + archived=archived if isinstance(archived, bool) else None, + ) + return {"board": _board_meta_dict(meta), "read_only": False} + + +def _delete_board_payload(slug, parsed): + """DELETE /api/kanban/boards/ — archive (default) or hard-delete. + + ``?delete=1`` is required to actually remove on-disk artefacts; without + it the board is just marked archived in its metadata and remains + enumerable via ``?include_archived=1`` on /boards. + """ + kb = _kb() + hard_delete = _bool_query(parsed, "delete", False) + try: + normed = kb._normalize_board_slug(slug) + except (ValueError, AttributeError) as exc: + raise ValueError(f"invalid board slug: {slug!r}") from exc + if not normed or not kb.board_exists(normed): + raise LookupError(f"board {slug!r} does not exist") + # Refuse to delete the default board — that would leave the system + # without a fallback active board on next CLI / dashboard call. 
+ try: + default_slug = getattr(kb, "DEFAULT_BOARD", "default") + except Exception: + default_slug = "default" + if normed == default_slug: + raise ValueError("cannot remove the default board") + res = kb.remove_board(normed, archive=not hard_delete) + try: + current = kb.get_current_board() + except Exception: + current = "default" + # If we just removed the active board, the library auto-falls-back to + # default on the next get_current_board() — surface that explicitly so + # the UI can re-fetch /board on the new active slug. + return { + "result": _board_meta_dict(res) if isinstance(res, dict) else res, + "current": current, + "read_only": False, + } + + +def _switch_board_payload(slug): + """POST /api/kanban/boards//switch — set this board as active. + + The active-board pointer is stored on disk under ``/kanban/current`` + and is shared by the CLI, gateway, dashboard, and WebUI — switching + here switches everywhere. The UI also keeps a localStorage hint so + that opening a fresh tab doesn't always have to round-trip to discover + the active slug, but the on-disk pointer is the source of truth. + """ + kb = _kb() + try: + normed = kb._normalize_board_slug(slug) + except (ValueError, AttributeError) as exc: + raise ValueError(f"invalid board slug: {slug!r}") from exc + if not normed or not kb.board_exists(normed): + raise LookupError(f"board {slug!r} does not exist") + kb.set_current_board(normed) + return {"current": normed, "read_only": False} + + +# --------------------------------------------------------------------------- +# SSE event stream +# --------------------------------------------------------------------------- +# Server-Sent Events let the UI react to task transitions in real time +# without the 30s HTTP polling tax. The agent dashboard uses WebSockets +# for the same purpose; we use SSE because the WebUI's existing transport +# is a synchronous BaseHTTPServer and SSE is the right tool for +# unidirectional server-pushed event streams. 
The wire-level UX is +# identical from the client's perspective: events arrive within ~300ms +# of being committed to task_events. + +# Polling interval matches the agent dashboard's _EVENT_POLL_SECONDS so +# write-to-receive latency is identical between the two surfaces. +_KANBAN_SSE_POLL_SECONDS = 0.3 +# Heartbeat keeps proxies/CDNs from reaping the connection on idle boards. +# Identical to the approval/clarify SSE heartbeat. +_KANBAN_SSE_HEARTBEAT_SECONDS = 15.0 +# Hard cap on a single SSE batch so a board with thousands of historical +# events doesn't ship them all in one frame. Same as the dashboard. +_KANBAN_SSE_BATCH_LIMIT = 200 + + +def _kanban_sse_fetch_new(board, cursor): + """Read events with id > cursor from the given board's task_events + table. Returns ``(new_cursor, events_list)``. Best-effort — returns + the input cursor and an empty list on any DB error so the SSE loop + self-heals on transient sqlite contention rather than dropping the + client.""" + kb = _kb() + # Guard against a board that's been archived/removed mid-stream: + # kb.connect(board=) auto-materialises the directory + DB on + # first call, which would silently un-archive a board that was just + # removed. Skip the fetch when the board no longer exists. + if board is not None: + try: + default_slug = getattr(kb, "DEFAULT_BOARD", "default") + except Exception: + default_slug = "default" + if board != default_slug and not kb.board_exists(board): + return cursor, [] + try: + conn = kb.connect(board=board) + except Exception: + return cursor, [] + try: + rows = conn.execute( + "SELECT id, task_id, run_id, kind, payload, created_at " + "FROM task_events WHERE id > ? 
ORDER BY id ASC LIMIT ?", + (int(cursor), _KANBAN_SSE_BATCH_LIMIT), + ).fetchall() + except Exception: + return cursor, [] + finally: + try: + conn.close() + except Exception: + pass + out = [] + new_cursor = cursor + for r in rows: + payload = None + try: + raw = r["payload"] + if raw: + payload = json.loads(raw) + except Exception: + payload = None + out.append({ + "id": int(r["id"]), + "task_id": r["task_id"], + "run_id": r["run_id"], + "kind": r["kind"], + "payload": payload, + "created_at": int(r["created_at"]) if r["created_at"] is not None else None, + }) + new_cursor = int(r["id"]) + return new_cursor, out + + +def _handle_events_sse_stream(handler, parsed): + """GET /api/kanban/events/stream — long-lived SSE feed of task events. + + Query params: + since= Resume from this event id. Defaults to 0 (full backlog + on first connect — the client should pass the latest + id it knows about so it does not re-receive historical + events.) Capped to the most recent _KANBAN_SSE_BATCH_LIMIT. + board= Pin the stream to a specific board. Switching boards + requires the client to close and re-open the stream. + + Header (set automatically by EventSource on reconnect): + Last-Event-ID Fallback resume cursor when ?since= is absent. The + server emits ``id: `` on every events frame + so the browser can resume cleanly across drops without + re-receiving up to _KANBAN_SSE_BATCH_LIMIT events the + client already has. + + Mirrors the agent dashboard's WebSocket /events contract event-for-event + so a client that handles one can handle the other with only the + transport swapped. + """ + try: + board = _resolve_board(parsed) + except (ValueError, LookupError) as exc: + return bad(handler, str(exc), status=400 if isinstance(exc, ValueError) else 404) + + qs = parse_qs(parsed.query or "") + # Resolution chain: ?since= query param → Last-Event-ID header → 0. 
+ # The Last-Event-ID header is what EventSource sends automatically on + # reconnect; honouring it lets the browser resume cleanly without the + # client needing to track the cursor in JS. + since_raw = (qs.get("since") or [None])[0] + if since_raw is None: + try: + since_raw = handler.headers.get("Last-Event-ID") + except Exception: + since_raw = None + try: + cursor = int(since_raw) if since_raw is not None else 0 + except (TypeError, ValueError): + cursor = 0 + if cursor < 0: + cursor = 0 + + handler.send_response(200) + handler.send_header("Content-Type", "text/event-stream; charset=utf-8") + handler.send_header("Cache-Control", "no-cache") + handler.send_header("X-Accel-Buffering", "no") + handler.send_header("Connection", "keep-alive") + handler.end_headers() + + # Send an initial frame so the client knows the connection is open + # and learns the current cursor (in case the server already had a + # backlog when the client first connected). + try: + handler.wfile.write( + f"event: hello\ndata: {json.dumps({'cursor': cursor, 'board': board})}\n\n".encode("utf-8") + ) + handler.wfile.flush() + except (BrokenPipeError, ConnectionResetError, ValueError, OSError): + return True + + last_heartbeat = time.monotonic() + try: + while True: + cursor, events = _kanban_sse_fetch_new(board, cursor) + if events: + # Emit `id: ` on every events frame so the + # browser sets Last-Event-ID on auto-reconnect, letting us + # resume from there without re-streaming the backlog. + payload = json.dumps({"events": events, "cursor": cursor}) + frame = ( + f"id: {cursor}\nevent: events\ndata: {payload}\n\n" + ).encode("utf-8") + try: + handler.wfile.write(frame) + handler.wfile.flush() + except (BrokenPipeError, ConnectionResetError, ValueError, OSError): + return True + last_heartbeat = time.monotonic() + else: + # Heartbeat keeps reverse proxies and the browser from + # closing an idle stream. SSE comments (lines starting + # with `:`) are ignored by EventSource. 
+ if (time.monotonic() - last_heartbeat) >= _KANBAN_SSE_HEARTBEAT_SECONDS: + try: + handler.wfile.write(b": keepalive\n\n") + handler.wfile.flush() + except (BrokenPipeError, ConnectionResetError, ValueError, OSError): + return True + last_heartbeat = time.monotonic() + time.sleep(_KANBAN_SSE_POLL_SECONDS) + except Exception: + # Any other unexpected exception in the SSE loop should not bubble + # up to the request handler (which would 500 a long-lived stream). + return True + + +def handle_kanban_get(handler, parsed) -> bool | None: + """Dispatch a Kanban GET. Three-valued return: + + - ``False`` — no Kanban path matched; caller should emit a 404 + (``_kanban_unknown_endpoint``) for genuinely stale-bundle requests. + - ``None`` — a path matched and the inner handler already sent a + response via ``bad(...)`` / ``j(...)`` (which both return ``None``). + The caller MUST NOT emit another response. + - ``True`` — a path matched and the inner handler succeeded. + + Treat any falsy-but-not-False return (``0``, ``''``, etc.) as a bug and + audit the new return path; the caller uses ``is False`` identity check + to distinguish unmatched paths from already-responded paths (#1843). + """ + path = parsed.path + try: + # Multi-board management endpoints — these do NOT take a board arg + # because they operate on the on-disk board collection itself, not + # on a single board's tasks. 
+ if path == "/api/kanban/boards": + return j(handler, _list_boards_payload(parsed)) or True + if path == "/api/kanban/board": + return j(handler, _board_payload(parsed)) or True + if path == "/api/kanban/config": + return j(handler, _config_payload(board=_resolve_board(parsed))) or True + if path == "/api/kanban/stats": + return j(handler, _stats_payload(board=_resolve_board(parsed))) or True + if path == "/api/kanban/assignees": + return j(handler, _assignees_payload(board=_resolve_board(parsed))) or True + if path == "/api/kanban/events": + return j(handler, _events_payload(parsed)) or True + if path == "/api/kanban/events/stream": + return _handle_events_sse_stream(handler, parsed) + if path.startswith(_TASK_PREFIX) and path.endswith("/log"): + task_id = unquote(path[len(_TASK_PREFIX):-len("/log")]).strip("/") + if not task_id or "/" in task_id: + return False + payload = _task_log_payload(parsed, task_id) + if payload is None: + return bad(handler, "task not found", status=404) + return j(handler, payload) or True + if path.startswith(_TASK_PREFIX): + task_id = unquote(path[len(_TASK_PREFIX):]).strip("/") + if not task_id or "/" in task_id: + return False + payload = _task_detail_payload(task_id, board=_resolve_board(parsed)) + if payload is None: + return bad(handler, "task not found", status=404) + return j(handler, payload) or True + return False + except ImportError as exc: + # hermes_cli not installed (webui-only deploy). Return a clean 503 + # "kanban unavailable" rather than a 500 so the frontend's existing + # try/catch surfaces a useful toast. + return bad(handler, f"kanban unavailable: {exc}", status=503) + except LookupError as exc: + return bad(handler, str(exc), status=404) + except ValueError as exc: + return bad(handler, str(exc)) + except RuntimeError as exc: + return bad(handler, str(exc), status=409) + + +def handle_kanban_post(handler, parsed, body) -> bool | None: + """Dispatch a Kanban POST. 
See ``handle_kanban_get`` for the + three-valued ``True | None | False`` contract (#1843).""" + path = parsed.path + try: + # Multi-board management endpoints — `_create_board_payload` and + # `_switch_board_payload` operate on the on-disk board collection, + # not on a single board's tasks. + if path == "/api/kanban/boards": + return j(handler, _create_board_payload(body)) or True + # POST /api/kanban/boards//switch — set active board + _BOARDS_PREFIX = "/api/kanban/boards/" + if path.startswith(_BOARDS_PREFIX) and path.endswith("/switch"): + slug = unquote(path[len(_BOARDS_PREFIX):-len("/switch")]).strip("/") + if not slug or "/" in slug: + return False + return j(handler, _switch_board_payload(slug)) or True + # All board-scoped writes accept a ?board= query param OR a + # `board` field in the JSON body. Query takes precedence. + board_q = _resolve_board(parsed) + board_b = _resolve_board_from_body(body) + board = board_q if board_q is not None else board_b + if path == "/api/kanban/dispatch": + return j(handler, _dispatch_payload(parsed)) or True + if path == "/api/kanban/tasks/bulk": + return j(handler, _bulk_tasks_payload(body, board=board)) or True + if path == "/api/kanban/tasks": + return j(handler, _create_task_payload(body, board=board)) or True + if path == "/api/kanban/links": + return j(handler, _link_tasks_payload(body, board=board)) or True + if path == "/api/kanban/links/delete": + return j(handler, _link_tasks_payload(body, unlink=True, board=board)) or True + if path.startswith(_TASK_PREFIX) and path.endswith("/comments"): + task_id = path[len(_TASK_PREFIX):-len("/comments")].strip("/") + return j(handler, _comment_payload(task_id, body, board=board)) or True + for suffix, action in (("/block", "block"), ("/unblock", "unblock")): + if path.startswith(_TASK_PREFIX) and path.endswith(suffix): + task_id = path[len(_TASK_PREFIX):-len(suffix)].strip("/") + return j(handler, _task_action_payload(task_id, body, action, board=board)) or True + if 
path.startswith(_TASK_PREFIX) and path.endswith("/patch"): + task_id = path[len(_TASK_PREFIX):-len("/patch")].strip("/") + return j(handler, _patch_task_payload(task_id, body, board=board)) or True + except ImportError as exc: + return bad(handler, f"kanban unavailable: {exc}", status=503) + except LookupError as exc: + return bad(handler, str(exc), status=404) + except ValueError as exc: + return bad(handler, str(exc)) + except RuntimeError as exc: + return bad(handler, str(exc), status=409) + return False + + +def handle_kanban_patch(handler, parsed, body) -> bool | None: + """Dispatch a Kanban PATCH. See ``handle_kanban_get`` for the + three-valued ``True | None | False`` contract (#1843).""" + path = parsed.path + try: + # /boards/ routes operate on the on-disk board collection + # itself — the slug travels in the URL path, not via ?board=. Match + # them BEFORE resolving the board param so a stray ?board=ghost in + # the query string doesn't 404 the legitimate `experiments` rename. + # (Mirrors handle_kanban_post's structure — fixes asymmetry caught + # by Opus advisor.) + _BOARDS_PREFIX = "/api/kanban/boards/" + if path.startswith(_BOARDS_PREFIX): + slug = unquote(path[len(_BOARDS_PREFIX):]).strip("/") + if not slug or "/" in slug: + return False + return j(handler, _update_board_payload(slug, body)) or True + # Task-scoped writes accept ?board= (or body.board) to pin the + # write to a specific board. Query takes precedence over body. 
+ board_q = _resolve_board(parsed) + board_b = _resolve_board_from_body(body) + board = board_q if board_q is not None else board_b + if path.startswith(_TASK_PREFIX): + task_id = unquote(path[len(_TASK_PREFIX):]).strip("/") + if not task_id or "/" in task_id: + return False + return j(handler, _patch_task_payload(task_id, body, board=board)) or True + except ImportError as exc: + return bad(handler, f"kanban unavailable: {exc}", status=503) + except LookupError as exc: + return bad(handler, str(exc), status=404) + except ValueError as exc: + return bad(handler, str(exc)) + except RuntimeError as exc: + return bad(handler, str(exc), status=409) + return False + + +def handle_kanban_delete(handler, parsed, body) -> bool | None: + """Dispatch a Kanban DELETE. See ``handle_kanban_get`` for the + three-valued ``True | None | False`` contract (#1843).""" + path = parsed.path + try: + # Same routing reorder as PATCH: /boards/ path-routed first, + # so a stray ?board=ghost can't 404 a legitimate board archive. 
+ _BOARDS_PREFIX = "/api/kanban/boards/" + if path.startswith(_BOARDS_PREFIX): + slug = unquote(path[len(_BOARDS_PREFIX):]).strip("/") + if not slug or "/" in slug: + return False + return j(handler, _delete_board_payload(slug, parsed)) or True + board_q = _resolve_board(parsed) + board_b = _resolve_board_from_body(body) + board = board_q if board_q is not None else board_b + if path == "/api/kanban/links": + return j(handler, _link_tasks_payload(body, unlink=True, board=board)) or True + except ImportError as exc: + return bad(handler, f"kanban unavailable: {exc}", status=503) + except LookupError as exc: + return bad(handler, str(exc), status=404) + except ValueError as exc: + return bad(handler, str(exc)) + except RuntimeError as exc: + return bad(handler, str(exc), status=409) + return False diff --git a/api/metering.py b/api/metering.py index 6edf2961..c4696d24 100644 --- a/api/metering.py +++ b/api/metering.py @@ -1,17 +1,17 @@ """ Hermes Web UI -- Streaming performance metering. -Tracks Tokens Per Second (TPS) across all active WebUI sessions, and the -HIGH/LOW TPS values observed over the past 60 minutes. Metering data is -emitted via SSE events so the header label can update live during a stream. +Tracks Tokens Per Second (TPS) across active WebUI streams. Metering data is +emitted via SSE events so a streaming assistant message can update its own +header while the turn is running. Architecture ──────────── -Each streaming session is tracked independently. TPS per session is: +Each streaming session is tracked independently. TPS per stream is: - session_tps = total_tokens / (last_token_ts - first_token_ts) + stream_tps = total_stream_deltas / (last_delta_ts - first_delta_ts) -The global tps is the average of all currently active sessions' TPS values. +The global tps is the average of all currently active streams' TPS values. 
This correctly represents the system's real-time capacity regardless of how many sessions are running or how long each has been streaming. @@ -19,8 +19,8 @@ For HIGH/LOW tracking, every stats snapshot records the current global tps (only when > 0 — idle periods are skipped) into a rolling 60-minute history. The max/min of that history gives the peak throughput observed over the past hour. -The ticker in streaming.py calls get_interval() — it returns 1.0 when sessions -are actively receiving tokens so the header updates at 1 Hz, and 10.0 when idle +The ticker in streaming.py calls get_interval() — it returns 1.0 when streams +are actively receiving output deltas so message headers update at 1 Hz, and 10.0 when idle so the ticker exits and no idle readings are emitted. Usage from api/streaming.py @@ -28,15 +28,17 @@ Usage from api/streaming.py from api.metering import meter meter().begin_session(stream_id) # stream starts - meter().record_token(stream_id, running_output) # per output token - meter().record_reasoning(stream_id, running_reasoning_len) # per reasoning token + meter().record_token(stream_id, running_output_deltas) + meter().record_reasoning(stream_id, running_reasoning_deltas) The SSE `metering` event payload: { - "tps": 47.3, # average TPS across active sessions (real-time) - "high": 52.1, # highest average TPS observed in the past 60 minutes - "low": 31.4, # lowest average TPS (excl. 
readings < 1 tps, to ignore idle) - "active": 1, # sessions currently streaming + "tps": 47.3, # omitted/null until a real reading exists + "tps_available": true, # frontend must hide TPS when false + "estimated": false, # never show byte/character-size estimates + "high": 52.1, + "low": 31.4, + "active": 1, } """ @@ -60,9 +62,9 @@ class _SessionMeter: def total_tokens(self) -> int: return self.output_tokens + self.reasoning_tokens - def tps(self) -> float: + def tps(self) -> float | None: if self.first_token_ts == 0.0 or self.last_token_ts <= self.first_token_ts: - return 0.0 + return None return self.total_tokens() / (self.last_token_ts - self.first_token_ts) @@ -148,12 +150,15 @@ class GlobalMeter: if not self._sessions: self._window_start = now - # Compute global tps: average of per-session TPS values + # Compute global tps: average only streams with a real reading. The + # UI hides TPS entirely when this is unavailable instead of showing + # placeholder/estimated values. active = [s for s in self._sessions.values() if s.first_token_ts > 0] - if active: - global_tps = sum(s.tps() for s in active) / len(active) + active_tps = [v for s in active for v in [s.tps()] if v is not None and v > 0] + if active_tps: + global_tps = sum(active_tps) / len(active_tps) else: - global_tps = 0.0 + global_tps = None # Prune readings older than 1 hour cutoff = now - _HOUR_SECS @@ -162,7 +167,7 @@ class GlobalMeter: # Only record this snapshot for HIGH/LOW if there is active work. # This prevents idle periods from flooding the history and keeps # HIGH/LOW meaningful for the past hour of actual throughput. 
- if global_tps > 0: + if global_tps is not None and global_tps > 0: self._readings.append((now, global_tps)) # HIGH/LOW from the past hour (skip near-zero idle readings) @@ -171,9 +176,11 @@ class GlobalMeter: low = min(active_readings) if active_readings else 0.0 return { - 'tps': round(global_tps, 1), - 'high': round(high, 1), - 'low': round(low, 1), + 'tps': round(global_tps, 1) if global_tps is not None else None, + 'tps_available': global_tps is not None, + 'estimated': False, + 'high': round(high, 1) if high else None, + 'low': round(low, 1) if low else None, 'active': len(self._sessions), } diff --git a/api/models.py b/api/models.py index 07de6a43..b15d5531 100644 --- a/api/models.py +++ b/api/models.py @@ -1,5 +1,7 @@ """Hermes Web UI -- Session model and in-memory session store.""" import collections +import datetime +import hashlib import json import logging import os @@ -19,6 +21,7 @@ from api.workspace import get_last_workspace from api.agent_sessions import read_importable_agent_session_rows, read_session_lineage_metadata logger = logging.getLogger(__name__) +CLI_VISIBLE_SESSION_LIMIT = 20 # --------------------------------------------------------------------------- # Stale temp-file cleanup @@ -223,6 +226,12 @@ def _last_message_timestamp(messages): return None +def _message_role(message): + if not isinstance(message, dict): + return '' + return str(message.get('role', '')).strip().lower() + + def _find_top_level_json_key(text, key): """Return the byte offset of a top-level JSON object key, if present.""" depth = 0 @@ -320,10 +329,18 @@ class Session: context_messages=None, compression_anchor_visible_idx=None, compression_anchor_message_key=None, + compression_anchor_summary=None, context_length=None, threshold_tokens=None, last_prompt_tokens=None, + gateway_routing=None, gateway_routing_history=None, + llm_title_generated: bool=False, parent_session_id: str=None, + worktree_path=None, + worktree_branch=None, + worktree_repo_root=None, + 
worktree_created_at=None, enabled_toolsets=None, + composer_draft=None, **kwargs): self.session_id = session_id or uuid.uuid4().hex[:12] self.title = title @@ -349,15 +366,25 @@ class Session: self.context_messages = context_messages if isinstance(context_messages, list) else [] self.compression_anchor_visible_idx = compression_anchor_visible_idx self.compression_anchor_message_key = compression_anchor_message_key + self.compression_anchor_summary = compression_anchor_summary self.context_length = context_length self.threshold_tokens = threshold_tokens self.last_prompt_tokens = last_prompt_tokens + self.gateway_routing = gateway_routing if isinstance(gateway_routing, dict) else None + self.gateway_routing_history = gateway_routing_history if isinstance(gateway_routing_history, list) else [] + self.llm_title_generated = bool(llm_title_generated) self.parent_session_id = parent_session_id + self.worktree_path = str(Path(worktree_path).expanduser().resolve()) if worktree_path else None + self.worktree_branch = str(worktree_branch) if worktree_branch else None + self.worktree_repo_root = str(Path(worktree_repo_root).expanduser().resolve()) if worktree_repo_root else None + self.worktree_created_at = worktree_created_at self.is_cli_session = bool(kwargs.get('is_cli_session', False)) self.source_tag = kwargs.get('source_tag') + self.raw_source = kwargs.get('raw_source') self.session_source = kwargs.get('session_source') self.source_label = kwargs.get('source_label') self.enabled_toolsets = enabled_toolsets # List[str] or None — per-session toolset override + self.composer_draft = composer_draft if isinstance(composer_draft, dict) else {} self._metadata_message_count = None @property @@ -365,6 +392,23 @@ class Session: return SESSION_DIR / f'{self.session_id}.json' def save(self, touch_updated_at: bool = True, skip_index: bool = False) -> None: + # ── #1558 P0 guard ────────────────────────────────────────────── + # Refuse to save a session that was loaded with 
metadata_only=True. + # Such sessions have messages=[] (it's the whole point of the partial + # load), and save() unconditionally writes self.messages to disk via + # an atomic os.replace(). Saving a metadata-only stub thus wipes the + # full conversation history — which is exactly the v0.50.279 + # _clear_stale_stream_state() regression that lost users 1000+ + # message conversations. Any caller that needs to mutate persisted + # fields on a metadata-only session must reload with + # metadata_only=False first. + if getattr(self, '_loaded_metadata_only', False): + raise RuntimeError( + f"Refusing to save metadata-only session {self.session_id!r}: " + f"would atomically overwrite on-disk messages with []. " + f"Reload with metadata_only=False before mutating state. " + f"See #1558." + ) if touch_updated_at: self.updated_at = time.time() # Write metadata fields first so load_metadata_only() can read them @@ -377,10 +421,13 @@ class Session: 'personality', 'active_stream_id', 'pending_user_message', 'pending_attachments', 'pending_started_at', 'compression_anchor_visible_idx', 'compression_anchor_message_key', + 'compression_anchor_summary', 'context_length', 'threshold_tokens', 'last_prompt_tokens', + 'gateway_routing', 'gateway_routing_history', 'llm_title_generated', 'parent_session_id', - 'is_cli_session', 'source_tag', 'session_source', 'source_label', - 'enabled_toolsets', + 'worktree_path', 'worktree_branch', 'worktree_repo_root', 'worktree_created_at', + 'is_cli_session', 'source_tag', 'raw_source', 'session_source', 'source_label', + 'enabled_toolsets', 'composer_draft', ] meta = {k: getattr(self, k, None) for k in METADATA_FIELDS} meta['messages'] = self.messages @@ -390,6 +437,56 @@ class Session: if k not in METADATA_FIELDS and k not in ('messages', 'tool_calls') and not k.startswith('_')} payload = json.dumps({**meta, **extra}, ensure_ascii=False, indent=2) + + # ── #1558 backup safeguard ────────────────────────────────────── + # Before overwriting the 
session file, copy the previous version to + # ``.json.bak`` IFF the previous file has more messages than the + # incoming payload. The asymmetric guard means: + # * Normal grow-the-conversation saves never produce a backup + # (incoming messages >= existing) — keeps disk overhead near zero. + # * Any save that would shrink the messages array (the failure mode + # of #1558, plus anything similar in the future) leaves a recoverable + # snapshot of the pre-shrink state on disk. + # The recovery path is api/session_recovery.py — at server startup and + # via /api/session/recover, sessions whose JSON has fewer messages than + # their .bak get restored automatically. + try: + if self.path.exists(): + existing_text = self.path.read_text(encoding='utf-8') + try: + existing = json.loads(existing_text) + existing_msg_count = len(existing.get('messages') or []) + except (json.JSONDecodeError, ValueError): + existing_msg_count = -1 # corrupt → always back up + incoming_msg_count = len(self.messages or []) + if existing_msg_count > incoming_msg_count: + bak_path = self.path.with_suffix('.json.bak') + # SHOULD-FIX #2 (Opus): atomic write via tmp+replace, + # mirroring the main save() pattern below. Prevents a + # torn .bak from a crash mid-write or a concurrent + # backup-producing save. Recovery defends against a + # torn .bak (JSONDecodeError → no_action), so the + # failure mode pre-fix was "backup is lost"; with + # this fix the backup either lands cleanly or doesn't + # land at all. + try: + bak_tmp = bak_path.with_suffix( + f'.bak.tmp.{os.getpid()}.{threading.current_thread().ident}' + ) + with open(bak_tmp, 'w', encoding='utf-8') as bf: + bf.write(existing_text) + bf.flush() + os.fsync(bf.fileno()) + os.replace(bak_tmp, bak_path) + except OSError: + # Backup is best-effort; main save proceeds regardless. 
+ try: + bak_tmp.unlink(missing_ok=True) + except Exception: + pass + except OSError: + pass + tmp = self.path.with_suffix(f'.tmp.{os.getpid()}.{threading.current_thread().ident}') try: with open(tmp, 'w', encoding='utf-8') as f: @@ -442,6 +539,13 @@ class Session: parsed['tool_calls'] = [] session = cls(**parsed) session._metadata_message_count = _lookup_index_message_count(sid) + # Mark this session as a metadata-only stub. save() refuses to write + # such a session because doing so would atomically replace the + # on-disk JSON with messages=[], wiping the conversation. Any + # caller that needs to mutate persisted state on a metadata-only + # session must reload it with metadata_only=False first. + # See #1558 — v0.50.279 _clear_stale_stream_state() data-loss bug. + session._loaded_metadata_only = True return session except Exception: # Corrupt prefix or decode error — fall back to full load @@ -449,20 +553,27 @@ class Session: def compact(self, include_runtime=False, active_stream_ids=None) -> dict: active_stream_ids = active_stream_ids if active_stream_ids is not None else set() + has_pending_user_message = bool(self.pending_user_message) + message_count = ( + self._metadata_message_count + if self._metadata_message_count is not None + else len(self.messages) + ) + if has_pending_user_message: + message_count = max(message_count, 1) + last_message_at = _last_message_timestamp(self.messages) or self.updated_at + if has_pending_user_message and self.pending_started_at: + last_message_at = self.pending_started_at return { 'session_id': self.session_id, 'title': self.title, 'workspace': self.workspace, 'model': self.model, 'model_provider': self.model_provider, - 'message_count': ( - self._metadata_message_count - if self._metadata_message_count is not None - else len(self.messages) - ), + 'message_count': message_count, 'created_at': self.created_at, 'updated_at': self.updated_at, - 'last_message_at': _last_message_timestamp(self.messages) or self.updated_at, + 
'last_message_at': last_message_at, 'pinned': self.pinned, 'archived': self.archived, 'project_id': self.project_id, @@ -473,19 +584,34 @@ class Session: 'personality': self.personality, 'compression_anchor_visible_idx': self.compression_anchor_visible_idx, 'compression_anchor_message_key': self.compression_anchor_message_key, + 'compression_anchor_summary': self.compression_anchor_summary, 'context_length': self.context_length, 'threshold_tokens': self.threshold_tokens, 'last_prompt_tokens': self.last_prompt_tokens, + 'gateway_routing': self.gateway_routing, + 'gateway_routing_history': self.gateway_routing_history, # Only emit 'parent_session_id' when set (the /branch fork link, #1342). # Sessions without a fork must not leak None — see test_session_lineage_metadata_api. **({'parent_session_id': self.parent_session_id} if self.parent_session_id else {}), + **({ + 'worktree_path': self.worktree_path, + 'worktree_branch': self.worktree_branch, + 'worktree_repo_root': self.worktree_repo_root, + 'worktree_created_at': self.worktree_created_at, + } if self.worktree_path else {}), + 'user_message_count': sum( + 1 for message in self.messages if _message_role(message) == 'user' + ) if isinstance(self.messages, list) else 0, 'active_stream_id': self.active_stream_id, 'pending_user_message': self.pending_user_message, + 'has_pending_user_message': has_pending_user_message, 'is_cli_session': self.is_cli_session, 'source_tag': self.source_tag, + 'raw_source': self.raw_source, 'session_source': self.session_source, 'source_label': self.source_label, 'enabled_toolsets': self.enabled_toolsets, + 'composer_draft': self.composer_draft if isinstance(self.composer_draft, dict) else {}, 'is_streaming': _is_streaming_session( self.active_stream_id, active_stream_ids ) if include_runtime else False, @@ -540,11 +666,31 @@ def _apply_core_sync_or_error_marker( if require_stream_dead and session.active_stream_id in _active_stream_ids(): return False - # When messages is already 
non-empty the core-sync overwrite and recovered - # user turn are skipped (we cannot clobber in-memory mutations), but the - # stuck pending fields MUST still be cleared and an error marker appended - # so the session isn't permanently left in stale-pending state. + # When messages is already non-empty, do not overwrite history from any core + # transcript. The pending user turn may still be the only durable copy of a + # prompt submitted just before a server restart, so materialize it before + # clearing runtime stream state. if len(session.messages) != 0: + _pending_text = " ".join(str(session.pending_user_message or "").split()) + _already_checkpointed = False + if _pending_text and session.messages: + _last_msg = session.messages[-1] + if isinstance(_last_msg, dict) and _last_msg.get('role') == 'user': + _last_text = " ".join(str(_last_msg.get('content') or "").split()) + _already_checkpointed = _last_text == _pending_text + _recovered_ts = int(time.time()) + if isinstance(session.pending_started_at, (int, float)) and session.pending_started_at > 0: + _recovered_ts = int(session.pending_started_at) + if not _already_checkpointed: + recovered = { + 'role': 'user', + 'content': session.pending_user_message, + 'timestamp': _recovered_ts, + '_recovered': True, + } + if session.pending_attachments: + recovered['attachments'] = list(session.pending_attachments) + session.messages.append(recovered) session.active_stream_id = None session.pending_user_message = None session.pending_attachments = [] @@ -557,7 +703,7 @@ def _apply_core_sync_or_error_marker( }) session.save() logger.info( - "Session %s: pending cleared (messages non-empty), added error marker", + "Session %s: recovered pending user turn (messages non-empty), added error marker", sid, ) return True @@ -617,11 +763,32 @@ def _apply_core_sync_or_error_marker( return True +# ── _repair_stale_pending grace period (#1624) ───────────────────────────── +# +# Defense-in-depth against a narrow race between the 
streaming thread clearing +# pending_user_message and STREAMS.pop(stream_id). Without this guard, any +# fast turn (e.g. command approval) that exits the thread before the on-disk +# pending clear has flushed gets misdiagnosed as a crashed turn, producing a +# spurious "Previous turn did not complete." marker. +# +# 30s covers the worst-case post-loop persistence window: LLM finishing a tool +# batch + lock contention with the checkpoint thread + a multi-MB session.save. +# A legitimately crashed turn whose pending_started_at is < 30s old will not +# repair on the first get_session() call, but WILL repair on the next call +# after the grace period elapses (typically the user's next interaction). +# +# Missing/falsy pending_started_at (legacy sidecars from before that field +# existed, or any path that forgot to set it) is treated as "old enough" so +# repair still recovers them — preserves current behavior for legacy data. +_REPAIR_STALE_PENDING_GRACE_SECONDS = 30 + + def _repair_stale_pending(session) -> bool: """Recover a sidecar stuck with messages=[] and stale pending state. Fires only when messages is empty, pending_user_message is set, - active_stream_id is set, and the stream is no longer alive. + active_stream_id is set, the stream is no longer alive, AND the turn is + older than _REPAIR_STALE_PENDING_GRACE_SECONDS (#1624). Uses a non-blocking lock acquire so a caller that already holds the per-session lock (e.g. retry_last, undo_last, cancel_stream) cannot @@ -634,12 +801,31 @@ def _repair_stale_pending(session) -> bool: # _apply_core_sync_or_error_marker uses this to detect a rotated active_stream_id # (e.g. context compression) or a stream that came back alive. 
_seen_stream_id = session.active_stream_id - if (len(session.messages) != 0 - or not session.pending_user_message + if (not session.pending_user_message or not _seen_stream_id or _seen_stream_id in _active_stream_ids()): return False + # Grace-period guard: bail if the turn is too fresh to be a real crash. + # Falsy pending_started_at (None, 0, missing) means "old enough" — preserve + # legacy-data recovery semantics for sessions that pre-date the field. + _started = getattr(session, 'pending_started_at', None) + if _started: + try: + _age = time.time() - float(_started) + except (TypeError, ValueError): + _age = float('inf') + if _age < _REPAIR_STALE_PENDING_GRACE_SECONDS: + logger.debug( + "_repair_stale_pending: skipping repair for session %s — " + "pending_started_at age=%.1fs < %ds grace window", + session.session_id, _age, _REPAIR_STALE_PENDING_GRACE_SECONDS, + ) + return False + else: + # Treat missing/falsy pending_started_at as "old enough" (legacy data). + _age = float('inf') + sid = session.session_id if not sid or not all(c in '0123456789abcdefghijklmnopqrstuvwxyz_' for c in sid): return False @@ -658,6 +844,20 @@ def _repair_stale_pending(session) -> bool: ) return False try: + # Telemetry (#1624): log legitimate repair firings so the next batch + # of user reports tells us whether the underlying race still fires + # post-fix. Rate-limit by age (Opus pre-release SHOULD-FIX): WARNING + # for the diagnostically valuable race window (< 5 min — actual + # leak-path candidates that slipped past the grace guard) and DEBUG + # for the long-tail (orphaned sidecars from prior process lifetimes) + # so reconnect loops on stuck sessions don't flood the log. 
+ _DIAG_WARN_WINDOW_SECONDS = 300 # 5 min + _age_str = ('inf' if _age == float('inf') else f'{_age:.1f}s') + _log = logger.warning if _age < _DIAG_WARN_WINDOW_SECONDS else logger.debug + _log( + "_repair_stale_pending firing: session=%s stream_id=%s pending_age=%s", + sid, _seen_stream_id, _age_str, + ) return _apply_core_sync_or_error_marker( session, core_path, stream_id_for_recheck=_seen_stream_id, ) @@ -711,7 +911,7 @@ def get_session(sid, metadata_only=False): return s raise KeyError(sid) -def new_session(workspace=None, model=None, profile=None, model_provider=None): +def new_session(workspace=None, model=None, profile=None, model_provider=None, project_id=None, worktree_info=None): """Create a new in-memory session. The session lives in the SESSIONS dict only — no disk write happens until @@ -726,7 +926,9 @@ def new_session(workspace=None, model=None, profile=None, model_provider=None): Crash-safety: if the process exits between session creation and first message, the session is lost. Since it had no messages, there is - nothing to lose. + nothing to lose. Worktree-backed sessions are the exception: they are + saved immediately because creating the session also creates real + filesystem state that must remain discoverable after restart. *profile* — when supplied by the caller (e.g. 
from the request body sent by the active browser tab), it is used directly so that concurrent clients @@ -742,17 +944,26 @@ def new_session(workspace=None, model=None, profile=None, model_provider=None): except ImportError: profile = None effective_model = model or get_effective_default_model() + wt = worktree_info if isinstance(worktree_info, dict) else None + workspace_path = (wt.get('path') if wt and wt.get('path') else workspace) if wt else workspace s = Session( - workspace=workspace or get_last_workspace(), + workspace=workspace_path or get_last_workspace(), model=effective_model, model_provider=model_provider, profile=profile, + project_id=project_id, + worktree_path=wt.get('path') if wt else None, + worktree_branch=wt.get('branch') if wt else None, + worktree_repo_root=wt.get('repo_root') if wt else None, + worktree_created_at=wt.get('created_at') if wt else None, ) with LOCK: SESSIONS[s.session_id] = s SESSIONS.move_to_end(s.session_id) while len(SESSIONS) > SESSIONS_MAX: SESSIONS.popitem(last=False) + if wt: + s.save() return s def _hide_from_default_sidebar(session: dict) -> bool: @@ -787,12 +998,24 @@ def _enrich_sidebar_lineage_metadata(sessions: list[dict]) -> None: session.update(metadata[sid]) -def all_sessions(): +def _diag_stage(diag, name: str) -> None: + if diag is not None: + try: + diag.stage(name) + except Exception: + pass + + +def all_sessions(diag=None): + _diag_stage(diag, "all_sessions.active_streams") active_stream_ids = _active_stream_ids() # Phase C: try index first for O(1) read; fall back to full scan + _diag_stage(diag, "all_sessions.index_exists") if SESSION_INDEX_FILE.exists(): try: + _diag_stage(diag, "all_sessions.read_index") index = json.loads(SESSION_INDEX_FILE.read_text(encoding='utf-8')) + _diag_stage(diag, "all_sessions.prune_index") index = [ s for s in index if _index_entry_exists(s.get('session_id')) @@ -800,21 +1023,25 @@ def all_sessions(): backfilled = [] for i, s in enumerate(index): if 'last_message_at' not in s: 
+ _diag_stage(diag, "all_sessions.backfill_load") full = Session.load(s.get('session_id')) if full: index[i] = full.compact() backfilled.append(full) if backfilled: try: + _diag_stage(diag, "all_sessions.backfill_write") _write_session_index(updates=backfilled) except Exception: logger.debug("Failed to persist last_message_at backfill") + _diag_stage(diag, "all_sessions.mark_streaming") for s in index: s['is_streaming'] = _is_streaming_session( s.get('active_stream_id'), active_stream_ids, ) # Overlay any in-memory sessions that may be newer than the index + _diag_stage(diag, "all_sessions.overlay_lock") index_map = {s['session_id']: s for s in index} with LOCK: for s in SESSIONS.values(): @@ -822,6 +1049,7 @@ def all_sessions(): include_runtime=True, active_stream_ids=active_stream_ids, ) + _diag_stage(diag, "all_sessions.sort_filter") result = sorted(index_map.values(), key=lambda s: (s.get('pinned', False), _session_sort_timestamp(s)), reverse=True) # Hide empty Untitled sessions from the UI entirely — they are ephemeral # scratch pads that only become real once the first message is sent (#1171). @@ -838,6 +1066,8 @@ def all_sessions(): s.get('title', 'Untitled') == 'Untitled' and s.get('message_count', 0) == 0 and not s.get('active_stream_id') + and not s.get('has_pending_user_message') + and not s.get('worktree_path') )] result = [s for s in result if not _hide_from_default_sidebar(s)] # Backfill: sessions created before Sprint 22 have no profile tag. 
@@ -845,11 +1075,13 @@ def all_sessions(): for s in result: if not s.get('profile'): s['profile'] = 'default' + _diag_stage(diag, "all_sessions.lineage_metadata") _enrich_sidebar_lineage_metadata(result) return result except Exception: logger.debug("Failed to load session index, falling back to full scan") # Full scan fallback + _diag_stage(diag, "all_sessions.full_scan") out = [] for p in SESSION_DIR.glob('*.json'): if p.name.startswith('_'): continue @@ -858,8 +1090,10 @@ def all_sessions(): if s: out.append(s) except Exception: logger.debug("Failed to load session from %s", p) + _diag_stage(diag, "all_sessions.full_scan_overlay") for s in SESSIONS.values(): if all(s.session_id != x.session_id for x in out): out.append(s) + _diag_stage(diag, "all_sessions.full_scan_sort_filter") out.sort(key=lambda s: (getattr(s, 'pinned', False), _session_sort_timestamp(s)), reverse=True) # Hide empty Untitled sessions from the UI entirely — kept consistent with the # index-path filter above. No grace window: a 0-message Untitled session is @@ -869,11 +1103,13 @@ def all_sessions(): and len(s.messages) == 0 and not s.active_stream_id and not s.pending_user_message + and not getattr(s, 'worktree_path', None) )] result = [s for s in result if not _hide_from_default_sidebar(s)] for s in result: if not s.get('profile'): s['profile'] = 'default' + _diag_stage(diag, "all_sessions.lineage_metadata") _enrich_sidebar_lineage_metadata(result) return result @@ -893,14 +1129,90 @@ def title_from(messages, fallback: str='Untitled'): # ── Project helpers ────────────────────────────────────────────────────────── -def load_projects() -> list: - """Load project list from disk. Returns list of project dicts.""" +_PROJECTS_MIGRATION_LOCK = threading.Lock() +_projects_migrated = False + + +def _backfill_project_profiles_if_needed(projects: list) -> bool: + """Tag any legacy untagged projects (`profile` missing) with a sensible default. + + Strategy: + 1. 
For each untagged project, look at the sessions assigned to it via + the session index. If any session carries a profile, take that + profile. Most installs are single-profile so this picks up the + right answer for everyone. + 2. Otherwise default to 'default'. + + Returns True if any project was mutated. Safe to call repeatedly — once + every project is tagged, this is a no-op. Runs at most once per process + (cached via the module-level _projects_migrated flag) but the result is + persisted so it's a one-time write. + """ + untagged = [p for p in projects if not p.get('profile')] + if not untagged: + return False + + # Build session_id -> profile map for the untagged project_ids. + session_profile_by_project: dict[str, str] = {} + if SESSION_INDEX_FILE.exists(): + try: + entries = json.loads(SESSION_INDEX_FILE.read_text(encoding='utf-8')) + untagged_ids = {p['project_id'] for p in untagged if p.get('project_id')} + for e in entries: + pid = e.get('project_id') + if pid in untagged_ids and e.get('profile'): + # First session profile wins for the project. + session_profile_by_project.setdefault(pid, e['profile']) + except Exception: + logger.debug("Failed to read session index for project profile backfill") + + mutated = False + for p in untagged: + inferred = session_profile_by_project.get(p.get('project_id'), 'default') + p['profile'] = inferred + mutated = True + return mutated + + +def load_projects(*, _migrate: bool = True) -> list: + """Load project list from disk. Returns list of project dicts. + + On first call, runs a one-time migration to back-fill the `profile` field + on legacy untagged projects (#1614). Disable via `_migrate=False` for + callsites that want the raw on-disk shape (test fixtures, e.g.). 
+ """ + global _projects_migrated if not PROJECTS_FILE.exists(): return [] try: - return json.loads(PROJECTS_FILE.read_text(encoding='utf-8')) + projects = json.loads(PROJECTS_FILE.read_text(encoding='utf-8')) except Exception: return [] + if _migrate and not _projects_migrated: + with _PROJECTS_MIGRATION_LOCK: + # Re-check inside the lock — another thread may have raced. + if _projects_migrated: + # Per Opus advisor on stage-293: another thread completed + # migration and wrote new state to disk while we waited for + # the lock. Our `projects` snapshot is the pre-migration + # version; re-read so the caller doesn't see stale untagged + # rows (which a mutation route could then write back, + # silently overwriting the migration). + try: + return json.loads(PROJECTS_FILE.read_text(encoding='utf-8')) + except Exception: + return projects + if _backfill_project_profiles_if_needed(projects): + try: + save_projects(projects) + _projects_migrated = True + except Exception: + logger.debug("Failed to persist project profile backfill") + # Leave _projects_migrated False so a future call retries. + else: + # Nothing to migrate — already tagged. + _projects_migrated = True + return projects def save_projects(projects) -> None: """Write project list to disk.""" @@ -912,20 +1224,46 @@ _CRON_PROJECT_LOCK = threading.Lock() def ensure_cron_project() -> str: - """Return the project_id of the system "Cron Jobs" project, creating it if needed. + """Return the project_id of the system "Cron Jobs" project for the active profile. + + Each profile gets its own "Cron Jobs" project so cron-spawned sessions in + profile A don't surface under the cron chip of profile B (#1614). Lookup + keys on (name, profile) — a legacy untagged "Cron Jobs" project (no + `profile` field) is treated as belonging to whichever profile first calls + this in a given install, then re-tagged. Thread-safe and idempotent. Returns a 12-char hex project_id string. 
""" + from api.profiles import get_active_profile_name, _is_root_profile + + active = get_active_profile_name() or 'default' with _CRON_PROJECT_LOCK: - for p in load_projects(): - if p.get('name') == CRON_PROJECT_NAME: - return p['project_id'] - project_id = uuid.uuid4().hex[:12] projects = load_projects() + # Look for an existing per-profile cron project. Match either an exact + # profile tag or the renamed-root alias (a 'default'-tagged project + # under a renamed root, or a renamed-root-tagged project under + # 'default'). _is_root_profile is the canonical alias check. + for p in projects: + if p.get('name') != CRON_PROJECT_NAME: + continue + row_profile = p.get('profile') + if row_profile == active: + return p['project_id'] + if _is_root_profile(row_profile or 'default') and _is_root_profile(active): + return p['project_id'] + # Reuse a legacy untagged cron project — back-tag it to the active profile. + for p in projects: + if p.get('name') == CRON_PROJECT_NAME and not p.get('profile'): + p['profile'] = active + save_projects(projects) + return p['project_id'] + # Otherwise create a new one tagged with the active profile. + project_id = uuid.uuid4().hex[:12] projects.append({ 'project_id': project_id, 'name': CRON_PROJECT_NAME, 'color': '#6366f1', + 'profile': active, 'created_at': time.time(), }) save_projects(projects) @@ -949,9 +1287,13 @@ def import_cli_session( profile=None, created_at=None, updated_at=None, + parent_session_id=None, ): - """Create a new WebUI session populated with CLI messages. - Returns the Session object. + """Create a new WebUI session populated with CLI/agent messages. + + Preserve parent_session_id from state.db so imported continuation segments + keep their lineage in the WebUI store and sidebar instead of reappearing as + detached orphan chats. 
""" s = Session( session_id=session_id, @@ -962,6 +1304,7 @@ def import_cli_session( profile=profile, created_at=created_at, updated_at=updated_at, + parent_session_id=parent_session_id, ) s.save(touch_updated_at=False) return s @@ -969,6 +1312,230 @@ def import_cli_session( # ── CLI session bridge ────────────────────────────────────────────────────── +CLAUDE_CODE_SOURCE = 'claude_code' +CLAUDE_CODE_SOURCE_LABEL = 'Claude Code' +CLAUDE_CODE_MAX_FILES = 200 +CLAUDE_CODE_MAX_FILE_BYTES = 10 * 1024 * 1024 +CLAUDE_CODE_MAX_MESSAGES_PER_FILE = 1000 +CLAUDE_CODE_MAX_CONTENT_CHARS = 200_000 + + +def _default_claude_code_projects_dir() -> Path | None: + """Resolve the Claude Code projects directory without touching real home in tests.""" + override = os.getenv('HERMES_WEBUI_CLAUDE_PROJECTS_DIR') + if override: + return Path(override).expanduser() + if os.getenv('HERMES_WEBUI_TEST_STATE_DIR'): + return None + return Path.home() / '.claude' / 'projects' + + +def _claude_code_session_id(path: Path) -> str: + digest = hashlib.sha256(str(path.expanduser().resolve()).encode('utf-8')).hexdigest()[:24] + return f'{CLAUDE_CODE_SOURCE}_{digest}' + + +def _parse_claude_code_timestamp(value): + if value is None: + return None + if isinstance(value, (int, float)): + return float(value) + text = str(value).strip() + if not text: + return None + try: + return float(text) + except ValueError: + pass + try: + return datetime.datetime.fromisoformat(text.replace('Z', '+00:00')).timestamp() + except Exception: + return None + + +def _extract_claude_code_text(content) -> str: + if content is None: + return '' + if isinstance(content, str): + return content[:CLAUDE_CODE_MAX_CONTENT_CHARS] + if isinstance(content, list): + parts = [] + used = 0 + for item in content: + text = '' + if isinstance(item, str): + text = item + elif isinstance(item, dict): + text = item.get('text') or item.get('content') or '' + if not text: + continue + text = str(text) + remaining = CLAUDE_CODE_MAX_CONTENT_CHARS - 
used + if remaining <= 0: + break + parts.append(text[:remaining]) + used += len(parts[-1]) + return '\n'.join(parts) + if isinstance(content, dict): + return _extract_claude_code_text(content.get('text') or content.get('content')) + return str(content)[:CLAUDE_CODE_MAX_CONTENT_CHARS] + + +def _parse_claude_code_jsonl(path: Path, *, max_messages: int = CLAUDE_CODE_MAX_MESSAGES_PER_FILE) -> tuple[list[dict], str | None, float | None, float | None]: + messages: list[dict] = [] + summary_title = None + first_ts = None + last_ts = None + try: + with path.open('r', encoding='utf-8', errors='replace') as fh: + for line in fh: + if len(messages) >= max_messages: + break + line = line.strip() + if not line: + continue + try: + raw = json.loads(line) + except Exception: + continue + if not isinstance(raw, dict): + continue + if not summary_title: + summary = raw.get('summary') or raw.get('title') + if isinstance(summary, str) and summary.strip(): + summary_title = ' '.join(summary.split())[:80] + records = raw.get('messages') if isinstance(raw.get('messages'), list) else None + if records is None: + records = [raw.get('message') if isinstance(raw.get('message'), dict) else raw] + for record in records: + if len(messages) >= max_messages: + break + if not isinstance(record, dict): + continue + msg = record.get('message') if isinstance(record.get('message'), dict) else record + role = str(msg.get('role') or record.get('role') or raw.get('role') or raw.get('type') or '').strip().lower() + if role == 'human': + role = 'user' + if role not in {'user', 'assistant', 'system', 'tool'}: + continue + content = _extract_claude_code_text(msg.get('content') if 'content' in msg else record.get('content')) + if not content.strip(): + continue + ts = _parse_claude_code_timestamp( + msg.get('timestamp') + or record.get('timestamp') + or raw.get('timestamp') + or raw.get('created_at') + ) + if ts is not None: + first_ts = ts if first_ts is None else min(first_ts, ts) + last_ts = ts if 
last_ts is None else max(last_ts, ts) + item = {'role': role, 'content': content} + if ts is not None: + item['timestamp'] = ts + messages.append(item) + except Exception: + return [], None, None, None + return messages, summary_title, first_ts, last_ts + + +def _iter_claude_code_jsonl_files(projects_dir: Path | str | None = None, *, max_files: int = CLAUDE_CODE_MAX_FILES, max_file_bytes: int = CLAUDE_CODE_MAX_FILE_BYTES): + root = Path(projects_dir).expanduser() if projects_dir is not None else _default_claude_code_projects_dir() + if root is None: + return + try: + if root.is_symlink(): + return + root = root.resolve(strict=False) + if not root.exists() or not root.is_dir(): + return + yielded = 0 + for project_dir in sorted(root.iterdir(), key=lambda p: p.name): + if yielded >= max_files: + return + try: + if project_dir.is_symlink() or not project_dir.is_dir(): + continue + for path in sorted(project_dir.iterdir(), key=lambda p: p.name): + if yielded >= max_files: + return + if path.is_symlink() or not path.is_file() or path.suffix.lower() != '.jsonl': + continue + try: + if path.stat().st_size > max_file_bytes: + continue + except OSError: + continue + yielded += 1 + yield path + except OSError: + continue + except OSError: + return + + +def _claude_code_title(messages: list[dict], summary_title: str | None) -> str: + if summary_title: + return summary_title + for msg in messages: + if msg.get('role') == 'user': + text = ' '.join(str(msg.get('content') or '').split()) + if text: + return text[:80] + return 'Claude Code Session' + + +def get_claude_code_sessions(projects_dir: Path | str | None = None, *, max_files: int = CLAUDE_CODE_MAX_FILES, max_file_bytes: int = CLAUDE_CODE_MAX_FILE_BYTES) -> list: + """Read Claude Code JSONL sessions as read-only external-agent rows. + + The bridge is additive and defensive: it skips symlinks, oversized files, + malformed lines, and per-file errors rather than crashing WebUI session + listing. 
Tests pass ``projects_dir`` fixtures so Michael's real ~/.claude is + never read during test runs. + """ + sessions = [] + for path in _iter_claude_code_jsonl_files(projects_dir, max_files=max_files, max_file_bytes=max_file_bytes) or []: + messages, summary_title, first_ts, last_ts = _parse_claude_code_jsonl(path) + if not messages: + continue + sid = _claude_code_session_id(path) + sessions.append({ + 'session_id': sid, + 'title': _claude_code_title(messages, summary_title), + 'workspace': str(get_last_workspace()), + 'model': 'claude-code', + 'message_count': len(messages), + 'created_at': first_ts or last_ts or path.stat().st_mtime, + 'updated_at': last_ts or first_ts or path.stat().st_mtime, + 'last_message_at': last_ts or first_ts or path.stat().st_mtime, + 'pinned': False, + 'archived': False, + 'project_id': None, + 'profile': None, + 'source_tag': CLAUDE_CODE_SOURCE, + 'raw_source': CLAUDE_CODE_SOURCE, + 'session_source': 'external_agent', + 'source_label': CLAUDE_CODE_SOURCE_LABEL, + 'is_cli_session': True, + 'read_only': True, + }) + sessions.sort(key=lambda s: s.get('last_message_at') or s.get('updated_at') or 0, reverse=True) + return sessions + + +def get_claude_code_session_messages(sid, projects_dir: Path | str | None = None) -> list: + """Return messages for one read-only Claude Code JSONL session.""" + sid = str(sid or '') + if not sid.startswith(f'{CLAUDE_CODE_SOURCE}_'): + return [] + for path in _iter_claude_code_jsonl_files(projects_dir) or []: + if _claude_code_session_id(path) != sid: + continue + messages, _summary_title, _first_ts, _last_ts = _parse_claude_code_jsonl(path) + return messages + return [] + + def get_cli_sessions() -> list: """Read CLI sessions from the agent's SQLite store and return them as dicts in a format the WebUI sidebar can render alongside local sessions. 
@@ -978,6 +1545,10 @@ def get_cli_sessions() -> list: """ import os cli_sessions = [] + try: + cli_sessions.extend(get_claude_code_sessions()) + except Exception: + logger.debug("Claude Code session scan failed", exc_info=True) # Use the active WebUI profile's HERMES_HOME to find state.db. # The active profile is determined by what the user has selected in the UI @@ -1015,7 +1586,12 @@ def get_cli_sessions() -> list: return _cron_pid_cache[0] try: - for row in read_importable_agent_session_rows(db_path, limit=200, log=logger, exclude_sources=None): + for row in read_importable_agent_session_rows( + db_path, + limit=CLI_VISIBLE_SESSION_LIMIT, + log=logger, + exclude_sources=None, + ): sid = row['id'] raw_ts = row['last_activity'] or row['started_at'] # Prefer the CLI session's own profile from the DB; fall back to @@ -1066,6 +1642,12 @@ def get_cli_sessions() -> list: 'profile': profile, 'source_tag': _source, 'raw_source': row.get('raw_source'), + 'user_id': row.get('user_id'), + 'chat_id': row.get('chat_id') or row.get('origin_chat_id'), + 'chat_type': row.get('chat_type'), + 'thread_id': row.get('thread_id'), + 'session_key': row.get('session_key'), + 'platform': row.get('platform'), 'session_source': row.get('session_source'), 'source_label': row.get('source_label'), 'parent_session_id': row.get('parent_session_id'), @@ -1073,6 +1655,9 @@ def get_cli_sessions() -> list: 'parent_source': row.get('parent_source'), 'relationship_type': row.get('relationship_type'), '_parent_lineage_root_id': row.get('_parent_lineage_root_id'), + 'end_reason': row.get('end_reason'), + 'actual_message_count': row.get('actual_message_count'), + 'user_message_count': row.get('actual_user_message_count'), '_lineage_root_id': row.get('_lineage_root_id'), '_lineage_tip_id': row.get('_lineage_tip_id'), '_compression_segment_count': row.get('_compression_segment_count'), @@ -1091,12 +1676,30 @@ def get_cli_sessions() -> list: return cli_sessions +def _json_loads_if_string(value): + if not 
isinstance(value, str): + return value + text = value.strip() + if not text: + return None + try: + return json.loads(text) + except Exception: + return value + + def get_cli_session_messages(sid) -> list: - """Read messages for a single CLI session from the SQLite store. - Returns a list of {role, content, timestamp} dicts. - Returns empty list on any error. + """Read messages for a single CLI/external-agent session. + + Preserve tool-call/result and reasoning metadata from the agent state.db so + CLI-origin transcripts render with the same tool cards as WebUI-native + sessions. When the requested session is the tip of a compression/CLI-close + continuation chain, return the stitched full transcript across all segments + in chronological order. Returns empty list on any error. """ import os + if str(sid or '').startswith(f'{CLAUDE_CODE_SOURCE}_'): + return get_claude_code_session_messages(sid) try: import sqlite3 except ImportError: @@ -1115,24 +1718,185 @@ def get_cli_session_messages(sid) -> list: with closing(sqlite3.connect(str(db_path))) as conn: conn.row_factory = sqlite3.Row cur = conn.cursor() - cur.execute(""" - SELECT role, content, timestamp + cur.execute("PRAGMA table_info(messages)") + available = {str(row['name']) for row in cur.fetchall()} + required = {'role', 'content', 'timestamp'} + if not required.issubset(available): + return [] + optional = [ + 'tool_call_id', + 'tool_calls', + 'tool_name', + 'reasoning', + 'reasoning_details', + 'codex_reasoning_items', + 'reasoning_content', + 'codex_message_items', + ] + selected = ['role', 'content', 'timestamp'] + [c for c in optional if c in available] + + cur.execute("PRAGMA table_info(sessions)") + session_cols = {str(row['name']) for row in cur.fetchall()} + session_chain = [str(sid)] + if {'parent_session_id', 'end_reason', 'started_at', 'source'}.issubset(session_cols): + cur.execute( + """ + SELECT id, source, started_at, parent_session_id, ended_at, end_reason + FROM sessions + WHERE id = ? 
+ """, + (sid,), + ) + rows_by_id = {} + row = cur.fetchone() + if row: + rows_by_id[str(row['id'])] = dict(row) + current_id = str(row['id']) + seen = {current_id} + for _ in range(20): + current = rows_by_id.get(current_id) + parent_id = current.get('parent_session_id') if current else None + if not parent_id or parent_id in seen: + break + cur.execute( + """ + SELECT id, source, started_at, parent_session_id, ended_at, end_reason + FROM sessions + WHERE id = ? + """, + (parent_id,), + ) + parent_row = cur.fetchone() + if not parent_row: + break + parent_dict = dict(parent_row) + rows_by_id[str(parent_row['id'])] = parent_dict + if not _is_continuation_session(parent_dict, current): + break + session_chain.insert(0, str(parent_row['id'])) + current_id = str(parent_row['id']) + seen.add(current_id) + + placeholders = ', '.join('?' for _ in session_chain) + cur.execute(f""" + SELECT {', '.join(selected)}, session_id FROM messages - WHERE session_id = ? - ORDER BY timestamp ASC - """, (sid,)) + WHERE session_id IN ({placeholders}) + ORDER BY timestamp ASC, id ASC + """, session_chain) msgs = [] for row in cur.fetchall(): - msgs.append({ + msg = { 'role': row['role'], 'content': row['content'], 'timestamp': row['timestamp'], - }) + } + for col in optional: + if col not in row.keys(): + continue + value = row[col] + if value in (None, ''): + continue + if col in {'tool_calls', 'reasoning_details', 'codex_reasoning_items', 'codex_message_items'}: + value = _json_loads_if_string(value) + msg[col] = value + if msg.get('role') == 'tool' and msg.get('tool_name') and not msg.get('name'): + msg['name'] = msg['tool_name'] + msgs.append(msg) except Exception: return [] return msgs +def count_conversation_rounds(sid: str, since: float | None = None) -> int: + """Count conversation rounds for a session from state.db. + + A "round" = one user message + one agent reply. 
Consecutive user + messages are merged into a single round so that multi-part questions + don't inflate the count. + + Parameters + ---------- + sid : str + Gateway session ID (e.g. ``20260430_151231_7209a0``). + since : float | None + Unix timestamp. If provided, only messages **after** this + timestamp are counted. + + Returns + ------- + int + Number of complete conversation rounds. + """ + import os, sqlite3, datetime + + try: + from api.profiles import get_active_hermes_home + hermes_home = Path(get_active_hermes_home()).expanduser().resolve() + except Exception: + hermes_home = Path(os.getenv('HERMES_HOME', str(HOME / '.hermes'))).expanduser().resolve() + db_path = hermes_home / 'state.db' + if not db_path.exists(): + return 0 + + try: + with sqlite3.connect(str(db_path)) as conn: + conn.row_factory = sqlite3.Row + cur = conn.cursor() + cur.execute( + "SELECT role, timestamp FROM messages WHERE session_id = ? ORDER BY timestamp ASC", + (sid,), + ) + rows = cur.fetchall() + except Exception: + return 0 + + rounds = 0 + seen_user = False # have we seen a user msg in the current round? + seen_agent_after_user = False # have we seen an agent reply after that user msg? + + for row in rows: + role = (row['role'] or '').strip().lower() + ts_raw = row['timestamp'] + + # Parse timestamp and apply the ``since`` filter. + if since is not None and ts_raw is not None: + try: + if isinstance(ts_raw, (int, float)): + ts_val = float(ts_raw) + else: + # ISO-8601 string + ts_val = datetime.datetime.fromisoformat( + str(ts_raw).replace('Z', '+00:00') + ).timestamp() + if ts_val <= since: + continue + except Exception: + pass + + if role == 'user': + if seen_user and not seen_agent_after_user: + # Consecutive user message — merge into current round. + pass + elif seen_user and seen_agent_after_user: + # Previous round completed, starting a new one. 
+ rounds += 1 + seen_agent_after_user = False + seen_user = True + elif role == 'assistant': + if seen_user: + seen_agent_after_user = True + + # Close the last round if it was completed. + if seen_user and seen_agent_after_user: + rounds += 1 + + return rounds + + +CONVERSATION_ROUND_THRESHOLD = 10 + + def delete_cli_session(sid) -> bool: """Delete a CLI session from state.db (messages + session row). Returns True if deleted, False if not found or error. diff --git a/api/oauth.py b/api/oauth.py index 106e63b7..8a9eb56e 100644 --- a/api/oauth.py +++ b/api/oauth.py @@ -1,187 +1,770 @@ -"""In-app OAuth flow implementations for providers like OpenAI Codex. +"""In-app OAuth flow implementations for onboarding. -Uses only stdlib (urllib.request, json, time) — no external dependencies. -Credentials are stored in ~/.hermes/auth.json under the credential_pool. +The browser receives only WebUI-local flow metadata (flow_id, user_code, +verification_uri, high-level status). Provider device/auth codes and OAuth +tokens stay server-side and are persisted to the active Hermes profile's +``auth.json`` credential_pool. """ +from __future__ import annotations + import json import logging +import os +import stat +import threading import time import uuid -import urllib.request -import urllib.parse import urllib.error +import urllib.parse +import urllib.request +from datetime import datetime, timezone from pathlib import Path +from typing import Any logger = logging.getLogger(__name__) +# Compatibility for older helper tests and self-heal code that import these. 
AUTH_JSON_PATH = Path.home() / ".hermes" / "auth.json" -# ── Codex OAuth constants (from hermes_cli/auth.py) ── -CODEX_CLIENT_ID = "pdlLIX2Y72MIl2rhLhTE9VV9bN905kBh" -CODEX_AUTH_URL = "https://auth.openai.com/oauth/device/authorize" -CODEX_TOKEN_URL = "https://auth.openai.com/oauth/token" -CODEX_SCOPE = "openid profile email offline_access" -CODEX_GRANT_TYPE_DEVICE = "urn:ietf:params:oauth:grant-type:device_code" +CODEX_ISSUER = "https://auth.openai.com" +CODEX_CLIENT_ID = "app_EMoamEEZ73f0CkXaXp7hrann" +CODEX_VERIFICATION_URI = f"{CODEX_ISSUER}/codex/device" +CODEX_USER_CODE_URL = f"{CODEX_ISSUER}/api/accounts/deviceauth/usercode" +CODEX_DEVICE_TOKEN_URL = f"{CODEX_ISSUER}/api/accounts/deviceauth/token" +CODEX_TOKEN_URL = f"{CODEX_ISSUER}/oauth/token" +CODEX_REDIRECT_URI = f"{CODEX_ISSUER}/deviceauth/callback" +CODEX_BASE_URL = "https://chatgpt.com/backend-api/codex" +CODEX_FLOW_MAX_WAIT_SECONDS = 15 * 60 + +_ALLOWED_ONBOARDING_OAUTH_PROVIDERS = {"openai-codex", "anthropic", "claude", "claude-code"} +_ANTHROPIC_PROVIDER_ALIASES = {"anthropic", "claude", "claude-code"} +_REJECTED_ONBOARDING_OAUTH_PROVIDERS = { + "nous", + "qwen-oauth", + "gemini-cli", + "google-gemini-cli", + "minimax", + "minimax-oauth", + "copilot", + "copilot-acp", +} + +ANTHROPIC_CREDENTIAL_POLL_SECONDS = 5 +ANTHROPIC_FLOW_MAX_WAIT_SECONDS = 15 * 60 +ANTHROPIC_PUBLIC_LINK_ERROR = "Claude Code credential linking failed. Check server logs." 
+ +_OAUTH_FLOWS: dict[str, dict[str, Any]] = {} +_OAUTH_FLOWS_LOCK = threading.Lock() +_ANTHROPIC_ENV_KEYS = ("ANTHROPIC_TOKEN", "ANTHROPIC_API_KEY") -# ── auth.json helpers ── +def _clear_process_anthropic_env_values() -> None: + """Clear Anthropic process env fallbacks under the streaming env lock.""" + from api.streaming import _ENV_LOCK -def _read_auth_json(): - """Read auth.json and return parsed dict, or empty dict.""" - if AUTH_JSON_PATH.exists(): + with _ENV_LOCK: + for key in _ANTHROPIC_ENV_KEYS: + os.environ.pop(key, None) + + +def resolve_runtime_provider_with_anthropic_env_lock(resolver, *args, **kwargs): + """Resolve runtime credentials under the Anthropic onboarding env lock. + + Request paths must resolve Anthropic env fallbacks per outbound request, + not cache ANTHROPIC_TOKEN or ANTHROPIC_API_KEY across onboarding. Sharing + the process-env lock prevents a chat stream from observing one stale + Anthropic env value while onboarding has already cleared the other. + """ + from api.streaming import _ENV_LOCK + + with _ENV_LOCK: + return resolver(*args, **kwargs) + + +def _normalize_onboarding_oauth_provider(provider: str) -> str: + provider = str(provider or "").strip().lower() + if provider in _ANTHROPIC_PROVIDER_ALIASES: + return "anthropic" + return provider or "openai-codex" + + +def _get_active_hermes_home() -> Path: + try: + from api.profiles import get_active_hermes_home + + return Path(get_active_hermes_home()) + except Exception as exc: + # Per Opus advisor on stage-296: log the silent fallback so a corrupt + # profile state ending up writing tokens to ~/.hermes (instead of the + # active profile) is observable in logs rather than failing silently. 
+ logger.warning( + "Falling back to ~/.hermes for OAuth credential storage: " + "active-profile resolution failed: %s", + exc, + ) + return Path.home() / ".hermes" + + +# ── legacy auth.json helpers ──────────────────────────────────────────────── + +def _read_auth_json(auth_path: Path | None = None) -> dict[str, Any]: + """Read auth.json and return parsed dict, or an empty compatible store.""" + path = auth_path or AUTH_JSON_PATH + if path.exists(): try: - return json.loads(AUTH_JSON_PATH.read_text()) + loaded = json.loads(path.read_text(encoding="utf-8")) + return loaded if isinstance(loaded, dict) else {} except json.JSONDecodeError as exc: - logger.warning("Failed to parse %s: %s", AUTH_JSON_PATH, exc) + logger.warning("Failed to parse %s: %s", path, exc) return {} return {} -def _write_auth_json(data): - """Atomically write auth.json via temp-file rename. +def read_auth_json(): + """Public wrapper for streaming credential self-heal code.""" + return _read_auth_json() - SECURITY: auth.json contains OAuth access/refresh tokens. ``tmp.replace()`` - preserves the temp file's mode (created with the process umask, typically - 0644 or 0664), NOT the prior auth.json mode. Without an explicit chmod, - tokens land world-readable on shared systems. Set 0600 BEFORE the rename - so there is no window where the final file is world-readable. - (Opus pre-release advisor finding.) + +def _write_auth_json(data: dict[str, Any], auth_path: Path | None = None) -> Path: + """Atomically write auth.json with owner-only permissions. + + OAuth access/refresh tokens live in this file. The temp file is chmod 0600 + before rename so the final path never inherits a permissive process umask. 
""" - import os, stat - AUTH_JSON_PATH.parent.mkdir(parents=True, exist_ok=True) - tmp = AUTH_JSON_PATH.with_suffix('.tmp') - tmp.write_text(json.dumps(data, indent=2, ensure_ascii=False)) + path = auth_path or AUTH_JSON_PATH + path.parent.mkdir(parents=True, exist_ok=True) + tmp = path.with_name(f"{path.name}.tmp.{os.getpid()}.{uuid.uuid4().hex}") try: - tmp.chmod(0o600) - except OSError as e: - # Best-effort: if chmod fails (e.g. on a filesystem that doesn't - # support POSIX modes), don't abort. The startup permission fixer - # in api.startup will sweep auth.json on the next process start. - logger.warning("Failed to chmod 0600 on %s: %s", tmp, e) - tmp.replace(AUTH_JSON_PATH) + tmp.write_text(json.dumps(data, indent=2, ensure_ascii=False) + "\n", encoding="utf-8") + try: + tmp.chmod(0o600) + except OSError as exc: + logger.warning("Failed to chmod 0600 on %s: %s", tmp, exc) + tmp.replace(path) + try: + path.chmod(stat.S_IRUSR | stat.S_IWUSR) + except OSError: + pass + return path + finally: + try: + if tmp.exists(): + tmp.unlink() + except OSError: + pass -# ── Codex device-code flow ── +def _now_iso() -> str: + return datetime.now(timezone.utc).isoformat().replace("+00:00", "Z") -def start_codex_device_code(): - """Start Codex OAuth device-code flow. - Returns dict: { device_code, user_code, verification_uri, expires_in, interval } - Raises RuntimeError on network error. 
+def _persist_codex_credentials(hermes_home: Path, token_data: dict[str, Any]) -> Path: + """Persist Codex OAuth credentials to active-profile auth.json.""" + access_token = str(token_data.get("access_token") or "").strip() + refresh_token = str(token_data.get("refresh_token") or "").strip() + if not access_token: + raise RuntimeError("Codex token exchange did not return an access_token") + + auth_path = Path(hermes_home) / "auth.json" + auth = _read_auth_json(auth_path) + auth.setdefault("version", 1) + pool = auth.setdefault("credential_pool", {}) + if not isinstance(pool, dict): + pool = {} + auth["credential_pool"] = pool + entries = pool.setdefault("openai-codex", []) + if not isinstance(entries, list): + entries = [] + pool["openai-codex"] = entries + + now = _now_iso() + entry = None + # Per Opus advisor on stage-296: also accept the legacy `source == + # "oauth_device"` value so users with prior Codex OAuth credentials + # (written by older WebUI versions before this PR's source-key change) + # get their existing entry updated in-place rather than accumulating a + # stale duplicate pool entry. 
+ _accept_sources = {"manual:device_code", "oauth_device"} + for candidate in entries: + if isinstance(candidate, dict) and candidate.get("source") in _accept_sources: + entry = candidate + break + if entry is None: + entry = { + "id": "codex-oauth-" + uuid.uuid4().hex[:12], + "label": "Codex OAuth", + "auth_type": "oauth", + "priority": 0, + "source": "manual:device_code", + "base_url": CODEX_BASE_URL, + "created_at": now, + } + entries.insert(0, entry) + + entry.update( + { + "label": "Codex OAuth", + "auth_type": "oauth", + "priority": 0, + "source": "manual:device_code", + "access_token": access_token, + "refresh_token": refresh_token, + "base_url": CODEX_BASE_URL, + "last_refresh": now, + "updated_at": now, + } + ) + auth["updated_at"] = now + path = _write_auth_json(auth, auth_path) + + try: + from api.config import invalidate_credential_pool_cache + + invalidate_credential_pool_cache("openai-codex") + except Exception: + logger.debug("Failed to invalidate openai-codex credential cache", exc_info=True) + + return path + + +# Backward-compatible wrapper used by older code/tests. +def _save_codex_credentials(token_data): + return _persist_codex_credentials(_get_active_hermes_home(), token_data) + + +# ── Anthropic / Claude Code credential linking ───────────────────────────── + +def _read_claude_code_credentials() -> dict[str, Any] | None: + """Read Claude Code OAuth credentials from the host without exposing them. + + Delegates to the agent adapter which knows about ~/.claude/.credentials.json + and macOS Keychain. Returns the credential dict or None. 
""" - params = { - "client_id": CODEX_CLIENT_ID, - "scope": CODEX_SCOPE, + try: + from agent.anthropic_adapter import ( + is_claude_code_token_valid, + read_claude_code_credentials, + ) + + creds = read_claude_code_credentials() + if creds and ( + is_claude_code_token_valid(creds) or bool(creds.get("refreshToken")) + ): + return creds + except Exception as exc: + logger.debug("Could not read Claude Code credentials: %s", exc) + return None + + +def _clear_anthropic_env_values(hermes_home: Path) -> None: + """Clear Anthropic API/setup-token env values in the active profile only. + + The .env write path already clears os.environ while holding the streaming + env lock. Keep a locked process-env clear here too so import/write failures + cannot leave or partially clear stale Anthropic fallbacks. + """ + try: + from api.providers import _write_env_file + + _write_env_file( + Path(hermes_home) / ".env", + {key: None for key in _ANTHROPIC_ENV_KEYS}, + ) + except Exception as exc: + logger.warning("Failed to clear Anthropic env values: %s", exc) + _clear_process_anthropic_env_values() + + +def _link_anthropic_credentials(hermes_home: Path) -> None: + """Link Hermes to use Claude Code's credential store. + + Clears ANTHROPIC_TOKEN and ANTHROPIC_API_KEY from the Hermes .env so + that resolve_anthropic_token() falls through to reading Claude Code's + ~/.claude/.credentials.json directly — the same thing the CLI's + ``use_anthropic_claude_code_credentials()`` does. + + Also writes a marker entry in auth.json credential_pool so that + ``_provider_oauth_authenticated("anthropic", ...)`` can detect the + linked state without touching the actual credential files. + """ + _clear_anthropic_env_values(hermes_home) + + # Write a pool marker (no secrets) so onboarding status can detect linkage. 
+ auth_path = Path(hermes_home) / "auth.json" + auth = _read_auth_json(auth_path) + auth.setdefault("version", 1) + pool = auth.setdefault("credential_pool", {}) + if not isinstance(pool, dict): + pool = {} + auth["credential_pool"] = pool + entries = pool.setdefault("anthropic", []) + if not isinstance(entries, list): + entries = [] + pool["anthropic"] = entries + + now = _now_iso() + entry = None + for candidate in entries: + if isinstance(candidate, dict) and candidate.get("source") == "claude_code_linked": + entry = candidate + break + if entry is None: + entry = { + "id": "anthropic-claude-code-" + uuid.uuid4().hex[:12], + "label": "Claude Code (linked)", + "auth_type": "oauth", + "priority": 0, + "source": "claude_code_linked", + "created_at": now, + } + entries.insert(0, entry) + + entry.update({ + "label": "Claude Code (linked)", + "auth_type": "oauth", + "priority": 0, + "source": "claude_code_linked", + "updated_at": now, + }) + auth["updated_at"] = now + _write_auth_json(auth, auth_path) + + try: + from api.config import invalidate_credential_pool_cache + invalidate_credential_pool_cache("anthropic") + except Exception: + logger.debug("Failed to invalidate anthropic credential cache", exc_info=True) + + +def _anthropic_public_start_payload(flow_id: str, flow: dict[str, Any]) -> dict[str, Any]: + payload: dict[str, Any] = { + "ok": True, + "provider": "anthropic", + "flow_id": flow_id, + "status": flow.get("status", "pending"), + "poll_interval_seconds": flow.get("poll_interval_seconds", ANTHROPIC_CREDENTIAL_POLL_SECONDS), } - data = urllib.parse.urlencode(params).encode() - req = urllib.request.Request(CODEX_AUTH_URL, data=data, method="POST") - req.add_header("Content-Type", "application/x-www-form-urlencoded") + if flow.get("status") == "pending": + payload["action_required"] = ( + "Claude Code credentials were not found on this server. 
" + "Please run 'claude login' or 'claude setup-token' in a terminal " + "on the host, then return here — this page will detect the credentials automatically." + ) + if flow.get("expires_at"): + payload["expires_at"] = flow["expires_at"] + return payload + + +def _anthropic_public_status_payload(flow_id: str, flow: dict[str, Any]) -> dict[str, Any]: + payload: dict[str, Any] = { + "ok": True, + "provider": "anthropic", + "flow_id": flow_id, + "status": flow.get("status", "error"), + } + if flow.get("status") == "error" and flow.get("error"): + payload["error"] = ANTHROPIC_PUBLIC_LINK_ERROR + return payload + + +def _spawn_anthropic_credential_worker(flow_id: str) -> None: + worker = threading.Thread( + target=_run_anthropic_credential_worker, args=(flow_id,), daemon=True, + ) + worker.start() + + +def _run_anthropic_credential_worker(flow_id: str) -> None: + """Poll for Claude Code credential appearance until found, cancelled, or expired.""" + while True: + with _OAUTH_FLOWS_LOCK: + flow = dict(_OAUTH_FLOWS.get(flow_id) or {}) + if not flow: + return + if flow.get("status") != "pending": + return + if float(flow.get("expires_at") or 0) <= time.time(): + _set_flow_status(flow_id, "expired") + return + + time.sleep(max(1, int(flow.get("poll_interval_seconds") or ANTHROPIC_CREDENTIAL_POLL_SECONDS))) + + # Re-check status under lock (cancel may have arrived during sleep) + with _OAUTH_FLOWS_LOCK: + live = _OAUTH_FLOWS.get(flow_id) + if not live or live.get("status") != "pending": + return + + try: + creds = _read_claude_code_credentials() + if creds is None: + continue + + # Re-check status under lock before linking — cancel must win + with _OAUTH_FLOWS_LOCK: + current = _OAUTH_FLOWS.get(flow_id) + if not current or current.get("status") != "pending": + return + + hermes_home = Path(flow["hermes_home"]) + _link_anthropic_credentials(hermes_home) + with _OAUTH_FLOWS_LOCK: + current = _OAUTH_FLOWS.get(flow_id) + if not current or current.get("status") != "pending": + 
cancelled = bool(current and current.get("status") == "cancelled") + else: + current["status"] = "success" + current["updated_at"] = time.time() + _drop_sensitive_flow_fields(current) + cancelled = False + if cancelled: + _remove_anthropic_link_marker(hermes_home) + return + except Exception as exc: + logger.warning("Anthropic credential polling failed: %s", exc) + with _OAUTH_FLOWS_LOCK: + current = _OAUTH_FLOWS.get(flow_id) + if current and current.get("status") == "pending": + current["status"] = "error" + current["updated_at"] = time.time() + current["error"] = str(exc) + _drop_sensitive_flow_fields(current) + return + + +def _remove_anthropic_link_marker(hermes_home: Path) -> None: + """Remove the secret-free Claude Code linked marker after a cancelled race.""" + auth_path = Path(hermes_home) / "auth.json" + auth = _read_auth_json(auth_path) + pool = auth.get("credential_pool") + if not isinstance(pool, dict): + return + entries = pool.get("anthropic") + if not isinstance(entries, list): + return + kept = [entry for entry in entries if not (isinstance(entry, dict) and entry.get("source") == "claude_code_linked")] + if len(kept) == len(entries): + return + if kept: + pool["anthropic"] = kept + else: + pool.pop("anthropic", None) + auth["updated_at"] = _now_iso() + _write_auth_json(auth, auth_path) try: - with urllib.request.urlopen(req, timeout=15) as resp: - return json.loads(resp.read().decode()) - except Exception as e: - raise RuntimeError(f"Failed to start Codex OAuth: {e}") from e + from api.config import invalidate_credential_pool_cache + invalidate_credential_pool_cache("anthropic") + except Exception: + logger.debug("Failed to invalidate anthropic credential cache", exc_info=True) + + +# ── Codex protocol ────────────────────────────────────────────────────────── + +def _json_request(url: str, payload: dict[str, Any], *, form: bool = False) -> dict[str, Any]: + if form: + data = urllib.parse.urlencode(payload).encode("utf-8") + content_type = 
"application/x-www-form-urlencoded" + else: + data = json.dumps(payload).encode("utf-8") + content_type = "application/json" + req = urllib.request.Request( + url, + data=data, + method="POST", + headers={"Content-Type": content_type, "Accept": "application/json"}, + ) + with urllib.request.urlopen(req, timeout=15) as resp: + return json.loads(resp.read().decode("utf-8")) + + +def _request_codex_user_code() -> dict[str, Any]: + return _json_request(CODEX_USER_CODE_URL, {"client_id": CODEX_CLIENT_ID}) + + +def _poll_codex_authorization(device_auth_id: str, user_code: str) -> dict[str, Any] | None: + try: + return _json_request( + CODEX_DEVICE_TOKEN_URL, + {"device_auth_id": device_auth_id, "user_code": user_code}, + ) + except urllib.error.HTTPError as exc: + if exc.code in (403, 404): + return None + raise + + +def _exchange_codex_authorization(authorization_code: str, code_verifier: str) -> dict[str, Any]: + return _json_request( + CODEX_TOKEN_URL, + { + "grant_type": "authorization_code", + "code": authorization_code, + "redirect_uri": CODEX_REDIRECT_URI, + "client_id": CODEX_CLIENT_ID, + "code_verifier": code_verifier, + }, + form=True, + ) + + +def _codex_public_start_payload(flow_id: str, flow: dict[str, Any]) -> dict[str, Any]: + return { + "ok": True, + "provider": "openai-codex", + "flow_id": flow_id, + "status": flow.get("status", "pending"), + "verification_uri": CODEX_VERIFICATION_URI, + "user_code": flow.get("user_code", ""), + "expires_at": flow.get("expires_at"), + "poll_interval_seconds": flow.get("poll_interval_seconds", 5), + } + + +def _codex_public_status_payload(flow_id: str, flow: dict[str, Any]) -> dict[str, Any]: + payload = { + "ok": True, + "provider": "openai-codex", + "flow_id": flow_id, + "status": flow.get("status", "error"), + } + if flow.get("status") == "error" and flow.get("error"): + payload["error"] = str(flow.get("error"))[:200] + return payload + + +def _public_start_payload(flow_id: str, flow: dict[str, Any]) -> dict[str, Any]: 
+ provider = flow.get("provider", "openai-codex") + if provider == "anthropic": + return _anthropic_public_start_payload(flow_id, flow) + return _codex_public_start_payload(flow_id, flow) + + +def _public_status_payload(flow_id: str, flow: dict[str, Any]) -> dict[str, Any]: + provider = flow.get("provider", "openai-codex") + if provider == "anthropic": + return _anthropic_public_status_payload(flow_id, flow) + return _codex_public_status_payload(flow_id, flow) + + +def _drop_sensitive_flow_fields(flow: dict[str, Any]) -> None: + for key in ( + "device_auth_id", + "authorization_code", + "code_verifier", + "access_token", + "refresh_token", + "token_data", + ): + flow.pop(key, None) + + +def _cleanup_oauth_flows(now: float | None = None) -> None: + now = now or time.time() + cutoff = now - 300 + with _OAUTH_FLOWS_LOCK: + for fid, flow in list(_OAUTH_FLOWS.items()): + status = flow.get("status") + if status == "pending" and float(flow.get("expires_at") or 0) <= now: + flow["status"] = "expired" + _drop_sensitive_flow_fields(flow) + if status in {"success", "expired", "cancelled", "error"} and float(flow.get("updated_at") or 0) < cutoff: + _OAUTH_FLOWS.pop(fid, None) + + +def _spawn_codex_oauth_worker(flow_id: str) -> None: + worker = threading.Thread(target=_run_codex_oauth_worker, args=(flow_id,), daemon=True) + worker.start() + + +def _set_flow_status(flow_id: str, status: str, **fields: Any) -> None: + with _OAUTH_FLOWS_LOCK: + flow = _OAUTH_FLOWS.get(flow_id) + if not flow: + return + flow["status"] = status + flow["updated_at"] = time.time() + flow.update(fields) + if status in {"success", "expired", "cancelled", "error"}: + _drop_sensitive_flow_fields(flow) + + +def _run_codex_oauth_worker(flow_id: str) -> None: + while True: + with _OAUTH_FLOWS_LOCK: + flow = dict(_OAUTH_FLOWS.get(flow_id) or {}) + if not flow: + return + status = flow.get("status") + if status != "pending": + return + if float(flow.get("expires_at") or 0) <= time.time(): + 
_set_flow_status(flow_id, "expired") + return + + time.sleep(max(1, int(flow.get("poll_interval_seconds") or 5))) + + with _OAUTH_FLOWS_LOCK: + live = dict(_OAUTH_FLOWS.get(flow_id) or {}) + if live.get("status") != "pending": + return + try: + code_resp = _poll_codex_authorization( + str(live.get("device_auth_id") or ""), + str(live.get("user_code") or ""), + ) + if code_resp is None: + continue + authorization_code = str(code_resp.get("authorization_code") or "").strip() + code_verifier = str(code_resp.get("code_verifier") or "").strip() + if not authorization_code or not code_verifier: + raise RuntimeError("Device auth response missing authorization_code or code_verifier") + tokens = _exchange_codex_authorization(authorization_code, code_verifier) + # Re-check status under lock before persisting: a cancel/expire that + # raced with the device-token + token-exchange network calls must + # win, so we don't persist credentials the user explicitly aborted. + with _OAUTH_FLOWS_LOCK: + current = _OAUTH_FLOWS.get(flow_id) + if not current or current.get("status") != "pending": + return + _persist_codex_credentials(Path(live["hermes_home"]), tokens) + _set_flow_status(flow_id, "success") + return + except Exception as exc: + logger.warning("Codex OAuth onboarding flow failed: %s", exc) + _set_flow_status(flow_id, "error", error=str(exc)) + return + + +def _start_anthropic_flow(hermes_home: Path) -> dict[str, Any]: + """Start or immediately complete the Anthropic credential-linking flow.""" + creds = _read_claude_code_credentials() + flow_id = uuid.uuid4().hex + + if creds: + # Credentials already exist — link and return success immediately. 
+ _link_anthropic_credentials(hermes_home) + flow = { + "provider": "anthropic", + "status": "success", + "hermes_home": str(hermes_home), + "created_at": time.time(), + "updated_at": time.time(), + } + with _OAUTH_FLOWS_LOCK: + _OAUTH_FLOWS[flow_id] = flow + return _public_start_payload(flow_id, flow) + + # No credentials found — create a pending flow that polls for them. + expires_at = time.time() + ANTHROPIC_FLOW_MAX_WAIT_SECONDS + flow = { + "provider": "anthropic", + "status": "pending", + "expires_at": expires_at, + "poll_interval_seconds": ANTHROPIC_CREDENTIAL_POLL_SECONDS, + "hermes_home": str(hermes_home), + "created_at": time.time(), + "updated_at": time.time(), + } + with _OAUTH_FLOWS_LOCK: + _OAUTH_FLOWS[flow_id] = flow + _spawn_anthropic_credential_worker(flow_id) + return _public_start_payload(flow_id, flow) + + +def start_onboarding_oauth_flow(body: dict[str, Any] | None) -> dict[str, Any]: + """Start the supported onboarding OAuth flow. + + Supports OpenAI Codex (device-code flow) and Anthropic/Claude Code + (credential-linking flow). Other providers are rejected. 
+ """ + _cleanup_oauth_flows() + provider = str((body or {}).get("provider") or "").strip().lower() + if provider not in _ALLOWED_ONBOARDING_OAUTH_PROVIDERS: + if provider in _REJECTED_ONBOARDING_OAUTH_PROVIDERS or provider: + raise ValueError( + "Only OpenAI Codex and Anthropic/Claude OAuth are supported " + "in WebUI onboarding right now" + ) + raise ValueError("provider is required") + + # Normalize Claude aliases to canonical "anthropic" + if provider in _ANTHROPIC_PROVIDER_ALIASES: + return _start_anthropic_flow(_get_active_hermes_home()) + + # Codex flow + hermes_home = _get_active_hermes_home() + try: + device = _request_codex_user_code() + except Exception as exc: + raise RuntimeError(f"Failed to start Codex OAuth: {exc}") from exc + + user_code = str(device.get("user_code") or "").strip() + device_auth_id = str(device.get("device_auth_id") or "").strip() + if not user_code or not device_auth_id: + raise RuntimeError("Device code response missing required fields") + + interval = max(3, int(device.get("interval") or 5)) + expires_in = int(device.get("expires_in") or CODEX_FLOW_MAX_WAIT_SECONDS) + expires_at = time.time() + min(max(expires_in, 60), CODEX_FLOW_MAX_WAIT_SECONDS) + flow_id = uuid.uuid4().hex + flow = { + "provider": "openai-codex", + "status": "pending", + "device_auth_id": device_auth_id, + "user_code": user_code, + "expires_at": expires_at, + "poll_interval_seconds": interval, + "hermes_home": str(hermes_home), + "created_at": time.time(), + "updated_at": time.time(), + } + with _OAUTH_FLOWS_LOCK: + _OAUTH_FLOWS[flow_id] = flow + _spawn_codex_oauth_worker(flow_id) + return _public_start_payload(flow_id, flow) + + +def poll_onboarding_oauth_flow(flow_id: str) -> dict[str, Any]: + _cleanup_oauth_flows() + fid = str(flow_id or "").strip() + if not fid: + raise ValueError("flow_id is required") + with _OAUTH_FLOWS_LOCK: + flow = _OAUTH_FLOWS.get(fid) + if not flow: + raise KeyError("OAuth flow not found") + if flow.get("status") == "pending" and 
float(flow.get("expires_at") or 0) <= time.time(): + flow["status"] = "expired" + flow["updated_at"] = time.time() + _drop_sensitive_flow_fields(flow) + return _public_status_payload(fid, dict(flow)) + + +def cancel_onboarding_oauth_flow(body: dict[str, Any] | None) -> dict[str, Any]: + fid = str((body or {}).get("flow_id") or "").strip() + if not fid: + raise ValueError("flow_id is required") + requested_provider = _normalize_onboarding_oauth_provider(str((body or {}).get("provider") or "")) + if requested_provider not in {"openai-codex", "anthropic"}: + requested_provider = "openai-codex" + with _OAUTH_FLOWS_LOCK: + flow = _OAUTH_FLOWS.get(fid) + if not flow: + return {"ok": True, "provider": requested_provider, "flow_id": fid, "status": "cancelled"} + if flow.get("status") == "pending": + flow["status"] = "cancelled" + flow["updated_at"] = time.time() + _drop_sensitive_flow_fields(flow) + result = _public_status_payload(fid, dict(flow)) + return result + + +# Backward-compatible names from the abandoned spike. They intentionally do not +# expose provider device secrets to callers anymore. +def start_codex_device_code(): + return start_onboarding_oauth_flow({"provider": "openai-codex"}) def poll_codex_token(device_code, interval=5): - """Poll for Codex OAuth token. Generator that yields status dicts. 
- - Yields: - {"status": "polling", "attempt": N, "max_attempts": 40} - {"status": "success", "credentials": {...}} - {"status": "error", "error": "..."} - """ - params = { - "grant_type": CODEX_GRANT_TYPE_DEVICE, - "device_code": device_code, - "client_id": CODEX_CLIENT_ID, - } - data = urllib.parse.urlencode(params).encode() - max_attempts = 40 # 40 * 5 = 200s max - - for attempt in range(max_attempts): - yield {"status": "polling", "attempt": attempt + 1, "max_attempts": max_attempts} - - req = urllib.request.Request(CODEX_TOKEN_URL, data=data, method="POST") - req.add_header("Content-Type", "application/x-www-form-urlencoded") - try: - with urllib.request.urlopen(req, timeout=15) as resp: - token_data = json.loads(resp.read().decode()) - # Save to auth.json credential_pool - _save_codex_credentials(token_data) - yield {"status": "success", "credentials": { - "access_token": "***", - "refresh_token": "***", - "token_type": token_data.get("token_type"), - "expires_in": token_data.get("expires_in"), - }} - return - except urllib.error.HTTPError as e: - body = e.read().decode() - try: - err_data = json.loads(body) - error = err_data.get("error", "") - if error == "authorization_pending": - time.sleep(interval) - continue - elif error == "slow_down": - time.sleep(interval + 5) - continue - elif error == "expired_token": - yield {"status": "error", "error": "Device code expired. Please try again."} - return - else: - yield {"status": "error", "error": err_data.get("error_description", error)} - return - except Exception: - yield {"status": "error", "error": body[:200]} - return - except Exception as e: - yield {"status": "error", "error": str(e)} - return - - yield {"status": "error", "error": "OAuth flow timed out. 
Please try again."} - - -def _save_codex_credentials(token_data): - """Save Codex OAuth credentials to auth.json credential_pool.""" - auth = _read_auth_json() - if "credential_pool" not in auth: - auth["credential_pool"] = {} - pool = auth["credential_pool"] - - if "openai-codex" not in pool: - pool["openai-codex"] = [] - - # Check if an oauth_device entry already exists (update in place) - updated = False - _now_iso = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()) - for entry in pool["openai-codex"]: - if entry.get("source") == "oauth_device": - entry["access_token"] = token_data.get("access_token", "") - entry["refresh_token"] = token_data.get("refresh_token", "") - entry["auth_type"] = "oauth" - entry["updated_at"] = _now_iso - updated = True - break - - if not updated: - existing_ids = {e["id"] for e in pool.get("openai-codex", [])} - for _ in range(3): # retry on collision - cred_id = "codex-oauth-" + uuid.uuid4().hex[:8] - if cred_id not in existing_ids: - break - pool["openai-codex"].append({ - "id": cred_id, - "label": "Codex OAuth", - "auth_type": "oauth", - "source": "oauth_device", - "access_token": token_data.get("access_token", ""), - "refresh_token": token_data.get("refresh_token", ""), - "priority": 1, - "created_at": _now_iso, - }) - - auth["updated_at"] = _now_iso - _write_auth_json(auth) + yield {"status": "error", "error": "Use /api/onboarding/oauth/poll with flow_id"} diff --git a/api/onboarding.py b/api/onboarding.py index b572c542..806e4856 100644 --- a/api/onboarding.py +++ b/api/onboarding.py @@ -53,6 +53,8 @@ _SUPPORTED_PROVIDER_SETUPS = { "requires_base_url": False, "models": list(_PROVIDER_MODELS.get("anthropic", [])), "category": "easy_start", + "oauth_provider": "anthropic", + "oauth_label": "Claude Code OAuth", }, "openai": { "label": "OpenAI", @@ -137,6 +139,15 @@ _SUPPORTED_PROVIDER_SETUPS = { "models": list(_PROVIDER_MODELS.get("deepseek", [])), "category": "specialized", }, + "xiaomi": { + "label": "Xiaomi MiMo", + "env_var": 
"XIAOMI_API_KEY", + "default_model": "mimo-v2.5-pro", + "default_base_url": "https://api.xiaomimimo.com/v1", + "requires_base_url": False, + "models": list(_PROVIDER_MODELS.get("xiaomi", [])), + "category": "specialized", + }, "zai": { "label": "Z.AI / GLM (智谱)", "env_var": "GLM_API_KEY", @@ -185,8 +196,9 @@ _PROVIDER_CATEGORIES = [ ] _UNSUPPORTED_PROVIDER_NOTE = ( - "OAuth and advanced provider flows such as Nous Portal, OpenAI Codex, and GitHub " - "Copilot are still terminal-first. Use `hermes model` for those flows." + "Advanced provider flows such as Nous Portal and GitHub Copilot are still " + "terminal-first. OpenAI Codex and Anthropic Claude Code can be authenticated in this onboarding flow " + "when your Hermes config selects the corresponding provider." ) @@ -537,7 +549,7 @@ def _provider_api_key_present( # var names and can check os.environ for a valid key. # Exclude known OAuth/token-flow providers — those are handled separately by # _provider_oauth_authenticated() and should not be short-circuited here. - _known_oauth = {"openai-codex", "copilot", "copilot-acp", "qwen-oauth", "nous"} + _known_oauth = {"openai-codex", "copilot", "copilot-acp", "qwen-oauth", "nous", "anthropic"} if provider not in _SUPPORTED_PROVIDER_SETUPS and provider not in _known_oauth: try: from hermes_cli.auth import get_auth_status as _gas @@ -581,10 +593,11 @@ def _provider_oauth_authenticated(provider: str, hermes_home: "Path") -> bool: used by current Hermes runtime auth resolution. 
""" provider = (provider or "").strip().lower() + provider = {"claude": "anthropic", "claude-code": "anthropic"}.get(provider, provider) if not provider: return False - _known_oauth_providers = {"openai-codex", "copilot", "copilot-acp", "qwen-oauth", "nous"} + _known_oauth_providers = {"openai-codex", "copilot", "copilot-acp", "qwen-oauth", "nous", "anthropic"} if provider not in _known_oauth_providers: return False @@ -606,7 +619,16 @@ def _provider_oauth_authenticated(provider: str, hermes_home: "Path") -> bool: if isinstance(pool_store, dict): entries = pool_store.get(provider) if isinstance(entries, list): - return any(_oauth_payload_has_token(entry) for entry in entries) + for entry in entries: + if _oauth_payload_has_token(entry): + return True + if ( + provider == "anthropic" + and isinstance(entry, dict) + and entry.get("auth_type") == "oauth" + and entry.get("source") == "claude_code_linked" + ): + return True return False except Exception: @@ -647,6 +669,10 @@ def _status_from_runtime(cfg: dict, imports_ok: bool) -> dict: ) else: provider_ready = _provider_api_key_present(provider, cfg, env_values) + if not provider_ready and meta.get("oauth_provider"): + provider_ready = _provider_oauth_authenticated( + str(meta.get("oauth_provider")), _get_active_hermes_home() + ) else: # Unknown provider — may be an OAuth flow (openai-codex, copilot, etc.) # OR an API-key provider not in the quick-setup list (minimax-cn, deepseek, @@ -729,6 +755,8 @@ def _build_setup_catalog(cfg: dict) -> dict: "models": list(meta.get("models", [])), "category": meta.get("category", "easy_start"), "quick": meta.get("quick", False), + "oauth_provider": meta.get("oauth_provider") or "", + "oauth_label": meta.get("oauth_label") or "", } ) @@ -748,9 +776,9 @@ def _build_setup_catalog(cfg: dict) -> dict: # Flag whether the currently-configured provider is OAuth-based (not in the # API-key flow). 
The frontend uses this to show a confirmation card instead # of a key input when the user has already authenticated via 'hermes auth'. - current_is_oauth = current_provider not in _SUPPORTED_PROVIDER_SETUPS and bool( - current_provider - ) + current_is_oauth = ( + current_provider not in _SUPPORTED_PROVIDER_SETUPS and bool(current_provider) + ) or _provider_oauth_authenticated(current_provider, _get_active_hermes_home()) return { "providers": providers, @@ -915,11 +943,13 @@ def apply_onboarding_setup(body: dict) -> dict: if not api_key and not _provider_api_key_present(provider, cfg, env_values): # Providers that may run keyless (lmstudio, ollama, custom — gated by # `key_optional` in _SUPPORTED_PROVIDER_SETUPS) are allowed to onboard - # with no api_key. The agent runtime substitutes a placeholder - # (LMSTUDIO_NOAUTH_PLACEHOLDER) for those, and the probe (#1499) gives - # the user immediate feedback if their server actually does require - # auth (http_4xx with status 401). See #1499 third sub-bug from #1420. - if not provider_meta.get("key_optional"): + # with no api_key. OAuth-capable wizard providers (currently Anthropic + # via Claude Code) are also allowed once their server-side OAuth/link + # marker is present. + oauth_ready = bool(provider_meta.get("oauth_provider")) and _provider_oauth_authenticated( + str(provider_meta.get("oauth_provider")), _get_active_hermes_home() + ) + if not provider_meta.get("key_optional") and not oauth_ready: raise ValueError(f"{provider_meta['env_var']} is required") model_cfg = cfg.get("model", {}) diff --git a/api/profiles.py b/api/profiles.py index fc94a336..9af9bcba 100644 --- a/api/profiles.py +++ b/api/profiles.py @@ -37,6 +37,13 @@ _loaded_profile_env_keys: set[str] = set() # process-global _active_profile. 
_tls = threading.local() +def _unwrap_profile_home_to_base(home: Path) -> Path: + """Return the base Hermes home when *home* is already a named profile dir.""" + if home.parent.name == 'profiles': + return home.parent.parent + return home + + def _resolve_base_hermes_home() -> Path: """Return the BASE ~/.hermes directory — the root that contains profiles/. @@ -56,20 +63,22 @@ def _resolve_base_hermes_home() -> Path: reading it here would make _DEFAULT_HERMES_HOME point to that subdir, causing switch_profile('webui') to look for /home/user/.hermes/profiles/webui/profiles/webui — which doesn't exist. + + HERMES_BASE_HOME normally points at the base home already, but isolated + single-profile WebUI deployments can provide /base/profiles/ there as + well. Normalize both env vars through the same helper so active-profile + and per-request resolution share one base-root contract (#749). """ # Explicit override for tests or unusual setups base_override = os.getenv('HERMES_BASE_HOME', '').strip() if base_override: - return Path(base_override).expanduser() + return _unwrap_profile_home_to_base(Path(base_override).expanduser()) hermes_home = os.getenv('HERMES_HOME', '').strip() if hermes_home: p = Path(hermes_home).expanduser() # If HERMES_HOME points to a profiles/ subdir, walk up two levels to the base - if p.parent.name == 'profiles': - return p.parent.parent - # Otherwise trust it (e.g. test isolation sets HERMES_HOME to TEST_STATE_DIR) - return p + return _unwrap_profile_home_to_base(p) return Path.home() / '.hermes' @@ -91,6 +100,103 @@ def _read_active_profile_file() -> str: # ── Public API ────────────────────────────────────────────────────────────── +# ── Root-profile resolution (#1612) ──────────────────────────────────────── +# +# Hermes Agent allows the root/default profile (~/.hermes itself) to have a +# display name other than the legacy literal 'default'. 
When that happens, +# WebUI must NOT resolve the display name as ~/.hermes/profiles/ — that +# directory doesn't exist, and every site that does `if name == 'default':` +# will fall through to the wrong filesystem path. +# +# `_is_root_profile(name)` answers "does this name resolve to ~/.hermes?" and +# is the canonical replacement for scattered `if name == 'default':` checks +# in switch_profile, get_active_hermes_home, _validate_profile_name, etc. +# +# Cost note: list_profiles_api() shells out via hermes_cli (non-trivial), so +# we memoize the lookup. The cache is invalidated whenever profiles are +# created, deleted, renamed, or cloned — i.e. on every mutation site we +# control. +_root_profile_name_cache: set[str] = {'default'} +_root_profile_name_cache_lock = threading.Lock() +_root_profile_name_cache_loaded = False + + +def _invalidate_root_profile_cache() -> None: + """Drop the memoized root-profile-name set. + + Called whenever profile metadata might have changed: create, clone, + delete, rename. The next _is_root_profile() call repopulates from + list_profiles_api(). + """ + global _root_profile_name_cache_loaded + with _root_profile_name_cache_lock: + _root_profile_name_cache.clear() + _root_profile_name_cache.add('default') + _root_profile_name_cache_loaded = False + + +def _is_root_profile(name: str) -> bool: + """True if *name* resolves to the Hermes Agent root profile (~/.hermes). + + Matches the legacy 'default' alias plus any name where list_profiles_api() + reports is_default=True. Memoized; call _invalidate_root_profile_cache() + after mutating profile metadata. + """ + global _root_profile_name_cache_loaded + if not name: + return False + if name == 'default': + return True + with _root_profile_name_cache_lock: + if _root_profile_name_cache_loaded: + return name in _root_profile_name_cache + # Cache miss — populate from list_profiles_api(). Done outside the lock to + # avoid holding it across a hermes_cli subprocess call. 
+ try: + infos = list_profiles_api() + except Exception: + logger.debug("Failed to list profiles for root-profile lookup", exc_info=True) + return False + with _root_profile_name_cache_lock: + _root_profile_name_cache.clear() + _root_profile_name_cache.add('default') + for p in infos: + try: + if p.get('is_default') and p.get('name'): + _root_profile_name_cache.add(p['name']) + except (AttributeError, TypeError): + continue + _root_profile_name_cache_loaded = True + return name in _root_profile_name_cache + + +def _profiles_match(row_profile, active_profile) -> bool: + """Return True if a session/project row's profile matches the active profile. + + Treats both the literal alias 'default' and any renamed-root display name + (per _is_root_profile) as equivalent, so legacy rows tagged 'default' + still surface when the user has renamed the root profile to e.g. 'kinni', + and vice versa. + + A row with no profile (`None` or empty string) is treated as belonging to + the root profile — that's the convention used by the legacy backfill at + api/models.py::all_sessions, and matches the default seen in + `static/sessions.js` (`S.activeProfile||'default'`). + + Originally lived in api/routes.py; relocated here so both routes.py and + out-of-process consumers (mcp_server.py) can import the canonical helper + instead of duplicating the body. See #1614 for the visibility model. + """ + row = row_profile or 'default' + active = active_profile or 'default' + if row == active: + return True + # Cross-alias the renamed root. + if _is_root_profile(row) and _is_root_profile(active): + return True + return False + + def get_active_profile_name() -> str: """Return the currently active profile name. @@ -123,22 +229,287 @@ def clear_request_profile() -> None: _tls.profile = None +def _resolve_profile_home_for_name(name: str) -> Path: + """Resolve a logical profile name to its Hermes home path. + + Root/default aliases resolve to _DEFAULT_HERMES_HOME. 
Valid named profiles + resolve to _DEFAULT_HERMES_HOME/profiles/ even when the directory has + not been created yet; the agent layer may create it on first use. Invalid + names fall back to the base home so traversal-shaped cookie values cannot + influence filesystem paths. + """ + if not name or _is_root_profile(name): + return _DEFAULT_HERMES_HOME + if not _PROFILE_ID_RE.fullmatch(name): + return _DEFAULT_HERMES_HOME + return _resolve_named_profile_home(name) + + def get_active_hermes_home() -> Path: """Return the HERMES_HOME path for the currently active profile. Uses get_active_profile_name() so per-request TLS context (issue #798) is respected, not just the process-level global. """ - name = get_active_profile_name() - if name == 'default': - return _DEFAULT_HERMES_HOME - profile_dir = _DEFAULT_HERMES_HOME / 'profiles' / name - if profile_dir.is_dir(): - return profile_dir - return _DEFAULT_HERMES_HOME + return _resolve_profile_home_for_name(get_active_profile_name()) +# ── Cron-call profile isolation (issue: Scheduled jobs ignored active profile) ─ +# `cron.jobs` reads HERMES_HOME from os.environ (process-global) at function- +# call time. That bypasses our per-request thread-local profile, so the +# `/api/crons*` endpoints always returned the process-default profile's jobs. +# This context manager swaps HERMES_HOME (and the cached module-level constants +# in cron.jobs) for the duration of a cron call, serialized by a lock so +# concurrent requests from different profiles don't race on the global env var. +# +# Thread-safety note on os.environ mutation: +# CPython's os.environ assignment is GIL-protected at the bytecode level, but +# multi-step read-modify-write sequences (snapshot prev → assign new → restore +# on exit) are NOT atomic without explicit serialization. The _cron_env_lock +# below makes the entire context-manager body run-to-completion serially, so +# all webui access to HERMES_HOME goes through one thread at a time. 
Any +# subprocess.Popen() call inside `run_job` inherits the env at fork time, +# which is also under the lock — so child processes always see a consistent +# (own-profile) HERMES_HOME, never a half-swapped state. +_cron_env_lock = threading.Lock() + + +def _cron_profile_context_depth() -> int: + return int(getattr(_tls, 'cron_profile_depth', 0) or 0) + + +def _push_cron_profile_context_depth() -> None: + _tls.cron_profile_depth = _cron_profile_context_depth() + 1 + + +def _pop_cron_profile_context_depth() -> None: + depth = _cron_profile_context_depth() + _tls.cron_profile_depth = max(0, depth - 1) + + +def _home_for_scheduled_cron_job(job: dict) -> Path: + """Resolve the profile home an auto-fired scheduler job should execute in. + + Legacy jobs with no profile keep the scheduler's server-default profile. + Jobs pinned to a named profile execute under that profile's HERMES_HOME, so + an in-process WebUI scheduler thread does not leak process-global config or + .env into the agent run. If a profile was deleted after the job was saved, + fall back to the server default rather than crashing every scheduler tick. + """ + raw = str((job or {}).get('profile') or '').strip() + if not raw: + return get_active_hermes_home() + if _is_root_profile(raw): + return _DEFAULT_HERMES_HOME + if not _PROFILE_ID_RE.fullmatch(raw): + logger.warning( + "Cron job %s has invalid profile %r; falling back to server default", + (job or {}).get('id', '?'), raw, + ) + return get_active_hermes_home() + home = _resolve_named_profile_home(raw) + if not home.is_dir(): + logger.warning( + "Cron job %s references missing profile %r; falling back to server default", + (job or {}).get('id', '?'), raw, + ) + return get_active_hermes_home() + return home + + +def install_cron_scheduler_profile_isolation() -> None: + """Patch cron.scheduler.run_job for WebUI in-process scheduler safety. 
+ + Standard WebUI deployments do not start the scheduler thread in-process, but + if a future/single-process deployment calls cron.scheduler.tick() from the + WebUI worker, tick's background job path has no request TLS context. Wrap + run_job so each auto-fired job's persisted ``profile`` field gets the same + HERMES_HOME isolation as the manual /api/crons/run path. + """ + try: + import cron.scheduler as _cs + except ImportError: + logger.debug("install_cron_scheduler_profile_isolation: cron.scheduler unavailable") + return + + original = getattr(_cs, 'run_job', None) + if original is None or getattr(original, '_webui_profile_isolated', False): + return + + def _webui_profile_isolated_run_job(job, *args, **kwargs): + # Manual WebUI runs already enter cron_profile_context_for_home before + # calling run_job. Avoid nesting the non-reentrant env lock or changing + # the explicitly selected manual execution profile. + if _cron_profile_context_depth() > 0: + return original(job, *args, **kwargs) + with cron_profile_context_for_home(_home_for_scheduled_cron_job(job)): + return original(job, *args, **kwargs) + + _webui_profile_isolated_run_job._webui_profile_isolated = True + _webui_profile_isolated_run_job._webui_original_run_job = original + _cs.run_job = _webui_profile_isolated_run_job + + +class cron_profile_context_for_home: + """Context manager that pins HERMES_HOME to an explicit profile home path. + + Use this variant from worker threads that don't have TLS context (e.g. the + background thread started by /api/crons/run). The HTTP-side variant below + resolves the home via TLS. + """ + + def __init__(self, home: Path): + self._home = Path(home) + + def __enter__(self): + _cron_env_lock.acquire() + _push_cron_profile_context_depth() + try: + self._prev_env = os.environ.get('HERMES_HOME') + os.environ['HERMES_HOME'] = str(self._home) + + # Re-patch cron.jobs module-level constants (see main context manager + # below for the rationale). 
+ self._prev_cj = None + try: + import cron.jobs as _cj + self._prev_cj = (_cj.HERMES_DIR, _cj.CRON_DIR, _cj.JOBS_FILE, _cj.OUTPUT_DIR) + _cj.HERMES_DIR = self._home + _cj.CRON_DIR = self._home / 'cron' + _cj.JOBS_FILE = _cj.CRON_DIR / 'jobs.json' + _cj.OUTPUT_DIR = _cj.CRON_DIR / 'output' + except (ImportError, AttributeError): + logger.debug("cron_profile_context_for_home: cron.jobs unavailable") + + # cron.scheduler snapshots _hermes_home at import time and run_job() + # reads config/.env from that module global. Patch it alongside + # cron.jobs so manual WebUI runs actually execute under the selected + # profile, not merely write output metadata there (#617). + self._prev_cs = None + try: + import cron.scheduler as _cs + self._prev_cs = ( + getattr(_cs, '_hermes_home', None), + getattr(_cs, '_LOCK_DIR', None), + getattr(_cs, '_LOCK_FILE', None), + ) + _cs._hermes_home = self._home + _cs._LOCK_DIR = self._home / 'cron' + _cs._LOCK_FILE = _cs._LOCK_DIR / '.tick.lock' + except (ImportError, AttributeError): + logger.debug("cron_profile_context_for_home: cron.scheduler unavailable") + except Exception: + _pop_cron_profile_context_depth() + _cron_env_lock.release() + raise + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + try: + if self._prev_env is None: + os.environ.pop('HERMES_HOME', None) + else: + os.environ['HERMES_HOME'] = self._prev_env + if self._prev_cj is not None: + try: + import cron.jobs as _cj + _cj.HERMES_DIR, _cj.CRON_DIR, _cj.JOBS_FILE, _cj.OUTPUT_DIR = self._prev_cj + except (ImportError, AttributeError): + pass + if getattr(self, '_prev_cs', None) is not None: + try: + import cron.scheduler as _cs + _cs._hermes_home, _cs._LOCK_DIR, _cs._LOCK_FILE = self._prev_cs + except (ImportError, AttributeError): + pass + finally: + _pop_cron_profile_context_depth() + _cron_env_lock.release() + return False + + +class cron_profile_context: + """Context manager that pins HERMES_HOME to the TLS-active profile. 
+ + Usage: + with cron_profile_context(): + from cron.jobs import list_jobs + jobs = list_jobs(include_disabled=True) + + Serializes cron API calls across profiles (cron API is low-frequency; + serialization cost is negligible compared to correctness). + """ + + def __enter__(self): + _cron_env_lock.acquire() + _push_cron_profile_context_depth() + try: + self._prev_env = os.environ.get('HERMES_HOME') + home = get_active_hermes_home() + os.environ['HERMES_HOME'] = str(home) + + # Re-patch cron.jobs module-level constants. They are snapshot at + # import time (line 68-71 of cron/jobs.py) and don't participate in + # the module's __getattr__ lazy path, so env-var alone is not enough + # for callers that reference the module constants directly. + self._prev_cj = None + try: + import cron.jobs as _cj + self._prev_cj = (_cj.HERMES_DIR, _cj.CRON_DIR, _cj.JOBS_FILE, _cj.OUTPUT_DIR) + _cj.HERMES_DIR = home + _cj.CRON_DIR = home / 'cron' + _cj.JOBS_FILE = _cj.CRON_DIR / 'jobs.json' + _cj.OUTPUT_DIR = _cj.CRON_DIR / 'output' + except (ImportError, AttributeError): + logger.debug("cron_profile_context: cron.jobs unavailable; env-var only") + + self._prev_cs = None + try: + import cron.scheduler as _cs + self._prev_cs = ( + getattr(_cs, '_hermes_home', None), + getattr(_cs, '_LOCK_DIR', None), + getattr(_cs, '_LOCK_FILE', None), + ) + _cs._hermes_home = home + _cs._LOCK_DIR = home / 'cron' + _cs._LOCK_FILE = _cs._LOCK_DIR / '.tick.lock' + except (ImportError, AttributeError): + logger.debug("cron_profile_context: cron.scheduler unavailable; env-var only") + except Exception: + _pop_cron_profile_context_depth() + _cron_env_lock.release() + raise + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + try: + # Restore env var + if self._prev_env is None: + os.environ.pop('HERMES_HOME', None) + else: + os.environ['HERMES_HOME'] = self._prev_env + + # Restore cron.jobs module constants + if self._prev_cj is not None: + try: + import cron.jobs as _cj + _cj.HERMES_DIR, 
_cj.CRON_DIR, _cj.JOBS_FILE, _cj.OUTPUT_DIR = self._prev_cj + except (ImportError, AttributeError): + pass + if getattr(self, '_prev_cs', None) is not None: + try: + import cron.scheduler as _cs + _cs._hermes_home, _cs._LOCK_DIR, _cs._LOCK_FILE = self._prev_cs + except (ImportError, AttributeError): + pass + finally: + _pop_cron_profile_context_depth() + _cron_env_lock.release() + return False + + def get_hermes_home_for_profile(name: str) -> Path: """Return the HERMES_HOME Path for *name* without mutating any process state. @@ -150,10 +521,7 @@ def get_hermes_home_for_profile(name: str) -> Path: empty, 'default', or does not match the profile-name format (rejects path traversal such as '../../etc'). """ - if not name or name == 'default' or not _PROFILE_ID_RE.fullmatch(name): - return _DEFAULT_HERMES_HOME - profile_dir = _DEFAULT_HERMES_HOME / 'profiles' / name - return profile_dir + return _resolve_profile_home_for_name(name) _TERMINAL_ENV_MAPPINGS = { @@ -261,6 +629,14 @@ def _set_hermes_home(home: Path): except (ImportError, AttributeError): logger.debug("Failed to patch cron.jobs module") + try: + import cron.scheduler as _cs + _cs._hermes_home = home + _cs._LOCK_DIR = home / 'cron' + _cs._LOCK_FILE = _cs._LOCK_DIR / '.tick.lock' + except (ImportError, AttributeError): + logger.debug("Failed to patch cron.scheduler module") + def _reload_dotenv(home: Path): """Load .env from the profile dir into os.environ with profile isolation. 
@@ -306,6 +682,7 @@ def init_profile_state() -> None: _active_profile = _read_active_profile_file() home = get_active_hermes_home() _set_hermes_home(home) + install_cron_scheduler_profile_isolation() _reload_dotenv(home) @@ -329,16 +706,21 @@ def switch_profile(name: str, *, process_wide: bool = True) -> dict: # Import here to avoid circular import at module load from api.config import STREAMS, STREAMS_LOCK, reload_config - # Block if agent is running - with STREAMS_LOCK: - if len(STREAMS) > 0: - raise RuntimeError( - 'Cannot switch profiles while an agent is running. ' - 'Cancel or wait for it to finish.' - ) + # Process-wide profile switches mutate HERMES_HOME, module-level path caches, + # os.environ-backed .env keys, and the global config cache. Keep those blocked + # while any agent stream is active. Per-client WebUI switches are cookie/TLS + # scoped (process_wide=False) and do not mutate those globals, so users can + # leave a running session in one profile and start work in another (#1700). + if process_wide: + with STREAMS_LOCK: + if len(STREAMS) > 0: + raise RuntimeError( + 'Cannot switch profiles while an agent is running. ' + 'Cancel or wait for it to finish.' 
+ ) # Resolve profile directory - if name == 'default': + if _is_root_profile(name): home = _DEFAULT_HERMES_HOME else: home = _resolve_named_profile_home(name) @@ -356,7 +738,7 @@ def switch_profile(name: str, *, process_wide: bool = True) -> dict: # Write sticky default for CLI consistency try: ap_file = _DEFAULT_HERMES_HOME / 'active_profile' - ap_file.write_text(name if name != 'default' else '', encoding='utf-8') + ap_file.write_text('' if _is_root_profile(name) else name, encoding='utf-8') except Exception: logger.debug("Failed to write active profile file") @@ -526,7 +908,7 @@ def _create_profile_fallback(name: str, clone_from: str = None, # Clone config files from source profile if requested if clone_config and clone_from: - if clone_from == 'default': + if _is_root_profile(clone_from): source_dir = _DEFAULT_HERMES_HOME else: source_dir = _DEFAULT_HERMES_HOME / 'profiles' / clone_from @@ -575,7 +957,7 @@ def create_profile_api(name: str, clone_from: str = None, _validate_profile_name(name) # Defense-in-depth: validate clone_from here too, even though routes.py # also validates it. Any caller that bypasses the HTTP layer gets protection. - if clone_from is not None and clone_from != 'default': + if clone_from is not None and not _is_root_profile(clone_from): _validate_profile_name(clone_from) try: @@ -606,6 +988,10 @@ def create_profile_api(name: str, clone_from: str = None, profile_path.mkdir(parents=True, exist_ok=True) _write_endpoint_to_config(profile_path, base_url=base_url, api_key=api_key) + # Invalidate cached root-profile-name lookup; create_profile may have added + # a new profile that flips is_default semantics on the agent side (#1612). + _invalidate_root_profile_cache() + # Find and return the newly created profile info. # When hermes_cli is not importable, list_profiles_api() also falls back # to the stub default-only list and won't find the new profile by name. 
@@ -628,7 +1014,7 @@ def create_profile_api(name: str, clone_from: str = None, def delete_profile_api(name: str) -> dict: """Delete a profile. Switches to default first if it's the active one.""" - if name == 'default': + if _is_root_profile(name): raise ValueError("Cannot delete the default profile.") _validate_profile_name(name) @@ -654,4 +1040,6 @@ def delete_profile_api(name: str) -> dict: else: raise ValueError(f"Profile '{name}' does not exist.") + # Drop cached root-profile-name lookup — list_profiles_api() shape changed. + _invalidate_root_profile_cache() return {'ok': True, 'name': name} diff --git a/api/providers.py b/api/providers.py index 4226aa1f..a1291c20 100644 --- a/api/providers.py +++ b/api/providers.py @@ -7,15 +7,27 @@ multi-provider support). from __future__ import annotations +import json import logging import os +import signal +import subprocess +import sys +import threading +import urllib.error +import urllib.request +from datetime import datetime, timezone from pathlib import Path +from types import SimpleNamespace from typing import Any from api.config import ( _PROVIDER_DISPLAY, _PROVIDER_MODELS, - _get_config_path, + _get_label_for_model, + _models_from_live_provider_ids, + _read_live_provider_model_ids, + _read_visible_codex_cache_model_ids, _save_yaml_config_file, get_config, invalidate_models_cache, @@ -24,6 +36,126 @@ from api.config import ( logger = logging.getLogger(__name__) +_OPENROUTER_KEY_URL = "https://openrouter.ai/api/v1/key" +_PROVIDER_QUOTA_TIMEOUT_SECONDS = 3.0 +_ACCOUNT_USAGE_SUBPROCESS_TIMEOUT_SECONDS = 35.0 +_ACCOUNT_USAGE_PROVIDERS = frozenset({"openai-codex", "anthropic"}) + +# Upper bound on simultaneous profile-isolated quota probe subprocesses. +# Each probe runs a Python child for up to 35 s; capping concurrency prevents +# resource exhaustion when the UI polls all providers rapidly. 
The limit is +# deliberately low (2) since _ACCOUNT_USAGE_SUBPROCESS_TIMEOUT_SECONDS is +# already 35 s and probe I/O is lightweight HTTP calls. +_MAX_CONCURRENT_ACCOUNT_USAGE_PROBES = 2 + +# Parent-death-signal setup: on Linux, arrange for the quota-probe child to +# receive SIGTERM when the WebUI parent dies (e.g. systemctl restart, OOM kill). +# This prevents probe children from becoming orphaned zombies that continue +# calling the provider API indefinitely after the WebUI process is gone. +# We use prctl(PR_SET_DEATHSIG, SIGTERM) which is standard on modern Linux +# kernels and available via ctypes (no external C extension needed). +# If prctl is unavailable (non-Linux, or Linux without prctl support), the +# probe child exits normally when its parent (WebUI) terminates -- on macOS/ +# Windows this is handled by OS-level process tree cleanup. +# Portable parent-death-signal bootstrap. On Linux this arranges for the +# probe child to receive SIGTERM when the WebUI parent dies (systemctl +# restart, OOM kill, etc.), preventing orphaned zombie probes from continuing +# to call the provider API indefinitely. Non-Linux platforms (macOS, Windows) +# rely on OS-level process-tree cleanup instead; this variable is then unused. +# prctl(PR_SET_DEATHSIG, SIGTERM) is available via ctypes without any C +# extension — the same technique used throughout the Hermes codebase. +_ACCOUNT_USAGE_PARENT_DEATHSIG_BOOTSTRAP = ( + # fmt: off + # Lines are written as string literals so this block passes + # `python3 -m py_compile` cleanly and is safe to include verbatim + # inside the single argument string passed to `python -c ...`. + 'import sys\n' + 'try:\n' + ' import ctypes, signal\n' + ' libc = ctypes.CDLL(None)\n' + ' libc.prctl(1, signal.SIGTERM) # PR_SET_DEATHSIG=1, SIGTERM=15\n' + 'except Exception:\n' + ' pass\n' + # fmt: on +) + + +# Module-level cap on concurrent quota-probe subprocesses. +# Lazily created so this module compiles even when threading isn't ready. 
+_account_usage_probe_semaphore: threading.BoundedSemaphore | None = None + + +def _get_account_usage_probe_semaphore() -> threading.BoundedSemaphore: + global _account_usage_probe_semaphore + if _account_usage_probe_semaphore is None: + _account_usage_probe_semaphore = threading.BoundedSemaphore( + _MAX_CONCURRENT_ACCOUNT_USAGE_PROBES + ) + return _account_usage_probe_semaphore + + +# ── preexec_fn: parent-death signal for the probe subprocess ───────────────── +# On POSIX/Linux, arrange for the child to receive SIGTERM when the WebUI +# parent dies (systemctl restart, OOM kill, etc.). The parent's bootstrap +# code (_ACCOUNT_USAGE_PARENT_DEATHSIG_BOOTSTRAP) also covers the grandchild +# fork inside the child, but this preexec_fn handles the direct child-process +# case. Returns None on non-POSIX or when prctl is unavailable so that +# subprocess.run() works on Windows/macOS without changes. +def _account_usage_preexec_fn() -> None: + try: + import ctypes + libc = ctypes.CDLL(None) + libc.prctl(1, signal.SIGTERM) # PR_SET_PDEATHSIG=1, SIGTERM=15 + except Exception: + pass + + +_ACCOUNT_USAGE_SUBPROCESS_CODE = r""" +import json +import sys + +from agent.account_usage import fetch_account_usage + + +def _iso(value): + if value in (None, ""): + return None + if hasattr(value, "isoformat"): + text = value.isoformat() + return text.replace("+00:00", "Z") + text = str(value).strip() + return text or None + + +def _snapshot_payload(snapshot): + if snapshot is None: + return None + windows = [] + for window in getattr(snapshot, "windows", ()) or (): + windows.append({ + "label": str(getattr(window, "label", "") or ""), + "used_percent": getattr(window, "used_percent", None), + "reset_at": _iso(getattr(window, "reset_at", None)), + "detail": getattr(window, "detail", None), + }) + return { + "provider": str(getattr(snapshot, "provider", "") or ""), + "source": str(getattr(snapshot, "source", "") or ""), + "title": str(getattr(snapshot, "title", "") or ""), + "plan": 
getattr(snapshot, "plan", None), + "windows": windows, + "details": list(getattr(snapshot, "details", ()) or ()), + "available": bool(getattr(snapshot, "available", bool(windows))), + "unavailable_reason": getattr(snapshot, "unavailable_reason", None), + "fetched_at": _iso(getattr(snapshot, "fetched_at", None)), + } + + +provider = sys.argv[1] +api_key = sys.argv[2] or None +print(json.dumps(_snapshot_payload(fetch_account_usage(provider, api_key=api_key)))) +""" + # SECTION: Provider ↔ env var mapping # Maps canonical provider slug → env var name for API key. @@ -42,6 +174,7 @@ _PROVIDER_ENV_VAR: dict[str, str] = { "minimax-cn": "MINIMAX_CN_API_KEY", "mistralai": "MISTRAL_API_KEY", "x-ai": "XAI_API_KEY", + "xiaomi": "XIAOMI_API_KEY", "opencode-zen": "OPENCODE_ZEN_API_KEY", "opencode-go": "OPENCODE_GO_API_KEY", # NOTE: bare "ollama" (local) deliberately omitted — local Ollama is keyless @@ -269,6 +402,411 @@ def _provider_has_key(provider_id: str) -> bool: return False +def _get_provider_api_key(provider_id: str) -> str | None: + """Return a configured provider API key without exposing it to callers.""" + provider_id = (provider_id or "").strip().lower() + env_var = _PROVIDER_ENV_VAR.get(provider_id) + if env_var: + env_path = _get_hermes_home() / ".env" + env_values = _load_env_file(env_path) + if env_values.get(env_var): + return str(env_values[env_var]).strip() or None + if os.getenv(env_var): + return os.getenv(env_var, "").strip() or None + for alias in _PROVIDER_ENV_VAR_ALIASES.get(provider_id, ()) or (): + if env_values.get(alias): + return str(env_values[alias]).strip() or None + if os.getenv(alias): + return os.getenv(alias, "").strip() or None + + cfg = get_config() + model_cfg = cfg.get("model", {}) + if isinstance(model_cfg, dict): + active_provider = str(model_cfg.get("provider") or "").strip().lower() + model_key = str(model_cfg.get("api_key") or "").strip() + if model_key and active_provider == provider_id: + return model_key + + providers_cfg = 
cfg.get("providers", {}) + if isinstance(providers_cfg, dict): + provider_cfg = providers_cfg.get(provider_id, {}) + if isinstance(provider_cfg, dict): + provider_key = str(provider_cfg.get("api_key") or "").strip() + if provider_key: + return provider_key + + custom_providers = cfg.get("custom_providers", []) + if isinstance(custom_providers, list): + for cp in custom_providers: + if not isinstance(cp, dict): + continue + cp_name = str(cp.get("name") or "").strip().lower().replace(" ", "-") + if f"custom:{cp_name}" == provider_id or str(cp.get("name", "")).strip().lower() == provider_id: + cp_key = str(cp.get("api_key") or "").strip() + if cp_key.startswith("${") and cp_key.endswith("}"): + return os.getenv(cp_key[2:-1], "").strip() or None + if cp_key: + return cp_key + return None + + +def _active_provider_id() -> str | None: + cfg = get_config() + model_cfg = cfg.get("model", {}) + if not isinstance(model_cfg, dict): + return None + provider = str(model_cfg.get("provider") or "").strip().lower() + return provider or None + + +def _quota_number(value: Any) -> int | float | None: + if isinstance(value, bool) or value is None: + return None + if isinstance(value, (int, float)): + return value + try: + text = str(value).strip() + if not text: + return None + number = float(text) + return int(number) if number.is_integer() else number + except (TypeError, ValueError): + return None + + +def _sanitize_openrouter_quota(payload: Any) -> dict[str, int | float | None]: + if isinstance(payload, dict) and isinstance(payload.get("data"), dict): + payload = payload["data"] + if not isinstance(payload, dict): + payload = {} + return { + "limit_remaining": _quota_number(payload.get("limit_remaining")), + "usage": _quota_number(payload.get("usage")), + "limit": _quota_number(payload.get("limit")), + } + + +def _isoformat_utc(value: Any) -> str | None: + if value in (None, ""): + return None + if isinstance(value, datetime): + dt = value if value.tzinfo else 
value.replace(tzinfo=timezone.utc) + return dt.astimezone(timezone.utc).isoformat().replace("+00:00", "Z") + text = str(value).strip() + return text or None + + +def _serialize_account_usage_snapshot(snapshot: Any) -> dict[str, Any] | None: + if snapshot is None: + return None + windows: list[dict[str, Any]] = [] + for window in getattr(snapshot, "windows", ()) or (): + label = str(getattr(window, "label", "") or "").strip() + if not label: + continue + used_percent = _quota_number(getattr(window, "used_percent", None)) + remaining_percent = None + if used_percent is not None: + remaining_percent = max(0.0, min(100.0, 100.0 - float(used_percent))) + windows.append({ + "label": label, + "used_percent": used_percent, + "remaining_percent": remaining_percent, + "reset_at": _isoformat_utc(getattr(window, "reset_at", None)), + "detail": str(getattr(window, "detail", "") or "").strip() or None, + }) + + details = [ + str(detail).strip() + for detail in (getattr(snapshot, "details", ()) or ()) + if str(detail).strip() + ] + plan = str(getattr(snapshot, "plan", "") or "").strip() or None + unavailable_reason = str(getattr(snapshot, "unavailable_reason", "") or "").strip() or None + return { + "provider": str(getattr(snapshot, "provider", "") or "").strip() or None, + "source": str(getattr(snapshot, "source", "") or "").strip() or None, + "title": str(getattr(snapshot, "title", "") or "").strip() or "Account limits", + "plan": plan, + "windows": windows, + "details": details, + "available": bool(getattr(snapshot, "available", bool(windows or details))) and not unavailable_reason, + "unavailable_reason": unavailable_reason, + "fetched_at": _isoformat_utc(getattr(snapshot, "fetched_at", None)), + } + + +def _agent_fetch_account_usage(provider: str, *, base_url: str | None = None, api_key: str | None = None) -> Any: + from agent.account_usage import fetch_account_usage + + return fetch_account_usage(provider, base_url=base_url, api_key=api_key) + + +def 
_account_usage_subprocess_env(home: Path, provider: str, api_key: str | None) -> dict[str, str]: + env = dict(os.environ) + env["HERMES_HOME"] = str(Path(home)) + + # Profile .env values should affect only the child quota probe, not the + # WebUI process-global environment. This is especially important for + # Anthropic account usage, where the agent resolver reads OAuth/API tokens + # from environment variables. + for key, value in _load_env_file(Path(home) / ".env").items(): + if value: + env[key] = value + + env_var = _PROVIDER_ENV_VAR.get((provider or "").strip().lower()) + if env_var and api_key: + env[env_var] = api_key + + try: + from api.config import _AGENT_DIR + except Exception: + _AGENT_DIR = None + pythonpath_parts: list[str] = [] + if _AGENT_DIR: + pythonpath_parts.append(str(_AGENT_DIR)) + existing_pythonpath = env.get("PYTHONPATH", "") + if existing_pythonpath: + pythonpath_parts.append(existing_pythonpath) + if pythonpath_parts: + env["PYTHONPATH"] = os.pathsep.join(pythonpath_parts) + return env + + +def _account_usage_payload_to_snapshot(payload: Any) -> Any: + if not isinstance(payload, dict): + return None + windows = tuple( + SimpleNamespace( + label=window.get("label"), + used_percent=window.get("used_percent"), + reset_at=window.get("reset_at"), + detail=window.get("detail"), + ) + for window in (payload.get("windows") or ()) + if isinstance(window, dict) + ) + return SimpleNamespace( + provider=payload.get("provider"), + source=payload.get("source"), + title=payload.get("title"), + plan=payload.get("plan"), + windows=windows, + details=tuple(payload.get("details") or ()), + available=bool(payload.get("available")), + unavailable_reason=payload.get("unavailable_reason"), + fetched_at=payload.get("fetched_at"), + ) + + +def _agent_fetch_account_usage_for_home(provider: str, home: Path, *, api_key: str | None = None) -> Any: + try: + from api.config import PYTHON_EXE + except Exception: + PYTHON_EXE = sys.executable or "python3" + + try: + # 
On POSIX (Linux/macOS), wire parent-death signal so the child dies + # cleanly if the WebUI parent terminates. preexec_fn is not safe on + # Windows, where OS-level process-tree cleanup handles child orphans. + kwargs: dict[str, Any] = { + "stdin": subprocess.DEVNULL, + "stdout": subprocess.PIPE, + "stderr": subprocess.PIPE, + "text": True, + "timeout": _ACCOUNT_USAGE_SUBPROCESS_TIMEOUT_SECONDS, + "check": False, + } + if hasattr(os, "fork"): # POSIX + kwargs["preexec_fn"] = _account_usage_preexec_fn + + proc = subprocess.run( + [ + PYTHON_EXE, "-c", + _ACCOUNT_USAGE_PARENT_DEATHSIG_BOOTSTRAP + _ACCOUNT_USAGE_SUBPROCESS_CODE, + provider, + api_key or "", + ], + env=_account_usage_subprocess_env(home, provider, api_key), + **kwargs, + ) + except subprocess.TimeoutExpired: + logger.debug("Account usage probe for %s timed out", provider) + return None + except Exception: + logger.debug("Account usage probe for %s failed to launch", provider, exc_info=True) + return None + + if proc.returncode != 0: + logger.debug("Account usage probe for %s exited with status %s", provider, proc.returncode) + return None + try: + payload = json.loads((proc.stdout or "").strip() or "null") + except json.JSONDecodeError: + logger.debug("Account usage probe for %s returned invalid JSON", provider) + return None + return _account_usage_payload_to_snapshot(payload) + + +def _fetch_account_usage_with_profile_context(provider: str) -> Any: + """Fetch account usage for a provider within the active profile context. + + Concurrency is capped by the module-level BoundedSemaphore so that rapid + UI polls (e.g. Settings page refresh) cannot exhaust file-descriptors or + memory by spawning more than _MAX_CONCURRENT_ACCOUNT_USAGE_PROBES probe + subprocesses simultaneously. Each probe runs up to 35 s. + + A warm worker-pool (reuse of persistent subprocess handles) is a natural + follow-up if this first slice proves insufficient in production. 
+ """ + home = _get_hermes_home() + api_key = _get_provider_api_key(provider) + sem = _get_account_usage_probe_semaphore() + try: + with sem: + return _agent_fetch_account_usage_for_home( + provider, + home, + api_key=api_key, + ) + except Exception: + logger.debug("Failed to fetch account usage for %s", provider, exc_info=True) + return None + + +def _provider_account_usage_status(provider: str, display_name: str) -> dict[str, Any]: + snapshot = _fetch_account_usage_with_profile_context(provider) + account_limits = _serialize_account_usage_snapshot(snapshot) + if account_limits and account_limits.get("available"): + return { + "ok": True, + "provider": provider, + "display_name": display_name, + "supported": True, + "status": "available", + "label": account_limits.get("title") or "Account limits", + "quota": None, + "account_limits": account_limits, + "message": f"{display_name} account limits loaded.", + } + + reason = "" + if account_limits: + reason = str(account_limits.get("unavailable_reason") or "").strip() + message = ( + f"{display_name} account limits are unavailable. {reason}" + if reason + else f"{display_name} account limits are unavailable. Confirm provider authentication and try again." + ) + return { + "ok": False, + "provider": provider, + "display_name": display_name, + "supported": True, + "status": "unavailable", + "quota": None, + "account_limits": account_limits, + "message": message, + } + + +def get_provider_quota(provider_id: str | None = None) -> dict[str, Any]: + """Return sanitized quota/rate-limit status for the active provider. + + OpenRouter keeps its documented key endpoint. OAuth-backed account usage + providers reuse Hermes Agent's /usage account-limits abstraction so WebUI + stays aligned with CLI/Gateway provider semantics. 
+ """ + provider = (provider_id or _active_provider_id() or "").strip().lower() + if not provider: + return { + "ok": False, + "provider": None, + "display_name": None, + "supported": False, + "status": "unavailable", + "quota": None, + "message": "No active provider is configured.", + } + + display_name = _PROVIDER_DISPLAY.get(provider, provider.replace("-", " ").title()) + if provider in _ACCOUNT_USAGE_PROVIDERS: + return _provider_account_usage_status(provider, display_name) + + if provider != "openrouter": + detail = "OpenAI/Anthropic rate-limit headers are a follow-up once WebUI captures provider response metadata." + return { + "ok": False, + "provider": provider, + "display_name": display_name, + "supported": False, + "status": "unsupported", + "quota": None, + "message": f"Quota status is not available for {display_name}. {detail}", + } + + api_key = _get_provider_api_key("openrouter") + if not api_key: + return { + "ok": False, + "provider": "openrouter", + "display_name": display_name, + "supported": True, + "status": "no_key", + "quota": None, + "message": "OpenRouter quota status needs an OPENROUTER_API_KEY configured on the server.", + } + + req = urllib.request.Request( + _OPENROUTER_KEY_URL, + headers={ + "Authorization": f"Bearer {api_key}", + "Accept": "application/json", + }, + ) + try: + with urllib.request.urlopen(req, timeout=_PROVIDER_QUOTA_TIMEOUT_SECONDS) as resp: + raw = resp.read() + payload = json.loads(raw.decode("utf-8")) if isinstance(raw, (bytes, bytearray)) else json.loads(raw) + quota = _sanitize_openrouter_quota(payload) + return { + "ok": True, + "provider": "openrouter", + "display_name": display_name, + "supported": True, + "status": "available", + "label": "OpenRouter credits", + "quota": quota, + "message": "OpenRouter quota status loaded.", + } + except urllib.error.HTTPError as exc: + status = "invalid_key" if exc.code in (401, 403) else "unavailable" + message = ( + "OpenRouter rejected the configured API key." 
+ if status == "invalid_key" + else "OpenRouter quota status is temporarily unavailable." + ) + return { + "ok": False, + "provider": "openrouter", + "display_name": display_name, + "supported": True, + "status": status, + "quota": None, + "message": message, + } + except (TimeoutError, urllib.error.URLError, json.JSONDecodeError, OSError, ValueError): + return { + "ok": False, + "provider": "openrouter", + "display_name": display_name, + "supported": True, + "status": "unavailable", + "quota": None, + "message": "OpenRouter quota status is temporarily unavailable.", + } + + def _provider_is_oauth(provider_id: str) -> bool: """Check whether a provider uses OAuth/token flows (managed by CLI).""" return provider_id in _OAUTH_PROVIDERS @@ -391,7 +929,67 @@ def get_providers() -> dict[str, Any]: except Exception: pass - models = _PROVIDER_MODELS.get(pid, []) + models = list(_PROVIDER_MODELS.get(pid, [])) + models_total = len(models) + # OpenAI Codex account catalogs drift independently from WebUI releases. + # The model picker already prefers hermes_cli + Codex local cache for + # this provider (the agent's `provider_model_ids("openai-codex")` filters + # IDs with `supported_in_api: false`, but Codex CLI still surfaces some + # of those — notably `gpt-5.3-codex-spark` from #1680 — in its picker). + # Merge both sources here so the providers card matches the picker + # exactly. Static entries remain the offline fallback when live + # discovery and the local Codex cache are both unavailable. (#1807 + # follow-up to v0.51.19 #1812.) + if pid == "openai-codex": + live_ids = _read_live_provider_model_ids("openai-codex") + for mid in _read_visible_codex_cache_model_ids(): + if mid not in live_ids: + live_ids.append(mid) + live_models = _models_from_live_provider_ids(pid, live_ids) + if live_models: + models = live_models + models_total = len(models) + # Nous Portal: prefer the live catalog so the providers card matches + # the dropdown picker (#1538). 
Same fallback shape as the static-only + # case below — when hermes_cli is unavailable or its lookup raises, + # we keep the four-entry curated list. + # + # On large-tier accounts (#1567 reporter Deor saw 396 entries), we + # render the same featured subset the picker uses so the providers + # card body doesn't become a 396-pill wall. The full count is still + # reported via models_total — surfaced in the header line as + # "396 models · OAuth" by static/panels.js — so the user knows the + # complete catalog is reachable (via /model autocomplete or a future + # "show all" disclosure if added). + if pid == "nous": + try: + from hermes_cli.models import provider_model_ids as _provider_model_ids + + live_ids = _provider_model_ids("nous") or [] + if live_ids: + # Lazy-import to avoid circular dep with api.config. + from api.config import _format_nous_label, _build_nous_featured_set + + featured_ids, _extras = _build_nous_featured_set(live_ids) + models = [ + {"id": f"@nous:{mid}", "label": _format_nous_label(mid)} + for mid in featured_ids + ] + models_total = len(live_ids) + except Exception: + logger.debug("Failed to load Nous Portal models from hermes_cli") + # LM Studio: fetch live locally-loaded models so the providers card + # matches what's actually available on the user's server (#WebUI). 
+ if pid == "lmstudio": + try: + from hermes_cli.models import provider_model_ids as _pmi + + lm_live = _pmi("lmstudio") or [] + if lm_live: + models = [{"id": mid, "label": mid} for mid in lm_live] + models_total = len(models) + except Exception: + logger.debug("Failed to load LM Studio models from hermes_cli") # Also include models from config.yaml providers section if isinstance(providers_cfg, dict): provider_cfg = providers_cfg.get(pid, {}) @@ -401,6 +999,13 @@ def get_providers() -> dict[str, Any]: models = models + [{"id": k, "label": k} for k in cfg_models.keys()] elif isinstance(cfg_models, list): models = models + [{"id": k, "label": k} for k in cfg_models] + # Recompute models_total when config.yaml contributes additional + # entries on top of the live/static catalog. For non-Nous + # providers models_total still equals len(models); for Nous + # we keep the live count (which already includes any models + # surfaced in the curated featured slice). + if pid != "nous": + models_total = len(models) providers.append({ "id": pid, @@ -411,6 +1016,14 @@ def get_providers() -> dict[str, Any]: "key_source": key_source, "auth_error": auth_error, "models": models, + # models_total reflects the complete catalog size (e.g. 396 for + # an enterprise Nous Portal account), even when "models" is + # trimmed to a featured subset for UI scannability. The frontend + # uses this for the header text "396 models · OAuth" so users + # know the full catalog exists and is reachable via the slash + # command. For providers that don't trim, models_total == + # len(models) and the frontend behaves identically to before. + "models_total": models_total, }) # Scan custom_providers from config.yaml (e.g. glmcode, timicc) @@ -548,7 +1161,13 @@ def _clean_provider_key_from_config(provider_id: str) -> None: from api.config import _cfg_lock try: - config_path = _get_config_path() + # Resolve through api.config at call time instead of the function imported + # at module load. 
Several tests (and some profile flows) monkeypatch the + # config module's path resolver after api.providers has already been + # imported; using the stale imported reference can clean the wrong + # config.yaml. + import api.config as _config + config_path = _config._get_config_path() except Exception: return diff --git a/api/request_diagnostics.py b/api/request_diagnostics.py new file mode 100644 index 00000000..4c3ec719 --- /dev/null +++ b/api/request_diagnostics.py @@ -0,0 +1,160 @@ +"""Slow request diagnostics for latency-sensitive browser API paths.""" + +from __future__ import annotations + +import json +import logging +import os +import sys +import threading +import time +import traceback +import uuid +from typing import Any + + +DEFAULT_SLOW_REQUEST_SECONDS = 5.0 +MAX_STACK_FRAMES_PER_THREAD = 40 + + +def _slow_request_seconds() -> float: + raw = os.getenv("HERMES_WEBUI_SLOW_REQUEST_SECONDS", "").strip() + if not raw: + return DEFAULT_SLOW_REQUEST_SECONDS + try: + value = float(raw) + except ValueError: + return DEFAULT_SLOW_REQUEST_SECONDS + return max(0.0, value) + + +class RequestDiagnostics: + """Track request stages and emit a watchdog record if a request wedges.""" + + def __init__( + self, + method: str, + path: str, + *, + logger: logging.Logger | None = None, + timeout_seconds: float | None = None, + auto_start: bool = True, + ) -> None: + self.request_id = uuid.uuid4().hex[:10] + self.method = str(method or "-") + self.path = str(path or "-").split("?", 1)[0] + self.logger = logger or logging.getLogger(__name__) + self.timeout_seconds = _slow_request_seconds() if timeout_seconds is None else max(0.0, float(timeout_seconds)) + self.started_monotonic = time.monotonic() + self.started_wall = time.time() + self._lock = threading.Lock() + self._stages: list[dict[str, Any]] = [] + self._current_stage = "start" + self._current_stage_started = self.started_monotonic + self._finished = False + self._watchdog_logged = False + self._timer: threading.Timer | 
None = None + if auto_start and self.timeout_seconds > 0: + self._timer = threading.Timer(self.timeout_seconds, self._on_timeout) + self._timer.daemon = True + self._timer.start() + + @classmethod + def maybe_start( + cls, + method: str, + path: str, + *, + logger: logging.Logger | None = None, + ) -> "RequestDiagnostics | None": + clean_path = str(path or "").split("?", 1)[0] + if (method.upper(), clean_path) not in { + ("GET", "/api/sessions"), + ("POST", "/api/chat/start"), + }: + return None + return cls(method, clean_path, logger=logger) + + def stage(self, name: str) -> None: + now = time.monotonic() + clean = str(name or "unknown").strip() or "unknown" + with self._lock: + if self._finished: + return + self._stages.append( + { + "name": self._current_stage, + "ms": round((now - self._current_stage_started) * 1000, 1), + } + ) + self._current_stage = clean + self._current_stage_started = now + + def finish(self) -> None: + timer = None + record = None + with self._lock: + if self._finished: + return + self._finished = True + timer = self._timer + record = self._build_record_locked(include_stacks=False) + if timer is not None: + timer.cancel() + if record and self.timeout_seconds > 0 and record["elapsed_ms"] >= self.timeout_seconds * 1000: + self.logger.warning( + "Slow WebUI request completed: %s", + json.dumps(record, sort_keys=True), + ) + + def _on_timeout(self) -> None: + with self._lock: + if self._finished or self._watchdog_logged: + return + self._watchdog_logged = True + record = self._build_record_locked(include_stacks=True) + self.logger.warning( + "Slow WebUI request still running: %s", + json.dumps(record, sort_keys=True), + ) + + def _build_record_locked(self, *, include_stacks: bool) -> dict[str, Any]: + now = time.monotonic() + stages = list(self._stages) + stages.append( + { + "name": self._current_stage, + "ms": round((now - self._current_stage_started) * 1000, 1), + } + ) + record: dict[str, Any] = { + "request_id": self.request_id, + 
"method": self.method, + "path": self.path, + "started_at": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime(self.started_wall)), + "elapsed_ms": round((now - self.started_monotonic) * 1000, 1), + "current_stage": self._current_stage, + "stages": stages, + } + if include_stacks: + record["thread_stacks"] = _thread_stack_snapshot() + return record + + +def _thread_stack_snapshot() -> list[dict[str, Any]]: + frames = sys._current_frames() + threads = {thread.ident: thread for thread in threading.enumerate()} + snapshot: list[dict[str, Any]] = [] + for ident, frame in frames.items(): + thread = threads.get(ident) + stack = traceback.format_stack(frame, limit=MAX_STACK_FRAMES_PER_THREAD) + snapshot.append( + { + "thread_id": ident, + "thread_name": thread.name if thread else "", + "daemon": bool(thread.daemon) if thread else None, + "stack": [line.rstrip() for line in stack], + } + ) + snapshot.sort(key=lambda item: str(item.get("thread_name") or "")) + return snapshot diff --git a/api/routes.py b/api/routes.py index 8c4bc834..ca0c2dd2 100644 --- a/api/routes.py +++ b/api/routes.py @@ -9,13 +9,25 @@ import json import logging import os import queue +import re +import platform import shutil +import sqlite3 +import subprocess import sys import threading import time import uuid +import re from pathlib import Path +from contextlib import closing from urllib.parse import parse_qs +from api.agent_sessions import ( + MESSAGING_SOURCES, + is_cli_session_row, + is_cli_session_row_visible, + read_session_lineage_report, +) logger = logging.getLogger(__name__) @@ -38,6 +50,357 @@ _RUNNING_CRON_JOBS: dict[str, float] = {} # job_id → start_timestamp _RUNNING_CRON_LOCK = threading.Lock() _CRON_OUTPUT_CONTENT_LIMIT = 8000 _CRON_OUTPUT_HEADER_CONTEXT = 200 +_MESSAGING_RAW_SOURCES = {str(s).strip().lower() for s in MESSAGING_SOURCES} +_MESSAGING_SESSION_METADATA_CACHE: dict[str, object] = { + "path": None, + "mtime": None, + "identity": {}, +} +_MESSAGING_SESSION_METADATA_LOCK = 
threading.Lock() +_STALE_MESSAGING_END_REASONS = {"session_reset", "session_switch"} + + +# ── Profile-scoped session/project filtering (#1611, #1614) ──────────────── +# +# Sessions and projects are stored in the WebUI sidecar without per-row +# isolation by default — they're tagged with a `profile` field but every +# query saw all rows. The fix scopes both endpoints to the active profile +# by default, with `?all_profiles=1` opting into aggregate mode. +# +# Renamed-root profile handling (#1612): a row tagged `profile='default'` +# matches the active root regardless of the root's display name, and a row +# tagged with the renamed-root display name (e.g. 'kinni') likewise matches +# when the active profile is `'default'`. _is_root_profile() is the +# canonical check. + +# Canonical helper now lives in api.profiles so out-of-process consumers +# (mcp_server.py) can import it without duplicating the visibility model. +# Re-exported here so existing `_profiles_match(...)` call sites in this +# module keep resolving without per-call-site refactors. +from api.profiles import _profiles_match # noqa: F401, E402 (re-export) + + +def _all_profiles_query_flag(parsed_url) -> bool: + """Return True if the request URL has `?all_profiles=1` (or true/yes). + + Centralizes the opt-in parsing so /api/sessions and /api/projects use + the same shape. Accepts 1/true/yes (case-insensitive) for ergonomics. + """ + qs = parse_qs(parsed_url.query) + raw = qs.get('all_profiles', [''])[0].strip().lower() + return raw in ('1', 'true', 'yes', 'on') + + +def _active_skills_dir() -> Path: + """Return the skills directory for the request's active Hermes profile. + + WebUI profile switches are cookie/thread-local scoped, so the agent + module-level ``tools.skills_tool.SKILLS_DIR`` can still point at the server + startup profile. Skills UI endpoints must derive the directory from + ``get_active_hermes_home()`` for every request instead of reading that + process-global constant. 
+ """ + try: + from api.profiles import get_active_hermes_home + + return Path(get_active_hermes_home()) / "skills" + except Exception: + try: + from tools.skills_tool import SKILLS_DIR + + return Path(SKILLS_DIR) + except Exception: + return Path(os.getenv("HERMES_HOME", str(Path.home() / ".hermes"))).expanduser() / "skills" + + +def _skill_path_within(base_dir: Path, candidate: Path) -> bool: + try: + candidate.resolve().relative_to(base_dir.resolve()) + return True + except (OSError, ValueError): + return False + + +def _skill_category_from_path(skill_md: Path, skills_dirs: list[Path]) -> str | None: + for skills_dir in skills_dirs: + try: + rel_path = skill_md.relative_to(skills_dir) + except ValueError: + continue + parts = rel_path.parts + if len(parts) >= 3: + return parts[0] + return None + return None + + +def _active_skill_search_dirs(skills_dir: Path) -> list[Path]: + dirs = [skills_dir] + try: + from agent.skill_utils import get_external_skills_dirs + + dirs.extend(Path(p) for p in get_external_skills_dirs()) + except Exception: + pass + return [p for p in dirs if p.exists()] + + +def _skills_list_from_dir(skills_dir: Path, category: str | None = None) -> dict: + """List skills using an explicit local skills directory. + + This mirrors ``tools.skills_tool.skills_list`` closely, but keeps the local + scan root explicit so per-client WebUI profile switches do not race on or + leak through the skills tool's module-global ``SKILLS_DIR``. + """ + from agent.skill_utils import iter_skill_index_files + from tools.skills_tool import ( + MAX_DESCRIPTION_LENGTH, + _EXCLUDED_SKILL_DIRS, + _get_disabled_skill_names, + _parse_frontmatter, + _sort_skills, + skill_matches_platform, + ) + + if not skills_dir.exists(): + skills_dir.mkdir(parents=True, exist_ok=True) + return { + "success": True, + "skills": [], + "categories": [], + "message": f"No skills found. 
Skills directory created at {skills_dir}/", + } + + all_skills = [] + seen_names: set[str] = set() + disabled = _get_disabled_skill_names() + search_dirs = _active_skill_search_dirs(skills_dir) + + for scan_dir in search_dirs: + for skill_md in iter_skill_index_files(scan_dir, "SKILL.md"): + if any(part in _EXCLUDED_SKILL_DIRS for part in skill_md.parts): + continue + skill_dir = skill_md.parent + try: + content = skill_md.read_text(encoding="utf-8")[:4000] + frontmatter, body = _parse_frontmatter(content) + if not skill_matches_platform(frontmatter): + continue + name = frontmatter.get("name", skill_dir.name)[:64] + if name in seen_names or name in disabled: + continue + description = frontmatter.get("description", "") + if not description: + for line in body.strip().split("\n"): + line = line.strip() + if line and not line.startswith("#"): + description = line + break + if len(description) > MAX_DESCRIPTION_LENGTH: + description = description[: MAX_DESCRIPTION_LENGTH - 3] + "..." + seen_names.add(name) + all_skills.append( + { + "name": name, + "description": description, + "category": _skill_category_from_path(skill_md, search_dirs), + } + ) + except (UnicodeDecodeError, PermissionError) as e: + logger.debug("Failed to read skill file %s: %s", skill_md, e) + except Exception as e: + logger.debug( + "Skipping skill at %s: failed to parse: %s", skill_md, e, exc_info=True + ) + + if category: + all_skills = [s for s in all_skills if s.get("category") == category] + all_skills = _sort_skills(all_skills) + categories = sorted(set(s.get("category") for s in all_skills if s.get("category"))) + result = { + "success": True, + "skills": all_skills, + "categories": categories, + "count": len(all_skills), + } + if all_skills: + result["hint"] = "Use skill_view(name) to see full content, tags, and linked files" + else: + result["message"] = "No skills found in skills/ directory." 
+ return result + + +def _find_skill_in_dir(name: str, skills_dir: Path) -> tuple[Path | None, Path | None]: + """Resolve a WebUI skill name inside an explicit skills directory.""" + from agent.skill_utils import iter_skill_index_files + from tools.skills_tool import _EXCLUDED_SKILL_DIRS, _parse_frontmatter + + raw_name = str(name or "").strip().strip("/") + if not raw_name or not skills_dir.exists(): + return None, None + + candidate_names = [raw_name] + if ":" in raw_name: + namespace, bare = raw_name.split(":", 1) + if namespace and bare: + candidate_names.append(f"{namespace}/{bare}") + + for candidate_name in candidate_names: + direct_path = skills_dir / candidate_name + if not _skill_path_within(skills_dir, direct_path): + continue + if direct_path.is_dir() and (direct_path / "SKILL.md").exists(): + return direct_path, direct_path / "SKILL.md" + legacy_md = direct_path.with_suffix(".md") + if legacy_md.exists() and _skill_path_within(skills_dir, legacy_md): + return legacy_md.parent, legacy_md + + for skill_md in iter_skill_index_files(skills_dir, "SKILL.md"): + if any(part in _EXCLUDED_SKILL_DIRS for part in skill_md.parts): + continue + skill_dir = skill_md.parent + if skill_dir.name == raw_name: + return skill_dir, skill_md + try: + frontmatter, _ = _parse_frontmatter(skill_md.read_text(encoding="utf-8")[:4000]) + if frontmatter.get("name") == raw_name: + return skill_dir, skill_md + except Exception: + continue + + for legacy_md in skills_dir.rglob("*.md"): + if legacy_md.name == "SKILL.md": + continue + if legacy_md.stem == raw_name and _skill_path_within(skills_dir, legacy_md): + return legacy_md.parent, legacy_md + return None, None + + +def _skill_not_found_payload(name: str, skills_dir: Path) -> dict: + available = [s["name"] for s in _skills_list_from_dir(skills_dir).get("skills", [])[:20]] + return { + "success": False, + "error": f"Skill '{name}' not found.", + "available_skills": available, + "hint": "Use skills_list to see all available skills", 
+ } + + +def _skill_view_from_active_dir(name: str) -> dict: + from tools.skills_tool import skill_view as _skill_view + + skills_dir = _active_skills_dir() + skill_dir, skill_md = _find_skill_in_dir(name, skills_dir) + if not skill_md: + # Preserve plugin-qualified skill viewing without falling back to the + # startup/root profile's local skills tree for ordinary missing skills. + if ":" in str(name or ""): + try: + from agent.skill_utils import is_valid_namespace, parse_qualified_name + from hermes_cli.plugins import discover_plugins, get_plugin_manager + + namespace, _bare = parse_qualified_name(name) + if is_valid_namespace(namespace): + discover_plugins() + pm = get_plugin_manager() + if pm.find_plugin_skill(name) is not None or pm.list_plugin_skills(namespace): + raw = _skill_view(name) + return json.loads(raw) if isinstance(raw, str) else raw + except Exception: + pass + return _skill_not_found_payload(name, skills_dir) + target_name = str(skill_dir) if skill_dir and (skill_dir / "SKILL.md") == skill_md else str(skill_md) + raw = _skill_view(target_name) + data = json.loads(raw) if isinstance(raw, str) else raw + return data + +# ── SSE app-level heartbeat (#1623) ──────────────────────────────────────── +# +# Kernel TCP keepalive (server.py setsockopt block) declares a peer dead at +# KEEPIDLE (10s) + KEEPINTVL (5s) * KEEPCNT (3) = 25s in the worst case. The +# app-level SSE heartbeat must fire well below that window so flaky-network +# probes never get the chance to kill an idle stream during long LLM thinking +# phases. 5s gives the kernel ~5x headroom: probe at 10s, heartbeat byte at +# every 5s of idle keeps the socket warm. +# +# Cost: ~12 bytes per heartbeat * 12 extra heartbeats/min = ~150B/min idle. +# Trivial; many production SSE deployments run 5-15s heartbeats specifically +# to handle proxies and mobile NAT. 
+_SSE_HEARTBEAT_INTERVAL_SECONDS = 5 + + +def _normalize_messaging_source(raw_source) -> str: + return str(raw_source or "").strip().lower() + + +def _is_known_messaging_source(raw_source) -> bool: + return _normalize_messaging_source(raw_source) in _MESSAGING_RAW_SOURCES + + +def _safe_first(*values): + for value in values: + if value is None: + continue + text = str(value).strip() + if text: + return text + return "" + + +def _gateway_session_metadata_path(): + try: + from api.profiles import get_active_hermes_home + hermes_home = Path(get_active_hermes_home()).expanduser().resolve() + except Exception: + hermes_home = Path(os.getenv("HERMES_HOME", str(Path.home() / ".hermes"))).expanduser().resolve() + return hermes_home / "sessions" / "sessions.json" + + +def _load_gateway_session_identity_map() -> dict[str, dict]: + path = _gateway_session_metadata_path() + if not path.exists(): + return {} + + try: + st = path.stat() + cache = _MESSAGING_SESSION_METADATA_CACHE + with _MESSAGING_SESSION_METADATA_LOCK: + if cache["path"] == str(path) and cache["mtime"] == st.st_mtime: + return cache["identity"].copy() + except Exception: + return {} + + try: + raw_sessions = json.loads(path.read_text(encoding="utf-8")) + except Exception as _json_err: + logger.debug("Failed to parse gateway sessions metadata from %s: %s", path, _json_err) + return {} + + mapping: dict[str, dict] = {} + if isinstance(raw_sessions, dict): + for _entry in raw_sessions.values(): + if not isinstance(_entry, dict): + continue + session_id = _safe_first(_entry.get("session_id")) + if not session_id: + continue + origin = _entry.get("origin") if isinstance(_entry.get("origin"), dict) else {} + platform = _safe_first(origin.get("platform"), _entry.get("platform")) + mapping[session_id] = { + "session_key": _safe_first(_entry.get("session_key"), _entry.get("key")), + "chat_id": _safe_first(origin.get("chat_id"), _entry.get("chat_id")), + "thread_id": _safe_first(origin.get("thread_id"), 
_entry.get("thread_id")), + "chat_type": _safe_first(origin.get("chat_type"), _entry.get("chat_type")), + "user_id": _safe_first(origin.get("user_id"), _entry.get("user_id")), + "platform": platform, + "raw_source": platform, + } + + with _MESSAGING_SESSION_METADATA_LOCK: + _MESSAGING_SESSION_METADATA_CACHE["path"] = str(path) + _MESSAGING_SESSION_METADATA_CACHE["mtime"] = st.st_mtime + _MESSAGING_SESSION_METADATA_CACHE["identity"] = mapping + return mapping.copy() def _mark_cron_running(job_id: str): @@ -93,27 +456,223 @@ def _cron_output_content_window(text: str, limit: int = _CRON_OUTPUT_CONTENT_LIM return text[-limit:] -def _run_cron_tracked(job): - """Wrapper that tracks running state around cron.scheduler.run_job.""" - from cron.scheduler import run_job # import here — runs inside a worker thread + + +def _cron_job_for_api(job: dict) -> dict: + """Return a cron job payload with the #617 optional profile field present. + + Legacy jobs intentionally persist without ``profile`` so they keep the + scheduler's server-default behavior. The API still returns ``profile: None`` + so the UI can label that state explicitly instead of guessing. 
+ """ + payload = dict(job or {}) + payload.setdefault("profile", None) + return payload + + +def _cron_jobs_for_api(jobs) -> list[dict]: + return [_cron_job_for_api(job) for job in (jobs or [])] + + +def _available_cron_profile_names() -> set[str]: + from api.profiles import list_profiles_api + + names = {"default"} + for profile in list_profiles_api(): + try: + name = str(profile.get("name") or "").strip() + except AttributeError: + continue + if name: + names.add(name) + return names + + +def _normalize_cron_profile_value(value) -> str | None: + if value is None: + return None + profile = str(value).strip() + if not profile: + return None + if profile not in _available_cron_profile_names(): + raise ValueError(f"Unknown profile: {profile}") + return profile + + +def _profile_home_for_cron_job(job: dict): + """Resolve the execution profile for a cron job, with graceful fallback. + + A missing/blank profile preserves legacy server-default behavior. If a job + points at a profile that was deleted after save, fall back to the active + server profile and log a warning instead of crashing the Run Now path. 
+ """ + from api.profiles import get_active_hermes_home, get_hermes_home_for_profile + + raw = str((job or {}).get("profile") or "").strip() + if not raw: + return get_active_hermes_home() + if raw not in _available_cron_profile_names(): + logger.warning( + "Cron job %s references missing profile %r; falling back to server default", + (job or {}).get("id", "?"), raw, + ) + return get_active_hermes_home() + return get_hermes_home_for_profile(raw) + + +def _cron_job_subprocess_main(job, execution_profile_home, result_queue): + """Run one cron job inside a child process pinned to a profile home.""" + try: + def _run(): + from cron.scheduler import run_job + + return run_job(job) + + if execution_profile_home is None: + result = _run() + else: + from api.profiles import cron_profile_context_for_home + + with cron_profile_context_for_home(execution_profile_home): + result = _run() + result_queue.put(("ok", result)) + except BaseException as exc: # pragma: no cover - surfaced in parent + import traceback + + result_queue.put(("error", f"{type(exc).__name__}: {exc}", traceback.format_exc())) + + +def _cron_subprocess_result_timeout_seconds(job): + """Return how long the manual-run parent waits for child result payloads.""" + for key in ("timeout_seconds", "max_runtime_seconds", "timeout"): + raw = (job or {}).get(key) + if raw in (None, ""): + continue + try: + value = float(raw) + except (TypeError, ValueError): + continue + if value > 0: + return max(60.0, value + 30.0) + # Manual cron jobs can legitimately run for a long time. Keep a recovery + # path for wedged children without truncating normal long-running jobs. + return 6 * 60 * 60.0 + + +def _run_cron_job_in_profile_subprocess(job, execution_profile_home): + """Execute cron.scheduler.run_job without holding the parent cron env lock. 
+ + cron.scheduler/cron.jobs still rely on process-global HERMES_HOME and module + constants, so running the job body in a child process gives each long cron + execution its own globals. The parent process only uses cron_profile_context + for short metadata reads/writes and remains responsive to unrelated cron UI + and API calls while the job runs. + """ + import multiprocessing + import queue + + ctx = multiprocessing.get_context("spawn") + result_queue = ctx.Queue(maxsize=1) + process = ctx.Process( + target=_cron_job_subprocess_main, + args=(job, execution_profile_home, result_queue), + ) + process.start() + + result_timeout = _cron_subprocess_result_timeout_seconds(job) + status = "error" + payload = ["cron run subprocess failed before producing a result", ""] + try: + try: + # Drain the potentially large pickled result before joining. If the + # child puts >~64 KiB on a multiprocessing.Queue, joining first can + # deadlock while the child's feeder thread waits for the parent to + # read from the pipe. 
+ status, *payload = result_queue.get(timeout=result_timeout) + except queue.Empty: + status = "error" + if process.is_alive(): + process.terminate() + process.join(timeout=5) + payload = [ + f"cron run subprocess produced no result within {result_timeout:g}s and was terminated", + "", + ] + else: + payload = [ + f"cron run subprocess exited with code {process.exitcode} without producing a result", + "", + ] + finally: + process.join(timeout=5) + if process.is_alive(): + process.terminate() + process.join(timeout=5) + if status == "ok": + status = "error" + payload = [ + "cron run subprocess did not exit after returning a result", + "", + ] + finally: + result_queue.close() + result_queue.join_thread() + + if status == "ok": + return payload[0] + + message = payload[0] + traceback_text = payload[1] if len(payload) > 1 else "" + if traceback_text: + logger.error("Manual cron subprocess failed:\n%s", traceback_text) + raise RuntimeError(message) + + +def _run_cron_tracked(job, profile_home=None, execution_profile_home=None): + """Wrapper that tracks running state around cron.scheduler.run_job. + + ``profile_home`` is the cron store that owns the job row/output metadata. + ``execution_profile_home`` is the selected per-job profile used to load + agent config/.env while running. When no job profile is selected, both homes + are the same and legacy server-default behavior is preserved. 
+ """ from cron.jobs import mark_job_run, save_job_output job_id = job.get("id", "") + execution_profile_home = execution_profile_home or profile_home + + def _with_cron_home(home, fn): + if home is None: + return fn() + from api.profiles import cron_profile_context_for_home + + with cron_profile_context_for_home(home): + return fn() + try: - success, output, final_response, error = run_job(job) - save_job_output(job_id, output) + success, output, final_response, error = _run_cron_job_in_profile_subprocess( + job, execution_profile_home + ) - # Match the scheduled cron path: an apparently successful run with no - # final response should not leave the job looking healthy. - if success and not final_response: - success = False - error = "Agent completed but produced empty response (model error, timeout, or misconfiguration)" + # Persist output and run metadata back to the job's owning cron store, + # even when the selected execution profile is different. + def _persist_success(): + save_job_output(job_id, output) - mark_job_run(job_id, success, error) + # Match the scheduled cron path: an apparently successful run with no + # final response should not leave the job looking healthy. 
+ _success, _error = success, error + if _success and not final_response: + _success = False + _error = "Agent completed but produced empty response (model error, timeout, or misconfiguration)" + + mark_job_run(job_id, _success, _error) + + _with_cron_home(profile_home, _persist_success) except Exception as e: logger.exception("Manual cron run failed for job %s", job_id) try: - mark_job_run(job_id, False, str(e)) + _with_cron_home(profile_home, lambda: mark_job_run(job_id, False, str(e))) except Exception: logger.debug("Failed to mark manual cron run failure for %s", job_id) finally: @@ -219,6 +778,10 @@ from api.config import ( get_reasoning_status, set_reasoning_display, set_reasoning_effort, + create_stream_channel, + get_webui_session_save_mode, + STREAM_GOAL_RELATED, + PENDING_GOAL_CONTINUATION, ) from api.helpers import ( require, @@ -232,6 +795,125 @@ from api.helpers import ( redact_session_data, _redact_text, ) +from api.agent_health import build_agent_health_payload +from api.request_diagnostics import RequestDiagnostics +from api.system_health import build_system_health_payload + + +def _kanban_unknown_endpoint(handler, parsed, method: str) -> bool: + """Return a Kanban-specific 404 for stale clients/obsolete endpoint shapes.""" + return bad( + handler, + ( + f"unknown Kanban endpoint: {method} {parsed.path}. " + "If this appeared after a WebUI update, your browser may be running " + "a stale cached bundle; use Hard refresh now, then reopen Kanban." + ), + status=404, + ) or True + + +def _clear_stale_stream_state(session) -> bool: + """Clear persisted streaming flags when the in-memory stream no longer exists. + + A server restart or worker crash can leave active_stream_id/pending_* in the + session JSON while STREAMS is empty. The frontend then keeps reconnecting to + a dead stream and shows a permanent running/thinking state. 
+ + SAFETY (#1558): If ``session`` was loaded with ``metadata_only=True``, its + ``messages`` array is empty by design and calling ``save()`` would + atomically overwrite the on-disk JSON, wiping the conversation. In that + case we re-load the full session before mutating, so the persisted + write carries the real messages forward. + """ + stream_id = getattr(session, "active_stream_id", None) + if not stream_id: + return False + with STREAMS_LOCK: + stream_alive = stream_id in STREAMS + if stream_alive: + return False + + # ── #1558 P0 safety: if we were handed a metadata-only stub, reload the + # full session before touching persisted state. The original + # metadata-only object is left untouched so the caller's read path is + # unaffected. + original_stub = session # SHOULD-FIX #1 (Opus): keep reference so we can + # patch the caller's in-memory copy after a + # successful clear, avoiding one ghost SSE + # reconnect on the very next /api/session GET. + if getattr(session, "_loaded_metadata_only", False): + try: + from api.models import get_session as _get_session + session = _get_session(session.session_id, metadata_only=False) + except Exception: + # If we cannot upgrade to a full load (file gone, decode error, + # etc.) bail without clearing — better to leave a stale + # active_stream_id than to wipe the conversation. + logger.warning( + "_clear_stale_stream_state: refused to clear stale stream %s " + "for session %s — full reload failed and we will not save a " + "metadata-only stub. See #1558.", + stream_id, getattr(session, "session_id", "?"), + ) + return False + if session is None: + return False + # The full-load path may have already repaired stale pending fields + # via _repair_stale_pending(); only re-assert if still set. 
+ if not getattr(session, "active_stream_id", None): + # Patch the caller's stub so its read path also sees the cleared + # field (matches the Opus SHOULD-FIX #1 — without this, /api/session + # would briefly return the stale active_stream_id and the frontend + # would attempt one ghost SSE reconnect before recovering). + try: + original_stub.active_stream_id = None + if hasattr(original_stub, "pending_user_message"): + original_stub.pending_user_message = None + if hasattr(original_stub, "pending_attachments"): + original_stub.pending_attachments = [] + if hasattr(original_stub, "pending_started_at"): + original_stub.pending_started_at = None + except Exception: + pass + return False + + # ── #1533 race fix: acquire the per-session lock and re-read + # active_stream_id under it. A concurrent chat_start may have already + # registered a new stream after our STREAMS_LOCK check above; in that + # case we must NOT clobber its session.active_stream_id. + with _get_session_agent_lock(session.session_id): + if getattr(session, "active_stream_id", None) != stream_id: + return False + _materialize_pending_user_turn_before_error(session) + session.active_stream_id = None + if hasattr(session, "pending_user_message"): + session.pending_user_message = None + if hasattr(session, "pending_attachments"): + session.pending_attachments = [] + if hasattr(session, "pending_started_at"): + session.pending_started_at = None + try: + session.save() + except Exception: + logger.exception( + "_clear_stale_stream_state: save() failed for session %s", + getattr(session, "session_id", "?"), + ) + # Patch the caller's stub (if different from the full-load object) so + # its in-memory active_stream_id matches what just got persisted. 
+ if original_stub is not session: + try: + original_stub.active_stream_id = None + if hasattr(original_stub, "pending_user_message"): + original_stub.pending_user_message = None + if hasattr(original_stub, "pending_attachments"): + original_stub.pending_attachments = [] + if hasattr(original_stub, "pending_started_at"): + original_stub.pending_started_at = None + except Exception: + pass + return True # ── CSRF: validate Origin/Referer on POST ──────────────────────────────────── import re as _re @@ -583,6 +1265,28 @@ def _resolve_compatible_session_model_state( # Skip normalization for models on custom/openrouter namespaces — these are # user-controlled and should never be silently replaced. + # + # OpenAI Codex is intentionally normalized to the OpenAI family above so bare + # GPT IDs survive provider switches. Slash-qualified OpenAI IDs are different: + # ``openai/gpt-...`` is the OpenRouter shape for OpenAI models, and + # resolve_model_provider() routes that through OpenRouter when Codex is the + # configured provider. Legacy sessions can carry that stale slash ID without + # a saved model_provider, so repair it to the active Codex default unless the + # session/request explicitly says it is an OpenRouter selection. (#1734) + if ( + raw_active_provider == "openai-codex" + and model_provider == "openai" + and requested_provider is None + and default_model + ): + # Persist provider_context = "openai-codex" unconditionally on this + # repair path so the resolved shape is stable across resolutions + # (Opus stage-303 SHOULD-FIX: avoid redundant repair-writes per + # chat-start when the catalog-coverage check fails — e.g. if a + # future Codex default is itself slash-prefixed). Once we've + # decided the session belongs to Codex, persist that decision. + return default_model, raw_active_provider, True + # Also normalize when the model is from a known provider but the active provider # is an unlisted one (e.g. 
ollama-cloud) — active_provider is "" in that case # but raw_active_provider is set. If model_provider doesn't start with the raw @@ -636,7 +1340,6 @@ def _resolve_effective_session_model_for_display(session) -> str: ) return effective_model or original_model - def _resolve_effective_session_model_provider_for_display(session) -> str | None: original_model = getattr(session, "model", None) or "" _model, provider, _changed = _resolve_compatible_session_model_state( @@ -670,6 +1373,319 @@ def _session_model_state_from_request( return model_value, provider +def _lookup_gateway_session_identity(session_id: str) -> dict: + if not session_id: + return {} + metadata = _load_gateway_session_identity_map().get(str(session_id)) + return metadata if isinstance(metadata, dict) else {} + + +def _lookup_cli_session_metadata(session_id: str) -> dict: + if not session_id: + return {} + try: + for row in get_cli_sessions(): + if row.get("session_id") == session_id: + return row + except Exception: + return {} + return {} + + +def _messaging_session_identity(session: dict, raw_source: str) -> str: + metadata = _lookup_gateway_session_identity(session.get("session_id")) + session_key = _safe_first( + metadata.get("session_key"), + session.get("session_key"), + session.get("gateway_session_key"), + ) + if session_key: + return f"{raw_source}|session_key:{session_key}" + + chat_id = _safe_first( + metadata.get("chat_id"), + session.get("chat_id"), + session.get("origin_chat_id"), + ) + thread_id = _safe_first(metadata.get("thread_id"), session.get("thread_id")) + chat_type = _safe_first(metadata.get("chat_type"), session.get("chat_type")) + user_id = _safe_first( + metadata.get("user_id"), + session.get("user_id"), + session.get("origin_user_id"), + ) + + identity_parts = [] + if chat_type: + identity_parts.append(f"chat_type:{chat_type}") + if chat_id: + identity_parts.append(f"chat_id:{chat_id}") + if thread_id: + identity_parts.append(f"thread_id:{thread_id}") + if user_id: + 
identity_parts.append(f"user_id:{user_id}") + + if identity_parts: + return f"{raw_source}|" + "|".join(identity_parts) + return raw_source + + +def _session_messaging_raw_source(session: dict) -> str: + raw = _safe_first( + session.get("raw_source"), + session.get("source_tag"), + session.get("source"), + session.get("platform"), + ) + if not raw: + raw = session.get("source_label") or "messaging" + return _normalize_messaging_source(raw) + + +def _has_durable_messaging_identity(session: dict) -> bool: + metadata = _lookup_gateway_session_identity(session.get("session_id")) + return bool(_safe_first( + metadata.get("session_key"), + session.get("session_key"), + session.get("gateway_session_key"), + metadata.get("chat_id"), + session.get("chat_id"), + session.get("origin_chat_id"), + metadata.get("thread_id"), + session.get("thread_id"), + )) + + +def _numeric_count(value) -> int: + try: + return int(float(_safe_first(value, 0) or 0)) + except (TypeError, ValueError): + return 0 + + +def _should_hide_stale_messaging_session( + session: dict, + active_gateway_session_ids: set[str], + active_gateway_sources: set[str], +) -> bool: + """Hide stale Gateway-owned internal rows after an external chat moved on. + + Hermes Gateway keeps the external conversation identity in sessions.json. + Compression/session-reset can leave old Agent state.db rows behind; those + rows are implementation segments, not distinct conversations users chose. + Only apply this aggressive hiding when Gateway is currently advertising an + active session for the same messaging source. Without that source-of-truth + file we keep the old fallback behavior. 
+ """ + raw_source = _session_messaging_raw_source(session) + if not _is_known_messaging_source(raw_source): + return False + if not active_gateway_session_ids or raw_source not in active_gateway_sources: + return False + + sid = _safe_first(session.get("session_id")) + if sid and sid in active_gateway_session_ids: + return False + + if _safe_first(session.get("end_reason")) in _STALE_MESSAGING_END_REASONS: + return True + + if not _has_durable_messaging_identity(session): + return True + + if session.get("parent_session_id"): + return True + + message_count = _numeric_count(session.get("message_count")) + actual_count = _numeric_count(session.get("actual_message_count")) + if message_count <= 0 and actual_count <= 0: + return True + + return False + + +def _is_messaging_session_record(session) -> bool: + """Return true for sessions backed by external messaging channels.""" + if not session: + return False + if ( + (getattr(session, "session_source", None) if not isinstance(session, dict) else session.get("session_source")) == "messaging" + ): + return True + raw = _safe_first( + getattr(session, "raw_source", None) if not isinstance(session, dict) else session.get("raw_source"), + getattr(session, "source_tag", None) if not isinstance(session, dict) else session.get("source_tag"), + getattr(session, "source", None) if not isinstance(session, dict) else session.get("source"), + session.get("source_label") if isinstance(session, dict) else None, + ) + return _is_known_messaging_source(raw) + + +def _is_messaging_session_id(sid: str) -> bool: + """Detect messaging-backed sessions from WebUI metadata or Agent rows.""" + try: + session = Session.load(sid) + if _is_messaging_session_record(session): + return True + except Exception: + pass + return _is_messaging_session_record(_lookup_cli_session_metadata(sid)) + + +def _session_sort_timestamp(session: dict) -> float: + return float( + _safe_first( + session.get("last_message_at"), + session.get("updated_at"), + 
session.get("created_at"), + session.get("started_at"), + 0, + ) or 0 + ) or 0.0 + + +def _is_cli_session_for_settings(session: dict) -> bool: + """Return True for importable CLI sessions that are safe to classify for settings.""" + if not isinstance(session, dict): + return False + if is_cli_session_row(session): + return True + + # Fallback for legacy local copies that had weak/empty metadata: + # keep this conservative so messaging sessions do not collapse incorrectly. + if not session.get("is_cli_session"): + return False + source = str(session.get("source") or "").strip().lower() + if source in MESSAGING_SOURCES: + return False + title = str(session.get("title") or "").strip().lower() + return title in ("", "untitled", "cli", "cli session") or title.endswith(" session") and ( + not source or source == "cli" + ) + + +CLI_VISIBLE_SESSION_CAP = 20 + + +def _cap_recent_cli_sessions(sessions: list[dict], cli_cap: int = CLI_VISIBLE_SESSION_CAP) -> list[dict]: + """Keep only the most recent CLI-visible sessions after filtering.""" + if cli_cap <= 0: + return sessions + kept = [] + cli_seen = 0 + for session in sessions: + if _is_cli_session_for_settings(session): + cli_seen += 1 + if cli_seen > cli_cap: + continue + kept.append(session) + return kept + + +def _merge_cli_sidebar_metadata(ui_session: dict, cli_meta: dict) -> dict: + """Merge source-of-truth CLI metadata into a sidebar session row. + + Preserve UI-owned state (archived/pinned) while replacing metadata that can + legitimately drift in WebUI snapshots. 
+ """ + if not ui_session: + return ui_session + if not cli_meta: + return dict(ui_session) + merged = dict(ui_session) + merged["is_cli_session"] = True + for key in ( + "source_tag", + "raw_source", + "session_source", + "source_label", + "user_id", + "chat_id", + "chat_type", + "thread_id", + "session_key", + "platform", + "parent_session_id", + "end_reason", + "actual_message_count", + "_lineage_root_id", + "_lineage_tip_id", + "_compression_segment_count", + ): + value = _safe_first(cli_meta.get(key)) + if value: + merged[key] = value + + if cli_meta.get("created_at") is not None: + merged["created_at"] = cli_meta["created_at"] + if cli_meta.get("updated_at") is not None: + merged["updated_at"] = cli_meta["updated_at"] + if cli_meta.get("last_message_at") is not None: + merged["last_message_at"] = cli_meta["last_message_at"] + if cli_meta.get("message_count") is not None: + merged["message_count"] = max( + _numeric_count(merged.get("message_count")), + _numeric_count(cli_meta.get("message_count")), + ) + elif cli_meta.get("actual_message_count") is not None: + merged["message_count"] = max( + _numeric_count(merged.get("message_count")), + _numeric_count(cli_meta.get("actual_message_count")), + ) + + if cli_meta.get("title"): + current_title = merged.get("title") + if not current_title or current_title == "Untitled": + merged["title"] = cli_meta["title"] + + if cli_meta.get("model"): + if not merged.get("model") or merged.get("model") == "unknown": + merged["model"] = cli_meta["model"] + return merged + + +def _messaging_source_key(session: dict) -> str | None: + raw = _session_messaging_raw_source(session) + if not _is_known_messaging_source(raw): + return None + return _messaging_session_identity(session, raw) + + +def _keep_latest_messaging_session_per_source(sessions: list[dict]) -> list[dict]: + """Keep only the newest sidebar row per messaging session identity.""" + gateway_metadata = _load_gateway_session_identity_map() + active_gateway_session_ids = 
{str(sid) for sid in gateway_metadata.keys() if sid} + active_gateway_sources = { + _normalize_messaging_source(_safe_first(meta.get("raw_source"), meta.get("platform"))) + for meta in gateway_metadata.values() + if isinstance(meta, dict) + } + active_gateway_sources = {source for source in active_gateway_sources if _is_known_messaging_source(source)} + + kept_sources: set[str] = set() + best_by_source: dict[str, dict] = {} + kept: list[dict] = [] + for session in sessions: + key = _messaging_source_key(session) + if not key: + kept.append(session) + continue + if _should_hide_stale_messaging_session(session, active_gateway_session_ids, active_gateway_sources): + continue + if key in kept_sources: + kept_sources.add(key) + current = best_by_source.get(key) + if current is None or _session_sort_timestamp(session) > _session_sort_timestamp(current): + best_by_source[key] = session + continue + kept_sources.add(key) + best_by_source[key] = session + + kept.extend(best_by_source.values()) + kept.sort(key=_session_sort_timestamp, reverse=True) + return kept + + from api.models import ( Session, get_session, @@ -678,6 +1694,7 @@ from api.models import ( title_from, _write_session_index, SESSION_INDEX_FILE, + _active_state_db_path, load_projects, save_projects, import_cli_session, @@ -698,17 +1715,28 @@ from api.workspace import ( resolve_trusted_workspace, validate_workspace_to_add, _is_blocked_system_path, + _strip_surrounding_quotes, _workspace_blocked_roots, ) from api.upload import handle_upload, handle_upload_extract, handle_transcribe -from api.streaming import _sse, _run_agent_streaming, cancel_stream -from api.providers import get_providers, set_provider_key, remove_provider_key +from api.streaming import ( + _sse, + _run_agent_streaming, + cancel_stream, + _materialize_pending_user_turn_before_error, +) +from api.providers import get_providers, get_provider_quota, set_provider_key, remove_provider_key from api.onboarding import ( apply_onboarding_setup, 
get_onboarding_status, complete_onboarding, probe_provider_endpoint, ) +from api.oauth import ( + cancel_onboarding_oauth_flow, + poll_onboarding_oauth_flow, + start_onboarding_oauth_flow, +) # Approval system (optional -- graceful fallback if agent not available) try: @@ -1006,11 +2034,298 @@ button:hover{background:rgba(124,185,255,.25)}
- + + """ + +# ── Logs endpoint ───────────────────────────────────────────────────────────── +_LOG_FILE_WHITELIST = { + "agent": "agent.log", + "errors": "errors.log", + "gateway": "gateway.log", +} +_LOG_TAIL_VALUES = {100, 200, 500, 1000} +_LOG_DEFAULT_TAIL = 200 +_LOG_MAX_BYTES = 4 * 1024 * 1024 + + +def _normalize_logs_tail(raw_tail) -> int: + try: + tail = int(str(raw_tail or "").strip()) + except (TypeError, ValueError): + return _LOG_DEFAULT_TAIL + return tail if tail in _LOG_TAIL_VALUES else _LOG_DEFAULT_TAIL + + +def _handle_logs(handler, parsed) -> bool: + """Return a bounded tail window for an active-profile Hermes log file.""" + query = parse_qs(parsed.query) + file_key = (query.get("file", ["agent"])[0] or "agent").strip().lower() + filename = _LOG_FILE_WHITELIST.get(file_key) + if not filename: + return bad(handler, "Unknown log file", status=400) + + tail = _normalize_logs_tail(query.get("tail", [None])[0]) + try: + from api.profiles import get_active_hermes_home + + hermes_home = Path(get_active_hermes_home()).expanduser() + except Exception: + hermes_home = Path(os.environ.get("HERMES_HOME") or (Path.home() / ".hermes")).expanduser() + + log_dir = hermes_home / "logs" + log_path = log_dir / filename + try: + # Defense in depth: the filename is hardcoded above, but keep the final + # path anchored under the active profile's logs directory. 
+ if log_path.resolve(strict=False).parent != log_dir.resolve(strict=False): + return bad(handler, "Invalid log file", status=400) + if not log_path.exists() or not log_path.is_file(): + return j(handler, { + "file": file_key, + "tail": tail, + "lines": [], + "truncated": False, + "total_bytes": 0, + "mtime": None, + "hint": f"Log file for {file_key} not found yet.", + }) + st = log_path.stat() + total_bytes = int(st.st_size) + read_bytes = min(total_bytes, _LOG_MAX_BYTES) + with log_path.open("rb") as fh: + if total_bytes > read_bytes: + fh.seek(total_bytes - read_bytes) + raw = fh.read(read_bytes) + text = raw.decode("utf-8", errors="replace") + lines = text.splitlines()[-tail:] + return j(handler, { + "file": file_key, + "tail": tail, + "lines": lines, + "truncated": total_bytes > read_bytes, + "total_bytes": total_bytes, + "mtime": st.st_mtime, + "hint": "", + }) + except Exception as exc: + logger.exception("Failed to read whitelisted log file %s", file_key) + return bad(handler, _sanitize_error(exc), status=500) + # ── Insights endpoint ────────────────────────────────────────────────────────── +_LLM_WIKI_DOCS_URL = "https://hermes-agent.nousresearch.com/docs/user-guide/skills/bundled/research/research-llm-wiki" +_LLM_WIKI_PAGE_DIRS = ("entities", "concepts", "comparisons", "queries") + + +def _llm_wiki_active_hermes_home() -> Path: + try: + from api.profiles import get_active_hermes_home + return Path(get_active_hermes_home()).expanduser() + except Exception: + return Path(os.getenv("HERMES_HOME", str(Path.home() / ".hermes"))).expanduser() + + +def _llm_wiki_env_file_path(hermes_home: Path) -> str | None: + env_path = hermes_home / ".env" + if not env_path.exists() or not env_path.is_file(): + return None + try: + for line in env_path.read_text(encoding="utf-8", errors="replace").splitlines(): + stripped = line.strip() + if not stripped or stripped.startswith("#") or "=" not in stripped: + continue + key, value = stripped.split("=", 1) + if key.strip() != 
"WIKI_PATH": + continue + value = value.strip().strip('"').strip("'") + return value or None + except Exception: + return None + return None + + +def _llm_wiki_get_config_path_value(config: dict, dotted_key: str) -> str | None: + if not isinstance(config, dict): + return None + if dotted_key in config and config.get(dotted_key): + return str(config.get(dotted_key)) + cur = config + for part in dotted_key.split("."): + if not isinstance(cur, dict) or part not in cur: + return None + cur = cur[part] + return str(cur) if cur else None + + +def _llm_wiki_config_path() -> str | None: + try: + from api.config import get_config as _get_cfg + cfg = _get_cfg() + except Exception: + return None + return ( + _llm_wiki_get_config_path_value(cfg, "skills.config.wiki.path") + or _llm_wiki_get_config_path_value(cfg, "wiki.path") + ) + + +# Cap WIKI walks to prevent self-DoS if WIKI_PATH points at /, /etc, /home, etc. +# Real LLM wikis have under a few thousand files; 10k is generous and catches misconfig. +_LLM_WIKI_MAX_FILES = 10000 +# Refuse to walk these system roots even if explicitly configured. 
+_LLM_WIKI_FORBIDDEN_ROOTS = frozenset( + str(Path(p).expanduser().resolve()) for p in ("/", "/etc", "/usr", "/var", "/opt", "/sys", "/proc") +) + + +def _llm_wiki_resolve_path() -> tuple[Path, str, bool]: + hermes_home = _llm_wiki_active_hermes_home() + raw = os.getenv("WIKI_PATH") or _llm_wiki_env_file_path(hermes_home) + source = "WIKI_PATH" if raw else "default" + configured = bool(raw) + if not raw: + raw = _llm_wiki_config_path() + if raw: + source = "skills.config.wiki.path" + configured = True + if not raw: + raw = "~/wiki" + return Path(os.path.expandvars(raw)).expanduser(), source, configured + + +def _llm_wiki_safe_iso(ts: float | None) -> str | None: + if not ts: + return None + try: + from datetime import datetime, timezone + return datetime.fromtimestamp(ts, tz=timezone.utc).isoformat().replace("+00:00", "Z") + except Exception: + return None + + +def _llm_wiki_count_files(root: Path) -> int: + if not root.exists() or not root.is_dir(): + return 0 + # Defense in depth: refuse to walk forbidden system roots even if WIKI_PATH + # was set to one. The endpoint is auth-gated but a misconfigured server + # shouldn't self-DoS by rglob'ing all of /etc on every Insights load. + try: + if str(root.resolve()) in _LLM_WIKI_FORBIDDEN_ROOTS: + return 0 + except Exception: + return 0 + count = 0 + iterated = 0 + for item in root.rglob("*"): + iterated += 1 + if iterated > _LLM_WIKI_MAX_FILES: + break # bounded — prevents hangs on symlink loops or huge trees + try: + if item.is_file() and not any(part.startswith(".") for part in item.relative_to(root).parts): + count += 1 + except Exception: + continue + return count + + +def _llm_wiki_page_files(wiki_path: Path) -> list[Path]: + pages: list[Path] = [] + # Defense in depth: refuse forbidden system roots. 
+ try: + if str(wiki_path.resolve()) in _LLM_WIKI_FORBIDDEN_ROOTS: + return pages + except Exception: + return pages + iterated = 0 + for dirname in _LLM_WIKI_PAGE_DIRS: + section = wiki_path / dirname + if not section.exists() or not section.is_dir(): + continue + for item in section.rglob("*.md"): + iterated += 1 + if iterated > _LLM_WIKI_MAX_FILES: + return pages # bounded + try: + rel = item.relative_to(section) + if item.is_file() and not any(part.startswith(".") for part in rel.parts): + pages.append(item) + except Exception: + continue + return pages + + +def _build_llm_wiki_status() -> dict: + """Return private-safe LLM Wiki status metadata without reading page bodies.""" + try: + wiki_path, path_source, path_configured = _llm_wiki_resolve_path() + base = { + "available": False, + "enabled": False, + "status": "missing", + "entry_count": 0, + "page_count": 0, + "raw_source_count": 0, + "last_updated": None, + "last_writer": None, + "path_configured": path_configured, + "path_source": path_source, + "toggle_available": False, + "toggle_reason": "Hermes Agent exposes WIKI_PATH/wiki.path for location, but no stable on/off config flag is currently available.", + "docs_url": _LLM_WIKI_DOCS_URL, + } + if not wiki_path.exists(): + return base + if not wiki_path.is_dir(): + base["status"] = "not_directory" + return base + + page_files = _llm_wiki_page_files(wiki_path) + status_files = [p for p in (wiki_path / "SCHEMA.md", wiki_path / "index.md", wiki_path / "log.md") if p.exists() and p.is_file()] + status_files.extend(page_files) + latest = None + for item in status_files: + try: + mtime = item.stat().st_mtime + except Exception: + continue + latest = mtime if latest is None else max(latest, mtime) + + base.update({ + "available": True, + "enabled": True, + "status": "ready" if page_files else "empty", + "entry_count": len(page_files), + "page_count": len(page_files), + "raw_source_count": _llm_wiki_count_files(wiki_path / "raw"), + "last_updated": 
_llm_wiki_safe_iso(latest), + }) + return base + except Exception as exc: + return { + "available": False, + "enabled": False, + "status": "error", + "entry_count": 0, + "page_count": 0, + "raw_source_count": 0, + "last_updated": None, + "last_writer": None, + "path_configured": False, + "path_source": "unknown", + "toggle_available": False, + "toggle_reason": "Unable to inspect LLM Wiki status safely.", + "docs_url": _LLM_WIKI_DOCS_URL, + "error": type(exc).__name__, + } + + +def _handle_llm_wiki_status(handler, parsed) -> bool: + j(handler, _build_llm_wiki_status()) + return True + + def _handle_insights(handler, parsed) -> bool: """Return usage analytics from local WebUI session data.""" import collections @@ -1023,7 +2338,32 @@ def _handle_insights(handler, parsed) -> bool: days = 30 now = _time.time() - cutoff = now - (days * 86400) + today = _time.localtime(now) + today_midnight = _time.mktime((today.tm_year, today.tm_mon, today.tm_mday, 0, 0, 0, today.tm_wday, today.tm_yday, today.tm_isdst)) + day_secs = 86400 + first_day_ts = today_midnight - ((days - 1) * day_secs) + cutoff = first_day_ts + + def _safe_usage_int(value) -> int: + try: + return max(int(float(value or 0)), 0) + except (TypeError, ValueError): + return 0 + + def _safe_cost_float(value) -> float: + if value is None: + return 0.0 + try: + if isinstance(value, str): + value = value.strip().replace("$", "").replace(",", "") + if not value: + return 0.0 + return max(float(value), 0.0) + except (TypeError, ValueError): + return 0.0 + + def _session_usage_ts(session: dict) -> float: + return session.get("updated_at", session.get("created_at", 0)) or session.get("created_at", 0) or 0 # Walk session index (fast, no full JSON parse) sessions_data = [] @@ -1039,7 +2379,7 @@ def _handle_insights(handler, parsed) -> bool: for entry in idx: created = entry.get("created_at", 0) or 0 updated = entry.get("updated_at", 0) or 0 - # Session is relevant if it was created or updated within the window + # Session is 
relevant if it was created or updated within the calendar window. if max(created, updated) < cutoff: continue sessions_data.append(entry) @@ -1050,39 +2390,91 @@ def _handle_insights(handler, parsed) -> bool: total_input_tokens = 0 total_output_tokens = 0 total_cost = 0.0 - model_counts = collections.Counter() + model_stats: dict[str, dict] = {} + daily_tokens: dict[str, dict] = {} # Activity by day of week (0=Mon .. 6=Sun) dow_activity = collections.Counter() # Activity by hour of day (0-23) hod_activity = collections.Counter() for s in sessions_data: - total_messages += max(s.get("message_count", 0) or 0, 0) - total_input_tokens += max(s.get("input_tokens", 0) or 0, 0) - total_output_tokens += max(s.get("output_tokens", 0) or 0, 0) - cost = s.get("estimated_cost") - if cost is not None: - try: - total_cost += float(cost) - except (ValueError, TypeError): - pass + input_tokens = _safe_usage_int(s.get("input_tokens")) + output_tokens = _safe_usage_int(s.get("output_tokens")) + cost_value = _safe_cost_float(s.get("estimated_cost")) + total_messages += _safe_usage_int(s.get("message_count")) + total_input_tokens += input_tokens + total_output_tokens += output_tokens + total_cost += cost_value + model = s.get("model") or "unknown" - if model: - model_counts[model] += 1 + bucket = model_stats.setdefault(model, { + "sessions": 0, + "input_tokens": 0, + "output_tokens": 0, + "cost": 0.0, + }) + bucket["sessions"] += 1 + bucket["input_tokens"] += input_tokens + bucket["output_tokens"] += output_tokens + bucket["cost"] += cost_value + # Activity patterns - ts = s.get("updated_at", s.get("created_at", 0)) or 0 + ts = _session_usage_ts(s) if ts: try: dt = _time.localtime(ts) + day_key = _time.strftime("%Y-%m-%d", dt) + daily_bucket = daily_tokens.setdefault(day_key, { + "input_tokens": 0, + "output_tokens": 0, + "sessions": 0, + "cost": 0.0, + }) + daily_bucket["input_tokens"] += input_tokens + daily_bucket["output_tokens"] += output_tokens + daily_bucket["sessions"] += 1 + 
daily_bucket["cost"] += cost_value dow_activity[dt.tm_wday] += 1 hod_activity[dt.tm_hour] += 1 except Exception: pass # Build model breakdown + total_tokens = total_input_tokens + total_output_tokens models_breakdown = [] - for model, count in model_counts.most_common(): - models_breakdown.append({"model": model, "sessions": count}) + for model, stats in model_stats.items(): + row_total_tokens = stats["input_tokens"] + stats["output_tokens"] + row_cost = round(stats["cost"], 6) + models_breakdown.append({ + "model": model, + "sessions": stats["sessions"], + "input_tokens": stats["input_tokens"], + "output_tokens": stats["output_tokens"], + "total_tokens": row_total_tokens, + "cost": row_cost, + "session_share": int(round((stats["sessions"] / total_sessions) * 100)) if total_sessions else 0, + "token_share": int(round((row_total_tokens / total_tokens) * 100)) if total_tokens else 0, + "cost_share": int(round((row_cost / total_cost) * 100)) if total_cost else 0, + }) + models_breakdown.sort(key=lambda r: (-r["cost"], -r["sessions"], r["model"])) + + daily_series = [] + for i in range(days): + day_ts = first_day_ts + (i * day_secs) + day_key = _time.strftime("%Y-%m-%d", _time.localtime(day_ts)) + bucket = daily_tokens.get(day_key, { + "input_tokens": 0, + "output_tokens": 0, + "sessions": 0, + "cost": 0.0, + }) + daily_series.append({ + "date": day_key, + "input_tokens": bucket["input_tokens"], + "output_tokens": bucket["output_tokens"], + "sessions": bucket["sessions"], + "cost": round(bucket["cost"], 6), + }) # Day-of-week labels dow_labels = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"] @@ -1097,9 +2489,10 @@ def _handle_insights(handler, parsed) -> bool: "total_messages": total_messages, "total_input_tokens": total_input_tokens, "total_output_tokens": total_output_tokens, - "total_tokens": total_input_tokens + total_output_tokens, + "total_tokens": total_tokens, "total_cost": round(total_cost, 6), "models": models_breakdown, + "daily_tokens": daily_series, 
"activity_by_day": dow_data, "activity_by_hour": hod_data, }) @@ -1108,6 +2501,299 @@ def _handle_insights(handler, parsed) -> bool: # ── GET routes ──────────────────────────────────────────────────────────────── +def _accept_loop_health(handler) -> dict: + server = getattr(handler, "server", None) + return { + "requests_total": int(getattr(server, "accept_loop_requests_total", 0) or 0), + "last_request_at": round(float(getattr(server, "accept_loop_last_request_at", 0.0) or 0.0), 3), + } + + +def _streams_lock_health(timeout_seconds: float = 0.5) -> dict: + t0 = time.time() + acquired = STREAMS_LOCK.acquire(timeout=timeout_seconds) + elapsed_ms = round((time.time() - t0) * 1000, 1) + if not acquired: + return { + "status": "blocked", + "timeout_seconds": timeout_seconds, + "ms": elapsed_ms, + } + try: + return { + "status": "ok", + "active_streams": len(STREAMS), + "ms": elapsed_ms, + } + finally: + STREAMS_LOCK.release() + + +def _run_lifecycle_health() -> dict: + """Return active worker-run state independent of SSE stream presence.""" + # Import the module rather than relying only on imported scalar aliases so + # LAST_RUN_FINISHED_AT stays fresh after unregister_active_run() updates it. 
+ from api import config as _live_config + + now = time.time() + with _live_config.ACTIVE_RUNS_LOCK: + runs = [] + for stream_id, raw in (_live_config.ACTIVE_RUNS or {}).items(): + item = dict(raw or {}) + started_at = item.get("started_at") + try: + age = max(0.0, now - float(started_at)) + except Exception: + age = 0.0 + item.setdefault("stream_id", stream_id) + item["age_seconds"] = round(age, 1) + runs.append(item) + last_finished = _live_config.LAST_RUN_FINISHED_AT + runs.sort(key=lambda item: float(item.get("started_at") or 0.0)) + payload = { + "active_runs": len(runs), + "runs": runs, + "last_run_finished_at": last_finished, + } + if runs: + payload["oldest_run_age_seconds"] = runs[0].get("age_seconds", 0.0) + elif last_finished: + payload["idle_seconds_since_last_run"] = round(max(0.0, now - float(last_finished)), 1) + return payload + + +def _deep_health_checks(stream_check: dict | None = None) -> tuple[dict, bool]: + """Run cheap probes that exercise the state paths used by the UI shell. + + Plain /health intentionally stays tiny. /health?deep=1 is for supervisors + and watchdogs that need to know whether the process can still touch the + shared stream map, sidebar/session path, project state, and Hermes state.db + without hitting the RST-before-write failure mode from #1458. + + `stream_check` is the result from a prior `_streams_lock_health()` call; + if provided, it's reused so we don't acquire `STREAMS_LOCK` twice on the + same /health?deep=1 request (per Opus advisor on stage-297). 
+ """ + checks: dict[str, dict] = {} + + checks["streams_lock"] = stream_check if stream_check is not None else _streams_lock_health() + if checks["streams_lock"].get("status") != "ok": + return checks, False + + t0 = time.time() + try: + sessions = all_sessions() + checks["sessions"] = { + "status": "ok", + "count": len(sessions), + "ms": round((time.time() - t0) * 1000, 1), + } + except Exception as exc: + checks["sessions"] = { + "status": "error", + "error": type(exc).__name__, + "ms": round((time.time() - t0) * 1000, 1), + } + + t0 = time.time() + try: + projects = load_projects(_migrate=False) + checks["projects"] = { + "status": "ok", + "count": len(projects), + "ms": round((time.time() - t0) * 1000, 1), + } + except Exception as exc: + checks["projects"] = { + "status": "error", + "error": type(exc).__name__, + "ms": round((time.time() - t0) * 1000, 1), + } + + t0 = time.time() + try: + db_path = _active_state_db_path() + if not db_path.exists(): + checks["state_db"] = { + "status": "missing", + "ms": round((time.time() - t0) * 1000, 1), + } + else: + with closing(sqlite3.connect(str(db_path))) as conn: + conn.execute("PRAGMA schema_version").fetchone() + checks["state_db"] = { + "status": "ok", + "ms": round((time.time() - t0) * 1000, 1), + } + except Exception as exc: + checks["state_db"] = { + "status": "error", + "error": type(exc).__name__, + "ms": round((time.time() - t0) * 1000, 1), + } + + healthy = all( + check.get("status") in {"ok", "missing"} + for check in checks.values() + ) + return checks, healthy + + +def _handle_health(handler, parsed): + deep = parse_qs(parsed.query or "").get("deep", [""])[0].lower() in {"1", "true", "yes", "on"} + stream_check = _streams_lock_health() + run_check = _run_lifecycle_health() + payload = { + "status": "ok" if stream_check.get("status") == "ok" else "degraded", + "sessions": len(SESSIONS), + "active_streams": int(stream_check.get("active_streams") or 0), + "active_runs": int(run_check.get("active_runs") or 
0), + "runs": run_check.get("runs", []), + "last_run_finished_at": run_check.get("last_run_finished_at"), + "uptime_seconds": round(time.time() - SERVER_START_TIME, 1), + "accept_loop": _accept_loop_health(handler), + } + if "oldest_run_age_seconds" in run_check: + payload["oldest_run_age_seconds"] = run_check["oldest_run_age_seconds"] + if "idle_seconds_since_last_run" in run_check: + payload["idle_seconds_since_last_run"] = run_check["idle_seconds_since_last_run"] + if deep: + if stream_check.get("status") != "ok": + payload["checks"] = {"streams_lock": stream_check} + return j(handler, payload, status=503) + checks, healthy = _deep_health_checks(stream_check=stream_check) + payload["checks"] = checks + if not healthy: + payload["status"] = "degraded" + return j(handler, payload, status=503) + if payload["status"] != "ok": + return j(handler, payload, status=503) + return j(handler, payload) + + +# ── Plugin visibility endpoint (#539) ─────────────────────────────────────── +_PLUGIN_VISIBILITY_HOOKS = ( + "pre_tool_call", + "post_tool_call", + "pre_llm_call", + "post_llm_call", +) +_PLUGIN_VISIBILITY_HOOK_SET = set(_PLUGIN_VISIBILITY_HOOKS) + + +def _get_plugin_manager_for_visibility(): + """Return Hermes Agent's plugin manager for read-only WebUI visibility.""" + from hermes_cli.plugins import get_plugin_manager + + return get_plugin_manager() + + +def _clean_plugin_visibility_text(value, *, limit=240) -> str: + """Return bounded display text without path/callback-like internals.""" + if value is None: + return "" + text = str(value).replace("\x00", "").strip() + # Display metadata should be plain labels/descriptions. Drop multiline text + # and common path separators rather than risk leaking local plugin paths. + text = " ".join(text.split()) + if len(text) > limit: + text = text[: limit - 1].rstrip() + "…" + return text + + +def _plugin_visibility_payload(manager=None) -> dict: + """Build a sanitized plugin/hook visibility payload for Settings. 
+ + The Hermes Agent manager stores manifests and callback objects internally. + This endpoint intentionally exposes only safe, user-facing metadata and the + four lifecycle hook names called out by the Settings visibility MVP. It + never includes plugin source paths, callback names, callback reprs, or raw + load errors because those can contain private filesystem details. + """ + manager = manager or _get_plugin_manager_for_visibility() + manager.discover_and_load(force=False) + + plugins = [] + raw_plugins = getattr(manager, "_plugins", {}) or {} + for key, loaded in sorted(raw_plugins.items(), key=lambda item: str(item[0])): + manifest = getattr(loaded, "manifest", None) + if manifest is None: + continue + plugin_key = _clean_plugin_visibility_text( + getattr(manifest, "key", None) or key or getattr(manifest, "name", ""), + limit=120, + ) + name = _clean_plugin_visibility_text(getattr(manifest, "name", "") or plugin_key, limit=120) + version = _clean_plugin_visibility_text(getattr(manifest, "version", ""), limit=80) + description = _clean_plugin_visibility_text(getattr(manifest, "description", ""), limit=280) + registered = [] + for hook in list(getattr(manifest, "provides_hooks", []) or []) + list(getattr(loaded, "hooks_registered", []) or []): + hook_name = str(hook or "").strip() + if hook_name in _PLUGIN_VISIBILITY_HOOK_SET and hook_name not in registered: + registered.append(hook_name) + registered.sort(key=_PLUGIN_VISIBILITY_HOOKS.index) + plugins.append({ + "name": name, + "key": plugin_key or name, + "version": version, + "description": description, + "enabled": bool(getattr(loaded, "enabled", False)), + "hooks": registered, + }) + + return { + "plugins": plugins, + "empty": not bool(plugins), + "supported_hooks": list(_PLUGIN_VISIBILITY_HOOKS), + "read_only": True, + } + + +def _handle_plugins(handler, parsed) -> bool: + try: + return j(handler, _plugin_visibility_payload()) + except Exception as exc: + logger.warning("Failed to build plugin visibility 
payload: %s", exc) + return j( + handler, + { + "plugins": [], + "empty": True, + "supported_hooks": list(_PLUGIN_VISIBILITY_HOOKS), + "read_only": True, + "unavailable": True, + }, + ) + + +_SHELL_ERROR_HTML = """ + + + + + Hermes is restarting + + +
+

Hermes is restarting…

+

The WebUI shell could not load cleanly. Refresh in a moment if this page does not update automatically.

+
+ +""" + + +def _serve_shell_unavailable(handler, exc: Exception) -> bool: + """Return HTML for shell-route failures so `/` never renders JSON.""" + logger.warning("Failed to serve WebUI shell route: %s", exc) + t( + handler, + _SHELL_ERROR_HTML, + status=503, + content_type="text/html; charset=utf-8", + ) + return True + + def handle_get(handler, parsed) -> bool: """Handle all GET routes. Returns True if handled, False for 404.""" @@ -1119,17 +2805,20 @@ def handle_get(handler, parsed) -> bool: return _serve_static(handler, stripped) if parsed.path in ("/", "/index.html") or parsed.path.startswith("/session/"): - from urllib.parse import quote - from api.updates import WEBUI_VERSION - version_token = quote(WEBUI_VERSION, safe="") - from api.extensions import inject_extension_tags + try: + from urllib.parse import quote + from api.updates import WEBUI_VERSION + version_token = quote(WEBUI_VERSION, safe="") + from api.extensions import inject_extension_tags - html = _INDEX_HTML_PATH.read_text(encoding="utf-8").replace("__WEBUI_VERSION__", version_token) - return t( - handler, - inject_extension_tags(html), - content_type="text/html; charset=utf-8", - ) + html = _INDEX_HTML_PATH.read_text(encoding="utf-8").replace("__WEBUI_VERSION__", version_token) + return t( + handler, + inject_extension_tags(html), + content_type="text/html; charset=utf-8", + ) + except Exception as exc: + return _serve_shell_unavailable(handler, exc) if parsed.path == "/login": _settings = load_settings() @@ -1138,9 +2827,13 @@ def handle_get(handler, parsed) -> bool: _login_strings = _LOGIN_LOCALE[ _resolve_login_locale_key(_lang) ] + from urllib.parse import quote + from api.updates import WEBUI_VERSION + version_token = quote(WEBUI_VERSION, safe="") _page = ( _LOGIN_PAGE_HTML.replace("{{BOT_NAME}}", _bn) .replace("{{BOT_NAME_INITIAL}}", _bn[0].upper()) + .replace("{{WEBUI_VERSION}}", version_token) .replace("{{LANG}}", _html.escape(_login_strings["lang"])) .replace("{{LOGIN_TITLE}}", 
_html.escape(_login_strings["title"])) .replace("{{LOGIN_SUBTITLE}}", _html.escape(_login_strings["subtitle"])) @@ -1188,7 +2881,7 @@ def handle_get(handler, parsed) -> bool: from api.updates import WEBUI_VERSION version_token = quote(WEBUI_VERSION, safe="") text = sw_path.read_text(encoding="utf-8").replace( - "__CACHE_VERSION__", version_token + "__WEBUI_VERSION__", version_token ) data = text.encode("utf-8") handler.send_response(200) @@ -1217,22 +2910,34 @@ def handle_get(handler, parsed) -> bool: handler.end_headers() return True - # ── Insights ── + # ── Insights / knowledge status ── if parsed.path == "/api/insights": return _handle_insights(handler, parsed) + if parsed.path.startswith("/api/kanban/"): + from api.kanban_bridge import handle_kanban_get + + # Only treat an explicit False as "no route matched". None means the + # bridge already sent a response via bad()/j() — emitting our own 404 + # on top of that produces concatenated JSON bodies on the wire. + result = handle_kanban_get(handler, parsed) + if result is False: + return _kanban_unknown_endpoint(handler, parsed, "GET") + return True + if parsed.path == "/api/wiki/status": + return _handle_llm_wiki_status(handler, parsed) + if parsed.path == "/api/logs": + return _handle_logs(handler, parsed) + if parsed.path == "/health": - with STREAMS_LOCK: - n_streams = len(STREAMS) - return j( - handler, - { - "status": "ok", - "sessions": len(SESSIONS), - "active_streams": n_streams, - "uptime_seconds": round(time.time() - SERVER_START_TIME, 1), - }, - ) + return _handle_health(handler, parsed) + + if parsed.path == "/api/health/agent": + return j(handler, build_agent_health_payload()) + + if parsed.path == "/api/system/health": + j(handler, build_system_health_payload()) + return True if parsed.path == "/api/models": return j(handler, get_available_models()) @@ -1240,19 +2945,50 @@ def handle_get(handler, parsed) -> bool: if parsed.path == "/api/models/live": return _handle_live_models(handler, parsed) + 
if parsed.path == "/api/dashboard/status": + from api import dashboard_probe + + j(handler, dashboard_probe.get_dashboard_status()) + return True + + if parsed.path == "/api/dashboard/config": + from api import dashboard_probe + + try: + j(handler, dashboard_probe.get_dashboard_config()) + except ValueError as exc: + bad(handler, str(exc), status=400) + return True + # ── Providers (GET) ── if parsed.path == "/api/providers": return j(handler, get_providers()) + # ── Plugins/hooks visibility (read-only, no callback/source internals) ── + if parsed.path == "/api/plugins": + return _handle_plugins(handler, parsed) + if parsed.path == "/api/provider/quota": + query = parse_qs(parsed.query) + provider_id = (query.get("provider", [""])[0] or None) + return j(handler, get_provider_quota(provider_id)) + if parsed.path == "/api/settings": settings = load_settings() # Never expose the stored password hash to clients settings.pop("password_hash", None) + # Surface env-var precedence so the UI can disable the password field + # instead of silently no-oping the save (#1560). The setting takes + # precedence in api.auth.get_password_hash(), but until now the UI + # had no way to know — see issue #1139 / #1560. + settings["password_env_var"] = bool( + os.getenv("HERMES_WEBUI_PASSWORD", "").strip() + ) # Inject the running version so the UI badge stays in sync with git tags # without any manual release step. 
try: - from api.updates import WEBUI_VERSION + from api.updates import AGENT_VERSION, WEBUI_VERSION settings["webui_version"] = WEBUI_VERSION + settings["agent_version"] = AGENT_VERSION except Exception: pass return j(handler, settings) @@ -1309,6 +3045,12 @@ def handle_get(handler, parsed) -> bool: try: _t1 = _time.monotonic() s = get_session(sid, metadata_only=(not load_messages)) + _clear_stale_stream_state(s) + cli_meta = _lookup_cli_session_metadata(sid) + is_messaging_session = _is_messaging_session_record(s) or _is_messaging_session_record(cli_meta) + cli_messages = [] + if is_messaging_session: + cli_messages = get_cli_session_messages(sid) _t2 = _time.monotonic() effective_model = ( _resolve_effective_session_model_for_display(s) @@ -1321,7 +3063,47 @@ def handle_get(handler, parsed) -> bool: else None ) _t3 = _time.monotonic() - _all_msgs = s.messages if load_messages else [] + if load_messages: + if is_messaging_session and cli_messages: + sidecar_messages = getattr(s, "messages", []) or [] + # Recovery/aggregate sidecars can intentionally contain a + # longer visible conversation than the single state.db + # segment for this messaging session id. Prefer the longer + # sidecar so repaired WebUI history is not hidden behind the + # canonical per-segment transcript. When both sources carry + # different slices of the same stitched conversation, merge + # them chronologically and dedupe exact repeats. 
+ if sidecar_messages and sidecar_messages != cli_messages: + merged_messages = [] + seen_message_keys = set() + for msg in sorted(list(cli_messages) + list(sidecar_messages), key=lambda m: ( + float(m.get("timestamp") or 0), + str(m.get("role") or ""), + str(m.get("content") or ""), + )): + message_identity = msg.get("id") or msg.get("message_id") + if message_identity: + key = ("message_id", str(message_identity)) + else: + key = ( + "legacy", + str(msg.get("role") or ""), + str(msg.get("content") or ""), + str(msg.get("timestamp") or ""), + str(msg.get("tool_call_id") or ""), + str(msg.get("tool_name") or msg.get("name") or ""), + ) + if key in seen_message_keys: + continue + seen_message_keys.add(key) + merged_messages.append(msg) + _all_msgs = merged_messages + else: + _all_msgs = sidecar_messages if len(sidecar_messages) > len(cli_messages) else cli_messages + else: + _all_msgs = s.messages + else: + _all_msgs = [] if load_messages: if msg_before is not None: # Scroll-to-top paging: msg_before is a 0-based index into @@ -1342,6 +3124,14 @@ def handle_get(handler, parsed) -> bool: # older sessions (pre-#1318) that have context_length=0 persisted # still render a meaningful indicator on load. Mirrors the # SSE-path fallback in api/streaming.py:2333-2342. Fixes #1436. + # + # #1896: pass config_context_length, provider, and custom_providers + # so explicit config overrides win over the 256K default fallback. + # Without these, an old session loaded after a user upgraded to a + # 1M-context model with `model.context_length: 1048576` in + # config.yaml gets a 256K window in the initial UI indicator and + # /api/session/get response — the same wrong-window display this + # fix addresses on the streaming side. 
_persisted_cl = getattr(s, "context_length", 0) or 0 if not _persisted_cl: _model_for_lookup = ( @@ -1350,7 +3140,37 @@ def handle_get(handler, parsed) -> bool: if _model_for_lookup: try: from agent.model_metadata import get_model_context_length as _get_cl - _fb_cl = _get_cl(_model_for_lookup, "") or 0 + from api.config import get_config as _get_config_for_cl + _cfg_for_cl = _get_config_for_cl() + _cfg_ctx_len_load = None + _cfg_custom_providers_load = None + try: + _model_cfg_load = _cfg_for_cl.get('model', {}) if isinstance(_cfg_for_cl, dict) else {} + if isinstance(_model_cfg_load, dict): + _raw_cfg_ctx_load = _model_cfg_load.get('context_length') + if _raw_cfg_ctx_load is not None: + try: + _parsed_load = int(_raw_cfg_ctx_load) + if _parsed_load > 0: + _cfg_ctx_len_load = _parsed_load + except (TypeError, ValueError): + pass + _raw_cp_load = _cfg_for_cl.get('custom_providers') if isinstance(_cfg_for_cl, dict) else None + if isinstance(_raw_cp_load, list): + _cfg_custom_providers_load = _raw_cp_load + except Exception: + pass + try: + _fb_cl = _get_cl( + _model_for_lookup, + "", + config_context_length=_cfg_ctx_len_load, + provider=effective_provider or "", + custom_providers=_cfg_custom_providers_load, + ) or 0 + except TypeError: + # Older hermes-agent builds: legacy 2-arg form. + _fb_cl = _get_cl(_model_for_lookup, "") or 0 if _fb_cl: _persisted_cl = _fb_cl except Exception: @@ -1366,6 +3186,8 @@ def handle_get(handler, parsed) -> bool: "threshold_tokens": getattr(s, "threshold_tokens", 0) or 0, "last_prompt_tokens": getattr(s, "last_prompt_tokens", 0) or 0, } + if cli_meta and _is_messaging_session_record(cli_meta): + raw = _merge_cli_sidebar_metadata(raw, cli_meta) # Signal to the frontend that older messages were omitted. 
# For msg_before paging, compare against the filtered set, # not the full list — otherwise we signal truncation even when @@ -1401,13 +3223,9 @@ def handle_get(handler, parsed) -> bool: return resp except KeyError: # Not a WebUI session -- try CLI store + cli_meta = _lookup_cli_session_metadata(sid) msgs = get_cli_session_messages(sid) if msgs: - cli_meta = None - for cs in get_cli_sessions(): - if cs["session_id"] == sid: - cli_meta = cs - break sess = { "session_id": sid, "title": (cli_meta or {}).get("title", "CLI Session"), @@ -1417,24 +3235,45 @@ def handle_get(handler, parsed) -> bool: "created_at": (cli_meta or {}).get("created_at", 0), "updated_at": (cli_meta or {}).get("updated_at", 0), "last_message_at": (cli_meta or {}).get("last_message_at") - or (cli_meta or {}).get("updated_at", 0), + or (cli_meta or {}).get("updated_at", 0) + or (msgs[-1] if msgs else {"timestamp": 0}).get("timestamp", 0), "pinned": False, "archived": False, "project_id": None, "profile": (cli_meta or {}).get("profile"), "is_cli_session": True, + "source_tag": (cli_meta or {}).get("source_tag"), + "raw_source": (cli_meta or {}).get("raw_source"), + "session_source": (cli_meta or {}).get("session_source"), + "source_label": (cli_meta or {}).get("source_label"), + "read_only": bool((cli_meta or {}).get("read_only")), "messages": msgs, "tool_calls": [], } + sess = _merge_cli_sidebar_metadata(sess, cli_meta) return j(handler, {"session": redact_session_data(sess)}) return bad(handler, "Session not found", 404) + if parsed.path == "/api/session/lineage/report": + sid = parse_qs(parsed.query).get("session_id", [""])[0] + if not sid: + return bad(handler, "session_id required", 400) + report = read_session_lineage_report(_active_state_db_path(), sid) + if not report.get("found"): + return bad(handler, "Session not found", 404) + return j(handler, report) + + if parsed.path == "/api/session/recovery/audit": + from api.session_recovery import audit_session_recovery + return j(handler, 
audit_session_recovery(SESSION_DIR, state_db_path=_active_state_db_path())) + if parsed.path == "/api/session/status": sid = parse_qs(parsed.query).get("session_id", [""])[0] if not sid: return bad(handler, "Missing session_id") try: from api.session_ops import session_status + _clear_stale_stream_state(get_session(sid, metadata_only=True)) return j(handler, session_status(sid)) except KeyError: return bad(handler, "Session not found", 404) @@ -1463,37 +3302,116 @@ def handle_get(handler, parsed) -> bool: return j(handler, {"results": get_results(sid)}) if parsed.path == "/api/sessions": - webui_sessions = all_sessions() - settings = load_settings() - if settings.get("show_cli_sessions"): - cli = get_cli_sessions() - webui_ids = {s["session_id"] for s in webui_sessions} - from api.models import _hide_from_default_sidebar as _cron_hide - deduped_cli = [s for s in cli - if s["session_id"] not in webui_ids - and not _cron_hide(s)] - else: - deduped_cli = [] - merged = webui_sessions + deduped_cli - merged.sort( - key=lambda s: s.get("last_message_at") or s.get("updated_at", 0) or 0, - reverse=True, - ) - safe_merged = [] - for s in merged: - item = dict(s) - if isinstance(item.get("title"), str): - item["title"] = _redact_text(item["title"]) - safe_merged.append(item) - return j(handler, { - "sessions": safe_merged, - "cli_count": len(deduped_cli), - "server_time": time.time(), - "server_tz": time.strftime("%z"), - }) + diag = RequestDiagnostics.maybe_start("GET", parsed.path, logger=logger) + try: + diag.stage("all_sessions") + webui_sessions = all_sessions(diag=diag) + diag.stage("load_settings") + settings = load_settings() + show_cli_sessions = bool(settings.get("show_cli_sessions")) + if show_cli_sessions: + diag.stage("get_cli_sessions") + cli = get_cli_sessions() + diag.stage("merge_cli_sessions") + cli_by_id = {s["session_id"]: s for s in cli} + for s in webui_sessions: + meta = cli_by_id.get(s.get("session_id")) + if not meta: + continue + if 
_is_messaging_session_record(meta): + s.update(_merge_cli_sidebar_metadata(s, meta)) + if s.get("session_id") != meta.get("session_id"): + s["session_id"] = meta.get("session_id") + else: + for key in ("source_tag", "raw_source", "session_source", "source_label"): + if not s.get(key) and meta.get(key): + s[key] = meta[key] + # Apply the same CLI visibility semantics to imported local copies so + # low-value imported artifacts do not leak into the sidebar. + webui_sessions = [s for s in webui_sessions if is_cli_session_row_visible(s)] + webui_ids = {s["session_id"] for s in webui_sessions} + from api.models import _hide_from_default_sidebar as _cron_hide + deduped_cli = [s for s in cli if s["session_id"] not in webui_ids and is_cli_session_row_visible(s) and not _cron_hide(s)] + else: + diag.stage("filter_webui_sessions") + webui_sessions = [s for s in webui_sessions if not _is_cli_session_for_settings(s)] + deduped_cli = [] + diag.stage("sort_sessions") + merged = webui_sessions + deduped_cli + merged.sort( + key=lambda s: s.get("last_message_at") or s.get("updated_at", 0) or 0, + reverse=True, + ) + # ── Profile scoping (#1611) ──────────────────────────────────────── + # Default: filter to the active profile. ?all_profiles=1 opts into + # the aggregate view used by the "All profiles" sidebar toggle. + # The other_profile_count is always returned so the UI can render + # the "Show N from other profiles" affordance without sending the + # cross-profile rows by default. + # + # IMPORTANT: scope BEFORE _keep_latest_messaging_session_per_source. + # _messaging_source_key is profile-blind (#1614 follow-up): if the + # same Slack/Telegram identity has sessions in profiles A and B, a + # profile-blind dedupe would discard the older one even when scoped + # to its own profile, leaving that profile with zero rows for that + # source. Filter first so the dedupe operates only within the active + # profile's rows. 
+ diag.stage("active_profile") + from api.profiles import get_active_profile_name + active_profile = get_active_profile_name() + all_profiles = _all_profiles_query_flag(parsed) + diag.stage("profile_filter") + if all_profiles: + scoped = merged + other_profile_count = 0 + else: + scoped = [s for s in merged + if _profiles_match(s.get("profile"), active_profile)] + other_profile_count = len(merged) - len(scoped) + diag.stage("messaging_dedupe") + scoped = _keep_latest_messaging_session_per_source(scoped) + if show_cli_sessions: + diag.stage("cli_cap") + scoped = _cap_recent_cli_sessions(scoped, cli_cap=CLI_VISIBLE_SESSION_CAP) + diag.stage("redact_sessions") + safe_merged = [] + for s in scoped: + item = dict(s) + if isinstance(item.get("title"), str): + item["title"] = _redact_text(item["title"]) + safe_merged.append(item) + diag.stage("response_write") + return j(handler, { + "sessions": safe_merged, + "cli_count": len(deduped_cli), + "all_profiles": all_profiles, + "active_profile": active_profile, + "other_profile_count": other_profile_count, + "server_time": time.time(), + "server_tz": time.strftime("%z"), + }) + finally: + diag.finish() if parsed.path == "/api/projects": - return j(handler, {"projects": load_projects()}) + # ── Profile scoping (#1614) ──────────────────────────────────────── + # Default: filter to the active profile. ?all_profiles=1 returns the + # aggregate list so settings/admin UIs can still see everything. 
+ from api.profiles import get_active_profile_name + active_profile = get_active_profile_name() + all_projects = load_projects() + all_profiles = _all_profiles_query_flag(parsed) + if all_profiles: + scoped = all_projects + else: + scoped = [p for p in all_projects + if _profiles_match(p.get("profile"), active_profile)] + return j(handler, { + "projects": scoped, + "all_profiles": all_profiles, + "active_profile": active_profile, + "other_profile_count": len(all_projects) - len(scoped), + }) if parsed.path == "/api/session/export": return _handle_session_export(handler, parsed) @@ -1648,71 +3566,69 @@ def handle_get(handler, parsed) -> bool: return j(handler, {"error": "not found"}, status=404) return _handle_clarify_inject(handler, parsed) - # ── OAuth (Codex device-code) ── - if parsed.path == "/api/oauth/codex/start": - """Start Codex device-code OAuth flow. Returns user_code + verification_uri.""" - try: - from api.oauth import start_codex_device_code - result = start_codex_device_code() - return j(handler, result) - except Exception as e: - return j(handler, {"error": str(e)}, status=500) - - if parsed.path == "/api/oauth/codex/poll": - """SSE endpoint for polling Codex OAuth token.""" + if parsed.path == "/api/onboarding/oauth/poll": qs = parse_qs(parsed.query) - device_code = qs.get("device_code", [""])[0] - if not device_code: - return j(handler, {"error": "device_code required"}, status=400) - handler.send_response(200) - handler.send_header("Content-Type", "text/event-stream") - handler.send_header("Cache-Control", "no-cache") - handler.send_header("Connection", "keep-alive") - handler.end_headers() + flow_id = qs.get("flow_id", [""])[0] try: - from api.oauth import poll_codex_token - for event in poll_codex_token(device_code): - handler.wfile.write(f"data: {json.dumps(event)}\n\n".encode()) - handler.wfile.flush() - if event.get("status") in ("success", "error"): - break - except Exception as e: - handler.wfile.write(f"data: {json.dumps({'status': 
'error', 'error': str(e)})}\n\n".encode()) - handler.wfile.flush() - return # SSE handled, no JSON response + return j( + handler, + poll_onboarding_oauth_flow(flow_id), + extra_headers={"Cache-Control": "no-store"}, + ) + except ValueError as e: + return bad(handler, str(e)) + except KeyError as e: + return bad(handler, str(e), 404) # ── Cron API (GET) ── + # All cron handlers touch cron.jobs which resolves HERMES_HOME from + # os.environ (process-global) at call time. Wrap in cron_profile_context + # so the TLS-active profile's jobs.json is read, not the process default. if parsed.path == "/api/crons": from cron.jobs import list_jobs + from api.profiles import cron_profile_context - return j(handler, {"jobs": list_jobs(include_disabled=True)}) + with cron_profile_context(): + return j(handler, {"jobs": _cron_jobs_for_api(list_jobs(include_disabled=True))}) if parsed.path == "/api/crons/output": - return _handle_cron_output(handler, parsed) + from api.profiles import cron_profile_context + + with cron_profile_context(): + return _handle_cron_output(handler, parsed) if parsed.path == "/api/crons/history": - return _handle_cron_history(handler, parsed) + from api.profiles import cron_profile_context + + with cron_profile_context(): + return _handle_cron_history(handler, parsed) if parsed.path == "/api/crons/run": - return _handle_cron_run_detail(handler, parsed) + from api.profiles import cron_profile_context + + with cron_profile_context(): + return _handle_cron_run_detail(handler, parsed) if parsed.path == "/api/crons/recent": - return _handle_cron_recent(handler, parsed) + from api.profiles import cron_profile_context + + with cron_profile_context(): + return _handle_cron_recent(handler, parsed) if parsed.path == "/api/crons/status": - return _handle_cron_status(handler, parsed) + from api.profiles import cron_profile_context + + with cron_profile_context(): + return _handle_cron_status(handler, parsed) # ── Skills API (GET) ── if parsed.path == "/api/skills": - 
from tools.skills_tool import skills_list as _skills_list - - raw = _skills_list() - data = json.loads(raw) if isinstance(raw, str) else raw + qs = parse_qs(parsed.query) + category = qs.get("category", [None])[0] + data = _skills_list_from_dir(_active_skills_dir(), category=category) return j(handler, {"skills": data.get("skills", [])}) if parsed.path == "/api/skills/content": - from tools.skills_tool import skill_view as _skill_view, SKILLS_DIR - qs = parse_qs(parsed.query) name = qs.get("name", [""])[0] if not name: @@ -1724,11 +3640,8 @@ def handle_get(handler, parsed) -> bool: if _re.search(r"[*?\[\]]", name): return bad(handler, "Invalid skill name", 400) - skill_dir = None - for p in SKILLS_DIR.rglob(name): - if p.is_dir(): - skill_dir = p - break + skills_dir = _active_skills_dir() + skill_dir, _skill_md = _find_skill_in_dir(name, skills_dir) if not skill_dir: return bad(handler, "Skill not found", 404) target = (skill_dir / file_path).resolve() @@ -1742,8 +3655,7 @@ def handle_get(handler, parsed) -> bool: handler, {"content": target.read_text(encoding="utf-8"), "path": file_path}, ) - raw = _skill_view(name) - data = json.loads(raw) if isinstance(raw, str) else raw + data = _skill_view_from_active_dir(name) if not isinstance(data.get("linked_files"), dict): data["linked_files"] = {} return j(handler, data) @@ -1769,10 +3681,79 @@ def handle_get(handler, parsed) -> bool: {"name": get_active_profile_name(), "path": str(get_active_hermes_home())}, ) + # ── Gateway Status (GET) ── + if parsed.path == "/api/gateway/status": + import datetime + identity_map = _load_gateway_session_identity_map() + sessions_path = _gateway_session_metadata_path() + + # Detect whether the gateway process is alive, independent of + # connected messaging platforms. An empty identity_map just + # means zero platforms connected, not that the gateway is down. 
+ # + # agent_health.build_agent_health_payload() is the authoritative + # signal: it reads gateway.status runtime metadata and returns a + # tri-state `alive` field (True/False/None). This avoids the + # false-negative where the gateway is running but has zero active + # messaging sessions (empty identity_map). + # + # `alive` tri-state semantics: + # True → gateway process is alive + # False → gateway metadata exists but process is down + # None → no gateway metadata/status available; this WebUI + # setup is probably not configured with a gateway + health = build_agent_health_payload() + alive = health.get("alive") + if alive is True: + running = True + configured = True + elif alive is False: + running = False + configured = True + else: # alive is None → gateway not configured / unavailable + running = bool(identity_map) + configured = False + + platforms_set: set[str] = set() + for meta in identity_map.values(): + raw = meta.get("raw_source") or meta.get("platform") or "" + norm = _normalize_messaging_source(raw) + if norm: + platforms_set.add(norm) + _PLATFORM_LABELS = { + "telegram": "Telegram", + "discord": "Discord", + "slack": "Slack", + "web": "Web", + "api": "API", + } + platforms = sorted( + [{"name": p, "label": _PLATFORM_LABELS.get(p, p.title())} for p in platforms_set], + key=lambda x: x["label"], + ) + last_active = "" + if running and sessions_path.exists(): + try: + mtime = sessions_path.stat().st_mtime + last_active = datetime.datetime.fromtimestamp(mtime).isoformat() + except Exception: + pass + return j(handler, { + "running": running, + "configured": configured, + "platforms": platforms, + "last_active": last_active, + "session_count": len(identity_map), + }) + # ── MCP Servers (GET) ── if parsed.path == "/api/mcp/servers": return _handle_mcp_servers_list(handler) + # ── MCP Tools (GET) ── + if parsed.path == "/api/mcp/tools": + return _handle_mcp_tools_list(handler) + # ── Checkpoints / Rollback (GET) ── if parsed.path == 
"/api/rollback/list": qs = parse_qs(parsed.query) @@ -1811,9 +3792,16 @@ def handle_get(handler, parsed) -> bool: def handle_post(handler, parsed) -> bool: """Handle all POST routes. Returns True if handled, False for 404.""" + diag = RequestDiagnostics.maybe_start("POST", parsed.path, logger=logger) # CSRF: reject cross-origin browser requests + if diag: + diag.stage("csrf") if not _check_csrf(handler): - return j(handler, {"error": "Cross-origin request rejected"}, status=403) + try: + return j(handler, {"error": "Cross-origin request rejected"}, status=403) + finally: + if diag: + diag.finish() if parsed.path == "/api/upload": return handle_upload(handler) @@ -1823,13 +3811,62 @@ def handle_post(handler, parsed) -> bool: if parsed.path == "/api/transcribe": return handle_transcribe(handler) - body = read_body(handler) + if diag: + diag.stage("read_body") + try: + body = read_body(handler) + except Exception: + if diag: + diag.finish() + raise + + if parsed.path == "/api/session/recovery/repair-safe": + from api.session_recovery import repair_safe_session_recovery + result = repair_safe_session_recovery(SESSION_DIR, state_db_path=_active_state_db_path()) + return j(handler, result, status=200 if result.get("ok") else 409) + + if parsed.path.startswith("/api/kanban/"): + from api.kanban_bridge import handle_kanban_post + + result = handle_kanban_post(handler, parsed, body) + if result is False: + return _kanban_unknown_endpoint(handler, parsed, "POST") + return True + if parsed.path == "/api/dashboard/config": + from api import dashboard_probe + + try: + j(handler, dashboard_probe.save_dashboard_config(body)) + except ValueError as exc: + bad(handler, str(exc), status=400) + except Exception as exc: + logger.exception("dashboard config save failed") + bad(handler, str(exc), status=500) + return True if parsed.path == "/api/session/new": try: workspace = str(resolve_trusted_workspace(body.get("workspace"))) if body.get("workspace") else None - except ValueError as 
e: + except (TypeError, ValueError) as e: return bad(handler, str(e)) + worktree_info = None + worktree_requested = ( + body.get("worktree") is True + or str(body.get("worktree")).strip().lower() in {"1", "true", "yes", "on"} + ) + if worktree_requested: + try: + from api.worktrees import create_worktree_for_workspace + base_workspace = workspace + if not base_workspace: + base_workspace = str(resolve_trusted_workspace(get_last_workspace())) + worktree_info = create_worktree_for_workspace(base_workspace) + workspace = worktree_info["path"] + except (TypeError, ValueError) as e: + return bad(handler, str(e), status=400) + except Exception as e: + logger.exception("failed to create worktree-backed session") + return bad(handler, f"Failed to create worktree: {e}", status=500) model, model_provider = _session_model_state_from_request( body.get("model"), body.get("model_provider"), @@ -1841,6 +3878,8 @@ def handle_post(handler, parsed) -> bool: model=model, model_provider=model_provider, profile=body.get("profile") or None, + project_id=body.get("project_id") or None, + worktree_info=worktree_info, ) return j(handler, {"session": s.compact() | {"messages": s.messages}}) @@ -2069,6 +4108,57 @@ def handle_post(handler, parsed) -> bool: s.save() return j(handler, {"ok": True, "enabled_toolsets": s.enabled_toolsets}) + if parsed.path == "/api/session/draft": + # GET ?session_id=X → return current draft + # POST body → save draft { session_id, text?, files? } + # HTTP method is in handler.command (e.g. 
"POST", "GET"), parsed has no .method + if handler.command == "GET": + query = parse_qs(parsed.query) + sid = query.get("session_id", [""])[0] if parsed.query else "" + if not sid: + return bad(handler, "session_id is required", 400) + try: + s = get_session(sid) + except KeyError: + return bad(handler, "Session not found", 404) + draft = getattr(s, "composer_draft", {}) or {} + return j(handler, {"draft": draft}) + # POST + try: + require(body, "session_id") + except ValueError as e: + return bad(handler, str(e)) + sid = body["session_id"] + text = body.get("text") + files = body.get("files") + # Stage-326 hardening (per Opus advisor): size + type validation on + # the draft inputs. Without this, a misbehaving or malicious client + # can persist multi-MB strings into the session JSON on every keystroke + # via the 400ms debounced auto-save. + _MAX_DRAFT_TEXT = 50_000 # 50 KB cap on textarea content + _MAX_DRAFT_FILES = 50 # max number of attached file references + if text is not None and not isinstance(text, str): + text = "" + if isinstance(text, str) and len(text) > _MAX_DRAFT_TEXT: + text = text[:_MAX_DRAFT_TEXT] + if files is not None and not isinstance(files, list): + files = [] + if isinstance(files, list) and len(files) > _MAX_DRAFT_FILES: + files = files[:_MAX_DRAFT_FILES] + try: + s = get_session(sid) + except KeyError: + return bad(handler, "Session not found", 404) + with _get_session_agent_lock(sid): + draft = getattr(s, "composer_draft", {}) or {} + if text is not None: + draft["text"] = text + if files is not None: + draft["files"] = files + s.composer_draft = draft + s.save() + return j(handler, {"ok": True, "draft": s.composer_draft}) + if parsed.path == "/api/session/update": try: require(body, "session_id") @@ -2110,9 +4200,17 @@ def handle_post(handler, parsed) -> bool: return bad(handler, "session_id is required") if not all(c in '0123456789abcdefghijklmnopqrstuvwxyz_' for c in sid): return bad(handler, "Invalid session_id", 400) + 
cli_meta_for_delete = _lookup_cli_session_metadata(sid) + if cli_meta_for_delete.get("read_only"): + return bad(handler, "Read-only imported sessions cannot be deleted from WebUI", 400) + is_messaging_session = _is_messaging_session_id(sid) # Delete from WebUI session store with LOCK: SESSIONS.pop(sid, None) + try: + SESSION_INDEX_FILE.unlink(missing_ok=True) + except Exception: + logger.debug("Failed to unlink session index") # Evict cached agent so turn count doesn't leak into a recycled session from api.config import _evict_session_agent _evict_session_agent(sid) @@ -2123,28 +4221,27 @@ def handle_post(handler, parsed) -> bool: return bad(handler, "Invalid session_id", 400) try: p.unlink(missing_ok=True) + p.with_suffix('.json.bak').unlink(missing_ok=True) except Exception: logger.debug("Failed to unlink session file %s", p) # Prune the per-session agent lock so deleted sessions don't leak # Lock entries in SESSION_AGENT_LOCKS forever. with SESSION_AGENT_LOCKS_LOCK: SESSION_AGENT_LOCKS.pop(sid, None) - try: - SESSION_INDEX_FILE.unlink(missing_ok=True) - except Exception: - logger.debug("Failed to unlink session index") try: from api.terminal import close_terminal close_terminal(sid) except Exception: logger.debug("Failed to close workspace terminal for deleted session %s", sid) - # Also delete from CLI state.db (for CLI sessions shown in sidebar) - try: - from api.models import delete_cli_session + # Also delete from CLI state.db for CLI sessions shown in sidebar, + # but never erase external messaging channel memory via WebUI delete. 
+ if not is_messaging_session: + try: + from api.models import delete_cli_session - delete_cli_session(sid) - except Exception: - logger.debug("Failed to delete CLI session %s", sid) + delete_cli_session(sid) + except Exception: + logger.debug("Failed to delete CLI session %s", sid) return j(handler, {"ok": True}) if parsed.path == "/api/session/clear": @@ -2243,6 +4340,7 @@ def handle_post(handler, parsed) -> bool: title=branch_title, messages=forked_messages, parent_session_id=source.session_id, + session_source="fork", ) with LOCK: SESSIONS[branch.session_id] = branch @@ -2263,6 +4361,12 @@ def handle_post(handler, parsed) -> bool: if parsed.path == "/api/session/compress": return _handle_session_compress(handler, body) + if parsed.path == "/api/session/conversation-rounds": + return _handle_conversation_rounds(handler, body) + + if parsed.path == "/api/session/handoff-summary": + return _handle_handoff_summary(handler, body) + if parsed.path == "/api/session/retry": try: require(body, "session_id") @@ -2327,8 +4431,11 @@ def handle_post(handler, parsed) -> bool: if parsed.path == "/api/background": return _handle_background(handler, body) + if parsed.path == "/api/goal": + return _handle_goal_command(handler, body) + if parsed.path == "/api/chat/start": - return _handle_chat_start(handler, body) + return _handle_chat_start(handler, body, diag=diag) if parsed.path == "/api/chat": return _handle_chat_sync(handler, body) @@ -2350,23 +4457,43 @@ def handle_post(handler, parsed) -> bool: return _handle_terminal_close(handler, body) # ── Cron API (POST) ── + # See GET-side comment above: wrap in cron_profile_context so writes go + # to the TLS-active profile's jobs.json instead of the process default. 
if parsed.path == "/api/crons/create": - return _handle_cron_create(handler, body) + from api.profiles import cron_profile_context + + with cron_profile_context(): + return _handle_cron_create(handler, body) if parsed.path == "/api/crons/update": - return _handle_cron_update(handler, body) + from api.profiles import cron_profile_context + + with cron_profile_context(): + return _handle_cron_update(handler, body) if parsed.path == "/api/crons/delete": - return _handle_cron_delete(handler, body) + from api.profiles import cron_profile_context + + with cron_profile_context(): + return _handle_cron_delete(handler, body) if parsed.path == "/api/crons/run": - return _handle_cron_run(handler, body) + from api.profiles import cron_profile_context + + with cron_profile_context(): + return _handle_cron_run(handler, body) if parsed.path == "/api/crons/pause": - return _handle_cron_pause(handler, body) + from api.profiles import cron_profile_context + + with cron_profile_context(): + return _handle_cron_pause(handler, body) if parsed.path == "/api/crons/resume": - return _handle_cron_resume(handler, body) + from api.profiles import cron_profile_context + + with cron_profile_context(): + return _handle_cron_resume(handler, body) # ── File ops (POST) ── if parsed.path == "/api/file/delete": @@ -2384,6 +4511,12 @@ def handle_post(handler, parsed) -> bool: if parsed.path == "/api/file/create-dir": return _handle_create_dir(handler, body) + if parsed.path == "/api/file/reveal": + return _handle_file_reveal(handler, body) + + if parsed.path == "/api/file/path": + return _handle_file_path(handler, body) + # ── Workspace management (POST) ── if parsed.path == "/api/workspaces/add": return _handle_workspace_add(handler, body) @@ -2511,6 +4644,21 @@ def handle_post(handler, parsed) -> bool: isinstance(body.get("_set_password"), str) and body.get("_set_password", "").strip() ) + requested_clear_password = bool(body.get("_clear_password")) + + # #1560: HERMES_WEBUI_PASSWORD env var takes 
precedence in + # api.auth.get_password_hash(), so writing password_hash to settings.json + # has no effect on auth. Refuse loudly with 409 instead of silently + # succeeding — the previous behaviour returned 200 + a green save toast + # while every subsequent login still required the env-var password. + if requested_password or requested_clear_password: + if os.getenv("HERMES_WEBUI_PASSWORD", "").strip(): + return bad( + handler, + "HERMES_WEBUI_PASSWORD env var is set — it overrides the settings password. " + "Unset the env var and restart the server before changing the password here.", + 409, + ) saved = save_settings(body) saved.pop("password_hash", None) # never expose hash to client @@ -2544,6 +4692,34 @@ def handle_post(handler, parsed) -> bool: handler.wfile.write(response_body) return True + if parsed.path == "/api/onboarding/oauth/start": + from api.auth import is_auth_enabled + import os as _os + if not is_auth_enabled() and not _os.getenv("HERMES_WEBUI_ONBOARDING_OPEN"): + import ipaddress + try: + _xff = handler.headers.get("X-Forwarded-For", "").split(",")[0].strip() + _xri = handler.headers.get("X-Real-IP", "").strip() + _raw = handler.client_address[0] + addr = ipaddress.ip_address(_xff or _xri or _raw) + is_local = addr.is_loopback or addr.is_private + except ValueError: + is_local = False + if not is_local: + return bad(handler, "Onboarding OAuth is only available from local networks when auth is not enabled. 
To bypass this on a remote server, set HERMES_WEBUI_ONBOARDING_OPEN=1.", 403) + try: + return j(handler, start_onboarding_oauth_flow(body), extra_headers={"Cache-Control": "no-store"}) + except ValueError as e: + return bad(handler, str(e)) + except RuntimeError as e: + return bad(handler, str(e), 500) + + if parsed.path == "/api/onboarding/oauth/cancel": + try: + return j(handler, cancel_onboarding_oauth_flow(body), extra_headers={"Cache-Control": "no-store"}) + except ValueError as e: + return bad(handler, str(e)) + if parsed.path == "/api/onboarding/setup": # Writing API keys to disk - restrict to local/private networks unless auth is active. # In Docker, requests arrive from the bridge network (172.x.x.x), not 127.0.0.1, @@ -2629,13 +4805,64 @@ def handle_post(handler, parsed) -> bool: require(body, "session_id") except ValueError as e: return bad(handler, str(e)) + sid = body["session_id"] try: - s = get_session(body["session_id"]) + s = get_session(sid) except KeyError: - return bad(handler, "Session not found", 404) - with _get_session_agent_lock(body["session_id"]): + cli_meta = _lookup_cli_session_metadata(sid) + if not cli_meta: + return bad(handler, "Session not found", 404) + if cli_meta.get("read_only"): + return bad(handler, "Read-only imported sessions cannot be archived from WebUI", 400) + if _is_messaging_session_record(cli_meta): + s = Session( + session_id=sid, + title=cli_meta.get("title") or title_from(get_cli_session_messages(sid), "CLI Session"), + workspace=get_last_workspace(), + messages=[], + model=cli_meta.get("model") or "unknown", + created_at=cli_meta.get("created_at"), + updated_at=cli_meta.get("updated_at"), + ) + s.is_cli_session = True + s.source_tag = cli_meta.get("source_tag") + s.raw_source = cli_meta.get("raw_source") or cli_meta.get("source_tag") + s.session_source = cli_meta.get("session_source") + s.source_label = cli_meta.get("source_label") + s.user_id = cli_meta.get("user_id") + s.chat_id = cli_meta.get("chat_id") + 
s.chat_type = cli_meta.get("chat_type") + s.thread_id = cli_meta.get("thread_id") + s.session_key = cli_meta.get("session_key") + s.platform = cli_meta.get("platform") + s.save(touch_updated_at=False) + else: + msgs = get_cli_session_messages(sid) + if not msgs: + return bad(handler, "Session not found", 404) + s = import_cli_session( + sid, + cli_meta.get("title") or title_from(msgs, "CLI Session"), + msgs, + cli_meta.get("model") or "unknown", + profile=cli_meta.get("profile"), + created_at=cli_meta.get("created_at"), + updated_at=cli_meta.get("updated_at"), + ) + s.is_cli_session = True + s.source_tag = cli_meta.get("source_tag") + s.raw_source = cli_meta.get("raw_source") or cli_meta.get("source_tag") + s.session_source = cli_meta.get("session_source") + s.source_label = cli_meta.get("source_label") + s.user_id = cli_meta.get("user_id") + s.chat_id = cli_meta.get("chat_id") + s.chat_type = cli_meta.get("chat_type") + s.thread_id = cli_meta.get("thread_id") + s.session_key = cli_meta.get("session_key") + s.platform = cli_meta.get("platform") + with _get_session_agent_lock(sid): s.archived = bool(body.get("archived", True)) - s.save() + s.save(touch_updated_at=False) return j(handler, {"ok": True, "session": s.compact()}) # ── Session move to project (POST) ── @@ -2648,8 +4875,21 @@ def handle_post(handler, parsed) -> bool: s = get_session(body["session_id"]) except KeyError: return bad(handler, "Session not found", 404) + # #1614: refuse moves into a project owned by another profile. 
+ target_pid = body.get("project_id") or None + if target_pid: + from api.profiles import get_active_profile_name + active_profile = get_active_profile_name() + target = next( + (p for p in load_projects() if p["project_id"] == target_pid), + None, + ) + if not target: + return bad(handler, "Project not found", 404) + if not _profiles_match(target.get("profile"), active_profile): + return bad(handler, "Project not found", 404) with _get_session_agent_lock(body["session_id"]): - s.project_id = body.get("project_id") or None + s.project_id = target_pid s.save() return j(handler, {"ok": True, "session": s.compact()}) @@ -2660,6 +4900,7 @@ def handle_post(handler, parsed) -> bool: except ValueError as e: return bad(handler, str(e)) import re as _re + from api.profiles import get_active_profile_name name = body["name"].strip()[:128] if not name: @@ -2672,6 +4913,7 @@ def handle_post(handler, parsed) -> bool: "project_id": uuid.uuid4().hex[:12], "name": name, "color": color, + "profile": get_active_profile_name() or 'default', "created_at": time.time(), } projects.append(proj) @@ -2684,6 +4926,7 @@ def handle_post(handler, parsed) -> bool: except ValueError as e: return bad(handler, str(e)) import re as _re + from api.profiles import get_active_profile_name projects = load_projects() proj = next( @@ -2691,6 +4934,10 @@ def handle_post(handler, parsed) -> bool: ) if not proj: return bad(handler, "Project not found", 404) + # #1614: a project can only be renamed by the profile that owns it. 
+ active_profile = get_active_profile_name() + if not _profiles_match(proj.get("profile"), active_profile): + return bad(handler, "Project not found", 404) proj["name"] = body["name"].strip()[:128] if "color" in body: color = body["color"] @@ -2705,12 +4952,17 @@ def handle_post(handler, parsed) -> bool: require(body, "project_id") except ValueError as e: return bad(handler, str(e)) + from api.profiles import get_active_profile_name projects = load_projects() proj = next( (p for p in projects if p["project_id"] == body["project_id"]), None ) if not proj: return bad(handler, "Project not found", 404) + # #1614: a project can only be deleted by the profile that owns it. + active_profile = get_active_profile_name() + if not _profiles_match(proj.get("profile"), active_profile): + return bad(handler, "Project not found", 404) projects = [p for p in projects if p["project_id"] != body["project_id"]] save_projects(projects) # Unassign all sessions that belonged to this project @@ -2821,6 +5073,36 @@ def handle_post(handler, parsed) -> bool: return False # 404 + +def handle_patch(handler, parsed) -> bool: + """Handle all PATCH routes. Returns True if handled, False for 404.""" + if not _check_csrf(handler): + return j(handler, {"error": "Cross-origin request rejected"}, status=403) + body = read_body(handler) + if parsed.path.startswith("/api/kanban/"): + from api.kanban_bridge import handle_kanban_patch + + result = handle_kanban_patch(handler, parsed, body) + if result is False: + return _kanban_unknown_endpoint(handler, parsed, "PATCH") + return True + return False + + +def handle_delete(handler, parsed) -> bool: + """Handle all DELETE routes. 
Returns True if handled, False for 404.""" + if not _check_csrf(handler): + return j(handler, {"error": "Cross-origin request rejected"}, status=403) + body = read_body(handler) + if parsed.path.startswith("/api/kanban/"): + from api.kanban_bridge import handle_kanban_delete + + result = handle_kanban_delete(handler, parsed, body) + if result is False: + return _kanban_unknown_endpoint(handler, parsed, "DELETE") + return True + return False + # ── GET route helpers ───────────────────────────────────────────────────────── # MIME types for static file serving. Hoisted to module scope to avoid @@ -2969,9 +5251,10 @@ def _handle_list_dir(handler, parsed): def _handle_sse_stream(handler, parsed): stream_id = parse_qs(parsed.query).get("stream_id", [""])[0] - q = STREAMS.get(stream_id) - if q is None: + stream = STREAMS.get(stream_id) + if stream is None: return j(handler, {"error": "stream not found"}, status=404) + subscriber = stream.subscribe() if hasattr(stream, "subscribe") else stream handler.send_response(200) handler.send_header("Content-Type", "text/event-stream; charset=utf-8") handler.send_header("Cache-Control", "no-cache") @@ -2981,7 +5264,7 @@ def _handle_sse_stream(handler, parsed): try: while True: try: - event, data = q.get(timeout=30) + event, data = subscriber.get(timeout=_SSE_HEARTBEAT_INTERVAL_SECONDS) except queue.Empty: handler.wfile.write(b": heartbeat\n\n") handler.wfile.flush() @@ -2991,6 +5274,12 @@ def _handle_sse_stream(handler, parsed): break except _CLIENT_DISCONNECT_ERRORS: pass + finally: + if subscriber is not stream and hasattr(stream, "unsubscribe"): + try: + stream.unsubscribe(subscriber) + except Exception: + pass return True @@ -3098,7 +5387,7 @@ def _handle_terminal_output(handler, parsed): try: while True: try: - event, data = term.output.get(timeout=25) + event, data = term.output.get(timeout=_SSE_HEARTBEAT_INTERVAL_SECONDS) except queue.Empty: handler.wfile.write(b": terminal heartbeat\n\n") handler.wfile.flush() @@ -3183,7 
+5472,7 @@ def _handle_gateway_sse_stream(handler, parsed): while True: try: - event_data = q.get(timeout=30) + event_data = q.get(timeout=_SSE_HEARTBEAT_INTERVAL_SECONDS) except queue.Empty: handler.wfile.write(b': keepalive\n\n') handler.wfile.flush() @@ -3279,8 +5568,17 @@ def _serve_file_bytes(handler, target: Path, mime: str, disposition: str, cache_ handler.send_header("Cache-Control", cache_control) handler.send_header("Content-Disposition", _content_disposition_value(disposition, target.name)) if csp: + # Sandboxed inline HTML must remain frameable for workspace previews; + # X-Frame-Options: DENY would block the iframe before CSP sandbox applies. handler.send_header("Content-Security-Policy", csp) - _security_headers(handler) + handler.send_header("X-Content-Type-Options", "nosniff") + handler.send_header("Referrer-Policy", "same-origin") + handler.send_header( + "Permissions-Policy", + "camera=(), microphone=(self), geolocation=(), clipboard-write=(self)", + ) + else: + _security_headers(handler) handler.end_headers() if content_length: @@ -3308,6 +5606,8 @@ def _handle_media(handler, parsed): - Only image MIME types are served inline; all others force download - SVG always served as attachment (XSS risk) - No path traversal: resolved path must stay within an allowed root + - Additional roots can be added via MEDIA_ALLOWED_ROOTS env var + (colon-separated list of absolute paths) """ import os as _os from api.auth import is_auth_enabled, parse_cookie, verify_session @@ -3351,6 +5651,21 @@ def _handle_media(handler, parsed): allowed_roots.append(ws) except Exception: pass + + # Also allow additional roots from MEDIA_ALLOWED_ROOTS env var + # (colon-separated list of absolute paths, e.g. 
/home/user/models:/home/user/Pictures) + extra_roots = _os.environ.get("MEDIA_ALLOWED_ROOTS", "").strip() + if extra_roots: + for root in extra_roots.split(":"): + root = root.strip() + if root: + try: + rp = Path(root).resolve() + if rp.is_dir(): + allowed_roots.append(rp) + except Exception: + pass + within_allowed = any( _os.path.commonpath([str(target), str(root)]) == str(root) for root in allowed_roots @@ -3366,8 +5681,9 @@ def _handle_media(handler, parsed): ext = target.suffix.lower() mime = MIME_MAP.get(ext, "application/octet-stream") - # Only serve safe media/PDF types inline when explicitly requested. Everything - # else remains a download. SVG is always a download (XSS risk). + # Only serve safe media/PDF types inline when explicitly requested. HTML is + # allowed inline only with a CSP sandbox so "open full page" can work without + # granting same-origin access to the WebUI. SVG is always a download (XSS risk). _INLINE_IMAGE_TYPES = { "image/png", "image/jpeg", "image/gif", "image/webp", "image/x-icon", "image/bmp", @@ -3380,12 +5696,15 @@ def _handle_media(handler, parsed): } _DOWNLOAD_TYPES = {"image/svg+xml"} # SVG: XSS risk, force download inline_preview = qs.get("inline", [""])[0] == "1" + html_inline_ok = inline_preview and mime == "text/html" disposition = "inline" if ( mime not in _DOWNLOAD_TYPES and ( mime in _INLINE_IMAGE_TYPES or (inline_preview and mime in _INLINE_PREVIEW_TYPES) + or html_inline_ok ) ) else "attachment" - return _serve_file_bytes(handler, target, mime, disposition, "private, max-age=3600") + csp = "sandbox allow-scripts" if html_inline_ok else None + return _serve_file_bytes(handler, target, mime, disposition, "private, max-age=3600", csp=csp) def _handle_file_raw(handler, parsed): @@ -3506,7 +5825,7 @@ def _handle_approval_sse_stream(handler, parsed): try: while True: try: - payload = q.get(timeout=30) + payload = q.get(timeout=_SSE_HEARTBEAT_INTERVAL_SECONDS) except queue.Empty: # Keepalive — SSE comment line prevents 
proxy/CDN timeout. handler.wfile.write(b': keepalive\n\n') @@ -3607,7 +5926,7 @@ def _handle_clarify_sse_stream(handler, parsed): try: while True: try: - payload = q.get(timeout=30) + payload = q.get(timeout=_SSE_HEARTBEAT_INTERVAL_SECONDS) except queue.Empty: handler.wfile.write(b': keepalive\n\n') handler.wfile.flush() @@ -3706,11 +6025,12 @@ def _handle_live_models(handler, parsed): ids = [] if not ids: - # For 'custom' provider, provider_model_ids() returns [] because - # 'custom' isn't a real endpoint. Fall back to the custom_providers - # entries from config.yaml so the live-model enrichment step can - # add any models that weren't already in the static list. - if provider == "custom": + # For 'custom' and 'custom:*' providers, provider_model_ids() + # returns [] because they aren't real hermes_cli endpoints. + # Fall back to the custom_providers entries from config.yaml so + # the live-model enrichment step can add any models that weren't + # already in the static list (issue #1619). 
+ if provider == "custom" or provider.startswith("custom:"): try: _cp_entries = cfg.get("custom_providers", []) if isinstance(_cp_entries, list): @@ -3722,8 +6042,8 @@ def _handle_live_models(handler, parsed): except Exception: pass - # If still no ids, try fetching from model.base_url directly (OpenAI-compat endpoint) - if not ids and provider == "custom": + # If still no ids, try fetching from base_url directly (OpenAI-compat endpoint) + if not ids and (provider == "custom" or provider.startswith("custom:")): _base_url = cfg.get("model", {}).get("base_url") _api_key = cfg.get("model", {}).get("api_key") if _base_url and _api_key: @@ -3807,6 +6127,23 @@ def _handle_live_models(handler, parsed): if not ids: return _finish({"provider": provider, "models": [], "count": 0}) + # For Nous Portal, apply the same featured-set cap that + # /api/models uses so background enrichment via _fetchLiveModels() + # doesn't undo the dropdown trim — otherwise a 397-model catalog + # would still flood the picker after the initial render finished + # the cap. The full list is returned via the main /api/models + # endpoint's extra_models field for /model autocomplete; the live + # endpoint is purely a dropdown-enrichment surface, so it should + # match the dropdown's visibility budget. (#1567) + if provider == "nous": + try: + from api.config import _build_nous_featured_set + _default_model = (cfg.get("model", {}) or {}).get("model") if isinstance(cfg.get("model"), dict) else None + _featured, _ = _build_nous_featured_set(ids, selected_model_id=_default_model) + ids = _featured + except Exception: + logger.debug("Failed to apply Nous featured-set cap for /api/models/live") + # Normalise to {id, label} — provider_model_ids() returns plain string IDs. # For ollama-cloud use the shared Ollama formatter (handles `:variant` suffix). # For all other providers use a simpler hyphen-split capitaliser. 
@@ -4115,9 +6452,9 @@ def _handle_btw(handler, body): stream_id = uuid.uuid4().hex ephemeral.active_stream_id = stream_id ephemeral.save() - q = queue.Queue() + stream = create_stream_channel() with STREAMS_LOCK: - STREAMS[stream_id] = q + STREAMS[stream_id] = stream from api.background import track_btw track_btw(body["session_id"], ephemeral.session_id, stream_id, question) thr = threading.Thread( @@ -4161,9 +6498,9 @@ def _handle_background(handler, body): stream_id = uuid.uuid4().hex bg.active_stream_id = stream_id bg.save() - q = queue.Queue() + stream = create_stream_channel() with STREAMS_LOCK: - STREAMS[stream_id] = q + STREAMS[stream_id] = stream task_id = uuid.uuid4().hex[:8] from api.background import track_background, complete_background parent_sid = body["session_id"] @@ -4221,7 +6558,154 @@ def _handle_background(handler, body): return j(handler, {"task_id": task_id, "stream_id": stream_id, "session_id": bg.session_id}) -def _handle_chat_start(handler, body): +def _checkpoint_user_message_for_eager_session_save(s, msg: str, attachments, started_at: float | None) -> None: + """Materialize the current user turn for eager first-turn persistence. + + The streaming thread still receives ``pending_user_message`` so existing + cancel/recovery/final-merge paths keep their current contract. Eager mode + only adds a durable display-message checkpoint before the agent launches. 
+ """ + if not msg: + return + existing = list(getattr(s, "messages", None) or []) + if existing: + latest = existing[-1] + if isinstance(latest, dict) and latest.get("role") == "user": + latest_text = " ".join(str(latest.get("content") or "").split()) + msg_text = " ".join(str(msg or "").split()) + if latest_text == msg_text: + return + user_msg = {"role": "user", "content": msg} + if isinstance(started_at, (int, float)) and started_at > 0: + user_msg["timestamp"] = int(started_at) + if attachments: + user_msg["attachments"] = list(attachments) + s.messages.append(user_msg) + + +def _prepare_chat_start_session_for_stream( + s, + *, + msg: str, + attachments, + workspace: str, + model: str, + model_provider, + stream_id: str, + started_at: float | None = None, +): + """Persist chat-start state according to webui.session_save_mode. + + ``deferred`` keeps the existing sidecar/WAL-backed behaviour: save pending + fields but leave the display transcript empty until the agent merges the + result. ``eager`` additionally writes the current user turn into messages so + a process restart immediately after /api/chat/start preserves the prompt as + a normal session message. Empty sessions are never saved here because this + helper only runs after a non-empty message is validated. 
+ """ + s.workspace = workspace + s.model = model + s.model_provider = model_provider + s.active_stream_id = stream_id + s.pending_user_message = msg + s.pending_attachments = attachments + s.pending_started_at = started_at if started_at is not None else time.time() + if get_webui_session_save_mode() == "eager": + _checkpoint_user_message_for_eager_session_save( + s, + msg, + attachments, + s.pending_started_at, + ) + s.save() + + +def _start_chat_stream_for_session( + s, + *, + msg: str, + attachments=None, + workspace: str, + model: str, + model_provider=None, + normalized_model: bool = False, + diag=None, + goal_related: bool = False, +): + """Persist pending state, register an SSE channel, and start an agent turn.""" + attachments = attachments or [] + # Prevent duplicate runs in the same session while a stream is still active. + # This commonly happens after page refresh/reconnect races and can produce + # duplicated clarify cards for what appears to be a single user request. + diag.stage("active_stream_check") if diag else None + current_stream_id = getattr(s, "active_stream_id", None) + if current_stream_id: + diag.stage("active_stream_lock_wait") if diag else None + with STREAMS_LOCK: + current_active = current_stream_id in STREAMS + if current_active: + diag.stage("response_write") if diag else None + return { + "error": "session already has an active stream", + "active_stream_id": current_stream_id, + "_status": 409, + } + # Stale stream id from a previous run; clear and continue. + diag.stage("stale_stream_cleanup") if diag else None + _clear_stale_stream_state(s) + + # #1932: check if this session has a pending goal continuation flag. + # The streaming hook sets PENDING_GOAL_CONTINUATION when goal_continue fires, + # so the next chat/start for this session is automatically treated as goal-related. 
+ if not goal_related and s.session_id in PENDING_GOAL_CONTINUATION: + goal_related = True + PENDING_GOAL_CONTINUATION.discard(s.session_id) + + stream_id = uuid.uuid4().hex + session_lock = _get_session_agent_lock(s.session_id) + diag.stage("session_lock_wait") if diag else None + with session_lock: + diag.stage("save_pending_state") if diag else None + _prepare_chat_start_session_for_stream( + s, + msg=msg, + attachments=attachments, + workspace=workspace, + model=model, + model_provider=model_provider, + stream_id=stream_id, + ) + diag.stage("set_last_workspace") if diag else None + set_last_workspace(workspace) + diag.stage("stream_registration") if diag else None + stream = create_stream_channel() + with STREAMS_LOCK: + STREAMS[stream_id] = stream + # #1932: mark stream as goal-related so the streaming hook evaluates the goal. + if goal_related: + STREAM_GOAL_RELATED[stream_id] = True + diag.stage("worker_thread_start") if diag else None + thr = threading.Thread( + target=_run_agent_streaming, + args=(s.session_id, msg, model, workspace, stream_id, attachments), + kwargs={"model_provider": model_provider, "goal_related": goal_related}, + daemon=True, + ) + thr.start() + response = { + "stream_id": stream_id, + "session_id": s.session_id, + "pending_started_at": s.pending_started_at, + } + if normalized_model: + response["effective_model"] = model + if model_provider: + response["effective_model_provider"] = model_provider + return response + + +def _handle_goal_command(handler, body): + """Handle WebUI /goal command controls and optional kickoff stream.""" try: require(body, "session_id") except ValueError as e: @@ -4230,69 +6714,189 @@ def _handle_chat_start(handler, body): s = get_session(body["session_id"]) except KeyError: return bad(handler, "Session not found", 404) - msg = str(body.get("message", "")).strip() - if not msg: - return bad(handler, "message is required") - attachments = _normalize_chat_attachments(body.get("attachments") or [])[:20] - try: 
- workspace = str(resolve_trusted_workspace(body.get("workspace") or s.workspace)) - except ValueError as e: - return bad(handler, str(e)) - requested_model = body.get("model") or s.model - requested_provider = ( - body.get("model_provider") - if "model_provider" in body - else getattr(s, "model_provider", None) - ) - model, model_provider, normalized_model = _resolve_compatible_session_model_state( - requested_model, - requested_provider, - ) - # Prevent duplicate runs in the same session while a stream is still active. - # This commonly happens after page refresh/reconnect races and can produce - # duplicated clarify cards for what appears to be a single user request. + + requested_profile = str(body.get("profile") or "").strip() + if requested_profile: + try: + from api.profiles import _PROFILE_ID_RE + + if requested_profile != "default" and not _PROFILE_ID_RE.fullmatch(requested_profile): + return bad(handler, "invalid profile", 400) + except ImportError: + requested_profile = "" + if requested_profile and not _profiles_match(getattr(s, "profile", None), requested_profile): + has_persisted_turns = bool( + getattr(s, "messages", None) + or getattr(s, "context_messages", None) + or getattr(s, "pending_user_message", None) + ) + if not has_persisted_turns: + s.profile = requested_profile + current_stream_id = getattr(s, "active_stream_id", None) + stream_running = False if current_stream_id: with STREAMS_LOCK: - current_active = current_stream_id in STREAMS - if current_active: - return j( - handler, - { - "error": "session already has an active stream", - "active_stream_id": current_stream_id, - }, - status=409, - ) - # Stale stream id from a previous run; clear and continue. 
- s.active_stream_id = None - stream_id = uuid.uuid4().hex - with _get_session_agent_lock(s.session_id): - s.workspace = workspace - s.model = model - s.model_provider = model_provider - s.active_stream_id = stream_id - s.pending_user_message = msg - s.pending_attachments = attachments - s.pending_started_at = time.time() - s.save() - set_last_workspace(workspace) - q = queue.Queue() - with STREAMS_LOCK: - STREAMS[stream_id] = q - thr = threading.Thread( - target=_run_agent_streaming, - args=(s.session_id, msg, model, workspace, stream_id, attachments), - kwargs={"model_provider": model_provider}, - daemon=True, + stream_running = current_stream_id in STREAMS + if not stream_running: + _clear_stale_stream_state(s) + + try: + from api.profiles import get_hermes_home_for_profile + + profile_home = get_hermes_home_for_profile(getattr(s, "profile", None)) + except Exception: + profile_home = None + + from api.goals import goal_command_payload, goal_state_snapshot, restore_goal_state + + goal_args = str(body.get("args", "") or body.get("text", "") or "") + goal_action = goal_args.strip().lower() + will_kickoff = bool( + goal_args.strip() + and goal_action not in ("status", "pause", "resume", "clear", "stop", "done") + and not stream_running ) - thr.start() - response = {"stream_id": stream_id, "session_id": s.session_id} - if normalized_model: - response["effective_model"] = model - if model_provider: - response["effective_model_provider"] = model_provider - return j(handler, response) + workspace = model = model_provider = normalized_model = None + previous_goal_state = None + if will_kickoff: + try: + workspace = str(resolve_trusted_workspace(body.get("workspace") or s.workspace)) + except ValueError as e: + return bad(handler, str(e)) + requested_model = body.get("model") or s.model + requested_provider = ( + body.get("model_provider") + if "model_provider" in body + else getattr(s, "model_provider", None) + ) + model, model_provider, normalized_model = 
_resolve_compatible_session_model_state( + requested_model, + requested_provider, + ) + previous_goal_state = goal_state_snapshot(s.session_id, profile_home=profile_home) + + payload = goal_command_payload( + s.session_id, + goal_args, + stream_running=stream_running, + profile_home=profile_home, + ) + if not payload.get("ok", True): + status = 409 if payload.get("error") == "agent_running" else 400 + return j(handler, payload, status=status) + + kickoff_prompt = str(payload.get("kickoff_prompt") or "").strip() + if kickoff_prompt: + if workspace is None: + try: + workspace = str(resolve_trusted_workspace(body.get("workspace") or s.workspace)) + except ValueError as e: + return bad(handler, str(e)) + if model is None: + requested_model = body.get("model") or s.model + requested_provider = ( + body.get("model_provider") + if "model_provider" in body + else getattr(s, "model_provider", None) + ) + model, model_provider, normalized_model = _resolve_compatible_session_model_state( + requested_model, + requested_provider, + ) + stream_response = _start_chat_stream_for_session( + s, + msg=kickoff_prompt, + attachments=[], + workspace=workspace, + model=model, + model_provider=model_provider, + normalized_model=normalized_model, + goal_related=True, + ) + status = int(stream_response.pop("_status", 200) or 200) + payload.update(stream_response) + if status >= 400: + restore_goal_state(s.session_id, previous_goal_state, profile_home=profile_home) + payload["ok"] = False + return j(handler, payload, status=status) + + return j(handler, payload) + + +def _handle_chat_start(handler, body, diag=None): + try: + diag.stage("validate_session_id") if diag else None + try: + require(body, "session_id") + except ValueError as e: + return bad(handler, str(e)) + diag.stage("get_session") if diag else None + try: + s = get_session(body["session_id"]) + except KeyError: + return bad(handler, "Session not found", 404) + diag.stage("validate_profile") if diag else None + requested_profile 
= str(body.get("profile") or "").strip() + if requested_profile: + try: + from api.profiles import _PROFILE_ID_RE + + if requested_profile != "default" and not _PROFILE_ID_RE.fullmatch(requested_profile): + return bad(handler, "invalid profile", 400) + except ImportError: + requested_profile = "" + if requested_profile and not _profiles_match(getattr(s, "profile", None), requested_profile): + has_persisted_turns = bool( + getattr(s, "messages", None) + or getattr(s, "context_messages", None) + or getattr(s, "pending_user_message", None) + ) + if not has_persisted_turns: + # Empty sessions are placeholders. If the user switches profiles + # before sending the first turn, run the placeholder under the + # currently-selected profile instead of the stale one stamped at + # creation time. + s.profile = requested_profile + diag.stage("normalize_message") if diag else None + msg = str(body.get("message", "")).strip() + if not msg: + return bad(handler, "message is required") + diag.stage("normalize_attachments") if diag else None + attachments = _normalize_chat_attachments(body.get("attachments") or [])[:20] + diag.stage("resolve_workspace") if diag else None + try: + workspace = str(resolve_trusted_workspace(body.get("workspace") or s.workspace)) + except ValueError as e: + return bad(handler, str(e)) + requested_model = body.get("model") or s.model + requested_provider = ( + body.get("model_provider") + if "model_provider" in body + else getattr(s, "model_provider", None) + ) + diag.stage("resolve_model_provider") if diag else None + model, model_provider, normalized_model = _resolve_compatible_session_model_state( + requested_model, + requested_provider, + ) + response = _start_chat_stream_for_session( + s, + msg=msg, + attachments=attachments, + workspace=workspace, + model=model, + model_provider=model_provider, + normalized_model=normalized_model, + diag=diag, + ) + status = int(response.pop("_status", 200) or 200) + diag.stage("response_write") if diag else None + 
return j(handler, response, status=status) + finally: + if diag: + diag.finish() + def _normalize_chat_attachments(raw_attachments): @@ -4356,7 +6960,10 @@ def _handle_chat_sync(handler, body): from run_agent import AIAgent with CHAT_LOCK: - from api.config import resolve_model_provider + from api.config import ( + resolve_model_provider, + resolve_custom_provider_connection, + ) _model, _provider, _base_url = resolve_model_provider( model_with_provider_context(s.model, getattr(s, "model_provider", None)) @@ -4364,9 +6971,13 @@ def _handle_chat_sync(handler, body): # Resolve API key via Hermes runtime provider (matches gateway behaviour) _api_key = None try: + from api.oauth import resolve_runtime_provider_with_anthropic_env_lock from hermes_cli.runtime_provider import resolve_runtime_provider - _rt = resolve_runtime_provider(requested=_provider) + _rt = resolve_runtime_provider_with_anthropic_env_lock( + resolve_runtime_provider, + requested=_provider, + ) _api_key = _rt.get("api_key") # Also use runtime provider/base_url if the webui config didn't resolve them if not _provider: @@ -4378,6 +6989,12 @@ def _handle_chat_sync(handler, body): f"[webui] WARNING: resolve_runtime_provider failed: {_e}", flush=True, ) + if isinstance(_provider, str) and _provider.startswith("custom:"): + _cp_key, _cp_base = resolve_custom_provider_connection(_provider) + if not _api_key and _cp_key: + _api_key = _cp_key + if not _base_url and _cp_base: + _base_url = _cp_base agent = AIAgent( model=_model, provider=_provider, @@ -4390,23 +7007,24 @@ def _handle_chat_sync(handler, body): enabled_toolsets=_resolve_cli_toolsets(), session_id=s.session_id, ) - workspace_ctx = f"[Workspace: {s.workspace}]\n" - workspace_system_msg = ( - f"Active workspace at session start: {s.workspace}\n" - "Every user message is prefixed with [Workspace: /absolute/path] indicating the " - "workspace the user has selected in the web UI at the time they sent that message. 
" - "This tag is the single authoritative source of the active workspace and updates " - "with every message. It overrides any prior workspace mentioned in this system " - "prompt, memory, or conversation history. Always use the value from the most recent " - "[Workspace: ...] tag as your default working directory for ALL file operations: " - "write_file, read_file, search_files, terminal workdir, and patch. " - "Never fall back to a hardcoded path when this tag is present." - ) from api.streaming import ( _merge_display_messages_after_agent_result, _restore_reasoning_metadata, _sanitize_messages_for_api, _session_context_messages, + _workspace_context_prefix, + ) + workspace_ctx = _workspace_context_prefix(str(s.workspace)) + workspace_system_msg = ( + f"Active workspace at session start: {s.workspace}\n" + "Every user message is prefixed with [Workspace::v1: /absolute/path] indicating the " + "workspace the user has selected in the web UI at the time they sent that message. " + "This tag is the single authoritative source of the active workspace and updates " + "with every message. It overrides any prior workspace mentioned in this system " + "prompt, memory, or conversation history. Always use the value from the most recent " + "[Workspace::v1: ...] tag as your default working directory for ALL file operations: " + "write_file, read_file, search_files, terminal workdir, and patch. " + "Never fall back to a hardcoded path when this tag is present." 
) _previous_messages = list(s.messages or []) @@ -4483,8 +7101,9 @@ def _handle_cron_create(handler, body): except ValueError as e: return bad(handler, str(e)) try: - from cron.jobs import create_job + from cron.jobs import create_job, update_job + profile = _normalize_cron_profile_value(body.get("profile")) job = create_job( prompt=body["prompt"], schedule=body["schedule"], @@ -4493,7 +7112,9 @@ def _handle_cron_create(handler, body): skills=body.get("skills") or [], model=body.get("model") or None, ) - return j(handler, {"ok": True, "job": job}) + if profile is not None: + job = update_job(job["id"], {"profile": profile}) or job + return j(handler, {"ok": True, "job": _cron_job_for_api(job)}) except Exception as e: return j(handler, {"error": str(e)}, status=400) @@ -4505,11 +7126,21 @@ def _handle_cron_update(handler, body): return bad(handler, str(e)) from cron.jobs import update_job - updates = {k: v for k, v in body.items() if k != "job_id" and v is not None} + try: + updates = {} + for k, v in body.items(): + if k == "job_id": + continue + if k == "profile": + updates[k] = _normalize_cron_profile_value(v) + elif v is not None: + updates[k] = v + except ValueError as e: + return bad(handler, str(e)) job = update_job(body["job_id"], updates) if not job: return bad(handler, "Job not found", 404) - return j(handler, {"ok": True, "job": job}) + return j(handler, {"ok": True, "job": _cron_job_for_api(job)}) def _handle_cron_delete(handler, body): @@ -4540,7 +7171,23 @@ def _handle_cron_run(handler, body): return j(handler, {"ok": False, "job_id": job_id, "status": "already_running", "elapsed": round(elapsed, 1)}) _mark_cron_running(job_id) - threading.Thread(target=_run_cron_tracked, args=(job,), daemon=True).start() + # Capture the TLS-active profile home now — the thread runs after the + # request finishes, so TLS is gone by then. 
+ # + # Resolve directly without a try/except: get_active_hermes_home() does + # in-memory dict reads + a single Path.is_dir() stat, so the only way + # it could raise from inside a request handler is if api.profiles + # itself partially failed to import (in which case we'd already be + # 500-ing the whole request). A silent fallback to None here would + # re-introduce the exact bug #1573 fixes — the worker thread would + # run unpinned against the process-global HERMES_HOME — so we'd + # rather let any unexpected exception 500 the request than corrupt + # cross-profile state. + from api.profiles import get_active_hermes_home + + _profile_home = get_active_hermes_home() + _execution_profile_home = _profile_home_for_cron_job(job) + threading.Thread(target=_run_cron_tracked, args=(job, _profile_home, _execution_profile_home), daemon=True).start() return j(handler, {"ok": True, "job_id": job_id, "status": "running"}) @@ -4684,8 +7331,77 @@ def _handle_create_dir(handler, body): return bad(handler, _sanitize_error(e)) +def _handle_file_reveal(handler, body): + try: + require(body, "session_id", "path") + except ValueError as e: + return bad(handler, str(e)) + try: + s = get_session(body["session_id"]) + except KeyError: + return bad(handler, "Session not found", 404) + try: + target = safe_resolve(Path(s.workspace), body["path"]) + if not target.exists(): + # Include the resolved server-side path in the error message so + # the frontend toast can show *which* file the system expected. + # Useful when a stale session row still references a deleted file + # (#1764 — Cygnus's screenshot showed a "Failed to reveal: not + # found" toast that dropped the path entirely, leaving no clue + # what was missing). 
+ return bad(handler, f"File not found: {target}", 404) + + system = platform.system() + if system == "Darwin": + subprocess.Popen(["open", "-R", str(target)]) + elif system == "Windows": + subprocess.Popen(["explorer.exe", "/select," + str(target)]) + else: + # Linux / other — open parent directory + subprocess.Popen(["xdg-open", str(target.parent)]) + + return j(handler, {"ok": True, "path": body["path"]}) + except (ValueError, PermissionError, OSError) as e: + return bad(handler, _sanitize_error(e)) + + +def _handle_file_path(handler, body): + """Resolve a relative workspace-rooted path into an absolute on-disk path. + + The right-click "Copy file path" action (#1764) wants to put the + absolute path on the user's clipboard so they can paste it into a + terminal, editor, or anywhere else without having to round-trip through + the OS file browser. The frontend can't compute the absolute path on + its own — `safe_resolve` joins against the session's workspace root + which only the server knows. The handler here is a thin lookup; no + filesystem mutation, no OS-specific dispatch. We do NOT require the + target to exist (unlike `_handle_file_reveal`) — copying the path of a + just-deleted file is still useful, and refusing would force callers + to special-case 404s for an action that cannot fail destructively. 
+ """ + try: + require(body, "session_id", "path") + except ValueError as e: + return bad(handler, str(e)) + try: + s = get_session(body["session_id"]) + except KeyError: + return bad(handler, "Session not found", 404) + try: + target = safe_resolve(Path(s.workspace), body["path"]) + return j(handler, {"ok": True, "path": str(target)}) + except (ValueError, PermissionError, OSError) as e: + return bad(handler, _sanitize_error(e)) + + def _handle_workspace_add(handler, body): - path_str = body.get("path", "").strip() + # Strip surrounding paired quotes BEFORE any further processing — macOS + # Finder's "Copy as Pathname" wraps paths in single quotes, and users + # routinely paste those quoted strings into the Add Space input. + # Doing this at the route entry means every downstream check (blocked + # system path, validate_workspace_to_add, duplicate detection) sees the + # cleaned form. + path_str = _strip_surrounding_quotes(body.get("path", "").strip()) name = body.get("name", "").strip() auto_create = body.get("create", False) if not path_str: @@ -4915,6 +7631,38 @@ def _handle_session_compress(handler, body): return None return {"role": role, "ts": ts, "text": norm, "attachments": attach_count} + def _compression_summary_from_messages(messages): + text = None + for m in reversed(messages or []): + if not isinstance(m, dict): + continue + role = str(m.get("role") or "").lower() + if role != "assistant": + continue + if not isinstance(m.get("content"), str): + continue + content = str(m.get("content") or "").strip() + if not content: + continue + norm = re.sub(r"\s+", " ", content).strip() + if ( + "context compaction" in norm.lower() + or "context compression" in norm.lower() + ): + return norm + return None + + def _compact_summary_text(raw_text): + if not isinstance(raw_text, str): + return None + txt = raw_text.strip() + if not txt: + return None + txt = re.sub(r"\s+", " ", txt) + if len(txt) > 320: + txt = f"{txt[:314]}…" + return txt + try: require(body, 
"session_id") except ValueError as e: @@ -5020,6 +7768,7 @@ def _handle_session_compress(handler, body): ) import api.config as _cfg + from api.oauth import resolve_runtime_provider_with_anthropic_env_lock import hermes_cli.runtime_provider as _runtime_provider import run_agent as _run_agent @@ -5029,7 +7778,10 @@ def _handle_session_compress(handler, body): resolved_api_key = None try: - _rt = _runtime_provider.resolve_runtime_provider(requested=resolved_provider) + _rt = resolve_runtime_provider_with_anthropic_env_lock( + _runtime_provider.resolve_runtime_provider, + requested=resolved_provider, + ) resolved_api_key = _rt.get("api_key") if not resolved_provider: resolved_provider = _rt.get("provider") @@ -5038,6 +7790,13 @@ def _handle_session_compress(handler, body): except Exception as _e: logger.warning("resolve_runtime_provider failed for compression: %s", _e) + if isinstance(resolved_provider, str) and resolved_provider.startswith("custom:"): + _cp_key, _cp_base = _cfg.resolve_custom_provider_connection(resolved_provider) + if not resolved_api_key and _cp_key: + resolved_api_key = _cp_key + if not resolved_base_url and _cp_base: + resolved_base_url = _cp_base + if not resolved_api_key: return bad(handler, "No provider configured -- cannot compress.") @@ -5090,6 +7849,12 @@ def _handle_session_compress(handler, body): visible_after = _visible_messages_for_anchor(compressed) s.compression_anchor_visible_idx = max(0, len(visible_after) - 1) if visible_after else None s.compression_anchor_message_key = _anchor_message_key(visible_after[-1]) if visible_after else None + summary_text = None + if isinstance(summary, dict): + summary_text = summary.get("reference_message") or summary.get("token_line") or summary.get("headline") + s.compression_anchor_summary = _compact_summary_text( + summary_text or _compression_summary_from_messages(compressed) or "" + ) s.save() session_payload = redact_session_data( @@ -5118,6 +7883,670 @@ def _handle_session_compress(handler, 
body): return bad(handler, f"Compression failed: {_sanitize_error(e)}") +def _handle_conversation_rounds(handler, body): + """Return conversation-round count for a gateway session. + + Request body:: + + { "session_id": "...", "since": } + + Response:: + + { "ok": true, "rounds": 12, "threshold": 10, "should_show": true } + """ + try: + require(body, "session_id") + except ValueError as e: + return bad(handler, str(e)) + + sid = str(body.get("session_id") or "").strip() + if not sid: + return bad(handler, "session_id is required") + + since = body.get("since") + if since is not None: + try: + since = float(since) + except (TypeError, ValueError): + return bad(handler, "since must be a unix timestamp (number)") + + from api.models import count_conversation_rounds, CONVERSATION_ROUND_THRESHOLD + + rounds = count_conversation_rounds(sid, since=since) + return j(handler, { + "ok": True, + "rounds": rounds, + "threshold": CONVERSATION_ROUND_THRESHOLD, + "should_show": rounds >= CONVERSATION_ROUND_THRESHOLD, + }) + + +def _build_handoff_summary_tool_message( + sid: str, + summary: str, + channel: str | None, + rounds: int | None = None, + fallback: bool = False, +) -> dict: + """Build a compact tool-role transcript marker for persistence.""" + now = time.time() + return { + "role": "tool", + # Keep this intentionally empty so API-history sanitization drops it from + # model context (it is display-only data). 
+ "tool_call_id": "", + "name": "handoff_summary", + "timestamp": now, + "_ts": now, + "content": json.dumps({ + "_handoff_summary_card": True, + "session_id": sid, + "summary": str(summary or "").strip(), + "channel": (str(channel or "").strip() or None), + "rounds": rounds, + "fallback": bool(fallback), + "generated_at": now, + }, ensure_ascii=False), + } + + +def _extract_handoff_summary_payload(message: dict) -> dict | None: + """Return a normalized handoff-summary payload if *message* is a tool marker.""" + if not isinstance(message, dict): + return None + if message.get("role") != "tool" or message.get("name") != "handoff_summary": + return None + + content = message.get("content") + if isinstance(content, dict): + payload = content + else: + try: + payload = json.loads(content or "") + except Exception: + return None + + if not isinstance(payload, dict) or not payload.get("_handoff_summary_card"): + return None + if payload.get("session_id") is None: + return None + return { + "session_id": str(payload.get("session_id")), + "summary": str(payload.get("summary", "")), + "channel": payload.get("channel"), + "rounds": payload.get("rounds"), + "fallback": bool(payload.get("fallback")), + "_handoff_summary_card": True, + } + + +def _is_matching_handoff_summary_message(existing: dict, target: dict) -> bool: + """Return True when two message payloads represent the same handoff summary.""" + existing_payload = _extract_handoff_summary_payload(existing) + target_payload = _extract_handoff_summary_payload(target) + if not existing_payload or not target_payload: + return False + return ( + existing_payload.get("session_id") == target_payload.get("session_id") and + existing_payload.get("summary") == target_payload.get("summary") and + existing_payload.get("channel") == target_payload.get("channel") and + existing_payload.get("rounds") == target_payload.get("rounds") and + existing_payload.get("fallback") == target_payload.get("fallback") and + 
existing_payload.get("_handoff_summary_card") == target_payload.get("_handoff_summary_card") + ) + + +def _is_matching_handoff_summary_content(content: object, target_payload: dict | None) -> bool: + """Return True if DB content JSON matches an expected handoff summary payload.""" + if target_payload is None: + return False + try: + payload = json.loads(content or "") + except Exception: + return False + if not isinstance(payload, dict): + return False + if payload.get("session_id") is None: + return False + return ( + payload.get("_handoff_summary_card") is True and + str(payload.get("session_id")) == str(target_payload.get("session_id")) and + str(payload.get("summary", "")) == str(target_payload.get("summary", "")) and + payload.get("channel") == target_payload.get("channel") and + payload.get("rounds") == target_payload.get("rounds") and + bool(payload.get("fallback")) == bool(target_payload.get("fallback")) + ) + + +def _persist_handoff_summary_locally(sid: str, message: dict) -> bool: + """Persist a handoff summary marker into a local WebUI session file.""" + try: + from api.models import get_session + + s = get_session(sid) + except KeyError: + return False + + try: + if s.messages and _is_matching_handoff_summary_message(s.messages[-1], message): + return True + s.messages.append(message) + s.save() + return True + except Exception as e: + logger.warning("Failed to persist handoff summary marker in local session %s: %s", sid, e) + return False + + +def _persist_handoff_summary_to_state_db(sid: str, message: dict) -> bool: + """Persist a handoff summary marker into CLI sessions state.db. + + This keeps summary cards available after hard-refresh for imported gateway + sessions that are not in local session JSON yet. 
+ """ + import os + + try: + import sqlite3 + except ImportError: + return False + + try: + from api.profiles import get_active_hermes_home + + hermes_home = Path(get_active_hermes_home()).expanduser().resolve() + except Exception: + hermes_home = Path(os.getenv("HERMES_HOME", str(Path.home() / ".hermes"))).expanduser().resolve() + + db_path = hermes_home / "state.db" + if not db_path.exists(): + return False + + ts = message.get("timestamp", time.time()) + content = message.get("content", "") + if not isinstance(content, str): + content = json.dumps(content, ensure_ascii=False) + + marker_payload = _extract_handoff_summary_payload(message) + try: + with sqlite3.connect(str(db_path)) as conn: + try: + if marker_payload is not None: + cur = conn.execute( + "SELECT content FROM messages WHERE session_id = ? AND role = 'tool' " + "ORDER BY rowid DESC LIMIT 1", + (sid,), + ) + row = cur.fetchone() + if row is not None and _is_matching_handoff_summary_content(row[0], marker_payload): + return True + except Exception: + # If tail-read fails, continue with a best-effort write. + logger.debug("Unable to read tail handoff marker from state.db for %s", sid) + + conn.execute( + "INSERT INTO messages (session_id, role, content, timestamp) " + "VALUES (?, 'tool', ?, ?)", + (sid, content, ts), + ) + # Keep session row message_count/last-activity aligned with displayed + # transcript length. session rows are optional in some test DBs, so + # this update is best-effort. 
+ conn.execute( + "UPDATE sessions SET message_count = COALESCE(message_count, 0) + 1 " + "WHERE id = ?", + (sid,), + ) + conn.commit() + return True + except Exception as e: + logger.warning("Failed to persist handoff summary marker in state.db for %s: %s", sid, e) + return False + + +def _persist_handoff_summary(sid: str, summary: str, channel: str | None, rounds: int | None, fallback: bool = False) -> dict: + """Persist a handoff summary marker across local/session backends.""" + marker = _build_handoff_summary_tool_message(sid, summary, channel, rounds, fallback) + is_messaging_session = _is_messaging_session_id(sid) + if is_messaging_session: + _persist_handoff_summary_to_state_db(sid, marker) + _persist_handoff_summary_locally(sid, marker) + return marker + persisted_local = _persist_handoff_summary_locally(sid, marker) + if persisted_local: + return marker + return marker if _persist_handoff_summary_to_state_db(sid, marker) else marker + + +def _handle_handoff_summary(handler, body): + """Generate an on-demand handoff summary for a gateway session. + + Request body:: + + { "session_id": "...", "since": } + + Uses the session's configured model to produce a concise summary of + recent conversation activity. Returns the summary text so the caller + can display it in a tool-card. 
+ """ + try: + require(body, "session_id") + except ValueError as e: + return bad(handler, str(e)) + + sid = str(body.get("session_id") or "").strip() + if not sid: + return bad(handler, "session_id is required") + + since = body.get("since") + if since is not None: + try: + since = float(since) + except (TypeError, ValueError): + return bad(handler, "since must be a unix timestamp (number)") + + from api.models import get_cli_session_messages, count_conversation_rounds, CONVERSATION_ROUND_THRESHOLD + + rounds = count_conversation_rounds(sid, since=since) + if rounds < CONVERSATION_ROUND_THRESHOLD: + return bad(handler, "Not enough conversation rounds to generate a summary.", 400) + + # Filter messages by ``since``. + all_msgs = get_cli_session_messages(sid) + if since is not None: + import datetime as _dt + filtered = [] + for m in all_msgs: + ts_raw = m.get("timestamp") + if ts_raw is None: + continue + try: + if isinstance(ts_raw, (int, float)): + ts_val = float(ts_raw) + else: + ts_val = _dt.datetime.fromisoformat( + str(ts_raw).replace("Z", "+00:00") + ).timestamp() + if ts_val > since: + filtered.append(m) + except Exception: + pass + msgs = filtered + else: + msgs = all_msgs + + # Cap to last 50 messages. + msgs = msgs[-50:] + + if len(msgs) < 2: + return bad(handler, "Not enough messages to summarize.", 400) + + def _extract_handoff_text(raw_content): + if isinstance(raw_content, list): + return " ".join( + str(p.get("text") or p.get("content") or "") + for p in raw_content + if isinstance(p, dict) + ).strip() + return str(raw_content or "").strip() + + def _contains_chinese(text): + return any("\u4e00" <= ch <= "\u9fff" for ch in str(text)) + + transcript_is_chinese = any( + _contains_chinese(_extract_handoff_text(m.get("content"))) + for m in msgs + ) + # Build a lightweight conversation transcript for the LLM. 
+ lines = [] + for m in msgs: + role = m.get("role", "") + content = _extract_handoff_text(m.get("content")) + content = str(content or "").strip()[:1000] + if role in ("user", "assistant") and content: + lines.append(content) + transcript = "\n".join(lines) + + def _fallback_handoff_summary(items): + """Return a deterministic summary when LLM summary generation is unavailable.""" + user_points = [] + assistant_points = [] + + def _summarize_snippet(raw_text, max_len=78): + text = " ".join(str(raw_text or "").split()).strip() + if not text: + return "" + if len(text) <= max_len: + return text + return text[: max_len - 1].rstrip() + "…" + + for m in items: + role = m.get("role", "") + content = _summarize_snippet(_extract_handoff_text(m.get("content")), 82) + if role in ("user", "assistant") and content: + if role == "user": + user_points.append(content) + else: + assistant_points.append(content) + if not user_points and not assistant_points: + return ( + "近期可读文本不足,无法生成更完整的交接摘要,请补充一条消息后重试。" + if transcript_is_chinese + else "Not enough readable text to create a useful handoff summary; please send one more message and retry." 
+ ) + + if transcript_is_chinese: + bullets = [] + if user_points: + bullets.append(f"- 你刚讨论了:{user_points[-1]}。") + if assistant_points: + bullets.append(f"- 助手已回复:{assistant_points[-1]}。") + if len(user_points) + len(assistant_points) >= 2: + bullets.append("- 当前对话存在尚未确认的后续动作。") + else: + bullets.append("- 当前信息偏少,建议补充关键点后再切换。") + return "\n".join(bullets) + + bullets = [] + if user_points: + bullets.append(f"- You asked: {user_points[-1]}.") + if assistant_points: + bullets.append(f"- The assistant responded: {assistant_points[-1]}.") + if len(user_points) + len(assistant_points) >= 2: + bullets.append("- There is pending context to continue next.") + else: + bullets.append("- The conversation is still short; add one more turn before summarizing.") + return "\n".join(bullets) + + def _summary_output_incomplete(text): + """Best-effort guard for truncated summaries when LLM signals are unavailable.""" + if not isinstance(text, str): + text = str(text or "") + text = text.strip() + if not text: + return True + if text.endswith("...") or text.endswith("…"): + return True + lines = [line.strip() for line in text.splitlines() if line.strip()] + if not lines: + return True + last_line = lines[-1] + if re.search(r"[。!?;!?.;]$", last_line): + return False + if len(last_line) >= 56 and not re.search(r"\b(and|or|so|then|because|if|when|but|so|as)\b$", last_line, re.IGNORECASE): + return True + return bool(re.search(r"\b(and|or|but|so|because|if|when)$", last_line, re.IGNORECASE)) + + def _agent_summary_incomplete(summary_result): + if not isinstance(summary_result, dict): + return True + reason = (summary_result.get("finish_reason") or "").strip().lower() + if reason == "length": + return True + stop_reason = (summary_result.get("stop_reason") or "").strip().lower() + if stop_reason in {"max_tokens", "length"}: + return True + return _summary_output_incomplete(summary_result.get("text", "")) + + def _resolve_handoff_channel_label(): + channel_label = None + try: + from 
api.models import get_session as _get_session, get_cli_sessions + + session_meta = _get_session(sid) + channel_label = ( + session_meta.source_label + or session_meta.raw_source + or session_meta.source_tag + or session_meta.session_source + ) + if not channel_label: + for candidate in get_cli_sessions(): + if candidate.get("session_id") == sid: + channel_label = ( + candidate.get("source_label") + or candidate.get("raw_source") + or candidate.get("source_tag") + or candidate.get("source") + ) + break + except Exception: + pass + return channel_label + + def _agent_text_completion(agent, system_prompt, user_text, max_tokens=700): + """Use the current Hermes Agent transport without mutating conversation history.""" + api_messages = [ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": user_text}, + ] + result = { + "text": "", + "finish_reason": None, + "stop_reason": None, + "incomplete": True, + } + disabled_reasoning = {"enabled": False} + previous_reasoning = getattr(agent, "reasoning_config", None) + try: + agent.reasoning_config = disabled_reasoning + if getattr(agent, "api_mode", "") == "codex_responses": + codex_kwargs = agent._build_api_kwargs(api_messages) + codex_kwargs.pop("tools", None) + codex_kwargs["max_output_tokens"] = max_tokens + resp = agent._run_codex_stream(codex_kwargs) + assistant_message, _ = agent._normalize_codex_response(resp) + result["text"] = str((assistant_message.content or "") if assistant_message else "").strip() + result["incomplete"] = _summary_output_incomplete(result["text"]) + return result + + if getattr(agent, "api_mode", "") == "anthropic_messages": + from agent.anthropic_adapter import build_anthropic_kwargs, normalize_anthropic_response + + ant_kwargs = build_anthropic_kwargs( + model=agent.model, + messages=api_messages, + tools=None, + max_tokens=max_tokens, + reasoning_config=disabled_reasoning, + is_oauth=getattr(agent, "_is_anthropic_oauth", False), + 
preserve_dots=agent._anthropic_preserve_dots(), + base_url=getattr(agent, "_anthropic_base_url", None), + ) + resp = agent._anthropic_messages_create(ant_kwargs) + assistant_message, _ = normalize_anthropic_response( + resp, + strip_tool_prefix=getattr(agent, "_is_anthropic_oauth", False), + ) + result["text"] = str((assistant_message.content or "") if assistant_message else "").strip() + result["incomplete"] = _summary_output_incomplete(result["text"]) + return result + + api_kwargs = agent._build_api_kwargs(api_messages) + api_kwargs.pop("tools", None) + api_kwargs["temperature"] = 0.2 + api_kwargs["timeout"] = 30.0 + if "max_completion_tokens" in api_kwargs: + api_kwargs["max_completion_tokens"] = max_tokens + else: + api_kwargs["max_tokens"] = max_tokens + resp = agent._ensure_primary_openai_client(reason="handoff_summary").chat.completions.create( + **api_kwargs, + ) + choice = (getattr(resp, "choices", None) or [None])[0] + msg = getattr(choice, "message", None) if choice is not None else None + result["text"] = str(getattr(msg, "content", "") or "").strip() + result["finish_reason"] = getattr(choice, "finish_reason", None) + result["stop_reason"] = getattr(choice, "stop_reason", None) + result["incomplete"] = _agent_summary_incomplete(result) + return result + finally: + agent.reasoning_config = previous_reasoning + + # Call LLM for summary. + try: + import api.config as _cfg + from api.oauth import resolve_runtime_provider_with_anthropic_env_lock + import hermes_cli.runtime_provider as _runtime_provider + import run_agent as _run_agent + + # Try to resolve model from an existing session, fall back to default. 
+ resolved_model = None + resolved_provider = None + resolved_base_url = None + try: + from api.models import get_session + s_obj = get_session(sid) + resolved_model = getattr(s_obj, "model", None) + except Exception: + pass + + resolved_model, resolved_provider, resolved_base_url = _cfg.resolve_model_provider(resolved_model) + + resolved_api_key = None + try: + _rt = resolve_runtime_provider_with_anthropic_env_lock( + _runtime_provider.resolve_runtime_provider, + requested=resolved_provider, + ) + resolved_api_key = _rt.get("api_key") + if not resolved_provider: + resolved_provider = _rt.get("provider") + if not resolved_base_url: + resolved_base_url = _rt.get("base_url") + except Exception as _e: + logger.warning("resolve_runtime_provider failed for handoff summary: %s", _e) + + if isinstance(resolved_provider, str) and resolved_provider.startswith("custom:"): + _cp_key, _cp_base = _cfg.resolve_custom_provider_connection(resolved_provider) + if not resolved_api_key and _cp_key: + resolved_api_key = _cp_key + if not resolved_base_url and _cp_base: + resolved_base_url = _cp_base + + if not resolved_api_key: + summary_text = _fallback_handoff_summary(msgs) + try: + _persist_handoff_summary( + sid, + summary_text, + _resolve_handoff_channel_label(), + rounds, + fallback=True, + ) + except Exception: + pass + return j(handler, { + "ok": True, + "summary": summary_text, + "message_count": len(msgs), + "rounds": rounds, + "fallback": True, + }) + + agent = _run_agent.AIAgent( + model=resolved_model, + provider=resolved_provider, + base_url=resolved_base_url, + api_key=resolved_api_key, + platform="webui", + quiet_mode=True, + enabled_toolsets=[], + session_id=sid, + ) + + summary_system_prompt = ( + "You are summarizing an external-channel conversation so a Web UI reader " + "can quickly catch up after switching contexts.\n\n" + "Only use the latest messages, and never copy raw transcript lines.\n" + "Do not output role labels (no “你:” / “assistant:” / “user:” / 
“assistant”).\n" + "Use direct 2–5 bullet points in the conversation language.\n" + "English: speak using “you”.\n" + "中文: 使用“你”。\n\n" + "Focus on:\n" + "- Unfinished tasks or action items\n" + "- Pending questions that need replies\n" + "- Key decisions made\n" + "- Open disagreements or TBD items\n\n" + "If the conversation is purely casual with no actionable items, " + "say so in one sentence." + ) + summary_user_text = f"Conversation transcript:\n{transcript}" + + try: + first_pass = _agent_text_completion( + agent, + summary_system_prompt, + summary_user_text, + max_tokens=700, + ) + summary_text = first_pass.get("text") if isinstance(first_pass, dict) else "" + if _agent_summary_incomplete(first_pass): + second_pass = _agent_text_completion( + agent, + summary_system_prompt, + summary_user_text, + max_tokens=1400, + ) + summary_text = second_pass.get("text") if isinstance(second_pass, dict) else "" + if _agent_summary_incomplete(second_pass): + summary_text = _fallback_handoff_summary(msgs) + fallback = True + else: + fallback = False + else: + fallback = False + finally: + try: + agent.release_clients() + except Exception: + pass + if not summary_text: + summary_text = _fallback_handoff_summary(msgs) + fallback = True + elif _summary_output_incomplete(summary_text): + if not fallback: + fallback = True + + channel_label = _resolve_handoff_channel_label() + _persist_handoff_summary( + sid, + summary_text, + channel_label, + rounds, + fallback=fallback, + ) + + return j(handler, { + "ok": True, + "summary": summary_text, + "message_count": len(msgs), + "rounds": rounds, + "fallback": fallback, + }) + except Exception as e: + logger.warning("Handoff summary generation failed: %s", e) + summary_text = _fallback_handoff_summary(msgs) + try: + _persist_handoff_summary( + sid, + summary_text, + _resolve_handoff_channel_label(), + rounds, + fallback=True, + ) + except Exception: + pass + return j(handler, { + "ok": True, + "summary": summary_text, + "message_count": 
len(msgs), + "rounds": rounds, + "fallback": True, + "warning": f"Summary generation used local fallback: {_sanitize_error(e)}", + }) + + def _handle_skill_save(handler, body): try: require(body, "name", "content") @@ -5129,15 +8558,15 @@ def _handle_skill_save(handler, body): category = body.get("category", "").strip() if category and ("/" in category or ".." in category): return bad(handler, "Invalid category") - from tools.skills_tool import SKILLS_DIR + skills_dir = _active_skills_dir() if category: - skill_dir = SKILLS_DIR / category / skill_name + skill_dir = skills_dir / category / skill_name else: - skill_dir = SKILLS_DIR / skill_name - # Validate resolved path stays within SKILLS_DIR + skill_dir = skills_dir / skill_name + # Validate resolved path stays within the active profile skills dir. try: - skill_dir.resolve().relative_to(SKILLS_DIR.resolve()) + skill_dir.resolve().relative_to(skills_dir.resolve()) except ValueError: return bad(handler, "Invalid skill path") skill_dir.mkdir(parents=True, exist_ok=True) @@ -5151,10 +8580,13 @@ def _handle_skill_delete(handler, body): require(body, "name") except ValueError as e: return bad(handler, str(e)) - from tools.skills_tool import SKILLS_DIR import shutil - matches = list(SKILLS_DIR.rglob(f"{body['name']}/SKILL.md")) + skill_name = str(body["name"]).strip().lower().replace(" ", "-") + if not skill_name or "/" in skill_name or ".." in skill_name: + return bad(handler, "Invalid skill name") + skills_dir = _active_skills_dir() + matches = [p for p in skills_dir.rglob("SKILL.md") if p.parent.name == skill_name] if not matches: return bad(handler, "Skill not found", 404) skill_dir = matches[0].parent @@ -5185,6 +8617,83 @@ def _handle_memory_write(handler, body): return j(handler, {"ok": True, "section": section, "path": str(target)}) +def _normalize_message_for_import_refresh(message: object) -> object: + """Normalize message payloads for import refresh prefix checks. 
+ + The strict dict comparison previously failed when existing messages held + integer timestamps while refreshed messages held floating-point timestamps. + Strip timing keys before comparison so we can safely treat semantic + prefixes as equivalent. + """ + if not isinstance(message, dict): + return message + normalized = dict(message) + normalized.pop("timestamp", None) + normalized.pop("_ts", None) + return normalized + + +def _message_has_cli_tool_metadata(message: object) -> bool: + if not isinstance(message, dict): + return False + if message.get("role") == "assistant" and message.get("tool_calls"): + return True + if message.get("role") == "tool" and (message.get("tool_call_id") or message.get("tool_name") or message.get("name")): + return True + return False + + +def _strip_cli_tool_metadata_for_refresh(message: object) -> object: + if not isinstance(message, dict): + return _normalize_message_for_import_refresh(message) + normalized = _normalize_message_for_import_refresh(message) + if not isinstance(normalized, dict): + return normalized + for key in ("tool_calls", "tool_call_id", "tool_name", "name"): + normalized.pop(key, None) + return normalized + + +def _is_cli_tool_metadata_enrichment(existing_messages: list, fresh_messages: list) -> bool: + """Return True when fresh messages only add CLI tool metadata. + + Older imports from get_cli_session_messages() persisted assistant/tool rows + without tool_calls, tool_call_id, or tool_name. After #1772 the refreshed + transcript can have the same length but richer metadata, so re-imports must + rebuild the stored sidecar even without a new row. 
+ """ + if not isinstance(existing_messages, list) or not isinstance(fresh_messages, list): + return False + if len(existing_messages) != len(fresh_messages): + return False + if any(_message_has_cli_tool_metadata(m) for m in existing_messages): + return False + if not any(_message_has_cli_tool_metadata(m) for m in fresh_messages): + return False + for idx, existing_message in enumerate(existing_messages): + if _strip_cli_tool_metadata_for_refresh(existing_message) != _strip_cli_tool_metadata_for_refresh(fresh_messages[idx]): + return False + return True + + +def _is_messages_refresh_prefix_match(existing_messages: list, fresh_messages: list) -> bool: + """Return True when existing_messages is a prefix of fresh_messages by value. + + This is a semantic comparison intended for import refresh, not deep + structural equality. It intentionally ignores timing fields that may differ + in type/precision between storage layers. + """ + if not isinstance(existing_messages, list) or not isinstance(fresh_messages, list): + return False + if len(existing_messages) > len(fresh_messages): + return False + for idx, existing_message in enumerate(existing_messages): + fresh_message = fresh_messages[idx] + if _normalize_message_for_import_refresh(existing_message) != _normalize_message_for_import_refresh(fresh_message): + return False + return True + + def _handle_session_import_cli(handler, body): """Import a single CLI session into the WebUI store.""" try: @@ -5198,13 +8707,39 @@ def _handle_session_import_cli(handler, body): existing = Session.load(sid) if existing: fresh_msgs = get_cli_session_messages(sid) + changed = False + cli_meta = None + for cs in list(get_cli_sessions()): + if cs["session_id"] == sid: + cli_meta = cs + break if fresh_msgs and len(fresh_msgs) > len(existing.messages): # Prefix-equality guard: only extend if existing messages are a prefix of # the fresh CLI messages. 
Prevents silently dropping WebUI-added messages # on hybrid sessions (user sent messages via WebUI while CLI continued). - if existing.messages == fresh_msgs[:len(existing.messages)]: + if _is_messages_refresh_prefix_match(existing.messages, fresh_msgs): existing.messages = fresh_msgs - existing.save(touch_updated_at=False) + changed = True + elif fresh_msgs and _is_cli_tool_metadata_enrichment(existing.messages, fresh_msgs): + # Same row count, richer payload: rebuild sidecars imported before + # CLI tool metadata was preserved (#1772). + existing.messages = fresh_msgs + changed = True + if cli_meta: + updates = { + "is_cli_session": True, + "source_tag": existing.source_tag or cli_meta.get("source_tag"), + "raw_source": existing.raw_source or cli_meta.get("raw_source") or cli_meta.get("source_tag"), + "session_source": existing.session_source or cli_meta.get("session_source"), + "source_label": existing.source_label or cli_meta.get("source_label"), + "parent_session_id": existing.parent_session_id or cli_meta.get("parent_session_id"), + } + for attr, value in updates.items(): + if getattr(existing, attr, None) != value: + setattr(existing, attr, value) + changed = True + if changed: + existing.save(touch_updated_at=False) return j( handler, { @@ -5212,6 +8747,7 @@ def _handle_session_import_cli(handler, body): | { "messages": existing.messages, "is_cli_session": True, + "read_only": bool((cli_meta or {}).get("read_only")), }, "imported": False, }, @@ -5229,6 +8765,17 @@ def _handle_session_import_cli(handler, body): cli_title = None cli_source_tag = None model = "unknown" + cli_raw_source = None + cli_session_source = None + cli_source_label = None + cli_user_id = None + cli_chat_id = None + cli_chat_type = None + cli_thread_id = None + cli_session_key = None + cli_platform = None + cli_parent_session_id = None + cli_read_only = False for cs in get_cli_sessions(): if cs["session_id"] == sid: profile = cs.get("profile") @@ -5237,6 +8784,17 @@ def 
_handle_session_import_cli(handler, body): updated_at = cs.get("updated_at") cli_title = cs.get("title") cli_source_tag = cs.get("source_tag") + cli_raw_source = cs.get("raw_source") + cli_session_source = cs.get("session_source") + cli_source_label = cs.get("source_label") + cli_user_id = cs.get("user_id") + cli_chat_id = cs.get("chat_id") + cli_chat_type = cs.get("chat_type") + cli_thread_id = cs.get("thread_id") + cli_session_key = cs.get("session_key") + cli_platform = cs.get("platform") + cli_parent_session_id = cs.get("parent_session_id") + cli_read_only = bool(cs.get("read_only")) break # Use the CLI session title if available (e.g., cron job name), otherwise derive from messages @@ -5247,6 +8805,32 @@ def _handle_session_import_cli(handler, body): if is_cron_session(sid, cli_source_tag): cron_project_id = ensure_cron_project() + if cli_read_only: + session_payload = { + "session_id": sid, + "title": title, + "workspace": str(get_last_workspace()), + "model": model, + "message_count": len(msgs), + "created_at": created_at, + "updated_at": updated_at, + "last_message_at": updated_at or created_at, + "pinned": False, + "archived": False, + "project_id": None, + "profile": profile, + "is_cli_session": True, + "source_tag": cli_source_tag, + "raw_source": cli_raw_source or cli_source_tag, + "session_source": cli_session_source, + "source_label": cli_source_label, + "parent_session_id": cli_parent_session_id, + "read_only": True, + "messages": msgs, + "tool_calls": [], + } + return j(handler, {"session": session_payload, "imported": False}) + s = import_cli_session( sid, title, @@ -5255,10 +8839,21 @@ def _handle_session_import_cli(handler, body): profile=profile, created_at=created_at, updated_at=updated_at, + parent_session_id=cli_parent_session_id, ) if cron_project_id: s.project_id = cron_project_id s.is_cli_session = True + s.source_tag = cli_source_tag + s.raw_source = cli_raw_source or cli_source_tag + s.session_source = cli_session_source + s.source_label 
= cli_source_label + s.user_id = cli_user_id + s.chat_id = cli_chat_id + s.chat_type = cli_chat_type + s.thread_id = cli_thread_id + s.session_key = cli_session_key + s.platform = cli_platform s._cli_origin = sid s.save(touch_updated_at=False) return j( @@ -5282,7 +8877,10 @@ def _handle_session_import(handler, body): if not isinstance(messages, list): return bad(handler, 'JSON must contain a "messages" array') title = body.get("title", "Imported session") - workspace = body.get("workspace", str(DEFAULT_WORKSPACE)) + try: + workspace = str(resolve_trusted_workspace(body.get("workspace", str(DEFAULT_WORKSPACE)))) + except (TypeError, ValueError) as e: + return bad(handler, str(e)) model = body.get("model", DEFAULT_MODEL) s = Session( title=title, @@ -5320,33 +8918,291 @@ def _mask_secrets(obj): return masked -def _server_summary(name, cfg): +def _parse_mcp_enabled(value) -> bool: + """Parse Hermes MCP ``enabled`` values without raising on bad config.""" + if value is None: + return True + if isinstance(value, bool): + return value + if isinstance(value, (int, float)): + return value != 0 + if isinstance(value, str): + normalized = value.strip().lower() + if normalized in {"true", "1", "yes", "on"}: + return True + if normalized in {"false", "0", "no", "off"}: + return False + return True + + +def _mcp_runtime_status_by_name() -> dict[str, dict]: + """Return already-known MCP runtime status without starting servers. + + ``tools.mcp_tool.get_mcp_status()`` only reads the existing MCP registry and + configuration; it does not probe or spawn MCP subprocesses. If Hermes Agent + is unavailable, fall back to an empty map so the API remains safe. 
+ """ + try: + from tools.mcp_tool import get_mcp_status + statuses = get_mcp_status() + except Exception: + return {} + if not isinstance(statuses, list): + return {} + return { + str(entry.get("name")): entry + for entry in statuses + if isinstance(entry, dict) and entry.get("name") + } + + +def _server_summary(name, cfg, runtime_status=None): """Return a safe summary of an MCP server config.""" + runtime_status = runtime_status if isinstance(runtime_status, dict) else {} out = {"name": name} + if not isinstance(cfg, dict): + out.update({ + "transport": "invalid", + "timeout": 120, + "connect_timeout": 60, + "enabled": False, + "active": False, + "status": "invalid_config", + "tool_count": None, + }) + return out + + enabled = _parse_mcp_enabled(cfg.get("enabled", True)) + connected = bool(runtime_status.get("connected")) if enabled else False if "url" in cfg: out["transport"] = "http" # Mask auth headers if "headers" in cfg: out["headers"] = _mask_secrets(cfg["headers"]) out["url"] = cfg["url"] - else: + elif "command" in cfg: out["transport"] = "stdio" out["command"] = cfg.get("command", "") out["args"] = cfg.get("args", []) if "env" in cfg: out["env"] = _mask_secrets(cfg["env"]) + else: + out["transport"] = "invalid" + enabled = False + connected = False + out["timeout"] = cfg.get("timeout", 120) + out["connect_timeout"] = cfg.get("connect_timeout", 60) + out["enabled"] = enabled + out["active"] = connected + if out["transport"] == "invalid": + out["status"] = "invalid_config" + elif not enabled: + out["status"] = "disabled" + elif connected: + out["status"] = "active" + else: + out["status"] = "configured" + out["tool_count"] = runtime_status.get("tools") if runtime_status else None return out -def _handle_mcp_servers_list(handler): - """List all configured MCP servers.""" +def _mcp_safe_display_text(value, *, limit: int) -> str: + """Return redacted, bounded MCP text safe for WebUI inventory rows.""" + if not isinstance(value, str): + value = "" if value is 
None else str(value) + value = _redact_text(value).strip() + value = re.sub(r"Authorization:\s*Bearer\s+\S+", "[REDACTED CREDENTIAL]", value, flags=re.I) + if len(value) > limit: + value = value[: max(0, limit - 1)].rstrip() + "…" + return value + + +def _mcp_schema_type(schema) -> str: + """Return a compact, non-sensitive display type for a JSON schema node.""" + if not isinstance(schema, dict): + return "unknown" + typ = schema.get("type") + if isinstance(typ, list): + typ = "/".join(str(t) for t in typ if t) + if isinstance(typ, str) and typ: + return typ + for composite in ("anyOf", "oneOf", "allOf"): + if isinstance(schema.get(composite), list) and schema[composite]: + return composite + if "enum" in schema: + return "enum" + return "unknown" + + +def _mcp_schema_summary(schema, *, limit: int = 12) -> list[dict]: + """Summarize an MCP input schema without exposing raw defaults/examples. + + The WebUI only needs searchable/displayable argument hints. Returning raw + JSON Schema can overexpose server-provided defaults, examples, enums, or + vendor extensions, so this strips each parameter down to name/type/required + and a redacted description. 
+ """ + if not isinstance(schema, dict): + return [] + properties = schema.get("properties") + if not isinstance(properties, dict): + return [] + required = schema.get("required") + required_names = set(required) if isinstance(required, list) else set() + out = [] + for name, prop in properties.items(): + if len(out) >= limit: + break + if not isinstance(name, str): + continue + prop = prop if isinstance(prop, dict) else {} + desc = prop.get("description", "") + if not isinstance(desc, str): + desc = "" + desc = _mcp_safe_display_text(desc, limit=180) + out.append({ + "name": name, + "type": _mcp_schema_type(prop), + "required": name in required_names, + "description": desc, + }) + return out + + +def _mcp_tool_schema_from_payload(tool): + if not isinstance(tool, dict): + return {} + for key in ("parameters", "inputSchema", "input_schema", "schema"): + value = tool.get(key) + if isinstance(value, dict): + if key == "schema" and isinstance(value.get("parameters"), dict): + return value["parameters"] + return value + return {} + + +def _mcp_tool_summary(name, tool, server_summary): + """Return a safe global inventory row for one MCP tool.""" + server_summary = server_summary if isinstance(server_summary, dict) else {} + if isinstance(tool, str): + tool = {"name": tool} + elif not isinstance(tool, dict): + tool = {} + tool_name = str(tool.get("name") or name or "") + description = tool.get("description") or "" + if not isinstance(description, str): + description = str(description) + description = _mcp_safe_display_text(description, limit=360) + return { + "name": tool_name, + "server": str(server_summary.get("name") or ""), + "description": description, + "active": bool(server_summary.get("active")), + "enabled": bool(server_summary.get("enabled")), + "status": server_summary.get("status") or "unknown", + "schema_summary": _mcp_schema_summary(_mcp_tool_schema_from_payload(tool)), + } + + +def _mcp_tools_from_runtime_status(runtime_by_name, server_summaries): + """Read 
detailed MCP tool payloads from runtime status when available.""" + tools = [] + if not isinstance(runtime_by_name, dict): + return tools + for server_name, runtime in runtime_by_name.items(): + if not isinstance(runtime, dict): + continue + raw_tools = runtime.get("tools") + if not isinstance(raw_tools, list): + raw_tools = runtime.get("tool_schemas") + if not isinstance(raw_tools, list): + continue + server_summary = server_summaries.get(str(server_name), {"name": str(server_name)}) + for index, tool in enumerate(raw_tools): + fallback_name = f"{server_name}:{index}" + summary = _mcp_tool_summary(fallback_name, tool, server_summary) + if summary["name"]: + tools.append(summary) + return tools + + +def _mcp_tools_from_registry(server_summaries): + """Read already-registered MCP tool schemas without probing MCP servers.""" + try: + from tools.registry import registry + except Exception: + return [] + tools = [] + try: + names = registry.get_all_tool_names() + except Exception: + return [] + for tool_name in names: + try: + toolset = registry.get_toolset_for_tool(tool_name) + except Exception: + continue + if not isinstance(toolset, str) or not toolset.startswith("mcp-"): + continue + server_name = toolset[len("mcp-"):] + schema = registry.get_schema(tool_name) or {} + server_summary = server_summaries.get(server_name, { + "name": server_name, + "enabled": True, + "active": False, + "status": "configured", + }) + tools.append(_mcp_tool_summary(tool_name, schema, server_summary)) + return tools + + +def _handle_mcp_tools_list(handler): + """List known MCP tools from already-available runtime inventory only.""" cfg = get_config() servers = cfg.get("mcp_servers", {}) if not isinstance(servers, dict): servers = {} - result = [_server_summary(name, scfg) for name, scfg in servers.items()] - return j(handler, {"servers": result}) + runtime = _mcp_runtime_status_by_name() + server_summaries = { + str(name): _server_summary(str(name), scfg, runtime.get(str(name))) + for 
name, scfg in servers.items() + } + tools = _mcp_tools_from_runtime_status(runtime, server_summaries) + source = "mcp_runtime_status" + if not tools: + tools = _mcp_tools_from_registry(server_summaries) + source = "tool_registry" if tools else "none" + tools.sort(key=lambda row: (row.get("server", ""), row.get("name", ""))) + unavailable_servers = [ + summary["name"] for summary in server_summaries.values() + if summary.get("enabled") and not summary.get("active") + ] + return j(handler, { + "tools": tools, + "total": len(tools), + "source": source, + "inventory_scope": "already_known_runtime_only", + "unavailable_servers": unavailable_servers, + }) + + +def _handle_mcp_servers_list(handler): + """List configured MCP servers with safe, read-only runtime visibility.""" + cfg = get_config() + servers = cfg.get("mcp_servers", {}) + if not isinstance(servers, dict): + servers = {} + runtime = _mcp_runtime_status_by_name() + result = [ + _server_summary(name, scfg, runtime.get(str(name))) + for name, scfg in servers.items() + ] + return j(handler, { + "servers": result, + "toggle_supported": False, + "reload_required": True, + }) def _handle_mcp_server_delete(handler, name): diff --git a/api/session_recovery.py b/api/session_recovery.py new file mode 100644 index 00000000..62f74026 --- /dev/null +++ b/api/session_recovery.py @@ -0,0 +1,593 @@ +""" +Session recovery from .bak snapshots — last line of defense against +data-loss bugs like #1558. + +``Session.save()`` writes a ``.json.bak`` snapshot of the previous +state whenever an incoming save would shrink the messages array. This +module reads those snapshots back and restores any session whose live +file has fewer messages than its backup, or whose live file is missing +while a valid backup remains. + +Three integration points: + +1. 
``recover_all_sessions_on_startup()`` — called from server.py at boot, + scans the session dir, restores any session whose JSON has fewer + messages than its .bak, and recreates a missing ``.json`` from an + orphaned ``.json.bak`` when the canonical state DB still has that + session. Idempotent: a clean run is a no-op. + +2. ``recover_session(sid)`` — single-session helper backing the + ``POST /api/session/recover`` endpoint, so users can re-run recovery + manually if their session was open through a server restart. + +3. ``inspect_session_recovery_status(sid)`` — read-only audit returning + message counts for the live JSON, the .bak, and a recommendation. +""" +from __future__ import annotations + +import argparse +import json +import logging +import os +import shutil +import sqlite3 +import threading +from pathlib import Path + +logger = logging.getLogger(__name__) + + +def _msg_count(p: Path) -> int: + """Return the number of messages in a session JSON file, or -1 on read/parse error. + + Returns -1 for any non-session-shape file: + - File can't be read (OSError) + - Top-level isn't valid JSON or is invalid (JSONDecodeError, ValueError) + - Top-level isn't a dict (AttributeError on .get) — e.g. ``_index.json`` + which is a top-level list of session metadata, not a session itself. + The startup recovery scanner globs ``*.json`` and would otherwise + crash on the first non-dict file it encounters. + """ + try: + data = json.loads(p.read_text(encoding='utf-8')) + except (OSError, json.JSONDecodeError, ValueError): + return -1 + if not isinstance(data, dict): + return -1 + msgs = data.get('messages') + return len(msgs) if isinstance(msgs, list) else -1 + + +def inspect_session_recovery_status(session_path: Path) -> dict: + """Return a status dict describing whether recovery is recommended. 
+ + { + "session_id": "...", + "live_messages": int, # -1 if live file unreadable + "bak_messages": int, # -1 if no .bak or unreadable + "recommend": "restore" | "no_action" | "no_backup", + } + """ + bak_path = session_path.with_suffix('.json.bak') + live_count = _msg_count(session_path) + if not bak_path.exists(): + return { + "session_id": session_path.stem, + "live_messages": live_count, + "bak_messages": -1, + "recommend": "no_backup", + } + bak_count = _msg_count(bak_path) + if bak_count > live_count: + return { + "session_id": session_path.stem, + "live_messages": live_count, + "bak_messages": bak_count, + "recommend": "restore", + } + return { + "session_id": session_path.stem, + "live_messages": live_count, + "bak_messages": bak_count, + "recommend": "no_action", + } + + +def recover_session(session_path: Path) -> dict: + """Restore session_path from its .bak when the bak has more messages. + + Returns a status dict identical to ``inspect_session_recovery_status`` + plus a "restored" boolean. + """ + status = inspect_session_recovery_status(session_path) + if status["recommend"] != "restore": + return {**status, "restored": False} + bak_path = session_path.with_suffix('.json.bak') + # Stage the recovery via a tmp copy + atomic replace so a crash mid-restore + # cannot leave a half-written session.json. + tmp_path = session_path.with_suffix('.json.recover.tmp') + try: + shutil.copyfile(bak_path, tmp_path) + tmp_path.replace(session_path) + except OSError as exc: + logger.warning("recover_session: copy failed for %s: %s", session_path, exc) + try: + tmp_path.unlink(missing_ok=True) + except OSError: + pass + return {**status, "restored": False, "error": str(exc)} + logger.warning( + "recover_session: restored %s from .bak (live=%d → bak=%d messages). 
" + "See #1558 for the data-loss class this guards against.", + session_path.name, status["live_messages"], status["bak_messages"], + ) + return {**status, "restored": True} + + +def _state_db_has_session(session_id: str, state_db_path: Path | None) -> bool: + """Return whether state.db still knows this session. + + The check is deliberately fail-open: recovery must not be prevented by a + locked, absent, or older-schema state DB. When a DB is readable and has no + row, treat the orphan backup as a tombstoned/deleted session and skip it. + """ + if state_db_path is None or not state_db_path.exists(): + return True + try: + with sqlite3.connect(f"file:{state_db_path}?mode=ro", uri=True) as conn: + cur = conn.execute( + "select 1 from sqlite_master where type='table' and name='sessions'" + ) + if cur.fetchone() is None: + return True + cur = conn.execute("select 1 from sessions where id = ? limit 1", (session_id,)) + return cur.fetchone() is not None + except Exception as exc: + logger.debug("state_db session tombstone check failed for %s: %s", session_id, exc) + return True + + +def _orphaned_backup_live_paths( + session_dir: Path, + state_db_path: Path | None = None, +) -> list[Path]: + """Return live ``.json`` paths whose ``.json.bak`` exists. + + ``Path.glob('*.json')`` does not see orphan backups because their suffix is + ``.bak``. Existing startup recovery only handled shrunken live files; this + helper covers the crash shape where the live sidecar is gone but the rescue + copy remains. 
+ """ + paths: list[Path] = [] + for bak_path in sorted(session_dir.glob('*.json.bak')): + live_path = bak_path.with_suffix('') + if live_path.name.startswith('_') or live_path.exists(): + continue + if _msg_count(bak_path) < 0: + continue + session_id = live_path.stem + if not _state_db_has_session(session_id, state_db_path): + logger.info( + "recover_all_sessions_on_startup: skipped orphan backup %s; " + "state.db has no live session row", + bak_path.name, + ) + continue + paths.append(live_path) + return paths + + +def _read_state_db_missing_sidecar_rows(session_dir: Path, state_db_path: Path | None) -> list[dict]: + """Return WebUI-origin state.db rows whose JSON sidecar is missing.""" + if state_db_path is None or not state_db_path.exists(): + return [] + try: + with sqlite3.connect(f"file:{state_db_path}?mode=ro", uri=True) as conn: + conn.row_factory = sqlite3.Row + session_cols = {row[1] for row in conn.execute("PRAGMA table_info(sessions)").fetchall()} + message_cols = {row[1] for row in conn.execute("PRAGMA table_info(messages)").fetchall()} + if not {'id', 'source'}.issubset(session_cols): + return [] + title_expr = _sql_optional_col('title', session_cols) + model_expr = _sql_optional_col('model', session_cols) + started_expr = _sql_optional_col('started_at', session_cols, '0') + parent_expr = _sql_optional_col('parent_session_id', session_cols) + msg_count_expr = _sql_optional_col('message_count', session_cols, '0') + workspace_expr = _sql_optional_col('workspace', session_cols) + worktree_path_expr = _sql_optional_col('worktree_path', session_cols) + worktree_branch_expr = _sql_optional_col('worktree_branch', session_cols) + worktree_repo_root_expr = _sql_optional_col('worktree_repo_root', session_cols) + worktree_created_at_expr = _sql_optional_col('worktree_created_at', session_cols) + rows = [] + for row in conn.execute( + f""" + SELECT id, source, {title_expr}, {model_expr}, {started_expr}, + {parent_expr}, {msg_count_expr}, {workspace_expr}, + 
{worktree_path_expr}, {worktree_branch_expr}, + {worktree_repo_root_expr}, {worktree_created_at_expr} + FROM sessions + WHERE source = 'webui' + ORDER BY COALESCE(started_at, 0) DESC + """ + ).fetchall(): + data = dict(row) + sid = str(data.get('id') or '').strip() + if not sid or (session_dir / f"{sid}.json").exists(): + continue + message_rows: list[dict] = [] + if {'session_id', 'role', 'content'}.issubset(message_cols): + order = "timestamp, id" if 'timestamp' in message_cols and 'id' in message_cols else "rowid" + ts_expr = 'timestamp' if 'timestamp' in message_cols else 'NULL AS timestamp' + for msg in conn.execute( + f"SELECT role, content, {ts_expr} FROM messages WHERE session_id = ? ORDER BY {order}", + (sid,), + ).fetchall(): + message = { + 'role': msg['role'], + 'content': msg['content'] or '', + } + if msg['timestamp'] is not None: + message['timestamp'] = msg['timestamp'] + message_rows.append(message) + if not message_rows: + continue + data['messages'] = message_rows + rows.append(data) + return rows + except Exception as exc: + logger.debug("state_db sidecar reconciliation scan failed for %s: %s", state_db_path, exc) + return [] + + +def _sql_optional_col(name: str, columns: set[str], fallback: str = "NULL") -> str: + return name if name in columns else f"{fallback} AS {name}" + + +def _state_db_row_to_sidecar(row: dict) -> dict: + try: + from api.agent_sessions import normalize_agent_session_source + except Exception: + normalize_agent_session_source = None + source = str(row.get('source') or '').strip().lower() + source_meta = normalize_agent_session_source(source) if normalize_agent_session_source else { + 'raw_source': source or None, + 'session_source': source or None, + 'source_label': source.title() if source else None, + } + started_at = row.get('started_at') or 0 + messages = row.get('messages') if isinstance(row.get('messages'), list) else [] + last_ts = messages[-1].get('timestamp') if messages and isinstance(messages[-1], dict) else 
started_at + workspace_value = row.get('workspace') or '' + return { + 'session_id': row.get('id'), + 'title': row.get('title') or 'Recovered WebUI Session', + 'workspace': workspace_value if isinstance(workspace_value, str) else '', + 'message_count': row.get('message_count') if isinstance(row.get('message_count'), int) else len(messages), + 'worktree_path': row.get('worktree_path') or None, + 'worktree_branch': row.get('worktree_branch') or None, + 'worktree_repo_root': row.get('worktree_repo_root') or None, + 'worktree_created_at': row.get('worktree_created_at') or None, + 'model': row.get('model') or 'unknown', + 'model_provider': None, + 'created_at': started_at, + 'updated_at': last_ts or started_at, + 'pinned': False, + 'archived': False, + 'project_id': None, + 'profile': None, + 'input_tokens': 0, + 'output_tokens': 0, + 'estimated_cost': None, + 'personality': None, + 'active_stream_id': None, + 'pending_user_message': None, + 'pending_attachments': [], + 'pending_started_at': None, + 'compression_anchor_visible_idx': None, + 'compression_anchor_message_key': None, + 'compression_anchor_summary': None, + 'context_length': None, + 'threshold_tokens': None, + 'last_prompt_tokens': None, + 'gateway_routing': None, + 'gateway_routing_history': [], + 'llm_title_generated': False, + 'parent_session_id': row.get('parent_session_id'), + 'is_cli_session': False, + 'source_tag': source or None, + **source_meta, + 'enabled_toolsets': None, + 'composer_draft': {}, + 'messages': messages, + 'tool_calls': [], + '_recovered_from_state_db': True, + } + + +def recover_missing_sidecars_from_state_db(session_dir: Path, state_db_path: Path | None) -> dict: + """Materialize missing WebUI JSON sidecars from canonical state.db rows.""" + rows = _read_state_db_missing_sidecar_rows(session_dir, state_db_path) + materialized = 0 + details: list[dict] = [] + session_dir.mkdir(parents=True, exist_ok=True) + for row in rows: + sid = str(row.get('id') or '').strip() + if not sid: + 
continue + target = session_dir / f"{sid}.json" + if target.exists(): + continue + payload = _state_db_row_to_sidecar(row) + # Per-process/per-thread tmp suffix to avoid corruption under + # concurrent reconciliation calls (matches api/models.py:484 + # Session.save() convention). + tmp_suffix = f".json.reconcile.tmp.{os.getpid()}.{threading.current_thread().ident}" + tmp = target.with_suffix(tmp_suffix) + try: + tmp.write_text(json.dumps(payload, ensure_ascii=False, indent=2), encoding='utf-8') + except OSError as exc: + try: + tmp.unlink(missing_ok=True) + except OSError: + pass + details.append({'session_id': sid, 'materialized': False, 'error': str(exc)}) + continue + # Atomic create-or-fail: os.link() refuses to overwrite an existing + # target. Closes the TOCTOU window between the target.exists() check + # above and the rename — a concurrent Session.save() for the same SID + # will win and we silently skip rather than overwrite a live sidecar. + materialized_now = False + try: + os.link(str(tmp), str(target)) + materialized_now = True + except FileExistsError: + # Live sidecar appeared between the check and the link — keep it. 
+ pass + except OSError as exc: + details.append({'session_id': sid, 'materialized': False, 'error': str(exc)}) + finally: + try: + tmp.unlink(missing_ok=True) + except OSError: + pass + if materialized_now: + materialized += 1 + details.append({'session_id': sid, 'materialized': True, 'messages': len(payload.get('messages') or [])}) + elif not any(d.get('session_id') == sid for d in details[-1:]): + details.append({'session_id': sid, 'materialized': False, 'skipped': 'sidecar_appeared_during_reconcile'}) + return {'scanned': len(rows), 'materialized': materialized, 'details': details} + + +def _new_audit_item( + session_id: str, + kind: str, + category: str, + recommendation: str, + live_messages: int = -1, + bak_messages: int = -1, +) -> dict: + return { + "session_id": session_id, + "kind": kind, + "category": category, + "recommendation": recommendation, + "live_messages": live_messages, + "bak_messages": bak_messages, + } + + +def _read_index_session_ids(index_path: Path) -> set[str]: + try: + data = json.loads(index_path.read_text(encoding='utf-8')) + except (OSError, json.JSONDecodeError, ValueError): + return set() + if not isinstance(data, list): + return set() + ids: set[str] = set() + for entry in data: + if isinstance(entry, dict) and isinstance(entry.get('session_id'), str): + ids.add(entry['session_id']) + return ids + + +def audit_session_recovery(session_dir: Path, state_db_path: Path | None = None) -> dict: + """Read-only audit of session recovery state. + + The audit intentionally does not mutate files. It classifies only the safe + recovery primitives this module knows how to perform: backup restores and + derived index rebuilds. Call ``recover_all_sessions_on_startup`` separately + for safe repairs. 
+ """ + if not session_dir.exists(): + return { + "status": "ok", + "summary": {"ok": 0, "repairable": 0, "unsafe_to_repair": 0}, + "items": [], + } + + items: list[dict] = [] + live_paths = sorted(p for p in session_dir.glob('*.json') if not p.name.startswith('_')) + live_ids = {p.stem for p in live_paths} + + for live_path in live_paths: + status = inspect_session_recovery_status(live_path) + if status.get('recommend') == 'restore': + items.append(_new_audit_item( + status['session_id'], + "shrunken_live", + "repairable", + "restore_from_bak", + status.get('live_messages', -1), + status.get('bak_messages', -1), + )) + + for bak_path in sorted(session_dir.glob('*.json.bak')): + live_path = bak_path.with_suffix('') + if live_path.exists() or live_path.name.startswith('_'): + continue + bak_messages = _msg_count(bak_path) + session_id = live_path.stem + if bak_messages < 0: + items.append(_new_audit_item( + session_id, "malformed_orphan_backup", "unsafe_to_repair", "manual_review", -1, bak_messages + )) + elif _state_db_has_session(session_id, state_db_path): + items.append(_new_audit_item( + session_id, "orphan_backup", "repairable", "restore_from_bak", -1, bak_messages + )) + else: + items.append(_new_audit_item( + session_id, + "orphan_backup_without_state_row", + "unsafe_to_repair", + "manual_review", + -1, + bak_messages, + )) + + index_path = session_dir / '_index.json' + if index_path.exists(): + index_ids = _read_index_session_ids(index_path) + for session_id in sorted(index_ids - live_ids): + items.append(_new_audit_item( + session_id, "index_missing_file", "repairable", "rebuild_index" + )) + for session_id in sorted(live_ids - index_ids): + items.append(_new_audit_item( + session_id, "index_missing_entry", "repairable", "rebuild_index", + _msg_count(session_dir / f"{session_id}.json"), -1, + )) + + for row in _read_state_db_missing_sidecar_rows(session_dir, state_db_path): + sid = str(row.get('id') or '') + items.append(_new_audit_item( + sid, + 
"state_db_missing_sidecar", + "repairable", + "materialize_from_state_db", + -1, + -1, + )) + + summary = {"ok": len(live_paths), "repairable": 0, "unsafe_to_repair": 0} + for item in items: + category = item.get('category') + if category in summary: + summary[category] += 1 + if summary["unsafe_to_repair"]: + overall = "needs_manual_review" + elif summary["repairable"]: + overall = "warn" + else: + overall = "ok" + return {"status": overall, "summary": summary, "items": items} + + +def repair_safe_session_recovery(session_dir: Path, state_db_path: Path | None = None) -> dict: + """Run safe, deterministic session recovery repairs. + + This mutates only repairable classes already handled by startup recovery: + shrunken live sidecars and orphan backups that are not tombstoned by a + readable state.db. Unsafe audit findings remain for manual review. + """ + before = audit_session_recovery(session_dir, state_db_path=state_db_path) + backup_repair = recover_all_sessions_on_startup( + session_dir, + rebuild_index=True, + state_db_path=state_db_path, + ) + sidecar_repair = recover_missing_sidecars_from_state_db(session_dir, state_db_path) + if sidecar_repair.get('materialized'): + try: + from api.models import _write_session_index + _write_session_index(updates=None) + except Exception as exc: + logger.warning("repair_safe_session_recovery: index rebuild after state.db reconciliation failed: %s", exc) + after = audit_session_recovery(session_dir, state_db_path=state_db_path) + unsafe_remaining = int((after.get("summary") or {}).get("unsafe_to_repair") or 0) + repairable_remaining = int((after.get("summary") or {}).get("repairable") or 0) + return { + "ok": unsafe_remaining == 0 and repairable_remaining == 0, + "repaired": int(backup_repair.get("restored") or 0) + int(sidecar_repair.get("materialized") or 0), + "before": before, + "backup_repair": backup_repair, + "sidecar_repair": sidecar_repair, + "after": after, + } + + +def recover_all_sessions_on_startup( + 
session_dir: Path, + rebuild_index: bool = False, + state_db_path: Path | None = None, +) -> dict: + """Scan session_dir for shrunken/orphaned sessions and restore from .bak. + + Returns {"scanned": N, "restored": M, "orphaned_backups": K, "details": [...]}. + """ + if not session_dir.exists(): + return {"scanned": 0, "restored": 0, "orphaned_backups": 0, "details": []} + scanned = 0 + restored = 0 + details: list[dict] = [] + live_paths = [path for path in sorted(session_dir.glob('*.json')) if not path.name.startswith('_')] + orphan_paths = _orphaned_backup_live_paths(session_dir, state_db_path=state_db_path) + for path in [*live_paths, *orphan_paths]: + # Skip non-session JSON files in the same dir: + # - ``_index.json`` is a top-level list of session metadata + # - any future non-session JSON marked with the ``_`` convention is + # skipped automatically (project convention for system files in + # directories that otherwise hold user data) + scanned += 1 + try: + result = recover_session(path) + except Exception as exc: + # Defensive: a malformed session file shouldn't break recovery + # for the rest. Log and continue. + logger.warning( + "recover_all_sessions_on_startup: skipped %s due to %s: %s", + path.name, type(exc).__name__, exc, + ) + continue + if result.get("restored"): + restored += 1 + details.append(result) + if restored: + logger.warning( + "recover_all_sessions_on_startup: restored %d/%d sessions from .bak. 
" + "If you weren't expecting this, check the session list for missing " + "messages — see #1558.", restored, scanned, + ) + if rebuild_index: + try: + from api.models import _write_session_index + _write_session_index(updates=None) + except Exception as exc: + logger.warning("recover_all_sessions_on_startup: index rebuild failed: %s", exc) + return { + "scanned": scanned, + "restored": restored, + "orphaned_backups": len(orphan_paths), + "details": details, + } + + +def _main() -> int: + parser = argparse.ArgumentParser(description="Audit Hermes WebUI session recovery state") + parser.add_argument("--audit", action="store_true", help="run a read-only recovery audit") + parser.add_argument("--session-dir", type=Path, required=True, help="path to WebUI sessions directory") + parser.add_argument("--state-db", type=Path, default=None, help="optional Hermes state.db path") + parser.add_argument("--repair-safe", action="store_true", help="run safe deterministic repairs after auditing") + args = parser.parse_args() + if args.repair_safe: + report = repair_safe_session_recovery(args.session_dir, state_db_path=args.state_db) + elif args.audit: + report = audit_session_recovery(args.session_dir, state_db_path=args.state_db) + else: + parser.error("choose --audit or --repair-safe") + print(json.dumps(report, sort_keys=True)) + return 0 + + +if __name__ == "__main__": + raise SystemExit(_main()) diff --git a/api/streaming.py b/api/streaming.py index 25b29db4..565a454e 100644 --- a/api/streaming.py +++ b/api/streaming.py @@ -20,15 +20,19 @@ from typing import Optional logger = logging.getLogger(__name__) from api.config import ( + get_config, STREAMS, STREAMS_LOCK, CANCEL_FLAGS, AGENT_INSTANCES, STREAM_PARTIAL_TEXT, STREAM_REASONING_TEXT, STREAM_LIVE_TOOL_CALLS, + STREAM_GOAL_RELATED, PENDING_GOAL_CONTINUATION, LOCK, SESSIONS, SESSION_DIR, _get_session_agent_lock, _set_thread_env, _clear_thread_env, + register_active_run, update_active_run, unregister_active_run, 
SESSION_AGENT_LOCKS, SESSION_AGENT_LOCKS_LOCK, resolve_model_provider, + resolve_custom_provider_connection, model_with_provider_context, ) -from api.helpers import redact_session_data +from api.helpers import redact_session_data, _redact_text from api.metering import meter # Global lock for os.environ writes. Per-session locks (_agent_lock) prevent @@ -37,6 +41,30 @@ from api.metering import meter # save/restore around the entire agent run. _ENV_LOCK = threading.Lock() + +def _prewarm_skill_tool_modules(): + """Import tools.skills_tool and tools.skill_manager_tool outside any lock. + + First-time module imports can trigger heavy initialisation (disk I/O, + transitive imports, plugin discovery). Performing those imports while + holding ``_ENV_LOCK`` serialises every concurrent session behind the + slowest import. Prewarming ensures the modules are already in + ``sys.modules`` before the lock is acquired, so the lock body only + does lightweight attribute patching. + + We cannot place these at module top-level because ``tools.*`` lives + in the hermes-agent package which may not be on ``sys.path`` at + import time (Docker volume-mount ordering). A dedicated helper + keeps the lazy-import try/except in one place and makes the intent + explicit. 
+ """ + for _mod_name in ('tools.skills_tool', 'tools.skill_manager_tool'): + try: + __import__(_mod_name) + except ImportError: + pass + + # Lazy import to avoid circular deps -- hermes-agent is on sys.path via api/config.py try: from run_agent import AIAgent @@ -59,6 +87,177 @@ def _get_ai_agent(): except ImportError: pass return AIAgent + + +def _is_quota_error_text(err_text: str) -> bool: + """Return True when provider text looks like quota/usage exhaustion.""" + _err_lower = str(err_text or '').lower() + return ( + 'insufficient credit' in _err_lower + or 'credit balance' in _err_lower + or 'credits exhausted' in _err_lower + or 'more credits' in _err_lower + or 'can only afford' in _err_lower + or 'fewer max_tokens' in _err_lower + or 'quota_exceeded' in _err_lower + or 'quota exceeded' in _err_lower + or 'exceeded your current quota' in _err_lower + # OpenAI Codex OAuth usage-exhaustion shapes (#1765). + or 'plan limit reached' in _err_lower + or 'usage_limit_exceeded' in _err_lower + or 'usage limit exceeded' in _err_lower + or 'reached the limit of messages' in _err_lower + or 'used up your usage' in _err_lower + or ('plan' in _err_lower and 'limit' in _err_lower and 'reached' in _err_lower) + ) + + +def _clarify_timeout_seconds(default: int = 120) -> int: + """Resolve clarify timeout from config, with bounded fallback.""" + try: + cfg = get_config() + raw = cfg.get("clarify", {}).get("timeout", default) + timeout_seconds = int(raw) + if timeout_seconds <= 0: + return default + return timeout_seconds + except Exception: + return default + + +def _classify_provider_error(err_str: str, exc=None, *, silent_failure: bool = False) -> dict: + """Classify provider/agent failure text for WebUI apperror UX. + + Keep this string-based until hermes-agent exposes stable structured + provider error classes for Codex OAuth plan limits. 
+ """ + err_str = str(err_str or '') + _err_lower = err_str.lower() + _exc_name = type(exc).__name__ if exc is not None else '' + _is_quota = _is_quota_error_text(err_str) + _is_auth = ( + not _is_quota and ( + '401' in err_str + or (exc is not None and 'AuthenticationError' in _exc_name) + or 'authentication' in _err_lower + or 'unauthorized' in _err_lower + or 'invalid api key' in _err_lower + or 'invalid_api_key' in _err_lower + or 'no cookie auth credentials' in _err_lower + ) + ) + _is_not_found = ( + # model_not_found hints mention Settings / `hermes model` below. + '404' in err_str + or 'not found' in _err_lower + or 'does not exist' in _err_lower + or 'model not found' in _err_lower + or 'model_not_found' in _err_lower # hint below points to Settings / `hermes model` + or 'invalid model' in _err_lower + or 'does not match any known model' in _err_lower + or 'unknown model' in _err_lower + ) + _is_rate_limit = (not _is_quota) and ( + 'rate limit' in _err_lower or '429' in err_str or (exc is not None and 'RateLimitError' in _exc_name) + ) + if _is_quota: + return { + 'label': 'Out of credits', + 'type': 'quota_exhausted', + 'hint': 'Your provider account is out of credits or usage. Top up, wait for the plan window to reset, or switch providers via `hermes model`.', + } + if _is_rate_limit: + return { + 'label': 'Rate limit reached', + 'type': 'rate_limit', + 'hint': 'Rate limit reached. The fallback model (if configured) was also exhausted. Try again in a moment.', + } + if _is_auth: + return { + 'label': 'Authentication failed', + 'type': 'auth_mismatch', + 'hint': 'The selected model may not be supported by your configured provider or your API key is invalid. Run `hermes model` in your terminal to update credentials, then restart the WebUI.', + } + if _is_not_found: + return { + 'label': 'Model not found', + 'type': 'model_not_found', + 'hint': 'The selected model was not found by the provider. 
Check the model ID in Settings or run `hermes model` to verify it exists for your provider.', + } + if silent_failure: + return { + 'label': 'No response from provider', + # Preserve the existing no_response event type (#373) while making + # the catch-all silent-failure message more specific for #1765. + 'type': 'no_response', + 'hint': 'The provider returned no content and no error. This often means a usage/rate limit was hit silently. Check provider status, switch providers via `hermes model`, or try again in a moment.', + } + return {'label': 'Error', 'type': 'error', 'hint': ''} + + +def _provider_error_payload(message: str, err_type: str, hint: str = '') -> dict: + """Build a bounded, redacted apperror payload with provider details.""" + _message = str(message or '') + _safe_message = _redact_text(_message).strip() if _message else '' + payload: dict = {'message': _safe_message or _message, 'type': err_type} + if hint: + payload['hint'] = hint + if _safe_message: + _details = _safe_message + if len(_details) > 1200: + _details = _details[:1197].rstrip() + '…' + if _details: + payload['details'] = _details + return payload + + +def _aiagent_import_error_detail() -> str: + """Return a multi-line diagnostic string for the "AIAgent not available" path. + + The bare ImportError ("AIAgent not available -- check that hermes-agent is + on sys.path") leaves users guessing at which python is running, where it's + looking, and what to fix. We assemble the same evidence a maintainer would + ask for first (issue #1695): the python that's running, the agent_dir env + var if set, the sys.path entries that mention 'hermes', and the most-common + fix (`pip install -e .` in the agent dir). + + Kept as a separate helper so it stays out of the hot path until we actually + need to raise — building it on every successful import would be wasted work. 
+ """ + import os as _os + import sys as _sys + + lines = ["AIAgent not available -- check that hermes-agent is on sys.path"] + lines.append("") + lines.append(f" python: {_sys.executable}") + agent_dir = _os.environ.get("HERMES_WEBUI_AGENT_DIR") + if agent_dir: + lines.append(f" HERMES_WEBUI_AGENT_DIR: {agent_dir}") + else: + lines.append(" HERMES_WEBUI_AGENT_DIR: (not set)") + + # Show only the sys.path entries that look relevant — full sys.path is noisy. + relevant = [p for p in _sys.path if "hermes" in p.lower() or "agent" in p.lower()] + if relevant: + lines.append(" sys.path entries mentioning hermes/agent:") + for entry in relevant[:6]: + lines.append(f" - {entry}") + if len(relevant) > 6: + lines.append(f" ... and {len(relevant) - 6} more") + else: + lines.append(" sys.path: (no entries mention hermes or agent)") + + lines.append("") + lines.append(" Most common fix: install the agent in editable mode so its modules") + lines.append(" appear on sys.path:") + lines.append("") + lines.append(" cd /path/to/hermes-agent") + lines.append(" pip install -e .") + lines.append("") + lines.append(" Then restart the WebUI.") + lines.append("") + lines.append(' Full troubleshooting: docs/troubleshooting.md ("AIAgent not available")') + return "\n".join(lines) from api.models import get_session, title_from from api.workspace import set_last_workspace @@ -69,6 +268,152 @@ _API_SAFE_MSG_KEYS = {'role', 'content', 'tool_calls', 'tool_call_id', 'name', ' _NATIVE_IMAGE_MAX_BYTES = 20 * 1024 * 1024 +_GATEWAY_ROUTING_TOP_LEVEL_KEYS = { + 'used_provider', + 'used_model', + 'requested_provider', + 'requested_model', +} +_GATEWAY_ROUTING_CONTAINER_KEYS = ( + 'llm_gateway', + 'gateway', + 'metadata', + 'response_metadata', + 'routing_metadata', + 'usage', +) +_GATEWAY_ROUTING_ATTEMPT_KEYS = { + 'provider', 'model', 'status', 'reason', 'selection_reason', 'score', + 'latency_ms', 'error', 'timestamp', 'selected', 'attempt', 'attempt_index', +} + + +def 
_clean_gateway_routing_scalar(value): + if value is None: + return None + if isinstance(value, (str, int, float, bool)): + text = str(value).strip() + if not text: + return None + return value if isinstance(value, (int, float, bool)) else text[:240] + return None + + +def _find_gateway_metadata_payload(payload): + if not isinstance(payload, dict): + return None + if any(k in payload for k in _GATEWAY_ROUTING_TOP_LEVEL_KEYS) or isinstance(payload.get('routing'), list): + return payload + for key in _GATEWAY_ROUTING_CONTAINER_KEYS: + nested = payload.get(key) + found = _find_gateway_metadata_payload(nested) + if found: + return found + return None + + +def _normalize_gateway_routing_metadata(payload, requested_model=None, requested_provider=None): + """Return safe LLM Gateway routing metadata, or None when absent. + + LLM Gateway response metadata can contain provider/model routing details, + but WebUI must only persist display-safe scalars and a bounded routing list. + Secrets or provider-specific request objects are deliberately ignored. 
+ """ + src = _find_gateway_metadata_payload(payload) + if not src: + return None + + normalized = {} + for key in _GATEWAY_ROUTING_TOP_LEVEL_KEYS: + value = _clean_gateway_routing_scalar(src.get(key)) + if value is not None: + normalized[key] = value + + if 'requested_model' not in normalized: + fallback_model = _clean_gateway_routing_scalar(requested_model) + if fallback_model is not None: + normalized['requested_model'] = fallback_model + if 'requested_provider' not in normalized: + fallback_provider = _clean_gateway_routing_scalar(requested_provider) + if fallback_provider is not None: + normalized['requested_provider'] = fallback_provider + + routing = [] + raw_routing = src.get('routing') + if isinstance(raw_routing, list): + for attempt in raw_routing[:12]: + if not isinstance(attempt, dict): + continue + clean_attempt = {} + for key in _GATEWAY_ROUTING_ATTEMPT_KEYS: + value = _clean_gateway_routing_scalar(attempt.get(key)) + if value is not None: + clean_attempt[key] = value + if clean_attempt: + routing.append(clean_attempt) + if routing: + normalized['routing'] = routing + + used_provider = str(normalized.get('used_provider') or '').strip().lower() + requested_provider_norm = str(normalized.get('requested_provider') or '').strip().lower() + used_model = str(normalized.get('used_model') or '').strip().lower() + requested_model_norm = str(normalized.get('requested_model') or '').strip().lower() + provider_changed = bool(used_provider and requested_provider_norm and used_provider != requested_provider_norm) + model_changed = bool(used_model and requested_model_norm and used_model != requested_model_norm) + attempted_providers = [ + str(a.get('provider') or '').strip().lower() + for a in routing + if a.get('provider') + ] + distinct_attempted_providers = {p for p in attempted_providers if p} + failed_before_selection = any( + str(a.get('status') or '').strip().lower() in {'failed', 'error', 'timeout', 'rejected'} + for a in routing + ) + has_failover = 
bool(provider_changed or len(distinct_attempted_providers) > 1 or failed_before_selection) + + if not ( + normalized.get('used_provider') or normalized.get('used_model') or routing or provider_changed or model_changed + ): + return None + normalized['provider_changed'] = provider_changed + normalized['model_changed'] = model_changed + normalized['has_failover'] = has_failover + return normalized + + +def _extract_gateway_routing_metadata(agent, result, requested_model=None, requested_provider=None): + candidates = [] + if isinstance(result, dict): + candidates.extend([ + result.get('llm_gateway'), + result.get('gateway'), + result.get('metadata'), + result.get('response_metadata'), + result.get('routing_metadata'), + result.get('usage'), + result, + ]) + for attr in ( + 'llm_gateway_metadata', + 'gateway_metadata', + 'last_response_metadata', + 'response_metadata', + 'routing_metadata', + 'last_usage', + ): + if agent is not None: + candidates.append(getattr(agent, attr, None)) + for candidate in candidates: + normalized = _normalize_gateway_routing_metadata( + candidate, + requested_model=requested_model, + requested_provider=requested_provider, + ) + if normalized: + return normalized + return None + def _build_agent_thread_env(profile_runtime_env: dict | None, workspace: str, session_id: str, profile_home: str) -> dict: """Build thread-local agent env with per-run values overriding profile defaults. @@ -126,17 +471,60 @@ def _is_valid_image(path: Path, mime: str) -> bool: return False -def _build_native_multimodal_message(workspace_ctx: str, msg_text: str, attachments, workspace: str): +def _resolve_image_input_mode(cfg: dict) -> str: + """Return ``"native"`` or ``"text"`` based on config, mirroring + ``agent/image_routing.py:decide_image_input_mode``. + + The agent has this logic, but the WebUI's ``_build_native_multimodal_message`` + was unconditionally embedding images as native ``image_url`` parts, completely + bypassing ``image_input_mode``. 
This caused silent failures when the main model + does not support images and the fallback model is also text-only (#21160-related). + """ + agent_cfg = cfg.get("agent") or {} + mode = str(agent_cfg.get("image_input_mode", "auto") or "auto").strip().lower() + if mode not in ("auto", "native", "text"): + mode = "auto" + + if mode == "native": + return "native" + if mode == "text": + return "text" + + # auto: if auxiliary.vision is explicitly configured → text mode + # (user opted into a dedicated vision backend) + aux = cfg.get("auxiliary") or {} + vision = aux.get("vision") or {} + provider = str(vision.get("provider") or "").strip().lower() + model_name = str(vision.get("model") or "").strip() + base_url = str(vision.get("base_url") or "").strip() + if provider not in ("", "auto") or model_name or base_url: + return "text" + + # No explicit vision config, no model-capability lookup available in WebUI. + # Default to native — the agent's ``_strip_images_from_messages`` guard will + # strip images on rejection and retry as text. + return "native" + + +def _build_native_multimodal_message(workspace_ctx: str, msg_text: str, attachments, workspace: str, *, cfg: dict = None): """Build native multimodal content parts for current-turn image uploads. WebUI uploads files into the active workspace. For image files, pass the bytes to Hermes as OpenAI-style image_url data URLs so vision-capable main models can consume them in the same request. Non-image files intentionally stay as text path attachments so the agent can inspect them with file tools. + + When *cfg* is provided, respects ``agent.image_input_mode`` — if the resolved + mode is ``"text"``, returns a plain string (attachments are not embedded) so + the agent's text-mode pipeline (``vision_analyze``) handles images. 
""" if not attachments: return workspace_ctx + msg_text + # ── Check image_input_mode before embedding anything ── + if cfg is not None and _resolve_image_input_mode(cfg) == "text": + return workspace_ctx + msg_text + parts = [{'type': 'text', 'text': workspace_ctx + msg_text}] workspace_root = Path(workspace).expanduser().resolve() image_count = 0 @@ -282,6 +670,27 @@ def _message_text(value) -> str: return _strip_thinking_markup(str(value or '').strip()) +_WORKSPACE_PREFIX_RE = re.compile(r'^\s*\[Workspace::v1:\s*(?:\\.|[^\]\\])+\]\s*') +_LEGACY_WORKSPACE_PREFIX_RE = re.compile(r'^\s*\[Workspace:[^\]]+\]\s*') + + +def _escape_workspace_prefix_path(path: str) -> str: + return str(path or '').replace('\\', '\\\\').replace(']', '\\]') + + +def _workspace_context_prefix(path: str) -> str: + return f"[Workspace::v1: {_escape_workspace_prefix_path(path)}]\n" + + +def _strip_workspace_prefix(text: str, *, include_legacy: bool = False) -> str: + """Remove WebUI-injected workspace tags without eating user-typed text.""" + value = str(text or '') + stripped = _WORKSPACE_PREFIX_RE.sub('', value, count=1) + if include_legacy and stripped == value: + stripped = _LEGACY_WORKSPACE_PREFIX_RE.sub('', value, count=1) + return stripped.strip() + + def _first_exchange_snippets(messages): """Return (first_user_text, first_assistant_text) snippets for title generation. 
@@ -742,7 +1151,7 @@ def _fallback_title_from_exchange(user_text: str, assistant_text: str) -> Option assistant_text = _strip_thinking_markup(assistant_text or '').strip() if not user_text: return None - user_text = re.sub(r'^\[Workspace:[^\]]+\]\s*', '', user_text) + user_text = _strip_workspace_prefix(user_text) user_text = re.sub(r'\s+', ' ', user_text).strip() assistant_text = re.sub(r'\s+', ' ', assistant_text).strip() combined = f"{user_text} {assistant_text}".strip().lower() @@ -792,7 +1201,12 @@ def _fallback_title_from_exchange(user_text: str, assistant_text: str) -> Option 'need', 'needs', 'want', 'wants', 'user', 'assistant', 'could', 'would', 'should', 'about', 'there', 'here', 'test', 'testing', 'title', 'summary', } - tokens = re.findall(r'[A-Za-z0-9][A-Za-z0-9_./+-]*', head) + # Unicode-aware Latin tokenization: keep the old "no leading underscore" + # and non-Latin placeholder behavior while allowing letters such as ä/ö/ü/ß. + # The previous ASCII-only pattern turned "führe" into "f" + "hre"; the short + # "f" was filtered and the broken "hre" became part of the title. + latin_word = r'A-Za-z0-9À-ÖØ-öø-ÿ' + tokens = re.findall(rf'[{latin_word}][{latin_word}_./+-]*', head) if not tokens: return 'Conversation topic' @@ -946,8 +1360,12 @@ def _run_background_title_refresh(session_id: str, user_text: str, assistant_tex return s.title = next_title s.llm_title_generated = True - s.save(touch_updated_at=False) effective_title = s.title + # Session.save() calls _write_session_index(), which acquires LOCK. + # Keep the per-session agent lock for mutation serialization, but + # release the global session LOCK before persisting to avoid a + # self-deadlock in the background title-refresh thread. 
+ s.save(touch_updated_at=False) _put_title_status(put_event, session_id, 'refreshed', llm_status, effective_title, raw_preview) put_event('title', {'session_id': session_id, 'title': effective_title}) logger.info("Adaptive title refresh: session=%s new_title=%r", session_id, effective_title) @@ -1120,6 +1538,12 @@ def _message_identity(msg): role = str(msg.get('role') or '') content = msg.get('content', '') text = _message_text(content) + if role == 'user': + # WebUI sends the model a workspace-prefixed user_message while the + # visible optimistic bubble contains only the human text. Treat them as + # the same turn for merge/dedup purposes; otherwise compaction results + # render two adjacent user bubbles ("Ok" and "[Workspace...]\nOk"). + text = _strip_workspace_prefix(text, include_legacy=True) if not text and not msg.get('tool_call_id') and not msg.get('tool_calls'): return None return ( @@ -1151,6 +1575,87 @@ def _is_context_compression_marker(msg): ) +def _compact_summary_text(raw_text: str | None, limit: int = 320) -> str | None: + """Normalize a text blob used in compression summary cards.""" + if not isinstance(raw_text, str): + return None + txt = raw_text.strip() + if not txt: + return None + txt = re.sub(r"\s+", " ", txt).strip() + if len(txt) > limit: + txt = f"{txt[: limit - 6]}…" + return txt + + +def _compression_anchor_message_key(message): + if not isinstance(message, dict): + return None + role = str(message.get('role') or '') + if not role or role == 'tool': + return None + content = message.get('content', '') + text = _message_text(content) + if len(text) > 160: + text = text[:160] + ts = message.get('_ts') or message.get('timestamp') + attachments = message.get('attachments') + attach_count = len(attachments) if isinstance(attachments, list) else 0 + if not text and not attach_count and not ts: + return None + return {'role': role, 'ts': ts, 'text': text, 'attachments': attach_count} + + +def 
_visible_messages_for_compression_anchor(messages): + out = [] + for m in messages or []: + if not isinstance(m, dict): + continue + role = m.get('role') + if not role or role == 'tool': + continue + content = m.get('content', '') + has_attachments = bool(m.get('attachments')) + has_tool_calls = bool(isinstance(m.get('tool_calls'), list) and m.get('tool_calls')) + has_tool_use = False + has_reasoning = bool(m.get('reasoning')) + if isinstance(content, list): + text = '\n'.join( + str(p.get('text') or p.get('content') or '') + for p in content + if isinstance(p, dict) + and p.get('type') in {'text', 'input_text', 'output_text'} + ).strip() + for part in content: + if not isinstance(part, dict): + continue + if part.get('type') == 'tool_use': + has_tool_use = True + if not text: + has_reasoning = has_reasoning or any( + isinstance(part, dict) + and part.get('type') in {'thinking', 'reasoning'} + for part in content + ) + else: + text = str(content or '').strip() + if text or has_attachments or has_tool_calls or has_tool_use or has_reasoning: + out.append(m) + return out + + +def _compression_summary_from_messages(messages): + for m in reversed(messages or []): + if not isinstance(m, dict): + continue + if not _is_context_compression_marker(m): + continue + text = _message_text(m.get('content')) + if text: + return text + return None + + def _find_current_user_turn(messages, msg_text): needle = " ".join(str(msg_text or '').split()) fallback = None @@ -1158,12 +1663,28 @@ def _find_current_user_turn(messages, msg_text): if not isinstance(msg, dict) or msg.get('role') != 'user': continue fallback = idx - text = " ".join(_message_text(msg.get('content', '')).split()) + text = " ".join( + _strip_workspace_prefix( + _message_text(msg.get('content', '')), + include_legacy=True, + ).split() + ) if needle and (needle in text or text in needle): return idx return fallback +def _drop_checkpointed_current_user_from_context(messages, msg_text): + """Return model history without 
an eager-checkpointed current user turn.""" + history = list(messages or []) + if not history: + return history + current_user_key = _message_identity({'role': 'user', 'content': msg_text}) + if current_user_key and _message_identity(history[-1]) == current_user_key: + return history[:-1] + return history + + def _merge_display_messages_after_agent_result(previous_display, previous_context, result_messages, msg_text): """Keep UI transcript durable while allowing model context to compact. @@ -1191,26 +1712,87 @@ def _merge_display_messages_after_agent_result(previous_display, previous_contex merged = previous_display[:] seen = {_message_identity(m) for m in merged} + current_user_key = _message_identity({'role': 'user', 'content': msg_text}) + current_user_in_candidates = any( + _message_identity(m) == current_user_key for m in candidates + ) + current_user_already_checkpointed = bool( + merged and _message_identity(merged[-1]) == current_user_key + ) + if ( + current_user_key is not None + and not current_user_in_candidates + and not current_user_already_checkpointed + and any( + isinstance(m, dict) and m.get('role') in ('assistant', 'tool') + for m in candidates + ) + ): + # Some provider retry/fallback paths can return an assistant/tool delta + # without echoing the current user turn. In deferred session-save mode + # the prompt exists only in pending_user_message, so appending that delta + # directly would make the assistant bubble appear attached to the prior + # exchange and then clear the pending prompt. Materialize the current + # turn at the transcript boundary before the assistant/tool response. 
+ current_user_msg = {'role': 'user', 'content': msg_text} + insert_at = 0 + while insert_at < len(candidates) and _is_context_compression_marker(candidates[insert_at]): + insert_at += 1 + candidates = candidates[:insert_at] + [current_user_msg] + candidates[insert_at:] + for msg in candidates: key = _message_identity(msg) + if ( + key is not None + and key == current_user_key + and merged + and _message_identity(merged[-1]) == key + ): + # Eager session-save mode can checkpoint the current user turn + # before the agent runs. When the agent returns that same user turn + # in result_messages, keep the durable checkpoint and append only + # the assistant/tool delta. + continue + if ( + key is not None + and isinstance(msg, dict) + and msg.get('role') == 'assistant' + and merged + and _message_identity(merged[-1]) == key + ): + # Some provider/result replay paths can include the same assistant + # message twice in the current delta. Treat only adjacent identity + # matches as replay duplicates so identical answers in separate + # user turns remain visible. 
+ continue if _is_context_compression_marker(msg) and key is not None and key in seen: continue - merged.append(copy.deepcopy(msg)) + display_msg = msg + if key is not None and key == current_user_key and isinstance(msg, dict) and msg.get('role') == 'user': + display_msg = copy.deepcopy(msg) + display_msg['content'] = msg_text + merged.append(copy.deepcopy(display_msg)) if key is not None: seen.add(key) return merged -def _tool_result_snippet(raw) -> str: - """Extract a compact result preview from a stored tool message payload.""" +_TOOL_RESULT_SNIPPET_MAX = 4000 + + +def _tool_result_snippet(raw, limit: int = _TOOL_RESULT_SNIPPET_MAX) -> str: + """Extract a bounded result preview from a stored tool message payload.""" + if limit <= 0: + return '' text = str(raw or '') try: - data = json.loads(text) + data = raw if isinstance(raw, dict) else json.loads(text) if isinstance(data, dict): - return str(data.get('output') or data.get('result') or data.get('error') or text)[:200] + preview = data.get('output') or data.get('result') or data.get('error') or text + text = str(preview) except Exception: pass - return text[:200] + return text[:limit] def _truncate_tool_args(args, limit: int = 6) -> dict: @@ -1312,6 +1894,43 @@ def _sse(handler, event, data): handler.wfile.flush() +def _materialize_pending_user_turn_before_error(session) -> bool: + """Persist the pending user prompt before clearing runtime stream state. + + Error paths often clear ``pending_user_message`` before appending an assistant + error marker. In deferred session-save mode that pending field can be the + only durable copy of the user's current turn, so clearing it makes the user + bubble disappear on reload/reconcile. Return True when a recovered user turn + was appended. 
+ """ + pending_text = str(getattr(session, 'pending_user_message', None) or '') + if not pending_text: + return False + normalized_pending = " ".join(pending_text.split()) + if normalized_pending: + for existing in reversed(list(getattr(session, 'messages', None) or [])[-8:]): + if not isinstance(existing, dict) or existing.get('role') != 'user': + continue + existing_text = " ".join(str(existing.get('content') or '').split()) + if existing_text == normalized_pending: + return False + recovered_ts = int(time.time()) + pending_started_at = getattr(session, 'pending_started_at', None) + if isinstance(pending_started_at, (int, float)) and pending_started_at > 0: + recovered_ts = int(pending_started_at) + recovered = { + 'role': 'user', + 'content': pending_text, + 'timestamp': recovered_ts, + '_recovered': True, + } + pending_attachments = getattr(session, 'pending_attachments', None) + if pending_attachments: + recovered['attachments'] = list(pending_attachments) + session.messages.append(recovered) + return True + + def _last_resort_sync_from_core(session, stream_id, agent_lock): """Final-exit guard: if the stream exits with pending_user_message still set, sync messages from the core transcript or add an error marker. @@ -1343,6 +1962,67 @@ def _last_resort_sync_from_core(session, stream_id, agent_lock): ) +def _attempt_credential_self_heal( + provider_id, session_id, _agent_lock_ref, +): + """Try to silently refresh credentials after a 401/auth error (#1401). + + Returns a new ``(agent, rt_dict)`` tuple on success so the caller can + retry the conversation. Returns ``None`` when self-heal is not + applicable (e.g. auth.json unchanged, provider unresolvable). + + Steps: + 1. Re-read ``~/.hermes/auth.json`` to pick up fresh credentials that + may have been written by a concurrent ``hermes model`` CLI invocation. + 2. Evict the session's cached agent so it is rebuilt with fresh keys. + 3. Evict the provider's credential-pool cache entry. + 4. 
Re-resolve the runtime provider. + 5. Return a new agent + resolved-provider dict (the caller must + re-invoke ``run_conversation`` with these). + """ + try: + from api.oauth import ( + read_auth_json, + resolve_runtime_provider_with_anthropic_env_lock, + ) + from api.config import ( + SESSION_AGENT_CACHE, SESSION_AGENT_CACHE_LOCK, + invalidate_credential_pool_cache, + ) + from hermes_cli.runtime_provider import resolve_runtime_provider + + # 1. Re-read auth.json (triggers a fresh credential scan) + _fresh_auth = read_auth_json() + if not _fresh_auth: + logger.debug('[webui] self-heal: auth.json empty or missing, skipping') + return None + + # 2. Evict the cached agent for this session + with SESSION_AGENT_CACHE_LOCK: + SESSION_AGENT_CACHE.pop(session_id, None) + + # 3. Invalidate the credential pool for this provider + invalidate_credential_pool_cache(provider_id) + + # 4. Re-resolve runtime provider with fresh credentials + _new_rt = resolve_runtime_provider_with_anthropic_env_lock( + resolve_runtime_provider, + requested=provider_id, + ) + + logger.info( + '[webui] self-heal: credential refresh succeeded for provider=%s session=%s', + provider_id, session_id, + ) + return _new_rt + except Exception as _heal_err: + logger.warning( + '[webui] self-heal: failed for provider=%s session=%s: %s', + provider_id, session_id, _heal_err, + ) + return None + + def _run_agent_streaming( session_id, msg_text, @@ -1353,6 +2033,7 @@ def _run_agent_streaming( *, ephemeral=False, model_provider=None, + goal_related=False, ): """Run agent in background thread, writing SSE events to STREAMS[stream_id]. 
@@ -1362,6 +2043,16 @@ def _run_agent_streaming( q = STREAMS.get(stream_id) if q is None: return + register_active_run( + stream_id, + session_id=session_id, + started_at=time.time(), + phase="starting", + workspace=str(workspace), + model=model, + provider=model_provider, + ephemeral=bool(ephemeral), + ) s = None _rt = {} old_cwd = None @@ -1370,15 +2061,10 @@ def _run_agent_streaming( old_hermes_home = None old_profile_env = {} - # ── MCP Server Discovery (lazy import, idempotent) ── - # discover_mcp_tools() is called here (rather than at server startup) so that - # the hermes-agent package is fully initialized before we try to connect. - # It is safe to call multiple times — already-connected servers are skipped. - try: - from tools.mcp_tool import discover_mcp_tools - discover_mcp_tools() - except Exception: - pass # MCP not available or not configured — non-fatal + # MCP discovery moved to AFTER the per-profile HERMES_HOME mutation below + # (was here at v0.51.30) — the previous placement always read the default + # profile's mcp_servers because os.environ['HERMES_HOME'] hadn't been + # rewritten yet. See https://github.com/nesquena/hermes-webui/issues/1968. 
# Sprint 10: create a cancel event for this stream cancel_event = threading.Event() @@ -1388,6 +2074,103 @@ def _run_agent_streaming( STREAM_REASONING_TEXT[stream_id] = '' # start accumulating reasoning trace (#1361 §A) STREAM_LIVE_TOOL_CALLS[stream_id] = [] # start accumulating tool calls (#1361 §B) + agent = None + _live_prompt_estimate_tokens = [0] + _live_prompt_exact_tokens = [0] + _live_prompt_estimate_seen_ids = set() + + def _seed_live_prompt_estimate() -> int: + """Capture the latest exact prompt size before adding live tool deltas.""" + if _live_prompt_estimate_tokens[0] > 0: + return _live_prompt_estimate_tokens[0] + _base = 0 + _agent = agent + if _agent is not None: + try: + _cc = getattr(_agent, 'context_compressor', None) + if _cc: + _base = getattr(_cc, 'last_prompt_tokens', 0) or 0 + except Exception: + _base = 0 + if not _base: + try: + _session_obj = get_session(session_id) + _base = getattr(_session_obj, 'last_prompt_tokens', 0) or 0 + except Exception: + _base = 0 + _live_prompt_estimate_tokens[0] = int(_base or 0) + _live_prompt_exact_tokens[0] = _live_prompt_estimate_tokens[0] + return _live_prompt_estimate_tokens[0] + + def _bump_live_prompt_estimate(messages) -> int: + """Increment a rough next-prompt estimate from live tool activity.""" + if not messages: + return _live_prompt_estimate_tokens[0] + try: + from agent.model_metadata import estimate_messages_tokens_rough + _delta = int(estimate_messages_tokens_rough(messages) or 0) + except Exception: + _delta = 0 + if _delta > 0: + _seed_live_prompt_estimate() + _live_prompt_estimate_tokens[0] += _delta + return _live_prompt_estimate_tokens[0] + + def _live_usage_snapshot(): + """Best-effort live usage payload for mid-stream UI updates. + + During tool execution the final `done` event has not fired yet, but the + frontend still benefits from seeing the latest known token / context + values. 
These are exact for the most recent model call and a truthful + lower bound for the pending next call after a tool result is appended. + """ + _usage = { + 'input_tokens': 0, + 'output_tokens': 0, + 'estimated_cost': 0, + 'context_length': 0, + 'threshold_tokens': 0, + 'last_prompt_tokens': 0, + } + try: + _session_obj = get_session(session_id) + except Exception: + _session_obj = None + + _agent = agent + if _agent is not None: + try: + _usage['input_tokens'] = getattr(_agent, 'session_prompt_tokens', 0) or 0 + _usage['output_tokens'] = getattr(_agent, 'session_completion_tokens', 0) or 0 + _usage['estimated_cost'] = getattr(_agent, 'session_estimated_cost_usd', 0) or 0 + except Exception: + pass + try: + _cc = getattr(_agent, 'context_compressor', None) + if _cc: + _usage['context_length'] = getattr(_cc, 'context_length', 0) or 0 + _usage['threshold_tokens'] = getattr(_cc, 'threshold_tokens', 0) or 0 + _usage['last_prompt_tokens'] = getattr(_cc, 'last_prompt_tokens', 0) or 0 + except Exception: + pass + + if _session_obj is not None: + for _field in ('input_tokens', 'output_tokens', 'estimated_cost', 'context_length', 'threshold_tokens', 'last_prompt_tokens'): + if not _usage.get(_field): + try: + _usage[_field] = getattr(_session_obj, _field, 0) or 0 + except Exception: + pass + + _real_prompt_tokens = int(_usage.get('last_prompt_tokens') or 0) + if _real_prompt_tokens and _real_prompt_tokens != _live_prompt_exact_tokens[0]: + _live_prompt_exact_tokens[0] = _real_prompt_tokens + _live_prompt_estimate_tokens[0] = _real_prompt_tokens + elif _live_prompt_estimate_tokens[0] > _real_prompt_tokens: + _usage['last_prompt_tokens'] = _live_prompt_estimate_tokens[0] + + return _usage + # Register this stream with the global streaming meter meter().begin_session(stream_id) @@ -1404,7 +2187,8 @@ def _run_agent_streaming( if _metering_stop.wait(interval): break # stream was cancelled or ended — exit stats = meter().get_stats() - stats['session_id'] = stream_id + 
stats['session_id'] = session_id + stats['usage'] = _live_usage_snapshot() put('metering', stats) _metering_thread = threading.Thread(target=_metering_ticker, daemon=True) @@ -1419,6 +2203,29 @@ def _run_agent_streaming( except Exception: logger.debug("Failed to put event to queue") + def _agent_status_callback(kind, message): + """Bridge Agent lifecycle compression status into WebUI SSE.""" + _message = str(message or '').strip() + _kind = str(kind or '').strip().lower() + if not _message: + return + _lower = _message.lower() + _is_compression_start = ( + _kind == 'lifecycle' + and ( + 'preflight compression' in _lower + or 'compressing' in _lower + or 'compacting context' in _lower + or 'context too large' in _lower + ) + ) + if not _is_compression_start: + return + put('compressing', { + 'session_id': session_id, + 'message': 'Auto-compressing context to continue...', + }) + # Initialised here (before any code that may raise) so the outer `finally` # block can safely check `if _checkpoint_stop is not None` even when an # exception fires before the checkpoint thread is created (Issue #765). @@ -1427,6 +2234,7 @@ def _run_agent_streaming( _agent_lock = None try: s = get_session(session_id) + update_active_run(stream_id, phase="running", session_id=session_id) s.workspace = str(Path(workspace).expanduser().resolve()) s.model = model provider_context = ( @@ -1455,7 +2263,24 @@ def _run_agent_streaming( except ImportError: _profile_home = os.environ.get('HERMES_HOME', '') _profile_runtime_env = {} - + + # Capture the resolved profile name now, while profile context is + # reliable. Used in the compression migration block to stamp s.profile + # on the continuation session. We resolve it here rather than calling + # get_active_profile_name() at compression time because that function + # reads thread-local storage (_tls.profile) set by set_request_profile() + # on the HTTP handler thread. The streaming thread is a separate + # threading.Thread and does not inherit TLS. 
At compression time, + # get_active_profile_name() would fall back to the process-global + # _active_profile, which may belong to a different concurrent tab. + _resolved_profile_name = getattr(s, 'profile', None) + if not _resolved_profile_name: + try: + from api.profiles import get_active_profile_name + _resolved_profile_name = get_active_profile_name() + except Exception: + _resolved_profile_name = None + _thread_env = _build_agent_thread_env( _profile_runtime_env, str(s.workspace), @@ -1463,6 +2288,10 @@ def _run_agent_streaming( _profile_home, ) _set_thread_env(**_thread_env) + # Prewarm skill-tool imports *before* acquiring the lock so that + # first-time module initialisation (which can be slow) does not + # block other concurrent sessions waiting on _ENV_LOCK (#2024). + _prewarm_skill_tool_modules() # Still set process-level env as fallback for tools that bypass thread-local # Acquire lock only for the env mutation, then release before the agent runs. # The finally block re-acquires to restore — keeping critical sections short @@ -1479,7 +2308,52 @@ def _run_agent_streaming( os.environ['HERMES_SESSION_KEY'] = session_id if _profile_home: os.environ['HERMES_HOME'] = _profile_home + # Patch module-level caches to match the active profile. + # _set_hermes_home() does this for process-wide switches + # but per-request switches skip it (#1700). + # Modules were prewarmed by _prewarm_skill_tool_modules() + # above, so we only do lightweight sys.modules lookups and + # attribute assignments here — no first-time import under + # the lock (#2024). 
+ from pathlib import Path as _P + import sys as _sys + _ph = _P(_profile_home) + _sk = _sys.modules.get('tools.skills_tool') + if _sk is not None: + try: + _sk.HERMES_HOME = _ph + _sk.SKILLS_DIR = _ph / 'skills' + except AttributeError: + pass + _sm = _sys.modules.get('tools.skill_manager_tool') + if _sm is not None: + try: + _sm.HERMES_HOME = _ph + _sm.SKILLS_DIR = _ph / 'skills' + except AttributeError: + pass # Lock released — agent runs without holding it + # ── MCP Server Discovery (lazy import, idempotent) ── + # MUST run AFTER the HERMES_HOME mutation above — `discover_mcp_tools()` + # reads `~/.hermes/config.yaml` via `get_hermes_home()`, which uses + # `os.environ['HERMES_HOME']`. Calling it before the mutation always + # loaded the default profile's `mcp_servers`, even when the session + # was stamped with a non-default profile. See issue #1968. + # + # NOTE: `_servers` in `tools/mcp_tool.py` is a process-global registry + # keyed by server name. This means once profile A registers a server + # named e.g. `postgres`, profile B's discovery sees it as already + # connected and skips it — even if B's config points at a different + # binary. Fully fixing multi-profile concurrent use requires keying + # `_servers` by `(profile_home, name)` upstream in hermes-agent; that + # lives outside this WebUI repo. This change fixes the headline bug + # for users who run a single non-default profile per WebUI process. + try: + from tools.mcp_tool import discover_mcp_tools + discover_mcp_tools() + except Exception: + pass # MCP not available or not configured — non-fatal + # Register a gateway-style notify callback so the approval system can # push the `approval` SSE event the moment a dangerous command is # detected, without waiting for the next on_tool() poll cycle. 
@@ -1518,7 +2392,7 @@ def _run_agent_streaming( def _clarify_callback_impl(question, choices, sid, cancel_evt, put_event): """Bridge Hermes clarify prompts to the WebUI.""" - timeout = 120 + timeout = _clarify_timeout_seconds() choices_list = [str(choice) for choice in (choices or [])] data = { 'question': str(question or ''), @@ -1526,6 +2400,7 @@ def _run_agent_streaming( 'session_id': sid, 'kind': 'clarify', 'requested_at': time.time(), + 'timeout_seconds': timeout, } try: from api.clarify import submit_pending as _submit_clarify_pending, clear_pending as _clear_clarify_pending @@ -1561,12 +2436,15 @@ def _run_agent_streaming( try: _token_sent = False # tracks whether any streamed tokens were sent + _self_healed = False # (#1401) prevents infinite self-heal retries _reasoning_text = '' # accumulates reasoning/thinking trace for persistence _live_tool_calls = [] # tool progress fallback when final messages omit tool IDs - # Throttle: emit metering events at most every 100 ms so the TPS label - # feels live during fast token streams without flooding the SSE channel. + # Throttle: emit metering events at most every 100 ms so the per-message + # TPS label feels live during fast token streams without flooding SSE. 
_metering_last_emit = [time.monotonic() - 1] # fire immediately on first token + _metering_output_deltas = [0] + _metering_reasoning_deltas = [0] def _emit_metering(): now = time.monotonic() @@ -1574,7 +2452,10 @@ def _run_agent_streaming( return _metering_last_emit[0] = now stats = meter().get_stats() - stats['session_id'] = stream_id + stats['session_id'] = session_id + stats['usage'] = _live_usage_snapshot() + stats.setdefault('tps_available', False) + stats.setdefault('estimated', False) put('metering', stats) def on_token(text): @@ -1586,8 +2467,11 @@ def _run_agent_streaming( if stream_id in STREAM_PARTIAL_TEXT: STREAM_PARTIAL_TEXT[stream_id] += str(text) put('token', {'text': text}) - # Update global throughput meter - meter().record_token(stream_id, len(STREAM_PARTIAL_TEXT[stream_id])) + # Update live throughput from stream delta callbacks, not from + # byte/character length. If a backend cannot provide live deltas, + # the frontend hides TPS rather than showing an estimate. + _metering_output_deltas[0] += 1 + meter().record_token(stream_id, _metering_output_deltas[0]) _emit_metering() def on_reasoning(text): @@ -1599,15 +2483,56 @@ def _run_agent_streaming( if stream_id in STREAM_REASONING_TEXT: STREAM_REASONING_TEXT[stream_id] += str(text) put('reasoning', {'text': str(text)}) - # Track reasoning tokens in the meter so TPS reflects all AI output - meter().record_reasoning(stream_id, len(_reasoning_text)) + # Track reasoning deltas in the meter so live TPS reflects all AI output. 
+ _metering_reasoning_deltas[0] += 1 + meter().record_reasoning(stream_id, _metering_reasoning_deltas[0]) _emit_metering() + def on_interim_assistant(text, **cb_kwargs): + if text is None: + return + visible = str(text).strip() + if not visible: + return + put('interim_assistant', { + 'text': visible, + 'already_streamed': bool(cb_kwargs.get('already_streamed', False)), + }) + # Pre-initialise the activity counter here so on_tool (which # closes over it) never captures an unbound name even if this # block is reordered later (Issue #765). _checkpoint_activity = [0] + def _record_live_tool_start(tool_call_id, name, args): + if not tool_call_id or tool_call_id in _live_prompt_estimate_seen_ids: + return + _live_prompt_estimate_seen_ids.add(tool_call_id) + _tool_call = { + 'id': tool_call_id, + 'type': 'function', + 'function': { + 'name': str(name or ''), + 'arguments': json.dumps(args if isinstance(args, dict) else {}, ensure_ascii=False, sort_keys=True), + }, + } + _bump_live_prompt_estimate([{ + 'role': 'assistant', + 'content': '', + 'tool_calls': [_tool_call], + }]) + + def _record_live_tool_complete(tool_call_id, name, function_result): + if not tool_call_id: + return + _result_text = _tool_result_snippet(function_result) + _bump_live_prompt_estimate([{ + 'role': 'tool', + 'name': str(name or ''), + 'tool_call_id': tool_call_id, + 'content': _result_text, + }]) + def on_tool(*cb_args, **cb_kwargs): nonlocal _reasoning_text event_type = None @@ -1634,7 +2559,8 @@ def _run_agent_streaming( if stream_id in STREAM_REASONING_TEXT: STREAM_REASONING_TEXT[stream_id] += str(reason_text) put('reasoning', {'text': str(reason_text)}) - meter().record_reasoning(stream_id, len(_reasoning_text)) + _metering_reasoning_deltas[0] += 1 + meter().record_reasoning(stream_id, _metering_reasoning_deltas[0]) _emit_metering() return @@ -1662,6 +2588,10 @@ def _run_agent_streaming( 'preview': preview, 'args': args_snap, }) + _tool_stats = meter().get_stats() + _tool_stats['session_id'] = 
session_id + _tool_stats['usage'] = _live_usage_snapshot() + put('metering', _tool_stats) # Fallback: poll for pending approval in case notify_cb wasn't # registered (e.g. older approval module without gateway support). try: @@ -1705,11 +2635,35 @@ def _run_agent_streaming( 'duration': cb_kwargs.get('duration'), 'is_error': bool(cb_kwargs.get('is_error', False)), }) + _tool_stats = meter().get_stats() + _tool_stats['session_id'] = session_id + _tool_stats['usage'] = _live_usage_snapshot() + put('metering', _tool_stats) return + def on_tool_start(tool_call_id, name, args): + try: + _record_live_tool_start(tool_call_id, name, args) + _tool_stats = meter().get_stats() + _tool_stats['session_id'] = session_id + _tool_stats['usage'] = _live_usage_snapshot() + put('metering', _tool_stats) + except Exception: + logger.debug('Failed to update live prompt estimate on tool start', exc_info=True) + + def on_tool_complete(tool_call_id, name, args, function_result): + try: + _record_live_tool_complete(tool_call_id, name, function_result) + _tool_stats = meter().get_stats() + _tool_stats['session_id'] = session_id + _tool_stats['usage'] = _live_usage_snapshot() + put('metering', _tool_stats) + except Exception: + logger.debug('Failed to update live prompt estimate on tool completion', exc_info=True) + _AIAgent = _get_ai_agent() if _AIAgent is None: - raise ImportError("AIAgent not available -- check that hermes-agent is on sys.path") + raise ImportError(_aiagent_import_error_detail()) # Initialize SessionDB so session_search works in WebUI sessions _session_db = None @@ -1726,8 +2680,12 @@ def _run_agent_streaming( # Pass the resolved provider so non-default providers get their own credentials. 
resolved_api_key = None try: + from api.oauth import resolve_runtime_provider_with_anthropic_env_lock from hermes_cli.runtime_provider import resolve_runtime_provider - _rt = resolve_runtime_provider(requested=resolved_provider) + _rt = resolve_runtime_provider_with_anthropic_env_lock( + resolve_runtime_provider, + requested=resolved_provider, + ) resolved_api_key = _rt.get("api_key") if not resolved_provider: resolved_provider = _rt.get("provider") @@ -1736,6 +2694,16 @@ def _run_agent_streaming( except Exception as _e: print(f"[webui] WARNING: resolve_runtime_provider failed: {_e}", flush=True) + # Named custom providers (custom:slug) may not be resolvable by + # hermes_cli.runtime_provider directly. Fall back to config.yaml + # custom_providers[] so WebUI can pass explicit creds/base_url. + if isinstance(resolved_provider, str) and resolved_provider.startswith("custom:"): + _cp_key, _cp_base = resolve_custom_provider_connection(resolved_provider) + if not resolved_api_key and _cp_key: + resolved_api_key = _cp_key + if not resolved_base_url and _cp_base: + resolved_base_url = _cp_base + # Read per-profile config at call time (not module-level snapshot) from api.config import get_config as _get_config _cfg = _get_config() @@ -1792,13 +2760,55 @@ def _run_agent_streaming( import inspect as _inspect _agent_params = set(_inspect.signature(_AIAgent.__init__).parameters) + # CLI-parity max-iteration budget: read config.yaml's + # agent.max_turns and pass it to AIAgent when supported. Without + # this WebUI-created agents silently use AIAgent's constructor + # default (90), so long browser-originated tasks hit the + # "maximum number of tool-calling iterations" summary path even + # after the operator raises Hermes' global turn budget. 
+ _max_iterations_cfg = None + try: + _raw_max_iterations = None + _agent_cfg_for_iterations = _cfg.get('agent', {}) if isinstance(_cfg, dict) else {} + if isinstance(_agent_cfg_for_iterations, dict): + _raw_max_iterations = _agent_cfg_for_iterations.get('max_turns') + if _raw_max_iterations is None and isinstance(_cfg, dict): + # Back-compat for older Hermes config files that used a + # root-level max_turns key. + _raw_max_iterations = _cfg.get('max_turns') + if _raw_max_iterations is not None: + _parsed_max_iterations = int(_raw_max_iterations) + if _parsed_max_iterations > 0: + _max_iterations_cfg = _parsed_max_iterations + except Exception: + _max_iterations_cfg = None + + # CLI-parity max output cap: read config.yaml's max_tokens and pass + # it to AIAgent when supported. Without this WebUI-created agents use + # provider-native output ceilings (e.g. Claude via OpenRouter can + # request 64k), which may turn an otherwise usable fallback into a + # 402 "more credits / fewer max_tokens" failure. + _max_tokens_cfg = None + try: + _raw_max_tokens = _cfg.get('max_tokens') + if _raw_max_tokens is None: + _agent_cfg_for_tokens = _cfg.get('agent', {}) + if isinstance(_agent_cfg_for_tokens, dict): + _raw_max_tokens = _agent_cfg_for_tokens.get('max_tokens') + if _raw_max_tokens is not None: + _parsed_max_tokens = int(_raw_max_tokens) + if _parsed_max_tokens > 0: + _max_tokens_cfg = _parsed_max_tokens + except Exception: + _max_tokens_cfg = None + # CLI-parity reasoning effort: read agent.reasoning_effort from the # active profile's config.yaml (the same key the CLI writes via # `/reasoning `) and hand the parsed dict to AIAgent. When # the key is absent or invalid, pass None → agent uses its default. 
try: from api.config import parse_reasoning_effort as _parse_reff - _effort_cfg = _cfg.cfg.get('agent', {}) if isinstance(_cfg.cfg, dict) else {} + _effort_cfg = _cfg.get('agent', {}) if isinstance(_cfg, dict) else {} _effort_raw = _effort_cfg.get('reasoning_effort') if isinstance(_effort_cfg, dict) else None _reasoning_config = _parse_reff(_effort_raw) except Exception: @@ -1830,6 +2840,18 @@ def _run_agent_streaming( # but guard defensively to avoid TypeError on an older agent build. if 'reasoning_config' in _agent_params and _reasoning_config is not None: _agent_kwargs['reasoning_config'] = _reasoning_config + if 'interim_assistant_callback' in _agent_params: + _agent_kwargs['interim_assistant_callback'] = on_interim_assistant + if 'tool_start_callback' in _agent_params: + _agent_kwargs['tool_start_callback'] = on_tool_start + if 'tool_complete_callback' in _agent_params: + _agent_kwargs['tool_complete_callback'] = on_tool_complete + if 'status_callback' in _agent_params: + _agent_kwargs['status_callback'] = _agent_status_callback + if 'max_iterations' in _agent_params and _max_iterations_cfg is not None: + _agent_kwargs['max_iterations'] = _max_iterations_cfg + if 'max_tokens' in _agent_params and _max_tokens_cfg is not None: + _agent_kwargs['max_tokens'] = _max_tokens_cfg # Params added in newer hermes-agent — skip if not supported if 'api_mode' in _agent_params: _agent_kwargs['api_mode'] = _rt.get('api_mode') @@ -1861,7 +2883,18 @@ def _run_agent_streaming( _hashlib.sha256((resolved_api_key or '').encode()).hexdigest()[:16], resolved_base_url or '', resolved_provider or '', + _max_iterations_cfg or '', + _max_tokens_cfg or '', + _fallback_resolved or {}, sorted(_toolsets) if _toolsets else [], + _reasoning_config or {}, + # #1897: profile_home is part of the agent's identity because + # AIAgent caches `_cached_system_prompt` from `load_soul_md()` + # at construction time, sourced from HERMES_HOME. 
Same-session + # profile switches keep `session_id` stable, so without this + # field the cached agent silently retains the previous + # profile's SOUL.md (and any other profile-scoped context). + _profile_home or '', ], sort_keys=True) _agent_sig = _hashlib.sha256(_sig_blob.encode()).hexdigest()[:16] @@ -1878,6 +2911,14 @@ def _run_agent_streaming( # objects (put queue, cancel_event) that are new each request. agent.stream_delta_callback = _agent_kwargs.get('stream_delta_callback') agent.tool_progress_callback = _agent_kwargs.get('tool_progress_callback') + if hasattr(agent, 'tool_start_callback'): + agent.tool_start_callback = _agent_kwargs.get('tool_start_callback') + if hasattr(agent, 'tool_complete_callback'): + agent.tool_complete_callback = _agent_kwargs.get('tool_complete_callback') + if hasattr(agent, 'status_callback'): + agent.status_callback = _agent_kwargs.get('status_callback') + if hasattr(agent, 'interim_assistant_callback'): + agent.interim_assistant_callback = _agent_kwargs.get('interim_assistant_callback') if hasattr(agent, 'reasoning_callback'): agent.reasoning_callback = _agent_kwargs.get('reasoning_callback') if hasattr(agent, 'clarify_callback'): @@ -1939,15 +2980,15 @@ def _run_agent_streaming( # Prepend workspace context so the agent always knows which directory # to use for file operations, regardless of session age or AGENTS.md defaults. - workspace_ctx = f"[Workspace: {s.workspace}]\n" + workspace_ctx = _workspace_context_prefix(str(s.workspace)) workspace_system_msg = ( f"Active workspace at session start: {s.workspace}\n" - "Every user message is prefixed with [Workspace: /absolute/path] indicating the " + "Every user message is prefixed with [Workspace::v1: /absolute/path] indicating the " "workspace the user has selected in the web UI at the time they sent that message. " "This tag is the single authoritative source of the active workspace and updates " "with every message. 
It overrides any prior workspace mentioned in this system " "prompt, memory, or conversation history. Always use the value from the most recent " - "[Workspace: ...] tag as your default working directory for ALL file operations: " + "[Workspace::v1: ...] tag as your default working directory for ALL file operations: " "write_file, read_file, search_files, terminal workdir, and patch. " "Never fall back to a hardcoded path when this tag is present." ) @@ -1972,8 +3013,17 @@ def _run_agent_streaming( # Pass personality via ephemeral_system_prompt (agent's own mechanism) if _personality_prompt: agent.ephemeral_system_prompt = _personality_prompt + _pending_started_at = getattr(s, 'pending_started_at', None) + # Normal chat-start sets pending_started_at before spawning this thread; + # fallback to now only for recovered/legacy flows where that marker is absent + # or has been zeroed out (e.g. via a buggy migration / manual file edit). + # Truthy-check covers None, missing-attr, and 0 uniformly. 
+ _turn_started_at = _pending_started_at if _pending_started_at else time.time() _previous_messages = list(s.messages or []) - _previous_context_messages = list(_session_context_messages(s)) + _previous_context_messages = _drop_checkpointed_current_user_from_context( + _session_context_messages(s), + msg_text, + ) _pre_compression_count = getattr( getattr(agent, 'context_compressor', None), 'compression_count', 0, @@ -2018,7 +3068,7 @@ def _run_agent_streaming( ) _ckpt_thread.start() - user_message = _build_native_multimodal_message(workspace_ctx, msg_text, attachments, workspace) + user_message = _build_native_multimodal_message(workspace_ctx, msg_text, attachments, workspace, cfg=_cfg) result = agent.run_conversation( user_message=user_message, system_message=workspace_system_msg, @@ -2093,29 +3143,112 @@ def _run_agent_streaming( if not _assistant_added and not _token_sent: _last_err = getattr(agent, '_last_error', None) or result.get('error') or '' _err_str = str(_last_err) if _last_err else '' - _err_lower = _err_str.lower() - _is_quota = ( - 'insufficient credit' in _err_lower - or 'credit balance' in _err_lower - or 'credits exhausted' in _err_lower - or 'quota_exceeded' in _err_lower - or 'quota exceeded' in _err_lower - or 'exceeded your current quota' in _err_lower - ) - _is_auth = ( - not _is_quota and ( - '401' in _err_str - or (_last_err and 'AuthenticationError' in type(_last_err).__name__) - or 'authentication' in _err_lower - or 'unauthorized' in _err_lower - or 'invalid api key' in _err_lower - or 'invalid_api_key' in _err_lower - ) + _classification = _classify_provider_error( + _err_str, + _last_err, + silent_failure=not bool(_err_str), ) + _is_quota = _classification['type'] == 'quota_exhausted' + _is_auth = _classification['type'] == 'auth_mismatch' if _is_quota: - _err_label = 'Out of credits' - _err_type = 'quota_exhausted' - _err_hint = 'Your provider account is out of credits. Top up your balance or switch providers via `hermes model`.' 
+ _err_label = _classification['label'] + _err_type = _classification['type'] + _err_hint = _classification['hint'] + elif _is_auth and not _self_healed: + # ── Credential self-heal on 401 (#1401) ── + # Before emitting the error, try re-reading credentials + # and retrying once with a fresh agent. + _heal_result = None + _heal_rt = _attempt_credential_self_heal( + resolved_provider or '', session_id, _agent_lock, + ) + if _heal_rt is not None: + logger.info('[webui] self-heal: retrying stream after credential refresh') + # Rebuild runtime variables from the refreshed resolve + _rt = _heal_rt + resolved_api_key = _heal_rt.get('api_key') + if not resolved_provider: + resolved_provider = _heal_rt.get('provider') + if not resolved_base_url: + resolved_base_url = _heal_rt.get('base_url') + if isinstance(resolved_provider, str) and resolved_provider.startswith('custom:'): + _cp_key, _cp_base = resolve_custom_provider_connection(resolved_provider) + if not resolved_api_key and _cp_key: + resolved_api_key = _cp_key + if not resolved_base_url and _cp_base: + resolved_base_url = _cp_base + # Rebuild agent kwargs and create a fresh agent + _agent_kwargs['api_key'] = resolved_api_key + _agent_kwargs['base_url'] = resolved_base_url + _agent_kwargs['model'] = resolved_model + _agent_kwargs['provider'] = resolved_provider + if 'credential_pool' in _agent_params: + _agent_kwargs['credential_pool'] = _heal_rt.get('credential_pool') + agent = _AIAgent(**_agent_kwargs) + with STREAMS_LOCK: + AGENT_INSTANCES[stream_id] = agent + from api.config import SESSION_AGENT_CACHE as _SAC, SESSION_AGENT_CACHE_LOCK as _SAC_L + with _SAC_L: + _SAC[session_id] = (agent, _agent_sig) + _SAC.move_to_end(session_id) + # Retry the conversation once with fresh credentials + _self_healed = True + _token_sent = False + try: + _heal_result = agent.run_conversation( + user_message=user_message, + system_message=workspace_system_msg, + 
conversation_history=_sanitize_messages_for_api(_previous_context_messages), + task_id=session_id, + persist_user_message=msg_text, + ) + _heal_ok = any( + m.get('role') == 'assistant' and str(m.get('content') or '').strip() + for m in (_heal_result.get('messages') or []) + ) or _token_sent + except Exception as _retry_exc: + logger.warning( + '[webui] self-heal: retry also failed: %s', _retry_exc, + ) + _heal_ok = False + if _heal_ok and _heal_result is not None: + # Retry succeeded — replace result and skip error + result = _heal_result + # Fall through past the error-emission block; + # the post-result persistence code below will + # process ``result`` normally. We jump past + # the ``put('apperror', ...)`` + ``return`` by + # NOT entering the ``if not _assistant_added`` + # guard again — but we are already inside it. + # Solution: set _assistant_added so the guard + # evaluates False on next conceptual pass. + # Since we're in a flat block, directly run the + # post-result merge logic here. + _result_messages = result.get('messages') or _previous_context_messages + _next_context_messages = _restore_reasoning_metadata( + _previous_context_messages, + _result_messages, + ) + s.context_messages = _next_context_messages + s.messages = _merge_display_messages_after_agent_result( + _previous_messages, + _previous_context_messages, + _restore_reasoning_metadata(_previous_messages, _result_messages), + msg_text, + ) + # Skip the error block — jump directly to the + # normal post-result persistence path by + # leaving _assistant_added truthy (set below). + _assistant_added = True # prevent re-entering guard + if not _assistant_added: + # Self-heal didn't apply or retry failed — emit error + _err_label = 'Authentication failed' + _err_type = 'auth_mismatch' + _err_hint = ( + 'The selected model may not be supported by your configured provider or ' + 'your API key is invalid. Run `hermes model` in your terminal to ' + 'update credentials, then restart the WebUI.' 
+ ) elif _is_auth: _err_label = 'Authentication failed' _err_type = 'auth_mismatch' @@ -2125,34 +3258,49 @@ def _run_agent_streaming( 'update credentials, then restart the WebUI.' ) else: - _err_label = 'No response received' - _err_type = 'no_response' - _err_hint = 'Verify your API key is valid and the selected model is available for your account.' - put('apperror', { - 'message': _err_str or f'{_err_label}.', - 'type': _err_type, - 'hint': _err_hint, - }) - # Clear stream/pending state so the session does not appear - # "agent_running" on reload after a silent failure. - # Persist the error so it survives page reload. - # _error=True ensures _sanitize_messages_for_api excludes it from - # subsequent API calls so the LLM never sees its own error as prior context. - s.active_stream_id = None - s.pending_user_message = None - s.pending_attachments = [] - s.pending_started_at = None - s.messages.append({ - 'role': 'assistant', - 'content': f'**{_err_label}:** {_err_str or _err_label}\n\n*{_err_hint}*', - 'timestamp': int(time.time()), - '_error': True, - }) - try: - s.save() - except Exception: + _err_label = _classification['label'] + _err_type = _classification['type'] + _err_hint = _classification['hint'] + # Skip error emission if credential self-heal succeeded + # (#1401) — _assistant_added is set True on successful retry. + if _assistant_added: + # Self-heal succeeded: messages are already merged into s, + # fall through to normal post-result persistence below. pass - return # apperror already closes the stream on the client side + else: + _error_payload = _provider_error_payload( + _err_str or f'{_err_label}.', + _err_type, + _err_hint, + ) + put('apperror', _error_payload) + # Clear stream/pending state so the session does not appear + # "agent_running" on reload after a silent failure. + # Persist the error so it survives page reload. 
+ # _error=True ensures _sanitize_messages_for_api excludes it from + # subsequent API calls so the LLM never sees its own error as prior context. + _materialize_pending_user_turn_before_error(s) + s.active_stream_id = None + s.pending_user_message = None + s.pending_attachments = [] + s.pending_started_at = None + _error_message = { + 'role': 'assistant', + 'content': f'**{_err_label}:** {_error_payload.get("message") or _err_label}\n\n*{_err_hint}*', + 'timestamp': int(time.time()), + '_error': True, + } + if _error_payload.get('details'): + _error_message['provider_details'] = _error_payload['details'] + s.messages.append(_error_message) + try: + s.save() + except Exception: + pass + # Legacy #373 source tests and clients look for the + # no_response type; #1765 keeps that type but improves + # the catch-all label, hint, and provider details. + return # apperror already closes the stream on the client side # ── Handle context compression side effects ── # If compression fired inside run_conversation, the agent may have @@ -2177,6 +3325,22 @@ def _run_agent_streaming( old_path = SESSION_DIR / f'{old_sid}.json' new_path = SESSION_DIR / f'{new_sid}.json' s.session_id = new_sid + # Carry profile identity across the compression boundary. + # Without this, s.profile stays None on the continuation + # session. On the next request, _run_agent_streaming calls + # get_hermes_home_for_profile(getattr(s, 'profile', None)) + # which falls back to the default profile's HERMES_HOME. + # Memory writes then land in the wrong profile's MEMORY.md. + # Stamping here also ensures s.save() persists a non-null + # profile field to the continuation session's JSON file, + # covering the case where the session is later evicted from + # SESSIONS and reconstructed from disk via Session.load(). 
+ if not s.profile and _resolved_profile_name: + s.profile = _resolved_profile_name + logger.info( + "Stamped profile=%r on continuation session %s after compression", + _resolved_profile_name, new_sid, + ) with LOCK: if old_sid in SESSIONS: SESSIONS[new_sid] = SESSIONS.pop(old_sid) @@ -2206,6 +3370,17 @@ def _run_agent_streaming( _compressed = True # Notify the frontend that compression happened if _compressed: + visible_after = _visible_messages_for_compression_anchor(s.messages) + s.compression_anchor_visible_idx = ( + max(0, len(visible_after) - 1) if visible_after else None + ) + s.compression_anchor_message_key = ( + _compression_anchor_message_key(visible_after[-1]) if visible_after else None + ) + s.compression_anchor_summary = _compact_summary_text( + _compression_summary_from_messages(s.messages) + or _compression_summary_from_messages(s.context_messages) + ) put('compressed', { 'message': 'Context auto-compressed to continue the conversation', }) @@ -2229,14 +3404,24 @@ def _run_agent_streaming( _a0 = '' if _should_bg_title: _u0, _a0 = _first_exchange_snippets(s.messages) - # Read token/cost usage from the agent object (if available) + # Read token/cost usage from the agent object (if available). + # Per-turn overwrite (#1857): replace cumulative session totals with the + # agent's most recent values, which already represent the current turn's + # full prompt+completion (input_tokens are the entire context, not delta). + # Defensive: only overwrite when the agent reports non-zero / non-None + # values. A rebuilt-from-cache-miss agent (post-restart, post-LRU-eviction) + # starts at zero; without this guard, the next turn would zero out the + # persisted disk total before any new tokens were spent. Per Opus advisor + # on stage-320: prevents restart-induced regression of session usage data. 
input_tokens = getattr(agent, 'session_prompt_tokens', 0) or 0 output_tokens = getattr(agent, 'session_completion_tokens', 0) or 0 estimated_cost = getattr(agent, 'session_estimated_cost_usd', None) - s.input_tokens = (s.input_tokens or 0) + input_tokens - s.output_tokens = (s.output_tokens or 0) + output_tokens - if estimated_cost: - s.estimated_cost = (s.estimated_cost or 0) + estimated_cost + if input_tokens > 0: + s.input_tokens = input_tokens + if output_tokens > 0: + s.output_tokens = output_tokens + if estimated_cost is not None: + s.estimated_cost = estimated_cost # Persist tool-call summaries even when the final message history only # kept bare tool rows and omitted explicit assistant tool_call IDs. tool_calls = _extract_tool_calls_from_messages( @@ -2270,6 +3455,33 @@ def _run_agent_streaming( if isinstance(_rm, dict) and _rm.get('role') == 'assistant': _rm['reasoning'] = _reasoning_text break + try: + _turn_duration_seconds = max(0.0, time.time() - float(_turn_started_at)) + except Exception: + _turn_duration_seconds = 0.0 + _turn_tps = None + if output_tokens and _turn_duration_seconds > 0: + _turn_tps = round(float(output_tokens) / _turn_duration_seconds, 1) + _gateway_routing = _extract_gateway_routing_metadata( + agent, + result, + requested_model=resolved_model or model, + requested_provider=resolved_provider, + ) + if _gateway_routing: + s.gateway_routing = _gateway_routing + _history = list(getattr(s, 'gateway_routing_history', None) or []) + _history.append(_gateway_routing) + s.gateway_routing_history = _history[-50:] + if s.messages: + for _dm in reversed(s.messages): + if isinstance(_dm, dict) and _dm.get('role') == 'assistant': + _dm['_turnDuration'] = round(_turn_duration_seconds, 3) + if _turn_tps is not None: + _dm['_turnTps'] = _turn_tps + if _gateway_routing: + _dm['_gatewayRouting'] = _gateway_routing + break # Persist context window data on the session so the context-ring # indicator survives a page reload (#1318). 
Must run BEFORE # s.save() for the same reason as the reasoning trace above. @@ -2287,15 +3499,62 @@ def _run_agent_streaming( # the indicator can still show a meaningful percentage. # Sourced from PR #1344 (@jasonjcwu) — extracted to a focused # follow-up after PR #1344 was closed as superseded by #1341. + # + # #1896: pass config_context_length, provider, and + # custom_providers so explicit config overrides win over the + # 256K default fallback. Without these, users on 1M-context + # models who set `model.context_length: 1048576` (or rely on + # a `custom_providers` per-model override) get a 256K + # window in the persisted session and the SSE payload — + # which then trips LCM auto-compress at ~25% of the wrong + # value, cascading into 429 floods. if not getattr(s, 'context_length', 0): try: from agent.model_metadata import get_model_context_length + _cfg_ctx_len = None + _cfg_custom_providers = None + try: + _model_cfg_for_ctx = _cfg.get('model', {}) if isinstance(_cfg, dict) else {} + if isinstance(_model_cfg_for_ctx, dict): + _raw_cfg_ctx = _model_cfg_for_ctx.get('context_length') + if _raw_cfg_ctx is not None: + try: + _parsed_cfg_ctx = int(_raw_cfg_ctx) + if _parsed_cfg_ctx > 0: + _cfg_ctx_len = _parsed_cfg_ctx + except (TypeError, ValueError): + # Invalid config — let the resolver fall + # through to provider/registry probing. + pass + _raw_cp = _cfg.get('custom_providers') if isinstance(_cfg, dict) else None + if isinstance(_raw_cp, list): + _cfg_custom_providers = _raw_cp + except Exception: + pass _resolved_cl = get_model_context_length( getattr(agent, 'model', resolved_model or '') or '', getattr(agent, 'base_url', '') or '', + config_context_length=_cfg_ctx_len, + provider=resolved_provider or '', + custom_providers=_cfg_custom_providers, ) if _resolved_cl: s.context_length = _resolved_cl + except TypeError: + # Older hermes-agent builds whose get_model_context_length + # signature pre-dates the config_context_length / + # custom_providers kwargs. 
Retry with the legacy 2-arg + # form so the indicator still resolves *something*. + try: + from agent.model_metadata import get_model_context_length as _legacy_cl + _resolved_cl = _legacy_cl( + getattr(agent, 'model', resolved_model or '') or '', + getattr(agent, 'base_url', '') or '', + ) + if _resolved_cl: + s.context_length = _resolved_cl + except Exception: + pass except Exception: # Older hermes-agent builds may not expose this helper. # Better to leave context_length=0 than crash the save. @@ -2317,7 +3576,16 @@ def _run_agent_streaming( ) except Exception: logger.debug("Failed to sync session to insights") - usage = {'input_tokens': input_tokens, 'output_tokens': output_tokens, 'estimated_cost': estimated_cost} + usage = { + 'input_tokens': input_tokens, + 'output_tokens': output_tokens, + 'estimated_cost': estimated_cost, + 'duration_seconds': round(_turn_duration_seconds, 3), + } + if _turn_tps is not None: + usage['tps'] = _turn_tps + if _gateway_routing: + usage['gateway_routing'] = _gateway_routing # Include context window data from the agent's compressor for the UI indicator. # The session-level persistence happens above (before s.save()) so the values # survive a page reload; this block only populates the live SSE usage payload. @@ -2330,13 +3598,47 @@ def _run_agent_streaming( # resolve the model's context window from metadata so the UI indicator # shows the correct percentage rather than overflowing against the 128K # JS default. Mirrors the session-save fallback above (lines ~2205-2217). + # + # #1896: pass config_context_length, provider, and custom_providers so + # explicit config overrides win over the 256K default fallback. The + # SSE payload's `context_length` is what feeds the live token-usage + # indicator, so a stale 256K here surfaces as the same wrong-window + # display that motivates this fix. 
if not usage.get('context_length'): try: from agent.model_metadata import get_model_context_length as _get_cl - _fb_cl = _get_cl( - getattr(agent, 'model', resolved_model or '') or '', - getattr(agent, 'base_url', '') or '', - ) + _cfg_ctx_len = None + _cfg_custom_providers = None + try: + _model_cfg_for_ctx = _cfg.get('model', {}) if isinstance(_cfg, dict) else {} + if isinstance(_model_cfg_for_ctx, dict): + _raw_cfg_ctx = _model_cfg_for_ctx.get('context_length') + if _raw_cfg_ctx is not None: + try: + _parsed_cfg_ctx = int(_raw_cfg_ctx) + if _parsed_cfg_ctx > 0: + _cfg_ctx_len = _parsed_cfg_ctx + except (TypeError, ValueError): + pass + _raw_cp = _cfg.get('custom_providers') if isinstance(_cfg, dict) else None + if isinstance(_raw_cp, list): + _cfg_custom_providers = _raw_cp + except Exception: + pass + try: + _fb_cl = _get_cl( + getattr(agent, 'model', resolved_model or '') or '', + getattr(agent, 'base_url', '') or '', + config_context_length=_cfg_ctx_len, + provider=resolved_provider or '', + custom_providers=_cfg_custom_providers, + ) + except TypeError: + # Older hermes-agent builds: fall back to legacy 2-arg form. + _fb_cl = _get_cl( + getattr(agent, 'model', resolved_model or '') or '', + getattr(agent, 'base_url', '') or '', + ) if _fb_cl: usage['context_length'] = _fb_cl except Exception: @@ -2366,11 +3668,81 @@ def _run_agent_streaming( }) except Exception: logger.debug("Failed to drain pending steer for session %s", session_id) + # /goal parity: after a successful assistant turn, run the Hermes + # GoalManager judge before terminal done/stream_end events. The + # frontend surfaces the status line and queues continuation_prompt as + # a normal next user message so /queue and user input keep priority. + # #1932: only evaluate when the turn was goal-related (set via + # STREAM_GOAL_RELATED or goal_related parameter). 
+ try: + from api.goals import evaluate_goal_after_turn, has_active_goal + + if not goal_related or not has_active_goal(session_id, profile_home=_profile_home): + _goal_decision = {} + else: + _last_goal_response = '' + for _goal_msg in reversed(s.messages or []): + if not isinstance(_goal_msg, dict) or _goal_msg.get('role') != 'assistant': + continue + _goal_content = _goal_msg.get('content', '') + if isinstance(_goal_content, list): + _goal_parts = [] + for _goal_part in _goal_content: + if isinstance(_goal_part, dict): + _goal_text = _goal_part.get('text') or _goal_part.get('content') + if _goal_text: + _goal_parts.append(str(_goal_text)) + _last_goal_response = '\n'.join(_goal_parts) + else: + _last_goal_response = str(_goal_content or '') + break + put('goal', { + 'session_id': session_id, + 'state': 'evaluating', + 'message': 'Evaluating goal progress…', + 'message_key': 'goal_evaluating_progress', + }) + _goal_decision = evaluate_goal_after_turn( + session_id, + _last_goal_response, + user_initiated=True, + profile_home=_profile_home, + ) + decision = _goal_decision or {} + _goal_message = str(decision.get('message') or '').strip() + if _goal_message: + put('goal', { + 'session_id': session_id, + 'state': 'continuing' if decision.get('should_continue') else 'idle', + 'message': _goal_message, + 'message_key': decision.get('message_key') or ('goal_continuing' if _goal_message else ''), + 'message_args': decision.get('message_args') or [], + 'decision': decision, + }) + if decision.get('should_continue'): + continuation_prompt = str(decision.get('continuation_prompt') or '').strip() + if continuation_prompt: + # #1932: mark this session as pending a goal continuation + # so the next /chat/start creates a goal-related stream. 
+ PENDING_GOAL_CONTINUATION.add(session_id) + put('goal_continue', { + 'session_id': session_id, + 'continuation_prompt': continuation_prompt, + 'text': continuation_prompt, + 'message': _goal_message, + 'message_key': decision.get('message_key') or 'goal_continuing', + 'message_args': decision.get('message_args') or [], + 'decision': decision, + }) + except Exception as _goal_exc: + logger.debug("Goal continuation hook failed for session %s: %s", session_id, _goal_exc) raw_session = s.compact() | {'messages': s.messages, 'tool_calls': tool_calls} put('done', {'session': redact_session_data(raw_session), 'usage': usage}) - # Emit metering stats for the header TPS label + # Emit one last metering packet for the live message-header TPS label. meter_stats = meter().get_stats() meter_stats['session_id'] = session_id + meter_stats.setdefault('tps_available', False) + meter_stats.setdefault('estimated', False) put('metering', meter_stats) if _should_bg_title and _u0 and _a0: threading.Thread( @@ -2426,49 +3798,96 @@ def _run_agent_streaming( if _stripped != err_str: err_str = _stripped _exc_lower = err_str.lower() - # Classify before saving so the error message can be persisted to the session. - # Check quota exhaustion first — OpenAI billing 429s use insufficient_quota which - # also matches rate-limit patterns, so order matters. 
- _exc_is_quota = ( - 'insufficient credit' in _exc_lower - or 'credit balance' in _exc_lower - or 'credits exhausted' in _exc_lower - or 'quota_exceeded' in _exc_lower - or 'quota exceeded' in _exc_lower - or 'exceeded your current quota' in _exc_lower - ) - _exc_is_rate_limit = (not _exc_is_quota) and ( - 'rate limit' in _exc_lower or '429' in err_str or 'RateLimitError' in type(e).__name__ - ) - _exc_is_auth = ( - '401' in err_str - or 'AuthenticationError' in type(e).__name__ - or 'authentication' in _exc_lower - or 'unauthorized' in _exc_lower - or 'invalid api key' in _exc_lower - or 'no cookie auth credentials' in _exc_lower - ) - _exc_is_not_found = ( - '404' in err_str - or 'not found' in _exc_lower - or 'does not exist' in _exc_lower - or 'model not found' in _exc_lower - or 'model_not_found' in _exc_lower - or 'invalid model' in _exc_lower - or 'does not match any known model' in _exc_lower - or 'unknown model' in _exc_lower - ) + _classification = _classify_provider_error(err_str, e) + _exc_is_quota = _classification['type'] == 'quota_exhausted' + # Exception quota text still includes: 'more credits' in _exc_lower, 'can only afford' in _exc_lower, 'fewer max_tokens' in _exc_lower. + # Rate-limit detection remains guarded as: (not _exc_is_quota). + _exc_is_rate_limit = (_classification['type'] == 'rate_limit') and (not _exc_is_quota) + _exc_is_auth = _classification['type'] == 'auth_mismatch' # detects '401' and 'unauthorized' via _classify_provider_error. + _exc_is_not_found = _classification['type'] == 'model_not_found' # detects '404', 'not found', 'does not exist', and 'invalid model'. + + # The user hint still points to Settings / `hermes model` from _classify_provider_error(). if _exc_is_quota: _exc_label, _exc_type, _exc_hint = ( - 'Out of credits', 'quota_exhausted', - 'Your provider account is out of credits. 
Top up your balance or switch providers via `hermes model`.', + _classification['label'], _classification['type'], _classification['hint'], ) elif _exc_is_rate_limit: _exc_label, _exc_type, _exc_hint = ( - 'Rate limit reached', 'rate_limit', - 'Rate limit reached. The fallback model (if configured) was also exhausted. Try again in a moment.', + _classification['label'], _classification['type'], _classification['hint'], ) elif _exc_is_auth: + if not _self_healed: + # ── Credential self-heal on 401 (#1401) ── + _heal_rt = _attempt_credential_self_heal( + resolved_provider or '', session_id, _agent_lock, + ) + if _heal_rt is not None: + logger.info('[webui] self-heal (except path): retrying stream after credential refresh') + _self_healed = True + # Rebuild runtime variables + _rt = _heal_rt + resolved_api_key = _heal_rt.get('api_key') + if not resolved_provider: + resolved_provider = _heal_rt.get('provider') + if not resolved_base_url: + resolved_base_url = _heal_rt.get('base_url') + if isinstance(resolved_provider, str) and resolved_provider.startswith('custom:'): + _cp_key, _cp_base = resolve_custom_provider_connection(resolved_provider) + if not resolved_api_key and _cp_key: + resolved_api_key = _cp_key + if not resolved_base_url and _cp_base: + resolved_base_url = _cp_base + # Build a fresh agent with the new credentials + _heal_kwargs = dict(_agent_kwargs) if '_agent_kwargs' in dir() else {} + _heal_kwargs['api_key'] = resolved_api_key + _heal_kwargs['base_url'] = resolved_base_url + _heal_kwargs['model'] = resolved_model + _heal_kwargs['provider'] = resolved_provider + if 'credential_pool' in _agent_params: + _heal_kwargs['credential_pool'] = _heal_rt.get('credential_pool') + _heal_agent = _AIAgent(**_heal_kwargs) + with STREAMS_LOCK: + AGENT_INSTANCES[stream_id] = _heal_agent + from api.config import SESSION_AGENT_CACHE as _SAC2, SESSION_AGENT_CACHE_LOCK as _SAC2_L + with _SAC2_L: + _SAC2[session_id] = (_heal_agent, _agent_sig) + _SAC2.move_to_end(session_id) 
+ # Retry the conversation + _token_sent = False + try: + _heal_result = _heal_agent.run_conversation( + user_message=user_message, + system_message=workspace_system_msg, + conversation_history=_sanitize_messages_for_api(_previous_context_messages), + task_id=session_id, + persist_user_message=msg_text, + ) + # Retry succeeded — persist the result normally + if s is not None: + if _checkpoint_stop is not None: + _checkpoint_stop.set() + if _ckpt_thread is not None: + _ckpt_thread.join(timeout=15) + _lock_ctx = _agent_lock if _agent_lock is not None else contextlib.nullcontext() + with _lock_ctx: + _result_messages = _heal_result.get('messages') or _previous_context_messages + _next_context_messages = _restore_reasoning_metadata( + _previous_context_messages, _result_messages, + ) + s.context_messages = _next_context_messages + s.messages = _merge_display_messages_after_agent_result( + _previous_messages, + _previous_context_messages, + _restore_reasoning_metadata(_previous_messages, _result_messages), + msg_text, + ) + s.save() + logger.info('[webui] self-heal (except path): retry succeeded') + return # skip error emission + except Exception as _retry_exc2: + logger.warning('[webui] self-heal (except path): retry failed: %s', _retry_exc2) + # Fall through to emit the original error + # Self-heal didn't apply or retry failed — emit the auth error _exc_label, _exc_type, _exc_hint = ( 'Authentication error', 'auth_mismatch', 'The selected model may not be supported by your configured provider. ' @@ -2476,12 +3895,12 @@ def _run_agent_streaming( ) elif _exc_is_not_found: _exc_label, _exc_type, _exc_hint = ( - 'Model not found', 'model_not_found', - 'The selected model was not found by the provider. 
' - 'Check the model ID in Settings or run `hermes model` to verify it exists for your provider.', + _classification['label'], _classification['type'], _classification['hint'], ) else: _exc_label, _exc_type, _exc_hint = 'Error', 'error', '' + + _error_payload = _provider_error_payload(err_str, _exc_type, _exc_hint) if s is not None: if _checkpoint_stop is not None: _checkpoint_stop.set() @@ -2492,24 +3911,25 @@ def _run_agent_streaming( # API calls so the LLM never sees its own error as prior context on the next turn. _lock_ctx = _agent_lock if _agent_lock is not None else contextlib.nullcontext() with _lock_ctx: + _materialize_pending_user_turn_before_error(s) s.active_stream_id = None s.pending_user_message = None s.pending_attachments = [] s.pending_started_at = None - s.messages.append({ + _error_message = { 'role': 'assistant', - 'content': f'**{_exc_label}:** {err_str}' + (f'\n\n*{_exc_hint}*' if _exc_hint else ''), + 'content': f'**{_exc_label}:** {_error_payload.get("message") or err_str}' + (f'\n\n*{_exc_hint}*' if _exc_hint else ''), 'timestamp': int(time.time()), '_error': True, - }) + } + if _error_payload.get('details'): + _error_message['provider_details'] = _error_payload['details'] + s.messages.append(_error_message) try: s.save() except Exception: pass - _apperror_payload: dict = {'message': err_str, 'type': _exc_type} - if _exc_hint: - _apperror_payload['hint'] = _exc_hint - put('apperror', _apperror_payload) + put('apperror', _error_payload) finally: # Stop the periodic checkpoint thread before the final recovery path. 
# The checkpoint thread also uses the per-session lock; joining it first @@ -2521,6 +3941,7 @@ def _run_agent_streaming( if (s is not None and getattr(s, 'active_stream_id', None) == stream_id and getattr(s, 'pending_user_message', None)): + update_active_run(stream_id, phase="finalizing") _last_resort_sync_from_core(s, stream_id, _agent_lock) _clear_thread_env() # TD1: always clear thread-local context with STREAMS_LOCK: @@ -2530,6 +3951,17 @@ def _run_agent_streaming( STREAM_PARTIAL_TEXT.pop(stream_id, None) # Clean up partial text buffer (#893) STREAM_REASONING_TEXT.pop(stream_id, None) # Clean up reasoning trace (#1361 §A) STREAM_LIVE_TOOL_CALLS.pop(stream_id, None) # Clean up tool calls (#1361 §B) + STREAM_GOAL_RELATED.pop(stream_id, None) # Clean up goal-related flag (#1932) + unregister_active_run(stream_id) + # NOTE: do NOT discard PENDING_GOAL_CONTINUATION here. The marker + # is set by goal_continue (line ~3328) inside the SAME function + # call and consumed atomically by `_start_chat_stream_for_session` + # in routes.py (around line 6522) when the next stream starts. + # Discarding here in the streaming worker's `finally` would + # almost always race ahead of the frontend's SSE-receive → + # POST /api/chat/start round-trip and erase the marker before + # the next stream can read it, breaking the goal-continuation + # chain. Stage-326 critical fix per Opus advisor review. # ============================================================ # SECTION: HTTP Request Handler diff --git a/api/system_health.py b/api/system_health.py new file mode 100644 index 00000000..9b86f4ed --- /dev/null +++ b/api/system_health.py @@ -0,0 +1,167 @@ +"""Safe aggregate host resource metrics for the WebUI VPS panel (#693). + +The browser only needs coarse CPU/RAM/disk usage. Keep this module intentionally +small and dependency-free: no process lists, command strings, user identities, +environment variables, or filesystem topology leave the server. 
+""" + +from __future__ import annotations + +import shutil +import time +from datetime import datetime, timezone +from pathlib import Path +from typing import Any + + +_PROC_STAT = Path("/proc/stat") +_PROC_MEMINFO = Path("/proc/meminfo") +_CPU_SAMPLE_SECONDS = 0.05 + + +def _checked_at() -> str: + return datetime.now(timezone.utc).isoformat() + + +def _clamp_percent(value: Any) -> float: + try: + numeric = float(value) + except (TypeError, ValueError): + return 0.0 + if numeric < 0: + numeric = 0.0 + if numeric > 100: + numeric = 100.0 + return round(numeric, 1) + + +def _read_proc_stat_cpu() -> tuple[int, int]: + """Return (idle_ticks, total_ticks) from Linux /proc/stat.""" + with _PROC_STAT.open("r", encoding="utf-8") as handle: + first = handle.readline().strip().split() + if not first or first[0] != "cpu": + raise RuntimeError("proc_stat_unavailable") + values = [int(part) for part in first[1:]] + if len(values) < 4: + raise RuntimeError("proc_stat_unavailable") + idle = values[3] + (values[4] if len(values) > 4 else 0) + total = sum(values) + if total <= 0: + raise RuntimeError("proc_stat_unavailable") + return idle, total + + +def _cpu_delta_percent(start: tuple[int, int], end: tuple[int, int]) -> float: + idle_delta = end[0] - start[0] + total_delta = end[1] - start[1] + if total_delta <= 0: + return 0.0 + busy_delta = max(0, total_delta - max(0, idle_delta)) + return _clamp_percent((busy_delta / total_delta) * 100.0) + + +def _cpu_percent() -> float: + """Sample aggregate CPU usage without psutil. + + A short local sample avoids storing cross-request state and returns a stable + percentage on the first poll. Unsupported platforms raise a safe error code. 
+ """ + start = _read_proc_stat_cpu() + time.sleep(_CPU_SAMPLE_SECONDS) + end = _read_proc_stat_cpu() + return _cpu_delta_percent(start, end) + + +def _read_meminfo_kib() -> dict[str, int]: + data: dict[str, int] = {} + with _PROC_MEMINFO.open("r", encoding="utf-8") as handle: + for line in handle: + key, _, rest = line.partition(":") + if not key or not rest: + continue + parts = rest.strip().split() + if not parts: + continue + try: + data[key] = int(parts[0]) + except ValueError: + continue + return data + + +def _memory_usage() -> dict[str, int | float]: + meminfo = _read_meminfo_kib() + total = int(meminfo.get("MemTotal") or 0) * 1024 + if total <= 0: + raise RuntimeError("meminfo_unavailable") + available_kib = meminfo.get("MemAvailable") + if available_kib is None: + available_kib = ( + meminfo.get("MemFree", 0) + + meminfo.get("Buffers", 0) + + meminfo.get("Cached", 0) + + meminfo.get("SReclaimable", 0) + - meminfo.get("Shmem", 0) + ) + available = max(0, int(available_kib) * 1024) + used = max(0, min(total, total - available)) + return { + "used_bytes": used, + "total_bytes": total, + "percent": _clamp_percent((used / total) * 100.0), + } + + +def _disk_usage() -> dict[str, int | float]: + usage = shutil.disk_usage("/") + total = int(usage.total) + if total <= 0: + raise RuntimeError("disk_unavailable") + used = int(usage.used) + return { + "used_bytes": used, + "total_bytes": total, + "percent": _clamp_percent((used / total) * 100.0), + } + + +def _safe_error(metric: str, exc: Exception) -> dict[str, str]: + # Keep this intentionally coarse. Exception messages can contain local paths + # on unusual platforms; the browser only needs a safe unavailable reason. 
+ return {"metric": metric, "code": type(exc).__name__} + + +def build_system_health_payload() -> dict[str, Any]: + metrics: dict[str, Any] = {"cpu": None, "memory": None, "disk": None} + errors: list[dict[str, str]] = [] + + collectors = { + "cpu": _cpu_percent, + "memory": _memory_usage, + "disk": _disk_usage, + } + for name, collect in collectors.items(): + try: + value = collect() + if name == "cpu": + metrics[name] = {"percent": _clamp_percent(value)} + else: + metrics[name] = { + "used_bytes": max(0, int(value["used_bytes"])), + "total_bytes": max(0, int(value["total_bytes"])), + "percent": _clamp_percent(value["percent"]), + } + except Exception as exc: + errors.append(_safe_error(name, exc)) + + available = any(metrics[name] is not None for name in metrics) + status = "ok" if available and not errors else "partial" if available else "unavailable" + return { + "status": status, + "available": available, + "checked_at": _checked_at(), + "cpu": metrics["cpu"], + "memory": metrics["memory"], + "disk": metrics["disk"], + "errors": errors, + } diff --git a/api/updates.py b/api/updates.py index 953cbe2e..e3e025c2 100644 --- a/api/updates.py +++ b/api/updates.py @@ -13,7 +13,7 @@ import threading import time from pathlib import Path -from api.config import REPO_ROOT +from api.config import REPO_ROOT, STREAMS, STREAMS_LOCK # Lazy -- may be None if agent not found try: @@ -28,6 +28,32 @@ _apply_lock = threading.Lock() # prevents concurrent stash/pull/pop on same re CACHE_TTL = 1800 # 30 minutes +def _active_stream_count() -> int: + """Return the current in-memory chat stream count. + + Self-update schedules an in-process re-exec after git pull/reset. That is + restart-equivalent for live streams, even when systemd does not see a unit + restart. Refuse update/force-update while a stream exists so a browser + update click cannot recreate the pending-message loss class fixed in #1543. 
+ """ + with STREAMS_LOCK: + return len(STREAMS) + + +def _restart_blocked_response(target: str, active_streams: int) -> dict: + plural = "s" if active_streams != 1 else "" + return { + 'ok': False, + 'message': ( + f'Cannot update {target} while {active_streams} active chat stream{plural} ' + 'is running. Wait for the response to finish, then retry the update.' + ), + 'target': target, + 'restart_blocked': True, + 'active_streams': active_streams, + } + + def _run_git(args, cwd, timeout=10): """Run a git command and return (useful output, ok). @@ -91,8 +117,56 @@ def _detect_webui_version() -> str: return 'unknown' +def _detect_agent_version() -> str: + """Detect the running Hermes Agent version for UI display.""" + if _AGENT_DIR is None: + return 'not detected' + + version_file = Path(_AGENT_DIR) / "VERSION" + try: + if version_file.exists(): + text = version_file.read_text(encoding='utf-8').strip() + if text: + return text + except Exception: + pass + + # Fallback: infer from git describe when the checkout exists but no VERSION + # file is available (common in source checkouts and developer environments). + if not Path(_AGENT_DIR).exists(): + return 'not detected' + # Symmetric with _detect_webui_version() above — `--dirty` flags a + # locally-modified checkout so operators can see when their agent has + # uncommitted changes vs a clean tag. Per Opus advisor on stage-293. + out, ok = _run_git(['describe', '--tags', '--always', '--dirty'], _AGENT_DIR, timeout=3) + if ok and out: + return out + + return 'not detected' + + # Resolved once at import time — tags cannot change without a process restart. WEBUI_VERSION: str = _detect_webui_version() +AGENT_VERSION: str = _detect_agent_version() + + +def _normalize_remote_url(remote_url): + """Return the browser-facing repository URL for update compare links. + + Git remotes may be HTTPS or SSH and may include a literal ``.git`` suffix. 
+ Strip only that literal suffix — never use ``str.rstrip('.git')`` because it + treats the argument as a character set and can truncate ``hermes-webui`` to + ``hermes-webu``. + """ + if not remote_url: + return remote_url + remote_url = remote_url.strip() + if remote_url.startswith('git@'): + remote_url = remote_url.replace(':', '/', 1).replace('git@', 'https://', 1) + remote_url = remote_url.rstrip('/') + if remote_url.endswith('.git'): + remote_url = remote_url[:-4] + return remote_url.rstrip('/') def _split_remote_ref(ref): @@ -146,16 +220,48 @@ def _check_repo(path, name): out, ok = _run_git(['rev-list', '--count', f'HEAD..{compare_ref}'], path) behind = int(out) if ok and out.isdigit() else 0 - # Get short SHAs for display - current, _ = _run_git(['rev-parse', '--short', 'HEAD'], path) + # Get short SHAs for display. + # + # latest_sha = upstream tip (compare_ref). Always exists on github.com + # because it is literally the commit `git fetch` just pulled. + # + # current_sha is trickier. The intuitive choice — local HEAD — breaks + # the "What's new?" compare URL whenever HEAD is not a public commit: + # unpushed work, dirty stage branches, forks, in-flight rebases, or + # release-time merge commits whose SHA only lives in the maintainer's + # checkout. We saw exactly this in #1579: a banner reporting "17 updates" + # linked to /compare/... and 404'd because + # was never pushed to the canonical repo. + # + # The right base is the merge-base between HEAD and the upstream ref — + # that's the most recent commit both sides agree on, and (because + # `git fetch` succeeded above) it is guaranteed to be present upstream. + # If a user is 17 commits behind with no local-only commits, merge-base + # equals local HEAD and the URL is identical to what we shipped before; + # if they ARE ahead with local-only commits, the URL still resolves to + # the public history they share with upstream. If merge-base fails for + # any reason (e.g. 
shallow clone where the bases diverge before the + # cutoff), fall back to None so the JS link guard suppresses the link + # rather than emitting a known-broken URL. + mb_full, mb_ok = _run_git(['merge-base', 'HEAD', compare_ref], path) + if mb_ok and mb_full: + short, ok = _run_git(['rev-parse', '--short', mb_full], path) + current = short if (ok and short) else None + else: + current = None latest, _ = _run_git(['rev-parse', '--short', compare_ref], path) + # Get repo URL for "What's new?" link + remote_url, _ = _run_git(['remote', 'get-url', 'origin'], path) + remote_url = _normalize_remote_url(remote_url) + return { 'name': name, 'behind': behind, 'current_sha': current, 'latest_sha': latest, 'branch': compare_ref, + 'repo_url': remote_url, } @@ -240,6 +346,10 @@ def apply_force_update(target: str) -> dict: response with ``conflict: True`` or ``diverged: True`` and the user has confirmed they want to discard local changes. """ + active_streams = _active_stream_count() + if active_streams: + return _restart_blocked_response(target, active_streams) + if not _apply_lock.acquire(blocking=False): return {'ok': False, 'message': 'Update already in progress'} try: @@ -290,6 +400,10 @@ def apply_force_update(target: str) -> dict: def apply_update(target): """Stash, pull --ff-only, pop for the given target repo.""" + active_streams = _active_stream_count() + if active_streams: + return _restart_blocked_response(target, active_streams) + if not _apply_lock.acquire(blocking=False): return {'ok': False, 'message': 'Update already in progress'} try: diff --git a/api/workspace.py b/api/workspace.py index f0e34e0f..5ec8ec9e 100644 --- a/api/workspace.py +++ b/api/workspace.py @@ -10,6 +10,7 @@ paths are used as fallback when no profile module is available. 
import json import logging import os +import stat import subprocess import concurrent.futures from pathlib import Path @@ -92,7 +93,8 @@ def _profile_default_workspace() -> str: def _clean_workspace_list(workspaces: list) -> list: """Sanitize a workspace list: - - Remove entries whose paths no longer exist on disk. + - Preserve saved paths even when they are currently missing or inaccessible; + picker state must not be destroyed by a transient stat/permission failure. - Remove entries whose paths live inside another profile's directory (e.g. ~/.hermes/profiles/X/... should not appear on a different profile). - Rename any entry whose name is literally 'default' to 'Home' (avoids @@ -104,10 +106,9 @@ def _clean_workspace_list(workspaces: list) -> list: for w in workspaces: path = w.get('path', '') name = w.get('name', '') - p = Path(path).resolve() if path else Path('/') - # Skip paths that no longer exist - if not p.is_dir(): + if not path: continue + p = _safe_resolve(Path(path).expanduser()) # Skip paths inside a DIFFERENT profile's directory (cross-profile leak). # Allow paths inside the CURRENT profile's own directory (e.g. test workspaces # created under ~/.hermes/profiles/webui/webui-mvp-test/). @@ -130,6 +131,32 @@ def _clean_workspace_list(workspaces: list) -> list: return result +def _workspace_access_error(candidate: Path, *, missing_label: str = "Path does not exist") -> str | None: + """Return a user-facing validation error for an unusable workspace path. + + ``Path.exists()`` can collapse permission/stat failures into a generic falsey + result on some Python/OS combinations, which produced misleading "does not + exist" messages for macOS/TCC-denied directories. Probe with ``stat()`` so + missing paths, non-directories, and permission-denied paths can be reported + separately. 
+ """ + try: + st = candidate.stat() + except FileNotFoundError: + return f"{missing_label}: {candidate}" + except PermissionError as exc: + return ( + f"Cannot access path: {candidate}. The server process could not inspect " + f"this directory ({exc}). On macOS, grant Full Disk Access or Files and " + f"Folders permission to the Hermes/WebUI app or server process, then try again." + ) + except OSError as exc: + return f"Cannot access path: {candidate}. The server process could not inspect this path ({exc})." + if not stat.S_ISDIR(st.st_mode): + return f"Path is not a directory: {candidate}" + return None + + def _migrate_global_workspaces() -> list: """Read the legacy global workspaces.json, clean it, and return the result. @@ -517,10 +544,9 @@ def resolve_trusted_workspace(path: str | Path | None = None) -> Path: candidate = Path(path).expanduser().resolve() - if not candidate.exists(): - raise ValueError(f"Path does not exist: {candidate}") - if not candidate.is_dir(): - raise ValueError(f"Path is not a directory: {candidate}") + access_error = _workspace_access_error(candidate) + if access_error: + raise ValueError(access_error) # (A) Trusted if under the user's home directory — cross-platform via Path.home() # Must be checked before system roots to allow symlinks like /var/home. @@ -566,6 +592,25 @@ def resolve_trusted_workspace(path: str | Path | None = None) -> Path: +def _strip_surrounding_quotes(path: str) -> str: + """Strip a single pair of surrounding single or double quotes from a path string. + + macOS Finder's "Copy as Pathname" (Cmd+Option+C) returns paths wrapped in + single quotes, e.g. ``'/Users/x/Documents/foo'``. Other shells and OS file + managers do similar things with double quotes. Users routinely paste these + quoted strings into the Add Space input expecting them to "just work" — + the only reason they didn't was a missing strip. + + Only paired quotes are stripped (matching opener and closer). 
One-sided quotes + are preserved on the slim chance a path legitimately contains a literal quote + character. + """ + s = path.strip() + if len(s) >= 2 and s[0] == s[-1] and s[0] in ("'", '"'): + return s[1:-1] + return s + + def validate_workspace_to_add(path: str) -> Path: """Validate a path for *adding* to the workspace list (less restrictive than resolve_trusted_workspace). @@ -575,13 +620,17 @@ def validate_workspace_to_add(path: str) -> Path: The stricter ``resolve_trusted_workspace`` is used when *using* an existing workspace (file reads/writes) to prevent path traversal after the list is built. + + Surrounding quotes (single or double) are stripped before validation — + macOS Finder's "Copy as Pathname" wraps paths in single quotes by default, + and users routinely paste those into the Add Space input. """ + path = _strip_surrounding_quotes(path) candidate = Path(path).expanduser().resolve() - if not candidate.exists(): - raise ValueError(f"Path does not exist: {candidate}") - if not candidate.is_dir(): - raise ValueError(f"Path is not a directory: {candidate}") + access_error = _workspace_access_error(candidate) + if access_error: + raise ValueError(access_error) # Home directory is always trusted regardless of where it lives on disk # (e.g. /var/home/... on systemd-homed Fedora/RHEL). diff --git a/api/worktrees.py b/api/worktrees.py new file mode 100644 index 00000000..330a4385 --- /dev/null +++ b/api/worktrees.py @@ -0,0 +1,73 @@ +"""Helpers for WebUI-managed Hermes Agent git worktrees.""" + +from __future__ import annotations + +import subprocess +import time +from contextlib import redirect_stderr, redirect_stdout +from io import StringIO +from pathlib import Path + +import logging + +logger = logging.getLogger(__name__) + + +def find_git_repo_root(workspace: str | Path) -> Path: + """Return the enclosing git repo root for *workspace*. 
+ + Use git itself instead of checking ``workspace/.git`` so nested workspaces + and linked git worktrees are both handled correctly. + """ + ws = Path(workspace).expanduser().resolve() + if not ws.is_dir(): + raise ValueError("Workspace path does not exist or is not a directory") + try: + result = subprocess.run( + ["git", "rev-parse", "--show-toplevel"], + cwd=ws, + text=True, + capture_output=True, + timeout=5, + check=False, + ) + except (OSError, subprocess.TimeoutExpired) as exc: + raise ValueError("Workspace is not inside a git repository") from exc + if result.returncode != 0: + raise ValueError("Workspace is not inside a git repository") + root = result.stdout.strip() + if not root: + raise ValueError("Workspace is not inside a git repository") + return Path(root).expanduser().resolve() + + +def _setup_agent_worktree(repo_root: str) -> dict: + try: + import api.config # noqa: F401 # ensure Hermes Agent dir is on sys.path + from cli import _setup_worktree + except Exception as exc: + raise RuntimeError("Hermes Agent worktree helper is unavailable") from exc + output = StringIO() + with redirect_stdout(output), redirect_stderr(output): + info = _setup_worktree(repo_root) + emitted = output.getvalue().strip() + if emitted: + logger.debug("Hermes Agent worktree helper output: %s", emitted) + if not info: + raise RuntimeError("Hermes Agent failed to create a git worktree") + return info + + +def create_worktree_for_workspace(workspace: str | Path) -> dict: + repo_root = find_git_repo_root(workspace) + info = _setup_agent_worktree(str(repo_root)) + path = info.get("path") + branch = info.get("branch") + if not path or not branch: + raise RuntimeError("Hermes Agent returned incomplete worktree metadata") + return { + "path": str(Path(path).expanduser().resolve()), + "branch": str(branch), + "repo_root": str(Path(info.get("repo_root") or repo_root).expanduser().resolve()), + "created_at": time.time(), + } diff --git a/bootstrap.py b/bootstrap.py index 
32393fd1..92d08245 100644 --- a/bootstrap.py +++ b/bootstrap.py @@ -90,6 +90,47 @@ def ensure_supported_platform() -> None: ) +def _agent_dir_from_hermes_cli() -> Path | None: + """Resolve the agent install root by inspecting the `hermes` CLI shebang. + + The Hermes Agent installer drops a `hermes` console-script in the user's + PATH whose shebang points at the agent's bundled venv: + + #!/path/to/hermes-agent/venv/bin/python3 + + Walking up the parents until we find a directory that contains + `run_agent.py` recovers the install root regardless of where the user + chose to clone the agent (e.g. ~/Projects/GitHub/hermes-agent), which + the hard-coded candidate list in :func:`discover_agent_dir` cannot. + + Last-resort only: this is invoked after every explicit candidate + (`HERMES_WEBUI_AGENT_DIR`, `$HERMES_HOME/hermes-agent`, etc.) has missed. + A stale clone in a known location still wins over the live `hermes` CLI + — that's intentional, since the candidate list is treated as + authoritative when present, and matches existing behavior. 
+ """ + hermes_path = shutil.which("hermes") + if not hermes_path: + return None + try: + with open(hermes_path, "r", encoding="utf-8", errors="replace") as f: + first_line = f.readline().strip() + except OSError: + return None + if not first_line.startswith("#!"): + return None + interp_field = first_line[2:].strip().split(None, 1) + if not interp_field: + return None + interp = Path(interp_field[0]) + if not interp.is_absolute(): + return None + for parent in interp.parents: + if (parent / "run_agent.py").exists(): + return parent.resolve() + return None + + def discover_agent_dir() -> Path | None: home = Path(os.getenv("HERMES_HOME", str(Path.home() / ".hermes"))).expanduser() candidates = [ @@ -105,7 +146,7 @@ def discover_agent_dir() -> Path | None: candidate = Path(raw).expanduser().resolve() if candidate.exists() and (candidate / "run_agent.py").exists(): return candidate - return None + return _agent_dir_from_hermes_cli() def discover_launcher_python(agent_dir: Path | None) -> str: @@ -179,7 +220,16 @@ def ensure_python_has_webui_deps(python_exe: str, agent_dir: Path | None = None) ) if not venv_python.exists(): info(f"Creating local virtualenv at {venv_dir}") - venv.EnvBuilder(with_pip=True).create(venv_dir) + # symlinks=True: some Python builds (notably mise/asdf shared-library + # installs on macOS) default venv to copy mode. The copied binary still + # uses @executable_path/../lib/libpython3.X.dylib for its load command, + # so the venv binary aborts with SIGABRT on first import because the + # dylib never gets copied into .venv/lib. Symlinking the interpreter + # keeps @executable_path resolving back to the original install. + # CPython's venv falls back to copy mode automatically when symlink + # creation fails (e.g. older Windows without SeCreateSymbolicLinkPrivilege), + # so this is safe to set unconditionally. 
+ venv.EnvBuilder(with_pip=True, symlinks=True).create(venv_dir) info("Installing WebUI dependencies into local virtualenv") subprocess.run( diff --git a/ctl.sh b/ctl.sh new file mode 100755 index 00000000..c246131f --- /dev/null +++ b/ctl.sh @@ -0,0 +1,367 @@ +#!/usr/bin/env bash +set -euo pipefail + +REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +HERMES_HOME="${HERMES_HOME:-${HOME}/.hermes}" +PID_FILE="${HERMES_WEBUI_PID_FILE:-${HERMES_HOME}/webui.pid}" +LOG_FILE="${HERMES_WEBUI_LOG_FILE:-${HERMES_HOME}/webui.log}" +STATE_FILE="${HERMES_WEBUI_CTL_STATE_FILE:-${HERMES_HOME}/webui.ctl.env}" +DEFAULT_STATE_DIR="${HERMES_WEBUI_STATE_DIR:-${HERMES_HOME}/webui}" + +usage() { + cat <<'EOF' +Usage: ./ctl.sh [args] + +Commands: + start [bootstrap args...] Start Hermes WebUI as a background daemon + stop Stop the daemon started by ctl.sh + restart [bootstrap args...] Stop, then start again + status Show daemon, host/port, log, and health status + logs [--lines N] [--follow|--no-follow] + Show the daemon log (defaults to tail -n 100 -f) +EOF +} + +ensure_home() { + mkdir -p "${HERMES_HOME}" "${DEFAULT_STATE_DIR}" +} + +_load_repo_dotenv_preserving_env() { + local env_file="${REPO_ROOT}/.env" + [[ -f "${env_file}" ]] || return 0 + + local -a preserved=() + local line key value + while IFS= read -r line || [[ -n "${line}" ]]; do + line="${line#${line%%[![:space:]]*}}" + [[ -z "${line}" || "${line}" == \#* || "${line}" != *=* ]] && continue + key="${line%%=*}" + key="${key#export }" + key="${key//[[:space:]]/}" + [[ "${key}" =~ ^[A-Za-z_][A-Za-z0-9_]*$ ]] || continue + if [[ -n "${!key+x}" ]]; then + value="${!key}" + preserved+=("${key}=${value}") + fi + done < "${env_file}" + + set -a + # shellcheck source=/dev/null + source "${env_file}" + set +a + + local assignment + for assignment in "${preserved[@]}"; do + export "${assignment}" + done +} + +_find_python() { + if [[ -n "${HERMES_WEBUI_PYTHON:-}" ]]; then + printf '%s\n' "${HERMES_WEBUI_PYTHON}" + elif 
command -v python3 >/dev/null 2>&1; then + command -v python3 + elif command -v python >/dev/null 2>&1; then + command -v python + else + echo "[ctl] Python 3 is required to run bootstrap.py" >&2 + return 1 + fi +} + +_parse_launch_binding() { + CTL_HOST="${HERMES_WEBUI_HOST:-127.0.0.1}" + CTL_PORT="${HERMES_WEBUI_PORT:-8787}" + local arg next_is_host=0 saw_port=0 + for arg in "$@"; do + if (( next_is_host )); then + CTL_HOST="${arg}" + next_is_host=0 + continue + fi + case "${arg}" in + --host) + next_is_host=1 + ;; + --host=*) + CTL_HOST="${arg#--host=}" + ;; + --*) + ;; + *) + if (( ! saw_port )) && [[ "${arg}" =~ ^[0-9]+$ ]]; then + CTL_PORT="${arg}" + saw_port=1 + fi + ;; + esac + done +} + +_build_bootstrap_args() { + CTL_BOOTSTRAP_ARGS=() + local arg next_is_host=0 saw_port=0 + for arg in "$@"; do + if (( next_is_host )); then + next_is_host=0 + continue + fi + case "${arg}" in + --host) + next_is_host=1 + ;; + --host=*) + ;; + --*) + CTL_BOOTSTRAP_ARGS+=("${arg}") + ;; + *) + if (( ! 
saw_port )) && [[ "${arg}" =~ ^[0-9]+$ ]]; then + saw_port=1 + else + CTL_BOOTSTRAP_ARGS+=("${arg}") + fi + ;; + esac + done +} + +_write_state() { + local pid="$1" host="$2" port="$3" + local state_dir="${HERMES_WEBUI_STATE_DIR:-${DEFAULT_STATE_DIR}}" + { + printf 'PID=%q\n' "${pid}" + printf 'REPO_ROOT=%q\n' "${REPO_ROOT}" + printf 'HOST=%q\n' "${host}" + printf 'PORT=%q\n' "${port}" + printf 'LOG_FILE=%q\n' "${LOG_FILE}" + printf 'STATE_DIR=%q\n' "${state_dir}" + printf 'STARTED_AT=%q\n' "$(date -u +%Y-%m-%dT%H:%M:%SZ)" + } > "${STATE_FILE}" +} + +_load_state_if_present() { + if [[ -f "${STATE_FILE}" ]]; then + # shellcheck source=/dev/null + source "${STATE_FILE}" + fi +} + +_pid_from_file() { + [[ -f "${PID_FILE}" ]] || return 1 + local pid + pid="$(tr -d '[:space:]' < "${PID_FILE}")" + [[ "${pid}" =~ ^[0-9]+$ ]] || return 1 + printf '%s\n' "${pid}" +} + +_is_alive() { + local pid="$1" + kill -0 "${pid}" >/dev/null 2>&1 +} + +_proc_args() { + local pid="$1" + ps -p "${pid}" -o args= 2>/dev/null || true +} + +_is_owned_webui_pid() { + local pid="$1" args state_repo="" + [[ -f "${STATE_FILE}" ]] || return 1 + _load_state_if_present + state_repo="${REPO_ROOT:-}" + [[ "${state_repo}" == "$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" ]] || return 1 + args="$(_proc_args "${pid}")" + [[ -n "${args}" ]] || return 1 + [[ "${args}" == *"${state_repo}/bootstrap.py"* || "${args}" == *"${state_repo}/server.py"* || "${args}" == *"${state_repo}/start.sh"* ]] +} + +_current_pid() { + local pid + pid="$(_pid_from_file)" || return 1 + if _is_alive "${pid}" && _is_owned_webui_pid "${pid}"; then + printf '%s\n' "${pid}" + return 0 + fi + return 1 +} + +_clear_stale_pid() { + if [[ -f "${PID_FILE}" ]]; then + rm -f "${PID_FILE}" "${STATE_FILE}" + echo "[ctl] Removed stale PID file: ${PID_FILE}" + fi +} + +start_cmd() { + ensure_home + _load_repo_dotenv_preserving_env + export HERMES_WEBUI_STATE_DIR="${HERMES_WEBUI_STATE_DIR:-${DEFAULT_STATE_DIR}}" + mkdir -p 
"${HERMES_WEBUI_STATE_DIR}" + _parse_launch_binding "$@" + _build_bootstrap_args "$@" + export HERMES_WEBUI_HOST="${CTL_HOST}" + export HERMES_WEBUI_PORT="${CTL_PORT}" + + local existing_pid + if existing_pid="$(_current_pid 2>/dev/null)"; then + echo "[ctl] Hermes WebUI is already running (PID ${existing_pid})" + return 0 + fi + _clear_stale_pid >/dev/null 2>&1 || true + + local python_exe pid + python_exe="$(_find_python)" + : >> "${LOG_FILE}" + ( + cd "${REPO_ROOT}" + exec "${python_exe}" "${REPO_ROOT}/bootstrap.py" --no-browser --foreground --host "${CTL_HOST}" "${CTL_PORT}" ${CTL_BOOTSTRAP_ARGS[@]+"${CTL_BOOTSTRAP_ARGS[@]}"} + ) >> "${LOG_FILE}" 2>&1 & + pid=$! + + printf '%s\n' "${pid}" > "${PID_FILE}" + _write_state "${pid}" "${CTL_HOST}" "${CTL_PORT}" + sleep 0.15 + if ! _is_alive "${pid}"; then + echo "[ctl] Hermes WebUI failed to stay running. Log: ${LOG_FILE}" >&2 + rm -f "${PID_FILE}" "${STATE_FILE}" + return 1 + fi + echo "[ctl] Started Hermes WebUI (PID ${pid})" + echo "[ctl] Bound: ${CTL_HOST}:${CTL_PORT}" + echo "[ctl] Log: ${LOG_FILE}" +} + +stop_cmd() { + ensure_home + local pid + if ! pid="$(_pid_from_file 2>/dev/null)"; then + echo "[ctl] Hermes WebUI is stopped" + rm -f "${PID_FILE}" "${STATE_FILE}" + return 0 + fi + + if ! _is_alive "${pid}" || ! _is_owned_webui_pid "${pid}"; then + _clear_stale_pid + return 0 + fi + + echo "[ctl] Stopping Hermes WebUI (PID ${pid})" + kill "${pid}" >/dev/null 2>&1 || true + local i + for i in {1..50}; do + if ! 
_is_alive "${pid}"; then + rm -f "${PID_FILE}" "${STATE_FILE}" + echo "[ctl] Stopped" + return 0 + fi + sleep 0.1 + done + + echo "[ctl] Process did not exit after SIGTERM; sending SIGKILL" >&2 + kill -KILL "${pid}" >/dev/null 2>&1 || true + rm -f "${PID_FILE}" "${STATE_FILE}" +} + +_health_line() { + local host="$1" port="$2" url result + url="http://${host}:${port}/health" + if command -v curl >/dev/null 2>&1; then + if result="$(curl -fsS --max-time 2 "${url}" 2>/dev/null)"; then + if command -v python3 >/dev/null 2>&1; then + printf '%s' "${result}" | python3 -c 'import json,sys +try: + data=json.load(sys.stdin) + sessions=data.get("sessions", data.get("session_count", "?")) + active=data.get("active_streams", "?") + status=data.get("status", "ok") + print(f"ok ({sessions} sessions, {active} active streams)" if status == "ok" else status) +except Exception: + print("ok")' + else + echo "ok" + fi + else + echo "unreachable (${url})" + fi + else + echo "unknown (curl not found; ${url})" + fi +} + +status_cmd() { + ensure_home + _load_state_if_present + local host="${HOST:-${HERMES_WEBUI_HOST:-127.0.0.1}}" + local port="${PORT:-${HERMES_WEBUI_PORT:-8787}}" + local log_path="${LOG_FILE}" + local pid uptime health + + if pid="$(_current_pid 2>/dev/null)"; then + uptime="$(ps -p "${pid}" -o etime= 2>/dev/null | sed 's/^ *//' || true)" + health="$(_health_line "${host}" "${port}")" + echo "● hermes-webui — running" + echo " PID: ${pid}" + echo " Uptime: ${uptime:-unknown}" + echo " Bound: ${host}:${port}" + echo " Log: ${log_path}" + echo " Health: ${health}" + else + [[ -f "${PID_FILE}" ]] && _clear_stale_pid >/dev/null 2>&1 || true + echo "● hermes-webui — stopped" + echo " PID: -" + echo " Bound: ${host}:${port}" + echo " Log: ${log_path}" + echo " Health: not checked" + fi +} + +logs_cmd() { + ensure_home + local lines=100 follow=1 + while [[ $# -gt 0 ]]; do + case "$1" in + --lines) + shift + lines="${1:-}" + [[ "${lines}" =~ ^[0-9]+$ ]] || { echo "[ctl] --lines 
requires a number" >&2; return 2; } + ;; + --lines=*) + lines="${1#--lines=}" + [[ "${lines}" =~ ^[0-9]+$ ]] || { echo "[ctl] --lines requires a number" >&2; return 2; } + ;; + --follow|-f) + follow=1 + ;; + --no-follow) + follow=0 + ;; + *) + echo "[ctl] Unknown logs option: $1" >&2 + return 2 + ;; + esac + shift + done + touch "${LOG_FILE}" + if (( follow )); then + tail -n "${lines}" -f "${LOG_FILE}" + else + tail -n "${lines}" "${LOG_FILE}" + fi +} + +cmd="${1:-}" +if [[ $# -gt 0 ]]; then + shift +fi + +case "${cmd}" in + start) start_cmd "$@" ;; + stop) stop_cmd ;; + restart) stop_cmd; start_cmd "$@" ;; + status) status_cmd ;; + logs) logs_cmd "$@" ;; + -h|--help|help|"") usage ;; + *) echo "[ctl] Unknown command: ${cmd}" >&2; usage >&2; exit 2 ;; +esac diff --git a/docker_init.bash b/docker_init.bash index 88f21456..fbe71780 100644 --- a/docker_init.bash +++ b/docker_init.bash @@ -36,25 +36,25 @@ script_fullname=$0 echo " - script_fullname: ${script_fullname}" ignore_value="VALUE_TO_IGNORE" -# everyone can read our files by default -umask 0022 +# Keep init scratch files private to the container user that owns them. +umask 0077 -# Write a world-writeable file (preferably inside /tmp -- ie within the container) -write_worldtmpfile() { +write_privtmpfile() { tmpfile=$1 - if [ -z "${tmpfile}" ]; then error_exit "write_worldfile: missing argument"; fi - if [ -f $tmpfile ]; then rm -f $tmpfile; fi - echo -n $2 > ${tmpfile} - chmod 777 ${tmpfile} + if [ -z "${tmpfile}" ]; then error_exit "write_privtmpfile: missing argument"; fi + if [ -f "$tmpfile" ]; then rm -f "$tmpfile"; fi + printf '%s' "$2" > "$tmpfile" + chmod 600 "$tmpfile" } itdir=/tmp/hermeswebui_init -if [ ! -d $itdir ]; then mkdir $itdir; chmod 777 $itdir; fi -if [ ! -d $itdir ]; then error_exit "Failed to create $itdir"; fi +if [ ! -d "$itdir" ]; then mkdir -p "$itdir"; fi +chmod 700 "$itdir" || error_exit "Failed to secure $itdir" +if [ ! 
-d "$itdir" ]; then error_exit "Failed to create $itdir"; fi # Set user and group id # logic: if not set and file exists, use file value, else use default. Create file for persistence when the container is re-run -# reasoning: needed when using docker compose as the file will exist in the stopped container, and changing the value from environment variables or configuration file must be propagated from hermeswebuitoo to hermeswebuitoo transition (those values are the only ones loaded before the environment variables dump file are loaded) +# reasoning: needed when using docker compose as the file will exist in the stopped container, and changing the value from environment variables or configuration file must be propagated from the root init phase to the hermeswebui runtime phase it=$itdir/hermeswebui_user_uid if [ -z "${WANTED_UID+x}" ]; then if [ -f $it ]; then WANTED_UID=$(cat $it); fi @@ -88,7 +88,7 @@ if [ -z "${WANTED_UID+x}" ] || [ "${WANTED_UID}" = "1024" ]; then fi fi WANTED_UID=${WANTED_UID:-1024} -write_worldtmpfile $it "$WANTED_UID" +write_privtmpfile $it "$WANTED_UID" echo "-- WANTED_UID: \"${WANTED_UID}\"" it=$itdir/hermeswebui_user_gid @@ -120,7 +120,7 @@ if [ -z "${WANTED_GID+x}" ] || [ "${WANTED_GID}" = "1024" ]; then fi fi WANTED_GID=${WANTED_GID:-1024} -write_worldtmpfile $it "$WANTED_GID" +write_privtmpfile $it "$WANTED_GID" echo "-- WANTED_GID: \"${WANTED_GID}\"" echo "== Most Environment variables set" @@ -180,27 +180,78 @@ load_env() { fi } -# hermeswebuitoo is a specfiic user not existing by default on ubuntu, we can check its whomai -if [ "A${whoami}" == "Ahermeswebuitoo" ]; then - echo "-- Running as hermeswebuitoo, will switch hermeswebui to the desired UID/GID" - # The script is started as hermeswebuitoo -- UID/GID 1025/1025 +# The production image does not ship sudo. 
The entrypoint starts as root only +# long enough to align the hermeswebui UID/GID with mounted volumes, prepare +# root-owned paths, and then drop privileges for the server process. +if [ "A${whoami}" == "Aroot" ]; then + echo "-- Running as root for one-time container init; will switch to hermeswebui" # We are altering the UID/GID of the hermeswebui user to the desired ones and restarting as that user - # using usermod for the already create hermeswebui user, knowing it is not already in use + # using usermod for the already created hermeswebui user, knowing it is not already in use # per usermod manual: "You must make certain that the named user is not executing any processes when this command is being executed" - sudo groupmod -o -g ${WANTED_GID} hermeswebui || error_exit "Failed to set GID of hermeswebui user" - sudo usermod -o -u ${WANTED_UID} hermeswebui || error_exit "Failed to set UID of hermeswebui user" - sudo chown -R ${WANTED_UID}:${WANTED_GID} /home/hermeswebui || error_exit "Failed to set owner of /home/hermeswebui" - save_env /tmp/hermeswebuitoo_env.txt + # Guard for read-only root filesystem (podman with read_only=true, issue #1470). + _readonly_root=false + if ! sh -c 'test -w /etc/group && test -w /etc/passwd' 2>/dev/null; then + _readonly_root=true + echo " !! Detected read-only root filesystem — /etc/group or /etc/passwd is not writable" + fi + if [ "A${_readonly_root}" == "Atrue" ]; then + _current_hermeswebui_gid=$(id -g hermeswebui 2>/dev/null || echo "") + _current_hermeswebui_uid=$(id -u hermeswebui 2>/dev/null || echo "") + if [ "A${_current_hermeswebui_gid}" == "A${WANTED_GID}" ] && [ "A${_current_hermeswebui_uid}" == "A${WANTED_UID}" ]; then + echo " -- Skipping groupmod/usermod — hermeswebui already has UID ${WANTED_UID} GID ${WANTED_GID} and root fs is read-only" + else + error_exit "Cannot modify /etc/group or /etc/passwd (read-only root fs). 
Set UID=${_current_hermeswebui_uid} and GID=${_current_hermeswebui_gid} to match, or run without read_only=true. See issue #1470." + fi + else + groupmod -o -g "${WANTED_GID}" hermeswebui || error_exit "Failed to set GID of hermeswebui user" + usermod -o -u "${WANTED_UID}" hermeswebui || error_exit "Failed to set UID of hermeswebui user" + fi + + chown -R "${WANTED_UID}:${WANTED_GID}" /home/hermeswebui || error_exit "Failed to set owner of /home/hermeswebui" + + echo ""; echo "-- Preparing /app for the hermeswebui runtime user" + mkdir -p /app || error_exit "Failed to create /app directory" + chown hermeswebui:hermeswebui /app || error_exit "Failed to set owner of /app to hermeswebui user" + rsync -av --chown=hermeswebui:hermeswebui /apptoo/ /app/ || error_exit "Failed to sync /apptoo to /app with correct ownership" + + if [ -z "${HERMES_WEBUI_DEFAULT_WORKSPACE+x}" ]; then export HERMES_WEBUI_DEFAULT_WORKSPACE="/workspace"; fi + if [ ! -d "$HERMES_WEBUI_DEFAULT_WORKSPACE" ]; then + mkdir -p "$HERMES_WEBUI_DEFAULT_WORKSPACE" || error_exit "Failed to create default workspace at $HERMES_WEBUI_DEFAULT_WORKSPACE" + fi + if [ ! -d "$HERMES_WEBUI_DEFAULT_WORKSPACE" ]; then error_exit "HERMES_WEBUI_DEFAULT_WORKSPACE directory does not exist at $HERMES_WEBUI_DEFAULT_WORKSPACE"; fi + chown hermeswebui:hermeswebui "$HERMES_WEBUI_DEFAULT_WORKSPACE" 2>/dev/null || echo "!! WARNING: Could not chown $HERMES_WEBUI_DEFAULT_WORKSPACE (continuing)" + + export UV_CACHE_DIR=${UV_CACHE_DIR:-/uv_cache} + mkdir -p "${UV_CACHE_DIR}" || error_exit "Failed to create ${UV_CACHE_DIR} directory" + chown hermeswebui:hermeswebui "${UV_CACHE_DIR}" || error_exit "Failed to set owner of ${UV_CACHE_DIR} to hermeswebui user" + + chown -R "${WANTED_UID}:${WANTED_GID}" "$itdir" || error_exit "Failed to set owner of $itdir" + # Issue #2010 — Railway / user-namespaced runtimes: in-container UID 0 may map + # to a host UID outside the writable subuid range, so /tmp writes fail despite + # id -u == 0. 
Probe writability and fall back through $itdir → /app. + ENV_FILE="/tmp/hermeswebui_root_env.txt" + if ! ( : > "$ENV_FILE" ) 2>/dev/null; then + ENV_FILE="${itdir:-/tmp/hermeswebui_init}/hermeswebui_root_env.txt" + mkdir -p "$(dirname "$ENV_FILE")" 2>/dev/null + if ! ( : > "$ENV_FILE" ) 2>/dev/null; then + ENV_FILE="/app/.hermeswebui_root_env" + fi + echo " !! /tmp not writable by root — falling back to $ENV_FILE (user-namespaced runtime?)" + fi + save_env "$ENV_FILE" + chown "${WANTED_UID}:${WANTED_GID}" "$ENV_FILE" || error_exit "Failed to set owner of $ENV_FILE" + chmod 600 "$ENV_FILE" || error_exit "Failed to secure $ENV_FILE" + export _HW_ROOT_ENV_PATH="$ENV_FILE" + # restart the script as hermeswebui set with the correct UID/GID this time echo "-- Restarting as hermeswebui user with UID ${WANTED_UID} GID ${WANTED_GID}" - sudo su hermeswebui $script_fullname || error_exit "subscript failed" - ok_exit "Clean exit" + exec su -s /bin/bash -c "exec \"${script_fullname}\"" hermeswebui || error_exit "subscript failed" fi -# If we are here, the script is started as another user than hermeswebuitoo -# because the whoami value for the hermeswebui user can be any existing user, we can not check against it -# instead we check if the UID/GID are the expected ones +# If we are here, the script is started as an unprivileged runtime user. +# Because the whoami value for the hermeswebui user can be any existing user, we cannot check against it; +# instead we check if the UID/GID are the expected ones. 
if [ "$WANTED_GID" != "$new_gid" ]; then error_exit "hermeswebui MUST be running as UID ${WANTED_UID} GID ${WANTED_GID}, current UID ${new_uid} GID ${new_gid}"; fi if [ "$WANTED_UID" != "$new_uid" ]; then error_exit "hermeswebui MUST be running as UID ${WANTED_UID} GID ${WANTED_GID}, current UID ${new_uid} GID ${new_gid}"; fi @@ -209,18 +260,16 @@ if [ "$WANTED_UID" != "$new_uid" ]; then error_exit "hermeswebui MUST be running # We are therefore running as hermeswebui echo ""; echo "== Running as hermeswebui" -# Load environment variables one by one if they do not exist from /tmp/hermeswebuitoo_env.txt -it=/tmp/hermeswebuitoo_env.txt -if [ -f $it ]; then - echo "-- Loading not already set environment variables from $it" - load_env $it true +# Load environment variables one by one if they do not exist from the root init phase +tmp_root_env="${_HW_ROOT_ENV_PATH:-/tmp/hermeswebui_root_env.txt}" +if [ -f $tmp_root_env ]; then + echo "-- Loading not already set environment variables from $tmp_root_env" + load_env $tmp_root_env true fi ## -echo ""; echo "-- Making sure /app is owned by the hermeswebui user to avoid permission issues when running the server " -sudo mkdir -p /app || error_exit "Failed to create /app directory" -sudo chown hermeswebui:hermeswebui /app || error_exit "Failed to set owner of /app to hermeswebui user" -sudo rsync -av --chown=hermeswebui:hermeswebui /apptoo/ /app/ || error_exit "Failed to sync /apptoo to /app with correct ownership" +echo ""; echo "-- Verifying /app is writable by the hermeswebui runtime user" +if [ ! 
-d /app ]; then error_exit "/app directory does not exist"; fi it=/app/.testfile; touch $it || error_exit "Failed to verify /app directory" rm -f $it || error_exit "Failed to delete test file in /app" @@ -239,19 +288,18 @@ rm -f $it || error_exit "Failed to delete test file in $HERMES_WEBUI_STATE_DIR" echo ""; echo "-- HERMES_WEBUI_DEFAULT_WORKSPACE: Default workspace directory shown on first launch" if [ -z "${HERMES_WEBUI_DEFAULT_WORKSPACE+x}" ]; then echo "HERMES_WEBUI_DEFAULT_WORKSPACE not set, setting to /workspace"; export HERMES_WEBUI_DEFAULT_WORKSPACE="/workspace"; fi; echo "-- HERMES_WEBUI_DEFAULT_WORKSPACE: $HERMES_WEBUI_DEFAULT_WORKSPACE" -# Use sudo for mkdir — Docker may auto-create bind-mount directories as root (#357). -# Skip mkdir if the directory already exists (e.g. a read-only mount — #670). +# The root init phase creates/chowns missing bind-mount directories before +# dropping privileges. After that, the runtime user only verifies access. if [ ! -d "$HERMES_WEBUI_DEFAULT_WORKSPACE" ]; then - sudo mkdir -p "$HERMES_WEBUI_DEFAULT_WORKSPACE" || error_exit "Failed to create default workspace at $HERMES_WEBUI_DEFAULT_WORKSPACE" + mkdir -p "$HERMES_WEBUI_DEFAULT_WORKSPACE" || error_exit "Failed to create default workspace at $HERMES_WEBUI_DEFAULT_WORKSPACE" fi if [ ! -d "$HERMES_WEBUI_DEFAULT_WORKSPACE" ]; then error_exit "HERMES_WEBUI_DEFAULT_WORKSPACE directory does not exist at $HERMES_WEBUI_DEFAULT_WORKSPACE"; fi -# Only chown and write-test if the workspace is writable. Read-only bind-mounts -# (:ro) are valid — the workspace is used for browsing, not writing by the server. +# Only write-test if the workspace is writable. Read-only bind-mounts (:ro) +# are valid — the workspace is used for browsing, not writing by the server. if [ -w "$HERMES_WEBUI_DEFAULT_WORKSPACE" ]; then - sudo chown hermeswebui:hermeswebui "$HERMES_WEBUI_DEFAULT_WORKSPACE" || echo "!! 
WARNING: Could not chown $HERMES_WEBUI_DEFAULT_WORKSPACE (continuing)" it="$HERMES_WEBUI_DEFAULT_WORKSPACE/.testfile"; touch $it && rm -f $it || echo "!! WARNING: Could not write to $HERMES_WEBUI_DEFAULT_WORKSPACE (continuing)" else - echo "-- HERMES_WEBUI_DEFAULT_WORKSPACE is read-only — skipping chown/write check (read-only workspace is supported)" + echo "-- HERMES_WEBUI_DEFAULT_WORKSPACE is read-only — skipping write check (read-only workspace is supported)" fi echo ""; echo "===================" @@ -266,9 +314,9 @@ else fi export UV_PROJECT_ENVIRONMENT=venv -export UV_CACHE_DIR=/uv_cache -sudo mkdir -p ${UV_CACHE_DIR} || error_exit "Failed to create /uv_cache directory" -sudo chown hermeswebui:hermeswebui ${UV_CACHE_DIR} || error_exit "Failed to set owner of ${UV_CACHE_DIR} to hermeswebui user" +export UV_CACHE_DIR=${UV_CACHE_DIR:-/uv_cache} +mkdir -p "${UV_CACHE_DIR}" || error_exit "Failed to create ${UV_CACHE_DIR} directory" +test -w "${UV_CACHE_DIR}" || error_exit "${UV_CACHE_DIR} is not writable by hermeswebui" cd /app if [ -f /app/venv/bin/python3 ]; then diff --git a/docs/docker.md b/docs/docker.md index 04c1bc3e..ada305b3 100644 --- a/docs/docker.md +++ b/docs/docker.md @@ -13,6 +13,24 @@ This is the comprehensive Docker reference. For a 5-minute quickstart, see the [ If something stops working, **start with the single-container setup** — it's the simplest path and fixes most permission/UID/path-mismatch issues by construction. +## Production image security model + +The production Docker image is hardened for the normal single-tenant container threat model: +Hermes WebUI assumes one operator controls the container, mounted Hermes home, and workspace. +The image does **not** install `sudo`, does not add runtime users to a sudo group, and does not +grant `NOPASSWD` escalation. If an agent/tool process gains a shell as `hermeswebui`, it should +not be able to become root with a passwordless sudo command. 
+ +The entrypoint still starts as `root` for a narrow init phase because Docker bind mounts often need +UID/GID alignment and ownership preparation before the app can read `~/.hermes`, `/workspace`, +`/app`, and `/uv_cache`. After that setup, `docker_init.bash` re-execs itself as the unprivileged +`hermeswebui` user and starts the server there. Init scratch files under `/tmp/hermeswebui_init` +are owner-only (`0700` directory, `0600` files), not world-writable. + +For multi-tenant or hostile-container environments, rebuild with your own runtime user, mount policy, +and supervisor assumptions. Development images that need package-manager convenience should add +those tools in a dev-only Dockerfile instead of reintroducing passwordless sudo to production. + ## 5-minute quickstart (single container) ```bash diff --git a/docs/onboarding.md b/docs/onboarding.md new file mode 100644 index 00000000..f6409f96 --- /dev/null +++ b/docs/onboarding.md @@ -0,0 +1,181 @@ +# First-run onboarding guide + +This guide explains what happens the first time Hermes WebUI starts, which +setup path to choose, and how to recover when the wizard cannot finish. + +The short version: run the bootstrap, open the WebUI, choose a provider, choose +a workspace, optionally set a password, then start a chat. If you are using a +local model server from Docker, pay special attention to the Base URL section +below. + +## Before you start + +Hermes WebUI is only the browser interface. The actual agent runtime, memory, +skills, config, cron jobs, and provider credentials belong to Hermes Agent. + +The bootstrap supports Linux, macOS, and WSL2. Native Windows is not supported +by the bootstrap yet. 
A community native Windows setup is being tracked in +[#1952](https://github.com/nesquena/hermes-webui/issues/1952), including: + +- [Native Windows guide](https://github.com/markwang2658/hermes-windows-native-guide) +- [Native Windows setup scripts](https://github.com/markwang2658/hermes-windows-native) + +For Windows users who want the supported path today, use WSL2 and see +[Windows / WSL auto-start](wsl-autostart.md). + +## Install path choices + +| Path | Use it when | Notes | +|---|---|---| +| Local bootstrap | You run WebUI directly on Linux, macOS, or WSL2 | Best for a personal server, Mac mini, VPS, or homelab host. | +| Docker single-container | You want the simplest container setup | Recommended first Docker path. WebUI runs the agent in-process. | +| Docker two-container | You already run the agent gateway separately | More isolated, but tools launched from WebUI run in the WebUI container. | +| Docker three-container | You want agent gateway plus dashboard plus WebUI | Same caveats as two-container, plus the dashboard service. | +| Native Windows community path | You are intentionally testing unsupported native Windows | Community-maintained for now, not the official bootstrap path. | + +If a Docker install gets confusing, start again with the single-container setup. +It avoids most UID/GID, source-volume, and tool-location surprises. See +[Docker setup guide](docker.md) for the full container reference. + +## Re-running onboarding safely + +Do not delete `~/.hermes` just to see the wizard again. That directory can hold +your real Hermes config, credentials, memory, skills, profiles, sessions, and +cron state. + +For a clean local trial, use an isolated Hermes home and WebUI state directory: + +```bash +mkdir -p ~/hermes-onboarding-test +HERMES_HOME=~/hermes-onboarding-test/.hermes \ +HERMES_WEBUI_STATE_DIR=~/hermes-onboarding-test/webui \ +HERMES_WEBUI_PORT=8789 \ +python3 bootstrap.py +``` + +Then open `http://127.0.0.1:8789`. 
+ +If your repo has a `.env` file, remember that the bootstrap loads it. Remove or +adjust any `HERMES_HOME`, `HERMES_WEBUI_STATE_DIR`, or `HERMES_WEBUI_PORT` +entries there before using the isolated command above. + +For managed hosting or fully preconfigured images, set +`HERMES_WEBUI_SKIP_ONBOARDING=1` to bypass the wizard. + +## What the wizard checks + +The first screen reports the runtime state WebUI can see: + +- Hermes Agent importability: whether WebUI can import and run `AIAgent`. +- Provider status: whether `config.yaml` and credential state are enough for a + chat request. +- Password status: whether WebUI password protection is enabled. +- Config paths: the active `config.yaml` and `.env` locations for this profile. + +If the agent check fails, use [Troubleshooting](troubleshooting.md), especially +the `AIAgent not available` section. If provider setup is incomplete, continue +through the wizard or run `hermes model` in the same machine environment that +will run WebUI. + +## Choosing a provider + +The setup step groups providers by how much information they usually need. + +| Group | Examples | What you usually enter | +|---|---|---| +| Easy start | OpenRouter, Anthropic, OpenAI | API key and model. | +| Open / self-hosted | Ollama, LM Studio, custom OpenAI-compatible | Base URL, model, optional API key. | +| Specialized | Gemini, DeepSeek, Xiaomi MiMo, Z.AI / GLM, NVIDIA NIM, Mistral, xAI | Provider API key and default model. | + +For API-key providers, the wizard writes the key to the active Hermes `.env` +file and writes the default model/provider to `config.yaml`. + +For local providers, the API key field can be blank when the server is keyless. +Most LM Studio, Ollama, vLLM, llama-server, and TabbyAPI installs run this way. +Use **Test connection** to verify the Base URL and populate the model list +before continuing. + +Advanced provider flows such as Nous Portal and GitHub Copilot are still +terminal-first. 
OpenAI Codex and Anthropic Claude Code OAuth can be started in +the onboarding flow when your Hermes config selects the corresponding provider. +If the wizard points you back to `hermes model`, use that CLI flow first, then +refresh WebUI. + +## Base URL rules for local model servers + +For self-hosted providers, the Base URL should point to the OpenAI-compatible +API root. Common examples: + +| Server | Typical Base URL | +|---|---| +| LM Studio on the same non-Docker host | `http://127.0.0.1:1234/v1` | +| Ollama on the same non-Docker host | `http://127.0.0.1:11434/v1` | +| LM Studio from Docker Desktop | `http://host.docker.internal:1234/v1` | +| Ollama from Docker Desktop | `http://host.docker.internal:11434/v1` | +| Local server on another LAN machine | `http://<lan-ip>:<port>/v1` | + +Inside Docker, `localhost` means the WebUI container itself, not your Mac, +Windows host, or another machine on your LAN. If LM Studio or Ollama is running +outside the container, use `host.docker.internal` on Docker Desktop or the +server's LAN IP address. + +The wizard probes `<base-url>/models` before saving. A successful probe fills +the model dropdown. A failed probe blocks the setup step and shows an inline +error such as DNS failure, connection refused, timeout, HTTP error, or +unexpected response shape. + +## Workspace step + +The workspace is the filesystem location Hermes should use for new sessions. +It can be a source checkout, a project directory, or a general workspace folder. + +In Docker, the default browsable path is `/workspace`, which maps to the host +directory mounted by the compose file. If the workspace appears empty, check the +Docker UID/GID and mount guidance in [Docker setup guide](docker.md). + +## Password step + +Password protection is optional for localhost-only installs. Enable it if you +expose WebUI outside `127.0.0.1`, behind a reverse proxy, or on a LAN. + +The password is stored through the normal WebUI settings path and hashed +server-side.
You can change it later from Settings. + +## What gets written + +The wizard uses the same files and APIs as the normal app: + +- Active Hermes `config.yaml`: provider, default model, and Base URL when + relevant. +- Active Hermes `.env`: provider API keys when you entered one. +- WebUI `settings.json`: onboarding completion, workspace, password state, and + other WebUI preferences. + +State normally lives outside the repository. By default: + +- Hermes Agent state: `~/.hermes` +- WebUI state: `~/.hermes/webui` + +Override these with `HERMES_HOME` and `HERMES_WEBUI_STATE_DIR` when you need an +isolated test install. + +## When to file an issue + +File an issue when the diagnostics point to WebUI rather than local +configuration. Include: + +1. Install path: local bootstrap, Docker single-container, Docker + two-container, Docker three-container, WSL2, or community native Windows. +2. Output from `/health`, or the startup banner if the server never starts. +3. The provider selected in onboarding and the Base URL shape, with secrets + redacted. +4. For Docker provider problems, the result of probing from inside the + container, for example: + +```bash +docker exec hermes-webui sh -c 'curl -sS -w "\nHTTP %{http_code}\n" http://host.docker.internal:1234/v1/models | head -50' +``` + +5. Any inline wizard error text and relevant logs. + +Never paste API keys, OAuth tokens, or full `.env` contents into an issue. 
diff --git a/docs/pr-media/1257/llm-wiki-status.png b/docs/pr-media/1257/llm-wiki-status.png new file mode 100644 index 00000000..b488310e Binary files /dev/null and b/docs/pr-media/1257/llm-wiki-status.png differ diff --git a/docs/pr-media/1321/update-network-error.png b/docs/pr-media/1321/update-network-error.png new file mode 100644 index 00000000..7c438a14 Binary files /dev/null and b/docs/pr-media/1321/update-network-error.png differ diff --git a/docs/pr-media/1362/claude-code-onboarding.png b/docs/pr-media/1362/claude-code-onboarding.png new file mode 100644 index 00000000..ef2feef7 Binary files /dev/null and b/docs/pr-media/1362/claude-code-onboarding.png differ diff --git a/docs/pr-media/1362/codex-oauth-onboarding.png b/docs/pr-media/1362/codex-oauth-onboarding.png new file mode 100644 index 00000000..9d184151 Binary files /dev/null and b/docs/pr-media/1362/codex-oauth-onboarding.png differ diff --git a/docs/pr-media/1406/eager-config-app-shell.png b/docs/pr-media/1406/eager-config-app-shell.png new file mode 100644 index 00000000..871741ea Binary files /dev/null and b/docs/pr-media/1406/eager-config-app-shell.png differ diff --git a/docs/pr-media/1451/raw-pre-render-validation.png b/docs/pr-media/1451/raw-pre-render-validation.png new file mode 100644 index 00000000..d27ddc40 Binary files /dev/null and b/docs/pr-media/1451/raw-pre-render-validation.png differ diff --git a/docs/pr-media/1455/logs-tab-mvp.png b/docs/pr-media/1455/logs-tab-mvp.png new file mode 100644 index 00000000..ef87f91b Binary files /dev/null and b/docs/pr-media/1455/logs-tab-mvp.png differ diff --git a/docs/pr-media/1456/insights-before.png b/docs/pr-media/1456/insights-before.png new file mode 100644 index 00000000..6ed21217 Binary files /dev/null and b/docs/pr-media/1456/insights-before.png differ diff --git a/docs/pr-media/1456/insights-daily-tokens-models.png b/docs/pr-media/1456/insights-daily-tokens-models.png new file mode 100644 index 00000000..10533275 Binary files /dev/null 
and b/docs/pr-media/1456/insights-daily-tokens-models.png differ diff --git a/docs/pr-media/1459/dashboard-nav-link.png b/docs/pr-media/1459/dashboard-nav-link.png new file mode 100644 index 00000000..85b68084 Binary files /dev/null and b/docs/pr-media/1459/dashboard-nav-link.png differ diff --git a/docs/pr-media/1640/tps-toggle-off-default.png b/docs/pr-media/1640/tps-toggle-off-default.png new file mode 100644 index 00000000..ffefd438 Binary files /dev/null and b/docs/pr-media/1640/tps-toggle-off-default.png differ diff --git a/docs/pr-media/1640/tps-toggle-on-hot-apply.png b/docs/pr-media/1640/tps-toggle-on-hot-apply.png new file mode 100644 index 00000000..eb54b50a Binary files /dev/null and b/docs/pr-media/1640/tps-toggle-on-hot-apply.png differ diff --git a/docs/pr-media/1640/tps-toggle-settings.png b/docs/pr-media/1640/tps-toggle-settings.png new file mode 100644 index 00000000..c8086deb Binary files /dev/null and b/docs/pr-media/1640/tps-toggle-settings.png differ diff --git a/docs/pr-media/1688/chat-no-health-bar.png b/docs/pr-media/1688/chat-no-health-bar.png new file mode 100644 index 00000000..f79ee650 Binary files /dev/null and b/docs/pr-media/1688/chat-no-health-bar.png differ diff --git a/docs/pr-media/1688/insights-system-health.png b/docs/pr-media/1688/insights-system-health.png new file mode 100644 index 00000000..c9788835 Binary files /dev/null and b/docs/pr-media/1688/insights-system-health.png differ diff --git a/docs/pr-media/1690/scroll-preserved-after-completion.png b/docs/pr-media/1690/scroll-preserved-after-completion.png new file mode 100644 index 00000000..805245d0 Binary files /dev/null and b/docs/pr-media/1690/scroll-preserved-after-completion.png differ diff --git a/docs/pr-media/1698/workspace-double-click-rename.png b/docs/pr-media/1698/workspace-double-click-rename.png new file mode 100644 index 00000000..fb1dd9e9 Binary files /dev/null and b/docs/pr-media/1698/workspace-double-click-rename.png differ diff --git 
a/docs/pr-media/1699/model-cache-auth-store-refresh.png b/docs/pr-media/1699/model-cache-auth-store-refresh.png new file mode 100644 index 00000000..beb552f6 Binary files /dev/null and b/docs/pr-media/1699/model-cache-auth-store-refresh.png differ diff --git a/docs/pr-media/1700/profile-switch-away-from-running-session.png b/docs/pr-media/1700/profile-switch-away-from-running-session.png new file mode 100644 index 00000000..a6c1f178 Binary files /dev/null and b/docs/pr-media/1700/profile-switch-away-from-running-session.png differ diff --git a/docs/pr-media/1715/activity-focus-reload.png b/docs/pr-media/1715/activity-focus-reload.png new file mode 100644 index 00000000..5ca8f736 Binary files /dev/null and b/docs/pr-media/1715/activity-focus-reload.png differ diff --git a/docs/pr-media/1716/active-elapsed-timer.png b/docs/pr-media/1716/active-elapsed-timer.png new file mode 100644 index 00000000..59f468e9 Binary files /dev/null and b/docs/pr-media/1716/active-elapsed-timer.png differ diff --git a/docs/pr-media/1725/activity-summary-after.png b/docs/pr-media/1725/activity-summary-after.png new file mode 100644 index 00000000..1ef4a39a Binary files /dev/null and b/docs/pr-media/1725/activity-summary-after.png differ diff --git a/docs/pr-media/1725/activity-summary-before.png b/docs/pr-media/1725/activity-summary-before.png new file mode 100644 index 00000000..a207c6a7 Binary files /dev/null and b/docs/pr-media/1725/activity-summary-before.png differ diff --git a/docs/pr-media/1765/codex-quota-error-collapsed.png b/docs/pr-media/1765/codex-quota-error-collapsed.png new file mode 100644 index 00000000..7cbba286 Binary files /dev/null and b/docs/pr-media/1765/codex-quota-error-collapsed.png differ diff --git a/docs/pr-media/1765/codex-quota-error-expanded.png b/docs/pr-media/1765/codex-quota-error-expanded.png new file mode 100644 index 00000000..ae4931ce Binary files /dev/null and b/docs/pr-media/1765/codex-quota-error-expanded.png differ diff --git 
a/docs/pr-media/1771/session-model-fallback.png b/docs/pr-media/1771/session-model-fallback.png new file mode 100644 index 00000000..e16a3538 Binary files /dev/null and b/docs/pr-media/1771/session-model-fallback.png differ diff --git a/docs/pr-media/1772/cli-tool-metadata-api-evidence.json b/docs/pr-media/1772/cli-tool-metadata-api-evidence.json new file mode 100644 index 00000000..80cea476 --- /dev/null +++ b/docs/pr-media/1772/cli-tool-metadata-api-evidence.json @@ -0,0 +1,25 @@ +{ + "issue": 1772, + "check": "api.models.get_cli_session_messages preserves CLI tool metadata for WebUI rendering", + "session_id": "cli_issue_1772_demo", + "message_count": 2, + "assistant_tool_calls": [ + { + "id": "call_1772_demo", + "type": "function", + "function": { + "name": "terminal", + "arguments": "{\"command\": \"printf ok\"}" + } + } + ], + "tool_result": { + "role": "tool", + "tool_call_id": "call_1772_demo", + "tool_name": "terminal", + "name": "terminal", + "content": { + "output": "ok" + } + } +} diff --git a/docs/pr-media/1784/sidebar-scroll-fixture.png b/docs/pr-media/1784/sidebar-scroll-fixture.png new file mode 100644 index 00000000..3f90334d Binary files /dev/null and b/docs/pr-media/1784/sidebar-scroll-fixture.png differ diff --git a/docs/pr-media/1784/sidebar-scroll-qa.json b/docs/pr-media/1784/sidebar-scroll-qa.json new file mode 100644 index 00000000..75d2cc2a --- /dev/null +++ b/docs/pr-media/1784/sidebar-scroll-qa.json @@ -0,0 +1,25 @@ +{ + "issue": 1784, + "commit_under_test": "9875967", + "fixture": "Synthetic 180-row session sidebar with active sid_0 streaming and long chat pane content.", + "pre_fix_observation": { + "steps": [ + "Set _scrollPinned=true with #messages at scrollTop 0 in a long chat fixture.", + "Dispatch a wheel gesture on the active sidebar session row.", + "Call scrollIfPinned() to mimic the next streaming token render." 
+ ], + "result": "#messages jumped from scrollTop 0 to 3073 immediately after the sidebar wheel gesture, showing the chat auto-scroll path fought non-chat scroll intent." + }, + "post_fix_observation": { + "steps": [ + "Repeat the same fixture and sidebar wheel gesture after the fix.", + "Call scrollIfPinned() immediately, then again after the 350ms non-chat intent guard expires." + ], + "result": { + "afterSidebarWheel": 0, + "afterIntentExpires": 2992, + "sessionListCss": "overscroll-behavior-y: contain; touch-action: pan-y" + }, + "meaning": "A sidebar wheel/touch scroll intent now suppresses only the immediate chat-pane auto-scroll write, leaving the sidebar gesture free while streaming continues." + } +} diff --git a/docs/pr-media/1785/workspace-preview-breadcrumb-before.png b/docs/pr-media/1785/workspace-preview-breadcrumb-before.png new file mode 100644 index 00000000..12d18944 Binary files /dev/null and b/docs/pr-media/1785/workspace-preview-breadcrumb-before.png differ diff --git a/docs/pr-media/1785/workspace-root-breadcrumb-fixed.png b/docs/pr-media/1785/workspace-root-breadcrumb-fixed.png new file mode 100644 index 00000000..6167e6ee Binary files /dev/null and b/docs/pr-media/1785/workspace-root-breadcrumb-fixed.png differ diff --git a/docs/pr-media/1787/issue-1787-transcript-order.png b/docs/pr-media/1787/issue-1787-transcript-order.png new file mode 100644 index 00000000..b08a2a3e Binary files /dev/null and b/docs/pr-media/1787/issue-1787-transcript-order.png differ diff --git a/docs/pr-media/1792/sidebar-first-turn-click-away-fixed.png b/docs/pr-media/1792/sidebar-first-turn-click-away-fixed.png new file mode 100644 index 00000000..b51cdaab Binary files /dev/null and b/docs/pr-media/1792/sidebar-first-turn-click-away-fixed.png differ diff --git a/docs/pr-media/1796/error-toast-after.png b/docs/pr-media/1796/error-toast-after.png new file mode 100644 index 00000000..ec1425a7 Binary files /dev/null and b/docs/pr-media/1796/error-toast-after.png differ 
diff --git a/docs/pr-media/1796/error-toast-before.png b/docs/pr-media/1796/error-toast-before.png new file mode 100644 index 00000000..f9dd0487 Binary files /dev/null and b/docs/pr-media/1796/error-toast-before.png differ diff --git a/docs/pr-media/1796/error-toast-copy.png b/docs/pr-media/1796/error-toast-copy.png new file mode 100644 index 00000000..eeff0f39 Binary files /dev/null and b/docs/pr-media/1796/error-toast-copy.png differ diff --git a/docs/pr-media/1807/providers-api-openai-codex.json b/docs/pr-media/1807/providers-api-openai-codex.json new file mode 100644 index 00000000..08360769 --- /dev/null +++ b/docs/pr-media/1807/providers-api-openai-codex.json @@ -0,0 +1,35 @@ +{ + "id": "openai-codex", + "display_name": "OpenAI Codex", + "has_key": true, + "configurable": false, + "is_oauth": true, + "key_source": "oauth", + "models": [ + { + "id": "gpt-5.5", + "label": "GPT 5.5" + }, + { + "id": "gpt-5.4", + "label": "GPT 5.4" + }, + { + "id": "gpt-5.4-mini", + "label": "GPT 5.4 Mini" + }, + { + "id": "gpt-5.3-codex", + "label": "GPT 5.3 Codex" + }, + { + "id": "gpt-5.2", + "label": "GPT 5.2" + }, + { + "id": "gpt-5.3-codex-spark", + "label": "GPT 5.3 Codex Spark" + } + ], + "models_total": 6 +} diff --git a/docs/pr-media/1807/providers-openai-codex-expanded.png b/docs/pr-media/1807/providers-openai-codex-expanded.png new file mode 100644 index 00000000..458e7538 Binary files /dev/null and b/docs/pr-media/1807/providers-openai-codex-expanded.png differ diff --git a/docs/pr-media/1808/goal-autocomplete.png b/docs/pr-media/1808/goal-autocomplete.png new file mode 100644 index 00000000..e07e32a5 Binary files /dev/null and b/docs/pr-media/1808/goal-autocomplete.png differ diff --git a/docs/pr-media/1808/goal-command-set.png b/docs/pr-media/1808/goal-command-set.png new file mode 100644 index 00000000..f5ee966d Binary files /dev/null and b/docs/pr-media/1808/goal-command-set.png differ diff --git a/docs/pr-media/1808/goal-status-command.png 
b/docs/pr-media/1808/goal-status-command.png new file mode 100644 index 00000000..26eef14b Binary files /dev/null and b/docs/pr-media/1808/goal-status-command.png differ diff --git a/docs/pr-media/1820/no-agent-cron-edit.png b/docs/pr-media/1820/no-agent-cron-edit.png new file mode 100644 index 00000000..ffb1af9f Binary files /dev/null and b/docs/pr-media/1820/no-agent-cron-edit.png differ diff --git a/docs/pr-media/1823/kanban-hard-refresh-diagnostic.png b/docs/pr-media/1823/kanban-hard-refresh-diagnostic.png new file mode 100644 index 00000000..94546c3e Binary files /dev/null and b/docs/pr-media/1823/kanban-hard-refresh-diagnostic.png differ diff --git a/docs/pr-media/1832/auto-compression-running-card.png b/docs/pr-media/1832/auto-compression-running-card.png new file mode 100644 index 00000000..04c33d53 Binary files /dev/null and b/docs/pr-media/1832/auto-compression-running-card.png differ diff --git a/docs/pr-media/1834/compression-toast-visible.png b/docs/pr-media/1834/compression-toast-visible.png new file mode 100644 index 00000000..045ba46b Binary files /dev/null and b/docs/pr-media/1834/compression-toast-visible.png differ diff --git a/docs/pr-media/1835/home-shell-normal.png b/docs/pr-media/1835/home-shell-normal.png new file mode 100644 index 00000000..4a08f3f2 Binary files /dev/null and b/docs/pr-media/1835/home-shell-normal.png differ diff --git a/docs/pr-media/1842/after-hover-no-workspace.png b/docs/pr-media/1842/after-hover-no-workspace.png new file mode 100644 index 00000000..4b87e558 Binary files /dev/null and b/docs/pr-media/1842/after-hover-no-workspace.png differ diff --git a/docs/pr-media/1842/before-hover-no-workspace.png b/docs/pr-media/1842/before-hover-no-workspace.png new file mode 100644 index 00000000..18e83e27 Binary files /dev/null and b/docs/pr-media/1842/before-hover-no-workspace.png differ diff --git a/docs/pr-media/1866/goal-evaluating-status.png b/docs/pr-media/1866/goal-evaluating-status.png new file mode 100644 index 
00000000..a411a43a Binary files /dev/null and b/docs/pr-media/1866/goal-evaluating-status.png differ diff --git a/docs/pr-media/1880/profile-skills-tab.png b/docs/pr-media/1880/profile-skills-tab.png new file mode 100644 index 00000000..df8358a1 Binary files /dev/null and b/docs/pr-media/1880/profile-skills-tab.png differ diff --git a/docs/pr-media/1955/after-workspace-menu.png b/docs/pr-media/1955/after-workspace-menu.png new file mode 100644 index 00000000..a3db2769 Binary files /dev/null and b/docs/pr-media/1955/after-workspace-menu.png differ diff --git a/docs/pr-media/1955/before-workspace-menu.png b/docs/pr-media/1955/before-workspace-menu.png new file mode 100644 index 00000000..3906dce4 Binary files /dev/null and b/docs/pr-media/1955/before-workspace-menu.png differ diff --git a/docs/pr-media/463/status-command-card.png b/docs/pr-media/463/status-command-card.png new file mode 100644 index 00000000..ac63b7d7 Binary files /dev/null and b/docs/pr-media/463/status-command-card.png differ diff --git a/docs/pr-media/500/session-list-virtualization-synthetic.png b/docs/pr-media/500/session-list-virtualization-synthetic.png new file mode 100644 index 00000000..42059029 Binary files /dev/null and b/docs/pr-media/500/session-list-virtualization-synthetic.png differ diff --git a/docs/pr-media/539/plugins-panel.png b/docs/pr-media/539/plugins-panel.png new file mode 100644 index 00000000..b33beb48 Binary files /dev/null and b/docs/pr-media/539/plugins-panel.png differ diff --git a/docs/pr-media/617/task-profile-badges.png b/docs/pr-media/617/task-profile-badges.png new file mode 100644 index 00000000..ae54288c Binary files /dev/null and b/docs/pr-media/617/task-profile-badges.png differ diff --git a/docs/pr-media/617/task-profile-selector.png b/docs/pr-media/617/task-profile-selector.png new file mode 100644 index 00000000..81f067e2 Binary files /dev/null and b/docs/pr-media/617/task-profile-selector.png differ diff --git 
a/docs/pr-media/674/claude-code-import-readonly.png b/docs/pr-media/674/claude-code-import-readonly.png new file mode 100644 index 00000000..26bf7d7f Binary files /dev/null and b/docs/pr-media/674/claude-code-import-readonly.png differ diff --git a/docs/pr-media/693/system-health-panel.png b/docs/pr-media/693/system-health-panel.png new file mode 100644 index 00000000..a228a346 Binary files /dev/null and b/docs/pr-media/693/system-health-panel.png differ diff --git a/docs/pr-media/696/mcp-servers-system-panel.png b/docs/pr-media/696/mcp-servers-system-panel.png new file mode 100644 index 00000000..32c8789a Binary files /dev/null and b/docs/pr-media/696/mcp-servers-system-panel.png differ diff --git a/docs/pr-media/697/mcp-tools-search-filter.png b/docs/pr-media/697/mcp-tools-search-filter.png new file mode 100644 index 00000000..3d681893 Binary files /dev/null and b/docs/pr-media/697/mcp-tools-search-filter.png differ diff --git a/docs/pr-media/706/openrouter-quota-card.png b/docs/pr-media/706/openrouter-quota-card.png new file mode 100644 index 00000000..ed0b7500 Binary files /dev/null and b/docs/pr-media/706/openrouter-quota-card.png differ diff --git a/docs/pr-media/716/agent-health-alert.png b/docs/pr-media/716/agent-health-alert.png new file mode 100644 index 00000000..86b9be89 Binary files /dev/null and b/docs/pr-media/716/agent-health-alert.png differ diff --git a/docs/pr-media/732/gateway-routing-before.png b/docs/pr-media/732/gateway-routing-before.png new file mode 100644 index 00000000..a431e972 Binary files /dev/null and b/docs/pr-media/732/gateway-routing-before.png differ diff --git a/docs/pr-media/732/gateway-routing-metadata.png b/docs/pr-media/732/gateway-routing-metadata.png new file mode 100644 index 00000000..155cf489 Binary files /dev/null and b/docs/pr-media/732/gateway-routing-metadata.png differ diff --git a/docs/pr-media/734/message-window-top.png b/docs/pr-media/734/message-window-top.png new file mode 100644 index 00000000..4b20d841 
Binary files /dev/null and b/docs/pr-media/734/message-window-top.png differ diff --git a/docs/pr-media/activity-disclosure/activity-expanded.png b/docs/pr-media/activity-disclosure/activity-expanded.png new file mode 100644 index 00000000..97f579e2 Binary files /dev/null and b/docs/pr-media/activity-disclosure/activity-expanded.png differ diff --git a/docs/pr-media/activity-disclosure/activity-persisted-closed.png b/docs/pr-media/activity-disclosure/activity-persisted-closed.png new file mode 100644 index 00000000..44c74c64 Binary files /dev/null and b/docs/pr-media/activity-disclosure/activity-persisted-closed.png differ diff --git a/docs/pr-media/issue-1617/after.png b/docs/pr-media/issue-1617/after.png new file mode 100644 index 00000000..0d18929e Binary files /dev/null and b/docs/pr-media/issue-1617/after.png differ diff --git a/docs/pr-media/issue-1617/before.png b/docs/pr-media/issue-1617/before.png new file mode 100644 index 00000000..85686efc Binary files /dev/null and b/docs/pr-media/issue-1617/before.png differ diff --git a/docs/pr-media/issue-1618/after.png b/docs/pr-media/issue-1618/after.png new file mode 100644 index 00000000..f6714777 Binary files /dev/null and b/docs/pr-media/issue-1618/after.png differ diff --git a/docs/pr-media/issue-1618/before.png b/docs/pr-media/issue-1618/before.png new file mode 100644 index 00000000..44ab5bd0 Binary files /dev/null and b/docs/pr-media/issue-1618/before.png differ diff --git a/docs/pr-media/sidebar-hover-drag/after-hover-qa.png b/docs/pr-media/sidebar-hover-drag/after-hover-qa.png new file mode 100644 index 00000000..7142a1ab Binary files /dev/null and b/docs/pr-media/sidebar-hover-drag/after-hover-qa.png differ diff --git a/docs/rfcs/README.md b/docs/rfcs/README.md new file mode 100644 index 00000000..9f40371a --- /dev/null +++ b/docs/rfcs/README.md @@ -0,0 +1,36 @@ +# RFCs + +This directory holds design documents for hermes-webui features that are +worth thinking through in writing before (or alongside) 
implementation — +typically when the change touches durability, recovery, schema, or cross- +cutting infrastructure. + +## Conventions + +- One file per RFC. Filename is the topic (kebab-case), not a number. +- Top of every RFC carries a small header: + + - **Status:** Proposed | Accepted | Implemented | Withdrawn + - **Author:** @github-handle + - **Created:** YYYY-MM-DD + +- Sections usually include: Problem, Goals, Non-goals, Proposal, Open + questions, Rollout plan. Skip what doesn't apply. +- An RFC is a starting point for review. Comments and revisions land via PR + edits, not separate discussion threads. + +## When to file an RFC + +- The change is large enough that you want consensus before writing code. +- The change touches data-at-rest formats or recovery semantics. +- The change introduces a new architectural primitive (journal, queue, + scheduler, cache layer) that other features will build on. +- A reviewer asks for one during code review. + +When in doubt, just ship the code — small features don't need RFCs. +First-time contributor RFCs should be discussed in an issue before opening a PR. + +## Current RFCs + +- [`turn-journal.md`](turn-journal.md) — Crash-safe WebUI turn journal for + recovering interrupted chat submissions. diff --git a/docs/rfcs/turn-journal.md b/docs/rfcs/turn-journal.md new file mode 100644 index 00000000..6c0924f4 --- /dev/null +++ b/docs/rfcs/turn-journal.md @@ -0,0 +1,158 @@ +# RFC: WebUI Turn Journal for Crash-Safe Chat Submissions + +- **Status:** Proposed +- **Author:** @ai-ag2026 +- **Created:** 2026-05-11 + +## Problem + +A WebUI chat turn crosses several durability boundaries: + +1. browser submits a user message, +2. WebUI creates or updates session runtime metadata, +3. the agent worker starts streaming, +4. assistant output is appended, +5. the JSON sidecar and derived index are saved. 
+ +If the server crashes between submission and the final sidecar save, recovery has to infer what happened from `pending_user_message`, `active_stream_id`, `.json.bak`, `_index.json`, and `state.db`. Those safeguards are useful, but they are still reconstructing intent after the fact. + +The missing primitive is a small write-ahead journal for turns: record the submitted user turn durably before the worker starts, then advance the journal as the turn progresses. + +## Goals + +- Preserve the exact user-submitted turn, including attachments metadata, before any provider or worker work starts. +- Make crash recovery deterministic: a submitted-but-unfinished turn can be reported or reconstructed without guessing. +- Keep the journal append/update format simple enough for startup recovery, CLI audit, and future API repair endpoints. +- Avoid turning recovery into a background daemon. This is storage hygiene, not a long-running service. + +## Non-goals + +- Replacing `state.db.sessions` or WebUI JSON sidecars. +- Journaling every token or every SSE event. +- Replaying tool calls or provider streams. +- Automatically inventing assistant messages after ambiguous crashes. + +## Proposed storage + +Use one JSONL file per session under the existing WebUI state area: + +```text +<state_dir>/_turn_journal/<session_id>.jsonl +``` + +Each line is an immutable event. Recovery can scan by `turn_id` and choose the latest status. 
+ +### Event shape + +```json +{ + "version": 1, + "event": "submitted", + "turn_id": "20260511T001122Z-abcdef", + "session_id": "abc123", + "stream_id": "stream-xyz", + "created_at": 1778458282.123, + "role": "user", + "content": "...", + "attachments": [], + "workspace": "/workspace", + "model": "openai/gpt-5", + "model_provider": "openai" +} +``` + +Later events for the same `turn_id`: + +```json +{"version":1,"event":"worker_started","turn_id":"...","created_at":1778458283.0} +{"version":1,"event":"assistant_started","turn_id":"...","created_at":1778458284.0} +{"version":1,"event":"completed","turn_id":"...","created_at":1778458299.0,"assistant_message_index":12} +{"version":1,"event":"interrupted","turn_id":"...","created_at":1778458301.0,"reason":"server_startup_recovery"} +``` + +## Turn state machine + +```text +submitted -> worker_started -> assistant_started -> completed +submitted -> interrupted +worker_started -> interrupted +assistant_started -> interrupted +``` + +`completed` is terminal. `interrupted` is terminal unless a later explicit repair creates a new turn. Recovery should not silently resume a provider call. + +## Write rules + +1. On `/api/chat/start` or equivalent turn-submission path: + - generate `turn_id`, + - append `submitted`, + - fsync the journal file, + - only then start the worker. +2. When worker thread enters `_run_agent_streaming`, append `worker_started`. +3. When assistant output is first persisted or clearly begins, append `assistant_started`. +4. After the sidecar save that includes the assistant answer succeeds, append `completed`. +5. On cancellation or known worker exception, append `interrupted` with a reason. + +## Startup recovery semantics + +On startup, for each journal file: + +- Latest event is `completed`: no action. +- Latest event is `submitted` or `worker_started` and no matching user message exists in sidecar: + - append/recover the user message into the session sidecar with a recovery marker. 
+- Latest event is `submitted`, `worker_started`, or `assistant_started` and no completed assistant turn exists: + - add a visible interruption marker, not a fake assistant answer. +- Existing `.json.bak` and `state.db` recovery still run first so the sidecar is as complete as possible before journal reconciliation. + +## Audit additions + +`audit_session_recovery()` can report: + +- `turn_journal_pending_turn` — repairable if the user message is absent from sidecar. +- `turn_journal_interrupted_turn` — ok/warn depending on whether a visible marker exists. +- `turn_journal_malformed_event` — manual review. + +Safe repair should only materialize submitted user messages and interruption markers when the journal event content is valid JSON and the target message is absent. + +## API surface + +Initial read-only endpoint can be folded into the existing recovery audit: + +```text +GET /api/session/recovery/audit +``` + +Later, if needed: + +```text +GET /api/session/turn-journal?session_id=<session_id> +``` + +The latter should be diagnostic-only and redact or omit large attachment payloads. + +## Rollout plan + +1. Land backup/sidecar recovery and audit primitives. +2. Add this journal writer in the turn-submission path behind no config flag; it is local-only and append-only. +3. Add read-only audit reporting for pending journal turns. +4. Add safe repair for missing user messages and interruption markers. +5. Once stable, consider pruning completed journal entries older than a retention window, but only after sidecar/index recovery has no findings. + +## Open questions + +- Exact place to define `turn_id` so browser retry and server retry do not duplicate the same user message. +- Whether attachment files need their own durable manifest entry or whether metadata-only is enough for v1. +- How much of the assistant partial output, if any, should be recoverable after `assistant_started` but before `completed`. 
+- Whether completed journal entries should be compacted into a per-session checkpoint file. + +## Minimal implementation slice + +The first implementation PR should be deliberately small: + +- helper: `append_turn_journal_event(session_id, event)` +- helper: `read_turn_journal(session_id)` +- unit tests for atomic append, malformed-line tolerance, and state derivation +- one call site: append `submitted` before worker start +- audit-only report of pending journal turns + +Do **not** combine the first implementation with replay/repair. Replay is where most of the bugs in WAL systems live; ship the writer and audit first, prove the format, then add repair. diff --git a/docs/supervisor.md b/docs/supervisor.md index 4ec433b0..85821a69 100644 --- a/docs/supervisor.md +++ b/docs/supervisor.md @@ -235,3 +235,44 @@ PID PPID CMD If PPID is ``1`` (init) when it should be the supervisor, the orphan-server loop is happening — re-check that ``--foreground`` (or one of the env vars) is reaching the process. + +## HTTP watchdog / deep health + +``KeepAlive`` / ``Restart=always`` only recover a process that exits. If the +process is still listening on the port but request handling is wedged, pair your +supervisor with an HTTP probe and force a restart when the probe fails. + +Hermes Web UI exposes two health levels: + +- ``/health`` — cheap liveness probe with ``active_streams``, uptime, and an + ``accept_loop`` heartbeat counter. +- ``/health?deep=1`` — readiness probe that briefly acquires the stream lock, + reads the sidebar/session path, reads projects state, and touches Hermes + ``state.db`` if it exists. Use this for watchdogs. + +At startup the server also tries to raise its file-descriptor soft limit to +4096 on platforms that support ``RLIMIT_NOFILE``. That is defense in depth for +persistent hosts: leaks should still be fixed, but a higher soft limit gives +you more diagnostic headroom before request handling falls over. 
+ +Minimal macOS launchd watchdog script: + +```bash +#!/usr/bin/env bash +set -euo pipefail +LABEL="com.example.hermes-webui" +BASE="http://127.0.0.1:8787" + +if ! curl -fsS --max-time 10 "$BASE/health?deep=1" >/dev/null; then + launchctl kickstart -k "gui/$(id -u)/$LABEL" +fi +``` + +Run it every few minutes from a separate ``StartInterval`` LaunchAgent. For +systemd, prefer a timer/service pair that runs the same curl probe and +``systemctl --user restart hermes-webui.service`` on failure. + +The ``accept_loop.requests_total`` value should increase when probes arrive. If +it stays flat while the process is still alive, the server accept loop is not +making progress; capture logs/thread samples before restarting if you are +collecting diagnostics for a bug report. diff --git a/docs/troubleshooting.md b/docs/troubleshooting.md new file mode 100644 index 00000000..0a9a4c45 --- /dev/null +++ b/docs/troubleshooting.md @@ -0,0 +1,97 @@ +# Troubleshooting + +Concrete diagnostic flows for the most common failure modes when running Hermes WebUI. Each entry has the symptom, the diagnostic commands you should run *before* opening an issue, and the fix that has worked for past reporters. + +If your symptom isn't listed and the diagnostics don't narrow it down, file a bug at https://github.com/nesquena/hermes-webui/issues — include the **full output** of every command in the relevant section. + +--- + +## "AIAgent not available -- check that hermes-agent is on sys.path" + +**Symptom.** WebUI starts, shows the chat interface, but every chat request fails immediately with this error in the response or the server log. As of v0.51.6 the error includes a diagnostic block with the running Python interpreter, the relevant `sys.path` entries, and the most-common fix; on older versions the message is bare. + +**Why it happens.** The WebUI imports the agent class at chat time via `from run_agent import AIAgent`. 
That import only succeeds if the running Python's `sys.path` contains either the hermes-agent checkout or a pip-installed copy of the agent. Three common failure modes: + +1. **Agent installed but not on `sys.path`.** Most common. The agent is checked out somewhere (e.g. `~/Programmes/hermes-agent`), the WebUI was launched with a Python that doesn't know about it, and there's no `pip install -e .` linking the two. +2. **Symlink with a typo or wrong target.** A symlink to the agent looks correct on `ls`, but `readlink` resolves to a path that doesn't exist or doesn't contain `agent/__init__.py`. +3. **`HERMES_WEBUI_AGENT_DIR` set to the wrong directory.** Override env var beats auto-discovery and points at a directory that has no agent code. + +### Step 1 — confirm the agent location + +```bash +# If you have ~/hermes-agent (the default location): +ls -la ~/hermes-agent +readlink ~/hermes-agent # if it's a symlink, where does it resolve? +ls ~/hermes-agent/agent/__init__.py 2>&1 +``` + +The third command must succeed (the file must exist). If it fails, your symlink is broken or pointing at a directory that's missing the agent module — fix that first. + +### Step 2 — confirm the WebUI is using the right Python + +```bash +cd ~/hermes-webui && ./start.sh 2>&1 | grep -iE 'agent|python|hermes_webui_python' | head -20 +``` + +The startup banner prints which Python and agent dir it resolved. If the agent dir is empty or the Python is the wrong one, set the override: + +```bash +export HERMES_WEBUI_AGENT_DIR=/absolute/path/to/hermes-agent +export HERMES_WEBUI_PYTHON=/absolute/path/to/agent/venv/bin/python +./start.sh +``` + +### Step 3 — install the agent in editable mode + +This is the most common fix and resolves the original issue #1695: + +```bash +cd /path/to/hermes-agent # the directory holding pyproject.toml + the agent/ module +pip install -e . 
# use the same python that runs the WebUI +``` + +Then restart the WebUI: + +```bash +cd ~/hermes-webui +./start.sh +``` + +### Step 4 — verify by importing manually + +If the error persists after steps 1-3, check whether the WebUI's Python can import the agent at all: + +```bash +$HERMES_WEBUI_PYTHON -c "from run_agent import AIAgent; print('ok')" 2>&1 +``` + +(Replace `$HERMES_WEBUI_PYTHON` with the actual Python path from step 2 if the env var isn't set.) If this prints `ok`, the agent IS on `sys.path` for that Python — and the WebUI should work. + +If this fails, `import run_agent` itself is broken — check that the agent's pyproject.toml lists `run_agent` as a top-level module or that the agent dir is on PYTHONPATH: + +```bash +PYTHONPATH=/path/to/hermes-agent $HERMES_WEBUI_PYTHON -c "from run_agent import AIAgent; print('ok')" +``` + +If adding PYTHONPATH fixes it, persist the path either via `pip install -e .` (preferred) or by setting `HERMES_WEBUI_AGENT_DIR` to that directory. + +### When to file a bug + +If, after running steps 1-4, the WebUI still reports `AIAgent not available` *even though* `pip install -e .` succeeded *and* the manual check `PYTHONPATH=... python -c "from run_agent import AIAgent"` succeeds with the same Python — that's a real WebUI bug. File at https://github.com/nesquena/hermes-webui/issues with: + +- The output of every command in steps 1-4 +- The full diagnostic block printed by the WebUI's `ImportError` (v0.51.6+) +- Your OS, Python version, and how the agent was installed + +--- + +## Other troubleshooting + +This document grows over time. If a recurring failure mode isn't covered here yet, add it via PR. The format for each entry: **Symptom → Why → Diagnostic commands → Fix → When to file a bug**. + +Related references: + +- [`docs/supervisor.md`](supervisor.md) — process-supervisor setup (launchd, systemd, supervisord, runit/s6) including the bootstrap supervisor-foreground flag. +- [`docs/docker.md`](docker.md) — Docker compose setup, common failure modes, bind-mount migration.
+- [`docs/wsl-autostart.md`](wsl-autostart.md) — WSL2 auto-start at login on Windows. +- [`docs/EXTENSIONS.md`](EXTENSIONS.md) — WebUI extension injection, security model, examples. diff --git a/docs/wsl-autostart.md b/docs/wsl-autostart.md new file mode 100644 index 00000000..0ae9f89e --- /dev/null +++ b/docs/wsl-autostart.md @@ -0,0 +1,126 @@ +# Windows / WSL auto-start + +Hermes WebUI runs well under WSL2, but native Windows login does not automatically start Linux user processes. This guide covers two supported options: + +1. **WSL session startup** — simple and low-risk. WebUI starts the next time you open a WSL shell. +2. **Windows Task Scheduler** — true Windows logon startup. Windows invokes `wsl.exe`, which runs the WSL launch script. + +Both paths use the same WSL launch script: + +```text +scripts/wsl/hermes_webui_autostart.sh +``` + +The script is safe to call repeatedly. It uses a lock file, checks the `/health` endpoint, checks a pid file, and writes logs before starting `start.sh --foreground` in the background. It does not hardcode a user path; by default it derives the repository root from its own location. 
+ +## Script settings + +The WSL launcher supports these environment variables: + +| Variable | Default | Purpose | +|---|---|---| +| `HERMES_WEBUI_REPO` | repo containing the script | WebUI checkout to start | +| `HERMES_WEBUI_LOG_DIR` | `$HOME/.hermes/webui/logs` | Autostart and WebUI logs | +| `HERMES_WEBUI_HOST` | `127.0.0.1` | Host passed through to `start.sh` / `bootstrap.py` | +| `HERMES_WEBUI_PORT` | `8787` | WebUI port and health-check port | +| `HERMES_WEBUI_HEALTH_URL` | `http://127.0.0.1:$HERMES_WEBUI_PORT/health` | URL used to decide whether WebUI is already running | +| `HERMES_WEBUI_PID_FILE` | `$HERMES_WEBUI_LOG_DIR/hermes-webui.pid` | pid file used for duplicate prevention | +| `HERMES_WEBUI_REQUIRE_AGENT_PROCESS` | `0` | Optional: set to `1` only if your local setup requires a separate Hermes process before WebUI starts | + +Make the script executable once inside WSL: + +```bash +cd /path/to/hermes-webui +chmod +x scripts/wsl/hermes_webui_autostart.sh +``` + +Run it manually to verify your paths and logs: + +```bash +scripts/wsl/hermes_webui_autostart.sh +curl -fsS http://127.0.0.1:8787/health +``` + +Logs are written to: + +```text +$HOME/.hermes/webui/logs/webui_autostart.log +$HOME/.hermes/webui/logs/hermes_webui.log +``` + +## Option 1: WSL session startup + +This starts WebUI when your WSL login shell starts. It is the easiest option if you already open WSL during your day. + +Add this to `~/.profile` or `~/.bashrc` inside WSL, adjusting the repo path: + +```bash +if [ -x "$HOME/hermes-webui/scripts/wsl/hermes_webui_autostart.sh" ]; then + HERMES_WEBUI_REPO="$HOME/hermes-webui" \ + "$HOME/hermes-webui/scripts/wsl/hermes_webui_autostart.sh" >/dev/null 2>&1 & +fi +``` + +Open a new WSL terminal and check: + +```bash +curl -fsS http://127.0.0.1:8787/health +``` + +If you open several WSL terminals, the launcher should still start only one WebUI process because the lock, health check, and pid file all converge on "already running". 
+ +## Option 2: Windows Task Scheduler startup + +Use this if you want WebUI to start automatically at Windows logon even before you open a WSL terminal. + +The helper PowerShell script is: + +```text +scripts/windows/setup_webui_autostart.ps1 +``` + +From Windows PowerShell, run it with the WSL path to the launch script: + +```powershell +Set-ExecutionPolicy -Scope Process -ExecutionPolicy Bypass +.\scripts\windows\setup_webui_autostart.ps1 ` + -WslScriptPath "/home/your-user/hermes-webui/scripts/wsl/hermes_webui_autostart.sh" ` + -Distro "Ubuntu" +``` + +Notes: + +- `-Distro` is optional. Omit it to use your default WSL distro. +- The default task name is `HermesWebUIAutoStart`; pass `-TaskName` if you need a different name. +- The script is idempotent: rerunning it updates the existing scheduled task instead of creating duplicates. +- The task runs as the current Windows user at logon with least privilege. +- Add `-WhatIf` to preview the scheduled task registration. +- Add `-RunNow` to start the task immediately after registration. +- Add `-SkipValidation` only if you need to register the task before the WSL path exists. 
+ +To inspect or remove the task later: + +```powershell +Get-ScheduledTask -TaskName HermesWebUIAutoStart +Unregister-ScheduledTask -TaskName HermesWebUIAutoStart -Confirm:$false +``` + +## Troubleshooting + +Check the WSL logs first: + +```bash +tail -n 80 "$HOME/.hermes/webui/logs/webui_autostart.log" +tail -n 80 "$HOME/.hermes/webui/logs/hermes_webui.log" +``` + +Common causes: + +| Symptom | Likely cause | Fix | +|---|---|---| +| Task exists but WebUI is not reachable | WSL script path is wrong for the selected distro | Re-run the PowerShell setup with the correct `-WslScriptPath` and `-Distro` | +| WebUI starts only after opening WSL | You used the WSL session startup option, not Task Scheduler | Install the Windows scheduled task | +| Multiple login events happen quickly | Normal Windows startup behavior | The WSL script should log `already running` and avoid duplicate processes | +| Health check fails but pid exists | WebUI is still booting or the port differs | Check `HERMES_WEBUI_PORT` and `hermes_webui.log` | + +If you want WSL2 systemd integration instead, see `docs/supervisor.md` for foreground process-supervisor guidance and adapt the Linux `systemd --user` pattern to your distro. diff --git a/mcp_server.py b/mcp_server.py new file mode 100644 index 00000000..53ff2ef4 --- /dev/null +++ b/mcp_server.py @@ -0,0 +1,567 @@ +#!/usr/bin/env python3 +""" +Hermes WebUI MCP Server — exposes project and session management +as MCP tools for any MCP-compatible agent. + +Option A rewrite (2026-05-08): imports api.models and api.profiles +directly from the webui codebase, using canonical helpers for +locking, profile scoping, index consistency, and validation. 
+ + pip install mcp # one-time setup + python3 mcp_server.py # start via stdio + +MCP config for Hermes Agent (add to config.yaml): + mcp_servers: + hermes-webui: + command: /path/to/venv/bin/python3 + args: [/path/to/hermes-webui/mcp_server.py] + env: + HERMES_WEBUI_PASSWORD: your_password + +Profile override (optional): + args: [/path/to/hermes-webui/mcp_server.py, --profile, myprofile] + +AI-authoring disclosure: this file was rewritten by MILO (Hermes Agent) +under human direction, per maintainer guidelines for #1616. +""" + +import argparse +import json +import os +import re +import sys +import time +import uuid +from pathlib import Path + +from mcp.server import Server +from mcp.server.stdio import stdio_server +from mcp.types import Tool, TextContent + +# ── Ensure the repo root is on sys.path so api.* imports work ───────────── +_REPO_ROOT = Path(__file__).parent.resolve() +if str(_REPO_ROOT) not in sys.path: + sys.path.insert(0, str(_REPO_ROOT)) + +# ── CLI: optional --profile override ────────────────────────────────────── +_profile_arg: str | None = None +_parser = argparse.ArgumentParser(add_help=False) +_parser.add_argument("--profile", type=str, default=None) +_args, _unknown = _parser.parse_known_args() +_profile_arg = _args.profile + +# ── Import webui canonical modules (after path setup) ───────────────────── +import api.config as _cfg +from api.config import ( + STATE_DIR, SESSION_DIR, SESSION_INDEX_FILE, PROJECTS_FILE, HOME, +) +from api.models import load_projects, save_projects +from api.profiles import get_active_profile_name, _is_root_profile, _profiles_match + +# ── Apply --profile override before any module uses get_active_profile_name +if _profile_arg is not None: + import api.profiles as _profiles + _profiles._active_profile = _profile_arg + +# ── API auth state ───────────────────────────────────────────────────────── +# Mirror the env-var contract used by api/config.py:32-33 so a non-default +# WebUI port/host (e.g. 
when 8787 is held by another service on the host) +# Just Works without configuration drift between the WebUI process and MCP. +WEBUI_HOST = os.environ.get("HERMES_WEBUI_HOST", "127.0.0.1") +WEBUI_PORT = os.environ.get("HERMES_WEBUI_PORT", "8787") +WEBUI_URL = f"http://{WEBUI_HOST}:{WEBUI_PORT}" +_auth_cookie: str | None = None +_auth_expires: float = 0 # unix timestamp after which we re-auth + +server = Server("hermes-webui") + + +# ═══════════════════════════════════════════════════════════════════════════ +# Helpers — filesystem (project CRUD via canonical api.models) +# ═══════════════════════════════════════════════════════════════════════════ + +def _active_profile() -> str: + """Shorthand for the current profile name (--profile or auto-detected).""" + return get_active_profile_name() or 'default' + + +def _validate_color(color: str | None) -> str | None: + """Return an error string if color is invalid, else None.""" + if color is not None and not re.match(r"^#[0-9a-fA-F]{3,8}$", color): + return "Invalid color format (use #RGB, #RRGGBB, or #RRGGBBAA)" + return None + + +def _load_index() -> list: + """Read the session index. 
Falls back to empty list on failure.""" + if not SESSION_INDEX_FILE.exists(): + return [] + try: + data = json.loads(SESSION_INDEX_FILE.read_text(encoding="utf-8")) + except Exception: + return [] + # A parseable but wrong-shaped index (a JSON object, or a list holding + # non-dict rows) would crash callers that call ``row.get(...)`` on every + # entry; treat it like any other read failure and keep only dict rows. + if not isinstance(data, list): + return [] + return [row for row in data if isinstance(row, dict)] + + +def _session_compact(row: dict) -> dict: + """Lightweight compact representation of a session index entry.""" + return { + "session_id": row.get("session_id"), + "title": row.get("title"), + "project_id": row.get("project_id"), + "workspace": row.get("workspace"), + "model": row.get("model"), + "message_count": row.get("message_count", 0), + "source_tag": row.get("source_tag"), + "is_cli_session": row.get("is_cli_session", False), + "profile": row.get("profile"), + } + + +# ═══════════════════════════════════════════════════════════════════════════ +# Helpers — HTTP API (for mutations that need cache sync) +# ═══════════════════════════════════════════════════════════════════════════ + +def _api_password() -> str | None: + """Return the plaintext webui password from HERMES_WEBUI_PASSWORD, or None. + + settings.json stores only the bcrypt hash, which the login endpoint cannot + accept — it calls verify_password(plaintext) against the stored hash. So + there's no usable fallback when the env var is unset; the MCP simply runs + in unauthenticated mode and any auth-protected mutation will fail clearly + with the server's 401 instead of silently sending an unusable hash.
+ """ + pw = os.environ.get("HERMES_WEBUI_PASSWORD", "").strip() + return pw or None + + +def _api_auth() -> str | None: + """Authenticate and return cookie value, or None if auth disabled/fails.""" + global _auth_cookie, _auth_expires + + pw = _api_password() + if not pw: + return None # auth not enabled — API calls will fail anyway + + # Reuse cookie if still valid (25 days — server issues 30-day cookies) + if _auth_cookie and time.time() < _auth_expires: + return _auth_cookie + + import urllib.request + + try: + req = urllib.request.Request( + f"{WEBUI_URL}/api/auth/login", + data=json.dumps({"password": pw}).encode(), + headers={"Content-Type": "application/json"}, + method="POST", + ) + resp = urllib.request.urlopen(req, timeout=5) + cookie = resp.headers.get("Set-Cookie", "") + if cookie: + _auth_cookie = cookie.split(";")[0] # "hermes_session=VALUE; ..." + _auth_expires = time.time() + 25 * 86400 # 25 days + return _auth_cookie + except Exception: + _auth_cookie = None + return None + + +def _api_post(endpoint: str, body: dict) -> dict: + """POST to webui API with auth cookie. 
Returns parsed JSON response. + + Failures are reported as ``{"error": "..."}`` rather than raised, so tool + handlers can test ``"error" in result``: HTTP error statuses become + ``API <code>: <detail>`` and everything else ``API unreachable: <exc>``. + """ + import urllib.request + import urllib.error + + cookie = _api_auth() + headers = {"Content-Type": "application/json"} + if cookie: + headers["Cookie"] = cookie + + try: + req = urllib.request.Request( + f"{WEBUI_URL}{endpoint}", + data=json.dumps(body).encode(), + headers=headers, + method="POST", + ) + # Context manager releases the connection even when the body is not JSON. + with urllib.request.urlopen(req, timeout=5) as resp: + return json.loads(resp.read()) + except urllib.error.HTTPError as e: + # The error body is not guaranteed to be a JSON object (proxies and + # crash pages send HTML or plain text). An exception raised inside this + # handler is NOT caught by the sibling ``except Exception`` below, so + # parse defensively and fall back to "unknown". + try: + err_body = json.loads(e.read()) + except Exception: + err_body = None + detail = err_body.get("error", "unknown") if isinstance(err_body, dict) else "unknown" + return {"error": f"API {e.code}: {detail}"} + except Exception as e: + return {"error": f"API unreachable: {e}"} + + +# ═══════════════════════════════════════════════════════════════════════════ +# Tool handlers — read-only (filesystem, profile-aware) +# ═══════════════════════════════════════════════════════════════════════════ + +async def handle_list_projects(_arguments: dict) -> list[TextContent]: + """List all projects with session counts, scoped to active profile.""" + projects = load_projects() + active = _active_profile() + index = _load_index() + + # Session counts per project (from index) + counts: dict[str, int] = {} + for s in index: + pid = s.get("project_id") + if pid: + counts[pid] = counts.get(pid, 0) + 1 + + result = [] + for p in projects: + # Profile filter: legacy untagged rows are treated as 'default' by + # _profiles_match, so non-root profiles correctly hide them.
+ if not _profiles_match(p.get("profile"), active): + continue + entry = dict(p) + entry["session_count"] = counts.get(p["project_id"], 0) + result.append(entry) + + return [TextContent(type="text", text=json.dumps(result, ensure_ascii=False, indent=2))] + + +async def handle_list_sessions(arguments: dict) -> list[TextContent]: + """List sessions, optionally filtered by project or unassigned status.""" + project_id = arguments.get("project_id") + unassigned = arguments.get("unassigned", False) + limit = max(1, min(500, arguments.get("limit", 50))) + active = _active_profile() + + index = _load_index() + sessions = [_session_compact(s) for s in index if s.get("session_id")] + + # Filter by profile: legacy untagged rows are treated as 'default' by + # _profiles_match (canonical convention), so non-root profiles hide them. + sessions = [s for s in sessions if _profiles_match(s.get("profile"), active)] + + if unassigned: + sessions = [s for s in sessions if not s["project_id"]] + elif project_id: + sessions = [s for s in sessions if s["project_id"] == project_id] + + sessions = sessions[:limit] + return [TextContent(type="text", text=json.dumps(sessions, ensure_ascii=False, indent=2))] + + +# ═══════════════════════════════════════════════════════════════════════════ +# Tool handlers — project CRUD (canonical helpers, profile-scoped) +# ═══════════════════════════════════════════════════════════════════════════ + +async def handle_create_project(arguments: dict) -> list[TextContent]: + """Create a new project (profile-scoped, exact-match title collision).""" + name = arguments.get("name", "").strip()[:128] + if not name: + return [TextContent(type="text", text=json.dumps( + {"error": "name is required"}, ensure_ascii=False))] + + color = arguments.get("color") + color_err = _validate_color(color) + if color_err: + return [TextContent(type="text", text=json.dumps( + {"error": color_err}, ensure_ascii=False))] + + active = _active_profile() + projects = load_projects() + 
+ # Title collision: exact match (consistent with ensure_cron_project) + if any(p.get("name") == name and _profiles_match(p.get("profile"), active) + for p in projects): + return [TextContent(type="text", text=json.dumps( + {"error": f"Project '{name}' already exists"}, ensure_ascii=False))] + + proj = { + "project_id": uuid.uuid4().hex[:12], + "name": name, + "color": color, + "profile": active, + "created_at": time.time(), + } + projects.append(proj) + save_projects(projects) + + proj["session_count"] = 0 + return [TextContent(type="text", text=json.dumps(proj, ensure_ascii=False, indent=2))] + + +async def handle_rename_project(arguments: dict) -> list[TextContent]: + """Rename a project and optionally change its color (profile-checked).""" + project_id = arguments.get("project_id") + name = arguments.get("name", "").strip()[:128] + if not project_id or not name: + return [TextContent(type="text", text=json.dumps( + {"error": "project_id and name are required"}, ensure_ascii=False))] + + color = arguments.get("color") + color_err = _validate_color(color) + if color_err: + return [TextContent(type="text", text=json.dumps( + {"error": color_err}, ensure_ascii=False))] + + active = _active_profile() + projects = load_projects() + proj = next((p for p in projects if p["project_id"] == project_id), None) + if not proj: + return [TextContent(type="text", text=json.dumps( + {"error": "Project not found"}, ensure_ascii=False))] + + # #1614: profile ownership check + if not _profiles_match(proj.get("profile"), active): + return [TextContent(type="text", text=json.dumps( + {"error": "Project not found"}, ensure_ascii=False))] + + proj["name"] = name + if color is not None: + proj["color"] = color + save_projects(projects) + return [TextContent(type="text", text=json.dumps(proj, ensure_ascii=False, indent=2))] + + +async def handle_delete_project(arguments: dict) -> list[TextContent]: + """Delete a project and unassign all its sessions (profile-checked).""" + project_id = 
arguments.get("project_id") + if not project_id: + return [TextContent(type="text", text=json.dumps( + {"error": "project_id is required"}, ensure_ascii=False))] + + active = _active_profile() + projects = load_projects() + proj = next((p for p in projects if p["project_id"] == project_id), None) + if not proj: + return [TextContent(type="text", text=json.dumps( + {"error": "Project not found"}, ensure_ascii=False))] + + # #1614: profile ownership check + if not _profiles_match(proj.get("profile"), active): + return [TextContent(type="text", text=json.dumps( + {"error": "Project not found"}, ensure_ascii=False))] + + projects = [p for p in projects if p["project_id"] != project_id] + save_projects(projects) + + # Unassign sessions only when we can do it cache-safely via the HTTP API. + # The previous filesystem fallback wrote session_data directly with + # os.replace(), which bypassed _write_session_index() in api/models.py + # and left _index.json holding the stale project_id — a running WebUI + # would still group those sessions under the deleted project until a + # subsequent re-compact. Even calling Session.save() in-process would + # not help because the WebUI's SESSIONS dict cache (a separate process) + # still has the old project_id and overwrites our update on its next + # save. The HTTP API is the only cache-safe path; without auth we + # refuse and surface the limitation so the operator can act. 
+ has_auth = bool(_api_password()) + if not has_auth: + return [TextContent(type="text", text=json.dumps({ + "ok": True, + "deleted": proj["name"], + "unassigned_sessions": 0, + "warning": "Set HERMES_WEBUI_PASSWORD to unassign sessions; " + "without auth the session index cannot be safely " + "updated and direct filesystem writes would cause " + "index drift in a running WebUI.", + }, ensure_ascii=False))] + + unassigned = 0 + if SESSION_DIR.exists(): + for p in SESSION_DIR.glob("*.json"): + if p.name.startswith("_"): + continue + try: + session_data = json.loads(p.read_text(encoding="utf-8")) + if session_data.get("project_id") == project_id: + sid = p.stem + result = _api_post("/api/session/move", + {"session_id": sid, "project_id": None}) + if "ok" in result or "session" in result: + unassigned += 1 + except Exception: + pass + + return [TextContent(type="text", text=json.dumps({ + "ok": True, + "deleted": proj["name"], + "unassigned_sessions": unassigned, + }, ensure_ascii=False))] + + +# ═══════════════════════════════════════════════════════════════════════════ +# Tool handlers — mutations (HTTP API with auth, cache-safe) +# ═══════════════════════════════════════════════════════════════════════════ + +async def handle_rename_session(arguments: dict) -> list[TextContent]: + """Rename a session via the authenticated webui API (cache-safe).""" + session_id = arguments.get("session_id") + title = arguments.get("title", "").strip()[:80] + if not session_id or not title: + return [TextContent(type="text", text=json.dumps( + {"error": "session_id and title are required"}, ensure_ascii=False))] + + result = _api_post("/api/session/rename", + {"session_id": session_id, "title": title}) + if "error" in result: + return [TextContent(type="text", text=json.dumps(result, ensure_ascii=False))] + + session = result.get("session", {}) + return [TextContent(type="text", text=json.dumps({ + "ok": True, + "session_id": session_id, + "title": session.get("title", title), + 
"method": "api", + }, ensure_ascii=False, indent=2))] + + +async def handle_move_session(arguments: dict) -> list[TextContent]: + """Assign a session to a project via the authenticated webui API (cache-safe).""" + session_id = arguments.get("session_id") + project_id = arguments.get("project_id") # None/null = unassign + if not session_id: + return [TextContent(type="text", text=json.dumps( + {"error": "session_id is required"}, ensure_ascii=False))] + + # If project_id is provided, verify it exists and is profile-accessible + if project_id is not None: + projects = load_projects() + active = _active_profile() + target = next((p for p in projects if p["project_id"] == project_id), None) + if not target: + return [TextContent(type="text", text=json.dumps( + {"error": "Project not found"}, ensure_ascii=False))] + # #1614: refuse moves into projects owned by another profile + if not _profiles_match(target.get("profile"), active): + return [TextContent(type="text", text=json.dumps( + {"error": "Project not found"}, ensure_ascii=False))] + + result = _api_post("/api/session/move", + {"session_id": session_id, "project_id": project_id}) + if "error" in result: + return [TextContent(type="text", text=json.dumps(result, ensure_ascii=False))] + + session = result.get("session", {}) + return [TextContent(type="text", text=json.dumps({ + "ok": True, + "session_id": session_id, + "project_id": project_id, + "title": session.get("title"), + "method": "api", + }, ensure_ascii=False, indent=2))] + + +# ═══════════════════════════════════════════════════════════════════════════ +# MCP Server wiring +# ═══════════════════════════════════════════════════════════════════════════ + +TOOLS = [ + Tool( + name="list_projects", + description="List all session projects with their IDs, names, colors, and session counts (scoped to active profile).", + inputSchema={"type": "object", "properties": {}, "required": []}, + ), + Tool( + name="create_project", + description="Create a new project 
for organizing sessions (profile-scoped).", + inputSchema={ + "type": "object", + "properties": { + "name": {"type": "string", "description": "Project name (max 128 chars)"}, + "color": {"type": "string", "description": "Optional hex color (#RGB, #RRGGBB, or #RRGGBBAA)"}, + }, + "required": ["name"], + }, + ), + Tool( + name="rename_project", + description="Rename a project and optionally change its color (profile-checked).", + inputSchema={ + "type": "object", + "properties": { + "project_id": {"type": "string", "description": "12-char project ID"}, + "name": {"type": "string", "description": "New name (max 128 chars)"}, + "color": {"type": "string", "description": "Optional new hex color"}, + }, + "required": ["project_id", "name"], + }, + ), + Tool( + name="delete_project", + description="Delete a project and unassign all its sessions (profile-checked).", + inputSchema={ + "type": "object", + "properties": { + "project_id": {"type": "string", "description": "12-char project ID to delete"}, + }, + "required": ["project_id"], + }, + ), + Tool( + name="rename_session", + description="Rename a session (updates sidebar via authenticated API, cache-safe).", + inputSchema={ + "type": "object", + "properties": { + "session_id": {"type": "string", "description": "Session ID"}, + "title": {"type": "string", "description": "New title (max 80 chars)"}, + }, + "required": ["session_id", "title"], + }, + ), + Tool( + name="move_session", + description="Assign a session to a project. Pass project_id=null to unassign. 
Uses authenticated API for cache safety (profile-checked).", + inputSchema={ + "type": "object", + "properties": { + "session_id": {"type": "string", "description": "Session ID"}, + "project_id": {"type": ["string", "null"], "description": "Project ID (or null to unassign)"}, + }, + "required": ["session_id", "project_id"], + }, + ), + Tool( + name="list_sessions", + description="List sessions, optionally filtered by project or unassigned status (profile-scoped).", + inputSchema={ + "type": "object", + "properties": { + "project_id": {"type": "string", "description": "Filter sessions by project ID"}, + "unassigned": {"type": "boolean", "description": "Show only sessions with no project"}, + "limit": {"type": "integer", "description": "Max results (default: 50, max: 500)"}, + }, + "required": [], + }, + ), +] + +HANDLERS = { + "list_projects": handle_list_projects, + "create_project": handle_create_project, + "rename_project": handle_rename_project, + "delete_project": handle_delete_project, + "rename_session": handle_rename_session, + "move_session": handle_move_session, + "list_sessions": handle_list_sessions, +} + + +@server.list_tools() +async def list_tools() -> list[Tool]: + return TOOLS + + +@server.call_tool() +async def call_tool(name: str, arguments: dict) -> list[TextContent]: + handler = HANDLERS.get(name) + if not handler: + return [TextContent(type="text", text=json.dumps( + {"error": f"Unknown tool: {name}"}, ensure_ascii=False))] + return await handler(arguments) + + +async def main(): + async with stdio_server() as (read, write): + await server.run(read, write, server.create_initialization_options()) + + +if __name__ == "__main__": + import asyncio + asyncio.run(main()) diff --git a/scripts/repair_workspace_user_turns.py b/scripts/repair_workspace_user_turns.py new file mode 100644 index 00000000..a4080720 --- /dev/null +++ b/scripts/repair_workspace_user_turns.py @@ -0,0 +1,187 @@ +#!/usr/bin/env python3 +"""Repair workspace-prefixed and duplicated 
user turns in WebUI transcripts. + +WebUI may store model-facing user messages prefixed with +``[Workspace: /path]``. That prefix is useful for the model, but it should not +remain in display transcripts. Older data can also contain adjacent duplicate +user bubbles when a display turn and a workspace-prefixed model turn were merged +as separate messages. + +This script cleans those historical artifacts in WebUI sidecar JSON files and, +when requested, the SQLite session database. +""" +from __future__ import annotations + +import argparse +import datetime as _dt +import json +import re +import shutil +import sqlite3 +from pathlib import Path +from typing import Any + +_WORKSPACE_PREFIX_RE = re.compile(r"^\s*\[Workspace:[^\]]+\]\s*") + + +def strip_workspace_prefix(text: str | None) -> str: + """Return user text without WebUI's model-facing workspace prefix.""" + return _WORKSPACE_PREFIX_RE.sub("", str(text or "")).strip() + + +def normalized_text(text: str | None) -> str: + return " ".join(strip_workspace_prefix(text).split()) + + +def clean_message_list(messages: list[dict[str, Any]]) -> tuple[list[dict[str, Any]], dict[str, int]]: + """Strip workspace prefixes and remove adjacent duplicate user turns.""" + cleaned: list[dict[str, Any]] = [] + stats = {"stripped_workspace_prefixes": 0, "removed_adjacent_user_duplicates": 0} + + for message in messages: + if not isinstance(message, dict): + cleaned.append(message) + continue + + next_message = dict(message) + if next_message.get("role") == "user": + original = str(next_message.get("content") or "") + stripped = strip_workspace_prefix(original) + if stripped and stripped != original: + next_message["content"] = stripped + stats["stripped_workspace_prefixes"] += 1 + + if cleaned and isinstance(cleaned[-1], dict) and cleaned[-1].get("role") == "user": + previous_text = normalized_text(str(cleaned[-1].get("content") or "")) + current_text = normalized_text(str(next_message.get("content") or "")) + if previous_text and 
previous_text == current_text: + stats["removed_adjacent_user_duplicates"] += 1 + continue + + cleaned.append(next_message) + + return cleaned, stats + + +def _backup_file(path: Path, backup_dir: Path) -> None: + """Copy ``path`` into ``backup_dir``, creating the directory if needed. + + The copy is stored under ``path.name`` only, so two sources sharing a file + name overwrite each other inside the same backup directory. + """ + backup_dir.mkdir(parents=True, exist_ok=True) + shutil.copy2(path, backup_dir / path.name) + + +def repair_sidecars(sessions_dir: Path, backup_dir: Path | None = None, dry_run: bool = False) -> dict[str, Any]: + """Clean the ``messages`` list of every ``*.json`` sidecar in ``sessions_dir``. + + ``_index.json``, unreadable or malformed files, and files whose top-level + JSON value is not an object with a list under ``messages`` are skipped. + A changed file is backed up first (when ``backup_dir`` is given) and then + rewritten with the cleaned list and a refreshed ``message_count``; with + ``dry_run`` nothing is written. Returns ``{"changed_sidecars": [...]}`` + with one stats entry per file that needed changes. + """ + changed: list[dict[str, Any]] = [] + for path in sorted(sessions_dir.glob("*.json")): + if path.name == "_index.json": + continue + try: + data = json.loads(path.read_text(encoding="utf-8")) + except (OSError, ValueError): + # Unreadable, wrongly encoded, or malformed JSON (JSONDecodeError and + # UnicodeDecodeError are both ValueError): skip, best effort, rather + # than abort after earlier files were already rewritten. + continue + if not isinstance(data, dict): + # Valid JSON that is not a session object (e.g. a list) has no + # ``messages`` key; ``data.get`` would raise AttributeError. + continue + messages = data.get("messages") + if not isinstance(messages, list): + continue + cleaned, stats = clean_message_list(messages) + if stats["stripped_workspace_prefixes"] or stats["removed_adjacent_user_duplicates"]: + changed.append({"file": path.name, **stats, "messages_after": len(cleaned)}) + if not dry_run: + if backup_dir is not None: + _backup_file(path, backup_dir) + data["messages"] = cleaned + data["message_count"] = len(cleaned) + path.write_text(json.dumps(data, ensure_ascii=False, indent=2) + "\n", encoding="utf-8") + return {"changed_sidecars": changed} + + +def repair_state_db(state_db: Path, backup_dir: Path | None = None, dry_run: bool = False) -> dict[str, Any]: + """Strip workspace prefixes and adjacent duplicate user rows in ``state.db``. + + Backs up the database and any ``-wal``/``-shm`` companions first when + ``backup_dir`` is given and ``dry_run`` is false. Returns the number of + updated and removed rows plus the sorted ids of the affected sessions; + with ``dry_run`` the same report is produced without writing. + """ + if not state_db.exists(): + # Same keys as the normal return so callers can read the report uniformly. + return {"updated_workspace_prefix_user_messages": 0, "removed_adjacent_user_duplicates": 0, "affected_sessions": []} + if not dry_run and backup_dir is not None: + _backup_file(state_db, backup_dir) + for suffix in ("-wal", "-shm"): + extra = Path(str(state_db) + suffix) + if extra.exists(): + _backup_file(extra, backup_dir) + + con = sqlite3.connect(state_db) + con.row_factory = sqlite3.Row + updated = 0 + deleted = 0 + affected_sessions: set[str] = set() + try: + rows = con.execute( + "select id, session_id, content from messages " + "where role = 'user' and content like '[Workspace:%' order by session_id, id" + ).fetchall() +
duplicate_ids: list[int] = [] + for row in rows: + stripped = strip_workspace_prefix(row["content"]) + if stripped and stripped != row["content"]: + updated += 1 + affected_sessions.add(row["session_id"]) + if not dry_run: + con.execute("update messages set content = ? where id = ?", (stripped, row["id"])) + + for sid_row in con.execute("select distinct session_id from messages order by session_id").fetchall(): + sid = sid_row["session_id"] + previous = None + for row in con.execute("select id, role, content from messages where session_id = ? order by id", (sid,)).fetchall(): + if previous and previous["role"] == "user" and row["role"] == "user": + if normalized_text(previous["content"]) and normalized_text(previous["content"]) == normalized_text(row["content"]): + duplicate_ids.append(row["id"]) + affected_sessions.add(sid) + continue + previous = row + + deleted = len(duplicate_ids) + if not dry_run: + for message_id in duplicate_ids: + con.execute("delete from messages where id = ?", (message_id,)) + for sid in sorted(affected_sessions): + message_count = con.execute("select count(*) from messages where session_id = ?", (sid,)).fetchone()[0] + tool_count = con.execute( + "select count(*) from messages where session_id = ? and role = 'tool'", (sid,) + ).fetchone()[0] + con.execute( + "update sessions set message_count = ?, tool_call_count = ? 
where id = ?", + (message_count, tool_count, sid), + ) + con.commit() + finally: + con.close() + + return { + "updated_workspace_prefix_user_messages": updated, + "removed_adjacent_user_duplicates": deleted, + "affected_sessions": sorted(affected_sessions), + } + + +def main() -> int: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("--sessions-dir", type=Path, help="WebUI sidecar session directory") + parser.add_argument("--state-db", type=Path, help="Hermes SQLite state.db path") + parser.add_argument("--backup-dir", type=Path, help="Directory for backups before mutation") + parser.add_argument("--dry-run", action="store_true", help="Report changes without writing") + args = parser.parse_args() + + if not args.sessions_dir and not args.state_db: + parser.error("provide --sessions-dir, --state-db, or both") + + backup_dir = args.backup_dir + if backup_dir is None and not args.dry_run: + backup_dir = Path("backups") / f"workspace-user-turn-repair-{_dt.datetime.now().strftime('%Y%m%d_%H%M%S')}" + + report: dict[str, Any] = {"dry_run": args.dry_run} + if backup_dir is not None: + report["backup_dir"] = str(backup_dir) + if args.sessions_dir: + report.update(repair_sidecars(args.sessions_dir, backup_dir, args.dry_run)) + if args.state_db: + report["state_db"] = repair_state_db(args.state_db, backup_dir, args.dry_run) + + print(json.dumps(report, ensure_ascii=False, indent=2)) + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/scripts/windows/setup_webui_autostart.ps1 b/scripts/windows/setup_webui_autostart.ps1 new file mode 100644 index 00000000..08b88949 --- /dev/null +++ b/scripts/windows/setup_webui_autostart.ps1 @@ -0,0 +1,95 @@ +[CmdletBinding(SupportsShouldProcess = $true)] +param( + [Parameter(Mandatory = $true)] + [ValidateNotNullOrEmpty()] + [string]$WslScriptPath, + + [string]$Distro, + + [ValidateNotNullOrEmpty()] + [string]$TaskName = "HermesWebUIAutoStart", + + [switch]$RunNow, + + 
[switch]$SkipValidation +) + +Set-StrictMode -Version Latest +$ErrorActionPreference = "Stop" + +function ConvertTo-WindowsArgument { + param( + [Parameter(Mandatory = $true)] + [string]$Value + ) + + if ($Value -notmatch '[\s\"]') { + return $Value + } + + $escaped = $Value.Replace('"', '\"') + return '"' + $escaped + '"' +} + +function Get-WslExePath { + $systemWsl = Join-Path $env:SystemRoot "System32\wsl.exe" + if (Test-Path $systemWsl) { + return $systemWsl + } + return "wsl.exe" +} + +$wslExe = Get-WslExePath + +$wslArgs = @() +if ($Distro) { + $wslArgs += @("-d", $Distro) +} +$wslArgs += @("--exec", "bash", $WslScriptPath) +$actionArguments = ($wslArgs | ForEach-Object { ConvertTo-WindowsArgument -Value $_ }) -join " " + +if (-not $SkipValidation) { + $validationArgs = @() + if ($Distro) { + $validationArgs += @("-d", $Distro) + } + $validationArgs += @("--exec", "test", "-f", $WslScriptPath) + + & $wslExe @validationArgs + if ($LASTEXITCODE -ne 0) { + throw "WSL script path was not found inside the selected distro: $WslScriptPath" + } +} + +$description = "Auto-start Hermes WebUI inside WSL at Windows logon. Runs $WslScriptPath." +$action = New-ScheduledTaskAction -Execute $wslExe -Argument $actionArguments +$trigger = New-ScheduledTaskTrigger -AtLogOn +$currentUser = [System.Security.Principal.WindowsIdentity]::GetCurrent().Name +$principal = New-ScheduledTaskPrincipal -UserId $currentUser -LogonType Interactive -RunLevel LeastPrivilege +$settings = New-ScheduledTaskSettingsSet -StartWhenAvailable -MultipleInstances IgnoreNew +$existingTask = Get-ScheduledTask -TaskName $TaskName -ErrorAction SilentlyContinue + +if ($existingTask) { + Write-Host "Updating existing scheduled task '$TaskName'." +} else { + Write-Host "Creating scheduled task '$TaskName'." 
+} + +if ($PSCmdlet.ShouldProcess($TaskName, "Register Windows Scheduled Task for Hermes WebUI WSL autostart")) { + Register-ScheduledTask ` + -TaskName $TaskName ` + -Action $action ` + -Trigger $trigger ` + -Principal $principal ` + -Settings $settings ` + -Description $description ` + -Force | Out-Null + + Write-Host "Task '$TaskName' is installed." + Write-Host "Action: $wslExe $actionArguments" + + if ($RunNow) { + Start-ScheduledTask -TaskName $TaskName + Write-Host "Task '$TaskName' started." + } +} diff --git a/scripts/wsl/hermes_webui_autostart.sh b/scripts/wsl/hermes_webui_autostart.sh new file mode 100755 index 00000000..90726c01 --- /dev/null +++ b/scripts/wsl/hermes_webui_autostart.sh @@ -0,0 +1,123 @@ +#!/usr/bin/env bash +set -euo pipefail + +# WSL-friendly autostart launcher for Hermes WebUI. +# +# Safe defaults: +# - derives the repo from this script location, override with HERMES_WEBUI_REPO +# - uses a lock + pid file to avoid duplicate starts +# - treats a healthy /health endpoint as "already running" +# - writes logs under ~/.hermes/webui/logs unless HERMES_WEBUI_LOG_DIR is set + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +DEFAULT_REPO="$(cd "${SCRIPT_DIR}/../.." 
&& pwd)" +HERMES_WEBUI_REPO="${HERMES_WEBUI_REPO:-${DEFAULT_REPO}}" +HERMES_WEBUI_LOG_DIR="${HERMES_WEBUI_LOG_DIR:-${HOME}/.hermes/webui/logs}" +HERMES_WEBUI_HOST="${HERMES_WEBUI_HOST:-127.0.0.1}" +HERMES_WEBUI_PORT="${HERMES_WEBUI_PORT:-8787}" +HERMES_WEBUI_HEALTH_HOST="${HERMES_WEBUI_HEALTH_HOST:-127.0.0.1}" +HERMES_WEBUI_HEALTH_URL="${HERMES_WEBUI_HEALTH_URL:-http://${HERMES_WEBUI_HEALTH_HOST}:${HERMES_WEBUI_PORT}/health}" +HERMES_WEBUI_PID_FILE="${HERMES_WEBUI_PID_FILE:-${HERMES_WEBUI_LOG_DIR}/hermes-webui.pid}" +HERMES_WEBUI_LOCK_FILE="${HERMES_WEBUI_LOCK_FILE:-/tmp/hermes-webui-autostart.lock}" +AUTOSTART_LOG="${HERMES_WEBUI_LOG_DIR}/webui_autostart.log" +WEBUI_LOG="${HERMES_WEBUI_LOG_DIR}/hermes_webui.log" + +# Make the WSL launcher knobs visible to start.sh/bootstrap.py. +export HERMES_WEBUI_HOST HERMES_WEBUI_PORT + +mkdir -p "${HERMES_WEBUI_LOG_DIR}" +chmod 700 "${HERMES_WEBUI_LOG_DIR}" 2>/dev/null || true + +log() { + printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S %z')" "$*" | tee -a "${AUTOSTART_LOG}" +} + +webui_healthy() { + command -v curl >/dev/null 2>&1 \ + && curl -fsS --max-time 3 "${HERMES_WEBUI_HEALTH_URL}" >/dev/null 2>&1 +} + +pid_is_alive() { + [[ -s "${HERMES_WEBUI_PID_FILE}" ]] || return 1 + local pid + pid="$(cat "${HERMES_WEBUI_PID_FILE}" 2>/dev/null || true)" + [[ "${pid}" =~ ^[0-9]+$ ]] || return 1 + kill -0 "${pid}" >/dev/null 2>&1 +} + +validate_repo() { + if [[ ! -d "${HERMES_WEBUI_REPO}" ]]; then + log "Hermes WebUI repo not found: ${HERMES_WEBUI_REPO}" + exit 1 + fi + if [[ ! -f "${HERMES_WEBUI_REPO}/start.sh" ]]; then + log "start.sh not found under HERMES_WEBUI_REPO=${HERMES_WEBUI_REPO}" + exit 1 + fi +} + +maybe_require_agent_process() { + # Hermes WebUI usually launches the agent in-process, so this check is opt-in. + # Set HERMES_WEBUI_REQUIRE_AGENT_PROCESS=1 only if your setup depends on a + # separately running Hermes gateway/agent before WebUI starts. 
+ if [[ "${HERMES_WEBUI_REQUIRE_AGENT_PROCESS:-0}" != "1" ]]; then + return 0 + fi + if ! pgrep -f "hermes" >/dev/null 2>&1; then + log "HERMES_WEBUI_REQUIRE_AGENT_PROCESS=1 but no Hermes process is running; skipping start" + exit 1 + fi +} + +acquire_lock() { + exec 9>"${HERMES_WEBUI_LOCK_FILE}" + if command -v flock >/dev/null 2>&1; then + if ! flock -n 9; then + log "Autostart already running; lock held at ${HERMES_WEBUI_LOCK_FILE}" + exit 0 + fi + else + log "flock not found; continuing without lock-based duplicate protection" + fi +} + +start_webui() { + validate_repo + maybe_require_agent_process + + if webui_healthy; then + log "Hermes WebUI already running at ${HERMES_WEBUI_HEALTH_URL}" + exit 0 + fi + + if pid_is_alive; then + log "Hermes WebUI already running with pid $(cat "${HERMES_WEBUI_PID_FILE}")" + exit 0 + fi + + rm -f "${HERMES_WEBUI_PID_FILE}" + log "Starting Hermes WebUI from ${HERMES_WEBUI_REPO} on ${HERMES_WEBUI_HOST}:${HERMES_WEBUI_PORT}" + + ( + cd "${HERMES_WEBUI_REPO}" + nohup bash "${HERMES_WEBUI_REPO}/start.sh" --foreground >>"${WEBUI_LOG}" 2>&1 & + printf '%s\n' "$!" >"${HERMES_WEBUI_PID_FILE}" + ) + + sleep "${HERMES_WEBUI_STARTUP_GRACE_SECONDS:-2}" + if webui_healthy; then + log "Hermes WebUI started and passed health check" + exit 0 + fi + + if pid_is_alive; then + log "Hermes WebUI process started with pid $(cat "${HERMES_WEBUI_PID_FILE}"); health check not ready yet" + exit 0 + fi + + log "Hermes WebUI failed to stay running; see ${WEBUI_LOG}" + exit 1 +} + +acquire_lock +start_webui diff --git a/start.sh b/start.sh index 59e0d65a..a1406663 100755 --- a/start.sh +++ b/start.sh @@ -1,12 +1,48 @@ #!/usr/bin/env bash set -euo pipefail +# If invoked as root (e.g. via `sudo ./start.sh` or accidental root shell +# inside the container), re-exec as the unprivileged hermeswebui user so the +# WebUI process never owns root-only file modes on bind-mounted state. 
+# Outside containers the EUID==0 case is rare; inside the production image +# the entrypoint drops to hermeswebui itself, so this is a defensive guard. +# Sourced from PR #1686 (@binhpt310) — Cluster 1 (operational hardening), +# extracted to a focused follow-up after the parent PR was deferred over a +# separate sibling-repo build-context concern unrelated to this fix. +# +# Four preconditions to fire (all must hold): +# - EUID == 0 +# - hermeswebui user actually exists (id lookup) +# - sudo is on PATH (production image does not ship sudo, so this is the +# load-bearing no-op guard for the canonical container path) +# - sudo -u hermeswebui passes without prompting (NOPASSWD precheck) +# The NOPASSWD precheck via `sudo -n -u hermeswebui true` makes this a silent +# fall-through on host machines where the developer's hermeswebui user +# requires a password — better than exiting non-zero with `sudo: a password +# is required` and surprising the user who didn't ask for sudo behavior. +if [[ ${EUID:-$(id -u)} -eq 0 ]] && id hermeswebui >/dev/null 2>&1 \ + && command -v sudo >/dev/null 2>&1 \ + && sudo -n -u hermeswebui true 2>/dev/null; then + exec sudo -n -u hermeswebui "$0" "$@" +fi + REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" if [[ -f "${REPO_ROOT}/.env" ]]; then + # Filter out shell-readonly vars (UID, GID, EUID, EGID, PPID) before + # `source`ing. docker-compose.yml's macOS instructions document + # `echo "UID=$(id -u)" >> .env` to set host UID/GID, which then crashes + # `start.sh` with "UID: readonly variable" when bash tries to assign to + # those names. Filtering them out lets the .env file carry those entries + # for docker-compose's variable substitution while keeping local invocation + # of start.sh working. The regression guard at + # tests/test_bootstrap_dotenv.py:181 still passes — the line below contains + # both `source` and `.env`. 
+ # Sourced from PR #1686 (@binhpt310) — Cluster 1 (operational hardening), + # extracted to a focused follow-up after the parent PR was deferred. set -a # shellcheck source=/dev/null - source "${REPO_ROOT}/.env" + source <(grep -vE '^[[:space:]]*(export[[:space:]]+)?(UID|GID|EUID|EGID|PPID)=' "${REPO_ROOT}/.env") set +a fi diff --git a/static/boot.js b/static/boot.js index 4c9f1ce7..e08ad6e9 100644 --- a/static/boot.js +++ b/static/boot.js @@ -42,6 +42,17 @@ async function cancelSessionStream(session){ if(typeof renderSessionList==='function') renderSessionList(); } +async function _savedSessionShouldStaySidebarOnly(sid){ + if(!sid) return false; + try{ + const data = await api(`/api/session?session_id=${encodeURIComponent(sid)}&messages=0&resolve_model=0`); + const session = data&&data.session; + return !!(session&&(session.active_stream_id||session.pending_user_message)); + }catch(e){ + return false; + } +} + // ── Mobile navigation ────────────────────────────────────────────────────── let _workspacePanelMode='closed'; // 'closed' | 'browse' | 'preview' @@ -144,6 +155,26 @@ function handleWorkspaceClose(){ closeWorkspacePanel(); } +/** + * Set a tooltip on a button, preferring the custom CSS tooltip (`data-tooltip`) + * when the element opts in via the `has-tooltip` class. Falls back to the + * native `title` attribute for elements that haven't opted in. + * + * Critical: when the element DOES have data-tooltip, this MUST also clear any + * existing native `title` attribute, otherwise the slow ~1.5s native browser + * tooltip co-fires alongside the fast custom CSS tooltip — exactly the bug + * #1775 reports. Always pair `data-tooltip` with `removeAttribute('title')`. 
+ */ +function _setButtonTooltip(btn, text){ + if(!btn) return; + if(btn.hasAttribute('data-tooltip')){ + btn.setAttribute('data-tooltip', text); + if(btn.hasAttribute('title')) btn.removeAttribute('title'); + } else { + btn.title = text; + } +} + function syncWorkspacePanelUI(){ const {layout,panel,toggleBtn,collapseBtn}= _workspacePanelEls(); if(!layout||!panel)return; @@ -156,11 +187,11 @@ function syncWorkspacePanelUI(){ if(toggleBtn){ toggleBtn.classList.toggle('active',isOpen); toggleBtn.setAttribute('aria-pressed',isOpen?'true':'false'); - toggleBtn.title=isOpen?'Hide workspace panel':'Show workspace panel'; + _setButtonTooltip(toggleBtn, isOpen?'Hide workspace panel':'Show workspace panel'); toggleBtn.disabled=!canBrowse; } if(collapseBtn){ - collapseBtn.title=isCompact?'Close workspace panel':'Hide workspace panel'; + _setButtonTooltip(collapseBtn, isCompact?'Close workspace panel':'Hide workspace panel'); } const hasSession=!!S.session; ['btnUpDir','btnNewFile','btnNewFolder','btnRefreshPanel'].forEach(id=>{ @@ -170,7 +201,7 @@ function syncWorkspacePanelUI(){ const clearBtn=$('btnClearPreview'); if(clearBtn){ clearBtn.disabled=!isOpen; - clearBtn.title=hasPreview?'Close preview':'Hide workspace panel'; + _setButtonTooltip(clearBtn, hasPreview?'Close preview':'Hide workspace panel'); // On desktop, only show the X button when a file preview is open. // In browse mode the chevron (btnCollapseWorkspacePanel) already serves // as the close control, so showing both produces a duplicate X. @@ -192,6 +223,62 @@ function closeMobileSidebar(){ if(sidebar)sidebar.classList.remove('mobile-open'); if(overlay)overlay.classList.remove('visible'); } + +// ── Desktop sidebar collapse toggle ──────────────────────────────────────── +// Two discoverability paths into the same state: +// (1) Click the already-active rail icon → collapse / expand the sidebar. +// (2) Cmd/Ctrl+B keyboard shortcut (VS Code convention). 
// Mobile is unaffected: the sidebar is an overlay there, and every collapse
// code path is gated on `_isDesktopWidth()` (min-width:641px).
// State is persisted via localStorage and survives reloads + bfcache.
const _SIDEBAR_COLLAPSED_KEY='hermes-webui-sidebar-collapsed';

/**
 * Report whether the viewport matches the desktop breakpoint.
 * @returns {boolean} Result of the `(min-width:641px)` media query, or `true`
 *   when `window.matchMedia` throws, so callers fall back to desktop behaviour.
 */
function _isDesktopWidth(){
  try{
    return window.matchMedia('(min-width:641px)').matches;
  }catch(_){
    return true;
  }
}

/**
 * Report whether the sidebar is currently collapsed.
 * @returns {boolean} `true` when the `.layout` element carries the
 *   `sidebar-collapsed` class; `false` otherwise, including when no `.layout`
 *   element exists.
 */
function _isSidebarCollapsed(){
  return document.querySelector('.layout')?.classList.contains('sidebar-collapsed')||false;
}

/**
 * Mirror the open/collapsed state on the active rail button via aria-expanded
 * so screen readers announce the toggle. Open='true', collapsed='false'.
 *
 * Inactive rail tabs have the attribute removed: only the active tab acts as
 * the collapse toggle, and a tab that was active earlier would otherwise keep
 * announcing whatever value it was last given.
 */
function _syncSidebarAria(){
  // setAttribute stringifies its value anyway; be explicit about 'true'/'false'.
  const expanded=String(!_isSidebarCollapsed());
  document.querySelectorAll('.rail .rail-btn.nav-tab[data-panel]').forEach(btn=>{
    if(btn.classList.contains('active'))btn.setAttribute('aria-expanded',expanded);
    else btn.removeAttribute('aria-expanded');
  });
}

/**
 * Collapse or expand the desktop sidebar and persist the choice.
 * Does nothing below the desktop breakpoint or when `.layout` is missing.
 * @param {boolean} [forceState] `true` collapses, `false` expands; any
 *   non-boolean value (including omitted) flips the current state.
 * @returns {void}
 */
function toggleSidebar(forceState){
  if(!_isDesktopWidth())return; // mobile uses an overlay; never collapse there
  const layout=document.querySelector('.layout');
  if(!layout)return;
  const collapsedNow=layout.classList.contains('sidebar-collapsed');
  const next=typeof forceState==='boolean'?forceState:!collapsedNow;
  layout.classList.toggle('sidebar-collapsed',next);
  // Clear the flash-prevention root-level marker once JS owns the state.
  try{document.documentElement.removeAttribute('data-sidebar-collapsed');}catch(_){}
  // Best effort: a failing localStorage write must not break the toggle itself.
  try{localStorage.setItem(_SIDEBAR_COLLAPSED_KEY,next?'1':'0');}catch(_){}
  _syncSidebarAria();
}

/**
 * Expand the sidebar if it is collapsed; otherwise leave it untouched.
 * @returns {void}
 */
function expandSidebar(){
  if(_isSidebarCollapsed())toggleSidebar(false);
}

// Boot-time restore. The inline flash-prevention script in index.html already
// set data-sidebar-collapsed='1' on <html> before the stylesheet so the page
// renders collapsed without paint flash. This IIFE promotes that pre-paint
+(function _restoreSidebarState(){ + try{document.documentElement.removeAttribute('data-sidebar-collapsed');}catch(_){} + if(!_isDesktopWidth())return; + try{ + if(localStorage.getItem(_SIDEBAR_COLLAPSED_KEY)==='1'){ + const layout=document.querySelector('.layout'); + if(layout)layout.classList.add('sidebar-collapsed'); + } + }catch(_){} + _syncSidebarAria(); +})(); function toggleMobileFiles(){ toggleWorkspacePanel(); } @@ -236,7 +323,7 @@ $('btnSend').onclick=()=>{ } send(); }; -$('btnAttach').onclick=()=>$('fileInput').click(); +$('btnAttach').onclick=e=>{if(e&&e.preventDefault)e.preventDefault();$('fileInput').value='';$('fileInput').click();}; // ── Voice input (Web Speech API + MediaRecorder fallback) ─────────────────── (function(){ @@ -267,7 +354,7 @@ $('btnAttach').onclick=()=>$('fileInput').click(); btn.classList.toggle('recording',on); // Active-state title flips so the tooltip is honest about what // pressing the button will do (#1488). - btn.title = on ? t('voice_dictate_active') : t('voice_dictate'); + _setButtonTooltip(btn, on ? t('voice_dictate_active') : t('voice_dictate')); status.style.display=on?'':'none'; if(statusText) statusText.textContent=on?'Listening':'Listening'; if(!on){ _finalText=''; _prefix=''; } @@ -470,14 +557,17 @@ window._micPendingSend=window._micPendingSend||false; try{ return localStorage.getItem('hermes-voice-mode-button')==='true'; } catch(_){ return false; } } + let _voiceModeActive=false; + function _applyVoiceModePref(){ - modeBtn.style.display = _voiceModePrefEnabled() ? '' : 'none'; + const enabled = _voiceModePrefEnabled(); + modeBtn.style.display = enabled ? '' : 'none'; + if(!enabled && _voiceModeActive) _deactivate(); } _applyVoiceModePref(); // Expose so the settings pane can re-apply immediately on toggle. 
window._applyVoiceModePref = _applyVoiceModePref; - let _voiceModeActive=false; let _voiceModeState='idle'; // idle | listening | thinking | speaking let _recognition=null; let _silenceTimer=null; @@ -688,7 +778,7 @@ window._micPendingSend=window._micPendingSend||false; function _activate(){ _voiceModeActive=true; modeBtn.classList.add('active'); - modeBtn.title=t('voice_mode_toggle_active'); + _setButtonTooltip(modeBtn, t('voice_mode_toggle_active')); showToast(t('voice_mode_active'),1500); // If the agent is busy, wait — state will be 'thinking' and we'll detect completion if(typeof S!=='undefined'&&S.busy){ @@ -705,7 +795,7 @@ window._micPendingSend=window._micPendingSend||false; _voiceModeState='idle'; _voiceModeThinkingSid=null; modeBtn.classList.remove('active'); - modeBtn.title=t('voice_mode_toggle'); + _setButtonTooltip(modeBtn, t('voice_mode_toggle')); bar.style.display='none'; clearTimeout(_silenceTimer); try{ if(_recognition) _recognition.abort(); }catch(_){} @@ -786,10 +876,11 @@ $('importFileInput').onchange=async(e)=>{ } }; // btnRefreshFiles is now panel-icon-btn in header (see HTML) -function clearPreview(){ +function clearPreview(opts={}){ + const keepPanelOpen=!!(opts&&opts.keepPanelOpen); // Restore directory breadcrumb after closing file preview if(typeof renderBreadcrumb==='function') renderBreadcrumb(); - const closePanelAfter=_workspacePanelMode==='preview'; + const closePanelAfter=_workspacePanelMode==='preview'&&!keepPanelOpen; const pa=$('previewArea');if(pa)pa.classList.remove('visible'); const pi=$('previewImg');if(pi){pi.onerror=null;pi.src='';} const pdf=$('previewPdfFrame');if(pdf)pdf.src=''; @@ -800,6 +891,7 @@ function clearPreview(){ const ft=$('fileTree');if(ft)ft.style.display=''; _previewCurrentPath='';_previewCurrentMode='';_previewDirty=false; if(closePanelAfter)closeWorkspacePanel(); + else if(keepPanelOpen&&_workspacePanelMode==='preview')openWorkspacePanel('browse'); else syncWorkspacePanelUI(); } 
$('btnClearPreview').onclick=handleWorkspaceClose; @@ -812,7 +904,7 @@ $('modelSelect').onchange=async()=>{ : {model:selectedModel,model_provider:null}; if(typeof closeModelDropdown==='function') closeModelDropdown(); if(typeof _writePersistedModelState==='function') _writePersistedModelState(modelState.model,modelState.model_provider); - else localStorage.setItem('hermes-webui-model', modelState.model); + else try{localStorage.setItem('hermes-webui-model',modelState.model)}catch{} await api('/api/session/update',{method:'POST',body:JSON.stringify({ session_id:S.session.session_id, workspace:S.session.workspace, @@ -836,6 +928,11 @@ $('modelSelect').onchange=async()=>{ $('msg').addEventListener('input',()=>{ autoResize(); updateSendBtn(); + // Persist composer draft to server (debounced in _saveComposerDraft). + const sid = S && S.session && S.session.session_id; + if (sid && typeof _saveComposerDraft === 'function') { + _saveComposerDraft(sid, $('msg').value, S.pendingFiles ? [...S.pendingFiles] : []); + } const text=$('msg').value; if(text.startsWith('/')&&text.indexOf('\n')===-1){ if(typeof getSlashAutocompleteMatches==='function'){ @@ -907,6 +1004,18 @@ $('msg').addEventListener('keydown',e=>{ }); // B14: Cmd/Ctrl+K creates a new chat from anywhere document.addEventListener('keydown',async e=>{ + // Cmd/Ctrl+B toggles desktop sidebar collapse (VS Code convention). + // Skip when typing in an input/textarea/contenteditable so text-edit + // shortcuts (e.g. bold in some embedded editors) are never stolen. 
+ if((e.metaKey||e.ctrlKey)&&!e.shiftKey&&!e.altKey&&(e.key==='b'||e.key==='B')){ + const t=e.target; + const isText=t&&(t.tagName==='INPUT'||t.tagName==='TEXTAREA'||t.isContentEditable); + if(!isText&&typeof toggleSidebar==='function'&&_isDesktopWidth()){ + e.preventDefault(); + toggleSidebar(); + return; + } + } // Enter on approval card = Allow once (when a button inside the card is focused or // card is visible and focus is not on an input/textarea/select) if(e.key==='Enter'&&!e.metaKey&&!e.ctrlKey&&!e.shiftKey){ @@ -962,13 +1071,22 @@ document.addEventListener('keydown',async e=>{ }); $('msg').addEventListener('paste',e=>{ const items=Array.from(e.clipboardData?.items||[]); - const imageItems=items.filter(i=>i.type.startsWith('image/')); - if(!imageItems.length)return; + // When the clipboard carries BOTH text and an image (common from Notes, + // Word, browsers, Slack — the OS attaches a rendered preview alongside + // the plain text), prefer the text and let the browser paste normally. + // Only intercept when the clipboard is image-only (true screenshot paste). + // Tighten the image filter to kind==='file' so string items advertising an + // image MIME (e.g. text/html with an embedded data URI) are not misclassified. 
+ const hasText=items.some(i=>i.kind==='string'&&(i.type==='text/plain'||i.type==='text/html')); + const imageItems=items.filter(i=>i.kind==='file'&&i.type.startsWith('image/')); + if(!imageItems.length||hasText)return; e.preventDefault(); - const files=imageItems.map(i=>{ + const pasteTs=Date.now(); + const files=imageItems.map((i,idx)=>{ const blob=i.getAsFile(); const ext=i.type.split('/')[1]||'png'; - return new File([blob],`screenshot-${Date.now()}.${ext}`,{type:i.type}); + const suffix=imageItems.length>1?`-${idx+1}`:''; + return new File([blob],`screenshot-${pasteTs}${suffix}.${ext}`,{type:i.type}); }); addFiles(files); setStatus(t('image_pasted')+files.map(f=>f.name).join(', ')); @@ -1072,16 +1190,44 @@ function _normalizeAppearance(theme,skin){ return {theme:nextTheme,skin:nextSkin}; } +// Sync with the active theme's computed --bg. +// This surfaces the WebUI's exact theme background to: +// 1. Mobile Safari status bar (the prefers-color-scheme media variants in index.html +// cover the pre-load case; this updater handles user-toggled changes mid-session). +// 2. iOS PWA / Add to Home Screen status bar. +// 3. Native WKWebView wrappers (e.g. hermes-swift-mac) that read this attribute as +// the source of truth for AppKit chrome (tab bar, title bar, traffic-light area) +// instead of pixel-sampling — overlay-resistant and IPC-free. +// Reading getComputedStyle(html).getPropertyValue('--bg') picks up the active skin +// (Default, Sienna, Sisyphus, Charizard, etc.) so each skin's distinct paint reaches +// the meta tag. 
+function _syncThemeColorMeta(){ + try{ + const bg=getComputedStyle(document.documentElement).getPropertyValue('--bg').trim(); + if(!bg) return; + const known=document.getElementById('hermes-theme-color'); + if(known){ + known.setAttribute('content',bg); + known.removeAttribute('media'); + } + document.querySelectorAll('meta[name="theme-color"]').forEach(meta=>{ + meta.setAttribute('content',bg); + meta.removeAttribute('media'); + }); + }catch(e){} +} + function _setResolvedTheme(isDark){ document.documentElement.classList.toggle('dark',!!isDark); const link=document.getElementById('prism-theme'); - if(!link) return; + if(!link){ _syncThemeColorMeta(); return; } const want=isDark ?'https://cdn.jsdelivr.net/npm/prismjs@1.29.0/themes/prism-tomorrow.min.css' :'https://cdn.jsdelivr.net/npm/prismjs@1.29.0/themes/prism.min.css'; // No SRI integrity on theme CSS — jsdelivr edge nodes serve different // digests for the same pinned version, causing intermittent blocking (#1100). if(link.href!==want){ link.integrity=''; link.href=want; } + _syncThemeColorMeta(); } function _applyTheme(name){ @@ -1106,6 +1252,7 @@ function _applySkin(name){ const key=(name||'default').toLowerCase(); if(key==='default') delete document.documentElement.dataset.skin; else document.documentElement.dataset.skin=key; + _syncThemeColorMeta(); } function _pickTheme(name){ @@ -1228,6 +1375,7 @@ function applyBotName(){ _bootSettings=s; window._sendKey=s.send_key||'enter'; window._showTokenUsage=!!s.show_token_usage; + window._showTps=!!s.show_tps; window._showCliSessions=!!s.show_cli_sessions; window._soundEnabled=!!s.sound_enabled; window._notificationsEnabled=!!s.notifications_enabled; @@ -1235,11 +1383,13 @@ function applyBotName(){ window._simplifiedToolCalling=s.simplified_tool_calling!==false; window._sidebarDensity=(s.sidebar_density==='detailed'?'detailed':'compact'); window._busyInputMode=(s.busy_input_mode||'queue'); + window._sessionEndlessScrollEnabled=!!s.session_endless_scroll; 
window._botName=s.bot_name||'Hermes'; if(s.default_model) window._defaultModel=s.default_model; // Persist default workspace so the blank new-chat page can show it // and workspace actions (New file/folder) work before the first session (#804). if(s.default_workspace) S._profileDefaultWorkspace=s.default_workspace; + window._sessionJumpButtonsEnabled=!!s.session_jump_buttons; const appearance=_normalizeAppearance(s.theme,s.skin); localStorage.setItem('hermes-theme',appearance.theme); _applyTheme(appearance.theme); @@ -1261,13 +1411,16 @@ function applyBotName(){ }catch(e){ window._sendKey='enter'; window._showTokenUsage=false; + window._showTps=false; window._showCliSessions=false; window._soundEnabled=false; window._notificationsEnabled=false; window._showThinking=true; window._simplifiedToolCalling=true; + window._sessionJumpButtonsEnabled=false; window._sidebarDensity='compact'; window._busyInputMode='queue'; + window._sessionEndlessScrollEnabled=false; window._botName='Hermes'; _bootSettings={check_for_updates:false}; if(typeof setLocale==='function'){ @@ -1284,7 +1437,7 @@ function applyBotName(){ // ?test_updates=1 in URL forces banner display for testing (bypasses sessionStorage guards) const _testUpdates=new URLSearchParams(location.search).get('test_updates')==='1'; if(_testUpdates||(_bootSettings.check_for_updates!==false&&!sessionStorage.getItem('hermes-update-checked')&&!sessionStorage.getItem('hermes-update-dismissed'))){ - const _checkUrl='/api/updates/check'+(_testUpdates?'?simulate=1':''); + const _checkUrl='api/updates/check'+(_testUpdates?'?simulate=1':''); api(_checkUrl).then(d=>{if(!_testUpdates)sessionStorage.setItem('hermes-update-checked','1');if((d.webui&&d.webui.behind>0)||(d.agent&&d.agent.behind>0))_showUpdateBanner(d);}).catch(()=>{}); } // Fetch active profile @@ -1332,9 +1485,18 @@ function applyBotName(){ // Initialize reasoning chip on boot (fixes #1103 — chip hidden until session load) if(typeof fetchReasoningChip==='function') 
fetchReasoningChip(); const urlSession=(typeof _sessionIdFromLocation==='function')?_sessionIdFromLocation():null; - const saved=urlSession||localStorage.getItem('hermes-webui-session'); + const savedLocal=localStorage.getItem('hermes-webui-session'); + const saved=urlSession||savedLocal; if(saved){ try{ + if(!urlSession&&savedLocal&&await _savedSessionShouldStaySidebarOnly(savedLocal)){ + S.session=null; S.messages=[]; S.activeStreamId=null; S.busy=false; + S._bootReady=true; + syncTopbar();syncWorkspacePanelState(); + $('emptyState').style.display=''; + await renderSessionList();if(typeof startGatewaySSE==='function')startGatewaySSE(); + return; + } await loadSession(saved); // If the restored session has no messages it is an ephemeral scratch pad — // treat the page as a fresh start rather than resuming a blank conversation. @@ -1432,4 +1594,14 @@ window.addEventListener('pageshow', async (event) => { } // Restart the gateway SSE watcher — the persisted connection is dead after bfcache if (typeof startGatewaySSE === 'function') try { startGatewaySSE(); } catch (_) {} + // Re-sync sidebar collapse state from localStorage. bfcache restored the + // frozen DOM but another tab may have toggled the sidebar in the meantime. 
+ if (typeof _isSidebarCollapsed === 'function' && typeof toggleSidebar === 'function') { + try { + const _want = localStorage.getItem('hermes-webui-sidebar-collapsed') === '1'; + const _have = _isSidebarCollapsed(); + if (_want !== _have) toggleSidebar(_want); + if (typeof _syncSidebarAria === 'function') _syncSidebarAria(); + } catch (_) {} + } }); diff --git a/static/commands.js b/static/commands.js index dc806f19..6875135e 100644 --- a/static/commands.js +++ b/static/commands.js @@ -18,6 +18,7 @@ const COMMANDS=[ {name:'personality', desc:t('cmd_personality'), fn:cmdPersonality, arg:'name', subArgs:'personalities'}, {name:'skills', desc:t('cmd_skills'), fn:cmdSkills, arg:'query'}, {name:'stop', desc:t('cmd_stop'), fn:cmdStop, noEcho:true}, + {name:'goal', desc:t('cmd_goal'), fn:cmdGoal, arg:'[status|pause|resume|clear|text]', subArgs:['status','pause','resume','clear']}, {name:'queue', desc:t('cmd_queue'), fn:cmdQueue, arg:'message', noEcho:true}, {name:'interrupt', desc:t('cmd_interrupt'), fn:cmdInterrupt, arg:'message', noEcho:true}, {name:'steer', desc:t('cmd_steer'), fn:cmdSteer, arg:'message', noEcho:true}, @@ -88,6 +89,22 @@ let _slashPersonalityCachePromise=null; let _agentCommandCache=null; let _agentCommandCachePromise=null; +// Invalidate the /api/models slash-suggestion cache. Called by panels.js +// after a provider is added or removed so the next /model autocomplete +// rebuilds from a fresh /api/models response (#1539). Returning a function +// rather than letting callers poke the module-local lets/promises directly +// keeps the cache shape encapsulated to this module. +function _invalidateSlashModelCache(){ + _slashModelCache=null; + _slashModelCachePromise=null; +} +// Expose on window when available. Guarded by typeof so the module is +// importable in headless test contexts (vm.runInContext) that don't +// define a window global — see tests/test_cli_only_slash_commands.py. 
+if(typeof window!=='undefined'){ + window._invalidateSlashModelCache=_invalidateSlashModelCache; +} + function _normalizeSlashSubArg(value){ return String(value||'').trim(); } @@ -120,6 +137,15 @@ async function _loadSlashModelSubArgs(force=false){ const id=_normalizeSlashSubArg(model&&model.id); if(id) values.push(id); } + // Include extra_models (the catalog tail that doesn't render as + //