Stage 405: PR #2794 — feat(runtime): wire runner route selection harness by @Michaelyklam

This commit is contained in:
hermes-agent
2026-05-24 18:26:55 +00:00
3 changed files with 202 additions and 34 deletions
+63 -17
View File
@@ -8463,6 +8463,41 @@ def _start_chat_stream_for_session(
return response
def _runtime_runner_client_factory():
"""Return the runner-local client when a supervised backend exists.
Slice 4d wires the `/api/chat/start` selection point without silently falling
back to the legacy in-process runtime when `runner-local` is explicitly
requested. The supervised runner backend itself is intentionally not created
in this helper yet; a later slice can replace this factory with the concrete
client while keeping the route contract stable.
"""
raise NotImplementedError("runner-local chat backend is not configured")
def _chat_start_response_from_run_start(result):
"""Expose only the legacy browser-facing chat-start response fields."""
payload = dict(getattr(result, "payload", {}) or {})
response = {}
for key in (
"stream_id",
"session_id",
"pending_started_at",
"turn_id",
"title",
"effective_model",
"effective_model_provider",
"error",
"active_stream_id",
"_status",
):
if key in payload:
response[key] = payload[key]
response.setdefault("stream_id", result.stream_id)
response.setdefault("session_id", result.session_id)
return response
def _runtime_adapter_goal_action(goal_args: str) -> str:
"""Return the bounded RuntimeAdapter goal action for WebUI /goal args."""
action = str(goal_args or "").strip().lower()
@@ -8672,10 +8707,12 @@ def _handle_chat_start(handler, body, diag=None):
from api.runtime_adapter import (
LegacyJournalRuntimeAdapter,
StartRunRequest,
build_runtime_adapter,
runtime_adapter_enabled,
runtime_adapter_runner_enabled,
)
if runtime_adapter_enabled():
if runtime_adapter_enabled() or runtime_adapter_runner_enabled():
def _legacy_start_run(request: StartRunRequest) -> dict:
return _start_chat_stream_for_session(
s,
@@ -8688,23 +8725,32 @@ def _handle_chat_start(handler, body, diag=None):
diag=diag,
)
adapter = LegacyJournalRuntimeAdapter(start_run_delegate=_legacy_start_run)
result = adapter.start_run(
StartRunRequest(
session_id=s.session_id,
message=msg,
attachments=attachments,
workspace=workspace,
profile=getattr(s, "profile", None),
provider=model_provider,
model=model,
source="webui",
metadata={"route": "/api/chat/start"},
def _legacy_adapter_factory():
return LegacyJournalRuntimeAdapter(start_run_delegate=_legacy_start_run)
try:
adapter = build_runtime_adapter(
legacy_adapter_factory=_legacy_adapter_factory,
runner_client_factory=_runtime_runner_client_factory,
)
)
response = dict(result.payload)
response.setdefault("stream_id", result.stream_id)
response.setdefault("session_id", result.session_id)
if adapter is None:
raise NotImplementedError("runtime adapter selection returned no adapter")
result = adapter.start_run(
StartRunRequest(
session_id=s.session_id,
message=msg,
attachments=attachments,
workspace=workspace,
profile=getattr(s, "profile", None),
provider=model_provider,
model=model,
source="webui",
metadata={"route": "/api/chat/start"},
)
)
except NotImplementedError as exc:
return j(handler, {"error": str(exc)}, status=501)
response = _chat_start_response_from_run_start(result)
else:
response = _start_chat_stream_for_session(
s,
+59 -7
View File
@@ -4,7 +4,7 @@
- **Author:** @Michaelyklam
- **Updated by:** @franksong2702
- **Created:** 2026-05-11
- **Revised:** 2026-05-21
- **Revised:** 2026-05-23
- **Tracking issue:** [#1925](https://github.com/nesquena/hermes-webui/issues/1925)
## Credit and Scope
@@ -52,7 +52,7 @@ The immediate goal is not to build a sidecar. The immediate goal is to define th
browser contract, classify current runtime state, and gate the first reversible
journal slice.
## Current Gate State — 2026-05-21
## Current Gate State — 2026-05-23
Slice 1 is now past the first active validation gate:
@@ -104,11 +104,14 @@ adapter-seam work:
`runner-local` adapter selection point and `build_runtime_adapter(...)`
factory wiring around an injected runner client. Live browser chat routes still
stay on the legacy backend, and no supervised runner process exists yet.
- The next implementation gate is a supervised/local runner backend proposal and
route-selection harness. It must stay default-off, keep legacy fallback intact,
pass explicit profile/workspace/model payloads instead of mutating WebUI
process globals, and avoid recreating `STREAMS` / `CANCEL_FLAGS` / approval
queues / clarify queues under new names.
- #2744 shipped the Slice 4d supervised runner route gate in v0.51.108.
- The next implementation slice is a default-off runner route-selection harness
for `/api/chat/start`. It should only engage when `runner-local` is explicitly
selected, return a bounded not-configured error until a supervised runner
client exists, keep `legacy-direct` / `legacy-journal` fallback intact, pass
explicit profile/workspace/model payloads instead of mutating WebUI process
globals, and avoid recreating `STREAMS` / `CANCEL_FLAGS` / approval queues /
clarify queues under new names.
The next gate is runner-backend plumbing, not queue implementation
by default. Queue / continue routing should only move before Slice 4 if a future
@@ -843,6 +846,10 @@ Non-goals for Slice 4c:
#### Slice 4d: Supervised runner backend route gate
Status as of 2026-05-23: shipped in v0.51.108 via #2744. The gate remains a
docs/test contract: it defines the default-off route-selection requirements but
does not itself route live chat to a runner backend.
After `runner-local` selection exists, the next reviewable gate should define the
first supervised/local runner backend and the route-selection harness before live
browser chat can use it. This is still a contract/test slice first: no default-on
@@ -896,6 +903,51 @@ Non-goals for Slice 4d:
- no broad UI/product surface migration; WebUI remains the rich workbench while
only execution ownership moves.
#### Slice 4e: Default-off runner chat-start route-selection harness
The first implementation after the Slice 4d gate should wire the
`/api/chat/start` selection point to the existing `RuntimeAdapter` factory
without adding a supervised runner process yet. The harness must make the
selection behavior explicit: `legacy-direct` stays default, `legacy-journal`
continues to delegate to the legacy in-process stream path, and `runner-local`
does not silently fall back to legacy when no runner client is configured.
Scope:
- route `/api/chat/start` through `build_runtime_adapter(...)` when an adapter
mode is explicitly selected;
- keep the successful browser response whitelisted to legacy-compatible fields
such as `stream_id`, `session_id`, `pending_started_at`, `turn_id`, `title`,
and effective model/provider metadata;
- return a bounded not-configured error for `runner-local` until a supervised
runner client/backend lands;
- pass the existing explicit `StartRunRequest` payload fields across the seam.
Acceptance tests for Slice 4e:
1. **Default remains legacy-direct.** With no adapter env var, `/api/chat/start`
keeps using `_start_chat_stream_for_session(...)` directly.
2. **Legacy-journal remains behavior-preserving.** The flagged legacy adapter
still delegates to the same stream-start helper and preserves the public
response shape.
3. **Runner-local does not fall back silently.** If `runner-local` is selected but
no runner client exists, the route returns a bounded error instead of starting
a WebUI-owned legacy run behind the operator's back.
4. **No adapter-internal response drift.** `run_id`, `status`, and
`active_controls` remain internal until a later contract explicitly exposes
them.
5. **No runtime-surrogate globals.** The harness does not add runner-owned stream,
cancel, approval, clarify, cached-agent, goal, or queue maps to the main WebUI
process.
Non-goals for Slice 4e:
- no supervised runner process yet;
- no default-on runner mode;
- no execution-survives-WebUI-restart claim for production chat turns;
- no removal of `legacy-direct` or `legacy-journal`;
- no server-side queue endpoint or queue scheduler just for adapter symmetry.
## First Meaningful Success Criteria
The first meaningful milestones are deliberately split.
+80 -10
View File
@@ -409,11 +409,31 @@ def test_chat_start_route_selects_adapter_only_when_flag_enabled():
start_body = src[start_idx:src.index("def _resolve_chat_workspace_with_recovery", start_idx)]
assert "runtime_adapter_enabled()" in start_body
assert "runtime_adapter_runner_enabled()" in start_body
assert "build_runtime_adapter(" in start_body
assert "legacy_adapter_factory=_legacy_adapter_factory" in start_body
assert "runner_client_factory=_runtime_runner_client_factory" in start_body
assert "LegacyJournalRuntimeAdapter" in start_body
assert "_start_chat_stream_for_session(" in start_body
assert "HERMES_WEBUI_RUNTIME_ADAPTER" not in start_body, "route should use runtime_adapter_enabled(), not inline env checks"
def test_runner_local_chat_start_selection_does_not_fallback_to_legacy():
    """Source-level check that the adapter branch of the chat-start handler answers 501 on NotImplementedError."""
    routes = importlib.import_module("api.routes")
    routes_path = routes.Path(__file__).parent.parent / "api" / "routes.py"
    src = routes_path.read_text(encoding="utf-8")

    # Isolate the text of the chat-start handler from the rest of the module.
    handler_start = src.index("def _handle_chat_start")
    handler_end = src.index("def _resolve_chat_workspace_with_recovery", handler_start)
    start_body = src[handler_start:handler_end]

    flag_branch = "if runtime_adapter_enabled() or runtime_adapter_runner_enabled():"
    assert flag_branch in start_body
    assert "except NotImplementedError as exc:" in start_body
    assert 'return j(handler, {"error": str(exc)}, status=501)' in start_body
    assert "runner-local chat backend is not configured" in src

    # The adapter branch runs from the flag check up to the first following "else:".
    branch_start = start_body.index(flag_branch)
    branch_end = start_body.index("else:", branch_start)
    adapter_branch = start_body[branch_start:branch_end]
    assert "_start_chat_stream_for_session(" in adapter_branch, "legacy-journal delegate should still call the legacy path"
    assert "runtime_adapter_runner_enabled()" in adapter_branch
def test_chat_start_adapter_path_preserves_legacy_response_shape():
"""The RuntimeAdapter seam must be invisible to /api/chat/start callers.
@@ -422,17 +442,53 @@ def test_chat_start_adapter_path_preserves_legacy_response_shape():
"""
routes = importlib.import_module("api.routes")
src = (routes.Path(__file__).parent.parent / "api" / "routes.py").read_text(encoding="utf-8")
start_idx = src.index("def _handle_chat_start")
start_body = src[start_idx:src.index("def _resolve_chat_workspace_with_recovery", start_idx)]
branch_start = start_body.index("if runtime_adapter_enabled():")
branch_end = start_body.index("else:", branch_start)
adapter_branch = start_body[branch_start:branch_end]
helper_idx = src.index("def _chat_start_response_from_run_start")
helper_body = src[helper_idx:src.index("def _runtime_adapter_goal_action", helper_idx)]
assert 'response.setdefault("stream_id", result.stream_id)' in adapter_branch
assert 'response.setdefault("session_id", result.session_id)' in adapter_branch
assert 'response.setdefault("run_id", result.run_id)' not in adapter_branch
assert 'response.setdefault("status", result.status)' not in adapter_branch
assert 'response.setdefault("active_controls", result.active_controls)' not in adapter_branch
assert '"stream_id",' in helper_body
assert '"session_id",' in helper_body
assert 'response.setdefault("stream_id", result.stream_id)' in helper_body
assert 'response.setdefault("session_id", result.session_id)' in helper_body
assert '"run_id",' not in helper_body
assert '"status",' not in helper_body
assert '"active_controls",' not in helper_body
def test_chat_start_response_from_run_start_filters_adapter_internal_fields():
    """Adapter-internal payload keys must not reach the chat-start response."""
    routes = importlib.import_module("api.routes")
    runtime = importlib.import_module("api.runtime_adapter")

    # Keys the browser is allowed to see; this is also the expected response.
    browser_fields = {
        "stream_id": "runner-stream-1",
        "session_id": "s1",
        "pending_started_at": 123.0,
        "turn_id": "turn-1",
        "title": "Demo",
        "effective_model": "gpt-5.5",
        "effective_model_provider": "openai-codex",
    }
    # Keys that should be filtered out by the helper.
    internal_fields = {
        "run_id": "runner-internal-1",
        "status": "running",
        "active_controls": ["cancel"],
    }
    run_start = runtime.RunStartResult(
        run_id="runner-internal-1",
        session_id="s1",
        stream_id="runner-stream-1",
        status="running",
        active_controls=["cancel"],
        payload={**browser_fields, **internal_fields},
    )

    response = routes._chat_start_response_from_run_start(run_start)

    assert response == browser_fields
def test_rfc_distinguishes_goal_routing_from_queue_route_staging():
@@ -485,6 +541,7 @@ def test_rfc_defines_slice4d_supervised_runner_route_gate():
rfc = (routes.Path(__file__).parent.parent / "docs" / "rfcs" / "hermes-run-adapter-contract.md").read_text(encoding="utf-8")
assert "#### Slice 4d: Supervised runner backend route gate" in rfc
assert "Status as of 2026-05-23: shipped in v0.51.108 via #2744" in rfc
assert "After `runner-local` selection exists" in rfc
assert "route-selection harness before live\nbrowser chat can use it" in rfc
assert "Route remains default-off" in rfc
@@ -496,6 +553,19 @@ def test_rfc_defines_slice4d_supervised_runner_route_gate():
assert "WebUI remains the rich workbench while\n only execution ownership moves" in rfc
def test_rfc_defines_slice4e_runner_chat_start_route_selection_harness():
    """The RFC must contain the Slice 4e route-selection harness wording."""
    routes = importlib.import_module("api.routes")
    rfc_path = routes.Path(__file__).parent.parent / "docs" / "rfcs" / "hermes-run-adapter-contract.md"
    rfc = rfc_path.read_text(encoding="utf-8")

    # Each snippet is matched verbatim, including the hard line wraps.
    required_snippets = (
        "#### Slice 4e: Default-off runner chat-start route-selection harness",
        "route `/api/chat/start` through `build_runtime_adapter(...)`",
        "`legacy-direct` stays default",
        "`legacy-journal`\ncontinues to delegate to the legacy in-process stream path",
        "`runner-local`\ndoes not silently fall back to legacy",
        "return a bounded not-configured error for `runner-local`",
        "`run_id`, `status`, and\n `active_controls` remain internal",
        "no supervised runner process yet",
    )
    for snippet in required_snippets:
        assert snippet in rfc
def test_runner_runtime_adapter_passes_explicit_start_payload_without_env_mutation(monkeypatch):
runtime = importlib.import_module("api.runtime_adapter")
captured = []