mirror of
https://github.com/nesquena/hermes-webui.git
synced 2026-05-25 03:00:23 +00:00
Stage 405: PR #2794 — feat(runtime): wire runner route selection harness by @Michaelyklam
This commit is contained in:
+63
-17
@@ -8463,6 +8463,41 @@ def _start_chat_stream_for_session(
|
||||
return response
|
||||
|
||||
|
||||
def _runtime_runner_client_factory():
|
||||
"""Return the runner-local client when a supervised backend exists.
|
||||
|
||||
Slice 4d wires the `/api/chat/start` selection point without silently falling
|
||||
back to the legacy in-process runtime when `runner-local` is explicitly
|
||||
requested. The supervised runner backend itself is intentionally not created
|
||||
in this helper yet; a later slice can replace this factory with the concrete
|
||||
client while keeping the route contract stable.
|
||||
"""
|
||||
raise NotImplementedError("runner-local chat backend is not configured")
|
||||
|
||||
|
||||
def _chat_start_response_from_run_start(result):
|
||||
"""Expose only the legacy browser-facing chat-start response fields."""
|
||||
payload = dict(getattr(result, "payload", {}) or {})
|
||||
response = {}
|
||||
for key in (
|
||||
"stream_id",
|
||||
"session_id",
|
||||
"pending_started_at",
|
||||
"turn_id",
|
||||
"title",
|
||||
"effective_model",
|
||||
"effective_model_provider",
|
||||
"error",
|
||||
"active_stream_id",
|
||||
"_status",
|
||||
):
|
||||
if key in payload:
|
||||
response[key] = payload[key]
|
||||
response.setdefault("stream_id", result.stream_id)
|
||||
response.setdefault("session_id", result.session_id)
|
||||
return response
|
||||
|
||||
|
||||
def _runtime_adapter_goal_action(goal_args: str) -> str:
|
||||
"""Return the bounded RuntimeAdapter goal action for WebUI /goal args."""
|
||||
action = str(goal_args or "").strip().lower()
|
||||
@@ -8672,10 +8707,12 @@ def _handle_chat_start(handler, body, diag=None):
|
||||
from api.runtime_adapter import (
|
||||
LegacyJournalRuntimeAdapter,
|
||||
StartRunRequest,
|
||||
build_runtime_adapter,
|
||||
runtime_adapter_enabled,
|
||||
runtime_adapter_runner_enabled,
|
||||
)
|
||||
|
||||
if runtime_adapter_enabled():
|
||||
if runtime_adapter_enabled() or runtime_adapter_runner_enabled():
|
||||
def _legacy_start_run(request: StartRunRequest) -> dict:
|
||||
return _start_chat_stream_for_session(
|
||||
s,
|
||||
@@ -8688,23 +8725,32 @@ def _handle_chat_start(handler, body, diag=None):
|
||||
diag=diag,
|
||||
)
|
||||
|
||||
adapter = LegacyJournalRuntimeAdapter(start_run_delegate=_legacy_start_run)
|
||||
result = adapter.start_run(
|
||||
StartRunRequest(
|
||||
session_id=s.session_id,
|
||||
message=msg,
|
||||
attachments=attachments,
|
||||
workspace=workspace,
|
||||
profile=getattr(s, "profile", None),
|
||||
provider=model_provider,
|
||||
model=model,
|
||||
source="webui",
|
||||
metadata={"route": "/api/chat/start"},
|
||||
def _legacy_adapter_factory():
|
||||
return LegacyJournalRuntimeAdapter(start_run_delegate=_legacy_start_run)
|
||||
|
||||
try:
|
||||
adapter = build_runtime_adapter(
|
||||
legacy_adapter_factory=_legacy_adapter_factory,
|
||||
runner_client_factory=_runtime_runner_client_factory,
|
||||
)
|
||||
)
|
||||
response = dict(result.payload)
|
||||
response.setdefault("stream_id", result.stream_id)
|
||||
response.setdefault("session_id", result.session_id)
|
||||
if adapter is None:
|
||||
raise NotImplementedError("runtime adapter selection returned no adapter")
|
||||
result = adapter.start_run(
|
||||
StartRunRequest(
|
||||
session_id=s.session_id,
|
||||
message=msg,
|
||||
attachments=attachments,
|
||||
workspace=workspace,
|
||||
profile=getattr(s, "profile", None),
|
||||
provider=model_provider,
|
||||
model=model,
|
||||
source="webui",
|
||||
metadata={"route": "/api/chat/start"},
|
||||
)
|
||||
)
|
||||
except NotImplementedError as exc:
|
||||
return j(handler, {"error": str(exc)}, status=501)
|
||||
response = _chat_start_response_from_run_start(result)
|
||||
else:
|
||||
response = _start_chat_stream_for_session(
|
||||
s,
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
- **Author:** @Michaelyklam
|
||||
- **Updated by:** @franksong2702
|
||||
- **Created:** 2026-05-11
|
||||
- **Revised:** 2026-05-21
|
||||
- **Revised:** 2026-05-23
|
||||
- **Tracking issue:** [#1925](https://github.com/nesquena/hermes-webui/issues/1925)
|
||||
|
||||
## Credit and Scope
|
||||
@@ -52,7 +52,7 @@ The immediate goal is not to build a sidecar. The immediate goal is to define th
|
||||
browser contract, classify current runtime state, and gate the first reversible
|
||||
journal slice.
|
||||
|
||||
## Current Gate State — 2026-05-21
|
||||
## Current Gate State — 2026-05-23
|
||||
|
||||
Slice 1 is now past the first active validation gate:
|
||||
|
||||
@@ -104,11 +104,14 @@ adapter-seam work:
|
||||
`runner-local` adapter selection point and `build_runtime_adapter(...)`
|
||||
factory wiring around an injected runner client. Live browser chat routes still
|
||||
stay on the legacy backend, and no supervised runner process exists yet.
|
||||
- The next implementation gate is a supervised/local runner backend proposal and
|
||||
route-selection harness. It must stay default-off, keep legacy fallback intact,
|
||||
pass explicit profile/workspace/model payloads instead of mutating WebUI
|
||||
process globals, and avoid recreating `STREAMS` / `CANCEL_FLAGS` / approval
|
||||
queues / clarify queues under new names.
|
||||
- #2744 shipped the Slice 4d supervised runner route gate in v0.51.108.
|
||||
- The next implementation slice is a default-off runner route-selection harness
|
||||
for `/api/chat/start`. It should only engage when `runner-local` is explicitly
|
||||
selected, return a bounded not-configured error until a supervised runner
|
||||
client exists, keep `legacy-direct` / `legacy-journal` fallback intact, pass
|
||||
explicit profile/workspace/model payloads instead of mutating WebUI process
|
||||
globals, and avoid recreating `STREAMS` / `CANCEL_FLAGS` / approval queues /
|
||||
clarify queues under new names.
|
||||
|
||||
The next gate is runner-backend plumbing, not queue implementation
|
||||
by default. Queue / continue routing should only move before Slice 4 if a future
|
||||
@@ -843,6 +846,10 @@ Non-goals for Slice 4c:
|
||||
|
||||
#### Slice 4d: Supervised runner backend route gate
|
||||
|
||||
Status as of 2026-05-23: shipped in v0.51.108 via #2744. The gate remains a
|
||||
docs/test contract: it defines the default-off route-selection requirements but
|
||||
does not itself route live chat to a runner backend.
|
||||
|
||||
After `runner-local` selection exists, the next reviewable gate should define the
|
||||
first supervised/local runner backend and the route-selection harness before live
|
||||
browser chat can use it. This is still a contract/test slice first: no default-on
|
||||
@@ -896,6 +903,51 @@ Non-goals for Slice 4d:
|
||||
- no broad UI/product surface migration; WebUI remains the rich workbench while
|
||||
only execution ownership moves.
|
||||
|
||||
#### Slice 4e: Default-off runner chat-start route-selection harness
|
||||
|
||||
The first implementation after the Slice 4d gate should wire the
|
||||
`/api/chat/start` selection point to the existing `RuntimeAdapter` factory
|
||||
without adding a supervised runner process yet. The harness must make the
|
||||
selection behavior explicit: `legacy-direct` stays default, `legacy-journal`
|
||||
continues to delegate to the legacy in-process stream path, and `runner-local`
|
||||
does not silently fall back to legacy when no runner client is configured.
|
||||
|
||||
Scope:
|
||||
|
||||
- route `/api/chat/start` through `build_runtime_adapter(...)` when an adapter
|
||||
mode is explicitly selected;
|
||||
- keep the successful browser response whitelisted to legacy-compatible fields
|
||||
such as `stream_id`, `session_id`, `pending_started_at`, `turn_id`, `title`,
|
||||
and effective model/provider metadata;
|
||||
- return a bounded not-configured error for `runner-local` until a supervised
|
||||
runner client/backend lands;
|
||||
- pass the existing explicit `StartRunRequest` payload fields across the seam.
|
||||
|
||||
Acceptance tests for Slice 4e:
|
||||
|
||||
1. **Default remains legacy-direct.** With no adapter env var, `/api/chat/start`
|
||||
keeps using `_start_chat_stream_for_session(...)` directly.
|
||||
2. **Legacy-journal remains behavior-preserving.** The flagged legacy adapter
|
||||
still delegates to the same stream-start helper and preserves the public
|
||||
response shape.
|
||||
3. **Runner-local does not fall back silently.** If `runner-local` is selected but
|
||||
no runner client exists, the route returns a bounded error instead of starting
|
||||
a WebUI-owned legacy run behind the operator's back.
|
||||
4. **No adapter-internal response drift.** `run_id`, `status`, and
|
||||
`active_controls` remain internal until a later contract explicitly exposes
|
||||
them.
|
||||
5. **No runtime-surrogate globals.** The harness does not add runner-owned stream,
|
||||
cancel, approval, clarify, cached-agent, goal, or queue maps to the main WebUI
|
||||
process.
|
||||
|
||||
Non-goals for Slice 4e:
|
||||
|
||||
- no supervised runner process yet;
|
||||
- no default-on runner mode;
|
||||
- no execution-survives-WebUI-restart claim for production chat turns;
|
||||
- no removal of `legacy-direct` or `legacy-journal`;
|
||||
- no server-side queue endpoint or queue scheduler just for adapter symmetry.
|
||||
|
||||
## First Meaningful Success Criteria
|
||||
|
||||
The first meaningful milestones are deliberately split.
|
||||
|
||||
@@ -409,11 +409,31 @@ def test_chat_start_route_selects_adapter_only_when_flag_enabled():
|
||||
start_body = src[start_idx:src.index("def _resolve_chat_workspace_with_recovery", start_idx)]
|
||||
|
||||
assert "runtime_adapter_enabled()" in start_body
|
||||
assert "runtime_adapter_runner_enabled()" in start_body
|
||||
assert "build_runtime_adapter(" in start_body
|
||||
assert "legacy_adapter_factory=_legacy_adapter_factory" in start_body
|
||||
assert "runner_client_factory=_runtime_runner_client_factory" in start_body
|
||||
assert "LegacyJournalRuntimeAdapter" in start_body
|
||||
assert "_start_chat_stream_for_session(" in start_body
|
||||
assert "HERMES_WEBUI_RUNTIME_ADAPTER" not in start_body, "route should use runtime_adapter_enabled(), not inline env checks"
|
||||
|
||||
|
||||
def test_runner_local_chat_start_selection_does_not_fallback_to_legacy():
    """Source-level check of the runner-aware branch in ``_handle_chat_start``.

    Reads ``api/routes.py`` as text and checks that the handler contains the
    combined flag condition, the ``NotImplementedError`` handler with its 501
    return, and the not-configured message, and that the adapter branch still
    contains the legacy stream-start call used by the legacy-journal delegate.
    """
    routes_module = importlib.import_module("api.routes")
    routes_path = routes_module.Path(__file__).parent.parent / "api" / "routes.py"
    source_text = routes_path.read_text(encoding="utf-8")

    handler_begin = source_text.index("def _handle_chat_start")
    handler_finish = source_text.index("def _resolve_chat_workspace_with_recovery", handler_begin)
    handler_source = source_text[handler_begin:handler_finish]

    branch_header = "if runtime_adapter_enabled() or runtime_adapter_runner_enabled():"
    assert branch_header in handler_source
    assert "except NotImplementedError as exc:" in handler_source
    assert 'return j(handler, {"error": str(exc)}, status=501)' in handler_source
    assert "runner-local chat backend is not configured" in source_text

    # The adapter branch spans from the flag check to the first "else:" after it.
    branch_begin = handler_source.index(branch_header)
    branch_finish = handler_source.index("else:", branch_begin)
    branch_source = handler_source[branch_begin:branch_finish]
    assert "_start_chat_stream_for_session(" in branch_source, "legacy-journal delegate should still call the legacy path"
    assert "runtime_adapter_runner_enabled()" in branch_source
|
||||
|
||||
|
||||
def test_chat_start_adapter_path_preserves_legacy_response_shape():
|
||||
"""The RuntimeAdapter seam must be invisible to /api/chat/start callers.
|
||||
|
||||
@@ -422,17 +442,53 @@ def test_chat_start_adapter_path_preserves_legacy_response_shape():
|
||||
"""
|
||||
routes = importlib.import_module("api.routes")
|
||||
src = (routes.Path(__file__).parent.parent / "api" / "routes.py").read_text(encoding="utf-8")
|
||||
start_idx = src.index("def _handle_chat_start")
|
||||
start_body = src[start_idx:src.index("def _resolve_chat_workspace_with_recovery", start_idx)]
|
||||
branch_start = start_body.index("if runtime_adapter_enabled():")
|
||||
branch_end = start_body.index("else:", branch_start)
|
||||
adapter_branch = start_body[branch_start:branch_end]
|
||||
helper_idx = src.index("def _chat_start_response_from_run_start")
|
||||
helper_body = src[helper_idx:src.index("def _runtime_adapter_goal_action", helper_idx)]
|
||||
|
||||
assert 'response.setdefault("stream_id", result.stream_id)' in adapter_branch
|
||||
assert 'response.setdefault("session_id", result.session_id)' in adapter_branch
|
||||
assert 'response.setdefault("run_id", result.run_id)' not in adapter_branch
|
||||
assert 'response.setdefault("status", result.status)' not in adapter_branch
|
||||
assert 'response.setdefault("active_controls", result.active_controls)' not in adapter_branch
|
||||
assert '"stream_id",' in helper_body
|
||||
assert '"session_id",' in helper_body
|
||||
assert 'response.setdefault("stream_id", result.stream_id)' in helper_body
|
||||
assert 'response.setdefault("session_id", result.session_id)' in helper_body
|
||||
assert '"run_id",' not in helper_body
|
||||
assert '"status",' not in helper_body
|
||||
assert '"active_controls",' not in helper_body
|
||||
|
||||
|
||||
def test_chat_start_response_from_run_start_filters_adapter_internal_fields():
    """Adapter-internal payload keys must not reach the chat-start response.

    Builds a ``RunStartResult`` whose payload mixes browser-facing fields with
    ``run_id`` / ``status`` / ``active_controls`` and expects the helper to
    return only the browser-facing ones.
    """
    routes_module = importlib.import_module("api.routes")
    runtime_module = importlib.import_module("api.runtime_adapter")

    browser_fields = {
        "stream_id": "runner-stream-1",
        "session_id": "s1",
        "pending_started_at": 123.0,
        "turn_id": "turn-1",
        "title": "Demo",
        "effective_model": "gpt-5.5",
        "effective_model_provider": "openai-codex",
    }
    internal_fields = {
        "run_id": "runner-internal-1",
        "status": "running",
        "active_controls": ["cancel"],
    }
    run_start = runtime_module.RunStartResult(
        run_id="runner-internal-1",
        session_id="s1",
        stream_id="runner-stream-1",
        status="running",
        active_controls=["cancel"],
        payload={**browser_fields, **internal_fields},
    )

    response = routes_module._chat_start_response_from_run_start(run_start)

    assert response == browser_fields
|
||||
|
||||
|
||||
def test_rfc_distinguishes_goal_routing_from_queue_route_staging():
|
||||
@@ -485,6 +541,7 @@ def test_rfc_defines_slice4d_supervised_runner_route_gate():
|
||||
rfc = (routes.Path(__file__).parent.parent / "docs" / "rfcs" / "hermes-run-adapter-contract.md").read_text(encoding="utf-8")
|
||||
|
||||
assert "#### Slice 4d: Supervised runner backend route gate" in rfc
|
||||
assert "Status as of 2026-05-23: shipped in v0.51.108 via #2744" in rfc
|
||||
assert "After `runner-local` selection exists" in rfc
|
||||
assert "route-selection harness before live\nbrowser chat can use it" in rfc
|
||||
assert "Route remains default-off" in rfc
|
||||
@@ -496,6 +553,19 @@ def test_rfc_defines_slice4d_supervised_runner_route_gate():
|
||||
assert "WebUI remains the rich workbench while\n only execution ownership moves" in rfc
|
||||
|
||||
|
||||
def test_rfc_defines_slice4e_runner_chat_start_route_selection_harness():
    """The RFC text must contain the Slice 4e heading and its key phrases."""
    routes_module = importlib.import_module("api.routes")
    rfc_path = routes_module.Path(__file__).parent.parent / "docs" / "rfcs" / "hermes-run-adapter-contract.md"
    rfc_text = rfc_path.read_text(encoding="utf-8")

    # Each snippet is matched verbatim, including the hard line wraps in the RFC.
    required_snippets = (
        "#### Slice 4e: Default-off runner chat-start route-selection harness",
        "route `/api/chat/start` through `build_runtime_adapter(...)`",
        "`legacy-direct` stays default",
        "`legacy-journal`\ncontinues to delegate to the legacy in-process stream path",
        "`runner-local`\ndoes not silently fall back to legacy",
        "return a bounded not-configured error for `runner-local`",
        "`run_id`, `status`, and\n `active_controls` remain internal",
        "no supervised runner process yet",
    )
    for snippet in required_snippets:
        assert snippet in rfc_text
|
||||
|
||||
def test_runner_runtime_adapter_passes_explicit_start_payload_without_env_mutation(monkeypatch):
|
||||
runtime = importlib.import_module("api.runtime_adapter")
|
||||
captured = []
|
||||
|
||||
Reference in New Issue
Block a user