From dd7648d56c52d7519878f9ccbfcc5867221b5fcf Mon Sep 17 00:00:00 2001 From: Michael Lam Date: Sat, 23 May 2026 04:08:13 -0700 Subject: [PATCH] feat(runtime): wire runner route selection harness --- api/routes.py | 80 ++++++++++++++++----- docs/rfcs/hermes-run-adapter-contract.md | 66 +++++++++++++++-- tests/test_runtime_adapter_seam.py | 90 +++++++++++++++++++++--- 3 files changed, 202 insertions(+), 34 deletions(-) diff --git a/api/routes.py b/api/routes.py index b6ee042c..64fa3d9e 100644 --- a/api/routes.py +++ b/api/routes.py @@ -8463,6 +8463,41 @@ def _start_chat_stream_for_session( return response +def _runtime_runner_client_factory(): + """Return the runner-local client when a supervised backend exists. + + Slice 4d wires the `/api/chat/start` selection point without silently falling + back to the legacy in-process runtime when `runner-local` is explicitly + requested. The supervised runner backend itself is intentionally not created + in this helper yet; a later slice can replace this factory with the concrete + client while keeping the route contract stable. 
+ """ + raise NotImplementedError("runner-local chat backend is not configured") + + +def _chat_start_response_from_run_start(result): + """Expose only the legacy browser-facing chat-start response fields.""" + payload = dict(getattr(result, "payload", {}) or {}) + response = {} + for key in ( + "stream_id", + "session_id", + "pending_started_at", + "turn_id", + "title", + "effective_model", + "effective_model_provider", + "error", + "active_stream_id", + "_status", + ): + if key in payload: + response[key] = payload[key] + response.setdefault("stream_id", result.stream_id) + response.setdefault("session_id", result.session_id) + return response + + def _runtime_adapter_goal_action(goal_args: str) -> str: """Return the bounded RuntimeAdapter goal action for WebUI /goal args.""" action = str(goal_args or "").strip().lower() @@ -8672,10 +8707,12 @@ def _handle_chat_start(handler, body, diag=None): from api.runtime_adapter import ( LegacyJournalRuntimeAdapter, StartRunRequest, + build_runtime_adapter, runtime_adapter_enabled, + runtime_adapter_runner_enabled, ) - if runtime_adapter_enabled(): + if runtime_adapter_enabled() or runtime_adapter_runner_enabled(): def _legacy_start_run(request: StartRunRequest) -> dict: return _start_chat_stream_for_session( s, @@ -8688,23 +8725,32 @@ def _handle_chat_start(handler, body, diag=None): diag=diag, ) - adapter = LegacyJournalRuntimeAdapter(start_run_delegate=_legacy_start_run) - result = adapter.start_run( - StartRunRequest( - session_id=s.session_id, - message=msg, - attachments=attachments, - workspace=workspace, - profile=getattr(s, "profile", None), - provider=model_provider, - model=model, - source="webui", - metadata={"route": "/api/chat/start"}, + def _legacy_adapter_factory(): + return LegacyJournalRuntimeAdapter(start_run_delegate=_legacy_start_run) + + try: + adapter = build_runtime_adapter( + legacy_adapter_factory=_legacy_adapter_factory, + runner_client_factory=_runtime_runner_client_factory, ) - ) - response = 
dict(result.payload) - response.setdefault("stream_id", result.stream_id) - response.setdefault("session_id", result.session_id) + if adapter is None: + raise NotImplementedError("runtime adapter selection returned no adapter") + result = adapter.start_run( + StartRunRequest( + session_id=s.session_id, + message=msg, + attachments=attachments, + workspace=workspace, + profile=getattr(s, "profile", None), + provider=model_provider, + model=model, + source="webui", + metadata={"route": "/api/chat/start"}, + ) + ) + except NotImplementedError as exc: + return j(handler, {"error": str(exc)}, status=501) + response = _chat_start_response_from_run_start(result) else: response = _start_chat_stream_for_session( s, diff --git a/docs/rfcs/hermes-run-adapter-contract.md b/docs/rfcs/hermes-run-adapter-contract.md index 9732044e..71ef4da2 100644 --- a/docs/rfcs/hermes-run-adapter-contract.md +++ b/docs/rfcs/hermes-run-adapter-contract.md @@ -4,7 +4,7 @@ - **Author:** @Michaelyklam - **Updated by:** @franksong2702 - **Created:** 2026-05-11 -- **Revised:** 2026-05-21 +- **Revised:** 2026-05-23 - **Tracking issue:** [#1925](https://github.com/nesquena/hermes-webui/issues/1925) ## Credit and Scope @@ -52,7 +52,7 @@ The immediate goal is not to build a sidecar. The immediate goal is to define th browser contract, classify current runtime state, and gate the first reversible journal slice. -## Current Gate State — 2026-05-21 +## Current Gate State — 2026-05-23 Slice 1 is now past the first active validation gate: @@ -104,11 +104,14 @@ adapter-seam work: `runner-local` adapter selection point and `build_runtime_adapter(...)` factory wiring around an injected runner client. Live browser chat routes still stay on the legacy backend, and no supervised runner process exists yet. -- The next implementation gate is a supervised/local runner backend proposal and - route-selection harness. 
It must stay default-off, keep legacy fallback intact, - pass explicit profile/workspace/model payloads instead of mutating WebUI - process globals, and avoid recreating `STREAMS` / `CANCEL_FLAGS` / approval - queues / clarify queues under new names. +- #2744 shipped the Slice 4d supervised runner route gate in v0.51.108. +- The next implementation slice is a default-off runner route-selection harness + for `/api/chat/start`. It should only engage when `runner-local` is explicitly + selected, return a bounded not-configured error until a supervised runner + client exists, keep `legacy-direct` / `legacy-journal` fallback intact, pass + explicit profile/workspace/model payloads instead of mutating WebUI process + globals, and avoid recreating `STREAMS` / `CANCEL_FLAGS` / approval queues / + clarify queues under new names. The next gate is runner-backend plumbing, not queue implementation by default. Queue / continue routing should only move before Slice 4 if a future @@ -843,6 +846,10 @@ Non-goals for Slice 4c: #### Slice 4d: Supervised runner backend route gate +Status as of 2026-05-23: shipped in v0.51.108 via #2744. The gate remains a +docs/test contract: it defines the default-off route-selection requirements but +does not itself route live chat to a runner backend. + After `runner-local` selection exists, the next reviewable gate should define the first supervised/local runner backend and the route-selection harness before live browser chat can use it. This is still a contract/test slice first: no default-on @@ -896,6 +903,51 @@ Non-goals for Slice 4d: - no broad UI/product surface migration; WebUI remains the rich workbench while only execution ownership moves. +#### Slice 4e: Default-off runner chat-start route-selection harness + +The first implementation after the Slice 4d gate should wire the +`/api/chat/start` selection point to the existing `RuntimeAdapter` factory +without adding a supervised runner process yet. 
The harness must make the +selection behavior explicit: `legacy-direct` stays default, `legacy-journal` +continues to delegate to the legacy in-process stream path, and `runner-local` +does not silently fall back to legacy when no runner client is configured. + +Scope: + +- route `/api/chat/start` through `build_runtime_adapter(...)` when an adapter + mode is explicitly selected; +- keep the successful browser response whitelisted to legacy-compatible fields + such as `stream_id`, `session_id`, `pending_started_at`, `turn_id`, `title`, + and effective model/provider metadata; +- return a bounded not-configured error for `runner-local` until a supervised + runner client/backend lands; +- pass the existing explicit `StartRunRequest` payload fields across the seam. + +Acceptance tests for Slice 4e: + +1. **Default remains legacy-direct.** With no adapter env var, `/api/chat/start` + keeps using `_start_chat_stream_for_session(...)` directly. +2. **Legacy-journal remains behavior-preserving.** The flagged legacy adapter + still delegates to the same stream-start helper and preserves the public + response shape. +3. **Runner-local does not fall back silently.** If `runner-local` is selected but + no runner client exists, the route returns a bounded error instead of starting + a WebUI-owned legacy run behind the operator's back. +4. **No adapter-internal response drift.** `run_id`, `status`, and + `active_controls` remain internal until a later contract explicitly exposes + them. +5. **No runtime-surrogate globals.** The harness does not add runner-owned stream, + cancel, approval, clarify, cached-agent, goal, or queue maps to the main WebUI + process. + +Non-goals for Slice 4e: + +- no supervised runner process yet; +- no default-on runner mode; +- no execution-survives-WebUI-restart claim for production chat turns; +- no removal of `legacy-direct` or `legacy-journal`; +- no server-side queue endpoint or queue scheduler just for adapter symmetry. 
+ ## First Meaningful Success Criteria The first meaningful milestones are deliberately split. diff --git a/tests/test_runtime_adapter_seam.py b/tests/test_runtime_adapter_seam.py index 980a7b67..99de8d98 100644 --- a/tests/test_runtime_adapter_seam.py +++ b/tests/test_runtime_adapter_seam.py @@ -409,11 +409,31 @@ def test_chat_start_route_selects_adapter_only_when_flag_enabled(): start_body = src[start_idx:src.index("def _resolve_chat_workspace_with_recovery", start_idx)] assert "runtime_adapter_enabled()" in start_body + assert "runtime_adapter_runner_enabled()" in start_body + assert "build_runtime_adapter(" in start_body + assert "legacy_adapter_factory=_legacy_adapter_factory" in start_body + assert "runner_client_factory=_runtime_runner_client_factory" in start_body assert "LegacyJournalRuntimeAdapter" in start_body assert "_start_chat_stream_for_session(" in start_body assert "HERMES_WEBUI_RUNTIME_ADAPTER" not in start_body, "route should use runtime_adapter_enabled(), not inline env checks" +def test_runner_local_chat_start_selection_does_not_fallback_to_legacy(): + routes = importlib.import_module("api.routes") + src = (routes.Path(__file__).parent.parent / "api" / "routes.py").read_text(encoding="utf-8") + start_idx = src.index("def _handle_chat_start") + start_body = src[start_idx:src.index("def _resolve_chat_workspace_with_recovery", start_idx)] + + flag_branch = "if runtime_adapter_enabled() or runtime_adapter_runner_enabled():" + assert flag_branch in start_body + assert "except NotImplementedError as exc:" in start_body + assert 'return j(handler, {"error": str(exc)}, status=501)' in start_body + assert "runner-local chat backend is not configured" in src + adapter_branch = start_body[start_body.index(flag_branch):start_body.index("else:", start_body.index(flag_branch))] + assert "_start_chat_stream_for_session(" in adapter_branch, "legacy-journal delegate should still call the legacy path" + assert "runtime_adapter_runner_enabled()" in 
adapter_branch + + def test_chat_start_adapter_path_preserves_legacy_response_shape(): """The RuntimeAdapter seam must be invisible to /api/chat/start callers. @@ -422,17 +442,53 @@ def test_chat_start_adapter_path_preserves_legacy_response_shape(): """ routes = importlib.import_module("api.routes") src = (routes.Path(__file__).parent.parent / "api" / "routes.py").read_text(encoding="utf-8") - start_idx = src.index("def _handle_chat_start") - start_body = src[start_idx:src.index("def _resolve_chat_workspace_with_recovery", start_idx)] - branch_start = start_body.index("if runtime_adapter_enabled():") - branch_end = start_body.index("else:", branch_start) - adapter_branch = start_body[branch_start:branch_end] + helper_idx = src.index("def _chat_start_response_from_run_start") + helper_body = src[helper_idx:src.index("def _runtime_adapter_goal_action", helper_idx)] - assert 'response.setdefault("stream_id", result.stream_id)' in adapter_branch - assert 'response.setdefault("session_id", result.session_id)' in adapter_branch - assert 'response.setdefault("run_id", result.run_id)' not in adapter_branch - assert 'response.setdefault("status", result.status)' not in adapter_branch - assert 'response.setdefault("active_controls", result.active_controls)' not in adapter_branch + assert '"stream_id",' in helper_body + assert '"session_id",' in helper_body + assert 'response.setdefault("stream_id", result.stream_id)' in helper_body + assert 'response.setdefault("session_id", result.session_id)' in helper_body + assert '"run_id",' not in helper_body + assert '"status",' not in helper_body + assert '"active_controls",' not in helper_body + + +def test_chat_start_response_from_run_start_filters_adapter_internal_fields(): + routes = importlib.import_module("api.routes") + runtime = importlib.import_module("api.runtime_adapter") + + response = routes._chat_start_response_from_run_start( + runtime.RunStartResult( + run_id="runner-internal-1", + session_id="s1", + 
stream_id="runner-stream-1", + status="running", + active_controls=["cancel"], + payload={ + "stream_id": "runner-stream-1", + "session_id": "s1", + "pending_started_at": 123.0, + "turn_id": "turn-1", + "title": "Demo", + "effective_model": "gpt-5.5", + "effective_model_provider": "openai-codex", + "run_id": "runner-internal-1", + "status": "running", + "active_controls": ["cancel"], + }, + ) + ) + + assert response == { + "stream_id": "runner-stream-1", + "session_id": "s1", + "pending_started_at": 123.0, + "turn_id": "turn-1", + "title": "Demo", + "effective_model": "gpt-5.5", + "effective_model_provider": "openai-codex", + } def test_rfc_distinguishes_goal_routing_from_queue_route_staging(): @@ -485,6 +541,7 @@ def test_rfc_defines_slice4d_supervised_runner_route_gate(): rfc = (routes.Path(__file__).parent.parent / "docs" / "rfcs" / "hermes-run-adapter-contract.md").read_text(encoding="utf-8") assert "#### Slice 4d: Supervised runner backend route gate" in rfc + assert "Status as of 2026-05-23: shipped in v0.51.108 via #2744" in rfc assert "After `runner-local` selection exists" in rfc assert "route-selection harness before live\nbrowser chat can use it" in rfc assert "Route remains default-off" in rfc @@ -496,6 +553,19 @@ def test_rfc_defines_slice4d_supervised_runner_route_gate(): assert "WebUI remains the rich workbench while\n only execution ownership moves" in rfc +def test_rfc_defines_slice4e_runner_chat_start_route_selection_harness(): + routes = importlib.import_module("api.routes") + rfc = (routes.Path(__file__).parent.parent / "docs" / "rfcs" / "hermes-run-adapter-contract.md").read_text(encoding="utf-8") + + assert "#### Slice 4e: Default-off runner chat-start route-selection harness" in rfc + assert "route `/api/chat/start` through `build_runtime_adapter(...)`" in rfc + assert "`legacy-direct` stays default" in rfc + assert "`legacy-journal`\ncontinues to delegate to the legacy in-process stream path" in rfc + assert "`runner-local`\ndoes not 
silently fall back to legacy" in rfc + assert "return a bounded not-configured error for `runner-local`" in rfc + assert "`run_id`, `status`, and\n `active_controls` remain internal" in rfc + assert "no supervised runner process yet" in rfc + def test_runner_runtime_adapter_passes_explicit_start_payload_without_env_mutation(monkeypatch): runtime = importlib.import_module("api.runtime_adapter") captured = []