From ebb4dffc7d6e3069f7efa225bba42b63a344e458 Mon Sep 17 00:00:00 2001 From: AJV20 <24819659+AJV20@users.noreply.github.com> Date: Tue, 19 May 2026 14:50:26 -0400 Subject: [PATCH 1/2] fix: stream live tool callback events --- CHANGELOG.md | 5 ++ api/streaming.py | 71 ++++++++++++++++++++++--- static/boot.js | 17 ++++++ tests/test_command_asset_fallbacks.py | 30 +++++++++++ tests/test_live_tool_callback_events.py | 63 ++++++++++++++++++++++ 5 files changed, 179 insertions(+), 7 deletions(-) create mode 100644 tests/test_command_asset_fallbacks.py create mode 100644 tests/test_live_tool_callback_events.py diff --git a/CHANGELOG.md b/CHANGELOG.md index cfb09c64..f83d2346 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,11 @@ ## [Unreleased] +### Fixed + +- Surface live tool activity when Hermes Agent reports tools through its dedicated `tool_start_callback` / `tool_complete_callback` path, so browser chat shows the existing running tool cards instead of appearing idle until the final answer. +- Keep the composer usable if a transient restart/proxy blip prevents `commands.js` from loading while `boot.js` still attaches slash-command listeners. + ## [v0.51.93] — 2026-05-19 — Release BQ (stage-386 — 10-PR full sweep batch — RFC Slice 4 runner/sidecar gate + workspace tree toggle width CSS variable + settled file:// markdown link rendering + prompt-cache coverage percentage fix + terminal shell shutdown reap + configured model picker provider preservation + profile-aware assistant display names + state.db reconciliation slice 1 + queued-message cross-session drain fix + stale-stream writeback supersede) diff --git a/api/streaming.py b/api/streaming.py index 9df92b7c..76b92b9e 100644 --- a/api/streaming.py +++ b/api/streaming.py @@ -3450,10 +3450,20 @@ def _run_agent_streaming( # closes over it) never captures an unbound name even if this # block is reordered later (Issue #765). _checkpoint_activity = [0] + _live_tool_event_start_ids = set() + _live_tool_event_complete_ids = set() + + def _tool_args_snapshot(args): + args_snap = {} + if isinstance(args, dict): + for k, v in list(args.items())[:4]: + s2 = str(v) + args_snap[k] = s2[:120] + ('...' if len(s2) > 120 else '') + return args_snap def _record_live_tool_start(tool_call_id, name, args): if not tool_call_id or tool_call_id in _live_prompt_estimate_seen_ids: - return + return False _live_prompt_estimate_seen_ids.add(tool_call_id) _tool_call = { 'id': tool_call_id, @@ -3468,10 +3478,11 @@ def _run_agent_streaming( 'content': '', 'tool_calls': [_tool_call], }]) + return True def _record_live_tool_complete(tool_call_id, name, function_result): if not tool_call_id: - return + return False _result_text = _tool_result_snippet(function_result) _bump_live_prompt_estimate([{ 'role': 'tool', @@ -3479,6 +3490,7 @@ def _run_agent_streaming( 'tool_call_id': tool_call_id, 'content': _result_text, }]) + return True def on_tool(*cb_args, **cb_kwargs): nonlocal _reasoning_text @@ -3511,11 +3523,7 @@ def _run_agent_streaming( _emit_metering() return - args_snap = {} - if isinstance(args, dict): - for k, v in list(args.items())[:4]: - s2 = str(v) - args_snap[k] = s2[:120] + ('...' if len(s2) > 120 else '') + args_snap = _tool_args_snapshot(args) if event_type in (None, 'tool.started'): _live_tool_calls.append({ @@ -3591,6 +3599,28 @@ def _run_agent_streaming( def on_tool_start(tool_call_id, name, args): try: _record_live_tool_start(tool_call_id, name, args) + if tool_call_id and tool_call_id not in _live_tool_event_start_ids: + _live_tool_event_start_ids.add(tool_call_id) + _live_tool_calls.append({ + 'name': name, + 'args': args if isinstance(args, dict) else {}, + 'tid': tool_call_id, + }) + # Mirror to shared dict so cancel_stream() can persist it (#1361 §B) + if stream_id in STREAM_LIVE_TOOL_CALLS: + STREAM_LIVE_TOOL_CALLS[stream_id].append({ + 'name': name, + 'args': args if isinstance(args, dict) else {}, + 'done': False, + 'tid': tool_call_id, + }) + put('tool', { + 'event_type': 'tool.started', + 'name': name, + 'preview': None, + 'args': _tool_args_snapshot(args), + 'tid': tool_call_id, + }) _tool_stats = meter().get_stats() _tool_stats['session_id'] = session_id _tool_stats['usage'] = _live_usage_snapshot() @@ -3601,6 +3631,33 @@ def _run_agent_streaming( def on_tool_complete(tool_call_id, name, args, function_result): try: _record_live_tool_complete(tool_call_id, name, function_result) + if tool_call_id and tool_call_id not in _live_tool_event_complete_ids: + _live_tool_event_complete_ids.add(tool_call_id) + result_snippet = _tool_result_snippet(function_result) + for live_tc in reversed(_live_tool_calls): + if live_tc.get('done'): + continue + if live_tc.get('tid') == tool_call_id or (not live_tc.get('tid') and live_tc.get('name') == name): + live_tc['done'] = True + live_tc['snippet'] = result_snippet + break + if stream_id in STREAM_LIVE_TOOL_CALLS: + for shared_tc in reversed(STREAM_LIVE_TOOL_CALLS[stream_id]): + if shared_tc.get('done'): + continue + if shared_tc.get('tid') == tool_call_id or (not shared_tc.get('tid') and shared_tc.get('name') == name): + shared_tc['done'] = True + shared_tc['snippet'] = result_snippet + break + _checkpoint_activity[0] += 1 + put('tool_complete', { + 'event_type': 'tool.completed', + 'name': name, + 'preview': result_snippet, + 'args': _tool_args_snapshot(args), + 'tid': tool_call_id, + 'is_error': False, + }) _tool_stats = meter().get_stats() _tool_stats['session_id'] = session_id _tool_stats['usage'] = _live_usage_snapshot() diff --git a/static/boot.js b/static/boot.js index 9a30fe8e..eda383a4 100644 --- a/static/boot.js +++ b/static/boot.js @@ -1,3 +1,20 @@ +// Slash-command helpers normally come from commands.js, which is loaded before +// boot.js. If a restart/proxy blip makes that asset fail while boot.js loads, +// keep the composer usable instead of throwing ReferenceError on input/keydown. +(function(){ + function dropdown(){ return document.getElementById('cmdDropdown'); } + if(typeof window.hideCmdDropdown!=='function'){ + window.hideCmdDropdown=function(){ + const dd=dropdown(); + if(dd){ dd.classList.remove('open'); dd.style.display='none'; } + }; + } + if(typeof window.showCmdDropdown!=='function') window.showCmdDropdown=function(){}; + if(typeof window.getMatchingCommands!=='function') window.getMatchingCommands=function(){ return []; }; + if(typeof window.navigateCmdDropdown!=='function') window.navigateCmdDropdown=function(){}; + if(typeof window.selectCmdDropdownItem!=='function') window.selectCmdDropdownItem=function(){}; +})(); + async function cancelStream(){ const streamId = S.activeStreamId; if(!streamId) return; diff --git a/tests/test_command_asset_fallbacks.py b/tests/test_command_asset_fallbacks.py new file mode 100644 index 00000000..2d1bcefc --- /dev/null +++ b/tests/test_command_asset_fallbacks.py @@ -0,0 +1,30 @@ +from pathlib import Path + + +ROOT = Path(__file__).resolve().parents[1] + + +def _read(relpath: str) -> str: + return (ROOT / relpath).read_text(encoding="utf-8") + + +def test_boot_installs_safe_slash_command_fallbacks_when_commands_asset_missing(): + boot = _read("static/boot.js") + + assert "If a restart/proxy blip makes that asset fail" in boot + for name in ( + "hideCmdDropdown", + "showCmdDropdown", + "getMatchingCommands", + "navigateCmdDropdown", + "selectCmdDropdownItem", + ): + assert f"typeof window.{name}!==\'function\'" in boot or f"typeof window.{name}!='function'" in boot + + +def test_commands_asset_still_owns_real_dropdown_implementation(): + commands = _read("static/commands.js") + + assert "function hideCmdDropdown()" in commands + assert "function showCmdDropdown(" in commands + assert "function getMatchingCommands(" in commands diff --git a/tests/test_live_tool_callback_events.py b/tests/test_live_tool_callback_events.py new file mode 100644 index 00000000..0e5dd3ee --- /dev/null +++ b/tests/test_live_tool_callback_events.py @@ -0,0 +1,63 @@ +from pathlib import Path + + +ROOT = Path(__file__).resolve().parents[1] + + +def _read(relpath: str) -> str: + return (ROOT / relpath).read_text(encoding="utf-8") + + +def _function_block(src: str, name: str) -> str: + start = src.find(f"def {name}") + assert start != -1, f"{name} not found" + next_def = src.find("\n def ", start + 1) + assert next_def != -1, f"end of {name} not found" + return src[start:next_def] + + +def test_tool_start_callback_emits_existing_tool_sse_event_with_tool_id(): + src = _read("api/streaming.py") + block = _function_block(src, "on_tool_start") + + assert "put('tool'" in block, ( + "The dedicated Hermes Agent tool_start_callback must emit the existing " + "tool SSE event; otherwise WebUI stays visually silent while tools run." + ) + assert "'event_type': 'tool.started'" in block + assert "'tid': tool_call_id" in block, ( + "Live frontend cards need the tool_call_id so tool_complete can update " + "the running card in place." + ) + assert "_live_tool_event_start_ids" in block, ( + "Tool start SSE emission should be idempotent per callback id." + ) + assert "STREAM_LIVE_TOOL_CALLS" in block and "'done': False" in block + + +def test_tool_complete_callback_emits_existing_tool_complete_sse_event_with_tool_id(): + src = _read("api/streaming.py") + block = _function_block(src, "on_tool_complete") + + assert "put('tool_complete'" in block, ( + "The dedicated Hermes Agent tool_complete_callback must emit the existing " + "tool_complete SSE event so the frontend can settle the running tool card." + ) + assert "'event_type': 'tool.completed'" in block + assert "'tid': tool_call_id" in block + assert "_live_tool_event_complete_ids" in block, ( + "Tool completion SSE emission should be idempotent per callback id." + ) + assert "result_snippet = _tool_result_snippet(function_result)" in block + assert "_checkpoint_activity[0] += 1" in block + + +def test_tool_callback_events_keep_existing_frontend_event_contract(): + messages = _read("static/messages.js") + ui = _read("static/ui.js") + + assert "source.addEventListener('tool',e=>{" in messages + assert "source.addEventListener('tool_complete',e=>{" in messages + assert "tid:d.tid" in messages + assert "data-live-tid" in ui + assert "existing.replaceWith(replacement)" in ui From 612fcd30fe1ca218f14cd28321900c6090bc057b Mon Sep 17 00:00:00 2001 From: AJV20 <24819659+AJV20@users.noreply.github.com> Date: Tue, 19 May 2026 18:41:08 -0400 Subject: [PATCH 2/2] fix: avoid duplicate live tool events --- CHANGELOG.md | 1 - api/streaming.py | 10 +++++++++ static/boot.js | 17 -------------- tests/test_command_asset_fallbacks.py | 30 ------------------------- tests/test_live_tool_callback_events.py | 10 +++++++++ 5 files changed, 20 insertions(+), 48 deletions(-) delete mode 100644 tests/test_command_asset_fallbacks.py diff --git a/CHANGELOG.md b/CHANGELOG.md index f83d2346..c9328df5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,7 +5,6 @@ ### Fixed - Surface live tool activity when Hermes Agent reports tools through its dedicated `tool_start_callback` / `tool_complete_callback` path, so browser chat shows the existing running tool cards instead of appearing idle until the final answer. -- Keep the composer usable if a transient restart/proxy blip prevents `commands.js` from loading while `boot.js` still attaches slash-command listeners. ## [v0.51.93] — 2026-05-19 — Release BQ (stage-386 — 10-PR full sweep batch — RFC Slice 4 runner/sidecar gate + workspace tree toggle width CSS variable + settled file:// markdown link rendering + prompt-cache coverage percentage fix + terminal shell shutdown reap + configured model picker provider preservation + profile-aware assistant display names + state.db reconciliation slice 1 + queued-message cross-session drain fix + stale-stream writeback supersede) diff --git a/api/streaming.py b/api/streaming.py index 76b92b9e..e7ca4a82 100644 --- a/api/streaming.py +++ b/api/streaming.py @@ -3525,6 +3525,13 @@ def _run_agent_streaming( args_snap = _tool_args_snapshot(args) + # Modern Hermes Agent builds can call both tool_progress_callback + # and the structured tool_start/tool_complete callbacks for the + # same tool. Prefer the structured path when it is supported so + # the browser receives one tid-tagged tool card per real call. + if event_type in (None, 'tool.started') and 'tool_start_callback' in _agent_params: + return + if event_type in (None, 'tool.started'): _live_tool_calls.append({ 'name': name, @@ -3560,6 +3567,9 @@ def _run_agent_streaming( pass return + if event_type == 'tool.completed' and 'tool_complete_callback' in _agent_params: + return + if event_type == 'tool.completed': for live_tc in reversed(_live_tool_calls): if live_tc.get('done'): diff --git a/static/boot.js b/static/boot.js index eda383a4..9a30fe8e 100644 --- a/static/boot.js +++ b/static/boot.js @@ -1,20 +1,3 @@ -// Slash-command helpers normally come from commands.js, which is loaded before -// boot.js. If a restart/proxy blip makes that asset fail while boot.js loads, -// keep the composer usable instead of throwing ReferenceError on input/keydown. -(function(){ - function dropdown(){ return document.getElementById('cmdDropdown'); } - if(typeof window.hideCmdDropdown!=='function'){ - window.hideCmdDropdown=function(){ - const dd=dropdown(); - if(dd){ dd.classList.remove('open'); dd.style.display='none'; } - }; - } - if(typeof window.showCmdDropdown!=='function') window.showCmdDropdown=function(){}; - if(typeof window.getMatchingCommands!=='function') window.getMatchingCommands=function(){ return []; }; - if(typeof window.navigateCmdDropdown!=='function') window.navigateCmdDropdown=function(){}; - if(typeof window.selectCmdDropdownItem!=='function') window.selectCmdDropdownItem=function(){}; -})(); - async function cancelStream(){ const streamId = S.activeStreamId; if(!streamId) return; diff --git a/tests/test_command_asset_fallbacks.py b/tests/test_command_asset_fallbacks.py deleted file mode 100644 index 2d1bcefc..00000000 --- a/tests/test_command_asset_fallbacks.py +++ /dev/null @@ -1,30 +0,0 @@ -from pathlib import Path - - -ROOT = Path(__file__).resolve().parents[1] - - -def _read(relpath: str) -> str: - return (ROOT / relpath).read_text(encoding="utf-8") - - -def test_boot_installs_safe_slash_command_fallbacks_when_commands_asset_missing(): - boot = _read("static/boot.js") - - assert "If a restart/proxy blip makes that asset fail" in boot - for name in ( - "hideCmdDropdown", - "showCmdDropdown", - "getMatchingCommands", - "navigateCmdDropdown", - "selectCmdDropdownItem", - ): - assert f"typeof window.{name}!==\'function\'" in boot or f"typeof window.{name}!='function'" in boot - - -def test_commands_asset_still_owns_real_dropdown_implementation(): - commands = _read("static/commands.js") - - assert "function hideCmdDropdown()" in commands - assert "function showCmdDropdown(" in commands - assert "function getMatchingCommands(" in commands diff --git a/tests/test_live_tool_callback_events.py b/tests/test_live_tool_callback_events.py index 0e5dd3ee..74c8b343 100644 --- a/tests/test_live_tool_callback_events.py +++ b/tests/test_live_tool_callback_events.py @@ -52,6 +52,16 @@ def test_tool_complete_callback_emits_existing_tool_complete_sse_event_with_tool assert "_checkpoint_activity[0] += 1" in block +def test_legacy_progress_events_are_suppressed_when_structured_callbacks_are_wired(): + src = _read("api/streaming.py") + block = _function_block(src, "on_tool") + + assert "event_type in (None, 'tool.started') and 'tool_start_callback' in _agent_params" in block + assert "event_type == 'tool.completed' and 'tool_complete_callback' in _agent_params" in block + assert block.index("'tool_start_callback' in _agent_params") < block.index("put('tool'") + assert block.index("'tool_complete_callback' in _agent_params") < block.index("put('tool_complete'") + + def test_tool_callback_events_keep_existing_frontend_event_contract(): messages = _read("static/messages.js") ui = _read("static/ui.js")