From ebb4dffc7d6e3069f7efa225bba42b63a344e458 Mon Sep 17 00:00:00 2001
From: AJV20 <24819659+AJV20@users.noreply.github.com>
Date: Tue, 19 May 2026 14:50:26 -0400
Subject: [PATCH 1/2] fix: stream live tool callback events

---
 CHANGELOG.md                            |  5 ++
 api/streaming.py                        | 71 ++++++++++++++++++++++---
 static/boot.js                          | 17 ++++++
 tests/test_command_asset_fallbacks.py   | 30 +++++++++++
 tests/test_live_tool_callback_events.py | 63 ++++++++++++++++++++++
 5 files changed, 179 insertions(+), 7 deletions(-)
 create mode 100644 tests/test_command_asset_fallbacks.py
 create mode 100644 tests/test_live_tool_callback_events.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index cfb09c64..f83d2346 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,11 @@
 
 ## [Unreleased]
 
+### Fixed
+
+- Surface live tool activity when Hermes Agent reports tools through its dedicated `tool_start_callback` / `tool_complete_callback` path, so browser chat shows the existing running tool cards instead of appearing idle until the final answer.
+- Keep the composer usable if a transient restart/proxy blip prevents `commands.js` from loading while `boot.js` still attaches slash-command listeners.
+
 
 ## [v0.51.93] — 2026-05-19 — Release BQ (stage-386 — 10-PR full sweep batch — RFC Slice 4 runner/sidecar gate + workspace tree toggle width CSS variable + settled file:// markdown link rendering + prompt-cache coverage percentage fix + terminal shell shutdown reap + configured model picker provider preservation + profile-aware assistant display names + state.db reconciliation slice 1 + queued-message cross-session drain fix + stale-stream writeback supersede)
 
diff --git a/api/streaming.py b/api/streaming.py
index 9df92b7c..76b92b9e 100644
--- a/api/streaming.py
+++ b/api/streaming.py
@@ -3450,10 +3450,20 @@ def _run_agent_streaming(
             # closes over it) never captures an unbound name even if this
             # block is reordered later (Issue #765).
             _checkpoint_activity = [0]
+            _live_tool_event_start_ids = set()
+            _live_tool_event_complete_ids = set()
+
+            def _tool_args_snapshot(args):
+                """Preview ``args`` for display: first 4 items, values as text capped at 120 chars."""
+                if not isinstance(args, dict):
+                    return {}  # non-dict payloads produce an empty preview
+                leading = list(args.items())[:4]
+                texts = [(key, str(val)) for key, val in leading]
+                return {key: txt[:120] + ('...' if len(txt) > 120 else '') for key, txt in texts}
 
             def _record_live_tool_start(tool_call_id, name, args):
                 if not tool_call_id or tool_call_id in _live_prompt_estimate_seen_ids:
-                    return
+                    return False
                 _live_prompt_estimate_seen_ids.add(tool_call_id)
                 _tool_call = {
                     'id': tool_call_id,
@@ -3468,10 +3478,11 @@ def _run_agent_streaming(
                     'content': '',
                     'tool_calls': [_tool_call],
                 }])
+                return True
 
             def _record_live_tool_complete(tool_call_id, name, function_result):
                 if not tool_call_id:
-                    return
+                    return False
                 _result_text = _tool_result_snippet(function_result)
                 _bump_live_prompt_estimate([{
                     'role': 'tool',
@@ -3479,6 +3490,7 @@ def _run_agent_streaming(
                     'tool_call_id': tool_call_id,
                     'content': _result_text,
                 }])
+                return True
 
             def on_tool(*cb_args, **cb_kwargs):
                 nonlocal _reasoning_text
@@ -3511,11 +3523,7 @@ def _run_agent_streaming(
                         _emit_metering()
                     return
 
-                args_snap = {}
-                if isinstance(args, dict):
-                    for k, v in list(args.items())[:4]:
-                        s2 = str(v)
-                        args_snap[k] = s2[:120] + ('...' if len(s2) > 120 else '')
+                args_snap = _tool_args_snapshot(args)
 
                 if event_type in (None, 'tool.started'):
                     _live_tool_calls.append({
@@ -3591,6 +3599,28 @@ def _run_agent_streaming(
             def on_tool_start(tool_call_id, name, args):
                 try:
                     _record_live_tool_start(tool_call_id, name, args)
+                    if tool_call_id and tool_call_id not in _live_tool_event_start_ids:
+                        _live_tool_event_start_ids.add(tool_call_id)
+                        _live_tool_calls.append({
+                            'name': name,
+                            'args': args if isinstance(args, dict) else {},
+                            'tid': tool_call_id,
+                        })
+                        # Mirror to shared dict so cancel_stream() can persist it (#1361 §B)
+                        if stream_id in STREAM_LIVE_TOOL_CALLS:
+                            STREAM_LIVE_TOOL_CALLS[stream_id].append({
+                                'name': name,
+                                'args': args if isinstance(args, dict) else {},
+                                'done': False,
+                                'tid': tool_call_id,
+                            })
+                        put('tool', {
+                            'event_type': 'tool.started',
+                            'name': name,
+                            'preview': None,
+                            'args': _tool_args_snapshot(args),
+                            'tid': tool_call_id,
+                        })
                     _tool_stats = meter().get_stats()
                     _tool_stats['session_id'] = session_id
                     _tool_stats['usage'] = _live_usage_snapshot()
@@ -3601,6 +3631,33 @@ def _run_agent_streaming(
             def on_tool_complete(tool_call_id, name, args, function_result):
                 try:
                     _record_live_tool_complete(tool_call_id, name, function_result)
+                    if tool_call_id and tool_call_id not in _live_tool_event_complete_ids:
+                        _live_tool_event_complete_ids.add(tool_call_id)
+                        result_snippet = _tool_result_snippet(function_result)
+                        for live_tc in reversed(_live_tool_calls):
+                            if live_tc.get('done'):
+                                continue
+                            if live_tc.get('tid') == tool_call_id or (not live_tc.get('tid') and live_tc.get('name') == name):
+                                live_tc['done'] = True
+                                live_tc['snippet'] = result_snippet
+                                break
+                        if stream_id in STREAM_LIVE_TOOL_CALLS:
+                            for shared_tc in reversed(STREAM_LIVE_TOOL_CALLS[stream_id]):
+                                if shared_tc.get('done'):
+                                    continue
+                                if shared_tc.get('tid') == tool_call_id or (not shared_tc.get('tid') and shared_tc.get('name') == name):
+                                    shared_tc['done'] = True
+                                    shared_tc['snippet'] = result_snippet
+                                    break
+                        _checkpoint_activity[0] += 1
+                        put('tool_complete', {
+                            'event_type': 'tool.completed',
+                            'name': name,
+                            'preview': result_snippet,
+                            'args': _tool_args_snapshot(args),
+                            'tid': tool_call_id,
+                            'is_error': False,
+                        })
                     _tool_stats = meter().get_stats()
                     _tool_stats['session_id'] = session_id
                     _tool_stats['usage'] = _live_usage_snapshot()
diff --git a/static/boot.js b/static/boot.js
index 9a30fe8e..eda383a4 100644
--- a/static/boot.js
+++ b/static/boot.js
@@ -1,3 +1,20 @@
+// Slash-command helpers normally come from commands.js, which is loaded before
+// boot.js. If a restart/proxy blip makes that asset fail while boot.js loads,
+// keep the composer usable instead of throwing ReferenceError on input/keydown.
+(function(){
+  function dropdown(){ return document.getElementById('cmdDropdown'); }
+  if(typeof window.hideCmdDropdown!=='function'){
+    window.hideCmdDropdown=function(){
+      const dd=dropdown();
+      if(dd){ dd.classList.remove('open'); dd.style.display='none'; }
+    };
+  }
+  if(typeof window.showCmdDropdown!=='function') window.showCmdDropdown=function(){};
+  if(typeof window.getMatchingCommands!=='function') window.getMatchingCommands=function(){ return []; };
+  if(typeof window.navigateCmdDropdown!=='function') window.navigateCmdDropdown=function(){};
+  if(typeof window.selectCmdDropdownItem!=='function') window.selectCmdDropdownItem=function(){};
+})();
+
 async function cancelStream(){
   const streamId = S.activeStreamId;
   if(!streamId) return;
diff --git a/tests/test_command_asset_fallbacks.py b/tests/test_command_asset_fallbacks.py
new file mode 100644
index 00000000..2d1bcefc
--- /dev/null
+++ b/tests/test_command_asset_fallbacks.py
@@ -0,0 +1,30 @@
+from pathlib import Path
+
+
+ROOT = Path(__file__).resolve().parents[1]
+
+
+def _read(relpath: str) -> str:
+    return (ROOT / relpath).read_text(encoding="utf-8")
+
+
+def test_boot_installs_safe_slash_command_fallbacks_when_commands_asset_missing():
+    boot = _read("static/boot.js")
+
+    assert "If a restart/proxy blip makes that asset fail" in boot
+    for name in (
+        "hideCmdDropdown",
+        "showCmdDropdown",
+        "getMatchingCommands",
+        "navigateCmdDropdown",
+        "selectCmdDropdownItem",
+    ):
+        assert f"typeof window.{name}!==\'function\'" in boot or f"typeof window.{name}!='function'" in boot
+
+
+def test_commands_asset_still_owns_real_dropdown_implementation():
+    commands = _read("static/commands.js")
+
+    assert "function hideCmdDropdown()" in commands
+    assert "function showCmdDropdown(" in commands
+    assert "function getMatchingCommands(" in commands
diff --git a/tests/test_live_tool_callback_events.py b/tests/test_live_tool_callback_events.py
new file mode 100644
index 00000000..0e5dd3ee
--- /dev/null
+++ b/tests/test_live_tool_callback_events.py
@@ -0,0 +1,63 @@
+from pathlib import Path
+
+
+ROOT = Path(__file__).resolve().parents[1]
+
+
+def _read(relpath: str) -> str:
+    return ROOT.joinpath(relpath).read_text(encoding="utf-8")
+
+
+def _function_block(src: str, name: str) -> str:
+    """Return the text of nested function ``name``, from its ``def`` up to the next 12-space-indented ``def``."""
+    start = src.find(f"def {name}(")  # "(" pins the exact name: "on_tool" must not match "on_tool_start"
+    end = src.find("\n            def ", start + 1) if start != -1 else -1
+    assert start != -1 and end != -1, f"{name} not found" if start == -1 else f"end of {name} not found"
+    return src[start:end]
+
+
+def test_tool_start_callback_emits_existing_tool_sse_event_with_tool_id():
+    """Source-text check: the ``on_tool_start`` block in api/streaming.py contains the tid-tagged 'tool' emit."""
+    src = _read("api/streaming.py")
+    block = _function_block(src, "on_tool_start")
+    assert "put('tool'" in block, (
+        "The dedicated Hermes Agent tool_start_callback must emit the existing "
+        "tool SSE event; otherwise WebUI stays visually silent while tools run."
+    )
+    assert "'event_type': 'tool.started'" in block
+    assert "'tid': tool_call_id" in block, (
+        "Live frontend cards need the tool_call_id so tool_complete can update "
+        "the running card in place."
+    )
+    assert "_live_tool_event_start_ids" in block, (
+        "Tool start SSE emission should be idempotent per callback id."
+    )
+    assert "STREAM_LIVE_TOOL_CALLS" in block and "'done': False" in block
+
+
+def test_tool_complete_callback_emits_existing_tool_complete_sse_event_with_tool_id():
+    """Source-text check: the ``on_tool_complete`` block in api/streaming.py contains the 'tool_complete' emit."""
+    src = _read("api/streaming.py")
+    block = _function_block(src, "on_tool_complete")
+    assert "put('tool_complete'" in block, (
+        "The dedicated Hermes Agent tool_complete_callback must emit the existing "
+        "tool_complete SSE event so the frontend can settle the running tool card."
+    )
+    assert "'event_type': 'tool.completed'" in block
+    assert "'tid': tool_call_id" in block
+    assert "_live_tool_event_complete_ids" in block, (
+        "Tool completion SSE emission should be idempotent per callback id."
+    )
+    assert "result_snippet = _tool_result_snippet(function_result)" in block
+    assert "_checkpoint_activity[0] += 1" in block
+
+
+def test_tool_callback_events_keep_existing_frontend_event_contract():
+    messages = _read("static/messages.js")
+    ui = _read("static/ui.js")
+    # Literal fragments that must remain: SSE listeners + tid field in messages.js, data-live-tid/replaceWith in ui.js.
+    assert "source.addEventListener('tool',e=>{" in messages
+    assert "source.addEventListener('tool_complete',e=>{" in messages
+    assert "tid:d.tid" in messages
+    assert "data-live-tid" in ui
+    assert "existing.replaceWith(replacement)" in ui

From 612fcd30fe1ca218f14cd28321900c6090bc057b Mon Sep 17 00:00:00 2001
From: AJV20 <24819659+AJV20@users.noreply.github.com>
Date: Tue, 19 May 2026 18:41:08 -0400
Subject: [PATCH 2/2] fix: avoid duplicate live tool events

---
 CHANGELOG.md                            |  1 -
 api/streaming.py                        | 10 +++++++++
 static/boot.js                          | 17 --------------
 tests/test_command_asset_fallbacks.py   | 30 -------------------------
 tests/test_live_tool_callback_events.py | 10 +++++++++
 5 files changed, 20 insertions(+), 48 deletions(-)
 delete mode 100644 tests/test_command_asset_fallbacks.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index f83d2346..c9328df5 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,7 +5,6 @@
 ### Fixed
 
 - Surface live tool activity when Hermes Agent reports tools through its dedicated `tool_start_callback` / `tool_complete_callback` path, so browser chat shows the existing running tool cards instead of appearing idle until the final answer.
-- Keep the composer usable if a transient restart/proxy blip prevents `commands.js` from loading while `boot.js` still attaches slash-command listeners.
 
 
 ## [v0.51.93] — 2026-05-19 — Release BQ (stage-386 — 10-PR full sweep batch — RFC Slice 4 runner/sidecar gate + workspace tree toggle width CSS variable + settled file:// markdown link rendering + prompt-cache coverage percentage fix + terminal shell shutdown reap + configured model picker provider preservation + profile-aware assistant display names + state.db reconciliation slice 1 + queued-message cross-session drain fix + stale-stream writeback supersede)
diff --git a/api/streaming.py b/api/streaming.py
index 76b92b9e..e7ca4a82 100644
--- a/api/streaming.py
+++ b/api/streaming.py
@@ -3525,6 +3525,13 @@ def _run_agent_streaming(
 
                 args_snap = _tool_args_snapshot(args)
 
+                # Modern Hermes Agent builds can call both tool_progress_callback
+                # and the structured tool_start/tool_complete callbacks for the
+                # same tool. Prefer the structured path when it is supported so
+                # the browser receives one tid-tagged tool card per real call.
+                if event_type in (None, 'tool.started') and 'tool_start_callback' in _agent_params:
+                    return
+
                 if event_type in (None, 'tool.started'):
                     _live_tool_calls.append({
                         'name': name,
@@ -3560,6 +3567,9 @@ def _run_agent_streaming(
                         pass
                     return
 
+                if event_type == 'tool.completed' and 'tool_complete_callback' in _agent_params:
+                    return
+
                 if event_type == 'tool.completed':
                     for live_tc in reversed(_live_tool_calls):
                         if live_tc.get('done'):
diff --git a/static/boot.js b/static/boot.js
index eda383a4..9a30fe8e 100644
--- a/static/boot.js
+++ b/static/boot.js
@@ -1,20 +1,3 @@
-// Slash-command helpers normally come from commands.js, which is loaded before
-// boot.js. If a restart/proxy blip makes that asset fail while boot.js loads,
-// keep the composer usable instead of throwing ReferenceError on input/keydown.
-(function(){
-  function dropdown(){ return document.getElementById('cmdDropdown'); }
-  if(typeof window.hideCmdDropdown!=='function'){
-    window.hideCmdDropdown=function(){
-      const dd=dropdown();
-      if(dd){ dd.classList.remove('open'); dd.style.display='none'; }
-    };
-  }
-  if(typeof window.showCmdDropdown!=='function') window.showCmdDropdown=function(){};
-  if(typeof window.getMatchingCommands!=='function') window.getMatchingCommands=function(){ return []; };
-  if(typeof window.navigateCmdDropdown!=='function') window.navigateCmdDropdown=function(){};
-  if(typeof window.selectCmdDropdownItem!=='function') window.selectCmdDropdownItem=function(){};
-})();
-
 async function cancelStream(){
   const streamId = S.activeStreamId;
   if(!streamId) return;
diff --git a/tests/test_command_asset_fallbacks.py b/tests/test_command_asset_fallbacks.py
deleted file mode 100644
index 2d1bcefc..00000000
--- a/tests/test_command_asset_fallbacks.py
+++ /dev/null
@@ -1,30 +0,0 @@
-from pathlib import Path
-
-
-ROOT = Path(__file__).resolve().parents[1]
-
-
-def _read(relpath: str) -> str:
-    return (ROOT / relpath).read_text(encoding="utf-8")
-
-
-def test_boot_installs_safe_slash_command_fallbacks_when_commands_asset_missing():
-    boot = _read("static/boot.js")
-
-    assert "If a restart/proxy blip makes that asset fail" in boot
-    for name in (
-        "hideCmdDropdown",
-        "showCmdDropdown",
-        "getMatchingCommands",
-        "navigateCmdDropdown",
-        "selectCmdDropdownItem",
-    ):
-        assert f"typeof window.{name}!==\'function\'" in boot or f"typeof window.{name}!='function'" in boot
-
-
-def test_commands_asset_still_owns_real_dropdown_implementation():
-    commands = _read("static/commands.js")
-
-    assert "function hideCmdDropdown()" in commands
-    assert "function showCmdDropdown(" in commands
-    assert "function getMatchingCommands(" in commands
diff --git a/tests/test_live_tool_callback_events.py b/tests/test_live_tool_callback_events.py
index 0e5dd3ee..74c8b343 100644
--- a/tests/test_live_tool_callback_events.py
+++ b/tests/test_live_tool_callback_events.py
@@ -52,6 +52,16 @@ def test_tool_complete_callback_emits_existing_tool_complete_sse_event_with_tool
     assert "_checkpoint_activity[0] += 1" in block
 
 
+def test_legacy_progress_events_are_suppressed_when_structured_callbacks_are_wired():
+    """Source-text check: ``on_tool`` carries both ``_agent_params`` guards, each ahead of its ``put`` call."""
+    src = _read("api/streaming.py")
+    block = _function_block(src, "on_tool")
+    assert "event_type in (None, 'tool.started') and 'tool_start_callback' in _agent_params" in block
+    assert "event_type == 'tool.completed' and 'tool_complete_callback' in _agent_params" in block
+    assert block.index("'tool_start_callback' in _agent_params") < block.index("put('tool'")
+    assert block.index("'tool_complete_callback' in _agent_params") < block.index("put('tool_complete'")
+
+
 def test_tool_callback_events_keep_existing_frontend_event_contract():
     messages = _read("static/messages.js")
     ui = _read("static/ui.js")