From 2dfe3ffb4241efcaa1914b36202b02b7fa9a9932 Mon Sep 17 00:00:00 2001
From: Frank Song <franksong2702@gmail.com>
Date: Sat, 16 May 2026 23:36:30 +0800
Subject: [PATCH] Fix live progress activity grouping

---
 CHANGELOG.md                       |  4 ++++
 api/streaming.py                   | 27 +++++++++++++++++++++---
 static/messages.js                 | 14 +++++++++++-
 static/ui.js                       | 10 ++++-----
 tests/test_sprint42.py             |  3 +++
 tests/test_ui_tool_call_cleanup.py | 34 ++++++++++++++++++++++++------
 6 files changed, 77 insertions(+), 15 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 79b39b1c..ca57d1b9 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,10 @@
 
 ## [Unreleased]
 
+### Fixed
+
+- **PR #2390** by @franksong2702 (refs #2376, refs #2344) — Live long-task Activity groups now split only on user-visible interim assistant progress text, not on hidden reasoning updates or on every tool start. This keeps Codex-like progress turns readable as "progress note -> grouped tools -> next progress note" instead of degrading into repeated one-tool Activity rows. WebUI-created agent turns also receive an explicit progress contract (passed through the agent's ephemeral system prompt) that asks for concise user-visible updates before tool-heavy steps.
+
 ## [v0.51.74] — 2026-05-16 — Release AX (stage-367 — 4-PR safe-lane batch — #2362 table-cell spacing + #2363 run-state-consistency RFC + #2365 custom_providers list-format + #2367 settings sidebar i18n)
 
 ### Added
diff --git a/api/streaming.py b/api/streaming.py
index 18a32fc2..68b37a47 100644
--- a/api/streaming.py
+++ b/api/streaming.py
@@ -137,6 +137,25 @@ def _clarify_timeout_seconds(default: int = 120) -> int:
 _CANCEL_MARKER_PATTERNS = ('task cancelled', 'task canceled', 'response interrupted')
 
 
+_WEBUI_VISIBLE_PROGRESS_PROMPT = """
+WebUI progress contract:
+- For multi-step work that uses tools, provide brief user-visible progress updates as normal assistant content before continuing with tool calls.
+- Each update should say what you are about to check, what you just confirmed, or why the next tool call is needed.
+- Keep updates concise, factual, and in the user's language. One or two short sentences are enough.
+- Do not reveal hidden reasoning, chain-of-thought, private scratchpads, secrets, raw logs, or long tool output.
+- For direct answers or very short tasks, skip progress updates and answer normally.
+""".strip()
+
+
+def _webui_ephemeral_system_prompt(personality_prompt: Optional[str]) -> str:
+    """Build WebUI-only runtime instructions that are not persisted to history."""
+    parts = []
+    if personality_prompt:
+        parts.append(str(personality_prompt).strip())
+    parts.append(_WEBUI_VISIBLE_PROGRESS_PROMPT)
+    return "\n\n".join(part for part in parts if part)
+
+
 def _has_new_assistant_reply(all_messages: list, prev_count: int) -> bool:
     """Return True if *new* messages (beyond ``prev_count``) contain an
     assistant message with non-empty content.
@@ -3449,9 +3468,11 @@ def _run_agent_streaming(
                         _personality_prompt = '\n'.join(p for p in _parts if p)
                     else:
                         _personality_prompt = str(_pval)
-            # Pass personality via ephemeral_system_prompt (agent's own mechanism)
-            if _personality_prompt:
-                agent.ephemeral_system_prompt = _personality_prompt
+            # Pass WebUI-only runtime guidance via ephemeral_system_prompt
+            # (agent's own mechanism). This preserves any selected personality
+            # while making long tool runs emit real user-visible interim text
+            # through interim_assistant_callback instead of frontend guesses.
+            agent.ephemeral_system_prompt = _webui_ephemeral_system_prompt(_personality_prompt)
             _pending_started_at = getattr(s, 'pending_started_at', None)
             # Normal chat-start sets pending_started_at before spawning this thread;
             # fallback to now only for recovered/legacy flows where that marker is absent
diff --git a/static/messages.js b/static/messages.js
index 5bfdddaa..d7eb97a0 100644
--- a/static/messages.js
+++ b/static/messages.js
@@ -1094,7 +1094,18 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
     };
     step();
   }
+  function _closeCurrentLiveActivityGroup(){
+    const turn=$('liveAssistantTurn');
+    if(turn){
+      turn.querySelectorAll('.tool-call-group[data-live-tool-call-group="1"][data-live-activity-current="1"]').forEach(group=>{
+        group.removeAttribute('data-live-activity-current');
+      });
+    }
+  }
   function _resetAssistantSegment(){
+    const options=arguments[0]||{};
+    const closeActivity=!!(options&&options.closeActivity);
+    if(closeActivity) _closeCurrentLiveActivityGroup();
     assistantRow=null;
     assistantBody=null;
     segmentStart=assistantText.length;
@@ -1221,7 +1232,7 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
       }
       if(alreadyStreamed){
         if(!S.session||S.session.session_id!==activeSid) return;
-        _resetAssistantSegment();
+        _resetAssistantSegment({closeActivity:true});
         return;
       }
       assistantText += assistantText ? `\n\n${visible}` : visible;
@@ -1233,6 +1244,7 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
         else appendThinking(_liveThinkingText());
       }
       ensureAssistantRow(true);
+      _resetAssistantSegment({closeActivity:true});
       _scheduleRender();
     });
 
diff --git a/static/ui.js b/static/ui.js
index ad3b0703..1f29f37b 100644
--- a/static/ui.js
+++ b/static/ui.js
@@ -4682,7 +4682,7 @@ function ensureActivityGroup(inner, opts){
   if(!inner) return null;
   const live=!!opts.live;
   const activityKey=opts.activityKey||(live?_activityKeyForLiveTurn():null);
-  const selector=live?'.tool-call-group[data-live-tool-call-group="1"]':'.tool-call-group[data-agent-activity-group="1"]';
+  const selector=live?'.tool-call-group[data-live-tool-call-group="1"][data-live-activity-current="1"]':'.tool-call-group[data-agent-activity-group="1"]';
   let group=inner.querySelector(selector);
   if(!group){
     group=document.createElement('div');
@@ -4699,7 +4699,10 @@ function ensureActivityGroup(inner, opts){
     group.setAttribute('data-tool-call-group','1');
     group.setAttribute('data-agent-activity-group','1');
     if(activityKey) group.setAttribute('data-activity-disclosure-key',activityKey);
-    if(live) group.setAttribute('data-live-tool-call-group','1');
+    if(live){
+      group.setAttribute('data-live-tool-call-group','1');
+      group.setAttribute('data-live-activity-current','1');
+    }
     group.innerHTML=`<button type="button" class="tool-call-group-summary" aria-expanded="${collapsed?'false':'true'}" onclick="_toggleActivityGroup(this)"><span class="tool-call-group-chevron">${li('chevron-right',12)}</span><span class="tool-call-group-label">Activity</span><span class="tool-call-group-duration"></span></button><div class="tool-call-group-body"></div>`;
     const anchor=opts.anchor||null;
     if(anchor&&anchor.parentElement===inner) anchor.insertAdjacentElement('afterend', group);
@@ -6953,9 +6956,6 @@ function appendThinking(text=''){
     return;
   }
   const thinkingText=String(text||'').trim()||'Thinking…';
-  blocks.querySelectorAll('.tool-call-group[data-live-tool-call-group="1"][data-live-activity-current="1"]').forEach(group=>{
-    group.removeAttribute('data-live-activity-current');
-  });
   let row=blocks.querySelector('.agent-activity-thinking[data-thinking-active="1"]');
   if(!row){
     row=_thinkingActivityNode(thinkingText, false);
diff --git a/tests/test_sprint42.py b/tests/test_sprint42.py
index 995a6614..e1889c75 100644
--- a/tests/test_sprint42.py
+++ b/tests/test_sprint42.py
@@ -299,6 +299,7 @@ class TestRuntimeRouteInjection(unittest.TestCase):
                 self.ephemeral_system_prompt = None
                 self._last_error = None
                 self.interim_assistant_callback = interim_assistant_callback
+                captured["agent"] = self
 
             def run_conversation(self, **kwargs):
                 if self.interim_assistant_callback:
@@ -388,6 +389,8 @@ class TestRuntimeRouteInjection(unittest.TestCase):
         init_kwargs = captured["init_kwargs"]
         self.assertIsNotNone(init_kwargs["interim_assistant_callback"])
         self.assertTrue(callable(init_kwargs["interim_assistant_callback"]))
+        self.assertIn("WebUI progress contract", captured["agent"].ephemeral_system_prompt)
+        self.assertIn("user-visible progress updates", captured["agent"].ephemeral_system_prompt)
 
         interim_events = []
         while not fake_queue.empty():
diff --git a/tests/test_ui_tool_call_cleanup.py b/tests/test_ui_tool_call_cleanup.py
index 44204d00..cee72ab3 100644
--- a/tests/test_ui_tool_call_cleanup.py
+++ b/tests/test_ui_tool_call_cleanup.py
@@ -282,28 +282,50 @@ class TestToolCallGroupingStatic:
             "The non-simplified path should preserve standalone settled thinking cards."
         )
 
-    def test_live_thinking_is_shown_while_still_splitting_tool_bursts(self):
+    def test_live_visible_interim_text_splits_tool_bursts_not_thinking(self):
         live_thinking_fn = _function_body(UI_JS, "appendThinking")
         live_tool_fn = _function_body(UI_JS, "appendLiveToolCard")
         helper = _function_body(UI_JS, "ensureActivityGroup")
         assert "isSimplifiedToolCalling()" in live_thinking_fn, (
             "Live thinking should branch on the Compact tool activity toggle."
         )
-        assert 'data-live-activity-current' in live_thinking_fn, (
-            "Starting a new live thinking block should close the previous live tool burst."
-        )
         assert "body.insertBefore(row, body.firstChild)" not in live_thinking_fn, (
             "Live thinking should not be moved into the top Activity dropdown."
         )
         assert "_thinkingActivityNode(thinkingText, false)" in live_thinking_fn, (
             "Compact live thinking should render a collapsed Thinking card in the timeline."
         )
-        assert '[data-live-activity-current="1"]' in live_thinking_fn, (
-            "Starting a new Thinking card should mark the previous live tool burst as no longer current."
+        assert "removeAttribute('data-live-activity-current')" not in live_thinking_fn, (
+            "Reasoning/Thinking updates alone should not split consecutive tools into one-tool Activity rows."
+        )
+        assert '.tool-call-group[data-live-tool-call-group="1"][data-live-activity-current="1"]' in helper, (
+            "Live tool cards should only reuse the current Activity burst, not the first group in the turn."
+        )
+        assert "group.setAttribute('data-live-activity-current','1')" in helper, (
+            "New live Activity bursts must be marked current so later tools append to the right group."
         )
         assert "body.querySelector" in live_tool_fn and "data-live-tid" in live_tool_fn, (
             "tool_complete must still update its current live Activity burst by tool id."
         )
+        close_activity_fn = _function_body(MESSAGES_JS, "_closeCurrentLiveActivityGroup")
+        assert "data-live-activity-current" in close_activity_fn, (
+            "Visible interim assistant boundaries should close the previous live Activity burst."
+        )
+        reset_fn = _function_body(MESSAGES_JS, "_resetAssistantSegment")
+        assert "closeActivity" in reset_fn and "_closeCurrentLiveActivityGroup()" in reset_fn, (
+            "Assistant text reset and Activity burst closing should stay separate."
+        )
+        interim_match = re.search(r"source\.addEventListener\('interim_assistant',e=>\{(.*?)\n\s*\}\);", MESSAGES_JS, re.S)
+        assert interim_match and "_resetAssistantSegment({closeActivity:true});" in interim_match.group(1), (
+            "Visible interim assistant text should split the previous tool burst before the next tool starts."
+        )
+        tool_start_segment = MESSAGES_JS.split("source.addEventListener('tool',e=>{", 1)[1].split("source.addEventListener('tool_complete'", 1)[0]
+        assert "_resetAssistantSegment();" in tool_start_segment, (
+            "Tool starts should reset the next assistant text segment without closing the current Activity burst."
+        )
+        assert "_resetAssistantSegment({closeActivity:true});" not in tool_start_segment, (
+            "Tool starts must not split consecutive tools into one-tool Activity rows."
+        )
 
 
 class TestToolCardDesignTokens: