Fix live progress activity grouping

2026-05-25 11:10:18 +00:00 · 2026-05-16 23:36:30 +08:00
parent e7e45fe98b
commit 2dfe3ffb42
6 changed files with 77 additions and 15 deletions
@@ -2,6 +2,10 @@

 ## [Unreleased]

+### Fixed
+
+- **PR #2390** by @franksong2702 (refs #2376, refs #2344) — Live long-task Activity groups now split only on user-visible interim assistant progress text, not on hidden reasoning updates or on each tool start. This keeps Codex-like progress turns readable as "progress note -> grouped tools -> next progress note" instead of degrading into repeated one-tool Activity rows. WebUI-created agent turns also now receive an explicit progress contract that asks for concise user-visible updates before tool-heavy steps.
+
 ## [v0.51.74] — 2026-05-16 — Release AX (stage-367 — 4-PR safe-lane batch — #2362 table-cell spacing + #2363 run-state-consistency RFC + #2365 custom_providers list-format + #2367 settings sidebar i18n)

 ### Added
@@ -137,6 +137,25 @@ def _clarify_timeout_seconds(default: int = 120) -> int:
 _CANCEL_MARKER_PATTERNS = ('task cancelled', 'task canceled', 'response interrupted')


+_WEBUI_VISIBLE_PROGRESS_PROMPT = """
+WebUI progress contract:
+- For multi-step work that uses tools, provide brief user-visible progress updates as normal assistant content before continuing with tool calls.
+- Each update should say what you are about to check, what you just confirmed, or why the next tool call is needed.
+- Keep updates concise, factual, and in the user's language. One or two short sentences are enough.
+- Do not reveal hidden reasoning, chain-of-thought, private scratchpads, secrets, raw logs, or long tool output.
+- For direct answers or very short tasks, skip progress updates and answer normally.
+""".strip()
+
+
+def _webui_ephemeral_system_prompt(personality_prompt: Optional[str]) -> str:
+    """Build WebUI-only runtime instructions that are not persisted to history."""
+    parts = []
+    if personality_prompt:
+        parts.append(str(personality_prompt).strip())
+    parts.append(_WEBUI_VISIBLE_PROGRESS_PROMPT)
+    return "\n\n".join(part for part in parts if part)
+
+
 def _has_new_assistant_reply(all_messages: list, prev_count: int) -> bool:
    """Return True if *new* messages (beyond ``prev_count``) contain an
    assistant message with non-empty content.
@@ -3449,9 +3468,11 @@ def _run_agent_streaming(
                        _personality_prompt = '\n'.join(p for p in _parts if p)
                    else:
                        _personality_prompt = str(_pval)
-            # Pass personality via ephemeral_system_prompt (agent's own mechanism)
-            if _personality_prompt:
-                agent.ephemeral_system_prompt = _personality_prompt
+            # Pass WebUI-only runtime guidance via ephemeral_system_prompt
+            # (agent's own mechanism). This preserves any selected personality
+            # while asking the model to emit real user-visible interim text
+            # through interim_assistant_callback instead of frontend guesses.
+            agent.ephemeral_system_prompt = _webui_ephemeral_system_prompt(_personality_prompt)
            _pending_started_at = getattr(s, 'pending_started_at', None)
            # Normal chat-start sets pending_started_at before spawning this thread;
            # fallback to now only for recovered/legacy flows where that marker is absent
@@ -1094,7 +1094,18 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
    };
    step();
  }
+  function _closeCurrentLiveActivityGroup(){
+    const turn=$('liveAssistantTurn');
+    if(turn){
+      turn.querySelectorAll('.tool-call-group[data-live-tool-call-group="1"][data-live-activity-current="1"]').forEach(group=>{
+        group.removeAttribute('data-live-activity-current');
+      });
+    }
+  }
  function _resetAssistantSegment(){
+    const options=arguments[0]||{};
+    const closeActivity=!!(options&&options.closeActivity);
+    if(closeActivity) _closeCurrentLiveActivityGroup();
    assistantRow=null;
    assistantBody=null;
    segmentStart=assistantText.length;
@@ -1221,7 +1232,7 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
      }
      if(alreadyStreamed){
        if(!S.session||S.session.session_id!==activeSid) return;
-        _resetAssistantSegment();
+        _resetAssistantSegment({closeActivity:true});
        return;
      }
      assistantText += assistantText ? `\n\n${visible}` : visible;
@@ -1233,6 +1244,7 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
        else appendThinking(_liveThinkingText());
      }
      ensureAssistantRow(true);
+      _resetAssistantSegment({closeActivity:true});
      _scheduleRender();
    });

@@ -4682,7 +4682,7 @@ function ensureActivityGroup(inner, opts){
  if(!inner) return null;
  const live=!!opts.live;
  const activityKey=opts.activityKey||(live?_activityKeyForLiveTurn():null);
-  const selector=live?'.tool-call-group[data-live-tool-call-group="1"]':'.tool-call-group[data-agent-activity-group="1"]';
+  const selector=live?'.tool-call-group[data-live-tool-call-group="1"][data-live-activity-current="1"]':'.tool-call-group[data-agent-activity-group="1"]';
  let group=inner.querySelector(selector);
  if(!group){
    group=document.createElement('div');
@@ -4699,7 +4699,10 @@ function ensureActivityGroup(inner, opts){
    group.setAttribute('data-tool-call-group','1');
    group.setAttribute('data-agent-activity-group','1');
    if(activityKey) group.setAttribute('data-activity-disclosure-key',activityKey);
-    if(live) group.setAttribute('data-live-tool-call-group','1');
+    if(live){
+      group.setAttribute('data-live-tool-call-group','1');
+      group.setAttribute('data-live-activity-current','1');
+    }
    group.innerHTML=`<button type="button" class="tool-call-group-summary" aria-expanded="${collapsed?'false':'true'}" onclick="_toggleActivityGroup(this)"><span class="tool-call-group-chevron">${li('chevron-right',12)}</span><span class="tool-call-group-label">Activity</span><span class="tool-call-group-duration"></span></button><div class="tool-call-group-body"></div>`;
    const anchor=opts.anchor||null;
    if(anchor&&anchor.parentElement===inner) anchor.insertAdjacentElement('afterend', group);
@@ -6953,9 +6956,6 @@ function appendThinking(text=''){
    return;
  }
  const thinkingText=String(text||'').trim()||'Thinking…';
-  blocks.querySelectorAll('.tool-call-group[data-live-tool-call-group="1"][data-live-activity-current="1"]').forEach(group=>{
-    group.removeAttribute('data-live-activity-current');
-  });
  let row=blocks.querySelector('.agent-activity-thinking[data-thinking-active="1"]');
  if(!row){
    row=_thinkingActivityNode(thinkingText, false);
@@ -299,6 +299,7 @@ class TestRuntimeRouteInjection(unittest.TestCase):
                self.ephemeral_system_prompt = None
                self._last_error = None
                self.interim_assistant_callback = interim_assistant_callback
+                captured["agent"] = self

            def run_conversation(self, **kwargs):
                if self.interim_assistant_callback:
@@ -388,6 +389,8 @@ class TestRuntimeRouteInjection(unittest.TestCase):
        init_kwargs = captured["init_kwargs"]
        self.assertIsNotNone(init_kwargs["interim_assistant_callback"])
        self.assertTrue(callable(init_kwargs["interim_assistant_callback"]))
+        self.assertIn("WebUI progress contract", captured["agent"].ephemeral_system_prompt)
+        self.assertIn("user-visible progress updates", captured["agent"].ephemeral_system_prompt)

        interim_events = []
        while not fake_queue.empty():
@@ -282,28 +282,50 @@ class TestToolCallGroupingStatic:
            "The non-simplified path should preserve standalone settled thinking cards."
        )

-    def test_live_thinking_is_shown_while_still_splitting_tool_bursts(self):
+    def test_live_visible_interim_text_splits_tool_bursts_not_thinking(self):
        live_thinking_fn = _function_body(UI_JS, "appendThinking")
        live_tool_fn = _function_body(UI_JS, "appendLiveToolCard")
        helper = _function_body(UI_JS, "ensureActivityGroup")
        assert "isSimplifiedToolCalling()" in live_thinking_fn, (
            "Live thinking should branch on the Compact tool activity toggle."
        )
-        assert 'data-live-activity-current' in live_thinking_fn, (
-            "Starting a new live thinking block should close the previous live tool burst."
-        )
        assert "body.insertBefore(row, body.firstChild)" not in live_thinking_fn, (
            "Live thinking should not be moved into the top Activity dropdown."
        )
        assert "_thinkingActivityNode(thinkingText, false)" in live_thinking_fn, (
            "Compact live thinking should render a collapsed Thinking card in the timeline."
        )
-        assert '[data-live-activity-current="1"]' in live_thinking_fn, (
-            "Starting a new Thinking card should mark the previous live tool burst as no longer current."
+        assert "removeAttribute('data-live-activity-current')" not in live_thinking_fn, (
+            "Reasoning/Thinking updates alone should not split consecutive tools into one-tool Activity rows."
+        )
+        assert '.tool-call-group[data-live-tool-call-group="1"][data-live-activity-current="1"]' in helper, (
+            "Live tool cards should only reuse the current Activity burst, not the first group in the turn."
+        )
+        assert "group.setAttribute('data-live-activity-current','1')" in helper, (
+            "New live Activity bursts must be marked current so later tools append to the right group."
        )
        assert "body.querySelector" in live_tool_fn and "data-live-tid" in live_tool_fn, (
            "tool_complete must still update its current live Activity burst by tool id."
        )
+        close_activity_fn = _function_body(MESSAGES_JS, "_closeCurrentLiveActivityGroup")
+        assert "data-live-activity-current" in close_activity_fn, (
+            "Visible interim assistant boundaries should close the previous live Activity burst."
+        )
+        reset_fn = _function_body(MESSAGES_JS, "_resetAssistantSegment")
+        assert "closeActivity" in reset_fn and "_closeCurrentLiveActivityGroup()" in reset_fn, (
+            "Assistant text reset and Activity burst closing should stay separate."
+        )
+        interim_match = re.search(r"source\.addEventListener\('interim_assistant',e=>\{(.*?)\n\s*\}\);", MESSAGES_JS, re.S)
+        assert interim_match and "_resetAssistantSegment({closeActivity:true});" in interim_match.group(1), (
+            "Visible interim assistant text should split the previous tool burst before the next tool starts."
+        )
+        tool_start_segment = MESSAGES_JS.split("source.addEventListener('tool',e=>{", 1)[1].split("source.addEventListener('tool_complete'", 1)[0]
+        assert "_resetAssistantSegment();" in tool_start_segment, (
+            "Tool starts should reset the next assistant text segment without closing the current Activity burst."
+        )
+        assert "_resetAssistantSegment({closeActivity:true});" not in tool_start_segment, (
+            "Tool starts must not split consecutive tools into one-tool Activity rows."
+        )


 class TestToolCardDesignTokens: