From 2dfe3ffb4241efcaa1914b36202b02b7fa9a9932 Mon Sep 17 00:00:00 2001 From: Frank Song Date: Sat, 16 May 2026 23:36:30 +0800 Subject: [PATCH] Fix live progress activity grouping --- CHANGELOG.md | 4 ++++ api/streaming.py | 27 +++++++++++++++++++++--- static/messages.js | 14 +++++++++++- static/ui.js | 10 ++++----- tests/test_sprint42.py | 3 +++ tests/test_ui_tool_call_cleanup.py | 34 ++++++++++++++++++++++++------ 6 files changed, 77 insertions(+), 15 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 79b39b1c..ca57d1b9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,10 @@ ## [Unreleased] +### Fixed + +- **PR #2390** by @franksong2702 (refs #2376, refs #2344) — Live long-task Activity groups now split only on user-visible interim assistant progress text, not on hidden reasoning updates or each tool start. This keeps Codex-like progress turns readable as "progress note -> grouped tools -> next progress note" instead of degrading into repeated one-tool Activity rows, and WebUI-created agent turns now receive an explicit progress contract that asks for concise user-visible updates before tool-heavy steps. + ## [v0.51.74] — 2026-05-16 — Release AX (stage-367 — 4-PR safe-lane batch — #2362 table-cell spacing + #2363 run-state-consistency RFC + #2365 custom_providers list-format + #2367 settings sidebar i18n) ### Added diff --git a/api/streaming.py b/api/streaming.py index 18a32fc2..68b37a47 100644 --- a/api/streaming.py +++ b/api/streaming.py @@ -137,6 +137,25 @@ def _clarify_timeout_seconds(default: int = 120) -> int: _CANCEL_MARKER_PATTERNS = ('task cancelled', 'task canceled', 'response interrupted') +_WEBUI_VISIBLE_PROGRESS_PROMPT = """ +WebUI progress contract: +- For multi-step work that uses tools, provide brief user-visible progress updates as normal assistant content before continuing with tool calls. +- Each update should say what you are about to check, what you just confirmed, or why the next tool call is needed. +- Keep updates concise, factual, and in the user's language. One or two short sentences are enough. +- Do not reveal hidden reasoning, chain-of-thought, private scratchpads, secrets, raw logs, or long tool output. +- For direct answers or very short tasks, skip progress updates and answer normally. +""".strip() + + +def _webui_ephemeral_system_prompt(personality_prompt: Optional[str]) -> str: + """Build WebUI-only runtime instructions that are not persisted to history.""" + parts = [] + if personality_prompt: + parts.append(str(personality_prompt).strip()) + parts.append(_WEBUI_VISIBLE_PROGRESS_PROMPT) + return "\n\n".join(part for part in parts if part) + + def _has_new_assistant_reply(all_messages: list, prev_count: int) -> bool: """Return True if *new* messages (beyond ``prev_count``) contain an assistant message with non-empty content. @@ -3449,9 +3468,11 @@ def _run_agent_streaming( _personality_prompt = '\n'.join(p for p in _parts if p) else: _personality_prompt = str(_pval) - # Pass personality via ephemeral_system_prompt (agent's own mechanism) - if _personality_prompt: - agent.ephemeral_system_prompt = _personality_prompt + # Pass WebUI-only runtime guidance via ephemeral_system_prompt + # (agent's own mechanism). This preserves any selected personality + # while making long tool runs emit real user-visible interim text + # through interim_assistant_callback instead of frontend guesses. + agent.ephemeral_system_prompt = _webui_ephemeral_system_prompt(_personality_prompt) _pending_started_at = getattr(s, 'pending_started_at', None) # Normal chat-start sets pending_started_at before spawning this thread; # fallback to now only for recovered/legacy flows where that marker is absent diff --git a/static/messages.js b/static/messages.js index 5bfdddaa..d7eb97a0 100644 --- a/static/messages.js +++ b/static/messages.js @@ -1094,7 +1094,18 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){ }; step(); } + function _closeCurrentLiveActivityGroup(){ + const turn=$('liveAssistantTurn'); + if(turn){ + turn.querySelectorAll('.tool-call-group[data-live-tool-call-group="1"][data-live-activity-current="1"]').forEach(group=>{ + group.removeAttribute('data-live-activity-current'); + }); + } + } function _resetAssistantSegment(){ + const options=arguments[0]||{}; + const closeActivity=!!(options&&options.closeActivity); + if(closeActivity) _closeCurrentLiveActivityGroup(); assistantRow=null; assistantBody=null; segmentStart=assistantText.length; @@ -1221,7 +1232,7 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){ } if(alreadyStreamed){ if(!S.session||S.session.session_id!==activeSid) return; - _resetAssistantSegment(); + _resetAssistantSegment({closeActivity:true}); return; } assistantText += assistantText ? `\n\n${visible}` : visible; @@ -1233,6 +1244,7 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){ else appendThinking(_liveThinkingText()); } ensureAssistantRow(true); + _resetAssistantSegment({closeActivity:true}); _scheduleRender(); }); diff --git a/static/ui.js b/static/ui.js index ad3b0703..1f29f37b 100644 --- a/static/ui.js +++ b/static/ui.js @@ -4682,7 +4682,7 @@ function ensureActivityGroup(inner, opts){ if(!inner) return null; const live=!!opts.live; const activityKey=opts.activityKey||(live?_activityKeyForLiveTurn():null); - const selector=live?'.tool-call-group[data-live-tool-call-group="1"]':'.tool-call-group[data-agent-activity-group="1"]'; + const selector=live?'.tool-call-group[data-live-tool-call-group="1"][data-live-activity-current="1"]':'.tool-call-group[data-agent-activity-group="1"]'; let group=inner.querySelector(selector); if(!group){ group=document.createElement('div'); @@ -4699,7 +4699,10 @@ function ensureActivityGroup(inner, opts){ group.setAttribute('data-tool-call-group','1'); group.setAttribute('data-agent-activity-group','1'); if(activityKey) group.setAttribute('data-activity-disclosure-key',activityKey); - if(live) group.setAttribute('data-live-tool-call-group','1'); + if(live){ + group.setAttribute('data-live-tool-call-group','1'); + group.setAttribute('data-live-activity-current','1'); + } group.innerHTML=`
`; const anchor=opts.anchor||null; if(anchor&&anchor.parentElement===inner) anchor.insertAdjacentElement('afterend', group); @@ -6953,9 +6956,6 @@ function appendThinking(text=''){ return; } const thinkingText=String(text||'').trim()||'Thinking…'; - blocks.querySelectorAll('.tool-call-group[data-live-tool-call-group="1"][data-live-activity-current="1"]').forEach(group=>{ - group.removeAttribute('data-live-activity-current'); - }); let row=blocks.querySelector('.agent-activity-thinking[data-thinking-active="1"]'); if(!row){ row=_thinkingActivityNode(thinkingText, false); diff --git a/tests/test_sprint42.py b/tests/test_sprint42.py index 995a6614..e1889c75 100644 --- a/tests/test_sprint42.py +++ b/tests/test_sprint42.py @@ -299,6 +299,7 @@ class TestRuntimeRouteInjection(unittest.TestCase): self.ephemeral_system_prompt = None self._last_error = None self.interim_assistant_callback = interim_assistant_callback + captured["agent"] = self def run_conversation(self, **kwargs): if self.interim_assistant_callback: @@ -388,6 +389,8 @@ class TestRuntimeRouteInjection(unittest.TestCase): init_kwargs = captured["init_kwargs"] self.assertIsNotNone(init_kwargs["interim_assistant_callback"]) self.assertTrue(callable(init_kwargs["interim_assistant_callback"])) + self.assertIn("WebUI progress contract", captured["agent"].ephemeral_system_prompt) + self.assertIn("user-visible progress updates", captured["agent"].ephemeral_system_prompt) interim_events = [] while not fake_queue.empty(): diff --git a/tests/test_ui_tool_call_cleanup.py b/tests/test_ui_tool_call_cleanup.py index 44204d00..cee72ab3 100644 --- a/tests/test_ui_tool_call_cleanup.py +++ b/tests/test_ui_tool_call_cleanup.py @@ -282,28 +282,50 @@ class TestToolCallGroupingStatic: "The non-simplified path should preserve standalone settled thinking cards." ) - def test_live_thinking_is_shown_while_still_splitting_tool_bursts(self): + def test_live_visible_interim_text_splits_tool_bursts_not_thinking(self): live_thinking_fn = _function_body(UI_JS, "appendThinking") live_tool_fn = _function_body(UI_JS, "appendLiveToolCard") helper = _function_body(UI_JS, "ensureActivityGroup") assert "isSimplifiedToolCalling()" in live_thinking_fn, ( "Live thinking should branch on the Compact tool activity toggle." ) - assert 'data-live-activity-current' in live_thinking_fn, ( - "Starting a new live thinking block should close the previous live tool burst." - ) assert "body.insertBefore(row, body.firstChild)" not in live_thinking_fn, ( "Live thinking should not be moved into the top Activity dropdown." ) assert "_thinkingActivityNode(thinkingText, false)" in live_thinking_fn, ( "Compact live thinking should render a collapsed Thinking card in the timeline." ) - assert '[data-live-activity-current="1"]' in live_thinking_fn, ( - "Starting a new Thinking card should mark the previous live tool burst as no longer current." + assert "removeAttribute('data-live-activity-current')" not in live_thinking_fn, ( + "Reasoning/Thinking updates alone should not split consecutive tools into one-tool Activity rows." + ) + assert '.tool-call-group[data-live-tool-call-group="1"][data-live-activity-current="1"]' in helper, ( + "Live tool cards should only reuse the current Activity burst, not the first group in the turn." + ) + assert "group.setAttribute('data-live-activity-current','1')" in helper, ( + "New live Activity bursts must be marked current so later tools append to the right group." ) assert "body.querySelector" in live_tool_fn and "data-live-tid" in live_tool_fn, ( "tool_complete must still update its current live Activity burst by tool id." ) + close_activity_fn = _function_body(MESSAGES_JS, "_closeCurrentLiveActivityGroup") + assert "data-live-activity-current" in close_activity_fn, ( + "Visible interim assistant boundaries should close the previous live Activity burst." + ) + reset_fn = _function_body(MESSAGES_JS, "_resetAssistantSegment") + assert "closeActivity" in reset_fn and "_closeCurrentLiveActivityGroup()" in reset_fn, ( + "Assistant text reset and Activity burst closing should stay separate." + ) + interim_match = re.search(r"source\.addEventListener\('interim_assistant',e=>\{(.*?)\n\s*\}\);", MESSAGES_JS, re.S) + assert interim_match and "_resetAssistantSegment({closeActivity:true});" in interim_match.group(1), ( + "Visible interim assistant text should split the previous tool burst before the next tool starts." + ) + tool_start_segment = MESSAGES_JS.split("source.addEventListener('tool',e=>{", 1)[1].split("source.addEventListener('tool_complete'", 1)[0] + assert "_resetAssistantSegment();" in tool_start_segment, ( + "Tool starts should reset the next assistant text segment without closing the current Activity burst." + ) + assert "_resetAssistantSegment({closeActivity:true});" not in tool_start_segment, ( + "Tool starts must not split consecutive tools into one-tool Activity rows." + ) class TestToolCardDesignTokens: