Fix live progress activity grouping

This commit is contained in:
Frank Song
2026-05-16 23:36:30 +08:00
parent e7e45fe98b
commit 2dfe3ffb42
6 changed files with 77 additions and 15 deletions
+4
View File
@@ -2,6 +2,10 @@
## [Unreleased]
### Fixed
- **PR #2390** by @franksong2702 (refs #2376, refs #2344) — Live long-task Activity groups now split only on user-visible interim assistant progress text, not on hidden reasoning updates or on every tool start. This keeps Codex-like progress turns readable as "progress note -> grouped tools -> next progress note" instead of degrading into repeated one-tool Activity rows, and WebUI-created agent turns now receive an explicit progress contract that asks for concise user-visible updates before tool-heavy steps.
## [v0.51.74] — 2026-05-16 — Release AX (stage-367 — 4-PR safe-lane batch — #2362 table-cell spacing + #2363 run-state-consistency RFC + #2365 custom_providers list-format + #2367 settings sidebar i18n)
### Added
+24 -3
View File
@@ -137,6 +137,25 @@ def _clarify_timeout_seconds(default: int = 120) -> int:
_CANCEL_MARKER_PATTERNS = ('task cancelled', 'task canceled', 'response interrupted')
# WebUI-only guidance text that _webui_ephemeral_system_prompt() appends to
# the ephemeral system prompt. It asks the model to emit short, user-visible
# progress notes as normal assistant content around tool calls, and to skip
# them for direct or very short answers. The surrounding newlines of the
# triple-quoted literal are removed by .strip().
_WEBUI_VISIBLE_PROGRESS_PROMPT = """
WebUI progress contract:
- For multi-step work that uses tools, provide brief user-visible progress updates as normal assistant content before continuing with tool calls.
- Each update should say what you are about to check, what you just confirmed, or why the next tool call is needed.
- Keep updates concise, factual, and in the user's language. One or two short sentences are enough.
- Do not reveal hidden reasoning, chain-of-thought, private scratchpads, secrets, raw logs, or long tool output.
- For direct answers or very short tasks, skip progress updates and answer normally.
""".strip()
def _webui_ephemeral_system_prompt(personality_prompt: Optional[str]) -> str:
    """Compose the WebUI-only runtime instructions for an agent run.

    Args:
        personality_prompt: Optional personality text. When truthy it is
            converted with ``str()`` and stripped of surrounding whitespace;
            otherwise it contributes nothing.

    Returns:
        The personality text (if any) followed by the WebUI progress
        contract, with non-empty sections separated by a blank line.
    """
    # A falsy personality collapses to "" so the filter below drops it.
    personality_text = str(personality_prompt).strip() if personality_prompt else ""
    sections = [personality_text, _WEBUI_VISIBLE_PROGRESS_PROMPT]
    return "\n\n".join(section for section in sections if section)
def _has_new_assistant_reply(all_messages: list, prev_count: int) -> bool:
"""Return True if *new* messages (beyond ``prev_count``) contain an
assistant message with non-empty content.
@@ -3449,9 +3468,11 @@ def _run_agent_streaming(
_personality_prompt = '\n'.join(p for p in _parts if p)
else:
_personality_prompt = str(_pval)
# Pass personality via ephemeral_system_prompt (agent's own mechanism)
if _personality_prompt:
agent.ephemeral_system_prompt = _personality_prompt
# Pass WebUI-only runtime guidance via ephemeral_system_prompt
# (agent's own mechanism). This preserves any selected personality
# while making long tool runs emit real user-visible interim text
# through interim_assistant_callback instead of frontend guesses.
agent.ephemeral_system_prompt = _webui_ephemeral_system_prompt(_personality_prompt)
_pending_started_at = getattr(s, 'pending_started_at', None)
# Normal chat-start sets pending_started_at before spawning this thread;
# fallback to now only for recovered/legacy flows where that marker is absent
+13 -1
View File
@@ -1094,7 +1094,18 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
};
step();
}
// Clears the "current" marker from every live Activity group inside the
// live assistant turn. ensureActivityGroup() only reuses a live group that
// still carries data-live-activity-current="1", so after this runs the next
// live tool card is placed in a freshly created group.
function _closeCurrentLiveActivityGroup(){
  const liveTurn=$('liveAssistantTurn');
  // Nothing to close when no live assistant turn element is present.
  if(!liveTurn) return;
  const currentGroups=liveTurn.querySelectorAll('.tool-call-group[data-live-tool-call-group="1"][data-live-activity-current="1"]');
  for(const activityGroup of currentGroups){
    activityGroup.removeAttribute('data-live-activity-current');
  }
}
function _resetAssistantSegment(){
const options=arguments[0]||{};
const closeActivity=!!(options&&options.closeActivity);
if(closeActivity) _closeCurrentLiveActivityGroup();
assistantRow=null;
assistantBody=null;
segmentStart=assistantText.length;
@@ -1221,7 +1232,7 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
}
if(alreadyStreamed){
if(!S.session||S.session.session_id!==activeSid) return;
_resetAssistantSegment();
_resetAssistantSegment({closeActivity:true});
return;
}
assistantText += assistantText ? `\n\n${visible}` : visible;
@@ -1233,6 +1244,7 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
else appendThinking(_liveThinkingText());
}
ensureAssistantRow(true);
_resetAssistantSegment({closeActivity:true});
_scheduleRender();
});
+5 -5
View File
@@ -4682,7 +4682,7 @@ function ensureActivityGroup(inner, opts){
if(!inner) return null;
const live=!!opts.live;
const activityKey=opts.activityKey||(live?_activityKeyForLiveTurn():null);
const selector=live?'.tool-call-group[data-live-tool-call-group="1"]':'.tool-call-group[data-agent-activity-group="1"]';
const selector=live?'.tool-call-group[data-live-tool-call-group="1"][data-live-activity-current="1"]':'.tool-call-group[data-agent-activity-group="1"]';
let group=inner.querySelector(selector);
if(!group){
group=document.createElement('div');
@@ -4699,7 +4699,10 @@ function ensureActivityGroup(inner, opts){
group.setAttribute('data-tool-call-group','1');
group.setAttribute('data-agent-activity-group','1');
if(activityKey) group.setAttribute('data-activity-disclosure-key',activityKey);
if(live) group.setAttribute('data-live-tool-call-group','1');
if(live){
group.setAttribute('data-live-tool-call-group','1');
group.setAttribute('data-live-activity-current','1');
}
group.innerHTML=`<button type="button" class="tool-call-group-summary" aria-expanded="${collapsed?'false':'true'}" onclick="_toggleActivityGroup(this)"><span class="tool-call-group-chevron">${li('chevron-right',12)}</span><span class="tool-call-group-label">Activity</span><span class="tool-call-group-duration"></span></button><div class="tool-call-group-body"></div>`;
const anchor=opts.anchor||null;
if(anchor&&anchor.parentElement===inner) anchor.insertAdjacentElement('afterend', group);
@@ -6953,9 +6956,6 @@ function appendThinking(text=''){
return;
}
const thinkingText=String(text||'').trim()||'Thinking…';
blocks.querySelectorAll('.tool-call-group[data-live-tool-call-group="1"][data-live-activity-current="1"]').forEach(group=>{
group.removeAttribute('data-live-activity-current');
});
let row=blocks.querySelector('.agent-activity-thinking[data-thinking-active="1"]');
if(!row){
row=_thinkingActivityNode(thinkingText, false);
+3
View File
@@ -299,6 +299,7 @@ class TestRuntimeRouteInjection(unittest.TestCase):
self.ephemeral_system_prompt = None
self._last_error = None
self.interim_assistant_callback = interim_assistant_callback
captured["agent"] = self
def run_conversation(self, **kwargs):
if self.interim_assistant_callback:
@@ -388,6 +389,8 @@ class TestRuntimeRouteInjection(unittest.TestCase):
init_kwargs = captured["init_kwargs"]
self.assertIsNotNone(init_kwargs["interim_assistant_callback"])
self.assertTrue(callable(init_kwargs["interim_assistant_callback"]))
self.assertIn("WebUI progress contract", captured["agent"].ephemeral_system_prompt)
self.assertIn("user-visible progress updates", captured["agent"].ephemeral_system_prompt)
interim_events = []
while not fake_queue.empty():
+28 -6
View File
@@ -282,28 +282,50 @@ class TestToolCallGroupingStatic:
"The non-simplified path should preserve standalone settled thinking cards."
)
def test_live_thinking_is_shown_while_still_splitting_tool_bursts(self):
def test_live_visible_interim_text_splits_tool_bursts_not_thinking(self):
live_thinking_fn = _function_body(UI_JS, "appendThinking")
live_tool_fn = _function_body(UI_JS, "appendLiveToolCard")
helper = _function_body(UI_JS, "ensureActivityGroup")
assert "isSimplifiedToolCalling()" in live_thinking_fn, (
"Live thinking should branch on the Compact tool activity toggle."
)
assert 'data-live-activity-current' in live_thinking_fn, (
"Starting a new live thinking block should close the previous live tool burst."
)
assert "body.insertBefore(row, body.firstChild)" not in live_thinking_fn, (
"Live thinking should not be moved into the top Activity dropdown."
)
assert "_thinkingActivityNode(thinkingText, false)" in live_thinking_fn, (
"Compact live thinking should render a collapsed Thinking card in the timeline."
)
assert '[data-live-activity-current="1"]' in live_thinking_fn, (
"Starting a new Thinking card should mark the previous live tool burst as no longer current."
assert "removeAttribute('data-live-activity-current')" not in live_thinking_fn, (
"Reasoning/Thinking updates alone should not split consecutive tools into one-tool Activity rows."
)
assert '.tool-call-group[data-live-tool-call-group="1"][data-live-activity-current="1"]' in helper, (
"Live tool cards should only reuse the current Activity burst, not the first group in the turn."
)
assert "group.setAttribute('data-live-activity-current','1')" in helper, (
"New live Activity bursts must be marked current so later tools append to the right group."
)
assert "body.querySelector" in live_tool_fn and "data-live-tid" in live_tool_fn, (
"tool_complete must still update its current live Activity burst by tool id."
)
close_activity_fn = _function_body(MESSAGES_JS, "_closeCurrentLiveActivityGroup")
assert "data-live-activity-current" in close_activity_fn, (
"Visible interim assistant boundaries should close the previous live Activity burst."
)
reset_fn = _function_body(MESSAGES_JS, "_resetAssistantSegment")
assert "closeActivity" in reset_fn and "_closeCurrentLiveActivityGroup()" in reset_fn, (
"Assistant text reset and Activity burst closing should stay separate."
)
interim_match = re.search(r"source\.addEventListener\('interim_assistant',e=>\{(.*?)\n\s*\}\);", MESSAGES_JS, re.S)
assert interim_match and "_resetAssistantSegment({closeActivity:true});" in interim_match.group(1), (
"Visible interim assistant text should split the previous tool burst before the next tool starts."
)
tool_start_segment = MESSAGES_JS.split("source.addEventListener('tool',e=>{", 1)[1].split("source.addEventListener('tool_complete'", 1)[0]
assert "_resetAssistantSegment();" in tool_start_segment, (
"Tool starts should reset the next assistant text segment without closing the current Activity burst."
)
assert "_resetAssistantSegment({closeActivity:true});" not in tool_start_segment, (
"Tool starts must not split consecutive tools into one-tool Activity rows."
)
class TestToolCardDesignTokens: