Stage 384: PR #2536

2026-05-25 19:20:16 +00:00 · 2026-05-18 22:43:52 +00:00
parent 4589dbec30 e94827f460
commit c13f15b2fc
6 changed files with 113 additions and 17 deletions
@@ -2,6 +2,10 @@

 ## [Unreleased]

+### Fixed
+
+- **PR #2536** (closes #2514, refs #2535) — Stop reasoning-only Thinking entries from being replayed into provider-facing history as blank assistant turns, preventing long WebUI sessions from accumulating duplicated stale Thinking blocks and inflated Activity/tool metadata on later turns. Settled compact Activity rerenders now also clear previously inserted Thinking rows before rebuilding the visible transcript.
+
 ## [v0.51.90] — 2026-05-18 — Release BN (stage-383 — 10-PR full sweep batch — empty-gateway messaging history fix + previous-messaging-sessions setting + Kanban board switcher layout + UI/UX demo theme controls + Slice 3c queue/goal RFC gate + keyless custom endpoints + custom-provider remote model catalog parity + auto-compression elapsed timer + new-conversation cold-start guard + Kanban drag-drop detail open fix)

 ### Fixed
@@ -22,7 +26,6 @@

 - **PR #2511** by @franksong2702 (refs #2502 / #2503) — Update the `docs/ui-ux/` demo appearance controls to initialize as `class="dark" data-skin="slate"` instead of the deprecated `data-theme`-only buttons and legacy theme names. Brings the demo pages in line with the live Theme + Skin contract referenced from the new `docs/CONTRACTS.md` so contributors following the contract-index path don't land on stale demos.
 - **PR #2509** by @Michaelyklam (refs #1925) — Advance the runtime-adapter RFC after the Slice 3b approval/clarify seam shipped in v0.51.89. The RFC now marks Slice 3b as shipped and defines the next Slice 3c queue/continue + goal control gate: route those controls through `RuntimeAdapter.queue_message(...)` / `update_goal(...)` only after pinning stable response contracts, bounded unavailable-control behavior, replayable lifecycle/status evidence, ordering/idempotency expectations, and explicit non-goals for runner/sidecar ownership or a WebUI-owned queue/goal scheduler. Docs + adapter-seam regression test only — no runtime/control routing changes in this PR.
-
 ## [v0.51.89] — 2026-05-18 — Release BM (stage-382 — 6-PR full sweep batch — runtime adapter approval/clarify seam + SOUL.md memory panel + #1855 resolve_model_provider fast-path + PWA sidebar spinner fix + /model active-provider preference + contributor contract docs index)

 ### Changed
@@ -8158,6 +8158,7 @@ def _handle_chat_sync(handler, body):
            )
            from api.streaming import (
                _merge_display_messages_after_agent_result,
+                _restore_display_reasoning_metadata,
                _restore_reasoning_metadata,
                _sanitize_messages_for_api,
                _context_messages_for_new_turn,
@@ -8210,7 +8211,7 @@ def _handle_chat_sync(handler, body):
        s.messages = _merge_display_messages_after_agent_result(
            _previous_messages,
            _previous_context_messages,
-            _restore_reasoning_metadata(_previous_messages, _result_messages),
+            _restore_display_reasoning_metadata(_previous_messages, _result_messages),
            msg,
        )
        # Only auto-generate title when still default; preserves user renames
@@ -1932,6 +1932,45 @@ def _strip_native_image_parts_from_content(content):
    return clean_parts


+def _content_has_reasoning_only_parts(content) -> bool:
+    """Return True when a part list holds non-blank reasoning and no visible part."""
+    if not (isinstance(content, list) and content):
+        return False
+    reasoning_types = {'thinking', 'reasoning'}
+    found_reasoning = False
+    for item in content:
+        if not isinstance(item, dict):
+            continue  # non-dict parts are ignored entirely
+        kind = item.get('type')
+        if kind in reasoning_types:
+            body = item.get('thinking') or item.get('reasoning') or item.get('text') or ''
+            found_reasoning = found_reasoning or bool(str(body).strip())
+        elif kind != 'text':
+            return False  # any other part type disqualifies the list
+        elif str(item.get('text') or item.get('content') or '').strip():
+            return False  # a non-blank text part disqualifies the list
+    return found_reasoning
+
+
+def _is_reasoning_only_assistant_message(msg) -> bool:
+    """Return True for display-only assistant Thinking entries.
+
+    A match is an assistant dict with no tool calls and no visible text whose
+    only payload is reasoning: a non-blank `reasoning` / `reasoning_content`
+    field, or content made solely of reasoning parts. Assistant replies with
+    visible text never match, even when they also carry reasoning metadata.
+    """
+    is_assistant = isinstance(msg, dict) and msg.get('role') == 'assistant'
+    if not is_assistant or msg.get('tool_calls'):
+        return False
+    body = msg.get('content', '')
+    if _message_text(body).strip():
+        return False
+    # Field-level reasoning wins; otherwise fall back to inspecting content parts.
+    field_reasoning = msg.get('reasoning') or msg.get('reasoning_content') or ''
+    return bool(str(field_reasoning).strip()) or _content_has_reasoning_only_parts(body)
+
+
 def _sanitize_messages_for_api(messages, *, cfg: dict = None):
    """Return a deep copy of messages with only API-safe fields.

@@ -1970,6 +2009,10 @@ def _sanitize_messages_for_api(messages, *, cfg: dict = None):
    for msg in messages:
        if not isinstance(msg, dict):
            continue
+        # Skip display-only Thinking entries. They are visible transcript
+        # metadata, not provider-facing assistant turns.
+        if _is_reasoning_only_assistant_message(msg):
+            continue
        # Skip persisted error markers — never send them to the LLM as prior context.
        if msg.get('_error'):
            continue
@@ -2004,6 +2047,8 @@ def _api_safe_message_positions(messages):
    for idx, msg in enumerate(messages):
        if not isinstance(msg, dict):
            continue
+        if _is_reasoning_only_assistant_message(msg):
+            continue
        role = msg.get('role')
        if role == 'tool':
            tid = msg.get('tool_call_id') or ''
@@ -2037,13 +2082,6 @@ def _restore_reasoning_metadata(previous_messages, updated_messages):
            return None
        return {k: v for k, v in msg.items() if k in _API_SAFE_MSG_KEYS and msg.get('role')}

-    def _reasoning_only_assistant(msg):
-        if not isinstance(msg, dict) or msg.get('role') != 'assistant' or not msg.get('reasoning'):
-            return False
-        if msg.get('tool_calls'):
-            return False
-        return not _message_text(msg.get('content'))
-
    safe_pos = 0
    while safe_pos < len(prev_safe):
        prev_idx, _ = prev_safe[safe_pos]
@@ -2060,12 +2098,28 @@ def _restore_reasoning_metadata(previous_messages, updated_messages):
            safe_pos += 1
            continue

-        if _reasoning_only_assistant(prev_msg):
-            updated_messages.insert(safe_pos, copy.deepcopy(prev_msg))
-            safe_pos += 1
-            continue
-
        safe_pos += 1
+
+    return updated_messages
+
+
+def _restore_display_reasoning_metadata(previous_messages, updated_messages):
+    """Run `_restore_reasoning_metadata`, then re-insert display-only Thinking rows in place."""
+    updated_messages = _restore_reasoning_metadata(previous_messages, updated_messages)
+    if not previous_messages or not updated_messages:
+        return updated_messages
+    safe_indices = {idx for idx, _ in _api_safe_message_positions(previous_messages)}
+    pos = 0  # slot in updated_messages that the current previous row maps to
+    for prev_idx, prev_msg in enumerate(previous_messages):
+        if prev_idx in safe_indices:
+            pos += 1  # API-safe rows already occupy their slot
+        elif _is_reasoning_only_assistant_message(prev_msg):
+            existing = updated_messages[pos] if pos < len(updated_messages) else None
+            if not _is_reasoning_only_assistant_message(existing):
+                updated_messages.insert(pos, copy.deepcopy(prev_msg))
+            # Advance even when the row was already present; otherwise every
+            # later Thinking row would be inserted one slot too early.
+            pos += 1
    return updated_messages


@@ -4035,7 +4089,7 @@ def _run_agent_streaming(
                s.messages = _merge_display_messages_after_agent_result(
                    _previous_messages,
                    _previous_context_messages,
-                    _restore_reasoning_metadata(_previous_messages, _result_messages),
+                    _restore_display_reasoning_metadata(_previous_messages, _result_messages),
                    msg_text,
                )
                # Strip XML tool-call blocks from assistant message content.
@@ -5995,7 +5995,7 @@ function renderMessages(options){
    if(derived.length) S.toolCalls=derived;
  }
  if(!S.busy){
-    inner.querySelectorAll('.tool-call-group:not([data-compression-card]),.tool-card-row:not([data-compression-card])').forEach(el=>el.remove());
+    inner.querySelectorAll('.tool-call-group:not([data-compression-card]),.tool-card-row:not([data-compression-card]),.agent-activity-thinking:not([data-live-thinking="1"])').forEach(el=>el.remove());
    const byAssistant = {};
    for(const tc of (S.toolCalls||[])){
      const key = tc.assistant_msg_idx !== undefined ? tc.assistant_msg_idx : -1;
@@ -12,7 +12,7 @@ Covers:
 import pathlib
 import re

-from api.streaming import _restore_reasoning_metadata
+from api.streaming import _restore_reasoning_metadata, _sanitize_messages_for_api


 REPO = pathlib.Path(__file__).parent.parent
@@ -108,3 +108,38 @@ def test_restore_reasoning_metadata_does_not_preserve_timestamp_for_changed_mess

    assert restored[0]["timestamp"] == 1713500000
    assert "timestamp" not in restored[1]
+
+
+def test_sanitize_messages_for_api_drops_reasoning_only_display_entries():
+    """Reasoning-only assistant rows are dropped; the visible reply keeps role/content."""
+    user_turn = {"role": "user", "content": "hello"}
+    field_only = {"role": "assistant", "content": "", "reasoning": "hidden chain", "_partial_tool_calls": [{"name": "read_file"}]}
+    visible = {"role": "assistant", "content": "visible answer", "reasoning": "display metadata"}
+    parts_only = {"role": "assistant", "content": [{"type": "reasoning", "text": "hidden"}]}
+
+    sanitized = _sanitize_messages_for_api([user_turn, field_only, visible, parts_only])
+
+    expected = [
+        {"role": "user", "content": "hello"},
+        {"role": "assistant", "content": "visible answer"},
+    ]
+    assert sanitized == expected
+
+
+def test_restore_reasoning_metadata_does_not_reinsert_reasoning_only_display_entries():
+    """The API-facing restore keeps metadata on matched rows and adds no Thinking-only row."""
+    thought_only = {"role": "assistant", "content": "", "reasoning": "old hidden thought", "timestamp": 1713500001}
+    before = [
+        {"role": "user", "content": "hello", "timestamp": 1713500000},
+        thought_only,
+        {"role": "assistant", "content": "visible answer", "reasoning": "answer thought", "timestamp": 1713500060},
+    ]
+    after = [{"role": "user", "content": "hello"}, {"role": "assistant", "content": "visible answer"}]
+
+    restored = _restore_reasoning_metadata(before, after)
+
+    assert len(restored) == 2
+    answer = restored[1]
+    assert answer["content"] == "visible answer"
+    assert answer["reasoning"] == "answer thought"
+    assert answer["timestamp"] == 1713500060
@@ -278,6 +278,9 @@ class TestToolCallGroupingStatic:
        assert "anchorParent.insertBefore(thinkingNode, anchorRow)" in render_fn, (
            "Settled Thinking cards should appear before their visible assistant process text."
        )
+        assert ".agent-activity-thinking:not([data-live-thinking=\"1\"])" in render_fn, (
+            "Settled rerenders must remove previously inserted Thinking activity rows before rebuilding."
+        )
        assert "seg.insertAdjacentHTML('beforeend', _thinkingCardHtml(thinkingText))" in render_fn, (
            "The non-simplified path should preserve standalone settled thinking cards."
        )