Stage 378: PR #2145

2026-05-25 11:10:18 +00:00 · 2026-05-17 19:55:09 +00:00
parent 02144aa863 b2ee7e365f
commit dcf9b0f7f5
2 changed files with 163 additions and 11 deletions
@@ -974,6 +974,8 @@ def _message_text(value) -> str:

 _WORKSPACE_PREFIX_RE = re.compile(r'^\s*\[Workspace::v1:\s*(?:\\.|[^\]\\])+\]\s*')
 _LEGACY_WORKSPACE_PREFIX_RE = re.compile(r'^\s*\[Workspace:[^\]]+\]\s*')
+# Unanchored counterparts of the two prefix patterns: these match a workspace
+# tag at any position in the text, not only at the very start.
+_WORKSPACE_PREFIX_ANY_RE = re.compile(r'\[Workspace::v1:\s*(?:\\.|[^\]\\])+\]\s*')
+_LEGACY_WORKSPACE_PREFIX_ANY_RE = re.compile(r'\[Workspace:[^\]]+\]\s*')


 def _escape_workspace_prefix_path(path: str) -> str:
@@ -993,6 +995,27 @@ def _strip_workspace_prefix(text: str, *, include_legacy: bool = False) -> str:
    return stripped.strip()


+def _looks_like_current_user_turn(msg, msg_text) -> bool:
+    """Match the current human turn even if an internal workspace tag leaked mid-text.
+
+    Normal model-facing messages start with the workspace sentinel. A failed
+    retry/merge path can also return an optimistic draft followed by the
+    sentinel and the real prompt. Only treat that shape as the current turn
+    when the text after the sentinel matches the submitted prompt once runs
+    of whitespace on both sides have been collapsed to single spaces.
+
+    Args:
+        msg: Candidate message. Anything that is not a dict with
+            ``role == 'user'`` is rejected.
+        msg_text: The prompt text submitted for the current turn.
+
+    Returns:
+        True when ``msg`` is a user message and at least one candidate text
+        derived from its content equals ``msg_text`` modulo whitespace.
+        False otherwise, including when ``msg_text`` is empty or
+        whitespace-only.
+    """
+    if not isinstance(msg, dict) or msg.get('role') != 'user':
+        return False
+    # Collapse every whitespace run so line breaks and spacing do not affect the match.
+    needle = " ".join(str(msg_text or '').split())
+    if not needle:
+        return False
+    text = _message_text(msg.get('content', ''))
+    # First candidate: the usual shape, handled by the start-anchored prefix stripper.
+    candidates = [_strip_workspace_prefix(text, include_legacy=True)]
+    # Further candidates: whatever follows each workspace tag found anywhere in
+    # the text, for both the current and the legacy tag formats.
+    for pattern in (_WORKSPACE_PREFIX_ANY_RE, _LEGACY_WORKSPACE_PREFIX_ANY_RE):
+        for match in pattern.finditer(text):
+            candidates.append(text[match.end():])
+    return any(" ".join(str(candidate or '').split()) == needle for candidate in candidates)
+
+
 def _first_exchange_snippets(messages):
    """Return (first_user_text, first_assistant_text) snippets for title generation.

@@ -2113,6 +2136,8 @@ def _find_current_user_turn(messages, msg_text):
        if not isinstance(msg, dict) or msg.get('role') != 'user':
            continue
        fallback = idx
+        if _looks_like_current_user_turn(msg, msg_text):
+            return idx
        text = " ".join(
            _strip_workspace_prefix(
                _message_text(msg.get('content', '')),
@@ -2258,10 +2283,15 @@ def _merge_display_messages_after_agent_result(previous_display, previous_contex
    seen = {_message_identity(m) for m in merged}
    current_user_key = _message_identity({'role': 'user', 'content': msg_text})
    current_user_in_candidates = any(
-        _message_identity(m) == current_user_key for m in candidates
+        _message_identity(m) == current_user_key or _looks_like_current_user_turn(m, msg_text)
+        for m in candidates
    )
    current_user_already_checkpointed = bool(
-        merged and _message_identity(merged[-1]) == current_user_key
+        merged
+        and (
+            _message_identity(merged[-1]) == current_user_key
+            or _looks_like_current_user_turn(merged[-1], msg_text)
+        )
    )
    if (
        current_user_key is not None
@@ -2286,11 +2316,14 @@ def _merge_display_messages_after_agent_result(previous_display, previous_contex

    for msg in candidates:
        key = _message_identity(msg)
+        is_current_user_turn = _looks_like_current_user_turn(msg, msg_text)
        if (
-            key is not None
-            and key == current_user_key
+            ((key is not None and key == current_user_key) or is_current_user_turn)
            and merged
-            and _message_identity(merged[-1]) == key
+            and (
+                _message_identity(merged[-1]) == current_user_key
+                or _looks_like_current_user_turn(merged[-1], msg_text)
+            )
        ):
            # Eager session-save mode can checkpoint the current user turn
            # before the agent runs. When the agent returns that same user turn
@@ -2312,7 +2345,11 @@ def _merge_display_messages_after_agent_result(previous_display, previous_contex
        if _is_context_compression_marker(msg) and key is not None and key in seen:
            continue
        display_msg = msg
-        if key is not None and key == current_user_key and isinstance(msg, dict) and msg.get('role') == 'user':
+        if (
+            ((key is not None and key == current_user_key) or is_current_user_turn)
+            and isinstance(msg, dict)
+            and msg.get('role') == 'user'
+        ):
            display_msg = copy.deepcopy(msg)
            display_msg['content'] = msg_text
        merged.append(copy.deepcopy(display_msg))
@@ -2321,6 +2358,24 @@ def _merge_display_messages_after_agent_result(previous_display, previous_contex
    return merged


+def _assistant_reply_added_after_current_turn(result_messages, previous_context, msg_text) -> bool:
+    """Return True only when the just-finished turn produced assistant text.
+
+    Args:
+        result_messages: Messages returned by the agent for this run; a falsy
+            value is treated as an empty list.
+        previous_context: Messages that were sent as context before this turn;
+            a falsy value is treated as an empty list.
+        msg_text: The prompt text submitted for the current turn, used to
+            locate the current user message when the result is not a plain
+            extension of ``previous_context``.
+
+    Returns:
+        True when at least one inspected message is a dict with role
+        ``'assistant'``, no truthy ``_error`` entry, and content that is
+        non-empty after ``str()`` and ``strip()``.
+    """
+    result_messages = list(result_messages or [])
+    previous_context = list(previous_context or [])
+    if _messages_have_prefix(result_messages, previous_context):
+        # Append-only result: only the messages past the previous context are new.
+        candidates = result_messages[len(previous_context):]
+    else:
+        # Otherwise inspect whatever follows the current user turn.
+        # NOTE(review): if no index is returned, every result message is
+        # inspected, so assistant text from earlier turns would count as a
+        # reply. Confirm whether _find_current_user_turn can return None here.
+        current_user_idx = _find_current_user_turn(result_messages, msg_text)
+        candidates = result_messages[current_user_idx + 1:] if current_user_idx is not None else result_messages
+    # NOTE(review): content is stringified with str(), so any truthy non-string
+    # content (for example a non-empty list) counts as text. Confirm intended.
+    return any(
+        isinstance(m, dict)
+        and m.get('role') == 'assistant'
+        and not m.get('_error')
+        and str(m.get('content') or '').strip()
+        for m in candidates
+    )
+
+
 _TOOL_RESULT_SNIPPET_MAX = 4000


@@ -3943,13 +3998,19 @@ def _run_agent_streaming(
                # an empty final_response without raising — the stream would end with
                # a done event containing zero assistant messages, leaving the user with
                # no feedback. Emit an apperror so the client shows an inline error.
-                #
-                # Only check NEW messages added by this turn — result.get('messages')
-                # includes the full conversation history, so checking all of them would
-                # match assistant content from prior turns and mask the real failure.
+                # Keep the current-turn assistant detection aligned with the
+                # display-merge logic. A compacted or replayed result payload
+                # is not always a simple append-only suffix of the previous
+                # context, so use the workspace-aware helper to find this
+                # turn's messages. _prev_len is still computed because
+                # downstream self-heal checks rely on the pre-turn length.
                _all_result_messages = result.get('messages') or []
                _prev_len = len(_previous_context_messages)
-                _assistant_added = _has_new_assistant_reply(_all_result_messages, _prev_len)
+                _assistant_added = _assistant_reply_added_after_current_turn(
+                    _all_result_messages,
+                    _previous_context_messages,
+                    msg_text,
+                )
                # _token_sent tracks whether on_token() was called (any streamed text)
                if not _assistant_added and not _token_sent:
                    if cancel_event.is_set():
@@ -2,6 +2,7 @@ from api.models import Session
 import contextlib

 from api.streaming import (
+    _assistant_reply_added_after_current_turn,
    _context_messages_for_new_turn,
    _merge_display_messages_after_agent_result,
    _sanitize_messages_for_api,
@@ -78,6 +79,96 @@ def test_workspace_prefixed_current_user_after_compaction_is_not_duplicated():
    assert sum(1 for m in merged if m.get("role") == "user" and "Ok, mache weiter" in m.get("content", "")) == 1


+def test_embedded_workspace_prefixed_current_user_delta_is_deduped():
+    """A failed provider path can echo draft text before the workspace tag.
+
+    The display history already ends with the current prompt, and the agent
+    result adds only a user message shaped as draft text, workspace tag, then
+    the prompt (no assistant reply). The merge must leave the display history
+    unchanged and must not surface the workspace tag.
+    """
+    current = "正常来说，chrome for testing 不是有独立的profile嘛，为什么会有 user-data-dir 冲突的问题？"
+    # Display history already holds both a partial draft and the full prompt.
+    previous_display = [
+        {"role": "user", "content": "older prompt"},
+        {"role": "assistant", "content": "older answer"},
+        {"role": "user", "content": "正常来说，chrome"},
+        {"role": "user", "content": current},
+    ]
+    previous_context = [
+        {"role": "user", "content": "older prompt"},
+        {"role": "assistant", "content": "older answer"},
+    ]
+    # The returned user message embeds the workspace tag between the draft
+    # text and the real prompt instead of starting with it.
+    result_messages = previous_context + [
+        {
+            "role": "user",
+            "content": (
+                "正常来说，chrome\n\n"
+                "[Workspace::v1: /mnt/e/vscode_workspace/hermes_workspace]\n"
+                f"{current}"
+            ),
+        },
+    ]
+
+    merged = _merge_display_messages_after_agent_result(
+        previous_display,
+        previous_context,
+        result_messages,
+        current,
+    )
+
+    # No duplicate user turn is appended and the tag never reaches the display.
+    assert merged == previous_display
+    assert all("Workspace::v1" not in str(m.get("content") or "") for m in merged)
+
+
+def test_embedded_workspace_prefixed_current_user_delta_displays_clean_prompt():
+    """An embedded workspace tag is replaced by the clean prompt in the display.
+
+    The agent result appends a user message shaped as draft text, workspace
+    tag, then the prompt, followed by an assistant reply. The merged display
+    must end with the submitted prompt and the reply, with no workspace tag
+    in any message.
+    """
+    current = "正常来说，chrome for testing 不是有独立的profile嘛，为什么会有 user-data-dir 冲突的问题？"
+    previous_display = [
+        {"role": "user", "content": "older prompt"},
+        {"role": "assistant", "content": "older answer"},
+    ]
+    # Context and display start out identical, so the result is append-only.
+    previous_context = list(previous_display)
+    result_messages = previous_context + [
+        {
+            "role": "user",
+            "content": (
+                "正常来说，chrome\n\n"
+                "[Workspace::v1: /mnt/e/vscode_workspace/hermes_workspace]\n"
+                f"{current}"
+            ),
+        },
+        {"role": "assistant", "content": "Chrome for Testing 本身没有固定独立 profile。"},
+    ]
+
+    merged = _merge_display_messages_after_agent_result(
+        previous_display,
+        previous_context,
+        result_messages,
+        current,
+    )
+
+    # The user turn is shown as the submitted prompt, not the raw tagged text.
+    assert [m["content"] for m in merged[-2:]] == [
+        current,
+        "Chrome for Testing 本身没有固定独立 profile。",
+    ]
+    assert all("Workspace::v1" not in str(m.get("content") or "") for m in merged)
+
+
+def test_assistant_added_detection_ignores_prior_history():
+    """Assistant text from earlier turns must not count as a reply to this turn.
+
+    With only the workspace-prefixed user message appended to the previous
+    context, no reply is detected even though the history contains an older
+    assistant answer. Appending a new assistant message flips the result.
+    """
+    previous_context = [
+        {"role": "user", "content": "older prompt"},
+        {"role": "assistant", "content": "older answer"},
+    ]
+    current = "new prompt"
+    result_messages = previous_context + [
+        {"role": "user", "content": f"[Workspace::v1: /tmp/project]\n{current}"},
+    ]
+
+    # Only the current user turn was added: no assistant reply yet.
+    assert not _assistant_reply_added_after_current_turn(
+        result_messages,
+        previous_context,
+        current,
+    )
+    # A new assistant message after the current turn is detected.
+    assert _assistant_reply_added_after_current_turn(
+        result_messages + [{"role": "assistant", "content": "new answer"}],
+        previous_context,
+        current,
+    )
+
+
 def test_compacted_agent_result_keeps_old_prompts_and_appends_current_turn():
    previous_display = [
        {"role": "user", "content": "first prompt that must remain visible"},