Stage 378: PR #2145

This commit is contained in:
nesquena-hermes
2026-05-17 19:55:09 +00:00
2 changed files with 163 additions and 11 deletions
+72 -11
View File
@@ -974,6 +974,8 @@ def _message_text(value) -> str:
# Versioned workspace sentinel, anchored at the start of the text: optional
# leading whitespace, "[Workspace::v1:", a non-empty body made of backslash
# escapes or any character other than "]" and "\", the closing "]", and any
# trailing whitespace.
_WORKSPACE_PREFIX_RE = re.compile(r'^\s*\[Workspace::v1:\s*(?:\\.|[^\]\\])+\]\s*')
# Legacy sentinel "[Workspace:...]" with no escape handling, also anchored at
# the start. Its body only excludes "]", so it can also match a versioned tag
# whose body contains no "]".
_LEGACY_WORKSPACE_PREFIX_RE = re.compile(r'^\s*\[Workspace:[^\]]+\]\s*')
# Unanchored counterparts of the two patterns above: they locate a sentinel
# anywhere inside the text rather than only at its beginning.
_WORKSPACE_PREFIX_ANY_RE = re.compile(r'\[Workspace::v1:\s*(?:\\.|[^\]\\])+\]\s*')
_LEGACY_WORKSPACE_PREFIX_ANY_RE = re.compile(r'\[Workspace:[^\]]+\]\s*')
def _escape_workspace_prefix_path(path: str) -> str:
@@ -993,6 +995,27 @@ def _strip_workspace_prefix(text: str, *, include_legacy: bool = False) -> str:
return stripped.strip()
def _looks_like_current_user_turn(msg, msg_text) -> bool:
    """Tell whether ``msg`` is the user turn that carried ``msg_text``.

    Model-facing user messages normally begin with the workspace sentinel,
    but a failed retry/merge path may hand back an optimistic draft, then the
    sentinel, then the real prompt. To cover both shapes the submitted prompt
    is compared against:

    * the message text with its leading workspace prefix removed (legacy
      prefixes included), and
    * whatever follows each occurrence of a versioned or legacy sentinel
      found anywhere in the message text.

    Comparison is exact after collapsing runs of whitespace to single spaces.
    Returns ``False`` when ``msg`` is not a dict with role ``'user'`` or when
    ``msg_text`` is empty after that whitespace collapsing.
    """
    def _squash(value) -> str:
        # Collapse every run of whitespace into one space and trim the ends.
        return " ".join(str(value or '').split())

    is_user_message = isinstance(msg, dict) and msg.get('role') == 'user'
    if not is_user_message:
        return False

    wanted = _squash(msg_text)
    if not wanted:
        return False

    raw = _message_text(msg.get('content', ''))
    # Collect every candidate up front: the prefix-stripped text first, then
    # the tail after each sentinel occurrence (versioned before legacy).
    tails = [_strip_workspace_prefix(raw, include_legacy=True)]
    tails.extend(
        raw[hit.end():]
        for sentinel_re in (_WORKSPACE_PREFIX_ANY_RE, _LEGACY_WORKSPACE_PREFIX_ANY_RE)
        for hit in sentinel_re.finditer(raw)
    )

    for tail in tails:
        if _squash(tail) == wanted:
            return True
    return False
def _first_exchange_snippets(messages):
"""Return (first_user_text, first_assistant_text) snippets for title generation.
@@ -2113,6 +2136,8 @@ def _find_current_user_turn(messages, msg_text):
if not isinstance(msg, dict) or msg.get('role') != 'user':
continue
fallback = idx
if _looks_like_current_user_turn(msg, msg_text):
return idx
text = " ".join(
_strip_workspace_prefix(
_message_text(msg.get('content', '')),
@@ -2258,10 +2283,15 @@ def _merge_display_messages_after_agent_result(previous_display, previous_contex
seen = {_message_identity(m) for m in merged}
current_user_key = _message_identity({'role': 'user', 'content': msg_text})
current_user_in_candidates = any(
_message_identity(m) == current_user_key for m in candidates
_message_identity(m) == current_user_key or _looks_like_current_user_turn(m, msg_text)
for m in candidates
)
current_user_already_checkpointed = bool(
merged and _message_identity(merged[-1]) == current_user_key
merged
and (
_message_identity(merged[-1]) == current_user_key
or _looks_like_current_user_turn(merged[-1], msg_text)
)
)
if (
current_user_key is not None
@@ -2286,11 +2316,14 @@ def _merge_display_messages_after_agent_result(previous_display, previous_contex
for msg in candidates:
key = _message_identity(msg)
is_current_user_turn = _looks_like_current_user_turn(msg, msg_text)
if (
key is not None
and key == current_user_key
((key is not None and key == current_user_key) or is_current_user_turn)
and merged
and _message_identity(merged[-1]) == key
and (
_message_identity(merged[-1]) == current_user_key
or _looks_like_current_user_turn(merged[-1], msg_text)
)
):
# Eager session-save mode can checkpoint the current user turn
# before the agent runs. When the agent returns that same user turn
@@ -2312,7 +2345,11 @@ def _merge_display_messages_after_agent_result(previous_display, previous_contex
if _is_context_compression_marker(msg) and key is not None and key in seen:
continue
display_msg = msg
if key is not None and key == current_user_key and isinstance(msg, dict) and msg.get('role') == 'user':
if (
((key is not None and key == current_user_key) or is_current_user_turn)
and isinstance(msg, dict)
and msg.get('role') == 'user'
):
display_msg = copy.deepcopy(msg)
display_msg['content'] = msg_text
merged.append(copy.deepcopy(display_msg))
@@ -2321,6 +2358,24 @@ def _merge_display_messages_after_agent_result(previous_display, previous_contex
return merged
def _assistant_reply_added_after_current_turn(result_messages, previous_context, msg_text) -> bool:
    """Report whether the turn that just finished yielded assistant text.

    Only messages attributed to the current turn are inspected:

    * if ``result_messages`` starts with ``previous_context`` (as judged by
      ``_messages_have_prefix``), everything after that prefix;
    * otherwise everything after the index returned by
      ``_find_current_user_turn``; if that returns ``None`` the whole result
      list is inspected.

    A message counts when it is a dict with role ``'assistant'``, has no
    truthy ``'_error'`` entry, and its ``'content'`` stringifies to something
    other than whitespace.
    """
    result_messages = list(result_messages or [])
    previous_context = list(previous_context or [])

    if _messages_have_prefix(result_messages, previous_context):
        new_messages = result_messages[len(previous_context):]
    else:
        anchor = _find_current_user_turn(result_messages, msg_text)
        if anchor is None:
            new_messages = result_messages
        else:
            new_messages = result_messages[anchor + 1:]

    for entry in new_messages:
        if not isinstance(entry, dict):
            continue
        if entry.get('role') != 'assistant':
            continue
        if entry.get('_error'):
            # Error placeholders are not a real reply.
            continue
        if str(entry.get('content') or '').strip():
            return True
    return False
# NOTE(review): presumably the upper bound on the length of a tool-result
# snippet; its users are not visible here, so confirm the unit before relying on it.
_TOOL_RESULT_SNIPPET_MAX = 4000
@@ -3943,13 +3998,19 @@ def _run_agent_streaming(
# an empty final_response without raising — the stream would end with
# a done event containing zero assistant messages, leaving the user with
# no feedback. Emit an apperror so the client shows an inline error.
#
# Only check NEW messages added by this turn — result.get('messages')
# includes the full conversation history, so checking all of them would
# match assistant content from prior turns and mask the real failure.
# Keep the current-turn assistant detection aligned with the
# display-merge logic. A compacted or replayed result payload
# is not always a simple append-only suffix, so use the
# workspace-aware helper from this branch while still
# preserving the pre-turn length for downstream self-heal
# checks introduced on master.
_all_result_messages = result.get('messages') or []
_prev_len = len(_previous_context_messages)
_assistant_added = _has_new_assistant_reply(_all_result_messages, _prev_len)
_assistant_added = _assistant_reply_added_after_current_turn(
_all_result_messages,
_previous_context_messages,
msg_text,
)
# _token_sent tracks whether on_token() was called (any streamed text)
if not _assistant_added and not _token_sent:
if cancel_event.is_set():
@@ -2,6 +2,7 @@ from api.models import Session
import contextlib
from api.streaming import (
_assistant_reply_added_after_current_turn,
_context_messages_for_new_turn,
_merge_display_messages_after_agent_result,
_sanitize_messages_for_api,
@@ -78,6 +79,96 @@ def test_workspace_prefixed_current_user_after_compaction_is_not_duplicated():
assert sum(1 for m in merged if m.get("role") == "user" and "Ok, mache weiter" in m.get("content", "")) == 1
def test_embedded_workspace_prefixed_current_user_delta_is_deduped():
    """A failed provider path can echo draft text before the workspace tag.

    The display history already holds both the draft and the full prompt, so
    merging the echoed "draft + sentinel + prompt" message must leave the
    display unchanged and must not leak the sentinel.
    """
    current = "正常来说,chrome for testing 不是有独立的profile嘛,为什么会有 user-data-dir 冲突的问题?"
    older_user = {"role": "user", "content": "older prompt"}
    older_assistant = {"role": "assistant", "content": "older answer"}

    previous_display = [
        dict(older_user),
        dict(older_assistant),
        {"role": "user", "content": "正常来说,chrome"},
        {"role": "user", "content": current},
    ]
    previous_context = [dict(older_user), dict(older_assistant)]

    echoed_content = (
        "正常来说,chrome\n\n"
        "[Workspace::v1: /mnt/e/vscode_workspace/hermes_workspace]\n"
        f"{current}"
    )
    result_messages = [*previous_context, {"role": "user", "content": echoed_content}]

    merged = _merge_display_messages_after_agent_result(
        previous_display,
        previous_context,
        result_messages,
        current,
    )

    assert merged == previous_display
    leaked = [m for m in merged if "Workspace::v1" in str(m.get("content") or "")]
    assert not leaked
def test_embedded_workspace_prefixed_current_user_delta_displays_clean_prompt():
    """The echoed "draft + sentinel + prompt" turn is displayed as the bare prompt.

    After merging, the last two display entries must be the submitted prompt
    and the assistant answer, with no workspace sentinel anywhere.
    """
    current = "正常来说,chrome for testing 不是有独立的profile嘛,为什么会有 user-data-dir 冲突的问题?"
    answer = "Chrome for Testing 本身没有固定独立 profile。"

    previous_display = [
        {"role": "user", "content": "older prompt"},
        {"role": "assistant", "content": "older answer"},
    ]
    # Shallow copy: the context shares its message dicts with the display.
    previous_context = list(previous_display)

    echoed_user_turn = {
        "role": "user",
        "content": (
            "正常来说,chrome\n\n"
            "[Workspace::v1: /mnt/e/vscode_workspace/hermes_workspace]\n"
            f"{current}"
        ),
    }
    assistant_turn = {"role": "assistant", "content": answer}
    result_messages = previous_context + [echoed_user_turn, assistant_turn]

    merged = _merge_display_messages_after_agent_result(
        previous_display,
        previous_context,
        result_messages,
        current,
    )

    tail_contents = [m["content"] for m in merged[-2:]]
    assert tail_contents == [current, answer]
    leaked = [m for m in merged if "Workspace::v1" in str(m.get("content") or "")]
    assert not leaked
def test_assistant_added_detection_ignores_prior_history():
    """Assistant text from earlier turns must not count as a reply to this turn."""
    current = "new prompt"
    previous_context = [
        {"role": "user", "content": "older prompt"},
        {"role": "assistant", "content": "older answer"},
    ]
    prefixed_user_turn = {
        "role": "user",
        "content": f"[Workspace::v1: /tmp/project]\n{current}",
    }
    result_messages = [*previous_context, prefixed_user_turn]

    # Only the user turn was appended: no new assistant reply yet.
    without_reply = _assistant_reply_added_after_current_turn(
        result_messages,
        previous_context,
        current,
    )
    assert not without_reply

    # Once an assistant message follows the current turn, it is detected.
    with_reply = _assistant_reply_added_after_current_turn(
        result_messages + [{"role": "assistant", "content": "new answer"}],
        previous_context,
        current,
    )
    assert with_reply
def test_compacted_agent_result_keeps_old_prompts_and_appends_current_turn():
previous_display = [
{"role": "user", "content": "first prompt that must remain visible"},