Files
hermes-webui/tests/test_silent_failure_detection.py
T
fxd-jason 120ec5eba2 fix: silent failure detection scans only new messages, not full history
When a provider error (401/429/rate-limit) causes the agent to return
without producing a new assistant reply, the WebUI should emit an
apperror event so the user sees an inline error. However, the detection
logic scanned ALL messages in result['messages'] — which includes the
full conversation history. If any prior turn had an assistant response,
_assistant_added would be True and the apperror would be silently
skipped, leaving the user staring at a blank response.

Extract a helper _has_new_assistant_reply(all_messages, prev_count)
that only inspects messages beyond the pre-turn history offset. Apply
it to both the main detection path and the self-heal/retry path.

Tests: 15 new cases covering history masking, empty content, whitespace,
edge-case shrinks, and multi-assistant scenarios.
2026-05-14 14:34:19 +08:00

174 lines
8.3 KiB
Python

"""Tests for the silent-failure detection fix in api/streaming.py.
The core logic lives in the module-level helper ``_has_new_assistant_reply``,
which decides whether *new* messages (beyond the pre-turn history) contain an
assistant message with non-empty content.
These tests cover the 8 scenarios specified in the task description to ensure
that historical assistant messages don't mask a silent provider failure.
"""
import pytest
from api.streaming import _has_new_assistant_reply
# ── Helpers ──────────────────────────────────────────────────────────────────
def _msg(role: str, content: str) -> dict:
"""Shorthand for building a message dict."""
return {"role": role, "content": content}
# ── Test scenarios ───────────────────────────────────────────────────────────
class TestHasNewAssistantReply:
"""All 8 scenarios from the task specification."""
# Scenario 1 ──────────────────────────────────────────────────────────
def test_history_has_assistant_but_current_turn_failed(self):
"""History has assistant content, but no new assistant was added."""
prev = [
_msg("user", "hi"),
_msg("assistant", "hello"),
_msg("user", "what's up?"),
]
all_msgs = list(prev) # same length — nothing new
assert _has_new_assistant_reply(all_msgs, len(prev)) is False
# Scenario 2 ──────────────────────────────────────────────────────────
def test_history_has_assistant_and_new_reply_added(self):
"""New assistant reply was appended this turn → should detect it."""
prev = [
_msg("user", "hi"),
_msg("assistant", "hello"),
_msg("user", "what's up?"),
]
all_msgs = prev + [_msg("assistant", "not much, you?")]
assert _has_new_assistant_reply(all_msgs, len(prev)) is True
# Scenario 3 ──────────────────────────────────────────────────────────
def test_empty_history_empty_result(self):
"""Completely empty conversation → no assistant reply."""
assert _has_new_assistant_reply([], 0) is False
# Scenario 4 ──────────────────────────────────────────────────────────
def test_new_assistant_with_empty_content(self):
"""New assistant message added but content is empty string."""
prev = [
_msg("user", "hello"),
_msg("assistant", "hi there"),
]
all_msgs = prev + [_msg("assistant", "")]
assert _has_new_assistant_reply(all_msgs, len(prev)) is False
# Scenario 5 ──────────────────────────────────────────────────────────
def test_new_assistant_with_whitespace_content(self):
"""New assistant message added but content is only whitespace."""
prev = [
_msg("user", "hello"),
_msg("assistant", "hi there"),
]
all_msgs = prev + [_msg("assistant", " \n ")]
assert _has_new_assistant_reply(all_msgs, len(prev)) is False
# Scenario 6 ──────────────────────────────────────────────────────────
def test_long_history_new_assistant_at_tail(self):
"""Many historical messages; two new ones at the end, last is assistant."""
prev = [_msg("user", f"msg {i}") if i % 2 == 0 else _msg("assistant", f"reply {i}")
for i in range(10)]
# prev has 10 messages (indices 0..9)
all_msgs = prev + [
_msg("user", "new question"),
_msg("assistant", "new answer with real content"),
]
assert _has_new_assistant_reply(all_msgs, len(prev)) is True
# Scenario 7 ──────────────────────────────────────────────────────────
def test_result_length_equals_prev_len(self):
"""No new messages at all — result length == prev length."""
prev = [
_msg("user", "hi"),
_msg("assistant", "hey"),
]
all_msgs = list(prev)
assert _has_new_assistant_reply(all_msgs, len(prev)) is False
# Scenario 8 ──────────────────────────────────────────────────────────
def test_result_shorter_than_prev_len_fallback(self):
"""Edge-case: result messages < prev_count → fallback to scanning all.
The helper falls back to scanning all messages when the slice would
be empty. In this scenario, if an assistant message exists in the
(shorter) result it should still be detected.
"""
prev_count = 5
# Only 3 messages in result — shorter than prev_count
all_msgs = [
_msg("user", "a"),
_msg("assistant", "b"),
_msg("user", "c"),
]
# Fallback scans all → assistant with content "b" is found
assert _has_new_assistant_reply(all_msgs, prev_count) is True
# But if no assistant content in the shorter result → False
all_msgs_no_asst = [
_msg("user", "a"),
_msg("user", "b"),
_msg("user", "c"),
]
assert _has_new_assistant_reply(all_msgs_no_asst, prev_count) is False
# ── Additional edge-case tests ───────────────────────────────────────────────
class TestHasNewAssistantReplyEdgeCases:
"""Extra coverage for content field variants."""
def test_content_is_none(self):
"""assistant message with content=None should not count."""
prev = [_msg("user", "hi")]
all_msgs = prev + [{"role": "assistant", "content": None}]
assert _has_new_assistant_reply(all_msgs, len(prev)) is False
def test_content_is_missing_key(self):
"""assistant message without 'content' key should not count."""
prev = [_msg("user", "hi")]
all_msgs = prev + [{"role": "assistant"}]
assert _has_new_assistant_reply(all_msgs, len(prev)) is False
def test_non_assistant_role_in_new_messages(self):
"""Only 'assistant' role counts; 'user' or 'system' in new msgs → False."""
prev = [_msg("user", "hi")]
all_msgs = prev + [_msg("user", "follow-up")]
assert _has_new_assistant_reply(all_msgs, len(prev)) is False
def test_prev_count_zero_with_assistant(self):
"""prev_count=0 with a new assistant → scans from index 0, finds it."""
all_msgs = [_msg("assistant", "hello")]
assert _has_new_assistant_reply(all_msgs, 0) is True
def test_prev_count_zero_without_assistant(self):
"""prev_count=0 with only user messages → False."""
all_msgs = [_msg("user", "hello")]
assert _has_new_assistant_reply(all_msgs, 0) is False
def test_multiple_new_assistant_first_empty_second_has_content(self):
"""First new assistant is empty, second has content → True."""
prev = [_msg("user", "q")]
all_msgs = prev + [
_msg("assistant", ""),
_msg("assistant", "actual content"),
]
assert _has_new_assistant_reply(all_msgs, len(prev)) is True
def test_multiple_new_assistant_all_empty(self):
"""Multiple new assistant messages, all empty → False."""
prev = [_msg("user", "q")]
all_msgs = prev + [
_msg("assistant", ""),
_msg("assistant", " "),
]
assert _has_new_assistant_reply(all_msgs, len(prev)) is False