mirror of
https://github.com/nesquena/hermes-webui.git
synced 2026-05-25 11:10:18 +00:00
120ec5eba2
When a provider error (401/429/rate-limit) causes the agent to return without producing a new assistant reply, the WebUI should emit an apperror event so the user sees an inline error. However, the detection logic scanned ALL messages in result['messages'] — which includes the full conversation history. If any prior turn had an assistant response, _assistant_added would be True and the apperror would be silently skipped, leaving the user staring at a blank response. Extract a helper _has_new_assistant_reply(all_messages, prev_count) that only inspects messages beyond the pre-turn history offset. Apply it to both the main detection path and the self-heal/retry path. Tests: 15 new cases covering history masking, empty content, whitespace, edge-case shrinks, and multi-assistant scenarios.
174 lines
8.3 KiB
Python
174 lines
8.3 KiB
Python
"""Tests for the silent-failure detection fix in api/streaming.py.
|
|
|
|
The core logic lives in the module-level helper ``_has_new_assistant_reply``,
|
|
which decides whether *new* messages (beyond the pre-turn history) contain an
|
|
assistant message with non-empty content.
|
|
|
|
These tests cover the 8 scenarios specified in the task description to ensure
|
|
that historical assistant messages don't mask a silent provider failure.
|
|
"""
|
|
|
|
import pytest
|
|
|
|
from api.streaming import _has_new_assistant_reply
|
|
|
|
|
|
# ── Helpers ──────────────────────────────────────────────────────────────────
|
|
|
|
def _msg(role: str, content: str) -> dict:
|
|
"""Shorthand for building a message dict."""
|
|
return {"role": role, "content": content}
|
|
|
|
|
|
# ── Test scenarios ───────────────────────────────────────────────────────────
|
|
|
|
class TestHasNewAssistantReply:
|
|
"""All 8 scenarios from the task specification."""
|
|
|
|
# Scenario 1 ──────────────────────────────────────────────────────────
|
|
def test_history_has_assistant_but_current_turn_failed(self):
|
|
"""History has assistant content, but no new assistant was added."""
|
|
prev = [
|
|
_msg("user", "hi"),
|
|
_msg("assistant", "hello"),
|
|
_msg("user", "what's up?"),
|
|
]
|
|
all_msgs = list(prev) # same length — nothing new
|
|
assert _has_new_assistant_reply(all_msgs, len(prev)) is False
|
|
|
|
# Scenario 2 ──────────────────────────────────────────────────────────
|
|
def test_history_has_assistant_and_new_reply_added(self):
|
|
"""New assistant reply was appended this turn → should detect it."""
|
|
prev = [
|
|
_msg("user", "hi"),
|
|
_msg("assistant", "hello"),
|
|
_msg("user", "what's up?"),
|
|
]
|
|
all_msgs = prev + [_msg("assistant", "not much, you?")]
|
|
assert _has_new_assistant_reply(all_msgs, len(prev)) is True
|
|
|
|
# Scenario 3 ──────────────────────────────────────────────────────────
|
|
def test_empty_history_empty_result(self):
|
|
"""Completely empty conversation → no assistant reply."""
|
|
assert _has_new_assistant_reply([], 0) is False
|
|
|
|
# Scenario 4 ──────────────────────────────────────────────────────────
|
|
def test_new_assistant_with_empty_content(self):
|
|
"""New assistant message added but content is empty string."""
|
|
prev = [
|
|
_msg("user", "hello"),
|
|
_msg("assistant", "hi there"),
|
|
]
|
|
all_msgs = prev + [_msg("assistant", "")]
|
|
assert _has_new_assistant_reply(all_msgs, len(prev)) is False
|
|
|
|
# Scenario 5 ──────────────────────────────────────────────────────────
|
|
def test_new_assistant_with_whitespace_content(self):
|
|
"""New assistant message added but content is only whitespace."""
|
|
prev = [
|
|
_msg("user", "hello"),
|
|
_msg("assistant", "hi there"),
|
|
]
|
|
all_msgs = prev + [_msg("assistant", " \n ")]
|
|
assert _has_new_assistant_reply(all_msgs, len(prev)) is False
|
|
|
|
# Scenario 6 ──────────────────────────────────────────────────────────
|
|
def test_long_history_new_assistant_at_tail(self):
|
|
"""Many historical messages; two new ones at the end, last is assistant."""
|
|
prev = [_msg("user", f"msg {i}") if i % 2 == 0 else _msg("assistant", f"reply {i}")
|
|
for i in range(10)]
|
|
# prev has 10 messages (indices 0..9)
|
|
all_msgs = prev + [
|
|
_msg("user", "new question"),
|
|
_msg("assistant", "new answer with real content"),
|
|
]
|
|
assert _has_new_assistant_reply(all_msgs, len(prev)) is True
|
|
|
|
# Scenario 7 ──────────────────────────────────────────────────────────
|
|
def test_result_length_equals_prev_len(self):
|
|
"""No new messages at all — result length == prev length."""
|
|
prev = [
|
|
_msg("user", "hi"),
|
|
_msg("assistant", "hey"),
|
|
]
|
|
all_msgs = list(prev)
|
|
assert _has_new_assistant_reply(all_msgs, len(prev)) is False
|
|
|
|
# Scenario 8 ──────────────────────────────────────────────────────────
|
|
def test_result_shorter_than_prev_len_fallback(self):
|
|
"""Edge-case: result messages < prev_count → fallback to scanning all.
|
|
|
|
The helper falls back to scanning all messages when the slice would
|
|
be empty. In this scenario, if an assistant message exists in the
|
|
(shorter) result it should still be detected.
|
|
"""
|
|
prev_count = 5
|
|
# Only 3 messages in result — shorter than prev_count
|
|
all_msgs = [
|
|
_msg("user", "a"),
|
|
_msg("assistant", "b"),
|
|
_msg("user", "c"),
|
|
]
|
|
# Fallback scans all → assistant with content "b" is found
|
|
assert _has_new_assistant_reply(all_msgs, prev_count) is True
|
|
|
|
# But if no assistant content in the shorter result → False
|
|
all_msgs_no_asst = [
|
|
_msg("user", "a"),
|
|
_msg("user", "b"),
|
|
_msg("user", "c"),
|
|
]
|
|
assert _has_new_assistant_reply(all_msgs_no_asst, prev_count) is False
|
|
|
|
|
|
# ── Additional edge-case tests ───────────────────────────────────────────────
|
|
|
|
class TestHasNewAssistantReplyEdgeCases:
|
|
"""Extra coverage for content field variants."""
|
|
|
|
def test_content_is_none(self):
|
|
"""assistant message with content=None should not count."""
|
|
prev = [_msg("user", "hi")]
|
|
all_msgs = prev + [{"role": "assistant", "content": None}]
|
|
assert _has_new_assistant_reply(all_msgs, len(prev)) is False
|
|
|
|
def test_content_is_missing_key(self):
|
|
"""assistant message without 'content' key should not count."""
|
|
prev = [_msg("user", "hi")]
|
|
all_msgs = prev + [{"role": "assistant"}]
|
|
assert _has_new_assistant_reply(all_msgs, len(prev)) is False
|
|
|
|
def test_non_assistant_role_in_new_messages(self):
|
|
"""Only 'assistant' role counts; 'user' or 'system' in new msgs → False."""
|
|
prev = [_msg("user", "hi")]
|
|
all_msgs = prev + [_msg("user", "follow-up")]
|
|
assert _has_new_assistant_reply(all_msgs, len(prev)) is False
|
|
|
|
def test_prev_count_zero_with_assistant(self):
|
|
"""prev_count=0 with a new assistant → scans from index 0, finds it."""
|
|
all_msgs = [_msg("assistant", "hello")]
|
|
assert _has_new_assistant_reply(all_msgs, 0) is True
|
|
|
|
def test_prev_count_zero_without_assistant(self):
|
|
"""prev_count=0 with only user messages → False."""
|
|
all_msgs = [_msg("user", "hello")]
|
|
assert _has_new_assistant_reply(all_msgs, 0) is False
|
|
|
|
def test_multiple_new_assistant_first_empty_second_has_content(self):
|
|
"""First new assistant is empty, second has content → True."""
|
|
prev = [_msg("user", "q")]
|
|
all_msgs = prev + [
|
|
_msg("assistant", ""),
|
|
_msg("assistant", "actual content"),
|
|
]
|
|
assert _has_new_assistant_reply(all_msgs, len(prev)) is True
|
|
|
|
def test_multiple_new_assistant_all_empty(self):
|
|
"""Multiple new assistant messages, all empty → False."""
|
|
prev = [_msg("user", "q")]
|
|
all_msgs = prev + [
|
|
_msg("assistant", ""),
|
|
_msg("assistant", " "),
|
|
]
|
|
assert _has_new_assistant_reply(all_msgs, len(prev)) is False
|