mirror of
https://github.com/nesquena/hermes-webui.git
synced 2026-05-24 18:50:15 +00:00
fix(streaming): strip malformed DSML function_calls tags (#958)
Handle DeepSeek DSML variants, including truncated and spaced tag forms, and sanitize thinking-card text so that leaked XML fragments never render. Add regression tests for DSML edge cases and thinking-card sanitization.

Made-with: Cursor
Co-authored-by: bsgdigital <bsg@bsgdigital.com>
This commit is contained in:
+26
-5
@@ -93,14 +93,35 @@ def _strip_xml_tool_calls(text: str) -> str:
|
||||
|
||||
Handles both complete blocks (<function_calls>…</function_calls>) and
|
||||
partial/orphaned opening tags that may appear at the tail of a stream.
|
||||
Also handles variants like <|DSML|function_calls> from DeepSeek on Bedrock.
|
||||
"""
|
||||
if not text or '<function_calls>' not in text.lower():
|
||||
if not text:
|
||||
return text
|
||||
s = str(text)
|
||||
# Strip complete blocks (possibly multiple)
|
||||
s = re.sub(r'<function_calls>.*?</function_calls>', '', s, flags=re.IGNORECASE | re.DOTALL)
|
||||
# Strip orphaned opening tags (stream cut off before closing tag)
|
||||
s = re.sub(r'<function_calls>.*$', '', s, flags=re.IGNORECASE | re.DOTALL)
|
||||
# Check if contains any function_calls/DSML marker (case-insensitive)
|
||||
_lo = s.lower()
|
||||
if 'function_calls' not in _lo and 'dsml' not in _lo:
|
||||
return text
|
||||
|
||||
_dsml_prefix = r'(?:\s*|\s*DSML\s*[||]\s*)?'
|
||||
open_tag = rf'<{_dsml_prefix}function_calls'
|
||||
close_tag = rf'</{_dsml_prefix}function_calls>'
|
||||
# Strip complete blocks for both <function_calls> and <|DSML|function_calls>.
|
||||
s = re.sub(
|
||||
rf'{open_tag}>.*?{close_tag}',
|
||||
'',
|
||||
s,
|
||||
flags=re.IGNORECASE | re.DOTALL
|
||||
)
|
||||
# Strip orphaned/truncated opening tags, including missing ">" at stream tail.
|
||||
s = re.sub(
|
||||
rf'{open_tag}(?:>|$).*$',
|
||||
'',
|
||||
s,
|
||||
flags=re.IGNORECASE | re.DOTALL
|
||||
)
|
||||
# Remove malformed DSML fragments like "<|DSML |" that can leak in tokens.
|
||||
s = re.sub(r'<\s*|\s*DSML\s*[||]\s*', '', s, flags=re.IGNORECASE)
|
||||
return s.strip()
|
||||
|
||||
|
||||
|
||||
+11
-3
@@ -300,9 +300,17 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
|
||||
// Strip <function_calls>...</function_calls> blocks (DeepSeek XML tool syntax).
|
||||
// These are processed as tool calls server-side; showing them raw in the bubble
|
||||
// looks broken. Also handles orphaned opening tags mid-stream. (#702)
|
||||
if(!s||s.toLowerCase().indexOf('<function_calls>')===-1) return s;
|
||||
s=s.replace(/<function_calls>[\s\S]*?<\/function_calls>/gi,'');
|
||||
s=s.replace(/<function_calls>[\s\S]*$/i,'');
|
||||
// Also handles DSML-prefixed variants from DeepSeek/Bedrock, including
|
||||
// spacing variants like "<|DSML |function_calls" and truncated prefixes.
|
||||
if(!s) return s;
|
||||
const lo=String(s).toLowerCase();
|
||||
if(lo.indexOf('function_calls')===-1 && lo.indexOf('dsml')===-1) return s;
|
||||
// Support both plain <function_calls> and DSML-prefixed variants.
|
||||
s=s.replace(/<(?:\s*|\s*DSML\s*[||]\s*)?function_calls>[\s\S]*?<\/(?:\s*|\s*DSML\s*[||]\s*)?function_calls>/gi,'');
|
||||
// Also remove truncated opening tags (missing closing ">" at stream tail).
|
||||
s=s.replace(/<(?:\s*|\s*DSML\s*[||]\s*)?function_calls(?:>|$)[\s\S]*$/i,'');
|
||||
// Remove malformed DSML tag fragments like "<|DSML |" that can leak in tokens.
|
||||
s=s.replace(/<\s*|\s*DSML\s*[||]\s*/gi,'');
|
||||
return s.trim();
|
||||
}
|
||||
function _streamDisplay(){
|
||||
|
||||
+21
-6
@@ -629,12 +629,25 @@ function _stripXmlToolCallsDisplay(s){
|
||||
// similar models in their raw response text. These are processed separately
|
||||
// as tool calls; leaving them in the content causes them to render visibly
|
||||
// in the settled chat bubble. (#702)
|
||||
if(!s||s.toLowerCase().indexOf('<function_calls>')===-1) return s;
|
||||
s=s.replace(/<function_calls>[\s\S]*?<\/function_calls>/gi,'');
|
||||
s=s.replace(/<function_calls>[\s\S]*$/i,'');
|
||||
// Also handles DSML-prefixed variants from DeepSeek/Bedrock, including
|
||||
// spacing variants like "<|DSML |function_calls" and truncated prefixes.
|
||||
if(!s) return s;
|
||||
const lo=String(s).toLowerCase();
|
||||
if(lo.indexOf('function_calls')===-1 && lo.indexOf('dsml')===-1) return s;
|
||||
// Support both plain <function_calls> and DSML-prefixed variants.
|
||||
s=s.replace(/<(?:\s*|\s*DSML\s*[||]\s*)?function_calls>[\s\S]*?<\/(?:\s*|\s*DSML\s*[||]\s*)?function_calls>/gi,'');
|
||||
// Also remove truncated opening tags (missing closing ">" at stream tail).
|
||||
s=s.replace(/<(?:\s*|\s*DSML\s*[||]\s*)?function_calls(?:>|$)[\s\S]*$/i,'');
|
||||
// Remove malformed DSML tag fragments like "<|DSML |" that can leak in tokens.
|
||||
s=s.replace(/<\s*|\s*DSML\s*[||]\s*/gi,'');
|
||||
return s.trim();
|
||||
}
|
||||
|
||||
function _sanitizeThinkingDisplayText(text){
|
||||
const stripped=_stripXmlToolCallsDisplay(String(text||''));
|
||||
return stripped.trim();
|
||||
}
|
||||
|
||||
function renderMd(raw){
|
||||
let s=raw||'';
|
||||
// ── MEDIA: token stash (must run first, before any other processing) ───────
|
||||
@@ -1476,7 +1489,8 @@ function _assistantTurnBlocks(turn){
|
||||
return turn?turn.querySelector('.assistant-turn-blocks'):null;
|
||||
}
|
||||
function _thinkingCardHtml(text){
|
||||
return `<div class="thinking-card"><div class="thinking-card-header" onclick="this.parentElement.classList.toggle('open')"><span class="thinking-card-icon">${li('lightbulb',14)}</span><span class="thinking-card-label">${t('thinking')}</span><span class="thinking-card-toggle">${li('chevron-right',12)}</span></div><div class="thinking-card-body"><pre>${esc(text)}</pre></div></div>`;
|
||||
const clean=_sanitizeThinkingDisplayText(text);
|
||||
return `<div class="thinking-card"><div class="thinking-card-header" onclick="this.parentElement.classList.toggle('open')"><span class="thinking-card-icon">${li('lightbulb',14)}</span><span class="thinking-card-label">${t('thinking')}</span><span class="thinking-card-toggle">${li('chevron-right',12)}</span></div><div class="thinking-card-body"><pre>${esc(clean)}</pre></div></div>`;
|
||||
}
|
||||
function _compressionStateForCurrentSession(){
|
||||
const state=window._compressionUi;
|
||||
@@ -2383,8 +2397,9 @@ function renderKatexBlocks(){
|
||||
}
|
||||
|
||||
function _thinkingMarkup(text=''){
|
||||
return (text&&String(text).trim())
|
||||
? `<div class="thinking-card open"><div class="thinking-card-header" onclick="this.parentElement.classList.toggle('open')"><span class="thinking-card-icon">${li('lightbulb',14)}</span><span class="thinking-card-label">${t('thinking')}</span><span class="thinking-card-toggle">${li('chevron-right',12)}</span></div><div class="thinking-card-body"><pre>${esc(String(text).trim())}</pre></div></div>`
|
||||
const clean=_sanitizeThinkingDisplayText(text);
|
||||
return (clean&&String(clean).trim())
|
||||
? `<div class="thinking-card open"><div class="thinking-card-header" onclick="this.parentElement.classList.toggle('open')"><span class="thinking-card-icon">${li('lightbulb',14)}</span><span class="thinking-card-label">${t('thinking')}</span><span class="thinking-card-toggle">${li('chevron-right',12)}</span></div><div class="thinking-card-body"><pre>${esc(String(clean).trim())}</pre></div></div>`
|
||||
: `<div class="thinking"><div class="dot"></div><div class="dot"></div><div class="dot"></div></div>`;
|
||||
}
|
||||
function finalizeThinkingCard(){
|
||||
|
||||
@@ -88,6 +88,21 @@ class TestXmlToolCallStrip:
|
||||
assert 'middle' in result
|
||||
assert 'end' in result
|
||||
|
||||
def test_dsml_prefixed_truncated_opening_tag_removed(self):
|
||||
fn = self._load_fn()
|
||||
text = "Answer before tool tag <|DSML|function_calls"
|
||||
result = fn(text)
|
||||
assert 'function_calls' not in result.lower()
|
||||
assert 'Answer before tool tag' in result
|
||||
|
||||
def test_malformed_dsml_fragment_removed(self):
|
||||
fn = self._load_fn()
|
||||
text = "Answer <|DSML | still streaming"
|
||||
result = fn(text)
|
||||
assert '<|DSML |' not in result
|
||||
assert 'Answer' in result
|
||||
assert 'still streaming' in result
|
||||
|
||||
def test_function_defined_in_streaming_py(self):
|
||||
src = read('api/streaming.py')
|
||||
assert 'def _strip_xml_tool_calls(' in src, (
|
||||
@@ -121,6 +136,18 @@ class TestXmlToolCallStrip:
|
||||
"_stripXmlToolCallsDisplay must exist in static/ui.js"
|
||||
)
|
||||
|
||||
def test_thinking_card_text_is_sanitized(self):
|
||||
src = read('static/ui.js')
|
||||
assert '_sanitizeThinkingDisplayText' in src, (
|
||||
"Thinking card text sanitizer must exist in static/ui.js"
|
||||
)
|
||||
assert '_thinkingCardHtml' in src and '_thinkingMarkup' in src, (
|
||||
"Thinking card render helpers must exist in static/ui.js"
|
||||
)
|
||||
assert src.count('_sanitizeThinkingDisplayText(') >= 3, (
|
||||
"Thinking card helpers must call _sanitizeThinkingDisplayText"
|
||||
)
|
||||
|
||||
|
||||
# ── Bug #703 — Workspace file panel empty state ───────────────────────────────
|
||||
|
||||
|
||||
Reference in New Issue
Block a user