From a2d7f311be76ee201a14eea15f9aa71aa668d55c Mon Sep 17 00:00:00 2001 From: bsgdigital Date: Fri, 24 Apr 2026 20:04:32 +0200 Subject: [PATCH] fix(streaming): prevent dropped characters in incremental smd path (#960) Detect prefix desync between current display text and already-streamed text, then rebuild the streaming-markdown parser from full content to avoid character loss during live rendering. Add regression assertions for the new desync guard. Made-with: Cursor Co-authored-by: bsgdigital --- static/messages.js | 18 +++++++++++++++++- tests/test_streaming_markdown.py | 12 ++++++++++++ 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/static/messages.js b/static/messages.js index c1623e75..02d2f3a5 100644 --- a/static/messages.js +++ b/static/messages.js @@ -192,6 +192,7 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){ // streaming-markdown state: incremental DOM-building parser per segment let _smdParser=null; // current smd parser instance (null until first content) let _smdWrittenLen=0; // how many chars of displayText have been fed to smd parser + let _smdWrittenText=''; // exact displayText snapshot used for prefix-alignment checks // On reconnect, the assistantBody already has partial smd-rendered content. // We clear it on first new token and restart the parser from the reconnect point. let _smdReconnect=reconnecting; @@ -384,6 +385,7 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){ // Called when assistantBody is first created and after each tool-call segment reset. function _smdNewParser(el){ _smdWrittenLen=0; + _smdWrittenText=''; if(!window.smd){_smdParser=null;return;} const renderer=window.smd.default_renderer(el); _smdParser=window.smd.parser(renderer); @@ -398,15 +400,29 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){ } _smdParser=null; _smdWrittenLen=0; + _smdWrittenText=''; } // Helper: feed new displayText delta to the smd parser. // Only feeds chars beyond what has already been written (_smdWrittenLen). function _smdWrite(displayText){ if(!_smdParser||!window.smd) return; - const delta=displayText.slice(_smdWrittenLen); + displayText=String(displayText||''); + // Self-heal desyncs: if displayText no longer starts with what we've already + // written (e.g. due to stream sanitization/tag stripping), incremental slicing + // can skip characters. Rebuild parser from the full current displayText. + if(_smdWrittenText && !displayText.startsWith(_smdWrittenText)){ + _smdParser=null; + _smdWrittenLen=0; + _smdWrittenText=''; + if(assistantBody) assistantBody.innerHTML=''; + _smdNewParser(assistantBody); + if(!_smdParser) return; + } + const delta=displayText.slice(_smdWrittenText.length); if(!delta) return; try{window.smd.parser_write(_smdParser,delta);}catch(_){} _smdWrittenLen=displayText.length; + _smdWrittenText=displayText; // streaming-markdown does NOT sanitize URL schemes — `[click](javascript:...)` // and `![alt](javascript:...)` survive as href/src. Strip any unsafe schemes // from anchors/images that were just added to the live DOM. The existing diff --git a/tests/test_streaming_markdown.py b/tests/test_streaming_markdown.py index c8c615c3..4d75d873 100644 --- a/tests/test_streaming_markdown.py +++ b/tests/test_streaming_markdown.py @@ -135,6 +135,12 @@ class TestClosureVariables: "_smdReconnect must be declared in the attachLiveStream closure scope" ) + def test_smd_written_text_declared(self): + prelude = self.get_prelude() + assert prelude and "_smdWrittenText" in prelude, ( + "_smdWrittenText must be declared in the attachLiveStream closure scope" + ) + def test_smd_parser_initialised_null(self): prelude = self.get_prelude() assert prelude and ( @@ -225,6 +231,12 @@ class TestSmdHelpers: "_smdWrite must advance _smdWrittenLen to displayText.length after writing" ) + def test_smd_write_has_prefix_desync_guard(self): + fn = extract_fn(MESSAGES_JS, "_smdWrite") + assert fn and "startsWith(_smdWrittenText)" in fn, ( + "_smdWrite must detect prefix desyncs and rebuild parser to avoid dropped chars" + ) + def test_smd_write_guards_on_parser(self): fn = extract_fn(MESSAGES_JS, "_smdWrite") assert fn and "_smdParser" in fn, (